feat: load personas from target repo .review-bot/personas/ #61

Merged
aweiker merged 4 commits from issue-60 into main 2026-05-11 02:54:46 +00:00
4 changed files with 667 additions and 24 deletions
+67 -24
View File
@@ -79,7 +79,6 @@ func main() {
aicoreAPIURL := flag.String("aicore-api-url", envOrDefault("AICORE_API_URL", ""), "SAP AI Core API URL (for provider=aicore)")
aicoreResourceGroup := flag.String("aicore-resource-group", envOrDefault("AICORE_RESOURCE_GROUP", "default"), "SAP AI Core resource group (for provider=aicore)")
Review

[MINOR] Duplicate flag.Parse() call — the diff shows the old flag.Parse() was removed but the remaining one on line 81 is a leftover duplicate. The full file content shows flag.Parse() appears twice consecutively (lines 80-81). This is a copy-paste artifact from the diff. It is harmless since repeated flag.Parse() calls are idempotent, but it's noise and should be cleaned up.

**[MINOR]** Duplicate `flag.Parse()` call — the diff shows the old `flag.Parse()` was removed but the remaining one on line 81 is a leftover duplicate. The full file content shows `flag.Parse()` appears twice consecutively (lines 80-81). This is a copy-paste artifact from the diff. It is harmless since repeated `flag.Parse()` calls are idempotent, but it's noise and should be cleaned up.
flag.Parse()
flag.Parse()
Review

[MINOR] Duplicate flag.Parse() call: the diff shows the original flag.Parse() was removed and a new one kept, but the full file content shows flag.Parse() appears only once. This appears to be a diff artifact (the - removed one and the context line shows the kept one). Verify the compiled binary doesn't call flag.Parse() twice — if it does, this should be fixed as double-parsing has undefined behavior for some flag types.

**[MINOR]** Duplicate `flag.Parse()` call: the diff shows the original `flag.Parse()` was removed and a new one kept, but the full file content shows `flag.Parse()` appears only once. This appears to be a diff artifact (the `-` removed one and the context line shows the kept one). Verify the compiled binary doesn't call `flag.Parse()` twice — if it does, this should be fixed as double-parsing has undefined behavior for some flag types.
if *versionFlag {
@@ -116,29 +115,7 @@ func main() {
os.Exit(1)
}
// Load persona if specified
var persona *review.Persona
if *personaName != "" {
var err error
persona, err = review.LoadBuiltinPersona(*personaName)
if err != nil {
slog.Error("failed to load persona", "persona", *personaName, "error", err)
os.Exit(1)
}
slog.Info("loaded built-in persona", "persona", persona.Name, "display", persona.DisplayName)
} else if *personaFile != "" {
resolvedPath, err := validateWorkspacePath(*personaFile, "persona-file")
if err != nil {
slog.Error("invalid persona-file path", "error", err)
os.Exit(1)
}
persona, err = review.LoadPersona(resolvedPath)
if err != nil {
slog.Error("failed to load persona file", "file", *personaFile, "error", err)
os.Exit(1)
}
slog.Info("loaded persona from file", "file", *personaFile, "persona", persona.Name)
}
// NOTE: Persona loading deferred until after Gitea client init to support repo personas
// Validate reviewer-name: only safe characters allowed in sentinel
if err := validateReviewerName(*reviewerName); err != nil {
@@ -196,6 +173,43 @@ func main() {
ctx, cancel := context.WithTimeout(context.Background(), overallTimeout)
Review

[NIT] When LoadRepoPersonas returns an error, the code relies on nil-map indexing semantics for the fallback. While correct, initializing repoPersonas to an empty map on error may improve readability.

**[NIT]** When LoadRepoPersonas returns an error, the code relies on nil-map indexing semantics for the fallback. While correct, initializing repoPersonas to an empty map on error may improve readability.
defer cancel()
// Load persona if specified (after Gitea client init to support repo personas)
var persona *review.Persona
if *personaName != "" {
// Try loading from repo first, then fall back to built-in
repoPersonas, err := review.LoadRepoPersonas(ctx, newGiteaClientAdapter(giteaClient), owner, repoName)
if err != nil {
slog.Warn("could not load repo personas", "repo", owner+"/"+repoName, "error", err)
// Continue with built-in personas only.
Outdated
Review

[MINOR] When LoadRepoPersonas returns an error (non-404), the code logs a warning and continues with repoPersonas being nil (its zero value). The subsequent if p, ok := repoPersonas[*personaName] safely handles nil maps (map indexing on nil returns zero value), so this works correctly, but it's subtle. A reader might wonder if repoPersonas could cause a panic. Consider initializing repoPersonas to an empty map before the error check, or adding a brief comment that nil map indexing is safe in Go.

**[MINOR]** When `LoadRepoPersonas` returns an error (non-404), the code logs a warning and continues with `repoPersonas` being nil (its zero value). The subsequent `if p, ok := repoPersonas[*personaName]` safely handles nil maps (map indexing on nil returns zero value), so this works correctly, but it's subtle. A reader might wonder if `repoPersonas` could cause a panic. Consider initializing `repoPersonas` to an empty map before the error check, or adding a brief comment that nil map indexing is safe in Go.
Review

[MINOR] When LoadRepoPersonas returns an error, repoPersonas is nil. The comment correctly notes that map indexing on a nil map is safe in Go (returns zero value). However, err from LoadRepoPersonas is shadowed — the outer var persona *review.Persona block has no err declaration, so the err used in persona, err = review.LoadBuiltinPersona(...) on ~line 196 is an undeclared variable in the else branch. Check that the err variable is properly scoped here — it may need a short variable declaration (:=) in the else branch.

**[MINOR]** When `LoadRepoPersonas` returns an error, `repoPersonas` is nil. The comment correctly notes that map indexing on a nil map is safe in Go (returns zero value). However, `err` from `LoadRepoPersonas` is shadowed — the outer `var persona *review.Persona` block has no `err` declaration, so the `err` used in `persona, err = review.LoadBuiltinPersona(...)` on ~line 196 is an undeclared variable in the else branch. Check that the `err` variable is properly scoped here — it may need a short variable declaration (`:=`) in the else branch.
// NOTE: repoPersonas is nil here, but map indexing on a nil map is safe in Go
// (returns the zero value), so the fallback to built-in below works correctly.
Outdated
Review

[MINOR] When LoadRepoPersonas returns an error, repoPersonas will be nil (not an empty map), but the subsequent repoPersonas[*personaName] map index on line ~187 is safe in Go (indexing a nil map returns the zero value, false). However, the err variable from LoadRepoPersonas is shadowed by the later persona, err = review.LoadBuiltinPersona(...) assignment at line ~193, which is correct since the outer err is already declared. The logic is sound but slightly subtle — consider using a blank identifier or a named variable repoErr to make the intent clearer.

**[MINOR]** When `LoadRepoPersonas` returns an error, `repoPersonas` will be nil (not an empty map), but the subsequent `repoPersonas[*personaName]` map index on line ~187 is safe in Go (indexing a nil map returns the zero value, false). However, the `err` variable from `LoadRepoPersonas` is shadowed by the later `persona, err = review.LoadBuiltinPersona(...)` assignment at line ~193, which is correct since the outer `err` is already declared. The logic is sound but slightly subtle — consider using a blank identifier or a named variable `repoErr` to make the intent clearer.
Review

[MINOR] When LoadRepoPersonas returns an error, repoPersonas is nil and the code falls through to repoPersonas[*personaName]. The comment correctly notes this is safe in Go (nil map read returns zero value), but the variable err is declared via repoPersonas, err := ... and then reassigned in the else branch with persona, err = review.LoadBuiltinPersona(...). This works due to Go's scoping rules but the comment about nil map safety would be clearer if placed directly above the map indexing line rather than after the Warn log.

**[MINOR]** When `LoadRepoPersonas` returns an error, `repoPersonas` is `nil` and the code falls through to `repoPersonas[*personaName]`. The comment correctly notes this is safe in Go (nil map read returns zero value), but the variable `err` is declared via `repoPersonas, err := ...` and then reassigned in the `else` branch with `persona, err = review.LoadBuiltinPersona(...)`. This works due to Go's scoping rules but the comment about nil map safety would be clearer if placed directly above the map indexing line rather than after the Warn log.
}
if p, ok := repoPersonas[*personaName]; ok {
persona = p
slog.Info("loaded repo persona", "persona", persona.Name, "display", persona.DisplayName, "repo", owner+"/"+repoName)
} else {
// Fall back to built-in
persona, err = review.LoadBuiltinPersona(*personaName)
if err != nil {
slog.Error("failed to load persona", "persona", *personaName, "error", err)
os.Exit(1)
}
slog.Info("loaded built-in persona", "persona", persona.Name, "display", persona.DisplayName)
}
} else if *personaFile != "" {
resolvedPath, err := validateWorkspacePath(*personaFile, "persona-file")
if err != nil {
slog.Error("invalid persona-file path", "error", err)
os.Exit(1)
}
persona, err = review.LoadPersona(resolvedPath)
if err != nil {
slog.Error("failed to load persona file", "file", *personaFile, "error", err)
os.Exit(1)
}
slog.Info("loaded persona from file", "file", *personaFile, "persona", persona.Name)
}
slog.Info("reviewing pull request", "pr", prNumber, "repo", fmt.Sprintf("%s/%s", owner, repoName))
// Step 1: Fetch PR metadata
@@ -783,3 +797,32 @@ func shouldSkipStaleReview(evaluatedSHA, currentSHA string) bool {
}
return evaluatedSHA != currentSHA
}
// giteaClientAdapter adapts gitea.Client to review.GiteaClient interface.
type giteaClientAdapter struct {
client *gitea.Client
}
func newGiteaClientAdapter(c *gitea.Client) *giteaClientAdapter {
return &giteaClientAdapter{client: c}
}
func (a *giteaClientAdapter) ListContents(ctx context.Context, owner, repo, path string) ([]review.ContentEntry, error) {
entries, err := a.client.ListContents(ctx, owner, repo, path)
if err != nil {
return nil, err
}
result := make([]review.ContentEntry, len(entries))
for i, e := range entries {
result[i] = review.ContentEntry{
Name: e.Name,
Path: e.Path,
Type: e.Type,
}
}
return result, nil
}
Review

[NIT] The giteaClientAdapter struct and its constructor/methods could live in a separate file (e.g., cmd/review-bot/adapter.go) to keep main.go focused, but this is a very minor organizational concern.

**[NIT]** The `giteaClientAdapter` struct and its constructor/methods could live in a separate file (e.g., `cmd/review-bot/adapter.go`) to keep `main.go` focused, but this is a very minor organizational concern.
func (a *giteaClientAdapter) GetFileContent(ctx context.Context, owner, repo, filepath string) (string, error) {
return a.client.GetFileContent(ctx, owner, repo, filepath)
}
+7
View File
@@ -224,6 +224,13 @@ func checkYAMLDepth(node *yaml.Node, depth, maxDepth, maxNodes int, seen map[*ya
return nil
}
// ParsePersonaBytes parses persona data from bytes with a source label for errors.
// This is useful for parsing personas fetched from external sources (e.g., Gitea API)
// without requiring filesystem access. Format is detected by source extension.
func ParsePersonaBytes(data []byte, source string) (*Persona, error) {
return parsePersona(data, source)
Review

[NIT] ParsePersonaBytes delegates to parsePersona, which is fine, but does not apply any size checks like LoadPersona. Consider documenting that size limits are only enforced for filesystem sources, or add a size check for external sources if appropriate.

**[NIT]** ParsePersonaBytes delegates to parsePersona, which is fine, but does not apply any size checks like LoadPersona. Consider documenting that size limits are only enforced for filesystem sources, or add a size check for external sources if appropriate.
}
func validatePersona(p *Persona, source string) error {
if p.Name == "" {
return fmt.Errorf("persona %s: name is required", source)
+150
View File
@@ -0,0 +1,150 @@
package review
import (
"context"
"log/slog"
"strings"
)
// RepoPersonaPath is the directory path where repo-specific personas are stored.
const RepoPersonaPath = ".review-bot/personas"
// GiteaClient defines the subset of gitea.Client methods needed for loading repo personas.
// This interface allows for easier testing and decouples the review package from gitea.
Review

[NIT] The GiteaClient interface name is somewhat generic for a package named review. Since this interface is exported and lives in review, callers see it as review.GiteaClient. A name like RepoFileClient or ContentClient would be more descriptive of what it actually does (list/get file contents) rather than being named after a specific vendor.

**[NIT]** The `GiteaClient` interface name is somewhat generic for a package named `review`. Since this interface is exported and lives in `review`, callers see it as `review.GiteaClient`. A name like `RepoFileClient` or `ContentClient` would be more descriptive of what it actually does (list/get file contents) rather than being named after a specific vendor.
type GiteaClient interface {
ListContents(ctx context.Context, owner, repo, path string) ([]ContentEntry, error)
GetFileContent(ctx context.Context, owner, repo, filepath string) (string, error)
}
// ContentEntry represents a file or directory entry from the contents API.
// This mirrors gitea.ContentEntry to avoid import cycles.
type ContentEntry struct {
Name string `json:"name"`
Path string `json:"path"`
Type string `json:"type"` // "file" or "dir"
}
// LoadRepoPersonas fetches personas from a repository's .review-bot/personas/ directory.
// Returns an empty map (not nil) if the directory doesn't exist or is empty.
// Individual parse failures are logged and skipped; the remaining personas are still returned.
// Auth errors and other non-404 errors are propagated.
// Files exceeding MaxPersonaFileSize are rejected to prevent resource exhaustion.
func LoadRepoPersonas(ctx context.Context, client GiteaClient, owner, repo string) (map[string]*Persona, error) {
result := make(map[string]*Persona)
entries, err := client.ListContents(ctx, owner, repo, RepoPersonaPath)
if err != nil {
// Check if this is a 404 (directory doesn't exist) - expected case
if isNotFoundError(err) {
slog.Debug("no repo personas directory found", "repo", owner+"/"+repo)
return result, nil
}
// Other errors (auth, server) should propagate
return nil, err
}
Review

[MINOR] Unbounded number of files processed: the loader iterates over all entries in .review-bot/personas and fetches each YAML file. A repository could include a very large number of small files to cause many network requests and parses, leading to CPU and memory pressure. Consider imposing a reasonable cap on the number of persona files processed.

**[MINOR]** Unbounded number of files processed: the loader iterates over all entries in .review-bot/personas and fetches each YAML file. A repository could include a very large number of small files to cause many network requests and parses, leading to CPU and memory pressure. Consider imposing a reasonable cap on the number of persona files processed.
if len(entries) == 0 {
slog.Debug("repo personas directory is empty", "repo", owner+"/"+repo)
return result, nil
}
for _, entry := range entries {
Review

[NIT] LoadRepoPersonas logs warnings for per-file fetch/parse failures inside the library package. Consider returning these details (e.g., as a slice of skipped files) and letting callers decide how to log to reduce potential log noise from dependencies.

**[NIT]** LoadRepoPersonas logs warnings for per-file fetch/parse failures inside the library package. Consider returning these details (e.g., as a slice of skipped files) and letting callers decide how to log to reduce potential log noise from dependencies.
Review

[MINOR] LoadRepoPersonas iterates and fetches every YAML file under .review-bot/personas without a per-repo cap. A malicious repo could place a very large number of small YAML files to increase API calls and parsing, causing resource usage spikes within the time budget. Consider short-circuiting when a specific persona is requested (e.g., try .yaml/.yml first) or enforcing a reasonable maximum number of files processed.

**[MINOR]** LoadRepoPersonas iterates and fetches every YAML file under .review-bot/personas without a per-repo cap. A malicious repo could place a very large number of small YAML files to increase API calls and parsing, causing resource usage spikes within the time budget. Consider short-circuiting when a specific persona is requested (e.g., try <name>.yaml/.yml first) or enforcing a reasonable maximum number of files processed.
if entry.Type != "file" {
continue
}
Review

[MAJOR] Unbounded repo persona file size: content fetched from the repository is read into memory without any size limit before parsing. An attacker can commit an extremely large YAML file under .review-bot/personas to cause excessive memory usage or OOM. Unlike LoadPersona (which enforces MaxPersonaFileSize), repo personas have no such cap.

**[MAJOR]** Unbounded repo persona file size: content fetched from the repository is read into memory without any size limit before parsing. An attacker can commit an extremely large YAML file under .review-bot/personas to cause excessive memory usage or OOM. Unlike LoadPersona (which enforces MaxPersonaFileSize), repo personas have no such cap.
// Only process YAML files
if !isYAMLFile(entry.Name) {
continue
}
content, err := client.GetFileContent(ctx, owner, repo, entry.Path)
if err != nil {
slog.Warn("could not fetch repo persona file",
"file", entry.Path,
Review

[MAJOR] Repo persona content is parsed without enforcing a maximum size. An attacker can commit an excessively large YAML persona (e.g., single huge scalar) to the repo, causing high memory/CPU usage when the bot fetches and parses it. Unlike LoadPersona() (filesystem), LoadRepoPersonas()/ParsePersonaBytes() do not check MaxPersonaFileSize, enabling a DoS vector.

**[MAJOR]** Repo persona content is parsed without enforcing a maximum size. An attacker can commit an excessively large YAML persona (e.g., single huge scalar) to the repo, causing high memory/CPU usage when the bot fetches and parses it. Unlike LoadPersona() (filesystem), LoadRepoPersonas()/ParsePersonaBytes() do not check MaxPersonaFileSize, enabling a DoS vector.
"repo", owner+"/"+repo,
Review

[MINOR] LoadRepoPersonas parses repo personas via ParsePersonaBytes without enforcing MaxPersonaFileSize. A very large YAML file with few nodes (e.g., a huge identity string) could still be fetched and parsed, potentially increasing memory usage. Consider checking len(content) against MaxPersonaFileSize before parsing.

**[MINOR]** LoadRepoPersonas parses repo personas via ParsePersonaBytes without enforcing MaxPersonaFileSize. A very large YAML file with few nodes (e.g., a huge identity string) could still be fetched and parsed, potentially increasing memory usage. Consider checking len(content) against MaxPersonaFileSize before parsing.
"error", err)
continue
}
// Enforce size limit before parsing to prevent resource exhaustion
if len(content) > MaxPersonaFileSize {
Review

[MINOR] Persona file size is validated only after fetching full content from the server. An attacker with write access to the default branch could commit very large YAML files and cause increased memory usage when GetFileContent returns them. Consider adding a cap on the number of files processed and/or a total bytes budget, and ensure server/client-side limits or streaming are used where possible.

**[MINOR]** Persona file size is validated only after fetching full content from the server. An attacker with write access to the default branch could commit very large YAML files and cause increased memory usage when GetFileContent returns them. Consider adding a cap on the number of files processed and/or a total bytes budget, and ensure server/client-side limits or streaming are used where possible.
slog.Warn("repo persona file exceeds maximum size",
"file", entry.Path,
"repo", owner+"/"+repo,
"size", len(content),
"max", MaxPersonaFileSize)
continue
}
persona, err := ParsePersonaBytes([]byte(content), entry.Path)
if err != nil {
Review

[NIT] MergePersonas is defined but not used in main.go — the main flow loads repo personas and then falls back to built-in individually, rather than merging all of them first. MergePersonas is a useful utility and is tested, but consider whether it should be used in main (load all built-ins + all repo personas into a merged map, then look up) for consistency and simplicity. As-is, the main flow only fetches repo personas on every invocation when --persona is specified, even though the built-in fallback path doesn't benefit from pre-loading all built-ins.

**[NIT]** `MergePersonas` is defined but not used in `main.go` — the main flow loads repo personas and then falls back to built-in individually, rather than merging all of them first. `MergePersonas` is a useful utility and is tested, but consider whether it should be used in main (load all built-ins + all repo personas into a merged map, then look up) for consistency and simplicity. As-is, the main flow only fetches repo personas on every invocation when `--persona` is specified, even though the built-in fallback path doesn't benefit from pre-loading all built-ins.
slog.Warn("could not parse repo persona file",
"file", entry.Path,
"repo", owner+"/"+repo,
"error", err)
continue
}
Review

[NIT] MergePersonas is defined but never called in main.go — the main flow only calls LoadRepoPersonas and does a direct map lookup. MergePersonas and GetBuiltinPersonasMap are exported utility functions that are tested but unused by the current main flow. This is fine as they're part of the public API for future use, but worth noting in a follow-up if they remain unused.

**[NIT]** `MergePersonas` is defined but never called in `main.go` — the main flow only calls `LoadRepoPersonas` and does a direct map lookup. `MergePersonas` and `GetBuiltinPersonasMap` are exported utility functions that are tested but unused by the current main flow. This is fine as they're part of the public API for future use, but worth noting in a follow-up if they remain unused.
result[persona.Name] = persona
slog.Debug("loaded repo persona",
"name", persona.Name,
"file", entry.Path,
"repo", owner+"/"+repo)
}
return result, nil
}
// MergePersonas combines built-in personas with repo personas.
// Repo personas take precedence on name collision.
// Returns a new map; inputs are not modified.
func MergePersonas(builtin, repo map[string]*Persona) map[string]*Persona {
result := make(map[string]*Persona, len(builtin)+len(repo))
// Copy built-in personas first
Review

[MINOR] isNotFoundError uses a broad substring match ("HTTP 404" or "not found"). This can misclassify non-404 errors that contain "not found" (e.g., upstream message wording) and silently skip persona loading. While not directly exploitable, it can mask auth or transport errors.

**[MINOR]** isNotFoundError uses a broad substring match ("HTTP 404" or "not found"). This can misclassify non-404 errors that contain "not found" (e.g., upstream message wording) and silently skip persona loading. While not directly exploitable, it can mask auth or transport errors.
for name, p := range builtin {
result[name] = p
}
// Overlay repo personas (override on collision)
Review

[MINOR] isNotFoundError relies on substring matching of error messages ("HTTP 404" or "not found"), which is brittle. Prefer a typed error with errors.Is or have the adapter normalize 404s to a sentinel error where possible.

**[MINOR]** isNotFoundError relies on substring matching of error messages ("HTTP 404" or "not found"), which is brittle. Prefer a typed error with errors.Is or have the adapter normalize 404s to a sentinel error where possible.
for name, p := range repo {
Review

[MINOR] LoadRepoPersonas filters to YAML files only via isYAMLFile, but ParsePersonaBytes supports JSON based on extension. Consider accepting .json in repo personas for consistency and user convenience.

**[MINOR]** LoadRepoPersonas filters to YAML files only via isYAMLFile, but ParsePersonaBytes supports JSON based on extension. Consider accepting .json in repo personas for consistency and user convenience.
if _, exists := result[name]; exists {
slog.Debug("repo persona overrides built-in", "name", name)
}
result[name] = p
}
Review

[MINOR] isNotFoundError relies on substring matching "HTTP 404" in error messages, which is brittle and coupled to the client's error text. Consider a more robust approach (typed errors or explicit status code) if possible.

**[MINOR]** isNotFoundError relies on substring matching "HTTP 404" in error messages, which is brittle and coupled to the client's error text. Consider a more robust approach (typed errors or explicit status code) if possible.
return result
}
// GetBuiltinPersonasMap returns all built-in personas as a map keyed by name.
// Returns an empty map (not nil) if loading fails.
Review

[NIT] isNotFoundError uses broad string matching ("not found") which may incorrectly classify unrelated errors as 404s. If possible, narrow the check (e.g., look for an API error type or more specific message prefix) to reduce false positives.

**[NIT]** isNotFoundError uses broad string matching ("not found") which may incorrectly classify unrelated errors as 404s. If possible, narrow the check (e.g., look for an API error type or more specific message prefix) to reduce false positives.
func GetBuiltinPersonasMap() map[string]*Persona {
result := make(map[string]*Persona)
for _, name := range ListBuiltinPersonas() {
p, err := LoadBuiltinPersona(name)
if err != nil {
slog.Warn("could not load built-in persona", "name", name, "error", err)
continue
Review

[MINOR] isNotFoundError uses string-based heuristics ("not found" case-insensitive substring match). This could produce false positives — e.g., an error message like "operation not found in registry" from a completely different layer would be treated as a 404. The comment acknowledges this is a heuristic, which is acceptable given the import cycle constraint, but consider wrapping gitea errors with a typed sentinel or checking only for the more specific "HTTP 404" prefix to reduce false positives.

**[MINOR]** `isNotFoundError` uses string-based heuristics (`"not found"` case-insensitive substring match). This could produce false positives — e.g., an error message like `"operation not found in registry"` from a completely different layer would be treated as a 404. The comment acknowledges this is a heuristic, which is acceptable given the import cycle constraint, but consider wrapping gitea errors with a typed sentinel or checking only for the more specific `"HTTP 404"` prefix to reduce false positives.
}
result[name] = p
Review

[MINOR] isNotFoundError uses string-substring matching on error messages (strings.Contains(err.Error(), "HTTP 404")). This is fragile and couples the review package to the specific error format produced by the gitea package. A more robust approach would be to define a sentinel error type (e.g., ErrNotFound) in the gitea package and use errors.Is here, or to have the GiteaClient interface return a typed error. The current approach is documented and tested, so it's functional, but it's a design smell.

**[MINOR]** `isNotFoundError` uses string-substring matching on error messages (`strings.Contains(err.Error(), "HTTP 404")`). This is fragile and couples the review package to the specific error format produced by the gitea package. A more robust approach would be to define a sentinel error type (e.g., `ErrNotFound`) in the gitea package and use `errors.Is` here, or to have the `GiteaClient` interface return a typed error. The current approach is documented and tested, so it's functional, but it's a design smell.
}
return result
}
Review

[NIT] isNotFoundError uses a string-based heuristic (strings.Contains(errStr, "not found")) that could produce false positives for errors like "user not found" from application logic. The HTTP 404 check is precise; the bare not found check is looser. This is acknowledged in the comment but worth considering whether to tighten to only the HTTP 404 pattern.

**[NIT]** `isNotFoundError` uses a string-based heuristic (`strings.Contains(errStr, "not found")`) that could produce false positives for errors like "user not found" from application logic. The `HTTP 404` check is precise; the bare `not found` check is looser. This is acknowledged in the comment but worth considering whether to tighten to only the `HTTP 404` pattern.
// isYAMLFile checks if a filename has a YAML extension.
func isYAMLFile(name string) bool {
lower := strings.ToLower(name)
return strings.HasSuffix(lower, ".yaml") || strings.HasSuffix(lower, ".yml")
}
// isNotFoundError checks if an error represents a 404 response.
Review

[MINOR] isNotFoundError relies on substring matching "HTTP 404", which may be brittle if the client error format changes. Consider checking a structured status code on the error (if available) or broadening the match without conflating auth/transport errors.

**[MINOR]** isNotFoundError relies on substring matching "HTTP 404", which may be brittle if the client error format changes. Consider checking a structured status code on the error (if available) or broadening the match without conflating auth/transport errors.
// This uses a specific "HTTP 404" substring match rather than a generic "not found"
// match to avoid masking authentication failures or transport errors that might
// contain "not found" in their message.
func isNotFoundError(err error) bool {
Review

[MINOR] isNotFoundError relies on substring matching ("HTTP 404"). This is brittle and could misclassify errors if messages change or include similar text, potentially masking non-404 conditions. Prefer checking a structured status code from the client (if available) or a typed error to avoid misclassification.

**[MINOR]** isNotFoundError relies on substring matching ("HTTP 404"). This is brittle and could misclassify errors if messages change or include similar text, potentially masking non-404 conditions. Prefer checking a structured status code from the client (if available) or a typed error to avoid misclassification.
if err == nil {
return false
Review

[MINOR] isNotFoundError relies on string matching (strings.Contains(err.Error(), "HTTP 404")) against the gitea client's error format. This is a brittle coupling: if the gitea package ever changes its error message format, this silently breaks (directory-not-found becomes a propagated error). A sentinel error or a typed error in the gitea package would be more robust, but given this is an internal package with a defined error format and the function is well-documented with the rationale, this is acceptable in context.

**[MINOR]** `isNotFoundError` relies on string matching (`strings.Contains(err.Error(), "HTTP 404")`) against the gitea client's error format. This is a brittle coupling: if the gitea package ever changes its error message format, this silently breaks (directory-not-found becomes a propagated error). A sentinel error or a typed error in the gitea package would be more robust, but given this is an internal package with a defined error format and the function is well-documented with the rationale, this is acceptable in context.
}
return strings.Contains(err.Error(), "HTTP 404")
}
+443
View File
@@ -0,0 +1,443 @@
package review
import (
"context"
"errors"
"strings"
"testing"
)
func TestParsePersonaBytes(t *testing.T) {
tests := []struct {
name string
data string
Review

[NIT] Minor style: extra blank line in the TestParsePersonaBytes test struct definition between wantName and wantErr fields. This is inconsistent with the other test structs in the file and would be flagged by gofmt.

**[NIT]** Minor style: extra blank line in the `TestParsePersonaBytes` test struct definition between `wantName` and `wantErr` fields. This is inconsistent with the other test structs in the file and would be flagged by `gofmt`.
source string
wantName string
wantErr string
Review

[NIT] Minor style nit: the struct literal in TestParsePersonaBytes has extra blank lines in field alignment (name, data, source, wantName, wantErr). This doesn't affect correctness but is inconsistent with Go's gofmt style.

**[NIT]** Minor style nit: the struct literal in `TestParsePersonaBytes` has extra blank lines in field alignment (`name`, `data`, `source`, `wantName`, `wantErr`). This doesn't affect correctness but is inconsistent with Go's `gofmt` style.
}{
{
Review

[NIT] Minor formatting inconsistency: the TestParsePersonaBytes table struct has inconsistent field alignment (extra blank line between source and wantName fields in some entries). This is cosmetic and gofmt should handle it, but it's visible in the raw source.

**[NIT]** Minor formatting inconsistency: the `TestParsePersonaBytes` table struct has inconsistent field alignment (extra blank line between `source` and `wantName` fields in some entries). This is cosmetic and `gofmt` should handle it, but it's visible in the raw source.
name: "valid yaml",
Review

[NIT] Trailing whitespace on the struct field alignment lines (extra spaces between name and string field definitions). Minor formatting issue — gofmt would normalize this.

**[NIT]** Trailing whitespace on the struct field alignment lines (extra spaces between `name` and `string` field definitions). Minor formatting issue — `gofmt` would normalize this.
data: `name: test
identity: test identity
focus:
- testing
`,
source: "test.yaml",
wantName: "test",
},
{
name: "missing name",
data: "identity: test\n",
source: "test.yaml",
wantErr: "name is required",
},
{
name: "invalid yaml",
data: "not: valid:\n yaml: [broken",
source: "test.yaml",
wantErr: "parse",
},
{
name: "json format by extension",
data: `{"name": "jsontest", "identity": "json identity"}`,
source: "test.json",
wantName: "jsontest",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
p, err := ParsePersonaBytes([]byte(tt.data), tt.source)
if tt.wantErr != "" {
if err == nil {
t.Fatalf("expected error containing %q, got nil", tt.wantErr)
}
if !strings.Contains(err.Error(), tt.wantErr) {
t.Errorf("error = %q, want containing %q", err.Error(), tt.wantErr)
}
return
}
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if p.Name != tt.wantName {
t.Errorf("Name = %q, want %q", p.Name, tt.wantName)
}
})
}
}
// mockGiteaClient implements GiteaClient for testing.
type mockGiteaClient struct {
contents map[string][]ContentEntry // path -> entries
files map[string]string // path -> content
listErr error
fileErr map[string]error // path -> error
}
func (m *mockGiteaClient) ListContents(ctx context.Context, owner, repo, path string) ([]ContentEntry, error) {
if m.listErr != nil {
return nil, m.listErr
}
entries, ok := m.contents[path]
if !ok {
return nil, errors.New("list contents .review-bot/personas: HTTP 404: not found")
}
return entries, nil
}
func (m *mockGiteaClient) GetFileContent(ctx context.Context, owner, repo, filepath string) (string, error) {
if m.fileErr != nil {
if err, ok := m.fileErr[filepath]; ok {
return "", err
}
}
content, ok := m.files[filepath]
if !ok {
return "", errors.New("HTTP 404: file not found")
}
return content, nil
}
func TestLoadRepoPersonas(t *testing.T) {
ctx := context.Background()
t.Run("directory not found returns empty map", func(t *testing.T) {
client := &mockGiteaClient{} // No contents configured -> 404
personas, err := LoadRepoPersonas(ctx, client, "owner", "repo")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if personas == nil {
t.Error("expected empty map, got nil")
}
if len(personas) != 0 {
t.Errorf("expected 0 personas, got %d", len(personas))
}
})
t.Run("empty directory returns empty map", func(t *testing.T) {
client := &mockGiteaClient{
contents: map[string][]ContentEntry{
RepoPersonaPath: {},
},
}
personas, err := LoadRepoPersonas(ctx, client, "owner", "repo")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(personas) != 0 {
t.Errorf("expected 0 personas, got %d", len(personas))
}
})
t.Run("loads valid personas", func(t *testing.T) {
client := &mockGiteaClient{
contents: map[string][]ContentEntry{
RepoPersonaPath: {
{Name: "trading.yaml", Path: ".review-bot/personas/trading.yaml", Type: "file"},
{Name: "crypto.yaml", Path: ".review-bot/personas/crypto.yaml", Type: "file"},
},
},
files: map[string]string{
".review-bot/personas/trading.yaml": `name: trading
display_name: Trading Expert
identity: You are a trading expert.
focus:
- order handling
- risk management
`,
".review-bot/personas/crypto.yaml": `name: crypto
display_name: Crypto Expert
identity: You are a cryptography expert.
focus:
- key management
- encryption
`,
},
}
personas, err := LoadRepoPersonas(ctx, client, "owner", "repo")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(personas) != 2 {
t.Fatalf("expected 2 personas, got %d", len(personas))
}
if personas["trading"] == nil {
t.Error("expected trading persona")
}
if personas["crypto"] == nil {
t.Error("expected crypto persona")
}
if personas["trading"].DisplayName != "Trading Expert" {
t.Errorf("trading display name = %q, want %q", personas["trading"].DisplayName, "Trading Expert")
}
})
t.Run("skips invalid persona files", func(t *testing.T) {
client := &mockGiteaClient{
contents: map[string][]ContentEntry{
RepoPersonaPath: {
{Name: "valid.yaml", Path: ".review-bot/personas/valid.yaml", Type: "file"},
{Name: "invalid.yaml", Path: ".review-bot/personas/invalid.yaml", Type: "file"},
},
},
files: map[string]string{
".review-bot/personas/valid.yaml": `name: valid
identity: Valid persona
`,
".review-bot/personas/invalid.yaml": "not valid yaml: [broken",
},
}
personas, err := LoadRepoPersonas(ctx, client, "owner", "repo")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
// Should have the valid one, skip the invalid
if len(personas) != 1 {
t.Fatalf("expected 1 persona (skipped invalid), got %d", len(personas))
}
if personas["valid"] == nil {
t.Error("expected valid persona")
}
})
t.Run("skips non-yaml files", func(t *testing.T) {
client := &mockGiteaClient{
contents: map[string][]ContentEntry{
RepoPersonaPath: {
{Name: "persona.yaml", Path: ".review-bot/personas/persona.yaml", Type: "file"},
{Name: "README.md", Path: ".review-bot/personas/README.md", Type: "file"},
{Name: "notes.txt", Path: ".review-bot/personas/notes.txt", Type: "file"},
},
},
files: map[string]string{
".review-bot/personas/persona.yaml": `name: test
identity: Test persona
`,
".review-bot/personas/README.md": "# Personas\n\nPut your personas here.",
},
}
personas, err := LoadRepoPersonas(ctx, client, "owner", "repo")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(personas) != 1 {
t.Fatalf("expected 1 persona (yaml only), got %d", len(personas))
}
})
t.Run("skips subdirectories", func(t *testing.T) {
client := &mockGiteaClient{
contents: map[string][]ContentEntry{
RepoPersonaPath: {
{Name: "persona.yaml", Path: ".review-bot/personas/persona.yaml", Type: "file"},
{Name: "subdir", Path: ".review-bot/personas/subdir", Type: "dir"},
},
},
files: map[string]string{
".review-bot/personas/persona.yaml": `name: test
identity: Test persona
`,
},
}
personas, err := LoadRepoPersonas(ctx, client, "owner", "repo")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(personas) != 1 {
t.Fatalf("expected 1 persona (files only), got %d", len(personas))
}
})
t.Run("propagates auth errors", func(t *testing.T) {
client := &mockGiteaClient{
listErr: errors.New("HTTP 401: unauthorized"),
}
_, err := LoadRepoPersonas(ctx, client, "owner", "repo")
if err == nil {
t.Fatal("expected error for auth failure")
}
if !strings.Contains(err.Error(), "401") {
t.Errorf("error = %q, want containing '401'", err.Error())
}
})
t.Run("skips files that fail to fetch", func(t *testing.T) {
client := &mockGiteaClient{
contents: map[string][]ContentEntry{
RepoPersonaPath: {
{Name: "good.yaml", Path: ".review-bot/personas/good.yaml", Type: "file"},
{Name: "bad.yaml", Path: ".review-bot/personas/bad.yaml", Type: "file"},
},
},
files: map[string]string{
".review-bot/personas/good.yaml": `name: good
identity: Good persona
`,
},
fileErr: map[string]error{
".review-bot/personas/bad.yaml": errors.New("HTTP 500: internal server error"),
},
}
personas, err := LoadRepoPersonas(ctx, client, "owner", "repo")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(personas) != 1 {
t.Fatalf("expected 1 persona (skipped failed fetch), got %d", len(personas))
}
})
t.Run("skips oversized files", func(t *testing.T) {
// Create a content string that exceeds MaxPersonaFileSize (64KB)
oversizedContent := strings.Repeat("a", MaxPersonaFileSize+1)
client := &mockGiteaClient{
contents: map[string][]ContentEntry{
RepoPersonaPath: {
{Name: "normal.yaml", Path: ".review-bot/personas/normal.yaml", Type: "file"},
{Name: "huge.yaml", Path: ".review-bot/personas/huge.yaml", Type: "file"},
},
},
files: map[string]string{
".review-bot/personas/normal.yaml": `name: normal
identity: Normal sized persona
`,
".review-bot/personas/huge.yaml": oversizedContent,
},
}
personas, err := LoadRepoPersonas(ctx, client, "owner", "repo")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
// Should have the normal one, skip the oversized
if len(personas) != 1 {
t.Fatalf("expected 1 persona (skipped oversized), got %d", len(personas))
}
if personas["normal"] == nil {
t.Error("expected normal persona")
}
})
}
func TestMergePersonas(t *testing.T) {
builtin := map[string]*Persona{
"security": {Name: "security", Identity: "Built-in security"},
"docs": {Name: "docs", Identity: "Built-in docs"},
}
repo := map[string]*Persona{
"security": {Name: "security", Identity: "Repo security override"},
"trading": {Name: "trading", Identity: "Repo trading"},
}
merged := MergePersonas(builtin, repo)
t.Run("repo overrides builtin on collision", func(t *testing.T) {
if merged["security"].Identity != "Repo security override" {
t.Errorf("security identity = %q, want repo override", merged["security"].Identity)
}
})
t.Run("builtin preserved when no collision", func(t *testing.T) {
if merged["docs"].Identity != "Built-in docs" {
t.Errorf("docs identity = %q, want built-in", merged["docs"].Identity)
}
})
t.Run("repo-only persona added", func(t *testing.T) {
if merged["trading"] == nil {
t.Error("expected trading persona from repo")
}
if merged["trading"].Identity != "Repo trading" {
t.Errorf("trading identity = %q, want repo", merged["trading"].Identity)
}
})
t.Run("original maps not modified", func(t *testing.T) {
if builtin["trading"] != nil {
t.Error("builtin map was modified")
}
if len(repo) != 2 {
t.Error("repo map was modified")
}
})
}
func TestGetBuiltinPersonasMap(t *testing.T) {
personas := GetBuiltinPersonasMap()
if len(personas) == 0 {
t.Fatal("expected at least one built-in persona")
}
// Verify expected personas exist
expected := []string{"security", "architect", "docs"}
for _, name := range expected {
if personas[name] == nil {
t.Errorf("expected built-in persona %q", name)
}
}
// Verify personas are valid
for name, p := range personas {
if p.Name != name {
t.Errorf("persona %q has mismatched name %q", name, p.Name)
}
if p.Identity == "" {
t.Errorf("persona %q has empty identity", name)
}
}
}
func TestIsYAMLFile(t *testing.T) {
tests := []struct {
name string
want bool
}{
{"test.yaml", true},
{"test.yml", true},
{"test.YAML", true},
{"test.YML", true},
{"test.json", false},
{"test.md", false},
{"test.txt", false},
{"yaml", false},
{"yaml.md", false},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := isYAMLFile(tt.name); got != tt.want {
t.Errorf("isYAMLFile(%q) = %v, want %v", tt.name, got, tt.want)
}
})
}
}
func TestIsNotFoundError(t *testing.T) {
tests := []struct {
err error
want bool
}{
{nil, false},
{errors.New("HTTP 404: not found"), true},
{errors.New("HTTP 404"), true},
// Intentionally false: generic "not found" could mask auth/transport errors.
// Only explicit HTTP 404 responses should be treated as "directory doesn't exist".
{errors.New("something not found"), false},
{errors.New("HTTP 401: unauthorized"), false},
{errors.New("connection refused"), false},
}
for _, tt := range tests {
name := "nil"
if tt.err != nil {
name = tt.err.Error()
}
t.Run(name, func(t *testing.T) {
if got := isNotFoundError(tt.err); got != tt.want {
t.Errorf("isNotFoundError(%v) = %v, want %v", tt.err, got, tt.want)
}
})
}
}