feat: load personas from target repo .review-bot/personas/ #61

Merged
aweiker merged 4 commits from issue-60 into main 2026-05-11 02:54:46 +00:00
3 changed files with 52 additions and 6 deletions
Showing only changes of commit 9775cb098c - Show all commits
+3 -2
View File
@@ -79,7 +79,6 @@ func main() {
aicoreAPIURL := flag.String("aicore-api-url", envOrDefault("AICORE_API_URL", ""), "SAP AI Core API URL (for provider=aicore)")
aicoreResourceGroup := flag.String("aicore-resource-group", envOrDefault("AICORE_RESOURCE_GROUP", "default"), "SAP AI Core resource group (for provider=aicore)")
Review

[MINOR] Duplicate flag.Parse() call — the diff shows the old flag.Parse() was removed but the remaining one on line 81 is a leftover duplicate. The full file content shows flag.Parse() appears twice consecutively (lines 80-81). This is a copy-paste artifact from the diff. It is harmless since repeated flag.Parse() calls are idempotent, but it's noise and should be cleaned up.

**[MINOR]** Duplicate `flag.Parse()` call — the diff shows the old `flag.Parse()` was removed but the remaining one on line 81 is a leftover duplicate. The full file content shows `flag.Parse()` appears twice consecutively (lines 80-81). This is a copy-paste artifact from the diff. It is harmless since repeated `flag.Parse()` calls are idempotent, but it's noise and should be cleaned up.
flag.Parse()
flag.Parse()
Review

[MINOR] Duplicate flag.Parse() call: the diff shows the original flag.Parse() was removed and a new one kept, but the full file content shows flag.Parse() appears only once. This appears to be a diff artifact (the - removed one and the context line shows the kept one). Verify the compiled binary doesn't call flag.Parse() twice — if it does, this should be fixed as double-parsing has undefined behavior for some flag types.

**[MINOR]** Duplicate `flag.Parse()` call: the diff shows the original `flag.Parse()` was removed and a new one kept, but the full file content shows `flag.Parse()` appears only once. This appears to be a diff artifact (the `-` removed one and the context line shows the kept one). Verify the compiled binary doesn't call `flag.Parse()` twice — if it does, this should be fixed as double-parsing has undefined behavior for some flag types.
if *versionFlag {
1
@@ -181,7 +180,9 @@ func main() {
repoPersonas, err := review.LoadRepoPersonas(ctx, newGiteaClientAdapter(giteaClient), owner, repoName)
if err != nil {
slog.Warn("could not load repo personas", "repo", owner+"/"+repoName, "error", err)
// Continue with built-in personas only
// Continue with built-in personas only.
Outdated
Review

[MINOR] When LoadRepoPersonas returns an error (non-404), the code logs a warning and continues with repoPersonas being nil (its zero value). The subsequent if p, ok := repoPersonas[*personaName] safely handles nil maps (map indexing on nil returns zero value), so this works correctly, but it's subtle. A reader might wonder if repoPersonas could cause a panic. Consider initializing repoPersonas to an empty map before the error check, or adding a brief comment that nil map indexing is safe in Go.

**[MINOR]** When `LoadRepoPersonas` returns an error (non-404), the code logs a warning and continues with `repoPersonas` being nil (its zero value). The subsequent `if p, ok := repoPersonas[*personaName]` safely handles nil maps (map indexing on nil returns zero value), so this works correctly, but it's subtle. A reader might wonder if `repoPersonas` could cause a panic. Consider initializing `repoPersonas` to an empty map before the error check, or adding a brief comment that nil map indexing is safe in Go.
Review

[MINOR] When LoadRepoPersonas returns an error, repoPersonas is nil. The comment correctly notes that map indexing on a nil map is safe in Go (returns zero value). However, err from LoadRepoPersonas is shadowed — the outer var persona *review.Persona block has no err declaration, so the err used in persona, err = review.LoadBuiltinPersona(...) on ~line 196 is an undeclared variable in the else branch. Check that the err variable is properly scoped here — it may need a short variable declaration (:=) in the else branch.

**[MINOR]** When `LoadRepoPersonas` returns an error, `repoPersonas` is nil. The comment correctly notes that map indexing on a nil map is safe in Go (returns zero value). However, `err` from `LoadRepoPersonas` is shadowed — the outer `var persona *review.Persona` block has no `err` declaration, so the `err` used in `persona, err = review.LoadBuiltinPersona(...)` on ~line 196 is an undeclared variable in the else branch. Check that the `err` variable is properly scoped here — it may need a short variable declaration (`:=`) in the else branch.
// NOTE: repoPersonas is nil here, but map indexing on a nil map is safe in Go
// (returns the zero value), so the fallback to built-in below works correctly.
Outdated
Review

[MINOR] When LoadRepoPersonas returns an error, repoPersonas will be nil (not an empty map), but the subsequent repoPersonas[*personaName] map index on line ~187 is safe in Go (indexing a nil map returns the zero value, false). However, the err variable from LoadRepoPersonas is shadowed by the later persona, err = review.LoadBuiltinPersona(...) assignment at line ~193, which is correct since the outer err is already declared. The logic is sound but slightly subtle — consider using a blank identifier or a named variable repoErr to make the intent clearer.

**[MINOR]** When `LoadRepoPersonas` returns an error, `repoPersonas` will be nil (not an empty map), but the subsequent `repoPersonas[*personaName]` map index on line ~187 is safe in Go (indexing a nil map returns the zero value, false). However, the `err` variable from `LoadRepoPersonas` is shadowed by the later `persona, err = review.LoadBuiltinPersona(...)` assignment at line ~193, which is correct since the outer `err` is already declared. The logic is sound but slightly subtle — consider using a blank identifier or a named variable `repoErr` to make the intent clearer.
Review

[MINOR] When LoadRepoPersonas returns an error, repoPersonas is nil and the code falls through to repoPersonas[*personaName]. The comment correctly notes this is safe in Go (nil map read returns zero value), but the variable err is declared via repoPersonas, err := ... and then reassigned in the else branch with persona, err = review.LoadBuiltinPersona(...). This works due to Go's scoping rules but the comment about nil map safety would be clearer if placed directly above the map indexing line rather than after the Warn log.

**[MINOR]** When `LoadRepoPersonas` returns an error, `repoPersonas` is `nil` and the code falls through to `repoPersonas[*personaName]`. The comment correctly notes this is safe in Go (nil map read returns zero value), but the variable `err` is declared via `repoPersonas, err := ...` and then reassigned in the `else` branch with `persona, err = review.LoadBuiltinPersona(...)`. This works due to Go's scoping rules but the comment about nil map safety would be clearer if placed directly above the map indexing line rather than after the Warn log.
}
if p, ok := repoPersonas[*personaName]; ok {
persona = p
1
+15 -3
View File
1
@@ -28,6 +28,7 @@ type ContentEntry struct {
// Returns an empty map (not nil) if the directory doesn't exist or is empty.
// Individual parse failures are logged and skipped; the remaining personas are still returned.
// Auth errors and other non-404 errors are propagated.
// Files exceeding MaxPersonaFileSize are rejected to prevent resource exhaustion.
func LoadRepoPersonas(ctx context.Context, client GiteaClient, owner, repo string) (map[string]*Persona, error) {
result := make(map[string]*Persona)
6
@@ -65,6 +66,16 @@ func LoadRepoPersonas(ctx context.Context, client GiteaClient, owner, repo strin
continue
}
// Enforce size limit before parsing to prevent resource exhaustion
if len(content) > MaxPersonaFileSize {
Review

[MINOR] Persona file size is validated only after fetching full content from the server. An attacker with write access to the default branch could commit very large YAML files and cause increased memory usage when GetFileContent returns them. Consider adding a cap on the number of files processed and/or a total bytes budget, and ensure server/client-side limits or streaming are used where possible.

**[MINOR]** Persona file size is validated only after fetching full content from the server. An attacker with write access to the default branch could commit very large YAML files and cause increased memory usage when GetFileContent returns them. Consider adding a cap on the number of files processed and/or a total bytes budget, and ensure server/client-side limits or streaming are used where possible.
slog.Warn("repo persona file exceeds maximum size",
"file", entry.Path,
"repo", owner+"/"+repo,
"size", len(content),
"max", MaxPersonaFileSize)
continue
}
persona, err := ParsePersonaBytes([]byte(content), entry.Path)
if err != nil {
Review

[NIT] MergePersonas is defined but not used in main.go — the main flow loads repo personas and then falls back to built-in individually, rather than merging all of them first. MergePersonas is a useful utility and is tested, but consider whether it should be used in main (load all built-ins + all repo personas into a merged map, then look up) for consistency and simplicity. As-is, the main flow only fetches repo personas on every invocation when --persona is specified, even though the built-in fallback path doesn't benefit from pre-loading all built-ins.

**[NIT]** `MergePersonas` is defined but not used in `main.go` — the main flow loads repo personas and then falls back to built-in individually, rather than merging all of them first. `MergePersonas` is a useful utility and is tested, but consider whether it should be used in main (load all built-ins + all repo personas into a merged map, then look up) for consistency and simplicity. As-is, the main flow only fetches repo personas on every invocation when `--persona` is specified, even though the built-in fallback path doesn't benefit from pre-loading all built-ins.
slog.Warn("could not parse repo persona file",
9
@@ -128,11 +139,12 @@ func isYAMLFile(name string) bool {
}
// isNotFoundError checks if an error represents a 404 response.
Review

[MINOR] isNotFoundError relies on substring matching "HTTP 404", which may be brittle if the client error format changes. Consider checking a structured status code on the error (if available) or broadening the match without conflating auth/transport errors.

**[MINOR]** isNotFoundError relies on substring matching "HTTP 404", which may be brittle if the client error format changes. Consider checking a structured status code on the error (if available) or broadening the match without conflating auth/transport errors.
// This is a string-based heuristic since we don't have access to gitea.APIError here.
// This uses a specific "HTTP 404" substring match rather than a generic "not found"
// match to avoid masking authentication failures or transport errors that might
// contain "not found" in their message.
func isNotFoundError(err error) bool {
Review

[MINOR] isNotFoundError relies on substring matching ("HTTP 404"). This is brittle and could misclassify errors if messages change or include similar text, potentially masking non-404 conditions. Prefer checking a structured status code from the client (if available) or a typed error to avoid misclassification.

**[MINOR]** isNotFoundError relies on substring matching ("HTTP 404"). This is brittle and could misclassify errors if messages change or include similar text, potentially masking non-404 conditions. Prefer checking a structured status code from the client (if available) or a typed error to avoid misclassification.
if err == nil {
return false
Review

[MINOR] isNotFoundError relies on string matching (strings.Contains(err.Error(), "HTTP 404")) against the gitea client's error format. This is a brittle coupling: if the gitea package ever changes its error message format, this silently breaks (directory-not-found becomes a propagated error). A sentinel error or a typed error in the gitea package would be more robust, but given this is an internal package with a defined error format and the function is well-documented with the rationale, this is acceptable in context.

**[MINOR]** `isNotFoundError` relies on string matching (`strings.Contains(err.Error(), "HTTP 404")`) against the gitea client's error format. This is a brittle coupling: if the gitea package ever changes its error message format, this silently breaks (directory-not-found becomes a propagated error). A sentinel error or a typed error in the gitea package would be more robust, but given this is an internal package with a defined error format and the function is well-documented with the rationale, this is acceptable in context.
}
errStr := err.Error()
return strings.Contains(errStr, "HTTP 404") || strings.Contains(errStr, "not found")
return strings.Contains(err.Error(), "HTTP 404")
}
+34 -1
View File
4
@@ -288,6 +288,36 @@ identity: Good persona
t.Fatalf("expected 1 persona (skipped failed fetch), got %d", len(personas))
}
})
t.Run("skips oversized files", func(t *testing.T) {
// Create a content string that exceeds MaxPersonaFileSize (64KB)
oversizedContent := strings.Repeat("a", MaxPersonaFileSize+1)
client := &mockGiteaClient{
contents: map[string][]ContentEntry{
RepoPersonaPath: {
{Name: "normal.yaml", Path: ".review-bot/personas/normal.yaml", Type: "file"},
{Name: "huge.yaml", Path: ".review-bot/personas/huge.yaml", Type: "file"},
},
},
files: map[string]string{
".review-bot/personas/normal.yaml": `name: normal
identity: Normal sized persona
`,
".review-bot/personas/huge.yaml": oversizedContent,
},
}
personas, err := LoadRepoPersonas(ctx, client, "owner", "repo")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
// Should have the normal one, skip the oversized
if len(personas) != 1 {
t.Fatalf("expected 1 persona (skipped oversized), got %d", len(personas))
}
if personas["normal"] == nil {
t.Error("expected normal persona")
}
})
}
func TestMergePersonas(t *testing.T) {
@@ -391,7 +421,10 @@ func TestIsNotFoundError(t *testing.T) {
}{
{nil, false},
{errors.New("HTTP 404: not found"), true},
{errors.New("something not found"), true},
{errors.New("HTTP 404"), true},
// Intentionally false: generic "not found" could mask auth/transport errors.
// Only explicit HTTP 404 responses should be treated as "directory doesn't exist".
{errors.New("something not found"), false},
{errors.New("HTTP 401: unauthorized"), false},
{errors.New("connection refused"), false},
}