From 9775cb098cdd8788de36f505de328fce6e7dc33a Mon Sep 17 00:00:00 2001 From: Rodin Date: Sun, 10 May 2026 19:29:13 -0700 Subject: [PATCH] fix: address PR #61 review findings MAJOR: - LoadRepoPersonas: add MaxPersonaFileSize check before parsing to prevent resource exhaustion from oversized YAML files committed to target repositories MINOR: - isNotFoundError: tighten substring match to 'HTTP 404' only to avoid masking auth/transport errors containing generic 'not found' - main.go: remove duplicate flag.Parse() call - main.go: add comment explaining nil map indexing is safe in Go when LoadRepoPersonas returns an error Tests updated to reflect the intentional behavior change in isNotFoundError and added test case for oversized file rejection. --- cmd/review-bot/main.go | 5 +++-- review/repo_persona.go | 18 +++++++++++++++--- review/repo_persona_test.go | 35 ++++++++++++++++++++++++++++++++++- 3 files changed, 52 insertions(+), 6 deletions(-) diff --git a/cmd/review-bot/main.go b/cmd/review-bot/main.go index 238c9d8..bc0bea7 100644 --- a/cmd/review-bot/main.go +++ b/cmd/review-bot/main.go @@ -79,7 +79,6 @@ func main() { aicoreAPIURL := flag.String("aicore-api-url", envOrDefault("AICORE_API_URL", ""), "SAP AI Core API URL (for provider=aicore)") aicoreResourceGroup := flag.String("aicore-resource-group", envOrDefault("AICORE_RESOURCE_GROUP", "default"), "SAP AI Core resource group (for provider=aicore)") - flag.Parse() flag.Parse() if *versionFlag { @@ -181,7 +180,9 @@ func main() { repoPersonas, err := review.LoadRepoPersonas(ctx, newGiteaClientAdapter(giteaClient), owner, repoName) if err != nil { slog.Warn("could not load repo personas", "repo", owner+"/"+repoName, "error", err) - // Continue with built-in personas only + // Continue with built-in personas only. + // NOTE: repoPersonas is nil here, but map indexing on a nil map is safe in Go + // (returns the zero value), so the fallback to built-in below works correctly. } if p, ok := repoPersonas[*personaName]; ok { persona = p diff --git a/review/repo_persona.go b/review/repo_persona.go index 22735a0..58c0b9d 100644 --- a/review/repo_persona.go +++ b/review/repo_persona.go @@ -28,6 +28,7 @@ type ContentEntry struct { // Returns an empty map (not nil) if the directory doesn't exist or is empty. // Individual parse failures are logged and skipped; the remaining personas are still returned. // Auth errors and other non-404 errors are propagated. +// Files exceeding MaxPersonaFileSize are rejected to prevent resource exhaustion. func LoadRepoPersonas(ctx context.Context, client GiteaClient, owner, repo string) (map[string]*Persona, error) { result := make(map[string]*Persona) @@ -65,6 +66,16 @@ func LoadRepoPersonas(ctx context.Context, client GiteaClient, owner, repo strin continue } + // Enforce size limit before parsing to prevent resource exhaustion + if len(content) > MaxPersonaFileSize { + slog.Warn("repo persona file exceeds maximum size", + "file", entry.Path, + "repo", owner+"/"+repo, + "size", len(content), + "max", MaxPersonaFileSize) + continue + } + persona, err := ParsePersonaBytes([]byte(content), entry.Path) if err != nil { slog.Warn("could not parse repo persona file", @@ -128,11 +139,12 @@ func isYAMLFile(name string) bool { } // isNotFoundError checks if an error represents a 404 response. -// This is a string-based heuristic since we don't have access to gitea.APIError here. +// This uses a specific "HTTP 404" substring match rather than a generic "not found" +// match to avoid masking authentication failures or transport errors that might +// contain "not found" in their message. func isNotFoundError(err error) bool { if err == nil { return false } - errStr := err.Error() - return strings.Contains(errStr, "HTTP 404") || strings.Contains(errStr, "not found") + return strings.Contains(err.Error(), "HTTP 404") } diff --git a/review/repo_persona_test.go b/review/repo_persona_test.go index 89dbcea..56b0a59 100644 --- a/review/repo_persona_test.go +++ b/review/repo_persona_test.go @@ -288,6 +288,36 @@ identity: Good persona t.Fatalf("expected 1 persona (skipped failed fetch), got %d", len(personas)) } }) + + t.Run("skips oversized files", func(t *testing.T) { + // Create a content string that exceeds MaxPersonaFileSize (64KB) + oversizedContent := strings.Repeat("a", MaxPersonaFileSize+1) + client := &mockGiteaClient{ + contents: map[string][]ContentEntry{ + RepoPersonaPath: { + {Name: "normal.yaml", Path: ".review-bot/personas/normal.yaml", Type: "file"}, + {Name: "huge.yaml", Path: ".review-bot/personas/huge.yaml", Type: "file"}, + }, + }, + files: map[string]string{ + ".review-bot/personas/normal.yaml": `name: normal +identity: Normal sized persona +`, + ".review-bot/personas/huge.yaml": oversizedContent, + }, + } + personas, err := LoadRepoPersonas(ctx, client, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + // Should have the normal one, skip the oversized + if len(personas) != 1 { + t.Fatalf("expected 1 persona (skipped oversized), got %d", len(personas)) + } + if personas["normal"] == nil { + t.Error("expected normal persona") + } + }) } func TestMergePersonas(t *testing.T) { @@ -391,7 +421,10 @@ func TestIsNotFoundError(t *testing.T) { }{ {nil, false}, {errors.New("HTTP 404: not found"), true}, - {errors.New("something not found"), true}, + {errors.New("HTTP 404"), true}, + // Intentionally false: generic "not found" could mask auth/transport errors. + // Only explicit HTTP 404 responses should be treated as "directory doesn't exist". + {errors.New("something not found"), false}, {errors.New("HTTP 401: unauthorized"), false}, {errors.New("connection refused"), false}, }