From 3f06ba2ea6e93753e874827f1ffc4f60ee1966f8 Mon Sep 17 00:00:00 2001 From: Rodin Date: Sun, 10 May 2026 19:05:37 -0700 Subject: [PATCH 1/4] feat: load personas from target repo .review-bot/personas/ Implements #60. - Add ParsePersonaBytes() for parsing personas from byte data - Add LoadRepoPersonas() to fetch personas from repo via Gitea API - Add MergePersonas() to combine built-in and repo personas - Add GetBuiltinPersonasMap() helper - Update main.go to load repo personas first, fall back to built-in - Add giteaClientAdapter to bridge gitea.Client to review.GiteaClient When --persona is specified, the bot now: 1. Attempts to fetch personas from .review-bot/personas/*.yaml 2. If the named persona exists in the repo, uses it 3. Otherwise falls back to built-in personas This allows repos to define domain-specific personas (e.g., trading experts for gargoyle, crypto experts for kms-lite) without modifying the review-bot codebase. --- cmd/review-bot/main.go | 88 ++++++-- review/persona.go | 7 + review/repo_persona.go | 138 ++++++++++++ review/repo_persona_test.go | 410 ++++++++++++++++++++++++++++++++++++ 4 files changed, 620 insertions(+), 23 deletions(-) create mode 100644 review/repo_persona.go create mode 100644 review/repo_persona_test.go diff --git a/cmd/review-bot/main.go b/cmd/review-bot/main.go index 24e678d..238c9d8 100644 --- a/cmd/review-bot/main.go +++ b/cmd/review-bot/main.go @@ -116,29 +116,7 @@ func main() { os.Exit(1) } - // Load persona if specified - var persona *review.Persona - if *personaName != "" { - var err error - persona, err = review.LoadBuiltinPersona(*personaName) - if err != nil { - slog.Error("failed to load persona", "persona", *personaName, "error", err) - os.Exit(1) - } - slog.Info("loaded built-in persona", "persona", persona.Name, "display", persona.DisplayName) - } else if *personaFile != "" { - resolvedPath, err := validateWorkspacePath(*personaFile, "persona-file") - if err != nil { - slog.Error("invalid persona-file path", "error", err) - os.Exit(1) - } - persona, err = review.LoadPersona(resolvedPath) - if err != nil { - slog.Error("failed to load persona file", "file", *personaFile, "error", err) - os.Exit(1) - } - slog.Info("loaded persona from file", "file", *personaFile, "persona", persona.Name) - } + // NOTE: Persona loading deferred until after Gitea client init to support repo personas // Validate reviewer-name: only safe characters allowed in sentinel if err := validateReviewerName(*reviewerName); err != nil { @@ -196,6 +174,41 @@ func main() { ctx, cancel := context.WithTimeout(context.Background(), overallTimeout) defer cancel() + // Load persona if specified (after Gitea client init to support repo personas) + var persona *review.Persona + if *personaName != "" { + // Try loading from repo first, then fall back to built-in + repoPersonas, err := review.LoadRepoPersonas(ctx, newGiteaClientAdapter(giteaClient), owner, repoName) + if err != nil { + slog.Warn("could not load repo personas", "repo", owner+"/"+repoName, "error", err) + // Continue with built-in personas only + } + if p, ok := repoPersonas[*personaName]; ok { + persona = p + slog.Info("loaded repo persona", "persona", persona.Name, "display", persona.DisplayName, "repo", owner+"/"+repoName) + } else { + // Fall back to built-in + persona, err = review.LoadBuiltinPersona(*personaName) + if err != nil { + slog.Error("failed to load persona", "persona", *personaName, "error", err) + os.Exit(1) + } + slog.Info("loaded built-in persona", "persona", persona.Name, "display", persona.DisplayName) + } + } else if *personaFile != "" { + resolvedPath, err := validateWorkspacePath(*personaFile, "persona-file") + if err != nil { + slog.Error("invalid persona-file path", "error", err) + os.Exit(1) + } + persona, err = review.LoadPersona(resolvedPath) + if err != nil { + slog.Error("failed to load persona file", "file", *personaFile, "error", err) + os.Exit(1) + } + slog.Info("loaded persona from file", "file", *personaFile, "persona", persona.Name) + } + slog.Info("reviewing pull request", "pr", prNumber, "repo", fmt.Sprintf("%s/%s", owner, repoName)) // Step 1: Fetch PR metadata @@ -783,3 +796,32 @@ func shouldSkipStaleReview(evaluatedSHA, currentSHA string) bool { } return evaluatedSHA != currentSHA } + +// giteaClientAdapter adapts gitea.Client to review.GiteaClient interface. +type giteaClientAdapter struct { + client *gitea.Client +} + +func newGiteaClientAdapter(c *gitea.Client) *giteaClientAdapter { + return &giteaClientAdapter{client: c} +} + +func (a *giteaClientAdapter) ListContents(ctx context.Context, owner, repo, path string) ([]review.ContentEntry, error) { + entries, err := a.client.ListContents(ctx, owner, repo, path) + if err != nil { + return nil, err + } + result := make([]review.ContentEntry, len(entries)) + for i, e := range entries { + result[i] = review.ContentEntry{ + Name: e.Name, + Path: e.Path, + Type: e.Type, + } + } + return result, nil +} + +func (a *giteaClientAdapter) GetFileContent(ctx context.Context, owner, repo, filepath string) (string, error) { + return a.client.GetFileContent(ctx, owner, repo, filepath) +} diff --git a/review/persona.go b/review/persona.go index 0f7f141..329aa78 100644 --- a/review/persona.go +++ b/review/persona.go @@ -224,6 +224,13 @@ func checkYAMLDepth(node *yaml.Node, depth, maxDepth, maxNodes int, seen map[*ya return nil } +// ParsePersonaBytes parses persona data from bytes with a source label for errors. +// This is useful for parsing personas fetched from external sources (e.g., Gitea API) +// without requiring filesystem access. Format is detected by source extension. +func ParsePersonaBytes(data []byte, source string) (*Persona, error) { + return parsePersona(data, source) +} + func validatePersona(p *Persona, source string) error { if p.Name == "" { return fmt.Errorf("persona %s: name is required", source) diff --git a/review/repo_persona.go b/review/repo_persona.go new file mode 100644 index 0000000..22735a0 --- /dev/null +++ b/review/repo_persona.go @@ -0,0 +1,138 @@ +package review + +import ( + "context" + "log/slog" + "strings" +) + +// RepoPersonaPath is the directory path where repo-specific personas are stored. +const RepoPersonaPath = ".review-bot/personas" + +// GiteaClient defines the subset of gitea.Client methods needed for loading repo personas. +// This interface allows for easier testing and decouples the review package from gitea. +type GiteaClient interface { + ListContents(ctx context.Context, owner, repo, path string) ([]ContentEntry, error) + GetFileContent(ctx context.Context, owner, repo, filepath string) (string, error) +} + +// ContentEntry represents a file or directory entry from the contents API. +// This mirrors gitea.ContentEntry to avoid import cycles. +type ContentEntry struct { + Name string `json:"name"` + Path string `json:"path"` + Type string `json:"type"` // "file" or "dir" +} + +// LoadRepoPersonas fetches personas from a repository's .review-bot/personas/ directory. +// Returns an empty map (not nil) if the directory doesn't exist or is empty. +// Individual parse failures are logged and skipped; the remaining personas are still returned. +// Auth errors and other non-404 errors are propagated. +func LoadRepoPersonas(ctx context.Context, client GiteaClient, owner, repo string) (map[string]*Persona, error) { + result := make(map[string]*Persona) + + entries, err := client.ListContents(ctx, owner, repo, RepoPersonaPath) + if err != nil { + // Check if this is a 404 (directory doesn't exist) - expected case + if isNotFoundError(err) { + slog.Debug("no repo personas directory found", "repo", owner+"/"+repo) + return result, nil + } + // Other errors (auth, server) should propagate + return nil, err + } + + if len(entries) == 0 { + slog.Debug("repo personas directory is empty", "repo", owner+"/"+repo) + return result, nil + } + + for _, entry := range entries { + if entry.Type != "file" { + continue + } + // Only process YAML files + if !isYAMLFile(entry.Name) { + continue + } + + content, err := client.GetFileContent(ctx, owner, repo, entry.Path) + if err != nil { + slog.Warn("could not fetch repo persona file", + "file", entry.Path, + "repo", owner+"/"+repo, + "error", err) + continue + } + + persona, err := ParsePersonaBytes([]byte(content), entry.Path) + if err != nil { + slog.Warn("could not parse repo persona file", + "file", entry.Path, + "repo", owner+"/"+repo, + "error", err) + continue + } + + result[persona.Name] = persona + slog.Debug("loaded repo persona", + "name", persona.Name, + "file", entry.Path, + "repo", owner+"/"+repo) + } + + return result, nil +} + +// MergePersonas combines built-in personas with repo personas. +// Repo personas take precedence on name collision. +// Returns a new map; inputs are not modified. +func MergePersonas(builtin, repo map[string]*Persona) map[string]*Persona { + result := make(map[string]*Persona, len(builtin)+len(repo)) + + // Copy built-in personas first + for name, p := range builtin { + result[name] = p + } + + // Overlay repo personas (override on collision) + for name, p := range repo { + if _, exists := result[name]; exists { + slog.Debug("repo persona overrides built-in", "name", name) + } + result[name] = p + } + + return result +} + +// GetBuiltinPersonasMap returns all built-in personas as a map keyed by name. +// Returns an empty map (not nil) if loading fails. +func GetBuiltinPersonasMap() map[string]*Persona { + result := make(map[string]*Persona) + for _, name := range ListBuiltinPersonas() { + p, err := LoadBuiltinPersona(name) + if err != nil { + slog.Warn("could not load built-in persona", "name", name, "error", err) + continue + } + result[name] = p + } + return result +} + +// isYAMLFile checks if a filename has a YAML extension. +func isYAMLFile(name string) bool { + lower := strings.ToLower(name) + return strings.HasSuffix(lower, ".yaml") || strings.HasSuffix(lower, ".yml") +} + +// isNotFoundError checks if an error represents a 404 response. +// This is a string-based heuristic since we don't have access to gitea.APIError here. +func isNotFoundError(err error) bool { + if err == nil { + return false + } + errStr := err.Error() + return strings.Contains(errStr, "HTTP 404") || strings.Contains(errStr, "not found") +} diff --git a/review/repo_persona_test.go b/review/repo_persona_test.go new file mode 100644 index 0000000..89dbcea --- /dev/null +++ b/review/repo_persona_test.go @@ -0,0 +1,410 @@ +package review + +import ( + "context" + "errors" + "strings" + "testing" +) + +func TestParsePersonaBytes(t *testing.T) { + tests := []struct { + name string + data string + source string + wantName string + wantErr string + }{ + { + name: "valid yaml", + data: `name: test +identity: test identity +focus: + - testing +`, + source: "test.yaml", + wantName: "test", + }, + { + name: "missing name", + data: "identity: test\n", + source: "test.yaml", + wantErr: "name is required", + }, + { + name: "invalid yaml", + data: "not: valid:\n yaml: [broken", + source: "test.yaml", + wantErr: "parse", + }, + { + name: "json format by extension", + data: `{"name": "jsontest", "identity": "json identity"}`, + source: "test.json", + wantName: "jsontest", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + p, err := ParsePersonaBytes([]byte(tt.data), tt.source) + if tt.wantErr != "" { + if err == nil { + t.Fatalf("expected error containing %q, got nil", tt.wantErr) + } + if !strings.Contains(err.Error(), tt.wantErr) { + t.Errorf("error = %q, want containing %q", err.Error(), tt.wantErr) + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if p.Name != tt.wantName { + t.Errorf("Name = %q, want %q", p.Name, tt.wantName) + } + }) + } +} + +// mockGiteaClient implements GiteaClient for testing. +type mockGiteaClient struct { + contents map[string][]ContentEntry // path -> entries + files map[string]string // path -> content + listErr error + fileErr map[string]error // path -> error +} + +func (m *mockGiteaClient) ListContents(ctx context.Context, owner, repo, path string) ([]ContentEntry, error) { + if m.listErr != nil { + return nil, m.listErr + } + entries, ok := m.contents[path] + if !ok { + return nil, errors.New("list contents .review-bot/personas: HTTP 404: not found") + } + return entries, nil +} + +func (m *mockGiteaClient) GetFileContent(ctx context.Context, owner, repo, filepath string) (string, error) { + if m.fileErr != nil { + if err, ok := m.fileErr[filepath]; ok { + return "", err + } + } + content, ok := m.files[filepath] + if !ok { + return "", errors.New("HTTP 404: file not found") + } + return content, nil +} + +func TestLoadRepoPersonas(t *testing.T) { + ctx := context.Background() + + t.Run("directory not found returns empty map", func(t *testing.T) { + client := &mockGiteaClient{} // No contents configured -> 404 + personas, err := LoadRepoPersonas(ctx, client, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if personas == nil { + t.Error("expected empty map, got nil") + } + if len(personas) != 0 { + t.Errorf("expected 0 personas, got %d", len(personas)) + } + }) + + t.Run("empty directory returns empty map", func(t *testing.T) { + client := &mockGiteaClient{ + contents: map[string][]ContentEntry{ + RepoPersonaPath: {}, + }, + } + personas, err := LoadRepoPersonas(ctx, client, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(personas) != 0 { + t.Errorf("expected 0 personas, got %d", len(personas)) + } + }) + + t.Run("loads valid personas", func(t *testing.T) { + client := &mockGiteaClient{ + contents: map[string][]ContentEntry{ + RepoPersonaPath: { + {Name: "trading.yaml", Path: ".review-bot/personas/trading.yaml", Type: "file"}, + {Name: "crypto.yaml", Path: ".review-bot/personas/crypto.yaml", Type: "file"}, + }, + }, + files: map[string]string{ + ".review-bot/personas/trading.yaml": `name: trading +display_name: Trading Expert +identity: You are a trading expert. +focus: + - order handling + - risk management +`, + ".review-bot/personas/crypto.yaml": `name: crypto +display_name: Crypto Expert +identity: You are a cryptography expert. +focus: + - key management + - encryption +`, + }, + } + personas, err := LoadRepoPersonas(ctx, client, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(personas) != 2 { + t.Fatalf("expected 2 personas, got %d", len(personas)) + } + if personas["trading"] == nil { + t.Error("expected trading persona") + } + if personas["crypto"] == nil { + t.Error("expected crypto persona") + } + if personas["trading"].DisplayName != "Trading Expert" { + t.Errorf("trading display name = %q, want %q", personas["trading"].DisplayName, "Trading Expert") + } + }) + + t.Run("skips invalid persona files", func(t *testing.T) { + client := &mockGiteaClient{ + contents: map[string][]ContentEntry{ + RepoPersonaPath: { + {Name: "valid.yaml", Path: ".review-bot/personas/valid.yaml", Type: "file"}, + {Name: "invalid.yaml", Path: ".review-bot/personas/invalid.yaml", Type: "file"}, + }, + }, + files: map[string]string{ + ".review-bot/personas/valid.yaml": `name: valid +identity: Valid persona +`, + ".review-bot/personas/invalid.yaml": "not valid yaml: [broken", + }, + } + personas, err := LoadRepoPersonas(ctx, client, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + // Should have the valid one, skip the invalid + if len(personas) != 1 { + t.Fatalf("expected 1 persona (skipped invalid), got %d", len(personas)) + } + if personas["valid"] == nil { + t.Error("expected valid persona") + } + }) + + t.Run("skips non-yaml files", func(t *testing.T) { + client := &mockGiteaClient{ + contents: map[string][]ContentEntry{ + RepoPersonaPath: { + {Name: "persona.yaml", Path: ".review-bot/personas/persona.yaml", Type: "file"}, + {Name: "README.md", Path: ".review-bot/personas/README.md", Type: "file"}, + {Name: "notes.txt", Path: ".review-bot/personas/notes.txt", Type: "file"}, + }, + }, + files: map[string]string{ + ".review-bot/personas/persona.yaml": `name: test +identity: Test persona +`, + ".review-bot/personas/README.md": "# Personas\n\nPut your personas here.", + }, + } + personas, err := LoadRepoPersonas(ctx, client, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(personas) != 1 { + t.Fatalf("expected 1 persona (yaml only), got %d", len(personas)) + } + }) + + t.Run("skips subdirectories", func(t *testing.T) { + client := &mockGiteaClient{ + contents: map[string][]ContentEntry{ + RepoPersonaPath: { + {Name: "persona.yaml", Path: ".review-bot/personas/persona.yaml", Type: "file"}, + {Name: "subdir", Path: ".review-bot/personas/subdir", Type: "dir"}, + }, + }, + files: map[string]string{ + ".review-bot/personas/persona.yaml": `name: test +identity: Test persona +`, + }, + } + personas, err := LoadRepoPersonas(ctx, client, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(personas) != 1 { + t.Fatalf("expected 1 persona (files only), got %d", len(personas)) + } + }) + + t.Run("propagates auth errors", func(t *testing.T) { + client := &mockGiteaClient{ + listErr: errors.New("HTTP 401: unauthorized"), + } + _, err := LoadRepoPersonas(ctx, client, "owner", "repo") + if err == nil { + t.Fatal("expected error for auth failure") + } + if !strings.Contains(err.Error(), "401") { + t.Errorf("error = %q, want containing '401'", err.Error()) + } + }) + + t.Run("skips files that fail to fetch", func(t *testing.T) { + client := &mockGiteaClient{ + contents: map[string][]ContentEntry{ + RepoPersonaPath: { + {Name: "good.yaml", Path: ".review-bot/personas/good.yaml", Type: "file"}, + {Name: "bad.yaml", Path: ".review-bot/personas/bad.yaml", Type: "file"}, + }, + }, + files: map[string]string{ + ".review-bot/personas/good.yaml": `name: good +identity: Good persona +`, + }, + fileErr: map[string]error{ + ".review-bot/personas/bad.yaml": errors.New("HTTP 500: internal server error"), + }, + } + personas, err := LoadRepoPersonas(ctx, client, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(personas) != 1 { + t.Fatalf("expected 1 persona (skipped failed fetch), got %d", len(personas)) + } + }) +} + +func TestMergePersonas(t *testing.T) { + builtin := map[string]*Persona{ + "security": {Name: "security", Identity: "Built-in security"}, + "docs": {Name: "docs", Identity: "Built-in docs"}, + } + repo := map[string]*Persona{ + "security": {Name: "security", Identity: "Repo security override"}, + "trading": {Name: "trading", Identity: "Repo trading"}, + } + + merged := MergePersonas(builtin, repo) + + t.Run("repo overrides builtin on collision", func(t *testing.T) { + if merged["security"].Identity != "Repo security override" { + t.Errorf("security identity = %q, want repo override", merged["security"].Identity) + } + }) + + t.Run("builtin preserved when no collision", func(t *testing.T) { + if merged["docs"].Identity != "Built-in docs" { + t.Errorf("docs identity = %q, want built-in", merged["docs"].Identity) + } + }) + + t.Run("repo-only persona added", func(t *testing.T) { + if merged["trading"] == nil { + t.Error("expected trading persona from repo") + } + if merged["trading"].Identity != "Repo trading" { + t.Errorf("trading identity = %q, want repo", merged["trading"].Identity) + } + }) + + t.Run("original maps not modified", func(t *testing.T) { + if builtin["trading"] != nil { + t.Error("builtin map was modified") + } + if len(repo) != 2 { + t.Error("repo map was modified") + } + }) +} + +func TestGetBuiltinPersonasMap(t *testing.T) { + personas := GetBuiltinPersonasMap() + + if len(personas) == 0 { + t.Fatal("expected at least one built-in persona") + } + + // Verify expected personas exist + expected := []string{"security", "architect", "docs"} + for _, name := range expected { + if personas[name] == nil { + t.Errorf("expected built-in persona %q", name) + } + } + + // Verify personas are valid + for name, p := range personas { + if p.Name != name { + t.Errorf("persona %q has mismatched name %q", name, p.Name) + } + if p.Identity == "" { + t.Errorf("persona %q has empty identity", name) + } + } +} + +func TestIsYAMLFile(t *testing.T) { + tests := []struct { + name string + want bool + }{ + {"test.yaml", true}, + {"test.yml", true}, + {"test.YAML", true}, + {"test.YML", true}, + {"test.json", false}, + {"test.md", false}, + {"test.txt", false}, + {"yaml", false}, + {"yaml.md", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := isYAMLFile(tt.name); got != tt.want { + t.Errorf("isYAMLFile(%q) = %v, want %v", tt.name, got, tt.want) + } + }) + } +} + +func TestIsNotFoundError(t *testing.T) { + tests := []struct { + err error + want bool + }{ + {nil, false}, + {errors.New("HTTP 404: not found"), true}, + {errors.New("something not found"), true}, + {errors.New("HTTP 401: unauthorized"), false}, + {errors.New("connection refused"), false}, + } + + for _, tt := range tests { + name := "nil" + if tt.err != nil { + name = tt.err.Error() + } + t.Run(name, func(t *testing.T) { + if got := isNotFoundError(tt.err); got != tt.want { + t.Errorf("isNotFoundError(%v) = %v, want %v", tt.err, got, tt.want) + } + }) + } +} From 9775cb098cdd8788de36f505de328fce6e7dc33a Mon Sep 17 00:00:00 2001 From: Rodin Date: Sun, 10 May 2026 19:29:13 -0700 Subject: [PATCH 2/4] fix: address PR #61 review findings MAJOR: - LoadRepoPersonas: add MaxPersonaFileSize check before parsing to prevent resource exhaustion from oversized YAML files committed to target repositories MINOR: - isNotFoundError: tighten substring match to 'HTTP 404' only to avoid masking auth/transport errors containing generic 'not found' - main.go: remove duplicate flag.Parse() call - main.go: add comment explaining nil map indexing is safe in Go when LoadRepoPersonas returns an error Tests updated to reflect the intentional behavior change in isNotFoundError and added test case for oversized file rejection. --- cmd/review-bot/main.go | 5 +++-- review/repo_persona.go | 18 +++++++++++++++--- review/repo_persona_test.go | 35 ++++++++++++++++++++++++++++++++++- 3 files changed, 52 insertions(+), 6 deletions(-) diff --git a/cmd/review-bot/main.go b/cmd/review-bot/main.go index 238c9d8..bc0bea7 100644 --- a/cmd/review-bot/main.go +++ b/cmd/review-bot/main.go @@ -79,7 +79,6 @@ func main() { aicoreAPIURL := flag.String("aicore-api-url", envOrDefault("AICORE_API_URL", ""), "SAP AI Core API URL (for provider=aicore)") aicoreResourceGroup := flag.String("aicore-resource-group", envOrDefault("AICORE_RESOURCE_GROUP", "default"), "SAP AI Core resource group (for provider=aicore)") - flag.Parse() flag.Parse() if *versionFlag { @@ -181,7 +180,9 @@ func main() { repoPersonas, err := review.LoadRepoPersonas(ctx, newGiteaClientAdapter(giteaClient), owner, repoName) if err != nil { slog.Warn("could not load repo personas", "repo", owner+"/"+repoName, "error", err) - // Continue with built-in personas only + // Continue with built-in personas only. + // NOTE: repoPersonas is nil here, but map indexing on a nil map is safe in Go + // (returns the zero value), so the fallback to built-in below works correctly. } if p, ok := repoPersonas[*personaName]; ok { persona = p diff --git a/review/repo_persona.go b/review/repo_persona.go index 22735a0..58c0b9d 100644 --- a/review/repo_persona.go +++ b/review/repo_persona.go @@ -28,6 +28,7 @@ type ContentEntry struct { // Returns an empty map (not nil) if the directory doesn't exist or is empty. // Individual parse failures are logged and skipped; the remaining personas are still returned. // Auth errors and other non-404 errors are propagated. +// Files exceeding MaxPersonaFileSize are rejected to prevent resource exhaustion. func LoadRepoPersonas(ctx context.Context, client GiteaClient, owner, repo string) (map[string]*Persona, error) { result := make(map[string]*Persona) @@ -65,6 +66,16 @@ func LoadRepoPersonas(ctx context.Context, client GiteaClient, owner, repo strin continue } + // Enforce size limit before parsing to prevent resource exhaustion + if len(content) > MaxPersonaFileSize { + slog.Warn("repo persona file exceeds maximum size", + "file", entry.Path, + "repo", owner+"/"+repo, + "size", len(content), + "max", MaxPersonaFileSize) + continue + } + persona, err := ParsePersonaBytes([]byte(content), entry.Path) if err != nil { slog.Warn("could not parse repo persona file", @@ -128,11 +139,12 @@ func isYAMLFile(name string) bool { } // isNotFoundError checks if an error represents a 404 response. -// This is a string-based heuristic since we don't have access to gitea.APIError here. +// This uses a specific "HTTP 404" substring match rather than a generic "not found" +// match to avoid masking authentication failures or transport errors that might +// contain "not found" in their message. func isNotFoundError(err error) bool { if err == nil { return false } - errStr := err.Error() - return strings.Contains(errStr, "HTTP 404") || strings.Contains(errStr, "not found") + return strings.Contains(err.Error(), "HTTP 404") } diff --git a/review/repo_persona_test.go b/review/repo_persona_test.go index 89dbcea..56b0a59 100644 --- a/review/repo_persona_test.go +++ b/review/repo_persona_test.go @@ -288,6 +288,36 @@ identity: Good persona t.Fatalf("expected 1 persona (skipped failed fetch), got %d", len(personas)) } }) + + t.Run("skips oversized files", func(t *testing.T) { + // Create a content string that exceeds MaxPersonaFileSize (64KB) + oversizedContent := strings.Repeat("a", MaxPersonaFileSize+1) + client := &mockGiteaClient{ + contents: map[string][]ContentEntry{ + RepoPersonaPath: { + {Name: "normal.yaml", Path: ".review-bot/personas/normal.yaml", Type: "file"}, + {Name: "huge.yaml", Path: ".review-bot/personas/huge.yaml", Type: "file"}, + }, + }, + files: map[string]string{ + ".review-bot/personas/normal.yaml": `name: normal +identity: Normal sized persona +`, + ".review-bot/personas/huge.yaml": oversizedContent, + }, + } + personas, err := LoadRepoPersonas(ctx, client, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + // Should have the normal one, skip the oversized + if len(personas) != 1 { + t.Fatalf("expected 1 persona (skipped oversized), got %d", len(personas)) + } + if personas["normal"] == nil { + t.Error("expected normal persona") + } + }) } func TestMergePersonas(t *testing.T) { @@ -391,7 +421,10 @@ func TestIsNotFoundError(t *testing.T) { }{ {nil, false}, {errors.New("HTTP 404: not found"), true}, - {errors.New("something not found"), true}, + {errors.New("HTTP 404"), true}, + // Intentionally false: generic "not found" could mask auth/transport errors. + // Only explicit HTTP 404 responses should be treated as "directory doesn't exist". + {errors.New("something not found"), false}, {errors.New("HTTP 401: unauthorized"), false}, {errors.New("connection refused"), false}, } From 8f564ea4f887016ef366c88eb6824ab9d95153d8 Mon Sep 17 00:00:00 2001 From: Rodin Date: Sun, 10 May 2026 19:43:46 -0700 Subject: [PATCH 3/4] ci: disable setup-go cache (cache server unreachable) Cache server at 192.168.109.55:35239 times out, adding 4+ minutes to each job. Disable until cache infra is fixed. --- .gitea/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index fa80d7b..c77af05 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -14,6 +14,7 @@ jobs: - uses: actions/setup-go@v5 with: go-version: '1.26' + cache: false - run: go test ./... - run: go vet ./... - run: go build -o review-bot ./cmd/review-bot @@ -44,6 +45,7 @@ jobs: - uses: actions/setup-go@v5 with: go-version: '1.26' + cache: false - run: go build -o review-bot ./cmd/review-bot - name: Run ${{ matrix.name }} review env: From 1dd73bc4dfd8eb41627d2f4f4f6873eebbe4e135 Mon Sep 17 00:00:00 2001 From: Rodin Date: Sun, 10 May 2026 19:44:08 -0700 Subject: [PATCH 4/4] Revert "ci: disable setup-go cache (cache server unreachable)" This reverts commit 8f564ea4f887016ef366c88eb6824ab9d95153d8. --- .gitea/workflows/ci.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index c77af05..fa80d7b 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -14,7 +14,6 @@ jobs: - uses: actions/setup-go@v5 with: go-version: '1.26' - cache: false - run: go test ./... - run: go vet ./... - run: go build -o review-bot ./cmd/review-bot @@ -45,7 +44,6 @@ jobs: - uses: actions/setup-go@v5 with: go-version: '1.26' - cache: false - run: go build -o review-bot ./cmd/review-bot - name: Run ${{ matrix.name }} review env: