diff --git a/cmd/review-bot/main.go b/cmd/review-bot/main.go index 24e678d..bc0bea7 100644 --- a/cmd/review-bot/main.go +++ b/cmd/review-bot/main.go @@ -79,7 +79,6 @@ func main() { aicoreAPIURL := flag.String("aicore-api-url", envOrDefault("AICORE_API_URL", ""), "SAP AI Core API URL (for provider=aicore)") aicoreResourceGroup := flag.String("aicore-resource-group", envOrDefault("AICORE_RESOURCE_GROUP", "default"), "SAP AI Core resource group (for provider=aicore)") - flag.Parse() flag.Parse() if *versionFlag { @@ -116,29 +115,7 @@ func main() { os.Exit(1) } - // Load persona if specified - var persona *review.Persona - if *personaName != "" { - var err error - persona, err = review.LoadBuiltinPersona(*personaName) - if err != nil { - slog.Error("failed to load persona", "persona", *personaName, "error", err) - os.Exit(1) - } - slog.Info("loaded built-in persona", "persona", persona.Name, "display", persona.DisplayName) - } else if *personaFile != "" { - resolvedPath, err := validateWorkspacePath(*personaFile, "persona-file") - if err != nil { - slog.Error("invalid persona-file path", "error", err) - os.Exit(1) - } - persona, err = review.LoadPersona(resolvedPath) - if err != nil { - slog.Error("failed to load persona file", "file", *personaFile, "error", err) - os.Exit(1) - } - slog.Info("loaded persona from file", "file", *personaFile, "persona", persona.Name) - } + // NOTE: Persona loading deferred until after Gitea client init to support repo personas // Validate reviewer-name: only safe characters allowed in sentinel if err := validateReviewerName(*reviewerName); err != nil { @@ -196,6 +173,43 @@ func main() { ctx, cancel := context.WithTimeout(context.Background(), overallTimeout) defer cancel() + // Load persona if specified (after Gitea client init to support repo personas) + var persona *review.Persona + if *personaName != "" { + // Try loading from repo first, then fall back to built-in + repoPersonas, err := review.LoadRepoPersonas(ctx, newGiteaClientAdapter(giteaClient), owner, repoName) + if err != nil { + slog.Warn("could not load repo personas", "repo", owner+"/"+repoName, "error", err) + // Continue with built-in personas only. + // NOTE: repoPersonas is nil here, but map indexing on a nil map is safe in Go + // (returns the zero value), so the fallback to built-in below works correctly. + } + if p, ok := repoPersonas[*personaName]; ok { + persona = p + slog.Info("loaded repo persona", "persona", persona.Name, "display", persona.DisplayName, "repo", owner+"/"+repoName) + } else { + // Fall back to built-in + persona, err = review.LoadBuiltinPersona(*personaName) + if err != nil { + slog.Error("failed to load persona", "persona", *personaName, "error", err) + os.Exit(1) + } + slog.Info("loaded built-in persona", "persona", persona.Name, "display", persona.DisplayName) + } + } else if *personaFile != "" { + resolvedPath, err := validateWorkspacePath(*personaFile, "persona-file") + if err != nil { + slog.Error("invalid persona-file path", "error", err) + os.Exit(1) + } + persona, err = review.LoadPersona(resolvedPath) + if err != nil { + slog.Error("failed to load persona file", "file", *personaFile, "error", err) + os.Exit(1) + } + slog.Info("loaded persona from file", "file", *personaFile, "persona", persona.Name) + } + slog.Info("reviewing pull request", "pr", prNumber, "repo", fmt.Sprintf("%s/%s", owner, repoName)) // Step 1: Fetch PR metadata @@ -783,3 +797,32 @@ func shouldSkipStaleReview(evaluatedSHA, currentSHA string) bool { } return evaluatedSHA != currentSHA } + +// giteaClientAdapter adapts gitea.Client to review.GiteaClient interface. +type giteaClientAdapter struct { + client *gitea.Client +} + +func newGiteaClientAdapter(c *gitea.Client) *giteaClientAdapter { + return &giteaClientAdapter{client: c} +} + +func (a *giteaClientAdapter) ListContents(ctx context.Context, owner, repo, path string) ([]review.ContentEntry, error) { + entries, err := a.client.ListContents(ctx, owner, repo, path) + if err != nil { + return nil, err + } + result := make([]review.ContentEntry, len(entries)) + for i, e := range entries { + result[i] = review.ContentEntry{ + Name: e.Name, + Path: e.Path, + Type: e.Type, + } + } + return result, nil +} + +func (a *giteaClientAdapter) GetFileContent(ctx context.Context, owner, repo, filepath string) (string, error) { + return a.client.GetFileContent(ctx, owner, repo, filepath) +} diff --git a/review/persona.go b/review/persona.go index 0f7f141..329aa78 100644 --- a/review/persona.go +++ b/review/persona.go @@ -224,6 +224,13 @@ func checkYAMLDepth(node *yaml.Node, depth, maxDepth, maxNodes int, seen map[*ya return nil } +// ParsePersonaBytes parses persona data from bytes with a source label for errors. +// This is useful for parsing personas fetched from external sources (e.g., Gitea API) +// without requiring filesystem access. Format is detected by source extension. +func ParsePersonaBytes(data []byte, source string) (*Persona, error) { + return parsePersona(data, source) +} + func validatePersona(p *Persona, source string) error { if p.Name == "" { return fmt.Errorf("persona %s: name is required", source) diff --git a/review/repo_persona.go b/review/repo_persona.go new file mode 100644 index 0000000..58c0b9d --- /dev/null +++ b/review/repo_persona.go @@ -0,0 +1,150 @@ +package review + +import ( + "context" + "log/slog" + "strings" +) + +// RepoPersonaPath is the directory path where repo-specific personas are stored. +const RepoPersonaPath = ".review-bot/personas" + +// GiteaClient defines the subset of gitea.Client methods needed for loading repo personas. +// This interface allows for easier testing and decouples the review package from gitea. +type GiteaClient interface { + ListContents(ctx context.Context, owner, repo, path string) ([]ContentEntry, error) + GetFileContent(ctx context.Context, owner, repo, filepath string) (string, error) +} + +// ContentEntry represents a file or directory entry from the contents API. +// This mirrors gitea.ContentEntry to avoid import cycles. +type ContentEntry struct { + Name string `json:"name"` + Path string `json:"path"` + Type string `json:"type"` // "file" or "dir" +} + +// LoadRepoPersonas fetches personas from a repository's .review-bot/personas/ directory. +// Returns an empty map (not nil) if the directory doesn't exist or is empty. +// Individual parse failures are logged and skipped; the remaining personas are still returned. +// Auth errors and other non-404 errors are propagated. +// Files exceeding MaxPersonaFileSize are rejected to prevent resource exhaustion. +func LoadRepoPersonas(ctx context.Context, client GiteaClient, owner, repo string) (map[string]*Persona, error) { + result := make(map[string]*Persona) + + entries, err := client.ListContents(ctx, owner, repo, RepoPersonaPath) + if err != nil { + // Check if this is a 404 (directory doesn't exist) - expected case + if isNotFoundError(err) { + slog.Debug("no repo personas directory found", "repo", owner+"/"+repo) + return result, nil + } + // Other errors (auth, server) should propagate + return nil, err + } + + if len(entries) == 0 { + slog.Debug("repo personas directory is empty", "repo", owner+"/"+repo) + return result, nil + } + + for _, entry := range entries { + if entry.Type != "file" { + continue + } + // Only process YAML files + if !isYAMLFile(entry.Name) { + continue + } + + content, err := client.GetFileContent(ctx, owner, repo, entry.Path) + if err != nil { + slog.Warn("could not fetch repo persona file", + "file", entry.Path, + "repo", owner+"/"+repo, + "error", err) + continue + } + + // Enforce size limit before parsing to prevent resource exhaustion + if len(content) > MaxPersonaFileSize { + slog.Warn("repo persona file exceeds maximum size", + "file", entry.Path, + "repo", owner+"/"+repo, + "size", len(content), + "max", MaxPersonaFileSize) + continue + } + + persona, err := ParsePersonaBytes([]byte(content), entry.Path) + if err != nil { + slog.Warn("could not parse repo persona file", + "file", entry.Path, + "repo", owner+"/"+repo, + "error", err) + continue + } + + result[persona.Name] = persona + slog.Debug("loaded repo persona", + "name", persona.Name, + "file", entry.Path, + "repo", owner+"/"+repo) + } + + return result, nil +} + +// MergePersonas combines built-in personas with repo personas. +// Repo personas take precedence on name collision. +// Returns a new map; inputs are not modified. +func MergePersonas(builtin, repo map[string]*Persona) map[string]*Persona { + result := make(map[string]*Persona, len(builtin)+len(repo)) + + // Copy built-in personas first + for name, p := range builtin { + result[name] = p + } + + // Overlay repo personas (override on collision) + for name, p := range repo { + if _, exists := result[name]; exists { + slog.Debug("repo persona overrides built-in", "name", name) + } + result[name] = p + } + + return result +} + +// GetBuiltinPersonasMap returns all built-in personas as a map keyed by name. +// Returns an empty map (not nil) if loading fails. +func GetBuiltinPersonasMap() map[string]*Persona { + result := make(map[string]*Persona) + for _, name := range ListBuiltinPersonas() { + p, err := LoadBuiltinPersona(name) + if err != nil { + slog.Warn("could not load built-in persona", "name", name, "error", err) + continue + } + result[name] = p + } + return result +} + +// isYAMLFile checks if a filename has a YAML extension. +func isYAMLFile(name string) bool { + lower := strings.ToLower(name) + return strings.HasSuffix(lower, ".yaml") || strings.HasSuffix(lower, ".yml") +} + +// isNotFoundError checks if an error represents a 404 response. +// This uses a specific "HTTP 404" substring match rather than a generic "not found" +// match to avoid masking authentication failures or transport errors that might +// contain "not found" in their message. +func isNotFoundError(err error) bool { + if err == nil { + return false + } + return strings.Contains(err.Error(), "HTTP 404") +} diff --git a/review/repo_persona_test.go b/review/repo_persona_test.go new file mode 100644 index 0000000..56b0a59 --- /dev/null +++ b/review/repo_persona_test.go @@ -0,0 +1,443 @@ +package review + +import ( + "context" + "errors" + "strings" + "testing" +) + +func TestParsePersonaBytes(t *testing.T) { + tests := []struct { + name string + data string + source string + wantName string + wantErr string + }{ + { + name: "valid yaml", + data: `name: test +identity: test identity +focus: + - testing +`, + source: "test.yaml", + wantName: "test", + }, + { + name: "missing name", + data: "identity: test\n", + source: "test.yaml", + wantErr: "name is required", + }, + { + name: "invalid yaml", + data: "not: valid:\n yaml: [broken", + source: "test.yaml", + wantErr: "parse", + }, + { + name: "json format by extension", + data: `{"name": "jsontest", "identity": "json identity"}`, + source: "test.json", + wantName: "jsontest", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + p, err := ParsePersonaBytes([]byte(tt.data), tt.source) + if tt.wantErr != "" { + if err == nil { + t.Fatalf("expected error containing %q, got nil", tt.wantErr) + } + if !strings.Contains(err.Error(), tt.wantErr) { + t.Errorf("error = %q, want containing %q", err.Error(), tt.wantErr) + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if p.Name != tt.wantName { + t.Errorf("Name = %q, want %q", p.Name, tt.wantName) + } + }) + } +} + +// mockGiteaClient implements GiteaClient for testing. +type mockGiteaClient struct { + contents map[string][]ContentEntry // path -> entries + files map[string]string // path -> content + listErr error + fileErr map[string]error // path -> error +} + +func (m *mockGiteaClient) ListContents(ctx context.Context, owner, repo, path string) ([]ContentEntry, error) { + if m.listErr != nil { + return nil, m.listErr + } + entries, ok := m.contents[path] + if !ok { + return nil, errors.New("list contents .review-bot/personas: HTTP 404: not found") + } + return entries, nil +} + +func (m *mockGiteaClient) GetFileContent(ctx context.Context, owner, repo, filepath string) (string, error) { + if m.fileErr != nil { + if err, ok := m.fileErr[filepath]; ok { + return "", err + } + } + content, ok := m.files[filepath] + if !ok { + return "", errors.New("HTTP 404: file not found") + } + return content, nil +} + +func TestLoadRepoPersonas(t *testing.T) { + ctx := context.Background() + + t.Run("directory not found returns empty map", func(t *testing.T) { + client := &mockGiteaClient{} // No contents configured -> 404 + personas, err := LoadRepoPersonas(ctx, client, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if personas == nil { + t.Error("expected empty map, got nil") + } + if len(personas) != 0 { + t.Errorf("expected 0 personas, got %d", len(personas)) + } + }) + + t.Run("empty directory returns empty map", func(t *testing.T) { + client := &mockGiteaClient{ + contents: map[string][]ContentEntry{ + RepoPersonaPath: {}, + }, + } + personas, err := LoadRepoPersonas(ctx, client, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(personas) != 0 { + t.Errorf("expected 0 personas, got %d", len(personas)) + } + }) + + t.Run("loads valid personas", func(t *testing.T) { + client := &mockGiteaClient{ + contents: map[string][]ContentEntry{ + RepoPersonaPath: { + {Name: "trading.yaml", Path: ".review-bot/personas/trading.yaml", Type: "file"}, + {Name: "crypto.yaml", Path: ".review-bot/personas/crypto.yaml", Type: "file"}, + }, + }, + files: map[string]string{ + ".review-bot/personas/trading.yaml": `name: trading +display_name: Trading Expert +identity: You are a trading expert. +focus: + - order handling + - risk management +`, + ".review-bot/personas/crypto.yaml": `name: crypto +display_name: Crypto Expert +identity: You are a cryptography expert. +focus: + - key management + - encryption +`, + }, + } + personas, err := LoadRepoPersonas(ctx, client, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(personas) != 2 { + t.Fatalf("expected 2 personas, got %d", len(personas)) + } + if personas["trading"] == nil { + t.Error("expected trading persona") + } + if personas["crypto"] == nil { + t.Error("expected crypto persona") + } + if personas["trading"].DisplayName != "Trading Expert" { + t.Errorf("trading display name = %q, want %q", personas["trading"].DisplayName, "Trading Expert") + } + }) + + t.Run("skips invalid persona files", func(t *testing.T) { + client := &mockGiteaClient{ + contents: map[string][]ContentEntry{ + RepoPersonaPath: { + {Name: "valid.yaml", Path: ".review-bot/personas/valid.yaml", Type: "file"}, + {Name: "invalid.yaml", Path: ".review-bot/personas/invalid.yaml", Type: "file"}, + }, + }, + files: map[string]string{ + ".review-bot/personas/valid.yaml": `name: valid +identity: Valid persona +`, + ".review-bot/personas/invalid.yaml": "not valid yaml: [broken", + }, + } + personas, err := LoadRepoPersonas(ctx, client, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + // Should have the valid one, skip the invalid + if len(personas) != 1 { + t.Fatalf("expected 1 persona (skipped invalid), got %d", len(personas)) + } + if personas["valid"] == nil { + t.Error("expected valid persona") + } + }) + + t.Run("skips non-yaml files", func(t *testing.T) { + client := &mockGiteaClient{ + contents: map[string][]ContentEntry{ + RepoPersonaPath: { + {Name: "persona.yaml", Path: ".review-bot/personas/persona.yaml", Type: "file"}, + {Name: "README.md", Path: ".review-bot/personas/README.md", Type: "file"}, + {Name: "notes.txt", Path: ".review-bot/personas/notes.txt", Type: "file"}, + }, + }, + files: map[string]string{ + ".review-bot/personas/persona.yaml": `name: test +identity: Test persona +`, + ".review-bot/personas/README.md": "# Personas\n\nPut your personas here.", + }, + } + personas, err := LoadRepoPersonas(ctx, client, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(personas) != 1 { + t.Fatalf("expected 1 persona (yaml only), got %d", len(personas)) + } + }) + + t.Run("skips subdirectories", func(t *testing.T) { + client := &mockGiteaClient{ + contents: map[string][]ContentEntry{ + RepoPersonaPath: { + {Name: "persona.yaml", Path: ".review-bot/personas/persona.yaml", Type: "file"}, + {Name: "subdir", Path: ".review-bot/personas/subdir", Type: "dir"}, + }, + }, + files: map[string]string{ + ".review-bot/personas/persona.yaml": `name: test +identity: Test persona +`, + }, + } + personas, err := LoadRepoPersonas(ctx, client, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(personas) != 1 { + t.Fatalf("expected 1 persona (files only), got %d", len(personas)) + } + }) + + t.Run("propagates auth errors", func(t *testing.T) { + client := &mockGiteaClient{ + listErr: errors.New("HTTP 401: unauthorized"), + } + _, err := LoadRepoPersonas(ctx, client, "owner", "repo") + if err == nil { + t.Fatal("expected error for auth failure") + } + if !strings.Contains(err.Error(), "401") { + t.Errorf("error = %q, want containing '401'", err.Error()) + } + }) + + t.Run("skips files that fail to fetch", func(t *testing.T) { + client := &mockGiteaClient{ + contents: map[string][]ContentEntry{ + RepoPersonaPath: { + {Name: "good.yaml", Path: ".review-bot/personas/good.yaml", Type: "file"}, + {Name: "bad.yaml", Path: ".review-bot/personas/bad.yaml", Type: "file"}, + }, + }, + files: map[string]string{ + ".review-bot/personas/good.yaml": `name: good +identity: Good persona +`, + }, + fileErr: map[string]error{ + ".review-bot/personas/bad.yaml": errors.New("HTTP 500: internal server error"), + }, + } + personas, err := LoadRepoPersonas(ctx, client, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(personas) != 1 { + t.Fatalf("expected 1 persona (skipped failed fetch), got %d", len(personas)) + } + }) + + t.Run("skips oversized files", func(t *testing.T) { + // Create a content string that exceeds MaxPersonaFileSize (64KB) + oversizedContent := strings.Repeat("a", MaxPersonaFileSize+1) + client := &mockGiteaClient{ + contents: map[string][]ContentEntry{ + RepoPersonaPath: { + {Name: "normal.yaml", Path: ".review-bot/personas/normal.yaml", Type: "file"}, + {Name: "huge.yaml", Path: ".review-bot/personas/huge.yaml", Type: "file"}, + }, + }, + files: map[string]string{ + ".review-bot/personas/normal.yaml": `name: normal +identity: Normal sized persona +`, + ".review-bot/personas/huge.yaml": oversizedContent, + }, + } + personas, err := LoadRepoPersonas(ctx, client, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + // Should have the normal one, skip the oversized + if len(personas) != 1 { + t.Fatalf("expected 1 persona (skipped oversized), got %d", len(personas)) + } + if personas["normal"] == nil { + t.Error("expected normal persona") + } + }) +} + +func TestMergePersonas(t *testing.T) { + builtin := map[string]*Persona{ + "security": {Name: "security", Identity: "Built-in security"}, + "docs": {Name: "docs", Identity: "Built-in docs"}, + } + repo := map[string]*Persona{ + "security": {Name: "security", Identity: "Repo security override"}, + "trading": {Name: "trading", Identity: "Repo trading"}, + } + + merged := MergePersonas(builtin, repo) + + t.Run("repo overrides builtin on collision", func(t *testing.T) { + if merged["security"].Identity != "Repo security override" { + t.Errorf("security identity = %q, want repo override", merged["security"].Identity) + } + }) + + t.Run("builtin preserved when no collision", func(t *testing.T) { + if merged["docs"].Identity != "Built-in docs" { + t.Errorf("docs identity = %q, want built-in", merged["docs"].Identity) + } + }) + + t.Run("repo-only persona added", func(t *testing.T) { + if merged["trading"] == nil { + t.Error("expected trading persona from repo") + } + if merged["trading"].Identity != "Repo trading" { + t.Errorf("trading identity = %q, want repo", merged["trading"].Identity) + } + }) + + t.Run("original maps not modified", func(t *testing.T) { + if builtin["trading"] != nil { + t.Error("builtin map was modified") + } + if len(repo) != 2 { + t.Error("repo map was modified") + } + }) +} + +func TestGetBuiltinPersonasMap(t *testing.T) { + personas := GetBuiltinPersonasMap() + + if len(personas) == 0 { + t.Fatal("expected at least one built-in persona") + } + + // Verify expected personas exist + expected := []string{"security", "architect", "docs"} + for _, name := range expected { + if personas[name] == nil { + t.Errorf("expected built-in persona %q", name) + } + } + + // Verify personas are valid + for name, p := range personas { + if p.Name != name { + t.Errorf("persona %q has mismatched name %q", name, p.Name) + } + if p.Identity == "" { + t.Errorf("persona %q has empty identity", name) + } + } +} + +func TestIsYAMLFile(t *testing.T) { + tests := []struct { + name string + want bool + }{ + {"test.yaml", true}, + {"test.yml", true}, + {"test.YAML", true}, + {"test.YML", true}, + {"test.json", false}, + {"test.md", false}, + {"test.txt", false}, + {"yaml", false}, + {"yaml.md", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := isYAMLFile(tt.name); got != tt.want { + t.Errorf("isYAMLFile(%q) = %v, want %v", tt.name, got, tt.want) + } + }) + } +} + +func TestIsNotFoundError(t *testing.T) { + tests := []struct { + err error + want bool + }{ + {nil, false}, + {errors.New("HTTP 404: not found"), true}, + {errors.New("HTTP 404"), true}, + // Intentionally false: generic "not found" could mask auth/transport errors. + // Only explicit HTTP 404 responses should be treated as "directory doesn't exist". + {errors.New("something not found"), false}, + {errors.New("HTTP 401: unauthorized"), false}, + {errors.New("connection refused"), false}, + } + + for _, tt := range tests { + name := "nil" + if tt.err != nil { + name = tt.err.Error() + } + t.Run(name, func(t *testing.T) { + if got := isNotFoundError(tt.err); got != tt.want { + t.Errorf("isNotFoundError(%v) = %v, want %v", tt.err, got, tt.want) + } + }) + } +}