diff --git a/README.md b/README.md index 23fc038..37c61ba 100644 --- a/README.md +++ b/README.md @@ -459,6 +459,41 @@ YAML is the recommended format for personas because it supports: JSON is also supported for backwards compatibility—just use `.json` extension. +### Repository Personas (Auto-Discovery) + +Repositories can ship their own personas in `.review-bot/personas/`. When you specify `--persona `, review-bot will: + +1. **Try to load from the target repo** — Checks `.review-bot/personas/.yaml` (or `.yml`) +2. **Fall back to built-in** — If not found in repo, uses the built-in persona + +This lets each repo define domain-specific personas without modifying CI config: + +``` +my-trading-repo/ +├── .review-bot/ +│ └── personas/ +│ ├── trading.yaml # Custom trading persona +│ └── regulatory.yaml # Compliance-focused reviews +├── lib/ +└── ... +``` + +```yaml +# CI config (no persona-file needed) +- uses: rodin/review-bot/.gitea/actions/review@v1 + with: + reviewer-name: trading + persona: trading # Will find .review-bot/personas/trading.yaml + ... +``` + +**Priority order:** +1. Repo's `.review-bot/personas/.yaml` +2. Built-in persona with matching name +3. Error if neither exists + +This allows repos to override built-in personas (e.g., a custom `security` persona that adds project-specific rules) while keeping the simple `persona: security` syntax in CI. + ### Persona vs system-prompt-file diff --git a/cmd/review-bot/main.go b/cmd/review-bot/main.go index 24e678d..22a2728 100644 --- a/cmd/review-bot/main.go +++ b/cmd/review-bot/main.go @@ -79,7 +79,6 @@ func main() { aicoreAPIURL := flag.String("aicore-api-url", envOrDefault("AICORE_API_URL", ""), "SAP AI Core API URL (for provider=aicore)") aicoreResourceGroup := flag.String("aicore-resource-group", envOrDefault("AICORE_RESOURCE_GROUP", "default"), "SAP AI Core resource group (for provider=aicore)") - flag.Parse() flag.Parse() if *versionFlag { @@ -116,29 +115,9 @@ func main() { os.Exit(1) } - // Load persona if specified + // Persona loading is deferred until after giteaClient is initialized, + // so we can try loading from the target repo first. var persona *review.Persona - if *personaName != "" { - var err error - persona, err = review.LoadBuiltinPersona(*personaName) - if err != nil { - slog.Error("failed to load persona", "persona", *personaName, "error", err) - os.Exit(1) - } - slog.Info("loaded built-in persona", "persona", persona.Name, "display", persona.DisplayName) - } else if *personaFile != "" { - resolvedPath, err := validateWorkspacePath(*personaFile, "persona-file") - if err != nil { - slog.Error("invalid persona-file path", "error", err) - os.Exit(1) - } - persona, err = review.LoadPersona(resolvedPath) - if err != nil { - slog.Error("failed to load persona file", "file", *personaFile, "error", err) - os.Exit(1) - } - slog.Info("loaded persona from file", "file", *personaFile, "persona", persona.Name) - } // Validate reviewer-name: only safe characters allowed in sentinel if err := validateReviewerName(*reviewerName); err != nil { @@ -196,6 +175,45 @@ func main() { ctx, cancel := context.WithTimeout(context.Background(), overallTimeout) defer cancel() + // Load persona: try remote repo first, then fall back to built-in + if *personaName != "" { + // Try loading from target repo's .review-bot/personas/ directory + fetcher := &giteaFetcher{client: giteaClient} + remotePersonas, err := review.LoadRemotePersonas(ctx, fetcher, owner, repoName) + if err != nil { + slog.Warn("could not load remote personas", "repo", fmt.Sprintf("%s/%s", owner, repoName), "error", err) + // Assign empty map so the lookup below doesn't panic + remotePersonas = map[string]*review.Persona{} + } + + if p, ok := remotePersonas[*personaName]; ok { + persona = p + slog.Info("loaded persona from target repo", "persona", persona.Name, "display", persona.DisplayName) + } else { + // Fall back to built-in persona + var err error + persona, err = review.LoadBuiltinPersona(*personaName) + if err != nil { + slog.Error("failed to load persona", "persona", *personaName, "error", err) + os.Exit(1) + } + slog.Info("loaded built-in persona", "persona", persona.Name, "display", persona.DisplayName) + } + } else if *personaFile != "" { + resolvedPath, err := validateWorkspacePath(*personaFile, "persona-file") + if err != nil { + slog.Error("invalid persona-file path", "error", err) + os.Exit(1) + } + var err2 error + persona, err2 = review.LoadPersona(resolvedPath) + if err2 != nil { + slog.Error("failed to load persona file", "file", *personaFile, "error", err2) + os.Exit(1) + } + slog.Info("loaded persona from file", "file", *personaFile, "persona", persona.Name) + } + slog.Info("reviewing pull request", "pr", prNumber, "repo", fmt.Sprintf("%s/%s", owner, repoName)) // Step 1: Fetch PR metadata @@ -783,3 +801,29 @@ func shouldSkipStaleReview(evaluatedSHA, currentSHA string) bool { } return evaluatedSHA != currentSHA } + +// giteaFetcher adapts gitea.Client to review.PersonaFetcher interface. +type giteaFetcher struct { + client *gitea.Client +} + +func (f *giteaFetcher) ListContents(ctx context.Context, owner, repo, path string) ([]review.ContentEntry, error) { + entries, err := f.client.ListContents(ctx, owner, repo, path) + if err != nil { + return nil, err + } + // Convert gitea.ContentEntry to review.ContentEntry + result := make([]review.ContentEntry, len(entries)) + for i, e := range entries { + result[i] = review.ContentEntry{ + Name: e.Name, + Path: e.Path, + Type: e.Type, + } + } + return result, nil +} + +func (f *giteaFetcher) GetFileContent(ctx context.Context, owner, repo, filepath string) (string, error) { + return f.client.GetFileContent(ctx, owner, repo, filepath) +} diff --git a/review/remote_persona.go b/review/remote_persona.go new file mode 100644 index 0000000..1b9c200 --- /dev/null +++ b/review/remote_persona.go @@ -0,0 +1,164 @@ +package review + +import ( + "context" + "fmt" + "log/slog" + "sort" + "strings" +) + +// PersonaFetcher abstracts fetching files from a remote repository. +// This allows persona loading to work with any Git host API. +type PersonaFetcher interface { + // ListContents returns file/directory entries at a path. + // Returns an error if the path doesn't exist or isn't accessible. + ListContents(ctx context.Context, owner, repo, path string) ([]ContentEntry, error) + + // GetFileContent returns the raw content of a file from the default branch. + GetFileContent(ctx context.Context, owner, repo, filepath string) (string, error) +} + +// ContentEntry represents a file or directory entry. +type ContentEntry struct { + Name string // filename or directory name + Path string // full path from repo root + Type string // "file" or "dir" +} + +// DefaultPersonasPath is the conventional location for repo-specific personas. +const DefaultPersonasPath = ".review-bot/personas" + +// LoadRemotePersonas fetches personas from a remote repository's .review-bot/personas/ directory. +// Returns a map of persona name to Persona. If the directory doesn't exist or is empty, +// returns an empty map with no error (graceful fallback to built-in personas). +// +// Files larger than MaxPersonaFileSize are logged and skipped. +// Invalid YAML files are logged and skipped (partial success model). +// Only .yaml and .yml files are processed; other files are ignored. +func LoadRemotePersonas(ctx context.Context, fetcher PersonaFetcher, owner, repo string) (map[string]*Persona, error) { + return LoadRemotePersonasFromPath(ctx, fetcher, owner, repo, DefaultPersonasPath) +} + +// LoadRemotePersonasFromPath loads personas from a custom path in a remote repository. +// It behaves the same as LoadRemotePersonas but allows specifying a path other than +// the default .review-bot/personas directory. +func LoadRemotePersonasFromPath(ctx context.Context, fetcher PersonaFetcher, owner, repo, path string) (map[string]*Persona, error) { + entries, err := fetcher.ListContents(ctx, owner, repo, path) + if err != nil { + // 404 is expected when repo doesn't have personas — return empty, not error + if isNotFoundError(err) { + slog.Debug("no remote personas directory found", "repo", fmt.Sprintf("%s/%s", owner, repo), "path", path) + return map[string]*Persona{}, nil + } + return nil, fmt.Errorf("list remote personas: %w", err) + } + + // Cap the number of files to process to prevent resource exhaustion + // from repos with thousands of small files. + const maxPersonaFiles = 50 + + result := make(map[string]*Persona) + processed := 0 + for _, entry := range entries { + if processed >= maxPersonaFiles { + slog.Warn("persona file limit reached", "limit", maxPersonaFiles, "repo", fmt.Sprintf("%s/%s", owner, repo)) + break + } + if ctx.Err() != nil { + return nil, ctx.Err() + } + + // Skip directories and non-YAML files + if entry.Type != "file" { + continue + } + if !isYAMLFile(entry.Name) { + continue + } + + content, err := fetcher.GetFileContent(ctx, owner, repo, entry.Path) + if err != nil { + slog.Warn("could not fetch remote persona file", "file", entry.Path, "error", err) + continue + } + + // Check size before parsing (defense in depth) + if len(content) > MaxPersonaFileSize { + slog.Warn("remote persona file exceeds size limit", "file", entry.Path, "size", len(content), "limit", MaxPersonaFileSize) + continue + } + + persona, err := parsePersona([]byte(content), entry.Path) + if err != nil { + slog.Warn("could not parse remote persona file", "file", entry.Path, "error", err) + continue + } + + result[persona.Name] = persona + processed++ + slog.Debug("loaded remote persona", "name", persona.Name, "file", entry.Path) + } + + return result, nil +} + +// MergePersonas combines remote and built-in personas. +// Remote personas take precedence on name collision. +// Returns the merged map and a list of persona names in sorted order. +func MergePersonas(remote, builtin map[string]*Persona) (map[string]*Persona, []string) { + merged := make(map[string]*Persona) + + // Add built-in first + for name, p := range builtin { + merged[name] = p + } + + // Remote overrides built-in on collision + for name, p := range remote { + if _, exists := merged[name]; exists { + slog.Debug("remote persona overrides built-in", "name", name) + } + merged[name] = p + } + + // Collect sorted names + names := make([]string, 0, len(merged)) + for name := range merged { + names = append(names, name) + } + sort.Strings(names) + + return merged, names +} + +// LoadAllBuiltinPersonas loads all built-in personas into a map. +func LoadAllBuiltinPersonas() map[string]*Persona { + result := make(map[string]*Persona) + for _, name := range ListBuiltinPersonas() { + p, err := LoadBuiltinPersona(name) + if err != nil { + slog.Warn("could not load built-in persona", "name", name, "error", err) + continue + } + result[name] = p + } + return result +} + +// isYAMLFile returns true if the filename has a YAML extension. +func isYAMLFile(name string) bool { + lower := strings.ToLower(name) + return strings.HasSuffix(lower, ".yaml") || strings.HasSuffix(lower, ".yml") +} + +// isNotFoundError checks if an error indicates a 404 response. +// This is a simple string check to avoid importing the gitea package +// (which would create a circular dependency). +func isNotFoundError(err error) bool { + if err == nil { + return false + } + errStr := err.Error() + return strings.Contains(errStr, "HTTP 404") +} diff --git a/review/remote_persona_test.go b/review/remote_persona_test.go new file mode 100644 index 0000000..a7292be --- /dev/null +++ b/review/remote_persona_test.go @@ -0,0 +1,394 @@ +package review + +import ( + "context" + "errors" + "testing" +) + +// mockFetcher implements PersonaFetcher for testing. +type mockFetcher struct { + contents map[string][]ContentEntry // path -> entries + files map[string]string // path -> content + listErr error // error to return from ListContents + getFileErr map[string]error // path -> error for GetFileContent + listNotFound bool // return 404-style error +} + +func newMockFetcher() *mockFetcher { + return &mockFetcher{ + contents: make(map[string][]ContentEntry), + files: make(map[string]string), + getFileErr: make(map[string]error), + } +} + +func (m *mockFetcher) ListContents(ctx context.Context, owner, repo, path string) ([]ContentEntry, error) { + if m.listNotFound { + return nil, errors.New("HTTP 404: not found") + } + if m.listErr != nil { + return nil, m.listErr + } + entries, ok := m.contents[path] + if !ok { + return nil, errors.New("HTTP 404: not found") + } + return entries, nil +} + +func (m *mockFetcher) GetFileContent(ctx context.Context, owner, repo, filepath string) (string, error) { + if err, ok := m.getFileErr[filepath]; ok { + return "", err + } + content, ok := m.files[filepath] + if !ok { + return "", errors.New("HTTP 404: file not found") + } + return content, nil +} + +func TestLoadRemotePersonas_NoDirectory(t *testing.T) { + fetcher := newMockFetcher() + fetcher.listNotFound = true + + result, err := LoadRemotePersonas(context.Background(), fetcher, "owner", "repo") + if err != nil { + t.Fatalf("expected no error for missing directory, got: %v", err) + } + if len(result) != 0 { + t.Errorf("expected empty map, got %d personas", len(result)) + } +} + +func TestLoadRemotePersonas_EmptyDirectory(t *testing.T) { + fetcher := newMockFetcher() + fetcher.contents[DefaultPersonasPath] = []ContentEntry{} + + result, err := LoadRemotePersonas(context.Background(), fetcher, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result) != 0 { + t.Errorf("expected empty map, got %d personas", len(result)) + } +} + +func TestLoadRemotePersonas_SinglePersona(t *testing.T) { + fetcher := newMockFetcher() + fetcher.contents[DefaultPersonasPath] = []ContentEntry{ + {Name: "trading.yaml", Path: ".review-bot/personas/trading.yaml", Type: "file"}, + } + fetcher.files[".review-bot/personas/trading.yaml"] = ` +name: trading +display_name: Trading Expert +identity: You are a trading systems expert. +focus: + - order execution + - market data +` + + result, err := LoadRemotePersonas(context.Background(), fetcher, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result) != 1 { + t.Fatalf("expected 1 persona, got %d", len(result)) + } + if result["trading"] == nil { + t.Fatal("expected 'trading' persona") + } + if result["trading"].DisplayName != "Trading Expert" { + t.Errorf("expected display name 'Trading Expert', got %q", result["trading"].DisplayName) + } +} + +func TestLoadRemotePersonas_MultiplePersonas(t *testing.T) { + fetcher := newMockFetcher() + fetcher.contents[DefaultPersonasPath] = []ContentEntry{ + {Name: "one.yaml", Path: ".review-bot/personas/one.yaml", Type: "file"}, + {Name: "two.yml", Path: ".review-bot/personas/two.yml", Type: "file"}, + } + fetcher.files[".review-bot/personas/one.yaml"] = ` +name: one +identity: First persona. +` + fetcher.files[".review-bot/personas/two.yml"] = ` +name: two +identity: Second persona. +` + + result, err := LoadRemotePersonas(context.Background(), fetcher, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result) != 2 { + t.Fatalf("expected 2 personas, got %d", len(result)) + } + if result["one"] == nil || result["two"] == nil { + t.Error("expected both personas to be loaded") + } +} + +func TestLoadRemotePersonas_SkipsNonYAML(t *testing.T) { + fetcher := newMockFetcher() + fetcher.contents[DefaultPersonasPath] = []ContentEntry{ + {Name: "valid.yaml", Path: ".review-bot/personas/valid.yaml", Type: "file"}, + {Name: "readme.md", Path: ".review-bot/personas/readme.md", Type: "file"}, + {Name: "config.json", Path: ".review-bot/personas/config.json", Type: "file"}, + } + fetcher.files[".review-bot/personas/valid.yaml"] = ` +name: valid +identity: Valid persona. +` + + result, err := LoadRemotePersonas(context.Background(), fetcher, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result) != 1 { + t.Fatalf("expected 1 persona (skipping non-YAML), got %d", len(result)) + } +} + +func TestLoadRemotePersonas_SkipsDirectories(t *testing.T) { + fetcher := newMockFetcher() + fetcher.contents[DefaultPersonasPath] = []ContentEntry{ + {Name: "valid.yaml", Path: ".review-bot/personas/valid.yaml", Type: "file"}, + {Name: "subdir", Path: ".review-bot/personas/subdir", Type: "dir"}, + } + fetcher.files[".review-bot/personas/valid.yaml"] = ` +name: valid +identity: Valid persona. +` + + result, err := LoadRemotePersonas(context.Background(), fetcher, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result) != 1 { + t.Fatalf("expected 1 persona (skipping dir), got %d", len(result)) + } +} + +func TestLoadRemotePersonas_SkipsInvalidYAML(t *testing.T) { + fetcher := newMockFetcher() + fetcher.contents[DefaultPersonasPath] = []ContentEntry{ + {Name: "valid.yaml", Path: ".review-bot/personas/valid.yaml", Type: "file"}, + {Name: "invalid.yaml", Path: ".review-bot/personas/invalid.yaml", Type: "file"}, + } + fetcher.files[".review-bot/personas/valid.yaml"] = ` +name: valid +identity: Valid persona. +` + fetcher.files[".review-bot/personas/invalid.yaml"] = ` +this is not valid yaml: [unclosed bracket +` + + result, err := LoadRemotePersonas(context.Background(), fetcher, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result) != 1 { + t.Fatalf("expected 1 persona (skipping invalid), got %d", len(result)) + } + if result["valid"] == nil { + t.Error("expected valid persona to be loaded") + } +} + +func TestLoadRemotePersonas_SkipsOversizedFiles(t *testing.T) { + fetcher := newMockFetcher() + fetcher.contents[DefaultPersonasPath] = []ContentEntry{ + {Name: "huge.yaml", Path: ".review-bot/personas/huge.yaml", Type: "file"}, + } + // Create content larger than MaxPersonaFileSize (64KB) + fetcher.files[".review-bot/personas/huge.yaml"] = ` +name: huge +identity: ` + string(make([]byte, MaxPersonaFileSize+1000)) + + result, err := LoadRemotePersonas(context.Background(), fetcher, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result) != 0 { + t.Errorf("expected 0 personas (oversized file skipped), got %d", len(result)) + } +} + +func TestLoadRemotePersonas_SkipsFetchErrors(t *testing.T) { + fetcher := newMockFetcher() + fetcher.contents[DefaultPersonasPath] = []ContentEntry{ + {Name: "valid.yaml", Path: ".review-bot/personas/valid.yaml", Type: "file"}, + {Name: "error.yaml", Path: ".review-bot/personas/error.yaml", Type: "file"}, + } + fetcher.files[".review-bot/personas/valid.yaml"] = ` +name: valid +identity: Valid persona. +` + fetcher.getFileErr[".review-bot/personas/error.yaml"] = errors.New("network error") + + result, err := LoadRemotePersonas(context.Background(), fetcher, "owner", "repo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result) != 1 { + t.Fatalf("expected 1 persona (skipping error), got %d", len(result)) + } +} + +func TestLoadRemotePersonas_ListContentsError(t *testing.T) { + fetcher := newMockFetcher() + fetcher.listErr = errors.New("server error") + + _, err := LoadRemotePersonas(context.Background(), fetcher, "owner", "repo") + if err == nil { + t.Fatal("expected error for list contents failure") + } +} + +func TestLoadRemotePersonas_ContextCancellation(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + cancel() // Cancel immediately + + fetcher := newMockFetcher() + fetcher.contents[DefaultPersonasPath] = []ContentEntry{ + {Name: "one.yaml", Path: ".review-bot/personas/one.yaml", Type: "file"}, + } + fetcher.files[".review-bot/personas/one.yaml"] = ` +name: one +identity: One. +` + + _, err := LoadRemotePersonas(ctx, fetcher, "owner", "repo") + if err == nil { + t.Fatal("expected context cancellation error") + } +} + +func TestMergePersonas_NoOverlap(t *testing.T) { + remote := map[string]*Persona{ + "trading": {Name: "trading", Identity: "Trading expert."}, + } + builtin := map[string]*Persona{ + "security": {Name: "security", Identity: "Security expert."}, + } + + merged, names := MergePersonas(remote, builtin) + + if len(merged) != 2 { + t.Fatalf("expected 2 personas, got %d", len(merged)) + } + if len(names) != 2 { + t.Fatalf("expected 2 names, got %d", len(names)) + } + // Names should be sorted + if names[0] != "security" || names[1] != "trading" { + t.Errorf("expected sorted names [security, trading], got %v", names) + } +} + +func TestMergePersonas_RemoteOverridesBuiltin(t *testing.T) { + remote := map[string]*Persona{ + "security": {Name: "security", Identity: "Custom security expert."}, + } + builtin := map[string]*Persona{ + "security": {Name: "security", Identity: "Default security expert."}, + } + + merged, _ := MergePersonas(remote, builtin) + + if merged["security"].Identity != "Custom security expert." { + t.Errorf("expected remote to override builtin, got identity: %q", merged["security"].Identity) + } +} + +func TestMergePersonas_EmptyRemote(t *testing.T) { + remote := map[string]*Persona{} + builtin := map[string]*Persona{ + "security": {Name: "security", Identity: "Security."}, + } + + merged, names := MergePersonas(remote, builtin) + + if len(merged) != 1 { + t.Fatalf("expected 1 persona, got %d", len(merged)) + } + if names[0] != "security" { + t.Errorf("expected 'security', got %q", names[0]) + } +} + +func TestMergePersonas_EmptyBuiltin(t *testing.T) { + remote := map[string]*Persona{ + "trading": {Name: "trading", Identity: "Trading."}, + } + builtin := map[string]*Persona{} + + merged, names := MergePersonas(remote, builtin) + + if len(merged) != 1 { + t.Fatalf("expected 1 persona, got %d", len(merged)) + } + if names[0] != "trading" { + t.Errorf("expected 'trading', got %q", names[0]) + } +} + +func TestLoadAllBuiltinPersonas(t *testing.T) { + personas := LoadAllBuiltinPersonas() + + // Should load at least the known built-in personas + expected := []string{"architect", "docs", "security"} + for _, name := range expected { + if personas[name] == nil { + t.Errorf("expected built-in persona %q to be loaded", name) + } + } +} + +func TestIsYAMLFile(t *testing.T) { + tests := []struct { + name string + expected bool + }{ + {"test.yaml", true}, + {"test.yml", true}, + {"test.YAML", true}, + {"test.YML", true}, + {"test.json", false}, + {"test.md", false}, + {"yaml", false}, + {"", false}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + if got := isYAMLFile(tc.name); got != tc.expected { + t.Errorf("isYAMLFile(%q) = %v, want %v", tc.name, got, tc.expected) + } + }) + } +} + +func TestIsNotFoundError(t *testing.T) { + tests := []struct { + name string + err error + expected bool + }{ + {"nil error", nil, false}, + {"HTTP 404", errors.New("HTTP 404: not found"), true}, + {"not found text", errors.New("path not found"), false}, + {"server error", errors.New("server error"), false}, + {"HTTP 500", errors.New("HTTP 500: internal error"), false}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + if got := isNotFoundError(tc.err); got != tc.expected { + t.Errorf("isNotFoundError(%v) = %v, want %v", tc.err, got, tc.expected) + } + }) + } +}