feat: load personas from target repo .review-bot/personas/ #61

Merged
aweiker merged 4 commits from issue-60 into main 2026-05-11 02:54:46 +00:00
4 changed files with 620 additions and 23 deletions
Showing only changes of commit 3f06ba2ea6 - Show all commits
+65 -23
View File
2
@@ -116,29 +116,7 @@ func main() {
os.Exit(1)
}
// Load persona if specified
var persona *review.Persona
if *personaName != "" {
var err error
persona, err = review.LoadBuiltinPersona(*personaName)
if err != nil {
slog.Error("failed to load persona", "persona", *personaName, "error", err)
os.Exit(1)
}
slog.Info("loaded built-in persona", "persona", persona.Name, "display", persona.DisplayName)
} else if *personaFile != "" {
resolvedPath, err := validateWorkspacePath(*personaFile, "persona-file")
if err != nil {
slog.Error("invalid persona-file path", "error", err)
os.Exit(1)
}
persona, err = review.LoadPersona(resolvedPath)
if err != nil {
slog.Error("failed to load persona file", "file", *personaFile, "error", err)
os.Exit(1)
}
slog.Info("loaded persona from file", "file", *personaFile, "persona", persona.Name)
}
// NOTE: Persona loading deferred until after Gitea client init to support repo personas
// Validate reviewer-name: only safe characters allowed in sentinel
if err := validateReviewerName(*reviewerName); err != nil {
1
@@ -196,6 +174,41 @@ func main() {
ctx, cancel := context.WithTimeout(context.Background(), overallTimeout)
defer cancel()
// Load persona if specified (after Gitea client init to support repo personas)
var persona *review.Persona
if *personaName != "" {
// Try loading from repo first, then fall back to built-in
repoPersonas, err := review.LoadRepoPersonas(ctx, newGiteaClientAdapter(giteaClient), owner, repoName)
if err != nil {
slog.Warn("could not load repo personas", "repo", owner+"/"+repoName, "error", err)
Outdated
Review

[MINOR] When LoadRepoPersonas returns an error (non-404), the code logs a warning and continues with repoPersonas being nil (its zero value). The subsequent if p, ok := repoPersonas[*personaName] safely handles nil maps (map indexing on nil returns zero value), so this works correctly, but it's subtle. A reader might wonder if repoPersonas could cause a panic. Consider initializing repoPersonas to an empty map before the error check, or adding a brief comment that nil map indexing is safe in Go.

**[MINOR]** When `LoadRepoPersonas` returns an error (non-404), the code logs a warning and continues with `repoPersonas` being nil (its zero value). The subsequent `if p, ok := repoPersonas[*personaName]` safely handles nil maps (map indexing on nil returns zero value), so this works correctly, but it's subtle. A reader might wonder if `repoPersonas` could cause a panic. Consider initializing `repoPersonas` to an empty map before the error check, or adding a brief comment that nil map indexing is safe in Go.
Review

[MINOR] When LoadRepoPersonas returns an error, repoPersonas is nil. The comment correctly notes that map indexing on a nil map is safe in Go (returns zero value). However, err from LoadRepoPersonas is shadowed — the outer var persona *review.Persona block has no err declaration, so the err used in persona, err = review.LoadBuiltinPersona(...) on ~line 196 is an undeclared variable in the else branch. Check that the err variable is properly scoped here — it may need a short variable declaration (:=) in the else branch.

**[MINOR]** When `LoadRepoPersonas` returns an error, `repoPersonas` is nil. The comment correctly notes that map indexing on a nil map is safe in Go (returns zero value). However, `err` from `LoadRepoPersonas` is shadowed — the outer `var persona *review.Persona` block has no `err` declaration, so the `err` used in `persona, err = review.LoadBuiltinPersona(...)` on ~line 196 is an undeclared variable in the else branch. Check that the `err` variable is properly scoped here — it may need a short variable declaration (`:=`) in the else branch.
// Continue with built-in personas only
}
Outdated
Review

[MINOR] When LoadRepoPersonas returns an error, repoPersonas will be nil (not an empty map), but the subsequent repoPersonas[*personaName] map index on line ~187 is safe in Go (indexing a nil map returns the zero value, false). However, the err variable from LoadRepoPersonas is shadowed by the later persona, err = review.LoadBuiltinPersona(...) assignment at line ~193, which is correct since the outer err is already declared. The logic is sound but slightly subtle — consider using a blank identifier or a named variable repoErr to make the intent clearer.

**[MINOR]** When `LoadRepoPersonas` returns an error, `repoPersonas` will be nil (not an empty map), but the subsequent `repoPersonas[*personaName]` map index on line ~187 is safe in Go (indexing a nil map returns the zero value, false). However, the `err` variable from `LoadRepoPersonas` is shadowed by the later `persona, err = review.LoadBuiltinPersona(...)` assignment at line ~193, which is correct since the outer `err` is already declared. The logic is sound but slightly subtle — consider using a blank identifier or a named variable `repoErr` to make the intent clearer.
Review

[MINOR] When LoadRepoPersonas returns an error, repoPersonas is nil and the code falls through to repoPersonas[*personaName]. The comment correctly notes this is safe in Go (nil map read returns zero value), but the variable err is declared via repoPersonas, err := ... and then reassigned in the else branch with persona, err = review.LoadBuiltinPersona(...). This works due to Go's scoping rules but the comment about nil map safety would be clearer if placed directly above the map indexing line rather than after the Warn log.

**[MINOR]** When `LoadRepoPersonas` returns an error, `repoPersonas` is `nil` and the code falls through to `repoPersonas[*personaName]`. The comment correctly notes this is safe in Go (nil map read returns zero value), but the variable `err` is declared via `repoPersonas, err := ...` and then reassigned in the `else` branch with `persona, err = review.LoadBuiltinPersona(...)`. This works due to Go's scoping rules but the comment about nil map safety would be clearer if placed directly above the map indexing line rather than after the Warn log.
if p, ok := repoPersonas[*personaName]; ok {
persona = p
slog.Info("loaded repo persona", "persona", persona.Name, "display", persona.DisplayName, "repo", owner+"/"+repoName)
} else {
// Fall back to built-in
persona, err = review.LoadBuiltinPersona(*personaName)
if err != nil {
slog.Error("failed to load persona", "persona", *personaName, "error", err)
os.Exit(1)
}
slog.Info("loaded built-in persona", "persona", persona.Name, "display", persona.DisplayName)
}
} else if *personaFile != "" {
resolvedPath, err := validateWorkspacePath(*personaFile, "persona-file")
if err != nil {
slog.Error("invalid persona-file path", "error", err)
os.Exit(1)
}
persona, err = review.LoadPersona(resolvedPath)
if err != nil {
slog.Error("failed to load persona file", "file", *personaFile, "error", err)
os.Exit(1)
}
slog.Info("loaded persona from file", "file", *personaFile, "persona", persona.Name)
}
slog.Info("reviewing pull request", "pr", prNumber, "repo", fmt.Sprintf("%s/%s", owner, repoName))
// Step 1: Fetch PR metadata
@@ -783,3 +796,32 @@ func shouldSkipStaleReview(evaluatedSHA, currentSHA string) bool {
}
return evaluatedSHA != currentSHA
}
// giteaClientAdapter adapts gitea.Client to review.GiteaClient interface.
type giteaClientAdapter struct {
client *gitea.Client
}
func newGiteaClientAdapter(c *gitea.Client) *giteaClientAdapter {
return &giteaClientAdapter{client: c}
}
func (a *giteaClientAdapter) ListContents(ctx context.Context, owner, repo, path string) ([]review.ContentEntry, error) {
entries, err := a.client.ListContents(ctx, owner, repo, path)
if err != nil {
return nil, err
}
result := make([]review.ContentEntry, len(entries))
for i, e := range entries {
result[i] = review.ContentEntry{
Name: e.Name,
Path: e.Path,
Type: e.Type,
}
}
return result, nil
}
func (a *giteaClientAdapter) GetFileContent(ctx context.Context, owner, repo, filepath string) (string, error) {
Review

[NIT] The giteaClientAdapter struct and its constructor/methods could live in a separate file (e.g., cmd/review-bot/adapter.go) to keep main.go focused, but this is a very minor organizational concern.

**[NIT]** The `giteaClientAdapter` struct and its constructor/methods could live in a separate file (e.g., `cmd/review-bot/adapter.go`) to keep `main.go` focused, but this is a very minor organizational concern.
return a.client.GetFileContent(ctx, owner, repo, filepath)
}
+7
View File
@@ -224,6 +224,13 @@ func checkYAMLDepth(node *yaml.Node, depth, maxDepth, maxNodes int, seen map[*ya
return nil
}
// ParsePersonaBytes parses persona data from bytes with a source label for errors.
// This is useful for parsing personas fetched from external sources (e.g., Gitea API)
// without requiring filesystem access. Format is detected by source extension.
func ParsePersonaBytes(data []byte, source string) (*Persona, error) {
return parsePersona(data, source)
Review

[NIT] ParsePersonaBytes delegates to parsePersona, which is fine, but does not apply any size checks like LoadPersona. Consider documenting that size limits are only enforced for filesystem sources, or add a size check for external sources if appropriate.

**[NIT]** ParsePersonaBytes delegates to parsePersona, which is fine, but does not apply any size checks like LoadPersona. Consider documenting that size limits are only enforced for filesystem sources, or add a size check for external sources if appropriate.
}
func validatePersona(p *Persona, source string) error {
if p.Name == "" {
return fmt.Errorf("persona %s: name is required", source)
+138
View File
@@ -0,0 +1,138 @@
package review
import (
"context"
"log/slog"
"strings"
)
// RepoPersonaPath is the directory path where repo-specific personas are stored.
const RepoPersonaPath = ".review-bot/personas"
// GiteaClient defines the subset of gitea.Client methods needed for loading repo personas.
// This interface allows for easier testing and decouples the review package from gitea.
Review

[NIT] The GiteaClient interface name is somewhat generic for a package named review. Since this interface is exported and lives in review, callers see it as review.GiteaClient. A name like RepoFileClient or ContentClient would be more descriptive of what it actually does (list/get file contents) rather than being named after a specific vendor.

**[NIT]** The `GiteaClient` interface name is somewhat generic for a package named `review`. Since this interface is exported and lives in `review`, callers see it as `review.GiteaClient`. A name like `RepoFileClient` or `ContentClient` would be more descriptive of what it actually does (list/get file contents) rather than being named after a specific vendor.
type GiteaClient interface {
ListContents(ctx context.Context, owner, repo, path string) ([]ContentEntry, error)
GetFileContent(ctx context.Context, owner, repo, filepath string) (string, error)
}
// ContentEntry represents a file or directory entry from the contents API.
// This mirrors gitea.ContentEntry to avoid import cycles.
type ContentEntry struct {
Name string `json:"name"`
Path string `json:"path"`
Type string `json:"type"` // "file" or "dir"
}
// LoadRepoPersonas fetches personas from a repository's .review-bot/personas/ directory.
// Returns an empty map (not nil) if the directory doesn't exist or is empty.
// Individual parse failures are logged and skipped; the remaining personas are still returned.
// Auth errors and other non-404 errors are propagated.
func LoadRepoPersonas(ctx context.Context, client GiteaClient, owner, repo string) (map[string]*Persona, error) {
result := make(map[string]*Persona)
entries, err := client.ListContents(ctx, owner, repo, RepoPersonaPath)
if err != nil {
// Check if this is a 404 (directory doesn't exist) - expected case
if isNotFoundError(err) {
slog.Debug("no repo personas directory found", "repo", owner+"/"+repo)
return result, nil
}
// Other errors (auth, server) should propagate
return nil, err
}
if len(entries) == 0 {
Review

[MINOR] Unbounded number of files processed: the loader iterates over all entries in .review-bot/personas and fetches each YAML file. A repository could include a very large number of small files to cause many network requests and parses, leading to CPU and memory pressure. Consider imposing a reasonable cap on the number of persona files processed.

**[MINOR]** Unbounded number of files processed: the loader iterates over all entries in .review-bot/personas and fetches each YAML file. A repository could include a very large number of small files to cause many network requests and parses, leading to CPU and memory pressure. Consider imposing a reasonable cap on the number of persona files processed.
slog.Debug("repo personas directory is empty", "repo", owner+"/"+repo)
return result, nil
}
for _, entry := range entries {
if entry.Type != "file" {
Review

[NIT] LoadRepoPersonas logs warnings for per-file fetch/parse failures inside the library package. Consider returning these details (e.g., as a slice of skipped files) and letting callers decide how to log to reduce potential log noise from dependencies.

**[NIT]** LoadRepoPersonas logs warnings for per-file fetch/parse failures inside the library package. Consider returning these details (e.g., as a slice of skipped files) and letting callers decide how to log to reduce potential log noise from dependencies.
Review

[MINOR] LoadRepoPersonas iterates and fetches every YAML file under .review-bot/personas without a per-repo cap. A malicious repo could place a very large number of small YAML files to increase API calls and parsing, causing resource usage spikes within the time budget. Consider short-circuiting when a specific persona is requested (e.g., try .yaml/.yml first) or enforcing a reasonable maximum number of files processed.

**[MINOR]** LoadRepoPersonas iterates and fetches every YAML file under .review-bot/personas without a per-repo cap. A malicious repo could place a very large number of small YAML files to increase API calls and parsing, causing resource usage spikes within the time budget. Consider short-circuiting when a specific persona is requested (e.g., try <name>.yaml/.yml first) or enforcing a reasonable maximum number of files processed.
continue
}
// Only process YAML files
Review

[MAJOR] Unbounded repo persona file size: content fetched from the repository is read into memory without any size limit before parsing. An attacker can commit an extremely large YAML file under .review-bot/personas to cause excessive memory usage or OOM. Unlike LoadPersona (which enforces MaxPersonaFileSize), repo personas have no such cap.

**[MAJOR]** Unbounded repo persona file size: content fetched from the repository is read into memory without any size limit before parsing. An attacker can commit an extremely large YAML file under .review-bot/personas to cause excessive memory usage or OOM. Unlike LoadPersona (which enforces MaxPersonaFileSize), repo personas have no such cap.
if !isYAMLFile(entry.Name) {
continue
}
content, err := client.GetFileContent(ctx, owner, repo, entry.Path)
if err != nil {
slog.Warn("could not fetch repo persona file",
"file", entry.Path,
"repo", owner+"/"+repo,
Review

[MAJOR] Repo persona content is parsed without enforcing a maximum size. An attacker can commit an excessively large YAML persona (e.g., single huge scalar) to the repo, causing high memory/CPU usage when the bot fetches and parses it. Unlike LoadPersona() (filesystem), LoadRepoPersonas()/ParsePersonaBytes() do not check MaxPersonaFileSize, enabling a DoS vector.

**[MAJOR]** Repo persona content is parsed without enforcing a maximum size. An attacker can commit an excessively large YAML persona (e.g., single huge scalar) to the repo, causing high memory/CPU usage when the bot fetches and parses it. Unlike LoadPersona() (filesystem), LoadRepoPersonas()/ParsePersonaBytes() do not check MaxPersonaFileSize, enabling a DoS vector.
"error", err)
Review

[MINOR] LoadRepoPersonas parses repo personas via ParsePersonaBytes without enforcing MaxPersonaFileSize. A very large YAML file with few nodes (e.g., a huge identity string) could still be fetched and parsed, potentially increasing memory usage. Consider checking len(content) against MaxPersonaFileSize before parsing.

**[MINOR]** LoadRepoPersonas parses repo personas via ParsePersonaBytes without enforcing MaxPersonaFileSize. A very large YAML file with few nodes (e.g., a huge identity string) could still be fetched and parsed, potentially increasing memory usage. Consider checking len(content) against MaxPersonaFileSize before parsing.
continue
}
persona, err := ParsePersonaBytes([]byte(content), entry.Path)
if err != nil {
slog.Warn("could not parse repo persona file",
Review

[MINOR] Persona file size is validated only after fetching full content from the server. An attacker with write access to the default branch could commit very large YAML files and cause increased memory usage when GetFileContent returns them. Consider adding a cap on the number of files processed and/or a total bytes budget, and ensure server/client-side limits or streaming are used where possible.

**[MINOR]** Persona file size is validated only after fetching full content from the server. An attacker with write access to the default branch could commit very large YAML files and cause increased memory usage when GetFileContent returns them. Consider adding a cap on the number of files processed and/or a total bytes budget, and ensure server/client-side limits or streaming are used where possible.
"file", entry.Path,
"repo", owner+"/"+repo,
"error", err)
continue
}
result[persona.Name] = persona
slog.Debug("loaded repo persona",
"name", persona.Name,
"file", entry.Path,
Review

[NIT] MergePersonas is defined but not used in main.go — the main flow loads repo personas and then falls back to built-in individually, rather than merging all of them first. MergePersonas is a useful utility and is tested, but consider whether it should be used in main (load all built-ins + all repo personas into a merged map, then look up) for consistency and simplicity. As-is, the main flow only fetches repo personas on every invocation when --persona is specified, even though the built-in fallback path doesn't benefit from pre-loading all built-ins.

**[NIT]** `MergePersonas` is defined but not used in `main.go` — the main flow loads repo personas and then falls back to built-in individually, rather than merging all of them first. `MergePersonas` is a useful utility and is tested, but consider whether it should be used in main (load all built-ins + all repo personas into a merged map, then look up) for consistency and simplicity. As-is, the main flow only fetches repo personas on every invocation when `--persona` is specified, even though the built-in fallback path doesn't benefit from pre-loading all built-ins.
"repo", owner+"/"+repo)
}
return result, nil
}
Review

[NIT] MergePersonas is defined but never called in main.go — the main flow only calls LoadRepoPersonas and does a direct map lookup. MergePersonas and GetBuiltinPersonasMap are exported utility functions that are tested but unused by the current main flow. This is fine as they're part of the public API for future use, but worth noting in a follow-up if they remain unused.

**[NIT]** `MergePersonas` is defined but never called in `main.go` — the main flow only calls `LoadRepoPersonas` and does a direct map lookup. `MergePersonas` and `GetBuiltinPersonasMap` are exported utility functions that are tested but unused by the current main flow. This is fine as they're part of the public API for future use, but worth noting in a follow-up if they remain unused.
// MergePersonas combines built-in personas with repo personas.
// Repo personas take precedence on name collision.
// Returns a new map; inputs are not modified.
func MergePersonas(builtin, repo map[string]*Persona) map[string]*Persona {
result := make(map[string]*Persona, len(builtin)+len(repo))
// Copy built-in personas first
for name, p := range builtin {
result[name] = p
}
// Overlay repo personas (override on collision)
for name, p := range repo {
if _, exists := result[name]; exists {
slog.Debug("repo persona overrides built-in", "name", name)
}
result[name] = p
}
Review

[MINOR] isNotFoundError uses a broad substring match ("HTTP 404" or "not found"). This can misclassify non-404 errors that contain "not found" (e.g., upstream message wording) and silently skip persona loading. While not directly exploitable, it can mask auth or transport errors.

**[MINOR]** isNotFoundError uses a broad substring match ("HTTP 404" or "not found"). This can misclassify non-404 errors that contain "not found" (e.g., upstream message wording) and silently skip persona loading. While not directly exploitable, it can mask auth or transport errors.
return result
}
// GetBuiltinPersonasMap returns all built-in personas as a map keyed by name.
Review

[MINOR] isNotFoundError relies on substring matching of error messages ("HTTP 404" or "not found"), which is brittle. Prefer a typed error with errors.Is or have the adapter normalize 404s to a sentinel error where possible.

**[MINOR]** isNotFoundError relies on substring matching of error messages ("HTTP 404" or "not found"), which is brittle. Prefer a typed error with errors.Is or have the adapter normalize 404s to a sentinel error where possible.
// Returns an empty map (not nil) if loading fails.
Review

[MINOR] LoadRepoPersonas filters to YAML files only via isYAMLFile, but ParsePersonaBytes supports JSON based on extension. Consider accepting .json in repo personas for consistency and user convenience.

**[MINOR]** LoadRepoPersonas filters to YAML files only via isYAMLFile, but ParsePersonaBytes supports JSON based on extension. Consider accepting .json in repo personas for consistency and user convenience.
func GetBuiltinPersonasMap() map[string]*Persona {
result := make(map[string]*Persona)
for _, name := range ListBuiltinPersonas() {
p, err := LoadBuiltinPersona(name)
if err != nil {
slog.Warn("could not load built-in persona", "name", name, "error", err)
Review

[MINOR] isNotFoundError relies on substring matching "HTTP 404" in error messages, which is brittle and coupled to the client's error text. Consider a more robust approach (typed errors or explicit status code) if possible.

**[MINOR]** isNotFoundError relies on substring matching "HTTP 404" in error messages, which is brittle and coupled to the client's error text. Consider a more robust approach (typed errors or explicit status code) if possible.
continue
}
result[name] = p
}
return result
Review

[NIT] isNotFoundError uses broad string matching ("not found") which may incorrectly classify unrelated errors as 404s. If possible, narrow the check (e.g., look for an API error type or more specific message prefix) to reduce false positives.

**[NIT]** isNotFoundError uses broad string matching ("not found") which may incorrectly classify unrelated errors as 404s. If possible, narrow the check (e.g., look for an API error type or more specific message prefix) to reduce false positives.
}
// isYAMLFile checks if a filename has a YAML extension.
func isYAMLFile(name string) bool {
lower := strings.ToLower(name)
return strings.HasSuffix(lower, ".yaml") || strings.HasSuffix(lower, ".yml")
}
Review

[MINOR] isNotFoundError uses string-based heuristics ("not found" case-insensitive substring match). This could produce false positives — e.g., an error message like "operation not found in registry" from a completely different layer would be treated as a 404. The comment acknowledges this is a heuristic, which is acceptable given the import cycle constraint, but consider wrapping gitea errors with a typed sentinel or checking only for the more specific "HTTP 404" prefix to reduce false positives.

**[MINOR]** `isNotFoundError` uses string-based heuristics (`"not found"` case-insensitive substring match). This could produce false positives — e.g., an error message like `"operation not found in registry"` from a completely different layer would be treated as a 404. The comment acknowledges this is a heuristic, which is acceptable given the import cycle constraint, but consider wrapping gitea errors with a typed sentinel or checking only for the more specific `"HTTP 404"` prefix to reduce false positives.
// isNotFoundError checks if an error represents a 404 response.
Review

[MINOR] isNotFoundError uses string-substring matching on error messages (strings.Contains(err.Error(), "HTTP 404")). This is fragile and couples the review package to the specific error format produced by the gitea package. A more robust approach would be to define a sentinel error type (e.g., ErrNotFound) in the gitea package and use errors.Is here, or to have the GiteaClient interface return a typed error. The current approach is documented and tested, so it's functional, but it's a design smell.

**[MINOR]** `isNotFoundError` uses string-substring matching on error messages (`strings.Contains(err.Error(), "HTTP 404")`). This is fragile and couples the review package to the specific error format produced by the gitea package. A more robust approach would be to define a sentinel error type (e.g., `ErrNotFound`) in the gitea package and use `errors.Is` here, or to have the `GiteaClient` interface return a typed error. The current approach is documented and tested, so it's functional, but it's a design smell.
// This is a string-based heuristic since we don't have access to gitea.APIError here.
func isNotFoundError(err error) bool {
if err == nil {
Review

[NIT] isNotFoundError uses a string-based heuristic (strings.Contains(errStr, "not found")) that could produce false positives for errors like "user not found" from application logic. The HTTP 404 check is precise; the bare not found check is looser. This is acknowledged in the comment but worth considering whether to tighten to only the HTTP 404 pattern.

**[NIT]** `isNotFoundError` uses a string-based heuristic (`strings.Contains(errStr, "not found")`) that could produce false positives for errors like "user not found" from application logic. The `HTTP 404` check is precise; the bare `not found` check is looser. This is acknowledged in the comment but worth considering whether to tighten to only the `HTTP 404` pattern.
return false
}
errStr := err.Error()
return strings.Contains(errStr, "HTTP 404") || strings.Contains(errStr, "not found")
}
+410
View File
@@ -0,0 +1,410 @@
package review
import (
"context"
"errors"
"strings"
"testing"
)
func TestParsePersonaBytes(t *testing.T) {
tests := []struct {
name string
data string
Review

[NIT] Minor style: extra blank line in the TestParsePersonaBytes test struct definition between wantName and wantErr fields. This is inconsistent with the other test structs in the file and would be flagged by gofmt.

**[NIT]** Minor style: extra blank line in the `TestParsePersonaBytes` test struct definition between `wantName` and `wantErr` fields. This is inconsistent with the other test structs in the file and would be flagged by `gofmt`.
source string
wantName string
wantErr string
Review

[NIT] Minor style nit: the struct literal in TestParsePersonaBytes has extra blank lines in field alignment (name, data, source, wantName, wantErr). This doesn't affect correctness but is inconsistent with Go's gofmt style.

**[NIT]** Minor style nit: the struct literal in `TestParsePersonaBytes` has extra blank lines in field alignment (`name`, `data`, `source`, `wantName`, `wantErr`). This doesn't affect correctness but is inconsistent with Go's `gofmt` style.
}{
{
Review

[NIT] Minor formatting inconsistency: the TestParsePersonaBytes table struct has inconsistent field alignment (extra blank line between source and wantName fields in some entries). This is cosmetic and gofmt should handle it, but it's visible in the raw source.

**[NIT]** Minor formatting inconsistency: the `TestParsePersonaBytes` table struct has inconsistent field alignment (extra blank line between `source` and `wantName` fields in some entries). This is cosmetic and `gofmt` should handle it, but it's visible in the raw source.
name: "valid yaml",
Review

[NIT] Trailing whitespace on the struct field alignment lines (extra spaces between name and string field definitions). Minor formatting issue — gofmt would normalize this.

**[NIT]** Trailing whitespace on the struct field alignment lines (extra spaces between `name` and `string` field definitions). Minor formatting issue — `gofmt` would normalize this.
data: `name: test
identity: test identity
focus:
- testing
`,
source: "test.yaml",
wantName: "test",
},
{
name: "missing name",
data: "identity: test\n",
source: "test.yaml",
wantErr: "name is required",
},
{
name: "invalid yaml",
data: "not: valid:\n yaml: [broken",
source: "test.yaml",
wantErr: "parse",
},
{
name: "json format by extension",
data: `{"name": "jsontest", "identity": "json identity"}`,
source: "test.json",
wantName: "jsontest",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
p, err := ParsePersonaBytes([]byte(tt.data), tt.source)
if tt.wantErr != "" {
if err == nil {
t.Fatalf("expected error containing %q, got nil", tt.wantErr)
}
if !strings.Contains(err.Error(), tt.wantErr) {
t.Errorf("error = %q, want containing %q", err.Error(), tt.wantErr)
}
return
}
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if p.Name != tt.wantName {
t.Errorf("Name = %q, want %q", p.Name, tt.wantName)
}
})
}
}
// mockGiteaClient implements GiteaClient for testing.
type mockGiteaClient struct {
contents map[string][]ContentEntry // path -> entries
files map[string]string // path -> content
listErr error
fileErr map[string]error // path -> error
}
func (m *mockGiteaClient) ListContents(ctx context.Context, owner, repo, path string) ([]ContentEntry, error) {
if m.listErr != nil {
return nil, m.listErr
}
entries, ok := m.contents[path]
if !ok {
return nil, errors.New("list contents .review-bot/personas: HTTP 404: not found")
}
return entries, nil
}
func (m *mockGiteaClient) GetFileContent(ctx context.Context, owner, repo, filepath string) (string, error) {
if m.fileErr != nil {
if err, ok := m.fileErr[filepath]; ok {
return "", err
}
}
content, ok := m.files[filepath]
if !ok {
return "", errors.New("HTTP 404: file not found")
}
return content, nil
}
func TestLoadRepoPersonas(t *testing.T) {
ctx := context.Background()
t.Run("directory not found returns empty map", func(t *testing.T) {
client := &mockGiteaClient{} // No contents configured -> 404
personas, err := LoadRepoPersonas(ctx, client, "owner", "repo")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if personas == nil {
t.Error("expected empty map, got nil")
}
if len(personas) != 0 {
t.Errorf("expected 0 personas, got %d", len(personas))
}
})
t.Run("empty directory returns empty map", func(t *testing.T) {
client := &mockGiteaClient{
contents: map[string][]ContentEntry{
RepoPersonaPath: {},
},
}
personas, err := LoadRepoPersonas(ctx, client, "owner", "repo")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(personas) != 0 {
t.Errorf("expected 0 personas, got %d", len(personas))
}
})
t.Run("loads valid personas", func(t *testing.T) {
client := &mockGiteaClient{
contents: map[string][]ContentEntry{
RepoPersonaPath: {
{Name: "trading.yaml", Path: ".review-bot/personas/trading.yaml", Type: "file"},
{Name: "crypto.yaml", Path: ".review-bot/personas/crypto.yaml", Type: "file"},
},
},
files: map[string]string{
".review-bot/personas/trading.yaml": `name: trading
display_name: Trading Expert
identity: You are a trading expert.
focus:
- order handling
- risk management
`,
".review-bot/personas/crypto.yaml": `name: crypto
display_name: Crypto Expert
identity: You are a cryptography expert.
focus:
- key management
- encryption
`,
},
}
personas, err := LoadRepoPersonas(ctx, client, "owner", "repo")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(personas) != 2 {
t.Fatalf("expected 2 personas, got %d", len(personas))
}
if personas["trading"] == nil {
t.Error("expected trading persona")
}
if personas["crypto"] == nil {
t.Error("expected crypto persona")
}
if personas["trading"].DisplayName != "Trading Expert" {
t.Errorf("trading display name = %q, want %q", personas["trading"].DisplayName, "Trading Expert")
}
})
t.Run("skips invalid persona files", func(t *testing.T) {
client := &mockGiteaClient{
contents: map[string][]ContentEntry{
RepoPersonaPath: {
{Name: "valid.yaml", Path: ".review-bot/personas/valid.yaml", Type: "file"},
{Name: "invalid.yaml", Path: ".review-bot/personas/invalid.yaml", Type: "file"},
},
},
files: map[string]string{
".review-bot/personas/valid.yaml": `name: valid
identity: Valid persona
`,
".review-bot/personas/invalid.yaml": "not valid yaml: [broken",
},
}
personas, err := LoadRepoPersonas(ctx, client, "owner", "repo")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
// Should have the valid one, skip the invalid
if len(personas) != 1 {
t.Fatalf("expected 1 persona (skipped invalid), got %d", len(personas))
}
if personas["valid"] == nil {
t.Error("expected valid persona")
}
})
t.Run("skips non-yaml files", func(t *testing.T) {
client := &mockGiteaClient{
contents: map[string][]ContentEntry{
RepoPersonaPath: {
{Name: "persona.yaml", Path: ".review-bot/personas/persona.yaml", Type: "file"},
{Name: "README.md", Path: ".review-bot/personas/README.md", Type: "file"},
{Name: "notes.txt", Path: ".review-bot/personas/notes.txt", Type: "file"},
},
},
files: map[string]string{
".review-bot/personas/persona.yaml": `name: test
identity: Test persona
`,
".review-bot/personas/README.md": "# Personas\n\nPut your personas here.",
},
}
personas, err := LoadRepoPersonas(ctx, client, "owner", "repo")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(personas) != 1 {
t.Fatalf("expected 1 persona (yaml only), got %d", len(personas))
}
})
t.Run("skips subdirectories", func(t *testing.T) {
client := &mockGiteaClient{
contents: map[string][]ContentEntry{
RepoPersonaPath: {
{Name: "persona.yaml", Path: ".review-bot/personas/persona.yaml", Type: "file"},
{Name: "subdir", Path: ".review-bot/personas/subdir", Type: "dir"},
},
},
files: map[string]string{
".review-bot/personas/persona.yaml": `name: test
identity: Test persona
`,
},
}
personas, err := LoadRepoPersonas(ctx, client, "owner", "repo")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(personas) != 1 {
t.Fatalf("expected 1 persona (files only), got %d", len(personas))
}
})
t.Run("propagates auth errors", func(t *testing.T) {
client := &mockGiteaClient{
listErr: errors.New("HTTP 401: unauthorized"),
}
_, err := LoadRepoPersonas(ctx, client, "owner", "repo")
if err == nil {
t.Fatal("expected error for auth failure")
}
if !strings.Contains(err.Error(), "401") {
t.Errorf("error = %q, want containing '401'", err.Error())
}
})
t.Run("skips files that fail to fetch", func(t *testing.T) {
client := &mockGiteaClient{
contents: map[string][]ContentEntry{
RepoPersonaPath: {
{Name: "good.yaml", Path: ".review-bot/personas/good.yaml", Type: "file"},
{Name: "bad.yaml", Path: ".review-bot/personas/bad.yaml", Type: "file"},
},
},
files: map[string]string{
".review-bot/personas/good.yaml": `name: good
identity: Good persona
`,
},
fileErr: map[string]error{
".review-bot/personas/bad.yaml": errors.New("HTTP 500: internal server error"),
},
}
personas, err := LoadRepoPersonas(ctx, client, "owner", "repo")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(personas) != 1 {
t.Fatalf("expected 1 persona (skipped failed fetch), got %d", len(personas))
}
})
}
func TestMergePersonas(t *testing.T) {
builtin := map[string]*Persona{
"security": {Name: "security", Identity: "Built-in security"},
"docs": {Name: "docs", Identity: "Built-in docs"},
}
repo := map[string]*Persona{
"security": {Name: "security", Identity: "Repo security override"},
"trading": {Name: "trading", Identity: "Repo trading"},
}
merged := MergePersonas(builtin, repo)
t.Run("repo overrides builtin on collision", func(t *testing.T) {
if merged["security"].Identity != "Repo security override" {
t.Errorf("security identity = %q, want repo override", merged["security"].Identity)
}
})
t.Run("builtin preserved when no collision", func(t *testing.T) {
if merged["docs"].Identity != "Built-in docs" {
t.Errorf("docs identity = %q, want built-in", merged["docs"].Identity)
}
})
t.Run("repo-only persona added", func(t *testing.T) {
if merged["trading"] == nil {
t.Error("expected trading persona from repo")
}
if merged["trading"].Identity != "Repo trading" {
t.Errorf("trading identity = %q, want repo", merged["trading"].Identity)
}
})
t.Run("original maps not modified", func(t *testing.T) {
if builtin["trading"] != nil {
t.Error("builtin map was modified")
}
if len(repo) != 2 {
t.Error("repo map was modified")
}
})
}
func TestGetBuiltinPersonasMap(t *testing.T) {
personas := GetBuiltinPersonasMap()
if len(personas) == 0 {
t.Fatal("expected at least one built-in persona")
}
// Verify expected personas exist
expected := []string{"security", "architect", "docs"}
for _, name := range expected {
if personas[name] == nil {
t.Errorf("expected built-in persona %q", name)
}
}
// Verify personas are valid
for name, p := range personas {
if p.Name != name {
t.Errorf("persona %q has mismatched name %q", name, p.Name)
}
if p.Identity == "" {
t.Errorf("persona %q has empty identity", name)
}
}
}
func TestIsYAMLFile(t *testing.T) {
tests := []struct {
name string
want bool
}{
{"test.yaml", true},
{"test.yml", true},
{"test.YAML", true},
{"test.YML", true},
{"test.json", false},
{"test.md", false},
{"test.txt", false},
{"yaml", false},
{"yaml.md", false},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := isYAMLFile(tt.name); got != tt.want {
t.Errorf("isYAMLFile(%q) = %v, want %v", tt.name, got, tt.want)
}
})
}
}
func TestIsNotFoundError(t *testing.T) {
tests := []struct {
err error
want bool
}{
{nil, false},
{errors.New("HTTP 404: not found"), true},
{errors.New("something not found"), true},
{errors.New("HTTP 401: unauthorized"), false},
{errors.New("connection refused"), false},
}
for _, tt := range tests {
name := "nil"
if tt.err != nil {
name = tt.err.Error()
}
t.Run(name, func(t *testing.T) {
if got := isNotFoundError(tt.err); got != tt.want {
t.Errorf("isNotFoundError(%v) = %v, want %v", tt.err, got, tt.want)
}
})
}
}