Files
review-bot/review/remote_persona.go
T
Rodin 27a9be38bc fix: address PR #63 review findings
1. Refactor err2 to use scoped loadErr variable (MINOR - sonnet-review-bot)
   The else-if branches are mutually exclusive, so the error variable
   should be scoped inside the block, not declared outside with err2.

2. Sanitize DisplayName before embedding in Markdown (MINOR - security-review-bot)
   Remote persona metadata is untrusted. Added sanitizeMarkdownText() to
   escape Markdown special characters and strip control characters.
   Applied to both the header title and the footer attribution.

3. Document YAML DoS mitigations (MINOR - security-review-bot)
   Added comprehensive comment in remote_persona.go explaining existing
   defenses: file size limit, file count cap, depth limit, node count cap,
   and alias cycle detection. These collectively mitigate billion-laughs
   and stack exhaustion attacks.
2026-05-10 20:54:20 -07:00

172 lines
5.8 KiB
Go

package review
import (
"context"
"fmt"
"log/slog"
"sort"
"strings"
)
// PersonaFetcher abstracts fetching files from a remote repository.
// This allows persona loading to work with any Git host API.
type PersonaFetcher interface {
// ListContents returns file/directory entries at a path.
// Returns an error if the path doesn't exist or isn't accessible.
ListContents(ctx context.Context, owner, repo, path string) ([]ContentEntry, error)
// GetFileContent returns the raw content of a file from the default branch.
GetFileContent(ctx context.Context, owner, repo, filepath string) (string, error)
}
// ContentEntry represents a file or directory entry.
type ContentEntry struct {
Name string // filename or directory name
Path string // full path from repo root
Type string // "file" or "dir"
}
// DefaultPersonasPath is the conventional location for repo-specific personas.
const DefaultPersonasPath = ".review-bot/personas"
// LoadRemotePersonas fetches personas from a remote repository's .review-bot/personas/ directory.
// Returns a map of persona name to Persona. If the directory doesn't exist or is empty,
// returns an empty map with no error (graceful fallback to built-in personas).
//
// Files larger than MaxPersonaFileSize are logged and skipped.
// Invalid YAML files are logged and skipped (partial success model).
// Only .yaml and .yml files are processed; other files are ignored.
func LoadRemotePersonas(ctx context.Context, fetcher PersonaFetcher, owner, repo string) (map[string]*Persona, error) {
return LoadRemotePersonasFromPath(ctx, fetcher, owner, repo, DefaultPersonasPath)
}
// LoadRemotePersonasFromPath loads personas from a custom path in a remote repository.
// It behaves the same as LoadRemotePersonas but allows specifying a path other than
// the default .review-bot/personas directory.
func LoadRemotePersonasFromPath(ctx context.Context, fetcher PersonaFetcher, owner, repo, path string) (map[string]*Persona, error) {
entries, err := fetcher.ListContents(ctx, owner, repo, path)
if err != nil {
// 404 is expected when repo doesn't have personas — return empty, not error
if isNotFoundError(err) {
slog.Debug("no remote personas directory found", "repo", fmt.Sprintf("%s/%s", owner, repo), "path", path)
return map[string]*Persona{}, nil
}
return nil, fmt.Errorf("list remote personas: %w", err)
}
// Cap the number of files to process to prevent resource exhaustion
// from repos with thousands of small files.
const maxPersonaFiles = 50
result := make(map[string]*Persona)
processed := 0
for _, entry := range entries {
if processed >= maxPersonaFiles {
slog.Warn("persona file limit reached", "limit", maxPersonaFiles, "repo", fmt.Sprintf("%s/%s", owner, repo))
break
}
if ctx.Err() != nil {
return nil, ctx.Err()
}
// Skip directories and non-YAML files
if entry.Type != "file" {
continue
}
if !isYAMLFile(entry.Name) {
continue
}
content, err := fetcher.GetFileContent(ctx, owner, repo, entry.Path)
if err != nil {
slog.Warn("could not fetch remote persona file", "file", entry.Path, "error", err)
continue
}
// Check size before parsing (defense in depth)
if len(content) > MaxPersonaFileSize {
slog.Warn("remote persona file exceeds size limit", "file", entry.Path, "size", len(content), "limit", MaxPersonaFileSize)
continue
}
// YAML parsing uses parsePersona which has defenses against YAML DoS attacks:
// - MaxPersonaFileSize (above) caps raw input size before any parsing
// - maxPersonaFiles (above) limits the number of files processed per repo
// - unmarshalYAMLWithDepthLimit enforces MaxYAMLDepth to prevent stack exhaustion
// - checkYAMLDepth tracks node counts (MaxYAMLNodes) against "billion laughs" expansion
// - Alias cycles are detected and capped by seen-node tracking
// See persona.go for the implementation details.
persona, err := parsePersona([]byte(content), entry.Path)
if err != nil {
slog.Warn("could not parse remote persona file", "file", entry.Path, "error", err)
continue
}
result[persona.Name] = persona
processed++
slog.Debug("loaded remote persona", "name", persona.Name, "file", entry.Path)
}
return result, nil
}
// MergePersonas combines remote and built-in personas.
// Remote personas take precedence on name collision.
// Returns the merged map and a list of persona names in sorted order.
func MergePersonas(remote, builtin map[string]*Persona) (map[string]*Persona, []string) {
merged := make(map[string]*Persona)
// Add built-in first
for name, p := range builtin {
merged[name] = p
}
// Remote overrides built-in on collision
for name, p := range remote {
if _, exists := merged[name]; exists {
slog.Debug("remote persona overrides built-in", "name", name)
}
merged[name] = p
}
// Collect sorted names
names := make([]string, 0, len(merged))
for name := range merged {
names = append(names, name)
}
sort.Strings(names)
return merged, names
}
// LoadAllBuiltinPersonas loads all built-in personas into a map.
func LoadAllBuiltinPersonas() map[string]*Persona {
result := make(map[string]*Persona)
for _, name := range ListBuiltinPersonas() {
p, err := LoadBuiltinPersona(name)
if err != nil {
slog.Warn("could not load built-in persona", "name", name, "error", err)
continue
}
result[name] = p
}
return result
}
// isYAMLFile returns true if the filename has a YAML extension.
func isYAMLFile(name string) bool {
lower := strings.ToLower(name)
return strings.HasSuffix(lower, ".yaml") || strings.HasSuffix(lower, ".yml")
}
// isNotFoundError checks if an error indicates a 404 response.
// This is a simple string check to avoid importing the gitea package
// (which would create a circular dependency).
func isNotFoundError(err error) bool {
if err == nil {
return false
}
errStr := err.Error()
return strings.Contains(errStr, "HTTP 404")
}