package review

import (
	"context"
	"fmt"
	"log/slog"
	"sort"
	"strings"
)

// PersonaFetcher abstracts fetching files from a remote repository.
// This allows persona loading to work with any Git host API.
//
// The persona loader in this file treats a ListContents error whose message
// contains "HTTP 404" as "no personas directory" (see isNotFoundError); every
// other ListContents error is returned to the caller.
type PersonaFetcher interface {
	// ListContents returns file/directory entries at a path.
	// Returns an error if the path doesn't exist or isn't accessible.
	ListContents(ctx context.Context, owner, repo, path string) ([]ContentEntry, error)

	// GetFileContent returns the raw content of a file from the default branch.
	GetFileContent(ctx context.Context, owner, repo, filepath string) (string, error)
}

// ContentEntry represents a file or directory entry.
// The persona loader only fetches entries whose Type is "file".
type ContentEntry struct {
	Name string // filename or directory name
	Path string // full path from repo root
	Type string // "file" or "dir"
}

// DefaultPersonasPath is the conventional location for repo-specific personas.
const DefaultPersonasPath = ".review-bot/personas"

// LoadRemotePersonas fetches personas from a remote repository's .review-bot/personas/ directory.
// It is a thin wrapper that calls LoadRemotePersonasFromPath with DefaultPersonasPath.
//
// Returns a map of persona name to Persona. If the directory doesn't exist or is empty,
// returns an empty map with no error (graceful fallback to built-in personas).
//
// Files larger than MaxPersonaFileSize are logged and skipped.
// Invalid YAML files are logged and skipped (partial success model).
// Only .yaml and .yml files are processed; other files are ignored.
func LoadRemotePersonas(ctx context.Context, fetcher PersonaFetcher, owner, repo string) (map[string]*Persona, error) {
	return LoadRemotePersonasFromPath(ctx, fetcher, owner, repo, DefaultPersonasPath)
}

// LoadRemotePersonasFromPath loads personas from a custom path in a remote repository.
// It behaves the same as LoadRemotePersonas but allows specifying a path other than
// the default .review-bot/personas directory.
func LoadRemotePersonasFromPath(ctx context.Context, fetcher PersonaFetcher, owner, repo, path string) (map[string]*Persona, error) { entries, err := fetcher.ListContents(ctx, owner, repo, path) if err != nil { // 404 is expected when repo doesn't have personas — return empty, not error if isNotFoundError(err) { slog.Debug("no remote personas directory found", "repo", fmt.Sprintf("%s/%s", owner, repo), "path", path) return map[string]*Persona{}, nil } return nil, fmt.Errorf("list remote personas: %w", err) } // Cap the number of files to process to prevent resource exhaustion // from repos with thousands of small files. const maxPersonaFiles = 50 result := make(map[string]*Persona) processed := 0 for _, entry := range entries { if processed >= maxPersonaFiles { slog.Warn("persona file limit reached", "limit", maxPersonaFiles, "repo", fmt.Sprintf("%s/%s", owner, repo)) break } if ctx.Err() != nil { return nil, ctx.Err() } // Skip directories and non-YAML files if entry.Type != "file" { continue } if !isYAMLFile(entry.Name) { continue } content, err := fetcher.GetFileContent(ctx, owner, repo, entry.Path) if err != nil { slog.Warn("could not fetch remote persona file", "file", entry.Path, "error", err) continue } // Check size before parsing (defense in depth) if len(content) > MaxPersonaFileSize { slog.Warn("remote persona file exceeds size limit", "file", entry.Path, "size", len(content), "limit", MaxPersonaFileSize) continue } // YAML parsing uses parsePersona which has defenses against YAML DoS attacks: // - MaxPersonaFileSize (above) caps raw input size before any parsing // - maxPersonaFiles (above) limits the number of files processed per repo // - unmarshalYAMLWithDepthLimit enforces MaxYAMLDepth to prevent stack exhaustion // - checkYAMLDepth tracks node counts (MaxYAMLNodes) against "billion laughs" expansion // - Alias cycles are detected and capped by seen-node tracking // See persona.go for the implementation details. 
persona, err := parsePersona([]byte(content), entry.Path) if err != nil { slog.Warn("could not parse remote persona file", "file", entry.Path, "error", err) continue } result[persona.Name] = persona processed++ slog.Debug("loaded remote persona", "name", persona.Name, "file", entry.Path) } return result, nil } // MergePersonas combines remote and built-in personas. // Remote personas take precedence on name collision. // Returns the merged map and a list of persona names in sorted order. func MergePersonas(remote, builtin map[string]*Persona) (map[string]*Persona, []string) { merged := make(map[string]*Persona) // Add built-in first for name, p := range builtin { merged[name] = p } // Remote overrides built-in on collision for name, p := range remote { if _, exists := merged[name]; exists { slog.Debug("remote persona overrides built-in", "name", name) } merged[name] = p } // Collect sorted names names := make([]string, 0, len(merged)) for name := range merged { names = append(names, name) } sort.Strings(names) return merged, names } // LoadAllBuiltinPersonas loads all built-in personas into a map. func LoadAllBuiltinPersonas() map[string]*Persona { result := make(map[string]*Persona) for _, name := range ListBuiltinPersonas() { p, err := LoadBuiltinPersona(name) if err != nil { slog.Warn("could not load built-in persona", "name", name, "error", err) continue } result[name] = p } return result } // isYAMLFile returns true if the filename has a YAML extension. func isYAMLFile(name string) bool { lower := strings.ToLower(name) return strings.HasSuffix(lower, ".yaml") || strings.HasSuffix(lower, ".yml") } // isNotFoundError checks if an error indicates a 404 response. // This is a simple string check to avoid importing the gitea package // (which would create a circular dependency). func isNotFoundError(err error) bool { if err == nil { return false } errStr := err.Error() return strings.Contains(errStr, "HTTP 404") }