3f06ba2ea6
PR Ready Gate / clear-labels (pull_request) Successful in 2s
CI / test (pull_request) Successful in 9m32s
CI / review (anthropic--claude-4.6-sonnet, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 10m10s
CI / review (gpt-5, gpt, GPT_REVIEW_TOKEN) (pull_request) Successful in 10m51s
CI / review (gpt-5, security, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Successful in 10m33s
Implements #60. - Add ParsePersonaBytes() for parsing personas from byte data - Add LoadRepoPersonas() to fetch personas from repo via Gitea API - Add MergePersonas() to combine built-in and repo personas - Add GetBuiltinPersonasMap() helper - Update main.go to load repo personas first, fall back to built-in - Add giteaClientAdapter to bridge gitea.Client to review.GiteaClient When --persona is specified, the bot now: 1. Attempts to fetch personas from .review-bot/personas/*.yaml 2. If the named persona exists in the repo, uses it 3. Otherwise falls back to built-in personas This allows repos to define domain-specific personas (e.g., trading experts for gargoyle, crypto experts for kms-lite) without modifying the review-bot codebase.
260 lines
9.0 KiB
Go
260 lines
9.0 KiB
Go
package review
|
|
|
|
import (
	"bytes"
	"embed"
	"encoding/json"
	"fmt"
	"io"
	"os"
	"sort"
	"strings"
	"unicode/utf8"

	"gopkg.in/yaml.v3"
)
|
|
|
|
// embeddedPersonas holds the built-in persona definitions compiled into the
// binary. Only files matching personas/*.yaml are embedded; LoadBuiltinPersona
// and ListBuiltinPersonas read from it.
//
//go:embed personas/*.yaml
var embeddedPersonas embed.FS
|
|
|
|
// Limits applied to persona files before and while they are parsed.
const (
	// MaxPersonaFileSize is the largest persona file accepted, in bytes (64 KB).
	// Capping the size guards against denial-of-service via oversized files.
	MaxPersonaFileSize = 64 << 10

	// MaxYAMLDepth caps how deeply a YAML persona document may nest, which
	// protects against stack exhaustion from deeply nested structures.
	MaxYAMLDepth = 20

	// MaxYAMLNodes caps the total number of YAML nodes in a persona document,
	// covering wide-but-shallow inputs that a depth limit alone would not catch.
	MaxYAMLNodes = 1000
)
|
|
|
|
// Persona defines a specialized review role with focused expertise.
// It is decoded from JSON or YAML; the struct tags give the key names, and
// both decoders in this file reject keys that are not listed here.
type Persona struct {
	// Name identifies the persona. Required: validatePersona rejects an empty Name.
	Name string `json:"name" yaml:"name"`

	// DisplayName is the label for the persona; validatePersona sets it to
	// Name when it is left empty.
	DisplayName string `json:"display_name" yaml:"display_name"`

	// ModelPref is the optional "model_preference" value.
	// NOTE(review): how this preference is consumed is not visible in this file.
	ModelPref string `json:"model_preference,omitempty" yaml:"model_preference,omitempty"`

	// Identity is required: validatePersona rejects an empty Identity.
	// Presumably the role description handed to the model — confirm against the prompt builder.
	Identity string `json:"identity" yaml:"identity"`

	// Focus is a list of strings; presumably the topics this persona concentrates on — TODO confirm.
	Focus []string `json:"focus" yaml:"focus"`

	// Ignore is a list of strings; presumably the topics this persona skips — TODO confirm.
	Ignore []string `json:"ignore" yaml:"ignore"`

	// Severity holds this persona's guidance for each severity level.
	Severity Severity `json:"severity" yaml:"severity"`

	// OutputFormat is the optional "output_format" value.
	// NOTE(review): accepted values are not defined in this file.
	OutputFormat string `json:"output_format,omitempty" yaml:"output_format,omitempty"`
}
|
|
|
|
// Severity defines what constitutes each severity level for this persona.
// These are prompt guidance for the LLM, not output format changes.
type Severity struct {
	// Major is the guidance text for the "major" level.
	Major string `json:"major" yaml:"major"`

	// Minor is the guidance text for the "minor" level.
	Minor string `json:"minor" yaml:"minor"`

	// Nit is the guidance text for the "nit" level.
	Nit string `json:"nit" yaml:"nit"`
}
|
|
|
|
// LoadPersona loads a persona from a JSON or YAML file path.
|
|
// Format is detected by file extension: .yaml/.yml for YAML, .json or other for JSON.
|
|
// Files larger than MaxPersonaFileSize are rejected.
|
|
//
|
|
// Symlinks are supported: os.Stat follows symlinks, so a symlink pointing to
|
|
// a regular file will pass the IsRegular() check. Symlinks to non-regular files
|
|
// (directories, FIFOs, devices) are still rejected.
|
|
func LoadPersona(path string) (*Persona, error) {
|
|
// os.Stat follows symlinks, so symlinks to regular files are supported.
|
|
// The IsRegular() check operates on the target, not the symlink itself.
|
|
info, err := os.Stat(path)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("read persona file %s: %w", path, err)
|
|
}
|
|
if !info.Mode().IsRegular() {
|
|
return nil, fmt.Errorf("persona file %s is not a regular file", path)
|
|
}
|
|
if info.Size() > MaxPersonaFileSize {
|
|
return nil, fmt.Errorf("persona file %s exceeds maximum size (%d bytes)", path, MaxPersonaFileSize)
|
|
}
|
|
data, err := os.ReadFile(path)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("read persona file %s: %w", path, err)
|
|
}
|
|
// Re-check size after read to defend against TOCTOU races where file
|
|
// grows between stat and read (e.g., appending process, replaced file).
|
|
if len(data) > MaxPersonaFileSize {
|
|
return nil, fmt.Errorf("persona file %s exceeds maximum size (%d bytes)", path, MaxPersonaFileSize)
|
|
}
|
|
return parsePersona(data, path)
|
|
}
|
|
|
|
// LoadBuiltinPersona loads a built-in persona by name.
|
|
// Returns an error if the persona doesn't exist.
|
|
// Built-in personas are stored in YAML format only (see embed directive).
|
|
func LoadBuiltinPersona(name string) (*Persona, error) {
|
|
yamlFile := name + ".yaml"
|
|
data, err := embeddedPersonas.ReadFile("personas/" + yamlFile)
|
|
if err != nil {
|
|
available := ListBuiltinPersonas()
|
|
return nil, fmt.Errorf("unknown built-in persona %q (available: %s)", name, strings.Join(available, ", "))
|
|
}
|
|
return parsePersona(data, "builtin:"+yamlFile)
|
|
}
|
|
|
|
// ListBuiltinPersonas returns the names of all built-in personas in sorted order.
|
|
// Returns an empty slice if the embedded directory cannot be read.
|
|
func ListBuiltinPersonas() []string {
|
|
entries, err := embeddedPersonas.ReadDir("personas")
|
|
if err != nil {
|
|
return []string{}
|
|
}
|
|
seen := make(map[string]bool)
|
|
for _, e := range entries {
|
|
if e.IsDir() {
|
|
continue
|
|
}
|
|
name := e.Name()
|
|
// Strip extension to get persona name
|
|
var personaName string
|
|
switch {
|
|
case strings.HasSuffix(name, ".yaml"):
|
|
personaName = strings.TrimSuffix(name, ".yaml")
|
|
case strings.HasSuffix(name, ".yml"):
|
|
personaName = strings.TrimSuffix(name, ".yml")
|
|
case strings.HasSuffix(name, ".json"):
|
|
personaName = strings.TrimSuffix(name, ".json")
|
|
default:
|
|
continue
|
|
}
|
|
if !seen[personaName] {
|
|
seen[personaName] = true
|
|
}
|
|
}
|
|
names := make([]string, 0, len(seen))
|
|
for name := range seen {
|
|
names = append(names, name)
|
|
}
|
|
sort.Strings(names)
|
|
return names
|
|
}
|
|
|
|
// parsePersona parses persona data from JSON or YAML format.
|
|
// Format is detected by the source file extension.
|
|
func parsePersona(data []byte, source string) (*Persona, error) {
|
|
lowerSource := strings.ToLower(source)
|
|
isYAML := strings.HasSuffix(lowerSource, ".yaml") || strings.HasSuffix(lowerSource, ".yml")
|
|
|
|
var p Persona
|
|
var err error
|
|
if isYAML {
|
|
err = unmarshalYAMLWithDepthLimit(data, &p, MaxYAMLDepth)
|
|
} else {
|
|
// Use json.Decoder with DisallowUnknownFields for consistency with
|
|
// YAML's KnownFields(true) - both reject unknown fields to catch typos.
|
|
dec := json.NewDecoder(bytes.NewReader(data))
|
|
dec.DisallowUnknownFields()
|
|
err = dec.Decode(&p)
|
|
}
|
|
if err != nil {
|
|
return nil, fmt.Errorf("parse persona %s: %w", source, err)
|
|
}
|
|
if err := validatePersona(&p, source); err != nil {
|
|
return nil, err
|
|
}
|
|
return &p, nil
|
|
}
|
|
|
|
// unmarshalYAMLWithDepthLimit unmarshals YAML data with explicit depth limiting
|
|
// and strict field checking. This protects against stack exhaustion from deeply
|
|
// nested structures and catches typos in field names.
|
|
// Multi-document YAML files are rejected to prevent silent data loss.
|
|
func unmarshalYAMLWithDepthLimit(data []byte, out any, maxDepth int) error {
|
|
// First pass: decode into a yaml.Node to check depth limits and node counts.
|
|
// This prevents stack exhaustion before we attempt to decode into structs.
|
|
var node yaml.Node
|
|
dec := yaml.NewDecoder(bytes.NewReader(data))
|
|
if err := dec.Decode(&node); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Reject multi-document YAML files - silently ignoring additional documents
|
|
// could lead to confusing behavior where users think their changes take effect.
|
|
var extra yaml.Node
|
|
if dec.Decode(&extra) == nil {
|
|
return fmt.Errorf("multi-document YAML is not supported; only single-document files are allowed")
|
|
}
|
|
|
|
nodeCount := 0
|
|
if err := checkYAMLDepth(&node, 0, maxDepth, MaxYAMLNodes, make(map[*yaml.Node]struct{}), &nodeCount); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Second pass: decode with strict field checking enabled.
|
|
// KnownFields(true) rejects unknown keys, catching typos like "focuss" or "identiy".
|
|
// We must re-decode from the original data because yaml.Node.Decode() doesn't
|
|
// support the KnownFields option.
|
|
strictDec := yaml.NewDecoder(bytes.NewReader(data))
|
|
strictDec.KnownFields(true)
|
|
return strictDec.Decode(out)
|
|
}
|
|
|
|
// checkYAMLDepth recursively checks that YAML nodes don't exceed the depth limit
|
|
// or the total node count limit. It also detects alias cycles to prevent infinite
|
|
// recursion from crafted YAML with self-referential aliases.
|
|
func checkYAMLDepth(node *yaml.Node, depth, maxDepth, maxNodes int, seen map[*yaml.Node]struct{}, nodeCount *int) error {
|
|
if depth > maxDepth {
|
|
return fmt.Errorf("YAML nesting depth exceeds maximum (%d)", maxDepth)
|
|
}
|
|
|
|
// Track total nodes visited as defense-in-depth against wide-but-shallow attacks.
|
|
*nodeCount++
|
|
if *nodeCount > maxNodes {
|
|
return fmt.Errorf("YAML node count exceeds maximum (%d)", maxNodes)
|
|
}
|
|
|
|
// Cycle detection: if we've seen this node before, we're in a cycle.
|
|
if _, ok := seen[node]; ok {
|
|
return nil // Already validated this subtree, skip to avoid infinite recursion.
|
|
}
|
|
seen[node] = struct{}{}
|
|
|
|
// Handle alias nodes: follow the alias to its anchor target.
|
|
// Increment depth when following aliases since they expand the effective structure.
|
|
if node.Kind == yaml.AliasNode && node.Alias != nil {
|
|
return checkYAMLDepth(node.Alias, depth+1, maxDepth, maxNodes, seen, nodeCount)
|
|
}
|
|
|
|
for _, child := range node.Content {
|
|
if err := checkYAMLDepth(child, depth+1, maxDepth, maxNodes, seen, nodeCount); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// ParsePersonaBytes parses persona data from bytes with a source label for errors.
|
|
// This is useful for parsing personas fetched from external sources (e.g., Gitea API)
|
|
// without requiring filesystem access. Format is detected by source extension.
|
|
func ParsePersonaBytes(data []byte, source string) (*Persona, error) {
|
|
return parsePersona(data, source)
|
|
}
|
|
|
|
func validatePersona(p *Persona, source string) error {
|
|
if p.Name == "" {
|
|
return fmt.Errorf("persona %s: name is required", source)
|
|
}
|
|
if p.Identity == "" {
|
|
return fmt.Errorf("persona %s: identity is required", source)
|
|
}
|
|
// DisplayName defaults to Name if not set
|
|
if p.DisplayName == "" {
|
|
p.DisplayName = p.Name
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// CapitalizeFirst upper-cases the first rune of s and leaves the rest unchanged.
// It decodes the leading rune rather than indexing bytes, so multi-byte
// characters are handled safely. The input is returned as-is when it is empty
// or when its first rune decodes to utf8.RuneError.
func CapitalizeFirst(s string) string {
	if len(s) == 0 {
		return s
	}

	first, width := utf8.DecodeRuneInString(s)
	if first == utf8.RuneError {
		return s
	}

	head, tail := s[:width], s[width:]
	return strings.ToUpper(head) + tail
}
|