package review

import (
	"bytes"
	"embed"
	"encoding/json"
	"fmt"
	"os"
	"sort"
	"strings"
	"unicode/utf8"

	"gopkg.in/yaml.v3"
)

// embeddedPersonas holds the built-in persona definitions compiled into the
// binary. Only files matching personas/*.yaml are embedded.
//
//go:embed personas/*.yaml
var embeddedPersonas embed.FS

// MaxPersonaFileSize is the maximum size for persona files (64 KB).
// This prevents denial-of-service via excessively large files.
// LoadPersona enforces it both before and after reading the file.
const MaxPersonaFileSize = 64 * 1024

// MaxYAMLDepth is the maximum nesting depth allowed in YAML persona files.
// This prevents stack exhaustion from deeply nested structures.
// parsePersona passes it to unmarshalYAMLWithDepthLimit.
const MaxYAMLDepth = 20

// MaxYAMLNodes is the maximum number of YAML nodes allowed in persona files.
// This prevents DoS via wide-but-shallow structures that bypass depth limits.
// unmarshalYAMLWithDepthLimit passes it to checkYAMLDepth.
const MaxYAMLNodes = 1000

// Persona defines a specialized review role with focused expertise.
//
// A Persona can be decoded from JSON or YAML; both decoders run in strict mode,
// so a key that does not match one of the tags below is a parse error.
type Persona struct {
	// Name is the persona identifier. Required: validatePersona rejects an
	// empty value.
	Name string `json:"name" yaml:"name"`
	// DisplayName is the human-readable name. Optional: validatePersona sets it
	// to Name when it is empty.
	DisplayName string `json:"display_name" yaml:"display_name"`
	// ModelPref is an optional model preference (key "model_preference").
	// It is not interpreted anywhere in this file.
	ModelPref string `json:"model_preference,omitempty" yaml:"model_preference,omitempty"`
	// Identity describes the persona. Required: validatePersona rejects an
	// empty value.
	Identity string `json:"identity" yaml:"identity"`
	// Focus lists what the persona concentrates on.
	// NOTE(review): presumably topics to emphasize in the prompt — confirm
	// against the code that consumes it.
	Focus []string `json:"focus" yaml:"focus"`
	// Ignore lists what the persona leaves out.
	// NOTE(review): presumably topics to skip in the prompt — confirm against
	// the code that consumes it.
	Ignore []string `json:"ignore" yaml:"ignore"`
	// Severity holds the per-level severity guidance for this persona.
	Severity Severity `json:"severity" yaml:"severity"`
	// OutputFormat is optional and is not interpreted anywhere in this file.
	OutputFormat string `json:"output_format,omitempty" yaml:"output_format,omitempty"`
}

// Severity defines what constitutes each severity level for this persona.
// These are prompt guidance for the LLM, not output format changes.
// None of the three fields is validated by this file; all may be empty.
type Severity struct {
	Major string `json:"major" yaml:"major"`
	Minor string `json:"minor" yaml:"minor"`
	Nit   string `json:"nit" yaml:"nit"`
}

// LoadPersona loads a persona from a JSON or YAML file path.
// Format is detected by file extension: .yaml/.yml for YAML, .json or other for JSON.
// Files larger than MaxPersonaFileSize are rejected.
//
// Symlinks are supported: os.Stat follows symlinks, so a symlink pointing to
// a regular file will pass the IsRegular() check. Symlinks to non-regular files
// (directories, FIFOs, devices) are still rejected.
//
// The returned error wraps the underlying os error when the file cannot be
// stat'ed or read; on any error the returned *Persona is nil.
func LoadPersona(path string) (*Persona, error) { // os.Stat follows symlinks, so symlinks to regular files are supported. // The IsRegular() check operates on the target, not the symlink itself. info, err := os.Stat(path) if err != nil { return nil, fmt.Errorf("read persona file %s: %w", path, err) } if !info.Mode().IsRegular() { return nil, fmt.Errorf("persona file %s is not a regular file", path) } if info.Size() > MaxPersonaFileSize { return nil, fmt.Errorf("persona file %s exceeds maximum size (%d bytes)", path, MaxPersonaFileSize) } data, err := os.ReadFile(path) if err != nil { return nil, fmt.Errorf("read persona file %s: %w", path, err) } // Re-check size after read to defend against TOCTOU races where file // grows between stat and read (e.g., appending process, replaced file). if len(data) > MaxPersonaFileSize { return nil, fmt.Errorf("persona file %s exceeds maximum size (%d bytes)", path, MaxPersonaFileSize) } return parsePersona(data, path) } // LoadBuiltinPersona loads a built-in persona by name. // Returns an error if the persona doesn't exist. // Built-in personas are stored in YAML format only (see embed directive). func LoadBuiltinPersona(name string) (*Persona, error) { yamlFile := name + ".yaml" data, err := embeddedPersonas.ReadFile("personas/" + yamlFile) if err != nil { available := ListBuiltinPersonas() return nil, fmt.Errorf("unknown built-in persona %q (available: %s)", name, strings.Join(available, ", ")) } return parsePersona(data, "builtin:"+yamlFile) } // ListBuiltinPersonas returns the names of all built-in personas in sorted order. // Returns an empty slice if the embedded directory cannot be read. 
func ListBuiltinPersonas() []string { entries, err := embeddedPersonas.ReadDir("personas") if err != nil { return []string{} } seen := make(map[string]bool) for _, e := range entries { if e.IsDir() { continue } name := e.Name() // Strip extension to get persona name var personaName string switch { case strings.HasSuffix(name, ".yaml"): personaName = strings.TrimSuffix(name, ".yaml") case strings.HasSuffix(name, ".yml"): personaName = strings.TrimSuffix(name, ".yml") case strings.HasSuffix(name, ".json"): personaName = strings.TrimSuffix(name, ".json") default: continue } if !seen[personaName] { seen[personaName] = true } } names := make([]string, 0, len(seen)) for name := range seen { names = append(names, name) } sort.Strings(names) return names } // parsePersona parses persona data from JSON or YAML format. // Format is detected by the source file extension. func parsePersona(data []byte, source string) (*Persona, error) { lowerSource := strings.ToLower(source) isYAML := strings.HasSuffix(lowerSource, ".yaml") || strings.HasSuffix(lowerSource, ".yml") var p Persona var err error if isYAML { err = unmarshalYAMLWithDepthLimit(data, &p, MaxYAMLDepth) } else { // Use json.Decoder with DisallowUnknownFields for consistency with // YAML's KnownFields(true) - both reject unknown fields to catch typos. dec := json.NewDecoder(bytes.NewReader(data)) dec.DisallowUnknownFields() err = dec.Decode(&p) } if err != nil { return nil, fmt.Errorf("parse persona %s: %w", source, err) } if err := validatePersona(&p, source); err != nil { return nil, err } return &p, nil } // unmarshalYAMLWithDepthLimit unmarshals YAML data with explicit depth limiting // and strict field checking. This protects against stack exhaustion from deeply // nested structures and catches typos in field names. // Multi-document YAML files are rejected to prevent silent data loss. 
func unmarshalYAMLWithDepthLimit(data []byte, out any, maxDepth int) error { // First pass: decode into a yaml.Node to check depth limits and node counts. // This prevents stack exhaustion before we attempt to decode into structs. var node yaml.Node dec := yaml.NewDecoder(bytes.NewReader(data)) if err := dec.Decode(&node); err != nil { return err } // Reject multi-document YAML files - silently ignoring additional documents // could lead to confusing behavior where users think their changes take effect. var extra yaml.Node if dec.Decode(&extra) == nil { return fmt.Errorf("multi-document YAML is not supported; only single-document files are allowed") } nodeCount := 0 if err := checkYAMLDepth(&node, 0, maxDepth, MaxYAMLNodes, make(map[*yaml.Node]struct{}), &nodeCount); err != nil { return err } // Second pass: decode with strict field checking enabled. // KnownFields(true) rejects unknown keys, catching typos like "focuss" or "identiy". // We must re-decode from the original data because yaml.Node.Decode() doesn't // support the KnownFields option. strictDec := yaml.NewDecoder(bytes.NewReader(data)) strictDec.KnownFields(true) return strictDec.Decode(out) } // checkYAMLDepth recursively checks that YAML nodes don't exceed the depth limit // or the total node count limit. It also detects alias cycles to prevent infinite // recursion from crafted YAML with self-referential aliases. func checkYAMLDepth(node *yaml.Node, depth, maxDepth, maxNodes int, seen map[*yaml.Node]struct{}, nodeCount *int) error { if depth > maxDepth { return fmt.Errorf("YAML nesting depth exceeds maximum (%d)", maxDepth) } // Track total nodes visited as defense-in-depth against wide-but-shallow attacks. *nodeCount++ if *nodeCount > maxNodes { return fmt.Errorf("YAML node count exceeds maximum (%d)", maxNodes) } // Cycle detection: if we've seen this node before, we're in a cycle. if _, ok := seen[node]; ok { return nil // Already validated this subtree, skip to avoid infinite recursion. 
} seen[node] = struct{}{} // Handle alias nodes: follow the alias to its anchor target. // Increment depth when following aliases since they expand the effective structure. if node.Kind == yaml.AliasNode && node.Alias != nil { return checkYAMLDepth(node.Alias, depth+1, maxDepth, maxNodes, seen, nodeCount) } for _, child := range node.Content { if err := checkYAMLDepth(child, depth+1, maxDepth, maxNodes, seen, nodeCount); err != nil { return err } } return nil } func validatePersona(p *Persona, source string) error { if p.Name == "" { return fmt.Errorf("persona %s: name is required", source) } if p.Identity == "" { return fmt.Errorf("persona %s: identity is required", source) } // DisplayName defaults to Name if not set if p.DisplayName == "" { p.DisplayName = p.Name } return nil } // CapitalizeFirst capitalizes the first rune of a string in a Unicode-safe way. // Returns the original string if it's empty. func CapitalizeFirst(s string) string { if s == "" { return s } r, size := utf8.DecodeRuneInString(s) if r == utf8.RuneError { return s } return strings.ToUpper(string(r)) + s[size:] }