26f326cf51
PR Ready Gate / clear-labels (pull_request) Successful in 2s
CI / test (pull_request) Successful in 9m34s
CI / review (anthropic--claude-4.6-sonnet, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 9m53s
CI / review (gpt-5, security, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Successful in 10m23s
CI / review (gpt-5, gpt, GPT_REVIEW_TOKEN) (pull_request) Successful in 11m24s
Address security review findings:
MAJOR: Add cycle detection to checkYAMLDepth using a visited set
(seen map[*yaml.Node]struct{}) to prevent infinite recursion from
crafted YAML with self-referential aliases.
MINOR fixes:
- Add MaxYAMLNodes (1000) limit as defense-in-depth against
wide-but-shallow structures that bypass depth limits
- Increment depth when following alias targets (was incorrectly
passing same depth, allowing alias chains to bypass depth limit)
- Reject multi-document YAML files instead of silently ignoring
additional documents (prevents confusing silent data loss)
Tests added:
- TestYAMLAliasCycleDetection: Direct test of cycle detection logic
- TestYAMLMultiDocumentRejection: Verifies multi-doc files rejected
- TestYAMLNodeCountLimit: Verifies wide structures are rejected
- TestCheckYAMLDepthCycleDetectionDirect: Unit test with artificial cycle
251 lines
8.3 KiB
Go
251 lines
8.3 KiB
Go
package review
|
|
|
|
import (
	"bytes"
	"embed"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"os"
	"sort"
	"strings"
	"unicode/utf8"

	"gopkg.in/yaml.v3"
)
|
|
|
|
// embeddedPersonas is the embedded file system holding the built-in persona
// definitions. The directive below embeds every file matching
// personas/*.yaml; LoadBuiltinPersona and ListBuiltinPersonas read from it.
//
//go:embed personas/*.yaml
var embeddedPersonas embed.FS
|
|
|
|
// Limits enforced on persona files to guard against denial-of-service input.
const (
	// MaxPersonaFileSize is the largest persona file accepted, in bytes
	// (64 KB). Larger files are rejected rather than parsed.
	MaxPersonaFileSize = 64 << 10

	// MaxYAMLDepth is the deepest nesting level permitted in a YAML persona
	// file. It exists to prevent stack exhaustion from deeply nested input.
	MaxYAMLDepth = 20

	// MaxYAMLNodes is the largest number of YAML nodes permitted in a persona
	// file. It catches wide-but-shallow documents that a depth limit alone
	// would let through.
	MaxYAMLNodes = 1000
)
|
|
|
|
// Severity describes, per persona, what qualifies as each severity level.
// The strings are prompt guidance for the LLM; they do not change the output
// format.
type Severity struct {
	// Major is the guidance text for major findings.
	Major string `json:"major" yaml:"major"`

	// Minor is the guidance text for minor findings.
	Minor string `json:"minor" yaml:"minor"`

	// Nit is the guidance text for nit-level findings.
	Nit string `json:"nit" yaml:"nit"`
}

// Persona is a specialized review role with a focused area of expertise.
// It is loaded from a JSON or YAML document whose keys match the struct tags.
type Persona struct {
	// Name identifies the persona. It is required by validatePersona.
	Name string `json:"name" yaml:"name"`

	// DisplayName is the human-facing name; validatePersona fills it in from
	// Name when it is left empty.
	DisplayName string `json:"display_name" yaml:"display_name"`

	// ModelPref is an optional model preference (key "model_preference").
	ModelPref string `json:"model_preference,omitempty" yaml:"model_preference,omitempty"`

	// Identity is the persona's identity text. It is required by
	// validatePersona.
	Identity string `json:"identity" yaml:"identity"`

	// Focus lists what the persona concentrates on.
	// NOTE(review): exact use in prompts is not visible here - confirm.
	Focus []string `json:"focus" yaml:"focus"`

	// Ignore lists what the persona leaves out.
	// NOTE(review): exact use in prompts is not visible here - confirm.
	Ignore []string `json:"ignore" yaml:"ignore"`

	// Severity carries the per-level severity guidance for this persona.
	Severity Severity `json:"severity" yaml:"severity"`

	// OutputFormat is an optional output format setting.
	OutputFormat string `json:"output_format,omitempty" yaml:"output_format,omitempty"`
}
|
|
|
|
// LoadPersona loads a persona from a JSON or YAML file path.
|
|
// Format is detected by file extension: .yaml/.yml for YAML, .json or other for JSON.
|
|
// Files larger than MaxPersonaFileSize are rejected.
|
|
func LoadPersona(path string) (*Persona, error) {
|
|
info, err := os.Stat(path)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("read persona file %s: %w", path, err)
|
|
}
|
|
if !info.Mode().IsRegular() {
|
|
return nil, fmt.Errorf("persona file %s is not a regular file", path)
|
|
}
|
|
if info.Size() > MaxPersonaFileSize {
|
|
return nil, fmt.Errorf("persona file %s exceeds maximum size (%d bytes)", path, MaxPersonaFileSize)
|
|
}
|
|
data, err := os.ReadFile(path)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("read persona file %s: %w", path, err)
|
|
}
|
|
// Re-check size after read to defend against TOCTOU races where file
|
|
// grows between stat and read (e.g., appending process, replaced file).
|
|
if len(data) > MaxPersonaFileSize {
|
|
return nil, fmt.Errorf("persona file %s exceeds maximum size (%d bytes)", path, MaxPersonaFileSize)
|
|
}
|
|
return parsePersona(data, path)
|
|
}
|
|
|
|
// LoadBuiltinPersona loads a built-in persona by name.
|
|
// Returns an error if the persona doesn't exist.
|
|
// Built-in personas are stored in YAML format.
|
|
func LoadBuiltinPersona(name string) (*Persona, error) {
|
|
// Try YAML first (preferred format)
|
|
yamlFile := name + ".yaml"
|
|
data, err := embeddedPersonas.ReadFile("personas/" + yamlFile)
|
|
if err == nil {
|
|
return parsePersona(data, "builtin:"+yamlFile)
|
|
}
|
|
|
|
// Fall back to JSON for backwards compatibility
|
|
jsonFile := name + ".json"
|
|
data, err = embeddedPersonas.ReadFile("personas/" + jsonFile)
|
|
if err != nil {
|
|
available := ListBuiltinPersonas()
|
|
return nil, fmt.Errorf("unknown built-in persona %q (available: %s)", name, strings.Join(available, ", "))
|
|
}
|
|
return parsePersona(data, "builtin:"+jsonFile)
|
|
}
|
|
|
|
// ListBuiltinPersonas returns the names of all built-in personas in sorted order.
|
|
// Returns an empty slice if the embedded directory cannot be read.
|
|
func ListBuiltinPersonas() []string {
|
|
entries, err := embeddedPersonas.ReadDir("personas")
|
|
if err != nil {
|
|
return []string{}
|
|
}
|
|
seen := make(map[string]bool)
|
|
for _, e := range entries {
|
|
if e.IsDir() {
|
|
continue
|
|
}
|
|
name := e.Name()
|
|
// Strip extension to get persona name
|
|
var personaName string
|
|
switch {
|
|
case strings.HasSuffix(name, ".yaml"):
|
|
personaName = strings.TrimSuffix(name, ".yaml")
|
|
case strings.HasSuffix(name, ".yml"):
|
|
personaName = strings.TrimSuffix(name, ".yml")
|
|
case strings.HasSuffix(name, ".json"):
|
|
personaName = strings.TrimSuffix(name, ".json")
|
|
default:
|
|
continue
|
|
}
|
|
if !seen[personaName] {
|
|
seen[personaName] = true
|
|
}
|
|
}
|
|
names := make([]string, 0, len(seen))
|
|
for name := range seen {
|
|
names = append(names, name)
|
|
}
|
|
sort.Strings(names)
|
|
return names
|
|
}
|
|
|
|
// parsePersona parses persona data from JSON or YAML format.
|
|
// Format is detected by the source file extension.
|
|
func parsePersona(data []byte, source string) (*Persona, error) {
|
|
lowerSource := strings.ToLower(source)
|
|
isYAML := strings.HasSuffix(lowerSource, ".yaml") || strings.HasSuffix(lowerSource, ".yml")
|
|
|
|
var p Persona
|
|
var err error
|
|
if isYAML {
|
|
err = unmarshalYAMLWithDepthLimit(data, &p, MaxYAMLDepth)
|
|
} else {
|
|
err = json.Unmarshal(data, &p)
|
|
}
|
|
if err != nil {
|
|
return nil, fmt.Errorf("parse persona %s: %w", source, err)
|
|
}
|
|
if err := validatePersona(&p, source); err != nil {
|
|
return nil, err
|
|
}
|
|
return &p, nil
|
|
}
|
|
|
|
// unmarshalYAMLWithDepthLimit unmarshals YAML data with explicit depth limiting
|
|
// and strict field checking. This protects against stack exhaustion from deeply
|
|
// nested structures and catches typos in field names.
|
|
// Multi-document YAML files are rejected to prevent silent data loss.
|
|
func unmarshalYAMLWithDepthLimit(data []byte, out any, maxDepth int) error {
|
|
// First pass: decode into a yaml.Node to check depth limits and node counts.
|
|
// This prevents stack exhaustion before we attempt to decode into structs.
|
|
var node yaml.Node
|
|
dec := yaml.NewDecoder(bytes.NewReader(data))
|
|
if err := dec.Decode(&node); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Reject multi-document YAML files - silently ignoring additional documents
|
|
// could lead to confusing behavior where users think their changes take effect.
|
|
var extra yaml.Node
|
|
if dec.Decode(&extra) == nil {
|
|
return fmt.Errorf("multi-document YAML is not supported; only single-document files are allowed")
|
|
}
|
|
|
|
nodeCount := 0
|
|
if err := checkYAMLDepth(&node, 0, maxDepth, MaxYAMLNodes, make(map[*yaml.Node]struct{}), &nodeCount); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Second pass: decode with strict field checking enabled.
|
|
// KnownFields(true) rejects unknown keys, catching typos like "focuss" or "identiy".
|
|
// We must re-decode from the original data because yaml.Node.Decode() doesn't
|
|
// support the KnownFields option.
|
|
strictDec := yaml.NewDecoder(bytes.NewReader(data))
|
|
strictDec.KnownFields(true)
|
|
return strictDec.Decode(out)
|
|
}
|
|
|
|
// checkYAMLDepth recursively checks that YAML nodes don't exceed the depth limit
|
|
// or the total node count limit. It also detects alias cycles to prevent infinite
|
|
// recursion from crafted YAML with self-referential aliases.
|
|
func checkYAMLDepth(node *yaml.Node, depth, maxDepth, maxNodes int, seen map[*yaml.Node]struct{}, nodeCount *int) error {
|
|
if depth > maxDepth {
|
|
return fmt.Errorf("YAML nesting depth exceeds maximum (%d)", maxDepth)
|
|
}
|
|
|
|
// Track total nodes visited as defense-in-depth against wide-but-shallow attacks.
|
|
*nodeCount++
|
|
if *nodeCount > maxNodes {
|
|
return fmt.Errorf("YAML node count exceeds maximum (%d)", maxNodes)
|
|
}
|
|
|
|
// Cycle detection: if we've seen this node before, we're in a cycle.
|
|
if _, ok := seen[node]; ok {
|
|
return nil // Already validated this subtree, skip to avoid infinite recursion.
|
|
}
|
|
seen[node] = struct{}{}
|
|
|
|
// Handle alias nodes: follow the alias to its anchor target.
|
|
// Increment depth when following aliases since they expand the effective structure.
|
|
if node.Kind == yaml.AliasNode && node.Alias != nil {
|
|
return checkYAMLDepth(node.Alias, depth+1, maxDepth, maxNodes, seen, nodeCount)
|
|
}
|
|
|
|
for _, child := range node.Content {
|
|
if err := checkYAMLDepth(child, depth+1, maxDepth, maxNodes, seen, nodeCount); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func validatePersona(p *Persona, source string) error {
|
|
if p.Name == "" {
|
|
return fmt.Errorf("persona %s: name is required", source)
|
|
}
|
|
if p.Identity == "" {
|
|
return fmt.Errorf("persona %s: identity is required", source)
|
|
}
|
|
// DisplayName defaults to Name if not set
|
|
if p.DisplayName == "" {
|
|
p.DisplayName = p.Name
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// CapitalizeFirst returns s with its first rune upper-cased, leaving the rest
// of the string untouched. It works on runes rather than bytes, so multi-byte
// leading characters are handled correctly.
//
// The input is returned unchanged when it is empty or when its first rune
// decodes to utf8.RuneError (for example, when s starts with an invalid
// UTF-8 byte).
func CapitalizeFirst(s string) string {
	first, width := utf8.DecodeRuneInString(s)
	if width == 0 || first == utf8.RuneError {
		return s
	}
	// s[:width] is exactly the encoding of the first rune, so upper-casing
	// that prefix upper-cases just that rune.
	return strings.ToUpper(s[:width]) + s[width:]
}
|