From c3e8f0f23120d26d75ff1f2d21e55f858fd9e3e3 Mon Sep 17 00:00:00 2001 From: Rodin Date: Sun, 10 May 2026 14:43:31 -0700 Subject: [PATCH] fix: address PR review findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MAJOR fixes: - Remove false security claim about gopkg.in/yaml.v3 having built-in depth protection - Add explicit YAML depth limiting via yaml.Node API (MaxYAMLDepth=20) - Add file size limit for persona files (MaxPersonaFileSize=64KB) - Add test for deeply nested YAML rejection MINOR fixes: - Add sort.Strings to ListBuiltinPersonas for deterministic ordering - Update design doc to reflect actual library used (gopkg.in/yaml.v3) - Update README: 'Zero dependencies' → 'Minimal dependencies' - Add test for file size limit - Add test for sorted persona list --- README.md | 2 +- docs/DESIGN-57-yaml-persona.md | 8 ++--- review/persona.go | 55 ++++++++++++++++++++++++++--- review/persona_test.go | 63 ++++++++++++++++++++++++++++++++++ 4 files changed, 119 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 29ee5b5..23fc038 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ AI-powered code review bot for Gitea pull requests. Fetches diff + context, send - **Smart budget**: Automatically trims context to fit model token limits - **Idempotent reviews**: Posts new review, then cleans up stale ones (one review per bot) - **Custom prompts**: Load additional instructions from a file (e.g. 
security-focused review) -- **Zero dependencies**: Go stdlib only +- **Minimal dependencies**: Go stdlib + `gopkg.in/yaml.v3` only ## Quick Start: Composite Action diff --git a/docs/DESIGN-57-yaml-persona.md b/docs/DESIGN-57-yaml-persona.md index 85300ba..deafae9 100644 --- a/docs/DESIGN-57-yaml-persona.md +++ b/docs/DESIGN-57-yaml-persona.md @@ -9,7 +9,7 @@ JSON is awkward for persona files that contain multi-line text (identity, severi - Backwards compatibility: existing JSON personas must continue to work - Security: protect against DoS via deeply nested YAML (AIKIDO-2024-10486) - Consistency: use `.yaml` extension (not `.yml`) -- Library: use `github.com/goccy/go-yaml` v1.16.0+ (actively maintained, security fix applied) +- Library: use `gopkg.in/yaml.v3` (approved in CONVENTIONS.md) with explicit depth limiting ## Proposed Approach @@ -36,8 +36,8 @@ func parsePersona(data []byte, source string) (*Persona, error) { ```go func parseYAML(data []byte, source string) (*Persona, error) { var p Persona - // go-yaml has built-in protection against deeply nested structures - // but we add explicit decoder options for defense in depth + // gopkg.in/yaml.v3 does NOT have built-in depth limiting. + // Sketch only: the implementation decodes into a yaml.Node and checks depth before decoding into p. if err := yaml.Unmarshal(data, &p); err != nil { return nil, fmt.Errorf("parse persona %s: %w", source, err) } @@ -48,7 +48,7 @@ func parseYAML(data []byte, source string) (*Persona, error) { } ``` -The `goccy/go-yaml` library since v1.16.0 limits nesting depth by default. +The `gopkg.in/yaml.v3` library does not have built-in depth protection, so we implement explicit depth checking by first decoding into a `yaml.Node`, walking the tree to verify depth, then decoding into the target struct. 
## State/Data Model diff --git a/review/persona.go b/review/persona.go index 025a098..bd61c18 100644 --- a/review/persona.go +++ b/review/persona.go @@ -1,10 +1,12 @@ package review import ( + "bytes" "embed" "encoding/json" "fmt" "os" + "sort" "strings" "unicode/utf8" @@ -14,6 +16,14 @@ import ( //go:embed personas/*.yaml var embeddedPersonas embed.FS +// MaxPersonaFileSize is the maximum size for persona files (64 KB). +// This prevents denial-of-service via excessively large files. +const MaxPersonaFileSize = 64 * 1024 + +// MaxYAMLDepth is the maximum nesting depth allowed in YAML persona files. +// NOTE(review): enforced by checkYAMLDepth on the parsed yaml.Node tree, i.e. after parsing — confirm this meets the DoS goal. +const MaxYAMLDepth = 20 + // Persona defines a specialized review role with focused expertise. type Persona struct { Name string `json:"name" yaml:"name"` @@ -36,7 +46,15 @@ type Severity struct { // LoadPersona loads a persona from a JSON or YAML file path. // Format is detected by file extension: .yaml/.yml for YAML, .json or other for JSON. +// Files whose os.Stat size exceeds MaxPersonaFileSize are rejected before they are read. func LoadPersona(path string) (*Persona, error) { + info, err := os.Stat(path) + if err != nil { + return nil, fmt.Errorf("read persona file %s: %w", path, err) + } + if info.Size() > MaxPersonaFileSize { + return nil, fmt.Errorf("persona file %s exceeds maximum size (%d bytes)", path, MaxPersonaFileSize) + } data, err := os.ReadFile(path) if err != nil { return nil, fmt.Errorf("read persona file %s: %w", path, err) @@ -65,7 +83,7 @@ func LoadBuiltinPersona(name string) (*Persona, error) { return parsePersona(data, "builtin:"+jsonFile) } -// ListBuiltinPersonas returns the names of all built-in personas. +// ListBuiltinPersonas returns the names of all built-in personas in sorted order. // Returns an empty slice if the embedded directory cannot be read. 
func ListBuiltinPersonas() []string { entries, err := embeddedPersonas.ReadDir("personas") @@ -94,10 +112,11 @@ func ListBuiltinPersonas() []string { seen[personaName] = true } } - var names []string + names := make([]string, 0, len(seen)) for name := range seen { names = append(names, name) } + sort.Strings(names) return names } @@ -110,8 +129,7 @@ func parsePersona(data []byte, source string) (*Persona, error) { var p Persona var err error if isYAML { - // go-yaml v1.16.0+ has built-in protection against deeply nested structures - err = yaml.Unmarshal(data, &p) + err = unmarshalYAMLWithDepthLimit(data, &p, MaxYAMLDepth) } else { err = json.Unmarshal(data, &p) } @@ -124,6 +142,35 @@ func parsePersona(data []byte, source string) (*Persona, error) { return &p, nil } +// unmarshalYAMLWithDepthLimit decodes data into a yaml.Node, rejects trees nested deeper than maxDepth, then decodes the node into out. +// The depth check runs on the already-parsed tree; errors from either decode step or from the check are returned unwrapped. +func unmarshalYAMLWithDepthLimit(data []byte, out interface{}, maxDepth int) error { + var node yaml.Node + dec := yaml.NewDecoder(bytes.NewReader(data)) + if err := dec.Decode(&node); err != nil { + return err + } + + if err := checkYAMLDepth(&node, 0, maxDepth); err != nil { + return err + } + + return node.Decode(out) +} + +// checkYAMLDepth walks node.Content recursively and returns an error as soon as depth exceeds maxDepth. 
+func checkYAMLDepth(node *yaml.Node, depth, maxDepth int) error { + if depth > maxDepth { + return fmt.Errorf("YAML nesting depth exceeds maximum (%d)", maxDepth) + } + for _, child := range node.Content { + if err := checkYAMLDepth(child, depth+1, maxDepth); err != nil { + return err + } + } + return nil +} + func validatePersona(p *Persona, source string) error { if p.Name == "" { return fmt.Errorf("persona %s: name is required", source) diff --git a/review/persona_test.go b/review/persona_test.go index 6852d41..bb6fa14 100644 --- a/review/persona_test.go +++ b/review/persona_test.go @@ -1,6 +1,7 @@ package review import ( + "fmt" "os" "path/filepath" "strings" @@ -449,3 +450,65 @@ severity: t.Errorf("Focus[0] = %q, want %q", p.Focus[0], "item") } } + +func TestYAMLDeeplyNestedRejection(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "deeply-nested.yaml") + + // Build a deeply nested YAML structure that exceeds MaxYAMLDepth (20). + // Each nested mapping adds 1 to the depth seen by checkYAMLDepth, so 25 levels is well past the limit. 
+ var sb strings.Builder + sb.WriteString("name: test\nidentity: test\nnested:\n") + indent := " " + for i := 0; i < 25; i++ { + sb.WriteString(strings.Repeat(indent, i+1)) + fmt.Fprintf(&sb, "level%d:\n", i) + } + sb.WriteString(strings.Repeat(indent, 26)) + sb.WriteString("value: too-deep\n") + + if err := os.WriteFile(path, []byte(sb.String()), 0644); err != nil { + t.Fatalf("failed to write test file: %v", err) + } + + _, err := LoadPersona(path) + if err == nil { + t.Fatal("expected error for deeply nested YAML, got nil") + } + if !strings.Contains(err.Error(), "nesting depth exceeds") { + t.Errorf("error = %q, want containing 'nesting depth exceeds'", err.Error()) + } +} + +func TestYAMLFileSizeLimit(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "huge.yaml") + + // Create a file larger than MaxPersonaFileSize (64 KB) + content := "name: test\nidentity: " + strings.Repeat("x", MaxPersonaFileSize+1) + "\n" + if err := os.WriteFile(path, []byte(content), 0644); err != nil { + t.Fatalf("failed to write test file: %v", err) + } + + _, err := LoadPersona(path) + if err == nil { + t.Fatal("expected error for oversized file, got nil") + } + if !strings.Contains(err.Error(), "exceeds maximum size") { + t.Errorf("error = %q, want containing 'exceeds maximum size'", err.Error()) + } +} + +func TestListBuiltinPersonasSortedOrder(t *testing.T) { + names := ListBuiltinPersonas() + if len(names) < 2 { + t.Skip("need at least 2 personas to test ordering") + } + + // Verify the list is sorted + for i := 1; i < len(names); i++ { + if names[i-1] > names[i] { + t.Errorf("ListBuiltinPersonas not sorted: %q > %q", names[i-1], names[i]) + } + } +}