diff --git a/docs/DESIGN-57-yaml-persona.md b/docs/DESIGN-57-yaml-persona.md index f8e981f..a266248 100644 --- a/docs/DESIGN-57-yaml-persona.md +++ b/docs/DESIGN-57-yaml-persona.md @@ -33,6 +33,11 @@ func parsePersona(data []byte, source string) (*Persona, error) { ### YAML Parsing with Depth Protection +> **Note:** The pseudocode below reflects the initial design using `gopkg.in/yaml.v3` +> types (`yaml.Node`). The actual implementation uses `github.com/goccy/go-yaml` +> with `ast.Node`-based traversal, dual-map cycle/depth tracking, and node-count +> limits. See `review/persona.go` for the current implementation. + ```go func unmarshalYAMLWithDepthLimit(data []byte, out any, maxDepth int) error { var node yaml.Node diff --git a/review/persona.go b/review/persona.go index 29f2336..5975540 100644 --- a/review/persona.go +++ b/review/persona.go @@ -161,7 +161,8 @@ func parsePersona(data []byte, source string) (*Persona, error) { // unmarshalYAMLWithDepthLimit unmarshals YAML data with explicit depth limiting // and strict field checking. This protects against stack exhaustion from deeply // nested structures and catches typos in field names. -// Multi-document YAML files are rejected to prevent silent data loss. +// Multi-document YAML files are rejected to prevent confusing behavior +// where additional documents are silently ignored. func unmarshalYAMLWithDepthLimit(data []byte, out any, maxDepth int) error { // First pass: parse into AST to check depth limits, node counts, and // multi-document rejection. This prevents stack exhaustion before we @@ -214,12 +215,6 @@ func checkYAMLDepth(node ast.Node, depth, maxDepth, maxNodes int, validated map[ return fmt.Errorf("YAML nesting depth exceeds maximum (%d)", maxDepth) } - // Track total nodes visited as defense-in-depth against wide-but-shallow attacks. - *nodeCount++ - if *nodeCount > maxNodes { - return fmt.Errorf("YAML node count exceeds maximum (%d)", maxNodes) - } - // Cycle detection: if we're currently visiting this node on the current // recursion path, it's a cycle (e.g., alias pointing to an ancestor). // Return nil to break the cycle without error — cycles are a structural @@ -228,6 +223,13 @@ func checkYAMLDepth(node ast.Node, depth, maxDepth, maxNodes int, validated map[ return nil } + // Track total nodes visited as defense-in-depth against wide-but-shallow attacks. + // Placed after cycle detection to avoid over-counting cyclic references. + *nodeCount++ + if *nodeCount > maxNodes { + return fmt.Errorf("YAML node count exceeds maximum (%d)", maxNodes) + } + // Depth-aware short-circuit: only skip re-checking a node if we previously // validated it at the same or deeper effective depth. If this visit is at a // greater depth than before (e.g., alias referenced deeper in the tree), diff --git a/review/persona_test.go b/review/persona_test.go index 344596d..a318b97 100644 --- a/review/persona_test.go +++ b/review/persona_test.go @@ -491,9 +491,9 @@ func TestYAMLEmptyFileRejection(t *testing.T) { name string content string }{ - {"completely empty", ""}, - {"whitespace only", " \n\n "}, - {"comment only", "# just a comment\n"}, + {"completely_empty", ""}, + {"whitespace_only", " \n\n "}, + {"comment_only", "# just a comment\n"}, } for _, tc := range tests {