diff --git a/docs/DESIGN-57-yaml-persona.md b/docs/DESIGN-57-yaml-persona.md index a266248..7c2dda5 100644 --- a/docs/DESIGN-57-yaml-persona.md +++ b/docs/DESIGN-57-yaml-persona.md @@ -33,42 +33,16 @@ func parsePersona(data []byte, source string) (*Persona, error) { ### YAML Parsing with Depth Protection -> **Note:** The pseudocode below reflects the initial design using `gopkg.in/yaml.v3` -> types (`yaml.Node`). The actual implementation uses `github.com/goccy/go-yaml` -> with `ast.Node`-based traversal, dual-map cycle/depth tracking, and node-count -> limits. See `review/persona.go` for the current implementation. +We implement a custom AST-based depth/node-count walk (`checkYAMLDepth` in +`review/persona.go`) rather than relying on library decoder options. Key design +decisions: -```go -func unmarshalYAMLWithDepthLimit(data []byte, out any, maxDepth int) error { - var node yaml.Node - dec := yaml.NewDecoder(bytes.NewReader(data)) - if err := dec.Decode(&node); err != nil { - return err - } - if err := checkYAMLDepth(&node, 0, maxDepth); err != nil { - return err - } - return node.Decode(out) -} +- **Library:** `github.com/goccy/go-yaml` with `ast.Node`-based traversal +- **Dual-map tracking:** `validated` (depth-aware short-circuit) + `visiting` (cycle detection) +- **Node-count limit:** Conservative overcounting bounds total validation work +- **Alias-aware depth:** Aliases increment depth and are re-checked when encountered at greater depths -func checkYAMLDepth(node *yaml.Node, depth, maxDepth int) error { - if depth > maxDepth { - return fmt.Errorf("YAML nesting depth exceeds maximum (%d)", maxDepth) - } - // Handle alias nodes by following the Alias pointer - if node.Kind == yaml.AliasNode && node.Alias != nil { - return checkYAMLDepth(node.Alias, depth, maxDepth) - } - for _, child := range node.Content { - if err := checkYAMLDepth(child, depth+1, maxDepth); err != nil { - return err - } - } - return nil -} -``` - -We implement a custom AST-based depth/node-count walk (`checkYAMLDepth`) rather than relying on library decoder options. This gives us precise control over how depth is counted across aliases and anchors, with a depth-aware validated map to prevent alias depth bypass. +See `review/persona.go:checkYAMLDepth` for the authoritative implementation. ## State/Data Model diff --git a/review/persona.go b/review/persona.go index 5975540..fb3cb69 100644 --- a/review/persona.go +++ b/review/persona.go @@ -224,7 +224,11 @@ func checkYAMLDepth(node ast.Node, depth, maxDepth, maxNodes int, validated map[ } // Track total nodes visited as defense-in-depth against wide-but-shallow attacks. - // Placed after cycle detection to avoid over-counting cyclic references. + // Placed after cycle detection but before the depth-aware short-circuit. This means + // nodes revisited at shallower depths (via aliases) are counted each time they are + // encountered — intentional conservative overcounting. This bounds the total work + // performed during validation rather than tracking unique nodes, which is the safer + // security posture for untrusted YAML input. *nodeCount++ if *nodeCount > maxNodes { return fmt.Errorf("YAML node count exceeds maximum (%d)", maxNodes) diff --git a/review/persona_test.go b/review/persona_test.go index a318b97..4f93c13 100644 --- a/review/persona_test.go +++ b/review/persona_test.go @@ -459,8 +459,13 @@ func TestYAMLDeeplyNestedRejection(t *testing.T) { path := filepath.Join(dir, "deeply-nested.yaml") // Build a deeply nested YAML structure that exceeds MaxYAMLDepth (20). - // Each nested mapping key generates a MappingValueNode, incrementing depth - // by 1 per level in the AST walk. With 25 levels, we exceed MaxYAMLDepth (20). + // Depth accumulation trace for "nested: \n level0: \n level1: ...": + // - Document root parsed at depth 0 + // - Root MappingNode children (MappingValueNodes) visited at depth 1 + // - "nested" MappingValueNode: key at depth 2, value at depth 2 + // - Each levelN mapping adds +1 depth (MappingNode → MappingValueNode → value) + // - After 25 levels: effective depth reaches ~27, well past MaxYAMLDepth (20) + // The test uses 25 levels to provide a comfortable margin above the limit. var sb strings.Builder sb.WriteString("name: test\nidentity: test\nnested:\n") indent := " "