docs: address review findings on YAML depth validation

- Add safety note on Strict() decoder not expanding aliases recursively,
  since alias resolution uses the pre-validated AST (finding #1)
- Document that ast.Node map keys rely on pointer identity, which holds
  because all goccy/go-yaml AST types are pointer receivers (finding #2)
- Clarify AnchorNode comment: effective depth budget is reduced for
  anchor+alias pairs, not literally halved (finding #3)
- Improve test depth trace comment for accuracy (finding #4)
- Add HTML comment in CONVENTIONS.md referencing #91 for the two-step
  process deviation (finding #5)
This commit is contained in:
claw
2026-05-12 17:39:38 -07:00
parent 1200ef700d
commit 787ac3b736
3 changed files with 18 additions and 4 deletions
+2
View File
@@ -21,6 +21,8 @@ To request a new dependency:
2. Requires explicit approval from Aaron
3. After merge, a separate PR may use the package
<!-- Deviation from step 1+3 for go-yaml migration: see #91 for rationale. -->
*Enforcement: `scripts/check-deps.sh` parses this table — update only here.*
## Error Handling
+12 -1
View File
@@ -199,6 +199,11 @@ func unmarshalYAMLWithDepthLimit(data []byte, out any, maxDepth int) error {
// Second pass: decode with strict field checking enabled.
// Strict() rejects unknown keys, catching typos like "focuss" or "identiy".
//
// Safety note: goccy/go-yaml's decoder does not expand YAML aliases
// recursively — it resolves them via the pre-built AST, which our first
// pass already depth-checked. Alias chains that would exceed depth limits
// are caught above; the decoder merely reads the resolved scalar values.
dec := yaml.NewDecoder(bytes.NewReader(data), yaml.Strict())
return dec.Decode(out)
}
@@ -246,6 +251,10 @@ func checkYAMLDepth(node ast.Node, depth, maxDepth, maxNodes int, validated map[
// validated it at the same or deeper effective depth. If this visit is at a
// greater depth than before (e.g., alias referenced deeper in the tree),
// we must re-traverse to catch depth limit violations.
//
// Note: using ast.Node (interface) as map key relies on pointer identity,
// which is correct because all goccy/go-yaml AST node types are pointer
// receivers (*MappingNode, *SequenceNode, etc.), never value types.
if prevDepth, ok := validated[node]; ok && depth <= prevDepth {
return nil
}
@@ -288,7 +297,9 @@ func checkYAMLDepth(node ast.Node, depth, maxDepth, maxNodes int, validated map[
// level ensures that deeply nested anchors are caught at definition
// time rather than only when referenced via alias. This +1 is
// asymmetric with alias (which also increments) — by design, the
// combined budget is halved for anchored content that is later aliased.
// effective depth budget for anchored-then-aliased content is reduced
// because both the definition site and the reference site each consume
// a level, making deeply nested anchor/alias pairs hit the limit sooner.
if err := checkYAMLDepth(n.Value, depth+1, maxDepth, maxNodes, validated, visiting, nodeCount); err != nil {
return err
}
+4 -3
View File
@@ -463,9 +463,10 @@ func TestYAMLDeeplyNestedRejection(t *testing.T) {
// - Document root parsed at depth 0
// - Root MappingNode children (MappingValueNodes) visited at depth 1
// - "nested" MappingValueNode: key at depth 2, value at depth 2
// - Each levelN mapping adds +1 depth (MappingNode → MappingValueNode value)
// - After 25 levels: effective depth reaches ~27, well past MaxYAMLDepth (20)
// The test uses 25 levels to provide a comfortable margin above the limit.
// - Each levelN adds depth via MappingValueNode traversal (key + value)
// - Exact depth per level depends on AST structure (MappingNode wrapping),
// but 25 levels reliably exceeds MaxYAMLDepth (20) with comfortable margin.
// The test uses 25 levels rather than exactly 21 to avoid brittleness.
var sb strings.Builder
sb.WriteString("name: test\nidentity: test\nnested:\n")
indent := " "