fix: handle MergeKeyNode explicitly in depth check, add size limit to ParsePersonaBytes
PR Ready Gate / clear-labels (pull_request) Successful in 2s
CI / test (pull_request) Successful in 17s
CI / review (anthropic--claude-4.6-sonnet, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 35s
CI / review (gpt-5, security, ., rodin/security-patterns, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Successful in 58s
CI / review (gpt-5, gpt, GPT_REVIEW_TOKEN) (pull_request) Successful in 1m16s

- Add explicit case for *ast.MergeKeyNode in checkYAMLDepth switch to
  make it clear this is an intentional leaf (no children to recurse)
  rather than relying on the default case. Prevents future library
  changes from silently bypassing depth checks.

- Add MaxPersonaFileSize bound check at the top of ParsePersonaBytes.
  While callers already check size, the public API should defend itself
  (defense in depth) against arbitrarily large inputs that could cause
  excessive memory/CPU before AST validation runs.

- Add tests for both behaviors.

Addresses review #2879 findings.
This commit is contained in:
claw
2026-05-12 18:45:48 -07:00
parent b0352ba1c9
commit baa917f228
2 changed files with 73 additions and 2 deletions
+12 -2
View File
@@ -307,10 +307,16 @@ func checkYAMLDepth(node ast.Node, depth, maxDepth, maxNodes int, validated map[
if err := checkYAMLDepth(n.Value, depth+1, maxDepth, maxNodes, validated, visiting, nodeCount); err != nil { if err := checkYAMLDepth(n.Value, depth+1, maxDepth, maxNodes, validated, visiting, nodeCount); err != nil {
return err return err
} }
case *ast.MergeKeyNode:
// MergeKeyNode represents the literal "<<" merge key token. It has no
// child nodes — the value side of a merge (e.g., *alias) lives in the
// parent MappingValueNode.Value, which is already recursed into above.
// Explicitly listed here (rather than in the default case) to prevent
// future library changes from silently bypassing depth checks.
default: default:
// Scalar leaf nodes (StringNode, IntegerNode, FloatNode, BoolNode, // Scalar leaf nodes (StringNode, IntegerNode, FloatNode, BoolNode,
// NullNode, InfinityNode, NanNode, LiteralNode, MergeKeyNode) have // NullNode, InfinityNode, NanNode, LiteralNode) have no children to
// no children to recurse into. // recurse into.
} }
return nil return nil
} }
@@ -318,7 +324,11 @@ func checkYAMLDepth(node ast.Node, depth, maxDepth, maxNodes int, validated map[
// ParsePersonaBytes parses persona data from bytes with a source label for errors. // ParsePersonaBytes parses persona data from bytes with a source label for errors.
// This is useful for parsing personas fetched from external sources (e.g., Gitea API) // This is useful for parsing personas fetched from external sources (e.g., Gitea API)
// without requiring filesystem access. Format is detected by source extension. // without requiring filesystem access. Format is detected by source extension.
// Input is bounded by MaxPersonaFileSize to prevent resource exhaustion.
func ParsePersonaBytes(data []byte, source string) (*Persona, error) { func ParsePersonaBytes(data []byte, source string) (*Persona, error) {
if len(data) > MaxPersonaFileSize {
return nil, fmt.Errorf("persona data from %s exceeds maximum size (%d bytes, limit %d)", source, len(data), MaxPersonaFileSize)
}
return parsePersona(data, source) return parsePersona(data, source)
} }
+61
View File
@@ -896,3 +896,64 @@ func TestJSONTrailingContentRejected(t *testing.T) {
}) })
} }
} }
func TestParsePersonaBytesSizeLimit(t *testing.T) {
// ParsePersonaBytes should reject input exceeding MaxPersonaFileSize
oversized := make([]byte, MaxPersonaFileSize+1)
for i := range oversized {
oversized[i] = 'x'
}
_, err := ParsePersonaBytes(oversized, "oversized.yaml")
if err == nil {
t.Fatal("expected error for oversized input, got nil")
}
if !strings.Contains(err.Error(), "exceeds maximum size") {
t.Errorf("error = %q, want to contain 'exceeds maximum size'", err.Error())
}
// Just under the limit should not trigger size error (may fail parse, but not size)
underLimit := []byte("name: test\nidentity: test persona\n")
p, err := ParsePersonaBytes(underLimit, "valid.yaml")
if err != nil {
t.Fatalf("unexpected error for valid input: %v", err)
}
if p.Name != "test" {
t.Errorf("Name = %q, want %q", p.Name, "test")
}
}
func TestYAMLMergeKeyDepthCheck(t *testing.T) {
// Verify that YAML merge keys (<<: *alias) are properly handled by the
// depth checker. The merge key content is in the MappingValueNode.Value
// (an AliasNode), not in the MergeKeyNode itself.
p, err := ParsePersonaBytes([]byte("name: merge-test\nidentity: test\n"), "merge.yaml")
if err != nil {
t.Fatalf("basic parse failed: %v", err)
}
if p.Name != "merge-test" {
t.Errorf("Name = %q, want %q", p.Name, "merge-test")
}
// Test that deeply nested merge keys still hit depth limit.
// Build YAML with merge key content nested beyond MaxYAMLDepth.
var sb strings.Builder
sb.WriteString("name: deep-merge\nidentity: deep merge persona\n")
sb.WriteString("anchor: &deep\n")
indent := " "
for i := 0; i < MaxYAMLDepth+5; i++ {
sb.WriteString(indent)
sb.WriteString(fmt.Sprintf("level%d:\n", i))
indent += " "
}
sb.WriteString(indent + "leaf: value\n")
sb.WriteString("target:\n <<: *deep\n")
_, err = ParsePersonaBytes([]byte(sb.String()), "deep-merge.yaml")
if err == nil {
t.Fatal("expected error for deeply nested merge key content, got nil")
}
if !strings.Contains(err.Error(), "depth") {
t.Errorf("error = %q, want to contain 'depth'", err.Error())
}
}