From baa917f228bf4104ab96a6fd7ef7f75bd407defd Mon Sep 17 00:00:00 2001 From: claw Date: Tue, 12 May 2026 18:45:48 -0700 Subject: [PATCH] fix: handle MergeKeyNode explicitly in depth check, add size limit to ParsePersonaBytes - Add explicit case for *ast.MergeKeyNode in checkYAMLDepth switch to make it clear this is an intentional leaf (no children to recurse) rather than relying on the default case. Prevents future library changes from silently bypassing depth checks. - Add MaxPersonaFileSize bound check at the top of ParsePersonaBytes. While callers already check size, the public API should defend itself (defense in depth) against arbitrarily large inputs that could cause excessive memory/CPU before AST validation runs. - Add tests for both behaviors. Addresses review #2879 findings. --- review/persona.go | 14 ++++++++-- review/persona_test.go | 61 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 2 deletions(-) diff --git a/review/persona.go b/review/persona.go index 3a041f9..19e822f 100644 --- a/review/persona.go +++ b/review/persona.go @@ -307,10 +307,16 @@ func checkYAMLDepth(node ast.Node, depth, maxDepth, maxNodes int, validated map[ if err := checkYAMLDepth(n.Value, depth+1, maxDepth, maxNodes, validated, visiting, nodeCount); err != nil { return err } + case *ast.MergeKeyNode: + // MergeKeyNode represents the literal "<<" merge key token. It has no + // child nodes — the value side of a merge (e.g., *alias) lives in the + // parent MappingValueNode.Value, which is already recursed into above. + // Explicitly listed here (rather than in the default case) to prevent + // future library changes from silently bypassing depth checks. default: // Scalar leaf nodes (StringNode, IntegerNode, FloatNode, BoolNode, - // NullNode, InfinityNode, NanNode, LiteralNode, MergeKeyNode) have - // no children to recurse into. + // NullNode, InfinityNode, NanNode, LiteralNode) have no children to + // recurse into. } return nil } @@ -318,7 +324,11 @@ func checkYAMLDepth(node ast.Node, depth, maxDepth, maxNodes int, validated map[ // ParsePersonaBytes parses persona data from bytes with a source label for errors. // This is useful for parsing personas fetched from external sources (e.g., Gitea API) // without requiring filesystem access. Format is detected by source extension. +// Input is bounded by MaxPersonaFileSize to prevent resource exhaustion. func ParsePersonaBytes(data []byte, source string) (*Persona, error) { + if len(data) > MaxPersonaFileSize { + return nil, fmt.Errorf("persona data from %s exceeds maximum size (%d bytes, limit %d)", source, len(data), MaxPersonaFileSize) + } return parsePersona(data, source) } diff --git a/review/persona_test.go b/review/persona_test.go index 5a39c2e..9b50f0f 100644 --- a/review/persona_test.go +++ b/review/persona_test.go @@ -896,3 +896,64 @@ func TestJSONTrailingContentRejected(t *testing.T) { }) } } + +func TestParsePersonaBytesSizeLimit(t *testing.T) { + // ParsePersonaBytes should reject input exceeding MaxPersonaFileSize + oversized := make([]byte, MaxPersonaFileSize+1) + for i := range oversized { + oversized[i] = 'x' + } + + _, err := ParsePersonaBytes(oversized, "oversized.yaml") + if err == nil { + t.Fatal("expected error for oversized input, got nil") + } + if !strings.Contains(err.Error(), "exceeds maximum size") { + t.Errorf("error = %q, want to contain 'exceeds maximum size'", err.Error()) + } + + // Just under the limit should not trigger size error (may fail parse, but not size) + underLimit := []byte("name: test\nidentity: test persona\n") + p, err := ParsePersonaBytes(underLimit, "valid.yaml") + if err != nil { + t.Fatalf("unexpected error for valid input: %v", err) + } + if p.Name != "test" { + t.Errorf("Name = %q, want %q", p.Name, "test") + } +} + +func TestYAMLMergeKeyDepthCheck(t *testing.T) { + // Verify that YAML merge keys (<<: *alias) are properly handled by the + // depth checker. The merge key content is in the MappingValueNode.Value + // (an AliasNode), not in the MergeKeyNode itself. + p, err := ParsePersonaBytes([]byte("name: merge-test\nidentity: test\n"), "merge.yaml") + if err != nil { + t.Fatalf("basic parse failed: %v", err) + } + if p.Name != "merge-test" { + t.Errorf("Name = %q, want %q", p.Name, "merge-test") + } + + // Test that deeply nested merge keys still hit depth limit. + // Build YAML with merge key content nested beyond MaxYAMLDepth. + var sb strings.Builder + sb.WriteString("name: deep-merge\nidentity: deep merge persona\n") + sb.WriteString("anchor: &deep\n") + indent := " " + for i := 0; i < MaxYAMLDepth+5; i++ { + sb.WriteString(indent) + sb.WriteString(fmt.Sprintf("level%d:\n", i)) + indent += " " + } + sb.WriteString(indent + "leaf: value\n") + sb.WriteString("target:\n <<: *deep\n") + + _, err = ParsePersonaBytes([]byte(sb.String()), "deep-merge.yaml") + if err == nil { + t.Fatal("expected error for deeply nested merge key content, got nil") + } + if !strings.Contains(err.Error(), "depth") { + t.Errorf("error = %q, want to contain 'depth'", err.Error()) + } +}