From 8991260333b751fbd65c8747bd74120a7977ac05 Mon Sep 17 00:00:00 2001 From: claw Date: Tue, 12 May 2026 13:27:30 -0700 Subject: [PATCH] fix(deps): replace gopkg.in/yaml.v3 with github.com/goccy/go-yaml Fixes #87. PR #58 incorrectly added gopkg.in/yaml.v3 (abandoned library) instead of github.com/goccy/go-yaml as required by issue #57. Changes: - Replace gopkg.in/yaml.v3 with github.com/goccy/go-yaml v1.19.2 - Update review/persona.go to use goccy/go-yaml API: - parser.ParseBytes for AST-based depth/node count checking - yaml.Strict() decoder option instead of KnownFields(true) - ast.Node types instead of yaml.Node for tree walking - Update review/persona_test.go to use ast types for cycle tests - Remove gopkg.in/yaml.v3 from go.mod and go.sum All existing YAML tests pass with the new library. --- go.mod | 2 +- go.sum | 6 +-- review/persona.go | 89 +++++++++++++++++++++++++++++------------- review/persona_test.go | 60 ++++++++++++++-------------- 4 files changed, 93 insertions(+), 64 deletions(-) diff --git a/go.mod b/go.mod index 0d9eded..5348662 100644 --- a/go.mod +++ b/go.mod @@ -2,4 +2,4 @@ module gitea.weiker.me/rodin/review-bot go 1.26.2 -require gopkg.in/yaml.v3 v3.0.1 +require github.com/goccy/go-yaml v1.19.2 diff --git a/go.sum b/go.sum index a62c313..bd88ba6 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,2 @@ -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +github.com/goccy/go-yaml v1.19.2 h1:PmFC1S6h8ljIz6gMRBopkjP1TVT7xuwrButHID66PoM= +github.com/goccy/go-yaml v1.19.2/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= diff --git a/review/persona.go b/review/persona.go index 329aa78..4c263b0 100644 --- a/review/persona.go +++ b/review/persona.go @@ -10,7 +10,9 @@ import ( "strings" "unicode/utf8" - "gopkg.in/yaml.v3" + "github.com/goccy/go-yaml" + "github.com/goccy/go-yaml/ast" + "github.com/goccy/go-yaml/parser" ) //go:embed personas/*.yaml @@ -142,7 +144,7 @@ func parsePersona(data []byte, source string) (*Persona, error) { err = unmarshalYAMLWithDepthLimit(data, &p, MaxYAMLDepth) } else { // Use json.Decoder with DisallowUnknownFields for consistency with - // YAML's KnownFields(true) - both reject unknown fields to catch typos. + // YAML's Strict() - both reject unknown fields to catch typos. dec := json.NewDecoder(bytes.NewReader(data)) dec.DisallowUnknownFields() err = dec.Decode(&p) @@ -161,39 +163,43 @@ func parsePersona(data []byte, source string) (*Persona, error) { // nested structures and catches typos in field names. // Multi-document YAML files are rejected to prevent silent data loss. func unmarshalYAMLWithDepthLimit(data []byte, out any, maxDepth int) error { - // First pass: decode into a yaml.Node to check depth limits and node counts. - // This prevents stack exhaustion before we attempt to decode into structs. - var node yaml.Node - dec := yaml.NewDecoder(bytes.NewReader(data)) - if err := dec.Decode(&node); err != nil { + // First pass: parse into AST to check depth limits, node counts, and + // multi-document rejection. This prevents stack exhaustion before we + // attempt to decode into structs. + file, err := parser.ParseBytes(data, 0) + if err != nil { return err } // Reject multi-document YAML files - silently ignoring additional documents // could lead to confusing behavior where users think their changes take effect. - var extra yaml.Node - if dec.Decode(&extra) == nil { + if len(file.Docs) > 1 { return fmt.Errorf("multi-document YAML is not supported; only single-document files are allowed") } + if len(file.Docs) == 0 { + return fmt.Errorf("empty YAML document") + } + nodeCount := 0 - if err := checkYAMLDepth(&node, 0, maxDepth, MaxYAMLNodes, make(map[*yaml.Node]struct{}), &nodeCount); err != nil { + if err := checkYAMLDepth(file.Docs[0].Body, 0, maxDepth, MaxYAMLNodes, make(map[ast.Node]struct{}), &nodeCount); err != nil { return err } // Second pass: decode with strict field checking enabled. - // KnownFields(true) rejects unknown keys, catching typos like "focuss" or "identiy". - // We must re-decode from the original data because yaml.Node.Decode() doesn't - // support the KnownFields option. - strictDec := yaml.NewDecoder(bytes.NewReader(data)) - strictDec.KnownFields(true) - return strictDec.Decode(out) + // Strict() rejects unknown keys, catching typos like "focuss" or "identiy". + dec := yaml.NewDecoder(bytes.NewReader(data), yaml.Strict()) + return dec.Decode(out) } -// checkYAMLDepth recursively checks that YAML nodes don't exceed the depth limit -// or the total node count limit. It also detects alias cycles to prevent infinite -// recursion from crafted YAML with self-referential aliases. -func checkYAMLDepth(node *yaml.Node, depth, maxDepth, maxNodes int, seen map[*yaml.Node]struct{}, nodeCount *int) error { +// checkYAMLDepth recursively checks that YAML AST nodes don't exceed the depth +// limit or the total node count limit. It also detects alias cycles to prevent +// infinite recursion from crafted YAML with self-referential aliases. +func checkYAMLDepth(node ast.Node, depth, maxDepth, maxNodes int, seen map[ast.Node]struct{}, nodeCount *int) error { + if node == nil { + return nil + } + if depth > maxDepth { return fmt.Errorf("YAML nesting depth exceeds maximum (%d)", maxDepth) } @@ -210,16 +216,43 @@ func checkYAMLDepth(node *yaml.Node, depth, maxDepth, maxNodes int, seen map[*ya } seen[node] = struct{}{} - // Handle alias nodes: follow the alias to its anchor target. - // Increment depth when following aliases since they expand the effective structure. - if node.Kind == yaml.AliasNode && node.Alias != nil { - return checkYAMLDepth(node.Alias, depth+1, maxDepth, maxNodes, seen, nodeCount) - } - - for _, child := range node.Content { - if err := checkYAMLDepth(child, depth+1, maxDepth, maxNodes, seen, nodeCount); err != nil { + // Walk children based on node type. + switch n := node.(type) { + case *ast.MappingNode: + for _, value := range n.Values { + if err := checkYAMLDepth(value, depth+1, maxDepth, maxNodes, seen, nodeCount); err != nil { + return err + } + } + case *ast.MappingValueNode: + if err := checkYAMLDepth(n.Key, depth+1, maxDepth, maxNodes, seen, nodeCount); err != nil { return err } + if err := checkYAMLDepth(n.Value, depth+1, maxDepth, maxNodes, seen, nodeCount); err != nil { + return err + } + case *ast.SequenceNode: + for _, value := range n.Values { + if err := checkYAMLDepth(value, depth+1, maxDepth, maxNodes, seen, nodeCount); err != nil { + return err + } + } + case *ast.AliasNode: + // Follow alias to its target, incrementing depth since aliases expand + // the effective structure. + if err := checkYAMLDepth(n.Value, depth+1, maxDepth, maxNodes, seen, nodeCount); err != nil { + return err + } + case *ast.AnchorNode: + if err := checkYAMLDepth(n.Value, depth+1, maxDepth, maxNodes, seen, nodeCount); err != nil { + return err + } + case *ast.TagNode: + if err := checkYAMLDepth(n.Value, depth+1, maxDepth, maxNodes, seen, nodeCount); err != nil { + return err + } + // Scalar types (StringNode, IntegerNode, FloatNode, BoolNode, NullNode, + // InfinityNode, NanNode, LiteralNode, MergeKeyNode) are leaf nodes. } return nil } diff --git a/review/persona_test.go b/review/persona_test.go index b90eae7..e7e66ca 100644 --- a/review/persona_test.go +++ b/review/persona_test.go @@ -7,7 +7,7 @@ import ( "strings" "testing" - "gopkg.in/yaml.v3" + "github.com/goccy/go-yaml/ast" ) func TestLoadBuiltinPersona(t *testing.T) { @@ -459,7 +459,7 @@ func TestYAMLDeeplyNestedRejection(t *testing.T) { path := filepath.Join(dir, "deeply-nested.yaml") // Build a deeply nested YAML structure that exceeds MaxYAMLDepth (20). - // Each level adds 2 to the depth count (key + value mapping). + // Each level adds to the depth count via mapping values. var sb strings.Builder sb.WriteString("name: test\nidentity: test\nnested:\n") indent := " " @@ -505,30 +505,29 @@ func TestYAMLFileSizeLimit(t *testing.T) { func TestYAMLAliasCycleDetection(t *testing.T) { // Test that our checkYAMLDepth function handles alias cycles gracefully // by using the seen map to prevent infinite recursion. - // We test this directly because go-yaml's parser handles most cycles - // at parse time, but we need to ensure our checker is robust. // Create a node structure where an alias points to a parent node, - // simulating what could happen with malicious input that bypasses - // go-yaml's cycle detection. - parent := &yaml.Node{ - Kind: yaml.MappingNode, - Content: []*yaml.Node{ - {Kind: yaml.ScalarNode, Value: "name"}, - {Kind: yaml.ScalarNode, Value: "test"}, - {Kind: yaml.ScalarNode, Value: "nested"}, + // simulating what could happen with crafted input. + parent := &ast.MappingNode{ + Values: []*ast.MappingValueNode{ + { + Key: &ast.StringNode{Value: "name"}, + Value: &ast.StringNode{Value: "test"}, + }, }, } // Create a child that aliases back to the parent (artificial cycle) - aliasToParent := &yaml.Node{ - Kind: yaml.AliasNode, - Alias: parent, + aliasToParent := &ast.AliasNode{ + Value: parent, } - parent.Content = append(parent.Content, aliasToParent) + parent.Values = append(parent.Values, &ast.MappingValueNode{ + Key: &ast.StringNode{Value: "nested"}, + Value: aliasToParent, + }) nodeCount := 0 - seen := make(map[*yaml.Node]struct{}) + seen := make(map[ast.Node]struct{}) // This should NOT hang or stack overflow - the seen map prevents infinite recursion err := checkYAMLDepth(parent, 0, MaxYAMLDepth, MaxYAMLNodes, seen, &nodeCount) @@ -594,27 +593,26 @@ func TestYAMLNodeCountLimit(t *testing.T) { func TestCheckYAMLDepthCycleDetectionDirect(t *testing.T) { // Direct test of cycle detection in checkYAMLDepth by creating // a node structure with an artificial cycle. - // This tests the seen map logic independent of go-yaml's parsing. - node := &yaml.Node{ - Kind: yaml.MappingNode, - Content: []*yaml.Node{ - {Kind: yaml.ScalarNode, Value: "key"}, - {Kind: yaml.ScalarNode, Value: "value"}, + node := &ast.MappingNode{ + Values: []*ast.MappingValueNode{ + { + Key: &ast.StringNode{Value: "key"}, + Value: &ast.StringNode{Value: "value"}, + }, }, } // Create a cycle by making a child reference the parent - cycleChild := &yaml.Node{ - Kind: yaml.AliasNode, - Alias: node, // Points back to the parent + cycleChild := &ast.AliasNode{ + Value: node, // Points back to the parent } - node.Content = append(node.Content, - &yaml.Node{Kind: yaml.ScalarNode, Value: "cyclic"}, - cycleChild, - ) + node.Values = append(node.Values, &ast.MappingValueNode{ + Key: &ast.StringNode{Value: "cyclic"}, + Value: cycleChild, + }) nodeCount := 0 - seen := make(map[*yaml.Node]struct{}) + seen := make(map[ast.Node]struct{}) err := checkYAMLDepth(node, 0, MaxYAMLDepth, MaxYAMLNodes, seen, &nodeCount) // Should complete without infinite recursion due to cycle detection