fix(deps): replace gopkg.in/yaml.v3 with github.com/goccy/go-yaml

Fixes #87.

PR #58 incorrectly added gopkg.in/yaml.v3 (abandoned library) instead of
github.com/goccy/go-yaml as required by issue #57.

Changes:
- Replace gopkg.in/yaml.v3 with github.com/goccy/go-yaml v1.19.2
- Update review/persona.go to use goccy/go-yaml API:
  - parser.ParseBytes for AST-based depth/node count checking
  - yaml.Strict() decoder option instead of KnownFields(true)
  - ast.Node types instead of yaml.Node for tree walking
- Update review/persona_test.go to use ast types for cycle tests
- Remove gopkg.in/yaml.v3 from go.mod and go.sum

All existing YAML tests pass with the new library.
This commit is contained in:
claw
2026-05-12 13:27:30 -07:00
parent 2036f57011
commit 32c89330aa
4 changed files with 93 additions and 64 deletions
+1 -1
View File
@@ -2,4 +2,4 @@ module gitea.weiker.me/rodin/review-bot
go 1.26.2
require gopkg.in/yaml.v3 v3.0.1
require github.com/goccy/go-yaml v1.19.2
+2 -4
View File
@@ -1,4 +1,2 @@
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
github.com/goccy/go-yaml v1.19.2 h1:PmFC1S6h8ljIz6gMRBopkjP1TVT7xuwrButHID66PoM=
github.com/goccy/go-yaml v1.19.2/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA=
+61 -28
View File
@@ -10,7 +10,9 @@ import (
"strings"
"unicode/utf8"
"gopkg.in/yaml.v3"
"github.com/goccy/go-yaml"
"github.com/goccy/go-yaml/ast"
"github.com/goccy/go-yaml/parser"
)
//go:embed personas/*.yaml
@@ -142,7 +144,7 @@ func parsePersona(data []byte, source string) (*Persona, error) {
err = unmarshalYAMLWithDepthLimit(data, &p, MaxYAMLDepth)
} else {
// Use json.Decoder with DisallowUnknownFields for consistency with
// YAML's KnownFields(true) - both reject unknown fields to catch typos.
// YAML's Strict() - both reject unknown fields to catch typos.
dec := json.NewDecoder(bytes.NewReader(data))
dec.DisallowUnknownFields()
err = dec.Decode(&p)
@@ -161,39 +163,43 @@ func parsePersona(data []byte, source string) (*Persona, error) {
// nested structures and catches typos in field names.
// Multi-document YAML files are rejected to prevent silent data loss.
func unmarshalYAMLWithDepthLimit(data []byte, out any, maxDepth int) error {
// First pass: decode into a yaml.Node to check depth limits and node counts.
// This prevents stack exhaustion before we attempt to decode into structs.
var node yaml.Node
dec := yaml.NewDecoder(bytes.NewReader(data))
if err := dec.Decode(&node); err != nil {
// First pass: parse into AST to check depth limits, node counts, and
// multi-document rejection. This prevents stack exhaustion before we
// attempt to decode into structs.
file, err := parser.ParseBytes(data, 0)
if err != nil {
return err
}
// Reject multi-document YAML files - silently ignoring additional documents
// could lead to confusing behavior where users think their changes take effect.
var extra yaml.Node
if dec.Decode(&extra) == nil {
if len(file.Docs) > 1 {
return fmt.Errorf("multi-document YAML is not supported; only single-document files are allowed")
}
if len(file.Docs) == 0 {
return fmt.Errorf("empty YAML document")
}
nodeCount := 0
if err := checkYAMLDepth(&node, 0, maxDepth, MaxYAMLNodes, make(map[*yaml.Node]struct{}), &nodeCount); err != nil {
if err := checkYAMLDepth(file.Docs[0].Body, 0, maxDepth, MaxYAMLNodes, make(map[ast.Node]struct{}), &nodeCount); err != nil {
return err
}
// Second pass: decode with strict field checking enabled.
// KnownFields(true) rejects unknown keys, catching typos like "focuss" or "identiy".
// We must re-decode from the original data because yaml.Node.Decode() doesn't
// support the KnownFields option.
strictDec := yaml.NewDecoder(bytes.NewReader(data))
strictDec.KnownFields(true)
return strictDec.Decode(out)
// Strict() rejects unknown keys, catching typos like "focuss" or "identiy".
dec := yaml.NewDecoder(bytes.NewReader(data), yaml.Strict())
return dec.Decode(out)
}
// checkYAMLDepth recursively checks that YAML nodes don't exceed the depth limit
// or the total node count limit. It also detects alias cycles to prevent infinite
// recursion from crafted YAML with self-referential aliases.
func checkYAMLDepth(node *yaml.Node, depth, maxDepth, maxNodes int, seen map[*yaml.Node]struct{}, nodeCount *int) error {
// checkYAMLDepth recursively checks that YAML AST nodes don't exceed the depth
// limit or the total node count limit. It also detects alias cycles to prevent
// infinite recursion from crafted YAML with self-referential aliases.
func checkYAMLDepth(node ast.Node, depth, maxDepth, maxNodes int, seen map[ast.Node]struct{}, nodeCount *int) error {
if node == nil {
return nil
}
if depth > maxDepth {
return fmt.Errorf("YAML nesting depth exceeds maximum (%d)", maxDepth)
}
@@ -210,16 +216,43 @@ func checkYAMLDepth(node *yaml.Node, depth, maxDepth, maxNodes int, seen map[*ya
}
seen[node] = struct{}{}
// Handle alias nodes: follow the alias to its anchor target.
// Increment depth when following aliases since they expand the effective structure.
if node.Kind == yaml.AliasNode && node.Alias != nil {
return checkYAMLDepth(node.Alias, depth+1, maxDepth, maxNodes, seen, nodeCount)
}
for _, child := range node.Content {
if err := checkYAMLDepth(child, depth+1, maxDepth, maxNodes, seen, nodeCount); err != nil {
// Walk children based on node type.
switch n := node.(type) {
case *ast.MappingNode:
for _, value := range n.Values {
if err := checkYAMLDepth(value, depth+1, maxDepth, maxNodes, seen, nodeCount); err != nil {
return err
}
}
case *ast.MappingValueNode:
if err := checkYAMLDepth(n.Key, depth+1, maxDepth, maxNodes, seen, nodeCount); err != nil {
return err
}
if err := checkYAMLDepth(n.Value, depth+1, maxDepth, maxNodes, seen, nodeCount); err != nil {
return err
}
case *ast.SequenceNode:
for _, value := range n.Values {
if err := checkYAMLDepth(value, depth+1, maxDepth, maxNodes, seen, nodeCount); err != nil {
return err
}
}
case *ast.AliasNode:
// Follow alias to its target, incrementing depth since aliases expand
// the effective structure.
if err := checkYAMLDepth(n.Value, depth+1, maxDepth, maxNodes, seen, nodeCount); err != nil {
return err
}
case *ast.AnchorNode:
if err := checkYAMLDepth(n.Value, depth+1, maxDepth, maxNodes, seen, nodeCount); err != nil {
return err
}
case *ast.TagNode:
if err := checkYAMLDepth(n.Value, depth+1, maxDepth, maxNodes, seen, nodeCount); err != nil {
return err
}
// Scalar types (StringNode, IntegerNode, FloatNode, BoolNode, NullNode,
// InfinityNode, NanNode, LiteralNode, MergeKeyNode) are leaf nodes.
}
return nil
}
+29 -31
View File
@@ -7,7 +7,7 @@ import (
"strings"
"testing"
"gopkg.in/yaml.v3"
"github.com/goccy/go-yaml/ast"
)
func TestLoadBuiltinPersona(t *testing.T) {
@@ -459,7 +459,7 @@ func TestYAMLDeeplyNestedRejection(t *testing.T) {
path := filepath.Join(dir, "deeply-nested.yaml")
// Build a deeply nested YAML structure that exceeds MaxYAMLDepth (20).
// Each level adds 2 to the depth count (key + value mapping).
// Each level adds to the depth count via mapping values.
var sb strings.Builder
sb.WriteString("name: test\nidentity: test\nnested:\n")
indent := " "
@@ -505,30 +505,29 @@ func TestYAMLFileSizeLimit(t *testing.T) {
func TestYAMLAliasCycleDetection(t *testing.T) {
// Test that our checkYAMLDepth function handles alias cycles gracefully
// by using the seen map to prevent infinite recursion.
// We test this directly because go-yaml's parser handles most cycles
// at parse time, but we need to ensure our checker is robust.
// Create a node structure where an alias points to a parent node,
// simulating what could happen with malicious input that bypasses
// go-yaml's cycle detection.
parent := &yaml.Node{
Kind: yaml.MappingNode,
Content: []*yaml.Node{
{Kind: yaml.ScalarNode, Value: "name"},
{Kind: yaml.ScalarNode, Value: "test"},
{Kind: yaml.ScalarNode, Value: "nested"},
// simulating what could happen with crafted input.
parent := &ast.MappingNode{
Values: []*ast.MappingValueNode{
{
Key: &ast.StringNode{Value: "name"},
Value: &ast.StringNode{Value: "test"},
},
},
}
// Create a child that aliases back to the parent (artificial cycle)
aliasToParent := &yaml.Node{
Kind: yaml.AliasNode,
Alias: parent,
aliasToParent := &ast.AliasNode{
Value: parent,
}
parent.Content = append(parent.Content, aliasToParent)
parent.Values = append(parent.Values, &ast.MappingValueNode{
Key: &ast.StringNode{Value: "nested"},
Value: aliasToParent,
})
nodeCount := 0
seen := make(map[*yaml.Node]struct{})
seen := make(map[ast.Node]struct{})
// This should NOT hang or stack overflow - the seen map prevents infinite recursion
err := checkYAMLDepth(parent, 0, MaxYAMLDepth, MaxYAMLNodes, seen, &nodeCount)
@@ -594,27 +593,26 @@ func TestYAMLNodeCountLimit(t *testing.T) {
func TestCheckYAMLDepthCycleDetectionDirect(t *testing.T) {
// Direct test of cycle detection in checkYAMLDepth by creating
// a node structure with an artificial cycle.
// This tests the seen map logic independent of go-yaml's parsing.
node := &yaml.Node{
Kind: yaml.MappingNode,
Content: []*yaml.Node{
{Kind: yaml.ScalarNode, Value: "key"},
{Kind: yaml.ScalarNode, Value: "value"},
node := &ast.MappingNode{
Values: []*ast.MappingValueNode{
{
Key: &ast.StringNode{Value: "key"},
Value: &ast.StringNode{Value: "value"},
},
},
}
// Create a cycle by making a child reference the parent
cycleChild := &yaml.Node{
Kind: yaml.AliasNode,
Alias: node, // Points back to the parent
cycleChild := &ast.AliasNode{
Value: node, // Points back to the parent
}
node.Content = append(node.Content,
&yaml.Node{Kind: yaml.ScalarNode, Value: "cyclic"},
cycleChild,
)
node.Values = append(node.Values, &ast.MappingValueNode{
Key: &ast.StringNode{Value: "cyclic"},
Value: cycleChild,
})
nodeCount := 0
seen := make(map[*yaml.Node]struct{})
seen := make(map[ast.Node]struct{})
err := checkYAMLDepth(node, 0, MaxYAMLDepth, MaxYAMLNodes, seen, &nodeCount)
// Should complete without infinite recursion due to cycle detection