fix: redesign repairJSON to handle all reviewer-reported edge cases
CI / test (pull_request) Successful in 11s
CI / review (gpt-4.1, gpt, GPT_REVIEW_TOKEN) (pull_request) Failing after 10s
CI / review (gpt-5, security, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Failing after 10s
CI / review (gpt-5, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Failing after 11s

Rewrites the JSON repair algorithm to address two correctness bugs
identified in code review:

1. Interior quoted word before comma: "say "yes", and go" was
   misidentified as structural close because "," followed the quote.

2. JSON-shaped content in strings: {"key": "val"} inside a string
   value was being parsed as actual JSON structure.

The new approach:
- Distinguishes keys from values (only values need repair)
- Uses first-valid-candidate scan with deep lookahead
- Verifies that after a candidate close, the continuation is not just
  a structural char but a complete valid JSON pattern
- Validates keyword tokens (true/false/null) fully, not just first char
- Checks container closes recursively for valid continuation

Adds comprehensive tests for all reported edge cases plus a complex
combined scenario with nested JSON-like content, quoted words before
commas, and multiple failure modes in one string.
This commit is contained in:
Rodin
2026-05-04 21:27:39 -07:00
parent 80a9a7675b
commit 25d1a670bf
2 changed files with 275 additions and 44 deletions
+73
View File
@@ -149,3 +149,76 @@ func TestRepairJSON_FixesUnescapedQuotes(t *testing.T) {
t.Fatalf("repaired JSON should parse, got: %v\nrepaired: %s", err, result)
}
}
func TestRepairJSON_InteriorQuoteBeforeComma(t *testing.T) {
// Bug reported by reviewer: interior quoted word immediately before a comma
input := `{"msg": "say "yes", and go"}`
result := repairJSON(input)
var m map[string]interface{}
if err := json.Unmarshal([]byte(result), &m); err != nil {
t.Fatalf("repaired JSON should parse, got: %v\nrepaired: %s", err, result)
}
// The full string content should be preserved
msg, ok := m["msg"].(string)
if !ok {
t.Fatal("msg field missing or not a string")
}
if msg != `say "yes", and go` {
t.Errorf("unexpected msg content: %q", msg)
}
}
func TestRepairJSON_InteriorQuoteBeforeCloseBrace(t *testing.T) {
// Bug reported by reviewer: JSON-shaped syntax inside string values
input := `{"msg": "input map {"key": "val"} caused error"}`
result := repairJSON(input)
var m map[string]interface{}
if err := json.Unmarshal([]byte(result), &m); err != nil {
t.Fatalf("repaired JSON should parse, got: %v\nrepaired: %s", err, result)
}
}
func TestRepairJSON_MultipleFields(t *testing.T) {
// Multiple string fields with unescaped quotes in different positions
input := `{"a": "hello "world"", "b": "foo"}`
result := repairJSON(input)
var m map[string]interface{}
if err := json.Unmarshal([]byte(result), &m); err != nil {
t.Fatalf("repaired JSON should parse, got: %v\nrepaired: %s", err, result)
}
if _, ok := m["b"]; !ok {
t.Error("expected 'b' field to be preserved")
}
}
func TestRepairJSON_PreservesEscapedQuotes(t *testing.T) {
// Already-escaped quotes should not be double-escaped
input := `{"msg": "already \"escaped\" here"}`
result := repairJSON(input)
if result != input {
t.Errorf("repairJSON should not modify already-escaped quotes\n got: %s\n want: %s", result, input)
}
var m map[string]interface{}
if err := json.Unmarshal([]byte(result), &m); err != nil {
t.Fatalf("repaired JSON should parse, got: %v\nrepaired: %s", err, result)
}
}
func TestRepairJSON_ComplexNestedContent(t *testing.T) {
// Combines both reviewer bugs: quoted words before commas AND JSON-like content
input := `{"verdict": "APPROVE", "findings": [{"finding": "The map {"key": "val"} and (e.g. "28") and say "yes", then stop"}]}`
result := repairJSON(input)
var parsed map[string]interface{}
if err := json.Unmarshal([]byte(result), &parsed); err != nil {
t.Fatalf("repaired JSON should parse, got: %v\nrepaired: %s", err, result)
}
if parsed["verdict"] != "APPROVE" {
t.Errorf("expected verdict APPROVE, got %v", parsed["verdict"])
}
}