From 80a9a7675bc0b51d7e5c619ba9b7ccd88cc24f9a Mon Sep 17 00:00:00 2001 From: Rodin Date: Sun, 3 May 2026 09:47:22 -0700 Subject: [PATCH] fix: repair unescaped quotes in LLM JSON responses LLMs (especially Sonnet) sometimes emit JSON with unescaped double quotes inside string values, e.g. (e.g. "28") instead of properly escaping them. This caused parse failures in CI. Add a repairJSON fallback that uses a character-by-character scanner to identify interior quotes (those not followed by structural JSON characters) and escape them before retrying the parse. Fixes sonnet-review failures on gargoyle PR #551. --- review/parser.go | 76 ++++++++++++++++++++++++++++++++++++++++++- review/parser_test.go | 37 +++++++++++++++++++++ 2 files changed, 112 insertions(+), 1 deletion(-) diff --git a/review/parser.go b/review/parser.go index dc8eb73..a615e86 100644 --- a/review/parser.go +++ b/review/parser.go @@ -29,7 +29,12 @@ func ParseResponse(response string) (*ReviewResult, error) { var result ReviewResult if err := json.Unmarshal([]byte(cleaned), &result); err != nil { - return nil, fmt.Errorf("parse LLM response as JSON: %w\nRaw response: %s", err, response) + // LLMs sometimes produce JSON with unescaped quotes inside string values. + // Try to repair before giving up. + repaired := repairJSON(cleaned) + if err2 := json.Unmarshal([]byte(repaired), &result); err2 != nil { + return nil, fmt.Errorf("parse LLM response as JSON: %w\nRaw response: %s", err, response) + } } // Validate verdict @@ -74,3 +79,72 @@ func extractJSON(s string) string { s = strings.TrimSpace(s) return s } + +// repairJSON attempts to fix common LLM JSON issues: +// - Unescaped double quotes inside string values +// +// Strategy: walk the JSON character by character, tracking whether we're inside +// a string value. When we find a quote that doesn't look like a structural +// delimiter (not preceded by \, not followed by : , } ] or whitespace+structural), +// escape it. +func repairJSON(s string) string { + var out strings.Builder + out.Grow(len(s) + 64) + + inString := false + runes := []rune(s) + + for i := 0; i < len(runes); i++ { + c := runes[i] + + if !inString { + out.WriteRune(c) + if c == '"' { + inString = true + } + continue + } + + // We're inside a string + if c == '\\' { + // Escape sequence — pass through both characters + out.WriteRune(c) + if i+1 < len(runes) { + i++ + out.WriteRune(runes[i]) + } + continue + } + + if c == '"' { + // Is this the end of the string, or an unescaped interior quote? + // Look ahead: skip whitespace, then check for structural character + j := i + 1 + for j < len(runes) && (runes[j] == ' ' || runes[j] == '\t' || runes[j] == '\n' || runes[j] == '\r') { + j++ + } + if j < len(runes) { + next := runes[j] + if next == ',' || next == '}' || next == ']' || next == ':' { + // Structural — this is really the end of the string + out.WriteRune(c) + inString = false + continue + } + } else { + // End of input — must be closing quote + out.WriteRune(c) + inString = false + continue + } + // Not a structural close — this is an unescaped interior quote + out.WriteRune('\\') + out.WriteRune('"') + continue + } + + out.WriteRune(c) + } + + return out.String() +} diff --git a/review/parser_test.go b/review/parser_test.go index 138a866..ae8c071 100644 --- a/review/parser_test.go +++ b/review/parser_test.go @@ -1,6 +1,7 @@ package review import ( + "encoding/json" "testing" ) @@ -112,3 +113,39 @@ func TestParseResponse_MarkdownFencesNoLang(t *testing.T) { t.Errorf("expected APPROVE, got %q", result.Verdict) } } + +func TestParseResponse_UnescapedQuotesInStrings(t *testing.T) { + // Real failure from CI: Sonnet puts unescaped quotes like (e.g. "28") in findings + input := `{"verdict": "APPROVE", "summary": "Clean PR", "findings": [{"severity": "NIT", "file": "ci/Dockerfile", "line": 14, "finding": "The comment says OTP_VERSION is the major version (e.g. \"28\") but it actually contains unescaped quotes like (e.g. "28") which breaks JSON"}], "recommendation": "Ship it"}` + + result, err := ParseResponse(input) + if err != nil { + t.Fatalf("expected repair to handle unescaped quotes, got error: %v", err) + } + if result.Verdict != "APPROVE" { + t.Errorf("expected APPROVE, got %q", result.Verdict) + } + if len(result.Findings) != 1 { + t.Fatalf("expected 1 finding, got %d", len(result.Findings)) + } +} + +func TestRepairJSON_NoOpOnValid(t *testing.T) { + valid := `{"key": "value", "num": 42}` + result := repairJSON(valid) + if result != valid { + t.Errorf("repairJSON should not modify valid JSON\n got: %s\n want: %s", result, valid) + } +} + +func TestRepairJSON_FixesUnescapedQuotes(t *testing.T) { + // Interior quote followed by non-structural character + input := `{"msg": "use "foo" here"}` + result := repairJSON(input) + + // Should be parseable now + var m map[string]interface{} + if err := json.Unmarshal([]byte(result), &m); err != nil { + t.Fatalf("repaired JSON should parse, got: %v\nrepaired: %s", err, result) + } +}