fix: repair unescaped quotes in LLM JSON responses
CI / test (pull_request) Successful in 13s
CI / review (gpt-5, security, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Failing after 13s
CI / review (gpt-4.1, gpt, GPT_REVIEW_TOKEN) (pull_request) Failing after 13s
CI / review (gpt-5, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Failing after 12s

LLMs (especially Sonnet) sometimes emit JSON with unescaped double
quotes inside string values — for example, the literal text
(e.g. "28") appearing in a value without its quotes escaped as \".
This caused parse failures in CI.

Add a repairJSON fallback that uses a character-by-character scanner
to identify interior quotes (those not followed by structural JSON
characters) and escape them before retrying the parse.

Fixes sonnet-review failures on gargoyle PR #551.
This commit is contained in:
Rodin
2026-05-03 09:47:22 -07:00
parent 8d8a249481
commit 80a9a7675b
2 changed files with 112 additions and 1 deletions
+75 -1
View File
@@ -29,7 +29,12 @@ func ParseResponse(response string) (*ReviewResult, error) {
var result ReviewResult
if err := json.Unmarshal([]byte(cleaned), &result); err != nil {
return nil, fmt.Errorf("parse LLM response as JSON: %w\nRaw response: %s", err, response)
// LLMs sometimes produce JSON with unescaped quotes inside string values.
// Try to repair before giving up.
repaired := repairJSON(cleaned)
if err2 := json.Unmarshal([]byte(repaired), &result); err2 != nil {
return nil, fmt.Errorf("parse LLM response as JSON: %w\nRaw response: %s", err, response)
}
}
// Validate verdict
@@ -74,3 +79,72 @@ func extractJSON(s string) string {
s = strings.TrimSpace(s)
return s
}
// repairJSON attempts to fix common LLM JSON issues:
//   - Unescaped double quotes inside string values
//   - Raw control characters (newline, tab, ...) inside string values
//
// Strategy: walk the input byte by byte, tracking whether we are inside a
// string. Every character the scanner cares about is ASCII, and UTF-8
// continuation bytes never collide with ASCII, so multi-byte characters are
// copied through untouched.
//
// Inside a string, a double quote is treated as the closing delimiter only if
// the next non-whitespace character is structural (one of , } ] :) or the
// input ends; any other quote is assumed to be an interior quote and is
// escaped. Existing backslash escape sequences are passed through unchanged.
//
// This is a best-effort heuristic: an interior quote that happens to be
// followed by a structural character (for example `"say "hi", then go"`) is
// still taken as the end of the string. The result is not guaranteed to be
// valid JSON; callers must re-parse it and handle failure.
func repairJSON(s string) string {
	const hexDigits = "0123456789abcdef"

	var out strings.Builder
	out.Grow(len(s) + 64)

	inString := false
	for i := 0; i < len(s); i++ {
		c := s[i]

		if !inString {
			out.WriteByte(c)
			if c == '"' {
				inString = true
			}
			continue
		}

		switch {
		case c == '\\':
			// Escape sequence: copy the backslash and the escaped byte so an
			// already-escaped quote is never mistaken for a delimiter.
			out.WriteByte(c)
			if i+1 < len(s) {
				i++
				out.WriteByte(s[i])
			}

		case c == '"':
			// Closing delimiter or unescaped interior quote? Skip whitespace
			// and look at the next significant character.
			j := i + 1
			for j < len(s) && (s[j] == ' ' || s[j] == '\t' || s[j] == '\n' || s[j] == '\r') {
				j++
			}
			if j == len(s) || strings.IndexByte(",}]:", s[j]) >= 0 {
				out.WriteByte('"')
				inString = false
			} else {
				out.WriteString(`\"`)
			}

		case c < 0x20:
			// JSON forbids raw control characters inside strings; emit the
			// equivalent escape so the value survives parsing.
			switch c {
			case '\n':
				out.WriteString(`\n`)
			case '\r':
				out.WriteString(`\r`)
			case '\t':
				out.WriteString(`\t`)
			default:
				out.WriteString(`\u00`)
				out.WriteByte(hexDigits[c>>4])
				out.WriteByte(hexDigits[c&0x0f])
			}

		default:
			out.WriteByte(c)
		}
	}
	return out.String()
}