fix: repair unescaped quotes in LLM JSON responses
CI / test (pull_request) Successful in 13s
CI / review (gpt-5, security, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Failing after 13s
CI / review (gpt-4.1, gpt, GPT_REVIEW_TOKEN) (pull_request) Failing after 13s
CI / review (gpt-5, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Failing after 12s
CI / test (pull_request) Successful in 13s
CI / review (gpt-5, security, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Failing after 13s
CI / review (gpt-4.1, gpt, GPT_REVIEW_TOKEN) (pull_request) Failing after 13s
CI / review (gpt-5, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Failing after 12s
LLMs (especially Sonnet) sometimes emit JSON with unescaped double quotes inside string values: for example, a value containing the text (e.g. "28") with the inner quotes left as-is instead of being written as \"28\". This caused parse failures in CI. Add a repairJSON fallback that uses a character-by-character scanner to identify interior quotes (those not followed by structural JSON characters) and escape them before retrying the parse. Fixes sonnet-review failures on gargoyle PR #551.
This commit is contained in:
+75
-1
@@ -29,7 +29,12 @@ func ParseResponse(response string) (*ReviewResult, error) {
|
||||
|
||||
var result ReviewResult
|
||||
if err := json.Unmarshal([]byte(cleaned), &result); err != nil {
|
||||
return nil, fmt.Errorf("parse LLM response as JSON: %w\nRaw response: %s", err, response)
|
||||
// LLMs sometimes produce JSON with unescaped quotes inside string values.
|
||||
// Try to repair before giving up.
|
||||
repaired := repairJSON(cleaned)
|
||||
if err2 := json.Unmarshal([]byte(repaired), &result); err2 != nil {
|
||||
return nil, fmt.Errorf("parse LLM response as JSON: %w\nRaw response: %s", err, response)
|
||||
}
|
||||
}
|
||||
|
||||
// Validate verdict
|
||||
@@ -74,3 +79,72 @@ func extractJSON(s string) string {
|
||||
s = strings.TrimSpace(s)
|
||||
return s
|
||||
}
|
||||
|
||||
// repairJSON attempts to fix common LLM JSON issues:
//   - Unescaped double quotes inside strings
//   - Raw control characters (e.g. literal newlines or tabs) inside strings
//
// Strategy: walk the input byte by byte (every character of interest is
// ASCII, so multi-byte UTF-8 sequences pass through untouched), tracking the
// stack of open containers and whether the current string is an object key or
// a value. A quote inside a string is kept as the closing delimiter only when
// what follows it is structurally plausible for that position (see
// isClosingQuote); otherwise it is escaped. Existing backslash escapes are
// copied verbatim.
//
// This is a best-effort heuristic: the result is not guaranteed to be valid
// JSON, so callers must still check the error from parsing it. Input that is
// already valid JSON is returned unchanged.
func repairJSON(s string) string {
	const hexDigits = "0123456789abcdef"

	var out strings.Builder
	out.Grow(len(s) + 64)

	var (
		stack     []byte // open containers, innermost last: '{' or '['
		inString  bool   // currently inside a string literal
		isKey     bool   // the current string is an object key
		expectKey bool   // the next string to open would be an object key
	)

	// top reports the innermost open container, or 0 at top level.
	top := func() byte {
		if len(stack) == 0 {
			return 0
		}
		return stack[len(stack)-1]
	}

	for i := 0; i < len(s); i++ {
		c := s[i]

		if !inString {
			out.WriteByte(c)
			switch c {
			case '{', '[':
				stack = append(stack, c)
				expectKey = c == '{'
			case '}', ']':
				// Tolerate unbalanced input: never pop an empty stack.
				if len(stack) > 0 {
					stack = stack[:len(stack)-1]
				}
				expectKey = false
			case ',':
				expectKey = top() == '{'
			case ':':
				expectKey = false
			case '"':
				inString, isKey = true, expectKey
				expectKey = false
			}
			continue
		}

		// Inside a string.
		switch {
		case c == '\\':
			// Existing escape sequence: copy the backslash and the byte it
			// escapes so an already-escaped quote is not re-examined.
			out.WriteByte(c)
			if i+1 < len(s) {
				i++
				out.WriteByte(s[i])
			}
		case c == '"':
			if isClosingQuote(s, i+1, isKey, top()) {
				out.WriteByte(c)
				inString = false
			} else {
				// Interior quote the model forgot to escape.
				out.WriteString(`\"`)
			}
		case c < 0x20:
			// JSON forbids raw control characters inside strings.
			switch c {
			case '\n':
				out.WriteString(`\n`)
			case '\r':
				out.WriteString(`\r`)
			case '\t':
				out.WriteString(`\t`)
			default:
				out.WriteString(`\u00`)
				out.WriteByte(hexDigits[c>>4])
				out.WriteByte(hexDigits[c&0x0f])
			}
		default:
			out.WriteByte(c)
		}
	}

	return out.String()
}

// isClosingQuote reports whether a quote whose following text starts at
// s[from] plausibly terminates the current string. isKey says whether the
// string is an object key; container is the innermost open container ('{',
// '[', or 0 at top level).
func isClosingQuote(s string, from int, isKey bool, container byte) bool {
	j := skipJSONWhitespace(s, from)
	if j >= len(s) {
		// End of input: nothing else this quote could be.
		return true
	}
	if isKey {
		// A key can only be followed by its colon.
		return s[j] == ':'
	}
	switch s[j] {
	case '}':
		return container != '['
	case ']':
		return container != '{'
	case ',':
		if container != '{' {
			return true
		}
		// Inside an object the comma must introduce the next key.
		k := skipJSONWhitespace(s, j+1)
		return k >= len(s) || s[k] == '"'
	}
	return false
}

// skipJSONWhitespace returns the index of the first byte at or after i that
// is not JSON whitespace (space, tab, newline, carriage return).
func skipJSONWhitespace(s string, i int) int {
	for i < len(s) && (s[i] == ' ' || s[i] == '\t' || s[i] == '\n' || s[i] == '\r') {
		i++
	}
	return i
}
|
||||
|
||||
Reference in New Issue
Block a user