fix: repair unescaped quotes in LLM JSON responses #45
+75
-1
@@ -29,7 +29,12 @@ func ParseResponse(response string) (*ReviewResult, error) {
|
||||
|
||||
var result ReviewResult
|
||||
if err := json.Unmarshal([]byte(cleaned), &result); err != nil {
|
||||
return nil, fmt.Errorf("parse LLM response as JSON: %w\nRaw response: %s", err, response)
|
||||
// LLMs sometimes produce JSON with unescaped quotes inside string values.
|
||||
// Try to repair before giving up.
|
||||
repaired := repairJSON(cleaned)
|
||||
if err2 := json.Unmarshal([]byte(repaired), &result); err2 != nil {
|
||||
return nil, fmt.Errorf("parse LLM response as JSON: %w\nRaw response: %s", err, response)
|
||||
|
|
||||
}
|
||||
}
|
||||
|
||||
// Validate verdict
|
||||
@@ -74,3 +79,72 @@ func extractJSON(s string) string {
|
||||
s = strings.TrimSpace(s)
|
||||
|
sonnet-review-bot
commented
[NIT] The comment above repairJSON says it will "pick the LAST" valid closing quote to maximize content, but findClosingQuote actually returns the FIRST candidate that yields valid continuation. Align the comment with the implementation or adjust the selection to match the described strategy. **[NIT]** The comment above repairJSON says it will "pick the LAST" valid closing quote to maximize content, but findClosingQuote actually returns the FIRST candidate that yields valid continuation. Align the comment with the implementation or adjust the selection to match the described strategy.
|
||||
return s
|
||||
}
|
||||
|
||||
// repairJSON attempts to fix common LLM JSON issues:
|
||||
// - Unescaped double quotes inside string values
|
||||
//
|
||||
// Strategy: walk the JSON character by character, tracking whether we're inside
|
||||
// a string value. When we find a quote that doesn't look like a structural
|
||||
// delimiter (not preceded by \, not followed by : , } ] or whitespace+structural),
|
||||
// escape it.
|
||||
func repairJSON(s string) string {
|
||||
var out strings.Builder
|
||||
out.Grow(len(s) + 64)
|
||||
|
||||
inString := false
|
||||
runes := []rune(s)
|
||||
|
||||
for i := 0; i < len(runes); i++ {
|
||||
c := runes[i]
|
||||
|
||||
if !inString {
|
||||
out.WriteRune(c)
|
||||
if c == '"' {
|
||||
inString = true
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// We're inside a string
|
||||
if c == '\\' {
|
||||
// Escape sequence — pass through both characters
|
||||
out.WriteRune(c)
|
||||
if i+1 < len(runes) {
|
||||
i++
|
||||
out.WriteRune(runes[i])
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if c == '"' {
|
||||
// Is this the end of the string, or an unescaped interior quote?
|
||||
// Look ahead: skip whitespace, then check for structural character
|
||||
j := i + 1
|
||||
for j < len(runes) && (runes[j] == ' ' || runes[j] == '\t' || runes[j] == '\n' || runes[j] == '\r') {
|
||||
j++
|
||||
}
|
||||
if j < len(runes) {
|
||||
next := runes[j]
|
||||
if next == ',' || next == '}' || next == ']' || next == ':' {
|
||||
// Structural — this is really the end of the string
|
||||
out.WriteRune(c)
|
||||
inString = false
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
// End of input — must be closing quote
|
||||
out.WriteRune(c)
|
||||
inString = false
|
||||
continue
|
||||
}
|
||||
// Not a structural close — this is an unescaped interior quote
|
||||
out.WriteRune('\\')
|
||||
out.WriteRune('"')
|
||||
continue
|
||||
}
|
||||
|
||||
out.WriteRune(c)
|
||||
}
|
||||
|
||||
return out.String()
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package review
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
)
|
||||
|
||||
@@ -112,3 +113,39 @@ func TestParseResponse_MarkdownFencesNoLang(t *testing.T) {
|
||||
t.Errorf("expected APPROVE, got %q", result.Verdict)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseResponse_UnescapedQuotesInStrings(t *testing.T) {
|
||||
// Real failure from CI: Sonnet puts unescaped quotes like (e.g. "28") in findings
|
||||
input := `{"verdict": "APPROVE", "summary": "Clean PR", "findings": [{"severity": "NIT", "file": "ci/Dockerfile", "line": 14, "finding": "The comment says OTP_VERSION is the major version (e.g. \"28\") but it actually contains unescaped quotes like (e.g. "28") which breaks JSON"}], "recommendation": "Ship it"}`
|
||||
|
||||
result, err := ParseResponse(input)
|
||||
if err != nil {
|
||||
t.Fatalf("expected repair to handle unescaped quotes, got error: %v", err)
|
||||
}
|
||||
if result.Verdict != "APPROVE" {
|
||||
t.Errorf("expected APPROVE, got %q", result.Verdict)
|
||||
}
|
||||
if len(result.Findings) != 1 {
|
||||
t.Fatalf("expected 1 finding, got %d", len(result.Findings))
|
||||
}
|
||||
}
|
||||
|
||||
func TestRepairJSON_NoOpOnValid(t *testing.T) {
|
||||
valid := `{"key": "value", "num": 42}`
|
||||
result := repairJSON(valid)
|
||||
if result != valid {
|
||||
t.Errorf("repairJSON should not modify valid JSON\n got: %s\n want: %s", result, valid)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRepairJSON_FixesUnescapedQuotes(t *testing.T) {
|
||||
// Interior quote followed by non-structural character
|
||||
input := `{"msg": "use "foo" here"}`
|
||||
result := repairJSON(input)
|
||||
|
||||
// Should be parseable now
|
||||
var m map[string]interface{}
|
||||
if err := json.Unmarshal([]byte(result), &m); err != nil {
|
||||
t.Fatalf("repaired JSON should parse, got: %v\nrepaired: %s", err, result)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user
[MINOR] The returned error includes the full raw LLM response. This can lead to information leakage and log amplification if upstream logs the error verbatim. Consider redacting or truncating the response (e.g., first N bytes) before including it in error messages.