Files
review-bot/review/parser.go
T
Rodin 80a9a7675b
CI / test (pull_request) Successful in 13s
CI / review (gpt-5, security, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Failing after 13s
CI / review (gpt-4.1, gpt, GPT_REVIEW_TOKEN) (pull_request) Failing after 13s
CI / review (gpt-5, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Failing after 12s
fix: repair unescaped quotes in LLM JSON responses
LLMs (especially Sonnet) sometimes emit JSON with unescaped double
quotes inside string values (for example, the literal text `(e.g. "28")`)
instead of properly escaping them. This caused parse failures in CI.

Add a repairJSON fallback that uses a character-by-character scanner
to identify interior quotes (those not followed by structural JSON
characters) and escape them before retrying the parse.

Fixes sonnet-review failures on gargoyle PR #551.
2026-05-03 09:47:22 -07:00

151 lines
3.8 KiB
Go

package review
import (
"encoding/json"
"fmt"
"strings"
)
// Finding represents a single code review finding, decoded from one element
// of the "findings" array in the LLM's JSON output.
type Finding struct {
// Severity is the finding's level. ParseResponse rejects any value other
// than "MAJOR", "MINOR", or "NIT".
Severity string `json:"severity"`
// File is decoded from the "file" key; presumably the path of the file the
// finding refers to (TODO confirm against the prompt/schema).
File string `json:"file"`
// Line is decoded from the "line" key; presumably a line number within
// File (TODO confirm whether it is 1-based).
Line int `json:"line"`
// Finding is decoded from the "finding" key and holds the finding's text.
Finding string `json:"finding"`
}
// ReviewResult is the structured output from the LLM, decoded from the
// top-level JSON object of its response.
type ReviewResult struct {
// Verdict is the overall outcome. ParseResponse rejects any value other
// than "APPROVE" or "REQUEST_CHANGES".
Verdict string `json:"verdict"`
// Summary is decoded from the "summary" key.
Summary string `json:"summary"`
// Findings holds the individual findings; each element's Severity is
// validated by ParseResponse.
Findings []Finding `json:"findings"`
// Recommendation is decoded from the "recommendation" key.
Recommendation string `json:"recommendation"`
}
// ParseResponse decodes an LLM response into a ReviewResult and validates it.
//
// The response is first passed through extractJSON to strip any markdown
// fence. If the resulting text does not decode as JSON, it is run through
// repairJSON and decoded once more. When the second attempt also fails, the
// returned error wraps the error from the FIRST attempt and includes the raw
// response.
//
// After decoding, the verdict must be "APPROVE" or "REQUEST_CHANGES" and every
// finding's severity must be "MAJOR", "MINOR", or "NIT"; otherwise an error is
// returned and the result is nil.
func ParseResponse(response string) (*ReviewResult, error) {
	payload := extractJSON(response)

	parsed := new(ReviewResult)
	if firstErr := json.Unmarshal([]byte(payload), parsed); firstErr != nil {
		// LLMs sometimes leave double quotes unescaped inside string values;
		// give the repaired text one chance before reporting the failure.
		if retryErr := json.Unmarshal([]byte(repairJSON(payload)), parsed); retryErr != nil {
			return nil, fmt.Errorf("parse LLM response as JSON: %w\nRaw response: %s", firstErr, response)
		}
	}

	// Only the two known verdicts are accepted.
	if parsed.Verdict != "APPROVE" && parsed.Verdict != "REQUEST_CHANGES" {
		return nil, fmt.Errorf("invalid verdict %q (must be APPROVE or REQUEST_CHANGES)", parsed.Verdict)
	}

	// Every finding must carry one of the known severities.
	for idx := range parsed.Findings {
		sev := parsed.Findings[idx].Severity
		if sev != "MAJOR" && sev != "MINOR" && sev != "NIT" {
			return nil, fmt.Errorf("finding %d has invalid severity %q", idx, sev)
		}
	}

	return parsed, nil
}
// extractJSON pulls the JSON payload out of a response that may be wrapped in
// a markdown code fence.
//
// The input is trimmed of surrounding whitespace. If it then starts with
// "```", the opening fence line (e.g. "```json") is dropped whenever at least
// one more line follows it, and a final line consisting solely of "```" is
// dropped as well. The remainder is trimmed again and returned. Input that
// does not start with a fence is returned trimmed but otherwise unchanged.
//
// A missing closing fence is tolerated regardless of how many lines the body
// has, so a two-line response such as "```json\n{...}" yields "{...}".
func extractJSON(s string) string {
	s = strings.TrimSpace(s)
	if !strings.HasPrefix(s, "```") {
		return s
	}

	lines := strings.Split(s, "\n")

	// Drop the opening fence line. A lone fence line is left for the
	// closing-fence check below so that "```" on its own still yields "".
	if len(lines) > 1 {
		lines = lines[1:]
	}

	// Drop the closing fence if the last line is exactly one (TrimSpace also
	// absorbs a trailing "\r" from CRLF input).
	if n := len(lines); n > 0 && strings.TrimSpace(lines[n-1]) == "```" {
		lines = lines[:n-1]
	}

	return strings.TrimSpace(strings.Join(lines, "\n"))
}
// repairJSON attempts to fix a common LLM JSON issue: unescaped double quotes
// inside string values.
//
// Strategy: walk the text byte by byte, tracking whether we are inside a
// string. Outside a string everything is copied verbatim and a '"' opens a
// string. Inside a string, a backslash and the byte after it are copied
// verbatim (an existing escape sequence). Every other '"' is classified by
// closesJSONString: if it looks like the real end of the string it is copied
// as-is, otherwise it is emitted as \" so the string keeps going.
//
// The scan works on bytes rather than runes: every character it cares about
// is ASCII, and bytes of multi-byte UTF-8 sequences never collide with ASCII,
// so all other content passes through untouched.
//
// This is a heuristic. Text that is already valid JSON is returned unchanged,
// but an interior quote that happens to be followed by something that looks
// like valid JSON (for example `", "`) is still taken for a closing quote.
func repairJSON(s string) string {
	var out strings.Builder
	out.Grow(len(s) + 64)

	inString := false
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case !inString:
			out.WriteByte(c)
			if c == '"' {
				inString = true
			}
		case c == '\\':
			// Existing escape sequence: pass through both bytes so an
			// already-escaped quote is never re-examined.
			out.WriteByte(c)
			if i+1 < len(s) {
				i++
				out.WriteByte(s[i])
			}
		case c == '"':
			if closesJSONString(s, i+1) {
				inString = false
			} else {
				// Interior quote: escape it and stay inside the string.
				out.WriteByte('\\')
			}
			out.WriteByte('"')
		default:
			out.WriteByte(c)
		}
	}
	return out.String()
}

// closesJSONString reports whether a '"' whose following byte is at index pos
// plausibly terminates a JSON string. After optional whitespace it must be
// followed by end of input, by '}' or ']', or by ',' or ':' that is itself
// followed by end of input or the start of a JSON value/key. The extra check
// after ',' and ':' keeps prose such as `"foo", which ...` or `"foo": is ...`
// from being mistaken for structure; in those cases treating the quote as a
// terminator could never produce valid JSON anyway.
func closesJSONString(s string, pos int) bool {
	pos = skipJSONSpace(s, pos)
	if pos >= len(s) {
		// End of input: must be the closing quote.
		return true
	}
	switch s[pos] {
	case '}', ']':
		return true
	case ',', ':':
		next := skipJSONSpace(s, pos+1)
		return next >= len(s) || startsJSONValue(s, next)
	}
	return false
}

// skipJSONSpace returns the index of the first byte at or after pos that is
// not a space, tab, newline, or carriage return (len(s) if there is none).
func skipJSONSpace(s string, pos int) int {
	for pos < len(s) && (s[pos] == ' ' || s[pos] == '\t' || s[pos] == '\n' || s[pos] == '\r') {
		pos++
	}
	return pos
}

// startsJSONValue reports whether the text at index pos can begin a JSON
// value or object key: a string, object, array, number, or one of the
// literals true/false/null not immediately followed by a word character.
func startsJSONValue(s string, pos int) bool {
	if pos >= len(s) {
		return false
	}
	c := s[pos]
	if c == '"' || c == '{' || c == '[' || c == '-' || (c >= '0' && c <= '9') {
		return true
	}
	for _, lit := range [...]string{"true", "false", "null"} {
		if !strings.HasPrefix(s[pos:], lit) {
			continue
		}
		end := pos + len(lit)
		if end == len(s) {
			return true
		}
		// Reject words that merely start with a literal, e.g. "nullable".
		n := s[end]
		wordByte := n == '_' || (n >= '0' && n <= '9') || (n >= 'a' && n <= 'z') || (n >= 'A' && n <= 'Z')
		return !wordByte
	}
	return false
}