package review import ( "encoding/json" "fmt" "strings" ) // Finding represents a single code review finding. type Finding struct { Severity string `json:"severity"` File string `json:"file"` Line int `json:"line"` Finding string `json:"finding"` } // ReviewResult is the structured output from the LLM. type ReviewResult struct { Verdict string `json:"verdict"` Summary string `json:"summary"` Findings []Finding `json:"findings"` Recommendation string `json:"recommendation"` } // ParseResponse parses the LLM response into a ReviewResult. func ParseResponse(response string) (*ReviewResult, error) { // Try to extract JSON from the response — the LLM might wrap it in markdown fences cleaned := extractJSON(response) var result ReviewResult if err := json.Unmarshal([]byte(cleaned), &result); err != nil { // LLMs sometimes produce JSON with unescaped quotes inside string values. // Try to repair before giving up. repaired := repairJSON(cleaned) if err2 := json.Unmarshal([]byte(repaired), &result); err2 != nil { // Include diagnostic info: lengths help identify truncation rawLen := len(response) cleanedLen := len(cleaned) preview := cleaned if len(preview) > 200 { preview = preview[:100] + "..." + preview[len(preview)-100:] } return nil, fmt.Errorf("parse LLM response as JSON: %w\nRaw length: %d, cleaned length: %d\nCleaned preview: %s", err, rawLen, cleanedLen, preview) } } // Validate verdict switch result.Verdict { case "APPROVE", "REQUEST_CHANGES": // valid default: return nil, fmt.Errorf("invalid verdict %q (must be APPROVE or REQUEST_CHANGES)", result.Verdict) } // Validate finding severities for i, f := range result.Findings { switch f.Severity { case "MAJOR", "MINOR", "NIT": // valid default: return nil, fmt.Errorf("finding %d has invalid severity %q", i, f.Severity) } } return &result, nil } // extractJSON attempts to pull JSON from a potentially markdown-wrapped response. 
func extractJSON(s string) string {
	s = strings.TrimSpace(s)
	// Only a response that starts with a markdown code fence is unwrapped;
	// anything else is returned trimmed.
	if !strings.HasPrefix(s, "```") {
		return s
	}

	lines := strings.Split(s, "\n")
	// Drop the opening fence line (``` or ```json) whenever something follows
	// it. That includes the two-line case of a fence plus single-line JSON
	// with no closing fence, e.g. a truncated response.
	if len(lines) > 1 {
		lines = lines[1:]
	}
	// Drop the closing fence line, if present.
	if n := len(lines); n > 0 && strings.TrimSpace(lines[n-1]) == "```" {
		lines = lines[:n-1]
	}
	return strings.TrimSpace(strings.Join(lines, "\n"))
}

// skipJSONSpace returns the index of the first rune at or after pos that is
// not a space, tab, line feed or carriage return, or len(runes) if there is
// none.
func skipJSONSpace(runes []rune, pos int) int {
	for pos < len(runes) {
		switch runes[pos] {
		case ' ', '\t', '\n', '\r':
			pos++
		default:
			return pos
		}
	}
	return pos
}

// repairJSON attempts to fix a common LLM JSON defect: unescaped double quotes
// inside string values.
//
// Strategy: walk the input rune by rune. A string that does not directly
// follow ':' (an object key or an array element) is copied through using the
// ordinary rule that the first unescaped quote closes it. For a string that
// follows ':', findClosingQuote chooses the closing quote (the FIRST unescaped
// quote whose continuation looks structurally valid) and every quote before it
// is escaped. Existing backslash escapes are passed through untouched.
//
// If a value string has no closing quote at all, the rest of the input is
// treated as its content and a closing quote is appended.
func repairJSON(s string) string {
	runes := []rune(s)
	var out strings.Builder
	out.Grow(len(s) + 64)

	i := 0
	for i < len(runes) {
		c := runes[i]
		if c != '"' {
			out.WriteRune(c)
			i++
			continue
		}

		// We hit an opening quote.
		out.WriteRune('"')
		i++

		if !isValuePosition(runes, i-1) {
			// Key or array element: copy up to and including the first
			// unescaped quote, keeping escape pairs together.
			for i < len(runes) {
				ch := runes[i]
				out.WriteRune(ch)
				i++
				if ch == '"' {
					break
				}
				if ch == '\\' && i < len(runes) {
					out.WriteRune(runes[i])
					i++
				}
			}
			continue
		}

		// Value string: locate the real close, then escape interior quotes.
		closeIdx := findClosingQuote(runes, i)
		for j := i; j < closeIdx; j++ {
			switch ch := runes[j]; {
			case ch == '\\' && j+1 < closeIdx:
				// Already-escaped sequence: pass both runes through.
				out.WriteRune(ch)
				j++
				out.WriteRune(runes[j])
			case ch == '"':
				out.WriteString(`\"`)
			default:
				out.WriteRune(ch)
			}
		}
		out.WriteRune('"')
		i = closeIdx + 1
	}
	return out.String()
}

// isValuePosition determines if the quote at position i is opening a JSON value
// string (as opposed to an object key). Only a quote whose nearest preceding
// non-whitespace rune is ':' counts; array elements and keys are left alone
// (parsed normally), as is a quote with nothing but whitespace before it.
func isValuePosition(runes []rune, i int) bool {
	for j := i - 1; j >= 0; j-- {
		switch runes[j] {
		case ' ', '\t', '\n', '\r':
			continue
		}
		return runes[j] == ':'
	}
	return false
}

// findClosingQuote finds the index of the true closing quote for a JSON string
// value starting at position start (the character after the opening quote).
//
// It returns the FIRST unescaped quote for which isValidJSONAfterClose accepts
// the continuation. If no quote qualifies, the last unescaped quote is
// returned; if there is no unescaped quote at all, len(runes) is returned.
//
// The scan stops at the first acceptable quote instead of collecting every
// quote up to the end of the input, so a document with many values is not
// rescanned in full for each one.
func findClosingQuote(runes []rune, start int) int {
	last := -1
	for j := start; j < len(runes); j++ {
		switch runes[j] {
		case '\\':
			j++ // skip the escaped character
		case '"':
			if isValidJSONAfterClose(runes, j+1) {
				return j
			}
			last = j
		}
	}
	if last < 0 {
		return len(runes)
	}
	return last
}

// isValidJSONAfterClose checks whether the runes after a candidate closing quote
// look like a valid continuation for a string VALUE that followed ':'. Such a
// value is always a direct member of an object, so after optional whitespace
// only three continuations are accepted:
//
//   - end of input (tolerates truncated responses);
//   - '}' whose own continuation passes isValidAfterContainerClose;
//   - ',' followed by a complete "key" string and ':'.
//
// Requiring a real key after the comma keeps prose such as
// `say "foo", "bar" or 5` inside a value from being mistaken for the end of
// the string. ':' and ']' directly after the quote are rejected.
func isValidJSONAfterClose(runes []rune, pos int) bool {
	j := skipJSONSpace(runes, pos)
	if j >= len(runes) {
		return true
	}
	switch runes[j] {
	case '}':
		return isValidAfterContainerClose(runes, j+1)
	case ',':
		return looksLikeObjectKey(runes, skipJSONSpace(runes, j+1))
	}
	return false
}

// looksLikeObjectKey reports whether a quoted string starts at pos and is
// followed, after optional whitespace, by ':'. The string ends at its first
// unescaped quote.
func looksLikeObjectKey(runes []rune, pos int) bool {
	if pos >= len(runes) || runes[pos] != '"' {
		return false
	}
	for j := pos + 1; j < len(runes); j++ {
		switch runes[j] {
		case '\\':
			j++ // skip the escaped character
		case '"':
			k := skipJSONSpace(runes, j+1)
			return k < len(runes) && runes[k] == ':'
		}
	}
	return false
}

// isValidAfterContainerClose checks that after a } or ], the continuation is
// structurally valid: any run of further closes ending at EOF, or a comma
// followed by the start of a JSON token. Anything else, including a trailing
// comma with nothing after it, is rejected.
func isValidAfterContainerClose(runes []rune, pos int) bool {
	j := pos
	for {
		j = skipJSONSpace(runes, j)
		if j >= len(runes) {
			return true
		}
		switch runes[j] {
		case '}', ']':
			j++
		case ',':
			return isJSONTokenStart(runes, skipJSONSpace(runes, j+1))
		default:
			return false
		}
	}
}

// isJSONTokenStart returns true if the rune at pos could begin a JSON value or
// key: a quote, '{', '[', a digit, '-', or one of the keywords true, false and
// null spelled out in full. It returns false when pos is at or past the end.
func isJSONTokenStart(runes []rune, pos int) bool {
	if pos >= len(runes) {
		return false
	}
	switch r := runes[pos]; {
	case r == '"', r == '{', r == '[':
		return true
	case r == 't':
		return hasKeywordAt(runes, pos, "true")
	case r == 'f':
		return hasKeywordAt(runes, pos, "false")
	case r == 'n':
		return hasKeywordAt(runes, pos, "null")
	case r == '-', r >= '0' && r <= '9':
		return true
	}
	return false
}

// hasKeywordAt reports whether the runes starting at pos spell kw exactly.
func hasKeywordAt(runes []rune, pos int, kw string) bool {
	end := pos + len(kw)
	return end <= len(runes) && string(runes[pos:end]) == kw
}