db479d0ff4
CI / test (pull_request) Successful in 15s
CI / review (/openai/v1, gpt-4.1, gpt41, openai, GPT_REVIEW_TOKEN) (pull_request) Successful in 25s
CI / review (/openai/v1, gpt-4.1-mini, gpt41-mini, openai, GPT_REVIEW_TOKEN) (pull_request) Successful in 29s
CI / review (/anthropic/v1, claude-sonnet-4-6, sonnet, anthropic, SONNET_REVIEW_TOKEN) (pull_request) Successful in 49s
CI / review (/openai/v1, gpt-5, security, openai, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Successful in 50s
CI / review (/openai/v1, gpt-5, gpt, openai, GPT_REVIEW_TOKEN) (pull_request) Successful in 1m15s
CI / review (/openai/v1, gpt-5-mini, gpt5-mini, openai, GPT_REVIEW_TOKEN) (pull_request) Successful in 52s
Addresses intermittent 'unexpected end of JSON input' failures where the LLM response body is truncated in transit between the proxy and client. Root cause: network-level truncation where io.ReadAll returns partial data (observed in 3/50 CI runs through HAI proxy). The response body reading was already using io.ReadAll correctly, but transient network issues between the proxy and client can still cause partial reads. Changes: - Add Content-Length validation in doRequest: detect when fewer bytes arrive than the server declared, triggering a retry - Add retry logic in Complete: retries once on retryable errors (body read failures, content-length mismatches) with a 500ms backoff - Add parse-level retry in main: if ParseResponse fails, re-requests from the LLM once before giving up (defensive, since retries always succeed per issue evidence) - Improve ParseResponse error diagnostics: log raw vs cleaned lengths and a preview of the cleaned content to aid future debugging Does NOT retry on API errors (4xx/5xx) or structural issues — only transient body read problems. Closes #47
316 lines
8.7 KiB
Go
316 lines
8.7 KiB
Go
package review
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"strings"
|
|
)
|
|
|
|
// Finding is one issue reported by the reviewer. It mirrors a single element
// of the "findings" array in the LLM's JSON output.
type Finding struct {
	// Severity is decoded from "severity"; ParseResponse accepts only
	// MAJOR, MINOR, or NIT.
	Severity string `json:"severity"`
	// File is decoded from "file".
	File string `json:"file"`
	// Line is decoded from "line".
	Line int `json:"line"`
	// Finding is decoded from "finding" and carries the text of the issue.
	Finding string `json:"finding"`
}
|
|
|
|
// ReviewResult is the structured output from the LLM.
|
|
type ReviewResult struct {
|
|
Verdict string `json:"verdict"`
|
|
Summary string `json:"summary"`
|
|
Findings []Finding `json:"findings"`
|
|
Recommendation string `json:"recommendation"`
|
|
}
|
|
|
|
// ParseResponse parses the LLM response into a ReviewResult.
|
|
func ParseResponse(response string) (*ReviewResult, error) {
|
|
// Try to extract JSON from the response — the LLM might wrap it in markdown fences
|
|
cleaned := extractJSON(response)
|
|
|
|
var result ReviewResult
|
|
if err := json.Unmarshal([]byte(cleaned), &result); err != nil {
|
|
// LLMs sometimes produce JSON with unescaped quotes inside string values.
|
|
// Try to repair before giving up.
|
|
repaired := repairJSON(cleaned)
|
|
if err2 := json.Unmarshal([]byte(repaired), &result); err2 != nil {
|
|
// Include diagnostic info: lengths help identify truncation
|
|
rawLen := len(response)
|
|
cleanedLen := len(cleaned)
|
|
preview := cleaned
|
|
if len(preview) > 200 {
|
|
preview = preview[:100] + "..." + preview[len(preview)-100:]
|
|
}
|
|
return nil, fmt.Errorf("parse LLM response as JSON: %w\nRaw length: %d, cleaned length: %d\nCleaned preview: %s", err, rawLen, cleanedLen, preview)
|
|
}
|
|
}
|
|
|
|
// Validate verdict
|
|
switch result.Verdict {
|
|
case "APPROVE", "REQUEST_CHANGES":
|
|
// valid
|
|
default:
|
|
return nil, fmt.Errorf("invalid verdict %q (must be APPROVE or REQUEST_CHANGES)", result.Verdict)
|
|
}
|
|
|
|
// Validate finding severities
|
|
for i, f := range result.Findings {
|
|
switch f.Severity {
|
|
case "MAJOR", "MINOR", "NIT":
|
|
// valid
|
|
default:
|
|
return nil, fmt.Errorf("finding %d has invalid severity %q", i, f.Severity)
|
|
}
|
|
}
|
|
|
|
return &result, nil
|
|
}
|
|
|
|
// extractJSON returns the JSON payload of an LLM reply that may be wrapped in
// a markdown code fence.
//
// Surrounding whitespace is always trimmed. If the trimmed text starts with
// "```", the opening fence line (e.g. "```json") is dropped whenever at least
// one more line follows it, and a final line consisting solely of "```" is
// dropped as well. A reply whose closing fence is missing (for instance
// because it was cut short) therefore still has its opening fence removed,
// regardless of how many lines the payload spans. Text that does not start
// with a fence is returned trimmed but otherwise untouched.
func extractJSON(s string) string {
	s = strings.TrimSpace(s)
	if !strings.HasPrefix(s, "```") {
		return s
	}

	lines := strings.Split(s, "\n")

	// Drop the opening fence line. A lone line is kept so that the
	// closing-fence check below can still reduce a bare "```" to nothing.
	if len(lines) > 1 {
		lines = lines[1:]
	}

	// Drop the closing fence line, if present.
	if n := len(lines); n > 0 && strings.TrimSpace(lines[n-1]) == "```" {
		lines = lines[:n-1]
	}

	return strings.TrimSpace(strings.Join(lines, "\n"))
}
|
|
|
|
// repairJSON attempts to fix common LLM JSON issues:
|
|
// - Unescaped double quotes inside string values
|
|
//
|
|
// Strategy: walk the JSON structurally. Object keys are parsed normally (LLMs
|
|
// get those right). For string VALUES, we find all candidate closing quotes and
|
|
// pick the LAST one that leaves valid JSON structure afterward — maximizing
|
|
// string content, which is the correct bias for the "LLM put unescaped quotes
|
|
// in a string value" failure mode.
|
|
func repairJSON(s string) string {
|
|
runes := []rune(s)
|
|
var out strings.Builder
|
|
out.Grow(len(s) + 64)
|
|
|
|
i := 0
|
|
for i < len(runes) {
|
|
c := runes[i]
|
|
|
|
if c != '"' {
|
|
out.WriteRune(c)
|
|
i++
|
|
continue
|
|
}
|
|
|
|
// We hit an opening quote. Determine if this is a key or a value.
|
|
// Keys: the standard JSON parser in LLMs gets keys right, so we parse
|
|
// them normally (first unescaped quote closes).
|
|
// Values: may contain unescaped quotes — use the repair heuristic.
|
|
isValue := isValuePosition(runes, i)
|
|
|
|
if !isValue {
|
|
// Parse key/simple string normally
|
|
out.WriteRune('"')
|
|
i++
|
|
for i < len(runes) {
|
|
ch := runes[i]
|
|
if ch == '\\' && i+1 < len(runes) {
|
|
out.WriteRune(ch)
|
|
i++
|
|
out.WriteRune(runes[i])
|
|
i++
|
|
continue
|
|
}
|
|
if ch == '"' {
|
|
out.WriteRune('"')
|
|
i++
|
|
break
|
|
}
|
|
out.WriteRune(ch)
|
|
i++
|
|
}
|
|
continue
|
|
}
|
|
|
|
// Value string — find the correct close using last-valid-candidate heuristic
|
|
out.WriteRune('"')
|
|
i++
|
|
|
|
closeIdx := findClosingQuote(runes, i)
|
|
|
|
// Write everything between open and close, escaping interior quotes
|
|
for j := i; j < closeIdx; j++ {
|
|
ch := runes[j]
|
|
if ch == '\\' && j+1 < closeIdx {
|
|
// Already-escaped sequence — pass through
|
|
out.WriteRune(ch)
|
|
j++
|
|
out.WriteRune(runes[j])
|
|
} else if ch == '"' {
|
|
out.WriteRune('\\')
|
|
out.WriteRune('"')
|
|
} else {
|
|
out.WriteRune(ch)
|
|
}
|
|
}
|
|
|
|
// Write the closing quote
|
|
out.WriteRune('"')
|
|
i = closeIdx + 1
|
|
}
|
|
|
|
return out.String()
|
|
}
|
|
|
|
// isValuePosition reports whether the quote at index i opens an object VALUE
// string, i.e. whether the nearest preceding non-whitespace rune is ':'.
//
// Quotes that follow anything else — '{', ',', '[' or nothing at all — yield
// false, so object keys and array elements are not treated as values.
func isValuePosition(runes []rune, i int) bool {
	for k := i - 1; k >= 0; k-- {
		switch runes[k] {
		case ' ', '\t', '\n', '\r':
			continue // keep walking backward over whitespace
		}
		return runes[k] == ':'
	}
	// Only whitespace (or nothing) precedes the quote.
	return false
}
|
|
|
|
// findClosingQuote finds the index of the true closing quote for a JSON string
|
|
// value starting at position start (the character after the opening quote).
|
|
// It collects all unescaped quote candidates and returns the FIRST one that
|
|
// produces valid JSON continuation (deeper lookahead verifies the next token).
|
|
func findClosingQuote(runes []rune, start int) int {
|
|
// Collect all candidate positions for the closing quote.
|
|
var candidates []int
|
|
for j := start; j < len(runes); j++ {
|
|
if runes[j] == '\\' {
|
|
j++ // skip escaped character
|
|
continue
|
|
}
|
|
if runes[j] == '"' {
|
|
candidates = append(candidates, j)
|
|
}
|
|
}
|
|
|
|
if len(candidates) == 0 {
|
|
return len(runes)
|
|
}
|
|
|
|
if len(candidates) == 1 {
|
|
return candidates[0]
|
|
}
|
|
|
|
// Try candidates from FIRST to LAST. The correct closing quote is the
|
|
// earliest one that produces valid JSON structure after it (verified by
|
|
// deeper lookahead that checks the next token is a valid JSON start).
|
|
for _, idx := range candidates {
|
|
if isValidJSONAfterClose(runes, idx+1) {
|
|
return idx
|
|
}
|
|
}
|
|
|
|
// Fallback: return the last candidate
|
|
return candidates[len(candidates)-1]
|
|
}
|
|
|
|
// isValidJSONAfterClose checks whether the runes after a candidate closing quote
|
|
// look like valid JSON continuation for a VALUE string. Since we only use this
|
|
// for value positions, ':' is NOT a valid continuation (values are never keys).
|
|
// Checks deeper structure to avoid being fooled by JSON-like content in strings.
|
|
func isValidJSONAfterClose(runes []rune, pos int) bool {
|
|
j := pos
|
|
for j < len(runes) && (runes[j] == ' ' || runes[j] == '\t' || runes[j] == '\n' || runes[j] == '\r') {
|
|
j++
|
|
}
|
|
|
|
if j >= len(runes) {
|
|
return true
|
|
}
|
|
|
|
next := runes[j]
|
|
if next == '}' || next == ']' {
|
|
// Closing a container. Verify what follows the close is also valid:
|
|
// another structural char, comma, or EOF.
|
|
return isValidAfterContainerClose(runes, j+1)
|
|
}
|
|
if next == ',' {
|
|
// After comma, must be followed by a valid JSON token
|
|
j++
|
|
for j < len(runes) && (runes[j] == ' ' || runes[j] == '\t' || runes[j] == '\n' || runes[j] == '\r') {
|
|
j++
|
|
}
|
|
if j >= len(runes) {
|
|
return false // trailing comma with nothing after — invalid
|
|
}
|
|
return isJSONTokenStart(runes, j)
|
|
}
|
|
// ':' is NOT valid here — we're in a value position, not a key.
|
|
// Any other character is also invalid.
|
|
return false
|
|
}
|
|
|
|
// isValidAfterContainerClose checks that after a } or ], the continuation is
|
|
// structurally valid: more closes, comma+token, or EOF.
|
|
func isValidAfterContainerClose(runes []rune, pos int) bool {
|
|
j := pos
|
|
for j < len(runes) && (runes[j] == ' ' || runes[j] == '\t' || runes[j] == '\n' || runes[j] == '\r') {
|
|
j++
|
|
}
|
|
if j >= len(runes) {
|
|
return true
|
|
}
|
|
next := runes[j]
|
|
if next == '}' || next == ']' {
|
|
return isValidAfterContainerClose(runes, j+1)
|
|
}
|
|
if next == ',' {
|
|
j++
|
|
for j < len(runes) && (runes[j] == ' ' || runes[j] == '\t' || runes[j] == '\n' || runes[j] == '\r') {
|
|
j++
|
|
}
|
|
if j >= len(runes) {
|
|
return false
|
|
}
|
|
return isJSONTokenStart(runes, j)
|
|
}
|
|
return false
|
|
}
|
|
|
|
// isJSONTokenStart reports whether a JSON token can begin at runes[pos].
//
// It accepts a double quote, '{', '[', '-', an ASCII digit, or one of the
// literals true, false, null spelled out in full starting at pos. A pos at or
// beyond the end of runes yields false.
func isJSONTokenStart(runes []rune, pos int) bool {
	if pos >= len(runes) {
		return false
	}

	// literalAt reports whether word appears in full starting at pos.
	literalAt := func(word string) bool {
		end := pos + len(word)
		return end <= len(runes) && string(runes[pos:end]) == word
	}

	switch r := runes[pos]; r {
	case '"', '{', '[', '-': // string, object, array, negative number
		return true
	case 't':
		return literalAt("true")
	case 'f':
		return literalAt("false")
	case 'n':
		return literalAt("null")
	default:
		return '0' <= r && r <= '9' // number
	}
}
|