fix: retry on transient LLM response body truncation
CI / test (pull_request) Successful in 15s
CI / review (/openai/v1, gpt-4.1, gpt41, openai, GPT_REVIEW_TOKEN) (pull_request) Successful in 25s
CI / review (/openai/v1, gpt-4.1-mini, gpt41-mini, openai, GPT_REVIEW_TOKEN) (pull_request) Successful in 29s
CI / review (/anthropic/v1, claude-sonnet-4-6, sonnet, anthropic, SONNET_REVIEW_TOKEN) (pull_request) Successful in 49s
CI / review (/openai/v1, gpt-5, security, openai, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Successful in 50s
CI / review (/openai/v1, gpt-5, gpt, openai, GPT_REVIEW_TOKEN) (pull_request) Successful in 1m15s
CI / review (/openai/v1, gpt-5-mini, gpt5-mini, openai, GPT_REVIEW_TOKEN) (pull_request) Successful in 52s

Addresses intermittent 'unexpected end of JSON input' failures where the
LLM response body is truncated in transit between the proxy and client.

Root cause: network-level truncation where io.ReadAll returns partial data
(observed in 3/50 CI runs through HAI proxy). The response body reading
was already using io.ReadAll correctly, but transient network issues
between the proxy and client can still cause partial reads.

Changes:
- Add Content-Length validation in doRequest: detect when fewer bytes
  arrive than the server declared, triggering a retry
- Add retry logic in Complete: retries once on retryable errors (body
  read failures, content-length mismatches) with a 500ms backoff
- Add parse-level retry in main: if ParseResponse fails, re-requests
  from the LLM once before giving up (defensive, since retries always
  succeed per issue evidence)
- Improve ParseResponse error diagnostics: log raw vs cleaned lengths
  and a preview of the cleaned content to aid future debugging

Does NOT retry on API errors (4xx/5xx) or structural issues — only
transient body read problems.

Closes #47
This commit is contained in:
claw
2026-05-07 00:44:32 -07:00
parent cabbb5a55a
commit db479d0ff4
4 changed files with 216 additions and 18 deletions
+28 -12
View File
@@ -254,25 +254,41 @@ func main() {
slog.Warn("context trimmed to fit budget", "trimmed", budgetResult.Trimmed)
}
// Step 8: Call LLM
// Step 8: Call LLM (with retry on parse failure)
slog.Info("sending request to LLM", "model", *llmModel)
messages := []llm.Message{
{Role: "system", Content: budgetResult.SystemPrompt},
{Role: "user", Content: budgetResult.UserPrompt},
}
response, err := llmClient.Complete(ctx, messages)
if err != nil {
slog.Error("LLM request failed", "model", *llmModel, "error", err)
os.Exit(1)
}
slog.Info("LLM response received", "bytes", len(response))
var response string
var result *review.ReviewResult
for attempt := 1; attempt <= 2; attempt++ {
if attempt > 1 {
slog.Warn("retrying LLM request after parse failure", "attempt", attempt)
time.Sleep(time.Second)
}
// Step 9: Parse response
result, err := review.ParseResponse(response)
if err != nil {
slog.Error("failed to parse LLM response", "error", err)
os.Exit(1)
response, err = llmClient.Complete(ctx, messages)
if err != nil {
slog.Error("LLM request failed", "model", *llmModel, "error", err, "attempt", attempt)
if attempt == 2 {
os.Exit(1)
}
continue
}
slog.Info("LLM response received", "bytes", len(response), "attempt", attempt)
// Step 9: Parse response
result, err = review.ParseResponse(response)
if err != nil {
slog.Error("failed to parse LLM response", "error", err, "attempt", attempt)
if attempt == 2 {
os.Exit(1)
}
continue
}
break
}
slog.Info("review parsed", "verdict", result.Verdict, "findings", len(result.Findings))
+51 -5
View File
@@ -75,12 +75,52 @@ type Message struct {
// Complete sends a chat completion request and returns the assistant's response content.
// The first message with role "system" is treated as the system prompt.
func (c *Client) Complete(ctx context.Context, messages []Message) (string, error) {
switch c.provider {
case ProviderAnthropic:
return c.completeAnthropic(ctx, messages)
default:
return c.completeOpenAI(ctx, messages)
var result string
var err error
for attempt := 0; attempt < 2; attempt++ {
switch c.provider {
case ProviderAnthropic:
result, err = c.completeAnthropic(ctx, messages)
default:
result, err = c.completeOpenAI(ctx, messages)
}
if err == nil {
return result, nil
}
// Only retry on response body read errors (transient network issues).
// Do not retry on context cancellation, status errors, or parse errors
// that indicate a structural API problem.
if !isRetryableError(err) {
return "", err
}
if attempt == 0 && ctx.Err() == nil {
// Brief pause before retry to allow transient issues to resolve.
time.Sleep(500 * time.Millisecond)
}
}
return "", err
}
// isRetryableError returns true for transient errors worth retrying.
func isRetryableError(err error) bool {
if err == nil {
return false
}
s := err.Error()
// Body read failures (connection reset, truncation)
if strings.Contains(s, "read response") {
return true
}
// Unexpected body length (our content-length validation)
if strings.Contains(s, "body length mismatch") {
return true
}
return false
}
// --- OpenAI-compatible implementation ---
@@ -231,6 +271,12 @@ func (c *Client) doRequest(req *http.Request, parse func([]byte) (string, error)
return "", fmt.Errorf("read response: %w", err)
}
// Validate body length against Content-Length header when present.
// A mismatch indicates the response was truncated in transit.
if cl := resp.ContentLength; cl > 0 && int64(len(body)) < cl {
return "", fmt.Errorf("body length mismatch: Content-Length=%d, received=%d", cl, len(body))
}
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return "", fmt.Errorf("LLM API error (status %d): %s", resp.StatusCode, string(body))
}
+129
View File
@@ -3,6 +3,7 @@ package llm
import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"testing"
@@ -295,3 +296,131 @@ func TestWithProvider(t *testing.T) {
t.Errorf("expected provider anthropic, got %s", client.provider)
}
}
func TestComplete_RetryOnBodyReadError(t *testing.T) {
attempts := 0
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
attempts++
if attempts == 1 {
// First attempt: send headers then close connection abruptly
// Simulate by writing partial response and flushing with wrong Content-Length
w.Header().Set("Content-Length", "1000")
w.WriteHeader(http.StatusOK)
w.Write([]byte(`{"choices":[{"message":{"con`))
// The test HTTP server will close the connection after handler returns,
// but Content-Length mismatch means client gets fewer bytes than expected
return
}
// Second attempt: succeed
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(ChatResponse{
Choices: []struct {
Message struct {
Content string `json:"content"`
} `json:"message"`
}{{Message: struct {
Content string `json:"content"`
}{Content: "success"}}},
})
}))
defer server.Close()
client := NewClient(server.URL, "key", "model")
got, err := client.Complete(context.Background(), []Message{{Role: "user", Content: "Hi"}})
if err != nil {
t.Fatalf("expected retry to succeed, got error: %v", err)
}
if got != "success" {
t.Errorf("expected %q, got %q", "success", got)
}
if attempts != 2 {
t.Errorf("expected 2 attempts, got %d", attempts)
}
}
func TestComplete_ContentLengthMismatch(t *testing.T) {
attempts := 0
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
attempts++
if attempts == 1 {
// Claim Content-Length is larger than actual body
w.Header().Set("Content-Length", "500")
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
// Write less than 500 bytes
w.Write([]byte(`{"choices":[{"message":{"content":"partial"}}]}`))
return
}
// Second attempt succeeds
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(ChatResponse{
Choices: []struct {
Message struct {
Content string `json:"content"`
} `json:"message"`
}{{Message: struct {
Content string `json:"content"`
}{Content: "complete"}}},
})
}))
defer server.Close()
client := NewClient(server.URL, "key", "model")
got, err := client.Complete(context.Background(), []Message{{Role: "user", Content: "Hi"}})
if err != nil {
t.Fatalf("expected retry to succeed on content-length mismatch, got: %v", err)
}
if got != "complete" {
t.Errorf("expected %q, got %q", "complete", got)
}
}
func TestComplete_NoRetryOnAPIError(t *testing.T) {
attempts := 0
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
attempts++
w.WriteHeader(http.StatusBadRequest)
w.Write([]byte(`{"error":"bad request"}`))
}))
defer server.Close()
client := NewClient(server.URL, "key", "model")
_, err := client.Complete(context.Background(), []Message{{Role: "user", Content: "Hi"}})
if err == nil {
t.Fatal("expected error for 400, got nil")
}
if attempts != 1 {
t.Errorf("should not retry on API errors, got %d attempts", attempts)
}
}
func TestIsRetryableError(t *testing.T) {
tests := []struct {
name string
err string
expected bool
}{
{"nil formatted", "", false},
{"read response error", "read response: unexpected EOF", true},
{"body length mismatch", "body length mismatch: Content-Length=1000, received=500", true},
{"API error", "LLM API error (status 400): bad request", false},
{"parse error", "parse response: unexpected end of JSON input", false},
{"request error", "LLM request: connection refused", false},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if tt.err == "" {
if isRetryableError(nil) {
t.Error("nil error should not be retryable")
}
return
}
err := fmt.Errorf("%s", tt.err)
got := isRetryableError(err)
if got != tt.expected {
t.Errorf("isRetryableError(%q) = %v, want %v", tt.err, got, tt.expected)
}
})
}
}
+8 -1
View File
@@ -33,7 +33,14 @@ func ParseResponse(response string) (*ReviewResult, error) {
// Try to repair before giving up.
repaired := repairJSON(cleaned)
if err2 := json.Unmarshal([]byte(repaired), &result); err2 != nil {
return nil, fmt.Errorf("parse LLM response as JSON: %w\nRaw response: %s", err, response)
// Include diagnostic info: lengths help identify truncation
rawLen := len(response)
cleanedLen := len(cleaned)
preview := cleaned
if len(preview) > 200 {
preview = preview[:100] + "..." + preview[len(preview)-100:]
}
return nil, fmt.Errorf("parse LLM response as JSON: %w\nRaw length: %d, cleaned length: %d\nCleaned preview: %s", err, rawLen, cleanedLen, preview)
}
}