diff --git a/cmd/review-bot/main.go b/cmd/review-bot/main.go index 777b6b4..021ccc3 100644 --- a/cmd/review-bot/main.go +++ b/cmd/review-bot/main.go @@ -254,25 +254,41 @@ func main() { slog.Warn("context trimmed to fit budget", "trimmed", budgetResult.Trimmed) } - // Step 8: Call LLM + // Step 8: Call LLM (with retry on parse failure) slog.Info("sending request to LLM", "model", *llmModel) messages := []llm.Message{ {Role: "system", Content: budgetResult.SystemPrompt}, {Role: "user", Content: budgetResult.UserPrompt}, } - response, err := llmClient.Complete(ctx, messages) - if err != nil { - slog.Error("LLM request failed", "model", *llmModel, "error", err) - os.Exit(1) - } - slog.Info("LLM response received", "bytes", len(response)) + var response string + var result *review.ReviewResult + for attempt := 1; attempt <= 2; attempt++ { + if attempt > 1 { + slog.Warn("retrying LLM request after parse failure", "attempt", attempt) + time.Sleep(time.Second) + } - // Step 9: Parse response - result, err := review.ParseResponse(response) - if err != nil { - slog.Error("failed to parse LLM response", "error", err) - os.Exit(1) + response, err = llmClient.Complete(ctx, messages) + if err != nil { + slog.Error("LLM request failed", "model", *llmModel, "error", err, "attempt", attempt) + if attempt == 2 { + os.Exit(1) + } + continue + } + slog.Info("LLM response received", "bytes", len(response), "attempt", attempt) + + // Step 9: Parse response + result, err = review.ParseResponse(response) + if err != nil { + slog.Error("failed to parse LLM response", "error", err, "attempt", attempt) + if attempt == 2 { + os.Exit(1) + } + continue + } + break } slog.Info("review parsed", "verdict", result.Verdict, "findings", len(result.Findings)) diff --git a/llm/client.go b/llm/client.go index 7bbd8b3..4fc0485 100644 --- a/llm/client.go +++ b/llm/client.go @@ -75,12 +75,52 @@ type Message struct { // Complete sends a chat completion request and returns the assistant's response content. // The first message with role "system" is treated as the system prompt. func (c *Client) Complete(ctx context.Context, messages []Message) (string, error) { - switch c.provider { - case ProviderAnthropic: - return c.completeAnthropic(ctx, messages) - default: - return c.completeOpenAI(ctx, messages) + var result string + var err error + + for attempt := 0; attempt < 2; attempt++ { + switch c.provider { + case ProviderAnthropic: + result, err = c.completeAnthropic(ctx, messages) + default: + result, err = c.completeOpenAI(ctx, messages) + } + + if err == nil { + return result, nil + } + + // Only retry on response body read errors (transient network issues). + // Do not retry on context cancellation, status errors, or parse errors + // that indicate a structural API problem. + if !isRetryableError(err) { + return "", err + } + + if attempt == 0 && ctx.Err() == nil { + // Brief pause before retry to allow transient issues to resolve. + time.Sleep(500 * time.Millisecond) + } } + + return "", err +} + +// isRetryableError returns true for transient errors worth retrying. +func isRetryableError(err error) bool { + if err == nil { + return false + } + s := err.Error() + // Body read failures (connection reset, truncation) + if strings.Contains(s, "read response") { + return true + } + // Unexpected body length (our content-length validation) + if strings.Contains(s, "body length mismatch") { + return true + } + return false } // --- OpenAI-compatible implementation --- @@ -231,6 +271,12 @@ func (c *Client) doRequest(req *http.Request, parse func([]byte) (string, error) return "", fmt.Errorf("read response: %w", err) } + // Validate body length against Content-Length header when present. + // A mismatch indicates the response was truncated in transit. + if cl := resp.ContentLength; cl > 0 && int64(len(body)) < cl { + return "", fmt.Errorf("body length mismatch: Content-Length=%d, received=%d", cl, len(body)) + } + if resp.StatusCode < 200 || resp.StatusCode >= 300 { return "", fmt.Errorf("LLM API error (status %d): %s", resp.StatusCode, string(body)) } diff --git a/llm/client_test.go b/llm/client_test.go index a6881ab..c7713af 100644 --- a/llm/client_test.go +++ b/llm/client_test.go @@ -3,6 +3,7 @@ package llm import ( "context" "encoding/json" + "fmt" "net/http" "net/http/httptest" "testing" @@ -295,3 +296,131 @@ func TestWithProvider(t *testing.T) { t.Errorf("expected provider anthropic, got %s", client.provider) } } + +func TestComplete_RetryOnBodyReadError(t *testing.T) { + attempts := 0 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + attempts++ + if attempts == 1 { + // First attempt: send headers then close connection abruptly + // Simulate by writing partial response and flushing with wrong Content-Length + w.Header().Set("Content-Length", "1000") + w.WriteHeader(http.StatusOK) + w.Write([]byte(`{"choices":[{"message":{"con`)) + // The test HTTP server will close the connection after handler returns, + // but Content-Length mismatch means client gets fewer bytes than expected + return + } + // Second attempt: succeed + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(ChatResponse{ + Choices: []struct { + Message struct { + Content string `json:"content"` + } `json:"message"` + }{{Message: struct { + Content string `json:"content"` + }{Content: "success"}}}, + }) + })) + defer server.Close() + + client := NewClient(server.URL, "key", "model") + got, err := client.Complete(context.Background(), []Message{{Role: "user", Content: "Hi"}}) + if err != nil { + t.Fatalf("expected retry to succeed, got error: %v", err) + } + if got != "success" { + t.Errorf("expected %q, got %q", "success", got) + } + if attempts != 2 { + t.Errorf("expected 2 attempts, got %d", attempts) + } +} + +func TestComplete_ContentLengthMismatch(t *testing.T) { + attempts := 0 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + attempts++ + if attempts == 1 { + // Claim Content-Length is larger than actual body + w.Header().Set("Content-Length", "500") + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + // Write less than 500 bytes + w.Write([]byte(`{"choices":[{"message":{"content":"partial"}}]}`)) + return + } + // Second attempt succeeds + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(ChatResponse{ + Choices: []struct { + Message struct { + Content string `json:"content"` + } `json:"message"` + }{{Message: struct { + Content string `json:"content"` + }{Content: "complete"}}}, + }) + })) + defer server.Close() + + client := NewClient(server.URL, "key", "model") + got, err := client.Complete(context.Background(), []Message{{Role: "user", Content: "Hi"}}) + if err != nil { + t.Fatalf("expected retry to succeed on content-length mismatch, got: %v", err) + } + if got != "complete" { + t.Errorf("expected %q, got %q", "complete", got) + } +} + +func TestComplete_NoRetryOnAPIError(t *testing.T) { + attempts := 0 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + attempts++ + w.WriteHeader(http.StatusBadRequest) + w.Write([]byte(`{"error":"bad request"}`)) + })) + defer server.Close() + + client := NewClient(server.URL, "key", "model") + _, err := client.Complete(context.Background(), []Message{{Role: "user", Content: "Hi"}}) + if err == nil { + t.Fatal("expected error for 400, got nil") + } + if attempts != 1 { + t.Errorf("should not retry on API errors, got %d attempts", attempts) + } +} + +func TestIsRetryableError(t *testing.T) { + tests := []struct { + name string + err string + expected bool + }{ + {"nil formatted", "", false}, + {"read response error", "read response: unexpected EOF", true}, + {"body length mismatch", "body length mismatch: Content-Length=1000, received=500", true}, + {"API error", "LLM API error (status 400): bad request", false}, + {"parse error", "parse response: unexpected end of JSON input", false}, + {"request error", "LLM request: connection refused", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.err == "" { + if isRetryableError(nil) { + t.Error("nil error should not be retryable") + } + return + } + err := fmt.Errorf("%s", tt.err) + got := isRetryableError(err) + if got != tt.expected { + t.Errorf("isRetryableError(%q) = %v, want %v", tt.err, got, tt.expected) + } + }) + } +} diff --git a/review/parser.go b/review/parser.go index 57c0b24..291ff65 100644 --- a/review/parser.go +++ b/review/parser.go @@ -33,7 +33,14 @@ func ParseResponse(response string) (*ReviewResult, error) { // Try to repair before giving up. repaired := repairJSON(cleaned) if err2 := json.Unmarshal([]byte(repaired), &result); err2 != nil { - return nil, fmt.Errorf("parse LLM response as JSON: %w\nRaw response: %s", err, response) + // Include diagnostic info: lengths help identify truncation + rawLen := len(response) + cleanedLen := len(cleaned) + preview := cleaned + if len(preview) > 200 { + preview = preview[:100] + "..." + preview[len(preview)-100:] + } + return nil, fmt.Errorf("parse LLM response as JSON: %w\nRaw length: %d, cleaned length: %d\nCleaned preview: %s", err, rawLen, cleanedLen, preview) } }