Compare commits

..

3 Commits

Author SHA1 Message Date
Rodin 489457c184 ci: retrigger after LLM_BASE_URL secret fix
CI / test (pull_request) Successful in 13s
CI / review (gpt-4.1, gpt, GPT_REVIEW_TOKEN) (pull_request) Successful in 20s
CI / review (gpt-5, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 1m27s
CI / review (gpt-5, security, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Successful in 1m52s
2026-05-04 23:15:08 -07:00
Rodin 25d1a670bf fix: redesign repairJSON to handle all reviewer-reported edge cases
CI / test (pull_request) Successful in 11s
CI / review (gpt-4.1, gpt, GPT_REVIEW_TOKEN) (pull_request) Failing after 10s
CI / review (gpt-5, security, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Failing after 10s
CI / review (gpt-5, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Failing after 11s
Rewrites the JSON repair algorithm to address two correctness bugs
identified in code review:

1. Interior quoted word before comma: "say "yes", and go" was
   misidentified as structural close because "," followed the quote.

2. JSON-shaped content in strings: {"key": "val"} inside a string
   value was being parsed as actual JSON structure.

The new approach:
- Distinguishes keys from values (only values need repair)
- Uses first-valid-candidate scan with deep lookahead
- Verifies that after a candidate close, the continuation is not just
  a structural char but a complete valid JSON pattern
- Validates keyword tokens (true/false/null) fully, not just first char
- Checks container closes recursively for valid continuation

Adds comprehensive tests for all reported edge cases plus a complex
combined scenario with nested JSON-like content, quoted words before
commas, and multiple failure modes in one string.
2026-05-04 21:27:39 -07:00
Rodin 80a9a7675b fix: repair unescaped quotes in LLM JSON responses
CI / test (pull_request) Successful in 13s
CI / review (gpt-5, security, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Failing after 13s
CI / review (gpt-4.1, gpt, GPT_REVIEW_TOKEN) (pull_request) Failing after 13s
CI / review (gpt-5, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Failing after 12s
LLMs (especially Sonnet) sometimes emit JSON with unescaped double
quotes inside string values, e.g. (e.g. "28") instead of properly
escaping them. This caused parse failures in CI.

Add a repairJSON fallback that uses a character-by-character scanner
to identify interior quotes (those not followed by structural JSON
characters) and escape them before retrying the parse.

Fixes sonnet-review failures on gargoyle PR #551.
2026-05-03 09:47:22 -07:00
6 changed files with 21 additions and 308 deletions
+3 -11
View File
@@ -19,7 +19,6 @@ jobs:
- run: go build -o review-bot ./cmd/review-bot
# Self-review: builds from source since we're pre-release
# Models configured to match SAP AI Core deployments
review:
runs-on: ubuntu-24.04
if: github.event_name == 'pull_request'
@@ -29,18 +28,12 @@ jobs:
include:
- name: sonnet
token_secret: SONNET_REVIEW_TOKEN
provider: anthropic
llm_path: /anthropic/v1
model: anthropic--claude-4.6-sonnet
model: gpt-5
- name: gpt
token_secret: GPT_REVIEW_TOKEN
provider: openai
llm_path: /openai/v1
model: gpt-5
model: gpt-4.1
- name: security
token_secret: SECURITY_REVIEW_TOKEN
provider: openai
llm_path: /openai/v1
model: gpt-5
system_prompt_file: SECURITY_REVIEW.md
steps:
@@ -56,10 +49,9 @@ jobs:
PR_NUMBER: ${{ github.event.pull_request.number }}
REVIEWER_TOKEN: ${{ secrets[matrix.token_secret] }}
REVIEWER_NAME: ${{ matrix.name }}
LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}${{ matrix.llm_path }}
LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_MODEL: ${{ matrix.model }}
LLM_PROVIDER: ${{ matrix.provider }}
CONVENTIONS_FILE: "CONVENTIONS.md"
PATTERNS_REPO: "rodin/go-patterns"
PATTERNS_FILES: "README.md,patterns/"
+6 -53
View File
@@ -254,42 +254,26 @@ func main() {
slog.Warn("context trimmed to fit budget", "trimmed", budgetResult.Trimmed)
}
// Step 8: Call LLM (with retry on parse failure)
// Step 8: Call LLM
slog.Info("sending request to LLM", "model", *llmModel)
messages := []llm.Message{
{Role: "system", Content: budgetResult.SystemPrompt},
{Role: "user", Content: budgetResult.UserPrompt},
}
var response string
var result *review.ReviewResult
for attempt := 1; attempt <= 2; attempt++ {
if attempt > 1 {
slog.Warn("retrying LLM request after parse failure", "attempt", attempt)
time.Sleep(time.Second)
}
response, err = llmClient.Complete(ctx, messages)
response, err := llmClient.Complete(ctx, messages)
if err != nil {
slog.Error("LLM request failed", "model", *llmModel, "error", err, "attempt", attempt)
if attempt == 2 {
slog.Error("LLM request failed", "model", *llmModel, "error", err)
os.Exit(1)
}
continue
}
slog.Info("LLM response received", "bytes", len(response), "attempt", attempt)
slog.Info("LLM response received", "bytes", len(response))
// Step 9: Parse response
result, err = review.ParseResponse(response)
result, err := review.ParseResponse(response)
if err != nil {
slog.Error("failed to parse LLM response", "error", err, "attempt", attempt)
if attempt == 2 {
slog.Error("failed to parse LLM response", "error", err)
os.Exit(1)
}
continue
}
break
}
slog.Info("review parsed", "verdict", result.Verdict, "findings", len(result.Findings))
// Step 10: Format and post review
@@ -315,24 +299,6 @@ func main() {
sentinel := fmt.Sprintf("<!-- review-bot:%s -->", *reviewerName)
// Stale check: verify HEAD hasn't moved since we started
evaluatedSHA := pr.Head.Sha
var currentSHA string
currentPR, err := giteaClient.GetPullRequest(ctx, owner, repoName, prNumber)
if err != nil {
slog.Warn("could not re-fetch PR for stale check", "pr", prNumber, "error", err)
// currentSHA stays empty — shouldSkipStaleReview will return false
} else {
currentSHA = currentPR.Head.Sha
}
if shouldSkipStaleReview(evaluatedSHA, currentSHA) {
slog.Warn("HEAD moved during review — skipping stale review",
"evaluated", evaluatedSHA,
"current", currentSHA,
"pr", prNumber)
return
}
// Map findings to inline comments for lines present in the diff
diffRanges := gitea.ParseDiffNewLines(diff)
var inlineComments []gitea.ReviewComment
@@ -684,16 +650,3 @@ func findAllOwnReviews(reviews []gitea.Review, sentinel string) []gitea.Review {
}
return result
}
// shouldSkipStaleReview reports whether to skip posting because HEAD moved.
// Returns true (skip) if evaluatedSHA differs from currentSHA.
// Returns false (don't skip) if:
// - SHAs match (no movement)
// - currentSHA is empty (re-fetch failed; prefer posting stale over failing)
func shouldSkipStaleReview(evaluatedSHA, currentSHA string) bool {
if currentSHA == "" {
// Re-fetch failed; better to post potentially stale than fail
return false
}
return evaluatedSHA != currentSHA
}
-50
View File
@@ -862,53 +862,3 @@ func TestFindAllOwnReviews(t *testing.T) {
}
}
}
func TestShouldSkipStaleReview(t *testing.T) {
tests := []struct {
name string
evaluatedSHA string
currentSHA string
wantSkip bool
}{
{
name: "matching SHAs",
evaluatedSHA: "abc123def456",
currentSHA: "abc123def456",
wantSkip: false,
},
{
name: "different SHAs",
evaluatedSHA: "abc123def456",
currentSHA: "xyz789abc123",
wantSkip: true,
},
{
name: "empty current SHA (re-fetch failed)",
evaluatedSHA: "abc123def456",
currentSHA: "",
wantSkip: false,
},
{
name: "both empty (edge case)",
evaluatedSHA: "",
currentSHA: "",
wantSkip: false,
},
{
name: "only current empty",
evaluatedSHA: "abc123",
currentSHA: "",
wantSkip: false,
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
got := shouldSkipStaleReview(tc.evaluatedSHA, tc.currentSHA)
if got != tc.wantSkip {
t.Errorf("shouldSkipStaleReview(%q, %q) = %v, want %v",
tc.evaluatedSHA, tc.currentSHA, got, tc.wantSkip)
}
})
}
}
+2 -48
View File
@@ -75,52 +75,12 @@ type Message struct {
// Complete sends a chat completion request and returns the assistant's response content.
// The first message with role "system" is treated as the system prompt.
func (c *Client) Complete(ctx context.Context, messages []Message) (string, error) {
var result string
var err error
for attempt := 0; attempt < 2; attempt++ {
switch c.provider {
case ProviderAnthropic:
result, err = c.completeAnthropic(ctx, messages)
return c.completeAnthropic(ctx, messages)
default:
result, err = c.completeOpenAI(ctx, messages)
return c.completeOpenAI(ctx, messages)
}
if err == nil {
return result, nil
}
// Only retry on response body read errors (transient network issues).
// Do not retry on context cancellation, status errors, or parse errors
// that indicate a structural API problem.
if !isRetryableError(err) {
return "", err
}
if attempt == 0 && ctx.Err() == nil {
// Brief pause before retry to allow transient issues to resolve.
time.Sleep(500 * time.Millisecond)
}
}
return "", err
}
// isRetryableError returns true for transient errors worth retrying.
func isRetryableError(err error) bool {
if err == nil {
return false
}
s := err.Error()
// Body read failures (connection reset, truncation)
if strings.Contains(s, "read response") {
return true
}
// Unexpected body length (our content-length validation)
if strings.Contains(s, "body length mismatch") {
return true
}
return false
}
// --- OpenAI-compatible implementation ---
@@ -271,12 +231,6 @@ func (c *Client) doRequest(req *http.Request, parse func([]byte) (string, error)
return "", fmt.Errorf("read response: %w", err)
}
// Validate body length against Content-Length header when present.
// A mismatch indicates the response was truncated in transit.
if cl := resp.ContentLength; cl > 0 && int64(len(body)) < cl {
return "", fmt.Errorf("body length mismatch: Content-Length=%d, received=%d", cl, len(body))
}
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return "", fmt.Errorf("LLM API error (status %d): %s", resp.StatusCode, string(body))
}
-129
View File
@@ -3,7 +3,6 @@ package llm
import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"testing"
@@ -296,131 +295,3 @@ func TestWithProvider(t *testing.T) {
t.Errorf("expected provider anthropic, got %s", client.provider)
}
}
func TestComplete_RetryOnBodyReadError(t *testing.T) {
attempts := 0
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
attempts++
if attempts == 1 {
// First attempt: send headers then close connection abruptly
// Simulate by writing partial response and flushing with wrong Content-Length
w.Header().Set("Content-Length", "1000")
w.WriteHeader(http.StatusOK)
w.Write([]byte(`{"choices":[{"message":{"con`))
// The test HTTP server will close the connection after handler returns,
// but Content-Length mismatch means client gets fewer bytes than expected
return
}
// Second attempt: succeed
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(ChatResponse{
Choices: []struct {
Message struct {
Content string `json:"content"`
} `json:"message"`
}{{Message: struct {
Content string `json:"content"`
}{Content: "success"}}},
})
}))
defer server.Close()
client := NewClient(server.URL, "key", "model")
got, err := client.Complete(context.Background(), []Message{{Role: "user", Content: "Hi"}})
if err != nil {
t.Fatalf("expected retry to succeed, got error: %v", err)
}
if got != "success" {
t.Errorf("expected %q, got %q", "success", got)
}
if attempts != 2 {
t.Errorf("expected 2 attempts, got %d", attempts)
}
}
func TestComplete_ContentLengthMismatch(t *testing.T) {
attempts := 0
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
attempts++
if attempts == 1 {
// Claim Content-Length is larger than actual body
w.Header().Set("Content-Length", "500")
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
// Write less than 500 bytes
w.Write([]byte(`{"choices":[{"message":{"content":"partial"}}]}`))
return
}
// Second attempt succeeds
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(ChatResponse{
Choices: []struct {
Message struct {
Content string `json:"content"`
} `json:"message"`
}{{Message: struct {
Content string `json:"content"`
}{Content: "complete"}}},
})
}))
defer server.Close()
client := NewClient(server.URL, "key", "model")
got, err := client.Complete(context.Background(), []Message{{Role: "user", Content: "Hi"}})
if err != nil {
t.Fatalf("expected retry to succeed on content-length mismatch, got: %v", err)
}
if got != "complete" {
t.Errorf("expected %q, got %q", "complete", got)
}
}
func TestComplete_NoRetryOnAPIError(t *testing.T) {
attempts := 0
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
attempts++
w.WriteHeader(http.StatusBadRequest)
w.Write([]byte(`{"error":"bad request"}`))
}))
defer server.Close()
client := NewClient(server.URL, "key", "model")
_, err := client.Complete(context.Background(), []Message{{Role: "user", Content: "Hi"}})
if err == nil {
t.Fatal("expected error for 400, got nil")
}
if attempts != 1 {
t.Errorf("should not retry on API errors, got %d attempts", attempts)
}
}
func TestIsRetryableError(t *testing.T) {
tests := []struct {
name string
err string
expected bool
}{
{"nil formatted", "", false},
{"read response error", "read response: unexpected EOF", true},
{"body length mismatch", "body length mismatch: Content-Length=1000, received=500", true},
{"API error", "LLM API error (status 400): bad request", false},
{"parse error", "parse response: unexpected end of JSON input", false},
{"request error", "LLM request: connection refused", false},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if tt.err == "" {
if isRetryableError(nil) {
t.Error("nil error should not be retryable")
}
return
}
err := fmt.Errorf("%s", tt.err)
got := isRetryableError(err)
if got != tt.expected {
t.Errorf("isRetryableError(%q) = %v, want %v", tt.err, got, tt.expected)
}
})
}
}
+1 -8
View File
@@ -33,14 +33,7 @@ func ParseResponse(response string) (*ReviewResult, error) {
// Try to repair before giving up.
repaired := repairJSON(cleaned)
if err2 := json.Unmarshal([]byte(repaired), &result); err2 != nil {
// Include diagnostic info: lengths help identify truncation
rawLen := len(response)
cleanedLen := len(cleaned)
preview := cleaned
if len(preview) > 200 {
preview = preview[:100] + "..." + preview[len(preview)-100:]
}
return nil, fmt.Errorf("parse LLM response as JSON: %w\nRaw length: %d, cleaned length: %d\nCleaned preview: %s", err, rawLen, cleanedLen, preview)
return nil, fmt.Errorf("parse LLM response as JSON: %w\nRaw response: %s", err, response)
}
}