ci: retrigger after LLM_BASE_URL secret fix

fix: redesign repairJSON to handle all reviewer-reported edge cases
Rewrites the JSON repair algorithm to address two correctness bugs identified in code review: 1. Interior quoted word before comma: "say "yes", and go" was misidentified as structural close because "," followed the quote. 2. JSON-shaped content in strings: {"key": "val"} inside a string value was being parsed as actual JSON structure. The new approach: - Distinguishes keys from values (only values need repair) - Uses first-valid-candidate scan with deep lookahead - Verifies that after a candidate close, the continuation is not just a structural char but a complete valid JSON pattern - Validates keyword tokens (true/false/null) fully, not just first char - Checks container closes recursively for valid continuation Adds comprehensive tests for all reported edge cases plus a complex combined scenario with nested JSON-like content, quoted words before commas, and multiple failure modes in one string.
2026-05-04 23:15:08 -07:00 · 2026-05-04 21:27:39 -07:00 · 2026-05-03 09:47:22 -07:00
6 changed files with 21 additions and 308 deletions
@@ -19,7 +19,6 @@ jobs:
      - run: go build -o review-bot ./cmd/review-bot

  # Self-review: builds from source since we're pre-release
-  # Models configured to match SAP AI Core deployments
  review:
    runs-on: ubuntu-24.04
    if: github.event_name == 'pull_request'
@@ -29,18 +28,12 @@ jobs:
        include:
          - name: sonnet
            token_secret: SONNET_REVIEW_TOKEN
-            provider: anthropic
-            llm_path: /anthropic/v1
-            model: anthropic--claude-4.6-sonnet
+            model: gpt-5
          - name: gpt
            token_secret: GPT_REVIEW_TOKEN
-            provider: openai
-            llm_path: /openai/v1
-            model: gpt-5
+            model: gpt-4.1
          - name: security
            token_secret: SECURITY_REVIEW_TOKEN
-            provider: openai
-            llm_path: /openai/v1
            model: gpt-5
            system_prompt_file: SECURITY_REVIEW.md
    steps:
@@ -56,10 +49,9 @@ jobs:
          PR_NUMBER: ${{ github.event.pull_request.number }}
          REVIEWER_TOKEN: ${{ secrets[matrix.token_secret] }}
          REVIEWER_NAME: ${{ matrix.name }}
-          LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}${{ matrix.llm_path }}
+          LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
          LLM_MODEL: ${{ matrix.model }}
-          LLM_PROVIDER: ${{ matrix.provider }}
          CONVENTIONS_FILE: "CONVENTIONS.md"
          PATTERNS_REPO: "rodin/go-patterns"
          PATTERNS_FILES: "README.md,patterns/"
@@ -254,42 +254,26 @@ func main() {
 		slog.Warn("context trimmed to fit budget", "trimmed", budgetResult.Trimmed)
 	}

-	// Step 8: Call LLM (with retry on parse failure)
+	// Step 8: Call LLM
 	slog.Info("sending request to LLM", "model", *llmModel)
 	messages := []llm.Message{
 		{Role: "system", Content: budgetResult.SystemPrompt},
 		{Role: "user", Content: budgetResult.UserPrompt},
 	}

-	var response string
-	var result *review.ReviewResult
-	for attempt := 1; attempt <= 2; attempt++ {
-		if attempt > 1 {
-			slog.Warn("retrying LLM request after parse failure", "attempt", attempt)
-			time.Sleep(time.Second)
-		}
-
-		response, err = llmClient.Complete(ctx, messages)
+	response, err := llmClient.Complete(ctx, messages)
 	if err != nil {
-			slog.Error("LLM request failed", "model", *llmModel, "error", err, "attempt", attempt)
-			if attempt == 2 {
+		slog.Error("LLM request failed", "model", *llmModel, "error", err)
 		os.Exit(1)
 	}
-			continue
-		}
-		slog.Info("LLM response received", "bytes", len(response), "attempt", attempt)
+	slog.Info("LLM response received", "bytes", len(response))

 	// Step 9: Parse response
-		result, err = review.ParseResponse(response)
+	result, err := review.ParseResponse(response)
 	if err != nil {
-			slog.Error("failed to parse LLM response", "error", err, "attempt", attempt)
-			if attempt == 2 {
+		slog.Error("failed to parse LLM response", "error", err)
 		os.Exit(1)
 	}
-			continue
-		}
-		break
-	}
 	slog.Info("review parsed", "verdict", result.Verdict, "findings", len(result.Findings))

 	// Step 10: Format and post review
@@ -315,24 +299,6 @@ func main() {

 	sentinel := fmt.Sprintf("<!-- review-bot:%s -->", *reviewerName)

-	// Stale check: verify HEAD hasn't moved since we started
-	evaluatedSHA := pr.Head.Sha
-	var currentSHA string
-	currentPR, err := giteaClient.GetPullRequest(ctx, owner, repoName, prNumber)
-	if err != nil {
-		slog.Warn("could not re-fetch PR for stale check", "pr", prNumber, "error", err)
-		// currentSHA stays empty — shouldSkipStaleReview will return false
-	} else {
-		currentSHA = currentPR.Head.Sha
-	}
-	if shouldSkipStaleReview(evaluatedSHA, currentSHA) {
-		slog.Warn("HEAD moved during review — skipping stale review",
-			"evaluated", evaluatedSHA,
-			"current", currentSHA,
-			"pr", prNumber)
-		return
-	}
-
 	// Map findings to inline comments for lines present in the diff
 	diffRanges := gitea.ParseDiffNewLines(diff)
 	var inlineComments []gitea.ReviewComment
@@ -684,16 +650,3 @@ func findAllOwnReviews(reviews []gitea.Review, sentinel string) []gitea.Review {
 	}
 	return result
 }
-
-// shouldSkipStaleReview reports whether to skip posting because HEAD moved.
-// Returns true (skip) if evaluatedSHA differs from currentSHA.
-// Returns false (don't skip) if:
-//   - SHAs match (no movement)
-//   - currentSHA is empty (re-fetch failed; prefer posting stale over failing)
-func shouldSkipStaleReview(evaluatedSHA, currentSHA string) bool {
-	if currentSHA == "" {
-		// Re-fetch failed; better to post potentially stale than fail
-		return false
-	}
-	return evaluatedSHA != currentSHA
-}
@@ -862,53 +862,3 @@ func TestFindAllOwnReviews(t *testing.T) {
 		}
 	}
 }
-
-func TestShouldSkipStaleReview(t *testing.T) {
-	tests := []struct {
-		name         string
-		evaluatedSHA string
-		currentSHA   string
-		wantSkip     bool
-	}{
-		{
-			name:         "matching SHAs",
-			evaluatedSHA: "abc123def456",
-			currentSHA:   "abc123def456",
-			wantSkip:     false,
-		},
-		{
-			name:         "different SHAs",
-			evaluatedSHA: "abc123def456",
-			currentSHA:   "xyz789abc123",
-			wantSkip:     true,
-		},
-		{
-			name:         "empty current SHA (re-fetch failed)",
-			evaluatedSHA: "abc123def456",
-			currentSHA:   "",
-			wantSkip:     false,
-		},
-		{
-			name:         "both empty (edge case)",
-			evaluatedSHA: "",
-			currentSHA:   "",
-			wantSkip:     false,
-		},
-		{
-			name:         "only current empty",
-			evaluatedSHA: "abc123",
-			currentSHA:   "",
-			wantSkip:     false,
-		},
-	}
-
-	for _, tc := range tests {
-		t.Run(tc.name, func(t *testing.T) {
-			got := shouldSkipStaleReview(tc.evaluatedSHA, tc.currentSHA)
-			if got != tc.wantSkip {
-				t.Errorf("shouldSkipStaleReview(%q, %q) = %v, want %v",
-					tc.evaluatedSHA, tc.currentSHA, got, tc.wantSkip)
-			}
-		})
-	}
-}
@@ -75,52 +75,12 @@ type Message struct {
 // Complete sends a chat completion request and returns the assistant's response content.
 // The first message with role "system" is treated as the system prompt.
 func (c *Client) Complete(ctx context.Context, messages []Message) (string, error) {
-	var result string
-	var err error
-
-	for attempt := 0; attempt < 2; attempt++ {
 	switch c.provider {
 	case ProviderAnthropic:
-			result, err = c.completeAnthropic(ctx, messages)
+		return c.completeAnthropic(ctx, messages)
 	default:
-			result, err = c.completeOpenAI(ctx, messages)
+		return c.completeOpenAI(ctx, messages)
 	}
-
-		if err == nil {
-			return result, nil
-		}
-
-		// Only retry on response body read errors (transient network issues).
-		// Do not retry on context cancellation, status errors, or parse errors
-		// that indicate a structural API problem.
-		if !isRetryableError(err) {
-			return "", err
-		}
-
-		if attempt == 0 && ctx.Err() == nil {
-			// Brief pause before retry to allow transient issues to resolve.
-			time.Sleep(500 * time.Millisecond)
-		}
-	}
-
-	return "", err
-}
-
-// isRetryableError returns true for transient errors worth retrying.
-func isRetryableError(err error) bool {
-	if err == nil {
-		return false
-	}
-	s := err.Error()
-	// Body read failures (connection reset, truncation)
-	if strings.Contains(s, "read response") {
-		return true
-	}
-	// Unexpected body length (our content-length validation)
-	if strings.Contains(s, "body length mismatch") {
-		return true
-	}
-	return false
 }

 // --- OpenAI-compatible implementation ---
@@ -271,12 +231,6 @@ func (c *Client) doRequest(req *http.Request, parse func([]byte) (string, error)
 		return "", fmt.Errorf("read response: %w", err)
 	}

-	// Validate body length against Content-Length header when present.
-	// A mismatch indicates the response was truncated in transit.
-	if cl := resp.ContentLength; cl > 0 && int64(len(body)) < cl {
-		return "", fmt.Errorf("body length mismatch: Content-Length=%d, received=%d", cl, len(body))
-	}
-
 	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
 		return "", fmt.Errorf("LLM API error (status %d): %s", resp.StatusCode, string(body))
 	}
@@ -3,7 +3,6 @@ package llm
 import (
 	"context"
 	"encoding/json"
-	"fmt"
 	"net/http"
 	"net/http/httptest"
 	"testing"
@@ -296,131 +295,3 @@ func TestWithProvider(t *testing.T) {
 		t.Errorf("expected provider anthropic, got %s", client.provider)
 	}
 }
-
-func TestComplete_RetryOnBodyReadError(t *testing.T) {
-	attempts := 0
-	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		attempts++
-		if attempts == 1 {
-			// First attempt: send headers then close connection abruptly
-			// Simulate by writing partial response and flushing with wrong Content-Length
-			w.Header().Set("Content-Length", "1000")
-			w.WriteHeader(http.StatusOK)
-			w.Write([]byte(`{"choices":[{"message":{"con`))
-			// The test HTTP server will close the connection after handler returns,
-			// but Content-Length mismatch means client gets fewer bytes than expected
-			return
-		}
-		// Second attempt: succeed
-		w.Header().Set("Content-Type", "application/json")
-		json.NewEncoder(w).Encode(ChatResponse{
-			Choices: []struct {
-				Message struct {
-					Content string `json:"content"`
-				} `json:"message"`
-			}{{Message: struct {
-				Content string `json:"content"`
-			}{Content: "success"}}},
-		})
-	}))
-	defer server.Close()
-
-	client := NewClient(server.URL, "key", "model")
-	got, err := client.Complete(context.Background(), []Message{{Role: "user", Content: "Hi"}})
-	if err != nil {
-		t.Fatalf("expected retry to succeed, got error: %v", err)
-	}
-	if got != "success" {
-		t.Errorf("expected %q, got %q", "success", got)
-	}
-	if attempts != 2 {
-		t.Errorf("expected 2 attempts, got %d", attempts)
-	}
-}
-
-func TestComplete_ContentLengthMismatch(t *testing.T) {
-	attempts := 0
-	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		attempts++
-		if attempts == 1 {
-			// Claim Content-Length is larger than actual body
-			w.Header().Set("Content-Length", "500")
-			w.Header().Set("Content-Type", "application/json")
-			w.WriteHeader(http.StatusOK)
-			// Write less than 500 bytes
-			w.Write([]byte(`{"choices":[{"message":{"content":"partial"}}]}`))
-			return
-		}
-		// Second attempt succeeds
-		w.Header().Set("Content-Type", "application/json")
-		json.NewEncoder(w).Encode(ChatResponse{
-			Choices: []struct {
-				Message struct {
-					Content string `json:"content"`
-				} `json:"message"`
-			}{{Message: struct {
-				Content string `json:"content"`
-			}{Content: "complete"}}},
-		})
-	}))
-	defer server.Close()
-
-	client := NewClient(server.URL, "key", "model")
-	got, err := client.Complete(context.Background(), []Message{{Role: "user", Content: "Hi"}})
-	if err != nil {
-		t.Fatalf("expected retry to succeed on content-length mismatch, got: %v", err)
-	}
-	if got != "complete" {
-		t.Errorf("expected %q, got %q", "complete", got)
-	}
-}
-
-func TestComplete_NoRetryOnAPIError(t *testing.T) {
-	attempts := 0
-	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		attempts++
-		w.WriteHeader(http.StatusBadRequest)
-		w.Write([]byte(`{"error":"bad request"}`))
-	}))
-	defer server.Close()
-
-	client := NewClient(server.URL, "key", "model")
-	_, err := client.Complete(context.Background(), []Message{{Role: "user", Content: "Hi"}})
-	if err == nil {
-		t.Fatal("expected error for 400, got nil")
-	}
-	if attempts != 1 {
-		t.Errorf("should not retry on API errors, got %d attempts", attempts)
-	}
-}
-
-func TestIsRetryableError(t *testing.T) {
-	tests := []struct {
-		name     string
-		err      string
-		expected bool
-	}{
-		{"nil formatted", "", false},
-		{"read response error", "read response: unexpected EOF", true},
-		{"body length mismatch", "body length mismatch: Content-Length=1000, received=500", true},
-		{"API error", "LLM API error (status 400): bad request", false},
-		{"parse error", "parse response: unexpected end of JSON input", false},
-		{"request error", "LLM request: connection refused", false},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			if tt.err == "" {
-				if isRetryableError(nil) {
-					t.Error("nil error should not be retryable")
-				}
-				return
-			}
-			err := fmt.Errorf("%s", tt.err)
-			got := isRetryableError(err)
-			if got != tt.expected {
-				t.Errorf("isRetryableError(%q) = %v, want %v", tt.err, got, tt.expected)
-			}
-		})
-	}
-}
@@ -33,14 +33,7 @@ func ParseResponse(response string) (*ReviewResult, error) {
 		// Try to repair before giving up.
 		repaired := repairJSON(cleaned)
 		if err2 := json.Unmarshal([]byte(repaired), &result); err2 != nil {
-			// Include diagnostic info: lengths help identify truncation
-			rawLen := len(response)
-			cleanedLen := len(cleaned)
-			preview := cleaned
-			if len(preview) > 200 {
-				preview = preview[:100] + "..." + preview[len(preview)-100:]
-			}
-			return nil, fmt.Errorf("parse LLM response as JSON: %w\nRaw length: %d, cleaned length: %d\nCleaned preview: %s", err, rawLen, cleanedLen, preview)
+			return nil, fmt.Errorf("parse LLM response as JSON: %w\nRaw response: %s", err, response)
 		}
 	}
Author	SHA1	Message	Date
Rodin	489457c184	ci: retrigger after LLM_BASE_URL secret fix CI / test (pull_request) Successful in 13s Details CI / review (gpt-4.1, gpt, GPT_REVIEW_TOKEN) (pull_request) Successful in 20s Details CI / review (gpt-5, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 1m27s Details CI / review (gpt-5, security, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Successful in 1m52s Details	2026-05-04 23:15:08 -07:00
Rodin	25d1a670bf	fix: redesign repairJSON to handle all reviewer-reported edge cases CI / test (pull_request) Successful in 11s Details CI / review (gpt-4.1, gpt, GPT_REVIEW_TOKEN) (pull_request) Failing after 10s Details CI / review (gpt-5, security, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Failing after 10s Details CI / review (gpt-5, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Failing after 11s Details Rewrites the JSON repair algorithm to address two correctness bugs identified in code review: 1. Interior quoted word before comma: "say "yes", and go" was misidentified as structural close because "," followed the quote. 2. JSON-shaped content in strings: {"key": "val"} inside a string value was being parsed as actual JSON structure. The new approach: - Distinguishes keys from values (only values need repair) - Uses first-valid-candidate scan with deep lookahead - Verifies that after a candidate close, the continuation is not just a structural char but a complete valid JSON pattern - Validates keyword tokens (true/false/null) fully, not just first char - Checks container closes recursively for valid continuation Adds comprehensive tests for all reported edge cases plus a complex combined scenario with nested JSON-like content, quoted words before commas, and multiple failure modes in one string.	2026-05-04 21:27:39 -07:00
Rodin	80a9a7675b	fix: repair unescaped quotes in LLM JSON responses CI / test (pull_request) Successful in 13s Details CI / review (gpt-5, security, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Failing after 13s Details CI / review (gpt-4.1, gpt, GPT_REVIEW_TOKEN) (pull_request) Failing after 13s Details CI / review (gpt-5, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Failing after 12s Details LLMs (especially Sonnet) sometimes emit JSON with unescaped double quotes inside string values, e.g. (e.g. "28") instead of properly escaping them. This caused parse failures in CI. Add a repairJSON fallback that uses a character-by-character scanner to identify interior quotes (those not followed by structural JSON characters) and escape them before retrying the parse. Fixes sonnet-review failures on gargoyle PR #551.	2026-05-03 09:47:22 -07:00