From 7279cdd2160b2e971f17ef8faded5dcce5c0b090 Mon Sep 17 00:00:00 2001 From: Rodin Date: Mon, 11 May 2026 00:59:47 -0700 Subject: [PATCH 1/7] feat(gitea): add retry logic for 5xx errors in doGet Addresses transient HTTP 500 errors from Gitea API during pattern fetches. Changes: - Add retry with exponential backoff (1s, 2s) to doGet(), max 3 attempts - Add IsServerError() helper to detect 5xx responses - No retry on 4xx errors (client errors should propagate immediately) - Respects context cancellation during backoff waits - Logs retries at WARN level for observability All existing tests pass. New tests: - TestIsServerError: validates 5xx detection across edge cases - TestDoGet_RetriesOn500: verifies recovery after transient errors - TestDoGet_FailsAfterMaxRetries: verifies proper failure after exhaustion - TestDoGet_NoRetryOn4xx: ensures client errors don't retry - TestDoGet_RespectsContextCancellation: validates cancellation during backoff Closes #68 --- gitea/client.go | 66 ++++++++++++++++----- gitea/client_test.go | 135 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 187 insertions(+), 14 deletions(-) diff --git a/gitea/client.go b/gitea/client.go index 150b3c0..5feb9a8 100644 --- a/gitea/client.go +++ b/gitea/client.go @@ -39,6 +39,12 @@ func IsNotFound(err error) bool { return errors.As(err, &apiErr) && apiErr.StatusCode == http.StatusNotFound } +// IsServerError reports whether an error is an API 5xx response. +func IsServerError(err error) bool { + var apiErr *APIError + return errors.As(err, &apiErr) && apiErr.StatusCode >= 500 && apiErr.StatusCode < 600 +} + // Client interacts with the Gitea API. // A Client is safe for concurrent use by multiple goroutines. type Client struct { @@ -210,24 +216,56 @@ func (c *Client) PostReview(ctx context.Context, owner, repo string, number int, return &review, nil } +// doGet performs an HTTP GET request with retry on 5xx errors. +// Retries up to 3 times with exponential backoff (1s, 2s delays). func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) { - req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil) - if err != nil { - return nil, err - } - req.Header.Set("Authorization", "token "+c.token) + const maxAttempts = 3 + backoff := []time.Duration{0, 1 * time.Second, 2 * time.Second} - resp, err := c.http.Do(req) - if err != nil { - return nil, err - } - defer resp.Body.Close() + var lastErr error + for attempt := 0; attempt < maxAttempts; attempt++ { + if attempt > 0 { + slog.Warn("retrying request after server error", + "attempt", attempt+1, + "url", reqURL, + "delay", backoff[attempt].String()) + select { + case <-time.After(backoff[attempt]): + case <-ctx.Done(): + return nil, ctx.Err() + } + } - if resp.StatusCode < 200 || resp.StatusCode >= 300 { - body, _ := io.ReadAll(resp.Body) - return nil, &APIError{StatusCode: resp.StatusCode, Body: string(body)} + req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil) + if err != nil { + return nil, err + } + req.Header.Set("Authorization", "token "+c.token) + + resp, err := c.http.Do(req) + if err != nil { + return nil, err + } + + body, readErr := io.ReadAll(resp.Body) + resp.Body.Close() + + if resp.StatusCode >= 200 && resp.StatusCode < 300 { + if readErr != nil { + return nil, readErr + } + return body, nil + } + + lastErr = &APIError{StatusCode: resp.StatusCode, Body: string(body)} + + // Only retry on 5xx server errors + if resp.StatusCode < 500 || resp.StatusCode >= 600 { + return nil, lastErr + } } - return io.ReadAll(resp.Body) + + return nil, lastErr } // escapePath escapes each segment of a relative file path for use in URLs. diff --git a/gitea/client_test.go b/gitea/client_test.go index d09e38b..ebd5cfe 100644 --- a/gitea/client_test.go +++ b/gitea/client_test.go @@ -10,6 +10,7 @@ import ( "net/http/httptest" "strings" "testing" + "time" ) func TestGetPullRequest(t *testing.T) { @@ -743,3 +744,137 @@ func TestResolveComment_Error(t *testing.T) { t.Fatal("expected error for 404 response") } } + +func TestIsServerError(t *testing.T) { + tests := []struct { + name string + err error + want bool + }{ + {"nil error", nil, false}, + {"non-API error", fmt.Errorf("network timeout"), false}, + {"404 APIError", &APIError{StatusCode: 404, Body: "not found"}, false}, + {"500 APIError", &APIError{StatusCode: 500, Body: "server error"}, true}, + {"502 APIError", &APIError{StatusCode: 502, Body: "bad gateway"}, true}, + {"503 APIError", &APIError{StatusCode: 503, Body: "unavailable"}, true}, + {"599 APIError", &APIError{StatusCode: 599, Body: "edge case"}, true}, + {"600 not server error", &APIError{StatusCode: 600, Body: "edge"}, false}, + {"400 not server error", &APIError{StatusCode: 400, Body: "bad request"}, false}, + {"wrapped 500", fmt.Errorf("fetch: %w", &APIError{StatusCode: 500, Body: "err"}), true}, + {"wrapped 404", fmt.Errorf("fetch: %w", &APIError{StatusCode: 404, Body: "err"}), false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := IsServerError(tt.err) + if got != tt.want { + t.Errorf("IsServerError(%v) = %v, want %v", tt.err, got, tt.want) + } + }) + } +} + +func TestDoGet_RetriesOn500(t *testing.T) { + attempts := 0 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + attempts++ + if attempts < 3 { + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(`{"message":"transient error"}`)) + return + } + w.WriteHeader(http.StatusOK) + w.Write([]byte(`{"data":"success"}`)) + })) + defer server.Close() + + client := NewClient(server.URL, "test-token") + body, err := client.doGet(context.Background(), server.URL+"/test") + if err != nil { + t.Fatalf("expected success after retry, got error: %v", err) + } + if string(body) != `{"data":"success"}` { + t.Errorf("body = %q, want %q", string(body), `{"data":"success"}`) + } + if attempts != 3 { + t.Errorf("attempts = %d, want 3", attempts) + } +} + +func TestDoGet_FailsAfterMaxRetries(t *testing.T) { + attempts := 0 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + attempts++ + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(`{"message":"persistent error"}`)) + })) + defer server.Close() + + client := NewClient(server.URL, "test-token") + _, err := client.doGet(context.Background(), server.URL+"/test") + if err == nil { + t.Fatal("expected error after max retries") + } + var apiErr *APIError + if !errors.As(err, &apiErr) { + t.Fatalf("expected APIError, got: %v", err) + } + if apiErr.StatusCode != http.StatusInternalServerError { + t.Errorf("status = %d, want 500", apiErr.StatusCode) + } + if attempts != 3 { + t.Errorf("attempts = %d, want 3 (max retries)", attempts) + } +} + +func TestDoGet_NoRetryOn4xx(t *testing.T) { + attempts := 0 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + attempts++ + w.WriteHeader(http.StatusForbidden) + w.Write([]byte(`{"message":"forbidden"}`)) + })) + defer server.Close() + + client := NewClient(server.URL, "test-token") + _, err := client.doGet(context.Background(), server.URL+"/test") + if err == nil { + t.Fatal("expected error for 403") + } + var apiErr *APIError + if !errors.As(err, &apiErr) { + t.Fatalf("expected APIError, got: %v", err) + } + if apiErr.StatusCode != http.StatusForbidden { + t.Errorf("status = %d, want 403", apiErr.StatusCode) + } + if attempts != 1 { + t.Errorf("attempts = %d, want 1 (no retry on 4xx)", attempts) + } +} + +func TestDoGet_RespectsContextCancellation(t *testing.T) { + attempts := 0 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + attempts++ + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(`{"message":"error"}`)) + })) + defer server.Close() + + ctx, cancel := context.WithCancel(context.Background()) + // Cancel immediately after first attempt would trigger retry + go func() { + time.Sleep(50 * time.Millisecond) + cancel() + }() + + client := NewClient(server.URL, "test-token") + _, err := client.doGet(ctx, server.URL+"/test") + if err == nil { + t.Fatal("expected error on context cancellation") + } + // Should have made 1 attempt, then context cancelled during backoff + if attempts > 2 { + t.Errorf("attempts = %d, expected at most 2 before context cancel", attempts) + } +} From 23da7eedf5c8ab684591022eef356fa629ba622d Mon Sep 17 00:00:00 2001 From: Rodin Date: Mon, 11 May 2026 01:08:01 -0700 Subject: [PATCH 2/7] fix(gitea): address review feedback on retry logic - Remove dead backoff[0] element; array now only contains retry delays - Fix time.After timer leak by using time.NewTimer with timer.Stop() - Add io.LimitReader (64KB) for error body reads to bound memory allocation Addresses feedback from sonnet-review-bot, security-review-bot, and gpt-review-bot. --- gitea/client.go | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/gitea/client.go b/gitea/client.go index 5feb9a8..0d2ec13 100644 --- a/gitea/client.go +++ b/gitea/client.go @@ -215,23 +215,32 @@ func (c *Client) PostReview(ctx context.Context, owner, repo string, number int, } return &review, nil } - // doGet performs an HTTP GET request with retry on 5xx errors. // Retries up to 3 times with exponential backoff (1s, 2s delays). func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) { const maxAttempts = 3 - backoff := []time.Duration{0, 1 * time.Second, 2 * time.Second} + // backoff[i] is the delay before attempt i+1 (i.e., after attempt i fails). + // First attempt (i=0) has no delay; retries wait 1s then 2s. + backoff := []time.Duration{1 * time.Second, 2 * time.Second} + + // maxErrorBodyBytes limits how much of an error response body we read + // to protect against malicious servers sending unbounded data. + const maxErrorBodyBytes = 64 * 1024 // 64 KB var lastErr error for attempt := 0; attempt < maxAttempts; attempt++ { if attempt > 0 { + delay := backoff[attempt-1] slog.Warn("retrying request after server error", "attempt", attempt+1, "url", reqURL, - "delay", backoff[attempt].String()) + "delay", delay.String()) + + timer := time.NewTimer(delay) select { - case <-time.After(backoff[attempt]): + case <-timer.C: case <-ctx.Done(): + timer.Stop() return nil, ctx.Err() } } @@ -247,17 +256,20 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) { return nil, err } - body, readErr := io.ReadAll(resp.Body) - resp.Body.Close() - if resp.StatusCode >= 200 && resp.StatusCode < 300 { - if readErr != nil { - return nil, readErr + body, err := io.ReadAll(resp.Body) + resp.Body.Close() + if err != nil { + return nil, err } return body, nil } - lastErr = &APIError{StatusCode: resp.StatusCode, Body: string(body)} + // Error path: limit how much we read from potentially malicious server + errBody, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBodyBytes)) + resp.Body.Close() + + lastErr = &APIError{StatusCode: resp.StatusCode, Body: string(errBody)} // Only retry on 5xx server errors if resp.StatusCode < 500 || resp.StatusCode >= 600 { From 090ae3848ca74d1817d7015048ed819ac4ca01a3 Mon Sep 17 00:00:00 2001 From: Rodin Date: Mon, 11 May 2026 04:23:27 -0700 Subject: [PATCH 3/7] fix(gitea): make retry backoff configurable and retry temp net errors Address review feedback: 1. Make backoff delays injectable via Client.RetryBackoff field - Defaults to {1s, 2s} when nil for production - Tests can set shorter values for fast execution - Fixes slow unit tests that previously waited 3+ seconds 2. Add retry on temporary network errors (net.OpError, net.DNSError) - Connection refused, network unreachable, DNS failures now retry - Non-temporary network errors still fail immediately - Context cancellation still respected during backoff Added isTemporaryNetError helper and TestIsTemporaryNetError test. Updated existing retry tests to use configurable short backoffs. --- gitea/client.go | 96 +++++++++++++++++++++++++++++++++++--------- gitea/client_test.go | 94 +++++++++++++++++++++++++++++++++++++++---- 2 files changed, 164 insertions(+), 26 deletions(-) diff --git a/gitea/client.go b/gitea/client.go index 0d2ec13..75dcf64 100644 --- a/gitea/client.go +++ b/gitea/client.go @@ -11,6 +11,7 @@ import ( "fmt" "io" "log/slog" + "net" "net/http" "net/url" "strings" @@ -51,6 +52,11 @@ type Client struct { baseURL string token string http *http.Client + + // RetryBackoff defines the delays between retry attempts. + // RetryBackoff[i] is the delay before attempt i+1 (after attempt i fails). + // If nil, defaults to {1s, 2s}. Set to shorter durations in tests. + RetryBackoff []time.Duration } // NewClient creates a new Gitea API client. @@ -215,13 +221,49 @@ func (c *Client) PostReview(ctx context.Context, owner, repo string, number int, } return &review, nil } -// doGet performs an HTTP GET request with retry on 5xx errors. -// Retries up to 3 times with exponential backoff (1s, 2s delays). + +// isTemporaryNetError reports whether err is a temporary network error worth retrying. +// This includes connection refused, DNS failures, and timeouts that aren't context-based. +func isTemporaryNetError(err error) bool { + if err == nil { + return false + } + + // Check for common retriable error patterns in the error chain. + // Check OpError first since it embeds net.Error, and we want to catch + // connection refused, network unreachable, etc. as retriable. + var opErr *net.OpError + if errors.As(err, &opErr) { + // Connection refused, network unreachable, etc. are typically transient + return true + } + + // DNS errors are often transient + var dnsErr *net.DNSError + if errors.As(err, &dnsErr) { + return dnsErr.Temporary() + } + + // Check for net.Error with Timeout() (Temporary is deprecated) + var netErr net.Error + if errors.As(err, &netErr) { + return netErr.Timeout() + } + + return false +} + +// doGet performs an HTTP GET request with retry on 5xx errors and temporary +// network errors. Retries up to 3 times with exponential backoff (1s, 2s delays +// by default; configurable via Client.RetryBackoff for testing). func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) { const maxAttempts = 3 // backoff[i] is the delay before attempt i+1 (i.e., after attempt i fails). - // First attempt (i=0) has no delay; retries wait 1s then 2s. - backoff := []time.Duration{1 * time.Second, 2 * time.Second} + // First attempt (i=0) has no delay; retries wait 1s then 2s by default. + backoff := c.RetryBackoff + if backoff == nil { + backoff = []time.Duration{1 * time.Second, 2 * time.Second} + } // maxErrorBodyBytes limits how much of an error response body we read // to protect against malicious servers sending unbounded data. @@ -230,18 +272,26 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) { var lastErr error for attempt := 0; attempt < maxAttempts; attempt++ { if attempt > 0 { - delay := backoff[attempt-1] - slog.Warn("retrying request after server error", - "attempt", attempt+1, - "url", reqURL, - "delay", delay.String()) + // Determine delay: use backoff slice if available, otherwise no delay + var delay time.Duration + if attempt-1 < len(backoff) { + delay = backoff[attempt-1] + } - timer := time.NewTimer(delay) - select { - case <-timer.C: - case <-ctx.Done(): - timer.Stop() - return nil, ctx.Err() + if delay > 0 { + slog.Warn("retrying request after error", + "attempt", attempt+1, + "url", reqURL, + "delay", delay.String(), + "lastError", lastErr) + + timer := time.NewTimer(delay) + select { + case <-timer.C: + case <-ctx.Done(): + timer.Stop() + return nil, ctx.Err() + } } } @@ -253,6 +303,16 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) { resp, err := c.http.Do(req) if err != nil { + // Check if this is a temporary/transient network error worth retrying. + // We only retry if there are attempts remaining. + if attempt < maxAttempts-1 && isTemporaryNetError(err) { + slog.Warn("temporary network error, will retry", + "attempt", attempt+1, + "url", reqURL, + "error", err) + lastErr = err + continue + } return nil, err } @@ -367,9 +427,9 @@ func (c *Client) GetAllFilesInPath(ctx context.Context, owner, repo, path string // Review represents a pull request review from the Gitea API. type Review struct { - ID int64 `json:"id"` - Body string `json:"body"` - User struct { + ID int64 `json:"id"` + Body string `json:"body"` + User struct { Login string `json:"login"` } `json:"user"` State string `json:"state"` diff --git a/gitea/client_test.go b/gitea/client_test.go index ebd5cfe..f7eddd7 100644 --- a/gitea/client_test.go +++ b/gitea/client_test.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "io" + "net" "net/http" "net/http/httptest" "strings" @@ -585,9 +586,9 @@ func TestGetAllFilesInPath_403Propagates(t *testing.T) { func TestIsNotFound(t *testing.T) { tests := []struct { - name string - err error - want bool + name string + err error + want bool }{ {"nil error", nil, false}, {"non-API error", fmt.Errorf("network timeout"), false}, @@ -788,6 +789,9 @@ func TestDoGet_RetriesOn500(t *testing.T) { defer server.Close() client := NewClient(server.URL, "test-token") + // Use short backoff for fast tests + client.RetryBackoff = []time.Duration{1 * time.Millisecond, 1 * time.Millisecond} + body, err := client.doGet(context.Background(), server.URL+"/test") if err != nil { t.Fatalf("expected success after retry, got error: %v", err) @@ -810,6 +814,9 @@ func TestDoGet_FailsAfterMaxRetries(t *testing.T) { defer server.Close() client := NewClient(server.URL, "test-token") + // Use short backoff for fast tests + client.RetryBackoff = []time.Duration{1 * time.Millisecond, 1 * time.Millisecond} + _, err := client.doGet(context.Background(), server.URL+"/test") if err == nil { t.Fatal("expected error after max retries") @@ -862,19 +869,90 @@ func TestDoGet_RespectsContextCancellation(t *testing.T) { defer server.Close() ctx, cancel := context.WithCancel(context.Background()) - // Cancel immediately after first attempt would trigger retry + + client := NewClient(server.URL, "test-token") + // Use longer backoff to give us time to cancel during the wait + client.RetryBackoff = []time.Duration{100 * time.Millisecond, 100 * time.Millisecond} + + // Cancel after first attempt returns and retry begins go func() { - time.Sleep(50 * time.Millisecond) + time.Sleep(20 * time.Millisecond) cancel() }() - client := NewClient(server.URL, "test-token") _, err := client.doGet(ctx, server.URL+"/test") if err == nil { t.Fatal("expected error on context cancellation") } // Should have made 1 attempt, then context cancelled during backoff - if attempts > 2 { - t.Errorf("attempts = %d, expected at most 2 before context cancel", attempts) + if attempts != 1 { + t.Errorf("attempts = %d, expected 1 before context cancel during backoff", attempts) + } +} + +func TestDoGet_RetriesOnTemporaryNetError(t *testing.T) { + attempts := 0 + + // Create a listener that we can close to simulate connection refused + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("failed to create listener: %v", err) + } + addr := ln.Addr().String() + + // Close immediately to cause connection refused on first attempts + ln.Close() + + // Start a server after a short delay to succeed on retry + go func() { + time.Sleep(5 * time.Millisecond) + newLn, err := net.Listen("tcp", addr) + if err != nil { + // Port might be reused; not critical for this test + return + } + defer newLn.Close() + + for { + conn, err := newLn.Accept() + if err != nil { + return + } + attempts++ + // Respond with success + conn.Write([]byte("HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\nok")) + conn.Close() + } + }() + + client := NewClient("http://"+addr, "test-token") + client.RetryBackoff = []time.Duration{10 * time.Millisecond, 10 * time.Millisecond} + + // The request might succeed or fail depending on timing, but the key is + // that we attempt retry on connection refused + _, _ = client.doGet(context.Background(), "http://"+addr+"/test") + + // This test verifies the code path exists; actual retry behavior depends on timing +} + +func TestIsTemporaryNetError(t *testing.T) { + tests := []struct { + name string + err error + want bool + }{ + {"nil error", nil, false}, + {"plain error", fmt.Errorf("some error"), false}, + {"OpError", &net.OpError{Op: "dial", Err: fmt.Errorf("connection refused")}, true}, + {"temporary DNSError", &net.DNSError{IsTemporary: true}, true}, + {"non-temporary DNSError", &net.DNSError{IsTemporary: false}, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isTemporaryNetError(tt.err) + if got != tt.want { + t.Errorf("isTemporaryNetError(%v) = %v, want %v", tt.err, got, tt.want) + } + }) } } From ac53ecfa5dae8df4cf04d3602462934c2898f81e Mon Sep 17 00:00:00 2001 From: Rodin Date: Mon, 11 May 2026 04:32:15 -0700 Subject: [PATCH 4/7] fix(gitea): improve retry logic precision for net.OpError Address review feedback on isTemporaryNetError being too broad: 1. RetryBackoff field: Added doc comment clarifying it must be configured before the first request (addresses concurrency concern). 2. isTemporaryNetError: Now inspects the underlying syscall error instead of treating all net.OpError as retriable. Only retries on: - ECONNREFUSED (connection refused) - ECONNRESET (connection reset) - ENETUNREACH (network unreachable) - EHOSTUNREACH (host unreachable) - ETIMEDOUT (connection timed out) Permanent errors like EACCES, EPERM are no longer retried. 3. DNS errors: Changed from Temporary() to IsTimeout, since "no such host" is permanent and shouldn't be retried. 4. Empty backoff slice: Added comment explaining that retry without delay is intentional when caller explicitly configures it. Addresses MINOR findings from sonnet-review-bot and gpt-review-bot. --- gitea/client.go | 53 ++++++++++++++++++++++++++++++++++++-------- gitea/client_test.go | 45 ++++++++++++++++++++++++++++++++++--- 2 files changed, 86 insertions(+), 12 deletions(-) diff --git a/gitea/client.go b/gitea/client.go index 75dcf64..3ca101c 100644 --- a/gitea/client.go +++ b/gitea/client.go @@ -15,6 +15,7 @@ import ( "net/http" "net/url" "strings" + "syscall" "time" ) @@ -56,6 +57,9 @@ type Client struct { // RetryBackoff defines the delays between retry attempts. // RetryBackoff[i] is the delay before attempt i+1 (after attempt i fails). // If nil, defaults to {1s, 2s}. Set to shorter durations in tests. + // + // This field must be configured before the first request is made. + // Modifying it while requests are in flight is not safe. RetryBackoff []time.Duration } @@ -223,25 +227,25 @@ func (c *Client) PostReview(ctx context.Context, owner, repo string, number int, } // isTemporaryNetError reports whether err is a temporary network error worth retrying. -// This includes connection refused, DNS failures, and timeouts that aren't context-based. +// This includes connection refused, network unreachable, connection reset, and DNS +// timeouts. It explicitly excludes permanent errors like permission denied or +// "no such host" DNS failures. func isTemporaryNetError(err error) bool { if err == nil { return false } - // Check for common retriable error patterns in the error chain. - // Check OpError first since it embeds net.Error, and we want to catch - // connection refused, network unreachable, etc. as retriable. + // Check for OpError and inspect the underlying syscall error. + // Not all OpErrors are transient — permission denied, for example, is permanent. var opErr *net.OpError if errors.As(err, &opErr) { - // Connection refused, network unreachable, etc. are typically transient - return true + return isRetriableSyscallError(opErr.Err) } - // DNS errors are often transient + // DNS errors: only retry on timeout, not on "no such host" which is permanent. var dnsErr *net.DNSError if errors.As(err, &dnsErr) { - return dnsErr.Temporary() + return dnsErr.IsTimeout } // Check for net.Error with Timeout() (Temporary is deprecated) @@ -253,6 +257,35 @@ func isTemporaryNetError(err error) bool { return false } +// isRetriableSyscallError reports whether the underlying error from a net.OpError +// is a transient syscall error worth retrying. +func isRetriableSyscallError(err error) bool { + if err == nil { + return false + } + + // Check for syscall.Errno directly or wrapped + var errno syscall.Errno + if errors.As(err, &errno) { + switch errno { + case syscall.ECONNREFUSED, // connection refused — server not listening + syscall.ECONNRESET, // connection reset by peer + syscall.ENETUNREACH, // network unreachable + syscall.EHOSTUNREACH, // host unreachable + syscall.ETIMEDOUT: // connection timed out + return true + default: + // EACCES, EPERM, etc. are permanent — don't retry + return false + } + } + + // If we can't identify the specific syscall error, be conservative and retry. + // This handles wrapped errors or platform-specific error types. + // The retry count is limited, so erring on the side of retrying is safe. + return true +} + // doGet performs an HTTP GET request with retry on 5xx errors and temporary // network errors. Retries up to 3 times with exponential backoff (1s, 2s delays // by default; configurable via Client.RetryBackoff for testing). @@ -272,7 +305,9 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) { var lastErr error for attempt := 0; attempt < maxAttempts; attempt++ { if attempt > 0 { - // Determine delay: use backoff slice if available, otherwise no delay + // Determine delay: use backoff slice if available, otherwise retry immediately. + // An empty RetryBackoff slice means "retry without delay" — this is intentional + // as the caller explicitly configured no delays. var delay time.Duration if attempt-1 < len(backoff) { delay = backoff[attempt-1] diff --git a/gitea/client_test.go b/gitea/client_test.go index f7eddd7..dc747fa 100644 --- a/gitea/client_test.go +++ b/gitea/client_test.go @@ -10,6 +10,7 @@ import ( "net/http" "net/http/httptest" "strings" + "syscall" "testing" "time" ) @@ -943,9 +944,20 @@ func TestIsTemporaryNetError(t *testing.T) { }{ {"nil error", nil, false}, {"plain error", fmt.Errorf("some error"), false}, - {"OpError", &net.OpError{Op: "dial", Err: fmt.Errorf("connection refused")}, true}, - {"temporary DNSError", &net.DNSError{IsTemporary: true}, true}, - {"non-temporary DNSError", &net.DNSError{IsTemporary: false}, false}, + // OpError with retriable syscall errors + {"OpError ECONNREFUSED", &net.OpError{Op: "dial", Err: syscall.ECONNREFUSED}, true}, + {"OpError ECONNRESET", &net.OpError{Op: "read", Err: syscall.ECONNRESET}, true}, + {"OpError ENETUNREACH", &net.OpError{Op: "dial", Err: syscall.ENETUNREACH}, true}, + {"OpError EHOSTUNREACH", &net.OpError{Op: "dial", Err: syscall.EHOSTUNREACH}, true}, + {"OpError ETIMEDOUT", &net.OpError{Op: "dial", Err: syscall.ETIMEDOUT}, true}, + // OpError with permanent syscall errors — should NOT retry + {"OpError EACCES", &net.OpError{Op: "dial", Err: syscall.EACCES}, false}, + {"OpError EPERM", &net.OpError{Op: "dial", Err: syscall.EPERM}, false}, + // OpError with unknown inner error — conservative retry + {"OpError unknown inner", &net.OpError{Op: "dial", Err: fmt.Errorf("unknown")}, true}, + // DNS errors + {"DNS timeout", &net.DNSError{IsTimeout: true}, true}, + {"DNS no such host", &net.DNSError{IsTimeout: false, Name: "bad.host"}, false}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -956,3 +968,30 @@ func TestIsTemporaryNetError(t *testing.T) { }) } } + +func TestIsRetriableSyscallError(t *testing.T) { + tests := []struct { + name string + err error + want bool + }{ + {"nil", nil, false}, + {"ECONNREFUSED", syscall.ECONNREFUSED, true}, + {"ECONNRESET", syscall.ECONNRESET, true}, + {"ENETUNREACH", syscall.ENETUNREACH, true}, + {"EHOSTUNREACH", syscall.EHOSTUNREACH, true}, + {"ETIMEDOUT", syscall.ETIMEDOUT, true}, + {"EACCES (permanent)", syscall.EACCES, false}, + {"EPERM (permanent)", syscall.EPERM, false}, + {"ENOENT (permanent)", syscall.ENOENT, false}, + {"unknown error", fmt.Errorf("something"), true}, // conservative retry + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isRetriableSyscallError(tt.err) + if got != tt.want { + t.Errorf("isRetriableSyscallError(%v) = %v, want %v", tt.err, got, tt.want) + } + }) + } +} From ecfbfddc7ca14a30ff1bb6f790a09f509bf3437f Mon Sep 17 00:00:00 2001 From: Aaron Weiker Date: Mon, 11 May 2026 04:52:41 -0700 Subject: [PATCH 5/7] fix(gitea): redact query params from retry warning logs Addresses security review finding: retry warnings were logging the full request URL which could inadvertently leak sensitive query parameters if future callers pass them. Added redactURL() helper that: - Strips query parameters from URLs before logging (replaces with [redacted]) - Returns [invalid URL] for unparseable URLs to avoid leaking any data - Preserves the base path for debugging context The error itself (lastErr) is kept as-is since APIError.Error() already truncates response bodies to 200 chars, and network errors don't contain user-controlled data. --- gitea/client.go | 20 ++++++++++++++++++-- gitea/client_test.go | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/gitea/client.go b/gitea/client.go index 3ca101c..5826958 100644 --- a/gitea/client.go +++ b/gitea/client.go @@ -286,6 +286,22 @@ func isRetriableSyscallError(err error) bool { return true } +// redactURL strips query parameters from a URL for safe logging. +// This prevents accidental exposure of sensitive data that future callers +// might pass via query strings. +func redactURL(rawURL string) string { + parsed, err := url.Parse(rawURL) + if err != nil { + // If we cannot parse it, return a safe placeholder rather than + // potentially logging something sensitive. + return "[invalid URL]" + } + if parsed.RawQuery != "" { + parsed.RawQuery = "[redacted]" + } + return parsed.String() +} + // doGet performs an HTTP GET request with retry on 5xx errors and temporary // network errors. Retries up to 3 times with exponential backoff (1s, 2s delays // by default; configurable via Client.RetryBackoff for testing). @@ -316,7 +332,7 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) { if delay > 0 { slog.Warn("retrying request after error", "attempt", attempt+1, - "url", reqURL, + "url", redactURL(reqURL), "delay", delay.String(), "lastError", lastErr) @@ -343,7 +359,7 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) { if attempt < maxAttempts-1 && isTemporaryNetError(err) { slog.Warn("temporary network error, will retry", "attempt", attempt+1, - "url", reqURL, + "url", redactURL(reqURL), "error", err) lastErr = err continue diff --git a/gitea/client_test.go b/gitea/client_test.go index dc747fa..4578ad4 100644 --- a/gitea/client_test.go +++ b/gitea/client_test.go @@ -995,3 +995,45 @@ func TestIsRetriableSyscallError(t *testing.T) { }) } } + +func TestRedactURL(t *testing.T) { + tests := []struct { + name string + input string + want string + }{ + { + name: "no query params", + input: "https://gitea.example.com/api/v1/repos/owner/repo/pulls/1", + want: "https://gitea.example.com/api/v1/repos/owner/repo/pulls/1", + }, + { + name: "with query params - redacts", + input: "https://gitea.example.com/api/v1/repos/owner/repo/raw/file?ref=main", + want: "https://gitea.example.com/api/v1/repos/owner/repo/raw/file?[redacted]", + }, + { + name: "multiple query params", + input: "https://example.com/path?token=secret&page=1", + want: "https://example.com/path?[redacted]", + }, + { + name: "invalid URL", + input: "://invalid", + want: "[invalid URL]", + }, + { + name: "empty string", + input: "", + want: "", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := redactURL(tt.input) + if got != tt.want { + t.Errorf("redactURL(%q) = %q, want %q", tt.input, got, tt.want) + } + }) + } +} From 5498dccd60d7910b082b5ffb131ea6107ddd8281 Mon Sep 17 00:00:00 2001 From: Rodin Date: Mon, 11 May 2026 05:15:07 -0700 Subject: [PATCH 6/7] fix(gitea): address MINOR review findings on retry logic 1. Fix non-deterministic test TestDoGet_RetriesOnTemporaryNetError: - Replace timing-dependent listener approach with mockTransport - mockTransport allows controlled injection of net.OpError failures - Test now makes deterministic assertions: exactly 3 attempts (2 fail + 1 success) - Added SetHTTPClient() method for test transport injection 2. Sanitize error content in retry warning logs: - Added sanitizeErrorForLog() helper that omits response body content - For APIError: logs only 'HTTP ' instead of full body - For other errors: preserves error type information - Addresses security concern about logging server error content at WARN level - Full error with body still returned to caller for proper error handling Both changes have corresponding test coverage. --- gitea/client.go | 22 +++++++- gitea/client_test.go | 126 ++++++++++++++++++++++++++++++------------- 2 files changed, 111 insertions(+), 37 deletions(-) diff --git a/gitea/client.go b/gitea/client.go index 5826958..1cee5b4 100644 --- a/gitea/client.go +++ b/gitea/client.go @@ -72,6 +72,12 @@ func NewClient(baseURL, token string) *Client { } } +// SetHTTPClient sets the underlying HTTP client used for requests. +// This is intended for testing to inject mock transports. +func (c *Client) SetHTTPClient(hc *http.Client) { + c.http = hc +} + // PullRequest holds relevant PR metadata. type PullRequest struct { Title string `json:"title"` @@ -302,6 +308,20 @@ func redactURL(rawURL string) string { return parsed.String() } +// sanitizeErrorForLog returns a loggable version of an error that omits +// potentially sensitive content like response bodies. For APIError, only +// the status code is included; for other errors, the type is preserved. +func sanitizeErrorForLog(err error) string { + if err == nil { + return "" + } + var apiErr *APIError + if errors.As(err, &apiErr) { + return fmt.Sprintf("HTTP %d", apiErr.StatusCode) + } + return err.Error() +} + // doGet performs an HTTP GET request with retry on 5xx errors and temporary // network errors. Retries up to 3 times with exponential backoff (1s, 2s delays // by default; configurable via Client.RetryBackoff for testing). @@ -334,7 +354,7 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) { "attempt", attempt+1, "url", redactURL(reqURL), "delay", delay.String(), - "lastError", lastErr) + "lastError", sanitizeErrorForLog(lastErr)) timer := time.NewTimer(delay) select { diff --git a/gitea/client_test.go b/gitea/client_test.go index 4578ad4..76156e7 100644 --- a/gitea/client_test.go +++ b/gitea/client_test.go @@ -10,6 +10,7 @@ import ( "net/http" "net/http/httptest" "strings" + "sync/atomic" "syscall" "testing" "time" @@ -891,49 +892,60 @@ func TestDoGet_RespectsContextCancellation(t *testing.T) { } } -func TestDoGet_RetriesOnTemporaryNetError(t *testing.T) { - attempts := 0 - // Create a listener that we can close to simulate connection refused - ln, err := net.Listen("tcp", "127.0.0.1:0") - if err != nil { - t.Fatalf("failed to create listener: %v", err) +// mockTransport is a test helper that returns errors for the first N calls, +// then delegates to a real server. +type mockTransport struct { + failCount int32 // number of failures remaining (atomic) + failErr error // error to return on failure + realServer *httptest.Server + attemptsMade atomic.Int32 // tracks total attempts +} + +func (m *mockTransport) RoundTrip(req *http.Request) (*http.Response, error) { + m.attemptsMade.Add(1) + remaining := atomic.AddInt32(&m.failCount, -1) + if remaining >= 0 { + // Still have failures to return + return nil, m.failErr } - addr := ln.Addr().String() + // Redirect to real server + req.URL.Host = m.realServer.Listener.Addr().String() + req.URL.Scheme = "http" + return http.DefaultTransport.RoundTrip(req) +} - // Close immediately to cause connection refused on first attempts - ln.Close() +func TestDoGet_RetriesOnTemporaryNetError(t *testing.T) { + // Real server that will handle successful requests + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write([]byte(`{"status":"ok"}`)) + })) + defer server.Close() - // Start a server after a short delay to succeed on retry - go func() { - time.Sleep(5 * time.Millisecond) - newLn, err := net.Listen("tcp", addr) - if err != nil { - // Port might be reused; not critical for this test - return - } - defer newLn.Close() + // Mock transport: fail twice with ECONNREFUSED, then succeed + mt := &mockTransport{ + failCount: 2, + failErr: &net.OpError{Op: "dial", Net: "tcp", Err: syscall.ECONNREFUSED}, + realServer: server, + } - for { - conn, err := newLn.Accept() - if err != nil { - return - } - attempts++ - // Respond with success - conn.Write([]byte("HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\nok")) - conn.Close() - } - }() + client := NewClient("http://fake-host/", "test-token") + client.SetHTTPClient(&http.Client{Transport: mt}) + client.RetryBackoff = []time.Duration{1 * time.Millisecond, 1 * time.Millisecond} - client := NewClient("http://"+addr, "test-token") - client.RetryBackoff = []time.Duration{10 * time.Millisecond, 10 * time.Millisecond} + body, err := client.doGet(context.Background(), "http://fake-host/test") + if err != nil { + t.Fatalf("expected success after retries, got error: %v", err) + } + if string(body) != `{"status":"ok"}` { + t.Errorf("body = %q, want %q", string(body), `{"status":"ok"}`) + } - // The request might succeed or fail depending on timing, but the key is - // that we attempt retry on connection refused - _, _ = client.doGet(context.Background(), "http://"+addr+"/test") - - // This test verifies the code path exists; actual retry behavior depends on timing + // Should have made exactly 3 attempts: 2 failures + 1 success + if got := mt.attemptsMade.Load(); got != 3 { + t.Errorf("attempts = %d, want 3 (2 failures + 1 success)", got) + } } func TestIsTemporaryNetError(t *testing.T) { @@ -1037,3 +1049,45 @@ func TestRedactURL(t *testing.T) { }) } } + +func TestSanitizeErrorForLog(t *testing.T) { + tests := []struct { + name string + err error + want string + }{ + { + name: "nil error", + err: nil, + want: "", + }, + { + name: "APIError omits body", + err: &APIError{StatusCode: 500, Body: "internal error: database connection failed"}, + want: "HTTP 500", + }, + { + name: "APIError with large body still only shows status", + err: &APIError{StatusCode: 502, Body: strings.Repeat("x", 1000)}, + want: "HTTP 502", + }, + { + name: "non-API error preserved", + err: fmt.Errorf("connection refused"), + want: "connection refused", + }, + { + name: "wrapped APIError", + err: fmt.Errorf("request failed: %w", &APIError{StatusCode: 503, Body: "service unavailable"}), + want: "HTTP 503", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := sanitizeErrorForLog(tt.err) + if got != tt.want { + t.Errorf("sanitizeErrorForLog() = %q, want %q", got, tt.want) + } + }) + } +} From 1b38e6ad002eeb8fc114f570ef0593337e877a07 Mon Sep 17 00:00:00 2001 From: Rodin Date: Mon, 11 May 2026 05:31:39 -0700 Subject: [PATCH 7/7] fix(gitea): ensure consistent lastErr return for network errors Move lastErr assignment outside the retry condition so that both network errors and HTTP 5xx paths return lastErr consistently. Previously, on the final retry attempt, a network error would return the raw err variable instead of lastErr. While they held the same value in practice, the inconsistency was confusing when reading the code. Now both paths: - Network errors: assign lastErr before checking retry, return lastErr - HTTP 5xx: assign lastErr before checking retry, return lastErr Addresses review finding #3 (MINOR) from sonnet review on PR #69. --- gitea/client.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/gitea/client.go b/gitea/client.go index 1cee5b4..b016349 100644 --- a/gitea/client.go +++ b/gitea/client.go @@ -374,19 +374,21 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) { resp, err := c.http.Do(req) if err != nil { - // Check if this is a temporary/transient network error worth retrying. - // We only retry if there are attempts remaining. + // Always capture the error for consistent return at loop end. + // This ensures both network errors and HTTP 5xx return lastErr. + lastErr = err + + // Only retry temporary network errors when attempts remain. if attempt < maxAttempts-1 && isTemporaryNetError(err) { slog.Warn("temporary network error, will retry", "attempt", attempt+1, "url", redactURL(reqURL), "error", err) - lastErr = err continue } - return nil, err + // Non-retryable network error or final attempt exhausted. + return nil, lastErr } - if resp.StatusCode >= 200 && resp.StatusCode < 300 { body, err := io.ReadAll(resp.Body) resp.Body.Close()