feat(gitea): add retry logic for 5xx errors #69
@@ -72,6 +72,12 @@ func NewClient(baseURL, token string) *Client {
|
||||
}
|
||||
|
|
||||
}
|
||||
|
||||
// SetHTTPClient sets the underlying HTTP client used for requests.
|
||||
// This is intended for testing to inject mock transports.
|
||||
func (c *Client) SetHTTPClient(hc *http.Client) {
|
||||
c.http = hc
|
||||
}
|
||||
|
||||
// PullRequest holds relevant PR metadata.
|
||||
type PullRequest struct {
|
||||
Title string `json:"title"`
|
||||
@@ -302,6 +308,20 @@ func redactURL(rawURL string) string {
|
||||
return parsed.String()
|
||||
}
|
||||
|
||||
|
sonnet-review-bot
commented
[NIT] The log field name **[NIT]** The log field name `lastError` uses camelCase. slog convention (and the existing `slog.Warn` call at line 323) uses lowercase snake-case or single-word keys. Recommend `"last_error"` or `"error"` for consistency with the other log call in this function which uses `"error"`.
|
||||
// sanitizeErrorForLog returns a loggable version of an error that omits
|
||||
// potentially sensitive content like response bodies. For APIError, only
|
||||
// the status code is included; for other errors, the type is preserved.
|
||||
func sanitizeErrorForLog(err error) string {
|
||||
if err == nil {
|
||||
|
[MINOR] Retry warning logs include the error object (lastError) which will render the API error message (truncated to 200 chars) and could expose portions of server error responses in logs. While bodies are size-limited and URLs are redacted, consider the operational sensitivity of logging server error content at WARN level. **[MINOR]** Retry warning logs include the error object (lastError) which will render the API error message (truncated to 200 chars) and could expose portions of server error responses in logs. While bodies are size-limited and URLs are redacted, consider the operational sensitivity of logging server error content at WARN level.
|
||||
return "<nil>"
|
||||
|
sonnet-review-bot
commented
[MINOR] When isTemporaryNetError returns true and we set lastErr = err and continue, the delay logic in the next iteration uses attempt-1 as the backoff index. However, unlike HTTP 5xx errors where lastErr is always set before the delay block, network errors also set lastErr. The code is correct (lastErr will be non-nil for the log message), but the flow could be clearer — when a network error occurs on attempt 2 (the last), the code falls through to 'return nil, err' (not lastErr). This is actually correct behavior (returns the raw error), but it is inconsistent with the HTTP error path which always returns lastErr. Minor readability issue. **[MINOR]** When isTemporaryNetError returns true and we set lastErr = err and continue, the delay logic in the next iteration uses attempt-1 as the backoff index. However, unlike HTTP 5xx errors where lastErr is always set before the delay block, network errors also set lastErr. The code is correct (lastErr will be non-nil for the log message), but the flow could be clearer — when a network error occurs on attempt 2 (the last), the code falls through to 'return nil, err' (not lastErr). This is actually correct behavior (returns the raw error), but it is inconsistent with the HTTP error path which always returns lastErr. Minor readability issue.
|
||||
}
|
||||
|
sonnet-review-bot
commented
[MINOR] Timer leak on zero-delay path: when **[MINOR]** Timer leak on zero-delay path: when `delay == 0` (i.e., `attempt-1 >= len(backoff)` or `backoff` is empty), the code skips the timer block entirely and logs nothing. This is correct behavior. However, the `slog.Warn` log is only emitted when `delay > 0`, meaning silent retries happen with no observability when the backoff slice is shorter than `maxAttempts-1`. Consider logging retries even when delay is zero, or document this intentional behavior in the comment.
|
||||
var apiErr *APIError
|
||||
if errors.As(err, &apiErr) {
|
||||
return fmt.Sprintf("HTTP %d", apiErr.StatusCode)
|
||||
}
|
||||
return err.Error()
|
||||
}
|
||||
|
||||
// doGet performs an HTTP GET request with retry on 5xx errors and temporary
|
||||
// network errors. Retries up to 3 times with exponential backoff (1s, 2s delays
|
||||
// by default; configurable via Client.RetryBackoff for testing).
|
||||
@@ -334,7 +354,7 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
|
||||
"attempt", attempt+1,
|
||||
|
sonnet-review-bot
commented
[NIT] The **[NIT]** The `RetryBackoff` field is exported (capital R), which exposes an implementation detail of the retry mechanism as a public API. The doc comment explicitly warns 'Modifying it while requests are in flight is not safe', which means the `Client` concurrency safety documentation ('A Client is safe for concurrent use by multiple goroutines') is now conditionally true. This is a minor design tension worth noting — the concurrency safety claim in the struct comment should probably be qualified, or `RetryBackoff` should only be settable before any request is made (which the doc comment does say, but the type system can't enforce it).
|
||||
"url", redactURL(reqURL),
|
||||
"delay", delay.String(),
|
||||
"lastError", lastErr)
|
||||
"lastError", sanitizeErrorForLog(lastErr))
|
||||
|
||||
timer := time.NewTimer(delay)
|
||||
select {
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"syscall"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -891,49 +892,60 @@ func TestDoGet_RespectsContextCancellation(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
sonnet-review-bot
commented
[MINOR] **[MINOR]** `TestDoGet_RetriesOnTemporaryNetError` is a timing-dependent test that explicitly acknowledges it may not verify actual retry behavior ('The request might succeed or fail depending on timing'). It provides almost no guaranteed assertion — `attempts` could be 0 and the test still passes. This test would be more valuable if it used a controlled mechanism (e.g., an atomic counter with a channel to sequence listener startup) rather than relying on `time.Sleep`. As-is, it's essentially a no-op assertion that just verifies the code path compiles.
|
||||
|
||||
func TestDoGet_RetriesOnTemporaryNetError(t *testing.T) {
|
||||
attempts := 0
|
||||
|
||||
// Create a listener that we can close to simulate connection refused
|
||||
ln, err := net.Listen("tcp", "127.0.0.1:0")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create listener: %v", err)
|
||||
// mockTransport is a test helper that returns errors for the first N calls,
|
||||
// then delegates to a real server.
|
||||
|
sonnet-review-bot
commented
[MINOR] **[MINOR]** `TestDoGet_RetriesOnTemporaryNetError` is timing-dependent and makes no deterministic assertions — it explicitly says "actual retry behavior depends on timing". The test comment acknowledges this. While the test does exercise the code path, it provides weak guarantees. A more reliable approach would be to use `isTemporaryNetError` unit tests (which are present) and a separate test with a mock/stub transport that returns `syscall.ECONNREFUSED` directly, avoiding the real TCP listener race.
|
||||
type mockTransport struct {
|
||||
failCount int32 // number of failures remaining (atomic)
|
||||
failErr error // error to return on failure
|
||||
realServer *httptest.Server
|
||||
attemptsMade atomic.Int32 // tracks total attempts
|
||||
}
|
||||
|
||||
func (m *mockTransport) RoundTrip(req *http.Request) (*http.Response, error) {
|
||||
m.attemptsMade.Add(1)
|
||||
remaining := atomic.AddInt32(&m.failCount, -1)
|
||||
|
sonnet-review-bot
commented
[NIT] **[NIT]** `TestDoGet_RetriesOnTemporaryNetError` has no meaningful assertions — the comment says 'actual retry behavior depends on timing' and the return values are discarded with `_, _`. This test exercises the code path but doesn't verify any postcondition, making it closer to a smoke test. It won't cause flakiness but also won't catch regressions. Consider whether it provides sufficient value or should be replaced with a more deterministic approach (e.g., using a custom http.Transport that fails N times then succeeds).
|
||||
if remaining >= 0 {
|
||||
// Still have failures to return
|
||||
return nil, m.failErr
|
||||
}
|
||||
addr := ln.Addr().String()
|
||||
// Redirect to real server
|
||||
req.URL.Host = m.realServer.Listener.Addr().String()
|
||||
req.URL.Scheme = "http"
|
||||
return http.DefaultTransport.RoundTrip(req)
|
||||
}
|
||||
|
||||
// Close immediately to cause connection refused on first attempts
|
||||
ln.Close()
|
||||
func TestDoGet_RetriesOnTemporaryNetError(t *testing.T) {
|
||||
// Real server that will handle successful requests
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(`{"status":"ok"}`))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
// Start a server after a short delay to succeed on retry
|
||||
go func() {
|
||||
time.Sleep(5 * time.Millisecond)
|
||||
newLn, err := net.Listen("tcp", addr)
|
||||
if err != nil {
|
||||
// Port might be reused; not critical for this test
|
||||
return
|
||||
}
|
||||
defer newLn.Close()
|
||||
// Mock transport: fail twice with ECONNREFUSED, then succeed
|
||||
mt := &mockTransport{
|
||||
failCount: 2,
|
||||
failErr: &net.OpError{Op: "dial", Net: "tcp", Err: syscall.ECONNREFUSED},
|
||||
realServer: server,
|
||||
}
|
||||
|
||||
for {
|
||||
conn, err := newLn.Accept()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
attempts++
|
||||
// Respond with success
|
||||
conn.Write([]byte("HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\nok"))
|
||||
conn.Close()
|
||||
}
|
||||
}()
|
||||
client := NewClient("http://fake-host/", "test-token")
|
||||
client.SetHTTPClient(&http.Client{Transport: mt})
|
||||
client.RetryBackoff = []time.Duration{1 * time.Millisecond, 1 * time.Millisecond}
|
||||
|
||||
client := NewClient("http://"+addr, "test-token")
|
||||
client.RetryBackoff = []time.Duration{10 * time.Millisecond, 10 * time.Millisecond}
|
||||
body, err := client.doGet(context.Background(), "http://fake-host/test")
|
||||
if err != nil {
|
||||
t.Fatalf("expected success after retries, got error: %v", err)
|
||||
}
|
||||
if string(body) != `{"status":"ok"}` {
|
||||
t.Errorf("body = %q, want %q", string(body), `{"status":"ok"}`)
|
||||
}
|
||||
|
||||
// The request might succeed or fail depending on timing, but the key is
|
||||
// that we attempt retry on connection refused
|
||||
_, _ = client.doGet(context.Background(), "http://"+addr+"/test")
|
||||
|
||||
// This test verifies the code path exists; actual retry behavior depends on timing
|
||||
// Should have made exactly 3 attempts: 2 failures + 1 success
|
||||
if got := mt.attemptsMade.Load(); got != 3 {
|
||||
t.Errorf("attempts = %d, want 3 (2 failures + 1 success)", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsTemporaryNetError(t *testing.T) {
|
||||
@@ -1037,3 +1049,45 @@ func TestRedactURL(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizeErrorForLog(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
err error
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "nil error",
|
||||
err: nil,
|
||||
want: "<nil>",
|
||||
},
|
||||
{
|
||||
name: "APIError omits body",
|
||||
err: &APIError{StatusCode: 500, Body: "internal error: database connection failed"},
|
||||
want: "HTTP 500",
|
||||
},
|
||||
{
|
||||
name: "APIError with large body still only shows status",
|
||||
err: &APIError{StatusCode: 502, Body: strings.Repeat("x", 1000)},
|
||||
want: "HTTP 502",
|
||||
},
|
||||
{
|
||||
name: "non-API error preserved",
|
||||
err: fmt.Errorf("connection refused"),
|
||||
want: "connection refused",
|
||||
},
|
||||
{
|
||||
name: "wrapped APIError",
|
||||
err: fmt.Errorf("request failed: %w", &APIError{StatusCode: 503, Body: "service unavailable"}),
|
||||
want: "HTTP 503",
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := sanitizeErrorForLog(tt.err)
|
||||
if got != tt.want {
|
||||
t.Errorf("sanitizeErrorForLog() = %q, want %q", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
[NIT]
SetHTTPClientis exported and documented as 'intended for testing'. Exporting test-only methods on production types is generally discouraged — it pollutes the public API and could be misused. Consider using theexport_test.gopattern to expose this only during tests, or making thehttpfield settable via a constructor option.