feat(gitea): add retry logic for 5xx errors #69
@@ -11,9 +11,11 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
)
|
||||
|
||||
@@ -39,12 +41,26 @@ func IsNotFound(err error) bool {
|
||||
return errors.As(err, &apiErr) && apiErr.StatusCode == http.StatusNotFound
|
||||
|
|
||||
}
|
||||
|
||||
// IsServerError reports whether an error is an API 5xx response.
|
||||
func IsServerError(err error) bool {
|
||||
var apiErr *APIError
|
||||
return errors.As(err, &apiErr) && apiErr.StatusCode >= 500 && apiErr.StatusCode < 600
|
||||
}
|
||||
|
||||
// Client interacts with the Gitea API.
|
||||
// A Client is safe for concurrent use by multiple goroutines.
|
||||
type Client struct {
|
||||
baseURL string
|
||||
token string
|
||||
|
sonnet-review-bot
commented
[MINOR] RetryBackoff is an exported field on Client, which is documented as 'safe for concurrent use by multiple goroutines'. However, the comment on RetryBackoff says 'Modifying it while requests are in flight is not safe.' This creates an inconsistency: the type-level doc says concurrent-safe, but this field is an exception. Consider either making RetryBackoff unexported and configuring it only through a constructor option (e.g., a WithRetryBackoff option function or NewClient parameter), or at minimum adding a prominent warning at the type level that RetryBackoff must be configured before first use. **[MINOR]** RetryBackoff is an exported field on Client, which is documented as 'safe for concurrent use by multiple goroutines'. However, the comment on RetryBackoff says 'Modifying it while requests are in flight is not safe.' This creates an inconsistency: the type-level doc says concurrent-safe, but this field is an exception. Consider either making RetryBackoff unexported and configuring it only through a constructor option (e.g., a WithRetryBackoff option function or NewClient parameter), or at minimum adding a prominent warning at the type level that RetryBackoff must be configured before first use.
|
||||
http *http.Client
|
||||
|
||||
|
sonnet-review-bot
commented
[MINOR] RetryBackoff is an exported field on Client, which exposes a testing/configuration hook as part of the public API surface. Per repository conventions and idiomatic Go (nil-opts pattern), this is usable and documented, but it makes the test-configuration concern part of the permanent API contract. A more idiomatic approach would be an unexported field with a package-internal setter, but the current approach is pragmatic and acceptable given the documented intent. **[MINOR]** RetryBackoff is an exported field on Client, which exposes a testing/configuration hook as part of the public API surface. Per repository conventions and idiomatic Go (nil-opts pattern), this is usable and documented, but it makes the test-configuration concern part of the permanent API contract. A more idiomatic approach would be an unexported field with a package-internal setter, but the current approach is pragmatic and acceptable given the documented intent.
sonnet-review-bot
commented
[MINOR] **[MINOR]** `RetryBackoff` is an exported field on `Client`, which violates the concurrency safety guarantee documented on the type: "A Client is safe for concurrent use by multiple goroutines." The field's own comment acknowledges "Modifying it while requests are in flight is not safe", but this creates an exception to the advertised contract. Consider making it unexported and configuring it only through `NewClient` or a constructor option, or at minimum update the concurrency safety doc comment on `Client` to note the exception.
|
||||
// RetryBackoff defines the delays between retry attempts.
|
||||
// RetryBackoff[i] is the delay before attempt i+1 (after attempt i fails).
|
||||
// If nil, defaults to {1s, 2s}. Set to shorter durations in tests.
|
||||
//
|
||||
// This field must be configured before the first request is made.
|
||||
// Modifying it while requests are in flight is not safe.
|
||||
RetryBackoff []time.Duration
|
||||
}
|
||||
|
||||
// NewClient creates a new Gitea API client.
|
||||
@@ -56,6 +72,12 @@ func NewClient(baseURL, token string) *Client {
|
||||
}
|
||||
|
sonnet-review-bot
commented
[NIT] **[NIT]** `SetHTTPClient` is exported and documented as 'intended for testing'. Exporting test-only methods on production types is generally discouraged — it pollutes the public API and could be misused. Consider using the `export_test.go` pattern to expose this only during tests, or making the `http` field settable via a constructor option.
|
||||
}
|
||||
|
||||
// SetHTTPClient sets the underlying HTTP client used for requests.
|
||||
// This is intended for testing to inject mock transports.
|
||||
func (c *Client) SetHTTPClient(hc *http.Client) {
|
||||
c.http = hc
|
||||
}
|
||||
|
||||
// PullRequest holds relevant PR metadata.
|
||||
type PullRequest struct {
|
||||
Title string `json:"title"`
|
||||
@@ -210,24 +232,185 @@ func (c *Client) PostReview(ctx context.Context, owner, repo string, number int,
|
||||
return &review, nil
|
||||
}
|
||||
|
||||
|
gpt-review-bot
commented
[MINOR] isTemporaryNetError treats any net.OpError as retriable, which may cause retries on permanent failures (e.g., no such host). Consider narrowing the check (e.g., prefer net.Error timeouts, DNSError.IsTemporary, or specific syscall errors) to avoid unnecessary retries. **[MINOR]** isTemporaryNetError treats any net.OpError as retriable, which may cause retries on permanent failures (e.g., no such host). Consider narrowing the check (e.g., prefer net.Error timeouts, DNSError.IsTemporary, or specific syscall errors) to avoid unnecessary retries.
|
||||
// isTemporaryNetError reports whether err is a temporary network error worth retrying.
|
||||
// This includes connection refused, network unreachable, connection reset, and DNS
|
||||
|
security-review-bot marked this conversation as resolved
Outdated
[MINOR] On non-2xx responses, the code reads the entire response body with io.ReadAll and stores it in APIError. This can allow a malicious or misbehaving server to force large allocations. Consider limiting the size for error bodies (e.g., io.LimitReader) since the body is only used for error context. **[MINOR]** On non-2xx responses, the code reads the entire response body with io.ReadAll and stores it in APIError. This can allow a malicious or misbehaving server to force large allocations. Consider limiting the size for error bodies (e.g., io.LimitReader) since the body is only used for error context.
|
||||
// timeouts. It explicitly excludes permanent errors like permission denied or
|
||||
// "no such host" DNS failures.
|
||||
func isTemporaryNetError(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
|
sonnet-review-bot
commented
[MINOR] Network/transport errors from **[MINOR]** Network/transport errors from `c.http.Do(req)` (e.g., connection refused, DNS failure) are returned immediately without retry. Transient network errors are often just as worth retrying as 5xx responses. This is a design choice, but worth noting — the PR description only mentions 5xx so this is intentional, but it limits the usefulness of the retry in some failure modes.
|
||||
|
||||
// Check for OpError and inspect the underlying syscall error.
|
||||
// Not all OpErrors are transient — permission denied, for example, is permanent.
|
||||
var opErr *net.OpError
|
||||
if errors.As(err, &opErr) {
|
||||
return isRetriableSyscallError(opErr.Err)
|
||||
}
|
||||
|
||||
// DNS errors: only retry on timeout, not on "no such host" which is permanent.
|
||||
var dnsErr *net.DNSError
|
||||
if errors.As(err, &dnsErr) {
|
||||
return dnsErr.IsTimeout
|
||||
}
|
||||
|
||||
// Check for net.Error with Timeout() (Temporary is deprecated)
|
||||
var netErr net.Error
|
||||
if errors.As(err, &netErr) {
|
||||
return netErr.Timeout()
|
||||
|
gpt-review-bot
commented
[NIT] The retry condition duplicates the 5xx check inline. For consistency and reuse, consider using the new IsServerError helper here (errors.As on APIError or check status) to keep logic centralized. **[NIT]** The retry condition duplicates the 5xx check inline. For consistency and reuse, consider using the new IsServerError helper here (errors.As on APIError or check status) to keep logic centralized.
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// isRetriableSyscallError reports whether the underlying error from a net.OpError
|
||||
// is a transient syscall error worth retrying.
|
||||
func isRetriableSyscallError(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
|
sonnet-review-bot
commented
[NIT] Network errors from **[NIT]** Network errors from `c.http.Do(req)` are returned immediately without retry. Transient network errors (e.g., connection reset, EOF) are just as likely to be transient as 5xx responses. This is a deliberate design choice and acceptable, but worth a comment explaining why only 5xx is retried and not transport errors.
|
||||
}
|
||||
|
gpt-review-bot
commented
[NIT] Retry log 'attempt' field uses attempt+1 during backoff, which may be interpreted ambiguously. Consider logging the retry number explicitly (e.g., 'retry' count) or the next attempt number for clarity. **[NIT]** Retry log 'attempt' field uses attempt+1 during backoff, which may be interpreted ambiguously. Consider logging the retry number explicitly (e.g., 'retry' count) or the next attempt number for clarity.
|
||||
|
||||
// Check for syscall.Errno directly or wrapped
|
||||
var errno syscall.Errno
|
||||
if errors.As(err, &errno) {
|
||||
switch errno {
|
||||
case syscall.ECONNREFUSED, // connection refused — server not listening
|
||||
|
sonnet-review-bot
commented
[MINOR] In **[MINOR]** In `isTemporaryNetError`, treating all `*net.OpError` as retriable is overly broad. OpError covers connection refused (transient) but also permission denied and other permanent conditions. A more precise check would inspect `opErr.Err` for syscall errors. That said, the comment acknowledges this and the practical impact on the Gitea use-case is low.
|
||||
syscall.ECONNRESET, // connection reset by peer
|
||||
syscall.ENETUNREACH, // network unreachable
|
||||
syscall.EHOSTUNREACH, // host unreachable
|
||||
syscall.ETIMEDOUT: // connection timed out
|
||||
return true
|
||||
default:
|
||||
|
sonnet-review-bot
commented
[NIT] The **[NIT]** The `GetAllFilesInPath_500Propagates` test (already existing) relies on the server always returning 500, but with retry logic now in place, the test server will be hit 3 times before the error propagates. The test still passes (because the server always returns 500), but test execution time increases by ~3ms (with default 0-delay in tests using `RetryBackoff = nil`). Tests for `GetAllFilesInPath` don't set `RetryBackoff`, so they'll use the default 1s/2s delays — but since `httptest.NewServer` is local, the server succeeds immediately and returns 500, so these tests hit the server 3 times with 1s+2s delays... Actually this could make existing integration-style tests significantly slower. Worth verifying that existing tests (not the new retry tests) that hit 500 responses set short `RetryBackoff`, or accept the 3s slowdown.
|
||||
// EACCES, EPERM, etc. are permanent — don't retry
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// If we can't identify the specific syscall error, be conservative and retry.
|
||||
// This handles wrapped errors or platform-specific error types.
|
||||
// The retry count is limited, so erring on the side of retrying is safe.
|
||||
return true
|
||||
}
|
||||
|
||||
// redactURL strips query parameters from a URL for safe logging.
|
||||
// This prevents accidental exposure of sensitive data that future callers
|
||||
|
sonnet-review-bot
commented
[NIT] The comment **[NIT]** The comment `// backoff[i] is the delay before attempt i+1 (i.e., after attempt i fails).` appears both as an inline comment and in the `RetryBackoff` field doc comment. The duplication is harmless but slightly redundant.
|
||||
// might pass via query strings.
|
||||
func redactURL(rawURL string) string {
|
||||
|
sonnet-review-bot
commented
[MINOR] When **[MINOR]** When `delay == 0` (i.e., the backoff slice is shorter than expected or empty), the retry loop silently skips the warning log and delay. This is a valid edge case but could be surprising — if someone sets `RetryBackoff = []time.Duration{}`, all retries happen with zero delay and no logging. The code is correct but slightly implicit; a comment noting this would improve clarity.
|
||||
parsed, err := url.Parse(rawURL)
|
||||
|
sonnet-review-bot
commented
[MINOR] When delay is 0 (either because the backoff slice is exhausted or an explicit zero-duration backoff was provided), the retry happens silently with no log entry. This means if maxAttempts > len(backoff)+1, the extra retry attempts are invisible. A log entry at WARN level would help with observability even for zero-delay retries. The comment 'An empty RetryBackoff slice means retry without delay' is correct but the logging gap could be confusing during production incident investigation. **[MINOR]** When delay is 0 (either because the backoff slice is exhausted or an explicit zero-duration backoff was provided), the retry happens silently with no log entry. This means if maxAttempts > len(backoff)+1, the extra retry attempts are invisible. A log entry at WARN level would help with observability even for zero-delay retries. The comment 'An empty RetryBackoff slice means retry without delay' is correct but the logging gap could be confusing during production incident investigation.
|
||||
if err != nil {
|
||||
// If we cannot parse it, return a safe placeholder rather than
|
||||
|
sonnet-review-bot
commented
[MINOR] When **[MINOR]** When `delay == 0` (because `attempt-1 >= len(backoff)`), the retry proceeds silently without any log message. The WARN log is only emitted when `delay > 0`. In the case where someone sets `RetryBackoff = []time.Duration{}` (empty, not nil), all retries happen silently with no logging at all. This is a minor observability gap — it could be confusing in production if retries happen but nothing is logged. Consider logging even for zero-delay retries, or at least when `attempt > 0`.
|
||||
// potentially logging something sensitive.
|
||||
return "[invalid URL]"
|
||||
}
|
||||
if parsed.RawQuery != "" {
|
||||
parsed.RawQuery = "[redacted]"
|
||||
}
|
||||
|
sonnet-review-bot
commented
[MINOR] The **[MINOR]** The `RetryBackoff` field is exported on `Client`, making it part of the public API surface. This is used as a testing seam, but exporting a mutable slice field on a type documented as 'safe for concurrent use' is a design smell. The field comment itself warns 'Modifying it while requests are in flight is not safe', which contradicts the concurrency safety claim for the type. A cleaner pattern would be to keep it unexported and provide a `WithRetryBackoff([]time.Duration) *Client` constructor option, or accept it only in a constructor. Since it's already exported and the comment addresses the limitation, this is minor but worth noting for future refactors.
|
||||
return parsed.String()
|
||||
}
|
||||
|
||||
|
sonnet-review-bot
commented
[NIT] The log field name **[NIT]** The log field name `lastError` uses camelCase. slog convention (and the existing `slog.Warn` call at line 323) uses lowercase snake-case or single-word keys. Recommend `"last_error"` or `"error"` for consistency with the other log call in this function which uses `"error"`.
|
||||
// sanitizeErrorForLog returns a loggable version of an error that omits
|
||||
// potentially sensitive content like response bodies. For APIError, only
|
||||
// the status code is included; for other errors, the type is preserved.
|
||||
func sanitizeErrorForLog(err error) string {
|
||||
if err == nil {
|
||||
|
[MINOR] Retry warning logs include the error object (lastError) which will render the API error message (truncated to 200 chars) and could expose portions of server error responses in logs. While bodies are size-limited and URLs are redacted, consider the operational sensitivity of logging server error content at WARN level. **[MINOR]** Retry warning logs include the error object (lastError) which will render the API error message (truncated to 200 chars) and could expose portions of server error responses in logs. While bodies are size-limited and URLs are redacted, consider the operational sensitivity of logging server error content at WARN level.
|
||||
return "<nil>"
|
||||
|
sonnet-review-bot
commented
[MINOR] When isTemporaryNetError returns true and we set lastErr = err and continue, the delay logic in the next iteration uses attempt-1 as the backoff index. However, unlike HTTP 5xx errors where lastErr is always set before the delay block, network errors also set lastErr. The code is correct (lastErr will be non-nil for the log message), but the flow could be clearer — when a network error occurs on attempt 2 (the last), the code falls through to 'return nil, err' (not lastErr). This is actually correct behavior (returns the raw error), but it is inconsistent with the HTTP error path which always returns lastErr. Minor readability issue. **[MINOR]** When isTemporaryNetError returns true and we set lastErr = err and continue, the delay logic in the next iteration uses attempt-1 as the backoff index. However, unlike HTTP 5xx errors where lastErr is always set before the delay block, network errors also set lastErr. The code is correct (lastErr will be non-nil for the log message), but the flow could be clearer — when a network error occurs on attempt 2 (the last), the code falls through to 'return nil, err' (not lastErr). This is actually correct behavior (returns the raw error), but it is inconsistent with the HTTP error path which always returns lastErr. Minor readability issue.
|
||||
}
|
||||
|
sonnet-review-bot
commented
[MINOR] Timer leak on zero-delay path: when **[MINOR]** Timer leak on zero-delay path: when `delay == 0` (i.e., `attempt-1 >= len(backoff)` or `backoff` is empty), the code skips the timer block entirely and logs nothing. This is correct behavior. However, the `slog.Warn` log is only emitted when `delay > 0`, meaning silent retries happen with no observability when the backoff slice is shorter than `maxAttempts-1`. Consider logging retries even when delay is zero, or document this intentional behavior in the comment.
|
||||
var apiErr *APIError
|
||||
if errors.As(err, &apiErr) {
|
||||
return fmt.Sprintf("HTTP %d", apiErr.StatusCode)
|
||||
}
|
||||
return err.Error()
|
||||
}
|
||||
|
||||
// doGet performs an HTTP GET request with retry on 5xx errors and temporary
|
||||
// network errors. Retries up to 3 times with exponential backoff (1s, 2s delays
|
||||
// by default; configurable via Client.RetryBackoff for testing).
|
||||
func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
const maxAttempts = 3
|
||||
// backoff[i] is the delay before attempt i+1 (i.e., after attempt i fails).
|
||||
// First attempt (i=0) has no delay; retries wait 1s then 2s by default.
|
||||
backoff := c.RetryBackoff
|
||||
if backoff == nil {
|
||||
backoff = []time.Duration{1 * time.Second, 2 * time.Second}
|
||||
}
|
||||
req.Header.Set("Authorization", "token "+c.token)
|
||||
|
||||
resp, err := c.http.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
// maxErrorBodyBytes limits how much of an error response body we read
|
||||
// to protect against malicious servers sending unbounded data.
|
||||
const maxErrorBodyBytes = 64 * 1024 // 64 KB
|
||||
|
||||
|
sonnet-review-bot
commented
[MINOR] When **[MINOR]** When `delay == 0` (either because the backoff slice is exhausted or the caller set a zero duration), the retry happens immediately without any log message. This means silent retries occur with no observability when `attempt-1 >= len(backoff)`. The logging block is gated on `delay > 0`, so a zero-delay retry (e.g., `RetryBackoff = []time.Duration{}`) produces no log output at all. Consider logging the retry even at zero delay, or restructuring so the log always fires on retry regardless of delay.
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return nil, &APIError{StatusCode: resp.StatusCode, Body: string(body)}
|
||||
var lastErr error
|
||||
for attempt := 0; attempt < maxAttempts; attempt++ {
|
||||
if attempt > 0 {
|
||||
// Determine delay: use backoff slice if available, otherwise retry immediately.
|
||||
// An empty RetryBackoff slice means "retry without delay" — this is intentional
|
||||
// as the caller explicitly configured no delays.
|
||||
var delay time.Duration
|
||||
if attempt-1 < len(backoff) {
|
||||
delay = backoff[attempt-1]
|
||||
}
|
||||
|
||||
if delay > 0 {
|
||||
slog.Warn("retrying request after error",
|
||||
"attempt", attempt+1,
|
||||
|
sonnet-review-bot
commented
[NIT] The **[NIT]** The `RetryBackoff` field is exported (capital R), which exposes an implementation detail of the retry mechanism as a public API. The doc comment explicitly warns 'Modifying it while requests are in flight is not safe', which means the `Client` concurrency safety documentation ('A Client is safe for concurrent use by multiple goroutines') is now conditionally true. This is a minor design tension worth noting — the concurrency safety claim in the struct comment should probably be qualified, or `RetryBackoff` should only be settable before any request is made (which the doc comment does say, but the type system can't enforce it).
|
||||
"url", redactURL(reqURL),
|
||||
"delay", delay.String(),
|
||||
"lastError", sanitizeErrorForLog(lastErr))
|
||||
|
||||
timer := time.NewTimer(delay)
|
||||
select {
|
||||
case <-timer.C:
|
||||
case <-ctx.Done():
|
||||
timer.Stop()
|
||||
return nil, ctx.Err()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("Authorization", "token "+c.token)
|
||||
|
||||
resp, err := c.http.Do(req)
|
||||
if err != nil {
|
||||
// Always capture the error for consistent return at loop end.
|
||||
// This ensures both network errors and HTTP 5xx return lastErr.
|
||||
lastErr = err
|
||||
|
||||
// Only retry temporary network errors when attempts remain.
|
||||
if attempt < maxAttempts-1 && isTemporaryNetError(err) {
|
||||
slog.Warn("temporary network error, will retry",
|
||||
"attempt", attempt+1,
|
||||
"url", redactURL(reqURL),
|
||||
"error", err)
|
||||
continue
|
||||
}
|
||||
// Non-retryable network error or final attempt exhausted.
|
||||
return nil, lastErr
|
||||
}
|
||||
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
resp.Body.Close()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return body, nil
|
||||
}
|
||||
|
||||
// Error path: limit how much we read from potentially malicious server
|
||||
errBody, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBodyBytes))
|
||||
resp.Body.Close()
|
||||
|
||||
lastErr = &APIError{StatusCode: resp.StatusCode, Body: string(errBody)}
|
||||
|
||||
// Only retry on 5xx server errors
|
||||
if resp.StatusCode < 500 || resp.StatusCode >= 600 {
|
||||
return nil, lastErr
|
||||
}
|
||||
}
|
||||
return io.ReadAll(resp.Body)
|
||||
|
||||
return nil, lastErr
|
||||
}
|
||||
|
||||
// escapePath escapes each segment of a relative file path for use in URLs.
|
||||
@@ -317,9 +500,9 @@ func (c *Client) GetAllFilesInPath(ctx context.Context, owner, repo, path string
|
||||
|
||||
// Review represents a pull request review from the Gitea API.
|
||||
type Review struct {
|
||||
ID int64 `json:"id"`
|
||||
Body string `json:"body"`
|
||||
User struct {
|
||||
ID int64 `json:"id"`
|
||||
Body string `json:"body"`
|
||||
User struct {
|
||||
Login string `json:"login"`
|
||||
} `json:"user"`
|
||||
State string `json:"state"`
|
||||
|
||||
@@ -6,10 +6,14 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"syscall"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestGetPullRequest(t *testing.T) {
|
||||
@@ -584,9 +588,9 @@ func TestGetAllFilesInPath_403Propagates(t *testing.T) {
|
||||
|
||||
func TestIsNotFound(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
err error
|
||||
want bool
|
||||
name string
|
||||
err error
|
||||
want bool
|
||||
}{
|
||||
{"nil error", nil, false},
|
||||
{"non-API error", fmt.Errorf("network timeout"), false},
|
||||
@@ -743,3 +747,347 @@ func TestResolveComment_Error(t *testing.T) {
|
||||
t.Fatal("expected error for 404 response")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsServerError(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
err error
|
||||
want bool
|
||||
}{
|
||||
{"nil error", nil, false},
|
||||
{"non-API error", fmt.Errorf("network timeout"), false},
|
||||
{"404 APIError", &APIError{StatusCode: 404, Body: "not found"}, false},
|
||||
{"500 APIError", &APIError{StatusCode: 500, Body: "server error"}, true},
|
||||
{"502 APIError", &APIError{StatusCode: 502, Body: "bad gateway"}, true},
|
||||
{"503 APIError", &APIError{StatusCode: 503, Body: "unavailable"}, true},
|
||||
{"599 APIError", &APIError{StatusCode: 599, Body: "edge case"}, true},
|
||||
{"600 not server error", &APIError{StatusCode: 600, Body: "edge"}, false},
|
||||
{"400 not server error", &APIError{StatusCode: 400, Body: "bad request"}, false},
|
||||
{"wrapped 500", fmt.Errorf("fetch: %w", &APIError{StatusCode: 500, Body: "err"}), true},
|
||||
{"wrapped 404", fmt.Errorf("fetch: %w", &APIError{StatusCode: 404, Body: "err"}), false},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := IsServerError(tt.err)
|
||||
if got != tt.want {
|
||||
t.Errorf("IsServerError(%v) = %v, want %v", tt.err, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDoGet_RetriesOn500(t *testing.T) {
|
||||
attempts := 0
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
attempts++
|
||||
if attempts < 3 {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
w.Write([]byte(`{"message":"transient error"}`))
|
||||
return
|
||||
}
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(`{"data":"success"}`))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
client := NewClient(server.URL, "test-token")
|
||||
// Use short backoff for fast tests
|
||||
client.RetryBackoff = []time.Duration{1 * time.Millisecond, 1 * time.Millisecond}
|
||||
|
||||
body, err := client.doGet(context.Background(), server.URL+"/test")
|
||||
if err != nil {
|
||||
t.Fatalf("expected success after retry, got error: %v", err)
|
||||
}
|
||||
if string(body) != `{"data":"success"}` {
|
||||
t.Errorf("body = %q, want %q", string(body), `{"data":"success"}`)
|
||||
}
|
||||
if attempts != 3 {
|
||||
t.Errorf("attempts = %d, want 3", attempts)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDoGet_FailsAfterMaxRetries(t *testing.T) {
|
||||
attempts := 0
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
attempts++
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
w.Write([]byte(`{"message":"persistent error"}`))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
client := NewClient(server.URL, "test-token")
|
||||
// Use short backoff for fast tests
|
||||
client.RetryBackoff = []time.Duration{1 * time.Millisecond, 1 * time.Millisecond}
|
||||
|
||||
_, err := client.doGet(context.Background(), server.URL+"/test")
|
||||
if err == nil {
|
||||
t.Fatal("expected error after max retries")
|
||||
}
|
||||
var apiErr *APIError
|
||||
if !errors.As(err, &apiErr) {
|
||||
t.Fatalf("expected APIError, got: %v", err)
|
||||
}
|
||||
if apiErr.StatusCode != http.StatusInternalServerError {
|
||||
t.Errorf("status = %d, want 500", apiErr.StatusCode)
|
||||
}
|
||||
if attempts != 3 {
|
||||
t.Errorf("attempts = %d, want 3 (max retries)", attempts)
|
||||
|
sonnet-review-bot
commented
[NIT] Double blank line before the mockTransport type declaration. **[NIT]** Double blank line before the mockTransport type declaration.
|
||||
}
|
||||
}
|
||||
|
||||
func TestDoGet_NoRetryOn4xx(t *testing.T) {
|
||||
attempts := 0
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
|
sonnet-review-bot
commented
[NIT] **[NIT]** `TestDoGet_RetriesOn500` and `TestDoGet_FailsAfterMaxRetries` will be slow in CI because the actual 1s and 2s backoff delays fire. The tests don't inject a clock or configurable backoff. Since the convention file notes `go test ./...` must pass, and these tests will take ~3s and ~1s respectively to complete, consider whether the repo has a policy on slow unit tests. This is a common trade-off with time-based tests and not a blocker.
|
||||
attempts++
|
||||
|
sonnet-review-bot
commented
[NIT] Double blank line between **[NIT]** Double blank line between `TestDoGet_RespectsContextCancellation` and the `mockTransport` type definition. Minor formatting issue.
|
||||
w.WriteHeader(http.StatusForbidden)
|
||||
w.Write([]byte(`{"message":"forbidden"}`))
|
||||
|
sonnet-review-bot
commented
[NIT] TestDoGet_RespectsContextCancellation uses time.Sleep(20 * time.Millisecond) to race against a 100ms backoff timer. This is a time-based race and could theoretically flake if the test machine is under heavy load and the goroutine scheduling delays cause the cancel to fire after the backoff completes. Consider using a channel signal from the server handler to know exactly when the first attempt has finished, rather than relying on wall-clock timing. **[NIT]** TestDoGet_RespectsContextCancellation uses time.Sleep(20 * time.Millisecond) to race against a 100ms backoff timer. This is a time-based race and could theoretically flake if the test machine is under heavy load and the goroutine scheduling delays cause the cancel to fire after the backoff completes. Consider using a channel signal from the server handler to know exactly when the first attempt has finished, rather than relying on wall-clock timing.
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
client := NewClient(server.URL, "test-token")
|
||||
_, err := client.doGet(context.Background(), server.URL+"/test")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for 403")
|
||||
}
|
||||
var apiErr *APIError
|
||||
if !errors.As(err, &apiErr) {
|
||||
t.Fatalf("expected APIError, got: %v", err)
|
||||
}
|
||||
if apiErr.StatusCode != http.StatusForbidden {
|
||||
t.Errorf("status = %d, want 403", apiErr.StatusCode)
|
||||
}
|
||||
|
sonnet-review-bot
commented
[NIT] mockTransport.failCount is declared as int32 with a comment 'atomic' but is accessed with both atomic.AddInt32 and the struct field directly (initial value set in struct literal). The struct literal initialization is safe since it happens before the transport is used, but using atomic.Int32 (a value type from sync/atomic, like attemptsMade) would be more idiomatic and self-documenting. **[NIT]** mockTransport.failCount is declared as int32 with a comment 'atomic' but is accessed with both atomic.AddInt32 and the struct field directly (initial value set in struct literal). The struct literal initialization is safe since it happens before the transport is used, but using atomic.Int32 (a value type from sync/atomic, like attemptsMade) would be more idiomatic and self-documenting.
|
||||
if attempts != 1 {
|
||||
t.Errorf("attempts = %d, want 1 (no retry on 4xx)", attempts)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDoGet_RespectsContextCancellation(t *testing.T) {
|
||||
attempts := 0
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
attempts++
|
||||
|
sonnet-review-bot
commented
[NIT] **[NIT]** `TestDoGet_RetriesOn500` incurs real 1-second backoff delays (attempt 1→2 waits 1s, attempt 2→3 waits 2s), making the test take ~3 seconds minimum. Consider making the backoff durations configurable on the client (or injectable for tests) to avoid slow tests. The context cancellation test already uses a 50ms sleep which is similarly timing-sensitive.
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
w.Write([]byte(`{"message":"error"}`))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
client := NewClient(server.URL, "test-token")
|
||||
// Use longer backoff to give us time to cancel during the wait
|
||||
client.RetryBackoff = []time.Duration{100 * time.Millisecond, 100 * time.Millisecond}
|
||||
|
||||
// Cancel after first attempt returns and retry begins
|
||||
go func() {
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
cancel()
|
||||
}()
|
||||
|
||||
_, err := client.doGet(ctx, server.URL+"/test")
|
||||
if err == nil {
|
||||
t.Fatal("expected error on context cancellation")
|
||||
}
|
||||
// Should have made 1 attempt, then context cancelled during backoff
|
||||
if attempts != 1 {
|
||||
t.Errorf("attempts = %d, expected 1 before context cancel during backoff", attempts)
|
||||
}
|
||||
}
|
||||
|
sonnet-review-bot
commented
[MINOR] **[MINOR]** `TestDoGet_RetriesOnTemporaryNetError` is a timing-dependent test that explicitly acknowledges it may not verify actual retry behavior ('The request might succeed or fail depending on timing'). It provides almost no guaranteed assertion — `attempts` could be 0 and the test still passes. This test would be more valuable if it used a controlled mechanism (e.g., an atomic counter with a channel to sequence listener startup) rather than relying on `time.Sleep`. As-is, it's essentially a no-op assertion that just verifies the code path compiles.
|
||||
|
||||
|
||||
// mockTransport is a test helper that returns errors for the first N calls,
|
||||
// then delegates to a real server.
|
||||
|
sonnet-review-bot
commented
[MINOR] **[MINOR]** `TestDoGet_RetriesOnTemporaryNetError` is timing-dependent and makes no deterministic assertions — it explicitly says "actual retry behavior depends on timing". The test comment acknowledges this. While the test does exercise the code path, it provides weak guarantees. A more reliable approach would be to use `isTemporaryNetError` unit tests (which are present) and a separate test with a mock/stub transport that returns `syscall.ECONNREFUSED` directly, avoiding the real TCP listener race.
|
||||
type mockTransport struct {
|
||||
failCount int32 // number of failures remaining (atomic)
|
||||
failErr error // error to return on failure
|
||||
realServer *httptest.Server
|
||||
attemptsMade atomic.Int32 // tracks total attempts
|
||||
}
|
||||
|
||||
func (m *mockTransport) RoundTrip(req *http.Request) (*http.Response, error) {
|
||||
m.attemptsMade.Add(1)
|
||||
remaining := atomic.AddInt32(&m.failCount, -1)
|
||||
|
sonnet-review-bot
commented
[NIT] **[NIT]** `TestDoGet_RetriesOnTemporaryNetError` has no meaningful assertions — the comment says 'actual retry behavior depends on timing' and the return values are discarded with `_, _`. This test exercises the code path but doesn't verify any postcondition, making it closer to a smoke test. It won't cause flakiness but also won't catch regressions. Consider whether it provides sufficient value or should be replaced with a more deterministic approach (e.g., using a custom http.Transport that fails N times then succeeds).
|
||||
if remaining >= 0 {
|
||||
// Still have failures to return
|
||||
return nil, m.failErr
|
||||
}
|
||||
// Redirect to real server
|
||||
req.URL.Host = m.realServer.Listener.Addr().String()
|
||||
req.URL.Scheme = "http"
|
||||
return http.DefaultTransport.RoundTrip(req)
|
||||
}
|
||||
|
||||
func TestDoGet_RetriesOnTemporaryNetError(t *testing.T) {
|
||||
// Real server that will handle successful requests
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(`{"status":"ok"}`))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
// Mock transport: fail twice with ECONNREFUSED, then succeed
|
||||
mt := &mockTransport{
|
||||
failCount: 2,
|
||||
failErr: &net.OpError{Op: "dial", Net: "tcp", Err: syscall.ECONNREFUSED},
|
||||
realServer: server,
|
||||
}
|
||||
|
||||
client := NewClient("http://fake-host/", "test-token")
|
||||
client.SetHTTPClient(&http.Client{Transport: mt})
|
||||
client.RetryBackoff = []time.Duration{1 * time.Millisecond, 1 * time.Millisecond}
|
||||
|
||||
body, err := client.doGet(context.Background(), "http://fake-host/test")
|
||||
if err != nil {
|
||||
t.Fatalf("expected success after retries, got error: %v", err)
|
||||
}
|
||||
if string(body) != `{"status":"ok"}` {
|
||||
t.Errorf("body = %q, want %q", string(body), `{"status":"ok"}`)
|
||||
}
|
||||
|
||||
// Should have made exactly 3 attempts: 2 failures + 1 success
|
||||
if got := mt.attemptsMade.Load(); got != 3 {
|
||||
t.Errorf("attempts = %d, want 3 (2 failures + 1 success)", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsTemporaryNetError(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
err error
|
||||
want bool
|
||||
}{
|
||||
{"nil error", nil, false},
|
||||
{"plain error", fmt.Errorf("some error"), false},
|
||||
// OpError with retriable syscall errors
|
||||
{"OpError ECONNREFUSED", &net.OpError{Op: "dial", Err: syscall.ECONNREFUSED}, true},
|
||||
{"OpError ECONNRESET", &net.OpError{Op: "read", Err: syscall.ECONNRESET}, true},
|
||||
{"OpError ENETUNREACH", &net.OpError{Op: "dial", Err: syscall.ENETUNREACH}, true},
|
||||
{"OpError EHOSTUNREACH", &net.OpError{Op: "dial", Err: syscall.EHOSTUNREACH}, true},
|
||||
{"OpError ETIMEDOUT", &net.OpError{Op: "dial", Err: syscall.ETIMEDOUT}, true},
|
||||
// OpError with permanent syscall errors — should NOT retry
|
||||
{"OpError EACCES", &net.OpError{Op: "dial", Err: syscall.EACCES}, false},
|
||||
{"OpError EPERM", &net.OpError{Op: "dial", Err: syscall.EPERM}, false},
|
||||
// OpError with unknown inner error — conservative retry
|
||||
{"OpError unknown inner", &net.OpError{Op: "dial", Err: fmt.Errorf("unknown")}, true},
|
||||
// DNS errors
|
||||
{"DNS timeout", &net.DNSError{IsTimeout: true}, true},
|
||||
{"DNS no such host", &net.DNSError{IsTimeout: false, Name: "bad.host"}, false},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := isTemporaryNetError(tt.err)
|
||||
if got != tt.want {
|
||||
t.Errorf("isTemporaryNetError(%v) = %v, want %v", tt.err, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsRetriableSyscallError(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
err error
|
||||
want bool
|
||||
}{
|
||||
{"nil", nil, false},
|
||||
{"ECONNREFUSED", syscall.ECONNREFUSED, true},
|
||||
{"ECONNRESET", syscall.ECONNRESET, true},
|
||||
{"ENETUNREACH", syscall.ENETUNREACH, true},
|
||||
{"EHOSTUNREACH", syscall.EHOSTUNREACH, true},
|
||||
{"ETIMEDOUT", syscall.ETIMEDOUT, true},
|
||||
{"EACCES (permanent)", syscall.EACCES, false},
|
||||
{"EPERM (permanent)", syscall.EPERM, false},
|
||||
{"ENOENT (permanent)", syscall.ENOENT, false},
|
||||
{"unknown error", fmt.Errorf("something"), true}, // conservative retry
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := isRetriableSyscallError(tt.err)
|
||||
if got != tt.want {
|
||||
t.Errorf("isRetriableSyscallError(%v) = %v, want %v", tt.err, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRedactURL(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "no query params",
|
||||
input: "https://gitea.example.com/api/v1/repos/owner/repo/pulls/1",
|
||||
want: "https://gitea.example.com/api/v1/repos/owner/repo/pulls/1",
|
||||
},
|
||||
{
|
||||
name: "with query params - redacts",
|
||||
input: "https://gitea.example.com/api/v1/repos/owner/repo/raw/file?ref=main",
|
||||
want: "https://gitea.example.com/api/v1/repos/owner/repo/raw/file?[redacted]",
|
||||
},
|
||||
{
|
||||
name: "multiple query params",
|
||||
input: "https://example.com/path?token=secret&page=1",
|
||||
want: "https://example.com/path?[redacted]",
|
||||
},
|
||||
{
|
||||
name: "invalid URL",
|
||||
input: "://invalid",
|
||||
want: "[invalid URL]",
|
||||
},
|
||||
{
|
||||
name: "empty string",
|
||||
input: "",
|
||||
want: "",
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := redactURL(tt.input)
|
||||
if got != tt.want {
|
||||
t.Errorf("redactURL(%q) = %q, want %q", tt.input, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizeErrorForLog(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
err error
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "nil error",
|
||||
err: nil,
|
||||
want: "<nil>",
|
||||
},
|
||||
{
|
||||
name: "APIError omits body",
|
||||
err: &APIError{StatusCode: 500, Body: "internal error: database connection failed"},
|
||||
want: "HTTP 500",
|
||||
},
|
||||
{
|
||||
name: "APIError with large body still only shows status",
|
||||
err: &APIError{StatusCode: 502, Body: strings.Repeat("x", 1000)},
|
||||
want: "HTTP 502",
|
||||
},
|
||||
{
|
||||
name: "non-API error preserved",
|
||||
err: fmt.Errorf("connection refused"),
|
||||
want: "connection refused",
|
||||
},
|
||||
{
|
||||
name: "wrapped APIError",
|
||||
err: fmt.Errorf("request failed: %w", &APIError{StatusCode: 503, Body: "service unavailable"}),
|
||||
want: "HTTP 503",
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := sanitizeErrorForLog(tt.err)
|
||||
if got != tt.want {
|
||||
t.Errorf("sanitizeErrorForLog() = %q, want %q", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
[NIT] IsServerError is exported but currently only used in tests; this is fine as a helper, but consider documenting intended external usage or keeping it unexported if not part of the public API.