fix(gitea): make retry backoff configurable and retry temp net errors
PR Ready Gate / clear-labels (pull_request) Successful in 1s
CI / test (pull_request) Successful in 17s
CI / review (anthropic--claude-4.6-sonnet, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 34s
CI / review (gpt-5, security, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Successful in 45s
CI / review (gpt-5, gpt, GPT_REVIEW_TOKEN) (pull_request) Successful in 1m34s

Address review feedback:

1. Make backoff delays injectable via Client.RetryBackoff field
   - Defaults to {1s, 2s} when nil for production
   - Tests can set shorter values for fast execution
   - Fixes slow unit tests that previously waited 3+ seconds

2. Add retry on temporary network errors (net.OpError, net.DNSError)
   - Connection refused, network unreachable, DNS failures now retry
   - Non-temporary network errors still fail immediately
   - Context cancellation still respected during backoff

Added isTemporaryNetError helper and TestIsTemporaryNetError test.
Updated existing retry tests to use configurable short backoffs.
This commit is contained in:
Rodin
2026-05-11 04:23:27 -07:00
parent 23da7eedf5
commit 090ae3848c
2 changed files with 164 additions and 26 deletions
+78 -18
View File
@@ -11,6 +11,7 @@ import (
"fmt"
"io"
"log/slog"
"net"
"net/http"
"net/url"
"strings"
@@ -51,6 +52,11 @@ type Client struct {
baseURL string
token string
http *http.Client
// RetryBackoff defines the delays between retry attempts.
// RetryBackoff[i] is the delay before attempt i+1 (after attempt i fails).
// If nil, defaults to {1s, 2s}. Set to shorter durations in tests.
RetryBackoff []time.Duration
}
// NewClient creates a new Gitea API client.
@@ -215,13 +221,49 @@ func (c *Client) PostReview(ctx context.Context, owner, repo string, number int,
}
return &review, nil
}
// doGet performs an HTTP GET request with retry on 5xx errors.
// Retries up to 3 times with exponential backoff (1s, 2s delays).
// isTemporaryNetError reports whether err is a transient network failure that
// is worth retrying.
//
// The error chain is classified in this order:
//   - nil is not retriable.
//   - Anything matching context.Canceled or context.DeadlineExceeded is not
//     retriable: the caller gave up (or ran out of time), so another attempt
//     cannot succeed.
//   - A *net.DNSError is retriable only when it reports itself as temporary
//     (server misbehaving, lookup timeout). Permanent failures such as
//     "no such host" are not retried.
//   - Any other *net.OpError (connection refused, network unreachable, reset,
//     socket-level i/o timeout, ...) is treated as retriable.
//   - Any other net.Error is retriable only when Timeout() is true.
//   - Everything else is not retriable.
func isTemporaryNetError(err error) bool {
	if err == nil {
		return false
	}

	// Context errors must be ruled out first: they can be wrapped in a
	// *net.OpError, and context.DeadlineExceeded itself reports Timeout() == true,
	// so both would otherwise be misclassified as retriable below.
	// NOTE(review): in recent Go releases an http.Client.Timeout error also
	// matches context.DeadlineExceeded via errors.Is, so it is treated as
	// non-retriable here as well — confirm that is acceptable.
	if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
		return false
	}

	// DNS errors are checked before the generic OpError case because dial-time
	// lookup failures arrive wrapped in a *net.OpError; checking OpError first
	// would retry permanent failures and make this branch unreachable.
	var dnsErr *net.DNSError
	if errors.As(err, &dnsErr) {
		return dnsErr.Temporary()
	}

	// Remaining operation-level failures (connection refused, network
	// unreachable, etc.) are typically transient.
	var opErr *net.OpError
	if errors.As(err, &opErr) {
		return true
	}

	// For any other net.Error only timeouts are retried; Temporary() is
	// deprecated and deliberately not consulted.
	var netErr net.Error
	if errors.As(err, &netErr) {
		return netErr.Timeout()
	}

	return false
}
// doGet performs an HTTP GET request, retrying on 5xx errors and temporary
// network errors. It makes up to 3 attempts in total (at most 2 retries),
// waiting 1s and then 2s between attempts by default; the delays are
// configurable via Client.RetryBackoff, e.g. to keep tests fast.
func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
const maxAttempts = 3
// backoff[i] is the delay before attempt i+1 (i.e., after attempt i fails).
// First attempt (i=0) has no delay; retries wait 1s then 2s.
backoff := []time.Duration{1 * time.Second, 2 * time.Second}
// First attempt (i=0) has no delay; retries wait 1s then 2s by default.
backoff := c.RetryBackoff
if backoff == nil {
backoff = []time.Duration{1 * time.Second, 2 * time.Second}
}
// maxErrorBodyBytes limits how much of an error response body we read
// to protect against malicious servers sending unbounded data.
@@ -230,18 +272,26 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
var lastErr error
for attempt := 0; attempt < maxAttempts; attempt++ {
if attempt > 0 {
delay := backoff[attempt-1]
slog.Warn("retrying request after server error",
"attempt", attempt+1,
"url", reqURL,
"delay", delay.String())
// Determine delay: use backoff slice if available, otherwise no delay
var delay time.Duration
if attempt-1 < len(backoff) {
delay = backoff[attempt-1]
}
timer := time.NewTimer(delay)
select {
case <-timer.C:
case <-ctx.Done():
timer.Stop()
return nil, ctx.Err()
if delay > 0 {
slog.Warn("retrying request after error",
"attempt", attempt+1,
"url", reqURL,
"delay", delay.String(),
"lastError", lastErr)
timer := time.NewTimer(delay)
select {
case <-timer.C:
case <-ctx.Done():
timer.Stop()
return nil, ctx.Err()
}
}
}
@@ -253,6 +303,16 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
resp, err := c.http.Do(req)
if err != nil {
// Check if this is a temporary/transient network error worth retrying.
// We only retry if there are attempts remaining.
if attempt < maxAttempts-1 && isTemporaryNetError(err) {
slog.Warn("temporary network error, will retry",
"attempt", attempt+1,
"url", reqURL,
"error", err)
lastErr = err
continue
}
return nil, err
}
@@ -367,9 +427,9 @@ func (c *Client) GetAllFilesInPath(ctx context.Context, owner, repo, path string
// Review represents a pull request review from the Gitea API.
type Review struct {
ID int64 `json:"id"`
Body string `json:"body"`
User struct {
ID int64 `json:"id"`
Body string `json:"body"`
User struct {
Login string `json:"login"`
} `json:"user"`
State string `json:"state"`