fix(gitea): improve retry logic precision for net.OpError
PR Ready Gate / clear-labels (pull_request) Successful in 2s
CI / test (pull_request) Successful in 17s
CI / review (anthropic--claude-4.6-sonnet, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 46s
CI / review (gpt-5, security, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Successful in 1m11s
CI / review (gpt-5, gpt, GPT_REVIEW_TOKEN) (pull_request) Successful in 1m23s

Address review feedback on isTemporaryNetError being too broad:

1. RetryBackoff field: Added doc comment clarifying it must be
   configured before the first request (addresses concurrency concern).

2. isTemporaryNetError: Now inspects the underlying syscall error
   instead of treating all net.OpError as retriable. Only retries on:
   - ECONNREFUSED (connection refused)
   - ECONNRESET (connection reset)
   - ENETUNREACH (network unreachable)
   - EHOSTUNREACH (host unreachable)
   - ETIMEDOUT (connection timed out)

   Permanent errors like EACCES, EPERM are no longer retried.

3. DNS errors: Changed from Temporary() to IsTimeout, since
   "no such host" is permanent and shouldn't be retried.

4. Empty backoff slice: Added comment explaining that retry without
   delay is intentional when caller explicitly configures it.

Addresses MINOR findings from sonnet-review-bot and gpt-review-bot.
This commit is contained in:
Rodin
2026-05-11 04:32:15 -07:00
parent 090ae3848c
commit ac53ecfa5d
2 changed files with 86 additions and 12 deletions
+44 -9
View File
@@ -15,6 +15,7 @@ import (
"net/http" "net/http"
"net/url" "net/url"
"strings" "strings"
"syscall"
"time" "time"
) )
@@ -56,6 +57,9 @@ type Client struct {
// RetryBackoff defines the delays between retry attempts. // RetryBackoff defines the delays between retry attempts.
// RetryBackoff[i] is the delay before attempt i+1 (after attempt i fails). // RetryBackoff[i] is the delay before attempt i+1 (after attempt i fails).
// If nil, defaults to {1s, 2s}. Set to shorter durations in tests. // If nil, defaults to {1s, 2s}. Set to shorter durations in tests.
//
// This field must be configured before the first request is made.
// Modifying it while requests are in flight is not safe.
RetryBackoff []time.Duration RetryBackoff []time.Duration
} }
@@ -223,25 +227,25 @@ func (c *Client) PostReview(ctx context.Context, owner, repo string, number int,
} }
// isTemporaryNetError reports whether err is a temporary network error worth retrying. // isTemporaryNetError reports whether err is a temporary network error worth retrying.
// This includes connection refused, DNS failures, and timeouts that aren't context-based. // This includes connection refused, network unreachable, connection reset, and DNS
// timeouts. It explicitly excludes permanent errors like permission denied or
// "no such host" DNS failures.
func isTemporaryNetError(err error) bool { func isTemporaryNetError(err error) bool {
if err == nil { if err == nil {
return false return false
} }
// Check for common retriable error patterns in the error chain. // Check for OpError and inspect the underlying syscall error.
// Check OpError first since it embeds net.Error, and we want to catch // Not all OpErrors are transient — permission denied, for example, is permanent.
// connection refused, network unreachable, etc. as retriable.
var opErr *net.OpError var opErr *net.OpError
if errors.As(err, &opErr) { if errors.As(err, &opErr) {
// Connection refused, network unreachable, etc. are typically transient return isRetriableSyscallError(opErr.Err)
return true
} }
// DNS errors are often transient // DNS errors: only retry on timeout, not on "no such host" which is permanent.
var dnsErr *net.DNSError var dnsErr *net.DNSError
if errors.As(err, &dnsErr) { if errors.As(err, &dnsErr) {
return dnsErr.Temporary() return dnsErr.IsTimeout
} }
// Check for net.Error with Timeout() (Temporary is deprecated) // Check for net.Error with Timeout() (Temporary is deprecated)
@@ -253,6 +257,35 @@ func isTemporaryNetError(err error) bool {
return false return false
} }
// isRetriableSyscallError reports whether the underlying error from a net.OpError
// is a transient syscall error worth retrying.
func isRetriableSyscallError(err error) bool {
if err == nil {
return false
}
// Check for syscall.Errno directly or wrapped
var errno syscall.Errno
if errors.As(err, &errno) {
switch errno {
case syscall.ECONNREFUSED, // connection refused — server not listening
syscall.ECONNRESET, // connection reset by peer
syscall.ENETUNREACH, // network unreachable
syscall.EHOSTUNREACH, // host unreachable
syscall.ETIMEDOUT: // connection timed out
return true
default:
// EACCES, EPERM, etc. are permanent — don't retry
return false
}
}
// If we can't identify the specific syscall error, be conservative and retry.
// This handles wrapped errors or platform-specific error types.
// The retry count is limited, so erring on the side of retrying is safe.
return true
}
// doGet performs an HTTP GET request with retry on 5xx errors and temporary // doGet performs an HTTP GET request with retry on 5xx errors and temporary
// network errors. Retries up to 3 times with exponential backoff (1s, 2s delays // network errors. Retries up to 3 times with exponential backoff (1s, 2s delays
// by default; configurable via Client.RetryBackoff for testing). // by default; configurable via Client.RetryBackoff for testing).
@@ -272,7 +305,9 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
var lastErr error var lastErr error
for attempt := 0; attempt < maxAttempts; attempt++ { for attempt := 0; attempt < maxAttempts; attempt++ {
if attempt > 0 { if attempt > 0 {
// Determine delay: use backoff slice if available, otherwise no delay // Determine delay: use backoff slice if available, otherwise retry immediately.
// An empty RetryBackoff slice means "retry without delay" — this is intentional
// as the caller explicitly configured no delays.
var delay time.Duration var delay time.Duration
if attempt-1 < len(backoff) { if attempt-1 < len(backoff) {
delay = backoff[attempt-1] delay = backoff[attempt-1]
+42 -3
View File
@@ -10,6 +10,7 @@ import (
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
"strings" "strings"
"syscall"
"testing" "testing"
"time" "time"
) )
@@ -943,9 +944,20 @@ func TestIsTemporaryNetError(t *testing.T) {
}{ }{
{"nil error", nil, false}, {"nil error", nil, false},
{"plain error", fmt.Errorf("some error"), false}, {"plain error", fmt.Errorf("some error"), false},
{"OpError", &net.OpError{Op: "dial", Err: fmt.Errorf("connection refused")}, true}, // OpError with retriable syscall errors
{"temporary DNSError", &net.DNSError{IsTemporary: true}, true}, {"OpError ECONNREFUSED", &net.OpError{Op: "dial", Err: syscall.ECONNREFUSED}, true},
{"non-temporary DNSError", &net.DNSError{IsTemporary: false}, false}, {"OpError ECONNRESET", &net.OpError{Op: "read", Err: syscall.ECONNRESET}, true},
{"OpError ENETUNREACH", &net.OpError{Op: "dial", Err: syscall.ENETUNREACH}, true},
{"OpError EHOSTUNREACH", &net.OpError{Op: "dial", Err: syscall.EHOSTUNREACH}, true},
{"OpError ETIMEDOUT", &net.OpError{Op: "dial", Err: syscall.ETIMEDOUT}, true},
// OpError with permanent syscall errors — should NOT retry
{"OpError EACCES", &net.OpError{Op: "dial", Err: syscall.EACCES}, false},
{"OpError EPERM", &net.OpError{Op: "dial", Err: syscall.EPERM}, false},
// OpError with unknown inner error — conservative retry
{"OpError unknown inner", &net.OpError{Op: "dial", Err: fmt.Errorf("unknown")}, true},
// DNS errors
{"DNS timeout", &net.DNSError{IsTimeout: true}, true},
{"DNS no such host", &net.DNSError{IsTimeout: false, Name: "bad.host"}, false},
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
@@ -956,3 +968,30 @@ func TestIsTemporaryNetError(t *testing.T) {
}) })
} }
} }
func TestIsRetriableSyscallError(t *testing.T) {
tests := []struct {
name string
err error
want bool
}{
{"nil", nil, false},
{"ECONNREFUSED", syscall.ECONNREFUSED, true},
{"ECONNRESET", syscall.ECONNRESET, true},
{"ENETUNREACH", syscall.ENETUNREACH, true},
{"EHOSTUNREACH", syscall.EHOSTUNREACH, true},
{"ETIMEDOUT", syscall.ETIMEDOUT, true},
{"EACCES (permanent)", syscall.EACCES, false},
{"EPERM (permanent)", syscall.EPERM, false},
{"ENOENT (permanent)", syscall.ENOENT, false},
{"unknown error", fmt.Errorf("something"), true}, // conservative retry
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := isRetriableSyscallError(tt.err)
if got != tt.want {
t.Errorf("isRetriableSyscallError(%v) = %v, want %v", tt.err, got, tt.want)
}
})
}
}