diff --git a/gitea/client.go b/gitea/client.go index 75dcf64..3ca101c 100644 --- a/gitea/client.go +++ b/gitea/client.go @@ -15,6 +15,7 @@ import ( "net/http" "net/url" "strings" + "syscall" "time" ) @@ -56,6 +57,9 @@ type Client struct { // RetryBackoff defines the delays between retry attempts. // RetryBackoff[i] is the delay before attempt i+1 (after attempt i fails). // If nil, defaults to {1s, 2s}. Set to shorter durations in tests. + // + // This field must be configured before the first request is made. + // Modifying it while requests are in flight is not safe. RetryBackoff []time.Duration } @@ -223,25 +227,25 @@ func (c *Client) PostReview(ctx context.Context, owner, repo string, number int, } // isTemporaryNetError reports whether err is a temporary network error worth retrying. -// This includes connection refused, DNS failures, and timeouts that aren't context-based. +// This includes connection refused, network unreachable, connection reset, and DNS +// timeouts. It explicitly excludes permanent errors like permission denied or +// "no such host" DNS failures. func isTemporaryNetError(err error) bool { if err == nil { return false } - // Check for common retriable error patterns in the error chain. - // Check OpError first since it embeds net.Error, and we want to catch - // connection refused, network unreachable, etc. as retriable. + // Check for OpError and inspect the underlying syscall error. + // Not all OpErrors are transient — permission denied, for example, is permanent. var opErr *net.OpError if errors.As(err, &opErr) { - // Connection refused, network unreachable, etc. are typically transient - return true + return isRetriableSyscallError(opErr.Err) } - // DNS errors are often transient + // DNS errors: only retry on timeout, not on "no such host" which is permanent. var dnsErr *net.DNSError if errors.As(err, &dnsErr) { - return dnsErr.Temporary() + return dnsErr.IsTimeout } // Check for net.Error with Timeout() (Temporary is deprecated) @@ -253,6 +257,35 @@ func isTemporaryNetError(err error) bool { return false } +// isRetriableSyscallError reports whether the underlying error from a net.OpError +// is a transient syscall error worth retrying. +func isRetriableSyscallError(err error) bool { + if err == nil { + return false + } + + // Check for syscall.Errno directly or wrapped + var errno syscall.Errno + if errors.As(err, &errno) { + switch errno { + case syscall.ECONNREFUSED, // connection refused — server not listening + syscall.ECONNRESET, // connection reset by peer + syscall.ENETUNREACH, // network unreachable + syscall.EHOSTUNREACH, // host unreachable + syscall.ETIMEDOUT: // connection timed out + return true + default: + // EACCES, EPERM, etc. are permanent — don't retry + return false + } + } + + // If we can't identify the specific syscall error, be conservative and retry. + // This handles wrapped errors or platform-specific error types. + // The retry count is limited, so erring on the side of retrying is safe. + return true +} + // doGet performs an HTTP GET request with retry on 5xx errors and temporary // network errors. Retries up to 3 times with exponential backoff (1s, 2s delays // by default; configurable via Client.RetryBackoff for testing). @@ -272,7 +305,9 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) { var lastErr error for attempt := 0; attempt < maxAttempts; attempt++ { if attempt > 0 { - // Determine delay: use backoff slice if available, otherwise no delay + // Determine delay: use backoff slice if available, otherwise retry immediately. + // An empty RetryBackoff slice means "retry without delay" — this is intentional + // as the caller explicitly configured no delays. var delay time.Duration if attempt-1 < len(backoff) { delay = backoff[attempt-1] diff --git a/gitea/client_test.go b/gitea/client_test.go index f7eddd7..dc747fa 100644 --- a/gitea/client_test.go +++ b/gitea/client_test.go @@ -10,6 +10,7 @@ import ( "net/http" "net/http/httptest" "strings" + "syscall" "testing" "time" ) @@ -943,9 +944,20 @@ func TestIsTemporaryNetError(t *testing.T) { }{ {"nil error", nil, false}, {"plain error", fmt.Errorf("some error"), false}, - {"OpError", &net.OpError{Op: "dial", Err: fmt.Errorf("connection refused")}, true}, - {"temporary DNSError", &net.DNSError{IsTemporary: true}, true}, - {"non-temporary DNSError", &net.DNSError{IsTemporary: false}, false}, + // OpError with retriable syscall errors + {"OpError ECONNREFUSED", &net.OpError{Op: "dial", Err: syscall.ECONNREFUSED}, true}, + {"OpError ECONNRESET", &net.OpError{Op: "read", Err: syscall.ECONNRESET}, true}, + {"OpError ENETUNREACH", &net.OpError{Op: "dial", Err: syscall.ENETUNREACH}, true}, + {"OpError EHOSTUNREACH", &net.OpError{Op: "dial", Err: syscall.EHOSTUNREACH}, true}, + {"OpError ETIMEDOUT", &net.OpError{Op: "dial", Err: syscall.ETIMEDOUT}, true}, + // OpError with permanent syscall errors — should NOT retry + {"OpError EACCES", &net.OpError{Op: "dial", Err: syscall.EACCES}, false}, + {"OpError EPERM", &net.OpError{Op: "dial", Err: syscall.EPERM}, false}, + // OpError with unknown inner error — conservative retry + {"OpError unknown inner", &net.OpError{Op: "dial", Err: fmt.Errorf("unknown")}, true}, + // DNS errors + {"DNS timeout", &net.DNSError{IsTimeout: true}, true}, + {"DNS no such host", &net.DNSError{IsTimeout: false, Name: "bad.host"}, false}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -956,3 +968,30 @@ func TestIsTemporaryNetError(t *testing.T) { }) } } + +func TestIsRetriableSyscallError(t *testing.T) { + tests := []struct { + name string + err error + want bool + }{ + {"nil", nil, false}, + {"ECONNREFUSED", syscall.ECONNREFUSED, true}, + {"ECONNRESET", syscall.ECONNRESET, true}, + {"ENETUNREACH", syscall.ENETUNREACH, true}, + {"EHOSTUNREACH", syscall.EHOSTUNREACH, true}, + {"ETIMEDOUT", syscall.ETIMEDOUT, true}, + {"EACCES (permanent)", syscall.EACCES, false}, + {"EPERM (permanent)", syscall.EPERM, false}, + {"ENOENT (permanent)", syscall.ENOENT, false}, + {"unknown error", fmt.Errorf("something"), true}, // conservative retry + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isRetriableSyscallError(tt.err) + if got != tt.want { + t.Errorf("isRetriableSyscallError(%v) = %v, want %v", tt.err, got, tt.want) + } + }) + } +}