feat(gitea): add retry logic for 5xx errors #69

Merged
aweiker merged 7 commits from issue-68 into main 2026-05-11 12:59:50 +00:00
2 changed files with 60 additions and 2 deletions
Showing only changes of commit ecfbfddc7c - Show all commits
+18 -2
View File
24
@@ -286,6 +286,22 @@ func isRetriableSyscallError(err error) bool {
return true
}
// redactURL strips query parameters from a URL for safe logging.
// This prevents accidental exposure of sensitive data that future callers
// might pass via query strings.
func redactURL(rawURL string) string {
parsed, err := url.Parse(rawURL)
if err != nil {
// If we cannot parse it, return a safe placeholder rather than
// potentially logging something sensitive.
Review

[NIT] The comment // backoff[i] is the delay before attempt i+1 (i.e., after attempt i fails). appears both as an inline comment and in the RetryBackoff field doc comment. The duplication is harmless but slightly redundant.

**[NIT]** The comment `// backoff[i] is the delay before attempt i+1 (i.e., after attempt i fails).` appears both as an inline comment and in the `RetryBackoff` field doc comment. The duplication is harmless but slightly redundant.
return "[invalid URL]"
}
Outdated
Review

[MINOR] When delay == 0 (i.e., the backoff slice is shorter than expected or empty), the retry loop silently skips the warning log and delay. This is a valid edge case but could be surprising — if someone sets RetryBackoff = []time.Duration{}, all retries happen with zero delay and no logging. The code is correct but slightly implicit; a comment noting this would improve clarity.

**[MINOR]** When `delay == 0` (i.e., the backoff slice is shorter than expected or empty), the retry loop silently skips the warning log and delay. This is a valid edge case but could be surprising — if someone sets `RetryBackoff = []time.Duration{}`, all retries happen with zero delay and no logging. The code is correct but slightly implicit; a comment noting this would improve clarity.
if parsed.RawQuery != "" {
Review

[MINOR] When delay is 0 (either because the backoff slice is exhausted or an explicit zero-duration backoff was provided), the retry happens silently with no log entry. This means if maxAttempts > len(backoff)+1, the extra retry attempts are invisible. A log entry at WARN level would help with observability even for zero-delay retries. The comment 'An empty RetryBackoff slice means retry without delay' is correct but the logging gap could be confusing during production incident investigation.

**[MINOR]** When delay is 0 (either because the backoff slice is exhausted or an explicit zero-duration backoff was provided), the retry happens silently with no log entry. This means if maxAttempts > len(backoff)+1, the extra retry attempts are invisible. A log entry at WARN level would help with observability even for zero-delay retries. The comment 'An empty RetryBackoff slice means retry without delay' is correct but the logging gap could be confusing during production incident investigation.
parsed.RawQuery = "[redacted]"
}
Outdated
Review

[MINOR] When delay == 0 (because attempt-1 >= len(backoff)), the retry proceeds silently without any log message. The WARN log is only emitted when delay > 0. In the case where someone sets RetryBackoff = []time.Duration{} (empty, not nil), all retries happen silently with no logging at all. This is a minor observability gap — it could be confusing in production if retries happen but nothing is logged. Consider logging even for zero-delay retries, or at least when attempt > 0.

**[MINOR]** When `delay == 0` (because `attempt-1 >= len(backoff)`), the retry proceeds silently without any log message. The WARN log is only emitted when `delay > 0`. In the case where someone sets `RetryBackoff = []time.Duration{}` (empty, not nil), all retries happen silently with no logging at all. This is a minor observability gap — it could be confusing in production if retries happen but nothing is logged. Consider logging even for zero-delay retries, or at least when `attempt > 0`.
return parsed.String()
}
// doGet performs an HTTP GET request with retry on 5xx errors and temporary
// network errors. Retries up to 3 times with exponential backoff (1s, 2s delays
// by default; configurable via Client.RetryBackoff for testing).
Review

[MINOR] The RetryBackoff field is exported on Client, making it part of the public API surface. This is used as a testing seam, but exporting a mutable slice field on a type documented as 'safe for concurrent use' is a design smell. The field comment itself warns 'Modifying it while requests are in flight is not safe', which contradicts the concurrency safety claim for the type. A cleaner pattern would be to keep it unexported and provide a WithRetryBackoff([]time.Duration) *Client constructor option, or accept it only in a constructor. Since it's already exported and the comment addresses the limitation, this is minor but worth noting for future refactors.

**[MINOR]** The `RetryBackoff` field is exported on `Client`, making it part of the public API surface. This is used as a testing seam, but exporting a mutable slice field on a type documented as 'safe for concurrent use' is a design smell. The field comment itself warns 'Modifying it while requests are in flight is not safe', which contradicts the concurrency safety claim for the type. A cleaner pattern would be to keep it unexported and provide a `WithRetryBackoff([]time.Duration) *Client` constructor option, or accept it only in a constructor. Since it's already exported and the comment addresses the limitation, this is minor but worth noting for future refactors.
4
@@ -316,7 +332,7 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
if delay > 0 {
slog.Warn("retrying request after error",
"attempt", attempt+1,
"url", reqURL,
"url", redactURL(reqURL),
"delay", delay.String(),
"lastError", lastErr)
2
@@ -343,7 +359,7 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
if attempt < maxAttempts-1 && isTemporaryNetError(err) {
slog.Warn("temporary network error, will retry",
"attempt", attempt+1,
"url", reqURL,
"url", redactURL(reqURL),
"error", err)
lastErr = err
continue
1
+42
View File
9
@@ -995,3 +995,45 @@ func TestIsRetriableSyscallError(t *testing.T) {
})
}
}
func TestRedactURL(t *testing.T) {
tests := []struct {
name string
input string
want string
}{
{
name: "no query params",
input: "https://gitea.example.com/api/v1/repos/owner/repo/pulls/1",
want: "https://gitea.example.com/api/v1/repos/owner/repo/pulls/1",
},
{
name: "with query params - redacts",
input: "https://gitea.example.com/api/v1/repos/owner/repo/raw/file?ref=main",
want: "https://gitea.example.com/api/v1/repos/owner/repo/raw/file?[redacted]",
},
{
name: "multiple query params",
input: "https://example.com/path?token=secret&page=1",
want: "https://example.com/path?[redacted]",
},
{
name: "invalid URL",
input: "://invalid",
want: "[invalid URL]",
},
{
name: "empty string",
input: "",
want: "",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := redactURL(tt.input)
if got != tt.want {
t.Errorf("redactURL(%q) = %q, want %q", tt.input, got, tt.want)
}
})
}
}