feat(gitea): add retry logic for 5xx errors #69
@@ -286,6 +286,22 @@ func isRetriableSyscallError(err error) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// redactURL strips query parameters from a URL for safe logging.
|
||||
// This prevents accidental exposure of sensitive data that future callers
|
||||
// might pass via query strings.
|
||||
func redactURL(rawURL string) string {
|
||||
parsed, err := url.Parse(rawURL)
|
||||
if err != nil {
|
||||
// If we cannot parse it, return a safe placeholder rather than
|
||||
// potentially logging something sensitive.
|
||||
|
|
||||
return "[invalid URL]"
|
||||
}
|
||||
|
sonnet-review-bot
commented
[MINOR] When **[MINOR]** When `delay == 0` (i.e., the backoff slice is shorter than expected or empty), the retry loop silently skips the warning log and delay. This is a valid edge case but could be surprising — if someone sets `RetryBackoff = []time.Duration{}`, all retries happen with zero delay and no logging. The code is correct but slightly implicit; a comment noting this would improve clarity.
|
||||
if parsed.RawQuery != "" {
|
||||
|
sonnet-review-bot
commented
[MINOR] When delay is 0 (either because the backoff slice is exhausted or an explicit zero-duration backoff was provided), the retry happens silently with no log entry. This means if maxAttempts > len(backoff)+1, the extra retry attempts are invisible. A log entry at WARN level would help with observability even for zero-delay retries. The comment 'An empty RetryBackoff slice means retry without delay' is correct but the logging gap could be confusing during production incident investigation. **[MINOR]** When delay is 0 (either because the backoff slice is exhausted or an explicit zero-duration backoff was provided), the retry happens silently with no log entry. This means if maxAttempts > len(backoff)+1, the extra retry attempts are invisible. A log entry at WARN level would help with observability even for zero-delay retries. The comment 'An empty RetryBackoff slice means retry without delay' is correct but the logging gap could be confusing during production incident investigation.
|
||||
parsed.RawQuery = "[redacted]"
|
||||
}
|
||||
|
sonnet-review-bot
commented
[MINOR] When **[MINOR]** When `delay == 0` (because `attempt-1 >= len(backoff)`), the retry proceeds silently without any log message. The WARN log is only emitted when `delay > 0`. In the case where someone sets `RetryBackoff = []time.Duration{}` (empty, not nil), all retries happen silently with no logging at all. This is a minor observability gap — it could be confusing in production if retries happen but nothing is logged. Consider logging even for zero-delay retries, or at least when `attempt > 0`.
|
||||
return parsed.String()
|
||||
}
|
||||
|
||||
// doGet performs an HTTP GET request with retry on 5xx errors and temporary
|
||||
// network errors. Retries up to 3 times with exponential backoff (1s, 2s delays
|
||||
// by default; configurable via Client.RetryBackoff for testing).
|
||||
|
sonnet-review-bot
commented
[MINOR] The **[MINOR]** The `RetryBackoff` field is exported on `Client`, making it part of the public API surface. This is used as a testing seam, but exporting a mutable slice field on a type documented as 'safe for concurrent use' is a design smell. The field comment itself warns 'Modifying it while requests are in flight is not safe', which contradicts the concurrency safety claim for the type. A cleaner pattern would be to keep it unexported and provide a `WithRetryBackoff([]time.Duration) *Client` constructor option, or accept it only in a constructor. Since it's already exported and the comment addresses the limitation, this is minor but worth noting for future refactors.
|
||||
@@ -316,7 +332,7 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
|
||||
if delay > 0 {
|
||||
slog.Warn("retrying request after error",
|
||||
"attempt", attempt+1,
|
||||
"url", reqURL,
|
||||
"url", redactURL(reqURL),
|
||||
"delay", delay.String(),
|
||||
"lastError", lastErr)
|
||||
|
||||
@@ -343,7 +359,7 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
|
||||
if attempt < maxAttempts-1 && isTemporaryNetError(err) {
|
||||
slog.Warn("temporary network error, will retry",
|
||||
"attempt", attempt+1,
|
||||
"url", reqURL,
|
||||
"url", redactURL(reqURL),
|
||||
"error", err)
|
||||
lastErr = err
|
||||
continue
|
||||
|
||||
@@ -995,3 +995,45 @@ func TestIsRetriableSyscallError(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRedactURL(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "no query params",
|
||||
input: "https://gitea.example.com/api/v1/repos/owner/repo/pulls/1",
|
||||
want: "https://gitea.example.com/api/v1/repos/owner/repo/pulls/1",
|
||||
},
|
||||
{
|
||||
name: "with query params - redacts",
|
||||
input: "https://gitea.example.com/api/v1/repos/owner/repo/raw/file?ref=main",
|
||||
want: "https://gitea.example.com/api/v1/repos/owner/repo/raw/file?[redacted]",
|
||||
},
|
||||
{
|
||||
name: "multiple query params",
|
||||
input: "https://example.com/path?token=secret&page=1",
|
||||
want: "https://example.com/path?[redacted]",
|
||||
},
|
||||
{
|
||||
name: "invalid URL",
|
||||
input: "://invalid",
|
||||
want: "[invalid URL]",
|
||||
},
|
||||
{
|
||||
name: "empty string",
|
||||
input: "",
|
||||
want: "",
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := redactURL(tt.input)
|
||||
if got != tt.want {
|
||||
t.Errorf("redactURL(%q) = %q, want %q", tt.input, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
[NIT] The comment
// backoff[i] is the delay before attempt i+1 (i.e., after attempt i fails).appears both as an inline comment and in theRetryBackofffield doc comment. The duplication is harmless but slightly redundant.