feat(gitea): add retry logic for 5xx errors #69
@@ -215,23 +215,32 @@ func (c *Client) PostReview(ctx context.Context, owner, repo string, number int,
|
||||
}
|
||||
|
|
||||
return &review, nil
|
||||
}
|
||||
|
||||
// doGet performs an HTTP GET request with retry on 5xx errors.
|
||||
// Makes up to 3 attempts in total (at most 2 retries), waiting 1s and then 2s between attempts.
|
||||
func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
|
||||
const maxAttempts = 3
|
||||
backoff := []time.Duration{0, 1 * time.Second, 2 * time.Second}
|
||||
// backoff[i] is the delay before attempt i+1 (i.e., after attempt i fails).
|
||||
// First attempt (i=0) has no delay; retries wait 1s then 2s.
|
||||
backoff := []time.Duration{1 * time.Second, 2 * time.Second}
|
||||
|
||||
// maxErrorBodyBytes limits how much of an error response body we read
|
||||
|
[MINOR] Retry warnings log the full request URL and the last error (which may include server-provided body text). While bodies are truncated and URLs here do not include auth, logging full URLs or server error content can inadvertently leak sensitive query parameters or details if future callers pass sensitive data in query strings. Consider redacting query parameters and limiting error detail in logs.
[NIT] isRetriableSyscallError returns true for unknown underlying errors, causing retries even on potentially permanent failures. This is bounded and not a security issue, but could slightly increase request attempts against misconfigured endpoints. Consider restricting retries to known transient error classes.
gpt-review-bot
commented
[NIT] Consider adding jitter to the retry backoff to avoid synchronized retries across multiple clients when the server experiences transient issues.
|
||||
// to protect against malicious servers sending unbounded data.
|
||||
const maxErrorBodyBytes = 64 * 1024 // 64 KB
|
||||
|
||||
var lastErr error
|
||||
for attempt := 0; attempt < maxAttempts; attempt++ {
|
||||
if attempt > 0 {
|
||||
delay := backoff[attempt-1]
|
||||
slog.Warn("retrying request after server error",
|
||||
|
gpt-review-bot
commented
[MINOR] isTemporaryNetError treats any net.OpError as retriable, which may cause retries on permanent failures (e.g., no such host). Consider narrowing the check (e.g., prefer net.Error timeouts, DNSError.IsTemporary, or specific syscall errors) to avoid unnecessary retries.
|
||||
"attempt", attempt+1,
|
||||
"url", reqURL,
|
||||
"delay", backoff[attempt].String())
|
||||
"delay", delay.String())
|
||||
|
||||
timer := time.NewTimer(delay)
|
||||
select {
|
||||
case <-time.After(backoff[attempt]):
|
||||
case <-timer.C:
|
||||
case <-ctx.Done():
|
||||
timer.Stop()
|
||||
return nil, ctx.Err()
|
||||
}
|
||||
}
|
||||
@@ -247,17 +256,20 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
body, readErr := io.ReadAll(resp.Body)
|
||||
resp.Body.Close()
|
||||
|
||||
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
|
||||
if readErr != nil {
|
||||
return nil, readErr
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
resp.Body.Close()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return body, nil
|
||||
}
|
||||
|
||||
lastErr = &APIError{StatusCode: resp.StatusCode, Body: string(body)}
|
||||
// Error path: limit how much we read from potentially malicious server
|
||||
errBody, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBodyBytes))
|
||||
resp.Body.Close()
|
||||
|
||||
lastErr = &APIError{StatusCode: resp.StatusCode, Body: string(errBody)}
|
||||
|
||||
// Only retry on 5xx server errors
|
||||
if resp.StatusCode < 500 || resp.StatusCode >= 600 {
|
||||
|
||||
[MINOR] Hard-coded backoff durations (1s, 2s) make unit tests slow and can increase overall test suite runtime. Consider making backoff configurable (e.g., via a field on Client or a helper function) so tests can use shorter delays while production keeps the intended values.
[MINOR] doGet only retries on 5xx responses; transient transport-level errors from c.http.Do (e.g., temporary network failures) return immediately without retry. If desired, consider retrying on temporary network errors (e.g., timeouts) to further improve resilience.