refactor(gitea): eliminate retry duplication, harden redactURL and MaxInt64 edge case
PR Ready Gate / clear-labels (pull_request) Successful in 2s
CI / test (pull_request) Successful in 24s
CI / review (anthropic--claude-4.6-sonnet, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 43s
CI / review (gpt-5, security, ., rodin/security-patterns, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Successful in 1m4s
CI / review (gpt-5, gpt, GPT_REVIEW_TOKEN) (pull_request) Successful in 1m57s

- Extract doGetWithReader to share retry/backoff logic between doGet and
  doGetLimited, eliminating ~60 lines of duplicated code (addresses MINOR
  finding from all reviewers).
- redactURL now strips userinfo credentials (user:pass@host) in addition
  to query parameters (addresses security-review-bot finding).
- GetPullRequestDiff treats MaxDiffSize == math.MaxInt64 as disabled,
  preventing the silent enforcement bypass where the overflow clamp makes
  the size check unreachable (addresses security-review-bot finding).
- Improved error message wording: 'response exceeds N bytes' (NIT fix).
This commit is contained in:
claw
2026-05-13 05:40:49 -07:00
parent 84ac50a8cf
commit 1f0636e140
3 changed files with 66 additions and 94 deletions
+44 -94
View File
@@ -70,7 +70,8 @@ type Client struct {
RetryBackoff []time.Duration RetryBackoff []time.Duration
// MaxDiffSize is the maximum number of bytes allowed when fetching a PR diff. // MaxDiffSize is the maximum number of bytes allowed when fetching a PR diff.
// If zero, defaults to DefaultMaxDiffSize (10 MB). Set to any negative value to disable the limit. // If zero, defaults to DefaultMaxDiffSize (10 MB). Set to any negative value
// (or math.MaxInt64) to disable the limit.
// //
// This field must be configured before the first request is made. // This field must be configured before the first request is made.
// Modifying it while requests are in flight is not safe. // Modifying it while requests are in flight is not safe.
@@ -149,8 +150,10 @@ func (c *Client) GetPullRequestDiff(ctx context.Context, owner, repo string, num
maxSize = DefaultMaxDiffSize maxSize = DefaultMaxDiffSize
} }
// When the limit is disabled, use the standard doGet path. // When the limit is disabled (negative) or set to math.MaxInt64 (which
if maxSize < 0 { // would overflow the +1 detection and silently disable enforcement),
// use the standard unlimited doGet path.
if maxSize < 0 || maxSize == math.MaxInt64 {
body, err := c.doGet(ctx, reqURL) body, err := c.doGet(ctx, reqURL)
if err != nil { if err != nil {
return "", fmt.Errorf("fetch diff: %w", err) return "", fmt.Errorf("fetch diff: %w", err)
@@ -323,9 +326,9 @@ func isRetriableSyscallError(err error) bool {
return true return true
} }
// redactURL strips query parameters from a URL for safe logging. // redactURL strips query parameters and userinfo credentials from a URL for
// This prevents accidental exposure of sensitive data that future callers // safe logging. This prevents accidental exposure of sensitive data (tokens in
// might pass via query strings. // query strings, or user:pass in the authority) in log output.
func redactURL(rawURL string) string { func redactURL(rawURL string) string {
parsed, err := url.Parse(rawURL) parsed, err := url.Parse(rawURL)
if err != nil { if err != nil {
@@ -333,6 +336,9 @@ func redactURL(rawURL string) string {
// potentially logging something sensitive. // potentially logging something sensitive.
return "[invalid URL]" return "[invalid URL]"
} }
if parsed.User != nil {
parsed.User = url.User("REDACTED")
}
if parsed.RawQuery != "" { if parsed.RawQuery != "" {
parsed.RawQuery = "[redacted]" parsed.RawQuery = "[redacted]"
} }
@@ -353,10 +359,12 @@ func sanitizeErrorForLog(err error) string {
return err.Error() return err.Error()
} }
// doGet performs an HTTP GET request with retry on 5xx errors and temporary // doGetWithReader performs an HTTP GET request with retry on 5xx errors and
// network errors. Retries up to 3 times with exponential backoff (1s, 2s delays // temporary network errors. Retries up to 3 times with exponential backoff
// by default; configurable via Client.RetryBackoff for testing). // (1s, 2s delays by default; configurable via Client.RetryBackoff for testing).
func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) { // The readBody function is called with the response body on success (2xx) and
// is responsible for reading and closing it.
func (c *Client) doGetWithReader(ctx context.Context, reqURL string, readBody func(io.ReadCloser) ([]byte, error)) ([]byte, error) {
const maxAttempts = 3 const maxAttempts = 3
// backoff[i] is the delay before attempt i+1 (i.e., after attempt i fails). // backoff[i] is the delay before attempt i+1 (i.e., after attempt i fails).
// First attempt (i=0) has no delay; retries wait 1s then 2s by default. // First attempt (i=0) has no delay; retries wait 1s then 2s by default.
@@ -421,12 +429,7 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
return nil, lastErr return nil, lastErr
} }
if resp.StatusCode >= 200 && resp.StatusCode < 300 { if resp.StatusCode >= 200 && resp.StatusCode < 300 {
body, err := io.ReadAll(resp.Body) return readBody(resp.Body)
resp.Body.Close()
if err != nil {
return nil, err
}
return body, nil
} }
// Error path: limit how much we read from potentially malicious server // Error path: limit how much we read from potentially malicious server
@@ -444,90 +447,37 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
return nil, lastErr return nil, lastErr
} }
// doGetLimited performs an HTTP GET request with retry (like doGet) but enforces // doGet performs an HTTP GET request with retry, reading the full response body.
// a maximum response body size. Returns ErrDiffTooLarge if the response exceeds func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
// maxBytes. It reads maxBytes+1 (clamped to avoid overflow) to detect truncation return c.doGetWithReader(ctx, reqURL, func(body io.ReadCloser) ([]byte, error) {
// without buffering the entire body. defer body.Close()
return io.ReadAll(body)
})
}
// doGetLimited performs an HTTP GET request with retry but enforces a maximum
// response body size. Returns ErrDiffTooLarge if the response exceeds maxBytes.
// It reads maxBytes+1 (clamped to avoid overflow) to detect truncation without
// buffering the entire body.
func (c *Client) doGetLimited(ctx context.Context, reqURL string, maxBytes int64) ([]byte, error) { func (c *Client) doGetLimited(ctx context.Context, reqURL string, maxBytes int64) ([]byte, error) {
const maxAttempts = 3 return c.doGetWithReader(ctx, reqURL, func(body io.ReadCloser) ([]byte, error) {
backoff := c.RetryBackoff defer body.Close()
if backoff == nil { // Read up to maxBytes+1 to detect overflow.
backoff = []time.Duration{1 * time.Second, 2 * time.Second} // Clamp to prevent integer overflow when maxBytes == math.MaxInt64.
} limitBytes := maxBytes + 1
const maxErrorBodyBytes = 64 * 1024 if limitBytes <= 0 {
limitBytes = math.MaxInt64
var lastErr error
for attempt := 0; attempt < maxAttempts; attempt++ {
if attempt > 0 {
var delay time.Duration
if attempt-1 < len(backoff) {
delay = backoff[attempt-1]
}
if delay > 0 {
slog.Warn("retrying request after error",
"attempt", attempt+1,
"url", redactURL(reqURL),
"delay", delay.String(),
"lastError", sanitizeErrorForLog(lastErr))
timer := time.NewTimer(delay)
select {
case <-timer.C:
case <-ctx.Done():
timer.Stop()
return nil, ctx.Err()
}
}
} }
limited := io.LimitReader(body, limitBytes)
req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil) data, err := io.ReadAll(limited)
if err != nil { if err != nil {
return nil, err return nil, err
} }
req.Header.Set("Authorization", "token "+c.token) if int64(len(data)) > maxBytes {
return nil, fmt.Errorf("%w: response exceeds %d bytes", ErrDiffTooLarge, maxBytes)
resp, err := c.http.Do(req)
if err != nil {
lastErr = err
if attempt < maxAttempts-1 && isTemporaryNetError(err) {
slog.Warn("temporary network error, will retry",
"attempt", attempt+1,
"url", redactURL(reqURL),
"error", err)
continue
}
return nil, lastErr
} }
if resp.StatusCode >= 200 && resp.StatusCode < 300 { return data, nil
// Read up to maxBytes+1 to detect overflow. })
// Clamp to prevent integer overflow when maxBytes == math.MaxInt64.
limitBytes := maxBytes + 1
if limitBytes <= 0 {
limitBytes = math.MaxInt64
}
limited := io.LimitReader(resp.Body, limitBytes)
body, err := io.ReadAll(limited)
resp.Body.Close()
if err != nil {
return nil, err
}
if int64(len(body)) > maxBytes {
return nil, fmt.Errorf("%w: response is larger than %d bytes", ErrDiffTooLarge, maxBytes)
}
return body, nil
}
errBody, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBodyBytes))
resp.Body.Close()
lastErr = &APIError{StatusCode: resp.StatusCode, Body: string(errBody)}
if resp.StatusCode < 500 || resp.StatusCode >= 600 {
return nil, lastErr
}
}
return nil, lastErr
} }
// escapePath escapes each segment of a relative file path for use in URLs. // escapePath escapes each segment of a relative file path for use in URLs.
+15
View File
@@ -1092,6 +1092,21 @@ func TestRedactURL(t *testing.T) {
input: "", input: "",
want: "", want: "",
}, },
{
name: "with userinfo - redacts credentials",
input: "https://admin:secret@gitea.example.com/api/v1/repos",
want: "https://REDACTED@gitea.example.com/api/v1/repos",
},
{
name: "with userinfo and query params",
input: "https://user:pass@example.com/path?token=abc",
want: "https://REDACTED@example.com/path?[redacted]",
},
{
name: "username only - no password",
input: "https://user@example.com/path",
want: "https://REDACTED@example.com/path",
},
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
+7
View File
@@ -3,6 +3,7 @@ package gitea
import ( import (
"context" "context"
"errors" "errors"
"math"
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
"strings" "strings"
@@ -48,6 +49,12 @@ func TestGetPullRequestDiff_SizeLimits(t *testing.T) {
maxDiffSize: -1, maxDiffSize: -1,
wantDiff: strings.Repeat("x", 10000), wantDiff: strings.Repeat("x", 10000),
}, },
{
name: "math.MaxInt64 treated as disabled",
diff: strings.Repeat("x", 10000),
maxDiffSize: math.MaxInt64,
wantDiff: strings.Repeat("x", 10000),
},
{ {
name: "default limit", name: "default limit",
diff: "diff content", diff: "diff content",