feat(gitea): harden GetPullRequestDiff against unbounded diff size #109

Merged
aweiker merged 4 commits from review-bot-issue-92 into main 2026-05-13 14:01:22 +00:00
3 changed files with 66 additions and 94 deletions
Showing only changes of commit 49d6ca77a3 - Show all commits
+44 -94
View File
5
@@ -70,7 +70,8 @@ type Client struct {
RetryBackoff []time.Duration
// MaxDiffSize is the maximum number of bytes allowed when fetching a PR diff.
Review

[NIT] The MaxDiffSize field comment suggests using math.MaxInt64 to disable the limit. This dual mechanism (negative and MaxInt64) may confuse users; consider documenting only the negative value as the disable switch.

**[NIT]** The MaxDiffSize field comment suggests using math.MaxInt64 to disable the limit. This dual mechanism (negative and MaxInt64) may confuse users; consider documenting only the negative value as the disable switch.
// If zero, defaults to DefaultMaxDiffSize (10 MB). Set to any negative value to disable the limit.
// If zero, defaults to DefaultMaxDiffSize (10 MB). Set to any negative value
// (or math.MaxInt64) to disable the limit.
//
// This field must be configured before the first request is made.
// Modifying it while requests are in flight is not safe.
2
@@ -149,8 +150,10 @@ func (c *Client) GetPullRequestDiff(ctx context.Context, owner, repo string, num
maxSize = DefaultMaxDiffSize
Review

[MINOR] Special-casing math.MaxInt64 to disable the size limit is unconventional and redundant given the clamp logic in doGetLimited; using any negative value to disable the limit is sufficient and simpler.

**[MINOR]** Special-casing math.MaxInt64 to disable the size limit is unconventional and redundant given the clamp logic in doGetLimited; using any negative value to disable the limit is sufficient and simpler.
}
// When the limit is disabled, use the standard doGet path.
if maxSize < 0 {
// When the limit is disabled (negative) or set to math.MaxInt64 (which
// would overflow the +1 detection and silently disable enforcement),
// use the standard unlimited doGet path.
if maxSize < 0 || maxSize == math.MaxInt64 {
body, err := c.doGet(ctx, reqURL)
if err != nil {
return "", fmt.Errorf("fetch diff: %w", err)
@@ -323,9 +326,9 @@ func isRetriableSyscallError(err error) bool {
return true
}
// redactURL strips query parameters from a URL for safe logging.
// This prevents accidental exposure of sensitive data that future callers
// might pass via query strings.
// redactURL strips query parameters and userinfo credentials from a URL for
// safe logging. This prevents accidental exposure of sensitive data (tokens in
// query strings, or user:pass in the authority) in log output.
func redactURL(rawURL string) string {
parsed, err := url.Parse(rawURL)
if err != nil {
@@ -333,6 +336,9 @@ func redactURL(rawURL string) string {
// potentially logging something sensitive.
return "[invalid URL]"
}
if parsed.User != nil {
parsed.User = url.User("REDACTED")
}
if parsed.RawQuery != "" {
parsed.RawQuery = "[redacted]"
}
@@ -353,10 +359,12 @@ func sanitizeErrorForLog(err error) string {
return err.Error()
}
// doGet performs an HTTP GET request with retry on 5xx errors and temporary
// network errors. Retries up to 3 times with exponential backoff (1s, 2s delays
// by default; configurable via Client.RetryBackoff for testing).
func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
// doGetWithReader performs an HTTP GET request with retry on 5xx errors and
// temporary network errors. Retries up to 3 times with exponential backoff
// (1s, 2s delays by default; configurable via Client.RetryBackoff for testing).
// The readBody function is called with the response body on success (2xx) and
// is responsible for reading and closing it.
func (c *Client) doGetWithReader(ctx context.Context, reqURL string, readBody func(io.ReadCloser) ([]byte, error)) ([]byte, error) {
const maxAttempts = 3
// backoff[i] is the delay before attempt i+1 (i.e., after attempt i fails).
// First attempt (i=0) has no delay; retries wait 1s then 2s by default.
@@ -421,12 +429,7 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
return nil, lastErr
}
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
body, err := io.ReadAll(resp.Body)
resp.Body.Close()
if err != nil {
return nil, err
}
return body, nil
return readBody(resp.Body)
}
// Error path: limit how much we read from potentially malicious server
5
@@ -444,90 +447,37 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
return nil, lastErr
}
// doGetLimited performs an HTTP GET request with retry (like doGet) but enforces
// a maximum response body size. Returns ErrDiffTooLarge if the response exceeds
// maxBytes. It reads maxBytes+1 (clamped to avoid overflow) to detect truncation
// without buffering the entire body.
// doGet performs an HTTP GET request with retry, reading the full response body.
func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
return c.doGetWithReader(ctx, reqURL, func(body io.ReadCloser) ([]byte, error) {
defer body.Close()
return io.ReadAll(body)
})
}
// doGetLimited performs an HTTP GET request with retry but enforces a maximum
// response body size. Returns ErrDiffTooLarge if the response exceeds maxBytes.
// It reads maxBytes+1 (clamped to avoid overflow) to detect truncation without
// buffering the entire body.
func (c *Client) doGetLimited(ctx context.Context, reqURL string, maxBytes int64) ([]byte, error) {
const maxAttempts = 3
backoff := c.RetryBackoff
if backoff == nil {
backoff = []time.Duration{1 * time.Second, 2 * time.Second}
}
const maxErrorBodyBytes = 64 * 1024
var lastErr error
for attempt := 0; attempt < maxAttempts; attempt++ {
if attempt > 0 {
var delay time.Duration
if attempt-1 < len(backoff) {
delay = backoff[attempt-1]
}
if delay > 0 {
slog.Warn("retrying request after error",
"attempt", attempt+1,
"url", redactURL(reqURL),
"delay", delay.String(),
"lastError", sanitizeErrorForLog(lastErr))
timer := time.NewTimer(delay)
select {
case <-timer.C:
case <-ctx.Done():
timer.Stop()
return nil, ctx.Err()
}
}
return c.doGetWithReader(ctx, reqURL, func(body io.ReadCloser) ([]byte, error) {
defer body.Close()
// Read up to maxBytes+1 to detect overflow.
// Clamp to prevent integer overflow when maxBytes == math.MaxInt64.
limitBytes := maxBytes + 1
Review

[NIT] In doGetLimited, the clamp comment says 'Clamp to prevent integer overflow when maxBytes == math.MaxInt64' but the guard if maxSize < 0 || maxSize == math.MaxInt64 in GetPullRequestDiff already prevents doGetLimited from ever being called with math.MaxInt64. The overflow protection in doGetLimited is therefore defensive-only. This is fine, but worth noting as the comment is slightly misleading — it implies a reachable case that GetPullRequestDiff already prevents. Consider adding a note that the clamp is a belt-and-suspenders defense.

**[NIT]** In `doGetLimited`, the clamp comment says 'Clamp to prevent integer overflow when maxBytes == math.MaxInt64' but the guard `if maxSize < 0 || maxSize == math.MaxInt64` in `GetPullRequestDiff` already prevents `doGetLimited` from ever being called with `math.MaxInt64`. The overflow protection in `doGetLimited` is therefore defensive-only. This is fine, but worth noting as the comment is slightly misleading — it implies a reachable case that `GetPullRequestDiff` already prevents. Consider adding a note that the clamp is a belt-and-suspenders defense.
if limitBytes <= 0 {
limitBytes = math.MaxInt64
}
req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil)
limited := io.LimitReader(body, limitBytes)
data, err := io.ReadAll(limited)
if err != nil {
return nil, err
}
req.Header.Set("Authorization", "token "+c.token)
resp, err := c.http.Do(req)
if err != nil {
lastErr = err
if attempt < maxAttempts-1 && isTemporaryNetError(err) {
slog.Warn("temporary network error, will retry",
"attempt", attempt+1,
"url", redactURL(reqURL),
"error", err)
continue
}
return nil, lastErr
if int64(len(data)) > maxBytes {
return nil, fmt.Errorf("%w: response exceeds %d bytes", ErrDiffTooLarge, maxBytes)
}
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
// Read up to maxBytes+1 to detect overflow.
// Clamp to prevent integer overflow when maxBytes == math.MaxInt64.
limitBytes := maxBytes + 1
if limitBytes <= 0 {
limitBytes = math.MaxInt64
}
limited := io.LimitReader(resp.Body, limitBytes)
body, err := io.ReadAll(limited)
resp.Body.Close()
if err != nil {
return nil, err
}
if int64(len(body)) > maxBytes {
return nil, fmt.Errorf("%w: response is larger than %d bytes", ErrDiffTooLarge, maxBytes)
}
return body, nil
}
errBody, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBodyBytes))
resp.Body.Close()
lastErr = &APIError{StatusCode: resp.StatusCode, Body: string(errBody)}
if resp.StatusCode < 500 || resp.StatusCode >= 600 {
return nil, lastErr
}
}
return nil, lastErr
return data, nil
})
}
// escapePath escapes each segment of a relative file path for use in URLs.
+15
View File
@@ -1092,6 +1092,21 @@ func TestRedactURL(t *testing.T) {
input: "",
want: "",
},
{
name: "with userinfo - redacts credentials",
input: "https://admin:secret@gitea.example.com/api/v1/repos",
want: "https://REDACTED@gitea.example.com/api/v1/repos",
},
{
name: "with userinfo and query params",
input: "https://user:pass@example.com/path?token=abc",
want: "https://REDACTED@example.com/path?[redacted]",
},
{
name: "username only - no password",
input: "https://user@example.com/path",
want: "https://REDACTED@example.com/path",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
+7
View File
@@ -3,6 +3,7 @@ package gitea
import (
"context"
"errors"
"math"
"net/http"
"net/http/httptest"
"strings"
@@ -48,6 +49,12 @@ func TestGetPullRequestDiff_SizeLimits(t *testing.T) {
maxDiffSize: -1,
wantDiff: strings.Repeat("x", 10000),
},
{
name: "math.MaxInt64 treated as disabled",
diff: strings.Repeat("x", 10000),
maxDiffSize: math.MaxInt64,
wantDiff: strings.Repeat("x", 10000),
},
{
name: "default limit",
diff: "diff content",
1