feat(gitea): harden GetPullRequestDiff against unbounded diff size #109

Merged
aweiker merged 4 commits from review-bot-issue-92 into main 2026-05-13 14:01:22 +00:00
3 changed files with 66 additions and 94 deletions
Showing only changes of commit 49d6ca77a3 - Show all commits
+44 -94
View File
5
@@ -70,7 +70,8 @@ type Client struct {
RetryBackoff []time.Duration
security-review-bot marked this conversation as resolved Outdated
Outdated
Review

[NIT] Disabling the diff size limit (MaxDiffSize = -1) removes protection against large responses and could allow memory exhaustion if configured from untrusted input. Ensure this setting cannot be influenced by untrusted sources.

**[NIT]** Disabling the diff size limit (MaxDiffSize = -1) removes protection against large responses and could allow memory exhaustion if configured from untrusted input. Ensure this setting cannot be influenced by untrusted sources.
// MaxDiffSize is the maximum number of bytes allowed when fetching a PR diff.
Review

[NIT] The MaxDiffSize field comment suggests using math.MaxInt64 to disable the limit. This dual mechanism (negative and MaxInt64) may confuse users; consider documenting only the negative value as the disable switch.

**[NIT]** The MaxDiffSize field comment suggests using math.MaxInt64 to disable the limit. This dual mechanism (negative and MaxInt64) may confuse users; consider documenting only the negative value as the disable switch.
// If zero, defaults to DefaultMaxDiffSize (10 MB). Set to any negative value to disable the limit.
// If zero, defaults to DefaultMaxDiffSize (10 MB). Set to any negative value
// (or math.MaxInt64) to disable the limit.
//
// This field must be configured before the first request is made.
// Modifying it while requests are in flight is not safe.
4
@@ -149,8 +150,10 @@ func (c *Client) GetPullRequestDiff(ctx context.Context, owner, repo string, num
maxSize = DefaultMaxDiffSize
Review

[MINOR] Special-casing math.MaxInt64 to disable the size limit is unconventional and redundant given the clamp logic in doGetLimited; using any negative value to disable the limit is sufficient and simpler.

**[MINOR]** Special-casing math.MaxInt64 to disable the size limit is unconventional and redundant given the clamp logic in doGetLimited; using any negative value to disable the limit is sufficient and simpler.
}
// When the limit is disabled, use the standard doGet path.
if maxSize < 0 {
// When the limit is disabled (negative) or set to math.MaxInt64 (which
// would overflow the +1 detection and silently disable enforcement),
// use the standard unlimited doGet path.
if maxSize < 0 || maxSize == math.MaxInt64 {
Outdated
Review

[NIT] The disabled-limit branch (maxSize < 0) wraps the error with 'fetch diff: %w' but the limited branch also wraps with the same prefix inside the return. This is fine, but worth noting that the error string convention is consistent between the two paths.

**[NIT]** The disabled-limit branch (maxSize < 0) wraps the error with 'fetch diff: %w' but the limited branch also wraps with the same prefix inside the return. This is fine, but worth noting that the error string convention is consistent between the two paths.
body, err := c.doGet(ctx, reqURL)
if err != nil {
return "", fmt.Errorf("fetch diff: %w", err)
1
@@ -323,9 +326,9 @@ func isRetriableSyscallError(err error) bool {
return true
}
// redactURL strips query parameters from a URL for safe logging.
// This prevents accidental exposure of sensitive data that future callers
// might pass via query strings.
// redactURL strips query parameters and userinfo credentials from a URL for
// safe logging. This prevents accidental exposure of sensitive data (tokens in
// query strings, or user:pass in the authority) in log output.
func redactURL(rawURL string) string {
parsed, err := url.Parse(rawURL)
if err != nil {
@@ -333,6 +336,9 @@ func redactURL(rawURL string) string {
// potentially logging something sensitive.
return "[invalid URL]"
}
if parsed.User != nil {
parsed.User = url.User("REDACTED")
}
if parsed.RawQuery != "" {
parsed.RawQuery = "[redacted]"
}
@@ -353,10 +359,12 @@ func sanitizeErrorForLog(err error) string {
return err.Error()
}
// doGet performs an HTTP GET request with retry on 5xx errors and temporary
// network errors. Retries up to 3 times with exponential backoff (1s, 2s delays
// by default; configurable via Client.RetryBackoff for testing).
func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
// doGetWithReader performs an HTTP GET request with retry on 5xx errors and
// temporary network errors. Retries up to 3 times with exponential backoff
// (1s, 2s delays by default; configurable via Client.RetryBackoff for testing).
// The readBody function is called with the response body on success (2xx) and
// is responsible for reading and closing it.
func (c *Client) doGetWithReader(ctx context.Context, reqURL string, readBody func(io.ReadCloser) ([]byte, error)) ([]byte, error) {
const maxAttempts = 3
// backoff[i] is the delay before attempt i+1 (i.e., after attempt i fails).
// First attempt (i=0) has no delay; retries wait 1s then 2s by default.
@@ -421,12 +429,7 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
return nil, lastErr
}
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
body, err := io.ReadAll(resp.Body)
resp.Body.Close()
if err != nil {
return nil, err
}
return body, nil
return readBody(resp.Body)
}
// Error path: limit how much we read from potentially malicious server
5
@@ -444,90 +447,37 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
return nil, lastErr
}
// doGetLimited performs an HTTP GET request with retry (like doGet) but enforces
// a maximum response body size. Returns ErrDiffTooLarge if the response exceeds
// maxBytes. It reads maxBytes+1 (clamped to avoid overflow) to detect truncation
// without buffering the entire body.
// doGet performs an HTTP GET request with retry, reading the full response body.
func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
return c.doGetWithReader(ctx, reqURL, func(body io.ReadCloser) ([]byte, error) {
defer body.Close()
return io.ReadAll(body)
})
}
// doGetLimited performs an HTTP GET request with retry but enforces a maximum
security-review-bot marked this conversation as resolved Outdated
Outdated
Review

[MINOR] Logging uses redactURL(reqURL) which only strips query parameters and may leak userinfo credentials if present in baseURL (e.g., https://user:pass@host). Consider redacting userinfo in redactURL to avoid potential secret exposure in logs.

**[MINOR]** Logging uses redactURL(reqURL) which only strips query parameters and may leak userinfo credentials if present in baseURL (e.g., https://user:pass@host). Consider redacting userinfo in redactURL to avoid potential secret exposure in logs.
// response body size. Returns ErrDiffTooLarge if the response exceeds maxBytes.
// It reads maxBytes+1 (clamped to avoid overflow) to detect truncation without
// buffering the entire body.
func (c *Client) doGetLimited(ctx context.Context, reqURL string, maxBytes int64) ([]byte, error) {
const maxAttempts = 3
backoff := c.RetryBackoff
if backoff == nil {
backoff = []time.Duration{1 * time.Second, 2 * time.Second}
}
const maxErrorBodyBytes = 64 * 1024
var lastErr error
for attempt := 0; attempt < maxAttempts; attempt++ {
if attempt > 0 {
var delay time.Duration
if attempt-1 < len(backoff) {
delay = backoff[attempt-1]
}
if delay > 0 {
slog.Warn("retrying request after error",
"attempt", attempt+1,
"url", redactURL(reqURL),
"delay", delay.String(),
"lastError", sanitizeErrorForLog(lastErr))
timer := time.NewTimer(delay)
select {
case <-timer.C:
case <-ctx.Done():
timer.Stop()
return nil, ctx.Err()
}
}
return c.doGetWithReader(ctx, reqURL, func(body io.ReadCloser) ([]byte, error) {
defer body.Close()
// Read up to maxBytes+1 to detect overflow.
// Clamp to prevent integer overflow when maxBytes == math.MaxInt64.
limitBytes := maxBytes + 1
Review

[NIT] In doGetLimited, the clamp comment says 'Clamp to prevent integer overflow when maxBytes == math.MaxInt64' but the guard if maxSize < 0 || maxSize == math.MaxInt64 in GetPullRequestDiff already prevents doGetLimited from ever being called with math.MaxInt64. The overflow protection in doGetLimited is therefore defensive-only. This is fine, but worth noting as the comment is slightly misleading — it implies a reachable case that GetPullRequestDiff already prevents. Consider adding a note that the clamp is a belt-and-suspenders defense.

**[NIT]** In `doGetLimited`, the clamp comment says 'Clamp to prevent integer overflow when maxBytes == math.MaxInt64' but the guard `if maxSize < 0 || maxSize == math.MaxInt64` in `GetPullRequestDiff` already prevents `doGetLimited` from ever being called with `math.MaxInt64`. The overflow protection in `doGetLimited` is therefore defensive-only. This is fine, but worth noting as the comment is slightly misleading — it implies a reachable case that `GetPullRequestDiff` already prevents. Consider adding a note that the clamp is a belt-and-suspenders defense.
if limitBytes <= 0 {
limitBytes = math.MaxInt64
}
Outdated
Review

[MINOR] In doGetLimited, the overflow guard if limitBytes <= 0 { limitBytes = math.MaxInt64 } is dead code given the caller's pre-check (maxSize == math.MaxInt64 routes to the unlimited path before this function is ever called). The comment on the clamp says it handles maxBytes == math.MaxInt64, but that case never reaches doGetLimited. The guard is harmless and provides defense-in-depth, but the comment is misleading since it describes a case that is already excluded by the caller.

**[MINOR]** In `doGetLimited`, the overflow guard `if limitBytes <= 0 { limitBytes = math.MaxInt64 }` is dead code given the caller's pre-check (`maxSize == math.MaxInt64` routes to the unlimited path before this function is ever called). The comment on the clamp says it handles `maxBytes == math.MaxInt64`, but that case never reaches `doGetLimited`. The guard is harmless and provides defense-in-depth, but the comment is misleading since it describes a case that is already excluded by the caller.
req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil)
limited := io.LimitReader(body, limitBytes)
data, err := io.ReadAll(limited)
security-review-bot marked this conversation as resolved Outdated
Outdated
Review

[MINOR] In doGetLimited, when maxBytes equals math.MaxInt64, limitBytes overflows and is clamped to math.MaxInt64, causing the reader to effectively allow an unbounded read (up to MaxInt64). While this only happens with an extreme configuration, it could permit large memory usage if someone sets MaxDiffSize to MaxInt64 instead of using -1 to disable the limit.

**[MINOR]** In doGetLimited, when maxBytes equals math.MaxInt64, limitBytes overflows and is clamped to math.MaxInt64, causing the reader to effectively allow an unbounded read (up to MaxInt64). While this only happens with an extreme configuration, it could permit large memory usage if someone sets MaxDiffSize to MaxInt64 instead of using -1 to disable the limit.
if err != nil {
return nil, err
}
req.Header.Set("Authorization", "token "+c.token)
resp, err := c.http.Do(req)
if err != nil {
lastErr = err
if attempt < maxAttempts-1 && isTemporaryNetError(err) {
slog.Warn("temporary network error, will retry",
"attempt", attempt+1,
"url", redactURL(reqURL),
"error", err)
continue
}
return nil, lastErr
if int64(len(data)) > maxBytes {
return nil, fmt.Errorf("%w: response exceeds %d bytes", ErrDiffTooLarge, maxBytes)
}
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
// Read up to maxBytes+1 to detect overflow.
// Clamp to prevent integer overflow when maxBytes == math.MaxInt64.
limitBytes := maxBytes + 1
if limitBytes <= 0 {
limitBytes = math.MaxInt64
}
limited := io.LimitReader(resp.Body, limitBytes)
body, err := io.ReadAll(limited)
resp.Body.Close()
if err != nil {
return nil, err
}
if int64(len(body)) > maxBytes {
return nil, fmt.Errorf("%w: response is larger than %d bytes", ErrDiffTooLarge, maxBytes)
}
return body, nil
}
errBody, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBodyBytes))
resp.Body.Close()
lastErr = &APIError{StatusCode: resp.StatusCode, Body: string(errBody)}
if resp.StatusCode < 500 || resp.StatusCode >= 600 {
return nil, lastErr
}
}
return nil, lastErr
return data, nil
})
}
// escapePath escapes each segment of a relative file path for use in URLs.
2
+15
View File
@@ -1092,6 +1092,21 @@ func TestRedactURL(t *testing.T) {
input: "",
want: "",
},
{
name: "with userinfo - redacts credentials",
input: "https://admin:secret@gitea.example.com/api/v1/repos",
want: "https://REDACTED@gitea.example.com/api/v1/repos",
},
{
name: "with userinfo and query params",
input: "https://user:pass@example.com/path?token=abc",
want: "https://REDACTED@example.com/path?[redacted]",
},
{
name: "username only - no password",
input: "https://user@example.com/path",
want: "https://REDACTED@example.com/path",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
+7
View File
1
@@ -3,6 +3,7 @@ package gitea
import (
"context"
"errors"
"math"
"net/http"
"net/http/httptest"
"strings"
2
@@ -48,6 +49,12 @@ func TestGetPullRequestDiff_SizeLimits(t *testing.T) {
maxDiffSize: -1,
wantDiff: strings.Repeat("x", 10000),
},
{
name: "math.MaxInt64 treated as disabled",
diff: strings.Repeat("x", 10000),
maxDiffSize: math.MaxInt64,
wantDiff: strings.Repeat("x", 10000),
},
{
name: "default limit",
diff: "diff content",
2