feat(gitea): harden GetPullRequestDiff against unbounded diff size #109
@@ -70,7 +70,8 @@ type Client struct {
|
||||
RetryBackoff []time.Duration
|
||||
|
||||
|
security-review-bot marked this conversation as resolved
Outdated
|
||||
// MaxDiffSize is the maximum number of bytes allowed when fetching a PR diff.
|
||||
|
gpt-review-bot
commented
[NIT] The MaxDiffSize field comment suggests using math.MaxInt64 to disable the limit. This dual mechanism (negative and MaxInt64) may confuse users; consider documenting only the negative value as the disable switch. **[NIT]** The MaxDiffSize field comment suggests using math.MaxInt64 to disable the limit. This dual mechanism (negative and MaxInt64) may confuse users; consider documenting only the negative value as the disable switch.
|
||||
// If zero, defaults to DefaultMaxDiffSize (10 MB). Set to any negative value to disable the limit.
|
||||
// If zero, defaults to DefaultMaxDiffSize (10 MB). Set to any negative value
|
||||
// (or math.MaxInt64) to disable the limit.
|
||||
//
|
||||
// This field must be configured before the first request is made.
|
||||
// Modifying it while requests are in flight is not safe.
|
||||
@@ -149,8 +150,10 @@ func (c *Client) GetPullRequestDiff(ctx context.Context, owner, repo string, num
|
||||
maxSize = DefaultMaxDiffSize
|
||||
|
gpt-review-bot
commented
[MINOR] Special-casing math.MaxInt64 to disable the size limit is unconventional and redundant given the clamp logic in doGetLimited; using any negative value to disable the limit is sufficient and simpler. **[MINOR]** Special-casing math.MaxInt64 to disable the size limit is unconventional and redundant given the clamp logic in doGetLimited; using any negative value to disable the limit is sufficient and simpler.
|
||||
}
|
||||
|
||||
// When the limit is disabled, use the standard doGet path.
|
||||
if maxSize < 0 {
|
||||
// When the limit is disabled (negative) or set to math.MaxInt64 (which
|
||||
// would overflow the +1 detection and silently disable enforcement),
|
||||
// use the standard unlimited doGet path.
|
||||
if maxSize < 0 || maxSize == math.MaxInt64 {
|
||||
|
sonnet-review-bot
commented
[NIT] The disabled-limit branch (maxSize < 0) wraps the error with 'fetch diff: %w' but the limited branch also wraps with the same prefix inside the return. This is fine, but worth noting that the error string convention is consistent between the two paths. **[NIT]** The disabled-limit branch (maxSize < 0) wraps the error with 'fetch diff: %w' but the limited branch also wraps with the same prefix inside the return. This is fine, but worth noting that the error string convention is consistent between the two paths.
|
||||
body, err := c.doGet(ctx, reqURL)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("fetch diff: %w", err)
|
||||
@@ -323,9 +326,9 @@ func isRetriableSyscallError(err error) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// redactURL strips query parameters from a URL for safe logging.
|
||||
// This prevents accidental exposure of sensitive data that future callers
|
||||
// might pass via query strings.
|
||||
// redactURL strips query parameters and userinfo credentials from a URL for
|
||||
// safe logging. This prevents accidental exposure of sensitive data (tokens in
|
||||
// query strings, or user:pass in the authority) in log output.
|
||||
func redactURL(rawURL string) string {
|
||||
parsed, err := url.Parse(rawURL)
|
||||
if err != nil {
|
||||
@@ -333,6 +336,9 @@ func redactURL(rawURL string) string {
|
||||
// potentially logging something sensitive.
|
||||
return "[invalid URL]"
|
||||
}
|
||||
if parsed.User != nil {
|
||||
parsed.User = url.User("REDACTED")
|
||||
}
|
||||
if parsed.RawQuery != "" {
|
||||
parsed.RawQuery = "[redacted]"
|
||||
}
|
||||
@@ -353,10 +359,12 @@ func sanitizeErrorForLog(err error) string {
|
||||
return err.Error()
|
||||
}
|
||||
|
||||
// doGet performs an HTTP GET request with retry on 5xx errors and temporary
|
||||
// network errors. Retries up to 3 times with exponential backoff (1s, 2s delays
|
||||
// by default; configurable via Client.RetryBackoff for testing).
|
||||
func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
|
||||
// doGetWithReader performs an HTTP GET request with retry on 5xx errors and
|
||||
// temporary network errors. Retries up to 3 times with exponential backoff
|
||||
// (1s, 2s delays by default; configurable via Client.RetryBackoff for testing).
|
||||
// The readBody function is called with the response body on success (2xx) and
|
||||
// is responsible for reading and closing it.
|
||||
func (c *Client) doGetWithReader(ctx context.Context, reqURL string, readBody func(io.ReadCloser) ([]byte, error)) ([]byte, error) {
|
||||
const maxAttempts = 3
|
||||
// backoff[i] is the delay before attempt i+1 (i.e., after attempt i fails).
|
||||
// First attempt (i=0) has no delay; retries wait 1s then 2s by default.
|
||||
@@ -421,12 +429,7 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
|
||||
return nil, lastErr
|
||||
}
|
||||
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
resp.Body.Close()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return body, nil
|
||||
return readBody(resp.Body)
|
||||
}
|
||||
|
||||
// Error path: limit how much we read from potentially malicious server
|
||||
@@ -444,90 +447,37 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
|
||||
return nil, lastErr
|
||||
}
|
||||
|
||||
// doGetLimited performs an HTTP GET request with retry (like doGet) but enforces
|
||||
// a maximum response body size. Returns ErrDiffTooLarge if the response exceeds
|
||||
// maxBytes. It reads maxBytes+1 (clamped to avoid overflow) to detect truncation
|
||||
// without buffering the entire body.
|
||||
// doGet performs an HTTP GET request with retry, reading the full response body.
|
||||
func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
|
||||
return c.doGetWithReader(ctx, reqURL, func(body io.ReadCloser) ([]byte, error) {
|
||||
defer body.Close()
|
||||
return io.ReadAll(body)
|
||||
})
|
||||
}
|
||||
|
||||
// doGetLimited performs an HTTP GET request with retry but enforces a maximum
|
||||
|
security-review-bot marked this conversation as resolved
Outdated
[MINOR] Logging uses redactURL(reqURL) which only strips query parameters and may leak userinfo credentials if present in baseURL (e.g., https://user:pass@host). Consider redacting userinfo in redactURL to avoid potential secret exposure in logs. **[MINOR]** Logging uses redactURL(reqURL) which only strips query parameters and may leak userinfo credentials if present in baseURL (e.g., https://user:pass@host). Consider redacting userinfo in redactURL to avoid potential secret exposure in logs.
|
||||
// response body size. Returns ErrDiffTooLarge if the response exceeds maxBytes.
|
||||
// It reads maxBytes+1 (clamped to avoid overflow) to detect truncation without
|
||||
// buffering the entire body.
|
||||
func (c *Client) doGetLimited(ctx context.Context, reqURL string, maxBytes int64) ([]byte, error) {
|
||||
const maxAttempts = 3
|
||||
backoff := c.RetryBackoff
|
||||
if backoff == nil {
|
||||
backoff = []time.Duration{1 * time.Second, 2 * time.Second}
|
||||
}
|
||||
const maxErrorBodyBytes = 64 * 1024
|
||||
|
||||
var lastErr error
|
||||
for attempt := 0; attempt < maxAttempts; attempt++ {
|
||||
if attempt > 0 {
|
||||
var delay time.Duration
|
||||
if attempt-1 < len(backoff) {
|
||||
delay = backoff[attempt-1]
|
||||
}
|
||||
if delay > 0 {
|
||||
slog.Warn("retrying request after error",
|
||||
"attempt", attempt+1,
|
||||
"url", redactURL(reqURL),
|
||||
"delay", delay.String(),
|
||||
"lastError", sanitizeErrorForLog(lastErr))
|
||||
|
||||
timer := time.NewTimer(delay)
|
||||
select {
|
||||
case <-timer.C:
|
||||
case <-ctx.Done():
|
||||
timer.Stop()
|
||||
return nil, ctx.Err()
|
||||
}
|
||||
}
|
||||
return c.doGetWithReader(ctx, reqURL, func(body io.ReadCloser) ([]byte, error) {
|
||||
defer body.Close()
|
||||
// Read up to maxBytes+1 to detect overflow.
|
||||
// Clamp to prevent integer overflow when maxBytes == math.MaxInt64.
|
||||
limitBytes := maxBytes + 1
|
||||
|
sonnet-review-bot
commented
[NIT] In **[NIT]** In `doGetLimited`, the clamp comment says 'Clamp to prevent integer overflow when maxBytes == math.MaxInt64' but the guard `if maxSize < 0 || maxSize == math.MaxInt64` in `GetPullRequestDiff` already prevents `doGetLimited` from ever being called with `math.MaxInt64`. The overflow protection in `doGetLimited` is therefore defensive-only. This is fine, but worth noting as the comment is slightly misleading — it implies a reachable case that `GetPullRequestDiff` already prevents. Consider adding a note that the clamp is a belt-and-suspenders defense.
|
||||
if limitBytes <= 0 {
|
||||
limitBytes = math.MaxInt64
|
||||
}
|
||||
|
sonnet-review-bot
commented
[MINOR] In **[MINOR]** In `doGetLimited`, the overflow guard `if limitBytes <= 0 { limitBytes = math.MaxInt64 }` is dead code given the caller's pre-check (`maxSize == math.MaxInt64` routes to the unlimited path before this function is ever called). The comment on the clamp says it handles `maxBytes == math.MaxInt64`, but that case never reaches `doGetLimited`. The guard is harmless and provides defense-in-depth, but the comment is misleading since it describes a case that is already excluded by the caller.
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil)
|
||||
limited := io.LimitReader(body, limitBytes)
|
||||
data, err := io.ReadAll(limited)
|
||||
|
security-review-bot marked this conversation as resolved
Outdated
[MINOR] In doGetLimited, when maxBytes equals math.MaxInt64, limitBytes overflows and is clamped to math.MaxInt64, causing the reader to effectively allow an unbounded read (up to MaxInt64). While this only happens with an extreme configuration, it could permit large memory usage if someone sets MaxDiffSize to MaxInt64 instead of using -1 to disable the limit. **[MINOR]** In doGetLimited, when maxBytes equals math.MaxInt64, limitBytes overflows and is clamped to math.MaxInt64, causing the reader to effectively allow an unbounded read (up to MaxInt64). While this only happens with an extreme configuration, it could permit large memory usage if someone sets MaxDiffSize to MaxInt64 instead of using -1 to disable the limit.
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("Authorization", "token "+c.token)
|
||||
|
||||
resp, err := c.http.Do(req)
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
if attempt < maxAttempts-1 && isTemporaryNetError(err) {
|
||||
slog.Warn("temporary network error, will retry",
|
||||
"attempt", attempt+1,
|
||||
"url", redactURL(reqURL),
|
||||
"error", err)
|
||||
continue
|
||||
}
|
||||
return nil, lastErr
|
||||
if int64(len(data)) > maxBytes {
|
||||
return nil, fmt.Errorf("%w: response exceeds %d bytes", ErrDiffTooLarge, maxBytes)
|
||||
}
|
||||
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
|
||||
// Read up to maxBytes+1 to detect overflow.
|
||||
// Clamp to prevent integer overflow when maxBytes == math.MaxInt64.
|
||||
limitBytes := maxBytes + 1
|
||||
if limitBytes <= 0 {
|
||||
limitBytes = math.MaxInt64
|
||||
}
|
||||
limited := io.LimitReader(resp.Body, limitBytes)
|
||||
body, err := io.ReadAll(limited)
|
||||
resp.Body.Close()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if int64(len(body)) > maxBytes {
|
||||
return nil, fmt.Errorf("%w: response is larger than %d bytes", ErrDiffTooLarge, maxBytes)
|
||||
}
|
||||
return body, nil
|
||||
}
|
||||
|
||||
errBody, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBodyBytes))
|
||||
resp.Body.Close()
|
||||
|
||||
lastErr = &APIError{StatusCode: resp.StatusCode, Body: string(errBody)}
|
||||
|
||||
if resp.StatusCode < 500 || resp.StatusCode >= 600 {
|
||||
return nil, lastErr
|
||||
}
|
||||
}
|
||||
|
||||
return nil, lastErr
|
||||
return data, nil
|
||||
})
|
||||
}
|
||||
|
||||
// escapePath escapes each segment of a relative file path for use in URLs.
|
||||
|
||||
@@ -1092,6 +1092,21 @@ func TestRedactURL(t *testing.T) {
|
||||
input: "",
|
||||
want: "",
|
||||
},
|
||||
{
|
||||
name: "with userinfo - redacts credentials",
|
||||
input: "https://admin:secret@gitea.example.com/api/v1/repos",
|
||||
want: "https://REDACTED@gitea.example.com/api/v1/repos",
|
||||
},
|
||||
{
|
||||
name: "with userinfo and query params",
|
||||
input: "https://user:pass@example.com/path?token=abc",
|
||||
want: "https://REDACTED@example.com/path?[redacted]",
|
||||
},
|
||||
{
|
||||
name: "username only - no password",
|
||||
input: "https://user@example.com/path",
|
||||
want: "https://REDACTED@example.com/path",
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
|
||||
@@ -3,6 +3,7 @@ package gitea
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"math"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
@@ -48,6 +49,12 @@ func TestGetPullRequestDiff_SizeLimits(t *testing.T) {
|
||||
maxDiffSize: -1,
|
||||
wantDiff: strings.Repeat("x", 10000),
|
||||
},
|
||||
{
|
||||
name: "math.MaxInt64 treated as disabled",
|
||||
diff: strings.Repeat("x", 10000),
|
||||
maxDiffSize: math.MaxInt64,
|
||||
wantDiff: strings.Repeat("x", 10000),
|
||||
},
|
||||
{
|
||||
name: "default limit",
|
||||
diff: "diff content",
|
||||
|
||||
[NIT] Disabling the diff size limit (MaxDiffSize = -1) removes protection against large responses and could allow memory exhaustion if configured from untrusted input. Ensure this setting cannot be influenced by untrusted sources.