feat(gitea): harden GetPullRequestDiff against unbounded diff size #109
@@ -11,6 +11,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
|
"math"
|
||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
@@ -47,6 +48,12 @@ func IsServerError(err error) bool {
|
|||||||
return errors.As(err, &apiErr) && apiErr.StatusCode >= 500 && apiErr.StatusCode < 600
|
return errors.As(err, &apiErr) && apiErr.StatusCode >= 500 && apiErr.StatusCode < 600
|
||||||
|
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
// DefaultMaxDiffSize is the default maximum diff size in bytes (10 MB).
|
||||||
|
const DefaultMaxDiffSize = 10 * 1024 * 1024
|
||||||
|
|
||||||
|
// ErrDiffTooLarge is returned when a PR diff exceeds the configured MaxDiffSize.
|
||||||
|
var ErrDiffTooLarge = errors.New("diff size exceeds maximum allowed size")
|
||||||
|
|
||||||
// Client interacts with the Gitea API.
|
// Client interacts with the Gitea API.
|
||||||
// A Client is safe for concurrent use by multiple goroutines.
|
// A Client is safe for concurrent use by multiple goroutines.
|
||||||
type Client struct {
|
type Client struct {
|
||||||
@@ -61,6 +68,14 @@ type Client struct {
|
|||||||
// This field must be configured before the first request is made.
|
// This field must be configured before the first request is made.
|
||||||
|
gpt-review-bot
commented
[MINOR] Adding an exported field (MaxDiffSize) to the Client struct may break external code that constructs Client using unkeyed composite literals. Consider calling this out in release notes or ensuring consumers use NewClient. **[MINOR]** Adding an exported field (MaxDiffSize) to the Client struct may break external code that constructs Client using unkeyed composite literals. Consider calling this out in release notes or ensuring consumers use NewClient.
gpt-review-bot
commented
[NIT] MaxDiffSize lacks an explicit concurrency note like RetryBackoff. Since Client is used concurrently, documenting that this field should be set before use (and not mutated concurrently) would clarify safe usage. **[NIT]** MaxDiffSize lacks an explicit concurrency note like RetryBackoff. Since Client is used concurrently, documenting that this field should be set before use (and not mutated concurrently) would clarify safe usage.
gpt-review-bot
commented
[MINOR] API design nit: using math.MaxInt64 as a special value to disable the diff size limit may be surprising. Since negative values already disable the limit and doGetLimited clamps overflow, consider simplifying the API by documenting only negative values as the disable mechanism and treating very large positive values (including MaxInt64) as enforced limits. **[MINOR]** API design nit: using math.MaxInt64 as a special value to disable the diff size limit may be surprising. Since negative values already disable the limit and doGetLimited clamps overflow, consider simplifying the API by documenting only negative values as the disable mechanism and treating very large positive values (including MaxInt64) as enforced limits.
|
|||||||
// Modifying it while requests are in flight is not safe.
|
// Modifying it while requests are in flight is not safe.
|
||||||
RetryBackoff []time.Duration
|
RetryBackoff []time.Duration
|
||||||
|
|
||||||
|
// MaxDiffSize is the maximum number of bytes allowed when fetching a PR diff.
|
||||||
|
gpt-review-bot
commented
[NIT] The MaxDiffSize field comment suggests using math.MaxInt64 to disable the limit. This dual mechanism (negative and MaxInt64) may confuse users; consider documenting only the negative value as the disable switch. **[NIT]** The MaxDiffSize field comment suggests using math.MaxInt64 to disable the limit. This dual mechanism (negative and MaxInt64) may confuse users; consider documenting only the negative value as the disable switch.
|
|||||||
|
// If zero, defaults to DefaultMaxDiffSize (10 MB). Set to any negative value
|
||||||
|
// (or math.MaxInt64) to disable the limit.
|
||||||
|
//
|
||||||
|
// This field must be configured before the first request is made.
|
||||||
|
// Modifying it while requests are in flight is not safe.
|
||||||
|
MaxDiffSize int64
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewClient creates a new Gitea API client.
|
// NewClient creates a new Gitea API client.
|
||||||
|
sonnet-review-bot
commented
[NIT] The doc comment for **[NIT]** The doc comment for `MaxDiffSize` says 'Set to any negative value (or math.MaxInt64) to disable the limit.' This is slightly awkward API design — `math.MaxInt64` is a magic value that happens to bypass the overflow protection, not an intentional disable signal. The docstring documents it correctly but it would be cleaner if the disable path were only a negative value. However, since the math.MaxInt64 bypass exists to defend against a subtle overflow in doGetLimited (where maxBytes+1 would wrap), documenting both is the right call for transparency.
|
|||||||
@@ -125,9 +140,28 @@ func (c *Client) GetPullRequest(ctx context.Context, owner, repo string, number
|
|||||||
}
|
}
|
||||||
|
gpt-review-bot
commented
[NIT] The MaxDiffSize branching logic and limit calculation are repeated across GetPullRequestDiff and doGetLimited. Consider factoring an internal helper (e.g., effectiveMaxDiffSize() and a boolean for disabled) to centralize the policy and reduce repetition. **[NIT]** The MaxDiffSize branching logic and limit calculation are repeated across GetPullRequestDiff and doGetLimited. Consider factoring an internal helper (e.g., effectiveMaxDiffSize() and a boolean for disabled) to centralize the policy and reduce repetition.
|
|||||||
|
|
||||||
// GetPullRequestDiff fetches the unified diff for a PR.
|
// GetPullRequestDiff fetches the unified diff for a PR.
|
||||||
|
// It enforces MaxDiffSize to prevent unbounded memory allocation.
|
||||||
|
// Returns ErrDiffTooLarge if the diff exceeds the configured limit.
|
||||||
func (c *Client) GetPullRequestDiff(ctx context.Context, owner, repo string, number int) (string, error) {
|
func (c *Client) GetPullRequestDiff(ctx context.Context, owner, repo string, number int) (string, error) {
|
||||||
reqURL := fmt.Sprintf("%s/api/v1/repos/%s/%s/pulls/%d.diff", c.baseURL, url.PathEscape(owner), url.PathEscape(repo), number)
|
reqURL := fmt.Sprintf("%s/api/v1/repos/%s/%s/pulls/%d.diff", c.baseURL, url.PathEscape(owner), url.PathEscape(repo), number)
|
||||||
body, err := c.doGet(ctx, reqURL)
|
|
||||||
|
maxSize := c.MaxDiffSize
|
||||||
|
if maxSize == 0 {
|
||||||
|
maxSize = DefaultMaxDiffSize
|
||||||
|
gpt-review-bot
commented
[MINOR] Special-casing math.MaxInt64 to disable the size limit is unconventional and redundant given the clamp logic in doGetLimited; using any negative value to disable the limit is sufficient and simpler. **[MINOR]** Special-casing math.MaxInt64 to disable the size limit is unconventional and redundant given the clamp logic in doGetLimited; using any negative value to disable the limit is sufficient and simpler.
|
|||||||
|
}
|
||||||
|
|
||||||
|
// When the limit is disabled (negative) or set to math.MaxInt64 (which
|
||||||
|
// would overflow the +1 detection and silently disable enforcement),
|
||||||
|
// use the standard unlimited doGet path.
|
||||||
|
if maxSize < 0 || maxSize == math.MaxInt64 {
|
||||||
|
body, err := c.doGet(ctx, reqURL)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("fetch diff: %w", err)
|
||||||
|
}
|
||||||
|
return string(body), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
body, err := c.doGetLimited(ctx, reqURL, maxSize)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("fetch diff: %w", err)
|
return "", fmt.Errorf("fetch diff: %w", err)
|
||||||
}
|
}
|
||||||
@@ -292,9 +326,9 @@ func isRetriableSyscallError(err error) bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// redactURL strips query parameters from a URL for safe logging.
|
// redactURL strips query parameters and userinfo credentials from a URL for
|
||||||
// This prevents accidental exposure of sensitive data that future callers
|
// safe logging. This prevents accidental exposure of sensitive data (tokens in
|
||||||
// might pass via query strings.
|
// query strings, or user:pass in the authority) in log output.
|
||||||
func redactURL(rawURL string) string {
|
func redactURL(rawURL string) string {
|
||||||
parsed, err := url.Parse(rawURL)
|
parsed, err := url.Parse(rawURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -302,6 +336,9 @@ func redactURL(rawURL string) string {
|
|||||||
// potentially logging something sensitive.
|
// potentially logging something sensitive.
|
||||||
return "[invalid URL]"
|
return "[invalid URL]"
|
||||||
}
|
}
|
||||||
|
if parsed.User != nil {
|
||||||
|
parsed.User = url.User("REDACTED")
|
||||||
|
}
|
||||||
if parsed.RawQuery != "" {
|
if parsed.RawQuery != "" {
|
||||||
parsed.RawQuery = "[redacted]"
|
parsed.RawQuery = "[redacted]"
|
||||||
}
|
}
|
||||||
@@ -322,10 +359,12 @@ func sanitizeErrorForLog(err error) string {
|
|||||||
return err.Error()
|
return err.Error()
|
||||||
}
|
}
|
||||||
|
|
||||||
// doGet performs an HTTP GET request with retry on 5xx errors and temporary
|
// doGetWithReader performs an HTTP GET request with retry on 5xx errors and
|
||||||
// network errors. Retries up to 3 times with exponential backoff (1s, 2s delays
|
// temporary network errors. Retries up to 3 times with exponential backoff
|
||||||
// by default; configurable via Client.RetryBackoff for testing).
|
// (1s, 2s delays by default; configurable via Client.RetryBackoff for testing).
|
||||||
func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
|
// The readBody function is called with the response body on success (2xx) and
|
||||||
|
// is responsible for reading and closing it.
|
||||||
|
func (c *Client) doGetWithReader(ctx context.Context, reqURL string, readBody func(io.ReadCloser) ([]byte, error)) ([]byte, error) {
|
||||||
const maxAttempts = 3
|
const maxAttempts = 3
|
||||||
// backoff[i] is the delay before attempt i+1 (i.e., after attempt i fails).
|
// backoff[i] is the delay before attempt i+1 (i.e., after attempt i fails).
|
||||||
// First attempt (i=0) has no delay; retries wait 1s then 2s by default.
|
// First attempt (i=0) has no delay; retries wait 1s then 2s by default.
|
||||||
@@ -390,12 +429,7 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
|
|||||||
return nil, lastErr
|
return nil, lastErr
|
||||||
}
|
}
|
||||||
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
|
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
|
||||||
body, err := io.ReadAll(resp.Body)
|
return readBody(resp.Body)
|
||||||
resp.Body.Close()
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return body, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Error path: limit how much we read from potentially malicious server
|
// Error path: limit how much we read from potentially malicious server
|
||||||
@@ -413,6 +447,39 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
|
|||||||
return nil, lastErr
|
return nil, lastErr
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// doGet performs an HTTP GET request with retry, reading the full response body.
|
||||||
|
func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
|
||||||
|
return c.doGetWithReader(ctx, reqURL, func(body io.ReadCloser) ([]byte, error) {
|
||||||
|
defer body.Close()
|
||||||
|
return io.ReadAll(body)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// doGetLimited performs an HTTP GET request with retry but enforces a maximum
|
||||||
|
// response body size. Returns ErrDiffTooLarge if the response exceeds maxBytes.
|
||||||
|
// It reads maxBytes+1 (clamped to avoid overflow) to detect truncation without
|
||||||
|
// buffering the entire body.
|
||||||
|
func (c *Client) doGetLimited(ctx context.Context, reqURL string, maxBytes int64) ([]byte, error) {
|
||||||
|
return c.doGetWithReader(ctx, reqURL, func(body io.ReadCloser) ([]byte, error) {
|
||||||
|
defer body.Close()
|
||||||
|
// Read up to maxBytes+1 to detect overflow.
|
||||||
|
// Clamp to prevent integer overflow when maxBytes == math.MaxInt64.
|
||||||
|
limitBytes := maxBytes + 1
|
||||||
|
sonnet-review-bot
commented
[NIT] In **[NIT]** In `doGetLimited`, the clamp comment says 'Clamp to prevent integer overflow when maxBytes == math.MaxInt64' but the guard `if maxSize < 0 || maxSize == math.MaxInt64` in `GetPullRequestDiff` already prevents `doGetLimited` from ever being called with `math.MaxInt64`. The overflow protection in `doGetLimited` is therefore defensive-only. This is fine, but worth noting as the comment is slightly misleading — it implies a reachable case that `GetPullRequestDiff` already prevents. Consider adding a note that the clamp is a belt-and-suspenders defense.
|
|||||||
|
if limitBytes <= 0 {
|
||||||
|
limitBytes = math.MaxInt64
|
||||||
|
}
|
||||||
|
limited := io.LimitReader(body, limitBytes)
|
||||||
|
data, err := io.ReadAll(limited)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if int64(len(data)) > maxBytes {
|
||||||
|
return nil, fmt.Errorf("%w: response exceeds %d bytes", ErrDiffTooLarge, maxBytes)
|
||||||
|
}
|
||||||
|
return data, nil
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// escapePath escapes each segment of a relative file path for use in URLs.
|
// escapePath escapes each segment of a relative file path for use in URLs.
|
||||||
// Slashes are preserved as path separators; other special characters are escaped.
|
// Slashes are preserved as path separators; other special characters are escaped.
|
||||||
// Input should be a relative path (no leading slash). Already-encoded segments
|
// Input should be a relative path (no leading slash). Already-encoded segments
|
||||||
|
|||||||
@@ -1092,6 +1092,21 @@ func TestRedactURL(t *testing.T) {
|
|||||||
input: "",
|
input: "",
|
||||||
want: "",
|
want: "",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "with userinfo - redacts credentials",
|
||||||
|
input: "https://admin:secret@gitea.example.com/api/v1/repos",
|
||||||
|
want: "https://REDACTED@gitea.example.com/api/v1/repos",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "with userinfo and query params",
|
||||||
|
input: "https://user:pass@example.com/path?token=abc",
|
||||||
|
want: "https://REDACTED@example.com/path?[redacted]",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "username only - no password",
|
||||||
|
input: "https://user@example.com/path",
|
||||||
|
want: "https://REDACTED@example.com/path",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
|||||||
@@ -0,0 +1,97 @@
|
|||||||
|
package gitea
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"math"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestGetPullRequestDiff_SizeLimits(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
diff string
|
||||||
|
maxDiffSize int64
|
||||||
|
wantErr error
|
||||||
|
wantDiff string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "exceeds max size",
|
||||||
|
diff: strings.Repeat("+ added line\n", 1000), // ~13 KB
|
||||||
|
maxDiffSize: 100,
|
||||||
|
wantErr: ErrDiffTooLarge,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "within max size",
|
||||||
|
diff: "diff --git a/f.go b/f.go\n--- a/f.go\n+++ b/f.go\n@@ -1 +1 @@\n-old\n+new\n",
|
||||||
|
maxDiffSize: 1024,
|
||||||
|
wantDiff: "diff --git a/f.go b/f.go\n--- a/f.go\n+++ b/f.go\n@@ -1 +1 @@\n-old\n+new\n",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "exactly at limit",
|
||||||
|
diff: strings.Repeat("x", 50),
|
||||||
|
maxDiffSize: 50,
|
||||||
|
wantDiff: strings.Repeat("x", 50),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "one byte over limit",
|
||||||
|
diff: strings.Repeat("x", 51),
|
||||||
|
maxDiffSize: 50,
|
||||||
|
wantErr: ErrDiffTooLarge,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "disabled limit",
|
||||||
|
diff: strings.Repeat("x", 10000),
|
||||||
|
maxDiffSize: -1,
|
||||||
|
wantDiff: strings.Repeat("x", 10000),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "math.MaxInt64 treated as disabled",
|
||||||
|
diff: strings.Repeat("x", 10000),
|
||||||
|
maxDiffSize: math.MaxInt64,
|
||||||
|
wantDiff: strings.Repeat("x", 10000),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "default limit",
|
||||||
|
diff: "diff content",
|
||||||
|
maxDiffSize: 0, // zero means use DefaultMaxDiffSize
|
||||||
|
wantDiff: "diff content",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.Write([]byte(tt.diff)) //nolint:errcheck // test handler
|
||||||
|
}))
|
||||||
|
defer server.Close()
|
||||||
|
|
||||||
|
client := NewClient(server.URL, "test-token")
|
||||||
|
client.MaxDiffSize = tt.maxDiffSize
|
||||||
|
client.RetryBackoff = []time.Duration{}
|
||||||
|
sonnet-review-bot
commented
[NIT] The test case 'math.MaxInt64 treated as disabled' verifies the bypass works, but the test name 'treated as disabled' implies intended behavior rather than an edge-case workaround. A name like 'math.MaxInt64 bypasses limit via overflow protection' would be more precise. Minor naming issue only. **[NIT]** The test case 'math.MaxInt64 treated as disabled' verifies the bypass works, but the test name 'treated as disabled' implies intended behavior rather than an edge-case workaround. A name like 'math.MaxInt64 bypasses limit via overflow protection' would be more precise. Minor naming issue only.
|
|||||||
|
|
||||||
|
got, err := client.GetPullRequestDiff(context.Background(), "owner", "repo", 1)
|
||||||
|
|
||||||
|
if tt.wantErr != nil {
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error, got nil")
|
||||||
|
}
|
||||||
|
if !errors.Is(err, tt.wantErr) {
|
||||||
|
t.Errorf("expected %v, got: %v", tt.wantErr, err)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
if got != tt.wantDiff {
|
||||||
|
t.Errorf("diff mismatch: got length %d, want length %d", len(got), len(tt.wantDiff))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
[NIT] ErrDiffTooLarge follows standard practices, but consider prefixing the error message with the package context (e.g., "gitea: diff too large") per error string conventions to improve disambiguation across packages.