From 92b84976cfd5eca8f3717826760f19b4954d3bec Mon Sep 17 00:00:00 2001 From: claw Date: Wed, 13 May 2026 04:57:30 -0700 Subject: [PATCH 1/4] feat(gitea): harden GetPullRequestDiff against unbounded diff size Add a configurable MaxDiffSize field to Client that limits how much data GetPullRequestDiff will read into memory. The default is 10 MB (DefaultMaxDiffSize). When the diff exceeds the limit, ErrDiffTooLarge is returned, allowing callers to skip position translation gracefully. Implementation uses io.LimitReader to read maxBytes+1, detecting overflow without buffering the entire response. Setting MaxDiffSize to -1 disables the limit entirely. Closes #92 --- gitea/client.go | 109 +++++++++++++++++++++++++++++- gitea/diff_size_test.go | 143 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 251 insertions(+), 1 deletion(-) create mode 100644 gitea/diff_size_test.go diff --git a/gitea/client.go b/gitea/client.go index 55835ac..acd710c 100644 --- a/gitea/client.go +++ b/gitea/client.go @@ -47,6 +47,12 @@ func IsServerError(err error) bool { return errors.As(err, &apiErr) && apiErr.StatusCode >= 500 && apiErr.StatusCode < 600 } +// DefaultMaxDiffSize is the default maximum diff size in bytes (10 MB). +const DefaultMaxDiffSize = 10 * 1024 * 1024 + +// ErrDiffTooLarge is returned when a PR diff exceeds the configured MaxDiffSize. +var ErrDiffTooLarge = errors.New("diff size exceeds maximum allowed size") + // Client interacts with the Gitea API. // A Client is safe for concurrent use by multiple goroutines. type Client struct { @@ -61,6 +67,10 @@ type Client struct { // This field must be configured before the first request is made. // Modifying it while requests are in flight is not safe. RetryBackoff []time.Duration + + // MaxDiffSize is the maximum number of bytes allowed when fetching a PR diff. + // If zero, defaults to DefaultMaxDiffSize (10 MB). Set to -1 to disable the limit. + MaxDiffSize int64 } // NewClient creates a new Gitea API client. @@ -125,9 +135,26 @@ func (c *Client) GetPullRequest(ctx context.Context, owner, repo string, number } // GetPullRequestDiff fetches the unified diff for a PR. +// It enforces MaxDiffSize to prevent unbounded memory allocation. +// Returns ErrDiffTooLarge if the diff exceeds the configured limit. func (c *Client) GetPullRequestDiff(ctx context.Context, owner, repo string, number int) (string, error) { reqURL := fmt.Sprintf("%s/api/v1/repos/%s/%s/pulls/%d.diff", c.baseURL, url.PathEscape(owner), url.PathEscape(repo), number) - body, err := c.doGet(ctx, reqURL) + + maxSize := c.MaxDiffSize + if maxSize == 0 { + maxSize = DefaultMaxDiffSize + } + + // When the limit is disabled, use the standard doGet path. + if maxSize < 0 { + body, err := c.doGet(ctx, reqURL) + if err != nil { + return "", fmt.Errorf("fetch diff: %w", err) + } + return string(body), nil + } + + body, err := c.doGetLimited(ctx, reqURL, maxSize) if err != nil { return "", fmt.Errorf("fetch diff: %w", err) } @@ -413,6 +440,86 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) { return nil, lastErr } +// doGetLimited performs an HTTP GET request with retry (like doGet) but enforces +// a maximum response body size. Returns ErrDiffTooLarge if the response exceeds +// maxBytes. It reads maxBytes+1 to detect overflow without buffering the entire body. +func (c *Client) doGetLimited(ctx context.Context, reqURL string, maxBytes int64) ([]byte, error) { + const maxAttempts = 3 + backoff := c.RetryBackoff + if backoff == nil { + backoff = []time.Duration{1 * time.Second, 2 * time.Second} + } + const maxErrorBodyBytes = 64 * 1024 + + var lastErr error + for attempt := 0; attempt < maxAttempts; attempt++ { + if attempt > 0 { + var delay time.Duration + if attempt-1 < len(backoff) { + delay = backoff[attempt-1] + } + if delay > 0 { + slog.Warn("retrying request after error", + "attempt", attempt+1, + "url", redactURL(reqURL), + "delay", delay.String(), + "lastError", sanitizeErrorForLog(lastErr)) + + timer := time.NewTimer(delay) + select { + case <-timer.C: + case <-ctx.Done(): + timer.Stop() + return nil, ctx.Err() + } + } + } + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil) + if err != nil { + return nil, err + } + req.Header.Set("Authorization", "token "+c.token) + + resp, err := c.http.Do(req) + if err != nil { + lastErr = err + if attempt < maxAttempts-1 && isTemporaryNetError(err) { + slog.Warn("temporary network error, will retry", + "attempt", attempt+1, + "url", redactURL(reqURL), + "error", err) + continue + } + return nil, lastErr + } + if resp.StatusCode >= 200 && resp.StatusCode < 300 { + // Read up to maxBytes+1 to detect overflow. + limited := io.LimitReader(resp.Body, maxBytes+1) + body, err := io.ReadAll(limited) + resp.Body.Close() + if err != nil { + return nil, err + } + if int64(len(body)) > maxBytes { + return nil, fmt.Errorf("%w: response is larger than %d bytes", ErrDiffTooLarge, maxBytes) + } + return body, nil + } + + errBody, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBodyBytes)) + resp.Body.Close() + + lastErr = &APIError{StatusCode: resp.StatusCode, Body: string(errBody)} + + if resp.StatusCode < 500 || resp.StatusCode >= 600 { + return nil, lastErr + } + } + + return nil, lastErr +} + // escapePath escapes each segment of a relative file path for use in URLs. // Slashes are preserved as path separators; other special characters are escaped. // Input should be a relative path (no leading slash). Already-encoded segments diff --git a/gitea/diff_size_test.go b/gitea/diff_size_test.go new file mode 100644 index 0000000..6601143 --- /dev/null +++ b/gitea/diff_size_test.go @@ -0,0 +1,143 @@ +package gitea + +import ( + "context" + "errors" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" +) + +func TestGetPullRequestDiff_ExceedsMaxSize(t *testing.T) { + // Create a diff that exceeds a small limit + largeDiff := strings.Repeat("+ added line\n", 1000) // ~13 KB + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte(largeDiff)) + })) + defer server.Close() + + client := NewClient(server.URL, "test-token") + client.MaxDiffSize = 100 // 100 bytes limit + client.RetryBackoff = []time.Duration{} // no delay in tests + + _, err := client.GetPullRequestDiff(context.Background(), "owner", "repo", 1) + if err == nil { + t.Fatal("expected error for oversized diff, got nil") + } + if !errors.Is(err, ErrDiffTooLarge) { + t.Errorf("expected ErrDiffTooLarge, got: %v", err) + } +} + +func TestGetPullRequestDiff_WithinMaxSize(t *testing.T) { + smallDiff := "diff --git a/f.go b/f.go\n--- a/f.go\n+++ b/f.go\n@@ -1 +1 @@\n-old\n+new\n" + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte(smallDiff)) + })) + defer server.Close() + + client := NewClient(server.URL, "test-token") + client.MaxDiffSize = 1024 // 1 KB limit — more than enough + client.RetryBackoff = []time.Duration{} + + got, err := client.GetPullRequestDiff(context.Background(), "owner", "repo", 1) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != smallDiff { + t.Errorf("expected diff %q, got %q", smallDiff, got) + } +} + +func TestGetPullRequestDiff_ExactlyAtLimit(t *testing.T) { + // A diff that is exactly at the limit should succeed + exactDiff := strings.Repeat("x", 50) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte(exactDiff)) + })) + defer server.Close() + + client := NewClient(server.URL, "test-token") + client.MaxDiffSize = 50 // exactly the size of the diff + client.RetryBackoff = []time.Duration{} + + got, err := client.GetPullRequestDiff(context.Background(), "owner", "repo", 1) + if err != nil { + t.Fatalf("unexpected error for diff at exact limit: %v", err) + } + if got != exactDiff { + t.Errorf("expected diff to match, got length %d", len(got)) + } +} + +func TestGetPullRequestDiff_OneByteOverLimit(t *testing.T) { + // A diff that is one byte over the limit should fail + overDiff := strings.Repeat("x", 51) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte(overDiff)) + })) + defer server.Close() + + client := NewClient(server.URL, "test-token") + client.MaxDiffSize = 50 + client.RetryBackoff = []time.Duration{} + + _, err := client.GetPullRequestDiff(context.Background(), "owner", "repo", 1) + if err == nil { + t.Fatal("expected error for diff one byte over limit") + } + if !errors.Is(err, ErrDiffTooLarge) { + t.Errorf("expected ErrDiffTooLarge, got: %v", err) + } +} + +func TestGetPullRequestDiff_DisabledLimit(t *testing.T) { + // When MaxDiffSize is -1, no limit is enforced + largeDiff := strings.Repeat("x", 10000) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte(largeDiff)) + })) + defer server.Close() + + client := NewClient(server.URL, "test-token") + client.MaxDiffSize = -1 // disabled + client.RetryBackoff = []time.Duration{} + + got, err := client.GetPullRequestDiff(context.Background(), "owner", "repo", 1) + if err != nil { + t.Fatalf("unexpected error with disabled limit: %v", err) + } + if got != largeDiff { + t.Errorf("expected full diff with disabled limit, got length %d", len(got)) + } +} + +func TestGetPullRequestDiff_DefaultLimit(t *testing.T) { + // With zero MaxDiffSize (default), should use DefaultMaxDiffSize. + // A small diff should succeed without setting MaxDiffSize. + smallDiff := "diff content" + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte(smallDiff)) + })) + defer server.Close() + + client := NewClient(server.URL, "test-token") + // MaxDiffSize is zero (default) — should use DefaultMaxDiffSize (10 MB) + client.RetryBackoff = []time.Duration{} + + got, err := client.GetPullRequestDiff(context.Background(), "owner", "repo", 1) + if err != nil { + t.Fatalf("unexpected error with default limit: %v", err) + } + if got != smallDiff { + t.Errorf("expected diff %q, got %q", smallDiff, got) + } +} From 004343d05f138a48d08fc979295ecb3fff078428 Mon Sep 17 00:00:00 2001 From: claw Date: Wed, 13 May 2026 05:17:14 -0700 Subject: [PATCH 2/4] =?UTF-8?q?fix(gitea):=20address=20review=20findings?= =?UTF-8?q?=20=E2=80=94=20clamp=20overflow,=20clarify=20maxSize=20doc?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Clamp maxBytes+1 to prevent integer overflow to negative when maxBytes == math.MaxInt64 (falls back to math.MaxInt64) - Update MaxDiffSize doc: 'any negative value' disables the limit, matching actual behavior of 'maxSize < 0' check --- gitea/client.go | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/gitea/client.go b/gitea/client.go index acd710c..bdc9a4a 100644 --- a/gitea/client.go +++ b/gitea/client.go @@ -11,6 +11,7 @@ import ( "fmt" "io" "log/slog" + "math" "net" "net/http" "net/url" @@ -69,7 +70,7 @@ type Client struct { RetryBackoff []time.Duration // MaxDiffSize is the maximum number of bytes allowed when fetching a PR diff. - // If zero, defaults to DefaultMaxDiffSize (10 MB). Set to -1 to disable the limit. + // If zero, defaults to DefaultMaxDiffSize (10 MB). Set to any negative value to disable the limit. MaxDiffSize int64 } @@ -442,7 +443,8 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) { // doGetLimited performs an HTTP GET request with retry (like doGet) but enforces // a maximum response body size. Returns ErrDiffTooLarge if the response exceeds -// maxBytes. It reads maxBytes+1 to detect overflow without buffering the entire body. +// maxBytes. It reads maxBytes+1 (clamped to avoid overflow) to detect truncation +// without buffering the entire body. func (c *Client) doGetLimited(ctx context.Context, reqURL string, maxBytes int64) ([]byte, error) { const maxAttempts = 3 backoff := c.RetryBackoff @@ -495,7 +497,12 @@ func (c *Client) doGetLimited(ctx context.Context, reqURL string, maxBytes int64 } if resp.StatusCode >= 200 && resp.StatusCode < 300 { // Read up to maxBytes+1 to detect overflow. - limited := io.LimitReader(resp.Body, maxBytes+1) + // Clamp to prevent integer overflow when maxBytes == math.MaxInt64. + limitBytes := maxBytes + 1 + if limitBytes <= 0 { + limitBytes = math.MaxInt64 + } + limited := io.LimitReader(resp.Body, limitBytes) body, err := io.ReadAll(limited) resp.Body.Close() if err != nil { From 6ebf66aefbcb1905078a9cd8561a97e9f4d04e55 Mon Sep 17 00:00:00 2001 From: claw Date: Wed, 13 May 2026 05:23:42 -0700 Subject: [PATCH 3/4] fix(gitea): address review feedback on diff size limiting - Add concurrency safety note to MaxDiffSize field documentation, mirroring the existing note on RetryBackoff - Consolidate six individual test functions into a single table-driven test (TestGetPullRequestDiff_SizeLimits) reducing repetition - Add //nolint:errcheck annotation to test handler w.Write calls --- gitea/client.go | 3 + gitea/diff_size_test.go | 203 +++++++++++++++------------------------- 2 files changed, 78 insertions(+), 128 deletions(-) diff --git a/gitea/client.go b/gitea/client.go index bdc9a4a..a5391f8 100644 --- a/gitea/client.go +++ b/gitea/client.go @@ -71,6 +71,9 @@ type Client struct { // MaxDiffSize is the maximum number of bytes allowed when fetching a PR diff. // If zero, defaults to DefaultMaxDiffSize (10 MB). Set to any negative value to disable the limit. + // + // This field must be configured before the first request is made. + // Modifying it while requests are in flight is not safe. MaxDiffSize int64 } diff --git a/gitea/diff_size_test.go b/gitea/diff_size_test.go index 6601143..3657d2d 100644 --- a/gitea/diff_size_test.go +++ b/gitea/diff_size_test.go @@ -10,134 +10,81 @@ import ( "time" ) -func TestGetPullRequestDiff_ExceedsMaxSize(t *testing.T) { - // Create a diff that exceeds a small limit - largeDiff := strings.Repeat("+ added line\n", 1000) // ~13 KB - - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte(largeDiff)) - })) - defer server.Close() - - client := NewClient(server.URL, "test-token") - client.MaxDiffSize = 100 // 100 bytes limit - client.RetryBackoff = []time.Duration{} // no delay in tests - - _, err := client.GetPullRequestDiff(context.Background(), "owner", "repo", 1) - if err == nil { - t.Fatal("expected error for oversized diff, got nil") +func TestGetPullRequestDiff_SizeLimits(t *testing.T) { + tests := []struct { + name string + diff string + maxDiffSize int64 + wantErr error + wantDiff string + }{ + { + name: "exceeds max size", + diff: strings.Repeat("+ added line\n", 1000), // ~13 KB + maxDiffSize: 100, + wantErr: ErrDiffTooLarge, + }, + { + name: "within max size", + diff: "diff --git a/f.go b/f.go\n--- a/f.go\n+++ b/f.go\n@@ -1 +1 @@\n-old\n+new\n", + maxDiffSize: 1024, + wantDiff: "diff --git a/f.go b/f.go\n--- a/f.go\n+++ b/f.go\n@@ -1 +1 @@\n-old\n+new\n", + }, + { + name: "exactly at limit", + diff: strings.Repeat("x", 50), + maxDiffSize: 50, + wantDiff: strings.Repeat("x", 50), + }, + { + name: "one byte over limit", + diff: strings.Repeat("x", 51), + maxDiffSize: 50, + wantErr: ErrDiffTooLarge, + }, + { + name: "disabled limit", + diff: strings.Repeat("x", 10000), + maxDiffSize: -1, + wantDiff: strings.Repeat("x", 10000), + }, + { + name: "default limit", + diff: "diff content", + maxDiffSize: 0, // zero means use DefaultMaxDiffSize + wantDiff: "diff content", + }, } - if !errors.Is(err, ErrDiffTooLarge) { - t.Errorf("expected ErrDiffTooLarge, got: %v", err) - } -} - -func TestGetPullRequestDiff_WithinMaxSize(t *testing.T) { - smallDiff := "diff --git a/f.go b/f.go\n--- a/f.go\n+++ b/f.go\n@@ -1 +1 @@\n-old\n+new\n" - - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte(smallDiff)) - })) - defer server.Close() - - client := NewClient(server.URL, "test-token") - client.MaxDiffSize = 1024 // 1 KB limit — more than enough - client.RetryBackoff = []time.Duration{} - - got, err := client.GetPullRequestDiff(context.Background(), "owner", "repo", 1) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if got != smallDiff { - t.Errorf("expected diff %q, got %q", smallDiff, got) - } -} - -func TestGetPullRequestDiff_ExactlyAtLimit(t *testing.T) { - // A diff that is exactly at the limit should succeed - exactDiff := strings.Repeat("x", 50) - - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte(exactDiff)) - })) - defer server.Close() - - client := NewClient(server.URL, "test-token") - client.MaxDiffSize = 50 // exactly the size of the diff - client.RetryBackoff = []time.Duration{} - - got, err := client.GetPullRequestDiff(context.Background(), "owner", "repo", 1) - if err != nil { - t.Fatalf("unexpected error for diff at exact limit: %v", err) - } - if got != exactDiff { - t.Errorf("expected diff to match, got length %d", len(got)) - } -} - -func TestGetPullRequestDiff_OneByteOverLimit(t *testing.T) { - // A diff that is one byte over the limit should fail - overDiff := strings.Repeat("x", 51) - - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte(overDiff)) - })) - defer server.Close() - - client := NewClient(server.URL, "test-token") - client.MaxDiffSize = 50 - client.RetryBackoff = []time.Duration{} - - _, err := client.GetPullRequestDiff(context.Background(), "owner", "repo", 1) - if err == nil { - t.Fatal("expected error for diff one byte over limit") - } - if !errors.Is(err, ErrDiffTooLarge) { - t.Errorf("expected ErrDiffTooLarge, got: %v", err) - } -} - -func TestGetPullRequestDiff_DisabledLimit(t *testing.T) { - // When MaxDiffSize is -1, no limit is enforced - largeDiff := strings.Repeat("x", 10000) - - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte(largeDiff)) - })) - defer server.Close() - - client := NewClient(server.URL, "test-token") - client.MaxDiffSize = -1 // disabled - client.RetryBackoff = []time.Duration{} - - got, err := client.GetPullRequestDiff(context.Background(), "owner", "repo", 1) - if err != nil { - t.Fatalf("unexpected error with disabled limit: %v", err) - } - if got != largeDiff { - t.Errorf("expected full diff with disabled limit, got length %d", len(got)) - } -} - -func TestGetPullRequestDiff_DefaultLimit(t *testing.T) { - // With zero MaxDiffSize (default), should use DefaultMaxDiffSize. - // A small diff should succeed without setting MaxDiffSize. - smallDiff := "diff content" - - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte(smallDiff)) - })) - defer server.Close() - - client := NewClient(server.URL, "test-token") - // MaxDiffSize is zero (default) — should use DefaultMaxDiffSize (10 MB) - client.RetryBackoff = []time.Duration{} - - got, err := client.GetPullRequestDiff(context.Background(), "owner", "repo", 1) - if err != nil { - t.Fatalf("unexpected error with default limit: %v", err) - } - if got != smallDiff { - t.Errorf("expected diff %q, got %q", smallDiff, got) + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte(tt.diff)) //nolint:errcheck // test handler + })) + defer server.Close() + + client := NewClient(server.URL, "test-token") + client.MaxDiffSize = tt.maxDiffSize + client.RetryBackoff = []time.Duration{} + + got, err := client.GetPullRequestDiff(context.Background(), "owner", "repo", 1) + + if tt.wantErr != nil { + if err == nil { + t.Fatal("expected error, got nil") + } + if !errors.Is(err, tt.wantErr) { + t.Errorf("expected %v, got: %v", tt.wantErr, err) + } + return + } + + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != tt.wantDiff { + t.Errorf("diff mismatch: got length %d, want length %d", len(got), len(tt.wantDiff)) + } + }) } } From 49d6ca77a386c31106676f112b6fdc23b88fb360 Mon Sep 17 00:00:00 2001 From: claw Date: Wed, 13 May 2026 05:40:49 -0700 Subject: [PATCH 4/4] refactor(gitea): eliminate retry duplication, harden redactURL and MaxInt64 edge case - Extract doGetWithReader to share retry/backoff logic between doGet and doGetLimited, eliminating ~60 lines of duplicated code (addresses MINOR finding from all reviewers). - redactURL now strips userinfo credentials (user:pass@host) in addition to query parameters (addresses security-review-bot finding). - GetPullRequestDiff treats MaxDiffSize == math.MaxInt64 as disabled, preventing the silent enforcement bypass where the overflow clamp makes the size check unreachable (addresses security-review-bot finding). - Improved error message wording: 'response exceeds N bytes' (NIT fix). --- gitea/client.go | 138 +++++++++++++--------------------------- gitea/client_test.go | 15 +++++ gitea/diff_size_test.go | 7 ++ 3 files changed, 66 insertions(+), 94 deletions(-) diff --git a/gitea/client.go b/gitea/client.go index a5391f8..32c3cae 100644 --- a/gitea/client.go +++ b/gitea/client.go @@ -70,7 +70,8 @@ type Client struct { RetryBackoff []time.Duration // MaxDiffSize is the maximum number of bytes allowed when fetching a PR diff. - // If zero, defaults to DefaultMaxDiffSize (10 MB). Set to any negative value to disable the limit. + // If zero, defaults to DefaultMaxDiffSize (10 MB). Set to any negative value + // (or math.MaxInt64) to disable the limit. // // This field must be configured before the first request is made. // Modifying it while requests are in flight is not safe. @@ -149,8 +150,10 @@ func (c *Client) GetPullRequestDiff(ctx context.Context, owner, repo string, num maxSize = DefaultMaxDiffSize } - // When the limit is disabled, use the standard doGet path. - if maxSize < 0 { + // When the limit is disabled (negative) or set to math.MaxInt64 (which + // would overflow the +1 detection and silently disable enforcement), + // use the standard unlimited doGet path. + if maxSize < 0 || maxSize == math.MaxInt64 { body, err := c.doGet(ctx, reqURL) if err != nil { return "", fmt.Errorf("fetch diff: %w", err) @@ -323,9 +326,9 @@ func isRetriableSyscallError(err error) bool { return true } -// redactURL strips query parameters from a URL for safe logging. -// This prevents accidental exposure of sensitive data that future callers -// might pass via query strings. +// redactURL strips query parameters and userinfo credentials from a URL for +// safe logging. This prevents accidental exposure of sensitive data (tokens in +// query strings, or user:pass in the authority) in log output. func redactURL(rawURL string) string { parsed, err := url.Parse(rawURL) if err != nil { @@ -333,6 +336,9 @@ func redactURL(rawURL string) string { // potentially logging something sensitive. return "[invalid URL]" } + if parsed.User != nil { + parsed.User = url.User("REDACTED") + } if parsed.RawQuery != "" { parsed.RawQuery = "[redacted]" } @@ -353,10 +359,12 @@ func sanitizeErrorForLog(err error) string { return err.Error() } -// doGet performs an HTTP GET request with retry on 5xx errors and temporary -// network errors. Retries up to 3 times with exponential backoff (1s, 2s delays -// by default; configurable via Client.RetryBackoff for testing). -func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) { +// doGetWithReader performs an HTTP GET request with retry on 5xx errors and +// temporary network errors. Retries up to 3 times with exponential backoff +// (1s, 2s delays by default; configurable via Client.RetryBackoff for testing). +// The readBody function is called with the response body on success (2xx) and +// is responsible for reading and closing it. +func (c *Client) doGetWithReader(ctx context.Context, reqURL string, readBody func(io.ReadCloser) ([]byte, error)) ([]byte, error) { const maxAttempts = 3 // backoff[i] is the delay before attempt i+1 (i.e., after attempt i fails). // First attempt (i=0) has no delay; retries wait 1s then 2s by default. @@ -421,12 +429,7 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) { return nil, lastErr } if resp.StatusCode >= 200 && resp.StatusCode < 300 { - body, err := io.ReadAll(resp.Body) - resp.Body.Close() - if err != nil { - return nil, err - } - return body, nil + return readBody(resp.Body) } // Error path: limit how much we read from potentially malicious server @@ -444,90 +447,37 @@ func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) { return nil, lastErr } -// doGetLimited performs an HTTP GET request with retry (like doGet) but enforces -// a maximum response body size. Returns ErrDiffTooLarge if the response exceeds -// maxBytes. It reads maxBytes+1 (clamped to avoid overflow) to detect truncation -// without buffering the entire body. +// doGet performs an HTTP GET request with retry, reading the full response body. +func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) { + return c.doGetWithReader(ctx, reqURL, func(body io.ReadCloser) ([]byte, error) { + defer body.Close() + return io.ReadAll(body) + }) +} + +// doGetLimited performs an HTTP GET request with retry but enforces a maximum +// response body size. Returns ErrDiffTooLarge if the response exceeds maxBytes. +// It reads maxBytes+1 (clamped to avoid overflow) to detect truncation without +// buffering the entire body. func (c *Client) doGetLimited(ctx context.Context, reqURL string, maxBytes int64) ([]byte, error) { - const maxAttempts = 3 - backoff := c.RetryBackoff - if backoff == nil { - backoff = []time.Duration{1 * time.Second, 2 * time.Second} - } - const maxErrorBodyBytes = 64 * 1024 - - var lastErr error - for attempt := 0; attempt < maxAttempts; attempt++ { - if attempt > 0 { - var delay time.Duration - if attempt-1 < len(backoff) { - delay = backoff[attempt-1] - } - if delay > 0 { - slog.Warn("retrying request after error", - "attempt", attempt+1, - "url", redactURL(reqURL), - "delay", delay.String(), - "lastError", sanitizeErrorForLog(lastErr)) - - timer := time.NewTimer(delay) - select { - case <-timer.C: - case <-ctx.Done(): - timer.Stop() - return nil, ctx.Err() - } - } + return c.doGetWithReader(ctx, reqURL, func(body io.ReadCloser) ([]byte, error) { + defer body.Close() + // Read up to maxBytes+1 to detect overflow. + // Clamp to prevent integer overflow when maxBytes == math.MaxInt64. + limitBytes := maxBytes + 1 + if limitBytes <= 0 { + limitBytes = math.MaxInt64 } - - req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil) + limited := io.LimitReader(body, limitBytes) + data, err := io.ReadAll(limited) if err != nil { return nil, err } - req.Header.Set("Authorization", "token "+c.token) - - resp, err := c.http.Do(req) - if err != nil { - lastErr = err - if attempt < maxAttempts-1 && isTemporaryNetError(err) { - slog.Warn("temporary network error, will retry", - "attempt", attempt+1, - "url", redactURL(reqURL), - "error", err) - continue - } - return nil, lastErr + if int64(len(data)) > maxBytes { + return nil, fmt.Errorf("%w: response exceeds %d bytes", ErrDiffTooLarge, maxBytes) } - if resp.StatusCode >= 200 && resp.StatusCode < 300 { - // Read up to maxBytes+1 to detect overflow. - // Clamp to prevent integer overflow when maxBytes == math.MaxInt64. - limitBytes := maxBytes + 1 - if limitBytes <= 0 { - limitBytes = math.MaxInt64 - } - limited := io.LimitReader(resp.Body, limitBytes) - body, err := io.ReadAll(limited) - resp.Body.Close() - if err != nil { - return nil, err - } - if int64(len(body)) > maxBytes { - return nil, fmt.Errorf("%w: response is larger than %d bytes", ErrDiffTooLarge, maxBytes) - } - return body, nil - } - - errBody, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBodyBytes)) - resp.Body.Close() - - lastErr = &APIError{StatusCode: resp.StatusCode, Body: string(errBody)} - - if resp.StatusCode < 500 || resp.StatusCode >= 600 { - return nil, lastErr - } - } - - return nil, lastErr + return data, nil + }) } // escapePath escapes each segment of a relative file path for use in URLs. diff --git a/gitea/client_test.go b/gitea/client_test.go index 2637c2e..eec0b7d 100644 --- a/gitea/client_test.go +++ b/gitea/client_test.go @@ -1092,6 +1092,21 @@ func TestRedactURL(t *testing.T) { input: "", want: "", }, + { + name: "with userinfo - redacts credentials", + input: "https://admin:secret@gitea.example.com/api/v1/repos", + want: "https://REDACTED@gitea.example.com/api/v1/repos", + }, + { + name: "with userinfo and query params", + input: "https://user:pass@example.com/path?token=abc", + want: "https://REDACTED@example.com/path?[redacted]", + }, + { + name: "username only - no password", + input: "https://user@example.com/path", + want: "https://REDACTED@example.com/path", + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { diff --git a/gitea/diff_size_test.go b/gitea/diff_size_test.go index 3657d2d..005f87c 100644 --- a/gitea/diff_size_test.go +++ b/gitea/diff_size_test.go @@ -3,6 +3,7 @@ package gitea import ( "context" "errors" + "math" "net/http" "net/http/httptest" "strings" @@ -48,6 +49,12 @@ func TestGetPullRequestDiff_SizeLimits(t *testing.T) { maxDiffSize: -1, wantDiff: strings.Repeat("x", 10000), }, + { + name: "math.MaxInt64 treated as disabled", + diff: strings.Repeat("x", 10000), + maxDiffSize: math.MaxInt64, + wantDiff: strings.Repeat("x", 10000), + }, { name: "default limit", diff: "diff content",