feat(github): implement PRReader + FileReader client (#80) #93
@@ -41,6 +41,9 @@ func (e *APIError) Error() string {
|
||||
if len(body) > 200 {
|
||||
body = body[:200] + "...(truncated)"
|
||||
}
|
||||
// Sanitize newlines to prevent log injection from upstream response bodies.
|
||||
|
|
||||
body = strings.ReplaceAll(body, "\n", " ")
|
||||
body = strings.ReplaceAll(body, "\r", " ")
|
||||
return fmt.Sprintf("HTTP %d: %s", e.StatusCode, body)
|
||||
}
|
||||
|
||||
@@ -104,15 +107,21 @@ type Client struct {
|
||||
}
|
||||
|
**[MINOR]** The CheckRedirect handler allows following cross-host and HTTPS→HTTP redirects (while stripping Authorization). Although the token isn’t leaked, following cross-host redirects may contact untrusted hosts. Consider restricting redirects to same-host and HTTPS-only or fail on cross-host redirects to reduce SSRF-style risks.
|
||||
|
||||
// defaultCheckRedirect is the redirect policy used by NewClient and SetHTTPClient(nil).
|
||||
|
**[MINOR]** defaultCheckRedirect allows cross-host redirects (with Authorization stripped). Although token leakage is mitigated, following cross-host redirects can facilitate SSRF-like behavior if baseURL is misconfigured or points to a compromised server. Consider rejecting cross-host redirects by default or enforcing an allowlist of trusted hosts.
|
||||
// It strips the Authorization header on cross-host redirects or protocol downgrades
|
||||
// (HTTPS→HTTP) to prevent credential leakage, while still following the redirect.
|
||||
// It rejects HTTPS→HTTP protocol downgrades (to prevent plaintext leakage) and strips
|
||||
|
gpt-review-bot
commented
**[MINOR]** Consider setting a User-Agent header on all requests. GitHub recommends identifying clients, and some enterprise installations enforce it. Add req.Header.Set("User-Agent", "review-bot/1.0") or similar.
|
||||
// the Authorization header on cross-host redirects to prevent credential leakage to
|
||||
// third-party hosts (e.g. CDN redirects from GitHub).
|
||||
|
gpt-review-bot
commented
**[MINOR]** SetHTTPClient allows setting a nil *http.Client; subsequent use (c.httpClient.Do) would panic. Either guard against nil (return error or restore default client) or document that nil is invalid.
|
||||
func defaultCheckRedirect(req *http.Request, via []*http.Request) error {
|
||||
if len(via) >= 10 {
|
||||
return fmt.Errorf("stopped after 10 redirects")
|
||||
|
**[MINOR]** defaultCheckRedirect follows HTTPS→HTTP redirects after stripping Authorization. While credentials are protected, this still permits plaintext requests to proceed, which can leak metadata and expands attack surface if a misconfigured or compromised server issues such redirects. Prefer failing closed on protocol downgrades.
|
||||
}
|
||||
|
gpt-review-bot
commented
**[MINOR]** Authorization header is always set to "Bearer "+token even when token is empty. Consider only setting the header when token is non-empty to avoid sending an empty bearer token on unauthenticated requests.
gpt-review-bot
commented
**[NIT]** GitHub classic PATs typically use the "token" scheme while fine-grained tokens use "Bearer". If supporting both is desired, consider documenting or adapting the auth scheme based on token type.
sonnet-review-bot
commented
**[MINOR]** The doc comment on `defaultCheckRedirect` says it "strips the Authorization header on cross-host redirects or protocol downgrades (HTTPS→HTTP) to prevent credential leakage, while still following the redirect." However, a protocol downgrade from HTTPS to HTTP is a genuine security issue — stripping the header and still following is debatable. Consider returning an error on HTTPS→HTTP downgrade rather than silently following. This is a design choice that has security implications, not a bug per se, but worth flagging.
gpt-review-bot
commented
**[MINOR]** defaultCheckRedirect indexes via[len(via)-1] without guarding for len(via) == 0. net/http currently guarantees at least one prior request in via, but adding a len(via) check would make this more robust against misuse.
|
||||
// Strip Authorization on cross-host redirect or protocol downgrade (https→http).
|
||||
prev := via[len(via)-1]
|
||||
if req.URL.Host != prev.URL.Host || (prev.URL.Scheme == "https" && req.URL.Scheme == "http") {
|
||||
// Reject protocol downgrade: HTTPS→HTTP leaks request metadata over plaintext.
|
||||
|
sonnet-review-bot
commented
**[NIT]** The concurrency safety doc comment says "SetHTTPClient and SetRetryBackoff must not be called concurrently with requests" but these are exported methods with no enforcement (no mutex, no atomic). This is an acceptable design for a test-support method, but the comment could be stronger: e.g., "These methods are intended for test setup only and must be called before any goroutines issue requests."
|
||||
if prev.URL.Scheme == "https" && req.URL.Scheme == "http" {
|
||||
return fmt.Errorf("refusing redirect from HTTPS to HTTP (%s → %s)", prev.URL.Host, req.URL.Host)
|
||||
}
|
||||
|
gpt-review-bot
commented
**[NIT]** Parameter name "url" in doRequest shadows the common concept of URLs and can reduce readability given other files import net/url. Consider renaming the parameter to reqURL for clarity.
**[MINOR]** Redirects to different hosts or to HTTP are allowed (Authorization is stripped), which can lead to consuming responses from untrusted or downgraded endpoints. While token leakage is prevented, consider rejecting cross-host redirects and HTTPS→HTTP downgrades entirely to avoid integrity/confidentiality risks.
|
||||
// Strip Authorization on cross-host redirect to avoid leaking credentials
|
||||
// to third-party hosts (GitHub legitimately redirects to CDN hosts).
|
||||
if req.URL.Host != prev.URL.Host {
|
||||
req.Header.Del("Authorization")
|
||||
}
|
||||
|
gpt-review-bot
commented
**[MINOR]** SetHTTPClient(nil) restores a basic http.Client with only Timeout set and drops the custom CheckRedirect policy configured in NewClient. This diverges from the initial default behavior and could unintentionally allow auth headers on redirects. Either preserve the CheckRedirect behavior when resetting or adjust the comment to clarify the change.
**[MINOR]** SetHTTPClient accepts an arbitrary *http.Client and does not enforce the safe redirect policy. If a caller supplies a client without a CheckRedirect that strips Authorization on cross-host or downgrade redirects, credentials could leak during redirects.
|
||||
return nil
|
||||
@@ -242,12 +251,12 @@ func (c *Client) doRequest(ctx context.Context, method, reqURL string, accept st
|
||||
}
|
||||
|
||||
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, maxResponseBytes))
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, maxResponseBytes+1))
|
||||
resp.Body.Close()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read response body: %w", err)
|
||||
}
|
||||
if len(body) >= maxResponseBytes {
|
||||
if len(body) > maxResponseBytes {
|
||||
|
sonnet-review-bot
commented
**[MINOR]** The `int64` cast in `if int64(len(body)) >= maxResponseBytes` is unnecessary since `maxResponseBytes` is an untyped constant and `len()` returns `int`. Both sides of the comparison are `int`-typed. The cast is harmless but adds noise.
|
||||
return nil, fmt.Errorf("response body exceeded %d bytes (truncated)", maxResponseBytes)
|
||||
}
|
||||
return body, nil
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -185,6 +186,17 @@ func TestIsUnauthorized(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestAPIError_SanitizesNewlines(t *testing.T) {
|
||||
err := &APIError{StatusCode: 500, Body: "line1\ninjected\rmore"}
|
||||
msg := err.Error()
|
||||
if strings.Contains(msg, "\n") || strings.Contains(msg, "\r") {
|
||||
t.Errorf("expected newlines to be sanitized, got: %q", msg)
|
||||
}
|
||||
if !strings.Contains(msg, "line1 injected more") {
|
||||
t.Errorf("expected sanitized body, got: %q", msg)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDoRequest_429RetryAfterHeader(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("skipping slow retry test in short mode")
|
||||
@@ -264,6 +276,9 @@ func TestDoRequest_RetryAfterDoesNotMutateBackoff(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestDoRequest_429RetryAfterHTTPDate(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("skipping slow Retry-After HTTP-date test in short mode")
|
||||
}
|
||||
attempts := 0
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
attempts++
|
||||
|
sonnet-review-bot
commented
**[NIT]** `TestDoRequest_LimitsResponseBody` tests a constant value rather than actual behavior. The comment acknowledges this limitation. This is acceptable as a documentation-style test, but it adds no real safety guarantee — if `maxResponseBytes` is set correctly but the `io.LimitReader` call is removed, the test would still pass. Consider removing it or replacing with a test that actually sends a response exceeding the limit.
|
||||
@@ -379,6 +394,26 @@ func TestDoRequest_LimitsResponseBody(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestDoRequest_AcceptsExactlyAtLimit(t *testing.T) {
|
||||
// A response body exactly equal to maxResponseBytes should succeed (not error).
|
||||
exactBody := strings.Repeat("x", maxResponseBytes)
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(200)
|
||||
w.Write([]byte(exactBody))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := NewClient("token", srv.URL, AllowInsecureHTTP())
|
||||
c.SetHTTPClient(srv.Client())
|
||||
body, err := c.doGet(context.Background(), srv.URL+"/test")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error for exactly-at-limit body: %v", err)
|
||||
}
|
||||
if len(body) != maxResponseBytes {
|
||||
t.Errorf("expected body length %d, got %d", maxResponseBytes, len(body))
|
||||
}
|
||||
}
|
||||
|
||||
func TestDoRequest_SkipsAuthWhenTokenEmpty(t *testing.T) {
|
||||
var gotAuth string
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
@@ -405,6 +440,51 @@ func TestNewClient_CheckRedirectStripsAuthOnCrossHost(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultCheckRedirect_RejectsHTTPSToHTTP(t *testing.T) {
|
||||
prev := &http.Request{URL: &url.URL{Scheme: "https", Host: "api.github.com", Path: "/foo"}}
|
||||
req := &http.Request{
|
||||
URL: &url.URL{Scheme: "http", Host: "api.github.com", Path: "/foo"},
|
||||
Header: http.Header{"Authorization": []string{"Bearer token"}},
|
||||
}
|
||||
err := defaultCheckRedirect(req, []*http.Request{prev})
|
||||
if err == nil {
|
||||
t.Fatal("expected error on HTTPS→HTTP redirect")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "refusing redirect from HTTPS to HTTP") {
|
||||
t.Errorf("unexpected error message: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultCheckRedirect_StripsAuthOnCrossHost(t *testing.T) {
|
||||
prev := &http.Request{URL: &url.URL{Scheme: "https", Host: "api.github.com", Path: "/foo"}}
|
||||
req := &http.Request{
|
||||
URL: &url.URL{Scheme: "https", Host: "objects.githubusercontent.com", Path: "/bar"},
|
||||
Header: http.Header{"Authorization": []string{"Bearer token"}},
|
||||
}
|
||||
err := defaultCheckRedirect(req, []*http.Request{prev})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if auth := req.Header.Get("Authorization"); auth != "" {
|
||||
t.Errorf("expected Authorization header to be stripped, got %q", auth)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultCheckRedirect_PreservesAuthOnSameHost(t *testing.T) {
|
||||
prev := &http.Request{URL: &url.URL{Scheme: "https", Host: "api.github.com", Path: "/foo"}}
|
||||
req := &http.Request{
|
||||
URL: &url.URL{Scheme: "https", Host: "api.github.com", Path: "/bar"},
|
||||
Header: http.Header{"Authorization": []string{"Bearer token"}},
|
||||
}
|
||||
err := defaultCheckRedirect(req, []*http.Request{prev})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if auth := req.Header.Get("Authorization"); auth != "Bearer token" {
|
||||
t.Errorf("expected Authorization to be preserved, got %q", auth)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDoRequest_RejectsHTTPWithToken(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(200)
|
||||
|
||||
@@ -158,6 +158,7 @@ func (c *Client) GetFileContentAtRef(ctx context.Context, owner, repo, path, ref
|
||||
|
||||
// GetCommitStatuses fetches both commit statuses and check runs for a SHA,
|
||||
// merging them into a unified []vcs.CommitStatus slice.
|
||||
// Returns nil (not an empty slice) when there are no statuses or check runs.
|
||||
// If the commit statuses endpoint fails (e.g. 404 for an unknown SHA), the
|
||||
// function returns immediately without attempting the check-runs endpoint.
|
||||
func (c *Client) GetCommitStatuses(ctx context.Context, owner, repo, sha string) ([]vcs.CommitStatus, error) {
|
||||
|
||||
[MINOR] APIError.Error includes up to 200 bytes of upstream response body in the error string. If these errors are logged, this can leak sensitive details from upstream or allow log injection (newlines) if baseURL points to an untrusted endpoint. Consider sanitizing newlines and/or omitting body content from the error string.