feat(github): implement PRReader + FileReader client (#80) #93
@@ -1,6 +1,6 @@
|
||||
// Package github provides a client for the GitHub API.
|
||||
// It supports pull request operations, file content retrieval,
|
||||
// and review submission for both github.com and GitHub Enterprise.
|
||||
// It supports pull request operations, file content retrieval, CI status checks,
|
||||
// and directory listing for both github.com and GitHub Enterprise.
|
||||
package github
|
||||
|
||||
import (
|
||||
@@ -15,6 +15,10 @@ import (
|
||||
)
|
||||
|
||||
const defaultBaseURL = "https://api.github.com"
|
||||
const userAgent = "review-bot/1.0"
|
||||
|
|
||||
|
||||
// maxResponseBytes limits successful response body reads to 10 MiB.
|
||||
const maxResponseBytes = 10 * 1024 * 1024
|
||||
|
||||
// APIError represents an HTTP error response from the GitHub API.
|
||||
// It carries the status code so callers can distinguish between
|
||||
@@ -82,7 +86,19 @@ func NewClient(token, baseURL string) *Client {
|
||||
return &Client{
|
||||
|
sonnet-review-bot
commented
**[MINOR]** The `SetHTTPClient` method is documented as 'intended for testing to inject mock transports', making it a test-only escape hatch on the public API. Per the package design pattern for `internal/` packages, test-only hooks that are not part of the intended public contract ideally belong in `export_test.go` or a test helper. However, since this is a new package and the project may not yet have an `export_test.go` pattern established, this is a minor concern.
|
||||
baseURL: strings.TrimRight(baseURL, "/"),
|
||||
token: token,
|
||||
http: &http.Client{Timeout: 30 * time.Second},
|
||||
http: &http.Client{
|
||||
|
**[MINOR]** NewClient accepts any baseURL without enforcing HTTPS or validating against a trusted allowlist. If a misconfiguration allows an attacker-controlled baseURL, the client could send the Authorization token to an untrusted host or over plaintext HTTP.
|
||||
Timeout: 30 * time.Second,
|
||||
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
||||
// Prevent forwarding Authorization header to different hosts on redirect.
|
||||
if len(via) > 0 && req.URL.Host != via[0].URL.Host {
|
||||
req.Header.Del("Authorization")
|
||||
|
sonnet-review-bot
commented
**[NIT]** The `SetHTTPClient` and `SetRetryBackoff` methods are exported as test-injection points but documented as unsafe for concurrent use with requests. The convention in the patterns (style.md) suggests using unexported helpers or the functional options pattern for test injection. The current design works but exposes mutation methods on what is documented as a concurrent-safe type. A `//nolint` or explicit doc note would make the intent clearer.
|
||||
}
|
||||
if len(via) >= 10 {
|
||||
return fmt.Errorf("stopped after 10 redirects")
|
||||
}
|
||||
return nil
|
||||
|
**[MINOR]** CheckRedirect strips Authorization on cross-host or https→http redirects but still follows the redirect. Following cross-host redirects can be an SSRF vector in misconfigured environments; consider blocking cross-host redirects entirely rather than proceeding without Authorization.
|
||||
},
|
||||
},
|
||||
|
gpt-review-bot
commented
**[NIT]** The parameter name 'url' in doRequest shadows the conceptual URL type used elsewhere (e.g., net/url in other files). Renaming to 'reqURL' could improve clarity, though this is purely stylistic.
|
||||
}
|
||||
}
|
||||
|
||||
|
sonnet-review-bot
commented
**[NIT]** The doc comment for `Client` says `SetHTTPClient` and `SetRetryBackoff` must not be called concurrently with requests, but these are public methods and there's no enforcement or `noCopy` guard. Given the concurrent-use note, a brief comment in `SetHTTPClient` and `SetRetryBackoff` reiterating the constraint would help (though this matches the stdlib's `tls.Config` immutable-after-use convention, so it's acceptable as-is).
|
||||
@@ -94,6 +110,7 @@ func (c *Client) SetHTTPClient(hc *http.Client) {
|
||||
|
||||
|
gpt-review-bot
commented
**[MINOR]** Consider setting a User-Agent header on all requests. GitHub recommends identifying clients, and some enterprise installations enforce it. Add req.Header.Set("User-Agent", "review-bot/1.0") or similar.
|
||||
// doRequest performs an HTTP request with retry on 429 rate limit responses.
|
||||
// It respects the Retry-After header when present (capped at maxRetryAfter).
|
||||
|
gpt-review-bot
commented
**[MINOR]** SetHTTPClient allows setting a nil *http.Client; subsequent use (c.httpClient.Do) would panic. Either guard against nil (return error or restore default client) or document that nil is invalid.
|
||||
// Transport errors (network failures, context cancellation) are not retried.
|
||||
func (c *Client) doRequest(ctx context.Context, method, url string, accept string) ([]byte, error) {
|
||||
const maxAttempts = 3
|
||||
|
**[MINOR]** defaultCheckRedirect follows HTTPS→HTTP redirects after stripping Authorization. While credentials are protected, this still permits plaintext requests to proceed, which can leak metadata and expands attack surface if a misconfigured or compromised server issues such redirects. Prefer failing closed on protocol downgrades.
|
||||
const maxRetryAfter = 120 * time.Second
|
||||
|
gpt-review-bot
commented
**[MINOR]** Authorization header is always set to "Bearer "+token even when token is empty. Consider only setting the header when token is non-empty to avoid sending an empty bearer token on unauthenticated requests.
gpt-review-bot
commented
**[NIT]** GitHub classic PATs typically use the "token" scheme while fine-grained tokens use "Bearer". If supporting both is desired, consider documenting or adapting the auth scheme based on token type.
sonnet-review-bot
commented
**[MINOR]** The doc comment on `defaultCheckRedirect` says it "strips the Authorization header on cross-host redirects or protocol downgrades (HTTPS→HTTP) to prevent credential leakage, while still following the redirect." However, a protocol downgrade from HTTPS to HTTP is a genuine security issue — stripping the header and still following is debatable. Consider returning an error on HTTPS→HTTP downgrade rather than silently following. This is a design choice that has security implications, not a bug per se, but worth flagging.
gpt-review-bot
commented
**[MINOR]** defaultCheckRedirect indexes via[len(via)-1] without guarding for len(via) == 0. net/http currently guarantees at least one prior request in via, but adding a len(via) check would make this more robust against misuse.
|
||||
@@ -133,6 +150,7 @@ func (c *Client) doRequest(ctx context.Context, method, url string, accept strin
|
||||
if c.token != "" {
|
||||
|
sonnet-review-bot
commented
**[MINOR]** The CheckRedirect lambda is duplicated verbatim in both NewClient and SetHTTPClient(nil). Extract it to a package-level function (e.g., `defaultCheckRedirect`) to eliminate the duplication and ensure both code paths stay in sync when the policy changes.
|
||||
req.Header.Set("Authorization", "Bearer "+c.token)
|
||||
|
gpt-review-bot
commented
**[NIT]** Retry-After parsing only handles delta-seconds via Atoi. RFC 7231 allows an HTTP-date format; optionally support parsing HTTP-date to fully respect server guidance.
|
||||
}
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
|
sonnet-review-bot
commented
**[MINOR]** When `c.http.Do(req)` returns an error (network failure, context cancellation), the function returns immediately without retrying. For transient network errors, a retry could be valuable. The current behavior is reasonable for the stated scope (only retry on 429), but the comment says 'It respects the Retry-After header when present' without mentioning the no-retry-on-transport-error behavior. This is a documentation gap rather than a bug.
|
||||
if accept != "" {
|
||||
req.Header.Set("Accept", accept)
|
||||
|
sonnet-review-bot
commented
**[MINOR]** The security check `if !c.allowInsecureHTTP && req.URL.Scheme != "https"` is performed inside the retry loop, meaning it will fail on every retry attempt rather than being checked once before the loop starts. Since the URL doesn't change between retries, this is wasteful and the error message is slightly misleading (it mentions `req.URL.Host` but the real issue is the scheme). Moving the check before the retry loop or to `NewClient` would be cleaner.
|
||||
} else {
|
||||
@@ -145,7 +163,7 @@ func (c *Client) doRequest(ctx context.Context, method, url string, accept strin
|
||||
}
|
||||
|
sonnet-review-bot
commented
**[MINOR]** The Retry-After handling mutates the `backoff` slice in-place: `backoff[attempt] = time.Duration(seconds) * time.Second`. When `c.RetryBackoff` is non-nil (e.g. in tests), this modifies the caller's slice, which is surprising and could cause test pollution if the same slice is reused. A local copy should be made before mutation, or the mutation should only apply to the local `backoff` variable (which it does when `RetryBackoff` is nil since a new slice is allocated, but not when it's non-nil).
|
||||
|
||||
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, maxResponseBytes))
|
||||
resp.Body.Close()
|
||||
if err != nil {
|
||||
|
sonnet-review-bot
commented
**[MINOR]** After a successful response is read, `resp.Body.Close()` is called directly after `io.ReadAll`. If `io.ReadAll` returns an error (e.g. partial read), the body is still closed via the subsequent line, which is fine. However, the pattern is slightly inconsistent with the error path below it — consider using `defer resp.Body.Close()` paired with a drain before close on the error path for symmetry. This is purely stylistic; the current approach is correct.
|
||||
return nil, fmt.Errorf("read response body: %w", err)
|
||||
|
||||
@@ -261,3 +261,56 @@ func TestDoRequest_RetryAfterDoesNotMutateBackoff(t *testing.T) {
|
||||
t.Errorf("RetryBackoff[1] was mutated: got %v, want 1ms", c.RetryBackoff[1])
|
||||
}
|
||||
}
|
||||
|
||||
func TestDoRequest_SetsUserAgentHeader(t *testing.T) {
|
||||
var gotUA string
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
gotUA = r.Header.Get("User-Agent")
|
||||
w.WriteHeader(200)
|
||||
w.Write([]byte("{}"))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := NewClient("token", srv.URL)
|
||||
c.SetHTTPClient(srv.Client())
|
||||
_, _ = c.doGet(context.Background(), srv.URL+"/test")
|
||||
|
||||
if gotUA != "review-bot/1.0" {
|
||||
t.Errorf("expected User-Agent 'review-bot/1.0', got %q", gotUA)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDoRequest_LimitsResponseBody(t *testing.T) {
|
||||
// Verify that responses are read through a limit reader.
|
||||
|
sonnet-review-bot
commented
**[NIT]** `TestDoRequest_LimitsResponseBody` tests a constant value rather than actual behavior. The comment acknowledges this limitation. This is acceptable as a documentation-style test, but it adds no real safety guarantee — if `maxResponseBytes` is set correctly but the `io.LimitReader` call is removed, the test would still pass. Consider removing it or replacing with a test that actually sends a response exceeding the limit.
|
||||
// We can't easily test the 10 MiB limit without OOM risk,
|
||||
// but we verify the constant is set correctly.
|
||||
if maxResponseBytes != 10*1024*1024 {
|
||||
t.Errorf("expected maxResponseBytes = 10 MiB, got %d", maxResponseBytes)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDoRequest_SkipsAuthWhenTokenEmpty(t *testing.T) {
|
||||
var gotAuth string
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
gotAuth = r.Header.Get("Authorization")
|
||||
|
sonnet-review-bot
commented
**[NIT]** `TestDoRequest_LimitsResponseBody` doesn't actually test the limit behavior — it only checks the constant value. The comment acknowledges this. This is a weak test; consider removing it or replacing it with an actual test using a small limit (e.g., setting `maxResponseBytes` to a small value via a test helper, or accepting that this particular boundary isn't testable without refactoring).
|
||||
w.WriteHeader(200)
|
||||
w.Write([]byte("{}"))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := NewClient("", srv.URL) // empty token
|
||||
c.SetHTTPClient(srv.Client())
|
||||
_, _ = c.doGet(context.Background(), srv.URL+"/test")
|
||||
|
||||
if gotAuth != "" {
|
||||
t.Errorf("expected no Authorization header with empty token, got %q", gotAuth)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewClient_CheckRedirectStripsAuthOnCrossHost(t *testing.T) {
|
||||
// Verify the CheckRedirect function is configured
|
||||
c := NewClient("secret-token", "https://api.github.com")
|
||||
if c.http.CheckRedirect == nil {
|
||||
t.Fatal("expected CheckRedirect to be set")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -61,10 +61,10 @@ func escapePath(p string) string {
|
||||
}
|
||||
|
gpt-review-bot
commented
**[MINOR]** decodeBase64Content strips only '\n'. GitHub or proxies may use CRLF line breaks. Consider removing both '\r' and '\n' (or all whitespace) before decoding, or use base64.NewDecoder which tolerates newlines.
|
||||
|
||||
// decodeBase64Content decodes base64-encoded content from the GitHub contents API.
|
||||
|
gpt-review-bot
commented
**[NIT]** escapePath silently removes dot segments ("." and ".."), which may surprise callers if their requested path is altered without error. The comment notes this intent; consider documenting this behavior in the public method docs (e.g., ListContents/GetFileContentAtRef) or returning an explicit error when dot-segments are provided.
|
||||
// GitHub returns base64 content with newlines for formatting, which we strip before decoding.
|
||||
// GitHub returns base64 content with line breaks for formatting; we strip \r and \n before decoding.
|
||||
|
sonnet-review-bot
commented
**[NIT]** The `escapePath` function removes dot-segments silently. The doc comment acknowledges this and explains it's intentional, which is good. But the test case `{"../etc/passwd", "etc/passwd"}` documents that a path traversal attempt is silently resolved to `etc/passwd` rather than returning an error. Depending on threat model, callers may want to know the path was modified. Since this is intentional and documented, this is a NIT-level observation for a future design consideration.
|
||||
func decodeBase64Content(encoded string) (string, error) {
|
||||
// GitHub inserts newlines in base64 content
|
||||
cleaned := strings.ReplaceAll(encoded, "\n", "")
|
||||
cleaned := strings.NewReplacer("\n", "", "\r", "").Replace(encoded)
|
||||
decoded, err := base64.StdEncoding.DecodeString(cleaned)
|
||||
if err != nil {
|
||||
return "", err
|
||||
|
||||
@@ -295,3 +295,15 @@ func TestEscapePath_RejectsDotSegments(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeBase64Content_CRLF(t *testing.T) {
|
||||
// Base64 of "hello world" with CRLF line breaks inserted
|
||||
encoded := "aGVs\r\nbG8g\r\nd29y\r\nbGQ="
|
||||
decoded, err := decodeBase64Content(encoded)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if decoded != "hello world" {
|
||||
t.Errorf("expected 'hello world', got %q", decoded)
|
||||
}
|
||||
}
|
||||
|
||||
[NIT] Three separate `const` declarations could be grouped into a single `const (...)` block per the style pattern, though this is a pure style nit and not a correctness issue.