feat(github): implement PRReader + FileReader client (#80) #93
@@ -14,11 +14,13 @@ import (
|
||||
"time"
|
||||
|
|
||||
)
|
||||
|
||||
const defaultBaseURL = "https://api.github.com"
|
||||
const userAgent = "review-bot/1.0"
|
||||
const (
|
||||
defaultBaseURL = "https://api.github.com"
|
||||
userAgent = "review-bot/1.0"
|
||||
|
||||
// maxResponseBytes limits successful response body reads to 10 MiB.
|
||||
const maxResponseBytes = 10 * 1024 * 1024
|
||||
// maxResponseBytes limits successful response body reads to 10 MiB.
|
||||
maxResponseBytes = 10 * 1024 * 1024
|
||||
)
|
||||
|
||||
// APIError represents an HTTP error response from the GitHub API.
|
||||
// It carries the status code so callers can distinguish between
|
||||
@@ -68,12 +70,12 @@ func asAPIError(err error) (*APIError, bool) {
|
||||
type Client struct {
|
||||
baseURL string
|
||||
token string
|
||||
http *http.Client
|
||||
httpClient *http.Client
|
||||
|
||||
// RetryBackoff defines the delays between retry attempts for 429 responses.
|
||||
// RetryBackoff[i] is the delay before attempt i+1 (after attempt i fails).
|
||||
// If nil, defaults to {1s, 2s}. Set to shorter durations in tests.
|
||||
RetryBackoff []time.Duration
|
||||
// retryBackoff defines the delays between retry attempts for 429 responses.
|
||||
// retryBackoff[i] is the delay before attempt i+1 (after attempt i fails).
|
||||
// If nil, defaults to {1s, 2s}. Set to shorter durations in tests via SetRetryBackoff.
|
||||
retryBackoff []time.Duration
|
||||
|
[MINOR] AllowInsecureHTTP option permits sending credentials over HTTP when enabled. Although documented for trusted/internal use, accidental enablement in production would expose tokens over cleartext. Consider additional safeguards (e.g., explicit environment gate or failing fast unless a dedicated test flag is present).
|
||||
}
|
||||
|
||||
// NewClient creates a new GitHub API client.
|
||||
@@ -86,16 +88,17 @@ func NewClient(token, baseURL string) *Client {
|
||||
return &Client{
|
||||
baseURL: strings.TrimRight(baseURL, "/"),
|
||||
token: token,
|
||||
http: &http.Client{
|
||||
httpClient: &http.Client{
|
||||
Timeout: 30 * time.Second,
|
||||
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
||||
// Prevent forwarding Authorization header to different hosts on redirect.
|
||||
if len(via) > 0 && req.URL.Host != via[0].URL.Host {
|
||||
req.Header.Del("Authorization")
|
||||
}
|
||||
if len(via) >= 10 {
|
||||
return fmt.Errorf("stopped after 10 redirects")
|
||||
}
|
||||
// Strip Authorization on cross-host redirect or protocol downgrade (https→http).
|
||||
prev := via[len(via)-1]
|
||||
if req.URL.Host != prev.URL.Host || (prev.URL.Scheme == "https" && req.URL.Scheme == "http") {
|
||||
req.Header.Del("Authorization")
|
||||
}
|
||||
return nil
|
||||
},
|
||||
},
|
||||
@@ -105,7 +108,13 @@ func NewClient(token, baseURL string) *Client {
|
||||
// SetHTTPClient sets the underlying HTTP client used for requests.
|
||||
// This is intended for testing to inject mock transports.
|
||||
|
[MINOR] defaultCheckRedirect allows cross-host redirects (with Authorization stripped). Although token leakage is mitigated, following cross-host redirects can facilitate SSRF-like behavior if baseURL is misconfigured or points to a compromised server. Consider rejecting cross-host redirects by default or enforcing an allowlist of trusted hosts.
|
||||
func (c *Client) SetHTTPClient(hc *http.Client) {
|
||||
c.http = hc
|
||||
c.httpClient = hc
|
||||
}
|
||||
|
||||
// SetRetryBackoff configures the retry backoff durations for testing.
|
||||
// In production the default {1s, 2s} applies.
|
||||
|
[MINOR] defaultCheckRedirect follows HTTPS→HTTP redirects after stripping Authorization. While credentials are protected, this still permits plaintext requests to proceed, which can leak metadata and expands attack surface if a misconfigured or compromised server issues such redirects. Prefer failing closed on protocol downgrades.
|
||||
func (c *Client) SetRetryBackoff(d []time.Duration) {
|
||||
|
sonnet-review-bot
commented
[MINOR] The doc comment on `defaultCheckRedirect` says it "strips the Authorization header on cross-host redirects or protocol downgrades (HTTPS→HTTP) to prevent credential leakage, while still following the redirect." However, a protocol downgrade from HTTPS to HTTP is a genuine security issue — stripping the header and still following is debatable. Consider returning an error on HTTPS→HTTP downgrade rather than silently following. This is a design choice that has security implications, not a bug per se, but worth flagging.
gpt-review-bot
commented
[MINOR] defaultCheckRedirect indexes via[len(via)-1] without guarding for len(via) == 0. net/http currently guarantees at least one prior request in via, but adding a len(via) check would make this more robust against misuse.
|
||||
c.retryBackoff = d
|
||||
}
|
||||
|
||||
// doRequest performs an HTTP request with retry on 429 rate limit responses.
|
||||
@@ -116,9 +125,9 @@ func (c *Client) doRequest(ctx context.Context, method, url string, accept strin
|
||||
const maxRetryAfter = 120 * time.Second
|
||||
|
||||
var backoff []time.Duration
|
||||
if c.RetryBackoff != nil {
|
||||
backoff = make([]time.Duration, len(c.RetryBackoff))
|
||||
copy(backoff, c.RetryBackoff)
|
||||
if c.retryBackoff != nil {
|
||||
backoff = make([]time.Duration, len(c.retryBackoff))
|
||||
copy(backoff, c.retryBackoff)
|
||||
} else {
|
||||
backoff = []time.Duration{1 * time.Second, 2 * time.Second}
|
||||
}
|
||||
@@ -157,7 +166,7 @@ func (c *Client) doRequest(ctx context.Context, method, url string, accept strin
|
||||
req.Header.Set("Accept", "application/vnd.github+json")
|
||||
}
|
||||
|
||||
resp, err := c.http.Do(req)
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("do request: %w", err)
|
||||
}
|
||||
|
||||
@@ -81,7 +81,7 @@ func TestDoRequest_429Retry(t *testing.T) {
|
||||
|
||||
c := NewClient("token", srv.URL)
|
||||
c.SetHTTPClient(srv.Client())
|
||||
c.RetryBackoff = []time.Duration{10 * time.Millisecond, 10 * time.Millisecond}
|
||||
c.SetRetryBackoff([]time.Duration{10 * time.Millisecond, 10 * time.Millisecond})
|
||||
|
||||
body, err := c.doGet(context.Background(), srv.URL+"/test")
|
||||
if err != nil {
|
||||
@@ -106,7 +106,7 @@ func TestDoRequest_429ExhaustsRetries(t *testing.T) {
|
||||
|
||||
c := NewClient("token", srv.URL)
|
||||
c.SetHTTPClient(srv.Client())
|
||||
c.RetryBackoff = []time.Duration{1 * time.Millisecond, 1 * time.Millisecond}
|
||||
c.SetRetryBackoff([]time.Duration{1 * time.Millisecond, 1 * time.Millisecond})
|
||||
|
||||
_, err := c.doGet(context.Background(), srv.URL+"/test")
|
||||
if err == nil {
|
||||
@@ -205,7 +205,7 @@ func TestDoRequest_429RetryAfterHeader(t *testing.T) {
|
||||
c := NewClient("token", srv.URL)
|
||||
c.SetHTTPClient(srv.Client())
|
||||
// Use short backoff; Retry-After should override
|
||||
c.RetryBackoff = []time.Duration{1 * time.Millisecond, 1 * time.Millisecond}
|
||||
c.SetRetryBackoff([]time.Duration{1 * time.Millisecond, 1 * time.Millisecond})
|
||||
|
||||
start := time.Now()
|
||||
body, err := c.doGet(context.Background(), srv.URL+"/test")
|
||||
@@ -246,19 +246,19 @@ func TestDoRequest_RetryAfterDoesNotMutateBackoff(t *testing.T) {
|
||||
|
||||
c := NewClient("token", srv.URL)
|
||||
c.SetHTTPClient(srv.Client())
|
||||
c.RetryBackoff = []time.Duration{1 * time.Millisecond, 1 * time.Millisecond}
|
||||
c.SetRetryBackoff([]time.Duration{1 * time.Millisecond, 1 * time.Millisecond})
|
||||
|
||||
_, err := c.doGet(context.Background(), srv.URL+"/test")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
// Verify the original RetryBackoff slice was not mutated
|
||||
if c.RetryBackoff[0] != 1*time.Millisecond {
|
||||
t.Errorf("RetryBackoff[0] was mutated: got %v, want 1ms", c.RetryBackoff[0])
|
||||
// Verify the original retryBackoff slice was not mutated
|
||||
if c.retryBackoff[0] != 1*time.Millisecond {
|
||||
t.Errorf("retryBackoff[0] was mutated: got %v, want 1ms", c.retryBackoff[0])
|
||||
}
|
||||
if c.RetryBackoff[1] != 1*time.Millisecond {
|
||||
t.Errorf("RetryBackoff[1] was mutated: got %v, want 1ms", c.RetryBackoff[1])
|
||||
if c.retryBackoff[1] != 1*time.Millisecond {
|
||||
t.Errorf("retryBackoff[1] was mutated: got %v, want 1ms", c.retryBackoff[1])
|
||||
}
|
||||
}
|
||||
|
||||
@@ -310,7 +310,7 @@ func TestDoRequest_SkipsAuthWhenTokenEmpty(t *testing.T) {
|
||||
func TestNewClient_CheckRedirectStripsAuthOnCrossHost(t *testing.T) {
|
||||
// Verify the CheckRedirect function is configured
|
||||
c := NewClient("secret-token", "https://api.github.com")
|
||||
if c.http.CheckRedirect == nil {
|
||||
if c.httpClient.CheckRedirect == nil {
|
||||
t.Fatal("expected CheckRedirect to be set")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -109,7 +109,7 @@ func TestGetFileContent_429Retry(t *testing.T) {
|
||||
|
||||
c := NewClient("token", srv.URL)
|
||||
c.SetHTTPClient(srv.Client())
|
||||
c.RetryBackoff = []time.Duration{1 * time.Millisecond}
|
||||
c.SetRetryBackoff([]time.Duration{1 * time.Millisecond})
|
||||
|
||||
content, err := c.GetFileContent(context.Background(), "owner", "repo", "file.go", "")
|
||||
if err != nil {
|
||||
@@ -227,7 +227,7 @@ func TestListContents_429Retry(t *testing.T) {
|
||||
|
||||
c := NewClient("token", srv.URL)
|
||||
c.SetHTTPClient(srv.Client())
|
||||
c.RetryBackoff = []time.Duration{1 * time.Millisecond}
|
||||
c.SetRetryBackoff([]time.Duration{1 * time.Millisecond})
|
||||
|
||||
entries, err := c.ListContents(context.Background(), "owner", "repo", ".")
|
||||
if err != nil {
|
||||
|
||||
@@ -84,13 +84,16 @@ func (c *Client) GetPullRequestDiff(ctx context.Context, owner, repo string, num
|
||||
return string(body), nil
|
||||
}
|
||||
|
||||
// maxPages is the upper bound on pagination loops to prevent unbounded iteration
|
||||
// in case the server returns a full page indefinitely.
|
||||
|
sonnet-review-bot
commented
[MINOR] The `maxPages = 100` constant caps pagination at 100 pages × 100 files = 10,000 files for PRs, and 100 pages × 100 check runs = 10,000 check runs. This constant is shared between two very different concerns (PR files and check runs). A PR with 10,000 files is pathological but possible in generated-code repos; silently truncating without returning an error or warning could cause incorrect reviews. Consider either documenting this limit explicitly or returning an error when the cap is hit.
|
||||
const maxPages = 100
|
||||
|
||||
// GetPullRequestFiles fetches the list of files changed in a PR.
|
||||
// Paginates through all pages (100 per page) to collect all files.
|
||||
func (c *Client) GetPullRequestFiles(ctx context.Context, owner, repo string, number int) ([]vcs.ChangedFile, error) {
|
||||
var allFiles []vcs.ChangedFile
|
||||
page := 1
|
||||
|
||||
for {
|
||||
for page := 1; page <= maxPages; page++ {
|
||||
reqURL := fmt.Sprintf("%s/repos/%s/%s/pulls/%d/files?per_page=100&page=%d",
|
||||
c.baseURL, url.PathEscape(owner), url.PathEscape(repo), number, page)
|
||||
body, err := c.doGet(ctx, reqURL)
|
||||
@@ -114,7 +117,6 @@ func (c *Client) GetPullRequestFiles(ctx context.Context, owner, repo string, nu
|
||||
if len(files) < 100 {
|
||||
break
|
||||
}
|
||||
page++
|
||||
}
|
||||
|
||||
return allFiles, nil
|
||||
@@ -175,8 +177,7 @@ func (c *Client) GetCommitStatuses(ctx context.Context, owner, repo, sha string)
|
||||
}
|
||||
|
||||
// Fetch check runs (paginated)
|
||||
checkPage := 1
|
||||
for {
|
||||
for checkPage := 1; checkPage <= maxPages; checkPage++ {
|
||||
checkURL := fmt.Sprintf("%s/repos/%s/%s/commits/%s/check-runs?per_page=100&page=%d",
|
||||
c.baseURL, url.PathEscape(owner), url.PathEscape(repo), url.PathEscape(sha), checkPage)
|
||||
checkBody, err := c.doGet(ctx, checkURL)
|
||||
@@ -198,13 +199,15 @@ func (c *Client) GetCommitStatuses(ctx context.Context, owner, repo, sha string)
|
||||
if len(checkResp.CheckRuns) < 100 {
|
||||
break
|
||||
}
|
||||
checkPage++
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
|
sonnet-review-bot
commented
[NIT] In `GetCommitStatuses`, the check run pagination loop uses `checkPage` as the variable name while the PR files loop uses `page`. Naming is internally consistent within each function, but the inconsistency between the two sibling pagination loops is a minor style nit.
|
||||
// mapCheckRunStatus maps a check run conclusion+status to a vcs.CommitStatus status string.
|
||||
// mapCheckRunStatus maps a check run conclusion to a vcs.CommitStatus status string.
|
||||
// The second parameter (check run status field, e.g. "completed", "in_progress") is
|
||||
// unused because conclusion alone determines the mapped state: nil conclusion means
|
||||
// the run is still in progress (pending), regardless of the status field value.
|
||||
func mapCheckRunStatus(conclusion *string, _ string) string {
|
||||
if conclusion == nil {
|
||||
// Still running or queued
|
||||
|
||||
@@ -112,7 +112,7 @@ func TestGetPullRequest_429Retry(t *testing.T) {
|
||||
|
||||
c := NewClient("token", srv.URL)
|
||||
c.SetHTTPClient(srv.Client())
|
||||
c.RetryBackoff = []time.Duration{1 * time.Millisecond}
|
||||
c.SetRetryBackoff([]time.Duration{1 * time.Millisecond})
|
||||
|
||||
pr, err := c.GetPullRequest(context.Background(), "owner", "repo", 1)
|
||||
if err != nil {
|
||||
@@ -447,7 +447,7 @@ func TestGetFileContentAtRef_429Retry(t *testing.T) {
|
||||
|
||||
c := NewClient("token", srv.URL)
|
||||
c.SetHTTPClient(srv.Client())
|
||||
c.RetryBackoff = []time.Duration{1 * time.Millisecond}
|
||||
c.SetRetryBackoff([]time.Duration{1 * time.Millisecond})
|
||||
|
||||
content, err := c.GetFileContentAtRef(context.Background(), "owner", "repo", "file.go", "main")
|
||||
if err != nil {
|
||||
|
||||
[NIT] Package comment mentions "review submission" but this PR doesn't include review endpoints. Consider updating the comment or adding a TODO/ref to avoid misleading users.