feat(github): implement PRReader + FileReader client (#80) #93

Closed
rodin wants to merge 16 commits from review-bot-issue-80 into feature/github-support
5 changed files with 52 additions and 40 deletions
Showing only changes of commit ae91c8aef5 - Show all commits
+28 -19
View File
@@ -14,11 +14,13 @@ import (
"time" "time"
Review

[NIT] Package comment mentions "review submission" but this PR doesn't include review endpoints. Consider updating the comment or adding a TODO/ref to avoid misleading users.

**[NIT]** Package comment mentions "review submission" but this PR doesn't include review endpoints. Consider updating the comment or adding a TODO/ref to avoid misleading users.
) )
const defaultBaseURL = "https://api.github.com" const (
const userAgent = "review-bot/1.0" defaultBaseURL = "https://api.github.com"
userAgent = "review-bot/1.0"
// maxResponseBytes limits successful response body reads to 10 MiB. // maxResponseBytes limits successful response body reads to 10 MiB.
const maxResponseBytes = 10 * 1024 * 1024 maxResponseBytes = 10 * 1024 * 1024
)
// APIError represents an HTTP error response from the GitHub API. // APIError represents an HTTP error response from the GitHub API.
// It carries the status code so callers can distinguish between // It carries the status code so callers can distinguish between
8
@@ -68,12 +70,12 @@ func asAPIError(err error) (*APIError, bool) {
type Client struct { type Client struct {
baseURL string baseURL string
token string token string
http *http.Client httpClient *http.Client
// RetryBackoff defines the delays between retry attempts for 429 responses. // retryBackoff defines the delays between retry attempts for 429 responses.
// RetryBackoff[i] is the delay before attempt i+1 (after attempt i fails). // retryBackoff[i] is the delay before attempt i+1 (after attempt i fails).
// If nil, defaults to {1s, 2s}. Set to shorter durations in tests. // If nil, defaults to {1s, 2s}. Set to shorter durations in tests via SetRetryBackoff.
RetryBackoff []time.Duration retryBackoff []time.Duration
Review

[MINOR] The AllowInsecureHTTP option permits sending credentials over HTTP when enabled. Although documented for trusted/internal use, accidental enablement in production would expose tokens in cleartext. Consider additional safeguards (e.g., explicit environment gate or failing fast unless a dedicated test flag is present).

**[MINOR]** The AllowInsecureHTTP option permits sending credentials over HTTP when enabled. Although documented for trusted/internal use, accidental enablement in production would expose tokens in cleartext. Consider additional safeguards (e.g., explicit environment gate or failing fast unless a dedicated test flag is present).
} }
// NewClient creates a new GitHub API client. // NewClient creates a new GitHub API client.
@@ -86,16 +88,17 @@ func NewClient(token, baseURL string) *Client {
return &Client{ return &Client{
baseURL: strings.TrimRight(baseURL, "/"), baseURL: strings.TrimRight(baseURL, "/"),
token: token, token: token,
http: &http.Client{ httpClient: &http.Client{
Timeout: 30 * time.Second, Timeout: 30 * time.Second,
CheckRedirect: func(req *http.Request, via []*http.Request) error { CheckRedirect: func(req *http.Request, via []*http.Request) error {
// Prevent forwarding Authorization header to different hosts on redirect.
if len(via) > 0 && req.URL.Host != via[0].URL.Host {
req.Header.Del("Authorization")
}
if len(via) >= 10 { if len(via) >= 10 {
return fmt.Errorf("stopped after 10 redirects") return fmt.Errorf("stopped after 10 redirects")
} }
// Strip Authorization on cross-host redirect or protocol downgrade (https→http).
prev := via[len(via)-1]
if req.URL.Host != prev.URL.Host || (prev.URL.Scheme == "https" && req.URL.Scheme == "http") {
req.Header.Del("Authorization")
}
return nil return nil
}, },
}, },
@@ -105,7 +108,13 @@ func NewClient(token, baseURL string) *Client {
// SetHTTPClient sets the underlying HTTP client used for requests. // SetHTTPClient sets the underlying HTTP client used for requests.
// This is intended for testing to inject mock transports. // This is intended for testing to inject mock transports.
Review

[MINOR] defaultCheckRedirect allows cross-host redirects (with Authorization stripped). Although token leakage is mitigated, following cross-host redirects can facilitate SSRF-like behavior if baseURL is misconfigured or points to a compromised server. Consider rejecting cross-host redirects by default or enforcing an allowlist of trusted hosts.

**[MINOR]** defaultCheckRedirect allows cross-host redirects (with Authorization stripped). Although token leakage is mitigated, following cross-host redirects can facilitate SSRF-like behavior if baseURL is misconfigured or points to a compromised server. Consider rejecting cross-host redirects by default or enforcing an allowlist of trusted hosts.
func (c *Client) SetHTTPClient(hc *http.Client) { func (c *Client) SetHTTPClient(hc *http.Client) {
c.http = hc c.httpClient = hc
}
// SetRetryBackoff configures the retry backoff durations for testing.
// In production the default {1s, 2s} applies.
Review

[MINOR] defaultCheckRedirect follows HTTPS→HTTP redirects after stripping Authorization. While credentials are protected, this still permits plaintext requests to proceed, which can leak metadata and expand the attack surface if a misconfigured or compromised server issues such redirects. Prefer failing closed on protocol downgrades.

**[MINOR]** defaultCheckRedirect follows HTTPS→HTTP redirects after stripping Authorization. While credentials are protected, this still permits plaintext requests to proceed, which can leak metadata and expand the attack surface if a misconfigured or compromised server issues such redirects. Prefer failing closed on protocol downgrades.
func (c *Client) SetRetryBackoff(d []time.Duration) {
Review

[MINOR] The doc comment on defaultCheckRedirect says it "strips the Authorization header on cross-host redirects or protocol downgrades (HTTPS→HTTP) to prevent credential leakage, while still following the redirect." However, a protocol downgrade from HTTPS to HTTP is a genuine security issue — stripping the header and still following is debatable. Consider returning an error on HTTPS→HTTP downgrade rather than silently following. This is a design choice that has security implications, not a bug per se, but worth flagging.

**[MINOR]** The doc comment on `defaultCheckRedirect` says it "strips the Authorization header on cross-host redirects or protocol downgrades (HTTPS→HTTP) to prevent credential leakage, while still following the redirect." However, a protocol downgrade from HTTPS to HTTP is a genuine security issue — stripping the header and still following is debatable. Consider returning an error on HTTPS→HTTP downgrade rather than silently following. This is a design choice that has security implications, not a bug per se, but worth flagging.
Review

[MINOR] defaultCheckRedirect indexes via[len(via)-1] without guarding for len(via) == 0. net/http currently guarantees at least one prior request in via, but adding a len(via) check would make this more robust against misuse.

**[MINOR]** defaultCheckRedirect indexes via[len(via)-1] without guarding for len(via) == 0. net/http currently guarantees at least one prior request in via, but adding a len(via) check would make this more robust against misuse.
c.retryBackoff = d
} }
// doRequest performs an HTTP request with retry on 429 rate limit responses. // doRequest performs an HTTP request with retry on 429 rate limit responses.
@@ -116,9 +125,9 @@ func (c *Client) doRequest(ctx context.Context, method, url string, accept strin
const maxRetryAfter = 120 * time.Second const maxRetryAfter = 120 * time.Second
var backoff []time.Duration var backoff []time.Duration
if c.RetryBackoff != nil { if c.retryBackoff != nil {
backoff = make([]time.Duration, len(c.RetryBackoff)) backoff = make([]time.Duration, len(c.retryBackoff))
copy(backoff, c.RetryBackoff) copy(backoff, c.retryBackoff)
} else { } else {
backoff = []time.Duration{1 * time.Second, 2 * time.Second} backoff = []time.Duration{1 * time.Second, 2 * time.Second}
} }
1
@@ -157,7 +166,7 @@ func (c *Client) doRequest(ctx context.Context, method, url string, accept strin
req.Header.Set("Accept", "application/vnd.github+json") req.Header.Set("Accept", "application/vnd.github+json")
} }
resp, err := c.http.Do(req) resp, err := c.httpClient.Do(req)
if err != nil { if err != nil {
return nil, fmt.Errorf("do request: %w", err) return nil, fmt.Errorf("do request: %w", err)
} }
1
+10 -10
View File
@@ -81,7 +81,7 @@ func TestDoRequest_429Retry(t *testing.T) {
c := NewClient("token", srv.URL) c := NewClient("token", srv.URL)
c.SetHTTPClient(srv.Client()) c.SetHTTPClient(srv.Client())
c.RetryBackoff = []time.Duration{10 * time.Millisecond, 10 * time.Millisecond} c.SetRetryBackoff([]time.Duration{10 * time.Millisecond, 10 * time.Millisecond})
body, err := c.doGet(context.Background(), srv.URL+"/test") body, err := c.doGet(context.Background(), srv.URL+"/test")
if err != nil { if err != nil {
@@ -106,7 +106,7 @@ func TestDoRequest_429ExhaustsRetries(t *testing.T) {
c := NewClient("token", srv.URL) c := NewClient("token", srv.URL)
c.SetHTTPClient(srv.Client()) c.SetHTTPClient(srv.Client())
c.RetryBackoff = []time.Duration{1 * time.Millisecond, 1 * time.Millisecond} c.SetRetryBackoff([]time.Duration{1 * time.Millisecond, 1 * time.Millisecond})
_, err := c.doGet(context.Background(), srv.URL+"/test") _, err := c.doGet(context.Background(), srv.URL+"/test")
if err == nil { if err == nil {
3
@@ -205,7 +205,7 @@ func TestDoRequest_429RetryAfterHeader(t *testing.T) {
c := NewClient("token", srv.URL) c := NewClient("token", srv.URL)
c.SetHTTPClient(srv.Client()) c.SetHTTPClient(srv.Client())
// Use short backoff; Retry-After should override // Use short backoff; Retry-After should override
c.RetryBackoff = []time.Duration{1 * time.Millisecond, 1 * time.Millisecond} c.SetRetryBackoff([]time.Duration{1 * time.Millisecond, 1 * time.Millisecond})
start := time.Now() start := time.Now()
body, err := c.doGet(context.Background(), srv.URL+"/test") body, err := c.doGet(context.Background(), srv.URL+"/test")
@@ -246,19 +246,19 @@ func TestDoRequest_RetryAfterDoesNotMutateBackoff(t *testing.T) {
c := NewClient("token", srv.URL) c := NewClient("token", srv.URL)
c.SetHTTPClient(srv.Client()) c.SetHTTPClient(srv.Client())
c.RetryBackoff = []time.Duration{1 * time.Millisecond, 1 * time.Millisecond} c.SetRetryBackoff([]time.Duration{1 * time.Millisecond, 1 * time.Millisecond})
_, err := c.doGet(context.Background(), srv.URL+"/test") _, err := c.doGet(context.Background(), srv.URL+"/test")
if err != nil { if err != nil {
t.Fatalf("unexpected error: %v", err) t.Fatalf("unexpected error: %v", err)
} }
// Verify the original RetryBackoff slice was not mutated // Verify the original retryBackoff slice was not mutated
if c.RetryBackoff[0] != 1*time.Millisecond { if c.retryBackoff[0] != 1*time.Millisecond {
t.Errorf("RetryBackoff[0] was mutated: got %v, want 1ms", c.RetryBackoff[0]) t.Errorf("retryBackoff[0] was mutated: got %v, want 1ms", c.retryBackoff[0])
} }
if c.RetryBackoff[1] != 1*time.Millisecond { if c.retryBackoff[1] != 1*time.Millisecond {
t.Errorf("RetryBackoff[1] was mutated: got %v, want 1ms", c.RetryBackoff[1]) t.Errorf("retryBackoff[1] was mutated: got %v, want 1ms", c.retryBackoff[1])
} }
} }
@@ -310,7 +310,7 @@ func TestDoRequest_SkipsAuthWhenTokenEmpty(t *testing.T) {
func TestNewClient_CheckRedirectStripsAuthOnCrossHost(t *testing.T) { func TestNewClient_CheckRedirectStripsAuthOnCrossHost(t *testing.T) {
// Verify the CheckRedirect function is configured // Verify the CheckRedirect function is configured
c := NewClient("secret-token", "https://api.github.com") c := NewClient("secret-token", "https://api.github.com")
if c.http.CheckRedirect == nil { if c.httpClient.CheckRedirect == nil {
t.Fatal("expected CheckRedirect to be set") t.Fatal("expected CheckRedirect to be set")
} }
} }
+2 -2
View File
@@ -109,7 +109,7 @@ func TestGetFileContent_429Retry(t *testing.T) {
c := NewClient("token", srv.URL) c := NewClient("token", srv.URL)
c.SetHTTPClient(srv.Client()) c.SetHTTPClient(srv.Client())
c.RetryBackoff = []time.Duration{1 * time.Millisecond} c.SetRetryBackoff([]time.Duration{1 * time.Millisecond})
content, err := c.GetFileContent(context.Background(), "owner", "repo", "file.go", "") content, err := c.GetFileContent(context.Background(), "owner", "repo", "file.go", "")
if err != nil { if err != nil {
@@ -227,7 +227,7 @@ func TestListContents_429Retry(t *testing.T) {
c := NewClient("token", srv.URL) c := NewClient("token", srv.URL)
c.SetHTTPClient(srv.Client()) c.SetHTTPClient(srv.Client())
c.RetryBackoff = []time.Duration{1 * time.Millisecond} c.SetRetryBackoff([]time.Duration{1 * time.Millisecond})
entries, err := c.ListContents(context.Background(), "owner", "repo", ".") entries, err := c.ListContents(context.Background(), "owner", "repo", ".")
if err != nil { if err != nil {
+10 -7
View File
5
@@ -84,13 +84,16 @@ func (c *Client) GetPullRequestDiff(ctx context.Context, owner, repo string, num
return string(body), nil return string(body), nil
} }
// maxPages is the upper bound on pagination loops to prevent unbounded iteration
// in case the server returns a full page indefinitely.
Review

[MINOR] The maxPages = 100 constant caps pagination at 100 pages × 100 files = 10,000 files for PRs, and 100 pages × 100 check runs = 10,000 check runs. This constant is shared between two very different concerns (PR files and check runs). A PR with 10,000 files is pathological but possible in generated-code repos; silently truncating without returning an error or warning could cause incorrect reviews. Consider either documenting this limit explicitly or returning an error when the cap is hit.

**[MINOR]** The `maxPages = 100` constant caps pagination at 100 pages × 100 files = 10,000 files for PRs, and 100 pages × 100 check runs = 10,000 check runs. This constant is shared between two very different concerns (PR files and check runs). A PR with 10,000 files is pathological but possible in generated-code repos; silently truncating without returning an error or warning could cause incorrect reviews. Consider either documenting this limit explicitly or returning an error when the cap is hit.
const maxPages = 100
// GetPullRequestFiles fetches the list of files changed in a PR. // GetPullRequestFiles fetches the list of files changed in a PR.
// Paginates through all pages (100 per page) to collect all files. // Paginates through all pages (100 per page) to collect all files.
func (c *Client) GetPullRequestFiles(ctx context.Context, owner, repo string, number int) ([]vcs.ChangedFile, error) { func (c *Client) GetPullRequestFiles(ctx context.Context, owner, repo string, number int) ([]vcs.ChangedFile, error) {
var allFiles []vcs.ChangedFile var allFiles []vcs.ChangedFile
page := 1
for { for page := 1; page <= maxPages; page++ {
reqURL := fmt.Sprintf("%s/repos/%s/%s/pulls/%d/files?per_page=100&page=%d", reqURL := fmt.Sprintf("%s/repos/%s/%s/pulls/%d/files?per_page=100&page=%d",
c.baseURL, url.PathEscape(owner), url.PathEscape(repo), number, page) c.baseURL, url.PathEscape(owner), url.PathEscape(repo), number, page)
body, err := c.doGet(ctx, reqURL) body, err := c.doGet(ctx, reqURL)
3
@@ -114,7 +117,6 @@ func (c *Client) GetPullRequestFiles(ctx context.Context, owner, repo string, nu
if len(files) < 100 { if len(files) < 100 {
break break
} }
page++
} }
return allFiles, nil return allFiles, nil
9
@@ -175,8 +177,7 @@ func (c *Client) GetCommitStatuses(ctx context.Context, owner, repo, sha string)
} }
// Fetch check runs (paginated) // Fetch check runs (paginated)
checkPage := 1 for checkPage := 1; checkPage <= maxPages; checkPage++ {
for {
checkURL := fmt.Sprintf("%s/repos/%s/%s/commits/%s/check-runs?per_page=100&page=%d", checkURL := fmt.Sprintf("%s/repos/%s/%s/commits/%s/check-runs?per_page=100&page=%d",
c.baseURL, url.PathEscape(owner), url.PathEscape(repo), url.PathEscape(sha), checkPage) c.baseURL, url.PathEscape(owner), url.PathEscape(repo), url.PathEscape(sha), checkPage)
checkBody, err := c.doGet(ctx, checkURL) checkBody, err := c.doGet(ctx, checkURL)
3
@@ -198,13 +199,15 @@ func (c *Client) GetCommitStatuses(ctx context.Context, owner, repo, sha string)
if len(checkResp.CheckRuns) < 100 { if len(checkResp.CheckRuns) < 100 {
break break
} }
checkPage++
} }
return result, nil return result, nil
} }
Review

[NIT] In GetCommitStatuses, the check run pagination loop uses checkPage as the variable name while the PR files loop uses page. Naming is internally consistent within each function, but the inconsistency between the two sibling pagination loops is a minor style nit.

**[NIT]** In `GetCommitStatuses`, the check run pagination loop uses `checkPage` as the variable name while the PR files loop uses `page`. Naming is internally consistent within each function, but the inconsistency between the two sibling pagination loops is a minor style nit.
// mapCheckRunStatus maps a check run conclusion+status to a vcs.CommitStatus status string. // mapCheckRunStatus maps a check run conclusion to a vcs.CommitStatus status string.
// The second parameter (check run status field, e.g. "completed", "in_progress") is
// unused because conclusion alone determines the mapped state: nil conclusion means
// the run is still in progress (pending), regardless of the status field value.
func mapCheckRunStatus(conclusion *string, _ string) string { func mapCheckRunStatus(conclusion *string, _ string) string {
if conclusion == nil { if conclusion == nil {
// Still running or queued // Still running or queued
+2 -2
View File
@@ -112,7 +112,7 @@ func TestGetPullRequest_429Retry(t *testing.T) {
c := NewClient("token", srv.URL) c := NewClient("token", srv.URL)
c.SetHTTPClient(srv.Client()) c.SetHTTPClient(srv.Client())
c.RetryBackoff = []time.Duration{1 * time.Millisecond} c.SetRetryBackoff([]time.Duration{1 * time.Millisecond})
pr, err := c.GetPullRequest(context.Background(), "owner", "repo", 1) pr, err := c.GetPullRequest(context.Background(), "owner", "repo", 1)
if err != nil { if err != nil {
2
@@ -447,7 +447,7 @@ func TestGetFileContentAtRef_429Retry(t *testing.T) {
c := NewClient("token", srv.URL) c := NewClient("token", srv.URL)
c.SetHTTPClient(srv.Client()) c.SetHTTPClient(srv.Client())
c.RetryBackoff = []time.Duration{1 * time.Millisecond} c.SetRetryBackoff([]time.Duration{1 * time.Millisecond})
content, err := c.GetFileContentAtRef(context.Background(), "owner", "repo", "file.go", "main") content, err := c.GetFileContentAtRef(context.Background(), "owner", "repo", "file.go", "main")
if err != nil { if err != nil {
7