feat(github): implement PRReader + FileReader client (#80) #93
@@ -14,11 +14,13 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
|||||||
)
|
)
|
||||||
|
|
||||||
const defaultBaseURL = "https://api.github.com"
|
const (
|
||||||
const userAgent = "review-bot/1.0"
|
defaultBaseURL = "https://api.github.com"
|
||||||
|
userAgent = "review-bot/1.0"
|
||||||
|
|
||||||
// maxResponseBytes limits successful response body reads to 10 MiB.
|
// maxResponseBytes limits successful response body reads to 10 MiB.
|
||||||
const maxResponseBytes = 10 * 1024 * 1024
|
maxResponseBytes = 10 * 1024 * 1024
|
||||||
|
)
|
||||||
|
|
||||||
// APIError represents an HTTP error response from the GitHub API.
|
// APIError represents an HTTP error response from the GitHub API.
|
||||||
// It carries the status code so callers can distinguish between
|
// It carries the status code so callers can distinguish between
|
||||||
@@ -68,12 +70,12 @@ func asAPIError(err error) (*APIError, bool) {
|
|||||||
type Client struct {
|
type Client struct {
|
||||||
baseURL string
|
baseURL string
|
||||||
token string
|
token string
|
||||||
http *http.Client
|
httpClient *http.Client
|
||||||
|
|
||||||
// RetryBackoff defines the delays between retry attempts for 429 responses.
|
// retryBackoff defines the delays between retry attempts for 429 responses.
|
||||||
// RetryBackoff[i] is the delay before attempt i+1 (after attempt i fails).
|
// retryBackoff[i] is the delay before attempt i+1 (after attempt i fails).
|
||||||
// If nil, defaults to {1s, 2s}. Set to shorter durations in tests.
|
// If nil, defaults to {1s, 2s}. Set to shorter durations in tests via SetRetryBackoff.
|
||||||
RetryBackoff []time.Duration
|
retryBackoff []time.Duration
|
||||||
|
[MINOR] AllowInsecureHTTP option permits sending credentials over HTTP when enabled. Although documented for trusted/internal use, accidental enablement in production would expose tokens in cleartext. Consider additional safeguards (e.g., explicit environment gate or failing fast unless a dedicated test flag is present).
|
|||||||
}
|
}
|
||||||
|
|
||||||
// NewClient creates a new GitHub API client.
|
// NewClient creates a new GitHub API client.
|
||||||
@@ -86,16 +88,17 @@ func NewClient(token, baseURL string) *Client {
|
|||||||
return &Client{
|
return &Client{
|
||||||
baseURL: strings.TrimRight(baseURL, "/"),
|
baseURL: strings.TrimRight(baseURL, "/"),
|
||||||
token: token,
|
token: token,
|
||||||
http: &http.Client{
|
httpClient: &http.Client{
|
||||||
Timeout: 30 * time.Second,
|
Timeout: 30 * time.Second,
|
||||||
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
||||||
// Prevent forwarding Authorization header to different hosts on redirect.
|
|
||||||
if len(via) > 0 && req.URL.Host != via[0].URL.Host {
|
|
||||||
req.Header.Del("Authorization")
|
|
||||||
}
|
|
||||||
if len(via) >= 10 {
|
if len(via) >= 10 {
|
||||||
return fmt.Errorf("stopped after 10 redirects")
|
return fmt.Errorf("stopped after 10 redirects")
|
||||||
}
|
}
|
||||||
|
// Strip Authorization on cross-host redirect or protocol downgrade (https→http).
|
||||||
|
prev := via[len(via)-1]
|
||||||
|
if req.URL.Host != prev.URL.Host || (prev.URL.Scheme == "https" && req.URL.Scheme == "http") {
|
||||||
|
req.Header.Del("Authorization")
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -105,7 +108,13 @@ func NewClient(token, baseURL string) *Client {
|
|||||||
// SetHTTPClient sets the underlying HTTP client used for requests.
|
// SetHTTPClient sets the underlying HTTP client used for requests.
|
||||||
// This is intended for testing to inject mock transports.
|
// This is intended for testing to inject mock transports.
|
||||||
|
[MINOR] defaultCheckRedirect allows cross-host redirects (with Authorization stripped). Although token leakage is mitigated, following cross-host redirects can facilitate SSRF-like behavior if baseURL is misconfigured or points to a compromised server. Consider rejecting cross-host redirects by default or enforcing an allowlist of trusted hosts.
|
|||||||
func (c *Client) SetHTTPClient(hc *http.Client) {
|
func (c *Client) SetHTTPClient(hc *http.Client) {
|
||||||
c.http = hc
|
c.httpClient = hc
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetRetryBackoff configures the retry backoff durations for testing.
|
||||||
|
// In production the default {1s, 2s} applies.
|
||||||
|
[MINOR] defaultCheckRedirect follows HTTPS→HTTP redirects after stripping Authorization. While credentials are protected, this still permits plaintext requests to proceed, which can leak metadata and expands the attack surface if a misconfigured or compromised server issues such redirects. Prefer failing closed on protocol downgrades.
|
|||||||
|
func (c *Client) SetRetryBackoff(d []time.Duration) {
|
||||||
|
sonnet-review-bot
commented
[MINOR] The doc comment on `defaultCheckRedirect` says it "strips the Authorization header on cross-host redirects or protocol downgrades (HTTPS→HTTP) to prevent credential leakage, while still following the redirect." However, a protocol downgrade from HTTPS to HTTP is a genuine security issue — stripping the header and still following is debatable. Consider returning an error on HTTPS→HTTP downgrade rather than silently following. This is a design choice that has security implications, not a bug per se, but worth flagging.
gpt-review-bot
commented
[MINOR] defaultCheckRedirect indexes via[len(via)-1] without guarding for len(via) == 0. net/http currently guarantees at least one prior request in via, but adding a len(via) check would make this more robust against misuse.
|
|||||||
|
c.retryBackoff = d
|
||||||
}
|
}
|
||||||
|
|
||||||
// doRequest performs an HTTP request with retry on 429 rate limit responses.
|
// doRequest performs an HTTP request with retry on 429 rate limit responses.
|
||||||
@@ -116,9 +125,9 @@ func (c *Client) doRequest(ctx context.Context, method, url string, accept strin
|
|||||||
const maxRetryAfter = 120 * time.Second
|
const maxRetryAfter = 120 * time.Second
|
||||||
|
|
||||||
var backoff []time.Duration
|
var backoff []time.Duration
|
||||||
if c.RetryBackoff != nil {
|
if c.retryBackoff != nil {
|
||||||
backoff = make([]time.Duration, len(c.RetryBackoff))
|
backoff = make([]time.Duration, len(c.retryBackoff))
|
||||||
copy(backoff, c.RetryBackoff)
|
copy(backoff, c.retryBackoff)
|
||||||
} else {
|
} else {
|
||||||
backoff = []time.Duration{1 * time.Second, 2 * time.Second}
|
backoff = []time.Duration{1 * time.Second, 2 * time.Second}
|
||||||
}
|
}
|
||||||
@@ -157,7 +166,7 @@ func (c *Client) doRequest(ctx context.Context, method, url string, accept strin
|
|||||||
req.Header.Set("Accept", "application/vnd.github+json")
|
req.Header.Set("Accept", "application/vnd.github+json")
|
||||||
}
|
}
|
||||||
|
|
||||||
resp, err := c.http.Do(req)
|
resp, err := c.httpClient.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("do request: %w", err)
|
return nil, fmt.Errorf("do request: %w", err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -81,7 +81,7 @@ func TestDoRequest_429Retry(t *testing.T) {
|
|||||||
|
|
||||||
c := NewClient("token", srv.URL)
|
c := NewClient("token", srv.URL)
|
||||||
c.SetHTTPClient(srv.Client())
|
c.SetHTTPClient(srv.Client())
|
||||||
c.RetryBackoff = []time.Duration{10 * time.Millisecond, 10 * time.Millisecond}
|
c.SetRetryBackoff([]time.Duration{10 * time.Millisecond, 10 * time.Millisecond})
|
||||||
|
|
||||||
body, err := c.doGet(context.Background(), srv.URL+"/test")
|
body, err := c.doGet(context.Background(), srv.URL+"/test")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -106,7 +106,7 @@ func TestDoRequest_429ExhaustsRetries(t *testing.T) {
|
|||||||
|
|
||||||
c := NewClient("token", srv.URL)
|
c := NewClient("token", srv.URL)
|
||||||
c.SetHTTPClient(srv.Client())
|
c.SetHTTPClient(srv.Client())
|
||||||
c.RetryBackoff = []time.Duration{1 * time.Millisecond, 1 * time.Millisecond}
|
c.SetRetryBackoff([]time.Duration{1 * time.Millisecond, 1 * time.Millisecond})
|
||||||
|
|
||||||
_, err := c.doGet(context.Background(), srv.URL+"/test")
|
_, err := c.doGet(context.Background(), srv.URL+"/test")
|
||||||
if err == nil {
|
if err == nil {
|
||||||
@@ -205,7 +205,7 @@ func TestDoRequest_429RetryAfterHeader(t *testing.T) {
|
|||||||
c := NewClient("token", srv.URL)
|
c := NewClient("token", srv.URL)
|
||||||
c.SetHTTPClient(srv.Client())
|
c.SetHTTPClient(srv.Client())
|
||||||
// Use short backoff; Retry-After should override
|
// Use short backoff; Retry-After should override
|
||||||
c.RetryBackoff = []time.Duration{1 * time.Millisecond, 1 * time.Millisecond}
|
c.SetRetryBackoff([]time.Duration{1 * time.Millisecond, 1 * time.Millisecond})
|
||||||
|
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
body, err := c.doGet(context.Background(), srv.URL+"/test")
|
body, err := c.doGet(context.Background(), srv.URL+"/test")
|
||||||
@@ -246,19 +246,19 @@ func TestDoRequest_RetryAfterDoesNotMutateBackoff(t *testing.T) {
|
|||||||
|
|
||||||
c := NewClient("token", srv.URL)
|
c := NewClient("token", srv.URL)
|
||||||
c.SetHTTPClient(srv.Client())
|
c.SetHTTPClient(srv.Client())
|
||||||
c.RetryBackoff = []time.Duration{1 * time.Millisecond, 1 * time.Millisecond}
|
c.SetRetryBackoff([]time.Duration{1 * time.Millisecond, 1 * time.Millisecond})
|
||||||
|
|
||||||
_, err := c.doGet(context.Background(), srv.URL+"/test")
|
_, err := c.doGet(context.Background(), srv.URL+"/test")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("unexpected error: %v", err)
|
t.Fatalf("unexpected error: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Verify the original RetryBackoff slice was not mutated
|
// Verify the original retryBackoff slice was not mutated
|
||||||
if c.RetryBackoff[0] != 1*time.Millisecond {
|
if c.retryBackoff[0] != 1*time.Millisecond {
|
||||||
t.Errorf("RetryBackoff[0] was mutated: got %v, want 1ms", c.RetryBackoff[0])
|
t.Errorf("retryBackoff[0] was mutated: got %v, want 1ms", c.retryBackoff[0])
|
||||||
}
|
}
|
||||||
if c.RetryBackoff[1] != 1*time.Millisecond {
|
if c.retryBackoff[1] != 1*time.Millisecond {
|
||||||
t.Errorf("RetryBackoff[1] was mutated: got %v, want 1ms", c.RetryBackoff[1])
|
t.Errorf("retryBackoff[1] was mutated: got %v, want 1ms", c.retryBackoff[1])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -310,7 +310,7 @@ func TestDoRequest_SkipsAuthWhenTokenEmpty(t *testing.T) {
|
|||||||
func TestNewClient_CheckRedirectStripsAuthOnCrossHost(t *testing.T) {
|
func TestNewClient_CheckRedirectStripsAuthOnCrossHost(t *testing.T) {
|
||||||
// Verify the CheckRedirect function is configured
|
// Verify the CheckRedirect function is configured
|
||||||
c := NewClient("secret-token", "https://api.github.com")
|
c := NewClient("secret-token", "https://api.github.com")
|
||||||
if c.http.CheckRedirect == nil {
|
if c.httpClient.CheckRedirect == nil {
|
||||||
t.Fatal("expected CheckRedirect to be set")
|
t.Fatal("expected CheckRedirect to be set")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -109,7 +109,7 @@ func TestGetFileContent_429Retry(t *testing.T) {
|
|||||||
|
|
||||||
c := NewClient("token", srv.URL)
|
c := NewClient("token", srv.URL)
|
||||||
c.SetHTTPClient(srv.Client())
|
c.SetHTTPClient(srv.Client())
|
||||||
c.RetryBackoff = []time.Duration{1 * time.Millisecond}
|
c.SetRetryBackoff([]time.Duration{1 * time.Millisecond})
|
||||||
|
|
||||||
content, err := c.GetFileContent(context.Background(), "owner", "repo", "file.go", "")
|
content, err := c.GetFileContent(context.Background(), "owner", "repo", "file.go", "")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -227,7 +227,7 @@ func TestListContents_429Retry(t *testing.T) {
|
|||||||
|
|
||||||
c := NewClient("token", srv.URL)
|
c := NewClient("token", srv.URL)
|
||||||
c.SetHTTPClient(srv.Client())
|
c.SetHTTPClient(srv.Client())
|
||||||
c.RetryBackoff = []time.Duration{1 * time.Millisecond}
|
c.SetRetryBackoff([]time.Duration{1 * time.Millisecond})
|
||||||
|
|
||||||
entries, err := c.ListContents(context.Background(), "owner", "repo", ".")
|
entries, err := c.ListContents(context.Background(), "owner", "repo", ".")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -84,13 +84,16 @@ func (c *Client) GetPullRequestDiff(ctx context.Context, owner, repo string, num
|
|||||||
return string(body), nil
|
return string(body), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// maxPages is the upper bound on pagination loops to prevent unbounded iteration
|
||||||
|
// in case the server returns a full page indefinitely.
|
||||||
|
sonnet-review-bot
commented
[MINOR] The `maxPages = 100` constant caps pagination at 100 pages × 100 files = 10,000 files for PRs, and 100 pages × 100 check runs = 10,000 check runs. This constant is shared between two very different concerns (PR files and check runs). A PR with 10,000 files is pathological but possible in generated-code repos; silently truncating without returning an error or warning could cause incorrect reviews. Consider either documenting this limit explicitly or returning an error when the cap is hit.
|
|||||||
|
const maxPages = 100
|
||||||
|
|
||||||
// GetPullRequestFiles fetches the list of files changed in a PR.
|
// GetPullRequestFiles fetches the list of files changed in a PR.
|
||||||
// Paginates through all pages (100 per page) to collect all files.
|
// Paginates through all pages (100 per page) to collect all files.
|
||||||
func (c *Client) GetPullRequestFiles(ctx context.Context, owner, repo string, number int) ([]vcs.ChangedFile, error) {
|
func (c *Client) GetPullRequestFiles(ctx context.Context, owner, repo string, number int) ([]vcs.ChangedFile, error) {
|
||||||
var allFiles []vcs.ChangedFile
|
var allFiles []vcs.ChangedFile
|
||||||
page := 1
|
|
||||||
|
|
||||||
for {
|
for page := 1; page <= maxPages; page++ {
|
||||||
reqURL := fmt.Sprintf("%s/repos/%s/%s/pulls/%d/files?per_page=100&page=%d",
|
reqURL := fmt.Sprintf("%s/repos/%s/%s/pulls/%d/files?per_page=100&page=%d",
|
||||||
c.baseURL, url.PathEscape(owner), url.PathEscape(repo), number, page)
|
c.baseURL, url.PathEscape(owner), url.PathEscape(repo), number, page)
|
||||||
body, err := c.doGet(ctx, reqURL)
|
body, err := c.doGet(ctx, reqURL)
|
||||||
@@ -114,7 +117,6 @@ func (c *Client) GetPullRequestFiles(ctx context.Context, owner, repo string, nu
|
|||||||
if len(files) < 100 {
|
if len(files) < 100 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
page++
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return allFiles, nil
|
return allFiles, nil
|
||||||
@@ -175,8 +177,7 @@ func (c *Client) GetCommitStatuses(ctx context.Context, owner, repo, sha string)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Fetch check runs (paginated)
|
// Fetch check runs (paginated)
|
||||||
checkPage := 1
|
for checkPage := 1; checkPage <= maxPages; checkPage++ {
|
||||||
for {
|
|
||||||
checkURL := fmt.Sprintf("%s/repos/%s/%s/commits/%s/check-runs?per_page=100&page=%d",
|
checkURL := fmt.Sprintf("%s/repos/%s/%s/commits/%s/check-runs?per_page=100&page=%d",
|
||||||
c.baseURL, url.PathEscape(owner), url.PathEscape(repo), url.PathEscape(sha), checkPage)
|
c.baseURL, url.PathEscape(owner), url.PathEscape(repo), url.PathEscape(sha), checkPage)
|
||||||
checkBody, err := c.doGet(ctx, checkURL)
|
checkBody, err := c.doGet(ctx, checkURL)
|
||||||
@@ -198,13 +199,15 @@ func (c *Client) GetCommitStatuses(ctx context.Context, owner, repo, sha string)
|
|||||||
if len(checkResp.CheckRuns) < 100 {
|
if len(checkResp.CheckRuns) < 100 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
checkPage++
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return result, nil
|
return result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sonnet-review-bot
commented
[NIT] In `GetCommitStatuses`, the check run pagination loop uses `checkPage` as the variable name while the PR files loop uses `page`. Naming is internally consistent within each function, but the inconsistency between the two sibling pagination loops is a minor style nit.
|
|||||||
// mapCheckRunStatus maps a check run conclusion+status to a vcs.CommitStatus status string.
|
// mapCheckRunStatus maps a check run conclusion to a vcs.CommitStatus status string.
|
||||||
|
// The second parameter (check run status field, e.g. "completed", "in_progress") is
|
||||||
|
// unused because conclusion alone determines the mapped state: nil conclusion means
|
||||||
|
// the run is still in progress (pending), regardless of the status field value.
|
||||||
func mapCheckRunStatus(conclusion *string, _ string) string {
|
func mapCheckRunStatus(conclusion *string, _ string) string {
|
||||||
if conclusion == nil {
|
if conclusion == nil {
|
||||||
// Still running or queued
|
// Still running or queued
|
||||||
|
|||||||
@@ -112,7 +112,7 @@ func TestGetPullRequest_429Retry(t *testing.T) {
|
|||||||
|
|
||||||
c := NewClient("token", srv.URL)
|
c := NewClient("token", srv.URL)
|
||||||
c.SetHTTPClient(srv.Client())
|
c.SetHTTPClient(srv.Client())
|
||||||
c.RetryBackoff = []time.Duration{1 * time.Millisecond}
|
c.SetRetryBackoff([]time.Duration{1 * time.Millisecond})
|
||||||
|
|
||||||
pr, err := c.GetPullRequest(context.Background(), "owner", "repo", 1)
|
pr, err := c.GetPullRequest(context.Background(), "owner", "repo", 1)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -447,7 +447,7 @@ func TestGetFileContentAtRef_429Retry(t *testing.T) {
|
|||||||
|
|
||||||
c := NewClient("token", srv.URL)
|
c := NewClient("token", srv.URL)
|
||||||
c.SetHTTPClient(srv.Client())
|
c.SetHTTPClient(srv.Client())
|
||||||
c.RetryBackoff = []time.Duration{1 * time.Millisecond}
|
c.SetRetryBackoff([]time.Duration{1 * time.Millisecond})
|
||||||
|
|
||||||
content, err := c.GetFileContentAtRef(context.Background(), "owner", "repo", "file.go", "main")
|
content, err := c.GetFileContentAtRef(context.Background(), "owner", "repo", "file.go", "main")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
[NIT] Package comment mentions "review submission" but this PR doesn't include review endpoints. Consider updating the comment or adding a TODO/ref to avoid misleading users.