Files
review-bot/github/client.go
T
claw 86beba63da
PR Ready Gate / clear-labels (pull_request) Successful in 2s
CI / test (pull_request) Successful in 19s
CI / review (anthropic--claude-4.6-sonnet, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 29s
CI / review (gpt-5, security, ., rodin/security-patterns, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Successful in 55s
CI / review (gpt-5, gpt, GPT_REVIEW_TOKEN) (pull_request) Successful in 1m10s
fix(github): add pagination tests and fix truncation warning logic
F1: Add comprehensive pagination tests for ListReviews covering:
- Multi-page behaviour (2 full + 1 partial page)
- Exact-multiple-of-pageSize (extra empty-page round-trip)
- maxReviewPages cutoff (cap hit, results still returned)
- Empty first page (PR with no reviews)

F2: Fix truncation warning logic by moving it outside the loop with
a 'truncated' flag. Previously, the warning fired inline at page==maxPages
which could miss the case where the short-page break fires first on the
cap page. Now it only fires when the loop exits because the cap was reached
and the last page was full (indicating more data likely exists).

Also adds SetReviewPagination to Client for test-time override of page
size and max pages, following the existing SetRetryBackoff pattern.
2026-05-13 16:03:01 -07:00

412 lines
14 KiB
Go

// Package github provides a client for the GitHub API.
// It supports pull request operations, file content retrieval, CI status checks,
// and directory listing for both github.com and GitHub Enterprise.
package github
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strconv"
"strings"
"time"
)
const (
defaultBaseURL = "https://api.github.com"
userAgent = "review-bot/1.0"
// maxResponseBytes limits successful response body reads to 10 MiB.
maxResponseBytes = 10 * 1024 * 1024
// maxRetryAttempts is the number of times doRequest will attempt a request.
// The retry backoff slice must have length maxRetryAttempts-1.
maxRetryAttempts = 3
)
// APIError represents an HTTP error response from the GitHub API.
// It carries the status code so callers can distinguish between
// different failure modes (e.g. 404 vs 500).
//
// The Body field stores up to 4 KiB of the raw response for programmatic
// inspection. Error() truncates to 200 bytes for safe logging, but callers
// should avoid logging or propagating Body directly in production since it may
// contain sensitive details from the upstream server.
type APIError struct {
StatusCode int
Body string
}
func (e *APIError) Error() string {
body := e.Body
if len(body) > 200 {
body = body[:200] + "...(truncated)"
}
// Sanitize newlines to prevent log injection from upstream response bodies.
body = strings.ReplaceAll(body, "\n", " ")
body = strings.ReplaceAll(body, "\r", " ")
return fmt.Sprintf("HTTP %d: %s", e.StatusCode, body)
}
// IsNotFound reports whether an error is an API 404 response.
func IsNotFound(err error) bool {
if apiErr, ok := asAPIError(err); ok {
return apiErr.StatusCode == http.StatusNotFound
}
return false
}
// IsUnauthorized reports whether an error is an API 401 response.
func IsUnauthorized(err error) bool {
if apiErr, ok := asAPIError(err); ok {
return apiErr.StatusCode == http.StatusUnauthorized
}
return false
}
func asAPIError(err error) (*APIError, bool) {
if err == nil {
return nil, false
}
var target *APIError
if errors.As(err, &target) {
return target, true
}
return nil, false
}
// clientConfig holds optional configuration for NewClient.
type clientConfig struct {
allowInsecureHTTP bool
}
// ClientOption configures optional behavior of NewClient.
type ClientOption func(*clientConfig)
// AllowInsecureHTTP permits the client to use HTTP (non-TLS) base URLs.
// This should only be used for trusted internal deployments or testing.
func AllowInsecureHTTP() ClientOption {
return func(c *clientConfig) {
c.allowInsecureHTTP = true
}
}
// Client interacts with the GitHub API.
// A Client is safe for concurrent use by multiple goroutines.
// SetHTTPClient and SetRetryBackoff are intended for test setup only and must
// be called before any goroutines issue requests; they have no synchronization.
type Client struct {
baseURL string
token string
allowInsecureHTTP bool
httpClient *http.Client
// retryBackoff defines the delays between retry attempts for 429 responses.
// retryBackoff[i] is the delay before attempt i+1 (after attempt i fails).
// If nil, defaults to {1s, 2s}. Set to shorter durations in tests via SetRetryBackoff.
retryBackoff []time.Duration
// reviewPageSize overrides reviewsPerPage for testing. Zero means use default.
reviewPageSize int
// reviewMaxPages overrides maxReviewPages for testing. Zero means use default.
reviewMaxPages int
}
// defaultCheckRedirect is the redirect policy used by NewClient and SetHTTPClient(nil).
// It rejects HTTPS→HTTP protocol downgrades (to prevent plaintext leakage) and strips
// the Authorization header on cross-host redirects to prevent credential leakage to
// third-party hosts (e.g. CDN redirects from GitHub).
func defaultCheckRedirect(req *http.Request, via []*http.Request) error {
if len(via) >= 10 {
return fmt.Errorf("stopped after 10 redirects")
}
// Guard: net/http guarantees len(via) >= 1 but this is undocumented;
// defend against zero-length to avoid panic on index out of range.
if len(via) == 0 {
return nil
}
prev := via[len(via)-1]
// Reject protocol downgrade: HTTPS→HTTP leaks request metadata over plaintext.
if prev.URL.Scheme == "https" && req.URL.Scheme == "http" {
return fmt.Errorf("refusing redirect from HTTPS to HTTP (%s → %s)", prev.URL.Host, req.URL.Host)
}
// Strip Authorization on cross-host redirect to avoid leaking credentials
// to third-party hosts (GitHub legitimately redirects to CDN hosts).
if req.URL.Host != prev.URL.Host {
req.Header.Del("Authorization")
}
return nil
}
// NewClient creates a new GitHub API client.
// If baseURL is empty, it defaults to https://api.github.com.
// For GitHub Enterprise, pass the API base URL (e.g. https://github.concur.com/api/v3).
// The baseURL must use HTTPS; pass AllowInsecureHTTP() as an option to permit HTTP
// for trusted internal deployments (e.g. local testing).
func NewClient(token, baseURL string, opts ...ClientOption) *Client {
if baseURL == "" {
baseURL = defaultBaseURL
}
cfg := clientConfig{}
for _, o := range opts {
o(&cfg)
}
return &Client{
baseURL: strings.TrimRight(baseURL, "/"),
allowInsecureHTTP: cfg.allowInsecureHTTP,
token: token,
httpClient: &http.Client{
Timeout: 30 * time.Second,
CheckRedirect: defaultCheckRedirect,
},
}
}
// SetHTTPClient sets the underlying HTTP client used for requests.
// This is intended for test setup only to inject mock transports; it must be
// called before any goroutines issue requests.
//
// Passing nil restores the default client (30s timeout + auth-stripping
// CheckRedirect policy matching NewClient).
//
// Callers providing a non-nil client are responsible for configuring a safe
// CheckRedirect policy. Without one, the default net/http behavior will follow
// redirects and may forward the Authorization header to untrusted hosts.
func (c *Client) SetHTTPClient(hc *http.Client) {
if hc == nil {
hc = &http.Client{
Timeout: 30 * time.Second,
CheckRedirect: defaultCheckRedirect,
}
}
c.httpClient = hc
}
// SetRetryBackoff configures the retry backoff durations for testing.
// It must be called before any goroutines issue requests.
// The slice must have exactly maxRetryAttempts-1 entries (one delay per retry gap).
// In production the default {1s, 2s} applies.
func (c *Client) SetRetryBackoff(d []time.Duration) error {
if len(d) != maxRetryAttempts-1 {
return fmt.Errorf("github: backoff length %d does not match maxRetryAttempts-1 (%d)", len(d), maxRetryAttempts-1)
}
c.retryBackoff = d
return nil
}
// SetReviewPagination overrides the page size and max pages for ListReviews.
// Intended for testing only; must be called before any goroutines issue requests.
func (c *Client) SetReviewPagination(pageSize, maxPages int) {
c.reviewPageSize = pageSize
c.reviewMaxPages = maxPages
}
// requestOptions holds per-request configuration for doRequestCore.
type requestOptions struct {
// bodyFn returns a fresh io.Reader for the request body on each attempt.
// Must be non-nil for any request that carries a body (POST, PUT, PATCH,
// or DELETE when a body is required by the API).
// Returning a fresh reader on each call allows retries to re-send the body.
bodyFn func() io.Reader
// accept overrides the default Accept header. Empty means "application/vnd.github+json".
accept string
// extraHeaders are additional headers to set on each request attempt.
extraHeaders map[string]string
}
// doRequestCore is the shared implementation for all HTTP requests with retry
// on 429 rate limit responses. It respects the Retry-After header when present
// (capped at maxRetryAfter). Transport errors are not retried.
func (c *Client) doRequestCore(ctx context.Context, method, reqURL string, opts requestOptions) ([]byte, error) {
const maxRetryAfter = 120 * time.Second
// maxErrorBodyBytes limits how much of an error response body is stored.
// Kept small (4 KiB) to reduce the risk of sensitive data leakage if callers
// log APIError.Body directly. Error() further truncates to 200 bytes.
const maxErrorBodyBytes = 4 * 1024
// backoff holds per-attempt delays: backoff[i] is the delay before attempt i+1.
// Length must be maxRetryAttempts-1 (one entry per retry gap).
// SetRetryBackoff validates at configuration time; the default is always valid.
defaultBackoff := []time.Duration{1 * time.Second, 2 * time.Second}
var backoff []time.Duration
if c.retryBackoff != nil && len(c.retryBackoff) == maxRetryAttempts-1 {
backoff = make([]time.Duration, len(c.retryBackoff))
copy(backoff, c.retryBackoff)
} else {
backoff = make([]time.Duration, len(defaultBackoff))
copy(backoff, defaultBackoff)
}
// Reject non-HTTPS URLs early since the URL is immutable across retries.
if c.token != "" && !c.allowInsecureHTTP {
parsed, err := url.Parse(reqURL)
if err != nil {
return nil, fmt.Errorf("parse request URL: %w", err)
}
if !strings.EqualFold(parsed.Scheme, "https") {
return nil, fmt.Errorf("refusing to send credentials over non-HTTPS URL %q (use AllowInsecureHTTP option for trusted networks)", reqURL)
}
}
var lastErr error
for attempt := 0; attempt < maxRetryAttempts; attempt++ {
if attempt > 0 {
var delay time.Duration
if attempt-1 < len(backoff) {
delay = backoff[attempt-1]
}
if delay > 0 {
timer := time.NewTimer(delay)
select {
case <-timer.C:
timer.Stop() // no-op after fire; kept for symmetry with the ctx.Done case
case <-ctx.Done():
timer.Stop()
return nil, ctx.Err()
}
}
}
var body io.Reader
if opts.bodyFn != nil {
body = opts.bodyFn()
}
req, err := http.NewRequestWithContext(ctx, method, reqURL, body)
if err != nil {
return nil, fmt.Errorf("create request: %w", err)
}
if c.token != "" {
// Bearer is the OAuth2 standard and is accepted by GitHub for both
// classic PATs and fine-grained tokens. The alternative "token" scheme
// is GitHub-specific and offers no additional compatibility.
req.Header.Set("Authorization", "Bearer "+c.token)
}
req.Header.Set("User-Agent", userAgent)
if opts.accept != "" {
req.Header.Set("Accept", opts.accept)
} else {
req.Header.Set("Accept", "application/vnd.github+json")
}
for k, v := range opts.extraHeaders {
req.Header.Set(k, v)
}
resp, err := c.httpClient.Do(req)
if err != nil {
return nil, fmt.Errorf("do request: %w", err)
}
// Capture response metadata before handleResponse takes body ownership.
respStatus := resp.StatusCode
retryAfterHeader := resp.Header.Get("Retry-After")
respBody, done, handleErr := c.handleResponse(resp, maxResponseBytes, maxErrorBodyBytes)
if done {
return respBody, handleErr
}
lastErr = handleErr
// Retry on 429 rate limit
if respStatus == http.StatusTooManyRequests && attempt < maxRetryAttempts-1 {
// Check for Retry-After header and override backoff if present.
// Supports both integer seconds (common) and HTTP-date format (RFC 7231).
if ra := retryAfterHeader; ra != "" {
if seconds, err := strconv.Atoi(ra); err == nil && seconds > 0 {
delay := time.Duration(seconds) * time.Second
if delay > maxRetryAfter {
delay = maxRetryAfter
}
if attempt < len(backoff) {
backoff[attempt] = delay
}
} else if retryAt, err := http.ParseTime(ra); err == nil {
delay := time.Until(retryAt)
if delay < 0 {
delay = 0
}
if delay > maxRetryAfter {
delay = maxRetryAfter
}
if attempt < len(backoff) {
backoff[attempt] = delay
}
}
}
continue
}
// Don't retry other errors
return nil, lastErr
}
return nil, lastErr
}
// doRequest performs an HTTP request with retry on 429 rate limit responses.
// It respects the Retry-After header when present (capped at maxRetryAfter).
// Transport errors (network failures, context cancellation) are not retried.
func (c *Client) doRequest(ctx context.Context, method, reqURL string, accept string) ([]byte, error) {
return c.doRequestCore(ctx, method, reqURL, requestOptions{accept: accept})
}
// handleResponse reads and closes the response body, returning the result.
// It uses defer to ensure the body is always closed regardless of code path.
// Returns (body, done, err) where done=true means the caller should return immediately.
func (c *Client) handleResponse(resp *http.Response, maxRespBytes int, maxErrBytes int) ([]byte, bool, error) {
defer resp.Body.Close()
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
body, err := io.ReadAll(io.LimitReader(resp.Body, int64(maxRespBytes)+1))
if err != nil {
return nil, true, fmt.Errorf("read response body: %w", err)
}
if len(body) > maxRespBytes {
return nil, true, fmt.Errorf("response body exceeded %d bytes", maxRespBytes)
}
return body, true, nil
}
errBody, readErr := io.ReadAll(io.LimitReader(resp.Body, int64(maxErrBytes)))
if readErr != nil && len(errBody) == 0 {
errBody = []byte(fmt.Sprintf("[error reading response body: %v]", readErr))
}
return nil, false, &APIError{StatusCode: resp.StatusCode, Body: string(errBody)}
}
// doGet is a convenience wrapper for GET requests with the default Accept header.
func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
return c.doRequest(ctx, http.MethodGet, reqURL, "")
}
// doRequestWithBody is like doRequest but sends a request body.
// It accepts the raw body bytes and sets Content-Type to application/json.
// Retry semantics match doRequest (retries on 429 with Retry-After support).
func (c *Client) doRequestWithBody(ctx context.Context, method, reqURL string, reqBody []byte) ([]byte, error) {
var opts requestOptions
if reqBody != nil {
opts.bodyFn = func() io.Reader { return bytes.NewReader(reqBody) }
opts.extraHeaders = map[string]string{"Content-Type": "application/json"}
}
return c.doRequestCore(ctx, method, reqURL, opts)
}
// doJSONRequest performs an HTTP request with a JSON body and returns the response body.
// It delegates retry/backoff/429 handling to doRequestWithBody.
// This is a general-purpose helper used by any method that needs to send JSON payloads
// (e.g. PostReview, DismissReview).
func (c *Client) doJSONRequest(ctx context.Context, method, reqURL string, payload any) ([]byte, error) {
jsonBody, err := json.Marshal(payload)
if err != nil {
return nil, fmt.Errorf("marshal request body: %w", err)
}
return c.doRequestWithBody(ctx, method, reqURL, jsonBody)
}