feat(github): implement GitHub API client foundation (#80)
Add GitHub API client with configurable base URL and GHE support, HTTP helpers with 429 retry and Retry-After handling. Also adds Patch field to vcs.ChangedFile. Part 1 of 3 for #80.
This commit is contained in:
@@ -0,0 +1,327 @@
|
||||
// Package github provides a client for the GitHub API.
|
||||
// It supports pull request operations, file content retrieval, CI status checks,
|
||||
// and directory listing for both github.com and GitHub Enterprise.
|
||||
package github
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Package-wide defaults for the GitHub API client.
const (
	// defaultBaseURL is the API root used when NewClient is given an empty base URL.
	defaultBaseURL = "https://api.github.com"

	// userAgent is the User-Agent header value attached to outgoing requests.
	userAgent = "review-bot/1.0"

	// maxResponseBytes caps how much of a successful response body is read (10 MiB).
	maxResponseBytes = 10 << 20
)
|
||||
|
||||
// APIError represents an HTTP error response from the GitHub API.
// It carries the status code so callers can distinguish between
// different failure modes (e.g. 404 vs 500).
//
// The Body field stores up to 4 KiB of the raw response for programmatic
// inspection. Error() shortens it to at most 200 bytes for safe logging, but
// callers should avoid logging or propagating Body directly in production
// since it may contain sensitive details from the upstream server.
type APIError struct {
	// StatusCode is the HTTP status code of the response.
	StatusCode int
	// Body is the (possibly truncated) raw response body.
	Body string
}

// Error formats the error as "HTTP <status>: <body>".
//
// Bodies longer than 200 bytes are cut and suffixed with "...(truncated)".
// The cut is moved back to the nearest UTF-8 character boundary so that a
// multi-byte character is never split in half, which would otherwise put
// invalid UTF-8 into log output. Carriage returns and line feeds are replaced
// with spaces to prevent log injection from upstream response bodies.
func (e *APIError) Error() string {
	const previewLimit = 200

	body := e.Body
	if len(body) > previewLimit {
		cut := previewLimit
		// body[cut] is the first byte that would be dropped. If it is a UTF-8
		// continuation byte (10xxxxxx), the character it belongs to straddles
		// the cut, so step back to that character's lead byte. A character has
		// at most three continuation bytes, which bounds the walk even when
		// the body is not valid UTF-8.
		for back := 0; back < 3 && cut > 0 && body[cut]&0xC0 == 0x80; back++ {
			cut--
		}
		body = body[:cut] + "...(truncated)"
	}
	// Sanitize newlines to prevent log injection from upstream response bodies.
	body = strings.ReplaceAll(body, "\n", " ")
	body = strings.ReplaceAll(body, "\r", " ")
	return fmt.Sprintf("HTTP %d: %s", e.StatusCode, body)
}
|
||||
|
||||
// IsNotFound reports whether an error is an API 404 response.
|
||||
func IsNotFound(err error) bool {
|
||||
if apiErr, ok := asAPIError(err); ok {
|
||||
return apiErr.StatusCode == http.StatusNotFound
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// IsUnauthorized reports whether an error is an API 401 response.
|
||||
func IsUnauthorized(err error) bool {
|
||||
if apiErr, ok := asAPIError(err); ok {
|
||||
return apiErr.StatusCode == http.StatusUnauthorized
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func asAPIError(err error) (*APIError, bool) {
|
||||
if err == nil {
|
||||
return nil, false
|
||||
}
|
||||
var target *APIError
|
||||
if errors.As(err, &target) {
|
||||
return target, true
|
||||
}
|
||||
return nil, false
|
||||
}
|
||||
|
||||
// clientConfig collects the optional settings that ClientOption values
// apply while NewClient is building a Client.
type clientConfig struct {
	// allowInsecureHTTP records whether plain-HTTP base URLs are permitted.
	allowInsecureHTTP bool
}

// ClientOption is a functional option that adjusts the optional behavior of
// NewClient.
type ClientOption func(*clientConfig)

// AllowInsecureHTTP returns an option that permits the client to use HTTP
// (non-TLS) base URLs. Use it only for trusted internal deployments or
// testing.
func AllowInsecureHTTP() ClientOption {
	enable := func(cfg *clientConfig) {
		cfg.allowInsecureHTTP = true
	}
	return enable
}
|
||||
|
||||
// Client talks to the GitHub API.
//
// A Client is safe for concurrent use by multiple goroutines. The exceptions
// are SetHTTPClient and SetRetryBackoff: they are meant for test setup, are
// not synchronized, and must be called before any goroutine issues requests.
type Client struct {
	// baseURL is the API root; token is the credential sent with requests
	// (requests are sent unauthenticated when it is empty).
	baseURL, token string

	// allowInsecureHTTP permits sending the token over non-HTTPS URLs.
	allowInsecureHTTP bool

	// httpClient performs the actual HTTP round trips.
	httpClient *http.Client

	// retryBackoff lists the delays between retry attempts for 429 responses:
	// retryBackoff[i] is the delay before attempt i+1 (after attempt i fails).
	// When nil, {1s, 2s} is used. Tests shorten it via SetRetryBackoff.
	retryBackoff []time.Duration
}
|
||||
|
||||
// defaultCheckRedirect is the redirect policy installed by NewClient and by
// SetHTTPClient(nil).
//
// It stops after 10 redirects, refuses HTTPS→HTTP protocol downgrades (to
// prevent plaintext leakage), and removes the Authorization header when a
// redirect moves to a different host than the previous request, so
// credentials are not leaked to third-party hosts (e.g. CDN redirects from
// GitHub).
func defaultCheckRedirect(req *http.Request, via []*http.Request) error {
	const maxRedirects = 10

	hops := len(via)
	switch {
	case hops >= maxRedirects:
		return fmt.Errorf("stopped after 10 redirects")
	case hops == 0:
		// net/http is expected to always pass at least one prior request, but
		// guard anyway so indexing below can never panic.
		return nil
	}

	from, to := via[hops-1].URL, req.URL

	// A downgrade from HTTPS to HTTP would expose request metadata in plaintext.
	if from.Scheme == "https" && to.Scheme == "http" {
		return fmt.Errorf("refusing redirect from HTTPS to HTTP (%s → %s)", from.Host, to.Host)
	}

	// GitHub legitimately redirects to CDN hosts; those must not see the token.
	if to.Host != from.Host {
		req.Header.Del("Authorization")
	}
	return nil
}
|
||||
|
||||
// NewClient creates a new GitHub API client.
|
||||
// If baseURL is empty, it defaults to https://api.github.com.
|
||||
// For GitHub Enterprise, pass the API base URL (e.g. https://github.concur.com/api/v3).
|
||||
// The baseURL must use HTTPS; pass AllowInsecureHTTP() as an option to permit HTTP
|
||||
// for trusted internal deployments (e.g. local testing).
|
||||
func NewClient(token, baseURL string, opts ...ClientOption) *Client {
|
||||
if baseURL == "" {
|
||||
baseURL = defaultBaseURL
|
||||
}
|
||||
cfg := clientConfig{}
|
||||
for _, o := range opts {
|
||||
o(&cfg)
|
||||
}
|
||||
return &Client{
|
||||
baseURL: strings.TrimRight(baseURL, "/"),
|
||||
allowInsecureHTTP: cfg.allowInsecureHTTP,
|
||||
token: token,
|
||||
httpClient: &http.Client{
|
||||
Timeout: 30 * time.Second,
|
||||
CheckRedirect: defaultCheckRedirect,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// SetHTTPClient sets the underlying HTTP client used for requests.
|
||||
// This is intended for test setup only to inject mock transports; it must be
|
||||
// called before any goroutines issue requests.
|
||||
//
|
||||
// Passing nil restores the default client (30s timeout + auth-stripping
|
||||
// CheckRedirect policy matching NewClient).
|
||||
//
|
||||
// Callers providing a non-nil client are responsible for configuring a safe
|
||||
// CheckRedirect policy. Without one, the default net/http behavior will follow
|
||||
// redirects and may forward the Authorization header to untrusted hosts.
|
||||
func (c *Client) SetHTTPClient(hc *http.Client) {
|
||||
if hc == nil {
|
||||
hc = &http.Client{
|
||||
Timeout: 30 * time.Second,
|
||||
CheckRedirect: defaultCheckRedirect,
|
||||
}
|
||||
}
|
||||
c.httpClient = hc
|
||||
}
|
||||
|
||||
// SetRetryBackoff configures the retry backoff durations for testing.
|
||||
// It must be called before any goroutines issue requests.
|
||||
// In production the default {1s, 2s} applies.
|
||||
func (c *Client) SetRetryBackoff(d []time.Duration) {
|
||||
c.retryBackoff = d
|
||||
}
|
||||
|
||||
// doRequest performs an HTTP request with retry on 429 rate limit responses.
|
||||
// It respects the Retry-After header when present (capped at maxRetryAfter).
|
||||
// Transport errors (network failures, context cancellation) are not retried.
|
||||
func (c *Client) doRequest(ctx context.Context, method, reqURL string, accept string) ([]byte, error) {
|
||||
const maxAttempts = 3
|
||||
const maxRetryAfter = 120 * time.Second
|
||||
|
||||
var backoff []time.Duration
|
||||
if c.retryBackoff != nil {
|
||||
backoff = make([]time.Duration, len(c.retryBackoff))
|
||||
copy(backoff, c.retryBackoff)
|
||||
} else {
|
||||
backoff = []time.Duration{1 * time.Second, 2 * time.Second}
|
||||
}
|
||||
|
||||
// maxErrorBodyBytes limits how much of an error response body is stored.
|
||||
// Kept small (4 KiB) to reduce the risk of sensitive data leakage if callers
|
||||
// log APIError.Body directly. Error() further truncates to 200 bytes.
|
||||
const maxErrorBodyBytes = 4 * 1024
|
||||
|
||||
// Reject non-HTTPS URLs early since the URL is immutable across retries.
|
||||
if c.token != "" && !c.allowInsecureHTTP {
|
||||
parsed, err := url.Parse(reqURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse request URL: %w", err)
|
||||
}
|
||||
if !strings.EqualFold(parsed.Scheme, "https") {
|
||||
return nil, fmt.Errorf("refusing to send credentials over non-HTTPS URL %q (use AllowInsecureHTTP option for trusted networks)", reqURL)
|
||||
}
|
||||
}
|
||||
|
||||
var lastErr error
|
||||
for attempt := 0; attempt < maxAttempts; attempt++ {
|
||||
if attempt > 0 {
|
||||
var delay time.Duration
|
||||
if attempt-1 < len(backoff) {
|
||||
delay = backoff[attempt-1]
|
||||
}
|
||||
if delay > 0 {
|
||||
timer := time.NewTimer(delay)
|
||||
select {
|
||||
case <-timer.C:
|
||||
timer.Stop() // no-op after fire; kept for symmetry with the ctx.Done case
|
||||
case <-ctx.Done():
|
||||
timer.Stop()
|
||||
return nil, ctx.Err()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, method, reqURL, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("create request: %w", err)
|
||||
}
|
||||
if c.token != "" {
|
||||
// Bearer is the OAuth2 standard and is accepted by GitHub for both
|
||||
// classic PATs and fine-grained tokens. The alternative "token" scheme
|
||||
// is GitHub-specific and offers no additional compatibility.
|
||||
req.Header.Set("Authorization", "Bearer "+c.token)
|
||||
}
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
if accept != "" {
|
||||
req.Header.Set("Accept", accept)
|
||||
} else {
|
||||
req.Header.Set("Accept", "application/vnd.github+json")
|
||||
}
|
||||
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("do request: %w", err)
|
||||
}
|
||||
|
||||
body, done, err := c.handleResponse(resp, maxResponseBytes, maxErrorBodyBytes)
|
||||
if done {
|
||||
return body, err
|
||||
}
|
||||
lastErr = err
|
||||
|
||||
// Retry on 429 rate limit
|
||||
if resp.StatusCode == http.StatusTooManyRequests && attempt < maxAttempts-1 {
|
||||
// Check for Retry-After header and override backoff if present.
|
||||
// Supports both integer seconds (common) and HTTP-date format (RFC 7231).
|
||||
if ra := resp.Header.Get("Retry-After"); ra != "" {
|
||||
if seconds, err := strconv.Atoi(ra); err == nil && seconds > 0 {
|
||||
delay := time.Duration(seconds) * time.Second
|
||||
if delay > maxRetryAfter {
|
||||
delay = maxRetryAfter
|
||||
}
|
||||
if attempt < len(backoff) {
|
||||
backoff[attempt] = delay
|
||||
}
|
||||
} else if retryAt, err := http.ParseTime(ra); err == nil {
|
||||
delay := time.Until(retryAt)
|
||||
if delay < 0 {
|
||||
delay = 0
|
||||
}
|
||||
if delay > maxRetryAfter {
|
||||
delay = maxRetryAfter
|
||||
}
|
||||
if attempt < len(backoff) {
|
||||
backoff[attempt] = delay
|
||||
}
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Don't retry other errors
|
||||
return nil, lastErr
|
||||
}
|
||||
|
||||
return nil, lastErr
|
||||
}
|
||||
|
||||
// handleResponse reads and closes the response body, returning the result.
|
||||
// It uses defer to ensure the body is always closed regardless of code path.
|
||||
// Returns (body, done, err) where done=true means the caller should return immediately.
|
||||
func (c *Client) handleResponse(resp *http.Response, maxRespBytes int, maxErrBytes int) ([]byte, bool, error) {
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, int64(maxRespBytes)+1))
|
||||
if err != nil {
|
||||
return nil, true, fmt.Errorf("read response body: %w", err)
|
||||
}
|
||||
if len(body) > maxRespBytes {
|
||||
return nil, true, fmt.Errorf("response body exceeded %d bytes (truncated)", maxRespBytes)
|
||||
}
|
||||
return body, true, nil
|
||||
}
|
||||
|
||||
errBody, readErr := io.ReadAll(io.LimitReader(resp.Body, int64(maxErrBytes)))
|
||||
if readErr != nil && len(errBody) == 0 {
|
||||
errBody = []byte(fmt.Sprintf("[error reading response body: %v]", readErr))
|
||||
}
|
||||
return nil, false, &APIError{StatusCode: resp.StatusCode, Body: string(errBody)}
|
||||
}
|
||||
|
||||
// doGet is a convenience wrapper for GET requests with the default Accept header.
|
||||
func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
|
||||
return c.doRequest(ctx, http.MethodGet, reqURL, "")
|
||||
}
|
||||
Reference in New Issue
Block a user