921599542d
PR Ready Gate / clear-labels (pull_request) Successful in 2s
CI / test (pull_request) Successful in 21s
CI / review (anthropic--claude-4.6-sonnet, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 34s
CI / review (gpt-5, security, ., rodin/security-patterns, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Successful in 1m6s
CI / review (gpt-5, gpt, GPT_REVIEW_TOKEN) (pull_request) Successful in 2m53s
Implement FileReader conformance on the GitHub client: GetFileContent, ListContents, path helpers, base64 decode. Includes compile-time conformance checks for both PRReader and FileReader. Requires PR B (#102). Part 3 of 3 for #80.
161 lines
5.6 KiB
Go
161 lines
5.6 KiB
Go
package github
|
|
|
|
import (
|
|
"context"
|
|
"encoding/base64"
|
|
"encoding/json"
|
|
"fmt"
|
|
"net/url"
|
|
"path"
|
|
"strings"
|
|
|
|
"gitea.weiker.me/rodin/review-bot/vcs"
|
|
)
|
|
|
|
// GetFileContent fetches a file from a repo at the given ref.
|
|
// Delegates to GetFileContentAtRef with the provided ref.
|
|
func (c *Client) GetFileContent(ctx context.Context, owner, repo, filePath, ref string) (string, error) {
|
|
return c.GetFileContentAtRef(ctx, owner, repo, filePath, ref)
|
|
}
|
|
|
|
// GetFileContentAtRef fetches a file at a specific ref from a repo.
|
|
// If ref is empty, the query parameter is omitted (uses default branch).
|
|
//
|
|
// Returns an error if the path contains dot-segments (".", "..") or
|
|
// attempts to traverse above the repository root.
|
|
func (c *Client) GetFileContentAtRef(ctx context.Context, owner, repo, filePath, ref string) (string, error) {
|
|
escaped, err := escapePath(filePath)
|
|
if err != nil {
|
|
return "", fmt.Errorf("invalid file path: %w", err)
|
|
}
|
|
reqURL := fmt.Sprintf("%s/repos/%s/%s/contents/%s",
|
|
c.baseURL, url.PathEscape(owner), url.PathEscape(repo), escaped)
|
|
if ref != "" {
|
|
reqURL += "?ref=" + url.QueryEscape(ref)
|
|
}
|
|
body, err := c.doGet(ctx, reqURL)
|
|
if err != nil {
|
|
return "", fmt.Errorf("fetch file %s: %w", filePath, err)
|
|
}
|
|
var resp struct {
|
|
Content string `json:"content"`
|
|
Encoding string `json:"encoding"`
|
|
}
|
|
if err := json.Unmarshal(body, &resp); err != nil {
|
|
return "", fmt.Errorf("parse file content JSON: %w", err)
|
|
}
|
|
if resp.Encoding != "base64" {
|
|
return "", fmt.Errorf("unexpected encoding %q for file %s", resp.Encoding, filePath)
|
|
}
|
|
decoded, err := decodeBase64Content(resp.Content)
|
|
if err != nil {
|
|
return "", fmt.Errorf("decode base64 content for %s: %w", filePath, err)
|
|
}
|
|
return decoded, nil
|
|
}
|
|
|
|
// ListContents lists files and directories at a given path in a repo.
|
|
// Returns the directory listing from the GitHub contents API.
|
|
// If the path points to a single file (not a directory), the API returns
|
|
// a JSON object instead of an array; this is handled by returning a
|
|
// single-element slice.
|
|
func (c *Client) ListContents(ctx context.Context, owner, repo, filePath string) ([]vcs.ContentEntry, error) {
|
|
escaped, err := escapePath(filePath)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("invalid file path: %w", err)
|
|
}
|
|
reqURL := fmt.Sprintf("%s/repos/%s/%s/contents/%s",
|
|
c.baseURL, url.PathEscape(owner), url.PathEscape(repo), escaped)
|
|
body, err := c.doGet(ctx, reqURL)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("list contents %s: %w", filePath, err)
|
|
}
|
|
|
|
type entry struct {
|
|
Name string `json:"name"`
|
|
Path string `json:"path"`
|
|
Type string `json:"type"`
|
|
}
|
|
|
|
// The GitHub contents API returns an array for directories and an object
|
|
// for single files. Try array first (common case), then fall back to object.
|
|
// An empty array ([]) is valid — it represents an empty directory — and
|
|
// results in a zero-length slice returned without error.
|
|
var entries []entry
|
|
if err := json.Unmarshal(body, &entries); err != nil {
|
|
var single entry
|
|
if err2 := json.Unmarshal(body, &single); err2 != nil {
|
|
return nil, fmt.Errorf("parse contents JSON: as array: %v; as object: %w", err, err2)
|
|
}
|
|
// Guard against empty objects ({}) or unexpected shapes that
|
|
// unmarshal successfully but carry no useful data.
|
|
if single.Name == "" && single.Path == "" && single.Type == "" {
|
|
return nil, fmt.Errorf("parse contents JSON: unexpected response format")
|
|
}
|
|
entries = []entry{single}
|
|
}
|
|
|
|
result := make([]vcs.ContentEntry, len(entries))
|
|
for i, e := range entries {
|
|
result[i] = vcs.ContentEntry{
|
|
Name: e.Name,
|
|
Path: e.Path,
|
|
Type: e.Type,
|
|
}
|
|
}
|
|
return result, nil
|
|
}
|
|
|
|
// escapePath validates a slash-separated repository path and returns it
// percent-encoded for use in a GitHub API URL.
//
// Validation:
//   - Any segment that is exactly "." or ".." is rejected outright rather
//     than being silently resolved.
//   - A path that cleans to "." (for example the empty string) is rejected.
//
// Names that merely begin with dots, such as "..foo" or "...", are ordinary
// file names and are accepted.
//
// On success the path has been normalised with path.Clean (repeated and
// trailing slashes collapsed), empty segments such as the one produced by a
// leading slash are dropped, and each remaining segment is escaped with
// url.PathEscape before the segments are rejoined with "/". A path made up
// only of slashes therefore yields the empty string.
func escapePath(p string) (string, error) {
	// Reject paths containing dot-segments rather than silently rewriting them.
	for _, seg := range strings.Split(p, "/") {
		if seg == "." || seg == ".." {
			return "", fmt.Errorf("path contains dot-segment %q: %s", seg, p)
		}
	}

	// Canonicalise, then make sure the result still lies inside the root.
	// The comparison is against whole segments: a bare
	// strings.HasPrefix(cleaned, "..") would also reject legitimate names
	// that simply start with two dots (e.g. "..foo").
	cleaned := path.Clean(p)
	if cleaned == "." || cleaned == ".." || strings.HasPrefix(cleaned, "../") {
		return "", fmt.Errorf("path resolves outside repository root: %s", p)
	}

	// Encode each segment individually so that "/" separators survive.
	parts := strings.Split(cleaned, "/")
	encoded := make([]string, 0, len(parts))
	for _, part := range parts {
		if part == "" {
			continue
		}
		encoded = append(encoded, url.PathEscape(part))
	}
	return strings.Join(encoded, "/"), nil
}
|
|
|
|
// maxFileContentSize is the maximum decoded file size (10 MB) to prevent
// resource exhaustion when decoding base64 content from the API.
const maxFileContentSize = 10 * 1024 * 1024

// decodeBase64Content decodes standard (padded) base64 content as returned by
// the GitHub contents API.
//
// GitHub wraps the base64 text with line breaks for formatting, so every
// "\r" and "\n" is stripped before decoding.
//
// It returns an error if the input is not valid base64 or if the decoded
// content would exceed maxFileContentSize. Content of exactly
// maxFileContentSize bytes is accepted.
func decodeBase64Content(encoded string) (string, error) {
	cleaned := strings.NewReplacer("\n", "", "\r", "").Replace(encoded)

	// Estimate the decoded size before allocating the output buffer.
	// Base64 maps 3 bytes to 4 characters, and each trailing '=' stands for
	// one byte that is NOT present in the final group. Ignoring the padding
	// would overestimate by up to 2 bytes and wrongly reject content that
	// sits exactly at the limit.
	padding := len(cleaned) - len(strings.TrimRight(cleaned, "="))
	if padding > 2 {
		// Valid input never has more than two pad characters; anything
		// beyond that is malformed and will be rejected by the decoder.
		padding = 2
	}
	estimated := len(cleaned)*3/4 - padding
	if estimated > maxFileContentSize {
		return "", fmt.Errorf("file content too large: estimated %d bytes exceeds limit of %d", estimated, maxFileContentSize)
	}

	decoded, err := base64.StdEncoding.DecodeString(cleaned)
	if err != nil {
		return "", err
	}
	// Defensive backstop in case the estimate above and the decoder ever
	// disagree about the output length.
	if len(decoded) > maxFileContentSize {
		return "", fmt.Errorf("file content too large: %d bytes exceeds limit of %d", len(decoded), maxFileContentSize)
	}
	return string(decoded), nil
}
|