Files
review-bot/github/files.go
T
claw 6316007eb1 fix: address review findings from reviews #2955 and #2958
- Convert handleResponse to package-level function (unused receiver) [#17955]
- Add clarifying comment for nil resp on transport error [#17956]
- Use consistent %w wrapping in dual-unmarshal error path [#17957]
- Add SafeError() method to APIError for safe logging [#17964]
- Enforce safe CheckRedirect policy in SetHTTPClient [#17965]
- Add tests for SafeError and SetHTTPClient enforcement
2026-05-12 21:19:01 -07:00

136 lines
4.9 KiB
Go

package github
import (
"context"
"encoding/base64"
"encoding/json"
"fmt"
"net/url"
"strings"
"gitea.weiker.me/rodin/review-bot/vcs"
)
// GetFileContent fetches a file from a repo at the given ref.
// Delegates to GetFileContentAtRef with the provided ref.
func (c *Client) GetFileContent(ctx context.Context, owner, repo, path, ref string) (string, error) {
return c.GetFileContentAtRef(ctx, owner, repo, path, ref)
}
// GetFileContentAtRef fetches a file at a specific ref from a repo.
// If ref is empty, the query parameter is omitted (uses default branch).
//
// Note: dot-segments ("." and "..") in the path are silently removed to
// prevent path traversal. This means a path like "foo/../bar" resolves
// to "foo/bar" rather than "bar".
func (c *Client) GetFileContentAtRef(ctx context.Context, owner, repo, path, ref string) (string, error) {
reqURL := fmt.Sprintf("%s/repos/%s/%s/contents/%s",
c.baseURL, url.PathEscape(owner), url.PathEscape(repo), escapePath(path))
if ref != "" {
reqURL += "?ref=" + url.QueryEscape(ref)
}
body, err := c.doGet(ctx, reqURL)
if err != nil {
return "", fmt.Errorf("fetch file %s: %w", path, err)
}
var resp struct {
Content string `json:"content"`
Encoding string `json:"encoding"`
}
if err := json.Unmarshal(body, &resp); err != nil {
return "", fmt.Errorf("parse file content JSON: %w", err)
}
if resp.Encoding != "base64" {
return "", fmt.Errorf("unexpected encoding %q for file %s", resp.Encoding, path)
}
decoded, err := decodeBase64Content(resp.Content)
if err != nil {
return "", fmt.Errorf("decode base64 content for %s: %w", path, err)
}
return decoded, nil
}
// ListContents lists files and directories at a given path in a repo.
// Returns the directory listing from the GitHub contents API.
// If the path points to a single file (not a directory), the API returns
// a JSON object instead of an array; this is handled by returning a
// single-element slice.
//
// Note: dot-segments ("." and "..") in the path are silently removed to
// prevent path traversal. This means a path like "foo/../bar" resolves
// to "foo/bar" rather than "bar".
func (c *Client) ListContents(ctx context.Context, owner, repo, path string) ([]vcs.ContentEntry, error) {
reqURL := fmt.Sprintf("%s/repos/%s/%s/contents/%s",
c.baseURL, url.PathEscape(owner), url.PathEscape(repo), escapePath(path))
body, err := c.doGet(ctx, reqURL)
if err != nil {
return nil, fmt.Errorf("list contents %s: %w", path, err)
}
type entry struct {
Name string `json:"name"`
Path string `json:"path"`
Type string `json:"type"`
}
// The GitHub contents API returns an array for directories and an object
// for single files. Try array first (common case), then fall back to object.
// An empty array ([]) is valid — it represents an empty directory — and
// results in a zero-length slice returned without error.
var entries []entry
if err := json.Unmarshal(body, &entries); err != nil {
var single entry
if err2 := json.Unmarshal(body, &single); err2 != nil {
return nil, fmt.Errorf("parse contents JSON: as array: %w; as object: %w", err, err2)
}
// Guard against empty objects ({}) or unexpected shapes that
// unmarshal successfully but carry no useful data.
if single.Name == "" && single.Path == "" && single.Type == "" {
return nil, fmt.Errorf("parse contents JSON: unexpected response format")
}
entries = []entry{single}
}
result := make([]vcs.ContentEntry, len(entries))
for i, e := range entries {
result[i] = vcs.ContentEntry{
Name: e.Name,
Path: e.Path,
Type: e.Type,
}
}
return result, nil
}
// escapePath escapes each segment of a relative file path for use in URLs.
// Slashes are preserved as path separators; other special characters are escaped.
// Dot-segments ("." and "..") and empty segments (from consecutive slashes like
// "a//b") are silently removed to prevent path traversal and produce canonical
// paths. This is intentional: callers may receive a different path than requested
// without error. The function is package-private, and all callers
// (GetFileContentAtRef, ListContents) already handle missing-file errors from the
// API if the cleaned path doesn't match what the caller intended.
func escapePath(p string) string {
parts := strings.Split(p, "/")
var clean []string
for _, part := range parts {
if part == "." || part == ".." || part == "" {
continue
}
clean = append(clean, url.PathEscape(part))
}
return strings.Join(clean, "/")
}
// decodeBase64Content decodes base64-encoded content from the GitHub contents API.
// GitHub returns base64 content with line breaks for formatting; we strip \r and \n before decoding.
func decodeBase64Content(encoded string) (string, error) {
// GitHub inserts newlines in base64 content
cleaned := strings.NewReplacer("\n", "", "\r", "").Replace(encoded)
decoded, err := base64.StdEncoding.DecodeString(cleaned)
if err != nil {
return "", err
}
return string(decoded), nil
}