package github

import (
	"context"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"net/url"
	"path"
	"strings"

	"gitea.weiker.me/rodin/review-bot/vcs"
)

// GetFileContent returns the decoded content of filePath in owner/repo at ref.
// It is a thin wrapper that forwards every argument to GetFileContentAtRef.
func (c *Client) GetFileContent(ctx context.Context, owner, repo, filePath, ref string) (string, error) {
	return c.GetFileContentAtRef(ctx, owner, repo, filePath, ref)
}

// GetFileContentAtRef returns the decoded content of filePath in owner/repo
// as it exists at ref.
//
// When ref is empty no "ref" query parameter is sent, leaving the choice of
// ref to the server (GitHub documents this as the repository's default
// branch).
//
// An error is returned when escapePath rejects filePath (for example because
// it contains a "." or ".." segment), when the request fails, when the
// response is not the expected JSON object, when the reported encoding is
// anything other than "base64", or when the content cannot be decoded.
func (c *Client) GetFileContentAtRef(ctx context.Context, owner, repo, filePath, ref string) (string, error) {
	// contentsResponse holds the only two fields of the API response we use.
	type contentsResponse struct {
		Content  string `json:"content"`
		Encoding string `json:"encoding"`
	}

	// Validate the path before building any URL from it.
	encodedPath, pathErr := escapePath(filePath)
	if pathErr != nil {
		return "", fmt.Errorf("invalid file path: %w", pathErr)
	}

	endpoint := fmt.Sprintf("%s/repos/%s/%s/contents/%s", c.baseURL, url.PathEscape(owner), url.PathEscape(repo), encodedPath)
	if len(ref) > 0 {
		endpoint = endpoint + "?ref=" + url.QueryEscape(ref)
	}

	raw, getErr := c.doGet(ctx, endpoint)
	if getErr != nil {
		return "", fmt.Errorf("fetch file %s: %w", filePath, getErr)
	}

	var payload contentsResponse
	if jsonErr := json.Unmarshal(raw, &payload); jsonErr != nil {
		return "", fmt.Errorf("parse file content JSON: %w", jsonErr)
	}

	// Only base64 payloads can be decoded here; anything else is reported
	// to the caller rather than returned undecoded.
	if payload.Encoding != "base64" {
		return "", fmt.Errorf("unexpected encoding %q for file %s", payload.Encoding, filePath)
	}

	text, decodeErr := decodeBase64Content(payload.Content)
	if decodeErr != nil {
		return "", fmt.Errorf("decode base64 content for %s: %w", filePath, decodeErr)
	}
	return text, nil
}

// ListContents lists the files and directories found at filePath in
// owner/repo, as reported by the GitHub contents API.
// For a directory the API responds with a JSON array, one element per entry.
// When filePath names a single file the API responds with a JSON object
// instead; that case is handled by returning a single-element slice.
func (c *Client) ListContents(ctx context.Context, owner, repo, filePath string) ([]vcs.ContentEntry, error) { escaped, err := escapePath(filePath) if err != nil { return nil, fmt.Errorf("invalid file path: %w", err) } reqURL := fmt.Sprintf("%s/repos/%s/%s/contents/%s", c.baseURL, url.PathEscape(owner), url.PathEscape(repo), escaped) body, err := c.doGet(ctx, reqURL) if err != nil { return nil, fmt.Errorf("list contents %s: %w", filePath, err) } type entry struct { Name string `json:"name"` Path string `json:"path"` Type string `json:"type"` } // The GitHub contents API returns an array for directories and an object // for single files. Try array first (common case), then fall back to object. // An empty array ([]) is valid — it represents an empty directory — and // results in a zero-length slice returned without error. var entries []entry if err := json.Unmarshal(body, &entries); err != nil { var single entry if err2 := json.Unmarshal(body, &single); err2 != nil { return nil, fmt.Errorf("parse contents JSON: as array: %v; as object: %w", err, err2) } // Guard against empty objects ({}) or unexpected shapes that // unmarshal successfully but carry no useful data. if single.Name == "" && single.Path == "" && single.Type == "" { return nil, fmt.Errorf("parse contents JSON: unexpected response format") } entries = []entry{single} } result := make([]vcs.ContentEntry, len(entries)) for i, e := range entries { result[i] = vcs.ContentEntry{ Name: e.Name, Path: e.Path, Type: e.Type, } } return result, nil } // escapePath validates and encodes a slash-separated file path for use in // GitHub API URLs. Returns an error if the path contains dot-segments ("." // or "..") or resolves to a path outside the repository root. func escapePath(p string) (string, error) { // Reject paths containing dot-segments rather than silently rewriting them. for _, seg := range strings.Split(p, "/") { if seg == "." || seg == ".." 
{ return "", fmt.Errorf("path contains dot-segment %q: %s", seg, p) } } // Use path.Clean for canonical form, then verify it doesn't escape root. cleaned := path.Clean(p) if cleaned == "." || strings.HasPrefix(cleaned, "..") { return "", fmt.Errorf("path resolves outside repository root: %s", p) } // Encode each segment individually. parts := strings.Split(cleaned, "/") var encoded []string for _, part := range parts { if part == "" { continue } encoded = append(encoded, url.PathEscape(part)) } return strings.Join(encoded, "/"), nil } // maxFileContentSize is the maximum decoded file size (10 MB) to prevent // resource exhaustion when decoding base64 content from the API. const maxFileContentSize = 10 * 1024 * 1024 // decodeBase64Content decodes base64-encoded content from the GitHub contents API. // GitHub returns base64 content with line breaks for formatting; we strip \r and \n before decoding. // Returns an error if the decoded content exceeds maxFileContentSize. func decodeBase64Content(encoded string) (string, error) { cleaned := strings.NewReplacer("\n", "", "\r", "").Replace(encoded) // Check estimated decoded size before allocating. // Base64 encodes 3 bytes into 4 chars, so decoded ~ len*3/4. if len(cleaned)*3/4 > maxFileContentSize { return "", fmt.Errorf("file content too large: estimated %d bytes exceeds limit of %d", len(cleaned)*3/4, maxFileContentSize) } decoded, err := base64.StdEncoding.DecodeString(cleaned) if err != nil { return "", err } if len(decoded) > maxFileContentSize { return "", fmt.Errorf("file content too large: %d bytes exceeds limit of %d", len(decoded), maxFileContentSize) } return string(decoded), nil }