feat(github): implement PRReader interface #102
@@ -6,24 +6,28 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/url"
|
"net/url"
|
||||||
|
"path"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
// GetFileContentAtRef fetches a file at a specific ref from a repo.
|
// GetFileContentAtRef fetches a file at a specific ref from a repo.
|
||||||
// If ref is empty, the query parameter is omitted (uses default branch).
|
// If ref is empty, the query parameter is omitted (uses default branch).
|
||||||
//
|
//
|
||||||
// Note: dot-segments ("." and "..") in the path are silently removed to
|
// Returns an error if the path contains dot-segments (".", "..") or
|
||||||
// prevent path traversal. This means a path like "foo/../bar" resolves
|
// attempts to traverse above the repository root.
|
||||||
// to "foo/bar" rather than "bar".
|
func (c *Client) GetFileContentAtRef(ctx context.Context, owner, repo, filePath, ref string) (string, error) {
|
||||||
func (c *Client) GetFileContentAtRef(ctx context.Context, owner, repo, path, ref string) (string, error) {
|
escaped, err := escapePath(filePath)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("invalid file path: %w", err)
|
||||||
|
}
|
||||||
reqURL := fmt.Sprintf("%s/repos/%s/%s/contents/%s",
|
reqURL := fmt.Sprintf("%s/repos/%s/%s/contents/%s",
|
||||||
c.baseURL, url.PathEscape(owner), url.PathEscape(repo), escapePath(path))
|
c.baseURL, url.PathEscape(owner), url.PathEscape(repo), escaped)
|
||||||
if ref != "" {
|
if ref != "" {
|
||||||
reqURL += "?ref=" + url.QueryEscape(ref)
|
reqURL += "?ref=" + url.QueryEscape(ref)
|
||||||
|
|
|||||||
}
|
}
|
||||||
body, err := c.doGet(ctx, reqURL)
|
body, err := c.doGet(ctx, reqURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("fetch file %s: %w", path, err)
|
return "", fmt.Errorf("fetch file %s: %w", filePath, err)
|
||||||
}
|
}
|
||||||
var resp struct {
|
var resp struct {
|
||||||
Content string `json:"content"`
|
Content string `json:"content"`
|
||||||
@@ -33,27 +37,42 @@ func (c *Client) GetFileContentAtRef(ctx context.Context, owner, repo, path, ref
|
|||||||
return "", fmt.Errorf("parse file content JSON: %w", err)
|
return "", fmt.Errorf("parse file content JSON: %w", err)
|
||||||
}
|
}
|
||||||
if resp.Encoding != "base64" {
|
if resp.Encoding != "base64" {
|
||||||
return "", fmt.Errorf("unexpected encoding %q for file %s", resp.Encoding, path)
|
return "", fmt.Errorf("unexpected encoding %q for file %s", resp.Encoding, filePath)
|
||||||
}
|
}
|
||||||
|
gpt-review-bot
commented
[MINOR] escapePath silently strips dot-segments ("." and ".."), causing paths like "foo/../bar" to resolve to "foo/bar" instead of canonical "bar". This can lead to surprising behavior or 404s; consider using path.Clean for canonicalization and rejecting traversal (e.g., resulting paths starting with "../"), or returning an error on dot-segments rather than altering the path. **[MINOR]** escapePath silently strips dot-segments ("." and ".."), causing paths like "foo/../bar" to resolve to "foo/bar" instead of canonical "bar". This can lead to surprising behavior or 404s; consider using path.Clean for canonicalization and rejecting traversal (e.g., resulting paths starting with "../"), or returning an error on dot-segments rather than altering the path.
|
|||||||
decoded, err := decodeBase64Content(resp.Content)
|
decoded, err := decodeBase64Content(resp.Content)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("decode base64 content for %s: %w", path, err)
|
return "", fmt.Errorf("decode base64 content for %s: %w", filePath, err)
|
||||||
}
|
}
|
||||||
return decoded, nil
|
return decoded, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// escapePath encodes each segment of a slash-separated path, stripping
|
// escapePath validates and encodes a slash-separated file path for use in
|
||||||
// dot-segments to prevent path traversal.
|
// GitHub API URLs. Returns an error if the path contains dot-segments ("."
|
||||||
func escapePath(p string) string {
|
// or "..") or resolves to a path outside the repository root.
|
||||||
parts := strings.Split(p, "/")
|
func escapePath(p string) (string, error) {
|
||||||
var clean []string
|
// Reject paths containing dot-segments rather than silently rewriting them.
|
||||||
|
for _, seg := range strings.Split(p, "/") {
|
||||||
|
if seg == "." || seg == ".." {
|
||||||
|
return "", fmt.Errorf("path contains dot-segment %q: %s", seg, p)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use path.Clean for canonical form, then verify it doesn't escape root.
|
||||||
|
cleaned := path.Clean(p)
|
||||||
|
if cleaned == "." || strings.HasPrefix(cleaned, "..") {
|
||||||
|
return "", fmt.Errorf("path resolves outside repository root: %s", p)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Encode each segment individually.
|
||||||
|
parts := strings.Split(cleaned, "/")
|
||||||
|
var encoded []string
|
||||||
for _, part := range parts {
|
for _, part := range parts {
|
||||||
if part == "." || part == ".." || part == "" {
|
if part == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
clean = append(clean, url.PathEscape(part))
|
encoded = append(encoded, url.PathEscape(part))
|
||||||
}
|
}
|
||||||
return strings.Join(clean, "/")
|
return strings.Join(encoded, "/"), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// decodeBase64Content decodes base64-encoded content from the GitHub contents API.
|
// decodeBase64Content decodes base64-encoded content from the GitHub contents API.
|
||||||
|
|||||||
@@ -0,0 +1,79 @@
|
|||||||
|
package github
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestEscapePath_ValidPaths(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
path string
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{"simple file", "file.go", "file.go"},
|
||||||
|
{"nested path", "path/to/file.go", "path/to/file.go"},
|
||||||
|
{"special chars", "path/to/my file.go", "path/to/my%20file.go"},
|
||||||
|
{"leading slash stripped", "/path/to/file.go", "path/to/file.go"},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
got, err := escapePath(tt.path)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
if got != tt.want {
|
||||||
|
t.Errorf("escapePath(%q) = %q, want %q", tt.path, got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEscapePath_DotSegments(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
path string
|
||||||
|
}{
|
||||||
|
{"single dot", "./file.go"},
|
||||||
|
{"double dot", "../file.go"},
|
||||||
|
{"dot in middle", "path/./file.go"},
|
||||||
|
{"parent traversal", "path/../file.go"},
|
||||||
|
{"only dots", ".."},
|
||||||
|
{"nested parent traversal", "a/b/../../c"},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
_, err := escapePath(tt.path)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("expected error for path %q, got nil", tt.path)
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "dot-segment") {
|
||||||
|
t.Errorf("expected error about dot-segment, got: %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetFileContentAtRef_DotSegmentError(t *testing.T) {
|
||||||
|
// Server should never be called — the error is caught before the request.
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
t.Fatal("server should not have been called")
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
sonnet-review-bot
commented
[MINOR] **[MINOR]** `TestGetFileContentAtRef_DotSegmentError` does not call `AllowInsecureHTTP()` unlike all other tests that create an httptest server and use `NewClient`. This may still work if `NewClient` defaults to allowing HTTP for test servers, but it is inconsistent and may cause issues if the client enforces HTTPS by default for non-test usage. All tests that spin up an httptest server should use `AllowInsecureHTTP()` for consistency.
|
|||||||
|
c := NewClient(srv.URL, "token")
|
||||||
|
_, err := c.GetFileContentAtRef(context.Background(), "owner", "repo", "foo/../bar.go", "main")
|
||||||
|
sonnet-review-bot
commented
[NIT] TestGetFileContentAtRef_DotSegmentError constructs the client with **[NIT]** TestGetFileContentAtRef_DotSegmentError constructs the client with `NewClient(srv.URL, "token")` — argument order is reversed compared to all other tests in pr_test.go which use `NewClient("token", srv.URL, AllowInsecureHTTP())`. If the constructor signature is `NewClient(token, baseURL string, opts...)`, this test may pass the URL as the token and vice versa, which would be a latent correctness bug. The test happens to work only because it expects the server to never be called (the error is caught before the HTTP request), so the wrong baseURL is never used. Verify the argument order is intentional or align with the rest of the test suite.
|
|||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error for path with dot-segments")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "invalid file path") {
|
||||||
|
t.Errorf("expected 'invalid file path' error, got: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -178,7 +178,7 @@ func (c *Client) GetCommitStatuses(ctx context.Context, owner, repo, sha string)
|
|||||||
result = append(result, vcs.CommitStatus{
|
result = append(result, vcs.CommitStatus{
|
||||||
|
sonnet-review-bot
commented
[MINOR] The **[MINOR]** The `Description` field for check runs is set to `derefString(cr.Conclusion)` (raw conclusion value like 'success', 'failure'). This means the `Description` field has a different semantic for check runs vs commit statuses — for statuses it's a human-readable description ('Build passed'), while for check runs it's a machine value ('success'). This is documented in the comment but may surprise callers. Consider using `cr.Name` or leaving it empty if no textual description is available from the check runs API (the `output.summary` field would require schema expansion).
|
|||||||
Context: cr.Name,
|
Context: cr.Name,
|
||||||
Status: mapCheckRunStatus(cr.Conclusion),
|
Status: mapCheckRunStatus(cr.Conclusion),
|
||||||
Description: derefString(cr.Conclusion), // raw conclusion value (e.g. "success", "failure", "skipped")
|
Description: "", // check runs have no human-readable description; conclusion is captured in Status
|
||||||
TargetURL: cr.HTMLURL,
|
TargetURL: cr.HTMLURL,
|
||||||
|
[MINOR] mapCheckRunStatus maps "cancelled", "skipped", and "neutral" to "success". If this status is later used to gate security-critical actions (e.g., merges), a cancelled required check could be interpreted as success and potentially weaken enforcement. Consider mapping these to a non-success state (e.g., pending) or exposing them distinctly so callers can enforce policies accurately. **[MINOR]** mapCheckRunStatus maps "cancelled", "skipped", and "neutral" to "success". If this status is later used to gate security-critical actions (e.g., merges), a cancelled required check could be interpreted as success and potentially weaken enforcement. Consider mapping these to a non-success state (e.g., pending) or exposing them distinctly so callers can enforce policies accurately.
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -220,10 +220,3 @@ func mapCheckRunStatus(conclusion *string) string {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// derefString safely dereferences a string pointer, returning empty string if nil.
|
|
||||||
func derefString(s *string) string {
|
|
||||||
if s == nil {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
return *s
|
|
||||||
}
|
|
||||||
|
|||||||
[NIT] User-controlled path is interpolated into error messages without sanitization. If callers log these errors verbatim, an attacker-controlled path containing newlines could enable log injection. Low practical risk given typical GitHub path constraints.