feat(github): implement PRReader interface #102
@@ -6,24 +6,28 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"path"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// GetFileContentAtRef fetches a file at a specific ref from a repo.
|
||||
// If ref is empty, the query parameter is omitted (uses default branch).
|
||||
//
|
||||
// Note: dot-segments ("." and "..") in the path are silently removed to
|
||||
// prevent path traversal. This means a path like "foo/../bar" resolves
|
||||
// to "foo/bar" rather than "bar".
|
||||
func (c *Client) GetFileContentAtRef(ctx context.Context, owner, repo, path, ref string) (string, error) {
|
||||
// Returns an error if the path contains dot-segments (".", "..") or
|
||||
// attempts to traverse above the repository root.
|
||||
func (c *Client) GetFileContentAtRef(ctx context.Context, owner, repo, filePath, ref string) (string, error) {
|
||||
escaped, err := escapePath(filePath)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("invalid file path: %w", err)
|
||||
}
|
||||
reqURL := fmt.Sprintf("%s/repos/%s/%s/contents/%s",
|
||||
c.baseURL, url.PathEscape(owner), url.PathEscape(repo), escapePath(path))
|
||||
c.baseURL, url.PathEscape(owner), url.PathEscape(repo), escaped)
|
||||
if ref != "" {
|
||||
reqURL += "?ref=" + url.QueryEscape(ref)
|
||||
|
|
||||
}
|
||||
body, err := c.doGet(ctx, reqURL)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("fetch file %s: %w", path, err)
|
||||
return "", fmt.Errorf("fetch file %s: %w", filePath, err)
|
||||
}
|
||||
var resp struct {
|
||||
Content string `json:"content"`
|
||||
@@ -33,27 +37,42 @@ func (c *Client) GetFileContentAtRef(ctx context.Context, owner, repo, path, ref
|
||||
return "", fmt.Errorf("parse file content JSON: %w", err)
|
||||
}
|
||||
if resp.Encoding != "base64" {
|
||||
return "", fmt.Errorf("unexpected encoding %q for file %s", resp.Encoding, path)
|
||||
return "", fmt.Errorf("unexpected encoding %q for file %s", resp.Encoding, filePath)
|
||||
|
[NIT] Same as above: error message includes raw path. Defense-in-depth to avoid potential log injection if higher layers log errors without escaping. **[NIT]** Same as above: error message includes raw path. Defense-in-depth to avoid potential log injection if higher layers log errors without escaping.
|
||||
}
|
||||
|
gpt-review-bot
commented
[MINOR] escapePath silently strips dot-segments ("." and ".."), causing paths like "foo/../bar" to resolve to "foo/bar" instead of canonical "bar". This can lead to surprising behavior or 404s; consider using path.Clean for canonicalization and rejecting traversal (e.g., resulting paths starting with "../"), or returning an error on dot-segments rather than altering the path. **[MINOR]** escapePath silently strips dot-segments ("." and ".."), causing paths like "foo/../bar" to resolve to "foo/bar" instead of canonical "bar". This can lead to surprising behavior or 404s; consider using path.Clean for canonicalization and rejecting traversal (e.g., resulting paths starting with "../"), or returning an error on dot-segments rather than altering the path.
|
||||
decoded, err := decodeBase64Content(resp.Content)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("decode base64 content for %s: %w", path, err)
|
||||
return "", fmt.Errorf("decode base64 content for %s: %w", filePath, err)
|
||||
}
|
||||
return decoded, nil
|
||||
}
|
||||
|
||||
// escapePath encodes each segment of a slash-separated path, stripping
|
||||
// dot-segments to prevent path traversal.
|
||||
func escapePath(p string) string {
|
||||
parts := strings.Split(p, "/")
|
||||
var clean []string
|
||||
// escapePath validates and encodes a slash-separated file path for use in
|
||||
// GitHub API URLs. Returns an error if the path contains dot-segments ("."
|
||||
// or "..") or resolves to a path outside the repository root.
|
||||
func escapePath(p string) (string, error) {
|
||||
// Reject paths containing dot-segments rather than silently rewriting them.
|
||||
for _, seg := range strings.Split(p, "/") {
|
||||
if seg == "." || seg == ".." {
|
||||
return "", fmt.Errorf("path contains dot-segment %q: %s", seg, p)
|
||||
}
|
||||
}
|
||||
|
||||
// Use path.Clean for canonical form, then verify it doesn't escape root.
|
||||
|
gpt-review-bot
commented
[NIT] decodeBase64Content recreates a strings.Replacer on every call. For very hot paths, you could optimize by defining a package-level replacer or using a more lightweight approach; not critical here. **[NIT]** decodeBase64Content recreates a strings.Replacer on every call. For very hot paths, you could optimize by defining a package-level replacer or using a more lightweight approach; not critical here.
|
||||
cleaned := path.Clean(p)
|
||||
if cleaned == "." || strings.HasPrefix(cleaned, "..") {
|
||||
return "", fmt.Errorf("path resolves outside repository root: %s", p)
|
||||
}
|
||||
|
||||
// Encode each segment individually.
|
||||
parts := strings.Split(cleaned, "/")
|
||||
var encoded []string
|
||||
for _, part := range parts {
|
||||
if part == "." || part == ".." || part == "" {
|
||||
if part == "" {
|
||||
continue
|
||||
}
|
||||
clean = append(clean, url.PathEscape(part))
|
||||
encoded = append(encoded, url.PathEscape(part))
|
||||
}
|
||||
return strings.Join(clean, "/")
|
||||
return strings.Join(encoded, "/"), nil
|
||||
}
|
||||
|
||||
// decodeBase64Content decodes base64-encoded content from the GitHub contents API.
|
||||
|
||||
@@ -0,0 +1,79 @@
|
||||
package github
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestEscapePath_ValidPaths(t *testing.T) {
|
||||
t.Parallel()
|
||||
tests := []struct {
|
||||
name string
|
||||
path string
|
||||
want string
|
||||
}{
|
||||
{"simple file", "file.go", "file.go"},
|
||||
{"nested path", "path/to/file.go", "path/to/file.go"},
|
||||
{"special chars", "path/to/my file.go", "path/to/my%20file.go"},
|
||||
{"leading slash stripped", "/path/to/file.go", "path/to/file.go"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
got, err := escapePath(tt.path)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if got != tt.want {
|
||||
t.Errorf("escapePath(%q) = %q, want %q", tt.path, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestEscapePath_DotSegments(t *testing.T) {
|
||||
t.Parallel()
|
||||
tests := []struct {
|
||||
name string
|
||||
path string
|
||||
}{
|
||||
{"single dot", "./file.go"},
|
||||
{"double dot", "../file.go"},
|
||||
{"dot in middle", "path/./file.go"},
|
||||
{"parent traversal", "path/../file.go"},
|
||||
{"only dots", ".."},
|
||||
{"nested parent traversal", "a/b/../../c"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
_, err := escapePath(tt.path)
|
||||
if err == nil {
|
||||
t.Fatalf("expected error for path %q, got nil", tt.path)
|
||||
}
|
||||
if !strings.Contains(err.Error(), "dot-segment") {
|
||||
t.Errorf("expected error about dot-segment, got: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetFileContentAtRef_DotSegmentError(t *testing.T) {
|
||||
// Server should never be called — the error is caught before the request.
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
t.Fatal("server should not have been called")
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
|
sonnet-review-bot
commented
[MINOR] **[MINOR]** `TestGetFileContentAtRef_DotSegmentError` does not call `AllowInsecureHTTP()` unlike all other tests that create an httptest server and use `NewClient`. This may still work if `NewClient` defaults to allowing HTTP for test servers, but it is inconsistent and may cause issues if the client enforces HTTPS by default for non-test usage. All tests that spin up an httptest server should use `AllowInsecureHTTP()` for consistency.
|
||||
c := NewClient(srv.URL, "token")
|
||||
|
gpt-review-bot
commented
[MINOR] NewClient is called with arguments in an order inconsistent with other tests (srv.URL, "token"). While this test never performs a request, it’s confusing and could break if behavior changes. Use the same argument order as elsewhere (e.g., NewClient("token", srv.URL, ...)). **[MINOR]** NewClient is called with arguments in an order inconsistent with other tests (srv.URL, "token"). While this test never performs a request, it’s confusing and could break if behavior changes. Use the same argument order as elsewhere (e.g., NewClient("token", srv.URL, ...)).
|
||||
_, err := c.GetFileContentAtRef(context.Background(), "owner", "repo", "foo/../bar.go", "main")
|
||||
|
sonnet-review-bot
commented
[NIT] TestGetFileContentAtRef_DotSegmentError constructs the client with **[NIT]** TestGetFileContentAtRef_DotSegmentError constructs the client with `NewClient(srv.URL, "token")` — argument order is reversed compared to all other tests in pr_test.go which use `NewClient("token", srv.URL, AllowInsecureHTTP())`. If the constructor signature is `NewClient(token, baseURL string, opts...)`, this test may pass the URL as the token and vice versa, which would be a latent correctness bug. The test happens to work only because it expects the server to never be called (the error is caught before the HTTP request), so the wrong baseURL is never used. Verify the argument order is intentional or align with the rest of the test suite.
|
||||
if err == nil {
|
||||
t.Fatal("expected error for path with dot-segments")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "invalid file path") {
|
||||
t.Errorf("expected 'invalid file path' error, got: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -178,7 +178,7 @@ func (c *Client) GetCommitStatuses(ctx context.Context, owner, repo, sha string)
|
||||
result = append(result, vcs.CommitStatus{
|
||||
|
sonnet-review-bot
commented
[MINOR] The **[MINOR]** The `Description` field for check run statuses is populated with `derefString(cr.Conclusion)` — the raw conclusion value (e.g. "success", "failure", "skipped"). This is the same value that's being mapped to `Status` via `mapCheckRunStatus`. The doc comment says 'raw conclusion value (e.g. "success", "failure", "skipped")' but for commit statuses, `Description` is the human-readable description string (e.g. 'Build passed'). Using the conclusion as description is a semantic mismatch: for commit statuses `Description` carries a narrative message, while for check runs it carries a machine-readable conclusion. Consumers expecting a human-readable description field will get a raw enum string instead. Consider using `cr.Status` (e.g. 'completed', 'in_progress') or leaving it empty rather than duplicating the conclusion.
sonnet-review-bot
commented
[MINOR] The **[MINOR]** The `Description` field for check runs is set to `derefString(cr.Conclusion)` (raw conclusion value like 'success', 'failure'). This means the `Description` field has a different semantic for check runs vs commit statuses — for statuses it's a human-readable description ('Build passed'), while for check runs it's a machine value ('success'). This is documented in the comment but may surprise callers. Consider using `cr.Name` or leaving it empty if no textual description is available from the check runs API (the `output.summary` field would require schema expansion).
|
||||
Context: cr.Name,
|
||||
Status: mapCheckRunStatus(cr.Conclusion),
|
||||
Description: derefString(cr.Conclusion), // raw conclusion value (e.g. "success", "failure", "skipped")
|
||||
Description: "", // check runs have no human-readable description; conclusion is captured in Status
|
||||
TargetURL: cr.HTMLURL,
|
||||
|
[MINOR] mapCheckRunStatus maps "cancelled", "skipped", and "neutral" to "success". If this status is later used to gate security-critical actions (e.g., merges), a cancelled required check could be interpreted as success and potentially weaken enforcement. Consider mapping these to a non-success state (e.g., pending) or exposing them distinctly so callers can enforce policies accurately. **[MINOR]** mapCheckRunStatus maps "cancelled", "skipped", and "neutral" to "success". If this status is later used to gate security-critical actions (e.g., merges), a cancelled required check could be interpreted as success and potentially weaken enforcement. Consider mapping these to a non-success state (e.g., pending) or exposing them distinctly so callers can enforce policies accurately.
|
||||
})
|
||||
}
|
||||
@@ -220,10 +220,3 @@ func mapCheckRunStatus(conclusion *string) string {
|
||||
}
|
||||
}
|
||||
|
||||
// derefString safely dereferences a string pointer, returning empty string if nil.
|
||||
func derefString(s *string) string {
|
||||
if s == nil {
|
||||
return ""
|
||||
}
|
||||
return *s
|
||||
}
|
||||
|
||||
[NIT] User-controlled path is interpolated into error messages without sanitization. If callers log these errors verbatim, an attacker-controlled path containing newlines could enable log injection. Low practical risk given typical GitHub path constraints.