feat(github): implement PRReader interface #102

Merged
aweiker merged 5 commits from issue-80-b-pr-reader into feature/github-support 2026-05-13 05:30:37 +00:00
3 changed files with 115 additions and 24 deletions
Showing only changes of commit 3cd5ae594e - Show all commits
+35 -16
View File
@@ -6,24 +6,28 @@ import (
"encoding/json"
"fmt"
"net/url"
"path"
"strings"
)
// GetFileContentAtRef fetches a file at a specific ref from a repo.
// If ref is empty, the query parameter is omitted (uses default branch).
//
// Note: dot-segments ("." and "..") in the path are silently removed to
// prevent path traversal. This means a path like "foo/../bar" resolves
// to "foo/bar" rather than "bar".
func (c *Client) GetFileContentAtRef(ctx context.Context, owner, repo, path, ref string) (string, error) {
// Returns an error if the path contains dot-segments (".", "..") or
// attempts to traverse above the repository root.
func (c *Client) GetFileContentAtRef(ctx context.Context, owner, repo, filePath, ref string) (string, error) {
escaped, err := escapePath(filePath)
if err != nil {
return "", fmt.Errorf("invalid file path: %w", err)
}
reqURL := fmt.Sprintf("%s/repos/%s/%s/contents/%s",
c.baseURL, url.PathEscape(owner), url.PathEscape(repo), escapePath(path))
c.baseURL, url.PathEscape(owner), url.PathEscape(repo), escaped)
if ref != "" {
reqURL += "?ref=" + url.QueryEscape(ref)
Review

[NIT] User-controlled path is interpolated into error messages without sanitization. If callers log these errors verbatim, an attacker-controlled path containing newlines could enable log injection. Low practical risk given typical GitHub path constraints.

**[NIT]** User-controlled path is interpolated into error messages without sanitization. If callers log these errors verbatim, an attacker-controlled path containing newlines could enable log injection. Low practical risk given typical GitHub path constraints.
}
body, err := c.doGet(ctx, reqURL)
if err != nil {
return "", fmt.Errorf("fetch file %s: %w", path, err)
return "", fmt.Errorf("fetch file %s: %w", filePath, err)
}
var resp struct {
Content string `json:"content"`
1
@@ -33,27 +37,42 @@ func (c *Client) GetFileContentAtRef(ctx context.Context, owner, repo, path, ref
return "", fmt.Errorf("parse file content JSON: %w", err)
}
if resp.Encoding != "base64" {
return "", fmt.Errorf("unexpected encoding %q for file %s", resp.Encoding, path)
return "", fmt.Errorf("unexpected encoding %q for file %s", resp.Encoding, filePath)
Outdated
Review

[NIT] Same as above: error message includes raw path. Defense-in-depth to avoid potential log injection if higher layers log errors without escaping.

**[NIT]** Same as above: error message includes raw path. Defense-in-depth to avoid potential log injection if higher layers log errors without escaping.
}
Review

[MINOR] escapePath silently strips dot-segments ("." and ".."), causing paths like "foo/../bar" to resolve to "foo/bar" instead of canonical "bar". This can lead to surprising behavior or 404s; consider using path.Clean for canonicalization and rejecting traversal (e.g., resulting paths starting with "../"), or returning an error on dot-segments rather than altering the path.

**[MINOR]** escapePath silently strips dot-segments ("." and ".."), causing paths like "foo/../bar" to resolve to "foo/bar" instead of canonical "bar". This can lead to surprising behavior or 404s; consider using path.Clean for canonicalization and rejecting traversal (e.g., resulting paths starting with "../"), or returning an error on dot-segments rather than altering the path.
decoded, err := decodeBase64Content(resp.Content)
if err != nil {
return "", fmt.Errorf("decode base64 content for %s: %w", path, err)
return "", fmt.Errorf("decode base64 content for %s: %w", filePath, err)
}
return decoded, nil
}
// escapePath encodes each segment of a slash-separated path, stripping
// dot-segments to prevent path traversal.
func escapePath(p string) string {
parts := strings.Split(p, "/")
var clean []string
// escapePath validates and encodes a slash-separated file path for use in
// GitHub API URLs. Returns an error if the path contains dot-segments ("."
// or "..") or resolves to a path outside the repository root.
func escapePath(p string) (string, error) {
// Reject paths containing dot-segments rather than silently rewriting them.
for _, seg := range strings.Split(p, "/") {
if seg == "." || seg == ".." {
return "", fmt.Errorf("path contains dot-segment %q: %s", seg, p)
}
}
// Use path.Clean for canonical form, then verify it doesn't escape root.
Outdated
Review

[NIT] decodeBase64Content recreates a strings.Replacer on every call. For very hot paths, you could optimize by defining a package-level replacer or using a more lightweight approach; not critical here.

**[NIT]** decodeBase64Content recreates a strings.Replacer on every call. For very hot paths, you could optimize by defining a package-level replacer or using a more lightweight approach; not critical here.
cleaned := path.Clean(p)
if cleaned == "." || strings.HasPrefix(cleaned, "..") {
return "", fmt.Errorf("path resolves outside repository root: %s", p)
}
// Encode each segment individually.
parts := strings.Split(cleaned, "/")
var encoded []string
for _, part := range parts {
if part == "." || part == ".." || part == "" {
if part == "" {
continue
}
clean = append(clean, url.PathEscape(part))
encoded = append(encoded, url.PathEscape(part))
}
return strings.Join(clean, "/")
return strings.Join(encoded, "/"), nil
}
// decodeBase64Content decodes base64-encoded content from the GitHub contents API.
1
+79
View File
@@ -0,0 +1,79 @@
package github
import (
"context"
"net/http"
"net/http/httptest"
"strings"
"testing"
)
func TestEscapePath_ValidPaths(t *testing.T) {
t.Parallel()
tests := []struct {
name string
path string
want string
}{
{"simple file", "file.go", "file.go"},
{"nested path", "path/to/file.go", "path/to/file.go"},
{"special chars", "path/to/my file.go", "path/to/my%20file.go"},
{"leading slash stripped", "/path/to/file.go", "path/to/file.go"},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
got, err := escapePath(tt.path)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if got != tt.want {
t.Errorf("escapePath(%q) = %q, want %q", tt.path, got, tt.want)
}
})
}
}
func TestEscapePath_DotSegments(t *testing.T) {
t.Parallel()
tests := []struct {
name string
path string
}{
{"single dot", "./file.go"},
{"double dot", "../file.go"},
{"dot in middle", "path/./file.go"},
{"parent traversal", "path/../file.go"},
{"only dots", ".."},
{"nested parent traversal", "a/b/../../c"},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
_, err := escapePath(tt.path)
if err == nil {
t.Fatalf("expected error for path %q, got nil", tt.path)
}
if !strings.Contains(err.Error(), "dot-segment") {
t.Errorf("expected error about dot-segment, got: %v", err)
}
})
}
}
func TestGetFileContentAtRef_DotSegmentError(t *testing.T) {
// Server should never be called — the error is caught before the request.
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
t.Fatal("server should not have been called")
}))
defer srv.Close()
Review

[MINOR] TestGetFileContentAtRef_DotSegmentError does not call AllowInsecureHTTP() unlike all other tests that create an httptest server and use NewClient. This may still work if NewClient defaults to allowing HTTP for test servers, but it is inconsistent and may cause issues if the client enforces HTTPS by default for non-test usage. All tests that spin up an httptest server should use AllowInsecureHTTP() for consistency.

**[MINOR]** `TestGetFileContentAtRef_DotSegmentError` does not call `AllowInsecureHTTP()` unlike all other tests that create an httptest server and use `NewClient`. This may still work if `NewClient` defaults to allowing HTTP for test servers, but it is inconsistent and may cause issues if the client enforces HTTPS by default for non-test usage. All tests that spin up an httptest server should use `AllowInsecureHTTP()` for consistency.
c := NewClient(srv.URL, "token")
Outdated
Review

[MINOR] NewClient is called with arguments in an order inconsistent with other tests (srv.URL, "token"). While this test never performs a request, it’s confusing and could break if behavior changes. Use the same argument order as elsewhere (e.g., NewClient("token", srv.URL, ...)).

**[MINOR]** NewClient is called with arguments in an order inconsistent with other tests (srv.URL, "token"). While this test never performs a request, it’s confusing and could break if behavior changes. Use the same argument order as elsewhere (e.g., NewClient("token", srv.URL, ...)).
_, err := c.GetFileContentAtRef(context.Background(), "owner", "repo", "foo/../bar.go", "main")
Review

[NIT] TestGetFileContentAtRef_DotSegmentError constructs the client with NewClient(srv.URL, "token") — argument order is reversed compared to all other tests in pr_test.go which use NewClient("token", srv.URL, AllowInsecureHTTP()). If the constructor signature is NewClient(token, baseURL string, opts...), this test may pass the URL as the token and vice versa, which would be a latent correctness bug. The test happens to work only because it expects the server to never be called (the error is caught before the HTTP request), so the wrong baseURL is never used. Verify the argument order is intentional or align with the rest of the test suite.

**[NIT]** TestGetFileContentAtRef_DotSegmentError constructs the client with `NewClient(srv.URL, "token")` — argument order is reversed compared to all other tests in pr_test.go which use `NewClient("token", srv.URL, AllowInsecureHTTP())`. If the constructor signature is `NewClient(token, baseURL string, opts...)`, this test may pass the URL as the token and vice versa, which would be a latent correctness bug. The test happens to work only because it expects the server to never be called (the error is caught before the HTTP request), so the wrong baseURL is never used. Verify the argument order is intentional or align with the rest of the test suite.
if err == nil {
t.Fatal("expected error for path with dot-segments")
}
if !strings.Contains(err.Error(), "invalid file path") {
t.Errorf("expected 'invalid file path' error, got: %v", err)
}
}
+1 -8
View File
11
@@ -178,7 +178,7 @@ func (c *Client) GetCommitStatuses(ctx context.Context, owner, repo, sha string)
result = append(result, vcs.CommitStatus{
Outdated
Review

[MINOR] The Description field for check run statuses is populated with derefString(cr.Conclusion) — the raw conclusion value (e.g. "success", "failure", "skipped"). This is the same value that's being mapped to Status via mapCheckRunStatus. The doc comment says 'raw conclusion value (e.g. "success", "failure", "skipped")' but for commit statuses, Description is the human-readable description string (e.g. 'Build passed'). Using the conclusion as description is a semantic mismatch: for commit statuses Description carries a narrative message, while for check runs it carries a machine-readable conclusion. Consumers expecting a human-readable description field will get a raw enum string instead. Consider using cr.Status (e.g. 'completed', 'in_progress') or leaving it empty rather than duplicating the conclusion.

**[MINOR]** The `Description` field for check run statuses is populated with `derefString(cr.Conclusion)` — the raw conclusion value (e.g. "success", "failure", "skipped"). This is the same value that's being mapped to `Status` via `mapCheckRunStatus`. The doc comment says 'raw conclusion value (e.g. "success", "failure", "skipped")' but for commit statuses, `Description` is the human-readable description string (e.g. 'Build passed'). Using the conclusion as description is a semantic mismatch: for commit statuses `Description` carries a narrative message, while for check runs it carries a machine-readable conclusion. Consumers expecting a human-readable description field will get a raw enum string instead. Consider using `cr.Status` (e.g. 'completed', 'in_progress') or leaving it empty rather than duplicating the conclusion.
Review

[MINOR] The Description field for check runs is set to derefString(cr.Conclusion) (raw conclusion value like 'success', 'failure'). This means the Description field has a different semantic for check runs vs commit statuses — for statuses it's a human-readable description ('Build passed'), while for check runs it's a machine value ('success'). This is documented in the comment but may surprise callers. Consider using cr.Name or leaving it empty if no textual description is available from the check runs API (the output.summary field would require schema expansion).

**[MINOR]** The `Description` field for check runs is set to `derefString(cr.Conclusion)` (raw conclusion value like 'success', 'failure'). This means the `Description` field has a different semantic for check runs vs commit statuses — for statuses it's a human-readable description ('Build passed'), while for check runs it's a machine value ('success'). This is documented in the comment but may surprise callers. Consider using `cr.Name` or leaving it empty if no textual description is available from the check runs API (the `output.summary` field would require schema expansion).
Context: cr.Name,
Status: mapCheckRunStatus(cr.Conclusion),
Description: derefString(cr.Conclusion), // raw conclusion value (e.g. "success", "failure", "skipped")
Description: "", // check runs have no human-readable description; conclusion is captured in Status
TargetURL: cr.HTMLURL,
Review

[MINOR] mapCheckRunStatus maps "cancelled", "skipped", and "neutral" to "success". If this status is later used to gate security-critical actions (e.g., merges), a cancelled required check could be interpreted as success and potentially weaken enforcement. Consider mapping these to a non-success state (e.g., pending) or exposing them distinctly so callers can enforce policies accurately.

**[MINOR]** mapCheckRunStatus maps "cancelled", "skipped", and "neutral" to "success". If this status is later used to gate security-critical actions (e.g., merges), a cancelled required check could be interpreted as success and potentially weaken enforcement. Consider mapping these to a non-success state (e.g., pending) or exposing them distinctly so callers can enforce policies accurately.
})
}
3
@@ -220,10 +220,3 @@ func mapCheckRunStatus(conclusion *string) string {
}
}
// derefString safely dereferences a string pointer, returning empty string if nil.
func derefString(s *string) string {
if s == nil {
return ""
}
return *s
}
17