feat(vcs): complete Phase 1 — util.go, type cleanup, interface additions (fixes #84, #85, #86) #88

Merged
aweiker merged 3 commits from review-bot-issue-84 into feature/github-support 2026-05-12 20:18:18 +00:00
3 changed files with 161 additions and 32 deletions
Showing only changes of commit 1749d95727 - Show all commits
+1 -1
View File
2
@@ -835,7 +835,7 @@ func (c *Client) ResolveComment(ctx context.Context, owner, repo string, comment
// DismissReview dismisses a review on a pull request.
// This is a stub for the vcs.Reviewer interface; full implementation is Phase 2.
func (c *Client) DismissReview(ctx context.Context, owner, repo string, number int, reviewID int64, message string) error {
return fmt.Errorf("DismissReview: not implemented")
return fmt.Errorf("DismissReview: %w", errors.ErrUnsupported)
}
Review

[NIT] GetFileContentAtRef is a thin wrapper that does nothing but delegate to GetFileContentRef with the same arguments in the same order. The docstring says "This delegates to GetFileContentRef for the Gitea implementation" — since the method exists only to satisfy the vcs.PRReader interface, a one-liner return c.GetFileContentRef(ctx, owner, repo, path, ref) with no further ceremony would be ideal, which is already the case. No change needed, but consider whether GetFileContentRef should simply be renamed to GetFileContentAtRef to avoid the two names existing in parallel on the same type.

**[NIT]** `GetFileContentAtRef` is a thin wrapper that does nothing but delegate to `GetFileContentRef` with the same arguments in the same order. The docstring says "This delegates to GetFileContentRef for the Gitea implementation" — since the method exists only to satisfy the `vcs.PRReader` interface, a one-liner `return c.GetFileContentRef(ctx, owner, repo, path, ref)` with no further ceremony would be ideal, which is already the case. No change needed, but consider whether `GetFileContentRef` should simply be renamed to `GetFileContentAtRef` to avoid the two names existing in parallel on the same type.
// GetFileContentAtRef fetches a file at a specific ref from a repo.
+79 -31
View File
@@ -3,39 +3,82 @@ package vcs
import (
"context"
"fmt"
"strconv"
"strings"
)
const (
// maxFilesInPath is the maximum number of files GetAllFilesInPath will fetch.
// Prevents unbounded resource consumption on very large directory trees.
maxFilesInPath = 10000
// maxTotalBytesInPath is the maximum total bytes GetAllFilesInPath will accumulate.
// Prevents memory exhaustion when fetching large repositories.
maxTotalBytesInPath = 100 * 1024 * 1024 // 100 MB
)
// GetAllFilesInPath recursively fetches all file contents under a path using the
// provided FileReader. Returns a map of filepath -> content for all files found.
// If the path points to an empty directory, returns an empty map.
//
// This function uses fail-fast error handling: any error from ListContents or
// GetFileContent aborts the entire traversal and returns the error immediately.
// This differs from gitea.Client.GetAllFilesInPath, which logs errors and continues.
// The fail-fast contract ensures callers can trust that a nil error means all files
// were successfully fetched.
//
// Resource limits: the traversal is bounded by maxFilesInPath (file count) and
// maxTotalBytesInPath (total accumulated bytes). The context is checked before each
// recursive call and file fetch to respect cancellation.
func GetAllFilesInPath(ctx context.Context, client FileReader, owner, repo, path string) (map[string]string, error) {
results := make(map[string]string)
Review

[MINOR] GetAllFilesInPath assumes ListContents works for both directories and files. If a FileReader implementation cannot list a file path (only directories), this will return an error rather than fetching the single file content. Consider documenting this requirement or adding a fallback to GetFileContent when ListContents indicates the path is a file.

**[MINOR]** GetAllFilesInPath assumes ListContents works for both directories and files. If a FileReader implementation cannot list a file path (only directories), this will return an error rather than fetching the single file content. Consider documenting this requirement or adding a fallback to GetFileContent when ListContents indicates the path is a file.
totalBytes := 0
entries, err := client.ListContents(ctx, owner, repo, path)
if err != nil {
return nil, fmt.Errorf("list contents %q: %w", path, err)
}
var walk func(string) error
walk = func(dir string) error {
if err := ctx.Err(); err != nil {
return fmt.Errorf("context cancelled during traversal: %w", err)
}
for _, entry := range entries {
switch entry.Type {
case "file":
content, err := client.GetFileContent(ctx, owner, repo, entry.Path, "")
if err != nil {
return nil, fmt.Errorf("get file %q: %w", entry.Path, err)
entries, err := client.ListContents(ctx, owner, repo, dir)
Review

[MINOR] There is a functional duplication between vcs.GetAllFilesInPath and gitea.Client.GetAllFilesInPath. The latter already has identical logic (though with warn-and-continue error handling instead of fail-fast). The doc comment on the vcs version explicitly calls out the difference, which is good, but over time both implementations will need to be kept in sync. Consider whether gitea.Client.GetAllFilesInPath should be refactored to delegate to vcs.GetAllFilesInPath via the vcs.FileReader interface (which gitea.Client will satisfy in Phase 2) to eliminate the duplication entirely.

**[MINOR]** There is a functional duplication between `vcs.GetAllFilesInPath` and `gitea.Client.GetAllFilesInPath`. The latter already has identical logic (though with warn-and-continue error handling instead of fail-fast). The doc comment on the vcs version explicitly calls out the difference, which is good, but over time both implementations will need to be kept in sync. Consider whether `gitea.Client.GetAllFilesInPath` should be refactored to delegate to `vcs.GetAllFilesInPath` via the `vcs.FileReader` interface (which `gitea.Client` will satisfy in Phase 2) to eliminate the duplication entirely.
if err != nil {
Review

[NIT] Error messages use "context cancelled" (double-l) while the standard library uses "context canceled". This is harmless but can make grepping for cancellations less consistent. Consider aligning wording to the stdlib spelling.

**[NIT]** Error messages use "context cancelled" (double-l) while the standard library uses "context canceled". This is harmless but can make grepping for cancellations less consistent. Consider aligning wording to the stdlib spelling.
return fmt.Errorf("list contents %q: %w", dir, err)
}
Review

[MINOR] GetAllFilesInPath does not handle the case where path itself is a file rather than a directory. When ListContents returns a single-element slice containing the file itself (which is how Gitea behaves when the path is a file — see gitea.Client.ListContents which normalises the single-object response to a slice), the walk function will call GetFileContent for that entry and store it with entry.Path as the key, which works correctly. However if the API returns a 404 for the path on ListContents (e.g., a bare filename in a flat repo), the function will immediately return a 'list contents' error rather than trying to fetch the file directly, unlike gitea.Client.GetAllFilesInPath. This difference in behaviour is documented in the doc comment ('fail-fast'), but callers that previously relied on the gitea fallback (single-file fetch on 404) will silently get an error instead. Worth verifying that all call-sites only pass directory paths.

**[MINOR]** GetAllFilesInPath does not handle the case where `path` itself is a file rather than a directory. When `ListContents` returns a single-element slice containing the file itself (which is how Gitea behaves when the path is a file — see gitea.Client.ListContents which normalises the single-object response to a slice), the walk function will call GetFileContent for that entry and store it with entry.Path as the key, which works correctly. However if the API returns a 404 for the path on ListContents (e.g., a bare filename in a flat repo), the function will immediately return a 'list contents' error rather than trying to fetch the file directly, unlike gitea.Client.GetAllFilesInPath. This difference in behaviour is documented in the doc comment ('fail-fast'), but callers that previously relied on the gitea fallback (single-file fetch on 404) will silently get an error instead. Worth verifying that all call-sites only pass directory paths.
for _, entry := range entries {
if err := ctx.Err(); err != nil {
return fmt.Errorf("context cancelled during traversal: %w", err)
}
results[entry.Path] = content
case "dir":
subResults, err := GetAllFilesInPath(ctx, client, owner, repo, entry.Path)
if err != nil {
return nil, fmt.Errorf("recurse into %q: %w", entry.Path, err)
}
for k, v := range subResults {
results[k] = v
switch entry.Type {
case "file":
if len(results) >= maxFilesInPath {
return fmt.Errorf("exceeded max file count (%d) in path %q", maxFilesInPath, path)
}
content, err := client.GetFileContent(ctx, owner, repo, entry.Path, "")
if err != nil {
return fmt.Errorf("get file %q: %w", entry.Path, err)
}
totalBytes += len(content)
if totalBytes > maxTotalBytesInPath {
return fmt.Errorf("exceeded max total bytes (%d) in path %q", maxTotalBytesInPath, path)
Review

[NIT] The walk closure captures path (the original root path) by reference for use in error messages about resource limits. This is correct and intentional, but it's subtle — path is used only in the limit-exceeded error messages, not in the traversal logic. A comment noting this would improve readability.

**[NIT]** The walk closure captures `path` (the original root path) by reference for use in error messages about resource limits. This is correct and intentional, but it's subtle — path is used only in the limit-exceeded error messages, not in the traversal logic. A comment noting this would improve readability.
}
results[entry.Path] = content
case "dir":
if err := walk(entry.Path); err != nil {
return err
}
}
}
return nil
}
if err := walk(path); err != nil {
return nil, err
}
return results, nil
}
3
@@ -92,6 +135,12 @@ func BuildLineToPositionMap(diff string) map[string]map[int]int {
continue
}
// Skip "\ No newline at end of file" markers — these are git diff
// metadata and not part of the file content.
if strings.HasPrefix(line, `\`) {
continue
}
// Process diff content lines
if strings.HasPrefix(line, "+") {
position++
@@ -101,7 +150,10 @@ func BuildLineToPositionMap(diff string) map[string]map[int]int {
position++
// Deletion lines don't map to new line numbers
} else if strings.HasPrefix(line, " ") {
// Context line (space-prefixed)
// Context line (space-prefixed).
// Only map if position > 0, which means we've seen a hunk header.
// Lines before the first hunk header (position == 0) are not part
// of any diff hunk and should be skipped.
if position > 0 {
position++
result[currentFile][newLine] = position
@@ -123,23 +175,19 @@ func parseHunkNewStart(hunkLine string) int {
}
rest := hunkLine[plusIdx+1:]
// Read digits until comma or space
var numStr string
for _, ch := range rest {
if ch >= '0' && ch <= '9' {
numStr += string(ch)
} else {
break
}
// Find the end of the number (first non-digit after +)
endIdx := 0
for endIdx < len(rest) && rest[endIdx] >= '0' && rest[endIdx] <= '9' {
endIdx++
}
if numStr == "" {
if endIdx == 0 {
return 1
}
n := 0
for _, ch := range numStr {
n = n*10 + int(ch-'0')
n, err := strconv.Atoi(rest[:endIdx])
if err != nil {
return 1
}
return n
}
+81
View File
1
@@ -2,6 +2,7 @@ package vcs_test
import (
"context"
"strings"
"fmt"
"testing"
@@ -248,3 +249,83 @@ func TestBuildLineToPositionMap(t *testing.T) {
}
})
}
func TestGetAllFilesInPath_ErrorPropagation(t *testing.T) {
ctx := context.Background()
t.Run("ListContents error propagates", func(t *testing.T) {
client := &mockFileReader{
contents: map[string][]vcs.ContentEntry{
// "src" not in map, so ListContents will fail
},
}
_, err := vcs.GetAllFilesInPath(ctx, client, "owner", "repo", "src")
if err == nil {
t.Fatal("expected error, got nil")
}
if !strings.Contains(err.Error(), "list contents") {
t.Errorf("expected error about list contents, got: %v", err)
}
})
t.Run("GetFileContent error propagates", func(t *testing.T) {
client := &mockFileReader{
contents: map[string][]vcs.ContentEntry{
"src": {
{Name: "main.go", Path: "src/main.go", Type: "file"},
},
},
files: map[string]string{
// "src/main.go" not in files map, so GetFileContent will fail
},
}
_, err := vcs.GetAllFilesInPath(ctx, client, "owner", "repo", "src")
if err == nil {
t.Fatal("expected error, got nil")
}
if !strings.Contains(err.Error(), "get file") {
t.Errorf("expected error about get file, got: %v", err)
}
})
t.Run("nested ListContents error propagates", func(t *testing.T) {
client := &mockFileReader{
contents: map[string][]vcs.ContentEntry{
"src": {
{Name: "pkg", Path: "src/pkg", Type: "dir"},
},
// "src/pkg" not in map, so recursive ListContents will fail
},
}
_, err := vcs.GetAllFilesInPath(ctx, client, "owner", "repo", "src")
if err == nil {
t.Fatal("expected error, got nil")
}
if !strings.Contains(err.Error(), "list contents") {
t.Errorf("expected error about list contents, got: %v", err)
}
})
t.Run("cancelled context propagates", func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
cancel() // Cancel immediately
client := &mockFileReader{
contents: map[string][]vcs.ContentEntry{
"src": {
{Name: "main.go", Path: "src/main.go", Type: "file"},
},
},
files: map[string]string{
"src/main.go": "package main",
},
}
_, err := vcs.GetAllFilesInPath(ctx, client, "owner", "repo", "src")
if err == nil {
t.Fatal("expected error from cancelled context, got nil")
}
if !strings.Contains(err.Error(), "context cancelled") {
t.Errorf("expected context cancellation error, got: %v", err)
}
})
}