@@ -835,7 +835,7 @@ func (c *Client) ResolveComment(ctx context.Context, owner, repo string, comment
|
|||||||
// DismissReview dismisses a review on a pull request.
|
// DismissReview dismisses a review on a pull request.
|
||||||
// This is a stub for the vcs.Reviewer interface; full implementation is Phase 2.
|
// This is a stub for the vcs.Reviewer interface; full implementation is Phase 2.
|
||||||
func (c *Client) DismissReview(ctx context.Context, owner, repo string, number int, reviewID int64, message string) error {
|
func (c *Client) DismissReview(ctx context.Context, owner, repo string, number int, reviewID int64, message string) error {
|
||||||
return fmt.Errorf("DismissReview: not implemented")
|
return fmt.Errorf("DismissReview: %w", errors.ErrUnsupported)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|||||||
// GetFileContentAtRef fetches a file at a specific ref from a repo.
|
// GetFileContentAtRef fetches a file at a specific ref from a repo.
|
||||||
|
|||||||
@@ -3,39 +3,82 @@ package vcs
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
// maxFilesInPath is the maximum number of files GetAllFilesInPath will fetch.
|
||||||
|
// Prevents unbounded resource consumption on very large directory trees.
|
||||||
|
maxFilesInPath = 10000
|
||||||
|
|
||||||
|
// maxTotalBytesInPath is the maximum total bytes GetAllFilesInPath will accumulate.
|
||||||
|
// Prevents memory exhaustion when fetching large repositories.
|
||||||
|
maxTotalBytesInPath = 100 * 1024 * 1024 // 100 MB
|
||||||
|
)
|
||||||
|
|
||||||
// GetAllFilesInPath recursively fetches all file contents under a path using the
|
// GetAllFilesInPath recursively fetches all file contents under a path using the
|
||||||
// provided FileReader. Returns a map of filepath -> content for all files found.
|
// provided FileReader. Returns a map of filepath -> content for all files found.
|
||||||
// If the path points to an empty directory, returns an empty map.
|
// If the path points to an empty directory, returns an empty map.
|
||||||
|
//
|
||||||
|
// This function uses fail-fast error handling: any error from ListContents or
|
||||||
|
// GetFileContent aborts the entire traversal and returns the error immediately.
|
||||||
|
// This differs from gitea.Client.GetAllFilesInPath, which logs errors and continues.
|
||||||
|
// The fail-fast contract ensures callers can trust that a nil error means all files
|
||||||
|
// were successfully fetched.
|
||||||
|
//
|
||||||
|
// Resource limits: the traversal is bounded by maxFilesInPath (file count) and
|
||||||
|
// maxTotalBytesInPath (total accumulated bytes). The context is checked before each
|
||||||
|
// recursive call and file fetch to respect cancellation.
|
||||||
func GetAllFilesInPath(ctx context.Context, client FileReader, owner, repo, path string) (map[string]string, error) {
|
func GetAllFilesInPath(ctx context.Context, client FileReader, owner, repo, path string) (map[string]string, error) {
|
||||||
results := make(map[string]string)
|
results := make(map[string]string)
|
||||||
|
gpt-review-bot
commented
[MINOR] GetAllFilesInPath assumes ListContents works for both directories and files. If a FileReader implementation cannot list a file path (only directories), this will return an error rather than fetching the single file content. Consider documenting this requirement or adding a fallback to GetFileContent when ListContents indicates the path is a file. **[MINOR]** GetAllFilesInPath assumes ListContents works for both directories and files. If a FileReader implementation cannot list a file path (only directories), this will return an error rather than fetching the single file content. Consider documenting this requirement or adding a fallback to GetFileContent when ListContents indicates the path is a file.
|
|||||||
|
totalBytes := 0
|
||||||
|
|
||||||
entries, err := client.ListContents(ctx, owner, repo, path)
|
var walk func(string) error
|
||||||
if err != nil {
|
walk = func(dir string) error {
|
||||||
return nil, fmt.Errorf("list contents %q: %w", path, err)
|
if err := ctx.Err(); err != nil {
|
||||||
}
|
return fmt.Errorf("context cancelled during traversal: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
for _, entry := range entries {
|
entries, err := client.ListContents(ctx, owner, repo, dir)
|
||||||
|
sonnet-review-bot
commented
[MINOR] There is a functional duplication between **[MINOR]** There is a functional duplication between `vcs.GetAllFilesInPath` and `gitea.Client.GetAllFilesInPath`. The latter already has identical logic (though with warn-and-continue error handling instead of fail-fast). The doc comment on the vcs version explicitly calls out the difference, which is good, but over time both implementations will need to be kept in sync. Consider whether `gitea.Client.GetAllFilesInPath` should be refactored to delegate to `vcs.GetAllFilesInPath` via the `vcs.FileReader` interface (which `gitea.Client` will satisfy in Phase 2) to eliminate the duplication entirely.
|
|||||||
switch entry.Type {
|
if err != nil {
|
||||||
|
gpt-review-bot
commented
[NIT] Error messages use "context cancelled" (double-l) while the standard library uses "context canceled". This is harmless but can make grepping for cancellations less consistent. Consider aligning wording to the stdlib spelling. **[NIT]** Error messages use "context cancelled" (double-l) while the standard library uses "context canceled". This is harmless but can make grepping for cancellations less consistent. Consider aligning wording to the stdlib spelling.
|
|||||||
case "file":
|
return fmt.Errorf("list contents %q: %w", dir, err)
|
||||||
content, err := client.GetFileContent(ctx, owner, repo, entry.Path, "")
|
}
|
||||||
|
sonnet-review-bot
commented
[MINOR] GetAllFilesInPath does not handle the case where **[MINOR]** GetAllFilesInPath does not handle the case where `path` itself is a file rather than a directory. When `ListContents` returns a single-element slice containing the file itself (which is how Gitea behaves when the path is a file — see gitea.Client.ListContents which normalises the single-object response to a slice), the walk function will call GetFileContent for that entry and store it with entry.Path as the key, which works correctly. However if the API returns a 404 for the path on ListContents (e.g., a bare filename in a flat repo), the function will immediately return a 'list contents' error rather than trying to fetch the file directly, unlike gitea.Client.GetAllFilesInPath. This difference in behaviour is documented in the doc comment ('fail-fast'), but callers that previously relied on the gitea fallback (single-file fetch on 404) will silently get an error instead. Worth verifying that all call-sites only pass directory paths.
|
|||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("get file %q: %w", entry.Path, err)
|
for _, entry := range entries {
|
||||||
|
if err := ctx.Err(); err != nil {
|
||||||
|
return fmt.Errorf("context cancelled during traversal: %w", err)
|
||||||
}
|
}
|
||||||
results[entry.Path] = content
|
|
||||||
case "dir":
|
switch entry.Type {
|
||||||
subResults, err := GetAllFilesInPath(ctx, client, owner, repo, entry.Path)
|
case "file":
|
||||||
if err != nil {
|
if len(results) >= maxFilesInPath {
|
||||||
return nil, fmt.Errorf("recurse into %q: %w", entry.Path, err)
|
return fmt.Errorf("exceeded max file count (%d) in path %q", maxFilesInPath, path)
|
||||||
}
|
}
|
||||||
for k, v := range subResults {
|
|
||||||
results[k] = v
|
content, err := client.GetFileContent(ctx, owner, repo, entry.Path, "")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("get file %q: %w", entry.Path, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
totalBytes += len(content)
|
||||||
|
if totalBytes > maxTotalBytesInPath {
|
||||||
|
return fmt.Errorf("exceeded max total bytes (%d) in path %q", maxTotalBytesInPath, path)
|
||||||
|
sonnet-review-bot
commented
[NIT] The walk closure captures **[NIT]** The walk closure captures `path` (the original root path) by reference for use in error messages about resource limits. This is correct and intentional, but it's subtle — path is used only in the limit-exceeded error messages, not in the traversal logic. A comment noting this would improve readability.
|
|||||||
|
}
|
||||||
|
|
||||||
|
results[entry.Path] = content
|
||||||
|
case "dir":
|
||||||
|
if err := walk(entry.Path); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := walk(path); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
return results, nil
|
return results, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -92,6 +135,12 @@ func BuildLineToPositionMap(diff string) map[string]map[int]int {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Skip "\ No newline at end of file" markers — these are git diff
|
||||||
|
// metadata and not part of the file content.
|
||||||
|
if strings.HasPrefix(line, `\`) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
// Process diff content lines
|
// Process diff content lines
|
||||||
if strings.HasPrefix(line, "+") {
|
if strings.HasPrefix(line, "+") {
|
||||||
position++
|
position++
|
||||||
@@ -101,7 +150,10 @@ func BuildLineToPositionMap(diff string) map[string]map[int]int {
|
|||||||
position++
|
position++
|
||||||
// Deletion lines don't map to new line numbers
|
// Deletion lines don't map to new line numbers
|
||||||
} else if strings.HasPrefix(line, " ") {
|
} else if strings.HasPrefix(line, " ") {
|
||||||
// Context line (space-prefixed)
|
// Context line (space-prefixed).
|
||||||
|
// Only map if position > 0, which means we've seen a hunk header.
|
||||||
|
// Lines before the first hunk header (position == 0) are not part
|
||||||
|
// of any diff hunk and should be skipped.
|
||||||
if position > 0 {
|
if position > 0 {
|
||||||
position++
|
position++
|
||||||
result[currentFile][newLine] = position
|
result[currentFile][newLine] = position
|
||||||
@@ -123,23 +175,19 @@ func parseHunkNewStart(hunkLine string) int {
|
|||||||
}
|
}
|
||||||
rest := hunkLine[plusIdx+1:]
|
rest := hunkLine[plusIdx+1:]
|
||||||
|
|
||||||
// Read digits until comma or space
|
// Find the end of the number (first non-digit after +)
|
||||||
var numStr string
|
endIdx := 0
|
||||||
for _, ch := range rest {
|
for endIdx < len(rest) && rest[endIdx] >= '0' && rest[endIdx] <= '9' {
|
||||||
if ch >= '0' && ch <= '9' {
|
endIdx++
|
||||||
numStr += string(ch)
|
|
||||||
} else {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if numStr == "" {
|
if endIdx == 0 {
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
n := 0
|
n, err := strconv.Atoi(rest[:endIdx])
|
||||||
for _, ch := range numStr {
|
if err != nil {
|
||||||
n = n*10 + int(ch-'0')
|
return 1
|
||||||
}
|
}
|
||||||
return n
|
return n
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ package vcs_test
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"strings"
|
||||||
"fmt"
|
"fmt"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
@@ -248,3 +249,83 @@ func TestBuildLineToPositionMap(t *testing.T) {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGetAllFilesInPath_ErrorPropagation(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
t.Run("ListContents error propagates", func(t *testing.T) {
|
||||||
|
client := &mockFileReader{
|
||||||
|
contents: map[string][]vcs.ContentEntry{
|
||||||
|
// "src" not in map, so ListContents will fail
|
||||||
|
},
|
||||||
|
}
|
||||||
|
_, err := vcs.GetAllFilesInPath(ctx, client, "owner", "repo", "src")
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error, got nil")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "list contents") {
|
||||||
|
t.Errorf("expected error about list contents, got: %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("GetFileContent error propagates", func(t *testing.T) {
|
||||||
|
client := &mockFileReader{
|
||||||
|
contents: map[string][]vcs.ContentEntry{
|
||||||
|
"src": {
|
||||||
|
{Name: "main.go", Path: "src/main.go", Type: "file"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
files: map[string]string{
|
||||||
|
// "src/main.go" not in files map, so GetFileContent will fail
|
||||||
|
},
|
||||||
|
}
|
||||||
|
_, err := vcs.GetAllFilesInPath(ctx, client, "owner", "repo", "src")
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error, got nil")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "get file") {
|
||||||
|
t.Errorf("expected error about get file, got: %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("nested ListContents error propagates", func(t *testing.T) {
|
||||||
|
client := &mockFileReader{
|
||||||
|
contents: map[string][]vcs.ContentEntry{
|
||||||
|
"src": {
|
||||||
|
{Name: "pkg", Path: "src/pkg", Type: "dir"},
|
||||||
|
},
|
||||||
|
// "src/pkg" not in map, so recursive ListContents will fail
|
||||||
|
},
|
||||||
|
}
|
||||||
|
_, err := vcs.GetAllFilesInPath(ctx, client, "owner", "repo", "src")
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error, got nil")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "list contents") {
|
||||||
|
t.Errorf("expected error about list contents, got: %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("cancelled context propagates", func(t *testing.T) {
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
cancel() // Cancel immediately
|
||||||
|
|
||||||
|
client := &mockFileReader{
|
||||||
|
contents: map[string][]vcs.ContentEntry{
|
||||||
|
"src": {
|
||||||
|
{Name: "main.go", Path: "src/main.go", Type: "file"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
files: map[string]string{
|
||||||
|
"src/main.go": "package main",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
_, err := vcs.GetAllFilesInPath(ctx, client, "owner", "repo", "src")
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error from cancelled context, got nil")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "context cancelled") {
|
||||||
|
t.Errorf("expected context cancellation error, got: %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|||||||
[NIT]
GetFileContentAtRefis a thin wrapper that does nothing but delegate toGetFileContentRefwith the same arguments in the same order. The docstring says "This delegates to GetFileContentRef for the Gitea implementation" — since the method exists only to satisfy thevcs.PRReaderinterface, a one-linerreturn c.GetFileContentRef(ctx, owner, repo, path, ref)with no further ceremony would be ideal, which is already the case. No change needed, but consider whetherGetFileContentRefshould simply be renamed toGetFileContentAtRefto avoid the two names existing in parallel on the same type.