@@ -835,7 +835,7 @@ func (c *Client) ResolveComment(ctx context.Context, owner, repo string, comment
|
||||
// DismissReview dismisses a review on a pull request.
|
||||
// This is a stub for the vcs.Reviewer interface; full implementation is Phase 2.
|
||||
func (c *Client) DismissReview(ctx context.Context, owner, repo string, number int, reviewID int64, message string) error {
|
||||
return fmt.Errorf("DismissReview: not implemented")
|
||||
return fmt.Errorf("DismissReview: %w", errors.ErrUnsupported)
|
||||
}
|
||||
|
||||
|
|
||||
// GetFileContentAtRef fetches a file at a specific ref from a repo.
|
||||
|
||||
@@ -3,39 +3,82 @@ package vcs
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
// maxFilesInPath is the maximum number of files GetAllFilesInPath will fetch.
|
||||
|
security-review-bot marked this conversation as resolved
Outdated
[MAJOR] GetAllFilesInPath performs unbounded recursive traversal and loads all file contents into memory without limits on depth, number of files, or total bytes. An attacker controlling repository contents could craft deeply nested directories or many/large files to exhaust CPU/memory, causing a denial of service. Although context is passed to client calls, the function does not check ctx for cancellation nor enforce resource bounds. **[MAJOR]** GetAllFilesInPath performs unbounded recursive traversal and loads all file contents into memory without limits on depth, number of files, or total bytes. An attacker controlling repository contents could craft deeply nested directories or many/large files to exhaust CPU/memory, causing a denial of service. Although context is passed to client calls, the function does not check ctx for cancellation nor enforce resource bounds.
|
||||
// Prevents unbounded resource consumption on very large directory trees.
|
||||
|
security-review-bot marked this conversation as resolved
Outdated
[MINOR] GetAllFilesInPath recursively fetches and accumulates contents for all files without bounding the number of files or aggregate size, which could be leveraged for resource exhaustion if used on large paths. Consider enforcing limits (max files, max bytes) or filtering before fetching content. **[MINOR]** GetAllFilesInPath recursively fetches and accumulates contents for all files without bounding the number of files or aggregate size, which could be leveraged for resource exhaustion if used on large paths. Consider enforcing limits (max files, max bytes) or filtering before fetching content.
|
||||
maxFilesInPath = 10000
|
||||
|
||||
// maxTotalBytesInPath is the maximum total bytes GetAllFilesInPath will accumulate.
|
||||
// Prevents memory exhaustion when fetching large repositories.
|
||||
maxTotalBytesInPath = 100 * 1024 * 1024 // 100 MB
|
||||
)
|
||||
|
||||
// GetAllFilesInPath recursively fetches all file contents under a path using the
|
||||
// provided FileReader. Returns a map of filepath -> content for all files found.
|
||||
// If the path points to an empty directory, returns an empty map.
|
||||
|
gpt-review-bot
commented
[NIT] The switch over entry.Type handles only "file" and "dir"; other types (e.g., symlink, submodule) are silently ignored. Consider adding a default case or a comment to make this intent explicit. **[NIT]** The switch over entry.Type handles only "file" and "dir"; other types (e.g., symlink, submodule) are silently ignored. Consider adding a default case or a comment to make this intent explicit.
|
||||
//
|
||||
// This function uses fail-fast error handling: any error from ListContents or
|
||||
|
gpt-review-bot
commented
[MINOR] GetAllFilesInPath aborts on the first file read error; depending on usage, you may want to continue collecting other files (logging and skipping failures), similar to the gitea.Client implementation, to be more resilient to partial failures. **[MINOR]** GetAllFilesInPath aborts on the first file read error; depending on usage, you may want to continue collecting other files (logging and skipping failures), similar to the gitea.Client implementation, to be more resilient to partial failures.
|
||||
// GetFileContent aborts the entire traversal and returns the error immediately.
|
||||
// This differs from gitea.Client.GetAllFilesInPath, which logs errors and continues.
|
||||
// The fail-fast contract ensures callers can trust that a nil error means all files
|
||||
|
sonnet-review-bot
commented
[MINOR] GetAllFilesInPath propagates errors from GetFileContent and recursive calls (returns nil, err) but the gitea.Client.GetAllFilesInPath in gitea/client.go logs warnings and continues instead of failing. The two implementations now have divergent error-handling semantics for the same conceptual operation. The vcs package version is stricter (fail-fast), which is the right design for a utility, but callers should be aware that a single unreachable file aborts the entire traversal. Consider documenting this difference or adding a comment explaining the fail-fast contract. **[MINOR]** GetAllFilesInPath propagates errors from GetFileContent and recursive calls (returns nil, err) but the gitea.Client.GetAllFilesInPath in gitea/client.go logs warnings and continues instead of failing. The two implementations now have divergent error-handling semantics for the same conceptual operation. The vcs package version is stricter (fail-fast), which is the right design for a utility, but callers should be aware that a single unreachable file aborts the entire traversal. Consider documenting this difference or adding a comment explaining the fail-fast contract.
|
||||
// were successfully fetched.
|
||||
//
|
||||
// Resource limits: the traversal is bounded by maxFilesInPath (file count) and
|
||||
// maxTotalBytesInPath (total accumulated bytes). The context is checked before each
|
||||
// recursive call and file fetch to respect cancellation.
|
||||
func GetAllFilesInPath(ctx context.Context, client FileReader, owner, repo, path string) (map[string]string, error) {
|
||||
results := make(map[string]string)
|
||||
|
gpt-review-bot
commented
[MINOR] GetAllFilesInPath assumes ListContents works for both directories and files. If a FileReader implementation cannot list a file path (only directories), this will return an error rather than fetching the single file content. Consider documenting this requirement or adding a fallback to GetFileContent when ListContents indicates the path is a file. **[MINOR]** GetAllFilesInPath assumes ListContents works for both directories and files. If a FileReader implementation cannot list a file path (only directories), this will return an error rather than fetching the single file content. Consider documenting this requirement or adding a fallback to GetFileContent when ListContents indicates the path is a file.
|
||||
totalBytes := 0
|
||||
|
||||
entries, err := client.ListContents(ctx, owner, repo, path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("list contents %q: %w", path, err)
|
||||
}
|
||||
var walk func(string) error
|
||||
walk = func(dir string) error {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return fmt.Errorf("context cancelled during traversal: %w", err)
|
||||
}
|
||||
|
||||
for _, entry := range entries {
|
||||
switch entry.Type {
|
||||
case "file":
|
||||
content, err := client.GetFileContent(ctx, owner, repo, entry.Path, "")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("get file %q: %w", entry.Path, err)
|
||||
entries, err := client.ListContents(ctx, owner, repo, dir)
|
||||
|
sonnet-review-bot
commented
[MINOR] There is a functional duplication between **[MINOR]** There is a functional duplication between `vcs.GetAllFilesInPath` and `gitea.Client.GetAllFilesInPath`. The latter already has identical logic (though with warn-and-continue error handling instead of fail-fast). The doc comment on the vcs version explicitly calls out the difference, which is good, but over time both implementations will need to be kept in sync. Consider whether `gitea.Client.GetAllFilesInPath` should be refactored to delegate to `vcs.GetAllFilesInPath` via the `vcs.FileReader` interface (which `gitea.Client` will satisfy in Phase 2) to eliminate the duplication entirely.
|
||||
if err != nil {
|
||||
|
gpt-review-bot
commented
[NIT] Error messages use "context cancelled" (double-l) while the standard library uses "context canceled". This is harmless but can make grepping for cancellations less consistent. Consider aligning wording to the stdlib spelling. **[NIT]** Error messages use "context cancelled" (double-l) while the standard library uses "context canceled". This is harmless but can make grepping for cancellations less consistent. Consider aligning wording to the stdlib spelling.
|
||||
return fmt.Errorf("list contents %q: %w", dir, err)
|
||||
}
|
||||
|
sonnet-review-bot
commented
[MINOR] GetAllFilesInPath does not handle the case where **[MINOR]** GetAllFilesInPath does not handle the case where `path` itself is a file rather than a directory. When `ListContents` returns a single-element slice containing the file itself (which is how Gitea behaves when the path is a file — see gitea.Client.ListContents which normalises the single-object response to a slice), the walk function will call GetFileContent for that entry and store it with entry.Path as the key, which works correctly. However if the API returns a 404 for the path on ListContents (e.g., a bare filename in a flat repo), the function will immediately return a 'list contents' error rather than trying to fetch the file directly, unlike gitea.Client.GetAllFilesInPath. This difference in behaviour is documented in the doc comment ('fail-fast'), but callers that previously relied on the gitea fallback (single-file fetch on 404) will silently get an error instead. Worth verifying that all call-sites only pass directory paths.
|
||||
|
||||
for _, entry := range entries {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return fmt.Errorf("context cancelled during traversal: %w", err)
|
||||
|
security-review-bot marked this conversation as resolved
Outdated
[MINOR] BuildLineToPositionMap splits the entire diff into lines and iterates without any size checks. Very large diffs could cause high memory/cpu usage. Consider validating input size or imposing time/size limits when processing untrusted diffs. **[MINOR]** BuildLineToPositionMap splits the entire diff into lines and iterates without any size checks. Very large diffs could cause high memory/cpu usage. Consider validating input size or imposing time/size limits when processing untrusted diffs.
|
||||
}
|
||||
results[entry.Path] = content
|
||||
case "dir":
|
||||
subResults, err := GetAllFilesInPath(ctx, client, owner, repo, entry.Path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("recurse into %q: %w", entry.Path, err)
|
||||
}
|
||||
for k, v := range subResults {
|
||||
results[k] = v
|
||||
|
||||
switch entry.Type {
|
||||
case "file":
|
||||
if len(results) >= maxFilesInPath {
|
||||
return fmt.Errorf("exceeded max file count (%d) in path %q", maxFilesInPath, path)
|
||||
}
|
||||
|
sonnet-review-bot
commented
[NIT] The doc comment uses **[NIT]** The doc comment uses `→` (Unicode arrow) in the Returns line. This is fine for readability but slightly inconsistent with stdlib conventions which typically use plain ASCII (`->`) in godoc. Not a real issue.
|
||||
|
||||
content, err := client.GetFileContent(ctx, owner, repo, entry.Path, "")
|
||||
if err != nil {
|
||||
return fmt.Errorf("get file %q: %w", entry.Path, err)
|
||||
}
|
||||
|
||||
totalBytes += len(content)
|
||||
if totalBytes > maxTotalBytesInPath {
|
||||
return fmt.Errorf("exceeded max total bytes (%d) in path %q", maxTotalBytesInPath, path)
|
||||
|
sonnet-review-bot
commented
[NIT] The walk closure captures **[NIT]** The walk closure captures `path` (the original root path) by reference for use in error messages about resource limits. This is correct and intentional, but it's subtle — path is used only in the limit-exceeded error messages, not in the traversal logic. A comment noting this would improve readability.
|
||||
}
|
||||
|
||||
results[entry.Path] = content
|
||||
|
security-review-bot marked this conversation as resolved
Outdated
[MINOR] BuildLineToPositionMap processes the entire diff string without any size checks. If diff input is attacker-controlled and very large, it could still lead to high memory/CPU usage. While operations are linear, adding input size limits or early termination would harden against DoS. **[MINOR]** BuildLineToPositionMap processes the entire diff string without any size checks. If diff input is attacker-controlled and very large, it could still lead to high memory/CPU usage. While operations are linear, adding input size limits or early termination would harden against DoS.
|
||||
case "dir":
|
||||
if err := walk(entry.Path); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
gpt-review-bot
commented
[NIT] BuildLineToPositionMap implicitly ignores '\ No newline at end of file' markers. Consider explicitly checking and skipping such lines for clarity and future maintainability. **[NIT]** BuildLineToPositionMap implicitly ignores '\ No newline at end of file' markers. Consider explicitly checking and skipping such lines for clarity and future maintainability.
|
||||
}
|
||||
|
||||
if err := walk(path); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
gpt-review-bot
commented
[MINOR] The metadata skip includes any line starting with a backslash ("\"). While intended for the "\ No newline at end of file" marker, being explicit (checking for that exact string) would avoid inadvertently skipping any other backslash-prefixed lines in future formats. **[MINOR]** The metadata skip includes any line starting with a backslash ("\\"). While intended for the "\ No newline at end of file" marker, being explicit (checking for that exact string) would avoid inadvertently skipping any other backslash-prefixed lines in future formats.
|
||||
return results, nil
|
||||
}
|
||||
|
||||
@@ -92,6 +135,12 @@ func BuildLineToPositionMap(diff string) map[string]map[int]int {
|
||||
continue
|
||||
}
|
||||
|
||||
// Skip "\ No newline at end of file" markers — these are git diff
|
||||
// metadata and not part of the file content.
|
||||
if strings.HasPrefix(line, `\`) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Process diff content lines
|
||||
if strings.HasPrefix(line, "+") {
|
||||
position++
|
||||
@@ -101,7 +150,10 @@ func BuildLineToPositionMap(diff string) map[string]map[int]int {
|
||||
position++
|
||||
// Deletion lines don't map to new line numbers
|
||||
} else if strings.HasPrefix(line, " ") {
|
||||
// Context line (space-prefixed)
|
||||
// Context line (space-prefixed).
|
||||
// Only map if position > 0, which means we've seen a hunk header.
|
||||
// Lines before the first hunk header (position == 0) are not part
|
||||
// of any diff hunk and should be skipped.
|
||||
if position > 0 {
|
||||
position++
|
||||
result[currentFile][newLine] = position
|
||||
@@ -123,23 +175,19 @@ func parseHunkNewStart(hunkLine string) int {
|
||||
}
|
||||
rest := hunkLine[plusIdx+1:]
|
||||
|
||||
// Read digits until comma or space
|
||||
var numStr string
|
||||
for _, ch := range rest {
|
||||
if ch >= '0' && ch <= '9' {
|
||||
numStr += string(ch)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
// Find the end of the number (first non-digit after +)
|
||||
endIdx := 0
|
||||
for endIdx < len(rest) && rest[endIdx] >= '0' && rest[endIdx] <= '9' {
|
||||
endIdx++
|
||||
}
|
||||
|
||||
if numStr == "" {
|
||||
if endIdx == 0 {
|
||||
return 1
|
||||
}
|
||||
|
||||
n := 0
|
||||
for _, ch := range numStr {
|
||||
n = n*10 + int(ch-'0')
|
||||
n, err := strconv.Atoi(rest[:endIdx])
|
||||
if err != nil {
|
||||
return 1
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package vcs_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
|
sonnet-review-bot
commented
[MINOR] Import ordering: **[MINOR]** Import ordering: `fmt` is placed between `strings` and `testing` rather than in the stdlib block before them. goimports would reorder these. The conventions doc requires `go vet ./...` to pass but goimports/gofmt ordering should also be maintained. Not a compilation error but violates the canonical import grouping pattern documented in style.md.
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
@@ -248,3 +249,83 @@ func TestBuildLineToPositionMap(t *testing.T) {
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestGetAllFilesInPath_ErrorPropagation(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
t.Run("ListContents error propagates", func(t *testing.T) {
|
||||
client := &mockFileReader{
|
||||
contents: map[string][]vcs.ContentEntry{
|
||||
// "src" not in map, so ListContents will fail
|
||||
},
|
||||
}
|
||||
_, err := vcs.GetAllFilesInPath(ctx, client, "owner", "repo", "src")
|
||||
if err == nil {
|
||||
t.Fatal("expected error, got nil")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "list contents") {
|
||||
t.Errorf("expected error about list contents, got: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("GetFileContent error propagates", func(t *testing.T) {
|
||||
client := &mockFileReader{
|
||||
contents: map[string][]vcs.ContentEntry{
|
||||
"src": {
|
||||
{Name: "main.go", Path: "src/main.go", Type: "file"},
|
||||
},
|
||||
},
|
||||
files: map[string]string{
|
||||
// "src/main.go" not in files map, so GetFileContent will fail
|
||||
},
|
||||
}
|
||||
_, err := vcs.GetAllFilesInPath(ctx, client, "owner", "repo", "src")
|
||||
if err == nil {
|
||||
t.Fatal("expected error, got nil")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "get file") {
|
||||
t.Errorf("expected error about get file, got: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("nested ListContents error propagates", func(t *testing.T) {
|
||||
client := &mockFileReader{
|
||||
contents: map[string][]vcs.ContentEntry{
|
||||
"src": {
|
||||
{Name: "pkg", Path: "src/pkg", Type: "dir"},
|
||||
},
|
||||
// "src/pkg" not in map, so recursive ListContents will fail
|
||||
},
|
||||
}
|
||||
_, err := vcs.GetAllFilesInPath(ctx, client, "owner", "repo", "src")
|
||||
if err == nil {
|
||||
t.Fatal("expected error, got nil")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "list contents") {
|
||||
t.Errorf("expected error about list contents, got: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("cancelled context propagates", func(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel() // Cancel immediately
|
||||
|
||||
client := &mockFileReader{
|
||||
contents: map[string][]vcs.ContentEntry{
|
||||
"src": {
|
||||
{Name: "main.go", Path: "src/main.go", Type: "file"},
|
||||
},
|
||||
},
|
||||
files: map[string]string{
|
||||
"src/main.go": "package main",
|
||||
},
|
||||
}
|
||||
_, err := vcs.GetAllFilesInPath(ctx, client, "owner", "repo", "src")
|
||||
if err == nil {
|
||||
t.Fatal("expected error from cancelled context, got nil")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "context cancelled") {
|
||||
t.Errorf("expected context cancellation error, got: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
[NIT]
GetFileContentAtRefis a thin wrapper that does nothing but delegate toGetFileContentRefwith the same arguments in the same order. The docstring says "This delegates to GetFileContentRef for the Gitea implementation" — since the method exists only to satisfy thevcs.PRReaderinterface, a one-linerreturn c.GetFileContentRef(ctx, owner, repo, path, ref)with no further ceremony would be ideal, which is already the case. No change needed, but consider whetherGetFileContentRefshould simply be renamed toGetFileContentAtRefto avoid the two names existing in parallel on the same type.