From b5a5533070b8accf20b5cf88d29eb5e62d2905c1 Mon Sep 17 00:00:00 2001 From: claw Date: Tue, 12 May 2026 12:56:13 -0700 Subject: [PATCH] fix(vcs): address review findings on PR #88 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Findings addressed: - F1/G1: Add doc comment to GetAllFilesInPath documenting fail-fast contract - F2/G2: Add explicit backslash-prefix guard to skip '\ No newline' markers - F3: Add comment explaining position > 0 guard (skip lines before first hunk) - F4: Refactor parseHunkNewStart to use strconv.Atoi instead of per-char concat - F5: Add error propagation tests (ListContents, GetFileContent, nested, ctx cancel) - F6: Wrap errors.ErrUnsupported in DismissReview for programmatic checking - S1: Add ctx.Err() checks + max file count/byte constants with clear errors - S2: Addressed by S1 — input bounds are now enforced via the same constants --- gitea/client.go | 2 +- vcs/util.go | 110 ++++++++++++++++++++++++++++++++++------------- vcs/util_test.go | 81 ++++++++++++++++++++++++++++++++++ 3 files changed, 161 insertions(+), 32 deletions(-) diff --git a/gitea/client.go b/gitea/client.go index 74fce56..ab963a6 100644 --- a/gitea/client.go +++ b/gitea/client.go @@ -835,7 +835,7 @@ func (c *Client) ResolveComment(ctx context.Context, owner, repo string, comment // DismissReview dismisses a review on a pull request. // This is a stub for the vcs.Reviewer interface; full implementation is Phase 2. func (c *Client) DismissReview(ctx context.Context, owner, repo string, number int, reviewID int64, message string) error { - return fmt.Errorf("DismissReview: not implemented") + return fmt.Errorf("DismissReview: %w", errors.ErrUnsupported) } // GetFileContentAtRef fetches a file at a specific ref from a repo. diff --git a/vcs/util.go b/vcs/util.go index 9f133fd..f1266c8 100644 --- a/vcs/util.go +++ b/vcs/util.go @@ -3,39 +3,82 @@ package vcs import ( "context" "fmt" + "strconv" "strings" ) +const ( + // maxFilesInPath is the maximum number of files GetAllFilesInPath will fetch. + // Prevents unbounded resource consumption on very large directory trees. + maxFilesInPath = 10000 + + // maxTotalBytesInPath is the maximum total bytes GetAllFilesInPath will accumulate. + // Prevents memory exhaustion when fetching large repositories. + maxTotalBytesInPath = 100 * 1024 * 1024 // 100 MB +) + // GetAllFilesInPath recursively fetches all file contents under a path using the // provided FileReader. Returns a map of filepath -> content for all files found. // If the path points to an empty directory, returns an empty map. +// +// This function uses fail-fast error handling: any error from ListContents or +// GetFileContent aborts the entire traversal and returns the error immediately. +// This differs from gitea.Client.GetAllFilesInPath, which logs errors and continues. +// The fail-fast contract ensures callers can trust that a nil error means all files +// were successfully fetched. +// +// Resource limits: the traversal is bounded by maxFilesInPath (file count) and +// maxTotalBytesInPath (total accumulated bytes). The context is checked before each +// recursive call and file fetch to respect cancellation. func GetAllFilesInPath(ctx context.Context, client FileReader, owner, repo, path string) (map[string]string, error) { results := make(map[string]string) + totalBytes := 0 - entries, err := client.ListContents(ctx, owner, repo, path) - if err != nil { - return nil, fmt.Errorf("list contents %q: %w", path, err) - } + var walk func(string) error + walk = func(dir string) error { + if err := ctx.Err(); err != nil { + return fmt.Errorf("context cancelled during traversal: %w", err) + } - for _, entry := range entries { - switch entry.Type { - case "file": - content, err := client.GetFileContent(ctx, owner, repo, entry.Path, "") - if err != nil { - return nil, fmt.Errorf("get file %q: %w", entry.Path, err) + entries, err := client.ListContents(ctx, owner, repo, dir) + if err != nil { + return fmt.Errorf("list contents %q: %w", dir, err) + } + + for _, entry := range entries { + if err := ctx.Err(); err != nil { + return fmt.Errorf("context cancelled during traversal: %w", err) } - results[entry.Path] = content - case "dir": - subResults, err := GetAllFilesInPath(ctx, client, owner, repo, entry.Path) - if err != nil { - return nil, fmt.Errorf("recurse into %q: %w", entry.Path, err) - } - for k, v := range subResults { - results[k] = v + + switch entry.Type { + case "file": + if len(results) >= maxFilesInPath { + return fmt.Errorf("exceeded max file count (%d) in path %q", maxFilesInPath, path) + } + + content, err := client.GetFileContent(ctx, owner, repo, entry.Path, "") + if err != nil { + return fmt.Errorf("get file %q: %w", entry.Path, err) + } + + totalBytes += len(content) + if totalBytes > maxTotalBytesInPath { + return fmt.Errorf("exceeded max total bytes (%d) in path %q", maxTotalBytesInPath, path) + } + + results[entry.Path] = content + case "dir": + if err := walk(entry.Path); err != nil { + return err + } } } + return nil } + if err := walk(path); err != nil { + return nil, err + } return results, nil } @@ -92,6 +135,12 @@ func BuildLineToPositionMap(diff string) map[string]map[int]int { continue } + // Skip "\ No newline at end of file" markers — these are git diff + // metadata and not part of the file content. + if strings.HasPrefix(line, `\`) { + continue + } + // Process diff content lines if strings.HasPrefix(line, "+") { position++ @@ -101,7 +150,10 @@ func BuildLineToPositionMap(diff string) map[string]map[int]int { position++ // Deletion lines don't map to new line numbers } else if strings.HasPrefix(line, " ") { - // Context line (space-prefixed) + // Context line (space-prefixed). + // Only map if position > 0, which means we've seen a hunk header. + // Lines before the first hunk header (position == 0) are not part + // of any diff hunk and should be skipped. if position > 0 { position++ result[currentFile][newLine] = position @@ -123,23 +175,19 @@ func parseHunkNewStart(hunkLine string) int { } rest := hunkLine[plusIdx+1:] - // Read digits until comma or space - var numStr string - for _, ch := range rest { - if ch >= '0' && ch <= '9' { - numStr += string(ch) - } else { - break - } + // Find the end of the number (first non-digit after +) + endIdx := 0 + for endIdx < len(rest) && rest[endIdx] >= '0' && rest[endIdx] <= '9' { + endIdx++ } - if numStr == "" { + if endIdx == 0 { return 1 } - n := 0 - for _, ch := range numStr { - n = n*10 + int(ch-'0') + n, err := strconv.Atoi(rest[:endIdx]) + if err != nil { + return 1 } return n } diff --git a/vcs/util_test.go b/vcs/util_test.go index acdac7f..8436884 100644 --- a/vcs/util_test.go +++ b/vcs/util_test.go @@ -2,6 +2,7 @@ package vcs_test import ( "context" + "strings" "fmt" "testing" @@ -248,3 +249,83 @@ func TestBuildLineToPositionMap(t *testing.T) { } }) } + +func TestGetAllFilesInPath_ErrorPropagation(t *testing.T) { + ctx := context.Background() + + t.Run("ListContents error propagates", func(t *testing.T) { + client := &mockFileReader{ + contents: map[string][]vcs.ContentEntry{ + // "src" not in map, so ListContents will fail + }, + } + _, err := vcs.GetAllFilesInPath(ctx, client, "owner", "repo", "src") + if err == nil { + t.Fatal("expected error, got nil") + } + if !strings.Contains(err.Error(), "list contents") { + t.Errorf("expected error about list contents, got: %v", err) + } + }) + + t.Run("GetFileContent error propagates", func(t *testing.T) { + client := &mockFileReader{ + contents: map[string][]vcs.ContentEntry{ + "src": { + {Name: "main.go", Path: "src/main.go", Type: "file"}, + }, + }, + files: map[string]string{ + // "src/main.go" not in files map, so GetFileContent will fail + }, + } + _, err := vcs.GetAllFilesInPath(ctx, client, "owner", "repo", "src") + if err == nil { + t.Fatal("expected error, got nil") + } + if !strings.Contains(err.Error(), "get file") { + t.Errorf("expected error about get file, got: %v", err) + } + }) + + t.Run("nested ListContents error propagates", func(t *testing.T) { + client := &mockFileReader{ + contents: map[string][]vcs.ContentEntry{ + "src": { + {Name: "pkg", Path: "src/pkg", Type: "dir"}, + }, + // "src/pkg" not in map, so recursive ListContents will fail + }, + } + _, err := vcs.GetAllFilesInPath(ctx, client, "owner", "repo", "src") + if err == nil { + t.Fatal("expected error, got nil") + } + if !strings.Contains(err.Error(), "list contents") { + t.Errorf("expected error about list contents, got: %v", err) + } + }) + + t.Run("cancelled context propagates", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + cancel() // Cancel immediately + + client := &mockFileReader{ + contents: map[string][]vcs.ContentEntry{ + "src": { + {Name: "main.go", Path: "src/main.go", Type: "file"}, + }, + }, + files: map[string]string{ + "src/main.go": "package main", + }, + } + _, err := vcs.GetAllFilesInPath(ctx, client, "owner", "repo", "src") + if err == nil { + t.Fatal("expected error from cancelled context, got nil") + } + if !strings.Contains(err.Error(), "context cancelled") { + t.Errorf("expected context cancellation error, got: %v", err) + } + }) +}