Files
review-bot/vcs/util.go
T
claw ec03dc2373
PR Ready Gate / clear-labels (pull_request) Successful in 2s
CI / test (pull_request) Successful in 17s
CI / review (anthropic--claude-4.6-sonnet, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 43s
CI / review (gpt-5, security, ., rodin/security-patterns, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Successful in 44s
CI / review (gpt-5, gpt, GPT_REVIEW_TOKEN) (pull_request) Successful in 1m44s
fix: address remaining review findings (interface assertions, DismissReview ctx, import order, filepath param, spelling)
2026-05-12 13:07:41 -07:00

194 lines
5.3 KiB
Go

package vcs
import (
"context"
"fmt"
"strconv"
"strings"
)
const (
// maxFilesInPath is the maximum number of files GetAllFilesInPath will fetch.
// Prevents unbounded resource consumption on very large directory trees.
maxFilesInPath = 10000
// maxTotalBytesInPath is the maximum total bytes GetAllFilesInPath will accumulate.
// Prevents memory exhaustion when fetching large repositories.
maxTotalBytesInPath = 100 * 1024 * 1024 // 100 MB
)
// GetAllFilesInPath recursively fetches all file contents under a path using the
// provided FileReader. Returns a map of filepath -> content for all files found.
// If the path points to an empty directory, returns an empty map.
//
// This function uses fail-fast error handling: any error from ListContents or
// GetFileContent aborts the entire traversal and returns the error immediately.
// This differs from gitea.Client.GetAllFilesInPath, which logs errors and continues.
// The fail-fast contract ensures callers can trust that a nil error means all files
// were successfully fetched.
//
// Resource limits: the traversal is bounded by maxFilesInPath (file count) and
// maxTotalBytesInPath (total accumulated bytes). The context is checked before each
// recursive call and file fetch to respect cancellation.
func GetAllFilesInPath(ctx context.Context, client FileReader, owner, repo, path string) (map[string]string, error) {
results := make(map[string]string)
totalBytes := 0
var walk func(string) error
walk = func(dir string) error {
if err := ctx.Err(); err != nil {
return fmt.Errorf("context canceled during traversal: %w", err)
}
entries, err := client.ListContents(ctx, owner, repo, dir)
if err != nil {
return fmt.Errorf("list contents %q: %w", dir, err)
}
for _, entry := range entries {
if err := ctx.Err(); err != nil {
return fmt.Errorf("context canceled during traversal: %w", err)
}
switch entry.Type {
case "file":
if len(results) >= maxFilesInPath {
return fmt.Errorf("exceeded max file count (%d) in path %q", maxFilesInPath, path)
}
content, err := client.GetFileContent(ctx, owner, repo, entry.Path, "")
if err != nil {
return fmt.Errorf("get file %q: %w", entry.Path, err)
}
totalBytes += len(content)
if totalBytes > maxTotalBytesInPath {
return fmt.Errorf("exceeded max total bytes (%d) in path %q", maxTotalBytesInPath, path)
}
results[entry.Path] = content
case "dir":
if err := walk(entry.Path); err != nil {
return err
}
}
}
return nil
}
if err := walk(path); err != nil {
return nil, err
}
return results, nil
}
// BuildLineToPositionMap parses a unified diff and returns a map of
// filename -> (new line number -> diff position). The diff position is a
// 1-indexed offset from the @@ hunk header line for each file.
// Only lines that appear in the new file (context lines and additions) are mapped.
// Deletion-only lines are not included.
func BuildLineToPositionMap(diff string) map[string]map[int]int {
result := make(map[string]map[int]int)
lines := strings.Split(diff, "\n")
var currentFile string
var position int
var newLine int
for _, line := range lines {
// Detect new file in diff
if strings.HasPrefix(line, "+++ b/") {
currentFile = strings.TrimPrefix(line, "+++ b/")
position = 0
newLine = 0
if result[currentFile] == nil {
result[currentFile] = make(map[int]int)
}
continue
}
// Skip --- lines (old file header)
if strings.HasPrefix(line, "--- ") {
continue
}
// Skip diff --git lines
if strings.HasPrefix(line, "diff --git") {
continue
}
// Skip index lines
if strings.HasPrefix(line, "index ") {
continue
}
// Parse hunk headers
if strings.HasPrefix(line, "@@") {
position++
// Extract new file start line from @@ -a,b +c,d @@
newLine = parseHunkNewStart(line)
continue
}
// We need a current file to map lines
if currentFile == "" {
continue
}
// Skip "\ No newline at end of file" markers — these are git diff
// metadata and not part of the file content.
if strings.HasPrefix(line, `\`) {
continue
}
// Process diff content lines
if strings.HasPrefix(line, "+") {
position++
result[currentFile][newLine] = position
newLine++
} else if strings.HasPrefix(line, "-") {
position++
// Deletion lines don't map to new line numbers
} else if strings.HasPrefix(line, " ") {
// Context line (space-prefixed).
// Only map if position > 0, which means we've seen a hunk header.
// Lines before the first hunk header (position == 0) are not part
// of any diff hunk and should be skipped.
if position > 0 {
position++
result[currentFile][newLine] = position
newLine++
}
}
}
return result
}
// parseHunkNewStart extracts the new-file starting line number from a hunk header.
// Format: @@ -old_start[,old_count] +new_start[,new_count] @@
func parseHunkNewStart(hunkLine string) int {
// Find the +N part
plusIdx := strings.Index(hunkLine, "+")
if plusIdx < 0 {
return 1
}
rest := hunkLine[plusIdx+1:]
// Find the end of the number (first non-digit after +)
endIdx := 0
for endIdx < len(rest) && rest[endIdx] >= '0' && rest[endIdx] <= '9' {
endIdx++
}
if endIdx == 0 {
return 1
}
n, err := strconv.Atoi(rest[:endIdx])
if err != nil {
return 1
}
return n
}