package vcs

import (
	"context"
	"fmt"
	"strconv"
	"strings"
)

const (
	// maxFilesInPath is the maximum number of files GetAllFilesInPath will fetch.
	// It bounds resource consumption on very large directory trees: the traversal
	// fails as soon as one more file is encountered after this many were collected.
	maxFilesInPath = 10000

	// maxTotalBytesInPath is the maximum total content size, in bytes, that
	// GetAllFilesInPath will accumulate. It prevents memory exhaustion when
	// fetching large repositories. The check runs after each file is fetched,
	// so the file that crosses the limit has already been read once.
	maxTotalBytesInPath = 100 * 1024 * 1024 // 100 MiB (104,857,600 bytes)
)

// GetAllFilesInPath recursively fetches all file contents under a path using the
// provided FileReader. Returns a map of filepath -> content for all files found.
// If the path points to an empty directory, returns an empty map.
// Entries whose type is neither "file" nor "dir" are skipped.
//
// This function uses fail-fast error handling: any error from ListContents or
// GetFileContent aborts the entire traversal and returns the error immediately,
// together with a nil map (no partial results are returned).
// This differs from gitea.Client.GetAllFilesInPath, which logs errors and continues.
// The fail-fast contract ensures callers can trust that a nil error means all files
// were successfully fetched.
//
// Resource limits: the traversal is bounded by maxFilesInPath (file count) and
// maxTotalBytesInPath (total accumulated bytes); exceeding either one is reported
// as an error. The context is checked on entering each directory and before each
// entry is processed, so cancellation stops the traversal promptly.
func GetAllFilesInPath(ctx context.Context, client FileReader, owner, repo, path string) (map[string]string, error) { results := make(map[string]string) totalBytes := 0 var walk func(string) error walk = func(dir string) error { if err := ctx.Err(); err != nil { return fmt.Errorf("context cancelled during traversal: %w", err) } entries, err := client.ListContents(ctx, owner, repo, dir) if err != nil { return fmt.Errorf("list contents %q: %w", dir, err) } for _, entry := range entries { if err := ctx.Err(); err != nil { return fmt.Errorf("context cancelled during traversal: %w", err) } switch entry.Type { case "file": if len(results) >= maxFilesInPath { return fmt.Errorf("exceeded max file count (%d) in path %q", maxFilesInPath, path) } content, err := client.GetFileContent(ctx, owner, repo, entry.Path, "") if err != nil { return fmt.Errorf("get file %q: %w", entry.Path, err) } totalBytes += len(content) if totalBytes > maxTotalBytesInPath { return fmt.Errorf("exceeded max total bytes (%d) in path %q", maxTotalBytesInPath, path) } results[entry.Path] = content case "dir": if err := walk(entry.Path); err != nil { return err } } } return nil } if err := walk(path); err != nil { return nil, err } return results, nil } // BuildLineToPositionMap parses a unified diff and returns a map of // filename -> (new line number -> diff position). The diff position is a // 1-indexed offset from the @@ hunk header line for each file. // Only lines that appear in the new file (context lines and additions) are mapped. // Deletion-only lines are not included. 
func BuildLineToPositionMap(diff string) map[string]map[int]int { result := make(map[string]map[int]int) lines := strings.Split(diff, "\n") var currentFile string var position int var newLine int for _, line := range lines { // Detect new file in diff if strings.HasPrefix(line, "+++ b/") { currentFile = strings.TrimPrefix(line, "+++ b/") position = 0 newLine = 0 if result[currentFile] == nil { result[currentFile] = make(map[int]int) } continue } // Skip --- lines (old file header) if strings.HasPrefix(line, "--- ") { continue } // Skip diff --git lines if strings.HasPrefix(line, "diff --git") { continue } // Skip index lines if strings.HasPrefix(line, "index ") { continue } // Parse hunk headers if strings.HasPrefix(line, "@@") { position++ // Extract new file start line from @@ -a,b +c,d @@ newLine = parseHunkNewStart(line) continue } // We need a current file to map lines if currentFile == "" { continue } // Skip "\ No newline at end of file" markers — these are git diff // metadata and not part of the file content. if strings.HasPrefix(line, `\`) { continue } // Process diff content lines if strings.HasPrefix(line, "+") { position++ result[currentFile][newLine] = position newLine++ } else if strings.HasPrefix(line, "-") { position++ // Deletion lines don't map to new line numbers } else if strings.HasPrefix(line, " ") { // Context line (space-prefixed). // Only map if position > 0, which means we've seen a hunk header. // Lines before the first hunk header (position == 0) are not part // of any diff hunk and should be skipped. if position > 0 { position++ result[currentFile][newLine] = position newLine++ } } } return result } // parseHunkNewStart extracts the new-file starting line number from a hunk header. 
// Format: @@ -old_start[,old_count] +new_start[,new_count] @@
// The number is read from the digits that directly follow the first "+" in
// the line. When there is no "+", no digit follows it, or the digits do not
// fit in an int, the function falls back to 1.
func parseHunkNewStart(hunkLine string) int {
	const fallback = 1

	plus := strings.IndexByte(hunkLine, '+')
	if plus == -1 {
		return fallback
	}

	// Keep only the leading run of ASCII digits after the "+".
	digits := hunkLine[plus+1:]
	for i := 0; i < len(digits); i++ {
		if c := digits[i]; c < '0' || c > '9' {
			digits = digits[:i]
			break
		}
	}
	if digits == "" {
		return fallback
	}

	start, err := strconv.Atoi(digits)
	if err != nil {
		// Only reachable when the digit run overflows int.
		return fallback
	}
	return start
}