From 90959da8309079258c9a28ef0006a5c55d9346b2 Mon Sep 17 00:00:00 2001 From: claw Date: Tue, 12 May 2026 12:31:10 -0700 Subject: [PATCH] feat(vcs): add util.go with GetAllFilesInPath and BuildLineToPositionMap (#84) Add vcs/util.go containing two utility functions that were specified in issue #78 but omitted from PR #83: - GetAllFilesInPath: recursively fetches all file contents under a path using the vcs.FileReader interface. Returns map[string]string of path -> content. - BuildLineToPositionMap: parses a unified diff and returns per-file mapping of new-file line numbers to GitHub diff-position convention. Position is 1-indexed from @@ hunk headers. Deletion lines are not mapped (no new-file line number). Position counter continues across hunks within the same file but resets for each new file. Unit tests cover: - GetAllFilesInPath: empty dir, flat dir, nested dirs, mixed, errors - BuildLineToPositionMap: single hunk, multi-hunk, deletions not mapped, multiple files, empty diff, no-newline marker, single-line hunk, deleted file --- vcs/util.go | 142 +++++++++++++++ vcs/util_test.go | 456 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 598 insertions(+) create mode 100644 vcs/util.go create mode 100644 vcs/util_test.go diff --git a/vcs/util.go b/vcs/util.go new file mode 100644 index 0000000..f10a8be --- /dev/null +++ b/vcs/util.go @@ -0,0 +1,142 @@ +package vcs + +import ( + "context" + "fmt" + "strconv" + "strings" +) + +// GetAllFilesInPath recursively fetches all file contents under a path using the +// provided FileReader. Returns a map of filepath -> content for all files found. +// If the path points to an empty directory, returns an empty map. +func GetAllFilesInPath(ctx context.Context, client FileReader, owner, repo, path string) (map[string]string, error) { + results := make(map[string]string) + + entries, err := client.ListContents(ctx, owner, repo, path) + if err != nil { + return nil, fmt.Errorf("list contents %q: %w", path, err) + } + + for _, entry := range entries { + switch entry.Type { + case "file": + content, err := client.GetFileContent(ctx, owner, repo, entry.Path, "") + if err != nil { + return nil, fmt.Errorf("get file %q: %w", entry.Path, err) + } + results[entry.Path] = content + case "dir": + subResults, err := GetAllFilesInPath(ctx, client, owner, repo, entry.Path) + if err != nil { + return nil, fmt.Errorf("recurse into %q: %w", entry.Path, err) + } + for k, v := range subResults { + results[k] = v + } + } + } + + return results, nil +} + +// BuildLineToPositionMap parses a unified diff and returns a per-file map of +// new-file line number → diff-position. +// +// Position is a 1-indexed offset from the @@ hunk line (GitHub convention): +// - The @@ hunk header itself counts as position 1 (for the first hunk) +// - Every subsequent content line (context, addition, deletion) increments position +// - A second @@ hunk in the same file continues the count; it does not reset +// - Addition and context lines have new-file line numbers and are mapped +// - Deletion lines have a position but no new-file line; they are not in the map +// - "\ No newline at end of file" markers do not count as a position +// +// Returns: map[filename]map[newFileLine]diffPosition +func BuildLineToPositionMap(diff string) map[string]map[int]int { + result := make(map[string]map[int]int) + + lines := strings.Split(diff, "\n") + var currentFile string + var position int + var newLine int + + for _, line := range lines { + // Detect new file in diff — resets position counter per file. + if strings.HasPrefix(line, "+++ b/") { + currentFile = strings.TrimPrefix(line, "+++ b/") + position = 0 + newLine = 0 + if result[currentFile] == nil { + result[currentFile] = make(map[int]int) + } + continue + } + + // Skip deleted-file target (no new lines to map). + if strings.HasPrefix(line, "+++ /dev/null") { + currentFile = "" + continue + } + + // Skip metadata lines that are not part of position counting. + if strings.HasPrefix(line, "diff --git") || + strings.HasPrefix(line, "--- ") || + strings.HasPrefix(line, "index ") || + strings.HasPrefix(line, "\\") { + continue + } + + // Parse hunk headers — the @@ line itself occupies a position. + if strings.HasPrefix(line, "@@") && currentFile != "" { + position++ + newLine = parseHunkNewStart(line) + continue + } + + if currentFile == "" { + continue + } + + // Content lines within a hunk. + switch { + case strings.HasPrefix(line, "+"): + // Addition: maps to both a position and a new-file line. + position++ + result[currentFile][newLine] = position + newLine++ + case strings.HasPrefix(line, "-"): + // Deletion: occupies a position but has no new-file line number. + position++ + case strings.HasPrefix(line, " "): + // Context line: maps to both a position and a new-file line. + position++ + result[currentFile][newLine] = position + newLine++ + } + // Any other line (empty trailing lines from Split, etc.) is ignored. + } + + return result +} + +// parseHunkNewStart extracts the new-file starting line number from a hunk header. +// Format: @@ -old_start[,old_count] +new_start[,new_count] @@[ optional section heading] +func parseHunkNewStart(hunkLine string) int { + // Find the +N part after the first @@ + plusIdx := strings.Index(hunkLine, "+") + if plusIdx < 0 { + return 1 + } + rest := hunkLine[plusIdx+1:] + + // Read digits until comma, space, or end. + if idx := strings.IndexAny(rest, ", @"); idx > 0 { + rest = rest[:idx] + } + + n, err := strconv.Atoi(rest) + if err != nil { + return 1 + } + return n +} diff --git a/vcs/util_test.go b/vcs/util_test.go new file mode 100644 index 0000000..35b6a8b --- /dev/null +++ b/vcs/util_test.go @@ -0,0 +1,456 @@ +package vcs_test + +import ( + "context" + "fmt" + "testing" + + "gitea.weiker.me/rodin/review-bot/vcs" +) + +// mockFileReader implements vcs.FileReader for testing. +type mockFileReader struct { + contents map[string]string // path -> content + dirs map[string][]vcs.ContentEntry // path -> entries +} + +func (m *mockFileReader) GetFileContent(_ context.Context, _, _, path, _ string) (string, error) { + content, ok := m.contents[path] + if !ok { + return "", fmt.Errorf("file not found: %s", path) + } + return content, nil +} + +func (m *mockFileReader) ListContents(_ context.Context, _, _, path string) ([]vcs.ContentEntry, error) { + entries, ok := m.dirs[path] + if !ok { + return nil, fmt.Errorf("directory not found: %s", path) + } + return entries, nil +} + +// --- GetAllFilesInPath tests --- + +func TestGetAllFilesInPath_EmptyDir(t *testing.T) { + mock := &mockFileReader{ + dirs: map[string][]vcs.ContentEntry{ + "src": {}, + }, + } + + result, err := vcs.GetAllFilesInPath(context.Background(), mock, "owner", "repo", "src") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result) != 0 { + t.Errorf("expected empty map, got %d entries", len(result)) + } +} + +func TestGetAllFilesInPath_FlatDir(t *testing.T) { + mock := &mockFileReader{ + dirs: map[string][]vcs.ContentEntry{ + "src": { + {Name: "main.go", Path: "src/main.go", Type: "file"}, + {Name: "util.go", Path: "src/util.go", Type: "file"}, + }, + }, + contents: map[string]string{ + "src/main.go": "package main", + "src/util.go": "package main\n\nfunc helper() {}", + }, + } + + result, err := vcs.GetAllFilesInPath(context.Background(), mock, "owner", "repo", "src") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result) != 2 { + t.Fatalf("expected 2 files, got %d", len(result)) + } + if result["src/main.go"] != "package main" { + t.Errorf("unexpected content for main.go: %q", result["src/main.go"]) + } + if result["src/util.go"] != "package main\n\nfunc helper() {}" { + t.Errorf("unexpected content for util.go: %q", result["src/util.go"]) + } +} + +func TestGetAllFilesInPath_NestedDirs(t *testing.T) { + mock := &mockFileReader{ + dirs: map[string][]vcs.ContentEntry{ + "src": { + {Name: "main.go", Path: "src/main.go", Type: "file"}, + {Name: "pkg", Path: "src/pkg", Type: "dir"}, + }, + "src/pkg": { + {Name: "lib.go", Path: "src/pkg/lib.go", Type: "file"}, + {Name: "internal", Path: "src/pkg/internal", Type: "dir"}, + }, + "src/pkg/internal": { + {Name: "helper.go", Path: "src/pkg/internal/helper.go", Type: "file"}, + }, + }, + contents: map[string]string{ + "src/main.go": "package main", + "src/pkg/lib.go": "package pkg", + "src/pkg/internal/helper.go": "package internal", + }, + } + + result, err := vcs.GetAllFilesInPath(context.Background(), mock, "owner", "repo", "src") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result) != 3 { + t.Fatalf("expected 3 files, got %d", len(result)) + } + if result["src/main.go"] != "package main" { + t.Errorf("unexpected content for main.go") + } + if result["src/pkg/lib.go"] != "package pkg" { + t.Errorf("unexpected content for lib.go") + } + if result["src/pkg/internal/helper.go"] != "package internal" { + t.Errorf("unexpected content for helper.go") + } +} + +func TestGetAllFilesInPath_Mixed(t *testing.T) { + mock := &mockFileReader{ + dirs: map[string][]vcs.ContentEntry{ + "root": { + {Name: "README.md", Path: "root/README.md", Type: "file"}, + {Name: "empty", Path: "root/empty", Type: "dir"}, + {Name: "docs", Path: "root/docs", Type: "dir"}, + }, + "root/empty": {}, + "root/docs": { + {Name: "guide.md", Path: "root/docs/guide.md", Type: "file"}, + }, + }, + contents: map[string]string{ + "root/README.md": "# Hello", + "root/docs/guide.md": "## Guide", + }, + } + + result, err := vcs.GetAllFilesInPath(context.Background(), mock, "owner", "repo", "root") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result) != 2 { + t.Fatalf("expected 2 files, got %d", len(result)) + } + if result["root/README.md"] != "# Hello" { + t.Errorf("unexpected content for README.md") + } + if result["root/docs/guide.md"] != "## Guide" { + t.Errorf("unexpected content for guide.md") + } +} + +func TestGetAllFilesInPath_ListContentsError(t *testing.T) { + mock := &mockFileReader{ + dirs: map[string][]vcs.ContentEntry{}, + } + + _, err := vcs.GetAllFilesInPath(context.Background(), mock, "owner", "repo", "missing") + if err == nil { + t.Fatal("expected error for missing directory") + } +} + +func TestGetAllFilesInPath_GetFileContentError(t *testing.T) { + mock := &mockFileReader{ + dirs: map[string][]vcs.ContentEntry{ + "src": { + {Name: "bad.go", Path: "src/bad.go", Type: "file"}, + }, + }, + contents: map[string]string{}, + } + + _, err := vcs.GetAllFilesInPath(context.Background(), mock, "owner", "repo", "src") + if err == nil { + t.Fatal("expected error when file content fetch fails") + } +} + +// --- BuildLineToPositionMap tests --- + +func TestBuildLineToPositionMap_SingleHunk(t *testing.T) { + diff := `diff --git a/main.go b/main.go +index abc..def 100644 +--- a/main.go ++++ b/main.go +@@ -5,7 +5,8 @@ package main + import "fmt" + + func main() { +- fmt.Println("old") ++ fmt.Println("new") ++ fmt.Println("added") + return + } +` + + result := vcs.BuildLineToPositionMap(diff) + + fileMap, ok := result["main.go"] + if !ok { + t.Fatal("expected main.go in result") + } + + // @@ line is position 1 + // " import \"fmt\"" -> pos 2, newLine 5 + // " " -> pos 3, newLine 6 + // " func main() {" -> pos 4, newLine 7 + // "-\tfmt.Println..." -> pos 5, no new line + // "+\tfmt.Println..." -> pos 6, newLine 8 + // "+\tfmt.Println..." -> pos 7, newLine 9 + // " \treturn" -> pos 8, newLine 10 + // " }" -> pos 9, newLine 11 + + expected := map[int]int{ + 5: 2, + 6: 3, + 7: 4, + 8: 6, + 9: 7, + 10: 8, + 11: 9, + } + + for line, wantPos := range expected { + gotPos, exists := fileMap[line] + if !exists { + t.Errorf("line %d: expected position %d, but line not in map", line, wantPos) + continue + } + if gotPos != wantPos { + t.Errorf("line %d: expected position %d, got %d", line, wantPos, gotPos) + } + } + + if len(fileMap) != len(expected) { + t.Errorf("expected %d entries, got %d", len(expected), len(fileMap)) + } +} + +func TestBuildLineToPositionMap_MultiHunk(t *testing.T) { + diff := `diff --git a/main.go b/main.go +--- a/main.go ++++ b/main.go +@@ -1,3 +1,4 @@ + package main + ++import "fmt" + func main() { +@@ -10,3 +11,4 @@ func main() { + return ++ // extra + } +` + + result := vcs.BuildLineToPositionMap(diff) + + fileMap, ok := result["main.go"] + if !ok { + t.Fatal("expected main.go in result") + } + + // First hunk: + // @@ line -> pos 1 + // " package main" -> pos 2, newLine 1 + // " " -> pos 3, newLine 2 + // "+import..." -> pos 4, newLine 3 + // " func main.." -> pos 5, newLine 4 + // + // Second hunk (position continues!): + // @@ line -> pos 6 + // " \treturn" -> pos 7, newLine 11 + // "+\t// extra" -> pos 8, newLine 12 + // " }" -> pos 9, newLine 13 + + expected := map[int]int{ + 1: 2, + 2: 3, + 3: 4, + 4: 5, + 11: 7, + 12: 8, + 13: 9, + } + + for line, wantPos := range expected { + gotPos, exists := fileMap[line] + if !exists { + t.Errorf("line %d: expected position %d, but line not in map", line, wantPos) + continue + } + if gotPos != wantPos { + t.Errorf("line %d: expected position %d, got %d", line, wantPos, gotPos) + } + } +} + +func TestBuildLineToPositionMap_DeletionLinesNotInMap(t *testing.T) { + diff := `diff --git a/old.go b/old.go +--- a/old.go ++++ b/old.go +@@ -1,3 +1,1 @@ +-line one +-line two + remaining +` + + result := vcs.BuildLineToPositionMap(diff) + + fileMap, ok := result["old.go"] + if !ok { + t.Fatal("expected old.go in result") + } + + // @@ line -> pos 1 + // "-line one" -> pos 2, no new line + // "-line two" -> pos 3, no new line + // " remaining" -> pos 4, newLine 1 + + if pos, ok := fileMap[1]; !ok || pos != 4 { + t.Errorf("line 1: expected position 4, got %d (exists=%v)", pos, ok) + } + if len(fileMap) != 1 { + t.Errorf("expected 1 entry (only 'remaining'), got %d", len(fileMap)) + } +} + +func TestBuildLineToPositionMap_MultipleFiles(t *testing.T) { + diff := `diff --git a/a.go b/a.go +--- a/a.go ++++ b/a.go +@@ -1,2 +1,3 @@ + package a ++// added + +diff --git a/b.go b/b.go +new file mode 100644 +--- /dev/null ++++ b/b.go +@@ -0,0 +1,3 @@ ++package b ++ ++func B() {} +` + + result := vcs.BuildLineToPositionMap(diff) + + // File a.go + aMap, ok := result["a.go"] + if !ok { + t.Fatal("expected a.go in result") + } + // @@ line -> pos 1 + // " package a" -> pos 2, newLine 1 + // "+// added" -> pos 3, newLine 2 + // " " -> pos 4, newLine 3 + if aMap[1] != 2 { + t.Errorf("a.go line 1: expected pos 2, got %d", aMap[1]) + } + if aMap[2] != 3 { + t.Errorf("a.go line 2: expected pos 3, got %d", aMap[2]) + } + if aMap[3] != 4 { + t.Errorf("a.go line 3: expected pos 4, got %d", aMap[3]) + } + + // File b.go — position resets for new file + bMap, ok := result["b.go"] + if !ok { + t.Fatal("expected b.go in result") + } + // @@ line -> pos 1 + // "+package b" -> pos 2, newLine 1 + // "+" -> pos 3, newLine 2 + // "+func B() {}" -> pos 4, newLine 3 + if bMap[1] != 2 { + t.Errorf("b.go line 1: expected pos 2, got %d", bMap[1]) + } + if bMap[2] != 3 { + t.Errorf("b.go line 2: expected pos 3, got %d", bMap[2]) + } + if bMap[3] != 4 { + t.Errorf("b.go line 3: expected pos 4, got %d", bMap[3]) + } +} + +func TestBuildLineToPositionMap_EmptyDiff(t *testing.T) { + result := vcs.BuildLineToPositionMap("") + if len(result) != 0 { + t.Errorf("expected empty map for empty diff, got %d entries", len(result)) + } +} + +func TestBuildLineToPositionMap_NoNewlineMarker(t *testing.T) { + diff := `diff --git a/a.go b/a.go +--- a/a.go ++++ b/a.go +@@ -1,2 +1,2 @@ +-old ++new +\ No newline at end of file +` + + result := vcs.BuildLineToPositionMap(diff) + + fileMap := result["a.go"] + // @@ line -> pos 1 + // "-old" -> pos 2 + // "+new" -> pos 3, newLine 1 + // "\ No.." -> not counted + if fileMap[1] != 3 { + t.Errorf("line 1: expected position 3, got %d", fileMap[1]) + } + if len(fileMap) != 1 { + t.Errorf("expected 1 entry, got %d", len(fileMap)) + } +} + +func TestBuildLineToPositionMap_SingleLineHunk(t *testing.T) { + diff := `diff --git a/single.go b/single.go +--- a/single.go ++++ b/single.go +@@ -1 +1 @@ +-old ++new +` + + result := vcs.BuildLineToPositionMap(diff) + + fileMap := result["single.go"] + // @@ line -> pos 1 + // "-old" -> pos 2 + // "+new" -> pos 3, newLine 1 + if fileMap[1] != 3 { + t.Errorf("line 1: expected position 3, got %d", fileMap[1]) + } +} + +func TestBuildLineToPositionMap_DeletedFile(t *testing.T) { + diff := `diff --git a/deleted.go b/deleted.go +deleted file mode 100644 +--- a/deleted.go ++++ /dev/null +@@ -1,3 +0,0 @@ +-package deleted +- +-func Gone() {} +` + + result := vcs.BuildLineToPositionMap(diff) + + if _, ok := result["deleted.go"]; ok { + t.Error("deleted file should not appear in result") + } +}