diff --git a/cmd/review-bot/validatedocmap.go b/cmd/review-bot/validatedocmap.go index 69b2782..2283c1f 100644 --- a/cmd/review-bot/validatedocmap.go +++ b/cmd/review-bot/validatedocmap.go @@ -12,7 +12,59 @@ import ( "gitea.weiker.me/rodin/review-bot/review" ) -// runValidateDocmap implements the `review-bot validate-docmap` subcommand. +// maxDocmapBytes is the maximum size of the doc-map YAML file that will be +// read. Files larger than this are rejected before reading to prevent memory +// exhaustion from an oversized PR-controlled file. +const maxDocmapBytes int64 = 10 * 1024 * 1024 // 10 MB + +// validateDocmapPath checks that localPath is safe to read as the doc-map +// file. It enforces three invariants before the file is opened: +// +// 1. The path resolves to a regular file within resolvedRoot (path +// confinement): prevents a PR-controlled --docmap from reading arbitrary +// host files via absolute paths or ".." traversal. +// 2. The path is not a symlink: prevents denial-of-service via /dev/zero or +// information disclosure via symlinks that point outside the workspace. +// 3. The file does not exceed maxDocmapBytes: prevents memory exhaustion +// from an oversized but legitimately committed doc-map file. +// +// resolvedRoot must already be an absolute, symlink-free path (obtained from +// filepath.Abs + filepath.EvalSymlinks). +func validateDocmapPath(localPath, resolvedRoot string) error { + // Resolve the docmap path to an absolute path. + absPath, err := filepath.Abs(localPath) + if err != nil { + return fmt.Errorf("cannot resolve path: %w", err) + } + + // Lstat: do NOT follow symlinks. We want to inspect the entry itself. + fi, err := os.Lstat(absPath) + if err != nil { + return fmt.Errorf("cannot stat file: %w", err) + } + + // Reject symlinks outright — a symlink can point to /dev/zero or arbitrary + // host paths, bypassing both the confinement check and the size check. + if fi.Mode()&os.ModeSymlink != 0 { + return fmt.Errorf("symlinks are not allowed") + } + + // Confine to resolvedRoot: the cleaned absolute path must be a descendant + // of the repo root. This catches paths that escaped via "..", absolute + // paths that happen to be outside the root, etc. + rel, err := filepath.Rel(resolvedRoot, absPath) + if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(os.PathSeparator)) { + return fmt.Errorf("path must be within --repo-root") + } + + // Enforce size cap before reading to prevent memory exhaustion. + if fi.Size() > maxDocmapBytes { + return fmt.Errorf("file size %d bytes exceeds %d-byte limit", fi.Size(), maxDocmapBytes) + } + + return nil +} + // // It reads changed file paths from stdin (one per line, as produced by // `git diff --name-only`), parses a doc-map YAML file, and performs two checks: @@ -51,6 +103,36 @@ func runValidateDocmap(args []string) int { return 2 } + // Resolve repoRoot first — the docmap path is validated against it below. + // Use an absolute, symlink-free path so a symlinked --repo-root cannot + // bypass the escape guard in validateDocmapPath or checkStaleDocs. + absRoot, err := filepath.Abs(*repoRootFlag) + if err != nil { + fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err) + return 2 + } + resolvedRoot, err := filepath.EvalSymlinks(absRoot) + if err != nil { + if os.IsNotExist(err) { + fmt.Fprintf(errWriter, "Error: --repo-root %q does not exist\n", *repoRootFlag) + } else { + fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err) + } + return 2 + } + + // Harden the docmap file path before reading it. The --docmap flag value + // may reference a PR-controlled file (e.g. .review-bot/doc-map.yml). + // Validate that it: + // 1. Resolves within resolvedRoot (prevent reading arbitrary host files). + // 2. Is not a symlink (prevent /dev/zero or symlink-based host probing). + // 3. Does not exceed maxDocmapBytes (prevent memory exhaustion from an + // oversized committed file). + if err := validateDocmapPath(*docmapFlag, resolvedRoot); err != nil { + fmt.Fprintf(errWriter, "Error: --docmap %q is invalid: %v\n", *docmapFlag, err) + return 2 + } + // Parse docmap YAML. cfg, err := review.ParseDocMapConfig(*docmapFlag) if err != nil { @@ -90,23 +172,6 @@ func runValidateDocmap(args []string) int { } // --- Check 2: Stale docs --- - // Resolve repoRoot to an absolute, symlink-free path before any Rel checks - // so that a symlinked --repo-root cannot be used to bypass the escape - // guard in checkStaleDocs. - absRoot, err := filepath.Abs(*repoRootFlag) - if err != nil { - fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err) - return 2 - } - resolvedRoot, err := filepath.EvalSymlinks(absRoot) - if err != nil { - if os.IsNotExist(err) { - fmt.Fprintf(errWriter, "Error: --repo-root %q does not exist\n", *repoRootFlag) - } else { - fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err) - } - return 2 - } // checkStaleDocs validates each path before touching the filesystem; see // its documentation for the path-traversal hardening applied. staleDocs := checkStaleDocs(cfg, resolvedRoot) diff --git a/cmd/review-bot/validatedocmap_test.go b/cmd/review-bot/validatedocmap_test.go index 08b9f1d..edd131f 100644 --- a/cmd/review-bot/validatedocmap_test.go +++ b/cmd/review-bot/validatedocmap_test.go @@ -9,6 +9,8 @@ import ( ) // makeDocmapYAML writes a YAML string to a temp file and returns its path. +// The file is created in t.TempDir() — use makeDocmapInDir when the docmap +// must be located inside a specific repo-root directory. func makeDocmapYAML(t *testing.T, content string) string { t.Helper() f, err := os.CreateTemp(t.TempDir(), "doc-map-*.yml") @@ -22,6 +24,21 @@ func makeDocmapYAML(t *testing.T, content string) string { return f.Name() } +// makeDocmapInDir writes a YAML string to a file inside dir and returns the +// file path. Use this instead of makeDocmapYAML when also passing --repo-root, +// because validateDocmapPath requires the docmap to be within the repo root. +func makeDocmapInDir(t *testing.T, dir, content string) string { + t.Helper() + if err := os.MkdirAll(filepath.Join(dir, ".review-bot"), 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + path := filepath.Join(dir, ".review-bot", "doc-map.yml") + if err := os.WriteFile(path, []byte(content), 0o644); err != nil { + t.Fatalf("WriteFile: %v", err) + } + return path +} + // makeDocFile creates a file (and any parent dirs) at the given path relative to dir. func makeDocFile(t *testing.T, dir, rel string) { t.Helper() @@ -52,7 +69,7 @@ func TestRunValidateDocmap_Clean(t *testing.T) { dir := t.TempDir() makeDocFile(t, dir, "docs/foo.md") - docmap := makeDocmapYAML(t, ` + docmap := makeDocmapInDir(t, dir, ` mappings: - paths: - "lib/foo/**" @@ -87,10 +104,11 @@ func TestRunValidateDocmap_MissingDocmapFlag(t *testing.T) { } func TestRunValidateDocmap_BadYAML(t *testing.T) { - docmap := makeDocmapYAML(t, "mappings: [{{invalid") + dir := t.TempDir() + docmap := makeDocmapInDir(t, dir, "mappings: [{{invalid") var code int _, stderr := captureOutput(func() { - code = runValidateDocmap([]string{"--docmap", docmap}) + code = runValidateDocmap([]string{"--docmap", docmap, "--repo-root", dir}) }) if code != 2 { t.Errorf("expected exit 2 for bad YAML, got %d", code) @@ -104,7 +122,7 @@ func TestRunValidateDocmap_StaleDocs(t *testing.T) { dir := t.TempDir() // docs/foo.md does NOT exist on disk. - docmap := makeDocmapYAML(t, ` + docmap := makeDocmapInDir(t, dir, ` mappings: - paths: - "lib/foo/**" @@ -166,7 +184,7 @@ func TestRunValidateDocmap_UncoveredFile(t *testing.T) { dir := t.TempDir() makeDocFile(t, dir, "docs/foo.md") - docmap := makeDocmapYAML(t, ` + docmap := makeDocmapInDir(t, dir, ` mappings: - paths: - "lib/foo/**" @@ -193,7 +211,7 @@ func TestRunValidateDocmap_BothFailures(t *testing.T) { dir := t.TempDir() // docs/foo.md intentionally missing - docmap := makeDocmapYAML(t, ` + docmap := makeDocmapInDir(t, dir, ` mappings: - paths: - "lib/foo/**" @@ -220,7 +238,7 @@ func TestRunValidateDocmap_EmptyStdin(t *testing.T) { dir := t.TempDir() makeDocFile(t, dir, "docs/foo.md") - docmap := makeDocmapYAML(t, ` + docmap := makeDocmapInDir(t, dir, ` mappings: - paths: - "lib/foo/**" @@ -244,7 +262,7 @@ func TestRunValidateDocmap_BlankLinesSkipped(t *testing.T) { dir := t.TempDir() makeDocFile(t, dir, "docs/foo.md") - docmap := makeDocmapYAML(t, ` + docmap := makeDocmapInDir(t, dir, ` mappings: - paths: - "lib/foo/**" @@ -270,7 +288,7 @@ func TestRunValidateDocmap_DuplicateDocsDeduped(t *testing.T) { // docs/shared.md intentionally missing — but it appears in TWO mappings. // Should appear only once in stale list. - docmap := makeDocmapYAML(t, ` + docmap := makeDocmapInDir(t, dir, ` mappings: - paths: - "lib/foo/**" @@ -317,7 +335,7 @@ func TestCheckStaleDocs_PathTraversal(t *testing.T) { for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - docmap := makeDocmapYAML(t, ` + docmap := makeDocmapInDir(t, dir, ` mappings: - paths: - "lib/**" @@ -358,7 +376,7 @@ func TestCheckStaleDocs_SymlinkOutside(t *testing.T) { t.Fatalf("Symlink: %v", err) } - docmap := makeDocmapYAML(t, ` + docmap := makeDocmapInDir(t, dir, ` mappings: - paths: - "lib/**" @@ -393,7 +411,7 @@ func TestCheckStaleDocs_SymlinkInsideRepo(t *testing.T) { t.Fatalf("Symlink: %v", err) } - docmap := makeDocmapYAML(t, ` + docmap := makeDocmapInDir(t, dir, ` mappings: - paths: - "lib/**" @@ -411,7 +429,7 @@ mappings: } // TestRunValidateDocmap_SymlinkRepoRoot verifies that a --repo-root that is -// itself a symlink to a valid directory resolves correctly (Finding #2). +// itself a symlink to a valid directory resolves correctly. func TestRunValidateDocmap_SymlinkRepoRoot(t *testing.T) { realDir := t.TempDir() makeDocFile(t, realDir, "docs/foo.md") @@ -422,7 +440,10 @@ func TestRunValidateDocmap_SymlinkRepoRoot(t *testing.T) { t.Fatalf("Symlink: %v", err) } - docmap := makeDocmapYAML(t, ` + // Place the docmap inside realDir so it passes the confinement check. + // (symlinkDir resolves to realDir, so files inside realDir are also inside + // the resolved repo-root.) + docmap := makeDocmapInDir(t, realDir, ` mappings: - paths: - "lib/**" @@ -442,3 +463,90 @@ mappings: t.Errorf("expected 'OK' in stdout, got %q", stdout) } } + +// TestValidateDocmapPath_Symlink verifies that --docmap pointing at a symlink +// is rejected before the file is read (prevents /dev/zero DOS or arbitrary +// host-file reads via PR-controlled symlinks). +func TestValidateDocmapPath_Symlink(t *testing.T) { + dir := t.TempDir() + + // Create a real docmap file to serve as the symlink target. + realDocmap := makeDocmapInDir(t, dir, ` +mappings: + - paths: + - "lib/**" + docs: + - docs/foo.md +`) + + // Create a symlink inside dir pointing to the real docmap. + symlinkPath := filepath.Join(dir, ".review-bot", "doc-map-link.yml") + if err := os.Symlink(realDocmap, symlinkPath); err != nil { + t.Fatalf("Symlink: %v", err) + } + + code, _, stderr := stdinValidateDocmap(t, + "", + []string{"--docmap", symlinkPath, "--repo-root", dir}, + ) + if code != 2 { + t.Errorf("expected exit 2 for symlink docmap, got %d; stderr: %q", code, stderr) + } + if !strings.Contains(stderr, "symlink") && !strings.Contains(stderr, "invalid") { + t.Errorf("expected symlink rejection in stderr, got %q", stderr) + } +} + +// TestValidateDocmapPath_OutsideRepoRoot verifies that --docmap pointing +// outside --repo-root is rejected (prevents reading arbitrary host files). +func TestValidateDocmapPath_OutsideRepoRoot(t *testing.T) { + repoDir := t.TempDir() + + // Create a docmap in a separate temp dir (outside the repo root). + outside := makeDocmapYAML(t, ` +mappings: + - paths: + - "lib/**" + docs: + - docs/foo.md +`) + + code, _, stderr := stdinValidateDocmap(t, + "", + []string{"--docmap", outside, "--repo-root", repoDir}, + ) + if code != 2 { + t.Errorf("expected exit 2 for docmap outside repo-root, got %d; stderr: %q", code, stderr) + } + if !strings.Contains(stderr, "invalid") && !strings.Contains(stderr, "repo-root") { + t.Errorf("expected confinement rejection in stderr, got %q", stderr) + } +} + +// TestValidateDocmapPath_SizeLimit verifies that --docmap files exceeding +// maxDocmapBytes are rejected before reading (prevents memory exhaustion). +func TestValidateDocmapPath_SizeLimit(t *testing.T) { + dir := t.TempDir() + + // Write a file larger than maxDocmapBytes. + bigPath := filepath.Join(dir, ".review-bot", "big-doc-map.yml") + if err := os.MkdirAll(filepath.Dir(bigPath), 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + // Exceed the limit by one byte. + bigContent := make([]byte, maxDocmapBytes+1) + if err := os.WriteFile(bigPath, bigContent, 0o644); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + code, _, stderr := stdinValidateDocmap(t, + "", + []string{"--docmap", bigPath, "--repo-root", dir}, + ) + if code != 2 { + t.Errorf("expected exit 2 for oversized docmap, got %d; stderr: %q", code, stderr) + } + if !strings.Contains(stderr, "limit") && !strings.Contains(stderr, "size") && !strings.Contains(stderr, "invalid") { + t.Errorf("expected size limit error in stderr, got %q", stderr) + } +}