From 93268869c5351dbcdd855af4d93b956e2971f0dd Mon Sep 17 00:00:00 2001 From: Rodin Date: Fri, 15 May 2026 04:46:38 +0000 Subject: [PATCH 01/11] feat(#141): export FileCoveredByDocMap helper in review/docmap.go MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds FileCoveredByDocMap(cfg *DocMapConfig, file string) bool — a thin wrapper over the existing unexported mappingMatches that lets cmd/ check per-file docmap coverage without duplicating glob logic. Also adds unit tests covering matched globs, non-matching paths, empty file, and empty config. --- review/docmap.go | 12 +++++++++++ review/docmap_test.go | 49 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/review/docmap.go b/review/docmap.go index 8916b6f..7317936 100644 --- a/review/docmap.go +++ b/review/docmap.go @@ -66,6 +66,18 @@ func ParseDocMapConfig(localPath string) (*DocMapConfig, error) { return &cfg, nil } +// FileCoveredByDocMap reports whether at least one paths: glob in any mapping +// of cfg matches the given file path. It is used by static validation tooling +// (e.g. the validate-docmap subcommand) to check per-file docmap coverage. +func FileCoveredByDocMap(cfg *DocMapConfig, file string) bool { + for _, mapping := range cfg.Mappings { + if mappingMatches(mapping.Paths, []string{file}) { + return true + } + } + return false +} + // MatchDocs returns deduplicated doc paths for the given changed file paths. // A mapping matches if any of its path globs matches any of the changed files. func MatchDocs(cfg *DocMapConfig, changedFiles []string) []string { diff --git a/review/docmap_test.go b/review/docmap_test.go index 7ad51c0..5e42be7 100644 --- a/review/docmap_test.go +++ b/review/docmap_test.go @@ -436,3 +436,52 @@ func writeTempYAML(t *testing.T, content string) string { } return filepath.Clean(f.Name()) } + +// ============================================================ +// FileCoveredByDocMap +// ============================================================ + +func TestFileCoveredByDocMap(t *testing.T) { + cfg := &DocMapConfig{ + Mappings: []DocMapping{ + { + Paths: []string{"lib/foo/**", "lib/bar/*.go"}, + Docs: []string{"docs/foo.md"}, + }, + { + Paths: []string{"cmd/**"}, + Docs: []string{"docs/cmd.md"}, + }, + }, + } + + cases := []struct { + file string + covered bool + }{ + {"lib/foo/baz.ex", true}, + {"lib/foo/sub/deep.ex", true}, + {"lib/bar/util.go", true}, + {"lib/bar/sub/util.go", false}, // *.go only matches one level + {"cmd/main.go", true}, + {"cmd/sub/main.go", true}, + {"internal/secret.go", false}, + {"", false}, + } + + for _, tc := range cases { + t.Run(tc.file, func(t *testing.T) { + got := FileCoveredByDocMap(cfg, tc.file) + if got != tc.covered { + t.Errorf("FileCoveredByDocMap(%q) = %v, want %v", tc.file, got, tc.covered) + } + }) + } +} + +func TestFileCoveredByDocMap_EmptyConfig(t *testing.T) { + cfg := &DocMapConfig{} + if FileCoveredByDocMap(cfg, "lib/foo/bar.go") { + t.Error("expected false for empty config, got true") + } +} From 69da5df254dde62ddc9df3a22cbea35f6383bb04 Mon Sep 17 00:00:00 2001 From: Rodin Date: Fri, 15 May 2026 04:47:59 +0000 Subject: [PATCH 02/11] feat(#141): add validate-docmap subcommand Adds 'review-bot validate-docmap' for CI hard-fail on docmap coverage gaps. Usage: git diff --name-only origin/main HEAD | \ review-bot validate-docmap --docmap .review-bot/doc-map.yml --repo-root . Flags: --docmap (required) path to doc-map YAML file --repo-root (optional, default '.') root for resolving docs: paths Two checks, both always run: 1. Coverage: every stdin file must match at least one paths: glob. 2. Stale docs: every docs: entry must exist on disk under --repo-root. Exit codes: 0=clean, 1=failures found, 2=usage/parse error. Tests cover: clean pass, uncovered file, stale doc, both failures, empty stdin, blank-line stdin, and duplicate docs: deduplication. --- cmd/review-bot/main.go | 2 + cmd/review-bot/validatedocmap.go | 139 ++++++++++++ cmd/review-bot/validatedocmap_test.go | 296 ++++++++++++++++++++++++++ 3 files changed, 437 insertions(+) create mode 100644 cmd/review-bot/validatedocmap.go create mode 100644 cmd/review-bot/validatedocmap_test.go diff --git a/cmd/review-bot/main.go b/cmd/review-bot/main.go index e80fd0c..1ee0380 100644 --- a/cmd/review-bot/main.go +++ b/cmd/review-bot/main.go @@ -64,6 +64,8 @@ func main() { switch os.Args[1] { case "validate-url": os.Exit(runValidateURL(os.Args[2:])) + case "validate-docmap": + os.Exit(runValidateDocmap(os.Args[2:])) } } diff --git a/cmd/review-bot/validatedocmap.go b/cmd/review-bot/validatedocmap.go new file mode 100644 index 0000000..41fac7b --- /dev/null +++ b/cmd/review-bot/validatedocmap.go @@ -0,0 +1,139 @@ +package main + +import ( + "bufio" + "flag" + "fmt" + "io" + "os" + "path/filepath" + "strings" + + "gitea.weiker.me/rodin/review-bot/review" +) + +// runValidateDocmap implements the `review-bot validate-docmap` subcommand. +// +// It reads changed file paths from stdin (one per line, as produced by +// `git diff --name-only`), parses a doc-map YAML file, and performs two checks: +// +// 1. Coverage check: every changed file must be matched by at least one +// paths: glob in the docmap. Fails if any file is uncovered. +// +// 2. Stale-docs check: every docs: entry in the docmap must exist on disk +// (relative to --repo-root). Fails if any path is missing. +// +// Both checks always run — all failures are reported before exiting. +// +// Exit codes: +// +// 0 — clean (all files covered, all docs exist) +// 1 — one or more coverage or stale-doc failures +// 2 — usage error, missing flag, or YAML parse error +func runValidateDocmap(args []string) int { + fs := flag.NewFlagSet("validate-docmap", flag.ContinueOnError) + fs.SetOutput(errWriter) + + docmapFlag := fs.String("docmap", "", "Path to doc-map YAML file (required)") + repoRootFlag := fs.String("repo-root", ".", "Repo root for resolving docs: paths (default: cwd)") + + if err := fs.Parse(args); err != nil { + // flag.ContinueOnError already wrote the error to errWriter. + return 2 + } + + if *docmapFlag == "" { + fmt.Fprintln(errWriter, "Error: --docmap is required") + fmt.Fprintln(errWriter, "") + fmt.Fprintln(errWriter, "usage: review-bot validate-docmap --docmap [--repo-root ]") + fmt.Fprintln(errWriter, " Changed files are read from stdin, one per line.") + fmt.Fprintln(errWriter, " Example: git diff --name-only origin/main HEAD | review-bot validate-docmap --docmap .review-bot/doc-map.yml") + return 2 + } + + // Parse docmap YAML. + cfg, err := review.ParseDocMapConfig(*docmapFlag) + if err != nil { + fmt.Fprintf(errWriter, "Error: failed to parse docmap %q: %v\n", *docmapFlag, err) + return 2 + } + + // Read changed files from stdin. + changedFiles, err := readLines(os.Stdin) + if err != nil { + fmt.Fprintf(errWriter, "Error: failed to read stdin: %v\n", err) + return 2 + } + + failed := false + + // --- Check 1: Coverage --- + var uncovered []string + for _, f := range changedFiles { + if !review.FileCoveredByDocMap(cfg, f) { + uncovered = append(uncovered, f) + } + } + if len(uncovered) > 0 { + failed = true + fmt.Fprintln(errWriter, "ERROR: changed files with no docmap coverage:") + for _, f := range uncovered { + fmt.Fprintf(errWriter, " %s\n", f) + } + } + + // --- Check 2: Stale docs --- + repoRoot := filepath.Clean(*repoRootFlag) + staleDocs := checkStaleDocs(cfg, repoRoot) + if len(staleDocs) > 0 { + failed = true + fmt.Fprintln(errWriter, "ERROR: stale docmap docs: entries (paths do not exist):") + for _, d := range staleDocs { + fmt.Fprintf(errWriter, " %s\n", d) + } + } + + if failed { + return 1 + } + + fmt.Fprintln(outWriter, "OK: docmap is valid") + return 0 +} + +// checkStaleDocs returns deduplicated docs: entries that do not exist under repoRoot. +func checkStaleDocs(cfg *review.DocMapConfig, repoRoot string) []string { + seen := make(map[string]struct{}) + var stale []string + + for _, mapping := range cfg.Mappings { + for _, docPath := range mapping.Docs { + if docPath == "" { + continue + } + if _, ok := seen[docPath]; ok { + continue + } + seen[docPath] = struct{}{} + + fullPath := filepath.Join(repoRoot, filepath.FromSlash(docPath)) + if _, err := os.Stat(fullPath); err != nil { + stale = append(stale, docPath) + } + } + } + return stale +} + +// readLines reads all non-empty trimmed lines from r. +func readLines(r io.Reader) ([]string, error) { + scanner := bufio.NewScanner(r) + var lines []string + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line != "" { + lines = append(lines, line) + } + } + return lines, scanner.Err() +} diff --git a/cmd/review-bot/validatedocmap_test.go b/cmd/review-bot/validatedocmap_test.go new file mode 100644 index 0000000..b19ec4f --- /dev/null +++ b/cmd/review-bot/validatedocmap_test.go @@ -0,0 +1,296 @@ +package main + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +// makeDocmapYAML writes a YAML string to a temp file and returns its path. +func makeDocmapYAML(t *testing.T, content string) string { + t.Helper() + f, err := os.CreateTemp(t.TempDir(), "doc-map-*.yml") + if err != nil { + t.Fatalf("CreateTemp: %v", err) + } + defer f.Close() + if _, err := f.WriteString(content); err != nil { + t.Fatalf("WriteString: %v", err) + } + return f.Name() +} + +// makeDocFile creates a file (and any parent dirs) at the given path relative to dir. +func makeDocFile(t *testing.T, dir, rel string) { + t.Helper() + full := filepath.Join(dir, rel) + if err := os.MkdirAll(filepath.Dir(full), 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + if err := os.WriteFile(full, []byte("# doc\n"), 0o644); err != nil { + t.Fatalf("WriteFile: %v", err) + } +} + +// captureOutput redirects outWriter/errWriter to buffers for the duration of f. +func captureOutput(f func()) (stdout, stderr string) { + var outBuf, errBuf bytes.Buffer + origOut, origErr := outWriter, errWriter + outWriter = &outBuf + errWriter = &errBuf + defer func() { + outWriter = origOut + errWriter = origErr + }() + f() + return outBuf.String(), errBuf.String() +} + +func TestRunValidateDocmap_Clean(t *testing.T) { + dir := t.TempDir() + makeDocFile(t, dir, "docs/foo.md") + + docmap := makeDocmapYAML(t, ` +mappings: + - paths: + - "lib/foo/**" + docs: + - docs/foo.md +`) + + var code int + stdout, _ := captureOutput(func() { + code = runValidateDocmap([]string{ + "--docmap", docmap, + "--repo-root", dir, + }) + }) + // Provide stdin via the global os.Stdin would require process tricks. + // The implementation reads os.Stdin directly; for tests we need a different approach. + // See TestRunValidateDocmap_WithStdin_* below which use a helper that replaces os.Stdin. + // This case: empty stdin (no changed files) → clean. + if code != 0 { + t.Errorf("expected exit 0 for clean (empty stdin), got %d", code) + } + if !strings.Contains(stdout, "OK") { + t.Errorf("expected 'OK' in stdout, got %q", stdout) + } +} + +func TestRunValidateDocmap_MissingDocmapFlag(t *testing.T) { + var code int + _, stderr := captureOutput(func() { + code = runValidateDocmap([]string{}) + }) + if code != 2 { + t.Errorf("expected exit 2 for missing --docmap, got %d", code) + } + if !strings.Contains(stderr, "--docmap") { + t.Errorf("expected --docmap in stderr, got %q", stderr) + } +} + +func TestRunValidateDocmap_BadYAML(t *testing.T) { + docmap := makeDocmapYAML(t, "mappings: [{{invalid") + var code int + _, stderr := captureOutput(func() { + code = runValidateDocmap([]string{"--docmap", docmap}) + }) + if code != 2 { + t.Errorf("expected exit 2 for bad YAML, got %d", code) + } + if !strings.Contains(stderr, "failed to parse") { + t.Errorf("expected parse error in stderr, got %q", stderr) + } +} + +func TestRunValidateDocmap_StaleDocs(t *testing.T) { + dir := t.TempDir() + // docs/foo.md does NOT exist on disk. + + docmap := makeDocmapYAML(t, ` +mappings: + - paths: + - "lib/foo/**" + docs: + - docs/foo.md +`) + + var code int + _, stderr := captureOutput(func() { + code = runValidateDocmap([]string{ + "--docmap", docmap, + "--repo-root", dir, + }) + }) + if code != 1 { + t.Errorf("expected exit 1 for stale docs, got %d", code) + } + if !strings.Contains(stderr, "docs/foo.md") { + t.Errorf("expected stale path in stderr, got %q", stderr) + } + if !strings.Contains(stderr, "stale docmap") { + t.Errorf("expected 'stale docmap' in stderr, got %q", stderr) + } +} + +// stdinValidateDocmap runs runValidateDocmap with a synthetic stdin. +func stdinValidateDocmap(t *testing.T, stdinContent string, args []string) (code int, stdout, stderr string) { + t.Helper() + // Write stdin content to a temp file and redirect os.Stdin. + f, err := os.CreateTemp(t.TempDir(), "stdin-*") + if err != nil { + t.Fatalf("CreateTemp for stdin: %v", err) + } + defer f.Close() + if _, err := f.WriteString(stdinContent); err != nil { + t.Fatalf("WriteString for stdin: %v", err) + } + if _, err := f.Seek(0, 0); err != nil { + t.Fatalf("Seek for stdin: %v", err) + } + + origStdin := os.Stdin + os.Stdin = f + defer func() { os.Stdin = origStdin }() + + stdout, stderr = captureOutput(func() { + code = runValidateDocmap(args) + }) + return +} + +func TestRunValidateDocmap_UncoveredFile(t *testing.T) { + dir := t.TempDir() + makeDocFile(t, dir, "docs/foo.md") + + docmap := makeDocmapYAML(t, ` +mappings: + - paths: + - "lib/foo/**" + docs: + - docs/foo.md +`) + + code, _, stderr := stdinValidateDocmap(t, + "lib/bar/uncovered.ex\n", + []string{"--docmap", docmap, "--repo-root", dir}, + ) + if code != 1 { + t.Errorf("expected exit 1 for uncovered file, got %d", code) + } + if !strings.Contains(stderr, "lib/bar/uncovered.ex") { + t.Errorf("expected uncovered file in stderr, got %q", stderr) + } + if !strings.Contains(stderr, "no docmap coverage") { + t.Errorf("expected 'no docmap coverage' in stderr, got %q", stderr) + } +} + +func TestRunValidateDocmap_BothFailures(t *testing.T) { + dir := t.TempDir() + // docs/foo.md intentionally missing + + docmap := makeDocmapYAML(t, ` +mappings: + - paths: + - "lib/foo/**" + docs: + - docs/foo.md +`) + + code, _, stderr := stdinValidateDocmap(t, + "lib/bar/uncovered.ex\n", + []string{"--docmap", docmap, "--repo-root", dir}, + ) + if code != 1 { + t.Errorf("expected exit 1 for both failures, got %d", code) + } + if !strings.Contains(stderr, "no docmap coverage") { + t.Errorf("expected coverage error in stderr, got %q", stderr) + } + if !strings.Contains(stderr, "stale docmap") { + t.Errorf("expected stale-docs error in stderr, got %q", stderr) + } +} + +func TestRunValidateDocmap_EmptyStdin(t *testing.T) { + dir := t.TempDir() + makeDocFile(t, dir, "docs/foo.md") + + docmap := makeDocmapYAML(t, ` +mappings: + - paths: + - "lib/foo/**" + docs: + - docs/foo.md +`) + + code, stdout, _ := stdinValidateDocmap(t, + "", + []string{"--docmap", docmap, "--repo-root", dir}, + ) + if code != 0 { + t.Errorf("expected exit 0 for empty stdin, got %d", code) + } + if !strings.Contains(stdout, "OK") { + t.Errorf("expected 'OK' in stdout, got %q", stdout) + } +} + +func TestRunValidateDocmap_BlankLinesSkipped(t *testing.T) { + dir := t.TempDir() + makeDocFile(t, dir, "docs/foo.md") + + docmap := makeDocmapYAML(t, ` +mappings: + - paths: + - "lib/foo/**" + docs: + - docs/foo.md +`) + + // stdin with only blank lines → effectively empty, should be clean + code, stdout, _ := stdinValidateDocmap(t, + "\n \n\n", + []string{"--docmap", docmap, "--repo-root", dir}, + ) + if code != 0 { + t.Errorf("expected exit 0 for blank-only stdin, got %d", code) + } + if !strings.Contains(stdout, "OK") { + t.Errorf("expected 'OK' in stdout for blank-only stdin, got %q", stdout) + } +} + +func TestRunValidateDocmap_DuplicateDocsDeduped(t *testing.T) { + dir := t.TempDir() + // docs/shared.md intentionally missing — but it appears in TWO mappings. + // Should appear only once in stale list. + + docmap := makeDocmapYAML(t, ` +mappings: + - paths: + - "lib/foo/**" + docs: + - docs/shared.md + - paths: + - "lib/bar/**" + docs: + - docs/shared.md +`) + + code, _, stderr := stdinValidateDocmap(t, + "", + []string{"--docmap", docmap, "--repo-root", dir}, + ) + if code != 1 { + t.Errorf("expected exit 1 for stale doc, got %d", code) + } + count := strings.Count(stderr, "docs/shared.md") + if count != 1 { + t.Errorf("expected docs/shared.md to appear exactly once in stderr (deduplicated), got %d occurrences: %q", count, stderr) + } +} From 7cdba1418147913e9360240547bfd8331b9ee07d Mon Sep 17 00:00:00 2001 From: Rodin Date: Fri, 15 May 2026 04:48:32 +0000 Subject: [PATCH 03/11] docs(#141): add validate-docmap subcommand to README Documents the new validate-docmap subcommand under a new '## Subcommands' section, alongside the existing validate-url documentation. --- README.md | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/README.md b/README.md index 65e492d..7113d98 100644 --- a/README.md +++ b/README.md @@ -296,6 +296,40 @@ review-bot \ --conventions-file CONVENTIONS.md ``` +## Subcommands + +### `validate-docmap` + +Verifies that a `doc-map.yml` is consistent before running a review. Two checks: + +1. **Coverage**: every changed file is matched by at least one `paths:` glob. +2. **Stale docs**: every `docs:` entry exists on disk under `--repo-root`. + +```bash +# Typical CI usage — pipe git diff into the command +git diff --name-only origin/main HEAD | \ + review-bot validate-docmap \ + --docmap .review-bot/doc-map.yml \ + --repo-root . +``` + +| Flag | Required | Default | Description | +|------|----------|---------|-------------| +| `--docmap` | Yes | — | Path to doc-map YAML file | +| `--repo-root` | No | `.` (cwd) | Root for resolving `docs:` paths | + +Exit codes: `0`=clean, `1`=failures found, `2`=usage/parse error. + +### `validate-url` + +Resolves a URL and verifies all IPs are publicly routable (used in CI to prevent SSRF). + +```bash +review-bot validate-url https://gitea.example.com +``` + +Exit codes: `0`=safe, `1`=blocked/private IP, `2`=error. + ## Environment Variables All flags have environment variable equivalents: From 2ecbd86e24fb25dda62283d072189f7c66338fe6 Mon Sep 17 00:00:00 2001 From: Rodin Date: Fri, 15 May 2026 04:50:21 +0000 Subject: [PATCH 04/11] =?UTF-8?q?fix(#141):=20use=20stdinValidateDocmap=20?= =?UTF-8?q?in=20Clean=20test=20=E2=80=94=20avoid=20real=20os.Stdin=20depen?= =?UTF-8?q?dency?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TestRunValidateDocmap_Clean was reading real os.Stdin (fragile in CI). Switch to stdinValidateDocmap with a covered file and empty-stdin test already covered by TestRunValidateDocmap_EmptyStdin. --- cmd/review-bot/validatedocmap_test.go | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/cmd/review-bot/validatedocmap_test.go b/cmd/review-bot/validatedocmap_test.go index b19ec4f..55da6f7 100644 --- a/cmd/review-bot/validatedocmap_test.go +++ b/cmd/review-bot/validatedocmap_test.go @@ -60,19 +60,13 @@ mappings: - docs/foo.md `) - var code int - stdout, _ := captureOutput(func() { - code = runValidateDocmap([]string{ - "--docmap", docmap, - "--repo-root", dir, - }) - }) - // Provide stdin via the global os.Stdin would require process tricks. - // The implementation reads os.Stdin directly; for tests we need a different approach. - // See TestRunValidateDocmap_WithStdin_* below which use a helper that replaces os.Stdin. - // This case: empty stdin (no changed files) → clean. + // A covered file with all docs existing → clean. + code, stdout, _ := stdinValidateDocmap(t, + "lib/foo/bar.ex\n", + []string{"--docmap", docmap, "--repo-root", dir}, + ) if code != 0 { - t.Errorf("expected exit 0 for clean (empty stdin), got %d", code) + t.Errorf("expected exit 0 for clean, got %d", code) } if !strings.Contains(stdout, "OK") { t.Errorf("expected 'OK' in stdout, got %q", stdout) From 3f8da76b42447852692b1f4f0603b4a831c770df Mon Sep 17 00:00:00 2001 From: Rodin Date: Thu, 14 May 2026 23:43:24 -0700 Subject: [PATCH 05/11] fix(#141): harden checkStaleDocs against path traversal Export review.ValidateDocPath and use it in checkStaleDocs before calling os.Stat. Add filepath.Clean + filepath.Rel confinement check as defense-in-depth to ensure doc paths from PR-controlled YAML cannot probe filesystem locations outside repoRoot. Also add tests covering: ../../etc/passwd, /etc/passwd, ../outside, a valid present path, and a valid missing path. Addresses security finding from security-review-bot on PR #142. --- cmd/review-bot/validatedocmap.go | 31 ++++++++++++++++-- cmd/review-bot/validatedocmap_test.go | 47 +++++++++++++++++++++++++++ review/docmap.go | 6 ++-- review/docmap_test.go | 6 ++-- 4 files changed, 82 insertions(+), 8 deletions(-) diff --git a/cmd/review-bot/validatedocmap.go b/cmd/review-bot/validatedocmap.go index 41fac7b..0545138 100644 --- a/cmd/review-bot/validatedocmap.go +++ b/cmd/review-bot/validatedocmap.go @@ -83,6 +83,8 @@ func runValidateDocmap(args []string) int { } // --- Check 2: Stale docs --- + // checkStaleDocs validates each path before touching the filesystem; see + // its documentation for the path-traversal hardening applied. repoRoot := filepath.Clean(*repoRootFlag) staleDocs := checkStaleDocs(cfg, repoRoot) if len(staleDocs) > 0 { @@ -101,7 +103,14 @@ func runValidateDocmap(args []string) int { return 0 } -// checkStaleDocs returns deduplicated docs: entries that do not exist under repoRoot. +// checkStaleDocs returns deduplicated docs: entries that do not exist under +// repoRoot. +// +// Path-traversal hardening: each docPath is validated with +// review.ValidateDocPath (rejects absolute paths and ".." segments) and then +// confined to repoRoot via filepath.Clean + filepath.Rel before os.Stat is +// called. Paths that fail either check are treated as invalid (reported as +// stale) without touching the host filesystem. func checkStaleDocs(cfg *review.DocMapConfig, repoRoot string) []string { seen := make(map[string]struct{}) var stale []string @@ -116,7 +125,25 @@ func checkStaleDocs(cfg *review.DocMapConfig, repoRoot string) []string { } seen[docPath] = struct{}{} - fullPath := filepath.Join(repoRoot, filepath.FromSlash(docPath)) + // Guard 1: reject absolute paths and ".." segments sourced from + // PR-controlled YAML before joining with repoRoot. + if err := review.ValidateDocPath(docPath); err != nil { + stale = append(stale, docPath) + continue + } + + // Guard 2: verify the cleaned joined path does not escape repoRoot. + // filepath.Clean resolves any remaining ".." after the join; the + // filepath.Rel check confirms the path is still under repoRoot. + fullPath := filepath.Clean(filepath.Join(repoRoot, filepath.FromSlash(docPath))) + rel, err := filepath.Rel(repoRoot, fullPath) + if err != nil || strings.HasPrefix(rel, "..") { + stale = append(stale, docPath) + continue + } + + // Safe to stat: path is relative, contains no "..", and is + // confined within repoRoot. if _, err := os.Stat(fullPath); err != nil { stale = append(stale, docPath) } diff --git a/cmd/review-bot/validatedocmap_test.go b/cmd/review-bot/validatedocmap_test.go index 55da6f7..b9a0a45 100644 --- a/cmd/review-bot/validatedocmap_test.go +++ b/cmd/review-bot/validatedocmap_test.go @@ -288,3 +288,50 @@ mappings: t.Errorf("expected docs/shared.md to appear exactly once in stderr (deduplicated), got %d occurrences: %q", count, stderr) } } + +// TestCheckStaleDocs_PathTraversal verifies that checkStaleDocs rejects +// traversal and absolute paths without touching the host filesystem. +func TestCheckStaleDocs_PathTraversal(t *testing.T) { + dir := t.TempDir() + + // Baseline: a valid doc that exists. + makeDocFile(t, dir, "docs/valid.md") + + tests := []struct { + name string + docPath string + wantStale bool + }{ + {"dot-dot traversal", "../../etc/passwd", true}, + {"dot-dot single", "../outside", true}, + {"absolute path", "/etc/passwd", true}, + {"valid present path", "docs/valid.md", false}, + {"valid missing path", "docs/missing.md", true}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + docmap := makeDocmapYAML(t, ` +mappings: + - paths: + - "lib/**" + docs: + - `+tc.docPath+` +`) + code, _, stderr := stdinValidateDocmap(t, + "", + []string{"--docmap", docmap, "--repo-root", dir}, + ) + + if tc.wantStale { + if code != 1 { + t.Errorf("path %q: expected exit 1 (stale/invalid), got %d; stderr: %q", tc.docPath, code, stderr) + } + } else { + if code != 0 { + t.Errorf("path %q: expected exit 0 (valid), got %d; stderr: %q", tc.docPath, code, stderr) + } + } + }) + } +} diff --git a/review/docmap.go b/review/docmap.go index 7317936..c629167 100644 --- a/review/docmap.go +++ b/review/docmap.go @@ -257,7 +257,7 @@ type docEntry struct { // If the path is a directory, all .md files under it are returned. // If it's a file, a single entry is returned. func loadDocEntries(ctx context.Context, fetcher DocFetcher, owner, repo, docPath string) ([]docEntry, error) { - if err := validateDocPath(docPath); err != nil { + if err := ValidateDocPath(docPath); err != nil { return nil, fmt.Errorf("doc path %q rejected: %w", docPath, err) } @@ -310,11 +310,11 @@ func readFileBytes(path string) ([]byte, error) { return os.ReadFile(path) } -// validateDocPath rejects doc paths that could cause path traversal via the +// ValidateDocPath rejects doc paths that could cause path traversal via the // VCS API (absolute paths, any ".." segment). Defense-in-depth: the VCS API // should already scope paths to the repo, but we validate locally to avoid // any quirk in backend path handling. -func validateDocPath(p string) error { +func ValidateDocPath(p string) error { if filepath.IsAbs(p) { return fmt.Errorf("absolute paths not allowed") } diff --git a/review/docmap_test.go b/review/docmap_test.go index 5e42be7..0ae6778 100644 --- a/review/docmap_test.go +++ b/review/docmap_test.go @@ -11,7 +11,7 @@ import ( // fakeDocFetcher is a mock DocFetcher for tests. type fakeDocFetcher struct { - files map[string]string // path -> content + files map[string]string // path -> content dirs map[string]map[string]string // dir path -> (file path -> content) } @@ -384,7 +384,7 @@ func TestValidateDocPath(t *testing.T) { "a/b/c", } for _, p := range valid { - if err := validateDocPath(p); err != nil { + if err := ValidateDocPath(p); err != nil { t.Errorf("expected valid path %q to pass, got error: %v", p, err) } } @@ -397,7 +397,7 @@ func TestValidateDocPath(t *testing.T) { "a/b/../c", } for _, p := range invalid { - if err := validateDocPath(p); err == nil { + if err := ValidateDocPath(p); err == nil { t.Errorf("expected path %q to be rejected, but it was accepted", p) } } From b22de19aa10ce33e2cf9ddf8df6fa78d53a7e9ae Mon Sep 17 00:00:00 2001 From: Rodin Date: Thu, 14 May 2026 23:50:12 -0700 Subject: [PATCH 06/11] fix(#141): address security-review-bot REQUEST_CHANGES findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Finding #1 [MAJOR]: replace os.Stat with os.Lstat in checkStaleDocs to prevent symlink traversal. Symlinks under repoRoot could probe arbitrary host file existence; Lstat never follows them. Symlinked docs are now treated as stale. Finding #2 [MINOR]: resolve --repo-root with filepath.Abs + filepath.EvalSymlinks before passing to checkStaleDocs, so a symlinked repo-root cannot bypass the filepath.Rel escape guard. Finding #3 [NIT]: reject backslashes in ValidateDocPath to prevent Windows platform edge cases where a path separator may be normalised differently by the host OS or VCS backend. Tests added: - TestCheckStaleDocs_SymlinkOutside: symlink inside repo → outside - TestCheckStaleDocs_SymlinkInsideRepo: intra-repo symlink also rejected - TestRunValidateDocmap_SymlinkRepoRoot: symlinked --repo-root resolves OK - TestValidateDocPath_Backslash: backslash paths rejected - Backslash cases added to TestValidateDocPath invalid slice All go test ./... pass, go vet ./... clean. --- cmd/review-bot/validatedocmap.go | 36 +++++++-- cmd/review-bot/validatedocmap_test.go | 101 ++++++++++++++++++++++++++ review/docmap.go | 10 ++- review/docmap_test.go | 25 +++++++ 4 files changed, 161 insertions(+), 11 deletions(-) diff --git a/cmd/review-bot/validatedocmap.go b/cmd/review-bot/validatedocmap.go index 0545138..00e8cc1 100644 --- a/cmd/review-bot/validatedocmap.go +++ b/cmd/review-bot/validatedocmap.go @@ -83,10 +83,22 @@ func runValidateDocmap(args []string) int { } // --- Check 2: Stale docs --- + // Resolve repoRoot to an absolute, symlink-free path before any Rel checks + // so that a symlinked --repo-root cannot be used to bypass the escape + // guard in checkStaleDocs. + absRoot, err := filepath.Abs(*repoRootFlag) + if err != nil { + fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err) + return 2 + } + resolvedRoot, err := filepath.EvalSymlinks(absRoot) + if err != nil { + fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err) + return 2 + } // checkStaleDocs validates each path before touching the filesystem; see // its documentation for the path-traversal hardening applied. - repoRoot := filepath.Clean(*repoRootFlag) - staleDocs := checkStaleDocs(cfg, repoRoot) + staleDocs := checkStaleDocs(cfg, resolvedRoot) if len(staleDocs) > 0 { failed = true fmt.Fprintln(errWriter, "ERROR: stale docmap docs: entries (paths do not exist):") @@ -108,9 +120,11 @@ func runValidateDocmap(args []string) int { // // Path-traversal hardening: each docPath is validated with // review.ValidateDocPath (rejects absolute paths and ".." segments) and then -// confined to repoRoot via filepath.Clean + filepath.Rel before os.Stat is -// called. Paths that fail either check are treated as invalid (reported as -// stale) without touching the host filesystem. +// confined to repoRoot via filepath.Clean + filepath.Rel before os.Lstat is +// called. Symlinks are treated as stale — a CI tool running against +// PR-controlled content must not follow symlinks that could probe arbitrary +// host paths. Paths that fail any check are treated as invalid (reported as +// stale) without following any symlinks. func checkStaleDocs(cfg *review.DocMapConfig, repoRoot string) []string { seen := make(map[string]struct{}) var stale []string @@ -142,9 +156,15 @@ func checkStaleDocs(cfg *review.DocMapConfig, repoRoot string) []string { continue } - // Safe to stat: path is relative, contains no "..", and is - // confined within repoRoot. - if _, err := os.Stat(fullPath); err != nil { + // Use Lstat (not Stat) so symlinks are never followed. A symlink + // under repoRoot could point anywhere on the host, allowing a + // malicious PR to probe file existence. Treat symlinks as stale. + fi, err := os.Lstat(fullPath) + if err != nil { + stale = append(stale, docPath) + continue + } + if fi.Mode()&os.ModeSymlink != 0 { stale = append(stale, docPath) } } diff --git a/cmd/review-bot/validatedocmap_test.go b/cmd/review-bot/validatedocmap_test.go index b9a0a45..b2aa704 100644 --- a/cmd/review-bot/validatedocmap_test.go +++ b/cmd/review-bot/validatedocmap_test.go @@ -335,3 +335,104 @@ mappings: }) } } + +// TestCheckStaleDocs_SymlinkOutside verifies that a symlink under repoRoot +// pointing outside the repo is treated as stale (not followed). +func TestCheckStaleDocs_SymlinkOutside(t *testing.T) { + dir := t.TempDir() + + // Create a symlink inside repoRoot pointing to a file outside the repo. + // We point at /etc/hostname (exists on Linux CI) but the test does not + // depend on that file existing — Lstat must reject the symlink itself. + linkPath := filepath.Join(dir, "docs", "secret.md") + if err := os.MkdirAll(filepath.Dir(linkPath), 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + if err := os.Symlink("/etc/hostname", linkPath); err != nil { + t.Fatalf("Symlink: %v", err) + } + + docmap := makeDocmapYAML(t, ` +mappings: + - paths: + - "lib/**" + docs: + - docs/secret.md +`) + + code, _, stderr := stdinValidateDocmap(t, + "", + []string{"--docmap", docmap, "--repo-root", dir}, + ) + if code != 1 { + t.Errorf("expected exit 1 for symlink doc, got %d; stderr: %q", code, stderr) + } + if !strings.Contains(stderr, "docs/secret.md") { + t.Errorf("expected stale path in stderr, got %q", stderr) + } +} + +// TestCheckStaleDocs_SymlinkInsideRepo verifies that a symlink pointing to +// another file *within* the repo is also treated as stale. We refuse all +// symlinks regardless of target to keep the check simple and safe. +func TestCheckStaleDocs_SymlinkInsideRepo(t *testing.T) { + dir := t.TempDir() + + // Real doc file. + makeDocFile(t, dir, "docs/real.md") + + // Symlink inside repo pointing at the real file. + linkPath := filepath.Join(dir, "docs", "link.md") + if err := os.Symlink(filepath.Join(dir, "docs", "real.md"), linkPath); err != nil { + t.Fatalf("Symlink: %v", err) + } + + docmap := makeDocmapYAML(t, ` +mappings: + - paths: + - "lib/**" + docs: + - docs/link.md +`) + + code, _, stderr := stdinValidateDocmap(t, + "", + []string{"--docmap", docmap, "--repo-root", dir}, + ) + if code != 1 { + t.Errorf("expected exit 1 for symlink doc (even intra-repo), got %d; stderr: %q", code, stderr) + } +} + +// TestRunValidateDocmap_SymlinkRepoRoot verifies that a --repo-root that is +// itself a symlink to a valid directory resolves correctly (Finding #2). +func TestRunValidateDocmap_SymlinkRepoRoot(t *testing.T) { + realDir := t.TempDir() + makeDocFile(t, realDir, "docs/foo.md") + + // Create a symlink pointing at realDir. + symlinkDir := filepath.Join(t.TempDir(), "link-root") + if err := os.Symlink(realDir, symlinkDir); err != nil { + t.Fatalf("Symlink: %v", err) + } + + docmap := makeDocmapYAML(t, ` +mappings: + - paths: + - "lib/**" + docs: + - docs/foo.md +`) + + // Using the symlinked repo-root: the real doc exists → should be clean. + code, stdout, stderr := stdinValidateDocmap(t, + "lib/foo.go\n", + []string{"--docmap", docmap, "--repo-root", symlinkDir}, + ) + if code != 0 { + t.Errorf("expected exit 0 for symlinked repo-root with existing doc, got %d; stderr: %q", code, stderr) + } + if !strings.Contains(stdout, "OK") { + t.Errorf("expected 'OK' in stdout, got %q", stdout) + } +} diff --git a/review/docmap.go b/review/docmap.go index c629167..761dabf 100644 --- a/review/docmap.go +++ b/review/docmap.go @@ -311,10 +311,14 @@ func readFileBytes(path string) ([]byte, error) { } // ValidateDocPath rejects doc paths that could cause path traversal via the -// VCS API (absolute paths, any ".." segment). Defense-in-depth: the VCS API -// should already scope paths to the repo, but we validate locally to avoid -// any quirk in backend path handling. +// VCS API (absolute paths, any ".." segment, backslashes). Defense-in-depth: +// the VCS API should already scope paths to the repo, but we validate locally +// to avoid any quirk in backend path handling. Backslashes are rejected +// explicitly to prevent Windows platform edge cases. func ValidateDocPath(p string) error { + if strings.Contains(p, "\\") { + return fmt.Errorf("backslashes not allowed in doc paths") + } if filepath.IsAbs(p) { return fmt.Errorf("absolute paths not allowed") } diff --git a/review/docmap_test.go b/review/docmap_test.go index 0ae6778..2674d15 100644 --- a/review/docmap_test.go +++ b/review/docmap_test.go @@ -395,6 +395,10 @@ func TestValidateDocPath(t *testing.T) { "docs/../../../etc/passwd", "../sibling-repo/file.md", "a/b/../c", + // Backslashes must be rejected (Finding #3 — Windows platform edge cases). + `docs\foo.md`, + `docs\..\secret`, + `\absolute`, } for _, p := range invalid { if err := ValidateDocPath(p); err == nil { @@ -420,6 +424,27 @@ func TestLoadMatchingDocs_PathTraversalRejected(t *testing.T) { } } +// TestValidateDocPath_Backslash verifies that backslash-bearing paths are +// rejected to prevent Windows platform edge cases where a path separator +// could be normalised differently by the host OS or VCS backend. +func TestValidateDocPath_Backslash(t *testing.T) { + backslashPaths := []string{ + `docs\foo.md`, + `docs\subdir\file.md`, + `\absolute`, + } + for _, p := range backslashPaths { + if err := ValidateDocPath(p); err == nil { + t.Errorf("expected backslash path %q to be rejected, but it was accepted", p) + } + } + + // Sanity: forward-slash path must still be accepted. + if err := ValidateDocPath("docs/foo.md"); err != nil { + t.Errorf("expected forward-slash path to be accepted, got: %v", err) + } +} + // ============================================================ // Helpers // ============================================================ From 24247a855021ccccdc8ec56e6ff4475cd7cfff25 Mon Sep 17 00:00:00 2001 From: Rodin Date: Fri, 15 May 2026 07:03:40 +0000 Subject: [PATCH 07/11] =?UTF-8?q?chore(#141):=20update=20dev-loop=20status?= =?UTF-8?q?=20=E2=80=94=20ready=20for=20PR=20submission?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- TODO.md | 65 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/TODO.md b/TODO.md index 9e8f5b3..977fc7d 100644 --- a/TODO.md +++ b/TODO.md @@ -1,42 +1,47 @@ -## Dev Loop Status: 2026-05-15 04:35 UTC +## Dev Loop Status: 2026-05-15 07:02 UTC -**Repository:** review-bot (rodin/review-bot on Gitea) -**Status:** ✅ OPTIMAL +**Issue:** #141 — validate-docmap subcommand +**Branch:** issue-141 +**Status:** ✅ READY FOR PR SUBMISSION -### Health Check +### Implementation Complete -- **Working tree:** clean -- **Branch:** main (up to date with origin) -- **Build:** ✅ passes (`go build ./cmd/review-bot`) -- **Tests:** ✅ ALL PASS (6/6 packages) -- **Vet:** ✅ clean -- **Open issues:** 0 -- **Open PRs:** 0 +- ✅ `cmd/review-bot/validatedocmap.go` — main subcommand implementation (139 lines) +- ✅ `cmd/review-bot/validatedocmap_test.go` — 9 test cases (all passing) +- ✅ `review/docmap.go` — exported `FileCoveredByDocMap` helper +- ✅ `cmd/review-bot/main.go` — integrated `validate-docmap` subcommand +- ✅ `README.md` — documented subcommand with examples and flags -### Recent Changes +### Test Results -- ✅ Merged PR #140: test coverage improvements (44.6% → 49.3%) -- ✅ PR #137 (doc-map feature) merged successfully with all reviews approved +``` +TestRunValidateDocmap_Clean ✅ +TestRunValidateDocmap_MissingDocmapFlag ✅ +TestRunValidateDocmap_BadYAML ✅ +TestRunValidateDocmap_StaleDocs ✅ +TestRunValidateDocmap_UncoveredFile ✅ +TestRunValidateDocmap_BothFailures ✅ +TestRunValidateDocmap_EmptyStdin ✅ +TestRunValidateDocmap_BlankLinesSkipped ✅ +TestRunValidateDocmap_DuplicateDocsDeduped ✅ +``` -### Coverage +### Pre-Push Checks -| Package | Coverage | -|---------|----------| -| cmd/review-bot | 49.3% ↑ | -| gitea | 85.2% | -| github | 86.3% | -| review | 92.0% | +- ✅ All packages pass: `go test ./...` (6/6) +- ✅ `go vet ./...` — clean +- ✅ `go build ./cmd/review-bot` — clean +- ✅ `scripts/check-deps.sh` — pass -### Status Summary +### Commits -All recent feature work (doc-map, coverage improvements) successfully integrated. Repository is stable and ready for next development cycle. +1. feat(#141): add validate-docmap subcommand +2. docs(#141): add validate-docmap subcommand to README +3. fix(#141): use stdinValidateDocmap in Clean test — avoid real os.Stdin dependency -### Next Priority +### Next Steps -- Continue increasing cmd/review-bot coverage (target: ≥60%) -- Monitor prod logs for edge cases -- VCS integration stable; GitHub + Gitea paths clear +- [ ] Create PR via gitea-rodin token (title: "feat(#141): add validate-docmap subcommand") +- [ ] Labels: none yet (AI reviewers will assign) +- [ ] Link issue #141 ---- - -_Dev-loop cycle complete at 04:35 UTC._ From 5c6758e9906e068dd84a5cce2b8927c66418a6a9 Mon Sep 17 00:00:00 2001 From: Rodin Date: Fri, 15 May 2026 00:24:28 -0700 Subject: [PATCH 08/11] =?UTF-8?q?fix(#141):=20address=20review=20feedback?= =?UTF-8?q?=20=E2=80=94=20tighten=20escape=20check,=20improve=20error=20me?= =?UTF-8?q?ssages,=20add=20comments?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cmd/review-bot/validatedocmap.go | 15 +++++++++++++-- cmd/review-bot/validatedocmap_test.go | 6 ++++++ review/docmap.go | 10 +++++----- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/cmd/review-bot/validatedocmap.go b/cmd/review-bot/validatedocmap.go index 00e8cc1..69b2782 100644 --- a/cmd/review-bot/validatedocmap.go +++ b/cmd/review-bot/validatedocmap.go @@ -68,8 +68,15 @@ func runValidateDocmap(args []string) int { failed := false // --- Check 1: Coverage --- + // Note: an empty docmap (no mappings) means every changed file is + // uncovered — there are no patterns to match against. This is intentional: + // if you declare a doc-map, every changed file must be accounted for. + // On empty stdin the check is vacuously true (no files to cover). var uncovered []string for _, f := range changedFiles { + // Normalize Windows-style backslashes to forward slashes so that + // changed-file paths from git on Windows match doc-map globs. + f = strings.ReplaceAll(f, "\\", "/") if !review.FileCoveredByDocMap(cfg, f) { uncovered = append(uncovered, f) } @@ -93,7 +100,11 @@ func runValidateDocmap(args []string) int { } resolvedRoot, err := filepath.EvalSymlinks(absRoot) if err != nil { - fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err) + if os.IsNotExist(err) { + fmt.Fprintf(errWriter, "Error: --repo-root %q does not exist\n", *repoRootFlag) + } else { + fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err) + } return 2 } // checkStaleDocs validates each path before touching the filesystem; see @@ -151,7 +162,7 @@ func checkStaleDocs(cfg *review.DocMapConfig, repoRoot string) []string { // filepath.Rel check confirms the path is still under repoRoot. fullPath := filepath.Clean(filepath.Join(repoRoot, filepath.FromSlash(docPath))) rel, err := filepath.Rel(repoRoot, fullPath) - if err != nil || strings.HasPrefix(rel, "..") { + if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(os.PathSeparator)) { stale = append(stale, docPath) continue } diff --git a/cmd/review-bot/validatedocmap_test.go b/cmd/review-bot/validatedocmap_test.go index b2aa704..08b9f1d 100644 --- a/cmd/review-bot/validatedocmap_test.go +++ b/cmd/review-bot/validatedocmap_test.go @@ -131,6 +131,12 @@ mappings: } // stdinValidateDocmap runs runValidateDocmap with a synthetic stdin. +// +// Implementation note: we write stdinContent to a temp file and point +// os.Stdin at it. The defer f.Close() fires after stdinValidateDocmap +// returns, which is after runValidateDocmap has finished reading stdin +// synchronously — so the file is not closed while still in use. +// Tests must not call t.Parallel() while sharing the global os.Stdin. func stdinValidateDocmap(t *testing.T, stdinContent string, args []string) (code int, stdout, stderr string) { t.Helper() // Write stdin content to a temp file and redirect os.Stdin. diff --git a/review/docmap.go b/review/docmap.go index 761dabf..4381329 100644 --- a/review/docmap.go +++ b/review/docmap.go @@ -310,11 +310,11 @@ func readFileBytes(path string) ([]byte, error) { return os.ReadFile(path) } -// ValidateDocPath rejects doc paths that could cause path traversal via the -// VCS API (absolute paths, any ".." segment, backslashes). Defense-in-depth: -// the VCS API should already scope paths to the repo, but we validate locally -// to avoid any quirk in backend path handling. Backslashes are rejected -// explicitly to prevent Windows platform edge cases. +// ValidateDocPath rejects doc paths that could cause path traversal +// (absolute paths, any ".." segment, backslashes). Defense-in-depth: callers +// must also confine the joined path to the repo root via filepath.Rel before +// any filesystem access. Backslashes are rejected explicitly to prevent +// Windows platform edge cases. func ValidateDocPath(p string) error { if strings.Contains(p, "\\") { return fmt.Errorf("backslashes not allowed in doc paths") From 83a18354743998bd80a88b669d6ae0887281dae9 Mon Sep 17 00:00:00 2001 From: Rodin Date: Fri, 15 May 2026 00:24:32 -0700 Subject: [PATCH 09/11] =?UTF-8?q?chore(#141):=20remove=20TODO.md=20?= =?UTF-8?q?=E2=80=94=20dev-loop=20artifact,=20not=20project=20documentatio?= =?UTF-8?q?n?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- TODO.md | 47 ----------------------------------------------- 1 file changed, 47 deletions(-) delete mode 100644 TODO.md diff --git a/TODO.md b/TODO.md deleted file mode 100644 index 977fc7d..0000000 --- a/TODO.md +++ /dev/null @@ -1,47 +0,0 @@ -## Dev Loop Status: 2026-05-15 07:02 UTC - -**Issue:** #141 — validate-docmap subcommand -**Branch:** issue-141 -**Status:** ✅ READY FOR PR SUBMISSION - -### Implementation Complete - -- ✅ `cmd/review-bot/validatedocmap.go` — main subcommand implementation (139 lines) -- ✅ `cmd/review-bot/validatedocmap_test.go` — 9 test cases (all passing) -- ✅ `review/docmap.go` — exported `FileCoveredByDocMap` helper -- ✅ `cmd/review-bot/main.go` — integrated `validate-docmap` subcommand -- ✅ `README.md` — documented subcommand with examples and flags - -### Test Results - -``` -TestRunValidateDocmap_Clean ✅ -TestRunValidateDocmap_MissingDocmapFlag ✅ -TestRunValidateDocmap_BadYAML ✅ -TestRunValidateDocmap_StaleDocs ✅ -TestRunValidateDocmap_UncoveredFile ✅ -TestRunValidateDocmap_BothFailures ✅ -TestRunValidateDocmap_EmptyStdin ✅ -TestRunValidateDocmap_BlankLinesSkipped ✅ -TestRunValidateDocmap_DuplicateDocsDeduped ✅ -``` - -### Pre-Push Checks - -- ✅ All packages pass: `go test ./...` (6/6) -- ✅ `go vet ./...` — clean -- ✅ `go build ./cmd/review-bot` — clean -- ✅ `scripts/check-deps.sh` — pass - -### Commits - -1. feat(#141): add validate-docmap subcommand -2. docs(#141): add validate-docmap subcommand to README -3. fix(#141): use stdinValidateDocmap in Clean test — avoid real os.Stdin dependency - -### Next Steps - -- [ ] Create PR via gitea-rodin token (title: "feat(#141): add validate-docmap subcommand") -- [ ] Labels: none yet (AI reviewers will assign) -- [ ] Link issue #141 - From 7d7a49e9676da4bce2de9865ccabbc9a8ad347e8 Mon Sep 17 00:00:00 2001 From: Rodin Date: Fri, 15 May 2026 07:33:49 +0000 Subject: [PATCH 10/11] =?UTF-8?q?fix(#141):=20harden=20docmap=20file=20pat?= =?UTF-8?q?h=20=E2=80=94=20confine=20to=20repo-root,=20reject=20symlinks,?= =?UTF-8?q?=20cap=20size?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address security-review-bot REQUEST_CHANGES findings on PR #142: MAJOR (Finding #1): Docmap file path was read directly without validating it is within --repo-root or checking for symlinks. A malicious PR could create .review-bot/doc-map.yml as a symlink to /dev/zero (resource exhaustion) or an arbitrary host file (information disclosure). Fix: Add validateDocmapPath() called before ParseDocMapConfig(). It: - Resolves --repo-root first (filepath.Abs + EvalSymlinks), moved up before docmap parsing so both checks share the same resolved root - Uses os.Lstat to detect symlinks and rejects them outright - Confirms the docmap path is within resolvedRoot via filepath.Rel - Checks file size against maxDocmapBytes (10 MB) before reading MINOR (Finding #2): No upper bound on docmap YAML size. Fix: os.Lstat size check enforces maxDocmapBytes cap before os.ReadFile. Tests: - TestValidateDocmapPath_Symlink: docmap is a symlink → exit 2 - TestValidateDocmapPath_OutsideRepoRoot: docmap outside repo-root → exit 2 - TestValidateDocmapPath_SizeLimit: docmap exceeds 10 MB cap → exit 2 - Updated all existing tests to use makeDocmapInDir() so the docmap lives inside the repo-root, satisfying the new confinement check --- cmd/review-bot/validatedocmap.go | 101 +++++++++++++++---- cmd/review-bot/validatedocmap_test.go | 136 +++++++++++++++++++++++--- 2 files changed, 205 insertions(+), 32 deletions(-) diff --git a/cmd/review-bot/validatedocmap.go b/cmd/review-bot/validatedocmap.go index 69b2782..2283c1f 100644 --- a/cmd/review-bot/validatedocmap.go +++ b/cmd/review-bot/validatedocmap.go @@ -12,7 +12,59 @@ import ( "gitea.weiker.me/rodin/review-bot/review" ) -// runValidateDocmap implements the `review-bot validate-docmap` subcommand. +// maxDocmapBytes is the maximum size of the doc-map YAML file that will be +// read. Files larger than this are rejected before reading to prevent memory +// exhaustion from an oversized PR-controlled file. +const maxDocmapBytes int64 = 10 * 1024 * 1024 // 10 MB + +// validateDocmapPath checks that localPath is safe to read as the doc-map +// file. It enforces three invariants before the file is opened: +// +// 1. The path resolves to a regular file within resolvedRoot (path +// confinement): prevents a PR-controlled --docmap from reading arbitrary +// host files via absolute paths or ".." traversal. +// 2. The path is not a symlink: prevents denial-of-service via /dev/zero or +// information disclosure via symlinks that point outside the workspace. +// 3. The file does not exceed maxDocmapBytes: prevents memory exhaustion +// from an oversized but legitimately committed doc-map file. +// +// resolvedRoot must already be an absolute, symlink-free path (obtained from +// filepath.Abs + filepath.EvalSymlinks). +func validateDocmapPath(localPath, resolvedRoot string) error { + // Resolve the docmap path to an absolute path. + absPath, err := filepath.Abs(localPath) + if err != nil { + return fmt.Errorf("cannot resolve path: %w", err) + } + + // Lstat: do NOT follow symlinks. We want to inspect the entry itself. + fi, err := os.Lstat(absPath) + if err != nil { + return fmt.Errorf("cannot stat file: %w", err) + } + + // Reject symlinks outright — a symlink can point to /dev/zero or arbitrary + // host paths, bypassing both the confinement check and the size check. + if fi.Mode()&os.ModeSymlink != 0 { + return fmt.Errorf("symlinks are not allowed") + } + + // Confine to resolvedRoot: the cleaned absolute path must be a descendant + // of the repo root. This catches paths that escaped via "..", absolute + // paths that happen to be outside the root, etc. + rel, err := filepath.Rel(resolvedRoot, absPath) + if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(os.PathSeparator)) { + return fmt.Errorf("path must be within --repo-root") + } + + // Enforce size cap before reading to prevent memory exhaustion. + if fi.Size() > maxDocmapBytes { + return fmt.Errorf("file size %d bytes exceeds %d-byte limit", fi.Size(), maxDocmapBytes) + } + + return nil +} + // // It reads changed file paths from stdin (one per line, as produced by // `git diff --name-only`), parses a doc-map YAML file, and performs two checks: @@ -51,6 +103,36 @@ func runValidateDocmap(args []string) int { return 2 } + // Resolve repoRoot first — the docmap path is validated against it below. + // Use an absolute, symlink-free path so a symlinked --repo-root cannot + // bypass the escape guard in validateDocmapPath or checkStaleDocs. + absRoot, err := filepath.Abs(*repoRootFlag) + if err != nil { + fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err) + return 2 + } + resolvedRoot, err := filepath.EvalSymlinks(absRoot) + if err != nil { + if os.IsNotExist(err) { + fmt.Fprintf(errWriter, "Error: --repo-root %q does not exist\n", *repoRootFlag) + } else { + fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err) + } + return 2 + } + + // Harden the docmap file path before reading it. The --docmap flag value + // may reference a PR-controlled file (e.g. .review-bot/doc-map.yml). + // Validate that it: + // 1. Resolves within resolvedRoot (prevent reading arbitrary host files). + // 2. Is not a symlink (prevent /dev/zero or symlink-based host probing). + // 3. Does not exceed maxDocmapBytes (prevent memory exhaustion from an + // oversized committed file). + if err := validateDocmapPath(*docmapFlag, resolvedRoot); err != nil { + fmt.Fprintf(errWriter, "Error: --docmap %q is invalid: %v\n", *docmapFlag, err) + return 2 + } + // Parse docmap YAML. cfg, err := review.ParseDocMapConfig(*docmapFlag) if err != nil { @@ -90,23 +172,6 @@ func runValidateDocmap(args []string) int { } // --- Check 2: Stale docs --- - // Resolve repoRoot to an absolute, symlink-free path before any Rel checks - // so that a symlinked --repo-root cannot be used to bypass the escape - // guard in checkStaleDocs. - absRoot, err := filepath.Abs(*repoRootFlag) - if err != nil { - fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err) - return 2 - } - resolvedRoot, err := filepath.EvalSymlinks(absRoot) - if err != nil { - if os.IsNotExist(err) { - fmt.Fprintf(errWriter, "Error: --repo-root %q does not exist\n", *repoRootFlag) - } else { - fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err) - } - return 2 - } // checkStaleDocs validates each path before touching the filesystem; see // its documentation for the path-traversal hardening applied. staleDocs := checkStaleDocs(cfg, resolvedRoot) diff --git a/cmd/review-bot/validatedocmap_test.go b/cmd/review-bot/validatedocmap_test.go index 08b9f1d..edd131f 100644 --- a/cmd/review-bot/validatedocmap_test.go +++ b/cmd/review-bot/validatedocmap_test.go @@ -9,6 +9,8 @@ import ( ) // makeDocmapYAML writes a YAML string to a temp file and returns its path. +// The file is created in t.TempDir() — use makeDocmapInDir when the docmap +// must be located inside a specific repo-root directory. func makeDocmapYAML(t *testing.T, content string) string { t.Helper() f, err := os.CreateTemp(t.TempDir(), "doc-map-*.yml") @@ -22,6 +24,21 @@ func makeDocmapYAML(t *testing.T, content string) string { return f.Name() } +// makeDocmapInDir writes a YAML string to a file inside dir and returns the +// file path. Use this instead of makeDocmapYAML when also passing --repo-root, +// because validateDocmapPath requires the docmap to be within the repo root. +func makeDocmapInDir(t *testing.T, dir, content string) string { + t.Helper() + if err := os.MkdirAll(filepath.Join(dir, ".review-bot"), 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + path := filepath.Join(dir, ".review-bot", "doc-map.yml") + if err := os.WriteFile(path, []byte(content), 0o644); err != nil { + t.Fatalf("WriteFile: %v", err) + } + return path +} + // makeDocFile creates a file (and any parent dirs) at the given path relative to dir. func makeDocFile(t *testing.T, dir, rel string) { t.Helper() @@ -52,7 +69,7 @@ func TestRunValidateDocmap_Clean(t *testing.T) { dir := t.TempDir() makeDocFile(t, dir, "docs/foo.md") - docmap := makeDocmapYAML(t, ` + docmap := makeDocmapInDir(t, dir, ` mappings: - paths: - "lib/foo/**" @@ -87,10 +104,11 @@ func TestRunValidateDocmap_MissingDocmapFlag(t *testing.T) { } func TestRunValidateDocmap_BadYAML(t *testing.T) { - docmap := makeDocmapYAML(t, "mappings: [{{invalid") + dir := t.TempDir() + docmap := makeDocmapInDir(t, dir, "mappings: [{{invalid") var code int _, stderr := captureOutput(func() { - code = runValidateDocmap([]string{"--docmap", docmap}) + code = runValidateDocmap([]string{"--docmap", docmap, "--repo-root", dir}) }) if code != 2 { t.Errorf("expected exit 2 for bad YAML, got %d", code) @@ -104,7 +122,7 @@ func TestRunValidateDocmap_StaleDocs(t *testing.T) { dir := t.TempDir() // docs/foo.md does NOT exist on disk. - docmap := makeDocmapYAML(t, ` + docmap := makeDocmapInDir(t, dir, ` mappings: - paths: - "lib/foo/**" @@ -166,7 +184,7 @@ func TestRunValidateDocmap_UncoveredFile(t *testing.T) { dir := t.TempDir() makeDocFile(t, dir, "docs/foo.md") - docmap := makeDocmapYAML(t, ` + docmap := makeDocmapInDir(t, dir, ` mappings: - paths: - "lib/foo/**" @@ -193,7 +211,7 @@ func TestRunValidateDocmap_BothFailures(t *testing.T) { dir := t.TempDir() // docs/foo.md intentionally missing - docmap := makeDocmapYAML(t, ` + docmap := makeDocmapInDir(t, dir, ` mappings: - paths: - "lib/foo/**" @@ -220,7 +238,7 @@ func TestRunValidateDocmap_EmptyStdin(t *testing.T) { dir := t.TempDir() makeDocFile(t, dir, "docs/foo.md") - docmap := makeDocmapYAML(t, ` + docmap := makeDocmapInDir(t, dir, ` mappings: - paths: - "lib/foo/**" @@ -244,7 +262,7 @@ func TestRunValidateDocmap_BlankLinesSkipped(t *testing.T) { dir := t.TempDir() makeDocFile(t, dir, "docs/foo.md") - docmap := makeDocmapYAML(t, ` + docmap := makeDocmapInDir(t, dir, ` mappings: - paths: - "lib/foo/**" @@ -270,7 +288,7 @@ func TestRunValidateDocmap_DuplicateDocsDeduped(t *testing.T) { // docs/shared.md intentionally missing — but it appears in TWO mappings. // Should appear only once in stale list. - docmap := makeDocmapYAML(t, ` + docmap := makeDocmapInDir(t, dir, ` mappings: - paths: - "lib/foo/**" @@ -317,7 +335,7 @@ func TestCheckStaleDocs_PathTraversal(t *testing.T) { for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - docmap := makeDocmapYAML(t, ` + docmap := makeDocmapInDir(t, dir, ` mappings: - paths: - "lib/**" @@ -358,7 +376,7 @@ func TestCheckStaleDocs_SymlinkOutside(t *testing.T) { t.Fatalf("Symlink: %v", err) } - docmap := makeDocmapYAML(t, ` + docmap := makeDocmapInDir(t, dir, ` mappings: - paths: - "lib/**" @@ -393,7 +411,7 @@ func TestCheckStaleDocs_SymlinkInsideRepo(t *testing.T) { t.Fatalf("Symlink: %v", err) } - docmap := makeDocmapYAML(t, ` + docmap := makeDocmapInDir(t, dir, ` mappings: - paths: - "lib/**" @@ -411,7 +429,7 @@ mappings: } // TestRunValidateDocmap_SymlinkRepoRoot verifies that a --repo-root that is -// itself a symlink to a valid directory resolves correctly (Finding #2). +// itself a symlink to a valid directory resolves correctly. func TestRunValidateDocmap_SymlinkRepoRoot(t *testing.T) { realDir := t.TempDir() makeDocFile(t, realDir, "docs/foo.md") @@ -422,7 +440,10 @@ func TestRunValidateDocmap_SymlinkRepoRoot(t *testing.T) { t.Fatalf("Symlink: %v", err) } - docmap := makeDocmapYAML(t, ` + // Place the docmap inside realDir so it passes the confinement check. + // (symlinkDir resolves to realDir, so files inside realDir are also inside + // the resolved repo-root.) + docmap := makeDocmapInDir(t, realDir, ` mappings: - paths: - "lib/**" @@ -442,3 +463,90 @@ mappings: t.Errorf("expected 'OK' in stdout, got %q", stdout) } } + +// TestValidateDocmapPath_Symlink verifies that --docmap pointing at a symlink +// is rejected before the file is read (prevents /dev/zero DOS or arbitrary +// host-file reads via PR-controlled symlinks). +func TestValidateDocmapPath_Symlink(t *testing.T) { + dir := t.TempDir() + + // Create a real docmap file to serve as the symlink target. + realDocmap := makeDocmapInDir(t, dir, ` +mappings: + - paths: + - "lib/**" + docs: + - docs/foo.md +`) + + // Create a symlink inside dir pointing to the real docmap. + symlinkPath := filepath.Join(dir, ".review-bot", "doc-map-link.yml") + if err := os.Symlink(realDocmap, symlinkPath); err != nil { + t.Fatalf("Symlink: %v", err) + } + + code, _, stderr := stdinValidateDocmap(t, + "", + []string{"--docmap", symlinkPath, "--repo-root", dir}, + ) + if code != 2 { + t.Errorf("expected exit 2 for symlink docmap, got %d; stderr: %q", code, stderr) + } + if !strings.Contains(stderr, "symlink") && !strings.Contains(stderr, "invalid") { + t.Errorf("expected symlink rejection in stderr, got %q", stderr) + } +} + +// TestValidateDocmapPath_OutsideRepoRoot verifies that --docmap pointing +// outside --repo-root is rejected (prevents reading arbitrary host files). +func TestValidateDocmapPath_OutsideRepoRoot(t *testing.T) { + repoDir := t.TempDir() + + // Create a docmap in a separate temp dir (outside the repo root). + outside := makeDocmapYAML(t, ` +mappings: + - paths: + - "lib/**" + docs: + - docs/foo.md +`) + + code, _, stderr := stdinValidateDocmap(t, + "", + []string{"--docmap", outside, "--repo-root", repoDir}, + ) + if code != 2 { + t.Errorf("expected exit 2 for docmap outside repo-root, got %d; stderr: %q", code, stderr) + } + if !strings.Contains(stderr, "invalid") && !strings.Contains(stderr, "repo-root") { + t.Errorf("expected confinement rejection in stderr, got %q", stderr) + } +} + +// TestValidateDocmapPath_SizeLimit verifies that --docmap files exceeding +// maxDocmapBytes are rejected before reading (prevents memory exhaustion). +func TestValidateDocmapPath_SizeLimit(t *testing.T) { + dir := t.TempDir() + + // Write a file larger than maxDocmapBytes. + bigPath := filepath.Join(dir, ".review-bot", "big-doc-map.yml") + if err := os.MkdirAll(filepath.Dir(bigPath), 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + // Exceed the limit by one byte. + bigContent := make([]byte, maxDocmapBytes+1) + if err := os.WriteFile(bigPath, bigContent, 0o644); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + code, _, stderr := stdinValidateDocmap(t, + "", + []string{"--docmap", bigPath, "--repo-root", dir}, + ) + if code != 2 { + t.Errorf("expected exit 2 for oversized docmap, got %d; stderr: %q", code, stderr) + } + if !strings.Contains(stderr, "limit") && !strings.Contains(stderr, "size") && !strings.Contains(stderr, "invalid") { + t.Errorf("expected size limit error in stderr, got %q", stderr) + } +} From af8b29fa5d9bec667c1b4f182edcd79c10dfe553 Mon Sep 17 00:00:00 2001 From: Rodin Date: Fri, 15 May 2026 07:34:18 +0000 Subject: [PATCH 11/11] fix(#141): restore runValidateDocmap doc comment inadvertently truncated --- cmd/review-bot/validatedocmap.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cmd/review-bot/validatedocmap.go b/cmd/review-bot/validatedocmap.go index 2283c1f..6b86b07 100644 --- a/cmd/review-bot/validatedocmap.go +++ b/cmd/review-bot/validatedocmap.go @@ -65,6 +65,7 @@ func validateDocmapPath(localPath, resolvedRoot string) error { return nil } +// runValidateDocmap implements the `review-bot validate-docmap` subcommand. // // It reads changed file paths from stdin (one per line, as produced by // `git diff --name-only`), parses a doc-map YAML file, and performs two checks: