feat(#141): validate-docmap subcommand — CI hard-fail on missing docmap coverage #142

Merged
rodin merged 11 commits from issue-141 into main 2026-05-15 07:39:22 +00:00
3 changed files with 437 additions and 0 deletions
Showing only changes of commit 69da5df254 - Show all commits
+2
View File
@@ -64,6 +64,8 @@ func main() {
switch os.Args[1] {
case "validate-url":
os.Exit(runValidateURL(os.Args[2:]))
case "validate-docmap":
os.Exit(runValidateDocmap(os.Args[2:]))
}
}
+139
View File
@@ -0,0 +1,139 @@
package main
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"gitea.weiker.me/rodin/review-bot/review"
)
Review

[NIT] The maxDocmapBytes constant is typed as int64 solely to compare against fi.Size() (which returns int64). Consider using an untyped constant or keeping it typed but adding a brief comment explaining why int64 was chosen, since the type is not otherwise obvious from context.

**[NIT]** The `maxDocmapBytes` constant is typed as `int64` solely to compare against `fi.Size()` (which returns `int64`). Consider using an untyped constant or keeping it typed but adding a brief comment explaining why `int64` was chosen, since the type is not otherwise obvious from context.
// runValidateDocmap implements the `review-bot validate-docmap` subcommand.
//
// It reads changed file paths from stdin (one per line, as produced by
// `git diff --name-only`), parses a doc-map YAML file, and performs two checks:
//
// 1. Coverage check: every changed file must be matched by at least one
// paths: glob in the docmap. Fails if any file is uncovered.
//
// 2. Stale-docs check: every docs: entry in the docmap must exist on disk
// (relative to --repo-root). Fails if any path is missing.
//
// Both checks always run — all failures are reported before exiting.
//
// Exit codes:
//
// 0 — clean (all files covered, all docs exist)
// 1 — one or more coverage or stale-doc failures
// 2 — usage error, missing flag, or YAML parse error
func runValidateDocmap(args []string) int {
Review

[MINOR] validateDocmapPath does not verify that the docmap input is a regular file. While symlinks are rejected and the path is confined to the repo root, allowing non-regular files (e.g., FIFOs, sockets, device nodes) could lead to denial-of-service (e.g., blocking reads) if such a special file exists in the workspace. Consider checking fi.Mode().IsRegular() and rejecting otherwise.

**[MINOR]** validateDocmapPath does not verify that the docmap input is a regular file. While symlinks are rejected and the path is confined to the repo root, allowing non-regular files (e.g., FIFOs, sockets, device nodes) could lead to denial-of-service (e.g., blocking reads) if such a special file exists in the workspace. Consider checking fi.Mode().IsRegular() and rejecting otherwise.
fs := flag.NewFlagSet("validate-docmap", flag.ContinueOnError)
fs.SetOutput(errWriter)
docmapFlag := fs.String("docmap", "", "Path to doc-map YAML file (required)")
repoRootFlag := fs.String("repo-root", ".", "Repo root for resolving docs: paths (default: cwd)")
if err := fs.Parse(args); err != nil {
// flag.ContinueOnError already wrote the error to errWriter.
Review

[MINOR] validateDocmapPath performs os.Lstat on the provided --docmap path before verifying it is confined within --repo-root. While content is not read and the flag is typically controlled by CI, reordering the confinement check before any filesystem access would avoid even a metadata probe on arbitrary host paths if the path were ever user-controlled.

**[MINOR]** validateDocmapPath performs os.Lstat on the provided --docmap path before verifying it is confined within --repo-root. While content is not read and the flag is typically controlled by CI, reordering the confinement check before any filesystem access would avoid even a metadata probe on arbitrary host paths if the path were ever user-controlled.
return 2
}
if *docmapFlag == "" {
fmt.Fprintln(errWriter, "Error: --docmap is required")
fmt.Fprintln(errWriter, "")
fmt.Fprintln(errWriter, "usage: review-bot validate-docmap --docmap <path> [--repo-root <dir>]")
Review

[MINOR] validateDocmapPath rejects symlinks and enforces a size cap but does not verify that the target is a regular file (fi.Mode().IsRegular()). Adding this check would defensively prevent interacting with special files (FIFOs/devices) in non-standard environments and align with the comment that it should be a "regular file."

**[MINOR]** validateDocmapPath rejects symlinks and enforces a size cap but does not verify that the target is a regular file (fi.Mode().IsRegular()). Adding this check would defensively prevent interacting with special files (FIFOs/devices) in non-standard environments and align with the comment that it should be a "regular file."
fmt.Fprintln(errWriter, " Changed files are read from stdin, one per line.")
fmt.Fprintln(errWriter, " Example: git diff --name-only origin/main HEAD | review-bot validate-docmap --docmap .review-bot/doc-map.yml")
return 2
}
// Parse docmap YAML.
cfg, err := review.ParseDocMapConfig(*docmapFlag)
if err != nil {
fmt.Fprintf(errWriter, "Error: failed to parse docmap %q: %v\n", *docmapFlag, err)
return 2
}
Review

[MINOR] validateDocmapPath does not explicitly ensure the target is a regular file (fi.Mode().IsRegular()). A directory will be rejected later by ParseDocMapConfig via os.ReadFile, but an early explicit check would yield a clearer error and avoid attempting to read directories.

**[MINOR]** validateDocmapPath does not explicitly ensure the target is a regular file (fi.Mode().IsRegular()). A directory will be rejected later by ParseDocMapConfig via os.ReadFile, but an early explicit check would yield a clearer error and avoid attempting to read directories.
// Read changed files from stdin.
changedFiles, err := readLines(os.Stdin)
if err != nil {
fmt.Fprintf(errWriter, "Error: failed to read stdin: %v\n", err)
return 2
}
failed := false
Review

[NIT] The validateDocmapPath function documents that resolvedRoot must be an absolute, symlink-free path, but this is only enforced by convention (the caller must pass the right value). An internal assertion or a brief check if !filepath.IsAbs(resolvedRoot) at the top of the function would make the contract self-enforcing. Low priority since the caller in runValidateDocmap does this correctly.

**[NIT]** The `validateDocmapPath` function documents that `resolvedRoot` must be an absolute, symlink-free path, but this is only enforced by convention (the caller must pass the right value). An internal assertion or a brief check `if !filepath.IsAbs(resolvedRoot)` at the top of the function would make the contract self-enforcing. Low priority since the caller in `runValidateDocmap` does this correctly.
// --- Check 1: Coverage ---
var uncovered []string
for _, f := range changedFiles {
if !review.FileCoveredByDocMap(cfg, f) {
uncovered = append(uncovered, f)
}
}
if len(uncovered) > 0 {
failed = true
fmt.Fprintln(errWriter, "ERROR: changed files with no docmap coverage:")
for _, f := range uncovered {
Review

[MINOR] The stale-docs check (checkStaleDocs) runs even when no changed files were provided on stdin (empty stdin case). This means a docmap with stale docs: entries will always fail, even on runs where no files changed — which is arguably the correct behavior, but worth a comment clarifying the intent. Currently the empty-stdin test passes because the test fixture has the doc file present, so it's fine, but the behavior asymmetry (coverage check is vacuously true on empty input, stale-docs check is not) is undocumented.

**[MINOR]** The stale-docs check (`checkStaleDocs`) runs even when no changed files were provided on stdin (empty stdin case). This means a docmap with stale `docs:` entries will always fail, even on runs where no files changed — which is arguably the correct behavior, but worth a comment clarifying the intent. Currently the empty-stdin test passes because the test fixture has the doc file present, so it's fine, but the behavior asymmetry (coverage check is vacuously true on empty input, stale-docs check is not) is undocumented.
fmt.Fprintf(errWriter, " %s\n", f)
}
}
// --- Check 2: Stale docs ---
repoRoot := filepath.Clean(*repoRootFlag)
staleDocs := checkStaleDocs(cfg, repoRoot)
if len(staleDocs) > 0 {
failed = true
fmt.Fprintln(errWriter, "ERROR: stale docmap docs: entries (paths do not exist):")
for _, d := range staleDocs {
fmt.Fprintf(errWriter, " %s\n", d)
Review

[MINOR] The stale-docs check runs even when --repo-root resolution fails, but only coverage failures are accumulated before the repo-root resolution block. If filepath.Abs or filepath.EvalSymlinks fails the function returns exit code 2 immediately — meaning a coverage failure that was already accumulated (and stored in failed = true) is silently swallowed, and the user sees exit 2 with only the repo-root error rather than exit 1 with both failures. Consider either: (a) emitting the accumulated coverage failures before returning 2, or (b) resolving repo-root before the coverage check so all errors are consistently collected.

**[MINOR]** The stale-docs check runs even when `--repo-root` resolution fails, but only coverage failures are accumulated before the repo-root resolution block. If `filepath.Abs` or `filepath.EvalSymlinks` fails the function returns exit code 2 immediately — meaning a coverage failure that was already accumulated (and stored in `failed = true`) is silently swallowed, and the user sees exit 2 with only the repo-root error rather than exit 1 with both failures. Consider either: (a) emitting the accumulated coverage failures before returning 2, or (b) resolving repo-root before the coverage check so all errors are consistently collected.
}
Review

[NIT] The usage message prints flag help manually (fmt.Fprintln chains) rather than calling fs.Usage(). This is inconsistent with how flag.FlagSet normally surfaces help and could get out of sync with the registered flags. Minor since this is a short flag set, but worth noting.

**[NIT]** The usage message prints flag help manually (fmt.Fprintln chains) rather than calling `fs.Usage()`. This is inconsistent with how flag.FlagSet normally surfaces help and could get out of sync with the registered flags. Minor since this is a short flag set, but worth noting.
}
if failed {
return 1
}
fmt.Fprintln(outWriter, "OK: docmap is valid")
return 0
}
// checkStaleDocs returns deduplicated docs: entries that do not exist under repoRoot.
func checkStaleDocs(cfg *review.DocMapConfig, repoRoot string) []string {
seen := make(map[string]struct{})
var stale []string
for _, mapping := range cfg.Mappings {
for _, docPath := range mapping.Docs {
if docPath == "" {
continue
}
if _, ok := seen[docPath]; ok {
continue
}
seen[docPath] = struct{}{}
fullPath := filepath.Join(repoRoot, filepath.FromSlash(docPath))
if _, err := os.Stat(fullPath); err != nil {
stale = append(stale, docPath)
}
}
}
return stale
}
// readLines reads all non-empty trimmed lines from r.
func readLines(r io.Reader) ([]string, error) {
scanner := bufio.NewScanner(r)
var lines []string
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if line != "" {
lines = append(lines, line)
}
}
return lines, scanner.Err()
}
+296
View File
@@ -0,0 +1,296 @@
package main
import (
"bytes"
"os"
"path/filepath"
"strings"
"testing"
)
// makeDocmapYAML writes a YAML string to a temp file and returns its path.
func makeDocmapYAML(t *testing.T, content string) string {
t.Helper()
f, err := os.CreateTemp(t.TempDir(), "doc-map-*.yml")
if err != nil {
t.Fatalf("CreateTemp: %v", err)
}
defer f.Close()
if _, err := f.WriteString(content); err != nil {
t.Fatalf("WriteString: %v", err)
}
return f.Name()
}
// makeDocFile creates a file (and any parent dirs) at the given path relative to dir.
func makeDocFile(t *testing.T, dir, rel string) {
t.Helper()
full := filepath.Join(dir, rel)
if err := os.MkdirAll(filepath.Dir(full), 0o755); err != nil {
t.Fatalf("MkdirAll: %v", err)
}
if err := os.WriteFile(full, []byte("# doc\n"), 0o644); err != nil {
t.Fatalf("WriteFile: %v", err)
}
}
// captureOutput redirects outWriter/errWriter to buffers for the duration of f.
func captureOutput(f func()) (stdout, stderr string) {
var outBuf, errBuf bytes.Buffer
origOut, origErr := outWriter, errWriter
outWriter = &outBuf
errWriter = &errBuf
defer func() {
outWriter = origOut
Review

[NIT] The captureOutput helper mutates package-level outWriter/errWriter variables without any synchronisation. This is fine for sequential tests but would race if tests run in parallel (t.Parallel()). No parallel calls exist currently, but a comment noting this limitation would be defensive documentation.

**[NIT]** The `captureOutput` helper mutates package-level `outWriter`/`errWriter` variables without any synchronisation. This is fine for sequential tests but would race if tests run in parallel (`t.Parallel()`). No parallel calls exist currently, but a comment noting this limitation would be defensive documentation.
errWriter = origErr
}()
f()
return outBuf.String(), errBuf.String()
}
func TestRunValidateDocmap_Clean(t *testing.T) {
dir := t.TempDir()
makeDocFile(t, dir, "docs/foo.md")
docmap := makeDocmapYAML(t, `
mappings:
- paths:
- "lib/foo/**"
docs:
- docs/foo.md
`)
var code int
stdout, _ := captureOutput(func() {
code = runValidateDocmap([]string{
"--docmap", docmap,
"--repo-root", dir,
})
})
// Provide stdin via the global os.Stdin would require process tricks.
// The implementation reads os.Stdin directly; for tests we need a different approach.
// See TestRunValidateDocmap_WithStdin_* below which use a helper that replaces os.Stdin.
// This case: empty stdin (no changed files) → clean.
Review

[NIT] TestRunValidateDocmap_Clean has a long comment block explaining why it relies on process-level stdin instead of the stdinValidateDocmap helper, which is defined later in the same file. The test would be cleaner and more consistent if it simply used stdinValidateDocmap with an empty string — which is exactly what TestRunValidateDocmap_EmptyStdin does. These two tests appear to be testing the same case (empty stdin → clean). Consider removing TestRunValidateDocmap_Clean or consolidating it with TestRunValidateDocmap_EmptyStdin.

**[NIT]** TestRunValidateDocmap_Clean has a long comment block explaining why it relies on process-level stdin instead of the stdinValidateDocmap helper, which is defined later in the same file. The test would be cleaner and more consistent if it simply used stdinValidateDocmap with an empty string — which is exactly what TestRunValidateDocmap_EmptyStdin does. These two tests appear to be testing the same case (empty stdin → clean). Consider removing TestRunValidateDocmap_Clean or consolidating it with TestRunValidateDocmap_EmptyStdin.
if code != 0 {
t.Errorf("expected exit 0 for clean (empty stdin), got %d", code)
}
if !strings.Contains(stdout, "OK") {
t.Errorf("expected 'OK' in stdout, got %q", stdout)
}
}
func TestRunValidateDocmap_MissingDocmapFlag(t *testing.T) {
var code int
_, stderr := captureOutput(func() {
code = runValidateDocmap([]string{})
})
if code != 2 {
t.Errorf("expected exit 2 for missing --docmap, got %d", code)
}
if !strings.Contains(stderr, "--docmap") {
t.Errorf("expected --docmap in stderr, got %q", stderr)
}
}
func TestRunValidateDocmap_BadYAML(t *testing.T) {
docmap := makeDocmapYAML(t, "mappings: [{{invalid")
var code int
_, stderr := captureOutput(func() {
code = runValidateDocmap([]string{"--docmap", docmap})
})
if code != 2 {
t.Errorf("expected exit 2 for bad YAML, got %d", code)
}
if !strings.Contains(stderr, "failed to parse") {
t.Errorf("expected parse error in stderr, got %q", stderr)
}
}
func TestRunValidateDocmap_StaleDocs(t *testing.T) {
dir := t.TempDir()
// docs/foo.md does NOT exist on disk.
docmap := makeDocmapYAML(t, `
mappings:
- paths:
- "lib/foo/**"
docs:
- docs/foo.md
`)
var code int
_, stderr := captureOutput(func() {
code = runValidateDocmap([]string{
"--docmap", docmap,
"--repo-root", dir,
})
})
if code != 1 {
t.Errorf("expected exit 1 for stale docs, got %d", code)
}
if !strings.Contains(stderr, "docs/foo.md") {
t.Errorf("expected stale path in stderr, got %q", stderr)
}
if !strings.Contains(stderr, "stale docmap") {
t.Errorf("expected 'stale docmap' in stderr, got %q", stderr)
}
}
// stdinValidateDocmap runs runValidateDocmap with a synthetic stdin.
func stdinValidateDocmap(t *testing.T, stdinContent string, args []string) (code int, stdout, stderr string) {
t.Helper()
// Write stdin content to a temp file and redirect os.Stdin.
f, err := os.CreateTemp(t.TempDir(), "stdin-*")
if err != nil {
t.Fatalf("CreateTemp for stdin: %v", err)
}
defer f.Close()
Review

[NIT] stdinValidateDocmap defers f.Close() but the file is opened read-write and os.Stdin is set to f. The defer will close the file descriptor while it may still be in use by runValidateDocmap if Go's test runner parallelises subtests. In practice this is safe because the function runs synchronously and the defer fires after the return, but it is slightly fragile. The existing validateurl pattern likely has the same structure so this is not a regression, just a note.

**[NIT]** stdinValidateDocmap defers f.Close() but the file is opened read-write and os.Stdin is set to f. The defer will close the file descriptor while it may still be in use by runValidateDocmap if Go's test runner parallelises subtests. In practice this is safe because the function runs synchronously and the defer fires after the return, but it is slightly fragile. The existing validateurl pattern likely has the same structure so this is not a regression, just a note.
if _, err := f.WriteString(stdinContent); err != nil {
t.Fatalf("WriteString for stdin: %v", err)
}
if _, err := f.Seek(0, 0); err != nil {
t.Fatalf("Seek for stdin: %v", err)
}
origStdin := os.Stdin
os.Stdin = f
defer func() { os.Stdin = origStdin }()
Review

[NIT] stdinValidateDocmap leaks the temp file handle — it calls defer f.Close() but the file is passed to os.Stdin before the function returns. Because os.Stdin is restored in the deferred restore and runValidateDocmap reads synchronously, this works in practice, but a reader of the test may be confused. A comment explaining the ordering would help.

**[NIT]** stdinValidateDocmap leaks the temp file handle — it calls defer f.Close() but the file is passed to os.Stdin before the function returns. Because os.Stdin is restored in the deferred restore and runValidateDocmap reads synchronously, this works in practice, but a reader of the test may be confused. A comment explaining the ordering would help.
stdout, stderr = captureOutput(func() {
code = runValidateDocmap(args)
})
return
}
func TestRunValidateDocmap_UncoveredFile(t *testing.T) {
dir := t.TempDir()
makeDocFile(t, dir, "docs/foo.md")
Review

[NIT] The comment on stdinValidateDocmap says 'Tests must not call t.Parallel() while sharing the global os.Stdin' but there is no t.Skip or assertion to enforce this. Since os.Stdin mutation is inherently process-global, this is a latent footgun if someone later adds t.Parallel() to these tests. Consider adding a brief comment directing future maintainers to use TestMain to serialize tests instead, or document that this constraint is enforced by convention.

**[NIT]** The comment on `stdinValidateDocmap` says 'Tests must not call t.Parallel() while sharing the global os.Stdin' but there is no `t.Skip` or assertion to enforce this. Since os.Stdin mutation is inherently process-global, this is a latent footgun if someone later adds t.Parallel() to these tests. Consider adding a brief comment directing future maintainers to use `TestMain` to serialize tests instead, or document that this constraint is enforced by convention.
docmap := makeDocmapYAML(t, `
mappings:
- paths:
- "lib/foo/**"
docs:
- docs/foo.md
`)
code, _, stderr := stdinValidateDocmap(t,
Review

[MINOR] The comment on stdinValidateDocmap says "Tests must not call t.Parallel() while sharing the global os.Stdin" — this is a valid concern, but none of the tests call t.Parallel() anyway, so the guard is informational only. The real risk is if a future contributor adds t.Parallel() without reading this comment. Consider adding a // WARNING: prefix or an explicit // do not add t.Parallel() to callers note to make it more prominent.

**[MINOR]** The comment on `stdinValidateDocmap` says "Tests must not call t.Parallel() while sharing the global os.Stdin" — this is a valid concern, but none of the tests call `t.Parallel()` anyway, so the guard is informational only. The real risk is if a future contributor adds `t.Parallel()` without reading this comment. Consider adding a `// WARNING:` prefix or an explicit `// do not add t.Parallel() to callers` note to make it more prominent.
"lib/bar/uncovered.ex\n",
[]string{"--docmap", docmap, "--repo-root", dir},
)
if code != 1 {
t.Errorf("expected exit 1 for uncovered file, got %d", code)
}
if !strings.Contains(stderr, "lib/bar/uncovered.ex") {
t.Errorf("expected uncovered file in stderr, got %q", stderr)
}
if !strings.Contains(stderr, "no docmap coverage") {
t.Errorf("expected 'no docmap coverage' in stderr, got %q", stderr)
}
}
Review

[NIT] TestCheckStaleDocs_PathTraversal constructs the YAML by concatenating tc.docPath directly into the string literal. A path like /etc/passwd happens to be valid YAML, but paths containing YAML-special characters (e.g. :, {, #) could produce a parse error (exit 2) rather than the expected stale-doc error (exit 1). Adding a YAML quoting step or restricting the test vectors would make the test more robust.

**[NIT]** `TestCheckStaleDocs_PathTraversal` constructs the YAML by concatenating `tc.docPath` directly into the string literal. A path like `/etc/passwd` happens to be valid YAML, but paths containing YAML-special characters (e.g. `:`, `{`, `#`) could produce a parse error (exit 2) rather than the expected stale-doc error (exit 1). Adding a YAML quoting step or restricting the test vectors would make the test more robust.
func TestRunValidateDocmap_BothFailures(t *testing.T) {
dir := t.TempDir()
// docs/foo.md intentionally missing
docmap := makeDocmapYAML(t, `
mappings:
- paths:
- "lib/foo/**"
docs:
- docs/foo.md
`)
code, _, stderr := stdinValidateDocmap(t,
"lib/bar/uncovered.ex\n",
[]string{"--docmap", docmap, "--repo-root", dir},
)
if code != 1 {
t.Errorf("expected exit 1 for both failures, got %d", code)
}
if !strings.Contains(stderr, "no docmap coverage") {
t.Errorf("expected coverage error in stderr, got %q", stderr)
}
if !strings.Contains(stderr, "stale docmap") {
t.Errorf("expected stale-docs error in stderr, got %q", stderr)
}
}
func TestRunValidateDocmap_EmptyStdin(t *testing.T) {
dir := t.TempDir()
makeDocFile(t, dir, "docs/foo.md")
docmap := makeDocmapYAML(t, `
mappings:
- paths:
- "lib/foo/**"
docs:
- docs/foo.md
`)
code, stdout, _ := stdinValidateDocmap(t,
"",
[]string{"--docmap", docmap, "--repo-root", dir},
)
if code != 0 {
t.Errorf("expected exit 0 for empty stdin, got %d", code)
}
if !strings.Contains(stdout, "OK") {
t.Errorf("expected 'OK' in stdout, got %q", stdout)
}
}
func TestRunValidateDocmap_BlankLinesSkipped(t *testing.T) {
dir := t.TempDir()
makeDocFile(t, dir, "docs/foo.md")
docmap := makeDocmapYAML(t, `
mappings:
- paths:
- "lib/foo/**"
docs:
- docs/foo.md
`)
// stdin with only blank lines → effectively empty, should be clean
code, stdout, _ := stdinValidateDocmap(t,
"\n \n\n",
[]string{"--docmap", docmap, "--repo-root", dir},
)
if code != 0 {
t.Errorf("expected exit 0 for blank-only stdin, got %d", code)
}
if !strings.Contains(stdout, "OK") {
t.Errorf("expected 'OK' in stdout for blank-only stdin, got %q", stdout)
}
}
func TestRunValidateDocmap_DuplicateDocsDeduped(t *testing.T) {
dir := t.TempDir()
// docs/shared.md intentionally missing — but it appears in TWO mappings.
// Should appear only once in stale list.
docmap := makeDocmapYAML(t, `
mappings:
- paths:
- "lib/foo/**"
docs:
- docs/shared.md
- paths:
- "lib/bar/**"
docs:
- docs/shared.md
`)
code, _, stderr := stdinValidateDocmap(t,
"",
[]string{"--docmap", docmap, "--repo-root", dir},
)
if code != 1 {
t.Errorf("expected exit 1 for stale doc, got %d", code)
}
count := strings.Count(stderr, "docs/shared.md")
if count != 1 {
t.Errorf("expected docs/shared.md to appear exactly once in stderr (deduplicated), got %d occurrences: %q", count, stderr)
}
}