feat(#141): validate-docmap subcommand — CI hard-fail on missing docmap coverage #142

Merged
rodin merged 11 commits from issue-141 into main 2026-05-15 07:39:22 +00:00
4 changed files with 161 additions and 11 deletions
Showing only changes of commit b22de19aa1 - Show all commits
+28 -8
View File
7
@@ -83,10 +83,22 @@ func runValidateDocmap(args []string) int {
} }
// --- Check 2: Stale docs --- // --- Check 2: Stale docs ---
// Resolve repoRoot to an absolute, symlink-free path before any Rel checks
// so that a symlinked --repo-root cannot be used to bypass the escape
// guard in checkStaleDocs.
absRoot, err := filepath.Abs(*repoRootFlag)
if err != nil {
fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err)
return 2
Review

[MINOR] The stale-docs check runs even when --repo-root resolution fails, but only coverage failures are accumulated before the repo-root resolution block. If filepath.Abs or filepath.EvalSymlinks fails the function returns exit code 2 immediately — meaning a coverage failure that was already accumulated (and stored in failed = true) is silently swallowed, and the user sees exit 2 with only the repo-root error rather than exit 1 with both failures. Consider either: (a) emitting the accumulated coverage failures before returning 2, or (b) resolving repo-root before the coverage check so all errors are consistently collected.

**[MINOR]** The stale-docs check runs even when `--repo-root` resolution fails, but only coverage failures are accumulated before the repo-root resolution block. If `filepath.Abs` or `filepath.EvalSymlinks` fails the function returns exit code 2 immediately — meaning a coverage failure that was already accumulated (and stored in `failed = true`) is silently swallowed, and the user sees exit 2 with only the repo-root error rather than exit 1 with both failures. Consider either: (a) emitting the accumulated coverage failures before returning 2, or (b) resolving repo-root before the coverage check so all errors are consistently collected.
}
Review

[NIT] The usage message prints flag help manually (fmt.Fprintln chains) rather than calling fs.Usage(). This is inconsistent with how flag.FlagSet normally surfaces help and could get out of sync with the registered flags. Minor since this is a short flag set, but worth noting.

**[NIT]** The usage message prints flag help manually (fmt.Fprintln chains) rather than calling `fs.Usage()`. This is inconsistent with how flag.FlagSet normally surfaces help and could get out of sync with the registered flags. Minor since this is a short flag set, but worth noting.
resolvedRoot, err := filepath.EvalSymlinks(absRoot)
if err != nil {
fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err)
return 2
}
// checkStaleDocs validates each path before touching the filesystem; see // checkStaleDocs validates each path before touching the filesystem; see
// its documentation for the path-traversal hardening applied. // its documentation for the path-traversal hardening applied.
repoRoot := filepath.Clean(*repoRootFlag) staleDocs := checkStaleDocs(cfg, resolvedRoot)
staleDocs := checkStaleDocs(cfg, repoRoot)
if len(staleDocs) > 0 { if len(staleDocs) > 0 {
failed = true failed = true
fmt.Fprintln(errWriter, "ERROR: stale docmap docs: entries (paths do not exist):") fmt.Fprintln(errWriter, "ERROR: stale docmap docs: entries (paths do not exist):")
@@ -108,9 +120,11 @@ func runValidateDocmap(args []string) int {
// //
// Path-traversal hardening: each docPath is validated with // Path-traversal hardening: each docPath is validated with
// review.ValidateDocPath (rejects absolute paths and ".." segments) and then // review.ValidateDocPath (rejects absolute paths and ".." segments) and then
// confined to repoRoot via filepath.Clean + filepath.Rel before os.Stat is // confined to repoRoot via filepath.Clean + filepath.Rel before os.Lstat is
// called. Paths that fail either check are treated as invalid (reported as // called. Symlinks are treated as stale — a CI tool running against
// stale) without touching the host filesystem. // PR-controlled content must not follow symlinks that could probe arbitrary
// host paths. Paths that fail any check are treated as invalid (reported as
// stale) without following any symlinks.
func checkStaleDocs(cfg *review.DocMapConfig, repoRoot string) []string { func checkStaleDocs(cfg *review.DocMapConfig, repoRoot string) []string {
seen := make(map[string]struct{}) seen := make(map[string]struct{})
var stale []string var stale []string
1
@@ -142,9 +156,15 @@ func checkStaleDocs(cfg *review.DocMapConfig, repoRoot string) []string {
continue continue
} }
// Safe to stat: path is relative, contains no "..", and is // Use Lstat (not Stat) so symlinks are never followed. A symlink
// confined within repoRoot. // under repoRoot could point anywhere on the host, allowing a
if _, err := os.Stat(fullPath); err != nil { // malicious PR to probe file existence. Treat symlinks as stale.
fi, err := os.Lstat(fullPath)
if err != nil {
stale = append(stale, docPath)
continue
}
if fi.Mode()&os.ModeSymlink != 0 {
stale = append(stale, docPath) stale = append(stale, docPath)
} }
} }
+101
View File
8
@@ -335,3 +335,104 @@ mappings:
}) })
} }
} }
// TestCheckStaleDocs_SymlinkOutside verifies that a symlink under repoRoot
// pointing outside the repo is treated as stale (not followed).
func TestCheckStaleDocs_SymlinkOutside(t *testing.T) {
dir := t.TempDir()
// Create a symlink inside repoRoot pointing to a file outside the repo.
// We point at /etc/hostname (exists on Linux CI) but the test does not
// depend on that file existing — Lstat must reject the symlink itself.
linkPath := filepath.Join(dir, "docs", "secret.md")
if err := os.MkdirAll(filepath.Dir(linkPath), 0o755); err != nil {
t.Fatalf("MkdirAll: %v", err)
}
if err := os.Symlink("/etc/hostname", linkPath); err != nil {
t.Fatalf("Symlink: %v", err)
}
docmap := makeDocmapYAML(t, `
mappings:
- paths:
- "lib/**"
docs:
- docs/secret.md
`)
code, _, stderr := stdinValidateDocmap(t,
"",
[]string{"--docmap", docmap, "--repo-root", dir},
)
if code != 1 {
t.Errorf("expected exit 1 for symlink doc, got %d; stderr: %q", code, stderr)
}
if !strings.Contains(stderr, "docs/secret.md") {
t.Errorf("expected stale path in stderr, got %q", stderr)
}
}
// TestCheckStaleDocs_SymlinkInsideRepo verifies that a symlink pointing to
// another file *within* the repo is also treated as stale. We refuse all
// symlinks regardless of target to keep the check simple and safe.
func TestCheckStaleDocs_SymlinkInsideRepo(t *testing.T) {
dir := t.TempDir()
// Real doc file.
makeDocFile(t, dir, "docs/real.md")
// Symlink inside repo pointing at the real file.
linkPath := filepath.Join(dir, "docs", "link.md")
if err := os.Symlink(filepath.Join(dir, "docs", "real.md"), linkPath); err != nil {
t.Fatalf("Symlink: %v", err)
}
docmap := makeDocmapYAML(t, `
mappings:
- paths:
- "lib/**"
docs:
- docs/link.md
`)
code, _, stderr := stdinValidateDocmap(t,
"",
[]string{"--docmap", docmap, "--repo-root", dir},
)
if code != 1 {
t.Errorf("expected exit 1 for symlink doc (even intra-repo), got %d; stderr: %q", code, stderr)
}
}
// TestRunValidateDocmap_SymlinkRepoRoot verifies that a --repo-root that is
// itself a symlink to a valid directory resolves correctly (Finding #2).
func TestRunValidateDocmap_SymlinkRepoRoot(t *testing.T) {
realDir := t.TempDir()
makeDocFile(t, realDir, "docs/foo.md")
// Create a symlink pointing at realDir.
symlinkDir := filepath.Join(t.TempDir(), "link-root")
if err := os.Symlink(realDir, symlinkDir); err != nil {
t.Fatalf("Symlink: %v", err)
}
docmap := makeDocmapYAML(t, `
mappings:
- paths:
- "lib/**"
docs:
- docs/foo.md
`)
// Using the symlinked repo-root: the real doc exists → should be clean.
code, stdout, stderr := stdinValidateDocmap(t,
"lib/foo.go\n",
[]string{"--docmap", docmap, "--repo-root", symlinkDir},
)
if code != 0 {
t.Errorf("expected exit 0 for symlinked repo-root with existing doc, got %d; stderr: %q", code, stderr)
}
if !strings.Contains(stdout, "OK") {
t.Errorf("expected 'OK' in stdout, got %q", stdout)
}
}
+7 -3
View File
3
@@ -311,10 +311,14 @@ func readFileBytes(path string) ([]byte, error) {
} }
// ValidateDocPath rejects doc paths that could cause path traversal via the // ValidateDocPath rejects doc paths that could cause path traversal via the
// VCS API (absolute paths, any ".." segment). Defense-in-depth: the VCS API // VCS API (absolute paths, any ".." segment, backslashes). Defense-in-depth:
// should already scope paths to the repo, but we validate locally to avoid // the VCS API should already scope paths to the repo, but we validate locally
Review

[NIT] The comment on ValidateDocPath mentions "Finding #3" which appears to be a reference to an internal issue/finding number that won't be meaningful to future readers of this file. This should be reworded to be self-contained (e.g., remove the parenthetical or rephrase as 'to prevent OS-specific path separator normalization issues').

**[NIT]** The comment on `ValidateDocPath` mentions "Finding #3" which appears to be a reference to an internal issue/finding number that won't be meaningful to future readers of this file. This should be reworded to be self-contained (e.g., remove the parenthetical or rephrase as 'to prevent OS-specific path separator normalization issues').
// any quirk in backend path handling. // to avoid any quirk in backend path handling. Backslashes are rejected
// explicitly to prevent Windows platform edge cases.
func ValidateDocPath(p string) error { func ValidateDocPath(p string) error {
if strings.Contains(p, "\\") {
Review

[NIT] The comment on ValidateDocPath says "Defense-in-depth: the VCS API should already scope paths to the repo" — this is accurate for the VCS-fetch path, but ValidateDocPath is now also used by the local-filesystem stale-docs check where the VCS API is not involved. The comment is still broadly correct but could be generalized: "Defense-in-depth: callers should also confine the joined path to the repo root via filepath.Rel before filesystem access."

**[NIT]** The comment on `ValidateDocPath` says "Defense-in-depth: the VCS API should already scope paths to the repo" — this is accurate for the VCS-fetch path, but `ValidateDocPath` is now also used by the local-filesystem stale-docs check where the VCS API is not involved. The comment is still broadly correct but could be generalized: "Defense-in-depth: callers should also confine the joined path to the repo root via filepath.Rel before filesystem access."
return fmt.Errorf("backslashes not allowed in doc paths")
}
Review

[NIT] The comment on ValidateDocPath says 'Defense-in-depth: callers must also confine the joined path to the repo root via filepath.Rel before any filesystem access.' This is good documentation but the function's contract doesn't verify the caller satisfies that postcondition. This is fine (it's defense-in-depth by definition), just making sure the comment is understood as advisory rather than enforced.

**[NIT]** The comment on `ValidateDocPath` says 'Defense-in-depth: callers must also confine the joined path to the repo root via filepath.Rel before any filesystem access.' This is good documentation but the function's contract doesn't verify the caller satisfies that postcondition. This is fine (it's defense-in-depth by definition), just making sure the comment is understood as advisory rather than enforced.
if filepath.IsAbs(p) { if filepath.IsAbs(p) {
return fmt.Errorf("absolute paths not allowed") return fmt.Errorf("absolute paths not allowed")
} }
+25
View File
@@ -395,6 +395,10 @@ func TestValidateDocPath(t *testing.T) {
"docs/../../../etc/passwd", "docs/../../../etc/passwd",
"../sibling-repo/file.md", "../sibling-repo/file.md",
"a/b/../c", "a/b/../c",
// Backslashes must be rejected (Finding #3 — Windows platform edge cases).
`docs\foo.md`,
`docs\..\secret`,
`\absolute`,
} }
for _, p := range invalid { for _, p := range invalid {
if err := ValidateDocPath(p); err == nil { if err := ValidateDocPath(p); err == nil {
@@ -420,6 +424,27 @@ func TestLoadMatchingDocs_PathTraversalRejected(t *testing.T) {
} }
} }
// TestValidateDocPath_Backslash verifies that backslash-bearing paths are
// rejected to prevent Windows platform edge cases where a path separator
// could be normalised differently by the host OS or VCS backend.
func TestValidateDocPath_Backslash(t *testing.T) {
backslashPaths := []string{
`docs\foo.md`,
`docs\subdir\file.md`,
`\absolute`,
}
for _, p := range backslashPaths {
if err := ValidateDocPath(p); err == nil {
t.Errorf("expected backslash path %q to be rejected, but it was accepted", p)
}
}
// Sanity: forward-slash path must still be accepted.
if err := ValidateDocPath("docs/foo.md"); err != nil {
t.Errorf("expected forward-slash path to be accepted, got: %v", err)
}
}
// ============================================================ // ============================================================
// Helpers // Helpers
// ============================================================ // ============================================================