feat(#141): validate-docmap subcommand — CI hard-fail on missing docmap coverage #142

Merged
rodin merged 11 commits from issue-141 into main 2026-05-15 07:39:22 +00:00
4 changed files with 82 additions and 8 deletions
Showing only changes of commit 3f8da76b42 - Show all commits
+29 -2
View File
7
@@ -83,6 +83,8 @@ func runValidateDocmap(args []string) int {
} }
// --- Check 2: Stale docs --- // --- Check 2: Stale docs ---
// checkStaleDocs validates each path before touching the filesystem; see
// its documentation for the path-traversal hardening applied.
repoRoot := filepath.Clean(*repoRootFlag) repoRoot := filepath.Clean(*repoRootFlag)
staleDocs := checkStaleDocs(cfg, repoRoot) staleDocs := checkStaleDocs(cfg, repoRoot)
if len(staleDocs) > 0 { if len(staleDocs) > 0 {
2
@@ -101,7 +103,14 @@ func runValidateDocmap(args []string) int {
return 0 return 0
} }
// checkStaleDocs returns deduplicated docs: entries that do not exist under repoRoot. // checkStaleDocs returns deduplicated docs: entries that do not exist under
// repoRoot.
//
// Path-traversal hardening: each docPath is validated with
// review.ValidateDocPath (rejects absolute paths and ".." segments) and then
// confined to repoRoot via filepath.Clean + filepath.Rel before os.Stat is
// called. Paths that fail either check are treated as invalid (reported as
// stale) without touching the host filesystem.
func checkStaleDocs(cfg *review.DocMapConfig, repoRoot string) []string { func checkStaleDocs(cfg *review.DocMapConfig, repoRoot string) []string {
seen := make(map[string]struct{}) seen := make(map[string]struct{})
var stale []string var stale []string
@@ -116,7 +125,25 @@ func checkStaleDocs(cfg *review.DocMapConfig, repoRoot string) []string {
} }
seen[docPath] = struct{}{} seen[docPath] = struct{}{}
fullPath := filepath.Join(repoRoot, filepath.FromSlash(docPath)) // Guard 1: reject absolute paths and ".." segments sourced from
// PR-controlled YAML before joining with repoRoot.
if err := review.ValidateDocPath(docPath); err != nil {
stale = append(stale, docPath)
continue
}
// Guard 2: verify the cleaned joined path does not escape repoRoot.
// filepath.Clean resolves any remaining ".." after the join; the
// filepath.Rel check confirms the path is still under repoRoot.
fullPath := filepath.Clean(filepath.Join(repoRoot, filepath.FromSlash(docPath)))
rel, err := filepath.Rel(repoRoot, fullPath)
if err != nil || strings.HasPrefix(rel, "..") {
stale = append(stale, docPath)
continue
}
// Safe to stat: path is relative, contains no "..", and is
// confined within repoRoot.
if _, err := os.Stat(fullPath); err != nil { if _, err := os.Stat(fullPath); err != nil {
stale = append(stale, docPath) stale = append(stale, docPath)
} }
1
+47
View File
7
@@ -288,3 +288,50 @@ mappings:
t.Errorf("expected docs/shared.md to appear exactly once in stderr (deduplicated), got %d occurrences: %q", count, stderr) t.Errorf("expected docs/shared.md to appear exactly once in stderr (deduplicated), got %d occurrences: %q", count, stderr)
} }
} }
// TestCheckStaleDocs_PathTraversal verifies that checkStaleDocs rejects
// traversal and absolute paths without touching the host filesystem.
func TestCheckStaleDocs_PathTraversal(t *testing.T) {
dir := t.TempDir()
// Baseline: a valid doc that exists.
makeDocFile(t, dir, "docs/valid.md")
tests := []struct {
name string
docPath string
wantStale bool
}{
{"dot-dot traversal", "../../etc/passwd", true},
{"dot-dot single", "../outside", true},
{"absolute path", "/etc/passwd", true},
{"valid present path", "docs/valid.md", false},
{"valid missing path", "docs/missing.md", true},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
docmap := makeDocmapYAML(t, `
mappings:
- paths:
- "lib/**"
docs:
- `+tc.docPath+`
`)
code, _, stderr := stdinValidateDocmap(t,
"",
[]string{"--docmap", docmap, "--repo-root", dir},
Review

[NIT] In TestCheckStaleDocs_PathTraversal, the inner table test creates a docmap using string concatenation with the docPath directly embedded in YAML (- `+tc.docPath+`). For paths like /etc/passwd and ../../etc/passwd, this creates valid YAML. However, for paths containing special YAML characters (:, #, {), this could produce invalid YAML that would fail at parse time rather than at the stale-docs check, making the test misleading. The current test cases are safe, but using fmt.Sprintf with YAML quoting or writing the file programmatically would be more robust.

**[NIT]** In `TestCheckStaleDocs_PathTraversal`, the inner table test creates a docmap using string concatenation with the docPath directly embedded in YAML (`` - `+tc.docPath+` ``). For paths like `/etc/passwd` and `../../etc/passwd`, this creates valid YAML. However, for paths containing special YAML characters (`:`, `#`, `{`), this could produce invalid YAML that would fail at parse time rather than at the stale-docs check, making the test misleading. The current test cases are safe, but using `fmt.Sprintf` with YAML quoting or writing the file programmatically would be more robust.
)
if tc.wantStale {
if code != 1 {
t.Errorf("path %q: expected exit 1 (stale/invalid), got %d; stderr: %q", tc.docPath, code, stderr)
}
} else {
if code != 0 {
t.Errorf("path %q: expected exit 0 (valid), got %d; stderr: %q", tc.docPath, code, stderr)
}
}
})
}
}
+3 -3
View File
2
@@ -257,7 +257,7 @@ type docEntry struct {
// If the path is a directory, all .md files under it are returned. // If the path is a directory, all .md files under it are returned.
// If it's a file, a single entry is returned. // If it's a file, a single entry is returned.
func loadDocEntries(ctx context.Context, fetcher DocFetcher, owner, repo, docPath string) ([]docEntry, error) { func loadDocEntries(ctx context.Context, fetcher DocFetcher, owner, repo, docPath string) ([]docEntry, error) {
if err := validateDocPath(docPath); err != nil { if err := ValidateDocPath(docPath); err != nil {
return nil, fmt.Errorf("doc path %q rejected: %w", docPath, err) return nil, fmt.Errorf("doc path %q rejected: %w", docPath, err)
} }
@@ -310,11 +310,11 @@ func readFileBytes(path string) ([]byte, error) {
return os.ReadFile(path) return os.ReadFile(path)
Review

[NIT] The comment on ValidateDocPath says 'Backslashes are rejected explicitly to prevent Windows platform edge cases.' — the sentence could be clearer that the tool itself may run on Windows and that backslashes in YAML doc paths could be misinterpreted by filepath.Join on that platform. Minor documentation clarity.

**[NIT]** The comment on ValidateDocPath says 'Backslashes are rejected explicitly to prevent Windows platform edge cases.' — the sentence could be clearer that the tool itself may run on Windows and that backslashes in YAML doc paths could be misinterpreted by filepath.Join on that platform. Minor documentation clarity.
} }
// validateDocPath rejects doc paths that could cause path traversal via the // ValidateDocPath rejects doc paths that could cause path traversal via the
// VCS API (absolute paths, any ".." segment). Defense-in-depth: the VCS API // VCS API (absolute paths, any ".." segment). Defense-in-depth: the VCS API
// should already scope paths to the repo, but we validate locally to avoid // should already scope paths to the repo, but we validate locally to avoid
Review

[NIT] The comment on ValidateDocPath mentions "Finding #3" which appears to be a reference to an internal issue/finding number that won't be meaningful to future readers of this file. This should be reworded to be self-contained (e.g., remove the parenthetical or rephrase as 'to prevent OS-specific path separator normalization issues').

**[NIT]** The comment on `ValidateDocPath` mentions "Finding #3" which appears to be a reference to an internal issue/finding number that won't be meaningful to future readers of this file. This should be reworded to be self-contained (e.g., remove the parenthetical or rephrase as 'to prevent OS-specific path separator normalization issues').
// any quirk in backend path handling. // any quirk in backend path handling.
func validateDocPath(p string) error { func ValidateDocPath(p string) error {
if filepath.IsAbs(p) { if filepath.IsAbs(p) {
return fmt.Errorf("absolute paths not allowed") return fmt.Errorf("absolute paths not allowed")
Review

[NIT] The comment on ValidateDocPath says "Defense-in-depth: the VCS API should already scope paths to the repo" — this is accurate for the VCS-fetch path, but ValidateDocPath is now also used by the local-filesystem stale-docs check where the VCS API is not involved. The comment is still broadly correct but could be generalized: "Defense-in-depth: callers should also confine the joined path to the repo root via filepath.Rel before filesystem access."

**[NIT]** The comment on `ValidateDocPath` says "Defense-in-depth: the VCS API should already scope paths to the repo" — this is accurate for the VCS-fetch path, but `ValidateDocPath` is now also used by the local-filesystem stale-docs check where the VCS API is not involved. The comment is still broadly correct but could be generalized: "Defense-in-depth: callers should also confine the joined path to the repo root via filepath.Rel before filesystem access."
} }
1
+3 -3
View File
@@ -11,7 +11,7 @@ import (
// fakeDocFetcher is a mock DocFetcher for tests. // fakeDocFetcher is a mock DocFetcher for tests.
type fakeDocFetcher struct { type fakeDocFetcher struct {
files map[string]string // path -> content files map[string]string // path -> content
dirs map[string]map[string]string // dir path -> (file path -> content) dirs map[string]map[string]string // dir path -> (file path -> content)
} }
@@ -384,7 +384,7 @@ func TestValidateDocPath(t *testing.T) {
"a/b/c", "a/b/c",
} }
for _, p := range valid { for _, p := range valid {
if err := validateDocPath(p); err != nil { if err := ValidateDocPath(p); err != nil {
t.Errorf("expected valid path %q to pass, got error: %v", p, err) t.Errorf("expected valid path %q to pass, got error: %v", p, err)
} }
} }
@@ -397,7 +397,7 @@ func TestValidateDocPath(t *testing.T) {
"a/b/../c", "a/b/../c",
} }
for _, p := range invalid { for _, p := range invalid {
if err := validateDocPath(p); err == nil { if err := ValidateDocPath(p); err == nil {
t.Errorf("expected path %q to be rejected, but it was accepted", p) t.Errorf("expected path %q to be rejected, but it was accepted", p)
} }
} }