feat(#141): validate-docmap subcommand — CI hard-fail on missing docmap coverage #142
@@ -83,6 +83,8 @@ func runValidateDocmap(args []string) int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// --- Check 2: Stale docs ---
|
// --- Check 2: Stale docs ---
|
||||||
|
// checkStaleDocs validates each path before touching the filesystem; see
|
||||||
|
// its documentation for the path-traversal hardening applied.
|
||||||
repoRoot := filepath.Clean(*repoRootFlag)
|
repoRoot := filepath.Clean(*repoRootFlag)
|
||||||
staleDocs := checkStaleDocs(cfg, repoRoot)
|
staleDocs := checkStaleDocs(cfg, repoRoot)
|
||||||
if len(staleDocs) > 0 {
|
if len(staleDocs) > 0 {
|
||||||
@@ -101,7 +103,14 @@ func runValidateDocmap(args []string) int {
|
|||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// checkStaleDocs returns deduplicated docs: entries that do not exist under repoRoot.
|
// checkStaleDocs returns deduplicated docs: entries that do not exist under
|
||||||
|
// repoRoot.
|
||||||
|
//
|
||||||
|
// Path-traversal hardening: each docPath is validated with
|
||||||
|
// review.ValidateDocPath (rejects absolute paths and ".." segments) and then
|
||||||
|
// confined to repoRoot via filepath.Clean + filepath.Rel before os.Stat is
|
||||||
|
// called. Paths that fail either check are treated as invalid (reported as
|
||||||
|
// stale) without touching the host filesystem.
|
||||||
func checkStaleDocs(cfg *review.DocMapConfig, repoRoot string) []string {
|
func checkStaleDocs(cfg *review.DocMapConfig, repoRoot string) []string {
|
||||||
seen := make(map[string]struct{})
|
seen := make(map[string]struct{})
|
||||||
var stale []string
|
var stale []string
|
||||||
@@ -116,7 +125,25 @@ func checkStaleDocs(cfg *review.DocMapConfig, repoRoot string) []string {
|
|||||||
}
|
}
|
||||||
seen[docPath] = struct{}{}
|
seen[docPath] = struct{}{}
|
||||||
|
|
||||||
fullPath := filepath.Join(repoRoot, filepath.FromSlash(docPath))
|
// Guard 1: reject absolute paths and ".." segments sourced from
|
||||||
|
// PR-controlled YAML before joining with repoRoot.
|
||||||
|
if err := review.ValidateDocPath(docPath); err != nil {
|
||||||
|
stale = append(stale, docPath)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Guard 2: verify the cleaned joined path does not escape repoRoot.
|
||||||
|
// filepath.Clean resolves any remaining ".." after the join; the
|
||||||
|
// filepath.Rel check confirms the path is still under repoRoot.
|
||||||
|
fullPath := filepath.Clean(filepath.Join(repoRoot, filepath.FromSlash(docPath)))
|
||||||
|
rel, err := filepath.Rel(repoRoot, fullPath)
|
||||||
|
if err != nil || strings.HasPrefix(rel, "..") {
|
||||||
|
stale = append(stale, docPath)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Safe to stat: path is relative, contains no "..", and is
|
||||||
|
// confined within repoRoot.
|
||||||
if _, err := os.Stat(fullPath); err != nil {
|
if _, err := os.Stat(fullPath); err != nil {
|
||||||
stale = append(stale, docPath)
|
stale = append(stale, docPath)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -288,3 +288,50 @@ mappings:
|
|||||||
t.Errorf("expected docs/shared.md to appear exactly once in stderr (deduplicated), got %d occurrences: %q", count, stderr)
|
t.Errorf("expected docs/shared.md to appear exactly once in stderr (deduplicated), got %d occurrences: %q", count, stderr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestCheckStaleDocs_PathTraversal verifies that checkStaleDocs rejects
|
||||||
|
// traversal and absolute paths without touching the host filesystem.
|
||||||
|
func TestCheckStaleDocs_PathTraversal(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
|
||||||
|
// Baseline: a valid doc that exists.
|
||||||
|
makeDocFile(t, dir, "docs/valid.md")
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
docPath string
|
||||||
|
wantStale bool
|
||||||
|
}{
|
||||||
|
{"dot-dot traversal", "../../etc/passwd", true},
|
||||||
|
{"dot-dot single", "../outside", true},
|
||||||
|
{"absolute path", "/etc/passwd", true},
|
||||||
|
{"valid present path", "docs/valid.md", false},
|
||||||
|
{"valid missing path", "docs/missing.md", true},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range tests {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
docmap := makeDocmapYAML(t, `
|
||||||
|
mappings:
|
||||||
|
- paths:
|
||||||
|
- "lib/**"
|
||||||
|
docs:
|
||||||
|
- `+tc.docPath+`
|
||||||
|
`)
|
||||||
|
code, _, stderr := stdinValidateDocmap(t,
|
||||||
|
"",
|
||||||
|
[]string{"--docmap", docmap, "--repo-root", dir},
|
||||||
|
|
|||||||
|
)
|
||||||
|
|
||||||
|
if tc.wantStale {
|
||||||
|
if code != 1 {
|
||||||
|
t.Errorf("path %q: expected exit 1 (stale/invalid), got %d; stderr: %q", tc.docPath, code, stderr)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if code != 0 {
|
||||||
|
t.Errorf("path %q: expected exit 0 (valid), got %d; stderr: %q", tc.docPath, code, stderr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -257,7 +257,7 @@ type docEntry struct {
|
|||||||
// If the path is a directory, all .md files under it are returned.
|
// If the path is a directory, all .md files under it are returned.
|
||||||
// If it's a file, a single entry is returned.
|
// If it's a file, a single entry is returned.
|
||||||
func loadDocEntries(ctx context.Context, fetcher DocFetcher, owner, repo, docPath string) ([]docEntry, error) {
|
func loadDocEntries(ctx context.Context, fetcher DocFetcher, owner, repo, docPath string) ([]docEntry, error) {
|
||||||
if err := validateDocPath(docPath); err != nil {
|
if err := ValidateDocPath(docPath); err != nil {
|
||||||
return nil, fmt.Errorf("doc path %q rejected: %w", docPath, err)
|
return nil, fmt.Errorf("doc path %q rejected: %w", docPath, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -310,11 +310,11 @@ func readFileBytes(path string) ([]byte, error) {
|
|||||||
return os.ReadFile(path)
|
return os.ReadFile(path)
|
||||||
|
sonnet-review-bot
commented
[NIT] The comment on ValidateDocPath says 'Backslashes are rejected explicitly to prevent Windows platform edge cases.' — the sentence could be clearer that the tool itself may run on Windows and that backslashes in YAML doc paths could be misinterpreted by filepath.Join on that platform. Minor documentation clarity. **[NIT]** The comment on ValidateDocPath says 'Backslashes are rejected explicitly to prevent Windows platform edge cases.' — the sentence could be clearer that the tool itself may run on Windows and that backslashes in YAML doc paths could be misinterpreted by filepath.Join on that platform. Minor documentation clarity.
|
|||||||
}
|
}
|
||||||
|
|
||||||
// validateDocPath rejects doc paths that could cause path traversal via the
|
// ValidateDocPath rejects doc paths that could cause path traversal via the
|
||||||
// VCS API (absolute paths, any ".." segment). Defense-in-depth: the VCS API
|
// VCS API (absolute paths, any ".." segment). Defense-in-depth: the VCS API
|
||||||
// should already scope paths to the repo, but we validate locally to avoid
|
// should already scope paths to the repo, but we validate locally to avoid
|
||||||
|
sonnet-review-bot
commented
[NIT] The comment on **[NIT]** The comment on `ValidateDocPath` mentions "Finding #3" which appears to be a reference to an internal issue/finding number that won't be meaningful to future readers of this file. This should be reworded to be self-contained (e.g., remove the parenthetical or rephrase as 'to prevent OS-specific path separator normalization issues').
|
|||||||
// any quirk in backend path handling.
|
// any quirk in backend path handling.
|
||||||
func validateDocPath(p string) error {
|
func ValidateDocPath(p string) error {
|
||||||
if filepath.IsAbs(p) {
|
if filepath.IsAbs(p) {
|
||||||
return fmt.Errorf("absolute paths not allowed")
|
return fmt.Errorf("absolute paths not allowed")
|
||||||
|
sonnet-review-bot
commented
[NIT] The comment on **[NIT]** The comment on `ValidateDocPath` says "Defense-in-depth: the VCS API should already scope paths to the repo" — this is accurate for the VCS-fetch path, but `ValidateDocPath` is now also used by the local-filesystem stale-docs check where the VCS API is not involved. The comment is still broadly correct but could be generalized: "Defense-in-depth: callers should also confine the joined path to the repo root via filepath.Rel before filesystem access."
|
|||||||
}
|
}
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ import (
|
|||||||
|
|
||||||
// fakeDocFetcher is a mock DocFetcher for tests.
|
// fakeDocFetcher is a mock DocFetcher for tests.
|
||||||
type fakeDocFetcher struct {
|
type fakeDocFetcher struct {
|
||||||
files map[string]string // path -> content
|
files map[string]string // path -> content
|
||||||
dirs map[string]map[string]string // dir path -> (file path -> content)
|
dirs map[string]map[string]string // dir path -> (file path -> content)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -384,7 +384,7 @@ func TestValidateDocPath(t *testing.T) {
|
|||||||
"a/b/c",
|
"a/b/c",
|
||||||
}
|
}
|
||||||
for _, p := range valid {
|
for _, p := range valid {
|
||||||
if err := validateDocPath(p); err != nil {
|
if err := ValidateDocPath(p); err != nil {
|
||||||
t.Errorf("expected valid path %q to pass, got error: %v", p, err)
|
t.Errorf("expected valid path %q to pass, got error: %v", p, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -397,7 +397,7 @@ func TestValidateDocPath(t *testing.T) {
|
|||||||
"a/b/../c",
|
"a/b/../c",
|
||||||
}
|
}
|
||||||
for _, p := range invalid {
|
for _, p := range invalid {
|
||||||
if err := validateDocPath(p); err == nil {
|
if err := ValidateDocPath(p); err == nil {
|
||||||
t.Errorf("expected path %q to be rejected, but it was accepted", p)
|
t.Errorf("expected path %q to be rejected, but it was accepted", p)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
[NIT] In
TestCheckStaleDocs_PathTraversal, the inner table test creates a docmap using string concatenation with the docPath directly embedded in YAML (-+tc.docPath+``). For paths like/etc/passwdand../../etc/passwd, this creates valid YAML. However, for paths containing special YAML characters (:,#,{), this could produce invalid YAML that would fail at parse time rather than at the stale-docs check, making the test misleading. The current test cases are safe, but usingfmt.Sprintfwith YAML quoting or writing the file programmatically would be more robust.