feat(#141): validate-docmap subcommand — CI hard-fail on missing docmap coverage #142
@@ -83,10 +83,22 @@ func runValidateDocmap(args []string) int {
|
||||
}
|
||||
|
||||
// --- Check 2: Stale docs ---
|
||||
// Resolve repoRoot to an absolute, symlink-free path before any Rel checks
|
||||
// so that a symlinked --repo-root cannot be used to bypass the escape
|
||||
// guard in checkStaleDocs.
|
||||
absRoot, err := filepath.Abs(*repoRootFlag)
|
||||
if err != nil {
|
||||
fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err)
|
||||
return 2
|
||||
|
|
||||
}
|
||||
|
sonnet-review-bot
commented
[NIT] The usage message prints flag help manually (fmt.Fprintln chains) rather than calling **[NIT]** The usage message prints flag help manually (fmt.Fprintln chains) rather than calling `fs.Usage()`. This is inconsistent with how flag.FlagSet normally surfaces help and could get out of sync with the registered flags. Minor since this is a short flag set, but worth noting.
|
||||
resolvedRoot, err := filepath.EvalSymlinks(absRoot)
|
||||
if err != nil {
|
||||
fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err)
|
||||
return 2
|
||||
}
|
||||
// checkStaleDocs validates each path before touching the filesystem; see
|
||||
// its documentation for the path-traversal hardening applied.
|
||||
repoRoot := filepath.Clean(*repoRootFlag)
|
||||
staleDocs := checkStaleDocs(cfg, repoRoot)
|
||||
staleDocs := checkStaleDocs(cfg, resolvedRoot)
|
||||
if len(staleDocs) > 0 {
|
||||
failed = true
|
||||
fmt.Fprintln(errWriter, "ERROR: stale docmap docs: entries (paths do not exist):")
|
||||
@@ -108,9 +120,11 @@ func runValidateDocmap(args []string) int {
|
||||
//
|
||||
// Path-traversal hardening: each docPath is validated with
|
||||
// review.ValidateDocPath (rejects absolute paths and ".." segments) and then
|
||||
// confined to repoRoot via filepath.Clean + filepath.Rel before os.Stat is
|
||||
// called. Paths that fail either check are treated as invalid (reported as
|
||||
// stale) without touching the host filesystem.
|
||||
// confined to repoRoot via filepath.Clean + filepath.Rel before os.Lstat is
|
||||
// called. Symlinks are treated as stale — a CI tool running against
|
||||
// PR-controlled content must not follow symlinks that could probe arbitrary
|
||||
// host paths. Paths that fail any check are treated as invalid (reported as
|
||||
// stale) without following any symlinks.
|
||||
func checkStaleDocs(cfg *review.DocMapConfig, repoRoot string) []string {
|
||||
seen := make(map[string]struct{})
|
||||
var stale []string
|
||||
@@ -142,9 +156,15 @@ func checkStaleDocs(cfg *review.DocMapConfig, repoRoot string) []string {
|
||||
continue
|
||||
}
|
||||
|
||||
// Safe to stat: path is relative, contains no "..", and is
|
||||
// confined within repoRoot.
|
||||
if _, err := os.Stat(fullPath); err != nil {
|
||||
// Use Lstat (not Stat) so symlinks are never followed. A symlink
|
||||
// under repoRoot could point anywhere on the host, allowing a
|
||||
// malicious PR to probe file existence. Treat symlinks as stale.
|
||||
fi, err := os.Lstat(fullPath)
|
||||
if err != nil {
|
||||
stale = append(stale, docPath)
|
||||
continue
|
||||
}
|
||||
if fi.Mode()&os.ModeSymlink != 0 {
|
||||
stale = append(stale, docPath)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -335,3 +335,104 @@ mappings:
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestCheckStaleDocs_SymlinkOutside verifies that a symlink under repoRoot
|
||||
// pointing outside the repo is treated as stale (not followed).
|
||||
func TestCheckStaleDocs_SymlinkOutside(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
|
||||
// Create a symlink inside repoRoot pointing to a file outside the repo.
|
||||
// We point at /etc/hostname (exists on Linux CI) but the test does not
|
||||
// depend on that file existing — Lstat must reject the symlink itself.
|
||||
linkPath := filepath.Join(dir, "docs", "secret.md")
|
||||
if err := os.MkdirAll(filepath.Dir(linkPath), 0o755); err != nil {
|
||||
t.Fatalf("MkdirAll: %v", err)
|
||||
}
|
||||
if err := os.Symlink("/etc/hostname", linkPath); err != nil {
|
||||
t.Fatalf("Symlink: %v", err)
|
||||
}
|
||||
|
||||
docmap := makeDocmapYAML(t, `
|
||||
mappings:
|
||||
- paths:
|
||||
- "lib/**"
|
||||
docs:
|
||||
- docs/secret.md
|
||||
`)
|
||||
|
||||
code, _, stderr := stdinValidateDocmap(t,
|
||||
"",
|
||||
[]string{"--docmap", docmap, "--repo-root", dir},
|
||||
)
|
||||
if code != 1 {
|
||||
t.Errorf("expected exit 1 for symlink doc, got %d; stderr: %q", code, stderr)
|
||||
}
|
||||
if !strings.Contains(stderr, "docs/secret.md") {
|
||||
t.Errorf("expected stale path in stderr, got %q", stderr)
|
||||
}
|
||||
}
|
||||
|
||||
// TestCheckStaleDocs_SymlinkInsideRepo verifies that a symlink pointing to
|
||||
// another file *within* the repo is also treated as stale. We refuse all
|
||||
// symlinks regardless of target to keep the check simple and safe.
|
||||
func TestCheckStaleDocs_SymlinkInsideRepo(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
|
||||
// Real doc file.
|
||||
makeDocFile(t, dir, "docs/real.md")
|
||||
|
||||
// Symlink inside repo pointing at the real file.
|
||||
linkPath := filepath.Join(dir, "docs", "link.md")
|
||||
if err := os.Symlink(filepath.Join(dir, "docs", "real.md"), linkPath); err != nil {
|
||||
t.Fatalf("Symlink: %v", err)
|
||||
}
|
||||
|
||||
docmap := makeDocmapYAML(t, `
|
||||
mappings:
|
||||
- paths:
|
||||
- "lib/**"
|
||||
docs:
|
||||
- docs/link.md
|
||||
`)
|
||||
|
||||
code, _, stderr := stdinValidateDocmap(t,
|
||||
"",
|
||||
[]string{"--docmap", docmap, "--repo-root", dir},
|
||||
)
|
||||
if code != 1 {
|
||||
t.Errorf("expected exit 1 for symlink doc (even intra-repo), got %d; stderr: %q", code, stderr)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunValidateDocmap_SymlinkRepoRoot verifies that a --repo-root that is
|
||||
// itself a symlink to a valid directory resolves correctly (Finding #2).
|
||||
func TestRunValidateDocmap_SymlinkRepoRoot(t *testing.T) {
|
||||
realDir := t.TempDir()
|
||||
makeDocFile(t, realDir, "docs/foo.md")
|
||||
|
||||
// Create a symlink pointing at realDir.
|
||||
symlinkDir := filepath.Join(t.TempDir(), "link-root")
|
||||
if err := os.Symlink(realDir, symlinkDir); err != nil {
|
||||
t.Fatalf("Symlink: %v", err)
|
||||
}
|
||||
|
||||
docmap := makeDocmapYAML(t, `
|
||||
mappings:
|
||||
- paths:
|
||||
- "lib/**"
|
||||
docs:
|
||||
- docs/foo.md
|
||||
`)
|
||||
|
||||
// Using the symlinked repo-root: the real doc exists → should be clean.
|
||||
code, stdout, stderr := stdinValidateDocmap(t,
|
||||
"lib/foo.go\n",
|
||||
[]string{"--docmap", docmap, "--repo-root", symlinkDir},
|
||||
)
|
||||
if code != 0 {
|
||||
t.Errorf("expected exit 0 for symlinked repo-root with existing doc, got %d; stderr: %q", code, stderr)
|
||||
}
|
||||
if !strings.Contains(stdout, "OK") {
|
||||
t.Errorf("expected 'OK' in stdout, got %q", stdout)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -311,10 +311,14 @@ func readFileBytes(path string) ([]byte, error) {
|
||||
}
|
||||
|
||||
// ValidateDocPath rejects doc paths that could cause path traversal via the
|
||||
// VCS API (absolute paths, any ".." segment). Defense-in-depth: the VCS API
|
||||
// should already scope paths to the repo, but we validate locally to avoid
|
||||
// any quirk in backend path handling.
|
||||
// VCS API (absolute paths, any ".." segment, backslashes). Defense-in-depth:
|
||||
// the VCS API should already scope paths to the repo, but we validate locally
|
||||
|
sonnet-review-bot
commented
[NIT] The comment on **[NIT]** The comment on `ValidateDocPath` mentions "Finding #3" which appears to be a reference to an internal issue/finding number that won't be meaningful to future readers of this file. This should be reworded to be self-contained (e.g., remove the parenthetical or rephrase as 'to prevent OS-specific path separator normalization issues').
|
||||
// to avoid any quirk in backend path handling. Backslashes are rejected
|
||||
// explicitly to prevent Windows platform edge cases.
|
||||
func ValidateDocPath(p string) error {
|
||||
if strings.Contains(p, "\\") {
|
||||
|
sonnet-review-bot
commented
[NIT] The comment on **[NIT]** The comment on `ValidateDocPath` says "Defense-in-depth: the VCS API should already scope paths to the repo" — this is accurate for the VCS-fetch path, but `ValidateDocPath` is now also used by the local-filesystem stale-docs check where the VCS API is not involved. The comment is still broadly correct but could be generalized: "Defense-in-depth: callers should also confine the joined path to the repo root via filepath.Rel before filesystem access."
|
||||
return fmt.Errorf("backslashes not allowed in doc paths")
|
||||
}
|
||||
|
sonnet-review-bot
commented
[NIT] The comment on **[NIT]** The comment on `ValidateDocPath` says 'Defense-in-depth: callers must also confine the joined path to the repo root via filepath.Rel before any filesystem access.' This is good documentation but the function's contract doesn't verify the caller satisfies that postcondition. This is fine (it's defense-in-depth by definition), just making sure the comment is understood as advisory rather than enforced.
|
||||
if filepath.IsAbs(p) {
|
||||
return fmt.Errorf("absolute paths not allowed")
|
||||
}
|
||||
|
||||
@@ -395,6 +395,10 @@ func TestValidateDocPath(t *testing.T) {
|
||||
"docs/../../../etc/passwd",
|
||||
"../sibling-repo/file.md",
|
||||
"a/b/../c",
|
||||
// Backslashes must be rejected (Finding #3 — Windows platform edge cases).
|
||||
`docs\foo.md`,
|
||||
`docs\..\secret`,
|
||||
`\absolute`,
|
||||
}
|
||||
for _, p := range invalid {
|
||||
if err := ValidateDocPath(p); err == nil {
|
||||
@@ -420,6 +424,27 @@ func TestLoadMatchingDocs_PathTraversalRejected(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestValidateDocPath_Backslash verifies that backslash-bearing paths are
|
||||
// rejected to prevent Windows platform edge cases where a path separator
|
||||
// could be normalised differently by the host OS or VCS backend.
|
||||
func TestValidateDocPath_Backslash(t *testing.T) {
|
||||
backslashPaths := []string{
|
||||
`docs\foo.md`,
|
||||
`docs\subdir\file.md`,
|
||||
`\absolute`,
|
||||
}
|
||||
for _, p := range backslashPaths {
|
||||
if err := ValidateDocPath(p); err == nil {
|
||||
t.Errorf("expected backslash path %q to be rejected, but it was accepted", p)
|
||||
}
|
||||
}
|
||||
|
||||
// Sanity: forward-slash path must still be accepted.
|
||||
if err := ValidateDocPath("docs/foo.md"); err != nil {
|
||||
t.Errorf("expected forward-slash path to be accepted, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// Helpers
|
||||
// ============================================================
|
||||
|
||||
[MINOR] The stale-docs check runs even when
--repo-rootresolution fails, but only coverage failures are accumulated before the repo-root resolution block. Iffilepath.Absorfilepath.EvalSymlinksfails the function returns exit code 2 immediately — meaning a coverage failure that was already accumulated (and stored infailed = true) is silently swallowed, and the user sees exit 2 with only the repo-root error rather than exit 1 with both failures. Consider either: (a) emitting the accumulated coverage failures before returning 2, or (b) resolving repo-root before the coverage check so all errors are consistently collected.