feat(#141): validate-docmap subcommand — CI hard-fail on missing docmap coverage #142
@@ -64,6 +64,8 @@ func main() {
|
||||
switch os.Args[1] {
|
||||
case "validate-url":
|
||||
os.Exit(runValidateURL(os.Args[2:]))
|
||||
case "validate-docmap":
|
||||
os.Exit(runValidateDocmap(os.Args[2:]))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,139 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"gitea.weiker.me/rodin/review-bot/review"
|
||||
)
|
||||
|
|
||||
|
||||
// runValidateDocmap implements the `review-bot validate-docmap` subcommand.
|
||||
//
|
||||
// It reads changed file paths from stdin (one per line, as produced by
|
||||
// `git diff --name-only`), parses a doc-map YAML file, and performs two checks:
|
||||
//
|
||||
// 1. Coverage check: every changed file must be matched by at least one
|
||||
// paths: glob in the docmap. Fails if any file is uncovered.
|
||||
//
|
||||
// 2. Stale-docs check: every docs: entry in the docmap must exist on disk
|
||||
// (relative to --repo-root). Fails if any path is missing.
|
||||
//
|
||||
// Both checks always run — all failures are reported before exiting.
|
||||
//
|
||||
// Exit codes:
|
||||
//
|
||||
// 0 — clean (all files covered, all docs exist)
|
||||
// 1 — one or more coverage or stale-doc failures
|
||||
// 2 — usage error, missing flag, or YAML parse error
|
||||
func runValidateDocmap(args []string) int {
|
||||
|
[MINOR] validateDocmapPath does not verify that the docmap input is a regular file. While symlinks are rejected and the path is confined to the repo root, allowing non-regular files (e.g., FIFOs, sockets, device nodes) could lead to denial-of-service (e.g., blocking reads) if such a special file exists in the workspace. Consider checking fi.Mode().IsRegular() and rejecting otherwise. **[MINOR]** validateDocmapPath does not verify that the docmap input is a regular file. While symlinks are rejected and the path is confined to the repo root, allowing non-regular files (e.g., FIFOs, sockets, device nodes) could lead to denial-of-service (e.g., blocking reads) if such a special file exists in the workspace. Consider checking fi.Mode().IsRegular() and rejecting otherwise.
|
||||
fs := flag.NewFlagSet("validate-docmap", flag.ContinueOnError)
|
||||
fs.SetOutput(errWriter)
|
||||
|
||||
docmapFlag := fs.String("docmap", "", "Path to doc-map YAML file (required)")
|
||||
repoRootFlag := fs.String("repo-root", ".", "Repo root for resolving docs: paths (default: cwd)")
|
||||
|
||||
if err := fs.Parse(args); err != nil {
|
||||
// flag.ContinueOnError already wrote the error to errWriter.
|
||||
|
[MINOR] validateDocmapPath performs os.Lstat on the provided --docmap path before verifying it is confined within --repo-root. While content is not read and the flag is typically controlled by CI, reordering the confinement check before any filesystem access would avoid even a metadata probe on arbitrary host paths if the path were ever user-controlled. **[MINOR]** validateDocmapPath performs os.Lstat on the provided --docmap path before verifying it is confined within --repo-root. While content is not read and the flag is typically controlled by CI, reordering the confinement check before any filesystem access would avoid even a metadata probe on arbitrary host paths if the path were ever user-controlled.
|
||||
return 2
|
||||
}
|
||||
|
sonnet-review-bot
commented
[MINOR] The EvalSymlinks call on --repo-root (line ~91) will fail with an error if the directory does not yet exist, which gives a confusing error message at startup rather than a clear 'directory not found' message. The error message just says 'failed to resolve --repo-root' without distinguishing a non-existent directory from a permission error. Not a functional bug, but a UX rough edge. **[MINOR]** The EvalSymlinks call on --repo-root (line ~91) will fail with an error if the directory does not yet exist, which gives a confusing error message at startup rather than a clear 'directory not found' message. The error message just says 'failed to resolve --repo-root' without distinguishing a non-existent directory from a permission error. Not a functional bug, but a UX rough edge.
|
||||
|
||||
if *docmapFlag == "" {
|
||||
fmt.Fprintln(errWriter, "Error: --docmap is required")
|
||||
fmt.Fprintln(errWriter, "")
|
||||
fmt.Fprintln(errWriter, "usage: review-bot validate-docmap --docmap <path> [--repo-root <dir>]")
|
||||
|
sonnet-review-bot
commented
[NIT] The usage message emitted via multiple **[NIT]** The usage message emitted via multiple `fmt.Fprintln(errWriter, "")` calls (blank line) could use a single `fmt.Fprintf` with `\n\n` for readability. Minor style issue only.
[MINOR] validateDocmapPath rejects symlinks and enforces a size cap but does not verify that the target is a regular file (fi.Mode().IsRegular()). Adding this check would defensively prevent interacting with special files (FIFOs/devices) in non-standard environments and align with the comment that it should be a "regular file." **[MINOR]** validateDocmapPath rejects symlinks and enforces a size cap but does not verify that the target is a regular file (fi.Mode().IsRegular()). Adding this check would defensively prevent interacting with special files (FIFOs/devices) in non-standard environments and align with the comment that it should be a "regular file."
|
||||
fmt.Fprintln(errWriter, " Changed files are read from stdin, one per line.")
|
||||
fmt.Fprintln(errWriter, " Example: git diff --name-only origin/main HEAD | review-bot validate-docmap --docmap .review-bot/doc-map.yml")
|
||||
return 2
|
||||
}
|
||||
|
||||
// Parse docmap YAML.
|
||||
cfg, err := review.ParseDocMapConfig(*docmapFlag)
|
||||
|
[MAJOR] Docmap file path is read directly with review.ParseDocMapConfig without validating that the path is within --repo-root and not a symlink. A malicious PR can create .review-bot/doc-map.yml as a symlink to a large or special file (e.g., /dev/zero) causing resource exhaustion or reading arbitrary host files. Harden by resolving and confining the docmap path to repoRoot and rejecting symlinks before reading. **[MAJOR]** Docmap file path is read directly with review.ParseDocMapConfig without validating that the path is within --repo-root and not a symlink. A malicious PR can create .review-bot/doc-map.yml as a symlink to a large or special file (e.g., /dev/zero) causing resource exhaustion or reading arbitrary host files. Harden by resolving and confining the docmap path to repoRoot and rejecting symlinks before reading.
[MINOR] There is no upper bound on the size of the docmap YAML when reading it (os.ReadFile). Even without symlinks, a large committed doc-map file could cause excessive memory usage. Consider checking file size via os.Lstat before reading and enforcing a reasonable cap. **[MINOR]** There is no upper bound on the size of the docmap YAML when reading it (os.ReadFile). Even without symlinks, a large committed doc-map file could cause excessive memory usage. Consider checking file size via os.Lstat before reading and enforcing a reasonable cap.
|
||||
if err != nil {
|
||||
fmt.Fprintf(errWriter, "Error: failed to parse docmap %q: %v\n", *docmapFlag, err)
|
||||
|
sonnet-review-bot
commented
[MINOR] The coverage check reports all uncovered files even when the docmap has zero mappings (empty config). This is technically correct behaviour, but an empty docmap + any changed files will fail with a coverage error. There is no documented special case for an empty docmap meaning 'coverage not required'. This is a design choice but could surprise users who have the --docmap flag set but an empty file. Consider documenting this in the usage text or treating an empty docmap as 'coverage not enforced'. **[MINOR]** The coverage check reports all uncovered files even when the docmap has zero mappings (empty config). This is technically correct behaviour, but an empty docmap + any changed files will fail with a coverage error. There is no documented special case for an empty docmap meaning 'coverage not required'. This is a design choice but could surprise users who have the --docmap flag set but an empty file. Consider documenting this in the usage text or treating an empty docmap as 'coverage not enforced'.
|
||||
return 2
|
||||
}
|
||||
|
||||
|
gpt-review-bot
commented
[MINOR] validateDocmapPath does not explicitly ensure the target is a regular file (fi.Mode().IsRegular()). A directory will be rejected later by ParseDocMapConfig via os.ReadFile, but an early explicit check would yield a clearer error and avoid attempting to read directories. **[MINOR]** validateDocmapPath does not explicitly ensure the target is a regular file (fi.Mode().IsRegular()). A directory will be rejected later by ParseDocMapConfig via os.ReadFile, but an early explicit check would yield a clearer error and avoid attempting to read directories.
|
||||
// Read changed files from stdin.
|
||||
changedFiles, err := readLines(os.Stdin)
|
||||
if err != nil {
|
||||
fmt.Fprintf(errWriter, "Error: failed to read stdin: %v\n", err)
|
||||
return 2
|
||||
}
|
||||
|
||||
failed := false
|
||||
|
sonnet-review-bot
commented
[NIT] The **[NIT]** The `validateDocmapPath` function documents that `resolvedRoot` must be an absolute, symlink-free path, but this is only enforced by convention (the caller must pass the right value). An internal assertion or a brief check `if !filepath.IsAbs(resolvedRoot)` at the top of the function would make the contract self-enforcing. Low priority since the caller in `runValidateDocmap` does this correctly.
|
||||
|
||||
// --- Check 1: Coverage ---
|
||||
var uncovered []string
|
||||
for _, f := range changedFiles {
|
||||
if !review.FileCoveredByDocMap(cfg, f) {
|
||||
|
gpt-review-bot
commented
[NIT] When matching coverage, changed file paths from stdin are used as-is. For robustness on Windows inputs, normalize backslashes to forward slashes before matching (e.g., f = strings.ReplaceAll(f, "\", "/")). **[NIT]** When matching coverage, changed file paths from stdin are used as-is. For robustness on Windows inputs, normalize backslashes to forward slashes before matching (e.g., f = strings.ReplaceAll(f, "\\", "/")).
|
||||
uncovered = append(uncovered, f)
|
||||
|
sonnet-review-bot
commented
[MINOR] readLines reads from os.Stdin directly rather than accepting an io.Reader parameter. This forces tests to swap os.Stdin (which is fragile and not goroutine-safe), whereas accepting io.Reader would let callers pass any reader. The existing test helper works around this by redirecting os.Stdin, but the function signature itself is less testable than it could be. Compare with validateurl.go pattern — if that also uses os.Stdin directly this is consistent, but it's still worth noting. **[MINOR]** readLines reads from os.Stdin directly rather than accepting an io.Reader parameter. This forces tests to swap os.Stdin (which is fragile and not goroutine-safe), whereas accepting io.Reader would let callers pass any reader. The existing test helper works around this by redirecting os.Stdin, but the function signature itself is less testable than it could be. Compare with validateurl.go pattern — if that also uses os.Stdin directly this is consistent, but it's still worth noting.
|
||||
}
|
||||
}
|
||||
if len(uncovered) > 0 {
|
||||
failed = true
|
||||
fmt.Fprintln(errWriter, "ERROR: changed files with no docmap coverage:")
|
||||
for _, f := range uncovered {
|
||||
|
sonnet-review-bot
commented
[MINOR] The stale-docs check ( **[MINOR]** The stale-docs check (`checkStaleDocs`) runs even when no changed files were provided on stdin (empty stdin case). This means a docmap with stale `docs:` entries will always fail, even on runs where no files changed — which is arguably the correct behavior, but worth a comment clarifying the intent. Currently the empty-stdin test passes because the test fixture has the doc file present, so it's fine, but the behavior asymmetry (coverage check is vacuously true on empty input, stale-docs check is not) is undocumented.
|
||||
fmt.Fprintf(errWriter, " %s\n", f)
|
||||
}
|
||||
}
|
||||
|
||||
// --- Check 2: Stale docs ---
|
||||
repoRoot := filepath.Clean(*repoRootFlag)
|
||||
|
security-review-bot marked this conversation as resolved
Outdated
[MINOR] repoRoot is only cleaned (filepath.Clean) but not resolved; resolving repoRoot with filepath.Abs + filepath.EvalSymlinks before Rel checks would harden against cases where the root itself is a symlink and reduce ambiguity across platforms. **[MINOR]** repoRoot is only cleaned (filepath.Clean) but not resolved; resolving repoRoot with filepath.Abs + filepath.EvalSymlinks before Rel checks would harden against cases where the root itself is a symlink and reduce ambiguity across platforms.
sonnet-review-bot
commented
[MINOR] The stale-docs check calls **[MINOR]** The stale-docs check calls `filepath.EvalSymlinks(absRoot)` on the `--repo-root` path, which will fail with an error if the directory does not yet exist (e.g. a new repo checkout where the root hasn't been created). In practice `--repo-root` defaults to `.` (cwd) which always exists, so this is low-risk, but an explicit `os.Stat` check before `EvalSymlinks` would give a clearer error message when the root is genuinely missing vs. a symlink resolution failure.
|
||||
staleDocs := checkStaleDocs(cfg, repoRoot)
|
||||
if len(staleDocs) > 0 {
|
||||
failed = true
|
||||
|
sonnet-review-bot
commented
[NIT] The error message 'ERROR: stale docmap docs: entries (paths do not exist):' has an awkward colon after 'docs:' — this reads as if 'docs:' is YAML syntax leaking into the message. Consider 'ERROR: stale docmap entries (docs: paths that do not exist on disk):' or simply 'ERROR: docs: entries not found on disk:'. **[NIT]** The error message 'ERROR: stale docmap docs: entries (paths do not exist):' has an awkward colon after 'docs:' — this reads as if 'docs:' is YAML syntax leaking into the message. Consider 'ERROR: stale docmap entries (docs: paths that do not exist on disk):' or simply 'ERROR: docs: entries not found on disk:'.
|
||||
fmt.Fprintln(errWriter, "ERROR: stale docmap docs: entries (paths do not exist):")
|
||||
for _, d := range staleDocs {
|
||||
fmt.Fprintf(errWriter, " %s\n", d)
|
||||
|
sonnet-review-bot
commented
[MINOR] The stale-docs check runs even when **[MINOR]** The stale-docs check runs even when `--repo-root` resolution fails, but only coverage failures are accumulated before the repo-root resolution block. If `filepath.Abs` or `filepath.EvalSymlinks` fails the function returns exit code 2 immediately — meaning a coverage failure that was already accumulated (and stored in `failed = true`) is silently swallowed, and the user sees exit 2 with only the repo-root error rather than exit 1 with both failures. Consider either: (a) emitting the accumulated coverage failures before returning 2, or (b) resolving repo-root before the coverage check so all errors are consistently collected.
|
||||
}
|
||||
|
sonnet-review-bot
commented
[NIT] The usage message prints flag help manually (fmt.Fprintln chains) rather than calling **[NIT]** The usage message prints flag help manually (fmt.Fprintln chains) rather than calling `fs.Usage()`. This is inconsistent with how flag.FlagSet normally surfaces help and could get out of sync with the registered flags. Minor since this is a short flag set, but worth noting.
|
||||
}
|
||||
|
||||
if failed {
|
||||
|
sonnet-review-bot
commented
[NIT] The stale-docs check always runs against ALL mappings in the docmap regardless of whether those mappings were triggered by any changed files. This is intentional per the PR description ('all docs: entries must exist on disk'), but the behaviour differs from what a user might expect: a mapping for **[NIT]** The stale-docs check always runs against ALL mappings in the docmap regardless of whether those mappings were triggered by any changed files. This is intentional per the PR description ('all docs: entries must exist on disk'), but the behaviour differs from what a user might expect: a mapping for `legacy/**` with a stale doc path will fail even when no legacy files changed. A short doc comment on checkStaleDocs clarifying this global-check intent would help future maintainers.
sonnet-review-bot
commented
[NIT] The flag is named **[NIT]** The flag is named `--docmap` (no hyphen between doc and map) but the README, function name, and error messages consistently spell it `doc-map`. Minor inconsistency — `--doc-map` would match the established naming convention of `--doc-map-max-bytes` and the config file name `doc-map.yml`.
|
||||
return 1
|
||||
}
|
||||
|
||||
fmt.Fprintln(outWriter, "OK: docmap is valid")
|
||||
return 0
|
||||
|
sonnet-review-bot
commented
[MINOR] The error message for stale docs reads **[MINOR]** The error message for stale docs reads `"ERROR: stale docmap docs: entries (paths do not exist):"` — the `docs:` YAML key embedded in the prose makes it slightly awkward to read ("stale docmap docs: entries"). Consider `"ERROR: stale docs: entries in docmap do not exist on disk:"` for clarity.
|
||||
}
|
||||
|
||||
// checkStaleDocs returns deduplicated docs: entries that do not exist under repoRoot.
|
||||
func checkStaleDocs(cfg *review.DocMapConfig, repoRoot string) []string {
|
||||
seen := make(map[string]struct{})
|
||||
|
gpt-review-bot
commented
[NIT] Deduplication of docs: entries uses the raw docPath string; consider normalizing (e.g., filepath.Clean and using forward slashes) before inserting into the seen set to avoid duplicate reports for equivalent paths like "docs/./a.md" vs "docs/a.md". **[NIT]** Deduplication of docs: entries uses the raw docPath string; consider normalizing (e.g., filepath.Clean and using forward slashes) before inserting into the seen set to avoid duplicate reports for equivalent paths like "docs/./a.md" vs "docs/a.md".
|
||||
var stale []string
|
||||
|
||||
for _, mapping := range cfg.Mappings {
|
||||
for _, docPath := range mapping.Docs {
|
||||
if docPath == "" {
|
||||
continue
|
||||
}
|
||||
if _, ok := seen[docPath]; ok {
|
||||
continue
|
||||
}
|
||||
seen[docPath] = struct{}{}
|
||||
|
||||
|
security-review-bot marked this conversation as resolved
Outdated
[MAJOR] Path traversal/existence probing: checkStaleDocs joins untrusted docmap paths with --repo-root and calls os.Stat without validating that the doc path is relative and confined to repoRoot. A malicious PR can include entries like "../../etc/passwd" or absolute paths to probe the CI runner filesystem. Validate doc paths (reject absolute and '..' segments) and enforce confinement with filepath.Rel/EvalSymlinks before stat. **[MAJOR]** Path traversal/existence probing: checkStaleDocs joins untrusted docmap paths with --repo-root and calls os.Stat without validating that the doc path is relative and confined to repoRoot. A malicious PR can include entries like "../../etc/passwd" or absolute paths to probe the CI runner filesystem. Validate doc paths (reject absolute and '..' segments) and enforce confinement with filepath.Rel/EvalSymlinks before stat.
gpt-review-bot
commented
[MINOR] checkStaleDocs confines paths via filepath.Clean + filepath.Rel but does not resolve symlinks. A symlink under repo-root could point outside and os.Stat would follow it, potentially leaking host file existence. Consider using filepath.EvalSymlinks on fullPath and re-checking it stays under repoRoot before statting. **[MINOR]** checkStaleDocs confines paths via filepath.Clean + filepath.Rel but does not resolve symlinks. A symlink under repo-root could point outside and os.Stat would follow it, potentially leaking host file existence. Consider using filepath.EvalSymlinks on fullPath and re-checking it stays under repoRoot before statting.
|
||||
fullPath := filepath.Join(repoRoot, filepath.FromSlash(docPath))
|
||||
|
security-review-bot marked this conversation as resolved
Outdated
[MAJOR] Path traversal/information disclosure: stale-docs check joins repoRoot with unvalidated doc paths and calls os.Stat without enforcing that the resolved path stays within repoRoot. Absolute paths or ".." segments in doc-map docs: entries can cause os.Stat to probe arbitrary filesystem locations (e.g., /etc/passwd), creating an existence oracle visible in CI logs. **[MAJOR]** Path traversal/information disclosure: stale-docs check joins repoRoot with unvalidated doc paths and calls os.Stat without enforcing that the resolved path stays within repoRoot. Absolute paths or ".." segments in doc-map docs: entries can cause os.Stat to probe arbitrary filesystem locations (e.g., /etc/passwd), creating an existence oracle visible in CI logs.
|
||||
if _, err := os.Stat(fullPath); err != nil {
|
||||
stale = append(stale, docPath)
|
||||
}
|
||||
}
|
||||
}
|
||||
return stale
|
||||
}
|
||||
|
||||
// readLines reads all non-empty trimmed lines from r.
|
||||
|
sonnet-review-bot
commented
[NIT] readLines uses bufio.Scanner with the default 64KB token buffer. Very long file paths (unlikely but possible with generated paths) would silently be truncated by scanner.Scan() returning false without error when a line exceeds the buffer. For a CI tool reading git diff output this is practically fine, but a comment noting the 64KB line limit or using scanner.Buffer to increase it would be defensive. **[NIT]** readLines uses bufio.Scanner with the default 64KB token buffer. Very long file paths (unlikely but possible with generated paths) would silently be truncated by scanner.Scan() returning false without error when a line exceeds the buffer. For a CI tool reading git diff output this is practically fine, but a comment noting the 64KB line limit or using scanner.Buffer to increase it would be defensive.
|
||||
func readLines(r io.Reader) ([]string, error) {
|
||||
scanner := bufio.NewScanner(r)
|
||||
var lines []string
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if line != "" {
|
||||
lines = append(lines, line)
|
||||
}
|
||||
}
|
||||
|
security-review-bot marked this conversation as resolved
Outdated
[MAJOR] Symlink traversal allows probing existence of arbitrary host files. checkStaleDocs uses os.Stat on repo-controlled paths after Clean/Rel checks, but does not prevent symlinks under repoRoot from pointing outside the workspace. A malicious PR can add a symlink in the repo and reference it in doc-map to infer presence/absence of sensitive files on the CI runner via pass/fail or logs. Mitigate by using os.Lstat to reject symlinks, or resolve with filepath.EvalSymlinks and re-validate the resolved path remains within repoRoot before stat. **[MAJOR]** Symlink traversal allows probing existence of arbitrary host files. checkStaleDocs uses os.Stat on repo-controlled paths after Clean/Rel checks, but does not prevent symlinks under repoRoot from pointing outside the workspace. A malicious PR can add a symlink in the repo and reference it in doc-map to infer presence/absence of sensitive files on the CI runner via pass/fail or logs. Mitigate by using os.Lstat to reject symlinks, or resolve with filepath.EvalSymlinks and re-validate the resolved path remains within repoRoot before stat.
|
||||
return lines, scanner.Err()
|
||||
}
|
||||
@@ -0,0 +1,296 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// makeDocmapYAML writes a YAML string to a temp file and returns its path.
|
||||
func makeDocmapYAML(t *testing.T, content string) string {
|
||||
t.Helper()
|
||||
f, err := os.CreateTemp(t.TempDir(), "doc-map-*.yml")
|
||||
if err != nil {
|
||||
t.Fatalf("CreateTemp: %v", err)
|
||||
}
|
||||
defer f.Close()
|
||||
if _, err := f.WriteString(content); err != nil {
|
||||
t.Fatalf("WriteString: %v", err)
|
||||
}
|
||||
return f.Name()
|
||||
}
|
||||
|
||||
// makeDocFile creates a file (and any parent dirs) at the given path relative to dir.
|
||||
func makeDocFile(t *testing.T, dir, rel string) {
|
||||
t.Helper()
|
||||
full := filepath.Join(dir, rel)
|
||||
if err := os.MkdirAll(filepath.Dir(full), 0o755); err != nil {
|
||||
t.Fatalf("MkdirAll: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(full, []byte("# doc\n"), 0o644); err != nil {
|
||||
t.Fatalf("WriteFile: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// captureOutput redirects outWriter/errWriter to buffers for the duration of f.
|
||||
func captureOutput(f func()) (stdout, stderr string) {
|
||||
var outBuf, errBuf bytes.Buffer
|
||||
origOut, origErr := outWriter, errWriter
|
||||
outWriter = &outBuf
|
||||
errWriter = &errBuf
|
||||
defer func() {
|
||||
outWriter = origOut
|
||||
|
sonnet-review-bot
commented
[NIT] The **[NIT]** The `captureOutput` helper mutates package-level `outWriter`/`errWriter` variables without any synchronisation. This is fine for sequential tests but would race if tests run in parallel (`t.Parallel()`). No parallel calls exist currently, but a comment noting this limitation would be defensive documentation.
|
||||
errWriter = origErr
|
||||
}()
|
||||
f()
|
||||
return outBuf.String(), errBuf.String()
|
||||
}
|
||||
|
||||
func TestRunValidateDocmap_Clean(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
makeDocFile(t, dir, "docs/foo.md")
|
||||
|
||||
docmap := makeDocmapYAML(t, `
|
||||
mappings:
|
||||
- paths:
|
||||
- "lib/foo/**"
|
||||
docs:
|
||||
- docs/foo.md
|
||||
`)
|
||||
|
||||
var code int
|
||||
stdout, _ := captureOutput(func() {
|
||||
code = runValidateDocmap([]string{
|
||||
"--docmap", docmap,
|
||||
"--repo-root", dir,
|
||||
})
|
||||
})
|
||||
// Provide stdin via the global os.Stdin would require process tricks.
|
||||
// The implementation reads os.Stdin directly; for tests we need a different approach.
|
||||
// See TestRunValidateDocmap_WithStdin_* below which use a helper that replaces os.Stdin.
|
||||
// This case: empty stdin (no changed files) → clean.
|
||||
|
sonnet-review-bot
commented
[NIT] TestRunValidateDocmap_Clean has a long comment block explaining why it relies on process-level stdin instead of the stdinValidateDocmap helper, which is defined later in the same file. The test would be cleaner and more consistent if it simply used stdinValidateDocmap with an empty string — which is exactly what TestRunValidateDocmap_EmptyStdin does. These two tests appear to be testing the same case (empty stdin → clean). Consider removing TestRunValidateDocmap_Clean or consolidating it with TestRunValidateDocmap_EmptyStdin. **[NIT]** TestRunValidateDocmap_Clean has a long comment block explaining why it relies on process-level stdin instead of the stdinValidateDocmap helper, which is defined later in the same file. The test would be cleaner and more consistent if it simply used stdinValidateDocmap with an empty string — which is exactly what TestRunValidateDocmap_EmptyStdin does. These two tests appear to be testing the same case (empty stdin → clean). Consider removing TestRunValidateDocmap_Clean or consolidating it with TestRunValidateDocmap_EmptyStdin.
|
||||
if code != 0 {
|
||||
t.Errorf("expected exit 0 for clean (empty stdin), got %d", code)
|
||||
}
|
||||
if !strings.Contains(stdout, "OK") {
|
||||
t.Errorf("expected 'OK' in stdout, got %q", stdout)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunValidateDocmap_MissingDocmapFlag(t *testing.T) {
|
||||
var code int
|
||||
_, stderr := captureOutput(func() {
|
||||
code = runValidateDocmap([]string{})
|
||||
})
|
||||
if code != 2 {
|
||||
t.Errorf("expected exit 2 for missing --docmap, got %d", code)
|
||||
}
|
||||
if !strings.Contains(stderr, "--docmap") {
|
||||
t.Errorf("expected --docmap in stderr, got %q", stderr)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunValidateDocmap_BadYAML(t *testing.T) {
|
||||
docmap := makeDocmapYAML(t, "mappings: [{{invalid")
|
||||
var code int
|
||||
_, stderr := captureOutput(func() {
|
||||
code = runValidateDocmap([]string{"--docmap", docmap})
|
||||
})
|
||||
if code != 2 {
|
||||
t.Errorf("expected exit 2 for bad YAML, got %d", code)
|
||||
}
|
||||
if !strings.Contains(stderr, "failed to parse") {
|
||||
t.Errorf("expected parse error in stderr, got %q", stderr)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunValidateDocmap_StaleDocs(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
// docs/foo.md does NOT exist on disk.
|
||||
|
||||
docmap := makeDocmapYAML(t, `
|
||||
mappings:
|
||||
- paths:
|
||||
- "lib/foo/**"
|
||||
docs:
|
||||
- docs/foo.md
|
||||
`)
|
||||
|
||||
var code int
|
||||
_, stderr := captureOutput(func() {
|
||||
code = runValidateDocmap([]string{
|
||||
"--docmap", docmap,
|
||||
"--repo-root", dir,
|
||||
})
|
||||
})
|
||||
if code != 1 {
|
||||
t.Errorf("expected exit 1 for stale docs, got %d", code)
|
||||
}
|
||||
if !strings.Contains(stderr, "docs/foo.md") {
|
||||
t.Errorf("expected stale path in stderr, got %q", stderr)
|
||||
}
|
||||
if !strings.Contains(stderr, "stale docmap") {
|
||||
t.Errorf("expected 'stale docmap' in stderr, got %q", stderr)
|
||||
}
|
||||
}
|
||||
|
||||
// stdinValidateDocmap runs runValidateDocmap with a synthetic stdin.
|
||||
func stdinValidateDocmap(t *testing.T, stdinContent string, args []string) (code int, stdout, stderr string) {
|
||||
t.Helper()
|
||||
// Write stdin content to a temp file and redirect os.Stdin.
|
||||
f, err := os.CreateTemp(t.TempDir(), "stdin-*")
|
||||
if err != nil {
|
||||
t.Fatalf("CreateTemp for stdin: %v", err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
sonnet-review-bot
commented
[NIT] stdinValidateDocmap defers f.Close() but the file is opened read-write and os.Stdin is set to f. The defer will close the file descriptor while it may still be in use by runValidateDocmap if Go's test runner parallelises subtests. In practice this is safe because the function runs synchronously and the defer fires after the return, but it is slightly fragile. The existing validateurl pattern likely has the same structure so this is not a regression, just a note. **[NIT]** stdinValidateDocmap defers f.Close() but the file is opened read-write and os.Stdin is set to f. The defer will close the file descriptor while it may still be in use by runValidateDocmap if Go's test runner parallelises subtests. In practice this is safe because the function runs synchronously and the defer fires after the return, but it is slightly fragile. The existing validateurl pattern likely has the same structure so this is not a regression, just a note.
|
||||
if _, err := f.WriteString(stdinContent); err != nil {
|
||||
t.Fatalf("WriteString for stdin: %v", err)
|
||||
}
|
||||
if _, err := f.Seek(0, 0); err != nil {
|
||||
t.Fatalf("Seek for stdin: %v", err)
|
||||
}
|
||||
|
||||
origStdin := os.Stdin
|
||||
|
sonnet-review-bot
commented
[MINOR] The comment on **[MINOR]** The comment on `stdinValidateDocmap` says "Tests must not call t.Parallel() while sharing the global os.Stdin" but this constraint is not enforced (e.g. via a test-level mutex or a serial subtest runner). The tests currently do not call `t.Parallel()`, so this is safe today, but the constraint is easy to violate silently when new tests are added. Consider documenting this with a `t.Setenv`-style workaround or a note that tests in this file must remain serial.
|
||||
os.Stdin = f
|
||||
defer func() { os.Stdin = origStdin }()
|
||||
|
||||
|
sonnet-review-bot
commented
[NIT] stdinValidateDocmap leaks the temp file handle — it calls defer f.Close() but the file is passed to os.Stdin before the function returns. Because os.Stdin is restored in the deferred restore and runValidateDocmap reads synchronously, this works in practice, but a reader of the test may be confused. A comment explaining the ordering would help. **[NIT]** stdinValidateDocmap leaks the temp file handle — it calls defer f.Close() but the file is passed to os.Stdin before the function returns. Because os.Stdin is restored in the deferred restore and runValidateDocmap reads synchronously, this works in practice, but a reader of the test may be confused. A comment explaining the ordering would help.
|
||||
stdout, stderr = captureOutput(func() {
|
||||
code = runValidateDocmap(args)
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
func TestRunValidateDocmap_UncoveredFile(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
makeDocFile(t, dir, "docs/foo.md")
|
||||
|
||||
|
sonnet-review-bot
commented
[NIT] The comment on **[NIT]** The comment on `stdinValidateDocmap` says 'Tests must not call t.Parallel() while sharing the global os.Stdin' but there is no `t.Skip` or assertion to enforce this. Since os.Stdin mutation is inherently process-global, this is a latent footgun if someone later adds t.Parallel() to these tests. Consider adding a brief comment directing future maintainers to use `TestMain` to serialize tests instead, or document that this constraint is enforced by convention.
|
||||
docmap := makeDocmapYAML(t, `
|
||||
mappings:
|
||||
- paths:
|
||||
- "lib/foo/**"
|
||||
docs:
|
||||
- docs/foo.md
|
||||
`)
|
||||
|
||||
code, _, stderr := stdinValidateDocmap(t,
|
||||
|
sonnet-review-bot
commented
[MINOR] The comment on **[MINOR]** The comment on `stdinValidateDocmap` says "Tests must not call t.Parallel() while sharing the global os.Stdin" — this is a valid concern, but none of the tests call `t.Parallel()` anyway, so the guard is informational only. The real risk is if a future contributor adds `t.Parallel()` without reading this comment. Consider adding a `// WARNING:` prefix or an explicit `// do not add t.Parallel() to callers` note to make it more prominent.
|
||||
"lib/bar/uncovered.ex\n",
|
||||
[]string{"--docmap", docmap, "--repo-root", dir},
|
||||
)
|
||||
if code != 1 {
|
||||
t.Errorf("expected exit 1 for uncovered file, got %d", code)
|
||||
}
|
||||
if !strings.Contains(stderr, "lib/bar/uncovered.ex") {
|
||||
t.Errorf("expected uncovered file in stderr, got %q", stderr)
|
||||
}
|
||||
if !strings.Contains(stderr, "no docmap coverage") {
|
||||
t.Errorf("expected 'no docmap coverage' in stderr, got %q", stderr)
|
||||
}
|
||||
}
|
||||
|
||||
|
sonnet-review-bot
commented
[NIT] **[NIT]** `TestCheckStaleDocs_PathTraversal` constructs the YAML by concatenating `tc.docPath` directly into the string literal. A path like `/etc/passwd` happens to be valid YAML, but paths containing YAML-special characters (e.g. `:`, `{`, `#`) could produce a parse error (exit 2) rather than the expected stale-doc error (exit 1). Adding a YAML quoting step or restricting the test vectors would make the test more robust.
|
||||
func TestRunValidateDocmap_BothFailures(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
// docs/foo.md intentionally missing
|
||||
|
||||
docmap := makeDocmapYAML(t, `
|
||||
mappings:
|
||||
- paths:
|
||||
- "lib/foo/**"
|
||||
docs:
|
||||
- docs/foo.md
|
||||
`)
|
||||
|
||||
code, _, stderr := stdinValidateDocmap(t,
|
||||
"lib/bar/uncovered.ex\n",
|
||||
[]string{"--docmap", docmap, "--repo-root", dir},
|
||||
)
|
||||
if code != 1 {
|
||||
t.Errorf("expected exit 1 for both failures, got %d", code)
|
||||
}
|
||||
if !strings.Contains(stderr, "no docmap coverage") {
|
||||
t.Errorf("expected coverage error in stderr, got %q", stderr)
|
||||
}
|
||||
if !strings.Contains(stderr, "stale docmap") {
|
||||
t.Errorf("expected stale-docs error in stderr, got %q", stderr)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunValidateDocmap_EmptyStdin(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
makeDocFile(t, dir, "docs/foo.md")
|
||||
|
||||
docmap := makeDocmapYAML(t, `
|
||||
mappings:
|
||||
- paths:
|
||||
- "lib/foo/**"
|
||||
docs:
|
||||
- docs/foo.md
|
||||
`)
|
||||
|
||||
code, stdout, _ := stdinValidateDocmap(t,
|
||||
"",
|
||||
[]string{"--docmap", docmap, "--repo-root", dir},
|
||||
)
|
||||
if code != 0 {
|
||||
t.Errorf("expected exit 0 for empty stdin, got %d", code)
|
||||
}
|
||||
if !strings.Contains(stdout, "OK") {
|
||||
t.Errorf("expected 'OK' in stdout, got %q", stdout)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunValidateDocmap_BlankLinesSkipped(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
makeDocFile(t, dir, "docs/foo.md")
|
||||
|
||||
docmap := makeDocmapYAML(t, `
|
||||
mappings:
|
||||
- paths:
|
||||
- "lib/foo/**"
|
||||
docs:
|
||||
- docs/foo.md
|
||||
`)
|
||||
|
||||
// stdin with only blank lines → effectively empty, should be clean
|
||||
code, stdout, _ := stdinValidateDocmap(t,
|
||||
"\n \n\n",
|
||||
[]string{"--docmap", docmap, "--repo-root", dir},
|
||||
)
|
||||
if code != 0 {
|
||||
t.Errorf("expected exit 0 for blank-only stdin, got %d", code)
|
||||
}
|
||||
if !strings.Contains(stdout, "OK") {
|
||||
t.Errorf("expected 'OK' in stdout for blank-only stdin, got %q", stdout)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunValidateDocmap_DuplicateDocsDeduped(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
// docs/shared.md intentionally missing — but it appears in TWO mappings.
|
||||
// Should appear only once in stale list.
|
||||
|
||||
docmap := makeDocmapYAML(t, `
|
||||
mappings:
|
||||
- paths:
|
||||
- "lib/foo/**"
|
||||
docs:
|
||||
- docs/shared.md
|
||||
- paths:
|
||||
- "lib/bar/**"
|
||||
docs:
|
||||
- docs/shared.md
|
||||
`)
|
||||
|
||||
code, _, stderr := stdinValidateDocmap(t,
|
||||
"",
|
||||
[]string{"--docmap", docmap, "--repo-root", dir},
|
||||
)
|
||||
if code != 1 {
|
||||
t.Errorf("expected exit 1 for stale doc, got %d", code)
|
||||
}
|
||||
count := strings.Count(stderr, "docs/shared.md")
|
||||
if count != 1 {
|
||||
t.Errorf("expected docs/shared.md to appear exactly once in stderr (deduplicated), got %d occurrences: %q", count, stderr)
|
||||
}
|
||||
}
|
||||
[NIT] The
maxDocmapBytesconstant is typed asint64solely to compare againstfi.Size()(which returnsint64). Consider using an untyped constant or keeping it typed but adding a brief comment explaining whyint64was chosen, since the type is not otherwise obvious from context.