title
CI / test (push) Successful in 18s
CI / review (anthropic--claude-4.6-sonnet, sonnet, SONNET_REVIEW_TOKEN) (push) Has been skipped
CI / review (gpt-5, gpt, GPT_REVIEW_TOKEN) (push) Has been skipped
CI / review (gpt-5, security, ., rodin/security-patterns, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (push) Has been skipped

This commit was merged in pull request #142.
This commit is contained in:
2026-05-15 07:39:21 +00:00
7 changed files with 950 additions and 51 deletions
+34
View File
@@ -296,6 +296,40 @@ review-bot \
--conventions-file CONVENTIONS.md --conventions-file CONVENTIONS.md
``` ```
## Subcommands
### `validate-docmap`
Verifies that a `doc-map.yml` is consistent before running a review. Two checks:
1. **Coverage**: every changed file is matched by at least one `paths:` glob.
2. **Stale docs**: every `docs:` entry exists on disk under `--repo-root`.
```bash
# Typical CI usage — pipe git diff into the command
git diff --name-only origin/main HEAD | \
review-bot validate-docmap \
--docmap .review-bot/doc-map.yml \
--repo-root .
```
| Flag | Required | Default | Description |
|------|----------|---------|-------------|
| `--docmap` | Yes | — | Path to doc-map YAML file |
| `--repo-root` | No | `.` (cwd) | Root for resolving `docs:` paths |
Exit codes: `0`=clean, `1`=failures found, `2`=usage/parse error.
### `validate-url`
Resolves a URL and verifies all IPs are publicly routable (used in CI to prevent SSRF).
```bash
review-bot validate-url https://gitea.example.com
```
Exit codes: `0`=safe, `1`=blocked/private IP, `2`=error.
## Environment Variables ## Environment Variables
All flags have environment variable equivalents: All flags have environment variable equivalents:
-42
View File
@@ -1,42 +0,0 @@
## Dev Loop Status: 2026-05-15 04:35 UTC
**Repository:** review-bot (rodin/review-bot on Gitea)
**Status:** ✅ OPTIMAL
### Health Check
- **Working tree:** clean
- **Branch:** main (up to date with origin)
- **Build:** ✅ passes (`go build ./cmd/review-bot`)
- **Tests:** ✅ ALL PASS (6/6 packages)
- **Vet:** ✅ clean
- **Open issues:** 0
- **Open PRs:** 0
### Recent Changes
- ✅ Merged PR #140: test coverage improvements (44.6% → 49.3%)
- ✅ PR #137 (doc-map feature) merged successfully with all reviews approved
### Coverage
| Package | Coverage |
|---------|----------|
| cmd/review-bot | 49.3% ↑ |
| gitea | 85.2% |
| github | 86.3% |
| review | 92.0% |
### Status Summary
All recent feature work (doc-map, coverage improvements) successfully integrated. Repository is stable and ready for next development cycle.
### Next Priority
- Continue increasing cmd/review-bot coverage (target: ≥60%)
- Monitor prod logs for edge cases
- VCS integration stable; GitHub + Gitea paths clear
---
_Dev-loop cycle complete at 04:35 UTC._
+2
View File
@@ -64,6 +64,8 @@ func main() {
switch os.Args[1] { switch os.Args[1] {
case "validate-url": case "validate-url":
os.Exit(runValidateURL(os.Args[2:])) os.Exit(runValidateURL(os.Args[2:]))
case "validate-docmap":
os.Exit(runValidateDocmap(os.Args[2:]))
} }
} }
+263
View File
@@ -0,0 +1,263 @@
package main
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"gitea.weiker.me/rodin/review-bot/review"
)
// maxDocmapBytes is the maximum size of the doc-map YAML file that will be
// read. Files larger than this are rejected before reading to prevent memory
// exhaustion from an oversized PR-controlled file.
const maxDocmapBytes int64 = 10 * 1024 * 1024 // 10 MB
// validateDocmapPath checks that localPath is safe to read as the doc-map
// file. It enforces three invariants before the file is opened:
//
// 1. The path resolves to a regular file within resolvedRoot (path
// confinement): prevents a PR-controlled --docmap from reading arbitrary
// host files via absolute paths or ".." traversal.
// 2. The path is not a symlink: prevents denial-of-service via /dev/zero or
// information disclosure via symlinks that point outside the workspace.
// 3. The file does not exceed maxDocmapBytes: prevents memory exhaustion
// from an oversized but legitimately committed doc-map file.
//
// resolvedRoot must already be an absolute, symlink-free path (obtained from
// filepath.Abs + filepath.EvalSymlinks).
func validateDocmapPath(localPath, resolvedRoot string) error {
// Resolve the docmap path to an absolute path.
absPath, err := filepath.Abs(localPath)
if err != nil {
return fmt.Errorf("cannot resolve path: %w", err)
}
// Lstat: do NOT follow symlinks. We want to inspect the entry itself.
fi, err := os.Lstat(absPath)
if err != nil {
return fmt.Errorf("cannot stat file: %w", err)
}
// Reject symlinks outright — a symlink can point to /dev/zero or arbitrary
// host paths, bypassing both the confinement check and the size check.
if fi.Mode()&os.ModeSymlink != 0 {
return fmt.Errorf("symlinks are not allowed")
}
// Confine to resolvedRoot: the cleaned absolute path must be a descendant
// of the repo root. This catches paths that escaped via "..", absolute
// paths that happen to be outside the root, etc.
rel, err := filepath.Rel(resolvedRoot, absPath)
if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(os.PathSeparator)) {
return fmt.Errorf("path must be within --repo-root")
}
// Enforce size cap before reading to prevent memory exhaustion.
if fi.Size() > maxDocmapBytes {
return fmt.Errorf("file size %d bytes exceeds %d-byte limit", fi.Size(), maxDocmapBytes)
}
return nil
}
// runValidateDocmap implements the `review-bot validate-docmap` subcommand.
//
// It reads changed file paths from stdin (one per line, as produced by
// `git diff --name-only`), parses a doc-map YAML file, and performs two checks:
//
// 1. Coverage check: every changed file must be matched by at least one
// paths: glob in the docmap. Fails if any file is uncovered.
//
// 2. Stale-docs check: every docs: entry in the docmap must exist on disk
// (relative to --repo-root). Fails if any path is missing.
//
// Both checks always run — all failures are reported before exiting.
//
// Exit codes:
//
// 0 — clean (all files covered, all docs exist)
// 1 — one or more coverage or stale-doc failures
// 2 — usage error, missing flag, or YAML parse error
func runValidateDocmap(args []string) int {
fs := flag.NewFlagSet("validate-docmap", flag.ContinueOnError)
fs.SetOutput(errWriter)
docmapFlag := fs.String("docmap", "", "Path to doc-map YAML file (required)")
repoRootFlag := fs.String("repo-root", ".", "Repo root for resolving docs: paths (default: cwd)")
if err := fs.Parse(args); err != nil {
// flag.ContinueOnError already wrote the error to errWriter.
return 2
}
if *docmapFlag == "" {
fmt.Fprintln(errWriter, "Error: --docmap is required")
fmt.Fprintln(errWriter, "")
fmt.Fprintln(errWriter, "usage: review-bot validate-docmap --docmap <path> [--repo-root <dir>]")
fmt.Fprintln(errWriter, " Changed files are read from stdin, one per line.")
fmt.Fprintln(errWriter, " Example: git diff --name-only origin/main HEAD | review-bot validate-docmap --docmap .review-bot/doc-map.yml")
return 2
}
// Resolve repoRoot first — the docmap path is validated against it below.
// Use an absolute, symlink-free path so a symlinked --repo-root cannot
// bypass the escape guard in validateDocmapPath or checkStaleDocs.
absRoot, err := filepath.Abs(*repoRootFlag)
if err != nil {
fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err)
return 2
}
resolvedRoot, err := filepath.EvalSymlinks(absRoot)
if err != nil {
if os.IsNotExist(err) {
fmt.Fprintf(errWriter, "Error: --repo-root %q does not exist\n", *repoRootFlag)
} else {
fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err)
}
return 2
}
// Harden the docmap file path before reading it. The --docmap flag value
// may reference a PR-controlled file (e.g. .review-bot/doc-map.yml).
// Validate that it:
// 1. Resolves within resolvedRoot (prevent reading arbitrary host files).
// 2. Is not a symlink (prevent /dev/zero or symlink-based host probing).
// 3. Does not exceed maxDocmapBytes (prevent memory exhaustion from an
// oversized committed file).
if err := validateDocmapPath(*docmapFlag, resolvedRoot); err != nil {
fmt.Fprintf(errWriter, "Error: --docmap %q is invalid: %v\n", *docmapFlag, err)
return 2
}
// Parse docmap YAML.
cfg, err := review.ParseDocMapConfig(*docmapFlag)
if err != nil {
fmt.Fprintf(errWriter, "Error: failed to parse docmap %q: %v\n", *docmapFlag, err)
return 2
}
// Read changed files from stdin.
changedFiles, err := readLines(os.Stdin)
if err != nil {
fmt.Fprintf(errWriter, "Error: failed to read stdin: %v\n", err)
return 2
}
failed := false
// --- Check 1: Coverage ---
// Note: an empty docmap (no mappings) means every changed file is
// uncovered — there are no patterns to match against. This is intentional:
// if you declare a doc-map, every changed file must be accounted for.
// On empty stdin the check is vacuously true (no files to cover).
var uncovered []string
for _, f := range changedFiles {
// Normalize Windows-style backslashes to forward slashes so that
// changed-file paths from git on Windows match doc-map globs.
f = strings.ReplaceAll(f, "\\", "/")
if !review.FileCoveredByDocMap(cfg, f) {
uncovered = append(uncovered, f)
}
}
if len(uncovered) > 0 {
failed = true
fmt.Fprintln(errWriter, "ERROR: changed files with no docmap coverage:")
for _, f := range uncovered {
fmt.Fprintf(errWriter, " %s\n", f)
}
}
// --- Check 2: Stale docs ---
// checkStaleDocs validates each path before touching the filesystem; see
// its documentation for the path-traversal hardening applied.
staleDocs := checkStaleDocs(cfg, resolvedRoot)
if len(staleDocs) > 0 {
failed = true
fmt.Fprintln(errWriter, "ERROR: stale docmap docs: entries (paths do not exist):")
for _, d := range staleDocs {
fmt.Fprintf(errWriter, " %s\n", d)
}
}
if failed {
return 1
}
fmt.Fprintln(outWriter, "OK: docmap is valid")
return 0
}
// checkStaleDocs returns deduplicated docs: entries that do not exist under
// repoRoot.
//
// Path-traversal hardening: each docPath is validated with
// review.ValidateDocPath (rejects absolute paths and ".." segments) and then
// confined to repoRoot via filepath.Clean + filepath.Rel before os.Lstat is
// called. Symlinks are treated as stale — a CI tool running against
// PR-controlled content must not follow symlinks that could probe arbitrary
// host paths. Paths that fail any check are treated as invalid (reported as
// stale) without following any symlinks.
func checkStaleDocs(cfg *review.DocMapConfig, repoRoot string) []string {
seen := make(map[string]struct{})
var stale []string
for _, mapping := range cfg.Mappings {
for _, docPath := range mapping.Docs {
if docPath == "" {
continue
}
if _, ok := seen[docPath]; ok {
continue
}
seen[docPath] = struct{}{}
// Guard 1: reject absolute paths and ".." segments sourced from
// PR-controlled YAML before joining with repoRoot.
if err := review.ValidateDocPath(docPath); err != nil {
stale = append(stale, docPath)
continue
}
// Guard 2: verify the cleaned joined path does not escape repoRoot.
// filepath.Clean resolves any remaining ".." after the join; the
// filepath.Rel check confirms the path is still under repoRoot.
fullPath := filepath.Clean(filepath.Join(repoRoot, filepath.FromSlash(docPath)))
rel, err := filepath.Rel(repoRoot, fullPath)
if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(os.PathSeparator)) {
stale = append(stale, docPath)
continue
}
// Use Lstat (not Stat) so symlinks are never followed. A symlink
// under repoRoot could point anywhere on the host, allowing a
// malicious PR to probe file existence. Treat symlinks as stale.
fi, err := os.Lstat(fullPath)
if err != nil {
stale = append(stale, docPath)
continue
}
if fi.Mode()&os.ModeSymlink != 0 {
stale = append(stale, docPath)
}
}
}
return stale
}
// readLines reads all non-empty trimmed lines from r.
func readLines(r io.Reader) ([]string, error) {
scanner := bufio.NewScanner(r)
var lines []string
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if line != "" {
lines = append(lines, line)
}
}
return lines, scanner.Err()
}
+552
View File
@@ -0,0 +1,552 @@
package main
import (
"bytes"
"os"
"path/filepath"
"strings"
"testing"
)
// makeDocmapYAML writes a YAML string to a temp file and returns its path.
// The file is created in t.TempDir() — use makeDocmapInDir when the docmap
// must be located inside a specific repo-root directory.
func makeDocmapYAML(t *testing.T, content string) string {
t.Helper()
f, err := os.CreateTemp(t.TempDir(), "doc-map-*.yml")
if err != nil {
t.Fatalf("CreateTemp: %v", err)
}
defer f.Close()
if _, err := f.WriteString(content); err != nil {
t.Fatalf("WriteString: %v", err)
}
return f.Name()
}
// makeDocmapInDir writes a YAML string to a file inside dir and returns the
// file path. Use this instead of makeDocmapYAML when also passing --repo-root,
// because validateDocmapPath requires the docmap to be within the repo root.
func makeDocmapInDir(t *testing.T, dir, content string) string {
t.Helper()
if err := os.MkdirAll(filepath.Join(dir, ".review-bot"), 0o755); err != nil {
t.Fatalf("MkdirAll: %v", err)
}
path := filepath.Join(dir, ".review-bot", "doc-map.yml")
if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
t.Fatalf("WriteFile: %v", err)
}
return path
}
// makeDocFile creates a file (and any parent dirs) at the given path relative to dir.
func makeDocFile(t *testing.T, dir, rel string) {
t.Helper()
full := filepath.Join(dir, rel)
if err := os.MkdirAll(filepath.Dir(full), 0o755); err != nil {
t.Fatalf("MkdirAll: %v", err)
}
if err := os.WriteFile(full, []byte("# doc\n"), 0o644); err != nil {
t.Fatalf("WriteFile: %v", err)
}
}
// captureOutput redirects outWriter/errWriter to buffers for the duration of f.
func captureOutput(f func()) (stdout, stderr string) {
var outBuf, errBuf bytes.Buffer
origOut, origErr := outWriter, errWriter
outWriter = &outBuf
errWriter = &errBuf
defer func() {
outWriter = origOut
errWriter = origErr
}()
f()
return outBuf.String(), errBuf.String()
}
func TestRunValidateDocmap_Clean(t *testing.T) {
dir := t.TempDir()
makeDocFile(t, dir, "docs/foo.md")
docmap := makeDocmapInDir(t, dir, `
mappings:
- paths:
- "lib/foo/**"
docs:
- docs/foo.md
`)
// A covered file with all docs existing → clean.
code, stdout, _ := stdinValidateDocmap(t,
"lib/foo/bar.ex\n",
[]string{"--docmap", docmap, "--repo-root", dir},
)
if code != 0 {
t.Errorf("expected exit 0 for clean, got %d", code)
}
if !strings.Contains(stdout, "OK") {
t.Errorf("expected 'OK' in stdout, got %q", stdout)
}
}
func TestRunValidateDocmap_MissingDocmapFlag(t *testing.T) {
var code int
_, stderr := captureOutput(func() {
code = runValidateDocmap([]string{})
})
if code != 2 {
t.Errorf("expected exit 2 for missing --docmap, got %d", code)
}
if !strings.Contains(stderr, "--docmap") {
t.Errorf("expected --docmap in stderr, got %q", stderr)
}
}
func TestRunValidateDocmap_BadYAML(t *testing.T) {
dir := t.TempDir()
docmap := makeDocmapInDir(t, dir, "mappings: [{{invalid")
var code int
_, stderr := captureOutput(func() {
code = runValidateDocmap([]string{"--docmap", docmap, "--repo-root", dir})
})
if code != 2 {
t.Errorf("expected exit 2 for bad YAML, got %d", code)
}
if !strings.Contains(stderr, "failed to parse") {
t.Errorf("expected parse error in stderr, got %q", stderr)
}
}
func TestRunValidateDocmap_StaleDocs(t *testing.T) {
dir := t.TempDir()
// docs/foo.md does NOT exist on disk.
docmap := makeDocmapInDir(t, dir, `
mappings:
- paths:
- "lib/foo/**"
docs:
- docs/foo.md
`)
var code int
_, stderr := captureOutput(func() {
code = runValidateDocmap([]string{
"--docmap", docmap,
"--repo-root", dir,
})
})
if code != 1 {
t.Errorf("expected exit 1 for stale docs, got %d", code)
}
if !strings.Contains(stderr, "docs/foo.md") {
t.Errorf("expected stale path in stderr, got %q", stderr)
}
if !strings.Contains(stderr, "stale docmap") {
t.Errorf("expected 'stale docmap' in stderr, got %q", stderr)
}
}
// stdinValidateDocmap runs runValidateDocmap with a synthetic stdin.
//
// Implementation note: we write stdinContent to a temp file and point
// os.Stdin at it. The defer f.Close() fires after stdinValidateDocmap
// returns, which is after runValidateDocmap has finished reading stdin
// synchronously — so the file is not closed while still in use.
// Tests must not call t.Parallel() while sharing the global os.Stdin.
func stdinValidateDocmap(t *testing.T, stdinContent string, args []string) (code int, stdout, stderr string) {
t.Helper()
// Write stdin content to a temp file and redirect os.Stdin.
f, err := os.CreateTemp(t.TempDir(), "stdin-*")
if err != nil {
t.Fatalf("CreateTemp for stdin: %v", err)
}
defer f.Close()
if _, err := f.WriteString(stdinContent); err != nil {
t.Fatalf("WriteString for stdin: %v", err)
}
if _, err := f.Seek(0, 0); err != nil {
t.Fatalf("Seek for stdin: %v", err)
}
origStdin := os.Stdin
os.Stdin = f
defer func() { os.Stdin = origStdin }()
stdout, stderr = captureOutput(func() {
code = runValidateDocmap(args)
})
return
}
func TestRunValidateDocmap_UncoveredFile(t *testing.T) {
dir := t.TempDir()
makeDocFile(t, dir, "docs/foo.md")
docmap := makeDocmapInDir(t, dir, `
mappings:
- paths:
- "lib/foo/**"
docs:
- docs/foo.md
`)
code, _, stderr := stdinValidateDocmap(t,
"lib/bar/uncovered.ex\n",
[]string{"--docmap", docmap, "--repo-root", dir},
)
if code != 1 {
t.Errorf("expected exit 1 for uncovered file, got %d", code)
}
if !strings.Contains(stderr, "lib/bar/uncovered.ex") {
t.Errorf("expected uncovered file in stderr, got %q", stderr)
}
if !strings.Contains(stderr, "no docmap coverage") {
t.Errorf("expected 'no docmap coverage' in stderr, got %q", stderr)
}
}
func TestRunValidateDocmap_BothFailures(t *testing.T) {
dir := t.TempDir()
// docs/foo.md intentionally missing
docmap := makeDocmapInDir(t, dir, `
mappings:
- paths:
- "lib/foo/**"
docs:
- docs/foo.md
`)
code, _, stderr := stdinValidateDocmap(t,
"lib/bar/uncovered.ex\n",
[]string{"--docmap", docmap, "--repo-root", dir},
)
if code != 1 {
t.Errorf("expected exit 1 for both failures, got %d", code)
}
if !strings.Contains(stderr, "no docmap coverage") {
t.Errorf("expected coverage error in stderr, got %q", stderr)
}
if !strings.Contains(stderr, "stale docmap") {
t.Errorf("expected stale-docs error in stderr, got %q", stderr)
}
}
func TestRunValidateDocmap_EmptyStdin(t *testing.T) {
dir := t.TempDir()
makeDocFile(t, dir, "docs/foo.md")
docmap := makeDocmapInDir(t, dir, `
mappings:
- paths:
- "lib/foo/**"
docs:
- docs/foo.md
`)
code, stdout, _ := stdinValidateDocmap(t,
"",
[]string{"--docmap", docmap, "--repo-root", dir},
)
if code != 0 {
t.Errorf("expected exit 0 for empty stdin, got %d", code)
}
if !strings.Contains(stdout, "OK") {
t.Errorf("expected 'OK' in stdout, got %q", stdout)
}
}
func TestRunValidateDocmap_BlankLinesSkipped(t *testing.T) {
dir := t.TempDir()
makeDocFile(t, dir, "docs/foo.md")
docmap := makeDocmapInDir(t, dir, `
mappings:
- paths:
- "lib/foo/**"
docs:
- docs/foo.md
`)
// stdin with only blank lines → effectively empty, should be clean
code, stdout, _ := stdinValidateDocmap(t,
"\n \n\n",
[]string{"--docmap", docmap, "--repo-root", dir},
)
if code != 0 {
t.Errorf("expected exit 0 for blank-only stdin, got %d", code)
}
if !strings.Contains(stdout, "OK") {
t.Errorf("expected 'OK' in stdout for blank-only stdin, got %q", stdout)
}
}
func TestRunValidateDocmap_DuplicateDocsDeduped(t *testing.T) {
dir := t.TempDir()
// docs/shared.md intentionally missing — but it appears in TWO mappings.
// Should appear only once in stale list.
docmap := makeDocmapInDir(t, dir, `
mappings:
- paths:
- "lib/foo/**"
docs:
- docs/shared.md
- paths:
- "lib/bar/**"
docs:
- docs/shared.md
`)
code, _, stderr := stdinValidateDocmap(t,
"",
[]string{"--docmap", docmap, "--repo-root", dir},
)
if code != 1 {
t.Errorf("expected exit 1 for stale doc, got %d", code)
}
count := strings.Count(stderr, "docs/shared.md")
if count != 1 {
t.Errorf("expected docs/shared.md to appear exactly once in stderr (deduplicated), got %d occurrences: %q", count, stderr)
}
}
// TestCheckStaleDocs_PathTraversal verifies that checkStaleDocs rejects
// traversal and absolute paths without touching the host filesystem.
func TestCheckStaleDocs_PathTraversal(t *testing.T) {
dir := t.TempDir()
// Baseline: a valid doc that exists.
makeDocFile(t, dir, "docs/valid.md")
tests := []struct {
name string
docPath string
wantStale bool
}{
{"dot-dot traversal", "../../etc/passwd", true},
{"dot-dot single", "../outside", true},
{"absolute path", "/etc/passwd", true},
{"valid present path", "docs/valid.md", false},
{"valid missing path", "docs/missing.md", true},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
docmap := makeDocmapInDir(t, dir, `
mappings:
- paths:
- "lib/**"
docs:
- `+tc.docPath+`
`)
code, _, stderr := stdinValidateDocmap(t,
"",
[]string{"--docmap", docmap, "--repo-root", dir},
)
if tc.wantStale {
if code != 1 {
t.Errorf("path %q: expected exit 1 (stale/invalid), got %d; stderr: %q", tc.docPath, code, stderr)
}
} else {
if code != 0 {
t.Errorf("path %q: expected exit 0 (valid), got %d; stderr: %q", tc.docPath, code, stderr)
}
}
})
}
}
// TestCheckStaleDocs_SymlinkOutside verifies that a symlink under repoRoot
// pointing outside the repo is treated as stale (not followed).
func TestCheckStaleDocs_SymlinkOutside(t *testing.T) {
dir := t.TempDir()
// Create a symlink inside repoRoot pointing to a file outside the repo.
// We point at /etc/hostname (exists on Linux CI) but the test does not
// depend on that file existing — Lstat must reject the symlink itself.
linkPath := filepath.Join(dir, "docs", "secret.md")
if err := os.MkdirAll(filepath.Dir(linkPath), 0o755); err != nil {
t.Fatalf("MkdirAll: %v", err)
}
if err := os.Symlink("/etc/hostname", linkPath); err != nil {
t.Fatalf("Symlink: %v", err)
}
docmap := makeDocmapInDir(t, dir, `
mappings:
- paths:
- "lib/**"
docs:
- docs/secret.md
`)
code, _, stderr := stdinValidateDocmap(t,
"",
[]string{"--docmap", docmap, "--repo-root", dir},
)
if code != 1 {
t.Errorf("expected exit 1 for symlink doc, got %d; stderr: %q", code, stderr)
}
if !strings.Contains(stderr, "docs/secret.md") {
t.Errorf("expected stale path in stderr, got %q", stderr)
}
}
// TestCheckStaleDocs_SymlinkInsideRepo verifies that a symlink pointing to
// another file *within* the repo is also treated as stale. We refuse all
// symlinks regardless of target to keep the check simple and safe.
func TestCheckStaleDocs_SymlinkInsideRepo(t *testing.T) {
dir := t.TempDir()
// Real doc file.
makeDocFile(t, dir, "docs/real.md")
// Symlink inside repo pointing at the real file.
linkPath := filepath.Join(dir, "docs", "link.md")
if err := os.Symlink(filepath.Join(dir, "docs", "real.md"), linkPath); err != nil {
t.Fatalf("Symlink: %v", err)
}
docmap := makeDocmapInDir(t, dir, `
mappings:
- paths:
- "lib/**"
docs:
- docs/link.md
`)
code, _, stderr := stdinValidateDocmap(t,
"",
[]string{"--docmap", docmap, "--repo-root", dir},
)
if code != 1 {
t.Errorf("expected exit 1 for symlink doc (even intra-repo), got %d; stderr: %q", code, stderr)
}
}
// TestRunValidateDocmap_SymlinkRepoRoot verifies that a --repo-root that is
// itself a symlink to a valid directory resolves correctly.
func TestRunValidateDocmap_SymlinkRepoRoot(t *testing.T) {
realDir := t.TempDir()
makeDocFile(t, realDir, "docs/foo.md")
// Create a symlink pointing at realDir.
symlinkDir := filepath.Join(t.TempDir(), "link-root")
if err := os.Symlink(realDir, symlinkDir); err != nil {
t.Fatalf("Symlink: %v", err)
}
// Place the docmap inside realDir so it passes the confinement check.
// (symlinkDir resolves to realDir, so files inside realDir are also inside
// the resolved repo-root.)
docmap := makeDocmapInDir(t, realDir, `
mappings:
- paths:
- "lib/**"
docs:
- docs/foo.md
`)
// Using the symlinked repo-root: the real doc exists → should be clean.
code, stdout, stderr := stdinValidateDocmap(t,
"lib/foo.go\n",
[]string{"--docmap", docmap, "--repo-root", symlinkDir},
)
if code != 0 {
t.Errorf("expected exit 0 for symlinked repo-root with existing doc, got %d; stderr: %q", code, stderr)
}
if !strings.Contains(stdout, "OK") {
t.Errorf("expected 'OK' in stdout, got %q", stdout)
}
}
// TestValidateDocmapPath_Symlink verifies that --docmap pointing at a symlink
// is rejected before the file is read (prevents /dev/zero DOS or arbitrary
// host-file reads via PR-controlled symlinks).
func TestValidateDocmapPath_Symlink(t *testing.T) {
dir := t.TempDir()
// Create a real docmap file to serve as the symlink target.
realDocmap := makeDocmapInDir(t, dir, `
mappings:
- paths:
- "lib/**"
docs:
- docs/foo.md
`)
// Create a symlink inside dir pointing to the real docmap.
symlinkPath := filepath.Join(dir, ".review-bot", "doc-map-link.yml")
if err := os.Symlink(realDocmap, symlinkPath); err != nil {
t.Fatalf("Symlink: %v", err)
}
code, _, stderr := stdinValidateDocmap(t,
"",
[]string{"--docmap", symlinkPath, "--repo-root", dir},
)
if code != 2 {
t.Errorf("expected exit 2 for symlink docmap, got %d; stderr: %q", code, stderr)
}
if !strings.Contains(stderr, "symlink") && !strings.Contains(stderr, "invalid") {
t.Errorf("expected symlink rejection in stderr, got %q", stderr)
}
}
// TestValidateDocmapPath_OutsideRepoRoot verifies that --docmap pointing
// outside --repo-root is rejected (prevents reading arbitrary host files).
func TestValidateDocmapPath_OutsideRepoRoot(t *testing.T) {
repoDir := t.TempDir()
// Create a docmap in a separate temp dir (outside the repo root).
outside := makeDocmapYAML(t, `
mappings:
- paths:
- "lib/**"
docs:
- docs/foo.md
`)
code, _, stderr := stdinValidateDocmap(t,
"",
[]string{"--docmap", outside, "--repo-root", repoDir},
)
if code != 2 {
t.Errorf("expected exit 2 for docmap outside repo-root, got %d; stderr: %q", code, stderr)
}
if !strings.Contains(stderr, "invalid") && !strings.Contains(stderr, "repo-root") {
t.Errorf("expected confinement rejection in stderr, got %q", stderr)
}
}
// TestValidateDocmapPath_SizeLimit verifies that --docmap files exceeding
// maxDocmapBytes are rejected before reading (prevents memory exhaustion).
func TestValidateDocmapPath_SizeLimit(t *testing.T) {
dir := t.TempDir()
// Write a file larger than maxDocmapBytes.
bigPath := filepath.Join(dir, ".review-bot", "big-doc-map.yml")
if err := os.MkdirAll(filepath.Dir(bigPath), 0o755); err != nil {
t.Fatalf("MkdirAll: %v", err)
}
// Exceed the limit by one byte.
bigContent := make([]byte, maxDocmapBytes+1)
if err := os.WriteFile(bigPath, bigContent, 0o644); err != nil {
t.Fatalf("WriteFile: %v", err)
}
code, _, stderr := stdinValidateDocmap(t,
"",
[]string{"--docmap", bigPath, "--repo-root", dir},
)
if code != 2 {
t.Errorf("expected exit 2 for oversized docmap, got %d; stderr: %q", code, stderr)
}
if !strings.Contains(stderr, "limit") && !strings.Contains(stderr, "size") && !strings.Contains(stderr, "invalid") {
t.Errorf("expected size limit error in stderr, got %q", stderr)
}
}
+22 -6
View File
@@ -66,6 +66,18 @@ func ParseDocMapConfig(localPath string) (*DocMapConfig, error) {
return &cfg, nil return &cfg, nil
} }
// FileCoveredByDocMap reports whether at least one paths: glob in any mapping
// of cfg matches the given file path. It is used by static validation tooling
// (e.g. the validate-docmap subcommand) to check per-file docmap coverage.
func FileCoveredByDocMap(cfg *DocMapConfig, file string) bool {
for _, mapping := range cfg.Mappings {
if mappingMatches(mapping.Paths, []string{file}) {
return true
}
}
return false
}
// MatchDocs returns deduplicated doc paths for the given changed file paths. // MatchDocs returns deduplicated doc paths for the given changed file paths.
// A mapping matches if any of its path globs matches any of the changed files. // A mapping matches if any of its path globs matches any of the changed files.
func MatchDocs(cfg *DocMapConfig, changedFiles []string) []string { func MatchDocs(cfg *DocMapConfig, changedFiles []string) []string {
@@ -245,7 +257,7 @@ type docEntry struct {
// If the path is a directory, all .md files under it are returned. // If the path is a directory, all .md files under it are returned.
// If it's a file, a single entry is returned. // If it's a file, a single entry is returned.
func loadDocEntries(ctx context.Context, fetcher DocFetcher, owner, repo, docPath string) ([]docEntry, error) { func loadDocEntries(ctx context.Context, fetcher DocFetcher, owner, repo, docPath string) ([]docEntry, error) {
if err := validateDocPath(docPath); err != nil { if err := ValidateDocPath(docPath); err != nil {
return nil, fmt.Errorf("doc path %q rejected: %w", docPath, err) return nil, fmt.Errorf("doc path %q rejected: %w", docPath, err)
} }
@@ -298,11 +310,15 @@ func readFileBytes(path string) ([]byte, error) {
return os.ReadFile(path) return os.ReadFile(path)
} }
// validateDocPath rejects doc paths that could cause path traversal via the // ValidateDocPath rejects doc paths that could cause path traversal
// VCS API (absolute paths, any ".." segment). Defense-in-depth: the VCS API // (absolute paths, any ".." segment, backslashes). Defense-in-depth: callers
// should already scope paths to the repo, but we validate locally to avoid // must also confine the joined path to the repo root via filepath.Rel before
// any quirk in backend path handling. // any filesystem access. Backslashes are rejected explicitly to prevent
func validateDocPath(p string) error { // Windows platform edge cases.
func ValidateDocPath(p string) error {
if strings.Contains(p, "\\") {
return fmt.Errorf("backslashes not allowed in doc paths")
}
if filepath.IsAbs(p) { if filepath.IsAbs(p) {
return fmt.Errorf("absolute paths not allowed") return fmt.Errorf("absolute paths not allowed")
} }
+77 -3
View File
@@ -11,7 +11,7 @@ import (
// fakeDocFetcher is a mock DocFetcher for tests. // fakeDocFetcher is a mock DocFetcher for tests.
type fakeDocFetcher struct { type fakeDocFetcher struct {
files map[string]string // path -> content files map[string]string // path -> content
dirs map[string]map[string]string // dir path -> (file path -> content) dirs map[string]map[string]string // dir path -> (file path -> content)
} }
@@ -384,7 +384,7 @@ func TestValidateDocPath(t *testing.T) {
"a/b/c", "a/b/c",
} }
for _, p := range valid { for _, p := range valid {
if err := validateDocPath(p); err != nil { if err := ValidateDocPath(p); err != nil {
t.Errorf("expected valid path %q to pass, got error: %v", p, err) t.Errorf("expected valid path %q to pass, got error: %v", p, err)
} }
} }
@@ -395,9 +395,13 @@ func TestValidateDocPath(t *testing.T) {
"docs/../../../etc/passwd", "docs/../../../etc/passwd",
"../sibling-repo/file.md", "../sibling-repo/file.md",
"a/b/../c", "a/b/../c",
// Backslashes must be rejected (Finding #3 — Windows platform edge cases).
`docs\foo.md`,
`docs\..\secret`,
`\absolute`,
} }
for _, p := range invalid { for _, p := range invalid {
if err := validateDocPath(p); err == nil { if err := ValidateDocPath(p); err == nil {
t.Errorf("expected path %q to be rejected, but it was accepted", p) t.Errorf("expected path %q to be rejected, but it was accepted", p)
} }
} }
@@ -420,6 +424,27 @@ func TestLoadMatchingDocs_PathTraversalRejected(t *testing.T) {
} }
} }
// TestValidateDocPath_Backslash verifies that backslash-bearing paths are
// rejected to prevent Windows platform edge cases where a path separator
// could be normalised differently by the host OS or VCS backend.
func TestValidateDocPath_Backslash(t *testing.T) {
backslashPaths := []string{
`docs\foo.md`,
`docs\subdir\file.md`,
`\absolute`,
}
for _, p := range backslashPaths {
if err := ValidateDocPath(p); err == nil {
t.Errorf("expected backslash path %q to be rejected, but it was accepted", p)
}
}
// Sanity: forward-slash path must still be accepted.
if err := ValidateDocPath("docs/foo.md"); err != nil {
t.Errorf("expected forward-slash path to be accepted, got: %v", err)
}
}
// ============================================================ // ============================================================
// Helpers // Helpers
// ============================================================ // ============================================================
@@ -436,3 +461,52 @@ func writeTempYAML(t *testing.T, content string) string {
} }
return filepath.Clean(f.Name()) return filepath.Clean(f.Name())
} }
// ============================================================
// FileCoveredByDocMap
// ============================================================
func TestFileCoveredByDocMap(t *testing.T) {
cfg := &DocMapConfig{
Mappings: []DocMapping{
{
Paths: []string{"lib/foo/**", "lib/bar/*.go"},
Docs: []string{"docs/foo.md"},
},
{
Paths: []string{"cmd/**"},
Docs: []string{"docs/cmd.md"},
},
},
}
cases := []struct {
file string
covered bool
}{
{"lib/foo/baz.ex", true},
{"lib/foo/sub/deep.ex", true},
{"lib/bar/util.go", true},
{"lib/bar/sub/util.go", false}, // *.go only matches one level
{"cmd/main.go", true},
{"cmd/sub/main.go", true},
{"internal/secret.go", false},
{"", false},
}
for _, tc := range cases {
t.Run(tc.file, func(t *testing.T) {
got := FileCoveredByDocMap(cfg, tc.file)
if got != tc.covered {
t.Errorf("FileCoveredByDocMap(%q) = %v, want %v", tc.file, got, tc.covered)
}
})
}
}
func TestFileCoveredByDocMap_EmptyConfig(t *testing.T) {
cfg := &DocMapConfig{}
if FileCoveredByDocMap(cfg, "lib/foo/bar.go") {
t.Error("expected false for empty config, got true")
}
}