bacb25e029
PR Ready Gate / clear-labels (pull_request) Successful in 2s
CI / test (pull_request) Successful in 17s
CI / review (anthropic--claude-4.6-sonnet, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 22s
CI / review (gpt-5, gpt, GPT_REVIEW_TOKEN) (pull_request) Successful in 50s
CI / review (gpt-5, security, ., rodin/security-patterns, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Successful in 43s
"ERROR: stale docmap docs: entries" had a vestigial "docs:" fragment that reads awkwardly (looks like a YAML reference). Change to: "ERROR: stale docmap entries (paths do not exist):" Addresses NIT finding in review #4175.
285 lines
10 KiB
Go
285 lines
10 KiB
Go
package main
|
|
|
|
import (
|
|
"bufio"
|
|
"flag"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"gitea.weiker.me/rodin/review-bot/review"
|
|
)
|
|
|
|
// maxDocmapBytes is the maximum size of the doc-map YAML file that will be
|
|
// read. Files larger than this are rejected before reading to prevent memory
|
|
// exhaustion from an oversized PR-controlled file.
|
|
const maxDocmapBytes int64 = 10 * 1024 * 1024 // 10 MB
|
|
|
|
// validateDocmapPath checks that localPath is safe to read as the doc-map
|
|
// file. It enforces three invariants before the file is opened:
|
|
//
|
|
// 1. The path resolves to a regular file within resolvedRoot (path
|
|
// confinement): prevents a PR-controlled --docmap from reading arbitrary
|
|
// host files via absolute paths or ".." traversal.
|
|
// 2. The path is not a symlink: prevents denial-of-service via /dev/zero or
|
|
// information disclosure via symlinks that point outside the workspace.
|
|
// 3. The file does not exceed maxDocmapBytes: prevents memory exhaustion
|
|
// from an oversized but legitimately committed doc-map file.
|
|
//
|
|
// resolvedRoot must already be an absolute, symlink-free path (obtained from
|
|
// filepath.Abs + filepath.EvalSymlinks).
|
|
func validateDocmapPath(localPath, resolvedRoot string) error {
|
|
// Resolve the docmap path to an absolute path.
|
|
absPath, err := filepath.Abs(localPath)
|
|
if err != nil {
|
|
return fmt.Errorf("cannot resolve path: %w", err)
|
|
}
|
|
|
|
// Resolve ALL symlink components, not just the final one.
|
|
// os.Lstat only avoids following the *final* path component; intermediate
|
|
// directory symlinks are still followed. EvalSymlinks resolves every
|
|
// component, closing the directory-symlink bypass: a PR that commits
|
|
// .review-bot/ as a directory symlink pointing outside the repo would
|
|
// otherwise pass the filepath.Rel confinement check because the textual
|
|
// path is inside the root while the actual destination is not.
|
|
resolvedPath, err := filepath.EvalSymlinks(absPath)
|
|
if err != nil {
|
|
return fmt.Errorf("cannot resolve path (symlink): %w", err)
|
|
}
|
|
|
|
// Lstat the resolved path — at this point resolvedPath is symlink-free, so
|
|
// ModeSymlink will never be set. We keep the check as defense-in-depth.
|
|
fi, err := os.Lstat(resolvedPath)
|
|
if err != nil {
|
|
return fmt.Errorf("cannot stat file: %w", err)
|
|
}
|
|
|
|
// Defense-in-depth: reject any remaining symlink indicator.
|
|
if fi.Mode()&os.ModeSymlink != 0 {
|
|
return fmt.Errorf("symlinks are not allowed")
|
|
}
|
|
|
|
// Reject anything that is not a regular file (directories, FIFOs, device
|
|
// nodes, etc.) — ParseDocMapConfig expects a plain YAML file and would
|
|
// produce a confusing error on non-regular entries.
|
|
if !fi.Mode().IsRegular() {
|
|
return fmt.Errorf("docmap must be a regular file")
|
|
}
|
|
|
|
// Confine to resolvedRoot: use the fully-resolved path so that a directory
|
|
// symlink inside the repo cannot carry the path outside the root.
|
|
rel, err := filepath.Rel(resolvedRoot, resolvedPath)
|
|
if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(os.PathSeparator)) {
|
|
return fmt.Errorf("path must be within --repo-root")
|
|
}
|
|
|
|
// Enforce size cap before reading to prevent memory exhaustion.
|
|
if fi.Size() > maxDocmapBytes {
|
|
return fmt.Errorf("file size %d bytes exceeds %d-byte limit", fi.Size(), maxDocmapBytes)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// runValidateDocmap implements the `review-bot validate-docmap` subcommand.
|
|
//
|
|
// It reads changed file paths from stdin (one per line, as produced by
|
|
// `git diff --name-only`), parses a doc-map YAML file, and performs two checks:
|
|
//
|
|
// 1. Coverage check: every changed file must be matched by at least one
|
|
// paths: glob in the docmap. Fails if any file is uncovered.
|
|
//
|
|
// 2. Stale-docs check: every docs: entry in the docmap must exist on disk
|
|
// (relative to --repo-root). Fails if any path is missing.
|
|
//
|
|
// Both checks always run — all failures are reported before exiting.
|
|
//
|
|
// Exit codes:
|
|
//
|
|
// 0 — clean (all files covered, all docs exist)
|
|
// 1 — one or more coverage or stale-doc failures
|
|
// 2 — usage error, missing flag, or YAML parse error
|
|
func runValidateDocmap(args []string) int {
|
|
fs := flag.NewFlagSet("validate-docmap", flag.ContinueOnError)
|
|
fs.SetOutput(errWriter)
|
|
|
|
docmapFlag := fs.String("docmap", "", "Path to doc-map YAML file (required)")
|
|
repoRootFlag := fs.String("repo-root", ".", "Repo root for resolving docs: paths (default: cwd)")
|
|
|
|
if err := fs.Parse(args); err != nil {
|
|
// flag.ContinueOnError already wrote the error to errWriter.
|
|
return 2
|
|
}
|
|
|
|
if *docmapFlag == "" {
|
|
fmt.Fprintln(errWriter, "Error: --docmap is required")
|
|
fmt.Fprintln(errWriter, "")
|
|
fmt.Fprintln(errWriter, "usage: review-bot validate-docmap --docmap <path> [--repo-root <dir>]")
|
|
fmt.Fprintln(errWriter, " Changed files are read from stdin, one per line.")
|
|
fmt.Fprintln(errWriter, " Example: git diff --name-only origin/main HEAD | review-bot validate-docmap --docmap .review-bot/doc-map.yml")
|
|
return 2
|
|
}
|
|
|
|
// Resolve repoRoot first — the docmap path is validated against it below.
|
|
// Use an absolute, symlink-free path so a symlinked --repo-root cannot
|
|
// bypass the escape guard in validateDocmapPath or checkStaleDocs.
|
|
absRoot, err := filepath.Abs(*repoRootFlag)
|
|
if err != nil {
|
|
fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err)
|
|
return 2
|
|
}
|
|
resolvedRoot, err := filepath.EvalSymlinks(absRoot)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
fmt.Fprintf(errWriter, "Error: --repo-root %q does not exist\n", *repoRootFlag)
|
|
} else {
|
|
fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err)
|
|
}
|
|
return 2
|
|
}
|
|
|
|
// Harden the docmap file path before reading it. The --docmap flag value
|
|
// may reference a PR-controlled file (e.g. .review-bot/doc-map.yml).
|
|
// Validate that it:
|
|
// 1. Resolves within resolvedRoot (prevent reading arbitrary host files).
|
|
// 2. Is not a symlink (prevent /dev/zero or symlink-based host probing).
|
|
// 3. Does not exceed maxDocmapBytes (prevent memory exhaustion from an
|
|
// oversized committed file).
|
|
if err := validateDocmapPath(*docmapFlag, resolvedRoot); err != nil {
|
|
fmt.Fprintf(errWriter, "Error: --docmap %q is invalid: %v\n", *docmapFlag, err)
|
|
return 2
|
|
}
|
|
|
|
// Parse docmap YAML.
|
|
cfg, err := review.ParseDocMapConfig(*docmapFlag)
|
|
if err != nil {
|
|
fmt.Fprintf(errWriter, "Error: failed to parse docmap %q: %v\n", *docmapFlag, err)
|
|
return 2
|
|
}
|
|
|
|
// Read changed files from stdin.
|
|
changedFiles, err := readLines(os.Stdin)
|
|
if err != nil {
|
|
fmt.Fprintf(errWriter, "Error: failed to read stdin: %v\n", err)
|
|
return 2
|
|
}
|
|
|
|
failed := false
|
|
|
|
// --- Check 1: Coverage ---
|
|
// Note: an empty docmap (no mappings) means every changed file is
|
|
// uncovered — there are no patterns to match against. This is intentional:
|
|
// if you declare a doc-map, every changed file must be accounted for.
|
|
// On empty stdin the check is vacuously true (no files to cover).
|
|
var uncovered []string
|
|
for _, f := range changedFiles {
|
|
// Normalize Windows-style backslashes to forward slashes so that
|
|
// changed-file paths from git on Windows match doc-map globs.
|
|
f = strings.ReplaceAll(f, "\\", "/")
|
|
// Strip a leading "./" emitted by non-git tools (e.g. `find`) so that
|
|
// paths like "./cmd/foo.go" match doc-map globs written as "cmd/**".
|
|
f = strings.TrimPrefix(f, "./")
|
|
if !review.FileCoveredByDocMap(cfg, f) {
|
|
uncovered = append(uncovered, f)
|
|
}
|
|
}
|
|
if len(uncovered) > 0 {
|
|
failed = true
|
|
fmt.Fprintln(errWriter, "ERROR: changed files with no docmap coverage:")
|
|
for _, f := range uncovered {
|
|
fmt.Fprintf(errWriter, " %s\n", f)
|
|
}
|
|
}
|
|
|
|
// --- Check 2: Stale docs ---
|
|
// checkStaleDocs validates each path before touching the filesystem; see
|
|
// its documentation for the path-traversal hardening applied.
|
|
staleDocs := checkStaleDocs(cfg, resolvedRoot)
|
|
if len(staleDocs) > 0 {
|
|
failed = true
|
|
fmt.Fprintln(errWriter, "ERROR: stale docmap entries (paths do not exist):")
|
|
for _, d := range staleDocs {
|
|
fmt.Fprintf(errWriter, " %s\n", d)
|
|
}
|
|
}
|
|
|
|
if failed {
|
|
return 1
|
|
}
|
|
|
|
fmt.Fprintln(outWriter, "OK: docmap is valid")
|
|
return 0
|
|
}
|
|
|
|
// checkStaleDocs returns deduplicated docs: entries that do not exist under
|
|
// repoRoot.
|
|
//
|
|
// Path-traversal hardening: each docPath is validated with
|
|
// review.ValidateDocPath (rejects absolute paths and ".." segments) and then
|
|
// confined to repoRoot via filepath.Clean + filepath.Rel before os.Lstat is
|
|
// called. Symlinks are treated as stale — a CI tool running against
|
|
// PR-controlled content must not follow symlinks that could probe arbitrary
|
|
// host paths. Paths that fail any check are treated as invalid (reported as
|
|
// stale) without following any symlinks.
|
|
func checkStaleDocs(cfg *review.DocMapConfig, repoRoot string) []string {
|
|
seen := make(map[string]struct{})
|
|
var stale []string
|
|
|
|
for _, mapping := range cfg.Mappings {
|
|
for _, docPath := range mapping.Docs {
|
|
if docPath == "" {
|
|
continue
|
|
}
|
|
if _, ok := seen[docPath]; ok {
|
|
continue
|
|
}
|
|
seen[docPath] = struct{}{}
|
|
|
|
// Guard 1: reject absolute paths and ".." segments sourced from
|
|
// PR-controlled YAML before joining with repoRoot.
|
|
if err := review.ValidateDocPath(docPath); err != nil {
|
|
stale = append(stale, docPath)
|
|
continue
|
|
}
|
|
|
|
// Guard 2: verify the cleaned joined path does not escape repoRoot.
|
|
// filepath.Clean resolves any remaining ".." after the join; the
|
|
// filepath.Rel check confirms the path is still under repoRoot.
|
|
fullPath := filepath.Clean(filepath.Join(repoRoot, filepath.FromSlash(docPath)))
|
|
rel, err := filepath.Rel(repoRoot, fullPath)
|
|
if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(os.PathSeparator)) {
|
|
stale = append(stale, docPath)
|
|
continue
|
|
}
|
|
|
|
// Use Lstat (not Stat) so symlinks are never followed. A symlink
|
|
// under repoRoot could point anywhere on the host, allowing a
|
|
// malicious PR to probe file existence. Treat symlinks as stale.
|
|
fi, err := os.Lstat(fullPath)
|
|
if err != nil {
|
|
stale = append(stale, docPath)
|
|
continue
|
|
}
|
|
if fi.Mode()&os.ModeSymlink != 0 {
|
|
stale = append(stale, docPath)
|
|
}
|
|
}
|
|
}
|
|
return stale
|
|
}
|
|
|
|
// readLines reads all non-empty trimmed lines from r.
|
|
func readLines(r io.Reader) ([]string, error) {
|
|
scanner := bufio.NewScanner(r)
|
|
var lines []string
|
|
for scanner.Scan() {
|
|
line := strings.TrimSpace(scanner.Text())
|
|
if line != "" {
|
|
lines = append(lines, line)
|
|
}
|
|
}
|
|
return lines, scanner.Err()
|
|
}
|