c76e7dcd2e
After getting the resolved path from validateDocmapPath, Lstat the path
immediately before os.Open, then compare with f.Stat() after open using
os.SameFile. If the file was swapped between validation and open (e.g.,
replaced with a symlink pointing outside the repo), the inode comparison
catches it and returns an error.
Also changes defer f.Close() // nolint:errcheck to
defer func() { _ = f.Close() }() to follow the project convention of
explicit ignores over suppressor comments.
Addresses security bot finding (review 4812) against d6bab7a9.
324 lines
12 KiB
Go
324 lines
12 KiB
Go
package main
|
|
|
|
import (
|
|
"bufio"
|
|
"flag"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"gitea.weiker.me/rodin/review-bot/review"
|
|
)
|
|
|
|
// maxDocmapBytes caps how large the doc-map YAML file may be. The file can be
// PR-controlled, so anything bigger than this is refused rather than read
// into memory.
const maxDocmapBytes int64 = 10 << 20 // 10 MiB (10 * 1024 * 1024 bytes)
|
|
|
|
// validateDocmapPath checks that localPath is safe to read as the doc-map
|
|
// file. It enforces three invariants before the file is opened:
|
|
//
|
|
// 1. The path resolves to a regular file within resolvedRoot (path
|
|
// confinement): prevents a PR-controlled --docmap from reading arbitrary
|
|
// host files via absolute paths or ".." traversal.
|
|
// 2. The resolved path is within resolvedRoot: in-repo file-level symlinks
|
|
// are allowed when their resolved target is still inside the root;
|
|
// symlinks that escape the root are rejected by the confinement check.
|
|
// 3. The file does not exceed maxDocmapBytes: prevents memory exhaustion
|
|
// from an oversized but legitimately committed doc-map file.
|
|
//
|
|
// resolvedRoot must already be an absolute, symlink-free path (obtained from
|
|
// filepath.Abs + filepath.EvalSymlinks).
|
|
func validateDocmapPath(localPath, resolvedRoot string) (string, error) {
|
|
// Resolve the docmap path to an absolute path.
|
|
absPath, err := filepath.Abs(localPath)
|
|
if err != nil {
|
|
return "", fmt.Errorf("cannot resolve path: %w", err)
|
|
}
|
|
|
|
// Resolve ALL symlink components, not just the final one.
|
|
// os.Lstat only avoids following the *final* path component; intermediate
|
|
// directory symlinks are still followed. EvalSymlinks resolves every
|
|
// component, closing the directory-symlink bypass: a PR that commits
|
|
// .review-bot/ as a directory symlink pointing outside the repo would
|
|
// otherwise pass the filepath.Rel confinement check because the textual
|
|
// path is inside the root while the actual destination is not.
|
|
resolvedPath, err := filepath.EvalSymlinks(absPath)
|
|
if err != nil {
|
|
return "", fmt.Errorf("cannot resolve path (symlink): %w", err)
|
|
}
|
|
|
|
// Lstat the resolved path for size and existence checks — EvalSymlinks
|
|
// guarantees no symlink components remain, so ModeSymlink can never be set.
|
|
fi, err := os.Lstat(resolvedPath)
|
|
if err != nil {
|
|
return "", fmt.Errorf("cannot stat file: %w", err)
|
|
}
|
|
|
|
// Reject anything that is not a regular file (directories, FIFOs, device
|
|
// nodes, etc.) — ParseDocMapConfig expects a plain YAML file and would
|
|
// produce a confusing error on non-regular entries.
|
|
if !fi.Mode().IsRegular() {
|
|
return "", fmt.Errorf("docmap must be a regular file")
|
|
}
|
|
|
|
// Confine to resolvedRoot: use the fully-resolved path so that a directory
|
|
// symlink inside the repo cannot carry the path outside the root.
|
|
rel, err := filepath.Rel(resolvedRoot, resolvedPath)
|
|
if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(os.PathSeparator)) {
|
|
return "", fmt.Errorf("path must be within --repo-root")
|
|
}
|
|
|
|
// Enforce size cap before reading to prevent memory exhaustion.
|
|
if fi.Size() > maxDocmapBytes {
|
|
return "", fmt.Errorf("file size %d bytes exceeds %d-byte limit", fi.Size(), maxDocmapBytes)
|
|
}
|
|
|
|
return resolvedPath, nil
|
|
}
|
|
|
|
// runValidateDocmap implements the `review-bot validate-docmap` subcommand.
|
|
//
|
|
// It reads changed file paths from stdin (one per line, as produced by
|
|
// `git diff --name-only`), parses a doc-map YAML file, and performs two checks:
|
|
//
|
|
// 1. Coverage check: every changed file must be matched by at least one
|
|
// paths: glob in the docmap. Fails if any file is uncovered.
|
|
//
|
|
// 2. Stale-docs check: every docs: entry in the docmap must exist on disk
|
|
// (relative to --repo-root). Fails if any path is missing.
|
|
//
|
|
// Both checks always run — all failures are reported before exiting.
|
|
//
|
|
// Exit codes:
|
|
//
|
|
// 0 — clean (all files covered, all docs exist)
|
|
// 1 — one or more coverage or stale-doc failures
|
|
// 2 — usage error, missing flag, or YAML parse error
|
|
func runValidateDocmap(args []string) int {
|
|
fs := flag.NewFlagSet("validate-docmap", flag.ContinueOnError)
|
|
fs.SetOutput(errWriter)
|
|
|
|
docmapFlag := fs.String("docmap", "", "Path to doc-map YAML file (required)")
|
|
repoRootFlag := fs.String("repo-root", ".", "Repo root for resolving docs: paths (default: cwd)")
|
|
|
|
if err := fs.Parse(args); err != nil {
|
|
// flag.ContinueOnError already wrote the error to errWriter.
|
|
return 2
|
|
}
|
|
|
|
if *docmapFlag == "" {
|
|
fmt.Fprintln(errWriter, "Error: --docmap is required")
|
|
fmt.Fprintln(errWriter, "")
|
|
fmt.Fprintln(errWriter, "usage: review-bot validate-docmap --docmap <path> [--repo-root <dir>]")
|
|
fmt.Fprintln(errWriter, " Changed files are read from stdin, one per line.")
|
|
fmt.Fprintln(errWriter, " Example: git diff --name-only origin/main HEAD | review-bot validate-docmap --docmap .review-bot/doc-map.yml")
|
|
return 2
|
|
}
|
|
|
|
// Resolve repoRoot first — the docmap path is validated against it below.
|
|
// Use an absolute, symlink-free path so a symlinked --repo-root cannot
|
|
// bypass the escape guard in validateDocmapPath or checkStaleDocs.
|
|
absRoot, err := filepath.Abs(*repoRootFlag)
|
|
if err != nil {
|
|
fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err)
|
|
return 2
|
|
}
|
|
resolvedRoot, err := filepath.EvalSymlinks(absRoot)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
fmt.Fprintf(errWriter, "Error: --repo-root %q does not exist\n", *repoRootFlag)
|
|
} else {
|
|
fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err)
|
|
}
|
|
return 2
|
|
}
|
|
|
|
// Harden the docmap file path before reading it. The --docmap flag value
|
|
// may reference a PR-controlled file (e.g. .review-bot/doc-map.yml).
|
|
// Validate that it:
|
|
// 1. Resolves within resolvedRoot (prevent reading arbitrary host files).
|
|
// 2. Resolved target stays within the root (in-repo symlinks are allowed
|
|
// if they resolve to a path inside the root).
|
|
// 3. Does not exceed maxDocmapBytes (prevent memory exhaustion from an
|
|
// oversized committed file).
|
|
// validateDocmapPath returns the resolved path; use it directly to
|
|
// eliminate any TOCTOU race between validation and use.
|
|
resolvedDocmap, err := validateDocmapPath(*docmapFlag, resolvedRoot)
|
|
if err != nil {
|
|
fmt.Fprintf(errWriter, "Error: --docmap %q is invalid: %v\n", *docmapFlag, err)
|
|
return 2
|
|
}
|
|
|
|
// Open and read the docmap with a LimitedReader — closes the residual TOCTOU
|
|
// window between the Lstat size check in validateDocmapPath and the file open
|
|
// here. The limit is maxDocmapBytes+1 so we can detect a file that grew past
|
|
// the cap after the stat without reading unbounded bytes.
|
|
//
|
|
// Defense-in-depth: stat the path immediately before and after open so we can
|
|
// detect a file swap between validateDocmapPath's validation and this open via
|
|
// os.SameFile. An attacker with workspace write access could otherwise replace
|
|
// the validated file with a symlink in the gap between validation and use.
|
|
preStat, err := os.Lstat(resolvedDocmap)
|
|
if err != nil {
|
|
fmt.Fprintf(errWriter, "Error: failed to stat docmap before open %q: %v\n", *docmapFlag, err)
|
|
return 2
|
|
}
|
|
f, err := os.Open(resolvedDocmap)
|
|
if err != nil {
|
|
fmt.Fprintf(errWriter, "Error: failed to open docmap %q: %v\n", *docmapFlag, err)
|
|
return 2
|
|
}
|
|
defer func() { _ = f.Close() }()
|
|
// Verify we opened the same file that was validated — rejects a swap between
|
|
// the pre-open Lstat and the open call.
|
|
postStat, err := f.Stat()
|
|
if err != nil {
|
|
fmt.Fprintf(errWriter, "Error: failed to stat open docmap %q: %v\n", *docmapFlag, err)
|
|
return 2
|
|
}
|
|
if !os.SameFile(preStat, postStat) {
|
|
fmt.Fprintf(errWriter, "Error: --docmap %q changed between validation and open\n", *docmapFlag)
|
|
return 2
|
|
}
|
|
docmapData, err := io.ReadAll(io.LimitReader(f, maxDocmapBytes+1))
|
|
if err != nil {
|
|
fmt.Fprintf(errWriter, "Error: failed to read docmap %q: %v\n", *docmapFlag, err)
|
|
return 2
|
|
}
|
|
if int64(len(docmapData)) > maxDocmapBytes {
|
|
fmt.Fprintf(errWriter, "Error: --docmap %q exceeded %d-byte limit after open\n", *docmapFlag, maxDocmapBytes)
|
|
return 2
|
|
}
|
|
cfg, err := review.ParseDocMapConfigContent(string(docmapData), *docmapFlag)
|
|
if err != nil {
|
|
fmt.Fprintf(errWriter, "Error: failed to parse docmap %q: %v\n", *docmapFlag, err)
|
|
return 2
|
|
}
|
|
|
|
// Read changed files from stdin.
|
|
changedFiles, err := readLines(os.Stdin)
|
|
if err != nil {
|
|
fmt.Fprintf(errWriter, "Error: failed to read stdin: %v\n", err)
|
|
return 2
|
|
}
|
|
|
|
failed := false
|
|
|
|
// --- Check 1: Coverage ---
|
|
// Note: an empty docmap (no mappings) means every changed file is
|
|
// uncovered — there are no patterns to match against. This is intentional:
|
|
// if you declare a doc-map, every changed file must be accounted for.
|
|
// On empty stdin the check is vacuously true (no files to cover).
|
|
var uncovered []string
|
|
for _, f := range changedFiles {
|
|
// Normalize Windows-style backslashes to forward slashes so that
|
|
// changed-file paths from git on Windows match doc-map globs.
|
|
f = strings.ReplaceAll(f, "\\", "/")
|
|
// Strip a leading "./" emitted by non-git tools (e.g. `find`) so that
|
|
// paths like "./cmd/foo.go" match doc-map globs written as "cmd/**".
|
|
f = strings.TrimPrefix(f, "./")
|
|
if !review.FileCoveredByDocMap(cfg, f) {
|
|
uncovered = append(uncovered, f)
|
|
}
|
|
}
|
|
if len(uncovered) > 0 {
|
|
failed = true
|
|
fmt.Fprintln(errWriter, "ERROR: changed files with no docmap coverage:")
|
|
for _, f := range uncovered {
|
|
fmt.Fprintf(errWriter, " %s\n", f)
|
|
}
|
|
}
|
|
|
|
// --- Check 2: Stale docs ---
|
|
// checkStaleDocs validates each path before touching the filesystem; see
|
|
// its documentation for the path-traversal hardening applied.
|
|
staleDocs := checkStaleDocs(cfg, resolvedRoot)
|
|
if len(staleDocs) > 0 {
|
|
failed = true
|
|
fmt.Fprintln(errWriter, "ERROR: stale docmap entries (paths do not exist):")
|
|
for _, d := range staleDocs {
|
|
fmt.Fprintf(errWriter, " %s\n", d)
|
|
}
|
|
}
|
|
|
|
if failed {
|
|
return 1
|
|
}
|
|
|
|
fmt.Fprintln(outWriter, "OK: docmap is valid")
|
|
return 0
|
|
}
|
|
|
|
// checkStaleDocs returns deduplicated docs: entries that do not exist under
|
|
// repoRoot.
|
|
//
|
|
// Path-traversal hardening: each docPath is validated with
|
|
// review.ValidateDocPath (rejects absolute paths and ".." segments) and then
|
|
// confined to repoRoot via filepath.Clean + filepath.Rel before os.Lstat is
|
|
// called. Symlinks are treated as stale — a CI tool running against
|
|
// PR-controlled content must not follow symlinks that could probe arbitrary
|
|
// host paths. Paths that fail any check are treated as invalid (reported as
|
|
// stale) without following any symlinks.
|
|
func checkStaleDocs(cfg *review.DocMapConfig, repoRoot string) []string {
|
|
seen := make(map[string]struct{})
|
|
var stale []string
|
|
|
|
for _, mapping := range cfg.Mappings {
|
|
for _, docPath := range mapping.Docs {
|
|
if docPath == "" {
|
|
continue
|
|
}
|
|
if _, ok := seen[docPath]; ok {
|
|
continue
|
|
}
|
|
seen[docPath] = struct{}{}
|
|
|
|
// Guard 1: reject absolute paths and ".." segments sourced from
|
|
// PR-controlled YAML before joining with repoRoot.
|
|
if err := review.ValidateDocPath(docPath); err != nil {
|
|
stale = append(stale, docPath)
|
|
continue
|
|
}
|
|
|
|
// Guard 2: verify the cleaned joined path does not escape repoRoot.
|
|
// filepath.Clean resolves any remaining ".." after the join; the
|
|
// filepath.Rel check confirms the path is still under repoRoot.
|
|
fullPath := filepath.Clean(filepath.Join(repoRoot, filepath.FromSlash(docPath)))
|
|
rel, err := filepath.Rel(repoRoot, fullPath)
|
|
if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(os.PathSeparator)) {
|
|
stale = append(stale, docPath)
|
|
continue
|
|
}
|
|
|
|
// Use Lstat (not Stat) so symlinks are never followed. A symlink
|
|
// under repoRoot could point anywhere on the host, allowing a
|
|
// malicious PR to probe file existence. Treat symlinks as stale.
|
|
fi, err := os.Lstat(fullPath)
|
|
if err != nil {
|
|
stale = append(stale, docPath)
|
|
continue
|
|
}
|
|
if fi.Mode()&os.ModeSymlink != 0 {
|
|
stale = append(stale, docPath)
|
|
}
|
|
}
|
|
}
|
|
return stale
|
|
}
|
|
|
|
// readLines scans r line by line and collects every line that is non-empty
// after surrounding whitespace has been trimmed. If the scanner stops with an
// error, that error is returned together with the lines collected so far.
func readLines(r io.Reader) ([]string, error) {
	var collected []string

	sc := bufio.NewScanner(r)
	for sc.Scan() {
		trimmed := strings.TrimSpace(sc.Text())
		if trimmed == "" {
			// Blank or whitespace-only line: nothing to record.
			continue
		}
		collected = append(collected, trimmed)
	}

	if err := sc.Err(); err != nil {
		return collected, err
	}
	return collected, nil
}
|