4dce8e4454
PR Ready Gate / clear-labels (pull_request) Successful in 2s
CI / test (pull_request) Successful in 17s
CI / review (anthropic--claude-4.6-sonnet, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 39s
CI / review (gpt-5, security, ., rodin/security-patterns, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Successful in 1m3s
CI / review (gpt-5, gpt, GPT_REVIEW_TOKEN) (pull_request) Successful in 1m16s
The previous implementation called os.Lstat(absPath) which only avoids following the *final* path component. A PR committing .review-bot/ as a directory symlink pointing outside the repo would pass the filepath.Rel confinement check because the textual path was inside the root while the resolved destination was not. Fix: call filepath.EvalSymlinks after filepath.Abs to resolve ALL symlink components before the confinement check. If EvalSymlinks fails (dangling symlink, nonexistent target) the path is rejected. The filepath.Rel check then operates on the fully-resolved path. Semantic change: file-level in-repo symlinks (target also within root) are now allowed — the invariant is about where the content lives, not whether the entry is a symlink. The test TestValidateDocmapPath_Symlink is updated to test an out-of-repo symlink target, which must still be rejected. Tests: - TestValidateDocmapPath_DirSymlinkBypass: reproduces the attack vector (dir symlink bypassing textual confinement check) and verifies it is now rejected - TestValidateDocmapPath_Symlink: updated to test out-of-repo symlink Coverage: 54.0%
275 lines
9.8 KiB
Go
275 lines
9.8 KiB
Go
package main
|
|
|
|
import (
|
|
"bufio"
|
|
"flag"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"gitea.weiker.me/rodin/review-bot/review"
|
|
)
|
|
|
|
// maxDocmapBytes is the maximum size of the doc-map YAML file that will be
|
|
// read. Files larger than this are rejected before reading to prevent memory
|
|
// exhaustion from an oversized PR-controlled file.
|
|
const maxDocmapBytes int64 = 10 * 1024 * 1024 // 10 MB
|
|
|
|
// validateDocmapPath checks that localPath is safe to read as the doc-map
|
|
// file. It enforces three invariants before the file is opened:
|
|
//
|
|
// 1. The path resolves to a regular file within resolvedRoot (path
|
|
// confinement): prevents a PR-controlled --docmap from reading arbitrary
|
|
// host files via absolute paths or ".." traversal.
|
|
// 2. The path is not a symlink: prevents denial-of-service via /dev/zero or
|
|
// information disclosure via symlinks that point outside the workspace.
|
|
// 3. The file does not exceed maxDocmapBytes: prevents memory exhaustion
|
|
// from an oversized but legitimately committed doc-map file.
|
|
//
|
|
// resolvedRoot must already be an absolute, symlink-free path (obtained from
|
|
// filepath.Abs + filepath.EvalSymlinks).
|
|
func validateDocmapPath(localPath, resolvedRoot string) error {
|
|
// Resolve the docmap path to an absolute path.
|
|
absPath, err := filepath.Abs(localPath)
|
|
if err != nil {
|
|
return fmt.Errorf("cannot resolve path: %w", err)
|
|
}
|
|
|
|
// Resolve ALL symlink components, not just the final one.
|
|
// os.Lstat only avoids following the *final* path component; intermediate
|
|
// directory symlinks are still followed. EvalSymlinks resolves every
|
|
// component, closing the directory-symlink bypass: a PR that commits
|
|
// .review-bot/ as a directory symlink pointing outside the repo would
|
|
// otherwise pass the filepath.Rel confinement check because the textual
|
|
// path is inside the root while the actual destination is not.
|
|
resolvedPath, err := filepath.EvalSymlinks(absPath)
|
|
if err != nil {
|
|
return fmt.Errorf("cannot resolve path (symlink): %w", err)
|
|
}
|
|
|
|
// Lstat the resolved path — at this point resolvedPath is symlink-free, so
|
|
// ModeSymlink will never be set. We keep the check as defense-in-depth.
|
|
fi, err := os.Lstat(resolvedPath)
|
|
if err != nil {
|
|
return fmt.Errorf("cannot stat file: %w", err)
|
|
}
|
|
|
|
// Defense-in-depth: reject any remaining symlink indicator.
|
|
if fi.Mode()&os.ModeSymlink != 0 {
|
|
return fmt.Errorf("symlinks are not allowed")
|
|
}
|
|
|
|
// Confine to resolvedRoot: use the fully-resolved path so that a directory
|
|
// symlink inside the repo cannot carry the path outside the root.
|
|
rel, err := filepath.Rel(resolvedRoot, resolvedPath)
|
|
if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(os.PathSeparator)) {
|
|
return fmt.Errorf("path must be within --repo-root")
|
|
}
|
|
|
|
// Enforce size cap before reading to prevent memory exhaustion.
|
|
if fi.Size() > maxDocmapBytes {
|
|
return fmt.Errorf("file size %d bytes exceeds %d-byte limit", fi.Size(), maxDocmapBytes)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// runValidateDocmap implements the `review-bot validate-docmap` subcommand.
|
|
//
|
|
// It reads changed file paths from stdin (one per line, as produced by
|
|
// `git diff --name-only`), parses a doc-map YAML file, and performs two checks:
|
|
//
|
|
// 1. Coverage check: every changed file must be matched by at least one
|
|
// paths: glob in the docmap. Fails if any file is uncovered.
|
|
//
|
|
// 2. Stale-docs check: every docs: entry in the docmap must exist on disk
|
|
// (relative to --repo-root). Fails if any path is missing.
|
|
//
|
|
// Both checks always run — all failures are reported before exiting.
|
|
//
|
|
// Exit codes:
|
|
//
|
|
// 0 — clean (all files covered, all docs exist)
|
|
// 1 — one or more coverage or stale-doc failures
|
|
// 2 — usage error, missing flag, or YAML parse error
|
|
func runValidateDocmap(args []string) int {
|
|
fs := flag.NewFlagSet("validate-docmap", flag.ContinueOnError)
|
|
fs.SetOutput(errWriter)
|
|
|
|
docmapFlag := fs.String("docmap", "", "Path to doc-map YAML file (required)")
|
|
repoRootFlag := fs.String("repo-root", ".", "Repo root for resolving docs: paths (default: cwd)")
|
|
|
|
if err := fs.Parse(args); err != nil {
|
|
// flag.ContinueOnError already wrote the error to errWriter.
|
|
return 2
|
|
}
|
|
|
|
if *docmapFlag == "" {
|
|
fmt.Fprintln(errWriter, "Error: --docmap is required")
|
|
fmt.Fprintln(errWriter, "")
|
|
fmt.Fprintln(errWriter, "usage: review-bot validate-docmap --docmap <path> [--repo-root <dir>]")
|
|
fmt.Fprintln(errWriter, " Changed files are read from stdin, one per line.")
|
|
fmt.Fprintln(errWriter, " Example: git diff --name-only origin/main HEAD | review-bot validate-docmap --docmap .review-bot/doc-map.yml")
|
|
return 2
|
|
}
|
|
|
|
// Resolve repoRoot first — the docmap path is validated against it below.
|
|
// Use an absolute, symlink-free path so a symlinked --repo-root cannot
|
|
// bypass the escape guard in validateDocmapPath or checkStaleDocs.
|
|
absRoot, err := filepath.Abs(*repoRootFlag)
|
|
if err != nil {
|
|
fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err)
|
|
return 2
|
|
}
|
|
resolvedRoot, err := filepath.EvalSymlinks(absRoot)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
fmt.Fprintf(errWriter, "Error: --repo-root %q does not exist\n", *repoRootFlag)
|
|
} else {
|
|
fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err)
|
|
}
|
|
return 2
|
|
}
|
|
|
|
// Harden the docmap file path before reading it. The --docmap flag value
|
|
// may reference a PR-controlled file (e.g. .review-bot/doc-map.yml).
|
|
// Validate that it:
|
|
// 1. Resolves within resolvedRoot (prevent reading arbitrary host files).
|
|
// 2. Is not a symlink (prevent /dev/zero or symlink-based host probing).
|
|
// 3. Does not exceed maxDocmapBytes (prevent memory exhaustion from an
|
|
// oversized committed file).
|
|
if err := validateDocmapPath(*docmapFlag, resolvedRoot); err != nil {
|
|
fmt.Fprintf(errWriter, "Error: --docmap %q is invalid: %v\n", *docmapFlag, err)
|
|
return 2
|
|
}
|
|
|
|
// Parse docmap YAML.
|
|
cfg, err := review.ParseDocMapConfig(*docmapFlag)
|
|
if err != nil {
|
|
fmt.Fprintf(errWriter, "Error: failed to parse docmap %q: %v\n", *docmapFlag, err)
|
|
return 2
|
|
}
|
|
|
|
// Read changed files from stdin.
|
|
changedFiles, err := readLines(os.Stdin)
|
|
if err != nil {
|
|
fmt.Fprintf(errWriter, "Error: failed to read stdin: %v\n", err)
|
|
return 2
|
|
}
|
|
|
|
failed := false
|
|
|
|
// --- Check 1: Coverage ---
|
|
// Note: an empty docmap (no mappings) means every changed file is
|
|
// uncovered — there are no patterns to match against. This is intentional:
|
|
// if you declare a doc-map, every changed file must be accounted for.
|
|
// On empty stdin the check is vacuously true (no files to cover).
|
|
var uncovered []string
|
|
for _, f := range changedFiles {
|
|
// Normalize Windows-style backslashes to forward slashes so that
|
|
// changed-file paths from git on Windows match doc-map globs.
|
|
f = strings.ReplaceAll(f, "\\", "/")
|
|
if !review.FileCoveredByDocMap(cfg, f) {
|
|
uncovered = append(uncovered, f)
|
|
}
|
|
}
|
|
if len(uncovered) > 0 {
|
|
failed = true
|
|
fmt.Fprintln(errWriter, "ERROR: changed files with no docmap coverage:")
|
|
for _, f := range uncovered {
|
|
fmt.Fprintf(errWriter, " %s\n", f)
|
|
}
|
|
}
|
|
|
|
// --- Check 2: Stale docs ---
|
|
// checkStaleDocs validates each path before touching the filesystem; see
|
|
// its documentation for the path-traversal hardening applied.
|
|
staleDocs := checkStaleDocs(cfg, resolvedRoot)
|
|
if len(staleDocs) > 0 {
|
|
failed = true
|
|
fmt.Fprintln(errWriter, "ERROR: stale docmap docs: entries (paths do not exist):")
|
|
for _, d := range staleDocs {
|
|
fmt.Fprintf(errWriter, " %s\n", d)
|
|
}
|
|
}
|
|
|
|
if failed {
|
|
return 1
|
|
}
|
|
|
|
fmt.Fprintln(outWriter, "OK: docmap is valid")
|
|
return 0
|
|
}
|
|
|
|
// checkStaleDocs returns deduplicated docs: entries that do not exist under
|
|
// repoRoot.
|
|
//
|
|
// Path-traversal hardening: each docPath is validated with
|
|
// review.ValidateDocPath (rejects absolute paths and ".." segments) and then
|
|
// confined to repoRoot via filepath.Clean + filepath.Rel before os.Lstat is
|
|
// called. Symlinks are treated as stale — a CI tool running against
|
|
// PR-controlled content must not follow symlinks that could probe arbitrary
|
|
// host paths. Paths that fail any check are treated as invalid (reported as
|
|
// stale) without following any symlinks.
|
|
func checkStaleDocs(cfg *review.DocMapConfig, repoRoot string) []string {
|
|
seen := make(map[string]struct{})
|
|
var stale []string
|
|
|
|
for _, mapping := range cfg.Mappings {
|
|
for _, docPath := range mapping.Docs {
|
|
if docPath == "" {
|
|
continue
|
|
}
|
|
if _, ok := seen[docPath]; ok {
|
|
continue
|
|
}
|
|
seen[docPath] = struct{}{}
|
|
|
|
// Guard 1: reject absolute paths and ".." segments sourced from
|
|
// PR-controlled YAML before joining with repoRoot.
|
|
if err := review.ValidateDocPath(docPath); err != nil {
|
|
stale = append(stale, docPath)
|
|
continue
|
|
}
|
|
|
|
// Guard 2: verify the cleaned joined path does not escape repoRoot.
|
|
// filepath.Clean resolves any remaining ".." after the join; the
|
|
// filepath.Rel check confirms the path is still under repoRoot.
|
|
fullPath := filepath.Clean(filepath.Join(repoRoot, filepath.FromSlash(docPath)))
|
|
rel, err := filepath.Rel(repoRoot, fullPath)
|
|
if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(os.PathSeparator)) {
|
|
stale = append(stale, docPath)
|
|
continue
|
|
}
|
|
|
|
// Use Lstat (not Stat) so symlinks are never followed. A symlink
|
|
// under repoRoot could point anywhere on the host, allowing a
|
|
// malicious PR to probe file existence. Treat symlinks as stale.
|
|
fi, err := os.Lstat(fullPath)
|
|
if err != nil {
|
|
stale = append(stale, docPath)
|
|
continue
|
|
}
|
|
if fi.Mode()&os.ModeSymlink != 0 {
|
|
stale = append(stale, docPath)
|
|
}
|
|
}
|
|
}
|
|
return stale
|
|
}
|
|
|
|
// readLines reads all non-empty trimmed lines from r.
|
|
func readLines(r io.Reader) ([]string, error) {
|
|
scanner := bufio.NewScanner(r)
|
|
var lines []string
|
|
for scanner.Scan() {
|
|
line := strings.TrimSpace(scanner.Text())
|
|
if line != "" {
|
|
lines = append(lines, line)
|
|
}
|
|
}
|
|
return lines, scanner.Err()
|
|
}
|