package main

import (
	"bufio"
	"flag"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strings"

	"gitea.weiker.me/rodin/review-bot/review"
)

// maxDocmapBytes is the maximum size of the doc-map YAML file that will be
// read. Files larger than this are rejected before reading to prevent memory
// exhaustion from an oversized PR-controlled file.
const maxDocmapBytes int64 = 10 * 1024 * 1024 // 10 MB

// validateDocmapPath checks that localPath is safe to read as the doc-map
// file and returns its absolute, symlink-resolved path. It enforces three
// invariants before the file is opened:
//
//  1. Regular file: after resolving symlinks the path must name a regular
//     file (not a directory, FIFO, device node, ...).
//  2. Path confinement: the fully resolved path must lie within
//     resolvedRoot. This prevents a PR-controlled --docmap from reading
//     arbitrary host files via absolute paths, ".." traversal, or symlinks.
//     In-repo symlinks are allowed when their resolved target is still
//     inside the root; symlinks that escape the root are rejected.
//  3. Size cap: the file must not exceed maxDocmapBytes, preventing memory
//     exhaustion from an oversized but legitimately committed doc-map file.
//
// resolvedRoot must already be an absolute, symlink-free path (obtained from
// filepath.Abs + filepath.EvalSymlinks).
//
// On error the returned path is empty.
func validateDocmapPath(localPath, resolvedRoot string) (string, error) {
	// Resolve the docmap path to an absolute path.
	absPath, err := filepath.Abs(localPath)
	if err != nil {
		return "", fmt.Errorf("cannot resolve path: %w", err)
	}

	// Resolve ALL symlink components, not just the final one.
	// os.Lstat only avoids following the *final* path component; intermediate
	// directory symlinks are still followed. EvalSymlinks resolves every
	// component, closing the directory-symlink bypass: a PR that commits
	// .review-bot/ as a directory symlink pointing outside the repo would
	// otherwise pass the filepath.Rel confinement check because the textual
	// path is inside the root while the actual destination is not.
	resolvedPath, err := filepath.EvalSymlinks(absPath)
	if err != nil {
		return "", fmt.Errorf("cannot resolve path (symlink): %w", err)
	}

	// Lstat the resolved path for the type and size checks. EvalSymlinks has
	// already resolved every symlink component, so (absent a concurrent
	// modification) the result describes the real file, not a link.
	fi, err := os.Lstat(resolvedPath)
	if err != nil {
		return "", fmt.Errorf("cannot stat file: %w", err)
	}

	// Reject anything that is not a regular file (directories, FIFOs, device
	// nodes, etc.) — the parser expects a plain YAML file and would produce a
	// confusing error on non-regular entries.
	if !fi.Mode().IsRegular() {
		return "", fmt.Errorf("docmap must be a regular file")
	}

	// Confine to resolvedRoot: use the fully-resolved path so that a directory
	// symlink inside the repo cannot carry the path outside the root.
	rel, err := filepath.Rel(resolvedRoot, resolvedPath)
	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(os.PathSeparator)) {
		return "", fmt.Errorf("path must be within --repo-root")
	}

	// Enforce size cap before reading to prevent memory exhaustion.
	if fi.Size() > maxDocmapBytes {
		return "", fmt.Errorf("file size %d bytes exceeds %d-byte limit", fi.Size(), maxDocmapBytes)
	}

	return resolvedPath, nil
}

// runValidateDocmap implements the `review-bot validate-docmap` subcommand.
//
// It reads changed file paths from stdin (one per line, as produced by
// `git diff --name-only`), parses a doc-map YAML file, and performs two checks:
//
//  1. Coverage check: every changed file must be matched by at least one
//     paths: glob in the docmap. Fails if any file is uncovered.
//
//  2. Stale-docs check: every docs: entry in the docmap must exist on disk
//     (relative to --repo-root). Fails if any path is missing.
//
// Both checks always run — all failures are reported before exiting.
//
// Exit codes:
//
//	0 — clean (all files covered, all docs exist)
//	1 — one or more coverage or stale-doc failures
//	2 — usage error, missing flag, unreadable/invalid docmap, or YAML parse error
func runValidateDocmap(args []string) int {
	fs := flag.NewFlagSet("validate-docmap", flag.ContinueOnError)
	fs.SetOutput(errWriter)
	docmapFlag := fs.String("docmap", "", "Path to doc-map YAML file (required)")
	repoRootFlag := fs.String("repo-root", ".", "Repo root for resolving docs: paths (default: cwd)")

	if err := fs.Parse(args); err != nil {
		// flag.ContinueOnError already wrote the error to errWriter.
		return 2
	}

	if *docmapFlag == "" {
		fmt.Fprintln(errWriter, "Error: --docmap is required")
		fmt.Fprintln(errWriter, "")
		fmt.Fprintln(errWriter, "usage: review-bot validate-docmap --docmap <path> [--repo-root <dir>]")
		fmt.Fprintln(errWriter, " Changed files are read from stdin, one per line.")
		fmt.Fprintln(errWriter, " Example: git diff --name-only origin/main HEAD | review-bot validate-docmap --docmap .review-bot/doc-map.yml")
		return 2
	}

	// Resolve repoRoot first — the docmap path is validated against it below.
	// Use an absolute, symlink-free path so a symlinked --repo-root cannot
	// bypass the escape guard in validateDocmapPath or checkStaleDocs.
	absRoot, err := filepath.Abs(*repoRootFlag)
	if err != nil {
		fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err)
		return 2
	}
	resolvedRoot, err := filepath.EvalSymlinks(absRoot)
	if err != nil {
		if os.IsNotExist(err) {
			fmt.Fprintf(errWriter, "Error: --repo-root %q does not exist\n", *repoRootFlag)
		} else {
			fmt.Fprintf(errWriter, "Error: failed to resolve --repo-root %q: %v\n", *repoRootFlag, err)
		}
		return 2
	}

	// Harden the docmap file path before reading it. The --docmap flag value
	// may reference a PR-controlled file (e.g. .review-bot/doc-map.yml).
	// validateDocmapPath checks that it:
	//  1. Is a regular file.
	//  2. Resolves within resolvedRoot (prevents reading arbitrary host
	//     files; in-repo symlinks are allowed if their target stays inside
	//     the root).
	//  3. Does not exceed maxDocmapBytes (prevents memory exhaustion from an
	//     oversized committed file).
	// It returns the resolved path; that path (not the raw flag value) is
	// what gets opened below, which narrows the window between validation
	// and use.
	resolvedDocmap, err := validateDocmapPath(*docmapFlag, resolvedRoot)
	if err != nil {
		fmt.Fprintf(errWriter, "Error: --docmap %q is invalid: %v\n", *docmapFlag, err)
		return 2
	}

	// Defense-in-depth against a file swap between validation and open:
	// Lstat the path immediately before opening, Stat the open handle
	// afterwards, and require os.SameFile. An attacker with workspace write
	// access could otherwise replace the validated file with a symlink in the
	// gap between validation and use.
	preStat, err := os.Lstat(resolvedDocmap)
	if err != nil {
		fmt.Fprintf(errWriter, "Error: failed to stat docmap before open %q: %v\n", *docmapFlag, err)
		return 2
	}
	// The path was a regular file when validated. If it no longer is, it was
	// replaced in the meantime; bail out before os.Open, which could block
	// indefinitely on e.g. a FIFO.
	if !preStat.Mode().IsRegular() {
		fmt.Fprintf(errWriter, "Error: --docmap %q changed between validation and open\n", *docmapFlag)
		return 2
	}

	f, err := os.Open(resolvedDocmap)
	if err != nil {
		fmt.Fprintf(errWriter, "Error: failed to open docmap %q: %v\n", *docmapFlag, err)
		return 2
	}
	// Read-only handle: a Close error carries no useful information here.
	defer func() { _ = f.Close() }()

	// Verify the opened handle refers to the file that was just Lstat'ed —
	// rejects a swap between the pre-open Lstat and the open call.
	postStat, err := f.Stat()
	if err != nil {
		fmt.Fprintf(errWriter, "Error: failed to stat open docmap %q: %v\n", *docmapFlag, err)
		return 2
	}
	if !os.SameFile(preStat, postStat) {
		fmt.Fprintf(errWriter, "Error: --docmap %q changed between validation and open\n", *docmapFlag)
		return 2
	}

	// Read through a LimitReader capped at maxDocmapBytes+1 so a file that
	// grew past the cap after the size check is detected without reading
	// unbounded bytes.
	docmapData, err := io.ReadAll(io.LimitReader(f, maxDocmapBytes+1))
	if err != nil {
		fmt.Fprintf(errWriter, "Error: failed to read docmap %q: %v\n", *docmapFlag, err)
		return 2
	}
	if int64(len(docmapData)) > maxDocmapBytes {
		fmt.Fprintf(errWriter, "Error: --docmap %q exceeded %d-byte limit after open\n", *docmapFlag, maxDocmapBytes)
		return 2
	}

	cfg, err := review.ParseDocMapConfigContent(string(docmapData), *docmapFlag)
	if err != nil {
		fmt.Fprintf(errWriter, "Error: failed to parse docmap %q: %v\n", *docmapFlag, err)
		return 2
	}

	// Read changed files from stdin.
	changedFiles, err := readLines(os.Stdin)
	if err != nil {
		fmt.Fprintf(errWriter, "Error: failed to read stdin: %v\n", err)
		return 2
	}

	failed := false

	// --- Check 1: Coverage ---
	// Note: an empty docmap (no mappings) means every changed file is
	// uncovered — there are no patterns to match against. This is intentional:
	// if you declare a doc-map, every changed file must be accounted for.
	// On empty stdin the check is vacuously true (no files to cover).
	var uncovered []string
	for _, changed := range changedFiles {
		// Normalize Windows-style backslashes to forward slashes so that
		// changed-file paths from git on Windows match doc-map globs.
		changed = strings.ReplaceAll(changed, "\\", "/")
		// Strip a leading "./" emitted by non-git tools (e.g. `find`) so that
		// paths like "./cmd/foo.go" match doc-map globs written as "cmd/**".
		changed = strings.TrimPrefix(changed, "./")
		if !review.FileCoveredByDocMap(cfg, changed) {
			uncovered = append(uncovered, changed)
		}
	}
	if len(uncovered) > 0 {
		failed = true
		fmt.Fprintln(errWriter, "ERROR: changed files with no docmap coverage:")
		for _, u := range uncovered {
			fmt.Fprintf(errWriter, " %s\n", u)
		}
	}

	// --- Check 2: Stale docs ---
	// checkStaleDocs validates each path before touching the filesystem; see
	// its documentation for the path-traversal hardening applied.
	staleDocs := checkStaleDocs(cfg, resolvedRoot)
	if len(staleDocs) > 0 {
		failed = true
		fmt.Fprintln(errWriter, "ERROR: stale docmap entries (paths do not exist):")
		for _, d := range staleDocs {
			fmt.Fprintf(errWriter, " %s\n", d)
		}
	}

	if failed {
		return 1
	}
	fmt.Fprintln(outWriter, "OK: docmap is valid")
	return 0
}

// checkStaleDocs returns the deduplicated docs: entries of cfg that do not
// exist under repoRoot, in first-seen order. Empty entries are skipped.
//
// Path-traversal hardening: each docPath is validated with
// review.ValidateDocPath and then confined to repoRoot via filepath.Clean +
// filepath.Rel before the filesystem is touched. Symlinks are treated as
// stale — a CI tool running against PR-controlled content must not follow
// symlinks that could probe arbitrary host paths. This applies to EVERY path
// component below repoRoot, not just the final one: a directory symlink
// committed in the repo would otherwise be followed by os.Lstat on the full
// path. Paths that fail any check are reported as stale.
//
// repoRoot is expected to be an absolute, symlink-free path; components of
// repoRoot itself are not inspected.
func checkStaleDocs(cfg *review.DocMapConfig, repoRoot string) []string {
	seen := make(map[string]struct{})
	var stale []string
	for _, mapping := range cfg.Mappings {
		for _, docPath := range mapping.Docs {
			if docPath == "" {
				continue
			}
			if _, ok := seen[docPath]; ok {
				continue
			}
			seen[docPath] = struct{}{}

			if !docPathExistsNoSymlinks(docPath, repoRoot) {
				stale = append(stale, docPath)
			}
		}
	}
	return stale
}

// docPathExistsNoSymlinks reports whether docPath (slash-separated, relative
// to repoRoot) names an existing filesystem entry that is reachable from
// repoRoot without traversing any symlink.
func docPathExistsNoSymlinks(docPath, repoRoot string) bool {
	// Guard 1: reject paths that review.ValidateDocPath refuses (sourced from
	// PR-controlled YAML) before joining with repoRoot.
	if err := review.ValidateDocPath(docPath); err != nil {
		return false
	}

	// Guard 2: verify the cleaned joined path does not escape repoRoot.
	// filepath.Clean resolves any remaining ".." after the join; the
	// filepath.Rel check confirms the path is still under repoRoot.
	fullPath := filepath.Clean(filepath.Join(repoRoot, filepath.FromSlash(docPath)))
	rel, err := filepath.Rel(repoRoot, fullPath)
	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(os.PathSeparator)) {
		return false
	}

	// Guard 3: Lstat each component in turn. Lstat never follows the final
	// component of the path it is given, so checking every prefix ensures no
	// symlink (file or directory) is ever traversed. A symlink under repoRoot
	// could point anywhere on the host, allowing a malicious PR to probe file
	// existence.
	cur := repoRoot
	for _, part := range strings.Split(rel, string(os.PathSeparator)) {
		cur = filepath.Join(cur, part)
		fi, err := os.Lstat(cur)
		if err != nil || fi.Mode()&os.ModeSymlink != 0 {
			return false
		}
	}
	return true
}

// readLines reads r line by line and returns every line that is non-empty
// after trimming surrounding whitespace, in input order.
//
// It uses a bufio.Scanner with the default buffer, so a single line longer
// than bufio.MaxScanTokenSize (64 KiB) stops the scan with bufio.ErrTooLong.
// When an error is returned, the lines read before the error are returned
// alongside it.
func readLines(r io.Reader) ([]string, error) {
	scanner := bufio.NewScanner(r)
	var lines []string
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line != "" {
			lines = append(lines, line)
		}
	}
	return lines, scanner.Err()
}