feat(#137): add doc-map input for path-scoped doc injection
CI / test (pull_request) Successful in 18s
CI / review (anthropic--claude-4.6-sonnet, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 40s
CI / review (gpt-5, security, ., rodin/security-patterns, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Successful in 1m26s
CI / review (gpt-5, gpt, GPT_REVIEW_TOKEN) (pull_request) Successful in 1m27s
CI / test (pull_request) Successful in 18s
CI / review (anthropic--claude-4.6-sonnet, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 40s
CI / review (gpt-5, security, ., rodin/security-patterns, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Successful in 1m26s
CI / review (gpt-5, gpt, GPT_REVIEW_TOKEN) (pull_request) Successful in 1m27s
- New --doc-map flag (DOC_MAP_FILE env var): path to YAML config mapping source path globs to governing design docs - New --doc-map-max-bytes flag (DOC_MAP_MAX_BYTES env var): cap on total injected doc content, default 100KB - review/docmap.go: DocMapConfig parsing, glob matching with ** support, doc loading via VCS with directory expansion and size guard - budget.Sections: new DesignDocs field, trimmed after conventions - budget.buildResult: injects DesignDocs under ## Design Documents heading - action.yml: doc-map and doc-map-max-bytes inputs wired to env vars - CHANGELOG.md: created with unreleased entry - Tests: ParseDocMapConfig, MatchDocs, globMatch, LoadMatchingDocs
This commit is contained in:
@@ -0,0 +1,303 @@
|
||||
// Package review provides doc-map parsing and doc injection for path-scoped
|
||||
// design document context in AI code reviews.
|
||||
package review
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/goccy/go-yaml"
|
||||
)
|
||||
|
||||
const (
|
||||
// DefaultDocMapMaxBytes is the default cap on total injected doc content.
|
||||
DefaultDocMapMaxBytes = 100 * 1024 // 100 KB
|
||||
)
|
||||
|
||||
// DocMapping maps a set of path glob patterns to governing doc files/directories.
|
||||
type DocMapping struct {
|
||||
Paths []string `yaml:"paths"` // glob patterns matched against changed PR files
|
||||
Docs []string `yaml:"docs"` // doc file paths or directories in the reviewed repo
|
||||
}
|
||||
|
||||
// DocMapConfig is the top-level structure of a doc-map YAML file.
|
||||
type DocMapConfig struct {
|
||||
Mappings []DocMapping `yaml:"mappings"`
|
||||
}
|
||||
|
||||
// DocMapOptions configures behavior for doc loading.
|
||||
type DocMapOptions struct {
|
||||
// MaxBytes caps the total size of injected doc content. Default: DefaultDocMapMaxBytes.
|
||||
MaxBytes int
|
||||
}
|
||||
|
||||
// DocFetcher reads file and directory content from a VCS repository.
|
||||
// It is a subset of vcsClient, defined here to keep the review package free
|
||||
// of cmd-level dependencies.
|
||||
type DocFetcher interface {
|
||||
// GetFileContent returns the content of a single file at default branch.
|
||||
GetFileContent(ctx context.Context, owner, repo, path string) (string, error)
|
||||
// GetAllFilesInPath returns all files (path → content) under a directory.
|
||||
GetAllFilesInPath(ctx context.Context, owner, repo, path string) (map[string]string, error)
|
||||
}
|
||||
|
||||
// ParseDocMapConfig reads and parses a doc-map YAML file from a local path.
|
||||
// Unknown top-level keys produce a warning but are not fatal.
|
||||
func ParseDocMapConfig(localPath string) (*DocMapConfig, error) {
|
||||
data, err := readFileBytes(localPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read doc-map file %q: %w", localPath, err)
|
||||
}
|
||||
|
||||
var cfg DocMapConfig
|
||||
if err := yaml.UnmarshalWithOptions(data, &cfg, yaml.Strict()); err != nil {
|
||||
// Re-parse without strict mode to log which keys are unknown.
|
||||
var relaxed DocMapConfig
|
||||
if err2 := yaml.Unmarshal(data, &relaxed); err2 != nil {
|
||||
return nil, fmt.Errorf("parse doc-map YAML %q: %w", localPath, err)
|
||||
}
|
||||
slog.Warn("doc-map YAML contains unknown keys (ignored)", "file", localPath, "error", err)
|
||||
cfg = relaxed
|
||||
}
|
||||
return &cfg, nil
|
||||
}
|
||||
|
||||
// MatchDocs returns deduplicated doc paths for the given changed file paths.
|
||||
// A mapping matches if any of its path globs matches any of the changed files.
|
||||
func MatchDocs(cfg *DocMapConfig, changedFiles []string) []string {
|
||||
seen := make(map[string]struct{})
|
||||
var result []string
|
||||
|
||||
for _, mapping := range cfg.Mappings {
|
||||
if len(mapping.Paths) == 0 || len(mapping.Docs) == 0 {
|
||||
continue
|
||||
}
|
||||
if mappingMatches(mapping.Paths, changedFiles) {
|
||||
for _, doc := range mapping.Docs {
|
||||
if doc == "" {
|
||||
continue
|
||||
}
|
||||
if _, ok := seen[doc]; !ok {
|
||||
seen[doc] = struct{}{}
|
||||
result = append(result, doc)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// mappingMatches returns true if any glob in patterns matches any file in files.
|
||||
func mappingMatches(patterns, files []string) bool {
|
||||
for _, pat := range patterns {
|
||||
for _, f := range files {
|
||||
if globMatch(pat, f) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// globMatch matches a path against a glob pattern that may contain **.
|
||||
// It supports:
|
||||
// - Standard path.Match patterns (*, ?, [range])
|
||||
// - ** as a path segment that matches zero or more segments
|
||||
// - Trailing /** to match a directory and all its contents
|
||||
//
|
||||
// The pattern and path use forward slash as separator.
|
||||
func globMatch(pattern, path string) bool {
|
||||
return globMatchParts(splitPath(pattern), splitPath(path))
|
||||
}
|
||||
|
||||
// splitPath splits a slash-separated path into non-empty parts.
|
||||
func splitPath(p string) []string {
|
||||
// Clean and split on "/"
|
||||
parts := strings.Split(p, "/")
|
||||
result := make([]string, 0, len(parts))
|
||||
for _, part := range parts {
|
||||
if part != "" {
|
||||
result = append(result, part)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// globMatchParts recursively matches pattern parts against path parts.
|
||||
func globMatchParts(patParts, pathParts []string) bool {
|
||||
for len(patParts) > 0 {
|
||||
pat := patParts[0]
|
||||
if pat == "**" {
|
||||
patParts = patParts[1:]
|
||||
if len(patParts) == 0 {
|
||||
// Trailing **: matches any remaining path (including empty).
|
||||
return true
|
||||
}
|
||||
// ** in the middle: try matching the rest at every position.
|
||||
for i := 0; i <= len(pathParts); i++ {
|
||||
if globMatchParts(patParts, pathParts[i:]) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
// Non-** segment: path must have a segment here.
|
||||
if len(pathParts) == 0 {
|
||||
return false
|
||||
}
|
||||
matched, err := filepath.Match(pat, pathParts[0])
|
||||
if err != nil || !matched {
|
||||
return false
|
||||
}
|
||||
patParts = patParts[1:]
|
||||
pathParts = pathParts[1:]
|
||||
}
|
||||
// All pattern parts consumed; path must also be consumed.
|
||||
return len(pathParts) == 0
|
||||
}
|
||||
|
||||
// LoadMatchingDocs fetches content for the given doc paths via VCS and returns
|
||||
// a formatted string suitable for injection into the system prompt.
|
||||
//
|
||||
// Behavior:
|
||||
// - Paths that look like directories (end with /, or GetAllFilesInPath returns files)
|
||||
// are expanded to all .md files under them.
|
||||
// - Missing files are logged as warnings and skipped.
|
||||
// - Total content is capped at opts.MaxBytes; truncation is noted inline.
|
||||
func LoadMatchingDocs(ctx context.Context, fetcher DocFetcher, owner, repo string, docPaths []string, opts DocMapOptions) (string, error) {
|
||||
if opts.MaxBytes <= 0 {
|
||||
opts.MaxBytes = DefaultDocMapMaxBytes
|
||||
}
|
||||
|
||||
var sb strings.Builder
|
||||
totalBytes := 0
|
||||
limitReached := false
|
||||
|
||||
for _, docPath := range docPaths {
|
||||
if ctx.Err() != nil {
|
||||
break
|
||||
}
|
||||
if limitReached {
|
||||
slog.Warn("doc-map: context size limit reached, skipping remaining docs",
|
||||
"remaining_path", docPath, "limit_bytes", opts.MaxBytes)
|
||||
break
|
||||
}
|
||||
|
||||
entries, err := loadDocEntries(ctx, fetcher, owner, repo, docPath)
|
||||
if err != nil {
|
||||
slog.Warn("doc-map: could not load doc, skipping", "path", docPath, "error", err)
|
||||
continue
|
||||
}
|
||||
if len(entries) == 0 {
|
||||
slog.Debug("doc-map: no .md files found under path", "path", docPath)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, entry := range entries {
|
||||
if limitReached {
|
||||
break
|
||||
}
|
||||
available := opts.MaxBytes - totalBytes
|
||||
if available <= 0 {
|
||||
limitReached = true
|
||||
sb.WriteString("\n\n> ⚠️ Design document context truncated — size limit reached.\n")
|
||||
break
|
||||
}
|
||||
|
||||
content := entry.content
|
||||
truncated := false
|
||||
if len(content) > available {
|
||||
content = truncateUTF8(content, available)
|
||||
truncated = true
|
||||
limitReached = true
|
||||
}
|
||||
|
||||
sb.WriteString("### ")
|
||||
sb.WriteString(entry.path)
|
||||
sb.WriteString("\n\n")
|
||||
sb.WriteString(content)
|
||||
sb.WriteString("\n")
|
||||
if truncated {
|
||||
sb.WriteString("\n> ⚠️ (truncated — size limit reached)\n")
|
||||
}
|
||||
totalBytes += len(content)
|
||||
slog.Debug("doc-map: injected doc", "path", entry.path, "bytes", len(content))
|
||||
}
|
||||
}
|
||||
|
||||
if sb.Len() == 0 {
|
||||
return "", nil
|
||||
}
|
||||
return sb.String(), nil
|
||||
}
|
||||
|
||||
// docEntry holds a single doc file path and content.
|
||||
type docEntry struct {
|
||||
path string
|
||||
content string
|
||||
}
|
||||
|
||||
// loadDocEntries returns the doc content for a given path.
|
||||
// If the path is a directory, all .md files under it are returned.
|
||||
// If it's a file, a single entry is returned.
|
||||
func loadDocEntries(ctx context.Context, fetcher DocFetcher, owner, repo, docPath string) ([]docEntry, error) {
|
||||
// Try directory expansion first.
|
||||
files, err := fetcher.GetAllFilesInPath(ctx, owner, repo, docPath)
|
||||
if err == nil && len(files) > 0 {
|
||||
// Filter for .md files only.
|
||||
var entries []docEntry
|
||||
for path, content := range files {
|
||||
if isMDFile(path) {
|
||||
entries = append(entries, docEntry{path: path, content: content})
|
||||
}
|
||||
}
|
||||
// Sort for deterministic output.
|
||||
sortDocEntries(entries)
|
||||
return entries, nil
|
||||
}
|
||||
|
||||
// Try as a single file.
|
||||
content, fileErr := fetcher.GetFileContent(ctx, owner, repo, docPath)
|
||||
if fileErr != nil {
|
||||
// Return the file error (more specific than directory error).
|
||||
return nil, fmt.Errorf("fetch doc %q: %w", docPath, fileErr)
|
||||
}
|
||||
return []docEntry{{path: docPath, content: content}}, nil
|
||||
}
|
||||
|
||||
// isMDFile returns true if the file has a .md extension.
|
||||
func isMDFile(path string) bool {
|
||||
return strings.HasSuffix(strings.ToLower(path), ".md")
|
||||
}
|
||||
|
||||
// sortDocEntries sorts entries by path for deterministic output.
|
||||
func sortDocEntries(entries []docEntry) {
|
||||
// Simple insertion sort (doc lists are small).
|
||||
for i := 1; i < len(entries); i++ {
|
||||
for j := i; j > 0 && entries[j].path < entries[j-1].path; j-- {
|
||||
entries[j], entries[j-1] = entries[j-1], entries[j]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// readFileBytes reads the contents of a local file.
|
||||
func readFileBytes(path string) ([]byte, error) {
|
||||
return os.ReadFile(path)
|
||||
}
|
||||
|
||||
// truncateUTF8 truncates s to at most maxBytes without splitting multi-byte
|
||||
// UTF-8 characters. Returns a valid UTF-8 string of at most maxBytes bytes.
|
||||
func truncateUTF8(s string, maxBytes int) string {
|
||||
if len(s) <= maxBytes {
|
||||
return s
|
||||
}
|
||||
for maxBytes > 0 && !utf8.RuneStart(s[maxBytes]) {
|
||||
maxBytes--
|
||||
}
|
||||
return s[:maxBytes]
|
||||
}
|
||||
Reference in New Issue
Block a user