Compare commits
30 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| dca260f582 | |||
| 921599542d | |||
| 71bb33b6fd | |||
| 55366b3431 | |||
| 3cd5ae594e | |||
| eaccc96073 | |||
| 289b400bfd | |||
| d0b7f09772 | |||
| 377da8ca3a | |||
| 61819ac3e3 | |||
| 3d1260d3b2 | |||
| 0e7e12a99c | |||
| 1862dc999d | |||
| d8270262d6 | |||
| b2eea502d0 | |||
| 0ec5093aeb | |||
| 8a0eed298a | |||
| 8e4c1cc32e | |||
| ec03dc2373 | |||
| 1749d95727 | |||
| 7c83365fc4 | |||
| 6be5e306aa | |||
| cd6cd93bf0 | |||
| c889724dda | |||
| 1ac51669ed | |||
| 2e6f46f28d | |||
| 3fc31c0822 | |||
| 2b611dbd0b | |||
| 3abb611baf | |||
| dd003c66d5 | |||
+1
-1
@@ -9,7 +9,7 @@
|
||||
|
||||
| Package | Use Case | Scope |
|
||||
|---------|----------|-------|
|
||||
| `github.com/goccy/go-yaml` | YAML parsing and AST inspection (subpkgs: `ast`, `parser`) | production |
|
||||
| `gopkg.in/yaml.v3` | YAML parsing (persona files, config) | production |
|
||||
| `github.com/google/go-cmp` | Test comparisons (`cmp.Diff`) | test only |
|
||||
|
||||
**Any import not in this table or the Go standard library is forbidden.**
|
||||
|
||||
+159
-250
@@ -2,7 +2,6 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
@@ -14,7 +13,6 @@ import (
|
||||
|
||||
"gitea.weiker.me/rodin/review-bot/budget"
|
||||
"gitea.weiker.me/rodin/review-bot/gitea"
|
||||
"gitea.weiker.me/rodin/review-bot/github"
|
||||
"gitea.weiker.me/rodin/review-bot/llm"
|
||||
"gitea.weiker.me/rodin/review-bot/review"
|
||||
"gitea.weiker.me/rodin/review-bot/vcs"
|
||||
@@ -56,22 +54,19 @@ func main() {
|
||||
// Logging flags
|
||||
logFormat := flag.String("log-format", envOrDefault("LOG_FORMAT", "text"), "Log output format: text or json")
|
||||
verbosity := flag.String("verbosity", envOrDefault("LOG_VERBOSITY", "info"), "Log verbosity: debug, info, warn, error")
|
||||
// VCS flags
|
||||
provider := flag.String("provider", envOrDefault("VCS_PROVIDER", "gitea"), "VCS provider: gitea or github")
|
||||
baseURL := flag.String("base-url", envOrDefault("VCS_BASE_URL", ""), "VCS API base URL (for github provider; defaults to https://api.github.com)")
|
||||
vcsURL := flag.String("vcs-url", envOrDefault("VCS_URL", envOrDefault("GITEA_URL", envOrDefault("GITHUB_SERVER_URL", ""))), "VCS instance URL (Gitea) [deprecated alias: --gitea-url]")
|
||||
// Keep --gitea-url as backward-compatible alias (flag package doesn't support aliases natively, handle below)
|
||||
repo := flag.String("repo", envOrDefault("VCS_REPO", envOrDefault("GITEA_REPO", envOrDefault("GITHUB_REPOSITORY", ""))), "Repository (owner/name)")
|
||||
// CLI flags
|
||||
giteaURL := flag.String("gitea-url", envOrDefault("GITEA_URL", envOrDefault("GITHUB_SERVER_URL", "")), "Gitea instance URL")
|
||||
repo := flag.String("repo", envOrDefault("GITEA_REPO", envOrDefault("GITHUB_REPOSITORY", "")), "Repository (owner/name)")
|
||||
prNum := flag.String("pr", envOrDefault("PR_NUMBER", ""), "Pull request number")
|
||||
reviewerName := flag.String("reviewer-name", envOrDefault("REVIEWER_NAME", ""), "Reviewer display name")
|
||||
reviewerToken := flag.String("reviewer-token", envOrDefault("REVIEWER_TOKEN", ""), "VCS token for posting review")
|
||||
reviewerToken := flag.String("reviewer-token", envOrDefault("REVIEWER_TOKEN", ""), "Gitea token for posting review")
|
||||
llmBaseURL := flag.String("llm-base-url", envOrDefault("LLM_BASE_URL", ""), "LLM API base URL")
|
||||
llmAPIKey := flag.String("llm-api-key", envOrDefault("LLM_API_KEY", ""), "LLM API key")
|
||||
llmModel := flag.String("llm-model", envOrDefault("LLM_MODEL", ""), "LLM model name")
|
||||
conventionsFile := flag.String("conventions-file", envOrDefault("CONVENTIONS_FILE", ""), "Conventions file path in repo (e.g. CLAUDE.md)")
|
||||
systemPromptFile := flag.String("system-prompt-file", envOrDefault("SYSTEM_PROMPT_FILE", ""), "Local file with additional system prompt instructions")
|
||||
patternsRepo := flag.String("patterns-repo", envOrDefault("PATTERNS_REPO", ""), "Repo with language patterns (e.g. rodin/elixir-patterns)")
|
||||
patternsFiles := flag.String("patterns-files", envOrDefault("PATTERNS_FILES", ""), "Comma-separated file paths to fetch from patterns repo (empty = all files)")
|
||||
patternsFiles := flag.String("patterns-files", envOrDefault("PATTERNS_FILES", "README.md"), "Comma-separated file paths to fetch from patterns repo")
|
||||
dryRun := flag.Bool("dry-run", false, "Print review to stdout instead of posting")
|
||||
llmTemp := flag.Float64("llm-temperature", envOrDefaultFloat("LLM_TEMPERATURE", 0), "LLM temperature (0 = server default)")
|
||||
llmTimeout := flag.Int("llm-timeout", envOrDefaultInt("LLM_TIMEOUT", 300), "LLM request timeout in seconds (default 300)")
|
||||
@@ -85,18 +80,6 @@ func main() {
|
||||
aicoreAPIURL := flag.String("aicore-api-url", envOrDefault("AICORE_API_URL", ""), "SAP AI Core API URL (for provider=aicore)")
|
||||
aicoreResourceGroup := flag.String("aicore-resource-group", envOrDefault("AICORE_RESOURCE_GROUP", "default"), "SAP AI Core resource group (for provider=aicore)")
|
||||
|
||||
// Register --gitea-url as a backward-compatible alias for --vcs-url.
|
||||
// StringVar shares the *string pointer with vcsURL, so whichever flag is
|
||||
// set last by flag.Parse wins — both point to the same underlying value.
|
||||
// NOTE: If a user passes both --vcs-url and --gitea-url, the last one on
|
||||
// the command line takes effect (standard flag package behavior). This is
|
||||
// acceptable since --gitea-url is deprecated and both serve the same purpose.
|
||||
//
|
||||
// ORDERING: This must remain AFTER vcsURL's flag.String declaration and BEFORE
|
||||
// flag.Parse(). The *vcsURL dereference captures the env-var-resolved default
|
||||
// at registration time; moving flag.Parse() above this line would break it.
|
||||
flag.StringVar(vcsURL, "gitea-url", *vcsURL, "Deprecated: use --vcs-url instead")
|
||||
|
||||
flag.Parse()
|
||||
|
||||
if *versionFlag {
|
||||
@@ -109,25 +92,12 @@ func main() {
|
||||
|
||||
slog.Info("review-bot starting", "version", version)
|
||||
|
||||
// Validate VCS provider
|
||||
switch *provider {
|
||||
case "gitea", "github":
|
||||
// valid
|
||||
default:
|
||||
fmt.Fprintf(os.Stderr, "Error: invalid --provider %q (valid: gitea, github)\n", *provider)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Validate required fields
|
||||
// For aicore provider, llm-base-url and llm-api-key are not required
|
||||
isAICore := llm.Provider(*llmProvider) == llm.ProviderAICore
|
||||
if *repo == "" || *prNum == "" || *reviewerToken == "" || *llmModel == "" {
|
||||
if *giteaURL == "" || *repo == "" || *prNum == "" || *reviewerToken == "" || *llmModel == "" {
|
||||
fmt.Fprintf(os.Stderr, "Error: missing required flags or environment variables\n\n")
|
||||
fmt.Fprintf(os.Stderr, "Required: --repo, --pr, --reviewer-token, --llm-model\n")
|
||||
os.Exit(1)
|
||||
}
|
||||
// --vcs-url is required only for gitea provider
|
||||
if *provider == "gitea" && *vcsURL == "" {
|
||||
fmt.Fprintf(os.Stderr, "Error: --vcs-url (or --gitea-url) is required for provider=gitea\n")
|
||||
fmt.Fprintf(os.Stderr, "Required: --gitea-url, --repo, --pr, --reviewer-token, --llm-model\n")
|
||||
os.Exit(1)
|
||||
}
|
||||
if !isAICore && (*llmBaseURL == "" || *llmAPIKey == "") {
|
||||
@@ -146,6 +116,8 @@ func main() {
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// NOTE: Persona loading deferred until after Gitea client init to support repo personas
|
||||
|
||||
// Validate reviewer-name: only safe characters allowed in sentinel
|
||||
if err := validateReviewerName(*reviewerName); err != nil {
|
||||
slog.Error("invalid reviewer name", "error", err)
|
||||
@@ -167,25 +139,8 @@ func main() {
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Initialize VCS client
|
||||
var client vcs.Client
|
||||
switch *provider {
|
||||
case "gitea":
|
||||
giteaClient := gitea.NewClient(*vcsURL, *reviewerToken)
|
||||
client = gitea.NewAdapter(giteaClient)
|
||||
case "github":
|
||||
ghBaseURL := *baseURL
|
||||
if ghBaseURL == "" {
|
||||
ghBaseURL = "https://api.github.com"
|
||||
}
|
||||
client = github.NewClient(*reviewerToken, ghBaseURL)
|
||||
default:
|
||||
fmt.Fprintf(os.Stderr, "Error: unhandled provider %q\n", *provider)
|
||||
os.Exit(1)
|
||||
}
|
||||
slog.Info("VCS client initialized", "provider", *provider)
|
||||
|
||||
// Initialize LLM client
|
||||
// Initialize clients
|
||||
giteaClient := gitea.NewClient(*giteaURL, *reviewerToken)
|
||||
llmClient := llm.NewClient(*llmBaseURL, *llmAPIKey, *llmModel)
|
||||
if *llmTemp < 0 || *llmTemp > 2 {
|
||||
slog.Error("invalid LLM temperature", "temperature", *llmTemp, "range", "0-2")
|
||||
@@ -219,13 +174,16 @@ func main() {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), overallTimeout)
|
||||
defer cancel()
|
||||
|
||||
// Load persona if specified
|
||||
// Load persona if specified (after Gitea client init to support repo personas)
|
||||
var persona *review.Persona
|
||||
if *personaName != "" {
|
||||
// Try loading from repo first, then fall back to built-in
|
||||
repoPersonas, err := review.LoadRepoPersonas(ctx, client, owner, repoName)
|
||||
repoPersonas, err := review.LoadRepoPersonas(ctx, newGiteaClientAdapter(giteaClient), owner, repoName)
|
||||
if err != nil {
|
||||
slog.Warn("could not load repo personas", "repo", owner+"/"+repoName, "error", err)
|
||||
// Continue with built-in personas only.
|
||||
// NOTE: repoPersonas is nil here, but map indexing on a nil map is safe in Go
|
||||
// (returns the zero value), so the fallback to built-in below works correctly.
|
||||
}
|
||||
if p, ok := repoPersonas[*personaName]; ok {
|
||||
persona = p
|
||||
@@ -256,7 +214,7 @@ func main() {
|
||||
slog.Info("reviewing pull request", "pr", prNumber, "repo", fmt.Sprintf("%s/%s", owner, repoName))
|
||||
|
||||
// Step 1: Fetch PR metadata
|
||||
pr, err := client.GetPullRequest(ctx, owner, repoName, prNumber)
|
||||
pr, err := giteaClient.GetPullRequest(ctx, owner, repoName, prNumber)
|
||||
if err != nil {
|
||||
slog.Error("failed to fetch PR", "pr", prNumber, "error", err)
|
||||
os.Exit(1)
|
||||
@@ -264,7 +222,7 @@ func main() {
|
||||
slog.Info("fetched PR metadata", "pr", prNumber, "title", pr.Title)
|
||||
|
||||
// Step 2: Fetch diff
|
||||
diff, err := client.GetPullRequestDiff(ctx, owner, repoName, prNumber)
|
||||
diff, err := giteaClient.GetPullRequestDiff(ctx, owner, repoName, prNumber)
|
||||
if err != nil {
|
||||
slog.Error("failed to fetch diff", "pr", prNumber, "error", err)
|
||||
os.Exit(1)
|
||||
@@ -273,21 +231,21 @@ func main() {
|
||||
|
||||
// Step 3: Fetch full file content for modified files
|
||||
fileContext := ""
|
||||
files, err := client.GetPullRequestFiles(ctx, owner, repoName, prNumber)
|
||||
files, err := giteaClient.GetPullRequestFiles(ctx, owner, repoName, prNumber)
|
||||
if err != nil {
|
||||
slog.Warn("could not fetch PR files list", "pr", prNumber, "error", err)
|
||||
} else {
|
||||
fileContext = fetchFileContext(ctx, client, owner, repoName, pr.Head.Ref, files)
|
||||
fileContext = fetchFileContext(ctx, giteaClient, owner, repoName, pr.Head.Ref, files)
|
||||
slog.Debug("fetched file context", "files", len(files))
|
||||
}
|
||||
|
||||
// Step 4: Check CI status
|
||||
ciPassed := true
|
||||
ciDetails := ""
|
||||
if pr.Head.SHA != "" {
|
||||
statuses, err := client.GetCommitStatuses(ctx, owner, repoName, pr.Head.SHA)
|
||||
if pr.Head.Sha != "" {
|
||||
statuses, err := giteaClient.GetCommitStatuses(ctx, owner, repoName, pr.Head.Sha)
|
||||
if err != nil {
|
||||
slog.Warn("could not fetch CI status", "sha", pr.Head.SHA, "error", err)
|
||||
slog.Warn("could not fetch CI status", "sha", pr.Head.Sha, "error", err)
|
||||
} else {
|
||||
ciPassed, ciDetails = evaluateCIStatus(statuses)
|
||||
slog.Info("CI status checked", "passed", ciPassed)
|
||||
@@ -297,7 +255,7 @@ func main() {
|
||||
// Step 5: Load conventions file if specified
|
||||
conventions := ""
|
||||
if *conventionsFile != "" {
|
||||
content, err := client.GetFileContent(ctx, owner, repoName, *conventionsFile, "")
|
||||
content, err := giteaClient.GetFileContent(ctx, owner, repoName, *conventionsFile)
|
||||
if err != nil {
|
||||
slog.Warn("could not load conventions file", "file", *conventionsFile, "error", err)
|
||||
} else {
|
||||
@@ -309,7 +267,7 @@ func main() {
|
||||
// Step 6: Load patterns from external repo if specified
|
||||
patterns := ""
|
||||
if *patternsRepo != "" {
|
||||
patterns = fetchPatterns(ctx, client, *patternsRepo, *patternsFiles)
|
||||
patterns = fetchPatterns(ctx, giteaClient, *patternsRepo, *patternsFiles)
|
||||
slog.Debug("loaded patterns", "repo", *patternsRepo, "bytes", len(patterns))
|
||||
}
|
||||
|
||||
@@ -402,16 +360,15 @@ func main() {
|
||||
}
|
||||
|
||||
// Add commit footer so readers know which commit was evaluated
|
||||
if pr.Head.SHA != "" {
|
||||
shortSHA := pr.Head.SHA
|
||||
if pr.Head.Sha != "" {
|
||||
shortSHA := pr.Head.Sha
|
||||
if len(shortSHA) > 8 {
|
||||
shortSHA = shortSHA[:8]
|
||||
}
|
||||
reviewBody += fmt.Sprintf("\n\n---\n*Evaluated against %s*", shortSHA)
|
||||
}
|
||||
|
||||
// Map verdict to canonical review event
|
||||
event := verdictToEvent(result.Verdict)
|
||||
event := review.GiteaEvent(result.Verdict)
|
||||
|
||||
if *dryRun {
|
||||
fmt.Println("--- DRY RUN ---")
|
||||
@@ -423,13 +380,14 @@ func main() {
|
||||
sentinel := fmt.Sprintf("<!-- review-bot:%s -->", *reviewerName)
|
||||
|
||||
// Stale check: verify HEAD hasn't moved since we started
|
||||
evaluatedSHA := pr.Head.SHA
|
||||
evaluatedSHA := pr.Head.Sha
|
||||
var currentSHA string
|
||||
currentPR, err := client.GetPullRequest(ctx, owner, repoName, prNumber)
|
||||
currentPR, err := giteaClient.GetPullRequest(ctx, owner, repoName, prNumber)
|
||||
if err != nil {
|
||||
slog.Warn("could not re-fetch PR for stale check", "pr", prNumber, "error", err)
|
||||
// currentSHA stays empty — shouldSkipStaleReview will return false
|
||||
} else {
|
||||
currentSHA = currentPR.Head.SHA
|
||||
currentSHA = currentPR.Head.Sha
|
||||
}
|
||||
if shouldSkipStaleReview(evaluatedSHA, currentSHA) {
|
||||
slog.Warn("HEAD moved during review — skipping stale review",
|
||||
@@ -439,24 +397,17 @@ func main() {
|
||||
return
|
||||
}
|
||||
|
||||
// Build line→position map for inline comments
|
||||
lineToPosition := vcs.BuildLineToPositionMap(diff)
|
||||
var inlineComments []vcs.ReviewComment
|
||||
// Map findings to inline comments for lines present in the diff
|
||||
diffRanges := gitea.ParseDiffNewLines(diff)
|
||||
var inlineComments []gitea.ReviewComment
|
||||
for _, f := range result.Findings {
|
||||
if f.File == "" || f.Line <= 0 {
|
||||
continue
|
||||
if f.File != "" && f.Line > 0 && diffRanges.Contains(f.File, f.Line) {
|
||||
inlineComments = append(inlineComments, gitea.ReviewComment{
|
||||
Path: f.File,
|
||||
NewPosition: int64(f.Line),
|
||||
Body: fmt.Sprintf("**[%s]** %s", f.Severity, f.Finding),
|
||||
})
|
||||
}
|
||||
pos, ok := lineToPosition[f.File][f.Line]
|
||||
if !ok {
|
||||
slog.Warn("line not in diff, skipping comment", "file", f.File, "line", f.Line)
|
||||
continue
|
||||
}
|
||||
inlineComments = append(inlineComments, vcs.ReviewComment{
|
||||
Path: f.File,
|
||||
Position: pos,
|
||||
CommitID: pr.Head.SHA,
|
||||
Body: fmt.Sprintf("**[%s]** %s", f.Severity, f.Finding),
|
||||
})
|
||||
}
|
||||
if len(inlineComments) > 0 {
|
||||
slog.Debug("attaching inline comments", "count", len(inlineComments))
|
||||
@@ -465,9 +416,10 @@ func main() {
|
||||
// --- Review update strategy ---
|
||||
// 1. POST new review first (gets non-stale approval badge on HEAD)
|
||||
// 2. Then supersede old review with link to the new one
|
||||
var oldReviews []vcs.Review
|
||||
// Order matters: post first so we have the new review's URL for the supersede message.
|
||||
var oldReviews []gitea.Review
|
||||
if *reviewerName != "" {
|
||||
existingReviews, err := client.ListReviews(ctx, owner, repoName, prNumber)
|
||||
existingReviews, err := giteaClient.ListReviews(ctx, owner, repoName, prNumber)
|
||||
if err != nil {
|
||||
slog.Warn("could not list existing reviews", "pr", prNumber, "error", err)
|
||||
} else {
|
||||
@@ -479,137 +431,74 @@ func main() {
|
||||
}
|
||||
}
|
||||
|
||||
// Self-request as reviewer (Gitea-specific; ensures we appear in required-reviewer checks)
|
||||
if giteaAdapter, ok := client.(*gitea.Adapter); ok {
|
||||
authUser, err := client.GetAuthenticatedUser(ctx)
|
||||
if err != nil {
|
||||
slog.Warn("could not determine authenticated user for reviewer self-request", "error", err)
|
||||
} else if authUser != "" {
|
||||
if err := giteaAdapter.Underlying().RequestReviewer(ctx, owner, repoName, prNumber, authUser); err != nil {
|
||||
slog.Warn("could not self-request as reviewer", "user", authUser, "error", err)
|
||||
} else {
|
||||
slog.Debug("self-requested as reviewer", "user", authUser, "pr", prNumber)
|
||||
}
|
||||
// Self-request as reviewer (ensures we appear in required-reviewer checks)
|
||||
authUser, err := giteaClient.GetAuthenticatedUser(ctx)
|
||||
if err != nil {
|
||||
slog.Warn("could not determine authenticated user for reviewer self-request", "error", err)
|
||||
} else if authUser != "" {
|
||||
if err := giteaClient.RequestReviewer(ctx, owner, repoName, prNumber, authUser); err != nil {
|
||||
slog.Warn("could not self-request as reviewer", "user", authUser, "error", err)
|
||||
} else {
|
||||
slog.Debug("self-requested as reviewer", "user", authUser, "pr", prNumber)
|
||||
}
|
||||
} else {
|
||||
slog.Debug("RequestReviewer not supported for provider, skipping")
|
||||
}
|
||||
|
||||
// POST new review
|
||||
slog.Info("posting review", "event", event, "pr", prNumber)
|
||||
reviewReq := vcs.ReviewRequest{
|
||||
Body: reviewBody,
|
||||
Event: event,
|
||||
Comments: inlineComments,
|
||||
}
|
||||
posted, err := client.PostReview(ctx, owner, repoName, prNumber, reviewReq)
|
||||
posted, err := giteaClient.PostReview(ctx, owner, repoName, prNumber, event, reviewBody, inlineComments)
|
||||
if err != nil {
|
||||
slog.Error("failed to post review", "pr", prNumber, "event", event, "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
slog.Info("review posted", "review_id", posted.ID, "user", posted.User.Login, "pr", prNumber)
|
||||
|
||||
// Supersede all old reviews
|
||||
// Supersede all old reviews with link to the new one
|
||||
if len(oldReviews) > 0 {
|
||||
if err := supersedeOldReviews(ctx, client, *provider, *vcsURL, owner, repoName, prNumber, oldReviews, posted.ID, sentinel); err != nil {
|
||||
slog.Error("failed to supersede old reviews", "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// verdictToEvent maps a verdict string from the LLM response to a canonical vcs.ReviewEvent.
|
||||
func verdictToEvent(verdict string) vcs.ReviewEvent {
|
||||
switch verdict {
|
||||
case "APPROVE":
|
||||
return vcs.ReviewEventApprove
|
||||
case "REQUEST_CHANGES":
|
||||
return vcs.ReviewEventRequestChanges
|
||||
default:
|
||||
return vcs.ReviewEventComment
|
||||
}
|
||||
}
|
||||
|
||||
// supersedeOldReviews marks prior reviews as superseded so only the latest review is visible.
|
||||
// For GitHub: dismisses old reviews (vcsURL is unused in this path).
|
||||
// For Gitea: edits the review body with a link to the new review and resolves inline comments.
|
||||
//
|
||||
// The vcsURL parameter is only used in the Gitea path to construct review permalink URLs;
|
||||
// it is accepted unconditionally to keep the function signature uniform across providers.
|
||||
func supersedeOldReviews(ctx context.Context, client vcs.Client, provider, vcsURL, owner, repoName string, prNumber int, oldReviews []vcs.Review, newReviewID int64, sentinel string) error {
|
||||
switch provider {
|
||||
case "github":
|
||||
// Best-effort dismissal: attempt all reviews, join any errors.
|
||||
var errs []error
|
||||
for _, old := range oldReviews {
|
||||
if err := client.DismissReview(ctx, owner, repoName, prNumber, old.ID, "Superseded by new review"); err != nil {
|
||||
slog.Warn("failed to dismiss review", "id", old.ID, "error", err)
|
||||
errs = append(errs, fmt.Errorf("dismiss review %d: %w", old.ID, err))
|
||||
} else {
|
||||
slog.Info("dismissed old review", "review_id", old.ID, "new_review_id", newReviewID, "pr", prNumber)
|
||||
}
|
||||
}
|
||||
return errors.Join(errs...)
|
||||
case "gitea":
|
||||
// Continue to Gitea-specific logic below the switch.
|
||||
default:
|
||||
return fmt.Errorf("supersedeOldReviews: unsupported provider %q", provider)
|
||||
}
|
||||
|
||||
// The type assertion below is guaranteed to succeed: the caller's provider switch
|
||||
// ensures we only reach this point when provider == "gitea", and the gitea provider
|
||||
// always constructs a *gitea.Adapter. The !ok branch guards against future refactors
|
||||
// (e.g. wrapping the adapter in a decorator) that would silently break this path.
|
||||
giteaAdapter, ok := client.(*gitea.Adapter)
|
||||
if !ok {
|
||||
return fmt.Errorf("expected gitea.Adapter for gitea provider, got %T", client)
|
||||
}
|
||||
underlying := giteaAdapter.Underlying()
|
||||
|
||||
newReviewURL := fmt.Sprintf("%s/%s/%s/pulls/%d#pullrequestreview-%d", strings.TrimRight(vcsURL, "/"), owner, repoName, prNumber, newReviewID)
|
||||
for _, oldReview := range oldReviews {
|
||||
cid, err := underlying.GetTimelineReviewCommentIDForReview(ctx, owner, repoName, prNumber, oldReview.ID)
|
||||
if err != nil {
|
||||
slog.Warn("could not find comment ID for old review", "review_id", oldReview.ID, "error", err)
|
||||
continue
|
||||
}
|
||||
supersededBody := buildSupersededBody(oldReview.Body, oldReview.CommitID, newReviewURL, sentinel)
|
||||
if err := underlying.EditComment(ctx, owner, repoName, cid, supersededBody); err != nil {
|
||||
slog.Warn("could not mark old review as superseded", "review_id", oldReview.ID, "comment_id", cid, "error", err)
|
||||
continue
|
||||
}
|
||||
slog.Info("marked old review as superseded", "review_id", oldReview.ID, "new_review_id", newReviewID, "pr", prNumber)
|
||||
|
||||
// Resolve old review's inline comments
|
||||
oldComments, err := underlying.ListReviewComments(ctx, owner, repoName, prNumber, oldReview.ID)
|
||||
if err != nil {
|
||||
slog.Warn("could not list old review comments for resolution", "review_id", oldReview.ID, "error", err)
|
||||
continue
|
||||
}
|
||||
resolved, failed := 0, 0
|
||||
for _, c := range oldComments {
|
||||
if c.ID == 0 {
|
||||
newReviewURL := fmt.Sprintf("%s/%s/%s/pulls/%d#pullrequestreview-%d", strings.TrimRight(*giteaURL, "/"), owner, repoName, prNumber, posted.ID)
|
||||
for _, oldReview := range oldReviews {
|
||||
cid, err := giteaClient.GetTimelineReviewCommentIDForReview(ctx, owner, repoName, prNumber, oldReview.ID)
|
||||
if err != nil {
|
||||
slog.Warn("could not find comment ID for old review", "review_id", oldReview.ID, "error", err)
|
||||
continue
|
||||
}
|
||||
if err := underlying.ResolveComment(ctx, owner, repoName, c.ID); err != nil {
|
||||
slog.Debug("could not resolve inline comment", "comment_id", c.ID, "error", err)
|
||||
failed++
|
||||
} else {
|
||||
resolved++
|
||||
supersededBody := buildSupersededBody(oldReview.Body, oldReview.CommitID, newReviewURL, sentinel)
|
||||
if err := giteaClient.EditComment(ctx, owner, repoName, cid, supersededBody); err != nil {
|
||||
slog.Warn("could not mark old review as superseded", "review_id", oldReview.ID, "comment_id", cid, "error", err)
|
||||
continue
|
||||
}
|
||||
slog.Info("marked old review as superseded", "review_id", oldReview.ID, "new_review_id", posted.ID, "pr", prNumber)
|
||||
|
||||
// Resolve old review's inline comments
|
||||
oldComments, err := giteaClient.ListReviewComments(ctx, owner, repoName, prNumber, oldReview.ID)
|
||||
if err != nil {
|
||||
slog.Warn("could not list old review comments for resolution", "review_id", oldReview.ID, "error", err)
|
||||
continue
|
||||
}
|
||||
resolved, failed := 0, 0
|
||||
for _, c := range oldComments {
|
||||
if c.ID == 0 {
|
||||
continue
|
||||
}
|
||||
if err := giteaClient.ResolveComment(ctx, owner, repoName, c.ID); err != nil {
|
||||
slog.Debug("could not resolve inline comment", "comment_id", c.ID, "error", err)
|
||||
failed++
|
||||
} else {
|
||||
resolved++
|
||||
}
|
||||
}
|
||||
if resolved > 0 {
|
||||
slog.Info("resolved old inline comments", "review_id", oldReview.ID, "count", resolved, "pr", prNumber)
|
||||
}
|
||||
if failed > 0 {
|
||||
slog.Warn("some inline comments could not be resolved", "review_id", oldReview.ID, "failed", failed, "pr", prNumber)
|
||||
}
|
||||
}
|
||||
if resolved > 0 {
|
||||
slog.Info("resolved old inline comments", "review_id", oldReview.ID, "count", resolved, "pr", prNumber)
|
||||
}
|
||||
if failed > 0 {
|
||||
slog.Warn("some inline comments could not be resolved", "review_id", oldReview.ID, "failed", failed, "pr", prNumber)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
||||
}
|
||||
|
||||
// fetchFileContext fetches the full content of modified files from the PR branch.
|
||||
func fetchFileContext(ctx context.Context, client vcs.PRReader, owner, repo, ref string, files []vcs.ChangedFile) string {
|
||||
func fetchFileContext(ctx context.Context, client *gitea.Client, owner, repo, ref string, files []gitea.ChangedFile) string {
|
||||
var sb strings.Builder
|
||||
for _, f := range files {
|
||||
if ctx.Err() != nil {
|
||||
@@ -618,7 +507,7 @@ func fetchFileContext(ctx context.Context, client vcs.PRReader, owner, repo, ref
|
||||
if f.Status == "removed" {
|
||||
continue // Skip deleted files
|
||||
}
|
||||
content, err := client.GetFileContentAtRef(ctx, owner, repo, f.Filename, ref)
|
||||
content, err := client.GetFileContentRef(ctx, owner, repo, f.Filename, ref)
|
||||
if err != nil {
|
||||
slog.Warn("could not fetch file content", "file", f.Filename, "error", err)
|
||||
continue
|
||||
@@ -635,25 +524,11 @@ func fetchFileContext(ctx context.Context, client vcs.PRReader, owner, repo, ref
|
||||
// patternsRepo is comma-separated list of owner/name repos.
|
||||
// patternsFiles is comma-separated list of file paths or directories.
|
||||
// If a path ends with / or is a directory, all files within it are fetched recursively.
|
||||
// If patternsFiles is empty, all files from the repo root are fetched.
|
||||
func fetchPatterns(ctx context.Context, client vcs.FileReader, patternsRepo, patternsFiles string) string {
|
||||
func fetchPatterns(ctx context.Context, client *gitea.Client, patternsRepo, patternsFiles string) string {
|
||||
var sb strings.Builder
|
||||
|
||||
repos := strings.Split(patternsRepo, ",")
|
||||
|
||||
// Build the list of paths to fetch
|
||||
var paths []string
|
||||
if patternsFiles == "" {
|
||||
// Empty patternsFiles means "fetch all files from repo root"
|
||||
paths = []string{""}
|
||||
} else {
|
||||
for _, p := range strings.Split(patternsFiles, ",") {
|
||||
p = strings.TrimSpace(p)
|
||||
if p != "" {
|
||||
paths = append(paths, p)
|
||||
}
|
||||
}
|
||||
}
|
||||
paths := strings.Split(patternsFiles, ",")
|
||||
|
||||
for _, repoRef := range repos {
|
||||
if ctx.Err() != nil {
|
||||
@@ -674,7 +549,12 @@ func fetchPatterns(ctx context.Context, client vcs.FileReader, patternsRepo, pat
|
||||
var repoSkippedFiles []string
|
||||
|
||||
for _, path := range paths {
|
||||
files, err := vcs.GetAllFilesInPath(ctx, client, owner, repo, path)
|
||||
path = strings.TrimSpace(path)
|
||||
if path == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
files, err := client.GetAllFilesInPath(ctx, owner, repo, path)
|
||||
if err != nil {
|
||||
slog.Warn("could not fetch patterns", "path", path, "repo", repoRef, "error", err)
|
||||
continue
|
||||
@@ -713,20 +593,18 @@ func isPatternFile(path string) bool {
|
||||
}
|
||||
|
||||
// evaluateCIStatus checks if all CI statuses indicate success.
|
||||
// Returns passed=true if no checks have failed (pending checks are not treated as failures).
|
||||
func evaluateCIStatus(statuses []vcs.CommitStatus) (passed bool, details string) {
|
||||
func evaluateCIStatus(statuses []gitea.CommitStatus) (passed bool, details string) {
|
||||
if len(statuses) == 0 {
|
||||
return true, "no CI statuses found"
|
||||
}
|
||||
|
||||
var failed []string
|
||||
var pending int
|
||||
for _, s := range statuses {
|
||||
switch s.Status {
|
||||
case "success":
|
||||
// good
|
||||
case "pending":
|
||||
pending++
|
||||
// treat pending as not-failed
|
||||
case "failure", "error":
|
||||
failed = append(failed, fmt.Sprintf("%s: %s", s.Context, s.Description))
|
||||
}
|
||||
@@ -735,9 +613,6 @@ func evaluateCIStatus(statuses []vcs.CommitStatus) (passed bool, details string)
|
||||
if len(failed) > 0 {
|
||||
return false, strings.Join(failed, "; ")
|
||||
}
|
||||
if pending > 0 {
|
||||
return true, fmt.Sprintf("no failures (%d pending)", pending)
|
||||
}
|
||||
return true, "all checks passed"
|
||||
}
|
||||
|
||||
@@ -853,10 +728,10 @@ func buildSupersededBody(originalBody, commitSHA, newReviewURL, sentinel string)
|
||||
}
|
||||
|
||||
// hasSharedToken detects if another review-bot role posted under the same
|
||||
// VCS user. This indicates misconfiguration where two roles share a token
|
||||
// instead of having separate accounts. Returns true if shared token
|
||||
// Gitea user. This indicates misconfiguration where two roles share a token
|
||||
// instead of having separate Gitea accounts. Returns true if shared token
|
||||
// detected (caller should skip update-in-place logic to avoid clobbering).
|
||||
func hasSharedToken(reviews []vcs.Review, ownSentinel string) bool {
|
||||
func hasSharedToken(reviews []gitea.Review, ownSentinel string) bool {
|
||||
ownLogin := ""
|
||||
for _, r := range reviews {
|
||||
if strings.Contains(r.Body, ownSentinel) {
|
||||
@@ -869,7 +744,7 @@ func hasSharedToken(reviews []vcs.Review, ownSentinel string) bool {
|
||||
}
|
||||
for _, r := range reviews {
|
||||
if r.User.Login == ownLogin && strings.Contains(r.Body, "<!-- review-bot:") && !strings.Contains(r.Body, ownSentinel) {
|
||||
slog.Warn("shared token detected — another review-bot role is using the same VCS user",
|
||||
slog.Warn("shared token detected — another review-bot role is using the same Gitea user",
|
||||
"sibling_role", extractSentinelName(r.Body), "user", ownLogin)
|
||||
return true
|
||||
}
|
||||
@@ -890,27 +765,29 @@ func extractSentinelName(body string) string {
|
||||
if end < 0 {
|
||||
return "unknown"
|
||||
}
|
||||
name := rest[:end]
|
||||
// Sanitize: strip control characters to prevent log injection.
|
||||
name = strings.Map(func(r rune) rune {
|
||||
if r < 0x20 || r == 0x7f {
|
||||
return -1
|
||||
}
|
||||
return r
|
||||
}, name)
|
||||
if len(name) > 64 {
|
||||
name = name[:64]
|
||||
}
|
||||
if name == "" {
|
||||
return "unknown"
|
||||
}
|
||||
return name
|
||||
return rest[:end]
|
||||
}
|
||||
|
||||
// findOwnReview locates the most recent non-superseded review matching the sentinel.
|
||||
func findOwnReview(reviews []gitea.Review, sentinel string) *gitea.Review {
|
||||
var best *gitea.Review
|
||||
for i := range reviews {
|
||||
if !strings.Contains(reviews[i].Body, sentinel) {
|
||||
continue
|
||||
}
|
||||
if strings.Contains(reviews[i].Body, "~~Original review~~") {
|
||||
continue
|
||||
}
|
||||
if best == nil || reviews[i].ID > best.ID {
|
||||
best = &reviews[i]
|
||||
}
|
||||
}
|
||||
return best
|
||||
}
|
||||
|
||||
// findAllOwnReviews returns all non-superseded reviews matching the sentinel.
|
||||
func findAllOwnReviews(reviews []vcs.Review, sentinel string) []vcs.Review {
|
||||
var result []vcs.Review
|
||||
func findAllOwnReviews(reviews []gitea.Review, sentinel string) []gitea.Review {
|
||||
var result []gitea.Review
|
||||
for i := range reviews {
|
||||
if !strings.Contains(reviews[i].Body, sentinel) {
|
||||
continue
|
||||
@@ -935,3 +812,35 @@ func shouldSkipStaleReview(evaluatedSHA, currentSHA string) bool {
|
||||
}
|
||||
return evaluatedSHA != currentSHA
|
||||
}
|
||||
|
||||
// giteaClientAdapter adapts gitea.Client to vcs.FileReader interface.
|
||||
type giteaClientAdapter struct {
|
||||
client *gitea.Client
|
||||
}
|
||||
|
||||
func newGiteaClientAdapter(c *gitea.Client) *giteaClientAdapter {
|
||||
return &giteaClientAdapter{client: c}
|
||||
}
|
||||
|
||||
func (a *giteaClientAdapter) ListContents(ctx context.Context, owner, repo, path string) ([]vcs.ContentEntry, error) {
|
||||
entries, err := a.client.ListContents(ctx, owner, repo, path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
result := make([]vcs.ContentEntry, len(entries))
|
||||
for i, e := range entries {
|
||||
result[i] = vcs.ContentEntry{
|
||||
Name: e.Name,
|
||||
Path: e.Path,
|
||||
Type: e.Type,
|
||||
}
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (a *giteaClientAdapter) GetFileContent(ctx context.Context, owner, repo, filePath, ref string) (string, error) {
|
||||
if ref != "" {
|
||||
return a.client.GetFileContentRef(ctx, owner, repo, filePath, ref)
|
||||
}
|
||||
return a.client.GetFileContent(ctx, owner, repo, filePath)
|
||||
}
|
||||
|
||||
+126
-133
@@ -10,7 +10,7 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"gitea.weiker.me/rodin/review-bot/vcs"
|
||||
"gitea.weiker.me/rodin/review-bot/gitea"
|
||||
)
|
||||
|
||||
func TestValidateReviewerName(t *testing.T) {
|
||||
@@ -107,7 +107,9 @@ func TestValidateWorkspacePath(t *testing.T) {
|
||||
workspace: tmpDir,
|
||||
path: "/etc/passwd",
|
||||
wantErr: true,
|
||||
errMatch: "failed to resolve",
|
||||
// Go 1.21+ filepath.Join normalizes absolute paths: Join("/tmp/x", "/etc/passwd")
|
||||
// becomes "/tmp/x/etc/passwd", which is within workspace but doesn't exist.
|
||||
errMatch: "failed to resolve",
|
||||
},
|
||||
{
|
||||
name: "nonexistent file",
|
||||
@@ -152,14 +154,15 @@ func TestValidateWorkspacePath(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func makeReview(id int64, login, state string, stale bool, body string) vcs.Review {
|
||||
return vcs.Review{
|
||||
func makeReview(id int64, login, state string, stale bool, body string) gitea.Review {
|
||||
r := gitea.Review{
|
||||
ID: id,
|
||||
Body: body,
|
||||
User: vcs.UserInfo{Login: login},
|
||||
State: state,
|
||||
Stale: stale,
|
||||
}
|
||||
r.User.Login = login
|
||||
return r
|
||||
}
|
||||
|
||||
func TestBuildSupersededBody(t *testing.T) {
|
||||
@@ -210,11 +213,96 @@ func TestBuildSupersededBodyShortSHA(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindOwnReview(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
reviews []gitea.Review
|
||||
sentinel string
|
||||
wantID int64
|
||||
wantNil bool
|
||||
}{
|
||||
{
|
||||
name: "no reviews",
|
||||
reviews: nil,
|
||||
sentinel: "<!-- review-bot:sonnet -->",
|
||||
wantNil: true,
|
||||
},
|
||||
{
|
||||
name: "found by sentinel",
|
||||
reviews: []gitea.Review{
|
||||
makeReview(42, "bot", "APPROVED", false, "review body\n<!-- review-bot:sonnet -->"),
|
||||
},
|
||||
sentinel: "<!-- review-bot:sonnet -->",
|
||||
wantID: 42,
|
||||
},
|
||||
{
|
||||
name: "wrong sentinel",
|
||||
reviews: []gitea.Review{
|
||||
makeReview(42, "bot", "APPROVED", false, "body\n<!-- review-bot:gpt -->"),
|
||||
},
|
||||
sentinel: "<!-- review-bot:sonnet -->",
|
||||
wantNil: true,
|
||||
},
|
||||
{
|
||||
name: "multiple reviews, returns first match",
|
||||
reviews: []gitea.Review{
|
||||
makeReview(10, "bot", "APPROVED", false, "old\n<!-- review-bot:gpt -->"),
|
||||
makeReview(20, "bot", "APPROVED", false, "new\n<!-- review-bot:sonnet -->"),
|
||||
},
|
||||
sentinel: "<!-- review-bot:sonnet -->",
|
||||
wantID: 20,
|
||||
},
|
||||
{
|
||||
name: "skips superseded review",
|
||||
reviews: []gitea.Review{
|
||||
makeReview(10, "bot", "APPROVED", false, "~~Original review~~\n\n**Superseded**\n<!-- review-bot:sonnet -->"),
|
||||
makeReview(20, "bot", "APPROVED", false, "fresh review\n<!-- review-bot:sonnet -->"),
|
||||
},
|
||||
sentinel: "<!-- review-bot:sonnet -->",
|
||||
wantID: 20,
|
||||
},
|
||||
{
|
||||
name: "only superseded reviews exist",
|
||||
reviews: []gitea.Review{
|
||||
makeReview(10, "bot", "APPROVED", false, "~~Original review~~\n\n<!-- review-bot:sonnet -->"),
|
||||
},
|
||||
sentinel: "<!-- review-bot:sonnet -->",
|
||||
wantNil: true,
|
||||
},
|
||||
{
|
||||
name: "picks highest ID among matches",
|
||||
reviews: []gitea.Review{
|
||||
makeReview(50, "bot", "APPROVED", false, "v1\n<!-- review-bot:sonnet -->"),
|
||||
makeReview(30, "bot", "APPROVED", false, "v0\n<!-- review-bot:sonnet -->"),
|
||||
},
|
||||
sentinel: "<!-- review-bot:sonnet -->",
|
||||
wantID: 50,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := findOwnReview(tc.reviews, tc.sentinel)
|
||||
if tc.wantNil {
|
||||
if got != nil {
|
||||
t.Errorf("findOwnReview() = %v, want nil", got)
|
||||
}
|
||||
} else {
|
||||
if got == nil {
|
||||
t.Fatal("findOwnReview() = nil, want non-nil")
|
||||
}
|
||||
if got.ID != tc.wantID {
|
||||
t.Errorf("findOwnReview().ID = %d, want %d", got.ID, tc.wantID)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestHasSharedToken(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
reviews []vcs.Review
|
||||
reviews []gitea.Review
|
||||
sentinel string
|
||||
want bool
|
||||
}{
|
||||
@@ -226,36 +314,36 @@ func TestHasSharedToken(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "no own review yet - cannot detect",
|
||||
reviews: []vcs.Review{
|
||||
makeReview(1, "other", "APPROVED", false, "<!-- review-bot:gpt --> body"),
|
||||
reviews: []gitea.Review{
|
||||
{ID: 1, User: struct{ Login string `json:"login"` }{Login: "other"}, Body: "<!-- review-bot:gpt --> body"},
|
||||
},
|
||||
sentinel: "<!-- review-bot:sonnet -->",
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "separate users - no shared token",
|
||||
reviews: []vcs.Review{
|
||||
makeReview(1, "sonnet-review-bot", "APPROVED", false, "<!-- review-bot:sonnet --> body"),
|
||||
makeReview(2, "security-review-bot", "APPROVED", false, "<!-- review-bot:security --> body"),
|
||||
reviews: []gitea.Review{
|
||||
{ID: 1, User: struct{ Login string `json:"login"` }{Login: "sonnet-review-bot"}, Body: "<!-- review-bot:sonnet --> body"},
|
||||
{ID: 2, User: struct{ Login string `json:"login"` }{Login: "security-review-bot"}, Body: "<!-- review-bot:security --> body"},
|
||||
},
|
||||
sentinel: "<!-- review-bot:sonnet -->",
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "shared token detected - same user different sentinels",
|
||||
reviews: []vcs.Review{
|
||||
makeReview(1, "sonnet-review-bot", "APPROVED", false, "<!-- review-bot:sonnet --> body"),
|
||||
makeReview(2, "sonnet-review-bot", "APPROVED", false, "<!-- review-bot:security --> body"),
|
||||
reviews: []gitea.Review{
|
||||
{ID: 1, User: struct{ Login string `json:"login"` }{Login: "sonnet-review-bot"}, Body: "<!-- review-bot:sonnet --> body"},
|
||||
{ID: 2, User: struct{ Login string `json:"login"` }{Login: "sonnet-review-bot"}, Body: "<!-- review-bot:security --> body"},
|
||||
},
|
||||
sentinel: "<!-- review-bot:sonnet -->",
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "three roles same user",
|
||||
reviews: []vcs.Review{
|
||||
makeReview(1, "bot", "APPROVED", false, "<!-- review-bot:sonnet --> body"),
|
||||
makeReview(2, "bot", "APPROVED", false, "<!-- review-bot:security --> body"),
|
||||
makeReview(3, "bot", "APPROVED", false, "<!-- review-bot:gpt --> body"),
|
||||
reviews: []gitea.Review{
|
||||
{ID: 1, User: struct{ Login string `json:"login"` }{Login: "bot"}, Body: "<!-- review-bot:sonnet --> body"},
|
||||
{ID: 2, User: struct{ Login string `json:"login"` }{Login: "bot"}, Body: "<!-- review-bot:security --> body"},
|
||||
{ID: 3, User: struct{ Login string `json:"login"` }{Login: "bot"}, Body: "<!-- review-bot:gpt --> body"},
|
||||
},
|
||||
sentinel: "<!-- review-bot:sonnet -->",
|
||||
want: true,
|
||||
@@ -416,56 +504,10 @@ func TestIsPatternFile(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestBuildPatternPaths verifies the path-building logic for fetchPatterns.
|
||||
// Empty patternsFiles means "fetch all from root" (represented as [""]).
|
||||
func TestBuildPatternPaths(t *testing.T) {
|
||||
buildPaths := func(patternsFiles string) []string {
|
||||
if patternsFiles == "" {
|
||||
return []string{""}
|
||||
}
|
||||
var paths []string
|
||||
for _, p := range strings.Split(patternsFiles, ",") {
|
||||
p = strings.TrimSpace(p)
|
||||
if p != "" {
|
||||
paths = append(paths, p)
|
||||
}
|
||||
}
|
||||
return paths
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
want []string
|
||||
}{
|
||||
{"empty fetches root", "", []string{""}},
|
||||
{"single file", "README.md", []string{"README.md"}},
|
||||
{"multiple files", "README.md,PATTERNS.md", []string{"README.md", "PATTERNS.md"}},
|
||||
{"trims whitespace", " foo.md , bar.md ", []string{"foo.md", "bar.md"}},
|
||||
{"skips empty between commas", "foo.md,,bar.md", []string{"foo.md", "bar.md"}},
|
||||
{"directory path", "patterns/", []string{"patterns/"}},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := buildPaths(tc.input)
|
||||
if len(got) != len(tc.want) {
|
||||
t.Errorf("buildPaths(%q) = %v, want %v", tc.input, got, tc.want)
|
||||
return
|
||||
}
|
||||
for i := range got {
|
||||
if got[i] != tc.want[i] {
|
||||
t.Errorf("buildPaths(%q)[%d] = %q, want %q", tc.input, i, got[i], tc.want[i])
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestEvaluateCIStatus(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
statuses []vcs.CommitStatus
|
||||
statuses []gitea.CommitStatus
|
||||
wantPassed bool
|
||||
wantSubstr string
|
||||
}{
|
||||
@@ -477,7 +519,7 @@ func TestEvaluateCIStatus(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "all success",
|
||||
statuses: []vcs.CommitStatus{
|
||||
statuses: []gitea.CommitStatus{
|
||||
{Status: "success", Context: "ci/build", Description: "Build passed"},
|
||||
{Status: "success", Context: "ci/test", Description: "Tests passed"},
|
||||
},
|
||||
@@ -486,7 +528,7 @@ func TestEvaluateCIStatus(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "one failure",
|
||||
statuses: []vcs.CommitStatus{
|
||||
statuses: []gitea.CommitStatus{
|
||||
{Status: "success", Context: "ci/build", Description: "Build passed"},
|
||||
{Status: "failure", Context: "ci/test", Description: "Tests failed"},
|
||||
},
|
||||
@@ -495,7 +537,7 @@ func TestEvaluateCIStatus(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "error status",
|
||||
statuses: []vcs.CommitStatus{
|
||||
statuses: []gitea.CommitStatus{
|
||||
{Status: "error", Context: "ci/lint", Description: "Lint error"},
|
||||
},
|
||||
wantPassed: false,
|
||||
@@ -503,16 +545,16 @@ func TestEvaluateCIStatus(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "pending treated as not-failed",
|
||||
statuses: []vcs.CommitStatus{
|
||||
statuses: []gitea.CommitStatus{
|
||||
{Status: "pending", Context: "ci/build", Description: "In progress"},
|
||||
{Status: "success", Context: "ci/test", Description: "Tests passed"},
|
||||
},
|
||||
wantPassed: true,
|
||||
wantSubstr: "no failures",
|
||||
wantSubstr: "all checks passed",
|
||||
},
|
||||
{
|
||||
name: "multiple failures",
|
||||
statuses: []vcs.CommitStatus{
|
||||
statuses: []gitea.CommitStatus{
|
||||
{Status: "failure", Context: "ci/build", Description: "Build failed"},
|
||||
{Status: "failure", Context: "ci/test", Description: "Tests failed"},
|
||||
},
|
||||
@@ -521,7 +563,7 @@ func TestEvaluateCIStatus(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "mixed with pending and failure",
|
||||
statuses: []vcs.CommitStatus{
|
||||
statuses: []gitea.CommitStatus{
|
||||
{Status: "success", Context: "ci/build", Description: "Build passed"},
|
||||
{Status: "pending", Context: "ci/deploy", Description: "Deploying"},
|
||||
{Status: "failure", Context: "ci/test", Description: "Tests failed"},
|
||||
@@ -750,7 +792,7 @@ func TestMainSubprocess_InvalidReviewerName(t *testing.T) {
|
||||
if os.Getenv("TEST_SUBPROCESS_MAIN") == "1" {
|
||||
flag.CommandLine = flag.NewFlagSet(os.Args[0], flag.ExitOnError)
|
||||
os.Args = []string{"review-bot",
|
||||
"--vcs-url", "http://localhost",
|
||||
"--gitea-url", "http://localhost",
|
||||
"--repo", "owner/repo",
|
||||
"--pr", "1",
|
||||
"--reviewer-name", "invalid name",
|
||||
@@ -778,7 +820,7 @@ func TestMainSubprocess_InvalidRepo(t *testing.T) {
|
||||
if os.Getenv("TEST_SUBPROCESS_MAIN") == "1" {
|
||||
flag.CommandLine = flag.NewFlagSet(os.Args[0], flag.ExitOnError)
|
||||
os.Args = []string{"review-bot",
|
||||
"--vcs-url", "http://localhost",
|
||||
"--gitea-url", "http://localhost",
|
||||
"--repo", "invalidrepo",
|
||||
"--pr", "1",
|
||||
"--reviewer-token", "tok",
|
||||
@@ -805,7 +847,7 @@ func TestMainSubprocess_InvalidPRNumber(t *testing.T) {
|
||||
if os.Getenv("TEST_SUBPROCESS_MAIN") == "1" {
|
||||
flag.CommandLine = flag.NewFlagSet(os.Args[0], flag.ExitOnError)
|
||||
os.Args = []string{"review-bot",
|
||||
"--vcs-url", "http://localhost",
|
||||
"--gitea-url", "http://localhost",
|
||||
"--repo", "owner/repo",
|
||||
"--pr", "notanumber",
|
||||
"--reviewer-token", "tok",
|
||||
@@ -832,7 +874,7 @@ func TestMainSubprocess_InvalidTemperature(t *testing.T) {
|
||||
if os.Getenv("TEST_SUBPROCESS_MAIN") == "1" {
|
||||
flag.CommandLine = flag.NewFlagSet(os.Args[0], flag.ExitOnError)
|
||||
os.Args = []string{"review-bot",
|
||||
"--vcs-url", "http://localhost",
|
||||
"--gitea-url", "http://localhost",
|
||||
"--repo", "owner/repo",
|
||||
"--pr", "1",
|
||||
"--reviewer-token", "tok",
|
||||
@@ -860,7 +902,7 @@ func TestMainSubprocess_InvalidProvider(t *testing.T) {
|
||||
if os.Getenv("TEST_SUBPROCESS_MAIN") == "1" {
|
||||
flag.CommandLine = flag.NewFlagSet(os.Args[0], flag.ExitOnError)
|
||||
os.Args = []string{"review-bot",
|
||||
"--vcs-url", "http://localhost",
|
||||
"--gitea-url", "http://localhost",
|
||||
"--repo", "owner/repo",
|
||||
"--pr", "1",
|
||||
"--reviewer-token", "tok",
|
||||
@@ -884,35 +926,7 @@ func TestMainSubprocess_InvalidProvider(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestMainSubprocess_InvalidVCSProvider(t *testing.T) {
|
||||
if os.Getenv("TEST_SUBPROCESS_MAIN") == "1" {
|
||||
flag.CommandLine = flag.NewFlagSet(os.Args[0], flag.ExitOnError)
|
||||
os.Args = []string{"review-bot",
|
||||
"--provider", "invalid",
|
||||
"--vcs-url", "http://localhost",
|
||||
"--repo", "owner/repo",
|
||||
"--pr", "1",
|
||||
"--reviewer-token", "tok",
|
||||
"--llm-base-url", "http://localhost",
|
||||
"--llm-api-key", "key",
|
||||
"--llm-model", "model",
|
||||
}
|
||||
main()
|
||||
return
|
||||
}
|
||||
|
||||
cmd := exec.Command(os.Args[0], "-test.run=TestMainSubprocess_InvalidVCSProvider")
|
||||
cmd.Env = append(cleanEnv(), "TEST_SUBPROCESS_MAIN=1")
|
||||
out, err := cmd.CombinedOutput()
|
||||
if err == nil {
|
||||
t.Fatal("expected non-zero exit with invalid VCS provider")
|
||||
}
|
||||
if !strings.Contains(string(out), "invalid --provider") {
|
||||
t.Errorf("expected error about invalid --provider, got: %s", out)
|
||||
}
|
||||
}
|
||||
|
||||
// cleanEnv returns environ without any GITEA/LLM/REVIEWER/VCS env vars that would
|
||||
// cleanEnv returns environ without any GITEA/LLM/REVIEWER env vars that would
|
||||
// interfere with testing missing-flag scenarios.
|
||||
func cleanEnv() []string {
|
||||
var env []string
|
||||
@@ -920,7 +934,6 @@ func cleanEnv() []string {
|
||||
key := strings.SplitN(e, "=", 2)[0]
|
||||
switch {
|
||||
case strings.HasPrefix(key, "GITEA_"),
|
||||
strings.HasPrefix(key, "VCS_"),
|
||||
strings.HasPrefix(key, "LLM_"),
|
||||
strings.HasPrefix(key, "REVIEWER_"),
|
||||
strings.HasPrefix(key, "PR_"),
|
||||
@@ -938,12 +951,12 @@ func cleanEnv() []string {
|
||||
}
|
||||
|
||||
func TestFindAllOwnReviews(t *testing.T) {
|
||||
reviews := []vcs.Review{
|
||||
makeReview(1, "bot", "APPROVED", false, "<!-- review-bot:sonnet -->\nfirst review"),
|
||||
makeReview(2, "bot", "APPROVED", false, "<!-- review-bot:gpt -->\nother bot"),
|
||||
makeReview(3, "bot", "APPROVED", false, "<!-- review-bot:sonnet -->\nsecond review"),
|
||||
makeReview(4, "bot", "APPROVED", false, "~~Original review~~\n<!-- review-bot:sonnet -->\nsuperseded"),
|
||||
makeReview(5, "bot", "APPROVED", false, "<!-- review-bot:sonnet -->\nthird review"),
|
||||
reviews := []gitea.Review{
|
||||
{ID: 1, Body: "<!-- review-bot:sonnet -->\nfirst review"},
|
||||
{ID: 2, Body: "<!-- review-bot:gpt -->\nother bot"},
|
||||
{ID: 3, Body: "<!-- review-bot:sonnet -->\nsecond review"},
|
||||
{ID: 4, Body: "~~Original review~~\n<!-- review-bot:sonnet -->\nsuperseded"},
|
||||
{ID: 5, Body: "<!-- review-bot:sonnet -->\nthird review"},
|
||||
}
|
||||
|
||||
got := findAllOwnReviews(reviews, "<!-- review-bot:sonnet -->")
|
||||
@@ -1007,23 +1020,3 @@ func TestShouldSkipStaleReview(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerdictToEvent(t *testing.T) {
|
||||
tests := []struct {
|
||||
verdict string
|
||||
want vcs.ReviewEvent
|
||||
}{
|
||||
{"APPROVE", vcs.ReviewEventApprove},
|
||||
{"REQUEST_CHANGES", vcs.ReviewEventRequestChanges},
|
||||
{"COMMENT", vcs.ReviewEventComment},
|
||||
{"other", vcs.ReviewEventComment},
|
||||
{"", vcs.ReviewEventComment},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
got := verdictToEvent(tc.verdict)
|
||||
if got != tc.want {
|
||||
t.Errorf("verdictToEvent(%q) = %q, want %q", tc.verdict, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@ JSON is awkward for persona files that contain multi-line text (identity, severi
|
||||
- Backwards compatibility: existing JSON personas must continue to work
|
||||
- Security: protect against DoS via deeply nested YAML (AIKIDO-2024-10486)
|
||||
- Consistency: use `.yaml` extension (not `.yml`)
|
||||
- Library: use `github.com/goccy/go-yaml` v1.16.0+ (approved in CONVENTIONS.md); we implement custom AST-based depth/node-count checks for precise alias-aware validation
|
||||
- Library: use `gopkg.in/yaml.v3` (approved in CONVENTIONS.md) with explicit depth limiting
|
||||
|
||||
## Proposed Approach
|
||||
|
||||
@@ -33,16 +33,37 @@ func parsePersona(data []byte, source string) (*Persona, error) {
|
||||
|
||||
### YAML Parsing with Depth Protection
|
||||
|
||||
We implement a custom AST-based depth/node-count walk (`checkYAMLDepth` in
|
||||
`review/persona.go`) rather than relying on library decoder options. Key design
|
||||
decisions:
|
||||
```go
|
||||
func unmarshalYAMLWithDepthLimit(data []byte, out any, maxDepth int) error {
|
||||
var node yaml.Node
|
||||
dec := yaml.NewDecoder(bytes.NewReader(data))
|
||||
if err := dec.Decode(&node); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := checkYAMLDepth(&node, 0, maxDepth); err != nil {
|
||||
return err
|
||||
}
|
||||
return node.Decode(out)
|
||||
}
|
||||
|
||||
- **Library:** `github.com/goccy/go-yaml` with `ast.Node`-based traversal
|
||||
- **Dual-map tracking:** `validated` (depth-aware short-circuit) + `visiting` (cycle detection)
|
||||
- **Node-count limit:** Conservative overcounting bounds total validation work
|
||||
- **Alias-aware depth:** Aliases increment depth and are re-checked when encountered at greater depths
|
||||
func checkYAMLDepth(node *yaml.Node, depth, maxDepth int) error {
|
||||
if depth > maxDepth {
|
||||
return fmt.Errorf("YAML nesting depth exceeds maximum (%d)", maxDepth)
|
||||
}
|
||||
// Handle alias nodes by following the Alias pointer
|
||||
if node.Kind == yaml.AliasNode && node.Alias != nil {
|
||||
return checkYAMLDepth(node.Alias, depth, maxDepth)
|
||||
}
|
||||
for _, child := range node.Content {
|
||||
if err := checkYAMLDepth(child, depth+1, maxDepth); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
```
|
||||
|
||||
See `review/persona.go:checkYAMLDepth` for the authoritative implementation.
|
||||
The `gopkg.in/yaml.v3` library does not have built-in depth protection, so we implement explicit depth checking by first decoding into a `yaml.Node`, walking the tree to verify depth (including alias resolution), then decoding into the target struct.
|
||||
|
||||
## State/Data Model
|
||||
|
||||
@@ -53,7 +74,7 @@ No new state. Same `Persona` struct, just different parsing.
|
||||
| Error | Handling |
|
||||
|-------|----------|
|
||||
| Invalid YAML syntax | Return parse error with source file |
|
||||
| Deeply nested YAML | Custom AST walk (`checkYAMLDepth`) rejects before decode |
|
||||
| Deeply nested YAML | Library rejects (v1.16.0+ fix) |
|
||||
| Unknown extension | Fall back to JSON parsing |
|
||||
| Missing required fields | Validation rejects after parse |
|
||||
|
||||
|
||||
+23
-67
@@ -4,9 +4,7 @@
|
||||
package github
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
@@ -194,19 +192,12 @@ func (c *Client) SetRetryBackoff(d []time.Duration) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// doWithRetry performs an HTTP request with retry on 429 rate limit responses.
|
||||
// It delegates request construction to buildReq, which is called on each attempt
|
||||
// to produce a fresh *http.Request (allowing body re-reads for POST/PUT).
|
||||
// doRequest performs an HTTP request with retry on 429 rate limit responses.
|
||||
// It respects the Retry-After header when present (capped at maxRetryAfter).
|
||||
// Transport errors (network failures, context cancellation) are not retried.
|
||||
func (c *Client) doWithRetry(ctx context.Context, reqURL string, buildReq func() (*http.Request, error)) ([]byte, error) {
|
||||
func (c *Client) doRequest(ctx context.Context, method, reqURL string, accept string) ([]byte, error) {
|
||||
const maxRetryAfter = 120 * time.Second
|
||||
|
||||
// maxErrorBodyBytes limits how much of an error response body is stored.
|
||||
// Kept small (4 KiB) to reduce the risk of sensitive data leakage if callers
|
||||
// log APIError.Body directly. Error() further truncates to 200 bytes.
|
||||
const maxErrorBodyBytes = 4 * 1024
|
||||
|
||||
// backoff holds per-attempt delays: backoff[i] is the delay before attempt i+1.
|
||||
// Length must be maxRetryAttempts-1 (one entry per retry gap).
|
||||
// SetRetryBackoff validates at configuration time; the default is always valid.
|
||||
@@ -220,6 +211,11 @@ func (c *Client) doWithRetry(ctx context.Context, reqURL string, buildReq func()
|
||||
copy(backoff, defaultBackoff)
|
||||
}
|
||||
|
||||
// maxErrorBodyBytes limits how much of an error response body is stored.
|
||||
// Kept small (4 KiB) to reduce the risk of sensitive data leakage if callers
|
||||
// log APIError.Body directly. Error() further truncates to 200 bytes.
|
||||
const maxErrorBodyBytes = 4 * 1024
|
||||
|
||||
// Reject non-HTTPS URLs early since the URL is immutable across retries.
|
||||
if c.token != "" && !c.allowInsecureHTTP {
|
||||
parsed, err := url.Parse(reqURL)
|
||||
@@ -250,10 +246,22 @@ func (c *Client) doWithRetry(ctx context.Context, reqURL string, buildReq func()
|
||||
}
|
||||
}
|
||||
|
||||
req, err := buildReq()
|
||||
req, err := http.NewRequestWithContext(ctx, method, reqURL, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("create request: %w", err)
|
||||
}
|
||||
if c.token != "" {
|
||||
// Bearer is the OAuth2 standard and is accepted by GitHub for both
|
||||
// classic PATs and fine-grained tokens. The alternative "token" scheme
|
||||
// is GitHub-specific and offers no additional compatibility.
|
||||
req.Header.Set("Authorization", "Bearer "+c.token)
|
||||
}
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
if accept != "" {
|
||||
req.Header.Set("Accept", accept)
|
||||
} else {
|
||||
req.Header.Set("Accept", "application/vnd.github+json")
|
||||
}
|
||||
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
@@ -264,11 +272,11 @@ func (c *Client) doWithRetry(ctx context.Context, reqURL string, buildReq func()
|
||||
respStatus := resp.StatusCode
|
||||
retryAfterHeader := resp.Header.Get("Retry-After")
|
||||
|
||||
body, done, handleErr := c.handleResponse(resp, maxResponseBytes, maxErrorBodyBytes)
|
||||
body, done, err := c.handleResponse(resp, maxResponseBytes, maxErrorBodyBytes)
|
||||
if done {
|
||||
return body, handleErr
|
||||
return body, err
|
||||
}
|
||||
lastErr = handleErr
|
||||
lastErr = err
|
||||
|
||||
// Retry on 429 rate limit
|
||||
if respStatus == http.StatusTooManyRequests && attempt < maxRetryAttempts-1 {
|
||||
@@ -306,32 +314,6 @@ func (c *Client) doWithRetry(ctx context.Context, reqURL string, buildReq func()
|
||||
return nil, lastErr
|
||||
}
|
||||
|
||||
// doRequest performs an HTTP request with retry on 429 rate limit responses.
|
||||
// It respects the Retry-After header when present (capped at maxRetryAfter).
|
||||
// Transport errors (network failures, context cancellation) are not retried.
|
||||
func (c *Client) doRequest(ctx context.Context, method, reqURL string, accept string) ([]byte, error) {
|
||||
buildReq := func() (*http.Request, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, method, reqURL, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if c.token != "" {
|
||||
// Bearer is the OAuth2 standard and is accepted by GitHub for both
|
||||
// classic PATs and fine-grained tokens. The alternative "token" scheme
|
||||
// is GitHub-specific and offers no additional compatibility.
|
||||
req.Header.Set("Authorization", "Bearer "+c.token)
|
||||
}
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
if accept != "" {
|
||||
req.Header.Set("Accept", accept)
|
||||
} else {
|
||||
req.Header.Set("Accept", "application/vnd.github+json")
|
||||
}
|
||||
return req, nil
|
||||
}
|
||||
return c.doWithRetry(ctx, reqURL, buildReq)
|
||||
}
|
||||
|
||||
// handleResponse reads and closes the response body, returning the result.
|
||||
// It uses defer to ensure the body is always closed regardless of code path.
|
||||
// Returns (body, done, err) where done=true means the caller should return immediately.
|
||||
@@ -360,29 +342,3 @@ func (c *Client) handleResponse(resp *http.Response, maxRespBytes int, maxErrByt
|
||||
func (c *Client) doGet(ctx context.Context, reqURL string) ([]byte, error) {
|
||||
return c.doRequest(ctx, http.MethodGet, reqURL, "")
|
||||
}
|
||||
|
||||
// doJSONRequest performs an HTTP request with a JSON body and returns the response body.
|
||||
// It delegates retry/backoff/429 handling to doWithRetry.
|
||||
// This is a general-purpose helper used by any method that needs to send JSON payloads
|
||||
// (e.g. PostReview, DismissReview).
|
||||
func (c *Client) doJSONRequest(ctx context.Context, method, reqURL string, payload any) ([]byte, error) {
|
||||
jsonBody, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("marshal request body: %w", err)
|
||||
}
|
||||
|
||||
buildReq := func() (*http.Request, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, method, reqURL, bytes.NewReader(jsonBody))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if c.token != "" {
|
||||
req.Header.Set("Authorization", "Bearer "+c.token)
|
||||
}
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
req.Header.Set("Accept", "application/vnd.github+json")
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
return req, nil
|
||||
}
|
||||
return c.doWithRetry(ctx, reqURL, buildReq)
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@ package github
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
@@ -593,59 +592,3 @@ func TestSetRetryBackoff_RejectsInvalidLength(t *testing.T) {
|
||||
t.Fatalf("unexpected error for valid backoff: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDoJSONRequest_429Retry(t *testing.T) {
|
||||
attempts := 0
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
attempts++
|
||||
if attempts < 3 {
|
||||
w.WriteHeader(429)
|
||||
w.Write([]byte(`{"message":"rate limit exceeded"}`))
|
||||
return
|
||||
}
|
||||
w.WriteHeader(200)
|
||||
w.Write([]byte(`{"id":1}`))
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
c := NewClient("token", ts.URL, AllowInsecureHTTP())
|
||||
if err := c.SetRetryBackoff([]time.Duration{1 * time.Millisecond, 1 * time.Millisecond}); err != nil {
|
||||
t.Fatalf("SetRetryBackoff: %v", err)
|
||||
}
|
||||
|
||||
body, err := c.doJSONRequest(context.Background(), http.MethodPost, ts.URL+"/test", map[string]string{"key": "val"})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if attempts != 3 {
|
||||
t.Errorf("expected 3 attempts, got %d", attempts)
|
||||
}
|
||||
if string(body) != `{"id":1}` {
|
||||
t.Errorf("unexpected body: %s", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDoJSONRequest_429ExhaustsRetries(t *testing.T) {
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(429)
|
||||
w.Write([]byte(`{"message":"rate limit"}`))
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
c := NewClient("token", ts.URL, AllowInsecureHTTP())
|
||||
if err := c.SetRetryBackoff([]time.Duration{1 * time.Millisecond, 1 * time.Millisecond}); err != nil {
|
||||
t.Fatalf("SetRetryBackoff: %v", err)
|
||||
}
|
||||
|
||||
_, err := c.doJSONRequest(context.Background(), http.MethodPost, ts.URL+"/test", map[string]string{"key": "val"})
|
||||
if err == nil {
|
||||
t.Fatal("expected error after exhausting retries")
|
||||
}
|
||||
var apiErr *APIError
|
||||
if !errors.As(err, &apiErr) {
|
||||
t.Fatalf("expected APIError, got %T: %v", err, err)
|
||||
}
|
||||
if apiErr.StatusCode != 429 {
|
||||
t.Errorf("expected 429, got %d", apiErr.StatusCode)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,5 +6,8 @@ import (
|
||||
)
|
||||
|
||||
// Compile-time interface conformance assertions.
|
||||
// These verify github.Client satisfies vcs.Client (the full interface).
|
||||
var _ vcs.Client = (*github.Client)(nil)
|
||||
// These verify github.Client satisfies vcs.PRReader and vcs.FileReader.
|
||||
var (
|
||||
_ vcs.PRReader = (*github.Client)(nil)
|
||||
_ vcs.FileReader = (*github.Client)(nil)
|
||||
)
|
||||
|
||||
@@ -1,198 +0,0 @@
|
||||
package github
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/url"
|
||||
|
||||
"gitea.weiker.me/rodin/review-bot/vcs"
|
||||
)
|
||||
|
||||
const (
|
||||
// reviewsPerPage is the number of reviews to fetch per API page.
|
||||
reviewsPerPage = 100
|
||||
// maxReviewPages is the maximum number of pages to paginate through
|
||||
// when listing reviews. Acts as a safeguard against infinite pagination.
|
||||
maxReviewPages = 100
|
||||
)
|
||||
|
||||
// reviewResponse is the GitHub API response for a pull request review.
|
||||
type reviewResponse struct {
|
||||
ID int64 `json:"id"`
|
||||
Body string `json:"body"`
|
||||
User struct {
|
||||
Login string `json:"login"`
|
||||
} `json:"user"`
|
||||
State string `json:"state"`
|
||||
CommitID string `json:"commit_id"`
|
||||
}
|
||||
|
||||
// reviewCreateRequest is the GitHub API request body for creating a pull request review.
|
||||
type reviewCreateRequest struct {
|
||||
Body string `json:"body"`
|
||||
Event string `json:"event"`
|
||||
Comments []reviewCommentCreate `json:"comments,omitempty"`
|
||||
CommitID string `json:"commit_id,omitempty"`
|
||||
}
|
||||
|
||||
// reviewCommentCreate is a single inline comment in a review creation request.
|
||||
type reviewCommentCreate struct {
|
||||
Path string `json:"path"`
|
||||
Position int `json:"position"`
|
||||
Body string `json:"body"`
|
||||
}
|
||||
|
||||
// dismissReviewRequest is the GitHub API request body for dismissing a review.
|
||||
type dismissReviewRequest struct {
|
||||
Message string `json:"message"`
|
||||
}
|
||||
|
||||
// userResponse is the GitHub API response for the authenticated user.
|
||||
type userResponse struct {
|
||||
Login string `json:"login"`
|
||||
}
|
||||
|
||||
// translateReviewEvent converts a vcs.ReviewEvent to the GitHub API event string.
|
||||
func translateReviewEvent(event vcs.ReviewEvent) string {
|
||||
switch event {
|
||||
case vcs.ReviewEventApprove:
|
||||
return "APPROVE"
|
||||
case vcs.ReviewEventRequestChanges:
|
||||
return "REQUEST_CHANGES"
|
||||
default:
|
||||
return "COMMENT"
|
||||
}
|
||||
}
|
||||
|
||||
// PostReview creates a new review on a pull request.
|
||||
func (c *Client) PostReview(ctx context.Context, owner, repo string, number int, req vcs.ReviewRequest) (*vcs.Review, error) {
|
||||
reqURL := fmt.Sprintf("%s/repos/%s/%s/pulls/%d/reviews",
|
||||
c.baseURL, url.PathEscape(owner), url.PathEscape(repo), number)
|
||||
|
||||
payload := reviewCreateRequest{
|
||||
Body: req.Body,
|
||||
Event: translateReviewEvent(req.Event),
|
||||
}
|
||||
|
||||
for _, comment := range req.Comments {
|
||||
rc := reviewCommentCreate{
|
||||
Path: comment.Path,
|
||||
Position: comment.Position,
|
||||
Body: comment.Body,
|
||||
}
|
||||
payload.Comments = append(payload.Comments, rc)
|
||||
// Use CommitID from the first comment that has one.
|
||||
// All comments in a single review are expected to reference the same commit.
|
||||
if payload.CommitID == "" && comment.CommitID != "" {
|
||||
payload.CommitID = comment.CommitID
|
||||
}
|
||||
}
|
||||
|
||||
body, err := c.doJSONRequest(ctx, http.MethodPost, reqURL, payload)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("post review: %w", err)
|
||||
}
|
||||
|
||||
var resp reviewResponse
|
||||
if err := json.Unmarshal(body, &resp); err != nil {
|
||||
return nil, fmt.Errorf("parse review response: %w", err)
|
||||
}
|
||||
|
||||
return &vcs.Review{
|
||||
ID: resp.ID,
|
||||
Body: resp.Body,
|
||||
User: vcs.UserInfo{Login: resp.User.Login},
|
||||
State: resp.State,
|
||||
CommitID: resp.CommitID,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// ListReviews lists all reviews on a pull request.
|
||||
func (c *Client) ListReviews(ctx context.Context, owner, repo string, number int) ([]vcs.Review, error) {
|
||||
var allReviews []vcs.Review
|
||||
|
||||
for page := 1; page <= maxReviewPages; page++ {
|
||||
reqURL := fmt.Sprintf("%s/repos/%s/%s/pulls/%d/reviews?per_page=%d&page=%d",
|
||||
c.baseURL, url.PathEscape(owner), url.PathEscape(repo), number, reviewsPerPage, page)
|
||||
body, err := c.doGet(ctx, reqURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("list reviews page %d: %w", page, err)
|
||||
}
|
||||
var reviews []reviewResponse
|
||||
if err := json.Unmarshal(body, &reviews); err != nil {
|
||||
return nil, fmt.Errorf("parse reviews JSON: %w", err)
|
||||
}
|
||||
if len(reviews) == 0 {
|
||||
break
|
||||
}
|
||||
for _, r := range reviews {
|
||||
allReviews = append(allReviews, vcs.Review{
|
||||
ID: r.ID,
|
||||
Body: r.Body,
|
||||
User: vcs.UserInfo{Login: r.User.Login},
|
||||
State: r.State,
|
||||
CommitID: r.CommitID,
|
||||
})
|
||||
}
|
||||
if len(reviews) < reviewsPerPage {
|
||||
break
|
||||
}
|
||||
// NOTE: This warning only fires when the final page was full (the short-page
|
||||
// break above did not trigger), meaning additional reviews likely exist beyond
|
||||
// our page limit. The loop naturally exits after this iteration since page
|
||||
// increments past maxReviewPages.
|
||||
if page == maxReviewPages {
|
||||
slog.Warn("ListReviews hit page limit; results may be truncated",
|
||||
"owner", owner, "repo", repo, "pr", number,
|
||||
"maxPages", maxReviewPages, "reviewsFetched", len(allReviews))
|
||||
}
|
||||
}
|
||||
|
||||
return allReviews, nil
|
||||
}
|
||||
|
||||
// DeleteReview permanently deletes a review from a pull request.
|
||||
// Use DismissReview instead when the review should remain visible but marked as dismissed
|
||||
// (e.g., superseding an outdated review while preserving history).
|
||||
func (c *Client) DeleteReview(ctx context.Context, owner, repo string, number int, reviewID int64) error {
|
||||
reqURL := fmt.Sprintf("%s/repos/%s/%s/pulls/%d/reviews/%d",
|
||||
c.baseURL, url.PathEscape(owner), url.PathEscape(repo), number, reviewID)
|
||||
_, err := c.doRequest(ctx, http.MethodDelete, reqURL, "")
|
||||
if err != nil {
|
||||
return fmt.Errorf("delete review: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DismissReview dismisses a review on a pull request with a message.
|
||||
func (c *Client) DismissReview(ctx context.Context, owner, repo string, number int, reviewID int64, message string) error {
|
||||
reqURL := fmt.Sprintf("%s/repos/%s/%s/pulls/%d/reviews/%d/dismissals",
|
||||
c.baseURL, url.PathEscape(owner), url.PathEscape(repo), number, reviewID)
|
||||
|
||||
payload := dismissReviewRequest{
|
||||
Message: message,
|
||||
}
|
||||
|
||||
_, err := c.doJSONRequest(ctx, http.MethodPut, reqURL, payload)
|
||||
if err != nil {
|
||||
return fmt.Errorf("dismiss review: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetAuthenticatedUser returns the login name of the authenticated user.
|
||||
func (c *Client) GetAuthenticatedUser(ctx context.Context) (string, error) {
|
||||
reqURL := fmt.Sprintf("%s/user", c.baseURL)
|
||||
body, err := c.doGet(ctx, reqURL)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("get authenticated user: %w", err)
|
||||
}
|
||||
var resp userResponse
|
||||
if err := json.Unmarshal(body, &resp); err != nil {
|
||||
return "", fmt.Errorf("parse user response: %w", err)
|
||||
}
|
||||
return resp.Login, nil
|
||||
}
|
||||
@@ -2,4 +2,4 @@ module gitea.weiker.me/rodin/review-bot
|
||||
|
||||
go 1.26.2
|
||||
|
||||
require github.com/goccy/go-yaml v1.19.2
|
||||
require gopkg.in/yaml.v3 v3.0.1
|
||||
|
||||
@@ -1,2 +1,4 @@
|
||||
github.com/goccy/go-yaml v1.19.2 h1:PmFC1S6h8ljIz6gMRBopkjP1TVT7xuwrButHID66PoM=
|
||||
github.com/goccy/go-yaml v1.19.2/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
|
||||
@@ -10,6 +10,18 @@ func FormatMarkdown(result *ReviewResult, reviewerName string) string {
|
||||
return FormatMarkdownWithDisplay(result, reviewerName, reviewerName)
|
||||
}
|
||||
|
||||
// GiteaEvent translates a review verdict into the event string used by the
// Gitea API: "APPROVE" becomes "APPROVED", "REQUEST_CHANGES" is passed through
// unchanged, and any other verdict (including the empty string) becomes
// "COMMENT". Matching is exact and case-sensitive.
func GiteaEvent(verdict string) string {
	if verdict == "APPROVE" {
		return "APPROVED"
	}
	if verdict == "REQUEST_CHANGES" {
		return "REQUEST_CHANGES"
	}
	return "COMMENT"
}
|
||||
|
||||
// FormatMarkdownWithDisplay formats a ReviewResult with separate display name and sentinel name.
|
||||
// Note: displayName is not HTML-escaped as Gitea sanitizes rendered Markdown.
|
||||
// Persona display names are controlled by repo owners (trusted input).
|
||||
|
||||
@@ -98,6 +98,25 @@ func TestFormatMarkdown_SpecialChars(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestGiteaEvent(t *testing.T) {
|
||||
tests := []struct {
|
||||
verdict string
|
||||
expected string
|
||||
}{
|
||||
{"APPROVE", "APPROVED"},
|
||||
{"REQUEST_CHANGES", "REQUEST_CHANGES"},
|
||||
{"UNKNOWN", "COMMENT"},
|
||||
{"", "COMMENT"},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
got := GiteaEvent(tc.verdict)
|
||||
if got != tc.expected {
|
||||
t.Errorf("GiteaEvent(%q) = %q, want %q", tc.verdict, got, tc.expected)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFormatMarkdown_Sentinel(t *testing.T) {
|
||||
result := &ReviewResult{
|
||||
Verdict: "APPROVE",
|
||||
|
||||
+38
-146
@@ -5,15 +5,12 @@ import (
|
||||
"embed"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/goccy/go-yaml"
|
||||
"github.com/goccy/go-yaml/ast"
|
||||
"github.com/goccy/go-yaml/parser"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
//go:embed personas/*.yaml
|
||||
@@ -121,7 +118,9 @@ func ListBuiltinPersonas() []string {
|
||||
default:
|
||||
continue
|
||||
}
|
||||
seen[personaName] = true
|
||||
if !seen[personaName] {
|
||||
seen[personaName] = true
|
||||
}
|
||||
}
|
||||
names := make([]string, 0, len(seen))
|
||||
for name := range seen {
|
||||
@@ -143,19 +142,10 @@ func parsePersona(data []byte, source string) (*Persona, error) {
|
||||
err = unmarshalYAMLWithDepthLimit(data, &p, MaxYAMLDepth)
|
||||
} else {
|
||||
// Use json.Decoder with DisallowUnknownFields for consistency with
|
||||
// YAML's Strict() - both reject unknown fields to catch typos.
|
||||
// YAML's KnownFields(true) - both reject unknown fields to catch typos.
|
||||
dec := json.NewDecoder(bytes.NewReader(data))
|
||||
dec.DisallowUnknownFields()
|
||||
err = dec.Decode(&p)
|
||||
if err == nil {
|
||||
// Reject trailing content after the first valid JSON object.
|
||||
// Without this check, input like `{"name":"x"}garbage` would
|
||||
// silently succeed because Decoder stops after one object.
|
||||
var dummy json.RawMessage
|
||||
if err2 := dec.Decode(&dummy); err2 != io.EOF {
|
||||
err = fmt.Errorf("unexpected trailing content after JSON object")
|
||||
}
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse persona %s: %w", source, err)
|
||||
@@ -166,164 +156,70 @@ func parsePersona(data []byte, source string) (*Persona, error) {
|
||||
return &p, nil
|
||||
}
|
||||
|
||||
// unmarshalYAMLWithDepthLimit unmarshals YAML data with three safety checks:
|
||||
// - Depth limiting: rejects AST trees exceeding maxDepth to prevent stack exhaustion.
|
||||
// - Multi-document rejection: prevents silent data loss from ignored extra documents.
|
||||
// - Strict field checking: rejects unknown YAML keys to catch typos early.
|
||||
// unmarshalYAMLWithDepthLimit unmarshals YAML data with explicit depth limiting
|
||||
// and strict field checking. This protects against stack exhaustion from deeply
|
||||
// nested structures and catches typos in field names.
|
||||
// Multi-document YAML files are rejected to prevent silent data loss.
|
||||
func unmarshalYAMLWithDepthLimit(data []byte, out any, maxDepth int) error {
|
||||
// First pass: parse into AST to check depth limits, node counts, and
|
||||
// multi-document rejection. This prevents stack exhaustion before we
|
||||
// attempt to decode into structs.
|
||||
file, err := parser.ParseBytes(data, 0)
|
||||
if err != nil {
|
||||
// First pass: decode into a yaml.Node to check depth limits and node counts.
|
||||
// This prevents stack exhaustion before we attempt to decode into structs.
|
||||
var node yaml.Node
|
||||
dec := yaml.NewDecoder(bytes.NewReader(data))
|
||||
if err := dec.Decode(&node); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Reject empty YAML input (whitespace-only, comment-only, or truly empty files).
|
||||
// The parser returns a single doc with nil body for these cases.
|
||||
if len(file.Docs) == 0 || file.Docs[0].Body == nil {
|
||||
return fmt.Errorf("empty YAML document")
|
||||
}
|
||||
|
||||
// Reject multi-document YAML files - silently ignoring additional documents
|
||||
// could lead to confusing behavior where users think their changes take effect.
|
||||
if len(file.Docs) > 1 {
|
||||
var extra yaml.Node
|
||||
if dec.Decode(&extra) == nil {
|
||||
return fmt.Errorf("multi-document YAML is not supported; only single-document files are allowed")
|
||||
}
|
||||
|
||||
nodeCount := 0
|
||||
if err := checkYAMLDepth(file.Docs[0].Body, 0, maxDepth, MaxYAMLNodes, make(map[ast.Node]int), make(map[ast.Node]bool), &nodeCount); err != nil {
|
||||
if err := checkYAMLDepth(&node, 0, maxDepth, MaxYAMLNodes, make(map[*yaml.Node]struct{}), &nodeCount); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Second pass: decode with strict field checking enabled.
|
||||
// Strict() rejects unknown keys, catching typos like "focuss" or "identiy".
|
||||
//
|
||||
// Safety note: goccy/go-yaml's decoder does not expand YAML aliases
|
||||
// recursively — it resolves them via the pre-built AST, which our first
|
||||
// pass already depth-checked. Alias chains that would exceed depth limits
|
||||
// are caught above; the decoder merely reads the resolved scalar values.
|
||||
dec := yaml.NewDecoder(bytes.NewReader(data), yaml.Strict())
|
||||
return dec.Decode(out)
|
||||
// KnownFields(true) rejects unknown keys, catching typos like "focuss" or "identiy".
|
||||
// We must re-decode from the original data because yaml.Node.Decode() doesn't
|
||||
// support the KnownFields option.
|
||||
strictDec := yaml.NewDecoder(bytes.NewReader(data))
|
||||
strictDec.KnownFields(true)
|
||||
return strictDec.Decode(out)
|
||||
}
|
||||
|
||||
// checkYAMLDepth recursively checks that YAML AST nodes don't exceed the depth
|
||||
// limit or the total node count limit. It uses two tracking maps:
|
||||
// - validated: maps each node to the maximum depth at which it was previously
|
||||
// checked. If a node is revisited at a deeper depth (e.g., via an alias),
|
||||
// we re-check it to ensure the combined effective depth doesn't exceed limits.
|
||||
// - visiting: per-path recursion stack for true cycle detection. A node on the
|
||||
// current path is a cycle (alias loop); we return nil to avoid infinite recursion.
|
||||
//
|
||||
// This design prevents the alias depth bypass where an anchored subtree validated
|
||||
// at a shallow depth could be referenced via alias at a greater depth, effectively
|
||||
// exceeding MaxYAMLDepth.
|
||||
func checkYAMLDepth(node ast.Node, depth, maxDepth, maxNodes int, validated map[ast.Node]int, visiting map[ast.Node]bool, nodeCount *int) error {
|
||||
if node == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// checkYAMLDepth recursively checks that YAML nodes don't exceed the depth limit
|
||||
// or the total node count limit. It also detects alias cycles to prevent infinite
|
||||
// recursion from crafted YAML with self-referential aliases.
|
||||
func checkYAMLDepth(node *yaml.Node, depth, maxDepth, maxNodes int, seen map[*yaml.Node]struct{}, nodeCount *int) error {
|
||||
if depth > maxDepth {
|
||||
return fmt.Errorf("YAML nesting depth exceeds maximum (%d)", maxDepth)
|
||||
}
|
||||
|
||||
// Cycle detection: if we're currently visiting this node on the current
|
||||
// recursion path, it's a cycle (e.g., alias pointing to an ancestor).
|
||||
// Return nil to break the cycle without error — cycles are a structural
|
||||
// property, not a depth violation.
|
||||
if visiting[node] {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Track total nodes visited as defense-in-depth against wide-but-shallow attacks.
|
||||
// Placed after cycle detection but before the depth-aware short-circuit. This means
|
||||
// nodes revisited at shallower depths (via aliases) are counted each time they are
|
||||
// encountered — intentional conservative overcounting. This bounds the total work
|
||||
// performed during validation rather than tracking unique nodes, which is the safer
|
||||
// security posture for untrusted YAML input.
|
||||
*nodeCount++
|
||||
if *nodeCount > maxNodes {
|
||||
return fmt.Errorf("YAML node count exceeds maximum (%d)", maxNodes)
|
||||
}
|
||||
|
||||
// Depth-aware short-circuit: skip re-validation only when the current visit
|
||||
// depth is the same or shallower than the depth at which this node was
|
||||
// previously validated. A shallower (or equal) current depth means the
|
||||
// prior, deeper validation already covered any subtree depth violations.
|
||||
// If the current depth exceeds the previous validation depth (e.g., an alias
|
||||
// references this node deeper in the tree), we must re-traverse to ensure
|
||||
// the combined effective depth doesn't exceed maxDepth.
|
||||
//
|
||||
// Note: using ast.Node (interface) as map key relies on pointer identity,
|
||||
// which is correct because all goccy/go-yaml AST node types are pointer
|
||||
// receivers (*MappingNode, *SequenceNode, etc.), never value types.
|
||||
if prevDepth, ok := validated[node]; ok && depth <= prevDepth {
|
||||
return nil
|
||||
// Cycle detection: if we've seen this node before, we're in a cycle.
|
||||
if _, ok := seen[node]; ok {
|
||||
return nil // Already validated this subtree, skip to avoid infinite recursion.
|
||||
}
|
||||
validated[node] = depth
|
||||
seen[node] = struct{}{}
|
||||
|
||||
// Mark as visiting (on the current recursion path) for cycle detection.
|
||||
visiting[node] = true
|
||||
defer func() { visiting[node] = false }()
|
||||
// Handle alias nodes: follow the alias to its anchor target.
|
||||
// Increment depth when following aliases since they expand the effective structure.
|
||||
if node.Kind == yaml.AliasNode && node.Alias != nil {
|
||||
return checkYAMLDepth(node.Alias, depth+1, maxDepth, maxNodes, seen, nodeCount)
|
||||
}
|
||||
|
||||
// Walk children based on node type.
|
||||
switch n := node.(type) {
|
||||
case *ast.MappingNode:
|
||||
for _, value := range n.Values {
|
||||
if err := checkYAMLDepth(value, depth+1, maxDepth, maxNodes, validated, visiting, nodeCount); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
case *ast.MappingValueNode:
|
||||
// Both Key and Value are visited at depth+1 relative to this
|
||||
// MappingValueNode. Since MappingNode visits its MappingValueNode
|
||||
// children at depth+1 as well, keys and values end up at depth+2
|
||||
// from the parent MappingNode. This is intentional: it mirrors the
|
||||
// actual nesting structure (mapping → key-value pair → key/value).
|
||||
if err := checkYAMLDepth(n.Key, depth+1, maxDepth, maxNodes, validated, visiting, nodeCount); err != nil {
|
||||
for _, child := range node.Content {
|
||||
if err := checkYAMLDepth(child, depth+1, maxDepth, maxNodes, seen, nodeCount); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := checkYAMLDepth(n.Value, depth+1, maxDepth, maxNodes, validated, visiting, nodeCount); err != nil {
|
||||
return err
|
||||
}
|
||||
case *ast.SequenceNode:
|
||||
for _, value := range n.Values {
|
||||
if err := checkYAMLDepth(value, depth+1, maxDepth, maxNodes, validated, visiting, nodeCount); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
case *ast.AliasNode:
|
||||
// Follow alias to its target, incrementing depth since aliases expand
|
||||
// the effective structure.
|
||||
if err := checkYAMLDepth(n.Value, depth+1, maxDepth, maxNodes, validated, visiting, nodeCount); err != nil {
|
||||
return err
|
||||
}
|
||||
case *ast.AnchorNode:
|
||||
// Increment depth for anchor values as a conservative measure: the
|
||||
// anchor definition itself is structural, and treating it as a depth
|
||||
// level ensures that deeply nested anchors are caught at definition
|
||||
// time rather than only when referenced via alias. This +1 is
|
||||
// asymmetric with alias (which also increments) — by design, the
|
||||
// effective depth budget for anchored-then-aliased content is reduced
|
||||
// because both the definition site and the reference site each consume
|
||||
// a level, making deeply nested anchor/alias pairs hit the limit sooner.
|
||||
if err := checkYAMLDepth(n.Value, depth+1, maxDepth, maxNodes, validated, visiting, nodeCount); err != nil {
|
||||
return err
|
||||
}
|
||||
case *ast.TagNode:
|
||||
if err := checkYAMLDepth(n.Value, depth+1, maxDepth, maxNodes, validated, visiting, nodeCount); err != nil {
|
||||
return err
|
||||
}
|
||||
case *ast.MergeKeyNode:
|
||||
// MergeKeyNode represents the literal "<<" merge key token. It has no
|
||||
// child nodes — the value side of a merge (e.g., *alias) lives in the
|
||||
// parent MappingValueNode.Value, which is already recursed into above.
|
||||
// Explicitly listed here (rather than in the default case) to prevent
|
||||
// future library changes from silently bypassing depth checks.
|
||||
default:
|
||||
// Scalar leaf nodes (StringNode, IntegerNode, FloatNode, BoolNode,
|
||||
// NullNode, InfinityNode, NanNode, LiteralNode) have no children to
|
||||
// recurse into.
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -331,11 +227,7 @@ func checkYAMLDepth(node ast.Node, depth, maxDepth, maxNodes int, validated map[
|
||||
// ParsePersonaBytes parses persona data from bytes with a source label for errors.
|
||||
// This is useful for parsing personas fetched from external sources (e.g., Gitea API)
|
||||
// without requiring filesystem access. Format is detected by source extension.
|
||||
// Input is bounded by MaxPersonaFileSize to prevent resource exhaustion.
|
||||
func ParsePersonaBytes(data []byte, source string) (*Persona, error) {
|
||||
if len(data) > MaxPersonaFileSize {
|
||||
return nil, fmt.Errorf("persona data from %s exceeds maximum size (%d bytes, limit %d)", source, len(data), MaxPersonaFileSize)
|
||||
}
|
||||
return parsePersona(data, source)
|
||||
}
|
||||
|
||||
|
||||
+41
-222
@@ -7,7 +7,7 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/goccy/go-yaml/ast"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
func TestLoadBuiltinPersona(t *testing.T) {
|
||||
@@ -459,14 +459,7 @@ func TestYAMLDeeplyNestedRejection(t *testing.T) {
|
||||
path := filepath.Join(dir, "deeply-nested.yaml")
|
||||
|
||||
// Build a deeply nested YAML structure that exceeds MaxYAMLDepth (20).
|
||||
// Depth accumulation trace for "nested: \n level0: \n level1: ...":
|
||||
// - Document root parsed at depth 0
|
||||
// - Root MappingNode children (MappingValueNodes) visited at depth 1
|
||||
// - "nested" MappingValueNode: key at depth 2, value at depth 2
|
||||
// - Each levelN adds depth via MappingValueNode traversal (key + value)
|
||||
// - Exact depth per level depends on AST structure (MappingNode wrapping),
|
||||
// but 25 levels reliably exceeds MaxYAMLDepth (20) with comfortable margin.
|
||||
// The test uses 25 levels rather than exactly 21 to avoid brittleness.
|
||||
// Each level adds 2 to the depth count (key + value mapping).
|
||||
var sb strings.Builder
|
||||
sb.WriteString("name: test\nidentity: test\nnested:\n")
|
||||
indent := " "
|
||||
@@ -490,35 +483,6 @@ func TestYAMLDeeplyNestedRejection(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestYAMLEmptyFileRejection(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
content string
|
||||
}{
|
||||
{"completely_empty", ""},
|
||||
{"whitespace_only", " \n\n "},
|
||||
{"comment_only", "# just a comment\n"},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, tc.name+".yaml")
|
||||
if err := os.WriteFile(path, []byte(tc.content), 0644); err != nil {
|
||||
t.Fatalf("failed to write test file: %v", err)
|
||||
}
|
||||
|
||||
_, err := LoadPersona(path)
|
||||
if err == nil {
|
||||
t.Fatal("expected error for empty YAML input, got nil")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "empty YAML document") {
|
||||
t.Errorf("expected error containing %q, got: %v", "empty YAML document", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestYAMLFileSizeLimit(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "huge.yaml")
|
||||
@@ -540,41 +504,41 @@ func TestYAMLFileSizeLimit(t *testing.T) {
|
||||
|
||||
func TestYAMLAliasCycleDetection(t *testing.T) {
|
||||
// Test that our checkYAMLDepth function handles alias cycles gracefully
|
||||
// by using the visiting map to prevent infinite recursion.
|
||||
// by using the seen map to prevent infinite recursion.
|
||||
// We test this directly because go-yaml's parser handles most cycles
|
||||
// at parse time, but we need to ensure our checker is robust.
|
||||
|
||||
// Create a node structure where an alias points to a parent node,
|
||||
// simulating what could happen with crafted input.
|
||||
parent := &ast.MappingNode{
|
||||
Values: []*ast.MappingValueNode{
|
||||
{
|
||||
Key: &ast.StringNode{Value: "name"},
|
||||
Value: &ast.StringNode{Value: "test"},
|
||||
},
|
||||
// simulating what could happen with malicious input that bypasses
|
||||
// go-yaml's cycle detection.
|
||||
parent := &yaml.Node{
|
||||
Kind: yaml.MappingNode,
|
||||
Content: []*yaml.Node{
|
||||
{Kind: yaml.ScalarNode, Value: "name"},
|
||||
{Kind: yaml.ScalarNode, Value: "test"},
|
||||
{Kind: yaml.ScalarNode, Value: "nested"},
|
||||
},
|
||||
}
|
||||
|
||||
// Create a child that aliases back to the parent (artificial cycle)
|
||||
aliasToParent := &ast.AliasNode{
|
||||
Value: parent,
|
||||
aliasToParent := &yaml.Node{
|
||||
Kind: yaml.AliasNode,
|
||||
Alias: parent,
|
||||
}
|
||||
parent.Values = append(parent.Values, &ast.MappingValueNode{
|
||||
Key: &ast.StringNode{Value: "nested"},
|
||||
Value: aliasToParent,
|
||||
})
|
||||
parent.Content = append(parent.Content, aliasToParent)
|
||||
|
||||
nodeCount := 0
|
||||
validated := make(map[ast.Node]int)
|
||||
visiting := make(map[ast.Node]bool)
|
||||
seen := make(map[*yaml.Node]struct{})
|
||||
|
||||
// This should NOT hang or stack overflow - cycle detection prevents infinite recursion
|
||||
err := checkYAMLDepth(parent, 0, MaxYAMLDepth, MaxYAMLNodes, validated, visiting, &nodeCount)
|
||||
// This should NOT hang or stack overflow - the seen map prevents infinite recursion
|
||||
err := checkYAMLDepth(parent, 0, MaxYAMLDepth, MaxYAMLNodes, seen, &nodeCount)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error traversing cyclic structure: %v", err)
|
||||
}
|
||||
|
||||
// Verify we tracked the parent in the validated map
|
||||
if _, ok := validated[parent]; !ok {
|
||||
t.Error("parent node not tracked in validated map")
|
||||
// Verify we tracked the parent in the seen map
|
||||
if _, ok := seen[parent]; !ok {
|
||||
t.Error("parent node not tracked in seen map")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -630,82 +594,36 @@ func TestYAMLNodeCountLimit(t *testing.T) {
|
||||
func TestCheckYAMLDepthCycleDetectionDirect(t *testing.T) {
|
||||
// Direct test of cycle detection in checkYAMLDepth by creating
|
||||
// a node structure with an artificial cycle.
|
||||
node := &ast.MappingNode{
|
||||
Values: []*ast.MappingValueNode{
|
||||
{
|
||||
Key: &ast.StringNode{Value: "key"},
|
||||
Value: &ast.StringNode{Value: "value"},
|
||||
},
|
||||
// This tests the seen map logic independent of go-yaml's parsing.
|
||||
node := &yaml.Node{
|
||||
Kind: yaml.MappingNode,
|
||||
Content: []*yaml.Node{
|
||||
{Kind: yaml.ScalarNode, Value: "key"},
|
||||
{Kind: yaml.ScalarNode, Value: "value"},
|
||||
},
|
||||
}
|
||||
|
||||
// Create a cycle by making a child reference the parent
|
||||
cycleChild := &ast.AliasNode{
|
||||
Value: node, // Points back to the parent
|
||||
cycleChild := &yaml.Node{
|
||||
Kind: yaml.AliasNode,
|
||||
Alias: node, // Points back to the parent
|
||||
}
|
||||
node.Values = append(node.Values, &ast.MappingValueNode{
|
||||
Key: &ast.StringNode{Value: "cyclic"},
|
||||
Value: cycleChild,
|
||||
})
|
||||
node.Content = append(node.Content,
|
||||
&yaml.Node{Kind: yaml.ScalarNode, Value: "cyclic"},
|
||||
cycleChild,
|
||||
)
|
||||
|
||||
nodeCount := 0
|
||||
validated := make(map[ast.Node]int)
|
||||
visiting := make(map[ast.Node]bool)
|
||||
err := checkYAMLDepth(node, 0, MaxYAMLDepth, MaxYAMLNodes, validated, visiting, &nodeCount)
|
||||
seen := make(map[*yaml.Node]struct{})
|
||||
err := checkYAMLDepth(node, 0, MaxYAMLDepth, MaxYAMLNodes, seen, &nodeCount)
|
||||
|
||||
// Should complete without infinite recursion due to cycle detection
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
// The validated map should contain multiple entries
|
||||
if len(validated) < 2 {
|
||||
t.Errorf("validated map has %d entries, expected at least 2", len(validated))
|
||||
}
|
||||
}
|
||||
|
||||
func TestYAMLAliasDepthBypass(t *testing.T) {
|
||||
// Test that an anchored subtree first validated at a shallow depth is
|
||||
// re-checked when referenced via alias at a deeper position. Without the
|
||||
// depth-aware validated map, the alias reference would skip re-checking
|
||||
// and allow the effective nesting to exceed MaxYAMLDepth.
|
||||
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "alias-depth-bypass.yaml")
|
||||
|
||||
// Build YAML with an anchor at shallow depth containing a subtree near the limit,
|
||||
// then reference it via alias deep enough that effective depth exceeds MaxYAMLDepth.
|
||||
var sb strings.Builder
|
||||
sb.WriteString("name: test\nidentity: test\n")
|
||||
|
||||
// Create the anchored subtree at depth 1 (key level) that nests 15 levels deep.
|
||||
sb.WriteString("anchor_key: &deep_anchor\n")
|
||||
for i := 0; i < 15; i++ {
|
||||
sb.WriteString(strings.Repeat(" ", i+1))
|
||||
sb.WriteString(fmt.Sprintf("level%d:\n", i))
|
||||
}
|
||||
sb.WriteString(strings.Repeat(" ", 16))
|
||||
sb.WriteString("leaf: value\n")
|
||||
|
||||
// Create a wrapper that nests 6 levels deep, then references the anchor.
|
||||
// Effective depth at alias target = 6 (wrapper nesting) + 1 (alias) + 15 (subtree) = 22 > 20
|
||||
sb.WriteString("wrapper:\n")
|
||||
for i := 0; i < 6; i++ {
|
||||
sb.WriteString(strings.Repeat(" ", i+1))
|
||||
sb.WriteString(fmt.Sprintf("n%d:\n", i))
|
||||
}
|
||||
sb.WriteString(strings.Repeat(" ", 7))
|
||||
sb.WriteString("alias_ref: *deep_anchor\n")
|
||||
|
||||
if err := os.WriteFile(path, []byte(sb.String()), 0644); err != nil {
|
||||
t.Fatalf("failed to write test file: %v", err)
|
||||
}
|
||||
|
||||
_, err := LoadPersona(path)
|
||||
if err == nil {
|
||||
t.Fatal("expected error for alias depth bypass, got nil")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "nesting depth exceeds") {
|
||||
t.Errorf("error = %q, want containing 'nesting depth exceeds'", err.Error())
|
||||
// The seen map should contain multiple entries
|
||||
if len(seen) < 2 {
|
||||
t.Errorf("seen map has %d entries, expected at least 2", len(seen))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -858,102 +776,3 @@ identity: test identity
|
||||
t.Errorf("Name = %q, want %q", p.Name, "test")
|
||||
}
|
||||
}
|
||||
|
||||
func TestJSONTrailingContentRejected(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
content string
|
||||
}{
|
||||
{
|
||||
name: "trailing garbage after object",
|
||||
content: `{"name":"test","identity":"test identity"}garbage`,
|
||||
},
|
||||
{
|
||||
name: "two JSON objects",
|
||||
content: `{"name":"test","identity":"test identity"}{"name":"other"}`,
|
||||
},
|
||||
{
|
||||
name: "trailing array",
|
||||
content: `{"name":"test","identity":"test identity"}[]`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "test.json")
|
||||
if err := os.WriteFile(path, []byte(tt.content), 0644); err != nil {
|
||||
t.Fatalf("failed to write test file: %v", err)
|
||||
}
|
||||
|
||||
_, err := LoadPersona(path)
|
||||
if err == nil {
|
||||
t.Fatal("expected error for trailing content, got nil")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "trailing content") {
|
||||
t.Errorf("error = %q, want to contain 'trailing content'", err.Error())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParsePersonaBytesSizeLimit(t *testing.T) {
|
||||
// ParsePersonaBytes should reject input exceeding MaxPersonaFileSize
|
||||
oversized := make([]byte, MaxPersonaFileSize+1)
|
||||
for i := range oversized {
|
||||
oversized[i] = 'x'
|
||||
}
|
||||
|
||||
_, err := ParsePersonaBytes(oversized, "oversized.yaml")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for oversized input, got nil")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "exceeds maximum size") {
|
||||
t.Errorf("error = %q, want to contain 'exceeds maximum size'", err.Error())
|
||||
}
|
||||
|
||||
// Just under the limit should not trigger size error (may fail parse, but not size)
|
||||
underLimit := []byte("name: test\nidentity: test persona\n")
|
||||
p, err := ParsePersonaBytes(underLimit, "valid.yaml")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error for valid input: %v", err)
|
||||
}
|
||||
if p.Name != "test" {
|
||||
t.Errorf("Name = %q, want %q", p.Name, "test")
|
||||
}
|
||||
}
|
||||
|
||||
func TestYAMLMergeKeyDepthCheck(t *testing.T) {
|
||||
// Verify that YAML merge keys (<<: *alias) are properly handled by the
|
||||
// depth checker. The merge key content is in the MappingValueNode.Value
|
||||
// (an AliasNode), not in the MergeKeyNode itself.
|
||||
p, err := ParsePersonaBytes([]byte("name: merge-test\nidentity: test\n"), "merge.yaml")
|
||||
if err != nil {
|
||||
t.Fatalf("basic parse failed: %v", err)
|
||||
}
|
||||
if p.Name != "merge-test" {
|
||||
t.Errorf("Name = %q, want %q", p.Name, "merge-test")
|
||||
}
|
||||
|
||||
// Test that deeply nested merge keys still hit depth limit.
|
||||
// Build YAML with merge key content nested beyond MaxYAMLDepth.
|
||||
var sb strings.Builder
|
||||
sb.WriteString("name: deep-merge\nidentity: deep merge persona\n")
|
||||
sb.WriteString("anchor: &deep\n")
|
||||
indent := " "
|
||||
for i := 0; i < MaxYAMLDepth+5; i++ {
|
||||
sb.WriteString(indent)
|
||||
sb.WriteString(fmt.Sprintf("level%d:\n", i))
|
||||
indent += " "
|
||||
}
|
||||
sb.WriteString(indent + "leaf: value\n")
|
||||
sb.WriteString("target:\n <<: *deep\n")
|
||||
|
||||
_, err = ParsePersonaBytes([]byte(sb.String()), "deep-merge.yaml")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for deeply nested merge key content, got nil")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "depth") {
|
||||
t.Errorf("error = %q, want to contain 'depth'", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user