// Command review-bot fetches a Gitea pull request, asks an LLM to review it,
// and posts the result back to Gitea as a pull request review.
package main

import (
	"context"
	"flag"
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"gitea.weiker.me/rodin/review-bot/budget"
	"gitea.weiker.me/rodin/review-bot/gitea"
	"gitea.weiker.me/rodin/review-bot/llm"
	"gitea.weiker.me/rodin/review-bot/review"
)

// version is printed by --version and logged at startup.
// NOTE(review): presumably overridden at build time (e.g. via -ldflags) — confirm.
var version = "dev"

// setupLogger configures the global slog default logger based on format and verbosity.
//
// verbosity is matched case-insensitively against "debug", "info", "warn" and
// "error"; any other value falls back to info. format "json" (case-insensitive)
// selects the JSON handler; anything else selects the text handler. Both
// handlers write to stderr.
func setupLogger(format, verbosity string) {
	var level slog.Level
	switch strings.ToLower(verbosity) {
	case "debug":
		level = slog.LevelDebug
	case "info":
		level = slog.LevelInfo
	case "warn":
		level = slog.LevelWarn
	case "error":
		level = slog.LevelError
	default:
		level = slog.LevelInfo
	}

	opts := &slog.HandlerOptions{Level: level}
	var handler slog.Handler
	switch strings.ToLower(format) {
	case "json":
		handler = slog.NewJSONHandler(os.Stderr, opts)
	default:
		handler = slog.NewTextHandler(os.Stderr, opts)
	}
	slog.SetDefault(slog.New(handler))
}

// main parses flags (most of which default to an environment variable),
// validates them, gathers review context from Gitea, calls the LLM, and either
// prints the review (--dry-run) or posts it and marks earlier reviews by the
// same reviewer as superseded. Fatal problems terminate the process with exit
// code 1; problems while gathering optional context are logged as warnings.
func main() {
	versionFlag := flag.Bool("version", false, "Print version and exit")

	// Logging flags
	logFormat := flag.String("log-format", envOrDefault("LOG_FORMAT", "text"), "Log output format: text or json")
	verbosity := flag.String("verbosity", envOrDefault("LOG_VERBOSITY", "info"), "Log verbosity: debug, info, warn, error")

	// CLI flags
	giteaURL := flag.String("gitea-url", envOrDefault("GITEA_URL", ""), "Gitea instance URL")
	repo := flag.String("repo", envOrDefault("GITEA_REPO", ""), "Repository (owner/name)")
	prNum := flag.String("pr", envOrDefault("PR_NUMBER", ""), "Pull request number")
	reviewerName := flag.String("reviewer-name", envOrDefault("REVIEWER_NAME", ""), "Reviewer display name")
	reviewerToken := flag.String("reviewer-token", envOrDefault("REVIEWER_TOKEN", ""), "Gitea token for posting review")
	llmBaseURL := flag.String("llm-base-url", envOrDefault("LLM_BASE_URL", ""), "LLM API base URL")
	llmAPIKey := flag.String("llm-api-key", envOrDefault("LLM_API_KEY", ""), "LLM API key")
	llmModel := flag.String("llm-model", envOrDefault("LLM_MODEL", ""), "LLM model name")
	conventionsFile := flag.String("conventions-file", envOrDefault("CONVENTIONS_FILE", ""), "Conventions file path in repo (e.g. CLAUDE.md)")
	systemPromptFile := flag.String("system-prompt-file", envOrDefault("SYSTEM_PROMPT_FILE", ""), "Local file with additional system prompt instructions")
	patternsRepo := flag.String("patterns-repo", envOrDefault("PATTERNS_REPO", ""), "Repo with language patterns (e.g. rodin/elixir-patterns)")
	patternsFiles := flag.String("patterns-files", envOrDefault("PATTERNS_FILES", "README.md"), "Comma-separated file paths to fetch from patterns repo")
	// Unlike the other options, dry-run has no environment-variable default.
	dryRun := flag.Bool("dry-run", false, "Print review to stdout instead of posting")
	llmTemp := flag.Float64("llm-temperature", envOrDefaultFloat("LLM_TEMPERATURE", 0), "LLM temperature (0 = server default)")
	llmTimeout := flag.Int("llm-timeout", envOrDefaultInt("LLM_TIMEOUT", 300), "LLM request timeout in seconds (default 300)")
	llmProvider := flag.String("llm-provider", envOrDefault("LLM_PROVIDER", "openai"), "LLM API provider: openai, anthropic, or aicore")

	// AI Core specific flags (only used when provider=aicore)
	aicoreClientID := flag.String("aicore-client-id", envOrDefault("AICORE_CLIENT_ID", ""), "SAP AI Core client ID (for provider=aicore)")
	aicoreClientSecret := flag.String("aicore-client-secret", envOrDefault("AICORE_CLIENT_SECRET", ""), "SAP AI Core client secret (for provider=aicore)")
	aicoreAuthURL := flag.String("aicore-auth-url", envOrDefault("AICORE_AUTH_URL", ""), "SAP AI Core auth URL (for provider=aicore)")
	aicoreAPIURL := flag.String("aicore-api-url", envOrDefault("AICORE_API_URL", ""), "SAP AI Core API URL (for provider=aicore)")
	aicoreResourceGroup := flag.String("aicore-resource-group", envOrDefault("AICORE_RESOURCE_GROUP", "default"), "SAP AI Core resource group (for provider=aicore)")

	flag.Parse()

	if *versionFlag {
		fmt.Printf("review-bot %s\n", version)
		os.Exit(0)
	}

	// Initialize structured logger
	setupLogger(*logFormat, *verbosity)
	slog.Info("review-bot starting", "version", version)

	// Validate required fields
	// For aicore provider, llm-base-url and llm-api-key are not required
	isAICore := llm.Provider(*llmProvider) == llm.ProviderAICore

	if *giteaURL == "" || *repo == "" || *prNum == "" || *reviewerToken == "" || *llmModel == "" {
		fmt.Fprintf(os.Stderr, "Error: missing required flags or environment variables\n\n")
		fmt.Fprintf(os.Stderr, "Required: --gitea-url, --repo, --pr, --reviewer-token, --llm-model\n")
		os.Exit(1)
	}
	if !isAICore && (*llmBaseURL == "" || *llmAPIKey == "") {
		fmt.Fprintf(os.Stderr, "Error: --llm-base-url and --llm-api-key are required for provider=%s\n", *llmProvider)
		os.Exit(1)
	}
	if isAICore && (*aicoreClientID == "" || *aicoreClientSecret == "" || *aicoreAuthURL == "" || *aicoreAPIURL == "") {
		fmt.Fprintf(os.Stderr, "Error: AI Core credentials required for provider=aicore\n\n")
		fmt.Fprintf(os.Stderr, "Required: --aicore-client-id, --aicore-client-secret, --aicore-auth-url, --aicore-api-url\n")
		os.Exit(1)
	}

	// Validate reviewer-name: only safe characters allowed in sentinel
	if err := validateReviewerName(*reviewerName); err != nil {
		slog.Error("invalid reviewer name", "error", err)
		os.Exit(1)
	}

	// Parse repo owner/name
	// Only the first "/" splits; an empty owner or name half is not rejected here.
	parts := strings.SplitN(*repo, "/", 2)
	if len(parts) != 2 {
		slog.Error("invalid repo format", "repo", *repo, "expected", "owner/name")
		os.Exit(1)
	}
	owner, repoName := parts[0], parts[1]

	// Parse PR number
	prNumber, err := strconv.Atoi(*prNum)
	if err != nil {
		slog.Error("invalid PR number", "pr", *prNum, "error", err)
		os.Exit(1)
	}

	// Initialize clients
	giteaClient := gitea.NewClient(*giteaURL, *reviewerToken)
	llmClient := llm.NewClient(*llmBaseURL, *llmAPIKey, *llmModel)
	if *llmTemp < 0 || *llmTemp > 2 {
		slog.Error("invalid LLM temperature", "temperature", *llmTemp, "range", "0-2")
		os.Exit(1)
	}
	// A temperature of exactly 0 is never forwarded to the client (the flag
	// help describes 0 as "server default").
	if *llmTemp > 0 {
		llmClient.WithTemperature(*llmTemp)
	}
	switch llm.Provider(*llmProvider) {
	case llm.ProviderOpenAI, llm.ProviderAnthropic:
		llmClient.WithProvider(llm.Provider(*llmProvider))
	case llm.ProviderAICore:
		llmClient.WithAICore(llm.AICoreConfig{
			ClientID:      *aicoreClientID,
			ClientSecret:  *aicoreClientSecret,
			AuthURL:       *aicoreAuthURL,
			APIURL:        *aicoreAPIURL,
			ResourceGroup: *aicoreResourceGroup,
		})
		slog.Info("using SAP AI Core provider", "resource_group", *aicoreResourceGroup)
	default:
		slog.Error("invalid LLM provider", "provider", *llmProvider, "valid", "openai, anthropic, aicore")
		os.Exit(1)
	}
	if *llmTimeout > 0 {
		llmClient.WithTimeout(time.Duration(*llmTimeout) * time.Second)
	}

	// Create a top-level context. Timeout derived from LLM timeout + 1 min for other ops.
	// NOTE(review): a non-positive --llm-timeout skips WithTimeout above but is
	// still used here, so the overall deadline becomes one minute or less.
	overallTimeout := time.Duration(*llmTimeout)*time.Second + time.Minute
	ctx, cancel := context.WithTimeout(context.Background(), overallTimeout)
	// The os.Exit calls below terminate the process without running this defer.
	defer cancel()

	slog.Info("reviewing pull request", "pr", prNumber, "repo", fmt.Sprintf("%s/%s", owner, repoName))

	// Step 1: Fetch PR metadata
	pr, err := giteaClient.GetPullRequest(ctx, owner, repoName, prNumber)
	if err != nil {
		slog.Error("failed to fetch PR", "pr", prNumber, "error", err)
		os.Exit(1)
	}
	slog.Info("fetched PR metadata", "pr", prNumber, "title", pr.Title)

	// Step 2: Fetch diff
	diff, err := giteaClient.GetPullRequestDiff(ctx, owner, repoName, prNumber)
	if err != nil {
		slog.Error("failed to fetch diff", "pr", prNumber, "error", err)
		os.Exit(1)
	}
	slog.Info("fetched diff", "bytes", len(diff))

	// Step 3: Fetch full file content for modified files
	// Best effort: a failure here only logs a warning and leaves fileContext empty.
	fileContext := ""
	files, err := giteaClient.GetPullRequestFiles(ctx, owner, repoName, prNumber)
	if err != nil {
		slog.Warn("could not fetch PR files list", "pr", prNumber, "error", err)
	} else {
		fileContext = fetchFileContext(ctx, giteaClient, owner, repoName, pr.Head.Ref, files)
		slog.Debug("fetched file context", "files", len(files))
	}

	// Step 4: Check CI status
	// CI is assumed to have passed when there is no head SHA or the statuses
	// cannot be fetched.
	ciPassed := true
	ciDetails := ""
	if pr.Head.Sha != "" {
		statuses, err := giteaClient.GetCommitStatuses(ctx, owner, repoName, pr.Head.Sha)
		if err != nil {
			slog.Warn("could not fetch CI status", "sha", pr.Head.Sha, "error", err)
		} else {
			ciPassed, ciDetails = evaluateCIStatus(statuses)
			slog.Info("CI status checked", "passed", ciPassed)
		}
	}

	// Step 5: Load conventions file if specified
	conventions := ""
	if *conventionsFile != "" {
		content, err := giteaClient.GetFileContent(ctx, owner, repoName, *conventionsFile)
		if err != nil {
			slog.Warn("could not load conventions file", "file", *conventionsFile, "error", err)
		} else {
			conventions = content
			slog.Debug("loaded conventions file", "file", *conventionsFile, "bytes", len(conventions))
		}
	}

	// Step 6: Load patterns from external repo if specified
	patterns := ""
	if *patternsRepo != "" {
		patterns = fetchPatterns(ctx, giteaClient, *patternsRepo, *patternsFiles)
		slog.Debug("loaded patterns", "repo", *patternsRepo, "bytes", len(patterns))
	}

	// Step 6b: Load additional system prompt if specified
	// The file is read from the local filesystem and must stay inside the
	// workspace (GITHUB_WORKSPACE, or the current directory when unset).
	additionalPrompt := ""
	if *systemPromptFile != "" {
		workspace := os.Getenv("GITHUB_WORKSPACE")
		if workspace == "" {
			// The error from Getwd is discarded here.
			workspace, _ = os.Getwd()
		}
		absWorkspace, err := filepath.Abs(workspace)
		if err != nil {
			slog.Error("failed to resolve workspace path", "error", err)
			os.Exit(1)
		}
		promptPath := filepath.Join(absWorkspace, *systemPromptFile)
		promptPath = filepath.Clean(promptPath)
		// Lexical containment check: rejects paths that escape via "..".
		if !strings.HasPrefix(promptPath, absWorkspace+string(filepath.Separator)) && promptPath != absWorkspace {
			slog.Error("system-prompt-file resolves outside workspace", "path", promptPath, "workspace", absWorkspace)
			os.Exit(1)
		}
		// Resolve symlinks and re-validate to prevent symlink traversal
		// NOTE(review): absWorkspace itself is not passed through EvalSymlinks,
		// so if the workspace path contains a symlink the resolved prompt path
		// may fail the prefix check below even for a legitimate file — confirm.
		resolvedPath, err := filepath.EvalSymlinks(promptPath)
		if err != nil {
			slog.Error("failed to resolve system prompt file", "path", promptPath, "error", err)
			os.Exit(1)
		}
		if !strings.HasPrefix(resolvedPath, absWorkspace+string(filepath.Separator)) && resolvedPath != absWorkspace {
			slog.Error("system-prompt-file symlink resolves outside workspace", "resolved", resolvedPath, "workspace", absWorkspace)
			os.Exit(1)
		}
		data, err := os.ReadFile(resolvedPath)
		if err != nil {
			slog.Error("failed to read system prompt file", "path", promptPath, "error", err)
			os.Exit(1)
		}
		additionalPrompt = string(data)
		slog.Debug("loaded system prompt file", "file", *systemPromptFile, "bytes", len(additionalPrompt))
	}

	// Step 7: Budget-aware prompt assembly
	systemBase := review.BuildSystemBase()
	if additionalPrompt != "" {
		systemBase += "\n\n## Additional Review Instructions\n\n" + additionalPrompt
	}

	sections := budget.Sections{
		SystemBase:  systemBase,
		Patterns:    patterns,
		Conventions: conventions,
		FileContext: fileContext,
		Diff:        diff,
		UserMeta:    review.BuildUserMeta(pr.Title, pr.Body, ciPassed, ciDetails),
	}

	budgetResult := budget.Fit(*llmModel, sections)
	slog.Info("token budget calculated", "tokens", budgetResult.EstTokens, "limit", budget.LimitForModel(*llmModel), "model", *llmModel)
	if len(budgetResult.Trimmed) > 0 {
		slog.Warn("context trimmed to fit budget", "trimmed", budgetResult.Trimmed)
	}

	// Step 8: Call LLM (with retry on parse failure)
	slog.Info("sending request to LLM", "model", *llmModel)
	messages := []llm.Message{
		{Role: "system", Content: budgetResult.SystemPrompt},
		{Role: "user", Content: budgetResult.UserPrompt},
	}

	var response string
	var result *review.ReviewResult
	// At most two attempts. Either a request error or a parse error on the
	// first attempt leads to a second one; any error on the second attempt
	// exits, so result is non-nil once the loop is left.
	for attempt := 1; attempt <= 2; attempt++ {
		if attempt > 1 {
			// NOTE(review): this message is also logged when the first attempt
			// failed at the request stage rather than at parsing.
			slog.Warn("retrying LLM request after parse failure", "attempt", attempt)
			// Fixed one-second pause; it does not observe ctx cancellation.
			time.Sleep(time.Second)
		}

		response, err = llmClient.Complete(ctx, messages)
		if err != nil {
			slog.Error("LLM request failed", "model", *llmModel, "error", err, "attempt", attempt)
			if attempt == 2 {
				os.Exit(1)
			}
			continue
		}
		slog.Info("LLM response received", "bytes", len(response), "attempt", attempt)

		// Step 9: Parse response
		result, err = review.ParseResponse(response)
		if err != nil {
			slog.Error("failed to parse LLM response", "error", err, "attempt", attempt)
			if attempt == 2 {
				os.Exit(1)
			}
			continue
		}
		break
	}
	slog.Info("review parsed", "verdict", result.Verdict, "findings", len(result.Findings))

	// Step 10: Format and post review
	reviewBody := review.FormatMarkdown(result, *reviewerName)

	// Add commit footer so readers know which commit was evaluated
	if pr.Head.Sha != "" {
		shortSHA := pr.Head.Sha
		if len(shortSHA) > 8 {
			shortSHA = shortSHA[:8]
		}
		reviewBody += fmt.Sprintf("\n\n---\n*Evaluated against %s*", shortSHA)
	}

	event := review.GiteaEvent(result.Verdict)

	// Dry run: print the event and body, then return before any write to Gitea.
	if *dryRun {
		fmt.Println("--- DRY RUN ---")
		fmt.Printf("Event: %s\n\n", event)
		fmt.Println(reviewBody)
		return
	}

	// NOTE(review): the format string is empty as shown, so the reviewer name
	// is not actually interpolated. The text was probably an HTML-comment
	// sentinel containing a %s verb that was stripped from this copy of the
	// file — restore it from the repository before relying on this line.
	sentinel := fmt.Sprintf("", *reviewerName)

	// Map findings to inline comments for lines present in the diff
	diffRanges := gitea.ParseDiffNewLines(diff)
	var inlineComments []gitea.ReviewComment
	for _, f := range result.Findings {
		// Findings without a file/line, or outside the diff, are not attached inline.
		if f.File != "" && f.Line > 0 && diffRanges.Contains(f.File, f.Line) {
			inlineComments = append(inlineComments, gitea.ReviewComment{
				Path:        f.File,
				NewPosition: int64(f.Line),
				Body:        fmt.Sprintf("**[%s]** %s", f.Severity, f.Finding),
			})
		}
	}
	if len(inlineComments) > 0 {
		slog.Debug("attaching inline comments", "count", len(inlineComments))
	}

	// --- Review update strategy ---
	// 1. POST new review first (gets non-stale approval badge on HEAD)
	// 2. Then supersede old review with link to the new one
	// Order matters: post first so we have the new review's URL for the supersede message.

	// Collect our earlier reviews before posting, so the review posted below is
	// not part of the list. Without a reviewer name nothing is superseded.
	var oldReviews []gitea.Review
	if *reviewerName != "" {
		existingReviews, err := giteaClient.ListReviews(ctx, owner, repoName, prNumber)
		if err != nil {
			slog.Warn("could not list existing reviews", "pr", prNumber, "error", err)
		} else {
			if hasSharedToken(existingReviews, sentinel) {
				slog.Warn("shared token mode: skipping supersede to avoid clobbering sibling review")
			} else {
				oldReviews = findAllOwnReviews(existingReviews, sentinel)
			}
		}
	}

	// Self-request as reviewer (ensures we appear in required-reviewer checks)
	authUser, err := giteaClient.GetAuthenticatedUser(ctx)
	if err != nil {
		slog.Warn("could not determine authenticated user for reviewer self-request", "error", err)
	} else if authUser != "" {
		if err := giteaClient.RequestReviewer(ctx, owner, repoName, prNumber, authUser); err != nil {
			slog.Warn("could not self-request as reviewer", "user", authUser, "error", err)
		} else {
			slog.Debug("self-requested as reviewer", "user", authUser, "pr", prNumber)
		}
	}

	// POST new review
	slog.Info("posting review", "event", event, "pr", prNumber)
	posted, err := giteaClient.PostReview(ctx, owner, repoName, prNumber, event, reviewBody, inlineComments)
	if err != nil {
		slog.Error("failed to post review", "pr", prNumber, "event", event, "error", err)
		os.Exit(1)
	}
	slog.Info("review posted", "review_id", posted.ID, "user", posted.User.Login, "pr", prNumber)

	// Supersede all old reviews with link to the new one
	// Every step below is best effort: a failure is logged and the loop moves
	// on to the next old review.
	if len(oldReviews) > 0 {
		newReviewURL := fmt.Sprintf("%s/%s/%s/pulls/%d#pullrequestreview-%d",
			strings.TrimRight(*giteaURL, "/"), owner, repoName, prNumber, posted.ID)

		for _, oldReview := range oldReviews {
			// The review body is edited through its timeline comment ID.
			cid, err := giteaClient.GetTimelineReviewCommentIDForReview(ctx, owner, repoName, prNumber, oldReview.ID)
			if err != nil {
				slog.Warn("could not find comment ID for old review", "review_id", oldReview.ID, "error", err)
				continue
			}
			supersededBody := buildSupersededBody(oldReview.Body, oldReview.CommitID, newReviewURL, sentinel)
			if err := giteaClient.EditComment(ctx, owner, repoName, cid, supersededBody); err != nil {
				slog.Warn("could not mark old review as superseded", "review_id", oldReview.ID, "comment_id", cid, "error", err)
				continue
			}
			slog.Info("marked old review as superseded", "review_id", oldReview.ID, "new_review_id", posted.ID, "pr", prNumber)

			// Resolve old review's inline comments
			oldComments, err := giteaClient.ListReviewComments(ctx, owner, repoName, prNumber, oldReview.ID)
			if err != nil {
				slog.Warn("could not list old review comments for resolution", "review_id", oldReview.ID, "error", err)
				continue
			}
			resolved, failed := 0, 0
			for _, c := range oldComments {
				// Comments with a zero ID are skipped.
				if c.ID == 0 {
					continue
				}
				if err := giteaClient.ResolveComment(ctx, owner, repoName, c.ID); err != nil {
					slog.Debug("could not resolve inline comment", "comment_id", c.ID, "error", err)
					failed++
				} else {
					resolved++
				}
			}
			if resolved > 0 {
				slog.Info("resolved old inline comments", "review_id", oldReview.ID, "count", resolved, "pr", prNumber)
			}
			if failed > 0 {
				slog.Warn("some inline comments could not be resolved", "review_id", oldReview.ID, "failed", failed, "pr", prNumber)
			}
		}
	}
}

// fetchFileContext fetches the full content of modified files from the PR branch.
//
// Each file is fetched at ref and appended as a "--- <filename> ---" header
// followed by the content inside a Markdown code fence. Files whose status is
// "removed" are skipped, files that fail to load are logged and skipped, and
// the loop stops early once ctx is done, returning whatever was collected.
func fetchFileContext(ctx context.Context, client *gitea.Client, owner, repo, ref string, files []gitea.ChangedFile) string {
	var sb strings.Builder
	for _, f := range files {
		if ctx.Err() != nil {
			break
		}
		if f.Status == "removed" {
			continue // Skip deleted files
		}
		content, err := client.GetFileContentRef(ctx, owner, repo, f.Filename, ref)
		if err != nil {
			slog.Warn("could not fetch file content", "file", f.Filename, "error", err)
			continue
		}
		sb.WriteString(fmt.Sprintf("--- %s ---\n", f.Filename))
		sb.WriteString("```\n")
		sb.WriteString(content)
		sb.WriteString("\n```\n\n")
	}
	return sb.String()
}

// fetchPatterns fetches pattern files from one or more external repos.
// patternsRepo is comma-separated list of owner/name repos.
// patternsFiles is comma-separated list of file paths or directories.
// If a path ends with / or is a directory, all files within it are fetched recursively. func fetchPatterns(ctx context.Context, client *gitea.Client, patternsRepo, patternsFiles string) string { var sb strings.Builder repos := strings.Split(patternsRepo, ",") paths := strings.Split(patternsFiles, ",") for _, repoRef := range repos { if ctx.Err() != nil { break } repoRef = strings.TrimSpace(repoRef) if repoRef == "" { continue } parts := strings.SplitN(repoRef, "/", 2) if len(parts) != 2 { slog.Warn("invalid patterns-repo format", "repo", repoRef, "expected", "owner/name") continue } owner, repo := parts[0], parts[1] for _, path := range paths { path = strings.TrimSpace(path) if path == "" { continue } files, err := client.GetAllFilesInPath(ctx, owner, repo, path) if err != nil { slog.Warn("could not fetch patterns", "path", path, "repo", repoRef, "error", err) continue } for filePath, content := range files { // Only include markdown and text files as patterns if !isPatternFile(filePath) { continue } sb.WriteString(fmt.Sprintf("### %s/%s\n\n%s\n\n", repoRef, filePath, content)) } } } return sb.String() } // isPatternFile returns true if the file should be included as pattern content. func isPatternFile(path string) bool { lower := strings.ToLower(path) return strings.HasSuffix(lower, ".md") || strings.HasSuffix(lower, ".txt") || strings.HasSuffix(lower, ".yml") || strings.HasSuffix(lower, ".yaml") } // evaluateCIStatus checks if all CI statuses indicate success. 
func evaluateCIStatus(statuses []gitea.CommitStatus) (passed bool, details string) { if len(statuses) == 0 { return true, "no CI statuses found" } var failed []string for _, s := range statuses { switch s.Status { case "success": // good case "pending": // treat pending as not-failed case "failure", "error": failed = append(failed, fmt.Sprintf("%s: %s", s.Context, s.Description)) } } if len(failed) > 0 { return false, strings.Join(failed, "; ") } return true, "all checks passed" } func envOrDefault(key, defaultVal string) string { if v := os.Getenv(key); v != "" { return v } return defaultVal } func envOrDefaultFloat(key string, defaultVal float64) float64 { if v := os.Getenv(key); v != "" { f, err := strconv.ParseFloat(v, 64) if err == nil { return f } } return defaultVal } func envOrDefaultInt(key string, defaultVal int) int { if v := os.Getenv(key); v != "" { i, err := strconv.Atoi(v) if err == nil { return i } } return defaultVal } func envOrDefaultBool(key string, defaultVal bool) bool { v := strings.TrimSpace(strings.ToLower(os.Getenv(key))) if v == "" { return defaultVal } return v == "true" || v == "1" || v == "yes" } // validateReviewerName checks that the name contains only safe characters // for embedding in an HTML comment sentinel ([a-zA-Z0-9_-]). func validateReviewerName(name string) error { if name == "" { return nil } for _, ch := range name { if !((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') || ch == '-' || ch == '_') { return fmt.Errorf("reviewer-name must contain only [a-zA-Z0-9_-] (got %q)", name) } } return nil } // buildSupersededBody creates the body for a superseded review: struck-through banner // with collapsed original content and the commit it was evaluated against. 
func buildSupersededBody(originalBody, commitSHA, newReviewURL, sentinel string) string {
	// Abbreviate the commit hash to at most 8 characters for display.
	shortSHA := commitSHA
	if len(shortSHA) > 8 {
		shortSHA = shortSHA[:8]
	}

	var sb strings.Builder
	// This marker is what findOwnReview and findAllOwnReviews look for to
	// recognise a review that has already been superseded.
	sb.WriteString("~~Original review~~\n\n")
	sb.WriteString("**Superseded** \u2014 [see current review](")
	sb.WriteString(newReviewURL)
	sb.WriteString(") for up-to-date findings.\n\n")

	// NOTE(review): the string literals below that span two physical lines are
	// not valid Go as shown. Markup appears to have been stripped from this
	// copy of the file (presumably the tags that collapse the original
	// content) — restore the literals from the repository.
	if shortSHA != "" {
		sb.WriteString("
Previous findings (commit ")
		sb.WriteString(shortSHA)
		sb.WriteString(")\n\n")
	} else {
		sb.WriteString("
Previous findings\n\n")
	}
	sb.WriteString(originalBody)
	sb.WriteString("\n\n
\n\n")
	// The sentinel is appended last so the edited review still matches
	// sentinel-based lookups.
	sb.WriteString(sentinel)
	return sb.String()
}

// hasSharedToken detects if another review-bot role posted under the same
// Gitea user. This indicates misconfiguration where two roles share a token
// instead of having separate Gitea accounts. Returns true if shared token
// detected (caller should skip update-in-place logic to avoid clobbering).
func hasSharedToken(reviews []gitea.Review, ownSentinel string) bool {
	// Find the login that posted the first review carrying our own sentinel.
	ownLogin := ""
	for _, r := range reviews {
		if strings.Contains(r.Body, ownSentinel) {
			ownLogin = r.User.Login
			break
		}
	}
	// No review of ours exists yet, so there is nothing to compare against.
	if ownLogin == "" {
		return false
	}
	for _, r := range reviews {
		// NOTE(review): the source is truncated from this point. The rest of
		// this condition, the end of hasSharedToken, and the header of a
		// following helper (which takes body and defines prefix and suffix)
		// are missing from this copy of the file — restore them from the
		// repository. The statements below belong to that helper: they return
		// the text between prefix and suffix in body, or "unknown" when either
		// delimiter is not found.
		if r.User.Login == ownLogin && strings.Contains(r.Body, ""
		idx := strings.Index(body, prefix)
		if idx < 0 {
			return "unknown"
		}
		rest := body[idx+len(prefix):]
		end := strings.Index(rest, suffix)
		if end < 0 {
			return "unknown"
		}
		return rest[:end]
}

// findOwnReview locates the most recent non-superseded review matching the sentinel.
//
// "Most recent" means the highest review ID. Reviews whose body contains the
// "~~Original review~~" marker are treated as already superseded and skipped.
// The result points into the reviews slice; it is nil when nothing matches.
func findOwnReview(reviews []gitea.Review, sentinel string) *gitea.Review {
	var best *gitea.Review
	for i := range reviews {
		if !strings.Contains(reviews[i].Body, sentinel) {
			continue
		}
		if strings.Contains(reviews[i].Body, "~~Original review~~") {
			continue
		}
		if best == nil || reviews[i].ID > best.ID {
			best = &reviews[i]
		}
	}
	return best
}

// findAllOwnReviews returns all non-superseded reviews matching the sentinel.
//
// Matching reviews are returned in input order as copies. Reviews whose body
// contains the "~~Original review~~" marker are skipped. The result is nil
// when nothing matches.
func findAllOwnReviews(reviews []gitea.Review, sentinel string) []gitea.Review {
	var result []gitea.Review
	for i := range reviews {
		if !strings.Contains(reviews[i].Body, sentinel) {
			continue
		}
		if strings.Contains(reviews[i].Body, "~~Original review~~") {
			continue
		}
		result = append(result, reviews[i])
	}
	return result
}