From 700f186023ece8989e8c3761b8769df7e9ef92b5 Mon Sep 17 00:00:00 2001 From: Rodin Date: Fri, 1 May 2026 09:42:45 -0700 Subject: [PATCH] Initial implementation: AI code review bot for Gitea - CLI binary with flag/env var configuration - Gitea API client (PR metadata, diff, CI status, post review) - OpenAI-compatible LLM client - Structured review prompt with conventions support - JSON response parser with validation - Markdown review formatter for Gitea - CI failure auto-detection (REQUEST_CHANGES) - Dry-run mode for testing --- .gitignore | 1 + README.md | 58 ++++++++++++++ cmd/review-bot/main.go | 168 +++++++++++++++++++++++++++++++++++++++++ gitea/client.go | 148 ++++++++++++++++++++++++++++++++++++ go.mod | 3 + llm/client.go | 99 ++++++++++++++++++++++++ review/formatter.go | 48 ++++++++++++ review/parser.go | 76 +++++++++++++++++++ review/prompt.go | 72 ++++++++++++++++++ 9 files changed, 673 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 cmd/review-bot/main.go create mode 100644 gitea/client.go create mode 100644 go.mod create mode 100644 llm/client.go create mode 100644 review/formatter.go create mode 100644 review/parser.go create mode 100644 review/prompt.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..60fb0d4 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/review-bot diff --git a/README.md b/README.md new file mode 100644 index 0000000..c16aeaf --- /dev/null +++ b/README.md @@ -0,0 +1,58 @@ +# review-bot + +AI-powered code review bot for Gitea pull requests. + +## Overview + +`review-bot` fetches a PR's diff, title, description, and CI status from Gitea, sends it to an LLM via an OpenAI-compatible API, and posts a structured code review back to Gitea. 
+ +## Usage + +```bash +review-bot \ + --gitea-url https://gitea.weiker.me \ + --repo owner/name \ + --pr 123 \ + --reviewer-name "sonnet-review-bot" \ + --reviewer-token "$(cat /path/to/token)" \ + --llm-base-url "https://proxy.example.com/v1" \ + --llm-api-key "key" \ + --llm-model "anthropic--claude-4.6-sonnet" \ + --conventions-file "CLAUDE.md" +``` + +All flags can also be set via environment variables: + +| Flag | Env Var | +|------|---------| +| `--gitea-url` | `GITEA_URL` | +| `--repo` | `GITEA_REPO` | +| `--pr` | `PR_NUMBER` | +| `--reviewer-name` | `REVIEWER_NAME` | +| `--reviewer-token` | `REVIEWER_TOKEN` | +| `--llm-base-url` | `LLM_BASE_URL` | +| `--llm-api-key` | `LLM_API_KEY` | +| `--llm-model` | `LLM_MODEL` | +| `--conventions-file` | `CONVENTIONS_FILE` | + +Use `--dry-run` to print the review to stdout without posting. + +## Build + +```bash +go build -o review-bot ./cmd/review-bot +``` + +## Architecture + +- `cmd/review-bot/main.go` — CLI entry point +- `gitea/client.go` — Gitea API interactions (fetch PR, diff, CI status, post review) +- `llm/client.go` — OpenAI-compatible chat completion client +- `review/prompt.go` — System/user prompt construction +- `review/parser.go` — Parse LLM JSON response +- `review/formatter.go` — Format markdown review body + +## Constraints + +- Pure Go stdlib, no external dependencies +- No CGO diff --git a/cmd/review-bot/main.go b/cmd/review-bot/main.go new file mode 100644 index 0000000..3fcc3c8 --- /dev/null +++ b/cmd/review-bot/main.go @@ -0,0 +1,168 @@ +package main + +import ( + "flag" + "fmt" + "log" + "os" + "strconv" + "strings" + + "gitea.weiker.me/rodin/review-bot/gitea" + "gitea.weiker.me/rodin/review-bot/llm" + "gitea.weiker.me/rodin/review-bot/review" +) + +func main() { + // CLI flags + giteaURL := flag.String("gitea-url", envOrDefault("GITEA_URL", ""), "Gitea instance URL") + repo := flag.String("repo", envOrDefault("GITEA_REPO", ""), "Repository (owner/name)") + prNum := flag.String("pr", 
envOrDefault("PR_NUMBER", ""), "Pull request number") + reviewerName := flag.String("reviewer-name", envOrDefault("REVIEWER_NAME", ""), "Reviewer display name") + reviewerToken := flag.String("reviewer-token", envOrDefault("REVIEWER_TOKEN", ""), "Gitea token for posting review") + llmBaseURL := flag.String("llm-base-url", envOrDefault("LLM_BASE_URL", ""), "LLM API base URL") + llmAPIKey := flag.String("llm-api-key", envOrDefault("LLM_API_KEY", ""), "LLM API key") + llmModel := flag.String("llm-model", envOrDefault("LLM_MODEL", ""), "LLM model name") + conventionsFile := flag.String("conventions-file", envOrDefault("CONVENTIONS_FILE", ""), "Conventions file path in repo (e.g. CLAUDE.md)") + dryRun := flag.Bool("dry-run", false, "Print review to stdout instead of posting") + + flag.Parse() + + // Validate required fields + if *giteaURL == "" || *repo == "" || *prNum == "" || *reviewerToken == "" || + *llmBaseURL == "" || *llmAPIKey == "" || *llmModel == "" { + fmt.Fprintf(os.Stderr, "Error: missing required flags or environment variables\n\n") + fmt.Fprintf(os.Stderr, "Required: --gitea-url, --repo, --pr, --reviewer-token, --llm-base-url, --llm-api-key, --llm-model\n") + os.Exit(1) + } + + // Parse repo owner/name + parts := strings.SplitN(*repo, "/", 2) + if len(parts) != 2 { + log.Fatalf("Invalid repo format %q, expected owner/name", *repo) + } + owner, repoName := parts[0], parts[1] + + // Parse PR number + prNumber, err := strconv.Atoi(*prNum) + if err != nil { + log.Fatalf("Invalid PR number %q: %v", *prNum, err) + } + + // Initialize clients + giteaClient := gitea.NewClient(*giteaURL, *reviewerToken) + llmClient := llm.NewClient(*llmBaseURL, *llmAPIKey, *llmModel) + + log.Printf("Reviewing PR #%d on %s/%s", prNumber, owner, repoName) + + // Step 1: Fetch PR metadata + pr, err := giteaClient.GetPullRequest(owner, repoName, prNumber) + if err != nil { + log.Fatalf("Failed to fetch PR: %v", err) + } + log.Printf("PR: %s", pr.Title) + + // Step 2: Fetch diff + 
diff, err := giteaClient.GetPullRequestDiff(owner, repoName, prNumber) + if err != nil { + log.Fatalf("Failed to fetch diff: %v", err) + } + log.Printf("Diff size: %d bytes", len(diff)) + + // Step 3: Check CI status + ciPassed := true + ciDetails := "" + if pr.Head.Sha != "" { + statuses, err := giteaClient.GetCommitStatuses(owner, repoName, pr.Head.Sha) + if err != nil { + log.Printf("Warning: could not fetch CI status: %v", err) + } else { + ciPassed, ciDetails = evaluateCIStatus(statuses) + log.Printf("CI status: passed=%v", ciPassed) + } + } + + // Step 4: Load conventions file if specified + conventions := "" + if *conventionsFile != "" { + content, err := giteaClient.GetFileContent(owner, repoName, *conventionsFile) + if err != nil { + log.Printf("Warning: could not load conventions file %q: %v", *conventionsFile, err) + } else { + conventions = content + log.Printf("Loaded conventions file: %s (%d bytes)", *conventionsFile, len(conventions)) + } + } + + // Step 5: Build prompts + systemPrompt := review.BuildSystemPrompt(conventions) + userPrompt := review.BuildUserPrompt(pr.Title, pr.Body, diff, ciPassed, ciDetails) + + // Step 6: Call LLM + log.Printf("Sending to LLM (%s)...", *llmModel) + messages := []llm.Message{ + {Role: "system", Content: systemPrompt}, + {Role: "user", Content: userPrompt}, + } + + response, err := llmClient.Complete(messages) + if err != nil { + log.Fatalf("LLM request failed: %v", err) + } + log.Printf("LLM response received (%d bytes)", len(response)) + + // Step 7: Parse response + result, err := review.ParseResponse(response) + if err != nil { + log.Fatalf("Failed to parse LLM response: %v", err) + } + log.Printf("Verdict: %s (%d findings)", result.Verdict, len(result.Findings)) + + // Step 8: Format and post review + reviewBody := review.FormatMarkdown(result, *reviewerName) + event := review.GiteaEvent(result.Verdict) + + if *dryRun { + fmt.Println("--- DRY RUN ---") + fmt.Printf("Event: %s\n\n", event) + 
fmt.Println(reviewBody) + return + } + + log.Printf("Posting review (event=%s)...", event) + if err := giteaClient.PostReview(owner, repoName, prNumber, event, reviewBody); err != nil { + log.Fatalf("Failed to post review: %v", err) + } + log.Printf("Review posted successfully!") +} + +// evaluateCIStatus checks if all CI statuses indicate success. +func evaluateCIStatus(statuses []gitea.CommitStatus) (passed bool, details string) { + if len(statuses) == 0 { + return true, "no CI statuses found" + } + + var failed []string + for _, s := range statuses { + switch s.Status { + case "success": + // good + case "pending": + // treat pending as not-failed + case "failure", "error": + failed = append(failed, fmt.Sprintf("%s: %s", s.Context, s.Description)) + } + } + + if len(failed) > 0 { + return false, strings.Join(failed, "; ") + } + return true, "all checks passed" +} + +func envOrDefault(key, defaultVal string) string { + if v := os.Getenv(key); v != "" { + return v + } + return defaultVal +} diff --git a/gitea/client.go b/gitea/client.go new file mode 100644 index 0000000..e225f5c --- /dev/null +++ b/gitea/client.go @@ -0,0 +1,148 @@ +package gitea + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "strings" +) + +// Client interacts with the Gitea API. +type Client struct { + BaseURL string + Token string + HTTP *http.Client +} + +// NewClient creates a new Gitea API client. +func NewClient(baseURL, token string) *Client { + return &Client{ + BaseURL: strings.TrimRight(baseURL, "/"), + Token: token, + HTTP: &http.Client{}, + } +} + +// PullRequest holds relevant PR metadata. +type PullRequest struct { + Title string `json:"title"` + Body string `json:"body"` + Head struct { + Sha string `json:"sha"` + } `json:"head"` +} + +// CommitStatus represents a single CI status entry. 
+type CommitStatus struct { + Status string `json:"status"` + Context string `json:"context"` + Description string `json:"description"` + TargetURL string `json:"target_url"` +} + +// GetPullRequest fetches PR metadata. +func (c *Client) GetPullRequest(owner, repo string, number int) (*PullRequest, error) { + url := fmt.Sprintf("%s/api/v1/repos/%s/%s/pulls/%d", c.BaseURL, owner, repo, number) + body, err := c.doGet(url) + if err != nil { + return nil, fmt.Errorf("fetch PR: %w", err) + } + var pr PullRequest + if err := json.Unmarshal(body, &pr); err != nil { + return nil, fmt.Errorf("parse PR JSON: %w", err) + } + return &pr, nil +} + +// GetPullRequestDiff fetches the unified diff for a PR. +func (c *Client) GetPullRequestDiff(owner, repo string, number int) (string, error) { + url := fmt.Sprintf("%s/api/v1/repos/%s/%s/pulls/%d.diff", c.BaseURL, owner, repo, number) + body, err := c.doGet(url) + if err != nil { + return "", fmt.Errorf("fetch diff: %w", err) + } + return string(body), nil +} + +// GetCommitStatuses fetches CI statuses for a commit SHA. +func (c *Client) GetCommitStatuses(owner, repo, sha string) ([]CommitStatus, error) { + url := fmt.Sprintf("%s/api/v1/repos/%s/%s/commits/%s/statuses", c.BaseURL, owner, repo, sha) + body, err := c.doGet(url) + if err != nil { + return nil, fmt.Errorf("fetch commit statuses: %w", err) + } + var statuses []CommitStatus + if err := json.Unmarshal(body, &statuses); err != nil { + return nil, fmt.Errorf("parse statuses JSON: %w", err) + } + return statuses, nil +} + +// GetFileContent fetches a file from the default branch of a repo. +func (c *Client) GetFileContent(owner, repo, filepath string) (string, error) { + url := fmt.Sprintf("%s/api/v1/repos/%s/%s/raw/%s", c.BaseURL, owner, repo, filepath) + body, err := c.doGet(url) + if err != nil { + return "", fmt.Errorf("fetch file %s: %w", filepath, err) + } + return string(body), nil +} + +// PostReview submits a review to a PR. 
+// event should be "APPROVED" or "REQUEST_CHANGES". +func (c *Client) PostReview(owner, repo string, number int, event, body string) error { + url := fmt.Sprintf("%s/api/v1/repos/%s/%s/pulls/%d/reviews", c.BaseURL, owner, repo, number) + + payload := struct { + Body string `json:"body"` + Event string `json:"event"` + }{ + Body: body, + Event: event, + } + + data, err := json.Marshal(payload) + if err != nil { + return fmt.Errorf("marshal review payload: %w", err) + } + + req, err := http.NewRequest("POST", url, strings.NewReader(string(data))) + if err != nil { + return fmt.Errorf("create review request: %w", err) + } + req.Header.Set("Authorization", "token "+c.Token) + req.Header.Set("Content-Type", "application/json") + + resp, err := c.HTTP.Do(req) + if err != nil { + return fmt.Errorf("post review: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + respBody, _ := io.ReadAll(resp.Body) + return fmt.Errorf("post review failed (status %d): %s", resp.StatusCode, string(respBody)) + } + return nil +} + +func (c *Client) doGet(url string) ([]byte, error) { + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, err + } + req.Header.Set("Authorization", "token "+c.Token) + + resp, err := c.HTTP.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + body, _ := io.ReadAll(resp.Body) + return nil, fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(body)) + } + return io.ReadAll(resp.Body) +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..9b2e8d2 --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module gitea.weiker.me/rodin/review-bot + +go 1.26.2 diff --git a/llm/client.go b/llm/client.go new file mode 100644 index 0000000..5dd62bc --- /dev/null +++ b/llm/client.go @@ -0,0 +1,99 @@ +package llm + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" +) + +// Client calls an 
OpenAI-compatible chat completion API. +type Client struct { + BaseURL string + APIKey string + Model string + HTTP *http.Client +} + +// NewClient creates a new LLM client. +func NewClient(baseURL, apiKey, model string) *Client { + return &Client{ + BaseURL: strings.TrimRight(baseURL, "/"), + APIKey: apiKey, + Model: model, + HTTP: &http.Client{}, + } +} + +// Message represents a chat message. +type Message struct { + Role string `json:"role"` + Content string `json:"content"` +} + +// ChatRequest is the request payload. +type ChatRequest struct { + Model string `json:"model"` + Messages []Message `json:"messages"` + Temperature float64 `json:"temperature"` +} + +// ChatResponse is the response from the API. +type ChatResponse struct { + Choices []struct { + Message struct { + Content string `json:"content"` + } `json:"message"` + } `json:"choices"` +} + +// Complete sends a chat completion request and returns the assistant's response content. +func (c *Client) Complete(messages []Message) (string, error) { + reqBody := ChatRequest{ + Model: c.Model, + Messages: messages, + Temperature: 0.1, + } + + data, err := json.Marshal(reqBody) + if err != nil { + return "", fmt.Errorf("marshal request: %w", err) + } + + url := c.BaseURL + "/chat/completions" + req, err := http.NewRequest("POST", url, bytes.NewReader(data)) + if err != nil { + return "", fmt.Errorf("create request: %w", err) + } + req.Header.Set("Authorization", "Bearer "+c.APIKey) + req.Header.Set("Content-Type", "application/json") + + resp, err := c.HTTP.Do(req) + if err != nil { + return "", fmt.Errorf("LLM request: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + body, _ := io.ReadAll(resp.Body) + return "", fmt.Errorf("LLM API error (status %d): %s", resp.StatusCode, string(body)) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", fmt.Errorf("read response: %w", err) + } + + var chatResp ChatResponse + if err := 
json.Unmarshal(body, &chatResp); err != nil { + return "", fmt.Errorf("parse response: %w", err) + } + + if len(chatResp.Choices) == 0 { + return "", fmt.Errorf("no choices in LLM response") + } + + return chatResp.Choices[0].Message.Content, nil +} diff --git a/review/formatter.go b/review/formatter.go new file mode 100644 index 0000000..4048b58 --- /dev/null +++ b/review/formatter.go @@ -0,0 +1,48 @@ +package review + +import ( + "fmt" + "strings" +) + +// FormatMarkdown formats a ReviewResult into the markdown body for a Gitea review. +func FormatMarkdown(result *ReviewResult, reviewerName string) string { + var sb strings.Builder + + sb.WriteString("## Summary\n\n") + sb.WriteString(result.Summary) + sb.WriteString("\n\n") + + if len(result.Findings) > 0 { + sb.WriteString("## Findings\n\n") + sb.WriteString("| # | Severity | File | Line | Finding |\n") + sb.WriteString("|---|----------|------|------|--------|\n") + + for i, f := range result.Findings { + sb.WriteString(fmt.Sprintf("| %d | [%s] | `%s` | %d | %s |\n", + i+1, f.Severity, f.File, f.Line, f.Finding)) + } + sb.WriteString("\n") + } + + sb.WriteString("## Recommendation\n\n") + sb.WriteString(fmt.Sprintf("**%s** — %s\n", result.Verdict, result.Recommendation)) + + if reviewerName != "" { + sb.WriteString(fmt.Sprintf("\n---\n*Review by %s*\n", reviewerName)) + } + + return sb.String() +} + +// GiteaEvent converts the verdict to the Gitea API event string. +func GiteaEvent(verdict string) string { + switch verdict { + case "APPROVE": + return "APPROVED" + case "REQUEST_CHANGES": + return "REQUEST_CHANGES" + default: + return "COMMENT" + } +} diff --git a/review/parser.go b/review/parser.go new file mode 100644 index 0000000..dc8eb73 --- /dev/null +++ b/review/parser.go @@ -0,0 +1,76 @@ +package review + +import ( + "encoding/json" + "fmt" + "strings" +) + +// Finding represents a single code review finding. 
+type Finding struct { + Severity string `json:"severity"` + File string `json:"file"` + Line int `json:"line"` + Finding string `json:"finding"` +} + +// ReviewResult is the structured output from the LLM. +type ReviewResult struct { + Verdict string `json:"verdict"` + Summary string `json:"summary"` + Findings []Finding `json:"findings"` + Recommendation string `json:"recommendation"` +} + +// ParseResponse parses the LLM response into a ReviewResult. +func ParseResponse(response string) (*ReviewResult, error) { + // Try to extract JSON from the response — the LLM might wrap it in markdown fences + cleaned := extractJSON(response) + + var result ReviewResult + if err := json.Unmarshal([]byte(cleaned), &result); err != nil { + return nil, fmt.Errorf("parse LLM response as JSON: %w\nRaw response: %s", err, response) + } + + // Validate verdict + switch result.Verdict { + case "APPROVE", "REQUEST_CHANGES": + // valid + default: + return nil, fmt.Errorf("invalid verdict %q (must be APPROVE or REQUEST_CHANGES)", result.Verdict) + } + + // Validate finding severities + for i, f := range result.Findings { + switch f.Severity { + case "MAJOR", "MINOR", "NIT": + // valid + default: + return nil, fmt.Errorf("finding %d has invalid severity %q", i, f.Severity) + } + } + + return &result, nil +} + +// extractJSON attempts to pull JSON from a potentially markdown-wrapped response. 
+func extractJSON(s string) string { + s = strings.TrimSpace(s) + + // Remove markdown code fences if present + if strings.HasPrefix(s, "```") { + lines := strings.Split(s, "\n") + // Remove first line (```json or ```) + if len(lines) > 2 { + lines = lines[1:] + } + // Remove last line (```) + if len(lines) > 0 && strings.TrimSpace(lines[len(lines)-1]) == "```" { + lines = lines[:len(lines)-1] + } + s = strings.Join(lines, "\n") + } + + s = strings.TrimSpace(s) + return s +} diff --git a/review/prompt.go b/review/prompt.go new file mode 100644 index 0000000..0d44970 --- /dev/null +++ b/review/prompt.go @@ -0,0 +1,72 @@ +package review + +import ( + "fmt" + "strings" +) + +// BuildSystemPrompt constructs the system prompt for the LLM reviewer. +func BuildSystemPrompt(conventions string) string { + var sb strings.Builder + + sb.WriteString("You are an expert code reviewer. Review the provided pull request diff carefully.\n\n") + sb.WriteString("Your task:\n") + sb.WriteString("1. Review the diff for correctness, idiomatic code, potential bugs, and design issues.\n") + sb.WriteString("2. Consider the CI status — if CI has failed, that is an automatic REQUEST_CHANGES regardless of code quality.\n") + sb.WriteString("3. 
Output your review as structured JSON (and ONLY JSON, no markdown fences or other text).\n\n") + sb.WriteString("Output format:\n") + sb.WriteString("{\n") + sb.WriteString(" \"verdict\": \"APPROVE\" or \"REQUEST_CHANGES\",\n") + sb.WriteString(" \"summary\": \"Brief overall assessment (1-3 sentences)\",\n") + sb.WriteString(" \"findings\": [\n") + sb.WriteString(" {\n") + sb.WriteString(" \"severity\": \"MAJOR\" or \"MINOR\" or \"NIT\",\n") + sb.WriteString(" \"file\": \"path/to/file\",\n") + sb.WriteString(" \"line\": ,\n") + sb.WriteString(" \"finding\": \"Description of the issue\"\n") + sb.WriteString(" }\n") + sb.WriteString(" ],\n") + sb.WriteString(" \"recommendation\": \"Full recommendation text explaining your verdict\"\n") + sb.WriteString("}\n\n") + sb.WriteString("Rules:\n") + sb.WriteString("- If there are any MAJOR findings → verdict must be REQUEST_CHANGES\n") + sb.WriteString("- If there are no MAJOR findings → verdict should be APPROVE\n") + sb.WriteString("- If CI has failed → verdict must be REQUEST_CHANGES with a finding noting the CI failure\n") + sb.WriteString("- Be thorough but fair. Don't nitpick style unless it impacts readability significantly.\n") + sb.WriteString("- Line numbers should reference the new file line numbers from the diff headers.\n") + sb.WriteString("- If the diff is empty or trivial (only formatting/whitespace), APPROVE with no findings.\n") + + if conventions != "" { + sb.WriteString(fmt.Sprintf("\n\nThe repository has the following coding conventions that should be respected:\n\n%s\n", conventions)) + } + + return sb.String() +} + +// BuildUserPrompt constructs the user message with PR context. 
// BuildUserPrompt returns markdown made of, in order:
//   - a "## Pull Request: <title>" heading,
//   - a "### Description" section (omitted when description is empty),
//   - a "### CI Status: PASSED|FAILED" line chosen from ciPassed, followed by
//     a "CI Details: ..." line when ciDetails is non-empty,
//   - a "### Diff" section holding diff inside a fenced "diff" code block.
//
// The fence is three backticks unless the diff itself contains a run of three
// or more backticks (for example a README change with code fences). In that
// case the fence is made one backtick longer than the longest run, because a
// fence line inside the diff would otherwise close the block early and the
// rest of the diff would reach the model as loose prompt text.
func BuildUserPrompt(title, description, diff string, ciPassed bool, ciDetails string) string {
	var sb strings.Builder

	fmt.Fprintf(&sb, "## Pull Request: %s\n\n", title)

	if description != "" {
		fmt.Fprintf(&sb, "### Description\n%s\n\n", description)
	}

	ciStatus := "PASSED"
	if !ciPassed {
		ciStatus = "FAILED"
	}
	fmt.Fprintf(&sb, "### CI Status: %s\n", ciStatus)

	if ciDetails != "" {
		fmt.Fprintf(&sb, "CI Details: %s\n", ciDetails)
	}

	fence := diffFence(diff)
	sb.WriteString("\n### Diff\n\n")
	sb.WriteString(fence)
	sb.WriteString("diff\n")
	sb.WriteString(diff)
	sb.WriteString("\n")
	sb.WriteString(fence)
	sb.WriteString("\n")

	return sb.String()
}

// diffFence returns a backtick fence that is at least three characters long
// and strictly longer than every run of consecutive backticks in body, so no
// line of body can terminate a code block opened with it.
func diffFence(body string) string {
	longest, run := 0, 0
	for i := 0; i < len(body); i++ {
		if body[i] != '`' {
			run = 0
			continue
		}
		run++
		if run > longest {
			longest = run
		}
	}

	n := 3
	if longest >= n {
		n = longest + 1
	}
	return strings.Repeat("`", n)
}