diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index 6adf9d9..4ada9a6 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -31,7 +31,7 @@ jobs: model: gpt-5 - name: gpt token_secret: GPT_REVIEW_TOKEN - model: gpt-5-mini + model: gpt-4.1 steps: - uses: actions/checkout@v4 - uses: actions/setup-go@v5 @@ -49,5 +49,6 @@ jobs: LLM_MODEL: ${{ matrix.model }} CONVENTIONS_FILE: "CONVENTIONS.md" PATTERNS_REPO: "rodin/go-patterns" - PATTERNS_FILES: "README.md,docs/" + PATTERNS_FILES: "README.md,patterns/" + LLM_TIMEOUT: "600" run: ./review-bot diff --git a/budget/budget.go b/budget/budget.go new file mode 100644 index 0000000..3624efe --- /dev/null +++ b/budget/budget.go @@ -0,0 +1,226 @@ +// Package budget manages LLM context window budgeting for review-bot. +// +// It estimates token usage and progressively trims context content to fit +// within model-specific limits. The trimming order (least important first): +// patterns → conventions → file context → diff truncation. +package budget + +import ( + "fmt" + "strings" + "unicode/utf8" +) + +// modelLimit pairs a model name prefix with its context window size. +type modelLimit struct { + prefix string + limit int +} + +// Known model context limits (in tokens), ordered longest-prefix-first +// for deterministic matching. +var modelLimits = []modelLimit{ + {"claude-haiku-3.5-20241022", 200_000}, + {"claude-sonnet-4-20250514", 200_000}, + {"claude-opus-4-20250514", 200_000}, + {"gpt-4.1-mini", 128_000}, + {"gpt-5-mini", 200_000}, + {"gpt-4.1", 128_000}, + {"gpt-5", 200_000}, +} + +const defaultLimit = 128_000 + +// reserveTokens is headroom for the response generation. +const reserveTokens = 4_000 + +const diffTruncMarker = "\n\n... [diff truncated due to context limit] ..." +const diffTooLargeMarker = "... [diff too large for context window — review manually] ..." +const userMetaTruncMarker = "\n... [description truncated] ..." + +// EstimateTokens estimates the number of tokens in a string. +// Uses the rough heuristic of ~4 bytes per token, which is +// conservative for English text and code. +func EstimateTokens(s string) int { + return len(s) / 4 +} + +// LimitForModel returns the context window size for the given model. +// Uses longest-prefix-first matching for deterministic results. +func LimitForModel(model string) int { + for _, ml := range modelLimits { + if model == ml.prefix || strings.HasPrefix(model, ml.prefix) { + return ml.limit + } + } + return defaultLimit +} + +// Sections holds the prompt content sections in trim priority order. +// When the total exceeds the budget, sections are trimmed from least +// important (Patterns) to most important (Diff). +type Sections struct { + SystemBase string // Core instructions (never trimmed) + Patterns string // Language patterns (trimmed first) + Conventions string // Repo conventions (trimmed second) + FileContext string // Full file content (trimmed third) + Diff string // The actual diff (trimmed last, only truncated) + UserMeta string // PR title, description, CI status (truncated only if base exceeds budget) +} + +// Result holds the trimmed content and metadata about what was dropped. +type Result struct { + SystemPrompt string + UserPrompt string + Trimmed []string // Human-readable descriptions of what was trimmed + EstTokens int // Estimated total tokens after trimming +} + +// Fit trims sections to fit within the model's context limit. +// Returns the assembled prompts and a list of what was trimmed. +func Fit(model string, sections Sections) Result { + limit := LimitForModel(model) - reserveTokens + + baseTokens := EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta) + available := limit - baseTokens + if available < 0 { + // Base content alone exceeds budget. Truncate UserMeta (keep first ~1000 tokens). + if len(sections.UserMeta) > 4000 { + sections.UserMeta = truncateUTF8(sections.UserMeta, 4000) + userMetaTruncMarker + baseTokens = EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta) + available = limit - baseTokens + } + if available < 0 { + available = 0 + } + } + + // Trimmable sections in priority order (first = dropped first) + type entry struct { + name string + content *string + } + entries := []entry{ + {"patterns", §ions.Patterns}, + {"conventions", §ions.Conventions}, + {"file context", §ions.FileContext}, + } + + // Check if everything fits + totalTrimmable := EstimateTokens(sections.Diff) + for _, e := range entries { + totalTrimmable += EstimateTokens(*e.content) + } + + var trimmed []string + if totalTrimmable > available { + // Trim from least important + for i := range entries { + tokens := EstimateTokens(*entries[i].content) + if tokens == 0 { + continue + } + trimmed = append(trimmed, fmt.Sprintf("%s (~%dK tokens)", entries[i].name, tokens/1000)) + *entries[i].content = "" + + // Recalculate + totalTrimmable = EstimateTokens(sections.Diff) + for _, e := range entries { + totalTrimmable += EstimateTokens(*e.content) + } + if totalTrimmable <= available { + break + } + } + } + + // If still too large, truncate the diff + if totalTrimmable > available { + diffBudget := available + for _, e := range entries { + diffBudget -= EstimateTokens(*e.content) + } + if diffBudget < 0 { + diffBudget = 0 + } + // Reserve space for truncation marker + markerBudget := EstimateTokens(diffTruncMarker) + effectiveBudget := diffBudget - markerBudget + if effectiveBudget < 0 { + effectiveBudget = 0 + } + maxChars := effectiveBudget * 4 + if maxChars < len(sections.Diff) { + removed := EstimateTokens(sections.Diff) - diffBudget + trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000)) + if maxChars > 0 { + if diffBudget >= markerBudget { + sections.Diff = truncateUTF8(sections.Diff, maxChars) + diffTruncMarker + } else { + sections.Diff = truncateUTF8(sections.Diff, maxChars) + } + } else { + sections.Diff = diffTooLargeMarker + } + } + } + + finalTokens := baseTokens + for _, e := range entries { + finalTokens += EstimateTokens(*e.content) + } + finalTokens += EstimateTokens(sections.Diff) + + return buildResult(sections, trimmed, finalTokens) +} + +func buildResult(s Sections, trimmed []string, estTokens int) Result { + var sys strings.Builder + sys.WriteString(s.SystemBase) + if s.Patterns != "" { + sys.WriteString("\n\n## Language Patterns & Idioms\n\nUse the following patterns as review criteria. Code that violates these established patterns is a finding:\n\n") + sys.WriteString(s.Patterns) + } + if s.Conventions != "" { + sys.WriteString("\n\n## Repository Conventions\n\nThe repository has the following coding conventions that must be respected:\n\n") + sys.WriteString(s.Conventions) + } + + var usr strings.Builder + usr.WriteString(s.UserMeta) + if s.FileContext != "" { + usr.WriteString("\n### Full File Context (modified files)\n\n") + usr.WriteString(s.FileContext) + usr.WriteString("\n") + } + if s.Diff != "" { + usr.WriteString("\n### Diff (changes to review)\n\n```diff\n") + usr.WriteString(s.Diff) + usr.WriteString("\n```\n") + } + + if len(trimmed) > 0 { + usr.WriteString("\n⚠️ Note: Context was trimmed to fit model limits. Dropped: ") + usr.WriteString(strings.Join(trimmed, ", ")) + usr.WriteString("\n") + } + + return Result{ + SystemPrompt: sys.String(), + UserPrompt: usr.String(), + Trimmed: trimmed, + EstTokens: estTokens, + } +} + +// truncateUTF8 truncates s to at most maxBytes without splitting multi-byte +// UTF-8 characters. Returns a valid UTF-8 string of at most maxBytes bytes. +func truncateUTF8(s string, maxBytes int) string { + if len(s) <= maxBytes { + return s + } + for maxBytes > 0 && !utf8.RuneStart(s[maxBytes]) { + maxBytes-- + } + return s[:maxBytes] +} diff --git a/budget/budget_test.go b/budget/budget_test.go new file mode 100644 index 0000000..7efc917 --- /dev/null +++ b/budget/budget_test.go @@ -0,0 +1,203 @@ +package budget + +import ( + "strings" + "testing" +) + +func TestEstimateTokens(t *testing.T) { + tests := []struct { + input string + want int + }{ + {"", 0}, + {"abcd", 1}, + {"12345678", 2}, + {strings.Repeat("x", 400), 100}, + } + for _, tt := range tests { + got := EstimateTokens(tt.input) + if got != tt.want { + t.Errorf("EstimateTokens(%d chars) = %d, want %d", len(tt.input), got, tt.want) + } + } +} + +func TestLimitForModel(t *testing.T) { + tests := []struct { + model string + want int + }{ + {"gpt-4.1", 128_000}, + {"gpt-5", 200_000}, + {"gpt-5-mini", 200_000}, + {"unknown-model", defaultLimit}, + {"gpt-4.1-2026-01-01", 128_000}, // prefix match + } + for _, tt := range tests { + got := LimitForModel(tt.model) + if got != tt.want { + t.Errorf("LimitForModel(%q) = %d, want %d", tt.model, got, tt.want) + } + } +} + +func TestFit_AllFits(t *testing.T) { + s := Sections{ + SystemBase: "system instructions", + Patterns: "some patterns", + Conventions: "some conventions", + FileContext: "file content", + Diff: "diff content", + UserMeta: "PR: title\n", + } + result := Fit("gpt-5", s) + + if len(result.Trimmed) != 0 { + t.Errorf("expected no trimming, got %v", result.Trimmed) + } + if !strings.Contains(result.SystemPrompt, "some patterns") { + t.Error("expected patterns in system prompt") + } + if !strings.Contains(result.SystemPrompt, "some conventions") { + t.Error("expected conventions in system prompt") + } + if !strings.Contains(result.UserPrompt, "file content") { + t.Error("expected file context in user prompt") + } +} + +func TestFit_TrimsPatterns(t *testing.T) { + // Create content that exceeds 128K token budget for gpt-4.1 + // Budget ≈ 128K - 4K reserve = 124K tokens = ~496K chars + // Fill patterns with enough to push over + bigPatterns := strings.Repeat("x", 500_000) // ~125K tokens + s := Sections{ + SystemBase: "base", + Patterns: bigPatterns, + Conventions: "conventions", + FileContext: "files", + Diff: "diff", + UserMeta: "meta", + } + result := Fit("gpt-4.1", s) + + if len(result.Trimmed) == 0 { + t.Fatal("expected trimming") + } + if !strings.Contains(result.Trimmed[0], "patterns") { + t.Errorf("expected patterns to be trimmed first, got %v", result.Trimmed) + } + if strings.Contains(result.SystemPrompt, bigPatterns[:100]) { + t.Error("expected patterns to be removed from output") + } + // Conventions should survive + if !strings.Contains(result.SystemPrompt, "conventions") { + t.Error("expected conventions to survive after patterns trimmed") + } +} + +func TestFit_TrimsConventions(t *testing.T) { + // Patterns + conventions + diff all exceed budget even after patterns removed + big := strings.Repeat("y", 520_000) // ~130K tokens each (exceeds 124K budget even alone) + s := Sections{ + SystemBase: "base", + Patterns: big, + Conventions: big, + FileContext: "files", + Diff: "diff", + UserMeta: "meta", + } + result := Fit("gpt-4.1", s) + + if len(result.Trimmed) < 2 { + t.Fatalf("expected at least 2 trimmed, got %v", result.Trimmed) + } + if !strings.Contains(result.Trimmed[0], "patterns") { + t.Errorf("expected patterns trimmed first, got %s", result.Trimmed[0]) + } + if !strings.Contains(result.Trimmed[1], "conventions") { + t.Errorf("expected conventions trimmed second, got %s", result.Trimmed[1]) + } +} + +func TestFit_TruncatesDiff(t *testing.T) { + // Only diff is huge, no patterns/conventions + hugeDiff := strings.Repeat("z", 600_000) // ~150K tokens > 128K limit + s := Sections{ + SystemBase: "base", + Diff: hugeDiff, + UserMeta: "meta", + } + result := Fit("gpt-4.1", s) + + if len(result.Trimmed) == 0 { + t.Fatal("expected diff truncation") + } + if !strings.Contains(result.Trimmed[len(result.Trimmed)-1], "diff truncated") { + t.Errorf("expected diff truncation note, got %v", result.Trimmed) + } + if !strings.Contains(result.UserPrompt, "[diff truncated due to context limit]") { + t.Error("expected truncation marker in user prompt") + } +} + +func TestFit_PreservesNoteInOutput(t *testing.T) { + big := strings.Repeat("w", 500_000) + s := Sections{ + SystemBase: "base", + Patterns: big, + Diff: "small diff", + UserMeta: "meta", + } + result := Fit("gpt-4.1", s) + + if !strings.Contains(result.UserPrompt, "⚠️ Note: Context was trimmed") { + t.Error("expected trimming note in user prompt") + } +} + + +func TestFit_HugeUserMeta(t *testing.T) { + // UserMeta so large that base alone exceeds limit + // Use a unique marker past the truncation point + hugeDesc := strings.Repeat("d", 5000) + "UNIQUE_MARKER_PAST_TRUNCATION" + strings.Repeat("d", 595_000) + s := Sections{ + SystemBase: "base", + Diff: "small diff", + UserMeta: hugeDesc, + } + result := Fit("gpt-4.1", s) + + limit := LimitForModel("gpt-4.1") - reserveTokens + if result.EstTokens > limit { + t.Errorf("EstTokens %d exceeds limit %d", result.EstTokens, limit) + } + // Content past truncation point should not be present + if strings.Contains(result.UserPrompt, "UNIQUE_MARKER_PAST_TRUNCATION") { + t.Error("expected UserMeta to be truncated but found content past truncation point") + } + // Truncation marker should be present + if !strings.Contains(result.UserPrompt, "[description truncated]") { + t.Error("expected truncation marker in output") + } +} + +func TestFit_NeverExceedsLimit(t *testing.T) { + // All sections huge — verify final tokens never exceed limit + big := strings.Repeat("a", 200_000) + s := Sections{ + SystemBase: strings.Repeat("s", 8000), + Patterns: big, + Conventions: big, + FileContext: big, + Diff: big, + UserMeta: strings.Repeat("m", 8000), + } + result := Fit("gpt-4.1", s) + + limit := LimitForModel("gpt-4.1") - reserveTokens + if result.EstTokens > limit { + t.Errorf("EstTokens %d exceeds limit %d (trimmed: %v)", result.EstTokens, limit, result.Trimmed) + } +} diff --git a/cmd/review-bot/main.go b/cmd/review-bot/main.go index bca9979..065afca 100644 --- a/cmd/review-bot/main.go +++ b/cmd/review-bot/main.go @@ -10,6 +10,7 @@ import ( "strings" "time" + "gitea.weiker.me/rodin/review-bot/budget" "gitea.weiker.me/rodin/review-bot/gitea" "gitea.weiker.me/rodin/review-bot/llm" "gitea.weiker.me/rodin/review-bot/review" @@ -148,15 +149,26 @@ func main() { log.Printf("Loaded patterns from %s (%d bytes)", *patternsRepo, len(patterns)) } - // Step 7: Build prompts - systemPrompt := review.BuildSystemPrompt(conventions, patterns) - userPrompt := review.BuildUserPrompt(pr.Title, pr.Body, diff, fileContext, ciPassed, ciDetails) + // Step 7: Budget-aware prompt assembly + sections := budget.Sections{ + SystemBase: review.BuildSystemBase(), + Patterns: patterns, + Conventions: conventions, + FileContext: fileContext, + Diff: diff, + UserMeta: review.BuildUserMeta(pr.Title, pr.Body, ciPassed, ciDetails), + } + budgetResult := budget.Fit(*llmModel, sections) + log.Printf("Token estimate: ~%dK (limit: %dK)", budgetResult.EstTokens/1000, budget.LimitForModel(*llmModel)/1000) + if len(budgetResult.Trimmed) > 0 { + log.Printf("Context trimmed: %v", budgetResult.Trimmed) + } // Step 8: Call LLM log.Printf("Sending to LLM (%s)...", *llmModel) messages := []llm.Message{ - {Role: "system", Content: systemPrompt}, - {Role: "user", Content: userPrompt}, + {Role: "system", Content: budgetResult.SystemPrompt}, + {Role: "user", Content: budgetResult.UserPrompt}, } response, err := llmClient.Complete(ctx, messages) @@ -250,12 +262,12 @@ func fetchPatterns(ctx context.Context, client *gitea.Client, patternsRepo, patt continue } - for filepath, content := range files { + for filePath, content := range files { // Only include markdown and text files as patterns - if !isPatternFile(filepath) { + if !isPatternFile(filePath) { continue } - sb.WriteString(fmt.Sprintf("### %s/%s\n\n%s\n\n", repoRef, filepath, content)) + sb.WriteString(fmt.Sprintf("### %s/%s\n\n%s\n\n", repoRef, filePath, content)) } } } diff --git a/review/prompt.go b/review/prompt.go index a4072a9..24f743b 100644 --- a/review/prompt.go +++ b/review/prompt.go @@ -7,8 +7,10 @@ import ( "strings" ) -// BuildSystemPrompt constructs the system prompt for the LLM reviewer. -func BuildSystemPrompt(conventions, patterns string) string { +// BuildSystemBase returns the core system prompt instructions without +// patterns or conventions. Used by the budget package to separate +// trimmable from non-trimmable content. +func BuildSystemBase() string { var sb strings.Builder sb.WriteString("You are an expert code reviewer. Review the provided pull request diff carefully.\n\n") @@ -42,6 +44,15 @@ func BuildSystemPrompt(conventions, patterns string) string { sb.WriteString("- Line numbers should reference the new file line numbers from the diff headers.\n") sb.WriteString("- If the diff is empty or trivial (only formatting/whitespace), APPROVE with no findings.\n") + return sb.String() +} + +// BuildSystemPrompt constructs the full system prompt with patterns and conventions. +// Deprecated: Use BuildSystemBase with budget.Fit for context-aware assembly. +func BuildSystemPrompt(conventions, patterns string) string { + var sb strings.Builder + sb.WriteString(BuildSystemBase()) + if patterns != "" { sb.WriteString(fmt.Sprintf("\n\n## Language Patterns & Idioms\n\nUse the following patterns as review criteria. Code that violates these established patterns is a finding:\n\n%s\n", patterns)) } @@ -53,8 +64,9 @@ func BuildSystemPrompt(conventions, patterns string) string { return sb.String() } -// BuildUserPrompt constructs the user message with PR context. -func BuildUserPrompt(title, description, diff, fileContext string, ciPassed bool, ciDetails string) string { +// BuildUserMeta returns the PR metadata header (title, description, CI status) +// without the diff or file context. Used by the budget package. +func BuildUserMeta(title, description string, ciPassed bool, ciDetails string) string { var sb strings.Builder sb.WriteString(fmt.Sprintf("## Pull Request: %s\n\n", title)) @@ -73,6 +85,16 @@ func BuildUserPrompt(title, description, diff, fileContext string, ciPassed bool sb.WriteString(fmt.Sprintf("CI Details: %s\n", ciDetails)) } + return sb.String() +} + +// BuildUserPrompt constructs the user message with PR context. +// Deprecated: Use BuildUserMeta with budget.Fit for context-aware assembly. +func BuildUserPrompt(title, description, diff, fileContext string, ciPassed bool, ciDetails string) string { + var sb strings.Builder + + sb.WriteString(BuildUserMeta(title, description, ciPassed, ciDetails)) + if fileContext != "" { sb.WriteString("\n### Full File Context (modified files)\n\n") sb.WriteString(fileContext) diff --git a/review/prompt_test.go b/review/prompt_test.go index c3c1d2a..b8d705e 100644 --- a/review/prompt_test.go +++ b/review/prompt_test.go @@ -116,3 +116,43 @@ func TestBuildUserPrompt_WithoutFileContext(t *testing.T) { t.Error("should not include file context section when empty") } } + + +func TestBuildSystemBase(t *testing.T) { + result := BuildSystemBase() + if result == "" { + t.Fatal("BuildSystemBase returned empty string") + } + if !strings.Contains(result, "expert code reviewer") { + t.Error("expected reviewer role in system base") + } + if !strings.Contains(result, "REQUEST_CHANGES") { + t.Error("expected verdict format in system base") + } + if !strings.Contains(result, "JSON") { + t.Error("expected JSON output instruction in system base") + } +} + +func TestBuildUserMeta(t *testing.T) { + result := BuildUserMeta("Fix bug", "Some description", true, "all checks passed") + if !strings.Contains(result, "Fix bug") { + t.Error("expected title in user meta") + } + if !strings.Contains(result, "Some description") { + t.Error("expected description in user meta") + } + if !strings.Contains(result, "PASSED") { + t.Error("expected CI PASSED status") + } +} + +func TestBuildUserMeta_CIFailed(t *testing.T) { + result := BuildUserMeta("Title", "", false, "test job failed") + if !strings.Contains(result, "FAILED") { + t.Error("expected CI FAILED status") + } + if strings.Contains(result, "Description") { + t.Error("expected no description section when empty") + } +}