From 67d835909f72a58b70e7dc00b09254c1eda54146 Mon Sep 17 00:00:00 2001 From: Rodin Date: Fri, 1 May 2026 18:46:53 -0700 Subject: [PATCH 1/6] feat: add context budget system for LLM overflow (#19) Adds a budget package that estimates token usage and progressively trims context to fit within model-specific limits. Trim order (least important first): 1. Language patterns 2. Repository conventions 3. Full file context 4. Diff (truncated as last resort) When content is trimmed, a note is appended to the user prompt so the LLM knows context was reduced. - New budget package with Fit(), EstimateTokens(), LimitForModel() - Model limit table (GPT-4.1: 128K, GPT-5: 200K, Claude: 200K) - Refactored review/prompt.go: BuildSystemBase() and BuildUserMeta() extract non-trimmable content; old functions delegate to new ones - main.go uses budget.Fit() instead of direct prompt assembly - 7 unit tests covering all trim paths Closes #19 --- budget/budget.go | 181 +++++++++++++++++++++++++++++++++++++++++ budget/budget_test.go | 158 +++++++++++++++++++++++++++++++++++ cmd/review-bot/main.go | 22 +++-- review/prompt.go | 30 ++++++- 4 files changed, 382 insertions(+), 9 deletions(-) create mode 100644 budget/budget.go create mode 100644 budget/budget_test.go diff --git a/budget/budget.go b/budget/budget.go new file mode 100644 index 0000000..f6df937 --- /dev/null +++ b/budget/budget.go @@ -0,0 +1,181 @@ +// Package budget manages LLM context window budgeting for review-bot. +// +// It estimates token usage and progressively trims context content to fit +// within model-specific limits. The trimming order (least important first): +// patterns → conventions → file context → diff truncation. +package budget + +import ( + "fmt" + "strings" +) + +// Known model context limits (in tokens). +// Models not listed here get the conservative default. +var modelLimits = map[string]int{ + "gpt-4.1": 128_000, + "gpt-4.1-mini": 128_000, + "gpt-5": 200_000, + "gpt-5-mini": 200_000, + "claude-sonnet-4-20250514": 200_000, + "claude-opus-4-20250514": 200_000, + "claude-haiku-3.5-20241022": 200_000, +} + +const defaultLimit = 128_000 + +// reserveTokens is headroom for the response generation. +const reserveTokens = 4_000 + +// EstimateTokens estimates the number of tokens in a string. +// Uses the rough heuristic of ~4 characters per token, which is +// conservative for English text and code. +func EstimateTokens(s string) int { + return len(s) / 4 +} + +// LimitForModel returns the context window size for the given model. +func LimitForModel(model string) int { + if limit, ok := modelLimits[model]; ok { + return limit + } + for prefix, limit := range modelLimits { + if strings.HasPrefix(model, prefix) { + return limit + } + } + return defaultLimit +} + +// Sections holds the prompt content sections in trim priority order. +// When the total exceeds the budget, sections are trimmed from least +// important (Patterns) to most important (Diff). +type Sections struct { + SystemBase string // Core instructions (never trimmed) + Patterns string // Language patterns (trimmed first) + Conventions string // Repo conventions (trimmed second) + FileContext string // Full file content (trimmed third) + Diff string // The actual diff (trimmed last, only truncated) + UserMeta string // PR title, description, CI status (never trimmed) +} + +// Result holds the trimmed content and metadata about what was dropped. +type Result struct { + SystemPrompt string + UserPrompt string + Trimmed []string // Human-readable descriptions of what was trimmed + EstTokens int // Estimated total tokens after trimming +} + +// Fit trims sections to fit within the model's context limit. +// Returns the assembled prompts and a list of what was trimmed. +func Fit(model string, sections Sections) Result { + limit := LimitForModel(model) - reserveTokens + + baseTokens := EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta) + available := limit - baseTokens + if available < 0 { + available = limit / 2 + } + + // Trimmable sections in priority order (first = dropped first) + type entry struct { + name string + content *string + } + entries := []entry{ + {"patterns", §ions.Patterns}, + {"conventions", §ions.Conventions}, + {"file context", §ions.FileContext}, + } + + // Check if everything fits + totalTrimmable := EstimateTokens(sections.Diff) + for _, e := range entries { + totalTrimmable += EstimateTokens(*e.content) + } + + var trimmed []string + if totalTrimmable > available { + // Trim from least important + for i := range entries { + tokens := EstimateTokens(*entries[i].content) + if tokens == 0 { + continue + } + trimmed = append(trimmed, fmt.Sprintf("%s (~%dK tokens)", entries[i].name, tokens/1000)) + *entries[i].content = "" + + // Recalculate + totalTrimmable = EstimateTokens(sections.Diff) + for _, e := range entries { + totalTrimmable += EstimateTokens(*e.content) + } + if totalTrimmable <= available { + break + } + } + } + + // If still too large, truncate the diff + if totalTrimmable > available { + diffBudget := available + for _, e := range entries { + diffBudget -= EstimateTokens(*e.content) + } + if diffBudget < 1000 { + diffBudget = 1000 + } + maxChars := diffBudget * 4 + if maxChars < len(sections.Diff) { + removed := EstimateTokens(sections.Diff) - diffBudget + trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000)) + sections.Diff = sections.Diff[:maxChars] + "\n\n... [diff truncated due to context limit] ..." + } + } + + finalTokens := baseTokens + for _, e := range entries { + finalTokens += EstimateTokens(*e.content) + } + finalTokens += EstimateTokens(sections.Diff) + + return buildResult(sections, trimmed, finalTokens) +} + +func buildResult(s Sections, trimmed []string, estTokens int) Result { + var sys strings.Builder + sys.WriteString(s.SystemBase) + if s.Patterns != "" { + sys.WriteString("\n\n## Language Patterns & Idioms\n\nUse the following patterns as review criteria. Code that violates these established patterns is a finding:\n\n") + sys.WriteString(s.Patterns) + } + if s.Conventions != "" { + sys.WriteString("\n\n## Repository Conventions\n\nThe repository has the following coding conventions that must be respected:\n\n") + sys.WriteString(s.Conventions) + } + + var usr strings.Builder + usr.WriteString(s.UserMeta) + if s.FileContext != "" { + usr.WriteString("\n### Full File Context (modified files)\n\n") + usr.WriteString(s.FileContext) + usr.WriteString("\n") + } + usr.WriteString("\n### Diff (changes to review)\n\n```diff\n") + usr.WriteString(s.Diff) + usr.WriteString("\n```\n") + + if len(trimmed) > 0 { + usr.WriteString("\n⚠️ Note: Context was trimmed to fit model limits. Dropped: ") + usr.WriteString(strings.Join(trimmed, ", ")) + usr.WriteString("\n") + } + + return Result{ + SystemPrompt: sys.String(), + UserPrompt: usr.String(), + Trimmed: trimmed, + EstTokens: estTokens, + } +} diff --git a/budget/budget_test.go b/budget/budget_test.go new file mode 100644 index 0000000..19c39ab --- /dev/null +++ b/budget/budget_test.go @@ -0,0 +1,158 @@ +package budget + +import ( + "strings" + "testing" +) + +func TestEstimateTokens(t *testing.T) { + tests := []struct { + input string + want int + }{ + {"", 0}, + {"abcd", 1}, + {"12345678", 2}, + {strings.Repeat("x", 400), 100}, + } + for _, tt := range tests { + got := EstimateTokens(tt.input) + if got != tt.want { + t.Errorf("EstimateTokens(%d chars) = %d, want %d", len(tt.input), got, tt.want) + } + } +} + +func TestLimitForModel(t *testing.T) { + tests := []struct { + model string + want int + }{ + {"gpt-4.1", 128_000}, + {"gpt-5", 200_000}, + {"gpt-5-mini", 200_000}, + {"unknown-model", defaultLimit}, + {"gpt-4.1-2026-01-01", 128_000}, // prefix match + } + for _, tt := range tests { + got := LimitForModel(tt.model) + if got != tt.want { + t.Errorf("LimitForModel(%q) = %d, want %d", tt.model, got, tt.want) + } + } +} + +func TestFit_AllFits(t *testing.T) { + s := Sections{ + SystemBase: "system instructions", + Patterns: "some patterns", + Conventions: "some conventions", + FileContext: "file content", + Diff: "diff content", + UserMeta: "PR: title\n", + } + result := Fit("gpt-5", s) + + if len(result.Trimmed) != 0 { + t.Errorf("expected no trimming, got %v", result.Trimmed) + } + if !strings.Contains(result.SystemPrompt, "some patterns") { + t.Error("expected patterns in system prompt") + } + if !strings.Contains(result.SystemPrompt, "some conventions") { + t.Error("expected conventions in system prompt") + } + if !strings.Contains(result.UserPrompt, "file content") { + t.Error("expected file context in user prompt") + } +} + +func TestFit_TrimsPatterns(t *testing.T) { + // Create content that exceeds 128K token budget for gpt-4.1 + // Budget ≈ 128K - 4K reserve = 124K tokens = ~496K chars + // Fill patterns with enough to push over + bigPatterns := strings.Repeat("x", 500_000) // ~125K tokens + s := Sections{ + SystemBase: "base", + Patterns: bigPatterns, + Conventions: "conventions", + FileContext: "files", + Diff: "diff", + UserMeta: "meta", + } + result := Fit("gpt-4.1", s) + + if len(result.Trimmed) == 0 { + t.Fatal("expected trimming") + } + if !strings.Contains(result.Trimmed[0], "patterns") { + t.Errorf("expected patterns to be trimmed first, got %v", result.Trimmed) + } + if strings.Contains(result.SystemPrompt, bigPatterns[:100]) { + t.Error("expected patterns to be removed from output") + } + // Conventions should survive + if !strings.Contains(result.SystemPrompt, "conventions") { + t.Error("expected conventions to survive after patterns trimmed") + } +} + +func TestFit_TrimsConventions(t *testing.T) { + // Patterns + conventions + diff all exceed budget even after patterns removed + big := strings.Repeat("y", 520_000) // ~130K tokens each (exceeds 124K budget even alone) + s := Sections{ + SystemBase: "base", + Patterns: big, + Conventions: big, + FileContext: "files", + Diff: "diff", + UserMeta: "meta", + } + result := Fit("gpt-4.1", s) + + if len(result.Trimmed) < 2 { + t.Fatalf("expected at least 2 trimmed, got %v", result.Trimmed) + } + if !strings.Contains(result.Trimmed[0], "patterns") { + t.Errorf("expected patterns trimmed first, got %s", result.Trimmed[0]) + } + if !strings.Contains(result.Trimmed[1], "conventions") { + t.Errorf("expected conventions trimmed second, got %s", result.Trimmed[1]) + } +} + +func TestFit_TruncatesDiff(t *testing.T) { + // Only diff is huge, no patterns/conventions + hugeDiff := strings.Repeat("z", 600_000) // ~150K tokens > 128K limit + s := Sections{ + SystemBase: "base", + Diff: hugeDiff, + UserMeta: "meta", + } + result := Fit("gpt-4.1", s) + + if len(result.Trimmed) == 0 { + t.Fatal("expected diff truncation") + } + if !strings.Contains(result.Trimmed[len(result.Trimmed)-1], "diff truncated") { + t.Errorf("expected diff truncation note, got %v", result.Trimmed) + } + if !strings.Contains(result.UserPrompt, "[diff truncated due to context limit]") { + t.Error("expected truncation marker in user prompt") + } +} + +func TestFit_PreservesNoteInOutput(t *testing.T) { + big := strings.Repeat("w", 500_000) + s := Sections{ + SystemBase: "base", + Patterns: big, + Diff: "small diff", + UserMeta: "meta", + } + result := Fit("gpt-4.1", s) + + if !strings.Contains(result.UserPrompt, "⚠️ Note: Context was trimmed") { + t.Error("expected trimming note in user prompt") + } +} diff --git a/cmd/review-bot/main.go b/cmd/review-bot/main.go index f31c756..0c52a87 100644 --- a/cmd/review-bot/main.go +++ b/cmd/review-bot/main.go @@ -10,6 +10,7 @@ import ( "strings" "time" + "gitea.weiker.me/rodin/review-bot/budget" "gitea.weiker.me/rodin/review-bot/gitea" "gitea.weiker.me/rodin/review-bot/llm" "gitea.weiker.me/rodin/review-bot/review" @@ -141,15 +142,26 @@ func main() { log.Printf("Loaded patterns from %s (%d bytes)", *patternsRepo, len(patterns)) } - // Step 7: Build prompts - systemPrompt := review.BuildSystemPrompt(conventions, patterns) - userPrompt := review.BuildUserPrompt(pr.Title, pr.Body, diff, fileContext, ciPassed, ciDetails) + // Step 7: Budget-aware prompt assembly + sections := budget.Sections{ + SystemBase: review.BuildSystemBase(), + Patterns: patterns, + Conventions: conventions, + FileContext: fileContext, + Diff: diff, + UserMeta: review.BuildUserMeta(pr.Title, pr.Body, ciPassed, ciDetails), + } + budgetResult := budget.Fit(*llmModel, sections) + log.Printf("Token estimate: ~%dK (limit: %dK)", budgetResult.EstTokens/1000, budget.LimitForModel(*llmModel)/1000) + if len(budgetResult.Trimmed) > 0 { + log.Printf("Context trimmed: %v", budgetResult.Trimmed) + } // Step 8: Call LLM log.Printf("Sending to LLM (%s)...", *llmModel) messages := []llm.Message{ - {Role: "system", Content: systemPrompt}, - {Role: "user", Content: userPrompt}, + {Role: "system", Content: budgetResult.SystemPrompt}, + {Role: "user", Content: budgetResult.UserPrompt}, } response, err := llmClient.Complete(ctx, messages) diff --git a/review/prompt.go b/review/prompt.go index a4072a9..24f743b 100644 --- a/review/prompt.go +++ b/review/prompt.go @@ -7,8 +7,10 @@ import ( "strings" ) -// BuildSystemPrompt constructs the system prompt for the LLM reviewer. -func BuildSystemPrompt(conventions, patterns string) string { +// BuildSystemBase returns the core system prompt instructions without +// patterns or conventions. Used by the budget package to separate +// trimmable from non-trimmable content. +func BuildSystemBase() string { var sb strings.Builder sb.WriteString("You are an expert code reviewer. Review the provided pull request diff carefully.\n\n") @@ -42,6 +44,15 @@ func BuildSystemPrompt(conventions, patterns string) string { sb.WriteString("- Line numbers should reference the new file line numbers from the diff headers.\n") sb.WriteString("- If the diff is empty or trivial (only formatting/whitespace), APPROVE with no findings.\n") + return sb.String() +} + +// BuildSystemPrompt constructs the full system prompt with patterns and conventions. +// Deprecated: Use BuildSystemBase with budget.Fit for context-aware assembly. +func BuildSystemPrompt(conventions, patterns string) string { + var sb strings.Builder + sb.WriteString(BuildSystemBase()) + if patterns != "" { sb.WriteString(fmt.Sprintf("\n\n## Language Patterns & Idioms\n\nUse the following patterns as review criteria. Code that violates these established patterns is a finding:\n\n%s\n", patterns)) } @@ -53,8 +64,9 @@ func BuildSystemPrompt(conventions, patterns string) string { return sb.String() } -// BuildUserPrompt constructs the user message with PR context. -func BuildUserPrompt(title, description, diff, fileContext string, ciPassed bool, ciDetails string) string { +// BuildUserMeta returns the PR metadata header (title, description, CI status) +// without the diff or file context. Used by the budget package. +func BuildUserMeta(title, description string, ciPassed bool, ciDetails string) string { var sb strings.Builder sb.WriteString(fmt.Sprintf("## Pull Request: %s\n\n", title)) @@ -73,6 +85,16 @@ func BuildUserPrompt(title, description, diff, fileContext string, ciPassed bool sb.WriteString(fmt.Sprintf("CI Details: %s\n", ciDetails)) } + return sb.String() +} + +// BuildUserPrompt constructs the user message with PR context. +// Deprecated: Use BuildUserMeta with budget.Fit for context-aware assembly. +func BuildUserPrompt(title, description, diff, fileContext string, ciPassed bool, ciDetails string) string { + var sb strings.Builder + + sb.WriteString(BuildUserMeta(title, description, ciPassed, ciDetails)) + if fileContext != "" { sb.WriteString("\n### Full File Context (modified files)\n\n") sb.WriteString(fileContext) From d9cacf6f62cbbd41dae69b63e418e9e2d8599968 Mon Sep 17 00:00:00 2001 From: Rodin Date: Fri, 1 May 2026 18:51:22 -0700 Subject: [PATCH 2/6] fix: strict budget enforcement + deterministic model matching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses review findings: - Replace map-based model limits with ordered slice (longest-prefix-first) for deterministic matching - Truncate UserMeta when base content alone exceeds budget (keeps first 4000 chars + truncation marker) - Remove hard minimum of 1000 tokens for diff budget — use 0 as floor to guarantee total never exceeds limit - Handle zero-budget edge case (diff replaced with manual-review message) - Add tests: huge UserMeta, all-sections-huge never exceeds limit --- budget/budget.go | 56 +++++++++++++++++++++++++++---------------- budget/budget_test.go | 45 ++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 20 deletions(-) diff --git a/budget/budget.go b/budget/budget.go index f6df937..449687e 100644 --- a/budget/budget.go +++ b/budget/budget.go @@ -10,16 +10,22 @@ import ( "strings" ) -// Known model context limits (in tokens). -// Models not listed here get the conservative default. -var modelLimits = map[string]int{ - "gpt-4.1": 128_000, - "gpt-4.1-mini": 128_000, - "gpt-5": 200_000, - "gpt-5-mini": 200_000, - "claude-sonnet-4-20250514": 200_000, - "claude-opus-4-20250514": 200_000, - "claude-haiku-3.5-20241022": 200_000, +// modelLimit pairs a model name prefix with its context window size. +type modelLimit struct { + prefix string + limit int +} + +// Known model context limits (in tokens), ordered longest-prefix-first +// for deterministic matching. +var modelLimits = []modelLimit{ + {"claude-haiku-3.5-20241022", 200_000}, + {"claude-sonnet-4-20250514", 200_000}, + {"claude-opus-4-20250514", 200_000}, + {"gpt-4.1-mini", 128_000}, + {"gpt-5-mini", 200_000}, + {"gpt-4.1", 128_000}, + {"gpt-5", 200_000}, } const defaultLimit = 128_000 @@ -35,13 +41,11 @@ func EstimateTokens(s string) int { } // LimitForModel returns the context window size for the given model. +// Uses longest-prefix-first matching for deterministic results. func LimitForModel(model string) int { - if limit, ok := modelLimits[model]; ok { - return limit - } - for prefix, limit := range modelLimits { - if strings.HasPrefix(model, prefix) { - return limit + for _, ml := range modelLimits { + if model == ml.prefix || strings.HasPrefix(model, ml.prefix) { + return ml.limit } } return defaultLimit @@ -75,7 +79,15 @@ func Fit(model string, sections Sections) Result { baseTokens := EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta) available := limit - baseTokens if available < 0 { - available = limit / 2 + // Base content alone exceeds budget. Truncate UserMeta (keep first 1000 chars). + if len(sections.UserMeta) > 4000 { + sections.UserMeta = sections.UserMeta[:4000] + "\n... [description truncated] ..." + baseTokens = EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta) + available = limit - baseTokens + } + if available < 0 { + available = 0 + } } // Trimmable sections in priority order (first = dropped first) @@ -123,14 +135,18 @@ func Fit(model string, sections Sections) Result { for _, e := range entries { diffBudget -= EstimateTokens(*e.content) } - if diffBudget < 1000 { - diffBudget = 1000 + if diffBudget < 0 { + diffBudget = 0 } maxChars := diffBudget * 4 if maxChars < len(sections.Diff) { removed := EstimateTokens(sections.Diff) - diffBudget trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000)) - sections.Diff = sections.Diff[:maxChars] + "\n\n... [diff truncated due to context limit] ..." + if maxChars > 0 { + sections.Diff = sections.Diff[:maxChars] + "\n\n... [diff truncated due to context limit] ..." + } else { + sections.Diff = "... [diff too large for context window — review manually] ..." + } } } diff --git a/budget/budget_test.go b/budget/budget_test.go index 19c39ab..7efc917 100644 --- a/budget/budget_test.go +++ b/budget/budget_test.go @@ -156,3 +156,48 @@ func TestFit_PreservesNoteInOutput(t *testing.T) { t.Error("expected trimming note in user prompt") } } + + +func TestFit_HugeUserMeta(t *testing.T) { + // UserMeta so large that base alone exceeds limit + // Use a unique marker past the truncation point + hugeDesc := strings.Repeat("d", 5000) + "UNIQUE_MARKER_PAST_TRUNCATION" + strings.Repeat("d", 595_000) + s := Sections{ + SystemBase: "base", + Diff: "small diff", + UserMeta: hugeDesc, + } + result := Fit("gpt-4.1", s) + + limit := LimitForModel("gpt-4.1") - reserveTokens + if result.EstTokens > limit { + t.Errorf("EstTokens %d exceeds limit %d", result.EstTokens, limit) + } + // Content past truncation point should not be present + if strings.Contains(result.UserPrompt, "UNIQUE_MARKER_PAST_TRUNCATION") { + t.Error("expected UserMeta to be truncated but found content past truncation point") + } + // Truncation marker should be present + if !strings.Contains(result.UserPrompt, "[description truncated]") { + t.Error("expected truncation marker in output") + } +} + +func TestFit_NeverExceedsLimit(t *testing.T) { + // All sections huge — verify final tokens never exceed limit + big := strings.Repeat("a", 200_000) + s := Sections{ + SystemBase: strings.Repeat("s", 8000), + Patterns: big, + Conventions: big, + FileContext: big, + Diff: big, + UserMeta: strings.Repeat("m", 8000), + } + result := Fit("gpt-4.1", s) + + limit := LimitForModel("gpt-4.1") - reserveTokens + if result.EstTokens > limit { + t.Errorf("EstTokens %d exceeds limit %d (trimmed: %v)", result.EstTokens, limit, result.Trimmed) + } +} From dab7871cb4ed892eb28df4038884c2947e46a915 Mon Sep 17 00:00:00 2001 From: Rodin Date: Fri, 1 May 2026 18:59:07 -0700 Subject: [PATCH 3/6] fix: address review findings on budget system MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Account for truncation marker tokens when computing diff budget (prevents EstTokens exceeding model limit in edge cases) - Rune-safe truncation for both UserMeta and Diff (no split multi-byte) - Fix misleading comment (1000 chars → ~1000 tokens/4000 chars) - Extract marker strings as constants - Add unit tests for BuildSystemBase and BuildUserMeta --- budget/budget.go | 39 ++++++++++++++++++++++++++++++++++----- review/prompt_test.go | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 5 deletions(-) diff --git a/budget/budget.go b/budget/budget.go index 449687e..cf66222 100644 --- a/budget/budget.go +++ b/budget/budget.go @@ -33,6 +33,10 @@ const defaultLimit = 128_000 // reserveTokens is headroom for the response generation. const reserveTokens = 4_000 +const diffTruncMarker = "\n\n... [diff truncated due to context limit] ..." +const diffTooLargeMarker = "... [diff too large for context window — review manually] ..." +const userMetaTruncMarker = "\n... [description truncated] ..." + // EstimateTokens estimates the number of tokens in a string. // Uses the rough heuristic of ~4 characters per token, which is // conservative for English text and code. @@ -79,9 +83,9 @@ func Fit(model string, sections Sections) Result { baseTokens := EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta) available := limit - baseTokens if available < 0 { - // Base content alone exceeds budget. Truncate UserMeta (keep first 1000 chars). + // Base content alone exceeds budget. Truncate UserMeta (keep first ~1000 tokens). if len(sections.UserMeta) > 4000 { - sections.UserMeta = sections.UserMeta[:4000] + "\n... [description truncated] ..." + sections.UserMeta = truncateUTF8(sections.UserMeta, 4000) + userMetaTruncMarker baseTokens = EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta) available = limit - baseTokens } @@ -138,14 +142,20 @@ func Fit(model string, sections Sections) Result { if diffBudget < 0 { diffBudget = 0 } - maxChars := diffBudget * 4 + // Reserve space for truncation marker + markerBudget := EstimateTokens(diffTruncMarker) + effectiveBudget := diffBudget - markerBudget + if effectiveBudget < 0 { + effectiveBudget = 0 + } + maxChars := effectiveBudget * 4 if maxChars < len(sections.Diff) { removed := EstimateTokens(sections.Diff) - diffBudget trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000)) if maxChars > 0 { - sections.Diff = sections.Diff[:maxChars] + "\n\n... [diff truncated due to context limit] ..." + sections.Diff = truncateUTF8(sections.Diff, maxChars) + diffTruncMarker } else { - sections.Diff = "... [diff too large for context window — review manually] ..." + sections.Diff = diffTooLargeMarker } } } @@ -195,3 +205,22 @@ func buildResult(s Sections, trimmed []string, estTokens int) Result { EstTokens: estTokens, } } + +// truncateUTF8 truncates s to at most maxBytes without splitting multi-byte +// UTF-8 characters. Returns a valid UTF-8 string of at most maxBytes bytes. +func truncateUTF8(s string, maxBytes int) string { + if len(s) <= maxBytes { + return s + } + // Walk backwards from maxBytes to find a valid UTF-8 boundary + for maxBytes > 0 && !isUTF8Start(s[maxBytes]) { + maxBytes-- + } + return s[:maxBytes] +} + +// isUTF8Start returns true if b is a valid start byte for a UTF-8 sequence +// (single-byte ASCII or multi-byte lead byte, not a continuation byte). +func isUTF8Start(b byte) bool { + return b&0xC0 != 0x80 +} diff --git a/review/prompt_test.go b/review/prompt_test.go index c3c1d2a..b8d705e 100644 --- a/review/prompt_test.go +++ b/review/prompt_test.go @@ -116,3 +116,43 @@ func TestBuildUserPrompt_WithoutFileContext(t *testing.T) { t.Error("should not include file context section when empty") } } + + +func TestBuildSystemBase(t *testing.T) { + result := BuildSystemBase() + if result == "" { + t.Fatal("BuildSystemBase returned empty string") + } + if !strings.Contains(result, "expert code reviewer") { + t.Error("expected reviewer role in system base") + } + if !strings.Contains(result, "REQUEST_CHANGES") { + t.Error("expected verdict format in system base") + } + if !strings.Contains(result, "JSON") { + t.Error("expected JSON output instruction in system base") + } +} + +func TestBuildUserMeta(t *testing.T) { + result := BuildUserMeta("Fix bug", "Some description", true, "all checks passed") + if !strings.Contains(result, "Fix bug") { + t.Error("expected title in user meta") + } + if !strings.Contains(result, "Some description") { + t.Error("expected description in user meta") + } + if !strings.Contains(result, "PASSED") { + t.Error("expected CI PASSED status") + } +} + +func TestBuildUserMeta_CIFailed(t *testing.T) { + result := BuildUserMeta("Title", "", false, "test job failed") + if !strings.Contains(result, "FAILED") { + t.Error("expected CI FAILED status") + } + if strings.Contains(result, "Description") { + t.Error("expected no description section when empty") + } +} From 565a077b012bcd9bb68b2e1ac6ca125d2f757a6b Mon Sep 17 00:00:00 2001 From: Rodin Date: Fri, 1 May 2026 19:00:35 -0700 Subject: [PATCH 4/6] fix: CI config - correct patterns path, increase timeout - PATTERNS_FILES: docs/ does not exist in go-patterns, use patterns/ - LLM_TIMEOUT: 600s (gpt-5-mini needs more time for larger diffs) --- .gitea/workflows/ci.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index 6adf9d9..4ada9a6 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -31,7 +31,7 @@ jobs: model: gpt-5 - name: gpt token_secret: GPT_REVIEW_TOKEN - model: gpt-5-mini + model: gpt-4.1 steps: - uses: actions/checkout@v4 - uses: actions/setup-go@v5 @@ -49,5 +49,6 @@ jobs: LLM_MODEL: ${{ matrix.model }} CONVENTIONS_FILE: "CONVENTIONS.md" PATTERNS_REPO: "rodin/go-patterns" - PATTERNS_FILES: "README.md,docs/" + PATTERNS_FILES: "README.md,patterns/" + LLM_TIMEOUT: "600" run: ./review-bot From 8b8462bdc8694c919efe242d1e58fb8f60462c86 Mon Sep 17 00:00:00 2001 From: Rodin Date: Fri, 1 May 2026 19:36:42 -0700 Subject: [PATCH 5/6] fix: address final review findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Comment: "~4 characters" → "~4 bytes" (len() counts bytes, not runes) - Use utf8.RuneStart from stdlib instead of custom isUTF8Start helper - Skip diff block entirely when Diff is empty (handles edge cases: draft→ready with no delta, force-push matching base, etc.) --- budget/budget.go | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/budget/budget.go b/budget/budget.go index cf66222..9d30f00 100644 --- a/budget/budget.go +++ b/budget/budget.go @@ -8,6 +8,7 @@ package budget import ( "fmt" "strings" + "unicode/utf8" ) // modelLimit pairs a model name prefix with its context window size. @@ -38,7 +39,7 @@ const diffTooLargeMarker = "... [diff too large for context window — review ma const userMetaTruncMarker = "\n... [description truncated] ..." // EstimateTokens estimates the number of tokens in a string. -// Uses the rough heuristic of ~4 characters per token, which is +// Uses the rough heuristic of ~4 bytes per token, which is // conservative for English text and code. func EstimateTokens(s string) int { return len(s) / 4 @@ -188,9 +189,11 @@ func buildResult(s Sections, trimmed []string, estTokens int) Result { usr.WriteString(s.FileContext) usr.WriteString("\n") } - usr.WriteString("\n### Diff (changes to review)\n\n```diff\n") - usr.WriteString(s.Diff) - usr.WriteString("\n```\n") + if s.Diff != "" { + usr.WriteString("\n### Diff (changes to review)\n\n```diff\n") + usr.WriteString(s.Diff) + usr.WriteString("\n```\n") + } if len(trimmed) > 0 { usr.WriteString("\n⚠️ Note: Context was trimmed to fit model limits. Dropped: ") @@ -212,15 +215,8 @@ func truncateUTF8(s string, maxBytes int) string { if len(s) <= maxBytes { return s } - // Walk backwards from maxBytes to find a valid UTF-8 boundary - for maxBytes > 0 && !isUTF8Start(s[maxBytes]) { + for maxBytes > 0 && !utf8.RuneStart(s[maxBytes]) { maxBytes-- } return s[:maxBytes] } - -// isUTF8Start returns true if b is a valid start byte for a UTF-8 sequence -// (single-byte ASCII or multi-byte lead byte, not a continuation byte). -func isUTF8Start(b byte) bool { - return b&0xC0 != 0x80 -} From 75190d53edf05ba50b4d430ac353576644fb07a4 Mon Sep 17 00:00:00 2001 From: Rodin Date: Fri, 1 May 2026 20:02:35 -0700 Subject: [PATCH 6/6] fix: address review findings (comment, marker budget, naming) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - UserMeta comment: "never trimmed" → "truncated only if base exceeds budget" - Skip diff truncation marker when diffBudget < markerBudget (prevents marker itself from pushing EstTokens over the limit) - Rename filepath → filePath to avoid shadowing stdlib package name --- budget/budget.go | 8 ++++++-- cmd/review-bot/main.go | 6 +++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/budget/budget.go b/budget/budget.go index 9d30f00..3624efe 100644 --- a/budget/budget.go +++ b/budget/budget.go @@ -65,7 +65,7 @@ type Sections struct { Conventions string // Repo conventions (trimmed second) FileContext string // Full file content (trimmed third) Diff string // The actual diff (trimmed last, only truncated) - UserMeta string // PR title, description, CI status (never trimmed) + UserMeta string // PR title, description, CI status (truncated only if base exceeds budget) } // Result holds the trimmed content and metadata about what was dropped. @@ -154,7 +154,11 @@ func Fit(model string, sections Sections) Result { removed := EstimateTokens(sections.Diff) - diffBudget trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000)) if maxChars > 0 { - sections.Diff = truncateUTF8(sections.Diff, maxChars) + diffTruncMarker + if diffBudget >= markerBudget { + sections.Diff = truncateUTF8(sections.Diff, maxChars) + diffTruncMarker + } else { + sections.Diff = truncateUTF8(sections.Diff, maxChars) + } } else { sections.Diff = diffTooLargeMarker } diff --git a/cmd/review-bot/main.go b/cmd/review-bot/main.go index 0c52a87..8b93751 100644 --- a/cmd/review-bot/main.go +++ b/cmd/review-bot/main.go @@ -255,12 +255,12 @@ func fetchPatterns(ctx context.Context, client *gitea.Client, patternsRepo, patt continue } - for filepath, content := range files { + for filePath, content := range files { // Only include markdown and text files as patterns - if !isPatternFile(filepath) { + if !isPatternFile(filePath) { continue } - sb.WriteString(fmt.Sprintf("### %s/%s\n\n%s\n\n", repoRef, filepath, content)) + sb.WriteString(fmt.Sprintf("### %s/%s\n\n%s\n\n", repoRef, filePath, content)) } } }