feat: add context budget system for LLM overflow (#19)
CI / test (pull_request) Successful in 13s
CI / review (gpt-5, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 1m30s
CI / review (gpt-5-mini, gpt, GPT_REVIEW_TOKEN) (pull_request) Successful in 2m29s

Adds a budget package that estimates token usage and progressively
trims context to fit within model-specific limits.

Trim order (least important first):
1. Language patterns
2. Repository conventions
3. Full file context
4. Diff (truncated as last resort)

When content is trimmed, a note is appended to the user prompt so
the LLM knows context was reduced.

- New budget package with Fit(), EstimateTokens(), LimitForModel()
- Model limit table (GPT-4.1: 128K, GPT-5: 200K, Claude: 200K)
- Refactored review/prompt.go: BuildSystemBase() and BuildUserMeta()
  extract non-trimmable content; old functions delegate to new ones
- main.go uses budget.Fit() instead of direct prompt assembly
- 7 unit tests covering all trim paths

Closes #19
This commit is contained in:
Rodin
2026-05-01 18:46:53 -07:00
parent ef3e6d5e87
commit 67d835909f
4 changed files with 382 additions and 9 deletions
+181
View File
@@ -0,0 +1,181 @@
// Package budget manages LLM context window budgeting for review-bot.
//
// It estimates token usage and progressively trims context content to fit
// within model-specific limits. The trimming order (least important first):
// patterns → conventions → file context → diff truncation.
package budget
import (
"fmt"
"strings"
)
// Known model context limits (in tokens).
// Models not listed here get the conservative default.
var modelLimits = map[string]int{
"gpt-4.1": 128_000,
"gpt-4.1-mini": 128_000,
"gpt-5": 200_000,
"gpt-5-mini": 200_000,
"claude-sonnet-4-20250514": 200_000,
"claude-opus-4-20250514": 200_000,
"claude-haiku-3.5-20241022": 200_000,
}
const defaultLimit = 128_000
// reserveTokens is headroom for the response generation.
const reserveTokens = 4_000
// EstimateTokens estimates the number of tokens in a string.
// Uses the rough heuristic of ~4 characters per token, which is
// conservative for English text and code.
func EstimateTokens(s string) int {
return len(s) / 4
}
// LimitForModel returns the context window size for the given model.
func LimitForModel(model string) int {
if limit, ok := modelLimits[model]; ok {
return limit
}
for prefix, limit := range modelLimits {
if strings.HasPrefix(model, prefix) {
return limit
}
}
return defaultLimit
}
// Sections holds the prompt content sections in trim priority order.
// When the total exceeds the budget, sections are trimmed from least
// important (Patterns) to most important (Diff).
type Sections struct {
SystemBase string // Core instructions (never trimmed)
Patterns string // Language patterns (trimmed first)
Conventions string // Repo conventions (trimmed second)
FileContext string // Full file content (trimmed third)
Diff string // The actual diff (trimmed last, only truncated)
UserMeta string // PR title, description, CI status (never trimmed)
}
// Result holds the trimmed content and metadata about what was dropped.
type Result struct {
SystemPrompt string
UserPrompt string
Trimmed []string // Human-readable descriptions of what was trimmed
EstTokens int // Estimated total tokens after trimming
}
// Fit trims sections to fit within the model's context limit.
// Returns the assembled prompts and a list of what was trimmed.
func Fit(model string, sections Sections) Result {
limit := LimitForModel(model) - reserveTokens
baseTokens := EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
available := limit - baseTokens
if available < 0 {
available = limit / 2
}
// Trimmable sections in priority order (first = dropped first)
type entry struct {
name string
content *string
}
entries := []entry{
{"patterns", &sections.Patterns},
{"conventions", &sections.Conventions},
{"file context", &sections.FileContext},
}
// Check if everything fits
totalTrimmable := EstimateTokens(sections.Diff)
for _, e := range entries {
totalTrimmable += EstimateTokens(*e.content)
}
var trimmed []string
if totalTrimmable > available {
// Trim from least important
for i := range entries {
tokens := EstimateTokens(*entries[i].content)
if tokens == 0 {
continue
}
trimmed = append(trimmed, fmt.Sprintf("%s (~%dK tokens)", entries[i].name, tokens/1000))
*entries[i].content = ""
// Recalculate
totalTrimmable = EstimateTokens(sections.Diff)
for _, e := range entries {
totalTrimmable += EstimateTokens(*e.content)
}
if totalTrimmable <= available {
break
}
}
}
// If still too large, truncate the diff
if totalTrimmable > available {
diffBudget := available
for _, e := range entries {
diffBudget -= EstimateTokens(*e.content)
}
if diffBudget < 1000 {
diffBudget = 1000
}
maxChars := diffBudget * 4
if maxChars < len(sections.Diff) {
removed := EstimateTokens(sections.Diff) - diffBudget
trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000))
sections.Diff = sections.Diff[:maxChars] + "\n\n... [diff truncated due to context limit] ..."
}
}
finalTokens := baseTokens
for _, e := range entries {
finalTokens += EstimateTokens(*e.content)
}
finalTokens += EstimateTokens(sections.Diff)
return buildResult(sections, trimmed, finalTokens)
}
func buildResult(s Sections, trimmed []string, estTokens int) Result {
var sys strings.Builder
sys.WriteString(s.SystemBase)
if s.Patterns != "" {
sys.WriteString("\n\n## Language Patterns & Idioms\n\nUse the following patterns as review criteria. Code that violates these established patterns is a finding:\n\n")
sys.WriteString(s.Patterns)
}
if s.Conventions != "" {
sys.WriteString("\n\n## Repository Conventions\n\nThe repository has the following coding conventions that must be respected:\n\n")
sys.WriteString(s.Conventions)
}
var usr strings.Builder
usr.WriteString(s.UserMeta)
if s.FileContext != "" {
usr.WriteString("\n### Full File Context (modified files)\n\n")
usr.WriteString(s.FileContext)
usr.WriteString("\n")
}
usr.WriteString("\n### Diff (changes to review)\n\n```diff\n")
usr.WriteString(s.Diff)
usr.WriteString("\n```\n")
if len(trimmed) > 0 {
usr.WriteString("\n⚠️ Note: Context was trimmed to fit model limits. Dropped: ")
usr.WriteString(strings.Join(trimmed, ", "))
usr.WriteString("\n")
}
return Result{
SystemPrompt: sys.String(),
UserPrompt: usr.String(),
Trimmed: trimmed,
EstTokens: estTokens,
}
}