feat: add context budget system for LLM overflow (#19)

Adds a budget package that estimates token usage and progressively trims context to fit within model-specific limits.

Trim order (least important first):
1. Language patterns
2. Repository conventions
3. Full file context
4. Diff (truncated as last resort)

When content is trimmed, a note is appended to the user prompt so the LLM knows context was reduced.

- New budget package with Fit(), EstimateTokens(), LimitForModel()
- Model limit table (GPT-4.1: 128K, GPT-5: 200K, Claude: 200K)
- Refactored review/prompt.go: BuildSystemBase() and BuildUserMeta() extract non-trimmable content; old functions delegate to new ones
- main.go uses budget.Fit() instead of direct prompt assembly
- 7 unit tests covering all trim paths

Closes #19
2026-05-01 18:46:53 -07:00
parent ef3e6d5e87
commit 67d835909f
4 changed files with 382 additions and 9 deletions
@@ -0,0 +1,181 @@
 // Package budget manages LLM context window budgeting for review-bot.
 //
 // It estimates token usage and progressively trims context content to fit
 // within model-specific limits. The trimming order (least important first):
 // patterns → conventions → file context → diff truncation.
 package budget
 import (
 	"fmt"
 	"strings"
 )
 // Context-window sizing constants. All values are token counts.
 const (
 	// defaultLimit is the context limit used for models that are not
 	// listed in modelLimits.
 	defaultLimit = 128_000
 	// reserveTokens is headroom for the response generation.
 	reserveTokens = 4_000
 )
 // modelLimits maps known model names to their context limits (in tokens).
 // Models not listed here get defaultLimit.
 var modelLimits = map[string]int{
 	"gpt-4.1":                   128_000,
 	"gpt-4.1-mini":              128_000,
 	"gpt-5":                     200_000,
 	"gpt-5-mini":                200_000,
 	"claude-sonnet-4-20250514":  200_000,
 	"claude-opus-4-20250514":    200_000,
 	"claude-haiku-3.5-20241022": 200_000,
 }
 // EstimateTokens returns a rough token count for s, using the heuristic
 // of about four bytes per token. The length is measured in bytes and the
 // result is rounded down, so inputs shorter than four bytes estimate to 0.
 func EstimateTokens(s string) int {
 	const bytesPerToken = 4
 	return len(s) / bytesPerToken
 }
 // LimitForModel returns the context window size (in tokens) for the given
 // model.
 //
 // An exact match in modelLimits wins. Otherwise the entry whose name is the
 // longest prefix of model is used, so a dated or suffixed variant such as
 // "gpt-4.1-2026-01-01" inherits the limit of "gpt-4.1". If nothing matches,
 // defaultLimit is returned.
 func LimitForModel(model string) int {
 	if limit, ok := modelLimits[model]; ok {
 		return limit
 	}
 	// Map iteration order is unspecified, and several keys can be prefixes
 	// of the same model name (e.g. "gpt-5" and "gpt-5-mini"). Choosing the
 	// longest match keeps the result deterministic and picks the most
 	// specific entry.
 	best, bestLen := defaultLimit, -1
 	for prefix, limit := range modelLimits {
 		if len(prefix) > bestLen && strings.HasPrefix(model, prefix) {
 			best, bestLen = limit, len(prefix)
 		}
 	}
 	return best
 }
 // Sections carries the individual pieces of prompt content, declared in
 // trim priority order. When the total exceeds the budget, content is shed
 // starting with the least important piece (Patterns) and ending with the
 // most important (Diff).
 type Sections struct {
 	// SystemBase holds the core instructions; it is never trimmed.
 	SystemBase string
 	// Patterns holds the language patterns; it is trimmed first.
 	Patterns string
 	// Conventions holds the repo conventions; it is trimmed second.
 	Conventions string
 	// FileContext holds the full file content; it is trimmed third.
 	FileContext string
 	// Diff holds the actual diff; it is trimmed last and only truncated.
 	Diff string
 	// UserMeta holds the PR title, description and CI status; it is never
 	// trimmed.
 	UserMeta string
 }
 // Result is the outcome of fitting: the assembled prompts plus metadata
 // describing what had to be dropped.
 type Result struct {
 	// SystemPrompt is the assembled system message.
 	SystemPrompt string
 	// UserPrompt is the assembled user message.
 	UserPrompt string
 	// Trimmed lists human-readable descriptions of what was trimmed.
 	Trimmed []string
 	// EstTokens is the estimated total token count after trimming.
 	EstTokens int
 }
 // Fit trims sections to fit within the model's context limit and returns
 // the assembled prompts together with a list of what was trimmed.
 //
 // The budget is LimitForModel(model) minus reserveTokens, minus the
 // estimated size of the never-trimmed SystemBase and UserMeta. Whole
 // sections are dropped in order (patterns, conventions, file context)
 // until the remainder fits. If it still does not fit, the diff is
 // truncated, but never below an estimated 1000 tokens.
 //
 // sections is received by value, so the caller's copy is not modified.
 func Fit(model string, sections Sections) Result {
 	limit := LimitForModel(model) - reserveTokens
 	baseTokens := EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
 	available := limit - baseTokens
 	if available < 0 {
 		// The fixed content alone exceeds the limit. Fall back to half
 		// the limit so some trimmable content still gets through.
 		available = limit / 2
 	}
 	// Trimmable sections in priority order (first = dropped first). The
 	// pointers refer to the local copy of sections.
 	type entry struct {
 		name    string
 		content *string
 	}
 	entries := []entry{
 		{"patterns", &sections.Patterns},
 		{"conventions", &sections.Conventions},
 		{"file context", &sections.FileContext},
 	}
 	totalTrimmable := EstimateTokens(sections.Diff)
 	for _, e := range entries {
 		totalTrimmable += EstimateTokens(*e.content)
 	}
 	var trimmed []string
 	// Drop whole sections, least important first, until everything fits.
 	for _, e := range entries {
 		if totalTrimmable <= available {
 			break
 		}
 		tokens := EstimateTokens(*e.content)
 		if tokens == 0 {
 			continue
 		}
 		trimmed = append(trimmed, fmt.Sprintf("%s (~%dK tokens)", e.name, tokens/1000))
 		*e.content = ""
 		// Estimates are summed per section, so removing a section lowers
 		// the total by exactly that section's estimate.
 		totalTrimmable -= tokens
 	}
 	// If still too large, truncate the diff. Reaching this point means the
 	// loop above ran to completion, so every remaining trimmable section
 	// estimates to zero and the diff may use the whole available budget.
 	if totalTrimmable > available {
 		diffBudget := available
 		if diffBudget < 1000 {
 			// Always keep a minimally useful amount of the diff.
 			diffBudget = 1000
 		}
 		maxChars := diffBudget * 4
 		if maxChars < len(sections.Diff) {
 			removed := EstimateTokens(sections.Diff) - diffBudget
 			trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000))
 			sections.Diff = truncateAtRuneBoundary(sections.Diff, maxChars) + "\n\n... [diff truncated due to context limit] ..."
 		}
 	}
 	finalTokens := baseTokens + EstimateTokens(sections.Diff)
 	for _, e := range entries {
 		finalTokens += EstimateTokens(*e.content)
 	}
 	return buildResult(sections, trimmed, finalTokens)
 }
 // truncateAtRuneBoundary returns a prefix of s that is at most maxBytes
 // bytes long and does not end in the middle of a UTF-8 encoded rune.
 func truncateAtRuneBoundary(s string, maxBytes int) string {
 	if maxBytes >= len(s) {
 		return s
 	}
 	if maxBytes <= 0 {
 		return ""
 	}
 	cut := maxBytes
 	// s[cut] is the first byte that would be discarded. If it is a UTF-8
 	// continuation byte (10xxxxxx), the cut falls inside a rune, so step
 	// back to that rune's first byte. A rune has at most three
 	// continuation bytes, which bounds the walk even on invalid input.
 	for back := 0; back < 3 && cut > 0 && s[cut]&0xC0 == 0x80; back++ {
 		cut--
 	}
 	return s[:cut]
 }
 // buildResult assembles the final prompts from the given sections.
 //
 // The system prompt is SystemBase followed by the patterns and conventions
 // sections, each under its own heading and only when non-empty. The user
 // prompt is UserMeta, the optional file context, the diff in a fenced
 // block, and a closing note listing trimmed whenever it is non-empty.
 // trimmed and estTokens are copied into the Result unchanged.
 func buildResult(s Sections, trimmed []string, estTokens int) Result {
 	var system, user strings.Builder
 	// write appends each part to b in order.
 	write := func(b *strings.Builder, parts ...string) {
 		for _, part := range parts {
 			b.WriteString(part)
 		}
 	}
 	write(&system, s.SystemBase)
 	if len(s.Patterns) > 0 {
 		write(&system, "\n\n## Language Patterns & Idioms\n\nUse the following patterns as review criteria. Code that violates these established patterns is a finding:\n\n", s.Patterns)
 	}
 	if len(s.Conventions) > 0 {
 		write(&system, "\n\n## Repository Conventions\n\nThe repository has the following coding conventions that must be respected:\n\n", s.Conventions)
 	}
 	write(&user, s.UserMeta)
 	if len(s.FileContext) > 0 {
 		write(&user, "\n### Full File Context (modified files)\n\n", s.FileContext, "\n")
 	}
 	write(&user, "\n### Diff (changes to review)\n\n```diff\n", s.Diff, "\n```\n")
 	if len(trimmed) > 0 {
 		write(&user, "\n⚠️ Note: Context was trimmed to fit model limits. Dropped: ", strings.Join(trimmed, ", "), "\n")
 	}
 	return Result{
 		SystemPrompt: system.String(),
 		UserPrompt:   user.String(),
 		Trimmed:      trimmed,
 		EstTokens:    estTokens,
 	}
 }
@@ -0,0 +1,158 @@
 package budget
 import (
 	"strings"
 	"testing"
 )
 // TestEstimateTokens checks the estimate for an empty string and for
 // inputs whose byte length is an exact multiple of four.
 func TestEstimateTokens(t *testing.T) {
 	cases := []struct {
 		input string
 		want  int
 	}{
 		{input: "", want: 0},
 		{input: "abcd", want: 1},
 		{input: "12345678", want: 2},
 		{input: strings.Repeat("x", 400), want: 100},
 	}
 	for _, tc := range cases {
 		if got := EstimateTokens(tc.input); got != tc.want {
 			t.Errorf("EstimateTokens(%d chars) = %d, want %d", len(tc.input), got, tc.want)
 		}
 	}
 }
 // TestLimitForModel covers exact table hits, the default for an unknown
 // model, and the prefix fallback for a suffixed model name.
 func TestLimitForModel(t *testing.T) {
 	cases := []struct {
 		model string
 		want  int
 	}{
 		{model: "gpt-4.1", want: 128_000},
 		{model: "gpt-5", want: 200_000},
 		{model: "gpt-5-mini", want: 200_000},
 		{model: "unknown-model", want: defaultLimit},
 		{model: "gpt-4.1-2026-01-01", want: 128_000}, // prefix match
 	}
 	for _, tc := range cases {
 		if got := LimitForModel(tc.model); got != tc.want {
 			t.Errorf("LimitForModel(%q) = %d, want %d", tc.model, got, tc.want)
 		}
 	}
 }
 func TestFit_AllFits(t *testing.T) {
 	s := Sections{
 		SystemBase:  "system instructions",
 		Patterns:    "some patterns",
 		Conventions: "some conventions",
 		FileContext: "file content",
 		Diff:        "diff content",
 		UserMeta:    "PR: title\n",
 	}
 	result := Fit("gpt-5", s)
 	if len(result.Trimmed) != 0 {
 		t.Errorf("expected no trimming, got %v", result.Trimmed)
 	}
 	if !strings.Contains(result.SystemPrompt, "some patterns") {
 		t.Error("expected patterns in system prompt")
 	}
 	if !strings.Contains(result.SystemPrompt, "some conventions") {
 		t.Error("expected conventions in system prompt")
 	}
 	if !strings.Contains(result.UserPrompt, "file content") {
 		t.Error("expected file context in user prompt")
 	}
 }
 func TestFit_TrimsPatterns(t *testing.T) {
 	// Create content that exceeds 128K token budget for gpt-4.1
 	// Budget ≈ 128K - 4K reserve = 124K tokens = ~496K chars
 	// Fill patterns with enough to push over
 	bigPatterns := strings.Repeat("x", 500_000) // ~125K tokens
 	s := Sections{
 		SystemBase:  "base",
 		Patterns:    bigPatterns,
 		Conventions: "conventions",
 		FileContext: "files",
 		Diff:        "diff",
 		UserMeta:    "meta",
 	}
 	result := Fit("gpt-4.1", s)
 	if len(result.Trimmed) == 0 {
 		t.Fatal("expected trimming")
 	}
 	if !strings.Contains(result.Trimmed[0], "patterns") {
 		t.Errorf("expected patterns to be trimmed first, got %v", result.Trimmed)
 	}
 	if strings.Contains(result.SystemPrompt, bigPatterns[:100]) {
 		t.Error("expected patterns to be removed from output")
 	}
 	// Conventions should survive
 	if !strings.Contains(result.SystemPrompt, "conventions") {
 		t.Error("expected conventions to survive after patterns trimmed")
 	}
 }
 func TestFit_TrimsConventions(t *testing.T) {
 	// Patterns + conventions + diff all exceed budget even after patterns removed
 	big := strings.Repeat("y", 520_000) // ~130K tokens each (exceeds 124K budget even alone)
 	s := Sections{
 		SystemBase:  "base",
 		Patterns:    big,
 		Conventions: big,
 		FileContext: "files",
 		Diff:        "diff",
 		UserMeta:    "meta",
 	}
 	result := Fit("gpt-4.1", s)
 	if len(result.Trimmed) < 2 {
 		t.Fatalf("expected at least 2 trimmed, got %v", result.Trimmed)
 	}
 	if !strings.Contains(result.Trimmed[0], "patterns") {
 		t.Errorf("expected patterns trimmed first, got %s", result.Trimmed[0])
 	}
 	if !strings.Contains(result.Trimmed[1], "conventions") {
 		t.Errorf("expected conventions trimmed second, got %s", result.Trimmed[1])
 	}
 }
 func TestFit_TruncatesDiff(t *testing.T) {
 	// Only diff is huge, no patterns/conventions
 	hugeDiff := strings.Repeat("z", 600_000) // ~150K tokens > 128K limit
 	s := Sections{
 		SystemBase: "base",
 		Diff:       hugeDiff,
 		UserMeta:   "meta",
 	}
 	result := Fit("gpt-4.1", s)
 	if len(result.Trimmed) == 0 {
 		t.Fatal("expected diff truncation")
 	}
 	if !strings.Contains(result.Trimmed[len(result.Trimmed)-1], "diff truncated") {
 		t.Errorf("expected diff truncation note, got %v", result.Trimmed)
 	}
 	if !strings.Contains(result.UserPrompt, "[diff truncated due to context limit]") {
 		t.Error("expected truncation marker in user prompt")
 	}
 }
 func TestFit_PreservesNoteInOutput(t *testing.T) {
 	big := strings.Repeat("w", 500_000)
 	s := Sections{
 		SystemBase: "base",
 		Patterns:   big,
 		Diff:       "small diff",
 		UserMeta:   "meta",
 	}
 	result := Fit("gpt-4.1", s)
 	if !strings.Contains(result.UserPrompt, "⚠️ Note: Context was trimmed") {
 		t.Error("expected trimming note in user prompt")
 	}
 }
@@ -10,6 +10,7 @@ import (
 	"strings"
 	"time"
 	"gitea.weiker.me/rodin/review-bot/budget"
 	"gitea.weiker.me/rodin/review-bot/gitea"
 	"gitea.weiker.me/rodin/review-bot/llm"
 	"gitea.weiker.me/rodin/review-bot/review"
@@ -141,15 +142,26 @@ func main() {
 		log.Printf("Loaded patterns from %s (%d bytes)", *patternsRepo, len(patterns))
 	}
-	// Step 7: Build prompts
+	// Step 7: Budget-aware prompt assembly
-	systemPrompt := review.BuildSystemPrompt(conventions, patterns)
+	sections := budget.Sections{
-	userPrompt := review.BuildUserPrompt(pr.Title, pr.Body, diff, fileContext, ciPassed, ciDetails)
+		SystemBase:  review.BuildSystemBase(),
 		Patterns:    patterns,
 		Conventions: conventions,
 		FileContext: fileContext,
 		Diff:        diff,
 		UserMeta:    review.BuildUserMeta(pr.Title, pr.Body, ciPassed, ciDetails),
 	}
 	budgetResult := budget.Fit(*llmModel, sections)
 	log.Printf("Token estimate: ~%dK (limit: %dK)", budgetResult.EstTokens/1000, budget.LimitForModel(*llmModel)/1000)
 	if len(budgetResult.Trimmed) > 0 {
 		log.Printf("Context trimmed: %v", budgetResult.Trimmed)
 	}
 	// Step 8: Call LLM
 	log.Printf("Sending to LLM (%s)...", *llmModel)
 	messages := []llm.Message{
-		{Role: "system", Content: systemPrompt},
+		{Role: "system", Content: budgetResult.SystemPrompt},
-		{Role: "user", Content: userPrompt},
+		{Role: "user", Content: budgetResult.UserPrompt},
 	}
 	response, err := llmClient.Complete(ctx, messages)
@@ -7,8 +7,10 @@ import (
 	"strings"
 )
-// BuildSystemPrompt constructs the system prompt for the LLM reviewer.
+// BuildSystemBase returns the core system prompt instructions without
-func BuildSystemPrompt(conventions, patterns string) string {
+// patterns or conventions. Used by the budget package to separate
 // trimmable from non-trimmable content.
 func BuildSystemBase() string {
 	var sb strings.Builder
 	sb.WriteString("You are an expert code reviewer. Review the provided pull request diff carefully.\n\n")
@@ -42,6 +44,15 @@ func BuildSystemPrompt(conventions, patterns string) string {
 	sb.WriteString("- Line numbers should reference the new file line numbers from the diff headers.\n")
 	sb.WriteString("- If the diff is empty or trivial (only formatting/whitespace), APPROVE with no findings.\n")
 	return sb.String()
 }
 // BuildSystemPrompt constructs the full system prompt with patterns and conventions.
 // Deprecated: Use BuildSystemBase with budget.Fit for context-aware assembly.
 func BuildSystemPrompt(conventions, patterns string) string {
 	var sb strings.Builder
 	sb.WriteString(BuildSystemBase())
 	if patterns != "" {
 		sb.WriteString(fmt.Sprintf("\n\n## Language Patterns & Idioms\n\nUse the following patterns as review criteria. Code that violates these established patterns is a finding:\n\n%s\n", patterns))
 	}
@@ -53,8 +64,9 @@ func BuildSystemPrompt(conventions, patterns string) string {
 	return sb.String()
 }
-// BuildUserPrompt constructs the user message with PR context.
+// BuildUserMeta returns the PR metadata header (title, description, CI status)
-func BuildUserPrompt(title, description, diff, fileContext string, ciPassed bool, ciDetails string) string {
+// without the diff or file context. Used by the budget package.
 func BuildUserMeta(title, description string, ciPassed bool, ciDetails string) string {
 	var sb strings.Builder
 	sb.WriteString(fmt.Sprintf("## Pull Request: %s\n\n", title))
@@ -73,6 +85,16 @@ func BuildUserPrompt(title, description, diff, fileContext string, ciPassed bool
 		sb.WriteString(fmt.Sprintf("CI Details: %s\n", ciDetails))
 	}
 	return sb.String()
 }
 // BuildUserPrompt constructs the user message with PR context.
 // Deprecated: Use BuildUserMeta with budget.Fit for context-aware assembly.
 func BuildUserPrompt(title, description, diff, fileContext string, ciPassed bool, ciDetails string) string {
 	var sb strings.Builder
 	sb.WriteString(BuildUserMeta(title, description, ciPassed, ciDetails))
 	if fileContext != "" {
 		sb.WriteString("\n### Full File Context (modified files)\n\n")
 		sb.WriteString(fileContext)