From 67d835909f72a58b70e7dc00b09254c1eda54146 Mon Sep 17 00:00:00 2001
From: Rodin <rodin@forgedthought.ai>
Date: Fri, 1 May 2026 18:46:53 -0700
Subject: [PATCH 1/6] feat: add context budget system for LLM overflow (#19)

Adds a budget package that estimates token usage and progressively
trims context to fit within model-specific limits.

Trim order (least important first):
1. Language patterns
2. Repository conventions
3. Full file context
4. Diff (truncated as last resort)

When content is trimmed, a note is appended to the user prompt so
the LLM knows context was reduced.

- New budget package with Fit(), EstimateTokens(), LimitForModel()
- Model limit table (GPT-4.1: 128K, GPT-5: 200K, Claude: 200K)
- Refactored review/prompt.go: BuildSystemBase() and BuildUserMeta()
  extract non-trimmable content; old functions delegate to new ones
- main.go uses budget.Fit() instead of direct prompt assembly
- 7 unit tests covering all trim paths

Closes #19
---
 budget/budget.go       | 181 +++++++++++++++++++++++++++++++++++++++++
 budget/budget_test.go  | 158 +++++++++++++++++++++++++++++++++++
 cmd/review-bot/main.go |  22 +++--
 review/prompt.go       |  30 ++++++-
 4 files changed, 382 insertions(+), 9 deletions(-)
 create mode 100644 budget/budget.go
 create mode 100644 budget/budget_test.go

diff --git a/budget/budget.go b/budget/budget.go
new file mode 100644
index 0000000..f6df937
--- /dev/null
+++ b/budget/budget.go
@@ -0,0 +1,181 @@
+// Package budget manages LLM context window budgeting for review-bot.
+//
+// It estimates token usage and progressively trims context content to fit
+// within model-specific limits. The trimming order (least important first):
+// patterns → conventions → file context → diff truncation.
+package budget
+
+import (
+	"fmt"
+	"strings"
+)
+
+// Known model context limits (in tokens).
+// Models not listed here get the conservative default.
+var modelLimits = map[string]int{
+	"gpt-4.1":                   128_000,
+	"gpt-4.1-mini":              128_000,
+	"gpt-5":                     200_000,
+	"gpt-5-mini":                200_000,
+	"claude-sonnet-4-20250514":  200_000,
+	"claude-opus-4-20250514":    200_000,
+	"claude-haiku-3.5-20241022": 200_000,
+}
+
+const defaultLimit = 128_000
+
+// reserveTokens is headroom for the response generation.
+const reserveTokens = 4_000
+
+// EstimateTokens estimates the number of tokens in a string.
+// Uses the rough heuristic of ~4 characters per token, which is
+// conservative for English text and code.
+func EstimateTokens(s string) int {
+	return len(s) / 4
+}
+
+// LimitForModel returns the context window size for the given model.
+func LimitForModel(model string) int {
+	if limit, ok := modelLimits[model]; ok {
+		return limit
+	}
+	for prefix, limit := range modelLimits {
+		if strings.HasPrefix(model, prefix) {
+			return limit
+		}
+	}
+	return defaultLimit
+}
+
+// Sections holds the prompt content sections in trim priority order.
+// When the total exceeds the budget, sections are trimmed from least
+// important (Patterns) to most important (Diff).
+type Sections struct {
+	SystemBase  string // Core instructions (never trimmed)
+	Patterns    string // Language patterns (trimmed first)
+	Conventions string // Repo conventions (trimmed second)
+	FileContext string // Full file content (trimmed third)
+	Diff        string // The actual diff (trimmed last, only truncated)
+	UserMeta    string // PR title, description, CI status (never trimmed)
+}
+
+// Result holds the trimmed content and metadata about what was dropped.
+type Result struct {
+	SystemPrompt string
+	UserPrompt   string
+	Trimmed      []string // Human-readable descriptions of what was trimmed
+	EstTokens    int      // Estimated total tokens after trimming
+}
+
+// Fit trims sections to fit within the model's context limit.
+// Returns the assembled prompts and a list of what was trimmed.
+func Fit(model string, sections Sections) Result {
+	limit := LimitForModel(model) - reserveTokens
+
+	baseTokens := EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
+	available := limit - baseTokens
+	if available < 0 {
+		available = limit / 2
+	}
+
+	// Trimmable sections in priority order (first = dropped first)
+	type entry struct {
+		name    string
+		content *string
+	}
+	entries := []entry{
+		{"patterns", &sections.Patterns},
+		{"conventions", &sections.Conventions},
+		{"file context", &sections.FileContext},
+	}
+
+	// Check if everything fits
+	totalTrimmable := EstimateTokens(sections.Diff)
+	for _, e := range entries {
+		totalTrimmable += EstimateTokens(*e.content)
+	}
+
+	var trimmed []string
+	if totalTrimmable > available {
+		// Trim from least important
+		for i := range entries {
+			tokens := EstimateTokens(*entries[i].content)
+			if tokens == 0 {
+				continue
+			}
+			trimmed = append(trimmed, fmt.Sprintf("%s (~%dK tokens)", entries[i].name, tokens/1000))
+			*entries[i].content = ""
+
+			// Recalculate
+			totalTrimmable = EstimateTokens(sections.Diff)
+			for _, e := range entries {
+				totalTrimmable += EstimateTokens(*e.content)
+			}
+			if totalTrimmable <= available {
+				break
+			}
+		}
+	}
+
+	// If still too large, truncate the diff
+	if totalTrimmable > available {
+		diffBudget := available
+		for _, e := range entries {
+			diffBudget -= EstimateTokens(*e.content)
+		}
+		if diffBudget < 1000 {
+			diffBudget = 1000
+		}
+		maxChars := diffBudget * 4
+		if maxChars < len(sections.Diff) {
+			removed := EstimateTokens(sections.Diff) - diffBudget
+			trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000))
+			sections.Diff = sections.Diff[:maxChars] + "\n\n... [diff truncated due to context limit] ..."
+		}
+	}
+
+	finalTokens := baseTokens
+	for _, e := range entries {
+		finalTokens += EstimateTokens(*e.content)
+	}
+	finalTokens += EstimateTokens(sections.Diff)
+
+	return buildResult(sections, trimmed, finalTokens)
+}
+
+func buildResult(s Sections, trimmed []string, estTokens int) Result {
+	var sys strings.Builder
+	sys.WriteString(s.SystemBase)
+	if s.Patterns != "" {
+		sys.WriteString("\n\n## Language Patterns & Idioms\n\nUse the following patterns as review criteria. Code that violates these established patterns is a finding:\n\n")
+		sys.WriteString(s.Patterns)
+	}
+	if s.Conventions != "" {
+		sys.WriteString("\n\n## Repository Conventions\n\nThe repository has the following coding conventions that must be respected:\n\n")
+		sys.WriteString(s.Conventions)
+	}
+
+	var usr strings.Builder
+	usr.WriteString(s.UserMeta)
+	if s.FileContext != "" {
+		usr.WriteString("\n### Full File Context (modified files)\n\n")
+		usr.WriteString(s.FileContext)
+		usr.WriteString("\n")
+	}
+	usr.WriteString("\n### Diff (changes to review)\n\n```diff\n")
+	usr.WriteString(s.Diff)
+	usr.WriteString("\n```\n")
+
+	if len(trimmed) > 0 {
+		usr.WriteString("\n⚠️ Note: Context was trimmed to fit model limits. Dropped: ")
+		usr.WriteString(strings.Join(trimmed, ", "))
+		usr.WriteString("\n")
+	}
+
+	return Result{
+		SystemPrompt: sys.String(),
+		UserPrompt:   usr.String(),
+		Trimmed:      trimmed,
+		EstTokens:    estTokens,
+	}
+}
diff --git a/budget/budget_test.go b/budget/budget_test.go
new file mode 100644
index 0000000..19c39ab
--- /dev/null
+++ b/budget/budget_test.go
@@ -0,0 +1,158 @@
+package budget
+
+import (
+	"strings"
+	"testing"
+)
+
+func TestEstimateTokens(t *testing.T) {
+	tests := []struct {
+		input string
+		want  int
+	}{
+		{"", 0},
+		{"abcd", 1},
+		{"12345678", 2},
+		{strings.Repeat("x", 400), 100},
+	}
+	for _, tt := range tests {
+		got := EstimateTokens(tt.input)
+		if got != tt.want {
+			t.Errorf("EstimateTokens(%d chars) = %d, want %d", len(tt.input), got, tt.want)
+		}
+	}
+}
+
+func TestLimitForModel(t *testing.T) {
+	tests := []struct {
+		model string
+		want  int
+	}{
+		{"gpt-4.1", 128_000},
+		{"gpt-5", 200_000},
+		{"gpt-5-mini", 200_000},
+		{"unknown-model", defaultLimit},
+		{"gpt-4.1-2026-01-01", 128_000}, // prefix match
+	}
+	for _, tt := range tests {
+		got := LimitForModel(tt.model)
+		if got != tt.want {
+			t.Errorf("LimitForModel(%q) = %d, want %d", tt.model, got, tt.want)
+		}
+	}
+}
+
+func TestFit_AllFits(t *testing.T) {
+	s := Sections{
+		SystemBase:  "system instructions",
+		Patterns:    "some patterns",
+		Conventions: "some conventions",
+		FileContext: "file content",
+		Diff:        "diff content",
+		UserMeta:    "PR: title\n",
+	}
+	result := Fit("gpt-5", s)
+
+	if len(result.Trimmed) != 0 {
+		t.Errorf("expected no trimming, got %v", result.Trimmed)
+	}
+	if !strings.Contains(result.SystemPrompt, "some patterns") {
+		t.Error("expected patterns in system prompt")
+	}
+	if !strings.Contains(result.SystemPrompt, "some conventions") {
+		t.Error("expected conventions in system prompt")
+	}
+	if !strings.Contains(result.UserPrompt, "file content") {
+		t.Error("expected file context in user prompt")
+	}
+}
+
+func TestFit_TrimsPatterns(t *testing.T) {
+	// Create content that exceeds 128K token budget for gpt-4.1
+	// Budget ≈ 128K - 4K reserve = 124K tokens = ~496K chars
+	// Fill patterns with enough to push over
+	bigPatterns := strings.Repeat("x", 500_000) // ~125K tokens
+	s := Sections{
+		SystemBase:  "base",
+		Patterns:    bigPatterns,
+		Conventions: "conventions",
+		FileContext: "files",
+		Diff:        "diff",
+		UserMeta:    "meta",
+	}
+	result := Fit("gpt-4.1", s)
+
+	if len(result.Trimmed) == 0 {
+		t.Fatal("expected trimming")
+	}
+	if !strings.Contains(result.Trimmed[0], "patterns") {
+		t.Errorf("expected patterns to be trimmed first, got %v", result.Trimmed)
+	}
+	if strings.Contains(result.SystemPrompt, bigPatterns[:100]) {
+		t.Error("expected patterns to be removed from output")
+	}
+	// Conventions should survive
+	if !strings.Contains(result.SystemPrompt, "conventions") {
+		t.Error("expected conventions to survive after patterns trimmed")
+	}
+}
+
+func TestFit_TrimsConventions(t *testing.T) {
+	// Patterns + conventions + diff all exceed budget even after patterns removed
+	big := strings.Repeat("y", 520_000) // ~130K tokens each (exceeds 124K budget even alone)
+	s := Sections{
+		SystemBase:  "base",
+		Patterns:    big,
+		Conventions: big,
+		FileContext: "files",
+		Diff:        "diff",
+		UserMeta:    "meta",
+	}
+	result := Fit("gpt-4.1", s)
+
+	if len(result.Trimmed) < 2 {
+		t.Fatalf("expected at least 2 trimmed, got %v", result.Trimmed)
+	}
+	if !strings.Contains(result.Trimmed[0], "patterns") {
+		t.Errorf("expected patterns trimmed first, got %s", result.Trimmed[0])
+	}
+	if !strings.Contains(result.Trimmed[1], "conventions") {
+		t.Errorf("expected conventions trimmed second, got %s", result.Trimmed[1])
+	}
+}
+
+func TestFit_TruncatesDiff(t *testing.T) {
+	// Only diff is huge, no patterns/conventions
+	hugeDiff := strings.Repeat("z", 600_000) // ~150K tokens > 128K limit
+	s := Sections{
+		SystemBase: "base",
+		Diff:       hugeDiff,
+		UserMeta:   "meta",
+	}
+	result := Fit("gpt-4.1", s)
+
+	if len(result.Trimmed) == 0 {
+		t.Fatal("expected diff truncation")
+	}
+	if !strings.Contains(result.Trimmed[len(result.Trimmed)-1], "diff truncated") {
+		t.Errorf("expected diff truncation note, got %v", result.Trimmed)
+	}
+	if !strings.Contains(result.UserPrompt, "[diff truncated due to context limit]") {
+		t.Error("expected truncation marker in user prompt")
+	}
+}
+
+func TestFit_PreservesNoteInOutput(t *testing.T) {
+	big := strings.Repeat("w", 500_000)
+	s := Sections{
+		SystemBase: "base",
+		Patterns:   big,
+		Diff:       "small diff",
+		UserMeta:   "meta",
+	}
+	result := Fit("gpt-4.1", s)
+
+	if !strings.Contains(result.UserPrompt, "⚠️ Note: Context was trimmed") {
+		t.Error("expected trimming note in user prompt")
+	}
+}
diff --git a/cmd/review-bot/main.go b/cmd/review-bot/main.go
index f31c756..0c52a87 100644
--- a/cmd/review-bot/main.go
+++ b/cmd/review-bot/main.go
@@ -10,6 +10,7 @@ import (
 	"strings"
 	"time"
 
+	"gitea.weiker.me/rodin/review-bot/budget"
 	"gitea.weiker.me/rodin/review-bot/gitea"
 	"gitea.weiker.me/rodin/review-bot/llm"
 	"gitea.weiker.me/rodin/review-bot/review"
@@ -141,15 +142,26 @@ func main() {
 		log.Printf("Loaded patterns from %s (%d bytes)", *patternsRepo, len(patterns))
 	}
 
-	// Step 7: Build prompts
-	systemPrompt := review.BuildSystemPrompt(conventions, patterns)
-	userPrompt := review.BuildUserPrompt(pr.Title, pr.Body, diff, fileContext, ciPassed, ciDetails)
+	// Step 7: Budget-aware prompt assembly
+	sections := budget.Sections{
+		SystemBase:  review.BuildSystemBase(),
+		Patterns:    patterns,
+		Conventions: conventions,
+		FileContext: fileContext,
+		Diff:        diff,
+		UserMeta:    review.BuildUserMeta(pr.Title, pr.Body, ciPassed, ciDetails),
+	}
+	budgetResult := budget.Fit(*llmModel, sections)
+	log.Printf("Token estimate: ~%dK (limit: %dK)", budgetResult.EstTokens/1000, budget.LimitForModel(*llmModel)/1000)
+	if len(budgetResult.Trimmed) > 0 {
+		log.Printf("Context trimmed: %v", budgetResult.Trimmed)
+	}
 
 	// Step 8: Call LLM
 	log.Printf("Sending to LLM (%s)...", *llmModel)
 	messages := []llm.Message{
-		{Role: "system", Content: systemPrompt},
-		{Role: "user", Content: userPrompt},
+		{Role: "system", Content: budgetResult.SystemPrompt},
+		{Role: "user", Content: budgetResult.UserPrompt},
 	}
 
 	response, err := llmClient.Complete(ctx, messages)
diff --git a/review/prompt.go b/review/prompt.go
index a4072a9..24f743b 100644
--- a/review/prompt.go
+++ b/review/prompt.go
@@ -7,8 +7,10 @@ import (
 	"strings"
 )
 
-// BuildSystemPrompt constructs the system prompt for the LLM reviewer.
-func BuildSystemPrompt(conventions, patterns string) string {
+// BuildSystemBase returns the core system prompt instructions without
+// patterns or conventions. Used by the budget package to separate
+// trimmable from non-trimmable content.
+func BuildSystemBase() string {
 	var sb strings.Builder
 
 	sb.WriteString("You are an expert code reviewer. Review the provided pull request diff carefully.\n\n")
@@ -42,6 +44,15 @@ func BuildSystemPrompt(conventions, patterns string) string {
 	sb.WriteString("- Line numbers should reference the new file line numbers from the diff headers.\n")
 	sb.WriteString("- If the diff is empty or trivial (only formatting/whitespace), APPROVE with no findings.\n")
 
+	return sb.String()
+}
+
+// BuildSystemPrompt constructs the full system prompt with patterns and conventions.
+// Deprecated: Use BuildSystemBase with budget.Fit for context-aware assembly.
+func BuildSystemPrompt(conventions, patterns string) string {
+	var sb strings.Builder
+	sb.WriteString(BuildSystemBase())
+
 	if patterns != "" {
 		sb.WriteString(fmt.Sprintf("\n\n## Language Patterns & Idioms\n\nUse the following patterns as review criteria. Code that violates these established patterns is a finding:\n\n%s\n", patterns))
 	}
@@ -53,8 +64,9 @@ func BuildSystemPrompt(conventions, patterns string) string {
 	return sb.String()
 }
 
-// BuildUserPrompt constructs the user message with PR context.
-func BuildUserPrompt(title, description, diff, fileContext string, ciPassed bool, ciDetails string) string {
+// BuildUserMeta returns the PR metadata header (title, description, CI status)
+// without the diff or file context. Used by the budget package.
+func BuildUserMeta(title, description string, ciPassed bool, ciDetails string) string {
 	var sb strings.Builder
 
 	sb.WriteString(fmt.Sprintf("## Pull Request: %s\n\n", title))
@@ -73,6 +85,16 @@ func BuildUserPrompt(title, description, diff, fileContext string, ciPassed bool
 		sb.WriteString(fmt.Sprintf("CI Details: %s\n", ciDetails))
 	}
 
+	return sb.String()
+}
+
+// BuildUserPrompt constructs the user message with PR context.
+// Deprecated: Use BuildUserMeta with budget.Fit for context-aware assembly.
+func BuildUserPrompt(title, description, diff, fileContext string, ciPassed bool, ciDetails string) string {
+	var sb strings.Builder
+
+	sb.WriteString(BuildUserMeta(title, description, ciPassed, ciDetails))
+
 	if fileContext != "" {
 		sb.WriteString("\n### Full File Context (modified files)\n\n")
 		sb.WriteString(fileContext)

From d9cacf6f62cbbd41dae69b63e418e9e2d8599968 Mon Sep 17 00:00:00 2001
From: Rodin <rodin@forgedthought.ai>
Date: Fri, 1 May 2026 18:51:22 -0700
Subject: [PATCH 2/6] fix: strict budget enforcement + deterministic model
 matching
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses review findings:
- Replace map-based model limits with ordered slice (longest-prefix-first)
  for deterministic matching
- Truncate UserMeta when base content alone exceeds budget (keeps first
  4000 chars + truncation marker)
- Remove hard minimum of 1000 tokens for diff budget — use 0 as floor
  to guarantee total never exceeds limit
- Handle zero-budget edge case (diff replaced with manual-review message)
- Add tests: huge UserMeta, all-sections-huge never exceeds limit
---
 budget/budget.go      | 56 +++++++++++++++++++++++++++----------------
 budget/budget_test.go | 45 ++++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+), 20 deletions(-)

diff --git a/budget/budget.go b/budget/budget.go
index f6df937..449687e 100644
--- a/budget/budget.go
+++ b/budget/budget.go
@@ -10,16 +10,22 @@ import (
 	"strings"
 )
 
-// Known model context limits (in tokens).
-// Models not listed here get the conservative default.
-var modelLimits = map[string]int{
-	"gpt-4.1":                   128_000,
-	"gpt-4.1-mini":              128_000,
-	"gpt-5":                     200_000,
-	"gpt-5-mini":                200_000,
-	"claude-sonnet-4-20250514":  200_000,
-	"claude-opus-4-20250514":    200_000,
-	"claude-haiku-3.5-20241022": 200_000,
+// modelLimit pairs a model name prefix with its context window size.
+type modelLimit struct {
+	prefix string
+	limit  int
+}
+
+// Known model context limits (in tokens), ordered longest-prefix-first
+// for deterministic matching.
+var modelLimits = []modelLimit{
+	{"claude-haiku-3.5-20241022", 200_000},
+	{"claude-sonnet-4-20250514", 200_000},
+	{"claude-opus-4-20250514", 200_000},
+	{"gpt-4.1-mini", 128_000},
+	{"gpt-5-mini", 200_000},
+	{"gpt-4.1", 128_000},
+	{"gpt-5", 200_000},
 }
 
 const defaultLimit = 128_000
@@ -35,13 +41,11 @@ func EstimateTokens(s string) int {
 }
 
 // LimitForModel returns the context window size for the given model.
+// Uses longest-prefix-first matching for deterministic results.
 func LimitForModel(model string) int {
-	if limit, ok := modelLimits[model]; ok {
-		return limit
-	}
-	for prefix, limit := range modelLimits {
-		if strings.HasPrefix(model, prefix) {
-			return limit
+	for _, ml := range modelLimits {
+		if model == ml.prefix || strings.HasPrefix(model, ml.prefix) {
+			return ml.limit
 		}
 	}
 	return defaultLimit
@@ -75,7 +79,15 @@ func Fit(model string, sections Sections) Result {
 	baseTokens := EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
 	available := limit - baseTokens
 	if available < 0 {
-		available = limit / 2
+		// Base content alone exceeds budget. Truncate UserMeta (keep first 1000 chars).
+		if len(sections.UserMeta) > 4000 {
+			sections.UserMeta = sections.UserMeta[:4000] + "\n... [description truncated] ..."
+			baseTokens = EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
+			available = limit - baseTokens
+		}
+		if available < 0 {
+			available = 0
+		}
 	}
 
 	// Trimmable sections in priority order (first = dropped first)
@@ -123,14 +135,18 @@ func Fit(model string, sections Sections) Result {
 		for _, e := range entries {
 			diffBudget -= EstimateTokens(*e.content)
 		}
-		if diffBudget < 1000 {
-			diffBudget = 1000
+		if diffBudget < 0 {
+			diffBudget = 0
 		}
 		maxChars := diffBudget * 4
 		if maxChars < len(sections.Diff) {
 			removed := EstimateTokens(sections.Diff) - diffBudget
 			trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000))
-			sections.Diff = sections.Diff[:maxChars] + "\n\n... [diff truncated due to context limit] ..."
+			if maxChars > 0 {
+				sections.Diff = sections.Diff[:maxChars] + "\n\n... [diff truncated due to context limit] ..."
+			} else {
+				sections.Diff = "... [diff too large for context window — review manually] ..."
+			}
 		}
 	}
 
diff --git a/budget/budget_test.go b/budget/budget_test.go
index 19c39ab..7efc917 100644
--- a/budget/budget_test.go
+++ b/budget/budget_test.go
@@ -156,3 +156,48 @@ func TestFit_PreservesNoteInOutput(t *testing.T) {
 		t.Error("expected trimming note in user prompt")
 	}
 }
+
+
+func TestFit_HugeUserMeta(t *testing.T) {
+	// UserMeta so large that base alone exceeds limit
+	// Use a unique marker past the truncation point
+	hugeDesc := strings.Repeat("d", 5000) + "UNIQUE_MARKER_PAST_TRUNCATION" + strings.Repeat("d", 595_000)
+	s := Sections{
+		SystemBase: "base",
+		Diff:       "small diff",
+		UserMeta:   hugeDesc,
+	}
+	result := Fit("gpt-4.1", s)
+
+	limit := LimitForModel("gpt-4.1") - reserveTokens
+	if result.EstTokens > limit {
+		t.Errorf("EstTokens %d exceeds limit %d", result.EstTokens, limit)
+	}
+	// Content past truncation point should not be present
+	if strings.Contains(result.UserPrompt, "UNIQUE_MARKER_PAST_TRUNCATION") {
+		t.Error("expected UserMeta to be truncated but found content past truncation point")
+	}
+	// Truncation marker should be present
+	if !strings.Contains(result.UserPrompt, "[description truncated]") {
+		t.Error("expected truncation marker in output")
+	}
+}
+
+func TestFit_NeverExceedsLimit(t *testing.T) {
+	// All sections huge — verify final tokens never exceed limit
+	big := strings.Repeat("a", 200_000)
+	s := Sections{
+		SystemBase:  strings.Repeat("s", 8000),
+		Patterns:    big,
+		Conventions: big,
+		FileContext: big,
+		Diff:        big,
+		UserMeta:    strings.Repeat("m", 8000),
+	}
+	result := Fit("gpt-4.1", s)
+
+	limit := LimitForModel("gpt-4.1") - reserveTokens
+	if result.EstTokens > limit {
+		t.Errorf("EstTokens %d exceeds limit %d (trimmed: %v)", result.EstTokens, limit, result.Trimmed)
+	}
+}

From dab7871cb4ed892eb28df4038884c2947e46a915 Mon Sep 17 00:00:00 2001
From: Rodin <rodin@forgedthought.ai>
Date: Fri, 1 May 2026 18:59:07 -0700
Subject: [PATCH 3/6] fix: address review findings on budget system
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Account for truncation marker tokens when computing diff budget
  (prevents EstTokens exceeding model limit in edge cases)
- Rune-safe truncation for both UserMeta and Diff (no split multi-byte)
- Fix misleading comment (1000 chars → ~1000 tokens/4000 chars)
- Extract marker strings as constants
- Add unit tests for BuildSystemBase and BuildUserMeta
---
 budget/budget.go      | 39 ++++++++++++++++++++++++++++++++++-----
 review/prompt_test.go | 40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+), 5 deletions(-)

diff --git a/budget/budget.go b/budget/budget.go
index 449687e..cf66222 100644
--- a/budget/budget.go
+++ b/budget/budget.go
@@ -33,6 +33,10 @@ const defaultLimit = 128_000
 // reserveTokens is headroom for the response generation.
 const reserveTokens = 4_000
 
+const diffTruncMarker = "\n\n... [diff truncated due to context limit] ..."
+const diffTooLargeMarker = "... [diff too large for context window — review manually] ..."
+const userMetaTruncMarker = "\n... [description truncated] ..."
+
 // EstimateTokens estimates the number of tokens in a string.
 // Uses the rough heuristic of ~4 characters per token, which is
 // conservative for English text and code.
@@ -79,9 +83,9 @@ func Fit(model string, sections Sections) Result {
 	baseTokens := EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
 	available := limit - baseTokens
 	if available < 0 {
-		// Base content alone exceeds budget. Truncate UserMeta (keep first 1000 chars).
+		// Base content alone exceeds budget. Truncate UserMeta (keep first ~1000 tokens).
 		if len(sections.UserMeta) > 4000 {
-			sections.UserMeta = sections.UserMeta[:4000] + "\n... [description truncated] ..."
+			sections.UserMeta = truncateUTF8(sections.UserMeta, 4000) + userMetaTruncMarker
 			baseTokens = EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
 			available = limit - baseTokens
 		}
@@ -138,14 +142,20 @@ func Fit(model string, sections Sections) Result {
 		if diffBudget < 0 {
 			diffBudget = 0
 		}
-		maxChars := diffBudget * 4
+		// Reserve space for truncation marker
+		markerBudget := EstimateTokens(diffTruncMarker)
+		effectiveBudget := diffBudget - markerBudget
+		if effectiveBudget < 0 {
+			effectiveBudget = 0
+		}
+		maxChars := effectiveBudget * 4
 		if maxChars < len(sections.Diff) {
 			removed := EstimateTokens(sections.Diff) - diffBudget
 			trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000))
 			if maxChars > 0 {
-				sections.Diff = sections.Diff[:maxChars] + "\n\n... [diff truncated due to context limit] ..."
+				sections.Diff = truncateUTF8(sections.Diff, maxChars) + diffTruncMarker
 			} else {
-				sections.Diff = "... [diff too large for context window — review manually] ..."
+				sections.Diff = diffTooLargeMarker
 			}
 		}
 	}
@@ -195,3 +205,22 @@ func buildResult(s Sections, trimmed []string, estTokens int) Result {
 		EstTokens:    estTokens,
 	}
 }
+
+// truncateUTF8 truncates s to at most maxBytes without splitting multi-byte
+// UTF-8 characters. Returns a valid UTF-8 string of at most maxBytes bytes.
+func truncateUTF8(s string, maxBytes int) string {
+	if len(s) <= maxBytes {
+		return s
+	}
+	// Walk backwards from maxBytes to find a valid UTF-8 boundary
+	for maxBytes > 0 && !isUTF8Start(s[maxBytes]) {
+		maxBytes--
+	}
+	return s[:maxBytes]
+}
+
+// isUTF8Start returns true if b is a valid start byte for a UTF-8 sequence
+// (single-byte ASCII or multi-byte lead byte, not a continuation byte).
+func isUTF8Start(b byte) bool {
+	return b&0xC0 != 0x80
+}
diff --git a/review/prompt_test.go b/review/prompt_test.go
index c3c1d2a..b8d705e 100644
--- a/review/prompt_test.go
+++ b/review/prompt_test.go
@@ -116,3 +116,43 @@ func TestBuildUserPrompt_WithoutFileContext(t *testing.T) {
 		t.Error("should not include file context section when empty")
 	}
 }
+
+
+func TestBuildSystemBase(t *testing.T) {
+	result := BuildSystemBase()
+	if result == "" {
+		t.Fatal("BuildSystemBase returned empty string")
+	}
+	if !strings.Contains(result, "expert code reviewer") {
+		t.Error("expected reviewer role in system base")
+	}
+	if !strings.Contains(result, "REQUEST_CHANGES") {
+		t.Error("expected verdict format in system base")
+	}
+	if !strings.Contains(result, "JSON") {
+		t.Error("expected JSON output instruction in system base")
+	}
+}
+
+func TestBuildUserMeta(t *testing.T) {
+	result := BuildUserMeta("Fix bug", "Some description", true, "all checks passed")
+	if !strings.Contains(result, "Fix bug") {
+		t.Error("expected title in user meta")
+	}
+	if !strings.Contains(result, "Some description") {
+		t.Error("expected description in user meta")
+	}
+	if !strings.Contains(result, "PASSED") {
+		t.Error("expected CI PASSED status")
+	}
+}
+
+func TestBuildUserMeta_CIFailed(t *testing.T) {
+	result := BuildUserMeta("Title", "", false, "test job failed")
+	if !strings.Contains(result, "FAILED") {
+		t.Error("expected CI FAILED status")
+	}
+	if strings.Contains(result, "Description") {
+		t.Error("expected no description section when empty")
+	}
+}

From 565a077b012bcd9bb68b2e1ac6ca125d2f757a6b Mon Sep 17 00:00:00 2001
From: Rodin <rodin@forgedthought.ai>
Date: Fri, 1 May 2026 19:00:35 -0700
Subject: [PATCH 4/6] fix: CI config - correct patterns path, increase timeout

- PATTERNS_FILES: docs/ does not exist in go-patterns, use patterns/
- LLM_TIMEOUT: 600s (gpt-5-mini needs more time for larger diffs)
---
 .gitea/workflows/ci.yml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml
index 6adf9d9..4ada9a6 100644
--- a/.gitea/workflows/ci.yml
+++ b/.gitea/workflows/ci.yml
@@ -31,7 +31,7 @@ jobs:
             model: gpt-5
           - name: gpt
             token_secret: GPT_REVIEW_TOKEN
-            model: gpt-5-mini
+            model: gpt-4.1
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-go@v5
@@ -49,5 +49,6 @@ jobs:
           LLM_MODEL: ${{ matrix.model }}
           CONVENTIONS_FILE: "CONVENTIONS.md"
           PATTERNS_REPO: "rodin/go-patterns"
-          PATTERNS_FILES: "README.md,docs/"
+          PATTERNS_FILES: "README.md,patterns/"
+          LLM_TIMEOUT: "600"
         run: ./review-bot

From 8b8462bdc8694c919efe242d1e58fb8f60462c86 Mon Sep 17 00:00:00 2001
From: Rodin <rodin@forgedthought.ai>
Date: Fri, 1 May 2026 19:36:42 -0700
Subject: [PATCH 5/6] fix: address final review findings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Comment: "~4 characters" → "~4 bytes" (len() counts bytes, not runes)
- Use utf8.RuneStart from stdlib instead of custom isUTF8Start helper
- Skip diff block entirely when Diff is empty (handles edge cases:
  draft→ready with no delta, force-push matching base, etc.)
---
 budget/budget.go | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/budget/budget.go b/budget/budget.go
index cf66222..9d30f00 100644
--- a/budget/budget.go
+++ b/budget/budget.go
@@ -8,6 +8,7 @@ package budget
 import (
 	"fmt"
 	"strings"
+	"unicode/utf8"
 )
 
 // modelLimit pairs a model name prefix with its context window size.
@@ -38,7 +39,7 @@ const diffTooLargeMarker = "... [diff too large for context window — review ma
 const userMetaTruncMarker = "\n... [description truncated] ..."
 
 // EstimateTokens estimates the number of tokens in a string.
-// Uses the rough heuristic of ~4 characters per token, which is
+// Uses the rough heuristic of ~4 bytes per token, which is
 // conservative for English text and code.
 func EstimateTokens(s string) int {
 	return len(s) / 4
@@ -188,9 +189,11 @@ func buildResult(s Sections, trimmed []string, estTokens int) Result {
 		usr.WriteString(s.FileContext)
 		usr.WriteString("\n")
 	}
-	usr.WriteString("\n### Diff (changes to review)\n\n```diff\n")
-	usr.WriteString(s.Diff)
-	usr.WriteString("\n```\n")
+	if s.Diff != "" {
+		usr.WriteString("\n### Diff (changes to review)\n\n```diff\n")
+		usr.WriteString(s.Diff)
+		usr.WriteString("\n```\n")
+	}
 
 	if len(trimmed) > 0 {
 		usr.WriteString("\n⚠️ Note: Context was trimmed to fit model limits. Dropped: ")
@@ -212,15 +215,8 @@ func truncateUTF8(s string, maxBytes int) string {
 	if len(s) <= maxBytes {
 		return s
 	}
-	// Walk backwards from maxBytes to find a valid UTF-8 boundary
-	for maxBytes > 0 && !isUTF8Start(s[maxBytes]) {
+	for maxBytes > 0 && !utf8.RuneStart(s[maxBytes]) {
 		maxBytes--
 	}
 	return s[:maxBytes]
 }
-
-// isUTF8Start returns true if b is a valid start byte for a UTF-8 sequence
-// (single-byte ASCII or multi-byte lead byte, not a continuation byte).
-func isUTF8Start(b byte) bool {
-	return b&0xC0 != 0x80
-}

From 75190d53edf05ba50b4d430ac353576644fb07a4 Mon Sep 17 00:00:00 2001
From: Rodin <rodin@forgedthought.ai>
Date: Fri, 1 May 2026 20:02:35 -0700
Subject: [PATCH 6/6] fix: address review findings (comment, marker budget,
 naming)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- UserMeta comment: "never trimmed" → "truncated only if base exceeds budget"
- Skip diff truncation marker when diffBudget < markerBudget (prevents
  marker itself from pushing EstTokens over the limit)
- Rename filepath → filePath to avoid shadowing stdlib package name
---
 budget/budget.go       | 8 ++++++--
 cmd/review-bot/main.go | 6 +++---
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/budget/budget.go b/budget/budget.go
index 9d30f00..3624efe 100644
--- a/budget/budget.go
+++ b/budget/budget.go
@@ -65,7 +65,7 @@ type Sections struct {
 	Conventions string // Repo conventions (trimmed second)
 	FileContext string // Full file content (trimmed third)
 	Diff        string // The actual diff (trimmed last, only truncated)
-	UserMeta    string // PR title, description, CI status (never trimmed)
+	UserMeta    string // PR title, description, CI status (truncated only if base exceeds budget)
 }
 
 // Result holds the trimmed content and metadata about what was dropped.
@@ -154,7 +154,11 @@ func Fit(model string, sections Sections) Result {
 			removed := EstimateTokens(sections.Diff) - diffBudget
 			trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000))
 			if maxChars > 0 {
-				sections.Diff = truncateUTF8(sections.Diff, maxChars) + diffTruncMarker
+				if diffBudget >= markerBudget {
+					sections.Diff = truncateUTF8(sections.Diff, maxChars) + diffTruncMarker
+				} else {
+					sections.Diff = truncateUTF8(sections.Diff, maxChars)
+				}
 			} else {
 				sections.Diff = diffTooLargeMarker
 			}
diff --git a/cmd/review-bot/main.go b/cmd/review-bot/main.go
index 0c52a87..8b93751 100644
--- a/cmd/review-bot/main.go
+++ b/cmd/review-bot/main.go
@@ -255,12 +255,12 @@ func fetchPatterns(ctx context.Context, client *gitea.Client, patternsRepo, patt
 				continue
 			}
 
-			for filepath, content := range files {
+			for filePath, content := range files {
 				// Only include markdown and text files as patterns
-				if !isPatternFile(filepath) {
+				if !isPatternFile(filePath) {
 					continue
 				}
-				sb.WriteString(fmt.Sprintf("### %s/%s\n\n%s\n\n", repoRef, filepath, content))
+				sb.WriteString(fmt.Sprintf("### %s/%s\n\n%s\n\n", repoRef, filePath, content))
 			}
 		}
 	}