From d9cacf6f62cbbd41dae69b63e418e9e2d8599968 Mon Sep 17 00:00:00 2001
From: Rodin <rodin@forgedthought.ai>
Date: Fri, 1 May 2026 18:51:22 -0700
Subject: [PATCH] fix: strict budget enforcement + deterministic model matching
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses review findings:
- Replace map-based model limits with ordered slice (longest-prefix-first)
  for deterministic matching
- Truncate UserMeta when base content alone exceeds budget (keeps first
  4000 chars + truncation marker)
- Remove hard minimum of 1000 tokens for diff budget — use 0 as floor
  to guarantee total never exceeds limit
- Handle zero-budget edge case (diff replaced with manual-review message)
- Add tests: huge UserMeta, all-sections-huge never exceeds limit
---
 budget/budget.go      | 56 +++++++++++++++++++++++++++----------------
 budget/budget_test.go | 45 ++++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+), 20 deletions(-)

diff --git a/budget/budget.go b/budget/budget.go
index f6df937..449687e 100644
--- a/budget/budget.go
+++ b/budget/budget.go
@@ -10,16 +10,22 @@ import (
 	"strings"
 )
 
-// Known model context limits (in tokens).
-// Models not listed here get the conservative default.
-var modelLimits = map[string]int{
-	"gpt-4.1":                   128_000,
-	"gpt-4.1-mini":              128_000,
-	"gpt-5":                     200_000,
-	"gpt-5-mini":                200_000,
-	"claude-sonnet-4-20250514":  200_000,
-	"claude-opus-4-20250514":    200_000,
-	"claude-haiku-3.5-20241022": 200_000,
+// modelLimit pairs a model-name prefix with the context window size (in tokens) used for models matching it.
+type modelLimit struct {
+	prefix string // matched against the model name with strings.HasPrefix in LimitForModel
+	limit  int    // context window size in tokens
+}
+
+// Known model context limits (in tokens). LimitForModel returns the first matching entry,
+// so keep entries longest-prefix-first (e.g. "gpt-4.1-mini" before "gpt-4.1").
+var modelLimits = []modelLimit{
+	{"claude-haiku-3.5-20241022", 200_000},
+	{"claude-sonnet-4-20250514", 200_000},
+	{"claude-opus-4-20250514", 200_000},
+	{"gpt-4.1-mini", 128_000},
+	{"gpt-5-mini", 200_000},
+	{"gpt-4.1", 128_000},
+	{"gpt-5", 200_000},
 }
 
 const defaultLimit = 128_000
@@ -35,13 +41,11 @@ func EstimateTokens(s string) int {
 }
 
 // LimitForModel returns the context window size for the given model.
+// Uses longest-prefix-first matching for deterministic results.
 func LimitForModel(model string) int {
-	if limit, ok := modelLimits[model]; ok {
-		return limit
-	}
-	for prefix, limit := range modelLimits {
-		if strings.HasPrefix(model, prefix) {
-			return limit
+	for _, ml := range modelLimits {
+		if model == ml.prefix || strings.HasPrefix(model, ml.prefix) {
+			return ml.limit
 		}
 	}
 	return defaultLimit
@@ -75,7 +79,15 @@ func Fit(model string, sections Sections) Result {
 	baseTokens := EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
 	available := limit - baseTokens
 	if available < 0 {
-		available = limit / 2
+		// Base content alone exceeds budget. Truncate UserMeta to its first 4000 bytes (byte slice; may split a multi-byte rune).
+		if len(sections.UserMeta) > 4000 {
+			sections.UserMeta = sections.UserMeta[:4000] + "\n... [description truncated] ..."
+			baseTokens = EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
+			available = limit - baseTokens
+		}
+		if available < 0 {
+			available = 0
+		}
 	}
 
 	// Trimmable sections in priority order (first = dropped first)
@@ -123,14 +135,18 @@ func Fit(model string, sections Sections) Result {
 		for _, e := range entries {
 			diffBudget -= EstimateTokens(*e.content)
 		}
-		if diffBudget < 1000 {
-			diffBudget = 1000
+		if diffBudget < 0 {
+			diffBudget = 0
 		}
 		maxChars := diffBudget * 4
 		if maxChars < len(sections.Diff) {
 			removed := EstimateTokens(sections.Diff) - diffBudget
 			trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000))
-			sections.Diff = sections.Diff[:maxChars] + "\n\n... [diff truncated due to context limit] ..."
+			if maxChars > 0 {
+				sections.Diff = sections.Diff[:maxChars] + "\n\n... [diff truncated due to context limit] ..."
+			} else {
+				sections.Diff = "... [diff too large for context window — review manually] ..."
+			}
 		}
 	}
 
diff --git a/budget/budget_test.go b/budget/budget_test.go
index 19c39ab..7efc917 100644
--- a/budget/budget_test.go
+++ b/budget/budget_test.go
@@ -156,3 +156,48 @@ func TestFit_PreservesNoteInOutput(t *testing.T) {
 		t.Error("expected trimming note in user prompt")
 	}
 }
+
+// TestFit_HugeUserMeta checks that Fit truncates an oversized UserMeta and stays within the limit.
+func TestFit_HugeUserMeta(t *testing.T) {
+	// UserMeta sized so that base content alone exceeds the limit.
+	// The marker sits after 5000 bytes, past the 4000-byte cut Fit applies to UserMeta.
+	hugeDesc := strings.Repeat("d", 5000) + "UNIQUE_MARKER_PAST_TRUNCATION" + strings.Repeat("d", 595_000)
+	s := Sections{
+		SystemBase: "base",
+		Diff:       "small diff",
+		UserMeta:   hugeDesc,
+	}
+	result := Fit("gpt-4.1", s)
+
+	limit := LimitForModel("gpt-4.1") - reserveTokens
+	if result.EstTokens > limit {
+		t.Errorf("EstTokens %d exceeds limit %d", result.EstTokens, limit)
+	}
+	// Content past truncation point should not be present
+	if strings.Contains(result.UserPrompt, "UNIQUE_MARKER_PAST_TRUNCATION") {
+		t.Error("expected UserMeta to be truncated but found content past truncation point")
+	}
+	// Truncation marker should be present
+	if !strings.Contains(result.UserPrompt, "[description truncated]") {
+		t.Error("expected truncation marker in output")
+	}
+}
+
+func TestFit_NeverExceedsLimit(t *testing.T) {
+	// Every section is oversized at once; EstTokens must still not exceed the model limit minus reserveTokens.
+	big := strings.Repeat("a", 200_000)
+	s := Sections{
+		SystemBase:  strings.Repeat("s", 8000),
+		Patterns:    big,
+		Conventions: big,
+		FileContext: big,
+		Diff:        big,
+		UserMeta:    strings.Repeat("m", 8000),
+	}
+	result := Fit("gpt-4.1", s)
+
+	limit := LimitForModel("gpt-4.1") - reserveTokens
+	if result.EstTokens > limit {
+		t.Errorf("EstTokens %d exceeds limit %d (trimmed: %v)", result.EstTokens, limit, result.Trimmed)
+	}
+}