fix: strict budget enforcement + deterministic model matching

Addresses review findings:
- Replace map-based model limits with an ordered slice (longest-prefix-first) for deterministic matching
- Truncate UserMeta when base content alone exceeds the budget (keeps the first 4000 chars + truncation marker)
- Remove the hard minimum of 1000 tokens for the diff budget — use 0 as the floor to guarantee the total never exceeds the limit
- Handle the zero-budget edge case (diff replaced with a manual-review message)
- Add tests: huge UserMeta; all-sections-huge never exceeds limit
2026-05-01 18:51:22 -07:00
parent 67d835909f
commit d9cacf6f62
2 changed files with 81 additions and 20 deletions
@@ -10,16 +10,22 @@ import (
 	"strings"
 )
-// Known model context limits (in tokens).
+// modelLimit pairs a model name prefix with its context window size.
-// Models not listed here get the conservative default.
+type modelLimit struct {
-var modelLimits = map[string]int{
+	prefix string
-	"gpt-4.1":                   128_000,
+	limit  int
-	"gpt-4.1-mini":              128_000,
+}
-	"gpt-5":                     200_000,
+
-	"gpt-5-mini":                200_000,
+// Known model context limits (in tokens), ordered longest-prefix-first
-	"claude-sonnet-4-20250514":  200_000,
+// for deterministic matching.
-	"claude-opus-4-20250514":    200_000,
+var modelLimits = []modelLimit{
-	"claude-haiku-3.5-20241022": 200_000,
+	{"claude-haiku-3.5-20241022", 200_000},
 	{"claude-sonnet-4-20250514", 200_000},
 	{"claude-opus-4-20250514", 200_000},
 	{"gpt-4.1-mini", 128_000},
 	{"gpt-5-mini", 200_000},
 	{"gpt-4.1", 128_000},
 	{"gpt-5", 200_000},
 }
 const defaultLimit = 128_000
@@ -35,13 +41,11 @@ func EstimateTokens(s string) int {
 }
 // LimitForModel returns the context window size for the given model.
 // Uses longest-prefix-first matching for deterministic results.
 func LimitForModel(model string) int {
-	if limit, ok := modelLimits[model]; ok {
+	for _, ml := range modelLimits {
-		return limit
+		if model == ml.prefix || strings.HasPrefix(model, ml.prefix) {
-	}
+			return ml.limit
 	for prefix, limit := range modelLimits {
 		if strings.HasPrefix(model, prefix) {
 			return limit
 		}
 	}
 	return defaultLimit
@@ -75,7 +79,15 @@ func Fit(model string, sections Sections) Result {
 	baseTokens := EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
 	available := limit - baseTokens
 	if available < 0 {
-		available = limit / 2
+		// Base content alone exceeds budget. Truncate UserMeta (keep first 4000 chars).
 		if len(sections.UserMeta) > 4000 {
 			sections.UserMeta = sections.UserMeta[:4000] + "\n... [description truncated] ..."
 			baseTokens = EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
 			available = limit - baseTokens
 		}
 		if available < 0 {
 			available = 0
 		}
 	}
 	// Trimmable sections in priority order (first = dropped first)
@@ -123,14 +135,18 @@ func Fit(model string, sections Sections) Result {
 		for _, e := range entries {
 			diffBudget -= EstimateTokens(*e.content)
 		}
-		if diffBudget < 1000 {
+		if diffBudget < 0 {
-			diffBudget = 1000
+			diffBudget = 0
 		}
 		maxChars := diffBudget * 4
 		if maxChars < len(sections.Diff) {
 			removed := EstimateTokens(sections.Diff) - diffBudget
 			trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000))
 			if maxChars > 0 {
 				sections.Diff = sections.Diff[:maxChars] + "\n\n... [diff truncated due to context limit] ..."
 			} else {
 				sections.Diff = "... [diff too large for context window — review manually] ..."
 			}
 		}
 	}
@@ -156,3 +156,48 @@ func TestFit_PreservesNoteInOutput(t *testing.T) {
 		t.Error("expected trimming note in user prompt")
 	}
 }
 // TestFit_HugeUserMeta feeds Fit a UserMeta that is meant to blow the budget
 // on its own and checks that the result stays within the limit, that the
 // description was cut, and that the truncation marker was emitted.
 func TestFit_HugeUserMeta(t *testing.T) {
 	const (
 		model  = "gpt-4.1"
 		marker = "UNIQUE_MARKER_PAST_TRUNCATION"
 	)
 	// The marker sits after the first 5000 characters, i.e. beyond the
 	// 4000-character prefix that Fit keeps when it truncates UserMeta.
 	head := strings.Repeat("d", 5000)
 	tail := strings.Repeat("d", 595_000)
 	input := Sections{
 		SystemBase: "base",
 		Diff:       "small diff",
 		UserMeta:   head + marker + tail,
 	}
 	got := Fit(model, input)
 	// The estimated size must respect the model limit minus the reserve.
 	if budget := LimitForModel(model) - reserveTokens; got.EstTokens > budget {
 		t.Errorf("EstTokens %d exceeds limit %d", got.EstTokens, budget)
 	}
 	// Nothing from beyond the cut may survive into the prompt.
 	if leaked := strings.Contains(got.UserPrompt, marker); leaked {
 		t.Error("expected UserMeta to be truncated but found content past truncation point")
 	}
 	// The prompt must say that the description was shortened.
 	if noted := strings.Contains(got.UserPrompt, "[description truncated]"); !noted {
 		t.Error("expected truncation marker in output")
 	}
 }
 // TestFit_NeverExceedsLimit makes every section large at once and checks
 // that the estimated token count Fit reports still stays within the model
 // limit minus the reserve.
 func TestFit_NeverExceedsLimit(t *testing.T) {
 	const model = "gpt-4.1"
 	// One shared 200,000-character payload for each trimmable section and the diff.
 	payload := strings.Repeat("a", 200_000)
 	var input Sections
 	input.SystemBase = strings.Repeat("s", 8000)
 	input.Patterns = payload
 	input.Conventions = payload
 	input.FileContext = payload
 	input.Diff = payload
 	input.UserMeta = strings.Repeat("m", 8000)
 	got := Fit(model, input)
 	budget := LimitForModel(model) - reserveTokens
 	if got.EstTokens <= budget {
 		return
 	}
 	t.Errorf("EstTokens %d exceeds limit %d (trimmed: %v)", got.EstTokens, budget, got.Trimmed)
 }