From d9cacf6f62cbbd41dae69b63e418e9e2d8599968 Mon Sep 17 00:00:00 2001 From: Rodin Date: Fri, 1 May 2026 18:51:22 -0700 Subject: [PATCH] fix: strict budget enforcement + deterministic model matching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses review findings: - Replace map-based model limits with ordered slice (longest-prefix-first) for deterministic matching - Truncate UserMeta when base content alone exceeds budget (keeps first 4000 chars + truncation marker) - Remove hard minimum of 1000 tokens for diff budget — use 0 as floor to guarantee total never exceeds limit - Handle zero-budget edge case (diff replaced with manual-review message) - Add tests: huge UserMeta, all-sections-huge never exceeds limit --- budget/budget.go | 56 +++++++++++++++++++++++++++---------------- budget/budget_test.go | 45 ++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 20 deletions(-) diff --git a/budget/budget.go b/budget/budget.go index f6df937..449687e 100644 --- a/budget/budget.go +++ b/budget/budget.go @@ -10,16 +10,22 @@ import ( "strings" ) -// Known model context limits (in tokens). -// Models not listed here get the conservative default. -var modelLimits = map[string]int{ - "gpt-4.1": 128_000, - "gpt-4.1-mini": 128_000, - "gpt-5": 200_000, - "gpt-5-mini": 200_000, - "claude-sonnet-4-20250514": 200_000, - "claude-opus-4-20250514": 200_000, - "claude-haiku-3.5-20241022": 200_000, +// modelLimit pairs a model name prefix with its context window size. +type modelLimit struct { + prefix string + limit int +} + +// Known model context limits (in tokens), ordered longest-prefix-first +// for deterministic matching. 
+var modelLimits = []modelLimit{ + {"claude-haiku-3.5-20241022", 200_000}, + {"claude-sonnet-4-20250514", 200_000}, + {"claude-opus-4-20250514", 200_000}, + {"gpt-4.1-mini", 128_000}, + {"gpt-5-mini", 200_000}, + {"gpt-4.1", 128_000}, + {"gpt-5", 200_000}, } const defaultLimit = 128_000 @@ -35,13 +41,11 @@ func EstimateTokens(s string) int { } // LimitForModel returns the context window size for the given model. +// Uses longest-prefix-first matching for deterministic results. func LimitForModel(model string) int { - if limit, ok := modelLimits[model]; ok { - return limit - } - for prefix, limit := range modelLimits { - if strings.HasPrefix(model, prefix) { - return limit + for _, ml := range modelLimits { + if model == ml.prefix || strings.HasPrefix(model, ml.prefix) { + return ml.limit } } return defaultLimit @@ -75,7 +79,15 @@ func Fit(model string, sections Sections) Result { baseTokens := EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta) available := limit - baseTokens if available < 0 { - available = limit / 2 + // Base content alone exceeds budget. Truncate UserMeta (keep first 4000 bytes). + if len(sections.UserMeta) > 4000 { + sections.UserMeta = sections.UserMeta[:4000] + "\n... [description truncated] ..." + baseTokens = EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta) + available = limit - baseTokens + } + if available < 0 { + available = 0 + } } // Trimmable sections in priority order (first = dropped first) @@ -123,14 +135,18 @@ func Fit(model string, sections Sections) Result { for _, e := range entries { diffBudget -= EstimateTokens(*e.content) } - if diffBudget < 1000 { - diffBudget = 1000 + if diffBudget < 0 { + diffBudget = 0 } maxChars := diffBudget * 4 if maxChars < len(sections.Diff) { removed := EstimateTokens(sections.Diff) - diffBudget trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000)) - sections.Diff = sections.Diff[:maxChars] + "\n\n... 
[diff truncated due to context limit] ..." + if maxChars > 0 { + sections.Diff = sections.Diff[:maxChars] + "\n\n... [diff truncated due to context limit] ..." + } else { + sections.Diff = "... [diff too large for context window — review manually] ..." + } } } diff --git a/budget/budget_test.go b/budget/budget_test.go index 19c39ab..7efc917 100644 --- a/budget/budget_test.go +++ b/budget/budget_test.go @@ -156,3 +156,48 @@ func TestFit_PreservesNoteInOutput(t *testing.T) { t.Error("expected trimming note in user prompt") } } + + +func TestFit_HugeUserMeta(t *testing.T) { + // UserMeta so large that base alone exceeds limit + // Use a unique marker past the truncation point + hugeDesc := strings.Repeat("d", 5000) + "UNIQUE_MARKER_PAST_TRUNCATION" + strings.Repeat("d", 595_000) + s := Sections{ + SystemBase: "base", + Diff: "small diff", + UserMeta: hugeDesc, + } + result := Fit("gpt-4.1", s) + + limit := LimitForModel("gpt-4.1") - reserveTokens + if result.EstTokens > limit { + t.Errorf("EstTokens %d exceeds limit %d", result.EstTokens, limit) + } + // Content past truncation point should not be present + if strings.Contains(result.UserPrompt, "UNIQUE_MARKER_PAST_TRUNCATION") { + t.Error("expected UserMeta to be truncated but found content past truncation point") + } + // Truncation marker should be present + if !strings.Contains(result.UserPrompt, "[description truncated]") { + t.Error("expected truncation marker in output") + } +} + +func TestFit_NeverExceedsLimit(t *testing.T) { + // All sections huge — verify final tokens never exceed limit + big := strings.Repeat("a", 200_000) + s := Sections{ + SystemBase: strings.Repeat("s", 8000), + Patterns: big, + Conventions: big, + FileContext: big, + Diff: big, + UserMeta: strings.Repeat("m", 8000), + } + result := Fit("gpt-4.1", s) + + limit := LimitForModel("gpt-4.1") - reserveTokens + if result.EstTokens > limit { + t.Errorf("EstTokens %d exceeds limit %d (trimmed: %v)", result.EstTokens, limit, result.Trimmed) + } +}