fix: strict budget enforcement + deterministic model matching
CI / test (pull_request) Successful in 14s
CI / review (gpt-5, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 1m59s
CI / review (gpt-5-mini, gpt, GPT_REVIEW_TOKEN) (pull_request) Failing after 5m12s

Addresses review findings:
- Replace map-based model limits with ordered slice (longest-prefix-first)
  for deterministic matching
- Truncate UserMeta when base content alone exceeds budget (keeps first
  4000 chars + truncation marker)
- Remove hard minimum of 1000 tokens for diff budget — use 0 as floor
  to guarantee total never exceeds limit
- Handle zero-budget edge case (diff replaced with manual-review message)
- Add tests: huge UserMeta, all-sections-huge never exceeds limit
This commit is contained in:
Rodin
2026-05-01 18:51:22 -07:00
parent 67d835909f
commit d9cacf6f62
2 changed files with 81 additions and 20 deletions
+35 -19
View File
@@ -10,16 +10,22 @@ import (
"strings" "strings"
) )
// Known model context limits (in tokens). // modelLimit pairs a model name prefix with its context window size.
// Models not listed here get the conservative default. type modelLimit struct {
var modelLimits = map[string]int{ prefix string
"gpt-4.1": 128_000, limit int
"gpt-4.1-mini": 128_000, }
"gpt-5": 200_000,
"gpt-5-mini": 200_000, // Known model context limits (in tokens), ordered longest-prefix-first
"claude-sonnet-4-20250514": 200_000, // for deterministic matching.
"claude-opus-4-20250514": 200_000, var modelLimits = []modelLimit{
"claude-haiku-3.5-20241022": 200_000, {"claude-haiku-3.5-20241022", 200_000},
{"claude-sonnet-4-20250514", 200_000},
{"claude-opus-4-20250514", 200_000},
{"gpt-4.1-mini", 128_000},
{"gpt-5-mini", 200_000},
{"gpt-4.1", 128_000},
{"gpt-5", 200_000},
} }
const defaultLimit = 128_000 const defaultLimit = 128_000
@@ -35,13 +41,11 @@ func EstimateTokens(s string) int {
} }
// LimitForModel returns the context window size for the given model. // LimitForModel returns the context window size for the given model.
// Uses longest-prefix-first matching for deterministic results.
func LimitForModel(model string) int { func LimitForModel(model string) int {
if limit, ok := modelLimits[model]; ok { for _, ml := range modelLimits {
return limit if model == ml.prefix || strings.HasPrefix(model, ml.prefix) {
} return ml.limit
for prefix, limit := range modelLimits {
if strings.HasPrefix(model, prefix) {
return limit
} }
} }
return defaultLimit return defaultLimit
@@ -75,7 +79,15 @@ func Fit(model string, sections Sections) Result {
baseTokens := EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta) baseTokens := EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
available := limit - baseTokens available := limit - baseTokens
if available < 0 { if available < 0 {
available = limit / 2 // Base content alone exceeds budget. Truncate UserMeta (keep first 4000 bytes).
if len(sections.UserMeta) > 4000 {
sections.UserMeta = sections.UserMeta[:4000] + "\n... [description truncated] ..."
baseTokens = EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
available = limit - baseTokens
}
if available < 0 {
available = 0
}
} }
// Trimmable sections in priority order (first = dropped first) // Trimmable sections in priority order (first = dropped first)
@@ -123,14 +135,18 @@ func Fit(model string, sections Sections) Result {
for _, e := range entries { for _, e := range entries {
diffBudget -= EstimateTokens(*e.content) diffBudget -= EstimateTokens(*e.content)
} }
if diffBudget < 1000 { if diffBudget < 0 {
diffBudget = 1000 diffBudget = 0
} }
maxChars := diffBudget * 4 maxChars := diffBudget * 4
if maxChars < len(sections.Diff) { if maxChars < len(sections.Diff) {
removed := EstimateTokens(sections.Diff) - diffBudget removed := EstimateTokens(sections.Diff) - diffBudget
trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000)) trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000))
if maxChars > 0 {
sections.Diff = sections.Diff[:maxChars] + "\n\n... [diff truncated due to context limit] ..." sections.Diff = sections.Diff[:maxChars] + "\n\n... [diff truncated due to context limit] ..."
} else {
sections.Diff = "... [diff too large for context window — review manually] ..."
}
} }
} }
+45
View File
@@ -156,3 +156,48 @@ func TestFit_PreservesNoteInOutput(t *testing.T) {
t.Error("expected trimming note in user prompt") t.Error("expected trimming note in user prompt")
} }
} }
func TestFit_HugeUserMeta(t *testing.T) {
// UserMeta so large that base alone exceeds limit
// Use a unique marker past the truncation point
hugeDesc := strings.Repeat("d", 5000) + "UNIQUE_MARKER_PAST_TRUNCATION" + strings.Repeat("d", 595_000)
s := Sections{
SystemBase: "base",
Diff: "small diff",
UserMeta: hugeDesc,
}
result := Fit("gpt-4.1", s)
limit := LimitForModel("gpt-4.1") - reserveTokens
if result.EstTokens > limit {
t.Errorf("EstTokens %d exceeds limit %d", result.EstTokens, limit)
}
// Content past truncation point should not be present
if strings.Contains(result.UserPrompt, "UNIQUE_MARKER_PAST_TRUNCATION") {
t.Error("expected UserMeta to be truncated but found content past truncation point")
}
// Truncation marker should be present
if !strings.Contains(result.UserPrompt, "[description truncated]") {
t.Error("expected truncation marker in output")
}
}
// TestFit_NeverExceedsLimit fills every section it sets with large content
// and checks that the estimated token count reported by Fit still fits the
// gpt-4.1 budget (model limit minus reserveTokens).
func TestFit_NeverExceedsLimit(t *testing.T) {
	const model = "gpt-4.1"

	filler := strings.Repeat("a", 200_000)
	input := Sections{
		SystemBase:  strings.Repeat("s", 8000),
		Patterns:    filler,
		Conventions: filler,
		FileContext: filler,
		Diff:        filler,
		UserMeta:    strings.Repeat("m", 8000),
	}

	got := Fit(model, input)
	budget := LimitForModel(model) - reserveTokens
	if got.EstTokens <= budget {
		return
	}
	// Include the trim log so a failure shows which sections were cut.
	t.Errorf("EstTokens %d exceeds limit %d (trimmed: %v)", got.EstTokens, budget, got.Trimmed)
}