fix: strict budget enforcement + deterministic model matching
Addresses review findings:
- Replace map-based model limits with ordered slice (longest-prefix-first) for deterministic matching
- Truncate UserMeta when base content alone exceeds budget (keeps first 4000 chars + truncation marker)
- Remove hard minimum of 1000 tokens for diff budget — use 0 as floor to guarantee total never exceeds limit
- Handle zero-budget edge case (diff replaced with manual-review message)
- Add tests: huge UserMeta, all-sections-huge never exceeds limit
This commit is contained in:
+36
-20
@@ -10,16 +10,22 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Known model context limits (in tokens).
|
||||
// Models not listed here get the conservative default.
|
||||
var modelLimits = map[string]int{
|
||||
"gpt-4.1": 128_000,
|
||||
"gpt-4.1-mini": 128_000,
|
||||
"gpt-5": 200_000,
|
||||
"gpt-5-mini": 200_000,
|
||||
"claude-sonnet-4-20250514": 200_000,
|
||||
"claude-opus-4-20250514": 200_000,
|
||||
"claude-haiku-3.5-20241022": 200_000,
|
||||
// modelLimit pairs a model name prefix with its context window size.
|
||||
type modelLimit struct {
|
||||
prefix string
|
||||
limit int
|
||||
}
|
||||
|
||||
// Known model context limits (in tokens), ordered longest-prefix-first
|
||||
// for deterministic matching.
|
||||
var modelLimits = []modelLimit{
|
||||
{"claude-haiku-3.5-20241022", 200_000},
|
||||
{"claude-sonnet-4-20250514", 200_000},
|
||||
{"claude-opus-4-20250514", 200_000},
|
||||
{"gpt-4.1-mini", 128_000},
|
||||
{"gpt-5-mini", 200_000},
|
||||
{"gpt-4.1", 128_000},
|
||||
{"gpt-5", 200_000},
|
||||
}
|
||||
|
||||
const defaultLimit = 128_000
|
||||
@@ -35,13 +41,11 @@ func EstimateTokens(s string) int {
|
||||
}
|
||||
|
||||
// LimitForModel returns the context window size for the given model.
|
||||
// Uses longest-prefix-first matching for deterministic results.
|
||||
func LimitForModel(model string) int {
|
||||
if limit, ok := modelLimits[model]; ok {
|
||||
return limit
|
||||
}
|
||||
for prefix, limit := range modelLimits {
|
||||
if strings.HasPrefix(model, prefix) {
|
||||
return limit
|
||||
for _, ml := range modelLimits {
|
||||
if model == ml.prefix || strings.HasPrefix(model, ml.prefix) {
|
||||
return ml.limit
|
||||
}
|
||||
}
|
||||
return defaultLimit
|
||||
@@ -75,7 +79,15 @@ func Fit(model string, sections Sections) Result {
|
||||
baseTokens := EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
|
||||
available := limit - baseTokens
|
||||
if available < 0 {
|
||||
available = limit / 2
|
||||
// Base content alone exceeds budget. Truncate UserMeta (keep first 4000 chars).
|
||||
if len(sections.UserMeta) > 4000 {
|
||||
sections.UserMeta = sections.UserMeta[:4000] + "\n... [description truncated] ..."
|
||||
baseTokens = EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
|
||||
available = limit - baseTokens
|
||||
}
|
||||
if available < 0 {
|
||||
available = 0
|
||||
}
|
||||
}
|
||||
|
||||
// Trimmable sections in priority order (first = dropped first)
|
||||
@@ -123,14 +135,18 @@ func Fit(model string, sections Sections) Result {
|
||||
for _, e := range entries {
|
||||
diffBudget -= EstimateTokens(*e.content)
|
||||
}
|
||||
if diffBudget < 1000 {
|
||||
diffBudget = 1000
|
||||
if diffBudget < 0 {
|
||||
diffBudget = 0
|
||||
}
|
||||
maxChars := diffBudget * 4
|
||||
if maxChars < len(sections.Diff) {
|
||||
removed := EstimateTokens(sections.Diff) - diffBudget
|
||||
trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000))
|
||||
sections.Diff = sections.Diff[:maxChars] + "\n\n... [diff truncated due to context limit] ..."
|
||||
if maxChars > 0 {
|
||||
sections.Diff = sections.Diff[:maxChars] + "\n\n... [diff truncated due to context limit] ..."
|
||||
} else {
|
||||
sections.Diff = "... [diff too large for context window — review manually] ..."
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -156,3 +156,48 @@ func TestFit_PreservesNoteInOutput(t *testing.T) {
|
||||
t.Error("expected trimming note in user prompt")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
func TestFit_HugeUserMeta(t *testing.T) {
|
||||
// UserMeta so large that base alone exceeds limit
|
||||
// Use a unique marker past the truncation point
|
||||
hugeDesc := strings.Repeat("d", 5000) + "UNIQUE_MARKER_PAST_TRUNCATION" + strings.Repeat("d", 595_000)
|
||||
s := Sections{
|
||||
SystemBase: "base",
|
||||
Diff: "small diff",
|
||||
UserMeta: hugeDesc,
|
||||
}
|
||||
result := Fit("gpt-4.1", s)
|
||||
|
||||
limit := LimitForModel("gpt-4.1") - reserveTokens
|
||||
if result.EstTokens > limit {
|
||||
t.Errorf("EstTokens %d exceeds limit %d", result.EstTokens, limit)
|
||||
}
|
||||
// Content past truncation point should not be present
|
||||
if strings.Contains(result.UserPrompt, "UNIQUE_MARKER_PAST_TRUNCATION") {
|
||||
t.Error("expected UserMeta to be truncated but found content past truncation point")
|
||||
}
|
||||
// Truncation marker should be present
|
||||
if !strings.Contains(result.UserPrompt, "[description truncated]") {
|
||||
t.Error("expected truncation marker in output")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFit_NeverExceedsLimit(t *testing.T) {
|
||||
// All sections huge — verify final tokens never exceed limit
|
||||
big := strings.Repeat("a", 200_000)
|
||||
s := Sections{
|
||||
SystemBase: strings.Repeat("s", 8000),
|
||||
Patterns: big,
|
||||
Conventions: big,
|
||||
FileContext: big,
|
||||
Diff: big,
|
||||
UserMeta: strings.Repeat("m", 8000),
|
||||
}
|
||||
result := Fit("gpt-4.1", s)
|
||||
|
||||
limit := LimitForModel("gpt-4.1") - reserveTokens
|
||||
if result.EstTokens > limit {
|
||||
t.Errorf("EstTokens %d exceeds limit %d (trimmed: %v)", result.EstTokens, limit, result.Trimmed)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user