// Package budget manages LLM context window budgeting for review-bot. // // It estimates token usage and progressively trims context content to fit // within model-specific limits. The trimming order (least important first): // patterns → conventions → file context → diff truncation. package budget import ( "fmt" "strings" "unicode/utf8" ) // modelLimit pairs a model name prefix with its context window size. type modelLimit struct { prefix string limit int } // Known model context limits (in tokens), ordered longest-prefix-first // for deterministic matching. var modelLimits = []modelLimit{ {"claude-haiku-3.5-20241022", 200_000}, {"claude-sonnet-4-20250514", 200_000}, {"claude-opus-4-20250514", 200_000}, {"gpt-4.1-mini", 128_000}, {"gpt-5-mini", 200_000}, {"gpt-4.1", 128_000}, {"gpt-5", 200_000}, } const defaultLimit = 128_000 // reserveTokens is headroom for the response generation. const reserveTokens = 4_000 const diffTruncMarker = "\n\n... [diff truncated due to context limit] ..." const diffTooLargeMarker = "... [diff too large for context window — review manually] ..." const userMetaTruncMarker = "\n... [description truncated] ..." // EstimateTokens estimates the number of tokens in a string. // Uses the rough heuristic of ~4 bytes per token, which is // conservative for English text and code. func EstimateTokens(s string) int { return len(s) / 4 } // LimitForModel returns the context window size for the given model. // Uses longest-prefix-first matching for deterministic results. func LimitForModel(model string) int { for _, ml := range modelLimits { if model == ml.prefix || strings.HasPrefix(model, ml.prefix) { return ml.limit } } return defaultLimit } // Sections holds the prompt content sections in trim priority order. // When the total exceeds the budget, sections are trimmed from least // important (Patterns) to most important (Diff). type Sections struct { SystemBase string // Core instructions (never trimmed) Patterns string // Language patterns (trimmed first) Conventions string // Repo conventions (trimmed second) DesignDocs string // Path-scoped design documents (trimmed third) FileContext string // Full file content (trimmed fourth) Diff string // The actual diff (trimmed last, only truncated) UserMeta string // PR title, description, CI status (truncated only if base exceeds budget) } // Result holds the trimmed content and metadata about what was dropped. type Result struct { SystemPrompt string UserPrompt string Trimmed []string // Human-readable descriptions of what was trimmed EstTokens int // Estimated total tokens after trimming } // Fit trims sections to fit within the model's context limit. // Returns the assembled prompts and a list of what was trimmed. func Fit(model string, sections Sections) Result { limit := LimitForModel(model) - reserveTokens baseTokens := EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta) available := limit - baseTokens if available < 0 { // Base content alone exceeds budget. Truncate UserMeta (keep first ~1000 tokens). if len(sections.UserMeta) > 4000 { sections.UserMeta = truncateUTF8(sections.UserMeta, 4000) + userMetaTruncMarker baseTokens = EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta) available = limit - baseTokens } if available < 0 { available = 0 } } // Trimmable sections in priority order (first = dropped first) type entry struct { name string content *string } entries := []entry{ {"patterns", §ions.Patterns}, {"conventions", §ions.Conventions}, {"design docs", §ions.DesignDocs}, {"file context", §ions.FileContext}, } // Check if everything fits totalTrimmable := EstimateTokens(sections.Diff) for _, e := range entries { totalTrimmable += EstimateTokens(*e.content) } var trimmed []string if totalTrimmable > available { // Trim from least important for i := range entries { tokens := EstimateTokens(*entries[i].content) if tokens == 0 { continue } trimmed = append(trimmed, fmt.Sprintf("%s (~%dK tokens)", entries[i].name, tokens/1000)) *entries[i].content = "" // Recalculate totalTrimmable = EstimateTokens(sections.Diff) for _, e := range entries { totalTrimmable += EstimateTokens(*e.content) } if totalTrimmable <= available { break } } } // If still too large, truncate the diff if totalTrimmable > available { diffBudget := available for _, e := range entries { diffBudget -= EstimateTokens(*e.content) } if diffBudget < 0 { diffBudget = 0 } // Reserve space for truncation marker markerBudget := EstimateTokens(diffTruncMarker) effectiveBudget := diffBudget - markerBudget if effectiveBudget < 0 { effectiveBudget = 0 } maxChars := effectiveBudget * 4 if maxChars < len(sections.Diff) { removed := EstimateTokens(sections.Diff) - diffBudget trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000)) if maxChars > 0 { if diffBudget >= markerBudget { sections.Diff = truncateUTF8(sections.Diff, maxChars) + diffTruncMarker } else { sections.Diff = truncateUTF8(sections.Diff, maxChars) } } else { sections.Diff = diffTooLargeMarker } } } finalTokens := baseTokens for _, e := range entries { finalTokens += EstimateTokens(*e.content) } finalTokens += EstimateTokens(sections.Diff) return buildResult(sections, trimmed, finalTokens) } func buildResult(s Sections, trimmed []string, estTokens int) Result { var sys strings.Builder sys.WriteString(s.SystemBase) if s.Patterns != "" { sys.WriteString("\n\n## Language Patterns & Idioms\n\nUse the following patterns as review criteria. Code that violates these established patterns is a finding:\n\n") sys.WriteString(s.Patterns) } if s.Conventions != "" { sys.WriteString("\n\n## Repository Conventions\n\nThe repository has the following coding conventions that must be respected:\n\n") sys.WriteString(s.Conventions) } if s.DesignDocs != "" { sys.WriteString("\n\n## Design Documents\n\nThe following design documents govern the changed code. Review the diff for adherence:\n\n") sys.WriteString(s.DesignDocs) } var usr strings.Builder usr.WriteString(s.UserMeta) if s.FileContext != "" { usr.WriteString("\n### Full File Context (modified files)\n\n") usr.WriteString(s.FileContext) usr.WriteString("\n") } if s.Diff != "" { usr.WriteString("\n### Diff (changes to review)\n\n```diff\n") usr.WriteString(s.Diff) usr.WriteString("\n```\n") } if len(trimmed) > 0 { usr.WriteString("\n⚠️ Note: Context was trimmed to fit model limits. Dropped: ") usr.WriteString(strings.Join(trimmed, ", ")) usr.WriteString("\n") } return Result{ SystemPrompt: sys.String(), UserPrompt: usr.String(), Trimmed: trimmed, EstTokens: estTokens, } } // truncateUTF8 truncates s to at most maxBytes without splitting multi-byte // UTF-8 characters. Returns a valid UTF-8 string of at most maxBytes bytes. func truncateUTF8(s string, maxBytes int) string { if len(s) <= maxBytes { return s } for maxBytes > 0 && !utf8.RuneStart(s[maxBytes]) { maxBytes-- } return s[:maxBytes] }