review-bot/budget/budget.go

// Package budget manages LLM context window budgeting for review-bot.
//
// It estimates token usage and progressively trims context content to fit
// within model-specific limits. The trimming order (least important first):
// patterns → conventions → file context → diff truncation.
package budget

import (
	"fmt"
	"strings"
)

// modelLimit pairs a model name prefix with its context window size.
// LimitForModel matches entries against a model name with strings.HasPrefix,
// so one entry covers every model whose name begins with prefix.
type modelLimit struct {
	// prefix is the leading part of the model name this entry applies to.
	prefix string
	// limit is the context window size, in tokens.
	limit  int
}

// modelLimits is the table of known model context limits (in tokens).
//
// Order matters: LimitForModel returns the first entry whose prefix matches,
// so entries are listed longest prefix first. That way a specific name such
// as "gpt-4.1-mini" is tested before the shorter "gpt-4.1" it also begins
// with.
var modelLimits = []modelLimit{
	{prefix: "claude-haiku-3.5-20241022", limit: 200_000},
	{prefix: "claude-sonnet-4-20250514", limit: 200_000},
	{prefix: "claude-opus-4-20250514", limit: 200_000},
	{prefix: "gpt-4.1-mini", limit: 128_000},
	{prefix: "gpt-5-mini", limit: 200_000},
	{prefix: "gpt-4.1", limit: 128_000},
	{prefix: "gpt-5", limit: 200_000},
}

const (
	// defaultLimit is the context window size (in tokens) assumed for a
	// model that matches no entry in the known-model table.
	defaultLimit = 128_000

	// reserveTokens is the headroom subtracted from the model limit so the
	// response has room to be generated.
	reserveTokens = 4_000
)

// EstimateTokens returns a rough token count for s.
//
// The estimate assumes about four bytes per token and rounds down, so a
// string shorter than four bytes counts as zero tokens. Length is measured
// in bytes (len), not runes.
func EstimateTokens(s string) int {
	const bytesPerToken = 4
	return len(s) / bytesPerToken
}

// LimitForModel reports the context window size, in tokens, for model.
//
// The modelLimits table is scanned in order and the first entry whose prefix
// model begins with (an exact match included) supplies the limit. When no
// entry matches, defaultLimit is returned.
func LimitForModel(model string) int {
	for i := range modelLimits {
		if known := modelLimits[i]; strings.HasPrefix(model, known.prefix) {
			return known.limit
		}
	}
	return defaultLimit
}

// Sections holds the prompt content sections passed to Fit.
//
// The trimmable fields are declared in trim priority order: when the total
// exceeds the budget, Fit drops Patterns, then Conventions, then FileContext
// (each as a whole), and only then truncates Diff. SystemBase is never
// modified. UserMeta is left alone unless SystemBase and UserMeta by
// themselves already exceed the budget, in which case Fit truncates it.
//
// In the assembled prompts, SystemBase, Patterns and Conventions form the
// system prompt; UserMeta, FileContext and Diff form the user prompt.
type Sections struct {
	SystemBase  string // Core instructions (never trimmed)
	Patterns    string // Language patterns (trimmed first)
	Conventions string // Repo conventions (trimmed second)
	FileContext string // Full file content (trimmed third)
	Diff        string // The actual diff (trimmed last, only truncated)
	UserMeta    string // PR title, description, CI status (truncated only when base content alone exceeds the budget)
}

// Result holds the trimmed content and metadata about what was dropped.
//
// EstTokens is computed from the section contents only; the section headers
// and the trim notice that are added while assembling the prompts are not
// included in the estimate.
type Result struct {
	SystemPrompt string   // SystemBase followed by any surviving Patterns and Conventions sections
	UserPrompt   string   // UserMeta, any surviving FileContext, the fenced diff, and a notice listing Trimmed when non-empty
	Trimmed      []string // Human-readable descriptions of what was trimmed
	EstTokens    int      // Estimated total tokens after trimming
}

// Fit trims sections to fit within the model's context limit.
// Returns the assembled prompts and a list of what was trimmed.
//
// The budget is LimitForModel(model) minus reserveTokens. It is spent in
// this order:
//
//  1. SystemBase and UserMeta are charged first. If they alone exceed the
//     budget, UserMeta is cut to at most 4000 bytes (about 1000 estimated
//     tokens) and a marker is appended.
//  2. Patterns, Conventions and FileContext are dropped whole, in that
//     order, until the remaining content fits.
//  3. If it still does not fit, Diff is truncated to the remaining budget
//     (room for the truncation marker included), or replaced by a
//     placeholder when no useful amount of diff would remain.
//
// Truncation never splits a multi-byte UTF-8 character. Every trim,
// including the UserMeta cut, is reported in Result.Trimmed.
//
// sections is received by value, so the caller's copy is not modified.
func Fit(model string, sections Sections) Result {
	const (
		// bytesPerToken mirrors the heuristic used by EstimateTokens.
		bytesPerToken = 4
		// maxMetaBytes caps UserMeta when base content alone is over budget.
		maxMetaBytes = 4000

		metaMarker   = "\n... [description truncated] ..."
		diffMarker   = "\n\n... [diff truncated due to context limit] ..."
		diffTooLarge = "... [diff too large for context window — review manually] ..."
	)

	limit := LimitForModel(model) - reserveTokens

	var trimmed []string

	baseTokens := EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
	available := limit - baseTokens
	if available < 0 {
		// Base content alone exceeds the budget: shorten UserMeta, the only
		// base section that may be cut.
		if len(sections.UserMeta) > maxMetaBytes {
			before := EstimateTokens(sections.UserMeta)
			sections.UserMeta = truncateAtRuneBoundary(sections.UserMeta, maxMetaBytes) + metaMarker
			after := EstimateTokens(sections.UserMeta)
			trimmed = append(trimmed, fmt.Sprintf("user metadata truncated (~%dK tokens removed)", (before-after)/1000))

			baseTokens = EstimateTokens(sections.SystemBase) + after
			available = limit - baseTokens
		}
		if available < 0 {
			// Nothing is left for the optional sections or the diff.
			available = 0
		}
	}

	// Trimmable sections in priority order (first = dropped first).
	type entry struct {
		name    string
		content *string
	}
	entries := []entry{
		{"patterns", &sections.Patterns},
		{"conventions", &sections.Conventions},
		{"file context", &sections.FileContext},
	}

	// remaining is the estimated size of everything that is not base content.
	remaining := EstimateTokens(sections.Diff)
	for _, e := range entries {
		remaining += EstimateTokens(*e.content)
	}

	// Drop whole sections, least important first, until the rest fits.
	for i := 0; i < len(entries) && remaining > available; i++ {
		tokens := EstimateTokens(*entries[i].content)
		if tokens == 0 {
			// Empty (or too small to register): dropping it gains nothing.
			continue
		}
		trimmed = append(trimmed, fmt.Sprintf("%s (~%dK tokens)", entries[i].name, tokens/1000))
		*entries[i].content = ""
		remaining -= tokens
	}

	// If still too large, truncate the diff. At this point every optional
	// section contributes zero estimated tokens, so the whole remaining
	// budget belongs to the diff.
	if remaining > available {
		diffBudget := available
		removed := EstimateTokens(sections.Diff) - diffBudget
		trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000))

		// Reserve room for the marker so the truncated diff plus marker
		// stays within diffBudget instead of overshooting it.
		keepBytes := diffBudget*bytesPerToken - len(diffMarker)
		if keepBytes > 0 {
			sections.Diff = truncateAtRuneBoundary(sections.Diff, keepBytes) + diffMarker
		} else {
			sections.Diff = diffTooLarge
		}
	}

	finalTokens := baseTokens
	for _, e := range entries {
		finalTokens += EstimateTokens(*e.content)
	}
	finalTokens += EstimateTokens(sections.Diff)

	return buildResult(sections, trimmed, finalTokens)
}

// truncateAtRuneBoundary returns a prefix of s that is at most maxBytes long
// and, for valid UTF-8 input, does not end in the middle of a multi-byte
// character.
func truncateAtRuneBoundary(s string, maxBytes int) string {
	if maxBytes <= 0 {
		return ""
	}
	if maxBytes >= len(s) {
		return s
	}
	cut := maxBytes
	// A UTF-8 continuation byte has the bit pattern 10xxxxxx. If the first
	// excluded byte is one, the cut is inside a character, so back up to the
	// character's lead byte. A character has at most three continuation
	// bytes; the bound keeps malformed input from being cut far too short.
	for back := 0; back < 3 && cut > 0 && s[cut]&0xC0 == 0x80; back++ {
		cut--
	}
	return s[:cut]
}

// buildResult assembles the final prompts from the (already trimmed)
// sections.
//
// The system prompt is SystemBase followed by the Patterns and Conventions
// sections, each under its own heading and each omitted when empty. The user
// prompt is UserMeta, the optional FileContext section, the diff inside a
// fenced diff block, and, when trimmed is non-empty, a notice listing what
// was dropped. trimmed and estTokens are copied into the Result unchanged.
func buildResult(s Sections, trimmed []string, estTokens int) Result {
	const (
		patternsHeading    = "\n\n## Language Patterns & Idioms\n\nUse the following patterns as review criteria. Code that violates these established patterns is a finding:\n\n"
		conventionsHeading = "\n\n## Repository Conventions\n\nThe repository has the following coding conventions that must be respected:\n\n"
		fileContextHeading = "\n### Full File Context (modified files)\n\n"
		diffOpen           = "\n### Diff (changes to review)\n\n```diff\n"
		diffClose          = "\n```\n"
		trimNotice         = "\n⚠️ Note: Context was trimmed to fit model limits. Dropped: "
	)

	systemPrompt := s.SystemBase
	if len(s.Patterns) > 0 {
		systemPrompt += patternsHeading + s.Patterns
	}
	if len(s.Conventions) > 0 {
		systemPrompt += conventionsHeading + s.Conventions
	}

	userPrompt := s.UserMeta
	if len(s.FileContext) > 0 {
		userPrompt += fileContextHeading + s.FileContext + "\n"
	}
	userPrompt += diffOpen + s.Diff + diffClose
	if len(trimmed) != 0 {
		userPrompt += trimNotice + strings.Join(trimmed, ", ") + "\n"
	}

	return Result{
		SystemPrompt: systemPrompt,
		UserPrompt:   userPrompt,
		Trimmed:      trimmed,
		EstTokens:    estTokens,
	}
}