Files
claw 60c6bd9f49
PR Ready Gate / clear-labels (pull_request) Successful in 1s
CI / test (pull_request) Successful in 17s
CI / review (anthropic--claude-4.6-sonnet, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 48s
CI / review (gpt-5, security, ., rodin/security-patterns, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Successful in 1m52s
CI / review (gpt-5, gpt, GPT_REVIEW_TOKEN) (pull_request) Failing after 2m8s
test(budget): add DesignDocs tests; replace PLAN-137 with clean design doc
- budget/budget_test.go: add TestFit_DesignDocsInSystemPrompt,
  TestFit_DesignDocsTrimmedBeforeFileContext, TestFit_DesignDocsEmptyNoHeading
  to cover the new DesignDocs section through Fit() and buildResult()
- Remove PLAN-137.md (contained raw thinking stream, not suitable as repo doc)
- Add docs/DESIGN-137-doc-map.md with clean architectural decision record
2026-05-14 20:36:22 -07:00

272 lines
8.0 KiB
Go

package budget
import (
"strings"
"testing"
)
func TestEstimateTokens(t *testing.T) {
tests := []struct {
input string
want int
}{
{"", 0},
{"abcd", 1},
{"12345678", 2},
{strings.Repeat("x", 400), 100},
}
for _, tt := range tests {
got := EstimateTokens(tt.input)
if got != tt.want {
t.Errorf("EstimateTokens(%d chars) = %d, want %d", len(tt.input), got, tt.want)
}
}
}
func TestLimitForModel(t *testing.T) {
tests := []struct {
model string
want int
}{
{"gpt-4.1", 128_000},
{"gpt-5", 200_000},
{"gpt-5-mini", 200_000},
{"unknown-model", defaultLimit},
{"gpt-4.1-2026-01-01", 128_000}, // prefix match
}
for _, tt := range tests {
got := LimitForModel(tt.model)
if got != tt.want {
t.Errorf("LimitForModel(%q) = %d, want %d", tt.model, got, tt.want)
}
}
}
func TestFit_AllFits(t *testing.T) {
s := Sections{
SystemBase: "system instructions",
Patterns: "some patterns",
Conventions: "some conventions",
FileContext: "file content",
Diff: "diff content",
UserMeta: "PR: title\n",
}
result := Fit("gpt-5", s)
if len(result.Trimmed) != 0 {
t.Errorf("expected no trimming, got %v", result.Trimmed)
}
if !strings.Contains(result.SystemPrompt, "some patterns") {
t.Error("expected patterns in system prompt")
}
if !strings.Contains(result.SystemPrompt, "some conventions") {
t.Error("expected conventions in system prompt")
}
if !strings.Contains(result.UserPrompt, "file content") {
t.Error("expected file context in user prompt")
}
}
func TestFit_TrimsPatterns(t *testing.T) {
// Create content that exceeds 128K token budget for gpt-4.1
// Budget ≈ 128K - 4K reserve = 124K tokens = ~496K chars
// Fill patterns with enough to push over
bigPatterns := strings.Repeat("x", 500_000) // ~125K tokens
s := Sections{
SystemBase: "base",
Patterns: bigPatterns,
Conventions: "conventions",
FileContext: "files",
Diff: "diff",
UserMeta: "meta",
}
result := Fit("gpt-4.1", s)
if len(result.Trimmed) == 0 {
t.Fatal("expected trimming")
}
if !strings.Contains(result.Trimmed[0], "patterns") {
t.Errorf("expected patterns to be trimmed first, got %v", result.Trimmed)
}
if strings.Contains(result.SystemPrompt, bigPatterns[:100]) {
t.Error("expected patterns to be removed from output")
}
// Conventions should survive
if !strings.Contains(result.SystemPrompt, "conventions") {
t.Error("expected conventions to survive after patterns trimmed")
}
}
func TestFit_TrimsConventions(t *testing.T) {
// Patterns + conventions + diff all exceed budget even after patterns removed
big := strings.Repeat("y", 520_000) // ~130K tokens each (exceeds 124K budget even alone)
s := Sections{
SystemBase: "base",
Patterns: big,
Conventions: big,
FileContext: "files",
Diff: "diff",
UserMeta: "meta",
}
result := Fit("gpt-4.1", s)
if len(result.Trimmed) < 2 {
t.Fatalf("expected at least 2 trimmed, got %v", result.Trimmed)
}
if !strings.Contains(result.Trimmed[0], "patterns") {
t.Errorf("expected patterns trimmed first, got %s", result.Trimmed[0])
}
if !strings.Contains(result.Trimmed[1], "conventions") {
t.Errorf("expected conventions trimmed second, got %s", result.Trimmed[1])
}
}
func TestFit_TruncatesDiff(t *testing.T) {
// Only diff is huge, no patterns/conventions
hugeDiff := strings.Repeat("z", 600_000) // ~150K tokens > 128K limit
s := Sections{
SystemBase: "base",
Diff: hugeDiff,
UserMeta: "meta",
}
result := Fit("gpt-4.1", s)
if len(result.Trimmed) == 0 {
t.Fatal("expected diff truncation")
}
if !strings.Contains(result.Trimmed[len(result.Trimmed)-1], "diff truncated") {
t.Errorf("expected diff truncation note, got %v", result.Trimmed)
}
if !strings.Contains(result.UserPrompt, "[diff truncated due to context limit]") {
t.Error("expected truncation marker in user prompt")
}
}
func TestFit_PreservesNoteInOutput(t *testing.T) {
big := strings.Repeat("w", 500_000)
s := Sections{
SystemBase: "base",
Patterns: big,
Diff: "small diff",
UserMeta: "meta",
}
result := Fit("gpt-4.1", s)
if !strings.Contains(result.UserPrompt, "⚠️ Note: Context was trimmed") {
t.Error("expected trimming note in user prompt")
}
}
func TestFit_HugeUserMeta(t *testing.T) {
// UserMeta so large that base alone exceeds limit
// Use a unique marker past the truncation point
hugeDesc := strings.Repeat("d", 5000) + "UNIQUE_MARKER_PAST_TRUNCATION" + strings.Repeat("d", 595_000)
s := Sections{
SystemBase: "base",
Diff: "small diff",
UserMeta: hugeDesc,
}
result := Fit("gpt-4.1", s)
limit := LimitForModel("gpt-4.1") - reserveTokens
if result.EstTokens > limit {
t.Errorf("EstTokens %d exceeds limit %d", result.EstTokens, limit)
}
// Content past truncation point should not be present
if strings.Contains(result.UserPrompt, "UNIQUE_MARKER_PAST_TRUNCATION") {
t.Error("expected UserMeta to be truncated but found content past truncation point")
}
// Truncation marker should be present
if !strings.Contains(result.UserPrompt, "[description truncated]") {
t.Error("expected truncation marker in output")
}
}
func TestFit_NeverExceedsLimit(t *testing.T) {
// All sections huge — verify final tokens never exceed limit
big := strings.Repeat("a", 200_000)
s := Sections{
SystemBase: strings.Repeat("s", 8000),
Patterns: big,
Conventions: big,
FileContext: big,
Diff: big,
UserMeta: strings.Repeat("m", 8000),
}
result := Fit("gpt-4.1", s)
limit := LimitForModel("gpt-4.1") - reserveTokens
if result.EstTokens > limit {
t.Errorf("EstTokens %d exceeds limit %d (trimmed: %v)", result.EstTokens, limit, result.Trimmed)
}
}
// TestFit_DesignDocsInSystemPrompt verifies that DesignDocs content appears in the
// system prompt under the expected heading.
func TestFit_DesignDocsInSystemPrompt(t *testing.T) {
s := Sections{
SystemBase: "base instructions",
DesignDocs: "# Foo Design\n\nSome design content.",
Diff: "diff content",
UserMeta: "PR meta",
}
result := Fit("gpt-4.1", s)
if !strings.Contains(result.SystemPrompt, "## Design Documents") {
t.Errorf("expected ## Design Documents heading in system prompt, got:\n%s", result.SystemPrompt)
}
if !strings.Contains(result.SystemPrompt, "# Foo Design") {
t.Errorf("expected design doc content in system prompt, got:\n%s", result.SystemPrompt)
}
// Sanity: design docs should NOT appear in user prompt.
if strings.Contains(result.UserPrompt, "## Design Documents") {
t.Errorf("design docs heading should not be in user prompt, got:\n%s", result.UserPrompt)
}
}
// TestFit_DesignDocsTrimmedBeforeFileContext verifies trim ordering:
// DesignDocs is trimmed (third) before FileContext (fourth), after Conventions.
func TestFit_DesignDocsTrimmedBeforeFileContext(t *testing.T) {
// Fill budget so design docs and file context can't both fit.
// gpt-4.1 limit = 128_000 - 4_000 = 124_000 tokens.
// SystemBase = 480_000 bytes ≈ 120_000 tokens → leaves ~4_000 tokens.
// Diff = 8_000 bytes ≈ 2_000 tokens.
// DesignDocs = 20_000 bytes ≈ 5_000 tokens → exceeds remaining 2_000.
// Expected: DesignDocs trimmed; FileContext (very small) survives.
s := Sections{
SystemBase: strings.Repeat("s", 480_000),
DesignDocs: strings.Repeat("d", 20_000),
FileContext: "important_file_context",
Diff: strings.Repeat("x", 8_000),
UserMeta: "PR meta",
}
result := Fit("gpt-4.1", s)
found := false
for _, item := range result.Trimmed {
if strings.HasPrefix(item, "design docs") {
found = true
break
}
}
if !found {
t.Errorf("expected 'design docs' in trimmed list, got: %v", result.Trimmed)
}
}
// TestFit_DesignDocsEmptyNoHeading verifies that an empty DesignDocs field
// does not inject the ## Design Documents heading into the system prompt.
func TestFit_DesignDocsEmptyNoHeading(t *testing.T) {
s := Sections{
SystemBase: "base",
DesignDocs: "",
Diff: "diff",
UserMeta: "meta",
}
result := Fit("gpt-4.1", s)
if strings.Contains(result.SystemPrompt, "## Design Documents") {
t.Errorf("empty DesignDocs should not inject heading, got:\n%s", result.SystemPrompt)
}
}