60c6bd9f49
PR Ready Gate / clear-labels (pull_request) Successful in 1s
CI / test (pull_request) Successful in 17s
CI / review (anthropic--claude-4.6-sonnet, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 48s
CI / review (gpt-5, security, ., rodin/security-patterns, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Successful in 1m52s
CI / review (gpt-5, gpt, GPT_REVIEW_TOKEN) (pull_request) Failing after 2m8s
- budget/budget_test.go: add TestFit_DesignDocsInSystemPrompt, TestFit_DesignDocsTrimmedBeforeFileContext, TestFit_DesignDocsEmptyNoHeading to cover the new DesignDocs section through Fit() and buildResult() - Remove PLAN-137.md (contained raw thinking stream, not suitable as repo doc) - Add docs/DESIGN-137-doc-map.md with clean architectural decision record
272 lines
8.0 KiB
Go
272 lines
8.0 KiB
Go
package budget
|
|
|
|
import (
|
|
"strings"
|
|
"testing"
|
|
)
|
|
|
|
func TestEstimateTokens(t *testing.T) {
|
|
tests := []struct {
|
|
input string
|
|
want int
|
|
}{
|
|
{"", 0},
|
|
{"abcd", 1},
|
|
{"12345678", 2},
|
|
{strings.Repeat("x", 400), 100},
|
|
}
|
|
for _, tt := range tests {
|
|
got := EstimateTokens(tt.input)
|
|
if got != tt.want {
|
|
t.Errorf("EstimateTokens(%d chars) = %d, want %d", len(tt.input), got, tt.want)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestLimitForModel(t *testing.T) {
|
|
tests := []struct {
|
|
model string
|
|
want int
|
|
}{
|
|
{"gpt-4.1", 128_000},
|
|
{"gpt-5", 200_000},
|
|
{"gpt-5-mini", 200_000},
|
|
{"unknown-model", defaultLimit},
|
|
{"gpt-4.1-2026-01-01", 128_000}, // prefix match
|
|
}
|
|
for _, tt := range tests {
|
|
got := LimitForModel(tt.model)
|
|
if got != tt.want {
|
|
t.Errorf("LimitForModel(%q) = %d, want %d", tt.model, got, tt.want)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestFit_AllFits(t *testing.T) {
|
|
s := Sections{
|
|
SystemBase: "system instructions",
|
|
Patterns: "some patterns",
|
|
Conventions: "some conventions",
|
|
FileContext: "file content",
|
|
Diff: "diff content",
|
|
UserMeta: "PR: title\n",
|
|
}
|
|
result := Fit("gpt-5", s)
|
|
|
|
if len(result.Trimmed) != 0 {
|
|
t.Errorf("expected no trimming, got %v", result.Trimmed)
|
|
}
|
|
if !strings.Contains(result.SystemPrompt, "some patterns") {
|
|
t.Error("expected patterns in system prompt")
|
|
}
|
|
if !strings.Contains(result.SystemPrompt, "some conventions") {
|
|
t.Error("expected conventions in system prompt")
|
|
}
|
|
if !strings.Contains(result.UserPrompt, "file content") {
|
|
t.Error("expected file context in user prompt")
|
|
}
|
|
}
|
|
|
|
func TestFit_TrimsPatterns(t *testing.T) {
|
|
// Create content that exceeds 128K token budget for gpt-4.1
|
|
// Budget ≈ 128K - 4K reserve = 124K tokens = ~496K chars
|
|
// Fill patterns with enough to push over
|
|
bigPatterns := strings.Repeat("x", 500_000) // ~125K tokens
|
|
s := Sections{
|
|
SystemBase: "base",
|
|
Patterns: bigPatterns,
|
|
Conventions: "conventions",
|
|
FileContext: "files",
|
|
Diff: "diff",
|
|
UserMeta: "meta",
|
|
}
|
|
result := Fit("gpt-4.1", s)
|
|
|
|
if len(result.Trimmed) == 0 {
|
|
t.Fatal("expected trimming")
|
|
}
|
|
if !strings.Contains(result.Trimmed[0], "patterns") {
|
|
t.Errorf("expected patterns to be trimmed first, got %v", result.Trimmed)
|
|
}
|
|
if strings.Contains(result.SystemPrompt, bigPatterns[:100]) {
|
|
t.Error("expected patterns to be removed from output")
|
|
}
|
|
// Conventions should survive
|
|
if !strings.Contains(result.SystemPrompt, "conventions") {
|
|
t.Error("expected conventions to survive after patterns trimmed")
|
|
}
|
|
}
|
|
|
|
func TestFit_TrimsConventions(t *testing.T) {
|
|
// Patterns + conventions + diff all exceed budget even after patterns removed
|
|
big := strings.Repeat("y", 520_000) // ~130K tokens each (exceeds 124K budget even alone)
|
|
s := Sections{
|
|
SystemBase: "base",
|
|
Patterns: big,
|
|
Conventions: big,
|
|
FileContext: "files",
|
|
Diff: "diff",
|
|
UserMeta: "meta",
|
|
}
|
|
result := Fit("gpt-4.1", s)
|
|
|
|
if len(result.Trimmed) < 2 {
|
|
t.Fatalf("expected at least 2 trimmed, got %v", result.Trimmed)
|
|
}
|
|
if !strings.Contains(result.Trimmed[0], "patterns") {
|
|
t.Errorf("expected patterns trimmed first, got %s", result.Trimmed[0])
|
|
}
|
|
if !strings.Contains(result.Trimmed[1], "conventions") {
|
|
t.Errorf("expected conventions trimmed second, got %s", result.Trimmed[1])
|
|
}
|
|
}
|
|
|
|
func TestFit_TruncatesDiff(t *testing.T) {
|
|
// Only diff is huge, no patterns/conventions
|
|
hugeDiff := strings.Repeat("z", 600_000) // ~150K tokens > 128K limit
|
|
s := Sections{
|
|
SystemBase: "base",
|
|
Diff: hugeDiff,
|
|
UserMeta: "meta",
|
|
}
|
|
result := Fit("gpt-4.1", s)
|
|
|
|
if len(result.Trimmed) == 0 {
|
|
t.Fatal("expected diff truncation")
|
|
}
|
|
if !strings.Contains(result.Trimmed[len(result.Trimmed)-1], "diff truncated") {
|
|
t.Errorf("expected diff truncation note, got %v", result.Trimmed)
|
|
}
|
|
if !strings.Contains(result.UserPrompt, "[diff truncated due to context limit]") {
|
|
t.Error("expected truncation marker in user prompt")
|
|
}
|
|
}
|
|
|
|
func TestFit_PreservesNoteInOutput(t *testing.T) {
|
|
big := strings.Repeat("w", 500_000)
|
|
s := Sections{
|
|
SystemBase: "base",
|
|
Patterns: big,
|
|
Diff: "small diff",
|
|
UserMeta: "meta",
|
|
}
|
|
result := Fit("gpt-4.1", s)
|
|
|
|
if !strings.Contains(result.UserPrompt, "⚠️ Note: Context was trimmed") {
|
|
t.Error("expected trimming note in user prompt")
|
|
}
|
|
}
|
|
|
|
func TestFit_HugeUserMeta(t *testing.T) {
|
|
// UserMeta so large that base alone exceeds limit
|
|
// Use a unique marker past the truncation point
|
|
hugeDesc := strings.Repeat("d", 5000) + "UNIQUE_MARKER_PAST_TRUNCATION" + strings.Repeat("d", 595_000)
|
|
s := Sections{
|
|
SystemBase: "base",
|
|
Diff: "small diff",
|
|
UserMeta: hugeDesc,
|
|
}
|
|
result := Fit("gpt-4.1", s)
|
|
|
|
limit := LimitForModel("gpt-4.1") - reserveTokens
|
|
if result.EstTokens > limit {
|
|
t.Errorf("EstTokens %d exceeds limit %d", result.EstTokens, limit)
|
|
}
|
|
// Content past truncation point should not be present
|
|
if strings.Contains(result.UserPrompt, "UNIQUE_MARKER_PAST_TRUNCATION") {
|
|
t.Error("expected UserMeta to be truncated but found content past truncation point")
|
|
}
|
|
// Truncation marker should be present
|
|
if !strings.Contains(result.UserPrompt, "[description truncated]") {
|
|
t.Error("expected truncation marker in output")
|
|
}
|
|
}
|
|
|
|
func TestFit_NeverExceedsLimit(t *testing.T) {
|
|
// All sections huge — verify final tokens never exceed limit
|
|
big := strings.Repeat("a", 200_000)
|
|
s := Sections{
|
|
SystemBase: strings.Repeat("s", 8000),
|
|
Patterns: big,
|
|
Conventions: big,
|
|
FileContext: big,
|
|
Diff: big,
|
|
UserMeta: strings.Repeat("m", 8000),
|
|
}
|
|
result := Fit("gpt-4.1", s)
|
|
|
|
limit := LimitForModel("gpt-4.1") - reserveTokens
|
|
if result.EstTokens > limit {
|
|
t.Errorf("EstTokens %d exceeds limit %d (trimmed: %v)", result.EstTokens, limit, result.Trimmed)
|
|
}
|
|
}
|
|
|
|
// TestFit_DesignDocsInSystemPrompt verifies that DesignDocs content appears in the
|
|
// system prompt under the expected heading.
|
|
func TestFit_DesignDocsInSystemPrompt(t *testing.T) {
|
|
s := Sections{
|
|
SystemBase: "base instructions",
|
|
DesignDocs: "# Foo Design\n\nSome design content.",
|
|
Diff: "diff content",
|
|
UserMeta: "PR meta",
|
|
}
|
|
result := Fit("gpt-4.1", s)
|
|
|
|
if !strings.Contains(result.SystemPrompt, "## Design Documents") {
|
|
t.Errorf("expected ## Design Documents heading in system prompt, got:\n%s", result.SystemPrompt)
|
|
}
|
|
if !strings.Contains(result.SystemPrompt, "# Foo Design") {
|
|
t.Errorf("expected design doc content in system prompt, got:\n%s", result.SystemPrompt)
|
|
}
|
|
// Sanity: design docs should NOT appear in user prompt.
|
|
if strings.Contains(result.UserPrompt, "## Design Documents") {
|
|
t.Errorf("design docs heading should not be in user prompt, got:\n%s", result.UserPrompt)
|
|
}
|
|
}
|
|
|
|
// TestFit_DesignDocsTrimmedBeforeFileContext verifies trim ordering:
|
|
// DesignDocs is trimmed (third) before FileContext (fourth), after Conventions.
|
|
func TestFit_DesignDocsTrimmedBeforeFileContext(t *testing.T) {
|
|
// Fill budget so design docs and file context can't both fit.
|
|
// gpt-4.1 limit = 128_000 - 4_000 = 124_000 tokens.
|
|
// SystemBase = 480_000 bytes ≈ 120_000 tokens → leaves ~4_000 tokens.
|
|
// Diff = 8_000 bytes ≈ 2_000 tokens.
|
|
// DesignDocs = 20_000 bytes ≈ 5_000 tokens → exceeds remaining 2_000.
|
|
// Expected: DesignDocs trimmed; FileContext (very small) survives.
|
|
s := Sections{
|
|
SystemBase: strings.Repeat("s", 480_000),
|
|
DesignDocs: strings.Repeat("d", 20_000),
|
|
FileContext: "important_file_context",
|
|
Diff: strings.Repeat("x", 8_000),
|
|
UserMeta: "PR meta",
|
|
}
|
|
result := Fit("gpt-4.1", s)
|
|
|
|
found := false
|
|
for _, item := range result.Trimmed {
|
|
if strings.HasPrefix(item, "design docs") {
|
|
found = true
|
|
break
|
|
}
|
|
}
|
|
if !found {
|
|
t.Errorf("expected 'design docs' in trimmed list, got: %v", result.Trimmed)
|
|
}
|
|
}
|
|
|
|
// TestFit_DesignDocsEmptyNoHeading verifies that an empty DesignDocs field
|
|
// does not inject the ## Design Documents heading into the system prompt.
|
|
func TestFit_DesignDocsEmptyNoHeading(t *testing.T) {
|
|
s := Sections{
|
|
SystemBase: "base",
|
|
DesignDocs: "",
|
|
Diff: "diff",
|
|
UserMeta: "meta",
|
|
}
|
|
result := Fit("gpt-4.1", s)
|
|
|
|
if strings.Contains(result.SystemPrompt, "## Design Documents") {
|
|
t.Errorf("empty DesignDocs should not inject heading, got:\n%s", result.SystemPrompt)
|
|
}
|
|
}
|