fix: address review findings on budget system
CI / test (pull_request) Successful in 14s
CI / review (gpt-5, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 1m41s
CI / review (gpt-5-mini, gpt, GPT_REVIEW_TOKEN) (pull_request) Successful in 3m2s

- Account for truncation marker tokens when computing diff budget
  (prevents EstTokens exceeding model limit in edge cases)
- Rune-safe truncation for both UserMeta and Diff (no split multi-byte)
- Fix misleading comment (1000 chars → ~1000 tokens/4000 chars)
- Extract marker strings as constants
- Add unit tests for BuildSystemBase and BuildUserMeta
This commit is contained in:
Rodin
2026-05-01 18:59:07 -07:00
parent d9cacf6f62
commit dab7871cb4
2 changed files with 74 additions and 5 deletions
+34 -5
View File
@@ -33,6 +33,10 @@ const defaultLimit = 128_000
// reserveTokens is headroom for the response generation.
const reserveTokens = 4_000
const diffTruncMarker = "\n\n... [diff truncated due to context limit] ..."
const diffTooLargeMarker = "... [diff too large for context window — review manually] ..."
const userMetaTruncMarker = "\n... [description truncated] ..."
// EstimateTokens estimates the number of tokens in a string.
// Uses the rough heuristic of ~4 characters per token, which is
// conservative for English text and code.
@@ -79,9 +83,9 @@ func Fit(model string, sections Sections) Result {
baseTokens := EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
available := limit - baseTokens
if available < 0 {
// Base content alone exceeds budget. Truncate UserMeta (keep first 1000 chars).
// Base content alone exceeds budget. Truncate UserMeta (keep first ~1000 tokens).
if len(sections.UserMeta) > 4000 {
sections.UserMeta = sections.UserMeta[:4000] + "\n... [description truncated] ..."
sections.UserMeta = truncateUTF8(sections.UserMeta, 4000) + userMetaTruncMarker
baseTokens = EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
available = limit - baseTokens
}
@@ -138,14 +142,20 @@ func Fit(model string, sections Sections) Result {
if diffBudget < 0 {
diffBudget = 0
}
maxChars := diffBudget * 4
// Reserve space for truncation marker
markerBudget := EstimateTokens(diffTruncMarker)
effectiveBudget := diffBudget - markerBudget
if effectiveBudget < 0 {
effectiveBudget = 0
}
maxChars := effectiveBudget * 4
if maxChars < len(sections.Diff) {
removed := EstimateTokens(sections.Diff) - diffBudget
trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000))
if maxChars > 0 {
sections.Diff = sections.Diff[:maxChars] + "\n\n... [diff truncated due to context limit] ..."
sections.Diff = truncateUTF8(sections.Diff, maxChars) + diffTruncMarker
} else {
sections.Diff = "... [diff too large for context window — review manually] ..."
sections.Diff = diffTooLargeMarker
}
}
}
@@ -195,3 +205,22 @@ func buildResult(s Sections, trimmed []string, estTokens int) Result {
EstTokens: estTokens,
}
}
// truncateUTF8 truncates s to at most maxBytes without splitting multi-byte
// UTF-8 characters. Returns a valid UTF-8 string of at most maxBytes bytes.
func truncateUTF8(s string, maxBytes int) string {
if len(s) <= maxBytes {
return s
}
// Walk backwards from maxBytes to find a valid UTF-8 boundary
for maxBytes > 0 && !isUTF8Start(s[maxBytes]) {
maxBytes--
}
return s[:maxBytes]
}
// isUTF8Start returns true if b is a valid start byte for a UTF-8 sequence
// (single-byte ASCII or multi-byte lead byte, not a continuation byte).
func isUTF8Start(b byte) bool {
return b&0xC0 != 0x80
}
+40
View File
@@ -116,3 +116,43 @@ func TestBuildUserPrompt_WithoutFileContext(t *testing.T) {
t.Error("should not include file context section when empty")
}
}
func TestBuildSystemBase(t *testing.T) {
result := BuildSystemBase()
if result == "" {
t.Fatal("BuildSystemBase returned empty string")
}
if !strings.Contains(result, "expert code reviewer") {
t.Error("expected reviewer role in system base")
}
if !strings.Contains(result, "REQUEST_CHANGES") {
t.Error("expected verdict format in system base")
}
if !strings.Contains(result, "JSON") {
t.Error("expected JSON output instruction in system base")
}
}
func TestBuildUserMeta(t *testing.T) {
result := BuildUserMeta("Fix bug", "Some description", true, "all checks passed")
if !strings.Contains(result, "Fix bug") {
t.Error("expected title in user meta")
}
if !strings.Contains(result, "Some description") {
t.Error("expected description in user meta")
}
if !strings.Contains(result, "PASSED") {
t.Error("expected CI PASSED status")
}
}
func TestBuildUserMeta_CIFailed(t *testing.T) {
result := BuildUserMeta("Title", "", false, "test job failed")
if !strings.Contains(result, "FAILED") {
t.Error("expected CI FAILED status")
}
if strings.Contains(result, "Description") {
t.Error("expected no description section when empty")
}
}