fix: address review findings on budget system
- Account for truncation marker tokens when computing diff budget (prevents EstTokens exceeding model limit in edge cases) - Rune-safe truncation for both UserMeta and Diff (no split multi-byte) - Fix misleading comment (1000 chars → ~1000 tokens/4000 chars) - Extract marker strings as constants - Add unit tests for BuildSystemBase and BuildUserMeta
This commit is contained in:
+34
-5
@@ -33,6 +33,10 @@ const defaultLimit = 128_000
|
||||
// reserveTokens is headroom for the response generation.
|
||||
const reserveTokens = 4_000
|
||||
|
||||
const diffTruncMarker = "\n\n... [diff truncated due to context limit] ..."
|
||||
const diffTooLargeMarker = "... [diff too large for context window — review manually] ..."
|
||||
const userMetaTruncMarker = "\n... [description truncated] ..."
|
||||
|
||||
// EstimateTokens estimates the number of tokens in a string.
|
||||
// Uses the rough heuristic of ~4 characters per token, which is
|
||||
// conservative for English text and code.
|
||||
@@ -79,9 +83,9 @@ func Fit(model string, sections Sections) Result {
|
||||
baseTokens := EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
|
||||
available := limit - baseTokens
|
||||
if available < 0 {
|
||||
// Base content alone exceeds budget. Truncate UserMeta (keep first 1000 chars).
|
||||
// Base content alone exceeds budget. Truncate UserMeta (keep first ~1000 tokens).
|
||||
if len(sections.UserMeta) > 4000 {
|
||||
sections.UserMeta = sections.UserMeta[:4000] + "\n... [description truncated] ..."
|
||||
sections.UserMeta = truncateUTF8(sections.UserMeta, 4000) + userMetaTruncMarker
|
||||
baseTokens = EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
|
||||
available = limit - baseTokens
|
||||
}
|
||||
@@ -138,14 +142,20 @@ func Fit(model string, sections Sections) Result {
|
||||
if diffBudget < 0 {
|
||||
diffBudget = 0
|
||||
}
|
||||
maxChars := diffBudget * 4
|
||||
// Reserve space for truncation marker
|
||||
markerBudget := EstimateTokens(diffTruncMarker)
|
||||
effectiveBudget := diffBudget - markerBudget
|
||||
if effectiveBudget < 0 {
|
||||
effectiveBudget = 0
|
||||
}
|
||||
maxChars := effectiveBudget * 4
|
||||
if maxChars < len(sections.Diff) {
|
||||
removed := EstimateTokens(sections.Diff) - diffBudget
|
||||
trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000))
|
||||
if maxChars > 0 {
|
||||
sections.Diff = sections.Diff[:maxChars] + "\n\n... [diff truncated due to context limit] ..."
|
||||
sections.Diff = truncateUTF8(sections.Diff, maxChars) + diffTruncMarker
|
||||
} else {
|
||||
sections.Diff = "... [diff too large for context window — review manually] ..."
|
||||
sections.Diff = diffTooLargeMarker
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -195,3 +205,22 @@ func buildResult(s Sections, trimmed []string, estTokens int) Result {
|
||||
EstTokens: estTokens,
|
||||
}
|
||||
}
|
||||
|
||||
// truncateUTF8 truncates s to at most maxBytes without splitting multi-byte
|
||||
// UTF-8 characters. Returns a valid UTF-8 string of at most maxBytes bytes.
|
||||
func truncateUTF8(s string, maxBytes int) string {
|
||||
if len(s) <= maxBytes {
|
||||
return s
|
||||
}
|
||||
// Walk backwards from maxBytes to find a valid UTF-8 boundary
|
||||
for maxBytes > 0 && !isUTF8Start(s[maxBytes]) {
|
||||
maxBytes--
|
||||
}
|
||||
return s[:maxBytes]
|
||||
}
|
||||
|
||||
// isUTF8Start returns true if b is a valid start byte for a UTF-8 sequence
|
||||
// (single-byte ASCII or multi-byte lead byte, not a continuation byte).
|
||||
func isUTF8Start(b byte) bool {
|
||||
return b&0xC0 != 0x80
|
||||
}
|
||||
|
||||
@@ -116,3 +116,43 @@ func TestBuildUserPrompt_WithoutFileContext(t *testing.T) {
|
||||
t.Error("should not include file context section when empty")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
func TestBuildSystemBase(t *testing.T) {
|
||||
result := BuildSystemBase()
|
||||
if result == "" {
|
||||
t.Fatal("BuildSystemBase returned empty string")
|
||||
}
|
||||
if !strings.Contains(result, "expert code reviewer") {
|
||||
t.Error("expected reviewer role in system base")
|
||||
}
|
||||
if !strings.Contains(result, "REQUEST_CHANGES") {
|
||||
t.Error("expected verdict format in system base")
|
||||
}
|
||||
if !strings.Contains(result, "JSON") {
|
||||
t.Error("expected JSON output instruction in system base")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildUserMeta(t *testing.T) {
|
||||
result := BuildUserMeta("Fix bug", "Some description", true, "all checks passed")
|
||||
if !strings.Contains(result, "Fix bug") {
|
||||
t.Error("expected title in user meta")
|
||||
}
|
||||
if !strings.Contains(result, "Some description") {
|
||||
t.Error("expected description in user meta")
|
||||
}
|
||||
if !strings.Contains(result, "PASSED") {
|
||||
t.Error("expected CI PASSED status")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildUserMeta_CIFailed(t *testing.T) {
|
||||
result := BuildUserMeta("Title", "", false, "test job failed")
|
||||
if !strings.Contains(result, "FAILED") {
|
||||
t.Error("expected CI FAILED status")
|
||||
}
|
||||
if strings.Contains(result, "Description") {
|
||||
t.Error("expected no description section when empty")
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user