fix: address review findings on budget system
- Account for truncation marker tokens when computing diff budget (prevents EstTokens exceeding model limit in edge cases) - Rune-safe truncation for both UserMeta and Diff (no split multi-byte) - Fix misleading comment (1000 chars → ~1000 tokens/4000 chars) - Extract marker strings as constants - Add unit tests for BuildSystemBase and BuildUserMeta
This commit is contained in:
+34
-5
@@ -33,6 +33,10 @@ const defaultLimit = 128_000
|
|||||||
// reserveTokens is headroom for the response generation.
|
// reserveTokens is headroom for the response generation.
|
||||||
const reserveTokens = 4_000
|
const reserveTokens = 4_000
|
||||||
|
|
||||||
|
const diffTruncMarker = "\n\n... [diff truncated due to context limit] ..."
|
||||||
|
const diffTooLargeMarker = "... [diff too large for context window — review manually] ..."
|
||||||
|
const userMetaTruncMarker = "\n... [description truncated] ..."
|
||||||
|
|
||||||
// EstimateTokens estimates the number of tokens in a string.
|
// EstimateTokens estimates the number of tokens in a string.
|
||||||
// Uses the rough heuristic of ~4 characters per token, which is
|
// Uses the rough heuristic of ~4 characters per token, which is
|
||||||
// conservative for English text and code.
|
// conservative for English text and code.
|
||||||
@@ -79,9 +83,9 @@ func Fit(model string, sections Sections) Result {
|
|||||||
baseTokens := EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
|
baseTokens := EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
|
||||||
available := limit - baseTokens
|
available := limit - baseTokens
|
||||||
if available < 0 {
|
if available < 0 {
|
||||||
// Base content alone exceeds budget. Truncate UserMeta (keep first 1000 chars).
|
// Base content alone exceeds budget. Truncate UserMeta (keep first ~1000 tokens).
|
||||||
if len(sections.UserMeta) > 4000 {
|
if len(sections.UserMeta) > 4000 {
|
||||||
sections.UserMeta = sections.UserMeta[:4000] + "\n... [description truncated] ..."
|
sections.UserMeta = truncateUTF8(sections.UserMeta, 4000) + userMetaTruncMarker
|
||||||
baseTokens = EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
|
baseTokens = EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
|
||||||
available = limit - baseTokens
|
available = limit - baseTokens
|
||||||
}
|
}
|
||||||
@@ -138,14 +142,20 @@ func Fit(model string, sections Sections) Result {
|
|||||||
if diffBudget < 0 {
|
if diffBudget < 0 {
|
||||||
diffBudget = 0
|
diffBudget = 0
|
||||||
}
|
}
|
||||||
maxChars := diffBudget * 4
|
// Reserve space for truncation marker
|
||||||
|
markerBudget := EstimateTokens(diffTruncMarker)
|
||||||
|
effectiveBudget := diffBudget - markerBudget
|
||||||
|
if effectiveBudget < 0 {
|
||||||
|
effectiveBudget = 0
|
||||||
|
}
|
||||||
|
maxChars := effectiveBudget * 4
|
||||||
if maxChars < len(sections.Diff) {
|
if maxChars < len(sections.Diff) {
|
||||||
removed := EstimateTokens(sections.Diff) - diffBudget
|
removed := EstimateTokens(sections.Diff) - diffBudget
|
||||||
trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000))
|
trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000))
|
||||||
if maxChars > 0 {
|
if maxChars > 0 {
|
||||||
sections.Diff = sections.Diff[:maxChars] + "\n\n... [diff truncated due to context limit] ..."
|
sections.Diff = truncateUTF8(sections.Diff, maxChars) + diffTruncMarker
|
||||||
} else {
|
} else {
|
||||||
sections.Diff = "... [diff too large for context window — review manually] ..."
|
sections.Diff = diffTooLargeMarker
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -195,3 +205,22 @@ func buildResult(s Sections, trimmed []string, estTokens int) Result {
|
|||||||
EstTokens: estTokens,
|
EstTokens: estTokens,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// truncateUTF8 truncates s to at most maxBytes without splitting multi-byte
|
||||||
|
// UTF-8 characters. Returns a valid UTF-8 string of at most maxBytes bytes.
|
||||||
|
func truncateUTF8(s string, maxBytes int) string {
|
||||||
|
if len(s) <= maxBytes {
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
// Walk backwards from maxBytes to find a valid UTF-8 boundary
|
||||||
|
for maxBytes > 0 && !isUTF8Start(s[maxBytes]) {
|
||||||
|
maxBytes--
|
||||||
|
}
|
||||||
|
return s[:maxBytes]
|
||||||
|
}
|
||||||
|
|
||||||
|
// isUTF8Start returns true if b is a valid start byte for a UTF-8 sequence
|
||||||
|
// (single-byte ASCII or multi-byte lead byte, not a continuation byte).
|
||||||
|
func isUTF8Start(b byte) bool {
|
||||||
|
return b&0xC0 != 0x80
|
||||||
|
}
|
||||||
|
|||||||
@@ -116,3 +116,43 @@ func TestBuildUserPrompt_WithoutFileContext(t *testing.T) {
|
|||||||
t.Error("should not include file context section when empty")
|
t.Error("should not include file context section when empty")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
func TestBuildSystemBase(t *testing.T) {
|
||||||
|
result := BuildSystemBase()
|
||||||
|
if result == "" {
|
||||||
|
t.Fatal("BuildSystemBase returned empty string")
|
||||||
|
}
|
||||||
|
if !strings.Contains(result, "expert code reviewer") {
|
||||||
|
t.Error("expected reviewer role in system base")
|
||||||
|
}
|
||||||
|
if !strings.Contains(result, "REQUEST_CHANGES") {
|
||||||
|
t.Error("expected verdict format in system base")
|
||||||
|
}
|
||||||
|
if !strings.Contains(result, "JSON") {
|
||||||
|
t.Error("expected JSON output instruction in system base")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildUserMeta(t *testing.T) {
|
||||||
|
result := BuildUserMeta("Fix bug", "Some description", true, "all checks passed")
|
||||||
|
if !strings.Contains(result, "Fix bug") {
|
||||||
|
t.Error("expected title in user meta")
|
||||||
|
}
|
||||||
|
if !strings.Contains(result, "Some description") {
|
||||||
|
t.Error("expected description in user meta")
|
||||||
|
}
|
||||||
|
if !strings.Contains(result, "PASSED") {
|
||||||
|
t.Error("expected CI PASSED status")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildUserMeta_CIFailed(t *testing.T) {
|
||||||
|
result := BuildUserMeta("Title", "", false, "test job failed")
|
||||||
|
if !strings.Contains(result, "FAILED") {
|
||||||
|
t.Error("expected CI FAILED status")
|
||||||
|
}
|
||||||
|
if strings.Contains(result, "Description") {
|
||||||
|
t.Error("expected no description section when empty")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user