fix: address review findings on budget system

- Account for truncation marker tokens when computing diff budget (prevents EstTokens exceeding model limit in edge cases) - Rune-safe truncation for both UserMeta and Diff (no split multi-byte) - Fix misleading comment (1000 chars → ~1000 tokens/4000 chars) - Extract marker strings as constants - Add unit tests for BuildSystemBase and BuildUserMeta
2026-05-01 18:59:07 -07:00
parent d9cacf6f62
commit dab7871cb4
2 changed files with 74 additions and 5 deletions
@@ -33,6 +33,10 @@ const defaultLimit = 128_000
 // reserveTokens is headroom for the response generation.
 const reserveTokens = 4_000

+const diffTruncMarker = "\n\n... [diff truncated due to context limit] ..."
+const diffTooLargeMarker = "... [diff too large for context window — review manually] ..."
+const userMetaTruncMarker = "\n... [description truncated] ..."
+
 // EstimateTokens estimates the number of tokens in a string.
 // Uses the rough heuristic of ~4 characters per token, which is
 // conservative for English text and code.
@@ -79,9 +83,9 @@ func Fit(model string, sections Sections) Result {
 	baseTokens := EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
 	available := limit - baseTokens
 	if available < 0 {
-		// Base content alone exceeds budget. Truncate UserMeta (keep first 1000 chars).
+		// Base content alone exceeds budget. Truncate UserMeta (keep first ~1000 tokens).
 		if len(sections.UserMeta) > 4000 {
-			sections.UserMeta = sections.UserMeta[:4000] + "\n... [description truncated] ..."
+			sections.UserMeta = truncateUTF8(sections.UserMeta, 4000) + userMetaTruncMarker
 			baseTokens = EstimateTokens(sections.SystemBase) + EstimateTokens(sections.UserMeta)
 			available = limit - baseTokens
 		}
@@ -138,14 +142,20 @@ func Fit(model string, sections Sections) Result {
 		if diffBudget < 0 {
 			diffBudget = 0
 		}
-		maxChars := diffBudget * 4
+		// Reserve space for truncation marker
+		markerBudget := EstimateTokens(diffTruncMarker)
+		effectiveBudget := diffBudget - markerBudget
+		if effectiveBudget < 0 {
+			effectiveBudget = 0
+		}
+		maxChars := effectiveBudget * 4
 		if maxChars < len(sections.Diff) {
 			removed := EstimateTokens(sections.Diff) - diffBudget
 			trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000))
 			if maxChars > 0 {
-				sections.Diff = sections.Diff[:maxChars] + "\n\n... [diff truncated due to context limit] ..."
+				sections.Diff = truncateUTF8(sections.Diff, maxChars) + diffTruncMarker
 			} else {
-				sections.Diff = "... [diff too large for context window — review manually] ..."
+				sections.Diff = diffTooLargeMarker
 			}
 		}
 	}
@@ -195,3 +205,22 @@ func buildResult(s Sections, trimmed []string, estTokens int) Result {
 		EstTokens:    estTokens,
 	}
 }
+
+// truncateUTF8 truncates s to at most maxBytes without splitting multi-byte
+// UTF-8 characters. Returns a valid UTF-8 string of at most maxBytes bytes.
+func truncateUTF8(s string, maxBytes int) string {
+	if len(s) <= maxBytes {
+		return s
+	}
+	// Walk backwards from maxBytes to find a valid UTF-8 boundary
+	for maxBytes > 0 && !isUTF8Start(s[maxBytes]) {
+		maxBytes--
+	}
+	return s[:maxBytes]
+}
+
+// isUTF8Start returns true if b is a valid start byte for a UTF-8 sequence
+// (single-byte ASCII or multi-byte lead byte, not a continuation byte).
+func isUTF8Start(b byte) bool {
+	return b&0xC0 != 0x80
+}
@@ -116,3 +116,43 @@ func TestBuildUserPrompt_WithoutFileContext(t *testing.T) {
 		t.Error("should not include file context section when empty")
 	}
 }
+
+
+func TestBuildSystemBase(t *testing.T) {
+	result := BuildSystemBase()
+	if result == "" {
+		t.Fatal("BuildSystemBase returned empty string")
+	}
+	if !strings.Contains(result, "expert code reviewer") {
+		t.Error("expected reviewer role in system base")
+	}
+	if !strings.Contains(result, "REQUEST_CHANGES") {
+		t.Error("expected verdict format in system base")
+	}
+	if !strings.Contains(result, "JSON") {
+		t.Error("expected JSON output instruction in system base")
+	}
+}
+
+func TestBuildUserMeta(t *testing.T) {
+	result := BuildUserMeta("Fix bug", "Some description", true, "all checks passed")
+	if !strings.Contains(result, "Fix bug") {
+		t.Error("expected title in user meta")
+	}
+	if !strings.Contains(result, "Some description") {
+		t.Error("expected description in user meta")
+	}
+	if !strings.Contains(result, "PASSED") {
+		t.Error("expected CI PASSED status")
+	}
+}
+
+func TestBuildUserMeta_CIFailed(t *testing.T) {
+	result := BuildUserMeta("Title", "", false, "test job failed")
+	if !strings.Contains(result, "FAILED") {
+		t.Error("expected CI FAILED status")
+	}
+	if strings.Contains(result, "Description") {
+		t.Error("expected no description section when empty")
+	}
+}