fix: address review findings (comment, marker budget, naming)

- UserMeta comment: "never trimmed" → "truncated only if base exceeds budget"
- Skip diff truncation marker when diffBudget < markerBudget (prevents the marker itself from pushing EstTokens over the limit)
- Rename filepath → filePath to avoid shadowing the stdlib package name
fix: address final review findings
2026-05-01 20:02:35 -07:00 · 2026-05-01 19:36:42 -07:00
2 changed files with 17 additions and 17 deletions
@@ -8,6 +8,7 @@ package budget
 import (
 	"fmt"
 	"strings"
+	"unicode/utf8"
 )

 // modelLimit pairs a model name prefix with its context window size.
@@ -38,7 +39,7 @@ const diffTooLargeMarker = "... [diff too large for context window — review ma
 const userMetaTruncMarker = "\n... [description truncated] ..."

 // EstimateTokens estimates the number of tokens in a string.
-// Uses the rough heuristic of ~4 characters per token, which is
+// Uses the rough heuristic of ~4 bytes per token, which is
 // conservative for English text and code.
 func EstimateTokens(s string) int {
 	return len(s) / 4
@@ -64,7 +65,7 @@ type Sections struct {
 	Conventions string // Repo conventions (trimmed second)
 	FileContext string // Full file content (trimmed third)
 	Diff        string // The actual diff (trimmed last, only truncated)
-	UserMeta    string // PR title, description, CI status (never trimmed)
+	UserMeta    string // PR title, description, CI status (truncated only if base exceeds budget)
 }

 // Result holds the trimmed content and metadata about what was dropped.
@@ -153,7 +154,11 @@ func Fit(model string, sections Sections) Result {
 			removed := EstimateTokens(sections.Diff) - diffBudget
 			trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000))
 			if maxChars > 0 {
+				if diffBudget >= markerBudget {
 					sections.Diff = truncateUTF8(sections.Diff, maxChars) + diffTruncMarker
+				} else {
+					sections.Diff = truncateUTF8(sections.Diff, maxChars)
+				}
 			} else {
 				sections.Diff = diffTooLargeMarker
 			}
@@ -188,9 +193,11 @@ func buildResult(s Sections, trimmed []string, estTokens int) Result {
 		usr.WriteString(s.FileContext)
 		usr.WriteString("\n")
 	}
+	if s.Diff != "" {
 		usr.WriteString("\n### Diff (changes to review)\n\n```diff\n")
 		usr.WriteString(s.Diff)
 		usr.WriteString("\n```\n")
+	}

 	if len(trimmed) > 0 {
 		usr.WriteString("\n⚠️ Note: Context was trimmed to fit model limits. Dropped: ")
@@ -212,15 +219,8 @@ func truncateUTF8(s string, maxBytes int) string {
 	if len(s) <= maxBytes {
 		return s
 	}
-	// Walk backwards from maxBytes to find a valid UTF-8 boundary
-	for maxBytes > 0 && !isUTF8Start(s[maxBytes]) {
+	for maxBytes > 0 && !utf8.RuneStart(s[maxBytes]) {
 		maxBytes--
 	}
 	return s[:maxBytes]
 }
-
-// isUTF8Start returns true if b is a valid start byte for a UTF-8 sequence
-// (single-byte ASCII or multi-byte lead byte, not a continuation byte).
-func isUTF8Start(b byte) bool {
-	return b&0xC0 != 0x80
-}
@@ -255,12 +255,12 @@ func fetchPatterns(ctx context.Context, client *gitea.Client, patternsRepo, patt
 				continue
 			}

-			for filepath, content := range files {
+			for filePath, content := range files {
 				// Only include markdown and text files as patterns
-				if !isPatternFile(filepath) {
+				if !isPatternFile(filePath) {
 					continue
 				}
-				sb.WriteString(fmt.Sprintf("### %s/%s\n\n%s\n\n", repoRef, filepath, content))
+				sb.WriteString(fmt.Sprintf("### %s/%s\n\n%s\n\n", repoRef, filePath, content))
 			}
 		}
 	}
Author	SHA1	Message	Date
Rodin	75190d53ed	fix: address review findings (comment, marker budget, naming) CI / test (pull_request) Successful in 13s Details CI / review (gpt-4.1, gpt, GPT_REVIEW_TOKEN) (pull_request) Successful in 22s Details CI / review (gpt-5, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 1m48s Details - UserMeta comment: "never trimmed" → "truncated only if base exceeds budget" - Skip diff truncation marker when diffBudget < markerBudget (prevents marker itself from pushing EstTokens over the limit) - Rename filepath → filePath to avoid shadowing stdlib package name	2026-05-01 20:02:35 -07:00
Rodin	8b8462bdc8	fix: address final review findings CI / test (pull_request) Successful in 13s Details CI / review (gpt-4.1, gpt, GPT_REVIEW_TOKEN) (pull_request) Successful in 21s Details CI / review (gpt-5, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 1m30s Details - Comment: "~4 characters" → "~4 bytes" (len() counts bytes, not runes) - Use utf8.RuneStart from stdlib instead of custom isUTF8Start helper - Skip diff block entirely when Diff is empty (handles edge cases: draft→ready with no delta, force-push matching base, etc.)	2026-05-01 19:36:42 -07:00