Compare commits

..

3 Commits

Author SHA1 Message Date
Rodin 75190d53ed fix: address review findings (comment, marker budget, naming)
CI / test (pull_request) Successful in 13s
CI / review (gpt-4.1, gpt, GPT_REVIEW_TOKEN) (pull_request) Successful in 22s
CI / review (gpt-5, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 1m48s
- UserMeta comment: "never trimmed" → "truncated only if base exceeds budget"
- Skip diff truncation marker when diffBudget < markerBudget (prevents
  marker itself from pushing EstTokens over the limit)
- Rename filepath → filePath to avoid shadowing stdlib package name
2026-05-01 20:02:35 -07:00
Rodin 8b8462bdc8 fix: address final review findings
CI / test (pull_request) Successful in 13s
CI / review (gpt-4.1, gpt, GPT_REVIEW_TOKEN) (pull_request) Successful in 21s
CI / review (gpt-5, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 1m30s
- Comment: "~4 characters" → "~4 bytes" (len() counts bytes, not runes)
- Use utf8.RuneStart from stdlib instead of custom isUTF8Start helper
- Skip diff block entirely when Diff is empty (handles edge cases:
  draft→ready with no delta, force-push matching base, etc.)
2026-05-01 19:36:42 -07:00
Rodin 565a077b01 fix: CI config - correct patterns path, increase timeout
CI / test (pull_request) Successful in 13s
CI / review (gpt-4.1, gpt, GPT_REVIEW_TOKEN) (pull_request) Successful in 21s
CI / review (gpt-5, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 2m18s
- PATTERNS_FILES: docs/ does not exist in go-patterns, use patterns/
- LLM_TIMEOUT: 600s (gpt-5-mini needs more time for larger diffs)
2026-05-01 19:06:18 -07:00
3 changed files with 18 additions and 18 deletions
+1 -1
View File
@@ -31,7 +31,7 @@ jobs:
model: gpt-5
- name: gpt
token_secret: GPT_REVIEW_TOKEN
model: gpt-5-mini
model: gpt-4.1
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
+14 -14
View File
@@ -8,6 +8,7 @@ package budget
import (
"fmt"
"strings"
"unicode/utf8"
)
// modelLimit pairs a model name prefix with its context window size.
@@ -38,7 +39,7 @@ const diffTooLargeMarker = "... [diff too large for context window — review ma
const userMetaTruncMarker = "\n... [description truncated] ..."
// EstimateTokens estimates the number of tokens in a string.
// Uses the rough heuristic of ~4 characters per token, which is
// Uses the rough heuristic of ~4 bytes per token, which is
// conservative for English text and code.
func EstimateTokens(s string) int {
return len(s) / 4
@@ -64,7 +65,7 @@ type Sections struct {
Conventions string // Repo conventions (trimmed second)
FileContext string // Full file content (trimmed third)
Diff string // The actual diff (trimmed last, only truncated)
UserMeta string // PR title, description, CI status (never trimmed)
UserMeta string // PR title, description, CI status (truncated only if base exceeds budget)
}
// Result holds the trimmed content and metadata about what was dropped.
@@ -153,7 +154,11 @@ func Fit(model string, sections Sections) Result {
removed := EstimateTokens(sections.Diff) - diffBudget
trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000))
if maxChars > 0 {
sections.Diff = truncateUTF8(sections.Diff, maxChars) + diffTruncMarker
if diffBudget >= markerBudget {
sections.Diff = truncateUTF8(sections.Diff, maxChars) + diffTruncMarker
} else {
sections.Diff = truncateUTF8(sections.Diff, maxChars)
}
} else {
sections.Diff = diffTooLargeMarker
}
@@ -188,9 +193,11 @@ func buildResult(s Sections, trimmed []string, estTokens int) Result {
usr.WriteString(s.FileContext)
usr.WriteString("\n")
}
usr.WriteString("\n### Diff (changes to review)\n\n```diff\n")
usr.WriteString(s.Diff)
usr.WriteString("\n```\n")
if s.Diff != "" {
usr.WriteString("\n### Diff (changes to review)\n\n```diff\n")
usr.WriteString(s.Diff)
usr.WriteString("\n```\n")
}
if len(trimmed) > 0 {
usr.WriteString("\n⚠️ Note: Context was trimmed to fit model limits. Dropped: ")
@@ -212,15 +219,8 @@ func truncateUTF8(s string, maxBytes int) string {
if len(s) <= maxBytes {
return s
}
// Walk backwards from maxBytes to find a valid UTF-8 boundary
for maxBytes > 0 && !isUTF8Start(s[maxBytes]) {
for maxBytes > 0 && !utf8.RuneStart(s[maxBytes]) {
maxBytes--
}
return s[:maxBytes]
}
// isUTF8Start returns true if b is a valid start byte for a UTF-8 sequence
// (single-byte ASCII or multi-byte lead byte, not a continuation byte).
func isUTF8Start(b byte) bool {
return b&0xC0 != 0x80
}
+3 -3
View File
@@ -255,12 +255,12 @@ func fetchPatterns(ctx context.Context, client *gitea.Client, patternsRepo, patt
continue
}
for filepath, content := range files {
for filePath, content := range files {
// Only include markdown and text files as patterns
if !isPatternFile(filepath) {
if !isPatternFile(filePath) {
continue
}
sb.WriteString(fmt.Sprintf("### %s/%s\n\n%s\n\n", repoRef, filepath, content))
sb.WriteString(fmt.Sprintf("### %s/%s\n\n%s\n\n", repoRef, filePath, content))
}
}
}