Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 75190d53ed | |||
| 8b8462bdc8 |
+14
-14
@@ -8,6 +8,7 @@ package budget
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
|
"unicode/utf8"
|
||||||
)
|
)
|
||||||
|
|
||||||
// modelLimit pairs a model name prefix with its context window size.
|
// modelLimit pairs a model name prefix with its context window size.
|
||||||
@@ -38,7 +39,7 @@ const diffTooLargeMarker = "... [diff too large for context window — review ma
|
|||||||
const userMetaTruncMarker = "\n... [description truncated] ..."
|
const userMetaTruncMarker = "\n... [description truncated] ..."
|
||||||
|
|
||||||
// EstimateTokens estimates the number of tokens in a string.
|
// EstimateTokens estimates the number of tokens in a string.
|
||||||
// Uses the rough heuristic of ~4 characters per token, which is
|
// Uses the rough heuristic of ~4 bytes per token, which is
|
||||||
// conservative for English text and code.
|
// conservative for English text and code.
|
||||||
func EstimateTokens(s string) int {
|
func EstimateTokens(s string) int {
|
||||||
return len(s) / 4
|
return len(s) / 4
|
||||||
@@ -64,7 +65,7 @@ type Sections struct {
|
|||||||
Conventions string // Repo conventions (trimmed second)
|
Conventions string // Repo conventions (trimmed second)
|
||||||
FileContext string // Full file content (trimmed third)
|
FileContext string // Full file content (trimmed third)
|
||||||
Diff string // The actual diff (trimmed last, only truncated)
|
Diff string // The actual diff (trimmed last, only truncated)
|
||||||
UserMeta string // PR title, description, CI status (never trimmed)
|
UserMeta string // PR title, description, CI status (truncated only if base exceeds budget)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Result holds the trimmed content and metadata about what was dropped.
|
// Result holds the trimmed content and metadata about what was dropped.
|
||||||
@@ -153,7 +154,11 @@ func Fit(model string, sections Sections) Result {
|
|||||||
removed := EstimateTokens(sections.Diff) - diffBudget
|
removed := EstimateTokens(sections.Diff) - diffBudget
|
||||||
trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000))
|
trimmed = append(trimmed, fmt.Sprintf("diff truncated (~%dK tokens removed)", removed/1000))
|
||||||
if maxChars > 0 {
|
if maxChars > 0 {
|
||||||
sections.Diff = truncateUTF8(sections.Diff, maxChars) + diffTruncMarker
|
if diffBudget >= markerBudget {
|
||||||
|
sections.Diff = truncateUTF8(sections.Diff, maxChars) + diffTruncMarker
|
||||||
|
} else {
|
||||||
|
sections.Diff = truncateUTF8(sections.Diff, maxChars)
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
sections.Diff = diffTooLargeMarker
|
sections.Diff = diffTooLargeMarker
|
||||||
}
|
}
|
||||||
@@ -188,9 +193,11 @@ func buildResult(s Sections, trimmed []string, estTokens int) Result {
|
|||||||
usr.WriteString(s.FileContext)
|
usr.WriteString(s.FileContext)
|
||||||
usr.WriteString("\n")
|
usr.WriteString("\n")
|
||||||
}
|
}
|
||||||
usr.WriteString("\n### Diff (changes to review)\n\n```diff\n")
|
if s.Diff != "" {
|
||||||
usr.WriteString(s.Diff)
|
usr.WriteString("\n### Diff (changes to review)\n\n```diff\n")
|
||||||
usr.WriteString("\n```\n")
|
usr.WriteString(s.Diff)
|
||||||
|
usr.WriteString("\n```\n")
|
||||||
|
}
|
||||||
|
|
||||||
if len(trimmed) > 0 {
|
if len(trimmed) > 0 {
|
||||||
usr.WriteString("\n⚠️ Note: Context was trimmed to fit model limits. Dropped: ")
|
usr.WriteString("\n⚠️ Note: Context was trimmed to fit model limits. Dropped: ")
|
||||||
@@ -212,15 +219,8 @@ func truncateUTF8(s string, maxBytes int) string {
|
|||||||
if len(s) <= maxBytes {
|
if len(s) <= maxBytes {
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
// Walk backwards from maxBytes to find a valid UTF-8 boundary
|
for maxBytes > 0 && !utf8.RuneStart(s[maxBytes]) {
|
||||||
for maxBytes > 0 && !isUTF8Start(s[maxBytes]) {
|
|
||||||
maxBytes--
|
maxBytes--
|
||||||
}
|
}
|
||||||
return s[:maxBytes]
|
return s[:maxBytes]
|
||||||
}
|
}
|
||||||
|
|
||||||
// isUTF8Start returns true if b is a valid start byte for a UTF-8 sequence
|
|
||||||
// (single-byte ASCII or multi-byte lead byte, not a continuation byte).
|
|
||||||
func isUTF8Start(b byte) bool {
|
|
||||||
return b&0xC0 != 0x80
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -255,12 +255,12 @@ func fetchPatterns(ctx context.Context, client *gitea.Client, patternsRepo, patt
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
for filepath, content := range files {
|
for filePath, content := range files {
|
||||||
// Only include markdown and text files as patterns
|
// Only include markdown and text files as patterns
|
||||||
if !isPatternFile(filepath) {
|
if !isPatternFile(filePath) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
sb.WriteString(fmt.Sprintf("### %s/%s\n\n%s\n\n", repoRef, filepath, content))
|
sb.WriteString(fmt.Sprintf("### %s/%s\n\n%s\n\n", repoRef, filePath, content))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user