fix: address PR #63 review findings
1. Refactor err2 to use scoped loadErr variable (MINOR - sonnet-review-bot) The else-if branches are mutually exclusive, so the error variable should be scoped inside the block, not declared outside with err2. 2. Sanitize DisplayName before embedding in Markdown (MINOR - security-review-bot) Remote persona metadata is untrusted. Added sanitizeMarkdownText() to escape Markdown special characters and strip control characters. Applied to both the header title and the footer attribution. 3. Document YAML DoS mitigations (MINOR - security-review-bot) Added comprehensive comment in remote_persona.go explaining existing defenses: file size limit, file count cap, depth limit, node count cap, and alias cycle detection. These collectively mitigate billion-laughs and stack exhaustion attacks.
This commit is contained in:
@@ -205,12 +205,12 @@ func main() {
|
||||
slog.Error("invalid persona-file path", "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
var err2 error
|
||||
persona, err2 = review.LoadPersona(resolvedPath)
|
||||
if err2 != nil {
|
||||
slog.Error("failed to load persona file", "file", *personaFile, "error", err2)
|
||||
loadedPersona, loadErr := review.LoadPersona(resolvedPath)
|
||||
if loadErr != nil {
|
||||
slog.Error("failed to load persona file", "file", *personaFile, "error", loadErr)
|
||||
os.Exit(1)
|
||||
}
|
||||
persona = loadedPersona
|
||||
slog.Info("loaded persona from file", "file", *personaFile, "persona", persona.Name)
|
||||
}
|
||||
|
||||
|
||||
+27
-5
@@ -2,6 +2,7 @@ package review
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
@@ -22,10 +23,29 @@ func GiteaEvent(verdict string) string {
|
||||
}
|
||||
}
|
||||
|
||||
// markdownSpecialChars matches characters that have special meaning in Markdown.
|
||||
// We escape these to prevent untrusted input from breaking formatting.
|
||||
// Uses a quoted string since raw strings can't contain backticks.
|
||||
var markdownSpecialChars = regexp.MustCompile("([\\\\*_`\\[\\]()#<>|~])")
|
||||
|
||||
// sanitizeMarkdownText escapes special Markdown characters in untrusted text.
|
||||
// This prevents markdown injection attacks where a malicious display name could
|
||||
// break formatting, inject links, or create unexpected rendering.
|
||||
func sanitizeMarkdownText(s string) string {
|
||||
// First, remove any control characters and null bytes
|
||||
cleaned := strings.Map(func(r rune) rune {
|
||||
if r < 32 && r != '\t' && r != '\n' {
|
||||
return -1 // drop the character
|
||||
}
|
||||
return r
|
||||
}, s)
|
||||
// Escape special Markdown characters by prepending backslash
|
||||
return markdownSpecialChars.ReplaceAllString(cleaned, `\$1`)
|
||||
}
|
||||
|
||||
// FormatMarkdownWithDisplay formats a ReviewResult with separate display name and sentinel name.
|
||||
// Note: displayName is not HTML-escaped as Gitea sanitizes rendered Markdown.
|
||||
// Persona display names are controlled by repo owners (trusted input).
|
||||
// displayName is used for the header title, sentinelName is used for the cleanup sentinel.
|
||||
// displayName is sanitized to prevent Markdown injection from untrusted remote persona metadata.
|
||||
// sentinelName is used for the cleanup sentinel comment (machine-readable, not rendered).
|
||||
// If displayName is empty, sentinelName is used for both.
|
||||
func FormatMarkdownWithDisplay(result *ReviewResult, displayName, sentinelName string) string {
|
||||
var sb strings.Builder
|
||||
@@ -37,7 +57,8 @@ func FormatMarkdownWithDisplay(result *ReviewResult, displayName, sentinelName s
|
||||
}
|
||||
|
||||
if headerName != "" {
|
||||
title := CapitalizeFirst(headerName)
|
||||
// Sanitize the header name to prevent Markdown injection
|
||||
title := CapitalizeFirst(sanitizeMarkdownText(headerName))
|
||||
sb.WriteString(fmt.Sprintf("# %s Review\n\n", title))
|
||||
}
|
||||
|
||||
@@ -61,7 +82,8 @@ func FormatMarkdownWithDisplay(result *ReviewResult, displayName, sentinelName s
|
||||
sb.WriteString(fmt.Sprintf("**%s** — %s\n", result.Verdict, result.Recommendation))
|
||||
|
||||
if sentinelName != "" {
|
||||
sb.WriteString(fmt.Sprintf("\n---\n*Review by %s*\n", headerName))
|
||||
// Sanitize headerName for the footer as well
|
||||
sb.WriteString(fmt.Sprintf("\n---\n*Review by %s*\n", sanitizeMarkdownText(headerName)))
|
||||
// Hidden sentinel for identifying this bot's reviews during cleanup
|
||||
sb.WriteString(fmt.Sprintf("\n<!-- review-bot:%s -->\n", sentinelName))
|
||||
}
|
||||
|
||||
@@ -214,3 +214,71 @@ func TestFormatMarkdownWithDisplay(t *testing.T) {
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestSanitizeMarkdownText(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "plain text unchanged",
|
||||
input: "Security Specialist",
|
||||
want: "Security Specialist",
|
||||
},
|
||||
{
|
||||
name: "escapes asterisks",
|
||||
input: "**bold** attack",
|
||||
want: `\*\*bold\*\* attack`,
|
||||
},
|
||||
{
|
||||
name: "escapes brackets for links",
|
||||
input: "[click me](http://evil.com)",
|
||||
want: `\[click me\]\(http://evil.com\)`,
|
||||
},
|
||||
{
|
||||
name: "escapes backticks",
|
||||
input: "`code` injection",
|
||||
want: "\\`code\\` injection",
|
||||
},
|
||||
{
|
||||
name: "escapes angle brackets",
|
||||
input: "<script>alert(1)</script>",
|
||||
want: `\<script\>alert\(1\)\</script\>`,
|
||||
},
|
||||
{
|
||||
name: "escapes hash for headers",
|
||||
input: "# Fake Header",
|
||||
want: `\# Fake Header`,
|
||||
},
|
||||
{
|
||||
name: "escapes pipe for tables",
|
||||
input: "col1 | col2",
|
||||
want: `col1 \| col2`,
|
||||
},
|
||||
{
|
||||
name: "removes control characters",
|
||||
input: "hello\x00world\x1f",
|
||||
want: "helloworld",
|
||||
},
|
||||
{
|
||||
name: "preserves tabs and newlines",
|
||||
input: "line1\n\tindented",
|
||||
want: "line1\n\tindented",
|
||||
},
|
||||
{
|
||||
name: "escapes tilde for strikethrough",
|
||||
input: "~~strikethrough~~",
|
||||
want: `\~\~strikethrough\~\~`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := sanitizeMarkdownText(tt.input)
|
||||
if got != tt.want {
|
||||
t.Errorf("sanitizeMarkdownText(%q) = %q, want %q", tt.input, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -89,6 +89,13 @@ func LoadRemotePersonasFromPath(ctx context.Context, fetcher PersonaFetcher, own
|
||||
continue
|
||||
}
|
||||
|
||||
// YAML parsing uses parsePersona which has defenses against YAML DoS attacks:
|
||||
// - MaxPersonaFileSize (above) caps raw input size before any parsing
|
||||
// - maxPersonaFiles (above) limits the number of files processed per repo
|
||||
// - unmarshalYAMLWithDepthLimit enforces MaxYAMLDepth to prevent stack exhaustion
|
||||
// - checkYAMLDepth tracks node counts (MaxYAMLNodes) against "billion laughs" expansion
|
||||
// - Alias cycles are detected and capped by seen-node tracking
|
||||
// See persona.go for the implementation details.
|
||||
persona, err := parsePersona([]byte(content), entry.Path)
|
||||
if err != nil {
|
||||
slog.Warn("could not parse remote persona file", "file", entry.Path, "error", err)
|
||||
|
||||
Reference in New Issue
Block a user