From 27a9be38bcea7a4fbbc15b5d2059afad68ef3f57 Mon Sep 17 00:00:00 2001 From: Rodin Date: Sun, 10 May 2026 20:54:20 -0700 Subject: [PATCH] fix: address PR #63 review findings 1. Refactor err2 to use scoped loadErr variable (MINOR - sonnet-review-bot) The else-if branches are mutually exclusive, so the error variable should be scoped inside the block, not declared outside with err2. 2. Sanitize DisplayName before embedding in Markdown (MINOR - security-review-bot) Remote persona metadata is untrusted. Added sanitizeMarkdownText() to escape Markdown special characters and strip control characters. Applied to both the header title and the footer attribution. 3. Document YAML DoS mitigations (MINOR - security-review-bot) Added comprehensive comment in remote_persona.go explaining existing defenses: file size limit, file count cap, depth limit, node count cap, and alias cycle detection. These collectively mitigate billion-laughs and stack exhaustion attacks. --- cmd/review-bot/main.go | 8 ++--- review/formatter.go | 32 ++++++++++++++++--- review/formatter_test.go | 68 ++++++++++++++++++++++++++++++++++++++++ review/remote_persona.go | 7 +++++ 4 files changed, 106 insertions(+), 9 deletions(-) diff --git a/cmd/review-bot/main.go b/cmd/review-bot/main.go index 22a2728..0b51939 100644 --- a/cmd/review-bot/main.go +++ b/cmd/review-bot/main.go @@ -205,12 +205,12 @@ func main() { slog.Error("invalid persona-file path", "error", err) os.Exit(1) } - var err2 error - persona, err2 = review.LoadPersona(resolvedPath) - if err2 != nil { - slog.Error("failed to load persona file", "file", *personaFile, "error", err2) + loadedPersona, loadErr := review.LoadPersona(resolvedPath) + if loadErr != nil { + slog.Error("failed to load persona file", "file", *personaFile, "error", loadErr) os.Exit(1) } + persona = loadedPersona slog.Info("loaded persona from file", "file", *personaFile, "persona", persona.Name) } diff --git a/review/formatter.go b/review/formatter.go index d2e109f..268ee44 100644 --- a/review/formatter.go +++ b/review/formatter.go @@ -2,6 +2,7 @@ package review import ( "fmt" + "regexp" "strings" ) @@ -22,10 +23,29 @@ func GiteaEvent(verdict string) string { } } +// markdownSpecialChars matches characters that have special meaning in Markdown. +// We escape these to prevent untrusted input from breaking formatting. +// Uses a quoted string since raw strings can't contain backticks. +var markdownSpecialChars = regexp.MustCompile("([\\\\*_`\\[\\]()#<>|~])") + +// sanitizeMarkdownText escapes special Markdown characters in untrusted text. +// This prevents markdown injection attacks where a malicious display name could +// break formatting, inject links, or create unexpected rendering. +func sanitizeMarkdownText(s string) string { + // First, remove any control characters and null bytes + cleaned := strings.Map(func(r rune) rune { + if r < 32 && r != '\t' && r != '\n' { + return -1 // drop the character + } + return r + }, s) + // Escape special Markdown characters by prepending backslash + return markdownSpecialChars.ReplaceAllString(cleaned, `\$1`) +} + // FormatMarkdownWithDisplay formats a ReviewResult with separate display name and sentinel name. -// Note: displayName is not HTML-escaped as Gitea sanitizes rendered Markdown. -// Persona display names are controlled by repo owners (trusted input). -// displayName is used for the header title, sentinelName is used for the cleanup sentinel. +// displayName is sanitized to prevent Markdown injection from untrusted remote persona metadata. +// sentinelName is used for the cleanup sentinel comment (machine-readable, not rendered). // If displayName is empty, sentinelName is used for both. func FormatMarkdownWithDisplay(result *ReviewResult, displayName, sentinelName string) string { var sb strings.Builder @@ -37,7 +57,8 @@ func FormatMarkdownWithDisplay(result *ReviewResult, displayName, sentinelName s } if headerName != "" { - title := CapitalizeFirst(headerName) + // Sanitize the header name to prevent Markdown injection + title := CapitalizeFirst(sanitizeMarkdownText(headerName)) sb.WriteString(fmt.Sprintf("# %s Review\n\n", title)) } @@ -61,7 +82,8 @@ func FormatMarkdownWithDisplay(result *ReviewResult, displayName, sentinelName s sb.WriteString(fmt.Sprintf("**%s** — %s\n", result.Verdict, result.Recommendation)) if sentinelName != "" { - sb.WriteString(fmt.Sprintf("\n---\n*Review by %s*\n", headerName)) + // Sanitize headerName for the footer as well + sb.WriteString(fmt.Sprintf("\n---\n*Review by %s*\n", sanitizeMarkdownText(headerName))) // Hidden sentinel for identifying this bot's reviews during cleanup sb.WriteString(fmt.Sprintf("\n\n", sentinelName)) } diff --git a/review/formatter_test.go b/review/formatter_test.go index 7684e88..6ae5f42 100644 --- a/review/formatter_test.go +++ b/review/formatter_test.go @@ -214,3 +214,71 @@ func TestFormatMarkdownWithDisplay(t *testing.T) { } }) } + +func TestSanitizeMarkdownText(t *testing.T) { + tests := []struct { + name string + input string + want string + }{ + { + name: "plain text unchanged", + input: "Security Specialist", + want: "Security Specialist", + }, + { + name: "escapes asterisks", + input: "**bold** attack", + want: `\*\*bold\*\* attack`, + }, + { + name: "escapes brackets for links", + input: "[click me](http://evil.com)", + want: `\[click me\]\(http://evil.com\)`, + }, + { + name: "escapes backticks", + input: "`code` injection", + want: "\\`code\\` injection", + }, + { + name: "escapes angle brackets", + input: "", + want: `\alert\(1\)\`, + }, + { + name: "escapes hash for headers", + input: "# Fake Header", + want: `\# Fake Header`, + }, + { + name: "escapes pipe for tables", + input: "col1 | col2", + want: `col1 \| col2`, + }, + { + name: "removes control characters", + input: "hello\x00world\x1f", + want: "helloworld", + }, + { + name: "preserves tabs and newlines", + input: "line1\n\tindented", + want: "line1\n\tindented", + }, + { + name: "escapes tilde for strikethrough", + input: "~~strikethrough~~", + want: `\~\~strikethrough\~\~`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := sanitizeMarkdownText(tt.input) + if got != tt.want { + t.Errorf("sanitizeMarkdownText(%q) = %q, want %q", tt.input, got, tt.want) + } + }) + } +} diff --git a/review/remote_persona.go b/review/remote_persona.go index 1b9c200..d452711 100644 --- a/review/remote_persona.go +++ b/review/remote_persona.go @@ -89,6 +89,13 @@ func LoadRemotePersonasFromPath(ctx context.Context, fetcher PersonaFetcher, own continue } + // YAML parsing uses parsePersona which has defenses against YAML DoS attacks: + // - MaxPersonaFileSize (above) caps raw input size before any parsing + // - maxPersonaFiles (above) limits the number of files processed per repo + // - unmarshalYAMLWithDepthLimit enforces MaxYAMLDepth to prevent stack exhaustion + // - checkYAMLDepth tracks node counts (MaxYAMLNodes) against "billion laughs" expansion + // - Alias cycles are detected and capped by seen-node tracking + // See persona.go for the implementation details. persona, err := parsePersona([]byte(content), entry.Path) if err != nil { slog.Warn("could not parse remote persona file", "file", entry.Path, "error", err)