diff --git a/cmd/review-bot/main.go b/cmd/review-bot/main.go index 22a2728..0b51939 100644 --- a/cmd/review-bot/main.go +++ b/cmd/review-bot/main.go @@ -205,12 +205,12 @@ func main() { slog.Error("invalid persona-file path", "error", err) os.Exit(1) } - var err2 error - persona, err2 = review.LoadPersona(resolvedPath) - if err2 != nil { - slog.Error("failed to load persona file", "file", *personaFile, "error", err2) + loadedPersona, loadErr := review.LoadPersona(resolvedPath) + if loadErr != nil { + slog.Error("failed to load persona file", "file", *personaFile, "error", loadErr) os.Exit(1) } + persona = loadedPersona slog.Info("loaded persona from file", "file", *personaFile, "persona", persona.Name) } diff --git a/review/formatter.go b/review/formatter.go index d2e109f..268ee44 100644 --- a/review/formatter.go +++ b/review/formatter.go @@ -2,6 +2,7 @@ package review import ( "fmt" + "regexp" "strings" ) @@ -22,10 +23,29 @@ func GiteaEvent(verdict string) string { } } +// markdownSpecialChars matches characters that have special meaning in Markdown. +// We escape these to prevent untrusted input from breaking formatting. +// Uses a quoted string since raw strings can't contain backticks. +var markdownSpecialChars = regexp.MustCompile("([\\\\*_`\\[\\]()#<>|~])") + +// sanitizeMarkdownText escapes special Markdown characters in untrusted text. +// This prevents markdown injection attacks where a malicious display name could +// break formatting, inject links, or create unexpected rendering. +func sanitizeMarkdownText(s string) string { + // First, remove any control characters and null bytes + cleaned := strings.Map(func(r rune) rune { + if r < 32 && r != '\t' && r != '\n' { + return -1 // drop the character + } + return r + }, s) + // Escape special Markdown characters by prepending backslash + return markdownSpecialChars.ReplaceAllString(cleaned, `\$1`) +} + // FormatMarkdownWithDisplay formats a ReviewResult with separate display name and sentinel name. -// Note: displayName is not HTML-escaped as Gitea sanitizes rendered Markdown. -// Persona display names are controlled by repo owners (trusted input). -// displayName is used for the header title, sentinelName is used for the cleanup sentinel. +// displayName is sanitized to prevent Markdown injection from untrusted remote persona metadata. +// sentinelName is used for the cleanup sentinel comment (machine-readable, not rendered). // If displayName is empty, sentinelName is used for both. func FormatMarkdownWithDisplay(result *ReviewResult, displayName, sentinelName string) string { var sb strings.Builder @@ -37,7 +57,8 @@ func FormatMarkdownWithDisplay(result *ReviewResult, displayName, sentinelName s } if headerName != "" { - title := CapitalizeFirst(headerName) + // Sanitize the header name to prevent Markdown injection + title := CapitalizeFirst(sanitizeMarkdownText(headerName)) sb.WriteString(fmt.Sprintf("# %s Review\n\n", title)) } @@ -61,7 +82,8 @@ func FormatMarkdownWithDisplay(result *ReviewResult, displayName, sentinelName s sb.WriteString(fmt.Sprintf("**%s** — %s\n", result.Verdict, result.Recommendation)) if sentinelName != "" { - sb.WriteString(fmt.Sprintf("\n---\n*Review by %s*\n", headerName)) + // Sanitize headerName for the footer as well + sb.WriteString(fmt.Sprintf("\n---\n*Review by %s*\n", sanitizeMarkdownText(headerName))) // Hidden sentinel for identifying this bot's reviews during cleanup sb.WriteString(fmt.Sprintf("\n\n", sentinelName)) } diff --git a/review/formatter_test.go b/review/formatter_test.go index 7684e88..6ae5f42 100644 --- a/review/formatter_test.go +++ b/review/formatter_test.go @@ -214,3 +214,71 @@ func TestFormatMarkdownWithDisplay(t *testing.T) { } }) } + +func TestSanitizeMarkdownText(t *testing.T) { + tests := []struct { + name string + input string + want string + }{ + { + name: "plain text unchanged", + input: "Security Specialist", + want: "Security Specialist", + }, + { + name: "escapes asterisks", + input: "**bold** attack", + want: `\*\*bold\*\* attack`, + }, + { + name: "escapes brackets for links", + input: "[click me](http://evil.com)", + want: `\[click me\]\(http://evil.com\)`, + }, + { + name: "escapes backticks", + input: "`code` injection", + want: "\\`code\\` injection", + }, + { + name: "escapes angle brackets", + input: "", + want: `\alert\(1\)\`, + }, + { + name: "escapes hash for headers", + input: "# Fake Header", + want: `\# Fake Header`, + }, + { + name: "escapes pipe for tables", + input: "col1 | col2", + want: `col1 \| col2`, + }, + { + name: "removes control characters", + input: "hello\x00world\x1f", + want: "helloworld", + }, + { + name: "preserves tabs and newlines", + input: "line1\n\tindented", + want: "line1\n\tindented", + }, + { + name: "escapes tilde for strikethrough", + input: "~~strikethrough~~", + want: `\~\~strikethrough\~\~`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := sanitizeMarkdownText(tt.input) + if got != tt.want { + t.Errorf("sanitizeMarkdownText(%q) = %q, want %q", tt.input, got, tt.want) + } + }) + } +} diff --git a/review/remote_persona.go b/review/remote_persona.go index 1b9c200..d452711 100644 --- a/review/remote_persona.go +++ b/review/remote_persona.go @@ -89,6 +89,13 @@ func LoadRemotePersonasFromPath(ctx context.Context, fetcher PersonaFetcher, own continue } + // YAML parsing uses parsePersona which has defenses against YAML DoS attacks: + // - MaxPersonaFileSize (above) caps raw input size before any parsing + // - maxPersonaFiles (above) limits the number of files processed per repo + // - unmarshalYAMLWithDepthLimit enforces MaxYAMLDepth to prevent stack exhaustion + // - checkYAMLDepth tracks node counts (MaxYAMLNodes) against "billion laughs" expansion + // - Alias cycles are detected and capped by seen-node tracking + // See persona.go for the implementation details. persona, err := parsePersona([]byte(content), entry.Path) if err != nil { slog.Warn("could not parse remote persona file", "file", entry.Path, "error", err)