diff --git a/docs/DESIGN-57-yaml-persona.md b/docs/DESIGN-57-yaml-persona.md deleted file mode 100644 index 85300ba..0000000 --- a/docs/DESIGN-57-yaml-persona.md +++ /dev/null @@ -1,93 +0,0 @@ -# Design: YAML Support for Persona Files (#57) - -## Problem - -JSON is awkward for persona files that contain multi-line text (identity, severity descriptions). YAML supports cleaner multi-line strings and comments, improving readability and maintainability. - -## Constraints - -- Backwards compatibility: existing JSON personas must continue to work -- Security: protect against DoS via deeply nested YAML (AIKIDO-2024-10486) -- Consistency: use `.yaml` extension (not `.yml`) -- Library: use `github.com/goccy/go-yaml` v1.16.0+ (actively maintained, security fix applied) - -## Proposed Approach - -1. **Update `parsePersona`** to detect format from file extension -2. **Add YAML parsing** with explicit depth limit (defense in depth) -3. **Keep JSON as fallback** for files without `.yaml`/`.yml` extension -4. **Convert built-in personas** to YAML format -5. **Update embed directive** to include both formats - -### File Extension Detection - -```go -func parsePersona(data []byte, source string) (*Persona, error) { - isYAML := strings.HasSuffix(source, ".yaml") || strings.HasSuffix(source, ".yml") - if isYAML { - return parseYAML(data, source) - } - return parseJSON(data, source) -} -``` - -### YAML Parsing with Depth Protection - -```go -func parseYAML(data []byte, source string) (*Persona, error) { - var p Persona - // go-yaml has built-in protection against deeply nested structures - // but we add explicit decoder options for defense in depth - if err := yaml.Unmarshal(data, &p); err != nil { - return nil, fmt.Errorf("parse persona %s: %w", source, err) - } - if err := validatePersona(&p, source); err != nil { - return nil, err - } - return &p, nil -} -``` - -The `goccy/go-yaml` library since v1.16.0 limits nesting depth by default. - -## State/Data Model - -No new state. Same `Persona` struct, just different parsing. - -## Error Cases - -| Error | Handling | -|-------|----------| -| Invalid YAML syntax | Return parse error with source file | -| Deeply nested YAML | Library rejects (v1.16.0+ fix) | -| Unknown extension | Fall back to JSON parsing | -| Missing required fields | Validation rejects after parse | - -## Edge Cases - -- File with `.json` extension but YAML content → JSON parse fails, user sees error -- File with no extension → defaults to JSON -- Embedded persona reference like `builtin:security` → detect by embed path (`personas/X.yaml`) - -## Testing Strategy - -1. Unit tests for YAML parsing (valid, invalid, deeply nested) -2. Unit tests for extension detection -3. Integration test for built-in personas (now YAML) -4. Backwards compat test: verify JSON still works for external files - -## Completion Checklist - -1. [ ] `go-yaml` dependency added at v1.16.0+ -2. [ ] Extension detection uses case-insensitive comparison -3. [ ] YAML parse errors include source file name -4. [ ] JSON parsing still works for `.json` files -5. [ ] Built-in personas converted to YAML with readable multi-line strings -6. [ ] Embed directive updated to include `*.yaml` -7. [ ] Test for deeply nested YAML rejection -8. [ ] All existing tests pass - -## Open Questions - -- Should we support both `.yaml` AND `.yml`? Issue says `.yaml` only for consistency, but some users expect `.yml`. **Decision:** Support both for reading, recommend `.yaml` in docs. -- Should we add a "format" field to detect mismatched extension/content? **Decision:** No, keep it simple. Extension determines format. diff --git a/go.mod b/go.mod index 007337a..9b2e8d2 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,3 @@ module gitea.weiker.me/rodin/review-bot go 1.26.2 - -require github.com/goccy/go-yaml v1.19.2 // indirect diff --git a/go.sum b/go.sum deleted file mode 100644 index bd88ba6..0000000 --- a/go.sum +++ /dev/null @@ -1,2 +0,0 @@ -github.com/goccy/go-yaml v1.19.2 h1:PmFC1S6h8ljIz6gMRBopkjP1TVT7xuwrButHID66PoM= -github.com/goccy/go-yaml v1.19.2/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= diff --git a/review/persona.go b/review/persona.go index 453d47f..c8d676c 100644 --- a/review/persona.go +++ b/review/persona.go @@ -7,35 +7,32 @@ import ( "os" "strings" "unicode/utf8" - - "github.com/goccy/go-yaml" ) -//go:embed personas/*.yaml +//go:embed personas/*.json var embeddedPersonas embed.FS // Persona defines a specialized review role with focused expertise. type Persona struct { - Name string `json:"name" yaml:"name"` - DisplayName string `json:"display_name" yaml:"display_name"` - ModelPref string `json:"model_preference,omitempty" yaml:"model_preference,omitempty"` - Identity string `json:"identity" yaml:"identity"` - Focus []string `json:"focus" yaml:"focus"` - Ignore []string `json:"ignore" yaml:"ignore"` - Severity Severity `json:"severity" yaml:"severity"` - OutputFormat string `json:"output_format,omitempty" yaml:"output_format,omitempty"` + Name string `json:"name"` + DisplayName string `json:"display_name"` + ModelPref string `json:"model_preference,omitempty"` + Identity string `json:"identity"` + Focus []string `json:"focus"` + Ignore []string `json:"ignore"` + Severity Severity `json:"severity"` + OutputFormat string `json:"output_format,omitempty"` } // Severity defines what constitutes each severity level for this persona. // These are prompt guidance for the LLM, not output format changes. type Severity struct { - Major string `json:"major" yaml:"major"` - Minor string `json:"minor" yaml:"minor"` - Nit string `json:"nit" yaml:"nit"` + Major string `json:"major"` + Minor string `json:"minor"` + Nit string `json:"nit"` } -// LoadPersona loads a persona from a JSON or YAML file path. -// Format is detected by file extension: .yaml/.yml for YAML, .json or other for JSON. +// LoadPersona loads a persona from a JSON file path. func LoadPersona(path string) (*Persona, error) { data, err := os.ReadFile(path) if err != nil { @@ -46,23 +43,14 @@ func LoadPersona(path string) (*Persona, error) { // LoadBuiltinPersona loads a built-in persona by name. // Returns an error if the persona doesn't exist. -// Built-in personas are stored in YAML format. func LoadBuiltinPersona(name string) (*Persona, error) { - // Try YAML first (preferred format) - yamlFile := name + ".yaml" - data, err := embeddedPersonas.ReadFile("personas/" + yamlFile) - if err == nil { - return parsePersona(data, "builtin:"+yamlFile) - } - - // Fall back to JSON for backwards compatibility - jsonFile := name + ".json" - data, err = embeddedPersonas.ReadFile("personas/" + jsonFile) + filename := name + ".json" + data, err := embeddedPersonas.ReadFile("personas/" + filename) // embed.FS paths use forward slashes per io/fs spec if err != nil { available := ListBuiltinPersonas() return nil, fmt.Errorf("unknown built-in persona %q (available: %s)", name, strings.Join(available, ", ")) } - return parsePersona(data, "builtin:"+jsonFile) + return parsePersona(data, "builtin:"+name) } // ListBuiltinPersonas returns the names of all built-in personas. @@ -72,50 +60,22 @@ func ListBuiltinPersonas() []string { if err != nil { return []string{} } - seen := make(map[string]bool) + var names []string for _, e := range entries { if e.IsDir() { continue } name := e.Name() - // Strip extension to get persona name - var personaName string - switch { - case strings.HasSuffix(name, ".yaml"): - personaName = strings.TrimSuffix(name, ".yaml") - case strings.HasSuffix(name, ".yml"): - personaName = strings.TrimSuffix(name, ".yml") - case strings.HasSuffix(name, ".json"): - personaName = strings.TrimSuffix(name, ".json") - default: - continue + if strings.HasSuffix(name, ".json") { + names = append(names, strings.TrimSuffix(name, ".json")) } - if !seen[personaName] { - seen[personaName] = true - } - } - var names []string - for name := range seen { - names = append(names, name) } return names } -// parsePersona parses persona data from JSON or YAML format. -// Format is detected by the source file extension. func parsePersona(data []byte, source string) (*Persona, error) { - lowerSource := strings.ToLower(source) - isYAML := strings.HasSuffix(lowerSource, ".yaml") || strings.HasSuffix(lowerSource, ".yml") - var p Persona - var err error - if isYAML { - // go-yaml v1.16.0+ has built-in protection against deeply nested structures - err = yaml.Unmarshal(data, &p) - } else { - err = json.Unmarshal(data, &p) - } - if err != nil { + if err := json.Unmarshal(data, &p); err != nil { return nil, fmt.Errorf("parse persona %s: %w", source, err) } if err := validatePersona(&p, source); err != nil { diff --git a/review/persona_test.go b/review/persona_test.go index 6852d41..f749ef9 100644 --- a/review/persona_test.go +++ b/review/persona_test.go @@ -87,83 +87,6 @@ func TestListBuiltinPersonas(t *testing.T) { } } -func TestLoadPersonaFromYAMLFile(t *testing.T) { - dir := t.TempDir() - path := filepath.Join(dir, "test.yaml") - - content := `# Test persona -name: test -display_name: Test Persona -identity: | - You are a test persona. - Multi-line identity works. -focus: - - testing - - validation -ignore: - - nothing -severity: - major: Big problems - minor: Small problems - nit: Tiny problems -` - - if err := os.WriteFile(path, []byte(content), 0644); err != nil { - t.Fatalf("failed to write test file: %v", err) - } - - p, err := LoadPersona(path) - if err != nil { - t.Fatalf("LoadPersona failed: %v", err) - } - - if p.Name != "test" { - t.Errorf("Name = %q, want %q", p.Name, "test") - } - if p.DisplayName != "Test Persona" { - t.Errorf("DisplayName = %q, want %q", p.DisplayName, "Test Persona") - } - if len(p.Focus) != 2 { - t.Errorf("Focus len = %d, want 2", len(p.Focus)) - } - if !strings.Contains(p.Identity, "Multi-line") { - t.Error("Identity should contain multi-line content") - } -} - -func TestLoadPersonaFromYMLFile(t *testing.T) { - dir := t.TempDir() - path := filepath.Join(dir, "test.yml") - - content := `name: test -display_name: Test YML -identity: Test identity -focus: - - testing -ignore: [] -severity: - major: Big - minor: Small - nit: Tiny -` - - if err := os.WriteFile(path, []byte(content), 0644); err != nil { - t.Fatalf("failed to write test file: %v", err) - } - - p, err := LoadPersona(path) - if err != nil { - t.Fatalf("LoadPersona failed: %v", err) - } - - if p.Name != "test" { - t.Errorf("Name = %q, want %q", p.Name, "test") - } - if p.DisplayName != "Test YML" { - t.Errorf("DisplayName = %q, want %q", p.DisplayName, "Test YML") - } -} - func TestLoadPersonaFromJSONFile(t *testing.T) { dir := t.TempDir() path := filepath.Join(dir, "test.json") @@ -207,38 +130,22 @@ func TestLoadPersonaFromJSONFile(t *testing.T) { func TestLoadPersonaValidation(t *testing.T) { tests := []struct { name string - content string - ext string + json string wantErr string }{ { - name: "missing name yaml", - content: "identity: test\n", - ext: ".yaml", + name: "missing name", + json: `{"identity": "test"}`, wantErr: "name is required", }, { - name: "missing identity yaml", - content: "name: test\n", - ext: ".yaml", + name: "missing identity", + json: `{"name": "test"}`, wantErr: "identity is required", }, { - name: "missing name json", - content: `{"identity": "test"}`, - ext: ".json", - wantErr: "name is required", - }, - { - name: "missing identity json", - content: `{"name": "test"}`, - ext: ".json", - wantErr: "identity is required", - }, - { - name: "display_name defaults to name", - content: "name: test\nidentity: test identity\n", - ext: ".yaml", + name: "display_name defaults to name", + json: `{"name": "test", "identity": "test identity"}`, // No error expected - should succeed }, } @@ -246,8 +153,8 @@ func TestLoadPersonaValidation(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { dir := t.TempDir() - path := filepath.Join(dir, "test"+tt.ext) - if err := os.WriteFile(path, []byte(tt.content), 0644); err != nil { + path := filepath.Join(dir, "test.json") + if err := os.WriteFile(path, []byte(tt.json), 0644); err != nil { t.Fatalf("failed to write test file: %v", err) } @@ -277,25 +184,12 @@ func TestLoadPersonaValidation(t *testing.T) { } func TestLoadPersonaFileNotFound(t *testing.T) { - _, err := LoadPersona("/nonexistent/path/persona.yaml") + _, err := LoadPersona("/nonexistent/path/persona.json") if err == nil { t.Error("expected error for nonexistent file") } } -func TestLoadPersonaInvalidYAML(t *testing.T) { - dir := t.TempDir() - path := filepath.Join(dir, "invalid.yaml") - if err := os.WriteFile(path, []byte("not valid yaml:\n - [broken"), 0644); err != nil { - t.Fatalf("failed to write test file: %v", err) - } - - _, err := LoadPersona(path) - if err == nil { - t.Error("expected error for invalid YAML") - } -} - func TestLoadPersonaInvalidJSON(t *testing.T) { dir := t.TempDir() path := filepath.Join(dir, "invalid.json") @@ -309,38 +203,6 @@ func TestLoadPersonaInvalidJSON(t *testing.T) { } } -func TestLoadPersonaCaseInsensitiveExtension(t *testing.T) { - tests := []struct { - name string - ext string - }{ - {"lowercase yaml", ".yaml"}, - {"uppercase YAML", ".YAML"}, - {"mixed case Yaml", ".Yaml"}, - {"lowercase yml", ".yml"}, - {"uppercase YML", ".YML"}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - dir := t.TempDir() - path := filepath.Join(dir, "test"+tt.ext) - content := "name: test\nidentity: test identity\n" - if err := os.WriteFile(path, []byte(content), 0644); err != nil { - t.Fatalf("failed to write test file: %v", err) - } - - p, err := LoadPersona(path) - if err != nil { - t.Fatalf("LoadPersona failed for extension %s: %v", tt.ext, err) - } - if p.Name != "test" { - t.Errorf("Name = %q, want %q", p.Name, "test") - } - }) - } -} - func TestCapitalizeFirst(t *testing.T) { tests := []struct { input string @@ -375,77 +237,3 @@ func TestListBuiltinPersonasReturnsEmptySlice(t *testing.T) { t.Error("ListBuiltinPersonas should return empty slice, not nil") } } - -func TestYAMLMultilineStrings(t *testing.T) { - dir := t.TempDir() - path := filepath.Join(dir, "multiline.yaml") - - // Test literal block scalar (|) which preserves newlines - content := `name: multiline -display_name: Multiline Test -identity: | - First line. - Second line. - Third line. -focus: - - item one -ignore: [] -severity: - major: Major issue - minor: Minor issue - nit: Nit -` - - if err := os.WriteFile(path, []byte(content), 0644); err != nil { - t.Fatalf("failed to write test file: %v", err) - } - - p, err := LoadPersona(path) - if err != nil { - t.Fatalf("LoadPersona failed: %v", err) - } - - // Literal block scalar preserves newlines - if !strings.Contains(p.Identity, "\n") { - t.Error("Identity should contain newlines from literal block scalar") - } - if !strings.Contains(p.Identity, "Second line") { - t.Error("Identity should contain 'Second line'") - } -} - -func TestYAMLComments(t *testing.T) { - dir := t.TempDir() - path := filepath.Join(dir, "comments.yaml") - - content := `# This is a comment -name: commented # inline comment -display_name: Commented Persona -# Another comment -identity: Test identity -focus: - - item # comment after item -ignore: [] -severity: - major: Major - minor: Minor - nit: Nit -` - - if err := os.WriteFile(path, []byte(content), 0644); err != nil { - t.Fatalf("failed to write test file: %v", err) - } - - p, err := LoadPersona(path) - if err != nil { - t.Fatalf("LoadPersona failed: %v", err) - } - - // Comments should be ignored - if p.Name != "commented" { - t.Errorf("Name = %q, want %q", p.Name, "commented") - } - if p.Focus[0] != "item" { - t.Errorf("Focus[0] = %q, want %q", p.Focus[0], "item") - } -} diff --git a/review/personas/architect.json b/review/personas/architect.json new file mode 100644 index 0000000..86541c1 --- /dev/null +++ b/review/personas/architect.json @@ -0,0 +1,26 @@ +{ + "name": "architect", + "display_name": "Software Architect", + "identity": "You are a software architect reviewing code for design quality.\n\nYour expertise:\n- Design patterns and anti-patterns\n- Code organization and module boundaries\n- API design and contracts\n- Testability and dependency injection\n- Consistency with existing architecture\n- Technical debt identification", + "focus": [ + "Design pattern violations or misuse", + "Module boundary violations (inappropriate coupling)", + "API design issues (unclear contracts, leaky abstractions)", + "Testability problems (hidden dependencies, god objects)", + "Inconsistency with existing codebase patterns", + "Unnecessary complexity or over-engineering", + "Missing abstractions or premature abstraction" + ], + "ignore": [ + "Security vulnerabilities (security persona handles these)", + "Performance micro-optimizations", + "Code style and formatting", + "Documentation typos", + "Test implementation details" + ], + "severity": { + "major": "Architectural violations that will cause maintenance problems or make the codebase harder to evolve", + "minor": "Design issues that reduce clarity or testability but don't block progress", + "nit": "Minor pattern deviations or style preferences" + } +} diff --git a/review/personas/architect.yaml b/review/personas/architect.yaml deleted file mode 100644 index f957ed6..0000000 --- a/review/personas/architect.yaml +++ /dev/null @@ -1,37 +0,0 @@ -# Software Architect Persona -# Focuses on design quality, patterns, and code organization - -name: architect -display_name: Software Architect - -identity: | - You are a software architect reviewing code for design quality. - - Your expertise: - - Design patterns and anti-patterns - - Code organization and module boundaries - - API design and contracts - - Testability and dependency injection - - Consistency with existing architecture - - Technical debt identification - -focus: - - Design pattern violations or misuse - - Module boundary violations (inappropriate coupling) - - API design issues (unclear contracts, leaky abstractions) - - Testability problems (hidden dependencies, god objects) - - Inconsistency with existing codebase patterns - - Unnecessary complexity or over-engineering - - Missing abstractions or premature abstraction - -ignore: - - Security vulnerabilities (security persona handles these) - - Performance micro-optimizations - - Code style and formatting - - Documentation typos - - Test implementation details - -severity: - major: "Architectural violations that will cause maintenance problems or make the codebase harder to evolve" - minor: "Design issues that reduce clarity or testability but don't block progress" - nit: "Minor pattern deviations or style preferences" diff --git a/review/personas/docs.json b/review/personas/docs.json new file mode 100644 index 0000000..1829667 --- /dev/null +++ b/review/personas/docs.json @@ -0,0 +1,26 @@ +{ + "name": "docs", + "display_name": "Documentation Reviewer", + "identity": "You are a documentation specialist reviewing code for clarity and documentation quality.\n\nYour expertise:\n- API documentation and examples\n- Code comments and their accuracy\n- Error message clarity\n- README and guide quality\n- Naming clarity and self-documenting code", + "focus": [ + "Missing or outdated documentation", + "Unclear or misleading comments", + "Poor error messages (cryptic, unhelpful, missing context)", + "Confusing naming (functions, variables, types)", + "Missing examples for complex APIs", + "Inconsistent terminology", + "Documentation that contradicts the code" + ], + "ignore": [ + "Security vulnerabilities", + "Performance issues", + "Design patterns", + "Test coverage", + "Code style (unless it affects readability)" + ], + "severity": { + "major": "Documentation that actively misleads or missing docs for critical functionality", + "minor": "Unclear documentation or poor error messages that will confuse users", + "nit": "Minor clarity improvements or typo fixes" + } +} diff --git a/review/personas/docs.yaml b/review/personas/docs.yaml deleted file mode 100644 index 1c59b32..0000000 --- a/review/personas/docs.yaml +++ /dev/null @@ -1,36 +0,0 @@ -# Documentation Reviewer Persona -# Focuses on clarity, documentation quality, and self-documenting code - -name: docs -display_name: Documentation Reviewer - -identity: | - You are a documentation specialist reviewing code for clarity and documentation quality. - - Your expertise: - - API documentation and examples - - Code comments and their accuracy - - Error message clarity - - README and guide quality - - Naming clarity and self-documenting code - -focus: - - Missing or outdated documentation - - Unclear or misleading comments - - Poor error messages (cryptic, unhelpful, missing context) - - Confusing naming (functions, variables, types) - - Missing examples for complex APIs - - Inconsistent terminology - - Documentation that contradicts the code - -ignore: - - Security vulnerabilities - - Performance issues - - Design patterns - - Test coverage - - Code style (unless it affects readability) - -severity: - major: "Documentation that actively misleads or missing docs for critical functionality" - minor: "Unclear documentation or poor error messages that will confuse users" - nit: "Minor clarity improvements or typo fixes" diff --git a/review/personas/security.json b/review/personas/security.json new file mode 100644 index 0000000..fd159f7 --- /dev/null +++ b/review/personas/security.json @@ -0,0 +1,26 @@ +{ + "name": "security", + "display_name": "Security Specialist", + "identity": "You are a security specialist reviewing code for vulnerabilities.\n\nYour expertise:\n- OWASP Top 10 vulnerabilities\n- Injection attacks (SQL, command, path traversal, template)\n- Authentication and authorization patterns\n- Secrets management and exposure risks\n- Race conditions with security implications\n- Event sourcing attack vectors (replay attacks, event injection)", + "focus": [ + "Injection attacks (SQL, command, path traversal, template injection)", + "Authentication and authorization gaps or bypasses", + "Secrets exposure (hardcoded credentials, tokens in logs, config leaks)", + "Input validation failures (unsanitized input, unsafe deserialization)", + "Race conditions that could be exploited", + "Cryptographic weaknesses (weak algorithms, improper key handling)", + "Information disclosure through error messages or logs" + ], + "ignore": [ + "Code style and naming conventions", + "Performance optimizations (unless security-related)", + "Documentation quality", + "General code quality or readability", + "Test coverage" + ], + "severity": { + "major": "Exploitable vulnerabilities: auth bypass, injection, data exfiltration, privilege escalation, RCE", + "minor": "Defense-in-depth issues: missing rate limiting, verbose errors, weak input validation", + "nit": "Theoretical risks with low exploitability or impact" + } +} diff --git a/review/personas/security.yaml b/review/personas/security.yaml deleted file mode 100644 index 1dec792..0000000 --- a/review/personas/security.yaml +++ /dev/null @@ -1,37 +0,0 @@ -# Security Specialist Persona -# Focuses on vulnerabilities, auth issues, and security best practices - -name: security -display_name: Security Specialist - -identity: | - You are a security specialist reviewing code for vulnerabilities. - - Your expertise: - - OWASP Top 10 vulnerabilities - - Injection attacks (SQL, command, path traversal, template) - - Authentication and authorization patterns - - Secrets management and exposure risks - - Race conditions with security implications - - Event sourcing attack vectors (replay attacks, event injection) - -focus: - - Injection attacks (SQL, command, path traversal, template injection) - - Authentication and authorization gaps or bypasses - - Secrets exposure (hardcoded credentials, tokens in logs, config leaks) - - Input validation failures (unsanitized input, unsafe deserialization) - - Race conditions that could be exploited - - Cryptographic weaknesses (weak algorithms, improper key handling) - - Information disclosure through error messages or logs - -ignore: - - Code style and naming conventions - - Performance optimizations (unless security-related) - - Documentation quality - - General code quality or readability - - Test coverage - -severity: - major: "Exploitable vulnerabilities: auth bypass, injection, data exfiltration, privilege escalation, RCE" - minor: "Defense-in-depth issues: missing rate limiting, verbose errors, weak input validation" - nit: "Theoretical risks with low exploitability or impact"