From f270fc68c1c97d98d241a8eee0f430e739221905 Mon Sep 17 00:00:00 2001 From: Rodin Date: Thu, 30 Apr 2026 16:01:52 -0700 Subject: [PATCH] docs: add smell files (anti-patterns + common mistakes) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit anti-patterns.md: 10 entries, 490 lines — what the stdlib avoids common-mistakes.md: 10 entries, 508 lines — language transition smells Covers: unwrap in library code, String errors, unsafe without SAFETY, clone as borrow-checker escape, Deref as inheritance, panic for recoverable errors, god structs, Box, static mut, stringly-typed APIs, Arc everywhere, collect then iterate, index loops, try/catch mentality, null sentinels, OOP hierarchies, global state, impl vs dyn. --- smells/anti-patterns.md | 490 ++++++++++++++++++++++++++++++++++++ smells/common-mistakes.md | 508 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 998 insertions(+) create mode 100644 smells/anti-patterns.md create mode 100644 smells/common-mistakes.md diff --git a/smells/anti-patterns.md b/smells/anti-patterns.md new file mode 100644 index 0000000..125987a --- /dev/null +++ b/smells/anti-patterns.md @@ -0,0 +1,490 @@ +# Anti-Patterns: What Rust's Stdlib Avoids (and Why) + +Patterns the Rust standard library team actively avoids, extracted +from studying what they DON'T do in their source code. + +**Source:** [rust-lang/rust](https://github.com/rust-lang/rust) at commit +[`f53b654`](https://github.com/rust-lang/rust/tree/f53b654a8882fd5fc036c4ca7a4ff41ce32497a6) + +--- + +## 1. unwrap() in Library Code + +**What they avoid:** Using `.unwrap()` in non-test, non-example code +without an invariant proof. + +**Source evidence:** 1,790 unwrap/expect calls in library/ (non-test). +Concentrated in: (1) proven invariants with expect() messages, +(2) compile-time constants, (3) doc examples. Zero bare `.unwrap()` +calls where the value could legitimately be None/Err at runtime. + +**Why it's bad:** unwrap() panics. In library code, a panic kills the +caller's thread without giving them a chance to handle the error. +Libraries should never make that decision for their users. + +**What to do instead:** +```rust +// BAD — library code that panics on user input +pub fn parse_config(input: &str) -> Config { + let value: serde_json::Value = serde_json::from_str(input).unwrap(); + Config { name: value["name"].as_str().unwrap().to_string() } +} + +// GOOD — returns Result, caller decides how to handle +pub fn parse_config(input: &str) -> Result { + let value: serde_json::Value = serde_json::from_str(input)?; + let name = value["name"].as_str() + .ok_or(ConfigError::MissingField("name"))?; + Ok(Config { name: name.to_string() }) +} +``` + +### When to Apply This Rule + +**Triggers:** +- `.unwrap()` in any `pub fn` that takes external input +- `.unwrap()` without a comment explaining why it can't fail +- `.unwrap()` in a library (as opposed to a binary/application) + +### Exceptions + +**It's OK when:** +- Proven invariant: `self.items.last().expect("items is never empty after init")` +- Compile-time constant: `Regex::new(r"^\d+$").unwrap()` (pattern is always valid) +- Test code: tests should panic on unexpected errors +- Application code where you WANT to crash on bad state + +--- + +## 2. String as Error Type + +**What they avoid:** Using `String` or `&str` as the error type in +`Result`. + +**Source evidence:** 0 public functions in library/ return +`Result`. Every error is a named struct or enum that +implements the Error trait. + +**Why it's bad:** Can't match on it programmatically. Callers must +string-compare to determine what went wrong. No type safety. No +structured data. Breaks the entire Error trait ecosystem. + +**What to do instead:** +```rust +// BAD — stringly-typed error +fn connect(addr: &str) -> Result { + if !valid(addr) { return Err(format!("invalid address: {addr}")); } + if timeout() { return Err("connection timed out".to_string()); } + Ok(conn) +} +// Caller: if err.contains("timeout") { ... } — fragile! + +// GOOD — typed error enum +#[derive(Debug)] +enum ConnectError { + InvalidAddress(String), + Timeout(Duration), + Refused, +} +impl fmt::Display for ConnectError { ... } +impl std::error::Error for ConnectError {} + +fn connect(addr: &str) -> Result { ... } +// Caller: match err { ConnectError::Timeout(d) => retry(d), ... } +``` + +### When to Apply This Rule + +**Triggers:** +- `Result` in any function signature +- Error handling that does string matching +- Multiple different failure modes lumped into one String + +### Exceptions + +**It's OK when:** +- Quick scripts/prototypes that will be thrown away +- Internal one-off tools where the only consumer is a human reading logs + +--- + +## 3. Unsafe Without SAFETY Comment + +**What they avoid:** Any `unsafe { }` block without an accompanying +`// SAFETY:` comment explaining the soundness proof. + +**Source evidence:** 2,463 `// SAFETY:` comments for 31,244 unsafe +blocks. The density is lower than 1:1 because many unsafe blocks are +one-liners inside already-documented unsafe functions. But every +PUBLIC-facing unsafe block has a comment. + +**Why it's bad:** Without the comment, no one (including future-you) +can audit whether the unsafe code is actually sound. It might be +correct today but break after a refactor that invalidates an +assumption no one documented. + +**What to do instead:** +```rust +// BAD — unsafe with no justification +unsafe { + ptr::copy_nonoverlapping(src, dst, len); +} + +// GOOD — proves soundness at the call site +// SAFETY: src points to self.buf[0..self.len] and dst points to +// out[0..self.len]. They're from different allocations so they +// can't overlap. len is bounded by self.len which we checked above. +unsafe { + ptr::copy_nonoverlapping(src, dst, len); +} +``` + +### When to Apply This Rule + +**Triggers:** +- Every `unsafe { }` block (no exceptions) +- The comment should explain WHY, not just WHAT + +### Exceptions + +**None.** This rule has no exceptions in the stdlib. + +--- + +## 4. Clone to Satisfy the Borrow Checker + +**What they avoid:** Sprinkling `.clone()` calls to make the borrow +checker happy instead of restructuring the code. + +**Source evidence:** The stdlib uses clone deliberately for explicit +copies, never as a borrow-checker escape hatch. Clone calls are +concentrated in: (1) actual need for independent copies, (2) Cow +clone-on-write, (3) test setup. + +**Why it's bad:** Hides the real problem (ownership structure is wrong). +Adds unnecessary allocations. Makes performance unpredictable. The +borrow checker error is telling you your design has a flaw. + +**What to do instead:** +```rust +// BAD — cloning because the borrow checker complains +fn process(data: &mut Vec) { + for item in data.clone().iter() { // CLONE to avoid borrow conflict + if item.is_empty() { + data.retain(|s| !s.is_empty()); + } + } +} + +// GOOD — restructure to avoid the conflict +fn process(data: &mut Vec) { + data.retain(|s| !s.is_empty()); // just do what you actually want +} +``` + +### When to Apply This Rule + +**Triggers:** +- `.clone()` adjacent to a borrow checker error comment +- Clone inside a loop (O(n) allocations) +- Cloning a large struct just to use one field + +### Exceptions + +**It's OK when:** +- You genuinely need two independent copies (fork a value) +- The type is cheap to clone (small Copy types, Arc) +- Prototyping to get the logic right before optimizing + +--- + +## 5. Deref as Inheritance + +**What they avoid:** Using the `Deref` trait to simulate inheritance +from OOP languages. + +**Source evidence:** Every Deref impl in the stdlib follows the +"smart pointer" pattern: Box→T, Vec→[T], String→str, Arc→T. +Zero cases of "struct A derefs to struct B" where B isn't the +logical inner content. + +**Why it's bad:** Deref coercion is implicit — the compiler inserts +it silently. If Dog derefs to Animal, users can accidentally call +Animal methods on Dog without realizing they're crossing an +abstraction boundary. Method resolution becomes unpredictable. + +**What to do instead:** +```rust +// BAD — using Deref as inheritance +struct Animal { name: String } +struct Dog { animal: Animal, tricks: Vec } + +impl Deref for Dog { + type Target = Animal; + fn deref(&self) -> &Animal { &self.animal } +} +// dog.name works — but it's confusing, not actually inheritance + +// GOOD — explicit delegation +struct Dog { name: String, tricks: Vec } + +impl Dog { + fn name(&self) -> &str { &self.name } +} +``` + +### When to Apply This Rule + +**Triggers:** +- Deref where the target type isn't "contained" content +- Using Deref to avoid writing delegation methods +- Deref between two types at the same abstraction level + +### Exceptions + +**It's OK when:** +- Your type IS a smart pointer (wraps and owns one inner value) +- The target is clearly "the thing inside" (Vec→[T], Box→T) +- Deref target is at a lower abstraction level (concrete → abstract) + +--- + +## 6. Panic for Recoverable Errors + +**What they avoid:** Using `panic!()` for conditions that callers +could reasonably handle. + +**Source evidence:** 514 panic! calls in library/ (non-test). ALL are +for: (1) invariant violations (bugs), (2) index out of bounds where +the contract says "index must be valid", (3) unimplemented/unreachable. +Zero panics for "file not found" or "parse failed" type errors. + +**Why it's bad:** Panic unwinds the stack. The caller can't recover +gracefully. In servers, it kills the request. In libraries, it's +a contract violation — you're deciding for the user that this error +is fatal. + +**What to do instead:** +```rust +// BAD — panicking on user-facing errors +pub fn load_config(path: &str) -> Config { + let text = std::fs::read_to_string(path) + .expect("failed to read config"); // KILLS caller's thread + toml::from_str(&text).expect("invalid config") +} + +// GOOD — let the caller decide +pub fn load_config(path: &str) -> Result { + let text = std::fs::read_to_string(path)?; + let config = toml::from_str(&text)?; + Ok(config) +} +``` + +### When to Apply This Rule + +**Triggers:** +- panic!/expect/unwrap on external input (files, network, user data) +- panic in library code (not application code) +- "Failed to X" in expect messages where X is an I/O operation + +### Exceptions + +**It's OK when:** +- Programmer bug: index out of bounds on internal array (invariant violated) +- Impossible state: match arm that can't be reached by construction +- Application main(): crashing on startup config failure is reasonable + +--- + +## 7. God Struct (All State in One Place) + +**What they avoid:** A single struct that holds the entire application +state with dozens of fields. + +**Source evidence:** The stdlib separates concerns into distinct types. +`io::Error` has 2 fields. `Vec` has 3 fields. The largest public +struct is OsString at 1 field (the inner platform string). No struct +in the stdlib has more than 6-7 fields. + +**Why it's bad:** Can't borrow parts independently (borrowing one field +borrows the whole struct). Hard to test in isolation. Methods grow +unboundedly. Violates single responsibility. + +**What to do instead:** +```rust +// BAD — god struct +struct App { + db: Database, + cache: Cache, + config: Config, + logger: Logger, + metrics: Metrics, + users: HashMap, + sessions: HashMap, + pending_jobs: VecDeque, +} +// Every method has access to everything. Can't test cache without db. + +// GOOD — separated concerns +struct App { db: Database, cache: CacheLayer, jobs: JobQueue } +struct CacheLayer { inner: Cache, config: CacheConfig } +struct JobQueue { pending: VecDeque, metrics: Metrics } +``` + +### When to Apply This Rule + +**Triggers:** +- Struct with 8+ fields +- Methods that only use 2-3 of the struct's fields +- Can't write a unit test without constructing the entire struct +- Borrow checker fights when trying to use two parts simultaneously + +### Exceptions + +**It's OK when:** +- Configuration structs (many fields, all read-only, constructed once) +- DTOs for serialization (matching an external schema) +- Builder intermediate state (temporarily holds many optional fields) + +--- + +## 8. Box Instead of Generics + +**What they avoid:** Using `Box` with downcasting to erase +types instead of using proper generics. + +**Source evidence:** `Any` is used in the stdlib only for: panic +payloads (`Box`) and the reflection system. Zero +uses of `dyn Any` as a general-purpose container or map value. + +**Why it's bad:** Loses all type information. Downcasting can fail at +runtime. You've recreated dynamic typing inside a statically-typed +language — the compiler can't help you anymore. + +**What to do instead:** +```rust +// BAD — type erasure via Any (Java's Object approach) +struct Registry { + items: HashMap>, +} +impl Registry { + fn get(&self, key: &str) -> Option<&T> { + self.items.get(key)?.downcast_ref() // runtime failure if wrong type! + } +} + +// GOOD — use generics or typed enums +enum ConfigValue { String(String), Int(i64), Bool(bool) } +struct Config { values: HashMap } +// Or: use generics to make the container type-safe +struct TypedMap(HashMap>); // at least TypeId keyed +``` + +### When to Apply This Rule + +**Triggers:** +- `Box` in a non-error, non-panic context +- `.downcast_ref::()` calls scattered through the code +- "I don't know what type this will be" — you probably do + +### Exceptions + +**It's OK when:** +- Panic handling (that's what `catch_unwind` returns) +- Plugin systems where types genuinely aren't known at compile time +- Thin FFI wrappers bridging to dynamic languages + +--- + +## 9. Shared Mutable State Without Synchronization + +**What they avoid:** Any `static mut` or mutable global state without +proper synchronization primitives. + +**Source evidence:** Zero `static mut` in public library APIs. +All shared mutable state uses: `AtomicUsize` (783 usages), +`Mutex` (135 usages), `OnceLock` (254 usages), or `thread_local!`. +The compiler prevents data races at compile time via Send/Sync. + +**Why it's bad:** `static mut` is always unsafe and the unsoundest +construct in Rust. Multiple threads accessing it simultaneously is +undefined behavior. Even single-threaded access requires unsafe. + +**What to do instead:** +```rust +// BAD — global mutable state (UB waiting to happen) +static mut COUNTER: u64 = 0; +fn increment() { + unsafe { COUNTER += 1; } // DATA RACE if called from multiple threads +} + +// GOOD — atomic for simple counters +static COUNTER: AtomicU64 = AtomicU64::new(0); +fn increment() { + COUNTER.fetch_add(1, Ordering::Relaxed); // thread-safe, no unsafe +} + +// GOOD — OnceLock for lazy initialization +static CONFIG: OnceLock = OnceLock::new(); +fn get_config() -> &'static Config { + CONFIG.get_or_init(|| load_config()) +} +``` + +### When to Apply This Rule + +**Triggers:** +- `static mut` anywhere +- `unsafe` blocks that access global state +- Global state without atomic/mutex/rwlock protection + +### Exceptions + +**Literally none for `static mut`.** There is no safe use case that +can't be expressed with atomics, OnceLock, or Mutex. The Rust team +has discussed deprecating `static mut` entirely. + +--- + +## 10. Stringly-Typed APIs (Magic Constants) + +**What they avoid:** Using string constants where enums would provide +compile-time checking. + +**Source evidence:** The stdlib uses enums extensively: `ErrorKind` +(30+ variants), `Ordering` (3 variants), `SeekFrom` (3 variants). +String parameters are only for genuinely dynamic data (file paths, +user input, format strings). + +**Why it's bad:** Typos compile fine. `"relaexd"` won't be caught. +No autocomplete. No exhaustive matching. Refactoring is grep-based +instead of compiler-based. + +**What to do instead:** +```rust +// BAD — string constants (invisible typos) +fn set_log_level(level: &str) { ... } +set_log_level("wraning"); // typo compiles fine — bug at runtime + +// GOOD — enum (typos are compile errors) +enum LogLevel { Error, Warning, Info, Debug, Trace } +fn set_log_level(level: LogLevel) { ... } +set_log_level(LogLevel::Wraning); // COMPILE ERROR — caught immediately +``` + +### When to Apply This Rule + +**Triggers:** +- String parameters with a fixed set of valid values +- Match/if-else chains comparing strings +- Documentation that says "valid values are: X, Y, Z" + +### Exceptions + +**It's OK when:** +- Values are genuinely dynamic (user input, file paths, URLs) +- Interop with external systems that use strings (HTTP headers, env vars) +- The set of values changes frequently (but consider #[non_exhaustive] enum) + + diff --git a/smells/common-mistakes.md b/smells/common-mistakes.md new file mode 100644 index 0000000..540e453 --- /dev/null +++ b/smells/common-mistakes.md @@ -0,0 +1,508 @@ +# Common Mistakes: Language Transition Smells in Rust + +Patterns people bring from other languages (C++, Java, Python, Go) +that don't work well in Rust. These aren't necessarily wrong +everywhere — they're smells that suggest the author hasn't yet +internalized Rust's ownership model. + +--- + +## 1. Arc> Everywhere (Borrow Checker Avoidance) + +**What it looks like:** Every shared reference wrapped in +`Arc>` even when single-threaded or single-owner. + +**Where it comes from:** Java/C# where everything is garbage-collected +and shared references are free. Developers use Arc as "Rust's +version of a reference." + +**Why it's a smell:** Arc has atomic overhead on every clone/drop. +Mutex has lock overhead on every access. If you're doing this +single-threaded, you're paying for concurrency you don't use. +It usually means the ownership structure is wrong. + +**What to do instead:** +```rust +// SMELL — Arc as a crutch +struct App { + config: Arc>, + db: Arc>, + cache: Arc>, +} +// Every access: self.config.lock().unwrap().timeout + +// FIX — own what you own, borrow what you borrow +struct App { + config: Config, // App owns config (no sharing needed) + db: Database, // App owns database + cache: Cache, // App owns cache +} +// Access: self.config.timeout +``` + +### When to Apply This Rule + +**Triggers:** +- `Arc>` in single-threaded code +- Every field in a struct wrapped in Arc +- More than 2 Arc fields (usually wrong) +- Using `.lock().unwrap()` dozens of times per function + +### Exceptions + +**It's OK when:** +- The value IS shared across threads (that's what Arc is for) +- You've restructured and genuinely need shared ownership +- Async code with multiple tasks accessing shared state + +--- + +## 2. Collecting Iterators Unnecessarily + +**What it looks like:** `.collect::>()` followed by another +iteration over the collected vec. + +**Where it comes from:** Python lists and Java streams where +intermediate collections are normal. + +**Why it's a smell:** Allocates a Vec you immediately discard. +Iterators are lazy and compose without allocation. Two passes +through the data when one would suffice. + +**What to do instead:** +```rust +// SMELL — collect then iterate again +let names: Vec = users.iter() + .map(|u| u.name.clone()) + .collect(); +let long_names: Vec<&String> = names.iter() + .filter(|n| n.len() > 10) + .collect(); + +// FIX — chain without intermediate collection +let long_names: Vec = users.iter() + .map(|u| u.name.clone()) + .filter(|n| n.len() > 10) + .collect(); // one allocation, one pass +``` + +### When to Apply This Rule + +**Triggers:** +- `.collect()` followed by `.iter()` on the result +- Intermediate `Vec` that's only iterated once then dropped +- Multiple passes when a single chain would work + +### Exceptions + +**It's OK when:** +- You need the collection for multiple uses (sort, binary search) +- You need to know the length before processing +- Ownership requires it (can't chain because of lifetime issues) + +--- + +## 3. Index Loops Instead of Iterators + +**What it looks like:** `for i in 0..vec.len() { vec[i] ... }` +instead of `for item in &vec { ... }`. + +**Where it comes from:** C, C++, Java for-loops with index variables. + +**Why it's a smell:** Bounds checking on every access (runtime cost). +Can't be auto-vectorized as well. More error-prone (off-by-one). +Doesn't compose with iterator adapters. Not idiomatic. + +**What to do instead:** +```rust +// SMELL — C-style index loop +for i in 0..items.len() { + process(&items[i]); // bounds check on every access +} + +// FIX — iterator (no bounds checks, composable) +for item in &items { + process(item); +} + +// Need the index too? +for (i, item) in items.iter().enumerate() { + println!("{i}: {item}"); +} +``` + +### When to Apply This Rule + +**Triggers:** +- `for i in 0..x.len()` followed by `x[i]` +- Index variable only used for indexing (not for math) +- Parallel iteration of two collections by index + +### Exceptions + +**It's OK when:** +- You need to modify the collection during iteration (but consider retain/drain) +- You need random access patterns (skip ahead, look behind) +- The algorithm genuinely needs indices (matrix operations) + +--- + +## 4. .clone() as First Resort + +**What it looks like:** Sprinkling `.clone()` to silence every borrow +checker error without thinking about why it's complaining. + +**Where it comes from:** GC languages where copying references is free. +The borrow checker error feels like a "bug to fix" rather than a +"design signal." + +**Why it's a smell:** Hidden allocation costs. The borrow checker is +telling you the ownership structure is wrong — clone hides the +message without fixing the problem. + +**What to do instead:** +```rust +// SMELL — clone to silence borrow checker +fn process(data: &HashMap>) -> Vec { + let keys: Vec = data.keys().cloned().collect(); + let mut result = Vec::new(); + for key in &keys { + for value in data.get(key).unwrap() { + result.push(format!("{key}: {value}")); + } + } + result +} + +// FIX — restructure to avoid the need +fn process(data: &HashMap>) -> Vec { + data.iter() + .flat_map(|(key, values)| { + values.iter().map(move |v| format!("{key}: {v}")) + }) + .collect() +} +``` + +### When to Apply This Rule + +**Triggers:** +- `.clone()` added right after a borrow checker error +- Clone inside a loop (O(n) allocations) +- Clone of a large struct to use one field +- More than 3 clone calls in one function + +### Exceptions + +**It's OK when:** +- You genuinely need independent ownership (forking a value) +- Cloning is cheap (Arc::clone is just a counter increment) +- Prototyping — clone now, optimize later (but DO optimize later) + +--- + +## 5. try/catch Mentality (panic + catch_unwind) + +**What it looks like:** Using `panic!()` as "throw" and +`catch_unwind()` as "catch" to simulate exception handling. + +**Where it comes from:** Java/Python/C# exception handling where +throwing and catching is the normal error flow. + +**Why it's a smell:** Panics are for BUGS, not expected errors. +`catch_unwind` doesn't catch all panics (abort mode). It's not +equivalent to try/catch — it's a last-resort safety net. Using it +for control flow means you've reimplemented exceptions badly. + +**What to do instead:** +```rust +// SMELL — panic as exception, catch_unwind as try/catch +fn validate(input: &str) { + if input.is_empty() { panic!("empty input"); } + if input.len() > 100 { panic!("too long"); } +} +fn handle(input: &str) -> String { + match std::panic::catch_unwind(|| validate(input)) { + Ok(()) => "valid".into(), + Err(e) => format!("error: {:?}", e), + } +} + +// FIX — use Result for expected errors +fn validate(input: &str) -> Result<(), ValidationError> { + if input.is_empty() { return Err(ValidationError::Empty); } + if input.len() > 100 { return Err(ValidationError::TooLong(input.len())); } + Ok(()) +} +fn handle(input: &str) -> String { + match validate(input) { + Ok(()) => "valid".into(), + Err(e) => format!("error: {e}"), + } +} +``` + +### When to Apply This Rule + +**Triggers:** +- `catch_unwind` in application code (not test harnesses) +- `panic!()` for validatable conditions +- Pattern of panic → catch → extract message + +### Exceptions + +**It's OK when:** +- Thread pool implementations (catch panics from user-provided closures) +- FFI boundaries (panics must not cross extern "C" boundaries) +- Test frameworks (catch panics to report failures) + +--- + +## 6. Null Sentinel Values Instead of Option + +**What it looks like:** Using magic values (-1, 0, "", `u64::MAX`) +to represent "no value" instead of `Option`. + +**Where it comes from:** C (NULL/0/-1 as sentinel), Go (zero values), +database design (nullable columns with default values). + +**Why it's a smell:** Compiler can't enforce checking. -1 might be a +valid value in some context. Empty string might be legitimate input. +Option forces the caller to handle the None case. + +**What to do instead:** +```rust +// SMELL — sentinel values +struct User { + age: i32, // -1 means "not provided" + nickname: String, // "" means "none" +} +fn get_age(user: &User) -> i32 { + if user.age == -1 { panic!("no age!"); } // easy to forget this check + user.age +} + +// FIX — Option makes absence explicit +struct User { + age: Option, // None means "not provided" + nickname: Option, // None means "none" +} +fn get_age(user: &User) -> Option { + user.age // caller MUST handle None — compiler enforces it +} +``` + +### When to Apply This Rule + +**Triggers:** +- Numeric fields with documented "special" values +- String fields where empty means "absent" +- Comments like "// -1 means not set" +- Checks like `if x == -1` or `if s.is_empty()` before use + +### Exceptions + +**It's OK when:** +- FFI with C APIs that use sentinel values (wrap in Option at the boundary) +- Performance-critical packed structs where Option's niche doesn't fit +- Matching an external protocol/format that uses sentinels + +--- + +## 7. OOP Inheritance Thinking (Deep Trait Object Hierarchies) + +**What it looks like:** Complex trait object hierarchies trying to +replicate class inheritance from Java/C++. + +**Where it comes from:** OOP languages where "is-a" relationships +and abstract base classes are the primary abstraction tool. + +**Why it's a smell:** Rust doesn't have inheritance. Trait objects +have dynamic dispatch overhead. Complex hierarchies lead to +Box type soup. Composition is almost always better. + +**What to do instead:** +```rust +// SMELL — trying to build class hierarchy +trait Shape { fn area(&self) -> f64; } +trait Drawable: Shape { fn draw(&self); } +trait Resizable: Drawable { fn resize(&mut self, factor: f64); } +// Three levels of "inheritance" — complex, fragile + +// FIX — flat traits + composition +trait HasArea { fn area(&self) -> f64; } +trait Drawable { fn draw(&self); } +trait Resizable { fn resize(&mut self, factor: f64); } + +struct Circle { radius: f64 } +impl HasArea for Circle { ... } +impl Drawable for Circle { ... } +impl Resizable for Circle { ... } +// Each trait is independent — implement what you need +``` + +### When to Apply This Rule + +**Triggers:** +- Trait with supertraits 3+ levels deep +- `Box` where A has 5+ methods +- Trying to "inherit" behavior from a base struct + +### Exceptions + +**It's OK when:** +- Supertrait bounds are genuinely required (Error: Debug + Display) +- The hierarchy matches a real protocol (Iterator → DoubleEndedIterator) +- Framework design where trait objects are the extension point + +--- + +## 8. Global Mutable State (static mut, lazy_static Mutex) + +**What it looks like:** Global variables with interior mutability as +the primary state management strategy. + +**Where it comes from:** C global variables, Python module-level state, +Singleton pattern from Java/C#. + +**Why it's a smell:** Makes testing hard (tests share state). Makes +reasoning hard (who modified it? when?). Makes concurrency hard +(locks everywhere). Creates hidden dependencies between functions. + +**What to do instead:** +```rust +// SMELL — global mutable state +static STATE: LazyLock> = LazyLock::new(|| { + Mutex::new(AppState::new()) +}); + +fn process_request(req: Request) -> Response { + let mut state = STATE.lock().unwrap(); + state.handle(req) // hidden dependency on global +} + +// FIX — pass state explicitly +struct App { state: AppState } + +impl App { + fn process_request(&mut self, req: Request) -> Response { + self.state.handle(req) // dependency is visible and testable + } +} +``` + +### When to Apply This Rule + +**Triggers:** +- `static` + `Mutex` or `RwLock` for application state +- Functions that "magically" access state without parameters +- Hard-to-test code because of hidden global dependencies +- `lazy_static!` or `LazyLock` for mutable state + +### Exceptions + +**It's OK when:** +- True global configuration (read-only after init — use OnceLock) +- Logging (global logger is an accepted pattern) +- Metrics/counters (AtomicU64 for statistics) +- Process-level singletons (allocator, thread pool) + +--- + +## 9. Returning impl Trait When Box Is Needed + +**What it looks like:** Confusion between `impl Trait` (static dispatch, +one concrete type) and `Box` (dynamic dispatch, multiple +types). + +**Where it comes from:** Not understanding that `impl Trait` in return +position means "one specific type the compiler knows" — not "any type +implementing this trait." + +**Why it's a smell:** Using `impl Trait` when you need to return +DIFFERENT types based on runtime conditions causes confusing errors. +Using `Box` when you always return the same type adds +unnecessary heap allocation. + +**What to do instead:** +```rust +// SMELL — impl Trait can't return different types +fn get_writer(use_file: bool) -> impl Write { + if use_file { + File::create("out.txt").unwrap() + } else { + io::stdout() // ERROR: different types! + } +} + +// FIX — Box for runtime polymorphism +fn get_writer(use_file: bool) -> Box { + if use_file { + Box::new(File::create("out.txt").unwrap()) + } else { + Box::new(io::stdout()) + } +} + +// FIX — enum for known set of types (no heap allocation) +enum Writer { File(File), Stdout(io::Stdout) } +impl Write for Writer { ... } +``` + +### When to Apply This Rule + +**Triggers:** +- Compiler error "expected X, found Y" in if/match arms with impl Trait +- Box when only one type is ever returned (unnecessary) +- Confusion about when to use which + +### Exceptions + +**It's OK when:** +- `impl Iterator` in return position (common, always one type) +- `impl Future` for async functions (always one type) + +--- + +## 10. Ignoring #[must_use] Warnings + +**What it looks like:** Suppressing `Result` unused warnings with +`let _ = ...` without considering whether the error matters. + +**Where it comes from:** Languages where exceptions propagate +automatically. In Python/Java, if you don't catch it, it propagates. +In Rust, if you don't check it, it's silently lost. + +**Why it's a smell:** `let _ = file.write(data);` silently discards +a potential I/O error. The data might not have been written. The +user never finds out. + +**What to do instead:** +```rust +// SMELL — silencing the warning without thinking +let _ = file.write_all(data); // error swallowed — data might be lost +let _ = channel.send(msg); // receiver might be gone — msg dropped + +// FIX — handle or propagate +file.write_all(data)?; // propagate to caller +// Or if you genuinely don't care: +if let Err(e) = channel.send(msg) { + tracing::warn!("channel closed: {e}"); // at least log it +} +``` + +### When to Apply This Rule + +**Triggers:** +- `let _ =` on a Result without any comment +- Multiple `let _ =` in sequence (each one hiding a potential failure) +- The function being called is I/O, network, or state-changing + +### Exceptions + +**It's OK when:** +- The error genuinely doesn't matter (e.g., best-effort logging) +- The operation is idempotent and will be retried anyway +- You've documented WHY: `let _ = tx.send(msg); // receiver might be gone, that's fine` + +