647928a0a1
Fundamentals: secure-defaults, input-validation, credential-handling, audit-logging Identity: authentication, authorization Attack Prevention: injection-prevention, dos-prevention, prompt-injection
103 lines
2.6 KiB
Markdown
103 lines
2.6 KiB
Markdown
# Input Validation
|
|
|
|
## Rule
|
|
|
|
Validate all input before it is used. Prefer an allowlist (accept only known-good values) over a blocklist (reject known-bad values).
|
|
|
|
**Source:** [OWASP Input Validation Cheat Sheet](https://cheatsheetseries.owasp.org/cheatsheets/Input_Validation_Cheat_Sheet.html)
|
|
|
|
## Correct Pattern
|
|
|
|
```python
|
|
import re
|
|
from typing import Optional
|
|
|
|
# Allowlist: only permit known-good patterns.
# The patterns end with \Z rather than $: in Python, $ also matches just
# before a trailing newline, so "alice\n" would otherwise be accepted.
VALID_USERNAME = re.compile(r'^[a-zA-Z0-9_]{3,20}\Z')
VALID_EMAIL = re.compile(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\Z')
|
|
|
|
def validate_username(username: str) -> Optional[str]:
    """Return the whitespace-trimmed username, or None if it is invalid.

    Args:
        username: Untrusted input. Anything that is not a ``str`` is
            rejected rather than raising.

    Returns:
        The stripped username when it matches ``VALID_USERNAME`` in full;
        otherwise ``None``.
    """
    # Untrusted callers (e.g. decoded JSON) can hand over a list or number;
    # treat that as invalid instead of failing on .strip().
    if not isinstance(username, str):
        return None
    username = username.strip()
    # fullmatch requires the entire string to satisfy the pattern, so the
    # result does not depend on how the pattern itself is anchored.
    if VALID_USERNAME.fullmatch(username):
        return username
    return None
|
|
|
|
def validate_positive_int(value: str, max_value: int = 10000) -> Optional[int]:
    """Parse and validate a positive integer with an upper bound.

    Args:
        value: Untrusted input, normally a string such as a URL path
            segment. A plain ``int`` is accepted as well.
        max_value: Largest value accepted (inclusive).

    Returns:
        The parsed integer when ``0 < n <= max_value``; otherwise ``None``.
    """
    # Allowlist the input types. int() would silently truncate a float
    # (3.7 -> 3) and raises OverflowError for float("inf"); bool is an int
    # subclass, so True would otherwise be accepted as 1.
    if isinstance(value, bool) or not isinstance(value, (str, int)):
        return None
    try:
        n = int(value)
    except ValueError:
        return None
    if 0 < n <= max_value:
        return n
    return None
|
|
```
|
|
|
|
## Incorrect Pattern
|
|
|
|
```python
|
|
# Wrong: blocklist approach (attackers find bypasses)
# Each entry is removed in a single pass, so a nested payload survives:
# "<scr<script>ipt>" collapses to "<script>" and "....//" collapses to "../".
# The comparison is also case-sensitive, so "<SCRIPT>" is never touched.
|
|
def sanitize(s):
|
|
bad = ["<script>", "DROP TABLE", "../"]
|
|
for b in bad:
|
|
s = s.replace(b, "")
|
|
return s
|
|
|
|
# Wrong: trusting input without validation
# user_id is interpolated straight into the SQL text by the f-string, so
# whatever the caller supplies becomes part of the statement (SQL injection).
|
|
def get_user(user_id):
|
|
return db.query(f"SELECT * FROM users WHERE id = {user_id}")
|
|
|
|
# Wrong: regex that allows too much
# `.*` can match the empty string, so match() succeeds on every input and
# nothing is ever rejected.
|
|
VALID_PATH = re.compile(r'.*') # Matches anything!
|
|
|
|
# Wrong: validation after use
# The is_valid() check only runs once expensive_operation() has already
# received the unvalidated data; the check belongs before that call.
|
|
def process(data):
|
|
result = expensive_operation(data) # Already used!
|
|
if not is_valid(data):
|
|
raise ValueError("Invalid")
|
|
```
|
|
|
|
## Validation at Boundaries
|
|
|
|
Validate at every trust boundary:
|
|
|
|
```python
|
|
# API endpoint — first line of defense
@app.route("/users/<user_id>")
def get_user(user_id: str):
    """Look up a user by the ID taken from the URL path.

    The raw path segment is validated before anything else touches it; an
    ID that fails validation produces a 400 response with an error body.
    """
    uid = validate_positive_int(user_id)
    if uid is not None:
        return user_service.get(uid)
    return {"error": "invalid_user_id"}, 400
|
|
|
|
# Service layer — defense in depth
class UserService:
    """Service-layer entry point for user lookups."""

    def get(self, user_id: int) -> User:
        """Return the repository lookup result for ``user_id``.

        Args:
            user_id: Identifier to look up; must be a positive ``int``.

        Raises:
            ValueError: If ``user_id`` is not a positive ``int``.
        """
        # Raise explicitly instead of using `assert`: assertions are removed
        # when Python runs with -O, which would silently disable this check.
        # bool is rejected because it is an int subclass (True would pass as 1).
        if (
            isinstance(user_id, bool)
            or not isinstance(user_id, int)
            or user_id <= 0
        ):
            raise ValueError("user_id must be a positive integer")
        return self.repo.find(user_id)
|
|
```
|
|
|
|
## Type Coercion Attacks
|
|
|
|
```python
|
|
# Wrong: loose equality / type confusion
# In Python "0" == 0 is False, but 0.0 == 0 and False == 0 are both True.
if user_input == 0:  # "0" == 0 in some languages
    grant_admin()

# Correct: strict type checking.
# bool is a subclass of int and False == 0, so it must be excluded
# explicitly or False would still satisfy the check.
if (
    isinstance(user_input, int)
    and not isinstance(user_input, bool)
    and user_input == 0
):
    ...
|
|
```
|
|
|
|
## Edge Cases
|
|
|
|
- Unicode normalization attacks (homoglyphs)
|
|
- Null byte injection (`file.txt\x00.jpg`)
|
|
- Integer overflow on length checks
|
|
- Locale-dependent parsing (`1,000` vs `1.000`)
|
|
- JSON vs form encoding differences
|