1eac5d3bcc
Complete security patterns collection (23 total): - csp.md: nonces, hashes, strict-dynamic, reporting - file-upload.md: content validation, path traversal, malware scanning - open-redirect.md: URL validation, OAuth redirect URI, bypass techniques - clickjacking.md: X-Frame-Options, frame-ancestors CSP Comprehensive coverage for web application security review.
206 lines
5.8 KiB
Markdown
206 lines
5.8 KiB
Markdown
# File Upload Security
|
|
|
|
## Rule
|
|
|
|
Validate content, not just extension. Store outside webroot. Generate new filenames. Set size limits.
|
|
|
|
**Source:** [OWASP File Upload Cheat Sheet](https://cheatsheetseries.owasp.org/cheatsheets/File_Upload_Cheat_Sheet.html)
|
|
|
|
## Attack Vectors
|
|
|
|
| Attack | Description |
|
|
|--------|-------------|
|
|
| Web shell | Upload .php/.jsp that executes commands |
|
|
| XSS via SVG | SVG with embedded JavaScript |
|
|
| XXE via Office | DOCX/XLSX are ZIP archives of XML; a malicious entity declaration can trigger XXE when the file is parsed |
|
|
| Path traversal | Filename like `../../../etc/cron.d/shell` |
|
|
| DoS | Upload huge files, exhaust disk |
|
|
| Malware hosting | Use your server to distribute malware |
|
|
|
|
## Correct Pattern
|
|
|
|
```python
|
|
import os
|
|
import uuid
|
|
import magic # python-magic for content detection
|
|
from pathlib import Path
|
|
|
|
# Upload destination. Kept outside the web server's document root so that an
# uploaded file can never be requested (or executed) directly by URL.
UPLOAD_DIR = Path("/var/uploads")  # Outside webroot!

# Hard upper bound on the accepted upload size, in bytes.
MAX_FILE_SIZE = 10 * 1024 * 1024  # 10 MB

# Allow-list: MIME type detected from the file's content -> extension given
# to the stored file. Any type not listed here is rejected.
ALLOWED_TYPES = {
    "image/jpeg": ".jpg",
    "image/png": ".png",
    "image/gif": ".gif",
    "application/pdf": ".pdf",
}
|
|
|
|
def save_upload(file_storage) -> str:
    """Safely persist an uploaded file and return its generated filename.

    The upload is size-checked, its MIME type is detected from the bytes
    themselves (never from the client-supplied name or header), and it is
    written under ``UPLOAD_DIR`` using a freshly generated UUID filename.

    Args:
        file_storage: Seekable binary file-like object holding the upload
            (``seek``, ``tell`` and ``read`` are used). Presumably a Werkzeug
            ``FileStorage`` -- confirm against the caller.

    Returns:
        The generated filename (``<uuid4><extension>``), relative to
        ``UPLOAD_DIR``.

    Raises:
        ValueError: If the file is larger than ``MAX_FILE_SIZE``, its
            detected type is not in ``ALLOWED_TYPES``, or the destination
            would fall outside ``UPLOAD_DIR``.
        FileExistsError: If the generated name already exists; the file is
            created exclusively and an existing file is never overwritten.
    """
    # Check size first (before reading into memory).
    file_storage.seek(0, os.SEEK_END)
    size = file_storage.tell()
    file_storage.seek(0)  # Reset

    if size > MAX_FILE_SIZE:
        raise ValueError("File too large")

    # Read at most one byte past the limit so memory use stays bounded even
    # if the stream yields more data than the size check above reported.
    content = file_storage.read(MAX_FILE_SIZE + 1)
    file_storage.seek(0)

    if len(content) > MAX_FILE_SIZE:
        raise ValueError("File too large")

    # Detect MIME type from content, not extension.
    detected_type = magic.from_buffer(content, mime=True)

    if detected_type not in ALLOWED_TYPES:
        raise ValueError(f"File type not allowed: {detected_type}")

    # Generate safe filename (never use user input): the extension comes
    # from the allow-list entry for the detected type.
    extension = ALLOWED_TYPES[detected_type]
    safe_filename = f"{uuid.uuid4()}{extension}"

    # Store outside webroot.
    dest_path = UPLOAD_DIR / safe_filename

    # Ensure we're still in upload dir (paranoid check).
    if not dest_path.resolve().is_relative_to(UPLOAD_DIR.resolve()):
        raise ValueError("Invalid path")

    # "xb" creates the file exclusively, so a pre-existing file (or a
    # symlink planted at that name) is never written through.
    with open(dest_path, "xb") as f:
        f.write(content)

    return safe_filename
|
|
|
|
def serve_upload(filename: str):
    """Serve a previously uploaded file as a forced download.

    Args:
        filename: Bare name of a file inside ``UPLOAD_DIR``. Must not contain
            any path component.

    Returns:
        Whatever ``send_file`` returns. ``send_file`` is not defined in this
        snippet -- presumably Flask's ``send_file``; confirm against the
        application's imports.

    Raises:
        ValueError: If ``filename`` is empty, contains ``..``, a path
            separator (``/`` or ``\\``) or a NUL byte, or resolves to a
            location outside ``UPLOAD_DIR``.
        FileNotFoundError: If the name does not refer to an existing regular
            file.
    """
    # Validate filename format: accept only a bare name. Backslash and NUL
    # are rejected as well, since they act as a separator / terminator on
    # some platforms.
    if (
        not filename
        or ".." in filename
        or "/" in filename
        or "\\" in filename
        or "\x00" in filename
    ):
        raise ValueError("Invalid filename")

    path = UPLOAD_DIR / filename

    # Verify path is within upload dir (also catches symlinks pointing out).
    if not path.resolve().is_relative_to(UPLOAD_DIR.resolve()):
        raise ValueError("Invalid path")

    # is_file() rather than exists(): a name such as "." passes the checks
    # above but refers to the upload directory itself, not to an upload.
    if not path.is_file():
        raise FileNotFoundError()

    # Serve with safe content-type.
    return send_file(
        path,
        mimetype="application/octet-stream",  # Force download
        as_attachment=True,
        download_name=filename,
    )
|
|
```
|
|
|
|
## Incorrect Pattern
|
|
|
|
```python
|
|
import os
|
|
|
|
# Wrong: using user-provided filename
def bad_upload(file):
    """ANTI-PATTERN, deliberately insecure -- do not copy.

    Saves the upload under the name supplied by the client, so the client
    chooses where the file is written.
    """
    filename = file.filename  # User controlled!
    file.save(f"/uploads/{filename}")
    # Attack: filename = "../../../var/www/shell.php"
|
|
|
|
# Wrong: checking only extension
def bad_validate(filename):
    """ANTI-PATTERN, deliberately insecure -- do not copy.

    Accepts a file purely because its name ends in ``.jpg`` or ``.png``;
    the file's content is never inspected.
    """
    return filename.endswith((".jpg", ".png"))
    # Attack: shell.php.jpg with PHP content
|
|
|
|
# Wrong: storing in webroot
def bad_upload_2(file):
    """ANTI-PATTERN, deliberately insecure -- do not copy.

    Writes the upload below the web server's document root, under the
    client-supplied name.
    """
    file.save(f"/var/www/html/uploads/{file.filename}")
    # Attacker can access directly, execute scripts
|
|
|
|
# Wrong: trusting Content-Type header
def bad_validate_2(file):
    """ANTI-PATTERN, deliberately insecure -- do not copy.

    Accepts the upload when the request's declared content type starts with
    ``image/``; the file's actual bytes are never inspected.
    """
    return file.content_type.startswith("image/")
    # Header is attacker-controlled!
|
|
|
|
# Wrong: no size limit
def bad_upload_3(file):
    """ANTI-PATTERN, deliberately insecure -- do not copy.

    Uses a generated filename but writes the upload without checking its
    size first.
    """
    import uuid  # local import: this snippet's import block only has os

    file.save(f"/uploads/{uuid.uuid4()}")
    # DoS: upload 100GB file
|
|
```
|
|
|
|
## Image-Specific Validation
|
|
|
|
```python
|
|
from PIL import Image
|
|
import io
|
|
|
|
MAX_IMAGE_PIXELS = 4096 * 4096  # Prevent decompression bomb


def validate_image(content: bytes) -> bool:
    """Return True if *content* is an acceptable JPEG, PNG or GIF image.

    The image must pass Pillow's integrity check, be one of the three
    allowed formats, contain no more than ``MAX_IMAGE_PIXELS`` pixels, and
    decode without error. Metadata is not removed here; use
    ``strip_image_metadata()`` for that.

    Args:
        content: Raw bytes of the uploaded file.

    Returns:
        True when every check passes, False otherwise. Any exception raised
        while inspecting the image is treated as a validation failure.
    """
    try:
        # Pillow's built-in guard. On its own it only *warns* for images
        # between 1x and 2x this limit (it raises above 2x), so an explicit
        # pixel-count check is made below as well.
        Image.MAX_IMAGE_PIXELS = MAX_IMAGE_PIXELS

        # verify() checks file integrity without decoding the pixel data.
        with Image.open(io.BytesIO(content)) as img:
            img.verify()

        # Reopen for further checks (verify() invalidates the image object).
        with Image.open(io.BytesIO(content)) as img:
            # Check format.
            if img.format not in ("JPEG", "PNG", "GIF"):
                return False

            # Enforce the pixel limit strictly, using the dimensions from
            # the header, before any pixel data is decoded.
            width, height = img.size
            if width * height > MAX_IMAGE_PIXELS:
                return False

            # Actually decode the image; truncated or corrupt pixel data
            # that verify() did not catch fails here.
            img.load()

        return True
    except Exception:
        # Deliberately broad: this is a validation boundary over untrusted
        # bytes, so any failure means "not a valid image" (fail closed).
        return False
|
|
|
|
def strip_image_metadata(content: bytes) -> bytes:
    """Re-encode an image in its own format and return the new bytes.

    Args:
        content: Raw bytes of an image Pillow can open.

    Returns:
        The image re-saved with ``format=img.format`` and no extra save
        options.

    NOTE(review): this relies on Pillow's ``save()`` not carrying EXIF and
    similar metadata over unless asked to; some items (for example a PNG
    ICC profile) may still be written -- confirm per allowed format.
    NOTE(review): ``save_all`` is not set, so presumably only the first
    frame of an animated GIF is kept -- confirm that is acceptable.
    """
    output = io.BytesIO()

    # The context manager releases the decoder's resources once the
    # re-encoded copy has been written to the buffer.
    with Image.open(io.BytesIO(content)) as img:
        img.save(output, format=img.format)

    return output.getvalue()
|
|
```
|
|
|
|
## Antivirus Scanning
|
|
|
|
```python
|
|
import clamd # ClamAV client
|
|
|
|
def scan_for_malware(filepath: str) -> bool:
    """Scan a file with ClamAV and report whether it is safe to keep.

    Args:
        filepath: Path of the file to scan, as the ClamAV daemon sees it.

    Returns:
        True only when the scan reports the file as clean. False when
        malware is found (the file is then deleted), when the scanner
        reports an error or an unexpected status for the file, or when the
        scan itself fails -- the function fails closed.

    NOTE(review): ``log`` is not defined in this snippet; the keyword
    arguments suggest a structlog-style logger -- confirm against the
    application.
    """
    import os  # local import: this snippet's import block only has clamd

    try:
        cd = clamd.ClamdUnixSocket()
        result = cd.scan(filepath)

        if result is None:
            return True  # Clean

        # result = {filepath: ('FOUND', 'Malware.Name')}
        status, name = result.get(filepath, (None, None))
        if status == "FOUND":
            log.warning("Malware detected", filepath=filepath, malware=name)
            os.remove(filepath)
            return False

        # Anything other than an explicit OK (an ERROR status, or no entry
        # for this path at all) is treated as a failed scan, not as clean.
        if status != "OK":
            log.error(
                "Antivirus scan failed",
                error=f"unexpected scan status {status!r}: {name!r}",
            )
            return False  # Fail closed

        return True
    except Exception as e:
        # Deliberately broad: any scanner or filesystem failure must block
        # the file rather than let it through unscanned.
        log.error("Antivirus scan failed", error=str(e))
        return False  # Fail closed
|
|
```
|
|
|
|
## Edge Cases
|
|
|
|
- Double extensions: `file.php.jpg` may execute as PHP on misconfigured servers
|
|
- Null byte: `file.php%00.jpg` truncates to `file.php` in some languages
|
|
- Case sensitivity: `.PhP` may still execute as PHP on case-insensitive systems such as Windows — compare extensions case-insensitively
|
|
- SVG can contain JavaScript — treat as dangerous
|
|
- ZIP files need recursive scanning of nested archives and a limit on total decompressed size to guard against zip bombs
|
|
- Office files (DOCX) are ZIPs containing XML — check for XXE
|
|
- GIF89a header with PHP code can execute on some servers
|