1eac5d3bcc
Complete security patterns collection (23 total): - csp.md: nonces, hashes, strict-dynamic, reporting - file-upload.md: content validation, path traversal, malware scanning - open-redirect.md: URL validation, OAuth redirect URI, bypass techniques - clickjacking.md: X-Frame-Options, frame-ancestors CSP Comprehensive coverage for web application security review.
5.8 KiB
5.8 KiB
File Upload Security
Rule
Validate content, not just extension. Store outside webroot. Generate new filenames. Set size limits.
Source: OWASP File Upload Cheat Sheet
Attack Vectors
| Attack | Description |
|---|---|
| Web shell | Upload .php/.jsp that executes commands |
| XSS via SVG | SVG with embedded JavaScript |
| XXE via Office | DOCX/XLSX are ZIP archives of XML; parsing them with external entities enabled allows XXE |
| Path traversal | Filename like ../../../etc/cron.d/shell |
| DoS | Upload huge files, exhaust disk |
| Malware hosting | Use your server to distribute malware |
Correct Pattern
import os
import uuid
import magic # python-magic for content detection
from pathlib import Path
# Directory that uploads are written to and served from.
UPLOAD_DIR = Path("/var/uploads") # Outside webroot!
# Largest accepted upload, in bytes.
MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB
# Allow-list of MIME types (as detected from file content), each mapped to
# the extension given to the stored file. Anything not listed is rejected.
ALLOWED_TYPES = {
"image/jpeg": ".jpg",
"image/png": ".png",
"image/gif": ".gif",
"application/pdf": ".pdf",
}
def save_upload(file_storage) -> str:
    """Validate an uploaded file and store it under a server-generated name.

    The upload is size-checked, its MIME type is detected from the bytes
    themselves (never from the client-supplied name or header), and it is
    written to ``UPLOAD_DIR`` under a random UUID-based filename.

    Args:
        file_storage: Seekable binary file-like object holding the upload;
            it must support ``seek``, ``tell`` and ``read``. The stream is
            rewound to position 0 before this function validates content.

    Returns:
        The generated filename (UUID plus the extension mapped from the
        detected type), relative to ``UPLOAD_DIR``.

    Raises:
        ValueError: If the file exceeds ``MAX_FILE_SIZE``, its detected type
            is not in ``ALLOWED_TYPES``, or the destination would fall
            outside ``UPLOAD_DIR``.
        FileExistsError: If a file with the generated name already exists.
    """
    # Measure the stream by seeking to its end, so oversized uploads are
    # rejected before any of their content is read into memory.
    file_storage.seek(0, 2)
    size = file_storage.tell()
    file_storage.seek(0)
    if size > MAX_FILE_SIZE:
        raise ValueError("File too large")

    # Read content for validation, then rewind for any later consumer.
    content = file_storage.read()
    file_storage.seek(0)

    # Detect the MIME type from the content, not from the extension.
    detected_type = magic.from_buffer(content, mime=True)
    if detected_type not in ALLOWED_TYPES:
        raise ValueError(f"File type not allowed: {detected_type}")

    # The stored name is built only from a fresh UUID and the allow-listed
    # extension; nothing user-supplied ends up in the path.
    extension = ALLOWED_TYPES[detected_type]
    safe_filename = f"{uuid.uuid4()}{extension}"
    dest_path = UPLOAD_DIR / safe_filename

    # Paranoid check: the resolved destination must still be inside the
    # upload directory.
    if not dest_path.resolve().is_relative_to(UPLOAD_DIR.resolve()):
        raise ValueError("Invalid path")

    # "xb" creates the file exclusively: an existing file at this path is
    # never truncated or overwritten; the open fails instead.
    with open(dest_path, "xb") as f:
        f.write(content)

    return safe_filename
def serve_upload(filename: str):
    """Send a stored upload back to the client as a forced download.

    Args:
        filename: Bare stored filename (such as the value returned by
            ``save_upload``). It must not contain directory components.

    Returns:
        The result of ``send_file`` for the stored file, served as an
        ``application/octet-stream`` attachment.

    Raises:
        ValueError: If ``filename`` is empty, contains ``..``, a path
            separator or a NUL byte, or resolves outside ``UPLOAD_DIR``.
        FileNotFoundError: If the name does not refer to an existing
            regular file.
    """
    # Accept bare names only. Backslash is rejected alongside "/" because it
    # is a path separator on Windows; NUL is never part of a real filename.
    if (
        not filename
        or ".." in filename
        or any(ch in filename for ch in "/\\\x00")
    ):
        raise ValueError("Invalid filename")

    path = UPLOAD_DIR / filename

    # Verify the resolved path is still within the upload directory.
    if not path.resolve().is_relative_to(UPLOAD_DIR.resolve()):
        raise ValueError("Invalid path")

    # is_file() rather than exists(): a name such as "." resolves to the
    # upload directory itself, which exists but must never be served.
    if not path.is_file():
        raise FileNotFoundError()

    # A generic content type plus attachment disposition forces a download,
    # so the browser never renders the uploaded content inline.
    return send_file(
        path,
        mimetype="application/octet-stream",
        as_attachment=True,
        download_name=filename,
    )
Incorrect Pattern
import os
# Wrong: using user-provided filename
def bad_upload(file):
    """Anti-pattern: store the upload under the client-supplied filename.

    Deliberately insecure example. ``file.filename`` is fully controlled by
    the client, so it can carry path components out of the upload directory.
    """
    filename = file.filename  # User controlled!
    target = f"/uploads/{filename}"
    file.save(target)
# Attack: filename = "../../../var/www/shell.php"
# Wrong: checking only extension
def bad_validate(filename):
    """Anti-pattern: accept a file purely because of its extension.

    Deliberately insecure example. Only the trailing characters of the name
    are inspected; the file content is never looked at.
    """
    image_suffixes = (".jpg", ".png")
    return filename.endswith(image_suffixes)
# Attack: shell.php.jpg with PHP content
# Wrong: storing in webroot
def bad_upload_2(file):
    """Anti-pattern: save the upload inside the web server's document root.

    Deliberately insecure example. The file lands under ``/var/www/html``
    using the client-supplied name.
    """
    webroot_target = f"/var/www/html/uploads/{file.filename}"
    file.save(webroot_target)
# Attacker can access directly, execute scripts
# Wrong: trusting Content-Type header
def bad_validate_2(file):
    """Anti-pattern: trust the Content-Type declared by the client.

    Deliberately insecure example. ``file.content_type`` comes from the
    request, so the sender can set it to anything.
    """
    claimed_type = file.content_type
    return claimed_type.startswith("image/")
# Header is attacker-controlled!
# Wrong: no size limit
def bad_upload_3(file):
    """Anti-pattern: store the upload without enforcing any size limit.

    Deliberately insecure example. The name is a safe random UUID, but the
    upload is written however large it is.
    """
    random_name = uuid.uuid4()
    file.save(f"/uploads/{random_name}")
# DoS: upload 100GB file
Image-Specific Validation
from PIL import Image
import io
MAX_IMAGE_PIXELS = 4096 * 4096 # Prevent decompression bomb
def validate_image(content: bytes) -> bool:
    """Check that ``content`` is a well-formed JPEG, PNG or GIF of sane size.

    Args:
        content: Raw bytes of the candidate image.

    Returns:
        ``True`` if Pillow can open and verify the data, its format is JPEG,
        PNG or GIF, and its pixel count does not exceed
        ``MAX_IMAGE_PIXELS``; ``False`` otherwise, including when any step
        raises.
    """
    try:
        # Note: this sets Pillow's process-wide decompression-bomb threshold.
        Image.MAX_IMAGE_PIXELS = MAX_IMAGE_PIXELS

        # verify() checks the file structure; the image object must not be
        # used afterwards, so it is opened a second time below.
        with Image.open(io.BytesIO(content)) as probe:
            probe.verify()

        with Image.open(io.BytesIO(content)) as img:
            # Check format
            if img.format not in ("JPEG", "PNG", "GIF"):
                return False

            # Enforce the pixel cap explicitly: Pillow only emits a
            # DecompressionBombWarning (not an exception) for images above
            # the limit, and raises only past twice the limit.
            width, height = img.size
            if width * height > MAX_IMAGE_PIXELS:
                return False

        return True
    except Exception:
        # Any parsing or decoding failure means the data is not acceptable.
        return False
def strip_image_metadata(content: bytes) -> bytes:
    """Re-encode an image in its own format and return the new bytes.

    Args:
        content: Raw bytes of the source image.

    Returns:
        The bytes produced by saving the decoded image again in the same
        format.
    """
    # NOTE(review): which metadata (EXIF, ICC profile, comments) is dropped
    # by a plain re-save depends on Pillow's encoder for each format;
    # confirm this for every format you accept.
    source = io.BytesIO(content)
    img = Image.open(source)

    reencoded = io.BytesIO()
    img.save(reencoded, format=img.format)
    return reencoded.getvalue()
Antivirus Scanning
import clamd # ClamAV client
def scan_for_malware(filepath: str) -> bool:
    """Scan a file with ClamAV and report whether it may be kept.

    Args:
        filepath: Path of the file to scan, as seen by the clamd daemon.

    Returns:
        ``True`` only when the scan reports the file as clean. ``False``
        when malware is found (the file is then deleted), when the scan
        result is an error or cannot be interpreted, or when scanning
        itself fails.
    """
    try:
        cd = clamd.ClamdUnixSocket()
        result = cd.scan(filepath)

        if result is None:
            return True  # Clean

        # result = {filepath: ('FOUND', 'Malware.Name')}
        status, name = result.get(filepath, (None, None))

        if status == "FOUND":
            log.warning("Malware detected", filepath=filepath, malware=name)
            os.remove(filepath)
            return False

        if status == "OK":
            return True

        # Anything else (e.g. an ERROR status, or no entry for this path)
        # means the file was not actually vetted, so fail closed instead of
        # treating it as clean.
        log.error(
            "Antivirus scan inconclusive",
            filepath=filepath,
            status=status,
            detail=name,
        )
        return False
    except Exception as e:
        log.error("Antivirus scan failed", error=str(e))
        return False  # Fail closed
Edge Cases
- Double extensions: `file.php.jpg` may execute as PHP on misconfigured servers
- Null byte: `file.php%00.jpg` truncates to `file.php` in some languages
- Case sensitivity: `.PhP` may execute on Windows
- SVG can contain JavaScript — treat as dangerous
- ZIP files need recursive scanning for zip bombs
- Office files (DOCX) are ZIPs containing XML — check for XXE
- GIF89a header with PHP code can execute on some servers