Files
security-patterns/file-upload.md
T
Rodin 1eac5d3bcc Add CSP, file upload, open redirect, clickjacking patterns
Complete security patterns collection (23 total):
- csp.md: nonces, hashes, strict-dynamic, reporting
- file-upload.md: content validation, path traversal, malware scanning
- open-redirect.md: URL validation, OAuth redirect URI, bypass techniques
- clickjacking.md: X-Frame-Options, frame-ancestors CSP

Comprehensive coverage for web application security review.
2026-05-10 23:24:52 -07:00

5.8 KiB

File Upload Security

Rule

Validate content, not just extension. Store outside webroot. Generate new filenames. Set size limits.

Source: OWASP File Upload Cheat Sheet

Attack Vectors

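| Attack | Description |
|---|---|
| Web shell | Upload a .php/.jsp file that executes commands on the server |
| XSS via SVG | SVG with embedded JavaScript |
| XXE via Office | DOCX/XLSX files are ZIP archives containing XML that may declare external entities |
| Path traversal | Filename like ../../../etc/cron.d/shell |
| DoS | Upload huge files to exhaust disk space |
| Malware hosting | Attacker uses your server to distribute malware |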

Correct Pattern

import os
import uuid
import magic  # python-magic for content detection
from pathlib import Path

# Uploaded files are stored here; the directory must sit outside the webroot
# so the web server never serves (or executes) them directly.
UPLOAD_DIR = Path("/var/uploads")

# Largest accepted upload, in bytes (10 MiB).
MAX_FILE_SIZE = 10 * 1024 ** 2

# Allow-list of accepted content types. The key is the MIME type detected
# from the file's bytes; the value is the extension given to the stored file.
ALLOWED_TYPES = {
    "image/jpeg": ".jpg",
    "image/png": ".png",
    "image/gif": ".gif",
    "application/pdf": ".pdf",
}

def save_upload(file_storage) -> str:
    """Validate an uploaded file and store it under a server-generated name.

    The size is checked against ``MAX_FILE_SIZE``, the MIME type is detected
    from the file's bytes (never from the client-supplied name or header) and
    must be a key of ``ALLOWED_TYPES``. The stored name is a random UUID plus
    the extension mapped from the detected type.

    Args:
        file_storage: Seekable binary file-like object holding the upload.
            It must support ``seek``, ``tell`` and ``read``; presumably a
            Werkzeug ``FileStorage`` -- confirm against the caller. The
            stream is left rewound to position 0.

    Returns:
        The generated filename, relative to ``UPLOAD_DIR``.

    Raises:
        ValueError: If the file exceeds ``MAX_FILE_SIZE``, its detected type
            is not allowed, or the destination resolves outside
            ``UPLOAD_DIR``.
        FileExistsError: If the generated name already exists (the file is
            created exclusively and never overwritten).
    """
    # Cheap early rejection: ask the stream for its length before reading it.
    file_storage.seek(0, os.SEEK_END)
    size = file_storage.tell()
    file_storage.seek(0)

    if size > MAX_FILE_SIZE:
        raise ValueError("File too large")

    # Bounded read: pull at most one byte past the limit into memory, so a
    # stream whose tell() under-reports its length still cannot exhaust RAM.
    content = file_storage.read(MAX_FILE_SIZE + 1)
    file_storage.seek(0)  # leave the stream rewound for the caller

    if len(content) > MAX_FILE_SIZE:
        raise ValueError("File too large")

    # Detect MIME type from content, not extension
    detected_type = magic.from_buffer(content, mime=True)

    if detected_type not in ALLOWED_TYPES:
        raise ValueError(f"File type not allowed: {detected_type}")

    # Generate safe filename (never use user input)
    extension = ALLOWED_TYPES[detected_type]
    safe_filename = f"{uuid.uuid4()}{extension}"

    # Store outside webroot
    dest_path = UPLOAD_DIR / safe_filename

    # Paranoid check: the name is server-generated, but confirm the resolved
    # destination is still inside the upload directory.
    if not dest_path.resolve().is_relative_to(UPLOAD_DIR.resolve()):
        raise ValueError("Invalid path")

    # "x" = exclusive creation: fail instead of silently overwriting a file
    # that already exists under this name.
    with open(dest_path, "xb") as f:
        f.write(content)

    return safe_filename

def serve_upload(filename: str):
    """Serve a previously stored upload as a forced download.

    Args:
        filename: Bare name of a file inside ``UPLOAD_DIR``. It must not be
            empty, must not be ``"."``, and must not contain ``".."``, a
            path separator (``/`` or ``\\``) or a NUL byte.

    Returns:
        Whatever ``send_file`` returns. ``send_file`` is not defined in this
        snippet; presumably it is Flask's ``send_file`` -- confirm.

    Raises:
        ValueError: If ``filename`` fails validation or resolves outside
            ``UPLOAD_DIR``.
        FileNotFoundError: If the path does not refer to an existing regular
            file.
    """
    # Validate filename format. "." is rejected explicitly: it contains no
    # separator and no "..", yet it names the upload directory itself.
    # Backslash and NUL are rejected as defence in depth (Windows separator,
    # C-string truncation).
    if (
        not filename
        or filename == "."
        or ".." in filename
        or "/" in filename
        or "\\" in filename
        or "\x00" in filename
    ):
        raise ValueError("Invalid filename")

    path = UPLOAD_DIR / filename

    # Verify the resolved path is within the upload dir
    if not path.resolve().is_relative_to(UPLOAD_DIR.resolve()):
        raise ValueError("Invalid path")

    # is_file() rather than exists(): a directory must never reach send_file.
    if not path.is_file():
        raise FileNotFoundError()

    # Serve with a generic content type and as an attachment so the browser
    # downloads the file instead of rendering or executing it.
    return send_file(
        path,
        mimetype="application/octet-stream",  # Force download
        as_attachment=True,
        download_name=filename,
    )

Incorrect Pattern

import os

# Wrong: using user-provided filename
def bad_upload(file):
    # Deliberately vulnerable: the client chooses the name, and therefore
    # the destination path.
    filename = file.filename  # User controlled!
    target = f"/uploads/{filename}"
    file.save(target)
    # Attack: filename = "../../../var/www/shell.php"

# Wrong: checking only extension
def bad_validate(filename):
    # Deliberately vulnerable: only the tail of the name is inspected; the
    # file's bytes are never looked at.
    image_suffixes = (".jpg", ".png")
    return filename.endswith(image_suffixes)
    # Attack: shell.php.jpg with PHP content

# Wrong: storing in webroot
def bad_upload_2(file):
    # Deliberately vulnerable: the destination is inside the web server's
    # document root.
    destination = f"/var/www/html/uploads/{file.filename}"
    file.save(destination)
    # Attacker can access directly, execute scripts

# Wrong: trusting Content-Type header
def bad_validate_2(file):
    # Deliberately vulnerable: the decision rests on the type the client
    # declared in the request.
    declared_type = file.content_type
    return declared_type.startswith("image/")
    # Header is attacker-controlled!

# Wrong: no size limit
def bad_upload_3(file):
    # Deliberately vulnerable: the name is safe, but nothing bounds how many
    # bytes get written.
    destination = f"/uploads/{uuid.uuid4()}"
    file.save(destination)
    # DoS: upload 100GB file

Image-Specific Validation

from PIL import Image
import io

MAX_IMAGE_PIXELS = 4096 * 4096  # Prevent decompression bomb


def validate_image(content: bytes) -> bool:
    """Return True if *content* is a well-formed JPEG, PNG or GIF of sane size.

    The check runs in two passes because ``Image.verify()`` leaves the image
    object unusable: the first pass verifies file integrity, the second
    re-opens the data to check format and dimensions and to decode the
    pixels.

    This function only validates; it does not remove EXIF or other metadata
    (use ``strip_image_metadata`` for that).

    Args:
        content: Raw bytes of the uploaded file.

    Returns:
        True if every check passes, False otherwise. Errors raised while
        parsing are reported as False rather than propagated.
    """
    try:
        # Side effect: this sets Pillow's process-wide limit. Pillow only
        # *warns* above this value and raises once the image exceeds twice
        # the limit, so the limit is also enforced explicitly below.
        Image.MAX_IMAGE_PIXELS = MAX_IMAGE_PIXELS

        # Pass 1: integrity check. verify() does not decode pixel data.
        Image.open(io.BytesIO(content)).verify()

        # Pass 2: reopen for further checks (verify() invalidates the image).
        img = Image.open(io.BytesIO(content))

        # Check format
        if img.format not in ("JPEG", "PNG", "GIF"):
            return False

        # Hard pixel-count limit, checked before any pixel data is decoded.
        width, height = img.size
        if width * height > MAX_IMAGE_PIXELS:
            return False

        # Force a full decode so truncated or corrupt pixel data is rejected.
        img.load()

        return True
    except Exception:
        # Deliberately broad: image parsers raise many unrelated exception
        # types on malformed input, and any failure means "not valid".
        return False

def strip_image_metadata(content: bytes) -> bytes:
    """Re-encode an image without EXIF and other descriptive metadata.

    The EXIF Orientation tag is applied to the pixels before EXIF is
    dropped, so photos keep their visual orientation. Rendering-relevant
    entries (for example palette transparency or an ICC colour profile) are
    left untouched.

    NOTE(review): the image is saved without ``save_all``, so an animated
    image is presumably reduced to a single frame -- confirm whether
    animated GIFs must be preserved.

    Args:
        content: Raw bytes of an image Pillow can open.

    Returns:
        The re-encoded image bytes, in the same format as the input.
    """
    # Local import: only ``Image`` is imported at module level.
    from PIL import ImageOps

    img = Image.open(io.BytesIO(content))

    # Capture the format now: images derived from the original (copies,
    # transposes) no longer carry it.
    image_format = img.format

    # Bake the EXIF orientation into the pixels; returns a new image.
    cleaned = ImageOps.exif_transpose(img)

    # Some Pillow encoders fall back to values stored in ``.info`` when no
    # explicit save() argument is given (behaviour varies by format and
    # version), so drop the descriptive entries explicitly.
    for key in ("exif", "comment", "xmp", "XML:com.adobe.xmp", "photoshop"):
        cleaned.info.pop(key, None)

    output = io.BytesIO()
    cleaned.save(output, format=image_format)
    return output.getvalue()

Antivirus Scanning

import clamd  # ClamAV client

def scan_for_malware(filepath: str) -> bool:
    """Scan a file with ClamAV and report whether it is clean.

    The function fails closed: the file is treated as clean only when the
    scanner explicitly reports ``"OK"`` for everything it scanned. A
    ``"FOUND"`` verdict deletes the file. Any other outcome (an ``"ERROR"``
    status, an empty result, an exception while talking to the daemon) is
    logged and reported as not clean.

    Args:
        filepath: Path of the file to scan. It is passed to the clamd daemon
            as-is, so the daemon must be able to read it.

    Returns:
        True if the file is clean, False if malware was found or the scan
        could not produce a clean verdict.
    """
    try:
        cd = clamd.ClamdUnixSocket()
        result = cd.scan(filepath)

        # NOTE(review): kept so behaviour is unchanged for a client that
        # reports "no findings" as None -- confirm whether the clamd client
        # can actually return None here.
        if result is None:
            return True  # Clean

        # result = {path: (status, detail)}, e.g. {path: ('FOUND', 'Malware.Name')}
        # All entries are inspected rather than looking up ``filepath``, so a
        # scanner that reports a different spelling of the path cannot make
        # the lookup miss and the file pass as clean.
        verdicts = list(result.values())

        for status, name in verdicts:
            if status == "FOUND":
                log.warning("Malware detected", filepath=filepath, malware=name)
                os.remove(filepath)
                return False

        if verdicts and all(status == "OK" for status, _ in verdicts):
            return True

        # ERROR status, unknown status, or no verdict at all: not clean.
        log.error("Antivirus scan failed", error=f"unexpected scan result: {result!r}")
        return False
    except Exception as e:
        # Deliberately broad: any failure to obtain a verdict means the file
        # must not be trusted.
        log.error("Antivirus scan failed", error=str(e))
        return False  # Fail closed

Edge Cases

  • Double extensions: file.php.jpg may execute as PHP on misconfigured servers
  • Null byte: file.php%00.jpg truncates to file.php in some languages
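  • Case sensitivity: compare extensions case-insensitively — .PhP slips past a blocklist that only matches .php and may still execute on case-insensitive systems such as Windows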
  • SVG can contain JavaScript — treat as dangerous
  • ZIP files: check the total uncompressed size and nesting depth before extracting (zip bombs), and scan the extracted contents recursively
  • Office files (DOCX) are ZIPs containing XML — check for XXE
  • GIF89a header with PHP code can execute on some servers