/**
 * File Validation Service
 *
 * Pre-scan validation layer that catches suspicious files ClamAV won't flag:
 * - Extension whitelist enforcement
 * - MIME type ↔ extension mismatch detection
 * - Double extension blocking (e.g., report.pdf.exe)
 * - Path traversal blocking (e.g., ../../etc/passwd)
 * - Magic bytes / file signature validation
 * - Filename sanitization
 *
 * NOTE(review): the copy of this file that was reviewed had been flattened onto a
 * few lines and had lost every span that looked like an HTML tag. Three places were
 * rebuilt from the surrounding code and are marked NOTE(review) below: the type
 * arguments of EXTENSION_MIME_MAP, the last entry of BLOCKED_PATTERNS, and the head
 * of validateFile (signature, steps 1-2, start of step 3). Confirm them against
 * version control before merging.
 */

// ── Types ──

/** Outcome of {@link validateFile}. */
export interface FileValidationResult {
  /** True when no check produced an error (warnings do not affect this). */
  valid: boolean;
  /** Blocking problems; any entry makes the file invalid. */
  errors: string[];
  /** Non-blocking findings (MIME mismatch, unexpected file header). */
  warnings: string[];
  /** Result of {@link sanitizeFilename} applied to the original name. */
  sanitizedFilename: string;
  /** First MIME type of the first MAGIC_BYTES entry matching the buffer, else null. */
  detectedMimeType: string | null;
  /** False when the declared MIME type is not one of those listed for the extension. */
  mimeMatchesExtension: boolean;
}

// ── Allowed Extensions and MIME Mappings ──

// NOTE(review): type arguments were lost in the reviewed copy; `<string, string[]>`
// is what the values and every use site require.
const EXTENSION_MIME_MAP: Record<string, string[]> = {
  // Documents
  pdf: ['application/pdf'],
  doc: ['application/msword'],
  docx: ['application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
  xls: ['application/vnd.ms-excel'],
  xlsx: ['application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'],
  ppt: ['application/vnd.ms-powerpoint'],
  pptx: ['application/vnd.openxmlformats-officedocument.presentationml.presentation'],
  // Images
  jpg: ['image/jpeg'],
  jpeg: ['image/jpeg'],
  png: ['image/png'],
  gif: ['image/gif'],
  webp: ['image/webp'],
  svg: ['image/svg+xml'],
  // Text
  txt: ['text/plain'],
  csv: ['text/csv', 'text/plain', 'application/csv'],
  md: ['text/markdown', 'text/plain'],
  log: ['text/plain'],
  // Archives
  zip: ['application/zip', 'application/x-zip-compressed'],
};

// Magic bytes signatures for common file types.
// Order matters for type detection: the first matching entry wins, so every
// PK-based container (zip/docx/xlsx/pptx) is detected as "zip".
const MAGIC_BYTES: Array<{ ext: string; bytes: number[]; offset?: number }> = [
  { ext: 'pdf', bytes: [0x25, 0x50, 0x44, 0x46] }, // %PDF
  { ext: 'png', bytes: [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A] }, // PNG
  { ext: 'jpg', bytes: [0xFF, 0xD8, 0xFF] }, // JPEG
  { ext: 'jpeg', bytes: [0xFF, 0xD8, 0xFF] }, // JPEG
  { ext: 'gif', bytes: [0x47, 0x49, 0x46, 0x38] }, // GIF8
  { ext: 'zip', bytes: [0x50, 0x4B, 0x03, 0x04] }, // PK zip
  { ext: 'docx', bytes: [0x50, 0x4B, 0x03, 0x04] }, // PK (Office OOXML)
  { ext: 'xlsx', bytes: [0x50, 0x4B, 0x03, 0x04] },
  { ext: 'pptx', bytes: [0x50, 0x4B, 0x03, 0x04] },
  { ext: 'doc', bytes: [0xD0, 0xCF, 0x11, 0xE0] }, // OLE2
  { ext: 'xls', bytes: [0xD0, 0xCF, 0x11, 0xE0] },
  { ext: 'ppt', bytes: [0xD0, 0xCF, 0x11, 0xE0] },
  { ext: 'webp', bytes: [0x52, 0x49, 0x46, 0x46] }, // RIFF (WebP)
];

// Dangerous executable signatures that should NEVER be uploaded
const DANGEROUS_MAGIC_BYTES: Array<{ name: string; bytes: number[] }> = [
  { name: 'Windows EXE/DLL (MZ)', bytes: [0x4D, 0x5A] }, // MZ header
  { name: 'ELF binary', bytes: [0x7F, 0x45, 0x4C, 0x46] }, // ELF
  { name: 'Java class', bytes: [0xCA, 0xFE, 0xBA, 0xBE] }, // Java bytecode
  { name: 'Mach-O binary', bytes: [0xCF, 0xFA, 0xED, 0xFE] }, // macOS binary
  { name: 'Windows shortcut', bytes: [0x4C, 0x00, 0x00, 0x00] }, // LNK
];

// Blocked filename patterns. Every matching entry contributes its own error, so a
// single name can be reported more than once (e.g. "report.pdf.exe").
const BLOCKED_PATTERNS: Array<{ pattern: RegExp; reason: string }> = [
  { pattern: /\.\./, reason: 'Path traversal attempt (../)' },
  { pattern: /[\/\\]/, reason: 'Path separator in filename' },
  { pattern: /\x00/, reason: 'Null byte in filename' },
  // macOS resource fork files (._filename) — metadata junk, not real documents
  { pattern: /^\._/, reason: 'macOS resource fork file (._prefix) — not a valid document' },
  // Hidden files (starting with .)
  { pattern: /^\.(?!_)/, reason: 'Hidden file (starts with dot)' },
  {
    pattern: /\.(exe|bat|cmd|com|msi|scr|pif|vbs|vbe|js|jse|wsf|wsh|ps1|sh|bash|cgi|pl|py|rb|jar|dll|sys|drv|ocx|cpl|inf|reg|rgs|sct|url|lnk|hta|chm|hlp|iso|img|dmg|deb|rpm|appimage)$/i,
    reason: 'Executable or dangerous file extension blocked',
  },
  // Double extensions (e.g., report.pdf.exe, image.jpg.vbs)
  {
    pattern: /\.(pdf|doc|docx|xls|xlsx|jpg|jpeg|png|gif|txt)\.(exe|bat|cmd|com|scr|pif|vbs|js|ps1|sh)$/i,
    reason: 'Double extension — possible disguised executable',
  },
  // Periods before common executable extensions
  {
    pattern: /\.\w+\.(exe|bat|cmd|com|msi|scr|pif|vbs|vbe|js|jse|wsf|wsh|ps1|sh|bash)$/i,
    reason: 'Suspicious double extension',
  },
  // XSS Patterns in filenames
  // NOTE(review): this regex, its reason text, and any entries that followed it were
  // lost in the reviewed copy. The entry below is a placeholder — restore the original.
  { pattern: /<script|javascript:/i, reason: 'Potential XSS payload in filename' },
];

// ── Validation ──

/**
 * Run all pre-scan checks on an uploaded file.
 *
 * Every check runs; blocking findings accumulate in `errors` and softer ones in
 * `warnings`. The function itself never rejects — callers inspect `valid`.
 *
 * NOTE(review): the parameter order, the `maxSizeMB` default, and the bodies and
 * messages of steps 1-2 were rebuilt from the identifiers the surviving code uses
 * (`originalName`, `mimeType`, `fileSizeBytes`, `fileBuffer`, `maxSizeMB`, `ext`).
 * Verify against the real signature before merging.
 *
 * @param originalName  Client-supplied filename (untrusted).
 * @param mimeType      Client/middleware-reported MIME type (untrusted).
 * @param fileSizeBytes Size of the upload in bytes.
 * @param fileBuffer    Optional file content; magic-byte checks run only when it
 *                      holds at least 4 bytes. Typed as `Uint8Array` so a Node
 *                      `Buffer` is accepted as well.
 * @param maxSizeMB     Upper size limit in megabytes.
 * @returns The accumulated findings plus the sanitized filename.
 */
export function validateFile(
  originalName: string,
  mimeType: string,
  fileSizeBytes: number,
  fileBuffer?: Uint8Array,
  maxSizeMB: number = 50
): FileValidationResult {
  const errors: string[] = [];
  const warnings: string[] = [];

  // 1. Blocked filename patterns (NOTE(review): reconstructed)
  for (const { pattern, reason } of BLOCKED_PATTERNS) {
    if (pattern.test(originalName)) {
      errors.push(reason);
    }
  }

  // 2. Extension whitelist (NOTE(review): reconstructed, messages are placeholders)
  const ext = extractExtension(originalName);
  if (!ext) {
    errors.push('File has no extension');
  } else if (!allowedMimesFor(ext)) {
    errors.push(`File extension ".${ext}" is not allowed`);
  }

  // 3. File size
  const maxSizeBytes = maxSizeMB * 1024 * 1024;
  if (fileSizeBytes > maxSizeBytes) {
    errors.push(`File size (${(fileSizeBytes / 1024 / 1024).toFixed(1)}MB) exceeds limit (${maxSizeMB}MB)`);
  }
  if (fileSizeBytes === 0) {
    errors.push('File is empty (0 bytes)');
  }

  // 4. MIME type ↔ extension mismatch detection (warning only — browsers/multer can report wrong MIME)
  let mimeMatchesExtension = true;
  // Own-property lookup: an extension such as "constructor" or "__proto__" must not
  // resolve to an Object.prototype member (that used to throw on `.includes`).
  const allowedMimes = ext ? allowedMimesFor(ext) : undefined;
  if (allowedMimes && !allowedMimes.includes(mimeType) && mimeType !== 'application/octet-stream') {
    mimeMatchesExtension = false;
    warnings.push(
      `MIME type mismatch: file claims ".${ext}" but has MIME "${mimeType}". ` +
        `Expected: ${allowedMimes.join(' or ')}`
    );
  }

  // 5. Magic bytes / file signature validation
  let detectedMimeType: string | null = null;
  if (fileBuffer && fileBuffer.length >= 4) {
    // Check for dangerous executable signatures FIRST
    for (const { name, bytes } of DANGEROUS_MAGIC_BYTES) {
      if (matchesBytes(fileBuffer, bytes)) {
        errors.push(`File contains ${name} binary signature — executable files are blocked`);
      }
    }

    // Check if magic bytes match claimed extension
    if (ext) {
      const expectedSignatures = MAGIC_BYTES.filter(m => m.ext === ext);
      if (expectedSignatures.length > 0) {
        const matchesAny = expectedSignatures.some(sig => matchesBytes(fileBuffer, sig.bytes, sig.offset));
        if (!matchesAny) {
          // Warning only — some legitimate files have variant headers
          // ClamAV will do the real malware check
          warnings.push(
            `File header does not match ".${ext}" signature — file may be corrupted or mislabeled`
          );
        }
      }
    }

    // Detect actual type from magic bytes (honouring each signature's offset, the
    // same way the extension check above does).
    for (const { ext: detExt, bytes, offset } of MAGIC_BYTES) {
      if (matchesBytes(fileBuffer, bytes, offset)) {
        const mimes = allowedMimesFor(detExt);
        detectedMimeType = mimes ? mimes[0] : null;
        break;
      }
    }
  }

  // 6. Sanitize filename
  const sanitizedFilename = sanitizeFilename(originalName);

  return {
    valid: errors.length === 0,
    errors,
    warnings,
    sanitizedFilename,
    detectedMimeType,
    mimeMatchesExtension,
  };
}

// ── Helpers ──

/** Lower-cased text after the last dot, or '' when there is no dot or nothing follows it. */
function extractExtension(filename: string): string {
  const dot = filename.lastIndexOf('.');
  if (dot < 0 || dot === filename.length - 1) return '';
  return filename.slice(dot + 1).toLowerCase();
}

/** MIME types whitelisted for `ext`, looked up as an own property only. */
function allowedMimesFor(ext: string): string[] | undefined {
  return Object.prototype.hasOwnProperty.call(EXTENSION_MIME_MAP, ext)
    ? EXTENSION_MIME_MAP[ext]
    : undefined;
}

/**
 * True when `buffer` contains exactly `bytes` starting at `offset`.
 * Returns false (rather than reading past the end) when the buffer is too short.
 */
function matchesBytes(buffer: Uint8Array, bytes: number[], offset: number = 0): boolean {
  if (buffer.length < offset + bytes.length) return false;
  return bytes.every((byte, i) => buffer[offset + i] === byte);
}

/**
 * Shorten `name` to at most `maxLength` characters, keeping the extension when one
 * exists and leaves room for at least one base character.
 */
function truncateFilename(name: string, maxLength: number): string {
  const dot = name.lastIndexOf('.');
  const ext = dot > 0 ? name.slice(dot + 1) : '';
  if (ext.length === 0 || ext.length + 2 > maxLength) {
    // No usable extension: plain cut. (Previously the whole name was treated as the
    // extension, producing "." + name — longer than the input and a hidden file.)
    return name.slice(0, maxLength).replace(/[\s.]+$/, '');
  }
  // Trim again so the cut cannot leave "base .ext" or "base..ext".
  const base = name.slice(0, maxLength - ext.length - 1).replace(/[\s.]+$/, '');
  return `${base}.${ext}`;
}

/**
 * Sanitize a filename: strip path components, remove or replace dangerous
 * characters and XSS-prone keywords, collapse dot runs, and limit the length to
 * 200 characters. Falls back to "unnamed_file" when nothing is left.
 *
 * Note that the keyword replacement is substring-based, so ordinary names are
 * rewritten too (e.g. "description.txt" contains "script").
 *
 * @param original Client-supplied filename (untrusted).
 * @returns A name without path separators, control characters or leading/trailing dots.
 */
export function sanitizeFilename(original: string): string {
  // Strip path components. `[\s\S]` instead of `.` so a line break in the name
  // cannot stop the match and let "/" or "\" survive.
  let name = original.replace(/^[\s\S]*[\\\/]/, '');

  // Remove null bytes
  name = name.replace(/\x00/g, '');

  // Replace dangerous characters including XSS-prone characters
  name = name.replace(/[<>:"|?*\x00-\x1F\x7F]/g, '_');

  // More aggressive XSS sanitization (replace suspicious keywords)
  name = name.replace(/(onerror|onload|onclick|onmouseover|onfocus|alert|eval|javascript|vbscript|script|expression|document)/gi, 'safe');

  // Collapse multiple dots
  name = name.replace(/\.{2,}/g, '.');

  // Trim leading/trailing dots and spaces
  name = name.replace(/^[\s.]+|[\s.]+$/g, '');

  // Limit length (keep extension)
  if (name.length > 200) {
    name = truncateFilename(name, 200);
  }

  // Fallback for empty names
  return name || 'unnamed_file';
}