246 lines
9.6 KiB
TypeScript
246 lines
9.6 KiB
TypeScript
/**
 * File Validation Service
 *
 * Pre-scan validation layer that catches suspicious files ClamAV won't flag:
 * - Extension whitelist enforcement
 * - MIME type ↔ extension mismatch detection
 * - Double extension blocking (e.g., report.pdf.exe)
 * - Path traversal blocking (e.g., ../../etc/passwd)
 * - Magic bytes / file signature validation
 * - Filename sanitization
 */
|
|
|
|
// ── Types ──
|
|
|
|
/**
 * Outcome of validating a single uploaded file.
 */
export interface FileValidationResult {
  /** Overall verdict for the file. */
  valid: boolean;

  /** Blocking problems found during validation. */
  errors: string[];

  /** Non-blocking findings (e.g. a MIME or header mismatch). */
  warnings: string[];

  /** Cleaned-up version of the uploaded filename. */
  sanitizedFilename: string;

  /** MIME type inferred from the file header, or `null` when none was detected. */
  detectedMimeType: string | null;

  /** Whether the declared MIME type is acceptable for the file's extension. */
  mimeMatchesExtension: boolean;
}
|
|
|
|
// ── Allowed Extensions and MIME Mappings ──
|
|
|
|
/**
 * Whitelist of uploadable extensions (lowercase, without the dot), each mapped
 * to the MIME types accepted for it. The first MIME type of an entry is the
 * one reported when a file's type is detected from its header.
 */
const EXTENSION_MIME_MAP: Record<string, string[]> = {
  // Documents
  pdf: ['application/pdf'],
  doc: ['application/msword'],
  docx: ['application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
  xls: ['application/vnd.ms-excel'],
  xlsx: ['application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'],
  ppt: ['application/vnd.ms-powerpoint'],
  pptx: ['application/vnd.openxmlformats-officedocument.presentationml.presentation'],

  // Images
  jpg: ['image/jpeg'],
  jpeg: ['image/jpeg'],
  png: ['image/png'],
  gif: ['image/gif'],
  webp: ['image/webp'],
  svg: ['image/svg+xml'],

  // Text
  txt: ['text/plain'],
  csv: ['text/csv', 'text/plain', 'application/csv'],
  md: ['text/markdown', 'text/plain'],
  log: ['text/plain'],

  // Archives
  zip: ['application/zip', 'application/x-zip-compressed'],
};
|
|
|
|
/**
 * Leading-byte signatures for the file types that have one.
 *
 * `offset` is the position in the file at which `bytes` must appear; it is
 * omitted (start of file) for every entry below. Several extensions share a
 * signature (ZIP and the OOXML formats; the OLE2 Office formats), so entry
 * order matters wherever the table is scanned for a first match.
 */
const MAGIC_BYTES: Array<{ ext: string; bytes: number[]; offset?: number }> = [
  // "%PDF"
  { ext: 'pdf', bytes: [0x25, 0x50, 0x44, 0x46] },
  // "\x89PNG\r\n"
  { ext: 'png', bytes: [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A] },
  // JPEG start-of-image marker
  { ext: 'jpg', bytes: [0xFF, 0xD8, 0xFF] },
  { ext: 'jpeg', bytes: [0xFF, 0xD8, 0xFF] },
  // "GIF8"
  { ext: 'gif', bytes: [0x47, 0x49, 0x46, 0x38] },
  // "PK\x03\x04" — plain ZIP and the ZIP-based Office OOXML formats
  { ext: 'zip', bytes: [0x50, 0x4B, 0x03, 0x04] },
  { ext: 'docx', bytes: [0x50, 0x4B, 0x03, 0x04] },
  { ext: 'xlsx', bytes: [0x50, 0x4B, 0x03, 0x04] },
  { ext: 'pptx', bytes: [0x50, 0x4B, 0x03, 0x04] },
  // OLE2 compound document — legacy Office formats
  { ext: 'doc', bytes: [0xD0, 0xCF, 0x11, 0xE0] },
  { ext: 'xls', bytes: [0xD0, 0xCF, 0x11, 0xE0] },
  { ext: 'ppt', bytes: [0xD0, 0xCF, 0x11, 0xE0] },
  // "RIFF" — container header used by WebP
  { ext: 'webp', bytes: [0x52, 0x49, 0x46, 0x46] },
];
|
|
|
|
/**
 * Leading-byte signatures of executable / launchable formats that must never
 * be accepted, whatever extension the upload claims.
 *
 * `name` is interpolated into the user-facing error message, so all Mach-O
 * variants deliberately share one name.
 */
const DANGEROUS_MAGIC_BYTES: Array<{ name: string; bytes: number[] }> = [
  // "MZ" DOS/PE header
  { name: 'Windows EXE/DLL (MZ)', bytes: [0x4D, 0x5A] },
  // "\x7FELF"
  { name: 'ELF binary', bytes: [0x7F, 0x45, 0x4C, 0x46] },
  // Java bytecode (the same magic is used by Mach-O universal binaries)
  { name: 'Java class', bytes: [0xCA, 0xFE, 0xBA, 0xBE] },
  // Mach-O: all four thin-binary magics, as they appear on disk
  { name: 'Mach-O binary', bytes: [0xCF, 0xFA, 0xED, 0xFE] }, // 64-bit, little-endian
  { name: 'Mach-O binary', bytes: [0xCE, 0xFA, 0xED, 0xFE] }, // 32-bit, little-endian
  { name: 'Mach-O binary', bytes: [0xFE, 0xED, 0xFA, 0xCF] }, // 64-bit, big-endian
  { name: 'Mach-O binary', bytes: [0xFE, 0xED, 0xFA, 0xCE] }, // 32-bit, big-endian
  // Windows .lnk shortcut header
  { name: 'Windows shortcut', bytes: [0x4C, 0x00, 0x00, 0x00] },
];
|
|
|
|
/**
 * Filename patterns that cause an upload to be rejected. Each pattern is
 * tested against the raw filename; every pattern that matches contributes its
 * own `reason`, so one filename can trigger several entries.
 */
const BLOCKED_PATTERNS: Array<{ pattern: RegExp; reason: string }> = [
  // ── Path manipulation ──
  { pattern: /\.\./, reason: 'Path traversal attempt (../)' },
  { pattern: /[\/\\]/, reason: 'Path separator in filename' },
  { pattern: /\x00/, reason: 'Null byte in filename' },

  // ── Dot-files ──
  // "._name" entries are macOS resource-fork metadata rather than documents.
  { pattern: /^\._/, reason: 'macOS resource fork file (._prefix) — not a valid document' },
  // Any other leading dot; "._" is excluded so it is reported once, by the entry above.
  { pattern: /^\.(?!_)/, reason: 'Hidden file (starts with dot)' },

  // ── Executable / dangerous extensions ──
  {
    pattern: /\.(exe|bat|cmd|com|msi|scr|pif|vbs|vbe|js|jse|wsf|wsh|ps1|sh|bash|cgi|pl|py|rb|jar|dll|sys|drv|ocx|cpl|inf|reg|rgs|sct|url|lnk|hta|chm|hlp|iso|img|dmg|deb|rpm|appimage)$/i,
    reason: 'Executable or dangerous file extension blocked',
  },
  // A document/image extension immediately followed by an executable one
  // (report.pdf.exe, image.jpg.vbs).
  {
    pattern: /\.(pdf|doc|docx|xls|xlsx|jpg|jpeg|png|gif|txt)\.(exe|bat|cmd|com|scr|pif|vbs|js|ps1|sh)$/i,
    reason: 'Double extension — possible disguised executable',
  },
  // Any ".word." segment directly before an executable extension.
  {
    pattern: /\.\w+\.(exe|bat|cmd|com|msi|scr|pif|vbs|vbe|js|jse|wsf|wsh|ps1|sh|bash)$/i,
    reason: 'Suspicious double extension',
  },

  // ── Script-injection fragments embedded in the name ──
  {
    pattern: /<script|javascript:|onerror=|onload=|onclick=|alert\(|eval\(|document\./i,
    reason: 'Potential XSS payload in filename',
  },
];
|
|
|
|
|
|
// ── Core Validation Function ──
|
|
|
|
/**
 * Validate an uploaded file for security concerns.
 * This runs BEFORE ClamAV and catches things ClamAV won't flag.
 *
 * Checks, in order: extension whitelist, blocked filename patterns, size
 * limits, declared-MIME vs extension (warning only), and file-header
 * signatures (dangerous signatures are errors; a header that does not match
 * the claimed extension is a warning only).
 *
 * @param originalName - Filename as supplied by the client (untrusted).
 * @param mimeType - MIME type declared for the upload (untrusted). Compared
 *   case-insensitively and without parameters such as `; charset=...`.
 * @param fileBuffer - File contents, or `null` to skip the header checks.
 * @param fileSizeBytes - Size of the upload in bytes.
 * @param maxSizeMB - Upper size limit in MiB (default 50).
 * @returns The validation result; `valid` is true only when no errors were recorded.
 */
export function validateFile(
  originalName: string,
  mimeType: string,
  fileBuffer: Buffer | null,
  fileSizeBytes: number,
  maxSizeMB: number = 50,
): FileValidationResult {
  const errors: string[] = [];
  const warnings: string[] = [];

  // 1. Extract and validate extension.
  // Only text after the last dot counts as an extension, so a dot-less name
  // ("README") is reported as having no extension rather than as ".readme".
  const lastDot = originalName.lastIndexOf('.');
  const ext = lastDot === -1 ? '' : originalName.slice(lastDot + 1).toLowerCase();
  const allowedExtensions = Object.keys(EXTENSION_MIME_MAP);
  const isAllowedExtension = allowedExtensions.includes(ext);

  if (!ext) {
    errors.push('File has no extension');
  } else if (!isAllowedExtension) {
    errors.push(`File extension ".${ext}" is not allowed. Allowed: ${allowedExtensions.join(', ')}`);
  }

  // 2. Check blocked filename patterns (path traversal, executables, double extensions, macOS resource forks)
  for (const { pattern, reason } of BLOCKED_PATTERNS) {
    if (pattern.test(originalName)) {
      errors.push(`Blocked filename: ${reason}`);
    }
  }

  // 3. File size validation
  const maxSizeBytes = maxSizeMB * 1024 * 1024;
  if (fileSizeBytes > maxSizeBytes) {
    errors.push(`File size (${(fileSizeBytes / 1024 / 1024).toFixed(1)}MB) exceeds limit (${maxSizeMB}MB)`);
  }
  if (fileSizeBytes === 0) {
    errors.push('File is empty (0 bytes)');
  }

  // 4. MIME type ↔ extension mismatch detection (warning only — browsers/multer can report wrong MIME)
  let mimeMatchesExtension = true;
  // Index the map only for whitelisted keys: a bare `EXTENSION_MIME_MAP[ext]`
  // would resolve inherited members for names such as "x.constructor" and
  // crash on `.includes`.
  const allowedMimes = isAllowedExtension ? EXTENSION_MIME_MAP[ext] : undefined;
  if (allowedMimes) {
    // Media types are case-insensitive and may carry parameters.
    const declaredMime = (mimeType.split(';')[0] ?? '').trim().toLowerCase();
    if (!allowedMimes.includes(declaredMime) && declaredMime !== 'application/octet-stream') {
      mimeMatchesExtension = false;
      warnings.push(
        `MIME type mismatch: file claims ".${ext}" but has MIME "${mimeType}". ` +
          `Expected: ${allowedMimes.join(' or ')}`,
      );
    }
  }

  // 5. Magic bytes / file signature validation.
  // Any non-empty buffer is inspected: matchesBytes() rejects buffers shorter
  // than a signature, and the 2-byte MZ header must be caught on tiny files too.
  let detectedMimeType: string | null = null;
  if (fileBuffer && fileBuffer.length > 0) {
    const header = fileBuffer;

    // Check for dangerous executable signatures FIRST
    for (const { name, bytes } of DANGEROUS_MAGIC_BYTES) {
      if (matchesBytes(header, bytes)) {
        errors.push(`File contains ${name} binary signature — executable files are blocked`);
      }
    }

    // Check if magic bytes match claimed extension
    const signaturesForExt = MAGIC_BYTES.filter(sig => sig.ext === ext);
    const headerMatchesExt = signaturesForExt.some(sig => matchesBytes(header, sig.bytes, sig.offset));
    if (signaturesForExt.length > 0 && !headerMatchesExt) {
      // Warning only — some legitimate files have variant headers
      // ClamAV will do the real malware check
      warnings.push(
        `File header does not match ".${ext}" signature — file may be corrupted or mislabeled`,
      );
    }

    // Detect actual type from magic bytes. Several formats share a header
    // (ZIP/OOXML, OLE2), so when the header is consistent with the claimed
    // extension that extension wins; otherwise fall back to the first
    // signature in table order that matches.
    const detectedExt = headerMatchesExt
      ? ext
      : MAGIC_BYTES.find(sig => matchesBytes(header, sig.bytes, sig.offset))?.ext;
    if (detectedExt !== undefined) {
      detectedMimeType = EXTENSION_MIME_MAP[detectedExt]?.[0] ?? null;
    }
  }

  // 6. Sanitize filename
  const sanitizedFilename = sanitizeFilename(originalName);

  return {
    valid: errors.length === 0,
    errors,
    warnings,
    sanitizedFilename,
    detectedMimeType,
    mimeMatchesExtension,
  };
}
|
|
|
|
// ── Helpers ──
|
|
|
|
/**
 * Report whether `buffer` contains exactly the sequence `bytes` starting at
 * position `offset`.
 *
 * @param buffer - Data to inspect.
 * @param bytes - Expected byte values, in order.
 * @param offset - Index in `buffer` of the first expected byte (default 0).
 * @returns `false` if the buffer ends before the sequence would, or on the
 *   first differing byte; `true` otherwise.
 */
function matchesBytes(buffer: Buffer, bytes: number[], offset: number = 0): boolean {
  const end = offset + bytes.length;
  if (end > buffer.length) {
    return false;
  }

  for (let i = 0; i < bytes.length; i++) {
    if (buffer[offset + i] !== bytes[i]) {
      return false;
    }
  }
  return true;
}
|
|
|
|
/**
 * Sanitize a filename: drop any path prefix, neutralize dangerous characters
 * and script-like keywords, and cap the length at 200 characters.
 *
 * @param original - Filename as supplied by the client (untrusted).
 * @returns A non-empty name containing no path separators or control
 *   characters, with no leading or trailing dots or spaces; `'unnamed_file'`
 *   when nothing usable remains.
 */
export function sanitizeFilename(original: string): string {
  const MAX_LENGTH = 200;

  // Strip path components. Index-based so it also works when the name contains
  // line breaks, which a `.`-based regex would fail to cross.
  const lastSeparator = Math.max(original.lastIndexOf('/'), original.lastIndexOf('\\'));
  let name = original.slice(lastSeparator + 1);

  // Remove null bytes
  name = name.replace(/\x00/g, '');

  // Replace dangerous characters including XSS-prone characters
  name = name.replace(/[<>:"|?*\x00-\x1F\x7F]/g, '_');

  // More aggressive XSS sanitization (replace suspicious keywords)
  name = name.replace(/(onerror|onload|onclick|onmouseover|onfocus|alert|eval|javascript|vbscript|script|expression|document)/gi, 'safe');

  // Collapse multiple dots
  name = name.replace(/\.{2,}/g, '.');

  // Trim leading/trailing dots and spaces
  name = name.replace(/^[\s.]+|[\s.]+$/g, '');

  // Limit length (keep extension)
  if (name.length > MAX_LENGTH) {
    const lastDot = name.lastIndexOf('.');
    // Includes the dot; empty when the name has no dot at all.
    const extension = lastDot > 0 ? name.slice(lastDot) : '';

    if (extension.length > 0 && extension.length < MAX_LENGTH) {
      // Trim the base again so truncation cannot leave "base..ext" or "base .ext".
      const base = name.slice(0, MAX_LENGTH - extension.length).replace(/[\s.]+$/, '');
      name = `${base}${extension}`;
    } else {
      // No extension, or one too long to keep: hard-truncate instead of
      // producing a dot-prefixed name that is longer than the input.
      name = name.slice(0, MAX_LENGTH).replace(/[\s.]+$/, '');
    }
  }

  // Fallback for empty names
  if (name.length === 0) {
    name = 'unnamed_file';
  }

  return name;
}
|