// Re_Backend/src/services/fileUpload/fileValidationService.ts
//
// 246 lines
// 9.6 KiB
// TypeScript

/**
* File Validation Service
* Pre-scan validation layer that catches suspicious files ClamAV won't flag:
* - Extension whitelist enforcement
* - MIME type ↔ extension mismatch detection
* - Double extension blocking (e.g., report.pdf.exe)
* - Path traversal blocking (e.g., ../../etc/passwd)
* - Magic bytes / file signature validation
* - Filename sanitization
*/
// ── Types ──
/**
* Outcome of validating a single uploaded file with `validateFile`.
*/
export interface FileValidationResult {
/** True only when no blocking problem was recorded, i.e. `errors` is empty. */
valid: boolean;
/** Blocking problems; any entry here makes the file invalid. */
errors: string[];
/** Non-blocking findings (MIME or file-header mismatches); they do not affect `valid`. */
warnings: string[];
/** The original filename after being passed through `sanitizeFilename`. */
sanitizedFilename: string;
/** MIME type inferred from the file's leading bytes, or null when no buffer was given or no known signature matched. */
detectedMimeType: string | null;
/** False when the declared MIME type is not one of those expected for the file's extension. */
mimeMatchesExtension: boolean;
}
// ── Allowed Extensions and MIME Mappings ──
// Upload whitelist: each permitted extension (lowercase, no leading dot) maps to
// the MIME types a client may legitimately declare for it. Key order is kept
// stable because the keys are joined into the "Allowed: ..." error message.
const EXTENSION_MIME_MAP: Record<string, string[]> = {
  // Office and PDF documents
  'pdf': ['application/pdf'],
  'doc': ['application/msword'],
  'docx': [
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
  ],
  'xls': ['application/vnd.ms-excel'],
  'xlsx': [
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
  ],
  'ppt': ['application/vnd.ms-powerpoint'],
  'pptx': [
    'application/vnd.openxmlformats-officedocument.presentationml.presentation',
  ],

  // Raster and vector images
  'jpg': ['image/jpeg'],
  'jpeg': ['image/jpeg'],
  'png': ['image/png'],
  'gif': ['image/gif'],
  'webp': ['image/webp'],
  'svg': ['image/svg+xml'],

  // Plain-text formats
  'txt': ['text/plain'],
  'csv': ['text/csv', 'text/plain', 'application/csv'],
  'md': ['text/markdown', 'text/plain'],
  'log': ['text/plain'],

  // Archives
  'zip': ['application/zip', 'application/x-zip-compressed'],
};
// Leading-byte signatures for the whitelisted binary formats. Formats that share a
// container (ZIP-based OOXML, OLE2-based legacy Office) share a signature, so the
// table is expanded from groups; group order fixes the final entry order.
const MAGIC_BYTES: Array<{ ext: string; bytes: number[]; offset?: number }> = (() => {
  const signatureGroups: Array<{ exts: string[]; bytes: number[] }> = [
    { exts: ['pdf'], bytes: [0x25, 0x50, 0x44, 0x46] }, // "%PDF"
    { exts: ['png'], bytes: [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a] }, // "\x89PNG\r\n"
    { exts: ['jpg', 'jpeg'], bytes: [0xff, 0xd8, 0xff] }, // JPEG start-of-image
    { exts: ['gif'], bytes: [0x47, 0x49, 0x46, 0x38] }, // "GIF8"
    { exts: ['zip', 'docx', 'xlsx', 'pptx'], bytes: [0x50, 0x4b, 0x03, 0x04] }, // "PK\x03\x04" (ZIP / OOXML)
    { exts: ['doc', 'xls', 'ppt'], bytes: [0xd0, 0xcf, 0x11, 0xe0] }, // OLE2 compound file
    { exts: ['webp'], bytes: [0x52, 0x49, 0x46, 0x46] }, // "RIFF" (WebP container)
  ];
  // One entry per extension, each with its own copy of the byte array.
  return signatureGroups.flatMap(({ exts, bytes }) =>
    exts.map((ext) => ({ ext, bytes: [...bytes] })),
  );
})();
// Leading-byte signatures of executable formats that must never be accepted,
// whatever extension or MIME type the upload claims. `name` is interpolated into
// the rejection message, so the names of existing entries are kept stable.
const DANGEROUS_MAGIC_BYTES: Array<{ name: string; bytes: number[] }> = [
  { name: 'Windows EXE/DLL (MZ)', bytes: [0x4D, 0x5A] }, // "MZ" DOS/PE header
  { name: 'ELF binary', bytes: [0x7F, 0x45, 0x4C, 0x46] }, // "\x7FELF"
  // 0xCAFEBABE is also the magic of Mach-O universal ("fat") binaries.
  { name: 'Java class', bytes: [0xCA, 0xFE, 0xBA, 0xBE] },
  { name: 'Mach-O binary', bytes: [0xCF, 0xFA, 0xED, 0xFE] }, // 64-bit, little-endian on disk
  { name: 'Windows shortcut', bytes: [0x4C, 0x00, 0x00, 0x00] }, // LNK header size field
  // Remaining thin Mach-O variants: 32-bit and big-endian images use different
  // magic numbers and would otherwise slip past the single 64-bit entry above.
  { name: 'Mach-O 32-bit', bytes: [0xCE, 0xFA, 0xED, 0xFE] }, // 32-bit, little-endian on disk
  { name: 'Mach-O 32-bit (big-endian)', bytes: [0xFE, 0xED, 0xFA, 0xCE] },
  { name: 'Mach-O 64-bit (big-endian)', bytes: [0xFE, 0xED, 0xFA, 0xCF] },
];
// Filename patterns that cause an upload to be rejected. Each pattern is tested
// against the raw, unsanitized original filename, and every match contributes its
// own `reason` to the error list (so one name can produce several errors).
const BLOCKED_PATTERNS: Array<{ pattern: RegExp; reason: string }> = [
  { pattern: /\.\./, reason: 'Path traversal attempt (../)' },
  { pattern: /[\/\\]/, reason: 'Path separator in filename' },
  { pattern: /\x00/, reason: 'Null byte in filename' },
  // macOS resource fork files (._filename) — metadata junk, not real documents
  { pattern: /^\._/, reason: 'macOS resource fork file (._prefix) — not a valid document' },
  // Hidden files (leading dot); "._" is excluded here because the entry above reports it
  { pattern: /^\.(?!_)/, reason: 'Hidden file (starts with dot)' },
  // Final extension is an executable, script, installer or disk-image type
  {
    pattern: /\.(exe|bat|cmd|com|msi|scr|pif|vbs|vbe|js|jse|wsf|wsh|ps1|sh|bash|cgi|pl|py|rb|jar|dll|sys|drv|ocx|cpl|inf|reg|rgs|sct|url|lnk|hta|chm|hlp|iso|img|dmg|deb|rpm|appimage)$/i,
    reason: 'Executable or dangerous file extension blocked'
  },
  // Double extensions (e.g., report.pdf.exe, image.jpg.vbs)
  {
    pattern: /\.(pdf|doc|docx|xls|xlsx|jpg|jpeg|png|gif|txt)\.(exe|bat|cmd|com|scr|pif|vbs|js|ps1|sh)$/i,
    reason: 'Double extension — possible disguised executable'
  },
  // Any extension followed by a common executable extension
  {
    pattern: /\.\w+\.(exe|bat|cmd|com|msi|scr|pif|vbs|vbe|js|jse|wsf|wsh|ps1|sh|bash)$/i,
    reason: 'Suspicious double extension'
  },
  // XSS payloads embedded in filenames. A bare "document." is deliberately NOT
  // matched: it would reject everyday names such as "document.pdf". Only
  // "document." followed by a DOM member commonly used in payloads is flagged.
  {
    pattern: /<script|javascript:|onerror=|onload=|onclick=|alert\(|eval\(|document\.(?:cookie|write(?:ln)?|location|domain|body|documentElement|getElement\w*|querySelector\w*|createElement|execCommand)\b/i,
    reason: 'Potential XSS payload in filename'
  },
];
// ── Core Validation Function ──
/**
 * Validate an uploaded file for security concerns.
 * This runs BEFORE ClamAV and catches things ClamAV won't flag.
 *
 * Checks, in order: extension whitelist, blocked filename patterns, size limits,
 * declared-MIME vs. extension consistency, and leading-byte signatures. Problems
 * are collected rather than thrown, so one call reports every finding.
 *
 * @param originalName - Filename exactly as supplied by the client (unsanitized).
 * @param mimeType - MIME type declared for the upload. Compared case-insensitively
 *   and without parameters (e.g. `; charset=utf-8`); `application/octet-stream`
 *   is treated as "unknown" and never counts as a mismatch.
 * @param fileBuffer - File content, or `null` to skip signature checks. Signature
 *   checks also need at least 4 bytes.
 * @param fileSizeBytes - Size of the upload in bytes.
 * @param maxSizeMB - Maximum permitted size in MiB (default 50).
 * @returns The validation outcome; `valid` is true only when `errors` is empty.
 *   `sanitizedFilename` is populated even for invalid files.
 */
export function validateFile(
  originalName: string,
  mimeType: string,
  fileBuffer: Buffer | null,
  fileSizeBytes: number,
  maxSizeMB: number = 50,
): FileValidationResult {
  const errors: string[] = [];
  const warnings: string[] = [];

  // 1. Extract and validate extension.
  // Only text after the LAST dot counts as the extension; a name with no dot has
  // none (so a file literally named "pdf" is not treated as a PDF).
  const lastDot = originalName.lastIndexOf('.');
  const ext = lastDot >= 0 ? originalName.slice(lastDot + 1).toLowerCase() : '';
  const allowedExtensions = Object.keys(EXTENSION_MIME_MAP);
  // Own-key membership only: inherited keys such as "constructor" or "__proto__"
  // must never be treated as whitelisted extensions.
  const extensionAllowed = ext !== '' && allowedExtensions.includes(ext);
  if (!ext) {
    errors.push('File has no extension');
  } else if (!extensionAllowed) {
    errors.push(`File extension ".${ext}" is not allowed. Allowed: ${allowedExtensions.join(', ')}`);
  }

  // 2. Check blocked filename patterns (path traversal, executables, double extensions, macOS resource forks)
  for (const { pattern, reason } of BLOCKED_PATTERNS) {
    if (pattern.test(originalName)) {
      errors.push(`Blocked filename: ${reason}`);
    }
  }

  // 3. File size validation
  const maxSizeBytes = maxSizeMB * 1024 * 1024;
  if (fileSizeBytes > maxSizeBytes) {
    errors.push(`File size (${(fileSizeBytes / 1024 / 1024).toFixed(1)}MB) exceeds limit (${maxSizeMB}MB)`);
  }
  if (fileSizeBytes === 0) {
    errors.push('File is empty (0 bytes)');
  }

  // 4. MIME type ↔ extension mismatch detection (warning only — browsers/multer can report wrong MIME)
  let mimeMatchesExtension = true;
  if (extensionAllowed) {
    const allowedMimes = EXTENSION_MIME_MAP[ext] ?? [];
    // Media types are case-insensitive and may carry parameters; compare the bare type.
    const declaredMime = (mimeType.split(';')[0] ?? '').trim().toLowerCase();
    if (!allowedMimes.includes(declaredMime) && declaredMime !== 'application/octet-stream') {
      mimeMatchesExtension = false;
      warnings.push(
        `MIME type mismatch: file claims ".${ext}" but has MIME "${mimeType}". ` +
        `Expected: ${allowedMimes.join(' or ')}`
      );
    }
  }

  // 5. Magic bytes / file signature validation
  let detectedMimeType: string | null = null;
  if (fileBuffer && fileBuffer.length >= 4) {
    // Check for dangerous executable signatures FIRST
    for (const { name, bytes } of DANGEROUS_MAGIC_BYTES) {
      if (matchesBytes(fileBuffer, bytes)) {
        errors.push(`File contains ${name} binary signature — executable files are blocked`);
      }
    }

    // Check if magic bytes match claimed extension
    if (ext) {
      const expectedSignatures = MAGIC_BYTES.filter((sig) => sig.ext === ext);
      if (expectedSignatures.length > 0) {
        const matchesAny = expectedSignatures.some((sig) => matchesBytes(fileBuffer, sig.bytes, sig.offset));
        if (!matchesAny) {
          // Warning only — some legitimate files have variant headers
          // ClamAV will do the real malware check
          warnings.push(
            `File header does not match ".${ext}" signature — file may be corrupted or mislabeled`
          );
        }
      }
    }

    // Detect actual type from magic bytes. The first matching table entry wins, and
    // each signature's own offset is honoured, consistent with the check above.
    for (const { ext: detectedExt, bytes, offset } of MAGIC_BYTES) {
      if (matchesBytes(fileBuffer, bytes, offset)) {
        detectedMimeType = EXTENSION_MIME_MAP[detectedExt]?.[0] ?? null;
        break;
      }
    }
  }

  // 6. Sanitize filename
  const sanitizedFilename = sanitizeFilename(originalName);

  return {
    valid: errors.length === 0,
    errors,
    warnings,
    sanitizedFilename,
    detectedMimeType,
    mimeMatchesExtension,
  };
}
// ── Helpers ──
/**
 * Report whether `buffer` contains exactly the sequence `bytes` starting at `offset`.
 * Returns false when the buffer is too short to hold the whole sequence there.
 */
function matchesBytes(buffer: Buffer, bytes: number[], offset: number = 0): boolean {
  const requiredLength = offset + bytes.length;
  if (requiredLength > buffer.length) {
    return false;
  }
  for (let i = 0; i < bytes.length; i += 1) {
    if (buffer[offset + i] !== bytes[i]) {
      return false;
    }
  }
  return true;
}
/**
 * Sanitize a filename: keep only the final path segment, neutralize dangerous
 * characters and script-like keywords, tidy dots and spaces, and cap the length.
 *
 * No UUID or other uniqueness prefix is added; callers that need unique storage
 * names must add one themselves.
 *
 * @param original - Filename as supplied by the client; may contain path
 *   components, control characters or line breaks.
 * @returns A name of at most 200 characters with no path separators, or
 *   `'unnamed_file'` when nothing usable remains.
 */
export function sanitizeFilename(original: string): string {
  const MAX_LENGTH = 200;

  // Strip path components. Splitting on separators (instead of matching `^.*[\\/]`)
  // also works when the name contains line breaks, which `.` cannot cross.
  let name = original.split(/[\\/]/).pop() ?? '';
  // Remove null bytes
  name = name.replace(/\x00/g, '');
  // Replace dangerous characters including XSS-prone characters
  name = name.replace(/[<>:"|?*\x00-\x1F\x7F]/g, '_');
  // More aggressive XSS sanitization (replace suspicious keywords)
  name = name.replace(/(onerror|onload|onclick|onmouseover|onfocus|alert|eval|javascript|vbscript|script|expression|document)/gi, 'safe');
  // Collapse multiple dots
  name = name.replace(/\.{2,}/g, '.');
  // Trim leading/trailing dots and spaces
  name = name.replace(/^[\s.]+|[\s.]+$/g, '');

  // Limit length, keeping the extension when there is a usable one.
  if (name.length > MAX_LENGTH) {
    // After trimming, any dot is at index > 0, so a match here is a real extension separator.
    const lastDot = name.lastIndexOf('.');
    const ext = lastDot > 0 ? name.slice(lastDot + 1) : '';
    if (ext.length > 0 && ext.length < MAX_LENGTH - 1) {
      // Re-trim the cut point so truncation cannot produce "base..ext" or "base .ext".
      const base = name.slice(0, MAX_LENGTH - ext.length - 1).replace(/[\s.]+$/, '');
      name = `${base}.${ext}`;
    } else {
      // No extension, or one too long to preserve: hard-truncate the whole name.
      name = name.slice(0, MAX_LENGTH).replace(/[\s.]+$/, '');
    }
  }

  // Fallback for empty names
  return name || 'unnamed_file';
}