diff --git a/src/jobs/form16SapResponseJob.ts b/src/jobs/form16SapResponseJob.ts index a7172c8..fbe4d3b 100644 --- a/src/jobs/form16SapResponseJob.ts +++ b/src/jobs/form16SapResponseJob.ts @@ -96,6 +96,19 @@ async function processOutgoingFile(fileName: string, resolvedOutgoingDir: string updatedAt: new Date(), }); + // Delete source CSV only after successful DB persistence + read-marking. + // SAP team keeps a parallel archive copy, so main OUTGOING can be safely cleaned. + const sourcePath = path.join(resolvedOutgoingDir, fileName); + try { + if (fs.existsSync(sourcePath)) { + fs.unlinkSync(sourcePath); + logger.info(`[Form16 SAP Job] Deleted processed OUTGOING file: ${sourcePath}`); + } + } catch (e) { + // Keep processing successful even if cleanup fails; next pull will skip due to read marker. + logger.warn(`[Form16 SAP Job] Could not delete processed file: ${sourcePath}`, e); + } + return counts; } diff --git a/src/services/form16.service.ts b/src/services/form16.service.ts index d2e1332..4508e23 100644 --- a/src/services/form16.service.ts +++ b/src/services/form16.service.ts @@ -7,7 +7,7 @@ */ import crypto from 'crypto'; -import { Op, fn, col, QueryTypes } from 'sequelize'; +import { Op, fn, col, QueryTypes, where as sqlWhere } from 'sequelize'; import { sequelize } from '../config/database'; import { Form16CreditNote, @@ -122,7 +122,7 @@ export async function getLatest26asAggregatedForQuarter( const [row] = await sequelize.query<{ sum: string }>( `WITH latest_upload AS ( SELECT MAX(upload_log_id) AS mid FROM tds_26as_entries - WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(tan_number, '')), '[^A-Z0-9]', '', 'g')) = :tan + WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(tan_number, '')), '[^a-zA-Z0-9]', '', 'g')) = :tan AND financial_year = :fy AND quarter = :qtr AND UPPER(TRIM(COALESCE(section_code, ''))) = :section AND UPPER(TRIM(COALESCE(status_oltas, ''))) IN ('F', 'O') @@ -130,7 +130,7 @@ export async function getLatest26asAggregatedForQuarter( ) SELECT COALESCE(SUM(e.tax_deducted), 0)::text AS sum FROM tds_26as_entries e - WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(e.tan_number, '')), '[^A-Z0-9]', '', 'g')) = :tan + WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(e.tan_number, '')), '[^a-zA-Z0-9]', '', 'g')) = :tan AND e.financial_year = :fy AND e.quarter = :qtr AND UPPER(TRIM(COALESCE(e.section_code, ''))) = :section AND UPPER(TRIM(COALESCE(e.status_oltas, ''))) IN ('F', 'O') @@ -165,7 +165,7 @@ async function getLatest26asRowsForQuarter( }>( `WITH latest_upload AS ( SELECT MAX(upload_log_id) AS mid FROM tds_26as_entries - WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(tan_number, '')), '[^A-Z0-9]', '', 'g')) = :tan + WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(tan_number, '')), '[^a-zA-Z0-9]', '', 'g')) = :tan AND financial_year = :fy AND quarter = :qtr AND UPPER(TRIM(COALESCE(section_code, ''))) = :section AND UPPER(TRIM(COALESCE(status_oltas, ''))) IN ('F', 'O') @@ -179,7 +179,7 @@ async function getLatest26asRowsForQuarter( e.transaction_date, e.date_of_booking FROM tds_26as_entries e - WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(e.tan_number, '')), '[^A-Z0-9]', '', 'g')) = :tan + WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(e.tan_number, '')), '[^a-zA-Z0-9]', '', 'g')) = :tan AND e.financial_year = :fy AND e.quarter = :qtr AND UPPER(TRIM(COALESCE(e.section_code, ''))) = :section @@ -221,7 +221,7 @@ async function get26asCoverageDebug(tanNumber: string, financialYear: string, qu END )::text AS matching_194q_f_o_rows FROM tds_26as_entries e - WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(e.tan_number, '')), '[^A-Z0-9]', '', 'g')) = :tan + WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(e.tan_number, '')), '[^a-zA-Z0-9]', '', 'g')) = :tan AND e.financial_year = :fy AND e.quarter = :q`, { replacements: { tan: normalizedTan, fy, q, section: SECTION_26AS_194Q }, type: QueryTypes.SELECT } @@ -234,7 +234,7 @@ async function get26asCoverageDebug(tanNumber: string, financialYear: string, qu status_oltas, COUNT(*)::text AS cnt FROM tds_26as_entries e - WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(e.tan_number, '')), '[^A-Z0-9]', '', 'g')) = :tan + WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(e.tan_number, '')), '[^a-zA-Z0-9]', '', 'g')) = :tan AND e.financial_year = :fy AND e.quarter = :q GROUP BY section_code, status_oltas @@ -273,6 +273,30 @@ function normalizeDateOnly(value: unknown): string | null { return `${yyyy}-${mm}-${dd}`; } + // Handle OCR values like "13-Jan-2025" without timezone conversion. + const m2 = raw.match(/^(\d{1,2})[-\/]([A-Za-z]{3,9})[-\/](\d{4})$/); + if (m2) { + const dd = m2[1].padStart(2, '0'); + const mon = m2[2].toLowerCase(); + const yyyy = m2[3]; + const monthMap: Record = { + jan: '01', january: '01', + feb: '02', february: '02', + mar: '03', march: '03', + apr: '04', april: '04', + may: '05', + jun: '06', june: '06', + jul: '07', july: '07', + aug: '08', august: '08', + sep: '09', sept: '09', september: '09', + oct: '10', october: '10', + nov: '11', november: '11', + dec: '12', december: '12', + }; + const mm = monthMap[mon]; + if (mm) return `${yyyy}-${mm}-${dd}`; + } + const d = new Date(raw); if (!Number.isNaN(d.getTime())) return d.toISOString().slice(0, 10); return null; @@ -782,7 +806,20 @@ async function run26asMatchAndCreditNote(submission: Form16aSubmission): Promise const submittedTaxDeducted = toNumberOrNull(extracted.totalTaxDeducted ?? sub.tdsAmount); const submittedTdsDeposited = toNumberOrNull(extracted.totalTdsDeposited ?? sub.tdsAmount); const submittedTransactionDate = normalizeDateOnly(extracted.transactionDate); - const submittedBookingDate = normalizeDateOnly(extracted.dateOfBooking); + const submittedLastUpdatedOn = normalizeDateOnly(extracted.certificateDate ?? extracted.lastUpdatedOn ?? extracted.lastUpdatedDate); + + // Mandatory for matching: Form 16A "Last updated on" must be extracted and matched to 26AS booking date. + if (!submittedLastUpdatedOn) { + const msg = 'OCR could not extract "Last updated on" date from Form 16A. Please resubmit a clear document.'; + await submission.update({ + validationStatus: 'resubmission_needed', + validationNotes: msg, + }); + return { + validationStatus: 'resubmission_needed', + validationNotes: msg, + }; + } // Latest 26AS upload rows for the same TAN + FY + Quarter. let latestRows = await getLatest26asRowsForQuarter(tanNumber, financialYear, quarter); @@ -790,7 +827,7 @@ async function run26asMatchAndCreditNote(submission: Form16aSubmission): Promise // If OCR extracted FY/Quarter incorrectly, derive FY/Quarter from OCR dates and retry. if (latestRows.length === 0) { const derivedFromTx = deriveFyAndQuarterFromDateOnly(submittedTransactionDate); - const derivedFromBooking = deriveFyAndQuarterFromDateOnly(submittedBookingDate); + const derivedFromBooking = deriveFyAndQuarterFromDateOnly(submittedLastUpdatedOn); const derived = derivedFromTx || derivedFromBooking; if (derived && (derived.financialYear !== financialYear || derived.quarter !== quarter)) { const altRows = await getLatest26asRowsForQuarter(tanNumber, derived.financialYear, derived.quarter); @@ -914,16 +951,15 @@ async function run26asMatchAndCreditNote(submission: Form16aSubmission): Promise return { validationStatus: 'failed', validationNotes: 'Transaction date mismatch with latest 26AS.' }; } } - if (submittedBookingDate) { - const hasBookingDate = latestRows.some((r) => normalizeDateOnly(r.dateOfBooking) === submittedBookingDate); - if (!hasBookingDate) { - await submission.update({ - validationStatus: 'failed', - validationNotes: - `Booking date mismatch with latest 26AS for TAN no - ${tanNumber}. No latest 26AS record found with booking date ${submittedBookingDate}.`, - }); - return { validationStatus: 'failed', validationNotes: 'Booking date mismatch with latest 26AS.' }; - } + // Match Form 16A "Last updated on" against 26AS "Date of Booking" + const hasBookingDate = latestRows.some((r) => normalizeDateOnly(r.dateOfBooking) === submittedLastUpdatedOn); + if (!hasBookingDate) { + await submission.update({ + validationStatus: 'failed', + validationNotes: + `Last updated on date mismatch with latest 26AS booking date for TAN no - ${tanNumber}. Form 16A last updated on: ${submittedLastUpdatedOn}.`, + }); + return { validationStatus: 'failed', validationNotes: 'Last updated on date mismatch with latest 26AS booking date.' }; } if (Math.abs(tdsAmount - aggregated26as) > AMOUNT_MATCH_TOLERANCE) { @@ -2241,13 +2277,44 @@ export interface List26asSummary { function build26asWhere(filters?: List26asFilters): Record { const where: Record = {}; - if (filters?.financialYear) where.financialYear = filters.financialYear; - if (filters?.quarter) where.quarter = filters.quarter; - if (filters?.tanNumber) where.tanNumber = { [Op.iLike]: `%${filters.tanNumber}%` }; - if (filters?.search?.trim()) where.deductorName = { [Op.iLike]: `%${filters.search.trim()}%` }; + const andClauses: unknown[] = []; + + if (filters?.financialYear) where.financialYear = normalizeFinancialYear(filters.financialYear) || filters.financialYear; + if (filters?.quarter) where.quarter = normalizeQuarter(filters.quarter) || filters.quarter; if (filters?.status) where.statusOltas = filters.status; if (filters?.assessmentYear) where.assessmentYear = filters.assessmentYear; if (filters?.sectionCode) where.sectionCode = filters.sectionCode; + + if (filters?.tanNumber?.trim()) { + const normalizedTan = normalizeTanNumber(filters.tanNumber); + if (normalizedTan) { + andClauses.push( + sqlWhere( + fn('upper', fn('regexp_replace', fn('coalesce', col('tan_number'), ''), '[^a-zA-Z0-9]', '', 'g')), + { [Op.like]: `%${normalizedTan}%` } + ) + ); + } + } + + if (filters?.search?.trim()) { + const s = filters.search.trim(); + const normalizedSearchTan = normalizeTanNumber(s); + const searchOr: unknown[] = [{ deductorName: { [Op.iLike]: `%${s}%` } }]; + if (normalizedSearchTan) { + searchOr.push( + sqlWhere( + fn('upper', fn('regexp_replace', fn('coalesce', col('tan_number'), ''), '[^a-zA-Z0-9]', '', 'g')), + { [Op.like]: `%${normalizedSearchTan}%` } + ) + ); + } + andClauses.push({ [Op.or]: searchOr }); + } + + if (andClauses.length > 0) { + (where as any)[Op.and] = andClauses; + } return where; } @@ -2257,7 +2324,8 @@ export async function list26asEntries(filters?: List26asFilters): Promise<{ summary: List26asSummary; }> { const where = build26asWhere(filters); - const hasWhere = Object.keys(where).length > 0; + // Use Reflect.ownKeys so symbol keys like Op.and are counted. + const hasWhere = Reflect.ownKeys(where).length > 0; const limit = Math.min(MAX_PAGE_SIZE, Math.max(1, filters?.limit ?? DEFAULT_PAGE_SIZE)); const offset = Math.max(0, filters?.offset ?? 0); diff --git a/src/services/form16Ocr.service.ts b/src/services/form16Ocr.service.ts index 7b98b82..c407e57 100644 --- a/src/services/form16Ocr.service.ts +++ b/src/services/form16Ocr.service.ts @@ -79,7 +79,7 @@ STEP 2 - Extract these fields. For amounts, look in TABLES: find rows or columns 8. statusOfMatchingOltas - "Status of matching with OLTAS" or "OLTAS". Single letter (F, O, M) or word like "Matched". Extract as shown. -9. dateOfBooking - "Date of booking" or "Date of deposit". DD-MM-YYYY or DD/MM/YYYY. +9. dateOfBooking - For this workflow, use Form 16A "Last updated on" (or "Date of certificate") as booking date. DD-MM-YYYY or DD/MM/YYYY. 10. assessmentYear - "Assessment Year" or "AY" from the form header. Format YYYY-YY (e.g. 2025-26). This is the Form 16A assessment year. @@ -355,8 +355,13 @@ function extractAssessmentYear(text: string): string | null { function extractCertificateDate(text: string): string | null { const patterns = [ + /Certificate\s*No\.?[^\n\r]*?Last\s*updated\s*on[:\s]*([0-9]{1,2}[-\/][A-Za-z]{3,9}[-\/][0-9]{4})/i, + /Certificate\s*No\.?[^\n\r]*?Last\s*updated\s*on[:\s]*([0-9]{1,2}[-\/][0-9]{1,2}[-\/][0-9]{4})/i, + /Last\s*updated\s*on[:\s]*([0-9]{1,2}[-\/][A-Za-z]{3,9}[-\/][0-9]{4})/i, + /Last\s*updated\s*on[:\s]*([0-9]{1,2}[-\/][0-9]{1,2}[-\/][0-9]{4})/i, /Certificate\s*Date[:\s]*([0-9]{1,2}[-/][0-9]{1,2}[-/][0-9]{4})/i, - /Date[:\s]*([0-9]{1,2}[-/][0-9]{1,2}[-/][0-9]{4})/i, + /Date\s+of\s+certificate[:\s]*([0-9]{1,2}[-\/][A-Za-z]{3,9}[-\/][0-9]{4})/i, + /Date\s+of\s+certificate[:\s]*([0-9]{1,2}[-\/][0-9]{1,2}[-\/][0-9]{4})/i, /Issued\s*on[:\s]*([0-9]{1,2}[-/][0-9]{1,2}[-/][0-9]{4})/i, ]; for (const pattern of patterns) { @@ -393,7 +398,8 @@ function parseForm16ARawText(text: string): Form16AExtractedData { const transactionDate = extractTransactionDate(fullText); const statusOfMatchingOltas = extractOltasStatus(fullText); const certificateDate = extractCertificateDate(fullText); - const dateOfBooking = extractDateOfBooking(fullText); + // Business rule: Form 16A "Last updated on" is the booking date used for 26AS matching. + const dateOfBooking = certificateDate ?? extractDateOfBooking(fullText); let financialYear = extractFinancialYear(fullText); if (!financialYear && assessmentYear) { const parts = assessmentYear.split(/[-/]/).map((p) => parseInt(p, 10)); @@ -524,7 +530,8 @@ function sanitizeAndCleanGeminiData(extracted: Record): Form16A natureOfPayment: getStr(extracted.natureOfPayment), transactionDate: getStr(extracted.transactionDate), statusOfMatchingOltas: getStr(extracted.statusOfMatchingOltas), - dateOfBooking: getStr(extracted.dateOfBooking), + // Business rule: map "Last updated on" (certificateDate) as booking date for matching/UI. + dateOfBooking: getStr(extracted.certificateDate ?? (extracted as any).lastUpdatedOn ?? extracted.dateOfBooking), assessmentYear: getStr(extracted.assessmentYear), quarter, form16aNumber, @@ -586,6 +593,21 @@ async function extractWithVertexAI(filePath: string, fileBase64: string, mimeTyp return await fallbackExtraction(filePath); } const data = sanitizeAndCleanGeminiData(extractedData); + + // Deterministic safeguard: re-parse raw PDF text and prefer the header "Last updated on" date + // to avoid model picking unrelated "Date" fields (e.g., verification/challan rows). + try { + const fallback = await fallbackExtraction(filePath); + const fallbackData = fallback.success ? (fallback.data as Form16AExtractedData | undefined) : undefined; + const fallbackCert = getStr(fallbackData?.certificateDate); + if (fallbackCert) { + data.certificateDate = fallbackCert; + data.dateOfBooking = fallbackCert; + } + } catch (overrideErr) { + logger.warn('[Form16 OCR] Could not apply fallback date override:', overrideErr); + } + logger.info('[Form16 OCR] Vertex AI extraction completed successfully'); return { success: true,