Fix Form 16A "Last updated on" date extraction and matching against 26AS booking date

This commit is contained in:
Aaditya Jaiswal 2026-03-27 18:09:13 +05:30
parent 0aec45f7aa
commit 8e176cdf25
3 changed files with 131 additions and 28 deletions

View File

@ -96,6 +96,19 @@ async function processOutgoingFile(fileName: string, resolvedOutgoingDir: string
updatedAt: new Date(), updatedAt: new Date(),
}); });
// Delete source CSV only after successful DB persistence + read-marking.
// SAP team keeps a parallel archive copy, so main OUTGOING can be safely cleaned.
const sourcePath = path.join(resolvedOutgoingDir, fileName);
try {
if (fs.existsSync(sourcePath)) {
fs.unlinkSync(sourcePath);
logger.info(`[Form16 SAP Job] Deleted processed OUTGOING file: ${sourcePath}`);
}
} catch (e) {
// Keep processing successful even if cleanup fails; next pull will skip due to read marker.
logger.warn(`[Form16 SAP Job] Could not delete processed file: ${sourcePath}`, e);
}
return counts; return counts;
} }

View File

@ -7,7 +7,7 @@
*/ */
import crypto from 'crypto'; import crypto from 'crypto';
import { Op, fn, col, QueryTypes } from 'sequelize'; import { Op, fn, col, QueryTypes, where as sqlWhere } from 'sequelize';
import { sequelize } from '../config/database'; import { sequelize } from '../config/database';
import { import {
Form16CreditNote, Form16CreditNote,
@ -122,7 +122,7 @@ export async function getLatest26asAggregatedForQuarter(
const [row] = await sequelize.query<{ sum: string }>( const [row] = await sequelize.query<{ sum: string }>(
`WITH latest_upload AS ( `WITH latest_upload AS (
SELECT MAX(upload_log_id) AS mid FROM tds_26as_entries SELECT MAX(upload_log_id) AS mid FROM tds_26as_entries
WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(tan_number, '')), '[^A-Z0-9]', '', 'g')) = :tan WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(tan_number, '')), '[^a-zA-Z0-9]', '', 'g')) = :tan
AND financial_year = :fy AND quarter = :qtr AND financial_year = :fy AND quarter = :qtr
AND UPPER(TRIM(COALESCE(section_code, ''))) = :section AND UPPER(TRIM(COALESCE(section_code, ''))) = :section
AND UPPER(TRIM(COALESCE(status_oltas, ''))) IN ('F', 'O') AND UPPER(TRIM(COALESCE(status_oltas, ''))) IN ('F', 'O')
@ -130,7 +130,7 @@ export async function getLatest26asAggregatedForQuarter(
) )
SELECT COALESCE(SUM(e.tax_deducted), 0)::text AS sum SELECT COALESCE(SUM(e.tax_deducted), 0)::text AS sum
FROM tds_26as_entries e FROM tds_26as_entries e
WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(e.tan_number, '')), '[^A-Z0-9]', '', 'g')) = :tan WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(e.tan_number, '')), '[^a-zA-Z0-9]', '', 'g')) = :tan
AND e.financial_year = :fy AND e.quarter = :qtr AND e.financial_year = :fy AND e.quarter = :qtr
AND UPPER(TRIM(COALESCE(e.section_code, ''))) = :section AND UPPER(TRIM(COALESCE(e.section_code, ''))) = :section
AND UPPER(TRIM(COALESCE(e.status_oltas, ''))) IN ('F', 'O') AND UPPER(TRIM(COALESCE(e.status_oltas, ''))) IN ('F', 'O')
@ -165,7 +165,7 @@ async function getLatest26asRowsForQuarter(
}>( }>(
`WITH latest_upload AS ( `WITH latest_upload AS (
SELECT MAX(upload_log_id) AS mid FROM tds_26as_entries SELECT MAX(upload_log_id) AS mid FROM tds_26as_entries
WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(tan_number, '')), '[^A-Z0-9]', '', 'g')) = :tan WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(tan_number, '')), '[^a-zA-Z0-9]', '', 'g')) = :tan
AND financial_year = :fy AND quarter = :qtr AND financial_year = :fy AND quarter = :qtr
AND UPPER(TRIM(COALESCE(section_code, ''))) = :section AND UPPER(TRIM(COALESCE(section_code, ''))) = :section
AND UPPER(TRIM(COALESCE(status_oltas, ''))) IN ('F', 'O') AND UPPER(TRIM(COALESCE(status_oltas, ''))) IN ('F', 'O')
@ -179,7 +179,7 @@ async function getLatest26asRowsForQuarter(
e.transaction_date, e.transaction_date,
e.date_of_booking e.date_of_booking
FROM tds_26as_entries e FROM tds_26as_entries e
WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(e.tan_number, '')), '[^A-Z0-9]', '', 'g')) = :tan WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(e.tan_number, '')), '[^a-zA-Z0-9]', '', 'g')) = :tan
AND e.financial_year = :fy AND e.financial_year = :fy
AND e.quarter = :qtr AND e.quarter = :qtr
AND UPPER(TRIM(COALESCE(e.section_code, ''))) = :section AND UPPER(TRIM(COALESCE(e.section_code, ''))) = :section
@ -221,7 +221,7 @@ async function get26asCoverageDebug(tanNumber: string, financialYear: string, qu
END END
)::text AS matching_194q_f_o_rows )::text AS matching_194q_f_o_rows
FROM tds_26as_entries e FROM tds_26as_entries e
WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(e.tan_number, '')), '[^A-Z0-9]', '', 'g')) = :tan WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(e.tan_number, '')), '[^a-zA-Z0-9]', '', 'g')) = :tan
AND e.financial_year = :fy AND e.financial_year = :fy
AND e.quarter = :q`, AND e.quarter = :q`,
{ replacements: { tan: normalizedTan, fy, q, section: SECTION_26AS_194Q }, type: QueryTypes.SELECT } { replacements: { tan: normalizedTan, fy, q, section: SECTION_26AS_194Q }, type: QueryTypes.SELECT }
@ -234,7 +234,7 @@ async function get26asCoverageDebug(tanNumber: string, financialYear: string, qu
status_oltas, status_oltas,
COUNT(*)::text AS cnt COUNT(*)::text AS cnt
FROM tds_26as_entries e FROM tds_26as_entries e
WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(e.tan_number, '')), '[^A-Z0-9]', '', 'g')) = :tan WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(e.tan_number, '')), '[^a-zA-Z0-9]', '', 'g')) = :tan
AND e.financial_year = :fy AND e.financial_year = :fy
AND e.quarter = :q AND e.quarter = :q
GROUP BY section_code, status_oltas GROUP BY section_code, status_oltas
@ -273,6 +273,30 @@ function normalizeDateOnly(value: unknown): string | null {
return `${yyyy}-${mm}-${dd}`; return `${yyyy}-${mm}-${dd}`;
} }
// Handle OCR values like "13-Jan-2025" without timezone conversion.
const m2 = raw.match(/^(\d{1,2})[-\/]([A-Za-z]{3,9})[-\/](\d{4})$/);
if (m2) {
const dd = m2[1].padStart(2, '0');
const mon = m2[2].toLowerCase();
const yyyy = m2[3];
const monthMap: Record<string, string> = {
jan: '01', january: '01',
feb: '02', february: '02',
mar: '03', march: '03',
apr: '04', april: '04',
may: '05',
jun: '06', june: '06',
jul: '07', july: '07',
aug: '08', august: '08',
sep: '09', sept: '09', september: '09',
oct: '10', october: '10',
nov: '11', november: '11',
dec: '12', december: '12',
};
const mm = monthMap[mon];
if (mm) return `${yyyy}-${mm}-${dd}`;
}
const d = new Date(raw); const d = new Date(raw);
if (!Number.isNaN(d.getTime())) return d.toISOString().slice(0, 10); if (!Number.isNaN(d.getTime())) return d.toISOString().slice(0, 10);
return null; return null;
@ -782,7 +806,20 @@ async function run26asMatchAndCreditNote(submission: Form16aSubmission): Promise
const submittedTaxDeducted = toNumberOrNull(extracted.totalTaxDeducted ?? sub.tdsAmount); const submittedTaxDeducted = toNumberOrNull(extracted.totalTaxDeducted ?? sub.tdsAmount);
const submittedTdsDeposited = toNumberOrNull(extracted.totalTdsDeposited ?? sub.tdsAmount); const submittedTdsDeposited = toNumberOrNull(extracted.totalTdsDeposited ?? sub.tdsAmount);
const submittedTransactionDate = normalizeDateOnly(extracted.transactionDate); const submittedTransactionDate = normalizeDateOnly(extracted.transactionDate);
const submittedBookingDate = normalizeDateOnly(extracted.dateOfBooking); const submittedLastUpdatedOn = normalizeDateOnly(extracted.certificateDate ?? extracted.lastUpdatedOn ?? extracted.lastUpdatedDate);
// Mandatory for matching: Form 16A "Last updated on" must be extracted and matched to 26AS booking date.
if (!submittedLastUpdatedOn) {
const msg = 'OCR could not extract "Last updated on" date from Form 16A. Please resubmit a clear document.';
await submission.update({
validationStatus: 'resubmission_needed',
validationNotes: msg,
});
return {
validationStatus: 'resubmission_needed',
validationNotes: msg,
};
}
// Latest 26AS upload rows for the same TAN + FY + Quarter. // Latest 26AS upload rows for the same TAN + FY + Quarter.
let latestRows = await getLatest26asRowsForQuarter(tanNumber, financialYear, quarter); let latestRows = await getLatest26asRowsForQuarter(tanNumber, financialYear, quarter);
@ -790,7 +827,7 @@ async function run26asMatchAndCreditNote(submission: Form16aSubmission): Promise
// If OCR extracted FY/Quarter incorrectly, derive FY/Quarter from OCR dates and retry. // If OCR extracted FY/Quarter incorrectly, derive FY/Quarter from OCR dates and retry.
if (latestRows.length === 0) { if (latestRows.length === 0) {
const derivedFromTx = deriveFyAndQuarterFromDateOnly(submittedTransactionDate); const derivedFromTx = deriveFyAndQuarterFromDateOnly(submittedTransactionDate);
const derivedFromBooking = deriveFyAndQuarterFromDateOnly(submittedBookingDate); const derivedFromBooking = deriveFyAndQuarterFromDateOnly(submittedLastUpdatedOn);
const derived = derivedFromTx || derivedFromBooking; const derived = derivedFromTx || derivedFromBooking;
if (derived && (derived.financialYear !== financialYear || derived.quarter !== quarter)) { if (derived && (derived.financialYear !== financialYear || derived.quarter !== quarter)) {
const altRows = await getLatest26asRowsForQuarter(tanNumber, derived.financialYear, derived.quarter); const altRows = await getLatest26asRowsForQuarter(tanNumber, derived.financialYear, derived.quarter);
@ -914,16 +951,15 @@ async function run26asMatchAndCreditNote(submission: Form16aSubmission): Promise
return { validationStatus: 'failed', validationNotes: 'Transaction date mismatch with latest 26AS.' }; return { validationStatus: 'failed', validationNotes: 'Transaction date mismatch with latest 26AS.' };
} }
} }
if (submittedBookingDate) { // Match Form 16A "Last updated on" against 26AS "Date of Booking"
const hasBookingDate = latestRows.some((r) => normalizeDateOnly(r.dateOfBooking) === submittedBookingDate); const hasBookingDate = latestRows.some((r) => normalizeDateOnly(r.dateOfBooking) === submittedLastUpdatedOn);
if (!hasBookingDate) { if (!hasBookingDate) {
await submission.update({ await submission.update({
validationStatus: 'failed', validationStatus: 'failed',
validationNotes: validationNotes:
`Booking date mismatch with latest 26AS for TAN no - ${tanNumber}. No latest 26AS record found with booking date ${submittedBookingDate}.`, `Last updated on date mismatch with latest 26AS booking date for TAN no - ${tanNumber}. Form 16A last updated on: ${submittedLastUpdatedOn}.`,
}); });
return { validationStatus: 'failed', validationNotes: 'Booking date mismatch with latest 26AS.' }; return { validationStatus: 'failed', validationNotes: 'Last updated on date mismatch with latest 26AS booking date.' };
}
} }
if (Math.abs(tdsAmount - aggregated26as) > AMOUNT_MATCH_TOLERANCE) { if (Math.abs(tdsAmount - aggregated26as) > AMOUNT_MATCH_TOLERANCE) {
@ -2241,13 +2277,44 @@ export interface List26asSummary {
function build26asWhere(filters?: List26asFilters): Record<string, unknown> { function build26asWhere(filters?: List26asFilters): Record<string, unknown> {
const where: Record<string, unknown> = {}; const where: Record<string, unknown> = {};
if (filters?.financialYear) where.financialYear = filters.financialYear; const andClauses: unknown[] = [];
if (filters?.quarter) where.quarter = filters.quarter;
if (filters?.tanNumber) where.tanNumber = { [Op.iLike]: `%${filters.tanNumber}%` }; if (filters?.financialYear) where.financialYear = normalizeFinancialYear(filters.financialYear) || filters.financialYear;
if (filters?.search?.trim()) where.deductorName = { [Op.iLike]: `%${filters.search.trim()}%` }; if (filters?.quarter) where.quarter = normalizeQuarter(filters.quarter) || filters.quarter;
if (filters?.status) where.statusOltas = filters.status; if (filters?.status) where.statusOltas = filters.status;
if (filters?.assessmentYear) where.assessmentYear = filters.assessmentYear; if (filters?.assessmentYear) where.assessmentYear = filters.assessmentYear;
if (filters?.sectionCode) where.sectionCode = filters.sectionCode; if (filters?.sectionCode) where.sectionCode = filters.sectionCode;
if (filters?.tanNumber?.trim()) {
const normalizedTan = normalizeTanNumber(filters.tanNumber);
if (normalizedTan) {
andClauses.push(
sqlWhere(
fn('upper', fn('regexp_replace', fn('coalesce', col('tan_number'), ''), '[^a-zA-Z0-9]', '', 'g')),
{ [Op.like]: `%${normalizedTan}%` }
)
);
}
}
if (filters?.search?.trim()) {
const s = filters.search.trim();
const normalizedSearchTan = normalizeTanNumber(s);
const searchOr: unknown[] = [{ deductorName: { [Op.iLike]: `%${s}%` } }];
if (normalizedSearchTan) {
searchOr.push(
sqlWhere(
fn('upper', fn('regexp_replace', fn('coalesce', col('tan_number'), ''), '[^a-zA-Z0-9]', '', 'g')),
{ [Op.like]: `%${normalizedSearchTan}%` }
)
);
}
andClauses.push({ [Op.or]: searchOr });
}
if (andClauses.length > 0) {
(where as any)[Op.and] = andClauses;
}
return where; return where;
} }
@ -2257,7 +2324,8 @@ export async function list26asEntries(filters?: List26asFilters): Promise<{
summary: List26asSummary; summary: List26asSummary;
}> { }> {
const where = build26asWhere(filters); const where = build26asWhere(filters);
const hasWhere = Object.keys(where).length > 0; // Use Reflect.ownKeys so symbol keys like Op.and are counted.
const hasWhere = Reflect.ownKeys(where).length > 0;
const limit = Math.min(MAX_PAGE_SIZE, Math.max(1, filters?.limit ?? DEFAULT_PAGE_SIZE)); const limit = Math.min(MAX_PAGE_SIZE, Math.max(1, filters?.limit ?? DEFAULT_PAGE_SIZE));
const offset = Math.max(0, filters?.offset ?? 0); const offset = Math.max(0, filters?.offset ?? 0);

View File

@ -79,7 +79,7 @@ STEP 2 - Extract these fields. For amounts, look in TABLES: find rows or columns
8. statusOfMatchingOltas - "Status of matching with OLTAS" or "OLTAS". Single letter (F, O, M) or word like "Matched". Extract as shown. 8. statusOfMatchingOltas - "Status of matching with OLTAS" or "OLTAS". Single letter (F, O, M) or word like "Matched". Extract as shown.
9. dateOfBooking - "Date of booking" or "Date of deposit". DD-MM-YYYY or DD/MM/YYYY. 9. dateOfBooking - For this workflow, use Form 16A "Last updated on" (or "Date of certificate") as booking date. DD-MM-YYYY or DD/MM/YYYY.
10. assessmentYear - "Assessment Year" or "AY" from the form header. Format YYYY-YY (e.g. 2025-26). This is the Form 16A assessment year. 10. assessmentYear - "Assessment Year" or "AY" from the form header. Format YYYY-YY (e.g. 2025-26). This is the Form 16A assessment year.
@ -355,8 +355,13 @@ function extractAssessmentYear(text: string): string | null {
function extractCertificateDate(text: string): string | null { function extractCertificateDate(text: string): string | null {
const patterns = [ const patterns = [
/Certificate\s*No\.?[^\n\r]*?Last\s*updated\s*on[:\s]*([0-9]{1,2}[-\/][A-Za-z]{3,9}[-\/][0-9]{4})/i,
/Certificate\s*No\.?[^\n\r]*?Last\s*updated\s*on[:\s]*([0-9]{1,2}[-\/][0-9]{1,2}[-\/][0-9]{4})/i,
/Last\s*updated\s*on[:\s]*([0-9]{1,2}[-\/][A-Za-z]{3,9}[-\/][0-9]{4})/i,
/Last\s*updated\s*on[:\s]*([0-9]{1,2}[-\/][0-9]{1,2}[-\/][0-9]{4})/i,
/Certificate\s*Date[:\s]*([0-9]{1,2}[-/][0-9]{1,2}[-/][0-9]{4})/i, /Certificate\s*Date[:\s]*([0-9]{1,2}[-/][0-9]{1,2}[-/][0-9]{4})/i,
/Date[:\s]*([0-9]{1,2}[-/][0-9]{1,2}[-/][0-9]{4})/i, /Date\s+of\s+certificate[:\s]*([0-9]{1,2}[-\/][A-Za-z]{3,9}[-\/][0-9]{4})/i,
/Date\s+of\s+certificate[:\s]*([0-9]{1,2}[-\/][0-9]{1,2}[-\/][0-9]{4})/i,
/Issued\s*on[:\s]*([0-9]{1,2}[-/][0-9]{1,2}[-/][0-9]{4})/i, /Issued\s*on[:\s]*([0-9]{1,2}[-/][0-9]{1,2}[-/][0-9]{4})/i,
]; ];
for (const pattern of patterns) { for (const pattern of patterns) {
@ -393,7 +398,8 @@ function parseForm16ARawText(text: string): Form16AExtractedData {
const transactionDate = extractTransactionDate(fullText); const transactionDate = extractTransactionDate(fullText);
const statusOfMatchingOltas = extractOltasStatus(fullText); const statusOfMatchingOltas = extractOltasStatus(fullText);
const certificateDate = extractCertificateDate(fullText); const certificateDate = extractCertificateDate(fullText);
const dateOfBooking = extractDateOfBooking(fullText); // Business rule: Form 16A "Last updated on" is the booking date used for 26AS matching.
const dateOfBooking = certificateDate ?? extractDateOfBooking(fullText);
let financialYear = extractFinancialYear(fullText); let financialYear = extractFinancialYear(fullText);
if (!financialYear && assessmentYear) { if (!financialYear && assessmentYear) {
const parts = assessmentYear.split(/[-/]/).map((p) => parseInt(p, 10)); const parts = assessmentYear.split(/[-/]/).map((p) => parseInt(p, 10));
@ -524,7 +530,8 @@ function sanitizeAndCleanGeminiData(extracted: Record<string, unknown>): Form16A
natureOfPayment: getStr(extracted.natureOfPayment), natureOfPayment: getStr(extracted.natureOfPayment),
transactionDate: getStr(extracted.transactionDate), transactionDate: getStr(extracted.transactionDate),
statusOfMatchingOltas: getStr(extracted.statusOfMatchingOltas), statusOfMatchingOltas: getStr(extracted.statusOfMatchingOltas),
dateOfBooking: getStr(extracted.dateOfBooking), // Business rule: map "Last updated on" (certificateDate) as booking date for matching/UI.
dateOfBooking: getStr(extracted.certificateDate ?? (extracted as any).lastUpdatedOn ?? extracted.dateOfBooking),
assessmentYear: getStr(extracted.assessmentYear), assessmentYear: getStr(extracted.assessmentYear),
quarter, quarter,
form16aNumber, form16aNumber,
@ -586,6 +593,21 @@ async function extractWithVertexAI(filePath: string, fileBase64: string, mimeTyp
return await fallbackExtraction(filePath); return await fallbackExtraction(filePath);
} }
const data = sanitizeAndCleanGeminiData(extractedData); const data = sanitizeAndCleanGeminiData(extractedData);
// Deterministic safeguard: re-parse raw PDF text and prefer the header "Last updated on" date
// to avoid model picking unrelated "Date" fields (e.g., verification/challan rows).
try {
const fallback = await fallbackExtraction(filePath);
const fallbackData = fallback.success ? (fallback.data as Form16AExtractedData | undefined) : undefined;
const fallbackCert = getStr(fallbackData?.certificateDate);
if (fallbackCert) {
data.certificateDate = fallbackCert;
data.dateOfBooking = fallbackCert;
}
} catch (overrideErr) {
logger.warn('[Form16 OCR] Could not apply fallback date override:', overrideErr);
}
logger.info('[Form16 OCR] Vertex AI extraction completed successfully'); logger.info('[Form16 OCR] Vertex AI extraction completed successfully');
return { return {
success: true, success: true,