last updated on fixed
This commit is contained in:
parent
0aec45f7aa
commit
8e176cdf25
@ -96,6 +96,19 @@ async function processOutgoingFile(fileName: string, resolvedOutgoingDir: string
|
|||||||
updatedAt: new Date(),
|
updatedAt: new Date(),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Delete source CSV only after successful DB persistence + read-marking.
|
||||||
|
// SAP team keeps a parallel archive copy, so main OUTGOING can be safely cleaned.
|
||||||
|
const sourcePath = path.join(resolvedOutgoingDir, fileName);
|
||||||
|
try {
|
||||||
|
if (fs.existsSync(sourcePath)) {
|
||||||
|
fs.unlinkSync(sourcePath);
|
||||||
|
logger.info(`[Form16 SAP Job] Deleted processed OUTGOING file: ${sourcePath}`);
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
// Keep processing successful even if cleanup fails; next pull will skip due to read marker.
|
||||||
|
logger.warn(`[Form16 SAP Job] Could not delete processed file: ${sourcePath}`, e);
|
||||||
|
}
|
||||||
|
|
||||||
return counts;
|
return counts;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -7,7 +7,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import crypto from 'crypto';
|
import crypto from 'crypto';
|
||||||
import { Op, fn, col, QueryTypes } from 'sequelize';
|
import { Op, fn, col, QueryTypes, where as sqlWhere } from 'sequelize';
|
||||||
import { sequelize } from '../config/database';
|
import { sequelize } from '../config/database';
|
||||||
import {
|
import {
|
||||||
Form16CreditNote,
|
Form16CreditNote,
|
||||||
@ -122,7 +122,7 @@ export async function getLatest26asAggregatedForQuarter(
|
|||||||
const [row] = await sequelize.query<{ sum: string }>(
|
const [row] = await sequelize.query<{ sum: string }>(
|
||||||
`WITH latest_upload AS (
|
`WITH latest_upload AS (
|
||||||
SELECT MAX(upload_log_id) AS mid FROM tds_26as_entries
|
SELECT MAX(upload_log_id) AS mid FROM tds_26as_entries
|
||||||
WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(tan_number, '')), '[^A-Z0-9]', '', 'g')) = :tan
|
WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(tan_number, '')), '[^a-zA-Z0-9]', '', 'g')) = :tan
|
||||||
AND financial_year = :fy AND quarter = :qtr
|
AND financial_year = :fy AND quarter = :qtr
|
||||||
AND UPPER(TRIM(COALESCE(section_code, ''))) = :section
|
AND UPPER(TRIM(COALESCE(section_code, ''))) = :section
|
||||||
AND UPPER(TRIM(COALESCE(status_oltas, ''))) IN ('F', 'O')
|
AND UPPER(TRIM(COALESCE(status_oltas, ''))) IN ('F', 'O')
|
||||||
@ -130,7 +130,7 @@ export async function getLatest26asAggregatedForQuarter(
|
|||||||
)
|
)
|
||||||
SELECT COALESCE(SUM(e.tax_deducted), 0)::text AS sum
|
SELECT COALESCE(SUM(e.tax_deducted), 0)::text AS sum
|
||||||
FROM tds_26as_entries e
|
FROM tds_26as_entries e
|
||||||
WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(e.tan_number, '')), '[^A-Z0-9]', '', 'g')) = :tan
|
WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(e.tan_number, '')), '[^a-zA-Z0-9]', '', 'g')) = :tan
|
||||||
AND e.financial_year = :fy AND e.quarter = :qtr
|
AND e.financial_year = :fy AND e.quarter = :qtr
|
||||||
AND UPPER(TRIM(COALESCE(e.section_code, ''))) = :section
|
AND UPPER(TRIM(COALESCE(e.section_code, ''))) = :section
|
||||||
AND UPPER(TRIM(COALESCE(e.status_oltas, ''))) IN ('F', 'O')
|
AND UPPER(TRIM(COALESCE(e.status_oltas, ''))) IN ('F', 'O')
|
||||||
@ -165,7 +165,7 @@ async function getLatest26asRowsForQuarter(
|
|||||||
}>(
|
}>(
|
||||||
`WITH latest_upload AS (
|
`WITH latest_upload AS (
|
||||||
SELECT MAX(upload_log_id) AS mid FROM tds_26as_entries
|
SELECT MAX(upload_log_id) AS mid FROM tds_26as_entries
|
||||||
WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(tan_number, '')), '[^A-Z0-9]', '', 'g')) = :tan
|
WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(tan_number, '')), '[^a-zA-Z0-9]', '', 'g')) = :tan
|
||||||
AND financial_year = :fy AND quarter = :qtr
|
AND financial_year = :fy AND quarter = :qtr
|
||||||
AND UPPER(TRIM(COALESCE(section_code, ''))) = :section
|
AND UPPER(TRIM(COALESCE(section_code, ''))) = :section
|
||||||
AND UPPER(TRIM(COALESCE(status_oltas, ''))) IN ('F', 'O')
|
AND UPPER(TRIM(COALESCE(status_oltas, ''))) IN ('F', 'O')
|
||||||
@ -179,7 +179,7 @@ async function getLatest26asRowsForQuarter(
|
|||||||
e.transaction_date,
|
e.transaction_date,
|
||||||
e.date_of_booking
|
e.date_of_booking
|
||||||
FROM tds_26as_entries e
|
FROM tds_26as_entries e
|
||||||
WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(e.tan_number, '')), '[^A-Z0-9]', '', 'g')) = :tan
|
WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(e.tan_number, '')), '[^a-zA-Z0-9]', '', 'g')) = :tan
|
||||||
AND e.financial_year = :fy
|
AND e.financial_year = :fy
|
||||||
AND e.quarter = :qtr
|
AND e.quarter = :qtr
|
||||||
AND UPPER(TRIM(COALESCE(e.section_code, ''))) = :section
|
AND UPPER(TRIM(COALESCE(e.section_code, ''))) = :section
|
||||||
@ -221,7 +221,7 @@ async function get26asCoverageDebug(tanNumber: string, financialYear: string, qu
|
|||||||
END
|
END
|
||||||
)::text AS matching_194q_f_o_rows
|
)::text AS matching_194q_f_o_rows
|
||||||
FROM tds_26as_entries e
|
FROM tds_26as_entries e
|
||||||
WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(e.tan_number, '')), '[^A-Z0-9]', '', 'g')) = :tan
|
WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(e.tan_number, '')), '[^a-zA-Z0-9]', '', 'g')) = :tan
|
||||||
AND e.financial_year = :fy
|
AND e.financial_year = :fy
|
||||||
AND e.quarter = :q`,
|
AND e.quarter = :q`,
|
||||||
{ replacements: { tan: normalizedTan, fy, q, section: SECTION_26AS_194Q }, type: QueryTypes.SELECT }
|
{ replacements: { tan: normalizedTan, fy, q, section: SECTION_26AS_194Q }, type: QueryTypes.SELECT }
|
||||||
@ -234,7 +234,7 @@ async function get26asCoverageDebug(tanNumber: string, financialYear: string, qu
|
|||||||
status_oltas,
|
status_oltas,
|
||||||
COUNT(*)::text AS cnt
|
COUNT(*)::text AS cnt
|
||||||
FROM tds_26as_entries e
|
FROM tds_26as_entries e
|
||||||
WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(e.tan_number, '')), '[^A-Z0-9]', '', 'g')) = :tan
|
WHERE UPPER(REGEXP_REPLACE(TRIM(COALESCE(e.tan_number, '')), '[^a-zA-Z0-9]', '', 'g')) = :tan
|
||||||
AND e.financial_year = :fy
|
AND e.financial_year = :fy
|
||||||
AND e.quarter = :q
|
AND e.quarter = :q
|
||||||
GROUP BY section_code, status_oltas
|
GROUP BY section_code, status_oltas
|
||||||
@ -273,6 +273,30 @@ function normalizeDateOnly(value: unknown): string | null {
|
|||||||
return `${yyyy}-${mm}-${dd}`;
|
return `${yyyy}-${mm}-${dd}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Handle OCR values like "13-Jan-2025" without timezone conversion.
|
||||||
|
const m2 = raw.match(/^(\d{1,2})[-\/]([A-Za-z]{3,9})[-\/](\d{4})$/);
|
||||||
|
if (m2) {
|
||||||
|
const dd = m2[1].padStart(2, '0');
|
||||||
|
const mon = m2[2].toLowerCase();
|
||||||
|
const yyyy = m2[3];
|
||||||
|
const monthMap: Record<string, string> = {
|
||||||
|
jan: '01', january: '01',
|
||||||
|
feb: '02', february: '02',
|
||||||
|
mar: '03', march: '03',
|
||||||
|
apr: '04', april: '04',
|
||||||
|
may: '05',
|
||||||
|
jun: '06', june: '06',
|
||||||
|
jul: '07', july: '07',
|
||||||
|
aug: '08', august: '08',
|
||||||
|
sep: '09', sept: '09', september: '09',
|
||||||
|
oct: '10', october: '10',
|
||||||
|
nov: '11', november: '11',
|
||||||
|
dec: '12', december: '12',
|
||||||
|
};
|
||||||
|
const mm = monthMap[mon];
|
||||||
|
if (mm) return `${yyyy}-${mm}-${dd}`;
|
||||||
|
}
|
||||||
|
|
||||||
const d = new Date(raw);
|
const d = new Date(raw);
|
||||||
if (!Number.isNaN(d.getTime())) return d.toISOString().slice(0, 10);
|
if (!Number.isNaN(d.getTime())) return d.toISOString().slice(0, 10);
|
||||||
return null;
|
return null;
|
||||||
@ -782,7 +806,20 @@ async function run26asMatchAndCreditNote(submission: Form16aSubmission): Promise
|
|||||||
const submittedTaxDeducted = toNumberOrNull(extracted.totalTaxDeducted ?? sub.tdsAmount);
|
const submittedTaxDeducted = toNumberOrNull(extracted.totalTaxDeducted ?? sub.tdsAmount);
|
||||||
const submittedTdsDeposited = toNumberOrNull(extracted.totalTdsDeposited ?? sub.tdsAmount);
|
const submittedTdsDeposited = toNumberOrNull(extracted.totalTdsDeposited ?? sub.tdsAmount);
|
||||||
const submittedTransactionDate = normalizeDateOnly(extracted.transactionDate);
|
const submittedTransactionDate = normalizeDateOnly(extracted.transactionDate);
|
||||||
const submittedBookingDate = normalizeDateOnly(extracted.dateOfBooking);
|
const submittedLastUpdatedOn = normalizeDateOnly(extracted.certificateDate ?? extracted.lastUpdatedOn ?? extracted.lastUpdatedDate);
|
||||||
|
|
||||||
|
// Mandatory for matching: Form 16A "Last updated on" must be extracted and matched to 26AS booking date.
|
||||||
|
if (!submittedLastUpdatedOn) {
|
||||||
|
const msg = 'OCR could not extract "Last updated on" date from Form 16A. Please resubmit a clear document.';
|
||||||
|
await submission.update({
|
||||||
|
validationStatus: 'resubmission_needed',
|
||||||
|
validationNotes: msg,
|
||||||
|
});
|
||||||
|
return {
|
||||||
|
validationStatus: 'resubmission_needed',
|
||||||
|
validationNotes: msg,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
// Latest 26AS upload rows for the same TAN + FY + Quarter.
|
// Latest 26AS upload rows for the same TAN + FY + Quarter.
|
||||||
let latestRows = await getLatest26asRowsForQuarter(tanNumber, financialYear, quarter);
|
let latestRows = await getLatest26asRowsForQuarter(tanNumber, financialYear, quarter);
|
||||||
@ -790,7 +827,7 @@ async function run26asMatchAndCreditNote(submission: Form16aSubmission): Promise
|
|||||||
// If OCR extracted FY/Quarter incorrectly, derive FY/Quarter from OCR dates and retry.
|
// If OCR extracted FY/Quarter incorrectly, derive FY/Quarter from OCR dates and retry.
|
||||||
if (latestRows.length === 0) {
|
if (latestRows.length === 0) {
|
||||||
const derivedFromTx = deriveFyAndQuarterFromDateOnly(submittedTransactionDate);
|
const derivedFromTx = deriveFyAndQuarterFromDateOnly(submittedTransactionDate);
|
||||||
const derivedFromBooking = deriveFyAndQuarterFromDateOnly(submittedBookingDate);
|
const derivedFromBooking = deriveFyAndQuarterFromDateOnly(submittedLastUpdatedOn);
|
||||||
const derived = derivedFromTx || derivedFromBooking;
|
const derived = derivedFromTx || derivedFromBooking;
|
||||||
if (derived && (derived.financialYear !== financialYear || derived.quarter !== quarter)) {
|
if (derived && (derived.financialYear !== financialYear || derived.quarter !== quarter)) {
|
||||||
const altRows = await getLatest26asRowsForQuarter(tanNumber, derived.financialYear, derived.quarter);
|
const altRows = await getLatest26asRowsForQuarter(tanNumber, derived.financialYear, derived.quarter);
|
||||||
@ -914,16 +951,15 @@ async function run26asMatchAndCreditNote(submission: Form16aSubmission): Promise
|
|||||||
return { validationStatus: 'failed', validationNotes: 'Transaction date mismatch with latest 26AS.' };
|
return { validationStatus: 'failed', validationNotes: 'Transaction date mismatch with latest 26AS.' };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (submittedBookingDate) {
|
// Match Form 16A "Last updated on" against 26AS "Date of Booking"
|
||||||
const hasBookingDate = latestRows.some((r) => normalizeDateOnly(r.dateOfBooking) === submittedBookingDate);
|
const hasBookingDate = latestRows.some((r) => normalizeDateOnly(r.dateOfBooking) === submittedLastUpdatedOn);
|
||||||
if (!hasBookingDate) {
|
if (!hasBookingDate) {
|
||||||
await submission.update({
|
await submission.update({
|
||||||
validationStatus: 'failed',
|
validationStatus: 'failed',
|
||||||
validationNotes:
|
validationNotes:
|
||||||
`Booking date mismatch with latest 26AS for TAN no - ${tanNumber}. No latest 26AS record found with booking date ${submittedBookingDate}.`,
|
`Last updated on date mismatch with latest 26AS booking date for TAN no - ${tanNumber}. Form 16A last updated on: ${submittedLastUpdatedOn}.`,
|
||||||
});
|
});
|
||||||
return { validationStatus: 'failed', validationNotes: 'Booking date mismatch with latest 26AS.' };
|
return { validationStatus: 'failed', validationNotes: 'Last updated on date mismatch with latest 26AS booking date.' };
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Math.abs(tdsAmount - aggregated26as) > AMOUNT_MATCH_TOLERANCE) {
|
if (Math.abs(tdsAmount - aggregated26as) > AMOUNT_MATCH_TOLERANCE) {
|
||||||
@ -2241,13 +2277,44 @@ export interface List26asSummary {
|
|||||||
|
|
||||||
function build26asWhere(filters?: List26asFilters): Record<string, unknown> {
|
function build26asWhere(filters?: List26asFilters): Record<string, unknown> {
|
||||||
const where: Record<string, unknown> = {};
|
const where: Record<string, unknown> = {};
|
||||||
if (filters?.financialYear) where.financialYear = filters.financialYear;
|
const andClauses: unknown[] = [];
|
||||||
if (filters?.quarter) where.quarter = filters.quarter;
|
|
||||||
if (filters?.tanNumber) where.tanNumber = { [Op.iLike]: `%${filters.tanNumber}%` };
|
if (filters?.financialYear) where.financialYear = normalizeFinancialYear(filters.financialYear) || filters.financialYear;
|
||||||
if (filters?.search?.trim()) where.deductorName = { [Op.iLike]: `%${filters.search.trim()}%` };
|
if (filters?.quarter) where.quarter = normalizeQuarter(filters.quarter) || filters.quarter;
|
||||||
if (filters?.status) where.statusOltas = filters.status;
|
if (filters?.status) where.statusOltas = filters.status;
|
||||||
if (filters?.assessmentYear) where.assessmentYear = filters.assessmentYear;
|
if (filters?.assessmentYear) where.assessmentYear = filters.assessmentYear;
|
||||||
if (filters?.sectionCode) where.sectionCode = filters.sectionCode;
|
if (filters?.sectionCode) where.sectionCode = filters.sectionCode;
|
||||||
|
|
||||||
|
if (filters?.tanNumber?.trim()) {
|
||||||
|
const normalizedTan = normalizeTanNumber(filters.tanNumber);
|
||||||
|
if (normalizedTan) {
|
||||||
|
andClauses.push(
|
||||||
|
sqlWhere(
|
||||||
|
fn('upper', fn('regexp_replace', fn('coalesce', col('tan_number'), ''), '[^a-zA-Z0-9]', '', 'g')),
|
||||||
|
{ [Op.like]: `%${normalizedTan}%` }
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (filters?.search?.trim()) {
|
||||||
|
const s = filters.search.trim();
|
||||||
|
const normalizedSearchTan = normalizeTanNumber(s);
|
||||||
|
const searchOr: unknown[] = [{ deductorName: { [Op.iLike]: `%${s}%` } }];
|
||||||
|
if (normalizedSearchTan) {
|
||||||
|
searchOr.push(
|
||||||
|
sqlWhere(
|
||||||
|
fn('upper', fn('regexp_replace', fn('coalesce', col('tan_number'), ''), '[^a-zA-Z0-9]', '', 'g')),
|
||||||
|
{ [Op.like]: `%${normalizedSearchTan}%` }
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
andClauses.push({ [Op.or]: searchOr });
|
||||||
|
}
|
||||||
|
|
||||||
|
if (andClauses.length > 0) {
|
||||||
|
(where as any)[Op.and] = andClauses;
|
||||||
|
}
|
||||||
return where;
|
return where;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2257,7 +2324,8 @@ export async function list26asEntries(filters?: List26asFilters): Promise<{
|
|||||||
summary: List26asSummary;
|
summary: List26asSummary;
|
||||||
}> {
|
}> {
|
||||||
const where = build26asWhere(filters);
|
const where = build26asWhere(filters);
|
||||||
const hasWhere = Object.keys(where).length > 0;
|
// Use Reflect.ownKeys so symbol keys like Op.and are counted.
|
||||||
|
const hasWhere = Reflect.ownKeys(where).length > 0;
|
||||||
const limit = Math.min(MAX_PAGE_SIZE, Math.max(1, filters?.limit ?? DEFAULT_PAGE_SIZE));
|
const limit = Math.min(MAX_PAGE_SIZE, Math.max(1, filters?.limit ?? DEFAULT_PAGE_SIZE));
|
||||||
const offset = Math.max(0, filters?.offset ?? 0);
|
const offset = Math.max(0, filters?.offset ?? 0);
|
||||||
|
|
||||||
|
|||||||
@ -79,7 +79,7 @@ STEP 2 - Extract these fields. For amounts, look in TABLES: find rows or columns
|
|||||||
|
|
||||||
8. statusOfMatchingOltas - "Status of matching with OLTAS" or "OLTAS". Single letter (F, O, M) or word like "Matched". Extract as shown.
|
8. statusOfMatchingOltas - "Status of matching with OLTAS" or "OLTAS". Single letter (F, O, M) or word like "Matched". Extract as shown.
|
||||||
|
|
||||||
9. dateOfBooking - "Date of booking" or "Date of deposit". DD-MM-YYYY or DD/MM/YYYY.
|
9. dateOfBooking - For this workflow, use Form 16A "Last updated on" (or "Date of certificate") as booking date. DD-MM-YYYY or DD/MM/YYYY.
|
||||||
|
|
||||||
10. assessmentYear - "Assessment Year" or "AY" from the form header. Format YYYY-YY (e.g. 2025-26). This is the Form 16A assessment year.
|
10. assessmentYear - "Assessment Year" or "AY" from the form header. Format YYYY-YY (e.g. 2025-26). This is the Form 16A assessment year.
|
||||||
|
|
||||||
@ -355,8 +355,13 @@ function extractAssessmentYear(text: string): string | null {
|
|||||||
|
|
||||||
function extractCertificateDate(text: string): string | null {
|
function extractCertificateDate(text: string): string | null {
|
||||||
const patterns = [
|
const patterns = [
|
||||||
|
/Certificate\s*No\.?[^\n\r]*?Last\s*updated\s*on[:\s]*([0-9]{1,2}[-\/][A-Za-z]{3,9}[-\/][0-9]{4})/i,
|
||||||
|
/Certificate\s*No\.?[^\n\r]*?Last\s*updated\s*on[:\s]*([0-9]{1,2}[-\/][0-9]{1,2}[-\/][0-9]{4})/i,
|
||||||
|
/Last\s*updated\s*on[:\s]*([0-9]{1,2}[-\/][A-Za-z]{3,9}[-\/][0-9]{4})/i,
|
||||||
|
/Last\s*updated\s*on[:\s]*([0-9]{1,2}[-\/][0-9]{1,2}[-\/][0-9]{4})/i,
|
||||||
/Certificate\s*Date[:\s]*([0-9]{1,2}[-/][0-9]{1,2}[-/][0-9]{4})/i,
|
/Certificate\s*Date[:\s]*([0-9]{1,2}[-/][0-9]{1,2}[-/][0-9]{4})/i,
|
||||||
/Date[:\s]*([0-9]{1,2}[-/][0-9]{1,2}[-/][0-9]{4})/i,
|
/Date\s+of\s+certificate[:\s]*([0-9]{1,2}[-\/][A-Za-z]{3,9}[-\/][0-9]{4})/i,
|
||||||
|
/Date\s+of\s+certificate[:\s]*([0-9]{1,2}[-\/][0-9]{1,2}[-\/][0-9]{4})/i,
|
||||||
/Issued\s*on[:\s]*([0-9]{1,2}[-/][0-9]{1,2}[-/][0-9]{4})/i,
|
/Issued\s*on[:\s]*([0-9]{1,2}[-/][0-9]{1,2}[-/][0-9]{4})/i,
|
||||||
];
|
];
|
||||||
for (const pattern of patterns) {
|
for (const pattern of patterns) {
|
||||||
@ -393,7 +398,8 @@ function parseForm16ARawText(text: string): Form16AExtractedData {
|
|||||||
const transactionDate = extractTransactionDate(fullText);
|
const transactionDate = extractTransactionDate(fullText);
|
||||||
const statusOfMatchingOltas = extractOltasStatus(fullText);
|
const statusOfMatchingOltas = extractOltasStatus(fullText);
|
||||||
const certificateDate = extractCertificateDate(fullText);
|
const certificateDate = extractCertificateDate(fullText);
|
||||||
const dateOfBooking = extractDateOfBooking(fullText);
|
// Business rule: Form 16A "Last updated on" is the booking date used for 26AS matching.
|
||||||
|
const dateOfBooking = certificateDate ?? extractDateOfBooking(fullText);
|
||||||
let financialYear = extractFinancialYear(fullText);
|
let financialYear = extractFinancialYear(fullText);
|
||||||
if (!financialYear && assessmentYear) {
|
if (!financialYear && assessmentYear) {
|
||||||
const parts = assessmentYear.split(/[-/]/).map((p) => parseInt(p, 10));
|
const parts = assessmentYear.split(/[-/]/).map((p) => parseInt(p, 10));
|
||||||
@ -524,7 +530,8 @@ function sanitizeAndCleanGeminiData(extracted: Record<string, unknown>): Form16A
|
|||||||
natureOfPayment: getStr(extracted.natureOfPayment),
|
natureOfPayment: getStr(extracted.natureOfPayment),
|
||||||
transactionDate: getStr(extracted.transactionDate),
|
transactionDate: getStr(extracted.transactionDate),
|
||||||
statusOfMatchingOltas: getStr(extracted.statusOfMatchingOltas),
|
statusOfMatchingOltas: getStr(extracted.statusOfMatchingOltas),
|
||||||
dateOfBooking: getStr(extracted.dateOfBooking),
|
// Business rule: map "Last updated on" (certificateDate) as booking date for matching/UI.
|
||||||
|
dateOfBooking: getStr(extracted.certificateDate ?? (extracted as any).lastUpdatedOn ?? extracted.dateOfBooking),
|
||||||
assessmentYear: getStr(extracted.assessmentYear),
|
assessmentYear: getStr(extracted.assessmentYear),
|
||||||
quarter,
|
quarter,
|
||||||
form16aNumber,
|
form16aNumber,
|
||||||
@ -586,6 +593,21 @@ async function extractWithVertexAI(filePath: string, fileBase64: string, mimeTyp
|
|||||||
return await fallbackExtraction(filePath);
|
return await fallbackExtraction(filePath);
|
||||||
}
|
}
|
||||||
const data = sanitizeAndCleanGeminiData(extractedData);
|
const data = sanitizeAndCleanGeminiData(extractedData);
|
||||||
|
|
||||||
|
// Deterministic safeguard: re-parse raw PDF text and prefer the header "Last updated on" date
|
||||||
|
// to avoid model picking unrelated "Date" fields (e.g., verification/challan rows).
|
||||||
|
try {
|
||||||
|
const fallback = await fallbackExtraction(filePath);
|
||||||
|
const fallbackData = fallback.success ? (fallback.data as Form16AExtractedData | undefined) : undefined;
|
||||||
|
const fallbackCert = getStr(fallbackData?.certificateDate);
|
||||||
|
if (fallbackCert) {
|
||||||
|
data.certificateDate = fallbackCert;
|
||||||
|
data.dateOfBooking = fallbackCert;
|
||||||
|
}
|
||||||
|
} catch (overrideErr) {
|
||||||
|
logger.warn('[Form16 OCR] Could not apply fallback date override:', overrideErr);
|
||||||
|
}
|
||||||
|
|
||||||
logger.info('[Form16 OCR] Vertex AI extraction completed successfully');
|
logger.info('[Form16 OCR] Vertex AI extraction completed successfully');
|
||||||
return {
|
return {
|
||||||
success: true,
|
success: true,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user