From 387d1881f71c978618df5308b38623bb7a425856 Mon Sep 17 00:00:00 2001 From: Arjun Mehar Date: Mon, 20 Apr 2026 20:11:11 +0530 Subject: [PATCH] FIX USER VALIDATION --- ...rkflow_CPC_CDC_API.postman_collection.json | 113 ++++++++- ...kflow_CPC_CDC_API.postman_environment.json | 20 +- src/controllers/CpcCdcController.ts | 124 +++++----- src/services/cpc-cdc/CpcRuleExtractService.ts | 6 +- src/services/cpc-cdc/CpcValidationService.ts | 26 ++- src/services/cpc-cdc/utils.ts | 220 +++++++++++++++++- 6 files changed, 420 insertions(+), 89 deletions(-) diff --git a/RE-CPC-CDC_POSTMAN_COLLECTION/RE_Workflow_CPC_CDC_API.postman_collection.json b/RE-CPC-CDC_POSTMAN_COLLECTION/RE_Workflow_CPC_CDC_API.postman_collection.json index b612df1..7026f1f 100644 --- a/RE-CPC-CDC_POSTMAN_COLLECTION/RE_Workflow_CPC_CDC_API.postman_collection.json +++ b/RE-CPC-CDC_POSTMAN_COLLECTION/RE_Workflow_CPC_CDC_API.postman_collection.json @@ -2,7 +2,7 @@ "info": { "_postman_id": "re-workflow-cpc-csd-complete-2026", "name": "RE Workflow — CPC-CSD API (complete)", - "description": "## Purpose\nCovers **all CPC-CSD HTTP APIs** used by the browser (Dashboard, History, reports) so Postman can replace manual UI testing once tokens and URLs are set.\n\n## Authentication\n1. Import **RE_Workflow_CPC_CDC_API** environment and select it.\n2. Paste JWT into **accessToken** (no `Bearer ` prefix).\n3. Run **01_Session_and_health → GET Auth me** — expect **200**. Then use **02_CPC_CSD_API** and onward.\n4. 
User must be **ADMIN** or listed in **CPC_CSD_ADMIN_CONFIG** viewer emails.\n\n## URL base\n- **apiRoot** = `{hostUrl}/api/v1` — all CPC-CSD requests in this collection use **`{apiRoot}/cpc-csd/...`** (canonical API; legacy **`{apiRoot}/cpc-cdc/...`** is the same).\n- The SPA may still call **`{hostUrl}/api/documents/...`** (legacy layout); behaviour is the same — see `docs/CPC-CDC.md` if you need those paths.\n- **Bare GCS staging** (no metadata): `POST {hostUrl}/api/upload` — single multipart field **`file`**.\n\n## Multipart (same as `Dashboard.jsx`)\n| Operation | Text fields | Files |\n|-----------|-------------|-------|\n| `POST .../v1/ocr/upload` | `claim_id`, `booking_id`, `booking_type` (CPC or CSD), `provider`, **`metadata_queue`** (JSON **string** of array) | **`files`** — repeat field name; order matches `metadata_queue` |\n| `POST .../v1/ocr/validate-upload` | `claim_id`, `booking_id`, `booking_type`, `document_type`, `provider`, **`msd_payload`** (JSON string), optional `skip_min_attachment_check=true` | **`file`** (single) |\n| `POST /api/upload` | — | **`file`** |\n\nEach `metadata_queue[]` item: `document_type`, `msd_payload` (object), `expected_field_keys` (unique list of keys to run rules on).\n\n## Metadata — business names vs JSON keys (`metadata_queue` / `msd_payload`)\nUse **these JSON property names** in each `msd_payload` and the same names in `expected_field_keys` (see env `metadataQueueJsonCsdPo`, `metadataQueueJsonCpcTwoFiles`). **Legacy keys** from older integrations are still accepted (`order_or_authorisation_number`, `invoice_value`, `govt_signatory_and_stamp_present`, `authorized_person_name`, `name`, `aadhaar_number`).\n\n### 1. 
CSD claim (1 document) — Purchase Order (PO)\n| Business name | JSON key | Rule |\n|----------------|----------|------|\n| Customer Name | `customer_name` | Accuracy between 90% – 100% |\n| PO Number | `po_number` | 100% accuracy required |\n| PO Amount | `po_amount` | Tolerance of ±5 rupees |\n| Signature & Stamp | `signature_and_stamp` | Binary check (Available / Not Available) |\n\n### 2. CPC claim (2 documents)\n**Document 1 — Authorization Letter**\n| Customer Name | `customer_name` | 90% – 100% |\n| Letter Number | `letter_number` | 90% – 100% |\n| Letter Amount | `letter_amount` | ±5 rupees |\n| Signature & Stamp | `signature_and_stamp` | Binary (Available / Not Available) |\n\n**Document 2 — Aadhaar card**\n| Customer Name | `customer_name` | 90% – 100% |\n| Aadhar Number | `aadhar_number` | 100% accuracy required |\n\n## Provider vs model\n- **ocrProvider** in env: pipeline mode (`GEMINI_VERTEX_DIRECT`, `GEMINI_VERTEX`, `RULES`).\n- **Gemini model id** (e.g. `gemini-2.0-flash-lite`) is **server** `GEMINI_MODEL` in `re-workflow-be/.env`, not Postman.\n\n## Limits\n- 15 MB per file; ZIP not allowed; max 20 files on bulk upload.\n\n## Reference\n- Repo: `re-workflow-be/docs/CPC-CDC.md`", + "description": "## Purpose\nCovers **all CPC-CSD HTTP APIs** used by the browser (Dashboard, History, reports) so Postman can replace manual UI testing once tokens and URLs are set.\n\n## Authentication\n1. Import **RE_Workflow_CPC_CDC_API** environment and select it.\n2. Paste JWT into **accessToken** (no `Bearer ` prefix).\n3. Run **01_Session_and_health → GET Auth me** — expect **200**. Then use **02_CPC_CSD_API** and onward.\n4. 
User must be **ADMIN** or listed in **CPC_CSD_ADMIN_CONFIG** viewer emails.\n\n## URL base\n- **apiRoot** = `{hostUrl}/api/v1` — all CPC-CSD requests in this collection use **`{apiRoot}/cpc-csd/...`** (canonical API; legacy **`{apiRoot}/cpc-cdc/...`** is the same).\n- The SPA may still call **`{hostUrl}/api/documents/...`** (legacy layout); behaviour is the same — see `docs/CPC-CDC.md` if you need those paths.\n- **Bare GCS staging** (no metadata): `POST {hostUrl}/api/upload` — single multipart field **`file`**.\n\n## Multipart (same as `Dashboard.jsx`)\n| Operation | Text fields | Files |\n|-----------|-------------|-------|\n| `POST .../v1/ocr/upload` | `claim_id`, `booking_id`, `booking_type` (CPC or CSD), `provider`, **`metadata_queue`** (JSON **string** of array) | **`files`** — repeat field name; order matches `metadata_queue` |\n| `POST .../v1/ocr/validate-upload` | `claim_id`, `booking_id`, `booking_type`, `document_type`, `provider`, **`msd_payload`** (JSON string), optional `skip_min_attachment_check=true` | **`file`** (single) |\n| `POST /api/upload` | — | **`file`** |\n\nEach `metadata_queue[]` item: `document_type`, `msd_payload` (object), `expected_field_keys` (unique list of keys to run rules on).\n\n## Metadata — business names vs JSON keys (`metadata_queue` / `msd_payload`)\nUse **these JSON property names** in each `msd_payload` and the same names in `expected_field_keys` (see env `metadataQueueJsonCsdPo`, `metadataQueueJsonCpcTwoFiles`). **Legacy keys** from older integrations are still accepted (`order_or_authorisation_number`, `invoice_value`, `govt_signatory_and_stamp_present`, `authorized_person_name`, `name`, `aadhaar_number`).\n\n### 1. 
CSD claim (1 document) — Purchase Order (PO)\n| Business name | JSON key | Rule |\n|----------------|----------|------|\n| Customer Name | `customer_name` | Accuracy between 90% – 100% |\n| PO Number | `po_number` | 100% accuracy required |\n| PO Amount | `po_amount` | Tolerance of ±5 rupees |\n| Signature & Stamp | `signature_and_stamp` | Binary check (Available / Not Available) |\n\n### 2. CPC claim (2 documents)\n**Document 1 — Authorization Letter**\n| Customer Name | `customer_name` | 90% – 100% |\n| Letter Number | `letter_number` | 90% – 100% |\n| Letter Amount | `letter_amount` | ±5 rupees |\n| Signature & Stamp | `signature_and_stamp` | Binary (Available / Not Available) |\n\n**Document 2 — Aadhaar card**\n| Customer Name | `customer_name` | 90% – 100% |\n| Aadhar Number | `aadhar_number` | 100% accuracy required |\n\n## Provider vs model\n- **ocrProvider** in env: pipeline mode (`GEMINI_VERTEX_DIRECT`, `GEMINI_VERTEX`, `RULES`).\n- **Gemini model id** (e.g. `gemini-2.0-flash-lite`) is **server** `GEMINI_MODEL` in `re-workflow-be/.env`, not Postman.\n\n## Limits\n- 15 MB per file; ZIP not allowed; max 20 files on bulk upload.\n\n## Dashboard SPA parity (pagination, booking search, rejected filter)\nThe **CPC-CSD Dashboard** in `re-workflow-fe/src/pages/CpcCdc/Dashboard.jsx` drives list behaviour against this API:\n\n| UI area | HTTP | Notes |\n|---------|------|-------|\n| Submissions table (paginated) | `GET {{apiRoot}}/cpc-csd/documents/recent` | **`page`** (1-based), **`limit`** (e.g. 10–50), **`sortBy`** (default `createdAt`), **`order`** (`asc` / `desc`). |\n| Search by booking / claim ID | Same | Query **`search`**: case-insensitive substring on **`booking_id`**, **`claim_id`**, **`document_type`**, and document **`id`** (recent-documents path sets `searchIncludeId`). 
|\n| “Rejected / mismatch” tab | Same | Query **`status=UNSUCCESSFUL`**: server expands to `MISMATCH`, `REJECTED`, `UNSUCCESSFUL`, `NEED_MANUAL` (see `appendCpcDocumentFilters` in `src/services/cpc-cdc/utils.ts`). |\n| “All submissions” tab | Same | Omit **`status`** or use **`ALL`** — no status filter. |\n| Summary stat cards | `GET .../documents/analytics` | Global counts: **`totalDocs`**, **`distribution`** (per `validation_status`), **`passRate`**, **`dailyVolume`**, **`topMismatchFields`**. |\n\n**List response shape** (200): `{ \"items\": [ /* CpcDocument rows + summary */ ], \"meta\": { \"total\": number, \"page\": number, \"limit\": number, \"pages\": number } }`. Legacy responses that are a bare array are still tolerated by some clients; prefer **`items` + `meta`**.\n\n**Pagination caveat:** The API paginates **document rows**. The UI may **group** rows by `(claim_id, attempt_no)`; a multi-file upload can theoretically span two pages if files sort apart — use a larger **`limit`** when testing full batches.\n\n## Reference\n- Repo: `re-workflow-be/docs/CPC-CDC.md`\n- Filter implementation: `re-workflow-be/src/services/cpc-cdc/utils.ts` (`appendCpcDocumentFilters`)\n- List + analytics handlers: `re-workflow-be/src/controllers/CpcCdcController.ts` (`getRecentDocuments`, `getAnalytics`)", "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json" }, "auth": { @@ -28,15 +28,16 @@ "item": [ { "name": "01_Session_and_health", - "description": "Verify connectivity and JWT before CPC calls.", + "description": "Run **before** any CPC-CSD call.\n\n1. **`GET Health`** — proves the process is listening (no token).\n2. 
**`GET Auth me`** — proves **`{{accessToken}}`** is valid for **`{{apiRoot}}`**.\n\nIf **401** on `auth/me`, refresh the JWT in environment variable **`accessToken`** (no `Bearer ` prefix — Postman adds it).", "item": [ { "name": "GET Health (no auth)", + "description": "Kubernetes / load-balancer style probe; does not touch the database.", "request": { "method": "GET", "header": [], "url": "{{hostUrl}}/health", - "description": "Public liveness. No Bearer.", + "description": "**Auth:** none.\n\n**200:** Plain JSON or text body indicating the Node process is up. **Use:** quick sanity check that **`{{hostUrl}}`** points at the correct host/port.", "auth": { "type": "noauth" } @@ -44,11 +45,12 @@ }, { "name": "GET Auth me", + "description": "Resolves the current user from the JWT used by the rest of the collection.", "request": { "method": "GET", "header": [], "url": "{{apiRoot}}/auth/me", - "description": "Confirms accessToken is accepted.", + "description": "**Auth:** Bearer `{{accessToken}}` (collection-level auth also applies).\n\n**200:** User profile (email, role, …) — exact shape from main RE Workflow auth module.\n\n**401:** Token missing, expired, or wrong signing key — update **`accessToken`**.", "auth": { "type": "bearer", "bearer": [ @@ -65,14 +67,16 @@ }, { "name": "02_CPC_CSD_API", - "description": "Canonical routes: `{{apiRoot}}/cpc-csd/...` plus bare `POST /api/upload`.", + "description": "**Canonical mount:** `{{apiRoot}}/cpc-csd/...` (legacy alias **`/cpc-cdc/...`** — same handlers). 
**Compat:** `{{hostUrl}}/api/documents/*` and uploads under `{{hostUrl}}/api/...` — see `docs/CPC-CDC.md`.\n\nThis folder is split into:\n- **GET Permissions** — feature gate / viewer check.\n- **02a_Dashboard_lists_and_filters** — analytics + paginated recent list + claim history (matches SPA dashboard behaviour).\n- **02b_Single_document** — fetch/update/delete one document by UUID.\n- **02c_Excel_reports** — per-claim and master exports.\n- **POST Bare file upload** — raw `file` → GCS URL (not under `cpc-csd`).", "item": [ { "name": "GET Permissions", + "description": "Returns which CPC-CSD capabilities the authenticated user may use (viewer vs admin). Call after **`GET Auth me`** if the UI gates routes.", "request": { "method": "GET", "header": [], "url": "{{apiRoot}}/cpc-csd/permissions", + "description": "**Auth:** Bearer `{{accessToken}}`.\n\n**200:** JSON permission flags used by the frontend route guard.", "auth": { "type": "bearer", "bearer": [ @@ -85,12 +89,18 @@ } } }, + { + "name": "02a_Dashboard_lists_and_filters", + "description": "Endpoints aligned with **`Dashboard.jsx`**:\n\n1. **`GET .../documents/analytics`** — global counters for summary cards (`totalDocs`, `distribution` keyed by `validation_status`, `passRate`, `dailyVolume`, `topMismatchFields`).\n2. **`GET .../documents/recent`** — paginated rows + `meta` for the submissions table; supports **`search`** (booking / claim / type / id) and **`status`** (e.g. **`UNSUCCESSFUL`** for rejected/mismatch tab).\n3. **`GET .../documents/history`** — all attempts for a single **`claimId`**.\n\nEnvironment keys: **`recentPage`**, **`recentLimit`**, **`recentSearch`**, **`recentStatus`**, **`recentType`**, **`recentSortBy`**, **`recentOrder`**.", + "item": [ { "name": "GET Documents analytics", + "description": "Aggregate metrics across **all** CPC documents visible to the user (not limited to the current list page). 
Used by the dashboard stat cards.", "request": { "method": "GET", "header": [], "url": "{{apiRoot}}/cpc-csd/documents/analytics", + "description": "**Auth:** Bearer `{{accessToken}}`.\n\n**200 — typical JSON keys:**\n| Field | Meaning |\n|-------|--------|\n| `totalDocs` | Count of documents in scope. |\n| `distribution` | Object map: `validation_status` → count (e.g. `MATCH`, `MISMATCH`, `PENDING`, …). |\n| `passRate` | Percentage derived from successful vs total. |\n| `topMismatchFields` | Ranked field names from `mismatch_reasons` on failed rows. |\n| `dailyVolume` | Upload counts per calendar day (recent window). |\n\n**Errors:** `500` with `error_code` / `error_message` on failure.", "auth": { "type": "bearer", "bearer": [ @@ -105,10 +115,12 @@ }, { "name": "GET Documents history by claim", + "description": "Timeline of uploads / attempts for one **`claim_id`** (same logical claim as used on multipart upload). Set env **`claimIdCpc`** (or CSD) to the claim you want to inspect.", "request": { "method": "GET", "header": [], "url": "{{apiRoot}}/cpc-csd/documents/history?claimId={{claimIdCpc}}", + "description": "**Query (required):**\n| Param | Example | Meaning |\n|-------|---------|--------|\n| `claimId` | `{{claimIdCpc}}` | Claim / booking family identifier (e.g. `CPC-POSTMAN-0001`). |\n\n**Auth:** Bearer `{{accessToken}}`.\n\n**200:** Grouped attempt structure used by the History UI.", "auth": { "type": "bearer", "bearer": [ @@ -122,12 +134,52 @@ } }, { - "name": "GET Documents recent (paginated)", + "name": "GET Documents recent (paginated + search + filters)", + "description": "Primary dashboard list. Combines **pagination**, optional **booking/claim search**, **validation status bucket**, and **document type** filter. 
Mirrors `GET {{hostUrl}}/api/documents/recent` on the legacy compat mount.", "request": { "method": "GET", "header": [], - "url": "{{apiRoot}}/cpc-csd/documents/recent?page={{recentPage}}&limit={{recentLimit}}&search={{recentSearch}}&status={{recentStatus}}&type={{recentType}}&sortBy={{recentSortBy}}&order={{recentOrder}}", - "description": "Optional: Test script saves first item id into **cpcDocumentId** for follow-up GET/PUT/DELETE.", + "url": { + "raw": "{{apiRoot}}/cpc-csd/documents/recent?page={{recentPage}}&limit={{recentLimit}}&search={{recentSearch}}&status={{recentStatus}}&type={{recentType}}&sortBy={{recentSortBy}}&order={{recentOrder}}", + "query": [ + { + "key": "page", + "value": "{{recentPage}}", + "description": "**1-based** page index. Increase to walk older rows." + }, + { + "key": "limit", + "value": "{{recentLimit}}", + "description": "Page size (documents per page). Dashboard uses 10–50." + }, + { + "key": "search", + "value": "{{recentSearch}}", + "description": "Optional. Case-insensitive `ILIKE` on **`booking_id`**, **`claim_id`**, **`document_type`**, and document **`id`** (UUID). Partial values OK (e.g. `CPC-114` or suffix of UUID). Leave empty in env to disable." + }, + { + "key": "status", + "value": "{{recentStatus}}", + "description": "Optional filter. **`ALL`** or empty = no filter. **`SUCCESSFUL`** → `MATCH` + `SUCCESSFUL` + `APPROVED`. **`UNSUCCESSFUL`** → `MISMATCH` + `REJECTED` + `UNSUCCESSFUL` + `NEED_MANUAL` (use this for “Rejected / mismatch” dashboard tab). Any other value is applied as exact `validation_status`." + }, + { + "key": "type", + "value": "{{recentType}}", + "description": "Optional document family: **`AADHAAR`**, **`CPC_AUTH`**, **`CSD_PO`**, **`RETAIL_INVOICE`**, **`ALL`**, or empty. Server maps friendly names to `document_type` patterns." 
+ }, + { + "key": "sortBy", + "value": "{{recentSortBy}}", + "description": "One of: `id`, `bookingId`, `createdAt`, `documentType`, `validationStatus`, `claimId`, `matchPercentage`. Default in UI: `createdAt`." + }, + { + "key": "order", + "value": "{{recentOrder}}", + "description": "`asc` or `desc` (case-insensitive). Default newest-first: `desc`." + } + ] + }, + "description": "**Auth:** Bearer `{{accessToken}}`.\n\n**200 — body:**\n```json\n{\n \"items\": [ { /* Sequelize row + summary */ } ],\n \"meta\": {\n \"total\": 0,\n \"page\": 1,\n \"limit\": 15,\n \"pages\": 1\n }\n}\n```\n\n- **`items`**: each element includes `bookingId`, `claimId`, `documentType`, `validationStatus`, `matchPercentage`, `createdAt`, `summary`, etc.\n- **`meta.total`**: total rows matching filters (for pagination UI).\n- **`meta.pages`**: `ceil(total / limit)`, except that an empty result set reports **`1`** (never `0`).\n\n**Test script:** on success, saves **`items[0].id`** to **`cpcDocumentId`** when present so **02b_Single_document** requests resolve immediately.", "auth": { "type": "bearer", "bearer": [ @@ -144,10 +196,19 @@ "listen": "test", "script": { "exec": [ + "pm.test('Recent: 200 JSON', function () {", + " pm.response.to.have.status(200);", + " pm.response.to.be.json;", + "});", "if (pm.response.code === 200) {", " try {", " const j = pm.response.json();", " const items = j.items || [];", + " if (j.meta) {", + " pm.test('Recent: meta has pagination fields', function () {", + " pm.expect(j.meta).to.include.keys('total', 'page', 'limit', 'pages');", + " });", + " }", " if (items.length && items[0].id != null) {", " pm.environment.set('cpcDocumentId', String(items[0].id));", " }", @@ -158,13 +219,21 @@ } } ] + } + ] }, + { + "name": "02b_Single_document", + "description": "Operations on **`{{cpcDocumentId}}`** (UUID primary key of `cpc_documents`).\n\n**Prerequisite:** set **`cpcDocumentId`** in the environment, or run **GET Documents recent** in folder **02a** so the test script populates it from the first row.", + "item": [ { "name": "GET Document by id", + "description": 
"Full document JSON including audit / field breakdown used on the review page.", "request": { "method": "GET", "header": [], "url": "{{apiRoot}}/cpc-csd/documents/{{cpcDocumentId}}", + "description": "**Path:** `:id` = UUID (**`{{cpcDocumentId}}`**).\n\n**200:** Document + related `auditLogs` / `field_results` shape per `getDocumentById`.\n\n**404:** `DOCUMENT_NOT_FOUND`.", "auth": { "type": "bearer", "bearer": [ @@ -179,10 +248,12 @@ }, { "name": "GET Document file binary", + "description": "Streams the **original upload** bytes (GCS download or local disk) with correct `Content-Type` for iframe / `<img>` preview.", "request": { "method": "GET", "header": [], "url": "{{apiRoot}}/cpc-csd/documents/{{cpcDocumentId}}/file", + "description": "**200:** binary body.\n\n**404 / 502:** missing file or GCS read failure — see JSON error body.", "auth": { "type": "bearer", "bearer": [ @@ -197,6 +268,7 @@ }, { "name": "PUT Document status", + "description": "Manual adjudication (set validation status, optional remarks / corrected extracted fields) — **disabled**: the controller now always responds **403** `MANUAL_DOCUMENT_ACTIONS_DISABLED`. Use this request to verify that manual edits are rejected.", "request": { "method": "PUT", "header": [ @@ -210,6 +282,7 @@ "raw": "{{putStatusBodyJson}}" }, "url": "{{apiRoot}}/cpc-csd/documents/{{cpcDocumentId}}/status", + "description": "**Body:** JSON from env **`putStatusBodyJson`** — typically `{ \"status\": \"APPROVED\", \"remarks\": \"...\", \"correctedFields\": { ... } }` (exact keys per API / controller); the handler ignores the body while manual actions are disabled.\n\n**Auth:** Bearer `{{accessToken}}`.\n\n**403:** `{ \"error_code\": \"MANUAL_DOCUMENT_ACTIONS_DISABLED\", \"error_message\": \"...\", \"retryable\": false }` — validation status comes from the pipeline only.", "auth": { "type": "bearer", "bearer": [ @@ -224,10 +297,12 @@ }, { "name": "DELETE Document", + "description": "Hard-delete the `cpc_documents` row (use only in test / admin workflows).", "request": { "method": "DELETE", "header": [], "url": "{{apiRoot}}/cpc-csd/documents/{{cpcDocumentId}}", + "description": "**204 / 200** depending on implementation — confirm in controller. 
**404** if id unknown.", "auth": { "type": "bearer", "bearer": [ @@ -239,13 +314,21 @@ ] } } + } + ] }, + { + "name": "02c_Excel_reports", + "description": "Download **`.xlsx`** reports. Two URL families exist on the mount: full **`/v1/ocr/report/...`** and shorter **`/report/...`** aliases.\n\n**Per-claim:** replace **`{{claimIdCpc}}`** in the path; optional **`reportAttemptQuery`** (e.g. `?attempt=2`).\n\n**Master:** optional filters **`search`**, **`status`**, **`type`** via env **`masterReport*`**.", + "item": [ { "name": "GET Report Excel per claim", + "description": "Primary per-claim export path (matches legacy **`/api/v1/ocr/report/:claimId/download`**).", "request": { "method": "GET", "header": [], "url": "{{apiRoot}}/cpc-csd/v1/ocr/report/{{claimIdCpc}}/download{{reportAttemptQuery}}", + "description": "**Path:** `claimId` = logical claim (e.g. **`{{claimIdCpc}}`**).\n\n**Query:** append **`{{reportAttemptQuery}}`** — empty string or `?attempt=2`.\n\n**200:** Excel binary — save response to file in Postman.", "auth": { "type": "bearer", "bearer": [ @@ -260,10 +343,12 @@ }, { "name": "GET Report Excel master", + "description": "All claims (within permission scope) with optional **`search`**, **`status`**, **`type`** filters.", "request": { "method": "GET", "header": [], "url": "{{apiRoot}}/cpc-csd/v1/ocr/report/all/download?search={{masterReportSearch}}&status={{masterReportStatus}}&type={{masterReportType}}", + "description": "**Query (all optional):** `search`, `status`, `type` — driven by env **`masterReportSearch`**, **`masterReportStatus`**, **`masterReportType`**.", "auth": { "type": "bearer", "bearer": [ @@ -278,10 +363,12 @@ }, { "name": "GET Report per claim (alt path)", + "description": "Shorter alias on the **`cpc-csd`** router (no `/v1/ocr` segment).", "request": { "method": "GET", "header": [], "url": "{{apiRoot}}/cpc-csd/report/{{claimIdCpc}}/download{{reportAttemptQuery}}", + "description": "Same semantics as **GET Report Excel per claim** — 
different URL shape only.", "auth": { "type": "bearer", "bearer": [ @@ -296,10 +383,12 @@ }, { "name": "GET Report master (alt path)", + "description": "Shorter master export; add query string manually if you need the same filters as the `/v1/ocr/report/all/download` variant.", "request": { "method": "GET", "header": [], "url": "{{apiRoot}}/cpc-csd/report/all/download", + "description": "**Note:** This sample URL has **no** query params; extend in Postman Params tab with `search`, `status`, `type` when needed.", "auth": { "type": "bearer", "bearer": [ @@ -311,9 +400,12 @@ ] } } + } + ] }, { "name": "POST Bare file upload (GCS)", + "description": "Minimal upload: **no** `claim_id`, **no** validation pipeline — stores bytes to GCS and returns a URL. For full CPC validation use **`POST .../v1/ocr/upload`** in folders **03–07**.", "request": { "method": "POST", "header": [], @@ -323,12 +415,13 @@ { "key": "file", "type": "file", - "src": [] + "src": [], + "description": "Single multipart part named **`file`** (required)." } ] }, "url": "{{hostUrl}}/api/upload", - "description": "Returns `{ gcsUrl }`. Same as compat route; not under /cpc-csd prefix.", + "description": "**Auth:** Bearer `{{accessToken}}`.\n\n**200:** JSON including **`gcsUrl`** (or equivalent) pointing at the uploaded object.\n\n**Note:** Path is **`{{hostUrl}}/api/upload`** — not under **`/cpc-csd`**. Documented in `docs/CPC-CDC.md` as compat surface.", "auth": { "type": "bearer", "bearer": [ diff --git a/RE-CPC-CDC_POSTMAN_COLLECTION/RE_Workflow_CPC_CDC_API.postman_environment.json b/RE-CPC-CDC_POSTMAN_COLLECTION/RE_Workflow_CPC_CDC_API.postman_environment.json index 8fd58e1..56a1b03 100644 --- a/RE-CPC-CDC_POSTMAN_COLLECTION/RE_Workflow_CPC_CDC_API.postman_environment.json +++ b/RE-CPC-CDC_POSTMAN_COLLECTION/RE_Workflow_CPC_CDC_API.postman_environment.json @@ -84,49 +84,49 @@ "value": "1", "type": "default", "enabled": true, - "description": "GET documents/recent — page (1-based)." 
+ "description": "`GET .../documents/recent` — **page** (integer, **1-based**). Increment to fetch the next page; reset to `1` when you change `recentSearch`, `recentStatus`, or `recentType`." }, { "key": "recentLimit", - "value": "30", + "value": "15", "type": "default", "enabled": true, - "description": "GET documents/recent — page size (max sensible for UI parity)." + "description": "`GET .../documents/recent` — **limit** (page size, number of **document rows** per page). The SPA dashboard offers 10 / 15 / 30 / 50. Larger pages reduce the chance a multi-file CPC batch is split across pages." }, { "key": "recentSearch", "value": "", "type": "default", "enabled": true, - "description": "Optional: filter by booking/claim/type text and id (when API supports searchIncludeId)." + "description": "Optional **`search`** query: case-insensitive substring on **`booking_id`**, **`claim_id`**, **`document_type`**, and document **`id`** (UUID). Examples: `CPC-114`, `POSTMAN`, part of a UUID. Leave **empty** to list without text filter (matches Dashboard debounced booking search)." }, { "key": "recentStatus", "value": "", "type": "default", "enabled": true, - "description": "Leave empty for no filter. Set SUCCESSFUL or UNSUCCESSFUL to match History page filters (backend maps to validation_status sets)." + "description": "Optional **`status`** filter. **Empty** or omit in URL = all statuses.\n\n| Value | Server behaviour |\n|-------|------------------|\n| *(empty)* | No status filter — “All submissions”. |\n| `SUCCESSFUL` | `MATCH`, `SUCCESSFUL`, `APPROVED`. |\n| `UNSUCCESSFUL` | `MISMATCH`, `REJECTED`, `UNSUCCESSFUL`, `NEED_MANUAL` — use for **“Rejected / mismatch”** tab parity. |\n| `ALL` | Explicit no-op filter. |\n| Any other string | Treated as exact **`validation_status`** value. |\n\nImplementation: `appendCpcDocumentFilters` in `re-workflow-be/src/services/cpc-cdc/utils.ts`." 
}, { "key": "recentType", "value": "", "type": "default", "enabled": true, - "description": "Leave empty for no filter. Else: AADHAAR | CPC_AUTH | CSD_PO | RETAIL_INVOICE | AUTHORITY_LETTER (see appendCpcDocumentFilters)." + "description": "Optional **`type`** (document family). **Empty** = all types.\n\nSupported tokens include **`AADHAAR`**, **`CPC_AUTH`**, **`CSD_PO`**, **`RETAIL_INVOICE`**, **`ALL`** — server maps to `document_type` `ILIKE` patterns (see same `appendCpcDocumentFilters`)." }, { "key": "recentSortBy", "value": "createdAt", "type": "default", "enabled": true, - "description": "Sort field: id | bookingId | createdAt | documentType | validationStatus | claimId | matchPercentage." + "description": "`sortBy` query — must be one of: **`id`**, **`bookingId`**, **`createdAt`**, **`documentType`**, **`validationStatus`**, **`claimId`**, **`matchPercentage`**. Invalid values fall back to **`createdAt`** in the controller." }, { "key": "recentOrder", - "value": "DESC", + "value": "desc", "type": "default", "enabled": true, - "description": "ASC or DESC." + "description": "`order` query — **`asc`** or **`desc`** (case-insensitive). **`desc`** = newest first (dashboard default)." 
}, { "key": "masterReportSearch", @@ -214,6 +214,6 @@ } ], "_postman_variable_scope": "environment", - "_postman_exported_at": "2026-04-15T12:00:00.000Z", + "_postman_exported_at": "2026-04-20T12:00:00.000Z", "_postman_exported_using": "RE Workflow CPC-CSD bundle" } diff --git a/src/controllers/CpcCdcController.ts b/src/controllers/CpcCdcController.ts index 1f2a307..5430169 100644 --- a/src/controllers/CpcCdcController.ts +++ b/src/controllers/CpcCdcController.ts @@ -10,7 +10,15 @@ import { CpcHistoryService } from '@services/cpc-cdc/CpcHistoryService'; import { CpcRuleExtractService } from '@services/cpc-cdc/CpcRuleExtractService'; import { cpcGcsService } from '@services/cpc-cdc/CpcGcsService'; import { extractPdfTextFromBuffer } from '@services/cpc-cdc/extractPdfText'; -import { appendCpcDocumentFilters, cpcWhereFromAndParts } from '@services/cpc-cdc/utils'; +import { + appendCpcDocumentFilters, + canonicalizeMoneyFieldKeysInRecord, + canonicalizeRuleFieldKey, + cpcWhereFromAndParts, + isMoneyFieldKey, + sanitizeMoneyValuesInRecord, + sanitizePersonNameFieldsInRecord +} from '@services/cpc-cdc/utils'; import { gcsStorageService } from '@services/gcsStorage.service'; import logger from '@utils/logger'; @@ -211,6 +219,27 @@ export class CpcCdcController { }]; } + queue = queue.map((entry) => { + const rawMsd = (entry.msd_payload || {}) as Record; + const msd_payload = sanitizeMoneyValuesInRecord(canonicalizeMoneyFieldKeysInRecord(rawMsd)); + const rawKeys = (entry as { expected_field_keys?: unknown }).expected_field_keys; + const out: Record = { ...entry, msd_payload }; + if (Array.isArray(rawKeys)) { + out.expected_field_keys = [ + ...new Set( + (rawKeys as unknown[]) + .map((k) => { + const s = String(k ?? '').trim(); + if (!s) return ''; + return isMoneyFieldKey(s) ? 
canonicalizeRuleFieldKey(s) : s; + }) + .filter(Boolean) + ) + ]; + } + return out; + }); + const results: any[] = []; const ipAddress = req.ip || req.headers['x-forwarded-for'] || req.socket.remoteAddress; // Production: real Vertex/Gemini only unless CPC_ALLOW_DEGRADED_SAVE_WITHOUT_AI=true. @@ -436,6 +465,19 @@ export class CpcCdcController { } } + Object.assign( + extracted, + sanitizePersonNameFieldsInRecord({ ...extracted } as Record) + ); + Object.assign( + extracted, + canonicalizeMoneyFieldKeysInRecord({ ...extracted } as Record) + ); + Object.assign( + extracted, + sanitizeMoneyValuesInRecord({ ...extracted } as Record) + ); + // 3. Validation const v = CpcValidationService.validateSrs( expectedPayload, @@ -586,16 +628,23 @@ export class CpcCdcController { */ async getRecentDocuments(req: Request, res: Response) { try { - const { search, status, type, limit = 50, page, sortBy, order } = req.query; - const take = parseInt(limit as string); - const pageNum = parseInt(page as string || '1'); + const { search, status, type, limit, page, sortBy, order } = req.query; + const qFirst = (v: unknown): string => { + if (v == null) return ''; + if (Array.isArray(v)) return v[0] != null ? String(v[0]) : ''; + return String(v); + }; + const takeRaw = parseInt(qFirst(limit) || '50', 10); + const take = Number.isFinite(takeRaw) && takeRaw > 0 ? Math.min(200, takeRaw) : 50; + const pageRaw = parseInt(qFirst(page) || '1', 10); + const pageNum = Number.isFinite(pageRaw) && pageRaw > 0 ? pageRaw : 1; const skip = (pageNum - 1) * take; const andParts: Record[] = []; appendCpcDocumentFilters(andParts, { - type: type as string, - status: status as string, - search: search as string, + type: qFirst(type) || (type as string), + status: qFirst(status) || (status as string), + search: qFirst(search) || (search as string), searchIncludeId: true }); const where = cpcWhereFromAndParts(andParts); @@ -620,13 +669,15 @@ export class CpcCdcController { }); + const pages = count === 0 ? 
1 : Math.ceil(count / take); + return res.json({ items: enriched, meta: { total: count, page: pageNum, limit: take, - pages: Math.ceil(count / take) + pages } }); } catch (error: any) { @@ -831,56 +882,15 @@ export class CpcCdcController { } /** - * Manually override validation status + * Manual validation override (edit / approve / reject) — disabled; status comes from pipeline only. */ - async updateDocumentStatus(req: Request, res: Response) { - try { - const { id } = req.params; - const { status, remarks, correctedFields } = req.body; - - const document = await CpcDocument.findByPk(id); - if (!document) { - return res.status(404).json({ - error_code: 'DOCUMENT_NOT_FOUND', - error_message: 'Document not found', - retryable: false - }); - } - - const previousStatus = document.validationStatus; - - await document.update({ - validationStatus: status, - extractedFields: correctedFields || document.extractedFields, - mismatchReasons: remarks ? [{ field: 'MANUAL_REVIEW', expected: '-', actual: remarks }] : document.mismatchReasons - }); - - const statusRequestId = String(req.headers['x-request-id'] || randomUUID()); - const statusClientId = String(req.headers['x-client-id'] || (req as any).user?.email || 'unknown'); - - await CpcAuditLog.create({ - documentId: id, - action: 'STATUS_UPDATED', - performedBy: statusClientId, - previousState: { status: previousStatus }, - newState: { - status, - request_id: statusRequestId, - client_id: statusClientId, - timestamp: new Date().toISOString() - }, - remarks: remarks || `Status manual update to ${status}` - }); - - return res.json(document); - } catch (error: any) { - logger.error("[CpcController] updateDocumentStatus Error:", error); - return res.status(500).json({ - error_code: 'INTERNAL_SERVER_ERROR', - error_message: 'Failed to update status', - retryable: true - }); - } + async updateDocumentStatus(_req: Request, res: Response) { + return res.status(403).json({ + error_code: 'MANUAL_DOCUMENT_ACTIONS_DISABLED', + 
error_message: + 'Manual document status updates and corrected-field edits are not available for CPC/CSD documents.', + retryable: false + }); } /** diff --git a/src/services/cpc-cdc/CpcRuleExtractService.ts b/src/services/cpc-cdc/CpcRuleExtractService.ts index f3f90fb..042a339 100644 --- a/src/services/cpc-cdc/CpcRuleExtractService.ts +++ b/src/services/cpc-cdc/CpcRuleExtractService.ts @@ -1,4 +1,4 @@ -import { calculateMatch } from './utils'; +import { calculateMatch, normalizePersonNameExtract } from './utils'; export type RuleExtractHints = { /** MSD fields typed in UI — used to find the same text inside the PDF (no "Name:" label needed). */ @@ -303,6 +303,10 @@ export class CpcRuleExtractService { if (isCsdPo && displayName) { displayName = CpcRuleExtractService.refineCsdPoCustomerName(t, displayName) ?? displayName; } + if (displayName) { + const n = normalizePersonNameExtract(displayName); + if (n) displayName = n; + } // PAN (Indian format) + MSD hint (PDF may lack strict word boundaries) let panFromRegex = t.match(/\b([A-Z]{5}[0-9]{4}[A-Z])\b/i); diff --git a/src/services/cpc-cdc/CpcValidationService.ts b/src/services/cpc-cdc/CpcValidationService.ts index b22ddb9..de77193 100644 --- a/src/services/cpc-cdc/CpcValidationService.ts +++ b/src/services/cpc-cdc/CpcValidationService.ts @@ -1,7 +1,14 @@ import fs from 'fs'; import path from 'path'; import { VertexAI } from '@google-cloud/vertexai'; -import { calculateMatch, digitsOnly, normalizeMoney } from './utils'; +import { + calculateMatch, + canonicalizeRuleFieldKey, + digitsOnly, + isPersonalHolderNameField, + normalizeMoney, + normalizePersonNameExtract +} from './utils'; import { getCriteriaLabel } from './CpcHistoryService'; import logger from '@utils/logger'; @@ -177,7 +184,7 @@ function buildMsdStyleMessage(fieldKey: string, status: string, docType?: string } function pickRuleForKey(rules: Record, key: string): string { - const k = key.toLowerCase(); + const k = 
canonicalizeRuleFieldKey(key).toLowerCase(); const candidates = Object.keys(rules) .filter((rk) => rk !== 'default') .sort((a, b) => b.length - a.length); @@ -348,10 +355,19 @@ export class CpcValidationService { continue; } - const expected = rawExpected; - const found = findNormalizedValue(extractedFields, key); + let expected = rawExpected; + let found = findNormalizedValue(extractedFields, key); const confidence = fieldConfidence[key] || 0; + if (isPersonalHolderNameField(key)) { + const en = normalizePersonNameExtract(String(expected ?? '')); + if (en) expected = en as typeof rawExpected; + if (found !== undefined && found !== null) { + const fn = normalizePersonNameExtract(String(found)); + if (fn) found = fn as typeof found; + } + } + const ruleKey = pickRuleForKey(rules as Record, key); const rule = rules[ruleKey] || rules.default || DOCUMENT_RULES.GENERIC.default; @@ -743,6 +759,8 @@ ${scriptPrefBlock} BILINGUAL_FORMS: Indian CPC/CSD forms often print the same label in English and Hindi. For each key in MSD_SCRIPT_PREFERENCE (if present), the MSD value shows which language the user entered — prefer_script is Devanagari (Hindi script) vs Latin (English). When both languages appear for that field on the image/PDF, copy the value whose script matches prefer_script. When only one script is visible, extract that visible value. Never return the other language if both are printed and MSD is clearly single-script. Numeric-only fields (amounts, IDs): use digits as printed; script rule applies mainly to name and free-text fields. For Aadhaar: customer_name (holder name), aadhar_number (12 digits, no spaces preferred), optional dob (DDMMYYYY), gender, address. You may also populate legacy keys name and aadhaar_number if visible. 
+NAME_LINE_VS_MSD: When the printed name includes a relation suffix (S/O, D/O, W/O, C/O, Son of, …) after the holder's name, if REFERENCE_VALUES show the same person's name without that suffix, return only that shorter holder name for customer_name / name / authorized_person_name (do not append the S/O clause). +HOLDER_NAME_NO_TITLES: For customer_name, name, and authorized_person_name only — return the person's given name tokens as printed (Latin or Devanagari per script rules). Do NOT include salutations or ranks (Mr, Mrs, Ms, Dr, Prof, Sir, Shri, Smt, Kumari, Lt, Captain, Major, Colonel, General, Admiral, Wing Commander, Group Captain, etc.). Do NOT include relation lines (S/O, D/O, W/O, C/O, Son of, …) or father's name after the holder name; only the holder's own name span. CRITICAL: For 'address', extract ONLY the physical location details. ${isCsdPo ? `For CSD Purchase Order: extract po_number (PO reference — exact text), po_amount (digits only, rupees), vendor_name (supplier/dealer company from letterhead or From/Supplier block), customer_name (the human buyer / beneficiary — NOT the dealer company name), invoice_date, signature_and_stamp as yes/no (official stamp or authorized signatory visible). Legacy keys order_or_authorisation_number, invoice_value, govt_signatory_and_stamp_present may be filled with the same values if present. diff --git a/src/services/cpc-cdc/utils.ts b/src/services/cpc-cdc/utils.ts index 39391d2..59f1e72 100644 --- a/src/services/cpc-cdc/utils.ts +++ b/src/services/cpc-cdc/utils.ts @@ -1,5 +1,5 @@ import stringSimilarity from 'string-similarity'; -import { Op } from 'sequelize'; +import { Op, cast, col, where as sqlWhere } from 'sequelize'; /** Shared list/report filters for CPC documents (parity with legacy CPC-CSD). 
*/ export function appendCpcDocumentFilters( @@ -68,14 +68,17 @@ export function appendCpcDocumentFilters( } } - if (search) { - const orClause: Record[] = [ - { bookingId: { [Op.iLike]: `%${search}%` } }, - { claimId: { [Op.iLike]: `%${search}%` } }, - { documentType: { [Op.iLike]: `%${search}%` } } + const q = String(search ?? '').trim(); + if (q) { + const pattern = `%${q}%`; + const orClause: object[] = [ + { bookingId: { [Op.iLike]: pattern } }, + { claimId: { [Op.iLike]: pattern } }, + { documentType: { [Op.iLike]: pattern } } ]; if (searchIncludeId) { - orClause.unshift({ id: { [Op.iLike]: `%${search}%` } }); + // Postgres: `uuid ILIKE '…'` is invalid — cast so id substring search works and does not break the whole OR. + orClause.unshift(sqlWhere(cast(col('id'), 'TEXT'), { [Op.iLike]: pattern })); } andParts.push({ [Op.or]: orClause }); } @@ -97,6 +100,170 @@ export function normalizeMoney(str: string | null | undefined): string { return String(Math.round(num)); } +/** Compact key for rule lookup / money detection (spaces, hyphens, underscores removed). */ +export function compactFieldKey(rawKey: string): string { + return String(rawKey || '') + .trim() + .toLowerCase() + .replace(/[\s_-]+/g, ''); +} + +/** + * True for MSD/extraction keys that represent rupee amounts (commas / Indian grouping should be ignored). + */ +export function isMoneyFieldKey(rawKey: string): boolean { + const k = compactFieldKey(rawKey); + if (!k) return false; + if (k.includes('amount')) return true; + if (k.includes('invoicevalue')) return true; + if (k.includes('totalvalue')) return true; + if (k.includes('taxamount')) return true; + return false; +} + +/** + * Lowercase + spaces/hyphens → underscores for all keys; compact camelCase aliases **only for money keys** + * (e.g. poAmount / Po Amount → po_amount). Non-money keys are unchanged except whitespace normalization. 
+ */ +export function canonicalizeRuleFieldKey(rawKey: string): string { + const k = String(rawKey || '') + .trim() + .toLowerCase() + .replace(/[\s-]+/g, '_'); + if (!isMoneyFieldKey(k) && !isMoneyFieldKey(rawKey)) { + return k; + } + const compact = k.replace(/_/g, ''); + const amountAliases: Record = { + poamount: 'po_amount', + letteramount: 'letter_amount', + invoicevalue: 'invoice_value', + taxamount: 'tax_amount', + totalamount: 'total_amount' + }; + if (amountAliases[compact]) return amountAliases[compact]; + return k; +} + +/** Rename payload keys so money fields use canonical snake_case (e.g. poAmount → po_amount). Non-money keys untouched. */ +export function canonicalizeMoneyFieldKeysInRecord(obj: Record | null | undefined): Record { + if (!obj || typeof obj !== 'object' || Array.isArray(obj)) return (obj || {}) as Record; + const out = { ...obj }; + for (const key of [...Object.keys(out)]) { + if (!isMoneyFieldKey(key)) continue; + const nk = canonicalizeRuleFieldKey(key); + if (nk === key) continue; + const v = out[key]; + delete out[key]; + if (out[nk] === undefined) out[nk] = v; + } + return out; +} + +/** Normalize money-type values to plain digit strings (no commas) for MSD / extracted payloads. */ +export function sanitizeMoneyValuesInRecord(obj: Record | null | undefined): Record { + if (!obj || typeof obj !== 'object' || Array.isArray(obj)) return (obj || {}) as Record; + const out: Record = { ...obj }; + for (const key of Object.keys(out)) { + if (!isMoneyFieldKey(key)) continue; + const v = out[key]; + if (v === null || v === undefined) continue; + const s = String(v).trim(); + if (!s) continue; + const nm = normalizeMoney(s); + if (nm !== '') out[key] = nm; + } + return out; +} + +/** + * Strip trailing relation / father-name suffix (S/O, W/O, …) so "Arjun Mehar S/O Radheshyam Mehar" → "Arjun Mehar". 
+ */ +export function trimPatronymicSuffixFromName(s: string | null | undefined): string { + let t = cleanText(s); + if (!t) return ''; + const re = /\b(?:s\/o|w\/o|d\/o|c\/o|son\s+of|daughter\s+of|wife\s+of|husband\s+of|care\s+of)\b/i; + const parts = t.split(re); + t = (parts[0] ?? t).trim(); + t = t.split(/[,;]/)[0]?.trim() ?? t; + return cleanText(t); +} + +/** Multi-word military / rank prefixes at the start of a name line (longest first). */ +const MULTI_TITLE_PREFIX_RES: RegExp[] = [ + /^air\s+vice\s+marshal\s+/i, + /^air\s+commodore\s+/i, + /^vice\s+admiral\s+/i, + /^rear\s+admiral\s+/i, + /^group\s+captain\s+/i, + /^wing\s+commander\s+/i, + /^sqn\s+ldr\.?\s+/i, + /^flying\s+officer\s+/i, + /^fg\s+offr\.?\s+/i +]; + +/** Single-token salutations / ranks at the start (repeat until none). */ +const SINGLE_TITLE_PREFIX_RE = + /^(?:mr|mrs|ms|miss|dr\.?|doctor|prof\.?|sir|madam|shri|smt\.?|smti\.?|kumari|kum\.?|lt\.?|lieut\.?|lieutenant|leftenant|capt\.?|captain|maj\.?|major|col\.?|colonel|brig\.?|brigadier|gen\.?|general|cmdr|commander|cmde|commodore|adm\.?|admiral|hon\.?|honorable|honourable|retd\.?|svc)\s+/i; + +function stripLeadingSalutationsAndTitles(s: string): string { + let t = cleanText(s); + for (let guard = 0; guard < 24; guard++) { + let removed = false; + for (const re of MULTI_TITLE_PREFIX_RES) { + if (re.test(t)) { + t = t.replace(re, '').trim(); + removed = true; + break; + } + } + if (removed) continue; + if (SINGLE_TITLE_PREFIX_RE.test(t)) { + t = t.replace(SINGLE_TITLE_PREFIX_RE, '').trim(); + continue; + } + break; + } + return t; +} + +/** + * Holder-style person name for extraction / compare: no leading Dr./military rank tokens, no S/O-style suffixes. 
+ */ +export function normalizePersonNameExtract(s: string | null | undefined): string { + if (s == null || !String(s).trim()) return ''; + let t = stripLeadingSalutationsAndTitles(String(s)); + t = trimPatronymicSuffixFromName(t); + return cleanText(t); +} + +/** Strip salutations / relation clutter from holder name fields on an extracted / payload object. */ +export function sanitizePersonNameFieldsInRecord(obj: Record | null | undefined): Record { + if (!obj || typeof obj !== 'object' || Array.isArray(obj)) return (obj || {}) as Record; + const out = { ...obj }; + for (const key of Object.keys(out)) { + if (!isPersonalHolderNameField(key)) continue; + const v = out[key]; + if (v === null || v === undefined) continue; + const n = normalizePersonNameExtract(String(v)); + if (n) out[key] = n; + } + return out; +} + +/** Customer / holder person name fields (not supplier, grantor, or company). */ +export function isPersonalHolderNameField(rawKey: string): boolean { + const k = compactFieldKey(rawKey); + if (!k) return false; + if (/(vendor|grantor|supplier|dealer|company|business)/.test(k)) return false; + return ( + k === 'name' || + k === 'customername' || + k === 'authorizedpersonname' || + k === 'accountholdername' + ); +} + export function cleanText(str: string | null | undefined): string { return String(str || "").trim().replace(/\s+/g, " "); } @@ -172,6 +339,45 @@ export function calculateMatch(expected: string, found: string, key: string = "" expStr = cleanAddress(expStr).toLowerCase(); } + // 2a. Personal name (MSD): document may print "Arjun Mehar S/O Radheshyam Mehar" while MSD is "Arjun Mehar". + // Strip S/O-style suffixes from the document side, then pass if the full MSD phrase appears as a whole phrase. 
+ if (isPersonalHolderNameField(lowerKey)) { + const expTrim = trimPatronymicSuffixFromName(expStr).toLowerCase().replace(/\s+/g, ' ').trim(); + const fndTrim = trimPatronymicSuffixFromName(fndStr).toLowerCase().replace(/\s+/g, ' ').trim(); + if (expTrim.length >= 2 && fndTrim.length >= 2) { + const phraseOk = (hay: string, needle: string) => { + if (hay === needle) return true; + if (hay.startsWith(needle)) { + if (hay.length === needle.length) return true; + const next = hay.charAt(needle.length); + return /\s|[,;/]/.test(next); + } + const esc = needle.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + return new RegExp(`(^|\\s)${esc}(\\s|$)`).test(hay); + }; + if (expTrim.length >= 3 && phraseOk(fndTrim, expTrim)) { + return 100; + } + expStr = expTrim; + fndStr = fndTrim; + } + } + + // 2b. Money: ignore commas, ₹, spaces — compare numeric rupees (aligns browser vs API + Gemini "1,93,533") + if (isMoneyFieldKey(lowerKey)) { + const expM = normalizeMoney(expStr); + const fndM = normalizeMoney(fndStr); + if (expM && fndM && expM === fndM) return 100; + const a = expM ? Number(expM) : NaN; + const b = fndM ? Number(fndM) : NaN; + if (!Number.isNaN(a) && !Number.isNaN(b)) { + if (Math.abs(a - b) <= 5) return 100; + const maxv = Math.max(Math.abs(a), Math.abs(b), 1); + const pct = Math.round(100 - Math.min(100, (Math.abs(a - b) / maxv) * 100)); + return Math.max(0, pct); + } + } + // 3. Exact match if (expStr === fndStr) return 100;