From dc39677783484cf4be547c92e04394df15818c39 Mon Sep 17 00:00:00 2001 From: rohitgir-879 Date: Thu, 12 Jun 2025 00:19:44 +0530 Subject: [PATCH] v1.0.0-rc --- .DS_Store | Bin 0 -> 10244 bytes .env.example | 29 + .gitignore | 8 + CHANGES.md | 229 + Jenkinsfile | 146 + __pycache__/model_manager.cpython-312.pyc | Bin 0 -> 7973 bytes chat.py | 2269 +++++ docs/API.md | 104 + model_manager.py | 139 + nodemon.json | 6 + package-lock.json | 8309 +++++++++++++++++ package.json | 65 + public/images/email-banner.png | Bin 0 -> 127537 bytes readme.md | 200 + requirements.txt | 32 + scripts/setup.js | 34 + src/app.js | 184 + src/config/database.js | 88 + src/config/emailConfig.js | 17 + src/config/env.js | 54 + src/config/index.js | 79 + src/config/initDatabase.js | 308 + src/controllers/.DS_Store | Bin 0 -> 6148 bytes src/controllers/analysisController.js | 99 + src/controllers/appUserController.js | 1020 ++ src/controllers/authController.js | 99 + src/controllers/documentsController.js | 437 + src/controllers/exceldataController.js | 30 + src/controllers/feedbacksController.js | 119 + src/controllers/hospitalController.js | 315 + src/controllers/onboardingController.js | 59 + src/controllers/roleController.js | 10 + src/controllers/superAdminController.js | 148 + src/controllers/userController.js | 260 + src/middlewares/authMiddleware.js | 187 + src/middlewares/errorHandler.js | 73 + src/middlewares/security.js | 114 + src/middlewares/uploadsMiddleware.js | 52 + src/middlewares/validateRequest.js | 34 + src/migrations/createMigration.js | 46 + src/migrations/migrationRunner.js | 157 + ...000016_add_is_liked_to_interaction_logs.js | 45 + ...c_signup_enabled_to_hospitals_app_users.js | 44 + ...assword_to_hospitals_and_hospital_users.js | 79 + ...726120000_modify_feedback_table_columns.js | 77 + .../migrations/app_users_pin_otp_setup.js | 67 + .../migrations/city_to_hospital_users.js | 44 + .../migrations/delete_app_user_keep_data.js | 43 + .../feedback_add_boolean_for_forwarded.js | 47 + src/migrations/migrations/sessions.js | 36 + .../super-admins-temporary-password.js | 43 + .../migrations/user_sessions_delete.js | 35 + src/migrations/runMigrations.js | 26 + src/migrations/template.js | 37 + src/routes/analysis.js | 14 + src/routes/appUsers.js | 163 + src/routes/auth.js | 16 + src/routes/documents.js | 66 + src/routes/exceldata.js | 13 + src/routes/feedbacks.js | 25 + src/routes/hospitals.js | 158 + src/routes/onboarding.js | 16 + src/routes/roles.js | 8 + src/routes/superAdmins.js | 61 + src/routes/users.js | 78 + src/schema | 520 ++ src/services/analysisService.js | 302 + src/services/appUserService.js | 161 + src/services/authService.js | 220 + src/services/cronJobs.js | 167 + src/services/exceldataService.js | 125 + src/services/feedbacksService.js | 351 + src/services/hospitalService.js | 841 ++ src/services/nlpqamapper.js | 299 + src/services/onboardingService.js | 146 + src/services/roleService.js | 5 + src/services/secondaryWebsocket.js | 366 + src/services/superAdminService.js | 335 + src/services/tokenService.js | 42 + src/services/userService.js | 631 ++ src/services/webSocket.js | 308 + src/templates/passwordResetEmail.js | 169 + src/templates/welcomeEmail.js | 87 + src/utils/asyncHandler.js | 10 + src/utils/encryption.js | 113 + src/utils/errors.js | 54 + src/utils/fix_refresh_token.js | 43 + src/utils/logger.js | 49 + src/utils/monitoring.js | 210 + src/utils/responseHandler.js | 34 + src/utils/validator.js | 117 + src/validators/hospitalValidator.js | 31 + tests/integration/README.md | 17 + tests/unit/README.md | 18 + 94 files changed, 22571 insertions(+) create mode 100644 .DS_Store create mode 100644 .env.example create mode 100644 .gitignore create mode 100644 CHANGES.md create mode 100644 Jenkinsfile create mode 100644 __pycache__/model_manager.cpython-312.pyc create mode 100644 chat.py create mode 100644 docs/API.md create mode 100644 model_manager.py create mode 100644 nodemon.json create mode 100644 package-lock.json create mode 100644 package.json create mode 100644 public/images/email-banner.png create mode 100644 readme.md create mode 100644 requirements.txt create mode 100644 scripts/setup.js create mode 100644 src/app.js create mode 100644 src/config/database.js create mode 100644 src/config/emailConfig.js create mode 100644 src/config/env.js create mode 100644 src/config/index.js create mode 100644 src/config/initDatabase.js create mode 100644 src/controllers/.DS_Store create mode 100644 src/controllers/analysisController.js create mode 100644 src/controllers/appUserController.js create mode 100644 src/controllers/authController.js create mode 100644 src/controllers/documentsController.js create mode 100644 src/controllers/exceldataController.js create mode 100644 src/controllers/feedbacksController.js create mode 100644 src/controllers/hospitalController.js create mode 100644 src/controllers/onboardingController.js create mode 100644 src/controllers/roleController.js create mode 100644 src/controllers/superAdminController.js create mode 100644 src/controllers/userController.js create mode 100644 src/middlewares/authMiddleware.js create mode 100644 src/middlewares/errorHandler.js create mode 100644 src/middlewares/security.js create mode 100644 src/middlewares/uploadsMiddleware.js create mode 100644 src/middlewares/validateRequest.js create mode 100644 src/migrations/createMigration.js create mode 100644 src/migrations/migrationRunner.js create mode 100644 src/migrations/migrations/20240315000016_add_is_liked_to_interaction_logs.js create mode 100644 src/migrations/migrations/20240315000017_add_public_signup_enabled_to_hospitals_app_users.js create mode 100644 src/migrations/migrations/20240723000000_add_temporary_password_to_hospitals_and_hospital_users.js create mode 100644 src/migrations/migrations/20240726120000_modify_feedback_table_columns.js create mode 100644 src/migrations/migrations/app_users_pin_otp_setup.js create mode 100644 src/migrations/migrations/city_to_hospital_users.js create mode 100644 src/migrations/migrations/delete_app_user_keep_data.js create mode 100644 src/migrations/migrations/feedback_add_boolean_for_forwarded.js create mode 100644 src/migrations/migrations/sessions.js create mode 100644 src/migrations/migrations/super-admins-temporary-password.js create mode 100644 src/migrations/migrations/user_sessions_delete.js create mode 100644 src/migrations/runMigrations.js create mode 100644 src/migrations/template.js create mode 100644 src/routes/analysis.js create mode 100644 src/routes/appUsers.js create mode 100644 src/routes/auth.js create mode 100644 src/routes/documents.js create mode 100644 src/routes/exceldata.js create mode 100644 src/routes/feedbacks.js create mode 100644 src/routes/hospitals.js create mode 100644 src/routes/onboarding.js create mode 100644 src/routes/roles.js create mode 100644 src/routes/superAdmins.js create mode 100644 src/routes/users.js create mode 100644 src/schema create mode 100644 src/services/analysisService.js create mode 100644 src/services/appUserService.js create mode 100644 src/services/authService.js create mode 100644 src/services/cronJobs.js create mode 100644 src/services/exceldataService.js create mode 100644 src/services/feedbacksService.js create mode 100644 src/services/hospitalService.js create mode 100644 src/services/nlpqamapper.js create mode 100644 src/services/onboardingService.js create mode 100644 src/services/roleService.js create mode 100644 src/services/secondaryWebsocket.js create mode 100644 src/services/superAdminService.js create mode 100644 src/services/tokenService.js create mode 100644 src/services/userService.js create mode 100644 src/services/webSocket.js create mode 100644 src/templates/passwordResetEmail.js create mode 100644 src/templates/welcomeEmail.js create mode 100644 src/utils/asyncHandler.js create mode 100644 src/utils/encryption.js create mode 100644 src/utils/errors.js create mode 100644 src/utils/fix_refresh_token.js create mode 100644 src/utils/logger.js create mode 100644 src/utils/monitoring.js create mode 100644 src/utils/responseHandler.js create mode 100644 src/utils/validator.js create mode 100644 src/validators/hospitalValidator.js create mode 100644 tests/integration/README.md create mode 100644 tests/unit/README.md diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..c7965687b83009c12ccfb473b3a8a4fe15b84b49 GIT binary patch literal 10244 zcmeHMT~8B16uk?jET6#yVtg=eVoZF%T96ctFBR~C2OqRXAJo(>U14R}#qO3u2uWZ4 z`V0OA3Rx9SCJH7}@jdFvON0nLDBKr^5j&lPAG=-4xAj(TD~E0_tvI4C#CBbP$KMYKy%>b3yA8J8907%&0(@Nd9+_;XMT z71aCVp=zygJGq(oR@?U6Fe(;*7z2a3q5QBpY>t^PDjPwo64_BRT(TR__~~&Fc(qR5 zu03&`R&{E6Iq)Of^__+Q+`0po=TBX~9<-K%re6;QTZuoIBj!kT>eBxHTyd^6Yb_RU zAIw_&cjk+;*21mQ!NG_*bM@xkN7YSl%MW-eNJgS^G!Z`^mLE{-LrOhd^J*9_L;JYw zR2se)`0Mb9z3Q@(Hq~H5a9g0)UF`mXcRB(ls18pDtIr4rYq~>oz2abi`$;25;k|{P zT&!aOQ550H%dX|XcLg&7OhZ<`0gQ(zSkEE`mleb`y+RMWR0YaGoy!otRncF7(K?8y z9`hXhqO@b86<_AM+si+9Me)x|d~U%vdkcoR&V5vTCoyveZC8bP-WN@v3K-`bm66?zWY~s$J3#Y>;_#CVfx4eve1r8U-NLxiLO&dfMjDp~u8Cl0`CrTSAu^HmWwCE! zyJF)izAO)YeN%Dq^~?qSA_RT`QL$J$Lf+qE_(*-_{T)8$u?r8UpN)#t2PG=sBSQdwET=sLGf>OlFf z-9mne%!&Qlu@wjkxsHdG>v%lyb^LW?_ARpIgnBu#?bzZC(wYA=;Di@>`ux9%;~w82 P=<~nbh%b2mxAXsBEi4s_ literal 0 HcmV?d00001 diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..d50ac15 --- /dev/null +++ b/.env.example @@ -0,0 +1,29 @@ +NODE_ENV = development +DB_HOST=localhost +DB_USER=root +DB_PASSWORD=Admin@123 +DB_NAME=spurrintest + +EMAIL_HOST="smtp.zoho.com" +SENDER_PORT = 465 +SENDER_SECURITY = true +EMAIL_USER="kavya.j@tech4biz.io" +EMAIL_PASS="8pQfkBw8gbrz" + +JWT_ACCESS_TOKEN_SECRET=jN4!pY9*d#T2@x$L7wq&Z8^gFc%X5@K#m +JWT_REFRESH_TOKEN_SECRET=Lx$Z7#T2^d&n9!Y4%K8@Fcg*m#qX5p@wL +JWT_ACCESS_TOKEN_EXPIRY=5h +JWT_REFRESH_TOKEN_EXPIRY=7d + +# BACK_URL = https://backend.spurrinai.com/ +BACK_URL = http://localhost:3000/ +DOMAIN_url = http://localhost:3000/ +FLASK_BASE_URL = http://localhost:5000/ + +# PORT +PORT = 3000 + +# zoho mail config for development mode + +SSL_CERT = "/home/ubuntu/spurrin-cleaned-node/certificates/fullchain.pem" +SSL_KEY = "/home/ubuntu/spurrinai-backend-node/certificates/privkey.pem" \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..76e9b59 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +/node_modules +/.env +/hospital_data +/logs +/error.log +/uploads +/llm-uploads +/certificates \ No newline at end of file diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 0000000..450a65b --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,229 @@ +# Changes Log + +## [Unreleased] + +### Added +- Created comprehensive README.md with project documentation +- Implemented structured error handling system +- Added validation middleware using Joi +- Created standardized response handlers +- Implemented async handler utility +- Added custom error classes +- Created hospital validation schemas +- Updated hospital routes with proper middleware +- Added role-based authorization +- Implemented request validation +- Added structured logging +- Created separate authorization middleware with role-based access control +- Created request validation middleware with Joi schema validation +- Added repository layer for database operations +- Implemented database connection pooling +- Added custom error classes for better error handling +- Improved error handling in service layer + +### Changed +- Reorganized project structure into src directory +- Updated hospital controller to use new utilities +- Improved error handling in hospital routes +- Enhanced security with proper authentication +- Standardized API response format +- Improved code organization and readability +- Separated authentication and authorization middleware +- Enhanced validation middleware with better error handling and logging +- Refactored hospital routes for better middleware usage +- Moved logo upload logic to controller +- Updated hospital controller methods to use asyncHandler and standardized responses +- Standardized authentication and authorization across all hospital routes +- Improved error handling in hospital user and color management +- Refactored changePassword method to use asyncHandler and standardized responses +- Reordered hospital routes to prevent conflicts +- Fixed route parameter conflicts +- Moved database operations to repository layer +- Improved error handling with custom error classes +- Enhanced database connection management with connection pooling + +### Removed +- Removed unused model file (superAdminModel.js) +- Cleaned up empty directories +- Removed redundant code +- Removed inline route handlers in favor of controller methods +- Removed duplicate hospital list method +- Removed old authentication middleware usage +- Removed redundant token validation in changePassword method +- Removed unused imports from hospital routes +- Removed direct database queries from service layer + +### Fixed +- Fixed error handling in hospital controller +- Improved validation error messages +- Enhanced security in authentication flow +- Fixed response format consistency +- Fixed asyncHandler import and usage in hospital controller +- Fixed authorize function import and usage in hospital routes +- Fixed validateRequest middleware implementation +- Fixed validateRequest import in hospital routes +- Fixed missing getAllHospitals method in hospital controller +- Fixed error handling in hospital controller methods +- Fixed inconsistent authentication middleware usage +- Fixed missing controller methods and their implementations +- Fixed undefined route handler in changePassword endpoint +- Fixed route conflicts between /users and /:id endpoints +- Fixed missing changePassword route +- Fixed route ordering to prevent parameter conflicts +- Fixed database connection handling +- Fixed error propagation in service layer + +## [0.1.0] - Initial Setup + +### Added +- Basic project structure +- Database configuration +- Authentication middleware +- Hospital management endpoints +- File upload functionality +- Email notification system +- User management system +- Password reset functionality +- Interaction logging system + +### Security +- Implemented JWT authentication +- Added password hashing +- Implemented role-based access control +- Added input validation +- Implemented secure file uploads +- Added email verification system + +### Performance +- Implemented database connection pooling +- Added request compression +- Optimized database queries +- Implemented caching where appropriate + +### Documentation +- Added API documentation +- Created setup instructions +- Added security guidelines +- Included contribution guidelines + +## Hospital Module Improvements + +### Code Structure and Organization +- [x] Created dedicated `HospitalService` class for business logic +- [x] Separated concerns between routes, controller, and service layers +- [x] Improved error handling and validation +- [x] Removed duplicate code +- [x] Added proper input validation +- [x] Organized routes with proper middleware +- [x] Added repository layer for database operations +- [x] Implemented database connection pooling +- [x] Added custom error classes + +### Security Enhancements +- [x] Added rate limiting (100 requests per 15 minutes per IP) +- [x] Improved file upload security + - Added file type validation (JPEG, PNG, GIF) + - Set file size limit (5MB) + - Secure file naming +- [x] Added input validation through schemas +- [x] Enhanced error messages +- [x] Implemented proper authentication middleware +- [x] Added authorization checks + +### Database Optimization +- [x] Improved query structure +- [x] Added proper error handling for database operations +- [x] Implemented better transaction handling +- [x] Added validation before database operations +- [x] Improved error messages for database operations +- [x] Implemented connection pooling +- [x] Added repository layer for better database abstraction + +### Additional Improvements +- [x] Better error handling and logging +- [x] Consistent response formats +- [x] Improved code readability +- [x] Better separation of concerns +- [x] Added proper validation for all inputs +- [x] Improved file upload handling +- [x] Added custom error classes +- [x] Improved error propagation + +### Pending Improvements +- [ ] Add query caching for frequently accessed data +- [ ] Add request sanitization +- [ ] Implement proper CORS configuration +- [ ] Add security headers +- [ ] Add API documentation +- [ ] Add unit tests +- [ ] Add integration tests +- [ ] Add API tests + +## File Structure Changes +``` +src/ +├── controllers/ +│ └── hospitalController.js # Simplified controller with service usage +├── services/ +│ └── hospitalService.js # Business logic layer +├── repositories/ +│ └── hospitalRepository.js # Database operations layer +├── routes/ +│ └── hospitals.js # Updated with security and validation +├── middlewares/ +│ ├── authMiddleware.js # Authentication middleware +│ ├── authorizeMiddleware.js # Authorization middleware +│ └── validateRequest.js # Request validation middleware +├── utils/ +│ └── errors.js # Custom error classes +└── validators/ + └── hospitalValidator.js # Validation schemas +``` + +## Security Improvements +1. Rate Limiting + - Added express-rate-limit + - 100 requests per 15 minutes per IP + - Custom error message for rate limit exceeded + +2. File Upload Security + - File type validation + - File size limits + - Secure file naming + - Proper error handling + +3. Input Validation + - Added validation schemas + - Proper error messages + - Type checking + - Required field validation + +4. Authentication & Authorization + - Token-based authentication + - Role-based authorization + - Proper error handling for unauthorized access + +## Performance Improvements +1. Database Operations + - Optimized queries + - Better error handling + - Transaction support + - Input validation before database operations + - Connection pooling + - Repository pattern implementation + +2. Code Organization + - Service layer for business logic + - Repository layer for database operations + - Controller for request handling + - Routes for endpoint definition + - Middleware for cross-cutting concerns + +## Next Steps +1. Implement query caching +2. Add comprehensive testing +3. Add API documentation +4. Enhance security measures +5. Add monitoring and logging + +flag added to trigger logout to both websockets (secondary and main) diff --git a/Jenkinsfile b/Jenkinsfile new file mode 100644 index 0000000..be62b5d --- /dev/null +++ b/Jenkinsfile @@ -0,0 +1,146 @@ +pipeline { + agent any + + environment { + SSH_CREDENTIALS = 'spurrin-backend-dev' + GIT_CREDENTIALS = 'gitea-cred' + REMOTE_SERVER = 'ubuntu@160.187.166.67' + REPO_HTTPS_URL = 'https://git.tech4biz.wiki/rohit/spurrin-backend.git' + BRANCH = 'main' + + REMOTE_DIR = '/home/ubuntu/spurrin-cleaned-node' + BACKUP_UPLOADS_DIR = '/home/ubuntu/uploads_backup' + VENV_PYTHON = '../venv/bin/python' + + NODE_BIN_PATH = '/home/ubuntu/.nvm/versions/node/v22.12.0/bin' + NOTIFY_EMAIL = 'jassim.mohammed@tech4biz.io' + } + + stages { + stage('Add Remote Host Key') { + steps { + echo '🔐 Adding remote host to known_hosts...' + sshagent(credentials: [SSH_CREDENTIALS]) { + sh ''' + mkdir -p ~/.ssh + ssh-keyscan -H ${REMOTE_SERVER#*@} >> ~/.ssh/known_hosts + ''' + } + } + } + +stage('Update Repo on Remote') { + steps { + echo '🔄 Pulling latest code on remote server with conditional restore and fresh backup...' + withCredentials([usernamePassword(credentialsId: "${GIT_CREDENTIALS}", usernameVariable: 'GIT_USER', passwordVariable: 'GIT_PASS')]) { + sshagent(credentials: [SSH_CREDENTIALS]) { + sh """ + ssh ${REMOTE_SERVER} ' + set -e + + # Clean old backup folder + echo "🗑️ Removing old backups..." + rm -rf ${BACKUP_UPLOADS_DIR} + mkdir -p ${BACKUP_UPLOADS_DIR} + + echo "📦 Backing up existing data..." + if [ -d ${REMOTE_DIR}/uploads ]; then + cp -a ${REMOTE_DIR}/uploads ${BACKUP_UPLOADS_DIR}/ + fi + + if [ -d ${REMOTE_DIR}/hospital_data ]; then + cp -a ${REMOTE_DIR}/hospital_data ${BACKUP_UPLOADS_DIR}/ + fi + + if [ -f ${REMOTE_DIR}/.env ]; then + cp ${REMOTE_DIR}/.env ${BACKUP_UPLOADS_DIR}/.env + fi + + if [ -d ${REMOTE_DIR}/certificates ]; then + cp -a ${REMOTE_DIR}/certificates ${BACKUP_UPLOADS_DIR}/ + fi + + # Pull latest changes without deleting local files/folders + if [ -d ${REMOTE_DIR}/.git ]; then + echo "🔁 Repo exists. Pulling latest changes..." + cd ${REMOTE_DIR} + git stash push --include-untracked --message "temp-backup-before-pull" || true + git pull origin ${BRANCH} + git stash pop || true + else + echo "📥 Repo not found. Cloning fresh and restoring backup..." + rm -rf ${REMOTE_DIR} + git clone -b ${BRANCH} https://${GIT_USER}:${GIT_PASS}@git.tech4biz.wiki/rohit/spurrin-backend.git ${REMOTE_DIR} + + # Restore backups only on fresh clone... + + # Restore backup only on fresh clone + if [ -d ${BACKUP_UPLOADS_DIR}/uploads ]; then + cp -a ${BACKUP_UPLOADS_DIR}/uploads ${REMOTE_DIR}/ + fi + + if [ -d ${BACKUP_UPLOADS_DIR}/hospital_data ]; then + cp -a ${BACKUP_UPLOADS_DIR}/hospital_data ${REMOTE_DIR}/ + fi + + if [ -f ${BACKUP_UPLOADS_DIR}/.env ]; then + cp ${BACKUP_UPLOADS_DIR}/.env ${REMOTE_DIR}/.env + fi + + if [ -d ${BACKUP_UPLOADS_DIR}/certificates ]; then + cp -a ${BACKUP_UPLOADS_DIR}/certificates ${REMOTE_DIR}/ + fi + fi + ' + """ + } + } + } +} + + + + stage('Install & Start Services') { + steps { + echo '🚀 Installing and starting services...' + sshagent(credentials: [SSH_CREDENTIALS]) { + sh """ + ssh ${REMOTE_SERVER} ' + set -e + export PATH=${NODE_BIN_PATH}:\$PATH + cd ${REMOTE_DIR} + npm install --legacy-peer-deps --force + + pm2 delete web-server || true + pm2 delete convo || true + + pm2 start npm --name web-server -- start + pm2 start chat.py --interpreter ${VENV_PYTHON} --name=convo + ' + """ + } + } + } + } + + post { + always { + echo '🧹 Cleaning workspace...' + cleanWs() + } + + success { + echo '✅ Deployment successful!' + mail to: "${NOTIFY_EMAIL}", + subject: "✅ Jenkins - spurrin-cleaned-node Deployment Successful", + body: "The deployment of spurrin-cleaned-node to ${REMOTE_SERVER} was successful.\n\nRegards,\nJenkins" + } + + failure { + echo '❌ Deployment failed!' + mail to: "${NOTIFY_EMAIL}", + subject: "❌ Jenkins - spurrin-cleaned-node Deployment Failed", + body: "The deployment of spurrin-cleaned-node to ${REMOTE_SERVER} failed. Please check Jenkins logs.\n\nRegards,\nJenkins" + } + } +} \ No newline at end of file diff --git a/__pycache__/model_manager.cpython-312.pyc b/__pycache__/model_manager.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dcce957272c5e9d0772cda20d7d951402873df40 GIT binary patch literal 7973 zcmbVRYit`=cD}6EK;H*>S@u|+m59AAz8NMhb$|*l{aVFblP-yg77b7XZGiqz?4WkDe|qi= zXGqDkW8_M_bLT$JJ@?#u&v(w5e=995L6ENg&#}2rb|dtk_(e&&T;a)|LE$!{5l#4z zpO_*1ni&m&-!(q1UpJ$Z%37a(Mh|s5pTTdOG3M$_GbX6h`ph$C4LXTv{dGhe?rCJo zGbKw}m+@=-;?jtvqhUUHE_j&MK1#&KqDT&g=L3+^m;h^2ns+JU0(|G6wHIgEcT{hM>`mh9+jTv}Q&ZN0$g%d))+_ z>#n021LkkWNE;wG(MHJ4v9N08skQH&x=8Snx+)95ZS<2}{9S*~T+q+-gfE5ze7J`Tg;~}c=<@g& zcYvY00wDYzKi&b#FE6({w8WOe(1^8A2;I4PNNC~qLiTJKWi4elkE|R?TAZ7XrcXLI zw1VUCXVWRiX#Dt%v)S@pH)AWYq{;CFdZXeZ{4KQHMm+8yrFm6kN)62RBVMn(%^Q?b z{z(&xFbOO|bG}6>-OCl=xN7oqQR=4h7NzGrUfzxbG|P5W!E1ibCA5TG+Nns_>42B_ zx_#b=H!$lgl%2E7IT>`*xLT4l*WKNH&SjD}kc55Bjbmuh=nKxyLPt^O4O|L}79WhG zbd=ym) zow!lRmeFQ;U8cO_L3zg}S#@)AWin|$l_pPT$%+i=5J*RwY{+(W-k402ZJXq-o3F3D zzBcyJ0SdV^Z&XV8r?R6g zT=D=xYn0u9>!i-at0!SzQh)i;xI}AVUyGc!576JM)QG~9S>$BQ!&TmyXr00fjK+eI z60KKifkTxq;9!^1BmbPQ&X<)^ab6Xg(z!22sYV%}e=3p#?(9nKe63Q-KS6=#Bncd= z=bvv>dVKE~d`VjE&a!Dy@zKIqHmt{)SdBU_C6nQ8d zDfRzrk=&Ukrp**w1JVPKuXfT#x)4YGGNEWQ9D z!N`y)D^M0JB^0r3)13v{*aod!2V9Y5w56#t?|M|*_)+Jb&VR1{P3^C1GkvFozEkPG ziI1-)fAo`7?Hlp28)tILb!|A&`m23+_a*9+_Q5nc^sNDv?z(w;<#dLu7s&dxV4CcP zx|V_Dz^PR8>G)WhY?M@Sqd+!h$Yz0TUO({5LmwYXlRX9XtpeGaCfgr1_hgz!gyxat z(dkt4IcfA}!=6mTpwKXw96FO~m;_#4hk;m+XUGPDY*^zzx_0MUn%t+>It9|1CY!em zsI?uFaAuqL$c!{N@$g3qPMm#6PCa{6ZhMS0=3RJ!XIIW99sL`98!gG&fiyXoB`g29 zrH97vI4rTBj_(=Yi+;D)e4Ph*(hJOW#5P9t z^D2hqTZ;FOmSBaR*HSk|qlTiR)%gK8j758x3aHRJ6g9B_1#_yJvVaovPzQ=si^L4c0 zQ=`g7X~8b4*ay1^_W7&qV!?SUx|_;8z<=}eoLq);!tRQeMooARU$no6ahra=?fLOh zV{T-Qf^s_ubi=I!`wQGN_{-NuHS+)sJ5yA(QV6&UH{v=}uqxFmq79eFuyQZhH5Ci< z6r*bYX%q!34X3mTW;?KFX zVi_Prm<>?iGjc(e6OAFagaKj+&te3`D*kJ%Jit8G(q8{v1khFD1z64bQCf~?MLjs1 zvy7RSULn5}^eDq?2NQt?!C(p%>bo~UgDt^{S}(mQ>Ts)Q zmby}xnI+6wC=C6L!7wis%CKXZC(L0T<=mEJPoruUz+KvzeCNnE2dKM9j{%^@Szuei z|FBfAG9ON-Elt_#+SQZsaR8oLN4#vaa`)<$4_nrl5B9FpzYKmHOjQm%LG|VX@zHE; z<2te4Bv%W~uL0V`&%iKebCPsq>l@=!*~TU)KB{kDzm}@s{}>rc`)-+6wX5f{4K1IH zC9MB2v3@w|=v_5us~qdPjPrm1|5XRF*4B)5pJ3gW7|V1X6*`aJw+o$T9$F{AvNmU| zF2U+bw5F{+`EJJ_T6?!jf%-QUsG=cjug};!1$$@W?JvfXGjAs^2b1>Bls%NObAp{q z@{1Y%J%N7@eB#GKUH^NgU<|32!j(3>L#+olV3~=!#`dd%Q$)kNAHGRa2(3oIx@B% z!Pb-PJ$k=N=pDcRZt{&cg_AQ6ZEt;J##olC--k1f^GSB z81?jTbf>yU?~gv{9={R3d2Qv|+Hk6TZ_?!bVmLYTmT=~L=FBDG%%$Y)TKvCSRtMnz0>=;Fwj2~M)-yX&REw1ts4-$5o3A^P0#7en1o+bvup|yecamQ%lOpX3Kf(Ye@-Q$`aawsVh9e461t$@+fkW6*|!B%#kU0T!s;F z2e3I>WNcI`nI9!ZV0tkJ(Ho%!%A6JQoY>~Lgo zn-7SgjAI~H%6mNpA?fbJzM#kLmby&=i$V6j}-Hpsx3 z2zVfb%VGt|zJW7r0gHM`)}q!OSYprP3a#H8V25!#wnbc)0!yQh#erz{PjNRL;CP>u z_lqX%tK)TwW(DH=@K0eP`fQ-}mjx(2JJE7(UqKmY?ihwTwNiG3f4x;>O zckPD<*7YA8TI*WB@~Jh!+^b8JCanW;bJk*A(`9U(0{mM#vsE>jsy#y0o`hMb>Q9pW zU?EiQzHPZ>dA~GM(IQl|q$=9t`fNq@b9I2OWmPvXtX%jj49(_o;wwz6U8ri$RCNhe zU5RjF_I^|HjhSRsSE}l*_?R@;OWHSUIuh0dpQ<^q@nfN8@H0ZFIT$|$41vX9>lJLh z8`UY>V8(VxupLU-j$~}d1>5nIZ9INDYjb>04@}#>aXM8q{Mi+uX7qlOP%{=k^|T7} zy=6nY?Q27~-d^`4PJJ8@st?3VU~y!5s%&JV`ZsmIuKTC@blFJSGO}rRtWB@)O>jbU zf6Bf;J|T6B^O^D%p}Zw!XzIdMe$`y=|32IIEt4T?(!a~Afr(918TeDYob0p3g$y`zC20QoyQUWLS0 zg+4_^unpfcwutAgjKwkM7e-x+#BY^x5Or0RR?&586%`FpMh!$6RlgacjLi^b%#X(5 zlpTrI?Tj+M38oN^GA?Q4XF;@4A89y_zX63qxsMV~cYtX(MH-~*$~mQv4-ia**1~wA5;vs7@SSN^?iCI}+^{qs6_nJLF>X*RM+MmSj3cgpQGP$LNjmn-eP&a^NjZw%+HTNl`a^ z@-t++K(?pJ{CAT-c}O07wu$3mgn8(>cvyxU6v)9pZfT)Wj)&bI-G9u2KDU^U4QM~N z+K%;UKkw5+*+t^-HvzYwp(wG0!g12D50^=bx)OH#!#0v8`6azS}xr zMIZs1XyY#9T@G~nA@u9@aNQxdmtF9JzbpZp6jR1lpv>+;T*7x$25i$|SxCbywzlv+ o;aVUAVH3oEpuRt#*S bool: + """Delete all vectors associated with a specific document from ChromaDB""" + try: + # Initialize vector store for the hospital + vector_store = await initialize_or_load_vector_store(hospital_id) + + # Delete vectors with matching doc_id + await asyncio.to_thread( + lambda: vector_store._collection.delete(where={"doc_id": str(doc_id)}) + ) + + # Persist changes + await asyncio.to_thread(vector_store.persist) + + # Clear Redis cache for this document + redis_client = get_redis_client() + pattern = f"vector_store_data:{hospital_id}:*" + for key in redis_client.scan_iter(pattern): + redis_client.delete(key) + + logging.info( + f"Successfully deleted vectors for document {doc_id} from hospital {hospital_id}" + ) + return True + + except Exception as e: + logging.error(f"Error deleting document vectors: {e}", exc_info=True) + return False + + +async def add_document_to_index(doc_id, hospital_id): + try: + pool = await get_db_pool() + async with pool.acquire() as conn: + async with conn.cursor() as cursor: + vector_store = await initialize_or_load_vector_store(hospital_id) + + await cursor.execute( + "SELECT page_number, content FROM document_pages WHERE document_id = %s ORDER BY page_number", + (doc_id,), + ) + rows = await cursor.fetchall() + + total_pages = len(rows) + logging.info(f"Processing {total_pages} pages for document {doc_id}") + page_bar = tqdm_async(total=total_pages, desc="Processing pages") + + async def process_page(page_data): + page_num, content = page_data + try: + icd_data = extract_and_process_icd_data( + content, hospital_id, save_to_json=False + ) + chunks = text_splitter.split_text(content) + await asyncio.sleep(0) # Yield control + return page_num, chunks, icd_data + except Exception as e: + logging.error(f"Error processing page {page_num}: {e}") + return page_num, [], [] + + tasks = [asyncio.create_task(process_page(row)) for row in rows] + results = [] + + for coro in asyncio.as_completed(tasks): + result = await coro + results.append(result) + page_bar.update(1) + + page_bar.close() + + # Vector addition progress bar + all_icd_data = [] + all_chunks = [] + all_metadatas = [] + + chunk_add_bar = tqdm_async(desc="Vectorizing chunks", total=0) + + for result in results: + page_num, chunks, icd_data = result + all_icd_data.extend(icd_data) + + for i, chunk in enumerate(chunks): + all_chunks.append(chunk) + all_metadatas.append( + { + "doc_id": str(doc_id), + "hospital_id": str(hospital_id), + "page_number": str(page_num), + "chunk_index": str(i), + } + ) + + if len(all_chunks) >= BATCH_SIZE: + chunk_add_bar.total += len(all_chunks) + chunk_add_bar.refresh() + await asyncio.to_thread( + vector_store.add_texts, + texts=all_chunks, + metadatas=all_metadatas, + ) + all_chunks = [] + all_metadatas = [] + chunk_add_bar.update(BATCH_SIZE) + + # Final batch + if all_chunks: + chunk_add_bar.total += len(all_chunks) + chunk_add_bar.refresh() + await asyncio.to_thread( + vector_store.add_texts, + texts=all_chunks, + metadatas=all_metadatas, + ) + chunk_add_bar.update(len(all_chunks)) + + chunk_add_bar.close() + + if all_icd_data: + logging.info(f"Saving {len(all_icd_data)} ICD codes") + extract_and_process_icd_data("", hospital_id, save_to_json=True) + + await asyncio.to_thread(vector_store.persist) + logging.info(f"Successfully indexed document {doc_id}") + return True + + except Exception as e: + logging.error(f"Error adding document: {e}") + return False + + +def is_general_knowledge_question( + query: str, context: str, conversation_context=None +) -> bool: + """ + Determine if a question is likely a general knowledge question not covered in the documents. + Takes conversation history into account to reduce repeated confirmations. + """ + query_lower = query.lower() + context_lower = context.lower() + + if conversation_context: + for interaction in conversation_context: + prev_question = interaction.get("question", "").lower() + if ( + prev_question + and query_lower in prev_question + or prev_question in query_lower + ): + logging.info( + f"Question is similar to previous conversation, skipping confirmation" + ) + return False + + stop_words = { + "search", + "query:", + "can", + "you", + "some", + "at", + "the", + "a", + "an", + "in", + "on", + "at", + "to", + "for", + "with", + "by", + "about", + "give", + "full", + "is", + "are", + "was", + "were", + "define", + "what", + "how", + "why", + "when", + "where", + "year", + "list", + "form", + "table", + "who", + "which", + "me", + "tell", + "explain", + "describe", + "of", + "and", + "or", + "there", + "their", + "please", + "could", + "would", + "various", + "different", + "type", + "types", + "kind", + "kinds", + "has", + "have", + "had", + "many", + "say", + } + + key_words = [ + word for word in query_lower.split() if word not in stop_words and len(word) > 2 + ] + logging.info(f"Key words: {key_words}") + + if not key_words: + logging.info("No significant keywords found, directing to general knowledge") + return True + + matches = sum(1 for word in key_words if word in context_lower) + logging.info(f"Matches: {matches} out of {len(key_words)} keywords") + + match_ratio = matches / len(key_words) + logging.info(f"Match ratio: {match_ratio}") + + return match_ratio < 0.6 + + +def is_table_request(query: str) -> bool: + """ + Determine if the user is requesting a response in tabular format. + """ + table_keywords = [ + "table", + "tabular", + "in a table", + "in table format", + "in tabular format", + "chart", + "data", + "comparison", + "as a table", + "table format", + "in rows and columns", + "in a grid", + "breakdown", + "spreadsheet", + "comparison table", + "data table", + "structured table", + "tabular form", + "table form", + ] + + query_lower = query.lower() + return any(keyword in query_lower for keyword in table_keywords) + + +import re + + +def ensure_html_response(text: str) -> str: + """ + Ensure the response is properly formatted in HTML. + This function handles plain text conversion to HTML. + """ + if "", text)) + + if not has_html_tags: + paragraphs = text.split("\n\n") + html_parts = [] + in_ordered_list = False + in_unordered_list = False + + for para in paragraphs: + if para.strip(): + if re.match(r"^\s*[\*\-\•]\s", para): + if not in_unordered_list: + html_parts.append("
    ") + in_unordered_list = True + + lines = para.split("\n") + for line in lines: + if line.strip(): + item = re.sub(r"^\s*[\*\-\•]\s*", "", line) + html_parts.append(f"
  • {item}
  • ") + + elif re.match(r"^\s*\d+\.\s", para): + if not in_ordered_list: + html_parts.append("
      ") + in_ordered_list = True + + lines = para.split("\n") + for line in lines: + match = re.match(r"^\s*\d+\.\s*(.*)", line) + if match: + html_parts.append(f"
    1. {match.group(1)}
    2. ") + + else: # Close any open lists before adding a new paragraph + if in_ordered_list: + html_parts.append("
    ") + in_ordered_list = False + if in_unordered_list: + html_parts.append("
") + in_unordered_list = False + + html_parts.append(f"

{para}

") + + if in_ordered_list: + html_parts.append("") + if in_unordered_list: + html_parts.append("") + + return "".join(html_parts) + + else: + if not any(tag in text for tag in ("

", "

", "