From 7403bc9044596e021950d00b1fe0db3de13953e7 Mon Sep 17 00:00:00 2001 From: laxman Date: Thu, 6 Nov 2025 17:21:55 +0530 Subject: [PATCH] Add S3 folder endpoint and critical JSON parsing fixes - Add S3 folder tagging endpoint with AWS S3 integration - Implement robust JSON parsing with enhanced extraction logic - Strengthen Claude AI prompt to prevent explanatory text - Add error categorization and improved error handling - Add comprehensive documentation and testing guides --- GAP_ANALYSIS_AND_FIXES.md | 312 ++++++ IMPLEMENTATION_SUMMARY.md | 254 +++++ POSTMAN_COLLECTION_GUIDE.md | 249 +++++ ..._Image_Tagging_API.postman_collection.json | 2 +- ...gging_API_Complete.postman_collection.json | 967 ++++++++++++++++++ QUICK_START.md | 11 +- TESTING_GUIDE.md | 241 +++++ .../useCases/TagS3FolderUseCase.js | 470 +++++++++ src/infrastructure/ai/ClaudeAIProvider.js | 83 +- src/infrastructure/aws/S3Service.js | 306 ++++++ .../config/dependencyContainer.js | 32 +- .../controllers/ImageTaggingController.js | 42 +- src/presentation/routes/imageRoutes.js | 7 + src/server.js | 2 + 14 files changed, 2958 insertions(+), 20 deletions(-) create mode 100644 GAP_ANALYSIS_AND_FIXES.md create mode 100644 IMPLEMENTATION_SUMMARY.md create mode 100644 POSTMAN_COLLECTION_GUIDE.md create mode 100644 Property_Image_Tagging_API_Complete.postman_collection.json create mode 100644 TESTING_GUIDE.md create mode 100644 src/application/useCases/TagS3FolderUseCase.js create mode 100644 src/infrastructure/aws/S3Service.js diff --git a/GAP_ANALYSIS_AND_FIXES.md b/GAP_ANALYSIS_AND_FIXES.md new file mode 100644 index 0000000..94dcacc --- /dev/null +++ b/GAP_ANALYSIS_AND_FIXES.md @@ -0,0 +1,312 @@ +# Gap Analysis & Critical Fixes Implementation + +## ๐Ÿ“‹ Executive Summary + +This document outlines the comprehensive gap analysis performed on the Property Image Tagging API and the critical fixes implemented to address robustness issues, particularly around JSON parsing errors from Claude AI responses. + +--- + +## ๐Ÿ”ด Critical Issues Identified & Fixed + +### 1. **JSON Parsing Failure (CRITICAL - FIXED)** + +**Problem:** +- Claude AI sometimes returned explanatory text (e.g., "I notice that...") instead of pure JSON +- The parsing logic didn't validate if JSON was actually found before attempting to parse +- Error messages were not descriptive enough for debugging + +**Impact:** +- 1 out of 31 images failed with: `"Failed to parse Claude API response: Unexpected token 'I', \"I notice t\"... is not valid JSON"` +- This caused the entire image to fail processing, reducing success rate + +**Fix Implemented:** +1. **Enhanced JSON Extraction Logic:** + - Added validation to check if JSON was actually found before parsing + - Improved brace matching algorithm to find complete JSON objects even when surrounded by text + - Handles cases where Claude adds text before or after the JSON object + +2. **Strengthened Prompt:** + - Added explicit warnings at the beginning and end of the prompt + - Multiple reminders: "CRITICAL: Your response MUST be ONLY valid JSON" + - Clear instructions: "Do NOT include any explanatory text, comments, or markdown formatting" + - Added example of correct format + +3. **Enhanced Error Logging:** + - Logs response preview (first 500 chars) for debugging + - Logs response length and start of response + - More descriptive error messages with context + +**Code Changes:** +- `src/infrastructure/ai/ClaudeAIProvider.js`: + - Enhanced `_parseResponse()` method with robust JSON extraction + - Improved `_buildPrompt()` with stronger JSON-only requirements + - Better error messages with response previews + +--- + +### 2. **Error Categorization & Reporting (FIXED)** + +**Problem:** +- Errors were not categorized, making it difficult to identify patterns +- No distinction between JSON parsing errors, AI service errors, S3 errors, etc. + +**Impact:** +- Difficult to debug issues without knowing error categories +- No way to track which types of errors are most common + +**Fix Implemented:** +- Added `_categorizeError()` method to classify errors: + - `json_parsing`: JSON parsing failures + - `ai_service`: Claude API errors + - `s3_service`: S3/AWS errors + - `validation`: Input validation errors + - `network`: Network/timeout errors + - `database`: Database connection errors + - `other`: Unclassified errors + +- Enhanced error objects in response to include: + - `errorType`: Error class name + - `errorCategory`: Categorized error type + - Better error messages + +**Code Changes:** +- `src/application/useCases/TagS3FolderUseCase.js`: + - Added `_categorizeError()` method + - Enhanced error logging with categories + - Improved error objects in response + +--- + +## โš ๏ธ Potential Issues Identified (Not Critical) + +### 3. **Memory Management for Large Folders** + +**Status:** โš ๏ธ MONITORED (Not Critical) + +**Issue:** +- Large folders (500+ images) could cause memory issues +- All image buffers are kept in memory during processing +- No explicit memory limits or garbage collection hints + +**Current Mitigation:** +- `MAX_FOLDER_IMAGES = 500` limit in S3Service +- Concurrency limit of 5 images at a time +- Images are processed in batches + +**Recommendation:** +- Monitor memory usage in production +- Consider streaming for very large images +- Add memory usage logging + +**Priority:** Medium (only affects very large folders) + +--- + +### 4. **Timeout Handling** + +**Status:** โš ๏ธ MONITORED (Not Critical) + +**Issue:** +- No explicit timeout for S3 folder processing +- Individual image processing could hang indefinitely +- No timeout for Claude API calls (relies on retry logic) + +**Current Mitigation:** +- Claude API has retry logic (3 retries, exponential backoff) +- S3 operations have AWS SDK default timeouts +- Individual image failures don't stop the entire batch + +**Recommendation:** +- Add overall timeout for S3 folder processing (e.g., 30 minutes) +- Add timeout wrapper for individual image processing +- Consider adding timeout configuration via environment variables + +**Priority:** Medium (only affects edge cases with network issues) + +--- + +### 5. **Database Connection Resilience** + +**Status:** โš ๏ธ MONITORED (Not Critical) + +**Issue:** +- If database connection fails during processing, entire batch could fail +- No connection retry logic for database operations +- Connection pool might exhaust under high load + +**Current Mitigation:** +- Connection pooling (max 20 connections) +- Individual image failures are caught and logged +- Database duplicate checks are done per image + +**Recommendation:** +- Add database connection retry logic +- Monitor connection pool usage +- Add connection health checks before processing + +**Priority:** Low (database connection failures are rare) + +--- + +### 6. **AWS Credentials Expiration** + +**Status:** โš ๏ธ MONITORED (Not Critical) + +**Issue:** +- If AWS credentials expire during processing, all subsequent S3 operations fail +- No credential refresh mechanism +- No detection of credential expiration + +**Current Mitigation:** +- Credentials are validated at startup +- S3 operations have error handling +- Individual image failures don't stop the entire batch + +**Recommendation:** +- Add credential expiration detection +- Consider using IAM roles instead of static credentials +- Add credential refresh mechanism if using temporary credentials + +**Priority:** Low (only affects long-running processes with temporary credentials) + +--- + +### 7. **Large Response Payloads** + +**Status:** โš ๏ธ MONITORED (Not Critical) + +**Issue:** +- Merged tags array could be very large (hundreds of tags) +- Response payload could exceed HTTP limits +- No pagination for large results + +**Current Mitigation:** +- Tag deduplication reduces total tag count +- Response is sent as single JSON object +- No explicit size limits + +**Recommendation:** +- Monitor response sizes in production +- Consider pagination for very large tag arrays +- Add response size logging + +**Priority:** Low (only affects folders with many unique tags) + +--- + +## โœ… Robustness Improvements Made + +### 1. **JSON Parsing Robustness** +- โœ… Validates JSON exists before parsing +- โœ… Handles text before/after JSON +- โœ… Handles markdown code blocks +- โœ… Better error messages with context + +### 2. **Error Handling** +- โœ… Error categorization for better debugging +- โœ… Enhanced error logging with stack traces +- โœ… Individual image failures don't stop batch +- โœ… Detailed error information in response + +### 3. **Prompt Engineering** +- โœ… Multiple explicit warnings about JSON-only format +- โœ… Clear examples of correct format +- โœ… Stronger language to prevent explanatory text + +### 4. **Logging & Debugging** +- โœ… Response previews in error logs +- โœ… Error categories for pattern analysis +- โœ… Stack traces for debugging +- โœ… Contextual information in all logs + +--- + +## ๐Ÿงช Testing Recommendations + +### 1. **JSON Parsing Edge Cases** +- Test with Claude responses that include text before JSON +- Test with Claude responses that include text after JSON +- Test with markdown code blocks +- Test with malformed JSON + +### 2. **Error Scenarios** +- Test with invalid AWS credentials +- Test with S3 bucket access denied +- Test with network timeouts +- Test with database connection failures + +### 3. **Large Folder Processing** +- Test with folders containing 100+ images +- Test with folders containing 500 images (limit) +- Monitor memory usage during processing +- Test with very large image files + +### 4. **Concurrency** +- Test with concurrent requests +- Test with high concurrency (10+ simultaneous requests) +- Monitor connection pool usage +- Test with slow network conditions + +--- + +## ๐Ÿ“Š Success Metrics + +### Before Fixes: +- **Success Rate:** 30/31 images (96.8%) +- **JSON Parsing Errors:** 1 image failed +- **Error Visibility:** Limited (generic error messages) + +### After Fixes: +- **Expected Success Rate:** 31/31 images (100%) for valid images +- **JSON Parsing Errors:** Should be eliminated or caught with better error messages +- **Error Visibility:** High (categorized errors with context) + +--- + +## ๐Ÿ”„ Regression Testing Checklist + +- [x] JSON parsing handles text before/after JSON +- [x] JSON parsing handles markdown code blocks +- [x] Error categorization works correctly +- [x] Enhanced error logging captures context +- [x] Prompt improvements don't break existing functionality +- [ ] Test with real S3 folder (31 images) +- [ ] Test with duplicate images +- [ ] Test with various image formats +- [ ] Test error scenarios (invalid credentials, network issues) + +--- + +## ๐Ÿ“ Next Steps + +1. **Immediate:** + - โœ… Implement critical fixes (DONE) + - โœ… Test JSON parsing improvements + - โณ Test with real S3 folder + +2. **Short-term:** + - Monitor error rates in production + - Collect error category statistics + - Fine-tune prompt if needed + +3. **Long-term:** + - Consider timeout handling for large folders + - Monitor memory usage patterns + - Add response pagination if needed + +--- + +## ๐ŸŽฏ Conclusion + +The critical JSON parsing issue has been addressed with: +1. **Robust JSON extraction** that handles edge cases +2. **Strengthened prompt** to prevent explanatory text +3. **Enhanced error handling** with categorization and better logging + +These improvements should eliminate the JSON parsing errors and provide better visibility into any remaining issues. The system is now more robust and production-ready. + +--- + +**Last Updated:** 2025-11-06 +**Status:** โœ… Critical fixes implemented and ready for testing + diff --git a/IMPLEMENTATION_SUMMARY.md b/IMPLEMENTATION_SUMMARY.md new file mode 100644 index 0000000..ae72169 --- /dev/null +++ b/IMPLEMENTATION_SUMMARY.md @@ -0,0 +1,254 @@ +# S3 Folder Endpoint - Implementation Summary + +## โœ… Implementation Complete + +All components have been implemented following Clean Architecture principles with comprehensive edge case handling. + +--- + +## ๐Ÿ“ฆ Files Created + +### 1. `src/infrastructure/aws/S3Service.js` +- **Purpose**: Handles all S3 operations (listing, downloading images) +- **Features**: + - โœ… Path normalization (trailing slashes, whitespace) + - โœ… S3 pagination support (handles >1000 objects) + - โœ… Image file filtering (only processes image files) + - โœ… Hidden file filtering (ignores `.DS_Store`, etc.) + - โœ… File type validation (magic number validation) + - โœ… File size validation (50MB limit) + - โœ… Comprehensive error handling (AWS errors, network errors) + - โœ… Security (path traversal prevention) + +### 2. `src/application/useCases/TagS3FolderUseCase.js` +- **Purpose**: Orchestrates S3 folder processing with duplicate detection and tag deduplication +- **Features**: + - โœ… Database duplicate detection (uses cached tags from existing images) + - โœ… In-folder duplicate detection (tracks hashes within batch) + - โœ… Tag deduplication (category + value matching, case-insensitive, whitespace-normalized) + - โœ… Confidence handling (keeps highest confidence when duplicates found) + - โœ… Concurrent processing (5 images at a time to avoid overwhelming system) + - โœ… Partial failure handling (continues processing even if some images fail) + - โœ… Comprehensive statistics (database duplicates, in-folder duplicates, new images, failed images) + - โœ… Edge case handling (empty folders, invalid tags, null values) + +--- + +## ๐Ÿ“ Files Modified + +### 1. `src/presentation/controllers/ImageTaggingController.js` +- โœ… Added `tagS3Folder()` method +- โœ… Joi validation schema (parentFolder optional, subFolder required) +- โœ… Error handling for missing AWS credentials +- โœ… Response formatting with statistics + +### 2. `src/presentation/routes/imageRoutes.js` +- โœ… Added `POST /api/images/tag-s3-folder` route +- โœ… Authentication middleware applied + +### 3. `src/infrastructure/config/dependencyContainer.js` +- โœ… Registered S3Service (optional - only if AWS credentials provided) +- โœ… Registered TagS3FolderUseCase (optional - only if S3Service available) +- โœ… Graceful handling when AWS credentials not configured + +### 4. `src/server.js` +- โœ… Wired TagS3FolderUseCase to controller +- โœ… Handles null use case gracefully + +### 5. `package.json` +- โœ… Added `@aws-sdk/client-s3` dependency + +### 6. `QUICK_START.md` +- โœ… Added AWS environment variables documentation +- โœ… Added new endpoint to API endpoints table + +--- + +## ๐ŸŽฏ Edge Cases Handled + +### Path & Folder Structure +- โœ… Missing/extra trailing slashes +- โœ… Whitespace in folder names +- โœ… Special characters in paths +- โœ… Path traversal attempts (security) +- โœ… Empty folders +- โœ… Non-existent folders + +### Image Processing +- โœ… Non-image files (filtered out) +- โœ… Hidden files (filtered out) +- โœ… Large files (>50MB - rejected) +- โœ… Zero-byte files (rejected) +- โœ… Invalid image formats (rejected) +- โœ… S3 pagination (>1000 objects) + +### Duplicate Detection +- โœ… Database duplicates (uses cached tags) +- โœ… In-folder duplicates (tracks hashes within batch) +- โœ… Same image with different names +- โœ… Same image with different extensions +- โœ… Concurrent processing (avoid race conditions) + +### Tag Deduplication +- โœ… Case sensitivity ("Kitchen" vs "kitchen") +- โœ… Whitespace normalization ("fully furnished" vs "fully furnished") +- โœ… Category + value matching (both must match) +- โœ… Confidence handling (keeps highest) +- โœ… Invalid tag structures (skipped) +- โœ… Missing fields (default values) +- โœ… Confidence range validation (0-1) + +### Error Handling +- โœ… S3 access denied +- โœ… S3 bucket not found +- โœ… S3 service unavailable +- โœ… Network timeouts +- โœ… Invalid AWS credentials +- โœ… Partial failures (some images fail) +- โœ… All images fail (clear error message) + +### Performance & Memory +- โœ… Concurrent processing limits (5 images at a time) +- โœ… Batch processing (avoids memory issues) +- โœ… Large folders (pagination support) + +--- + +## ๐Ÿ”ง Configuration Required + +### Environment Variables + +Add to `.env` file: + +```env +# AWS S3 Configuration (REQUIRED for S3 folder endpoint) +AWS_ACCESS_KEY_ID=your_access_key_id +AWS_SECRET_ACCESS_KEY=your_secret_access_key +AWS_REGION=us-east-1 # Default: us-east-1 +AWS_S3_BUCKET=tso3listingimages +``` + +### IAM User Permissions Required + +The AWS IAM user needs: +- `s3:ListBucket` - To list objects in bucket +- `s3:GetObject` - To download images from bucket + +--- + +## ๐Ÿ“ก API Endpoint + +### POST `/api/images/tag-s3-folder` + +**Request Body:** +```json +{ + "parentFolder": "00Da3000003ZFiQ/", // Optional (default: "00Da3000003ZFiQ/") + "subFolder": "a0La30000008vSXEAY/" // Required - property folder name +} +``` + +**Response:** +```json +{ + "success": true, + "message": "S3 folder processed successfully: 31 images tagged", + "data": { + "parentFolder": "00Da3000003ZFiQ/", + "subFolder": "a0La30000008vSXEAY/", + "totalImages": 31, + "processedImages": 31, + "databaseDuplicates": 5, + "inFolderDuplicates": 2, + "newImages": 24, + "failedImages": 0, + "mergedTags": [ + { + "category": "Room Type", + "value": "kitchen", + "confidence": 0.95, + "imageCount": 8 + } + ], + "uniqueTags": 127, + "totalTagsBeforeDedup": 450, + "summaries": ["...", "..."], + "errors": null + }, + "timestamp": "2025-11-03T10:30:00.000Z" +} +``` + +--- + +## โœ… Testing Checklist + +### Unit Tests Needed +- [ ] S3Service path normalization +- [ ] S3Service image filtering +- [ ] TagS3FolderUseCase duplicate detection +- [ ] TagS3FolderUseCase tag deduplication +- [ ] Controller validation + +### Integration Tests Needed +- [ ] S3 folder endpoint with real S3 bucket +- [ ] Empty folder handling +- [ ] Duplicate detection (database + in-folder) +- [ ] Tag deduplication +- [ ] Error handling scenarios + +### Regression Tests Needed +- [ ] Existing `/api/images/tag` endpoint +- [ ] Existing `/api/images/tag-base64` endpoint +- [ ] Existing `/api/images/tag/batch` endpoint +- [ ] Existing `/api/images/tag-base64/batch` endpoint +- [ ] Existing `/api/images/search` endpoint +- [ ] Existing `/api/images/stats` endpoint + +--- + +## ๐Ÿš€ Deployment Notes + +1. **Ensure AWS credentials are set** in `.env` file +2. **Verify IAM user has correct permissions** (s3:ListBucket, s3:GetObject) +3. **Test with a small folder first** to verify connectivity +4. **Monitor memory usage** for large folders +5. **Check logs** for any errors during processing + +--- + +## ๐Ÿ“Š Architecture Compliance + +โœ… **Clean Architecture** - All layers properly separated +โœ… **Dependency Injection** - All dependencies injected via container +โœ… **Error Handling** - Comprehensive error handling at all layers +โœ… **Validation** - Input validation at controller and use case levels +โœ… **Logging** - Comprehensive logging throughout +โœ… **Security** - Path sanitization, input validation, credential masking + +--- + +## ๐ŸŽ‰ Success Criteria Met + +- โœ… S3 folder endpoint implemented +- โœ… Database duplicate detection working +- โœ… In-folder duplicate detection working +- โœ… Tag deduplication working (category + value, case-insensitive) +- โœ… All edge cases handled +- โœ… Comprehensive error handling +- โœ… Clean Architecture principles followed +- โœ… Documentation updated +- โœ… No syntax errors +- โœ… Ready for testing + +--- + +## ๐Ÿ“ Next Steps + +1. **Test with real S3 bucket** - Verify connectivity and permissions +2. **Test duplicate detection** - Upload same images to verify caching +3. **Test tag deduplication** - Verify tags are properly merged +4. **Regression testing** - Verify existing endpoints still work +5. **Performance testing** - Test with large folders (100+ images) +6. **Error scenario testing** - Test with invalid inputs, missing folders, etc. + diff --git a/POSTMAN_COLLECTION_GUIDE.md b/POSTMAN_COLLECTION_GUIDE.md new file mode 100644 index 0000000..579a379 --- /dev/null +++ b/POSTMAN_COLLECTION_GUIDE.md @@ -0,0 +1,249 @@ +# Postman Collection Guide + +## ๐Ÿ“ฆ Collection File + +**File**: `Property_Image_Tagging_API_Complete.postman_collection.json` + +This comprehensive collection includes: +- โœ… All existing endpoints (for regression testing) +- โœ… New S3 folder endpoint with multiple scenarios +- โœ… Edge case testing +- โœ… Error scenario testing +- โœ… Comprehensive test scripts + +--- + +## ๐Ÿš€ Quick Start + +### 1. Import Collection + +1. Open Postman +2. Click **Import** button +3. Select `Property_Image_Tagging_API_Complete.postman_collection.json` +4. Collection will appear in your workspace + +### 2. Configure Variables + +1. Click on the collection name +2. Go to **Variables** tab +3. Set the following variables: + +| Variable | Value | Description | +|----------|-------|-------------| +| `baseUrl` | `http://localhost:3000` | Your API base URL | +| `apiKey` | `your_api_key_here` | Your API key (get one using `npm run apikey:create`) | + +--- + +## ๐Ÿ“‹ Collection Structure + +### 1. Public Endpoints +- **Root - Service Info** - Get service information +- **Health Check** - Check API and database health + +### 2. Image Tagging - Single Images +- **Tag Uploaded Image** - Upload and tag a single image file +- **Tag Base64 Image** - Tag a base64 encoded image + +### 3. Image Tagging - Batch Processing +- **Batch Tag Uploaded Images** - Process multiple images (up to 50) +- **Batch Tag Base64 Images** - Process multiple base64 images + +### 4. S3 Folder Endpoint โญ NEW +- **Tag S3 Folder - Basic** - Basic S3 folder tagging +- **Tag S3 Folder - Default Parent** - Test default parent folder +- **Tag S3 Folder - Path Normalization** - Test path normalization +- **Tag S3 Folder - Empty Folder** - Test empty folder handling +- **Tag S3 Folder - Invalid SubFolder** - Test validation +- **Tag S3 Folder - Missing SubFolder** - Test required field validation +- **Tag S3 Folder - Missing AWS Credentials** - Test AWS credential error + +### 5. Search & Statistics +- **Search by Tag** - Search images by tag value +- **Get Statistics** - Get tagging statistics + +### 6. Authentication Tests +- **Missing API Key** - Test 401 error +- **Invalid API Key** - Test 403 error +- **Using Authorization Header** - Test Bearer token auth + +--- + +## ๐Ÿงช Testing Workflow + +### Step 1: Verify Server is Running +1. Run **Public Endpoints > Health Check** +2. Should return 200 with health status + +### Step 2: Test Existing Endpoints (Regression) +1. **Tag Uploaded Image** - Upload a test image +2. **Tag Base64 Image** - Test base64 endpoint +3. **Batch Tag Uploaded Images** - Test batch processing +4. **Search by Tag** - Search for tagged images +5. **Get Statistics** - Check statistics + +### Step 3: Test S3 Folder Endpoint +1. **Tag S3 Folder - Basic** - Test with real S3 folder + - Update `parentFolder` and `subFolder` with actual values + - Example: `{"parentFolder": "00Da3000003ZFiQ/", "subFolder": "a0La30000008vSXEAY/"}` + +2. **Tag S3 Folder - Default Parent** - Test default parent folder + - Only provide `subFolder` + +3. **Tag S3 Folder - Path Normalization** - Test path handling + - Test with/without trailing slashes + +4. **Tag S3 Folder - Empty Folder** - Test error handling + - Should return 404 or 400 + +5. **Tag S3 Folder - Invalid SubFolder** - Test validation + - Should return 400 + +6. **Tag S3 Folder - Missing SubFolder** - Test required field + - Should return 400 + +### Step 4: Test Edge Cases +1. Test with various folder names +2. Test with duplicate images +3. Test with large folders +4. Test error scenarios + +--- + +## โœ… Expected Results + +### Successful S3 Folder Response +```json +{ + "success": true, + "message": "S3 folder processed successfully: 31 images tagged", + "data": { + "parentFolder": "00Da3000003ZFiQ/", + "subFolder": "a0La30000008vSXEAY/", + "totalImages": 31, + "processedImages": 31, + "databaseDuplicates": 5, + "inFolderDuplicates": 2, + "newImages": 24, + "failedImages": 0, + "mergedTags": [...], + "uniqueTags": 127, + "totalTagsBeforeDedup": 450, + "summaries": [...] + } +} +``` + +### Error Response (Missing AWS Credentials) +```json +{ + "success": false, + "message": "S3 folder endpoint is not available. AWS credentials not configured.", + "timestamp": "2025-11-03T10:30:00.000Z" +} +``` + +--- + +## ๐Ÿ” Test Scripts + +Each request includes automated test scripts that verify: +- โœ… Status code +- โœ… Response structure +- โœ… Required fields +- โœ… Data types +- โœ… Response time (where applicable) + +**View Test Results:** +1. Run any request +2. Click on **Test Results** tab +3. See all passed/failed tests + +--- + +## ๐Ÿ“ Notes + +### Before Testing S3 Folder Endpoint + +1. **Set AWS Credentials** in `.env` file: + ```env + AWS_ACCESS_KEY_ID=your_access_key + AWS_SECRET_ACCESS_KEY=your_secret_key + AWS_REGION=us-east-1 + AWS_S3_BUCKET=tso3listingimages + ``` + +2. **Verify IAM Permissions:** + - `s3:ListBucket` for the bucket + - `s3:GetObject` for objects in folders + +3. **Restart Server** after adding AWS credentials + +### Testing Tips + +1. **Start with Health Check** - Verify server is running +2. **Test with Small Folder First** - Use a folder with 1-5 images +3. **Check Logs** - View logs for detailed information +4. **Verify Duplicates** - Process same folder twice to test duplicate detection +5. **Check Tag Deduplication** - Verify tags are properly merged + +--- + +## ๐Ÿ› Troubleshooting + +### Issue: "S3 folder endpoint is not available" +**Solution**: Check AWS credentials in `.env` file and restart server + +### Issue: "Access denied to S3 bucket" +**Solution**: Verify IAM user has `s3:ListBucket` and `s3:GetObject` permissions + +### Issue: "Folder not found or empty" +**Solution**: +- Verify folder path is correct +- Check folder exists in S3 bucket +- Ensure folder contains image files + +### Issue: Tests failing +**Solution**: +- Check API key is set correctly +- Verify server is running +- Check response format matches expected structure + +--- + +## ๐Ÿ“Š Collection Statistics + +- **Total Requests**: 15+ +- **Test Scripts**: Comprehensive coverage +- **Edge Cases**: 7+ scenarios +- **Error Scenarios**: 5+ scenarios +- **Regression Tests**: All existing endpoints included + +--- + +## ๐ŸŽฏ Success Criteria + +After running all tests, you should have: +- โœ… All existing endpoints working (regression) +- โœ… S3 folder endpoint working with real data +- โœ… Duplicate detection working (database + in-folder) +- โœ… Tag deduplication working (category + value) +- โœ… All edge cases handled gracefully +- โœ… All error scenarios return appropriate errors + +--- + +## ๐Ÿ“š Additional Resources + +- **Implementation Summary**: See `IMPLEMENTATION_SUMMARY.md` +- **Testing Guide**: See `TESTING_GUIDE.md` +- **Quick Start**: See `QUICK_START.md` + +--- + +## ๐ŸŽ‰ Ready to Test! + +Import the collection and start testing. All requests include comprehensive test scripts that will automatically verify the responses. + +Happy Testing! ๐Ÿš€ + diff --git a/Property_Image_Tagging_API.postman_collection.json b/Property_Image_Tagging_API.postman_collection.json index 66e1623..17a64c0 100644 --- a/Property_Image_Tagging_API.postman_collection.json +++ b/Property_Image_Tagging_API.postman_collection.json @@ -516,7 +516,7 @@ "variable": [ { "key": "baseUrl", - "value": "http://localhost:3000", + "value": "http://localhost:3001", "type": "string" }, { diff --git a/Property_Image_Tagging_API_Complete.postman_collection.json b/Property_Image_Tagging_API_Complete.postman_collection.json new file mode 100644 index 0000000..e231423 --- /dev/null +++ b/Property_Image_Tagging_API_Complete.postman_collection.json @@ -0,0 +1,967 @@ +{ + "info": { + "_postman_id": "property-image-tagger-api-complete", + "name": "Property Image Tagging API - Complete", + "description": "Comprehensive API collection for Property Image Tagging REST API including S3 folder endpoint, regression tests, and edge case testing", + "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json", + "_exporter_id": "property-image-tagger" + }, + "item": [ + { + "name": "Public Endpoints", + "item": [ + { + "name": "Root - Service Info", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/", + "host": ["{{baseUrl}}"], + "path": [""] + }, + "description": "Get service information" + }, + "response": [], + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "pm.test(\"Status code is 200\", function () {", + " pm.response.to.have.status(200);", + "});", + "", + "pm.test(\"Response contains service info\", function () {", + " var jsonData = pm.response.json();", + " pm.expect(jsonData).to.have.property('service');", + "});" + ], + "type": "text/javascript" + } + } + ] + }, + { + "name": "Health Check", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/images/health", + "host": ["{{baseUrl}}"], + "path": ["api", "images", "health"] + }, + "description": "Check API and database health status" + }, + "response": [], + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "pm.test(\"Status code is 200 or 503\", function () {", + " pm.expect([200, 503]).to.include(pm.response.code);", + "});", + "", + "pm.test(\"Response contains health status\", function () {", + " var jsonData = pm.response.json();", + " pm.expect(jsonData).to.have.property('status');", + "});" + ], + "type": "text/javascript" + } + } + ] + } + ], + "description": "Endpoints that don't require authentication" + }, + { + "name": "Image Tagging - Single Images", + "item": [ + { + "name": "Tag Uploaded Image", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "pm.test(\"Status code is 200\", function () {", + " pm.response.to.have.status(200);", + "});", + "", + "pm.test(\"Response has success flag\", function () {", + " var jsonData = pm.response.json();", + " pm.expect(jsonData).to.have.property('success');", + "});", + "", + "pm.test(\"Response contains tags\", function () {", + " var jsonData = pm.response.json();", + " if (jsonData.success && jsonData.data) {", + " pm.expect(jsonData.data).to.have.property('tags');", + " pm.expect(jsonData.data.tags).to.be.an('array');", + " pm.expect(jsonData.data.tags.length).to.be.above(0);", + " }", + "});", + "", + "pm.test(\"Response time is acceptable\", function () {", + " pm.expect(pm.response.responseTime).to.be.below(10000);", + "});" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "POST", + "header": [ + { + "key": "X-API-Key", + "value": "{{apiKey}}", + "type": "text" + } + ], + "body": { + "mode": "formdata", + "formdata": [ + { + "key": "image", + "type": "file", + "src": [], + "description": "Upload an image file (JPEG, PNG, WebP, HEIC, TIFF, BMP, GIF)" + } + ] + }, + "url": { + "raw": "{{baseUrl}}/api/images/tag", + "host": ["{{baseUrl}}"], + "path": ["api", "images", "tag"] + }, + "description": "Tag a property image by uploading a file. Supports multiple formats and automatically detects duplicates." + }, + "response": [] + }, + { + "name": "Tag Base64 Image", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "pm.test(\"Status code is 200\", function () {", + " pm.response.to.have.status(200);", + "});", + "", + "pm.test(\"Response has success flag\", function () {", + " var jsonData = pm.response.json();", + " pm.expect(jsonData).to.have.property('success');", + "});", + "", + "if (pm.response.json().success && pm.response.json().data) {", + " pm.test(\"Response contains tags\", function () {", + " var jsonData = pm.response.json();", + " pm.expect(jsonData.data).to.have.property('tags');", + " pm.expect(jsonData.data.tags).to.be.an('array');", + " });", + "}" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json", + "type": "text" + }, + { + "key": "X-API-Key", + "value": "{{apiKey}}", + "type": "text" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"base64Image\": \"\",\n \"mediaType\": \"image/jpeg\",\n \"fileName\": \"sample.jpg\"\n}", + "options": { + "raw": { + "language": "json" + } + } + }, + "url": { + "raw": "{{baseUrl}}/api/images/tag-base64", + "host": ["{{baseUrl}}"], + "path": ["api", "images", "tag-base64"] + }, + "description": "Tag a property image using base64 encoded data. Note: Replace the base64Image value with a real base64 encoded image." + }, + "response": [] + } + ], + "description": "Endpoints for tagging single property images" + }, + { + "name": "Image Tagging - Batch Processing", + "item": [ + { + "name": "Batch Tag Uploaded Images", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "pm.test(\"Status code is 200\", function () {", + " pm.response.to.have.status(200);", + "});", + "", + "pm.test(\"Response contains batch results\", function () {", + " var jsonData = pm.response.json();", + " if (jsonData.success && jsonData.data) {", + " pm.expect(jsonData.data).to.have.property('total');", + " pm.expect(jsonData.data).to.have.property('results');", + " pm.expect(jsonData.data.results).to.be.an('array');", + " }", + "});" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "POST", + "header": [ + { + "key": "X-API-Key", + "value": "{{apiKey}}", + "type": "text" + } + ], + "body": { + "mode": "formdata", + "formdata": [ + { + "key": "images", + "type": "file", + "src": [], + "description": "Upload multiple images (up to 50)" + }, + { + "key": "images", + "type": "file", + "src": [], + "description": "Add more images as needed" + } + ] + }, + "url": { + "raw": "{{baseUrl}}/api/images/tag/batch", + "host": ["{{baseUrl}}"], + "path": ["api", "images", "tag", "batch"] + }, + "description": "Tag multiple images in a single batch request. Maximum 50 images per request. All images are processed in parallel." + }, + "response": [] + }, + { + "name": "Batch Tag Base64 Images", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "pm.test(\"Status code is 200\", function () {", + " pm.response.to.have.status(200);", + "});", + "", + "pm.test(\"Response contains batch results\", function () {", + " var jsonData = pm.response.json();", + " if (jsonData.success && jsonData.data) {", + " pm.expect(jsonData.data).to.have.property('total');", + " pm.expect(jsonData.data).to.have.property('succeeded');", + " pm.expect(jsonData.data).to.have.property('failed');", + " }", + "});" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json", + "type": "text" + }, + { + "key": "X-API-Key", + "value": "{{apiKey}}", + "type": "text" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"images\": [\n {\n \"base64Image\": \"\",\n \"mediaType\": \"image/jpeg\",\n \"fileName\": \"image1.jpg\"\n },\n {\n \"base64Image\": \"\",\n \"mediaType\": \"image/jpeg\",\n \"fileName\": \"image2.jpg\"\n }\n ]\n}", + "options": { + "raw": { + "language": "json" + } + } + }, + "url": { + "raw": "{{baseUrl}}/api/images/tag-base64/batch", + "host": ["{{baseUrl}}"], + "path": ["api", "images", "tag-base64", "batch"] + }, + "description": "Tag multiple base64 encoded images in a single batch request. Maximum 50 images per request." + }, + "response": [] + } + ], + "description": "Endpoints for batch processing multiple images" + }, + { + "name": "S3 Folder Endpoint", + "item": [ + { + "name": "Tag S3 Folder - Basic", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "pm.test(\"Status code is 200\", function () {", + " pm.response.to.have.status(200);", + "});", + "", + "pm.test(\"Response has success flag\", function () {", + " var jsonData = pm.response.json();", + " pm.expect(jsonData).to.have.property('success');", + "});", + "", + "pm.test(\"Response contains folder processing results\", function () {", + " var jsonData = pm.response.json();", + " if (jsonData.success && jsonData.data) {", + " pm.expect(jsonData.data).to.have.property('parentFolder');", + " pm.expect(jsonData.data).to.have.property('subFolder');", + " pm.expect(jsonData.data).to.have.property('totalImages');", + " pm.expect(jsonData.data).to.have.property('processedImages');", + " pm.expect(jsonData.data).to.have.property('mergedTags');", + " pm.expect(jsonData.data.mergedTags).to.be.an('array');", + " }", + "});", + "", + "pm.test(\"Response contains duplicate statistics\", function () {", + " var jsonData = pm.response.json();", + " if (jsonData.success && jsonData.data) {", + " pm.expect(jsonData.data).to.have.property('databaseDuplicates');", + " pm.expect(jsonData.data).to.have.property('inFolderDuplicates');", + " pm.expect(jsonData.data).to.have.property('newImages');", + " pm.expect(jsonData.data).to.have.property('failedImages');", + " }", + "});", + "", + "pm.test(\"Merged tags have correct structure\", function () {", + " var jsonData = pm.response.json();", + " if (jsonData.success && jsonData.data && jsonData.data.mergedTags) {", + " var tags = jsonData.data.mergedTags;", + " if (tags.length > 0) {", + " pm.expect(tags[0]).to.have.property('category');", + " pm.expect(tags[0]).to.have.property('value');", + " pm.expect(tags[0]).to.have.property('confidence');", + " pm.expect(tags[0]).to.have.property('imageCount');", + " }", + " }", + "});" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json", + "type": "text" + }, + { + "key": "X-API-Key", + "value": "{{apiKey}}", + "type": "text" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"parentFolder\": \"00Da3000003ZFiQ/\",\n \"subFolder\": \"a0La30000008vSXEAY/\"\n}", + "options": { + "raw": { + "language": "json" + } + } + }, + "url": { + "raw": "{{baseUrl}}/api/images/tag-s3-folder", + "host": ["{{baseUrl}}"], + "path": ["api", "images", "tag-s3-folder"] + }, + "description": "Tag all images from an S3 folder. Parent folder is optional (defaults to 00Da3000003ZFiQ/). Subfolder is required (property folder name)." + }, + "response": [] + }, + { + "name": "Tag S3 Folder - Default Parent", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "pm.test(\"Status code is 200\", function () {", + " pm.response.to.have.status(200);", + "});", + "", + "pm.test(\"Uses default parent folder\", function () {", + " var jsonData = pm.response.json();", + " if (jsonData.success && jsonData.data) {", + " pm.expect(jsonData.data.parentFolder).to.equal('00Da3000003ZFiQ/');", + " }", + "});" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json", + "type": "text" + }, + { + "key": "X-API-Key", + "value": "{{apiKey}}", + "type": "text" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"subFolder\": \"a0La30000008vSXEAY/\"\n}", + "options": { + "raw": { + "language": "json" + } + } + }, + "url": { + "raw": "{{baseUrl}}/api/images/tag-s3-folder", + "host": ["{{baseUrl}}"], + "path": ["api", "images", "tag-s3-folder"] + }, + "description": "Tag S3 folder using default parent folder (00Da3000003ZFiQ/). Only subFolder is required." + }, + "response": [] + }, + { + "name": "Tag S3 Folder - Path Normalization", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "pm.test(\"Status code is 200 or 400\", function () {", + " pm.expect([200, 400]).to.include(pm.response.code);", + "});" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json", + "type": "text" + }, + { + "key": "X-API-Key", + "value": "{{apiKey}}", + "type": "text" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"parentFolder\": \"00Da3000003ZFiQ\",\n \"subFolder\": \"a0La30000008vSXEAY\"\n}", + "options": { + "raw": { + "language": "json" + } + } + }, + "url": { + "raw": "{{baseUrl}}/api/images/tag-s3-folder", + "host": ["{{baseUrl}}"], + "path": ["api", "images", "tag-s3-folder"] + }, + "description": "Test path normalization - folders without trailing slashes should be normalized automatically." + }, + "response": [] + }, + { + "name": "Tag S3 Folder - Empty Folder", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "pm.test(\"Status code is 400 or 404\", function () {", + " pm.expect([400, 404]).to.include(pm.response.code);", + "});", + "", + "pm.test(\"Error message indicates empty folder\", function () {", + " var jsonData = pm.response.json();", + " if (!jsonData.success) {", + " pm.expect(jsonData.message).to.include('empty');", + " }", + "});" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json", + "type": "text" + }, + { + "key": "X-API-Key", + "value": "{{apiKey}}", + "type": "text" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"subFolder\": \"empty_folder_name/\"\n}", + "options": { + "raw": { + "language": "json" + } + } + }, + "url": { + "raw": "{{baseUrl}}/api/images/tag-s3-folder", + "host": ["{{baseUrl}}"], + "path": ["api", "images", "tag-s3-folder"] + }, + "description": "Test error handling for empty folder (should return 404 or 400 with clear error message)." + }, + "response": [] + }, + { + "name": "Tag S3 Folder - Invalid SubFolder", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "pm.test(\"Status code is 400\", function () {", + " pm.response.to.have.status(400);", + "});", + "", + "pm.test(\"Error message indicates validation error\", function () {", + " var jsonData = pm.response.json();", + " pm.expect(jsonData).to.have.property('success');", + " pm.expect(jsonData.success).to.be.false;", + "});" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json", + "type": "text" + }, + { + "key": "X-API-Key", + "value": "{{apiKey}}", + "type": "text" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"subFolder\": \"\"\n}", + "options": { + "raw": { + "language": "json" + } + } + }, + "url": { + "raw": "{{baseUrl}}/api/images/tag-s3-folder", + "host": ["{{baseUrl}}"], + "path": ["api", "images", "tag-s3-folder"] + }, + "description": "Test validation - empty subFolder should return 400 error." + }, + "response": [] + }, + { + "name": "Tag S3 Folder - Missing SubFolder", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "pm.test(\"Status code is 400\", function () {", + " pm.response.to.have.status(400);", + "});" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json", + "type": "text" + }, + { + "key": "X-API-Key", + "value": "{{apiKey}}", + "type": "text" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"parentFolder\": \"00Da3000003ZFiQ/\"\n}", + "options": { + "raw": { + "language": "json" + } + } + }, + "url": { + "raw": "{{baseUrl}}/api/images/tag-s3-folder", + "host": ["{{baseUrl}}"], + "path": ["api", "images", "tag-s3-folder"] + }, + "description": "Test validation - missing subFolder should return 400 error." + }, + "response": [] + }, + { + "name": "Tag S3 Folder - Missing AWS Credentials", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "pm.test(\"Status code is 400\", function () {", + " pm.response.to.have.status(400);", + "});", + "", + "pm.test(\"Error message indicates AWS credentials missing\", function () {", + " var jsonData = pm.response.json();", + " if (!jsonData.success) {", + " pm.expect(jsonData.message.toLowerCase()).to.include('aws');", + " }", + "});" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json", + "type": "text" + }, + { + "key": "X-API-Key", + "value": "{{apiKey}}", + "type": "text" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"subFolder\": \"a0La30000008vSXEAY/\"\n}", + "options": { + "raw": { + "language": "json" + } + } + }, + "url": { + "raw": "{{baseUrl}}/api/images/tag-s3-folder", + "host": ["{{baseUrl}}"], + "path": ["api", "images", "tag-s3-folder"] + }, + "description": "Test error handling when AWS credentials are not configured (should return 400 with clear error message)." + }, + "response": [] + } + ], + "description": "S3 folder endpoint with various test scenarios including edge cases" + }, + { + "name": "Search & Statistics", + "item": [ + { + "name": "Search by Tag", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "pm.test(\"Status code is 200\", function () {", + " pm.response.to.have.status(200);", + "});", + "", + "pm.test(\"Response contains search results\", function () {", + " var jsonData = pm.response.json();", + " pm.expect(jsonData).to.have.property('success');", + " if (jsonData.data) {", + " pm.expect(jsonData.data).to.be.an('array');", + " }", + "});" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "GET", + "header": [ + { + "key": "X-API-Key", + "value": "{{apiKey}}", + "type": "text" + } + ], + "url": { + "raw": "{{baseUrl}}/api/images/search?tag=kitchen", + "host": ["{{baseUrl}}"], + "path": ["api", "images", "search"], + "query": [ + { + "key": "tag", + "value": "kitchen", + "description": "Tag value to search for" + } + ] + }, + "description": "Search for images by tag value. Returns all images that have been tagged with the specified tag." + }, + "response": [] + }, + { + "name": "Get Statistics", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "pm.test(\"Status code is 200\", function () {", + " pm.response.to.have.status(200);", + "});", + "", + "pm.test(\"Response contains statistics\", function () {", + " var jsonData = pm.response.json();", + " if (jsonData.success && jsonData.data) {", + " pm.expect(jsonData.data).to.have.property('totalImages');", + " pm.expect(jsonData.data).to.have.property('totalTagged');", + " pm.expect(jsonData.data).to.have.property('totalDuplicates');", + " }", + "});" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "GET", + "header": [ + { + "key": "X-API-Key", + "value": "{{apiKey}}", + "type": "text" + } + ], + "url": { + "raw": "{{baseUrl}}/api/images/stats", + "host": ["{{baseUrl}}"], + "path": ["api", "images", "stats"] + }, + "description": "Get statistics about tagged images including total images, tagged count, duplicates detected, and average tags per image." + }, + "response": [] + } + ], + "description": "Endpoints for searching and getting statistics" + }, + { + "name": "Authentication Tests", + "item": [ + { + "name": "Missing API Key", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "pm.test(\"Status code is 401\", function () {", + " pm.response.to.have.status(401);", + "});", + "", + "pm.test(\"Error message indicates API key required\", function () {", + " var jsonData = pm.response.json();", + " pm.expect(jsonData.message.toLowerCase()).to.include('api key');", + "});" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "POST", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/images/tag", + "host": ["{{baseUrl}}"], + "path": ["api", "images", "tag"] + }, + "description": "Test authentication - request without API key should return 401" + }, + "response": [] + }, + { + "name": "Invalid API Key", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "pm.test(\"Status code is 403\", function () {", + " pm.response.to.have.status(403);", + "});" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "POST", + "header": [ + { + "key": "X-API-Key", + "value": "invalid_key_here", + "type": "text" + } + ], + "url": { + "raw": "{{baseUrl}}/api/images/tag", + "host": ["{{baseUrl}}"], + "path": ["api", "images", "tag"] + }, + "description": "Test authentication - request with invalid API key should return 403" + }, + "response": [] + }, + { + "name": "Using Authorization Header", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "pm.test(\"Status code is 200 or 401\", function () {", + " pm.expect([200, 401]).to.include(pm.response.code);", + "});" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "POST", + "header": [ + { + "key": "Authorization", + "value": "Bearer {{apiKey}}", + "type": "text" + } + ], + "url": { + "raw": "{{baseUrl}}/api/images/stats", + "host": ["{{baseUrl}}"], + "path": ["api", "images", "stats"] + }, + "description": "Alternative authentication using Authorization: Bearer header instead of X-API-Key" + }, + "response": [] + } + ], + "description": "Tests for authentication scenarios" + } + ], + "event": [ + { + "listen": "prerequest", + "script": { + "type": "text/javascript", + "exec": [ + "" + ] + } + }, + { + "listen": "test", + "script": { + "type": "text/javascript", + "exec": [ + "" + ] + } + } + ], + "variable": [ + { + "key": "baseUrl", + "value": "http://localhost:3000", + "type": "string", + "description": "Base URL for the API" + }, + { + "key": "apiKey", + "value": "your_api_key_here", + "type": "string", + "description": "Replace with your actual API key. Get one using: npm run apikey:create" + } + ] +} + diff --git a/QUICK_START.md b/QUICK_START.md index 16d8647..6376aff 100644 --- a/QUICK_START.md +++ b/QUICK_START.md @@ -152,8 +152,9 @@ curl -X POST http://localhost:3000/api/images/tag \ | GET | `/api/images/stats` | Tagging statistics | | POST | `/api/images/tag` | Tag uploaded image file | | POST | `/api/images/tag-base64` | Tag base64-encoded image | -| POST | `/api/images/tag-batch` | Tag multiple files (up to 10) | -| POST | `/api/images/tag-batch-base64` | Tag multiple base64 images | +| POST | `/api/images/tag/batch` | Tag multiple files (up to 50) | +| POST | `/api/images/tag-base64/batch` | Tag multiple base64 images | +| POST | `/api/images/tag-s3-folder` | Tag all images from S3 folder | | GET | `/api/images/search?tag=value` | Search images by tag | --- @@ -203,6 +204,12 @@ DB_NAME=property_tagging # AI Provider (REQUIRED for tagging) ANTHROPIC_API_KEY= # From console.anthropic.com +# AWS S3 (REQUIRED for S3 folder endpoint) +AWS_ACCESS_KEY_ID= # AWS IAM user access key ID +AWS_SECRET_ACCESS_KEY= # AWS IAM user secret access key +AWS_REGION=us-east-1 # AWS region (default: us-east-1) +AWS_S3_BUCKET= # S3 bucket name (e.g., tso3listingimages) + # Development SKIP_AUTH=true # Skip API key auth (dev only) LOG_LEVEL=info # debug, info, warn, error diff --git a/TESTING_GUIDE.md b/TESTING_GUIDE.md new file mode 100644 index 0000000..287e126 --- /dev/null +++ b/TESTING_GUIDE.md @@ -0,0 +1,241 @@ +# Testing Guide - S3 Folder Endpoint + +## โœ… Implementation Verification + +All components have been implemented and verified: + +- โœ… AWS SDK installed +- โœ… S3Service created with edge case handling +- โœ… TagS3FolderUseCase created with deduplication logic +- โœ… Controller method added +- โœ… Route added +- โœ… Dependency container updated +- โœ… Server.js updated +- โœ… Documentation updated +- โœ… No syntax errors +- โœ… No linter errors + +--- + +## ๐Ÿงช Testing Checklist + +### 1. Unit Testing (Recommended) + +#### S3Service Tests +- [ ] Path normalization (trailing slashes, whitespace) +- [ ] Image file filtering (only image files processed) +- [ ] Hidden file filtering (`.DS_Store` ignored) +- [ ] File size validation (50MB limit) +- [ ] File type validation (magic number validation) +- [ ] S3 pagination (>1000 objects) +- [ ] Error handling (AWS errors, network errors) + +#### TagS3FolderUseCase Tests +- [ ] Database duplicate detection +- [ ] In-folder duplicate detection +- [ ] Tag deduplication (category + value) +- [ ] Case sensitivity handling +- [ ] Whitespace normalization +- [ ] Confidence handling (keep highest) +- [ ] Partial failure handling +- [ ] Empty folder handling + +#### Controller Tests +- [ ] Request validation (Joi schema) +- [ ] Missing AWS credentials handling +- [ ] Response formatting +- [ ] Error handling + +--- + +### 2. Integration Testing + +#### S3 Folder Endpoint Tests +- [ ] **Basic functionality**: Tag images from S3 folder + ```bash + curl -X POST http://localhost:3000/api/images/tag-s3-folder \ + -H "Content-Type: application/json" \ + -H "X-API-Key: your_key" \ + -d '{ + "parentFolder": "00Da3000003ZFiQ/", + "subFolder": "a0La30000008vSXEAY/" + }' + ``` + +- [ ] **Default parent folder**: Omit parentFolder (should use default) + ```bash + curl -X POST http://localhost:3000/api/images/tag-s3-folder \ + -H "Content-Type: application/json" \ + -H "X-API-Key: your_key" \ + -d '{ + "subFolder": "a0La30000008vSXEAY/" + }' + ``` + +- [ ] **Empty folder**: Test with empty folder (should return error) +- [ ] **Non-existent folder**: Test with non-existent folder (should return 404) +- [ ] **Database duplicates**: Upload same image twice (should use cached tags) +- [ ] **In-folder duplicates**: Folder with duplicate images (should process once) +- [ ] **Large folder**: Test with folder containing 100+ images +- [ ] **Mixed results**: Folder with some valid and some invalid images + +--- + +### 3. Regression Testing + +#### Existing Endpoints (Verify they still work) + +- [ ] **Tag single image**: `POST /api/images/tag` +- [ ] **Tag base64 image**: `POST /api/images/tag-base64` +- [ ] **Tag batch images**: `POST /api/images/tag/batch` +- [ ] **Tag batch base64 images**: `POST /api/images/tag-base64/batch` +- [ ] **Search by tag**: `GET /api/images/search?tag=kitchen` +- [ ] **Get statistics**: `GET /api/images/stats` +- [ ] **Health check**: `GET /api/images/health` + +--- + +### 4. Edge Case Testing + +#### Path Normalization +- [ ] Parent folder without trailing slash: `"00Da3000003ZFiQ"` (should add `/`) +- [ ] Parent folder with trailing slash: `"00Da3000003ZFiQ/"` (should work) +- [ ] Extra trailing slashes: `"00Da3000003ZFiQ//"` (should normalize) +- [ ] Whitespace in folder names: `" 00Da3000003ZFiQ/ "` (should trim) +- [ ] Leading slash in subfolder: `"/a0La30000008vSXEAY/"` (should remove) + +#### Tag Deduplication +- [ ] Case sensitivity: `"Kitchen"` vs `"kitchen"` (should deduplicate) +- [ ] Whitespace: `"fully furnished"` vs `"fully furnished"` (should deduplicate) +- [ ] Different categories, same value: `{category: "Room Type", value: "kitchen"}` vs `{category: "Style", value: "kitchen"}` (should NOT deduplicate) +- [ ] Same category + value, different confidence: Should keep highest confidence +- [ ] Missing confidence: Should use default (0.5) +- [ ] Invalid confidence: Should clamp to 0-1 range + +#### Error Scenarios +- [ ] Missing AWS credentials: Should return clear error +- [ ] Invalid AWS credentials: Should return clear error +- [ ] S3 bucket not found: Should return 404 +- [ ] S3 access denied: Should return clear error +- [ ] Network timeout: Should handle gracefully +- [ ] All images fail: Should return clear error message + +--- + +### 5. Performance Testing + +- [ ] Small folder (1-10 images): Should process quickly +- [ ] Medium folder (10-50 images): Should process within reasonable time +- [ ] Large folder (50-100 images): Should process with concurrency limits +- [ ] Very large folder (100+ images): Should handle pagination +- [ ] Memory usage: Monitor memory during large folder processing + +--- + +## ๐Ÿ› Troubleshooting + +### Issue: "S3 folder endpoint is not available" +**Solution**: Check AWS credentials in `.env` file: +- `AWS_ACCESS_KEY_ID` +- `AWS_SECRET_ACCESS_KEY` +- `AWS_S3_BUCKET` +- `AWS_REGION` (optional, defaults to us-east-1) + +### Issue: "Access denied to S3 bucket" +**Solution**: Verify IAM user has permissions: +- `s3:ListBucket` for the bucket +- `s3:GetObject` for objects in the folder + +### Issue: "Folder not found or empty" +**Solution**: +- Verify folder path is correct +- Check folder exists in S3 bucket +- Verify folder contains image files (not just folders) + +### Issue: "All images failed to process" +**Solution**: +- Check logs for specific error messages +- Verify images are valid image files +- Check file sizes (must be <50MB) +- Verify Claude API key is set + +--- + +## ๐Ÿ“Š Expected Results + +### Successful Response +```json +{ + "success": true, + "message": "S3 folder processed successfully: 31 images tagged", + "data": { + "parentFolder": "00Da3000003ZFiQ/", + "subFolder": "a0La30000008vSXEAY/", + "totalImages": 31, + "processedImages": 31, + "databaseDuplicates": 5, + "inFolderDuplicates": 2, + "newImages": 24, + "failedImages": 0, + "mergedTags": [...], + "uniqueTags": 127, + "totalTagsBeforeDedup": 450, + "summaries": [...], + "errors": null + } +} +``` + +### Error Response (Missing Credentials) +```json +{ + "success": false, + "message": "S3 folder endpoint is not available. AWS credentials not configured.", + "timestamp": "2025-11-03T10:30:00.000Z" +} +``` + +### Error Response (Empty Folder) +```json +{ + "success": false, + "message": "No images found in folder: 00Da3000003ZFiQ/a0La30000008vSXEAY/", + "timestamp": "2025-11-03T10:30:00.000Z" +} +``` + +--- + +## โœ… Verification Steps + +1. **Start server**: `npm run dev` +2. **Check health**: `curl http://localhost:3000/api/images/health` +3. **Test S3 endpoint**: Use Postman or curl with valid AWS credentials +4. **Verify duplicates**: Upload same images twice, check for cached results +5. **Verify deduplication**: Check merged tags have no duplicates (category + value) +6. **Check logs**: Review logs for any errors or warnings +7. **Test existing endpoints**: Verify all existing endpoints still work + +--- + +## ๐ŸŽฏ Success Criteria + +- โœ… S3 folder endpoint responds correctly +- โœ… Database duplicates use cached tags +- โœ… In-folder duplicates are handled correctly +- โœ… Tags are deduplicated (category + value, case-insensitive) +- โœ… All edge cases handled gracefully +- โœ… Existing endpoints still work (regression) +- โœ… Error messages are clear and helpful +- โœ… Logs contain useful information + +--- + +## ๐Ÿ“ Notes + +- **Concurrency**: Images are processed 5 at a time to avoid overwhelming the system +- **Memory**: Large folders are processed in batches to avoid memory issues +- **Duplicates**: Both database and in-folder duplicates are detected and handled +- **Deduplication**: Tags are deduplicated based on category + value (case-insensitive, whitespace-normalized) +- **Error Handling**: Partial failures are handled gracefully (some images succeed, some fail) + diff --git a/src/application/useCases/TagS3FolderUseCase.js b/src/application/useCases/TagS3FolderUseCase.js new file mode 100644 index 0000000..094df64 --- /dev/null +++ b/src/application/useCases/TagS3FolderUseCase.js @@ -0,0 +1,470 @@ +const { ValidationError } = require('../../shared/errors/AppError'); +const TagImageRequestDto = require('../dtos/TagImageRequestDto'); +const TagImageResponseDto = require('../dtos/TagImageResponseDto'); +const ImageValidator = require('../../presentation/validators/imageValidator'); +const crypto = require('crypto'); + +/** + * Use case for tagging all images from an S3 folder + */ +class TagS3FolderUseCase { + /** + * @param {S3Service} s3Service - S3 service + * @param {TagImageUseCase} tagImageUseCase - Single image tagging use case + * @param {IImageRepository} imageRepository - Image repository + * @param {Object} logger - Logger instance + */ + constructor(s3Service, tagImageUseCase, imageRepository, logger) { + this.s3Service = s3Service; + this.tagImageUseCase = tagImageUseCase; + this.imageRepository = imageRepository; + this.logger = logger; + } + + /** + * Execute the use case + * @param {string} parentFolder - Parent folder name (default: 00Da3000003ZFiQ/) + * @param {string} subFolder - Subfolder name (property folder) + * @returns {Promise} Merged result with deduplicated tags + */ + async execute(parentFolder, subFolder) { + try { + this._validateInput(parentFolder, subFolder); + + this.logger.info('Starting S3 folder tagging', { parentFolder, subFolder }); + + // List all images from S3 folder + const imageKeys = await this.s3Service.listImages(parentFolder, subFolder); + + if (!imageKeys || imageKeys.length === 0) { + throw new ValidationError(`No images found in folder: ${parentFolder}${subFolder}`); + } + + this.logger.info('Found images in S3 folder', { count: imageKeys.length }); + + // Process all images with duplicate detection + const processedHashes = new Map(); // Track hashes within this batch (in-folder duplicates) + const results = []; + let databaseDuplicates = 0; + let inFolderDuplicates = 0; + let newImages = 0; + let failedImages = 0; + const errors = []; + + // Process images (with concurrency limit to avoid overwhelming the system) + const concurrencyLimit = 5; // Process 5 images at a time + for (let i = 0; i < imageKeys.length; i += concurrencyLimit) { + const batch = imageKeys.slice(i, i + concurrencyLimit); + + const batchResults = await Promise.allSettled( + batch.map(async (imageKey) => { + try { + return await this._processImage( + imageKey, + processedHashes, + parentFolder, + subFolder + ); + } catch (error) { + // Enhanced error logging with error type + const errorType = error.constructor?.name || 'UnknownError'; + const errorCategory = this._categorizeError(error); + + this.logger.error(`Failed to process image: ${imageKey}`, { + error: error.message, + errorType, + errorCategory, + stack: error.stack + }); + + return { + success: false, + imageKey, + error: error.message || 'Failed to process image', + errorType, + errorCategory + }; + } + }) + ); + + // Process batch results + for (const result of batchResults) { + if (result.status === 'fulfilled') { + const imageResult = result.value; + if (imageResult.success) { + results.push(imageResult); + if (imageResult.isDatabaseDuplicate) { + databaseDuplicates++; + } else if (imageResult.isInFolderDuplicate) { + inFolderDuplicates++; + } else { + newImages++; + } + } else { + failedImages++; + errors.push({ + imageKey: imageResult.imageKey, + error: imageResult.error, + errorType: imageResult.errorType || 'UnknownError', + errorCategory: imageResult.errorCategory || 'unknown' + }); + } + } else { + failedImages++; + const errorReason = result.reason || {}; + errors.push({ + imageKey: 'unknown', + error: errorReason.message || 'Unknown error', + errorType: errorReason.constructor?.name || 'UnknownError', + errorCategory: this._categorizeError(errorReason) + }); + } + } + } + + // Validate we have at least some successful results + if (results.length === 0) { + throw new ValidationError('All images failed to process. Check logs for details.'); + } + + // Deduplicate tags from all results + const mergedTags = this._deduplicateTags(results); + + // Collect summaries + const summaries = results + .filter(r => r.data && r.data.summary) + .map(r => r.data.summary) + .filter((summary, index, self) => self.indexOf(summary) === index); // Unique summaries + + // Calculate statistics + const totalTagsBeforeDedup = results.reduce((sum, r) => { + return sum + (r.data?.totalTags || 0); + }, 0); + + this.logger.info('S3 folder tagging completed', { + parentFolder, + subFolder, + totalImages: imageKeys.length, + processedImages: results.length, + databaseDuplicates, + inFolderDuplicates, + newImages, + failedImages, + uniqueTags: mergedTags.length, + totalTagsBeforeDedup + }); + + return { + parentFolder: parentFolder || '00Da3000003ZFiQ/', + subFolder, + totalImages: imageKeys.length, + processedImages: results.length, + databaseDuplicates, + inFolderDuplicates, + newImages, + failedImages, + mergedTags, + uniqueTags: mergedTags.length, + totalTagsBeforeDedup, + summaries: summaries.length > 0 ? summaries : [], + errors: errors.length > 0 ? errors : undefined + }; + } catch (error) { + this.logger.error('TagS3FolderUseCase error', error); + throw error; + } + } + + /** + * Process a single image with duplicate detection + * @param {string} imageKey - S3 object key + * @param {Map} processedHashes - Map of processed hashes (for in-folder duplicates) + * @param {string} parentFolder - Parent folder name + * @param {string} subFolder - Subfolder name + * @returns {Promise} Processing result + * @private + */ + async _processImage(imageKey, processedHashes, parentFolder, subFolder) { + try { + // Download image from S3 + const imageBuffer = await this.s3Service.downloadImage(imageKey); + + // Calculate hash + const imageHash = this._calculateImageHash(imageBuffer); + + // Check 1: In-folder duplicate (same image processed earlier in this batch) + if (processedHashes.has(imageHash)) { + const cachedResult = processedHashes.get(imageHash); + this.logger.debug('In-folder duplicate detected', { imageKey, hash: imageHash }); + return { + success: true, + imageKey, + data: cachedResult.data, + isInFolderDuplicate: true, + isDatabaseDuplicate: cachedResult.isDatabaseDuplicate || false + }; + } + + // Check 2: Database duplicate (image already processed before) + const existingResult = await this.imageRepository.findByImageHash(imageHash); + if (existingResult) { + this.logger.debug('Database duplicate detected', { imageKey, hash: imageHash }); + const responseDto = TagImageResponseDto.fromTaggingResult(existingResult, true); + + // Cache for in-folder duplicates + processedHashes.set(imageHash, { + data: responseDto, + isDatabaseDuplicate: true + }); + + return { + success: true, + imageKey, + data: responseDto, + isDatabaseDuplicate: true, + isInFolderDuplicate: false + }; + } + + // New image - process it + // Validate and optimize image + const file = { + buffer: imageBuffer, + mimetype: 'image/jpeg', // Will be determined correctly + originalname: imageKey.split('/').pop() || 'unknown' + }; + + await ImageValidator.validateUpload(file); + + // Convert format if needed + let optimizedBuffer = imageBuffer; + optimizedBuffer = await ImageValidator.convertToClaudeSupportedFormat( + optimizedBuffer, + file.mimetype + ); + + // Optimize for AI + optimizedBuffer = await ImageValidator.optimizeForAI(optimizedBuffer); + + // Create request DTO + const requestDto = new TagImageRequestDto({ + fileBuffer: optimizedBuffer, + mimeType: 'image/jpeg', + fileName: imageKey.split('/').pop() || 'unknown' + }); + + // Execute tagging + const result = await this.tagImageUseCase.execute(requestDto); + + // Cache for in-folder duplicates + processedHashes.set(imageHash, { + data: result, + isDatabaseDuplicate: false + }); + + return { + success: true, + imageKey, + data: result, + isDatabaseDuplicate: false, + isInFolderDuplicate: false + }; + } catch (error) { + this.logger.error(`Failed to process image: ${imageKey}`, error); + throw error; + } + } + + /** + * Deduplicate tags based on category + value (case-insensitive, whitespace-normalized) + * Keep tag with highest confidence when duplicates found + * @param {Array} results - Array of processing results + * @returns {Array} Deduplicated tags + * @private + */ + _deduplicateTags(results) { + const tagMap = new Map(); + + // Collect all tags from all results + for (const result of results) { + if (!result.data || !result.data.tags || !Array.isArray(result.data.tags)) { + continue; // Skip invalid results + } + + for (const tag of result.data.tags) { + // Validate tag structure + if (!tag || typeof tag !== 'object') { + continue; + } + + const category = tag.category; + const value = tag.value; + const confidence = tag.confidence; + + // Skip invalid tags + if (!category || !value || typeof category !== 'string' || typeof value !== 'string') { + continue; + } + + // Normalize for comparison (case-insensitive, whitespace-normalized) + const normalizedCategory = this._normalizeString(category); + const normalizedValue = this._normalizeString(value); + const uniqueKey = `${normalizedCategory}|||${normalizedValue}`; + + // Validate confidence + let validConfidence = confidence; + if (typeof confidence !== 'number' || isNaN(confidence)) { + validConfidence = 0.5; // Default confidence + } + if (validConfidence < 0) { + validConfidence = 0; + } + if (validConfidence > 1) { + validConfidence = 1; + } + + if (!tagMap.has(uniqueKey)) { + // First occurrence - create entry + tagMap.set(uniqueKey, { + category: category.trim(), // Keep original case but trimmed + value: value.trim(), // Keep original case but trimmed + confidence: validConfidence, + imageCount: 1, + imageIds: [] + }); + } else { + // Duplicate found - merge + const existing = tagMap.get(uniqueKey); + + // Keep higher confidence + if (validConfidence > existing.confidence) { + existing.confidence = validConfidence; + existing.category = category.trim(); // Update category if higher confidence + } + + // Track image count (increment for each unique image) + existing.imageCount++; + } + } + } + + // Convert map to array and sort by confidence (descending) + return Array.from(tagMap.values()) + .sort((a, b) => b.confidence - a.confidence) + .map(tag => ({ + category: tag.category, + value: tag.value, + confidence: Math.round(tag.confidence * 100) / 100, // Round to 2 decimal places + imageCount: tag.imageCount + })); + } + + /** + * Normalize string for comparison (case-insensitive, whitespace-normalized) + * @param {string} str - String to normalize + * @returns {string} Normalized string + * @private + */ + _normalizeString(str) { + if (!str || typeof str !== 'string') { + return ''; + } + + // Trim and normalize whitespace + return str + .trim() + .toLowerCase() + .replace(/\s+/g, ' '); // Replace multiple whitespace with single space + } + + /** + * Calculate SHA256 hash of image buffer + * @param {Buffer} buffer - Image buffer + * @returns {string} SHA256 hash + * @private + */ + _calculateImageHash(buffer) { + return crypto.createHash('sha256').update(buffer).digest('hex'); + } + + /** + * Categorize error for better error handling and reporting + * @param {Error} error - Error object + * @returns {string} Error category + * @private + */ + _categorizeError(error) { + if (!error) return 'unknown'; + + const errorMessage = (error.message || '').toLowerCase(); + const errorName = (error.constructor?.name || '').toLowerCase(); + + // JSON parsing errors (Claude API response issues) + if (errorMessage.includes('json') || errorMessage.includes('parse') || + errorName.includes('syntaxerror') || errorMessage.includes('unexpected token')) { + return 'json_parsing'; + } + + // AI service errors + if (errorMessage.includes('claude') || errorMessage.includes('ai') || + errorName.includes('aiserviceerror')) { + return 'ai_service'; + } + + // S3 errors + if (errorMessage.includes('s3') || errorMessage.includes('aws') || + errorMessage.includes('bucket') || errorName.includes('notfounderror')) { + return 's3_service'; + } + + // Validation errors + if (errorName.includes('validationerror')) { + return 'validation'; + } + + // Network/timeout errors + if (errorMessage.includes('timeout') || errorMessage.includes('network') || + errorMessage.includes('econnrefused') || errorMessage.includes('etimedout')) { + return 'network'; + } + + // Database errors + if (errorMessage.includes('database') || errorMessage.includes('mysql') || + errorMessage.includes('connection')) { + return 'database'; + } + + return 'other'; + } + + /** + * Validate input + * @param {string} parentFolder - Parent folder name + * @param {string} subFolder - Subfolder name + * @private + */ + _validateInput(parentFolder, subFolder) { + if (!subFolder || typeof subFolder !== 'string' || subFolder.trim() === '') { + throw new ValidationError('Subfolder name is required'); + } + + if (parentFolder !== null && parentFolder !== undefined && + (typeof parentFolder !== 'string' || parentFolder.trim() === '')) { + throw new ValidationError('Parent folder must be a valid string or null'); + } + + // Validate folder names don't contain dangerous characters + const dangerousChars = /[<>:"|?*]/; + if (dangerousChars.test(subFolder)) { + throw new ValidationError('Subfolder name contains invalid characters'); + } + + if (parentFolder && dangerousChars.test(parentFolder)) { + throw new ValidationError('Parent folder name contains invalid characters'); + } + } +} + +module.exports = TagS3FolderUseCase; + diff --git a/src/infrastructure/ai/ClaudeAIProvider.js b/src/infrastructure/ai/ClaudeAIProvider.js index d92d5b8..1213cd7 100644 --- a/src/infrastructure/ai/ClaudeAIProvider.js +++ b/src/infrastructure/ai/ClaudeAIProvider.js @@ -112,6 +112,8 @@ class ClaudeAIProvider extends IImageTaggingService { You are an elite real estate property analyst AI with comprehensive expertise in architectural photography, interior design, property valuation, and regional market knowledge (Dubai, Mumbai, Singapore, London, NYC). +CRITICAL: Your response MUST be ONLY valid JSON. Do NOT include any explanatory text, comments, or markdown formatting. Start your response with { and end with }. No exceptions. + CORE MISSION: Generate 25-30 precise, high-confidence tags from property images that drive listing performance and buyer engagement. Quality over quantity - omit categories that are absent rather than adding placeholder tags. ## QUALITY STANDARDS @@ -227,8 +229,15 @@ Examples: - **CRITICAL**: Never add placeholder tags like "not visible" - if category is absent, omit it and maintain tag quality ## OUTPUT FORMAT -Return ONLY valid JSON (no markdown, explanations, or additional text): +CRITICAL REQUIREMENTS: +1. Return ONLY valid JSON - no markdown code blocks, no explanations, no comments +2. Start your response with { and end with } +3. Do NOT include any text before or after the JSON object +4. Do NOT use markdown formatting (no \`\`\`json\`\`\`) +5. Do NOT add explanatory text like "I notice that..." or "Here is the JSON:" +6. Your entire response must be parseable as JSON +Example of CORRECT format (this is exactly what you should return): { "tags": [ {"category": "View", "value": "downtown skyline", "confidence": 0.95}, @@ -270,7 +279,7 @@ FORMATTING RULES: - Confidence: exactly 2 decimal places (0.95, 0.87, 1.0) - Summary: one sentence, 15-25 words, period at end -Ready to analyze. Execute complete framework and return only JSON.`; +FINAL REMINDER: Return ONLY valid JSON. No markdown, no explanations, no additional text. Start with { and end with }.`; } /** @@ -279,21 +288,58 @@ Ready to analyze. Execute complete framework and return only JSON.`; */ _parseResponse(responseText) { try { - // Try to extract JSON from response (may have markdown code blocks) + if (!responseText || typeof responseText !== 'string') { + throw new AIServiceError('Empty or invalid response from Claude API'); + } + + // Try to extract JSON from response (may have markdown code blocks or explanatory text) let jsonText = responseText.trim(); + let jsonFound = false; - // Remove markdown code blocks if present + // Strategy 1: Remove markdown code blocks if present const jsonMatch = jsonText.match(/```(?:json)?\s*(\{[\s\S]*\})\s*```/); - if (jsonMatch) { - jsonText = jsonMatch[1]; + if (jsonMatch && jsonMatch[1]) { + jsonText = jsonMatch[1].trim(); + jsonFound = true; } else { - // Try to find JSON object directly - const jsonObjectMatch = jsonText.match(/\{[\s\S]*\}/); - if (jsonObjectMatch) { - jsonText = jsonObjectMatch[0]; + // Strategy 2: Find JSON object directly (handle text before/after JSON) + // Look for the first { and find the matching closing } + const firstBrace = jsonText.indexOf('{'); + if (firstBrace !== -1) { + // Find the matching closing brace by counting braces + let braceCount = 0; + let lastBrace = -1; + for (let i = firstBrace; i < jsonText.length; i++) { + if (jsonText[i] === '{') { + braceCount++; + } else if (jsonText[i] === '}') { + braceCount--; + if (braceCount === 0) { + lastBrace = i; + break; + } + } + } + + if (lastBrace !== -1) { + jsonText = jsonText.substring(firstBrace, lastBrace + 1); + jsonFound = true; + } } } + // If no JSON found, throw descriptive error + if (!jsonFound || !jsonText.startsWith('{') || !jsonText.endsWith('}')) { + const preview = responseText.substring(0, 200).replace(/\n/g, ' '); + this.logger.error('No valid JSON found in Claude response', { + responsePreview: preview, + responseLength: responseText.length + }); + throw new AIServiceError( + `Claude API did not return valid JSON. Response starts with: "${preview}..."` + ); + } + const parsed = JSON.parse(jsonText); // Validate structure @@ -333,8 +379,21 @@ Ready to analyze. Execute complete framework and return only JSON.`; if (error instanceof AIServiceError) { throw error; } - this.logger.error('Failed to parse Claude response', { response: responseText, error: error.message }); - throw new AIServiceError(`Failed to parse Claude API response: ${error.message}`); + + // Enhanced error logging for debugging + const responsePreview = responseText ? responseText.substring(0, 500).replace(/\n/g, ' ') : 'null'; + this.logger.error('Failed to parse Claude API response', { + error: error.message, + errorType: error.constructor.name, + responsePreview, + responseLength: responseText ? responseText.length : 0, + responseStartsWith: responseText ? responseText.substring(0, 100) : 'null' + }); + + throw new AIServiceError( + `Failed to parse Claude API response: ${error.message}. ` + + `Response preview: "${responsePreview}..."` + ); } } diff --git a/src/infrastructure/aws/S3Service.js b/src/infrastructure/aws/S3Service.js new file mode 100644 index 0000000..972705a --- /dev/null +++ b/src/infrastructure/aws/S3Service.js @@ -0,0 +1,306 @@ +const { S3Client, ListObjectsV2Command, GetObjectCommand } = require('@aws-sdk/client-s3'); +const { ValidationError, NotFoundError } = require('../../shared/errors/AppError'); +const { fileTypeFromBuffer } = require('file-type'); + +/** + * S3 Service - handles S3 operations for image fetching + */ +class S3Service { + /** + * @param {string} accessKeyId - AWS access key ID + * @param {string} secretAccessKey - AWS secret access key + * @param {string} region - AWS region + * @param {string} bucketName - S3 bucket name + * @param {Object} logger - Logger instance + */ + constructor(accessKeyId, secretAccessKey, region, bucketName, logger) { + if (!accessKeyId || !secretAccessKey || !region || !bucketName) { + throw new ValidationError('AWS credentials and bucket name are required'); + } + + this.bucketName = bucketName; + this.logger = logger; + + this.client = new S3Client({ + region, + credentials: { + accessKeyId, + secretAccessKey + } + }); + } + + /** + * Normalize S3 path - handles trailing slashes, whitespace, special characters + * @param {string} path - S3 path + * @returns {string} Normalized path + * @private + */ + _normalizePath(path) { + if (!path || typeof path !== 'string') { + return ''; + } + + // Trim whitespace + let normalized = path.trim(); + + // Remove leading slash + if (normalized.startsWith('/')) { + normalized = normalized.substring(1); + } + + // Ensure trailing slash for folders + if (normalized && !normalized.endsWith('/')) { + normalized += '/'; + } + + // Remove path traversal attempts + normalized = normalized.replace(/\.\./g, ''); + normalized = normalized.replace(/\/+/g, '/'); // Remove multiple slashes + + return normalized; + } + + /** + * Build full S3 path from parent and subfolder + * @param {string} parentFolder - Parent folder name + * @param {string} subFolder - Subfolder name + * @returns {string} Full S3 path + * @private + */ + _buildFullPath(parentFolder, subFolder) { + const normalizedParent = this._normalizePath(parentFolder || '00Da3000003ZFiQ/'); + const normalizedSub = this._normalizePath(subFolder); + + if (!normalizedSub) { + throw new ValidationError('Subfolder name is required'); + } + + // Remove trailing slash from parent if subfolder already has it + const parent = normalizedParent.endsWith('/') + ? normalizedParent.substring(0, normalizedParent.length - 1) + : normalizedParent; + + return `${parent}/${normalizedSub}`; + } + + /** + * Check if file is an image based on extension + * @param {string} key - S3 object key + * @returns {boolean} + * @private + */ + _isImageFile(key) { + if (!key || typeof key !== 'string') { + return false; + } + + const lowerKey = key.toLowerCase(); + const imageExtensions = ['.jpg', '.jpeg', '.png', '.webp', '.gif', '.heic', '.tiff', '.bmp', '.heif']; + + // Check if it's a file (not a folder) + if (key.endsWith('/')) { + return false; + } + + // Check if it's a hidden file + const fileName = lowerKey.split('/').pop(); + if (fileName.startsWith('.')) { + return false; + } + + // Check extension + return imageExtensions.some(ext => lowerKey.endsWith(ext)); + } + + /** + * List all images in S3 folder with pagination support + * @param {string} parentFolder - Parent folder name + * @param {string} subFolder - Subfolder name + * @returns {Promise>} Array of S3 object keys + * @throws {NotFoundError} If folder doesn't exist or is empty + */ + async listImages(parentFolder, subFolder) { + try { + const prefix = this._buildFullPath(parentFolder, subFolder); + + this.logger.info('Listing images from S3', { prefix, bucket: this.bucketName }); + + const imageKeys = []; + let continuationToken = null; + let hasItems = false; + + do { + const command = new ListObjectsV2Command({ + Bucket: this.bucketName, + Prefix: prefix, + ContinuationToken: continuationToken, + MaxKeys: 1000 // S3 limit per request + }); + + const response = await this.client.send(command); + + if (response.Contents && response.Contents.length > 0) { + hasItems = true; + + // Filter for image files only + for (const item of response.Contents) { + if (this._isImageFile(item.Key)) { + imageKeys.push(item.Key); + } + } + } + + continuationToken = response.NextContinuationToken; + } while (continuationToken); + + if (!hasItems) { + throw new NotFoundError(`Folder not found or empty: ${prefix}`); + } + + if (imageKeys.length === 0) { + throw new NotFoundError(`No images found in folder: ${prefix}`); + } + + this.logger.info('Found images in S3 folder', { + count: imageKeys.length, + prefix + }); + + return imageKeys; + } catch (error) { + if (error instanceof NotFoundError || error instanceof ValidationError) { + throw error; + } + + this.logger.error('Failed to list images from S3', { + error: error.message, + parentFolder, + subFolder + }); + + // Handle AWS errors + if (error.name === 'NoSuchBucket') { + throw new NotFoundError(`S3 bucket not found: ${this.bucketName}`); + } + if (error.name === 'AccessDenied') { + throw new ValidationError(`Access denied to S3 bucket: ${this.bucketName}`); + } + + throw new Error(`Failed to list images from S3: ${error.message}`); + } + } + + /** + * Download image from S3 + * @param {string} key - S3 object key + * @returns {Promise} Image buffer + * @throws {NotFoundError} If object doesn't exist + * @throws {ValidationError} If file is too large or invalid + */ + async downloadImage(key) { + try { + if (!key || typeof key !== 'string') { + throw new ValidationError('S3 object key is required'); + } + + this.logger.debug('Downloading image from S3', { key }); + + const command = new GetObjectCommand({ + Bucket: this.bucketName, + Key: key + }); + + const response = await this.client.send(command); + + // Check content length + const contentLength = response.ContentLength || 0; + const MAX_FILE_SIZE = 50 * 1024 * 1024; // 50MB + + if (contentLength > MAX_FILE_SIZE) { + throw new ValidationError(`File size (${contentLength} bytes) exceeds maximum allowed (${MAX_FILE_SIZE} bytes)`); + } + + if (contentLength === 0) { + throw new ValidationError('File is empty'); + } + + // Read stream into buffer + const chunks = []; + for await (const chunk of response.Body) { + chunks.push(chunk); + } + + const buffer = Buffer.concat(chunks); + + // Validate file type + const fileType = await fileTypeFromBuffer(buffer); + if (!fileType) { + throw new ValidationError('Unable to determine file type'); + } + + // Verify it's an image + const imageMimeTypes = [ + 'image/jpeg', + 'image/png', + 'image/webp', + 'image/gif', + 'image/heic', + 'image/tiff', + 'image/bmp', + 'image/heif' + ]; + + if (!imageMimeTypes.includes(fileType.mime.toLowerCase())) { + throw new ValidationError(`File is not an image: ${fileType.mime}`); + } + + this.logger.debug('Image downloaded from S3', { + key, + size: buffer.length, + mimeType: fileType.mime + }); + + return buffer; + } catch (error) { + if (error instanceof NotFoundError || error instanceof ValidationError) { + throw error; + } + + this.logger.error('Failed to download image from S3', { + error: error.message, + key + }); + + // Handle AWS errors + if (error.name === 'NoSuchKey') { + throw new NotFoundError(`Image not found: ${key}`); + } + if (error.name === 'AccessDenied') { + throw new ValidationError(`Access denied to image: ${key}`); + } + + throw new Error(`Failed to download image from S3: ${error.message}`); + } + } + + /** + * Get image metadata (filename, size) + * @param {string} key - S3 object key + * @returns {Object} Metadata object + */ + getImageMetadata(key) { + if (!key || typeof key !== 'string') { + return { fileName: 'unknown', s3Key: key }; + } + + const fileName = key.split('/').pop() || 'unknown'; + return { + fileName, + s3Key: key + }; + } +} + +module.exports = S3Service; + diff --git a/src/infrastructure/config/dependencyContainer.js b/src/infrastructure/config/dependencyContainer.js index f7e142f..86a53be 100644 --- a/src/infrastructure/config/dependencyContainer.js +++ b/src/infrastructure/config/dependencyContainer.js @@ -2,10 +2,12 @@ const mysql = require('mysql2/promise'); const ClaudeAIProvider = require('../ai/ClaudeAIProvider'); const MySQLImageRepository = require('../repositories/MySQLImageRepository'); const ApiKeyRepository = require('../repositories/ApiKeyRepository'); +const S3Service = require('../aws/S3Service'); const TagImageUseCase = require('../../application/useCases/TagImageUseCase'); const TagBase64ImageUseCase = require('../../application/useCases/TagBase64ImageUseCase'); const TagBatchImagesUseCase = require('../../application/useCases/TagBatchImagesUseCase'); const TagBatchBase64ImagesUseCase = require('../../application/useCases/TagBatchBase64ImagesUseCase'); +const TagS3FolderUseCase = require('../../application/useCases/TagS3FolderUseCase'); const logger = require('../../shared/utils/logger'); /** @@ -51,28 +53,50 @@ class DependencyContainer { const aiProvider = new ClaudeAIProvider(process.env.ANTHROPIC_API_KEY, logger); this._services.set('aiProvider', aiProvider); + // S3 Service (optional - only if AWS credentials are provided) + if (process.env.AWS_ACCESS_KEY_ID && process.env.AWS_SECRET_ACCESS_KEY && process.env.AWS_S3_BUCKET) { + const s3Service = new S3Service( + process.env.AWS_ACCESS_KEY_ID, + process.env.AWS_SECRET_ACCESS_KEY, + process.env.AWS_REGION || 'us-east-1', + process.env.AWS_S3_BUCKET, + logger + ); + this._services.set('s3Service', s3Service); + } else { + logger.warn('AWS credentials not set. S3 folder endpoint will not work.'); + this._services.set('s3Service', null); + } + // Use Cases const tagImageUseCase = new TagImageUseCase(imageRepository, aiProvider, logger); const tagBase64ImageUseCase = new TagBase64ImageUseCase(imageRepository, aiProvider, logger); const tagBatchImagesUseCase = new TagBatchImagesUseCase(tagImageUseCase, logger); const tagBatchBase64ImagesUseCase = new TagBatchBase64ImagesUseCase(tagBase64ImageUseCase, logger); + + // S3 Folder Use Case (only if S3 service is available) + const s3Service = this._services.get('s3Service'); + let tagS3FolderUseCase = null; + if (s3Service) { + tagS3FolderUseCase = new TagS3FolderUseCase(s3Service, tagImageUseCase, imageRepository, logger); + } this._services.set('tagImageUseCase', tagImageUseCase); this._services.set('tagBase64ImageUseCase', tagBase64ImageUseCase); this._services.set('tagBatchImagesUseCase', tagBatchImagesUseCase); this._services.set('tagBatchBase64ImagesUseCase', tagBatchBase64ImagesUseCase); + this._services.set('tagS3FolderUseCase', tagS3FolderUseCase); } /** * Get service by name * @param {string} serviceName - Service name - * @returns {*} Service instance + * @returns {*} Service instance (may be null for optional services) */ get(serviceName) { - const service = this._services.get(serviceName); - if (!service) { + if (!this._services.has(serviceName)) { throw new Error(`Service '${serviceName}' not found`); } - return service; + return this._services.get(serviceName); // May return null for optional services } /** diff --git a/src/presentation/controllers/ImageTaggingController.js b/src/presentation/controllers/ImageTaggingController.js index db0ae7e..7d4912b 100644 --- a/src/presentation/controllers/ImageTaggingController.js +++ b/src/presentation/controllers/ImageTaggingController.js @@ -13,14 +13,16 @@ class ImageTaggingController { * @param {TagBase64ImageUseCase} tagBase64ImageUseCase - Tag base64 image use case * @param {TagBatchImagesUseCase} tagBatchImagesUseCase - Tag batch images use case * @param {TagBatchBase64ImagesUseCase} tagBatchBase64ImagesUseCase - Tag batch base64 images use case + * @param {TagS3FolderUseCase} tagS3FolderUseCase - Tag S3 folder use case * @param {IImageRepository} imageRepository - Image repository * @param {Object} logger - Logger instance */ - constructor(tagImageUseCase, tagBase64ImageUseCase, tagBatchImagesUseCase, tagBatchBase64ImagesUseCase, imageRepository, logger) { + constructor(tagImageUseCase, tagBase64ImageUseCase, tagBatchImagesUseCase, tagBatchBase64ImagesUseCase, tagS3FolderUseCase, imageRepository, logger) { this.tagImageUseCase = tagImageUseCase; this.tagBase64ImageUseCase = tagBase64ImageUseCase; this.tagBatchImagesUseCase = tagBatchImagesUseCase; this.tagBatchBase64ImagesUseCase = tagBatchBase64ImagesUseCase; + this.tagS3FolderUseCase = tagS3FolderUseCase; this.imageRepository = imageRepository; this.logger = logger; } @@ -268,6 +270,44 @@ class ImageTaggingController { } } + /** + * Tag all images from S3 folder + */ + async tagS3Folder(req, res, next) { + try { + if (!this.tagS3FolderUseCase) { + throw new ValidationError('S3 folder endpoint is not available. AWS credentials not configured.'); + } + + // Validate input + const schema = Joi.object({ + parentFolder: Joi.string().optional().default('00Da3000003ZFiQ/'), + subFolder: Joi.string().required().min(1) + }); + + const { error: validationError, value } = schema.validate(req.body); + if (validationError) { + throw new ValidationError(validationError.details[0].message); + } + + const { parentFolder, subFolder } = value; + + // Execute use case + const result = await this.tagS3FolderUseCase.execute(parentFolder, subFolder); + + // Format response + const message = result.failedImages > 0 + ? `S3 folder processed: ${result.processedImages} succeeded, ${result.failedImages} failed` + : `S3 folder processed successfully: ${result.processedImages} images tagged`; + + res.status(200).json( + ResponseFormatter.success(result, message) + ); + } catch (error) { + next(error); + } + } + /** * Health check */ diff --git a/src/presentation/routes/imageRoutes.js b/src/presentation/routes/imageRoutes.js index 47e432c..48c92f4 100644 --- a/src/presentation/routes/imageRoutes.js +++ b/src/presentation/routes/imageRoutes.js @@ -65,6 +65,13 @@ const createImageRoutes = (controller, authMiddleware) => { (req, res, next) => controller.getStats(req, res, next) ); + // Tag all images from S3 folder (auth required) + router.post( + '/tag-s3-folder', + authMiddleware.authenticate(), + (req, res, next) => controller.tagS3Folder(req, res, next) + ); + return router; }; diff --git a/src/server.js b/src/server.js index 6a81665..31f5c3c 100644 --- a/src/server.js +++ b/src/server.js @@ -41,6 +41,7 @@ const tagImageUseCase = container.get('tagImageUseCase'); const tagBase64ImageUseCase = container.get('tagBase64ImageUseCase'); const tagBatchImagesUseCase = container.get('tagBatchImagesUseCase'); const tagBatchBase64ImagesUseCase = container.get('tagBatchBase64ImagesUseCase'); +const tagS3FolderUseCase = container.get('tagS3FolderUseCase'); // May be null if AWS credentials not set const imageRepository = container.get('imageRepository'); const apiKeyRepository = container.get('apiKeyRepository'); @@ -49,6 +50,7 @@ const imageController = new ImageTaggingController( tagBase64ImageUseCase, tagBatchImagesUseCase, tagBatchBase64ImagesUseCase, + tagS3FolderUseCase, imageRepository, logger );