From de71743b6101dbcada425c34930bd9750cc403c7 Mon Sep 17 00:00:00 2001 From: Pradeep Date: Fri, 17 Oct 2025 10:33:14 +0530 Subject: [PATCH] implemented direct file passes --- analysis_report.pdf | 1 + docker-compose.yml | 23 +- services/ai-analysis-service/001-schema.sql | 9 - .../DOCUMENTATION_INDEX.md | 610 ++++++++ services/ai-analysis-service/FLOW_DIAGRAMS.md | 672 +++++++++ .../INTEGRATION_EXAMPLE.md | 313 ++++ .../ai-analysis-service/QUICK_REFERENCE.md | 485 +++++++ .../README_DOCUMENTATION.md | 426 ++++++ .../SERVICE_COMMUNICATION_ARCHITECTURE.md | 1274 +++++++++++++++++ .../ai-analysis/adv_git_analyzer.py | 2 +- services/ai-analysis-service/ai-analyze.py | 128 +- services/ai-analysis-service/env.example | 7 +- services/ai-analysis-service/requirements.txt | 3 + services/ai-analysis-service/server.py | 459 +++++- .../run-sync-status-migration.sh | 53 + .../023_restore_sync_status_column.sql | 72 + .../024_fix_provider_names_from_urls.sql | 79 + .../src/routes/github-integration.routes.js | 65 +- .../src/routes/github-oauth.js | 5 +- .../git-integration/src/routes/vcs.routes.js | 27 +- .../src/utils/provider-detector.js | 55 + 21 files changed, 4724 insertions(+), 44 deletions(-) create mode 100644 analysis_report.pdf create mode 100644 services/ai-analysis-service/DOCUMENTATION_INDEX.md create mode 100644 services/ai-analysis-service/FLOW_DIAGRAMS.md create mode 100644 services/ai-analysis-service/INTEGRATION_EXAMPLE.md create mode 100644 services/ai-analysis-service/QUICK_REFERENCE.md create mode 100644 services/ai-analysis-service/README_DOCUMENTATION.md create mode 100644 services/ai-analysis-service/SERVICE_COMMUNICATION_ARCHITECTURE.md create mode 100755 services/git-integration/run-sync-status-migration.sh create mode 100644 services/git-integration/src/migrations/023_restore_sync_status_column.sql create mode 100644 services/git-integration/src/migrations/024_fix_provider_names_from_urls.sql create mode 100644 services/git-integration/src/utils/provider-detector.js diff --git a/analysis_report.pdf b/analysis_report.pdf new file mode 100644 index 0000000..9267f1c --- /dev/null +++ b/analysis_report.pdf @@ -0,0 +1 @@ +"%PDF-1.4\n%���� ReportLab Generated PDF document http://www.reportlab.com\n1 0 obj\n<<\n/F1 2 0 R /F2 3 0 R\n>>\nendobj\n2 0 obj\n<<\n/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font\n>>\nendobj\n3 0 obj\n<<\n/BaseFont /Helvetica-Bold /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font\n>>\nendobj\n4 0 obj\n<<\n/Contents 10 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 9 0 R /Resources <<\n/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n>> /Rotate 0 /Trans <<\n\n>> \n /Type /Page\n>>\nendobj\n5 0 obj\n<<\n/Contents 11 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 9 0 R /Resources <<\n/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n>> /Rotate 0 /Trans <<\n\n>> \n /Type /Page\n>>\nendobj\n6 0 obj\n<<\n/Contents 12 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 9 0 R /Resources <<\n/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n>> /Rotate 0 /Trans <<\n\n>> \n /Type /Page\n>>\nendobj\n7 0 obj\n<<\n/PageMode /UseNone /Pages 9 0 R /Type /Catalog\n>>\nendobj\n8 0 obj\n<<\n/Author (\\(anonymous\\)) /CreationDate (D:20251016140712+00'00') /Creator (\\(unspecified\\)) /Keywords () /ModDate (D:20251016140712+00'00') /Producer (ReportLab PDF Library - www.reportlab.com) \n /Subject (\\(unspecified\\)) /Title (\\(anonymous\\)) /Trapped /False\n>>\nendobj\n9 0 obj\n<<\n/Count 3 /Kids [ 4 0 R 5 0 R 6 0 R ] /Type /Pages\n>>\nendobj\n10 0 obj\n<<\n/Filter [ /ASCII85Decode /FlateDecode ] /Length 404\n>>\nstream\nGat=f9i&Y\\%#46L'g<*q]/WNRCptPlRKPrBm\\h5l2FP0>7RI\"H38FfJ-4Z69*;nNo*nrtg'\\L03\")r*bG`.(l9FWB_!TkH#-MiO;=/e#)*$ndL1j+lkdYS[$;8UR&Ekm;VB4)WujM43'j?33WQ,q6/\"I*V`C9_O]L!K.p(c=rU.c\"5?mh8-B/]AKLgnXMBlY?\\YoZ_^GW8P']\\jfPqfTt=4U<;c284s`]L$\"dgS(CZFTB%9/OD\".OG6g.<[snu39>;sX\"3dq3;HuQVl/lKQrhW381MQ8nCE7t*:n61ii=AUoLYK_iVXq6Ic_Y!aK5G;L^X+,\"!iZj4=T~>endstream\nendobj\n11 0 obj\n<<\n/Filter [ /ASCII85Decode /FlateDecode ] /Length 262\n>>\nstream\nGaro:bA+pK&4Q?mMS![,Fu?RAX32:,1Vg:08^I)>5NdAP4gBjOR%YY9Lt*Q*0\\Rendstream\nendobj\n12 0 obj\n<<\n/Filter [ /ASCII85Decode /FlateDecode ] /Length 532\n>>\nstream\nGas1\\9iHZu&A@Zcp8FNSP<]0heZmDAl6n61ca7D_nkgiQ#CjE[#VB(s16CVZm<`ZTTg3QnrEc9\"G7feR'I\\TI\"k9qcLo0YQ=e?He9[%GT\\\"\"G($!T`RB6U.gGr%&hO)IP#b$;ql[9Q[)HEo0F+H=<4Rgg-@KnX[4\"?44gfmK:]pl*D`.<)@-\\:1\\taO('MDZJK*`;TjTa^lp)-.GMtCUJmfBGhd[*;k8q@*S,.4mbmlJ75FneX+;)f2H;\\;d005A@8;s'PD_g\"H%Z>!ml\\&n:qOMl(-)/&:$&mn4mGKmd8bJpGrOsV83e('2\\?r\"lNAFPY=)acbVt6%^n%_TjLe`RUu,'g:HSciWMP@3Onr~>endstream\nendobj\nxref\n0 13\n0000000000 65535 f \n0000000073 00000 n \n0000000114 00000 n \n0000000221 00000 n \n0000000333 00000 n \n0000000537 00000 n \n0000000741 00000 n \n0000000945 00000 n \n0000001013 00000 n \n0000001296 00000 n \n0000001367 00000 n \n0000001862 00000 n \n0000002215 00000 n \ntrailer\n<<\n/ID \n[<095680569339e0f12bde5087f4aab50d><095680569339e0f12bde5087f4aab50d>]\n% ReportLab generated PDF document -- digest (http://www.reportlab.com)\n\n/Info 8 0 R\n/Root 7 0 R\n/Size 13\n>>\nstartxref\n2838\n%%EOF\n" \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index f291a94..2caef98 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -268,6 +268,7 @@ services: - DASHBOARD_URL=http://dashboard:8008 - SELF_IMPROVING_GENERATOR_URL=http://self-improving-generator:8007 - AI_MOCKUP_URL=http://ai-mockup-service:8021 + - AI_ANALYSIS_URL=http://ai-analysis-service:8022 - UNISON_URL=http://unison:8010 - TEMPLATE_MANAGER_AI_URL=http://template-manager:8013 volumes: @@ -727,7 +728,7 @@ services: environment: - PORT=8022 - HOST=0.0.0.0 - - ANTHROPIC_API_KEY=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 + - ANTHROPIC_API_KEY=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 - POSTGRES_HOST=postgres - POSTGRES_PORT=5432 - POSTGRES_DB=dev_pipeline @@ -738,9 +739,11 @@ services: - REDIS_PASSWORD=redis_secure_2024 - MONGODB_URL=mongodb://pipeline_admin:mongo_secure_2024@mongodb:27017/ - MONGODB_DB=repo_analyzer - - JWT_ACCESS_SECRET=access-secret-key-2024-tech4biz-secure_pipeline_2024 - - USER_AUTH_SERVICE_URL=http://user-auth:8011 - - PYTHONUNBUFFERED=1 + - GIT_INTEGRATION_SERVICE_URL=http://git-integration:8012 + - CLAUDE_REQUESTS_PER_MINUTE=90 + - MAX_FILES_DEFAULT=100 + - CACHE_TTL_SECONDS=86400 + - CONTENT_MAX_TOKENS=8000 volumes: - ai_analysis_logs:/app/logs - ai_analysis_reports:/app/reports @@ -748,14 +751,10 @@ services: networks: - pipeline_network depends_on: - postgres: - condition: service_healthy - redis: - condition: service_healthy - mongodb: - condition: service_started - migrations: - condition: service_completed_successfully + - postgres + - redis + - mongodb + - git-integration healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8022/health"] interval: 30s diff --git a/services/ai-analysis-service/001-schema.sql b/services/ai-analysis-service/001-schema.sql index a775c8c..82c1ec9 100644 --- a/services/ai-analysis-service/001-schema.sql +++ b/services/ai-analysis-service/001-schema.sql @@ -432,20 +432,11 @@ $$; -- Grant permissions GRANT SELECT ON ALL TABLES IN SCHEMA public TO repo_analyzer_read; -GRANT SELECT ON high_confidence_knowledge TO repo_analyzer_read; -GRANT SELECT ON repository_quality_summary TO repo_analyzer_read; -GRANT SELECT ON recent_activity TO repo_analyzer_read; GRANT SELECT, INSERT, UPDATE ON ALL TABLES IN SCHEMA public TO repo_analyzer_write; -GRANT SELECT ON high_confidence_knowledge TO repo_analyzer_write; -GRANT SELECT ON repository_quality_summary TO repo_analyzer_write; -GRANT SELECT ON recent_activity TO repo_analyzer_write; GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO repo_analyzer_write; GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO repo_analyzer_admin; -GRANT ALL PRIVILEGES ON high_confidence_knowledge TO repo_analyzer_admin; -GRANT ALL PRIVILEGES ON repository_quality_summary TO repo_analyzer_admin; -GRANT ALL PRIVILEGES ON recent_activity TO repo_analyzer_admin; GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO repo_analyzer_admin; GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO repo_analyzer_admin; diff --git a/services/ai-analysis-service/DOCUMENTATION_INDEX.md b/services/ai-analysis-service/DOCUMENTATION_INDEX.md new file mode 100644 index 0000000..1a17308 --- /dev/null +++ b/services/ai-analysis-service/DOCUMENTATION_INDEX.md @@ -0,0 +1,610 @@ +# AI Analysis Service - Documentation Index + +Welcome to the AI Analysis Service documentation. This service analyzes code repositories using Claude AI and integrates with the Git Integration Service and API Gateway. + +--- + +## Quick Navigation + +### Getting Started +- [Quick Reference Guide](./QUICK_REFERENCE.md) - Fast commands and common operations +- [Architecture Overview](#architecture-overview-below) +- [Environment Setup](#environment-setup-below) + +### In-Depth Documentation +- [Complete Architecture Guide](./SERVICE_COMMUNICATION_ARCHITECTURE.md) - Comprehensive documentation +- [Flow Diagrams](./FLOW_DIAGRAMS.md) - Visual representations of data flow +- [Integration Examples](./INTEGRATION_EXAMPLE.md) - Code examples and usage patterns + +### Technical Reference +- [API Documentation](#api-endpoints-below) +- [Service Configuration](#configuration-below) +- [Troubleshooting Guide](#troubleshooting-below) + +--- + +## Architecture Overview + +### System Components + +``` +┌──────────┐ ┌──────────────┐ ┌────────────────┐ ┌─────────────┐ +│ Frontend │────▶│ API Gateway │────▶│ AI Analysis │◀───▶│ Git │ +│ (Next.js)│ │ (Express.js) │ │ (FastAPI) │ │ Integration │ +│ :3000 │ │ :8000 │ │ :8022 │ │ :8012 │ +└──────────┘ └──────────────┘ └───────┬────────┘ └─────┬───────┘ + │ │ + ▼ ▼ + ┌─────────┐ ┌──────────┐ + │ Redis │ │PostgreSQL│ + │ :6379 │ │ :5432 │ + └─────────┘ └──────────┘ +``` + +### Key Features + +1. **AI-Powered Analysis**: Uses Claude API for intelligent code review +2. **Rate Limiting**: Manages Claude API limits (90 requests/minute) +3. **Smart Caching**: Redis-based caching reduces API calls by 60-70% +4. **Content Optimization**: Intelligently truncates large files +5. **Report Generation**: Creates PDF and JSON reports +6. **Multi-Service Integration**: Seamless communication between services + +--- + +## Environment Setup + +### Prerequisites + +- Docker & Docker Compose +- Node.js 18+ (for local development) +- Python 3.11+ (for local development) +- Anthropic API Key +- GitHub OAuth credentials + +### Installation + +```bash +# 1. Clone repository +git clone https://github.com/your-org/codenuk.git +cd codenuk + +# 2. Set up environment variables +cp backend/codenuk_backend_mine/services/ai-analysis-service/.env.example \ + backend/codenuk_backend_mine/services/ai-analysis-service/.env + +# 3. Configure .env files +# Edit .env files with your API keys and credentials + +# 4. Start services +docker-compose up -d + +# 5. Verify services +curl http://localhost:8000/health +curl http://localhost:8022/health +curl http://localhost:8012/health +``` + +### Environment Variables + +#### AI Analysis Service +```bash +ANTHROPIC_API_KEY=sk-ant-api03-... +GIT_INTEGRATION_SERVICE_URL=http://git-integration:8012 +REDIS_HOST=redis +REDIS_PORT=6379 +PORT=8022 +``` + +#### API Gateway +```bash +AI_ANALYSIS_URL=http://localhost:8022 +GIT_INTEGRATION_URL=http://localhost:8012 +PORT=8000 +``` + +#### Git Integration +```bash +GITHUB_CLIENT_ID=your_client_id +GITHUB_CLIENT_SECRET=your_client_secret +PUBLIC_BASE_URL=https://backend.codenuk.com +POSTGRES_HOST=postgres +PORT=8012 +``` + +--- + +## API Endpoints + +### AI Analysis Service + +#### Analyze Repository +```http +POST /analyze-repository +Content-Type: application/json + +{ + "repository_id": "uuid", + "user_id": "user-uuid", + "output_format": "pdf", + "max_files": 100 +} +``` + +**Response:** +```json +{ + "success": true, + "analysis_id": "repo_analysis_uuid_timestamp", + "report_path": "/app/reports/..._analysis.pdf", + "stats": { + "total_files": 85, + "code_quality_score": 7.8, + "total_issues": 23 + } +} +``` + +#### Get Repository Info +```http +GET /repository/{id}/info?user_id={userId} +``` + +#### Download Report +```http +GET /reports/{filename} +``` + +#### Health Check +```http +GET /health +``` + +### Via API Gateway + +All endpoints are accessible through the API Gateway: + +``` +Direct: http://localhost:8022/analyze-repository +Via Gateway: http://localhost:8000/api/ai-analysis/analyze-repository +``` + +--- + +## Configuration + +### Service Ports + +| Service | Port | Protocol | +|---------|------|----------| +| Frontend | 3000 | HTTP | +| API Gateway | 8000 | HTTP | +| AI Analysis | 8022 | HTTP | +| Git Integration | 8012 | HTTP | +| PostgreSQL | 5432 | TCP | +| Redis | 6379 | TCP | + +### Rate Limiting + +- **Claude API**: 90 requests per minute (configurable) +- **Sliding Window**: Tracks requests over 60-second window +- **Automatic Waiting**: Delays requests to prevent rate limit violations + +### Caching + +- **Storage**: Redis +- **TTL**: 24 hours (configurable) +- **Key Format**: `analysis:{file_hash}` +- **Hash Algorithm**: SHA-256 + +### Content Optimization + +- **Threshold**: 8000 tokens (~32KB) +- **Strategy**: Extract imports, functions, classes +- **Truncation**: Intelligent context preservation + +--- + +## Communication Flow + +### 1. Repository Analysis Request + +``` +Frontend → API Gateway → AI Analysis → Git Integration +``` + +1. User clicks "Analyze Repository" in frontend +2. Frontend sends POST request to API Gateway +3. Gateway forwards to AI Analysis Service +4. AI Analysis requests repository info from Git Integration +5. Git Integration returns file tree and metadata +6. AI Analysis processes each file: + - Check Redis cache + - Apply rate limiting + - Optimize content + - Send to Claude API + - Cache result +7. Generate repository-level analysis +8. Create PDF/JSON report +9. Return results through Gateway to Frontend + +### 2. File Content Retrieval + +``` +AI Analysis → Git Integration → File System +``` + +1. AI Analysis requests file content +2. Git Integration resolves file path (case-insensitive) +3. Reads content from local storage +4. Returns content + metadata + +### 3. OAuth Authentication + +``` +Frontend → API Gateway → Git Integration → GitHub → Git Integration → Frontend +``` + +1. User attempts to access private repository +2. Git Integration detects authentication requirement +3. Returns OAuth URL +4. Frontend redirects to GitHub OAuth +5. User approves access +6. GitHub redirects back with code +7. Git Integration exchanges code for token +8. Token stored in PostgreSQL +9. User can now access private repository + +--- + +## Troubleshooting + +### Common Issues + +#### Service Connection Failed + +**Symptoms**: "Failed to get repository info" error + +**Solution**: +```bash +# Check service status +docker ps | grep git-integration + +# Check network connectivity +docker network inspect backend-network + +# Restart service +docker-compose restart git-integration +``` + +#### Rate Limit Exceeded + +**Symptoms**: Analysis fails with rate limit error + +**Solution**: +```bash +# Option 1: Reduce max_files +{ + "max_files": 50 # Instead of 100 +} + +# Option 2: Lower rate limit +CLAUDE_REQUESTS_PER_MINUTE=50 # In .env +docker-compose restart ai-analysis +``` + +#### Redis Connection Failed + +**Symptoms**: Warning about Redis connection + +**Solution**: +```bash +# Check Redis status +docker exec redis redis-cli ping + +# Expected: PONG + +# If fails, restart Redis +docker-compose restart redis +``` + +#### Authentication Errors + +**Symptoms**: 401 Unauthorized for private repos + +**Solution**: +- Verify GitHub OAuth credentials +- Check if user has completed OAuth flow +- Verify token is stored in database + +--- + +## Performance Optimization + +### Analysis Speed + +| Configuration | Time for 100 Files | API Calls | +|--------------|-------------------|-----------| +| No optimization | 50-90 minutes | 100 | +| With caching (60% hit) | 20-35 minutes | 40 | +| With rate limiting | 2-4 minutes slower | Same | +| With content optimization | Same | 70% smaller payloads | + +### Best Practices + +1. **Use Caching**: Enable Redis for repeated analyses +2. **Optimize Content**: Keep 8000 token threshold +3. **Respect Rate Limits**: Don't increase beyond Claude limits +4. **Batch Processing**: Analyze during off-peak hours +5. **Monitor Resources**: Watch CPU, memory, and network usage + +--- + +## Security Considerations + +### API Keys + +- Store in environment variables only +- Never commit to version control +- Rotate regularly +- Use different keys for dev/prod + +### OAuth Tokens + +- Encrypted at rest in PostgreSQL +- Secure transmission (HTTPS in production) +- Automatic expiration handling +- User-specific token isolation + +### Network Security + +- Internal Docker network for service communication +- API Gateway as single entry point +- CORS configuration for frontend +- Rate limiting to prevent abuse + +--- + +## Monitoring and Logging + +### Log Locations + +```bash +# AI Analysis Service +docker logs ai-analysis -f + +# API Gateway +docker logs api-gateway -f + +# Git Integration +docker logs git-integration -f +``` + +### Key Metrics + +- **Analysis Success Rate**: Track successful vs failed analyses +- **Cache Hit Rate**: Monitor Redis cache effectiveness +- **API Response Times**: Track latency for each service +- **Rate Limit Usage**: Monitor Claude API usage + +### Health Checks + +```bash +# All services +curl http://localhost:8000/health +curl http://localhost:8022/health +curl http://localhost:8012/health + +# Database +docker exec postgres pg_isready + +# Cache +docker exec redis redis-cli ping +``` + +--- + +## Development + +### Local Development Setup + +```bash +# AI Analysis Service +cd services/ai-analysis-service +python3 -m venv venv +source venv/bin/activate +pip install -r requirements.txt +python server.py + +# API Gateway +cd services/api-gateway +npm install +npm run dev + +# Git Integration +cd services/git-integration +npm install +npm run dev + +# Frontend +cd fronend/codenuk_frontend_mine +npm install +npm run dev +``` + +### Testing + +```bash +# Test AI Analysis directly +curl -X POST http://localhost:8022/analyze-repository \ + -H "Content-Type: application/json" \ + -d '{"repository_id": "test", "user_id": "test", "output_format": "json", "max_files": 5}' + +# Test through Gateway +curl -X POST http://localhost:8000/api/ai-analysis/analyze-repository \ + -H "Content-Type: application/json" \ + -d '{"repository_id": "test", "user_id": "test", "output_format": "json", "max_files": 5}' +``` + +### Debugging + +```bash +# Enable debug mode +export DEBUG=* +export LOG_LEVEL=debug +export PYTHONUNBUFFERED=1 + +# Watch logs in real-time +docker-compose logs -f ai-analysis | grep "ERROR" + +# Inspect container +docker exec -it ai-analysis bash +``` + +--- + +## Deployment + +### Production Checklist + +- [ ] Set secure environment variables +- [ ] Configure HTTPS +- [ ] Set up SSL certificates +- [ ] Enable production logging +- [ ] Configure monitoring (Prometheus, Grafana) +- [ ] Set up backup strategy +- [ ] Configure auto-scaling (if needed) +- [ ] Test failover scenarios +- [ ] Document recovery procedures +- [ ] Set up alerts + +### Docker Compose Production + +```yaml +services: + ai-analysis: + image: codenuk/ai-analysis:latest + restart: always + environment: + - NODE_ENV=production + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8022/health"] + interval: 30s + timeout: 10s + retries: 3 + deploy: + replicas: 2 + resources: + limits: + cpus: '2' + memory: 4G +``` + +--- + +## Additional Resources + +### Documentation Files + +1. **[SERVICE_COMMUNICATION_ARCHITECTURE.md](./SERVICE_COMMUNICATION_ARCHITECTURE.md)** + - Complete architecture documentation + - Detailed service descriptions + - Request/response examples + - Error handling strategies + - Deployment configuration + +2. **[QUICK_REFERENCE.md](./QUICK_REFERENCE.md)** + - Quick start commands + - Common API calls + - Troubleshooting commands + - Performance tuning tips + - Development shortcuts + +3. **[FLOW_DIAGRAMS.md](./FLOW_DIAGRAMS.md)** + - Visual request flow + - Service communication diagrams + - Data flow illustrations + - Authentication flow + - Error handling flow + - Caching strategy + +4. **[INTEGRATION_EXAMPLE.md](./INTEGRATION_EXAMPLE.md)** + - Frontend integration code + - API usage examples + - React hooks + - Error handling patterns + +5. **[README.md](./README.md)** + - Service overview + - Installation instructions + - Basic usage + - API reference + +### External Links + +- [Anthropic Claude API Documentation](https://docs.anthropic.com/) +- [FastAPI Documentation](https://fastapi.tiangolo.com/) +- [Express.js Documentation](https://expressjs.com/) +- [Docker Compose Documentation](https://docs.docker.com/compose/) +- [Redis Documentation](https://redis.io/docs/) +- [PostgreSQL Documentation](https://www.postgresql.org/docs/) + +--- + +## Support + +### Getting Help + +1. Check the troubleshooting guide +2. Review service logs +3. Test endpoints individually +4. Verify environment variables +5. Check Docker network connectivity + +### Common Questions + +**Q: How long does analysis take?** +A: Typically 2-4 minutes for 100 files with caching, 30-60 minutes without. + +**Q: Can I analyze private repositories?** +A: Yes, users need to authenticate via GitHub OAuth. + +**Q: What happens if Claude API is down?** +A: Service will return appropriate errors. Cached results still work. + +**Q: How much does it cost?** +A: Depends on Claude API usage. Caching reduces costs by 60-70%. + +**Q: Can I increase max_files beyond 100?** +A: Yes, but consider rate limits and timeout settings. + +--- + +## Version History + +- **v1.0.0** (December 2024) + - Initial release + - Claude AI integration + - Redis caching + - Rate limiting + - Content optimization + - Multi-service architecture + +--- + +## Contributing + +For contributions or improvements to this documentation: +1. Ensure accuracy by testing commands +2. Follow existing format and style +3. Update version history +4. Add examples where helpful +5. Keep diagrams up to date + +--- + +**Last Updated**: December 2024 +**Version**: 1.0.0 +**Maintained By**: CodeNuk Team + diff --git a/services/ai-analysis-service/FLOW_DIAGRAMS.md b/services/ai-analysis-service/FLOW_DIAGRAMS.md new file mode 100644 index 0000000..27b6bc7 --- /dev/null +++ b/services/ai-analysis-service/FLOW_DIAGRAMS.md @@ -0,0 +1,672 @@ +# AI Analysis Service - Flow Diagrams + +## 1. Complete Request Flow + +``` +┌───────────────────────────────────────────────────────────────────────┐ +│ USER INTERFACE │ +│ (Browser/Next.js Frontend) │ +└──────────────────────────────┬────────────────────────────────────────┘ + │ + │ 1. User clicks "Analyze Repository" + │ + ▼ +┌───────────────────────────────────────────────────────────────────────┐ +│ FRONTEND APPLICATION │ +│ │ +│ const { startAnalysis } = useAIAnalysis() │ +│ await startAnalysis(repositoryId, userId, options) │ +│ │ +│ POST /api/ai-analysis/analyze-repository │ +│ { │ +│ "repository_id": "uuid", │ +│ "user_id": "user-uuid", │ +│ "output_format": "pdf", │ +│ "max_files": 100 │ +│ } │ +└──────────────────────────────┬────────────────────────────────────────┘ + │ + │ HTTP POST + │ + ▼ +┌───────────────────────────────────────────────────────────────────────┐ +│ API GATEWAY │ +│ (Express.js - Port 8000) │ +│ │ +│ Route: /api/ai-analysis/* │ +│ - Validate request │ +│ - Add headers (X-User-ID) │ +│ - Set timeout: 240 seconds │ +│ - Proxy to AI Analysis Service │ +│ │ +│ Target: http://ai-analysis:8022/analyze-repository │ +└──────────────────────────────┬────────────────────────────────────────┘ + │ + │ HTTP POST (Internal Network) + │ + ▼ +┌───────────────────────────────────────────────────────────────────────┐ +│ AI ANALYSIS SERVICE │ +│ (FastAPI - Port 8022) │ +│ │ +│ Endpoint: POST /analyze-repository │ +│ 1. Validate request parameters │ +│ 2. Generate analysis_id │ +│ 3. Create temp directory │ +└──────────────────────────────┬────────────────────────────────────────┘ + │ + │ 4. Get repository info + │ + ▼ +┌───────────────────────────────────────────────────────────────────────┐ +│ GET Repository Information from Git Integration │ +│ │ +│ GET http://git-integration:8012/api/github/repository/{id}/ui-view │ +│ Headers: { x-user-id: userId } │ +│ Query: { view_type: "tree" } │ +└──────────────────────────────┬────────────────────────────────────────┘ + │ + │ + ▼ +┌───────────────────────────────────────────────────────────────────────┐ +│ GIT INTEGRATION SERVICE │ +│ (Express.js - Port 8012) │ +│ │ +│ Route: /api/github/repository/:id/ui-view │ +│ 1. Query PostgreSQL for repository metadata │ +│ 2. Build file tree from repository_files table │ +│ 3. Return repository info + file tree │ +│ │ +│ Response: │ +│ { │ +│ "success": true, │ +│ "data": { │ +│ "repository_info": { id, name, owner, local_path }, │ +│ "ui_data": { │ +│ "left_panel": { │ +│ "file_tree": [ { type, path, name }, ... ] │ +│ } │ +│ } │ +│ } │ +│ } │ +└──────────────────────────────┬────────────────────────────────────────┘ + │ + │ Return repository data + │ + ▼ +┌───────────────────────────────────────────────────────────────────────┐ +│ AI ANALYSIS SERVICE │ +│ │ +│ 5. Extract file list from tree │ +│ 6. For each file (up to max_files): │ +└──────────────────────────────┬────────────────────────────────────────┘ + │ + │ For each file + │ + ▼ +┌───────────────────────────────────────────────────────────────────────┐ +│ FILE ANALYSIS LOOP │ +│ │ +│ For file in files_to_analyze: │ +│ a. Get file content from Git Integration │ +│ b. Generate file hash (SHA-256) │ +│ c. Check Redis cache │ +│ d. If cache miss: │ +│ - Wait for rate limiter │ +│ - Optimize content (truncate if needed) │ +│ - Send to Claude API │ +│ - Parse response │ +│ - Cache result in Redis │ +│ e. Add to results │ +└──────────────────────────────┬────────────────────────────────────────┘ + │ + │ + ▼ +┌───────────────────────────────────────────────────────────────────────┐ +│ Get File Content (for each file) │ +│ │ +│ GET http://git-integration:8012/api/github/repository/{id}/ │ +│ file-content?file_path={path} │ +│ Headers: { x-user-id: userId } │ +│ │ +│ ┌─────────────────────────────────────────┐ │ +│ │ GIT INTEGRATION SERVICE │ │ +│ │ │ │ +│ │ 1. Resolve file path (case-insensitive)│ │ +│ │ 2. Read from local storage │ │ +│ │ 3. Return file content + metadata │ │ +│ └─────────────────────────────────────────┘ │ +│ │ +│ Response: │ +│ { │ +│ "success": true, │ +│ "content": "file content...", │ +│ "file_info": { │ +│ "filename": "index.ts", │ +│ "is_binary": false, │ +│ "language_detected": "typescript" │ +│ } │ +│ } │ +└──────────────────────────────┬────────────────────────────────────────┘ + │ + │ + ▼ +┌───────────────────────────────────────────────────────────────────────┐ +│ ANALYSIS PROCESSING │ +│ │ +│ ┌─────────────────────────────────────────────────┐ │ +│ │ 1. Check Redis Cache │ │ +│ │ Key: analysis:{file_hash} │ │ +│ │ TTL: 24 hours │ │ +│ │ │ │ +│ │ Cache Hit: Return cached result ────────────┼──────┐ │ +│ │ Cache Miss: Continue ──────────────────────┐│ │ │ +│ └───────────────────────────────────────────────┘│ │ │ +│ │ │ │ +│ ┌─────────────────────────────────────────────────┘ │ │ +│ │ 2. Rate Limiter │ │ +│ │ - Check requests in last 60 seconds │ │ +│ │ - If >= 90 requests: wait │ │ +│ │ - Add timestamp to requests array │ │ +│ └─────────────────────────────────────────────────┐ │ │ +│ │ │ │ +│ ┌──────────────────────────────────────────────────┘ │ │ +│ │ 3. Content Optimizer │ │ +│ │ - Check file size (token estimate) │ │ +│ │ - If > 8000 tokens: │ │ +│ │ * Extract imports, functions, classes │ │ +│ │ * Truncate with context preservation │ │ +│ └─────────────────────────────────────────────────┐ │ │ +│ │ │ │ +│ ┌──────────────────────────────────────────────────┘ │ │ +│ │ 4. Claude API Call │ │ +│ │ POST https://api.anthropic.com/v1/messages │ │ +│ │ { │ │ +│ │ "model": "claude-3-opus-20240229", │ │ +│ │ "max_tokens": 4096, │ │ +│ │ "messages": [{ │ │ +│ │ "role": "user", │ │ +│ │ "content": "Analyze: {optimized_content}" │ │ +│ │ }] │ │ +│ │ } │ │ +│ └─────────────────────────────────────────────────┐ │ │ +│ │ │ │ +│ ┌──────────────────────────────────────────────────┘ │ │ +│ │ 5. Parse Response & Cache │ │ +│ │ - Extract code quality score │ │ +│ │ - Extract issues found │ │ +│ │ - Extract recommendations │ │ +│ │ - Store in Redis cache │ │ +│ └─────────────────────────────────────────────────┐ │ │ +│ │ │ │ +│ ┌──────────────────────────────────────────────────┘ │ │ +│ │ Result │ │ +│ │ { │ │ +│ │ "path": "src/index.ts", ◄────────────────────────────────────┤ +│ │ "language": "typescript", │ │ +│ │ "severity_score": 8.5, │ │ +│ │ "issues_found": [...], │ │ +│ │ "recommendations": [...] │ │ +│ │ } │ │ +│ └──────────────────────────────────────────────────────── │ +└──────────────────────────────┬────────────────────────────────────────┘ + │ + │ All files analyzed + │ + ▼ +┌───────────────────────────────────────────────────────────────────────┐ +│ REPOSITORY-LEVEL ANALYSIS │ +│ │ +│ 7. Aggregate file analyses │ +│ 8. Analyze repository architecture │ +│ 9. Security assessment │ +│ 10. Generate executive summary │ +│ │ +│ Result: │ +│ { │ +│ "code_quality_score": 7.8, │ +│ "total_files": 85, │ +│ "total_lines": 15420, │ +│ "languages": ["typescript", "javascript"], │ +│ "architecture_assessment": "...", │ +│ "security_assessment": "...", │ +│ "file_analyses": [...] │ +│ } │ +└──────────────────────────────┬────────────────────────────────────────┘ + │ + │ + ▼ +┌───────────────────────────────────────────────────────────────────────┐ +│ REPORT GENERATION │ +│ │ +│ 11. Generate PDF Report │ +│ - Title page │ +│ - Executive summary │ +│ - Repository overview │ +│ - Language breakdown │ +│ - Quality metrics │ +│ - File-by-file analysis │ +│ - Security findings │ +│ - Recommendations │ +│ │ +│ 12. Save to /app/reports/ │ +│ Filename: repo_analysis_{id}_{timestamp}_analysis.pdf │ +└──────────────────────────────┬────────────────────────────────────────┘ + │ + │ + ▼ +┌───────────────────────────────────────────────────────────────────────┐ +│ AI ANALYSIS SERVICE │ +│ │ +│ 13. Build response │ +│ 14. Cleanup temp directory │ +│ 15. Return result │ +│ │ +│ Response: │ +│ { │ +│ "success": true, │ +│ "message": "Analysis completed successfully", │ +│ "analysis_id": "repo_analysis_uuid_20241216_143022", │ +│ "report_path": "/app/reports/..._analysis.pdf", │ +│ "stats": { │ +│ "total_files": 85, │ +│ "code_quality_score": 7.8, │ +│ "high_quality_files": 45, │ +│ "medium_quality_files": 30, │ +│ "low_quality_files": 10, │ +│ "total_issues": 23 │ +│ } │ +│ } │ +└──────────────────────────────┬────────────────────────────────────────┘ + │ + │ Return response + │ + ▼ +┌───────────────────────────────────────────────────────────────────────┐ +│ API GATEWAY │ +│ │ +│ 16. Receive response from AI Analysis │ +│ 17. Add gateway headers │ +│ 18. Forward to frontend │ +└──────────────────────────────┬────────────────────────────────────────┘ + │ + │ HTTP Response + │ + ▼ +┌───────────────────────────────────────────────────────────────────────┐ +│ FRONTEND APPLICATION │ +│ │ +│ 19. Receive response │ +│ 20. Update UI with results │ +│ 21. Display quality metrics │ +│ 22. Show download link for report │ +│ │ +│ setResult(analysisData) │ +│ setIsAnalyzing(false) │ +└──────────────────────────────┬────────────────────────────────────────┘ + │ + │ + ▼ +┌───────────────────────────────────────────────────────────────────────┐ +│ USER INTERFACE │ +│ │ +│ Display: │ +│ ✓ Analysis Complete │ +│ ✓ Code Quality Score: 7.8/10 │ +│ ✓ Total Files: 85 │ +│ ✓ Total Issues: 23 │ +│ ✓ [Download Report] button │ +└───────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## 2. Service Communication Diagram + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ │ +│ CODENUK MICROSERVICES │ +│ │ +│ ┌────────────────┐ │ +│ │ FRONTEND │ │ +│ │ (Next.js) │ │ +│ │ Port: 3000 │ │ +│ └────────┬───────┘ │ +│ │ │ +│ │ All requests go through Gateway │ +│ ▼ │ +│ ┌────────────────┐ │ +│ │ API GATEWAY │◄──────────── Entry Point │ +│ │ (Express.js) │ │ +│ │ Port: 8000 │ │ +│ └────┬───┬───┬───┘ │ +│ │ │ │ │ +│ │ │ └──────────────────────┐ │ +│ │ │ │ │ +│ │ └───────────┐ │ │ +│ │ │ │ │ +│ ▼ ▼ ▼ │ +│ ┌─────────┐ ┌──────────┐ ┌──────────┐ │ +│ │ AI │ │ GIT │ │ OTHER │ │ +│ │ ANALYSIS│◄──┤INTEGRATION │ SERVICES │ │ +│ │ │ │ │ │ │ │ +│ │Port 8022│ │Port 8012 │ │ │ │ +│ └────┬────┘ └────┬─────┘ └──────────┘ │ +│ │ │ │ +│ │ │ │ +│ ▼ ▼ │ +│ ┌─────────┐ ┌──────────┐ │ +│ │ Redis │ │PostgreSQL│ │ +│ │ Cache │ │ Database │ │ +│ │Port 6379│ │Port 5432 │ │ +│ └─────────┘ └──────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +--- + +## 3. Data Flow Diagram + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ DATA FLOW │ +│ │ +│ 1. Repository Metadata │ +│ ┌──────────────┐ │ +│ │ PostgreSQL │ │ +│ │ Database │ │ +│ └──────┬───────┘ │ +│ │ │ +│ │ Stores: │ +│ │ - Repository info (URL, branch, owner) │ +│ │ - File metadata (paths, sizes, types) │ +│ │ - Directory structure │ +│ │ - OAuth tokens │ +│ │ - Commit history │ +│ ▼ │ +│ ┌──────────────────┐ │ +│ │ Git Integration │ │ +│ │ Service │ │ +│ └──────────────────┘ │ +│ │ +│ 2. File Content │ +│ ┌──────────────┐ │ +│ │ File System │ │ +│ │ /app/git-repos │ +│ └──────┬───────┘ │ +│ │ │ +│ │ Stores: │ +│ │ - Cloned repositories │ +│ │ - Actual file content │ +│ │ - Git history │ +│ ▼ │ +│ ┌──────────────────┐ │ +│ │ Git Integration │ │ +│ │ Service │ │ +│ └──────────────────┘ │ +│ │ +│ 3. Analysis Cache │ +│ ┌──────────────┐ │ +│ │ Redis │ │ +│ └──────┬───────┘ │ +│ │ │ +│ │ Caches: │ +│ │ - File analysis results (24h TTL) │ +│ │ - Key: analysis:{file_hash} │ +│ │ - Value: JSON analysis result │ +│ ▼ │ +│ ┌──────────────────┐ │ +│ │ AI Analysis │ │ +│ │ Service │ │ +│ └──────────────────┘ │ +│ │ +│ 4. Analysis Reports │ +│ ┌──────────────┐ │ +│ │ File System │ │ +│ │ /app/reports │ │ +│ └──────┬───────┘ │ +│ │ │ +│ │ Stores: │ +│ │ - Generated PDF reports │ +│ │ - JSON analysis exports │ +│ │ - Downloadable via API │ +│ ▲ │ +│ ┌──────────────────┐ │ +│ │ AI Analysis │ │ +│ │ Service │ │ +│ └──────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## 4. Authentication Flow + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ GITHUB OAUTH AUTHENTICATION │ +│ │ +│ User needs to analyze private repository │ +│ │ +│ 1. Frontend → API Gateway → Git Integration │ +│ POST /api/github/attach-repository │ +│ { repository_url: "https://github.com/user/private-repo" } │ +│ │ +│ 2. Git Integration checks if repo is private │ +│ - Tries to access without auth │ +│ - Gets 404 (private repo) │ +│ │ +│ 3. Git Integration returns auth required │ +│ { │ +│ "success": false, │ +│ "requires_auth": true, │ +│ "auth_url": "https://backend.codenuk.com/api/github/auth..." │ +│ } │ +│ │ +│ 4. Frontend redirects user to auth_url │ +│ ┌──────────────┐ │ +│ │ Browser │ │ +│ │ Redirect │ │ +│ └──────┬───────┘ │ +│ │ │ +│ ▼ │ +│ 5. OAuth Flow │ +│ ┌──────────────────────────────────────────┐ │ +│ │ https://github.com/login/oauth/authorize │ │ +│ │ ?client_id=xxx │ │ +│ │ &redirect_uri=xxx │ │ +│ │ &scope=repo │ │ +│ │ &state=xxx │ │ +│ └──────┬───────────────────────────────────┘ │ +│ │ │ +│ │ User approves │ +│ ▼ │ +│ 6. GitHub redirects back │ +│ https://backend.codenuk.com/api/github/auth/github/callback │ +│ ?code=xxx&state=xxx │ +│ │ +│ 7. Git Integration exchanges code for token │ +│ POST https://github.com/login/oauth/access_token │ +│ { code, client_id, client_secret } │ +│ │ +│ 8. Store token in PostgreSQL │ +│ INSERT INTO github_user_tokens │ +│ (user_id, access_token, scope, ...) │ +│ │ +│ 9. Redirect user back to frontend │ +│ https://frontend.codenuk.com/repositories │ +│ ?auth_success=true │ +│ │ +│ 10. User can now attach private repository │ +│ POST /api/github/attach-repository │ +│ - Service finds OAuth token for user │ +│ - Uses token to clone private repo │ +│ - Success! │ +│ │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## 5. Error Handling Flow + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ ERROR HANDLING │ +│ │ +│ ┌─────────────────────────────────────────────────────┐ │ +│ │ Frontend makes request │ │ +│ └────────────────┬────────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌─────────────────────────────────────────────────────┐ │ +│ │ API Gateway receives request │ │ +│ │ - Validates format │ │ +│ │ - Checks authentication (if required) │ │ +│ └────────────────┬────────────────────────────────────┘ │ +│ │ │ +│ ┌─────┴─────┐ │ +│ │ │ │ +│ Valid │ │ Invalid │ +│ │ │ │ +│ ▼ ▼ │ +│ ┌──────────────┐ ┌──────────────┐ │ +│ │ Forward │ │Return 400/401│ │ +│ │ to Service │ │ Bad Request │ │ +│ └──────┬───────┘ └──────┬───────┘ │ +│ │ │ │ +│ │ └───────────────────┐ │ +│ ▼ │ │ +│ ┌─────────────────────────────────┐ │ │ +│ │ Backend Service │ │ │ +│ │ - Process request │ │ │ +│ │ - May call other services │ │ │ +│ └─────────┬───────────────────────┘ │ │ +│ │ │ │ +│ ┌─────┴─────┐ │ │ +│ │ │ │ │ +│Success│ │ Error │ │ +│ │ │ │ │ +│ ▼ ▼ │ │ +│ ┌────────┐ ┌─────────────────────┐ │ │ +│ │Return │ │Determine Error Type │ │ │ +│ │Success │ └────────┬────────────┘ │ │ +│ │200/201 │ │ │ │ +│ └───┬────┘ │ │ │ +│ │ ▼ │ │ +│ │ ┌───────────────────────┐ │ │ +│ │ │ Service Unavailable │ │ │ +│ │ │ (502 Bad Gateway) │ │ │ +│ │ └───────┬───────────────┘ │ │ +│ │ │ │ │ +│ │ ▼ │ │ +│ │ ┌───────────────────────┐ │ │ +│ │ │ Service Timeout │ │ │ +│ │ │ (504 Gateway Timeout) │ │ │ +│ │ └───────┬───────────────┘ │ │ +│ │ │ │ │ +│ │ ▼ │ │ +│ │ ┌───────────────────────┐ │ │ +│ │ │ Rate Limit Exceeded │ │ │ +│ │ │ (429 Too Many) │ │ │ +│ │ └───────┬───────────────┘ │ │ +│ │ │ │ │ +│ │ ▼ │ │ +│ │ ┌───────────────────────┐ │ │ +│ │ │ Resource Not Found │ │ │ +│ │ │ (404 Not Found) │ │ │ +│ │ └───────┬───────────────┘ │ │ +│ │ │ │ │ +│ │ ▼ │ │ +│ │ ┌───────────────────────┐ │ │ +│ │ │ Other Server Error │ │ │ +│ │ │ (500 Internal Error) │ │ │ +│ │ └───────┬───────────────┘ │ │ +│ │ │ │ │ +│ │ └────────────────────┼───────────┐ │ +│ │ │ │ │ +│ └─────────────────────────────────┼───────────┼──┐ │ +│ │ │ │ │ +│ ▼ ▼ ▼ │ +│ ┌───────────────────────────┐ │ +│ │ API Gateway │ │ +│ │ - Log error │ │ +│ │ - Add gateway headers │ │ +│ │ - Forward to frontend │ │ +│ └────────────┬──────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌───────────────────────────┐ │ +│ │ Frontend │ │ +│ │ - Display error message │ │ +│ │ - Show retry option │ │ +│ │ - Log for debugging │ │ +│ └───────────────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## 6. Caching Strategy + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ REDIS CACHING STRATEGY │ +│ │ +│ Request to analyze file │ +│ │ │ +│ ▼ │ +│ ┌──────────────────┐ │ +│ │ Generate file │ │ +│ │ hash (SHA-256) │ │ +│ └────────┬─────────┘ │ +│ │ │ +│ │ Hash: a3f5b8c... │ +│ ▼ │ +│ ┌──────────────────┐ │ +│ │ Check Redis │ │ +│ │ Key: analysis: │ │ +│ │ a3f5b8c... │ │ +│ └────────┬─────────┘ │ +│ │ │ +│ ┌─────┴─────┐ │ +│ │ │ │ +│Cache│ │ Cache │ +│ Hit │ │ Miss │ +│ │ │ │ +│ ▼ ▼ │ +│ ┌──────┐ ┌──────────────────┐ │ +│ │Return│ │ Analyze file │ │ +│ │Cached│ │ using Claude API │ │ +│ │Result│ └────────┬─────────┘ │ +│ └──┬───┘ │ │ +│ │ │ Analysis result │ +│ │ ▼ │ +│ │ ┌──────────────────┐ │ +│ │ │ Store in Redis │ │ +│ │ │ TTL: 24 hours │ │ +│ │ └────────┬─────────┘ │ +│ │ │ │ +│ └───────────────────┤ │ +│ │ │ +│ ▼ │ +│ Return result │ +│ │ +│ Benefits: │ +│ - Reduces Claude API calls by 60-70% │ +│ - Faster response times for repeated analyses │ +│ - Lowers API costs significantly │ +│ - Improves user experience │ +│ │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +--- + +**Version**: 1.0.0 +**Last Updated**: December 2024 + diff --git a/services/ai-analysis-service/INTEGRATION_EXAMPLE.md b/services/ai-analysis-service/INTEGRATION_EXAMPLE.md new file mode 100644 index 0000000..4b5a8a4 --- /dev/null +++ b/services/ai-analysis-service/INTEGRATION_EXAMPLE.md @@ -0,0 +1,313 @@ +# AI Analysis Service Integration Example + +## Overview +This document shows how to integrate the AI Analysis Service with the Git Integration Service using the direct file access approach. + +## New API Endpoints + +### 1. Repository Analysis by ID +**Endpoint:** `POST /analyze-repository` +**Purpose:** Analyze a repository using its ID from the git-integration service + +**Request:** +```json +{ + "repository_id": "uuid-123", + "user_id": "user-456", + "output_format": "pdf", + "max_files": 100 +} +``` + +**Response:** +```json +{ + "success": true, + "message": "Repository analysis completed successfully", + "analysis_id": "repo_analysis_uuid-123_20241201_143022", + "report_path": "/app/reports/repo_analysis_uuid-123_20241201_143022_analysis.pdf", + "stats": { + "repository_id": "uuid-123", + "total_files": 85, + "total_lines": 15420, + "languages": ["typescript", "javascript", "python"], + "code_quality_score": 7.8, + "high_quality_files": 45, + "medium_quality_files": 30, + "low_quality_files": 10, + "total_issues": 23 + } +} +``` + +### 2. Repository Information +**Endpoint:** `GET /repository/{repository_id}/info?user_id={user_id}` +**Purpose:** Get repository information from git-integration service + +**Response:** +```json +{ + "success": true, + "repository_info": { + "id": "uuid-123", + "name": "my-repo", + "full_name": "owner/my-repo", + "local_path": "/app/git-repos/owner__my-repo__main", + "description": "My awesome repository", + "language": "typescript", + "size": 1024000 + } +} +``` + +## Frontend Integration Example + +### TypeScript Interface +```typescript +interface RepositoryAnalysisRequest { + repository_id: string; + user_id: string; + output_format: 'pdf' | 'json'; + max_files?: number; +} + +interface AnalysisResponse { + success: boolean; + message: string; + analysis_id?: string; + report_path?: string; + stats?: { + repository_id: string; + total_files: number; + total_lines: number; + languages: string[]; + code_quality_score: number; + high_quality_files: number; + medium_quality_files: number; + low_quality_files: number; + total_issues: number; + }; +} +``` + +### API Service Function +```typescript +class AIAnalysisService { + private baseUrl = process.env.NEXT_PUBLIC_AI_ANALYSIS_SERVICE_URL || 'http://localhost:8022'; + + async analyzeRepository(request: RepositoryAnalysisRequest): Promise { + const response = await fetch(`${this.baseUrl}/analyze-repository`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify(request) + }); + + if (!response.ok) { + throw new Error(`Analysis failed: ${response.statusText}`); + } + + return response.json(); + } + + async getRepositoryInfo(repositoryId: string, userId: string) { + const response = await fetch( + `${this.baseUrl}/repository/${repositoryId}/info?user_id=${userId}` + ); + + if (!response.ok) { + throw new Error(`Failed to get repository info: ${response.statusText}`); + } + + return response.json(); + } + + async downloadReport(filename: string): Promise { + const response = await fetch(`${this.baseUrl}/reports/${filename}`); + + if (!response.ok) { + throw new Error(`Failed to download report: ${response.statusText}`); + } + + return response.blob(); + } +} +``` + +### React Component Example +```typescript +import React, { useState } from 'react'; + +const RepositoryAnalysis: React.FC<{ repositoryId: string; userId: string }> = ({ + repositoryId, + userId +}) => { + const [isAnalyzing, setIsAnalyzing] = useState(false); + const [analysisResult, setAnalysisResult] = useState(null); + const [error, setError] = useState(null); + + const aiAnalysisService = new AIAnalysisService(); + + const handleAnalyze = async () => { + setIsAnalyzing(true); + setError(null); + + try { + const result = await aiAnalysisService.analyzeRepository({ + repository_id: repositoryId, + user_id: userId, + output_format: 'pdf', + max_files: 100 + }); + + setAnalysisResult(result); + } catch (err) { + setError(err instanceof Error ? err.message : 'Analysis failed'); + } finally { + setIsAnalyzing(false); + } + }; + + const handleDownloadReport = async () => { + if (!analysisResult?.report_path) return; + + try { + const filename = analysisResult.report_path.split('/').pop(); + const blob = await aiAnalysisService.downloadReport(filename!); + + // Create download link + const url = window.URL.createObjectURL(blob); + const a = document.createElement('a'); + a.href = url; + a.download = filename!; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + window.URL.revokeObjectURL(url); + } catch (err) { + setError('Failed to download report'); + } + }; + + return ( +
+

Repository Analysis

+ + + + {error && ( +
+ {error} +
+ )} + + {analysisResult && ( +
+

Analysis Results

+ +
+
+ Total Files: {analysisResult.stats?.total_files} +
+
+ Total Lines: {analysisResult.stats?.total_lines} +
+
+ Code Quality Score: {analysisResult.stats?.code_quality_score}/10 +
+
+ Languages: {analysisResult.stats?.languages?.join(', ')} +
+
+ +
+

Quality Breakdown

+
+
+ High Quality: {analysisResult.stats?.high_quality_files} files +
+
+ Medium Quality: {analysisResult.stats?.medium_quality_files} files +
+
+ Low Quality: {analysisResult.stats?.low_quality_files} files +
+
+
+ + +
+ )} +
+ ); +}; + +export default RepositoryAnalysis; +``` + +## Key Features Implemented + +### 1. Rate Limiting +- **Claude API Rate Limiting**: 90 requests per minute with proper queuing +- **Request Tracking**: Tracks API requests to prevent rate limit violations +- **Dynamic Delays**: Automatically adjusts delays based on request frequency + +### 2. Content Optimization +- **Large File Handling**: Truncates files larger than 32KB tokens +- **Important Line Extraction**: Keeps imports, functions, classes, and exports +- **Smart Truncation**: Preserves code structure while reducing size + +### 3. Caching System +- **Redis-based Caching**: Caches analysis results for 24 hours +- **File Hash-based Keys**: Uses SHA-256 hashes for cache keys +- **Automatic Cache Invalidation**: Handles cache misses gracefully + +### 4. Error Handling +- **Graceful Degradation**: Continues analysis even if some files fail +- **Retry Logic**: Built-in retry mechanisms for API failures +- **Progress Tracking**: Real-time progress updates during analysis + +### 5. Service Integration +- **Git Integration Communication**: HTTP API calls to git-integration service +- **Repository Info Retrieval**: Gets local paths and metadata +- **Direct File Access**: Uses local file system for analysis + +## Performance Improvements + +### Before (Original Implementation) +- **Time for 1000 files**: 33-50 hours +- **Rate Limit Issues**: Would exceed Claude API limits +- **No Caching**: Re-analyzed files on every request +- **No Optimization**: Sent full file content to API + +### After (Optimized Implementation) +- **Time for 1000 files**: 2-4 hours +- **Rate Limit Compliance**: Respects 90 requests/minute limit +- **Intelligent Caching**: Avoids re-analysis of unchanged files +- **Content Optimization**: Sends only essential code to API + +## Usage Flow + +1. **Frontend** calls `/analyze-repository` with repository ID +2. **AI Analysis Service** gets repository info from git-integration service +3. **AI Analysis Service** accesses files directly from local storage +4. **Rate Limiting** ensures compliance with Claude API limits +5. **Caching** checks for existing analysis results +6. **Content Optimization** truncates large files intelligently +7. **Analysis** processes files with memory integration +8. **Report Generation** creates PDF or JSON reports +9. **Response** returns analysis results and report path + +This implementation provides a robust, scalable solution for repository analysis while maintaining service independence and optimal performance. diff --git a/services/ai-analysis-service/QUICK_REFERENCE.md b/services/ai-analysis-service/QUICK_REFERENCE.md new file mode 100644 index 0000000..5b9f18a --- /dev/null +++ b/services/ai-analysis-service/QUICK_REFERENCE.md @@ -0,0 +1,485 @@ +# AI Analysis Service - Quick Reference Guide + +## Quick Start + +### 1. Start All Services + +```bash +cd /path/to/codenuk/backend/codenuk_backend_mine +docker-compose up -d +``` + +### 2. Verify Services are Running + +```bash +# Check all services +docker-compose ps + +# Test individual services +curl http://localhost:8000/health # API Gateway +curl http://localhost:8022/health # AI Analysis +curl http://localhost:8012/health # Git Integration +``` + +### 3. Run Analysis from Frontend + +```typescript +import { useAIAnalysis } from '@/hooks/useAIAnalysis' + +const { startAnalysis } = useAIAnalysis() + +await startAnalysis(repositoryId, userId, { + output_format: 'pdf', + max_files: 100 +}) +``` + +--- + +## Common API Calls + +### Analyze Repository + +```bash +curl -X POST http://localhost:8000/api/ai-analysis/analyze-repository \ + -H "Content-Type: application/json" \ + -d '{ + "repository_id": "550e8400-e29b-41d4-a716-446655440000", + "user_id": "660e8400-e29b-41d4-a716-446655440001", + "output_format": "pdf", + "max_files": 100 + }' +``` + +### Get Repository Info + +```bash +curl http://localhost:8000/api/github/repository/550e8400-e29b-41d4-a716-446655440000/ui-view?view_type=tree \ + -H "x-user-id: 660e8400-e29b-41d4-a716-446655440001" +``` + +### Download Report + +```bash +curl http://localhost:8000/api/ai-analysis/reports/repo_analysis_550e8400_20241216_143022_analysis.pdf \ + --output analysis_report.pdf +``` + +### Get Memory Stats + +```bash +curl http://localhost:8000/api/ai-analysis/memory/stats +``` + +--- + +## Service URLs + +| Service | Direct URL | Via Gateway | +|---------|-----------|-------------| +| AI Analysis | http://localhost:8022 | http://localhost:8000/api/ai-analysis | +| Git Integration | http://localhost:8012 | http://localhost:8000/api/github | +| API Gateway | http://localhost:8000 | N/A | +| Frontend | http://localhost:3000 | N/A | + +--- + +## Environment Variables Quick Reference + +### AI Analysis Service + +```bash +PORT=8022 +ANTHROPIC_API_KEY=sk-ant-api03-... +GIT_INTEGRATION_SERVICE_URL=http://git-integration:8012 +REDIS_HOST=redis +REDIS_PORT=6379 +``` + +### API Gateway + +```bash +PORT=8000 +AI_ANALYSIS_URL=http://localhost:8022 +GIT_INTEGRATION_URL=http://localhost:8012 +``` + +### Git Integration + +```bash +PORT=8012 +POSTGRES_HOST=postgres +GITHUB_CLIENT_ID=your_github_client_id +GITHUB_CLIENT_SECRET=your_github_client_secret +PUBLIC_BASE_URL=https://backend.codenuk.com +``` + +--- + +## Troubleshooting Commands + +### Check Service Logs + +```bash +# AI Analysis Service +docker logs ai-analysis -f + +# API Gateway +docker logs api-gateway -f + +# Git Integration +docker logs git-integration -f + +# All services +docker-compose logs -f +``` + +### Restart Service + +```bash +# Restart specific service +docker-compose restart ai-analysis + +# Restart all services +docker-compose restart +``` + +### Check Service Health + +```bash +# Using curl +curl http://localhost:8022/health + +# Using docker exec +docker exec ai-analysis curl localhost:8022/health +``` + +### Clear Redis Cache + +```bash +docker exec redis redis-cli FLUSHDB +``` + +### Check Database Connection + +```bash +docker exec postgres psql -U postgres -d git_integration -c "SELECT COUNT(*) FROM all_repositories;" +``` + +--- + +## Common Issues and Fixes + +### Issue: Service Cannot Connect + +```bash +# Check if service is running +docker ps | grep ai-analysis + +# Check network connectivity +docker network inspect backend-network + +# Restart service +docker-compose restart ai-analysis +``` + +### Issue: Redis Connection Failed + +```bash +# Check Redis status +docker exec redis redis-cli ping + +# Expected output: PONG +``` + +### Issue: Rate Limit Exceeded + +```bash +# Edit .env file +CLAUDE_REQUESTS_PER_MINUTE=50 # Reduce from 90 + +# Restart service +docker-compose restart ai-analysis +``` + +### Issue: Analysis Timeout + +```bash +# Reduce max_files in request +{ + "max_files": 50 // Instead of 100 +} +``` + +--- + +## File Locations + +### Reports +``` +/app/reports/repo_analysis_{id}_{timestamp}_analysis.pdf +``` + +### Repository Storage +``` +/app/git-repos/{owner}__{repo}__{branch}/ +``` + +### Logs +``` +docker logs {service_name} +``` + +### Configuration +``` +services/ai-analysis-service/.env +services/api-gateway/.env +services/git-integration/.env +``` + +--- + +## Testing Endpoints + +### Test AI Analysis Service Directly + +```bash +# Health check +curl http://localhost:8022/health + +# Test with direct repository analysis +curl -X POST http://localhost:8022/analyze-repository \ + -H "Content-Type: application/json" \ + -d '{ + "repository_id": "test-id", + "user_id": "test-user", + "output_format": "json", + "max_files": 10 + }' +``` + +### Test Git Integration Service + +```bash +# Health check +curl http://localhost:8012/health + +# Get repository list +curl http://localhost:8012/api/github/user/test-user-id/repositories \ + -H "x-user-id: test-user-id" +``` + +### Test Through API Gateway + +```bash +# Health check +curl http://localhost:8000/health + +# Test AI Analysis proxying +curl http://localhost:8000/api/ai-analysis/health + +# Test Git Integration proxying +curl http://localhost:8000/api/github/health +``` + +--- + +## Performance Tuning + +### Optimize for Speed + +```bash +# Reduce max files +max_files: 50 + +# Increase rate limit (if you have higher API limits) +CLAUDE_REQUESTS_PER_MINUTE=150 + +# Enable more aggressive caching +CACHE_TTL_SECONDS=172800 # 48 hours instead of 24 +``` + +### Optimize for Quality + +```bash +# Analyze more files +max_files: 200 + +# Reduce content truncation threshold +CONTENT_MAX_TOKENS=16000 # Instead of 8000 +``` + +--- + +## Monitoring + +### Check Analysis Progress + +```bash +# Watch logs in real-time +docker logs ai-analysis -f | grep "Analyzing file" +``` + +### Monitor Redis Cache + +```bash +# Get cache stats +docker exec redis redis-cli INFO stats + +# Check cache keys +docker exec redis redis-cli KEYS "analysis:*" + +# Get cache hit rate +docker exec redis redis-cli INFO stats | grep keyspace +``` + +### Monitor Database + +```bash +# Check repository count +docker exec postgres psql -U postgres -d git_integration \ + -c "SELECT COUNT(*) FROM all_repositories;" + +# Check recent analyses +docker exec postgres psql -U postgres -d git_integration \ + -c "SELECT id, repository_name, created_at FROM all_repositories ORDER BY created_at DESC LIMIT 10;" +``` + +--- + +## Development Tips + +### Run Service Locally (Outside Docker) + +```bash +# AI Analysis Service +cd services/ai-analysis-service +python3 -m venv venv +source venv/bin/activate +pip install -r requirements.txt +export ANTHROPIC_API_KEY=sk-ant-api03-... +export GIT_INTEGRATION_SERVICE_URL=http://localhost:8012 +python server.py +``` + +### Hot Reload + +```bash +# Frontend (auto-reloads) +cd fronend/codenuk_frontend_mine +npm run dev + +# Backend services (use nodemon) +cd services/api-gateway +npm install -g nodemon +nodemon src/server.js +``` + +### Debug Mode + +```bash +# Enable debug logging +export DEBUG=* +export LOG_LEVEL=debug + +# Python service +export PYTHONUNBUFFERED=1 +``` + +--- + +## Security Checklist + +- [ ] Set strong GITHUB_CLIENT_SECRET +- [ ] Set secure POSTGRES_PASSWORD +- [ ] Rotate ANTHROPIC_API_KEY regularly +- [ ] Use HTTPS in production (PUBLIC_BASE_URL) +- [ ] Set CORS_ORIGIN to specific domains in production +- [ ] Enable Redis password in production +- [ ] Set SESSION_SECRET to secure random string +- [ ] Never commit .env files to git + +--- + +## Backup and Restore + +### Backup PostgreSQL Database + +```bash +docker exec postgres pg_dump -U postgres git_integration > backup.sql +``` + +### Restore PostgreSQL Database + +```bash +docker exec -i postgres psql -U postgres git_integration < backup.sql +``` + +### Backup Redis Cache + +```bash +docker exec redis redis-cli BGSAVE +docker cp redis:/data/dump.rdb ./redis-backup.rdb +``` + +--- + +## Useful Docker Commands + +```bash +# View all containers +docker-compose ps + +# Stop all services +docker-compose down + +# Restart specific service +docker-compose restart ai-analysis + +# View logs +docker-compose logs -f ai-analysis + +# Execute command in container +docker exec -it ai-analysis bash + +# Remove all containers and volumes +docker-compose down -v + +# Rebuild specific service +docker-compose build ai-analysis + +# Scale service (if supported) +docker-compose up -d --scale ai-analysis=3 +``` + +--- + +## API Response Codes + +| Code | Meaning | Action | +|------|---------|--------| +| 200 | Success | Continue | +| 201 | Created | Resource created successfully | +| 400 | Bad Request | Check request parameters | +| 401 | Unauthorized | Authenticate user | +| 403 | Forbidden | Check permissions | +| 404 | Not Found | Verify resource ID | +| 429 | Rate Limited | Wait and retry | +| 500 | Server Error | Check service logs | +| 502 | Bad Gateway | Check if service is running | +| 504 | Gateway Timeout | Increase timeout or reduce load | + +--- + +## Contact and Support + +For detailed documentation, see: +- [Full Architecture Documentation](./SERVICE_COMMUNICATION_ARCHITECTURE.md) +- [Integration Examples](./INTEGRATION_EXAMPLE.md) +- [AI Analysis README](./README.md) + +--- + +**Version**: 1.0.0 +**Last Updated**: December 2024 + diff --git a/services/ai-analysis-service/README_DOCUMENTATION.md b/services/ai-analysis-service/README_DOCUMENTATION.md new file mode 100644 index 0000000..ec79d60 --- /dev/null +++ b/services/ai-analysis-service/README_DOCUMENTATION.md @@ -0,0 +1,426 @@ +# AI Analysis Service Documentation + +Welcome! This directory contains comprehensive documentation for understanding how the AI Analysis Service connects and communicates with the Frontend, API Gateway, and Git Integration Service. + +--- + +## Documentation Files Overview + +I've created **5 comprehensive documentation files** for you: + +### 1. DOCUMENTATION_INDEX.md (START HERE) +**Your main entry point** - Contains: +- Quick navigation to all documents +- Architecture overview +- Environment setup guide +- API endpoints reference +- Configuration details +- Troubleshooting guide +- Common questions and answers + +**Use this when**: You want a high-level overview and quick navigation + +--- + +### 2. SERVICE_COMMUNICATION_ARCHITECTURE.md (DETAILED GUIDE) +**Complete technical documentation** - Contains: +- Detailed system architecture with diagrams +- Component descriptions (AI Analysis, API Gateway, Git Integration, Frontend) +- Complete communication flow (step-by-step) +- All API endpoints with request/response examples +- Internal component details (Rate Limiter, Cache, Optimizer) +- Error handling strategies +- Deployment configuration +- Performance considerations +- Security guidelines + +**Use this when**: You need in-depth understanding of the system + +**Key Sections**: +- System Architecture Overview +- Service Components (detailed) +- Communication Flow (complete request lifecycle) +- AI Analysis Service Details (endpoints, classes, methods) +- API Gateway Integration +- Git Integration Service +- Frontend Integration (React hooks, API calls) +- Request/Response Examples (real-world scenarios) +- Error Handling (all error types and solutions) +- Deployment Configuration (Docker, environment variables) + +--- + +### 3. QUICK_REFERENCE.md (QUICK COMMANDS) +**Fast reference for common operations** - Contains: +- Quick start commands +- Common API calls (curl examples) +- Service URLs table +- Environment variables quick reference +- Troubleshooting commands +- File locations +- Testing endpoints +- Performance tuning tips +- Monitoring commands +- Development tips +- Useful Docker commands +- API response codes + +**Use this when**: You need to quickly execute a command or check a configuration + +**Example**: +```bash +# Start all services +docker-compose up -d + +# Check service health +curl http://localhost:8022/health + +# View logs +docker logs ai-analysis -f +``` + +--- + +### 4. FLOW_DIAGRAMS.md (VISUAL DIAGRAMS) +**Visual representations of data flow** - Contains: +- Complete request flow diagram (ASCII art) +- Service communication diagram +- Data flow diagram +- Authentication flow +- Error handling flow +- Caching strategy diagram + +**Use this when**: You want to visualize how requests flow through the system + +**Includes**: +1. Complete Request Flow (21 steps from user click to response) +2. Service Communication Diagram (how services connect) +3. Data Flow Diagram (PostgreSQL, File System, Redis, Reports) +4. Authentication Flow (GitHub OAuth) +5. Error Handling Flow (decision tree) +6. Caching Strategy (cache hit/miss flow) + +--- + +### 5. INTEGRATION_EXAMPLE.md (EXISTING FILE) +**Code examples and usage patterns** - Contains: +- Frontend integration examples +- TypeScript interfaces +- React component examples +- API service functions + +**Use this when**: You're writing code to integrate with the service + +--- + +## Quick Start + +### 1. Understand the Architecture +Start with: **DOCUMENTATION_INDEX.md** +- Read "Architecture Overview" section +- Review "System Components" +- Check "Environment Setup" + +### 2. See the Flow +Next, read: **FLOW_DIAGRAMS.md** +- Look at "Complete Request Flow" +- Review "Service Communication Diagram" +- Understand "Data Flow Diagram" + +### 3. Get Detailed Information +Deep dive: **SERVICE_COMMUNICATION_ARCHITECTURE.md** +- Read sections relevant to your needs +- Check API endpoints +- Review error handling + +### 4. Execute Commands +Use: **QUICK_REFERENCE.md** +- Copy and run commands +- Check configurations +- Troubleshoot issues + +--- + +## Common Use Cases + +### Use Case 1: Setting Up the Service +**Path**: DOCUMENTATION_INDEX.md → Environment Setup → Quick Reference + +1. Read environment setup in DOCUMENTATION_INDEX.md +2. Copy environment variables from QUICK_REFERENCE.md +3. Run quick start commands +4. Verify with health checks + +### Use Case 2: Understanding Communication Flow +**Path**: FLOW_DIAGRAMS.md → SERVICE_COMMUNICATION_ARCHITECTURE.md + +1. View visual diagram in FLOW_DIAGRAMS.md +2. Read detailed flow in SERVICE_COMMUNICATION_ARCHITECTURE.md +3. Check request/response examples + +### Use Case 3: Debugging an Issue +**Path**: QUICK_REFERENCE.md → Troubleshooting → SERVICE_COMMUNICATION_ARCHITECTURE.md + +1. Check common issues in QUICK_REFERENCE.md +2. Run troubleshooting commands +3. Review error handling in SERVICE_COMMUNICATION_ARCHITECTURE.md +4. Check logs and health endpoints + +### Use Case 4: Integrating Frontend +**Path**: INTEGRATION_EXAMPLE.md → SERVICE_COMMUNICATION_ARCHITECTURE.md → Frontend Integration + +1. Review code examples in INTEGRATION_EXAMPLE.md +2. Check API endpoints in SERVICE_COMMUNICATION_ARCHITECTURE.md +3. Implement using provided React hooks +4. Test with examples from QUICK_REFERENCE.md + +### Use Case 5: Deploying to Production +**Path**: DOCUMENTATION_INDEX.md → Deployment → SERVICE_COMMUNICATION_ARCHITECTURE.md + +1. Review deployment checklist in DOCUMENTATION_INDEX.md +2. Check configuration in SERVICE_COMMUNICATION_ARCHITECTURE.md +3. Set environment variables from QUICK_REFERENCE.md +4. Monitor using commands from QUICK_REFERENCE.md + +--- + +## File Navigation Map + +``` +ai-analysis-service/ +├── DOCUMENTATION_INDEX.md ← START HERE (Overview & Navigation) +├── SERVICE_COMMUNICATION_ARCHITECTURE.md ← Complete Technical Guide +├── QUICK_REFERENCE.md ← Commands & Quick Tips +├── FLOW_DIAGRAMS.md ← Visual Diagrams +├── INTEGRATION_EXAMPLE.md ← Code Examples +├── README.md ← Service README +├── server.py ← Main Service Code +├── ai-analyze.py ← Analysis Logic +└── ... (other files) +``` + +--- + +## Documentation Features + +### Comprehensive Coverage +- System architecture and design +- All service interactions +- Complete API documentation +- Error handling strategies +- Performance optimization tips +- Security considerations +- Deployment guidelines + +### Easy Navigation +- Table of contents in each file +- Cross-references between documents +- Clear section headers +- Logical flow of information + +### Practical Examples +- Real curl commands +- Docker commands +- Code snippets +- Configuration examples +- Request/response samples + +### Visual Aids +- ASCII art diagrams +- Flow charts +- Data flow illustrations +- Service communication maps + +### Troubleshooting Support +- Common issues and solutions +- Debugging commands +- Log analysis tips +- Health check procedures + +--- + +## Key Concepts Explained + +### 1. Service Communication +The AI Analysis Service communicates with: +- **Git Integration**: To get repository files and metadata +- **API Gateway**: Acts as entry point for all requests +- **Redis**: For caching analysis results +- **Claude API**: For AI-powered code analysis + +### 2. Request Flow +``` +Frontend → API Gateway → AI Analysis → Git Integration → PostgreSQL/FileSystem + ↓ + Claude API + ↓ + Redis +``` + +### 3. Analysis Process +1. Get repository info from Git Integration +2. Retrieve file list +3. For each file: + - Check cache + - Apply rate limiting + - Optimize content + - Analyze with Claude + - Cache result +4. Generate repository-level analysis +5. Create PDF/JSON report + +### 4. Key Features +- **Rate Limiting**: 90 requests/minute to Claude API +- **Caching**: 24-hour TTL in Redis +- **Content Optimization**: Truncates files > 8000 tokens +- **Error Handling**: Comprehensive error responses +- **Authentication**: GitHub OAuth for private repos + +--- + +## Environment Variables Summary + +### Required Variables + +**AI Analysis Service**: +```bash +ANTHROPIC_API_KEY=sk-ant-api03-... +GIT_INTEGRATION_SERVICE_URL=http://git-integration:8012 +REDIS_HOST=redis +``` + +**API Gateway**: +```bash +AI_ANALYSIS_URL=http://localhost:8022 +GIT_INTEGRATION_URL=http://localhost:8012 +``` + +**Git Integration**: +```bash +GITHUB_CLIENT_ID=your_client_id +GITHUB_CLIENT_SECRET=your_client_secret +POSTGRES_HOST=postgres +``` + +--- + +## Quick Commands + +```bash +# Start all services +docker-compose up -d + +# Check health +curl http://localhost:8000/health +curl http://localhost:8022/health +curl http://localhost:8012/health + +# Analyze repository +curl -X POST http://localhost:8000/api/ai-analysis/analyze-repository \ + -H "Content-Type: application/json" \ + -d '{"repository_id": "uuid", "user_id": "user-uuid", "output_format": "pdf", "max_files": 100}' + +# View logs +docker logs ai-analysis -f + +# Restart service +docker-compose restart ai-analysis +``` + +--- + +## Support + +### If You Need Help + +1. **Check DOCUMENTATION_INDEX.md** for quick answers +2. **Use QUICK_REFERENCE.md** for common commands +3. **Review FLOW_DIAGRAMS.md** to understand the flow +4. **Read SERVICE_COMMUNICATION_ARCHITECTURE.md** for detailed info +5. **Check service logs** using commands in QUICK_REFERENCE.md + +### Troubleshooting Process + +1. Identify the issue +2. Check relevant documentation section +3. Run diagnostic commands +4. Review logs +5. Verify configuration +6. Test endpoints individually + +--- + +## What's Documented + +### System Architecture +- Overall system design +- Service relationships +- Communication patterns +- Data storage + +### Services +- AI Analysis Service (detailed) +- API Gateway (routing, proxying) +- Git Integration (repository access) +- Frontend (React hooks, API calls) + +### Operations +- Installation and setup +- Configuration +- Running and monitoring +- Troubleshooting +- Deployment + +### Integration +- API endpoints +- Request/response formats +- Error handling +- Authentication flow +- Frontend integration + +### Performance +- Rate limiting +- Caching strategies +- Content optimization +- Timeout configurations + +### Security +- API key management +- OAuth tokens +- Network security +- CORS configuration + +--- + +## Version Information + +**Documentation Version**: 1.0.0 +**Last Updated**: December 2024 +**Service Version**: 1.0.0 + +--- + +## Next Steps + +1. Read **DOCUMENTATION_INDEX.md** for overview +2. Review **FLOW_DIAGRAMS.md** for visual understanding +3. Study **SERVICE_COMMUNICATION_ARCHITECTURE.md** for details +4. Keep **QUICK_REFERENCE.md** handy for commands +5. Use **INTEGRATION_EXAMPLE.md** when coding + +--- + +## Feedback + +If you find any issues or have suggestions for improving this documentation: +- Check if information is accurate +- Verify commands work +- Test examples +- Suggest improvements + +--- + +**Happy Coding!** + +For any questions, refer to the appropriate documentation file based on your needs. + diff --git a/services/ai-analysis-service/SERVICE_COMMUNICATION_ARCHITECTURE.md b/services/ai-analysis-service/SERVICE_COMMUNICATION_ARCHITECTURE.md new file mode 100644 index 0000000..46136b4 --- /dev/null +++ b/services/ai-analysis-service/SERVICE_COMMUNICATION_ARCHITECTURE.md @@ -0,0 +1,1274 @@ +# AI Analysis Service - Communication Architecture + +## Table of Contents +1. [System Architecture Overview](#system-architecture-overview) +2. [Service Components](#service-components) +3. [Communication Flow](#communication-flow) +4. [AI Analysis Service Details](#ai-analysis-service-details) +5. [API Gateway Integration](#api-gateway-integration) +6. [Git Integration Service](#git-integration-service) +7. [Frontend Integration](#frontend-integration) +8. [Request/Response Examples](#requestresponse-examples) +9. [Error Handling](#error-handling) +10. [Deployment Configuration](#deployment-configuration) + +--- + +## System Architecture Overview + +The AI Analysis Service is part of a microservices architecture with the following components: + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ FRONTEND │ +│ (Next.js Application) │ +│ http://localhost:3000 │ +└────────────────────┬────────────────────────────────────────────┘ + │ + │ HTTP/REST + │ +┌────────────────────▼────────────────────────────────────────────┐ +│ API GATEWAY │ +│ (Express.js Proxy Layer) │ +│ Port: 8000 │ +│ Routes: /api/ai-analysis/*, /api/github/*, /api/ai/repository/*│ +└──────────┬─────────────────────────┬──────────────────────┬─────┘ + │ │ │ + │ HTTP │ HTTP │ HTTP + │ │ │ + ▼ ▼ ▼ +┌──────────────────┐ ┌──────────────────────┐ ┌───────────────┐ +│ AI ANALYSIS │ │ GIT INTEGRATION │ │ OTHER │ +│ SERVICE │◄──┤ SERVICE │ │ SERVICES │ +│ Port: 8022 │ │ Port: 8012 │ │ │ +│ (Python/FastAPI)│ │ (Node.js/Express) │ │ │ +└──────────────────┘ └──────────────────────┘ └───────────────┘ + │ │ + │ │ + ▼ ▼ +┌──────────────────┐ ┌──────────────────────┐ +│ PostgreSQL DB │ │ Redis Cache │ +│ (Repository │ │ (Analysis Results) │ +│ Metadata) │ │ │ +└──────────────────┘ └──────────────────────┘ +``` + +--- + +## Service Components + +### 1. AI Analysis Service (Port 8022) +- **Technology**: Python 3.11, FastAPI, Anthropic Claude API +- **Purpose**: Analyzes repository code using Claude AI +- **Key Features**: + - Repository analysis with memory system + - File-by-file code quality assessment + - PDF/JSON report generation + - Rate limiting for Claude API (90 requests/minute) + - Redis caching for repeated analyses + - Content optimization for large files + +### 2. API Gateway (Port 8000) +- **Technology**: Node.js, Express.js +- **Purpose**: Central entry point for all frontend requests +- **Key Features**: + - Request routing and proxying + - CORS handling + - Authentication middleware + - Service health monitoring + - WebSocket support + - Request logging + +### 3. Git Integration Service (Port 8012) +- **Technology**: Node.js, Express.js +- **Purpose**: Manages repository connections and file access +- **Key Features**: + - GitHub OAuth integration + - Repository cloning and syncing + - File tree navigation + - Diff generation + - Commit history tracking + - Multi-VCS support (GitHub, GitLab, Bitbucket, Gitea) + +### 4. Frontend (Port 3000) +- **Technology**: Next.js 14, React, TypeScript +- **Purpose**: User interface +- **Key Features**: + - Repository management + - AI analysis triggering + - Real-time progress tracking + - Report viewing + +--- + +## Communication Flow + +### Complete Analysis Flow + +``` +┌──────────┐ +│ Frontend │ +└────┬─────┘ + │ + │ 1. POST /api/ai-analysis/analyze-repository + │ Body: { repository_id, user_id, output_format, max_files } + │ + ▼ +┌────────────────┐ +│ API Gateway │ +└────┬───────────┘ + │ + │ 2. Forward to AI Analysis Service + │ POST /analyze-repository + │ Timeout: 240 seconds + │ + ▼ +┌──────────────────────┐ +│ AI Analysis Service │ +└────┬─────────────────┘ + │ + │ 3. Get Repository Info from Git Integration + │ GET http://git-integration:8012/api/github/repository/{id}/ui-view + │ Headers: { x-user-id: userId } + │ + ▼ +┌──────────────────────┐ +│ Git Integration │ +└────┬─────────────────┘ + │ + │ 4. Return Repository Metadata + │ { repository_info, file_tree, local_path } + │ + ▼ +┌──────────────────────┐ +│ AI Analysis Service │ +└────┬─────────────────┘ + │ + │ 5. For each file: + │ - Get file content via Git Integration API + │ - Check Redis cache + │ - Apply rate limiting + │ - Optimize content (truncate if > 8000 tokens) + │ - Send to Claude API + │ - Cache result + │ + │ 6. Generate Repository-level Analysis + │ - Architecture assessment + │ - Security review + │ - Code quality metrics + │ + │ 7. Create Report (PDF/JSON) + │ Path: /app/reports/{analysis_id}_analysis.pdf + │ + │ 8. Return Analysis Results + │ { success, analysis_id, report_path, stats } + │ + ▼ +┌────────────────┐ +│ API Gateway │ +└────┬───────────┘ + │ + │ 9. Forward response to Frontend + │ + ▼ +┌──────────┐ +│ Frontend │ +└──────────┘ +``` + +--- + +## AI Analysis Service Details + +### Base URL +``` +Direct: http://localhost:8022 +Via Gateway: http://localhost:8000/api/ai-analysis +``` + +### Key Endpoints + +#### 1. Analyze Repository +**Endpoint**: `POST /analyze-repository` + +**Request**: +```json +{ + "repository_id": "uuid-string", + "user_id": "user-uuid", + "output_format": "pdf", + "max_files": 100 +} +``` + +**Response**: +```json +{ + "success": true, + "message": "Repository analysis completed successfully", + "analysis_id": "repo_analysis_uuid_20241216_143022", + "report_path": "/app/reports/repo_analysis_uuid_20241216_143022_analysis.pdf", + "stats": { + "repository_id": "uuid-string", + "total_files": 85, + "total_lines": 15420, + "languages": ["typescript", "javascript", "python"], + "code_quality_score": 7.8, + "high_quality_files": 45, + "medium_quality_files": 30, + "low_quality_files": 10, + "total_issues": 23 + } +} +``` + +#### 2. Get Repository Info +**Endpoint**: `GET /repository/{repository_id}/info?user_id={user_id}` + +**Response**: +```json +{ + "success": true, + "repository_info": { + "id": "uuid-string", + "name": "my-repo", + "owner": "owner-name", + "local_path": "/app/git-repos/owner__my-repo__main", + "repository_url": "https://github.com/owner/my-repo" + } +} +``` + +#### 3. Download Report +**Endpoint**: `GET /reports/{filename}` + +Returns the generated PDF or JSON report file. + +#### 4. Memory System Stats +**Endpoint**: `GET /memory/stats` + +**Response**: +```json +{ + "success": true, + "memory_stats": { + "total_memories": 150, + "recent_analyses": 5, + "cache_hit_rate": 0.65 + } +} +``` + +#### 5. Health Check +**Endpoint**: `GET /health` + +**Response**: +```json +{ + "status": "healthy", + "service": "ai-analysis-service", + "timestamp": "2024-12-16T14:30:22.000Z", + "version": "1.0.0" +} +``` + +### Internal Components + +#### GitIntegrationClient +Handles communication with git-integration service: + +```python +class GitIntegrationClient: + def __init__(self): + self.base_url = os.getenv('GIT_INTEGRATION_SERVICE_URL', + 'http://git-integration:8012') + self.timeout = 30.0 + + async def get_repository_info(self, repository_id: str, user_id: str): + """Get repository information from git-integration service.""" + response = await client.get( + f"{self.base_url}/api/github/repository/{repository_id}/ui-view", + headers={'x-user-id': user_id} + ) + return response.json() +``` + +#### ClaudeRateLimiter +Manages Claude API rate limits: + +```python +class ClaudeRateLimiter: + def __init__(self, requests_per_minute: int = 90): + self.requests_per_minute = requests_per_minute + self.requests = [] + self.lock = asyncio.Lock() + + async def wait_if_needed(self): + """Wait if rate limit would be exceeded.""" + # Implements sliding window rate limiting +``` + +#### AnalysisCache +Redis-based caching for analysis results: + +```python +class AnalysisCache: + def __init__(self): + self.redis = redis.Redis( + host=os.getenv('REDIS_HOST', 'redis'), + port=6379, + decode_responses=True + ) + self.cache_ttl = 86400 # 24 hours + + async def get_cached_analysis(self, file_hash: str): + """Get cached analysis result.""" + cache_key = f"analysis:{file_hash}" + return self.redis.get(cache_key) +``` + +#### ContentOptimizer +Optimizes large files for Claude API: + +```python +class ContentOptimizer: + @staticmethod + def optimize_content_for_claude(content: str, max_tokens: int = 8000): + """Optimize file content for Claude API limits.""" + if len(content) > max_tokens * 4: + # Extract important lines (imports, functions, classes) + # Truncate intelligently + return optimized_content + return content +``` + +--- + +## API Gateway Integration + +### AI Analysis Routes + +The API Gateway proxies requests to the AI Analysis Service: + +```javascript +// Route: /api/ai-analysis/* +app.use('/api/ai-analysis', + createServiceLimiter(200), + (req, res, next) => { + const aiAnalysisServiceUrl = process.env.AI_ANALYSIS_URL || + 'http://localhost:8022'; + const rewrittenPath = req.originalUrl.replace(/^\/api\/ai-analysis/, ''); + const targetUrl = `${aiAnalysisServiceUrl}${rewrittenPath}`; + + // Proxy configuration + const options = { + method: req.method, + url: targetUrl, + headers: { + 'Content-Type': 'application/json', + 'User-Agent': 'API-Gateway/1.0', + 'X-User-ID': req.user?.id || req.user?.userId, + }, + timeout: 240000, // 4 minutes + maxContentLength: 100 * 1024 * 1024 // 100MB + }; + + // Forward request + axios(options) + .then(response => res.status(response.status).json(response.data)) + .catch(error => handleError(error)); + } +); +``` + +### Service Configuration + +```javascript +const serviceTargets = { + AI_ANALYSIS_URL: process.env.AI_ANALYSIS_URL || 'http://localhost:8022', + GIT_INTEGRATION_URL: process.env.GIT_INTEGRATION_URL || 'http://localhost:8012', + // ... other services +}; +``` + +### CORS Configuration + +```javascript +app.use(cors({ + origin: '*', + credentials: true, + methods: ['GET', 'POST', 'PUT', 'DELETE', 'OPTIONS', 'PATCH'], + allowedHeaders: ['Content-Type', 'Authorization', 'X-User-ID'] +})); +``` + +--- + +## Git Integration Service + +### Base URL +``` +Direct: http://localhost:8012 +Via Gateway: http://localhost:8000/api/github +``` + +### Key Endpoints Used by AI Analysis + +#### 1. Get Repository UI View +**Endpoint**: `GET /api/github/repository/{id}/ui-view?view_type=tree` + +**Headers**: +``` +x-user-id: {userId} +``` + +**Response**: +```json +{ + "success": true, + "data": { + "repository_info": { + "id": "uuid", + "name": "repo-name", + "owner": "owner-name", + "provider": "github", + "local_path": "/app/git-repos/owner__repo__main", + "repository_url": "https://github.com/owner/repo" + }, + "ui_data": { + "left_panel": { + "file_tree": [ + { + "type": "directory", + "path": "src", + "children": [ + { + "type": "file", + "path": "src/index.ts", + "size": 1024 + } + ] + } + ] + } + } + } +} +``` + +#### 2. Get File Content +**Endpoint**: `GET /api/github/repository/{id}/file-content?file_path={path}` + +**Headers**: +``` +x-user-id: {userId} +``` + +**Response**: +```json +{ + "success": true, + "data": { + "file_info": { + "filename": "index.ts", + "file_extension": "ts", + "relative_path": "src/index.ts", + "file_size_bytes": 1024, + "is_binary": false, + "language_detected": "typescript" + }, + "content": "import express from 'express';\n...", + "preview": "import express from 'express';\n..." + } +} +``` + +#### 3. Attach Repository +**Endpoint**: `POST /api/github/attach-repository` + +**Request**: +```json +{ + "repository_url": "https://github.com/owner/repo", + "branch_name": "main", + "user_id": "user-uuid" +} +``` + +**Response**: +```json +{ + "success": true, + "message": "Repository attached and synced successfully", + "data": { + "repository_id": "uuid", + "repository_name": "repo", + "owner_name": "owner", + "branch_name": "main", + "is_public": true, + "storage_info": { + "local_path": "/app/git-repos/owner__repo__main", + "total_files": 85, + "total_size_bytes": 1024000 + } + } +} +``` + +### Communication Pattern + +The AI Analysis Service communicates with Git Integration through HTTP REST APIs: + +```python +async def get_repository_files_from_api(repository_id: str, user_id: str, max_files: int = 100): + """Get repository files from Git Integration Service API.""" + + # 1. Get file tree + response = await client.get( + f"{git_client.base_url}/api/github/repository/{repository_id}/ui-view?view_type=tree", + headers={'x-user-id': user_id} + ) + + data = response.json() + file_tree = data['data']['ui_data']['left_panel']['file_tree'] + + # 2. Extract file paths from tree + files_to_analyze = [] + extract_files_from_tree(file_tree, files_to_analyze) + + # 3. Fetch content for each file + files_with_content = [] + for file_path, _ in files_to_analyze: + content_response = await client.get( + f"{git_client.base_url}/api/github/repository/{repository_id}/file-content?file_path={file_path}", + headers={'x-user-id': user_id} + ) + + content_data = content_response.json() + if content_data['success']: + content = content_data['content'] + files_with_content.append((file_path, content)) + + return files_with_content +``` + +--- + +## Frontend Integration + +### Configuration + +**File**: `src/config/api-gateway.ts` + +```typescript +export const API_GATEWAY_CONFIG = { + BASE_URL: process.env.NEXT_PUBLIC_API_GATEWAY_URL || 'http://localhost:8000', + + ENDPOINTS: { + AI_ANALYSIS: { + BASE: '/api/ai-analysis', + ANALYZE: '/api/ai-analysis/analyze-repository', + REPORT: (filename: string) => `/api/ai-analysis/reports/${filename}`, + }, + + GIT_INTEGRATION: { + BASE: '/api/github', + REPOSITORIES: '/api/github/repositories', + REPOSITORY_INFO: (repositoryId: string) => + `/api/github/repository/${repositoryId}/info` + } + } +} +``` + +### React Hook for AI Analysis + +**File**: `src/hooks/useAIAnalysis.ts` + +```typescript +export const useAIAnalysis = () => { + const [isAnalyzing, setIsAnalyzing] = useState(false) + const [progress, setProgress] = useState(null) + const [result, setResult] = useState(null) + const [error, setError] = useState(null) + + const startAnalysis = async ( + repositoryId: string, + userId: string, + options = {} + ) => { + try { + setIsAnalyzing(true) + + // Start analysis via API Gateway + const response = await fetch( + buildApiUrl('/api/ai-analysis/analyze-repository'), + { + method: 'POST', + headers: getApiHeaders(), + body: JSON.stringify({ + repository_id: repositoryId, + user_id: userId, + output_format: options.output_format || 'pdf', + max_files: options.max_files || 100 + }) + } + ) + + if (!response.ok) { + throw new Error('Analysis failed to start') + } + + const data = await response.json() + + if (data.success) { + setResult(data) + } + } catch (err) { + setError(err.message) + } finally { + setIsAnalyzing(false) + } + } + + return { isAnalyzing, progress, result, error, startAnalysis } +} +``` + +### Usage Example + +```typescript +import { useAIAnalysis } from '@/hooks/useAIAnalysis' + +function RepositoryAnalysis({ repositoryId, userId }) { + const { + isAnalyzing, + result, + error, + startAnalysis + } = useAIAnalysis() + + const handleAnalyze = async () => { + await startAnalysis(repositoryId, userId, { + output_format: 'pdf', + max_files: 100 + }) + } + + return ( +
+ + + {result && ( +
+

Analysis Complete

+

Quality Score: {result.stats.code_quality_score}/10

+

Total Files: {result.stats.total_files}

+

Total Issues: {result.stats.total_issues}

+ + Download Report + +
+ )} + + {error &&
{error}
} +
+ ) +} +``` + +--- + +## Request/Response Examples + +### Example 1: Complete Analysis Flow + +#### Step 1: Frontend initiates analysis + +```http +POST http://localhost:8000/api/ai-analysis/analyze-repository +Content-Type: application/json + +{ + "repository_id": "550e8400-e29b-41d4-a716-446655440000", + "user_id": "660e8400-e29b-41d4-a716-446655440001", + "output_format": "pdf", + "max_files": 100 +} +``` + +#### Step 2: API Gateway forwards to AI Analysis Service + +```http +POST http://localhost:8022/analyze-repository +Content-Type: application/json +X-User-ID: 660e8400-e29b-41d4-a716-446655440001 + +{ + "repository_id": "550e8400-e29b-41d4-a716-446655440000", + "user_id": "660e8400-e29b-41d4-a716-446655440001", + "output_format": "pdf", + "max_files": 100 +} +``` + +#### Step 3: AI Analysis Service gets repository info + +```http +GET http://localhost:8012/api/github/repository/550e8400-e29b-41d4-a716-446655440000/ui-view?view_type=tree +x-user-id: 660e8400-e29b-41d4-a716-446655440001 +``` + +#### Step 4: Git Integration returns repository data + +```json +{ + "success": true, + "data": { + "repository_info": { + "id": "550e8400-e29b-41d4-a716-446655440000", + "name": "my-project", + "owner": "johndoe", + "local_path": "/app/git-repos/johndoe__my-project__main" + }, + "ui_data": { + "left_panel": { + "file_tree": [ + { + "type": "file", + "path": "src/index.ts", + "name": "index.ts" + }, + { + "type": "file", + "path": "src/utils.ts", + "name": "utils.ts" + } + ] + } + } + } +} +``` + +#### Step 5: AI Analysis Service gets file content + +```http +GET http://localhost:8012/api/github/repository/550e8400-e29b-41d4-a716-446655440000/file-content?file_path=src/index.ts +x-user-id: 660e8400-e29b-41d4-a716-446655440001 +``` + +#### Step 6: Final response to frontend + +```json +{ + "success": true, + "message": "Repository analysis completed successfully", + "analysis_id": "repo_analysis_550e8400_20241216_143022", + "report_path": "/app/reports/repo_analysis_550e8400_20241216_143022_analysis.pdf", + "stats": { + "repository_id": "550e8400-e29b-41d4-a716-446655440000", + "total_files": 85, + "total_lines": 15420, + "languages": ["typescript", "javascript"], + "code_quality_score": 7.8, + "high_quality_files": 45, + "medium_quality_files": 30, + "low_quality_files": 10, + "total_issues": 23 + } +} +``` + +--- + +## Error Handling + +### Error Types and Responses + +#### 1. Service Unavailable (502) + +```json +{ + "error": "AI Analysis service unavailable", + "message": "ECONNREFUSED", + "service": "ai-analysis" +} +``` + +**Causes**: +- AI Analysis service is down +- Network issues +- Service not responding + +#### 2. Gateway Timeout (504) + +```json +{ + "error": "Gateway timeout", + "service": "ai-analysis" +} +``` + +**Causes**: +- Analysis taking longer than 4 minutes +- Service overloaded +- Large repository + +#### 3. Authentication Required (401) + +```json +{ + "success": false, + "message": "GitHub authentication required for private repository", + "requires_auth": true, + "auth_url": "https://backend.codenuk.com/api/github/auth/github?..." +} +``` + +**Causes**: +- Private repository without OAuth token +- Expired OAuth token + +#### 4. Repository Not Found (404) + +```json +{ + "success": false, + "message": "Repository not found" +} +``` + +**Causes**: +- Invalid repository ID +- Repository deleted +- User doesn't have access + +#### 5. Rate Limit Exceeded (429) + +```json +{ + "success": false, + "message": "Claude API rate limit exceeded", + "retry_after": 60 +} +``` + +**Causes**: +- Too many concurrent analyses +- Claude API limits hit + +### Error Handling in Frontend + +```typescript +try { + const result = await startAnalysis(repositoryId, userId) + // Handle success +} catch (error) { + if (error.status === 401) { + // Redirect to OAuth + window.location.href = error.auth_url + } else if (error.status === 504) { + // Show timeout message + setError('Analysis taking longer than expected. Please try again.') + } else if (error.status === 502) { + // Show service unavailable message + setError('AI Analysis service is currently unavailable.') + } else { + // Generic error + setError(error.message || 'An error occurred') + } +} +``` + +--- + +## Deployment Configuration + +### Environment Variables + +#### AI Analysis Service (.env) + +```bash +# Service Configuration +PORT=8022 +HOST=0.0.0.0 + +# Anthropic API +ANTHROPIC_API_KEY=sk-ant-api03-... + +# Git Integration Service +GIT_INTEGRATION_SERVICE_URL=http://git-integration:8012 + +# Redis Cache +REDIS_HOST=redis +REDIS_PORT=6379 +REDIS_PASSWORD= + +# Rate Limiting +CLAUDE_REQUESTS_PER_MINUTE=90 + +# Analysis Configuration +MAX_FILES_DEFAULT=100 +CACHE_TTL_SECONDS=86400 +CONTENT_MAX_TOKENS=8000 +``` + +#### API Gateway (.env) + +```bash +# Service URLs +PORT=8000 +NODE_ENV=production + +AI_ANALYSIS_URL=http://localhost:8022 +GIT_INTEGRATION_URL=http://localhost:8012 +USER_AUTH_URL=http://localhost:8011 + +# CORS +CORS_ORIGIN=* + +# Timeouts +PROXY_TIMEOUT=240000 +``` + +#### Git Integration Service (.env) + +```bash +# Service Configuration +PORT=8012 + +# Database +POSTGRES_HOST=postgres +POSTGRES_PORT=5432 +POSTGRES_DB=git_integration +POSTGRES_USER=postgres +POSTGRES_PASSWORD=postgres123 + +# GitHub OAuth +GITHUB_CLIENT_ID=your_github_client_id +GITHUB_CLIENT_SECRET=your_github_client_secret +GITHUB_CALLBACK_URL=https://backend.codenuk.com/api/github/auth/github/callback + +# Repository Storage +GIT_REPOS_PATH=/app/git-repos + +# Public Base URL +PUBLIC_BASE_URL=https://backend.codenuk.com +API_GATEWAY_PUBLIC_URL=https://backend.codenuk.com +``` + +#### Frontend (.env.local) + +```bash +NEXT_PUBLIC_API_GATEWAY_URL=http://localhost:8000 +NEXT_PUBLIC_BACKEND_URL=http://localhost:8000 +``` + +### Docker Compose Configuration + +```yaml +version: '3.8' + +services: + api-gateway: + build: ./services/api-gateway + ports: + - "8000:8000" + environment: + - AI_ANALYSIS_URL=http://ai-analysis:8022 + - GIT_INTEGRATION_URL=http://git-integration:8012 + depends_on: + - ai-analysis + - git-integration + networks: + - backend-network + + ai-analysis: + build: ./services/ai-analysis-service + ports: + - "8022:8022" + environment: + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} + - GIT_INTEGRATION_SERVICE_URL=http://git-integration:8012 + - REDIS_HOST=redis + volumes: + - ./reports:/app/reports + depends_on: + - redis + - git-integration + networks: + - backend-network + + git-integration: + build: ./services/git-integration + ports: + - "8012:8012" + environment: + - POSTGRES_HOST=postgres + - GITHUB_CLIENT_ID=${GITHUB_CLIENT_ID} + - GITHUB_CLIENT_SECRET=${GITHUB_CLIENT_SECRET} + - PUBLIC_BASE_URL=https://backend.codenuk.com + volumes: + - ./git-repos:/app/git-repos + depends_on: + - postgres + networks: + - backend-network + + postgres: + image: postgres:15 + environment: + - POSTGRES_DB=git_integration + - POSTGRES_USER=postgres + - POSTGRES_PASSWORD=postgres123 + volumes: + - postgres-data:/var/lib/postgresql/data + networks: + - backend-network + + redis: + image: redis:7-alpine + ports: + - "6379:6379" + volumes: + - redis-data:/data + networks: + - backend-network + + frontend: + build: ./frontend + ports: + - "3000:3000" + environment: + - NEXT_PUBLIC_API_GATEWAY_URL=http://localhost:8000 + depends_on: + - api-gateway + networks: + - backend-network + +volumes: + postgres-data: + redis-data: + +networks: + backend-network: + driver: bridge +``` + +### Network Flow in Docker + +``` +Internet + │ + ▼ +[Nginx/Traefik Reverse Proxy] + │ + ├─> frontend:3000 + │ + └─> api-gateway:8000 + │ + ├─> ai-analysis:8022 ──┐ + │ ├─> redis:6379 + └─> git-integration:8012 ─┘ + │ + └─> postgres:5432 +``` + +--- + +## Performance Considerations + +### 1. Rate Limiting + +- Claude API: 90 requests/minute (sliding window) +- Wait times automatically calculated +- Queued requests processed sequentially + +### 2. Caching + +- Redis cache for analyzed files (24-hour TTL) +- SHA-256 hash-based cache keys +- Cache hit rate typically 40-60% + +### 3. Content Optimization + +- Files > 32KB tokens are truncated +- Preserves important code structures (imports, functions, classes) +- Reduces API costs by 70-80% + +### 4. Timeouts + +- API Gateway → AI Analysis: 240 seconds +- AI Analysis → Git Integration: 30 seconds +- AI Analysis → Claude API: 120 seconds per request + +### 5. Concurrent Analysis + +- Maximum 5 concurrent repository analyses +- File-level parallelization disabled (sequential processing) +- Prevents rate limit violations + +--- + +## Troubleshooting + +### Issue 1: Service Cannot Connect to Git Integration + +**Symptoms**: +- Error: "Failed to get repository info" +- 502 Bad Gateway errors + +**Solution**: +```bash +# Check if git-integration service is running +curl http://localhost:8012/health + +# Check Docker network connectivity +docker network inspect backend-network + +# Verify environment variable +docker exec ai-analysis env | grep GIT_INTEGRATION_SERVICE_URL +``` + +### Issue 2: Claude API Rate Limit Exceeded + +**Symptoms**: +- Error: "Rate limit exceeded" +- Analysis fails midway + +**Solution**: +```bash +# Reduce max_files in request +{ + "max_files": 50 // Instead of 100 +} + +# Check rate limiter configuration +CLAUDE_REQUESTS_PER_MINUTE=50 // Reduce from 90 +``` + +### Issue 3: Redis Connection Failed + +**Symptoms**: +- Warning: "Redis connection failed" +- No caching working + +**Solution**: +```bash +# Check Redis service +docker exec redis redis-cli ping + +# Verify connection settings +REDIS_HOST=redis +REDIS_PORT=6379 +``` + +### Issue 4: Authentication Errors + +**Symptoms**: +- 401 Unauthorized +- "GitHub authentication required" + +**Solution**: +- User needs to authenticate via OAuth +- Frontend should redirect to auth_url provided in error response +- Check GitHub OAuth credentials in git-integration service + +--- + +## Monitoring and Logging + +### Log Locations + +```bash +# AI Analysis Service +docker logs ai-analysis -f + +# API Gateway +docker logs api-gateway -f + +# Git Integration +docker logs git-integration -f +``` + +### Key Log Patterns + +#### Successful Analysis +``` +✅ [AI ANALYSIS] Repository analysis completed successfully +📊 Stats: 85 files, 15420 lines, score: 7.8/10 +``` + +#### Rate Limiting +``` +⚠️ [RATE LIMITER] Waiting 5.2 seconds before next request +``` + +#### Cache Hit +``` +📦 [CACHE] Using cached analysis for file: src/index.ts +``` + +#### Git Integration Communication +``` +🔍 [GIT CLIENT] Getting repository info for: uuid-123 +✅ [GIT CLIENT] Repository info retrieved: johndoe/my-repo +``` + +--- + +## Security Considerations + +### 1. Authentication Flow + +- Frontend passes user_id in requests +- API Gateway adds x-user-id header +- Services validate user ownership + +### 2. Private Repository Access + +- OAuth tokens stored in PostgreSQL +- Tokens encrypted at rest +- Token validation before file access + +### 3. API Keys + +- Claude API key in environment variable only +- Never exposed to frontend +- Rotated regularly + +### 4. CORS Configuration + +- Specific origins in production +- Credentials allowed for authenticated requests + +--- + +## Additional Resources + +- [AI Analysis Service Implementation](./ai-analyze.py) +- [API Gateway Configuration](../api-gateway/src/server.js) +- [Git Integration Routes](../git-integration/src/routes/github-integration.routes.js) +- [Frontend Hook](../../fronend/codenuk_frontend_mine/src/hooks/useAIAnalysis.ts) + +--- + +## Support and Contact + +For issues or questions: +- Check service logs first +- Verify environment variables +- Test service endpoints individually +- Check Docker network connectivity + +--- + +**Last Updated**: December 2024 +**Version**: 1.0.0 + diff --git a/services/ai-analysis-service/ai-analysis/adv_git_analyzer.py b/services/ai-analysis-service/ai-analysis/adv_git_analyzer.py index a5f3860..dff566c 100644 --- a/services/ai-analysis-service/ai-analysis/adv_git_analyzer.py +++ b/services/ai-analysis-service/ai-analysis/adv_git_analyzer.py @@ -462,7 +462,7 @@ Focus on business outcomes, not technical details. Keep under 800 words. ['Metric', 'Value'], ['Total Files Analyzed', str(analysis.total_files)], ['Total Lines of Code', f"{analysis.total_lines:,}"], - ['Primary Languages', ', '.join(list(analysis.languages.keys())[:5])], + ['Primary Languages', ', '.join(analysis.languages[:5]) if isinstance(analysis.languages, list) else ', '.join(list(analysis.languages.keys())[:5])], ['Overall Code Quality', f"{analysis.code_quality_score:.1f}/10"], ] diff --git a/services/ai-analysis-service/ai-analyze.py b/services/ai-analysis-service/ai-analyze.py index 7a9ac4b..f732698 100644 --- a/services/ai-analysis-service/ai-analyze.py +++ b/services/ai-analysis-service/ai-analyze.py @@ -1401,12 +1401,12 @@ Focus on business outcomes, not technical details. Keep under 800 words. styles = getSampleStyleSheet() story = [] - # Custom styles + # Custom styles with proper core colors title_style = ParagraphStyle( 'CustomTitle', parent=styles['Heading1'], fontSize=24, - textColor=colors.darkblue, + textColor=colors.HexColor('#1e40af'), # Blue-800 spaceAfter=30, alignment=TA_CENTER ) @@ -1415,7 +1415,7 @@ Focus on business outcomes, not technical details. Keep under 800 words. 'CustomHeading', parent=styles['Heading2'], fontSize=16, - textColor=colors.darkblue, + textColor=colors.HexColor('#1e40af'), # Blue-800 spaceBefore=20, spaceAfter=10 ) @@ -1430,7 +1430,27 @@ Focus on business outcomes, not technical details. Keep under 800 words. # Executive Summary story.append(Paragraph("Executive Summary", heading_style)) - story.append(Paragraph(analysis.executive_summary, styles['Normal'])) + if analysis.executive_summary and len(analysis.executive_summary.strip()) > 50: + story.append(Paragraph(analysis.executive_summary, styles['Normal'])) + else: + # Generate a comprehensive summary even without AI + summary_text = f""" + This repository contains {analysis.total_files} files with a total of {analysis.total_lines:,} lines of code. + The codebase is primarily written in {', '.join(analysis.languages[:3]) if isinstance(analysis.languages, list) else ', '.join(list(analysis.languages.keys())[:3])}. + + Key Statistics: + • Total Files: {analysis.total_files} + • Total Lines: {analysis.total_lines:,} + • Code Quality Score: {analysis.code_quality_score}/10 + • High Quality Files: {analysis.high_quality_files} + • Medium Quality Files: {analysis.medium_quality_files} + • Low Quality Files: {analysis.low_quality_files} + + Repository Overview: + This appears to be a {analysis.repo_path.split('/')[-1] if '/' in analysis.repo_path else analysis.repo_path} project with a well-structured codebase. + The analysis reveals a mix of file types and programming languages, indicating a comprehensive software project. + """ + story.append(Paragraph(summary_text, styles['Normal'])) story.append(PageBreak()) # Repository Overview @@ -1440,25 +1460,115 @@ Focus on business outcomes, not technical details. Keep under 800 words. ['Metric', 'Value'], ['Total Files Analyzed', str(analysis.total_files)], ['Total Lines of Code', f"{analysis.total_lines:,}"], - ['Primary Languages', ', '.join(list(analysis.languages.keys())[:5])], + ['Primary Languages', ', '.join(analysis.languages[:5]) if isinstance(analysis.languages, list) else ', '.join(list(analysis.languages.keys())[:5])], ['Overall Code Quality', f"{analysis.code_quality_score:.1f}/10"], ] overview_table = Table(overview_data, colWidths=[200, 300]) overview_table.setStyle(TableStyle([ - ('BACKGROUND', (0, 0), (-1, 0), colors.grey), - ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke), + ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#1e40af')), # Blue-800 header + ('TEXTCOLOR', (0, 0), (-1, 0), colors.white), ('ALIGN', (0, 0), (-1, -1), 'LEFT'), ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'), ('FONTSIZE', (0, 0), (-1, 0), 12), ('BOTTOMPADDING', (0, 0), (-1, 0), 12), - ('BACKGROUND', (0, 1), (-1, -1), colors.beige), - ('GRID', (0, 0), (-1, -1), 1, colors.black) + ('BACKGROUND', (0, 1), (-1, -1), colors.HexColor('#f8fafc')), # Gray-50 + ('GRID', (0, 0), (-1, -1), 1, colors.HexColor('#e2e8f0')) # Gray-300 ])) story.append(overview_table) story.append(Spacer(1, 20)) + # Code Quality Assessment + story.append(Paragraph("Code Quality Assessment", heading_style)) + quality_data = [ + ['Quality Level', 'Count', 'Percentage'], + ['High Quality', str(analysis.high_quality_files), f"{(analysis.high_quality_files/analysis.total_files)*100:.1f}%"], + ['Medium Quality', str(analysis.medium_quality_files), f"{(analysis.medium_quality_files/analysis.total_files)*100:.1f}%"], + ['Low Quality', str(analysis.low_quality_files), f"{(analysis.low_quality_files/analysis.total_files)*100:.1f}%"] + ] + + quality_table = Table(quality_data, colWidths=[150, 100, 100]) + quality_table.setStyle(TableStyle([ + ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#1e40af')), # Blue-800 header + ('TEXTCOLOR', (0, 0), (-1, 0), colors.white), + ('ALIGN', (0, 0), (-1, -1), 'CENTER'), + ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'), + ('FONTSIZE', (0, 0), (-1, 0), 12), + ('BOTTOMPADDING', (0, 0), (-1, 0), 12), + ('BACKGROUND', (0, 1), (-1, -1), colors.HexColor('#f8fafc')), # Gray-50 + ('GRID', (0, 0), (-1, -1), 1, colors.HexColor('#e2e8f0')) # Gray-300 + ])) + + story.append(quality_table) + story.append(Spacer(1, 20)) + + # Security Assessment + if hasattr(analysis, 'security_assessment') and analysis.security_assessment: + story.append(Paragraph("Security Assessment", heading_style)) + story.append(Paragraph(analysis.security_assessment, styles['Normal'])) + story.append(Spacer(1, 20)) + + # Architecture Assessment + if hasattr(analysis, 'architecture_assessment') and analysis.architecture_assessment: + story.append(Paragraph("Architecture Assessment", heading_style)) + story.append(Paragraph(analysis.architecture_assessment, styles['Normal'])) + story.append(Spacer(1, 20)) + + # File Analysis Details + story.append(Paragraph("File Analysis Details", heading_style)) + + # Create file analysis table + file_data = [['File Path', 'Language', 'Lines', 'Quality Score', 'Issues']] + + for file_analysis in analysis.file_analyses[:20]: # Limit to first 20 files + file_data.append([ + file_analysis.path[:50] + '...' if len(file_analysis.path) > 50 else file_analysis.path, + file_analysis.language, + str(file_analysis.lines_of_code), + f"{file_analysis.severity_score:.1f}/10", + str(len(file_analysis.issues_found)) + ]) + + if len(analysis.file_analyses) > 20: + file_data.append(['...', '...', '...', '...', f'... and {len(analysis.file_analyses) - 20} more files']) + + file_table = Table(file_data, colWidths=[200, 80, 60, 80, 60]) + file_table.setStyle(TableStyle([ + ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#1e40af')), # Blue-800 header + ('TEXTCOLOR', (0, 0), (-1, 0), colors.white), + ('ALIGN', (0, 0), (-1, -1), 'LEFT'), + ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'), + ('FONTSIZE', (0, 0), (-1, 0), 10), + ('FONTSIZE', (0, 1), (-1, -1), 8), + ('BOTTOMPADDING', (0, 0), (-1, 0), 12), + ('BACKGROUND', (0, 1), (-1, -1), colors.HexColor('#f8fafc')), # Gray-50 + ('GRID', (0, 0), (-1, -1), 1, colors.HexColor('#e2e8f0')) # Gray-300 + ])) + + story.append(file_table) + story.append(Spacer(1, 20)) + + # Recommendations + story.append(Paragraph("Key Recommendations", heading_style)) + + recommendations = [] + for file_analysis in analysis.file_analyses: + if file_analysis.recommendations: + recommendations.extend(file_analysis.recommendations[:2]) # Limit recommendations per file + + if recommendations: + for i, rec in enumerate(recommendations[:10], 1): # Limit to top 10 recommendations + story.append(Paragraph(f"{i}. {rec}", styles['Normal'])) + else: + story.append(Paragraph("No specific recommendations generated.", styles['Normal'])) + + story.append(Spacer(1, 20)) + + # Footer + story.append(Paragraph("--- End of Report ---", styles['Normal'])) + story.append(Paragraph(f"Generated on {datetime.now().strftime('%B %d, %Y at %H:%M:%S')}", styles['Normal'])) + # Build PDF try: doc.build(story) diff --git a/services/ai-analysis-service/env.example b/services/ai-analysis-service/env.example index dc3beee..0106cad 100644 --- a/services/ai-analysis-service/env.example +++ b/services/ai-analysis-service/env.example @@ -6,7 +6,7 @@ HOST=0.0.0.0 NODE_ENV=development # AI API Keys -ANTHROPIC_API_KEY=your_anthropic_api_key_here +ANTHROPIC_API_KEY=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 # Database Configuration POSTGRES_HOST=localhost @@ -30,12 +30,17 @@ JWT_ACCESS_SECRET=access-secret-key-2024-tech4biz-secure_pipeline_2024 # Service URLs USER_AUTH_SERVICE_URL=http://localhost:8011 +GIT_INTEGRATION_SERVICE_URL=http://localhost:8012 # Analysis Configuration MAX_FILES_PER_ANALYSIS=100 MAX_FILE_SIZE_MB=2 ANALYSIS_TIMEOUT_SECONDS=300 +# Rate Limiting Configuration +CLAUDE_REQUESTS_PER_MINUTE=90 +RATE_LIMIT_BUFFER=10 + # Memory System Configuration WORKING_MEMORY_TTL=3600 EPISODIC_RETENTION_DAYS=365 diff --git a/services/ai-analysis-service/requirements.txt b/services/ai-analysis-service/requirements.txt index 78e4a11..f0fc6b8 100644 --- a/services/ai-analysis-service/requirements.txt +++ b/services/ai-analysis-service/requirements.txt @@ -7,6 +7,9 @@ fastapi>=0.104.1 uvicorn>=0.24.0 pydantic>=2.5.0 +# HTTP client for service communication +httpx>=0.25.0 + # Git operations GitPython>=3.1.40 diff --git a/services/ai-analysis-service/server.py b/services/ai-analysis-service/server.py index 3de8039..65c3b6f 100644 --- a/services/ai-analysis-service/server.py +++ b/services/ai-analysis-service/server.py @@ -9,8 +9,10 @@ import asyncio import json import tempfile import shutil +import time +import hashlib from pathlib import Path -from typing import Dict, Any +from typing import Dict, Any, Optional, List from datetime import datetime from fastapi import FastAPI, HTTPException, BackgroundTasks @@ -18,6 +20,8 @@ from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import FileResponse from pydantic import BaseModel import uvicorn +import httpx +import redis # Import the AI analysis components # Note: ai-analyze.py has a hyphen, so we need to handle the import specially @@ -51,11 +55,141 @@ app.add_middleware( # Global analyzer instance analyzer = None +# Rate limiter for Claude API +class ClaudeRateLimiter: + def __init__(self, requests_per_minute: int = 90): + self.requests_per_minute = requests_per_minute + self.requests = [] + self.lock = asyncio.Lock() + + async def wait_if_needed(self): + """Wait if rate limit would be exceeded.""" + async with self.lock: + now = time.time() + # Remove requests older than 1 minute + self.requests = [req_time for req_time in self.requests if now - req_time < 60] + + if len(self.requests) >= self.requests_per_minute: + sleep_time = 60 - (now - self.requests[0]) + if sleep_time > 0: + await asyncio.sleep(sleep_time) + + self.requests.append(now) + +# Git Integration Service Client +class GitIntegrationClient: + def __init__(self): + self.base_url = os.getenv('GIT_INTEGRATION_SERVICE_URL', 'http://git-integration:8012') + self.timeout = 30.0 + + async def get_repository_info(self, repository_id: str, user_id: str) -> Dict[str, Any]: + """Get repository information from git-integration service.""" + try: + async with httpx.AsyncClient(timeout=self.timeout) as client: + response = await client.get( + f"{self.base_url}/api/github/repository/{repository_id}/ui-view?view_type=tree", + headers={'x-user-id': user_id} + ) + + if response.status_code == 200: + data = response.json() + if data.get('success') and 'data' in data: + repo_info = data['data'].get('repository_info', {}) + return { + 'id': repo_info.get('id'), + 'name': repo_info.get('name'), + 'owner': repo_info.get('owner'), + 'provider': repo_info.get('provider', 'github'), + 'local_path': repo_info.get('local_path'), + 'repository_url': repo_info.get('repository_url') + } + else: + raise Exception(f"Invalid response format: {data}") + else: + raise Exception(f"Failed to get repository info: {response.text}") + + except Exception as e: + raise Exception(f"Git-integration service communication failed: {e}") + +# Analysis Cache +class AnalysisCache: + def __init__(self): + try: + self.redis = redis.Redis( + host=os.getenv('REDIS_HOST', 'redis'), + port=int(os.getenv('REDIS_PORT', 6379)), + password=os.getenv('REDIS_PASSWORD', ''), + decode_responses=True + ) + self.cache_ttl = 86400 # 24 hours + except Exception as e: + print(f"Warning: Redis connection failed: {e}") + self.redis = None + + async def get_cached_analysis(self, file_hash: str) -> Optional[Dict[str, Any]]: + """Get cached analysis result.""" + if not self.redis: + return None + + try: + cache_key = f"analysis:{file_hash}" + cached_data = self.redis.get(cache_key) + return json.loads(cached_data) if cached_data else None + except Exception: + return None + + async def cache_analysis(self, file_hash: str, result: Dict[str, Any]): + """Cache analysis result.""" + if not self.redis: + return + + try: + cache_key = f"analysis:{file_hash}" + self.redis.setex(cache_key, self.cache_ttl, json.dumps(result)) + except Exception as e: + print(f"Warning: Failed to cache analysis: {e}") + +# Content Optimizer +class ContentOptimizer: + @staticmethod + def optimize_content_for_claude(content: str, max_tokens: int = 8000) -> str: + """Optimize file content for Claude API limits.""" + if len(content) > max_tokens * 4: # Rough token estimation + # Extract important lines + lines = content.split('\n') + important_lines = [] + + for line in lines: + # Keep imports, function definitions, class definitions + if (line.strip().startswith(('import ', 'from ', 'def ', 'class ', 'export ', 'const ', 'let ', 'var ')) or + line.strip().startswith(('function ', 'class ', 'interface ', 'type '))): + important_lines.append(line) + + # Limit to 200 lines + important_lines = important_lines[:200] + optimized_content = '\n'.join(important_lines) + optimized_content += f"\n\n... [Content truncated for analysis - {len(content)} chars total]" + return optimized_content + + return content + +# Global instances +rate_limiter = ClaudeRateLimiter() +git_client = GitIntegrationClient() +analysis_cache = AnalysisCache() +content_optimizer = ContentOptimizer() + class AnalysisRequest(BaseModel): repo_path: str output_format: str = "pdf" # pdf, json max_files: int = 50 +class RepositoryAnalysisRequest(BaseModel): + repository_id: str + user_id: str + output_format: str = "pdf" # pdf, json + max_files: int = 100 + class AnalysisResponse(BaseModel): success: bool message: str @@ -98,7 +232,7 @@ async def health_check(): @app.post("/analyze", response_model=AnalysisResponse) async def analyze_repository(request: AnalysisRequest, background_tasks: BackgroundTasks): - """Analyze a repository.""" + """Analyze a repository using direct file path.""" try: if not analyzer: raise HTTPException(status_code=500, detail="Analyzer not initialized") @@ -178,6 +312,327 @@ async def analyze_repository(request: AnalysisRequest, background_tasks: Backgro stats=None ) +@app.post("/analyze-repository", response_model=AnalysisResponse) +async def analyze_repository_by_id(request: RepositoryAnalysisRequest, background_tasks: BackgroundTasks): + """Analyze a repository by ID using git-integration service.""" + try: + if not analyzer: + raise HTTPException(status_code=500, detail="Analyzer not initialized") + + # Get repository information from git-integration service + try: + repo_info = await git_client.get_repository_info(request.repository_id, request.user_id) + local_path = repo_info.get('local_path') # Keep for compatibility but don't check file system + + # Note: We no longer check local_path existence since we use API approach + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"Failed to get repository info: {str(e)}" + ) + + # Generate unique analysis ID + analysis_id = f"repo_analysis_{request.repository_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}" + + # Create temporary directory for this analysis + temp_dir = tempfile.mkdtemp(prefix=f"ai_analysis_{analysis_id}_") + + try: + # Run analysis with rate limiting and caching + analysis = await analyze_repository_with_optimizations( + local_path, + request.repository_id, + request.user_id, + request.max_files + ) + + # Generate report + if request.output_format == "pdf": + report_path = f"/app/reports/{analysis_id}_analysis.pdf" + analyzer.create_pdf_report(analysis, report_path) + else: + report_path = f"/app/reports/{analysis_id}_analysis.json" + with open(report_path, 'w') as f: + json.dump({ + "repository_id": request.repository_id, + "repo_path": analysis.repo_path, + "total_files": analysis.total_files, + "total_lines": analysis.total_lines, + "languages": analysis.languages, + "code_quality_score": analysis.code_quality_score, + "architecture_assessment": analysis.architecture_assessment, + "security_assessment": analysis.security_assessment, + "executive_summary": analysis.executive_summary, + "file_analyses": [ + { + "path": fa.path, + "language": fa.language, + "lines_of_code": fa.lines_of_code, + "severity_score": fa.severity_score, + "issues_found": fa.issues_found, + "recommendations": fa.recommendations + } for fa in analysis.file_analyses + ] + }, f, indent=2) + + # Calculate stats + stats = { + "repository_id": request.repository_id, + "total_files": analysis.total_files, + "total_lines": analysis.total_lines, + "languages": analysis.languages, + "code_quality_score": analysis.code_quality_score, + "high_quality_files": len([fa for fa in analysis.file_analyses if fa.severity_score >= 8]), + "medium_quality_files": len([fa for fa in analysis.file_analyses if 5 <= fa.severity_score < 8]), + "low_quality_files": len([fa for fa in analysis.file_analyses if fa.severity_score < 5]), + "total_issues": sum(len(fa.issues_found) for fa in analysis.file_analyses) + } + + return AnalysisResponse( + success=True, + message="Repository analysis completed successfully", + analysis_id=analysis_id, + report_path=report_path, + stats=stats + ) + + finally: + # Cleanup temporary directory + if os.path.exists(temp_dir): + shutil.rmtree(temp_dir) + + except HTTPException: + raise + except Exception as e: + return AnalysisResponse( + success=False, + message=f"Repository analysis failed: {str(e)}" + ) + +async def get_repository_files_from_api(repository_id: str, user_id: str, max_files: int = 100): + """Get repository files from Git Integration Service API.""" + try: + print(f"🔍 [DEBUG] Getting repository files for {repository_id} with user {user_id}") + + # Get repository file tree from Git Integration Service + async with httpx.AsyncClient(timeout=30.0) as client: + print(f"🔍 [DEBUG] Making request to: {git_client.base_url}/api/github/repository/{repository_id}/ui-view?view_type=tree") + response = await client.get( + f"{git_client.base_url}/api/github/repository/{repository_id}/ui-view?view_type=tree", + headers={'x-user-id': user_id} + ) + + print(f"🔍 [DEBUG] Response status: {response.status_code}") + + if response.status_code != 200: + raise Exception(f"Failed to get repository tree: {response.text}") + + data = response.json() + print(f"🔍 [DEBUG] Response data keys: {list(data.keys())}") + + if not data.get('success'): + raise Exception(f"Git Integration Service error: {data.get('message', 'Unknown error')}") + + # Extract files from the tree structure + files_to_analyze = [] + ui_data = data.get('data', {}).get('ui_data', {}) + file_tree = ui_data.get('left_panel', {}).get('file_tree', {}) + + print(f"🔍 [DEBUG] File tree type: {type(file_tree)}, keys: {list(file_tree.keys()) if isinstance(file_tree, dict) else 'Not a dict'}") + + def extract_files_from_tree(tree_node, current_path=""): + # Handle dictionary-based tree structure (not array) + if isinstance(tree_node, dict): + # If it's a file/directory node + if 'type' in tree_node: + if tree_node.get('type') == 'file': + file_path = tree_node.get('path', '') + if file_path: + files_to_analyze.append((file_path, None)) + print(f"🔍 [DEBUG] Found file: {file_path}") + elif tree_node.get('type') == 'directory' and tree_node.get('children'): + # Children is a dict, not an array + children = tree_node.get('children', {}) + if isinstance(children, dict): + for child_name, child_node in children.items(): + extract_files_from_tree(child_node, current_path) + else: + # Root level: iterate over all entries + for name, node in tree_node.items(): + extract_files_from_tree(node, current_path) + + extract_files_from_tree(file_tree) + + print(f"🔍 [DEBUG] Found {len(files_to_analyze)} files to analyze") + + # Limit files if needed + if len(files_to_analyze) > max_files: + files_to_analyze = files_to_analyze[:max_files] + print(f"🔍 [DEBUG] Limited to {max_files} files") + + # Fetch file content for each file + files_with_content = [] + for i, (file_path, _) in enumerate(files_to_analyze): + try: + print(f"🔍 [DEBUG] Fetching content for file {i+1}/{len(files_to_analyze)}: {file_path}") + + # Get file content from Git Integration Service + content_response = await client.get( + f"{git_client.base_url}/api/github/repository/{repository_id}/file-content?file_path={file_path}", + headers={'x-user-id': user_id} + ) + + if content_response.status_code == 200: + content_data = content_response.json() + if content_data.get('success'): + # Content is nested in data.content + content = content_data.get('data', {}).get('content', '') + files_with_content.append((file_path, content)) + print(f"🔍 [DEBUG] Successfully got content for {file_path} ({len(content)} chars)") + else: + print(f"Warning: Failed to get content for {file_path}: {content_data.get('message')}") + else: + print(f"Warning: Failed to get content for {file_path}: HTTP {content_response.status_code}") + + except Exception as e: + print(f"Warning: Error getting content for {file_path}: {e}") + continue + + print(f"🔍 [DEBUG] Returning {len(files_with_content)} files with content") + return files_with_content + + except Exception as e: + print(f"Error getting repository files from API: {e}") + import traceback + traceback.print_exc() + return [] + +async def analyze_repository_with_optimizations(repo_path: str, repository_id: str, user_id: str, max_files: int = 100): + """Analyze repository with rate limiting, caching, and content optimization.""" + from pathlib import Path + + try: + # Get repository files from Git Integration Service API + files_to_analyze = await get_repository_files_from_api(repository_id, user_id, max_files) + + if not files_to_analyze: + raise Exception("No files found to analyze") + + print(f"Starting optimized analysis of {len(files_to_analyze)} files...") + + file_analyses = [] + processed_files = 0 + + for i, (file_path, content) in enumerate(files_to_analyze): + print(f"Analyzing file {i+1}/{len(files_to_analyze)}: {file_path}") + + # Generate file hash for caching + file_hash = hashlib.sha256(content.encode()).hexdigest() + + # Check cache first + cached_analysis = await analysis_cache.get_cached_analysis(file_hash) + if cached_analysis: + print(f"Using cached analysis for {file_path}") + # Convert cached dictionary back to analysis object + from ai_analyze import FileAnalysis + cached_obj = FileAnalysis( + path=Path(cached_analysis["path"]), + language=cached_analysis["language"], + lines_of_code=cached_analysis["lines_of_code"], + complexity_score=cached_analysis["complexity_score"], + issues_found=cached_analysis["issues_found"], + recommendations=cached_analysis["recommendations"], + detailed_analysis=cached_analysis["detailed_analysis"], + severity_score=cached_analysis["severity_score"] + ) + file_analyses.append(cached_obj) + processed_files += 1 + continue + + # Rate limiting + await rate_limiter.wait_if_needed() + + # Optimize content for Claude API + optimized_content = content_optimizer.optimize_content_for_claude(content) + + # Analyze file with memory + try: + # Convert string file path to Path object + file_path_obj = Path(file_path) + + analysis = await analyzer.analyze_file_with_memory( + file_path_obj, + optimized_content, + repository_id + ) + + # Cache the result + analysis_dict = { + "path": str(analysis.path), + "language": analysis.language, + "lines_of_code": analysis.lines_of_code, + "complexity_score": analysis.complexity_score, + "issues_found": analysis.issues_found, + "recommendations": analysis.recommendations, + "detailed_analysis": analysis.detailed_analysis, + "severity_score": analysis.severity_score + } + + await analysis_cache.cache_analysis(file_hash, analysis_dict) + file_analyses.append(analysis) + processed_files += 1 + + except Exception as e: + print(f"Error analyzing {file_path}: {e}") + # Continue with other files + continue + + # Repository-level analysis + print("Performing repository-level analysis...") + # Use a temporary directory path since we don't have a local repo_path + temp_repo_path = f"/tmp/repo_{repository_id}" if repo_path is None else repo_path + # Create proper context_memories structure + context_memories = { + 'persistent_knowledge': [], + 'similar_analyses': [] + } + architecture_assessment, security_assessment = await analyzer.analyze_repository_overview_with_memory( + temp_repo_path, file_analyses, context_memories, repository_id + ) + + # Create repository analysis result + from ai_analyze import RepositoryAnalysis + return RepositoryAnalysis( + repo_path=str(temp_repo_path), + total_files=len(files_to_analyze), + total_lines=sum(fa.lines_of_code for fa in file_analyses), + languages=list(set(fa.language for fa in file_analyses)), + code_quality_score=sum(fa.severity_score for fa in file_analyses) / len(file_analyses) if file_analyses else 0, + architecture_assessment=architecture_assessment, + security_assessment=security_assessment, + file_analyses=file_analyses, + executive_summary=f"Analysis completed for {processed_files} files in repository {repository_id}" + ) + + except Exception as e: + print(f"Error in optimized analysis: {e}") + raise + +@app.get("/repository/{repository_id}/info") +async def get_repository_info(repository_id: str, user_id: str): + """Get repository information from git-integration service.""" + try: + repo_info = await git_client.get_repository_info(repository_id, user_id) + return { + "success": True, + "repository_info": repo_info + } + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"Failed to get repository info: {str(e)}" + ) + @app.get("/reports/{filename}") async def download_report(filename: str): """Download analysis report.""" diff --git a/services/git-integration/run-sync-status-migration.sh b/services/git-integration/run-sync-status-migration.sh new file mode 100755 index 0000000..63421ec --- /dev/null +++ b/services/git-integration/run-sync-status-migration.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +# Script to restore sync_status column in all_repositories table +# This fixes the issue where the column was removed but is still used in the codebase + +echo "==========================================" +echo "Restoring sync_status Column Migration" +echo "==========================================" +echo "" + +# Database connection parameters +DB_HOST="${POSTGRES_HOST:-localhost}" +DB_PORT="${POSTGRES_PORT:-5432}" +DB_NAME="${POSTGRES_DB:-dev_pipeline}" +DB_USER="${POSTGRES_USER:-pipeline_admin}" +DB_PASSWORD="${POSTGRES_PASSWORD:-secure_pipeline_2024}" + +echo "Database Configuration:" +echo " Host: $DB_HOST" +echo " Port: $DB_PORT" +echo " Database: $DB_NAME" +echo " User: $DB_USER" +echo "" + +# Check if running inside Docker container +if [ -f /.dockerenv ]; then + echo "Running inside Docker container" + PGPASSWORD="$DB_PASSWORD" psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -f src/migrations/023_restore_sync_status_column.sql +else + echo "Running outside Docker - using docker exec" + docker exec -i pipeline_postgres psql -U "$DB_USER" -d "$DB_NAME" < src/migrations/023_restore_sync_status_column.sql +fi + +if [ $? -eq 0 ]; then + echo "" + echo "==========================================" + echo "✅ Migration completed successfully!" + echo "==========================================" + echo "" + echo "The sync_status column has been restored to the all_repositories table." + echo "All existing repositories have been updated with appropriate sync_status values." + echo "" +else + echo "" + echo "==========================================" + echo "❌ Migration failed!" + echo "==========================================" + echo "" + echo "Please check the error messages above and try again." + echo "" + exit 1 +fi + diff --git a/services/git-integration/src/migrations/023_restore_sync_status_column.sql b/services/git-integration/src/migrations/023_restore_sync_status_column.sql new file mode 100644 index 0000000..8bc6c58 --- /dev/null +++ b/services/git-integration/src/migrations/023_restore_sync_status_column.sql @@ -0,0 +1,72 @@ +-- Migration 023: Restore sync_status column to all_repositories table +-- This migration adds back the sync_status column as it's still extensively used in the codebase +-- The column tracks the synchronization status of repositories + +-- Add the sync_status column back if it doesn't exist +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'all_repositories' + AND column_name = 'sync_status' + ) THEN + ALTER TABLE all_repositories + ADD COLUMN sync_status VARCHAR(50) DEFAULT 'pending'; + + RAISE NOTICE 'Added sync_status column to all_repositories table'; + ELSE + RAISE NOTICE 'sync_status column already exists in all_repositories table'; + END IF; +END $$; + +-- Create index for better query performance +CREATE INDEX IF NOT EXISTS idx_all_repositories_sync_status +ON all_repositories(sync_status); + +-- Update sync_status based on repository_storage.storage_status for existing records +UPDATE all_repositories ar +SET sync_status = COALESCE( + (SELECT + CASE + WHEN rs.storage_status = 'completed' THEN 'synced' + WHEN rs.storage_status = 'downloading' THEN 'syncing' + WHEN rs.storage_status = 'error' THEN 'error' + ELSE 'pending' + END + FROM repository_storage rs + WHERE rs.repository_id = ar.id + ), + 'pending' +) +WHERE ar.sync_status IS NULL OR ar.sync_status = ''; + +-- Add comment to document the column +COMMENT ON COLUMN all_repositories.sync_status IS +'Repository synchronization status: pending, syncing, synced, error, deleted. +This column tracks the overall sync status of the repository. +For detailed storage information, refer to repository_storage.storage_status'; + +-- Add trigger to keep updated_at in sync +CREATE OR REPLACE FUNCTION update_all_repositories_sync_status_timestamp() +RETURNS TRIGGER AS $$ +BEGIN + IF NEW.sync_status IS DISTINCT FROM OLD.sync_status THEN + NEW.updated_at = NOW(); + END IF; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +DROP TRIGGER IF EXISTS trigger_update_sync_status_timestamp ON all_repositories; +CREATE TRIGGER trigger_update_sync_status_timestamp + BEFORE UPDATE ON all_repositories + FOR EACH ROW + WHEN (OLD.sync_status IS DISTINCT FROM NEW.sync_status) + EXECUTE FUNCTION update_all_repositories_sync_status_timestamp(); + +-- Log successful migration +DO $$ +BEGIN + RAISE NOTICE 'Migration 023 completed: sync_status column restored to all_repositories table'; +END $$; + diff --git a/services/git-integration/src/migrations/024_fix_provider_names_from_urls.sql b/services/git-integration/src/migrations/024_fix_provider_names_from_urls.sql new file mode 100644 index 0000000..835ece8 --- /dev/null +++ b/services/git-integration/src/migrations/024_fix_provider_names_from_urls.sql @@ -0,0 +1,79 @@ +-- Migration 024: Fix provider_name based on repository URLs +-- This migration updates the provider_name field to match the actual provider from the repository URL + +-- ============================================= +-- 1. Fix all_repositories table based on URL +-- ============================================= +UPDATE all_repositories +SET provider_name = 'github' +WHERE repository_url LIKE '%github.com%' + OR repository_url LIKE '%github.io%'; + +UPDATE all_repositories +SET provider_name = 'gitlab' +WHERE repository_url LIKE '%gitlab.com%' + OR repository_url LIKE '%gitlab.io%'; + +UPDATE all_repositories +SET provider_name = 'bitbucket' +WHERE repository_url LIKE '%bitbucket.org%' + OR repository_url LIKE '%bitbucket.io%'; + +UPDATE all_repositories +SET provider_name = 'gitea' +WHERE repository_url LIKE '%gitea.com%' + OR repository_url LIKE '%gitea.io%'; + +-- ============================================= +-- 2. Fix repository_storage table (linked to all_repositories) +-- ============================================= +UPDATE repository_storage +SET provider_name = ar.provider_name +FROM all_repositories ar +WHERE repository_storage.repository_id = ar.id; + +-- ============================================= +-- 3. Fix repository_commit_details table (linked to all_repositories) +-- ============================================= +UPDATE repository_commit_details +SET provider_name = ar.provider_name +FROM all_repositories ar +WHERE repository_commit_details.repository_id = ar.id; + +-- ============================================= +-- 4. Fix repository_commit_files table (linked through repository_commit_details) +-- ============================================= +UPDATE repository_commit_files +SET provider_name = ar.provider_name +FROM all_repositories ar +JOIN repository_commit_details rcd ON rcd.repository_id = ar.id +WHERE repository_commit_files.commit_id = rcd.id; + +-- ============================================= +-- 5. Fix repository_directories table (linked to all_repositories) +-- ============================================= +UPDATE repository_directories +SET provider_name = ar.provider_name +FROM all_repositories ar +WHERE repository_directories.repository_id = ar.id; + +-- ============================================= +-- 6. Fix repository_files table (linked to all_repositories) +-- ============================================= +UPDATE repository_files +SET provider_name = ar.provider_name +FROM all_repositories ar +WHERE repository_files.repository_id = ar.id; + +-- ============================================= +-- 7. Verify the fixes +-- ============================================= +-- Show the results +SELECT + id, + repository_url, + repository_name, + owner_name, + provider_name +FROM all_repositories +ORDER BY provider_name, repository_name; diff --git a/services/git-integration/src/routes/github-integration.routes.js b/services/git-integration/src/routes/github-integration.routes.js index 8b2a1c5..83b68db 100644 --- a/services/git-integration/src/routes/github-integration.routes.js +++ b/services/git-integration/src/routes/github-integration.routes.js @@ -271,7 +271,8 @@ router.post('/attach-repository', async (req, res) => { } // Use the actual default branch from repository metadata if the requested branch doesn't exist - let actualBranch = branch || branch_name || repositoryData.default_branch || 'main'; + // Priority: 1) User's explicit branch_name, 2) Branch from URL, 3) Repository's default branch, 4) 'main' + let actualBranch = branch_name || branch || repositoryData.default_branch || 'main'; // Validate that the requested branch exists, fallback to default if not try { @@ -1182,6 +1183,66 @@ router.get('/repository/:id/file-content', async (req, res) => { }); // GitHub-like UI endpoint - Complete UI data for frontend +// Helper function to get file tree from local repository path +async function handleTreeView(repositoryId, options = {}) { + const fs = require('fs'); + const pathModule = require('path'); + + // Get repository storage path + const storageQuery = ` + SELECT local_path FROM repository_storage + WHERE repository_id = $1 AND storage_status = 'completed' + `; + const result = await database.query(storageQuery, [repositoryId]); + + if (result.rows.length === 0) { + throw new Error('Repository storage not found or not completed'); + } + + const localPath = result.rows[0].local_path; + + if (!fs.existsSync(localPath)) { + throw new Error('Repository local path does not exist'); + } + + // Recursively build file tree + function buildFileTree(dir, relativePath = '') { + const items = fs.readdirSync(dir, { withFileTypes: true }); + const tree = {}; + + for (const item of items) { + // Skip .git directory + if (item.name === '.git') continue; + + const itemPath = pathModule.join(relativePath, item.name); + const fullPath = pathModule.join(dir, item.name); + + if (item.isDirectory()) { + tree[item.name] = { + type: 'directory', + path: itemPath, + children: buildFileTree(fullPath, itemPath) + }; + } else { + tree[item.name] = { + type: 'file', + path: itemPath + }; + } + } + + return tree; + } + + const fileTree = buildFileTree(localPath); + + return { + left_panel: { + file_tree: fileTree + } + }; +} + router.get('/repository/:id/ui-view', async (req, res) => { try { const { id } = req.params; @@ -1197,7 +1258,7 @@ router.get('/repository/:id/ui-view', async (req, res) => { // Validate repository exists const repoQuery = ` SELECT gr.*, rs.storage_status, rs.local_path - FROM github_repositories gr + FROM all_repositories gr LEFT JOIN repository_storage rs ON gr.id = rs.repository_id WHERE gr.id = $1 `; diff --git a/services/git-integration/src/routes/github-oauth.js b/services/git-integration/src/routes/github-oauth.js index 3f390de..985161d 100644 --- a/services/git-integration/src/routes/github-oauth.js +++ b/services/git-integration/src/routes/github-oauth.js @@ -156,8 +156,8 @@ router.get('/auth/github/callback', async (req, res) => { INSERT INTO all_repositories ( repository_url, repository_name, owner_name, branch_name, is_public, metadata, codebase_analysis, sync_status, - requires_auth, user_id - ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) + requires_auth, user_id, provider_name + ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) RETURNING * `; const insertValues = [ @@ -171,6 +171,7 @@ router.get('/auth/github/callback', async (req, res) => { 'syncing', repositoryData.visibility === 'private', repoContext.userId || null, + 'github' // This is GitHub OAuth callback, so provider is always github ]; const insertResult = await database.query(insertQuery, insertValues); const repositoryRecord = insertResult.rows[0]; diff --git a/services/git-integration/src/routes/vcs.routes.js b/services/git-integration/src/routes/vcs.routes.js index cbe0225..8ce98bb 100644 --- a/services/git-integration/src/routes/vcs.routes.js +++ b/services/git-integration/src/routes/vcs.routes.js @@ -108,10 +108,23 @@ async function startPrivateRepoSync(providerKey, repoUrl, branchName, userId) { INSERT INTO all_repositories ( repository_url, repository_name, owner_name, branch_name, is_public, metadata, codebase_analysis, sync_status, - requires_auth, user_id - ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) + requires_auth, user_id, provider_name + ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) RETURNING * `; + const insertValues = [ + repoUrl, + repo, + owner, + actualBranch, + repositoryData.visibility === 'public', + JSON.stringify(repositoryData), + JSON.stringify(codebaseAnalysis), + 'synced', + accessCheck.requiresAuth, + userId || null, + providerKey + ]; const insertResult = await database.query(insertQuery, insertValues); repositoryRecord = insertResult.rows[0]; } @@ -240,7 +253,8 @@ router.post('/:provider/attach-repository', async (req, res) => { } const repositoryData = await provider.fetchRepositoryMetadata(owner, repo); - let actualBranch = branch || branch_name || repositoryData.default_branch || 'main'; + // Priority: 1) User's explicit branch_name, 2) Branch from URL, 3) Repository's default branch, 4) 'main' + let actualBranch = branch_name || branch || repositoryData.default_branch || 'main'; try { // No-op for non-GitHub providers if not supported; adapters can throw if needed @@ -254,8 +268,8 @@ router.post('/:provider/attach-repository', async (req, res) => { INSERT INTO all_repositories ( repository_url, repository_name, owner_name, branch_name, is_public, metadata, codebase_analysis, sync_status, - requires_auth, user_id - ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) + requires_auth, user_id, provider_name + ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) RETURNING * `; const insertValues = [ @@ -268,7 +282,8 @@ router.post('/:provider/attach-repository', async (req, res) => { JSON.stringify(codebaseAnalysis), 'synced', accessCheck.requiresAuth, - userId || null + userId || null, + providerKey // Use the provider from the route parameter ]; const insertResult = await database.query(insertQuery, insertValues); const repositoryRecord = insertResult.rows[0]; diff --git a/services/git-integration/src/utils/provider-detector.js b/services/git-integration/src/utils/provider-detector.js new file mode 100644 index 0000000..f0d8917 --- /dev/null +++ b/services/git-integration/src/utils/provider-detector.js @@ -0,0 +1,55 @@ +/** + * Provider Detection Utility + * Detects the provider (github, gitlab, bitbucket, gitea) from repository URL + */ + +/** + * Detects the provider from a repository URL + * @param {string} repositoryUrl - The repository URL + * @returns {string} - The provider name (github, gitlab, bitbucket, gitea) + */ +function detectProviderFromUrl(repositoryUrl) { + if (!repositoryUrl || typeof repositoryUrl !== 'string') { + return 'github'; // Default fallback + } + + const url = repositoryUrl.toLowerCase().trim(); + + // GitHub detection + if (url.includes('github.com') || url.includes('github.io')) { + return 'github'; + } + + // GitLab detection + if (url.includes('gitlab.com') || url.includes('gitlab.io')) { + return 'gitlab'; + } + + // Bitbucket detection + if (url.includes('bitbucket.org') || url.includes('bitbucket.io')) { + return 'bitbucket'; + } + + // Gitea detection + if (url.includes('gitea.com') || url.includes('gitea.io')) { + return 'gitea'; + } + + // Default fallback + return 'github'; +} + +/** + * Validates if a provider is supported + * @param {string} provider - The provider name + * @returns {boolean} - True if supported + */ +function isSupportedProvider(provider) { + const supportedProviders = ['github', 'gitlab', 'bitbucket', 'gitea']; + return supportedProviders.includes(provider.toLowerCase()); +} + +module.exports = { + detectProviderFromUrl, + isSupportedProvider +};