#!/usr/bin/env python3 """ AI Analysis Service HTTP Server Provides REST API endpoints for repository analysis. """ import os import asyncio import json import tempfile import shutil import time import hashlib import traceback from pathlib import Path from typing import Dict, Any, Optional, List, Tuple from datetime import datetime from fastapi import FastAPI, HTTPException, BackgroundTasks, Request from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import FileResponse, JSONResponse, StreamingResponse from pydantic import BaseModel import uvicorn import httpx import redis # Import the AI analysis components # Note: ai-analyze.py has a hyphen, so we need to handle the import specially import sys import importlib.util # Load the ai-analyze.py module spec = importlib.util.spec_from_file_location("ai_analyze", "ai-analyze.py") ai_analyze_module = importlib.util.module_from_spec(spec) sys.modules["ai_analyze"] = ai_analyze_module spec.loader.exec_module(ai_analyze_module) # Now import the classes from ai_analyze import EnhancedGitHubAnalyzer, get_memory_config # Import enhanced analyzer (backward compatible) try: from enhanced_analyzer import EnhancedGitHubAnalyzerV2, create_enhanced_analyzer ENHANCED_ANALYZER_AVAILABLE = True except ImportError as e: print(f"Enhanced analyzer not available: {e}") ENHANCED_ANALYZER_AVAILABLE = False # Import progress manager from progress_manager import AnalysisProgressManager, progress_tracker app = FastAPI( title="AI Analysis Service", description="AI-powered repository analysis with memory system", version="1.0.0" ) # CORS middleware app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) # Global analyzer instance analyzer = None # Optimized Token Bucket Rate Limiter for Claude API class TokenBucketRateLimiter: def __init__(self, capacity: int = 90, refill_rate: float = 1.5): self.capacity = capacity self.tokens = capacity self.refill_rate = 
class TokenBucketRateLimiter:
    """Asynchronous token-bucket rate limiter.

    The bucket starts full (``capacity`` tokens) and is refilled continuously
    at ``refill_rate`` tokens per second, never holding more than ``capacity``.
    """

    def __init__(self, capacity: int = 90, refill_rate: float = 1.5):
        self.capacity = capacity
        self.tokens = capacity
        self.refill_rate = refill_rate  # tokens per second
        self.last_update = time.time()
        self.lock = asyncio.Lock()

    def _refill(self) -> None:
        """Credit the tokens earned since ``last_update`` and move the stamp."""
        now = time.time()
        # Clamp at zero so a backwards wall-clock step cannot drain the bucket.
        elapsed = max(0.0, now - self.last_update)
        self.tokens = min(self.capacity, self.tokens + elapsed * self.refill_rate)
        self.last_update = now

    async def acquire(self, tokens: int = 1):
        """Acquire ``tokens`` from the bucket, sleeping until they are available.

        Args:
            tokens: Number of tokens to take.

        Returns:
            Always ``True``; the call only returns once the tokens are granted.

        Raises:
            ZeroDivisionError: If a wait is required and ``refill_rate`` is 0.
        """
        # The lock is held across the sleep, so concurrent callers are served
        # one at a time rather than all waking up together.
        async with self.lock:
            self._refill()
            if self.tokens < tokens:
                wait_time = (tokens - self.tokens) / self.refill_rate
                await asyncio.sleep(wait_time)
                # Account for the time spent sleeping. Without this second
                # refill, ``last_update`` would still point at the pre-sleep
                # instant and the next caller would be credited the same
                # interval again, exceeding the configured rate.
                self._refill()
            # Requests larger than ``capacity`` (or a marginally early wake-up)
            # can leave a small deficit; the bucket bottoms out at empty.
            self.tokens = max(0, self.tokens - tokens)
            return True


# Batch Rate Limiter for parallel processing
class BatchRateLimiter:
    """Spaces out batches so ``batch_size`` requests per batch stay within
    ``requests_per_minute``."""

    def __init__(self, batch_size: int = 10, requests_per_minute: int = 90):
        self.batch_size = batch_size
        self.requests_per_minute = requests_per_minute
        # Seconds between batches: one minute divided by batches-per-minute.
        self.batch_interval = 60 / (requests_per_minute / batch_size)
        self.last_batch_time = 0
        self.lock = asyncio.Lock()

    async def wait_for_batch(self):
        """Wait for the next batch slot, then record the slot's start time."""
        async with self.lock:
            time_since_last = time.time() - self.last_batch_time
            if time_since_last < self.batch_interval:
                await asyncio.sleep(self.batch_interval - time_since_last)
            self.last_batch_time = time.time()


# Legacy rate limiter for backward compatibility
class ClaudeRateLimiter:
    """Thin wrapper exposing the old ``wait_if_needed`` API on a token bucket."""

    def __init__(self, requests_per_minute: int = 90):
        # Bucket capacity is one minute's worth; the refill rate is per second.
        self.token_bucket = TokenBucketRateLimiter(
            requests_per_minute, requests_per_minute / 60
        )

    async def wait_if_needed(self):
        """Wait if rate limit would be exceeded."""
        await self.token_bucket.acquire(1)
# Git Integration Service Client
class GitIntegrationClient:
    """HTTP client for looking up repositories in the git-integration service."""

    def __init__(self):
        # Service base URL is configurable through the environment.
        self.base_url = os.getenv('GIT_INTEGRATION_SERVICE_URL', 'http://git-integration:8012')
        self.timeout = 30.0

    async def get_repository_info(self, repository_id: str, user_id: str) -> Dict[str, Any]:
        """Get repository information from git-integration service.

        Lists the repositories returned by ``/api/diffs/repositories`` for
        ``user_id`` (sent as the ``x-user-id`` header) and returns the entry
        whose ``id`` matches ``repository_id``.

        Args:
            repository_id: Identifier of the repository to look up.
            user_id: Identifier forwarded in the ``x-user-id`` header.

        Returns:
            A dict with the keys ``id``, ``name``, ``owner``, ``provider``,
            ``local_path`` and ``repository_url``.

        Raises:
            Exception: On any failure (non-200 status, unexpected payload,
                repository not found, transport error). The message is always
                prefixed with "Git-integration service communication failed"
                and the original error is attached as ``__cause__``.
        """
        try:
            print(f"🔍 [DEBUG] Getting repository info for ID: {repository_id}, User: {user_id}")
            print(f"🔍 [DEBUG] Git integration URL: {self.base_url}")

            async with httpx.AsyncClient(timeout=self.timeout) as client:
                # Get repository info from the diffs endpoint
                url = f"{self.base_url}/api/diffs/repositories"
                headers = {'x-user-id': user_id}

                print(f"🔍 [DEBUG] Making request to: {url}")
                print(f"🔍 [DEBUG] Headers: {headers}")

                response = await client.get(url, headers=headers)

                print(f"🔍 [DEBUG] Response status: {response.status_code}")
                print(f"🔍 [DEBUG] Response headers: {dict(response.headers)}")

                if response.status_code != 200:
                    print(f"❌ [DEBUG] HTTP error: {response.status_code} - {response.text}")
                    raise Exception(f"Failed to get repository info: {response.text}")

                data = response.json()
                print(f"🔍 [DEBUG] Response data: {data}")

                if not (data.get('success') and 'data' in data):
                    print(f"❌ [DEBUG] Invalid response format: {data}")
                    raise Exception(f"Invalid response format: {data}")

                repositories = data['data'].get('repositories', [])
                print(f"🔍 [DEBUG] Found {len(repositories)} repositories")

                for repo in repositories:
                    repo_id = repo.get('id')
                    print(f"🔍 [DEBUG] Checking repo: {repo_id} vs {repository_id}")
                    # Also compare as strings: the service may return a
                    # non-string id (e.g. an integer) while callers pass the
                    # id as text, and a strict == would never match.
                    same_id = repo_id == repository_id or (
                        repo_id is not None and str(repo_id) == str(repository_id)
                    )
                    if not same_id:
                        continue

                    owner = repo.get('owner_name')
                    name = repo.get('repository_name')
                    # NOTE(review): the local path always ends in "__main" and
                    # the URL always points at github.com, whatever
                    # 'provider_name' says — confirm this is intended.
                    result = {
                        'id': repo_id,
                        'name': name,
                        'owner': owner,
                        'provider': repo.get('provider_name', 'github'),
                        'local_path': f"/tmp/attached-repos/{owner}__{name}__main",
                        'repository_url': f"https://github.com/{owner}/{name}"
                    }
                    print(f"🔍 [DEBUG] Found repository: {result}")
                    return result

                print(f"❌ [DEBUG] Repository {repository_id} not found in {len(repositories)} repositories")
                raise Exception(f"Repository {repository_id} not found")

        except Exception as e:
            print(f"❌ [DEBUG] Exception in get_repository_info: {e}")
            # Chain explicitly so the root cause survives in tracebacks.
            raise Exception(f"Git-integration service communication failed: {e}") from e
# Analysis Cache
class AnalysisCache:
    """Best-effort Redis cache for analysis results keyed by file hash.

    Entries are stored as JSON under ``analysis:<file_hash>`` and expire after
    ``cache_ttl`` seconds. Every failure is reported as a warning and treated
    as a cache miss; nothing here raises to the caller.
    """

    def __init__(self):
        # Set before the try block so the attribute exists even when client
        # construction fails.
        self.cache_ttl = 86400  # 24 hours
        try:
            self.redis = redis.Redis(
                host=os.getenv('REDIS_HOST', 'redis'),
                port=int(os.getenv('REDIS_PORT', 6379)),
                password=os.getenv('REDIS_PASSWORD', ''),
                decode_responses=True
            )
        except Exception as e:
            print(f"Warning: Redis connection failed: {e}")
            self.redis = None

    async def get_cached_analysis(self, file_hash: str) -> Optional[Dict[str, Any]]:
        """Get cached analysis result.

        Args:
            file_hash: Hash identifying the analysed file content.

        Returns:
            The decoded cached result, or ``None`` when caching is disabled,
            the key is absent, or the lookup/decoding fails.
        """
        if not self.redis:
            return None

        try:
            cache_key = f"analysis:{file_hash}"
            # The redis client is synchronous; run it in the default executor
            # so a slow or unreachable server does not stall the event loop.
            loop = asyncio.get_running_loop()
            cached_data = await loop.run_in_executor(None, self.redis.get, cache_key)
            return json.loads(cached_data) if cached_data else None
        except Exception as e:
            # Still a miss for the caller, but no longer a silent one.
            print(f"Warning: Failed to read cached analysis: {e}")
            return None

    async def cache_analysis(self, file_hash: str, result: Dict[str, Any]):
        """Cache analysis result.

        Args:
            file_hash: Hash identifying the analysed file content.
            result: JSON-serialisable analysis result to store.
        """
        if not self.redis:
            return

        try:
            cache_key = f"analysis:{file_hash}"
            payload = json.dumps(result)
            # Same reasoning as the read path: keep blocking I/O off the loop.
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(
                None, self.redis.setex, cache_key, self.cache_ttl, payload
            )
        except Exception as e:
            print(f"Warning: Failed to cache analysis: {e}")