#!/usr/bin/env python3 """ AI Analysis Service HTTP Server Provides REST API endpoints for repository analysis. """ import os import asyncio import json import tempfile import shutil import time import hashlib import traceback import uuid from pathlib import Path from typing import Dict, Any, Optional, List, Tuple from datetime import datetime from contextlib import asynccontextmanager from fastapi import FastAPI, HTTPException, BackgroundTasks, Request from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import FileResponse, JSONResponse, StreamingResponse from pydantic import BaseModel import uvicorn import mimetypes import httpx import redis # PostgreSQL cursor for querying try: from psycopg2.extras import RealDictCursor except ImportError: # Fallback if psycopg2 not available RealDictCursor = None # Import the AI analysis components # Note: ai-analyze.py has a hyphen, so we need to handle the import specially import sys import importlib.util # Load the ai-analyze.py module spec = importlib.util.spec_from_file_location("ai_analyze", "ai-analyze.py") ai_analyze_module = importlib.util.module_from_spec(spec) sys.modules["ai_analyze"] = ai_analyze_module spec.loader.exec_module(ai_analyze_module) # Now import the classes from ai_analyze import ( EnhancedGitHubAnalyzer, get_memory_config, ArchitectureAnalysis, SecurityAnalysis, CodeQualityAnalysis, PerformanceAnalysis, Issue, ModuleAnalysis, ModuleSummary ) # Import enhanced analyzer (backward compatible) try: from enhanced_analyzer import EnhancedGitHubAnalyzerV2, create_enhanced_analyzer ENHANCED_ANALYZER_AVAILABLE = True except ImportError as e: print(f"Enhanced analyzer not available: {e}") ENHANCED_ANALYZER_AVAILABLE = False # Import progress manager from progress_manager import AnalysisProgressManager, progress_tracker # Global analyzer instance analyzer = None @asynccontextmanager async def lifespan(app: FastAPI): """Lifespan context manager for startup and shutdown events.""" # Startup global analyzer try: # Load environment variables from dotenv import load_dotenv load_dotenv() # Get API key api_key = os.getenv('ANTHROPIC_API_KEY') if not api_key: raise Exception("ANTHROPIC_API_KEY not found in environment") # Initialize analyzer with enhanced capabilities if available config = get_memory_config() # Add OPTIMIZED performance settings to config config.update({ 'max_workers': 10, # Optimized parallel processing workers (REDUCED for better throughput) 'batch_size': 10, # REDUCED batch size for faster first results (was 20) 'cache_ttl': 3600, # Cache TTL (1 hour) 'max_file_size': 0, # No file size limit (0 = unlimited) 'analysis_timeout': 1800, # 30 minute timeout for large repositories 'fast_mode': False, # Disable fast mode to use full AI analysis 'parallel_processing': True, # Enable parallel processing 'rate_limit_batch_size': 10, # REDUCED batch size for rate limiting (was 20) 'redis_host': 'pipeline_redis', # Use Docker service name for Redis 'redis_port': 6379, # Use standard Redis port 'redis_password': 'redis_secure_2024', 'mongodb_url': 'mongodb://pipeline_admin:mongo_secure_2024@pipeline_mongodb:27017/', 'postgres_host': 'pipeline_postgres', 'postgres_password': 'secure_pipeline_2024' }) if ENHANCED_ANALYZER_AVAILABLE: print("✅ Using Enhanced Analyzer with intelligent chunking and parallel processing") analyzer = create_enhanced_analyzer(api_key, config) else: print("✅ Using Standard Analyzer with performance optimizations") analyzer = EnhancedGitHubAnalyzer(api_key, config) print("✅ AI Analysis Service initialized successfully") except Exception as e: print(f"❌ Failed to initialize AI Analysis Service: {e}") raise yield # Shutdown (if needed) # Cleanup code can go here if needed app = FastAPI( title="AI Analysis Service", description="AI-powered repository analysis with memory system", version="1.0.0", lifespan=lifespan ) # CORS middleware app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) # Optimized Token Bucket Rate Limiter for Claude API # Updated to match actual billing plan: 2K requests/min, token limits vary by model class TokenBucketRateLimiter: def __init__(self, capacity: int = 2000, refill_rate: float = 33.33): """ Token bucket for request rate limiting. Default: 2000 requests/minute (2K per billing plan) Refill rate: 2000 / 60 = 33.33 requests per second """ self.capacity = capacity self.tokens = capacity self.refill_rate = refill_rate # tokens per second (2000 requests / 60 seconds = 33.33) self.last_update = time.time() self.lock = asyncio.Lock() async def acquire(self, tokens: int = 1): """Acquire tokens from the bucket.""" async with self.lock: now = time.time() # Refill tokens based on time elapsed elapsed = now - self.last_update self.tokens = min(self.capacity, self.tokens + elapsed * self.refill_rate) self.last_update = now if self.tokens >= tokens: self.tokens -= tokens return True else: # Wait for tokens to refill wait_time = (tokens - self.tokens) / self.refill_rate await asyncio.sleep(wait_time) self.tokens = 0 return True # Token-based rate limiter (NEW - for input/output token limits) class TokenUsageRateLimiter: """ Rate limiter for token usage per minute based on billing plan. Tracks both input and output tokens separately. """ # Billing plan limits (from your API plan) PLAN_LIMITS = { "claude-3-5-haiku-latest": { "requests_per_minute": 2000, "input_tokens_per_minute": 200_000, # Claude Haiku 3.5: 200K input/min "output_tokens_per_minute": 40_000, # Claude Haiku 3.5: 40K output/min }, "claude-3-5-sonnet-20241022": { "requests_per_minute": 2000, "input_tokens_per_minute": 800_000, # Claude Sonnet 4.x: 800K input/min "output_tokens_per_minute": 160_000, # Claude Sonnet 4.x: 160K output/min }, "claude-3-opus-20240229": { "requests_per_minute": 2000, "input_tokens_per_minute": 800_000, # Claude Opus 4.x: 800K input/min "output_tokens_per_minute": 160_000, # Claude Opus 4.x: 160K output/min }, "claude-3-5-haiku-20241022": { "requests_per_minute": 2000, "input_tokens_per_minute": 1_000_000, # Claude Haiku 4.x: 1M input/min "output_tokens_per_minute": 200_000, # Claude Haiku 4.x: 200K output/min }, } def __init__(self, model: str = "claude-3-5-haiku-latest"): self.model = model limits = self.PLAN_LIMITS.get(model, self.PLAN_LIMITS["claude-3-5-haiku-latest"]) self.input_tokens_per_minute = limits["input_tokens_per_minute"] self.output_tokens_per_minute = limits["output_tokens_per_minute"] # Token usage tracking (sliding window) self.input_token_usage = [] # List of (timestamp, tokens) tuples self.output_token_usage = [] # List of (timestamp, tokens) tuples self.lock = asyncio.Lock() print(f"📊 [RATE LIMITER] Initialized for model: {model}") print(f" • Input tokens/min: {self.input_tokens_per_minute:,}") print(f" • Output tokens/min: {self.output_tokens_per_minute:,}") def _cleanup_old_usage(self, usage_list: List[Tuple[float, int]], window_seconds: int = 60): """Remove usage records older than window_seconds.""" now = time.time() cutoff = now - window_seconds return [(ts, tokens) for ts, tokens in usage_list if ts > cutoff] async def check_token_limits(self, input_tokens: int, output_tokens: int) -> Tuple[bool, float]: """ Check if token usage would exceed limits. Returns: (can_proceed, wait_time_seconds) """ async with self.lock: now = time.time() # Clean up old usage records (sliding 60-second window) self.input_token_usage = self._cleanup_old_usage(self.input_token_usage, 60) self.output_token_usage = self._cleanup_old_usage(self.output_token_usage, 60) # Calculate current usage in the last minute current_input_usage = sum(tokens for _, tokens in self.input_token_usage) current_output_usage = sum(tokens for _, tokens in self.output_token_usage) # Check if adding these tokens would exceed limits new_input_usage = current_input_usage + input_tokens new_output_usage = current_output_usage + output_tokens input_exceeded = new_input_usage > self.input_tokens_per_minute output_exceeded = new_output_usage > self.output_tokens_per_minute if input_exceeded or output_exceeded: # Calculate wait time (wait until oldest usage expires) if self.input_token_usage: oldest_input_time = min(ts for ts, _ in self.input_token_usage) wait_time = max(0, 60 - (now - oldest_input_time)) elif self.output_token_usage: oldest_output_time = min(ts for ts, _ in self.output_token_usage) wait_time = max(0, 60 - (now - oldest_output_time)) else: wait_time = 0 if input_exceeded: print(f"⚠️ [TOKEN LIMIT] Input tokens would exceed limit!") print(f" Current: {current_input_usage:,} + {input_tokens:,} = {new_input_usage:,}") print(f" Limit: {self.input_tokens_per_minute:,} input tokens/min") print(f" Wait time: {wait_time:.2f} seconds") if output_exceeded: print(f"⚠️ [TOKEN LIMIT] Output tokens would exceed limit!") print(f" Current: {current_output_usage:,} + {output_tokens:,} = {new_output_usage:,}") print(f" Limit: {self.output_tokens_per_minute:,} output tokens/min") print(f" Wait time: {wait_time:.2f} seconds") return False, wait_time # Don't record usage here - that's done by record_token_usage() after API call # This method only checks if we can proceed # Log usage if approaching limits (80% threshold) input_usage_pct = (current_input_usage / self.input_tokens_per_minute) * 100 output_usage_pct = (current_output_usage / self.output_tokens_per_minute) * 100 if input_usage_pct > 80 or output_usage_pct > 80: print(f"⚠️ [TOKEN USAGE] Approaching limits:") print(f" Input: {current_input_usage:,}/{self.input_tokens_per_minute:,} ({input_usage_pct:.1f}%)") print(f" Output: {current_output_usage:,}/{self.output_tokens_per_minute:,} ({output_usage_pct:.1f}%)") return True, 0.0 async def record_token_usage(self, input_tokens: int, output_tokens: int): """ Record token usage without checking limits (for actual usage after API call). This is used to update the limiter with actual usage after an API call completes. """ async with self.lock: now = time.time() # Record usage self.input_token_usage.append((now, input_tokens)) self.output_token_usage.append((now, output_tokens)) def get_current_usage(self) -> Dict[str, Any]: """Get current token usage statistics.""" async def _get_usage(): async with self.lock: now = time.time() self.input_token_usage = self._cleanup_old_usage(self.input_token_usage, 60) self.output_token_usage = self._cleanup_old_usage(self.output_token_usage, 60) current_input = sum(tokens for _, tokens in self.input_token_usage) current_output = sum(tokens for _, tokens in self.output_token_usage) return { "model": self.model, "input_tokens_used": current_input, "input_tokens_limit": self.input_tokens_per_minute, "input_tokens_remaining": max(0, self.input_tokens_per_minute - current_input), "input_usage_percent": (current_input / self.input_tokens_per_minute) * 100, "output_tokens_used": current_output, "output_tokens_limit": self.output_tokens_per_minute, "output_tokens_remaining": max(0, self.output_tokens_per_minute - current_output), "output_usage_percent": (current_output / self.output_tokens_per_minute) * 100, } # This is a sync method, but we need async - return a coroutine import asyncio try: loop = asyncio.get_event_loop() if loop.is_running(): # If loop is running, we can't use run_until_complete # Return a dict that will be updated return { "model": self.model, "input_tokens_limit": self.input_tokens_per_minute, "output_tokens_limit": self.output_tokens_per_minute, "note": "Usage stats require async context" } else: return loop.run_until_complete(_get_usage()) except: return { "model": self.model, "input_tokens_limit": self.input_tokens_per_minute, "output_tokens_limit": self.output_tokens_per_minute, } # Batch Rate Limiter for parallel processing # Updated to match billing plan: 2K requests/minute class BatchRateLimiter: def __init__(self, batch_size: int = 10, requests_per_minute: int = 2000): """ Batch rate limiter for parallel chunk processing. Default: 2000 requests/minute (2K per billing plan) """ self.batch_size = batch_size self.requests_per_minute = requests_per_minute # Fixed calculation: batches_per_minute = requests_per_minute / files_per_batch # For smart batching: 5 files per batch, so batches_per_minute = requests_per_minute / 5 # batch_interval = 60 / batches_per_minute self.files_per_batch = 5 # Smart batching uses 5 files per batch self.batches_per_minute = requests_per_minute / self.files_per_batch self.batch_interval = 60 / self.batches_per_minute # Time between batches self.last_batch_time = 0 self.lock = asyncio.Lock() async def wait_for_batch(self): """Wait for the next batch slot (only if needed).""" async with self.lock: now = time.time() time_since_last = now - self.last_batch_time # Only wait if we're sending batches too fast if time_since_last < self.batch_interval: wait_time = self.batch_interval - time_since_last if wait_time > 0.01: # Only wait if more than 10ms await asyncio.sleep(wait_time) self.last_batch_time = time.time() # Legacy rate limiter for backward compatibility # Updated to match billing plan: 2K requests/minute class ClaudeRateLimiter: def __init__(self, requests_per_minute: int = 2000): """ Rate limiter for Claude API requests. Default: 2000 requests/minute (2K per billing plan) """ self.token_bucket = TokenBucketRateLimiter(requests_per_minute, requests_per_minute / 60) async def wait_if_needed(self): """Wait if rate limit would be exceeded.""" await self.token_bucket.acquire(1) # Git Integration Service Client class GitIntegrationClient: def __init__(self): self.base_url = os.getenv('GIT_INTEGRATION_SERVICE_URL', 'http://git-integration:8012') self.timeout = 30.0 async def get_repository_info(self, repository_id: str, user_id: str) -> Dict[str, Any]: """Get repository information from git-integration service.""" try: print(f"🔍 [DEBUG] Getting repository info for ID: {repository_id}, User: {user_id}") print(f"🔍 [DEBUG] Git integration URL: {self.base_url}") async with httpx.AsyncClient(timeout=self.timeout) as client: # Get repository info from the diffs endpoint url = f"{self.base_url}/api/diffs/repositories" headers = {'x-user-id': user_id} print(f"🔍 [DEBUG] Making request to: {url}") print(f"🔍 [DEBUG] Headers: {headers}") response = await client.get(url, headers=headers) print(f"🔍 [DEBUG] Response status: {response.status_code}") print(f"🔍 [DEBUG] Response headers: {dict(response.headers)}") if response.status_code == 200: data = response.json() print(f"🔍 [DEBUG] Response data: {data}") if data.get('success') and 'data' in data: repositories = data['data'].get('repositories', []) print(f"🔍 [DEBUG] Found {len(repositories)} repositories") for repo in repositories: print(f"🔍 [DEBUG] Checking repo: {repo.get('id')} vs {repository_id}") if repo.get('id') == repository_id: result = { 'id': repo.get('id'), 'name': repo.get('repository_name'), 'owner': repo.get('owner_name'), 'provider': repo.get('provider_name', 'github'), 'local_path': f"/tmp/attached-repos/{repo.get('owner_name')}__{repo.get('repository_name')}__main", 'repository_url': f"https://github.com/{repo.get('owner_name')}/{repo.get('repository_name')}" } print(f"🔍 [DEBUG] Found repository: {result}") return result print(f"❌ [DEBUG] Repository {repository_id} not found in {len(repositories)} repositories") raise Exception(f"Repository {repository_id} not found") else: print(f"❌ [DEBUG] Invalid response format: {data}") raise Exception(f"Invalid response format: {data}") else: print(f"❌ [DEBUG] HTTP error: {response.status_code} - {response.text}") raise Exception(f"Failed to get repository info: {response.text}") except Exception as e: print(f"❌ [DEBUG] Exception in get_repository_info: {e}") raise Exception(f"Git-integration service communication failed: {e}") # Analysis Cache class AnalysisCache: def __init__(self): try: self.redis = redis.Redis( host=os.getenv('REDIS_HOST', 'redis'), port=int(os.getenv('REDIS_PORT', 6379)), password=os.getenv('REDIS_PASSWORD', ''), decode_responses=True ) self.cache_ttl = 86400 # 24 hours except Exception as e: print(f"Warning: Redis connection failed: {e}") self.redis = None async def get_cached_analysis(self, file_hash: str) -> Optional[Dict[str, Any]]: """Get cached analysis result.""" if not self.redis: return None try: cache_key = f"analysis:{file_hash}" cached_data = self.redis.get(cache_key) return json.loads(cached_data) if cached_data else None except Exception: return None async def cache_analysis(self, file_hash: str, result: Dict[str, Any]): """Cache analysis result.""" if not self.redis: return try: cache_key = f"analysis:{file_hash}" self.redis.setex(cache_key, self.cache_ttl, json.dumps(result)) except Exception as e: print(f"Warning: Failed to cache analysis: {e}") # Optimized Content Optimizer class ContentOptimizer: @staticmethod def optimize_content_for_claude(content: str, max_tokens: int = 4000) -> str: """Optimize file content for Claude API limits with intelligent truncation.""" if content is None: return "" # Rough token estimation (4 chars per token) if len(content) <= max_tokens * 4: return content lines = content.split('\n') important_lines = [] # Keep important lines (imports, functions, classes, comments) for line in lines: stripped = line.strip() if (stripped.startswith(('import ', 'from ', 'def ', 'class ', 'export ', 'const ', 'let ', 'var ')) or stripped.startswith(('function ', 'interface ', 'type ', 'enum ')) or stripped.startswith(('//', '#', '/*', '*', '