#!/usr/bin/env python3
"""
AI Analysis Service HTTP Server
Provides REST API endpoints for repository analysis.
"""

import os
import asyncio
import json
import tempfile
import shutil
import time
import hashlib
import traceback
from pathlib import Path
from typing import Dict, Any, Optional, List
from datetime import datetime

from fastapi import FastAPI, HTTPException, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, JSONResponse
from pydantic import BaseModel
import uvicorn
import httpx
import redis

# Import the AI analysis components
# Note: ai-analyze.py has a hyphen, so we need to handle the import specially
import sys
import importlib.util

# Load the ai-analyze.py module
spec = importlib.util.spec_from_file_location("ai_analyze", "ai-analyze.py")
ai_analyze_module = importlib.util.module_from_spec(spec)
sys.modules["ai_analyze"] = ai_analyze_module
spec.loader.exec_module(ai_analyze_module)

# Now import the classes
from ai_analyze import EnhancedGitHubAnalyzer, get_memory_config

# Import enhanced analyzer (backward compatible)
try:
    from enhanced_analyzer import EnhancedGitHubAnalyzerV2, create_enhanced_analyzer
    ENHANCED_ANALYZER_AVAILABLE = True
except ImportError as e:
    print(f"Enhanced analyzer not available: {e}")
    ENHANCED_ANALYZER_AVAILABLE = False

app = FastAPI(
    title="AI Analysis Service",
    description="AI-powered repository analysis with memory system",
    version="1.0.0"
)

# CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global analyzer instance
analyzer = None

# Rate limiter for Claude API
class ClaudeRateLimiter:
    def __init__(self, requests_per_minute: int = 90):
        self.requests_per_minute = requests_per_minute
        self.requests = []
        self.lock = asyncio.Lock()

    async def wait_if_needed(self):
        """Wait if rate limit would be exceeded."""
        async with self.lock:
            now = time.time()
            # Remove requests older than 1 minute
            self.requests = [req_time for req_time in self.requests if now - req_time < 60]

            if len(self.requests) >= self.requests_per_minute:
                sleep_time = 60 - (now - self.requests[0])
                if sleep_time > 0:
                    await asyncio.sleep(sleep_time)

            self.requests.append(now)
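
# The limiter keeps a sliding one-minute window of request timestamps; once
# `requests_per_minute` timestamps sit in the window, the next caller sleeps
# until the oldest one expires. Illustrative usage (this is how
# analyze_repository_with_optimizations below uses it):
#
#     await rate_limiter.wait_if_needed()
#     # ...then make the Claude API call...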

# Git Integration Service Client
class GitIntegrationClient:
    def __init__(self):
        self.base_url = os.getenv('GIT_INTEGRATION_SERVICE_URL', 'http://git-integration:8012')
        self.timeout = 30.0

    async def get_repository_info(self, repository_id: str, user_id: str) -> Dict[str, Any]:
        """Get repository information from git-integration service."""
        try:
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                # Get repository info from the diffs endpoint
                response = await client.get(
                    f"{self.base_url}/api/diffs/repositories",
                    headers={'x-user-id': user_id}
                )

                if response.status_code == 200:
                    data = response.json()
                    if data.get('success') and 'data' in data:
                        repositories = data['data'].get('repositories', [])
                        for repo in repositories:
                            if repo.get('id') == repository_id:
                                return {
                                    'id': repo.get('id'),
                                    'name': repo.get('repository_name'),
                                    'owner': repo.get('owner_name'),
                                    'provider': repo.get('provider_name', 'github'),
                                    'local_path': f"/tmp/attached-repos/{repo.get('owner_name')}__{repo.get('repository_name')}__main",
                                    'repository_url': f"https://github.com/{repo.get('owner_name')}/{repo.get('repository_name')}"
                                }
                        raise Exception(f"Repository {repository_id} not found")
                    else:
                        raise Exception(f"Invalid response format: {data}")
                else:
                    raise Exception(f"Failed to get repository info: {response.text}")

        except Exception as e:
            raise Exception(f"Git-integration service communication failed: {e}")

# Analysis Cache
class AnalysisCache:
    def __init__(self):
        try:
            self.redis = redis.Redis(
                host=os.getenv('REDIS_HOST', 'redis'),
                port=int(os.getenv('REDIS_PORT', 6379)),
                password=os.getenv('REDIS_PASSWORD', ''),
                decode_responses=True
            )
            self.cache_ttl = 86400  # 24 hours
        except Exception as e:
            print(f"Warning: Redis connection failed: {e}")
            self.redis = None

    async def get_cached_analysis(self, file_hash: str) -> Optional[Dict[str, Any]]:
        """Get cached analysis result."""
        if not self.redis:
            return None

        try:
            cache_key = f"analysis:{file_hash}"
            cached_data = self.redis.get(cache_key)
            return json.loads(cached_data) if cached_data else None
        except Exception:
            return None

    async def cache_analysis(self, file_hash: str, result: Dict[str, Any]):
        """Cache analysis result."""
        if not self.redis:
            return

        try:
            cache_key = f"analysis:{file_hash}"
            self.redis.setex(cache_key, self.cache_ttl, json.dumps(result))
        except Exception as e:
            print(f"Warning: Failed to cache analysis: {e}")

# Content Optimizer
class ContentOptimizer:
    @staticmethod
    def optimize_content_for_claude(content: str, max_tokens: int = 8000) -> str:
        """Optimize file content for Claude API limits."""
        if content is None:
            return ""
        if len(content) > max_tokens * 4:  # Rough token estimation
            # Extract important lines
            lines = content.split('\n')
            important_lines = []

            for line in lines:
                # Keep imports, function definitions, class definitions
                if (line.strip().startswith(('import ', 'from ', 'def ', 'class ', 'export ', 'const ', 'let ', 'var ')) or
                        line.strip().startswith(('function ', 'class ', 'interface ', 'type '))):
                    important_lines.append(line)

            # Limit to 200 lines
            important_lines = important_lines[:200]
            optimized_content = '\n'.join(important_lines)
            optimized_content += f"\n\n... [Content truncated for analysis - {len(content)} chars total]"
            return optimized_content

        return content
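
# Rough sizing for the truncation heuristic above, assuming ~4 characters per
# token: with max_tokens=8000, any file over ~32,000 characters is reduced to
# its structural lines (imports and definitions), capped at 200 lines.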

# Sanitizers to ensure JSON-serializable, primitive types
def sanitize_analysis_result(analysis):
    """Ensure analysis object only contains JSON-serializable types."""
    try:
        print("🔍 Sanitizing analysis object...")

        # Sanitize repo_path
        try:
            if hasattr(analysis, 'repo_path'):
                analysis.repo_path = str(analysis.repo_path) if analysis.repo_path else ""
        except Exception as e:
            print(f"⚠️ Error sanitizing repo_path: {e}")
            analysis.repo_path = ""

        # Sanitize file_analyses list
        try:
            if hasattr(analysis, 'file_analyses') and analysis.file_analyses:
                print(f"🔍 Sanitizing {len(analysis.file_analyses)} file analyses...")
                for idx, fa in enumerate(analysis.file_analyses):
                    try:
                        # Path to string
                        if hasattr(fa, 'path'):
                            fa.path = str(fa.path)

                        # issues_found to list of strings
                        if hasattr(fa, 'issues_found'):
                            issues = fa.issues_found
                            if isinstance(issues, str):
                                fa.issues_found = [issues]
                            elif isinstance(issues, (list, tuple)):
                                fa.issues_found = [str(x) for x in issues]
                            else:
                                fa.issues_found = []
                        else:
                            fa.issues_found = []

                        # recommendations to list of strings
                        if hasattr(fa, 'recommendations'):
                            recs = fa.recommendations
                            if isinstance(recs, str):
                                fa.recommendations = [recs]
                            elif isinstance(recs, (list, tuple)):
                                fa.recommendations = [str(x) for x in recs]
                            else:
                                fa.recommendations = []
                        else:
                            fa.recommendations = []

                    except Exception as fa_err:
                        print(f"⚠️ Error sanitizing file[{idx}]: {fa_err}")
                        # Ensure fields exist even if there's an error
                        if not hasattr(fa, 'path'):
                            fa.path = ""
                        if not hasattr(fa, 'issues_found'):
                            fa.issues_found = []
                        if not hasattr(fa, 'recommendations'):
                            fa.recommendations = []
        except Exception as files_err:
            print(f"⚠️ Error iterating file_analyses: {files_err}")

        print("✅ Analysis object sanitized successfully")
        return analysis
    except Exception as e:
        print(f"❌ Critical sanitization error: {e}")
        traceback.print_exc()
        return analysis
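
# Note: sanitize_analysis_result mutates the analysis object in place and
# returns that same object; if sanitization itself fails, the object is
# returned unchanged rather than failing the request.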

# Global instances
rate_limiter = ClaudeRateLimiter()
git_client = GitIntegrationClient()
analysis_cache = AnalysisCache()
content_optimizer = ContentOptimizer()

class AnalysisRequest(BaseModel):
    repo_path: str
    output_format: str = "pdf"  # pdf, json
    max_files: int = 50

class RepositoryAnalysisRequest(BaseModel):
    repository_id: str
    user_id: str
    output_format: str = "pdf"  # pdf, json
    max_files: int = 0  # 0 = unlimited files
    analysis_type: str = "full"  # fast, basic, full

class AnalysisResponse(BaseModel):
    success: bool
    message: str
    analysis_id: Optional[str] = None
    report_path: Optional[str] = None
    stats: Optional[Dict[str, Any]] = None
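
# Illustrative request body for POST /analyze-repository (field names come from
# RepositoryAnalysisRequest; the ID and user values below are placeholders):
#
#     {
#       "repository_id": "<repository-uuid>",
#       "user_id": "<user-id>",
#       "output_format": "pdf",
#       "max_files": 0,
#       "analysis_type": "full"
#     }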

@app.on_event("startup")
async def startup_event():
    """Initialize the analyzer on startup."""
    global analyzer
    try:
        # Load environment variables
        from dotenv import load_dotenv
        load_dotenv()

        # Get API key
        api_key = os.getenv('ANTHROPIC_API_KEY')
        if not api_key:
            raise Exception("ANTHROPIC_API_KEY not found in environment")

        # Initialize analyzer with enhanced capabilities if available
        config = get_memory_config()

        # Add performance optimization settings to config
        config.update({
            'max_workers': 50,  # Increased parallel processing workers
            'batch_size': 200,  # Increased batch processing size
            'cache_ttl': 3600,  # Cache TTL (1 hour)
            'max_file_size': 0,  # No file size limit (0 = unlimited)
            'analysis_timeout': 1800,  # 30 minute timeout for large repositories
            'fast_mode': False,  # Disable fast mode to use full AI analysis
            'redis_host': 'pipeline_redis',  # Use Docker service name for Redis
            'redis_port': 6379,  # Use standard Redis port
            'redis_password': 'redis_secure_2024',
            'mongodb_url': 'mongodb://pipeline_admin:mongo_secure_2024@pipeline_mongodb:27017/',
            'postgres_host': 'pipeline_postgres',
            'postgres_password': 'secure_pipeline_2024'
        })

        if ENHANCED_ANALYZER_AVAILABLE:
            print("✅ Using Enhanced Analyzer with intelligent chunking and parallel processing")
            analyzer = create_enhanced_analyzer(api_key, config)
        else:
            print("✅ Using Standard Analyzer with performance optimizations")
            analyzer = EnhancedGitHubAnalyzer(api_key, config)

        print("✅ AI Analysis Service initialized successfully")
    except Exception as e:
        print(f"❌ Failed to initialize AI Analysis Service: {e}")
        raise

@app.get("/health")
async def health_check():
    """Health check endpoint."""
    return {
        "status": "healthy",
        "service": "ai-analysis-service",
        "timestamp": datetime.now().isoformat(),
        "version": "1.0.0"
    }

@app.post("/analyze")
async def analyze_repository(request: AnalysisRequest, background_tasks: BackgroundTasks):
    """Analyze a repository using a direct file path."""
    try:
        if not analyzer:
            raise HTTPException(status_code=500, detail="Analyzer not initialized")

        # Generate unique analysis ID
        analysis_id = f"analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        # Create temporary directory for this analysis
        temp_dir = tempfile.mkdtemp(prefix=f"ai_analysis_{analysis_id}_")

        try:
            # Run analysis
            analysis = await analyzer.analyze_repository_with_memory(
                request.repo_path
            )
            # Ensure fields are JSON-safe and types are normalized
            analysis = sanitize_analysis_result(analysis)

            # DEBUG: Log field types
            print(f"DEBUG: repo_path type: {type(analysis.repo_path)}")
            if analysis.file_analyses:
                for i, fa in enumerate(analysis.file_analyses[:3]):  # Check first 3
                    print(f"DEBUG FA[{i}]: path type={type(fa.path)}, issues_found type={type(fa.issues_found)}, recommendations type={type(fa.recommendations)}")
                    if fa.issues_found:
                        print(f"  issues_found[0] type: {type(fa.issues_found[0])}")
                    if fa.recommendations:
                        print(f"  recommendations[0] type: {type(fa.recommendations[0])}")

            # Build the JSON-serializable report payload once; it is used both as the
            # PDF fallback and when JSON output is requested directly.
            report_payload = {
                "repo_path": str(analysis.repo_path),
                "total_files": analysis.total_files,
                "total_lines": analysis.total_lines,
                "languages": analysis.languages,
                "code_quality_score": analysis.code_quality_score,
                "architecture_assessment": analysis.architecture_assessment,
                "security_assessment": analysis.security_assessment,
                "executive_summary": analysis.executive_summary,
                "file_analyses": [
                    {
                        "path": str(fa.path),
                        "language": fa.language,
                        "lines_of_code": fa.lines_of_code,
                        "severity_score": fa.severity_score,
                        "issues_found": [str(issue) for issue in fa.issues_found] if isinstance(fa.issues_found, (list, tuple)) else [],
                        "recommendations": [str(rec) for rec in fa.recommendations] if isinstance(fa.recommendations, (list, tuple)) else []
                    } for fa in analysis.file_analyses
                ]
            }

            # Generate report
            if request.output_format == "pdf":
                report_path = f"reports/{analysis_id}_analysis.pdf"
                try:
                    analyzer.create_pdf_report(analysis, report_path)
                except Exception as pdf_err:
                    print(f"⚠️ PDF generation failed: {pdf_err}, falling back to JSON")
                    report_path = f"reports/{analysis_id}_analysis.json"
                    with open(report_path, 'w') as f:
                        json.dump(report_payload, f, indent=2)
            else:
                report_path = f"reports/{analysis_id}_analysis.json"
                with open(report_path, 'w') as f:
                    json.dump(report_payload, f, indent=2)

            # Normalize list fields before calculating stats
            if hasattr(analysis, 'file_analyses'):
                for fa in analysis.file_analyses:
                    # Force issues_found to be a list
                    if not isinstance(fa.issues_found, list):
                        fa.issues_found = list(fa.issues_found) if isinstance(fa.issues_found, tuple) else []
                    # Force recommendations to be a list
                    if not isinstance(fa.recommendations, list):
                        fa.recommendations = list(fa.recommendations) if isinstance(fa.recommendations, tuple) else []

            # Calculate stats - list fields are normalized above
            stats = {
                "total_files": analysis.total_files,
                "total_lines": analysis.total_lines,
                "languages": analysis.languages,
                "code_quality_score": analysis.code_quality_score,
                "high_quality_files": len([fa for fa in analysis.file_analyses if fa.severity_score >= 8]),
                "medium_quality_files": len([fa for fa in analysis.file_analyses if 5 <= fa.severity_score < 8]),
                "low_quality_files": len([fa for fa in analysis.file_analyses if fa.severity_score < 5]),
                "total_issues": sum(len(fa.issues_found) for fa in analysis.file_analyses)
            }

            # Use a dictionary instead of the Pydantic model to avoid serialization issues
            return {
                "success": True,
                "message": "Analysis completed successfully",
                "analysis_id": analysis_id,
                "report_path": report_path,
                "stats": stats
            }

        finally:
            # Cleanup temporary directory
            if os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)

    except Exception as e:
        return AnalysisResponse(
            success=False,
            message=f"Analysis failed: {str(e)}",
            analysis_id=None,
            report_path=None,
            stats=None
        )

@app.post("/analyze-repository")
async def analyze_repository_by_id(request: RepositoryAnalysisRequest, background_tasks: BackgroundTasks):
    """Analyze a repository by ID using the git-integration service."""
    global os, shutil, tempfile, json
    # Ensure we're using the module-level imports, not shadowed local variables
    try:
        print(f"🔍 [DEBUG] Analysis request received: {request}")
        if not analyzer:
            raise HTTPException(status_code=500, detail="Analyzer not initialized")

        # Get repository information from git-integration service
        try:
            repo_info = await git_client.get_repository_info(request.repository_id, request.user_id)
            local_path = repo_info.get('local_path')  # Kept for compatibility but not checked on disk

            # Note: We no longer check local_path existence since we use the API approach
        except Exception as e:
            raise HTTPException(
                status_code=500,
                detail=f"Failed to get repository info: {str(e)}"
            )

        # Generate unique analysis ID
        analysis_id = f"repo_analysis_{request.repository_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        # Create temporary directory for this analysis
        temp_dir = tempfile.mkdtemp(prefix=f"ai_analysis_{analysis_id}_")

        try:
            # Check if fast mode is enabled
            if request.analysis_type in ("fast", "basic"):
                # Run fast analysis with timeout
                analysis = await analyze_repository_fast(
                    local_path,
                    request.repository_id,
                    request.user_id,
                    request.max_files
                )
            else:
                # Run full analysis with rate limiting and caching
                analysis = await analyze_repository_with_optimizations(
                    local_path,
                    request.repository_id,
                    request.user_id,
                    request.max_files
                )

            # Normalize types before serialization/PDF
            analysis = sanitize_analysis_result(analysis)

            # DEBUG: Log field types
            print(f"DEBUG: repo_path type: {type(analysis.repo_path)}")
            if analysis.file_analyses:
                for i, fa in enumerate(analysis.file_analyses[:3]):  # Check first 3
                    print(f"DEBUG FA[{i}]: path type={type(fa.path)}, issues_found type={type(fa.issues_found)}, recommendations type={type(fa.recommendations)}")
                    if fa.issues_found:
                        print(f"  issues_found[0] type: {type(fa.issues_found[0])}")
                    if fa.recommendations:
                        print(f"  recommendations[0] type: {type(fa.recommendations[0])}")

            try:
                # Build the JSON-serializable report payload once; it is used both as the
                # PDF fallback and when JSON output is requested directly.
                report_payload = {
                    "repository_id": request.repository_id,
                    "repo_path": str(analysis.repo_path),
                    "total_files": analysis.total_files,
                    "total_lines": analysis.total_lines,
                    "languages": analysis.languages,
                    "code_quality_score": analysis.code_quality_score,
                    "architecture_assessment": analysis.architecture_assessment,
                    "security_assessment": analysis.security_assessment,
                    "executive_summary": analysis.executive_summary,
                    "file_analyses": [
                        {
                            "path": str(fa.path),
                            "language": fa.language,
                            "lines_of_code": fa.lines_of_code,
                            "severity_score": fa.severity_score,
                            "issues_found": [str(issue) for issue in fa.issues_found] if isinstance(fa.issues_found, (list, tuple)) else [],
                            "recommendations": [str(rec) for rec in fa.recommendations] if isinstance(fa.recommendations, (list, tuple)) else []
                        } for fa in analysis.file_analyses
                    ]
                }

                # Generate report
                if request.output_format == "pdf":
                    report_path = f"reports/{analysis_id}_analysis.pdf"
                    try:
                        analyzer.create_pdf_report(analysis, report_path)
                    except Exception as pdf_err:
                        print(f"⚠️ PDF generation failed: {pdf_err}, falling back to JSON")
                        report_path = f"reports/{analysis_id}_analysis.json"
                        with open(report_path, 'w') as f:
                            json.dump(report_payload, f, indent=2)
                else:
                    report_path = f"reports/{analysis_id}_analysis.json"
                    with open(report_path, 'w') as f:
                        json.dump(report_payload, f, indent=2)
            except Exception as report_err:
                print(f"ERROR during report generation: {report_err}")
                traceback.print_exc()
                raise

            print("✅ Report generated successfully, now calculating stats...")

            try:
                print("Calculating stats...")
                # Normalize list fields before calculating stats
                if hasattr(analysis, 'file_analyses'):
                    for fa in analysis.file_analyses:
                        # Force issues_found to be a list
                        if not isinstance(fa.issues_found, list):
                            fa.issues_found = list(fa.issues_found) if isinstance(fa.issues_found, tuple) else []
                        # Force recommendations to be a list
                        if not isinstance(fa.recommendations, list):
                            fa.recommendations = list(fa.recommendations) if isinstance(fa.recommendations, tuple) else []

                # Calculate stats - list fields are normalized above
                stats = {
                    "repository_id": request.repository_id,
                    "total_files": analysis.total_files,
                    "total_lines": analysis.total_lines,
                    "languages": analysis.languages,
                    "code_quality_score": analysis.code_quality_score,
                    "high_quality_files": len([fa for fa in analysis.file_analyses if fa.severity_score >= 8]),
                    "medium_quality_files": len([fa for fa in analysis.file_analyses if 5 <= fa.severity_score < 8]),
                    "low_quality_files": len([fa for fa in analysis.file_analyses if fa.severity_score < 5]),
                    "total_issues": sum(len(fa.issues_found) for fa in analysis.file_analyses)
                }

                # Use a dictionary instead of the Pydantic model to avoid serialization issues
                return {
                    "success": True,
                    "message": "Repository analysis completed successfully",
                    "analysis_id": analysis_id,
                    "report_path": report_path,
                    "stats": stats
                }

            except Exception as e:
                print(f"❌ Repository analysis failed: {str(e)}")
                return AnalysisResponse(
                    success=False,
                    message=f"Repository analysis failed: {str(e)}"
                )

        finally:
            # Cleanup temporary directory
            if 'temp_dir' in locals():
                if os.path.exists(temp_dir):
                    shutil.rmtree(temp_dir)

    except HTTPException:
        raise
    except Exception as e:
        traceback.print_exc()
        print(f"❌ Repository analysis failed: {str(e)}")
        tb_lines = traceback.format_exception(type(e), e, e.__traceback__)
        print("FULL TRACEBACK:")
        for line in tb_lines:
            print(line.rstrip())
        return {
            "success": False,
            "message": f"Repository analysis failed: {str(e)}",
            "analysis_id": None,
            "report_path": None,
            "stats": None
        }

async def analyze_repository_fast(local_path: str, repository_id: str, user_id: str, max_files: int = 50):
    """Fast analysis with timeout and limited files for quick results."""
    try:
        print(f"🚀 Starting FAST analysis for repository {repository_id}")

        # Set a timeout for fast analysis
        timeout_seconds = 60  # 1 minute timeout for fast analysis

        async def run_analysis():
            # Get repository files from API (limited to max_files)
            files_data = await get_repository_files_from_api(repository_id, user_id, max_files)

            if not files_data:
                raise Exception("No files found in repository")

            print(f"📁 Found {len(files_data)} files for fast analysis")

            # Create a simple analysis without AI processing
            from ai_analyze import FileAnalysis, RepositoryAnalysis

            file_analyses = []
            total_lines = 0
            languages = set()

            for file_path, content in files_data[:max_files]:  # Limit to max_files
                # files_data is a list of tuples (file_path, content)

                # Basic analysis without AI
                lines = len(content.splitlines()) if content else 0
                total_lines += lines

                # Enhanced language detection
                language = "Unknown"
                if '.' in file_path:
                    ext = '.' + file_path.split('.')[-1].lower()
                    language_map = {
                        '.py': 'Python', '.js': 'JavaScript', '.ts': 'TypeScript', '.tsx': 'TypeScript',
                        '.jsx': 'JavaScript', '.java': 'Java', '.cpp': 'C++', '.c': 'C', '.cs': 'C#',
                        '.go': 'Go', '.rs': 'Rust', '.php': 'PHP', '.rb': 'Ruby', '.swift': 'Swift',
                        '.kt': 'Kotlin', '.html': 'HTML', '.htm': 'HTML', '.css': 'CSS', '.scss': 'SCSS',
                        '.sass': 'SASS', '.sql': 'SQL', '.json': 'JSON', '.yaml': 'YAML', '.yml': 'YAML',
                        '.md': 'Markdown', '.txt': 'Text', '.xml': 'XML', '.sh': 'Shell', '.bash': 'Shell',
                        '.zsh': 'Shell', '.fish': 'Shell', '.dockerfile': 'Docker', '.dockerignore': 'Docker',
                        '.gitignore': 'Git', '.gitattributes': 'Git', '.env': 'Environment', '.ini': 'Config',
                        '.cfg': 'Config', '.conf': 'Config', '.toml': 'TOML', '.lock': 'Lock File',
                        '.log': 'Log', '.tmp': 'Temporary', '.temp': 'Temporary'
                    }
                    language = language_map.get(ext, 'Unknown')
                else:
                    # Try to detect from filename
                    filename = file_path.lower()
                    if 'dockerfile' in filename:
                        language = 'Docker'
                    elif 'makefile' in filename:
                        language = 'Makefile'
                    elif 'readme' in filename:
                        language = 'Markdown'
                    elif 'license' in filename:
                        language = 'Text'
                    elif 'changelog' in filename:
                        language = 'Text'

                languages.add(language)

                # Perform smart fast analysis
                issues_found = []
                recommendations = []
                complexity_score = 5.0
                severity_score = 7.0

                # Basic code quality analysis
                if lines > 500:
                    issues_found.append("Large file - consider breaking into smaller modules")
                    recommendations.append("Split into smaller, focused files")
                    complexity_score += 2
                    severity_score -= 1

                if lines < 10:
                    issues_found.append("Very small file - might be incomplete")
                    recommendations.append("Review if this file is necessary")
                    severity_score -= 0.5

                # Language-specific analysis
                if language == "Python":
                    if "import" not in content and "def" not in content and "class" not in content:
                        issues_found.append("Python file without imports, functions, or classes")
                        recommendations.append("Add proper Python structure")
                        severity_score -= 1

                    if "print(" in content and "def " not in content:
                        issues_found.append("Contains print statements - consider logging")
                        recommendations.append("Use proper logging instead of print statements")
                        complexity_score += 1

                elif language == "JavaScript":
                    if "console.log" in content and "function" not in content:
                        issues_found.append("Contains console.log statements")
                        recommendations.append("Use proper logging or remove debug statements")
                        complexity_score += 1

                elif language == "Markdown":
                    if lines < 5:
                        issues_found.append("Very short documentation")
                        recommendations.append("Add more detailed documentation")
                        severity_score += 1

                # Calculate final scores
                complexity_score = max(1.0, min(10.0, complexity_score))
                severity_score = max(1.0, min(10.0, severity_score))

                # Generate detailed analysis
                detailed_analysis = f"Fast analysis of {file_path}: {lines} lines, {language} code. "
                if issues_found:
                    detailed_analysis += f"Issues found: {len(issues_found)}. "
                else:
                    detailed_analysis += "No major issues detected. "
                detailed_analysis += f"Complexity: {complexity_score:.1f}/10, Quality: {severity_score:.1f}/10"

                # Create smart file analysis
                file_analysis = FileAnalysis(
                    path=str(file_path),
                    language=language,
                    lines_of_code=lines,
                    complexity_score=complexity_score,
                    issues_found=issues_found if issues_found else ["No issues detected in fast analysis"],
                    recommendations=recommendations if recommendations else ["File appears well-structured"],
                    detailed_analysis=detailed_analysis,
                    severity_score=severity_score
                )
                file_analyses.append(file_analysis)

            # Create language count dictionary
            language_counts = {}
            for file_analysis in file_analyses:
                lang = file_analysis.language
                language_counts[lang] = language_counts.get(lang, 0) + 1

            # Create repository analysis
            analysis = RepositoryAnalysis(
                repo_path=local_path,
                total_files=len(file_analyses),
                total_lines=total_lines,
                languages=language_counts,
                code_quality_score=7.5,  # Default good score
                architecture_assessment="Fast analysis - architecture details require full analysis",
                security_assessment="Fast analysis - security details require full analysis",
                executive_summary=f"Fast analysis completed for {len(file_analyses)} files. Total lines: {total_lines}. Languages: {', '.join(language_counts.keys())}",
                file_analyses=file_analyses
            )

            return analysis

        # Run with timeout
        analysis = await asyncio.wait_for(run_analysis(), timeout=timeout_seconds)
        print(f"✅ Fast analysis completed in under {timeout_seconds} seconds")
        return analysis

    except asyncio.TimeoutError:
        print(f"⏰ Fast analysis timed out after {timeout_seconds} seconds")
        raise Exception(f"Fast analysis timed out after {timeout_seconds} seconds")
    except Exception as e:
        print(f"❌ Fast analysis failed: {e}")
        raise

async def get_repository_files_from_api(repository_id: str, user_id: str, max_files: int = 100):
    """Get repository files from Git Integration Service API."""
    try:
        print(f"🔍 [DEBUG] Getting repository files for {repository_id} with user {user_id}")

        # Get all files by scanning all directories recursively
        async with httpx.AsyncClient(timeout=30.0) as client:
            # First, get all directories from the repository
            print("🔍 [DEBUG] Getting all directories for repository")

            # NOTE: The two SQL queries below are not executed here; file discovery
            # goes through the git-integration structure/files endpoints instead.
            directories_query = f"""
                SELECT DISTINCT rd.relative_path
                FROM repository_directories rd
                WHERE rd.repository_id = '{repository_id}'
                ORDER BY rd.relative_path
            """

            # We need to get all directories and then scan each one
            # Let's use a different approach - get all files directly from the database
            all_files_query = f"""
                SELECT
                    file->>'relative_path' as relative_path,
                    file->>'filename' as filename
                FROM repository_files rf,
                     jsonb_array_elements(rf.files) as file
                WHERE rf.repository_id = '{repository_id}'
                ORDER BY file->>'relative_path'
            """

            # Get all directories by making multiple structure requests
            all_directories = set()
            all_directories.add('')  # Add root directory

            # First, get root structure
            structure_response = await client.get(
                f"{git_client.base_url}/api/github/repository/{repository_id}/structure",
                headers={'x-user-id': user_id}
            )

            if structure_response.status_code != 200:
                raise Exception(f"Failed to get repository structure: {structure_response.text}")

            structure_data = structure_response.json()
            if not structure_data.get('success'):
                raise Exception(f"Git Integration Service error: {structure_data.get('message', 'Unknown error')}")

            # Get all directories from root structure
            structure_items = structure_data.get('data', {}).get('structure', [])
            directories_to_scan = []

            for item in structure_items:
                if isinstance(item, dict) and item.get('type') == 'directory':
                    dir_path = item.get('path', '')
                    if dir_path:
                        all_directories.add(dir_path)
                        directories_to_scan.append(dir_path)
                        print(f"🔍 [DEBUG] Found directory: {dir_path}")

            # Now scan each directory to find subdirectories
            for directory in directories_to_scan:
                try:
                    print(f"🔍 [DEBUG] Getting structure for directory: '{directory}'")
                    dir_structure_response = await client.get(
                        f"{git_client.base_url}/api/github/repository/{repository_id}/structure",
                        params={'path': directory},
                        headers={'x-user-id': user_id}
                    )

                    if dir_structure_response.status_code == 200:
                        dir_structure_data = dir_structure_response.json()
                        if dir_structure_data.get('success'):
                            dir_items = dir_structure_data.get('data', {}).get('structure', [])
                            for item in dir_items:
                                if isinstance(item, dict) and item.get('type') == 'directory':
                                    subdir_path = item.get('path', '')
                                    if subdir_path and subdir_path not in all_directories:
                                        all_directories.add(subdir_path)
                                        directories_to_scan.append(subdir_path)
                                        print(f"🔍 [DEBUG] Found subdirectory: {subdir_path}")
                        else:
                            print(f"⚠️ [DEBUG] Failed to get structure for directory '{directory}': {dir_structure_data.get('message')}")
                    else:
                        print(f"⚠️ [DEBUG] Failed to get structure for directory '{directory}': HTTP {dir_structure_response.status_code}")
                except Exception as e:
                    print(f"⚠️ [DEBUG] Error getting structure for directory '{directory}': {e}")

            print(f"🔍 [DEBUG] Found {len(all_directories)} total directories to scan")

            # Scan each directory for files
            files_to_analyze = []
            for directory in all_directories:
                try:
                    print(f"🔍 [DEBUG] Scanning directory: '{directory}'")
                    files_response = await client.get(
                        f"{git_client.base_url}/api/github/repository/{repository_id}/files",
                        params={'directory_path': directory} if directory else {},
                        headers={'x-user-id': user_id}
                    )

                    if files_response.status_code == 200:
                        files_data = files_response.json()
                        if files_data.get('success'):
                            dir_files = files_data.get('data', {}).get('files', [])
                            for file_info in dir_files:
                                file_path = file_info.get('relative_path', '')
                                if file_path:
                                    files_to_analyze.append((file_path, None))
                                    print(f"🔍 [DEBUG] Found file in '{directory}': {file_path}")
                        else:
                            print(f"⚠️ [DEBUG] Failed to get files from directory '{directory}': {files_data.get('message')}")
                    else:
                        print(f"⚠️ [DEBUG] Failed to get files from directory '{directory}': HTTP {files_response.status_code}")
                except Exception as e:
                    print(f"⚠️ [DEBUG] Error scanning directory '{directory}': {e}")

            print(f"🔍 [DEBUG] Found {len(files_to_analyze)} total files after scanning all directories")

            # Limit files if needed (0 means unlimited)
            if max_files > 0 and len(files_to_analyze) > max_files:
                files_to_analyze = files_to_analyze[:max_files]
                print(f"🔍 [DEBUG] Limited to {max_files} files")

            # Fetch file content for each file
            files_with_content = []
            for i, (file_path, _) in enumerate(files_to_analyze):
                try:
                    print(f"🔍 [DEBUG] Fetching content for file {i+1}/{len(files_to_analyze)}: {file_path}")

                    # Get file content from Git Integration Service
                    content_response = await client.get(
                        f"{git_client.base_url}/api/github/repository/{repository_id}/file-content",
                        params={'file_path': file_path},
                        headers={'x-user-id': user_id}
                    )

                    if content_response.status_code == 200:
                        content_data = content_response.json()
                        if content_data.get('success'):
                            # Content is nested in data.content
                            content = content_data.get('data', {}).get('content', '')
                            files_with_content.append((file_path, content))
                            print(f"🔍 [DEBUG] Successfully got content for {file_path} ({len(content)} chars)")
                        else:
                            print(f"Warning: Failed to get content for {file_path}: {content_data.get('message')}")
                    else:
                        print(f"Warning: Failed to get content for {file_path}: HTTP {content_response.status_code}")

                except Exception as e:
                    print(f"Warning: Error getting content for {file_path}: {e}")
                    continue

            print(f"🔍 [DEBUG] Returning {len(files_with_content)} files with content")
            return files_with_content

    except Exception as e:
        print(f"Error getting repository files from API: {e}")
        traceback.print_exc()
        return []

async def analyze_repository_with_optimizations(repo_path: str, repository_id: str, user_id: str, max_files: int = 100):
    """Analyze repository with rate limiting, caching, and content optimization."""
    try:
        # Get repository files from Git Integration Service API
        files_to_analyze = await get_repository_files_from_api(repository_id, user_id, max_files)

        if not files_to_analyze:
            raise Exception("No files found to analyze")

        print(f"Starting optimized analysis of {len(files_to_analyze)} files...")

        file_analyses = []
        processed_files = 0

        for i, (file_path, content) in enumerate(files_to_analyze):
            print(f"Analyzing file {i+1}/{len(files_to_analyze)}: {file_path}")

            # Generate file hash for caching
            file_hash = hashlib.sha256((content or '').encode()).hexdigest()

            # Check cache first
            cached_analysis = await analysis_cache.get_cached_analysis(file_hash)
            if cached_analysis:
                print(f"Using cached analysis for {file_path}")
                # Convert cached dictionary back to analysis object
                from ai_analyze import FileAnalysis
                cached_obj = FileAnalysis(
                    path=cached_analysis["path"],
                    language=cached_analysis["language"],
                    lines_of_code=cached_analysis["lines_of_code"],
                    complexity_score=cached_analysis["complexity_score"],
                    issues_found=cached_analysis["issues_found"],
                    recommendations=cached_analysis["recommendations"],
                    detailed_analysis=cached_analysis["detailed_analysis"],
                    severity_score=cached_analysis["severity_score"]
                )
                file_analyses.append(cached_obj)
                processed_files += 1
                continue

            # Rate limiting
            await rate_limiter.wait_if_needed()

            # Optimize content for Claude API
            optimized_content = content_optimizer.optimize_content_for_claude(content)

            # Analyze file with memory
            try:
                # Convert string file path to Path object
                file_path_obj = Path(file_path)

                # Use enhanced analysis if available, fallback to standard
                if hasattr(analyzer, 'analyze_file_with_memory_enhanced'):
                    print(f"🔍 [DEBUG] Using ENHANCED analysis method for {file_path}")
                    analysis = await analyzer.analyze_file_with_memory_enhanced(
                        file_path_obj,
                        optimized_content,
                        repository_id
                    )
                else:
                    print(f"🔍 [DEBUG] Using STANDARD analysis method for {file_path}")
                    analysis = await analyzer.analyze_file_with_memory(
                        file_path_obj,
                        optimized_content,
                        repository_id
                    )

                # Cache the result
                analysis_dict = {
                    "path": str(analysis.path),
                    "language": analysis.language,
                    "lines_of_code": analysis.lines_of_code,
                    "complexity_score": analysis.complexity_score,
                    "issues_found": analysis.issues_found,
                    "recommendations": analysis.recommendations,
                    "detailed_analysis": analysis.detailed_analysis,
                    "severity_score": analysis.severity_score
                }

                await analysis_cache.cache_analysis(file_hash, analysis_dict)
                file_analyses.append(analysis)
                processed_files += 1

            except Exception as e:
                print(f"Error analyzing {file_path}: {e}")
                # Continue with other files
                continue

        # Repository-level analysis
        print("Performing repository-level analysis...")
        # Use a temporary directory path since we don't have a local repo_path
        temp_repo_path = f"/tmp/repo_{repository_id}" if repo_path is None else repo_path
        # Create proper context_memories structure
        context_memories = {
            'persistent_knowledge': [],
            'similar_analyses': []
        }
        # Repository-level analysis with enhanced context
        try:
            print("DEBUG: Calling analyze_repository_overview_with_memory...")
            architecture_assessment, security_assessment = await analyzer.analyze_repository_overview_with_memory(
                temp_repo_path, file_analyses, context_memories, repository_id
            )
            print("DEBUG: analyze_repository_overview_with_memory completed")
        except Exception as ov_err:
            print(f"ERROR in analyze_repository_overview_with_memory: {ov_err}")
            traceback.print_exc()
            architecture_assessment = f"Error: {str(ov_err)}"
            security_assessment = f"Error: {str(ov_err)}"

        # Create repository analysis result
        from ai_analyze import RepositoryAnalysis

        # Calculate code quality score safely
        if file_analyses and len(file_analyses) > 0:
            valid_scores = [fa.severity_score for fa in file_analyses if fa.severity_score is not None]
            code_quality_score = sum(valid_scores) / len(valid_scores) if valid_scores else 5.0
        else:
            code_quality_score = 5.0

        # Calculate total lines safely
        total_lines = sum(fa.lines_of_code for fa in file_analyses if fa.lines_of_code is not None) if file_analyses else 0

        # Get languages safely - count occurrences of each language
        if file_analyses:
            from collections import Counter
            language_list = [fa.language for fa in file_analyses if fa.language is not None]
            languages = dict(Counter(language_list))
        else:
            languages = {}

        # DEBUG: Check file_analyses before creating RepositoryAnalysis
        print(f"DEBUG: About to create RepositoryAnalysis with {len(file_analyses)} file_analyses")
        if file_analyses:
            for i, fa in enumerate(file_analyses[:2]):
                try:
                    print(f"  FA[{i}]: path type={type(fa.path).__name__}, issues={type(fa.issues_found).__name__}, recs={type(fa.recommendations).__name__}")
                except Exception as debug_err:
                    print(f"  FA[{i}]: DEBUG ERROR - {debug_err}")

        return RepositoryAnalysis(
            repo_path=str(temp_repo_path),
            total_files=len(files_to_analyze),
            total_lines=total_lines,
            languages=languages,
            code_quality_score=code_quality_score,
            architecture_assessment=architecture_assessment or "Analysis in progress",
            security_assessment=security_assessment or "Analysis in progress",
            file_analyses=file_analyses,
            executive_summary=f"Analysis completed for {processed_files} files in repository {repository_id}",
            high_quality_files=[]
        )

    except Exception as e:
        print(f"Error in optimized analysis: {e}")
        raise

@app.get("/repository/{repository_id}/info")
async def get_repository_info(repository_id: str, user_id: str):
    """Get repository information from git-integration service."""
    try:
        repo_info = await git_client.get_repository_info(repository_id, user_id)
        return {
            "success": True,
            "repository_info": repo_info
        }
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get repository info: {str(e)}"
        )

@app.get("/reports/{filename}")
async def download_report(filename: str):
    """Download analysis report."""
    report_path = f"reports/{filename}"
    if not os.path.exists(report_path):
        raise HTTPException(status_code=404, detail="Report not found")

    # Serve JSON fallback reports with the matching media type
    media_type = 'application/pdf' if filename.endswith('.pdf') else 'application/json'
    return FileResponse(
        report_path,
        media_type=media_type,
        headers={
            'Content-Disposition': f'inline; filename="{filename}"'
        }
    )

@app.get("/memory/stats")
async def get_memory_stats():
    """Get memory system statistics."""
    try:
        if not analyzer:
            raise HTTPException(status_code=500, detail="Analyzer not initialized")

        stats = await analyzer.memory_manager.get_memory_stats()
        return {
            "success": True,
            "memory_stats": stats
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to get memory stats: {str(e)}")

@app.post("/memory/query")
async def query_memory(query: str, repo_context: str = ""):
    """Query the memory system."""
    try:
        if not analyzer:
            raise HTTPException(status_code=500, detail="Analyzer not initialized")

        result = await analyzer.query_memory(query, repo_context)
        return {
            "success": True,
            "query": query,
            "result": result
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Memory query failed: {str(e)}")

@app.get("/enhanced/status")
async def get_enhanced_status():
    """Get enhanced processing status and statistics."""
    return {
        "success": True,
        "enhanced_available": ENHANCED_ANALYZER_AVAILABLE,
        "message": "Enhanced chunking system is active"
    }

@app.post("/enhanced/toggle")
async def toggle_enhanced_processing(enabled: bool = True):
    """Toggle enhanced processing on/off."""
    return {
        "success": True,
        "message": f"Enhanced processing {'enabled' if enabled else 'disabled'}",
        "enhanced_enabled": enabled
    }

if __name__ == "__main__":
    port = int(os.getenv('PORT', 8022))
    host = os.getenv('HOST', '0.0.0.0')

    print(f"🚀 Starting AI Analysis Service on {host}:{port}")
    uvicorn.run(app, host=host, port=port)