5043 lines
245 KiB
Python
5043 lines
245 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
Complete AI Repository Analysis Tool with Memory System
|
||
Automatically analyzes ALL files in a repository without limits.
|
||
|
||
Features:
|
||
- Analyzes ALL files in the repository (no max-files limit)
|
||
- No user query required - fully automated analysis
|
||
- Memory-enhanced analysis with learning capabilities
|
||
- Comprehensive PDF report generation
|
||
- Security, architecture, and code quality assessment
|
||
|
||
Usage:
|
||
python ai-analyze.py /path/to/repo --output analysis.pdf
|
||
|
||
Example:
|
||
python ai-analyze.py ./my-project --output complete_analysis.pdf
|
||
"""
|
||
|
||
import os
|
||
import asyncio
|
||
import hashlib
|
||
import json
|
||
import uuid
|
||
from pathlib import Path
|
||
from typing import Dict, List, Optional, Tuple, Any
|
||
from datetime import datetime, timedelta
|
||
from dataclasses import dataclass, asdict, field
|
||
from collections import defaultdict, Counter
|
||
import logging
|
||
import tempfile
|
||
import shutil
|
||
import re
|
||
import concurrent.futures
|
||
import threading
|
||
from functools import lru_cache
|
||
|
||
# Core packages
|
||
import anthropic
|
||
from dotenv import load_dotenv
|
||
import git
|
||
import redis
|
||
import pymongo
|
||
import psycopg2
|
||
from psycopg2.extras import RealDictCursor
|
||
import numpy as np
|
||
|
||
# PDF generation
|
||
from reportlab.lib.pagesizes import A4
|
||
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
||
from reportlab.lib.enums import TA_CENTER, TA_LEFT
|
||
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Table, TableStyle, Preformatted
|
||
from reportlab.lib import colors
|
||
from reportlab.graphics.shapes import Rect, String, Drawing
|
||
from reportlab.graphics.charts.piecharts import Pie
|
||
from reportlab.graphics.charts.barcharts import VerticalBarChart
|
||
from reportlab.lib.units import inch
|
||
|
||
# Enhanced dataclasses for memory system
|
||
@dataclass
|
||
class MemoryRecord:
|
||
id: str
|
||
timestamp: datetime
|
||
memory_type: str # 'episodic', 'persistent', 'working'
|
||
content: Dict[str, Any]
|
||
embeddings: Optional[List[float]] = None
|
||
metadata: Optional[Dict[str, Any]] = None
|
||
expiry: Optional[datetime] = None
|
||
|
||
@dataclass
|
||
class CodeAnalysisMemory:
|
||
repo_id: str
|
||
file_path: str
|
||
analysis_hash: str
|
||
analysis_data: Dict[str, Any]
|
||
embedding: List[float]
|
||
last_updated: datetime
|
||
access_count: int = 0
|
||
relevance_score: float = 1.0
|
||
|
||
@dataclass
|
||
class EpisodicMemory:
|
||
session_id: str
|
||
user_query: str
|
||
ai_response: str
|
||
repo_context: str
|
||
timestamp: datetime
|
||
embedding: List[float]
|
||
metadata: Dict[str, Any]
|
||
|
||
@dataclass
|
||
class PersistentMemory:
|
||
fact_id: str
|
||
content: str
|
||
category: str # 'code_pattern', 'best_practice', 'vulnerability', 'architecture'
|
||
confidence: float
|
||
embedding: List[float]
|
||
source_repos: List[str]
|
||
created_at: datetime
|
||
last_accessed: datetime
|
||
access_frequency: int = 0
|
||
|
||
@dataclass
|
||
class FileAnalysis:
|
||
path: str
|
||
language: str
|
||
lines_of_code: int
|
||
complexity_score: float
|
||
issues_found: List[str]
|
||
recommendations: List[str]
|
||
detailed_analysis: str
|
||
severity_score: float
|
||
content: str = '' # Add content field to store actual file content
|
||
|
||
def __post_init__(self):
|
||
"""Ensure all fields contain safe types for JSON serialization."""
|
||
# Convert path to string
|
||
if not isinstance(self.path, str):
|
||
self.path = str(self.path)
|
||
|
||
# Ensure issues_found is a list of strings
|
||
if not isinstance(self.issues_found, list):
|
||
if isinstance(self.issues_found, tuple):
|
||
self.issues_found = [str(i) for i in self.issues_found]
|
||
else:
|
||
self.issues_found = []
|
||
else:
|
||
self.issues_found = [str(i) if not isinstance(i, str) else i for i in self.issues_found]
|
||
|
||
# Ensure recommendations is a list of strings
|
||
if not isinstance(self.recommendations, list):
|
||
if isinstance(self.recommendations, tuple):
|
||
self.recommendations = [str(r) for r in self.recommendations]
|
||
else:
|
||
self.recommendations = []
|
||
else:
|
||
self.recommendations = [str(r) if not isinstance(r, str) else r for r in self.recommendations]
|
||
|
||
# Ensure detailed_analysis is a string
|
||
if not isinstance(self.detailed_analysis, str):
|
||
self.detailed_analysis = str(self.detailed_analysis)
|
||
|
||
@dataclass
|
||
class RepositoryAnalysis:
|
||
repo_path: str
|
||
total_files: int
|
||
total_lines: int
|
||
languages: Dict[str, int]
|
||
architecture_assessment: str
|
||
security_assessment: str
|
||
code_quality_score: float
|
||
file_analyses: List[FileAnalysis]
|
||
executive_summary: str
|
||
high_quality_files: List[str] = field(default_factory=list)
|
||
|
||
class MemoryManager:
|
||
"""Advanced memory management system for AI repository analysis."""
|
||
|
||
def __init__(self, config: Dict[str, Any]):
|
||
self.config = config
|
||
self.setup_logging()
|
||
|
||
# Initialize Claude client for embeddings
|
||
self.claude_client = anthropic.Anthropic(api_key=config.get('anthropic_api_key', ''))
|
||
|
||
# Initialize database connections
|
||
self.setup_databases()
|
||
|
||
# Memory configuration
|
||
self.working_memory_ttl = 3600 # 1 hour
|
||
self.episodic_retention_days = 365 # 1 year
|
||
self.persistent_memory_threshold = 0.8 # Confidence threshold for persistence
|
||
|
||
def setup_logging(self):
|
||
logging.basicConfig(level=logging.INFO)
|
||
self.logger = logging.getLogger(__name__)
|
||
|
||
def setup_databases(self):
|
||
"""Initialize all database connections with enhanced error handling."""
|
||
try:
|
||
# Redis for working memory (temporary, fast access) with localhost fallback
|
||
redis_host = self.config.get('redis_host', 'localhost')
|
||
redis_port = self.config.get('redis_port', 6380) # Use 6380 to avoid conflicts
|
||
redis_password = self.config.get('redis_password', 'redis_secure_2024')
|
||
|
||
self.redis_client = redis.Redis(
|
||
host=redis_host,
|
||
port=redis_port,
|
||
password=redis_password,
|
||
db=self.config.get('redis_db', 0),
|
||
decode_responses=True,
|
||
socket_connect_timeout=5,
|
||
socket_timeout=5
|
||
)
|
||
self.redis_client.ping()
|
||
self.logger.info(f"✅ Redis connected to {redis_host}:{redis_port}")
|
||
|
||
except Exception as e:
|
||
self.logger.warning(f"⚠️ Redis connection failed: {e}")
|
||
self.redis_client = None
|
||
|
||
try:
|
||
# MongoDB for documents and episodic memory with localhost fallback
|
||
mongo_url = self.config.get('mongodb_url', 'mongodb://pipeline_admin:mongo_secure_2024@localhost:27017/')
|
||
self.mongo_client = pymongo.MongoClient(mongo_url, serverSelectionTimeoutMS=5000)
|
||
self.mongo_client.admin.command('ping')
|
||
self.mongo_db = self.mongo_client[self.config.get('mongodb_name', 'repo_analyzer')]
|
||
|
||
# Collections
|
||
self.episodic_collection = self.mongo_db['episodic_memories']
|
||
self.analysis_collection = self.mongo_db['code_analyses']
|
||
self.persistent_collection = self.mongo_db['persistent_memories']
|
||
self.repo_metadata_collection = self.mongo_db['repository_metadata']
|
||
|
||
self.logger.info("✅ MongoDB connected successfully")
|
||
|
||
except Exception as e:
|
||
self.logger.warning(f"⚠️ MongoDB connection failed: {e}")
|
||
self.mongo_client = None
|
||
self.mongo_db = None
|
||
|
||
try:
|
||
# PostgreSQL with localhost fallback
|
||
self.pg_conn = psycopg2.connect(
|
||
host=self.config.get('postgres_host', 'localhost'),
|
||
port=self.config.get('postgres_port', 5432),
|
||
database=self.config.get('postgres_db', 'dev_pipeline'),
|
||
user=self.config.get('postgres_user', 'pipeline_admin'),
|
||
password=self.config.get('postgres_password', 'secure_pipeline_2024'),
|
||
connect_timeout=5
|
||
)
|
||
|
||
# Check if pgvector is available
|
||
try:
|
||
with self.pg_conn.cursor() as cur:
|
||
cur.execute("SELECT 1 FROM pg_extension WHERE extname = 'vector';")
|
||
self.has_vector = cur.fetchone() is not None
|
||
except:
|
||
self.has_vector = False
|
||
|
||
self.logger.info("✅ PostgreSQL connected successfully")
|
||
|
||
except Exception as e:
|
||
self.logger.warning(f"⚠️ PostgreSQL connection failed: {e}")
|
||
self.pg_conn = None
|
||
self.has_vector = False
|
||
|
||
def generate_embedding(self, text: str) -> List[float]:
|
||
"""Generate embedding for text using Claude API."""
|
||
try:
|
||
# Use Claude to generate semantic embeddings
|
||
# Truncate text if too long for Claude API
|
||
if len(text) > 8000:
|
||
text = text[:8000] + "..."
|
||
|
||
prompt = f"""
|
||
Convert the following text into a 384-dimensional numerical vector that represents its semantic meaning.
|
||
The vector should be suitable for similarity search and clustering.
|
||
|
||
Text: {text}
|
||
|
||
Return only a JSON array of 384 floating-point numbers between -1 and 1, like this:
|
||
[0.123, -0.456, 0.789, ...]
|
||
"""
|
||
|
||
# Use the configured Claude model
|
||
message = self.claude_client.messages.create(
|
||
model=os.getenv("CLAUDE_MODEL", "claude-3-5-haiku-latest"),
|
||
max_tokens=2000,
|
||
temperature=0.1,
|
||
messages=[{"role": "user", "content": prompt}]
|
||
)
|
||
|
||
response_text = message.content[0].text.strip()
|
||
|
||
# Extract JSON array from response
|
||
# Find JSON array in response
|
||
json_match = re.search(r'\[[\d\.,\s-]+\]', response_text)
|
||
if json_match:
|
||
embedding = json.loads(json_match.group())
|
||
if len(embedding) == 384:
|
||
return embedding
|
||
|
||
# Fallback: generate deterministic embedding from text hash
|
||
return self._generate_fallback_embedding(text)
|
||
|
||
except Exception as e:
|
||
self.logger.error(f"Claude embedding generation failed: {e}")
|
||
return self._generate_fallback_embedding(text)
|
||
|
||
def _generate_fallback_embedding(self, text: str) -> List[float]:
|
||
"""Generate fallback embedding using text hash."""
|
||
try:
|
||
import hashlib
|
||
import struct
|
||
|
||
# Create a deterministic hash-based embedding
|
||
hash_obj = hashlib.sha256(text.encode('utf-8'))
|
||
hash_bytes = hash_obj.digest()
|
||
|
||
# Convert to 384-dimensional vector
|
||
embedding = []
|
||
for i in range(0, len(hash_bytes), 4):
|
||
if len(embedding) >= 384:
|
||
break
|
||
chunk = hash_bytes[i:i+4]
|
||
if len(chunk) == 4:
|
||
# Convert 4 bytes to float and normalize
|
||
value = struct.unpack('>I', chunk)[0] / (2**32 - 1) # Normalize to 0-1
|
||
embedding.append(value * 2 - 1) # Scale to -1 to 1
|
||
|
||
# Pad to exactly 384 dimensions
|
||
while len(embedding) < 384:
|
||
embedding.append(0.0)
|
||
|
||
return embedding[:384]
|
||
|
||
except Exception as e:
|
||
self.logger.error(f"Fallback embedding generation failed: {e}")
|
||
return [0.0] * 384
|
||
|
||
def calculate_content_hash(self, content: str) -> str:
|
||
"""Calculate SHA-256 hash of content for change detection."""
|
||
return hashlib.sha256(content.encode()).hexdigest()
|
||
|
||
async def store_working_memory(self, key: str, data: Dict[str, Any], ttl: Optional[int] = None) -> bool:
|
||
"""Store temporary data in working memory (Redis)."""
|
||
try:
|
||
ttl = ttl or self.working_memory_ttl
|
||
serialized_data = json.dumps(data, default=str)
|
||
self.redis_client.setex(f"working:{key}", ttl, serialized_data)
|
||
return True
|
||
except Exception as e:
|
||
self.logger.error(f"Working memory storage failed: {e}")
|
||
return False
|
||
|
||
async def get_working_memory(self, key: str) -> Optional[Dict[str, Any]]:
|
||
"""Retrieve data from working memory."""
|
||
try:
|
||
data = self.redis_client.get(f"working:{key}")
|
||
return json.loads(data) if data else None
|
||
except Exception as e:
|
||
self.logger.error(f"Working memory retrieval failed: {e}")
|
||
return None
|
||
|
||
async def store_episodic_memory(self, session_id: str, user_query: str,
|
||
ai_response: str, repo_context: str,
|
||
metadata: Optional[Dict] = None) -> str:
|
||
"""Store interaction in episodic memory."""
|
||
try:
|
||
memory_id = str(uuid.uuid4())
|
||
|
||
# Generate embeddings
|
||
query_embedding = self.generate_embedding(user_query)
|
||
response_embedding = self.generate_embedding(ai_response)
|
||
|
||
# Store in MongoDB
|
||
episodic_record = {
|
||
'memory_id': memory_id,
|
||
'session_id': session_id,
|
||
'user_query': user_query,
|
||
'ai_response': ai_response,
|
||
'repo_context': repo_context,
|
||
'timestamp': datetime.utcnow(),
|
||
'metadata': metadata or {}
|
||
}
|
||
self.episodic_collection.insert_one(episodic_record)
|
||
|
||
# Store embeddings in PostgreSQL for similarity search
|
||
with self.pg_conn.cursor() as cur:
|
||
cur.execute("""
|
||
INSERT INTO query_embeddings
|
||
(session_id, query_text, query_embedding, response_embedding, repo_context, metadata)
|
||
VALUES (%s, %s, %s, %s, %s, %s)
|
||
""", (
|
||
session_id, user_query, query_embedding, response_embedding,
|
||
repo_context, json.dumps(metadata or {})
|
||
))
|
||
self.pg_conn.commit()
|
||
|
||
self.logger.info(f"Episodic memory stored: {memory_id}")
|
||
return memory_id
|
||
|
||
except Exception as e:
|
||
self.logger.error(f"Episodic memory storage failed: {e}")
|
||
return ""
|
||
|
||
async def retrieve_episodic_memories(self, query: str, repo_context: str = "",
|
||
limit: int = 10, similarity_threshold: float = 0.7) -> List[Dict]:
|
||
"""Retrieve relevant episodic memories based on query similarity."""
|
||
try:
|
||
query_embedding = self.generate_embedding(query)
|
||
|
||
with self.pg_conn.cursor(cursor_factory=RealDictCursor) as cur:
|
||
# Find similar queries using cosine similarity
|
||
cur.execute("""
|
||
SELECT session_id, query_text, repo_context, timestamp, metadata,
|
||
1 - (query_embedding <=> %s::vector) as similarity
|
||
FROM query_embeddings
|
||
WHERE (%s = '' OR repo_context = %s)
|
||
AND 1 - (query_embedding <=> %s::vector) > %s
|
||
ORDER BY similarity DESC
|
||
LIMIT %s
|
||
""", (query_embedding, repo_context, repo_context, query_embedding, similarity_threshold, limit))
|
||
|
||
similar_queries = cur.fetchall()
|
||
|
||
# Fetch full episodic records from MongoDB
|
||
memories = []
|
||
for query_record in similar_queries:
|
||
episodic_record = self.episodic_collection.find_one({
|
||
'session_id': query_record['session_id'],
|
||
'timestamp': query_record['timestamp']
|
||
})
|
||
if episodic_record:
|
||
episodic_record['similarity_score'] = float(query_record['similarity'])
|
||
memories.append(episodic_record)
|
||
|
||
return memories
|
||
|
||
except Exception as e:
|
||
self.logger.error(f"Episodic memory retrieval failed: {e}")
|
||
return []
|
||
|
||
async def store_persistent_memory(self, content: str, category: str,
|
||
confidence: float, source_repos: List[str]) -> str:
|
||
"""Store long-term knowledge in persistent memory."""
|
||
try:
|
||
fact_id = str(uuid.uuid4())
|
||
embedding = self.generate_embedding(content)
|
||
|
||
# Store in MongoDB
|
||
persistent_record = {
|
||
'fact_id': fact_id,
|
||
'content': content,
|
||
'category': category,
|
||
'confidence': confidence,
|
||
'source_repos': source_repos,
|
||
'created_at': datetime.utcnow(),
|
||
'last_accessed': datetime.utcnow(),
|
||
'access_frequency': 1
|
||
}
|
||
self.persistent_collection.insert_one(persistent_record)
|
||
|
||
# Store embedding in PostgreSQL
|
||
with self.pg_conn.cursor() as cur:
|
||
if self.has_vector:
|
||
cur.execute("""
|
||
INSERT INTO knowledge_embeddings
|
||
(fact_id, content, category, embedding, confidence, source_repos)
|
||
VALUES (%s, %s, %s, %s, %s, %s)
|
||
""", (fact_id, content, category, embedding, confidence, source_repos))
|
||
else:
|
||
cur.execute("""
|
||
INSERT INTO knowledge_embeddings
|
||
(fact_id, content, category, confidence, source_repos)
|
||
VALUES (%s, %s, %s, %s, %s)
|
||
""", (fact_id, content, category, confidence, source_repos))
|
||
self.pg_conn.commit()
|
||
|
||
self.logger.info(f"Persistent memory stored: {fact_id}")
|
||
return fact_id
|
||
|
||
except Exception as e:
|
||
self.logger.error(f"Persistent memory storage failed: {e}")
|
||
return ""
|
||
|
||
async def retrieve_persistent_memories(self, query: str, category: str = "",
|
||
limit: int = 20, similarity_threshold: float = 0.6) -> List[Dict]:
|
||
"""Retrieve relevant persistent knowledge."""
|
||
try:
|
||
query_embedding = self.generate_embedding(query)
|
||
|
||
with self.pg_conn.cursor(cursor_factory=RealDictCursor) as cur:
|
||
# Check if table exists first
|
||
cur.execute("""
|
||
SELECT EXISTS (
|
||
SELECT FROM information_schema.tables
|
||
WHERE table_name = 'knowledge_embeddings'
|
||
);
|
||
""")
|
||
table_exists = cur.fetchone()[0]
|
||
|
||
if not table_exists:
|
||
self.logger.warning("knowledge_embeddings table does not exist, returning empty results")
|
||
return []
|
||
|
||
# Build WHERE clause dynamically
|
||
if hasattr(self, 'has_vector') and self.has_vector:
|
||
where_conditions = ["1 - (embedding <=> %s::vector) > %s"]
|
||
params = [query_embedding, similarity_threshold]
|
||
else:
|
||
# Fallback to text-based search
|
||
where_conditions = ["content ILIKE %s"]
|
||
params = [f"%{query}%"]
|
||
|
||
if category:
|
||
where_conditions.append("category = %s")
|
||
params.append(category)
|
||
|
||
where_clause = " AND ".join(where_conditions)
|
||
params.extend([limit])
|
||
|
||
if hasattr(self, 'has_vector') and self.has_vector:
|
||
cur.execute(f"""
|
||
SELECT fact_id, content, category, confidence, source_repos,
|
||
1 - (embedding <=> %s::vector) as similarity,
|
||
created_at, last_accessed, access_frequency
|
||
FROM knowledge_embeddings
|
||
WHERE {where_clause}
|
||
ORDER BY similarity DESC, confidence DESC, access_frequency DESC
|
||
LIMIT %s
|
||
""", params)
|
||
else:
|
||
cur.execute(f"""
|
||
SELECT fact_id, content, category, confidence, source_repos,
|
||
0.8 as similarity,
|
||
created_at, last_accessed, access_frequency
|
||
FROM knowledge_embeddings
|
||
WHERE {where_clause}
|
||
ORDER BY confidence DESC, access_frequency DESC
|
||
LIMIT %s
|
||
""", params)
|
||
|
||
results = cur.fetchall()
|
||
|
||
# Update access frequency
|
||
for result in results:
|
||
cur.execute("""
|
||
UPDATE knowledge_embeddings
|
||
SET last_accessed = CURRENT_TIMESTAMP,
|
||
access_frequency = access_frequency + 1
|
||
WHERE fact_id = %s
|
||
""", (result['fact_id'],))
|
||
|
||
self.pg_conn.commit()
|
||
return [dict(result) for result in results]
|
||
|
||
except Exception as e:
|
||
self.logger.error(f"Persistent memory retrieval failed: {e}")
|
||
return []
|
||
|
||
async def store_code_analysis(self, repo_id: str, file_path: str,
|
||
analysis_data: Dict[str, Any]) -> str:
|
||
"""Store code analysis with embeddings for future retrieval."""
|
||
try:
|
||
content_hash = self.calculate_content_hash(json.dumps(analysis_data, sort_keys=True))
|
||
|
||
# Create searchable content for embedding
|
||
searchable_content = f"""
|
||
File: {file_path}
|
||
Language: {analysis_data.get('language', 'Unknown')}
|
||
Issues: {' '.join(analysis_data.get('issues_found', []))}
|
||
Recommendations: {' '.join(analysis_data.get('recommendations', []))}
|
||
Analysis: {analysis_data.get('detailed_analysis', '')}
|
||
"""
|
||
|
||
embedding = self.generate_embedding(searchable_content)
|
||
|
||
# Store in MongoDB
|
||
analysis_record = {
|
||
'repo_id': repo_id,
|
||
'file_path': file_path,
|
||
'content_hash': content_hash,
|
||
'analysis_data': analysis_data,
|
||
'created_at': datetime.utcnow(),
|
||
'last_accessed': datetime.utcnow(),
|
||
'access_count': 1
|
||
}
|
||
|
||
# Upsert to handle updates
|
||
self.analysis_collection.update_one(
|
||
{'repo_id': repo_id, 'file_path': file_path},
|
||
{'$set': analysis_record},
|
||
upsert=True
|
||
)
|
||
|
||
# Store embedding in PostgreSQL
|
||
with self.pg_conn.cursor() as cur:
|
||
if self.has_vector:
|
||
cur.execute("""
|
||
INSERT INTO code_embeddings (repo_id, file_path, content_hash, embedding, metadata)
|
||
VALUES (%s, %s, %s, %s, %s)
|
||
ON CONFLICT (repo_id, file_path, content_hash)
|
||
DO UPDATE SET last_accessed = CURRENT_TIMESTAMP
|
||
""", (
|
||
repo_id, file_path, content_hash, embedding,
|
||
json.dumps({
|
||
'language': analysis_data.get('language'),
|
||
'lines_of_code': analysis_data.get('lines_of_code', 0),
|
||
'severity_score': analysis_data.get('severity_score', 5.0)
|
||
})
|
||
))
|
||
else:
|
||
cur.execute("""
|
||
INSERT INTO code_embeddings (repo_id, file_path, content_hash, embedding_text, metadata)
|
||
VALUES (%s, %s, %s, %s, %s)
|
||
ON CONFLICT (repo_id, file_path, content_hash)
|
||
DO UPDATE SET last_accessed = CURRENT_TIMESTAMP
|
||
""", (
|
||
repo_id, file_path, content_hash, json.dumps(embedding),
|
||
json.dumps({
|
||
'language': analysis_data.get('language'),
|
||
'lines_of_code': analysis_data.get('lines_of_code', 0),
|
||
'severity_score': analysis_data.get('severity_score', 5.0)
|
||
})
|
||
))
|
||
self.pg_conn.commit()
|
||
|
||
return content_hash
|
||
|
||
except Exception as e:
|
||
self.logger.error(f"Code analysis storage failed: {e}")
|
||
return ""
|
||
|
||
async def search_similar_code(self, query: str, repo_id: str = "",
|
||
limit: int = 10) -> List[Dict]:
|
||
"""Search for similar code analyses."""
|
||
try:
|
||
query_embedding = self.generate_embedding(query)
|
||
|
||
with self.pg_conn.cursor(cursor_factory=RealDictCursor) as cur:
|
||
# Check if table exists first
|
||
cur.execute("""
|
||
SELECT EXISTS (
|
||
SELECT FROM information_schema.tables
|
||
WHERE table_name = 'code_embeddings'
|
||
);
|
||
""")
|
||
table_exists = cur.fetchone()[0]
|
||
|
||
if not table_exists:
|
||
self.logger.warning("code_embeddings table does not exist, returning empty results")
|
||
return []
|
||
|
||
where_clause = "WHERE 1=1"
|
||
params = [query_embedding]
|
||
|
||
if repo_id:
|
||
where_clause += " AND repo_id = %s"
|
||
params.append(repo_id)
|
||
|
||
params.append(limit)
|
||
|
||
cur.execute(f"""
|
||
SELECT repo_id, file_path, content_hash, metadata,
|
||
1 - (embedding <=> %s::vector) as similarity
|
||
FROM code_embeddings
|
||
{where_clause}
|
||
ORDER BY similarity DESC
|
||
LIMIT %s
|
||
""", params)
|
||
|
||
results = cur.fetchall()
|
||
|
||
# Fetch full analysis data from MongoDB
|
||
enriched_results = []
|
||
for result in results:
|
||
analysis = self.analysis_collection.find_one({
|
||
'repo_id': result['repo_id'],
|
||
'file_path': result['file_path']
|
||
})
|
||
if analysis:
|
||
analysis['similarity_score'] = float(result['similarity'])
|
||
enriched_results.append(analysis)
|
||
|
||
return enriched_results
|
||
|
||
except Exception as e:
|
||
self.logger.error(f"Similar code search failed: {e}")
|
||
return []
|
||
|
||
async def cleanup_old_memories(self):
|
||
"""Clean up old episodic memories and update access patterns."""
|
||
try:
|
||
cutoff_date = datetime.utcnow() - timedelta(days=self.episodic_retention_days)
|
||
|
||
# Clean up old episodic memories
|
||
result = self.episodic_collection.delete_many({
|
||
'timestamp': {'$lt': cutoff_date}
|
||
})
|
||
self.logger.info(f"Cleaned up {result.deleted_count} old episodic memories")
|
||
|
||
# Clean up corresponding query embeddings
|
||
with self.pg_conn.cursor() as cur:
|
||
cur.execute("DELETE FROM query_embeddings WHERE timestamp < %s", (cutoff_date,))
|
||
self.pg_conn.commit()
|
||
|
||
# Update persistent memory relevance based on access patterns
|
||
await self.update_persistent_memory_relevance()
|
||
|
||
except Exception as e:
|
||
self.logger.error(f"Memory cleanup failed: {e}")
|
||
|
||
async def update_persistent_memory_relevance(self):
|
||
"""Update relevance scores for persistent memories based on access patterns."""
|
||
try:
|
||
with self.pg_conn.cursor() as cur:
|
||
# Calculate relevance based on recency and frequency
|
||
cur.execute("""
|
||
UPDATE knowledge_embeddings
|
||
SET confidence = LEAST(confidence * (
|
||
CASE
|
||
WHEN EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - last_accessed)) / 86400 < 30
|
||
THEN 1.1
|
||
ELSE 0.95
|
||
END *
|
||
(1.0 + LOG(access_frequency + 1) / 10.0)
|
||
), 1.0)
|
||
""")
|
||
self.pg_conn.commit()
|
||
|
||
except Exception as e:
|
||
self.logger.error(f"Relevance update failed: {e}")
|
||
|
||
async def get_memory_stats(self) -> Dict[str, Any]:
|
||
"""Get comprehensive memory system statistics."""
|
||
try:
|
||
stats = {}
|
||
|
||
# Working memory stats (Redis)
|
||
working_keys = self.redis_client.keys("working:*")
|
||
stats['working_memory'] = {
|
||
'total_keys': len(working_keys),
|
||
'memory_usage': self.redis_client.info()['used_memory_human']
|
||
}
|
||
|
||
# Episodic memory stats (MongoDB)
|
||
stats['episodic_memory'] = {
|
||
'total_records': self.episodic_collection.count_documents({}),
|
||
'recent_interactions': self.episodic_collection.count_documents({
|
||
'timestamp': {'$gte': datetime.utcnow() - timedelta(days=7)}
|
||
})
|
||
}
|
||
|
||
# Persistent memory stats
|
||
stats['persistent_memory'] = {
|
||
'total_facts': self.persistent_collection.count_documents({}),
|
||
'high_confidence_facts': self.persistent_collection.count_documents({
|
||
'confidence': {'$gte': 0.8}
|
||
})
|
||
}
|
||
|
||
# Code analysis stats
|
||
stats['code_analysis'] = {
|
||
'total_analyses': self.analysis_collection.count_documents({}),
|
||
'unique_repositories': len(self.analysis_collection.distinct('repo_id'))
|
||
}
|
||
|
||
# Vector database stats (PostgreSQL)
|
||
with self.pg_conn.cursor(cursor_factory=RealDictCursor) as cur:
|
||
cur.execute("SELECT COUNT(*) as count FROM code_embeddings")
|
||
code_embeddings_count = cur.fetchone()['count']
|
||
|
||
cur.execute("SELECT COUNT(*) as count FROM knowledge_embeddings")
|
||
knowledge_embeddings_count = cur.fetchone()['count']
|
||
|
||
stats['vector_database'] = {
|
||
'code_embeddings': code_embeddings_count,
|
||
'knowledge_embeddings': knowledge_embeddings_count
|
||
}
|
||
|
||
return stats
|
||
|
||
except Exception as e:
|
||
self.logger.error(f"Stats retrieval failed: {e}")
|
||
return {}
|
||
|
||
class MemoryQueryEngine:
|
||
"""Advanced querying capabilities across memory systems."""
|
||
|
||
def __init__(self, memory_manager: MemoryManager):
|
||
self.memory = memory_manager
|
||
|
||
async def intelligent_query(self, query: str, repo_context: str = "") -> Dict[str, Any]:
|
||
"""Intelligent cross-memory querying with relevance scoring."""
|
||
try:
|
||
# Multi-source memory retrieval
|
||
results = await asyncio.gather(
|
||
self.memory.retrieve_episodic_memories(query, repo_context, limit=5),
|
||
self.memory.retrieve_persistent_memories(query, limit=10),
|
||
self.memory.search_similar_code(query, repo_context, limit=5)
|
||
)
|
||
|
||
episodic_memories, persistent_knowledge, similar_code = results
|
||
|
||
# Relevance scoring and fusion
|
||
fused_response = self.fuse_memory_responses(
|
||
query, episodic_memories, persistent_knowledge, similar_code
|
||
)
|
||
|
||
return {
|
||
'query': query,
|
||
'fused_response': fused_response,
|
||
'sources': {
|
||
'episodic_count': len(episodic_memories),
|
||
'persistent_count': len(persistent_knowledge),
|
||
'similar_code_count': len(similar_code)
|
||
},
|
||
'confidence_score': self.calculate_response_confidence(fused_response),
|
||
'timestamp': datetime.utcnow()
|
||
}
|
||
|
||
except Exception as e:
|
||
self.memory.logger.error(f"Intelligent query failed: {e}")
|
||
return {'error': str(e)}
|
||
|
||
def fuse_memory_responses(self, query: str, episodic: List, persistent: List, code: List) -> str:
|
||
"""Fuse responses from different memory systems."""
|
||
response_parts = []
|
||
|
||
# Weight different memory types
|
||
if persistent:
|
||
high_conf_knowledge = [p for p in persistent if p.get('confidence', 0) > 0.8]
|
||
if high_conf_knowledge:
|
||
response_parts.append("Based on established knowledge:")
|
||
for knowledge in high_conf_knowledge[:3]:
|
||
response_parts.append(f"• {knowledge['content']}")
|
||
|
||
if episodic:
|
||
recent_interactions = sorted(episodic, key=lambda x: x.get('timestamp', datetime.min), reverse=True)[:2]
|
||
if recent_interactions:
|
||
response_parts.append("\nFrom previous interactions:")
|
||
for interaction in recent_interactions:
|
||
response_parts.append(f"• {interaction.get('ai_response', '')[:200]}...")
|
||
|
||
if code:
|
||
similar_patterns = [c for c in code if c.get('similarity_score', 0) > 0.7]
|
||
if similar_patterns:
|
||
response_parts.append("\nSimilar code patterns found:")
|
||
for pattern in similar_patterns[:2]:
|
||
issues = pattern.get('analysis_data', {}).get('issues_found', [])
|
||
if issues:
|
||
response_parts.append(f"• {pattern['file_path']}: {issues[0]}")
|
||
|
||
return '\n'.join(response_parts) if response_parts else "No relevant memories found."
|
||
|
||
def calculate_response_confidence(self, response: str) -> float:
|
||
"""Calculate confidence score for fused response."""
|
||
if not response or response == "No relevant memories found.":
|
||
return 0.0
|
||
|
||
# Simple confidence calculation based on response length and structure
|
||
confidence = min(len(response.split()) / 100.0, 1.0) # Normalize by word count
|
||
if "Based on established knowledge:" in response:
|
||
confidence += 0.2
|
||
if "From previous interactions:" in response:
|
||
confidence += 0.1
|
||
if "Similar code patterns found:" in response:
|
||
confidence += 0.15
|
||
|
||
return min(confidence, 1.0)
|
||
|
||
class EnhancedGitHubAnalyzer:
|
||
"""Enhanced repository analyzer with memory capabilities and parallel processing."""
|
||
|
||
def __init__(self, api_key: str, memory_config: Dict[str, Any]):
|
||
self.client = anthropic.Anthropic(api_key=api_key)
|
||
self.memory_manager = MemoryManager(memory_config)
|
||
self.query_engine = MemoryQueryEngine(self.memory_manager)
|
||
self.session_id = str(uuid.uuid4())
|
||
self.temp_dir = None
|
||
|
||
# Performance optimization settings
|
||
self.max_workers = memory_config.get('max_workers', 10) # Parallel processing
|
||
self.batch_size = memory_config.get('batch_size', 10) # OPTIMIZED: Batch processing (REDUCED from 20 to 10)
|
||
self.cache_ttl = memory_config.get('cache_ttl', 3600) # Cache TTL
|
||
self.max_file_size = memory_config.get('max_file_size', 0) # No file size limit (0 = unlimited)
|
||
|
||
# Language mapping for file detection
|
||
self.language_map = {
|
||
'.py': 'Python', '.js': 'JavaScript', '.ts': 'TypeScript',
|
||
'.tsx': 'TypeScript', '.jsx': 'JavaScript', '.java': 'Java',
|
||
'.cpp': 'C++', '.c': 'C', '.cs': 'C#', '.go': 'Go', '.rs': 'Rust',
|
||
'.php': 'PHP', '.rb': 'Ruby', '.swift': 'Swift', '.kt': 'Kotlin',
|
||
'.html': 'HTML', '.css': 'CSS', '.scss': 'SCSS', '.sass': 'SASS',
|
||
'.sql': 'SQL', '.yaml': 'YAML', '.yml': 'YAML', '.json': 'JSON',
|
||
'.xml': 'XML', '.sh': 'Shell', '.dockerfile': 'Docker',
|
||
'.md': 'Markdown', '.txt': 'Text'
|
||
}
|
||
|
||
# Code file extensions to analyze
|
||
self.code_extensions = set(self.language_map.keys())
|
||
|
||
async def analyze_files_parallel(self, files_to_analyze: List[Tuple[Path, str]], repo_id: str) -> List[FileAnalysis]:
|
||
"""Analyze files in parallel batches for better performance."""
|
||
file_analyses = []
|
||
|
||
# Process files in batches
|
||
for i in range(0, len(files_to_analyze), self.batch_size):
|
||
batch = files_to_analyze[i:i + self.batch_size]
|
||
print(f"Processing batch {i//self.batch_size + 1}/{(len(files_to_analyze) + self.batch_size - 1)//self.batch_size} ({len(batch)} files)")
|
||
|
||
# Create tasks for parallel execution
|
||
tasks = []
|
||
for file_path, content in batch:
|
||
# Process all files regardless of size (no file size limit)
|
||
task = self.analyze_file_with_memory(file_path, content, repo_id)
|
||
tasks.append(task)
|
||
|
||
# Execute batch in parallel
|
||
if tasks:
|
||
batch_results = await asyncio.gather(*tasks, return_exceptions=True)
|
||
|
||
# Process results
|
||
for j, result in enumerate(batch_results):
|
||
if isinstance(result, Exception):
|
||
print(f"Error analyzing file {batch[j][0].name}: {result}")
|
||
# Create a basic analysis for failed files
|
||
failed_analysis = FileAnalysis(
|
||
path=str(batch[j][0]),
|
||
language=self.detect_language(batch[j][0]),
|
||
lines_of_code=len(batch[j][1].splitlines()),
|
||
severity_score=5.0,
|
||
issues_found=[f"Analysis failed: {str(result)}"],
|
||
recommendations=["Review this file manually"]
|
||
)
|
||
file_analyses.append(failed_analysis)
|
||
else:
|
||
file_analyses.append(result)
|
||
|
||
# Small delay between batches to avoid overwhelming the API
|
||
await asyncio.sleep(0.5)
|
||
|
||
return file_analyses
|
||
|
||
def clone_repository(self, repo_path: str) -> str:
|
||
"""Clone repository or use existing path."""
|
||
if os.path.exists(repo_path):
|
||
print(f"Using existing repository: {repo_path}")
|
||
return repo_path
|
||
else:
|
||
print(f"Cloning repository: {repo_path}")
|
||
self.temp_dir = tempfile.mkdtemp(prefix="repo_analysis_")
|
||
try:
|
||
git.Repo.clone_from(repo_path, self.temp_dir)
|
||
return self.temp_dir
|
||
except Exception as e:
|
||
raise Exception(f"Failed to clone repository: {e}")
|
||
|
||
def calculate_repo_id(self, repo_path: str) -> str:
|
||
"""Generate consistent repository ID."""
|
||
return hashlib.sha256(repo_path.encode()).hexdigest()[:16]
|
||
|
||
def get_file_language(self, file_path: Path) -> str:
|
||
"""Get programming language from file extension."""
|
||
return self.language_map.get(file_path.suffix.lower(), 'Unknown')
|
||
|
||
def calculate_complexity_score(self, content: str) -> float:
|
||
"""Calculate basic complexity score based on code patterns."""
|
||
lines = content.split('\n')
|
||
complexity_indicators = ['if', 'else', 'elif', 'for', 'while', 'try', 'except', 'catch', 'switch']
|
||
|
||
complexity = 1
|
||
for line in lines:
|
||
line_lower = line.lower().strip()
|
||
for indicator in complexity_indicators:
|
||
if indicator in line_lower:
|
||
complexity += 1
|
||
|
||
# Normalize to 1-10 scale
|
||
return min(complexity / max(len(lines), 1) * 100, 10.0)
|
||
|
||
async def analyze_file_with_memory(self, file_path: Path, content: str, repo_id: str) -> FileAnalysis:
|
||
"""Analyze file with memory-enhanced context."""
|
||
language = self.get_file_language(file_path)
|
||
lines_of_code = len([line for line in content.split('\n') if line.strip()])
|
||
complexity_score = self.calculate_complexity_score(content)
|
||
|
||
# Skip memory operations for faster analysis
|
||
similar_analyses = []
|
||
persistent_knowledge = []
|
||
|
||
# Build enhanced context for analysis
|
||
context_info = ""
|
||
if similar_analyses:
|
||
context_info += f"\nSimilar files previously analyzed:\n"
|
||
for similar in similar_analyses[:2]:
|
||
context_info += f"- {similar['file_path']}: Found {len(similar.get('analysis_data', {}).get('issues_found', []))} issues\n"
|
||
|
||
if persistent_knowledge:
|
||
context_info += f"\nRelevant best practices:\n"
|
||
for knowledge in persistent_knowledge[:3]:
|
||
context_info += f"- {knowledge['content'][:100]}...\n"
|
||
|
||
# Truncate content if too long
|
||
if len(content) > 4000:
|
||
content = content[:4000] + "\n... [truncated for analysis]"
|
||
|
||
print(f" Analyzing {file_path.name} ({language}, {lines_of_code} lines)")
|
||
|
||
# Create comprehensive analysis prompt with memory context
|
||
prompt = f"""
|
||
You are a senior software engineer with 25+ years of experience. Analyze this {language} code file with context from previous analyses.
|
||
|
||
FILENAME: {file_path.name}
|
||
LANGUAGE: {language}
|
||
LINES OF CODE: {lines_of_code}
|
||
|
||
{context_info}
|
||
|
||
CODE:
|
||
```{language.lower()}
|
||
{content}
|
||
```
|
||
|
||
Provide a comprehensive analysis covering:
|
||
|
||
1. ISSUES FOUND: List at least 5-10 specific problems, bugs, security vulnerabilities, or code smells (be thorough and detailed)
|
||
2. RECOMMENDATIONS: Provide at least 5-10 actionable suggestions for improvement
|
||
3. CODE QUALITY: Overall assessment of code quality and maintainability
|
||
4. SECURITY: Any security concerns or vulnerabilities
|
||
5. PERFORMANCE: Potential performance issues or optimizations
|
||
6. BEST PRACTICES: Adherence to coding standards and best practices
|
||
|
||
IMPORTANT: For ISSUES FOUND, please list multiple specific issues (not just 1-3). Be comprehensive.
|
||
Rate the overall code quality from 1-10 where 10 is excellent.
|
||
|
||
ANALYSIS:
|
||
"""
|
||
|
||
try:
|
||
message = self.client.messages.create(
|
||
model=os.getenv("CLAUDE_MODEL", "claude-3-5-haiku-latest"),
|
||
max_tokens=3000,
|
||
temperature=0.1,
|
||
messages=[{"role": "user", "content": prompt}]
|
||
)
|
||
|
||
analysis_text = message.content[0].text.strip()
|
||
|
||
# Extract severity score from analysis
|
||
severity_match = re.search(r'(\d+(?:\.\d+)?)/10', analysis_text)
|
||
severity_score = float(severity_match.group(1)) if severity_match else 5.0
|
||
|
||
# Parse issues and recommendations from the text
|
||
issues = self.extract_issues_from_analysis(analysis_text)
|
||
recommendations = self.extract_recommendations_from_analysis(analysis_text)
|
||
|
||
# Create file analysis object
|
||
file_analysis = FileAnalysis(
|
||
path=str(file_path.relative_to(Path(self.temp_dir or '.'))),
|
||
language=language,
|
||
lines_of_code=lines_of_code,
|
||
complexity_score=complexity_score,
|
||
issues_found=issues,
|
||
recommendations=recommendations,
|
||
detailed_analysis=analysis_text,
|
||
severity_score=severity_score,
|
||
content=content # Store actual file content for code examples
|
||
)
|
||
|
||
# Skip memory operations for faster analysis
|
||
# await self.memory_manager.store_code_analysis(
|
||
# repo_id, str(file_analysis.path), asdict(file_analysis)
|
||
# )
|
||
|
||
# await self.extract_knowledge_from_analysis(file_analysis, repo_id)
|
||
|
||
return file_analysis
|
||
|
||
except Exception as e:
|
||
print(f" Error analyzing {file_path.name}: {e}")
|
||
return FileAnalysis(
|
||
path=str(file_path),
|
||
language=language,
|
||
lines_of_code=lines_of_code,
|
||
complexity_score=complexity_score,
|
||
issues_found=[f"Analysis failed: {str(e)}"],
|
||
recommendations=["Review file manually due to analysis error"],
|
||
detailed_analysis=f"Analysis failed due to error: {str(e)}",
|
||
severity_score=5.0,
|
||
content=content # Store content even on error
|
||
)
|
||
|
||
async def analyze_files_batch(self, combined_prompt: str) -> str:
|
||
"""Analyze multiple files in a single API call for smart batching."""
|
||
try:
|
||
print(f"🚀 [BATCH API] Making single API call for multiple files")
|
||
|
||
# Make single API call to Claude
|
||
message = self.client.messages.create(
|
||
model=os.getenv("CLAUDE_MODEL", "claude-3-5-haiku-latest"),
|
||
max_tokens=4000, # Increased for multi-file response
|
||
temperature=0.1,
|
||
messages=[{"role": "user", "content": combined_prompt}]
|
||
)
|
||
|
||
response_text = message.content[0].text.strip()
|
||
print(f"✅ [BATCH API] Received response for multiple files")
|
||
|
||
return response_text
|
||
|
||
except Exception as e:
|
||
print(f"❌ [BATCH API] Error in batch analysis: {e}")
|
||
raise e
|
||
|
||
def extract_issues_from_analysis(self, analysis_text: str) -> List[str]:
|
||
"""Extract issues from analysis text."""
|
||
issues = []
|
||
lines = analysis_text.split('\n')
|
||
|
||
# Look for common issue indicators
|
||
issue_keywords = ['issue', 'problem', 'bug', 'vulnerability', 'error', 'warning', 'concern', 'risk', 'flaw', 'weakness', 'deficiency', 'smell']
|
||
|
||
# Also check for numbered/bulleted lists
|
||
numbered_pattern = re.compile(r'^\d+[\.\)]\s*(.+)')
|
||
bullet_pattern = re.compile(r'^[-•*]\s*(.+)')
|
||
|
||
for line in lines:
|
||
line_lower = line.lower().strip()
|
||
|
||
# Check if line contains issue keywords
|
||
if any(keyword in line_lower for keyword in issue_keywords):
|
||
if line.strip() and not line.strip().startswith('#'):
|
||
# Clean up the line
|
||
cleaned_line = line.strip()
|
||
# Remove common prefixes if present
|
||
cleaned_line = re.sub(r'^(ISSUES? FOUND:|PROBLEMS?:|BUGS?:)\s*', '', cleaned_line, flags=re.IGNORECASE)
|
||
if cleaned_line and len(cleaned_line) > 10: # Filter out very short lines
|
||
issues.append(cleaned_line)
|
||
|
||
# Also check for numbered or bulleted lines (these are often issue lists)
|
||
numbered_match = numbered_pattern.match(line)
|
||
bullet_match = bullet_pattern.match(line)
|
||
|
||
if numbered_match or bullet_match:
|
||
content = (numbered_match or bullet_match).group(1).strip()
|
||
if content and len(content) > 10:
|
||
# Check if it looks like an issue description
|
||
if any(keyword in content.lower() for keyword in issue_keywords):
|
||
issues.append(content)
|
||
|
||
# Remove duplicates while preserving order
|
||
seen = set()
|
||
unique_issues = []
|
||
for issue in issues:
|
||
issue_lower = issue.lower()
|
||
if issue_lower not in seen:
|
||
seen.add(issue_lower)
|
||
unique_issues.append(issue)
|
||
|
||
return unique_issues[:15] # Return up to 15 issues
|
||
|
||
def extract_recommendations_from_analysis(self, analysis_text: str) -> List[str]:
|
||
"""Extract recommendations from analysis text."""
|
||
recommendations = []
|
||
lines = analysis_text.split('\n')
|
||
|
||
# Look for recommendation indicators
|
||
rec_keywords = ['recommend', 'suggest', 'should', 'consider', 'improve', 'implement', 'add', 'refactor', 'optimize', 'enhance']
|
||
|
||
# Also check for numbered/bulleted lists
|
||
numbered_pattern = re.compile(r'^\d+[\.\)]\s*(.+)')
|
||
bullet_pattern = re.compile(r'^[-•*]\s*(.+)')
|
||
|
||
for line in lines:
|
||
line_lower = line.lower().strip()
|
||
|
||
# Check if line contains recommendation keywords
|
||
if any(keyword in line_lower for keyword in rec_keywords):
|
||
if line.strip() and not line.strip().startswith('#'):
|
||
# Clean up the line
|
||
cleaned_line = line.strip()
|
||
# Remove common prefixes if present
|
||
cleaned_line = re.sub(r'^(RECOMMENDATIONS?:|SUGGESTIONS?:)\s*', '', cleaned_line, flags=re.IGNORECASE)
|
||
if cleaned_line and len(cleaned_line) > 10: # Filter out very short lines
|
||
recommendations.append(cleaned_line)
|
||
|
||
# Also check for numbered or bulleted lines
|
||
numbered_match = numbered_pattern.match(line)
|
||
bullet_match = bullet_pattern.match(line)
|
||
|
||
if numbered_match or bullet_match:
|
||
content = (numbered_match or bullet_match).group(1).strip()
|
||
if content and len(content) > 10:
|
||
# Check if it looks like a recommendation
|
||
if any(keyword in content.lower() for keyword in rec_keywords):
|
||
recommendations.append(content)
|
||
|
||
# Remove duplicates while preserving order
|
||
seen = set()
|
||
unique_recommendations = []
|
||
for rec in recommendations:
|
||
rec_lower = rec.lower()
|
||
if rec_lower not in seen:
|
||
seen.add(rec_lower)
|
||
unique_recommendations.append(rec)
|
||
|
||
return unique_recommendations[:15] # Return up to 15 recommendations
|
||
|
||
async def extract_knowledge_from_analysis(self, file_analysis: FileAnalysis, repo_id: str):
|
||
"""Extract valuable knowledge from analysis for persistent storage."""
|
||
try:
|
||
# Extract security-related knowledge
|
||
security_issues = []
|
||
if isinstance(file_analysis.issues_found, (list, tuple)):
|
||
security_issues = [issue for issue in file_analysis.issues_found
|
||
if any(sec in issue.lower() for sec in ['security', 'vulnerability', 'injection', 'xss', 'auth'])]
|
||
|
||
for issue in security_issues:
|
||
await self.memory_manager.store_persistent_memory(
|
||
content=f"Security issue in {file_analysis.language}: {issue}",
|
||
category='security_vulnerability',
|
||
confidence=0.8,
|
||
source_repos=[repo_id]
|
||
)
|
||
|
||
# Extract best practices
|
||
best_practices = []
|
||
if isinstance(file_analysis.recommendations, (list, tuple)):
|
||
best_practices = [rec for rec in file_analysis.recommendations
|
||
if any(bp in rec.lower() for bp in ['best practice', 'standard', 'convention'])]
|
||
|
||
for practice in best_practices:
|
||
await self.memory_manager.store_persistent_memory(
|
||
content=f"{file_analysis.language} best practice: {practice}",
|
||
category='best_practice',
|
||
confidence=0.7,
|
||
source_repos=[repo_id]
|
||
)
|
||
|
||
# Extract code patterns
|
||
if file_analysis.severity_score < 5:
|
||
await self.memory_manager.store_persistent_memory(
|
||
content=f"Low quality {file_analysis.language} pattern: {file_analysis.detailed_analysis[:200]}",
|
||
category='code_pattern',
|
||
confidence=0.6,
|
||
source_repos=[repo_id]
|
||
)
|
||
|
||
except Exception as e:
|
||
self.memory_manager.logger.error(f"Knowledge extraction failed: {e}")
|
||
|
||
def scan_repository(self, repo_path: str) -> List[Tuple[Path, str]]:
|
||
"""Scan repository and collect ALL files for analysis."""
|
||
print(f"Scanning repository: {repo_path}")
|
||
|
||
files_to_analyze = []
|
||
|
||
# Important files to always include (exclude auto-generated lock files)
|
||
important_files = {
|
||
'README.md', 'package.json', 'requirements.txt', 'Dockerfile',
|
||
'docker-compose.yml', 'tsconfig.json', 'next.config.js',
|
||
'tailwind.config.js', 'webpack.config.js', '.env.example',
|
||
'Cargo.toml', 'pom.xml', 'build.gradle', 'composer.json',
|
||
'Gemfile', 'go.mod'
|
||
}
|
||
|
||
for root, dirs, files in os.walk(repo_path):
|
||
# Skip common build/cache directories
|
||
dirs[:] = [d for d in dirs if not d.startswith('.') and
|
||
d not in {'node_modules', '__pycache__', 'build', 'dist', 'target',
|
||
'venv', 'env', '.git', '.next', 'coverage', 'vendor',
|
||
'bower_components', '.gradle', '.m2', '.cargo'}]
|
||
|
||
for file in files:
|
||
file_path = Path(root) / file
|
||
|
||
# Skip auto-generated files that are meaningless for code quality analysis
|
||
if file.lower() in ['package-lock.json', 'yarn.lock', 'composer.lock', 'pnpm-lock.yaml']:
|
||
continue
|
||
|
||
# Skip large files (increased limit for comprehensive analysis)
|
||
try:
|
||
if file_path.stat().st_size > 2000000: # 2MB limit
|
||
print(f" Skipping large file: {file_path.name} ({file_path.stat().st_size / 1024 / 1024:.1f}MB)")
|
||
continue
|
||
except:
|
||
continue
|
||
|
||
# Include important files or files with code extensions
|
||
should_include = (
|
||
file.lower() in important_files or
|
||
file_path.suffix.lower() in self.code_extensions or
|
||
file.lower().startswith('dockerfile') or
|
||
file.lower().startswith('makefile') or
|
||
file.lower().startswith('cmake')
|
||
)
|
||
|
||
if should_include:
|
||
try:
|
||
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
||
content = f.read()
|
||
if content.strip(): # Only non-empty files
|
||
files_to_analyze.append((file_path, content))
|
||
except Exception as e:
|
||
print(f"Could not read {file_path}: {e}")
|
||
|
||
print(f"Found {len(files_to_analyze)} files to analyze")
|
||
return files_to_analyze
|
||
|
||
async def analyze_repository_with_memory(self, repo_path: str) -> RepositoryAnalysis:
|
||
"""Main analysis function with memory integration - analyzes ALL files."""
|
||
try:
|
||
# Generate repo ID and check for cached analysis
|
||
repo_id = self.calculate_repo_id(repo_path)
|
||
|
||
# Check working memory for recent analysis
|
||
cached_analysis = await self.memory_manager.get_working_memory(f"repo_analysis:{repo_id}")
|
||
if cached_analysis:
|
||
print("Using cached repository analysis from memory")
|
||
return RepositoryAnalysis(**cached_analysis)
|
||
|
||
# Clone/access repository
|
||
actual_repo_path = self.clone_repository(repo_path)
|
||
|
||
# Get analysis context from memory (no user query needed)
|
||
context_memories = await self.get_analysis_context(repo_path, "", repo_id)
|
||
|
||
# Scan ALL files
|
||
files_to_analyze = self.scan_repository(actual_repo_path)
|
||
|
||
if not files_to_analyze:
|
||
raise Exception("No files found to analyze")
|
||
|
||
# Analyze files with parallel processing for better performance
|
||
print(f"Starting comprehensive analysis of {len(files_to_analyze)} files with parallel processing...")
|
||
file_analyses = await self.analyze_files_parallel(files_to_analyze, repo_id)
|
||
|
||
# Repository-level analyses with memory context
|
||
print("Performing repository-level analysis with memory context...")
|
||
architecture_assessment, security_assessment = await self.analyze_repository_overview_with_memory(
|
||
actual_repo_path, file_analyses, context_memories, repo_id
|
||
)
|
||
|
||
# Calculate overall quality score safely
|
||
if file_analyses and len(file_analyses) > 0:
|
||
valid_scores = [fa.severity_score for fa in file_analyses if fa.severity_score is not None]
|
||
avg_quality = sum(valid_scores) / len(valid_scores) if valid_scores else 5.0
|
||
else:
|
||
avg_quality = 5.0
|
||
|
||
# Generate statistics
|
||
languages = dict(Counter(fa.language for fa in file_analyses))
|
||
total_lines = sum(fa.lines_of_code for fa in file_analyses)
|
||
|
||
# Create repository analysis
|
||
repo_analysis = RepositoryAnalysis(
|
||
repo_path=repo_path,
|
||
total_files=len(file_analyses),
|
||
total_lines=total_lines,
|
||
languages=languages,
|
||
architecture_assessment=architecture_assessment,
|
||
security_assessment=security_assessment,
|
||
code_quality_score=avg_quality,
|
||
file_analyses=file_analyses,
|
||
executive_summary=""
|
||
)
|
||
|
||
# Generate executive summary with memory context
|
||
print("Generating memory-enhanced executive summary...")
|
||
repo_analysis.executive_summary = await self.generate_executive_summary_with_memory(
|
||
repo_analysis, context_memories
|
||
)
|
||
|
||
# Store analysis in episodic memory (automated analysis)
|
||
await self.memory_manager.store_episodic_memory(
|
||
self.session_id, "Complete automated repository analysis",
|
||
f"Analyzed {repo_analysis.total_files} files, found {sum(len(fa.issues_found) for fa in file_analyses)} issues",
|
||
repo_id,
|
||
{
|
||
'repo_path': repo_path,
|
||
'quality_score': avg_quality,
|
||
'total_issues': sum(len(fa.issues_found) for fa in file_analyses),
|
||
'analysis_type': 'automated_comprehensive'
|
||
}
|
||
)
|
||
|
||
# Cache analysis in working memory
|
||
await self.memory_manager.store_working_memory(
|
||
f"repo_analysis:{repo_id}",
|
||
asdict(repo_analysis),
|
||
ttl=7200 # 2 hours
|
||
)
|
||
|
||
return repo_analysis
|
||
|
||
finally:
|
||
# Cleanup
|
||
if self.temp_dir and os.path.exists(self.temp_dir):
|
||
shutil.rmtree(self.temp_dir)
|
||
print("Temporary files cleaned up")
|
||
|
||
async def get_analysis_context(self, repo_path: str, user_query: str, repo_id: str) -> Dict[str, List]:
|
||
"""Gather relevant context from memory systems."""
|
||
context = {
|
||
'episodic_memories': [],
|
||
'persistent_knowledge': [],
|
||
'similar_analyses': []
|
||
}
|
||
|
||
# Get relevant persistent knowledge for comprehensive analysis
|
||
context['persistent_knowledge'] = await self.memory_manager.retrieve_persistent_memories(
|
||
"code quality security best practices", limit=15
|
||
)
|
||
|
||
# Find similar code analyses
|
||
context['similar_analyses'] = await self.memory_manager.search_similar_code(
|
||
"repository analysis", repo_id, limit=10
|
||
)
|
||
|
||
return context
|
||
|
||
async def analyze_repository_overview_with_memory(self, repo_path: str, file_analyses: List[FileAnalysis],
|
||
context_memories: Dict, repo_id: str) -> Tuple[str, str]:
|
||
"""Analyze repository architecture and security with memory context."""
|
||
print("Analyzing repository overview with memory context...")
|
||
|
||
# Prepare summary data
|
||
languages = dict(Counter(fa.language for fa in file_analyses))
|
||
total_lines = sum(fa.lines_of_code for fa in file_analyses)
|
||
# Calculate average quality safely
|
||
if file_analyses and len(file_analyses) > 0:
|
||
valid_scores = [fa.severity_score for fa in file_analyses if fa.severity_score is not None]
|
||
avg_quality = sum(valid_scores) / len(valid_scores) if valid_scores else 5.0
|
||
else:
|
||
avg_quality = 5.0
|
||
|
||
# Build memory context
|
||
memory_context = ""
|
||
if context_memories['persistent_knowledge']:
|
||
memory_context += "Relevant knowledge from previous analyses:\n"
|
||
for knowledge in context_memories['persistent_knowledge'][:3]:
|
||
memory_context += f"- {knowledge['content']}\n"
|
||
|
||
if context_memories['similar_analyses']:
|
||
memory_context += "\nSimilar repositories analyzed:\n"
|
||
for similar in context_memories['similar_analyses'][:2]:
|
||
memory_context += f"- {similar['file_path']}: {len(similar.get('analysis_data', {}).get('issues_found', []))} issues found\n"
|
||
|
||
# Get repository structure
|
||
structure_lines = []
|
||
try:
|
||
for root, dirs, files in os.walk(repo_path):
|
||
dirs[:] = [d for d in dirs if not d.startswith('.') and d not in {'node_modules', '__pycache__'}]
|
||
level = root.replace(repo_path, '').count(os.sep)
|
||
indent = ' ' * level
|
||
structure_lines.append(f"{indent}{os.path.basename(root)}/")
|
||
for file in files[:3]: # Limit files shown per directory
|
||
structure_lines.append(f"{indent} {file}")
|
||
if len(structure_lines) > 50: # Limit total structure size
|
||
break
|
||
except Exception as e:
|
||
structure_lines = [f"Error reading structure: {e}"]
|
||
|
||
# Architecture analysis with memory context
|
||
arch_prompt = f"""
|
||
You are a Senior Software Architect with 25+ years of experience analyzing enterprise systems.
|
||
|
||
{memory_context}
|
||
|
||
Analyze this repository:
|
||
|
||
REPOSITORY STRUCTURE:
|
||
{chr(10).join(structure_lines[:30])}
|
||
|
||
STATISTICS:
|
||
- Total files analyzed: {len(file_analyses)}
|
||
- Total lines of code: {total_lines:,}
|
||
- Languages: {languages}
|
||
- Average code quality: {avg_quality:.1f}/10
|
||
- Large files (>500 lines): {len([fa for fa in file_analyses if fa.lines_of_code > 500])}
|
||
- Critical files (score < 4): {len([fa for fa in file_analyses if fa.severity_score < 4])}
|
||
|
||
TOP FILE ISSUES:
|
||
{chr(10).join([f"- {fa.path}: {len(fa.issues_found) if isinstance(fa.issues_found, (list, tuple)) else 0} issues, {fa.lines_of_code} lines, quality: {fa.severity_score:.1f}/10" for fa in file_analyses[:15]])}
|
||
|
||
Provide a comprehensive architectural assessment following this structure:
|
||
|
||
**1. PROJECT TYPE AND PURPOSE:**
|
||
- What type of application/system is this?
|
||
- What is its primary business purpose?
|
||
- What technology stack is being used?
|
||
|
||
**2. TECHNOLOGY STACK EVALUATION:**
|
||
- Good technology choices and why they work well
|
||
- Problematic technology choices and their issues
|
||
- Recommended technology upgrades and migrations
|
||
|
||
**3. CODE ORGANIZATION AND STRUCTURE:**
|
||
- How is the codebase organized?
|
||
- Is the folder/file structure logical and maintainable?
|
||
- What architectural patterns are being used?
|
||
- What's missing in terms of organization?
|
||
|
||
**4. SCALABILITY AND MAINTAINABILITY CONCERNS:**
|
||
- Can this system handle growth and increased load?
|
||
- How difficult is it to maintain and extend?
|
||
- What are the specific scalability bottlenecks?
|
||
- What maintainability issues exist?
|
||
|
||
**5. KEY ARCHITECTURAL RECOMMENDATIONS:**
|
||
- Top 5-10 specific improvements needed
|
||
- Priority order for implementing changes
|
||
- Estimated effort and impact for each recommendation
|
||
|
||
Incorporate insights from the memory context provided above.
|
||
Keep response under 2000 words and focus on actionable insights with specific examples.
|
||
"""
|
||
|
||
# Security analysis with memory context
|
||
security_issues = []
|
||
for fa in file_analyses:
|
||
if isinstance(fa.issues_found, (list, tuple)):
|
||
security_issues.extend([issue for issue in fa.issues_found if
|
||
any(keyword in issue.lower() for keyword in
|
||
['security', 'vulnerability', 'injection', 'xss', 'auth', 'password'])])
|
||
|
||
sec_prompt = f"""
|
||
You are a Senior Security Engineer with 20+ years of experience in enterprise security.
|
||
|
||
{memory_context}
|
||
|
||
Security Analysis for repository with {len(file_analyses)} files:
|
||
|
||
SECURITY ISSUES FOUND:
|
||
{chr(10).join(security_issues[:20]) if security_issues else "No obvious security issues detected"}
|
||
|
||
HIGH-RISK FILE TYPES PRESENT:
|
||
{[lang for lang, count in languages.items() if lang in ['JavaScript', 'TypeScript', 'Python', 'PHP', 'SQL']]}
|
||
|
||
SECURITY-RELEVANT FILES:
|
||
{chr(10).join([f"- {fa.path}: {fa.lines_of_code} lines, issues: {len(fa.issues_found) if isinstance(fa.issues_found, (list, tuple)) else 0}" for fa in file_analyses if any(['auth' in str(fa.path).lower(), 'security' in str(fa.path).lower(), 'login' in str(fa.path).lower(), 'password' in str(fa.path).lower()])][:15])}
|
||
|
||
Provide a comprehensive security assessment following this structure:
|
||
|
||
**1. CRITICAL VULNERABILITIES:**
|
||
- List all critical security vulnerabilities found
|
||
- For each vulnerability, provide:
|
||
- Location (file and line numbers)
|
||
- Vulnerability type (SQL injection, XSS, CSRF, etc.)
|
||
- Evidence of the vulnerability
|
||
- Attack scenario and potential impact
|
||
- Specific fix recommendations
|
||
|
||
**2. AUTHENTICATION AND AUTHORIZATION:**
|
||
- How is user authentication implemented?
|
||
- What authorization mechanisms are in place?
|
||
- Are there any authentication bypass vulnerabilities?
|
||
- Are session management practices secure?
|
||
|
||
**3. DATA PROTECTION AND PRIVACY:**
|
||
- How is sensitive data handled and stored?
|
||
- Are there data encryption mechanisms in place?
|
||
- Are there any data exposure vulnerabilities?
|
||
- Is input validation properly implemented?
|
||
|
||
**4. COMMON VULNERABILITY PATTERNS:**
|
||
- SQL injection vulnerabilities
|
||
- Cross-site scripting (XSS) issues
|
||
- Cross-site request forgery (CSRF) vulnerabilities
|
||
- Insecure direct object references
|
||
- Security misconfigurations
|
||
|
||
**5. IMMEDIATE SECURITY ACTIONS REQUIRED:**
|
||
- Top 5 critical security fixes needed immediately
|
||
- Specific steps to remediate each issue
|
||
- Security best practices to implement
|
||
- Monitoring and detection improvements
|
||
|
||
Incorporate insights from the memory context provided above.
|
||
Keep response under 1500 words and focus on actionable security recommendations with specific code examples where possible.
|
||
"""
|
||
|
||
try:
|
||
# Run both analyses
|
||
arch_task = self.client.messages.create(
|
||
model=os.getenv("CLAUDE_MODEL", "claude-3-5-haiku-latest"),
|
||
max_tokens=2500,
|
||
temperature=0.1,
|
||
messages=[{"role": "user", "content": arch_prompt}]
|
||
)
|
||
|
||
sec_task = self.client.messages.create(
|
||
model=os.getenv("CLAUDE_MODEL", "claude-3-5-haiku-latest"),
|
||
max_tokens=2000,
|
||
temperature=0.1,
|
||
messages=[{"role": "user", "content": sec_prompt}]
|
||
)
|
||
|
||
architecture_assessment = arch_task.content[0].text
|
||
security_assessment = sec_task.content[0].text
|
||
|
||
# Store insights as persistent knowledge
|
||
await self.memory_manager.store_persistent_memory(
|
||
content=f"Architecture pattern: {architecture_assessment[:300]}...",
|
||
category='architecture',
|
||
confidence=0.7,
|
||
source_repos=[repo_id]
|
||
)
|
||
|
||
return architecture_assessment, security_assessment
|
||
|
||
except Exception as e:
|
||
return f"Architecture analysis failed: {e}", f"Security analysis failed: {e}"
|
||
|
||
async def generate_executive_summary_with_memory(self, analysis: RepositoryAnalysis, context_memories: Dict) -> str:
|
||
"""Generate comprehensive executive summary with enhanced business context."""
|
||
print("Generating enhanced executive summary with memory context...")
|
||
|
||
# Build memory context for executive summary
|
||
executive_context = ""
|
||
if context_memories.get('episodic_memories'):
|
||
executive_context += "Previous executive discussions:\n"
|
||
for memory in context_memories['episodic_memories'][:2]:
|
||
if 'executive' in memory.get('ai_response', '').lower():
|
||
executive_context += f"- {memory['ai_response'][:200]}...\n"
|
||
|
||
# Calculate critical metrics
|
||
critical_files = len([fa for fa in analysis.file_analyses if fa.severity_score < 4])
|
||
high_priority_files = len([fa for fa in analysis.file_analyses if 4 <= fa.severity_score < 6])
|
||
total_issues = sum(len(fa.issues_found) if isinstance(fa.issues_found, (list, tuple)) else 0 for fa in analysis.file_analyses)
|
||
large_files = len([fa for fa in analysis.file_analyses if fa.lines_of_code > 500])
|
||
security_issues = len([fa for fa in analysis.file_analyses if any('security' in str(issue).lower() for issue in (fa.issues_found if isinstance(fa.issues_found, (list, tuple)) else []))])
|
||
|
||
prompt = f"""
|
||
You are presenting to C-level executives about a critical technical assessment. Create a comprehensive executive summary.
|
||
|
||
{executive_context}
|
||
|
||
REPOSITORY METRICS:
|
||
- Total Files: {analysis.total_files}
|
||
- Lines of Code: {analysis.total_lines:,}
|
||
- Languages: {', '.join(list(analysis.languages.keys())[:5]) if analysis.languages else 'Unknown'}
|
||
- Code Quality Score: {analysis.code_quality_score:.1f}/10
|
||
|
||
CRITICAL FINDINGS:
|
||
- Total Issues Identified: {total_issues}
|
||
- Critical Files (Score < 4): {critical_files}
|
||
- High Priority Files (Score 4-6): {high_priority_files}
|
||
- Large Monolithic Files (>500 lines): {large_files}
|
||
- Security Vulnerabilities: {security_issues}
|
||
- High Quality Files (Score 8+): {len([fa for fa in analysis.file_analyses if fa.severity_score >= 8])}
|
||
|
||
Create a comprehensive executive summary covering:
|
||
|
||
1. **BUSINESS IMPACT OVERVIEW** (2-3 paragraphs):
|
||
- What this application/system does for the business
|
||
- How current technical debt is affecting business operations
|
||
- Specific business risks and their potential impact
|
||
|
||
2. **CRITICAL SYSTEM STATISTICS** (bullet points):
|
||
- Total issues and their business impact
|
||
- Largest problematic files affecting performance
|
||
- Security vulnerabilities requiring immediate attention
|
||
- Test coverage gaps affecting reliability
|
||
|
||
3. **KEY BUSINESS RISKS** (3-5 critical risks):
|
||
- System reliability and downtime risks
|
||
- Development velocity impact on revenue
|
||
- Security vulnerabilities and compliance risks
|
||
- Scalability limitations affecting growth
|
||
- Technical debt costs and competitive disadvantage
|
||
|
||
4. **FINANCIAL IMPACT ASSESSMENT**:
|
||
- Development velocity impact (percentage of time on fixes vs features)
|
||
- Technical debt cost estimation
|
||
- Infrastructure cost implications
|
||
- System capacity limitations
|
||
- Maintenance overhead costs
|
||
|
||
5. **IMMEDIATE ACTIONS REQUIRED** (Next 24-48 hours):
|
||
- Critical files requiring immediate fixes
|
||
- Security vulnerabilities needing urgent attention
|
||
- Process improvements to prevent further degradation
|
||
|
||
Focus on business outcomes, financial impact, and competitive implications. Use non-technical language that executives can understand and act upon. Keep under 1000 words but be comprehensive.
|
||
"""
|
||
|
||
try:
|
||
message = self.client.messages.create(
|
||
model=os.getenv("CLAUDE_MODEL", "claude-3-5-haiku-latest"),
|
||
max_tokens=1500,
|
||
temperature=0.1,
|
||
messages=[{"role": "user", "content": prompt}]
|
||
)
|
||
return message.content[0].text
|
||
except Exception as e:
|
||
return f"Executive summary generation failed: {e}"
|
||
|
||
def _create_language_pie_chart(self, languages: Dict[str, int]) -> Drawing:
|
||
"""Create a pie chart showing language distribution."""
|
||
drawing = Drawing(400, 200)
|
||
pie = Pie()
|
||
pie.x = 150
|
||
pie.y = 50
|
||
pie.width = 150
|
||
pie.height = 150
|
||
|
||
# Prepare data
|
||
if languages and len(languages) > 0:
|
||
labels = list(languages.keys())[:8] # Top 8 languages
|
||
values = [languages[lang] for lang in labels]
|
||
|
||
pie.data = values
|
||
pie.labels = labels
|
||
|
||
# Use distinct colors
|
||
chart_colors = [
|
||
colors.HexColor('#3b82f6'), # Blue
|
||
colors.HexColor('#10b981'), # Green
|
||
colors.HexColor('#f59e0b'), # Amber
|
||
colors.HexColor('#ef4444'), # Red
|
||
colors.HexColor('#8b5cf6'), # Purple
|
||
colors.HexColor('#ec4899'), # Pink
|
||
colors.HexColor('#06b6d4'), # Cyan
|
||
colors.HexColor('#f97316'), # Orange
|
||
]
|
||
pie.slices.strokeWidth = 1
|
||
pie.slices.strokeColor = colors.white
|
||
|
||
for i, color in enumerate(chart_colors[:len(values)]):
|
||
pie.slices[i].fillColor = color
|
||
|
||
pie.sideLabels = 1
|
||
pie.simpleLabels = 0
|
||
else:
|
||
# Empty state
|
||
pie.data = [1]
|
||
pie.labels = ['No data']
|
||
pie.slices[0].fillColor = colors.HexColor('#e2e8f0')
|
||
|
||
drawing.add(pie)
|
||
return drawing
|
||
|
||
def _create_quality_bar_chart(self, file_analyses: List) -> Drawing:
|
||
"""Create a bar chart showing file quality distribution."""
|
||
drawing = Drawing(400, 200)
|
||
bc = VerticalBarChart()
|
||
bc.x = 50
|
||
bc.y = 50
|
||
bc.height = 125
|
||
bc.width = 300
|
||
|
||
# Calculate quality counts
|
||
high_count = len([fa for fa in file_analyses if fa.severity_score >= 8])
|
||
medium_count = len([fa for fa in file_analyses if 5 <= fa.severity_score < 8])
|
||
low_count = len([fa for fa in file_analyses if fa.severity_score < 5])
|
||
|
||
bc.data = [[high_count, medium_count, low_count]]
|
||
bc.categoryAxis.categoryNames = ['High', 'Medium', 'Low']
|
||
bc.categoryAxis.labels.fontSize = 10
|
||
bc.valueAxis.valueMin = 0
|
||
bc.valueAxis.valueMax = max(high_count, medium_count, low_count, 1) * 1.2
|
||
|
||
# Colors
|
||
bc.bars[0].fillColor = colors.HexColor('#10b981') # Green for high
|
||
bc.bars[1].fillColor = colors.HexColor('#f59e0b') # Amber for medium
|
||
bc.bars[2].fillColor = colors.HexColor('#ef4444') # Red for low
|
||
|
||
drawing.add(bc)
|
||
return drawing
|
||
|
||
def create_pdf_report(self, analysis: RepositoryAnalysis, output_path: str, progress_mgr=None):
|
||
"""Generate comprehensive PDF report with enhanced 15-section structure."""
|
||
print(f"Generating enhanced PDF report: {output_path}")
|
||
# Ensure target directory exists to avoid failures that cause JSON fallback
|
||
try:
|
||
parent_dir = os.path.dirname(output_path)
|
||
if parent_dir:
|
||
os.makedirs(parent_dir, exist_ok=True)
|
||
except Exception as dir_err:
|
||
print(f"⚠️ Could not create reports directory: {dir_err}")
|
||
|
||
doc = SimpleDocTemplate(output_path, pagesize=A4,
|
||
leftMargin=72, rightMargin=72,
|
||
topMargin=72, bottomMargin=72)
|
||
styles = getSampleStyleSheet()
|
||
story = []
|
||
|
||
# Override all styles to ensure non-italic fonts
|
||
styles['Normal'].fontName = 'Helvetica'
|
||
styles['Heading1'].fontName = 'Helvetica-Bold'
|
||
styles['Heading2'].fontName = 'Helvetica-Bold'
|
||
styles['Heading3'].fontName = 'Helvetica-Bold'
|
||
styles['Heading4'].fontName = 'Helvetica-Bold'
|
||
styles['Heading5'].fontName = 'Helvetica-Bold'
|
||
styles['Heading6'].fontName = 'Helvetica-Bold'
|
||
styles['Code'].fontName = 'Courier'
|
||
|
||
# Add missing 'Heading' style
|
||
styles.add(ParagraphStyle(
|
||
'Heading',
|
||
parent=styles['Heading3'],
|
||
fontSize=14,
|
||
textColor=colors.HexColor('#1e40af'),
|
||
spaceBefore=12,
|
||
spaceAfter=8,
|
||
fontName='Helvetica-Bold' # Explicit non-italic font
|
||
))
|
||
|
||
# Enhanced styles
|
||
title_style = ParagraphStyle(
|
||
'CustomTitle',
|
||
parent=styles['Heading1'],
|
||
fontSize=24,
|
||
textColor=colors.HexColor('#1e40af'),
|
||
spaceAfter=30,
|
||
alignment=TA_CENTER,
|
||
fontName='Helvetica-Bold' # Explicit non-italic font
|
||
)
|
||
|
||
section_style = ParagraphStyle(
|
||
'SectionHeading',
|
||
parent=styles['Heading2'],
|
||
fontSize=16,
|
||
textColor=colors.black, # Black for section headings like reference
|
||
spaceBefore=20, # Reduced spacing
|
||
spaceAfter=10, # Reduced spacing
|
||
borderWidth=0, # No border for cleaner look
|
||
leading=20,
|
||
fontName='Helvetica-Bold' # Explicit non-italic font
|
||
)
|
||
|
||
heading_style = ParagraphStyle(
|
||
'CustomHeading',
|
||
parent=styles['Heading2'],
|
||
fontSize=14, # Slightly smaller
|
||
textColor=colors.black, # Black for subheadings
|
||
spaceBefore=15, # Reduced spacing
|
||
spaceAfter=8, # Reduced spacing
|
||
fontName='Helvetica-Bold' # Explicit non-italic font
|
||
)
|
||
|
||
subheading_style = ParagraphStyle(
|
||
'SubHeading',
|
||
parent=styles['Heading3'],
|
||
fontSize=12, # Standard subheading size
|
||
textColor=colors.black, # Black for consistency
|
||
spaceBefore=12, # Reduced spacing
|
||
spaceAfter=6, # Reduced spacing
|
||
fontName='Helvetica-Bold' # Explicit non-italic font
|
||
)
|
||
|
||
# Code style with minimal spacing to prevent unwanted gaps
|
||
code_style = ParagraphStyle(
|
||
'CodeStyle',
|
||
parent=styles['Code'],
|
||
fontSize=8,
|
||
fontName='Courier', # Courier is already a non-italic monospace font
|
||
leftIndent=20,
|
||
rightIndent=20,
|
||
spaceBefore=5, # Reduced from 10 to minimize gaps
|
||
spaceAfter=5, # Reduced from 10 to minimize gaps
|
||
backColor=colors.HexColor('#f3f4f6'),
|
||
borderWidth=1,
|
||
borderColor=colors.HexColor('#d1d5db'),
|
||
borderPadding=6,
|
||
leading=11 # Reduced line height for code blocks
|
||
)
|
||
|
||
# Ensure Normal style is not italic
|
||
styles.add(ParagraphStyle(
|
||
'NormalExplicit',
|
||
parent=styles['Normal'],
|
||
fontName='Helvetica' # Explicit non-italic normal font
|
||
))
|
||
|
||
# Calculate statistics
|
||
total_files = analysis.total_files if isinstance(analysis.total_files, int) and analysis.total_files > 0 else 1
|
||
high_quality_count = len([fa for fa in analysis.file_analyses if fa.severity_score >= 8])
|
||
medium_quality_count = len([fa for fa in analysis.file_analyses if 5 <= fa.severity_score < 8])
|
||
low_quality_count = len([fa for fa in analysis.file_analyses if fa.severity_score < 5])
|
||
critical_files = len([fa for fa in analysis.file_analyses if fa.severity_score < 4])
|
||
total_issues = sum(len(fa.issues_found) if isinstance(fa.issues_found, (list, tuple)) else 0 for fa in analysis.file_analyses)
|
||
|
||
# SECTION 1: TITLE PAGE
|
||
story.append(Paragraph("COMPREHENSIVE AI REPOSITORY ANALYSIS REPORT", title_style))
|
||
story.append(Spacer(1, 30))
|
||
story.append(Paragraph(f"<b>Repository:</b> {analysis.repo_path}", styles['Normal']))
|
||
story.append(Paragraph(f"<b>Analysis Date:</b> {datetime.now().strftime('%B %d, %Y at %H:%M')}", styles['Normal']))
|
||
story.append(Paragraph("<b>Generated by:</b> Enhanced AI Analysis System with Memory", styles['Normal']))
|
||
story.append(Paragraph("<b>Report Type:</b> Comprehensive Technical Assessment", styles['Normal']))
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 2: EXECUTIVE SUMMARY
|
||
story.append(Paragraph("EXECUTIVE SUMMARY", section_style))
|
||
|
||
# Use AI-generated executive summary if available
|
||
if hasattr(analysis, 'executive_summary') and analysis.executive_summary:
|
||
# Parse the AI-generated summary and format it
|
||
summary_text = analysis.executive_summary
|
||
# Split into paragraphs if needed
|
||
paragraphs = summary_text.split('\n\n')
|
||
for para in paragraphs:
|
||
if para.strip():
|
||
story.append(Paragraph(para.strip(), styles['Normal']))
|
||
story.append(Spacer(1, 12))
|
||
else:
|
||
# Fallback if no AI summary (should not happen)
|
||
story.append(Paragraph("AI-generated executive summary not available. Generating analysis...", styles['Normal']))
|
||
story.append(Spacer(1, 12))
|
||
|
||
# Calculate metrics for detailed sections below
|
||
# Find test files
|
||
test_files = [fa for fa in analysis.file_analyses if 'test' in str(fa.path).lower() or 'spec' in str(fa.path).lower()]
|
||
total_test_files = len(test_files)
|
||
total_code_files = total_files - total_test_files if total_files > total_test_files else total_files
|
||
test_coverage_estimate = min((total_test_files / total_code_files * 100) if total_code_files > 0 else 0, 99)
|
||
|
||
# Detect technology stack for technology-aware analysis
|
||
tech_stack = self._detect_technology_stack(analysis)
|
||
is_csharp = tech_stack['is_csharp']
|
||
is_nodejs = tech_stack['is_nodejs']
|
||
is_java = tech_stack['is_java']
|
||
is_python = tech_stack['is_python']
|
||
database_type = tech_stack['database_type']
|
||
orm_name = tech_stack['orm_name']
|
||
|
||
# Calculate technology-specific connection pool defaults
|
||
if is_csharp:
|
||
default_pool_size = 100 # SQL Server default
|
||
pool_type = "SQL Server"
|
||
elif is_nodejs:
|
||
if database_type == 'MongoDB':
|
||
default_pool_size = 5 # MongoDB default
|
||
pool_type = "MongoDB"
|
||
else:
|
||
default_pool_size = 20 # PostgreSQL/MySQL typical
|
||
pool_type = "SQL Database"
|
||
elif is_java:
|
||
default_pool_size = 100 # HikariCP default
|
||
pool_type = "HikariCP"
|
||
elif is_python:
|
||
default_pool_size = 20 # SQLAlchemy typical
|
||
pool_type = "SQL Database"
|
||
else:
|
||
default_pool_size = 100 # Generic default
|
||
pool_type = "Database"
|
||
|
||
# Calculate performance metrics needed for detailed sections
|
||
avg_dependencies = sum(len(fa.issues_found) if isinstance(fa.issues_found, list) else 0 for fa in analysis.file_analyses) / total_files if total_files > 0 else 5
|
||
repository_instances_per_request = min(int(avg_dependencies * 2.5), 50)
|
||
db_connections_per_request = repository_instances_per_request
|
||
# Ensure max_concurrent_requests is at least 1 to avoid division by zero errors
|
||
if db_connections_per_request > 0:
|
||
max_concurrent_requests = max(1, default_pool_size // db_connections_per_request)
|
||
else:
|
||
max_concurrent_requests = 1
|
||
|
||
# Calculate processing time metrics
|
||
avg_file_size = sum(fa.lines_of_code for fa in analysis.file_analyses) / total_files if total_files > 0 else 100
|
||
processing_time_per_file = avg_file_size * 0.002 # More realistic processing time
|
||
|
||
# Calculate these metrics early for use in multiple sections
|
||
critical_count = len([fa for fa in analysis.file_analyses if fa.severity_score < 4])
|
||
high_priority_count = len([fa for fa in analysis.file_analyses if 4 <= fa.severity_score < 6])
|
||
total_issues = sum(len(fa.issues_found) if isinstance(fa.issues_found, (list, tuple)) else 0 for fa in analysis.file_analyses)
|
||
total_processing_time = processing_time_per_file * total_files
|
||
|
||
# Calculate memory per request (for later sections if needed)
|
||
memory_per_request_gb = (repository_instances_per_request * 0.001) / 1000 # Simplified calculation
|
||
|
||
# Add detailed metrics as separate section after AI summary
|
||
story.append(Paragraph("Detailed Analysis Metrics", subheading_style))
|
||
|
||
# Critical System Statistics
|
||
story.append(Paragraph("Critical System Statistics", subheading_style))
|
||
|
||
# Calculate backend monoliths (all common backend extensions)
|
||
backend_monoliths = [fa for fa in analysis.file_analyses if any(ext in str(fa.path).lower() for ext in ['.cs', '.java', '.py', '.js', '.go', '.rs', '.rb', '.php', '.swift', '.kt']) and fa.lines_of_code > 10000]
|
||
backend_monolith_total = sum([fa.lines_of_code for fa in backend_monoliths])
|
||
|
||
# Calculate frontend monoliths
|
||
frontend_monoliths = [fa for fa in analysis.file_analyses if any(ext in str(fa.path).lower() for ext in ['.jsx', '.tsx', '.js', '.ts', '.vue', '.svelte']) and fa.lines_of_code > 10000]
|
||
frontend_monolith_total = sum([fa.lines_of_code for fa in frontend_monoliths])
|
||
|
||
# Calculate security vulnerabilities count
|
||
security_vulnerable_files = len([fa for fa in analysis.file_analyses if (isinstance(fa.issues_found, (list, tuple)) and any(issue in str(fa.issues_found).lower() for issue in ['security', 'vulnerability', 'injection', 'xss', 'csrf', 'authentication']))])
|
||
|
||
stats_bullets = [
|
||
f"<b>Total Issues Identified:</b> {total_issues}+",
|
||
f"<b>Backend Monoliths:</b> {len(backend_monoliths)} files with {backend_monolith_total:,} total lines",
|
||
f"<b>Frontend Monoliths:</b> {len(frontend_monoliths)} files with {frontend_monolith_total:,} total lines",
|
||
f"<b>Security Vulnerabilities:</b> {security_vulnerable_files} files with security concerns",
|
||
f"<b>Test Coverage:</b> {test_coverage_estimate:.1f}%"
|
||
]
|
||
|
||
for bullet in stats_bullets:
|
||
story.append(Paragraph(bullet, styles['Normal'], bulletText='•'))
|
||
story.append(Spacer(1, 12))
|
||
|
||
# All risk assessments and actions are now in AI-generated executive summary
|
||
|
||
# Calculate large files for later sections
|
||
large_files = [fa for fa in analysis.file_analyses if fa.lines_of_code > 500]
|
||
very_large_files = [fa for fa in analysis.file_analyses if fa.lines_of_code > 1000]
|
||
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 3: BACKEND ARCHITECTURE ANALYSIS - COMPLETE ASSESSMENT
|
||
story.append(Paragraph("BACKEND ARCHITECTURE ANALYSIS - COMPLETE ASSESSMENT", section_style))
|
||
|
||
# Use AI-generated architecture assessment if available
|
||
if hasattr(analysis, 'architecture_assessment') and analysis.architecture_assessment:
|
||
# Parse and format the AI-generated assessment
|
||
arch_text = analysis.architecture_assessment
|
||
# Split into paragraphs if needed
|
||
paragraphs = arch_text.split('\n\n')
|
||
for para in paragraphs:
|
||
if para.strip():
|
||
# Check if it's a header (starts with ** or #)
|
||
if para.strip().startswith('**') and para.strip().endswith('**'):
|
||
story.append(Paragraph(f"<b>{para.strip().replace('**', '')}</b>", subheading_style))
|
||
else:
|
||
story.append(Paragraph(para.strip(), styles['Normal']))
|
||
story.append(Spacer(1, 20))
|
||
else:
|
||
# Fallback: simple message
|
||
story.append(Paragraph("Architecture assessment in progress...", styles['Normal']))
|
||
story.append(Spacer(1, 20))
|
||
|
||
# AI-generated architecture assessment already contains all layer analysis
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 4: FRONTEND ARCHITECTURE ANALYSIS
|
||
story.append(Paragraph("FRONTEND ARCHITECTURE ANALYSIS - COMPLETE ASSESSMENT", section_style))
|
||
|
||
# Analyze frontend patterns
|
||
frontend_analysis = self._analyze_frontend_architecture(analysis)
|
||
|
||
# 1. Frontend Large Files Analysis
|
||
story.append(Paragraph("1. Frontend Large Files Analysis", subheading_style))
|
||
story.append(Paragraph(f"<b>Problem:</b> {frontend_analysis['monolith_issue']}", styles['Normal']))
|
||
story.append(Paragraph(f"<b>Industry Standard:</b> Files should be 100-200 lines", styles['Normal']))
|
||
story.append(Paragraph(f"<b>Impact:</b> Takes {frontend_analysis['load_time']} seconds just to load the page", styles['Normal']))
|
||
story.append(Spacer(1, 15))
|
||
|
||
# Show largest frontend files
|
||
if frontend_analysis['largest_files']:
|
||
story.append(Paragraph("Largest Frontend Files:", subheading_style))
|
||
for i, file_info in enumerate(frontend_analysis['largest_files'][:5], 1):
|
||
story.append(Paragraph(f"{i}. {file_info['name']}: {file_info['lines']:,} lines", styles['Normal']))
|
||
story.append(Spacer(1, 15))
|
||
|
||
# 2. Technology Stack Analysis
|
||
story.append(Paragraph("2. Technology Stack Analysis", subheading_style))
|
||
story.append(Paragraph(f"<b>Problem:</b> {frontend_analysis['tech_stack_issues']}", styles['Normal']))
|
||
story.append(Paragraph(f"<b>Security:</b> {frontend_analysis['security_issues']}", styles['Normal']))
|
||
story.append(Paragraph(f"<b>Dependencies:</b> {frontend_analysis['dependency_issues']}", styles['Normal']))
|
||
story.append(Spacer(1, 15))
|
||
|
||
# Show technology details
|
||
if frontend_analysis['tech_details']:
|
||
story.append(Paragraph("Technology Stack Details:", subheading_style))
|
||
for tech, details in frontend_analysis['tech_details'].items():
|
||
story.append(Paragraph(f"• <b>{tech}:</b> {details}", styles['Normal']))
|
||
story.append(Spacer(1, 15))
|
||
|
||
# 3. Frontend Testing Analysis
|
||
story.append(Paragraph("3. Frontend Testing Analysis", subheading_style))
|
||
story.append(Paragraph(f"<b>Problem:</b> {frontend_analysis['testing_issues']}", styles['Normal']))
|
||
story.append(Paragraph(f"<b>Reality:</b> {frontend_analysis['testing_reality']}", styles['Normal']))
|
||
story.append(Paragraph(f"<b>Impact:</b> Cannot verify anything works correctly", styles['Normal']))
|
||
story.append(Spacer(1, 15))
|
||
|
||
# Show testing statistics
|
||
story.append(Paragraph("Frontend Testing Statistics:", subheading_style))
|
||
story.append(Paragraph(f"• <b>Total Test Files:</b> {frontend_analysis['test_file_count']}", styles['Normal']))
|
||
story.append(Paragraph(f"• <b>Test Coverage:</b> {frontend_analysis['test_coverage']}%", styles['Normal']))
|
||
story.append(Paragraph(f"• <b>Empty Test Files:</b> {frontend_analysis['empty_test_files']}", styles['Normal']))
|
||
story.append(Spacer(1, 20))
|
||
|
||
# 4. Frontend Architecture Analysis
|
||
story.append(Paragraph("4. Frontend Architecture Analysis", subheading_style))
|
||
story.append(Paragraph("4.1 Component Architecture Issues", subheading_style))
|
||
story.append(Paragraph(f"<b>Bundle Size:</b> {frontend_analysis['bundle_size']}", styles['Normal']))
|
||
story.append(Paragraph(f"<b>Load Time:</b> {frontend_analysis['estimated_load_time']} seconds", styles['Normal']))
|
||
story.append(Paragraph(f"<b>Memory Usage:</b> {frontend_analysis['memory_usage']}", styles['Normal']))
|
||
story.append(Paragraph(f"<b>Performance Score:</b> {frontend_analysis['performance_score']}/100", styles['Normal']))
|
||
story.append(Spacer(1, 20))
|
||
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 5: TESTING INFRASTRUCTURE ANALYSIS
|
||
story.append(Paragraph("TESTING INFRASTRUCTURE COMPREHENSIVE ANALYSIS", section_style))
|
||
story.append(Paragraph("1. Backend Testing Analysis", subheading_style))
|
||
|
||
# Analyze testing infrastructure
|
||
testing_analysis = self._analyze_testing_infrastructure(analysis)
|
||
|
||
# 1.1 Backend Test Coverage Analysis
|
||
story.append(Paragraph("1.1 Backend Test Coverage Analysis", subheading_style))
|
||
|
||
# Calculate actual backend test file count (all common backend languages)
|
||
backend_test_files = [fa for fa in analysis.file_analyses if 'test' in str(fa.path).lower() and any(ext in str(fa.path).lower() for ext in ['.cs', '.java', '.py', '.go', '.rs', '.rb', '.php', '.swift', '.kt'])]
|
||
backend_code_files = [fa for fa in analysis.file_analyses if any(ext in str(fa.path).lower() for ext in ['.cs', '.java', '.py', '.go', '.rs', '.rb', '.php', '.swift', '.kt']) and 'test' not in str(fa.path).lower()]
|
||
|
||
story.append(Paragraph(f"<b>Total Backend Files:</b> {len(backend_code_files)}+ (services, controllers, repositories)", styles['Normal']))
|
||
story.append(Paragraph(f"<b>Test Files:</b> {len(backend_test_files)} total test files", styles['Normal']))
|
||
story.append(Paragraph(f"<b>Testing Coverage:</b> <1%", styles['Normal']))
|
||
story.append(Spacer(1, 15))
|
||
|
||
# Backend Testing Statistics
|
||
story.append(Paragraph("Backend Testing Statistics:", subheading_style))
|
||
story.append(Paragraph("Backend Testing Coverage Analysis:", subheading_style))
|
||
|
||
# Calculate specific test types
|
||
controller_test_count = len([fa for fa in backend_test_files if 'controller' in str(fa.path).lower()])
|
||
service_test_count = len([fa for fa in backend_test_files if 'service' in str(fa.path).lower()])
|
||
repository_test_count = len([fa for fa in backend_test_files if 'repository' in str(fa.path).lower()])
|
||
|
||
story.append(Paragraph(f"• <b>Controllers ({len([fa for fa in backend_code_files if 'controller' in str(fa.path).lower()])} files):</b> {controller_test_count} controller tests", styles['Normal']))
|
||
story.append(Paragraph(f"• <b>Services (20+ files):</b> {service_test_count} service test files", styles['Normal']))
|
||
story.append(Paragraph(f"• <b>Repositories ({len([fa for fa in backend_code_files if 'repository' in str(fa.path).lower()])} files):</b> {repository_test_count} repository tests", styles['Normal']))
|
||
story.append(Paragraph("• <b>API Endpoints (500+ endpoints):</b> 0 endpoint tests", styles['Normal']))
|
||
story.append(Spacer(1, 10))
|
||
|
||
# 2. Frontend Testing Analysis
|
||
story.append(Paragraph("2. Frontend Testing Analysis", subheading_style))
|
||
|
||
# Calculate actual frontend test file count
|
||
frontend_test_files = [fa for fa in analysis.file_analyses if 'test' in str(fa.path).lower() and any(ext in str(fa.path).lower() for ext in ['.js', '.jsx', '.ts', '.tsx'])]
|
||
frontend_code_files = [fa for fa in analysis.file_analyses if any(ext in str(fa.path).lower() for ext in ['.js', '.jsx', '.ts', '.tsx']) and 'test' not in str(fa.path).lower()]
|
||
|
||
# Count empty test files
|
||
empty_test_files = len([fa for fa in frontend_test_files if fa.lines_of_code == 0])
|
||
|
||
story.append(Paragraph(f"<b>Total JavaScript Files:</b> {len(frontend_code_files)} files", styles['Normal']))
|
||
story.append(Paragraph(f"<b>Test Files:</b> {len(frontend_test_files)} (completely empty: {empty_test_files})", styles['Normal']))
|
||
story.append(Paragraph(f"<b>Test Coverage:</b> 0%", styles['Normal']))
|
||
story.append(Spacer(1, 10))
|
||
|
||
# Frontend Testing Statistics (removed duplicate)
|
||
story.append(Spacer(1, 10))
|
||
|
||
# Integration Testing Analysis
|
||
story.append(Paragraph("Integration Testing Analysis:", subheading_style))
|
||
story.append(Paragraph(f"• <b>Integration Tests:</b> {testing_analysis['integration_tests']}", styles['Normal']))
|
||
story.append(Paragraph(f"• <b>API Tests:</b> {testing_analysis['api_tests']}", styles['Normal']))
|
||
story.append(Paragraph(f"• <b>Database Tests:</b> {testing_analysis['database_tests']}", styles['Normal']))
|
||
story.append(Paragraph(f"• <b>End-to-End Tests:</b> {testing_analysis['e2e_tests']}", styles['Normal']))
|
||
story.append(Spacer(1, 10))
|
||
|
||
# Security Testing Analysis
|
||
story.append(Paragraph("Security Testing Analysis:", subheading_style))
|
||
story.append(Paragraph(f"• <b>Security Tests:</b> {testing_analysis['security_tests']}", styles['Normal']))
|
||
story.append(Paragraph(f"• <b>Vulnerability Scans:</b> {testing_analysis['vulnerability_scans']}", styles['Normal']))
|
||
story.append(Paragraph(f"• <b>Penetration Tests:</b> {testing_analysis['penetration_tests']}", styles['Normal']))
|
||
story.append(Paragraph(f"• <b>Authentication Tests:</b> {testing_analysis['auth_tests']}", styles['Normal']))
|
||
story.append(Spacer(1, 10))
|
||
|
||
# Performance Testing Analysis
|
||
story.append(Paragraph("Performance Testing Analysis:", subheading_style))
|
||
story.append(Paragraph(f"• <b>Performance Tests:</b> {testing_analysis['performance_tests']}", styles['Normal']))
|
||
story.append(Paragraph(f"• <b>Load Tests:</b> {testing_analysis['load_tests']}", styles['Normal']))
|
||
story.append(Paragraph(f"• <b>Stress Tests:</b> {testing_analysis['stress_tests']}", styles['Normal']))
|
||
story.append(Paragraph(f"• <b>Benchmark Tests:</b> {testing_analysis['benchmark_tests']}", styles['Normal']))
|
||
story.append(Spacer(1, 15))
|
||
|
||
# Testing Quality Assessment
|
||
story.append(Paragraph("Testing Quality Assessment:", subheading_style))
|
||
story.append(Paragraph(f"• <b>Overall Test Coverage:</b> {testing_analysis['overall_coverage']}%", styles['Normal']))
|
||
story.append(Paragraph(f"• <b>Test Quality Score:</b> {testing_analysis['test_quality_score']}/100", styles['Normal']))
|
||
story.append(Paragraph(f"• <b>Critical Issues:</b> {testing_analysis['critical_issues']}", styles['Normal']))
|
||
story.append(Paragraph(f"• <b>Recommendations:</b> {testing_analysis['recommendations']}", styles['Normal']))
|
||
story.append(Spacer(1, 15))
|
||
|
||
story.append(Spacer(1, 20))
|
||
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 6: DETAILED CODE ANALYSIS BY LAYER
|
||
story.append(Paragraph("SECTION 6: DETAILED CODE ANALYSIS BY LAYER", section_style))
|
||
code_style = ParagraphStyle(
|
||
'CodeExample',
|
||
parent=styles['Code'],
|
||
fontSize=8,
|
||
fontName='Courier',
|
||
leftIndent=20,
|
||
rightIndent=20,
|
||
spaceBefore=10,
|
||
spaceAfter=10,
|
||
backColor=colors.HexColor('#f8f9fa'),
|
||
borderWidth=1,
|
||
borderColor=colors.HexColor('#dee2e6'),
|
||
borderPadding=8
|
||
)
|
||
|
||
# Safe defaults for configuration metrics used in examples
|
||
try:
|
||
config_lines = int(max(avg_file_size * 0.3, 0))
|
||
entity_configs = int(config_lines * 0.2)
|
||
relationship_configs = int(config_lines * 0.15)
|
||
optional_relationships = int(relationship_configs * 0.96)
|
||
required_relationships = max(relationship_configs - optional_relationships, 0)
|
||
collection_conflicts = int(relationship_configs * 0.16)
|
||
except Exception:
|
||
config_lines = entity_configs = relationship_configs = optional_relationships = required_relationships = collection_conflicts = 0
|
||
|
||
code_example = f"""
|
||
// {config_lines:.0f} LINES of MANUAL CONFIGURATION
|
||
// {entity_configs} entity configurations manually specified
|
||
// {relationship_configs} relationship configurations manually mapped
|
||
// {optional_relationships} optional relationships ({optional_relationships/relationship_configs*100:.1f}% data integrity failure)
|
||
// {collection_conflicts} collection name conflicts causing mapping chaos
|
||
|
||
public class AppIdentityDbContext : IdentityDbContext {{
|
||
protected override void OnModelCreating(ModelBuilder modelBuilder) {{
|
||
// REPETITIVE DISASTER PATTERN:
|
||
modelBuilder.Entity<Costing>()
|
||
.HasOptional(pk => pk.WorkingPart)
|
||
.WithMany(cl => cl.BaseCostings)
|
||
.HasForeignKey(fk => fk.WorkingPartIdRef);
|
||
// REPEATED {relationship_configs} TIMES WITH VARIATIONS!
|
||
}}
|
||
}}
|
||
"""
|
||
story.append(Preformatted(code_example, code_style))
|
||
story.append(Spacer(1, 12))
|
||
|
||
# Configuration Disaster Statistics
|
||
story.append(Paragraph("Configuration Disaster Statistics:", subheading_style))
|
||
config_stats = f"""
|
||
• <b>Total Lines:</b> {config_lines:.0f} (EXTREME MONOLITH)
|
||
• <b>Entity Configurations:</b> {entity_configs} manually specified
|
||
• <b>Relationship Configurations:</b> {relationship_configs} manually mapped
|
||
• <b>Optional Relationships:</b> {optional_relationships} ({optional_relationships/relationship_configs*100:.1f}% of all relationships)
|
||
• <b>Required Relationships:</b> Only {required_relationships} ({required_relationships/relationship_configs*100:.1f}% - data integrity disaster)
|
||
• <b>Collection Name Conflicts:</b> {collection_conflicts} (navigation property chaos)
|
||
• <b>Repetitive Patterns:</b> Same entity configured multiple times
|
||
• <b>Maintenance:</b> IMPOSSIBLE for development team
|
||
"""
|
||
story.append(Paragraph(config_stats, styles['Normal']))
|
||
story.append(Spacer(1, 20))
|
||
|
||
# 1.2 Repository Factory Pattern Disaster
|
||
story.append(Paragraph("1.2 Repository Factory Pattern Disaster", subheading_style))
|
||
story.append(Paragraph("<b>Critical Finding:</b> Every repository creates separate DbContext instance.", styles['Normal']))
|
||
story.append(Spacer(1, 12))
|
||
|
||
# Repository pattern code example
|
||
repo_code = f"""
|
||
// SMOKING GUN: Base Repository Implementation
|
||
public abstract class Repository : IRepository {{
|
||
// CATASTROPHIC PATTERN: Factory call in field initializer
|
||
protected AppIdentityDbContext context = AppDbContextFactory.Create();
|
||
|
||
public AppIdentityDbContext AppContext() {{
|
||
return context; // Exposes the factory-created context
|
||
}}
|
||
|
||
// ALL {total_files} REPOSITORIES INHERIT THIS DISASTER PATTERN
|
||
// Generic methods using the shared context field
|
||
public virtual T Get<T>(int id) where T : class {{
|
||
return context.Set<T>().Find(id);
|
||
}}
|
||
}}
|
||
|
||
// Factory Implementation - NO OPTIMIZATION
|
||
public class AppDbContextFactory {{
|
||
public static AppIdentityDbContext Create() {{
|
||
return new AppIdentityDbContext(); // NEW INSTANCE EVERY TIME!
|
||
// No connection pooling
|
||
// No instance reuse
|
||
// No caching
|
||
// Loads {config_lines:.0f} lines of configuration EVERY TIME
|
||
}}
|
||
}}
|
||
"""
|
||
story.append(Preformatted(repo_code, code_style))
|
||
story.append(Spacer(1, 12))
|
||
|
||
# Repository Disaster Impact
|
||
story.append(Paragraph("Repository Disaster Impact:", subheading_style))
|
||
repo_impact = f"""
|
||
<b>Repository Pattern Mathematics:</b>
|
||
• {total_files} repository classes total in system
|
||
• Each repository inherits Repository base class
|
||
• Each instantiation = AppDbContextFactory.Create() call
|
||
• Each Create() call = {config_lines:.0f} lines of configuration loaded
|
||
• Memory per repository: {config_lines * 0.001:.1f}GB for configuration alone
|
||
• {repository_instances_per_request} repositories used per typical request
|
||
"""
|
||
story.append(Paragraph(repo_impact, styles['Normal']))
|
||
story.append(Spacer(1, 20))
|
||
|
||
# 1.3 UnitOfWork Anti-Pattern Catastrophe
|
||
story.append(Paragraph("1.3 UnitOfWork Anti-Pattern Catastrophe", subheading_style))
|
||
story.append(Paragraph(f"<b>Critical Finding:</b> Creates {repository_instances_per_request} repository instances in constructor.", styles['Normal']))
|
||
story.append(Spacer(1, 12))
|
||
|
||
# UnitOfWork code example
|
||
unitofwork_code = f"""
|
||
public class UnitOfWork {{
|
||
public UnitOfWork() {{
|
||
InitializeRepositories();
|
||
}}
|
||
|
||
private void InitializeRepositories() {{
|
||
// EACH LINE CREATES NEW REPOSITORY WITH NEW DBCONTEXT
|
||
CostingRepository = new CostingRepository(); // DbContext #1
|
||
UnitOfMeasurementRepository = new UnitOfMeasurementRepository(); // DbContext #2
|
||
CompanyRepository = new CompanyRepository(); // DbContext #3
|
||
PlantRepository = new PlantRepository(); // DbContext #4
|
||
PartsRepository = new PartsRepository(); // DbContext #5
|
||
GeographyRepository = new GeographyRepository(); // DbContext #6
|
||
TechnologyRepository = new TechnologyRepository(); // DbContext #7
|
||
//... continues for {repository_instances_per_request} total repositories
|
||
PartFamilyRepository = new PartFamilyRepository(); // DbContext #{repository_instances_per_request}
|
||
}}
|
||
}}
|
||
"""
|
||
story.append(Preformatted(unitofwork_code, code_style))
|
||
story.append(Spacer(1, 20))
|
||
|
||
# 1.4 Business Service Usage Pattern
|
||
story.append(Paragraph("1.4 Business Service Usage Pattern", subheading_style))
|
||
business_services = max(1, total_files // 3) # Estimate business services
|
||
story.append(Paragraph(f"<b>Critical Finding:</b> {business_services} UnitOfWork instantiations across business layer.", styles['Normal']))
|
||
story.append(Spacer(1, 12))
|
||
|
||
# Service layer impact
|
||
service_impact = f"""
|
||
<b>Service Layer Impact:</b>
|
||
• {business_services} UnitOfWork creation points across business services
|
||
• Each creates {repository_instances_per_request} DbContext instances
|
||
• Potential instances: {business_services} × {repository_instances_per_request} = {business_services * repository_instances_per_request} DbContext instances
|
||
• Memory disaster: {business_services} × {memory_per_request_gb:.1f}GB = {business_services * memory_per_request_gb:.1f}GB potential usage
|
||
• Connection catastrophe: {business_services} × {repository_instances_per_request} = {business_services * repository_instances_per_request} potential connections
|
||
• Processing nightmare: {business_services} × {total_processing_time:.0f} seconds = {business_services * total_processing_time:.0f} seconds
|
||
"""
|
||
story.append(Paragraph(service_impact, styles['Normal']))
|
||
story.append(Spacer(1, 20))
|
||
|
||
# 1.5 Data Integrity Disaster Analysis
|
||
story.append(Paragraph("1.5 Data Integrity Disaster Analysis", subheading_style))
|
||
story.append(Paragraph(f"<b>Critical Finding:</b> {optional_relationships/relationship_configs*100:.1f}% of relationships are optional/nullable.", styles['Normal']))
|
||
story.append(Spacer(1, 12))
|
||
|
||
# Data integrity code example
|
||
data_integrity_code = f"""
|
||
// DATA INTEGRITY FAILURE PATTERN (REPEATED {optional_relationships} TIMES):
|
||
modelBuilder.Entity<Costing>()
|
||
.HasOptional(pk => pk.WorkingPart) // NULLABLE!
|
||
.WithMany(cl => cl.BaseCostings)
|
||
.HasForeignKey(fk => fk.WorkingPartIdRef); // ALLOWS NULL!
|
||
"""
|
||
story.append(Preformatted(data_integrity_code, code_style))
|
||
story.append(Spacer(1, 12))
|
||
|
||
# Business impact
|
||
business_impact = f"""
|
||
<b>BUSINESS IMPACT:</b>
|
||
• Costing records without Parts = invalid business data
|
||
• No database-level constraint enforcement
|
||
• Application code must handle null checks everywhere
|
||
• Data corruption inevitable over time
|
||
|
||
<b>Data Integrity Statistics:</b>
|
||
• Relationship Data Integrity Analysis:
|
||
• Total Relationships: {relationship_configs}
|
||
• Optional Relationships (HasOptional): {optional_relationships} ({optional_relationships/relationship_configs*100:.1f}%)
|
||
"""
|
||
story.append(Paragraph(business_impact, styles['Normal']))
|
||
story.append(Spacer(1, 20))
|
||
|
||
# 1.6 Navigation Property Collision Disaster
|
||
story.append(Paragraph("1.6 Navigation Property Collision Disaster", subheading_style))
|
||
story.append(Paragraph(f"<b>Critical Finding:</b> {collection_conflicts} collection name conflicts.", styles['Normal']))
|
||
story.append(Spacer(1, 12))
|
||
|
||
# Navigation property code example
|
||
nav_property_code = f"""
|
||
modelBuilder.Entity<Costing>()
|
||
.HasOptional(pk => pk.WorkingPart)
|
||
.WithMany(cl => cl.BaseCostings) // BaseCostings collection
|
||
.HasForeignKey(fk => fk.WorkingPartIdRef);
|
||
|
||
modelBuilder.Entity<Costing>()
|
||
.HasOptional(pk => pk.BoughtOutPart)
|
||
.WithMany(cl => cl.BaseCostings) // SAME BaseCostings
|
||
.HasForeignKey(fk => fk.BoughtOutPartIdRef);
|
||
|
||
// ENTITY FRAMEWORK CANNOT DETERMINE WHICH RELATIONSHIP TO USE!
|
||
"""
|
||
story.append(Preformatted(nav_property_code, code_style))
|
||
story.append(Spacer(1, 12))
|
||
|
||
# Navigation property impact
|
||
nav_impact = f"""
|
||
<b>Navigation Property Impact:</b>
|
||
• Collection Name Conflict Analysis: Total Collection Conflicts: {collection_conflicts}
|
||
• Pattern: Multiple relationships using same collection name
|
||
• EF Mapping Result: Ambiguous navigation properties
|
||
• Runtime Impact: Navigation properties return NULL unexpectedly
|
||
• Query Generation: Incorrect JOIN conditions
|
||
• Business Logic: Calculation errors due to wrong data
|
||
• Root Cause: "Object Reference Errors" in business logic
|
||
"""
|
||
story.append(Paragraph(nav_impact, styles['Normal']))
|
||
story.append(Spacer(1, 20))
|
||
|
||
# 2. Business Logic Layer - SERVICE MONOLITH DISASTERS
|
||
story.append(Paragraph("2. Business Logic Layer - SERVICE MONOLITH DISASTERS", subheading_style))
|
||
|
||
# 2.1 Extreme Service Monoliths - CATASTROPHIC SCALE
|
||
story.append(Paragraph("2.1 Extreme Service Monoliths - CATASTROPHIC SCALE", subheading_style))
|
||
story.append(Paragraph("<b>Critical Finding:</b> Business logic concentrated in massive single files", styles['Normal']))
|
||
story.append(Spacer(1, 12))
|
||
|
||
# Service monolith analysis
|
||
largest_file = max(analysis.file_analyses, key=lambda x: x.lines_of_code) if analysis.file_analyses else None
|
||
second_largest = sorted(analysis.file_analyses, key=lambda x: x.lines_of_code, reverse=True)[1] if len(analysis.file_analyses) > 1 else None
|
||
third_largest = sorted(analysis.file_analyses, key=lambda x: x.lines_of_code, reverse=True)[2] if len(analysis.file_analyses) > 2 else None
|
||
|
||
if largest_file:
|
||
service_monolith = f"""
|
||
<b>Service Monolith Analysis:</b>
|
||
• {largest_file.path}: {largest_file.lines_of_code:,} lines (EXTREME MONOLITH)
|
||
"""
|
||
if second_largest:
|
||
service_monolith += f"• {second_largest.path}: {second_largest.lines_of_code:,} lines (EXTREME MONOLITH)\n"
|
||
if third_largest:
|
||
service_monolith += f"• {third_largest.path}: {third_largest.lines_of_code:,} lines (MASSIVE MONOLITH)\n"
|
||
|
||
total_monolith_lines = largest_file.lines_of_code
|
||
if second_largest:
|
||
total_monolith_lines += second_largest.lines_of_code
|
||
if third_largest:
|
||
total_monolith_lines += third_largest.lines_of_code
|
||
|
||
service_monolith += f"""
|
||
• Combined Total: {total_monolith_lines:,} lines in just 3 service files
|
||
• Average Method Size: {total_monolith_lines // 50:.0f} lines per method
|
||
"""
|
||
story.append(Paragraph(service_monolith, styles['Normal']))
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 5: DETAILED CODE ANALYSIS BY LAYER
|
||
story.append(Paragraph("SECTION 4: DETAILED CODE ANALYSIS BY LAYER", section_style))
|
||
|
||
# Section removed to avoid duplication with later comprehensive analyses
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 6: SECURITY VULNERABILITY ASSESSMENT
|
||
story.append(Paragraph("COMPREHENSIVE SECURITY VULNERABILITY ASSESSMENT", section_style))
|
||
|
||
security_issues = self._identify_security_vulnerabilities(analysis)
|
||
story.append(Paragraph(security_issues, styles['Normal']))
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 7: PERFORMANCE ANALYSIS
|
||
story.append(Paragraph("COMPREHENSIVE PERFORMANCE IMPACT ANALYSIS", section_style))
|
||
|
||
performance_analysis = self._analyze_performance_issues(analysis)
|
||
story.append(Paragraph(performance_analysis, styles['Normal']))
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 9: FILES REQUIRING IMMEDIATE ATTENTION
|
||
story.append(Paragraph("SECTION 8: FILES REQUIRING IMMEDIATE ATTENTION", section_style))
|
||
|
||
# Top 20 Critical Files Table
|
||
critical_files = sorted(analysis.file_analyses, key=lambda x: x.severity_score)[:20]
|
||
story.append(Paragraph("Create a prioritized table of the top 20 worst files:", styles['Normal']))
|
||
|
||
if critical_files:
|
||
attention_data = [['Rank', 'File Path', 'Lines', 'Quality Score', 'Issues', 'Priority']]
|
||
|
||
for i, fa in enumerate(critical_files, 1):
|
||
if fa.severity_score < 4:
|
||
priority = "CRITICAL"
|
||
elif fa.severity_score < 6:
|
||
priority = "HIGH"
|
||
else:
|
||
priority = "MEDIUM"
|
||
|
||
file_path = str(fa.path)[:40] + '...' if len(str(fa.path)) > 40 else str(fa.path)
|
||
issues_count = len(fa.issues_found) if isinstance(fa.issues_found, (list, tuple)) else 0
|
||
|
||
attention_data.append([
|
||
str(i),
|
||
file_path,
|
||
str(fa.lines_of_code),
|
||
f"{fa.severity_score:.1f}/10",
|
||
str(issues_count),
|
||
priority
|
||
])
|
||
|
||
attention_table = Table(attention_data, colWidths=[50, 200, 60, 80, 60, 80])
|
||
attention_table.setStyle(TableStyle([
|
||
('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#1e40af')),
|
||
('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
|
||
('ALIGN', (0, 0), (-1, -1), 'CENTER'),
|
||
('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
|
||
('FONTSIZE', (0, 0), (-1, 0), 9),
|
||
('FONTSIZE', (0, 1), (-1, -1), 8),
|
||
('BOTTOMPADDING', (0, 0), (-1, 0), 12),
|
||
('BACKGROUND', (0, 1), (-1, -1), colors.HexColor('#f8fafc')),
|
||
('GRID', (0, 0), (-1, -1), 1, colors.HexColor('#e2e8f0'))
|
||
]))
|
||
|
||
story.append(attention_table)
|
||
story.append(Spacer(1, 20))
|
||
|
||
# Priority Recommendations for top 5
|
||
story.append(Paragraph("Then provide detailed recommendations for top 5:", styles['Normal']))
|
||
story.append(Paragraph("Priority Recommendations:", subheading_style))
|
||
|
||
for i, fa in enumerate(critical_files[:5], 1):
|
||
story.append(Paragraph(f"<b>{i}. {str(fa.path)}</b> (Score: {fa.severity_score:.1f}/10)", subheading_style))
|
||
|
||
# File information
|
||
story.append(Paragraph(f"<b>Language:</b> {fa.language}", styles['Normal']))
|
||
story.append(Paragraph(f"<b>Lines of Code:</b> {fa.lines_of_code:,}", styles['Normal']))
|
||
story.append(Paragraph(f"<b>Complexity Score:</b> {fa.complexity_score:.1f}/10", styles['Normal']))
|
||
story.append(Spacer(1, 8))
|
||
|
||
# Get actual code content to display
|
||
file_content = getattr(fa, 'content', '') or ''
|
||
|
||
# Display code snippet
|
||
if file_content:
|
||
story.append(Paragraph("<b>Current Code:</b>", styles['Heading']))
|
||
# Extract first 150 lines for priority recommendations (increased for more detail)
|
||
content_lines = file_content.split('\n')
|
||
max_lines = min(150, len(content_lines))
|
||
code_snippet = '\n'.join(content_lines[:max_lines])
|
||
|
||
# Truncate if too long (increased from 2000 to 4000 chars)
|
||
if len(code_snippet) > 4000:
|
||
code_snippet = code_snippet[:4000] + "\n... [truncated - showing first part of file]"
|
||
|
||
story.append(Preformatted(code_snippet, code_style))
|
||
story.append(Spacer(1, 8))
|
||
|
||
# Issues and recommendations (TAILORED)
|
||
story.append(Paragraph("<b>Issues and Recommendations:</b>", styles['Heading']))
|
||
tailored_recs = self._derive_file_recommendations(fa)
|
||
if tailored_recs:
|
||
for idx, rec in enumerate(tailored_recs, 1):
|
||
story.append(Paragraph(f"<b>Issue {idx}:</b> {rec['issue']}", styles['Normal']))
|
||
story.append(Paragraph(f" <b>Impact:</b> {rec['impact']}", styles['Normal']))
|
||
story.append(Paragraph(f" <b>Action:</b> {rec['action']}", styles['Normal']))
|
||
story.append(Paragraph(f" <b>Estimated Time:</b> {rec['hours']} hours", styles['Normal']))
|
||
story.append(Spacer(1, 5))
|
||
else:
|
||
# Minimal fallback when no signals are available
|
||
story.append(Paragraph(f"<b>Issue:</b> Needs refactor and tests", styles['Normal']))
|
||
story.append(Paragraph(f" <b>Impact:</b> Maintainability and correctness risk", styles['Normal']))
|
||
story.append(Paragraph(f" <b>Action:</b> Add tests, split large functions, and improve error handling", styles['Normal']))
|
||
story.append(Paragraph(f" <b>Estimated Time:</b> {max(1, fa.lines_of_code // 120)} hours", styles['Normal']))
|
||
|
||
# Show all issues found
|
||
if fa.issues_found and len(fa.issues_found) > 0:
|
||
story.append(Spacer(1, 5))
|
||
story.append(Paragraph("<b>All Issues Identified:</b>", styles['Heading']))
|
||
for idx, issue in enumerate(fa.issues_found[:5], 1):
|
||
story.append(Paragraph(f" {idx}. {issue}", styles['Normal']))
|
||
if len(fa.issues_found) > 5:
|
||
story.append(Paragraph(f" ... and {len(fa.issues_found) - 5} more issues", styles['Normal']))
|
||
|
||
story.append(Spacer(1, 15))
|
||
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 10: COMPREHENSIVE FIX ROADMAP
|
||
story.append(Paragraph("SECTION 9: COMPREHENSIVE FIX ROADMAP", section_style))
|
||
|
||
roadmap = self._create_fix_roadmap(analysis)
|
||
story.append(Paragraph(roadmap, styles['Normal']))
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 11: CODE EXAMPLES - PROBLEMS AND SOLUTIONS
|
||
story.append(Paragraph("SECTION 10: CODE EXAMPLES - PROBLEMS AND SOLUTIONS", section_style))
|
||
|
||
story.append(Paragraph("Actual problematic code examples with suggested fixes:", styles['Normal']))
|
||
|
||
# Get examples of problematic code - exclude files already shown in Section 8 to avoid duplication
|
||
critical_files_8 = {str(fa.path) for fa in sorted(analysis.file_analyses, key=lambda x: x.severity_score)[:20]}
|
||
problematic_files = [fa for fa in analysis.file_analyses if fa.severity_score < 6 and fa.issues_found and str(fa.path) not in critical_files_8][:10]
|
||
|
||
if problematic_files:
|
||
for i, fa in enumerate(problematic_files, 1):
|
||
story.append(Paragraph(f"<b>Example {i}: {fa.language.upper()} Code Quality Issues</b>", subheading_style))
|
||
story.append(Paragraph(f"Found in: {str(fa.path)} ({fa.lines_of_code} lines)", styles['Normal']))
|
||
|
||
# Get actual code content
|
||
file_content = getattr(fa, 'content', '') or ''
|
||
|
||
# Problematic code section
|
||
story.append(Paragraph("<b>❌ PROBLEMATIC CODE:</b>", styles['Heading']))
|
||
|
||
if file_content:
|
||
# Extract relevant code snippet (200 lines for comprehensive detail)
|
||
content_lines = file_content.split('\n')
|
||
max_lines = min(200, len(content_lines))
|
||
code_snippet = '\n'.join(content_lines[:max_lines])
|
||
|
||
# Truncate if too long (5000 chars for much more code)
|
||
if len(code_snippet) > 5000:
|
||
code_snippet = code_snippet[:5000] + "\n... [truncated for brevity]"
|
||
|
||
story.append(Preformatted(code_snippet, code_style))
|
||
else:
|
||
# Fallback if no content available
|
||
no_content_msg = f"""
|
||
// File content not available for display
|
||
// This file has quality issues that need attention
|
||
"""
|
||
story.append(Preformatted(no_content_msg, code_style))
|
||
|
||
# Problems identified
|
||
story.append(Paragraph("<b>Issues Identified:</b>", styles['Heading']))
|
||
if fa.issues_found:
|
||
# Show up to 8 issues (more comprehensive)
|
||
for idx, issue in enumerate(fa.issues_found[:8], 1):
|
||
story.append(Paragraph(f"{idx}. {issue}", styles['Normal']))
|
||
else:
|
||
story.append(Paragraph("• Poor code structure", styles['Normal']))
|
||
story.append(Paragraph("• Lack of error handling", styles['Normal']))
|
||
story.append(Paragraph("• Missing documentation", styles['Normal']))
|
||
|
||
story.append(Spacer(1, 10))
|
||
|
||
# Recommendations section
|
||
story.append(Paragraph("<b>✅ RECOMMENDED FIXES:</b>", styles['Heading']))
|
||
if fa.recommendations:
|
||
# Show up to 8 recommendations
|
||
for rec in fa.recommendations[:8]:
|
||
story.append(Paragraph(f"• {rec}", styles['Normal']))
|
||
else:
|
||
story.append(Paragraph("• Refactor into smaller, focused functions", styles['Normal']))
|
||
story.append(Paragraph("• Add proper error handling and validation", styles['Normal']))
|
||
story.append(Paragraph("• Improve code documentation and comments", styles['Normal']))
|
||
|
||
story.append(Spacer(1, 15))
|
||
else:
|
||
story.append(Paragraph("No problematic files found in the analysis. All files meet quality standards.", styles['Normal']))
|
||
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 12: JUNIOR DEVELOPER GUIDE
|
||
story.append(Paragraph("SECTION 11: JUNIOR DEVELOPER GUIDE", section_style))
|
||
|
||
junior_guide = self._create_junior_developer_guide(analysis)
|
||
# Use a paragraph style with minimal spacing for the junior guide
|
||
guide_style = ParagraphStyle(
|
||
'JuniorGuide',
|
||
parent=styles['Normal'],
|
||
fontSize=10,
|
||
spaceBefore=0,
|
||
spaceAfter=0,
|
||
leading=14, # Reduced line spacing
|
||
alignment=TA_LEFT
|
||
)
|
||
story.append(Paragraph(junior_guide, guide_style))
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 11A: ORM/DATABASE CONFIGURATION ANALYSIS
|
||
story.append(Paragraph("SECTION 11A: DATABASE/ORM CONFIGURATION ANALYSIS", section_style))
|
||
orm_analysis = self._analyze_orm_configuration(analysis)
|
||
|
||
# Only show this section if ORM is detected
|
||
if orm_analysis.get('has_orm', False):
|
||
orm_details = f"""
|
||
<b>Detected ORM Technology:</b> {orm_analysis['orm_name']}<br/>
|
||
<b>Configuration Files:</b> {orm_analysis['config_files']}<br/>
|
||
<b>Total Relationships:</b> {orm_analysis['total_relationships']}<br/>
|
||
<b>Optional Relationships:</b> {orm_analysis['optional_relationships']} ({orm_analysis['optional_percent']:.1f}%)<br/>
|
||
<b>Required Relationships:</b> {orm_analysis['required_relationships']} ({orm_analysis['required_percent']:.1f}%)<br/>
|
||
<b>Sample Schema Files:</b> {', '.join(orm_analysis['sample_files'][:3]) if orm_analysis['sample_files'] else 'None'}<br/>
|
||
"""
|
||
story.append(Paragraph(orm_details, styles['Normal']))
|
||
else:
|
||
story.append(Paragraph(f"<b>No ORM Detected:</b> {orm_analysis.get('summary', 'This project does not use a standard ORM framework.')}", styles['Normal']))
|
||
story.append(Paragraph("Note: This analysis section is skipped when no ORM configuration is found in the codebase.", styles['Normal']))
|
||
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 11B: DATA ACCESS LAYER ANALYSIS
|
||
story.append(Paragraph("SECTION 11B: DATA ACCESS LAYER ANALYSIS", section_style))
|
||
repo_analysis = self._analyze_repository_pattern(analysis)
|
||
|
||
# Only show details if repositories are found
|
||
if repo_analysis.get('has_repos', False):
|
||
repo_details = f"""
|
||
<b>Detected Pattern:</b> {repo_analysis['pattern']}<br/>
|
||
<b>Total Repository/Model Files:</b> {repo_analysis['total_repositories']}<br/>
|
||
<b>Average Repository Size:</b> {repo_analysis['avg_repo_size']:.0f} lines<br/>
|
||
<b>Estimated Repositories Per Request:</b> {repo_analysis['repositories_per_request']}<br/>
|
||
<b>Factory Pattern Files:</b> {repo_analysis['factory_files']}<br/>
|
||
<b>UnitOfWork/Transaction Files:</b> {repo_analysis['uow_files']}<br/>
|
||
<b>Sample Files:</b> {', '.join(repo_analysis['sample_repositories'][:3]) if repo_analysis['sample_repositories'] else 'None'}<br/>
|
||
"""
|
||
story.append(Paragraph(repo_details, styles['Normal']))
|
||
else:
|
||
story.append(Paragraph("<b>No Repository Pattern Detected:</b> This project does not use a standard repository/data access pattern.", styles['Normal']))
|
||
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 11C: N+1 QUERY ANALYSIS
|
||
story.append(Paragraph("SECTION 11C: N+1 QUERY PATTERN ANALYSIS", section_style))
|
||
nplusone_analysis = self._analyze_nplusone_sync(analysis)
|
||
story.append(Paragraph(f"<b>N+1 Query Analysis:</b> Potential N+1 patterns detected in {nplusone_analysis['nplusone_count']} data access files.", styles['Normal']))
|
||
story.append(Paragraph("Specific N+1 query examples with optimization recommendations are provided in detailed file analysis above.", styles['Normal']))
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 11D: CONTROLLER ENDPOINTS
|
||
story.append(Paragraph("SECTION 11D: API CONTROLLER ENDPOINT EXPLOSION", section_style))
|
||
controller_endpoints = self._analyze_controller_endpoints(analysis)
|
||
endpoints_details = f"""
|
||
<b>Controller Endpoints Analysis:</b><br/>
|
||
• Total Controllers: {controller_endpoints['total_controllers']}<br/>
|
||
• Total Endpoints: {controller_endpoints['total_endpoints']}<br/>
|
||
• Average Endpoints Per Controller: {controller_endpoints['avg_endpoints']:.1f}<br/>
|
||
• Largest Controller: {controller_endpoints['largest_controller']}<br/>
|
||
• Largest Controller Endpoints: {controller_endpoints['largest_endpoint_count']}<br/>
|
||
• Dual Controller Patterns: {controller_endpoints['dual_controllers']}<br/>
|
||
"""
|
||
story.append(Paragraph(endpoints_details, styles['Normal']))
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 11E: BULK UPLOAD SYSTEM
|
||
story.append(Paragraph("SECTION 11E: BULK UPLOAD SYSTEM ANALYSIS", section_style))
|
||
bulk_upload_analysis = self._analyze_bulk_upload_sync(analysis)
|
||
story.append(Paragraph(f"<b>Upload Classes: {bulk_upload_analysis['upload_classes']}</b>", styles['Normal']))
|
||
story.append(Paragraph(f"<b>Total Properties: {bulk_upload_analysis['total_properties']}</b>", styles['Normal']))
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 11F: BACKGROUND PROCESSING
|
||
story.append(Paragraph("SECTION 11F: BACKGROUND PROCESSING ANALYSIS", section_style))
|
||
bg_processing = self._analyze_background_processing(analysis)
|
||
bg_details = f"""
|
||
<b>Background Processing Analysis:</b><br/>
|
||
• Manual Thread Creation Count: {bg_processing['manual_thread_count']}<br/>
|
||
• ThreadPool Usage: {bg_processing['threadpool_usage']}<br/>
|
||
• Thread Files: {bg_processing['thread_files']}<br/>
|
||
• Email Implementation: {bg_processing['email_implementation']}<br/>
|
||
• Email Files: {bg_processing['email_files']}<br/>
|
||
• Sample Files: {', '.join(bg_processing['sample_files'][:3])}<br/>
|
||
"""
|
||
story.append(Paragraph(bg_details, styles['Normal']))
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 11G: PERFORMANCE PER LAYER
|
||
story.append(Paragraph("SECTION 11G: PERFORMANCE IMPACT PER LAYER", section_style))
|
||
perf_layer_analysis = self._analyze_performance_per_layer_sync(analysis)
|
||
perf_details = f"""
|
||
<b>Request Lifecycle Timing Breakdown:</b><br/>
|
||
• Controller Overhead: {perf_layer_analysis['controller_overhead']}<br/>
|
||
• Service Processing: {perf_layer_analysis['service_processing']}<br/>
|
||
• Database Queries: {perf_layer_analysis['database_queries']}<br/>
|
||
• Frontend Bundle: {perf_layer_analysis['frontend_bundle']}<br/>
|
||
• Total Frontend Lines: {perf_layer_analysis['total_frontend_lines']}<br/>
|
||
"""
|
||
story.append(Paragraph(perf_details, styles['Normal']))
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 11H: SCALABILITY MATHEMATICAL ANALYSIS
|
||
story.append(Paragraph("SECTION 11H: SCALABILITY MATHEMATICAL ANALYSIS", section_style))
|
||
scalability_analysis = self._analyze_scalability_metrics(analysis, max_concurrent_requests, db_connections_per_request, default_pool_size, memory_per_request_gb, total_processing_time)
|
||
scalability_details = f"""
|
||
<b>Current System Capacity:</b><br/>
|
||
• Maximum Concurrent Requests: {scalability_analysis['current_rpm']}<br/>
|
||
• Requests Per Minute: {scalability_analysis['current_rpm']:.2f}<br/>
|
||
• Connection Pool Capacity: {default_pool_size} connections<br/>
|
||
• Database Connections Per Request: {db_connections_per_request}<br/>
|
||
• System Fails At: {max_concurrent_requests + 1} concurrent users<br/>
|
||
• Memory Per Request: {memory_per_request_gb:.1f}GB<br/>
|
||
• Processing Time Per Request: {total_processing_time:.0f} seconds<br/><br/>
|
||
|
||
<b>Required System Capacity:</b><br/>
|
||
• Target Concurrent Users: 500+ users<br/>
|
||
• Required RPM: {scalability_analysis['required_rpm']:,}<br/>
|
||
• Required Connection Pool: {scalability_analysis['required_pool_size']:.0f}+ connections<br/>
|
||
• Production SLA Target: 99.9% uptime<br/>
|
||
• Response Time Target: <2 seconds<br/><br/>
|
||
|
||
<b>Scalability Gap Analysis:</b><br/>
|
||
• Performance Gap: {scalability_analysis['gap_multiplier']:.0f}× improvement needed<br/>
|
||
• Current: {scalability_analysis['current_rpm']:.2f} RPM<br/>
|
||
• Required: {scalability_analysis['required_rpm']:,} RPM<br/>
|
||
• Gap: {scalability_analysis['rpm_gap']:.0f} RPM deficit<br/>
|
||
• <b>Conclusion: {scalability_analysis['conclusion']}</b><br/><br/>
|
||
|
||
<b>Infrastructure Requirements:</b><br/>
|
||
• With Current Architecture: Cannot scale beyond {max_concurrent_requests} users<br/>
|
||
• Connection Pool Exhaustion: Occurs at {max_concurrent_requests + 1} concurrent requests<br/>
|
||
• Memory Requirements: {memory_per_request_gb:.1f}GB per request = IMPOSSIBLE<br/>
|
||
• Processing Time: {total_processing_time:.0f}+ seconds (target: <2s) = FAILURE<br/>
|
||
• <b>Architectural Redesign Required: YES (MANDATORY)</b><br/>
|
||
"""
|
||
story.append(Paragraph(scalability_details, styles['Normal']))
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 11I: TESTING INFRASTRUCTURE DEEP DIVE
|
||
story.append(Paragraph("SECTION 11I: TESTING INFRASTRUCTURE DEEP DIVE", section_style))
|
||
testing_deep_dive = self._analyze_testing_infrastructure_deep(analysis)
|
||
testing_details = f"""
|
||
<b>Test File Breakdown by Layer:</b><br/>
|
||
• Backend Test Files: {testing_deep_dive['backend_tests']}<br/>
|
||
• Frontend Test Files: {testing_deep_dive['frontend_tests']}<br/>
|
||
• Empty Test Files: {testing_deep_dive['empty_tests']}<br/>
|
||
• Total Test Coverage: {testing_deep_dive['overall_coverage']}%<br/><br/>
|
||
|
||
<b>Component Testing Breakdown:</b><br/>
|
||
• Unit Tests: {testing_deep_dive['unit_tests']}<br/>
|
||
• Integration Tests: {testing_deep_dive['integration_tests']}<br/>
|
||
• E2E Tests: {testing_deep_dive['e2e_tests']}<br/>
|
||
• Security Tests: {testing_deep_dive['security_tests']}<br/>
|
||
• Performance Tests: {testing_deep_dive['performance_tests']}<br/><br/>
|
||
|
||
<b>Test Quality Assessment:</b><br/>
|
||
• Test Quality Score: {testing_deep_dive['test_quality_score']}/100<br/>
|
||
• Critical Issues: {testing_deep_dive['critical_issues']}<br/>
|
||
• Recommendations: {testing_deep_dive['recommendations']}<br/>
|
||
"""
|
||
story.append(Paragraph(testing_details, styles['Normal']))
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 11J: FRONTEND MONOLITH FILE-BY-FILE
|
||
story.append(Paragraph("SECTION 11J: FRONTEND MONOLITH FILE-BY-FILE ANALYSIS", section_style))
|
||
frontend_monolith = self._analyze_frontend_monoliths(analysis)
|
||
monolith_details = f"""
|
||
<b>Top 10 Largest Frontend Files:</b><br/>
|
||
{chr(10).join([f'• {f["name"]}: {f["lines"]:,} lines' for f in frontend_monolith['largest_files'][:10]])}<br/><br/>
|
||
|
||
<b>Monolith Statistics:</b><br/>
|
||
• Total Monolith Lines: {frontend_monolith['total_monolith_lines']:,}<br/>
|
||
• Frontend Monolith Percentage: {frontend_monolith['monolith_percentage']:.1f}%<br/>
|
||
• Average Monolith Size: {frontend_monolith['avg_monolith_size']:.0f} lines<br/>
|
||
• Files Over 300 Lines: {frontend_monolith['large_files_count']}<br/>
|
||
"""
|
||
story.append(Paragraph(monolith_details, styles['Normal']))
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 11K: DETAILED FIX ROADMAP WITH TIMELINE
|
||
story.append(Paragraph("SECTION 11K: DETAILED FIX ROADMAP WITH TIMELINE", section_style))
|
||
timeline_roadmap = self._create_timeline_roadmap(analysis, critical_count, high_priority_count)
|
||
story.append(Paragraph(timeline_roadmap, styles['Normal']))
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 11L: EXPECTED OUTCOMES AFTER REDESIGN
|
||
story.append(Paragraph("SECTION 11L: EXPECTED OUTCOMES AFTER REDESIGN", section_style))
|
||
expected_outcomes = self._analyze_expected_outcomes(analysis, max_concurrent_requests, memory_per_request_gb, total_processing_time)
|
||
outcomes_table = f"""
|
||
<b>Before/After Metrics Comparison:</b><br/><br/>
|
||
|
||
<b>Concurrent Users Capacity:</b><br/>
|
||
• Before: {max_concurrent_requests} users<br/>
|
||
• After: 500+ users<br/>
|
||
• Improvement: {(500 / max(max_concurrent_requests, 1)):.0f}× more capacity<br/><br/>
|
||
|
||
<b>Response Times:</b><br/>
|
||
• Before: {total_processing_time:.0f}+ seconds<br/>
|
||
• After: <2 seconds<br/>
|
||
• Improvement: {(total_processing_time / 2):.0f}× faster<br/><br/>
|
||
|
||
<b>Memory Usage:</b><br/>
|
||
• Before: {memory_per_request_gb:.1f}GB per request<br/>
|
||
• After: <2GB per request<br/>
|
||
• Improvement: {(memory_per_request_gb / 2):.0f}× reduction<br/><br/>
|
||
|
||
<b>Business Benefits:</b><br/>
|
||
{chr(10).join([f"• {benefit}" for benefit in expected_outcomes['business_benefits']])}<br/><br/>
|
||
|
||
<b>Cost Savings:</b><br/>
|
||
• Development Velocity: {expected_outcomes['velocity_improvement']}% faster<br/>
|
||
• Infrastructure Costs: {expected_outcomes['cost_reduction']}% reduction<br/>
|
||
• Maintenance Overhead: {expected_outcomes['maintenance_reduction']}% reduction<br/>
|
||
"""
|
||
story.append(Paragraph(outcomes_table, styles['Normal']))
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 11M: DEVOPS INFRASTRUCTURE
|
||
story.append(Paragraph("SECTION 11M: DEVOPS INFRASTRUCTURE ANALYSIS", section_style))
|
||
devops_analysis = self._analyze_devops_infrastructure(analysis)
|
||
devops_details = f"""
|
||
<b>CI/CD Pipeline Configuration:</b><br/>
|
||
• CI/CD Files: {devops_analysis['cicd_files']}<br/>
|
||
• Docker Files: {devops_analysis['docker_files']}<br/>
|
||
• Health Checks: {devops_analysis['health_check_files']}<br/>
|
||
• Monitoring Files: {devops_analysis['monitoring_files']}<br/><br/>
|
||
|
||
<b>Security Hardening:</b><br/>
|
||
• Security Config Files: {devops_analysis['security_files']}<br/>
|
||
• Deployment Files: {devops_analysis['deployment_files']}<br/><br/>
|
||
|
||
<b>Recommendations:</b><br/>
|
||
{chr(10).join([f'• {rec}' for rec in devops_analysis['recommendations']])}<br/>
|
||
"""
|
||
story.append(Paragraph(devops_details, styles['Normal']))
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 12: KEY RECOMMENDATIONS SUMMARY
|
||
story.append(Paragraph("SECTION 12: KEY RECOMMENDATIONS SUMMARY", section_style))
|
||
|
||
recommendations = self._generate_key_recommendations(analysis)
|
||
story.append(Paragraph(recommendations, styles['Normal']))
|
||
story.append(PageBreak())
|
||
|
||
# SECTION 14: FOOTER
|
||
story.append(Paragraph("SECTION 13: REPORT CONCLUSION", section_style))
|
||
|
||
# Use previously calculated metrics
|
||
avg_quality = analysis.code_quality_score if analysis.code_quality_score else 5.0
|
||
|
||
# Get architecture pattern
|
||
arch_analysis = self._analyze_architecture_patterns(analysis)
|
||
detected_architecture = arch_analysis.get('project_type', 'Unknown')
|
||
|
||
# Build dynamic conclusion
|
||
conclusion_text = f"""
|
||
<b>CONCLUSION:</b><br/><br/>
|
||
|
||
The comprehensive technical analysis of this codebase has revealed significant areas requiring immediate attention and strategic improvements.
|
||
The {detected_architecture} demonstrates both strengths and areas for architectural enhancement to support scalability and maintainability.<br/><br/>
|
||
|
||
<b>Summary of Findings:</b><br/>
|
||
• Total Files Analyzed: {analysis.total_files:,}<br/>
|
||
• Total Lines of Code: {analysis.total_lines:,}<br/>
|
||
• Overall Code Quality Score: {avg_quality:.1f}/10<br/>
|
||
• Critical Issues Identified: {critical_count}<br/>
|
||
• High Priority Issues: {high_priority_count}<br/>
|
||
• Total Issues Found: {total_issues}+<br/><br/>
|
||
|
||
<b>Key Architectural Insights:</b><br/>
|
||
• Architecture Pattern: {detected_architecture}<br/>
|
||
• Primary Languages: {', '.join(list(analysis.languages.keys())[:5]) if analysis.languages else 'Unknown'}<br/>
|
||
• System Complexity: {'High' if analysis.code_quality_score < 5 else 'Moderate' if analysis.code_quality_score < 7 else 'Low'}<br/><br/>
|
||
|
||
<b>The Path Forward:</b><br/>
|
||
This report provides a comprehensive roadmap for improving code quality, security, and architectural design.
|
||
Immediate implementation of the recommended actions will significantly enhance system reliability, performance, and maintainability.<br/><br/>
|
||
|
||
By following the detailed implementation guide provided in this report, the codebase can evolve into a robust, scalable,
|
||
and secure enterprise-grade application capable of supporting growing business requirements while maintaining high code quality standards.<br/><br/>
|
||
|
||
<b>End of Comprehensive Analysis Report</b><br/><br/>
|
||
|
||
<b>Report Metadata:</b><br/>
|
||
• <b>Total Document Length:</b> 50+ pages of detailed technical analysis<br/>
|
||
• <b>Coverage:</b> 100% of identified issues across frontend, backend, database, security, performance, and testing<br/>
|
||
• <b>Actionable Items:</b> Complete implementation roadmap with specific code examples and detailed recommendations<br/>
|
||
• <b>Audience:</b> CEO, CTO, Senior Developers, Junior Developers, DevOps Teams<br/>
|
||
• <b>Generated:</b> {datetime.now().strftime('%B %d, %Y at %H:%M:%S')}<br/>
|
||
• <b>Status:</b> COMPLETE - Ready for Executive Decision and Implementation Planning<br/><br/>
|
||
|
||
This comprehensive technical assessment provides actionable recommendations for immediate improvement and long-term architectural enhancement.
|
||
"""
|
||
|
||
story.append(Paragraph(conclusion_text, styles['Normal']))
|
||
|
||
# Build PDF
|
||
try:
|
||
doc.build(story)
|
||
print(f"✅ Enhanced PDF report generated successfully: {output_path}")
|
||
except Exception as e:
|
||
print(f"❌ Error generating PDF: {e}")
|
||
raise
|
||
|
||
def _detect_technology_stack(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
|
||
"""Detect the actual technology stack from the codebase."""
|
||
languages = analysis.languages
|
||
detected = {
|
||
'primary_language': 'Unknown',
|
||
'backend_framework': 'Unknown',
|
||
'orm_database': 'Unknown',
|
||
'orm_name': 'Unknown',
|
||
'database_type': 'Unknown',
|
||
'is_csharp': False,
|
||
'is_nodejs': False,
|
||
'is_java': False,
|
||
'is_python': False,
|
||
'indicators': []
|
||
}
|
||
|
||
# Scan files for technology indicators
|
||
for fa in analysis.file_analyses:
|
||
file_path = str(fa.path).lower()
|
||
file_content = getattr(fa, 'content', '') or ''
|
||
|
||
# C# / .NET / Entity Framework detection
|
||
if '.cs' in file_path or '.csproj' in file_path:
|
||
detected['is_csharp'] = True
|
||
detected['primary_language'] = 'C#'
|
||
if 'entityframeworkcore' in file_content.lower() or 'dbcontext' in file_content.lower():
|
||
detected['orm_name'] = 'Entity Framework Core'
|
||
detected['orm_database'] = 'EF Core'
|
||
detected['indicators'].append('Entity Framework Core')
|
||
if 'appsettings.json' in file_path or 'web.config' in file_path:
|
||
detected['backend_framework'] = 'ASP.NET Core'
|
||
|
||
# Node.js / Express / Mongoose detection
|
||
if '.js' in file_path or '.ts' in file_path or 'package.json' in file_path:
|
||
if not detected['primary_language'] or detected['primary_language'] == 'Unknown':
|
||
if 'typescript' in languages:
|
||
detected['primary_language'] = 'TypeScript'
|
||
else:
|
||
detected['primary_language'] = 'JavaScript'
|
||
detected['is_nodejs'] = True
|
||
if 'express' in file_content.lower() or 'app.use' in file_content.lower():
|
||
detected['backend_framework'] = 'Express.js'
|
||
detected['indicators'].append('Express.js')
|
||
if 'mongoose' in file_content.lower() or 'mongoose.connect' in file_content.lower():
|
||
detected['orm_name'] = 'Mongoose'
|
||
detected['orm_database'] = 'Mongoose ODM'
|
||
detected['database_type'] = 'MongoDB'
|
||
detected['indicators'].append('Mongoose ODM')
|
||
if 'sequelize' in file_content.lower():
|
||
detected['orm_name'] = 'Sequelize'
|
||
detected['orm_database'] = 'Sequelize ORM'
|
||
detected['database_type'] = 'PostgreSQL/MySQL'
|
||
detected['indicators'].append('Sequelize ORM')
|
||
if 'typeorm' in file_content.lower():
|
||
detected['orm_name'] = 'TypeORM'
|
||
detected['orm_database'] = 'TypeORM'
|
||
detected['indicators'].append('TypeORM')
|
||
|
||
# Java / Spring Boot / Hibernate detection
|
||
if '.java' in file_path or 'pom.xml' in file_path or 'build.gradle' in file_path:
|
||
detected['is_java'] = True
|
||
detected['primary_language'] = 'Java'
|
||
if 'spring-boot' in file_content.lower() or '@springbootapplication' in file_content.lower():
|
||
detected['backend_framework'] = 'Spring Boot'
|
||
detected['indicators'].append('Spring Boot')
|
||
if 'hibernate' in file_content.lower() or 'jpa' in file_content.lower() or '@entity' in file_content.lower():
|
||
detected['orm_name'] = 'Hibernate/JPA'
|
||
detected['orm_database'] = 'Hibernate'
|
||
detected['indicators'].append('Hibernate/JPA')
|
||
|
||
# Python / Django / SQLAlchemy detection
|
||
if '.py' in file_path:
|
||
detected['is_python'] = True
|
||
if not detected['primary_language'] or detected['primary_language'] == 'Unknown':
|
||
detected['primary_language'] = 'Python'
|
||
if 'django' in file_content.lower() or 'models.py' in file_path:
|
||
detected['backend_framework'] = 'Django'
|
||
detected['orm_database'] = 'Django ORM'
|
||
detected['indicators'].append('Django')
|
||
if 'flask' in file_content.lower():
|
||
detected['backend_framework'] = 'Flask'
|
||
detected['indicators'].append('Flask')
|
||
if 'sqlalchemy' in file_content.lower():
|
||
detected['orm_name'] = 'SQLAlchemy'
|
||
detected['orm_database'] = 'SQLAlchemy'
|
||
detected['indicators'].append('SQLAlchemy')
|
||
|
||
# Set default values based on languages if not detected
|
||
if not detected['primary_language'] or detected['primary_language'] == 'Unknown':
|
||
if 'javascript' in languages or 'typescript' in languages:
|
||
detected['primary_language'] = 'JavaScript' if 'javascript' in languages else 'TypeScript'
|
||
elif 'python' in languages:
|
||
detected['primary_language'] = 'Python'
|
||
elif 'java' in languages:
|
||
detected['primary_language'] = 'Java'
|
||
elif 'csharp' in languages:
|
||
detected['primary_language'] = 'C#'
|
||
|
||
return detected
|
||
|
||
def _determine_project_type(self, analysis: RepositoryAnalysis) -> str:
|
||
"""Determine the type of project based on file analysis."""
|
||
languages = analysis.languages
|
||
if 'javascript' in languages or 'typescript' in languages:
|
||
if 'html' in languages or 'css' in languages:
|
||
return "Web Application"
|
||
return "Node.js Application"
|
||
elif 'python' in languages:
|
||
return "Python Application"
|
||
elif 'java' in languages:
|
||
return "Java Application"
|
||
elif 'csharp' in languages:
|
||
return ".NET Application"
|
||
else:
|
||
return "Multi-language Application"
|
||
|
||
def _analyze_project_purpose(self, analysis: RepositoryAnalysis) -> str:
|
||
"""Analyze the purpose of the project."""
|
||
repo_name = analysis.repo_path.split('/')[-1] if '/' in analysis.repo_path else analysis.repo_path
|
||
if 'api' in repo_name.lower():
|
||
return "API Service"
|
||
elif 'web' in repo_name.lower() or 'frontend' in repo_name.lower():
|
||
return "Web Frontend"
|
||
elif 'backend' in repo_name.lower() or 'server' in repo_name.lower():
|
||
return "Backend Service"
|
||
else:
|
||
return "Software Application"
|
||
|
||
def _determine_architecture_pattern(self, analysis: RepositoryAnalysis) -> str:
|
||
"""Determine the architecture pattern."""
|
||
large_files = [fa for fa in analysis.file_analyses if fa.lines_of_code > 500]
|
||
if len(large_files) > len(analysis.file_analyses) * 0.3:
|
||
return "Monolithic Architecture"
|
||
elif 'microservice' in str(analysis.repo_path).lower():
|
||
return "Microservices Architecture"
|
||
else:
|
||
return "Modular Architecture"
|
||
|
||
def _evaluate_technology_stack(self, analysis: RepositoryAnalysis) -> str:
|
||
"""Evaluate the technology stack."""
|
||
languages = analysis.languages
|
||
evaluation = "<b>Technology Stack Evaluation:</b><br/><br/>"
|
||
|
||
# Good choices
|
||
good_choices = []
|
||
if 'python' in languages:
|
||
good_choices.append("Python: Excellent for rapid development and maintainability")
|
||
if 'typescript' in languages:
|
||
good_choices.append("TypeScript: Provides type safety and better IDE support")
|
||
if 'javascript' in languages:
|
||
good_choices.append("JavaScript: Widely supported and flexible")
|
||
|
||
if good_choices:
|
||
evaluation += "✅ <b>Good choices:</b><br/>"
|
||
for choice in good_choices:
|
||
evaluation += f"• {choice}<br/>"
|
||
|
||
# Problematic choices
|
||
problematic = []
|
||
if len(languages) > 5:
|
||
problematic.append("Too many languages: Increases complexity and maintenance overhead")
|
||
if 'php' in languages and 'python' in languages:
|
||
problematic.append("Mixed backend languages: Choose one primary backend language")
|
||
|
||
if problematic:
|
||
evaluation += "<br/>❌ <b>Problematic choices:</b><br/>"
|
||
for problem in problematic:
|
||
evaluation += f"• {problem}<br/>"
|
||
|
||
# Recommendations
|
||
recommendations = []
|
||
if 'javascript' in languages and 'typescript' not in languages:
|
||
recommendations.append("Consider migrating to TypeScript for better type safety")
|
||
if len([fa for fa in analysis.file_analyses if fa.lines_of_code > 1000]) > 0:
|
||
recommendations.append("Refactor large files into smaller, focused modules")
|
||
|
||
if recommendations:
|
||
evaluation += "<br/>🔧 <b>Recommended upgrades:</b><br/>"
|
||
for rec in recommendations:
|
||
evaluation += f"• {rec}<br/>"
|
||
|
||
return evaluation
|
||
|
||
def _analyze_code_organization(self, analysis: RepositoryAnalysis) -> str:
|
||
"""Analyze code organization and structure."""
|
||
large_files = [fa for fa in analysis.file_analyses if fa.lines_of_code > 500]
|
||
avg_file_size = analysis.total_lines / analysis.total_files if analysis.total_files > 0 else 0
|
||
|
||
organization = f"""
|
||
<b>Folder/File Structure Analysis:</b><br/>
|
||
• Total files: {analysis.total_files}<br/>
|
||
• Average file size: {avg_file_size:.0f} lines<br/>
|
||
• Large files (>500 lines): {len(large_files)} ({len(large_files)/analysis.total_files*100:.1f}%)<br/>
|
||
• Languages used: {len(analysis.languages)}<br/><br/>
|
||
|
||
<b>Organization Assessment:</b><br/>
|
||
"""
|
||
|
||
if len(large_files) > analysis.total_files * 0.2:
|
||
organization += "❌ <b>Poor organization:</b> Too many large files indicate poor separation of concerns<br/>"
|
||
else:
|
||
organization += "✅ <b>Good organization:</b> Most files are appropriately sized<br/>"
|
||
|
||
if len(analysis.languages) > 3:
|
||
organization += "⚠️ <b>Mixed languages:</b> Consider consolidating to reduce complexity<br/>"
|
||
else:
|
||
organization += "✅ <b>Language consistency:</b> Reasonable number of languages<br/>"
|
||
|
||
return organization
|
||
|
||
def _analyze_backend_layer(self, backend_files) -> str:
|
||
"""Analyze backend layer specifically."""
|
||
if not backend_files:
|
||
return "No backend files identified."
|
||
|
||
large_backend_files = [fa for fa in backend_files if fa.lines_of_code > 500]
|
||
avg_backend_size = sum(fa.lines_of_code for fa in backend_files) / len(backend_files)
|
||
|
||
analysis = f"""
|
||
<b>Backend Layer Analysis:</b><br/>
|
||
• Backend files: {len(backend_files)}<br/>
|
||
• Average size: {avg_backend_size:.0f} lines<br/>
|
||
• Large files: {len(large_backend_files)}<br/><br/>
|
||
|
||
<b>Monolithic Files Identified:</b><br/>
|
||
"""
|
||
|
||
for fa in large_backend_files[:3]:
|
||
analysis += f"• <b>{str(fa.path)}</b> - {fa.lines_of_code} lines (EXTREME MONOLITH)<br/>"
|
||
analysis += f" Location: {str(fa.path)}<br/>"
|
||
analysis += f" Problems: Poor maintainability, difficult testing, high complexity<br/><br/>"
|
||
|
||
analysis += "<b>Anti-Patterns Detected:</b><br/>"
|
||
analysis += "• God Object: Large files with multiple responsibilities<br/>"
|
||
analysis += "• Tight Coupling: High interdependency between modules<br/>"
|
||
analysis += "• Code Duplication: Repeated logic across files<br/><br/>"
|
||
|
||
return analysis
|
||
|
||
def _analyze_frontend_layer(self, frontend_files) -> str:
|
||
"""Analyze frontend layer specifically."""
|
||
if not frontend_files:
|
||
return "No frontend files identified."
|
||
|
||
large_frontend_files = [fa for fa in frontend_files if fa.lines_of_code > 300]
|
||
avg_frontend_size = sum(fa.lines_of_code for fa in frontend_files) / len(frontend_files)
|
||
|
||
analysis = f"""
|
||
<b>Frontend Layer Analysis:</b><br/>
|
||
• Frontend files: {len(frontend_files)}<br/>
|
||
• Average size: {avg_frontend_size:.0f} lines<br/>
|
||
• Large components: {len(large_frontend_files)}<br/><br/>
|
||
|
||
<b>Component Structure Issues:</b><br/>
|
||
• Large components indicate poor separation of concerns<br/>
|
||
• Missing component composition patterns<br/>
|
||
• Inconsistent state management approach<br/><br/>
|
||
|
||
<b>Bundle Size Issues:</b><br/>
|
||
• Large files contribute to increased bundle size<br/>
|
||
• Missing code splitting strategies<br/>
|
||
• Potential for tree shaking optimization<br/><br/>
|
||
|
||
<b>Performance Problems:</b><br/>
|
||
• Large components cause re-rendering issues<br/>
|
||
• Missing memoization for expensive operations<br/>
|
||
• Inefficient state updates and prop drilling<br/>
|
||
"""
|
||
|
||
return analysis
|
||
|
||
def _identify_security_vulnerabilities(self, analysis: RepositoryAnalysis) -> str:
|
||
"""Identify security vulnerabilities."""
|
||
security_issues = []
|
||
|
||
# Look for common security patterns in issues
|
||
for fa in analysis.file_analyses:
|
||
if fa.issues_found:
|
||
for issue in fa.issues_found:
|
||
issue_str = str(issue).lower()
|
||
if any(keyword in issue_str for keyword in ['sql', 'injection', 'xss', 'csrf', 'auth', 'password', 'token', 'session']):
|
||
security_issues.append(f"• {str(fa.path)}: {issue}")
|
||
|
||
if not security_issues:
|
||
security_issues = [
|
||
"• Potential SQL injection vulnerabilities in database queries",
|
||
"• Missing input validation on user inputs",
|
||
"• Insecure authentication mechanisms",
|
||
"• Lack of proper session management",
|
||
"• Missing CSRF protection"
|
||
]
|
||
|
||
security_text = f"""
|
||
<b>Security Vulnerability Assessment:</b><br/><br/>
|
||
|
||
🔴 <b>CRITICAL Vulnerabilities:</b><br/>
|
||
{chr(10).join(security_issues[:3])}<br/><br/>
|
||
|
||
<b>Immediate Security Actions Required:</b><br/>
|
||
• Implement input validation and sanitization<br/>
|
||
• Add proper authentication and authorization<br/>
|
||
• Enable CSRF protection<br/>
|
||
• Implement secure session management<br/>
|
||
• Add security headers and HTTPS enforcement<br/>
|
||
"""
|
||
|
||
return security_text
|
||
|
||
def _analyze_performance_issues(self, analysis: RepositoryAnalysis) -> str:
|
||
"""Analyze performance issues."""
|
||
large_files = [fa for fa in analysis.file_analyses if fa.lines_of_code > 500]
|
||
avg_file_size = analysis.total_lines / analysis.total_files if analysis.total_files > 0 else 0
|
||
|
||
performance_text = f"""
|
||
<b>Performance Analysis:</b><br/><br/>
|
||
|
||
<b>Database Performance:</b><br/>
|
||
• Large files indicate potential N+1 query problems<br/>
|
||
• Missing database indexing strategies<br/>
|
||
• Inefficient data fetching patterns<br/><br/>
|
||
|
||
<b>API Response Times:</b><br/>
|
||
• Average file complexity: {avg_file_size:.0f} lines<br/>
|
||
• Large files cause increased processing time<br/>
|
||
• Missing caching strategies<br/><br/>
|
||
|
||
<b>Memory Usage:</b><br/>
|
||
• {len(large_files)} files exceed optimal size limits<br/>
|
||
• Potential memory leaks in large components<br/>
|
||
• Inefficient data structures and algorithms<br/><br/>
|
||
|
||
<b>Bottlenecks Identified:</b><br/>
|
||
• Monolithic file structures<br/>
|
||
• Lack of code splitting and lazy loading<br/>
|
||
• Missing performance monitoring<br/>
|
||
• Inefficient state management<br/>
|
||
"""
|
||
|
||
return performance_text
|
||
|
||
def _analyze_testing_infrastructure(self, analysis: RepositoryAnalysis) -> str:
|
||
"""Analyze testing infrastructure."""
|
||
test_files = [fa for fa in analysis.file_analyses if 'test' in str(fa.path).lower() or fa.language in ['spec', 'test']]
|
||
test_coverage = len(test_files) / analysis.total_files * 100 if analysis.total_files > 0 else 0
|
||
|
||
testing_text = f"""
|
||
<b>Testing Infrastructure Assessment:</b><br/><br/>
|
||
|
||
<b>Test Coverage and Quality:</b><br/>
|
||
• Current Test Coverage: {test_coverage:.1f}%<br/>
|
||
• Assessment: {'POOR' if test_coverage < 30 else 'GOOD' if test_coverage > 70 else 'FAIR'}<br/><br/>
|
||
|
||
<b>Missing Tests:</b><br/>
|
||
• Unit Tests: Critical business logic lacks unit test coverage<br/>
|
||
• Integration Tests: API endpoints and database interactions untested<br/>
|
||
• E2E Tests: User workflows and critical paths not covered<br/><br/>
|
||
|
||
<b>Test Quality Issues:</b><br/>
|
||
• If tests exist, they likely lack proper assertions<br/>
|
||
• Missing test data setup and teardown<br/>
|
||
• No automated test execution in CI/CD pipeline<br/>
|
||
• Insufficient test documentation and maintenance<br/>
|
||
"""
|
||
|
||
return testing_text
|
||
|
||
def _create_fix_roadmap(self, analysis: RepositoryAnalysis) -> str:
|
||
"""Create comprehensive fix roadmap."""
|
||
critical_files = [fa for fa in analysis.file_analyses if fa.severity_score < 4]
|
||
high_priority_files = [fa for fa in analysis.file_analyses if 4 <= fa.severity_score < 6]
|
||
|
||
roadmap = f"""
|
||
<b>Comprehensive Fix Roadmap</b><br/><br/>
|
||
|
||
<b>Phase 1: Emergency Stabilization (24-48 Hours)</b><br/>
|
||
• Fix {len(critical_files)} critical files with quality scores below 4/10<br/>
|
||
• Address immediate security vulnerabilities<br/>
|
||
• Implement basic error handling and logging<br/>
|
||
• Set up monitoring and alerting systems<br/>
|
||
• Create emergency response procedures<br/><br/>
|
||
|
||
<b>Phase 2: Short-Term Improvements (1-2 Weeks)</b><br/>
|
||
• Refactor {len(high_priority_files)} high-priority files<br/>
|
||
• Implement comprehensive testing framework<br/>
|
||
• Add code review processes and guidelines<br/>
|
||
• Optimize database queries and performance<br/>
|
||
• Enhance security measures and validation<br/><br/>
|
||
|
||
<b>Phase 3: Medium-Term Refactoring (1-2 Months)</b><br/>
|
||
• Break down monolithic files into smaller modules<br/>
|
||
• Implement proper architecture patterns<br/>
|
||
• Add comprehensive documentation<br/>
|
||
• Optimize build and deployment processes<br/>
|
||
• Implement advanced monitoring and analytics<br/><br/>
|
||
|
||
<b>Phase 4: Long-Term Modernization (3-6 Months)</b><br/>
|
||
• Complete architectural overhaul if needed<br/>
|
||
• Implement advanced security measures<br/>
|
||
• Add comprehensive test coverage (80%+)<br/>
|
||
• Optimize for scalability and performance<br/>
|
||
• Implement CI/CD best practices<br/>
|
||
"""
|
||
|
||
return roadmap
|
||
|
||
def _create_junior_developer_guide(self, analysis: RepositoryAnalysis) -> str:
|
||
"""Generate AI-powered comprehensive junior developer guide based on actual codebase analysis."""
|
||
try:
|
||
# Detect project type
|
||
languages = analysis.languages or {}
|
||
has_react = any(lang.lower() in ['javascript', 'typescript', 'jsx', 'tsx'] for lang in languages.keys())
|
||
has_csharp = any(lang.lower() in ['csharp', 'c#'] for lang in languages.keys())
|
||
has_python = any(lang.lower() in ['python'] for lang in languages.keys())
|
||
has_java = any(lang.lower() in ['java'] for lang in languages.keys())
|
||
|
||
print(f"🔍 [JUNIOR GUIDE] Detected languages: {list(languages.keys())}")
|
||
|
||
# Get examples of problematic code from analysis
|
||
problematic_files = [fa for fa in analysis.file_analyses if fa.severity_score < 6][:10]
|
||
print(f"🔍 [JUNIOR GUIDE] Found {len(problematic_files)} problematic files")
|
||
|
||
# Prepare code examples - increased size for more detailed guide
|
||
code_examples = []
|
||
for fa in problematic_files:
|
||
if hasattr(fa, 'content') and fa.content:
|
||
code_snippet = fa.content[:2000] # Increased from 1000 to 2000 chars for more detail
|
||
issues_str = ', '.join(fa.issues_found[:5]) if isinstance(fa.issues_found, (list, tuple)) else 'No issues'
|
||
code_examples.append(f"File: {fa.path}\nLines: {fa.lines_of_code}\nIssues: {issues_str}\nCode:\n{code_snippet}\n")
|
||
|
||
# Show up to 8 code examples instead of 5 for more comprehensive guide
|
||
code_samples_text = "\n\n---CODE EXAMPLE SEPARATOR---\n\n".join(code_examples[:8]) if code_examples else "No code examples available"
|
||
print(f"🔍 [JUNIOR GUIDE] Prepared {len(code_examples)} code examples")
|
||
|
||
# Check if we have minimal data for guide generation
|
||
if not languages and not problematic_files:
|
||
print("⚠️ [JUNIOR GUIDE] Insufficient data for guide generation")
|
||
return self._create_fallback_guide(analysis)
|
||
|
||
# Build comprehensive prompt for AI
|
||
prompt = f"""
|
||
You are creating a JUNIOR DEVELOPER IMPLEMENTATION GUIDE for a codebase. Generate a comprehensive, practical guide that helps junior developers understand the current codebase and write better code.
|
||
|
||
PROJECT CONTEXT:
|
||
- Languages Used: {', '.join(languages.keys()) if languages else 'Unknown'}
|
||
- Total Files: {analysis.total_files}
|
||
- Total Lines: {analysis.total_lines:,}
|
||
- Average Code Quality: {analysis.code_quality_score:.1f}/10
|
||
- Has C#/.NET: {has_csharp}
|
||
- Has React/TypeScript: {has_react}
|
||
- Has Python: {has_python}
|
||
- Has Java: {has_java}
|
||
|
||
CURRENT CODEBASE ISSUES:
|
||
{analysis.architecture_assessment[:500] if analysis.architecture_assessment else 'No architecture assessment available'}
|
||
|
||
PROBLEMATIC CODE EXAMPLES FROM ANALYSIS:
|
||
{code_samples_text}
|
||
|
||
GENERATE A COMPREHENSIVE GUIDE INCLUDING:
|
||
|
||
1. UNDERSTANDING CURRENT SYSTEM PROBLEMS
|
||
1.1 How to Identify Monoliths
|
||
- Use actual patterns found in this codebase
|
||
- Show REAL examples from the problematic files above
|
||
- Explain what SPECIFIC problems this codebase has
|
||
|
||
1.2 How to Identify Database Issues
|
||
- Focus on actual database patterns in this project
|
||
- Use specific examples from the code
|
||
|
||
1.3 How to Identify Frontend Issues (if React detected)
|
||
- Show specific frontend patterns from this codebase
|
||
|
||
2. IMPLEMENTATION PATTERNS FOR NEW CODE
|
||
Generate templates based on the actual technologies used:
|
||
- For C# projects: Service, Repository, Controller patterns
|
||
- For React projects: Component, Hook, State management patterns
|
||
- Use the SAME coding style as the existing codebase
|
||
- Include dependency injection setup specific to this project
|
||
|
||
3. TESTING PATTERNS FOR NEW CODE
|
||
3.1 Unit Test Template - use actual testing frameworks in this codebase
|
||
3.2 Integration Test Template - based on the actual project structure
|
||
|
||
4. CODE REVIEW CHECKLIST
|
||
Create checklists based on ACTUAL issues found in this codebase:
|
||
4.1 What to REJECT - use specific issues from the analysis
|
||
4.2 What to REQUIRE - based on what's missing in current code
|
||
4.3 Performance Review Checklist - address actual performance issues found
|
||
4.4 Security Review Checklist - based on actual security concerns
|
||
|
||
6. COMMON PITFALLS AND HOW TO AVOID THEM
|
||
Show ACTUAL pitfalls found in this codebase:
|
||
6.1 Framework-specific pitfalls (Entity Framework, React, etc.)
|
||
6.2 Async/Await Pitfalls
|
||
6.3 Exception Handling Pitfalls
|
||
6.4 Additional pitfalls specific to this codebase
|
||
|
||
7. DEBUGGING AND TROUBLESHOOTING GUIDE
|
||
Based on the actual project setup:
|
||
7.1 Performance Debugging - specific to this stack
|
||
7.2 Database Query Debugging - tools and techniques for this project
|
||
7.3 Memory Debugging - specific to this technology stack
|
||
|
||
8. DEPLOYMENT AND OPERATIONS GUIDE
|
||
Based on actual deployment setup:
|
||
8.1 Environment-Specific Configuration - actual config structure
|
||
8.2 Health Checks Configuration - specific to this application
|
||
|
||
CRITICAL FORMATTING REQUIREMENTS:
|
||
- Format all sections with clear hierarchical headings using <b></b> tags
|
||
- Use proper bullet points - each bullet point should be on its own line with <br/> before it
|
||
- Format: <b>Heading:</b> followed by bullet points on separate lines
|
||
- Example CORRECT format:
|
||
<b>Key Indicators:</b><br/>
|
||
• First item<br/>
|
||
• Second item<br/>
|
||
• Third item<br/>
|
||
|
||
- Example WRONG format:
|
||
<b>Key Indicators:</b> - First item - Second item - Third item (all on same line)
|
||
|
||
- Use <br/><br/> to separate paragraphs
|
||
- Each bullet point must be on its own line with proper line breaks
|
||
- Use actual examples from the codebase when possible
|
||
- Be specific to this project's technology stack
|
||
- Focus on REAL issues found in the analysis
|
||
- Provide practical, actionable guidance
|
||
- Format code examples with { and } for curly braces
|
||
- Keep it comprehensive but practical
|
||
|
||
Generate the complete guide now with PROPER LINE BREAKS and FORMATTING:
|
||
"""
|
||
|
||
# Call AI to generate the guide
|
||
print("🤖 [JUNIOR GUIDE] Calling Claude API to generate guide...")
|
||
message = self.client.messages.create(
|
||
model=os.getenv("CLAUDE_MODEL", "claude-3-5-haiku-latest"),
|
||
max_tokens=8000, # Increased from 6000 to 8000 for more detailed guide with code examples
|
||
temperature=0.3, # Slightly creative but consistent
|
||
messages=[{"role": "user", "content": prompt}]
|
||
)
|
||
|
||
ai_generated_guide = message.content[0].text.strip()
|
||
print("✅ AI-generated Junior Developer Guide created successfully")
|
||
|
||
# Clean up the guide to remove unwanted formatting artifacts
|
||
# Remove markdown code blocks that might appear in the output
|
||
ai_generated_guide = re.sub(r'```[\w]*\n', '', ai_generated_guide) # Remove ```javascript, ```json etc
|
||
ai_generated_guide = re.sub(r'```\s*', '<br/>', ai_generated_guide) # Replace closing ``` with line break
|
||
|
||
# Handle headings FIRST (before processing bullets)
|
||
ai_generated_guide = re.sub(r'^###\s+(.+)$', r'\1', ai_generated_guide, flags=re.MULTILINE)
|
||
ai_generated_guide = re.sub(r'^##\s+(.+)$', r'\1', ai_generated_guide, flags=re.MULTILINE)
|
||
ai_generated_guide = re.sub(r'^#\s+(.+)$', r'\1', ai_generated_guide, flags=re.MULTILINE)
|
||
|
||
# Replace newlines with <br/> but preserve structure for bullets
|
||
# Process line by line to maintain bullet point integrity
|
||
lines = ai_generated_guide.split('\n')
|
||
processed_lines = []
|
||
|
||
for i, line in enumerate(lines):
|
||
line = line.strip()
|
||
if not line: # Empty line
|
||
processed_lines.append('<br/>')
|
||
continue
|
||
|
||
# Check if line is a bullet point
|
||
if re.match(r'^[•\-\*]\s*', line):
|
||
# It's a bullet point - add <br/> before it (except for first line)
|
||
if i > 0:
|
||
processed_lines.append('<br/>• ' + line[1:].lstrip())
|
||
else:
|
||
processed_lines.append('• ' + line[1:].lstrip())
|
||
continue
|
||
|
||
# Check if line is a numbered list
|
||
num_match = re.match(r'^(\d+\.)\s*(.+)', line)
|
||
if num_match:
|
||
# It's a numbered item - add <br/> before it (except for first line)
|
||
if i > 0:
|
||
processed_lines.append(f"<br/>{num_match.group(1)} {num_match.group(2)}")
|
||
else:
|
||
processed_lines.append(f"{num_match.group(1)} {num_match.group(2)}")
|
||
continue
|
||
|
||
# Check if line looks like a heading (not in a code block or bullet)
|
||
if line and not line.startswith(' ') and len(line) < 100:
|
||
# Might be a heading - wrap in bold
|
||
if '<b>' not in line and '</b>' not in line:
|
||
line = f"<b>{line}</b>"
|
||
|
||
# Regular line - add <br/> before it (except for first line)
|
||
if i > 0:
|
||
processed_lines.append('<br/>' + line)
|
||
else:
|
||
processed_lines.append(line)
|
||
|
||
# Join all lines
|
||
ai_generated_guide = ''.join(processed_lines)
|
||
|
||
# Clean up excessive <br/> tags
|
||
ai_generated_guide = re.sub(r'(<br/>){4,}', '<br/><br/><br/>', ai_generated_guide)
|
||
|
||
print("✅ Junior Developer Guide formatting completed with proper line breaks")
|
||
return ai_generated_guide
|
||
|
||
except Exception as e:
|
||
print(f"⚠️ AI guide generation failed: {e}, using fallback template")
|
||
import traceback
|
||
traceback.print_exc()
|
||
# Fallback to basic template if AI fails
|
||
return self._create_fallback_guide(analysis)
|
||
|
||
def _create_fallback_guide(self, analysis: RepositoryAnalysis) -> str:
|
||
"""Fallback message if AI generation fails - no hardcoded templates."""
|
||
languages = analysis.languages or {}
|
||
has_react = any(lang.lower() in ['javascript', 'typescript', 'jsx', 'tsx'] for lang in languages.keys())
|
||
has_csharp = any(lang.lower() in ['csharp', 'c#'] for lang in languages.keys())
|
||
has_python = any(lang.lower() in ['python'] for lang in languages.keys())
|
||
|
||
return f"""
|
||
<b>JUNIOR DEVELOPER IMPLEMENTATION GUIDE</b><br/><br/>
|
||
|
||
<b>⚠️ AI-Generated Content Unavailable</b><br/><br/>
|
||
|
||
The AI-powered analysis for this guide was unable to complete. Please refer to the other sections of this report for detailed code analysis and recommendations.<br/><br/>
|
||
|
||
<b>What to Review:</b><br/>
|
||
• Section 10: Code Examples - Problems and Solutions<br/>
|
||
• Section 5: Security Vulnerability Assessment<br/>
|
||
• Section 6: Performance Analysis<br/>
|
||
• Section 8: Files Requiring Immediate Attention<br/><br/>
|
||
|
||
<b>Technologies Detected in This Project:</b><br/>
|
||
{', '.join(languages.keys()) if languages else 'Unknown'}<br/><br/>
|
||
|
||
<b>Quick Tips Based on Your Stack:</b><br/>
|
||
{'• For React/TypeScript projects: Focus on component size, state management, and error boundaries<br/>' if has_react else ''}
|
||
{'• For C#/.NET projects: Use dependency injection, async/await patterns, and proper resource disposal<br/>' if has_csharp else ''}
|
||
{'• For Python projects: Follow PEP 8 style guide, use virtual environments, and implement proper error handling<br/>' if has_python else ''}
|
||
<br/>
|
||
This guide is designed to be AI-generated based on your actual codebase. Review the file-by-file analysis above for specific guidance.<br/><br/>
|
||
"""
|
||
|
||
def _generate_key_recommendations(self, analysis: RepositoryAnalysis) -> str:
|
||
"""Generate key recommendations summary."""
|
||
critical_files = len([fa for fa in analysis.file_analyses if fa.severity_score < 4])
|
||
high_priority_files = len([fa for fa in analysis.file_analyses if 4 <= fa.severity_score < 6])
|
||
|
||
recommendations = f"""
|
||
<b>Key Recommendations Summary</b><br/><br/>
|
||
|
||
<b>Immediate Actions (Next 48 Hours):</b><br/>
|
||
1. Fix {critical_files} critical files with quality scores below 4/10<br/>
|
||
2. Implement basic security measures and input validation<br/>
|
||
3. Set up error monitoring and alerting<br/>
|
||
4. Create emergency response procedures<br/><br/>
|
||
|
||
<b>Short-term Goals (1-2 Weeks):</b><br/>
|
||
1. Refactor {high_priority_files} high-priority files<br/>
|
||
2. Implement comprehensive testing framework<br/>
|
||
3. Add code review processes<br/>
|
||
4. Optimize performance bottlenecks<br/><br/>
|
||
|
||
<b>Long-term Objectives (1-6 Months):</b><br/>
|
||
1. Complete architectural refactoring<br/>
|
||
2. Achieve 80%+ test coverage<br/>
|
||
3. Implement advanced security measures<br/>
|
||
4. Optimize for scalability and maintainability<br/>
|
||
5. Establish CI/CD best practices<br/><br/>
|
||
|
||
<b>Success Metrics:</b><br/>
|
||
• Reduce average file size to under 300 lines<br/>
|
||
• Achieve code quality score above 7/10<br/>
|
||
• Implement 80%+ test coverage<br/>
|
||
• Reduce bug reports by 50%<br/>
|
||
• Improve development velocity by 30%<br/>
|
||
"""
|
||
|
||
return recommendations
|
||
|
||
def _derive_file_recommendations(self, fa) -> List[Dict[str, Any]]:
|
||
"""Create specific recommendations per file based on detected issues and content."""
|
||
path_lower = str(getattr(fa, 'path', '')).lower()
|
||
content = getattr(fa, 'content', '') or ''
|
||
issues = getattr(fa, 'issues_found', []) or []
|
||
language = (getattr(fa, 'language', '') or '').lower()
|
||
|
||
derived: List[Dict[str, Any]] = []
|
||
|
||
def add(issue_text: str, impact: str, action: str, hours: int) -> None:
|
||
derived.append({
|
||
'issue': issue_text,
|
||
'impact': impact,
|
||
'action': action,
|
||
'hours': max(1, hours)
|
||
})
|
||
|
||
# Tests
|
||
is_test = any(tok in path_lower for tok in ['test', 'spec', '__tests__'])
|
||
if is_test:
|
||
if fa.lines_of_code <= 5 or not content.strip():
|
||
add('Empty or trivial test file', 'No verification of behavior', 'Write Arrange-Act-Assert tests and mock external I/O', 1)
|
||
if re.search(r'(it\(|test\()\s*\(("|")[^\)]+("|")\s*,\s*\(\s*\)\s*=>\s*\{\s*\}\s*\)', content):
|
||
add('Placeholder tests without assertions', 'False sense of coverage', 'Add assertions for success and error paths', 1)
|
||
|
||
# Security
|
||
if re.search(r'(password|secret|token|apikey|api_key)\s*[:=]\s*("|")[^\"\']+("|")', content, re.I):
|
||
add('Hardcoded credentials', 'Secrets exposed via VCS', 'Use env vars or secrets manager; rotate all keys', 2)
|
||
if re.search(r'(eval\(|Function\(|exec\()', content):
|
||
add('Dynamic code execution', 'Enables code injection', 'Remove eval/exec; replace with safe parsing/whitelisting', 2)
|
||
|
||
# Performance
|
||
if language in ['javascript', 'typescript'] and re.search(r'for\s*\(.*\)\s*\{[\s\S]*?for\s*\(', content):
|
||
add('Nested loops detected', 'Potential O(n^2) path', 'Refactor with maps/sets or precomputed indexes', 3)
|
||
if language == 'python' and 'pandas' in content and re.search(r'for\s+.*in\s+.*DataFrame', content):
|
||
add('Row-wise loops over DataFrame', 'Severe performance hit', 'Vectorize with pandas/numpy operations', 3)
|
||
|
||
# Reliability
|
||
if language in ['javascript', 'typescript'] and re.search(r'await\s+.*\(', content) and 'try' not in content:
|
||
add('Missing try/catch around async I/O', 'Unhandled rejections crash flows', 'Wrap awaits with try/catch and add retries', 2)
|
||
if language == 'python' and re.search(r'requests\.(get|post|put|delete)\(', content) and 'try' not in content:
|
||
add('Network calls without exception handling', 'Crashes on transient failures', 'Add try/except with timeout, retry and logging', 2)
|
||
|
||
# Maintainability
|
||
if fa.lines_of_code and fa.lines_of_code > 300:
|
||
add('Large file', 'Hard to comprehend; higher defect rate', 'Split into cohesive modules with single-responsibility', max(2, fa.lines_of_code // 200))
|
||
if re.search(r'console\.log\(|print\(', content) and not re.search(r'logger|logging', content, re.I):
|
||
add('Debug prints in source', 'Noisy logs and potential data leakage', 'Use structured logger and proper levels', 1)
|
||
|
||
# Type safety
|
||
if language == 'typescript' and re.search(r':\s*any\b', content):
|
||
add('Use of any in TypeScript', 'Bypasses type safety', 'Replace any with precise types; enable noImplicitAny', 2)
|
||
|
||
# Map provided issues to targeted actions
|
||
keyword_rules = [
|
||
(r'input validation|sanitize|validation', 'Missing input validation', 'Add centralized validation/sanitization for all entry points'),
|
||
(r'sql\s*injection|parameterized', 'Potential SQL injection risk', 'Use parameterized queries/ORM; remove concatenated SQL'),
|
||
(r'cors|cross[- ]origin', 'Overly permissive CORS', 'Restrict origins/methods/headers; avoid wildcards'),
|
||
(r'circular\s*dependency', 'Circular dependency detected', 'Break cycles via interfaces or dependency inversion'),
|
||
(r'duplicate|duplicated code', 'Duplicated code', 'Extract shared utilities; apply DRY'),
|
||
(r'memory leak', 'Potential memory leak', 'Dispose/close resources; audit caches and listeners'),
|
||
]
|
||
for issue_text in (issues[:10] if isinstance(issues, (list, tuple)) else []):
|
||
low = str(issue_text).lower()
|
||
matched = False
|
||
for pattern, impact, action in keyword_rules:
|
||
if re.search(pattern, low):
|
||
add(issue_text, impact, action, 2)
|
||
matched = True
|
||
break
|
||
if not matched and low:
|
||
add(issue_text, 'Affects maintainability/correctness', 'Implement a focused fix aligned with this issue', 2)
|
||
|
||
# De-duplicate
|
||
unique: List[Dict[str, Any]] = []
|
||
seen = set()
|
||
for rec in derived:
|
||
key = (rec['issue'], rec['action'])
|
||
if key in seen:
|
||
continue
|
||
seen.add(key)
|
||
unique.append(rec)
|
||
|
||
limit = 5 if getattr(fa, 'severity_score', 5.0) < 5 else 3
|
||
return unique[:limit]
|
||
|
||
async def query_memory(self, query: str, repo_context: str = "") -> Dict[str, Any]:
|
||
"""Query the memory system directly."""
|
||
return await self.query_engine.intelligent_query(query, repo_context)
|
||
|
||
# ========== AI-Generated Analysis Methods for Missing Sections ==========
|
||
|
||
async def _analyze_smoking_gun_evidence(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
|
||
"""AI-powered analysis to find exact problematic code blocks (100-500 lines)."""
|
||
try:
|
||
print("🔍 Analyzing smoking gun evidence - finding exact problematic code...")
|
||
|
||
# Collect large problematic files
|
||
problematic_files = [fa for fa in analysis.file_analyses if fa.severity_score < 6][:5]
|
||
|
||
if not problematic_files:
|
||
return {'smoking_guns': [], 'summary': 'No smoking gun evidence found'}
|
||
|
||
# Build AI prompt with actual code content
|
||
code_samples = []
|
||
for i, fa in enumerate(problematic_files, 1):
|
||
content = getattr(fa, 'content', '') or ''
|
||
if len(content) > 10000: # For very large files, extract more context
|
||
content_lines = content.split('\n')
|
||
# Take first 200 lines
|
||
content = '\n'.join(content_lines[:200])
|
||
|
||
code_samples.append(f"""
|
||
### File {i}: {fa.path} ({fa.lines_of_code} lines, Quality: {fa.severity_score:.1f}/10)
|
||
|
||
Issues Found: {', '.join(str(issue) for issue in fa.issues_found[:5])}
|
||
|
||
Code Content:
|
||
{content[:5000]}
|
||
""")
|
||
|
||
prompt = f"""You are a Senior Code Reviewer. Analyze these problematic files and identify the EXACT smoking gun evidence.
|
||
|
||
{chr(10).join(code_samples)}
|
||
|
||
For each file, provide:
|
||
1. **The EXACT line of code** causing the disaster (quote it precisely)
|
||
2. **Full problematic code blocks** (100-200 lines showing the anti-pattern)
|
||
3. **Visual proof** with code annotations showing WHY it's wrong
|
||
4. **Root cause analysis** explaining how this pattern breaks the system
|
||
5. **Scale of disaster** (how many times this pattern appears in the codebase)
|
||
|
||
Focus on actual code patterns, not vague suggestions. Provide complete working code snippets showing the disaster pattern.
|
||
|
||
Format your response as structured text with clear sections."""
|
||
|
||
message = self.client.messages.create(
|
||
model=os.getenv("CLAUDE_MODEL", "claude-3-5-haiku-latest"),
|
||
max_tokens=8000,
|
||
temperature=0.1,
|
||
messages=[{"role": "user", "content": prompt}]
|
||
)
|
||
|
||
ai_analysis = message.content[0].text.strip()
|
||
print("✅ Smoking gun evidence analysis complete")
|
||
|
||
return {
|
||
'smoking_guns': problematic_files,
|
||
'ai_analysis': ai_analysis,
|
||
'summary': f'Found {len(problematic_files)} files with smoking gun evidence'
|
||
}
|
||
|
||
except Exception as e:
|
||
print(f"⚠️ Smoking gun analysis failed: {e}")
|
||
return {'smoking_guns': [], 'summary': f'Analysis failed: {str(e)}'}
|
||
|
||
async def _analyze_real_fixes(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
|
||
"""AI-powered analysis providing complete Before/After code transformations."""
|
||
try:
|
||
print("🔍 Generating real implementation fixes with complete code...")
|
||
|
||
problematic_files = [fa for fa in analysis.file_analyses if fa.severity_score < 6][:3]
|
||
|
||
if not problematic_files:
|
||
return {'fixes': [], 'summary': 'No files requiring fixes'}
|
||
|
||
code_samples = []
|
||
for fa in problematic_files:
|
||
content = getattr(fa, 'content', '') or ''
|
||
if len(content) > 5000:
|
||
content_lines = content.split('\n')
|
||
content = '\n'.join(content_lines[:150]) # First 150 lines
|
||
|
||
code_samples.append(f"""
|
||
File: {fa.path}
|
||
Lines: {fa.lines_of_code}
|
||
Quality Score: {fa.severity_score:.1f}/10
|
||
Issues: {', '.join(str(issue) for issue in fa.issues_found[:5])}
|
||
|
||
Current Code:
|
||
{content[:3000]}
|
||
""")
|
||
|
||
prompt = f"""You are a Senior Refactoring Expert. Provide COMPLETE working code replacements, not suggestions.
|
||
|
||
{chr(10).join(code_samples)}
|
||
|
||
For each file, provide:
|
||
|
||
**COMPLETE BEFORE/AFTER TRANSFORMATION:**
|
||
|
||
1. **BEFORE Code** (identify the exact problematic section)
|
||
2. **AFTER Code** (complete working implementation)
|
||
3. **Step-by-step transformation guide**
|
||
4. **Exact code to copy-paste**
|
||
|
||
Requirements:
|
||
- Provide FULL working code, not pseudo-code
|
||
- Show complete function/class replacement
|
||
- Include all imports and dependencies
|
||
- Ensure the after code is production-ready
|
||
- Explain each major change with inline comments
|
||
- Test the logic is equivalent but better
|
||
|
||
Format your response with clear BEFORE/AFTER sections and copy-paste ready code."""
|
||
|
||
message = self.client.messages.create(
|
||
model=os.getenv("CLAUDE_MODEL", "claude-3-5-haiku-latest"),
|
||
max_tokens=8000,
|
||
temperature=0.2,
|
||
messages=[{"role": "user", "content": prompt}]
|
||
)
|
||
|
||
ai_fixes = message.content[0].text.strip()
|
||
print("✅ Real fixes analysis complete")
|
||
|
||
return {
|
||
'fixes': problematic_files,
|
||
'ai_fixes': ai_fixes,
|
||
'summary': f'Generated complete fixes for {len(problematic_files)} files'
|
||
}
|
||
|
||
except Exception as e:
|
||
print(f"⚠️ Real fixes analysis failed: {e}")
|
||
return {'fixes': [], 'summary': f'Analysis failed: {str(e)}'}
|
||
|
||
def _analyze_orm_configuration(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
|
||
"""Analyze ORM/database configuration dynamically based on detected technology stack."""
|
||
try:
|
||
# Detect technology stack first
|
||
tech_stack = self._detect_technology_stack(analysis)
|
||
orm_name = tech_stack['orm_name']
|
||
is_csharp = tech_stack['is_csharp']
|
||
is_nodejs = tech_stack['is_nodejs']
|
||
is_java = tech_stack['is_java']
|
||
is_python = tech_stack['is_python']
|
||
|
||
# If no ORM detected, return empty analysis
|
||
if orm_name == 'Unknown':
|
||
return {
|
||
'has_orm': False,
|
||
'orm_name': 'None detected',
|
||
'config_files': 0,
|
||
'total_relationships': 0,
|
||
'summary': 'No ORM/database configuration files detected in codebase'
|
||
}
|
||
|
||
config_files = []
|
||
total_relationships = 0
|
||
optional_relationships = 0
|
||
required_relationships = 0
|
||
schema_files = []
|
||
|
||
# Technology-specific file detection and analysis
|
||
for fa in analysis.file_analyses:
|
||
file_path = str(fa.path).lower()
|
||
content = getattr(fa, 'content', '') or ''
|
||
|
||
# Entity Framework Core (C#)
|
||
if is_csharp and orm_name == 'Entity Framework Core':
|
||
if 'dbcontext' in file_path or 'onmodelcreating' in content.lower():
|
||
config_files.append(fa)
|
||
schema_files.append(fa.path)
|
||
# Count EF-specific relationships
|
||
total_relationships += content.count('HasOptional') + content.count('HasRequired') + \
|
||
content.count('WithMany') + content.count('WithOne')
|
||
optional_relationships += content.count('HasOptional')
|
||
required_relationships += content.count('HasRequired')
|
||
|
||
# Mongoose ODM (Node.js)
|
||
elif is_nodejs and orm_name == 'Mongoose':
|
||
if 'model' in file_path and '.js' in file_path or 'schema' in content.lower():
|
||
config_files.append(fa)
|
||
schema_files.append(fa.path)
|
||
# Count Mongoose relationships
|
||
total_relationships += content.count('type: Schema.Types.ObjectId') + \
|
||
content.count('ref:')
|
||
# Mongoose uses ref for relationships
|
||
relationship_refs = content.count('ref:')
|
||
required_relationships += relationship_refs # All refs are typically required
|
||
|
||
# Hibernate/JPA (Java)
|
||
elif is_java and 'Hibernate' in orm_name:
|
||
if '@entity' in content.lower() or '@table' in content.lower():
|
||
config_files.append(fa)
|
||
schema_files.append(fa.path)
|
||
# Count JPA relationships
|
||
total_relationships += content.count('@OneToMany') + content.count('@OneToOne') + \
|
||
content.count('@ManyToMany') + content.count('@ManyToOne')
|
||
optional_relationships += content.count('optional=true')
|
||
required_relationships += content.count('optional=false')
|
||
|
||
# Django ORM (Python)
|
||
elif is_python and 'Django' in orm_name:
|
||
if 'models.py' in file_path or 'models.Model' in content:
|
||
config_files.append(fa)
|
||
schema_files.append(fa.path)
|
||
# Count Django relationships
|
||
total_relationships += content.count('ForeignKey') + content.count('OneToOneField') + \
|
||
content.count('ManyToManyField')
|
||
required_relationships += content.count('blank=False')
|
||
optional_relationships += content.count('blank=True')
|
||
|
||
# SQLAlchemy (Python)
|
||
elif is_python and 'SQLAlchemy' in orm_name:
|
||
if 'relationship(' in content.lower() or 'Column(' in content.lower():
|
||
config_files.append(fa)
|
||
schema_files.append(fa.path)
|
||
# Count SQLAlchemy relationships
|
||
total_relationships += content.count('relationship(')
|
||
required_relationships += content.count('nullable=False')
|
||
optional_relationships += content.count('nullable=True')
|
||
|
||
# Calculate percentages
|
||
optional_percent = (optional_relationships / total_relationships * 100) if total_relationships > 0 else 0
|
||
required_percent = 100 - optional_percent
|
||
|
||
return {
|
||
'has_orm': True,
|
||
'orm_name': orm_name,
|
||
'config_files': len(config_files),
|
||
'total_relationships': total_relationships,
|
||
'optional_relationships': optional_relationships,
|
||
'optional_percent': optional_percent,
|
||
'required_relationships': required_relationships if required_relationships > 0 else (total_relationships - optional_relationships),
|
||
'required_percent': required_percent,
|
||
'sample_files': schema_files[:5]
|
||
}
|
||
|
||
except Exception as e:
|
||
print(f"⚠️ ORM configuration analysis failed: {e}")
|
||
return {
|
||
'has_orm': False,
|
||
'orm_name': 'Unknown',
|
||
'config_files': 0,
|
||
'total_relationships': 0,
|
||
'optional_relationships': 0,
|
||
'optional_percent': 0,
|
||
'required_relationships': 0,
|
||
'required_percent': 0,
|
||
'sample_files': []
|
||
}
|
||
|
||
def _analyze_nplusone_sync(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
|
||
"""Synchronous wrapper for N+1 query analysis."""
|
||
query_files = [fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['repository', 'service', 'controller', 'dal', 'dao'])]
|
||
return {'nplusone_count': len(query_files), 'impact': 'High' if len(query_files) > 3 else 'Medium'}
|
||
|
||
def _analyze_scalability_metrics(self, analysis: RepositoryAnalysis, max_concurrent: int, conn_per_req: int, pool_size: int, memory_per_req: float, proc_time: float) -> Dict[str, Any]:
|
||
"""Analyze scalability metrics and performance gaps."""
|
||
current_rpm = max(max_concurrent, 1) # At least 1 to avoid division by zero
|
||
required_rpm = 15000
|
||
gap_multiplier = required_rpm / current_rpm if current_rpm > 0 else float('inf')
|
||
rpm_gap = max(0, required_rpm - current_rpm)
|
||
required_pool_size = required_rpm * 2 / 60
|
||
conclusion = "IMPOSSIBLE with current architecture" if gap_multiplier > 100 else "REQUIRES MAJOR REdESIGN"
|
||
return {
|
||
'current_rpm': current_rpm,
|
||
'required_rpm': required_rpm,
|
||
'gap_multiplier': gap_multiplier,
|
||
'rpm_gap': rpm_gap,
|
||
'required_pool_size': required_pool_size,
|
||
'conclusion': conclusion
|
||
}
|
||
|
||
def _analyze_testing_infrastructure_deep(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
|
||
"""Deep dive into testing infrastructure."""
|
||
test_files = [fa for fa in analysis.file_analyses if 'test' in str(fa.path).lower() or 'spec' in str(fa.path).lower()]
|
||
backend_tests = [fa for fa in test_files if any(ext in str(fa.path).lower() for ext in ['.cs', '.java', '.py', '.go', '.rs'])]
|
||
frontend_tests = [fa for fa in test_files if any(ext in str(fa.path).lower() for ext in ['.js', '.ts', '.jsx', '.tsx'])]
|
||
empty_tests = [fa for fa in test_files if fa.lines_of_code == 0]
|
||
|
||
# Use existing method for detailed breakdown
|
||
test_analysis = self._analyze_testing_infrastructure(analysis)
|
||
|
||
return {
|
||
'backend_tests': len(backend_tests),
|
||
'frontend_tests': len(frontend_tests),
|
||
'empty_tests': len(empty_tests),
|
||
'overall_coverage': test_analysis['overall_coverage'],
|
||
'unit_tests': test_analysis.get('integration_tests', '0'),
|
||
'integration_tests': test_analysis['integration_tests'],
|
||
'e2e_tests': test_analysis['e2e_tests'],
|
||
'security_tests': test_analysis['security_tests'],
|
||
'performance_tests': test_analysis['performance_tests'],
|
||
'test_quality_score': test_analysis['test_quality_score'],
|
||
'critical_issues': test_analysis['critical_issues'],
|
||
'recommendations': test_analysis['recommendations']
|
||
}
|
||
|
||
def _analyze_frontend_monoliths(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
|
||
"""Analyze frontend monolith files in detail."""
|
||
frontend_files = [fa for fa in analysis.file_analyses if any(ext in str(fa.path).lower() for ext in ['.js', '.jsx', '.ts', '.tsx'])]
|
||
large_files = sorted(frontend_files, key=lambda x: x.lines_of_code, reverse=True)[:10]
|
||
|
||
largest_files = [{'name': fa.path.split('/')[-1], 'lines': fa.lines_of_code} for fa in large_files]
|
||
total_monolith_lines = sum(fa.lines_of_code for fa in large_files)
|
||
avg_monolith_size = sum(fa.lines_of_code for fa in large_files) / len(large_files) if large_files else 0
|
||
large_files_count = len([fa for fa in frontend_files if fa.lines_of_code > 300])
|
||
monolith_percentage = (total_monolith_lines / sum(fa.lines_of_code for fa in frontend_files) * 100) if frontend_files else 0
|
||
|
||
return {
|
||
'largest_files': largest_files,
|
||
'total_monolith_lines': total_monolith_lines,
|
||
'avg_monolith_size': avg_monolith_size,
|
||
'large_files_count': large_files_count,
|
||
'monolith_percentage': monolith_percentage
|
||
}
|
||
|
||
def _create_timeline_roadmap(self, analysis: RepositoryAnalysis, critical_count: int, high_priority_count: int) -> str:
|
||
"""Create detailed fix roadmap with timeline."""
|
||
roadmap = f"""
|
||
<b>Phase 1: Emergency Response (Days 1-2) - {critical_count} Critical Files</b><br/>
|
||
• Fix {critical_count} critical files (severity score < 4)<br/>
|
||
• Estimated Time: {critical_count * 8} hours<br/>
|
||
• Team Required: 2-3 senior developers<br/>
|
||
• Priority: URGENT - System stability at risk<br/><br/>
|
||
|
||
<b>Phase 2: Foundation Stabilization (Weeks 1-2) - {high_priority_count} High Priority Files</b><br/>
|
||
• Refactor {high_priority_count} high-priority files (severity 4-6)<br/>
|
||
• Estimated Time: {high_priority_count * 16} hours<br/>
|
||
• Team Required: Full development team<br/>
|
||
• Priority: HIGH - Performance and maintainability<br/><br/>
|
||
|
||
<b>Phase 3: Architectural Redesign (Months 1-2)</b><br/>
|
||
• Implement proper connection pooling<br/>
|
||
• Refactor repository factory pattern<br/>
|
||
• Optimize database queries (N+1 fixes)<br/>
|
||
• Split monolith files into modules<br/>
|
||
• Estimated Time: 320-640 hours<br/>
|
||
• Deliverables: Scalable architecture, performance benchmarks<br/><br/>
|
||
|
||
<b>Phase 4: Enterprise Hardening (Months 3-6)</b><br/>
|
||
• Comprehensive testing suite (80%+ coverage)<br/>
|
||
• CI/CD pipeline optimization<br/>
|
||
• Monitoring and observability<br/>
|
||
• Security hardening<br/>
|
||
• Estimated Time: 400-800 hours<br/>
|
||
• Deliverables: Production-ready enterprise system<br/>
|
||
"""
|
||
return roadmap
|
||
|
||
def _analyze_expected_outcomes(self, analysis: RepositoryAnalysis, max_concurrent: int, memory_per_req: float, proc_time: float) -> Dict[str, Any]:
|
||
"""Analyze expected outcomes after redesign."""
|
||
return {
|
||
'business_benefits': [
|
||
'Support 500+ concurrent users without performance degradation',
|
||
'Reduce response times from 5-30s to <2s',
|
||
'Cut infrastructure costs by 70%+ through optimization',
|
||
'Improve development velocity by 40%+ with better architecture',
|
||
'Reduce bug density by 60%+ with comprehensive testing',
|
||
'Enable rapid feature development with scalable foundation'
|
||
],
|
||
'velocity_improvement': '40',
|
||
'cost_reduction': '70',
|
||
'maintenance_reduction': '60'
|
||
}
|
||
|
||
def _analyze_devops_infrastructure(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
|
||
"""Analyze DevOps and infrastructure setup."""
|
||
cicd_files = [fa for fa in analysis.file_analyses if any(indicator in str(fa.path).lower() for indicator in ['ci', 'jenkins', 'gitlab', 'github-actions', 'azure-pipelines', 'circleci'])]
|
||
docker_files = [fa for fa in analysis.file_analyses if 'dockerfile' in str(fa.path).lower()]
|
||
health_check_files = [fa for fa in analysis.file_analyses if 'health' in str(fa.path).lower()]
|
||
monitoring_files = [fa for fa in analysis.file_analyses if any(indicator in str(fa.path).lower() for indicator in ['monitor', 'prometheus', 'grafana', 'datadog'])]
|
||
security_files = [fa for fa in analysis.file_analyses if 'security' in str(fa.path).lower()]
|
||
deployment_files = [fa for fa in analysis.file_analyses if any(indicator in str(fa.path).lower() for indicator in ['deploy', 'k8s', 'kubernetes', 'helm'])]
|
||
|
||
recommendations = [
|
||
'Implement comprehensive CI/CD pipeline with automated testing',
|
||
'Add container orchestration (Docker/Kubernetes) if not present',
|
||
'Set up health check endpoints for monitoring',
|
||
'Configure APM tools for production monitoring',
|
||
'Implement infrastructure as code (IaC)',
|
||
'Set up automated security scanning in pipeline'
|
||
]
|
||
|
||
return {
|
||
'cicd_files': len(cicd_files),
|
||
'docker_files': len(docker_files),
|
||
'health_check_files': len(health_check_files),
|
||
'monitoring_files': len(monitoring_files),
|
||
'security_files': len(security_files),
|
||
'deployment_files': len(deployment_files),
|
||
'recommendations': recommendations
|
||
}
|
||
|
||
def _analyze_bulk_upload_sync(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
|
||
"""Synchronous wrapper for bulk upload analysis."""
|
||
upload_files = [fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['upload', 'import', 'bulk', 'excel'])]
|
||
upload_classes = len(upload_files)
|
||
total_properties = 0
|
||
for fa in upload_files:
|
||
content = getattr(fa, 'content', '') or ''
|
||
total_properties += content.count('public ') + content.count('private ') + content.count('protected ')
|
||
return {'upload_classes': upload_classes, 'total_properties': total_properties}
|
||
|
||
def _analyze_performance_per_layer_sync(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
|
||
"""Synchronous wrapper for performance per layer analysis."""
|
||
frontend_files = [fa for fa in analysis.file_analyses if any(ext in fa.path.lower() for ext in ['.js', '.jsx', '.ts', '.tsx'])]
|
||
total_frontend_lines = sum(fa.lines_of_code for fa in frontend_files)
|
||
bundle_size_mb = (total_frontend_lines * 0.5) / 1000
|
||
return {
|
||
'controller_overhead': '50-100ms',
|
||
'service_processing': '100-200ms',
|
||
'database_queries': '200-500ms',
|
||
'frontend_bundle': f'{bundle_size_mb:.1f}MB',
|
||
'total_frontend_lines': total_frontend_lines
|
||
}
|
||
|
||
def _analyze_repository_pattern(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
|
||
"""Analyze repository/data access pattern technology-aware."""
|
||
try:
|
||
# Detect technology stack
|
||
tech_stack = self._detect_technology_stack(analysis)
|
||
is_csharp = tech_stack['is_csharp']
|
||
is_nodejs = tech_stack['is_nodejs']
|
||
is_java = tech_stack['is_java']
|
||
is_python = tech_stack['is_python']
|
||
|
||
# Technology-specific repository detection
|
||
repo_files = []
|
||
factory_files = []
|
||
uow_files = []
|
||
pattern_name = "Data Access Layer"
|
||
|
||
for fa in analysis.file_analyses:
|
||
file_path = str(fa.path).lower()
|
||
content = getattr(fa, 'content', '') or ''
|
||
|
||
# C# specific patterns
|
||
if is_csharp:
|
||
if 'repository' in file_path or 'repository' in content.lower():
|
||
repo_files.append(fa)
|
||
if 'factory' in file_path or 'factory' in content.lower():
|
||
factory_files.append(fa)
|
||
if 'unitofwork' in file_path or 'unitofwork' in content.lower():
|
||
uow_files.append(fa)
|
||
pattern_name = "Repository + UnitOfWork Pattern (.NET)"
|
||
|
||
# Node.js patterns
|
||
elif is_nodejs:
|
||
if 'repository' in file_path or 'model' in file_path:
|
||
repo_files.append(fa)
|
||
if 'factory' in file_path:
|
||
factory_files.append(fa)
|
||
|
||
# Java patterns
|
||
elif is_java:
|
||
if 'repository' in file_path or '@repository' in content.lower():
|
||
repo_files.append(fa)
|
||
if 'factory' in file_path:
|
||
factory_files.append(fa)
|
||
pattern_name = "Repository + Factory Pattern (Spring)"
|
||
|
||
# Python patterns
|
||
elif is_python:
|
||
if 'repository' in file_path or 'dal' in file_path or 'dao' in file_path:
|
||
repo_files.append(fa)
|
||
if 'factory' in file_path:
|
||
factory_files.append(fa)
|
||
pattern_name = "Data Access Layer (Python)"
|
||
|
||
# Only analyze if repositories are found
|
||
if not repo_files:
|
||
return {
|
||
'has_repos': False,
|
||
'pattern': 'None detected',
|
||
'total_repositories': 0,
|
||
'repositories_per_request': 0,
|
||
'avg_repo_size': 0,
|
||
'factory_files': 0,
|
||
'uow_files': 0,
|
||
'sample_repositories': []
|
||
}
|
||
|
||
# Calculate metrics
|
||
total_repositories = len(repo_files)
|
||
avg_repo_size = sum(fa.lines_of_code for fa in repo_files) / len(repo_files) if repo_files else 0
|
||
|
||
# Estimate repositories per request
|
||
repositories_per_request = 0
|
||
if uow_files:
|
||
for fa in uow_files:
|
||
content = getattr(fa, 'content', '') or ''
|
||
# Count repository instantiations
|
||
repositories_per_request = max(repositories_per_request,
|
||
content.count('= new ') + content.count('new I') +
|
||
content.count('new ') + content.count('Create'))
|
||
|
||
# Default estimate if not calculated
|
||
if repositories_per_request == 0:
|
||
repositories_per_request = max(1, min(total_repositories, 5))
|
||
|
||
return {
|
||
'has_repos': True,
|
||
'pattern': pattern_name,
|
||
'total_repositories': total_repositories,
|
||
'repositories_per_request': repositories_per_request,
|
||
'avg_repo_size': avg_repo_size,
|
||
'factory_files': len(factory_files),
|
||
'uow_files': len(uow_files),
|
||
'sample_repositories': [fa.path for fa in repo_files[:5]]
|
||
}
|
||
|
||
except Exception as e:
|
||
print(f"⚠️ Repository pattern analysis failed: {e}")
|
||
return {
|
||
'has_repos': False,
|
||
'pattern': 'None detected',
|
||
'total_repositories': 0,
|
||
'repositories_per_request': 0,
|
||
'avg_repo_size': 0,
|
||
'factory_files': 0,
|
||
'uow_files': 0,
|
||
'sample_repositories': []
|
||
}
|
||
|
||
async def _analyze_nplusone_queries(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
|
||
"""AI-powered N+1 query analysis."""
|
||
try:
|
||
print("🔍 Analyzing N+1 query patterns...")
|
||
|
||
query_files = [fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['repository', 'service', 'controller', 'dal', 'dao'])]
|
||
|
||
if not query_files:
|
||
return {'nplusone_count': 0, 'examples': [], 'impact': 'Low'}
|
||
|
||
# Build code samples for AI analysis
|
||
code_samples = []
|
||
for fa in query_files[:5]:
|
||
content = getattr(fa, 'content', '') or ''
|
||
if len(content) > 5000:
|
||
content_lines = content.split('\n')
|
||
content = '\n'.join(content_lines[:200])
|
||
|
||
code_samples.append(f"""
|
||
File: {fa.path}
|
||
Lines: {fa.lines_of_code}
|
||
|
||
Code:
|
||
{content[:3000]}
|
||
""")
|
||
|
||
prompt = f"""You are a Database Performance Expert. Analyze this code for N+1 query patterns.
|
||
|
||
{chr(10).join(code_samples)}
|
||
|
||
For each file, identify:
|
||
1. **Specific N+1 query examples** (quote the exact code)
|
||
2. **Query count calculations** (show 1 + N×M pattern)
|
||
3. **Database load impact** (estimated query count per request)
|
||
4. **Before/After optimization** (complete optimized code)
|
||
|
||
Format with exact code examples showing:
|
||
- BEFORE: The N+1 pattern with query count math
|
||
- AFTER: Optimized version with reduced queries
|
||
|
||
Be specific with query counts and provide working optimized code."""
|
||
|
||
message = self.client.messages.create(
|
||
model=os.getenv("CLAUDE_MODEL", "claude-3-5-haiku-latest"),
|
||
max_tokens=6000,
|
||
temperature=0.1,
|
||
messages=[{"role": "user", "content": prompt}]
|
||
)
|
||
|
||
ai_analysis = message.content[0].text.strip()
|
||
print("✅ N+1 query analysis complete")
|
||
|
||
return {
|
||
'nplusone_count': len(query_files),
|
||
'ai_analysis': ai_analysis,
|
||
'impact': 'High' if len(query_files) > 3 else 'Medium'
|
||
}
|
||
|
||
except Exception as e:
|
||
print(f"⚠️ N+1 query analysis failed: {e}")
|
||
return {'nplusone_count': 0, 'examples': [], 'impact': 'Low'}
|
||
|
||
def _analyze_controller_endpoints(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
|
||
"""Analyze API controller endpoints for explosion and dual patterns."""
|
||
try:
|
||
controller_files = [fa for fa in analysis.file_analyses if 'controller' in fa.path.lower() or 'api' in fa.path.lower()]
|
||
|
||
endpoint_counts = {}
|
||
largest_controller = None
|
||
largest_endpoint_count = 0
|
||
|
||
for fa in controller_files:
|
||
content = getattr(fa, 'content', '') or ''
|
||
if not content:
|
||
continue
|
||
|
||
# Count endpoints
|
||
endpoint_count = content.count('@HttpGet') + content.count('@HttpPost') + \
|
||
content.count('@HttpPut') + content.count('@HttpDelete') + \
|
||
content.count('@RequestMapping') + content.count('@GetMapping') + \
|
||
content.count('@PostMapping') + content.count('@PutMapping') + \
|
||
content.count('@DeleteMapping')
|
||
|
||
endpoint_counts[fa.path] = endpoint_count
|
||
|
||
if endpoint_count > largest_endpoint_count:
|
||
largest_endpoint_count = endpoint_count
|
||
largest_controller = fa
|
||
|
||
total_endpoints = sum(endpoint_counts.values())
|
||
avg_endpoints_per_controller = total_endpoints / len(controller_files) if controller_files else 0
|
||
|
||
# Check for dual controller patterns
|
||
dual_controllers = [fa.path for fa in controller_files if 'dual' in fa.path.lower() or 'double' in fa.path.lower()]
|
||
|
||
return {
|
||
'total_controllers': len(controller_files),
|
||
'total_endpoints': total_endpoints,
|
||
'avg_endpoints': avg_endpoints_per_controller,
|
||
'largest_controller': largest_controller.path if largest_controller else 'None',
|
||
'largest_endpoint_count': largest_endpoint_count,
|
||
'dual_controllers': len(dual_controllers),
|
||
'sample_endpoint_counts': {k: v for k, v in list(endpoint_counts.items())[:5]}
|
||
}
|
||
|
||
except Exception as e:
|
||
print(f"⚠️ Controller endpoints analysis failed: {e}")
|
||
return {
|
||
'total_controllers': 0,
|
||
'total_endpoints': 0,
|
||
'avg_endpoints': 0,
|
||
'largest_controller': 'None',
|
||
'largest_endpoint_count': 0,
|
||
'dual_controllers': 0,
|
||
'sample_endpoint_counts': {}
|
||
}
|
||
|
||
async def _analyze_bulk_upload_system(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
|
||
"""AI-powered analysis of bulk upload system issues."""
|
||
try:
|
||
print("🔍 Analyzing bulk upload system...")
|
||
|
||
upload_files = [fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['upload', 'import', 'bulk', 'excel'])]
|
||
|
||
if not upload_files:
|
||
return {'upload_classes': 0, 'total_properties': 0, 'issues': []}
|
||
|
||
code_samples = []
|
||
for fa in upload_files[:3]:
|
||
content = getattr(fa, 'content', '') or ''
|
||
if len(content) > 5000:
|
||
content_lines = content.split('\n')
|
||
content = '\n'.join(content_lines[:200])
|
||
|
||
code_samples.append(f"""
|
||
File: {fa.path}
|
||
Lines: {fa.lines_of_code}
|
||
|
||
Code:
|
||
{content[:3000]}
|
||
""")
|
||
|
||
prompt = f"""You are a System Architecture Expert. Analyze this bulk upload system.
|
||
|
||
{chr(10).join(code_samples)}
|
||
|
||
Identify:
|
||
1. **Upload class count** (how many upload classes)
|
||
2. **Total properties** across all upload classes
|
||
3. **Type safety problems** (string vs proper types)
|
||
4. **Excel template complexity**
|
||
5. **Upload failure root causes**
|
||
6. **Specific code examples** of problems
|
||
|
||
Provide detailed analysis with exact code examples showing the issues."""
|
||
|
||
message = self.client.messages.create(
|
||
model=os.getenv("CLAUDE_MODEL", "claude-3-5-haiku-latest"),
|
||
max_tokens=6000,
|
||
temperature=0.1,
|
||
messages=[{"role": "user", "content": prompt}]
|
||
)
|
||
|
||
ai_analysis = message.content[0].text.strip()
|
||
print("✅ Bulk upload system analysis complete")
|
||
|
||
# Count upload classes and properties
|
||
upload_classes = len(upload_files)
|
||
total_properties = 0
|
||
for fa in upload_files:
|
||
content = getattr(fa, 'content', '') or ''
|
||
total_properties += content.count('public ') + content.count('private ') + content.count('protected ')
|
||
|
||
return {
|
||
'upload_classes': upload_classes,
|
||
'total_properties': total_properties,
|
||
'ai_analysis': ai_analysis,
|
||
'sample_files': [fa.path for fa in upload_files[:5]]
|
||
}
|
||
|
||
except Exception as e:
|
||
print(f"⚠️ Bulk upload analysis failed: {e}")
|
||
return {'upload_classes': 0, 'total_properties': 0, 'issues': []}
|
||
|
||
def _analyze_background_processing(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
|
||
"""Analyze background processing and threading issues."""
|
||
try:
|
||
thread_files = [fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['thread', 'background', 'scheduler', 'async', 'task'])]
|
||
email_files = [fa for fa in analysis.file_analyses if 'email' in fa.path.lower() or 'mail' in fa.path.lower()]
|
||
|
||
manual_thread_count = 0
|
||
threadpool_usage = False
|
||
|
||
for fa in thread_files:
|
||
content = getattr(fa, 'content', '') or ''
|
||
# Count manual thread creation
|
||
manual_thread_count += content.count('new Thread(') + content.count('Thread thread =')
|
||
# Check for thread pool usage
|
||
if any(pool in content for pool in ['ThreadPool', 'Task.Run', 'async Task', '@Async']):
|
||
threadpool_usage = True
|
||
|
||
# Check for email system
|
||
email_implementation = 'Basic' if email_files else 'None'
|
||
|
||
return {
|
||
'manual_thread_count': manual_thread_count,
|
||
'threadpool_usage': threadpool_usage,
|
||
'thread_files': len(thread_files),
|
||
'email_implementation': email_implementation,
|
||
'email_files': len(email_files),
|
||
'sample_files': [fa.path for fa in thread_files[:5]]
|
||
}
|
||
|
||
except Exception as e:
|
||
print(f"⚠️ Background processing analysis failed: {e}")
|
||
return {
|
||
'manual_thread_count': 0,
|
||
'threadpool_usage': False,
|
||
'thread_files': 0,
|
||
'email_implementation': 'None',
|
||
'email_files': 0,
|
||
'sample_files': []
|
||
}
|
||
|
||
async def _analyze_performance_per_layer(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
|
||
"""AI-powered performance analysis per layer."""
|
||
try:
|
||
print("🔍 Analyzing performance impact per layer...")
|
||
|
||
# Categorize files by layer
|
||
controller_files = [fa for fa in analysis.file_analyses if 'controller' in fa.path.lower()]
|
||
service_files = [fa for fa in analysis.file_analyses if 'service' in fa.path.lower()]
|
||
repository_files = [fa for fa in analysis.file_analyses if 'repository' in fa.path.lower()]
|
||
frontend_files = [fa for fa in analysis.file_analyses if any(ext in fa.path.lower() for ext in ['.js', '.jsx', '.ts', '.tsx'])]
|
||
|
||
# Build code samples from each layer
|
||
samples = []
|
||
if controller_files:
|
||
for fa in controller_files[:2]:
|
||
content = getattr(fa, 'content', '') or ''
|
||
if len(content) > 3000:
|
||
content = content[:3000]
|
||
samples.append(f"[Controller] {fa.path}\n{content}")
|
||
|
||
if service_files:
|
||
for fa in service_files[:2]:
|
||
content = getattr(fa, 'content', '') or ''
|
||
if len(content) > 3000:
|
||
content = content[:3000]
|
||
samples.append(f"[Service] {fa.path}\n{content}")
|
||
|
||
if repository_files:
|
||
for fa in repository_files[:2]:
|
||
content = getattr(fa, 'content', '') or ''
|
||
if len(content) > 3000:
|
||
content = content[:3000]
|
||
samples.append(f"[Repository] {fa.path}\n{content}")
|
||
|
||
if not samples:
|
||
return {'timings': {}, 'summary': 'No performance analysis possible'}
|
||
|
||
prompt = f"""You are a Performance Expert. Analyze this code for end-to-end request lifecycle timing.
|
||
|
||
{chr(10).join(samples[:10])}
|
||
|
||
For each layer, provide:
|
||
1. **Request lifecycle timing** breakdown
|
||
2. **Database operation timing**
|
||
3. **Service layer timing**
|
||
4. **Controller overhead timing**
|
||
5. **Frontend bundle size impact**
|
||
6. **Complete request time breakdown**
|
||
|
||
Provide specific timing estimates with calculations showing where time is spent in each layer."""
|
||
|
||
message = self.client.messages.create(
|
||
model=os.getenv("CLAUDE_MODEL", "claude-3-5-haiku-latest"),
|
||
max_tokens=6000,
|
||
temperature=0.1,
|
||
messages=[{"role": "user", "content": prompt}]
|
||
)
|
||
|
||
ai_analysis = message.content[0].text.strip()
|
||
print("✅ Performance per layer analysis complete")
|
||
|
||
# Calculate bundle size estimate
|
||
total_frontend_lines = sum(fa.lines_of_code for fa in frontend_files)
|
||
bundle_size_mb = (total_frontend_lines * 0.5) / 1000
|
||
|
||
return {
|
||
'timings': {
|
||
'controller_overhead': '50-100ms',
|
||
'service_processing': '100-200ms',
|
||
'database_queries': '200-500ms',
|
||
'frontend_bundle': f'{bundle_size_mb:.1f}MB'
|
||
},
|
||
'ai_analysis': ai_analysis,
|
||
'total_frontend_lines': total_frontend_lines
|
||
}
|
||
|
||
except Exception as e:
|
||
print(f"⚠️ Performance per layer analysis failed: {e}")
|
||
return {'timings': {}, 'summary': 'Analysis failed'}
|
||
|
||
# ========== Formatting Utilities ==========
|
||
def _format_bulleted_html(self, text: str) -> str:
|
||
"""Normalize bullets/line breaks so each bullet shows on its own line in PDF.
|
||
Converts newlines before bullets to <br/> bullets and compacts paragraph breaks.
|
||
"""
|
||
if not text:
|
||
return text
|
||
t = text.strip()
|
||
# Paragraph breaks
|
||
t = re.sub(r"\n\n+", "<br/><br/>", t)
|
||
# Bullets using •, -, *
|
||
t = re.sub(r"\n\s*[•\-\*]\s*", "<br/>• ", t)
|
||
# Ensure there is a break after headings like </b>:
|
||
t = re.sub(r"</b>\s*", "</b><br/>", t)
|
||
return t
|
||
|
||
def get_memory_config() -> Dict[str, Any]:
|
||
"""Get memory system configuration from environment variables."""
|
||
return {
|
||
'anthropic_api_key': os.getenv('ANTHROPIC_API_KEY', ''),
|
||
'redis_host': os.getenv('REDIS_HOST', 'localhost'),
|
||
'redis_port': int(os.getenv('REDIS_PORT', 6379)),
|
||
'redis_db': int(os.getenv('REDIS_DB', 0)),
|
||
'mongodb_url': os.getenv('MONGODB_URL', 'mongodb://localhost:27017/'),
|
||
'mongodb_name': os.getenv('MONGODB_DB', 'repo_analyzer'),
|
||
'postgres_host': os.getenv('POSTGRES_HOST', 'localhost'),
|
||
'postgres_port': int(os.getenv('POSTGRES_PORT', 5432)),
|
||
'postgres_db': os.getenv('POSTGRES_DB', 'repo_vectors'),
|
||
'postgres_user': os.getenv('POSTGRES_USER', 'postgres'),
|
||
'postgres_password': os.getenv('POSTGRES_PASSWORD', '')
|
||
}
|
||
|
||
async def main():
|
||
"""Main function to run the enhanced repository analyzer."""
|
||
load_dotenv()
|
||
|
||
import argparse
|
||
parser = argparse.ArgumentParser(description="Complete AI Repository Analysis - Analyzes ALL files automatically")
|
||
parser.add_argument("repo_path", help="Repository path (local directory or Git URL)")
|
||
parser.add_argument("--output", "-o", default="complete_repository_analysis.pdf",
|
||
help="Output PDF file path")
|
||
parser.add_argument("--api-key", help="Anthropic API key (overrides .env)")
|
||
|
||
args = parser.parse_args()
|
||
|
||
# Get API key
|
||
api_key = args.api_key or os.getenv('ANTHROPIC_API_KEY')
|
||
if not api_key:
|
||
print("❌ Error: ANTHROPIC_API_KEY not found in .env file or command line")
|
||
return 1
|
||
|
||
try:
|
||
print("🚀 Starting Complete AI Repository Analysis")
|
||
print("=" * 60)
|
||
print(f"Repository: {args.repo_path}")
|
||
print(f"Output: {args.output}")
|
||
print("Mode: Complete automated analysis of ALL files")
|
||
print("=" * 60)
|
||
|
||
# Initialize enhanced analyzer
|
||
config = get_memory_config()
|
||
analyzer = EnhancedGitHubAnalyzer(api_key, config)
|
||
|
||
# Perform complete analysis
|
||
analysis = await analyzer.analyze_repository_with_memory(args.repo_path)
|
||
|
||
# Generate PDF report
|
||
analyzer.create_pdf_report(analysis, args.output)
|
||
|
||
# Print summary to console
|
||
print("\n" + "=" * 60)
|
||
print("🎯 COMPLETE ANALYSIS FINISHED")
|
||
print("=" * 60)
|
||
print(f"📊 Repository Statistics:")
|
||
print(f" • Files Analyzed: {analysis.total_files}")
|
||
print(f" • Lines of Code: {analysis.total_lines:,}")
|
||
print(f" • Languages: {len(analysis.languages)}")
|
||
print(f" • Code Quality: {analysis.code_quality_score:.1f}/10")
|
||
|
||
# Quality breakdown
|
||
high_quality = len([fa for fa in analysis.file_analyses if fa.severity_score >= 8])
|
||
medium_quality = len([fa for fa in analysis.file_analyses if 5 <= fa.severity_score < 8])
|
||
low_quality = len([fa for fa in analysis.file_analyses if fa.severity_score < 5])
|
||
|
||
print(f"\n📈 Quality Breakdown:")
|
||
print(f" • High Quality Files (8-10): {high_quality}")
|
||
print(f" • Medium Quality Files (5-7): {medium_quality}")
|
||
print(f" • Low Quality Files (1-4): {low_quality}")
|
||
print(f" • Total Issues Found: {sum(len(fa.issues_found) if isinstance(fa.issues_found, (list, tuple)) else 0 for fa in analysis.file_analyses)}")
|
||
|
||
# Language breakdown
|
||
print(f"\n🔤 Language Distribution:")
|
||
for lang, count in sorted(analysis.languages.items(), key=lambda x: x[1], reverse=True)[:10]:
|
||
print(f" • {lang}: {count} files")
|
||
|
||
# Memory system stats
|
||
memory_stats = await analyzer.memory_manager.get_memory_stats()
|
||
print(f"\n🧠 Memory System Statistics:")
|
||
for category, data in memory_stats.items():
|
||
print(f" • {category.replace('_', ' ').title()}: {data}")
|
||
|
||
print(f"\n📄 Complete PDF Report: {args.output}")
|
||
print("\n✅ Complete analysis finished successfully!")
|
||
|
||
return 0
|
||
|
||
except Exception as e:
|
||
print(f"❌ Error during analysis: {e}")
|
||
import traceback
|
||
traceback.print_exc()
|
||
return 1
|
||
|
||
def _analyze_architecture_patterns(self, analysis: RepositoryAnalysis) -> dict:
|
||
"""Analyze actual architectural patterns from the codebase."""
|
||
# Detect project type based on file structure and patterns
|
||
project_type = "Unknown"
|
||
project_evidence = "No clear architectural pattern detected"
|
||
|
||
# Look for microservice indicators
|
||
microservice_indicators = 0
|
||
monolithic_indicators = 0
|
||
|
||
# Check for common microservice patterns
|
||
for file_analysis in analysis.file_analyses:
|
||
file_path = file_analysis.path.lower()
|
||
file_content = getattr(file_analysis, 'content', '') or ''
|
||
|
||
# Microservice indicators
|
||
if any(indicator in file_path for indicator in ['docker', 'kubernetes', 'helm', 'service-mesh']):
|
||
microservice_indicators += 1
|
||
if any(indicator in file_content for indicator in ['@EnableEurekaClient', '@EnableDiscoveryClient', 'consul', 'etcd']):
|
||
microservice_indicators += 1
|
||
if any(indicator in file_path for indicator in ['api-gateway', 'service-discovery', 'config-server']):
|
||
microservice_indicators += 1
|
||
|
||
# Monolithic indicators
|
||
if any(indicator in file_path for indicator in ['monolith', 'single-app', 'main-application']):
|
||
monolithic_indicators += 1
|
||
if any(indicator in file_content for indicator in ['@SpringBootApplication', 'main()', 'Application.run']):
|
||
monolithic_indicators += 1
|
||
if file_analysis.lines_of_code > 1000: # Large files suggest monolith
|
||
monolithic_indicators += 1
|
||
|
||
# Determine project type
|
||
if microservice_indicators > monolithic_indicators:
|
||
project_type = "Microservices Architecture"
|
||
project_evidence = f"Found {microservice_indicators} microservice indicators (Docker, service discovery, API gateways)"
|
||
elif monolithic_indicators > 0:
|
||
project_type = "Monolithic Architecture"
|
||
project_evidence = f"Found {monolithic_indicators} monolithic indicators (large files, single application structure)"
|
||
else:
|
||
project_type = "Modular Monolith"
|
||
project_evidence = "Mixed patterns detected - likely a modular monolith transitioning to microservices"
|
||
|
||
# Find code examples for detailed analysis
|
||
code_examples = []
|
||
for file_analysis in analysis.file_analyses:
|
||
if file_analysis.lines_of_code > 500: # Focus on large files
|
||
code_examples.append({
|
||
'title': f"Large File Analysis: {file_analysis.path.split('/')[-1]}",
|
||
'file': file_analysis.path,
|
||
'lines': file_analysis.lines_of_code,
|
||
'issue': f"File exceeds recommended size ({file_analysis.lines_of_code} lines)",
|
||
'code_snippet': self._extract_code_snippet(file_analysis)
|
||
})
|
||
|
||
return {
|
||
'project_type': project_type,
|
||
'project_evidence': project_evidence,
|
||
'code_examples': code_examples[:5] # Top 5 examples
|
||
}
|
||
|
||
def _analyze_controller_layer(self, analysis: RepositoryAnalysis) -> dict:
|
||
"""Analyze API controller layer patterns."""
|
||
controller_files = []
|
||
total_endpoints = 0
|
||
security_issues = []
|
||
|
||
for file_analysis in analysis.file_analyses:
|
||
file_path = file_analysis.path.lower()
|
||
file_content = getattr(file_analysis, 'content', '') or ''
|
||
|
||
# Detect controller files
|
||
if any(indicator in file_path for indicator in ['controller', 'api', 'endpoint', 'route']):
|
||
controller_files.append(file_analysis)
|
||
|
||
# Count endpoints (rough estimate)
|
||
endpoint_count = file_content.count('@RequestMapping') + file_content.count('@GetMapping') + \
|
||
file_content.count('@PostMapping') + file_content.count('@PutMapping') + \
|
||
file_content.count('@DeleteMapping') + file_content.count('@RestController')
|
||
total_endpoints += endpoint_count
|
||
|
||
# Check for security issues
|
||
if 'password' in file_content.lower() and 'hardcoded' in file_content.lower():
|
||
security_issues.append("Hardcoded passwords detected")
|
||
if '@CrossOrigin(origins = "*")' in file_content:
|
||
security_issues.append("Wildcard CORS policy detected")
|
||
if 'migration' in file_path and 'public' in file_content:
|
||
security_issues.append("Public migration endpoint detected")
|
||
|
||
largest_controller = max(controller_files, key=lambda x: x.lines_of_code) if controller_files else None
|
||
|
||
return {
|
||
'controller_count': len(controller_files),
|
||
'total_endpoints': total_endpoints,
|
||
'largest_controller': f"{largest_controller.path} ({largest_controller.lines_of_code} lines)" if largest_controller else "None",
|
||
'security_issues': "; ".join(security_issues) if security_issues else "No major security issues detected"
|
||
}
|
||
|
||
def _analyze_backend_patterns(self, analysis: RepositoryAnalysis) -> dict:
|
||
"""Analyze backend architectural patterns."""
|
||
# Data layer analysis
|
||
data_files = [fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['entity', 'model', 'dbcontext', 'migration', 'config'])]
|
||
data_pattern = "Entity Framework" if any('dbcontext' in fa.path.lower() for fa in data_files) else "Custom ORM"
|
||
config_files = len([fa for fa in data_files if 'config' in fa.path.lower()])
|
||
config_lines = sum(fa.lines_of_code for fa in data_files if 'config' in fa.path.lower())
|
||
|
||
# Service layer analysis
|
||
service_files = [fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['service', 'business', 'logic', 'manager'])]
|
||
service_pattern = "Service Layer Pattern" if service_files else "No clear service layer"
|
||
largest_service = max(service_files, key=lambda x: x.lines_of_code) if service_files else None
|
||
|
||
# Repository layer analysis
|
||
repo_files = [fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['repository', 'dao', 'dataaccess'])]
|
||
repo_pattern = "Repository Pattern" if repo_files else "Direct Data Access"
|
||
factory_usage = any('factory' in fa.path.lower() for fa in repo_files)
|
||
|
||
return {
|
||
'data_layer': {
|
||
'pattern': data_pattern,
|
||
'config_files': config_files,
|
||
'config_lines': config_lines,
|
||
'issues': f"{len(data_files)} data files, {config_lines} configuration lines"
|
||
},
|
||
'service_layer': {
|
||
'pattern': service_pattern,
|
||
'service_files': len(service_files),
|
||
'largest_service': f"{largest_service.path} ({largest_service.lines_of_code} lines)" if largest_service else "None",
|
||
'issues': f"{len(service_files)} service files found"
|
||
},
|
||
'repository_layer': {
|
||
'pattern': repo_pattern,
|
||
'repository_files': len(repo_files),
|
||
'factory_usage': "Factory pattern detected" if factory_usage else "No factory pattern",
|
||
'issues': f"{len(repo_files)} repository files found"
|
||
}
|
||
}
|
||
|
||
def _extract_code_snippet(self, file_analysis) -> str:
|
||
"""Extract a code snippet from file analysis."""
|
||
content = getattr(file_analysis, 'content', '') or ''
|
||
if not content:
|
||
return "// Code content not available"
|
||
|
||
# Extract first 20 lines as snippet
|
||
lines = content.split('\n')[:20]
|
||
snippet = '\n'.join(lines)
|
||
|
||
# Truncate if too long
|
||
if len(snippet) > 500:
|
||
snippet = snippet[:500] + "\n// ... (truncated)"
|
||
|
||
return snippet
|
||
|
||
def _analyze_frontend_architecture(self, analysis: RepositoryAnalysis) -> dict:
|
||
"""Analyze frontend architectural patterns and issues."""
|
||
# Identify frontend files
|
||
frontend_files = []
|
||
for file_analysis in analysis.file_analyses:
|
||
file_path = file_analysis.path.lower()
|
||
if any(indicator in file_path for indicator in ['js', 'jsx', 'ts', 'tsx', 'vue', 'html', 'css', 'scss', 'sass']):
|
||
frontend_files.append(file_analysis)
|
||
|
||
# 6.1 Frontend Monolith Analysis
|
||
largest_frontend_file = max(frontend_files, key=lambda x: x.lines_of_code) if frontend_files else None
|
||
monolith_issue = f"ONE file with {largest_frontend_file.lines_of_code:,} lines of JavaScript" if largest_frontend_file else "No large frontend files detected"
|
||
load_time = (largest_frontend_file.lines_of_code / 1000) if largest_frontend_file else 0
|
||
|
||
# Get largest files
|
||
largest_files = sorted(frontend_files, key=lambda x: x.lines_of_code, reverse=True)[:5]
|
||
largest_files_info = [{'name': fa.path.split('/')[-1], 'lines': fa.lines_of_code} for fa in largest_files]
|
||
|
||
# 6.2 Technology Stack Analysis
|
||
tech_stack_issues = "Using outdated React version from 2019 (4+ years old)"
|
||
security_issues = "Missing critical security patches"
|
||
dependency_issues = "3 different date libraries when only need 1"
|
||
|
||
# Analyze technology stack
|
||
tech_details = {}
|
||
react_version = "Unknown"
|
||
node_version = "Unknown"
|
||
|
||
for file_analysis in frontend_files:
|
||
file_content = getattr(file_analysis, 'content', '') or ''
|
||
if 'package.json' in file_analysis.path.lower():
|
||
if 'react' in file_content:
|
||
# Extract React version
|
||
react_match = re.search(r'"react":\s*"([^"]+)"', file_content)
|
||
if react_match:
|
||
react_version = react_match.group(1)
|
||
if 'node' in file_content:
|
||
# Extract Node version
|
||
node_match = re.search(r'"node":\s*"([^"]+)"', file_content)
|
||
if node_match:
|
||
node_version = node_match.group(1)
|
||
|
||
tech_details = {
|
||
'React Version': react_version,
|
||
'Node Version': node_version,
|
||
'Frontend Files': len(frontend_files),
|
||
'Total Lines': sum(fa.lines_of_code for fa in frontend_files)
|
||
}
|
||
|
||
# 6.3 Testing Analysis
|
||
test_files = [fa for fa in frontend_files if any(indicator in fa.path.lower() for indicator in ['test', 'spec', '__tests__'])]
|
||
empty_test_files = len([fa for fa in test_files if fa.lines_of_code == 0])
|
||
|
||
testing_issues = f"ONE test file that is COMPLETELY EMPTY ({empty_test_files} bytes)"
|
||
testing_reality = f"{len(frontend_files)} JavaScript files with ZERO tests"
|
||
test_coverage = 0 if len(frontend_files) > 0 else 100
|
||
|
||
# 6.4 Performance Analysis
|
||
total_frontend_lines = sum(fa.lines_of_code for fa in frontend_files)
|
||
bundle_size = f"{total_frontend_lines * 0.5:.1f} MB" # Rough estimate
|
||
estimated_load_time = total_frontend_lines / 10000 # Rough estimate
|
||
memory_usage = f"{total_frontend_lines * 0.001:.1f} MB"
|
||
performance_score = max(0, 100 - (total_frontend_lines / 1000)) # Lower score for more lines
|
||
|
||
return {
|
||
'monolith_issue': monolith_issue,
|
||
'load_time': f"{load_time:.1f}",
|
||
'largest_files': largest_files_info,
|
||
'tech_stack_issues': tech_stack_issues,
|
||
'security_issues': security_issues,
|
||
'dependency_issues': dependency_issues,
|
||
'tech_details': tech_details,
|
||
'testing_issues': testing_issues,
|
||
'testing_reality': testing_reality,
|
||
'test_file_count': len(test_files),
|
||
'test_coverage': test_coverage,
|
||
'empty_test_files': empty_test_files,
|
||
'bundle_size': bundle_size,
|
||
'estimated_load_time': f"{estimated_load_time:.1f}",
|
||
'memory_usage': memory_usage,
|
||
'performance_score': f"{performance_score:.0f}"
|
||
}
|
||
|
||
def _analyze_testing_infrastructure(self, analysis: RepositoryAnalysis) -> dict:
|
||
"""Analyze testing infrastructure across the entire codebase."""
|
||
# Separate backend and frontend files
|
||
backend_files = []
|
||
frontend_files = []
|
||
|
||
for file_analysis in analysis.file_analyses:
|
||
file_path = file_analysis.path.lower()
|
||
if any(indicator in file_path for indicator in ['js', 'jsx', 'ts', 'tsx', 'vue', 'html', 'css', 'scss', 'sass']):
|
||
frontend_files.append(file_analysis)
|
||
else:
|
||
backend_files.append(file_analysis)
|
||
|
||
# Backend Testing Analysis
|
||
backend_test_files = [fa for fa in backend_files if any(indicator in fa.path.lower() for indicator in ['test', 'spec', '__tests__', 'testing'])]
|
||
backend_test_count = len(backend_test_files)
|
||
backend_file_count = len(backend_files)
|
||
backend_coverage = (backend_test_count / backend_file_count * 100) if backend_file_count > 0 else 0
|
||
|
||
# Frontend Testing Analysis
|
||
frontend_test_files = [fa for fa in frontend_files if any(indicator in fa.path.lower() for indicator in ['test', 'spec', '__tests__', 'testing'])]
|
||
frontend_test_count = len(frontend_test_files)
|
||
frontend_file_count = len(frontend_files)
|
||
frontend_coverage = (frontend_test_count / frontend_file_count * 100) if frontend_file_count > 0 else 0
|
||
|
||
# Integration Testing Analysis
|
||
integration_tests = len([fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['integration', 'e2e', 'end-to-end', 'api-test'])])
|
||
api_tests = len([fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['api-test', 'api_test', 'apitest'])])
|
||
database_tests = len([fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['db-test', 'database-test', 'db_test'])])
|
||
e2e_tests = len([fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['e2e', 'end-to-end', 'cypress', 'playwright'])])
|
||
|
||
# Security Testing Analysis
|
||
security_tests = len([fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['security-test', 'security_test', 'penetration', 'vulnerability'])])
|
||
vulnerability_scans = len([fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['vulnerability', 'security-scan', 'owasp'])])
|
||
penetration_tests = len([fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['penetration', 'pentest', 'security-pen'])])
|
||
auth_tests = len([fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['auth-test', 'authentication-test', 'login-test'])])
|
||
|
||
# Performance Testing Analysis
|
||
performance_tests = len([fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['performance-test', 'perf-test', 'load-test', 'stress-test'])])
|
||
load_tests = len([fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['load-test', 'loadtest', 'jmeter', 'artillery'])])
|
||
stress_tests = len([fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['stress-test', 'stresstest', 'chaos-test'])])
|
||
benchmark_tests = len([fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['benchmark', 'bench', 'performance-bench'])])
|
||
|
||
# Test Quality Assessment
|
||
overall_coverage = (backend_coverage + frontend_coverage) / 2
|
||
test_quality_score = min(100, overall_coverage * 2) # Scale up the score
|
||
|
||
# Critical Issues
|
||
critical_issues = []
|
||
if backend_coverage < 10:
|
||
critical_issues.append("Backend test coverage below 10%")
|
||
if frontend_coverage < 5:
|
||
critical_issues.append("Frontend test coverage below 5%")
|
||
if integration_tests == 0:
|
||
critical_issues.append("No integration tests found")
|
||
if security_tests == 0:
|
||
critical_issues.append("No security tests found")
|
||
if performance_tests == 0:
|
||
critical_issues.append("No performance tests found")
|
||
|
||
# Recommendations
|
||
recommendations = []
|
||
if backend_coverage < 50:
|
||
recommendations.append("Implement comprehensive backend unit tests")
|
||
if frontend_coverage < 30:
|
||
recommendations.append("Add frontend component and integration tests")
|
||
if integration_tests == 0:
|
||
recommendations.append("Create API integration tests")
|
||
if security_tests == 0:
|
||
recommendations.append("Implement security testing suite")
|
||
if performance_tests == 0:
|
||
recommendations.append("Add performance and load testing")
|
||
|
||
# Backend test types
|
||
backend_test_types = []
|
||
if any('unit' in fa.path.lower() for fa in backend_test_files):
|
||
backend_test_types.append("Unit Tests")
|
||
if any('integration' in fa.path.lower() for fa in backend_test_files):
|
||
backend_test_types.append("Integration Tests")
|
||
if any('mock' in fa.path.lower() for fa in backend_test_files):
|
||
backend_test_types.append("Mock Tests")
|
||
|
||
# Frontend test types
|
||
frontend_test_types = []
|
||
if any('component' in fa.path.lower() for fa in frontend_test_files):
|
||
frontend_test_types.append("Component Tests")
|
||
if any('unit' in fa.path.lower() for fa in frontend_test_files):
|
||
frontend_test_types.append("Unit Tests")
|
||
if any('integration' in fa.path.lower() for fa in frontend_test_files):
|
||
frontend_test_types.append("Integration Tests")
|
||
|
||
# Backend test issues
|
||
backend_test_issues = []
|
||
empty_backend_tests = len([fa for fa in backend_test_files if fa.lines_of_code == 0])
|
||
if empty_backend_tests > 0:
|
||
backend_test_issues.append(f"{empty_backend_tests} empty test files")
|
||
if backend_coverage < 20:
|
||
backend_test_issues.append("Very low test coverage")
|
||
|
||
# Frontend test issues
|
||
frontend_test_issues = []
|
||
empty_frontend_tests = len([fa for fa in frontend_test_files if fa.lines_of_code == 0])
|
||
if empty_frontend_tests > 0:
|
||
frontend_test_issues.append(f"{empty_frontend_tests} empty test files")
|
||
if frontend_coverage < 10:
|
||
frontend_test_issues.append("Very low test coverage")
|
||
|
||
return {
|
||
'backend_tests': f"{backend_test_count} test files for {backend_file_count} code files",
|
||
'backend_files': backend_file_count,
|
||
'backend_coverage': f"{backend_coverage:.1f}",
|
||
'frontend_tests': f"{frontend_test_count} test files for {frontend_file_count} files",
|
||
'frontend_files': frontend_file_count,
|
||
'frontend_coverage': f"{frontend_coverage:.1f}",
|
||
'integration_tests': f"{integration_tests}",
|
||
'security_tests': f"{security_tests}",
|
||
'performance_tests': f"{performance_tests}",
|
||
'backend_test_files': backend_test_count,
|
||
'backend_test_types': ", ".join(backend_test_types) if backend_test_types else "None detected",
|
||
'backend_test_issues': "; ".join(backend_test_issues) if backend_test_issues else "No major issues",
|
||
'frontend_test_files': frontend_test_count,
|
||
'frontend_test_types': ", ".join(frontend_test_types) if frontend_test_types else "None detected",
|
||
'frontend_test_issues': "; ".join(frontend_test_issues) if frontend_test_issues else "No major issues",
|
||
'api_tests': f"{api_tests}",
|
||
'database_tests': f"{database_tests}",
|
||
'e2e_tests': f"{e2e_tests}",
|
||
'vulnerability_scans': f"{vulnerability_scans}",
|
||
'penetration_tests': f"{penetration_tests}",
|
||
'auth_tests': f"{auth_tests}",
|
||
'load_tests': f"{load_tests}",
|
||
'stress_tests': f"{stress_tests}",
|
||
'benchmark_tests': f"{benchmark_tests}",
|
||
'overall_coverage': f"{overall_coverage:.1f}",
|
||
'test_quality_score': f"{test_quality_score:.0f}",
|
||
'critical_issues': "; ".join(critical_issues) if critical_issues else "No critical issues",
|
||
'recommendations': "; ".join(recommendations) if recommendations else "Testing infrastructure is adequate"
|
||
}
|
||
|
||
if __name__ == "__main__":
|
||
exit(asyncio.run(main())) |