The comprehensive technical analysis of this codebase has revealed significant areas requiring immediate attention and strategic improvements.
The {detected_architecture} demonstrates both strengths and areas for architectural enhancement to support scalability and maintainability.
Summary of Findings:
• Total Files Analyzed: {analysis.total_files:,}
• Total Lines of Code: {analysis.total_lines:,}
• Overall Code Quality Score: {avg_quality:.1f}/10
• Critical Issues Identified: {critical_count}
• High Priority Issues: {high_priority_count}
• Total Issues Found: {total_issues}+
Key Architectural Insights:
• Architecture Pattern: {detected_architecture}
• Primary Languages: {', '.join(list(analysis.languages.keys())[:5]) if analysis.languages else 'Unknown'}
• System Complexity: {'High' if analysis.code_quality_score < 5 else 'Moderate' if analysis.code_quality_score < 7 else 'Low'}
The Path Forward:
This report provides a comprehensive roadmap for improving code quality, security, and architectural design.
Immediate implementation of the recommended actions will significantly enhance system reliability, performance, and maintainability.
By following the detailed implementation guide provided in this report, the codebase can evolve into a robust, scalable,
and secure enterprise-grade application capable of supporting growing business requirements while maintaining high code quality standards.
End of Comprehensive Analysis Report
Report Metadata:
• Total Document Length: 50+ pages of detailed technical analysis
• Coverage: 100% of identified issues across frontend, backend, database, security, performance, and testing
• Actionable Items: Complete implementation roadmap with specific code examples and detailed recommendations
• Audience: CEO, CTO, Senior Developers, Junior Developers, DevOps Teams
• Generated: {datetime.now().strftime('%B %d, %Y at %H:%M:%S')}
• Status: COMPLETE - Ready for Executive Decision and Implementation Planning
This comprehensive technical assessment provides actionable recommendations for immediate improvement and long-term architectural enhancement.
"""
story.append(Paragraph(conclusion_text, styles['Normal']))
# Build PDF
try:
doc.build(story)
print(f"✅ Enhanced PDF report generated successfully: {output_path}")
except Exception as e:
print(f"❌ Error generating PDF: {e}")
raise
async def create_multi_level_pdf_report(
    self,
    comprehensive_context: Dict,
    output_path: str,
    repository_id: str,
    run_id: str,
    progress_mgr=None
):
    """
    Generate comprehensive 100+ page multi-level PDF report.

    Includes both non-technical and technical versions for each section.
    Architecture sections include: Frontend, Backend, Database, APIs.

    Args:
        comprehensive_context: Aggregated analysis data. Keys read here:
            'module_analyses', 'synthesis_analysis', 'analysis_state',
            'findings_by_module', 'metrics_by_module', 'file_analyses',
            'repository_analysis', 'total_modules', 'total_findings'.
        output_path: Destination path of the generated PDF file.
        repository_id: Repository identifier printed on the title page.
        run_id: Analysis run identifier printed on the title page.
        progress_mgr: Optional progress manager; when provided,
            'report_progress' events are emitted as sections complete.

    Raises:
        Exception: Re-raised from ``doc.build`` after logging on failure.
    """
    print(f"\n{'='*80}")
    print(f"📄 [REPORT] 🚀 STARTING PDF GENERATION")
    print(f"{'='*80}")
    print(f" Output Path: {output_path}")
    print(f" Repository ID: {repository_id}")
    print(f" Run ID: {run_id}")
    print(f" Context: {comprehensive_context.get('total_modules', 0)} modules, {comprehensive_context.get('total_findings', 0)} findings")
    print(f" File analyses count: {len(comprehensive_context.get('file_analyses', []))}")
    # Ensure target directory exists (best-effort; doc.build fails loudly later if not)
    try:
        parent_dir = os.path.dirname(output_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
            print(f" ✅ Reports directory ready: {parent_dir}")
    except Exception as dir_err:
        print(f" ⚠️ Could not create reports directory: {dir_err}")
    # Setup PDF document
    doc = SimpleDocTemplate(output_path, pagesize=A4,
                            leftMargin=72, rightMargin=72,
                            topMargin=72, bottomMargin=72)
    styles = getSampleStyleSheet()
    story = []
    # Override all styles to ensure non-italic fonts
    styles['Normal'].fontName = 'Helvetica'
    styles['Heading1'].fontName = 'Helvetica-Bold'
    styles['Heading2'].fontName = 'Helvetica-Bold'
    styles['Heading3'].fontName = 'Helvetica-Bold'
    styles['Code'].fontName = 'Courier'
    # Enhanced styles
    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Heading1'],
        fontSize=24,
        textColor=colors.HexColor('#1e40af'),
        spaceAfter=30,
        alignment=TA_CENTER,
        fontName='Helvetica-Bold'
    )
    section_style = ParagraphStyle(
        'SectionHeading',
        parent=styles['Heading2'],
        fontSize=18,
        textColor=colors.black,
        spaceBefore=20,
        spaceAfter=10,
        fontName='Helvetica-Bold'
    )
    subsection_style = ParagraphStyle(
        'SubsectionHeading',
        parent=styles['Heading3'],
        fontSize=14,
        textColor=colors.HexColor('#1e40af'),
        spaceBefore=15,
        spaceAfter=8,
        fontName='Helvetica-Bold'
    )
    nontech_style = ParagraphStyle(
        'NonTechnical',
        parent=styles['Normal'],
        fontSize=11,
        textColor=colors.black,
        spaceBefore=10,
        spaceAfter=8,
        fontName='Helvetica'
    )
    tech_style = ParagraphStyle(
        'Technical',
        parent=styles['Normal'],
        fontSize=10,
        textColor=colors.black,
        spaceBefore=10,
        spaceAfter=8,
        fontName='Helvetica'
    )
    code_style = ParagraphStyle(
        'CodeStyle',
        parent=styles['Code'],
        fontSize=8,
        fontName='Courier',
        leftIndent=20,
        rightIndent=20,
        spaceBefore=5,
        spaceAfter=5,
        backColor=colors.HexColor('#f3f4f6'),
        borderWidth=1,
        borderColor=colors.HexColor('#d1d5db'),
        borderPadding=6
    )

    def _is_truthy(value: Optional[str], default: bool = False) -> bool:
        # Interpret common truthy env-var spellings; None falls back to default.
        if value is None:
            return default
        return str(value).strip().lower() in ("1", "true", "yes", "on")

    # Technical-only mode defaults to ON; stored on self so section
    # generators (see _generate_section_multi_level) see the same flag.
    technical_only = _is_truthy(os.getenv("REPORT_TECHNICAL_ONLY"), default=True)
    self._report_technical_only = technical_only
    include_nontechnical = not technical_only

    def append_markdown_block(markdown_text: str, paragraph_style: ParagraphStyle) -> bool:
        # Returns True only when non-empty markdown was converted and appended.
        if not markdown_text or not markdown_text.strip():
            return False
        elements = self._convert_markdown_to_pdf_elements(
            markdown_text,
            styles,
            section_style,
            subsection_style,
            code_style,
            paragraph_style
        )
        story.extend(elements)
        return True

    def render_markdown_pair(
        nontech_md: str,
        tech_md: str,
        spacer_after_nontech: float = 10,
        add_page_break: bool = False
    ) -> None:
        # Renders the non-technical part (if enabled) then the technical part,
        # adding a trailing page break only when something was emitted.
        section_has_content = False
        if include_nontechnical:
            if append_markdown_block(nontech_md, nontech_style):
                section_has_content = True
                if spacer_after_nontech:
                    story.append(Spacer(1, spacer_after_nontech))
        if append_markdown_block(tech_md, tech_style):
            section_has_content = True
        if add_page_break and section_has_content:
            story.append(PageBreak())

    # Extract context data
    module_analyses = comprehensive_context.get('module_analyses', [])
    synthesis_analysis = comprehensive_context.get('synthesis_analysis', {})
    analysis_state = comprehensive_context.get('analysis_state', {})
    findings_by_module = comprehensive_context.get('findings_by_module', {})
    metrics_by_module = comprehensive_context.get('metrics_by_module', {})
    # SECTION 1: TITLE PAGE
    if progress_mgr:
        await progress_mgr.emit_event("report_progress", {
            "message": "Generating title page",
            "percent": 5
        })
    story.append(Paragraph("COMPREHENSIVE AI REPOSITORY ANALYSIS REPORT", title_style))
    story.append(Spacer(1, 30))
    story.append(Paragraph(f"Repository ID: {repository_id}", styles['Normal']))
    story.append(Paragraph(f"Analysis Run ID: {run_id}", styles['Normal']))
    story.append(Paragraph(f"Analysis Date: {datetime.now().strftime('%B %d, %Y at %H:%M')}", styles['Normal']))
    story.append(Paragraph("Generated by: Enhanced AI Analysis System with Multi-Level Reporting", styles['Normal']))
    story.append(Paragraph("Report Type: Comprehensive Multi-Level Technical & Business Assessment", styles['Normal']))
    story.append(Spacer(1, 20))
    story.append(Paragraph(f"Total Modules Analyzed: {len(module_analyses)}", styles['Normal']))
    story.append(Paragraph(f"Total Findings: {comprehensive_context.get('total_findings', 0)}", styles['Normal']))
    story.append(PageBreak())
    # SECTION 2: EXECUTIVE SUMMARY (Multi-Level)
    if progress_mgr:
        await progress_mgr.emit_event("report_progress", {
            "message": "Generating executive summary",
            "percent": 10
        })
    story.append(Paragraph("SECTION 1: EXECUTIVE SUMMARY", section_style))
    # Generate executive summary with both versions
    exec_summary_nontech, exec_summary_tech = await self._generate_section_multi_level(
        section_name="Executive Summary",
        section_data={
            'synthesis': synthesis_analysis,
            'analysis_state': analysis_state,
            'total_modules': len(module_analyses),
            'total_findings': comprehensive_context.get('total_findings', 0),
            'metrics_by_module': metrics_by_module
        },
        progress_mgr=progress_mgr
    )
    render_markdown_pair(
        exec_summary_nontech,
        exec_summary_tech,
        spacer_after_nontech=10,
        add_page_break=True
    )
    # SECTION 3: PROJECT OVERVIEW (Multi-Level)
    if progress_mgr:
        await progress_mgr.emit_event("report_progress", {
            "message": "Generating project overview",
            "percent": 15
        })
    story.append(Paragraph("SECTION 2: PROJECT OVERVIEW", section_style))
    project_overview_nontech, project_overview_tech = await self._generate_section_multi_level(
        section_name="Project Overview",
        section_data={
            'analysis_state': analysis_state,
            'module_analyses': module_analyses,
            'metrics_by_module': metrics_by_module
        },
        progress_mgr=progress_mgr
    )
    render_markdown_pair(
        project_overview_nontech,
        project_overview_tech,
        spacer_after_nontech=15,
        add_page_break=True
    )
    # SECTION 4: ARCHITECTURE ANALYSIS (Multi-Level with Frontend, Backend, Database, APIs)
    print(f" 📍 SECTION 3: ARCHITECTURE ANALYSIS")
    if progress_mgr:
        await progress_mgr.emit_event("report_progress", {
            "message": "Generating architecture analysis",
            "percent": 20
        })
    story.append(Paragraph("SECTION 3: ARCHITECTURE ANALYSIS", section_style))
    # 4.1 Frontend Architecture
    story.append(Paragraph("3.1 Frontend Architecture", subsection_style))
    frontend_nontech, frontend_tech = await self._generate_architecture_section(
        architecture_type="Frontend",
        module_analyses=module_analyses,
        findings_by_module=findings_by_module,
        metrics_by_module=metrics_by_module,
        synthesis_analysis=synthesis_analysis,
        progress_mgr=progress_mgr
    )
    render_markdown_pair(
        frontend_nontech,
        frontend_tech,
        spacer_after_nontech=10,
        add_page_break=True
    )
    # 4.2 Backend Architecture
    story.append(Paragraph("3.2 Backend Architecture", subsection_style))
    backend_nontech, backend_tech = await self._generate_architecture_section(
        architecture_type="Backend",
        module_analyses=module_analyses,
        findings_by_module=findings_by_module,
        metrics_by_module=metrics_by_module,
        synthesis_analysis=synthesis_analysis,
        progress_mgr=progress_mgr
    )
    render_markdown_pair(
        backend_nontech,
        backend_tech,
        spacer_after_nontech=10,
        add_page_break=True
    )
    # 4.3 Database Architecture
    story.append(Paragraph("3.3 Database Architecture", subsection_style))
    database_nontech, database_tech = await self._generate_architecture_section(
        architecture_type="Database",
        module_analyses=module_analyses,
        findings_by_module=findings_by_module,
        metrics_by_module=metrics_by_module,
        synthesis_analysis=synthesis_analysis,
        progress_mgr=progress_mgr
    )
    render_markdown_pair(
        database_nontech,
        database_tech,
        spacer_after_nontech=10,
        add_page_break=True
    )
    # 4.4 API Architecture
    story.append(Paragraph("3.4 API Architecture", subsection_style))
    api_nontech, api_tech = await self._generate_architecture_section(
        architecture_type="API",
        module_analyses=module_analyses,
        findings_by_module=findings_by_module,
        metrics_by_module=metrics_by_module,
        synthesis_analysis=synthesis_analysis,
        progress_mgr=progress_mgr
    )
    render_markdown_pair(
        api_nontech,
        api_tech,
        spacer_after_nontech=10,
        add_page_break=True
    )
    # SECTION 5: SECURITY ASSESSMENT (Multi-Level)
    print(f" 📍 SECTION 4: SECURITY ASSESSMENT")
    if progress_mgr:
        await progress_mgr.emit_event("report_progress", {
            "message": "Generating security assessment",
            "percent": 40
        })
    story.append(Paragraph("SECTION 4: SECURITY ASSESSMENT", section_style))
    security_nontech, security_tech = await self._generate_section_multi_level(
        section_name="Security Assessment",
        section_data={
            'module_analyses': module_analyses,
            'findings_by_module': findings_by_module,
            'synthesis_analysis': synthesis_analysis,
            # Flatten findings across modules, keeping only security-category items.
            'security_findings': [f for findings_list in findings_by_module.values() for f in findings_list if f.get('category') == 'security']
        },
        progress_mgr=progress_mgr
    )
    render_markdown_pair(
        security_nontech,
        security_tech,
        spacer_after_nontech=15,
        add_page_break=True
    )
    # SECTION 6: MODULE DEEP DIVES (One per module)
    print(f" 📍 SECTION 5: MODULE DEEP DIVES")
    if progress_mgr:
        await progress_mgr.emit_event("report_progress", {
            "message": "Generating module deep dives",
            "percent": 50
        })
    story.append(Paragraph("SECTION 5: MODULE DEEP DIVES", section_style))
    # Fallback: If no modules found, use file_analyses from RepositoryAnalysis
    if len(module_analyses) == 0:
        print("⚠️ [REPORT] No modules found, using file_analyses fallback...")
        file_analyses = comprehensive_context.get('file_analyses', [])
        repository_analysis = comprehensive_context.get('repository_analysis')
        if file_analyses and len(file_analyses) > 0:
            # Group files by directory/module for fallback
            from collections import defaultdict
            files_by_module = defaultdict(list)
            for fa in file_analyses:
                # Handle both dict and object formats
                if isinstance(fa, dict):
                    file_path = fa.get('path', fa.get('file_path', 'unknown'))
                else:
                    file_path = getattr(fa, 'path', getattr(fa, 'file_path', 'unknown'))
                path_parts = str(file_path).split('/')
                if len(path_parts) > 1:
                    # Use top-level directory as the module name when present.
                    module_name = path_parts[0] if path_parts[0] else path_parts[-2] if len(path_parts) > 2 else 'root'
                else:
                    module_name = 'root'
                files_by_module[module_name].append(fa)
            # Generate sections for each module group
            for idx, (module_name, module_files) in enumerate(files_by_module.items(), 1):
                if progress_mgr:
                    await progress_mgr.emit_event("report_progress", {
                        "message": f"Generating module {idx}/{len(files_by_module)}: {module_name}",
                        "percent": 50 + int((idx / len(files_by_module)) * 20)
                    })
                story.append(Paragraph(f"5.{idx} {module_name}", subsection_style))
                # Create fallback module data
                # Extract paths from both dict and object formats
                file_paths = []
                for fa in module_files:
                    if isinstance(fa, dict):
                        path = fa.get('path', fa.get('file_path', 'unknown'))
                    else:
                        path = getattr(fa, 'path', getattr(fa, 'file_path', 'unknown'))
                    file_paths.append(str(path))
                fallback_module = {
                    'module_name': module_name,
                    'files_analyzed': file_paths,
                    'detailed_analysis': f"Analysis of {len(module_files)} files in {module_name} module.",
                    'summary': f"{module_name} module contains {len(module_files)} files."
                }
                module_nontech, module_tech = await self._generate_module_section(
                    module=fallback_module,
                    findings=findings_by_module.get(module_name, []),
                    metrics=metrics_by_module.get(module_name, {}),
                    progress_mgr=progress_mgr
                )
                render_markdown_pair(
                    module_nontech,
                    module_tech,
                    spacer_after_nontech=10,
                    add_page_break=True
                )
        else:
            # No file analyses either - generate minimal section
            story.append(Paragraph("No modules found in analysis. Please check the analysis logs.", tech_style))
            story.append(PageBreak())
    else:
        # Normal flow: Use module_analyses
        for idx, module in enumerate(module_analyses):
            if progress_mgr:
                await progress_mgr.emit_event("report_progress", {
                    "message": f"Generating module {idx+1}/{len(module_analyses)}: {module.get('module_name', 'Unknown')}",
                    "percent": 50 + int((idx + 1) / len(module_analyses) * 20)
                })
            module_name = module.get('module_name', f'Module {idx+1}')
            story.append(Paragraph(f"5.{idx+1} {module_name}", subsection_style))
            module_nontech, module_tech = await self._generate_module_section(
                module=module,
                findings=findings_by_module.get(module_name, []),
                metrics=metrics_by_module.get(module_name, {}),
                progress_mgr=progress_mgr
            )
            render_markdown_pair(
                module_nontech,
                module_tech,
                spacer_after_nontech=10,
                add_page_break=True
            )
    # SECTION 7: CRITICAL ISSUES & RECOMMENDATIONS (Multi-Level)
    print(f" 📍 SECTION 6: CRITICAL ISSUES & RECOMMENDATIONS")
    if progress_mgr:
        await progress_mgr.emit_event("report_progress", {
            "message": "Generating critical issues section",
            "percent": 75
        })
    story.append(Paragraph("SECTION 6: CRITICAL ISSUES & RECOMMENDATIONS", section_style))
    issues_nontech, issues_tech = await self._generate_section_multi_level(
        section_name="Critical Issues & Recommendations",
        section_data={
            'findings_by_module': findings_by_module,
            'module_analyses': module_analyses,
            'synthesis_analysis': synthesis_analysis
        },
        progress_mgr=progress_mgr
    )
    render_markdown_pair(
        issues_nontech,
        issues_tech,
        spacer_after_nontech=15,
        add_page_break=True
    )
    # SECTION 7: SYSTEM-LEVEL INSIGHTS (Multi-Level)
    print(f" 📍 SECTION 7: SYSTEM-LEVEL INSIGHTS")
    if progress_mgr:
        await progress_mgr.emit_event("report_progress", {
            "message": "Generating system-level insights",
            "percent": 85
        })
    story.append(Paragraph("SECTION 7: SYSTEM-LEVEL INSIGHTS", section_style))
    system_nontech, system_tech = await self._generate_section_multi_level(
        section_name="System-Level Insights",
        section_data={
            'synthesis_analysis': synthesis_analysis,
            'analysis_state': analysis_state,
            'module_analyses': module_analyses
        },
        progress_mgr=progress_mgr
    )
    render_markdown_pair(
        system_nontech,
        system_tech,
        spacer_after_nontech=15,
        add_page_break=True
    )
    # SECTION 9: JUNIOR DEVELOPER ONBOARDING GUIDE (Technical Only)
    if progress_mgr:
        await progress_mgr.emit_event("report_progress", {
            "message": "Generating onboarding guide",
            "percent": 90
        })
    story.append(Paragraph("SECTION 8: JUNIOR DEVELOPER ONBOARDING GUIDE", section_style))
    onboarding_content = await self._generate_onboarding_guide(
        module_analyses=module_analyses,
        analysis_state=analysis_state,
        progress_mgr=progress_mgr
    )
    # Convert markdown to properly formatted PDF elements
    onboarding_elements = self._convert_markdown_to_pdf_elements(
        onboarding_content, styles, section_style, subsection_style, code_style, tech_style
    )
    story.extend(onboarding_elements)
    story.append(PageBreak())
    # SECTION 10: CONCLUSION (Multi-Level)
    if progress_mgr:
        await progress_mgr.emit_event("report_progress", {
            "message": "Generating conclusion",
            "percent": 95
        })
    story.append(Paragraph("SECTION 9: CONCLUSION & NEXT STEPS", section_style))
    conclusion_nontech, conclusion_tech = await self._generate_section_multi_level(
        section_name="Conclusion & Next Steps",
        section_data={
            'synthesis_analysis': synthesis_analysis,
            'analysis_state': analysis_state,
            'total_findings': comprehensive_context.get('total_findings', 0),
            'total_modules': len(module_analyses)
        },
        progress_mgr=progress_mgr
    )
    render_markdown_pair(
        conclusion_nontech,
        conclusion_tech,
        spacer_after_nontech=15,
        add_page_break=False
    )
    # Build PDF
    try:
        print(f"\n 📝 Building PDF document...")
        print(f" Total story elements: {len(story)}")
        doc.build(story)
        print(f"\n{'='*80}")
        print(f"✅ [REPORT] ✨ PDF GENERATION COMPLETE!")
        print(f"{'='*80}")
        print(f" Output File: {output_path}")
        print(f" File Size: {os.path.getsize(output_path) / 1024 / 1024:.2f} MB")
        print(f"{'='*80}\n")
        if progress_mgr:
            await progress_mgr.emit_event("report_progress", {
                "message": "PDF report generation complete",
                "percent": 100
            })
    except Exception as e:
        print(f"\n{'='*80}")
        print(f"❌ [REPORT] PDF GENERATION FAILED!")
        print(f"{'='*80}")
        print(f" Error: {str(e)}")
        print(f" Output Path: {output_path}")
        print(f"{'='*80}\n")
        raise
    finally:
        # Reset the report-level flag so later, standalone section calls
        # fall back to the environment variable.
        self._report_technical_only = None
def _prepare_llm_payload(self, section_name: str, section_data: Dict[str, Any]) -> Dict[str, Any]:
"""Trim large fields from section data to keep prompts below token limits."""
if not isinstance(section_data, dict):
return section_data
safe_payload = copy.deepcopy(section_data)
modules = safe_payload.get("module_analyses")
if isinstance(modules, list):
max_modules = 8 if section_name.lower().startswith("module") else 12
safe_payload["module_analyses"] = modules[:max_modules]
allowed_module_names = {
m.get("module_name")
for m in safe_payload.get("module_analyses", [])
if isinstance(m, dict)
}
metrics_by_module = safe_payload.get("metrics_by_module")
if isinstance(metrics_by_module, dict) and allowed_module_names:
safe_payload["metrics_by_module"] = {
name: metrics_by_module[name]
for name in allowed_module_names
if name in metrics_by_module
}
findings_by_module = safe_payload.get("findings_by_module")
if isinstance(findings_by_module, dict) and allowed_module_names:
safe_payload["findings_by_module"] = {
name: findings_by_module[name]
for name in allowed_module_names
if name in findings_by_module
}
max_string_length = 1500
max_top_list = 20
max_nested_list = 10
max_dict_items = 40
exclude_exact = {
"detailed_analysis",
"full_response",
"raw_content",
"raw_text",
"raw_chunks",
"content",
"code",
"code_snippet",
"file_content",
"nontechnical_version",
"technical_version",
"non_technical_version",
"markdown",
"html",
"prompt",
"raw_prompt",
"claude_response",
"pdf_data",
}
exclude_substrings = [
"raw_",
"_raw",
"nontechnical",
"non-technical",
"technical_version",
"_html",
"markdown",
"html",
"prompt",
"claude",
]
def truncate(value: Any, depth: int = 0) -> Any:
if is_dataclass(value):
value = asdict(value)
if isinstance(value, str):
if len(value) > max_string_length:
return (
value[:max_string_length]
+ f"... [truncated {len(value) - max_string_length} chars]"
)
return value
if isinstance(value, (int, float, bool)) or value is None:
return value
if isinstance(value, list):
limit = max_top_list if depth == 0 else max_nested_list
trimmed = value[:limit]
result = [truncate(item, depth + 1) for item in trimmed]
if len(value) > limit:
result.append(
f"... {len(value) - limit} additional items truncated ..."
)
return result
if isinstance(value, dict):
result = {}
items = list(value.items())
truncated_count = 0
if len(items) > max_dict_items:
truncated_count = len(items) - max_dict_items
items = items[:max_dict_items]
for key, item in items:
key_str = str(key)
if key_str in exclude_exact:
continue
key_lower = key_str.lower()
if any(sub in key_lower for sub in exclude_substrings):
continue
result[key] = truncate(item, depth + 1)
if truncated_count:
result["__truncated_keys__"] = (
f"{truncated_count} additional keys removed"
)
return result
return str(value)
return truncate(safe_payload)
async def _generate_section_multi_level(
    self,
    section_name: str,
    section_data: Dict,
    progress_mgr=None
) -> Tuple[str, str]:
    """
    Generate both non-technical and technical versions of a section using Claude.

    Returns: (non_technical_version, technical_version)

    Notes:
        - When technical-only mode is active (the report-level
          ``_report_technical_only`` flag, falling back to the
          ``REPORT_TECHNICAL_ONLY`` env var, default True), the
          non-technical part is returned as an empty string.
        - On any failure, placeholder error strings are returned instead
          of raising, so report generation can continue.
    """
    try:
        # Default to technical-only output unless explicitly disabled.
        technical_only_default = True
        section_technical_only = getattr(self, "_report_technical_only", None)
        if section_technical_only is None:
            # Report-level flag not set: fall back to the environment flag.
            technical_only_env = os.getenv("REPORT_TECHNICAL_ONLY")
            if technical_only_env is None:
                section_technical_only = technical_only_default
            else:
                section_technical_only = technical_only_env.lower() in ("1", "true", "yes", "on")
        # Trim oversized fields so the prompt stays within token limits.
        sanitized_section_data = self._prepare_llm_payload(section_name, section_data)
        if section_technical_only:
            prompt = f"""
You are a senior software architect with 30+ years of experience. Produce a strictly technical analysis for the section "{section_name}".
CONSTRAINTS:
- Focus exclusively on code, architecture, dependencies, security, performance, and testing.
- Do not include business analogies, executive summaries, emojis, or non-technical commentary.
- Reference files, modules, and metrics when possible.
- Respond in GitHub-flavoured Markdown with clear headings and bullet lists.
SECTION DATA:
{json.dumps(sanitized_section_data, indent=2, default=str)}
"""
        else:
            prompt = f"""
You are a senior software architect with 30+ years of experience. Generate a comprehensive analysis for the section: "{section_name}".
SECTION DATA:
{json.dumps(sanitized_section_data, indent=2, default=str)}
Generate TWO versions of this section:
1. NON-TECHNICAL VERSION:
- Use clear, plain language without analogies, metaphors, or storytelling.
- Avoid jargon while grounding statements in observable evidence.
- Focus on business impact and stakeholder implications derived from the section data.
- Do not use emojis or decorative symbols.
- Keep the structure concise and easy for executives to follow.
2. TECHNICAL VERSION:
- Full technical details with code examples
- File paths, line numbers, specific recommendations
- Architecture patterns, design decisions
- Metrics, numbers, quantitative analysis
- Implementation details and code snippets
- For developers and technical leads
Output format:
[NON-TECHNICAL]
...non-technical content here...
[TECHNICAL]
...technical content here...
"""
        loop = asyncio.get_event_loop()

        def call_claude():
            # Synchronous Anthropic client call; executed off the event loop.
            message = self.client.messages.create(
                model=os.getenv("CLAUDE_MODEL", "claude-3-5-haiku-latest"),
                max_tokens=8000,
                temperature=0.3,
                messages=[{"role": "user", "content": prompt}]
            )
            return message.content[0].text.strip()

        response_text = await loop.run_in_executor(None, call_claude)
        if section_technical_only:
            technical = response_text.strip()
            if not technical:
                technical = "Technical analysis generation failed."
            return "", technical
        # Parse response
        nontech_match = re.search(r'\[NON-TECHNICAL\](.*?)(?=\[TECHNICAL\]|$)', response_text, re.DOTALL)
        tech_match = re.search(r'\[TECHNICAL\](.*?)$', response_text, re.DOTALL)
        nontech = nontech_match.group(1).strip() if nontech_match else "Non-technical version generation failed."
        tech = tech_match.group(1).strip() if tech_match else "Technical version generation failed."
        return nontech, tech
    except Exception as e:
        print(f"⚠️ [REPORT] Failed to generate multi-level section '{section_name}': {e}")
        return f"Analysis generation failed for {section_name} (non-technical version).", f"Analysis generation failed for {section_name} (technical version). Error: {str(e)}"
async def _generate_architecture_section(
    self,
    architecture_type: str,  # "Frontend", "Backend", "Database", "API"
    module_analyses: List[Dict],
    findings_by_module: Dict[str, List[Dict]],
    metrics_by_module: Dict[str, Dict],
    synthesis_analysis: Dict,
    progress_mgr=None
) -> Tuple[str, str]:
    """
    Generate architecture section for specific type (Frontend, Backend, Database, API).

    A module is considered relevant when any of its analyzed file paths
    ends with one of the suffixes or contains one of the keywords
    registered for the given architecture type (case-insensitive).
    Unknown types match no modules.

    Returns:
        (non_technical_markdown, technical_markdown) as produced by
        ``_generate_section_multi_level``.
    """
    # Relevance heuristics per architecture type, keyed by lowercase name.
    suffixes_by_type = {
        "frontend": ('.jsx', '.tsx', '.vue', '.html', '.css', '.scss'),
        "backend": ('.py', '.java', '.cs', '.go', '.rb'),
        "database": ('.sql', '.migration'),
    }
    keywords_by_type = {
        "frontend": ('frontend', 'client', 'component'),
        "backend": ('backend', 'server', 'service', 'controller'),
        "database": ('database', 'db', 'model', 'schema'),
        "api": ('api', 'endpoint', 'route', 'controller', 'rest', 'graphql'),
    }
    type_key = architecture_type.lower()
    suffixes = suffixes_by_type.get(type_key, ())
    keywords = keywords_by_type.get(type_key, ())

    # Filter modules and findings relevant to this architecture type.
    relevant_modules = []
    relevant_findings = []
    for module in module_analyses:
        module_name = module.get('module_name', '')
        files = module.get('files_analyzed', [])
        is_relevant = any(
            (bool(suffixes) and f.lower().endswith(suffixes))
            or any(keyword in f.lower() for keyword in keywords)
            for f in files
        )
        if is_relevant:
            relevant_modules.append(module)
            relevant_findings.extend(findings_by_module.get(module_name, []))

    # Build the name set once instead of rebuilding a list per metrics key.
    relevant_names = {m.get('module_name') for m in relevant_modules}
    section_data = {
        'architecture_type': architecture_type,
        'relevant_modules': relevant_modules,
        'relevant_findings': relevant_findings,
        'metrics': {k: v for k, v in metrics_by_module.items() if k in relevant_names},
        'synthesis_analysis': synthesis_analysis
    }
    return await self._generate_section_multi_level(
        section_name=f"{architecture_type} Architecture",
        section_data=section_data,
        progress_mgr=progress_mgr
    )
async def _generate_module_section(
    self,
    module: Dict,
    findings: List[Dict],
    metrics: Dict,
    progress_mgr=None
) -> Tuple[str, str]:
    """
    Generate detailed section for a specific module.

    Thin wrapper around ``_generate_section_multi_level`` that packages
    the module payload and labels the section with the module name.
    """
    payload = {
        'module': module,
        'findings': findings,
        'metrics': metrics
    }
    title = f"Module: {module.get('module_name', 'Unknown')}"
    return await self._generate_section_multi_level(
        section_name=title,
        section_data=payload,
        progress_mgr=progress_mgr
    )
def _sanitize_html_for_pdf(self, text: str) -> str:
"""
Sanitize HTML for ReportLab Paragraph.
ReportLab only supports a limited set of HTML attributes.
Removes or escapes unsupported attributes like rel=, as=, etc.
"""
import re
try:
# Replace problematic HTML attributes that ReportLab doesn't support
# Common unsupported attributes: rel, as, crossorigin, integrity, etc.
# Remove rel="..." attribute from tags
text = re.sub(r'{self._sanitize_html_for_pdf(heading_text)}", subsection_style))
def _convert_markdown_to_pdf_elements(self, markdown_text, styles, section_style,
                                      subsection_style, code_style, normal_style):
    """
    Convert a markdown string into a list of ReportLab flowables.

    Handles fenced code blocks, #/##/### headings, bullet and numbered
    lists, and plain paragraphs; strips residual markdown syntax and
    decorative black squares from text lines.

    NOTE(review): the header and code-fence handling of this method were
    corrupted in the source; this head is reconstructed to be consistent
    with the surviving loop body below — confirm against version control.
    """
    elements = []
    lines = markdown_text.split('\n')
    in_code_block = False
    code_block_lines = []
    i = 0
    while i < len(lines):
        line = lines[i]
        stripped = line.strip()
        # Code fences toggle code-block capture.
        if stripped.startswith('```'):
            if in_code_block:
                # Closing fence: flush the captured block.
                code_text = '\n'.join(code_block_lines)
                # Drop a leading language identifier line (e.g. "python").
                code_text = re.sub(r'^[a-zA-Z]+\n', '', code_text, flags=re.MULTILINE)
                elements.append(Preformatted(code_text, code_style))
                elements.append(Spacer(1, 8))
                code_block_lines = []
            in_code_block = not in_code_block
            i += 1
            continue
        if in_code_block:
            code_block_lines.append(line)
            i += 1
            continue
        # Handle headings (check ### before ## before #).
        if stripped.startswith('###'):
            # H3 heading
            heading_text = stripped[3:].strip()
            if heading_text:
                elements.append(Paragraph(f"<b>{self._sanitize_html_for_pdf(heading_text)}</b>", subsection_style))
                elements.append(Spacer(1, 6))
        elif stripped.startswith('##'):
            # H2 heading
            heading_text = stripped[2:].strip()
            if heading_text:
                elements.append(Paragraph(f"<b>{self._sanitize_html_for_pdf(heading_text)}</b>", subsection_style))
                elements.append(Spacer(1, 8))
        elif stripped.startswith('#'):
            # H1 heading
            heading_text = stripped[1:].strip()
            if heading_text:
                elements.append(Paragraph(f"<b>{self._sanitize_html_for_pdf(heading_text)}</b>", section_style))
                elements.append(Spacer(1, 10))
        # Handle bullet points - standardize all bullet types
        elif stripped.startswith('-') or stripped.startswith('*') or stripped.startswith('•') or stripped.startswith('■'):
            # Remove markdown bullet and black squares, standardize to bullet
            bullet_text = re.sub(r'^[-*•■\s]+', '', stripped)
            # Remove multiple black squares at start
            bullet_text = re.sub(r'^■+', '', bullet_text).strip()
            if bullet_text:
                # Handle nested bullets (indented)
                indent_level = len(line) - len(line.lstrip())
                if indent_level > 2:
                    bullet_text = f" • {self._sanitize_html_for_pdf(bullet_text)}"
                else:
                    bullet_text = f"• {self._sanitize_html_for_pdf(bullet_text)}"
                elements.append(Paragraph(bullet_text, normal_style))
        # Handle numbered lists
        elif re.match(r'^\d+\.', stripped):
            # Numbered list item
            list_text = re.sub(r'^\d+\.\s*', '', stripped)
            # Remove black squares
            list_text = re.sub(r'^■+\s*', '', list_text).strip()
            if list_text:
                elements.append(Paragraph(f"• {self._sanitize_html_for_pdf(list_text)}", normal_style))
        # Handle empty lines
        elif not stripped:
            elements.append(Spacer(1, 4))
        # Regular paragraph text
        else:
            # Remove any remaining markdown syntax
            clean_text = stripped
            # Remove bold/italic markdown (**text** -> text)
            clean_text = re.sub(r'\*\*([^*]+)\*\*', r'\1', clean_text)
            clean_text = re.sub(r'\*([^*]+)\*', r'\1', clean_text)
            # Remove inline code backticks
            clean_text = re.sub(r'`([^`]+)`', r'\1', clean_text)
            # Remove black squares
            clean_text = re.sub(r'■+', '', clean_text)
            # Remove trailing backticks
            clean_text = re.sub(r'```\s*$', '', clean_text)
            if clean_text:
                elements.append(Paragraph(self._sanitize_html_for_pdf(clean_text), normal_style))
        i += 1
    # Handle any remaining code block (unterminated fence at end of input).
    if in_code_block and code_block_lines:
        code_text = '\n'.join(code_block_lines)
        code_text = re.sub(r'^[a-zA-Z]+\n', '', code_text, flags=re.MULTILINE)
        elements.append(Preformatted(code_text, code_style))
        elements.append(Spacer(1, 8))
    return elements
def _extract_code_evidence_for_report(self, file_analyses) -> List[Dict]:
"""Extract code evidence with actual line numbers and code snippets for report."""
evidence_items = []
try:
for fa in file_analyses:
# Handle different file analysis formats
if hasattr(fa, '__dict__'): # Object format
file_path = getattr(fa, 'path', getattr(fa, 'file_path', 'Unknown'))
content = getattr(fa, 'content', '')
issues = getattr(fa, 'issues_found', [])
recommendations = getattr(fa, 'recommendations', [])
language = getattr(fa, 'language', 'text')
elif isinstance(fa, dict): # Dictionary format
file_path = fa.get('path', fa.get('file_path', 'Unknown'))
content = fa.get('content', '')
issues = fa.get('issues_found', [])
recommendations = fa.get('recommendations', [])
language = fa.get('language', 'text')
else:
continue
if not content:
continue
lines = content.split('\n')
# Extract evidence from issues
for issue in issues[:3]: # Top 3 issues per file
try:
issue_text = str(issue) if not isinstance(issue, dict) else issue.get('title', str(issue))
evidence_snippet = self._find_code_for_issue(lines, issue_text, language)
if evidence_snippet:
evidence_items.append({
'file': str(file_path),
'issue': issue_text,
'line_number': evidence_snippet['line_number'],
'code_snippet': evidence_snippet['code'],
'language': language,
'recommendation': evidence_snippet['recommendation'],
'severity': 'HIGH' if any(keyword in issue_text.lower()
for keyword in ['security', 'vulnerability', 'critical', 'error', 'fail']) else 'MEDIUM'
})
except Exception as e:
print(f"Warning: Could not extract evidence for issue: {e}")
continue
# Sort by severity and limit results
evidence_items.sort(key=lambda x: (x['severity'] != 'HIGH', x['file']))
return evidence_items[:20] # Top 20 evidence items
except Exception as e:
print(f"Error extracting code evidence for report: {e}")
return []
def _find_code_for_issue(self, lines, issue_text, language):
"""Find code snippet demonstrating the issue."""
try:
issue_keywords = {
'authentication': ['password', 'auth', 'login', 'token'],
'security': ['sql', 'injection', 'xss', 'csrf', 'vulnerability'],
'validation': ['input', 'validate', 'sanitize', 'req.body'],
'error': ['error', 'exception', 'try', 'catch', 'throw'],
'performance': ['query', 'loop', 'n+1', 'slow']
}
issue_lower = issue_text.lower()
# Find relevant lines
for category, keywords in issue_keywords.items():
if any(keyword in issue_lower for keyword in keywords):
for i, line in enumerate(lines):
if any(keyword in line.lower() for keyword in keywords) and len(line.strip()) > 10:
# Get context (3 lines)
start = max(0, i-1)
end = min(len(lines), i+2)
context = '\n'.join(lines[start:end])
return {
'line_number': i + 1,
'code': context,
'recommendation': self._get_fix_for_issue(issue_text)
}
return None
except:
return None
def _get_fix_for_issue(self, issue_text):
"""Generate specific fix recommendation."""
issue_lower = issue_text.lower()
if 'password' in issue_lower:
return "Hash passwords with bcrypt before storing"
elif 'sql' in issue_lower:
return "Use prepared statements to prevent SQL injection"
elif 'token' in issue_lower:
return "Add expiration and proper validation to tokens"
elif 'validation' in issue_lower:
return "Add comprehensive input validation"
elif 'error' in issue_lower:
return "Implement proper error handling with try-catch"
else:
return f"Address: {issue_text}"
async def _generate_code_evidence_section(self, code_evidence: List[Dict], progress_mgr=None) -> Tuple[str, str]:
    """Generate non-technical and technical versions of code evidence section.

    Args:
        code_evidence: Evidence dicts (keys: 'file', 'issue', 'line_number',
            'code_snippet', 'language', 'recommendation', 'severity') as
            produced by _extract_code_evidence_for_report.
        progress_mgr: Accepted for signature symmetry with the sibling
            section generators; not referenced in this method.

    Returns:
        (non_technical_text, technical_text). The non-technical half is ""
        when the report is configured as technical-only.
    """
    try:
        if not code_evidence:
            return "No specific code evidence found.", "No code evidence available."
        # Resolve the technical-only switch with this precedence:
        # instance attribute _report_technical_only (if set) ->
        # REPORT_TECHNICAL_ONLY env var ("1"/"true"/"yes"/"on" => True) ->
        # default True.
        technical_only_default = True
        section_technical_only = getattr(self, "_report_technical_only", None)
        if section_technical_only is None:
            technical_only_env = os.getenv("REPORT_TECHNICAL_ONLY")
            if technical_only_env is None:
                section_technical_only = technical_only_default
            else:
                section_technical_only = technical_only_env.lower() in ("1", "true", "yes", "on")
        high_count = len([e for e in code_evidence if e['severity'] == 'HIGH'])
        medium_count = len([e for e in code_evidence if e['severity'] == 'MEDIUM'])
        unique_files = len(set(e['file'] for e in code_evidence))
        tech_content = f"""
🔧 DETAILED CODE EVIDENCE ANALYSIS
📋 COMPREHENSIVE FINDINGS:
Total Issues Found: {len(code_evidence)}
Files Analyzed: {unique_files}
High Severity: {high_count}
Medium Severity: {medium_count}
"""
        for idx, evidence in enumerate(code_evidence[:10], 1):  # Top 10 detailed findings
            # Snippets are truncated to 300 chars to keep the section compact.
            tech_content += f"""
FINDING #{idx} - {evidence['severity']} PRIORITY
File: {evidence['file']}
Issue: {evidence['issue']}
Line: {evidence['line_number']}
Code Evidence:
{evidence['code_snippet'][:300]}{"..." if len(evidence['code_snippet']) > 300 else ""}
Recommended Fix:
{evidence['recommendation']}
{'─' * 60}
"""
        tech_content += """
🔧 IMPLEMENTATION NOTES:
• Focus on HIGH severity issues first
• Test all fixes in staging environment
• Use code review process for all changes
• Update documentation after fixes
• Consider automated testing for fixed issues
"""
        if section_technical_only:
            # Technical-only mode: suppress the manager-facing narrative.
            return "", tech_content
        # Non-technical version (for managers)
        nontech_content = f"""
🔍 CODE INSPECTION FINDINGS
Our automated code review identified {len(code_evidence)} specific issues with actual code examples as proof.
📊 ISSUE BREAKDOWN:
• High Priority Issues: {high_count}
• Medium Priority Issues: {medium_count}
• Files with Evidence: {unique_files}
🎯 TOP CRITICAL FINDINGS:
"""
        # Add top 5 critical findings for managers
        critical_findings = [e for e in code_evidence if e['severity'] == 'HIGH'][:5]
        for idx, finding in enumerate(critical_findings, 1):
            file_name = finding['file'].split('/')[-1]  # Just filename for managers
            nontech_content += f"""
{idx}. {file_name}
Issue: {finding['issue']}
Business Impact: This could cause system failures or security breaches
Fix Required: {finding['recommendation']}
"""
        nontech_content += """
💡 BUSINESS IMPACT:
These code issues directly affect system reliability, security, and maintenance costs.
Each issue represents technical debt that slows down development and increases the risk of production failures.
⚡ IMMEDIATE ACTION:
Assign developers to fix high-priority issues within 1-2 weeks to prevent system degradation.
"""
        return nontech_content, tech_content
    except Exception as e:
        print(f"Error generating code evidence section: {e}")
        return "Error generating code evidence section.", "Technical error in evidence generation."
async def _generate_onboarding_guide(
    self,
    module_analyses: List[Dict],
    analysis_state: Dict,
    progress_mgr=None
) -> str:
    """
    Generate junior developer onboarding guide (technical only).

    Builds a prompt from the first 10 module analyses (10 files each) plus
    the analysis state and asks the configured Claude model for the guide.
    The blocking SDK call runs in the default executor so the event loop is
    not stalled. Never raises: failures are logged and returned as text.
    """
    try:
        prompt = f"""
You are a senior software architect. Generate a comprehensive junior developer onboarding guide.
MODULE ANALYSES:
{json.dumps([{'module_name': m.get('module_name'), 'files_analyzed': m.get('files_analyzed', [])[:10]} for m in module_analyses[:10]], indent=2, default=str)}
ANALYSIS STATE:
{json.dumps(analysis_state, indent=2, default=str)}
Generate a detailed onboarding guide that includes:
1. Project structure overview
2. Key files to understand first
3. How to set up the development environment
4. Common patterns and conventions
5. Where to find what (file locations)
6. Step-by-step walkthrough of key features
7. Common pitfalls to avoid
8. Testing and debugging tips
Keep it practical and actionable for junior developers.
"""
        # get_event_loop() is deprecated inside coroutines (Python 3.10+);
        # get_running_loop() is the correct call here and never creates a loop.
        loop = asyncio.get_running_loop()

        def call_claude():
            # Blocking Anthropic SDK call; executed off the event loop.
            message = self.client.messages.create(
                model=os.getenv("CLAUDE_MODEL", "claude-3-5-haiku-latest"),
                max_tokens=6000,
                temperature=0.3,
                messages=[{"role": "user", "content": prompt}]
            )
            return message.content[0].text.strip()

        return await loop.run_in_executor(None, call_claude)
    except Exception as e:
        print(f"⚠️ [REPORT] Failed to generate onboarding guide: {e}")
        return f"Onboarding guide generation failed. Error: {str(e)}"
def _detect_technology_stack(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
"""Detect the actual technology stack from the codebase."""
languages = analysis.languages
detected = {
'primary_language': 'Unknown',
'backend_framework': 'Unknown',
'orm_database': 'Unknown',
'orm_name': 'Unknown',
'database_type': 'Unknown',
'is_csharp': False,
'is_nodejs': False,
'is_java': False,
'is_python': False,
'indicators': []
}
# Scan files for technology indicators
for fa in analysis.file_analyses:
file_path = str(fa.path).lower()
file_content = getattr(fa, 'content', '') or ''
# C# / .NET / Entity Framework detection
if '.cs' in file_path or '.csproj' in file_path:
detected['is_csharp'] = True
detected['primary_language'] = 'C#'
if 'entityframeworkcore' in file_content.lower() or 'dbcontext' in file_content.lower():
detected['orm_name'] = 'Entity Framework Core'
detected['orm_database'] = 'EF Core'
detected['indicators'].append('Entity Framework Core')
if 'appsettings.json' in file_path or 'web.config' in file_path:
detected['backend_framework'] = 'ASP.NET Core'
# Node.js / Express / Mongoose detection
if '.js' in file_path or '.ts' in file_path or 'package.json' in file_path:
if not detected['primary_language'] or detected['primary_language'] == 'Unknown':
if 'typescript' in languages:
detected['primary_language'] = 'TypeScript'
else:
detected['primary_language'] = 'JavaScript'
detected['is_nodejs'] = True
if 'express' in file_content.lower() or 'app.use' in file_content.lower():
detected['backend_framework'] = 'Express.js'
detected['indicators'].append('Express.js')
if 'mongoose' in file_content.lower() or 'mongoose.connect' in file_content.lower():
detected['orm_name'] = 'Mongoose'
detected['orm_database'] = 'Mongoose ODM'
detected['database_type'] = 'MongoDB'
detected['indicators'].append('Mongoose ODM')
if 'sequelize' in file_content.lower():
detected['orm_name'] = 'Sequelize'
detected['orm_database'] = 'Sequelize ORM'
detected['database_type'] = 'PostgreSQL/MySQL'
detected['indicators'].append('Sequelize ORM')
if 'typeorm' in file_content.lower():
detected['orm_name'] = 'TypeORM'
detected['orm_database'] = 'TypeORM'
detected['indicators'].append('TypeORM')
# Java / Spring Boot / Hibernate detection
if '.java' in file_path or 'pom.xml' in file_path or 'build.gradle' in file_path:
detected['is_java'] = True
detected['primary_language'] = 'Java'
if 'spring-boot' in file_content.lower() or '@springbootapplication' in file_content.lower():
detected['backend_framework'] = 'Spring Boot'
detected['indicators'].append('Spring Boot')
if 'hibernate' in file_content.lower() or 'jpa' in file_content.lower() or '@entity' in file_content.lower():
detected['orm_name'] = 'Hibernate/JPA'
detected['orm_database'] = 'Hibernate'
detected['indicators'].append('Hibernate/JPA')
# Python / Django / SQLAlchemy detection
if '.py' in file_path:
detected['is_python'] = True
if not detected['primary_language'] or detected['primary_language'] == 'Unknown':
detected['primary_language'] = 'Python'
if 'django' in file_content.lower() or 'models.py' in file_path:
detected['backend_framework'] = 'Django'
detected['orm_database'] = 'Django ORM'
detected['indicators'].append('Django')
if 'flask' in file_content.lower():
detected['backend_framework'] = 'Flask'
detected['indicators'].append('Flask')
if 'sqlalchemy' in file_content.lower():
detected['orm_name'] = 'SQLAlchemy'
detected['orm_database'] = 'SQLAlchemy'
detected['indicators'].append('SQLAlchemy')
# Set default values based on languages if not detected
if not detected['primary_language'] or detected['primary_language'] == 'Unknown':
if 'javascript' in languages or 'typescript' in languages:
detected['primary_language'] = 'JavaScript' if 'javascript' in languages else 'TypeScript'
elif 'python' in languages:
detected['primary_language'] = 'Python'
elif 'java' in languages:
detected['primary_language'] = 'Java'
elif 'csharp' in languages:
detected['primary_language'] = 'C#'
return detected
def _determine_project_type(self, analysis: RepositoryAnalysis) -> str:
"""Determine the type of project based on file analysis."""
languages = analysis.languages
if 'javascript' in languages or 'typescript' in languages:
if 'html' in languages or 'css' in languages:
return "Web Application"
return "Node.js Application"
elif 'python' in languages:
return "Python Application"
elif 'java' in languages:
return "Java Application"
elif 'csharp' in languages:
return ".NET Application"
else:
return "Multi-language Application"
def _analyze_project_purpose(self, analysis: RepositoryAnalysis) -> str:
"""Analyze the purpose of the project."""
repo_name = analysis.repo_path.split('/')[-1] if '/' in analysis.repo_path else analysis.repo_path
if 'api' in repo_name.lower():
return "API Service"
elif 'web' in repo_name.lower() or 'frontend' in repo_name.lower():
return "Web Frontend"
elif 'backend' in repo_name.lower() or 'server' in repo_name.lower():
return "Backend Service"
else:
return "Software Application"
def _determine_architecture_pattern(self, analysis: RepositoryAnalysis) -> str:
"""Determine the architecture pattern."""
large_files = [fa for fa in analysis.file_analyses if fa.lines_of_code > 500]
if len(large_files) > len(analysis.file_analyses) * 0.3:
return "Monolithic Architecture"
elif 'microservice' in str(analysis.repo_path).lower():
return "Microservices Architecture"
else:
return "Modular Architecture"
def _evaluate_technology_stack(self, analysis: RepositoryAnalysis) -> str:
"""Evaluate the technology stack."""
languages = analysis.languages
evaluation = "Technology Stack Evaluation:
"
# Good choices
good_choices = []
if 'python' in languages:
good_choices.append("Python: Excellent for rapid development and maintainability")
if 'typescript' in languages:
good_choices.append("TypeScript: Provides type safety and better IDE support")
if 'javascript' in languages:
good_choices.append("JavaScript: Widely supported and flexible")
if good_choices:
evaluation += "✅ Good choices: "
for choice in good_choices:
evaluation += f"• {choice} "
# Problematic choices
problematic = []
if len(languages) > 5:
problematic.append("Too many languages: Increases complexity and maintenance overhead")
if 'php' in languages and 'python' in languages:
problematic.append("Mixed backend languages: Choose one primary backend language")
if problematic:
evaluation += " ❌ Problematic choices: "
for problem in problematic:
evaluation += f"• {problem} "
# Recommendations
recommendations = []
if 'javascript' in languages and 'typescript' not in languages:
recommendations.append("Consider migrating to TypeScript for better type safety")
if len([fa for fa in analysis.file_analyses if fa.lines_of_code > 1000]) > 0:
recommendations.append("Refactor large files into smaller, focused modules")
if recommendations:
evaluation += " 🔧 Recommended upgrades: "
for rec in recommendations:
evaluation += f"• {rec} "
return evaluation
def _analyze_code_organization(self, analysis: RepositoryAnalysis) -> str:
"""Analyze code organization and structure."""
large_files = [fa for fa in analysis.file_analyses if fa.lines_of_code > 500]
avg_file_size = analysis.total_lines / analysis.total_files if analysis.total_files > 0 else 0
organization = f"""
Folder/File Structure Analysis:
• Total files: {analysis.total_files}
• Average file size: {avg_file_size:.0f} lines
• Large files (>500 lines): {len(large_files)} ({len(large_files)/analysis.total_files*100:.1f}%)
• Languages used: {len(analysis.languages)}
Organization Assessment:
"""
if len(large_files) > analysis.total_files * 0.2:
organization += "❌ Poor organization: Too many large files indicate poor separation of concerns "
else:
organization += "✅ Good organization: Most files are appropriately sized "
if len(analysis.languages) > 3:
organization += "⚠️ Mixed languages: Consider consolidating to reduce complexity "
else:
organization += "✅ Language consistency: Reasonable number of languages "
return organization
def _analyze_backend_layer(self, backend_files) -> str:
"""Analyze backend layer specifically."""
if not backend_files:
return "No backend files identified."
large_backend_files = [fa for fa in backend_files if fa.lines_of_code > 500]
avg_backend_size = sum(fa.lines_of_code for fa in backend_files) / len(backend_files)
analysis = f"""
Backend Layer Analysis:
• Backend files: {len(backend_files)}
• Average size: {avg_backend_size:.0f} lines
• Large files: {len(large_backend_files)}
Monolithic Files Identified:
"""
for fa in large_backend_files[:3]:
analysis += f"• {str(fa.path)} - {fa.lines_of_code} lines (EXTREME MONOLITH) "
analysis += f" Location: {str(fa.path)} "
analysis += f" Problems: Poor maintainability, difficult testing, high complexity
"
analysis += "Anti-Patterns Detected: "
analysis += "• God Object: Large files with multiple responsibilities "
analysis += "• Tight Coupling: High interdependency between modules "
analysis += "• Code Duplication: Repeated logic across files
"
return analysis
def _analyze_frontend_layer(self, frontend_files) -> str:
"""Analyze frontend layer specifically."""
if not frontend_files:
return "No frontend files identified."
large_frontend_files = [fa for fa in frontend_files if fa.lines_of_code > 300]
avg_frontend_size = sum(fa.lines_of_code for fa in frontend_files) / len(frontend_files)
analysis = f"""
Frontend Layer Analysis:
• Frontend files: {len(frontend_files)}
• Average size: {avg_frontend_size:.0f} lines
• Large components: {len(large_frontend_files)}
Component Structure Issues:
• Large components indicate poor separation of concerns
• Missing component composition patterns
• Inconsistent state management approach
Bundle Size Issues:
• Large files contribute to increased bundle size
• Missing code splitting strategies
• Potential for tree shaking optimization
Performance Problems:
• Large components cause re-rendering issues
• Missing memoization for expensive operations
• Inefficient state updates and prop drilling
"""
return analysis
def _identify_security_vulnerabilities(self, analysis: RepositoryAnalysis) -> str:
"""Identify security vulnerabilities."""
security_issues = []
# Look for common security patterns in issues
for fa in analysis.file_analyses:
if fa.issues_found:
for issue in fa.issues_found:
issue_str = str(issue).lower()
if any(keyword in issue_str for keyword in ['sql', 'injection', 'xss', 'csrf', 'auth', 'password', 'token', 'session']):
security_issues.append(f"• {str(fa.path)}: {issue}")
if not security_issues:
security_issues = [
"• Potential SQL injection vulnerabilities in database queries",
"• Missing input validation on user inputs",
"• Insecure authentication mechanisms",
"• Lack of proper session management",
"• Missing CSRF protection"
]
security_text = f"""
Security Vulnerability Assessment:
Immediate Security Actions Required:
• Implement input validation and sanitization
• Add proper authentication and authorization
• Enable CSRF protection
• Implement secure session management
• Add security headers and HTTPS enforcement
"""
return security_text
def _analyze_performance_issues(self, analysis: RepositoryAnalysis) -> str:
"""Analyze performance issues."""
large_files = [fa for fa in analysis.file_analyses if fa.lines_of_code > 500]
avg_file_size = analysis.total_lines / analysis.total_files if analysis.total_files > 0 else 0
performance_text = f"""
Performance Analysis:
Database Performance:
• Large files indicate potential N+1 query problems
• Missing database indexing strategies
• Inefficient data fetching patterns
API Response Times:
• Average file complexity: {avg_file_size:.0f} lines
• Large files cause increased processing time
• Missing caching strategies
Memory Usage:
• {len(large_files)} files exceed optimal size limits
• Potential memory leaks in large components
• Inefficient data structures and algorithms
Bottlenecks Identified:
• Monolithic file structures
• Lack of code splitting and lazy loading
• Missing performance monitoring
• Inefficient state management
"""
return performance_text
def _analyze_testing_infrastructure(self, analysis: RepositoryAnalysis) -> str:
"""Analyze testing infrastructure."""
test_files = [fa for fa in analysis.file_analyses if 'test' in str(fa.path).lower() or fa.language in ['spec', 'test']]
test_coverage = len(test_files) / analysis.total_files * 100 if analysis.total_files > 0 else 0
testing_text = f"""
Testing Infrastructure Assessment:
Test Coverage and Quality:
• Current Test Coverage: {test_coverage:.1f}%
• Assessment: {'POOR' if test_coverage < 30 else 'GOOD' if test_coverage > 70 else 'FAIR'}
Missing Tests:
• Unit Tests: Critical business logic lacks unit test coverage
• Integration Tests: API endpoints and database interactions untested
• E2E Tests: User workflows and critical paths not covered
Test Quality Issues:
• If tests exist, they likely lack proper assertions
• Missing test data setup and teardown
• No automated test execution in CI/CD pipeline
• Insufficient test documentation and maintenance
"""
return testing_text
def _create_fix_roadmap(self, analysis: RepositoryAnalysis) -> str:
"""Create comprehensive fix roadmap."""
critical_files = [fa for fa in analysis.file_analyses if fa.severity_score < 4]
high_priority_files = [fa for fa in analysis.file_analyses if 4 <= fa.severity_score < 6]
roadmap = f"""
Comprehensive Fix Roadmap
Phase 1: Emergency Stabilization (24-48 Hours)
• Fix {len(critical_files)} critical files with quality scores below 4/10
• Address immediate security vulnerabilities
• Implement basic error handling and logging
• Set up monitoring and alerting systems
• Create emergency response procedures
Phase 3: Medium-Term Refactoring (1-2 Months)
• Break down monolithic files into smaller modules
• Implement proper architecture patterns
• Add comprehensive documentation
• Optimize build and deployment processes
• Implement advanced monitoring and analytics
Phase 4: Long-Term Modernization (3-6 Months)
• Complete architectural overhaul if needed
• Implement advanced security measures
• Add comprehensive test coverage (80%+)
• Optimize for scalability and performance
• Implement CI/CD best practices
"""
return roadmap
def _create_junior_developer_guide(self, analysis: RepositoryAnalysis) -> str:
    """Generate AI-powered comprehensive junior developer guide based on actual codebase analysis.

    Builds a large prompt from the detected language mix and the worst-scoring
    files, asks the configured Claude model for the guide, then converts the
    markdown-ish response into ReportLab-friendly inline markup. Falls back to
    a static template on any failure.

    NOTE(review): the inline-markup literals in this method appear to have
    been stripped by an HTML-unescaping pass (empty tag checks, a regex with
    a raw newline inside it); they are reconstructed here as <b>/<br/> tags
    per the surrounding comments — confirm against the PDF renderer.
    """
    try:
        # Detect project type from the language histogram.
        languages = analysis.languages or {}
        has_react = any(lang.lower() in ['javascript', 'typescript', 'jsx', 'tsx'] for lang in languages.keys())
        has_csharp = any(lang.lower() in ['csharp', 'c#'] for lang in languages.keys())
        has_python = any(lang.lower() in ['python'] for lang in languages.keys())
        has_java = any(lang.lower() in ['java'] for lang in languages.keys())
        print(f"🔍 [JUNIOR GUIDE] Detected languages: {list(languages.keys())}")
        # Worst-scoring files provide concrete examples for the prompt.
        problematic_files = [fa for fa in analysis.file_analyses if fa.severity_score < 6][:10]
        print(f"🔍 [JUNIOR GUIDE] Found {len(problematic_files)} problematic files")
        code_examples = []
        for fa in problematic_files:
            if hasattr(fa, 'content') and fa.content:
                code_snippet = fa.content[:2000]  # 2000 chars per file keeps the prompt bounded
                issues_str = ', '.join(fa.issues_found[:5]) if isinstance(fa.issues_found, (list, tuple)) else 'No issues'
                code_examples.append(f"File: {fa.path}\nLines: {fa.lines_of_code}\nIssues: {issues_str}\nCode:\n{code_snippet}\n")
        code_samples_text = "\n\n---CODE EXAMPLE SEPARATOR---\n\n".join(code_examples[:8]) if code_examples else "No code examples available"
        print(f"🔍 [JUNIOR GUIDE] Prepared {len(code_examples)} code examples")
        # Bail out early when there is nothing to ground the guide on.
        if not languages and not problematic_files:
            print("⚠️ [JUNIOR GUIDE] Insufficient data for guide generation")
            return self._create_fallback_guide(analysis)
        prompt = f"""
You are creating a JUNIOR DEVELOPER IMPLEMENTATION GUIDE for a codebase. Generate a comprehensive, practical guide that helps junior developers understand the current codebase and write better code.
PROJECT CONTEXT:
- Languages Used: {', '.join(languages.keys()) if languages else 'Unknown'}
- Total Files: {analysis.total_files}
- Total Lines: {analysis.total_lines:,}
- Average Code Quality: {analysis.code_quality_score:.1f}/10
- Has C#/.NET: {has_csharp}
- Has React/TypeScript: {has_react}
- Has Python: {has_python}
- Has Java: {has_java}
CURRENT CODEBASE ISSUES:
{analysis.architecture_assessment[:500] if analysis.architecture_assessment else 'No architecture assessment available'}
PROBLEMATIC CODE EXAMPLES FROM ANALYSIS:
{code_samples_text}
GENERATE A COMPREHENSIVE GUIDE INCLUDING:
1. UNDERSTANDING CURRENT SYSTEM PROBLEMS
1.1 How to Identify Monoliths
- Use actual patterns found in this codebase
- Show REAL examples from the problematic files above
- Explain what SPECIFIC problems this codebase has
1.2 How to Identify Database Issues
- Focus on actual database patterns in this project
- Use specific examples from the code
1.3 How to Identify Frontend Issues (if React detected)
- Show specific frontend patterns from this codebase
2. IMPLEMENTATION PATTERNS FOR NEW CODE
Generate templates based on the actual technologies used:
- For C# projects: Service, Repository, Controller patterns
- For React projects: Component, Hook, State management patterns
- Use the SAME coding style as the existing codebase
- Include dependency injection setup specific to this project
3. TESTING PATTERNS FOR NEW CODE
3.1 Unit Test Template - use actual testing frameworks in this codebase
3.2 Integration Test Template - based on the actual project structure
4. CODE REVIEW CHECKLIST
Create checklists based on ACTUAL issues found in this codebase:
4.1 What to REJECT - use specific issues from the analysis
4.2 What to REQUIRE - based on what's missing in current code
4.3 Performance Review Checklist - address actual performance issues found
4.4 Security Review Checklist - based on actual security concerns
6. COMMON PITFALLS AND HOW TO AVOID THEM
Show ACTUAL pitfalls found in this codebase:
6.1 Framework-specific pitfalls (Entity Framework, React, etc.)
6.2 Async/Await Pitfalls
6.3 Exception Handling Pitfalls
6.4 Additional pitfalls specific to this codebase
7. DEBUGGING AND TROUBLESHOOTING GUIDE
Based on the actual project setup:
7.1 Performance Debugging - specific to this stack
7.2 Database Query Debugging - tools and techniques for this project
7.3 Memory Debugging - specific to this technology stack
8. DEPLOYMENT AND OPERATIONS GUIDE
Based on actual deployment setup:
8.1 Environment-Specific Configuration - actual config structure
8.2 Health Checks Configuration - specific to this application
CRITICAL FORMATTING REQUIREMENTS:
- Format all sections with clear hierarchical headings using <b></b> tags
- Use proper bullet points - each bullet point should be on its own line with <br/> before it
- Format: <b>Heading:</b> followed by bullet points on separate lines
- Example CORRECT format:
<b>Key Indicators:</b><br/>
• First item<br/>
• Second item<br/>
• Third item
- Example WRONG format:
Key Indicators: - First item - Second item - Third item (all on same line)
- Use <br/><br/> to separate paragraphs
- Each bullet point must be on its own line with proper line breaks
- Use actual examples from the codebase when possible
- Be specific to this project's technology stack
- Focus on REAL issues found in the analysis
- Provide practical, actionable guidance
- Format code examples with {{ and }} for curly braces
- Keep it comprehensive but practical
Generate the complete guide now with PROPER LINE BREAKS and FORMATTING:
"""
        print("🤖 [JUNIOR GUIDE] Calling Claude API to generate guide...")
        message = self.client.messages.create(
            model=os.getenv("CLAUDE_MODEL", "claude-3-5-haiku-latest"),
            max_tokens=8000,  # room for code-heavy sections
            temperature=0.3,  # slightly creative but consistent
            messages=[{"role": "user", "content": prompt}]
        )
        ai_generated_guide = message.content[0].text.strip()
        print("✅ AI-generated Junior Developer Guide created successfully")
        # Strip markdown code fences; ReportLab cannot render them.
        ai_generated_guide = re.sub(r'```[\w]*\n', '', ai_generated_guide)
        ai_generated_guide = re.sub(r'```\s*', '<br/>', ai_generated_guide)
        # Convert markdown headings to bold runs BEFORE bullet processing.
        ai_generated_guide = re.sub(r'^###\s+(.+)$', r'<b>\1</b>', ai_generated_guide, flags=re.MULTILINE)
        ai_generated_guide = re.sub(r'^##\s+(.+)$', r'<b>\1</b>', ai_generated_guide, flags=re.MULTILINE)
        ai_generated_guide = re.sub(r'^#\s+(.+)$', r'<b>\1</b>', ai_generated_guide, flags=re.MULTILINE)
        # Rebuild line breaks as <br/> while keeping bullet / numbered-list
        # structure intact; processed line by line.
        lines = ai_generated_guide.split('\n')
        processed_lines = []
        for i, line in enumerate(lines):
            line = line.strip()
            if not line:  # empty line becomes a paragraph break
                processed_lines.append('<br/>')
                continue
            if re.match(r'^[•\-\*]\s*', line):
                # Bullet point: normalize the marker and prefix a break
                # (except for the very first line).
                bullet = '• ' + line[1:].lstrip()
                processed_lines.append(('<br/>' if i > 0 else '') + bullet)
                continue
            num_match = re.match(r'^(\d+\.)\s*(.+)', line)
            if num_match:
                numbered = f"{num_match.group(1)} {num_match.group(2)}"
                processed_lines.append(('<br/>' if i > 0 else '') + numbered)
                continue
            # Short free-standing lines are treated as headings and bolded
            # unless they already carry bold markup.
            if line and not line.startswith(' ') and len(line) < 100:
                if '<b>' not in line and '</b>' not in line:
                    line = f"<b>{line}</b>"
            processed_lines.append(('<br/>' if i > 0 else '') + line)
        ai_generated_guide = ''.join(processed_lines)
        # Collapse runs of 4+ breaks into a single paragraph gap.
        ai_generated_guide = re.sub(r'(<br/>){4,}', '<br/><br/>', ai_generated_guide)
        # Balance tags so ReportLab's Paragraph accepts the markup.
        ai_generated_guide = self._sanitize_html_for_reportlab(ai_generated_guide)
        print("✅ Junior Developer Guide formatting completed with proper line breaks")
        return ai_generated_guide
    except Exception as e:
        print(f"⚠️ AI guide generation failed: {e}, using fallback template")
        import traceback
        traceback.print_exc()
        # Fallback to basic template if AI fails
        return self._create_fallback_guide(analysis)
def _sanitize_html_for_reportlab(self, html_text: str) -> str:
"""Sanitize HTML content to ensure all tags are properly closed for ReportLab Paragraph."""
import re
# Remove and tags (ReportLab Paragraph doesn't need these)
html_text = re.sub(r'?para>', '', html_text, flags=re.IGNORECASE)
# Simple approach: ensure all tags are properly closed
# Count opening and closing tags
open_b_count = len(re.findall(r'', html_text))
close_b_count = len(re.findall(r'', html_text))
# If there are unclosed tags, close them at the end
if open_b_count > close_b_count:
html_text += '' * (open_b_count - close_b_count)
# If there are extra tags, remove them
# Process the string to match pairs properly
result = []
b_stack = []
i = 0
while i < len(html_text):
if html_text[i:i+3] == '':
b_stack.append(i)
result.append('')
i += 3
elif html_text[i:i+4] == '':
if b_stack:
b_stack.pop()
result.append('')
# Skip extra closing tags
i += 4
else:
result.append(html_text[i])
i += 1
# Close any remaining open tags
result_text = ''.join(result)
if b_stack:
result_text += '' * len(b_stack)
return result_text
def _create_fallback_guide(self, analysis: RepositoryAnalysis) -> str:
"""Fallback message if AI generation fails - no hardcoded templates."""
languages = analysis.languages or {}
has_react = any(lang.lower() in ['javascript', 'typescript', 'jsx', 'tsx'] for lang in languages.keys())
has_csharp = any(lang.lower() in ['csharp', 'c#'] for lang in languages.keys())
has_python = any(lang.lower() in ['python'] for lang in languages.keys())
return f"""
JUNIOR DEVELOPER IMPLEMENTATION GUIDE
⚠️ AI-Generated Content Unavailable
The AI-powered analysis for this guide was unable to complete. Please refer to the other sections of this report for detailed code analysis and recommendations.
What to Review:
• Section 10: Code Examples - Problems and Solutions
• Section 5: Security Vulnerability Assessment
• Section 6: Performance Analysis
• Section 8: Files Requiring Immediate Attention
Technologies Detected in This Project:
{', '.join(languages.keys()) if languages else 'Unknown'}
Quick Tips Based on Your Stack:
{'• For React/TypeScript projects: Focus on component size, state management, and error boundaries ' if has_react else ''}
{'• For C#/.NET projects: Use dependency injection, async/await patterns, and proper resource disposal ' if has_csharp else ''}
{'• For Python projects: Follow PEP 8 style guide, use virtual environments, and implement proper error handling ' if has_python else ''}
This guide is designed to be AI-generated based on your actual codebase. Review the file-by-file analysis above for specific guidance.
"""
def _generate_key_recommendations(self, analysis: RepositoryAnalysis) -> str:
"""Generate key recommendations summary."""
critical_files = len([fa for fa in analysis.file_analyses if fa.severity_score < 4])
high_priority_files = len([fa for fa in analysis.file_analyses if 4 <= fa.severity_score < 6])
recommendations = f"""
Key Recommendations Summary
Immediate Actions (Next 48 Hours):
1. Fix {critical_files} critical files with quality scores below 4/10
2. Implement basic security measures and input validation
3. Set up error monitoring and alerting
4. Create emergency response procedures
Long-term Objectives (1-6 Months):
1. Complete architectural refactoring
2. Achieve 80%+ test coverage
3. Implement advanced security measures
4. Optimize for scalability and maintainability
5. Establish CI/CD best practices
Success Metrics:
• Reduce average file size to under 300 lines
• Achieve code quality score above 7/10
• Implement 80%+ test coverage
• Reduce bug reports by 50%
• Improve development velocity by 30%
"""
return recommendations
def _derive_file_recommendations(self, fa) -> List[Dict[str, Any]]:
"""Create specific recommendations per file based on detected issues and content."""
path_lower = str(getattr(fa, 'path', '')).lower()
content = getattr(fa, 'content', '') or ''
issues = getattr(fa, 'issues_found', []) or []
language = (getattr(fa, 'language', '') or '').lower()
derived: List[Dict[str, Any]] = []
def add(issue_text: str, impact: str, action: str, hours: int) -> None:
derived.append({
'issue': issue_text,
'impact': impact,
'action': action,
'hours': max(1, hours)
})
# Tests
is_test = any(tok in path_lower for tok in ['test', 'spec', '__tests__'])
if is_test:
if fa.lines_of_code <= 5 or not content.strip():
add('Empty or trivial test file', 'No verification of behavior', 'Write Arrange-Act-Assert tests and mock external I/O', 1)
if re.search(r'(it\(|test\()\s*\(("|")[^\)]+("|")\s*,\s*\(\s*\)\s*=>\s*\{\s*\}\s*\)', content):
add('Placeholder tests without assertions', 'False sense of coverage', 'Add assertions for success and error paths', 1)
# Security
if re.search(r'(password|secret|token|apikey|api_key)\s*[:=]\s*("|")[^\"\']+("|")', content, re.I):
add('Hardcoded credentials', 'Secrets exposed via VCS', 'Use env vars or secrets manager; rotate all keys', 2)
if re.search(r'(eval\(|Function\(|exec\()', content):
add('Dynamic code execution', 'Enables code injection', 'Remove eval/exec; replace with safe parsing/whitelisting', 2)
# Performance
if language in ['javascript', 'typescript'] and re.search(r'for\s*\(.*\)\s*\{[\s\S]*?for\s*\(', content):
add('Nested loops detected', 'Potential O(n^2) path', 'Refactor with maps/sets or precomputed indexes', 3)
if language == 'python' and 'pandas' in content and re.search(r'for\s+.*in\s+.*DataFrame', content):
add('Row-wise loops over DataFrame', 'Severe performance hit', 'Vectorize with pandas/numpy operations', 3)
# Reliability
if language in ['javascript', 'typescript'] and re.search(r'await\s+.*\(', content) and 'try' not in content:
add('Missing try/catch around async I/O', 'Unhandled rejections crash flows', 'Wrap awaits with try/catch and add retries', 2)
if language == 'python' and re.search(r'requests\.(get|post|put|delete)\(', content) and 'try' not in content:
add('Network calls without exception handling', 'Crashes on transient failures', 'Add try/except with timeout, retry and logging', 2)
# Maintainability
if fa.lines_of_code and fa.lines_of_code > 300:
add('Large file', 'Hard to comprehend; higher defect rate', 'Split into cohesive modules with single-responsibility', max(2, fa.lines_of_code // 200))
if re.search(r'console\.log\(|print\(', content) and not re.search(r'logger|logging', content, re.I):
add('Debug prints in source', 'Noisy logs and potential data leakage', 'Use structured logger and proper levels', 1)
# Type safety
if language == 'typescript' and re.search(r':\s*any\b', content):
add('Use of any in TypeScript', 'Bypasses type safety', 'Replace any with precise types; enable noImplicitAny', 2)
# Map provided issues to targeted actions
keyword_rules = [
(r'input validation|sanitize|validation', 'Missing input validation', 'Add centralized validation/sanitization for all entry points'),
(r'sql\s*injection|parameterized', 'Potential SQL injection risk', 'Use parameterized queries/ORM; remove concatenated SQL'),
(r'cors|cross[- ]origin', 'Overly permissive CORS', 'Restrict origins/methods/headers; avoid wildcards'),
(r'circular\s*dependency', 'Circular dependency detected', 'Break cycles via interfaces or dependency inversion'),
(r'duplicate|duplicated code', 'Duplicated code', 'Extract shared utilities; apply DRY'),
(r'memory leak', 'Potential memory leak', 'Dispose/close resources; audit caches and listeners'),
]
for issue_text in (issues[:10] if isinstance(issues, (list, tuple)) else []):
low = str(issue_text).lower()
matched = False
for pattern, impact, action in keyword_rules:
if re.search(pattern, low):
add(issue_text, impact, action, 2)
matched = True
break
if not matched and low:
add(issue_text, 'Affects maintainability/correctness', 'Implement a focused fix aligned with this issue', 2)
# De-duplicate
unique: List[Dict[str, Any]] = []
seen = set()
for rec in derived:
key = (rec['issue'], rec['action'])
if key in seen:
continue
seen.add(key)
unique.append(rec)
limit = 5 if getattr(fa, 'severity_score', 5.0) < 5 else 3
return unique[:limit]
async def query_memory(self, query: str, repo_context: str = "") -> Dict[str, Any]:
    """Query the memory system directly.

    Thin async pass-through to the query engine's ``intelligent_query``;
    returns whatever result dict the engine produces, unmodified.
    """
    return await self.query_engine.intelligent_query(query, repo_context)
# ========== AI-Generated Analysis Methods for Missing Sections ==========
async def _analyze_smoking_gun_evidence(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
    """AI-powered analysis to find exact problematic code blocks (100-500 lines).

    Selects up to five low-scoring files (severity_score < 6), sends their
    leading content to the Claude API, and returns the model's prose analysis
    alongside the selected file objects.

    Returns a dict with keys:
        smoking_guns: the selected file-analysis objects
        ai_analysis:  raw model response text (only when the call succeeds)
        summary:      human-readable outcome line
    """
    try:
        print("🔍 Analyzing smoking gun evidence - finding exact problematic code...")
        # Collect large problematic files (worst five by severity threshold)
        problematic_files = [fa for fa in analysis.file_analyses if fa.severity_score < 6][:5]
        if not problematic_files:
            return {'smoking_guns': [], 'summary': 'No smoking gun evidence found'}
        # Build AI prompt with actual code content
        code_samples = []
        for i, fa in enumerate(problematic_files, 1):
            content = getattr(fa, 'content', '') or ''
            if len(content) > 10000:  # For very large files, extract more context
                content_lines = content.split('\n')
                # Take first 200 lines
                content = '\n'.join(content_lines[:200])
            # NOTE(review): content is truncated twice — to 200 lines above and
            # again to 5000 chars inside the template below; confirm intended.
            code_samples.append(f"""
### File {i}: {fa.path} ({fa.lines_of_code} lines, Quality: {fa.severity_score:.1f}/10)
Issues Found: {', '.join(str(issue) for issue in fa.issues_found[:5])}
Code Content:
{content[:5000]}
""")
        prompt = f"""You are a Senior Code Reviewer. Analyze these problematic files and identify the EXACT smoking gun evidence.
{chr(10).join(code_samples)}
For each file, provide:
1. **The EXACT line of code** causing the disaster (quote it precisely)
2. **Full problematic code blocks** (100-200 lines showing the anti-pattern)
3. **Visual proof** with code annotations showing WHY it's wrong
4. **Root cause analysis** explaining how this pattern breaks the system
5. **Scale of disaster** (how many times this pattern appears in the codebase)
Focus on actual code patterns, not vague suggestions. Provide complete working code snippets showing the disaster pattern.
Format your response as structured text with clear sections."""
        # Model is configurable via CLAUDE_MODEL; low temperature keeps the
        # review output focused and repeatable.
        message = self.client.messages.create(
            model=os.getenv("CLAUDE_MODEL", "claude-3-5-haiku-latest"),
            max_tokens=8000,
            temperature=0.1,
            messages=[{"role": "user", "content": prompt}]
        )
        ai_analysis = message.content[0].text.strip()
        print("✅ Smoking gun evidence analysis complete")
        return {
            'smoking_guns': problematic_files,
            'ai_analysis': ai_analysis,
            'summary': f'Found {len(problematic_files)} files with smoking gun evidence'
        }
    except Exception as e:
        # Best-effort: a failed API call degrades to an empty result, not a crash.
        print(f"⚠️ Smoking gun analysis failed: {e}")
        return {'smoking_guns': [], 'summary': f'Analysis failed: {str(e)}'}
async def _analyze_real_fixes(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
    """AI-powered analysis providing complete Before/After code transformations.

    Picks up to three files with severity_score < 6, sends truncated content
    to the Claude API, and returns the model's before/after refactoring text.

    Returns a dict with keys:
        fixes:    the selected file-analysis objects
        ai_fixes: raw model response text (only when the call succeeds)
        summary:  human-readable outcome line
    """
    try:
        print("🔍 Generating real implementation fixes with complete code...")
        problematic_files = [fa for fa in analysis.file_analyses if fa.severity_score < 6][:3]
        if not problematic_files:
            return {'fixes': [], 'summary': 'No files requiring fixes'}
        code_samples = []
        for fa in problematic_files:
            content = getattr(fa, 'content', '') or ''
            if len(content) > 5000:
                content_lines = content.split('\n')
                content = '\n'.join(content_lines[:150])  # First 150 lines
            # Each sample is truncated again to 3000 chars inside the template.
            code_samples.append(f"""
File: {fa.path}
Lines: {fa.lines_of_code}
Quality Score: {fa.severity_score:.1f}/10
Issues: {', '.join(str(issue) for issue in fa.issues_found[:5])}
Current Code:
{content[:3000]}
""")
        prompt = f"""You are a Senior Refactoring Expert. Provide COMPLETE working code replacements, not suggestions.
{chr(10).join(code_samples)}
For each file, provide:
**COMPLETE BEFORE/AFTER TRANSFORMATION:**
1. **BEFORE Code** (identify the exact problematic section)
2. **AFTER Code** (complete working implementation)
3. **Step-by-step transformation guide**
4. **Exact code to copy-paste**
Requirements:
- Provide FULL working code, not pseudo-code
- Show complete function/class replacement
- Include all imports and dependencies
- Ensure the after code is production-ready
- Explain each major change with inline comments
- Test the logic is equivalent but better
Format your response with clear BEFORE/AFTER sections and copy-paste ready code."""
        # Slightly higher temperature than the evidence pass (0.2 vs 0.1):
        # this call generates code rather than quoting it.
        message = self.client.messages.create(
            model=os.getenv("CLAUDE_MODEL", "claude-3-5-haiku-latest"),
            max_tokens=8000,
            temperature=0.2,
            messages=[{"role": "user", "content": prompt}]
        )
        ai_fixes = message.content[0].text.strip()
        print("✅ Real fixes analysis complete")
        return {
            'fixes': problematic_files,
            'ai_fixes': ai_fixes,
            'summary': f'Generated complete fixes for {len(problematic_files)} files'
        }
    except Exception as e:
        # Best-effort: a failed API call degrades to an empty result, not a crash.
        print(f"⚠️ Real fixes analysis failed: {e}")
        return {'fixes': [], 'summary': f'Analysis failed: {str(e)}'}
def _analyze_orm_configuration(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
"""Analyze ORM/database configuration dynamically based on detected technology stack."""
try:
# Detect technology stack first
tech_stack = self._detect_technology_stack(analysis)
orm_name = tech_stack['orm_name']
is_csharp = tech_stack['is_csharp']
is_nodejs = tech_stack['is_nodejs']
is_java = tech_stack['is_java']
is_python = tech_stack['is_python']
# If no ORM detected, return empty analysis
if orm_name == 'Unknown':
return {
'has_orm': False,
'orm_name': 'None detected',
'config_files': 0,
'total_relationships': 0,
'summary': 'No ORM/database configuration files detected in codebase'
}
config_files = []
total_relationships = 0
optional_relationships = 0
required_relationships = 0
schema_files = []
# Technology-specific file detection and analysis
for fa in analysis.file_analyses:
file_path = str(fa.path).lower()
content = getattr(fa, 'content', '') or ''
# Entity Framework Core (C#)
if is_csharp and orm_name == 'Entity Framework Core':
if 'dbcontext' in file_path or 'onmodelcreating' in content.lower():
config_files.append(fa)
schema_files.append(fa.path)
# Count EF-specific relationships
total_relationships += content.count('HasOptional') + content.count('HasRequired') + \
content.count('WithMany') + content.count('WithOne')
optional_relationships += content.count('HasOptional')
required_relationships += content.count('HasRequired')
# Mongoose ODM (Node.js)
elif is_nodejs and orm_name == 'Mongoose':
if 'model' in file_path and '.js' in file_path or 'schema' in content.lower():
config_files.append(fa)
schema_files.append(fa.path)
# Count Mongoose relationships
total_relationships += content.count('type: Schema.Types.ObjectId') + \
content.count('ref:')
# Mongoose uses ref for relationships
relationship_refs = content.count('ref:')
required_relationships += relationship_refs # All refs are typically required
# Hibernate/JPA (Java)
elif is_java and 'Hibernate' in orm_name:
if '@entity' in content.lower() or '@table' in content.lower():
config_files.append(fa)
schema_files.append(fa.path)
# Count JPA relationships
total_relationships += content.count('@OneToMany') + content.count('@OneToOne') + \
content.count('@ManyToMany') + content.count('@ManyToOne')
optional_relationships += content.count('optional=true')
required_relationships += content.count('optional=false')
# Django ORM (Python)
elif is_python and 'Django' in orm_name:
if 'models.py' in file_path or 'models.Model' in content:
config_files.append(fa)
schema_files.append(fa.path)
# Count Django relationships
total_relationships += content.count('ForeignKey') + content.count('OneToOneField') + \
content.count('ManyToManyField')
required_relationships += content.count('blank=False')
optional_relationships += content.count('blank=True')
# SQLAlchemy (Python)
elif is_python and 'SQLAlchemy' in orm_name:
if 'relationship(' in content.lower() or 'Column(' in content.lower():
config_files.append(fa)
schema_files.append(fa.path)
# Count SQLAlchemy relationships
total_relationships += content.count('relationship(')
required_relationships += content.count('nullable=False')
optional_relationships += content.count('nullable=True')
# Calculate percentages
optional_percent = (optional_relationships / total_relationships * 100) if total_relationships > 0 else 0
required_percent = 100 - optional_percent
return {
'has_orm': True,
'orm_name': orm_name,
'config_files': len(config_files),
'total_relationships': total_relationships,
'optional_relationships': optional_relationships,
'optional_percent': optional_percent,
'required_relationships': required_relationships if required_relationships > 0 else (total_relationships - optional_relationships),
'required_percent': required_percent,
'sample_files': schema_files[:5]
}
except Exception as e:
print(f"⚠️ ORM configuration analysis failed: {e}")
return {
'has_orm': False,
'orm_name': 'Unknown',
'config_files': 0,
'total_relationships': 0,
'optional_relationships': 0,
'optional_percent': 0,
'required_relationships': 0,
'required_percent': 0,
'sample_files': []
}
def _analyze_nplusone_sync(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
"""Synchronous wrapper for N+1 query analysis."""
query_files = [fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['repository', 'service', 'controller', 'dal', 'dao'])]
return {'nplusone_count': len(query_files), 'impact': 'High' if len(query_files) > 3 else 'Medium'}
def _analyze_scalability_metrics(self, analysis: RepositoryAnalysis, max_concurrent: int, conn_per_req: int, pool_size: int, memory_per_req: float, proc_time: float) -> Dict[str, Any]:
"""Analyze scalability metrics and performance gaps."""
current_rpm = max(max_concurrent, 1) # At least 1 to avoid division by zero
required_rpm = 15000
gap_multiplier = required_rpm / current_rpm if current_rpm > 0 else float('inf')
rpm_gap = max(0, required_rpm - current_rpm)
required_pool_size = required_rpm * 2 / 60
conclusion = "IMPOSSIBLE with current architecture" if gap_multiplier > 100 else "REQUIRES MAJOR REdESIGN"
return {
'current_rpm': current_rpm,
'required_rpm': required_rpm,
'gap_multiplier': gap_multiplier,
'rpm_gap': rpm_gap,
'required_pool_size': required_pool_size,
'conclusion': conclusion
}
def _analyze_testing_infrastructure_deep(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
"""Deep dive into testing infrastructure."""
test_files = [fa for fa in analysis.file_analyses if 'test' in str(fa.path).lower() or 'spec' in str(fa.path).lower()]
backend_tests = [fa for fa in test_files if any(ext in str(fa.path).lower() for ext in ['.cs', '.java', '.py', '.go', '.rs'])]
frontend_tests = [fa for fa in test_files if any(ext in str(fa.path).lower() for ext in ['.js', '.ts', '.jsx', '.tsx'])]
empty_tests = [fa for fa in test_files if fa.lines_of_code == 0]
# Use existing method for detailed breakdown
test_analysis = self._analyze_testing_infrastructure(analysis)
return {
'backend_tests': len(backend_tests),
'frontend_tests': len(frontend_tests),
'empty_tests': len(empty_tests),
'overall_coverage': test_analysis['overall_coverage'],
'unit_tests': test_analysis.get('integration_tests', '0'),
'integration_tests': test_analysis['integration_tests'],
'e2e_tests': test_analysis['e2e_tests'],
'security_tests': test_analysis['security_tests'],
'performance_tests': test_analysis['performance_tests'],
'test_quality_score': test_analysis['test_quality_score'],
'critical_issues': test_analysis['critical_issues'],
'recommendations': test_analysis['recommendations']
}
def _analyze_frontend_monoliths(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
"""Analyze frontend monolith files in detail."""
frontend_files = [fa for fa in analysis.file_analyses if any(ext in str(fa.path).lower() for ext in ['.js', '.jsx', '.ts', '.tsx'])]
large_files = sorted(frontend_files, key=lambda x: x.lines_of_code, reverse=True)[:10]
largest_files = [{'name': fa.path.split('/')[-1], 'lines': fa.lines_of_code} for fa in large_files]
total_monolith_lines = sum(fa.lines_of_code for fa in large_files)
avg_monolith_size = sum(fa.lines_of_code for fa in large_files) / len(large_files) if large_files else 0
large_files_count = len([fa for fa in frontend_files if fa.lines_of_code > 300])
monolith_percentage = (total_monolith_lines / sum(fa.lines_of_code for fa in frontend_files) * 100) if frontend_files else 0
return {
'largest_files': largest_files,
'total_monolith_lines': total_monolith_lines,
'avg_monolith_size': avg_monolith_size,
'large_files_count': large_files_count,
'monolith_percentage': monolith_percentage
}
def _create_timeline_roadmap(self, analysis: RepositoryAnalysis, critical_count: int, high_priority_count: int) -> str:
    """Create detailed fix roadmap with timeline.

    Interpolates the critical and high-priority file counts (and simple
    hour estimates derived from them: 8h per critical file, 16h per
    high-priority file) into a fixed phase-by-phase plan.
    """
    # NOTE(review): the plan jumps from Phase 2 to Phase 4 — confirm whether a
    # Phase 3 section was dropped or the numbering is intentional.
    roadmap = f"""
Phase 1: Emergency Response (Days 1-2) - {critical_count} Critical Files
• Fix {critical_count} critical files (severity score < 4)
• Estimated Time: {critical_count * 8} hours
• Team Required: 2-3 senior developers
• Priority: URGENT - System stability at risk
Phase 2: Foundation Stabilization (Weeks 1-2) - {high_priority_count} High Priority Files
• Refactor {high_priority_count} high-priority files (severity 4-6)
• Estimated Time: {high_priority_count * 16} hours
• Team Required: Full development team
• Priority: HIGH - Performance and maintainability
Phase 4: Enterprise Hardening (Months 3-6)
• Comprehensive testing suite (80%+ coverage)
• CI/CD pipeline optimization
• Monitoring and observability
• Security hardening
• Estimated Time: 400-800 hours
• Deliverables: Production-ready enterprise system
"""
    return roadmap
def _analyze_expected_outcomes(self, analysis: RepositoryAnalysis, max_concurrent: int, memory_per_req: float, proc_time: float) -> Dict[str, Any]:
"""Analyze expected outcomes after redesign."""
return {
'business_benefits': [
'Support 500+ concurrent users without performance degradation',
'Reduce response times from 5-30s to <2s',
'Cut infrastructure costs by 70%+ through optimization',
'Improve development velocity by 40%+ with better architecture',
'Reduce bug density by 60%+ with comprehensive testing',
'Enable rapid feature development with scalable foundation'
],
'velocity_improvement': '40',
'cost_reduction': '70',
'maintenance_reduction': '60'
}
def _analyze_devops_infrastructure(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
"""Analyze DevOps and infrastructure setup."""
cicd_files = [fa for fa in analysis.file_analyses if any(indicator in str(fa.path).lower() for indicator in ['ci', 'jenkins', 'gitlab', 'github-actions', 'azure-pipelines', 'circleci'])]
docker_files = [fa for fa in analysis.file_analyses if 'dockerfile' in str(fa.path).lower()]
health_check_files = [fa for fa in analysis.file_analyses if 'health' in str(fa.path).lower()]
monitoring_files = [fa for fa in analysis.file_analyses if any(indicator in str(fa.path).lower() for indicator in ['monitor', 'prometheus', 'grafana', 'datadog'])]
security_files = [fa for fa in analysis.file_analyses if 'security' in str(fa.path).lower()]
deployment_files = [fa for fa in analysis.file_analyses if any(indicator in str(fa.path).lower() for indicator in ['deploy', 'k8s', 'kubernetes', 'helm'])]
recommendations = [
'Implement comprehensive CI/CD pipeline with automated testing',
'Add container orchestration (Docker/Kubernetes) if not present',
'Set up health check endpoints for monitoring',
'Configure APM tools for production monitoring',
'Implement infrastructure as code (IaC)',
'Set up automated security scanning in pipeline'
]
return {
'cicd_files': len(cicd_files),
'docker_files': len(docker_files),
'health_check_files': len(health_check_files),
'monitoring_files': len(monitoring_files),
'security_files': len(security_files),
'deployment_files': len(deployment_files),
'recommendations': recommendations
}
def _analyze_bulk_upload_sync(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
"""Synchronous wrapper for bulk upload analysis."""
upload_files = [fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['upload', 'import', 'bulk', 'excel'])]
upload_classes = len(upload_files)
total_properties = 0
for fa in upload_files:
content = getattr(fa, 'content', '') or ''
total_properties += content.count('public ') + content.count('private ') + content.count('protected ')
return {'upload_classes': upload_classes, 'total_properties': total_properties}
def _analyze_performance_per_layer_sync(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
"""Synchronous wrapper for performance per layer analysis."""
frontend_files = [fa for fa in analysis.file_analyses if any(ext in fa.path.lower() for ext in ['.js', '.jsx', '.ts', '.tsx'])]
total_frontend_lines = sum(fa.lines_of_code for fa in frontend_files)
bundle_size_mb = (total_frontend_lines * 0.5) / 1000
return {
'controller_overhead': '50-100ms',
'service_processing': '100-200ms',
'database_queries': '200-500ms',
'frontend_bundle': f'{bundle_size_mb:.1f}MB',
'total_frontend_lines': total_frontend_lines
}
def _analyze_repository_pattern(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
    """Analyze repository/data access pattern technology-aware.

    Uses ``_detect_technology_stack`` to pick per-language heuristics, then
    scans file paths/content for repository, factory, and unit-of-work
    artifacts. Returns aggregate metrics; when nothing matches or any error
    occurs, a zeroed result with ``has_repos=False`` is returned.
    """
    try:
        # Detect technology stack
        tech_stack = self._detect_technology_stack(analysis)
        is_csharp = tech_stack['is_csharp']
        is_nodejs = tech_stack['is_nodejs']
        is_java = tech_stack['is_java']
        is_python = tech_stack['is_python']
        # Technology-specific repository detection
        repo_files = []
        factory_files = []
        uow_files = []
        pattern_name = "Data Access Layer"
        for fa in analysis.file_analyses:
            file_path = str(fa.path).lower()
            content = getattr(fa, 'content', '') or ''
            # C# specific patterns
            if is_csharp:
                if 'repository' in file_path or 'repository' in content.lower():
                    repo_files.append(fa)
                if 'factory' in file_path or 'factory' in content.lower():
                    factory_files.append(fa)
                if 'unitofwork' in file_path or 'unitofwork' in content.lower():
                    uow_files.append(fa)
                # NOTE(review): assigned on every C# iteration, not only when a
                # pattern file matched — harmless but looks unintentional; confirm.
                pattern_name = "Repository + UnitOfWork Pattern (.NET)"
            # Node.js patterns
            elif is_nodejs:
                if 'repository' in file_path or 'model' in file_path:
                    repo_files.append(fa)
                if 'factory' in file_path:
                    factory_files.append(fa)
            # Java patterns
            elif is_java:
                if 'repository' in file_path or '@repository' in content.lower():
                    repo_files.append(fa)
                if 'factory' in file_path:
                    factory_files.append(fa)
                pattern_name = "Repository + Factory Pattern (Spring)"
            # Python patterns
            elif is_python:
                if 'repository' in file_path or 'dal' in file_path or 'dao' in file_path:
                    repo_files.append(fa)
                if 'factory' in file_path:
                    factory_files.append(fa)
                pattern_name = "Data Access Layer (Python)"
        # Only analyze if repositories are found
        if not repo_files:
            return {
                'has_repos': False,
                'pattern': 'None detected',
                'total_repositories': 0,
                'repositories_per_request': 0,
                'avg_repo_size': 0,
                'factory_files': 0,
                'uow_files': 0,
                'sample_repositories': []
            }
        # Calculate metrics
        total_repositories = len(repo_files)
        avg_repo_size = sum(fa.lines_of_code for fa in repo_files) / len(repo_files) if repo_files else 0
        # Estimate repositories per request
        repositories_per_request = 0
        if uow_files:
            for fa in uow_files:
                content = getattr(fa, 'content', '') or ''
                # Count repository instantiations.
                # NOTE(review): these substrings overlap ('= new ' and 'new I'
                # both contain 'new '), so each instantiation is counted more
                # than once — confirm whether the inflation is intended.
                repositories_per_request = max(repositories_per_request,
                    content.count('= new ') + content.count('new I') +
                    content.count('new ') + content.count('Create'))
        # Default estimate if not calculated
        if repositories_per_request == 0:
            repositories_per_request = max(1, min(total_repositories, 5))
        return {
            'has_repos': True,
            'pattern': pattern_name,
            'total_repositories': total_repositories,
            'repositories_per_request': repositories_per_request,
            'avg_repo_size': avg_repo_size,
            'factory_files': len(factory_files),
            'uow_files': len(uow_files),
            'sample_repositories': [fa.path for fa in repo_files[:5]]
        }
    except Exception as e:
        print(f"⚠️ Repository pattern analysis failed: {e}")
        return {
            'has_repos': False,
            'pattern': 'None detected',
            'total_repositories': 0,
            'repositories_per_request': 0,
            'avg_repo_size': 0,
            'factory_files': 0,
            'uow_files': 0,
            'sample_repositories': []
        }
async def _analyze_nplusone_queries(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
    """AI-powered N+1 query analysis.

    Picks files whose paths suggest data-access code, sends truncated
    content to the Claude API, and returns the model's prose analysis of
    N+1 query patterns together with a coarse impact grade.
    """
    try:
        print("🔍 Analyzing N+1 query patterns...")
        query_files = [fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['repository', 'service', 'controller', 'dal', 'dao'])]
        if not query_files:
            return {'nplusone_count': 0, 'examples': [], 'impact': 'Low'}
        # Build code samples for AI analysis (first five candidate files)
        code_samples = []
        for fa in query_files[:5]:
            content = getattr(fa, 'content', '') or ''
            if len(content) > 5000:
                content_lines = content.split('\n')
                content = '\n'.join(content_lines[:200])
            # Truncated again to 3000 chars inside the template.
            code_samples.append(f"""
File: {fa.path}
Lines: {fa.lines_of_code}
Code:
{content[:3000]}
""")
        prompt = f"""You are a Database Performance Expert. Analyze this code for N+1 query patterns.
{chr(10).join(code_samples)}
For each file, identify:
1. **Specific N+1 query examples** (quote the exact code)
2. **Query count calculations** (show 1 + N×M pattern)
3. **Database load impact** (estimated query count per request)
4. **Before/After optimization** (complete optimized code)
Format with exact code examples showing:
- BEFORE: The N+1 pattern with query count math
- AFTER: Optimized version with reduced queries
Be specific with query counts and provide working optimized code."""
        message = self.client.messages.create(
            model=os.getenv("CLAUDE_MODEL", "claude-3-5-haiku-latest"),
            max_tokens=6000,
            temperature=0.1,
            messages=[{"role": "user", "content": prompt}]
        )
        ai_analysis = message.content[0].text.strip()
        print("✅ N+1 query analysis complete")
        # Impact grade is based purely on the candidate-file count, not on
        # what the model actually found.
        return {
            'nplusone_count': len(query_files),
            'ai_analysis': ai_analysis,
            'impact': 'High' if len(query_files) > 3 else 'Medium'
        }
    except Exception as e:
        # Best-effort: a failed API call degrades to an empty result, not a crash.
        print(f"⚠️ N+1 query analysis failed: {e}")
        return {'nplusone_count': 0, 'examples': [], 'impact': 'Low'}
def _analyze_controller_endpoints(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
"""Analyze API controller endpoints for explosion and dual patterns."""
try:
controller_files = [fa for fa in analysis.file_analyses if 'controller' in fa.path.lower() or 'api' in fa.path.lower()]
endpoint_counts = {}
largest_controller = None
largest_endpoint_count = 0
for fa in controller_files:
content = getattr(fa, 'content', '') or ''
if not content:
continue
# Count endpoints
endpoint_count = content.count('@HttpGet') + content.count('@HttpPost') + \
content.count('@HttpPut') + content.count('@HttpDelete') + \
content.count('@RequestMapping') + content.count('@GetMapping') + \
content.count('@PostMapping') + content.count('@PutMapping') + \
content.count('@DeleteMapping')
endpoint_counts[fa.path] = endpoint_count
if endpoint_count > largest_endpoint_count:
largest_endpoint_count = endpoint_count
largest_controller = fa
total_endpoints = sum(endpoint_counts.values())
avg_endpoints_per_controller = total_endpoints / len(controller_files) if controller_files else 0
# Check for dual controller patterns
dual_controllers = [fa.path for fa in controller_files if 'dual' in fa.path.lower() or 'double' in fa.path.lower()]
return {
'total_controllers': len(controller_files),
'total_endpoints': total_endpoints,
'avg_endpoints': avg_endpoints_per_controller,
'largest_controller': largest_controller.path if largest_controller else 'None',
'largest_endpoint_count': largest_endpoint_count,
'dual_controllers': len(dual_controllers),
'sample_endpoint_counts': {k: v for k, v in list(endpoint_counts.items())[:5]}
}
except Exception as e:
print(f"⚠️ Controller endpoints analysis failed: {e}")
return {
'total_controllers': 0,
'total_endpoints': 0,
'avg_endpoints': 0,
'largest_controller': 'None',
'largest_endpoint_count': 0,
'dual_controllers': 0,
'sample_endpoint_counts': {}
}
async def _analyze_bulk_upload_system(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
    """AI-powered analysis of bulk upload system issues.

    Selects files whose paths suggest upload/import functionality, sends
    truncated content to the Claude API, and returns the model's analysis
    plus simple local counts (class count, access-modifier occurrences as a
    property-count proxy).
    """
    try:
        print("🔍 Analyzing bulk upload system...")
        upload_files = [fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['upload', 'import', 'bulk', 'excel'])]
        if not upload_files:
            return {'upload_classes': 0, 'total_properties': 0, 'issues': []}
        code_samples = []
        for fa in upload_files[:3]:
            content = getattr(fa, 'content', '') or ''
            if len(content) > 5000:
                content_lines = content.split('\n')
                content = '\n'.join(content_lines[:200])
            # Truncated again to 3000 chars inside the template.
            code_samples.append(f"""
File: {fa.path}
Lines: {fa.lines_of_code}
Code:
{content[:3000]}
""")
        prompt = f"""You are a System Architecture Expert. Analyze this bulk upload system.
{chr(10).join(code_samples)}
Identify:
1. **Upload class count** (how many upload classes)
2. **Total properties** across all upload classes
3. **Type safety problems** (string vs proper types)
4. **Excel template complexity**
5. **Upload failure root causes**
6. **Specific code examples** of problems
Provide detailed analysis with exact code examples showing the issues."""
        message = self.client.messages.create(
            model=os.getenv("CLAUDE_MODEL", "claude-3-5-haiku-latest"),
            max_tokens=6000,
            temperature=0.1,
            messages=[{"role": "user", "content": prompt}]
        )
        ai_analysis = message.content[0].text.strip()
        print("✅ Bulk upload system analysis complete")
        # Count upload classes and properties locally (heuristic: one class
        # per matched file; access modifiers approximate property count).
        upload_classes = len(upload_files)
        total_properties = 0
        for fa in upload_files:
            content = getattr(fa, 'content', '') or ''
            total_properties += content.count('public ') + content.count('private ') + content.count('protected ')
        return {
            'upload_classes': upload_classes,
            'total_properties': total_properties,
            'ai_analysis': ai_analysis,
            'sample_files': [fa.path for fa in upload_files[:5]]
        }
    except Exception as e:
        # Best-effort: a failed API call degrades to an empty result, not a crash.
        print(f"⚠️ Bulk upload analysis failed: {e}")
        return {'upload_classes': 0, 'total_properties': 0, 'issues': []}
def _analyze_background_processing(self, analysis: RepositoryAnalysis) -> Dict[str, Any]:
"""Analyze background processing and threading issues."""
try:
thread_files = [fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['thread', 'background', 'scheduler', 'async', 'task'])]
email_files = [fa for fa in analysis.file_analyses if 'email' in fa.path.lower() or 'mail' in fa.path.lower()]
manual_thread_count = 0
threadpool_usage = False
for fa in thread_files:
content = getattr(fa, 'content', '') or ''
# Count manual thread creation
manual_thread_count += content.count('new Thread(') + content.count('Thread thread =')
# Check for thread pool usage
if any(pool in content for pool in ['ThreadPool', 'Task.Run', 'async Task', '@Async']):
threadpool_usage = True
# Check for email system
email_implementation = 'Basic' if email_files else 'None'
return {
'manual_thread_count': manual_thread_count,
'threadpool_usage': threadpool_usage,
'thread_files': len(thread_files),
'email_implementation': email_implementation,
'email_files': len(email_files),
'sample_files': [fa.path for fa in thread_files[:5]]
}
except Exception as e:
print(f"⚠️ Background processing analysis failed: {e}")
return {
'manual_thread_count': 0,
'threadpool_usage': False,
'thread_files': 0,
'email_implementation': 'None',
'email_files': 0,
'sample_files': []
}
async def _analyze_performance_per_layer(self, analysis: "RepositoryAnalysis") -> Dict[str, Any]:
    """AI-powered performance analysis per layer.

    Samples up to two files from each backend layer (controller, service,
    repository), asks Claude for a request-lifecycle timing breakdown, and
    augments the response with a rough frontend bundle-size estimate.

    Args:
        analysis: Completed repository analysis with per-file results.

    Returns:
        Dict with 'timings' (per-layer estimate strings), 'ai_analysis'
        (model output, or None when unavailable) and 'total_frontend_lines'.
        When no samples exist or the call fails, 'timings' is empty and a
        'summary' message is included; 'ai_analysis' is always present so
        callers can access it uniformly.
    """
    try:
        print("🔍 Analyzing performance impact per layer...")

        def files_matching(keyword: str):
            # Layer membership is inferred purely from the file path.
            return [fa for fa in analysis.file_analyses if keyword in fa.path.lower()]

        frontend_files = [
            fa for fa in analysis.file_analyses
            if any(ext in fa.path.lower() for ext in ['.js', '.jsx', '.ts', '.tsx'])
        ]

        # Build up to two truncated (3000-char) code samples per backend layer.
        samples = []
        for label, layer_files in (
            ('Controller', files_matching('controller')),
            ('Service', files_matching('service')),
            ('Repository', files_matching('repository')),
        ):
            for fa in layer_files[:2]:
                content = getattr(fa, 'content', '') or ''
                samples.append(f"[{label}] {fa.path}\n{content[:3000]}")

        if not samples:
            return {'timings': {}, 'summary': 'No performance analysis possible', 'ai_analysis': None}

        prompt = f"""You are a Performance Expert. Analyze this code for end-to-end request lifecycle timing.
{chr(10).join(samples[:10])}
For each layer, provide:
1. **Request lifecycle timing** breakdown
2. **Database operation timing**
3. **Service layer timing**
4. **Controller overhead timing**
5. **Frontend bundle size impact**
6. **Complete request time breakdown**
Provide specific timing estimates with calculations showing where time is spent in each layer."""
        message = self.client.messages.create(
            model=os.getenv("CLAUDE_MODEL", "claude-3-5-haiku-latest"),
            max_tokens=6000,
            temperature=0.1,
            messages=[{"role": "user", "content": prompt}]
        )
        ai_analysis = message.content[0].text.strip()
        print("✅ Performance per layer analysis complete")

        # Heuristic: ~0.5 KB of bundle output per line of frontend source.
        total_frontend_lines = sum(fa.lines_of_code for fa in frontend_files)
        bundle_size_mb = (total_frontend_lines * 0.5) / 1000
        return {
            'timings': {
                'controller_overhead': '50-100ms',
                'service_processing': '100-200ms',
                'database_queries': '200-500ms',
                'frontend_bundle': f'{bundle_size_mb:.1f}MB'
            },
            'ai_analysis': ai_analysis,
            'total_frontend_lines': total_frontend_lines
        }
    except Exception as e:
        print(f"⚠️ Performance per layer analysis failed: {e}")
        # Keep 'ai_analysis' present so callers can uniformly read it.
        return {'timings': {}, 'summary': 'Analysis failed', 'ai_analysis': None}
# ========== Formatting Utilities ==========
def _format_bulleted_html(self, text: str) -> str:
"""Normalize bullets/line breaks so each bullet shows on its own line in PDF.
Converts newlines before bullets to bullets and compacts paragraph breaks.
"""
if not text:
return text
t = text.strip()
# Paragraph breaks
t = re.sub(r"\n\n+", "
", t)
# Bullets using •, -, *
t = re.sub(r"\n\s*[•\-\*]\s*", " • ", t)
# Ensure there is a break after headings like :
t = re.sub(r"\s*", " ", t)
return t
def get_memory_config() -> Dict[str, Any]:
    """Get memory system configuration from environment variables.

    Reads Redis, MongoDB and Postgres connection settings, applying
    localhost-oriented defaults when a variable is unset and coercing
    port/db numbers to int.
    """
    # (config key, environment variable, default, coercion)
    settings = (
        ('anthropic_api_key', 'ANTHROPIC_API_KEY', '', str),
        ('redis_host', 'REDIS_HOST', 'localhost', str),
        ('redis_port', 'REDIS_PORT', 6379, int),
        ('redis_db', 'REDIS_DB', 0, int),
        ('mongodb_url', 'MONGODB_URL', 'mongodb://localhost:27017/', str),
        ('mongodb_name', 'MONGODB_DB', 'repo_analyzer', str),
        ('postgres_host', 'POSTGRES_HOST', 'localhost', str),
        ('postgres_port', 'POSTGRES_PORT', 5432, int),
        ('postgres_db', 'POSTGRES_DB', 'repo_vectors', str),
        ('postgres_user', 'POSTGRES_USER', 'postgres', str),
        ('postgres_password', 'POSTGRES_PASSWORD', '', str),
    )
    return {key: cast(os.getenv(var, default)) for key, var, default, cast in settings}
async def main():
    """Main function to run the enhanced repository analyzer.

    Parses the CLI (repo path, output PDF path, optional API key), runs
    the complete analysis, writes the PDF report, and prints a console
    summary (quality breakdown, language distribution, memory stats).

    Returns:
        int: 0 on success; 1 when the API key is missing or the analysis
        raises any exception.
    """
    load_dotenv()
    import argparse
    parser = argparse.ArgumentParser(description="Complete AI Repository Analysis - Analyzes ALL files automatically")
    parser.add_argument("repo_path", help="Repository path (local directory or Git URL)")
    parser.add_argument("--output", "-o", default="complete_repository_analysis.pdf",
                        help="Output PDF file path")
    parser.add_argument("--api-key", help="Anthropic API key (overrides .env)")
    args = parser.parse_args()
    # Get API key — the CLI flag takes precedence over the .env value.
    api_key = args.api_key or os.getenv('ANTHROPIC_API_KEY')
    if not api_key:
        print("❌ Error: ANTHROPIC_API_KEY not found in .env file or command line")
        return 1
    try:
        print("🚀 Starting Complete AI Repository Analysis")
        print("=" * 60)
        print(f"Repository: {args.repo_path}")
        print(f"Output: {args.output}")
        print("Mode: Complete automated analysis of ALL files")
        print("=" * 60)
        # Initialize enhanced analyzer with the memory-system configuration.
        config = get_memory_config()
        analyzer = EnhancedGitHubAnalyzer(api_key, config)
        # Perform complete analysis (async: clones/reads and analyzes all files).
        analysis = await analyzer.analyze_repository_with_memory(args.repo_path)
        # Generate PDF report
        analyzer.create_pdf_report(analysis, args.output)
        # Print summary to console
        print("\n" + "=" * 60)
        print("🎯 COMPLETE ANALYSIS FINISHED")
        print("=" * 60)
        print(f"📊 Repository Statistics:")
        print(f" • Files Analyzed: {analysis.total_files}")
        print(f" • Lines of Code: {analysis.total_lines:,}")
        print(f" • Languages: {len(analysis.languages)}")
        print(f" • Code Quality: {analysis.code_quality_score:.1f}/10")
        # Quality breakdown by per-file severity score (higher = better here).
        high_quality = len([fa for fa in analysis.file_analyses if fa.severity_score >= 8])
        medium_quality = len([fa for fa in analysis.file_analyses if 5 <= fa.severity_score < 8])
        low_quality = len([fa for fa in analysis.file_analyses if fa.severity_score < 5])
        print(f"\n📈 Quality Breakdown:")
        print(f" • High Quality Files (8-10): {high_quality}")
        print(f" • Medium Quality Files (5-7): {medium_quality}")
        print(f" • Low Quality Files (1-4): {low_quality}")
        # issues_found may not be list-like on every file; non-lists count as 0.
        print(f" • Total Issues Found: {sum(len(fa.issues_found) if isinstance(fa.issues_found, (list, tuple)) else 0 for fa in analysis.file_analyses)}")
        # Language breakdown — top 10 languages by file count.
        print(f"\n🔤 Language Distribution:")
        for lang, count in sorted(analysis.languages.items(), key=lambda x: x[1], reverse=True)[:10]:
            print(f" • {lang}: {count} files")
        # Memory system stats (Redis/MongoDB/Postgres usage counters).
        memory_stats = await analyzer.memory_manager.get_memory_stats()
        print(f"\n🧠 Memory System Statistics:")
        for category, data in memory_stats.items():
            print(f" • {category.replace('_', ' ').title()}: {data}")
        print(f"\n📄 Complete PDF Report: {args.output}")
        print("\n✅ Complete analysis finished successfully!")
        return 0
    except Exception as e:
        # Broad catch is deliberate for a CLI entry point: report and exit 1.
        print(f"❌ Error during analysis: {e}")
        import traceback
        traceback.print_exc()
        return 1
def _analyze_architecture_patterns(self, analysis: RepositoryAnalysis) -> dict:
"""Analyze actual architectural patterns from the codebase."""
# Detect project type based on file structure and patterns
project_type = "Unknown"
project_evidence = "No clear architectural pattern detected"
# Look for microservice indicators
microservice_indicators = 0
monolithic_indicators = 0
# Check for common microservice patterns
for file_analysis in analysis.file_analyses:
file_path = file_analysis.path.lower()
file_content = getattr(file_analysis, 'content', '') or ''
# Microservice indicators
if any(indicator in file_path for indicator in ['docker', 'kubernetes', 'helm', 'service-mesh']):
microservice_indicators += 1
if any(indicator in file_content for indicator in ['@EnableEurekaClient', '@EnableDiscoveryClient', 'consul', 'etcd']):
microservice_indicators += 1
if any(indicator in file_path for indicator in ['api-gateway', 'service-discovery', 'config-server']):
microservice_indicators += 1
# Monolithic indicators
if any(indicator in file_path for indicator in ['monolith', 'single-app', 'main-application']):
monolithic_indicators += 1
if any(indicator in file_content for indicator in ['@SpringBootApplication', 'main()', 'Application.run']):
monolithic_indicators += 1
if file_analysis.lines_of_code > 1000: # Large files suggest monolith
monolithic_indicators += 1
# Determine project type
if microservice_indicators > monolithic_indicators:
project_type = "Microservices Architecture"
project_evidence = f"Found {microservice_indicators} microservice indicators (Docker, service discovery, API gateways)"
elif monolithic_indicators > 0:
project_type = "Monolithic Architecture"
project_evidence = f"Found {monolithic_indicators} monolithic indicators (large files, single application structure)"
else:
project_type = "Modular Monolith"
project_evidence = "Mixed patterns detected - likely a modular monolith transitioning to microservices"
# Find code examples for detailed analysis
code_examples = []
for file_analysis in analysis.file_analyses:
if file_analysis.lines_of_code > 500: # Focus on large files
code_examples.append({
'title': f"Large File Analysis: {file_analysis.path.split('/')[-1]}",
'file': file_analysis.path,
'lines': file_analysis.lines_of_code,
'issue': f"File exceeds recommended size ({file_analysis.lines_of_code} lines)",
'code_snippet': self._extract_code_snippet(file_analysis)
})
return {
'project_type': project_type,
'project_evidence': project_evidence,
'code_examples': code_examples[:5] # Top 5 examples
}
def _analyze_controller_layer(self, analysis: RepositoryAnalysis) -> dict:
"""Analyze API controller layer patterns."""
controller_files = []
total_endpoints = 0
security_issues = []
for file_analysis in analysis.file_analyses:
file_path = file_analysis.path.lower()
file_content = getattr(file_analysis, 'content', '') or ''
# Detect controller files
if any(indicator in file_path for indicator in ['controller', 'api', 'endpoint', 'route']):
controller_files.append(file_analysis)
# Count endpoints (rough estimate)
endpoint_count = file_content.count('@RequestMapping') + file_content.count('@GetMapping') + \
file_content.count('@PostMapping') + file_content.count('@PutMapping') + \
file_content.count('@DeleteMapping') + file_content.count('@RestController')
total_endpoints += endpoint_count
# Check for security issues
if 'password' in file_content.lower() and 'hardcoded' in file_content.lower():
security_issues.append("Hardcoded passwords detected")
if '@CrossOrigin(origins = "*")' in file_content:
security_issues.append("Wildcard CORS policy detected")
if 'migration' in file_path and 'public' in file_content:
security_issues.append("Public migration endpoint detected")
largest_controller = max(controller_files, key=lambda x: x.lines_of_code) if controller_files else None
return {
'controller_count': len(controller_files),
'total_endpoints': total_endpoints,
'largest_controller': f"{largest_controller.path} ({largest_controller.lines_of_code} lines)" if largest_controller else "None",
'security_issues': "; ".join(security_issues) if security_issues else "No major security issues detected"
}
def _analyze_backend_patterns(self, analysis: RepositoryAnalysis) -> dict:
"""Analyze backend architectural patterns."""
# Data layer analysis
data_files = [fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['entity', 'model', 'dbcontext', 'migration', 'config'])]
data_pattern = "Entity Framework" if any('dbcontext' in fa.path.lower() for fa in data_files) else "Custom ORM"
config_files = len([fa for fa in data_files if 'config' in fa.path.lower()])
config_lines = sum(fa.lines_of_code for fa in data_files if 'config' in fa.path.lower())
# Service layer analysis
service_files = [fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['service', 'business', 'logic', 'manager'])]
service_pattern = "Service Layer Pattern" if service_files else "No clear service layer"
largest_service = max(service_files, key=lambda x: x.lines_of_code) if service_files else None
# Repository layer analysis
repo_files = [fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['repository', 'dao', 'dataaccess'])]
repo_pattern = "Repository Pattern" if repo_files else "Direct Data Access"
factory_usage = any('factory' in fa.path.lower() for fa in repo_files)
return {
'data_layer': {
'pattern': data_pattern,
'config_files': config_files,
'config_lines': config_lines,
'issues': f"{len(data_files)} data files, {config_lines} configuration lines"
},
'service_layer': {
'pattern': service_pattern,
'service_files': len(service_files),
'largest_service': f"{largest_service.path} ({largest_service.lines_of_code} lines)" if largest_service else "None",
'issues': f"{len(service_files)} service files found"
},
'repository_layer': {
'pattern': repo_pattern,
'repository_files': len(repo_files),
'factory_usage': "Factory pattern detected" if factory_usage else "No factory pattern",
'issues': f"{len(repo_files)} repository files found"
}
}
def _extract_code_snippet(self, file_analysis) -> str:
"""Extract a code snippet from file analysis."""
content = getattr(file_analysis, 'content', '') or ''
if not content:
return "// Code content not available"
# Extract first 20 lines as snippet
lines = content.split('\n')[:20]
snippet = '\n'.join(lines)
# Truncate if too long
if len(snippet) > 500:
snippet = snippet[:500] + "\n// ... (truncated)"
return snippet
async def _analyze_frontend_architecture_ai(self, analysis: RepositoryAnalysis) -> dict:
    """AI-based comprehensive frontend architecture analysis using Claude API.

    Pipeline:
      1. Detect frontend files via extension, directory and filename
         heuristics, with explicit HTML/CSS/JS fallback passes.
      2. Bundle truncated file contents by category (configs, components,
         routing, state management) into a large structured prompt.
      3. Ask Claude for an evidence-based architecture report; on failure
         retry with a simplified prompt, then fall back to a statically
         generated summary so 'ai_analysis' is never missing.

    Args:
        analysis: Completed repository analysis with per-file results.

    Returns:
        dict with 'has_frontend', 'ai_analysis' and summary statistics;
        when no frontend files are found, a minimal dict with
        has_frontend=False and ai_analysis=None.
    """
    # Identify frontend files - ENHANCED DETECTION
    frontend_files = []
    frontend_extensions = [
        # JavaScript/TypeScript files
        '.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs',
        # Vue/Svelte frameworks
        '.vue', '.svelte',
        # HTML files
        '.html', '.htm', '.xhtml',
        # CSS and styling files
        '.css', '.scss', '.sass', '.less', '.styl', '.stylus',
        # Frontend configuration files
        '.json',  # package.json, tsconfig.json, etc.
    ]
    # Frontend-related directories
    frontend_dirs = [
        'frontend', 'src/app', 'src/components', 'src/pages', 'src/views',
        'components', 'pages', 'views', 'app', 'public', 'static',
        'assets', 'styles', 'stylesheets', 'css', 'html',
        'www', 'web', 'client', 'ui', 'interface'
    ]
    # Frontend-related file patterns
    frontend_patterns = [
        'index.html', 'index.htm', 'app.html', 'main.html',
        'style.css', 'main.css', 'app.css', 'styles.css',
        'package.json', 'package-lock.json', 'yarn.lock',
        'tsconfig.json', 'jsconfig.json', 'babel.config',
        'webpack.config', 'vite.config', 'rollup.config',
        'tailwind.config', 'postcss.config'
    ]
    for file_analysis in analysis.file_analyses:
        file_path = file_analysis.path.lower()
        file_name = file_path.split('/')[-1]
        # Check 1: File extension
        is_frontend_ext = any(file_path.endswith(ext) for ext in frontend_extensions)
        # Check 2: Frontend directories (note: 'dir' shadows the builtin here)
        is_in_frontend_dir = any(
            f"/{dir}/" in file_path or
            file_path.startswith(f"{dir}/") or
            file_path == dir
            for dir in frontend_dirs
        )
        # Check 3: Frontend file patterns
        is_frontend_pattern = any(
            pattern in file_name or pattern in file_path
            for pattern in frontend_patterns
        )
        # Check 4: JSON files in root (likely package.json, config files)
        if file_path.endswith('.json') and '/' not in file_path.replace('\\', '/'):
            is_frontend_ext = True
        # Check 5: HTML files anywhere (they are definitely frontend)
        if file_path.endswith(('.html', '.htm', '.xhtml')):
            is_frontend_ext = True
        if is_frontend_ext or is_in_frontend_dir or is_frontend_pattern:
            frontend_files.append(file_analysis)
    # Debug logging
    print(f"🔍 [FRONTEND AI] Found {len(frontend_files)} frontend files after initial detection")
    if frontend_files:
        print(f"🔍 [FRONTEND AI] Frontend files detected:")
        for fa in frontend_files[:10]:
            print(f" - {fa.path} ({fa.lines_of_code} lines)")
    # ENSURE: Even if no frontend files detected by extension, check for HTML/CSS explicitly
    if not frontend_files:
        print(f"⚠️ [FRONTEND AI] No frontend files in initial detection, doing explicit HTML/CSS check...")
        # Double-check for HTML and CSS files that might have been missed
        for file_analysis in analysis.file_analyses:
            file_path = file_analysis.path.lower()
            # Check for HTML files
            if file_path.endswith(('.html', '.htm', '.xhtml')):
                if file_analysis not in frontend_files:
                    frontend_files.append(file_analysis)
                    print(f"🔍 [FRONTEND AI] Added HTML file: {file_analysis.path}")
            # Check for CSS files
            elif file_path.endswith(('.css', '.scss', '.sass', '.less', '.styl')):
                if file_analysis not in frontend_files:
                    frontend_files.append(file_analysis)
                    print(f"🔍 [FRONTEND AI] Added CSS file: {file_analysis.path}")
            # Check for JavaScript files
            elif file_path.endswith(('.js', '.jsx', '.mjs', '.cjs')):
                if file_analysis not in frontend_files:
                    frontend_files.append(file_analysis)
                    print(f"🔍 [FRONTEND AI] Added JavaScript file: {file_analysis.path}")
    # Final check - if still no frontend files, log all files for debugging
    if not frontend_files:
        print("⚠️ [FRONTEND AI] No frontend files detected after all checks")
        print(f"🔍 [FRONTEND AI] Sample files in analysis:")
        for fa in analysis.file_analyses[:20]:
            print(f" - {fa.path} (extension: {fa.path.split('.')[-1] if '.' in fa.path else 'none'})")
        return {
            'has_frontend': False,
            'ai_analysis': None,
            'frontend_file_count': 0
        }
    print(f"✅ [FRONTEND AI] Final count: {len(frontend_files)} frontend files detected")
    # Prepare frontend files content for AI analysis
    frontend_files_content = []
    config_files_content = []
    component_files = []
    routing_files = []
    state_files = []
    newline = chr(10)  # Define newline once to avoid backslash issues in f-strings
    for file_analysis in frontend_files:
        file_path = file_analysis.path.lower()
        # Safely get file content, handle None or empty
        file_content = getattr(file_analysis, 'content', '') or ''
        if file_content is None:
            file_content = ''
        # Skip files with no content (unless they're config files which might be important)
        if not file_content.strip() and 'package.json' not in file_path and 'config' not in file_path:
            continue
        # Collect config files (truncated to 5000 chars each)
        if any(config in file_path for config in ['package.json', 'package-lock.json', 'tsconfig.json',
                                                  'jsconfig.json', 'vite.config', 'next.config',
                                                  'angular.json', 'nuxt.config', 'svelte.config',
                                                  'webpack.config', 'rollup.config', 'tailwind.config']):
            config_files_content.append(f"=== {file_analysis.path} ==={newline}{file_content[:5000]}{newline}")
        # Collect component files
        if any(ext in file_path for ext in ['.jsx', '.tsx', '.vue', '.svelte']):
            component_files.append({
                'path': file_analysis.path,
                'content': file_content[:3000] if file_content else '',  # Limit content size
                'lines': file_analysis.lines_of_code
            })
        # Collect routing files
        if any(route_indicator in file_path for route_indicator in ['route', 'router', 'navigation', 'app.js', 'app.tsx', '_app', 'pages']):
            routing_files.append({
                'path': file_analysis.path,
                'content': file_content[:3000] if file_content else '',
                'lines': file_analysis.lines_of_code
            })
        # Collect state management files
        if any(state_indicator in file_path for state_indicator in ['store', 'context', 'state', 'redux', 'zustand', 'recoil', 'mobx', 'pinia', 'vuex']):
            state_files.append({
                'path': file_analysis.path,
                'content': file_content[:3000] if file_content else '',
                'lines': file_analysis.lines_of_code
            })
        # Collect all frontend files (limited)
        if len(frontend_files_content) < 20:  # Limit to 20 files for analysis
            frontend_files_content.append(f"=== {file_analysis.path} ({file_analysis.lines_of_code} lines) ==={newline}{file_content[:2000] if file_content else '[No content]'}{newline}")
    # Prepare comprehensive AI prompt for frontend analysis - ENHANCED FOR NON-TECHNICAL AUDIENCE
    # Build strings outside f-string to avoid backslash issues
    config_files_text = newline.join(config_files_content[:5]) if config_files_content else "No configuration files found"
    component_files_list = []
    for cf in component_files[:10]:
        component_files_list.append(f"=== {cf['path']} ({cf['lines']} lines) ==={newline}{cf['content']}{newline}")
    component_files_text = newline.join(component_files_list) if component_files else "No component files found"
    routing_files_list = []
    for rf in routing_files[:5]:
        routing_files_list.append(f"=== {rf['path']} ({rf['lines']} lines) ==={newline}{rf['content']}{newline}")
    routing_files_text = newline.join(routing_files_list) if routing_files else "No routing files found"
    state_files_list = []
    for sf in state_files[:5]:
        state_files_list.append(f"=== {sf['path']} ({sf['lines']} lines) ==={newline}{sf['content']}{newline}")
    state_files_text = newline.join(state_files_list) if state_files else "No state management files found"
    frontend_files_text = newline.join(frontend_files_content[:15]) if frontend_files_content else "No frontend files with content found"
    # Get file type breakdown
    html_files = [fa for fa in frontend_files if fa.path.lower().endswith(('.html', '.htm'))]
    css_files = [fa for fa in frontend_files if fa.path.lower().endswith(('.css', '.scss', '.sass', '.less'))]
    js_files = [fa for fa in frontend_files if fa.path.lower().endswith(('.js', '.jsx', '.mjs', '.cjs'))]
    ts_files = [fa for fa in frontend_files if fa.path.lower().endswith(('.ts', '.tsx'))]
    front_end_prompt = f"""
You are a Senior Frontend Architect and Technical Writer with 20+ years of experience. Analyze this frontend codebase and produce a comprehensive, technically precise report. The audience includes senior engineers and stakeholders who expect evidence-based, objective findings.
STRICT STYLE RULES:
- Use professional, technical language only. Do not use analogies, metaphors, storytelling, or colloquial comparisons.
- Ground every statement in repository evidence. Reference file paths, components, functions, metrics, or configurations explicitly.
- When assumptions are necessary, label them clearly and mark for validation.
- Follow the outline exactly; preserve headings, numbering, and bullet structure.
- Provide 2000-3000 words of detailed insight.
FRONTEND FILES SUMMARY:
- Total Frontend Files: {len(frontend_files)}
- Total Frontend Lines: {sum(fa.lines_of_code for fa in frontend_files):,}
- HTML Files: {len(html_files)}
- CSS/Styling Files: {len(css_files)}
- JavaScript Files: {len(js_files)}
- TypeScript Files: {len(ts_files)}
- Component Files: {len(component_files)}
- Routing Files: {len(routing_files)}
- State Management Files: {len(state_files)}
CONFIGURATION FILES:
{config_files_text}
COMPONENT FILES:
{component_files_text}
ROUTING FILES:
{routing_files_text}
STATE MANAGEMENT FILES:
{state_files_text}
SAMPLE FRONTEND FILES:
{frontend_files_text}
Produce the analysis using this structure:
**1. FRONTEND FRAMEWORK DETECTION:**
- Identify framework(s) and versions (from package.json, lockfiles, config files).
- Note meta-framework usage (Next.js, Nuxt, Remix, etc.).
- Highlight version-specific issues or EOL dependencies.
**2. TECHNOLOGY STACK ANALYSIS:**
- Catalogue all dependencies with purpose and usage context.
- Flag outdated/vulnerable packages (cite version/date where possible).
- Identify redundant or overlapping libraries.
- Summarize build tooling, testing frameworks, bundlers.
**3. COMPONENT ARCHITECTURE (GRANULAR ANALYSIS):**
For each major component:
- Name, responsibility, and file path.
- Props/inputs, outputs, and state managed.
- Dependencies and side effects (API calls, browser APIs, storage).
- Hierarchy position (parent/child relationships) and reusability assessment.
**4. NAVIGATION & ROUTING ANALYSIS:**
- Routing mechanism and configuration files.
- Route definitions, dynamic parameters, guards/middleware.
- Navigation flow between major views.
- Lazy loading/split points tied to routing.
**5. STATE MANAGEMENT ANALYSIS:**
- State management paradigm (Context, Redux, Zustand, etc.).
- Global vs local state boundaries.
- Data flow patterns, caching strategies, synchronization points.
- Known issues or improvement opportunities.
**6. FRONTEND ARCHITECTURE FLOW (TECHNICAL WALKTHROUGH):**
6.1. WHAT IS THE FRONTEND?
- Define the UI layer in this repository.
- Enumerate file types and roles (HTML templating, styling, interactivity).
6.2. HOW DOES THE FRONTEND WORK? (STEP-BY-STEP FLOW)
- User Action Flow: trace representative interactions through handlers, state updates, and rendering.
- Component Interaction: describe data exchange, context usage, dependencies.
- Data Flow: document sources (APIs, constants), transformation, caching, rendering.
- Navigation Flow: explain route resolution, dynamic segments, transition handling.
- State Updates: detail triggers, asynchronous coordination, batching/debouncing behavior.
6.3. VISUAL MAPPING (TEXTUAL DESCRIPTION)
- Provide a textual map of main views/pages and relationships.
- Describe component hierarchy like a dependency graph (parent/child/data links).
6.4. TECHNICAL SUMMARY
- Summarize strengths, weaknesses, and priorities in concise technical terms only.
**7. PERFORMANCE ANALYSIS:**
- Bundle size estimates (computed or inferred).
- Code splitting/lazy loading evaluation.
- Runtime performance considerations (render hotspots, expensive operations).
- Optimization opportunities with expected impact.
**8. UI/UX REVIEW (STRUCTURED OBSERVATIONS):**
- Layout architecture, design system usage, spacing/typography consistency.
- Accessibility findings (ARIA usage, keyboard navigation, color contrast).
- Responsiveness across breakpoints/devices.
- Usability issues grounded in code evidence.
**9. RISK & MATURITY ASSESSMENT:**
- Architecture risk score (1-10) with technical justification.
- Maintainability risk score (comment on complexity, coupling).
- Business risk commentary (security exposure, performance degradation, compliance concerns).
- Identify technical debt hotspots with file/component references.
**10. PRIORITIZED RECOMMENDATIONS:**
- List improvements grouped into short-term (next sprint), medium-term (1-3 months), long-term (3-6 months).
- For each recommendation provide action, expected outcome, and risk mitigated.
FINAL REQUIREMENTS:
- Keep tone technical and objective; no analogies, metaphors, or storytelling.
- Use markdown formatting as requested (bold headings, bullet lists).
- Cite file paths, components, and metrics wherever possible.
- Ensure total length between 2000-3000 words.
"""
    try:
        print(f"🤖 [FRONTEND AI] Calling Claude API for comprehensive frontend analysis...")
        print(f"🤖 [FRONTEND AI] Analyzing {len(frontend_files)} frontend files...")
        # Call Claude API for comprehensive frontend analysis - INCREASED TOKENS for detailed analysis
        message = self.client.messages.create(
            model=os.getenv("CLAUDE_MODEL", "claude-3-5-haiku-latest"),
            max_tokens=8000,  # Increased from 6000 to 8000 for more detailed analysis
            temperature=0.1,
            messages=[{"role": "user", "content": front_end_prompt}]
        )
        ai_analysis = message.content[0].text.strip()
        print(f"✅ [FRONTEND AI] AI analysis completed successfully ({len(ai_analysis)} characters)")
        # Ensure analysis is not empty
        if not ai_analysis or len(ai_analysis) < 100:
            print("⚠️ [FRONTEND AI] AI analysis too short, regenerating...")
            # Retry with more emphasis on detail
            retry_prompt = front_end_prompt + "\n\nIMPORTANT: Provide a VERY DETAILED analysis. The previous response was too short. Please provide at least 2000 words of detailed explanation."
            message = self.client.messages.create(
                model=os.getenv("CLAUDE_MODEL", "claude-3-5-haiku-latest"),
                max_tokens=8000,
                temperature=0.1,
                messages=[{"role": "user", "content": retry_prompt}]
            )
            ai_analysis = message.content[0].text.strip()
        # Extract statistics for backward compatibility
        # (largest_frontend_file is computed but unused — kept for parity.)
        largest_frontend_file = max(frontend_files, key=lambda x: x.lines_of_code) if frontend_files else None
        largest_files = sorted(frontend_files, key=lambda x: x.lines_of_code, reverse=True)[:5]
        largest_files_info = [{'name': fa.path.split('/')[-1], 'lines': fa.lines_of_code} for fa in largest_files]
        test_files = [fa for fa in frontend_files if any(indicator in fa.path.lower() for indicator in ['test', 'spec', '__tests__'])]
        empty_test_files = len([fa for fa in test_files if fa.lines_of_code == 0])
        total_frontend_lines = sum(fa.lines_of_code for fa in frontend_files)
        return {
            'has_frontend': True,
            'ai_analysis': ai_analysis,
            'frontend_file_count': len(frontend_files),
            'total_frontend_lines': total_frontend_lines,
            'component_count': len(component_files),
            'routing_files_count': len(routing_files),
            'state_files_count': len(state_files),
            'largest_files': largest_files_info,
            'test_file_count': len(test_files),
            'empty_test_files': empty_test_files,
            # Heuristic bundle estimate: ~0.5 KB per line of frontend source.
            'bundle_size_estimate': f"{(total_frontend_lines * 0.5) / 1000:.1f} MB"
        }
    except Exception as e:
        print(f"❌ Error in AI frontend analysis: {e}")
        import traceback
        traceback.print_exc()
        # CRITICAL: If frontend files exist, we MUST generate analysis - retry with simpler prompt
        print(f"🔄 [FRONTEND AI] Retrying with simplified prompt...")
        try:
            # Create a simpler, more focused prompt that's more likely to succeed
            simple_prompt = f"""
You are a senior frontend engineer. The previous request failed; provide a concise but technically accurate overview of this frontend codebase.
STYLE:
- Use professional technical language only. Do not use analogies, metaphors, or storytelling.
- Reference specific file paths, components, and observable behavior from the repository.
- Mark any assumptions clearly as assumptions.
FRONTEND FILES DETECTED:
- Total Frontend Files: {len(frontend_files)}
- HTML Files: {len(html_files)}
- CSS Files: {len(css_files)}
- JavaScript Files: {len(js_files)}
SAMPLE FRONTEND FILES:
{frontend_files_text[:5000]}
Deliver a structured explanation covering:
1. File types present and the responsibilities they implement.
2. Step-by-step technical description of rendering, event handling, and data flow.
3. Component/module organization and how information moves between them.
4. Routing or navigation behavior, if applicable.
5. Key risks, limitations, or improvement areas supported by code evidence.
Write at least 1000 words and adhere strictly to the technical style.
"""
            retry_message = self.client.messages.create(
                model=os.getenv("CLAUDE_MODEL", "claude-3-5-haiku-latest"),
                max_tokens=8000,
                temperature=0.1,
                messages=[{"role": "user", "content": simple_prompt}]
            )
            ai_analysis = retry_message.content[0].text.strip()
            print(f"✅ [FRONTEND AI] Retry successful ({len(ai_analysis)} characters)")
        except Exception as retry_error:
            print(f"❌ [FRONTEND AI] Retry also failed: {retry_error}")
            # Last resort: Generate a basic but informative analysis
            largest_files = sorted(frontend_files, key=lambda x: x.lines_of_code, reverse=True)[:5] if frontend_files else []
            total_frontend_lines = sum(fa.lines_of_code for fa in frontend_files)
            # Generate a basic analysis even without AI
            basic_analysis = f"""
**FRONTEND ARCHITECTURE ANALYSIS**
**Overview**
This repository contains {len(frontend_files)} frontend files with a total of {total_frontend_lines:,} lines of code. The following summary is generated without AI assistance.
**Frontend File Types Detected**
- HTML Files: {len(html_files)} files - define the DOM structure and static content for each view.
- CSS Files: {len(css_files)} files - implement styling rules, layout definitions, and responsive behavior.
- JavaScript Files: {len(js_files)} files - provide interaction handlers, client-side logic, and integration with backend services.
- TypeScript Files: {len(ts_files)} files - supply strongly typed client logic compiled to JavaScript.
**Execution Flow**
1. HTML documents load in the browser and reference the required CSS and JavaScript bundles.
2. CSS stylesheets apply layout, typography, and responsive rules to the rendered DOM.
3. JavaScript or TypeScript modules register event listeners, hydrate components, and coordinate data retrieval.
4. State updates trigger DOM mutations through the frontend framework, refreshing the visible UI.
**Structure Summary**
- Frontend source files are grouped by feature or component to deliver the user interface.
- Styling assets are maintained separately to control presentation concerns.
- Interactive behavior is encapsulated within JavaScript or TypeScript modules.
**Note**
Detailed AI analysis was unavailable; this summary is derived from static inspection of the repository contents.
"""
            ai_analysis = basic_analysis
            print(f"⚠️ [FRONTEND AI] Using basic analysis fallback")
        # Return with AI analysis (even if it's basic)
        largest_files = sorted(frontend_files, key=lambda x: x.lines_of_code, reverse=True)[:5] if frontend_files else []
        largest_files_info = [{'name': fa.path.split('/')[-1], 'lines': fa.lines_of_code} for fa in largest_files]
        total_frontend_lines = sum(fa.lines_of_code for fa in frontend_files)
        test_files = [fa for fa in frontend_files if any(indicator in fa.path.lower() for indicator in ['test', 'spec', '__tests__'])]
        return {
            'has_frontend': True,
            'ai_analysis': ai_analysis,  # Always return analysis, even if basic
            'frontend_file_count': len(frontend_files),
            'total_frontend_lines': total_frontend_lines,
            'component_count': len(component_files),
            'routing_files_count': len(routing_files),
            'state_files_count': len(state_files),
            'largest_files': largest_files_info,
            'test_file_count': len(test_files),
            'empty_test_files': len([fa for fa in test_files if fa.lines_of_code == 0]),
            'bundle_size_estimate': f"{(total_frontend_lines * 0.5) / 1000:.1f} MB"
        }
def _analyze_frontend_architecture(self, analysis: "RepositoryAnalysis") -> dict:
    """Synchronous wrapper for AI-based frontend architecture analysis.

    Bridges the async ``_analyze_frontend_architecture_ai`` coroutine into a
    synchronous call site, handling three event-loop situations:

    1. a loop is already running  -> run the coroutine on a fresh loop in a
       worker thread (120 s timeout);
    2. a loop exists but is idle  -> drive it with ``run_until_complete``;
    3. no loop exists             -> ``asyncio.run``.

    If the AI path fails for any reason, falls back to static,
    extension-based detection of frontend files so the caller always gets a
    result dict containing at least ``has_frontend``, ``ai_analysis`` and
    ``frontend_file_count``.

    Args:
        analysis: repository analysis object; only ``file_analyses`` (each
            with ``path`` and ``lines_of_code``) is read here.

    Returns:
        dict with the frontend report fields (see the fallback helper for the
        exact key set produced on the static path).
    """
    print(f"🔍 [FRONTEND WRAPPER] Starting frontend architecture analysis...")
    print(f"🔍 [FRONTEND WRAPPER] Total files in analysis: {len(analysis.file_analyses)}")
    try:
        return self._run_frontend_ai_analysis(analysis)
    except Exception as e:
        print(f"❌ Error in frontend analysis wrapper: {e}")
        import traceback
        traceback.print_exc()
        # CRITICAL: even if the AI wrapper fails, detect frontend files
        # directly so the report section is never silently empty.
        print(f"🔍 [FRONTEND WRAPPER] Wrapper failed, doing direct frontend file detection...")
        return self._frontend_static_fallback(analysis, e)

def _run_frontend_ai_analysis(self, analysis: "RepositoryAnalysis") -> dict:
    """Run the async AI frontend analysis from a synchronous context.

    Raises on any failure (timeout, missing result, analysis error); the
    caller is responsible for falling back to static detection.
    """
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # No event loop in this thread (typical for plain sync callers).
        # NOTE: the original code also swallowed RuntimeError raised by the
        # analysis itself and retried it via asyncio.run; only the
        # get_event_loop() lookup is guarded now.
        print(f"🔍 [FRONTEND WRAPPER] No event loop exists, creating new one...")
        result = asyncio.run(self._analyze_frontend_architecture_ai(analysis))
        print(f"✅ [FRONTEND WRAPPER] Analysis completed successfully")
        return result
    if loop.is_running():
        # We cannot run_until_complete on a running loop, so execute the
        # coroutine on a brand-new loop in a worker thread.
        print(f"🔍 [FRONTEND WRAPPER] Event loop is running, using thread approach...")
        import threading
        result = None
        exception = None

        def run_in_thread():
            nonlocal result, exception
            try:
                new_loop = asyncio.new_event_loop()
                asyncio.set_event_loop(new_loop)
                try:
                    result = new_loop.run_until_complete(
                        self._analyze_frontend_architecture_ai(analysis)
                    )
                finally:
                    # Close the loop even when the coroutine raises
                    # (the original leaked it on error).
                    new_loop.close()
            except Exception as exc:
                exception = exc

        # daemon=True so a hung AI call cannot keep the interpreter alive
        # after the 120 s timeout below (the original thread was non-daemon).
        thread = threading.Thread(target=run_in_thread, daemon=True)
        thread.start()
        thread.join(timeout=120)  # 2 minute timeout
        if thread.is_alive():
            print(f"⚠️ [FRONTEND WRAPPER] Thread timeout, using fallback detection")
            raise TimeoutError("Frontend analysis timed out")
        if exception:
            raise exception
        if result is None:
            # BUG FIX: the original fell through here and implicitly
            # returned None, crashing callers that index the dict.
            raise RuntimeError("Frontend analysis thread finished without a result")
        print(f"✅ [FRONTEND WRAPPER] Analysis completed successfully")
        return result
    print(f"🔍 [FRONTEND WRAPPER] Event loop exists but not running, using run_until_complete...")
    result = loop.run_until_complete(self._analyze_frontend_architecture_ai(analysis))
    print(f"✅ [FRONTEND WRAPPER] Analysis completed successfully")
    return result

def _frontend_static_fallback(self, analysis: "RepositoryAnalysis", error: Exception) -> dict:
    """Extension-based frontend detection used when the AI path fails.

    Produces the same result-dict shape as the AI path; component, routing,
    state and test counts are reported as 0 because static extension
    matching cannot classify them.

    Args:
        analysis: repository analysis object (``file_analyses`` is read).
        error: the exception that defeated the AI path; surfaced under the
            ``'error'`` key for diagnostics.
    """
    frontend_extensions = ('.js', '.jsx', '.ts', '.tsx', '.vue', '.svelte',
                           '.html', '.htm', '.xhtml', '.css', '.scss',
                           '.sass', '.less', '.styl')
    frontend_files_detected = []
    for file_analysis in analysis.file_analyses:
        if file_analysis.path.lower().endswith(frontend_extensions):
            frontend_files_detected.append(file_analysis)
            print(f"🔍 [FRONTEND WRAPPER] Detected frontend file: {file_analysis.path}")
    if not frontend_files_detected:
        # No frontend files found - but log for debugging
        print(f"⚠️ [FRONTEND WRAPPER] No frontend files detected in fallback")
        print(f"🔍 [FRONTEND WRAPPER] Checking all files in analysis:")
        for fa in analysis.file_analyses[:20]:
            print(f" - {fa.path} (extension: {fa.path.split('.')[-1] if '.' in fa.path else 'none'})")
        return {
            'has_frontend': False,
            'ai_analysis': None,
            'frontend_file_count': 0,
            'error': str(error)
        }
    print(f"✅ [FRONTEND WRAPPER] Detected {len(frontend_files_detected)} frontend files despite wrapper error")
    total_frontend_lines = sum(fa.lines_of_code for fa in frontend_files_detected)
    # Categorize files by extension for the textual summary below.
    html_files = [fa for fa in frontend_files_detected if fa.path.lower().endswith(('.html', '.htm'))]
    css_files = [fa for fa in frontend_files_detected if fa.path.lower().endswith(('.css', '.scss', '.sass', '.less'))]
    js_files = [fa for fa in frontend_files_detected if fa.path.lower().endswith(('.js', '.jsx', '.mjs', '.cjs'))]
    ts_files = [fa for fa in frontend_files_detected if fa.path.lower().endswith(('.ts', '.tsx'))]
    # Comprehensive basic analysis (technical fallback).  Content lines stay
    # at column 0 so the rendered report text carries no stray indentation.
    basic_analysis = f"""
**1. FRONTEND OVERVIEW**
Detected {len(frontend_files_detected)} frontend-related files totaling {total_frontend_lines:,} lines of client-side code.
**2. FILE TYPE SUMMARY**
- HTML Files ({len(html_files)}): define document structure, semantic layout, and script mount points.
- CSS Files ({len(css_files)}): manage layout, typography, spacing, and responsive behaviour.
- JavaScript Files ({len(js_files)}): provide interaction handlers, API connectivity, and state management.
- TypeScript Files ({len(ts_files)}): supply typed sources that compile to JavaScript for runtime execution.
**3. RUNTIME EXECUTION FLOW**
1. Browsers load HTML documents and create the DOM tree.
2. CSS stylesheets apply presentation rules across breakpoints.
3. JavaScript or TypeScript modules initialise, attach event listeners, and hydrate UI components.
4. State changes trigger DOM updates via framework abstractions or direct DOM APIs.
**4. USER INTERACTION PIPELINE**
- Button callbacks execute JavaScript routines to drive navigation, trigger side effects, or fetch data.
- Form workflows validate input locally, issue HTTP requests, and update the interface with success or error states.
**5. DATA FLOW**
1. User actions invoke JavaScript functions that compose network requests.
2. Backend endpoints respond with structured payloads (typically JSON or HTML).
3. The frontend updates local state and rerenders affected DOM nodes.
4. CSS keeps presentation consistent after dynamic updates.
**6. STRUCTURE AND ORGANISATION**
- Markup, styling, and behavioural concerns are separated for maintainability.
- Script modules coordinate data loading, state transitions, and rendering logic.
- Styling assets encapsulate layout concerns independently of business logic.
**7. SUMMARY**
This fallback summary is produced from static inspection because AI synthesis was unavailable during the run. Re-run the automated analysis when the AI service is reachable for richer insight.
"""
    return {
        'has_frontend': True,
        'ai_analysis': basic_analysis,
        'frontend_file_count': len(frontend_files_detected),
        'total_frontend_lines': total_frontend_lines,
        'component_count': 0,
        'routing_files_count': 0,
        'state_files_count': 0,
        'largest_files': [{'name': fa.path.split('/')[-1], 'lines': fa.lines_of_code}
                          for fa in sorted(frontend_files_detected, key=lambda x: x.lines_of_code, reverse=True)[:5]],
        'test_file_count': 0,
        'empty_test_files': 0,
        # Rough heuristic: ~0.5 KB per source line.
        'bundle_size_estimate': f"{(total_frontend_lines * 0.5) / 1000:.1f} MB",
        'error': str(error)
    }
def _analyze_testing_infrastructure(self, analysis: RepositoryAnalysis) -> dict:
"""Analyze testing infrastructure across the entire codebase."""
# Separate backend and frontend files
backend_files = []
frontend_files = []
for file_analysis in analysis.file_analyses:
file_path = file_analysis.path.lower()
if any(indicator in file_path for indicator in ['js', 'jsx', 'ts', 'tsx', 'vue', 'html', 'css', 'scss', 'sass']):
frontend_files.append(file_analysis)
else:
backend_files.append(file_analysis)
# Backend Testing Analysis
backend_test_files = [fa for fa in backend_files if any(indicator in fa.path.lower() for indicator in ['test', 'spec', '__tests__', 'testing'])]
backend_test_count = len(backend_test_files)
backend_file_count = len(backend_files)
backend_coverage = (backend_test_count / backend_file_count * 100) if backend_file_count > 0 else 0
# Frontend Testing Analysis
frontend_test_files = [fa for fa in frontend_files if any(indicator in fa.path.lower() for indicator in ['test', 'spec', '__tests__', 'testing'])]
frontend_test_count = len(frontend_test_files)
frontend_file_count = len(frontend_files)
frontend_coverage = (frontend_test_count / frontend_file_count * 100) if frontend_file_count > 0 else 0
# Integration Testing Analysis
integration_tests = len([fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['integration', 'e2e', 'end-to-end', 'api-test'])])
api_tests = len([fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['api-test', 'api_test', 'apitest'])])
database_tests = len([fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['db-test', 'database-test', 'db_test'])])
e2e_tests = len([fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['e2e', 'end-to-end', 'cypress', 'playwright'])])
# Security Testing Analysis
security_tests = len([fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['security-test', 'security_test', 'penetration', 'vulnerability'])])
vulnerability_scans = len([fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['vulnerability', 'security-scan', 'owasp'])])
penetration_tests = len([fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['penetration', 'pentest', 'security-pen'])])
auth_tests = len([fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['auth-test', 'authentication-test', 'login-test'])])
# Performance Testing Analysis
performance_tests = len([fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['performance-test', 'perf-test', 'load-test', 'stress-test'])])
load_tests = len([fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['load-test', 'loadtest', 'jmeter', 'artillery'])])
stress_tests = len([fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['stress-test', 'stresstest', 'chaos-test'])])
benchmark_tests = len([fa for fa in analysis.file_analyses if any(indicator in fa.path.lower() for indicator in ['benchmark', 'bench', 'performance-bench'])])
# Test Quality Assessment
overall_coverage = (backend_coverage + frontend_coverage) / 2
test_quality_score = min(100, overall_coverage * 2) # Scale up the score
# Critical Issues
critical_issues = []
if backend_coverage < 10:
critical_issues.append("Backend test coverage below 10%")
if frontend_coverage < 5:
critical_issues.append("Frontend test coverage below 5%")
if integration_tests == 0:
critical_issues.append("No integration tests found")
if security_tests == 0:
critical_issues.append("No security tests found")
if performance_tests == 0:
critical_issues.append("No performance tests found")
# Recommendations
recommendations = []
if backend_coverage < 50:
recommendations.append("Implement comprehensive backend unit tests")
if frontend_coverage < 30:
recommendations.append("Add frontend component and integration tests")
if integration_tests == 0:
recommendations.append("Create API integration tests")
if security_tests == 0:
recommendations.append("Implement security testing suite")
if performance_tests == 0:
recommendations.append("Add performance and load testing")
# Backend test types
backend_test_types = []
if any('unit' in fa.path.lower() for fa in backend_test_files):
backend_test_types.append("Unit Tests")
if any('integration' in fa.path.lower() for fa in backend_test_files):
backend_test_types.append("Integration Tests")
if any('mock' in fa.path.lower() for fa in backend_test_files):
backend_test_types.append("Mock Tests")
# Frontend test types
frontend_test_types = []
if any('component' in fa.path.lower() for fa in frontend_test_files):
frontend_test_types.append("Component Tests")
if any('unit' in fa.path.lower() for fa in frontend_test_files):
frontend_test_types.append("Unit Tests")
if any('integration' in fa.path.lower() for fa in frontend_test_files):
frontend_test_types.append("Integration Tests")
# Backend test issues
backend_test_issues = []
empty_backend_tests = len([fa for fa in backend_test_files if fa.lines_of_code == 0])
if empty_backend_tests > 0:
backend_test_issues.append(f"{empty_backend_tests} empty test files")
if backend_coverage < 20:
backend_test_issues.append("Very low test coverage")
# Frontend test issues
frontend_test_issues = []
empty_frontend_tests = len([fa for fa in frontend_test_files if fa.lines_of_code == 0])
if empty_frontend_tests > 0:
frontend_test_issues.append(f"{empty_frontend_tests} empty test files")
if frontend_coverage < 10:
frontend_test_issues.append("Very low test coverage")
return {
'backend_tests': f"{backend_test_count} test files for {backend_file_count} code files",
'backend_files': backend_file_count,
'backend_coverage': f"{backend_coverage:.1f}",
'frontend_tests': f"{frontend_test_count} test files for {frontend_file_count} files",
'frontend_files': frontend_file_count,
'frontend_coverage': f"{frontend_coverage:.1f}",
'integration_tests': f"{integration_tests}",
'security_tests': f"{security_tests}",
'performance_tests': f"{performance_tests}",
'backend_test_files': backend_test_count,
'backend_test_types': ", ".join(backend_test_types) if backend_test_types else "None detected",
'backend_test_issues': "; ".join(backend_test_issues) if backend_test_issues else "No major issues",
'frontend_test_files': frontend_test_count,
'frontend_test_types': ", ".join(frontend_test_types) if frontend_test_types else "None detected",
'frontend_test_issues': "; ".join(frontend_test_issues) if frontend_test_issues else "No major issues",
'api_tests': f"{api_tests}",
'database_tests': f"{database_tests}",
'e2e_tests': f"{e2e_tests}",
'vulnerability_scans': f"{vulnerability_scans}",
'penetration_tests': f"{penetration_tests}",
'auth_tests': f"{auth_tests}",
'load_tests': f"{load_tests}",
'stress_tests': f"{stress_tests}",
'benchmark_tests': f"{benchmark_tests}",
'overall_coverage': f"{overall_coverage:.1f}",
'test_quality_score': f"{test_quality_score:.0f}",
'critical_issues': "; ".join(critical_issues) if critical_issues else "No critical issues",
'recommendations': "; ".join(recommendations) if recommendations else "Testing infrastructure is adequate"
}
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    # Use SystemExit rather than the exit() builtin: exit() is injected by
    # the `site` module for interactive sessions and is absent under
    # `python -S`, whereas raising SystemExit always works.
    raise SystemExit(asyncio.run(main()))