#!/usr/bin/env python3
"""
Enhanced Analyzer Integration

Seamlessly integrates enhanced chunking with the existing AI Analysis Service.

Author: Senior Engineer (20+ years experience)
Version: 1.0.0
"""

import asyncio
import logging
from typing import Dict, List, Any, Optional, Tuple
from pathlib import Path

# Import existing classes (maintain compatibility)
from ai_analyze import EnhancedGitHubAnalyzer, FileAnalysis, RepositoryAnalysis
from enhanced_chunking import EnhancedFileProcessor, ENHANCED_CHUNKING_CONFIG


class EnhancedGitHubAnalyzerV2(EnhancedGitHubAnalyzer):
    """
    Enhanced version of GitHubAnalyzer with intelligent chunking.

    Maintains 100% backward compatibility while adding enhanced capabilities.
    """

    def __init__(self, api_key: str, memory_config: Dict[str, Any]):
        # Initialize parent class
        super().__init__(api_key, memory_config)

        # Add enhanced processing capability
        self.enhanced_processor = EnhancedFileProcessor(self.client, self.memory_manager)
        self.enhanced_enabled = True  # Feature flag for easy toggling

        # Configuration
        self.chunking_config = ENHANCED_CHUNKING_CONFIG
        self.logger = logging.getLogger(__name__)

        print(f"🔍 [DEBUG] EnhancedGitHubAnalyzerV2 initialized - class: {self.__class__.__name__}")
        self.logger.info("Enhanced GitHub Analyzer V2 initialized with chunking capabilities")

    async def analyze_file_with_memory_enhanced(self, file_path: Path, content: str,
                                                repo_id: str) -> FileAnalysis:
        """
        Enhanced version of analyze_file_with_memory with intelligent chunking.

        Maintains the exact same interface and return type for backward compatibility.
        """
        try:
            if not self.enhanced_enabled:
                print(f"🔍 [DEBUG] Enhanced disabled, using original method for {file_path}")
                return await super().analyze_file_with_memory(file_path, content, repo_id)

            print(f"🔍 [DEBUG] Starting enhanced processing for {file_path}")

            # Use enhanced processing
            enhanced_result = await self.enhanced_processor.process_file_enhanced(
                str(file_path), content, repo_id
            )

            print(f"🔍 [DEBUG] Enhanced processing completed for {file_path}")

            # Convert to FileAnalysis object (maintain compatibility)
            return self._convert_to_file_analysis(enhanced_result, file_path)

        except Exception as e:
            print(f"🔍 [DEBUG] Enhanced analysis failed for {file_path}: {e}")
            self.logger.error(f"Enhanced analysis failed for {file_path}, falling back to original: {e}")
            # Fallback to original method
            return await super().analyze_file_with_memory(file_path, content, repo_id)

    async def analyze_file_with_memory(self, file_path: Path, content: str,
                                       repo_id: str) -> FileAnalysis:
        """Wrapper method to maintain compatibility with server calls."""
        return await self.analyze_file_with_memory_enhanced(file_path, content, repo_id)

    async def analyze_repository_overview_with_memory(self, repo_path: str,
                                                      file_analyses: List[FileAnalysis],
                                                      context_memories: Dict,
                                                      repo_id: str) -> Tuple[str, str]:
        """Wrapper method to maintain compatibility with server calls."""
        return await super().analyze_repository_overview_with_memory(
            repo_path, file_analyses, context_memories, repo_id
        )

    def create_pdf_report(self, analysis: RepositoryAnalysis, output_path: str, progress_mgr=None):
        """Wrapper method to maintain compatibility with server calls."""
        return super().create_pdf_report(analysis, output_path, progress_mgr)

    def _convert_to_file_analysis(self, enhanced_result: Dict[str, Any],
                                  file_path: Path) -> FileAnalysis:
        """Convert an enhanced analysis result to a FileAnalysis object for compatibility."""
        return FileAnalysis(
            path=str(file_path),
            language=enhanced_result.get('language', 'Unknown'),
            lines_of_code=enhanced_result.get('lines_of_code', 0),
            complexity_score=enhanced_result.get('complexity_score', 5.0),
            issues_found=enhanced_result.get('issues_found', []),
            recommendations=enhanced_result.get('recommendations', []),
            detailed_analysis=enhanced_result.get('detailed_analysis', ''),
            severity_score=enhanced_result.get('severity_score', 5.0)
        )

    async def analyze_repository_with_memory_enhanced(self, repo_path: str) -> RepositoryAnalysis:
        """
        Enhanced repository analysis with intelligent chunking and batch processing.

        Maintains the exact same interface and return type for backward compatibility.
        """
        try:
            if not self.enhanced_enabled:
                # Fallback to original method
                return await super().analyze_repository_with_memory(repo_path)

            # Use enhanced processing with batch optimization
            return await self._analyze_repository_enhanced(repo_path)

        except Exception as e:
            self.logger.error(f"Enhanced repository analysis failed, falling back to original: {e}")
            # Fallback to original method
            return await super().analyze_repository_with_memory(repo_path)

    async def _analyze_repository_enhanced(self, repo_path: str) -> RepositoryAnalysis:
        """Enhanced repository analysis with batch processing and chunking."""
        # Generate repo ID and check cache
        repo_id = self.calculate_repo_id(repo_path)

        # Check working memory for recent analysis
        cached_analysis = await self.memory_manager.get_working_memory(f"repo_analysis:{repo_id}")
        if cached_analysis:
            self.logger.info("Using cached repository analysis from memory")
            # Rebuild FileAnalysis objects from their cached dict form so the
            # cached path returns the same shape as a fresh analysis.
            cached_analysis = dict(cached_analysis)
            cached_analysis['file_analyses'] = [
                FileAnalysis(**fa) if isinstance(fa, dict) else fa
                for fa in cached_analysis.get('file_analyses', [])
            ]
            cached_analysis.setdefault('high_quality_files', [])
            return RepositoryAnalysis(**cached_analysis)

        # Clone/access repository
        actual_repo_path = self.clone_repository(repo_path)

        # Get analysis context from memory
        context_memories = await self.get_analysis_context(repo_path, "", repo_id)

        # Scan files with enhanced processing
        files_to_analyze = self.scan_repository(actual_repo_path)

        if not files_to_analyze:
            raise Exception("No files found to analyze")

        self.logger.info(f"Starting enhanced analysis of {len(files_to_analyze)} files...")

        # Process files with batch optimization
        file_analyses = await self._process_files_with_batching(files_to_analyze, repo_id)

        # Repository-level analysis with enhanced context
        architecture_assessment, security_assessment = await self.analyze_repository_overview_with_memory(
            actual_repo_path, file_analyses, context_memories, repo_id
        )

        # Calculate overall quality score safely
        if file_analyses:
            valid_scores = [fa.severity_score for fa in file_analyses if fa.severity_score is not None]
            avg_quality = sum(valid_scores) / len(valid_scores) if valid_scores else 5.0
        else:
            avg_quality = 5.0

        # Generate statistics safely
        from collections import Counter
        if file_analyses:
            language_list = [fa.language for fa in file_analyses if fa.language is not None]
            languages = dict(Counter(language_list))
            total_lines = sum(fa.lines_of_code for fa in file_analyses if fa.lines_of_code is not None)
        else:
            languages = {}
            total_lines = 0

        # Create repository analysis
        repo_analysis = RepositoryAnalysis(
            repo_path=repo_path,
            total_files=len(file_analyses),
            total_lines=total_lines,
            languages=languages,
            architecture_assessment=architecture_assessment,
            security_assessment=security_assessment,
            code_quality_score=avg_quality,
            file_analyses=file_analyses,
            executive_summary="",
            high_quality_files=[]
        )

        # Generate executive summary with enhanced context
        repo_analysis.executive_summary = await self.generate_executive_summary_with_memory(
            repo_analysis, context_memories
        )

        # Store analysis in episodic memory
        await self.memory_manager.store_episodic_memory(
            self.session_id,
            "Enhanced automated repository analysis",
            f"Analyzed {repo_analysis.total_files} files with enhanced chunking, found "
            f"{sum(len(fa.issues_found) if isinstance(fa.issues_found, (list, tuple)) else 0 for fa in file_analyses)} issues",
            repo_id,
            {
                'repo_path': repo_path,
                'quality_score': avg_quality,
                'total_issues': sum(len(fa.issues_found) if isinstance(fa.issues_found, (list, tuple)) else 0 for fa in file_analyses),
                'analysis_type': 'enhanced_automated_comprehensive',
                'chunking_enabled': True
            }
        )

        # Cache analysis in working memory
        await self.memory_manager.store_working_memory(
            f"repo_analysis:{repo_id}",
            self._repo_analysis_to_dict(repo_analysis),
            ttl=7200  # 2 hours
        )

        return repo_analysis

    async def _process_files_with_batching(self, files_to_analyze: List[tuple],
                                           repo_id: str) -> List[FileAnalysis]:
        """Process files with intelligent batching to optimize API usage."""
        file_analyses = []
        processed_files = 0

        # Group files by size and type for optimal batching
        small_files = []
        medium_files = []
        large_files = []

        for file_path, content in files_to_analyze:
            file_size = len(content.split('\n'))
            if file_size < 200:
                small_files.append((file_path, content))
            elif file_size < 500:
                medium_files.append((file_path, content))
            else:
                large_files.append((file_path, content))

        # Process small files in batches (fast processing)
        if small_files:
            self.logger.info(f"Processing {len(small_files)} small files...")
            for file_path, content in small_files:
                try:
                    analysis = await self.analyze_file_with_memory_enhanced(
                        Path(file_path), content, repo_id
                    )
                    file_analyses.append(analysis)
                    processed_files += 1
                    await asyncio.sleep(0.05)  # Small delay
                except Exception as e:
                    self.logger.error(f"Error analyzing small file {file_path}: {e}")
                    continue

        # Process medium files individually (balanced processing)
        if medium_files:
            self.logger.info(f"Processing {len(medium_files)} medium files...")
            for file_path, content in medium_files:
                try:
                    analysis = await self.analyze_file_with_memory_enhanced(
                        Path(file_path), content, repo_id
                    )
                    file_analyses.append(analysis)
                    processed_files += 1
                    await asyncio.sleep(0.1)  # Medium delay
                except Exception as e:
                    self.logger.error(f"Error analyzing medium file {file_path}: {e}")
                    continue

        # Process large files with enhanced chunking (careful processing)
        if large_files:
            self.logger.info(f"Processing {len(large_files)} large files with enhanced chunking...")
            for file_path, content in large_files:
                try:
                    analysis = await self.analyze_file_with_memory_enhanced(
                        Path(file_path), content, repo_id
                    )
                    file_analyses.append(analysis)
                    processed_files += 1
                    await asyncio.sleep(0.2)  # Longer delay for large files
                except Exception as e:
                    self.logger.error(f"Error analyzing large file {file_path}: {e}")
                    continue

        self.logger.info(f"Enhanced processing completed: {processed_files}/{len(files_to_analyze)} files processed")
        return file_analyses

    def _repo_analysis_to_dict(self, repo_analysis: RepositoryAnalysis) -> Dict[str, Any]:
        """Convert RepositoryAnalysis to a dictionary for caching."""
        return {
            'repo_path': repo_analysis.repo_path,
            'total_files': repo_analysis.total_files,
            'total_lines': repo_analysis.total_lines,
            'languages': repo_analysis.languages,
            'architecture_assessment': repo_analysis.architecture_assessment,
            'security_assessment': repo_analysis.security_assessment,
            'code_quality_score': repo_analysis.code_quality_score,
            'file_analyses': [
                {
                    'path': fa.path,
                    'language': fa.language,
                    'lines_of_code': fa.lines_of_code,
                    'complexity_score': fa.complexity_score,
                    'issues_found': fa.issues_found,
                    'recommendations': fa.recommendations,
                    'detailed_analysis': fa.detailed_analysis,
                    'severity_score': fa.severity_score
                }
                for fa in repo_analysis.file_analyses
            ],
            'executive_summary': repo_analysis.executive_summary
        }

    def enable_enhanced_processing(self, enabled: bool = True):
        """Enable or disable enhanced processing (feature flag)."""
        self.enhanced_enabled = enabled
        self.logger.info(f"Enhanced processing {'enabled' if enabled else 'disabled'}")

    def get_processing_stats(self) -> Dict[str, Any]:
        """Get statistics about enhanced processing."""
        return {
            'enhanced_enabled': self.enhanced_enabled,
            'chunking_config': self.chunking_config,
            'memory_stats': {}
        }


# Factory function for easy integration
def create_enhanced_analyzer(api_key: str, memory_config: Dict[str, Any]) -> EnhancedGitHubAnalyzerV2:
    """
    Factory function to create the enhanced analyzer.

    Drop-in replacement for the existing EnhancedGitHubAnalyzer.
    """
    return EnhancedGitHubAnalyzerV2(api_key, memory_config)


# Backward compatibility alias
EnhancedGitHubAnalyzer = EnhancedGitHubAnalyzerV2
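

# ---------------------------------------------------------------------------
# Usage sketch (illustration only, not part of the service): shows how a
# caller might wire up the factory and run a full repository analysis. The
# env-var names, memory_config keys, and repository path below are
# assumptions; the real values depend on the memory backend and model client
# expected by ai_analyze.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import os

    # Hypothetical configuration; replace with your deployment's actual values.
    example_memory_config = {
        "redis_url": os.environ.get("REDIS_URL", "redis://localhost:6379"),
    }

    analyzer = create_enhanced_analyzer(
        api_key=os.environ.get("AI_API_KEY", ""),
        memory_config=example_memory_config,
    )

    # Enhanced chunking can be toggled at runtime via the feature flag.
    analyzer.enable_enhanced_processing(True)

    # Analyze a repository (local path or URL, per clone_repository's
    # semantics) and report the aggregate quality score.
    result = asyncio.run(
        analyzer.analyze_repository_with_memory_enhanced("/path/to/repo")
    )
    print(f"Analyzed {result.total_files} files, quality score: {result.code_quality_score:.1f}")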