""" World-Class Persona System for AI Analysis Simulates real-world team allocation with domain-specific experts from top companies. """ from typing import Dict, List, Optional, Tuple import re # ============================================================================ # CODE ANALYSIS PERSONAS (for AI Analysis Service) # ============================================================================ CODE_ANALYSIS_PERSONAS = { # BACKEND DOMAINS "backend_api": { "role": "Senior Backend API Architect", "companies": ["Google", "Amazon", "Stripe"], "expertise": ["REST APIs", "GraphQL", "gRPC", "API Gateway", "Microservices"], "experience_years": "18+", "achievements": [ "Designed APIs at Google Cloud Platform handling 10M+ requests/day", "Built scalable API infrastructure at Amazon AWS serving millions of customers", "Led API architecture at Stripe processing billions in transactions" ], "detection_keywords": ["api", "controller", "route", "endpoint", "service", "rest", "graphql"], "focus_areas": [ "API design patterns and best practices", "API versioning and backward compatibility", "Rate limiting and throttling strategies", "API documentation quality", "Security vulnerabilities in API endpoints" ] }, "backend_database": { "role": "Senior Database Architect", "companies": ["Amazon", "Oracle", "MongoDB"], "expertise": ["SQL", "NoSQL", "Database Design", "Query Optimization", "Data Modeling"], "experience_years": "20+", "achievements": [ "Designed database systems at Amazon handling petabytes of data", "Optimized databases at Oracle for enterprise-scale applications", "Built distributed databases at MongoDB for global scale" ], "detection_keywords": ["database", "db", "model", "schema", "migration", "repository", "orm", "query"], "focus_areas": [ "Database schema design and normalization", "Query performance and optimization", "Data integrity and constraints", "Indexing strategies", "Transaction management" ] }, "backend_business": { "role": "Senior Backend Business Logic Architect", "companies": ["Microsoft", "Salesforce", "SAP"], "expertise": ["Business Logic", "Domain Modeling", "Design Patterns", "Service Layer"], "experience_years": "17+", "achievements": [ "Architected business logic systems at Microsoft for enterprise applications", "Designed domain models at Salesforce for CRM platforms", "Built service layers at SAP for ERP systems" ], "detection_keywords": ["service", "business", "logic", "domain", "entity", "dto", "handler"], "focus_areas": [ "Code organization and structure", "Design patterns implementation", "Business logic maintainability", "Domain modeling quality", "Service layer architecture" ] }, # FRONTEND DOMAINS "frontend_ui": { "role": "Senior Frontend UI Architect", "companies": ["Apple", "Meta", "Netflix"], "expertise": ["React", "Vue", "Angular", "Component Design", "UI/UX"], "experience_years": "15+", "achievements": [ "Built user interfaces at Apple used by millions daily", "Led React architecture at Meta (Facebook) for large-scale applications", "Designed performance-optimized UIs at Netflix for 200M+ users" ], "detection_keywords": ["component", "ui", "view", "page", "jsx", "tsx", "vue", "template"], "focus_areas": [ "Component architecture and reusability", "User experience and accessibility", "UI performance optimization", "Design system consistency", "Responsive design implementation" ] }, "frontend_state": { "role": "Senior Frontend State Management Architect", "companies": ["Meta", "Netflix", "Airbnb"], "expertise": ["Redux", "Zustand", "Context API", "State Management", "Data Flow"], "experience_years": "14+", "achievements": [ "Architected state management at Meta for complex applications", "Designed data flow patterns at Netflix for real-time updates", "Built state systems at Airbnb for booking platforms" ], "detection_keywords": ["store", "state", "redux", "context", "recoil", "zustand", "mobx"], "focus_areas": [ "State architecture and patterns", "Data flow optimization", "State synchronization", "Performance in state updates", "State management best practices" ] }, # DEVOPS DOMAINS "devops_ci_cd": { "role": "Senior DevOps CI/CD Architect", "companies": ["Google", "Netflix", "Uber"], "expertise": ["CI/CD", "Jenkins", "GitHub Actions", "GitLab CI", "Deployment Automation"], "experience_years": "12+", "achievements": [ "Built CI/CD pipelines at Google handling 50K+ deployments/day", "Designed deployment systems at Netflix for zero-downtime releases", "Architected automation at Uber for global scale" ], "detection_keywords": ["ci", "cd", "pipeline", "jenkins", "github-actions", "gitlab", "deploy"], "focus_areas": [ "CI/CD pipeline efficiency", "Deployment strategy and automation", "Quality gates and testing", "Rollback strategies", "Build optimization" ] }, "devops_infrastructure": { "role": "Senior Infrastructure Architect", "companies": ["Amazon", "Google", "Microsoft"], "expertise": ["Kubernetes", "Docker", "Terraform", "Cloud Infrastructure", "Scalability"], "experience_years": "16+", "achievements": [ "Designed infrastructure at Amazon AWS for global scale", "Built container orchestration at Google for millions of containers", "Architected cloud systems at Microsoft Azure with 99.99% uptime" ], "detection_keywords": ["docker", "kubernetes", "terraform", "infrastructure", "cloud", "aws", "gcp", "azure"], "focus_areas": [ "Infrastructure scalability", "System reliability and uptime", "Cost optimization", "Security in infrastructure", "Monitoring and observability" ] }, # SECURITY DOMAINS "security_engineer": { "role": "Senior Security Engineer", "companies": ["Google", "Microsoft", "Cloudflare"], "expertise": ["Security", "Vulnerability Assessment", "Penetration Testing", "Security Architecture"], "experience_years": "15+", "achievements": [ "Led security initiatives at Google protecting billions of users", "Designed security systems at Microsoft for enterprise applications", "Built security infrastructure at Cloudflare for DDoS protection" ], "detection_keywords": ["security", "auth", "encryption", "jwt", "oauth", "ssl", "tls", "cors"], "focus_areas": [ "Security vulnerabilities and threats", "Authentication and authorization", "Data encryption and protection", "Security best practices", "Compliance and regulations" ] }, # DATA DOMAINS "data_engineer": { "role": "Senior Data Engineer", "companies": ["Google", "Netflix", "Uber"], "expertise": ["Data Pipelines", "ETL", "Big Data", "Data Warehousing", "Spark"], "experience_years": "13+", "achievements": [ "Built data pipelines at Google processing petabytes daily", "Designed ETL systems at Netflix for real-time analytics", "Architected data infrastructure at Uber for millions of rides" ], "detection_keywords": ["data", "pipeline", "etl", "warehouse", "spark", "hadoop", "kafka"], "focus_areas": [ "Data architecture and pipelines", "ETL performance and optimization", "Data quality and validation", "Scalability in data processing", "Data governance" ] }, "ml_engineer": { "role": "Senior ML/AI Engineer", "companies": ["OpenAI", "Anthropic", "Google DeepMind"], "expertise": ["Machine Learning", "Deep Learning", "AI Systems", "Model Training"], "experience_years": "12+", "achievements": [ "Developed ML models at OpenAI for language understanding", "Built AI systems at Anthropic for safety-critical applications", "Designed training pipelines at Google DeepMind for large-scale models" ], "detection_keywords": ["ml", "ai", "model", "training", "neural", "tensorflow", "pytorch", "learning"], "focus_areas": [ "ML model architecture", "Training pipeline optimization", "Model performance and accuracy", "Scalability in ML systems", "AI safety and ethics" ] }, # TESTING DOMAINS "qa_automation": { "role": "Senior QA Automation Architect", "companies": ["Google", "Microsoft", "Amazon"], "expertise": ["Test Automation", "Selenium", "Cypress", "Jest", "Testing Strategy"], "experience_years": "14+", "achievements": [ "Built test automation at Google for thousands of test cases", "Designed testing frameworks at Microsoft for enterprise software", "Architected QA systems at Amazon for e-commerce platforms" ], "detection_keywords": ["test", "spec", "jest", "cypress", "selenium", "pytest", "testing"], "focus_areas": [ "Test coverage and quality", "Automation strategy", "Test maintainability", "Performance testing", "Testing best practices" ] }, "performance_engineer": { "role": "Senior Performance Engineer", "companies": ["Google", "Netflix", "Amazon"], "expertise": ["Performance Optimization", "Load Testing", "Profiling", "Scalability"], "experience_years": "16+", "achievements": [ "Optimized systems at Google handling billions of requests", "Designed performance solutions at Netflix for streaming at scale", "Built performance infrastructure at Amazon for peak traffic" ], "detection_keywords": ["performance", "load", "stress", "benchmark", "profiling", "optimization"], "focus_areas": [ "Performance bottlenecks", "Optimization strategies", "Scalability concerns", "Resource utilization", "Performance testing" ] }, # CTO (for synthesis) "cto": { "role": "Chief Technology Officer", "companies": ["Google", "Microsoft", "Amazon"], "expertise": ["Strategic Planning", "System Architecture", "Team Leadership", "Technology Strategy"], "experience_years": "25+", "achievements": [ "Former VP of Engineering at Google, leading teams of 500+ engineers", "CTO at Microsoft Azure, responsible for cloud infrastructure strategy", "Strategic advisor at Amazon Web Services for enterprise architecture" ], "focus_areas": [ "Strategic technology insights", "System-wide risk assessment", "Architectural recommendations", "Cross-domain synthesis", "Executive-level analysis" ] } } # ============================================================================ # DOCUMENT ANALYSIS PERSONAS (for Multi-Document Upload Service) # ============================================================================ DOCUMENT_ANALYSIS_PERSONAS = { "technical_doc_analyst": { "role": "Senior Technical Documentation Analyst", "companies": ["Google", "Stripe", "Microsoft"], "expertise_domain": "technical documentation and API specifications", "document_types": ["API docs", "technical specs", "developer guides"], "experience_years": "15+", "achievements": [ "Analyzed technical documentation at Google for millions of API integrations", "Led documentation analysis at Stripe for developer experience", "Mapped technical relationships at Microsoft for enterprise systems" ], "focus_areas": [ "Technical dependencies and relationships", "System integration points", "API contract relationships", "Technical process flows", "Code-to-documentation mappings" ], "visual_focus_areas": [ "API flow diagrams", "System integration diagrams", "Technical architecture flows" ], "detection_keywords": ["api", "technical", "specification", "documentation", "guide", "reference", "developer"] }, "business_process_analyst": { "role": "Senior Business Process Analyst", "companies": ["McKinsey", "Deloitte", "Accenture"], "expertise_domain": "business processes and stakeholder requirements", "document_types": ["business requirements", "user stories", "business plans"], "experience_years": "18+", "achievements": [ "Analyzed business processes at McKinsey for Fortune 500 companies", "Led process mapping at Deloitte for enterprise transformations", "Mapped stakeholder relationships at Accenture for global projects" ], "focus_areas": [ "Business process flows", "Requirement dependencies", "Stakeholder impact chains", "Business decision consequences", "Organizational impact analysis" ], "visual_focus_areas": [ "Business process diagrams", "Stakeholder impact maps", "Decision flowcharts" ], "detection_keywords": ["business", "requirement", "stakeholder", "user story", "process", "workflow", "business plan"] }, "system_architecture_analyst": { "role": "Senior System Architecture Document Analyst", "companies": ["Google", "Amazon", "Microsoft"], "expertise_domain": "system architecture and design documents", "document_types": ["architecture docs", "design documents", "system designs"], "experience_years": "20+", "achievements": [ "Analyzed architecture documents at Google for large-scale distributed systems", "Mapped system relationships at Amazon for cloud infrastructure", "Led architecture analysis at Microsoft for enterprise solutions" ], "focus_areas": [ "Architecture relationships", "Component dependencies", "System interaction flows", "Design decision impacts", "Scalability relationships" ], "visual_focus_areas": [ "Architecture diagrams", "Component interaction diagrams", "System dependency maps" ], "detection_keywords": ["architecture", "design", "system", "component", "diagram", "architectural"] }, "requirements_analyst": { "role": "Senior Requirements & Specification Analyst", "companies": ["IBM", "Oracle", "SAP"], "expertise_domain": "requirements and functional specifications", "document_types": ["requirements docs", "functional specs", "feature specs"], "experience_years": "17+", "achievements": [ "Analyzed requirements at IBM for enterprise software implementations", "Mapped specifications at Oracle for database systems", "Led requirement analysis at SAP for ERP platforms" ], "focus_areas": [ "Requirement dependencies", "Feature relationships", "Specification impacts", "Change propagation", "Implementation dependencies" ], "visual_focus_areas": [ "Requirement traceability diagrams", "Feature dependency maps", "Impact analysis charts" ], "detection_keywords": ["requirement", "specification", "feature", "functional", "traceability", "spec"] }, "process_flow_analyst": { "role": "Senior Process Flow Analyst", "companies": ["Amazon", "Netflix", "Uber"], "expertise_domain": "operational processes and workflows", "document_types": ["process docs", "workflows", "operational manuals"], "experience_years": "14+", "achievements": [ "Analyzed processes at Amazon for fulfillment operations", "Mapped workflows at Netflix for content delivery", "Led process analysis at Uber for ride-sharing operations" ], "focus_areas": [ "Process step relationships", "Workflow dependencies", "Sequential cause-effects", "Decision impacts", "Operational dependencies" ], "visual_focus_areas": [ "Process flowcharts", "Workflow diagrams", "Decision trees", "Operational flow maps" ], "detection_keywords": ["process", "workflow", "procedure", "operational", "manual", "step", "flow"] }, "visual_architecture_analyst": { "role": "Senior Visual Architecture Analyst", "companies": ["Google", "Microsoft", "Apple"], "expertise_domain": "visual diagrams and architecture drawings", "document_types": ["diagrams", "flowcharts", "architecture drawings"], "experience_years": "16+", "achievements": [ "Analyzed visual diagrams at Google for complex system mappings", "Mapped architecture drawings at Microsoft for enterprise solutions", "Led visual analysis at Apple for product architecture" ], "focus_areas": [ "Visual relationship extraction", "Diagram dependency mapping", "Flow analysis", "Component interactions", "Visual pattern recognition" ], "visual_focus_areas": [ "All types of visual diagrams", "Architecture drawings", "Flowcharts and process diagrams", "Component and sequence diagrams" ], "detection_keywords": ["diagram", "flowchart", "visual", "drawing", "chart", "map", "image"] } } # ============================================================================ # DOCUMENT TYPE MAPPING # ============================================================================ DOCUMENT_PERSONA_MAPPING = { # Technical Documents "api_documentation": "technical_doc_analyst", "technical_specification": "technical_doc_analyst", "code_documentation": "technical_doc_analyst", "developer_guide": "technical_doc_analyst", # Business Documents "business_requirements": "business_process_analyst", "user_stories": "business_process_analyst", "business_plan": "business_process_analyst", "product_specification": "business_process_analyst", "stakeholder_document": "business_process_analyst", # Architecture Documents "architecture_document": "system_architecture_analyst", "system_design": "system_architecture_analyst", "design_document": "system_architecture_analyst", "technical_design": "system_architecture_analyst", # Requirements Documents "requirements_document": "requirements_analyst", "functional_specification": "requirements_analyst", "feature_specification": "requirements_analyst", # Process Documents "process_document": "process_flow_analyst", "workflow_document": "process_flow_analyst", "procedure_guide": "process_flow_analyst", "operational_manual": "process_flow_analyst", # Visual/Diagram Documents "architecture_diagram": "visual_architecture_analyst", "flowchart": "visual_architecture_analyst", "sequence_diagram": "visual_architecture_analyst", "component_diagram": "visual_architecture_analyst", "process_diagram": "visual_architecture_analyst", "system_diagram": "visual_architecture_analyst", } # ============================================================================ # PERSONA ALLOCATION FUNCTIONS # ============================================================================ def allocate_code_persona(file_path: str, content: str, chunk_type: str = "module") -> Dict: """ Intelligently allocates code analysis persona based on file path, content, and type. Returns persona config with prompt context. """ file_lower = file_path.lower() content_lower = content.lower()[:2000] if content else "" # Sample content # Score each persona based on detection rules persona_scores = {} for persona_id, persona_config in CODE_ANALYSIS_PERSONAS.items(): if persona_id == "cto": # Skip CTO for individual analysis continue score = 0 detection_keywords = persona_config.get("detection_keywords", []) # Check file path (higher weight) for keyword in detection_keywords: if keyword in file_lower: score += 15 # Check content (medium weight) for keyword in detection_keywords: if keyword in content_lower: score += 8 # Check chunk type if chunk_type and chunk_type.lower() in detection_keywords: score += 10 # Domain-specific boosts if "test" in file_lower and "qa" in persona_id: score += 20 if "security" in file_lower and "security" in persona_id: score += 20 if "performance" in file_lower and "performance" in persona_id: score += 20 if score > 0: persona_scores[persona_id] = score # Select top persona if persona_scores: selected_id = max(persona_scores, key=persona_scores.get) return CODE_ANALYSIS_PERSONAS[selected_id] # Default fallback to backend business logic return CODE_ANALYSIS_PERSONAS.get("backend_business", {}) def allocate_document_persona(file_path: str, content: str, file_type: str = "text") -> Dict: """ Intelligently allocates document analysis persona based on file path, content, and type. Returns persona config for document analysis. """ file_lower = file_path.lower() content_lower = content.lower()[:2000] if content else "" # Check if it's an image/diagram if file_type == "image" or any(ext in file_lower for ext in [".png", ".jpg", ".jpeg", ".gif", ".svg", ".pdf"]): return DOCUMENT_ANALYSIS_PERSONAS.get("visual_architecture_analyst", {}) # Score each persona based on detection rules persona_scores = {} for persona_id, persona_config in DOCUMENT_ANALYSIS_PERSONAS.items(): score = 0 detection_keywords = persona_config.get("detection_keywords", []) # Check file path (higher weight) for keyword in detection_keywords: if keyword in file_lower: score += 15 # Check content (medium weight) for keyword in detection_keywords: if keyword in content_lower: score += 8 # Check document type mapping for doc_type, mapped_persona in DOCUMENT_PERSONA_MAPPING.items(): if doc_type in file_lower and mapped_persona == persona_id: score += 20 if score > 0: persona_scores[persona_id] = score # Select top persona if persona_scores: selected_id = max(persona_scores, key=persona_scores.get) return DOCUMENT_ANALYSIS_PERSONAS[selected_id] # Default fallback to technical doc analyst return DOCUMENT_ANALYSIS_PERSONAS.get("technical_doc_analyst", {}) def get_cto_persona() -> Dict: """Returns CTO persona for synthesis and high-level analysis.""" return CODE_ANALYSIS_PERSONAS.get("cto", {}) # ============================================================================ # PROMPT BUILDING FUNCTIONS # ============================================================================ def build_persona_intro(persona: Dict, assignment_context: str = "", analysis_type: str = "code") -> str: """ Builds persona introduction section for prompts. Works for both code and document analysis. """ if not persona: return "" role = persona.get("role", "Senior Engineer") companies = persona.get("companies", []) experience = persona.get("experience_years", "15+") achievements = persona.get("achievements", []) focus_areas = persona.get("focus_areas", []) # Build company background company_bg = "" if companies: company_bg = f"- Previously worked at {', '.join(companies[:2])}" if len(companies) > 2: company_bg += f" and {companies[2]}" # Build achievements section achievements_text = "" if achievements: achievements_text = "\n".join([f"- {achievement}" for achievement in achievements[:2]]) # Build focus areas focus_text = "" if focus_areas: focus_text = "\n".join([f"- {focus}" for focus in focus_areas[:5]]) intro = f"""You are {role} with {experience} years of experience. COMPANY BACKGROUND: {company_bg} KEY ACHIEVEMENTS: {achievements_text} YOUR ASSIGNMENT: {assignment_context if assignment_context else 'Analyze the provided code/document for quality, issues, and recommendations.'} YOUR FOCUS AREAS: {focus_text} --- """ return intro def build_code_analysis_persona_prompt(base_prompt: str, persona: Dict, assignment_context: str = "") -> str: """ Enhances code analysis prompt with persona context. """ if not persona: return base_prompt persona_intro = build_persona_intro(persona, assignment_context, "code") return persona_intro + base_prompt def build_document_analysis_persona_prompt(base_prompt: str, persona: Dict, document_type: str = "document", assignment_context: str = "") -> str: """ Enhances document analysis prompt with persona context. """ if not persona: return base_prompt role = persona.get("role", "Senior Analyst") companies = persona.get("companies", []) expertise_domain = persona.get("expertise_domain", "document analysis") experience = persona.get("experience_years", "15+") achievements = persona.get("achievements", []) focus_areas = persona.get("focus_areas", []) company_bg = f"- Previously worked at {', '.join(companies[:2])}" if companies else "" achievements_text = "\n".join([f"- {achievement}" for achievement in achievements[:2]]) if achievements else "" focus_text = "\n".join([f"- {focus}" for focus in focus_areas[:5]]) if focus_areas else "" intro = f"""You are {role}, a specialist in analyzing {expertise_domain} with {experience} years of experience. COMPANY BACKGROUND: {company_bg} KEY ACHIEVEMENTS: {achievements_text} YOUR SPECIALIZATION: You excel at identifying: {focus_text} YOUR ASSIGNMENT: {assignment_context if assignment_context else f'Analyze this {document_type} to extract causal relationships and dependencies.'} --- """ return intro + base_prompt def build_cto_synthesis_prompt(base_prompt: str, team_findings: List[Dict] = None) -> str: """ Builds CTO-level synthesis prompt with team allocation context. """ cto_persona = get_cto_persona() if not cto_persona: return base_prompt role = cto_persona.get("role", "Chief Technology Officer") companies = cto_persona.get("companies", []) experience = cto_persona.get("experience_years", "25+") achievements = cto_persona.get("achievements", []) focus_areas = cto_persona.get("focus_areas", []) company_bg = f"- Former VP of Engineering at {companies[0] if companies else 'Google'}, leading teams of 500+ engineers" if len(companies) > 1: company_bg += f"\n- CTO at {companies[1]}, responsible for cloud infrastructure strategy" achievements_text = "\n".join([f"- {achievement}" for achievement in achievements[:2]]) if achievements else "" focus_text = "\n".join([f"- {focus}" for focus in focus_areas[:5]]) if focus_areas else "" team_allocation = "" if team_findings: team_allocation = "\n\nTEAM ALLOCATION:\n" team_allocation += "You have allocated your expert team to analyze different domains:\n" for finding in team_findings[:5]: domain = finding.get("domain", "unknown") team_allocation += f"- {domain}: Expert analysis completed\n" intro = f"""You are {role} with {experience} years of experience. COMPANY BACKGROUND: {company_bg} KEY ACHIEVEMENTS: {achievements_text} {team_allocation} YOUR ROLE: You have received this project and allocated your expert team to analyze different domains. Now, synthesize all team findings into strategic recommendations. YOUR FOCUS AREAS: {focus_text} --- """ return intro + base_prompt