# TRULY DYNAMIC MONGODB DESIGNER - HYBRID APPROACH
# Analyzes actual business requirements using NLP + AI + Pattern Analysis
# NO HARDCODING - Everything derived from functional requirements

import json
import re
from datetime import datetime
from typing import Dict, Any, List, Optional, Set

from loguru import logger

try:
    import anthropic
    CLAUDE_AVAILABLE = True
except ImportError:
    CLAUDE_AVAILABLE = False


class HybridRequirementsAnalyzer:
    """Hybrid analyzer combining NLP + AI + Pattern Analysis"""

    def __init__(self):
        self.claude_client = anthropic.Anthropic() if CLAUDE_AVAILABLE else None
        self.field_type_mappings = self._initialize_type_inference_patterns()
        logger.info("🧠 Hybrid Requirements Analyzer initialized")

    def _initialize_type_inference_patterns(self) -> Dict[str, str]:
        """Patterns to infer MongoDB field types from context"""
        return {
            # Date patterns
            r'\b(date|time|timestamp|created|updated|birth|expiry|deadline|schedule)\b': 'Date',
            # Number patterns
            r'\b(age|count|amount|price|quantity|number|id|duration|length|weight|height)\b': 'Number',
            # Boolean patterns
            r'\b(active|inactive|enabled|disabled|verified|confirmed|approved|completed|is\w+)\b': 'Boolean',
            # String patterns (default)
            r'\b(name|description|notes|comments|text|message|title|label)\b': 'String',
            # ObjectId patterns
            r'\b(\w+Id|\w+Ref|reference to \w+|belongs to \w+)\b': 'ObjectId',
            # Array patterns
            r'\b(list of|multiple|collection of|array of|history|log|tags)\b': 'Array',
        }

    def analyze_requirements_for_entities(self, functional_requirements: Dict) -> Dict[str, Any]:
        """Analyze requirements to extract entities and their fields"""
        # Extract all text content for analysis
        all_text = self._extract_all_requirement_text(functional_requirements)

        # Phase 1: Pattern-based entity extraction
        pattern_entities = self._extract_entities_with_patterns(all_text)

        # Phase 2: NLP-based field extraction
        nlp_fields = self._extract_fields_with_nlp(all_text, pattern_entities)

        # Phase 3: AI-powered enhancement and validation
        ai_enhanced = self._enhance_with_ai_analysis(all_text, pattern_entities, nlp_fields)

        # Phase 4: Synthesize all results
        final_entities = self._synthesize_analysis_results(pattern_entities, nlp_fields, ai_enhanced)

        logger.info(f"✅ Hybrid analysis completed. Extracted {len(final_entities)} entities")
        return final_entities
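
    # Illustrative output shape (hypothetical requirement: "Manage patient
    # records (name, DOB, email)"): the four phases above typically produce
    #
    #   {"patient": {"fields": {"_id": {...}, "name": {...}, "dob": {...},
    #                           "email": {...}, ...},
    #                "relationships": [], "indexes": [], "validations": []}}
    #
    # The essential fields (_id, createdAt, updatedAt, isActive) are merged in
    # during Phase 4; everything else is derived from the requirement text.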

    def _extract_all_requirement_text(self, functional_requirements: Dict) -> str:
        """Extract all text content from functional requirements"""
        text_parts = []

        # Feature names and descriptions
        if functional_requirements.get('feature_name'):
            text_parts.append(functional_requirements['feature_name'])
        if functional_requirements.get('description'):
            text_parts.append(functional_requirements['description'])

        # All features
        if functional_requirements.get('all_features'):
            text_parts.extend(functional_requirements['all_features'])

        # Technical requirements
        if functional_requirements.get('technical_requirements'):
            text_parts.extend(functional_requirements['technical_requirements'])

        # Business logic rules - MOST IMPORTANT
        if functional_requirements.get('business_logic_rules'):
            text_parts.extend(functional_requirements['business_logic_rules'])

        return ' '.join(text_parts)

    def _extract_entities_with_patterns(self, text: str) -> Dict[str, Dict]:
        """Phase 1: Pattern-based entity extraction"""
        entities = {}
        text_lower = text.lower()

        # Extract nouns that could be entities
        words = re.findall(r'\b[a-zA-Z]+\b', text)
        for word in words:
            word_clean = word.lower()

            # Skip common words
            if word_clean in ['the', 'and', 'or', 'for', 'with', 'system', 'data', 'information']:
                continue

            # Look for entity indicators in the surrounding context
            word_pattern = rf'\b{re.escape(word_clean)}\b'

            # Check if the word appears with entity-indicating context
            if re.search(rf'{word_pattern}\s+(management|record|data|information|details)', text_lower):
                entities[word_clean] = {
                    'confidence': 0.7,
                    'source': 'pattern_analysis',
                    'context': self._extract_word_context(word, text)
                }
            elif re.search(rf'(manage|create|update|delete|validate)\s+{word_pattern}', text_lower):
                entities[word_clean] = {
                    'confidence': 0.8,
                    'source': 'pattern_analysis',
                    'context': self._extract_word_context(word, text)
                }

        return entities

    def _extract_word_context(self, word: str, text: str, context_size: int = 50) -> str:
        """Extract surrounding context for a word"""
        word_index = text.lower().find(word.lower())
        if word_index == -1:
            return ""

        start = max(0, word_index - context_size)
        end = min(len(text), word_index + len(word) + context_size)
        return text[start:end]

    def _extract_fields_with_nlp(self, text: str, entities: Dict) -> Dict[str, List]:
        """Phase 2: NLP-based field extraction"""
        entity_fields = {}

        for entity_name in entities.keys():
            fields = []

            # Look for field mentions in relation to this entity
            entity_pattern = rf'\b{re.escape(entity_name)}\b'

            # Find sentences mentioning this entity
            sentences = re.split(r'[.!?]+', text)
            entity_sentences = [s for s in sentences if re.search(entity_pattern, s, re.IGNORECASE)]

            for sentence in entity_sentences:
                # Extract potential field names from the sentence
                sentence_fields = self._extract_fields_from_sentence(sentence, entity_name)
                fields.extend(sentence_fields)

            entity_fields[entity_name] = fields

        return entity_fields

    def _extract_fields_from_sentence(self, sentence: str, entity_name: str) -> List[Dict]:
        """Extract field information from a sentence"""
        fields = []
        sentence_lower = sentence.lower()

        # Look for field lists in parentheses, e.g. "personal information (name, DOB, contact details)"
        parentheses_content = re.findall(r'\(([^)]+)\)', sentence)
        for content in parentheses_content:
            field_names = [name.strip() for name in content.split(',')]
            for field_name in field_names:
                if field_name:
                    field_config = self._infer_field_type_from_name_and_context(field_name, sentence)
                    fields.append({
                        'name': self._normalize_field_name(field_name),
                        'config': field_config,
                        'source': 'nlp_extraction',
                        'context': sentence
                    })

        # Look for validation patterns like "ensure unique", "validate format"
        if re.search(r'\bunique\b', sentence_lower):
            fields.append({
                'constraint': 'unique',
                'applies_to': self._extract_field_from_validation_context(sentence),
                'source': 'validation_pattern'
            })
        if re.search(r'\brequired\b', sentence_lower):
            fields.append({
                'constraint': 'required',
                'applies_to': self._extract_field_from_validation_context(sentence),
                'source': 'validation_pattern'
            })

        return fields
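
    # Worked example (hypothetical sentence): for
    #   "Manage patient records (name, DOB, email) and ensure email is unique"
    # the parentheses pass emits field entries named name/dob/email, and the
    # validation pass appends {'constraint': 'unique', 'applies_to': 'email',
    # 'source': 'validation_pattern'}, since "ensure" is followed by "email".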

    def _infer_field_type_from_name_and_context(self, field_name: str, context: str) -> Dict:
        """Infer MongoDB field type from field name and context"""
        field_name_lower = field_name.lower()
        context_lower = context.lower()

        # Check against type inference patterns
        for pattern, mongo_type in self.field_type_mappings.items():
            if re.search(pattern, field_name_lower) or re.search(pattern, context_lower):
                return self._create_field_config(mongo_type, field_name, context)

        # Default to String if no specific type is detected
        return self._create_field_config('String', field_name, context)

    def _create_field_config(self, mongo_type: str, field_name: str, context: str) -> Dict:
        """Create MongoDB field configuration"""
        config = {'type': mongo_type}

        # Add validation based on context
        if re.search(r'\brequired\b', context.lower()):
            config['required'] = True
        if re.search(r'\bunique\b', context.lower()):
            config['unique'] = True

        if mongo_type == 'String':
            config['trim'] = True
            # Email detection (raw string so \s and \. survive into the generated code)
            if re.search(r'\bemail\b', field_name.lower()):
                config['lowercase'] = True
                config['match'] = r'/^[^\s@]+@[^\s@]+\.[^\s@]+$/'

        if mongo_type == 'Date':
            if 'created' in field_name.lower() or 'updated' in field_name.lower():
                config['default'] = 'Date.now'

        return config

    def _normalize_field_name(self, field_name: str) -> str:
        """Normalize a field name to camelCase"""
        # Clean the field name
        clean_name = re.sub(r'[^a-zA-Z\s]', '', field_name)
        words = clean_name.split()
        if not words:
            return field_name

        # Convert to camelCase
        if len(words) == 1:
            return words[0].lower()
        return words[0].lower() + ''.join(word.capitalize() for word in words[1:])

    def _extract_field_from_validation_context(self, sentence: str) -> str:
        """Extract the field name from a validation context"""
        # Simple extraction - look for the subject of the validation
        words = sentence.split()
        for i, word in enumerate(words):
            if word.lower() in ['validate', 'ensure', 'check']:
                if i + 1 < len(words):
                    return self._normalize_field_name(words[i + 1])
        return ""
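
    # Deterministic examples of the helpers above (hypothetical inputs):
    #   _normalize_field_name("contact details")  -> "contactDetails"
    #   _normalize_field_name("DOB")              -> "dob"
    #   _create_field_config("String", "email", "validate email format")
    #     -> {'type': 'String', 'trim': True, 'lowercase': True,
    #         'match': r'/^[^\s@]+@[^\s@]+\.[^\s@]+$/'}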

    def _enhance_with_ai_analysis(self, text: str, pattern_entities: Dict, nlp_fields: Dict) -> Dict:
        """Phase 3: AI-powered enhancement"""
        if not self.claude_client:
            logger.warning("AI not available, skipping AI enhancement")
            return {}

        try:
            prompt = f"""
Analyze these business requirements and extract MongoDB schema information:

Requirements Text:
{text}

Already identified entities: {list(pattern_entities.keys())}
Already identified fields: {nlp_fields}

Please provide additional insights:
1. Any missing entities that should be included?
2. What additional fields are needed for each entity?
3. What are the relationships between entities?
4. What validation rules should be applied?
5. What indexes would be needed for performance?

Return your analysis as structured JSON with:
{{
    "additional_entities": ["entity1", "entity2"],
    "entity_fields": {{
        "entity_name": {{
            "field_name": {{"type": "String|Number|Date|Boolean|ObjectId", "required": true/false, "unique": true/false}}
        }}
    }},
    "relationships": [
        {{"from": "entity1", "to": "entity2", "type": "one_to_many|many_to_one|many_to_many"}}
    ],
    "business_validations": [
        {{"field": "field_name", "validation": "description", "implementation": "mongoose_validation_code"}}
    ],
    "recommended_indexes": [
        {{"collection": "entity_name", "index": {{"field": 1}}, "reason": "performance_reason"}}
    ]
}}

Focus on extracting information that's actually mentioned or implied in the requirements, not general assumptions.
"""

            message = self.claude_client.messages.create(
                model="claude-3-5-sonnet-20241022",
                max_tokens=4000,
                temperature=0.1,
                messages=[{"role": "user", "content": prompt}]
            )
            ai_response = message.content[0].text.strip()

            # Try to parse the JSON response
            try:
                ai_analysis = json.loads(ai_response)
                logger.info("✅ AI analysis completed successfully")
                return ai_analysis
            except json.JSONDecodeError:
                logger.warning("AI response was not valid JSON, parsing manually")
                return self._parse_ai_response_manually(ai_response)

        except Exception as e:
            logger.error(f"AI analysis failed: {e}")
            return {}

    def _parse_ai_response_manually(self, response: str) -> Dict:
        """Fallback manual parsing of the AI response"""
        # Return an empty but well-formed analysis as the fallback
        return {
            "additional_entities": [],
            "entity_fields": {},
            "relationships": [],
            "business_validations": [],
            "recommended_indexes": []
        }

    def _synthesize_analysis_results(self, pattern_entities: Dict, nlp_fields: Dict, ai_enhanced: Dict) -> Dict[str, Any]:
        """Phase 4: Synthesize all analysis results"""
        final_entities = {}

        # Combine all entity sources
        all_entities = set(pattern_entities.keys())
        all_entities.update(ai_enhanced.get('additional_entities', []))

        for entity_name in all_entities:
            entity_config = {
                'fields': {},
                'relationships': [],
                'indexes': [],
                'validations': []
            }

            # Add base fields that every entity needs
            entity_config['fields'].update(self._get_essential_fields())

            # Add fields from NLP analysis
            if entity_name in nlp_fields:
                for field_info in nlp_fields[entity_name]:
                    if 'name' in field_info and 'config' in field_info:
                        entity_config['fields'][field_info['name']] = field_info['config']

            # Add fields from AI analysis (these override NLP results on conflict)
            ai_entity_fields = ai_enhanced.get('entity_fields', {}).get(entity_name, {})
            entity_config['fields'].update(ai_entity_fields)

            # Add relationships
            for rel in ai_enhanced.get('relationships', []):
                if rel.get('from') == entity_name or rel.get('to') == entity_name:
                    entity_config['relationships'].append(rel)

            # Add indexes
            for idx in ai_enhanced.get('recommended_indexes', []):
                if idx.get('collection') == entity_name:
                    entity_config['indexes'].append(idx)

            # Add validations
            for val in ai_enhanced.get('business_validations', []):
                if val.get('field') in entity_config['fields']:
                    entity_config['validations'].append(val)

            final_entities[entity_name] = entity_config

        return final_entities

    def _get_essential_fields(self) -> Dict[str, Any]:
        """Get essential fields every MongoDB document needs"""
        return {
            "_id": {"type": "ObjectId", "required": True},
            "createdAt": {"type": "Date", "default": "Date.now"},
            "updatedAt": {"type": "Date", "default": "Date.now"},
            "isActive": {"type": "Boolean", "default": True}
        }
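

# Standalone usage sketch for the analyzer (hypothetical requirements dict).
# With the anthropic package installed, valid Anthropic credentials are needed
# for the AI phase; without the package, the pattern and NLP phases still run:
#
#   analyzer = HybridRequirementsAnalyzer()
#   entities = analyzer.analyze_requirements_for_entities({
#       "feature_name": "Patient Management",
#       "business_logic_rules": ["Manage patient records (name, DOB, email)"],
#   })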
= "mongodb" logger.info("🍃 Dynamic MongoDB Designer with Hybrid Analysis initialized") def generate_mongodb_architecture(self, functional_requirements: Dict, business_context: Dict) -> Dict[str, Any]: """Generate MongoDB architecture through dynamic analysis""" try: logger.info("🏗️ Starting dynamic MongoDB architecture generation") # Analyze requirements to extract entities and fields entities_analysis = self.analyzer.analyze_requirements_for_entities(functional_requirements) # Generate MongoDB collections collections_design = self._generate_collections_from_analysis(entities_analysis) # Generate Mongoose schemas mongoose_schemas = self._generate_mongoose_schemas_from_analysis(entities_analysis) # Generate performance configuration performance_config = self._generate_performance_configuration(entities_analysis) # Generate connection and deployment config deployment_config = self._generate_deployment_configuration( functional_requirements.get('complexity_level', 'medium') ) architecture = { "database_type": "mongodb", "entities_analyzed": len(entities_analysis), "collections_design": collections_design, "mongoose_schemas": mongoose_schemas, "performance_indexes": performance_config.get('indexes', {}), "aggregation_pipelines": performance_config.get('aggregations', {}), "connection_configuration": deployment_config, "security_implementation": self._generate_security_config(entities_analysis), "backup_strategy": self._generate_backup_strategy(), "monitoring_setup": self._generate_monitoring_config(), "generated_at": datetime.utcnow().isoformat(), "analysis_method": "hybrid_nlp_ai_pattern", "requirements_coverage": self._calculate_requirements_coverage( functional_requirements, entities_analysis ) } logger.info("✅ Dynamic MongoDB architecture generation completed") return architecture except Exception as e: logger.error(f"❌ MongoDB architecture generation failed: {e}") raise async def design_architecture(self, context: Dict[str, Any]) -> Dict[str, Any]: """Router-compatible method that calls the main generation method""" try: logger.info("🍃 MongoDB Designer started via router") functional_requirements = context['functional_requirements'] business_context = context['business_context'] # Call the existing comprehensive method result = self.generate_mongodb_architecture(functional_requirements, business_context) # Format result for router compatibility return { "success": True, "architecture": result, "specialist": result, "database_type": "mongodb", "specialist_used": "DynamicMongoDBDesigner" } except Exception as e: logger.error(f"❌ MongoDB design_architecture failed: {e}") return { "success": False, "error": str(e), "architecture": self._get_fallback_architecture(), "specialist": "MongoDB", "database_type": "mongodb" } def _get_fallback_architecture(self) -> Dict[str, Any]: """Fallback architecture if main generation fails""" return { "database_type": "mongodb", "collections_design": { "users": {"description": "Basic user collection"}, "documents": {"description": "Generic document collection"} }, "mongoose_schemas": {}, "note": "Fallback MongoDB architecture - main analysis failed" } def _generate_collections_from_analysis(self, entities_analysis: Dict) -> Dict[str, Any]: """Generate MongoDB collections from analysis results""" collections = {} for entity_name, entity_config in entities_analysis.items(): collection_name = f"{entity_name}s" # Simple pluralization collections[collection_name] = { "description": f"Collection for {entity_name} entities", "fields": entity_config.get('fields', {}), 
"relationships": entity_config.get('relationships', []), "business_validations": entity_config.get('validations', []) } return collections def _generate_mongoose_schemas_from_analysis(self, entities_analysis: Dict) -> Dict[str, str]: """Generate actual Mongoose schema code from analysis""" schemas = {} for entity_name, entity_config in entities_analysis.items(): schema_name = entity_name.capitalize() schema_code = self._build_mongoose_schema_code( schema_name, entity_config.get('fields', {}), entity_config.get('validations', []) ) schemas[f"{schema_name}Schema"] = schema_code return schemas def _build_mongoose_schema_code(self, schema_name: str, fields: Dict, validations: List) -> str: """Build actual Mongoose schema code""" schema_code = f"""const mongoose = require('mongoose'); const {schema_name}Schema = new mongoose.Schema({{ """ # Generate field definitions for field_name, field_config in fields.items(): schema_code += self._generate_mongoose_field_definition(field_name, field_config) schema_code += "}, {\n timestamps: true,\n versionKey: false\n});\n\n" # Add business validation middleware if validations: schema_code += self._generate_validation_middleware(schema_name, validations) # Add common methods schema_code += self._generate_schema_methods(schema_name) schema_code += f"\nmodule.exports = mongoose.model('{schema_name}', {schema_name}Schema);\n" return schema_code def _generate_mongoose_field_definition(self, field_name: str, field_config: Dict) -> str: """Generate Mongoose field definition""" field_def = f" {field_name}: {{\n" for key, value in field_config.items(): if key == "type": if value == "ObjectId": field_def += " type: mongoose.Schema.Types.ObjectId,\n" elif value == "Mixed": field_def += " type: mongoose.Schema.Types.Mixed,\n" else: field_def += f" type: {value},\n" elif key == "default": if value == "Date.now": field_def += " default: Date.now,\n" elif isinstance(value, str): field_def += f" default: '{value}',\n" else: field_def += f" default: {value},\n" elif key == "match": field_def += f" match: {value},\n" else: field_def += f" {key}: {value},\n" field_def += " },\n" return field_def def _generate_validation_middleware(self, schema_name: str, validations: List) -> str: """Generate business validation middleware""" middleware = f""" // Business validation middleware for {schema_name} {schema_name}Schema.pre('save', function(next) {{ // Business logic validations """ for validation in validations: middleware += f" // {validation.get('validation', '')}\n" if validation.get('implementation'): middleware += f" {validation['implementation']}\n" middleware += " next();\n});\n" return middleware def _generate_schema_methods(self, schema_name: str) -> str: """Generate common schema methods""" return f""" // Instance methods {schema_name}Schema.methods.toSafeObject = function() {{ const obj = this.toObject(); delete obj.password; delete obj.__v; return obj; }}; // Static methods {schema_name}Schema.statics.findActive = function() {{ return this.find({{ isActive: true }}); }}; """ def _generate_performance_configuration(self, entities_analysis: Dict) -> Dict[str, Any]: """Generate performance configuration from analysis""" config = { "indexes": {}, "aggregations": {} } for entity_name, entity_config in entities_analysis.items(): # Add indexes from analysis entity_indexes = entity_config.get('indexes', []) if entity_indexes: config["indexes"][f"{entity_name}s"] = entity_indexes # Generate basic aggregation pipelines config["aggregations"][f"{entity_name}Stats"] = [ 
{"$group": {"_id": "$status", "count": {"$sum": 1}}}, {"$sort": {"count": -1}} ] return config def _generate_deployment_configuration(self, complexity_level: str) -> Dict[str, Any]: """Generate deployment configuration""" return { "database_url": "mongodb://localhost:27017/{{database_name}}", "connection_options": { "useNewUrlParser": True, "useUnifiedTopology": True, "maxPoolSize": 20 if complexity_level == "high" else 10 }, "environment_variables": { "MONGODB_URI": "MongoDB connection string", "DB_NAME": "Database name" } } def _generate_security_config(self, entities_analysis: Dict) -> Dict[str, Any]: """Generate security configuration""" return { "authentication": { "enabled": True, "mechanism": "SCRAM-SHA-256" }, "encryption": { "at_rest": True, "in_transit": True } } def _generate_backup_strategy(self) -> Dict[str, Any]: """Generate backup strategy""" return { "method": "mongodump", "frequency": "daily", "retention": "30 days" } def _generate_monitoring_config(self) -> Dict[str, Any]: """Generate monitoring configuration""" return { "performance_monitoring": { "slow_query_threshold": "100ms", "profiling_level": 1 } } def _calculate_requirements_coverage(self, functional_requirements: Dict, entities_analysis: Dict) -> Dict[str, Any]: """Calculate how well the analysis covered the requirements""" total_requirements = ( len(functional_requirements.get('technical_requirements', [])) + len(functional_requirements.get('business_logic_rules', [])) ) entities_count = len(entities_analysis) total_fields = sum(len(entity.get('fields', {})) for entity in entities_analysis.values()) return { "total_requirements_analyzed": total_requirements, "entities_extracted": entities_count, "total_fields_generated": total_fields, "coverage_estimation": min(95, (entities_count * 20) + (total_fields * 2)), "analysis_confidence": "high" if total_requirements > 5 else "medium" }