""" Simulation Engine v2.0 - World Class Precision Enhanced with Big5 + behavioral profile prompts. """ import json import time from typing import Dict, List, Any from anthropic import Anthropic import sys from pathlib import Path # Add parent dir sys.path.append(str(Path(__file__).resolve().parent.parent)) try: import config except ImportError: # Fallback for some linter environments import sys sys.path.append("..") import config class SimulationEngine: def __init__(self, api_key: str): self.client = Anthropic(api_key=api_key) self.max_retries = 5 def construct_system_prompt(self, persona: Dict) -> str: """ Builds enhanced System Prompt using Big5 + behavioral profiles. Uses all 23 personification columns from merged_personas.xlsx. """ # Demographics first_name = persona.get('First Name', 'Student') last_name = persona.get('Last Name', '') age = persona.get('Age', 16) gender = persona.get('Gender', 'Unknown') age_category = persona.get('Age Category', 'adolescent') # Big 5 Personality Traits openness = persona.get('Openness Score', 5) openness_traits = persona.get('Openness Traits', '') openness_narrative = persona.get('Openness Narrative', '') conscientiousness = persona.get('Conscientiousness Score', 5) conscientiousness_traits = persona.get('Conscientiousness Traits', '') conscientiousness_narrative = persona.get('Conscientiousness Narrative', '') extraversion = persona.get('Extraversion Score', 5) extraversion_traits = persona.get('Extraversion Traits', '') extraversion_narrative = persona.get('Extraversion Narrative', '') agreeableness = persona.get('Agreeableness Score', 5) agreeableness_traits = persona.get('Agreeableness Traits', '') agreeableness_narrative = persona.get('Agreeableness Narrative', '') neuroticism = persona.get('Neuroticism Score', 5) neuroticism_traits = persona.get('Neuroticism Traits', '') neuroticism_narrative = persona.get('Neuroticism Narrative', '') # Behavioral Profiles cognitive_style = persona.get('Cognitive Style', '') learning_prefs = persona.get('Learning Preferences', '') ei_profile = persona.get('Emotional Intelligence Profile', '') social_patterns = persona.get('Social Patterns', '') stress_response = persona.get('Stress Response Pattern', '') motivation = persona.get('Motivation Drivers', '') academic_behavior = persona.get('Academic Behavioral Indicators', '') psych_notes = persona.get('Psychometric Notes', '') # Behavioral fingerprint (optional from fixed_3k_personas, parsed as JSON) behavioral_fp = persona.get('behavioral_fingerprint', {}) if isinstance(behavioral_fp, str): try: behavioral_fp = json.loads(behavioral_fp) except: behavioral_fp = {} fp_text = "\n".join([f"- {k}: {v}" for k, v in behavioral_fp.items()]) if behavioral_fp else "Not available" # Goals & Interests (from fixed_3k_personas - backward compatible) short_term_focuses = [persona.get('short_term_focus_1', ''), persona.get('short_term_focus_2', ''), persona.get('short_term_focus_3', '')] long_term_focuses = [persona.get('long_term_focus_1', ''), persona.get('long_term_focus_2', ''), persona.get('long_term_focus_3', '')] strengths = [persona.get('strength_1', ''), persona.get('strength_2', ''), persona.get('strength_3', '')] improvements = [persona.get('improvement_area_1', ''), persona.get('improvement_area_2', ''), persona.get('improvement_area_3', '')] hobbies = [persona.get('hobby_1', ''), persona.get('hobby_2', ''), persona.get('hobby_3', '')] clubs = persona.get('clubs', '') achievements = persona.get('achievements', '') expectations = [persona.get('expectation_1', ''), persona.get('expectation_2', ''), persona.get('expectation_3', '')] segment = persona.get('segment', '') archetype = persona.get('archetype', '') # Filter out empty values for cleaner presentation short_term_str = ", ".join([f for f in short_term_focuses if f]) long_term_str = ", ".join([f for f in long_term_focuses if f]) strengths_str = ", ".join([s for s in strengths if s]) improvements_str = ", ".join([i for i in improvements if i]) hobbies_str = ", ".join([h for h in hobbies if h]) expectations_str = ", ".join([e for e in expectations if e]) # Build Goals & Interests section (only if data exists) goals_section = "" if short_term_str or long_term_str or strengths_str or improvements_str or hobbies_str or clubs or achievements or expectations_str or segment or archetype: goals_section = "\n## Your Goals & Interests:\n" if short_term_str: goals_section += f"- Short-term Focus: {short_term_str}\n" if long_term_str: goals_section += f"- Long-term Goals: {long_term_str}\n" if strengths_str: goals_section += f"- Strengths: {strengths_str}\n" if improvements_str: goals_section += f"- Areas for Improvement: {improvements_str}\n" if hobbies_str: goals_section += f"- Hobbies: {hobbies_str}\n" if clubs: goals_section += f"- Clubs/Activities: {clubs}\n" if achievements: goals_section += f"- Achievements: {achievements}\n" if expectations_str: goals_section += f"- Expectations: {expectations_str}\n" if segment: goals_section += f"- Segment: {segment}\n" if archetype: goals_section += f"- Archetype: {archetype}\n" return f"""You are {first_name} {last_name}, a {age}-year-old {gender} student ({age_category}). ## Your Personality Profile (Big Five): ### Openness ({openness}/10) Traits: {openness_traits} {openness_narrative} ### Conscientiousness ({conscientiousness}/10) Traits: {conscientiousness_traits} {conscientiousness_narrative} ### Extraversion ({extraversion}/10) Traits: {extraversion_traits} {extraversion_narrative} ### Agreeableness ({agreeableness}/10) Traits: {agreeableness_traits} {agreeableness_narrative} ### Neuroticism ({neuroticism}/10) Traits: {neuroticism_traits} {neuroticism_narrative} ## Your Behavioral Profile: - Cognitive Style: {cognitive_style} - Learning Preferences: {learning_prefs} - Emotional Intelligence: {ei_profile} - Social Patterns: {social_patterns} - Stress Response: {stress_response} - Motivation: {motivation} - Academic Behavior: {academic_behavior} {goals_section}## Additional Context: {psych_notes} ## Behavioral Fingerprint: {fp_text} ## TASK: You are taking a psychological assessment survey. Answer each question HONESTLY based on your personality profile above. - Choose the Likert scale option (1-5) that best represents how YOU would genuinely respond. - Be CONSISTENT with your personality scores (e.g., if you have high Neuroticism, reflect that anxiety in your responses). - Do NOT game the system or pick "socially desirable" answers. Answer as the REAL you. """ def construct_user_prompt(self, questions: List[Dict[str, Any]]) -> str: """ Builds the User Prompt containing questions with Q-codes. """ prompt_lines = ["Answer the following questions. Return ONLY a valid JSON object mapping Q-Code to your selected option (1-5).\n"] for idx, q in enumerate(questions): q_code = q.get('q_code', f"Q{idx}") question_text = q.get('question', '') options = q.get('options_list', []).copy() prompt_lines.append(f"[{q_code}]: {question_text}") for opt_idx, opt in enumerate(options): prompt_lines.append(f" {opt_idx + 1}. {opt}") prompt_lines.append("") prompt_lines.append("## OUTPUT FORMAT (JSON):") prompt_lines.append("{") prompt_lines.append(' "P.1.1.1": 3,') prompt_lines.append(' "P.1.1.2": 5,') prompt_lines.append(" ...") prompt_lines.append("}") prompt_lines.append("\nIMPORTANT: Return ONLY the JSON object. No explanation, no preamble, just the JSON.") return "\n".join(prompt_lines) def simulate_batch(self, persona: Dict, questions: List[Dict], verbose: bool = False) -> Dict: """ Synchronous LLM call to simulate student responses. Returns: { "Q-CODE": selected_index (1-5) } """ system_prompt = self.construct_system_prompt(persona) user_prompt = self.construct_user_prompt(questions) if verbose: print(f"\n--- SYSTEM PROMPT ---\n{system_prompt[:500]}...") print(f"\n--- USER PROMPT (first 500 chars) ---\n{user_prompt[:500]}...") for attempt in range(self.max_retries): try: # Use the stable version-pinned model response = self.client.messages.create( model=config.LLM_MODEL, max_tokens=config.LLM_MAX_TOKENS, temperature=config.LLM_TEMPERATURE, system=system_prompt, messages=[{"role": "user", "content": user_prompt}] ) # Extract text text = response.content[0].text.strip() # Robust JSON Extraction (handles markdown blocks and noise) json_str = "" # Try to find content between ```json and ``` if "```json" in text: start_index = text.find("```json") + 7 end_index = text.find("```", start_index) json_str = text[start_index:end_index].strip() elif "```" in text: # Generic code block start_index = text.find("```") + 3 end_index = text.find("```", start_index) json_str = text[start_index:end_index].strip() else: # Fallback to finding first { and last } start = text.find('{') end = text.rfind('}') + 1 if start != -1: json_str = text[start:end] if not json_str: if verbose: print(f" โš ๏ธ No JSON block found in attempt {attempt+1}. Text snippet: {text[:200]}") raise ValueError("No JSON found") try: result = json.loads(json_str) except json.JSONDecodeError as je: if verbose: print(f" โš ๏ธ JSON Decode Error in attempt {attempt+1}: {je}") print(f" ๐Ÿ” Raw JSON string (first 100 chars): {json_str[:100]}") raise je # Validate all values are 1-5 validated: Dict[str, Any] = {} passed: int = 0 for q_code, value in result.items(): try: # Some models might return strings or floats val: int = int(float(value)) if isinstance(value, (int, float, str)) else 0 if 1 <= val <= 5: validated[str(q_code)] = val passed = int(passed + 1) except: pass if verbose: print(f" โœ… Validated {passed}/{len(questions)} keys from LLM response (Attempt {attempt+1})") # Success - return results return validated except Exception as e: # Specific check for Credit Balance exhaustion error_msg = str(e).lower() if "credit balance" in error_msg or "insufficient_funds" in error_msg: print("\n" + "!"*80) print("๐Ÿ›‘ CRITICAL: YOUR ANTHROPIC CREDIT BALANCE IS EXHAUSTED.") print("๐Ÿ‘‰ REASON: The simulation has stopped to prevent data loss.") print("๐Ÿ‘‰ ACTION: Please top up credits at: https://console.anthropic.com/settings/plans") print("!"*80 + "\n") # Terminate the script gracefully - no point in retrying sys.exit(1) # Wait longer each time wait_time = (attempt + 1) * 2 print(f" โš ๏ธ Simulation Attempt {attempt+1} failed ({type(e).__name__}): {e}. Retrying in {wait_time}s...") time.sleep(wait_time) if verbose: print(f" โŒ CRITICAL: Chunk simulation failed after {self.max_retries} attempts.") return {} if __name__ == "__main__": # Test with one student from data_loader import load_personas, load_questions print("๐Ÿงช Testing Enhanced Simulator v2.0...") adolescents, adults = load_personas() questions_map = load_questions() if not config.ANTHROPIC_API_KEY: print("โŒ No API Key found in environment. Set ANTHROPIC_API_KEY.") exit(1) # Pick first adolescent student = adolescents[0] print(f"\n๐Ÿ‘ค Student: {student.get('First Name')} {student.get('Last Name')}") print(f" CPID: {student.get('StudentCPID')}") print(f" Openness: {student.get('Openness Score')}") # Pick first domain's first 5 questions domain = list(questions_map.keys())[0] questions = questions_map[domain][:5] print(f"\n๐Ÿ“ Testing {domain} with {len(questions)} questions") engine = SimulationEngine(config.ANTHROPIC_API_KEY) result = engine.simulate_batch(student, questions, verbose=True) print(f"\nโœ… Result: {json.dumps(result, indent=2)}")