CP_Assessment_engine/services/simulator.py
2026-02-10 12:59:40 +05:30

324 lines
14 KiB
Python

"""
Simulation Engine v2.0 - World Class Precision
Enhanced with Big5 + behavioral profile prompts.
"""
import json
import time
from typing import Dict, List, Any
from anthropic import Anthropic
import sys
from pathlib import Path
# Add parent dir
sys.path.append(str(Path(__file__).resolve().parent.parent))
try:
import config
except ImportError:
# Fallback for some linter environments
import sys
sys.path.append("..")
import config
class SimulationEngine:
    """Simulate a persona's Likert-scale survey answers with an Anthropic model.

    The engine renders a persona dict into a system prompt, renders a batch
    of questions into a user prompt, calls the Messages API and parses the
    JSON answer map out of the reply.

    Attributes:
        client: ``Anthropic`` SDK client created from the given API key.
        max_retries: Maximum number of API attempts per ``simulate_batch`` call.
    """

    # Big Five trait names; each one maps to the persona columns
    # "<Trait> Score", "<Trait> Traits" and "<Trait> Narrative".
    _BIG_FIVE_TRAITS = (
        "Openness",
        "Conscientiousness",
        "Extraversion",
        "Agreeableness",
        "Neuroticism",
    )

    def __init__(self, api_key: str):
        """Create the API client.

        Args:
            api_key: Anthropic API key passed straight to the SDK client.
        """
        self.client = Anthropic(api_key=api_key)
        self.max_retries = 5

    @staticmethod
    def _format_fingerprint(raw: Any) -> str:
        """Render the optional behavioral fingerprint as a bullet list.

        Accepts either a dict or a JSON string encoding one. Anything else
        (invalid JSON, a JSON list/number, ``None``, an empty dict) yields
        ``"Not available"`` instead of raising.
        """
        if isinstance(raw, str):
            try:
                raw = json.loads(raw)
            except ValueError:  # json.JSONDecodeError is a ValueError subclass
                raw = {}
        if not isinstance(raw, dict) or not raw:
            return "Not available"
        return "\n".join(f"- {k}: {v}" for k, v in raw.items())

    @staticmethod
    def _build_goals_section(persona: Dict) -> str:
        """Build the optional "Goals & Interests" section.

        Returns an empty string when none of the goal/interest fields hold a
        truthy value; otherwise a header followed by one line per non-empty
        field, each terminated by a newline.
        """
        def joined(prefix: str) -> str:
            # Numbered columns "<prefix>_1" .. "<prefix>_3"; empty ones are dropped.
            values = [persona.get(f"{prefix}_{n}", '') for n in (1, 2, 3)]
            return ", ".join([v for v in values if v])

        entries = [
            ("Short-term Focus", joined("short_term_focus")),
            ("Long-term Goals", joined("long_term_focus")),
            ("Strengths", joined("strength")),
            ("Areas for Improvement", joined("improvement_area")),
            ("Hobbies", joined("hobby")),
            ("Clubs/Activities", persona.get('clubs', '')),
            ("Achievements", persona.get('achievements', '')),
            ("Expectations", joined("expectation")),
            ("Segment", persona.get('segment', '')),
            ("Archetype", persona.get('archetype', '')),
        ]
        lines = [f"- {label}: {value}\n" for label, value in entries if value]
        if not lines:
            return ""
        return "\n## Your Goals & Interests:\n" + "".join(lines)

    @staticmethod
    def _extract_json_text(text: str) -> str:
        """Pull the JSON object text out of a model reply.

        Looks inside a ```json fence first, then a generic ``` fence, then
        the whole reply, and returns the span from the first ``{`` to the
        last ``}`` of that region. Returns ``""`` when no such span exists.
        """
        candidate = text
        for fence in ("```json", "```"):
            start = text.find(fence)
            if start == -1:
                continue
            start += len(fence)
            end = text.find("```", start)
            if end == -1:
                # Unterminated fence (e.g. a truncated reply): take the rest
                # of the text instead of slicing with -1, which would drop
                # the final character.
                end = len(text)
            candidate = text[start:end]
            break

        # Trim to the braces so stray language tags or chatter around the
        # object do not reach json.loads.
        first = candidate.find('{')
        last = candidate.rfind('}')
        if first == -1 or last < first:
            return ""
        return candidate[first:last + 1]

    @staticmethod
    def _validate_answers(result: Any) -> Dict[str, int]:
        """Keep only answers that are valid Likert values (1-5).

        Numeric strings and floats are coerced with ``int(float(value))``.
        Booleans, ``None``, containers, unparsable strings and non-finite
        numbers are skipped.

        Raises:
            ValueError: If ``result`` is not a JSON object (dict).
        """
        if not isinstance(result, dict):
            raise ValueError("LLM response JSON is not an object")

        validated: Dict[str, int] = {}
        for q_code, value in result.items():
            # bool is a subclass of int; True/False are not survey answers.
            if isinstance(value, bool) or not isinstance(value, (int, float, str)):
                continue
            try:
                val = int(float(value))
            except (ValueError, OverflowError):
                # Non-numeric string, NaN, or +/-inf.
                continue
            if 1 <= val <= 5:
                validated[str(q_code)] = val
        return validated

    def construct_system_prompt(self, persona: Dict) -> str:
        """
        Builds enhanced System Prompt using Big5 + behavioral profiles.

        Reads the demographic, Big Five (score / traits / narrative per
        trait) and behavioral-profile columns from ``persona``, plus the
        optional goals/interests fields and ``behavioral_fingerprint``.
        Missing keys fall back to neutral defaults (score 5, empty text).

        Args:
            persona: Mapping of persona column names to values.

        Returns:
            The full system prompt text, ending with the task instructions.
        """
        # Demographics
        first_name = persona.get('First Name', 'Student')
        last_name = persona.get('Last Name', '')
        age = persona.get('Age', 16)
        gender = persona.get('Gender', 'Unknown')
        age_category = persona.get('Age Category', 'adolescent')

        lines = [
            f"You are {first_name} {last_name}, a {age}-year-old {gender} student ({age_category}).",
            "## Your Personality Profile (Big Five):",
        ]

        # Big 5 Personality Traits
        for trait in self._BIG_FIVE_TRAITS:
            score = persona.get(f"{trait} Score", 5)
            traits = persona.get(f"{trait} Traits", '')
            narrative = persona.get(f"{trait} Narrative", '')
            lines.extend([
                f"### {trait} ({score}/10)",
                f"Traits: {traits}",
                f"{narrative}",
            ])

        # Behavioral Profiles
        cognitive_style = persona.get('Cognitive Style', '')
        learning_prefs = persona.get('Learning Preferences', '')
        ei_profile = persona.get('Emotional Intelligence Profile', '')
        social_patterns = persona.get('Social Patterns', '')
        stress_response = persona.get('Stress Response Pattern', '')
        motivation = persona.get('Motivation Drivers', '')
        academic_behavior = persona.get('Academic Behavioral Indicators', '')
        psych_notes = persona.get('Psychometric Notes', '')

        # Optional sections; each degrades gracefully when data is absent.
        goals_section = self._build_goals_section(persona)
        fp_text = self._format_fingerprint(persona.get('behavioral_fingerprint', {}))

        lines.extend([
            "## Your Behavioral Profile:",
            f"- Cognitive Style: {cognitive_style}",
            f"- Learning Preferences: {learning_prefs}",
            f"- Emotional Intelligence: {ei_profile}",
            f"- Social Patterns: {social_patterns}",
            f"- Stress Response: {stress_response}",
            f"- Motivation: {motivation}",
            f"- Academic Behavior: {academic_behavior}",
            # goals_section is either "" or ends with a newline of its own.
            f"{goals_section}## Additional Context:",
            f"{psych_notes}",
            "## Behavioral Fingerprint:",
            fp_text,
            "## TASK:",
            "You are taking a psychological assessment survey. Answer each question HONESTLY based on your personality profile above.",
            "- Choose the Likert scale option (1-5) that best represents how YOU would genuinely respond.",
            "- Be CONSISTENT with your personality scores (e.g., if you have high Neuroticism, reflect that anxiety in your responses).",
            '- Do NOT game the system or pick "socially desirable" answers. Answer as the REAL you.',
            "",  # keeps the trailing newline at the end of the prompt
        ])
        return "\n".join(lines)

    def construct_user_prompt(self, questions: List[Dict[str, Any]]) -> str:
        """
        Builds the User Prompt containing questions with Q-codes.

        Args:
            questions: Question dicts with ``q_code``, ``question`` and
                ``options_list`` keys. A missing ``q_code`` falls back to
                ``Q<index>``; a missing or ``None`` option list renders no
                options.

        Returns:
            Prompt text listing every question with numbered options,
            followed by the required JSON output format.
        """
        prompt_lines = ["Answer the following questions. Return ONLY a valid JSON object mapping Q-Code to your selected option (1-5).\n"]
        for idx, q in enumerate(questions):
            q_code = q.get('q_code', f"Q{idx}")
            question_text = q.get('question', '')
            options = q.get('options_list') or []
            prompt_lines.append(f"[{q_code}]: {question_text}")
            # Options are numbered from 1 to match the Likert values.
            prompt_lines.extend(
                f" {number}. {opt}" for number, opt in enumerate(options, start=1)
            )
            prompt_lines.append("")
        prompt_lines.extend([
            "## OUTPUT FORMAT (JSON):",
            "{",
            ' "P.1.1.1": 3,',
            ' "P.1.1.2": 5,',
            " ...",
            "}",
            "\nIMPORTANT: Return ONLY the JSON object. No explanation, no preamble, just the JSON.",
        ])
        return "\n".join(prompt_lines)

    def simulate_batch(self, persona: Dict, questions: List[Dict], verbose: bool = False) -> Dict:
        """
        Synchronous LLM call to simulate student responses.

        Makes up to ``self.max_retries`` attempts, waiting ``2 * attempt``
        seconds between them. Any exception raised while calling the API or
        parsing the reply triggers a retry.

        Args:
            persona: Persona mapping used to build the system prompt.
            questions: Question dicts used to build the user prompt.
            verbose: When true, print prompt previews and parsing diagnostics.

        Returns:
            ``{ "Q-CODE": selected_index (1-5) }`` containing only validated
            answers, or ``{}`` when ``questions`` is empty or every attempt
            failed.

        Note:
            If the API error mentions an exhausted credit balance the process
            is terminated via ``sys.exit(1)`` because retrying cannot succeed.
            NOTE(review): exiting from library code is kept for backward
            compatibility; callers cannot intercept it with ``except Exception``.
        """
        # Nothing to ask: skip the API call entirely.
        if not questions:
            return {}

        system_prompt = self.construct_system_prompt(persona)
        user_prompt = self.construct_user_prompt(questions)
        if verbose:
            print(f"\n--- SYSTEM PROMPT ---\n{system_prompt[:500]}...")
            print(f"\n--- USER PROMPT (first 500 chars) ---\n{user_prompt[:500]}...")

        for attempt in range(self.max_retries):
            try:
                # Model, token budget and temperature come from config.
                response = self.client.messages.create(
                    model=config.LLM_MODEL,
                    max_tokens=config.LLM_MAX_TOKENS,
                    temperature=config.LLM_TEMPERATURE,
                    system=system_prompt,
                    messages=[{"role": "user", "content": user_prompt}]
                )
                # Extract text
                text = response.content[0].text.strip()

                # Robust JSON Extraction (handles markdown blocks and noise)
                json_str = self._extract_json_text(text)
                if not json_str:
                    if verbose:
                        print(f" ⚠️ No JSON block found in attempt {attempt+1}. Text snippet: {text[:200]}")
                    raise ValueError("No JSON found")

                try:
                    result = json.loads(json_str)
                except json.JSONDecodeError as je:
                    if verbose:
                        print(f" ⚠️ JSON Decode Error in attempt {attempt+1}: {je}")
                        print(f" 🔍 Raw JSON string (first 100 chars): {json_str[:100]}")
                    raise

                # Validate all values are 1-5
                validated = self._validate_answers(result)
                if verbose:
                    print(f" ✅ Validated {len(validated)}/{len(questions)} keys from LLM response (Attempt {attempt+1})")
                # Success - return results
                return validated
            except Exception as e:
                # Specific check for Credit Balance exhaustion
                error_msg = str(e).lower()
                if "credit balance" in error_msg or "insufficient_funds" in error_msg:
                    print("\n" + "!"*80)
                    print("🛑 CRITICAL: YOUR ANTHROPIC CREDIT BALANCE IS EXHAUSTED.")
                    print("👉 REASON: The simulation has stopped to prevent data loss.")
                    print("👉 ACTION: Please top up credits at: https://console.anthropic.com/settings/plans")
                    print("!"*80 + "\n")
                    # Terminate the script gracefully - no point in retrying
                    sys.exit(1)

                if attempt + 1 >= self.max_retries:
                    # Last attempt: do not sleep or announce a retry that
                    # will never happen.
                    print(f" ⚠️ Simulation Attempt {attempt+1} failed ({type(e).__name__}): {e}. No retries left.")
                    break

                # Wait longer each time
                wait_time = (attempt + 1) * 2
                print(f" ⚠️ Simulation Attempt {attempt+1} failed ({type(e).__name__}): {e}. Retrying in {wait_time}s...")
                time.sleep(wait_time)

        if verbose:
            print(f" ❌ CRITICAL: Chunk simulation failed after {self.max_retries} attempts.")
        return {}
if __name__ == "__main__":
    # Manual smoke test: simulate the first adolescent persona answering the
    # first five questions of the first domain, printing the parsed result.
    from data_loader import load_personas, load_questions

    print("🧪 Testing Enhanced Simulator v2.0...")

    # Fail fast on a missing key before spending time loading data.
    if not config.ANTHROPIC_API_KEY:
        print("❌ No API Key found in environment. Set ANTHROPIC_API_KEY.")
        # sys.exit is always available; the bare exit() helper is injected by
        # the site module and is not guaranteed to exist.
        sys.exit(1)

    adolescents, adults = load_personas()
    questions_map = load_questions()

    # Guard against empty inputs so the failure is explicit, not an IndexError.
    if len(adolescents) == 0:
        print("❌ No adolescent personas loaded.")
        sys.exit(1)
    if len(questions_map) == 0:
        print("❌ No questions loaded.")
        sys.exit(1)

    # Pick first adolescent
    student = adolescents[0]
    print(f"\n👤 Student: {student.get('First Name')} {student.get('Last Name')}")
    print(f" CPID: {student.get('StudentCPID')}")
    print(f" Openness: {student.get('Openness Score')}")

    # Pick first domain's first 5 questions
    domain = next(iter(questions_map.keys()))
    questions = questions_map[domain][:5]
    print(f"\n📝 Testing {domain} with {len(questions)} questions")

    engine = SimulationEngine(config.ANTHROPIC_API_KEY)
    result = engine.simulate_batch(student, questions, verbose=True)
    print(f"\n✅ Result: {json.dumps(result, indent=2)}")