324 lines
14 KiB
Python
324 lines
14 KiB
Python
"""
Simulation Engine v2.0 - World Class Precision
Enhanced with Big5 + behavioral profile prompts.
"""
|
|
import json
|
|
import time
|
|
from typing import Dict, List, Any
|
|
from anthropic import Anthropic
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Make the parent of this file's directory importable so that the shared
# ``config`` module can be resolved from there.
sys.path.append(str(Path(__file__).resolve().parents[1]))

try:
    import config
except ImportError:
    # Fallback for some linter environments: also search the parent of the
    # current working directory, then retry the import.
    sys.path.append("..")
    import config
|
|
|
|
|
|
class SimulationEngine:
    """Simulate a persona's answers to Likert-scale survey questions via an LLM.

    The engine turns a persona record into a system prompt, turns a batch of
    questions into a user prompt, sends both through the Anthropic client and
    parses the reply into a ``{q_code: option}`` mapping.
    """

    # Each trait is read from the persona keys "<Trait> Score",
    # "<Trait> Traits" and "<Trait> Narrative".
    _BIG_FIVE_TRAITS = (
        "Openness",
        "Conscientiousness",
        "Extraversion",
        "Agreeableness",
        "Neuroticism",
    )

    # (prompt label, persona keys) in the order they appear in the prompt.
    _GOAL_FIELDS = (
        ("Short-term Focus", ("short_term_focus_1", "short_term_focus_2", "short_term_focus_3")),
        ("Long-term Goals", ("long_term_focus_1", "long_term_focus_2", "long_term_focus_3")),
        ("Strengths", ("strength_1", "strength_2", "strength_3")),
        ("Areas for Improvement", ("improvement_area_1", "improvement_area_2", "improvement_area_3")),
        ("Hobbies", ("hobby_1", "hobby_2", "hobby_3")),
        ("Clubs/Activities", ("clubs",)),
        ("Achievements", ("achievements",)),
        ("Expectations", ("expectation_1", "expectation_2", "expectation_3")),
        ("Segment", ("segment",)),
        ("Archetype", ("archetype",)),
    )

    def __init__(self, api_key: str):
        """Create the Anthropic client used for every simulation call.

        Args:
            api_key: API key handed to the ``Anthropic`` client.
        """
        self.client = Anthropic(api_key=api_key)
        # Maximum number of API attempts per batch before giving up.
        self.max_retries = 5

    @staticmethod
    def _is_missing(value: Any) -> bool:
        """Return True when a persona value should be treated as absent."""
        # NaN is the only float unequal to itself. Presumably blank
        # spreadsheet cells can arrive as NaN -- TODO confirm against the loader.
        if isinstance(value, float) and value != value:
            return True
        return not value

    @staticmethod
    def _fingerprint_text(raw: Any) -> str:
        """Render the behavioral fingerprint as bullet lines, or "Not available"."""
        if isinstance(raw, str):
            try:
                raw = json.loads(raw)
            except (TypeError, ValueError):  # JSONDecodeError subclasses ValueError
                raw = {}
        # Anything that is not a non-empty mapping (list, number, NaN, ...)
        # cannot be rendered as key/value bullets.
        if not isinstance(raw, dict) or not raw:
            return "Not available"
        return "\n".join(f"- {key}: {value}" for key, value in raw.items())

    @classmethod
    def _goals_section(cls, persona: Dict) -> str:
        """Build the optional "Goals & Interests" section ('' when no data exists)."""
        bullet_lines = []
        for label, keys in cls._GOAL_FIELDS:
            present = [v for v in (persona.get(k) for k in keys) if not cls._is_missing(v)]
            if present:
                # str() so numeric values cannot break the join.
                bullet_lines.append(f"- {label}: {', '.join(str(v) for v in present)}\n")
        if not bullet_lines:
            return ""
        return "\n## Your Goals & Interests:\n" + "".join(bullet_lines)

    @staticmethod
    def _extract_json_text(text: str) -> str:
        """Return the substring of *text* most likely to be the JSON payload.

        Prefers a ```json fenced block, then any fenced block, then the span
        from the first '{' to the last '}'. Returns '' when nothing is found.
        """
        for fence in ("```json", "```"):
            start = text.find(fence)
            if start == -1:
                continue
            start += len(fence)
            end = text.find("```", start)
            if end == -1:
                # Unterminated fence: take the rest of the text instead of
                # slicing to -1, which would drop the final character.
                end = len(text)
            return text[start:end].strip()

        start = text.find('{')
        end = text.rfind('}')
        if start == -1 or end < start:
            return ""
        return text[start:end + 1]

    @staticmethod
    def _validate_answers(raw: Dict[Any, Any]) -> Dict[str, int]:
        """Keep only entries whose value converts to an integer in 1..5."""
        validated: Dict[str, int] = {}
        for q_code, value in raw.items():
            # bool is a subclass of int; a JSON true/false is not a Likert answer.
            if isinstance(value, bool) or not isinstance(value, (int, float, str)):
                continue
            try:
                # Some models return strings or floats; floats are truncated.
                choice = int(float(value))
            except (ValueError, OverflowError):
                continue
            if 1 <= choice <= 5:
                validated[str(q_code)] = choice
        return validated

    def construct_system_prompt(self, persona: Dict) -> str:
        """Build the system prompt describing the persona to role-play.

        Args:
            persona: Mapping of persona fields. Every key is optional; the
                defaults are 'Student' for the first name, 16 for age,
                'Unknown' for gender, 'adolescent' for age category, 5 for
                each Big Five score and '' for all text fields.
                ``behavioral_fingerprint`` may be a dict or a JSON string.

        Returns:
            The prompt text: identity line, Big Five profile, behavioral
            profile, optional goals section, psychometric notes, behavioral
            fingerprint and the task instructions, ending with a newline.
        """
        # strip() avoids a stray space before the comma when a name part is empty.
        full_name = f"{persona.get('First Name', 'Student')} {persona.get('Last Name', '')}".strip()
        age = persona.get('Age', 16)
        gender = persona.get('Gender', 'Unknown')
        age_category = persona.get('Age Category', 'adolescent')

        trait_blocks = []
        for trait in self._BIG_FIVE_TRAITS:
            score = persona.get(f"{trait} Score", 5)
            traits = persona.get(f"{trait} Traits", '')
            narrative = persona.get(f"{trait} Narrative", '')
            trait_blocks.append(f"### {trait} ({score}/10)\nTraits: {traits}\n{narrative}")
        big_five = "\n\n".join(trait_blocks)

        goals_section = self._goals_section(persona)
        fp_text = self._fingerprint_text(persona.get('behavioral_fingerprint', {}))

        prompt_lines = [
            f"You are {full_name}, a {age}-year-old {gender} student ({age_category}).",
            "",
            "## Your Personality Profile (Big Five):",
            "",
            big_five,
            "",
            "## Your Behavioral Profile:",
            f"- Cognitive Style: {persona.get('Cognitive Style', '')}",
            f"- Learning Preferences: {persona.get('Learning Preferences', '')}",
            f"- Emotional Intelligence: {persona.get('Emotional Intelligence Profile', '')}",
            f"- Social Patterns: {persona.get('Social Patterns', '')}",
            f"- Stress Response: {persona.get('Stress Response Pattern', '')}",
            f"- Motivation: {persona.get('Motivation Drivers', '')}",
            f"- Academic Behavior: {persona.get('Academic Behavioral Indicators', '')}",
            # goals_section supplies its own leading blank line and trailing newline.
            f"{goals_section}## Additional Context:",
            f"{persona.get('Psychometric Notes', '')}",
            "",
            "## Behavioral Fingerprint:",
            fp_text,
            "",
            "## TASK:",
            "You are taking a psychological assessment survey. Answer each question HONESTLY based on your personality profile above.",
            "- Choose the Likert scale option (1-5) that best represents how YOU would genuinely respond.",
            "- Be CONSISTENT with your personality scores (e.g., if you have high Neuroticism, reflect that anxiety in your responses).",
            '- Do NOT game the system or pick "socially desirable" answers. Answer as the REAL you.',
            "",  # the prompt ends with a trailing newline
        ]
        return "\n".join(prompt_lines)

    def construct_user_prompt(self, questions: List[Dict[str, Any]]) -> str:
        """Build the user prompt listing each question with its numbered options.

        Args:
            questions: Question dicts read via the keys ``q_code`` (defaults
                to ``Q<index>``), ``question`` and ``options_list``.

        Returns:
            The prompt text, ending with the JSON output-format instructions.
        """
        prompt_lines = ["Answer the following questions. Return ONLY a valid JSON object mapping Q-Code to your selected option (1-5).\n"]

        for idx, q in enumerate(questions):
            q_code = q.get('q_code', f"Q{idx}")
            question_text = q.get('question', '')
            # `or []` tolerates a missing key as well as an explicit None.
            options = q.get('options_list') or []

            prompt_lines.append(f"[{q_code}]: {question_text}")
            prompt_lines.extend(f" {number}. {opt}" for number, opt in enumerate(options, start=1))
            prompt_lines.append("")

        prompt_lines.extend([
            "## OUTPUT FORMAT (JSON):",
            "{",
            ' "P.1.1.1": 3,',
            ' "P.1.1.2": 5,',
            " ...",
            "}",
            "\nIMPORTANT: Return ONLY the JSON object. No explanation, no preamble, just the JSON.",
        ])

        return "\n".join(prompt_lines)

    def simulate_batch(self, persona: Dict, questions: List[Dict], verbose: bool = False) -> Dict:
        """Synchronously ask the LLM to answer *questions* as *persona*.

        Makes up to ``self.max_retries`` attempts, waiting ``2 * attempt``
        seconds between them. Model, token limit and temperature are read
        from ``config``.

        Args:
            persona: Persona record passed to ``construct_system_prompt``.
            questions: Question dicts passed to ``construct_user_prompt``.
            verbose: When True, print prompt previews and parsing diagnostics.

        Returns:
            ``{"Q-CODE": selected_option}`` holding only values in 1..5, or
            an empty dict when every attempt failed.

        Raises:
            SystemExit: When the API error message reports an exhausted
                credit balance; retrying cannot succeed in that case.
        """
        system_prompt = self.construct_system_prompt(persona)
        user_prompt = self.construct_user_prompt(questions)

        if verbose:
            print(f"\n--- SYSTEM PROMPT ---\n{system_prompt[:500]}...")
            print(f"\n--- USER PROMPT (first 500 chars) ---\n{user_prompt[:500]}...")

        for attempt in range(self.max_retries):
            try:
                response = self.client.messages.create(
                    model=config.LLM_MODEL,
                    max_tokens=config.LLM_MAX_TOKENS,
                    temperature=config.LLM_TEMPERATURE,
                    system=system_prompt,
                    messages=[{"role": "user", "content": user_prompt}]
                )

                text = response.content[0].text.strip()

                # Tolerates markdown fences and surrounding chatter.
                json_str = self._extract_json_text(text)
                if not json_str:
                    if verbose:
                        print(f" ⚠️ No JSON block found in attempt {attempt+1}. Text snippet: {text[:200]}")
                    raise ValueError("No JSON found")

                try:
                    result = json.loads(json_str)
                except json.JSONDecodeError as je:
                    if verbose:
                        print(f" ⚠️ JSON Decode Error in attempt {attempt+1}: {je}")
                        print(f" 🔍 Raw JSON string (first 100 chars): {json_str[:100]}")
                    raise

                if not isinstance(result, dict):
                    # A list or scalar cannot map Q-codes to options; retry.
                    raise ValueError("Expected a JSON object mapping Q-Code to option")

                validated = self._validate_answers(result)

                if verbose:
                    print(f" ✅ Validated {len(validated)}/{len(questions)} keys from LLM response (Attempt {attempt+1})")

                return validated

            except Exception as e:
                # Credit exhaustion is permanent, so there is no point retrying.
                error_msg = str(e).lower()
                if "credit balance" in error_msg or "insufficient_funds" in error_msg:
                    print("\n" + "!"*80)
                    print("🛑 CRITICAL: YOUR ANTHROPIC CREDIT BALANCE IS EXHAUSTED.")
                    print("👉 REASON: The simulation has stopped to prevent data loss.")
                    print("👉 ACTION: Please top up credits at: https://console.anthropic.com/settings/plans")
                    print("!"*80 + "\n")
                    sys.exit(1)

                if attempt + 1 >= self.max_retries:
                    # Last attempt: do not sleep or announce a retry that never happens.
                    print(f" ⚠️ Simulation Attempt {attempt+1} failed ({type(e).__name__}): {e}. No retries left.")
                    break

                # Wait longer after each failure.
                wait_time = (attempt + 1) * 2
                print(f" ⚠️ Simulation Attempt {attempt+1} failed ({type(e).__name__}): {e}. Retrying in {wait_time}s...")
                time.sleep(wait_time)

        if verbose:
            print(f" ❌ CRITICAL: Chunk simulation failed after {self.max_retries} attempts.")
        return {}
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: simulate the first adolescent persona on the first
    # five questions of the first domain and print the parsed answers.
    from data_loader import load_personas, load_questions

    print("🧪 Testing Enhanced Simulator v2.0...")

    adolescents, _adults = load_personas()
    questions_map = load_questions()

    if not config.ANTHROPIC_API_KEY:
        print("❌ No API Key found in environment. Set ANTHROPIC_API_KEY.")
        # sys.exit rather than the site-provided exit(), which is meant for
        # the interactive shell and is not guaranteed to be defined.
        sys.exit(1)

    if not adolescents:
        print("❌ No adolescent personas loaded; nothing to simulate.")
        sys.exit(1)

    if not questions_map:
        print("❌ No questions loaded; nothing to simulate.")
        sys.exit(1)

    # Pick first adolescent
    student = adolescents[0]
    print(f"\n👤 Student: {student.get('First Name')} {student.get('Last Name')}")
    print(f" CPID: {student.get('StudentCPID')}")
    print(f" Openness: {student.get('Openness Score')}")

    # Pick first domain's first 5 questions
    domain = next(iter(questions_map))
    questions = questions_map[domain][:5]
    print(f"\n📝 Testing {domain} with {len(questions)} questions")

    engine = SimulationEngine(config.ANTHROPIC_API_KEY)
    result = engine.simulate_batch(student, questions, verbose=True)

    print(f"\n✅ Result: {json.dumps(result, indent=2)}")
|