#!/usr/bin/env python3
"""
Synthetic Data Generator

Generates synthetic data for profile completion based on student information.
This matches the format provided by the user for creating students.
"""

import argparse
import json
import random
import sys
from datetime import datetime
from pathlib import Path

import pandas as pd

project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

# Maps each --format choice to the file extension used for the default
# output path. 'excel' is not a valid extension for DataFrame.to_excel, which
# selects its writer engine from the file suffix.
OUTPUT_EXTENSIONS = {'excel': 'xlsx', 'json': 'json'}


def _is_missing(value):
    """Return True when value is None or a float NaN."""
    # NaN is the only float that is not equal to itself; this also matches
    # NumPy float scalars, which subclass float.
    return value is None or (isinstance(value, float) and value != value)


def _json_default(value):
    """Convert scalar wrappers exposing ``.item()`` (e.g. NumPy ints) for json.

    Raises:
        TypeError: If value offers no ``.item()`` conversion.
    """
    to_native = getattr(value, 'item', None)
    if callable(to_native):
        return to_native()
    raise TypeError(
        f"Object of type {type(value).__name__} is not JSON serializable"
    )


def default_output_path(output_format, now=None):
    """Build the default output path under ``<project_root>/test_data``.

    Args:
        output_format: Output format name ('excel' or 'json').
        now: Optional datetime used for the timestamp; defaults to the
            current local time.

    Returns:
        Path: ``synthetic_data_<YYYYmmdd_HHMMSS>.<extension>`` inside the
        ``test_data`` directory.
    """
    moment = datetime.now() if now is None else now
    timestamp = moment.strftime("%Y%m%d_%H%M%S")
    # Unknown formats fall back to using the format name as the extension.
    extension = OUTPUT_EXTENSIONS.get(output_format, output_format)
    return project_root / "test_data" / f"synthetic_data_{timestamp}.{extension}"


class SyntheticDataGenerator:
    """Generate synthetic data for student profiles"""

    def __init__(self, seed=None):
        """Initialize generator

        Args:
            seed: Optional seed for reproducible output. When omitted, the
                shared ``random`` module is used, so callers that seed it
                globally keep getting the same results as before.
        """
        self._rng = random if seed is None else random.Random(seed)

        self.focus_areas = [
            '01. Academics',
            '02. Family',
            '03. Health',
            '04. Friendship',
            '05. Emotional management',
            '06. Personal Growth',
            '07. Hobbies',
            '08. Physical Activities',
            '09. Future Aspiration',
            '10. Others'
        ]

        self.strengths = [
            '1. Quick Learning',
            '2. Curiosity',
            '3. Problem Solving',
            '4. Justice',
            '5. Empathy',
            '6. Risk Taking',
            '7. Compassion',
            '8. Creative',
            '9. Technical',
            '10. Leadership',
            '11. Communication',
            '12. Athletic',
            '13. Languages',
            '14. Research',
            '15. Critical Thinking',
            '16. Artistic',
            '17. Others'
        ]

        self.hobbies = [
            '1. Reading',
            '2. Musical',
            '3. Sports',
            '4. Arts & Crafts',
            '5. Cooking',
            '6. Gardening',
            '7. Gaming',
            '8. Traveling',
            '9. Volunteering',
            '10. Learning',
            '11. Singing',
            '12. Other'
        ]

        self.clubs = [
            '1. Science',
            '2. Mathematics',
            '3. Quiz',
            '4. Literary',
            '5. Robotics',
            '6. Art',
            '7. Music',
            '8. Dramatics',
            '9. Sports',
            '10. Community',
            '11. MUN',
            '12. Other'
        ]

        self.expectations = [
            '1. Self-Understanding: Gain deeper insights into their personality, strengths, and areas for growth.',
            '2. Career Guidance: Clear recommendations on suitable career paths or college majors based on their interests and abilities, backed by scientific tool.',
            '3. Academic Support: Help in identifying their learning styles, study habits, or cognitive strengths to improve performance.',
            '4. Validation / Reassurance: Confirmation of their self-perceptions or reassurance that they\'re "on the right path."',
            '5. Improved Decision-Making: Help making informed choices',
            '6. Clarity About Strengths and Weaknesses: Hope to learn what I\'m naturally good at and what skills I may need to develop.',
            '7. Personal Growth: To help them build confidence, motivation, or emotional intelligence.',
            '8. Objective Feedback: Want an unbiased, science-based perspective rather than subjective opinions from others.',
            '9. Actionable Next Steps: Concrete advice or recommendations they can follow after the assessment.',
            '10. Others'
        ]

    def generate_for_student(self, student_data):
        """
        Generate synthetic data for a student

        The selections are random; ``student_data`` is accepted for interface
        compatibility but is not read by this method.

        Args:
            student_data: Dictionary with student information

        Returns:
            dict: Synthetic data for profile completion
        """
        synthetic = {
            # Step 5: Focus Areas (Pick 3 each)
            'short_term_focus_areas': self._pick_random(self.focus_areas, 3),
            'long_term_focus_areas': self._pick_random(self.focus_areas, 3),

            # Step 6: Self-Assessment (Pick 3 each)
            'strengths': self._pick_random(self.strengths, 3),
            'areas_of_improvement': self._pick_random(self.strengths, 3),

            # Step 7: Hobbies & Clubs (Pick 3 hobbies, up to 4 clubs)
            'hobbies_interests': self._pick_random(self.hobbies, 3),
            'clubs_or_teams': self._pick_random(self.clubs, 4),

            # Step 8: Achievements (Text areas)
            'achievements_academics': self._generate_achievement_text('academic'),
            'achievements_sports': self._generate_achievement_text('sports'),
            'achievements_cultural': self._generate_achievement_text('cultural'),
            'achievements_others': self._generate_achievement_text('other'),

            # Step 9: Expectations (Pick 3)
            'expectations': self._pick_random(self.expectations, 3),

            # Additional fields if needed
            'specially_abled': False,
            'specially_abled_details': '',
            'short_term_focus_others_text': '',
            'long_term_focus_others_text': '',
            'strength_others_text': '',
            'improvement_others_text': '',
            'hobby_other_text': '',
            'club_other_text': '',
            'expectation_others_text': ''
        }

        return synthetic

    def _pick_random(self, options, count):
        """Pick up to ``count`` distinct random items from options"""
        # Clamp so that asking for more items than exist never raises.
        return self._rng.sample(options, min(count, len(options)))

    def _generate_achievement_text(self, category):
        """Generate achievement text based on category

        Unknown categories fall back to the 'other' templates.
        """
        templates = {
            'academic': [
                "Merit scholarship recipient for academic excellence",
                "Top 10% in class for three consecutive years",
                "Won inter-school science quiz competition",
                "Published research paper in school journal"
            ],
            'sports': [
                "Represented school in district level basketball tournament",
                "Won gold medal in inter-school athletics meet",
                "Captain of school cricket team",
                "Participated in state level swimming championship"
            ],
            'cultural': [
                "Won first prize in school drama competition",
                "Participated in inter-school music festival",
                "Organized annual cultural day event",
                "Won best performer award in dance competition"
            ],
            'other': [
                "Active member of community service club",
                "Volunteered for environmental awareness campaign",
                "Organized charity fundraiser event",
                "Received appreciation for leadership skills"
            ]
        }

        return self._rng.choice(templates.get(category, templates['other']))

    def generate_batch(self, students_data):
        """
        Generate synthetic data for multiple students

        Each result also carries ``student_cpid`` (from 'Student CPID') and
        ``student_name`` (from 'First Name' and 'Last Name'). Values that are
        None or NaN are treated as empty so blank cells do not end up as the
        text "nan".

        Args:
            students_data: List of student dictionaries

        Returns:
            list: List of synthetic data dictionaries
        """
        results = []

        for student in students_data:
            synthetic = self.generate_for_student(student)

            cpid = student.get('Student CPID', '')
            synthetic['student_cpid'] = '' if _is_missing(cpid) else cpid

            name_parts = [
                str(part)
                for part in (student.get('First Name', ''), student.get('Last Name', ''))
                if not _is_missing(part) and str(part) != ''
            ]
            synthetic['student_name'] = ' '.join(name_parts)

            results.append(synthetic)

        return results

    def save_to_excel(self, synthetic_data, output_path):
        """
        Save synthetic data to Excel file

        Args:
            synthetic_data: List of synthetic data dictionaries
            output_path: Path to save Excel file
        """
        # Convert to DataFrame
        df = pd.DataFrame(synthetic_data)

        # Save to Excel
        df.to_excel(output_path, index=False)
        print(f"✅ Saved synthetic data to {output_path}")

    def save_to_json(self, synthetic_data, output_path):
        """
        Save synthetic data to JSON file

        Args:
            synthetic_data: List of synthetic data dictionaries
            output_path: Path to save JSON file
        """
        # Explicit encoding keeps the output independent of the platform
        # default; the default handler converts NumPy scalars that come out
        # of DataFrame.to_dict('records').
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(synthetic_data, f, indent=2, default=_json_default)
        print(f"✅ Saved synthetic data to {output_path}")


def main(argv=None):
    """Main entry point

    Args:
        argv: Optional list of command-line arguments; defaults to
            ``sys.argv[1:]`` when omitted.
    """
    parser = argparse.ArgumentParser(description='Generate synthetic data for students')
    parser.add_argument('--students', type=str, required=True,
                        help='Path to Excel file with student data')
    parser.add_argument('--output', type=str, default=None,
                        help='Output file path (default: synthetic_data_<timestamp>.xlsx or .json)')
    parser.add_argument('--format', choices=['excel', 'json'], default='excel',
                        help='Output format')
    parser.add_argument('--seed', type=int, default=None,
                        help='Optional random seed for reproducible output')

    args = parser.parse_args(argv)

    # Load students
    students_df = pd.read_excel(args.students)
    students_data = students_df.to_dict('records')

    print(f"✅ Loaded {len(students_data)} students")

    # Generate synthetic data
    generator = SyntheticDataGenerator(seed=args.seed)
    synthetic_data = generator.generate_batch(students_data)

    # Save output
    if args.output:
        output_path = Path(args.output)
    else:
        output_path = default_output_path(args.format)
    # Create the target directory for explicit paths as well as the default.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if args.format == 'excel':
        generator.save_to_excel(synthetic_data, output_path)
    else:
        generator.save_to_json(synthetic_data, output_path)

    print(f"\n✅ Generated synthetic data for {len(synthetic_data)} students")


if __name__ == "__main__":
    main()