import pandas as pd from pathlib import Path import sys import io # Force UTF-8 for output sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') # Add root to sys.path root = Path(__file__).resolve().parent.parent sys.path.append(str(root)) import config def audit_missing_only(): base_dir = Path(r'C:\work\CP_Automation\Simulated_Assessment_Engine\output\dry_run') expected_domains = [ 'Learning_Strategies_{age}.xlsx', 'Personality_{age}.xlsx', 'Emotional_Intelligence_{age}.xlsx', 'Vocational_Interest_{age}.xlsx', 'Grit_{age}.xlsx' ] cognition_tests = config.COGNITION_TESTS issues = [] for age_label, age_suffix in [('adolescense', '14-17'), ('adults', '18-23')]: # Survey domain_dir = base_dir / age_label / "5_domain" for d_tmpl in expected_domains: f_name = d_tmpl.format(age=age_suffix) f_path = domain_dir / f_name check_issue(f_path, age_label, "Survey", f_name, issues) # Cognition cog_dir = base_dir / age_label / "cognition" for c_test in cognition_tests: f_name = config.COGNITION_FILE_NAMES.get(c_test, f'{c_test}_{age_suffix}.xlsx').replace('{age}', age_suffix) f_path = cog_dir / f_name check_issue(f_path, age_label, "Cognition", c_test, issues) if not issues: print("🎉 NO ISSUES FOUND! 100% PERFECT.") else: print(f"❌ FOUND {len(issues)} ISSUES:") for iss in issues: print(f" - {iss}") def check_issue(path, age, category, name, issues): if not path.exists(): issues.append(f"{age} | {category} | {name}: MISSING") return try: df = pd.read_excel(path) if df.shape[0] == 0: issues.append(f"{age} | {category} | {name}: EMPTY ROWS") return # For Survey, check first row (one student) if category == "Survey": student_row = df.iloc[0] # Q-codes start after 'Participant' q_cols = [c for c in df.columns if c != 'Participant'] missing = student_row[q_cols].isna().sum() if missing > 0: issues.append(f"{age} | {category} | {name}: {missing}/{len(q_cols)} answers missing") # For Cognition, check first row else: student_row = df.iloc[0] if student_row.isna().sum() > 0: issues.append(f"{age} | {category} | {name}: contains NaNs") except Exception as e: issues.append(f"{age} | {category} | {name}: ERROR {e}") if __name__ == "__main__": audit_missing_only()