import json
from pathlib import Path
from typing import Dict, Optional, Tuple, Union

import pandas as pd

# Default location of the dry-run output that verify_counts() inspects.
DEFAULT_BASE_DIR = Path(
    r'C:\work\CP_Automation\Simulated_Assessment_Engine\output\dry_run'
)

# Expected number of question columns per workbook, keyed by age-group
# directory name and then by file name. The keys are directory/file names on
# disk and must match them exactly.
EXPECTED_COUNTS = {
    'adolescense': {
        'Learning_Strategies_14-17.xlsx': 197,
        'Personality_14-17.xlsx': 130,
        'Emotional_Intelligence_14-17.xlsx': 125,
        'Vocational_Interest_14-17.xlsx': 120,
        'Grit_14-17.xlsx': 75,
    },
    'adults': {
        'Learning_Strategies_18-23.xlsx': 198,
        'Personality_18-23.xlsx': 133,
        'Emotional_Intelligence_18-23.xlsx': 124,
        'Vocational_Interest_18-23.xlsx': 120,
        'Grit_18-23.xlsx': 75,
    },
}


def _count_questions(df: pd.DataFrame) -> Tuple[int, int]:
    """Return (question columns, non-null answers in the first data row)."""
    # The first column is treated as the participant identifier, so it is
    # excluded from the question count. Clamp so a sheet with no columns
    # reports 0 rather than -1.
    found_qs = max(len(df.columns) - 1, 0)

    # A sheet with headers but no data rows has nothing to index; the
    # unguarded df.iloc[0, ...] would raise IndexError and abort the report.
    if df.empty:
        return found_qs, 0

    answered = int(df.iloc[0, 1:].notna().sum())
    return found_qs, answered


def _status_for(found_qs: int, answered: int, qs_expected: int) -> str:
    """Map the counts for one workbook to its report status label."""
    if found_qs != qs_expected:
        return "❌ SCHEMA MISMATCH"
    if answered == qs_expected:
        return "✅ PERFECT"
    return "⚠️ INCOMPLETE"


def verify_counts(
    base_dir: Optional[Union[str, Path]] = None,
    expected: Optional[Dict[str, Dict[str, int]]] = None,
) -> None:
    """Print a table comparing expected and actual question counts.

    For every ``<base_dir>/<age_group>/5_domain/<file_name>`` workbook listed
    in ``expected``, the workbook is read with ``pandas.read_excel`` and one
    row is printed showing the expected question count, the number of columns
    found after the first one, the number of non-null answers in the first
    data row, and a status label. Files that do not exist are reported as
    missing and skipped.

    Args:
        base_dir: Root directory holding one sub-directory per age group.
            Defaults to ``DEFAULT_BASE_DIR``.
        expected: Mapping of age-group directory name to a mapping of
            workbook file name to expected question count. Defaults to
            ``EXPECTED_COUNTS``.

    Returns:
        None. The report is written to standard output.
    """
    root = DEFAULT_BASE_DIR if base_dir is None else Path(base_dir)
    targets = EXPECTED_COUNTS if expected is None else expected

    print(f"{'Age Group':<15} | {'File Name':<35} | {'Expected Qs':<12} | {'Found Qs':<10} | {'Answered':<10} | {'Status'}")
    print("-" * 110)

    for age_group, files in targets.items():
        domain_dir = root / age_group / "5_domain"

        for file_name, qs_expected in files.items():
            f_path = domain_dir / file_name

            if not f_path.exists():
                print(f"{age_group:<15} | {file_name:<35} | {qs_expected:<12} | {'MIS':<10} | {'MIS':<10} | ❌ MISSING")
                continue

            found_qs, answered = _count_questions(pd.read_excel(f_path))
            status = _status_for(found_qs, answered, qs_expected)

            print(f"{age_group:<15} | {file_name:<35} | {qs_expected:<12} | {found_qs:<10} | {answered:<10} | {status}")


if __name__ == "__main__":
    verify_counts()