CP_Assessment_engine/scripts/verify_user_counts.py
2026-02-10 12:59:40 +05:30

51 lines
1.9 KiB
Python

import pandas as pd
from pathlib import Path
import json
def _grade_sheet(df, qs_expected):
    """Return ``(found_qs, answered, status)`` for one loaded sheet."""
    # Every column after the first counts as a question. The first column is
    # presumably the participant identifier -- TODO confirm against the
    # generator that writes these workbooks.
    found_qs = max(len(df.columns) - 1, 0)

    # Only the first data row is inspected. A sheet that has a header but no
    # rows has nothing answered; indexing row 0 there would raise IndexError.
    answered = int(df.iloc[0, 1:].notna().sum()) if len(df) else 0

    if found_qs != qs_expected:
        status = "❌ SCHEMA MISMATCH"
    elif answered == qs_expected:
        status = "✅ PERFECT"
    else:
        status = "⚠️ INCOMPLETE"
    return found_qs, answered, status


def verify_counts(base_dir=None):
    """Print a per-file report comparing question counts with expectations.

    For every expected workbook under ``<base_dir>/<age_group>/5_domain/`` the
    function prints one table row showing the expected number of question
    columns, the number found, how many cells of the first data row are
    non-null, and a status:

    * ``❌ MISSING``          -- the file does not exist;
    * ``❌ SCHEMA MISMATCH``  -- the question-column count differs;
    * ``⚠️ INCOMPLETE``       -- columns match but the first row has gaps
      (or the sheet has no data rows at all);
    * ``✅ PERFECT``          -- columns match and the first row is complete.

    Args:
        base_dir: Root of the dry-run output tree (``str`` or ``Path``).
            Defaults to the original hard-coded location when ``None``.

    Returns:
        A list with one dict per expected file, in report order, with keys
        ``age_group``, ``file_name``, ``expected``, ``found``, ``answered``
        and ``status``. ``found`` and ``answered`` are ``None`` for missing
        files.

    Raises:
        Whatever ``pandas.read_excel`` raises for an unreadable workbook.
    """
    if base_dir is None:
        base_dir = Path(r'C:\work\CP_Automation\Simulated_Assessment_Engine\output\dry_run')
    else:
        base_dir = Path(base_dir)

    # NOTE: the keys are on-disk directory names (including the spelling
    # 'adolescense'); they must match the output tree exactly.
    expected = {
        'adolescense': {
            'Learning_Strategies_14-17.xlsx': 197,
            'Personality_14-17.xlsx': 130,
            'Emotional_Intelligence_14-17.xlsx': 125,
            'Vocational_Interest_14-17.xlsx': 120,
            'Grit_14-17.xlsx': 75,
        },
        'adults': {
            'Learning_Strategies_18-23.xlsx': 198,
            'Personality_18-23.xlsx': 133,
            'Emotional_Intelligence_18-23.xlsx': 124,
            'Vocational_Interest_18-23.xlsx': 120,
            'Grit_18-23.xlsx': 75,
        },
    }

    results = []

    print(f"{'Age Group':<15} | {'File Name':<35} | {'Expected Qs':<12} | {'Found Qs':<10} | {'Answered':<10} | {'Status'}")
    print("-" * 110)

    for age_group, files in expected.items():
        domain_dir = base_dir / age_group / "5_domain"
        for file_name, qs_expected in files.items():
            f_path = domain_dir / file_name

            if not f_path.exists():
                results.append({
                    "age_group": age_group,
                    "file_name": file_name,
                    "expected": qs_expected,
                    "found": None,
                    "answered": None,
                    "status": "❌ MISSING",
                })
                print(f"{age_group:<15} | {file_name:<35} | {qs_expected:<12} | {'MIS':<10} | {'MIS':<10} | ❌ MISSING")
                continue

            df = pd.read_excel(f_path)
            found_qs, answered, status = _grade_sheet(df, qs_expected)
            results.append({
                "age_group": age_group,
                "file_name": file_name,
                "expected": qs_expected,
                "found": found_qs,
                "answered": answered,
                "status": status,
            })
            print(f"{age_group:<15} | {file_name:<35} | {qs_expected:<12} | {found_qs:<10} | {answered:<10} | {status}")

    return results
# Script entry point: produce the report only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    verify_counts()