# CP_Assessment_engine/scripts/verify_user_counts.py

import pandas as pd
from pathlib import Path
import json

def verify_counts(base_dir: "str | Path | None" = None) -> None:
    """Print a per-file report of question counts versus expectations.

    For every expected workbook under ``<base_dir>/<age_group>/5_domain`` one
    table row is printed with the expected number of question columns, the
    number found (all columns except the first), how many of those are
    non-null in the first data row, and a status:

    * ``MISSING`` - the workbook does not exist.
    * ``SCHEMA MISMATCH`` - the column count differs from the expectation.
    * ``INCOMPLETE`` - the column count matches but the first data row has
      empty cells, or the sheet has no data rows at all.
    * ``PERFECT`` - the column count matches and the first row is fully filled.

    Args:
        base_dir: Root of the dry-run output tree. When omitted, the original
            hard-coded Windows location is used.
    """
    if base_dir is None:
        base_dir = r'C:\work\CP_Automation\Simulated_Assessment_Engine\output\dry_run'
    base_dir = Path(base_dir)

    # Expected number of question columns per workbook, keyed by the age-group
    # directory name (spelling matches the directories on disk).
    expected = {
        'adolescense': {
            'Learning_Strategies_14-17.xlsx': 197,
            'Personality_14-17.xlsx': 130,
            'Emotional_Intelligence_14-17.xlsx': 125,
            'Vocational_Interest_14-17.xlsx': 120,
            'Grit_14-17.xlsx': 75,
        },
        'adults': {
            'Learning_Strategies_18-23.xlsx': 198,
            'Personality_18-23.xlsx': 133,
            'Emotional_Intelligence_18-23.xlsx': 124,
            'Vocational_Interest_18-23.xlsx': 120,
            'Grit_18-23.xlsx': 75,
        },
    }

    print(f"{'Age Group':<15} | {'File Name':<35} | {'Expected Qs':<12} | {'Found Qs':<10} | {'Answered':<10} | {'Status'}")
    print("-" * 110)

    for age_group, files in expected.items():
        domain_dir = base_dir / age_group / "5_domain"
        for file_name, qs_expected in files.items():
            f_path = domain_dir / file_name
            if not f_path.exists():
                print(f"{age_group:<15} | {file_name:<35} | {qs_expected:<12} | {'MIS':<10} | {'MIS':<10} | ❌ MISSING")
                continue

            df = pd.read_excel(f_path)
            # The first column is not a question (presumably the participant
            # identifier - confirm against the generator), so it is excluded.
            found_qs = max(len(df.columns) - 1, 0)
            # Count non-null answers in the first data row. A sheet with only a
            # header has no such row; report zero answers instead of crashing.
            if len(df.index) == 0:
                answered = 0
            else:
                answered = int(df.iloc[0, 1:].notna().sum())

            # A column-count mismatch takes precedence over answer completeness.
            if found_qs != qs_expected:
                status = "❌ SCHEMA MISMATCH"
            elif answered == qs_expected:
                status = "✅ PERFECT"
            else:
                status = "⚠️ INCOMPLETE"

            print(f"{age_group:<15} | {file_name:<35} | {qs_expected:<12} | {found_qs:<10} | {answered:<10} | {status}")

# Run the verification report only when executed as a script, not on import.
if __name__ == "__main__":
    verify_counts()