# CP_Assessment_engine/scripts/final_verification.py
# 2026-02-10 12:59:40 +05:30
#
# 134 lines
# 4.6 KiB
# Python

"""
Final 100% Verification Report
"""
import pandas as pd
from pathlib import Path
import sys
import io
# On Windows, force UTF-8 on stdout so the report prints regardless of the
# console's code page.
if sys.platform == 'win32':
    if hasattr(sys.stdout, 'reconfigure'):
        # Switch the encoding of the existing stream in place instead of
        # stacking a second text wrapper on the same underlying buffer.
        sys.stdout.reconfigure(encoding='utf-8')
    elif hasattr(sys.stdout, 'buffer'):
        # Fallback for stream objects without reconfigure(); skipped entirely
        # when stdout is None or a replacement object that has no .buffer.
        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
# Directory that contains this script's parent directory.
BASE_DIR = Path(__file__).resolve().parent.parent
# Folder under which the per-age-group domain workbooks are looked up.
OUTPUT_DIR = BASE_DIR.joinpath("output", "full_run")
# Row / unique-ID count every adolescent (14-17) workbook must have.
EXPECTED_ADOLESCENTS = 1507
# Row / unique-ID count every adult (18-23) workbook must have.
EXPECTED_ADULTS = 1493
def _verify_file(file_path, expected_count):
    """Check a single domain workbook and return its result dict.

    Args:
        file_path: ``Path`` of the ``.xlsx`` file to inspect.
        expected_count: number of rows, and of distinct IDs, the file must
            contain to pass.

    Returns:
        dict: always has a ``'status'`` key, one of ``'PASS'``, ``'MISSING'``,
        ``'NO_ID_COLUMN'``, ``'ROW_MISMATCH'`` or ``'ERROR'``; the remaining
        keys depend on the status.
    """
    if not file_path.exists():
        return {'status': 'MISSING', 'rows': 0}

    # Deliberately broad: any failure while reading or inspecting one file is
    # reported as an ERROR entry so the remaining files are still checked.
    try:
        df = pd.read_excel(file_path, engine='openpyxl')
        row_count = len(df)
        col_count = len(df.columns)

        # The ID column is 'Student CPID' when present, else 'Participant'.
        id_col = next(
            (name for name in ('Student CPID', 'Participant') if name in df.columns),
            None,
        )
        if id_col is None:
            return {'status': 'NO_ID_COLUMN', 'rows': row_count}

        # nunique() ignores missing values by default.
        unique_ids = int(df[id_col].nunique())

        # Data density: percentage of cells that are not null.
        total_cells = row_count * col_count
        null_cells = int(df.isnull().sum().sum())
        if total_cells > 0:
            density = (total_cells - null_cells) / total_cells * 100
        else:
            density = 0

        if row_count == expected_count and unique_ids == expected_count:
            return {
                'status': 'PASS',
                'rows': row_count,
                'cols': col_count,
                'unique_ids': unique_ids,
                # Plain float keeps numpy scalar types out of the result.
                'density': round(float(density), 2),
            }
        # Also covers the case where the row count matches but IDs repeat.
        return {
            'status': 'ROW_MISMATCH',
            'rows': row_count,
            'expected': expected_count,
            'unique_ids': unique_ids,
        }
    except Exception as e:
        return {'status': 'ERROR', 'error': str(e)}


def verify_domain_files(output_dir=None, expected_counts=None):
    """Verify all 5 domain files for both age groups.

    Each file is looked up at ``<output_dir>/<age_group>/5_domain/<file>``.

    Args:
        output_dir: root folder to scan; defaults to the module-level
            ``OUTPUT_DIR`` when ``None``.
        expected_counts: mapping with the keys ``'adolescense'`` and
            ``'adults'`` giving the required row count per age group;
            defaults to ``EXPECTED_ADOLESCENTS`` / ``EXPECTED_ADULTS``.

    Returns:
        tuple: ``(results, all_passed)`` where ``results`` maps
        age group -> domain name -> result dict, and ``all_passed`` is
        ``True`` only when every file has status ``'PASS'``.

    Raises:
        KeyError: if a supplied ``expected_counts`` lacks an age-group key.
    """
    if output_dir is None:
        output_dir = OUTPUT_DIR
    if expected_counts is None:
        expected_counts = {
            'adolescense': EXPECTED_ADOLESCENTS,
            'adults': EXPECTED_ADULTS,
        }
    output_dir = Path(output_dir)

    # NOTE: 'adolescense' is used as the on-disk folder name, so the spelling
    # must stay exactly as is.
    domain_files = {
        'adolescense': {
            'Personality': 'Personality_14-17.xlsx',
            'Grit': 'Grit_14-17.xlsx',
            'Emotional Intelligence': 'Emotional_Intelligence_14-17.xlsx',
            'Vocational Interest': 'Vocational_Interest_14-17.xlsx',
            'Learning Strategies': 'Learning_Strategies_14-17.xlsx',
        },
        'adults': {
            'Personality': 'Personality_18-23.xlsx',
            'Grit': 'Grit_18-23.xlsx',
            'Emotional Intelligence': 'Emotional_Intelligence_18-23.xlsx',
            'Vocational Interest': 'Vocational_Interest_18-23.xlsx',
            'Learning Strategies': 'Learning_Strategies_18-23.xlsx',
        },
    }

    results = {}
    all_passed = True
    for age_group, domains in domain_files.items():
        expected_count = expected_counts[age_group]
        age_results = {}
        for domain, file_name in domains.items():
            file_path = output_dir / age_group / "5_domain" / file_name
            outcome = _verify_file(file_path, expected_count)
            age_results[domain] = outcome
            if outcome['status'] != 'PASS':
                all_passed = False
        results[age_group] = age_results
    return results, all_passed
def main():
    """Run the domain-file verification and print the report to stdout.

    Prints a banner, one section per age group with a line for each domain,
    and a final overall verdict.

    Returns:
        The overall pass flag returned by ``verify_domain_files()``.
    """
    heavy_rule = "=" * 80
    light_rule = "-" * 80

    print(heavy_rule)
    print("FINAL 100% VERIFICATION REPORT")
    print(heavy_rule)
    print()

    results, all_passed = verify_domain_files()

    # One section per age group, one line per domain.
    for age_group, domains in results.items():
        if age_group == 'adolescense':
            age_label, expected = "ADOLESCENTS (14-17)", EXPECTED_ADOLESCENTS
        else:
            age_label, expected = "ADULTS (18-23)", EXPECTED_ADULTS
        print(f"{age_label} - Expected: {expected} students")
        print(light_rule)
        for domain, result in domains.items():
            if result['status'] != 'PASS':
                # Failures dump the whole result dict for inspection.
                print(f" {domain:30} {result['status']} - {result}")
                continue
            print(f" {domain:30} PASS - {result['rows']} rows, {result['cols']} cols, {result['density']}% density")
        print()

    print(heavy_rule)
    verdict = (
        "VERIFICATION RESULT: 100% PASS - ALL DOMAINS COMPLETE"
        if all_passed
        else "VERIFICATION RESULT: FAILED - REVIEW REQUIRED"
    )
    print(verdict)
    print(heavy_rule)
    return all_passed
# Script entry point: exit status is 0 when every check passed, 1 otherwise.
if __name__ == "__main__":
    success = main()
    sys.exit(int(not success))