CP_Assessment_engine/scripts/audit_tool.py
2026-02-10 12:59:40 +05:30

81 lines
2.6 KiB
Python

import pandas as pd
from pathlib import Path
import sys
import io
# Force UTF-8 for output so the emoji status lines printed by this script do
# not fail on consoles whose default encoding cannot represent them.
# reconfigure() (Python 3.7+) switches the encoding of the existing stream in
# place and keeps its buffering settings, instead of stacking a second text
# wrapper on top of the same underlying byte buffer.
if hasattr(sys.stdout, 'reconfigure'):
    sys.stdout.reconfigure(encoding='utf-8')
elif hasattr(sys.stdout, 'buffer'):
    # Fallback for stream objects that expose a byte buffer but no reconfigure().
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
# Add root (two directory levels above this file) to sys.path so that the
# `config` module imported below can be found.
root = Path(__file__).resolve().parent.parent
sys.path.append(str(root))
import config
def audit_missing_only():
base_dir = Path(r'C:\work\CP_Automation\Simulated_Assessment_Engine\output\dry_run')
expected_domains = [
'Learning_Strategies_{age}.xlsx',
'Personality_{age}.xlsx',
'Emotional_Intelligence_{age}.xlsx',
'Vocational_Interest_{age}.xlsx',
'Grit_{age}.xlsx'
]
cognition_tests = config.COGNITION_TESTS
issues = []
for age_label, age_suffix in [('adolescense', '14-17'), ('adults', '18-23')]:
# Survey
domain_dir = base_dir / age_label / "5_domain"
for d_tmpl in expected_domains:
f_name = d_tmpl.format(age=age_suffix)
f_path = domain_dir / f_name
check_issue(f_path, age_label, "Survey", f_name, issues)
# Cognition
cog_dir = base_dir / age_label / "cognition"
for c_test in cognition_tests:
f_name = config.COGNITION_FILE_NAMES.get(c_test, f'{c_test}_{age_suffix}.xlsx').replace('{age}', age_suffix)
f_path = cog_dir / f_name
check_issue(f_path, age_label, "Cognition", c_test, issues)
if not issues:
print("🎉 NO ISSUES FOUND! 100% PERFECT.")
else:
print(f"❌ FOUND {len(issues)} ISSUES:")
for iss in issues:
print(f" - {iss}")
def check_issue(path, age, category, name, issues):
    """Check one result workbook and record any problem in ``issues``.

    Only the first data row of the workbook is inspected.

    Args:
        path: Location of the Excel file (``Path`` or ``str``).
        age: Age-group label, used only in the issue message.
        category: ``"Survey"`` selects the survey check (every column except
            ``'Participant'`` must be filled in); any other value selects the
            generic check (no NaN anywhere in the row).
        name: File or test name, used only in the issue message.
        issues: List that issue strings are appended to (mutated in place).

    Returns:
        None. At most one message is appended to ``issues`` per call.
    """
    path = Path(path)  # accept plain string paths as well
    label = f"{age} | {category} | {name}"

    if not path.exists():
        issues.append(f"{label}: MISSING")
        return

    try:
        # Only the first row is examined below, so there is no need to parse
        # the rest of the sheet.
        df = pd.read_excel(path, nrows=1)
        if df.shape[0] == 0:
            issues.append(f"{label}: EMPTY ROWS")
            return

        # First row (one student)
        student_row = df.iloc[0]
        if category == "Survey":
            # Every column other than 'Participant' is treated as a question.
            q_cols = [c for c in df.columns if c != 'Participant']
            missing = int(student_row[q_cols].isna().sum())
            if missing > 0:
                issues.append(f"{label}: {missing}/{len(q_cols)} answers missing")
        elif student_row.isna().sum() > 0:
            issues.append(f"{label}: contains NaNs")
    except Exception as e:
        # Deliberately broad: any failure to read or inspect the file is
        # reported as an audit finding rather than aborting the whole audit.
        issues.append(f"{label}: ERROR {e}")
# Script entry point: run the audit only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    audit_missing_only()