# Listing metadata (from the code viewer): 81 lines, 2.6 KiB, Python
import pandas as pd
|
|
from pathlib import Path
|
|
import sys
|
|
import io
|
|
|
|
# Force UTF-8 for output
|
|
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
|
|
|
|
# Add root to sys.path
|
|
root = Path(__file__).resolve().parent.parent
|
|
sys.path.append(str(root))
|
|
|
|
import config
|
|
|
|
def audit_missing_only():
|
|
base_dir = Path(r'C:\work\CP_Automation\Simulated_Assessment_Engine\output\dry_run')
|
|
expected_domains = [
|
|
'Learning_Strategies_{age}.xlsx',
|
|
'Personality_{age}.xlsx',
|
|
'Emotional_Intelligence_{age}.xlsx',
|
|
'Vocational_Interest_{age}.xlsx',
|
|
'Grit_{age}.xlsx'
|
|
]
|
|
cognition_tests = config.COGNITION_TESTS
|
|
|
|
issues = []
|
|
|
|
for age_label, age_suffix in [('adolescense', '14-17'), ('adults', '18-23')]:
|
|
# Survey
|
|
domain_dir = base_dir / age_label / "5_domain"
|
|
for d_tmpl in expected_domains:
|
|
f_name = d_tmpl.format(age=age_suffix)
|
|
f_path = domain_dir / f_name
|
|
check_issue(f_path, age_label, "Survey", f_name, issues)
|
|
|
|
# Cognition
|
|
cog_dir = base_dir / age_label / "cognition"
|
|
for c_test in cognition_tests:
|
|
f_name = config.COGNITION_FILE_NAMES.get(c_test, f'{c_test}_{age_suffix}.xlsx').replace('{age}', age_suffix)
|
|
f_path = cog_dir / f_name
|
|
check_issue(f_path, age_label, "Cognition", c_test, issues)
|
|
|
|
if not issues:
|
|
print("🎉 NO ISSUES FOUND! 100% PERFECT.")
|
|
else:
|
|
print(f"❌ FOUND {len(issues)} ISSUES:")
|
|
for iss in issues:
|
|
print(f" - {iss}")
|
|
|
|
def check_issue(path, age, category, name, issues):
|
|
if not path.exists():
|
|
issues.append(f"{age} | {category} | {name}: MISSING")
|
|
return
|
|
|
|
try:
|
|
df = pd.read_excel(path)
|
|
if df.shape[0] == 0:
|
|
issues.append(f"{age} | {category} | {name}: EMPTY ROWS")
|
|
return
|
|
|
|
# For Survey, check first row (one student)
|
|
if category == "Survey":
|
|
student_row = df.iloc[0]
|
|
# Q-codes start after 'Participant'
|
|
q_cols = [c for c in df.columns if c != 'Participant']
|
|
missing = student_row[q_cols].isna().sum()
|
|
if missing > 0:
|
|
issues.append(f"{age} | {category} | {name}: {missing}/{len(q_cols)} answers missing")
|
|
|
|
# For Cognition, check first row
|
|
else:
|
|
student_row = df.iloc[0]
|
|
if student_row.isna().sum() > 0:
|
|
issues.append(f"{age} | {category} | {name}: contains NaNs")
|
|
|
|
except Exception as e:
|
|
issues.append(f"{age} | {category} | {name}: ERROR {e}")
|
|
|
|
if __name__ == "__main__":
|
|
audit_missing_only()
|