CP_Assessment_engine/scripts/final_client_deliverable_check.py
2026-02-10 12:59:40 +05:30

176 lines
6.2 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Final comprehensive check of ALL client deliverables
Perfectionist-level review before client/BOD delivery
"""
import pandas as pd
from pathlib import Path
import sys
import io
# This script prints emoji; on Windows, switch stdout to UTF-8 so those
# characters can be encoded (presumably the console default cannot - confirm).
if sys.platform == 'win32':
    if hasattr(sys.stdout, 'reconfigure'):
        # Change the encoding in place: settings that are not passed
        # (e.g. line buffering) keep their current values, which a fresh
        # TextIOWrapper around sys.stdout.buffer would not preserve.
        sys.stdout.reconfigure(encoding='utf-8')
    elif hasattr(sys.stdout, 'buffer'):
        # Fallback for stdout replacements that lack reconfigure().
        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

# Two levels above this file: the parent of the directory holding the script.
BASE_DIR = Path(__file__).resolve().parent.parent
# Expectations the deliverables are checked against.
_EXPECTED_PERSONA_ROWS = 3000
_EXPECTED_COGNITION_FILES = 12  # per age group
_UNIFORMITY_CHECKED_COLUMNS = ('Nationality', 'Native State')
_REQUIRED_QUESTION_COLUMNS = ['code', 'domain', 'age-group', 'question']
_EXPECTED_DOMAIN_FILES = {
    'adolescense/5_domain': [
        'Personality_14-17.xlsx',
        'Grit_14-17.xlsx',
        'Emotional_Intelligence_14-17.xlsx',
        'Vocational_Interest_14-17.xlsx',
        'Learning_Strategies_14-17.xlsx',
    ],
    'adults/5_domain': [
        'Personality_18-23.xlsx',
        'Grit_18-23.xlsx',
        'Emotional_Intelligence_18-23.xlsx',
        'Vocational_Interest_18-23.xlsx',
        'Learning_Strategies_18-23.xlsx',
    ],
}


def _check_personas(data_dir):
    """Check merged_personas.xlsx in *data_dir*; return (issues, warnings)."""
    issues, warnings = [], []
    print("1⃣ CHECKING: merged_personas.xlsx")
    print("-" * 80)
    personas_file = data_dir / "merged_personas.xlsx"
    if not personas_file.exists():
        issues.append("merged_personas.xlsx: FILE NOT FOUND")
        print()
        return issues, warnings

    df_personas = pd.read_excel(personas_file, engine='openpyxl')
    row_count = len(df_personas)
    if row_count != _EXPECTED_PERSONA_ROWS:
        issues.append(
            f"merged_personas.xlsx: Expected {_EXPECTED_PERSONA_ROWS} rows, got {row_count}"
        )

    # Columns whose name contains '_DB' are treated as redundant.
    db_columns = [c for c in df_personas.columns if '_DB' in str(c)]
    if db_columns:
        issues.append(f"merged_personas.xlsx: Found redundant DB columns: {db_columns}")

    # NOTE(review): pandas.read_excel appears to rename repeated headers
    # ('X', 'X.1', ...), so this check may never fire - confirm, and compare
    # the raw header row instead if duplicate headers must be detected.
    if df_personas.columns.duplicated().any():
        issues.append("merged_personas.xlsx: Duplicate column names found")

    if 'StudentCPID' in df_personas.columns:
        student_ids = df_personas['StudentCPID']
        if student_ids.duplicated().any():
            issues.append("merged_personas.xlsx: Duplicate StudentCPIDs found")
        if student_ids.isna().any():
            issues.append("merged_personas.xlsx: Missing StudentCPIDs found")

    # A single value across every row in these columns is only a warning.
    for col in df_personas.columns:
        if col in _UNIFORMITY_CHECKED_COLUMNS and df_personas[col].nunique() == 1:
            warnings.append(
                f"merged_personas.xlsx: '{col}' has only 1 unique value (all students same)"
            )

    print(f" ✅ Basic structure: {row_count} rows, {len(df_personas.columns)} columns")
    if db_columns:
        print(f" ⚠️ Redundant columns found: {len(db_columns)}")
    else:
        print(" ✅ No redundant DB columns")
    print()
    return issues, warnings


def _check_questions(data_dir):
    """Check AllQuestions.xlsx in *data_dir*; return (issues, warnings)."""
    issues, warnings = [], []
    print("2⃣ CHECKING: AllQuestions.xlsx")
    print("-" * 80)
    questions_file = data_dir / "AllQuestions.xlsx"
    if not questions_file.exists():
        issues.append("AllQuestions.xlsx: FILE NOT FOUND")
        print()
        return issues, warnings

    df_questions = pd.read_excel(questions_file, engine='openpyxl')
    has_code_column = 'code' in df_questions.columns
    has_duplicate_codes = bool(
        has_code_column and df_questions['code'].duplicated().any()
    )
    if has_duplicate_codes:
        issues.append("AllQuestions.xlsx: Duplicate question codes found")

    missing = [c for c in _REQUIRED_QUESTION_COLUMNS if c not in df_questions.columns]
    if missing:
        issues.append(f"AllQuestions.xlsx: Missing required columns: {missing}")

    print(f" ✅ Structure: {len(df_questions)} questions, {len(df_questions.columns)} columns")
    # Only claim uniqueness when it was actually verified; previously the
    # success line was printed even after duplicates had been recorded.
    if has_duplicate_codes:
        print(" ❌ Duplicate question codes found")
    elif has_code_column:
        print(" ✅ All question codes unique")
    else:
        print(" ⚠️ 'code' column missing - uniqueness not checked")
    if missing:
        print(f" ❌ Missing required columns: {missing}")
    print()
    return issues, warnings


def _check_output_files(output_dir):
    """Check the expected files exist under *output_dir*; return (issues, warnings)."""
    issues, warnings = [], []
    print("3⃣ CHECKING: Output Files Structure")
    print("-" * 80)

    # Only the existence of each domain workbook is checked, not its content.
    missing_files = []
    total_domain_files = 0
    for age_dir, files in _EXPECTED_DOMAIN_FILES.items():
        for file_name in files:
            total_domain_files += 1
            if not (output_dir / age_dir / file_name).exists():
                missing_files.append(f"{age_dir}/{file_name}")
    if missing_files:
        issues.append(f"Output files missing: {missing_files}")
    else:
        print(f" ✅ All {total_domain_files} domain files present")

    # Cognition workbooks are counted per age group; a wrong count is a
    # warning rather than a blocking issue.
    cognition_groups = (("adolescents", "adolescense"), ("adults", "adults"))
    cognition_found = 0
    cognition_ok = True
    for label, folder in cognition_groups:
        count = sum(1 for _ in (output_dir / folder / "cognition").glob("*.xlsx"))
        cognition_found += count
        if count != _EXPECTED_COGNITION_FILES:
            cognition_ok = False
            warnings.append(
                f"Cognition files: Expected {_EXPECTED_COGNITION_FILES} for {label}, found {count}"
            )

    # Report the real counts; the status mark reflects the outcome instead
    # of always showing a green tick.
    domain_present = total_domain_files - len(missing_files)
    domain_mark = "❌" if missing_files else "✅"
    print(f" {domain_mark} Domain files: {domain_present}/{total_domain_files}")
    cognition_expected = _EXPECTED_COGNITION_FILES * len(cognition_groups)
    cognition_mark = "✅" if cognition_ok else "⚠️"
    print(f" {cognition_mark} Cognition files: {cognition_found}/{cognition_expected}")
    print()
    return issues, warnings


def _print_summary(issues_found, warnings):
    """Print the final assessment for the collected issues and warnings."""
    print("=" * 80)
    print("📊 FINAL ASSESSMENT")
    print("=" * 80)
    if issues_found:
        print(f"❌ CRITICAL ISSUES FOUND: {len(issues_found)}")
        for issue in issues_found:
            print(f" - {issue}")
        print()
    if warnings:
        print(f"⚠️ WARNINGS: {len(warnings)}")
        for warning in warnings:
            print(f" - {warning}")
        print()
    if not issues_found and not warnings:
        print("✅ ALL CHECKS PASSED - FILES READY FOR CLIENT DELIVERY")
    elif not issues_found:
        print("⚠️ WARNINGS ONLY - Review recommended but not blocking")
    else:
        print("❌ CRITICAL ISSUES - MUST FIX BEFORE CLIENT DELIVERY")
    print("=" * 80)


def check_all_deliverables(base_dir=None):
    """Comprehensive check of all files to be delivered to client.

    Runs three checks and prints a report to stdout:
    ``data/merged_personas.xlsx``, ``data/AllQuestions.xlsx`` and the
    expected workbooks under ``output/full_run``.

    Args:
        base_dir: Directory containing the ``data`` and ``output`` folders.
            Defaults to the module-level ``BASE_DIR`` when omitted.

    Returns:
        True when no critical issue was recorded (warnings alone do not
        fail the check), False otherwise.
    """
    root = BASE_DIR if base_dir is None else Path(base_dir)

    print("=" * 80)
    print("🔍 FINAL CLIENT DELIVERABLE QUALITY CHECK")
    print("Perfectionist-Level Review - Zero Tolerance for Issues")
    print("=" * 80)
    print()

    issues_found = []
    warnings = []
    section_results = (
        _check_personas(root / "data"),
        _check_questions(root / "data"),
        _check_output_files(root / "output" / "full_run"),
    )
    for section_issues, section_warnings in section_results:
        issues_found.extend(section_issues)
        warnings.extend(section_warnings)

    _print_summary(issues_found, warnings)
    return not issues_found
# Script entry point: exit with status 0 when the check returns a truthy
# result and 1 otherwise.
if __name__ == "__main__":
    exit_status = 0 if check_all_deliverables() else 1
    sys.exit(exit_status)