"""
Final comprehensive check of ALL client deliverables

Perfectionist-level review before client/BOD delivery
"""
import pandas as pd
from pathlib import Path
import sys
import io

# The report below prints emoji; the default Windows console encoding
# (often cp1252) cannot encode them and print() would raise. Reconfigure
# stdout to UTF-8 in place -- unlike rebinding sys.stdout to a fresh
# TextIOWrapper, reconfigure() keeps existing references to the stream valid.
if sys.platform == 'win32':
    sys.stdout.reconfigure(encoding='utf-8')

# Project root: this script lives one directory below it.
BASE_DIR = Path(__file__).resolve().parent.parent
def check_all_deliverables(base_dir=None):
    """Comprehensive check of all files to be delivered to the client.

    Validates the merged personas workbook, the question bank, and the
    generated output-file tree, printing a human-readable report along
    the way.

    Args:
        base_dir: Root directory containing the ``data`` and ``output``
            folders. Defaults to the module-level ``BASE_DIR``.

    Returns:
        bool: True when no critical issues were found. Warnings alone do
        not fail the check.
    """
    root = BASE_DIR if base_dir is None else Path(base_dir)

    print("=" * 80)
    print("🔍 FINAL CLIENT DELIVERABLE QUALITY CHECK")
    print("Perfectionist-Level Review - Zero Tolerance for Issues")
    print("=" * 80)
    print()

    issues_found = []
    warnings = []

    _check_personas(root, issues_found, warnings)
    _check_questions(root, issues_found)
    _check_outputs(root, issues_found, warnings)

    # Final summary
    print("=" * 80)
    print("📊 FINAL ASSESSMENT")
    print("=" * 80)

    if issues_found:
        print(f"❌ CRITICAL ISSUES FOUND: {len(issues_found)}")
        for issue in issues_found:
            print(f"   - {issue}")
        print()

    if warnings:
        print(f"⚠️ WARNINGS: {len(warnings)}")
        for warning in warnings:
            print(f"   - {warning}")
        print()

    if not issues_found and not warnings:
        print("✅ ALL CHECKS PASSED - FILES READY FOR CLIENT DELIVERY")
    elif not issues_found:
        print("⚠️ WARNINGS ONLY - Review recommended but not blocking")
    else:
        print("❌ CRITICAL ISSUES - MUST FIX BEFORE CLIENT DELIVERY")

    print("=" * 80)

    return len(issues_found) == 0


def _check_personas(root, issues_found, warnings):
    """Check data/merged_personas.xlsx: row count, schema, key integrity."""
    print("1️⃣ CHECKING: merged_personas.xlsx")
    print("-" * 80)

    personas_file = root / "data" / "merged_personas.xlsx"
    if not personas_file.exists():
        issues_found.append("merged_personas.xlsx: FILE NOT FOUND")
        print()
        return

    df_personas = pd.read_excel(personas_file, engine='openpyxl')

    # Expected cohort size is exactly 3000 students.
    if len(df_personas) != 3000:
        issues_found.append(
            f"merged_personas.xlsx: Expected 3000 rows, got {len(df_personas)}"
        )

    # Redundant "_DB" columns are merge leftovers that must not ship.
    db_columns = [c for c in df_personas.columns if '_DB' in str(c)]
    if db_columns:
        issues_found.append(
            f"merged_personas.xlsx: Found redundant DB columns: {db_columns}"
        )

    if df_personas.columns.duplicated().any():
        issues_found.append("merged_personas.xlsx: Duplicate column names found")

    # StudentCPID is the primary key: must be present, unique, non-null.
    if 'StudentCPID' in df_personas.columns:
        if df_personas['StudentCPID'].duplicated().any():
            issues_found.append("merged_personas.xlsx: Duplicate StudentCPIDs found")
        if df_personas['StudentCPID'].isna().any():
            issues_found.append("merged_personas.xlsx: Missing StudentCPIDs found")

    # A single unique value across the cohort in these columns is suspicious
    # (suggests a fill-down bug), but not necessarily wrong -- warn only.
    for col in ('Nationality', 'Native State'):
        if col in df_personas.columns and df_personas[col].nunique() == 1:
            warnings.append(
                f"merged_personas.xlsx: '{col}' has only 1 unique value (all students same)"
            )

    print(f"   ✅ Basic structure: {len(df_personas)} rows, {len(df_personas.columns)} columns")
    if db_columns:
        print(f"   ⚠️ Redundant columns found: {len(db_columns)}")
    else:
        print("   ✅ No redundant DB columns")
    print()


def _check_questions(root, issues_found):
    """Check data/AllQuestions.xlsx: required columns and unique codes."""
    print("2️⃣ CHECKING: AllQuestions.xlsx")
    print("-" * 80)

    questions_file = root / "data" / "AllQuestions.xlsx"
    if not questions_file.exists():
        issues_found.append("AllQuestions.xlsx: FILE NOT FOUND")
        print()
        return

    df_questions = pd.read_excel(questions_file, engine='openpyxl')

    codes_unique = True
    if 'code' in df_questions.columns and df_questions['code'].duplicated().any():
        codes_unique = False
        issues_found.append("AllQuestions.xlsx: Duplicate question codes found")

    required = ['code', 'domain', 'age-group', 'question']
    missing = [c for c in required if c not in df_questions.columns]
    if missing:
        issues_found.append(
            f"AllQuestions.xlsx: Missing required columns: {missing}"
        )

    print(f"   ✅ Structure: {len(df_questions)} questions, {len(df_questions.columns)} columns")
    # BUG FIX: previously "All question codes unique" printed unconditionally,
    # even right after a duplicate-code issue had been recorded.
    if codes_unique:
        print("   ✅ All question codes unique")
    else:
        print("   ❌ Duplicate question codes found")
    print()


def _check_outputs(root, issues_found, warnings):
    """Check the generated output tree: 10 domain files + 24 cognition files."""
    print("3️⃣ CHECKING: Output Files Structure")
    print("-" * 80)

    output_dir = root / "output" / "full_run"

    # NOTE(review): "adolescense" matches the on-disk directory name used by
    # the generator, even though the standard spelling is "adolescence".
    expected_files = {
        'adolescense/5_domain': [
            'Personality_14-17.xlsx',
            'Grit_14-17.xlsx',
            'Emotional_Intelligence_14-17.xlsx',
            'Vocational_Interest_14-17.xlsx',
            'Learning_Strategies_14-17.xlsx',
        ],
        'adults/5_domain': [
            'Personality_18-23.xlsx',
            'Grit_18-23.xlsx',
            'Emotional_Intelligence_18-23.xlsx',
            'Vocational_Interest_18-23.xlsx',
            'Learning_Strategies_18-23.xlsx',
        ],
    }

    missing_files = [
        f"{age_dir}/{file_name}"
        for age_dir, files in expected_files.items()
        for file_name in files
        if not (output_dir / age_dir / file_name).exists()
    ]

    total_expected = sum(len(files) for files in expected_files.values())
    if missing_files:
        issues_found.append(f"Output files missing: {missing_files}")
    else:
        print(f"   ✅ All {total_expected} domain files present")

    # Path.glob on a nonexistent directory simply yields nothing.
    cog_files_adol = list((output_dir / "adolescense" / "cognition").glob("*.xlsx"))
    cog_files_adult = list((output_dir / "adults" / "cognition").glob("*.xlsx"))

    if len(cog_files_adol) != 12:
        warnings.append(
            f"Cognition files: Expected 12 for adolescents, found {len(cog_files_adol)}"
        )
    if len(cog_files_adult) != 12:
        warnings.append(
            f"Cognition files: Expected 12 for adults, found {len(cog_files_adult)}"
        )

    # BUG FIX: previously printed a hard-coded "✅ Domain files: 10/10"
    # regardless of how many files were actually found.
    found_domain = total_expected - len(missing_files)
    print(f"   ✅ Domain files: {found_domain}/{total_expected}")
    print(f"   ✅ Cognition files: {len(cog_files_adol) + len(cog_files_adult)}/24")
    print()
if __name__ == "__main__":
    # Exit status mirrors the check result: 0 = clean, 1 = critical issues.
    raise SystemExit(0 if check_all_deliverables() else 1)