"""
Final comprehensive check of ALL client deliverables.

Perfectionist-level review before client/BOD delivery.

Run as a script, this module inspects the persona workbook, the question
bank and the generated output tree below the project root, prints a
report, and exits with status 0 when no critical issue was found and 1
otherwise.  Warnings are reported but never change the exit status.
"""

import io
import sys
from pathlib import Path

import pandas as pd

if sys.platform == 'win32':
    # Re-wrap stdout as UTF-8 so the emoji in the report can be encoded.
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

BASE_DIR = Path(__file__).resolve().parent.parent

EXPECTED_PERSONA_ROWS = 3000
EXPECTED_COGNITION_FILES_PER_GROUP = 12
UNIFORMITY_CHECK_COLUMNS = ('Nationality', 'Native State')
REQUIRED_QUESTION_COLUMNS = ['code', 'domain', 'age-group', 'question']

# Keys are directories relative to ``output/full_run``.  The spelling
# "adolescense" is the path the checks have always looked for, so it is
# kept verbatim here.
EXPECTED_DOMAIN_FILES = {
    'adolescense/5_domain': [
        'Personality_14-17.xlsx',
        'Grit_14-17.xlsx',
        'Emotional_Intelligence_14-17.xlsx',
        'Vocational_Interest_14-17.xlsx',
        'Learning_Strategies_14-17.xlsx',
    ],
    'adults/5_domain': [
        'Personality_18-23.xlsx',
        'Grit_18-23.xlsx',
        'Emotional_Intelligence_18-23.xlsx',
        'Vocational_Interest_18-23.xlsx',
        'Learning_Strategies_18-23.xlsx',
    ],
}

# (label used in warning text, directory name below ``output/full_run``)
COGNITION_GROUPS = (
    ('adolescents', 'adolescense'),
    ('adults', 'adults'),
)

_OK = "✅"
_FAIL = "❌"
_WARN = "⚠️"


def _duplicate_header_names(xlsx_path):
    """Return the header names that occur more than once in the first sheet.

    ``pd.read_excel`` renames repeated headers (``X``, ``X.1``, ...), so a
    duplicate check on the loaded frame's columns can never fire.  Reading
    the first row with ``header=None`` exposes the names as stored.
    """
    header = pd.read_excel(xlsx_path, engine='openpyxl', header=None, nrows=1)
    if header.empty:
        return []
    names = header.iloc[0].dropna().astype(str)
    return sorted(names[names.duplicated()].unique())


def _check_personas(data_dir, issues, warnings):
    """Validate ``merged_personas.xlsx``; append findings to the given lists."""
    print("1️⃣ CHECKING: merged_personas.xlsx")
    print("-" * 80)

    personas_file = data_dir / "merged_personas.xlsx"
    if not personas_file.exists():
        issues.append("merged_personas.xlsx: FILE NOT FOUND")
        print()
        return

    df_personas = pd.read_excel(personas_file, engine='openpyxl')
    issues_before = len(issues)

    # Check row count
    if len(df_personas) != EXPECTED_PERSONA_ROWS:
        issues.append(
            f"merged_personas.xlsx: Expected {EXPECTED_PERSONA_ROWS} rows, "
            f"got {len(df_personas)}"
        )

    # Check for redundant DB columns
    db_columns = [c for c in df_personas.columns if '_DB' in str(c)]
    if db_columns:
        issues.append(
            f"merged_personas.xlsx: Found redundant DB columns: {db_columns}"
        )

    # Check for duplicate columns (on the raw header, see helper docstring)
    duplicate_names = _duplicate_header_names(personas_file)
    if duplicate_names:
        issues.append(
            f"merged_personas.xlsx: Duplicate column names found: {duplicate_names}"
        )

    # Check StudentCPID uniqueness
    if 'StudentCPID' in df_personas.columns:
        student_ids = df_personas['StudentCPID']
        if student_ids.duplicated().any():
            issues.append("merged_personas.xlsx: Duplicate StudentCPIDs found")
        if student_ids.isna().any():
            issues.append("merged_personas.xlsx: Missing StudentCPIDs found")

    # Check for suspicious uniform columns
    for col in df_personas.columns:
        if col in UNIFORMITY_CHECK_COLUMNS and df_personas[col].nunique() == 1:
            warnings.append(
                f"merged_personas.xlsx: '{col}' has only 1 unique value "
                f"(all students same)"
            )

    # Only claim success when this section recorded no critical issue.
    marker = _OK if len(issues) == issues_before else _FAIL
    print(
        f" {marker} Basic structure: {len(df_personas)} rows, "
        f"{len(df_personas.columns)} columns"
    )
    if db_columns:
        print(f" {_WARN} Redundant columns found: {len(db_columns)}")
    else:
        print(f" {_OK} No redundant DB columns")
    print()


def _check_questions(data_dir, issues):
    """Validate ``AllQuestions.xlsx``; append critical findings to *issues*."""
    print("2️⃣ CHECKING: AllQuestions.xlsx")
    print("-" * 80)

    questions_file = data_dir / "AllQuestions.xlsx"
    if not questions_file.exists():
        issues.append("AllQuestions.xlsx: FILE NOT FOUND")
        print()
        return

    df_questions = pd.read_excel(questions_file, engine='openpyxl')

    # Check for duplicate question codes
    has_code_column = 'code' in df_questions.columns
    has_duplicate_codes = bool(
        has_code_column and df_questions['code'].duplicated().any()
    )
    if has_duplicate_codes:
        issues.append("AllQuestions.xlsx: Duplicate question codes found")

    # Check required columns
    missing = [c for c in REQUIRED_QUESTION_COLUMNS if c not in df_questions.columns]
    if missing:
        issues.append(f"AllQuestions.xlsx: Missing required columns: {missing}")

    marker = _FAIL if missing else _OK
    print(
        f" {marker} Structure: {len(df_questions)} questions, "
        f"{len(df_questions.columns)} columns"
    )
    # Report the uniqueness result that was actually observed instead of
    # printing a success line unconditionally.
    if not has_code_column:
        print(f" {_FAIL} Question codes not checked: 'code' column missing")
    elif has_duplicate_codes:
        print(f" {_FAIL} Duplicate question codes found")
    else:
        print(f" {_OK} All question codes unique")
    print()


def _check_output_files(output_dir, issues, warnings):
    """Check that the expected domain and cognition workbooks exist.

    Only file presence and counts are inspected; no workbook is opened.
    """
    print("3️⃣ CHECKING: Output Files Structure")
    print("-" * 80)

    expected_total = sum(len(files) for files in EXPECTED_DOMAIN_FILES.values())
    missing_files = []
    for age_dir, files in EXPECTED_DOMAIN_FILES.items():
        for file_name in files:
            if not (output_dir / age_dir / file_name).exists():
                missing_files.append(f"{age_dir}/{file_name}")

    if missing_files:
        issues.append(f"Output files missing: {missing_files}")
    else:
        print(f" {_OK} All {expected_total} domain files present")

    # Check cognition files
    cognition_found = 0
    cognition_ok = True
    for label, age_dir in COGNITION_GROUPS:
        count = len(list((output_dir / age_dir / "cognition").glob("*.xlsx")))
        cognition_found += count
        if count != EXPECTED_COGNITION_FILES_PER_GROUP:
            cognition_ok = False
            warnings.append(
                f"Cognition files: Expected {EXPECTED_COGNITION_FILES_PER_GROUP} "
                f"for {label}, found {count}"
            )

    # The tallies show what was found on disk, not the expected totals.
    domain_found = expected_total - len(missing_files)
    cognition_expected = EXPECTED_COGNITION_FILES_PER_GROUP * len(COGNITION_GROUPS)
    print(
        f" {_FAIL if missing_files else _OK} "
        f"Domain files: {domain_found}/{expected_total}"
    )
    print(
        f" {_OK if cognition_ok else _WARN} "
        f"Cognition files: {cognition_found}/{cognition_expected}"
    )
    print()


def _print_summary(issues, warnings):
    """Print the final assessment built from the collected findings."""
    print("=" * 80)
    print("📊 FINAL ASSESSMENT")
    print("=" * 80)

    if issues:
        print(f"❌ CRITICAL ISSUES FOUND: {len(issues)}")
        for issue in issues:
            print(f" - {issue}")
        print()

    if warnings:
        print(f"⚠️ WARNINGS: {len(warnings)}")
        for warning in warnings:
            print(f" - {warning}")
        print()

    if not issues and not warnings:
        print("✅ ALL CHECKS PASSED - FILES READY FOR CLIENT DELIVERY")
    elif not issues:
        print("⚠️ WARNINGS ONLY - Review recommended but not blocking")
    else:
        print("❌ CRITICAL ISSUES - MUST FIX BEFORE CLIENT DELIVERY")

    print("=" * 80)


def check_all_deliverables(base_dir=None):
    """Comprehensive check of all files to be delivered to client.

    Args:
        base_dir: Project root containing ``data/`` and
            ``output/full_run/``.  Defaults to the module-level
            ``BASE_DIR`` when omitted.

    Returns:
        ``True`` when no critical issue was recorded (warnings alone do
        not fail the check), ``False`` otherwise.
    """
    root = BASE_DIR if base_dir is None else Path(base_dir)

    print("=" * 80)
    print("🔍 FINAL CLIENT DELIVERABLE QUALITY CHECK")
    print("Perfectionist-Level Review - Zero Tolerance for Issues")
    print("=" * 80)
    print()

    issues_found = []
    warnings = []

    _check_personas(root / "data", issues_found, warnings)
    _check_questions(root / "data", issues_found)
    _check_output_files(root / "output" / "full_run", issues_found, warnings)
    _print_summary(issues_found, warnings)

    return not issues_found


if __name__ == "__main__":
    success = check_all_deliverables()
    sys.exit(0 if success else 1)