""" Final 100% Verification Report """ import pandas as pd from pathlib import Path import sys import io if sys.platform == 'win32': sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') BASE_DIR = Path(__file__).resolve().parent.parent OUTPUT_DIR = BASE_DIR / "output" / "full_run" EXPECTED_ADOLESCENTS = 1507 EXPECTED_ADULTS = 1493 def verify_domain_files(): """Verify all 5 domain files for both age groups""" results = {} domain_files = { 'adolescense': { 'Personality': 'Personality_14-17.xlsx', 'Grit': 'Grit_14-17.xlsx', 'Emotional Intelligence': 'Emotional_Intelligence_14-17.xlsx', 'Vocational Interest': 'Vocational_Interest_14-17.xlsx', 'Learning Strategies': 'Learning_Strategies_14-17.xlsx' }, 'adults': { 'Personality': 'Personality_18-23.xlsx', 'Grit': 'Grit_18-23.xlsx', 'Emotional Intelligence': 'Emotional_Intelligence_18-23.xlsx', 'Vocational Interest': 'Vocational_Interest_18-23.xlsx', 'Learning Strategies': 'Learning_Strategies_18-23.xlsx' } } all_passed = True for age_group, domains in domain_files.items(): expected_count = EXPECTED_ADOLESCENTS if age_group == 'adolescense' else EXPECTED_ADULTS age_results = {} for domain, file_name in domains.items(): file_path = OUTPUT_DIR / age_group / "5_domain" / file_name if not file_path.exists(): age_results[domain] = {'status': 'MISSING', 'rows': 0} all_passed = False continue try: df = pd.read_excel(file_path, engine='openpyxl') row_count = len(df) col_count = len(df.columns) # Check ID column id_col = 'Student CPID' if 'Student CPID' in df.columns else 'Participant' if id_col not in df.columns: age_results[domain] = {'status': 'NO_ID_COLUMN', 'rows': row_count} all_passed = False continue # Check for unique IDs unique_ids = df[id_col].dropna().nunique() # Calculate data density total_cells = row_count * col_count null_cells = df.isnull().sum().sum() density = ((total_cells - null_cells) / total_cells) * 100 if total_cells > 0 else 0 # Verify row count if row_count == expected_count and unique_ids == expected_count: age_results[domain] = { 'status': 'PASS', 'rows': row_count, 'cols': col_count, 'unique_ids': unique_ids, 'density': round(density, 2) } else: age_results[domain] = { 'status': 'ROW_MISMATCH', 'rows': row_count, 'expected': expected_count, 'unique_ids': unique_ids } all_passed = False except Exception as e: age_results[domain] = {'status': 'ERROR', 'error': str(e)} all_passed = False results[age_group] = age_results return results, all_passed def main(): print("=" * 80) print("FINAL 100% VERIFICATION REPORT") print("=" * 80) print() results, all_passed = verify_domain_files() # Print detailed results for age_group, domains in results.items(): age_label = "ADOLESCENTS (14-17)" if age_group == 'adolescense' else "ADULTS (18-23)" expected = EXPECTED_ADOLESCENTS if age_group == 'adolescense' else EXPECTED_ADULTS print(f"{age_label} - Expected: {expected} students") print("-" * 80) for domain, result in domains.items(): if result['status'] == 'PASS': print(f" {domain:30} PASS - {result['rows']} rows, {result['cols']} cols, {result['density']}% density") else: print(f" {domain:30} {result['status']} - {result}") print() print("=" * 80) if all_passed: print("VERIFICATION RESULT: 100% PASS - ALL DOMAINS COMPLETE") else: print("VERIFICATION RESULT: FAILED - REVIEW REQUIRED") print("=" * 80) return all_passed if __name__ == "__main__": success = main() sys.exit(0 if success else 1)