""" Verify that omitted question values were replaced with "--" """ import pandas as pd from pathlib import Path import sys import io if sys.platform == 'win32': sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') BASE_DIR = Path(__file__).resolve().parent.parent OUTPUT_DIR = BASE_DIR / "output" / "full_run" MAPPING_FILE = BASE_DIR / "data" / "AllQuestions.xlsx" def verify_replacement(): """Verify omitted values were replaced correctly""" print("=" * 80) print("✅ VERIFICATION: Omitted Values Replacement") print("=" * 80) print() # Load omitted codes map_df = pd.read_excel(MAPPING_FILE, engine='openpyxl') omitted_codes = set(map_df[map_df['Type'].str.lower() == 'omission']['code'].astype(str).str.strip().tolist()) print(f"📊 Total omitted question codes: {len(omitted_codes)}") print() # Test a sample file test_file = OUTPUT_DIR / "adolescense" / "5_domain" / "Personality_14-17.xlsx" if not test_file.exists(): print(f"❌ Test file not found: {test_file}") return False df = pd.read_excel(test_file, engine='openpyxl') # Find omitted columns in this file omitted_cols_in_file = [] for col in df.columns: if str(col).strip() in omitted_codes: omitted_cols_in_file.append(col) print(f"📋 Testing file: {test_file.name}") print(f" Found {len(omitted_cols_in_file)} omitted question columns") print() # Verify replacement all_correct = True sample_checked = 0 for col in omitted_cols_in_file[:10]: # Check first 10 unique_vals = df[col].unique() non_dash_vals = [v for v in unique_vals if str(v) != '--' and pd.notna(v)] if non_dash_vals: print(f" ❌ {col}: Found non-'--' values: {non_dash_vals[:3]}") all_correct = False else: sample_checked += 1 if sample_checked <= 3: print(f" ✅ {col}: All values are '--' (verified)") if sample_checked > 3: print(f" ✅ ... and {sample_checked - 3} more columns verified") print() # Check a few random rows print("📊 Sample Row Check (first 3 omitted columns):") for col in omitted_cols_in_file[:3]: sample_values = df[col].head(5).tolist() all_dash = all(str(v) == '--' for v in sample_values) status = "✅" if all_dash else "❌" print(f" {status} {col}: {sample_values}") print() print("=" * 80) if all_correct: print("✅ VERIFICATION PASSED: All omitted values replaced with '--'") else: print("❌ VERIFICATION FAILED: Some values not replaced") print("=" * 80) return all_correct if __name__ == "__main__": success = verify_replacement() sys.exit(0 if success else 1)