93 lines
2.8 KiB
Python
93 lines
2.8 KiB
Python
"""
Verify that omitted question values were replaced with "--".
"""
|
|
import pandas as pd
|
|
from pathlib import Path
|
|
import sys
|
|
import io
|
|
|
|
# The report printed by this script contains emoji; on Windows, force stdout
# to UTF-8 (presumably because the default console encoding cannot encode
# them -- confirm against the target environment).
if sys.platform == 'win32':
    if hasattr(sys.stdout, 'reconfigure'):
        # Change the encoding in place: keeps the existing stream object and
        # its buffering settings instead of stacking a second wrapper.
        sys.stdout.reconfigure(encoding='utf-8')
    elif hasattr(sys.stdout, 'buffer'):
        # Fallback for streams without reconfigure(): wrap the raw buffer.
        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
|
|
|
|
# Directory that contains this script.
_SCRIPT_DIR = Path(__file__).resolve().parent

# Base directory: one level above the script's directory.
BASE_DIR = _SCRIPT_DIR.parent

# Location of the output workbooks inspected by the verification.
OUTPUT_DIR = BASE_DIR.joinpath("output", "full_run")

# Workbook that maps question codes to their type.
MAPPING_FILE = BASE_DIR.joinpath("data", "AllQuestions.xlsx")
|
|
|
|
def verify_replacement():
    """Verify that omitted-question columns in a sample output file hold only "--".

    Reads the mapping workbook (``MAPPING_FILE``) and collects the ``code``
    values of rows whose ``Type`` is "omission" (case-insensitive). It then
    opens one sample output workbook under ``OUTPUT_DIR`` and checks every
    column whose header matches one of those codes. Missing (NaN) cells are
    ignored; any other value different from "--" marks the column as failed.
    A human-readable report is printed along the way.

    Returns:
        bool: True if every omitted column found in the sample file contains
        only "--" (or missing) values; False if the sample file does not
        exist or any such column holds another value.
    """
    print("=" * 80)
    print("✅ VERIFICATION: Omitted Values Replacement")
    print("=" * 80)
    print()

    # Load omitted codes
    map_df = pd.read_excel(MAPPING_FILE, engine='openpyxl')
    # Cast to str first so the filter still works when pandas infers a
    # non-string dtype for the column (the .str accessor would raise).
    is_omission = map_df['Type'].astype(str).str.lower() == 'omission'
    omitted_codes = set(map_df.loc[is_omission, 'code'].astype(str).str.strip())

    print(f"📊 Total omitted question codes: {len(omitted_codes)}")
    print()

    # Test a sample file
    test_file = OUTPUT_DIR / "adolescense" / "5_domain" / "Personality_14-17.xlsx"

    if not test_file.exists():
        print(f"❌ Test file not found: {test_file}")
        return False

    df = pd.read_excel(test_file, engine='openpyxl')

    # Find omitted columns in this file
    omitted_cols_in_file = [
        col for col in df.columns if str(col).strip() in omitted_codes
    ]

    print(f"📋 Testing file: {test_file.name}")
    print(f" Found {len(omitted_cols_in_file)} omitted question columns")
    if not omitted_cols_in_file:
        # Make a vacuous pass visible instead of silently reporting success.
        print(" ⚠️ No omitted question columns found in this file; nothing was verified")
    print()

    # Verify replacement in every omitted column, so the final verdict really
    # covers all of them; only the per-column success output is limited.
    all_correct = True
    verified_count = 0

    for col in omitted_cols_in_file:
        unique_vals = df[col].unique()
        # Missing cells are tolerated; anything else must be exactly "--".
        non_dash_vals = [v for v in unique_vals if pd.notna(v) and str(v) != '--']

        if non_dash_vals:
            print(f" ❌ {col}: Found non-'--' values: {non_dash_vals[:3]}")
            all_correct = False
            continue

        verified_count += 1
        if verified_count <= 3:
            print(f" ✅ {col}: All values are '--' (verified)")

    if verified_count > 3:
        print(f" ✅ ... and {verified_count - 3} more columns verified")

    print()

    # Show the first rows of a few columns (informational only; this does not
    # affect the returned result).
    print("📊 Sample Row Check (first 3 omitted columns):")
    for col in omitted_cols_in_file[:3]:
        sample_values = df[col].head(5).tolist()
        all_dash = all(str(v) == '--' for v in sample_values)
        status = "✅" if all_dash else "❌"
        print(f" {status} {col}: {sample_values}")

    print()
    print("=" * 80)

    if all_correct:
        print("✅ VERIFICATION PASSED: All omitted values replaced with '--'")
    else:
        print("❌ VERIFICATION FAILED: Some values not replaced")

    print("=" * 80)

    return all_correct
|
|
|
|
# Script entry point: run the verification and report the outcome through the
# process exit status (0 when it passed, 1 otherwise).
if __name__ == "__main__":
    exit_code = 0 if verify_replacement() else 1
    sys.exit(exit_code)
|