29 lines
1.1 KiB
Python
29 lines
1.1 KiB
Python
"""Check the difference between old and new resume logic"""
|
|
import pandas as pd
|
|
|
|
# Spreadsheet inspected by this script and the column that holds the CPIDs.
RESULTS_PATH = 'output/full_run/adolescense/5_domain/Emotional_Intelligence_14-17.xlsx'
CPID_COLUMN = 'Student CPID'

# Expected total against which the "Missing" figures are computed.
EXPECTED_TOTAL = 1507

# Horizontal rule framing the report.
RULE = "=" * 60


def is_valid_cpid(value):
    """Return True if *value* is a non-empty string other than 'nan' (any case)."""
    return bool(value) and value.lower() != 'nan'


def old_resume_cpids(column):
    """Return the CPID set as the OLD resume logic builds it.

    Every cell is stringified as-is, so missing cells are not filtered out
    and values are not stripped of surrounding whitespace.

    Args:
        column: pandas Series holding the CPID column.

    Returns:
        Set of the stringified cell values.
    """
    return set(column.astype(str).tolist())


def new_resume_cpids(column):
    """Return the CPID set as the NEW (fixed) resume logic builds it.

    Missing cells are dropped, the remaining values are stringified and
    stripped, and empty strings and literal 'nan' strings are discarded.

    Args:
        column: pandas Series holding the CPID column.

    Returns:
        Set of cleaned, valid CPID strings.
    """
    stripped = (str(value).strip() for value in column.dropna().astype(str))
    return {cpid for cpid in stripped if is_valid_cpid(cpid)}


def build_report(column, expected_total=EXPECTED_TOTAL):
    """Build the comparison report between the old and new resume logic.

    Args:
        column: pandas Series holding the CPID column.
        expected_total: total used to compute the "Missing" figures.

    Returns:
        List of strings, one per ``print`` call of the report (some entries
        start with a newline to produce a blank separator line).
    """
    old_logic = old_resume_cpids(column)
    new_logic = new_resume_cpids(column)
    # Computed once and reused for both the "Valid" and "Missing with OLD" lines.
    valid_in_old = sum(1 for cpid in old_logic if is_valid_cpid(cpid))

    return [
        RULE,
        "RESUME LOGIC COMPARISON",
        RULE,
        f"OLD logic count (includes NaN): {len(old_logic)}",
        f"NEW logic count (valid only): {len(new_logic)}",
        f"Difference: {len(old_logic) - len(new_logic)}",
        f"\n'nan' in old set: {'nan' in old_logic}",
        f"Valid CPIDs in old set: {valid_in_old}",
        f"\nExpected total: {expected_total}",
        f"Missing with OLD logic: {expected_total - valid_in_old}",
        f"Missing with NEW logic: {expected_total - len(new_logic)}",
        RULE,
    ]


def main():
    """Read the results workbook and print the resume-logic comparison."""
    df = pd.read_excel(RESULTS_PATH, engine='openpyxl')
    for line in build_report(df[CPID_COLUMN]):
        print(line)


if __name__ == "__main__":
    main()