"""Check the difference between old and new resume logic""" import pandas as pd df = pd.read_excel('output/full_run/adolescense/5_domain/Emotional_Intelligence_14-17.xlsx', engine='openpyxl') cpid_col = 'Student CPID' # OLD logic (what current running process used) old_logic = set(df[cpid_col].astype(str).tolist()) # NEW logic (what fixed code will use) new_logic = set() for cpid in df[cpid_col].dropna().astype(str): cpid_str = str(cpid).strip() if cpid_str and cpid_str.lower() != 'nan' and cpid_str != '': new_logic.add(cpid_str) print("="*60) print("RESUME LOGIC COMPARISON") print("="*60) print(f"OLD logic count (includes NaN): {len(old_logic)}") print(f"NEW logic count (valid only): {len(new_logic)}") print(f"Difference: {len(old_logic) - len(new_logic)}") print(f"\n'nan' in old set: {'nan' in old_logic}") print(f"Valid CPIDs in old set: {len([c for c in old_logic if c and c.lower() != 'nan'])}") print(f"\nExpected total: 1507") print(f"Missing with OLD logic: {1507 - len([c for c in old_logic if c and c.lower() != 'nan'])}") print(f"Missing with NEW logic: {1507 - len(new_logic)}") print("="*60)