import pandas as pd


def _combine_predictions(frames):
    """Merge several per-model prediction tables into one ensemble table.

    Each table in *frames* must provide the columns ``'File'``,
    ``'Predicted Class'`` and ``'Prediction Probability'``.  Rows are
    matched purely by position; the ``'File'`` values of the later tables
    are never compared with those of the first one.

    For every row the result holds:

    * ``'File'`` -- taken from the first table,
    * ``'Predicted Class'`` -- the class reported by the model with the
      highest probability (on a tie the earliest table wins),
    * ``'Prediction Probability'`` -- the plain average of all models'
      probabilities (not the winning probability).

    Raises:
        ValueError: if any table has fewer rows than the first one.  Extra
            rows in later tables are ignored.
    """
    reference = frames[0]
    n_rows = len(reference)
    for position, frame in enumerate(frames):
        if len(frame) < n_rows:
            raise ValueError(
                f"prediction table {position} has {len(frame)} rows, "
                f"expected at least {n_rows}"
            )

    def column(frame, name):
        # Trim to the reference length and renumber from 0 so that every
        # column shares one index and element-wise operations line up.
        return frame[name].iloc[:n_rows].reset_index(drop=True)

    prob_columns = [column(frame, 'Prediction Probability') for frame in frames]
    class_columns = [column(frame, 'Predicted Class') for frame in frames]

    # Running maximum across the models.  The strict ``>`` keeps the
    # earliest model on ties, and a NaN never replaces the current best
    # because any comparison with NaN is False.
    best_prob = prob_columns[0].astype(float)
    best_class = class_columns[0]
    for probs, classes in zip(prob_columns[1:], class_columns[1:]):
        is_better = probs > best_prob
        best_prob = best_prob.mask(is_better, probs)
        best_class = best_class.mask(is_better, classes)

    return pd.DataFrame({
        'File': column(reference, 'File'),
        'Predicted Class': best_class,
        'Prediction Probability': sum(prob_columns) / len(prob_columns),
    })


# Predictions of the models trained on the bytes features.
df1 = pd.read_csv('latest_malware_bytes_predictions_KNeighbours.csv')
df2 = pd.read_csv('latest_malware_bytes_predictions_RandomForest.csv')
df3 = pd.read_csv('latest_malware_bytes_predictions_SGD.csv')
df4 = pd.read_csv('latest_malware_bytes_predictions_XGB.csv')

combined_data1 = _combine_predictions([df1, df2, df3, df4])
print(combined_data1)

# Predictions of the models trained on the ASM features.
df5 = pd.read_csv('latest_malware_ASM_predictions_KNeighbours.csv')
df6 = pd.read_csv('latest_malware_ASM_predictions_LogisticRegression.csv')
df7 = pd.read_csv('latest_malware_ASM_predictions_RandomForest.csv')
df8 = pd.read_csv('latest_malware_ASM_predictions_XGB.csv')

combined_data2 = _combine_predictions([df5, df6, df7, df8])
print(combined_data2)

# Stack the two ensembles (bytes rows first, then ASM rows) with a fresh
# 0..n-1 index.
combined_data = pd.concat([combined_data1, combined_data2], ignore_index=True)