91 lines
3.0 KiB
Python
91 lines
3.0 KiB
Python
import pandas as pd
|
|
|
|
# Load the four per-model prediction tables produced for the bytes inputs.
df1 = pd.read_csv('latest_malware_bytes_predictions_KNeighbours.csv')
df2 = pd.read_csv('latest_malware_bytes_predictions_RandomForest.csv')
df3 = pd.read_csv('latest_malware_bytes_predictions_SGD.csv')
df4 = pd.read_csv('latest_malware_bytes_predictions_XGB.csv')
bytes_models = (df1, df2, df3, df4)

# Seed the combined table from the first model. The class column is only a
# placeholder and the probability column starts at 0.0; both are overwritten
# row by row in the loop below.
combined_data1 = pd.DataFrame()
combined_data1['File'] = df1['File']
combined_data1['Predicted Class'] = df1['Predicted Class']
combined_data1['Prediction Probability'] = 0.0

# Rows are matched across models purely by index label, so every CSV is
# assumed to list the same files in the same order (this is not checked).
for row in range(len(df1)):
    row_probs = [model['Prediction Probability'][row] for model in bytes_models]
    row_classes = [model['Predicted Class'][row] for model in bytes_models]

    # Keep the class reported by whichever model has the highest probability.
    # max() returns the first maximum, so ties go to the earliest model.
    _, best_class = max(zip(row_probs, row_classes), key=lambda pair: pair[0])
    combined_data1.at[row, 'Predicted Class'] = best_class

    # The stored probability is the plain mean over all four models, not the
    # winning model's own probability.
    mean_prob = sum(row_probs) / len(row_probs)
    combined_data1.at[row, 'Prediction Probability'] = mean_prob

print(combined_data1)
|
|
|
|
# Load the four per-model prediction tables produced for the ASM inputs.
df5 = pd.read_csv('latest_malware_ASM_predictions_KNeighbours.csv')
df6 = pd.read_csv('latest_malware_ASM_predictions_LogisticRegression.csv')
df7 = pd.read_csv('latest_malware_ASM_predictions_RandomForest.csv')
df8 = pd.read_csv('latest_malware_ASM_predictions_XGB.csv')
asm_models = (df5, df6, df7, df8)

# Seed the combined table from the first model. The class column is only a
# placeholder and the probability column starts at 0.0; both are overwritten
# row by row in the loop below.
combined_data2 = pd.DataFrame()
combined_data2['File'] = df5['File']
combined_data2['Predicted Class'] = df5['Predicted Class']
combined_data2['Prediction Probability'] = 0.0

# Rows are matched across models purely by index label, so every CSV is
# assumed to list the same files in the same order (this is not checked).
for row in range(len(df5)):
    row_probs = [model['Prediction Probability'][row] for model in asm_models]
    row_classes = [model['Predicted Class'][row] for model in asm_models]

    # Keep the class reported by whichever model has the highest probability.
    # max() returns the first maximum, so ties go to the earliest model.
    _, best_class = max(zip(row_probs, row_classes), key=lambda pair: pair[0])
    combined_data2.at[row, 'Predicted Class'] = best_class

    # The stored probability is the plain mean over all four models, not the
    # winning model's own probability.
    mean_prob = sum(row_probs) / len(row_probs)
    combined_data2.at[row, 'Prediction Probability'] = mean_prob

print(combined_data2)
|
|
|
|
# Stack the two ensemble tables (combined_data1 first, then combined_data2)
# into a single frame; ignore_index=True renumbers the rows from 0.
combined_data = pd.concat(
    [combined_data1, combined_data2],
    ignore_index=True,
)
|
|
|
|
|
|
|
|
|