web_updated_code/X_sys_demo_New/test.py
2024-10-28 13:57:19 +05:30

91 lines
3.0 KiB
Python

import pandas as pd
def _combine_model_predictions(prediction_frames):
    """Merge per-model prediction tables into one ensemble table.

    For every row, the predicted class is taken from the model that reported
    the highest 'Prediction Probability' (on a tie, the model listed first
    wins), while the reported probability is the plain average over all
    models.

    Args:
        prediction_frames: Non-empty sequence of DataFrames, one per model.
            Each must provide the columns 'Predicted Class' and
            'Prediction Probability'; the first must also provide 'File'.
            Rows are matched by position, so all frames must describe the
            same files in the same order.

    Returns:
        A new DataFrame with the columns 'File', 'Predicted Class' and
        'Prediction Probability' and a fresh 0..n-1 index.

    Raises:
        ValueError: If no frame is given, if the frames differ in row count,
            or if a frame's 'File' column does not match the first frame's.
    """
    if not prediction_frames:
        raise ValueError("at least one prediction frame is required")

    reference = prediction_frames[0]
    files = reference['File'].reset_index(drop=True)

    # Rows are combined purely by position, so refuse to mix frames that do
    # not line up instead of silently blending predictions of different files.
    for position, frame in enumerate(prediction_frames[1:], start=1):
        if len(frame) != len(reference):
            raise ValueError(
                f"prediction frame {position} has {len(frame)} rows, "
                f"expected {len(reference)}"
            )
        if 'File' in frame.columns:
            frame_files = frame['File'].reset_index(drop=True)
            if not frame_files.equals(files):
                raise ValueError(
                    f"prediction frame {position} lists different files "
                    "than the first frame"
                )

    # Drop the original indexes so every column aligns on 0..n-1.
    prob_columns = [
        frame['Prediction Probability'].reset_index(drop=True)
        for frame in prediction_frames
    ]
    class_columns = [
        frame['Predicted Class'].reset_index(drop=True)
        for frame in prediction_frames
    ]

    # Running "best so far" per row. The comparison is strict so that, on a
    # tie, the earlier model keeps its class.
    best_probability = prob_columns[0]
    best_class = class_columns[0]
    for probability, predicted_class in zip(prob_columns[1:], class_columns[1:]):
        is_better = probability > best_probability
        best_class = best_class.mask(is_better, predicted_class)
        best_probability = best_probability.mask(is_better, probability)

    combined = pd.DataFrame()
    combined['File'] = files
    combined['Predicted Class'] = best_class
    # Element-wise sum of the per-model columns, divided by the model count.
    combined['Prediction Probability'] = sum(prob_columns) / len(prob_columns)
    return combined


# Ensemble over the "bytes" prediction files (one CSV per model).
df1 = pd.read_csv('latest_malware_bytes_predictions_KNeighbours.csv')
df2 = pd.read_csv('latest_malware_bytes_predictions_RandomForest.csv')
df3 = pd.read_csv('latest_malware_bytes_predictions_SGD.csv')
df4 = pd.read_csv('latest_malware_bytes_predictions_XGB.csv')
combined_data1 = _combine_model_predictions([df1, df2, df3, df4])
print(combined_data1)

# Ensemble over the "ASM" prediction files (one CSV per model).
df5 = pd.read_csv('latest_malware_ASM_predictions_KNeighbours.csv')
df6 = pd.read_csv('latest_malware_ASM_predictions_LogisticRegression.csv')
df7 = pd.read_csv('latest_malware_ASM_predictions_RandomForest.csv')
df8 = pd.read_csv('latest_malware_ASM_predictions_XGB.csv')
combined_data2 = _combine_model_predictions([df5, df6, df7, df8])
print(combined_data2)

# Stack both ensemble tables into one frame with a fresh 0..n-1 index.
combined_data = pd.concat([combined_data1, combined_data2], ignore_index=True)