85 lines
2.8 KiB
Python
85 lines
2.8 KiB
Python
"""Evaluate a trained ransomware classifier on a held-out test set.

Loads a saved Keras model, reads the feature/column names and the test CSV,
predicts ransomware (1) vs. goodware (0), prints evaluation metrics, and
writes per-sample predictions to a timestamped CSV file.
"""

import numpy as np  # kept: may be used by code outside this view
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from datetime import datetime

TEST_DATA_PATH = 'combined_log_summary.csv'
VARIABLE_NAMES_PATH = 'output.txt'
MODEL_PATH = 'updated_ransomware_classifier.h5'
LABEL_COLUMN = 'Label (1 Ransomware / 0 Goodware)'
FAMILY_COLUMN = 'Ransomware Family'

# Load the trained model.
model = tf.keras.models.load_model(MODEL_PATH)

# Read the feature names: one per line in the form "<index>;<name>".
# Skip blank or malformed lines instead of crashing with IndexError.
with open(VARIABLE_NAMES_PATH, encoding='utf-8') as f:
    columns = [line.split(';')[1].strip() for line in f if ';' in line]

# Load the test data and normalize the column names (strip stray whitespace).
data = pd.read_csv(TEST_DATA_PATH, header=None, names=columns)
data.columns = data.columns.str.strip()
print("Columns in DataFrame:", data.columns)

# Separate features from the label; on a column mismatch, report what *is*
# available so a bad variable-names file is easy to diagnose, then re-raise.
try:
    X = data.drop([LABEL_COLUMN, FAMILY_COLUMN], axis=1)  # Features
    y = data[LABEL_COLUMN]  # Labels
except KeyError as e:
    print(f"Error: {e}")
    print("Available columns:", data.columns)
    raise

# Standardize the features.
# NOTE(review): fitting the scaler on the *test* set leaks test statistics;
# the scaler fitted on the training data should be persisted and reused here.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Predict: the model outputs a probability per sample; threshold at 0.5.
predictions = model.predict(X)
predicted_labels = (predictions > 0.5).astype(int)
true_labels = y.values

# Human-readable "Yes"/"No" versions of both label arrays.
predicted_labels_text = ['Yes' if label == 1 else 'No' for label in predicted_labels.flatten()]
true_labels_text = ['Yes' if label == 1 else 'No' for label in true_labels]

# Timestamp used in all printed reports and in the output filename.
timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

# Evaluation metrics.
accuracy = accuracy_score(true_labels, predicted_labels)
conf_matrix = confusion_matrix(true_labels, predicted_labels)
class_report = classification_report(true_labels, predicted_labels)

print(f"Test Accuracy ({timestamp}): {accuracy:.2f}")
print(f"\nConfusion Matrix ({timestamp}):")
print(conf_matrix)
print(f"\nClassification Report ({timestamp}):")
print(class_report)

# Show a few sample predictions next to the ground truth.
# Bounded by the dataset size so small test sets don't raise IndexError.
print(f"\nSample Predictions vs True Labels ({timestamp}):")
for i in range(min(10, len(predicted_labels_text))):
    print(f"Sample {i}: Predicted = {predicted_labels_text[i]}, True = {true_labels_text[i]}")

# Save per-sample predictions (tagged with the run timestamp) to CSV.
output_df = pd.DataFrame({
    'Timestamp': [timestamp] * len(predicted_labels_text),
    'Predicted Label': predicted_labels_text,
    'True Label': true_labels_text,
})

output_file = f'prediction_{timestamp}.csv'
output_df.to_csv(output_file, index=False)
print(f"Predictions saved to {output_file} ({timestamp})")
|