Final_Installer_Merged/test_data.py
2024-10-25 11:19:11 +05:30

85 lines
2.8 KiB
Python

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from datetime import datetime
# Input locations, grouped so the script can be repointed in one place.
TEST_DATA_PATH = 'combined_log_summary.csv'
VARIABLE_NAMES_PATH = 'output.txt'
MODEL_PATH = 'updated_ransomware_classifier.h5'

# Load the trained model
model = tf.keras.models.load_model(MODEL_PATH)
# Load and prepare test data.
# Each non-blank line of the variable-names file is split on ';' and its
# second field becomes a column name.  Blank lines (for example a trailing
# empty line) are skipped so they no longer raise IndexError.
with open(VARIABLE_NAMES_PATH, encoding='utf-8') as f:
    columns = [line.split(';')[1].strip() for line in f if line.strip()]

# The CSV is read without a header row; the names collected above are
# applied to its columns.
data = pd.read_csv(TEST_DATA_PATH, header=None, names=columns)

# Check and clean column names
data.columns = data.columns.str.strip()
print("Columns in DataFrame:", data.columns)
# Split the frame into model inputs (X) and the target column (y).
# No all-zero-feature filtering happens here: only the label column and the
# ransomware-family column are removed from the inputs.
try:
    X_data = data.drop(columns='Label (1 Ransomware / 0 Goodware)')  # all but the label
    X = X_data.drop(columns='Ransomware Family')  # family is excluded from the features
    y = data['Label (1 Ransomware / 0 Goodware)']  # Labels
except KeyError as e:
    # An expected column is missing: show what is available, then re-raise.
    print(f"Error: {e}")
    print("Available columns:", data.columns)
    raise
def _as_yes_no(labels):
    """Return 'Yes' for every label equal to 1 and 'No' for anything else."""
    return ['Yes' if value == 1 else 'No' for value in labels]


# Standardize the features.
# NOTE(review): the scaler is fitted on this evaluation data itself; confirm
# this matches how the features were scaled when the model was trained.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Score every row, then threshold the model output at 0.5 to get 0/1 labels.
predictions = model.predict(X)
predicted_labels = (predictions > 0.5).astype(int)
true_labels = y.values

# Convert predictions to "Yes" or "No"
predicted_labels_text = _as_yes_no(predicted_labels.flatten())
true_labels_text = _as_yes_no(true_labels)
# Get current timestamp (local time, formatted so it is safe in a file name).
timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

# Evaluation metrics
accuracy = accuracy_score(true_labels, predicted_labels)
conf_matrix = confusion_matrix(true_labels, predicted_labels)
class_report = classification_report(true_labels, predicted_labels)

print(f"Test Accuracy ({timestamp}): {accuracy:.2f}")
print(f"\nConfusion Matrix ({timestamp}):")
print(conf_matrix)
print(f"\nClassification Report ({timestamp}):")
print(class_report)

# Print the first few predictions and true labels with timestamp.
# Slicing caps the preview at the number of available rows, so a data set
# with fewer than SAMPLE_PREVIEW_COUNT rows no longer raises IndexError.
SAMPLE_PREVIEW_COUNT = 10  # Adjust as needed
print(f"\nSample Predictions vs True Labels ({timestamp}):")
preview_pairs = zip(
    predicted_labels_text[:SAMPLE_PREVIEW_COUNT],
    true_labels_text[:SAMPLE_PREVIEW_COUNT],
)
for i, (predicted_text, true_text) in enumerate(preview_pairs):
    print(f"Sample {i}: Predicted = {predicted_text}, True = {true_text}")
# Save predictions and true labels to a CSV file named after the timestamp.
output_file = f'prediction_{timestamp}.csv'

output_df = pd.DataFrame({
    'Predicted Label': predicted_labels_text,
    'True Label': true_labels_text,
})
# Put the run timestamp in the first column; the scalar is repeated on every row.
output_df.insert(0, 'Timestamp', timestamp)

output_df.to_csv(output_file, index=False)
print(f"Predictions saved to {output_file} ({timestamp})")