"""Evaluate a saved Keras ransomware classifier on a labelled CSV test set.

The script:

1. reads the column names from ``VARIABLE_NAMES_PATH`` (second ``;``-separated
   field of every non-blank line),
2. loads the header-less CSV at ``TEST_DATA_PATH`` using those names,
3. drops the label and ransomware-family columns to obtain the features,
4. standardizes the features and runs the model loaded from ``MODEL_PATH``,
5. prints accuracy, confusion matrix and classification report, and
6. writes the per-sample predicted/true labels to ``prediction_<timestamp>.csv``.
"""

from datetime import datetime

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

TEST_DATA_PATH = 'combined_log_summary.csv'
VARIABLE_NAMES_PATH = 'output.txt'
MODEL_PATH = 'updated_ransomware_classifier.h5'

LABEL_COLUMN = 'Label (1 Ransomware / 0 Goodware)'
FAMILY_COLUMN = 'Ransomware Family'

# Scores strictly above this value are classified as ransomware (label 1).
DECISION_THRESHOLD = 0.5
# Maximum number of samples echoed to stdout for a quick visual check.
SAMPLE_PREVIEW_COUNT = 10

# Load the trained model.
model = tf.keras.models.load_model(MODEL_PATH)

# Read variable names: the column name is the second ';'-separated field.
# Blank lines (e.g. a trailing newline at the end of the file) are skipped so
# they do not raise IndexError.
with open(VARIABLE_NAMES_PATH, encoding='utf-8') as f:
    columns = [line.split(';')[1].strip() for line in f if line.strip()]

# Load test data; the CSV has no header row, so names come from the list above.
data = pd.read_csv(TEST_DATA_PATH, header=None, names=columns)

# Strip stray whitespace from column names so the lookups below match.
data.columns = data.columns.str.strip()
print("Columns in DataFrame:", data.columns)

# Separate features from the label: drop the label column and the
# ransomware-family column; every remaining column is used as a feature.
try:
    X_data = data.drop(LABEL_COLUMN, axis=1)
    X = X_data.drop(FAMILY_COLUMN, axis=1)  # Features
    y = data[LABEL_COLUMN]  # Labels
except KeyError as e:
    # Show what is actually available to make a column-name mismatch easy to
    # diagnose, then propagate the original error.
    print(f"Error: {e}")
    print("Available columns:", data.columns)
    raise

# Standardize the features.
# NOTE(review): the scaler is fit on the evaluation data itself. If the model
# was trained on features scaled with statistics from the training set, that
# fitted scaler should be persisted and applied here via ``transform`` instead
# -- confirm against the training script.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Make predictions and threshold the model scores into 0/1 labels.
predictions = model.predict(X)
predicted_labels = (predictions > DECISION_THRESHOLD).astype(int)
true_labels = y.values

# Convert predictions to "Yes" or "No".
predicted_labels_text = [
    'Yes' if label == 1 else 'No' for label in predicted_labels.flatten()
]
true_labels_text = ['Yes' if label == 1 else 'No' for label in true_labels]

# Timestamp used both in the console output and in the output file name.
timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

# Evaluation metrics.
accuracy = accuracy_score(true_labels, predicted_labels)
conf_matrix = confusion_matrix(true_labels, predicted_labels)
class_report = classification_report(true_labels, predicted_labels)

print(f"Test Accuracy ({timestamp}): {accuracy:.2f}")
print(f"\nConfusion Matrix ({timestamp}):")
print(conf_matrix)
print(f"\nClassification Report ({timestamp}):")
print(class_report)

# Print the first few predictions next to the true labels. Slicing bounds the
# preview by the number of available samples, so a test set smaller than
# SAMPLE_PREVIEW_COUNT no longer raises IndexError.
print(f"\nSample Predictions vs True Labels ({timestamp}):")
preview = zip(
    predicted_labels_text[:SAMPLE_PREVIEW_COUNT],
    true_labels_text[:SAMPLE_PREVIEW_COUNT],
)
for i, (predicted_text, true_text) in enumerate(preview):
    print(f"Sample {i}: Predicted = {predicted_text}, True = {true_text}")

# Save predictions and true labels to a CSV file named after the timestamp.
output_df = pd.DataFrame({
    'Timestamp': [timestamp] * len(predicted_labels_text),  # Same value on every row
    'Predicted Label': predicted_labels_text,
    'True Label': true_labels_text,
})
output_file = f'prediction_{timestamp}.csv'
output_df.to_csv(output_file, index=False)
print(f"Predictions saved to {output_file} ({timestamp})")