# web_defender/malware/views.py
#
# 1213 lines
# 47 KiB
# Python

import io
import logging
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from django.conf import settings
from django.core.files.storage import default_storage
from django.db import transaction
from django.http import HttpResponse
from django.http import JsonResponse
from django.utils import timezone
from django.utils.decorators import method_decorator
from django.views.decorators.csrf import csrf_exempt
from rest_framework import status
from rest_framework.parsers import MultiPartParser
from rest_framework.response import Response
from rest_framework.views import APIView

from Accounts.models import UserProfile
from Device.models import Devices

from .models import MalwarePrediction, MalwarePredictionsDevice
from .serializers import MalwarePredictionSerializer
class MalwarePredictionAPIView(APIView):
    """Upload malware-prediction CSVs for a user's device and chart the results.

    ``POST`` expects a multipart body with ``csv_file`` and ``user_id``.  The
    file is written under ``MEDIA_ROOT/malware_predictions/<device id>/`` and
    each row is stored as a ``MalwarePrediction``.  ``GET`` returns a PNG bar
    chart of every stored prediction's probability grouped by class.
    """

    parser_classes = [MultiPartParser]  # To handle file uploads

    @staticmethod
    def get_device_ids_by_user_id(user_id):
        """Return the ids of the devices used by ``user_id``, lowest id first.

        Returns an empty list when no ``UserProfile`` matches or when
        ``user_id`` cannot be used as a lookup value (e.g. non-numeric text).
        """
        try:
            user_profile = UserProfile.objects.get(user__id=user_id)
        except (UserProfile.DoesNotExist, TypeError, ValueError):
            # ValueError/TypeError: the ORM rejects a value it cannot coerce
            # to the id field; treat that like "no such user" instead of a 500.
            return []
        logging.getLogger(__name__).debug("user_profile %s", user_profile)
        # Explicit ordering so that callers taking the last element get a
        # deterministic result.
        # NOTE(review): assumes a higher id means a more recently created
        # device -- confirm against the Devices model.
        return list(
            Devices.objects.filter(used_by=user_profile)
            .order_by('id')
            .values_list('id', flat=True)
        )

    def post(self, request, *args, **kwargs):
        """Save the uploaded CSV and store its rows as ``MalwarePrediction`` records.

        Responds with 400 when the file or ``user_id`` is missing, when the
        user has no devices, when the CSV cannot be read, when it has fewer
        than three columns, or when a probability is not numeric.  Responds
        with 201 and the number of saved records on success.
        """
        if 'csv_file' not in request.FILES:
            return Response({"error": "No file provided"}, status=status.HTTP_400_BAD_REQUEST)
        csv_file = request.FILES['csv_file']

        # NOTE(review): user_id comes from the request body, not from the
        # authenticated user, so a caller can name any user -- confirm this
        # is intended.
        user_id = request.data.get('user_id')
        if not user_id:
            return Response({"error": "User ID is required"}, status=status.HTTP_400_BAD_REQUEST)

        device_ids = self.get_device_ids_by_user_id(user_id)
        logging.getLogger(__name__).debug("Device IDs: %s", device_ids)
        if not device_ids:
            return Response(
                {'error': 'No devices associated with the given user ID'},
                status=status.HTTP_400_BAD_REQUEST,
            )

        try:
            # Last id in the ordered list is treated as the most recent device.
            device = Devices.objects.get(id=device_ids[-1])
        except Devices.DoesNotExist:
            return Response(
                {"error": "Device not found for the given device ID"},
                status=status.HTTP_400_BAD_REQUEST,
            )

        try:
            upload_dir = os.path.join(settings.MEDIA_ROOT, 'malware_predictions', str(device.id))
            # exist_ok avoids the check-then-create race between concurrent uploads.
            os.makedirs(upload_dir, exist_ok=True)
            upload_path = os.path.join(upload_dir, csv_file.name)
            with default_storage.open(upload_path, 'wb+') as destination:
                for chunk in csv_file.chunks():
                    destination.write(chunk)
            df = pd.read_csv(upload_path)
        except Exception as e:
            # Deliberately broad: any failure to store or parse the upload is
            # reported to the client as a bad request.
            return Response(
                {"error": "Could not read the CSV file", "details": str(e)},
                status=status.HTTP_400_BAD_REQUEST,
            )

        expected_columns = ['process_name', 'class', 'probability_of_malware']
        if not set(expected_columns).issubset(df.columns):
            # Headers differ: fall back to positional mapping of the first
            # three columns, which requires at least three to exist.
            if len(df.columns) < len(expected_columns):
                return Response(
                    {"error": "CSV columns do not match expected format"},
                    status=status.HTTP_400_BAD_REQUEST,
                )
            df = df.iloc[:, :len(expected_columns)].copy()
            df.columns = expected_columns

        # Validate every row before touching the database so a bad value
        # cannot leave a half-imported file behind.
        # NOTE(review): assumes probability_of_malware is stored as a number.
        try:
            rows = [
                (name, label, float(probability))
                for name, label, probability in zip(
                    df['process_name'], df['class'], df['probability_of_malware']
                )
            ]
        except (TypeError, ValueError) as e:
            return Response(
                {"error": "Invalid value in 'probability_of_malware'", "details": str(e)},
                status=status.HTTP_400_BAD_REQUEST,
            )

        # All-or-nothing: a database error rolls back rows already inserted.
        with transaction.atomic():
            for name, label, probability in rows:
                MalwarePrediction.objects.create(
                    process_name=name,
                    process_class=label,
                    probability_of_malware=probability,
                )
        records_saved = len(rows)

        return Response(
            {"message": f"{records_saved} records saved successfully!"},
            status=status.HTTP_201_CREATED,
        )

    def get(self, request, *args, **kwargs):
        """Return a PNG bar chart of malware probability by class for all records.

        Responds with 404 when no ``MalwarePrediction`` rows exist.
        """
        rows = list(
            MalwarePrediction.objects.all()
            .values_list('process_class', 'probability_of_malware')
        )
        if not rows:
            return Response(
                {"error": "No data available to generate graph."},
                status=status.HTTP_404_NOT_FOUND,
            )
        df = pd.DataFrame(rows, columns=['class', 'probability_of_malware'])

        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            sns.barplot(
                data=df,
                x='class',  # Independent variable (x-axis)
                y='probability_of_malware',  # Dependent variable (y-axis)
                ci=None,  # No confidence intervals
                palette='Set2',  # Use a color palette for different classes
                ax=ax,
            )
            ax.set_title('Malware Probability by Class')
            ax.set_xlabel('Class')
            ax.set_ylabel('Probability of Malware')
            fig.tight_layout()
            buf = io.BytesIO()
            fig.savefig(buf, format='png')
        finally:
            # pyplot keeps every figure alive until it is closed; without this
            # each request would leak one figure.
            plt.close(fig)

        return HttpResponse(buf.getvalue(), content_type='image/png')
# class MalwarePredictionAPIView(APIView):
# parser_classes = [MultiPartParser] # To handle file uploads
# @staticmethod
# def get_device_ids_by_user_id(user_id):
# try:
# # Get the UserProfile instance using the user ID
# user_profile = UserProfile.objects.get(user__id=user_id)
# print('user_profile', user_profile)
# # Retrieve all Devices associated with this UserProfile
# devices = Devices.objects.filter(used_by=user_profile)
# print('devices', devices)
# # Get the device IDs
# device_ids = [device.id for device in devices]
# return device_ids
# except UserProfile.DoesNotExist:
# return []
# def post(self, request, *args, **kwargs):
# if 'csv_file' not in request.FILES:
# return Response({"error": "No file provided"}, status=status.HTTP_400_BAD_REQUEST)
# csv_file = request.FILES['csv_file']
# # Retrieve user ID from the request
# user_id = request.data.get('user_id') # Ensure 'user_id' is being sent in the request body
# print("user_id ", user_id)
# if not user_id:
# return Response({"error": "User ID is required"}, status=status.HTTP_400_BAD_REQUEST)
# # Get the device IDs associated with the user
# device_ids = self.get_device_ids_by_user_id(user_id)
# print(f"Device IDs: {device_ids}")
# # Fetch the first associated device for the user
# if not device_ids:
# return Response({'error': 'No devices associated with the given user ID'}, status=status.HTTP_400_BAD_REQUEST)
# try:
# # device = Devices.objects.filter(id__in=device_ids).order_by('-created_at').first() # Use the first device ID
# device = Devices.objects.filter(used_by__user=request.user).order_by('-id').first()
# print(f"Device ID: {device.id}")
# except Devices.DoesNotExist:
# return Response({"error": "Device not found for the given device ID"}, status=status.HTTP_400_BAD_REQUEST)
# try:
# user_profile = UserProfile.objects.get(user__id=user_id)
# print(user_profile)
# except UserProfile.DoesNotExist:
# return Response({"error": "User profile not found"}, status=status.HTTP_400_BAD_REQUEST)
# try:
# # Define the temp directory path using the device ID
# temp_dir = os.path.join(settings.MEDIA_ROOT, 'malware_predictions')
# # Create the temp directory if it doesn't exist
# if not os.path.exists(temp_dir):
# os.makedirs(temp_dir)
# # Save the file temporarily
# temp_file_path = os.path.join(temp_dir, csv_file.name)
# with default_storage.open(temp_file_path, 'wb+') as destination:
# for chunk in csv_file.chunks():
# destination.write(chunk)
# # Read the CSV file with headers
# df = pd.read_csv(temp_file_path)
# # Extract column names from the CSV
# actual_columns = df.columns.tolist()
# except Exception as e:
# return Response({"error": "Could not read the CSV file", "details": str(e)}, status=status.HTTP_400_BAD_REQUEST)
# # Define the expected column names
# expected_columns = ['process_name', 'class', 'probability_of_malware']
# # Mapping logic
# if actual_columns != expected_columns:
# # Map actual column names to expected ones
# column_mapping = dict(zip(actual_columns, expected_columns))
# df.rename(columns=column_mapping, inplace=True)
# # Save the data to the database
# records_saved = 0
# for _, row in df.iterrows():
# try:
# process_name = row['process_name']
# process_class = row['class']
# probability = float(row['probability_of_malware']) # Ensure it's a number
# except ValueError:
# return Response({
# "error": f"Invalid value in 'probability_of_malware': {row['probability_of_malware']}"
# }, status=status.HTTP_400_BAD_REQUEST)
# # MalwarePredictionsDevice.objects.create(
# # device=device, # Pass the Devices instance here
# # user=user_profile, # This will reference the user related to the device
# # file_path=temp_file_path, # The path to the uploaded file
# # )
# MalwarePrediction.objects.create(
# process_name=process_name,
# process_class=process_class,
# probability_of_malware=probability,
# )
# records_saved += 1
# return Response({
# "message": f"{records_saved} records saved successfully!",
# }, status=status.HTTP_201_CREATED)
# class MalwarePredictionAPIView(APIView):
# parser_classes = [MultiPartParser] # To handle file uploads
# @staticmethod
# def get_device_ids_by_user_id(user_id):
# try:
# # Get the UserProfile instance using the user ID
# user_profile = UserProfile.objects.get(user__id=user_id)
# print('user_profile', user_profile)
# # Retrieve all Devices associated with this UserProfile
# devices = Devices.objects.filter(used_by=user_profile)
# print('devices', devices)
# # Get the device IDs
# device_ids = [device.id for device in devices]
# return device_ids
# except UserProfile.DoesNotExist:
# return []
# def post(self, request, *args, **kwargs):
# if 'csv_file' not in request.FILES:
# return Response({"error": "No file provided"}, status=status.HTTP_400_BAD_REQUEST)
# csv_file = request.FILES.get('csv_file')
# if not csv_file:
# return Response({"error": "No CSV file provided"}, status=status.HTTP_400_BAD_REQUEST)
# user_id = request.data.get('user_id')
# if not user_id:
# return Response({"error": "User ID is required"}, status=status.HTTP_400_BAD_REQUEST)
# # Retrieve associated device IDs for the user
# device_ids = self.get_device_ids_by_user_id(user_id)
# print(device_ids)
# if not device_ids:
# return Response({"error": "No devices associated with the given user ID"}, status=status.HTTP_400_BAD_REQUEST)
# # Try to get the most recent device associated with the user
# try:
# device = Devices.objects.get(id=device_ids[-1])
# except Devices.DoesNotExist:
# return Response({"error": "Device not found for the given device ID"}, status=status.HTTP_400_BAD_REQUEST)
# # Define the temp directory path for saving the file
# temp_dir = os.path.join(settings.MEDIA_ROOT, 'malware_predictions', f'device_{device.id}')
# # Create the 'temp' directory if it doesn't exist
# if not os.path.exists(temp_dir):
# os.makedirs(temp_dir)
# # Save the file temporarily
# temp_file_path = os.path.join(temp_dir, csv_file.name)
# try:
# with default_storage.open(temp_file_path, 'wb+') as destination:
# for chunk in csv_file.chunks():
# destination.write(chunk)
# except Exception as e:
# return Response({"error": "Failed to save the file", "details": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
# # Read the CSV file with headers
# try:
# df = pd.read_csv(temp_file_path)
# actual_columns = df.columns.tolist()
# except Exception as e:
# return Response({"error": "Could not read the CSV file", "details": str(e)}, status=status.HTTP_400_BAD_REQUEST)
# # Define the expected column names
# expected_columns = ['process_name', 'class', 'probability_of_malware']
# # Validate and map columns
# if actual_columns != expected_columns:
# if len(actual_columns) == len(expected_columns):
# column_mapping = dict(zip(actual_columns, expected_columns))
# df.rename(columns=column_mapping, inplace=True)
# else:
# return Response({"error": "CSV columns do not match expected format"}, status=status.HTTP_400_BAD_REQUEST)
# # Ensure the user profile exists
# try:
# user_profile = UserProfile.objects.get(user__id=user_id)
# except UserProfile.DoesNotExist:
# return Response({"error": "User profile not found"}, status=status.HTTP_400_BAD_REQUEST)
# # Save the predictions and create the related record
# records_saved = 0
# for _, row in df.iterrows():
# process_name = row['process_name']
# process_class = row['class']
# probability = row['probability_of_malware']
# try:
# # Save malware prediction
# MalwarePrediction.objects.create(
# process_name=process_name,
# process_class=process_class,
# probability_of_malware=probability,
# )
# # Save the device association
# MalwarePredictionsDevice.objects.create(
# device=device,
# user=user_profile,
# file_path=temp_file_path,
# )
# records_saved += 1
# except Exception as e:
# return Response({"error": "Failed to save record", "details": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
# return Response({
# "message": f"{records_saved} records saved successfully!"
# }, status=status.HTTP_201_CREATED)
class KNeighborsModelView(APIView):
    """Store and chart malware predictions tagged ``model_type=1`` (knn uploads).

    ``POST`` reads a multipart ``csv_file`` and saves each row as a
    ``MalwarePrediction``; ``GET`` returns a PNG bar chart of the stored
    probabilities grouped by class.
    """

    # NOTE(review): this class name is defined a second time further down
    # this module; at import time the later definition replaces this one.
    parser_classes = [MultiPartParser]  # To handle file uploads

    def post(self, request, *args, **kwargs):
        """Save the uploaded CSV's rows as ``MalwarePrediction`` records.

        Responds with 400 when the file is missing, cannot be read, has fewer
        than three columns, or contains a non-numeric probability; 201 on
        success.
        """
        if 'csv_file' not in request.FILES:
            return Response({"error": "No file provided"}, status=status.HTTP_400_BAD_REQUEST)
        csv_file = request.FILES['csv_file']

        try:
            temp_dir = os.path.join(settings.MEDIA_ROOT, 'temp')
            # exist_ok avoids the check-then-create race between concurrent uploads.
            os.makedirs(temp_dir, exist_ok=True)
            temp_file_path = os.path.join(temp_dir, csv_file.name)
            with default_storage.open(temp_file_path, 'wb+') as destination:
                for chunk in csv_file.chunks():
                    destination.write(chunk)
            df = pd.read_csv(temp_file_path)
        except Exception as e:
            # Deliberately broad: any failure to store or parse the upload is
            # reported to the client as a bad request.
            return Response(
                {"error": "Could not read the CSV file", "details": str(e)},
                status=status.HTTP_400_BAD_REQUEST,
            )

        expected_columns = ['process_name', 'class', 'probability_of_malware']
        if not set(expected_columns).issubset(df.columns):
            # Headers differ: fall back to positional mapping of the first
            # three columns, which requires at least three to exist.
            if len(df.columns) < len(expected_columns):
                return Response(
                    {"error": "CSV columns do not match expected format"},
                    status=status.HTTP_400_BAD_REQUEST,
                )
            df = df.iloc[:, :len(expected_columns)].copy()
            df.columns = expected_columns

        # Validate every row before writing so a bad value cannot leave a
        # half-imported file behind.
        # NOTE(review): assumes probability_of_malware is stored as a number.
        try:
            rows = [
                (name, label, float(probability))
                for name, label, probability in zip(
                    df['process_name'], df['class'], df['probability_of_malware']
                )
            ]
        except (TypeError, ValueError) as e:
            return Response(
                {"error": "Invalid value in 'probability_of_malware'", "details": str(e)},
                status=status.HTTP_400_BAD_REQUEST,
            )

        # All-or-nothing: a database error rolls back rows already inserted.
        with transaction.atomic():
            for name, label, probability in rows:
                MalwarePrediction.objects.create(
                    process_name=name,
                    process_class=label,
                    probability_of_malware=probability,
                    model_type=1,
                )

        return Response({"message": " knn file saved successfully!"}, status=status.HTTP_201_CREATED)

    def get(self, request, *args, **kwargs):
        """Return a PNG bar chart of malware probability by class for ``model_type=1``.

        Responds with 404 when no matching records exist.
        """
        rows = list(
            MalwarePrediction.objects.filter(model_type=1)
            .values_list('process_class', 'probability_of_malware')
        )
        if not rows:
            return Response(
                {"error": "No data available to generate graph."},
                status=status.HTTP_404_NOT_FOUND,
            )
        df = pd.DataFrame(rows, columns=['class', 'probability_of_malware'])

        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            sns.barplot(
                data=df,
                x='class',  # Independent variable (x-axis)
                y='probability_of_malware',  # Dependent variable (y-axis)
                ci=None,  # No confidence intervals
                palette='Set2',  # Use a color palette for different classes
                ax=ax,
            )
            ax.set_title('Malware Probability by Class')
            ax.set_xlabel('Class')
            ax.set_ylabel('Probability of Malware')
            fig.tight_layout()
            buf = io.BytesIO()
            fig.savefig(buf, format='png')
        finally:
            # pyplot keeps every figure alive until it is closed; without this
            # each request would leak one figure.
            plt.close(fig)

        return HttpResponse(buf.getvalue(), content_type='image/png')
class RandomForestModelView(APIView):
    """Store and chart malware predictions tagged ``model_type=2`` (RandomForest uploads).

    ``POST`` reads a multipart ``csv_file`` and saves each row as a
    ``MalwarePrediction``; ``GET`` returns a PNG bar chart of the stored
    probabilities grouped by class.
    """

    # NOTE(review): this class name is defined a second time further down
    # this module; at import time the later definition replaces this one.
    parser_classes = [MultiPartParser]  # To handle file uploads

    def post(self, request, *args, **kwargs):
        """Save the uploaded CSV's rows as ``MalwarePrediction`` records.

        Responds with 400 when the file is missing, cannot be read, has fewer
        than three columns, or contains a non-numeric probability; 201 on
        success.
        """
        if 'csv_file' not in request.FILES:
            return Response({"error": "No file provided"}, status=status.HTTP_400_BAD_REQUEST)
        csv_file = request.FILES['csv_file']

        try:
            temp_dir = os.path.join(settings.MEDIA_ROOT, 'temp')
            # exist_ok avoids the check-then-create race between concurrent uploads.
            os.makedirs(temp_dir, exist_ok=True)
            temp_file_path = os.path.join(temp_dir, csv_file.name)
            with default_storage.open(temp_file_path, 'wb+') as destination:
                for chunk in csv_file.chunks():
                    destination.write(chunk)
            df = pd.read_csv(temp_file_path)
        except Exception as e:
            # Deliberately broad: any failure to store or parse the upload is
            # reported to the client as a bad request.
            return Response(
                {"error": "Could not read the CSV file", "details": str(e)},
                status=status.HTTP_400_BAD_REQUEST,
            )

        expected_columns = ['process_name', 'class', 'probability_of_malware']
        if not set(expected_columns).issubset(df.columns):
            # Headers differ: fall back to positional mapping of the first
            # three columns, which requires at least three to exist.
            if len(df.columns) < len(expected_columns):
                return Response(
                    {"error": "CSV columns do not match expected format"},
                    status=status.HTTP_400_BAD_REQUEST,
                )
            df = df.iloc[:, :len(expected_columns)].copy()
            df.columns = expected_columns

        # Validate every row before writing so a bad value cannot leave a
        # half-imported file behind.
        # NOTE(review): assumes probability_of_malware is stored as a number.
        try:
            rows = [
                (name, label, float(probability))
                for name, label, probability in zip(
                    df['process_name'], df['class'], df['probability_of_malware']
                )
            ]
        except (TypeError, ValueError) as e:
            return Response(
                {"error": "Invalid value in 'probability_of_malware'", "details": str(e)},
                status=status.HTTP_400_BAD_REQUEST,
            )

        # All-or-nothing: a database error rolls back rows already inserted.
        with transaction.atomic():
            for name, label, probability in rows:
                MalwarePrediction.objects.create(
                    process_name=name,
                    process_class=label,
                    probability_of_malware=probability,
                    model_type=2,
                )

        return Response(
            {"message": " RandomForest file saved successfully!"},
            status=status.HTTP_201_CREATED,
        )

    def get(self, request, *args, **kwargs):
        """Return a PNG bar chart of malware probability by class for ``model_type=2``.

        Responds with 404 when no matching records exist.
        """
        rows = list(
            MalwarePrediction.objects.filter(model_type=2)
            .values_list('process_class', 'probability_of_malware')
        )
        if not rows:
            return Response(
                {"error": "No data available to generate graph."},
                status=status.HTTP_404_NOT_FOUND,
            )
        df = pd.DataFrame(rows, columns=['class', 'probability_of_malware'])

        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            sns.barplot(
                data=df,
                x='class',  # Independent variable (x-axis)
                y='probability_of_malware',  # Dependent variable (y-axis)
                ci=None,  # No confidence intervals
                palette='Set2',  # Use a color palette for different classes
                ax=ax,
            )
            ax.set_title('Malware Probability by Class')
            ax.set_xlabel('Class')
            ax.set_ylabel('Probability of Malware')
            fig.tight_layout()
            buf = io.BytesIO()
            fig.savefig(buf, format='png')
        finally:
            # pyplot keeps every figure alive until it is closed; without this
            # each request would leak one figure.
            plt.close(fig)

        return HttpResponse(buf.getvalue(), content_type='image/png')
class XGBModelView(APIView):
    """Store and chart malware predictions tagged ``model_type=3`` (XGB uploads).

    ``POST`` reads a multipart ``csv_file`` and saves each row as a
    ``MalwarePrediction``; ``GET`` returns a PNG bar chart of the stored
    probabilities grouped by class.
    """

    # NOTE(review): this class name is defined a second time further down
    # this module; at import time the later definition replaces this one.
    parser_classes = [MultiPartParser]  # To handle file uploads

    def post(self, request, *args, **kwargs):
        """Save the uploaded CSV's rows as ``MalwarePrediction`` records.

        Responds with 400 when the file is missing, cannot be read, has fewer
        than three columns, or contains a non-numeric probability; 201 on
        success.
        """
        if 'csv_file' not in request.FILES:
            return Response({"error": "No file provided"}, status=status.HTTP_400_BAD_REQUEST)
        csv_file = request.FILES['csv_file']

        try:
            temp_dir = os.path.join(settings.MEDIA_ROOT, 'temp')
            # exist_ok avoids the check-then-create race between concurrent uploads.
            os.makedirs(temp_dir, exist_ok=True)
            temp_file_path = os.path.join(temp_dir, csv_file.name)
            with default_storage.open(temp_file_path, 'wb+') as destination:
                for chunk in csv_file.chunks():
                    destination.write(chunk)
            df = pd.read_csv(temp_file_path)
        except Exception as e:
            # Deliberately broad: any failure to store or parse the upload is
            # reported to the client as a bad request.
            return Response(
                {"error": "Could not read the CSV file", "details": str(e)},
                status=status.HTTP_400_BAD_REQUEST,
            )

        expected_columns = ['process_name', 'class', 'probability_of_malware']
        if not set(expected_columns).issubset(df.columns):
            # Headers differ: fall back to positional mapping of the first
            # three columns, which requires at least three to exist.
            if len(df.columns) < len(expected_columns):
                return Response(
                    {"error": "CSV columns do not match expected format"},
                    status=status.HTTP_400_BAD_REQUEST,
                )
            df = df.iloc[:, :len(expected_columns)].copy()
            df.columns = expected_columns

        # Validate every row before writing so a bad value cannot leave a
        # half-imported file behind.
        # NOTE(review): assumes probability_of_malware is stored as a number.
        try:
            rows = [
                (name, label, float(probability))
                for name, label, probability in zip(
                    df['process_name'], df['class'], df['probability_of_malware']
                )
            ]
        except (TypeError, ValueError) as e:
            return Response(
                {"error": "Invalid value in 'probability_of_malware'", "details": str(e)},
                status=status.HTTP_400_BAD_REQUEST,
            )

        # All-or-nothing: a database error rolls back rows already inserted.
        with transaction.atomic():
            for name, label, probability in rows:
                MalwarePrediction.objects.create(
                    process_name=name,
                    process_class=label,
                    probability_of_malware=probability,
                    model_type=3,
                )

        return Response({"message": " XGB file saved successfully!"}, status=status.HTTP_201_CREATED)

    def get(self, request, *args, **kwargs):
        """Return a PNG bar chart of malware probability by class for ``model_type=3``.

        Responds with 404 when no matching records exist.
        """
        rows = list(
            MalwarePrediction.objects.filter(model_type=3)
            .values_list('process_class', 'probability_of_malware')
        )
        if not rows:
            return Response(
                {"error": "No data available to generate graph."},
                status=status.HTTP_404_NOT_FOUND,
            )
        df = pd.DataFrame(rows, columns=['class', 'probability_of_malware'])

        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            sns.barplot(
                data=df,
                x='class',  # Independent variable (x-axis)
                y='probability_of_malware',  # Dependent variable (y-axis)
                ci=None,  # No confidence intervals
                palette='Set2',  # Use a color palette for different classes
                ax=ax,
            )
            ax.set_title('Malware Probability by Class')
            ax.set_xlabel('Class')
            ax.set_ylabel('Probability of Malware')
            fig.tight_layout()
            buf = io.BytesIO()
            fig.savefig(buf, format='png')
        finally:
            # pyplot keeps every figure alive until it is closed; without this
            # each request would leak one figure.
            plt.close(fig)

        return HttpResponse(buf.getvalue(), content_type='image/png')
class SGDModelView(APIView):
    """Store and chart malware predictions tagged ``model_type=4`` (SGD uploads).

    ``POST`` reads a multipart ``csv_file`` and saves each row as a
    ``MalwarePrediction``; ``GET`` returns a PNG bar chart of the stored
    probabilities grouped by class.
    """

    # NOTE(review): this class name is defined a second time further down
    # this module; at import time the later definition replaces this one.
    parser_classes = [MultiPartParser]  # To handle file uploads

    def post(self, request, *args, **kwargs):
        """Save the uploaded CSV's rows as ``MalwarePrediction`` records.

        Responds with 400 when the file is missing, cannot be read, has fewer
        than three columns, or contains a non-numeric probability; 201 on
        success.
        """
        if 'csv_file' not in request.FILES:
            return Response({"error": "No file provided"}, status=status.HTTP_400_BAD_REQUEST)
        csv_file = request.FILES['csv_file']

        try:
            temp_dir = os.path.join(settings.MEDIA_ROOT, 'temp')
            # exist_ok avoids the check-then-create race between concurrent uploads.
            os.makedirs(temp_dir, exist_ok=True)
            temp_file_path = os.path.join(temp_dir, csv_file.name)
            with default_storage.open(temp_file_path, 'wb+') as destination:
                for chunk in csv_file.chunks():
                    destination.write(chunk)
            df = pd.read_csv(temp_file_path)
        except Exception as e:
            # Deliberately broad: any failure to store or parse the upload is
            # reported to the client as a bad request.
            return Response(
                {"error": "Could not read the CSV file", "details": str(e)},
                status=status.HTTP_400_BAD_REQUEST,
            )

        expected_columns = ['process_name', 'class', 'probability_of_malware']
        if not set(expected_columns).issubset(df.columns):
            # Headers differ: fall back to positional mapping of the first
            # three columns, which requires at least three to exist.
            if len(df.columns) < len(expected_columns):
                return Response(
                    {"error": "CSV columns do not match expected format"},
                    status=status.HTTP_400_BAD_REQUEST,
                )
            df = df.iloc[:, :len(expected_columns)].copy()
            df.columns = expected_columns

        # Validate every row before writing so a bad value cannot leave a
        # half-imported file behind.
        # NOTE(review): assumes probability_of_malware is stored as a number.
        try:
            rows = [
                (name, label, float(probability))
                for name, label, probability in zip(
                    df['process_name'], df['class'], df['probability_of_malware']
                )
            ]
        except (TypeError, ValueError) as e:
            return Response(
                {"error": "Invalid value in 'probability_of_malware'", "details": str(e)},
                status=status.HTTP_400_BAD_REQUEST,
            )

        # All-or-nothing: a database error rolls back rows already inserted.
        with transaction.atomic():
            for name, label, probability in rows:
                MalwarePrediction.objects.create(
                    process_name=name,
                    process_class=label,
                    probability_of_malware=probability,
                    model_type=4,
                )

        return Response({"message": " SGD file saved successfully!"}, status=status.HTTP_201_CREATED)

    def get(self, request, *args, **kwargs):
        """Return a PNG bar chart of malware probability by class for ``model_type=4``.

        Responds with 404 when no matching records exist.
        """
        rows = list(
            MalwarePrediction.objects.filter(model_type=4)
            .values_list('process_class', 'probability_of_malware')
        )
        if not rows:
            return Response(
                {"error": "No data available to generate graph."},
                status=status.HTTP_404_NOT_FOUND,
            )
        df = pd.DataFrame(rows, columns=['class', 'probability_of_malware'])

        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            sns.barplot(
                data=df,
                x='class',  # Independent variable (x-axis)
                y='probability_of_malware',  # Dependent variable (y-axis)
                ci=None,  # No confidence intervals
                palette='Set2',  # Use a color palette for different classes
                ax=ax,
            )
            ax.set_title('Malware Probability by Class')
            ax.set_xlabel('Class')
            ax.set_ylabel('Probability of Malware')
            fig.tight_layout()
            buf = io.BytesIO()
            fig.savefig(buf, format='png')
        finally:
            # pyplot keeps every figure alive until it is closed; without this
            # each request would leak one figure.
            plt.close(fig)

        return HttpResponse(buf.getvalue(), content_type='image/png')
class KNeighborsModelView(APIView):
    """Store and chart malware predictions tagged ``model_type=1`` (knn uploads).

    ``POST`` reads a multipart ``csv_file`` and saves each row as a
    ``MalwarePrediction``; ``GET`` returns a PNG bar chart of the stored
    probabilities grouped by class.
    """

    # NOTE(review): this class name is also defined earlier in this module;
    # this later definition is the one that stays bound after import.
    parser_classes = [MultiPartParser]  # To handle file uploads

    def post(self, request, *args, **kwargs):
        """Save the uploaded CSV's rows as ``MalwarePrediction`` records.

        Responds with 400 when the file is missing, cannot be read, has fewer
        than three columns, or contains a non-numeric probability; 201 on
        success.
        """
        if 'csv_file' not in request.FILES:
            return Response({"error": "No file provided"}, status=status.HTTP_400_BAD_REQUEST)
        csv_file = request.FILES['csv_file']

        try:
            temp_dir = os.path.join(settings.MEDIA_ROOT, 'temp')
            # exist_ok avoids the check-then-create race between concurrent uploads.
            os.makedirs(temp_dir, exist_ok=True)
            temp_file_path = os.path.join(temp_dir, csv_file.name)
            with default_storage.open(temp_file_path, 'wb+') as destination:
                for chunk in csv_file.chunks():
                    destination.write(chunk)
            df = pd.read_csv(temp_file_path)
        except Exception as e:
            # Deliberately broad: any failure to store or parse the upload is
            # reported to the client as a bad request.
            return Response(
                {"error": "Could not read the CSV file", "details": str(e)},
                status=status.HTTP_400_BAD_REQUEST,
            )

        expected_columns = ['process_name', 'class', 'probability_of_malware']
        if not set(expected_columns).issubset(df.columns):
            # Headers differ: fall back to positional mapping of the first
            # three columns, which requires at least three to exist.
            if len(df.columns) < len(expected_columns):
                return Response(
                    {"error": "CSV columns do not match expected format"},
                    status=status.HTTP_400_BAD_REQUEST,
                )
            df = df.iloc[:, :len(expected_columns)].copy()
            df.columns = expected_columns

        # Validate every row before writing so a bad value cannot leave a
        # half-imported file behind.
        # NOTE(review): assumes probability_of_malware is stored as a number.
        try:
            rows = [
                (name, label, float(probability))
                for name, label, probability in zip(
                    df['process_name'], df['class'], df['probability_of_malware']
                )
            ]
        except (TypeError, ValueError) as e:
            return Response(
                {"error": "Invalid value in 'probability_of_malware'", "details": str(e)},
                status=status.HTTP_400_BAD_REQUEST,
            )

        # All-or-nothing: a database error rolls back rows already inserted.
        with transaction.atomic():
            for name, label, probability in rows:
                MalwarePrediction.objects.create(
                    process_name=name,
                    process_class=label,
                    probability_of_malware=probability,
                    model_type=1,
                )

        return Response({"message": " knn file saved successfully!"}, status=status.HTTP_201_CREATED)

    def get(self, request, *args, **kwargs):
        """Return a PNG bar chart of malware probability by class for ``model_type=1``.

        Responds with 404 when no matching records exist.
        """
        rows = list(
            MalwarePrediction.objects.filter(model_type=1)
            .values_list('process_class', 'probability_of_malware')
        )
        if not rows:
            return Response(
                {"error": "No data available to generate graph."},
                status=status.HTTP_404_NOT_FOUND,
            )
        df = pd.DataFrame(rows, columns=['class', 'probability_of_malware'])

        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            sns.barplot(
                data=df,
                x='class',  # Independent variable (x-axis)
                y='probability_of_malware',  # Dependent variable (y-axis)
                ci=None,  # No confidence intervals
                palette='Set2',  # Use a color palette for different classes
                ax=ax,
            )
            ax.set_title('Malware Probability by Class')
            ax.set_xlabel('Class')
            ax.set_ylabel('Probability of Malware')
            fig.tight_layout()
            buf = io.BytesIO()
            fig.savefig(buf, format='png')
        finally:
            # pyplot keeps every figure alive until it is closed; without this
            # each request would leak one figure.
            plt.close(fig)

        return HttpResponse(buf.getvalue(), content_type='image/png')
class RandomForestModelView(APIView):
    """Store and chart malware predictions tagged ``model_type=2`` (RandomForest uploads).

    ``POST`` reads a multipart ``csv_file`` and saves each row as a
    ``MalwarePrediction``; ``GET`` returns a PNG bar chart of the stored
    probabilities grouped by class.
    """

    # NOTE(review): this class name is also defined earlier in this module;
    # this later definition is the one that stays bound after import.
    parser_classes = [MultiPartParser]  # To handle file uploads

    def post(self, request, *args, **kwargs):
        """Save the uploaded CSV's rows as ``MalwarePrediction`` records.

        Responds with 400 when the file is missing, cannot be read, has fewer
        than three columns, or contains a non-numeric probability; 201 on
        success.
        """
        if 'csv_file' not in request.FILES:
            return Response({"error": "No file provided"}, status=status.HTTP_400_BAD_REQUEST)
        csv_file = request.FILES['csv_file']

        try:
            temp_dir = os.path.join(settings.MEDIA_ROOT, 'temp')
            # exist_ok avoids the check-then-create race between concurrent uploads.
            os.makedirs(temp_dir, exist_ok=True)
            temp_file_path = os.path.join(temp_dir, csv_file.name)
            with default_storage.open(temp_file_path, 'wb+') as destination:
                for chunk in csv_file.chunks():
                    destination.write(chunk)
            df = pd.read_csv(temp_file_path)
        except Exception as e:
            # Deliberately broad: any failure to store or parse the upload is
            # reported to the client as a bad request.
            return Response(
                {"error": "Could not read the CSV file", "details": str(e)},
                status=status.HTTP_400_BAD_REQUEST,
            )

        expected_columns = ['process_name', 'class', 'probability_of_malware']
        if not set(expected_columns).issubset(df.columns):
            # Headers differ: fall back to positional mapping of the first
            # three columns, which requires at least three to exist.
            if len(df.columns) < len(expected_columns):
                return Response(
                    {"error": "CSV columns do not match expected format"},
                    status=status.HTTP_400_BAD_REQUEST,
                )
            df = df.iloc[:, :len(expected_columns)].copy()
            df.columns = expected_columns

        # Validate every row before writing so a bad value cannot leave a
        # half-imported file behind.
        # NOTE(review): assumes probability_of_malware is stored as a number.
        try:
            rows = [
                (name, label, float(probability))
                for name, label, probability in zip(
                    df['process_name'], df['class'], df['probability_of_malware']
                )
            ]
        except (TypeError, ValueError) as e:
            return Response(
                {"error": "Invalid value in 'probability_of_malware'", "details": str(e)},
                status=status.HTTP_400_BAD_REQUEST,
            )

        # All-or-nothing: a database error rolls back rows already inserted.
        with transaction.atomic():
            for name, label, probability in rows:
                MalwarePrediction.objects.create(
                    process_name=name,
                    process_class=label,
                    probability_of_malware=probability,
                    model_type=2,
                )

        return Response(
            {"message": " RandomForest file saved successfully!"},
            status=status.HTTP_201_CREATED,
        )

    def get(self, request, *args, **kwargs):
        """Return a PNG bar chart of malware probability by class for ``model_type=2``.

        Responds with 404 when no matching records exist.
        """
        rows = list(
            MalwarePrediction.objects.filter(model_type=2)
            .values_list('process_class', 'probability_of_malware')
        )
        if not rows:
            return Response(
                {"error": "No data available to generate graph."},
                status=status.HTTP_404_NOT_FOUND,
            )
        df = pd.DataFrame(rows, columns=['class', 'probability_of_malware'])

        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            sns.barplot(
                data=df,
                x='class',  # Independent variable (x-axis)
                y='probability_of_malware',  # Dependent variable (y-axis)
                ci=None,  # No confidence intervals
                palette='Set2',  # Use a color palette for different classes
                ax=ax,
            )
            ax.set_title('Malware Probability by Class')
            ax.set_xlabel('Class')
            ax.set_ylabel('Probability of Malware')
            fig.tight_layout()
            buf = io.BytesIO()
            fig.savefig(buf, format='png')
        finally:
            # pyplot keeps every figure alive until it is closed; without this
            # each request would leak one figure.
            plt.close(fig)

        return HttpResponse(buf.getvalue(), content_type='image/png')
class XGBModelView(APIView):
    """Store and chart malware predictions tagged ``model_type=3`` (XGB uploads).

    ``POST`` reads a multipart ``csv_file`` and saves each row as a
    ``MalwarePrediction``; ``GET`` returns a PNG bar chart of the stored
    probabilities grouped by class.
    """

    # NOTE(review): this class name is also defined earlier in this module;
    # this later definition is the one that stays bound after import.
    parser_classes = [MultiPartParser]  # To handle file uploads

    def post(self, request, *args, **kwargs):
        """Save the uploaded CSV's rows as ``MalwarePrediction`` records.

        Responds with 400 when the file is missing, cannot be read, has fewer
        than three columns, or contains a non-numeric probability; 201 on
        success.
        """
        if 'csv_file' not in request.FILES:
            return Response({"error": "No file provided"}, status=status.HTTP_400_BAD_REQUEST)
        csv_file = request.FILES['csv_file']

        try:
            temp_dir = os.path.join(settings.MEDIA_ROOT, 'temp')
            # exist_ok avoids the check-then-create race between concurrent uploads.
            os.makedirs(temp_dir, exist_ok=True)
            temp_file_path = os.path.join(temp_dir, csv_file.name)
            with default_storage.open(temp_file_path, 'wb+') as destination:
                for chunk in csv_file.chunks():
                    destination.write(chunk)
            df = pd.read_csv(temp_file_path)
        except Exception as e:
            # Deliberately broad: any failure to store or parse the upload is
            # reported to the client as a bad request.
            return Response(
                {"error": "Could not read the CSV file", "details": str(e)},
                status=status.HTTP_400_BAD_REQUEST,
            )

        expected_columns = ['process_name', 'class', 'probability_of_malware']
        if not set(expected_columns).issubset(df.columns):
            # Headers differ: fall back to positional mapping of the first
            # three columns, which requires at least three to exist.
            if len(df.columns) < len(expected_columns):
                return Response(
                    {"error": "CSV columns do not match expected format"},
                    status=status.HTTP_400_BAD_REQUEST,
                )
            df = df.iloc[:, :len(expected_columns)].copy()
            df.columns = expected_columns

        # Validate every row before writing so a bad value cannot leave a
        # half-imported file behind.
        # NOTE(review): assumes probability_of_malware is stored as a number.
        try:
            rows = [
                (name, label, float(probability))
                for name, label, probability in zip(
                    df['process_name'], df['class'], df['probability_of_malware']
                )
            ]
        except (TypeError, ValueError) as e:
            return Response(
                {"error": "Invalid value in 'probability_of_malware'", "details": str(e)},
                status=status.HTTP_400_BAD_REQUEST,
            )

        # All-or-nothing: a database error rolls back rows already inserted.
        with transaction.atomic():
            for name, label, probability in rows:
                MalwarePrediction.objects.create(
                    process_name=name,
                    process_class=label,
                    probability_of_malware=probability,
                    model_type=3,
                )

        return Response({"message": " XGB file saved successfully!"}, status=status.HTTP_201_CREATED)

    def get(self, request, *args, **kwargs):
        """Return a PNG bar chart of malware probability by class for ``model_type=3``.

        Responds with 404 when no matching records exist.
        """
        rows = list(
            MalwarePrediction.objects.filter(model_type=3)
            .values_list('process_class', 'probability_of_malware')
        )
        if not rows:
            return Response(
                {"error": "No data available to generate graph."},
                status=status.HTTP_404_NOT_FOUND,
            )
        df = pd.DataFrame(rows, columns=['class', 'probability_of_malware'])

        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            sns.barplot(
                data=df,
                x='class',  # Independent variable (x-axis)
                y='probability_of_malware',  # Dependent variable (y-axis)
                ci=None,  # No confidence intervals
                palette='Set2',  # Use a color palette for different classes
                ax=ax,
            )
            ax.set_title('Malware Probability by Class')
            ax.set_xlabel('Class')
            ax.set_ylabel('Probability of Malware')
            fig.tight_layout()
            buf = io.BytesIO()
            fig.savefig(buf, format='png')
        finally:
            # pyplot keeps every figure alive until it is closed; without this
            # each request would leak one figure.
            plt.close(fig)

        return HttpResponse(buf.getvalue(), content_type='image/png')
class SGDModelView(APIView):
    """Import and visualise SGD model predictions (``model_type=4``).

    ``POST`` takes a multipart upload with a ``csv_file`` part and stores one
    ``MalwarePrediction`` row per CSV record.  ``GET`` renders the stored rows
    as a PNG bar chart of malware probability per class.
    """

    parser_classes = [MultiPartParser]  # To handle file uploads

    # Columns the importer needs, listed in the positional order assumed for
    # files whose header uses other names.
    EXPECTED_COLUMNS = ['process_name', 'class', 'probability_of_malware']
    # Value written to / filtered on ``MalwarePrediction.model_type``.
    MODEL_TYPE = 4

    def post(self, request, *args, **kwargs):
        """Store every record of the uploaded CSV as a ``MalwarePrediction``.

        Returns:
            400 if no ``csv_file`` part was sent, the file cannot be parsed,
            or it has fewer columns than ``EXPECTED_COLUMNS``; 201 once all
            rows have been created.
        """
        if 'csv_file' not in request.FILES:
            return Response({"error": "No file provided"}, status=status.HTTP_400_BAD_REQUEST)
        csv_file = request.FILES['csv_file']

        try:
            # Parse straight from the upload object.  Writing it to
            # MEDIA_ROOT/temp under the client-supplied name left files behind
            # forever, let same-named uploads overwrite each other and built a
            # filesystem path from untrusted input.
            df = pd.read_csv(csv_file)
        except Exception as e:
            # Any parsing failure is reported to the client as a bad request.
            return Response(
                {"error": "Could not read the CSV file", "details": str(e)},
                status=status.HTTP_400_BAD_REQUEST,
            )

        expected = self.EXPECTED_COLUMNS
        if set(expected).issubset(df.columns):
            # Header already carries the expected names: select by name so a
            # different column order cannot mislabel the data.
            df = df[expected]
        elif len(df.columns) >= len(expected):
            # Unknown header names: fall back to positional mapping of the
            # leading columns.
            df = df.iloc[:, :len(expected)].copy()
            df.columns = expected
        else:
            # Too few columns to map; fail cleanly instead of raising KeyError.
            return Response(
                {
                    "error": "Could not read the CSV file",
                    "details": (
                        f"Expected at least {len(expected)} columns, "
                        f"found {len(df.columns)}"
                    ),
                },
                status=status.HTTP_400_BAD_REQUEST,
            )

        # itertuples keeps each column's own dtype (iterrows may upcast values
        # across columns); name=None yields plain tuples, which also sidesteps
        # the reserved word 'class' as a field name.
        for process_name, process_class, probability in df.itertuples(index=False, name=None):
            MalwarePrediction.objects.create(
                process_name=process_name,
                process_class=process_class,
                probability_of_malware=probability,
                model_type=self.MODEL_TYPE,
            )

        return Response({"message": " SGD file saved successfully!"}, status=status.HTTP_201_CREATED)

    def get(self, request, *args, **kwargs):
        """Return a PNG bar chart of malware probability grouped by class.

        Returns:
            404 when no predictions with this view's ``model_type`` exist,
            otherwise an ``image/png`` response.
        """
        # One query fetches just the plotted fields and doubles as the
        # emptiness check.
        rows = list(
            MalwarePrediction.objects
            .filter(model_type=self.MODEL_TYPE)
            .values_list('process_name', 'process_class', 'probability_of_malware')
        )
        if not rows:
            return Response({"error": "No data available to generate graph."}, status=status.HTTP_404_NOT_FOUND)

        df = pd.DataFrame(rows, columns=self.EXPECTED_COLUMNS)

        # Draw on an explicit figure and always close it: pyplot keeps every
        # figure alive until closed, so an unclosed figure per request leaks
        # memory in a long-running server process.
        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            # NOTE(review): `ci` is deprecated in seaborn >= 0.12 in favour of
            # `errorbar=None`; kept as-is because the pinned version is unknown.
            sns.barplot(
                data=df,
                x='class',                   # Independent variable (x-axis)
                y='probability_of_malware',  # Dependent variable (y-axis)
                ci=None,                     # No confidence intervals
                palette='Set2',              # Colour palette for the classes
                ax=ax,
            )
            ax.set_title('Malware Probability by Class')
            ax.set_xlabel('Class')
            ax.set_ylabel('Probability of Malware')
            fig.tight_layout()

            # Render the plot into an in-memory PNG
            buf = io.BytesIO()
            fig.savefig(buf, format='png')
        finally:
            plt.close(fig)

        return HttpResponse(buf.getvalue(), content_type='image/png')