# web_defender/malware/views.py
# 2024-12-09 13:43:16 +05:30
#
# 536 lines
# 20 KiB
# Python

import io
import os
import pickle
import numpy as np
import pandas as pd
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import status
import seaborn as sns
import matplotlib.pyplot as plt
from django.core.files.storage import default_storage
from rest_framework.parsers import MultiPartParser
from django.conf import settings
from django.http import HttpResponse
from .models import MalwarePrediction
from .serializers import MalwarePredictionSerializer
class _PredictionCSVView(APIView):
    """Shared implementation for the CSV-upload / bar-chart prediction endpoints.

    POST accepts a multipart upload under the ``csv_file`` key, normalises the
    first three columns (by position) to ``process_name``, ``class`` and
    ``probability_of_malware`` and stores one ``MalwarePrediction`` per row.

    GET renders a PNG bar chart of ``probability_of_malware`` grouped by class
    for the rows belonging to this view.

    Subclasses only configure ``model_type`` and ``success_message``.
    """

    parser_classes = [MultiPartParser]  # To handle file uploads

    # Names the uploaded columns are mapped onto, in positional order.
    expected_columns = ('process_name', 'class', 'probability_of_malware')

    # Value written to / filtered on ``MalwarePrediction.model_type``.
    # ``None`` means: do not pass the field on create (the model default
    # applies) and do not filter on read (all rows are plotted).
    model_type = None

    # Body of the 201 response; ``{count}`` is replaced by the number of
    # rows stored. Messages without the placeholder are returned verbatim.
    success_message = "{count} records saved successfully!"

    def _load_frame(self, uploaded_file):
        """Parse the upload into a DataFrame with exactly the expected columns.

        The file is parsed from memory rather than being written under
        ``MEDIA_ROOT/temp``: the previous on-disk copy was never deleted and
        uploads sharing a filename overwrote each other.

        Raises:
            ValueError: if pandas cannot parse the content (its parser,
                empty-data and decode errors all derive from ``ValueError``)
                or if the CSV has fewer columns than ``expected_columns``.
            OSError: if the uploaded file cannot be read.
        """
        frame = pd.read_csv(io.BytesIO(uploaded_file.read()))

        wanted = len(self.expected_columns)
        if frame.shape[1] < wanted:
            raise ValueError(
                f"expected at least {wanted} columns "
                f"({', '.join(self.expected_columns)}), found {frame.shape[1]}"
            )

        # Columns are matched by position, as before: whatever the header
        # says, the first three columns become the expected names. Extra
        # trailing columns are ignored.
        frame = frame.iloc[:, :wanted]
        frame.columns = list(self.expected_columns)
        return frame

    def post(self, request, *args, **kwargs):
        """Store every row of the uploaded CSV as a ``MalwarePrediction``.

        Returns:
            400 if ``csv_file`` is missing or the CSV cannot be read/validated,
            201 with ``success_message`` once the rows are stored.
        """
        if 'csv_file' not in request.FILES:
            return Response({"error": "No file provided"}, status=status.HTTP_400_BAD_REQUEST)

        try:
            df = self._load_frame(request.FILES['csv_file'])
        except (ValueError, OSError) as e:
            return Response(
                {"error": "Could not read the CSV file", "details": str(e)},
                status=status.HTTP_400_BAD_REQUEST,
            )

        extra_fields = {} if self.model_type is None else {"model_type": self.model_type}
        records = [
            MalwarePrediction(
                process_name=process_name,
                process_class=process_class,
                probability_of_malware=probability,
                **extra_fields,
            )
            # name=None yields plain tuples; 'class' is not a valid
            # namedtuple field name.
            for process_name, process_class, probability in df.itertuples(index=False, name=None)
        ]

        # One atomic bulk INSERT instead of one query per row, so a bad row
        # can no longer leave a half-imported file behind.
        # NOTE(review): bulk_create does not call Model.save() or emit
        # pre/post_save signals - confirm MalwarePrediction relies on neither.
        MalwarePrediction.objects.bulk_create(records)

        return Response(
            {"message": self.success_message.format(count=len(records))},
            status=status.HTTP_201_CREATED,
        )

    def get(self, request, *args, **kwargs):
        """Return a PNG bar chart of malware probability per class.

        Returns:
            404 if there are no stored predictions for this view, otherwise
            an ``image/png`` response.
        """
        queryset = MalwarePrediction.objects.all()
        if self.model_type is not None:
            queryset = queryset.filter(model_type=self.model_type)

        # Single query fetching only the two plotted columns.
        rows = list(queryset.values_list('process_class', 'probability_of_malware'))
        if not rows:
            return Response(
                {"error": "No data available to generate graph."},
                status=status.HTTP_404_NOT_FOUND,
            )

        df = pd.DataFrame(rows, columns=['class', 'probability_of_malware'])

        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            # Class on the x-axis, probability on the y-axis.
            # `ci=None` is kept for compatibility with the seaborn version
            # this project was written against; newer releases spell it
            # `errorbar=None`.
            sns.barplot(
                data=df,
                x='class',
                y='probability_of_malware',
                ci=None,
                palette='Set2',
                ax=ax,
            )
            ax.set_title('Malware Probability by Class')
            ax.set_xlabel('Class')
            ax.set_ylabel('Probability of Malware')
            fig.tight_layout()

            buf = io.BytesIO()
            fig.savefig(buf, format='png')
        finally:
            # pyplot keeps every figure alive in a global registry until it
            # is closed; without this each request leaks one figure.
            plt.close(fig)

        return HttpResponse(buf.getvalue(), content_type='image/png')


class MalwarePredictionAPIView(_PredictionCSVView):
    """Upload predictions without a model type; chart across all stored rows."""


class KNeighborsModelView(_PredictionCSVView):
    """Upload and chart predictions stored with ``model_type=1`` (KNN)."""

    model_type = 1
    success_message = " knn file saved successfully!"


class RandomForestModelView(_PredictionCSVView):
    """Upload and chart predictions stored with ``model_type=2`` (RandomForest)."""

    model_type = 2
    success_message = " RandomForest file saved successfully!"


class XGBModelView(_PredictionCSVView):
    """Upload and chart predictions stored with ``model_type=3`` (XGB)."""

    model_type = 3
    success_message = " XGB file saved successfully!"


class SGDModelView(_PredictionCSVView):
    """Upload and chart predictions stored with ``model_type=4`` (SGD)."""

    model_type = 4
    success_message = " SGD file saved successfully!"