536 lines
20 KiB
Python
536 lines
20 KiB
Python
|
|
|
|
|
|
|
|
|
|
import io
|
|
import os
|
|
import pickle
|
|
import numpy as np
|
|
|
|
import pandas as pd
|
|
from rest_framework.views import APIView
|
|
from rest_framework.response import Response
|
|
from rest_framework import status
|
|
import seaborn as sns
|
|
import matplotlib.pyplot as plt
|
|
from django.core.files.storage import default_storage
|
|
from rest_framework.parsers import MultiPartParser
|
|
from django.conf import settings
|
|
from django.http import HttpResponse
|
|
from .models import MalwarePrediction
|
|
from .serializers import MalwarePredictionSerializer
|
|
|
|
class _BasePredictionView(APIView):
    """Shared upload/plot logic for the malware-prediction endpoints.

    ``POST`` reads an uploaded CSV (multipart field ``csv_file``) and stores
    one ``MalwarePrediction`` row per CSV record.  ``GET`` returns a PNG
    seaborn bar plot with ``class`` on the x-axis and
    ``probability_of_malware`` on the y-axis.

    Subclasses customise behaviour through two class attributes:

    * ``model_type`` -- value passed as ``MalwarePrediction.model_type`` on
      write and used as the filter on read.  ``None`` means the field is not
      passed on write and no filter is applied on read.
    * ``success_message`` -- fixed message returned after a successful
      upload.  ``None`` means the number of saved records is reported.
    """

    parser_classes = [MultiPartParser]  # To handle file uploads

    # Column names the import works with, in the order used for the
    # positional fallback in ``_select_expected_columns``.
    EXPECTED_COLUMNS = ('process_name', 'class', 'probability_of_malware')

    model_type = None
    success_message = None

    @classmethod
    def _select_expected_columns(cls, frame):
        """Return ``frame`` reduced to ``EXPECTED_COLUMNS``, or ``None``.

        If every expected name is present the columns are picked by name, so
        column order and extra columns in the upload do not matter.
        Otherwise the first three columns are taken positionally and given
        the expected names.  ``None`` is returned when the CSV has fewer
        columns than expected.
        """
        expected = list(cls.EXPECTED_COLUMNS)
        if set(expected).issubset(frame.columns):
            return frame[expected]
        if frame.shape[1] < len(expected):
            return None
        # Slice before renaming so leftover columns can never collide with
        # the expected names and produce duplicate labels.
        positional = frame.iloc[:, :len(expected)].copy()
        positional.columns = expected
        return positional

    def _predictions(self):
        """Return the queryset of rows this endpoint reads for plotting."""
        queryset = MalwarePrediction.objects.all()
        if self.model_type is not None:
            queryset = queryset.filter(model_type=self.model_type)
        return queryset

    def post(self, request, *args, **kwargs):
        """Import the uploaded CSV into ``MalwarePrediction`` rows.

        Returns:
            400 if ``csv_file`` is missing, the file cannot be parsed as CSV,
            or it has fewer than three columns; 201 with a ``message`` on
            success.
        """
        csv_file = request.FILES.get('csv_file')
        if csv_file is None:
            return Response({"error": "No file provided"}, status=status.HTTP_400_BAD_REQUEST)

        try:
            # Parse directly from the uploaded file object: nothing is
            # written to disk, so there is no temp file to clean up and no
            # clash between uploads that share a filename.
            raw_frame = pd.read_csv(csv_file)
        except (ValueError, OSError) as exc:
            # pandas' ParserError / EmptyDataError and UnicodeDecodeError are
            # all ValueError subclasses.
            return Response(
                {"error": "Could not read the CSV file", "details": str(exc)},
                status=status.HTTP_400_BAD_REQUEST,
            )

        frame = self._select_expected_columns(raw_frame)
        if frame is None:
            return Response(
                {
                    "error": "CSV must contain at least three columns: "
                             + ", ".join(self.EXPECTED_COLUMNS)
                },
                status=status.HTTP_400_BAD_REQUEST,
            )

        extra_fields = {} if self.model_type is None else {'model_type': self.model_type}
        records = [
            MalwarePrediction(
                process_name=process_name,
                process_class=process_class,
                probability_of_malware=probability,
                **extra_fields,
            )
            for process_name, process_class, probability
            in frame.itertuples(index=False, name=None)
        ]
        # One bulk insert instead of one query per row.
        # NOTE(review): bulk_create does not call Model.save() or send
        # pre/post_save signals -- confirm MalwarePrediction relies on neither.
        MalwarePrediction.objects.bulk_create(records)

        message = self.success_message
        if message is None:
            message = f"{len(records)} records saved successfully!"
        return Response({"message": message}, status=status.HTTP_201_CREATED)

    def get(self, request, *args, **kwargs):
        """Return a PNG bar plot of malware probability by class.

        Returns:
            404 with an ``error`` message when there are no matching rows,
            otherwise an ``image/png`` response.
        """
        # Single pass over the queryset; only the plotted fields are kept.
        rows = [
            (prediction.process_class, prediction.probability_of_malware)
            for prediction in self._predictions()
        ]
        if not rows:
            return Response({"error": "No data available to generate graph."}, status=status.HTTP_404_NOT_FOUND)

        df = pd.DataFrame(rows, columns=['class', 'probability_of_malware'])

        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            sns.barplot(
                data=df,
                x='class',  # Independent variable (x-axis)
                y='probability_of_malware',  # Dependent variable (y-axis)
                ci=None,  # No confidence intervals
                palette='Set2',  # Use a color palette for different classes
                ax=ax,
            )
            ax.set_title('Malware Probability by Class')
            ax.set_xlabel('Class')
            ax.set_ylabel('Probability of Malware')
            fig.tight_layout()

            buf = io.BytesIO()
            fig.savefig(buf, format='png')
        finally:
            # pyplot keeps every figure alive in a global registry until it
            # is closed; without this each request would leak a figure.
            plt.close(fig)

        return HttpResponse(buf.getvalue(), content_type='image/png')


class MalwarePredictionAPIView(_BasePredictionView):
    """Upload/plot endpoint that neither sets nor filters on ``model_type``."""


class KNeighborsModelView(_BasePredictionView):
    """Upload/plot endpoint for rows stored with ``model_type=1``."""

    model_type = 1
    success_message = " knn file saved successfully!"


class RandomForestModelView(_BasePredictionView):
    """Upload/plot endpoint for rows stored with ``model_type=2``."""

    model_type = 2
    success_message = " RandomForest file saved successfully!"


class XGBModelView(_BasePredictionView):
    """Upload/plot endpoint for rows stored with ``model_type=3``."""

    model_type = 3
    success_message = " XGB file saved successfully!"


class SGDModelView(_BasePredictionView):
    """Upload/plot endpoint for rows stored with ``model_type=4``."""

    model_type = 4
    success_message = " SGD file saved successfully!"
|
|
|
|
|