# web_defender/malware/views.py



import io
import os
import pickle
import numpy as np

import pandas as pd
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import status
import seaborn as sns
import matplotlib.pyplot as plt
from django.core.files.storage import default_storage
from rest_framework.parsers import MultiPartParser
from django.conf import settings
from django.http import HttpResponse
from .models import MalwarePrediction
from .serializers import MalwarePredictionSerializer

class MalwarePredictionAPIView(APIView):
    """Store malware predictions uploaded as CSV and chart the stored data.

    ``POST`` reads a multipart upload from the ``csv_file`` field and creates
    one ``MalwarePrediction`` per CSV row.  ``GET`` returns a PNG bar chart of
    ``probability_of_malware`` per ``class`` over every stored prediction.
    """

    parser_classes = [MultiPartParser]  # To handle file uploads

    # Names the first three CSV columns are mapped onto, by position.
    expected_columns = ('process_name', 'class', 'probability_of_malware')

    def post(self, request, *args, **kwargs):
        """Import the uploaded CSV into ``MalwarePrediction`` rows.

        Returns 400 when no file is supplied, when pandas cannot parse it, or
        when it has fewer columns than ``expected_columns``; otherwise 201
        with the number of records saved.
        """
        if 'csv_file' not in request.FILES:
            return Response({"error": "No file provided"}, status=status.HTTP_400_BAD_REQUEST)

        try:
            # Parse directly from the upload instead of spooling it to
            # MEDIA_ROOT/temp: nothing is left on disk to clean up, and
            # concurrent uploads with the same filename cannot clobber
            # each other.
            df = pd.read_csv(request.FILES['csv_file'])
        except Exception as e:
            return Response({"error": "Could not read the CSV file", "details": str(e)}, status=status.HTTP_400_BAD_REQUEST)

        expected = list(self.expected_columns)
        if df.shape[1] < len(expected):
            # Without this guard the positional rename below would leave
            # columns missing and the row loop would fail with a KeyError.
            return Response(
                {
                    "error": "Could not read the CSV file",
                    "details": f"Expected at least {len(expected)} columns, found {df.shape[1]}",
                },
                status=status.HTTP_400_BAD_REQUEST,
            )

        # Columns are matched by position, not by header text.  Dropping any
        # extra columns first avoids duplicate labels after the rename.
        df = df.iloc[:, :len(expected)]
        df.columns = expected

        records = [
            MalwarePrediction(
                process_name=process_name,
                process_class=process_class,
                probability_of_malware=probability,
            )
            for process_name, process_class, probability in df.itertuples(index=False, name=None)
        ]
        # One batched insert instead of one query per row, so a failure does
        # not leave a half-imported file behind.
        # NOTE(review): bulk_create bypasses Model.save() and save signals --
        # confirm MalwarePrediction does not rely on either.
        MalwarePrediction.objects.bulk_create(records)

        return Response({"message": f"{len(records)} records saved successfully!"}, status=status.HTTP_201_CREATED)

    def get(self, request, *args, **kwargs):
        """Return a PNG bar chart built from every stored prediction.

        Responds with 404 when there are no predictions to plot.
        """
        # Fetch only the two plotted fields, in a single query.
        rows = list(
            MalwarePrediction.objects.all().values_list('process_class', 'probability_of_malware')
        )
        if not rows:
            return Response({"error": "No data available to generate graph."}, status=status.HTTP_404_NOT_FOUND)

        df = pd.DataFrame(rows, columns=['class', 'probability_of_malware'])
        return HttpResponse(self._render_barplot(df), content_type='image/png')

    @staticmethod
    def _render_barplot(df):
        """Render probability-per-class as a bar chart and return PNG bytes."""
        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            # NOTE(review): newer seaborn releases may warn about `ci` and a
            # palette without `hue`; kept as-is for the installed version.
            sns.barplot(
                data=df,
                x='class',  # Independent variable (x-axis)
                y='probability_of_malware',  # Dependent variable (y-axis)
                ci=None,  # No confidence intervals
                palette='Set2',  # Use a color palette for different classes
                ax=ax,
            )
            ax.set_title('Malware Probability by Class')
            ax.set_xlabel('Class')
            ax.set_ylabel('Probability of Malware')
            fig.tight_layout()

            buf = io.BytesIO()
            fig.savefig(buf, format='png')
        finally:
            # pyplot keeps every figure alive until it is closed; without
            # this each request would leak one figure.
            plt.close(fig)
        return buf.getvalue()


class KNeighborsModelView(APIView):
    """Store and chart predictions tagged with ``model_type`` 1.

    ``POST`` reads a multipart upload from the ``csv_file`` field and creates
    one ``MalwarePrediction`` per CSV row.  ``GET`` returns a PNG bar chart of
    ``probability_of_malware`` per ``class`` for this view's ``model_type``.
    """

    parser_classes = [MultiPartParser]  # To handle file uploads

    # Value written to, and filtered on, MalwarePrediction.model_type.
    MODEL_TYPE = 1

    # Names the first three CSV columns are mapped onto, by position.
    expected_columns = ('process_name', 'class', 'probability_of_malware')

    def post(self, request, *args, **kwargs):
        """Import the uploaded CSV into ``MalwarePrediction`` rows.

        Returns 400 when no file is supplied, when pandas cannot parse it, or
        when it has fewer columns than ``expected_columns``; otherwise 201.
        """
        if 'csv_file' not in request.FILES:
            return Response({"error": "No file provided"}, status=status.HTTP_400_BAD_REQUEST)

        try:
            # Parse directly from the upload instead of spooling it to
            # MEDIA_ROOT/temp: nothing is left on disk to clean up, and
            # concurrent uploads with the same filename cannot clobber
            # each other.
            df = pd.read_csv(request.FILES['csv_file'])
        except Exception as e:
            return Response({"error": "Could not read the CSV file", "details": str(e)}, status=status.HTTP_400_BAD_REQUEST)

        expected = list(self.expected_columns)
        if df.shape[1] < len(expected):
            # Without this guard the positional rename below would leave
            # columns missing and the row loop would fail with a KeyError.
            return Response(
                {
                    "error": "Could not read the CSV file",
                    "details": f"Expected at least {len(expected)} columns, found {df.shape[1]}",
                },
                status=status.HTTP_400_BAD_REQUEST,
            )

        # Columns are matched by position, not by header text.  Dropping any
        # extra columns first avoids duplicate labels after the rename.
        df = df.iloc[:, :len(expected)]
        df.columns = expected

        records = [
            MalwarePrediction(
                process_name=process_name,
                process_class=process_class,
                probability_of_malware=probability,
                model_type=self.MODEL_TYPE,
            )
            for process_name, process_class, probability in df.itertuples(index=False, name=None)
        ]
        # One batched insert instead of one query per row, so a failure does
        # not leave a half-imported file behind.
        # NOTE(review): bulk_create bypasses Model.save() and save signals --
        # confirm MalwarePrediction does not rely on either.
        MalwarePrediction.objects.bulk_create(records)

        return Response({"message": " knn file saved successfully!"}, status=status.HTTP_201_CREATED)

    def get(self, request, *args, **kwargs):
        """Return a PNG bar chart of the predictions for this model type.

        Responds with 404 when there are no matching predictions.
        """
        # Fetch only the two plotted fields, in a single query.
        rows = list(
            MalwarePrediction.objects.filter(model_type=self.MODEL_TYPE)
            .values_list('process_class', 'probability_of_malware')
        )
        if not rows:
            return Response({"error": "No data available to generate graph."}, status=status.HTTP_404_NOT_FOUND)

        df = pd.DataFrame(rows, columns=['class', 'probability_of_malware'])
        return HttpResponse(self._render_barplot(df), content_type='image/png')

    @staticmethod
    def _render_barplot(df):
        """Render probability-per-class as a bar chart and return PNG bytes."""
        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            # NOTE(review): newer seaborn releases may warn about `ci` and a
            # palette without `hue`; kept as-is for the installed version.
            sns.barplot(
                data=df,
                x='class',  # Independent variable (x-axis)
                y='probability_of_malware',  # Dependent variable (y-axis)
                ci=None,  # No confidence intervals
                palette='Set2',  # Use a color palette for different classes
                ax=ax,
            )
            ax.set_title('Malware Probability by Class')
            ax.set_xlabel('Class')
            ax.set_ylabel('Probability of Malware')
            fig.tight_layout()

            buf = io.BytesIO()
            fig.savefig(buf, format='png')
        finally:
            # pyplot keeps every figure alive until it is closed; without
            # this each request would leak one figure.
            plt.close(fig)
        return buf.getvalue()


class RandomForestModelView(APIView):
    """Store and chart predictions tagged with ``model_type`` 2.

    ``POST`` reads a multipart upload from the ``csv_file`` field and creates
    one ``MalwarePrediction`` per CSV row.  ``GET`` returns a PNG bar chart of
    ``probability_of_malware`` per ``class`` for this view's ``model_type``.
    """

    parser_classes = [MultiPartParser]  # To handle file uploads

    # Value written to, and filtered on, MalwarePrediction.model_type.
    MODEL_TYPE = 2

    # Names the first three CSV columns are mapped onto, by position.
    expected_columns = ('process_name', 'class', 'probability_of_malware')

    def post(self, request, *args, **kwargs):
        """Import the uploaded CSV into ``MalwarePrediction`` rows.

        Returns 400 when no file is supplied, when pandas cannot parse it, or
        when it has fewer columns than ``expected_columns``; otherwise 201.
        """
        if 'csv_file' not in request.FILES:
            return Response({"error": "No file provided"}, status=status.HTTP_400_BAD_REQUEST)

        try:
            # Parse directly from the upload instead of spooling it to
            # MEDIA_ROOT/temp: nothing is left on disk to clean up, and
            # concurrent uploads with the same filename cannot clobber
            # each other.
            df = pd.read_csv(request.FILES['csv_file'])
        except Exception as e:
            return Response({"error": "Could not read the CSV file", "details": str(e)}, status=status.HTTP_400_BAD_REQUEST)

        expected = list(self.expected_columns)
        if df.shape[1] < len(expected):
            # Without this guard the positional rename below would leave
            # columns missing and the row loop would fail with a KeyError.
            return Response(
                {
                    "error": "Could not read the CSV file",
                    "details": f"Expected at least {len(expected)} columns, found {df.shape[1]}",
                },
                status=status.HTTP_400_BAD_REQUEST,
            )

        # Columns are matched by position, not by header text.  Dropping any
        # extra columns first avoids duplicate labels after the rename.
        df = df.iloc[:, :len(expected)]
        df.columns = expected

        records = [
            MalwarePrediction(
                process_name=process_name,
                process_class=process_class,
                probability_of_malware=probability,
                model_type=self.MODEL_TYPE,
            )
            for process_name, process_class, probability in df.itertuples(index=False, name=None)
        ]
        # One batched insert instead of one query per row, so a failure does
        # not leave a half-imported file behind.
        # NOTE(review): bulk_create bypasses Model.save() and save signals --
        # confirm MalwarePrediction does not rely on either.
        MalwarePrediction.objects.bulk_create(records)

        return Response({"message": " RandomForest file saved successfully!"}, status=status.HTTP_201_CREATED)

    def get(self, request, *args, **kwargs):
        """Return a PNG bar chart of the predictions for this model type.

        Responds with 404 when there are no matching predictions.
        """
        # Fetch only the two plotted fields, in a single query.
        rows = list(
            MalwarePrediction.objects.filter(model_type=self.MODEL_TYPE)
            .values_list('process_class', 'probability_of_malware')
        )
        if not rows:
            return Response({"error": "No data available to generate graph."}, status=status.HTTP_404_NOT_FOUND)

        df = pd.DataFrame(rows, columns=['class', 'probability_of_malware'])
        return HttpResponse(self._render_barplot(df), content_type='image/png')

    @staticmethod
    def _render_barplot(df):
        """Render probability-per-class as a bar chart and return PNG bytes."""
        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            # NOTE(review): newer seaborn releases may warn about `ci` and a
            # palette without `hue`; kept as-is for the installed version.
            sns.barplot(
                data=df,
                x='class',  # Independent variable (x-axis)
                y='probability_of_malware',  # Dependent variable (y-axis)
                ci=None,  # No confidence intervals
                palette='Set2',  # Use a color palette for different classes
                ax=ax,
            )
            ax.set_title('Malware Probability by Class')
            ax.set_xlabel('Class')
            ax.set_ylabel('Probability of Malware')
            fig.tight_layout()

            buf = io.BytesIO()
            fig.savefig(buf, format='png')
        finally:
            # pyplot keeps every figure alive until it is closed; without
            # this each request would leak one figure.
            plt.close(fig)
        return buf.getvalue()


class XGBModelView(APIView):
    """Store and chart predictions tagged with ``model_type`` 3.

    ``POST`` reads a multipart upload from the ``csv_file`` field and creates
    one ``MalwarePrediction`` per CSV row.  ``GET`` returns a PNG bar chart of
    ``probability_of_malware`` per ``class`` for this view's ``model_type``.
    """

    parser_classes = [MultiPartParser]  # To handle file uploads

    # Value written to, and filtered on, MalwarePrediction.model_type.
    MODEL_TYPE = 3

    # Names the first three CSV columns are mapped onto, by position.
    expected_columns = ('process_name', 'class', 'probability_of_malware')

    def post(self, request, *args, **kwargs):
        """Import the uploaded CSV into ``MalwarePrediction`` rows.

        Returns 400 when no file is supplied, when pandas cannot parse it, or
        when it has fewer columns than ``expected_columns``; otherwise 201.
        """
        if 'csv_file' not in request.FILES:
            return Response({"error": "No file provided"}, status=status.HTTP_400_BAD_REQUEST)

        try:
            # Parse directly from the upload instead of spooling it to
            # MEDIA_ROOT/temp: nothing is left on disk to clean up, and
            # concurrent uploads with the same filename cannot clobber
            # each other.
            df = pd.read_csv(request.FILES['csv_file'])
        except Exception as e:
            return Response({"error": "Could not read the CSV file", "details": str(e)}, status=status.HTTP_400_BAD_REQUEST)

        expected = list(self.expected_columns)
        if df.shape[1] < len(expected):
            # Without this guard the positional rename below would leave
            # columns missing and the row loop would fail with a KeyError.
            return Response(
                {
                    "error": "Could not read the CSV file",
                    "details": f"Expected at least {len(expected)} columns, found {df.shape[1]}",
                },
                status=status.HTTP_400_BAD_REQUEST,
            )

        # Columns are matched by position, not by header text.  Dropping any
        # extra columns first avoids duplicate labels after the rename.
        df = df.iloc[:, :len(expected)]
        df.columns = expected

        records = [
            MalwarePrediction(
                process_name=process_name,
                process_class=process_class,
                probability_of_malware=probability,
                model_type=self.MODEL_TYPE,
            )
            for process_name, process_class, probability in df.itertuples(index=False, name=None)
        ]
        # One batched insert instead of one query per row, so a failure does
        # not leave a half-imported file behind.
        # NOTE(review): bulk_create bypasses Model.save() and save signals --
        # confirm MalwarePrediction does not rely on either.
        MalwarePrediction.objects.bulk_create(records)

        return Response({"message": " XGB file saved successfully!"}, status=status.HTTP_201_CREATED)

    def get(self, request, *args, **kwargs):
        """Return a PNG bar chart of the predictions for this model type.

        Responds with 404 when there are no matching predictions.
        """
        # Fetch only the two plotted fields, in a single query.
        rows = list(
            MalwarePrediction.objects.filter(model_type=self.MODEL_TYPE)
            .values_list('process_class', 'probability_of_malware')
        )
        if not rows:
            return Response({"error": "No data available to generate graph."}, status=status.HTTP_404_NOT_FOUND)

        df = pd.DataFrame(rows, columns=['class', 'probability_of_malware'])
        return HttpResponse(self._render_barplot(df), content_type='image/png')

    @staticmethod
    def _render_barplot(df):
        """Render probability-per-class as a bar chart and return PNG bytes."""
        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            # NOTE(review): newer seaborn releases may warn about `ci` and a
            # palette without `hue`; kept as-is for the installed version.
            sns.barplot(
                data=df,
                x='class',  # Independent variable (x-axis)
                y='probability_of_malware',  # Dependent variable (y-axis)
                ci=None,  # No confidence intervals
                palette='Set2',  # Use a color palette for different classes
                ax=ax,
            )
            ax.set_title('Malware Probability by Class')
            ax.set_xlabel('Class')
            ax.set_ylabel('Probability of Malware')
            fig.tight_layout()

            buf = io.BytesIO()
            fig.savefig(buf, format='png')
        finally:
            # pyplot keeps every figure alive until it is closed; without
            # this each request would leak one figure.
            plt.close(fig)
        return buf.getvalue()


class SGDModelView(APIView):
    """Store and chart predictions tagged with ``model_type`` 4.

    ``POST`` reads a multipart upload from the ``csv_file`` field and creates
    one ``MalwarePrediction`` per CSV row.  ``GET`` returns a PNG bar chart of
    ``probability_of_malware`` per ``class`` for this view's ``model_type``.
    """

    parser_classes = [MultiPartParser]  # To handle file uploads

    # Value written to, and filtered on, MalwarePrediction.model_type.
    MODEL_TYPE = 4

    # Names the first three CSV columns are mapped onto, by position.
    expected_columns = ('process_name', 'class', 'probability_of_malware')

    def post(self, request, *args, **kwargs):
        """Import the uploaded CSV into ``MalwarePrediction`` rows.

        Returns 400 when no file is supplied, when pandas cannot parse it, or
        when it has fewer columns than ``expected_columns``; otherwise 201.
        """
        if 'csv_file' not in request.FILES:
            return Response({"error": "No file provided"}, status=status.HTTP_400_BAD_REQUEST)

        try:
            # Parse directly from the upload instead of spooling it to
            # MEDIA_ROOT/temp: nothing is left on disk to clean up, and
            # concurrent uploads with the same filename cannot clobber
            # each other.
            df = pd.read_csv(request.FILES['csv_file'])
        except Exception as e:
            return Response({"error": "Could not read the CSV file", "details": str(e)}, status=status.HTTP_400_BAD_REQUEST)

        expected = list(self.expected_columns)
        if df.shape[1] < len(expected):
            # Without this guard the positional rename below would leave
            # columns missing and the row loop would fail with a KeyError.
            return Response(
                {
                    "error": "Could not read the CSV file",
                    "details": f"Expected at least {len(expected)} columns, found {df.shape[1]}",
                },
                status=status.HTTP_400_BAD_REQUEST,
            )

        # Columns are matched by position, not by header text.  Dropping any
        # extra columns first avoids duplicate labels after the rename.
        df = df.iloc[:, :len(expected)]
        df.columns = expected

        records = [
            MalwarePrediction(
                process_name=process_name,
                process_class=process_class,
                probability_of_malware=probability,
                model_type=self.MODEL_TYPE,
            )
            for process_name, process_class, probability in df.itertuples(index=False, name=None)
        ]
        # One batched insert instead of one query per row, so a failure does
        # not leave a half-imported file behind.
        # NOTE(review): bulk_create bypasses Model.save() and save signals --
        # confirm MalwarePrediction does not rely on either.
        MalwarePrediction.objects.bulk_create(records)

        return Response({"message": " SGD file saved successfully!"}, status=status.HTTP_201_CREATED)

    def get(self, request, *args, **kwargs):
        """Return a PNG bar chart of the predictions for this model type.

        Responds with 404 when there are no matching predictions.
        """
        # Fetch only the two plotted fields, in a single query.
        rows = list(
            MalwarePrediction.objects.filter(model_type=self.MODEL_TYPE)
            .values_list('process_class', 'probability_of_malware')
        )
        if not rows:
            return Response({"error": "No data available to generate graph."}, status=status.HTTP_404_NOT_FOUND)

        df = pd.DataFrame(rows, columns=['class', 'probability_of_malware'])
        return HttpResponse(self._render_barplot(df), content_type='image/png')

    @staticmethod
    def _render_barplot(df):
        """Render probability-per-class as a bar chart and return PNG bytes."""
        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            # NOTE(review): newer seaborn releases may warn about `ci` and a
            # palette without `hue`; kept as-is for the installed version.
            sns.barplot(
                data=df,
                x='class',  # Independent variable (x-axis)
                y='probability_of_malware',  # Dependent variable (y-axis)
                ci=None,  # No confidence intervals
                palette='Set2',  # Use a color palette for different classes
                ax=ax,
            )
            ax.set_title('Malware Probability by Class')
            ax.set_xlabel('Class')
            ax.set_ylabel('Probability of Malware')
            fig.tight_layout()

            buf = io.BytesIO()
            fig.savefig(buf, format='png')
        finally:
            # pyplot keeps every figure alive until it is closed; without
            # this each request would leak one figure.
            plt.close(fig)
        return buf.getvalue()