l3_centralizedattackdetectorServiceServicerImpl.py

# Copyright 2021-2023 H2020 TeraFlow (https://www.teraflow-h2020.eu/)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function
from datetime import datetime

import os
import grpc
import numpy as np
import onnxruntime as rt
import logging
from time import sleep

from common.proto.l3_centralizedattackdetector_pb2 import Empty
from common.proto.l3_centralizedattackdetector_pb2_grpc import L3CentralizedattackdetectorServicer

from common.proto.l3_attackmitigator_pb2 import L3AttackmitigatorOutput
from common.proto.l3_attackmitigator_pb2_grpc import L3AttackmitigatorStub

from common.proto.monitoring_pb2 import KpiDescriptor
from common.proto.kpi_sample_types_pb2 import KpiSampleType

from monitoring.client.MonitoringClient import MonitoringClient
from common.proto.monitoring_pb2 import Kpi

from common.tools.timestamp.Converters import timestamp_utcnow_to_float
from common.proto.context_pb2 import Timestamp, ServiceId, EndPointId

from l3_attackmitigator.client.l3_attackmitigatorClient import l3_attackmitigatorClient

from multiprocessing import Process, Queue

from google.protobuf.json_format import MessageToJson, Parse
import copy


LOGGER = logging.getLogger(__name__)
current_dir = os.path.dirname(os.path.abspath(__file__))
MODEL_FILE = os.path.join(current_dir, "ml_model/crypto_5g_rf_spider_features.onnx")


class l3_centralizedattackdetectorServiceServicerImpl(L3CentralizedattackdetectorServicer):

    """
    Initialize variables, prediction model and clients of components used by CAD
    """

    def __init__(self):
        LOGGER.info("Creating Centralized Attack Detector Service")

        self.inference_values = Queue()
        self.inference_results = Queue()
        self.model = rt.InferenceSession(MODEL_FILE)
        self.input_name = self.model.get_inputs()[0].name
        self.label_name = self.model.get_outputs()[0].name
        self.prob_name = self.model.get_outputs()[1].name
        self.monitoring_client = MonitoringClient()
        self.service_ids = []
        self.monitored_kpis = {
            "l3_security_status": {
                "kpi_id": None,
                "description": "L3 - Confidence of the cryptomining detector in the security status in the last time interval of the service {service_id}",
                "kpi_sample_type": KpiSampleType.KPISAMPLETYPE_UNKNOWN,  # TODO: change this to KPI_L3_SECURITY_STATUS and add it to kpi_sample_types.proto
                "service_ids": [],
            },
            "l3_ml_model_confidence": {
                "kpi_id": None,
                "description": "L3 - Security status of the service in a time interval of the service {service_id} (“0” if no attack has been detected on the service and “1” if a cryptomining attack has been detected)",
                "kpi_sample_type": KpiSampleType.KPISAMPLETYPE_UNKNOWN,  # TODO: change this to KPI_L3_ML_CONFIDENCE and add it to kpi_sample_types.proto
                "service_ids": [],
            },
            "l3_unique_attack_conns": {
                "kpi_id": None,
                "description": "L3 - Number of attack connections detected in a time interval of the service {service_id} (attacks of the same connection [origin IP, origin port, destination IP and destination port] are only considered once)",
                "kpi_sample_type": KpiSampleType.KPISAMPLETYPE_UNKNOWN,  # TODO: change this to KPI_UNIQUE_ATTACK_CONNS and add it to kpi_sample_types.proto
                "service_ids": [],
            },
            "l3_unique_compromised_clients": {
                "kpi_id": None,
                "description": "L3 - Number of unique compromised clients of the service in a time interval of the service {service_id} (attacks from the same origin IP are only considered once)",
                "kpi_sample_type": KpiSampleType.KPISAMPLETYPE_UNKNOWN,  # TODO: change this to KPI_UNIQUE_COMPROMISED_CLIENTS and add it to kpi_sample_types.proto
                "service_ids": [],
            },
            "l3_unique_attackers": {
                "kpi_id": None,
                "description": "L3 - number of unique attackers of the service in a time interval of the service {service_id} (attacks from the same destination IP are only considered once)",
                "kpi_sample_type": KpiSampleType.KPISAMPLETYPE_UNKNOWN,  # TODO: change this to KPI_UNIQUE_ATTACKERS and add it to kpi_sample_types.proto
                "service_ids": [],
            },
        }
        self.attackmitigator_client = l3_attackmitigatorClient()

        # Environment variables
        self.CLASSIFICATION_THRESHOLD = os.getenv("CAD_CLASSIFICATION_THRESHOLD", 0.5)
        self.MONITORED_KPIS_TIME_INTERVAL_AGG = os.getenv("MONITORED_KPIS_TIME_INTERVAL_AGG", 5)

        # Constants
        self.NORMAL_CLASS = 0
        self.CRYPTO_CLASS = 1

        # start monitoring process
        self.monitoring_process = Process(target=self.monitor_kpis, args=(self.inference_results,))
        self.monitoring_process.start()

    """
    Create a monitored KPI for a specific service and add it to the Monitoring Client
        -input: 
            + client: Monitoring Client object where the KPI will be tracked
            + service_id: service ID where the KPI will be monitored
            + kpi_description: description of the KPI
            + kpi_sample_type: KPI sample type of the KPI (it must be defined in the kpi_sample_types.proto file)
        -output: KPI identifier representing the KPI
    """

    def create_kpi(
        self, client: MonitoringClient, service_id, device_id, endpoint_id, kpi_name, kpi_description, kpi_sample_type
    ):
        kpidescriptor = KpiDescriptor()
        kpidescriptor.kpi_description = kpi_description
        kpidescriptor.service_id.service_uuid.uuid = service_id.service_uuid.uuid
        kpidescriptor.device_id.device_uuid.uuid = device_id.device_uuid.uuid
        kpidescriptor.endpoint_id.endpoint_uuid.uuid = endpoint_id.endpoint_uuid.uuid
        kpidescriptor.kpi_sample_type = kpi_sample_type
        new_kpi = client.SetKpi(kpidescriptor)

        LOGGER.info("Created KPI {}".format(kpi_name))

        return new_kpi

    """
    Create the monitored KPIs for a specific service, add them to the Monitoring Client and store their identifiers in the monitored_kpis dictionary
        -input:
            + service_id: service ID where the KPIs will be monitored
        -output: None
    """

    def create_kpis(self, service_id, device_id, endpoint_id):
        # for now, all the KPIs are created for all the services from which requests are received
        for kpi in self.monitored_kpis:
            created_kpi = self.create_kpi(
                self.monitoring_client,
                service_id,
                device_id,
                endpoint_id,
                kpi,
                self.monitored_kpis[kpi]["description"],
                self.monitored_kpis[kpi]["kpi_sample_type"],
            )
            self.monitored_kpis[kpi]["kpi_id"] = created_kpi.kpi_id
            self.monitored_kpis[kpi]["service_ids"].append(service_id.service_uuid.uuid)

    def monitor_kpis(self, inference_results):
        while True:
            try:
                # get all information from the inference_results queue
                monitor_inference_results = []

                for i in range(inference_results.qsize()):
                    monitor_inference_results.append(self.inference_results.get())

                # deserialize the inference results
                for i in range(len(monitor_inference_results)):
                    monitor_inference_results[i]["output"]["service_id"] = Parse(
                        monitor_inference_results[i]["output"]["service_id"], ServiceId()
                    )
                    monitor_inference_results[i]["output"]["endpoint_id"] = Parse(
                        monitor_inference_results[i]["output"]["endpoint_id"], EndPointId()
                    )

                for service_id in self.service_ids:
                    time_interval = self.MONITORED_KPIS_TIME_INTERVAL_AGG
                    time_interval_start = datetime.utcnow()
                    time_interval_end = time_interval_start + time_interval

                    # L3 security status
                    kpi_security_status = Kpi()
                    kpi_security_status.kpi_id.kpi_id.CopyFrom(
                        self.monitored_kpis["l3_security_status"]["kpi_id"].kpi_id
                    )

                    # get the output.tag of the ML model of the last aggregation time interval as indicated by the self.MONITORED_KPIS_TIME_INTERVAL_AGG variable
                    outputs_last_time_interval = []

                    for i in range(self.monitor_inference_results):
                        if (
                            self.monitor_inference_results[i]["timestamp"] >= time_interval_start
                            and self.monitor_inference_results[i]["timestamp"] <= time_interval_end
                            and self.monitor_inference_results[i]["service_id"] == service_id
                            and service_id in self.monitored_kpis["l3_security_status"]["service_ids"]
                        ):
                            outputs_last_time_interval.append(self.monitor_inference_results[i]["output"]["tag"])

                    kpi_security_status.kpi_value.intVal = (
                        0 if np.all(outputs_last_time_interval == self.NORMAL_CLASS) else 1
                    )

                    # L3 ML model confidence
                    kpi_conf = Kpi()
                    kpi_conf.kpi_id.kpi_id.CopyFrom(self.monitored_kpis["l3_ml_model_confidence"]["kpi_id"].kpi_id)

                    # get the output.confidence of the ML model of the last aggregation time interval as indicated by the self.MONITORED_KPIS_TIME_INTERVAL_AGG variable
                    confidences_normal_last_time_interval = []
                    confidences_crypto_last_time_interval = []

                    for i in range(self.monitor_inference_results):
                        if (
                            self.monitor_inference_results[i]["timestamp"] >= time_interval_start
                            and self.monitor_inference_results[i]["timestamp"] <= time_interval_end
                            and self.monitor_inference_results[i]["service_id"] == service_id
                            and service_id in self.monitored_kpis["l3_security_status"]["service_ids"]
                        ):
                            if self.monitor_inference_results[i]["output"]["tag"] == self.NORMAL_CLASS:
                                confidences_normal_last_time_interval.append(
                                    self.monitor_inference_results[i]["output"]["confidence"]
                                )
                            elif self.monitor_inference_results[i]["output"]["tag"] == self.CRYPTO_CLASS:
                                confidences_crypto_last_time_interval.append(
                                    self.monitor_inference_results[i]["output"]["confidence"]
                                )

                    kpi_conf.kpi_value.intVal = (
                        np.mean(confidences_crypto_last_time_interval)
                        if np.all(outputs_last_time_interval == self.CRYPTO_CLASS)
                        else np.mean(confidences_normal_last_time_interval)
                    )

                    # L3 unique attack connections
                    kpi_unique_attack_conns = Kpi()
                    kpi_unique_attack_conns.kpi_id.kpi_id.CopyFrom(
                        self.monitored_kpis["l3_unique_attack_conns"]["kpi_id"].kpi_id
                    )

                    # get the number of unique attack connections (grouping by origin IP, origin port, destination IP, destination port) of the last aggregation time interval as indicated by the self.MONITORED_KPIS_TIME_INTERVAL_AGG variable
                    num_unique_attack_conns_last_time_interval = 0
                    unique_attack_conns_last_time_interval = []

                    for i in range(self.monitor_inference_results):
                        if (
                            self.monitor_inference_results[i]["timestamp"] >= time_interval_start
                            and self.monitor_inference_results[i]["timestamp"] <= time_interval_end
                            and self.monitor_inference_results[i]["service_id"] == service_id
                            and service_id in self.monitored_kpis["l3_security_status"]["service_ids"]
                        ):
                            if self.monitor_inference_results[i]["output"]["tag"] == self.CRYPTO_CLASS:
                                current_attack_conn = {
                                    "ip_o": self.monitor_inference_results[i]["input"]["src_ip"],
                                    "port_o": self.monitor_inference_results[i]["input"]["src_port"],
                                    "ip_d": self.monitor_inference_results[i]["input"]["dst_ip"],
                                    "port_d": self.monitor_inference_results[i]["input"]["dst_port"],
                                }

                                for j in range(unique_attack_conns_last_time_interval):
                                    if current_attack_conn == unique_attack_conns_last_time_interval[j]:
                                        break

                                    num_unique_attack_conns_last_time_interval += 1
                                    unique_attack_conns_last_time_interval.append(current_attack_conn)

                    kpi_unique_attack_conns.kpi_value.intVal = num_unique_attack_conns_last_time_interval

                    # L3 unique compromised clients
                    kpi_unique_compromised_clients = Kpi()
                    kpi_unique_compromised_clients.kpi_id.kpi_id.CopyFrom(
                        self.monitored_kpis["l3_unique_attack_conns"]["kpi_id"].kpi_id
                    )

                    # get the number of unique compromised clients (grouping by origin IP) of the last aggregation time interval as indicated by the self.MONITORED_KPIS_TIME_INTERVAL_AGG variable
                    num_unique_compromised_clients_last_time_interval = 0
                    unique_compromised_clients_last_time_interval = []

                    for i in range(self.monitor_inference_results):
                        if (
                            self.monitor_inference_results[i]["timestamp"] >= time_interval_start
                            and self.monitor_inference_results[i]["timestamp"] <= time_interval_end
                            and self.monitor_inference_results[i]["service_id"] == service_id
                            and service_id in self.monitored_kpis["l3_security_status"]["service_ids"]
                        ):
                            if self.monitor_inference_results[i]["output"]["tag"] == self.CRYPTO_CLASS:
                                if (
                                    self.monitor_inference_results[i]["output"]["ip_o"]
                                    not in unique_compromised_clients_last_time_interval
                                ):
                                    unique_compromised_clients_last_time_interval.append(
                                        self.monitor_inference_results[i]["output"]["ip_o"]
                                    )
                                    num_unique_compromised_clients_last_time_interval += 1

                    kpi_unique_compromised_clients.kpi_value.intVal = num_unique_compromised_clients_last_time_interval

                    # L3 unique attackers
                    kpi_unique_attackers = Kpi()
                    kpi_unique_attackers.kpi_id.kpi_id.CopyFrom(
                        self.monitored_kpis["l3_unique_attack_conns"]["kpi_id"].kpi_id
                    )

                    # get the number of unique attackers (grouping by destination ip) of the last aggregation time interval as indicated by the self.MONITORED_KPIS_TIME_INTERVAL_AGG variable
                    num_unique_attackers_last_time_interval = 0
                    unique_attackers_last_time_interval = []

                    for i in range(self.monitor_inference_results):
                        if (
                            self.monitor_inference_results[i]["timestamp"] >= time_interval_start
                            and self.monitor_inference_results[i]["timestamp"] <= time_interval_end
                            and self.monitor_inference_results[i]["service_id"] == service_id
                            and service_id in self.monitored_kpis["l3_security_status"]["service_ids"]
                        ):
                            if self.monitor_inference_results[i]["output"]["tag"] == self.CRYPTO_CLASS:
                                if (
                                    self.monitor_inference_results[i]["output"]["ip_d"]
                                    not in unique_attackers_last_time_interval
                                ):
                                    unique_attackers_last_time_interval.append(
                                        self.monitor_inference_results[i]["output"]["ip_d"]
                                    )
                                    num_unique_attackers_last_time_interval += 1

                    kpi_unique_attackers.kpi_value.intVal = num_unique_attackers_last_time_interval

                    timestamp = Timestamp()
                    timestamp.timestamp = timestamp_utcnow_to_float()

                    kpi_security_status.timestamp.CopyFrom(timestamp)
                    kpi_conf.timestamp.CopyFrom(timestamp)
                    kpi_unique_attack_conns.timestamp.CopyFrom(timestamp)
                    kpi_unique_compromised_clients.timestamp.CopyFrom(timestamp)
                    kpi_unique_attackers.timestamp.CopyFrom(timestamp)

                    self.monitoring_client.IncludeKpi(kpi_security_status)
                    self.monitoring_client.IncludeKpi(kpi_conf)
                    self.monitoring_client.IncludeKpi(kpi_unique_attack_conns)
                    self.monitoring_client.IncludeKpi(kpi_unique_compromised_clients)
                    self.monitoring_client.IncludeKpi(kpi_unique_attackers)

                sleep(self.MONITORED_KPIS_TIME_INTERVAL_AGG)
            except KeyboardInterrupt:
                print("Exiting...")
                break

    """
    Classify connection as standard traffic or cryptomining attack and return results
        -input: 
            + request: L3CentralizedattackdetectorMetrics object with connection features information
        -output: L3AttackmitigatorOutput object with information about the assigned class and prediction confidence
    """

    def make_inference(self, request):
        x_data = np.array(
            [
                [
                    request.c_pkts_all,
                    request.c_ack_cnt,
                    request.c_bytes_uniq,
                    request.c_pkts_data,
                    request.c_bytes_all,
                    request.s_pkts_all,
                    request.s_ack_cnt,
                    request.s_bytes_uniq,
                    request.s_pkts_data,
                    request.s_bytes_all,
                ]
            ]
        )

        predictions = self.model.run([self.prob_name], {self.input_name: x_data.astype(np.float32)})[0]

        # Gather the predicted class, the probability of that class and other relevant information required to block the attack
        output_message = {
            "confidence": None,
            "timestamp": datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
            "ip_o": request.ip_o,
            "ip_d": request.ip_d,
            "tag_name": None,
            "tag": None,
            "flow_id": request.flow_id,
            "protocol": request.protocol,
            "port_o": request.port_o,
            "port_d": request.port_d,
            "ml_id": "RandomForest",
            "service_id": request.service_id,
            "endpoint_id": request.endpoint_id,
            "time_start": request.time_start,
            "time_end": request.time_end,
        }

        if predictions[0][1] >= self.CLASSIFICATION_THRESHOLD:
            output_message["confidence"] = predictions[0][1]
            output_message["tag_name"] = "Crypto"
            output_message["tag"] = self.CRYPTO_CLASS
        else:
            output_message["confidence"] = predictions[0][0]
            output_message["tag_name"] = "Normal"
            output_message["tag"] = self.NORMAL_CLASS

        return output_message

    """
    Receive features from Attack Mitigator, predict attack and communicate with Attack Mitigator
        -input: 
            + request: L3CentralizedattackdetectorMetrics object with connection features information
        -output: Empty object with a message about the execution of the function
    """

    def SendInput(self, request, context):
        # Store the data sent in the request
        # Protobuff messages are NOT pickable, so we need to serialize them first
        # self.inference_values.put({"request": request, "timestamp": datetime.now()})

        # Perform inference with the data sent in the request
        logging.info("Performing inference...")
        cryptomining_detector_output = self.make_inference(request)
        logging.info("Inference performed correctly")

        # Store the results of the inference that will be later used to monitor the KPIs
        # Protobuff messages are NOT pickable, so we need to serialize them first
        cryptomining_detector_output_serialized = copy.deepcopy(cryptomining_detector_output)
        cryptomining_detector_output_serialized["service_id"] = MessageToJson(
            request.service_id, preserving_proto_field_name=True
        )
        cryptomining_detector_output_serialized["endpoint_id"] = MessageToJson(
            request.endpoint_id, preserving_proto_field_name=True
        )

        self.inference_results.put({"output": cryptomining_detector_output_serialized, "timestamp": datetime.now()})

        service_id = request.service_id
        device_id = request.endpoint_id.device_id
        endpoint_id = request.endpoint_id

        # Check if a request of a new service has been received and, if so, create the monitored KPIs for that service
        if service_id not in self.service_ids:
            self.create_kpis(service_id, device_id, endpoint_id)
            self.service_ids.append(service_id)

        # Only notify Attack Mitigator when a cryptomining connection has been detected
        if cryptomining_detector_output["tag_name"] == "Crypto":
            logging.info("Crypto attack detected")

            # Notify the Attack Mitigator component about the attack
            logging.info(
                "Notifying the Attack Mitigator component about the attack in order to block the connection..."
            )

            try:
                logging.info("Sending the connection information to the Attack Mitigator component...")
                message = L3AttackmitigatorOutput(**cryptomining_detector_output)
                response = self.attackmitigator_client.SendOutput(message)
                logging.info(
                    "Attack Mitigator notified and received response: ", response.message
                )  # FIX No message received

                return Empty(message="OK, information received and mitigator notified abou the attack")
            except Exception as e:
                logging.error("Error notifying the Attack Mitigator component about the attack: ", e)
                logging.error("Couldn't find l3_attackmitigator")

                return Empty(message="Attack Mitigator not found")
        else:
            logging.info("No attack detected")

            return Empty(message="Ok, information received (no attack detected)")


"""
    def GetOutput(self, request, context):
        logging.info("Returning inference output...")
        k = np.multiply(self.inference_values, [2])
        k = np.sum(k)

        return self.make_inference(k)
"""