Commit bd059d3d authored by Lluis Gifre Renom's avatar Lluis Gifre Renom
Browse files

Merge branch 'feat/247-cttc-analytics-module-enhancements' into 'develop'

Resolve "(CTTC) Analytics Module Enhancements"

See merge request !317
parents 4e7a76d8 5d18e008
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -39,4 +39,14 @@ enum KpiSampleType {
    KPISAMPLETYPE_L3_SECURITY_STATUS_CRYPTO     = 605;

    KPISAMPLETYPE_SERVICE_LATENCY_MS            = 701;

// output KPIs
    KPISAMPLETYPE_PACKETS_TRANSMITTED_AGG_OUTPUT           = 1101;
    KPISAMPLETYPE_PACKETS_RECEIVED_AGG_OUTPUT              = 1102;
    KPISAMPLETYPE_PACKETS_DROPPED_AGG_OUTPUT               = 1103;
    KPISAMPLETYPE_BYTES_TRANSMITTED_AGG_OUTPUT             = 1201;
    KPISAMPLETYPE_BYTES_RECEIVED_AGG_OUTPUT                = 1202;
    KPISAMPLETYPE_BYTES_DROPPED_AGG_OUTPUT                 = 1203;

    KPISAMPLETYPE_SERVICE_LATENCY_MS_AGG_OUTPUT            = 1701;
}
+4 −1
Original line number Diff line number Diff line
@@ -18,8 +18,11 @@ PROJECTDIR=`pwd`

cd $PROJECTDIR/src
RCFILE=$PROJECTDIR/coverage/.coveragerc

export KFK_SERVER_ADDRESS='127.0.0.1:9092'

CRDB_SQL_ADDRESS=$(kubectl get service cockroachdb-public --namespace crdb -o jsonpath='{.spec.clusterIP}')
export CRDB_URI="cockroachdb://tfs:tfs123@${CRDB_SQL_ADDRESS}:26257/tfs_analytics?sslmode=require"
python3 -m pytest --log-level=DEBUG --log-cli-level=DEBUG --verbose \

python3 -m pytest --log-level=DEBUG --log-cli-level=INFO --verbose \
    analytics/backend/tests/test_backend.py
+1 −1
Original line number Diff line number Diff line
@@ -21,5 +21,5 @@ RCFILE=$PROJECTDIR/coverage/.coveragerc
export KFK_SERVER_ADDRESS='127.0.0.1:9092'
CRDB_SQL_ADDRESS=$(kubectl get service cockroachdb-public --namespace crdb -o jsonpath='{.spec.clusterIP}')
export CRDB_URI="cockroachdb://tfs:tfs123@${CRDB_SQL_ADDRESS}:26257/tfs_analytics?sslmode=require"
python3 -m pytest --log-level=DEBUG --log-cli-level=DEBUG --verbose \
python3 -m pytest --log-level=DEBUG --log-cli-level=INFO --verbose \
    analytics/frontend/tests/test_frontend.py
+101 −56
Original line number Diff line number Diff line
@@ -16,32 +16,46 @@ import time
import json
import logging
import threading

from common.tools.service.GenericGrpcService import GenericGrpcService
from common.tools.kafka.Variables import KafkaConfig, KafkaTopic
from confluent_kafka import Consumer as KafkaConsumer
from confluent_kafka import Consumer
from confluent_kafka import KafkaError
from common.Constants import ServiceNameEnum
from common.Settings import get_service_port_grpc
from threading import Thread, Event
from .DaskStreaming import DaskStreamer
from analytics.backend.service.Streamer import DaskStreamer
from analytics.backend.service.AnalyzerHelper import AnalyzerHelper


LOGGER = logging.getLogger(__name__)

class AnalyticsBackendService(GenericGrpcService):
    """
    Class listens for ...
    AnalyticsBackendService class is responsible for handling the requests from the AnalyticsFrontendService.
    It listens to the Kafka topic for the requests and starts/stops the DaskStreamer accordingly.
    It also initializes the Kafka producer and Dask cluster for the streamer.
    """
    def __init__(self, cls_name : str = __name__) -> None:
    def __init__(self, cls_name : str = __name__, n_workers=1, threads_per_worker=1
                 ) -> None:
        LOGGER.info('Init AnalyticsBackendService')
        port = get_service_port_grpc(ServiceNameEnum.ANALYTICSBACKEND)
        super().__init__(port, cls_name=cls_name)
        self.running_threads = {}       # To keep track of all running analyzers 
        self.kafka_consumer = KafkaConsumer({'bootstrap.servers' : KafkaConfig.get_kafka_address(),
                                            'group.id'           : 'analytics-frontend',
                                            'auto.offset.reset'  : 'latest'})
        self.active_streamers = {}
        self.central_producer = AnalyzerHelper.initialize_kafka_producer()  # Multi-threaded producer
        self.cluster          = AnalyzerHelper.initialize_dask_cluster(
                                        n_workers, threads_per_worker) # Local cluster
        self.request_consumer = Consumer({
            'bootstrap.servers' : KafkaConfig.get_kafka_address(),
            'group.id'          : 'analytics-backend',
            'auto.offset.reset' : 'latest',
            })


    def install_servicers(self):
        threading.Thread(target=self.RequestListener, args=()).start()
        threading.Thread(
            target=self.RequestListener,
            args=()
        ).start()

    def RequestListener(self):
        """
@@ -49,7 +63,7 @@ class AnalyticsBackendService(GenericGrpcService):
        """
        LOGGER.info("Request Listener is initiated ...")
        # print      ("Request Listener is initiated ...")
        consumer = self.kafka_consumer
        consumer = self.request_consumer
        consumer.subscribe([KafkaTopic.ANALYTICS_REQUEST.value])
        while True:
            receive_msg = consumer.poll(2.0)
@@ -60,65 +74,96 @@ class AnalyticsBackendService(GenericGrpcService):
                    continue
                else:
                    LOGGER.error("Consumer error: {:}".format(receive_msg.error()))
                    # print       ("Consumer error: {:}".format(receive_msg.error()))
                    break
            try:
                analyzer      = json.loads(receive_msg.value().decode('utf-8'))
                analyzer_uuid = receive_msg.key().decode('utf-8')
                LOGGER.debug('Recevied Analyzer: {:} - {:}'.format(analyzer_uuid, analyzer))
                # print       ('Recevied Analyzer: {:} - {:}'.format(analyzer_uuid, analyzer))
                LOGGER.info('Recevied Analyzer: {:} - {:}'.format(analyzer_uuid, analyzer))

                if analyzer["algo_name"] is None and analyzer["oper_mode"] is None:
                    self.StopDaskListener(analyzer_uuid)
                    if self.StopStreamer(analyzer_uuid):
                        LOGGER.info("Dask Streamer stopped.")
                    else:
                    self.StartDaskListener(analyzer_uuid, analyzer)
                        LOGGER.error("Failed to stop Dask Streamer.")
                else:
                    if self.StartStreamer(analyzer_uuid, analyzer):
                        LOGGER.info("Dask Streamer started.")
                    else:
                        LOGGER.error("Failed to start Dask Streamer.")
            except Exception as e:
                LOGGER.warning("Unable to consume message from topic: {:}. ERROR: {:}".format(KafkaTopic.ANALYTICS_REQUEST.value, e))
                # print         ("Unable to consume message from topic: {:}. ERROR: {:}".format(KafkaTopic.ANALYTICS_REQUEST.value, e))

    def StartDaskListener(self, analyzer_uuid, analyzer):
        kpi_list      = analyzer[ 'input_kpis'   ] 
        thresholds    = analyzer[ 'thresholds'   ]
        window_size   = analyzer[ 'window_size'  ]
        window_slider = analyzer[ 'window_slider']

        LOGGER.debug ("Received parameters: {:} - {:} - {:} - {:}".format(
            kpi_list, thresholds, window_size, window_slider))
        # print        ("Received parameters: {:} - {:} - {:} - {:}".format(
        #     kpi_list, thresholds, window_size, window_slider))


    def StartStreamer(self, analyzer_uuid : str, analyzer : dict):
        """
        Start the DaskStreamer with the given parameters.
        """
        if analyzer_uuid in self.active_streamers:
            LOGGER.warning("Dask Streamer already running with the given analyzer_uuid: {:}".format(analyzer_uuid))
            return False
        try:
            stop_event = Event()
            thread     = Thread(
                target=DaskStreamer,
                # args=(analyzer_uuid, kpi_list, oper_list, thresholds, stop_event),
                args=(analyzer['output_kpis'][0] , kpi_list, thresholds, stop_event),
                kwargs={
                    "window_size"       : window_size,
                }
            streamer = DaskStreamer(
                key               = analyzer_uuid,
                input_kpis        = analyzer['input_kpis' ],
                output_kpis       = analyzer['output_kpis'],
                thresholds        = analyzer['thresholds' ],
                batch_size        = analyzer['batch_size' ],
                window_size       = analyzer['window_size'],
                cluster_instance  = self.cluster,
                producer_instance = self.central_producer,
            )
            thread.start()
            self.running_threads[analyzer_uuid] = (thread, stop_event)
            # print      ("Initiated Analyzer backend: {:}".format(analyzer_uuid))
            LOGGER.info("Initiated Analyzer backend: {:}".format(analyzer_uuid))
            streamer.start()
            LOGGER.info(f"Streamer started with analyzer Id: {analyzer_uuid}")

            # Stop the streamer after the given duration
            if analyzer['duration'] > 0:
                def stop_after_duration():
                    time.sleep(analyzer['duration'])
                    LOGGER.warning(f"Execution duration completed of Analyzer: {analyzer_uuid}")
                    if not self.StopStreamer(analyzer_uuid):
                        LOGGER.warning("Failed to stop Dask Streamer. Streamer may be already terminated.")

                duration_thread = threading.Thread(target=stop_after_duration, daemon=True)
                duration_thread.start()

            self.active_streamers[analyzer_uuid] = streamer
            return True
        except Exception as e:
            # print       ("Failed to initiate Analyzer backend: {:}".format(e))
            LOGGER.error("Failed to initiate Analyzer backend: {:}".format(e))
            LOGGER.error("Failed to start Dask Streamer. ERROR: {:}".format(e))
            return False

    def StopDaskListener(self, analyzer_uuid):
        if analyzer_uuid in self.running_threads:
    def StopStreamer(self, analyzer_uuid : str):
        """
        Stop the DaskStreamer with the given analyzer_uuid.
        """
        try:
                thread, stop_event = self.running_threads[analyzer_uuid]
                stop_event.set()
                thread.join()
                del self.running_threads[analyzer_uuid]
                # print      ("Terminating backend (by TerminateBackend): Analyzer Id: {:}".format(analyzer_uuid))
                LOGGER.info("Terminating backend (by TerminateBackend): Analyzer Id: {:}".format(analyzer_uuid))
            if analyzer_uuid not in self.active_streamers:
                LOGGER.warning("Dask Streamer not found with the given analyzer_uuid: {:}".format(analyzer_uuid))
                return False
            LOGGER.info(f"Terminating streamer with Analyzer Id: {analyzer_uuid}")
            streamer = self.active_streamers[analyzer_uuid]
            streamer.stop()
            streamer.join()
            del self.active_streamers[analyzer_uuid]
            LOGGER.info(f"Streamer with analyzer_uuid '{analyzer_uuid}' has been trerminated sucessfully.")
            return True
        except Exception as e:
                LOGGER.error("Failed to terminate. Analyzer Id: {:} - ERROR: {:}".format(analyzer_uuid, e))
            LOGGER.error("Failed to stop Dask Streamer. ERROR: {:}".format(e))
            return False
        else:
            # print         ("Analyzer not found in active collectors. Analyzer Id: {:}".format(analyzer_uuid))
            LOGGER.warning("Analyzer not found in active collectors: Analyzer Id: {:}".format(analyzer_uuid))

    def close(self):        # TODO: Is this function needed?
        """
        Release the shared resources held by this service.

        Flushes the Kafka producer stored in ``self.central_producer`` and
        closes the Dask cluster stored in ``self.cluster``. Each step is
        skipped when its attribute is falsy, and a failure in one step is
        logged without preventing the other step from running.
        """
        # Step 1: flush the producer.
        producer = self.central_producer
        if producer:
            try:
                producer.flush()
                LOGGER.info("Kafka producer flushed and closed.")
            except Exception as flush_error:
                LOGGER.error(f"Error closing Kafka producer: {flush_error}")

        # Step 2: close the Dask cluster, regardless of how step 1 ended.
        dask_cluster = self.cluster
        if dask_cluster:
            try:
                dask_cluster.close()
                LOGGER.info("Dask cluster closed.")
            except Exception as close_error:
                LOGGER.error(f"Error closing Dask cluster: {close_error}")
+131 −0
Original line number Diff line number Diff line
# Copyright 2022-2024 ETSI SDG TeraFlowSDN (TFS) (https://tfs.etsi.org/)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
from enum import Enum
import pandas as pd

logger = logging.getLogger(__name__)


class Handlers(Enum):
    """Enumeration of the handler names known to this module."""

    AGGREGATION_HANDLER = "AggregationHandler"
    UNSUPPORTED_HANDLER = "UnsupportedHandler"

    @classmethod
    def is_valid_handler(cls, handler_name):
        """
        Tell whether ``handler_name`` equals the value of one of the members.

        Args:
            handler_name: Candidate handler name; compared against the member
                values (e.g. "AggregationHandler"), not the member names.

        Returns:
            bool: True when a member with that value exists, False otherwise.
        """
        # The Enum machinery keeps a value -> member mapping; a membership
        # test against it is a lookup by value.
        known_values = cls._value2member_map_
        return handler_name in known_values

# This method is top-level and should not be part of the class due to serialization issues.
def threshold_handler(key, aggregated_df, thresholds):
    """
    Flag aggregated metrics that cross their fall/raise thresholds.

    For every metric named in ``thresholds`` that is also a column of
    ``aggregated_df``, two boolean columns are added:
    ``<metric>_TH_RAISE`` (value > raise_th) and
    ``<metric>_TH_FALL`` (value < fall_th).

    Args:
        key (str): Identifier of the aggregated data; used only in log messages.
        aggregated_df (pd.DataFrame): DataFrame with aggregated metrics. It is
            modified in place.
        thresholds (dict): Maps '<metricName>' to a two-element list or tuple
            ``(fall_th, raise_th)``. Entries with any other shape, or naming a
            metric that is not a column, are skipped with a warning.

    Returns:
        pd.DataFrame: The same ``aggregated_df`` object, with the threshold
        columns added.
    """
    for metric_name, threshold_values in thresholds.items():
        # Ensure the metric column exists in the DataFrame
        if metric_name not in aggregated_df.columns:
            logger.warning(f"Metric '{metric_name}' does not exist in the DataFrame for key: {key}. Skipping threshold application.")
            continue

        # Accept both lists (e.g. JSON-decoded input) and tuples (as documented)
        is_pair = isinstance(threshold_values, (list, tuple)) and len(threshold_values) == 2
        if not is_pair:
            logger.warning(f"Threshold values for '{metric_name}' ({threshold_values}) are not a list or tuple of length 2. Skipping threshold application.")
            continue

        fall_th, raise_th = threshold_values
        metric_values = aggregated_df[metric_name]

        # Add threshold columns with updated naming
        aggregated_df[f"{metric_name}_TH_RAISE"] = metric_values > raise_th
        aggregated_df[f"{metric_name}_TH_FALL"]  = metric_values < fall_th
    return aggregated_df

def aggregation_handler(
        batch_type_name, key, batch, input_kpi_list, output_kpi_list, thresholds
    ):
    """
    Aggregate a batch of KPI samples per input KPI and map each result to
    its output KPI.

    Every input KPI that has samples in the batch is aggregated with the
    methods requested in its task parameters, relabelled with the output KPI
    at the same position, and passed through ``threshold_handler``. The
    records of all KPIs are returned together (previously only the first KPI
    with data was returned).

    Args:
        batch_type_name (str): Label of the batch; used only in log messages.
        key (str): Identifier used in log messages and forwarded to
            ``threshold_handler``.
        batch: Records convertible with ``pd.DataFrame(batch)``; the resulting
            frame must provide 'kpi_id' and 'kpi_value' columns.
        input_kpi_list (list): KPI ids to aggregate.
        output_kpi_list (list): Output KPI ids, indexed like ``input_kpi_list``.
        thresholds (dict): Must contain "task_parameter", a list indexed like
            ``input_kpi_list``; each entry maps an aggregation method name
            ("min", "max", "avg", "first", "last", "variance", "count",
            "range", "sum") to its threshold values. Unknown method names are
            ignored for aggregation.

    Returns:
        list[dict]: Aggregated records (``DataFrame.to_dict(orient='records')``)
        for all KPIs that had data; empty when nothing could be aggregated.

    Raises:
        KeyError: If ``thresholds`` has no "task_parameter" entry.
        IndexError: If "task_parameter" or ``output_kpi_list`` has no entry
            for an input KPI position.
    """
    logger.info(f"({batch_type_name}) Processing batch for key: {key}")
    if not batch:
        logger.info("Empty batch received. Skipping processing.")
        return []

    logger.info(f" >>>>> Processing {len(batch)} records for key: {key}")

    # Convert data into a DataFrame
    df = pd.DataFrame(batch)

    # Filter the DataFrame to retain rows where kpi_id is in the input list (subscribed endpoints only)
    df = df[df['kpi_id'].isin(input_kpi_list)].copy()

    if df.empty:
        logger.warning(f"No data available for KPIs: {input_kpi_list}. Skipping processing.")
        return []

    # Define all possible aggregation methods
    aggregation_methods = {
        "min"     : ('kpi_value', 'min'),
        "max"     : ('kpi_value', 'max'),
        "avg"     : ('kpi_value', 'mean'),
        "first"   : ('kpi_value', lambda x: x.iloc[0]),
        "last"    : ('kpi_value', lambda x: x.iloc[-1]),
        "variance": ('kpi_value', 'var'),
        "count"   : ('kpi_value', 'count'),
        "range"   : ('kpi_value', lambda x: x.max() - x.min()),
        "sum"     : ('kpi_value', 'sum'),
    }

    task_parameters = thresholds["task_parameter"]
    records = []

    # Process each KPI-specific task parameter
    for kpi_index, kpi_id in enumerate(input_kpi_list):
        kpi_task_parameters = task_parameters[kpi_index]

        # Keep only the requested methods that are actually supported
        selected_methods = {
            method: aggregation_methods[method]
            for method in kpi_task_parameters
            if method in aggregation_methods
        }

        kpi_df = df[df['kpi_id'] == kpi_id]
        if kpi_df.empty:
            logger.warning(f"No data available for KPIs: {kpi_id}. Skipping aggregation.")
            continue

        # agg() with no aggregations raises an obscure TypeError; skip instead
        if not selected_methods:
            logger.warning(f"No supported aggregation method requested for KPI: {kpi_id}. Skipping aggregation.")
            continue

        agg_df = kpi_df.groupby('kpi_id').agg(**selected_methods).reset_index()

        # Publish the aggregate under the output KPI paired with this input KPI
        agg_df['kpi_id'] = output_kpi_list[kpi_index]

        result = threshold_handler(key, agg_df, kpi_task_parameters)

        # Accumulate instead of returning, so later KPIs are processed too
        records.extend(result.to_dict(orient='records'))

    return records
Loading