Commit 10a63cc4 authored by Lluis Gifre Renom's avatar Lluis Gifre Renom
Browse files

Common and Device:

- Implemented generic MutexQueues class
- Implemented sequentialization of operations in Device component to prevent data corruption and race conditions
parent 8806bbf9
Loading
Loading
Loading
Loading
+78 −0
Original line number Diff line number Diff line
# Copyright 2021-2023 H2020 TeraFlow (https://www.teraflow-h2020.eu/)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# MutexQueues:
# ------------
# This class schedules and serializes operations issued concurrently over a
# number of resources. For instance, when multiple components want to
# configure devices through the Device component, configuration operations
# have to be serialized to prevent data corruption, race conditions, etc.
# Usage Example:
#   class Servicer():
#       def __init__(self):
#           # init other stuff
#           self.drivers = dict()
#           self.mutex_queues = MutexQueues()
#       
#       def configure_device(self, device_uuid, settings):
#           self.mutex_queues.wait_my_turn(device_uuid)
#           try:
#               driver = self.drivers.get(device_uuid)
#               if driver is None:
#                   driver = Driver(device_uuid)
#                   self.drivers[device_uuid] = driver
#               driver.configure(settings)
#           finally:
#               # always release the turn, even if the operation raises;
#               # otherwise the tasks waiting in the queue block forever
#               self.mutex_queues.signal_done(device_uuid)

import threading
from queue import Queue
from typing import Dict

class MutexQueues:
    """Serialize operations issued concurrently over named resources.

    Each resource name owns a FIFO queue of ``threading.Event`` objects, one
    per caller. The event at the head of a queue belongs to the caller that is
    currently allowed to run; every other caller blocks on its own event until
    it reaches the head of the queue.

    Every call to ``wait_my_turn(name)`` must be paired with exactly one call
    to ``signal_done(name)``, ideally from a ``finally`` clause so the queue
    keeps moving when the protected operation raises.
    """

    def __init__(self) -> None:
        # lock to protect updates of the dictionary and of the queues it holds
        self.lock = threading.Lock()

        # dictionary of queues of mutexes: queue_name => queue[mutex]
        # first mutex is the running one; a name is removed from the dictionary
        # as soon as its queue becomes empty so the dictionary does not grow
        # with every resource name ever seen
        self.mutex_queues : Dict[str, Queue[threading.Event]] = dict()

    def wait_my_turn(self, queue_name : str) -> None:
        """Block the calling thread until it may operate on ``queue_name``.

        Returns immediately when no other task is running or waiting for that
        name; otherwise waits until all previously enqueued tasks have called
        ``signal_done``.
        """
        # create my mutex and enqueue it
        mutex = threading.Event()
        with self.lock:
            waiters = self.mutex_queues.get(queue_name)
            if waiters is None:
                # only build a queue when the name has none yet
                waiters = self.mutex_queues[queue_name] = Queue()
            first_in_queue = (waiters.qsize() == 0)
            waiters.put_nowait(mutex)

        # if I'm the first in the queue upon addition, there are no running
        # tasks: directly return without waiting
        if first_in_queue: return

        # otherwise, wait for my turn in the queue
        mutex.wait()

    def signal_done(self, queue_name : str) -> None:
        """Release the turn held on ``queue_name`` and wake the next waiter.

        Raises:
            queue.Empty: if no task is registered for ``queue_name``, i.e. the
                call is not paired with a previous ``wait_my_turn``.
        """
        # I'm done with my work
        with self.lock:
            waiters = self.mutex_queues.get(queue_name)
            if waiters is None:
                # same exception get_nowait() raises on an empty queue, but
                # without leaving a stray empty queue in the dictionary
                from queue import Empty
                raise Empty('No task registered for queue({:s})'.format(str(queue_name)))

            # remove myself from the queue
            waiters.get_nowait()

            # if there are no other tasks queued, drop the queue and return
            if waiters.qsize() == 0:
                del self.mutex_queues[queue_name]
                return

            # otherwise, signal the next task in the queue to start; it stays
            # at the head of the queue until it calls signal_done itself
            next_mutex : threading.Event = waiters.queue[0]
            next_mutex.set()
+14 −0
Original line number Diff line number Diff line
# Copyright 2021-2023 H2020 TeraFlow (https://www.teraflow-h2020.eu/)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+6 −1
Original line number Diff line number Diff line
@@ -23,10 +23,15 @@ from .driver_api.DriverInstanceCache import DriverInstanceCache
from .DeviceServiceServicerImpl import DeviceServiceServicerImpl
from .MonitoringLoops import MonitoringLoops

# Custom gRPC settings
# Multiple clients might keep connections alive waiting for RPC methods to be executed.
# Requests need to be serialized to ensure correct device configurations
GRPC_MAX_WORKERS = 200

class DeviceService(GenericGrpcService):
    def __init__(self, driver_instance_cache : DriverInstanceCache, cls_name: str = __name__) -> None:
        port = get_service_port_grpc(ServiceNameEnum.DEVICE)
        super().__init__(port, cls_name=cls_name)
        super().__init__(port, max_workers=GRPC_MAX_WORKERS, cls_name=cls_name)
        database = Database(get_database_backend(backend=BackendEnum.INMEMORY))
        self.monitoring_loops = MonitoringLoops(database)
        self.device_servicer = DeviceServiceServicerImpl(database, driver_instance_cache, self.monitoring_loops)
+316 −294
Original line number Diff line number Diff line
@@ -24,6 +24,7 @@ from common.proto.kpi_sample_types_pb2 import KpiSampleType
from common.rpc_method_wrapper.Decorator import create_metrics, safe_and_metered_rpc_method
from common.rpc_method_wrapper.ServiceExceptions import InvalidArgumentException, OperationFailedException
from common.tools.grpc.Tools import grpc_message_to_json
from common.tools.mutex_queues.MutexQueues import MutexQueues
from context.client.ContextClient import ContextClient
from .database.ConfigModel import (
    ConfigModel, ConfigRuleModel, ORM_ConfigActionEnum, get_config_rules, grpc_config_rules_to_raw, update_config)
@@ -56,6 +57,7 @@ class DeviceServiceServicerImpl(DeviceServiceServicer):
        self.database = database
        self.driver_instance_cache = driver_instance_cache
        self.monitoring_loops = monitoring_loops
        self.mutex_queues = MutexQueues()
        LOGGER.debug('Servicer Created')

    @safe_and_metered_rpc_method(METRICS, LOGGER)
@@ -101,6 +103,8 @@ class DeviceServiceServicerImpl(DeviceServiceServicer):
        json_request['device_config'] = {}
        request = Device(**json_request)

        self.mutex_queues.wait_my_turn(device_uuid)
        try:
            sync_device_from_context(device_uuid, self.context_client, self.database)
            db_device,_ = update_device_in_local_database(self.database, request)

@@ -169,12 +173,16 @@ class DeviceServiceServicerImpl(DeviceServiceServicer):

            sync_device_to_context(db_device, self.context_client)
            return DeviceId(**db_device.dump_id())
        finally:
            self.mutex_queues.signal_done(device_uuid)

    @safe_and_metered_rpc_method(METRICS, LOGGER)
    def ConfigureDevice(self, request : Device, context : grpc.ServicerContext) -> DeviceId:
        device_id = request.device_id
        device_uuid = device_id.device_uuid.uuid

        self.mutex_queues.wait_my_turn(device_uuid)
        try:
            sync_device_from_context(device_uuid, self.context_client, self.database)

            context_config_rules = get_config_rules(self.database, device_uuid, 'running')
@@ -231,11 +239,16 @@ class DeviceServiceServicerImpl(DeviceServiceServicer):

            sync_device_to_context(db_device, self.context_client)
            return DeviceId(**db_device.dump_id())
        finally:
            self.mutex_queues.signal_done(device_uuid)


    @safe_and_metered_rpc_method(METRICS, LOGGER)
    def DeleteDevice(self, request : DeviceId, context : grpc.ServicerContext) -> Empty:
        device_uuid = request.device_uuid.uuid

        self.mutex_queues.wait_my_turn(device_uuid)
        try:
            self.monitoring_loops.remove(device_uuid)

            sync_device_from_context(device_uuid, self.context_client, self.database)
@@ -272,25 +285,32 @@ class DeviceServiceServicerImpl(DeviceServiceServicer):
            db_initial_config.delete()
            db_running_config.delete()
            return Empty()
        finally:
            self.mutex_queues.signal_done(device_uuid)

    @safe_and_metered_rpc_method(METRICS, LOGGER)
    def GetInitialConfig(self, request : DeviceId, context : grpc.ServicerContext) -> DeviceConfig:
        device_uuid = request.device_uuid.uuid

        self.mutex_queues.wait_my_turn(device_uuid)
        try:
            sync_device_from_context(device_uuid, self.context_client, self.database)
            db_device : DeviceModel = get_object(self.database, DeviceModel, device_uuid, raise_if_not_found=False)

            config_rules = {} if db_device is None else db_device.dump_initial_config()
        return DeviceConfig(config_rules=config_rules)
            device_config = DeviceConfig(config_rules=config_rules)
            return device_config
        finally:
            self.mutex_queues.signal_done(device_uuid)

    @safe_and_metered_rpc_method(METRICS, LOGGER)
    def MonitorDeviceKpi(self, request : MonitoringSettings, context : grpc.ServicerContext) -> Empty:
        kpi_uuid = request.kpi_id.kpi_id.uuid

        device_uuid = request.kpi_descriptor.device_id.device_uuid.uuid
        self.mutex_queues.wait_my_turn(device_uuid)
        try:
            subscribe = (request.sampling_duration_s > 0.0) and (request.sampling_interval_s > 0.0)
            if subscribe:
            device_uuid = request.kpi_descriptor.device_id.device_uuid.uuid

                db_device : DeviceModel = get_object(self.database, DeviceModel, device_uuid, raise_if_not_found=False)
                if db_device is None:
                    msg = 'Device({:s}) has not been added to this Device instance.'.format(str(device_uuid))
@@ -446,3 +466,5 @@ class DeviceServiceServicerImpl(DeviceServiceServicer):

            sync_device_to_context(db_device, self.context_client)
            return Empty()
        finally:
            self.mutex_queues.signal_done(device_uuid)