# Tools.py

# Copyright 2021-2023 H2020 TeraFlow (https://www.teraflow-h2020.eu/)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import hashlib, re
from enum import Enum
from typing import Dict, List, Tuple, Union

# Convenient helper function to remove dictionary items in dict/list/set comprehensions.

def remove_dict_key(dictionary : Dict, key : str):
    """Remove ``key`` from ``dictionary`` in place and return that same dictionary.

    The dictionary is mutated, not copied. Returning it allows the call to be used as an expression, e.g. inside
    a comprehension. A key that is not present is silently ignored.
    """
    try:
        del dictionary[key]
    except KeyError:
        # Nothing to remove: an absent key is not an error for this helper.
        pass
    return dictionary

# Enumeration classes are redundant with gRPC classes, but gRPC does not provide a programmatic way to retrieve
# the values it expects from strings containing the desired value symbol or its integer value, so some kind of
# mapping is required. Besides, ORM Models expect Enum classes in EnumeratedFields; we create specific and
# conveniently defined Enum classes to serve both purposes.

def grpc_to_enum(grpc_enum_class, orm_enum_class : Enum, grpc_enum_value):
    """Translate a gRPC enum value into the matching member of an ORM Enum class.

    The symbolic name of the gRPC value is obtained through ``grpc_enum_class.Name(grpc_enum_value)``. The prefix
    expected at the beginning of that name is derived from the ORM class name: it is upper-cased, a leading
    ``ORM_`` and a trailing ``ENUM`` are dropped, and an underscore is appended (``ORM_FooEnum`` -> ``FOO_``).
    The remainder of the gRPC name is then looked up among the members of ``orm_enum_class``.

    Args:
        grpc_enum_class: object exposing ``Name(value)`` that returns the symbolic name of a gRPC enum value.
        orm_enum_class: the Enum class (not an instance) whose members are named like the gRPC symbols without
            the prefix.
        grpc_enum_value: value handed to ``grpc_enum_class.Name``.

    Returns:
        The member of ``orm_enum_class`` whose name matches, or ``None`` when there is no such member.

    Any exception raised by ``grpc_enum_class.Name`` propagates to the caller.
    """
    grpc_enum_name = grpc_enum_class.Name(grpc_enum_value)

    grpc_enum_prefix = orm_enum_class.__name__.upper()
    grpc_enum_prefix = re.sub(r'^ORM_(.+)$', r'\1', grpc_enum_prefix)
    grpc_enum_prefix = re.sub(r'^(.+)ENUM$', r'\1', grpc_enum_prefix)
    grpc_enum_prefix = grpc_enum_prefix + '_'

    # Strip the prefix only when it leads the name. A blind str.replace() would also delete later occurrences of
    # the same text (e.g. 'COLOR_DARK_COLOR_BLUE' -> 'DARK_BLUE') and make the lookup below fail.
    if grpc_enum_name.startswith(grpc_enum_prefix):
        orm_enum_name = grpc_enum_name[len(grpc_enum_prefix):]
    else:
        orm_enum_name = grpc_enum_name

    # __members__ is the public, read-only name -> member mapping of an Enum class (aliases included).
    return orm_enum_class.__members__.get(orm_enum_name)

# For some models, it is convenient to produce a string hash for fast comparisons of existence or modification.
# Function fast_hasher computes hashes of configurable length (between 1 and 64 bytes) and returns them in hex
# representation.

# Textual descriptions of the input types accepted by fast_hasher. They are only used to compose the messages of
# the TypeError exceptions it raises: one for each individual item, one for the data argument as a whole.
FASTHASHER_ITEM_ACCEPTED_FORMAT = 'Union[bytes, str]'
FASTHASHER_DATA_ACCEPTED_FORMAT = str.format(
    'Union[{fmt:s}, List[{fmt:s}], Tuple[{fmt:s}]]', fmt=FASTHASHER_ITEM_ACCEPTED_FORMAT)

def fast_hasher(data : Union[bytes, str, List[Union[bytes, str]], Tuple[Union[bytes, str]]], digest_size : int = 8):
    """Compute a BLAKE2b hash of ``data`` and return it as a hexadecimal string.

    ``data`` may be a single ``bytes``/``str`` value, or a list/tuple of such values. Strings are encoded as UTF-8
    before hashing. Items are fed to the hasher one after another with no separator, so ``['ab', 'c']`` and
    ``'abc'`` produce the same hash.

    Sets, dicts and other unordered data structures are deliberately rejected: their iteration order is arbitrary,
    so the same content could produce different hashes. Support for them could be added by sorting their items
    beforehand.

    Args:
        data: value, or ordered sequence of values, to hash.
        digest_size: size of the digest in bytes, passed to ``hashlib.blake2b``; the returned string has twice as
            many hexadecimal characters.

    Returns:
        The hexadecimal representation of the digest.

    Raises:
        TypeError: if ``data`` is not bytes, str, list or tuple, or if one of its items is neither bytes nor str.
    """
    # Created before any validation of data, so an invalid digest_size is reported first by hashlib.
    digest = hashlib.blake2b(digest_size=digest_size)

    # Normalize the input into an ordered sequence of chunks.
    if isinstance(data, (bytes, str)):
        chunks = (data,)
    elif isinstance(data, (list, tuple)):
        chunks = data
    else:
        raise TypeError('data({:s}) must be {:s}, found {:s}'.format(
            str(data), FASTHASHER_DATA_ACCEPTED_FORMAT, str(type(data))))

    for position, chunk in enumerate(chunks):
        if isinstance(chunk, str):
            digest.update(chunk.encode('UTF-8'))
            continue
        if not isinstance(chunk, bytes):
            raise TypeError('data[{:d}]({:s}) must be {:s}, found {:s}'.format(
                position, str(chunk), FASTHASHER_ITEM_ACCEPTED_FORMAT, str(type(chunk))))
        digest.update(chunk)

    return digest.hexdigest()