# Copyright 2021-2023 H2020 TeraFlow (https://www.teraflow-h2020.eu/)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import hashlib
import logging
import re
from enum import Enum
from typing import Dict, List, Optional, Tuple, Type, Union

LOGGER = logging.getLogger(__name__)


def remove_dict_key(dictionary : Dict, key : str) -> Dict:
    """Remove `key` from `dictionary` (if present) and return the same dictionary.

    Convenient helper function to remove dictionary items in dict/list/set comprehensions.
    The dictionary is modified in place; a missing key is silently ignored.
    """
    dictionary.pop(key, None)
    return dictionary


# Enumeration classes are redundant with gRPC classes, but gRPC does not provide a programmatical method to retrieve
# the values it expects from strings containing the desired value symbol or its integer value, so a kind of mapping is
# required. Besides, ORM Models expect Enum classes in EnumeratedFields; we create specific and conveniently defined
# Enum classes to serve both purposes.
def grpc_to_enum(grpc_enum_class, orm_enum_class : Type[Enum], grpc_enum_value) -> Optional[Enum]:
    """Translate a gRPC enum value into the matching member of an ORM Enum class.

    The gRPC symbolic name is obtained through `grpc_enum_class.Name(grpc_enum_value)`. The prefix to strip from it
    is derived from the ORM class name: upper-cased, without a leading 'ORM_' and without a trailing 'ENUM', plus
    an underscore (e.g. class 'ORM_ColorEnum' gives prefix 'COLOR_').

    Parameters:
        grpc_enum_class: object exposing `Name(value)` that returns the symbolic name of a value.
        orm_enum_class: Enum class whose member names are the gRPC names without the prefix.
        grpc_enum_value: value passed to `grpc_enum_class.Name`.

    Returns:
        The member of `orm_enum_class` whose name matches, or None when there is no such member.
    """
    grpc_enum_name = grpc_enum_class.Name(grpc_enum_value)

    grpc_enum_prefix = orm_enum_class.__name__.upper()
    grpc_enum_prefix = re.sub(r'^ORM_(.+)$', r'\1', grpc_enum_prefix)
    grpc_enum_prefix = re.sub(r'^(.+)ENUM$', r'\1', grpc_enum_prefix)
    grpc_enum_prefix = grpc_enum_prefix + '_'

    # Strip the prefix only where it leads the name. A plain str.replace() would also delete later occurrences
    # (e.g. 'COLOR_DARK_COLOR_BLUE' -> 'DARK_BLUE') and make the lookup fail.
    if grpc_enum_name.startswith(grpc_enum_prefix):
        orm_enum_name = grpc_enum_name[len(grpc_enum_prefix):]
    else:
        orm_enum_name = grpc_enum_name

    # __members__ is the public name-to-member mapping of an Enum class (aliases included).
    return orm_enum_class.__members__.get(orm_enum_name)


# For some models, it is convenient to produce a string hash for fast comparisons of existence or modification.
# Method fast_hasher computes configurable length (between 1 and 64 bytes) hashes and returns them in hex
# representation.

FASTHASHER_ITEM_ACCEPTED_FORMAT = 'Union[bytes, str]'
FASTHASHER_DATA_ACCEPTED_FORMAT = 'Union[{fmt:s}, List[{fmt:s}], Tuple[{fmt:s}]]'.format(
    fmt=FASTHASHER_ITEM_ACCEPTED_FORMAT)


def fast_hasher(
    data : Union[bytes, str, List[Union[bytes, str]], Tuple[Union[bytes, str], ...]], digest_size : int = 8
) -> str:
    """Compute a BLAKE2b hash of `data` and return it as a hexadecimal string.

    Parameters:
        data: a single bytes/str item, or a list/tuple of bytes/str items. str items are encoded as UTF-8.
        digest_size: size of the digest in bytes; the returned string has twice as many hex characters.

    Returns:
        The hexadecimal digest.

    Raises:
        TypeError: if `data` is not bytes, str, list or tuple, or if any item is not bytes or str.

    NOTE: items are fed to the hasher one after another without any delimiter, so ['ab', 'c'] and 'abc' produce
    the same hash. This is kept as is because previously computed hashes may be stored elsewhere.
    """
    # hashlib validates digest_size when the hasher is created, before data is inspected.
    hasher = hashlib.blake2b(digest_size=digest_size)

    # Do not accept sets, dicts, or other unordered data structures since their order is arbitrary thus producing
    # different hashes depending on the order. Consider adding support for sets or dicts with previous sorting of
    # items by their key.
    if isinstance(data, bytes):
        data = [data]
    elif isinstance(data, str):
        data = [data.encode('UTF-8')]
    elif isinstance(data, (list, tuple)):
        pass
    else:
        msg = 'data({:s}) must be {:s}, found {:s}'
        raise TypeError(msg.format(str(data), FASTHASHER_DATA_ACCEPTED_FORMAT, str(type(data))))

    for i, item in enumerate(data):
        if isinstance(item, str):
            item = item.encode('UTF-8')
        elif isinstance(item, bytes):
            pass
        else:
            msg = 'data[{:d}]({:s}) must be {:s}, found {:s}'
            raise TypeError(msg.format(i, str(item), FASTHASHER_ITEM_ACCEPTED_FORMAT, str(type(item))))
        hasher.update(item)

    return hasher.hexdigest()