Skip to content
Tools.py 2.76 KiB
Newer Older
import hashlib, re
from enum import Enum
from typing import Dict, List, Tuple, Union

# Convenient helper function to remove dictionary items in dict/list/set comprehensions.

def remove_dict_key(dictionary : Dict, key : str):
    """Remove `key` from `dictionary` in place and return that same dictionary.

    A key that is not present is silently ignored. Returning the (mutated) dictionary allows the call to be used as
    an expression, e.g. inside dict/list/set comprehensions.
    """
    try:
        del dictionary[key]
    except KeyError:
        # Key was not there to begin with; nothing to remove.
        pass
    return dictionary

# Enumeration classes are redundant with gRPC classes, but gRPC does not provide a programmatic way to retrieve
# the values it expects from strings containing the desired value symbol or its integer value, so a kind of mapping is
# required. Besides, ORM Models expect Enum classes in EnumeratedFields; we create specific and conveniently defined
# Enum classes to serve both purposes.

def grpc_to_enum(grpc_enum_class, orm_enum_class : Enum, grpc_enum_value):
    """Translate a gRPC enum value into the matching member of an ORM Enum class.

    The gRPC symbol name is obtained through `grpc_enum_class.Name(grpc_enum_value)`. The prefix expected on that
    symbol is derived from the ORM class name: upper-cased, with a leading 'ORM_' and a trailing 'ENUM' removed, and
    a '_' appended (e.g. class 'ORM_FooEnum' gives prefix 'FOO_'). The prefix is stripped from the start of the
    symbol name and the remainder is looked up among the members of `orm_enum_class`.

    Parameters:
        grpc_enum_class: object exposing a `Name(value)` callable that returns the symbol name of a value.
        orm_enum_class: Enum class (not an instance) whose member names are the un-prefixed gRPC symbols.
        grpc_enum_value: value handed to `grpc_enum_class.Name`.

    Returns:
        The member of `orm_enum_class` with the resulting name, or None when no member has that name.
    """
    grpc_enum_name = grpc_enum_class.Name(grpc_enum_value)

    # Build the prefix the gRPC symbols are expected to carry, from the ORM class name.
    grpc_enum_prefix = orm_enum_class.__name__.upper()
    grpc_enum_prefix = re.sub(r'^ORM_(.+)$', r'\1', grpc_enum_prefix)
    grpc_enum_prefix = re.sub(r'^(.+)ENUM$', r'\1', grpc_enum_prefix)
    grpc_enum_prefix = grpc_enum_prefix + '_'

    # Strip the prefix only where it leads the symbol name. A plain str.replace() would also delete later
    # occurrences (e.g. 'TYPE_SUB_TYPE_A' -> 'SUB_A') and make the member lookup fail.
    if grpc_enum_name.startswith(grpc_enum_prefix):
        orm_enum_name = grpc_enum_name[len(grpc_enum_prefix):]
    else:
        orm_enum_name = grpc_enum_name

    # __members__ is the public name -> member mapping of an Enum class (aliases included).
    return orm_enum_class.__members__.get(orm_enum_name)

# For some models, it is convenient to produce a string hash for fast comparisons of existence or modification. The
# fast_hasher function computes hashes of configurable length (between 1 and 64 bytes) and returns them as hex strings.

# Human-readable descriptions of the accepted argument types; they are interpolated into the TypeError messages
# raised by fast_hasher.
FASTHASHER_ITEM_ACCEPTED_FORMAT = 'Union[bytes, str]'
FASTHASHER_DATA_ACCEPTED_FORMAT = (
    'Union[{fmt:s}, List[{fmt:s}], Tuple[{fmt:s}]]'
    .format(fmt=FASTHASHER_ITEM_ACCEPTED_FORMAT)
)

def fast_hasher(
    data : Union[bytes, str, List[Union[bytes, str]], Tuple[Union[bytes, str], ...]], digest_size : int = 8
) -> str:
    """Compute a BLAKE2b hash of `data` and return it as a hexadecimal string.

    `data` may be a single bytes/str value, or a list/tuple (of any length) of bytes/str items. str values are
    encoded as UTF-8 before hashing. Items are fed to the hasher one after another without any separator, so item
    boundaries do not influence the result: ['ab', 'c'] and ['a', 'bc'] produce the same hash.

    Parameters:
        data: value(s) to hash.
        digest_size: size of the digest in bytes, passed to hashlib.blake2b; the returned string has twice as many
            hex characters.

    Returns:
        The hex representation of the digest.

    Raises:
        TypeError: if `data` is not bytes, str, list or tuple, or if one of its items is neither bytes nor str.
    """
    hasher = hashlib.blake2b(digest_size=digest_size)

    # Do not accept sets, dicts, or other unordered data structures since their order is arbitrary, thus producing
    # different hashes depending on the order. Consider adding support for sets or dicts with previous sorting of
    # items by their key.
    if isinstance(data, (bytes, str)):
        # Treat a single value as a one-item sequence; str items are encoded in the loop below.
        data = [data]
    elif not isinstance(data, (list, tuple)):
        msg = 'data({:s}) must be {:s}, found {:s}'
        raise TypeError(msg.format(str(data), FASTHASHER_DATA_ACCEPTED_FORMAT, str(type(data))))

    for i,item in enumerate(data):
        if isinstance(item, str):
            item = item.encode('UTF-8')
        elif not isinstance(item, bytes):
            msg = 'data[{:d}]({:s}) must be {:s}, found {:s}'
            raise TypeError(msg.format(i, str(item), FASTHASHER_ITEM_ACCEPTED_FORMAT, str(type(item))))
        hasher.update(item)
    return hasher.hexdigest()