Newer
Older
# Copyright 2021-2023 H2020 TeraFlow (https://www.teraflow-h2020.eu/)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

Lluis Gifre Renom
committed
import anytree, copy, logging, pytz, queue, re, threading
#import lxml.etree as ET
from datetime import datetime, timedelta
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
from apscheduler.executors.pool import ThreadPoolExecutor
from apscheduler.job import Job
from apscheduler.jobstores.memory import MemoryJobStore
from apscheduler.schedulers.background import BackgroundScheduler
from common.method_wrappers.Decorator import MetricTypeEnum, MetricsPool, metered_subclass_method, INF

Lluis Gifre Renom
committed
from common.tools.client.RetryDecorator import delay_exponential
from common.type_checkers.Checkers import chk_length, chk_string, chk_type, chk_float
from device.service.driver_api.Exceptions import UnsupportedResourceKeyException
from device.service.driver_api._Driver import _Driver

Lluis Gifre Renom
committed
from device.service.driver_api.AnyTreeTools import TreeNode, get_subnode, set_subnode_value #dump_subtree
#from .Tools import xml_pretty_print, xml_to_dict, xml_to_file
from .templates import ALL_RESOURCE_KEYS, EMPTY_CONFIG, compose_config, get_filter, parse
from .RetryDecorator import retry
logging.getLogger('ncclient.manager').setLevel(logging.DEBUG if DEBUG_MODE else logging.WARNING)
logging.getLogger('ncclient.transport.ssh').setLevel(logging.DEBUG if DEBUG_MODE else logging.WARNING)
logging.getLogger('apscheduler.executors.default').setLevel(logging.INFO if DEBUG_MODE else logging.ERROR)
logging.getLogger('apscheduler.scheduler').setLevel(logging.INFO if DEBUG_MODE else logging.ERROR)
logging.getLogger('monitoring-client').setLevel(logging.INFO if DEBUG_MODE else logging.ERROR)
RE_GET_ENDPOINT_FROM_INTERFACE_KEY = re.compile(r'.*interface\[([^\]]+)\].*')
RE_GET_ENDPOINT_FROM_INTERFACE_XPATH = re.compile(r".*interface\[oci\:name\='([^\]]+)'\].*")
# Collection of samples through NetConf is very slow and each request collects all the data.
# Populate a cache periodically (when first interface is interrogated).
# Evict data after some seconds, when data is considered as outdated
SAMPLE_EVICTION_SECONDS = 30.0 # seconds
SAMPLE_RESOURCE_KEY = 'interfaces/interface/state/counters'

Lluis Gifre Renom
committed
MAX_RETRIES = 15
DELAY_FUNCTION = delay_exponential(initial=0.01, increment=2.0, maximum=5.0)
RETRY_DECORATOR = retry(max_retries=MAX_RETRIES, delay_function=DELAY_FUNCTION, prepare_method_name='connect')
class NetconfSessionHandler:
def __init__(self, address : str, port : int, **settings) -> None:
self.__lock = threading.RLock()
self.__connected = threading.Event()
self.__address = address
self.__port = int(port)
self.__username = settings.get('username')
self.__password = settings.get('password')
self.__hostkey_verify = settings.get('hostkey_verify', True)
self.__look_for_keys = settings.get('look_for_keys', True)
self.__allow_agent = settings.get('allow_agent', True)
self.__force_running = settings.get('force_running', False)
self.__commit_per_rule = settings.get('commit_rule', False)
self.__device_params = settings.get('device_params', {})
self.__manager_params = settings.get('manager_params', {})
self.__nc_params = settings.get('nc_params', {})
self.__manager : Manager = None
self.__candidate_supported = False

Lluis Gifre Renom
committed

Lluis Gifre Renom
committed
with self.__lock:
self.__manager = connect_ssh(
host=self.__address, port=self.__port, username=self.__username, password=self.__password,
device_params=self.__device_params, manager_params=self.__manager_params, nc_params=self.__nc_params,
key_filename=self.__key_filename, hostkey_verify=self.__hostkey_verify, allow_agent=self.__allow_agent,
look_for_keys=self.__look_for_keys)
self.__candidate_supported = ':candidate' in self.__manager.server_capabilities

Lluis Gifre Renom
committed
self.__connected.set()

Lluis Gifre Renom
committed
def disconnect(self):
if not self.__connected.is_set(): return
with self.__lock:
self.__manager.close_session()
@property
def use_candidate(self): return self.__candidate_supported and not self.__force_running

Lluis Gifre Renom
committed
def commit_per_rule(self): return self.__commit_per_rule
@property
def vendor(self): return self.__vendor

Lluis Gifre Renom
committed
@RETRY_DECORATOR
def get(self, filter=None, with_defaults=None): # pylint: disable=redefined-builtin
with self.__lock:
return self.__manager.get(filter=filter, with_defaults=with_defaults)

Lluis Gifre Renom
committed
@RETRY_DECORATOR
def edit_config(
self, config, target='running', default_operation=None, test_option=None,
error_option=None, format='xml' # pylint: disable=redefined-builtin
):
if config == EMPTY_CONFIG: return
with self.__lock:

Lluis Gifre Renom
committed
config, target=target, default_operation=default_operation, test_option=test_option,
error_option=error_option, format=format)
def locked(self, target):
return self.__manager.locked(target=target)
def commit(self, confirmed=False, timeout=None, persist=None, persist_id=None):
return self.__manager.commit(confirmed=confirmed, timeout=timeout, persist=persist, persist_id=persist_id)

Lluis Gifre Renom
committed
def compute_delta_sample(previous_sample, previous_timestamp, current_sample, current_timestamp):
if previous_sample is None: return None
if previous_timestamp is None: return None
if current_sample is None: return None
if current_timestamp is None: return None
delay = current_timestamp - previous_timestamp
field_keys = set(previous_sample.keys()).union(current_sample.keys())
field_keys.discard('name')
delta_sample = {'name': previous_sample['name']}
for field_key in field_keys:
previous_sample_value = previous_sample[field_key]
if not isinstance(previous_sample_value, (int, float)): continue
current_sample_value = current_sample[field_key]
if not isinstance(current_sample_value, (int, float)): continue
delta_value = current_sample_value - previous_sample_value
if delta_value < 0: continue
delta_sample[field_key] = delta_value / delay
return delta_sample
def __init__(self, netconf_handler : NetconfSessionHandler, logger : logging.Logger) -> None:

Lluis Gifre Renom
committed
self.__netconf_handler = netconf_handler
self.__lock = threading.Lock()
self.__timestamp = None

Lluis Gifre Renom
committed
self.__absolute_samples = {}
self.__delta_samples = {}

Lluis Gifre Renom
committed
def _refresh_samples(self) -> None:
with self.__lock:
try:
now = datetime.timestamp(datetime.utcnow())
if self.__timestamp is not None and (now - self.__timestamp) < SAMPLE_EVICTION_SECONDS: return
str_filter = get_filter(SAMPLE_RESOURCE_KEY)

Lluis Gifre Renom
committed
xml_data = self.__netconf_handler.get(filter=str_filter).data_ele
interface_samples = parse(SAMPLE_RESOURCE_KEY, xml_data)
for interface,samples in interface_samples:
match = RE_GET_ENDPOINT_FROM_INTERFACE_KEY.match(interface)
if match is None: continue
interface = match.group(1)

Lluis Gifre Renom
committed
delta_sample = compute_delta_sample(
self.__absolute_samples.get(interface), self.__timestamp, samples, now)
if delta_sample is not None: self.__delta_samples[interface] = delta_sample
self.__absolute_samples[interface] = samples
self.__timestamp = now
except: # pylint: disable=bare-except
self.__logger.exception('Error collecting samples')

Lluis Gifre Renom
committed
def get(self, resource_key : str) -> Tuple[float, Dict]:
self._refresh_samples()
match = RE_GET_ENDPOINT_FROM_INTERFACE_XPATH.match(resource_key)
with self.__lock:
if match is None: return self.__timestamp, {}
interface = match.group(1)

Lluis Gifre Renom
committed
return self.__timestamp, copy.deepcopy(self.__delta_samples.get(interface, {}))
def do_sampling(
samples_cache : SamplesCache, logger : logging.Logger, resource_key : str, out_samples : queue.Queue
) -> None:

Lluis Gifre Renom
committed
timestamp, samples = samples_cache.get(resource_key)
counter_name = resource_key.split('/')[-1].split(':')[-1]
value = samples.get(counter_name)
if value is None:
logger.warning('[do_sampling] value not found for {:s}'.format(resource_key))
return
sample = (timestamp, resource_key, value)
out_samples.put_nowait(sample)
except: # pylint: disable=bare-except
logger.exception('Error retrieving samples')
netconf_handler : NetconfSessionHandler, logger : logging.Logger, resources : List[Tuple[str, Any]], delete=False,
commit_per_rule= False, target='running', default_operation='merge', test_option=None, error_option=None,
format='xml' # pylint: disable=redefined-builtin
):
str_method = 'DeleteConfig' if delete else 'SetConfig'
logger.info('[{:s}] resources = {:s}'.format(str_method, str(resources)))
results = [None for _ in resources]
for i,resource in enumerate(resources):
str_resource_name = 'resources[#{:d}]'.format(i)
try:
logger.info('[{:s}] resource = {:s}'.format(str_method, str(resource)))
chk_type(str_resource_name, resource, (list, tuple))
chk_length(str_resource_name, resource, min_length=2, max_length=2)
resource_key,resource_value = resource
chk_string(str_resource_name + '.key', resource_key, allow_empty=False)
str_config_message = compose_config(
resource_key, resource_value, delete=delete, vendor=netconf_handler.vendor)
if str_config_message is None: raise UnsupportedResourceKeyException(resource_key)
logger.info('[{:s}] str_config_message[{:d}] = {:s}'.format(
str_method, len(str_config_message), str(str_config_message)))
netconf_handler.edit_config(
config=str_config_message, target=target, default_operation=default_operation,
test_option=test_option, error_option=error_option, format=format)
if commit_per_rule:
netconf_handler.commit()
results[i] = True
except Exception as e: # pylint: disable=broad-except
str_operation = 'preparing' if target == 'candidate' else ('deleting' if delete else 'setting')
msg = '[{:s}] Exception {:s} {:s}: {:s}'
logger.exception(msg.format(str_method, str_operation, str_resource_name, str(resource)))
results[i] = e # if validation fails, store the exception
return results
HISTOGRAM_BUCKETS = (
# .005, .01, .025, .05, .075, .1, .25, .5, .75, 1.0, INF
0.0001, 0.00025, 0.00050, 0.00075,
0.0010, 0.0025, 0.0050, 0.0075,
0.0100, 0.0250, 0.0500, 0.0750,
0.1000, 0.2500, 0.5000, 0.7500,
1.0000, 2.5000, 5.0000, 7.5000,
10.0, 25.0, 50.0, 75.0,
100.0, INF
)
METRICS_POOL = MetricsPool('Device', 'Driver', labels={'driver': 'openconfig'})
METRICS_POOL.get_or_create('GetInitialConfig', MetricTypeEnum.HISTOGRAM_DURATION, buckets=HISTOGRAM_BUCKETS)
METRICS_POOL.get_or_create('GetConfig', MetricTypeEnum.HISTOGRAM_DURATION, buckets=HISTOGRAM_BUCKETS)
METRICS_POOL.get_or_create('SetConfig', MetricTypeEnum.HISTOGRAM_DURATION, buckets=HISTOGRAM_BUCKETS)
METRICS_POOL.get_or_create('DeleteConfig', MetricTypeEnum.HISTOGRAM_DURATION, buckets=HISTOGRAM_BUCKETS)
METRICS_POOL.get_or_create('SubscribeState', MetricTypeEnum.HISTOGRAM_DURATION, buckets=HISTOGRAM_BUCKETS)
METRICS_POOL.get_or_create('UnsubscribeState', MetricTypeEnum.HISTOGRAM_DURATION, buckets=HISTOGRAM_BUCKETS)
class OpenConfigDriver(_Driver):
def __init__(self, address : str, port : int, **settings) -> None: # pylint: disable=super-init-not-called
self.__logger = logging.getLogger('{:s}:[{:s}:{:s}]'.format(str(__name__), str(address), str(port)))
self.__lock = threading.Lock()
#self.__initial = TreeNode('.')
#self.__running = TreeNode('.')
self.__subscriptions = TreeNode('.')
self.__started = threading.Event()
self.__terminate = threading.Event()
self.__scheduler = BackgroundScheduler(daemon=True) # scheduler used to emulate sampling events
self.__scheduler.configure(
jobstores = {'default': MemoryJobStore()},
executors = {'default': ThreadPoolExecutor(max_workers=1)},
job_defaults = {'coalesce': False, 'max_instances': 3},
timezone=pytz.utc)
self.__out_samples = queue.Queue()

Lluis Gifre Renom
committed
self.__netconf_handler : NetconfSessionHandler = NetconfSessionHandler(address, port, **settings)
self.__samples_cache = SamplesCache(self.__netconf_handler, self.__logger)
def Connect(self) -> bool:
with self.__lock:
if self.__started.is_set(): return True

Lluis Gifre Renom
committed
self.__netconf_handler.connect()
# Connect triggers activation of sampling events that will be scheduled based on subscriptions
self.__scheduler.start()
self.__started.set()
return True
def Disconnect(self) -> bool:
with self.__lock:
# Trigger termination of loops and processes
self.__terminate.set()
# If not started, assume it is already disconnected
if not self.__started.is_set(): return True
# Disconnect triggers deactivation of sampling events
self.__scheduler.shutdown()

Lluis Gifre Renom
committed
self.__netconf_handler.disconnect()
def GetInitialConfig(self) -> List[Tuple[str, Any]]:
with self.__lock:
return []
def GetConfig(self, resource_keys : List[str] = []) -> List[Tuple[str, Union[Any, None, Exception]]]:
chk_type('resources', resource_keys, list)
results = []
with self.__lock:
if len(resource_keys) == 0: resource_keys = ALL_RESOURCE_KEYS
for i,resource_key in enumerate(resource_keys):
str_resource_name = 'resource_key[#{:d}]'.format(i)
try:
chk_string(str_resource_name, resource_key, allow_empty=False)
str_filter = get_filter(resource_key)
self.__logger.info('[GetConfig] str_filter = {:s}'.format(str(str_filter)))
if str_filter is None: str_filter = resource_key

Lluis Gifre Renom
committed
xml_data = self.__netconf_handler.get(filter=str_filter).data_ele
if isinstance(xml_data, Exception): raise xml_data
results.extend(parse(resource_key, xml_data))
except Exception as e: # pylint: disable=broad-except
MSG = 'Exception retrieving {:s}: {:s}'
self.__logger.exception(MSG.format(str_resource_name, str(resource_key)))
results.append((resource_key, e)) # if validation fails, store the exception
return results
def SetConfig(self, resources : List[Tuple[str, Any]]) -> List[Union[bool, Exception]]:
chk_type('resources', resources, list)
if len(resources) == 0: return []
with self.__lock:
if self.__netconf_handler.use_candidate:
with self.__netconf_handler.locked(target='candidate'):
commit_per_rule = bool(self.__netconf_handler.commit_per_rule)
results = edit_config(
self.__netconf_handler, self.__logger, resources, target='candidate',
commit_per_rule=commit_per_rule)
if not commit_per_rule:
try:
self.__netconf_handler.commit()
except Exception as e: # pylint: disable=broad-except
MSG = '[SetConfig] Exception commiting resources: {:s}'
self.__logger.exception(MSG.format(str(resources)))
results = [e for _ in resources] # if commit fails, set exception in each resource
results = edit_config(self.__netconf_handler, self.__logger, resources)
return results
def DeleteConfig(self, resources : List[Tuple[str, Any]]) -> List[Union[bool, Exception]]:
chk_type('resources', resources, list)
if len(resources) == 0: return []
with self.__lock:
if self.__netconf_handler.use_candidate:
with self.__netconf_handler.locked(target='candidate'):
commit_per_rule = bool(self.__netconf_handler.commit_per_rule)
results = edit_config(
self.__netconf_handler, self.__logger, resources, target='candidate', delete=True,
commit_per_rule=commit_per_rule)
if not commit_per_rule:
try:
self.__netconf_handler.commit()
except Exception as e: # pylint: disable=broad-except
MSG = '[DeleteConfig] Exception commiting resources: {:s}'
self.__logger.exception(MSG.format(str(resources)))
results = [e for _ in resources] # if commit fails, set exception in each resource
results = edit_config(self.__netconf_handler, self.__logger, resources, delete=True)
return results
def SubscribeState(self, subscriptions : List[Tuple[str, float, float]]) -> List[Union[bool, Exception]]:
chk_type('subscriptions', subscriptions, list)
if len(subscriptions) == 0: return []
results = []
resolver = anytree.Resolver(pathattr='name')
with self.__lock:
for i,subscription in enumerate(subscriptions):
str_subscription_name = 'subscriptions[#{:d}]'.format(i)
try:
chk_type(str_subscription_name, subscription, (list, tuple))
chk_length(str_subscription_name, subscription, min_length=3, max_length=3)
resource_key,sampling_duration,sampling_interval = subscription
chk_string(str_subscription_name + '.resource_key', resource_key, allow_empty=False)
resource_path = resource_key.split('/')
chk_float(str_subscription_name + '.sampling_duration', sampling_duration, min_value=0)
chk_float(str_subscription_name + '.sampling_interval', sampling_interval, min_value=0)
except Exception as e: # pylint: disable=broad-except
MSG = 'Exception validating {:s}: {:s}'
self.__logger.exception(MSG.format(str_subscription_name, str(resource_key)))
results.append(e) # if validation fails, store the exception
continue
start_date,end_date = None,None

Lluis Gifre Renom
committed
if sampling_duration >= 1.e-12:
start_date = datetime.utcnow()
end_date = start_date + timedelta(seconds=sampling_duration)
job_id = 'k={:s}/d={:f}/i={:f}'.format(resource_key, sampling_duration, sampling_interval)
job = self.__scheduler.add_job(
do_sampling, args=(self.__samples_cache, self.__logger, resource_key, self.__out_samples),
kwargs={}, id=job_id, trigger='interval', seconds=sampling_interval,
start_date=start_date, end_date=end_date, timezone=pytz.utc)
subscription_path = resource_path + ['{:.3f}:{:.3f}'.format(sampling_duration, sampling_interval)]
set_subnode_value(resolver, self.__subscriptions, subscription_path, job)
results.append(True)
return results
def UnsubscribeState(self, subscriptions : List[Tuple[str, float, float]]) -> List[Union[bool, Exception]]:
chk_type('subscriptions', subscriptions, list)
if len(subscriptions) == 0: return []
results = []
resolver = anytree.Resolver(pathattr='name')
with self.__lock:
for i,resource in enumerate(subscriptions):
str_subscription_name = 'resources[#{:d}]'.format(i)
try:
chk_type(str_subscription_name, resource, (list, tuple))
chk_length(str_subscription_name, resource, min_length=3, max_length=3)
resource_key,sampling_duration,sampling_interval = resource
chk_string(str_subscription_name + '.resource_key', resource_key, allow_empty=False)
resource_path = resource_key.split('/')
chk_float(str_subscription_name + '.sampling_duration', sampling_duration, min_value=0)
chk_float(str_subscription_name + '.sampling_interval', sampling_interval, min_value=0)
except Exception as e: # pylint: disable=broad-except
MSG = 'Exception validating {:s}: {:s}'
self.__logger.exception(MSG.format(str_subscription_name, str(resource_key)))
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
results.append(e) # if validation fails, store the exception
continue
subscription_path = resource_path + ['{:.3f}:{:.3f}'.format(sampling_duration, sampling_interval)]
subscription_node = get_subnode(resolver, self.__subscriptions, subscription_path)
# if not found, resource_node is None
if subscription_node is None:
results.append(False)
continue
job : Job = getattr(subscription_node, 'value', None)
if job is None or not isinstance(job, Job):
raise Exception('Malformed subscription node or wrong resource key: {:s}'.format(str(resource)))
job.remove()
parent = subscription_node.parent
children = list(parent.children)
children.remove(subscription_node)
parent.children = tuple(children)
results.append(True)
return results
def GetState(self, blocking=False, terminate : Optional[threading.Event] = None) -> Iterator[Tuple[str, Any]]:
while True:
if self.__terminate.is_set(): break
if terminate is not None and terminate.is_set(): break
try:
sample = self.__out_samples.get(block=blocking, timeout=0.1)
except queue.Empty:
if blocking: continue
return
if sample is None: continue
yield sample