diff --git a/src/opticalattackmanager/service/__main__.py b/src/opticalattackmanager/service/__main__.py index e058f4827d5ee9d81855124b14e30b8b9ef24990..575edd5ba011373443efa904a60849387f259e92 100644 --- a/src/opticalattackmanager/service/__main__.py +++ b/src/opticalattackmanager/service/__main__.py @@ -83,31 +83,12 @@ LOOP_TIME = Histogram( "tfs_opticalattackmanager_loop_seconds", "Time taken by each security loop", buckets=( - 1.0, - 2.5, - 5.0, - 7.5, - 10.0, - 12.5, - 15.0, - 17.5, - 20.0, - 22.5, - 25.0, - 27.5, - 30.0, - 32.5, - 35.0, - 37.5, - 40.0, - 42.5, - 45.0, - 47.5, - 50.0, - 52.5, - 55.0, - 57.5, - 60.0, + 1.0, 2.5, 5.0, 7.5, 10.0, 12.5, + 15.0, 17.5, 20.0, 22.5, 25.0, 27.5, + 30.0, 32.5, 35.0, 37.5, 40.0, + 42.5, 45.0, 47.5, + 50.0, 52.5, 55.0, 57.5, + 60.0, 70.0, 80.0, 90.0, 100.0, float("inf"), ), ) @@ -127,6 +108,11 @@ DESIRED_MONITORING_INTERVAL = Gauge( "Desired loop monitoring interval", ) +DROP_COUNTER = Counter( + "tfs_opticalattackmanager_dropped_assessments", + "Dropped assessments due to detector timeout", +) + global service_list global cache @@ -390,6 +376,7 @@ async def monitor_services(terminate, service_list=None, cache=None): (i + 1) * k + min(i + 1, m), # last index host, port, + DROP_COUNTER, desired_monitoring_interval * 0.9, ) for i in range(cur_number_workers) diff --git a/src/opticalattackmanager/utils/monitor.py b/src/opticalattackmanager/utils/monitor.py index 390c43dc663f6e1a90d6a834a6ec9f72cc38a45a..30d4b7e22d5fefa1f280d86a47a13bb1800b1f55 100644 --- a/src/opticalattackmanager/utils/monitor.py +++ b/src/opticalattackmanager/utils/monitor.py @@ -22,13 +22,7 @@ from prometheus_client import Counter from common.proto.asyncio.optical_attack_detector_grpc import \ OpticalAttackDetectorServiceStub from common.proto.asyncio.optical_attack_detector_pb2 import DetectionRequest -from common.Settings import get_log_level, get_setting - -DROP_COUNTER = Counter( - "tfs_opticalattackmanager_dropped_assessments", - "Dropped assessments due to detector timeout", -) - +from common.Settings import get_log_level log_level = get_log_level() logging.basicConfig(level=log_level) @@ -41,6 +35,7 @@ async def detect_attack( context_id: str, service_id: str, kpi_id: str, + drop_counter: Counter, timeout: float = 20.0, ) -> None: try: @@ -61,7 +56,7 @@ async def detect_attack( "Exception while processing service_id {}/{}".format(service_id, kpi_id) ) # LOGGER.exception(e) - DROP_COUNTER.inc() + drop_counter.inc() def delegate_services( @@ -70,6 +65,7 @@ def delegate_services( end_index: int, host: str, port: str, + drop_counter: Counter, monitoring_interval: float, ): async def run_internal_loop(): @@ -81,6 +77,7 @@ def delegate_services( service["context"], service["service"], service["kpi"], + drop_counter, # allow at most 90% of the monitoring interval to succeed monitoring_interval * 0.9, ) diff --git a/src/tests/scenario3/optical/dashboard.json b/src/tests/scenario3/optical/dashboard.json index 361ac70bbce1136c255c95c7fab1351083c8b93b..990ab47e95f9db5bc021ca91333f4c5fe61f7ff7 100644 --- a/src/tests/scenario3/optical/dashboard.json +++ b/src/tests/scenario3/optical/dashboard.json @@ -1380,7 +1380,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 29, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1397,7 +1397,7 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" @@ -1416,7 +1416,8 @@ "value": 80 } ] - } + }, + "unit": "mwatt" }, "overrides": [] }, @@ -1445,10 +1446,71 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(scaph_process_power_consumption_microwatts{namespace=\"tfs\", cmdline=~\".+opticalattackdetector.+\"})/1000000", - "legendFormat": "Detector", + "exemplar": false, + "expr": "sum(scaph_process_power_consumption_microwatts{namespace=\"tfs\", cmdline=~\".+opticalattackmanager.+\"})/1000", + "instant": false, + "legendFormat": "Manager", "range": true, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(scaph_process_power_consumption_microwatts{namespace=\"tfs\", cmdline=~\".+opticalattackdetector.+\"})/1000", + "hide": false, + "instant": false, + "legendFormat": "Detector", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(scaph_process_power_consumption_microwatts{namespace=\"tfs\", cmdline=~\".+dbscan.+\"})/1000", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "UL Inference", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(scaph_process_power_consumption_microwatts{namespace=\"tfs\", cmdline=~\"redis-server.+\"})/1000", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Cache", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(scaph_process_power_consumption_microwatts{namespace=\"tfs\", cmdline=~\".+opticalattackmitigator.+\"})/1000", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Mitigator", + "range": true, + "refId": "E" } ], "title": "Energy consumption", @@ -1463,7 +1525,7 @@ "list": [] }, "time": { - "from": "now-15m", + "from": "now-5m", "to": "now" }, "timepicker": {}, diff --git a/src/tests/scenario3/optical/ofc23/run_experiment_demo.py b/src/tests/scenario3/optical/ofc23/run_experiment_demo.py index b0fe01547708f553a4c39fb7eb60a6006d6d23c4..99dda0dc9b2a6dc083ce07f8258a246baf5ecd94 100644 --- a/src/tests/scenario3/optical/ofc23/run_experiment_demo.py +++ b/src/tests/scenario3/optical/ofc23/run_experiment_demo.py @@ -32,7 +32,7 @@ v1 = client.CoreV1Api() caching_pod = None pods = v1.list_namespaced_pod(namespace=namespace) for pod in pods.items: - print(pod.metadata) + # print(pod.metadata) if "app" in pod.metadata.labels and "caching" in pod.metadata.labels["app"]: caching_pod = pod