Commit e76d0be2 authored by Carlos Natalino's avatar Carlos Natalino
Browse files

Latest changes to the dashboard.

parent 3dc40639
Loading
Loading
Loading
Loading
+15 −14
Original line number Diff line number Diff line
@@ -13,7 +13,7 @@
# limitations under the License.

import logging, os, time
from typing import List
from typing import List, Dict
from common.Constants import (
    DEFAULT_GRPC_BIND_ADDRESS, DEFAULT_GRPC_GRACE_PERIOD, DEFAULT_GRPC_MAX_WORKERS, DEFAULT_HTTP_BIND_ADDRESS,
    DEFAULT_LOG_LEVEL, DEFAULT_METRICS_PORT, DEFAULT_SERVICE_GRPC_PORTS, DEFAULT_SERVICE_HTTP_BASEURLS,
@@ -37,23 +37,24 @@ ENVVAR_SUFIX_SERVICE_HOST = 'SERVICE_HOST'
ENVVAR_SUFIX_SERVICE_PORT_GRPC    = 'SERVICE_PORT_GRPC'
ENVVAR_SUFIX_SERVICE_PORT_HTTP    = 'SERVICE_PORT_HTTP'

def find_missing_environment_variables(
    required_environment_variables : List[str] = []
) -> List[str]:
    """Return the required environment variables that are not defined.

    Args:
        required_environment_variables: names expected to be present in
            ``os.environ``. The default list is never mutated here.

    Returns:
        A new list with the missing names, in the order they were given and
        without duplicates. The list is empty when ``ENVVAR_KUBERNETES_PORT``
        is not defined in the environment, since in that case there is
        nothing to wait for.
    """
    if ENVVAR_KUBERNETES_PORT not in os.environ:
        # We're not running in Kubernetes, nothing to wait for
        return []
    # dict.fromkeys de-duplicates while keeping the caller's order, so the
    # result is a deterministic list as the annotation promises (a plain set
    # difference would hand back an unordered set instead).
    return [
        name for name in dict.fromkeys(required_environment_variables)
        if name not in os.environ
    ]
def find_environment_variables(
    environment_variable_names : List[str] = []
) -> Dict[str, str]:
    """Collect the values of the requested environment variables.

    Names that are not defined in ``os.environ`` are silently left out of
    the result.

    Args:
        environment_variable_names: names to look up in ``os.environ``.

    Returns:
        A new dict mapping each defined name to its current value.
    """
    current_environment = os.environ
    return {
        wanted_name: current_environment[wanted_name]
        for wanted_name in environment_variable_names
        if wanted_name in current_environment
    }

def wait_for_environment_variables(
    required_environment_variables : List[str] = [], wait_delay_seconds : float = DEFAULT_RESTART_DELAY
):
    """Check that the required environment variables are defined.

    When some are missing, the problem is logged and, if
    ``ENVVAR_KUBERNETES_PORT`` is present in the environment, the function
    sleeps and then raises so that the caller can restart.

    Args:
        required_environment_variables: names expected in ``os.environ``.
        wait_delay_seconds: seconds passed to ``time.sleep`` before raising.

    Raises:
        Exception: with the message 'Restarting...', after the delay, when
            ``ENVVAR_KUBERNETES_PORT`` is defined and at least one required
            variable is still missing.
    """
    # NOTE(review): the next four statements and the block that follows them
    # both compute and log the set of missing variables; this looks like the
    # pre-change and post-change versions of the same check shown together.
    # Confirm which one is meant to remain.
    missing_variables = find_missing_environment_variables(required_environment_variables)
    if len(missing_variables) == 0: return # We have all environment variables defined
    msg = 'Variables({:s}) are missing in Environment({:s}), restarting in {:f} seconds...'
    LOGGER.error(msg.format(str(missing_variables), str(os.environ), wait_delay_seconds))
    if ENVVAR_KUBERNETES_PORT not in os.environ: return # Not running in Kubernetes
    # Look up which of the required names are defined, then derive the rest.
    found = find_environment_variables(required_environment_variables)
    missing = set(required_environment_variables).difference(set(found.keys()))
    if len(missing) == 0: return # We have all environment variables defined
    MSG = 'Variables({:s}) are missing in Environment({:s}), restarting in {:f} seconds...'
    LOGGER.error(MSG.format(str(missing), str(os.environ), wait_delay_seconds))
    # Pause before raising so that the restart is delayed by wait_delay_seconds.
    time.sleep(wait_delay_seconds)
    raise Exception('Restarting...') # pylint: disable=broad-exception-raised

+184 −175
Original line number Diff line number Diff line
@@ -66,6 +66,7 @@
  "liveNow": false,
  "panels": [
    {
      "collapsed": false,
      "gridPos": {
        "h": 1,
        "w": 24,
@@ -73,6 +74,7 @@
        "y": 0
      },
      "id": 12,
      "panels": [],
      "title": "General status",
      "type": "row"
    },
@@ -216,7 +218,7 @@
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "displayMode": "hidden",
          "placement": "bottom"
        },
        "tooltip": {
@@ -230,7 +232,11 @@
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "expr": "tfs_opticalattackmanager_dropped_assessments_created",
          "editorMode": "code",
          "exemplar": false,
          "expr": "rate(tfs_opticalattackmanager_dropped_assessments_created[1m])",
          "instant": false,
          "range": true,
          "refId": "A"
        }
      ],
@@ -238,6 +244,7 @@
      "type": "timeseries"
    },
    {
      "collapsed": false,
      "gridPos": {
        "h": 1,
        "w": 24,
@@ -245,6 +252,7 @@
        "y": 9
      },
      "id": 10,
      "panels": [],
      "title": "Pipeline",
      "type": "row"
    },
@@ -253,6 +261,7 @@
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "",
      "fieldConfig": {
        "defaults": {
          "color": {
@@ -309,11 +318,11 @@
        "x": 0,
        "y": 10
      },
      "id": 16,
      "id": 14,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "displayMode": "hidden",
          "placement": "bottom"
        },
        "tooltip": {
@@ -328,25 +337,13 @@
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "histogram_quantile(0.95, sum(rate(tfs_opticalattackmanager_loop_seconds_bucket[$__rate_interval])) by (le))",
          "legendFormat": "Measured",
          "expr": "tfs_opticalattackmanager_active_services",
          "legendFormat": "Active services",
          "range": true,
          "refId": "A"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "tfs_opticalattackmanager_desired_monitoring_interval",
          "hide": false,
          "legendFormat": "Desired",
          "range": true,
          "refId": "B"
        }
      ],
      "title": "Loop time",
      "title": "Number of active optical services",
      "type": "timeseries"
    },
    {
@@ -354,7 +351,6 @@
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "",
      "fieldConfig": {
        "defaults": {
          "color": {
@@ -411,7 +407,7 @@
        "x": 12,
        "y": 10
      },
      "id": 14,
      "id": 16,
      "options": {
        "legend": {
          "calcs": [],
@@ -430,13 +426,25 @@
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "tfs_opticalattackmanager_active_services",
          "legendFormat": "Active services",
          "expr": "histogram_quantile(0.95, sum(rate(tfs_opticalattackmanager_loop_seconds_bucket[$__rate_interval])) by (le))",
          "legendFormat": "Measured",
          "range": true,
          "refId": "A"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "tfs_opticalattackmanager_desired_monitoring_interval",
          "hide": false,
          "legendFormat": "Desired",
          "range": true,
          "refId": "B"
        }
      ],
      "title": "Number of active optical services",
      "title": "Loop time",
      "type": "timeseries"
    },
    {
@@ -543,7 +551,7 @@
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "histogram_quantile(0.95, sum(rate(tfs_opticalattackdetector_inference_response_time_bucket[$__rate_interval])) by (le))",
          "expr": "histogram_quantile(0.95, sum(rate(tfs_opticalattackdetector_execution_details_histogram_duration_bucket{step=\"uldetection\"}[$__rate_interval])) by (le))",
          "hide": false,
          "legendFormat": "UL Inference",
          "range": true,
@@ -555,7 +563,7 @@
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "histogram_quantile(0.95, sum(rate(tfs_opticalattackdetector_cache_response_time_bucket[$__rate_interval])) by (le))",
          "expr": "histogram_quantile(0.95, sum(rate(tfs_opticalattackdetector_execution_details_histogram_duration_bucket{step=\"cachefetch\"}[$__rate_interval])) by (le))",
          "hide": false,
          "interval": "",
          "legendFormat": "Cache",
@@ -568,7 +576,7 @@
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "histogram_quantile(0.95, sum(rate(tfs_opticalattackdetector_mitigation_response_time_bucket[$__rate_interval])) by (le))",
          "expr": "histogram_quantile(0.95, sum(rate(tfs_opticalattackdetector_execution_details_histogram_duration_bucket{step=\"mitigation\"}[$__rate_interval])) by (le))",
          "hide": false,
          "interval": "",
          "legendFormat": "Mitigator",
@@ -1353,7 +1361,7 @@
      "type": "timeseries"
    },
    {
      "collapsed": false,
      "collapsed": true,
      "gridPos": {
        "h": 1,
        "w": 24,
@@ -1361,10 +1369,7 @@
        "y": 44
      },
      "id": 2,
      "panels": [],
      "title": "General status",
      "type": "row"
    },
      "panels": [
        {
          "datasource": {
            "type": "prometheus",
@@ -1517,7 +1522,11 @@
          "type": "timeseries"
        }
      ],
  "refresh": "5s",
      "title": "General status",
      "type": "row"
    }
  ],
  "refresh": "",
  "schemaVersion": 36,
  "style": "dark",
  "tags": [],
@@ -1525,13 +1534,13 @@
    "list": []
  },
  "time": {
    "from": "now-5m",
    "from": "now-30m",
    "to": "now"
  },
  "timepicker": {},
  "timezone": "",
  "title": "TFS / Optical cybersecurity",
  "uid": "-Q-B-AsVk",
  "version": 1,
  "version": 5,
  "weekStart": ""
}
 No newline at end of file
+1 −1
Original line number Diff line number Diff line
@@ -725,7 +725,7 @@
  },
  "timepicker": {},
  "timezone": "",
  "title": "Scalable and Efficient Pipeline for ML-based Optical Network Monitoring",
  "title": "Scalable and Efficient Pipeline for ML-based Optical Network Monitoring - No longer maintained!",
  "uid": "IYQSZX0Vk",
  "version": 4,
  "weekStart": ""