From 87685c9c58a468f4da960e80148158d8c390528a Mon Sep 17 00:00:00 2001
From: Waleed Akbar <wakbar@cttc.es>
Date: Fri, 7 Mar 2025 05:47:47 +0000
Subject: [PATCH] Updated Telemetry Backend and add monitoring deployment
 script

- Minor changes in Telemetry backend service
- new script added to deploy Prom. Gateway, Prometheus and Mimir
- Configuration file for Prometheus to scrape Gateway.
---
 .gitignore                                    |   2 -
 deploy/monitoring.sh                          |  53 ----
 deploy/new_monitoring.sh                      | 132 ++++++++++
 manifests/monitoring/grafana_values.yaml      | 235 ++++++++++++++++++
 .../prometheus_values.yaml}                   |   0
 .../emulated/SyntheticMetricsGenerator.py     |   2 +-
 .../service/TelemetryBackendService.py        |   7 +-
 7 files changed, 372 insertions(+), 59 deletions(-)
 delete mode 100644 deploy/monitoring.sh
 create mode 100755 deploy/new_monitoring.sh
 create mode 100644 manifests/monitoring/grafana_values.yaml
 rename manifests/{prometheus/prometheus.yaml => monitoring/prometheus_values.yaml} (100%)

diff --git a/.gitignore b/.gitignore
index db47387c8..235d7768a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -179,5 +179,3 @@ libyang/
 # Other logs
 **/logs/*.log.*
 
-# PySpark checkpoints
-src/analytics/.spark/*
diff --git a/deploy/monitoring.sh b/deploy/monitoring.sh
deleted file mode 100644
index 18992501a..000000000
--- a/deploy/monitoring.sh
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/bin/bash
-# Copyright 2022-2024 ETSI SDG TeraFlowSDN (TFS) (https://tfs.etsi.org/)
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-RELEASE_NAME="mon-prometheus"          
-NAMESPACE="monitoring"                 
-CHART_REPO_NAME="prometheus-community"
-CHART_REPO_URL="https://prometheus-community.github.io/helm-charts"
-CHART_NAME="prometheus"                # Chart name within the repo
-VALUES_FILE="manifests/prometheus/prometheus.yaml"   
-
-echo ">>> Deploying Prometheus with the following configuration:"
-echo "Adding/updating Helm repo: $CHART_REPO_NAME -> $CHART_REPO_URL"
-helm repo add "$CHART_REPO_NAME" "$CHART_REPO_URL" || true
-helm repo update
-
-echo "Creating namespace '$NAMESPACE' if it doesn't exist..."
-kubectl get namespace "$NAMESPACE" >/dev/null 2>&1 || kubectl create namespace "$NAMESPACE"
-
-#------------------------------------------------------------------------------
-# 3. Install or upgrade the Prometheus chart
-#    - If 'VALUES_FILE' is set, it will use it for custom configuration.
-#    - Otherwise, it will deploy with the chart defaults.
-#------------------------------------------------------------------------------
-if [ -n "$VALUES_FILE" ] && [ -f "$VALUES_FILE" ]; then
-    echo "Installing/Upgrading Prometheus with custom values from $VALUES_FILE..."
-    helm upgrade --install "$RELEASE_NAME" "$CHART_REPO_NAME/$CHART_NAME" \
-      --namespace "$NAMESPACE" \
-      --values "$VALUES_FILE"
-else
-    echo "Installing/Upgrading Prometheus with default chart values..."
-    helm upgrade --install "$RELEASE_NAME" "$CHART_REPO_NAME/$CHART_NAME" \
-      --namespace "$NAMESPACE"
-fi
-
-echo "Waiting for Prometheus pods to be ready..."
-kubectl rollout status deployment/"$RELEASE_NAME"-server -n "$NAMESPACE" || true
-
-# echo "Listing deployed resources in namespace '$NAMESPACE':"
-# kubectl get all -n "$NAMESPACE"
-
-echo "<<< Prometheus deployment completed successfully!"
diff --git a/deploy/new_monitoring.sh b/deploy/new_monitoring.sh
new file mode 100755
index 000000000..ac1f46723
--- /dev/null
+++ b/deploy/new_monitoring.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+# Copyright 2022-2024 ETSI SDG TeraFlowSDN (TFS) (https://tfs.etsi.org/)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -euo pipefail
+
+# -----------------------------------------------------------
+# Global namespace for all deployments
+# -----------------------------------------------------------
+NAMESPACE="monitoring"
+VALUES_FILE_PATH="manifests/monitoring"
+
+# -----------------------------------------------------------
+# Prometheus Configuration
+# -----------------------------------------------------------
+RELEASE_NAME_PROM="mon-prometheus"
+CHART_REPO_NAME_PROM="prometheus-community"
+CHART_REPO_URL_PROM="https://prometheus-community.github.io/helm-charts"
+CHART_NAME_PROM="prometheus"
+VALUES_FILE_PROM="$VALUES_FILE_PATH/prometheus_values.yaml"
+
+# -----------------------------------------------------------
+# Mimir Configuration
+# -----------------------------------------------------------
+RELEASE_NAME_MIMIR="mon-mimir"
+CHART_REPO_NAME_MIMIR="grafana"
+CHART_REPO_URL_MIMIR="https://grafana.github.io/helm-charts"
+CHART_NAME_MIMIR="mimir-distributed"
+VALUES_FILE_MIMIR="$VALUES_FILE_PATH/mimir_values.yaml"
+
+# -----------------------------------------------------------
+# Grafana Configuration
+# -----------------------------------------------------------
+# RELEASE_NAME_GRAFANA="mon-grafana"
+# CHART_REPO_NAME_GRAFANA="grafana"
+# CHART_REPO_URL_GRAFANA="https://grafana.github.io/helm-charts"
+# CHART_NAME_GRAFANA="grafana"
+# VALUES_FILE_GRAFANA="$VALUES_FILE_PATH/grafana_values.yaml"
+
+
+# -----------------------------------------------------------
+# deploy_chart RELEASE REPO_NAME REPO_URL CHART VALUES_FILE NAMESPACE: install or upgrade one Helm chart
+# -----------------------------------------------------------
+deploy_chart() {
+  local release_name="$1"
+  local chart_repo_name="$2"
+  local chart_repo_url="$3"
+  local chart_name="$4"
+  local values_file="$5"
+  local namespace="$6"
+
+  echo ">>> Deploying [${release_name}] from repo [${chart_repo_name}]..."
+
+  # Add the Helm repo and refresh indexes; '|| true' stops 'set -e' from aborting the script if 'helm repo add' fails
+  echo "Adding/updating Helm repo: $chart_repo_name -> $chart_repo_url"
+  helm repo add "$chart_repo_name" "$chart_repo_url" || true
+  helm repo update
+
+  # Create namespace if needed
+  echo "Creating namespace '$namespace' if it doesn't exist..."
+  kubectl get namespace "$namespace" >/dev/null 2>&1 || kubectl create namespace "$namespace"
+
+  # Install or upgrade the chart
+  if [ -n "$values_file" ] && [ -f "$values_file" ]; then
+    echo "Installing/Upgrading $release_name using custom values from $values_file..."
+    helm upgrade --install "$release_name" "$chart_repo_name/$chart_name" \
+      --namespace "$namespace" \
+      --values "$values_file"
+  else
+    echo "Installing/Upgrading $release_name with default chart values..."
+    helm upgrade --install "$release_name" "$chart_repo_name/$chart_name" \
+      --namespace "$namespace"
+  fi
+
+  echo "<<< Deployment initiated for [$release_name]."
+  echo
+}
+
+
+# -----------------------------------------------------------
+# Actual Deployments
+# -----------------------------------------------------------
+
+# 1) Deploy Prometheus
+deploy_chart "$RELEASE_NAME_PROM" \
+             "$CHART_REPO_NAME_PROM" \
+             "$CHART_REPO_URL_PROM" \
+             "$CHART_NAME_PROM" \
+             "$VALUES_FILE_PROM" \
+             "$NAMESPACE"
+
+# Optionally wait for Prometheus server pod to become ready
+kubectl rollout status deployment/"$RELEASE_NAME_PROM-server" -n "$NAMESPACE" || true
+
+
+# 2) Deploy Mimir
+deploy_chart "$RELEASE_NAME_MIMIR" \
+             "$CHART_REPO_NAME_MIMIR" \
+             "$CHART_REPO_URL_MIMIR" \
+             "$CHART_NAME_MIMIR" \
+             "$VALUES_FILE_MIMIR" \
+             "$NAMESPACE"
+
+# Depending on how Mimir runs (StatefulSets, Deployments), you can wait for
+# the correct resource to be ready. For example:
+# kubectl rollout status deployment/"$RELEASE_NAME_MIMIR-distributor" -n "$NAMESPACE" || true
+
+
+# 3) Deploy Grafana
+# deploy_chart "$RELEASE_NAME_GRAFANA" \
+#              "$CHART_REPO_NAME_GRAFANA" \
+#              "$CHART_REPO_URL_GRAFANA" \
+#              "$CHART_NAME_GRAFANA" \
+#              "$VALUES_FILE_GRAFANA" \
+#              "$NAMESPACE"
+
+# kubectl rollout status deployment/"$RELEASE_NAME_GRAFANA" -n "$NAMESPACE" || true
+
+# -----------------------------------------------------------
+echo "All deployments completed!"
+
diff --git a/manifests/monitoring/grafana_values.yaml b/manifests/monitoring/grafana_values.yaml
new file mode 100644
index 000000000..a2dbd7971
--- /dev/null
+++ b/manifests/monitoring/grafana_values.yaml
@@ -0,0 +1,235 @@
+rbac:
+  create: true
+  ## Use an existing ClusterRole/Role (depending on rbac.namespaced false/true)
+  # useExistingRole: name-of-some-role
+  # useExistingClusterRole: name-of-some-clusterRole
+  pspEnabled: false
+  pspUseAppArmor: false
+  namespaced: false
+
+serviceAccount:
+  create: true
+  name:
+  nameTest:
+  ## ServiceAccount labels.
+  automountServiceAccountToken: false
+
+replicas: 1
+
+## Create a headless service for the deployment
+headlessService: false
+
+## Should the service account be auto mounted on the pod
+automountServiceAccountToken: true
+
+## Create HorizontalPodAutoscaler object for deployment type
+#
+autoscaling:
+  enabled: false
+  minReplicas: 1
+  maxReplicas: 3
+  targetCPU: "60"
+  targetMemory: ""
+  behavior: {}
+
+deploymentStrategy:
+  type: RollingUpdate
+
+readinessProbe:
+  httpGet:
+    path: /api/health
+    port: 3000
+
+livenessProbe:
+  httpGet:
+    path: /api/health
+    port: 3000
+  initialDelaySeconds: 60
+  timeoutSeconds: 30
+  failureThreshold: 10
+
+image:
+  registry: docker.io
+  repository: grafana/grafana
+  # Overrides the Grafana image tag whose default is the chart appVersion
+  tag: ""
+  sha: ""
+  pullPolicy: IfNotPresent
+
+  ## Optionally specify an array of imagePullSecrets.
+  ## Secrets must be manually created in the namespace.
+  ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
+  ## Can be templated.
+  ##
+  pullSecrets: []
+  #   - myRegistryKeySecretName
+
+testFramework:
+  enabled: true
+  ## The type of Helm hook used to run this test. Defaults to test.
+  ## ref: https://helm.sh/docs/topics/charts_hooks/#the-available-hooks
+  ##
+  # hookType: test
+  image:
+    # -- The Docker registry
+    registry: docker.io
+    repository: bats/bats
+    tag: "v1.4.1"
+  imagePullPolicy: IfNotPresent
+
+# dns configuration for pod
+dnsPolicy: ~
+dnsConfig: {}
+  # nameservers:
+  #   - 8.8.8.8
+  #   options:
+  #   - name: ndots
+  #     value: "2"
+  #   - name: edns0
+
+securityContext:
+  runAsNonRoot: true
+  runAsUser: 472
+  runAsGroup: 472
+  fsGroup: 472
+
+containerSecurityContext:
+  allowPrivilegeEscalation: false
+  capabilities:
+    drop:
+    - ALL
+  seccompProfile:
+    type: RuntimeDefault
+
+# Enable creating the grafana configmap
+createConfigmap: true
+
+downloadDashboardsImage:
+  registry: docker.io
+  repository: curlimages/curl
+  tag: 8.9.1
+  sha: ""
+  pullPolicy: IfNotPresent
+
+downloadDashboards:
+  env: {}
+  envFromSecret: ""
+  resources: {}
+  securityContext:
+    allowPrivilegeEscalation: false
+    capabilities:
+      drop:
+      - ALL
+    seccompProfile:
+      type: RuntimeDefault
+  envValueFrom: {}
+  #  ENV_NAME:
+  #    configMapKeyRef:
+  #      name: configmap-name
+  #      key: value_key
+
+## Pod Annotations
+# podAnnotations: {}
+
+## ConfigMap Annotations
+# configMapAnnotations: {}
+  # argocd.argoproj.io/sync-options: Replace=true
+
+## Pod Labels
+# podLabels: {}
+
+podPortName: grafana
+gossipPortName: gossip
+## Deployment annotations
+# annotations: {}
+
+service:
+  enabled: true
+  type: NodePort
+  port: 80
+  targetPort: 3000
+  nodePort: 30080
+  portName: service
+
+## Enable persistence using Persistent Volume Claims
+## ref: https://kubernetes.io/docs/user-guide/persistent-volumes/
+##
+persistence:
+  type: pvc
+  enabled: true
+  # storageClassName: default
+  accessModes:
+    - ReadWriteOnce
+  size: 10Gi
+  # annotations: {}
+  finalizers:
+    - kubernetes.io/pvc-protection
+
+  disableWarning: false
+
+  ## If 'lookupVolumeName' is set to true, Helm will attempt to retrieve
+  ## the current value of 'spec.volumeName' and incorporate it into the template.
+  lookupVolumeName: true
+
+# Administrator credentials when not using an existing secret (see below)
+adminUser: admin
+# adminPassword: strongpassword
+
+# Use an existing secret for the admin user.
+admin:
+  ## Name of the secret. Can be templated.
+  existingSecret: ""
+  userKey: admin-user
+  passwordKey: admin-password
+
+## Configure grafana datasources. Service DNS names follow deploy/new_monitoring.sh:
+## releases "mon-prometheus" and "mon-mimir", both installed in namespace "monitoring".
+## ref: http://docs.grafana.org/administration/provisioning/#datasources
+datasources:
+ datasources.yaml:
+   apiVersion: 1
+   datasources:
+   - name: Prometheus
+     type: prometheus
+     url: http://mon-prometheus-server.monitoring.svc.cluster.local
+     access: proxy
+     isDefault: true
+   - name: Mimir
+     type: prometheus
+     url: http://mon-mimir-nginx.monitoring.svc:80/prometheus
+     access: proxy
+     isDefault: false
+
+## Grafana's primary configuration
+## NOTE: values in map will be converted to ini format
+## ref: http://docs.grafana.org/installation/configuration/
+##
+grafana.ini:
+  paths:
+    data: /var/lib/grafana/
+    logs: /var/log/grafana
+    plugins: /var/lib/grafana/plugins
+    provisioning: /etc/grafana/provisioning
+  analytics:
+    check_for_updates: true
+  log:
+    mode: console
+  grafana_net:
+    url: https://grafana.net
+  server:
+    domain: "{{ if (and .Values.ingress.enabled .Values.ingress.hosts) }}{{ tpl (.Values.ingress.hosts | first) . }}{{ else }}''{{ end }}"
+
+## Number of old ReplicaSets to retain
+##
+revisionHistoryLimit: 5
+
+# assertNoLeakedSecrets is a helper function defined in _helpers.tpl that checks if secret
+# values are not exposed in the rendered grafana.ini configmap. It is enabled by default.
+#
+# To pass values into grafana.ini without exposing them in a configmap, use variable expansion:
+# https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#variable-expansion
+#
+# Alternatively, if you wish to allow secret values to be exposed in the rendered grafana.ini configmap,
+# you can disable this check by setting assertNoLeakedSecrets to false.
+assertNoLeakedSecrets: true
+
diff --git a/manifests/prometheus/prometheus.yaml b/manifests/monitoring/prometheus_values.yaml
similarity index 100%
rename from manifests/prometheus/prometheus.yaml
rename to manifests/monitoring/prometheus_values.yaml
diff --git a/src/telemetry/backend/collectors/emulated/SyntheticMetricsGenerator.py b/src/telemetry/backend/collectors/emulated/SyntheticMetricsGenerator.py
index a01e2c0e6..77d998432 100644
--- a/src/telemetry/backend/collectors/emulated/SyntheticMetricsGenerator.py
+++ b/src/telemetry/backend/collectors/emulated/SyntheticMetricsGenerator.py
@@ -98,7 +98,7 @@ class SyntheticMetricsGenerator():
 
         return (time.time(), resource_key, requested_metrics)
 
-    def metric_id_mapper(self, sample_type_ids, metric_dict):
+    def metric_id_mapper(self, sample_type_ids, metric_dict):   # TODO: Add a dynamic mapper from kpi_sample_type ID to name...
         """
         Maps the sample type IDs to the corresponding metric names.
 
diff --git a/src/telemetry/backend/service/TelemetryBackendService.py b/src/telemetry/backend/service/TelemetryBackendService.py
index 40cd1443a..3aeee8238 100755
--- a/src/telemetry/backend/service/TelemetryBackendService.py
+++ b/src/telemetry/backend/service/TelemetryBackendService.py
@@ -49,7 +49,7 @@ class TelemetryBackendService(GenericGrpcService):
         self.kafka_consumer = KafkaConsumer({'bootstrap.servers' : KafkaConfig.get_kafka_address(),
                                             'group.id'           : 'backend',
                                             'auto.offset.reset'  : 'latest'})
-        self.collector          = EmulatedCollector(address="127.0.0.1", port=8000)
+        self.collector          = None
         self.context_client     = ContextClient()
         self.kpi_manager_client = KpiManagerClient()
         self.active_jobs = {}
@@ -124,11 +124,11 @@ class TelemetryBackendService(GenericGrpcService):
         Method to handle collector request.
         """
         device_type, end_points = self.get_endpoint_detail(kpi_id)
-        # end_points : dict = self.get_endpoints_from_kpi_id(kpi_id)
+
         if end_points is None:
             LOGGER.warning("KPI ID: {:} - Endpoints not found. Skipping...".format(kpi_id))
             return
-        # device_type : str = self.get_device_type_from_kpi_id(kpi_id)
+
         if device_type and "emu" in device_type:
             LOGGER.info("KPI ID: {:} - Device Type: {:} - Endpoints: {:}".format(kpi_id, device_type, end_points))
             subscription = [collector_id, end_points, duration, interval]
@@ -139,6 +139,7 @@ class TelemetryBackendService(GenericGrpcService):
     def EmulatedCollectorHandler(self, subscription, duration, collector_id, kpi_id, stop_event):
             # EmulatedCollector
             
+            self.collector = EmulatedCollector(address="127.0.0.1", port=8000)
             self.collector.Connect()
             if not self.collector.SubscribeState(subscription):
                 LOGGER.warning("KPI ID: {:} - Subscription failed. Skipping...".format(kpi_id))
-- 
GitLab