From 87685c9c58a468f4da960e80148158d8c390528a Mon Sep 17 00:00:00 2001 From: Waleed Akbar <wakbar@cttc.es> Date: Fri, 7 Mar 2025 05:47:47 +0000 Subject: [PATCH] Updated Telemetry Backend and add monitoring deployment script - Minor changes in Telemetry backend service - new script added to deploy Prom. Gateway, Prometheus and Mimir - Configuration file for Prometheus to scrape Gateway. --- .gitignore | 2 - deploy/monitoring.sh | 53 ---- deploy/new_monitoring.sh | 132 ++++++++++ manifests/monitoring/grafana_values.yaml | 235 ++++++++++++++++++ .../prometheus_values.yaml} | 0 .../emulated/SyntheticMetricsGenerator.py | 2 +- .../service/TelemetryBackendService.py | 7 +- 7 files changed, 372 insertions(+), 59 deletions(-) delete mode 100644 deploy/monitoring.sh create mode 100755 deploy/new_monitoring.sh create mode 100644 manifests/monitoring/grafana_values.yaml rename manifests/{prometheus/prometheus.yaml => monitoring/prometheus_values.yaml} (100%) diff --git a/.gitignore b/.gitignore index db47387c8..235d7768a 100644 --- a/.gitignore +++ b/.gitignore @@ -179,5 +179,3 @@ libyang/ # Other logs **/logs/*.log.* -# PySpark checkpoints -src/analytics/.spark/* diff --git a/deploy/monitoring.sh b/deploy/monitoring.sh deleted file mode 100644 index 18992501a..000000000 --- a/deploy/monitoring.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash -# Copyright 2022-2024 ETSI SDG TeraFlowSDN (TFS) (https://tfs.etsi.org/) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -RELEASE_NAME="mon-prometheus" -NAMESPACE="monitoring" -CHART_REPO_NAME="prometheus-community" -CHART_REPO_URL="https://prometheus-community.github.io/helm-charts" -CHART_NAME="prometheus" # Chart name within the repo -VALUES_FILE="manifests/prometheus/prometheus.yaml" - -echo ">>> Deploying Prometheus with the following configuration:" -echo "Adding/updating Helm repo: $CHART_REPO_NAME -> $CHART_REPO_URL" -helm repo add "$CHART_REPO_NAME" "$CHART_REPO_URL" || true -helm repo update - -echo "Creating namespace '$NAMESPACE' if it doesn't exist..." -kubectl get namespace "$NAMESPACE" >/dev/null 2>&1 || kubectl create namespace "$NAMESPACE" - -#------------------------------------------------------------------------------ -# 3. Install or upgrade the Prometheus chart -# - If 'VALUES_FILE' is set, it will use it for custom configuration. -# - Otherwise, it will deploy with the chart defaults. -#------------------------------------------------------------------------------ -if [ -n "$VALUES_FILE" ] && [ -f "$VALUES_FILE" ]; then - echo "Installing/Upgrading Prometheus with custom values from $VALUES_FILE..." - helm upgrade --install "$RELEASE_NAME" "$CHART_REPO_NAME/$CHART_NAME" \ - --namespace "$NAMESPACE" \ - --values "$VALUES_FILE" -else - echo "Installing/Upgrading Prometheus with default chart values..." - helm upgrade --install "$RELEASE_NAME" "$CHART_REPO_NAME/$CHART_NAME" \ - --namespace "$NAMESPACE" -fi - -echo "Waiting for Prometheus pods to be ready..." -kubectl rollout status deployment/"$RELEASE_NAME"-server -n "$NAMESPACE" || true - -# echo "Listing deployed resources in namespace '$NAMESPACE':" -# kubectl get all -n "$NAMESPACE" - -echo "<<< Prometheus deployment completed successfully!" 
diff --git a/deploy/new_monitoring.sh b/deploy/new_monitoring.sh new file mode 100755 index 000000000..ac1f46723 --- /dev/null +++ b/deploy/new_monitoring.sh @@ -0,0 +1,132 @@ +#!/bin/bash +# Copyright 2022-2024 ETSI SDG TeraFlowSDN (TFS) (https://tfs.etsi.org/) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -euo pipefail + +# ----------------------------------------------------------- +# Global namespace for all deployments +# ----------------------------------------------------------- +NAMESPACE="monitoring" +VALUES_FILE_PATH="manifests/monitoring" + +# ----------------------------------------------------------- +# Prometheus Configuration +# ----------------------------------------------------------- +RELEASE_NAME_PROM="mon-prometheus" +CHART_REPO_NAME_PROM="prometheus-community" +CHART_REPO_URL_PROM="https://prometheus-community.github.io/helm-charts" +CHART_NAME_PROM="prometheus" +VALUES_FILE_PROM="$VALUES_FILE_PATH/prometheus_values.yaml" + +# ----------------------------------------------------------- +# Mimir Configuration +# ----------------------------------------------------------- +RELEASE_NAME_MIMIR="mon-mimir" +CHART_REPO_NAME_MIMIR="grafana" +CHART_REPO_URL_MIMIR="https://grafana.github.io/helm-charts" +CHART_NAME_MIMIR="mimir-distributed" +VALUES_FILE_MIMIR="$VALUES_FILE_PATH/mimir_values.yaml" + +# ----------------------------------------------------------- +# Grafana Configuration +# ----------------------------------------------------------- 
+# RELEASE_NAME_GRAFANA="mon-grafana" +# CHART_REPO_NAME_GRAFANA="grafana" +# CHART_REPO_URL_GRAFANA="https://grafana.github.io/helm-charts" +# CHART_NAME_GRAFANA="grafana" +# VALUES_FILE_GRAFANA="$VALUES_FILE_PATH/grafana_values.yaml" + + +# ----------------------------------------------------------- +# Function to deploy or upgrade a Helm chart +# ----------------------------------------------------------- +deploy_chart() { + local release_name="$1" + local chart_repo_name="$2" + local chart_repo_url="$3" + local chart_name="$4" + local values_file="$5" + local namespace="$6" + + echo ">>> Deploying [${release_name}] from repo [${chart_repo_name}]..." + + # Add or update the Helm repo + echo "Adding/updating Helm repo: $chart_repo_name -> $chart_repo_url" + helm repo add "$chart_repo_name" "$chart_repo_url" || true + helm repo update + + # Create namespace if needed + echo "Creating namespace '$namespace' if it doesn't exist..." + kubectl get namespace "$namespace" >/dev/null 2>&1 || kubectl create namespace "$namespace" + + # Install or upgrade the chart + if [ -n "$values_file" ] && [ -f "$values_file" ]; then + echo "Installing/Upgrading $release_name using custom values from $values_file..." + helm upgrade --install "$release_name" "$chart_repo_name/$chart_name" \ + --namespace "$namespace" \ + --values "$values_file" + else + echo "Installing/Upgrading $release_name with default chart values..." + helm upgrade --install "$release_name" "$chart_repo_name/$chart_name" \ + --namespace "$namespace" + fi + + echo "<<< Deployment initiated for [$release_name]." 
+ echo +} + + +# ----------------------------------------------------------- +# Actual Deployments +# ----------------------------------------------------------- + +# 1) Deploy Prometheus +deploy_chart "$RELEASE_NAME_PROM" \ + "$CHART_REPO_NAME_PROM" \ + "$CHART_REPO_URL_PROM" \ + "$CHART_NAME_PROM" \ + "$VALUES_FILE_PROM" \ + "$NAMESPACE" + +# Optionally wait for Prometheus server pod to become ready +kubectl rollout status deployment/"$RELEASE_NAME_PROM-server" -n "$NAMESPACE" || true + + +# 2) Deploy Mimir +deploy_chart "$RELEASE_NAME_MIMIR" \ + "$CHART_REPO_NAME_MIMIR" \ + "$CHART_REPO_URL_MIMIR" \ + "$CHART_NAME_MIMIR" \ + "$VALUES_FILE_MIMIR" \ + "$NAMESPACE" + +# Depending on how Mimir runs (StatefulSets, Deployments), you can wait for +# the correct resource to be ready. For example: +# kubectl rollout status statefulset/"$RELEASE_NAME_MIMIR-distributor" -n "$NAMESPACE" || true + + +# 3) Deploy Grafana +# deploy_chart "$RELEASE_NAME_GRAFANA" \ +# "$CHART_REPO_NAME_GRAFANA" \ +# "$CHART_REPO_URL_GRAFANA" \ +# "$CHART_NAME_GRAFANA" \ +# "$VALUES_FILE_GRAFANA" \ +# "$NAMESPACE" + +# kubectl rollout status deployment/"$RELEASE_NAME_GRAFANA" -n "$NAMESPACE" || true + +# ----------------------------------------------------------- +echo "All deployments completed!" + diff --git a/manifests/monitoring/grafana_values.yaml b/manifests/monitoring/grafana_values.yaml new file mode 100644 index 000000000..a2dbd7971 --- /dev/null +++ b/manifests/monitoring/grafana_values.yaml @@ -0,0 +1,235 @@ +rbac: + create: true + ## Use an existing ClusterRole/Role (depending on rbac.namespaced false/true) + # useExistingRole: name-of-some-role + # useExistingClusterRole: name-of-some-clusterRole + pspEnabled: false + pspUseAppArmor: false + namespaced: false + +serviceAccount: + create: true + name: + nameTest: + ## ServiceAccount labels. 
+ automountServiceAccountToken: false + +replicas: 1 + +## Create a headless service for the deployment +headlessService: false + +## Should the service account be auto mounted on the pod +automountServiceAccountToken: true + +## Create HorizontalPodAutoscaler object for deployment type +# +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 3 + targetCPU: "60" + targetMemory: "" + behavior: {} + +deploymentStrategy: + type: RollingUpdate + +readinessProbe: + httpGet: + path: /api/health + port: 3000 + +livenessProbe: + httpGet: + path: /api/health + port: 3000 + initialDelaySeconds: 60 + timeoutSeconds: 30 + failureThreshold: 10 + +image: + registry: docker.io + repository: grafana/grafana + # Overrides the Grafana image tag whose default is the chart appVersion + tag: "" + sha: "" + pullPolicy: IfNotPresent + + ## Optionally specify an array of imagePullSecrets. + ## Secrets must be manually created in the namespace. + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ + ## Can be templated. + ## + pullSecrets: [] + # - myRegistrKeySecretName + +testFramework: + enabled: true + ## The type of Helm hook used to run this test. Defaults to test. 
+ ## ref: https://helm.sh/docs/topics/charts_hooks/#the-available-hooks + ## + # hookType: test + image: + # -- The Docker registry + registry: docker.io + repository: bats/bats + tag: "v1.4.1" + imagePullPolicy: IfNotPresent + +# dns configuration for pod +dnsPolicy: ~ +dnsConfig: {} + # nameservers: + # - 8.8.8.8 + # options: + # - name: ndots + # value: "2" + # - name: edns0 + +securityContext: + runAsNonRoot: true + runAsUser: 472 + runAsGroup: 472 + fsGroup: 472 + +containerSecurityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +# Enable creating the grafana configmap +createConfigmap: true + +downloadDashboardsImage: + registry: docker.io + repository: curlimages/curl + tag: 8.9.1 + sha: "" + pullPolicy: IfNotPresent + +downloadDashboards: + env: {} + envFromSecret: "" + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + envValueFrom: {} + # ENV_NAME: + # configMapKeyRef: + # name: configmap-name + # key: value_key + +## Pod Annotations +# podAnnotations: {} + +## ConfigMap Annotations +# configMapAnnotations: {} + # argocd.argoproj.io/sync-options: Replace=true + +## Pod Labels +# podLabels: {} + +podPortName: grafana +gossipPortName: gossip +## Deployment annotations +# annotations: {} + +service: + enabled: true + type: NodePort + port: 80 + targetPort: 3000 + nodePort: 30080 + portName: service + +## Enable persistence using Persistent Volume Claims +## ref: https://kubernetes.io/docs/user-guide/persistent-volumes/ +## +persistence: + type: pvc + enabled: true + # storageClassName: default + accessModes: + - ReadWriteOnce + size: 10Gi + # annotations: {} + finalizers: + - kubernetes.io/pvc-protection + + disableWarning: false + + ## If 'lookupVolumeName' is set to true, Helm will attempt to retrieve + ## the current value of 'spec.volumeName' and incorporate it into the template. 
+ lookupVolumeName: true + +# Administrator credentials when not using an existing secret (see below) +adminUser: admin +# adminPassword: strongpassword + +# Use an existing secret for the admin user. +admin: + ## Name of the secret. Can be templated. + existingSecret: "" + userKey: admin-user + passwordKey: admin-password + +## Configure grafana datasources +## ref: http://docs.grafana.org/administration/provisioning/#datasources +## +datasources: + datasources.yaml: + apiVersion: 1 + datasources: + - name: Prometheus + type: prometheus + url: http://mon-prometheus-server.monitoring.svc.cluster.local + access: proxy + isDefault: true + - name: Mimir + type: prometheus + url: http://mon-mimir-nginx.monitoring.svc:80/prometheus # <release>-nginx.<namespace>: release "mon-mimir" is installed in namespace "monitoring" + access: proxy + isDefault: false + +## Grafana's primary configuration +## NOTE: values in map will be converted to ini format +## ref: http://docs.grafana.org/installation/configuration/ +## +grafana.ini: + paths: + data: /var/lib/grafana/ + logs: /var/log/grafana + plugins: /var/lib/grafana/plugins + provisioning: /etc/grafana/provisioning + analytics: + check_for_updates: true + log: + mode: console + grafana_net: + url: https://grafana.net + server: + domain: "{{ if (and .Values.ingress.enabled .Values.ingress.hosts) }}{{ tpl (.Values.ingress.hosts | first) . }}{{ else }}''{{ end }}" + +## Number of old ReplicaSets to retain +## +revisionHistoryLimit: 5 + +# assertNoLeakedSecrets is a helper function defined in _helpers.tpl that checks if secret +# values are not exposed in the rendered grafana.ini configmap. It is enabled by default. +# +# To pass values into grafana.ini without exposing them in a configmap, use variable expansion: +# https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#variable-expansion +# +# Alternatively, if you wish to allow secret values to be exposed in the rendered grafana.ini configmap, +# you can disable this check by setting assertNoLeakedSecrets to false. 
+assertNoLeakedSecrets: true + diff --git a/manifests/prometheus/prometheus.yaml b/manifests/monitoring/prometheus_values.yaml similarity index 100% rename from manifests/prometheus/prometheus.yaml rename to manifests/monitoring/prometheus_values.yaml diff --git a/src/telemetry/backend/collectors/emulated/SyntheticMetricsGenerator.py b/src/telemetry/backend/collectors/emulated/SyntheticMetricsGenerator.py index a01e2c0e6..77d998432 100644 --- a/src/telemetry/backend/collectors/emulated/SyntheticMetricsGenerator.py +++ b/src/telemetry/backend/collectors/emulated/SyntheticMetricsGenerator.py @@ -98,7 +98,7 @@ class SyntheticMetricsGenerator(): return (time.time(), resource_key, requested_metrics) - def metric_id_mapper(self, sample_type_ids, metric_dict): + def metric_id_mapper(self, sample_type_ids, metric_dict): # TODO: Add a dynamic mapper from kpi_sample_type ID to name... """ Maps the sample type IDs to the corresponding metric names. diff --git a/src/telemetry/backend/service/TelemetryBackendService.py b/src/telemetry/backend/service/TelemetryBackendService.py index 40cd1443a..3aeee8238 100755 --- a/src/telemetry/backend/service/TelemetryBackendService.py +++ b/src/telemetry/backend/service/TelemetryBackendService.py @@ -49,7 +49,7 @@ class TelemetryBackendService(GenericGrpcService): self.kafka_consumer = KafkaConsumer({'bootstrap.servers' : KafkaConfig.get_kafka_address(), 'group.id' : 'backend', 'auto.offset.reset' : 'latest'}) - self.collector = EmulatedCollector(address="127.0.0.1", port=8000) + self.collector = None self.context_client = ContextClient() self.kpi_manager_client = KpiManagerClient() self.active_jobs = {} @@ -124,11 +124,11 @@ class TelemetryBackendService(GenericGrpcService): Method to handle collector request. """ device_type, end_points = self.get_endpoint_detail(kpi_id) - # end_points : dict = self.get_endpoints_from_kpi_id(kpi_id) + if end_points is None: LOGGER.warning("KPI ID: {:} - Endpoints not found. 
Skipping...".format(kpi_id)) return - # device_type : str = self.get_device_type_from_kpi_id(kpi_id) + if device_type and "emu" in device_type: LOGGER.info("KPI ID: {:} - Device Type: {:} - Endpoints: {:}".format(kpi_id, device_type, end_points)) subscription = [collector_id, end_points, duration, interval] @@ -139,6 +139,7 @@ class TelemetryBackendService(GenericGrpcService): def EmulatedCollectorHandler(self, subscription, duration, collector_id, kpi_id, stop_event): # EmulatedCollector + self.collector = EmulatedCollector(address="127.0.0.1", port=8000) self.collector.Connect() if not self.collector.SubscribeState(subscription): LOGGER.warning("KPI ID: {:} - Subscription failed. Skipping...".format(kpi_id)) -- GitLab