From 0306b222f8e25768b0a934ae4d49e7f47e9be177 Mon Sep 17 00:00:00 2001 From: Waleed Akbar <wakbar@cttc.es> Date: Mon, 30 Sep 2024 16:14:32 +0000 Subject: [PATCH] Changes to Address Minor Issues in Monitoring Services Deployment and Build: - Improved Kafka scripts. - Updated manifests to handle ports for backend and frontend. - Changed the return type of a method in settings to `int`. - Added DB deployment in the service's main file. --- deploy/kafka.sh | 14 +- manifests/analyticsservice.yaml | 12 +- manifests/servicemonitors.yaml | 153 +++++++++++++++++++++ manifests/telemetryservice.yaml | 12 +- src/analytics/backend/service/__main__.py | 2 +- src/analytics/frontend/service/__main__.py | 14 +- src/analytics/tests/test_analytics_db.py | 2 +- src/common/Settings.py | 10 +- src/common/tools/kafka/Variables.py | 10 +- src/kpi_manager/service/__main__.py | 13 +- src/kpi_manager/tests/test_kpi_db.py | 2 +- src/telemetry/backend/service/__main__.py | 2 +- src/telemetry/frontend/service/__main__.py | 12 +- src/telemetry/tests/test_telemetryDB.py | 2 +- 14 files changed, 230 insertions(+), 30 deletions(-) diff --git a/deploy/kafka.sh b/deploy/kafka.sh index 0483bce15..4cbcdb701 100755 --- a/deploy/kafka.sh +++ b/deploy/kafka.sh @@ -47,10 +47,10 @@ function kafka_deploy() { cp "${KFK_MANIFESTS_PATH}/${KFK_MANIFEST}" "${TMP_MANIFESTS_FOLDER}/${KFK_MANIFEST}" # echo "Apache Kafka Namespace" - echo ">>> Delete Apache Kafka Namespace" + echo "Delete Apache Kafka Namespace" kubectl delete namespace ${KFK_NAMESPACE} --ignore-not-found - echo ">>> Create Apache Kafka Namespace" + echo "Create Apache Kafka Namespace" kubectl create namespace ${KFK_NAMESPACE} # echo ">>> Deplying Apache Kafka Zookeeper" @@ -76,15 +76,15 @@ function kafka_deploy() { # fi } -echo "Apache Kafka" -echo ">>> Checking if Apache Kafka is deployed ... " +echo ">>> Apache Kafka" +echo "Checking if Apache Kafka is deployed ... " if [ "$KFK_REDEPLOY" == "YES" ]; then - echo ">>> Redeploying kafka namespace" + echo "Redeploying kafka namespace" kafka_deploy elif kubectl get namespace "${KFK_NAMESPACE}" &> /dev/null; then - echo ">>> Apache Kafka already present; skipping step." + echo "Apache Kafka already present; skipping step." else - echo ">>> Kafka namespace doesn't exists. Deploying kafka namespace" + echo "Kafka namespace doesn't exists. Deploying kafka namespace" kafka_deploy fi echo diff --git a/manifests/analyticsservice.yaml b/manifests/analyticsservice.yaml index 7340dff5f..e15214dbd 100644 --- a/manifests/analyticsservice.yaml +++ b/manifests/analyticsservice.yaml @@ -39,6 +39,8 @@ spec: value: "INFO" - name: CRDB_DATABASE value: "tfs_analytics" + - name: METRICS_PORT + value: "9192" envFrom: - secretRef: name: crdb-analytics @@ -62,10 +64,12 @@ spec: imagePullPolicy: Always ports: - containerPort: 30090 - - containerPort: 9192 + - containerPort: 9193 env: - name: LOG_LEVEL value: "INFO" + - name: METRICS_PORT + value: "9193" envFrom: - secretRef: name: kfk-kpi-data @@ -102,10 +106,14 @@ spec: protocol: TCP port: 30090 targetPort: 30090 - - name: metrics + - name: metrics-frontend protocol: TCP port: 9192 targetPort: 9192 + - name: metrics-backend + protocol: TCP + port: 9193 + targetPort: 9193 --- apiVersion: autoscaling/v2 kind: HorizontalPodAutoscaler diff --git a/manifests/servicemonitors.yaml b/manifests/servicemonitors.yaml index 716c1c689..8a8fe6f39 100644 --- a/manifests/servicemonitors.yaml +++ b/manifests/servicemonitors.yaml @@ -475,3 +475,156 @@ spec: any: false matchNames: - tfs # namespace where the app is running +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + namespace: monitoring # namespace where prometheus is running + name: tfs-analyticsservice-metric + labels: + app: analyticsservice + #release: prometheus + #release: prom # name of the release + # ( VERY IMPORTANT: You need to know the correct release name by viewing + # the servicemonitor of Prometheus itself: Without the correct name, + # Prometheus cannot identify the metrics of the Flask app as the target.) +spec: + selector: + matchLabels: + # Target app service + #namespace: tfs + app: analyticsservice # same as above + #release: prometheus # same as above + endpoints: + - port: metrics-frontend # named port in target app + scheme: http + path: /metrics # path to scrape + interval: 5s # scrape interval + - port: metrics-backend # named port in target app + scheme: http + path: /metrics # path to scrape + interval: 5s # scrape interval + namespaceSelector: + any: false + matchNames: + - tfs # namespace where the app is running +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + namespace: monitoring # namespace where prometheus is running + name: tfs-telemetryservice-metric + labels: + app: telemetryservice + #release: prometheus + #release: prom # name of the release + # ( VERY IMPORTANT: You need to know the correct release name by viewing + # the servicemonitor of Prometheus itself: Without the correct name, + # Prometheus cannot identify the metrics of the Flask app as the target.) +spec: + selector: + matchLabels: + # Target app service + #namespace: tfs + app: telemetryservice # same as above + #release: prometheus # same as above + endpoints: + - port: metrics-frontend # named port in target app + scheme: http + path: /metrics # path to scrape + interval: 5s # scrape interval + - port: metrics-backend # named port in target app + scheme: http + path: /metrics # path to scrape + interval: 5s # scrape interval + namespaceSelector: + any: false + matchNames: + - tfs # namespace where the app is running +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + namespace: monitoring # namespace where prometheus is running + name: tfs-kpi-managerservice-metric + labels: + app: kpi-managerservice + #release: prometheus + #release: prom # name of the release + # ( VERY IMPORTANT: You need to know the correct release name by viewing + # the servicemonitor of Prometheus itself: Without the correct name, + # Prometheus cannot identify the metrics of the Flask app as the target.) +spec: + selector: + matchLabels: + # Target app service + #namespace: tfs + app: kpi-managerservice # same as above + #release: prometheus # same as above + endpoints: + - port: metrics # named port in target app + scheme: http + path: /metrics # path to scrape + interval: 5s # scrape interval + namespaceSelector: + any: false + matchNames: + - tfs # namespace where the app is running +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + namespace: monitoring # namespace where prometheus is running + name: tfs-kpi_value_apiservice-metric + labels: + app: kpi_value_apiservice + #release: prometheus + #release: prom # name of the release + # ( VERY IMPORTANT: You need to know the correct release name by viewing + # the servicemonitor of Prometheus itself: Without the correct name, + # Prometheus cannot identify the metrics of the Flask app as the target.) +spec: + selector: + matchLabels: + # Target app service + #namespace: tfs + app: kpi_value_apiservice # same as above + #release: prometheus # same as above + endpoints: + - port: metrics # named port in target app + scheme: http + path: /metrics # path to scrape + interval: 5s # scrape interval + namespaceSelector: + any: false + matchNames: + - tfs # namespace where the app is running +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + namespace: monitoring # namespace where prometheus is running + name: tfs-kpi_value_writerservice-metric + labels: + app: kpi_value_writerservice + #release: prometheus + #release: prom # name of the release + # ( VERY IMPORTANT: You need to know the correct release name by viewing + # the servicemonitor of Prometheus itself: Without the correct name, + # Prometheus cannot identify the metrics of the Flask app as the target.) +spec: + selector: + matchLabels: + # Target app service + #namespace: tfs + app: kpi_value_writerservice # same as above + #release: prometheus # same as above + endpoints: + - port: metrics # named port in target app + scheme: http + path: /metrics # path to scrape + interval: 5s # scrape interval + namespaceSelector: + any: false + matchNames: + - tfs # namespace where the app is running diff --git a/manifests/telemetryservice.yaml b/manifests/telemetryservice.yaml index 7c781bb3d..9c979713d 100644 --- a/manifests/telemetryservice.yaml +++ b/manifests/telemetryservice.yaml @@ -39,6 +39,8 @@ spec: value: "INFO" - name: CRDB_DATABASE value: "tfs_kpi" + - name: METRICS_PORT + value: "9192" envFrom: - secretRef: name: crdb-telemetry @@ -62,10 +64,12 @@ spec: imagePullPolicy: Always ports: - containerPort: 30060 - - containerPort: 9192 + - containerPort: 9193 env: - name: LOG_LEVEL value: "INFO" + - name: METRICS_PORT + value: "9193" envFrom: - secretRef: name: kfk-kpi-data @@ -102,10 +106,14 @@ spec: protocol: TCP port: 30060 targetPort: 30060 - - name: metrics + - name: metrics-frontend protocol: TCP port: 9192 targetPort: 9192 + - name: metrics-backend + protocol: TCP + port: 9193 + targetPort: 9193 --- apiVersion: autoscaling/v2 kind: HorizontalPodAutoscaler diff --git a/src/analytics/backend/service/__main__.py b/src/analytics/backend/service/__main__.py index 3c4c36b7c..9b1941e20 100644 --- a/src/analytics/backend/service/__main__.py +++ b/src/analytics/backend/service/__main__.py @@ -37,7 +37,7 @@ def main(): LOGGER.info('Starting...') # Start metrics server - metrics_port = get_metrics_port() + metrics_port = int(get_metrics_port()) start_http_server(metrics_port) grpc_service = AnalyticsBackendService() diff --git a/src/analytics/frontend/service/__main__.py b/src/analytics/frontend/service/__main__.py index 6c331844f..bf94eb5e6 100644 --- a/src/analytics/frontend/service/__main__.py +++ b/src/analytics/frontend/service/__main__.py @@ -16,9 +16,14 @@ import logging, signal, sys, threading from prometheus_client import start_http_server from common.Settings import get_log_level, get_metrics_port from .AnalyticsFrontendService import AnalyticsFrontendService +from analytics.database.AnalyzerModel import Analyzer as Model +from common.tools.database.GenericDatabase import Database +from common.Settings import get_setting + +DB_NAME = get_setting('CRDB_DATABASE', default=None) terminate = threading.Event() -LOGGER = None +LOGGER = None def signal_handler(signal, frame): # pylint: disable=redefined-outer-name LOGGER.warning('Terminate signal received') @@ -36,8 +41,13 @@ def main(): LOGGER.info('Starting...') + # To create DB + kpiDBobj = Database(DB_NAME, Model) + kpiDBobj.create_database() + kpiDBobj.create_tables() + # Start metrics server - metrics_port = get_metrics_port() + metrics_port = int(get_metrics_port()) start_http_server(metrics_port) grpc_service = AnalyticsFrontendService() diff --git a/src/analytics/tests/test_analytics_db.py b/src/analytics/tests/test_analytics_db.py index f944fc0b5..58e7d0167 100644 --- a/src/analytics/tests/test_analytics_db.py +++ b/src/analytics/tests/test_analytics_db.py @@ -14,7 +14,7 @@ import logging -from analytics.database.new_Analyzer_DB import AnalyzerDB +from analytics.database.Analyzer_DB import AnalyzerDB LOGGER = logging.getLogger(__name__) diff --git a/src/common/Settings.py b/src/common/Settings.py index eaeb363ad..936c0387b 100644 --- a/src/common/Settings.py +++ b/src/common/Settings.py @@ -79,12 +79,12 @@ def get_service_host(service_name : ServiceNameEnum): def get_service_port_grpc(service_name : ServiceNameEnum): envvar_name = get_env_var_name(service_name, ENVVAR_SUFIX_SERVICE_PORT_GRPC) default_value = DEFAULT_SERVICE_GRPC_PORTS.get(service_name.value) - return get_setting(envvar_name, default=default_value) + return int(get_setting(envvar_name, default=default_value)) def get_service_port_http(service_name : ServiceNameEnum): envvar_name = get_env_var_name(service_name, ENVVAR_SUFIX_SERVICE_PORT_HTTP) default_value = DEFAULT_SERVICE_HTTP_PORTS.get(service_name.value) - return get_setting(envvar_name, default=default_value) + return int(get_setting(envvar_name, default=default_value)) def get_service_baseurl_http(service_name : ServiceNameEnum): envvar_name = get_env_var_name(service_name, ENVVAR_SUFIX_SERVICE_BASEURL_HTTP) @@ -95,16 +95,16 @@ def get_log_level(): return get_setting(ENVVAR_LOG_LEVEL, default=DEFAULT_LOG_LEVEL) def get_metrics_port(): - return get_setting(ENVVAR_METRICS_PORT, default=DEFAULT_METRICS_PORT) + return int(get_setting(ENVVAR_METRICS_PORT, default=DEFAULT_METRICS_PORT)) def get_grpc_bind_address(): return get_setting(ENVVAR_GRPC_BIND_ADDRESS, default=DEFAULT_GRPC_BIND_ADDRESS) def get_grpc_max_workers(): - return get_setting(ENVVAR_GRPC_MAX_WORKERS, default=DEFAULT_GRPC_MAX_WORKERS) + return int(get_setting(ENVVAR_GRPC_MAX_WORKERS, default=DEFAULT_GRPC_MAX_WORKERS)) def get_grpc_grace_period(): - return get_setting(ENVVAR_GRPC_GRACE_PERIOD, default=DEFAULT_GRPC_GRACE_PERIOD) + return int(get_setting(ENVVAR_GRPC_GRACE_PERIOD, default=DEFAULT_GRPC_GRACE_PERIOD)) def get_http_bind_address(): return get_setting(ENVVAR_HTTP_BIND_ADDRESS, default=DEFAULT_HTTP_BIND_ADDRESS) diff --git a/src/common/tools/kafka/Variables.py b/src/common/tools/kafka/Variables.py index fc43c3151..73b633e23 100644 --- a/src/common/tools/kafka/Variables.py +++ b/src/common/tools/kafka/Variables.py @@ -25,11 +25,11 @@ class KafkaConfig(Enum): @staticmethod def get_kafka_address() -> str: - # kafka_server_address = get_setting('KFK_SERVER_ADDRESS', default=None) - # if kafka_server_address is None: - KFK_NAMESPACE = get_setting('KFK_NAMESPACE') - KFK_PORT = get_setting('KFK_SERVER_PORT') - kafka_server_address = KFK_SERVER_ADDRESS_TEMPLATE.format(KFK_NAMESPACE, KFK_PORT) + kafka_server_address = get_setting('KFK_SERVER_ADDRESS', default=None) + if kafka_server_address is None: + KFK_NAMESPACE = get_setting('KFK_NAMESPACE') + KFK_PORT = get_setting('KFK_SERVER_PORT') + kafka_server_address = KFK_SERVER_ADDRESS_TEMPLATE.format(KFK_NAMESPACE, KFK_PORT) return kafka_server_address @staticmethod diff --git a/src/kpi_manager/service/__main__.py b/src/kpi_manager/service/__main__.py index 244d5afa3..6a3d078e2 100644 --- a/src/kpi_manager/service/__main__.py +++ b/src/kpi_manager/service/__main__.py @@ -16,8 +16,14 @@ import logging, signal, sys, threading from common.Settings import get_log_level from .KpiManagerService import KpiManagerService +from kpi_manager.database.KpiModel import Kpi as Model +from common.tools.database.GenericDatabase import Database +from common.Settings import get_setting + + +DB_NAME = get_setting('CRDB_DATABASE', default=None) terminate = threading.Event() -LOGGER = None +LOGGER = None def signal_handler(signal, frame): # pylint: disable=redefined-outer-name LOGGER.warning('Terminate signal received') @@ -35,6 +41,11 @@ def main(): LOGGER.debug('Starting...') + # To create DB + kpiDBobj = Database(DB_NAME, Model) + kpiDBobj.create_database() + kpiDBobj.create_tables() + grpc_service = KpiManagerService() grpc_service.start() diff --git a/src/kpi_manager/tests/test_kpi_db.py b/src/kpi_manager/tests/test_kpi_db.py index b9c3b2d7c..b1513a83f 100644 --- a/src/kpi_manager/tests/test_kpi_db.py +++ b/src/kpi_manager/tests/test_kpi_db.py @@ -18,7 +18,7 @@ import logging from common.proto.kpi_manager_pb2 import KpiDescriptorList from .test_messages import create_kpi_filter_request from kpi_manager.database.KpiModel import Kpi as KpiModel -from kpi_manager.database.new_KpiDB import KpiDB +from kpi_manager.database.KpiDB import KpiDB # from common.tools.database.GenericDatabase import Database LOGGER = logging.getLogger(__name__) diff --git a/src/telemetry/backend/service/__main__.py b/src/telemetry/backend/service/__main__.py index 9ec9e191f..8af209e2c 100644 --- a/src/telemetry/backend/service/__main__.py +++ b/src/telemetry/backend/service/__main__.py @@ -37,7 +37,7 @@ def main(): LOGGER.info('Starting...') # Start metrics server - metrics_port = get_metrics_port() + metrics_port = int(get_metrics_port()) start_http_server(metrics_port) grpc_service = TelemetryBackendService() diff --git a/src/telemetry/frontend/service/__main__.py b/src/telemetry/frontend/service/__main__.py index 2a6c5dbcf..60b96646a 100644 --- a/src/telemetry/frontend/service/__main__.py +++ b/src/telemetry/frontend/service/__main__.py @@ -16,7 +16,12 @@ import logging, signal, sys, threading from prometheus_client import start_http_server from common.Settings import get_log_level, get_metrics_port from .TelemetryFrontendService import TelemetryFrontendService +from telemetry.database.TelemetryModel import Collector as Model +from common.tools.database.GenericDatabase import Database +from common.Settings import get_setting + +DB_NAME = get_setting('CRDB_DATABASE', default=None) terminate = threading.Event() LOGGER = None @@ -36,8 +41,13 @@ def main(): LOGGER.info('Starting...') + # To create DB + kpiDBobj = Database(DB_NAME, Model) + kpiDBobj.create_database() + kpiDBobj.create_tables() + # Start metrics server - metrics_port = get_metrics_port() + metrics_port = int(get_metrics_port()) start_http_server(metrics_port) grpc_service = TelemetryFrontendService() diff --git a/src/telemetry/tests/test_telemetryDB.py b/src/telemetry/tests/test_telemetryDB.py index 707e6b5b2..0d5bd0584 100644 --- a/src/telemetry/tests/test_telemetryDB.py +++ b/src/telemetry/tests/test_telemetryDB.py @@ -14,7 +14,7 @@ import logging -from telemetry.database.new_Telemetry_DB import TelemetryDB +from telemetry.database.Telemetry_DB import TelemetryDB LOGGER = logging.getLogger(__name__) -- GitLab