From 798d205233780adb772883c5cc6868c4e56436ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ara=C3=BAjo?= <davidaraujo@av.it.pt> Date: Tue, 13 Feb 2024 01:16:40 +0000 Subject: [PATCH] Clean up default variables set code refactoring NATS cluster complete Startup Probe failing in NATS cluster mode Cockroach cluster operator and NATS cluster mode Update Update scheduling policy for CRDB NATS cluster mode Testing CRDB cluster with node affinity Revert "Testing dynamic node resources" This reverts commit 856eb4799d2136697c721b387e6fca9fdcdbf5fd. Testing dynamic node resources --- deploy/all.sh | 9 +++ deploy/crdb.sh | 2 +- deploy/nats.sh | 115 +++++++++++++++++++++++++--- manifests/cockroachdb/cluster.yaml | 36 ++++----- manifests/cockroachdb/operator.yaml | 2 + manifests/nats/cluster.yaml | 34 ++++++++ my_deploy.sh | 7 +- 7 files changed, 175 insertions(+), 30 deletions(-) create mode 100644 manifests/nats/cluster.yaml diff --git a/deploy/all.sh b/deploy/all.sh index 876928c4b..50a6c0816 100755 --- a/deploy/all.sh +++ b/deploy/all.sh @@ -107,6 +107,15 @@ export NATS_EXT_PORT_CLIENT=${NATS_EXT_PORT_CLIENT:-"4222"} # If not already set, set the external port NATS HTTP Mgmt GUI interface will be exposed to. export NATS_EXT_PORT_HTTP=${NATS_EXT_PORT_HTTP:-"8222"} +# TESTING +# If not already set, set NATS installation mode. Accepted values are: 'single' and 'cluster'. +# - If NATS_DEPLOY_MODE is "single", NATS is deployed in single node mode. It is convenient for +# development and testing purposes and should fit in a VM. IT SHOULD NOT BE USED IN PRODUCTION ENVIRONMENTS. +# - If NATS_DEPLOY_MODE is "cluster", NATS is deployed in cluster mode, and an entire NATS cluster +# with 3 replicas (set by default) will be deployed. It is convenient for production and +# provides scalability features. +export NATS_DEPLOY_MODE=${NATS_DEPLOY_MODE:-"single"} + # If not already set, disable flag for re-deploying NATS from scratch. 
# WARNING: ACTIVATING THIS FLAG IMPLIES LOOSING THE MESSAGE BROKER INFORMATION! # If NATS_REDEPLOY is "YES", the message broker will be dropped while checking/deploying NATS. diff --git a/deploy/crdb.sh b/deploy/crdb.sh index d81c7c703..2a8bd88d3 100755 --- a/deploy/crdb.sh +++ b/deploy/crdb.sh @@ -228,7 +228,7 @@ function crdb_deploy_cluster() { kubectl create namespace ${CRDB_NAMESPACE} echo - echo "CockroachDB" + echo "CockroachDB (cluster-mode)" echo ">>> Checking if CockroachDB is deployed..." if kubectl get --namespace ${CRDB_NAMESPACE} statefulset/cockroachdb &> /dev/null; then echo ">>> CockroachDB is present; skipping step." diff --git a/deploy/nats.sh b/deploy/nats.sh index 3e2374fc7..d6922d86b 100755 --- a/deploy/nats.sh +++ b/deploy/nats.sh @@ -31,16 +31,32 @@ export NATS_EXT_PORT_CLIENT=${NATS_EXT_PORT_CLIENT:-"4222"} # If not already set, set the external port NATS HTTP Mgmt GUI interface will be exposed to. export NATS_EXT_PORT_HTTP=${NATS_EXT_PORT_HTTP:-"8222"} +# TESTING +# If not already set, set NATS installation mode. Accepted values are: 'single' and 'cluster'. +# - If NATS_DEPLOY_MODE is "single", NATS is deployed in single node mode. It is convenient for +# development and testing purposes and should fit in a VM. IT SHOULD NOT BE USED IN PRODUCTION ENVIRONMENTS. +# - If NATS_DEPLOY_MODE is "cluster", NATS is deployed in cluster mode, and an entire NATS cluster +# with 3 replicas (set by default) will be deployed. It is convenient for production and +# provides scalability features. +export NATS_DEPLOY_MODE=${NATS_DEPLOY_MODE:-"single"} + # If not already set, disable flag for re-deploying NATS from scratch. # WARNING: ACTIVATING THIS FLAG IMPLIES LOOSING THE MESSAGE BROKER INFORMATION! # If NATS_REDEPLOY is "YES", the message broker will be dropped while checking/deploying NATS. 
export NATS_REDEPLOY=${NATS_REDEPLOY:-""} - ######################################################################################################################## # Automated steps start here ######################################################################################################################## +# Constants +TMP_FOLDER="./tmp" +NATS_MANIFESTS_PATH="manifests/nats" + +# Create a tmp folder for files modified during the deployment +TMP_MANIFESTS_FOLDER="${TMP_FOLDER}/${NATS_NAMESPACE}/manifests" +mkdir -p $TMP_MANIFESTS_FOLDER + function nats_deploy_single() { echo "NATS Namespace" echo ">>> Create NATS Namespace (if missing)" @@ -51,20 +67,85 @@ function nats_deploy_single() { helm3 repo add nats https://nats-io.github.io/k8s/helm/charts/ echo + echo "Install NATS (single-node)" + echo ">>> Checking if NATS is deployed..." + if kubectl get --namespace ${NATS_NAMESPACE} statefulset/${NATS_NAMESPACE} &> /dev/null; then + echo ">>> NATS is present; skipping step." + else + echo ">>> Deploy NATS" + helm3 install ${NATS_NAMESPACE} nats/nats --namespace ${NATS_NAMESPACE} --set nats.image=nats:2.9-alpine + + echo ">>> Waiting NATS statefulset to be created..." + while ! kubectl get --namespace ${NATS_NAMESPACE} statefulset/${NATS_NAMESPACE} &> /dev/null; do + printf "%c" "." + sleep 1 + done + + # Wait for statefulset condition "Available=True" does not work + # Wait for statefulset condition "jsonpath='{.status.readyReplicas}'=3" throws error: + # "error: readyReplicas is not found" + # Workaround: Check the pods are ready + #echo ">>> NATS statefulset created. Waiting for readiness condition..." + #kubectl wait --namespace ${NATS_NAMESPACE} --for=condition=Available=True --timeout=300s statefulset/nats + #kubectl wait --namespace ${NATS_NAMESPACE} --for=jsonpath='{.status.readyReplicas}'=3 --timeout=300s \ + # statefulset/nats + echo ">>> NATS statefulset created. Waiting NATS pods to be created..." + while ! 
kubectl get --namespace ${NATS_NAMESPACE} pod/${NATS_NAMESPACE}-0 &> /dev/null; do + printf "%c" "." + sleep 1 + done + kubectl wait --namespace ${NATS_NAMESPACE} --for=condition=Ready --timeout=300s pod/${NATS_NAMESPACE}-0 + fi + echo + + echo "NATS Port Mapping" + echo ">>> Expose NATS Client port (4222->${NATS_EXT_PORT_CLIENT})" + NATS_PORT_CLIENT=$(kubectl --namespace ${NATS_NAMESPACE} get service ${NATS_NAMESPACE} -o 'jsonpath={.spec.ports[?(@.name=="client")].port}') + PATCH='{"data": {"'${NATS_EXT_PORT_CLIENT}'": "'${NATS_NAMESPACE}'/'${NATS_NAMESPACE}':'${NATS_PORT_CLIENT}'"}}' + kubectl patch configmap nginx-ingress-tcp-microk8s-conf --namespace ingress --patch "${PATCH}" + + PORT_MAP='{"containerPort": '${NATS_EXT_PORT_CLIENT}', "hostPort": '${NATS_EXT_PORT_CLIENT}'}' + CONTAINER='{"name": "nginx-ingress-microk8s", "ports": ['${PORT_MAP}']}' + PATCH='{"spec": {"template": {"spec": {"containers": ['${CONTAINER}']}}}}' + kubectl patch daemonset nginx-ingress-microk8s-controller --namespace ingress --patch "${PATCH}" + echo + + echo ">>> Expose NATS HTTP Mgmt GUI port (8222->${NATS_EXT_PORT_HTTP})" + NATS_PORT_HTTP=$(kubectl --namespace ${NATS_NAMESPACE} get service ${NATS_NAMESPACE} -o 'jsonpath={.spec.ports[?(@.name=="monitor")].port}') + PATCH='{"data": {"'${NATS_EXT_PORT_HTTP}'": "'${NATS_NAMESPACE}'/'${NATS_NAMESPACE}':'${NATS_PORT_HTTP}'"}}' + kubectl patch configmap nginx-ingress-tcp-microk8s-conf --namespace ingress --patch "${PATCH}" + + PORT_MAP='{"containerPort": '${NATS_EXT_PORT_HTTP}', "hostPort": '${NATS_EXT_PORT_HTTP}'}' + CONTAINER='{"name": "nginx-ingress-microk8s", "ports": ['${PORT_MAP}']}' + PATCH='{"spec": {"template": {"spec": {"containers": ['${CONTAINER}']}}}}' + kubectl patch daemonset nginx-ingress-microk8s-controller --namespace ingress --patch "${PATCH}" + echo +} + + +function nats_deploy_cluster() { + echo "NATS Namespace" + echo ">>> Create NATS Namespace (if missing)" + kubectl create namespace ${NATS_NAMESPACE} + echo + + 
echo "Add NATS Helm Chart" + helm3 repo add nats https://nats-io.github.io/k8s/helm/charts/ + echo + echo "Upgrade NATS Helm Chart" helm3 repo update nats echo - echo "Install NATS (single-node)" + echo "Install NATS (cluster-mode)" echo ">>> Checking if NATS is deployed..." if kubectl get --namespace ${NATS_NAMESPACE} statefulset/${NATS_NAMESPACE} &> /dev/null; then echo ">>> NATS is present; skipping step." else - # TESTING - Experimenting with cluster echo ">>> Deploy NATS" - helm3 install ${NATS_NAMESPACE} nats/nats --namespace ${NATS_NAMESPACE} --set nats.image=nats:2.9-alpine --set config.cluster.enabled=true --set config.cluster.tls.enabled=true - - + cp "${NATS_MANIFESTS_PATH}/cluster.yaml" "${TMP_MANIFESTS_FOLDER}/nats_cluster.yaml" + helm3 install ${NATS_NAMESPACE} nats/nats --namespace ${NATS_NAMESPACE} -f "${TMP_MANIFESTS_FOLDER}/nats_cluster.yaml" + echo ">>> Waiting NATS statefulset to be created..." while ! kubectl get --namespace ${NATS_NAMESPACE} statefulset/${NATS_NAMESPACE} &> /dev/null; do printf "%c" "." @@ -84,7 +165,17 @@ function nats_deploy_single() { printf "%c" "." sleep 1 done + while ! kubectl get --namespace ${NATS_NAMESPACE} pod/${NATS_NAMESPACE}-1 &> /dev/null; do + printf "%c" "." + sleep 1 + done + while ! kubectl get --namespace ${NATS_NAMESPACE} pod/${NATS_NAMESPACE}-2 &> /dev/null; do + printf "%c" "." + sleep 1 + done kubectl wait --namespace ${NATS_NAMESPACE} --for=condition=Ready --timeout=300s pod/${NATS_NAMESPACE}-0 + kubectl wait --namespace ${NATS_NAMESPACE} --for=condition=Ready --timeout=300s pod/${NATS_NAMESPACE}-1 + kubectl wait --namespace ${NATS_NAMESPACE} --for=condition=Ready --timeout=300s pod/${NATS_NAMESPACE}-2 fi echo @@ -116,7 +207,7 @@ function nats_deploy_single() { echo } -function nats_undeploy_single() { +function nats_undeploy() { echo "NATS" echo ">>> Checking if NATS is deployed..." 
if kubectl get --namespace ${NATS_NAMESPACE} statefulset/${NATS_NAMESPACE} &> /dev/null; then @@ -134,7 +225,13 @@ function nats_undeploy_single() { } if [ "$NATS_REDEPLOY" == "YES" ] || [ "$REDEPLOYALL" == "YES" ]; then - nats_undeploy_single + nats_undeploy fi -nats_deploy_single +if [ "$NATS_DEPLOY_MODE" == "single" ]; then + nats_deploy_single +elif [ "$NATS_DEPLOY_MODE" == "cluster" ]; then + nats_deploy_cluster +else + echo "Unsupported value: NATS_DEPLOY_MODE=$NATS_DEPLOY_MODE" +fi \ No newline at end of file diff --git a/manifests/cockroachdb/cluster.yaml b/manifests/cockroachdb/cluster.yaml index 4d9ef0f84..73875ca3f 100644 --- a/manifests/cockroachdb/cluster.yaml +++ b/manifests/cockroachdb/cluster.yaml @@ -33,14 +33,16 @@ spec: resources: requests: # This is intentionally low to make it work on local k3d clusters. - cpu: 4 - memory: 4Gi + # TESTING + cpu: 1 #4 + memory: 500Mi #4Gi limits: - cpu: 8 - memory: 8Gi + # TESTING + cpu: 1 #8 + memory: 1Gi #8Gi tlsEnabled: true -# You can set either a version of the db or a specific image name -# cockroachDBVersion: v22.2.8 + # You can set either a version of the db or a specific image name + # cockroachDBVersion: v22.2.8 image: name: cockroachdb/cockroach:v22.2.8 # nodes refers to the number of crdb pods that are created @@ -49,21 +51,17 @@ spec: additionalLabels: crdb: is-cool # affinity is a new API field that is behind a feature gate that is - # disabled by default. To enable please see the operator.yaml file. + # disabled by default. To enable please see the operator.yaml file. # The affinity field will accept any podSpec affinity rule. 
- # affinity: - # podAntiAffinity: - # preferredDuringSchedulingIgnoredDuringExecution: - # - weight: 100 - # podAffinityTerm: - # labelSelector: - # matchExpressions: - # - key: app.kubernetes.io/instance - # operator: In - # values: - # - cockroachdb - # topologyKey: kubernetes.io/hostname + # TESTING: Force one pod per node, if possible + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app.kubernetes.io/instance: cockroachdb # nodeSelectors used to match against # nodeSelector: diff --git a/manifests/cockroachdb/operator.yaml b/manifests/cockroachdb/operator.yaml index 59d515061..0d578410c 100644 --- a/manifests/cockroachdb/operator.yaml +++ b/manifests/cockroachdb/operator.yaml @@ -381,6 +381,8 @@ spec: spec: containers: - args: + # TESTING + - -feature-gates=TolerationRules=true,AffinityRules=true,TopologySpreadRules=true - -zap-log-level - info env: diff --git a/manifests/nats/cluster.yaml b/manifests/nats/cluster.yaml new file mode 100644 index 000000000..39e41958f --- /dev/null +++ b/manifests/nats/cluster.yaml @@ -0,0 +1,34 @@ +container: + image: + tags: 2.9-alpine + env: + # different from k8s units, suffix must be B, KiB, MiB, GiB, or TiB + # should be ~90% of memory limit + GOMEMLIMIT: 400MiB + merge: + # recommended limit is at least 2 CPU cores and 8Gi Memory for production JetStream clusters + resources: + requests: + cpu: 1 # 2 + memory: 500Mi # 4Gi + limits: + cpu: 1 # 4 + memory: 1Gi # 8Gi + +config: + cluster: + enabled: true + replicas: 3 + jetstream: + enabled: true + fileStore: + pvc: + size: 4Gi + +# Force one pod per node, if possible +podTemplate: + topologySpreadConstraints: + kubernetes.io/hostname: + maxSkew: 1 + whenUnsatisfiable: ScheduleAnyway + \ No newline at end of file diff --git a/my_deploy.sh b/my_deploy.sh index c358bc1af..92a1bfb63 100755 --- a/my_deploy.sh +++ b/my_deploy.sh @@ -101,7 +101,7 @@ export CRDB_DATABASE="tfs" # 
Set CockroachDB installation mode to 'single'. This option is convenient for development and testing. # See ./deploy/all.sh or ./deploy/crdb.sh for additional details -export CRDB_DEPLOY_MODE="single" +export CRDB_DEPLOY_MODE="cluster" # Disable flag for dropping database, if it exists. export CRDB_DROP_DATABASE_IF_EXISTS="" @@ -121,6 +121,11 @@ export NATS_EXT_PORT_CLIENT="4222" # Set the external port NATS HTTP Mgmt GUI interface will be exposed to. export NATS_EXT_PORT_HTTP="8222" +# TESTING +# Set NATS installation mode to 'single'. This option is convenient for development and testing. +# See ./deploy/all.sh or ./deploy/nats.sh for additional details +export NATS_DEPLOY_MODE="single" + # Disable flag for re-deploying NATS from scratch. export NATS_REDEPLOY="" -- GitLab