Loading config/values/meep-prometheus.yaml +4 −4 Original line number Diff line number Diff line Loading @@ -14,7 +14,7 @@ additionalPrometheusRulesMap: - name: Node Alerts rules: - alert: HostRebooted expr: changes(node_boot_time_seconds[10m]) > 0 expr: changes(node_boot_time_seconds[10m]) > 10 for: 0m labels: severity: warning Loading Loading @@ -96,13 +96,13 @@ additionalPrometheusRulesMap: summary: Kubernetes pod crash looping (instance {{ $labels.instance }}) description: Pod {{ $labels.pod }} is crash looping\n VALUE = {{ $value }}\n LABELS = {{ $labels }} - alert: KubernetesPodNotHealthy expr: min_over_time(sum by (namespace, pod) (kube_pod_status_phase{phase=~"Pending|Unknown|Failed"})[15m:]) > 0 for: 5m expr: sum by (namespace, pod) (sum_over_time(kube_pod_status_phase{phase=~"Pending|Unknown|Failed"}[5m])) >= 10 for: 0m labels: severity: critical annotations: summary: Kubernetes Pod not healthy (instance {{ $labels.instance }}) description: Pod has been in a non-ready state for more than 5 minutes in the last 15 minutes.\n VALUE = {{ $value }}\n LABELS = {{ $labels }} description: Pod has been in a non-ready state for more than 5 minutes.\n VALUE = {{ $value }}\n LABELS = {{ $labels }} alertmanager: enabled: true Loading Loading
config/values/meep-prometheus.yaml +4 −4 Original line number Diff line number Diff line Loading @@ -14,7 +14,7 @@ additionalPrometheusRulesMap: - name: Node Alerts rules: - alert: HostRebooted expr: changes(node_boot_time_seconds[10m]) > 0 expr: changes(node_boot_time_seconds[10m]) > 10 for: 0m labels: severity: warning Loading Loading @@ -96,13 +96,13 @@ additionalPrometheusRulesMap: summary: Kubernetes pod crash looping (instance {{ $labels.instance }}) description: Pod {{ $labels.pod }} is crash looping\n VALUE = {{ $value }}\n LABELS = {{ $labels }} - alert: KubernetesPodNotHealthy expr: min_over_time(sum by (namespace, pod) (kube_pod_status_phase{phase=~"Pending|Unknown|Failed"})[15m:]) > 0 for: 5m expr: sum by (namespace, pod) (sum_over_time(kube_pod_status_phase{phase=~"Pending|Unknown|Failed"}[5m])) >= 10 for: 0m labels: severity: critical annotations: summary: Kubernetes Pod not healthy (instance {{ $labels.instance }}) description: Pod has been in a non-ready state for more than 5 minutes in the last 15 minutes.\n VALUE = {{ $value }}\n LABELS = {{ $labels }} description: Pod has been in a non-ready state for more than 5 minutes.\n VALUE = {{ $value }}\n LABELS = {{ $labels }} alertmanager: enabled: true Loading