From 22399db00a15f9509c8326aec7f8c6b814101e93 Mon Sep 17 00:00:00 2001
From: Artyom Babiy
Date: Thu, 17 Jul 2025 16:24:34 +0300
Subject: [PATCH] feat(cluster): add prom rule alert for failed backups

Signed-off-by: Artyom Babiy
---
Notes (review):
    - expr aggregates with max instead of sum on both sides.
    - namespace / cnpg_cluster label values are rendered through quote.
    - a template comment documenting the rule was added at the top of the
      new file; it is trimmed away at render time.
    - the blob id on the index line below is the one from the original
      submission and no longer matches the new content; regenerate it with
      git format-patch after applying.

 .../prometheus_rules/cluster-backup_failed.yaml | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 charts/cluster/prometheus_rules/cluster-backup_failed.yaml

diff --git a/charts/cluster/prometheus_rules/cluster-backup_failed.yaml b/charts/cluster/prometheus_rules/cluster-backup_failed.yaml
new file mode 100644
index 0000000000..80b58ceedb
--- /dev/null
+++ b/charts/cluster/prometheus_rules/cluster-backup_failed.yaml
@@ -0,0 +1,33 @@
+{{- /*
+  CNPGClusterBackupFailed: fires when the last failed backup timestamp is
+  newer than the last available backup timestamp for 5 minutes.
+
+  Values read from the rendering context:
+    .excludeRules  list of alert names; the rule is skipped when it contains
+                   this alert's name
+    .namespace     namespace used in the series selectors and alert labels
+    .cluster       cluster name used in the description and alert labels
+    .podSelector   regex matched against the "pod" label of both series
+
+  Both sides use max rather than sum: adding Unix timestamps across pods has
+  no meaning and the comparison breaks as soon as the two sides aggregate a
+  different number of series. Assumes every matching pod reports the same
+  cluster-wide timestamps - TODO confirm against the exporter.
+*/ -}}
+{{- $alert := "CNPGClusterBackupFailed" -}}
+{{- if not (has $alert .excludeRules) -}}
+alert: {{ $alert }}
+annotations:
+  summary: CNPG Cluster backup has failed.
+  description: |-
+    Backup for CloudNativePG Cluster "{{ .namespace }}/{{ .cluster }}" has failed.
+expr: |
+  max by (namespace) (cnpg_collector_last_failed_backup_timestamp{namespace="{{ .namespace }}", pod=~"{{ .podSelector }}"})
+  >
+  max by (namespace) (cnpg_collector_last_available_backup_timestamp{namespace="{{ .namespace }}", pod=~"{{ .podSelector }}"})
+for: 5m
+labels:
+  severity: warning
+  namespace: {{ .namespace | quote }}
+  cnpg_cluster: {{ .cluster | quote }}
+{{- end -}}