-
Notifications
You must be signed in to change notification settings - Fork 54
/
Copy pathpersistentvolumeissues_test.yaml
141 lines (125 loc) · 7.63 KB
/
persistentvolumeissues_test.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# Unit tests for the PersistentVolumeIssues alert, written in the Prometheus
# rule unit-test layout (rule_files + tests with input_series and
# alert_rule_test entries).
# How often the rules listed under rule_files are evaluated during the tests.
evaluation_interval: 1m
# Rule file(s) whose alerts are exercised by the test groups below.
rule_files:
- prometheus.pv_alerts.yaml
# Each list entry under tests is one independent test group.
tests:
# Alerted cases:
- interval: 1m
  input_series:
  # Persistent volume is in the Pending state so it will be alerted.
  # Exactly one phase series of pv-1 carries the value 1 (Pending); all the
  # others stay at 0. '1x4' / '0x4' use the expanding notation: the value is
  # repeated for five samples, one per interval (t=0m..4m).
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-1", namespace="ns-1", phase="Pending", source_cluster="cluster01"}'
    values: '1x4'
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-1", namespace="ns-1", phase="Available", source_cluster="cluster01"}'
    values: '0x4'
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-1", namespace="ns-1", phase="Bound", source_cluster="cluster01"}'
    values: '0x4'
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-1", namespace="ns-1", phase="Released", source_cluster="cluster01"}'
    values: '0x4'
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-1", namespace="ns-1", phase="Failed", source_cluster="cluster01"}'
    values: '0x4'
  alert_rule_test:
  # At 5m exactly one PersistentVolumeIssues alert is expected, carrying the
  # labels of the Pending series plus the severity label.
  - eval_time: 5m
    alertname: PersistentVolumeIssues
    exp_alerts:
    - exp_labels:
        severity: warning
        namespace: ns-1
        persistentvolume: pv-1
        phase: Pending
        source_cluster: cluster01
      exp_annotations:
        summary: >-
          Persistent Volume pv-1 is in Pending phase for more than 5 minutes.
        description: >-
          Persistent Volume pv-1 in namespace ns-1 on cluster cluster01
          is in Pending phase for more than 5 minutes.
        alert_routing_key: ns-1
        # NOTE(review): "pesistent" looks like a typo, but the value has to
        # match the annotation in the rule file - change both together.
        runbook_url: https://gitlab.cee.redhat.com/konflux/docs/sop/-/blob/main/o11y/alert-rule-pesistentVolumeIssues.md
- interval: 1m
  input_series:
  # Persistent volume is in the Failed state so it will be alerted.
  # Only the Failed phase series of pv-2 is 1; the other phases stay at 0.
  # '1x4' / '0x4' use the expanding notation: five samples of the same value,
  # one per interval (t=0m..4m).
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-2", namespace="ns-2", phase="Pending", source_cluster="cluster02"}'
    values: '0x4'
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-2", namespace="ns-2", phase="Available", source_cluster="cluster02"}'
    values: '0x4'
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-2", namespace="ns-2", phase="Bound", source_cluster="cluster02"}'
    values: '0x4'
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-2", namespace="ns-2", phase="Released", source_cluster="cluster02"}'
    values: '0x4'
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-2", namespace="ns-2", phase="Failed", source_cluster="cluster02"}'
    values: '1x4'
  alert_rule_test:
  # At 5m exactly one PersistentVolumeIssues alert is expected, carrying the
  # labels of the Failed series plus the severity label.
  - eval_time: 5m
    alertname: PersistentVolumeIssues
    exp_alerts:
    - exp_labels:
        severity: warning
        namespace: ns-2
        persistentvolume: pv-2
        phase: Failed
        source_cluster: cluster02
      exp_annotations:
        summary: >-
          Persistent Volume pv-2 is in Failed phase for more than 5 minutes.
        description: >-
          Persistent Volume pv-2 in namespace ns-2 on cluster cluster02
          is in Failed phase for more than 5 minutes.
        alert_routing_key: ns-2
        # NOTE(review): "pesistent" looks like a typo, but the value has to
        # match the annotation in the rule file - change both together.
        runbook_url: https://gitlab.cee.redhat.com/konflux/docs/sop/-/blob/main/o11y/alert-rule-pesistentVolumeIssues.md
# Not Alerted cases:
- interval: 1m
  input_series:
  # All series in this group are evaluated together; none of them is expected
  # to raise the alert. '1x4' / '0x4' use the expanding notation: five samples
  # of the same value, one per interval (t=0m..4m).
  #
  # Persistent volume is in the Bound state so it will not be alerted.
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-4", namespace="ns-4", phase="Pending", source_cluster="cluster03"}'
    values: '0x4'
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-4", namespace="ns-4", phase="Available", source_cluster="cluster03"}'
    values: '0x4'
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-4", namespace="ns-4", phase="Bound", source_cluster="cluster03"}'
    values: '1x4'
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-4", namespace="ns-4", phase="Released", source_cluster="cluster03"}'
    values: '0x4'
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-4", namespace="ns-4", phase="Failed", source_cluster="cluster03"}'
    values: '0x4'
  # Persistent volume is in the Released state so it will not be alerted.
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-5", namespace="ns-5", phase="Pending", source_cluster="cluster04"}'
    values: '0x4'
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-5", namespace="ns-5", phase="Available", source_cluster="cluster04"}'
    values: '0x4'
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-5", namespace="ns-5", phase="Bound", source_cluster="cluster04"}'
    values: '0x4'
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-5", namespace="ns-5", phase="Released", source_cluster="cluster04"}'
    values: '1x4'
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-5", namespace="ns-5", phase="Failed", source_cluster="cluster04"}'
    values: '0x4'
  # Persistent volume is in the Available state for the first 4 minutes and in
  # the Bound state for the last 1 minute, so it won't be alerted.
  # ('1x3 0' is four samples of 1 followed by a 0; '0x3 1' is the mirror image.)
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-6", namespace="ns-6", phase="Pending", source_cluster="cluster05"}'
    values: '0x4'
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-6", namespace="ns-6", phase="Available", source_cluster="cluster05"}'
    values: '1x3 0'
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-6", namespace="ns-6", phase="Bound", source_cluster="cluster05"}'
    values: '0x3 1'
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-6", namespace="ns-6", phase="Released", source_cluster="cluster05"}'
    values: '0x4'
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-6", namespace="ns-6", phase="Failed", source_cluster="cluster05"}'
    values: '0x4'
  # Persistent volume is in the Failed state but it has a namespace that ends
  # with 'tenant' so it's ignored.
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-7", namespace="prod-tenant", phase="Failed", source_cluster="cluster06"}'
    values: '1x4'
  # Persistent volume is in the Pending state but it has a namespace that starts
  # with 'openshift' so it's ignored.
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-8", namespace="openshift-prod-test", phase="Pending", source_cluster="cluster07"}'
    values: '1x4'
  # Persistent volume is in the Failed state but it has a namespace that starts
  # with 'kube' so it's ignored.
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-9", namespace="kube-test", phase="Failed", source_cluster="cluster08"}'
    values: '1x4'
  # Persistent volume is in the Pending state but it has a namespace
  # 'default' so it's ignored.
  - series: 'kube_persistentvolume_status_phase{persistentvolume="pv-a", namespace="default", phase="Pending", source_cluster="cluster09"}'
    values: '1x4'
  # Persistent volume is in the Failed state but it has a namespace that ends
  # with 'env' so it's ignored.
  - series: 'kube_persistentvolume_status_phase{persistentvolume="test-pv", namespace="test-env", phase="Failed", source_cluster="cluster01"}'
    values: '1x4'
  alert_rule_test:
  # No exp_alerts are listed, so the test asserts that no
  # PersistentVolumeIssues alert is firing at 5m.
  - eval_time: 5m
    alertname: PersistentVolumeIssues