-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathvalues.yaml
104 lines (104 loc) · 4 KB
/
values.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# Values for the prometheus-blackbox-exporter chart, keyed by chart name.
# NOTE(review): the nesting below was reconstructed from key order and the
# documented chart / exporter schema — confirm against the original file.
prometheus-blackbox-exporter:
  # Blackbox exporter configuration: the probe modules targets can refer to.
  config:
    modules:
      # HTTP probe with a 5s timeout; follows redirects and prefers IPv4.
      http_2xx:
        prober: http
        timeout: 5s
        http:
          valid_http_versions: ["HTTP/1.1", "HTTP/2.0"]
          follow_redirects: true
          preferred_ip_protocol: "ip4"
      # Same settings as http_2xx, but TLS certificate verification is
      # skipped (insecure_skip_verify).
      http_2xx_insecure:
        prober: http
        timeout: 5s
        http:
          valid_http_versions: ["HTTP/1.1", "HTTP/2.0"]
          follow_redirects: true
          preferred_ip_protocol: "ip4"
          tls_config:
            insecure_skip_verify: true

  # Alerting rules. The {{ $labels... }} / {{ $value }} placeholders in the
  # annotations are kept verbatim and quoted so YAML always reads them as
  # plain strings.
  prometheusRule:
    enabled: true
    rules:
      # Probe reported failure (probe_success == 0) for 1 minute.
      - alert: BlackboxProbeFailed
        expr: "probe_success == 0"
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Blackbox probe failed (instance {{ $labels.instance }})"
          description: "Probe failed - Value = {{ $value }}"

      # Last configuration reload was not successful; fires immediately.
      - alert: BlackboxConfigurationReloadFailure
        expr: "blackbox_exporter_config_last_reload_successful != 1"
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: "Blackbox configuration reload failure (instance {{ $labels.instance }})"
          description: "Blackbox configuration reload failure - Value = {{ $value }}"

      # Average probe duration over the last minute is above 3 seconds.
      - alert: BlackboxSlowProbe
        expr: "avg_over_time(probe_duration_seconds[1m]) > 3"
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "Blackbox slow probe (instance {{ $labels.instance }})"
          description: "Blackbox probe took more than 3s to complete - Value = {{ $value }}"

      # HTTP status code outside the 200-399 range for 1 minute.
      - alert: BlackboxProbeHttpFailure
        expr: "probe_http_status_code <= 199 OR probe_http_status_code >= 400"
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Blackbox probe HTTP failure (instance {{ $labels.instance }})"
          description: "HTTP status code is not 200-399 - Value = {{ $value }}"

      # Certificate expiry, warning tier: 3 <= remaining days < 20.
      # The expression divides the remaining seconds by 86400 to get days.
      - alert: BlackboxSslCertificateWillExpireSoon
        expr: "3 <= round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 20"
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: "Blackbox SSL certificate will expire soon (instance {{ $labels.instance }})"
          description: "SSL certificate expires in less than 20 days - Value = {{ $value }}"

      # Certificate expiry, critical tier: 0 <= remaining days < 3.
      # Shares its alert name with the warning rule above; only the day
      # range and the severity label differ.
      - alert: BlackboxSslCertificateWillExpireSoon
        expr: "0 <= round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 3"
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: "Blackbox SSL certificate will expire soon (instance {{ $labels.instance }})"
          description: "SSL certificate expires in less than 3 days - Value = {{ $value }}"

      # Certificate already expired: remaining days < 0.
      - alert: BlackboxSslCertificateExpired
        expr: "round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 0"
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: "Blackbox SSL certificate expired (instance {{ $labels.instance }})"
          description: "SSL certificate has expired already - Value = {{ $value }}"

      # Average HTTP request duration over the last minute is above 3 seconds.
      - alert: BlackboxProbeSlowHttp
        expr: "avg_over_time(probe_http_duration_seconds[1m]) > 3"
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "Blackbox probe slow HTTP (instance {{ $labels.instance }})"
          description: "HTTP request took more than 3s - Value = {{ $value }}"

      # Average ICMP duration over the last minute is above 1 second.
      # NOTE(review): no ICMP module is defined under config.modules in this
      # file, so this rule presumably only matters if ICMP probes are
      # configured elsewhere — confirm.
      - alert: BlackboxProbeSlowPing
        expr: "avg_over_time(probe_icmp_duration_seconds[1m]) > 1"
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "Blackbox probe slow ping (instance {{ $labels.instance }})"
          description: "Blackbox ping took more than 1s - Value = {{ $value }}"

  # ServiceMonitor settings: self-monitoring plus the list of probed targets.
  serviceMonitor:
    enabled: true
    selfMonitor:
      enabled: true
    targets:
      # Probed with the http_2xx module defined under config.modules.
      - name: health
        url: https://health.terence.cloud
        module: http_2xx

  configReloader:
    enabled: true