Skip to content

Commit

Permalink
add vars on titles
Browse files Browse the repository at this point in the history
  • Loading branch information
Aohzan committed Jan 23, 2025
1 parent 6e66038 commit 6e4bbc5
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 22 deletions.
18 changes: 9 additions & 9 deletions caas/kubernetes/node/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,15 @@ module "datadog-monitors-caas-kubernetes-node" {

Creates Datadog monitors with the following checks:

- Kubernetes Node Disk pressure
- Kubernetes Node Frequent unregister net device
- Kubernetes Node Kubelet API does not respond
- Kubernetes Node Kubelet sync loop that updates containers does not work
- Kubernetes Node Memory pressure
- Kubernetes Node not ready
- Kubernetes Node unschedulable
- Kubernetes Node volume inodes usage
- Kubernetes Node volume space usage
- Kubernetes Node {{kube_node}} disk pressure on {{kube_cluster_name}}
- Kubernetes Node {{kube_node}} frequent unregister net device
- Kubernetes Node {{kube_node}} Kubelet API does not respond on {{kube_cluster_name}}
- Kubernetes Node {{kube_node}} Kubelet sync loop that updates containers does not work on {{kube_cluster_name}}
- Kubernetes Node {{kube_node}} memory pressure on {{kube_cluster_name}}
- Kubernetes Node {{kube_node}} not ready on {{kube_cluster_name}}
- Kubernetes Node {{kube_node}} unschedulable on {{kube_cluster_name}}
- Kubernetes Node volume {{persistentvolumeclaim}} inodes usage
- Kubernetes Node volume {{persistentvolumeclaim}} space usage

<!-- BEGIN_TF_DOCS -->
## Requirements
Expand Down
22 changes: 11 additions & 11 deletions caas/kubernetes/node/monitors-k8s-node.tf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
resource "datadog_monitor" "disk_pressure" {
count = var.disk_pressure_enabled == "true" ? 1 : 0
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node Disk pressure"
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node {{kube_node}} disk pressure on {{kube_cluster_name}}"
message = coalesce(var.disk_pressure_message, var.message)
type = "service check"

Expand All @@ -27,7 +27,7 @@ EOQ

resource "datadog_monitor" "memory_pressure" {
count = var.memory_pressure_enabled == "true" ? 1 : 0
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node Memory pressure"
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node {{kube_node}} memory pressure on {{kube_cluster_name}}"
message = coalesce(var.memory_pressure_message, var.message)
type = "service check"

Expand All @@ -54,7 +54,7 @@ EOQ

resource "datadog_monitor" "ready" {
count = var.ready_enabled == "true" ? 1 : 0
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node not ready"
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node {{kube_node}} not ready on {{kube_cluster_name}}"
message = coalesce(var.ready_message, var.message)
type = "service check"

Expand All @@ -81,12 +81,12 @@ EOQ

resource "datadog_monitor" "kubelet_ping" {
count = var.kubelet_ping_enabled == "true" ? 1 : 0
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node Kubelet API does not respond"
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node {{kube_node}} Kubelet API does not respond on {{kube_cluster_name}}"
message = coalesce(var.kubelet_ping_message, var.message)
type = "service check"

query = <<EOQ
"kubernetes.kubelet.check.ping"${module.filter-tags.service_check}.by("name","kube_cluster_name").last(6).count_by_status()
"kubernetes.kubelet.check.ping"${module.filter-tags.service_check}.by("kube_node","kube_cluster_name").last(6).count_by_status()
EOQ

monitor_thresholds {
Expand All @@ -109,12 +109,12 @@ EOQ

resource "datadog_monitor" "kubelet_syncloop" {
count = var.kubelet_syncloop_enabled == "true" ? 1 : 0
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node Kubelet sync loop that updates containers does not work"
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node {{kube_node}} Kubelet sync loop that updates containers does not work on {{kube_cluster_name}}"
message = coalesce(var.kubelet_syncloop_message, var.message)
type = "service check"

query = <<EOQ
"kubernetes.kubelet.check.syncloop"${module.filter-tags.service_check}.by("name","kube_cluster_name").last(6).count_by_status()
"kubernetes.kubelet.check.syncloop"${module.filter-tags.service_check}.by("kube_node","kube_cluster_name").last(6).count_by_status()
EOQ

monitor_thresholds {
Expand All @@ -136,7 +136,7 @@ EOQ

resource "datadog_monitor" "unregister_net_device" {
count = var.unregister_net_device_enabled == "true" ? 1 : 0
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node Frequent unregister net device"
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node {{kube_node}} frequent unregister net device"
message = coalesce(var.unregister_net_device_message, var.message)
type = "event-v2 alert"

Expand All @@ -154,7 +154,7 @@ resource "datadog_monitor" "unregister_net_device" {

resource "datadog_monitor" "node_unschedulable" {
count = var.node_unschedulable_enabled == "true" ? 1 : 0
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node unschedulable"
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node {{kube_node}} unschedulable on {{kube_cluster_name}}"
message = coalesce(var.node_unschedulable_message, var.message)
type = "metric alert"

Expand Down Expand Up @@ -183,7 +183,7 @@ EOQ

resource "datadog_monitor" "volume_space" {
count = var.volume_space_enabled == "true" ? 1 : 0
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node volume space usage {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node volume {{persistentvolumeclaim}} space usage {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}} on {{kube_cluster_name}}"
message = coalesce(var.volume_space_message, var.message)
type = "query alert"

Expand Down Expand Up @@ -214,7 +214,7 @@ EOQ

resource "datadog_monitor" "volume_inodes" {
count = var.volume_inodes_enabled == "true" ? 1 : 0
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node volume inodes usage {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node volume {{persistentvolumeclaim}} inodes usage {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}} on {{kube_cluster_name}}"
message = coalesce(var.volume_inodes_message, var.message)
type = "query alert"

Expand Down
2 changes: 1 addition & 1 deletion caas/kubernetes/pod/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Creates DataDog monitors with the following checks:
- Kubernetes Pod {{pod_name}} container {{kube_container_name}} killed by OOM on {{kube_cluster_name}}
- Kubernetes Pod terminated abnormally
- Kubernetes Pod waiting errors
- Kubernetes pods in {{kube_replica_set}} frequently restarted on {{kube_cluster_name}}
- Kubernetes Pods in {{kube_replica_set}} frequently restarted on {{kube_cluster_name}}

<!-- BEGIN_TF_DOCS -->
## Requirements
Expand Down
2 changes: 1 addition & 1 deletion caas/kubernetes/pod/monitors-k8s-pod.tf
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ EOQ

resource "datadog_monitor" "pod_frequently_restarted" {
count = var.pod_frequently_restarted_enabled == "true" ? 1 : 0
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes pods in {{kube_replica_set}} frequently restarted on {{kube_cluster_name}}"
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Pods in {{kube_replica_set}} frequently restarted on {{kube_cluster_name}}"
message = coalesce(var.pod_frequently_restarted_message, var.message)
type = "metric alert"

Expand Down

0 comments on commit 6e4bbc5

Please sign in to comment.