diff --git a/docker/docker-compose.provider.yml b/docker/docker-compose.provider.yml
index 4c77c98446..f11bc5e1b7 100644
--- a/docker/docker-compose.provider.yml
+++ b/docker/docker-compose.provider.yml
@@ -167,6 +167,143 @@ services:
       internal:
         ipv4_address: 172.18.0.8
         ipv6_address: fd00:aaaa::8
+  redis-exporter:
+    # Exports redis-stack metrics in Prometheus format on :9121, scraped by
+    # vector (sources.redis_metrics -> http://redis-exporter:9121/metrics).
+    container_name: redis-exporter
+    profiles:
+      - production
+      - staging
+    image: oliver006/redis_exporter:${REDIS_EXPORTER_IMAGE:-v1.62.0}
+    labels:
+      - "vector.docker=true" # log docker events
+    environment:
+      # The exporter connects to redis over the internal network. Without an
+      # explicit REDIS_ADDR it defaults to its own localhost:6379 and scrapes
+      # nothing. REDIS_PASSWORD is filled from the deploy-exported provider env
+      # (same value as REDIS_CONNECTION_PASSWORD); empty means no-auth redis.
+      - REDIS_ADDR=redis://redis-stack:6379
+      - REDIS_PASSWORD=${REDIS_CONNECTION_PASSWORD:-}
+    env_file:
+      - ../.env.1.${NODE_ENV}
+    depends_on:
+      - redis-stack
+    restart: unless-stopped # unless the container has been stopped, it will be restarted, even on reboot
+    expose:
+      - "9121"
+    networks:
+      internal:
+        ipv4_address: 172.18.0.11
+        ipv6_address: fd00:aaaa::b
+    logging:
+      driver: 'json-file'
+      options:
+        max-size: '100m'
+        max-file: '1'
+  cadvisor:
+    # Per-container resource metrics (cpu/memory/disk/network/...) in
+    # Prometheus format on :8080, scraped by vector
+    # (sources.cadvisor_metrics -> http://cadvisor:8080/metrics).
+    container_name: cadvisor
+    profiles:
+      - production
+      - staging
+    image: gcr.io/cadvisor/cadvisor:${CADVISOR_IMAGE:-v0.52.1}
+    labels:
+      - "vector.docker=true" # log docker events
+    # Broad host access to read cgroup/container stats; /dev/kmsg presumably backs oom_event (TODO confirm).
+    privileged: true
+    devices:
+      - /dev/kmsg
+    command:
+      - '--enable_metrics=cpu,cpuLoad,memory,disk,diskIO,network,process,pressure,oom_event,percpu,app,tcp,udp'
+      - '--store_container_labels=false'
+      - '--whitelisted_container_labels=com.docker.compose.service,com.docker.compose.project'
+      - '--docker_only=true'
+    volumes:
+      - /:/rootfs:ro
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+      - /var/run:/var/run:ro
+      - /sys:/sys:ro
+      - /var/lib/docker/:/var/lib/docker:ro
+      - /dev/disk/:/dev/disk:ro
+    restart: unless-stopped
+    expose:
+      - "8080"
+    networks:
+      internal:
+        ipv4_address: 172.18.0.12
+        ipv6_address: fd00:aaaa::c
+    logging:
+      driver: 'json-file'
+      options:
+        max-size: '100m'
+        max-file: '1'
+  mongodb-exporter:
+    # Percona MongoDB exporter: richer mongo metrics (replication, per-collection
+    # & index stats, query exec stats, connection pool) in Prometheus format on
+    # :9216, scraped by vector (sources.mongodb_exporter_metrics ->
+    # http://mongodb-exporter:9216/metrics). Complements vector's built-in
+    # mongodb_metrics source, which only exposes basic server status.
+    container_name: mongodb-exporter
+    profiles:
+      - production
+      - staging
+    image: percona/mongodb_exporter:${MONGODB_EXPORTER_IMAGE:-0.43.1}
+    labels:
+      - "vector.docker=true" # log docker events
+    # MONGODB_URI is resolved by compose interpolation from the deploy-exported
+    # MONGO_INITDB_ROOT_* values (each falls back to "root" when unset) and passed
+    # via env, not --mongodb.uri, so the password isn't visible in process args.
+    # NOTE(review): credentials are interpolated as-is - confirm they are URI-safe.
+    # --collect-all enables all collectors; --compatible-mode keeps metric names stable for dashboards.
+    environment:
+      - MONGODB_URI=mongodb://${MONGO_INITDB_ROOT_USERNAME:-root}:${MONGO_INITDB_ROOT_PASSWORD:-root}@database:27017
+    command:
+      - '--collect-all'
+      - '--compatible-mode'
+    env_file:
+      - ../.env.1.${NODE_ENV}
+    depends_on:
+      - database
+    restart: unless-stopped # unless the container has been stopped, it will be restarted, even on reboot
+    expose:
+      - "9216"
+    networks:
+      internal:
+        ipv4_address: 172.18.0.13
+        ipv6_address: fd00:aaaa::d
+    logging:
+      driver: 'json-file'
+      options:
+        max-size: '100m'
+        max-file: '1'
+  smartctl-exporter:
+    # Disk SMART health metrics (reallocated sectors, temperature, wear,
+    # predicted failure) in Prometheus format on :9633, scraped by vector
+    # (sources.smartctl_metrics -> http://smartctl-exporter:9633/metrics).
+    # Runs privileged as root so it can issue SMART commands to the host disks.
+    container_name: smartctl-exporter
+    profiles:
+      - production
+      - staging
+    image: prometheuscommunity/smartctl-exporter:${SMARTCTL_EXPORTER_IMAGE:-v0.13.0}
+    labels:
+      - "vector.docker=true" # log docker events
+    privileged: true
+    user: root
+    restart: unless-stopped # unless the container has been stopped, it will be restarted, even on reboot
+    expose:
+      - "9633"
+    networks:
+      internal:
+        ipv4_address: 172.18.0.14
+        ipv6_address: fd00:aaaa::e
+    logging:
+      driver: 'json-file'
+      options:
+        max-size: '100m'
+        max-file: '1'
   vector:
     container_name: vector
     profiles:
@@ -178,8 +315,18 @@ services:
     labels:
       - "vector.docker=true" # log docker events
     restart: unless-stopped # unless the container has been stopped, it will be restarted, even on reboot
+    # sources.host_metrics reads /proc and /sys, which inside a container are
+    # the container's own views. Mount the host's procfs/sysfs read-only and
+    # point vector at them via PROCFS_ROOT/SYSFS_ROOT so CPU/memory/disk stats
+    # describe the host. NOTE(review): /proc/net resolves via /proc/self, so
+    # network stats may still be the container's netns - TODO confirm.
+    environment:
+      - PROCFS_ROOT=/host/proc
+      - SYSFS_ROOT=/host/sys
     volumes:
       - /var/run/docker.sock:/var/run/docker.sock # needed for monitoring docker container events, e.g. start/stop/etc
+      - /proc:/host/proc:ro
+      - /sys:/host/sys:ro
     networks:
       external:
       internal:
diff --git a/docker/images/vector/src/vector.toml b/docker/images/vector/src/vector.toml
index e760a1c626..7e6940a639 100644
--- a/docker/images/vector/src/vector.toml
+++ b/docker/images/vector/src/vector.toml
@@ -809,6 +809,46 @@ source = '''
 .tags.env=get_env_var!("NODE_ENV")
 '''
 
+# =============================================================================
+# MONGODB EXPORTER METRICS (Percona)
+# =============================================================================
+# Source: Richer MongoDB metrics from the percona/mongodb_exporter sidecar.
+# Adds replication, per-collection/index stats, query exec stats and connection
+# pool detail that the built-in mongodb_metrics source above does not expose.
+[sources.mongodb_exporter_metrics]
+type = "prometheus_scrape"
+endpoints = [ "http://mongodb-exporter:9216/metrics" ]
+scrape_interval_secs = 30
+
+# Transform: Add host and environment tags to MongoDB exporter metrics
+[transforms.mongodb_exporter_metrics_format]
+type = "remap"
+inputs = ["mongodb_exporter_metrics"]
+source = '''
+.tags.host=get_env_var!("OO_HOST")
+.tags.env=get_env_var!("NODE_ENV")
+'''
+
+# =============================================================================
+# SMARTCTL DISK HEALTH METRICS
+# =============================================================================
+# Source: Disk SMART health metrics from the smartctl-exporter sidecar.
+# Reallocated sectors, temperature, wear levelling and predicted failure for the
+# host's physical disks — not captured by host_metrics or cadvisor.
+[sources.smartctl_metrics]
+type = "prometheus_scrape"
+endpoints = [ "http://smartctl-exporter:9633/metrics" ]
+scrape_interval_secs = 30
+
+# Transform: Add host and environment tags to smartctl metrics
+[transforms.smartctl_metrics_format]
+type = "remap"
+inputs = ["smartctl_metrics"]
+source = '''
+.tags.host=get_env_var!("OO_HOST")
+.tags.env=get_env_var!("NODE_ENV")
+'''
+
 # =============================================================================
 # METRICS SINKS
 # =============================================================================
@@ -817,7 +857,7 @@ source = '''
 # Uses Prometheus Remote Write protocol for efficient metric transmission
 [sinks.oo_metrics]
 type = "prometheus_remote_write"
-inputs = ["host_metrics_format", "vector_metrics_format", "cadvisor_metrics_format", "mongodb_metrics_format", "redis_metrics_format", "caddy_metrics_format"]
+inputs = ["host_metrics_format", "vector_metrics_format", "cadvisor_metrics_format", "mongodb_metrics_format", "mongodb_exporter_metrics_format", "smartctl_metrics_format", "redis_metrics_format", "caddy_metrics_format"]
 endpoint = "https://oo.prosopo.io/api/dev_organization_29569_u24VnjrjN7XrP35/prometheus/api/v1/write"
 
 [sinks.oo_metrics.auth]
@@ -844,7 +884,7 @@ when_full = "drop_newest"
 # Backup destination for metrics redundancy and disaster recovery
 [sinks.oo2_metrics]
 type = "prometheus_remote_write"
-inputs = ["host_metrics_format", "vector_metrics_format", "cadvisor_metrics_format", "mongodb_metrics_format", "redis_metrics_format", "caddy_metrics_format"]
+inputs = ["host_metrics_format", "vector_metrics_format", "cadvisor_metrics_format", "mongodb_metrics_format", "mongodb_exporter_metrics_format", "smartctl_metrics_format", "redis_metrics_format", "caddy_metrics_format"]
 endpoint = "https://oo2.prosopo.io/api/default/prometheus/api/v1/write"
 
 [sinks.oo2_metrics.auth]