diff --git a/Dockerfile b/Dockerfile index 30c9d9f..d4d7bfc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,8 @@ -FROM ubuntu:24.04 +FROM fedora:41 -RUN apt-get update - && apt-get install -y nvme-cli - && rm -rf /var/lib/apt/lists/* +RUN dnf update -y \ + && dnf install -y nvme-cli \ + && dnf clean all COPY nvme_exporter /usr/bin/nvme_exporter diff --git a/README.md b/README.md index 2a43bb4..1905e54 100644 --- a/README.md +++ b/README.md @@ -1,40 +1,32 @@ -# nvme_exporter +# NVMe Exporter -Prometheus exporter for nvme smart-log and OCP smart-log metrics inspired by [fritchie nvme exporter](https://github.com/fritchie/nvme_exporter) +[![build](https://github.com/E4-Computer-Engineering/nvme-exporter/actions/workflows/build.yml/badge.svg)](https://github.com/E4-Computer-Engineering/nvme-exporter/actions/workflows/build.yml) +![Latest GitHub release](https://img.shields.io/github/release/E4-Computer-Engineering/nvme-exporter.svg) +[![GitHub license](https://img.shields.io/github/license/E4-Computer-Engineering/nvme-exporter)](https://github.com/E4-Computer-Engineering/nvme-exporter/blob/main/LICENSE) +![GitHub all releases](https://img.shields.io/github/downloads/E4-Computer-Engineering/nvme-exporter/total) + +Prometheus exporter for nvme smart-log and OCP smart-log metrics inspired by [fritchie nvme exporter](https://github.com/fritchie/nvme_exporter). 
Specification versions of reference: -* nvme smart-log field descriptions can be found on page 209 of: -https://nvmexpress.org/wp-content/uploads/NVM-Express-Base-Specification-Revision-2.1-2024.08.05-Ratified.pdf +* nvme smart-log field descriptions can be found on page 209 of the [NVM Express Base Specification, Revision 2.1](https://nvmexpress.org/wp-content/uploads/NVM-Express-Base-Specification-Revision-2.1-2024.08.05-Ratified.pdf) -* nvme ocp-smart-log field descriptions can be found on page 24 of: -https://www.opencompute.org/documents/datacenter-nvme-ssd-specification-v2-5-pdf +* nvme ocp-smart-log field descriptions can be found on page 24 of the [Open Compute Project Datacenter NVMe SSD Specification v2.5](https://www.opencompute.org/documents/datacenter-nvme-ssd-specification-v2-5-pdf) Supported [NVMe CLI](https://github.com/linux-nvme/nvme-cli) versions: | Version | Supported | |----|----| |2.9 | OK | -|2.10 | TBD | +|2.10 | OK | -|2.11 | TBD | +|2.11 | OK | -## Metrics - -This collector exports the output of the following `nvme` cli commands: - -``` bash -nvme list -nvme smart-log -nvme ocp-smart-add-log -``` - -## Content +## Repo Content -* Docker: A sample Dockerfile and docker-compose.yaml are provided. +* Docker: A sample `Dockerfile` is provided. * Kubernetes: In [resources](resources/k8s/). * Grafana: In [resources](resources/grafana/) for dashboards. * [smart-log and OCP dashboard](https://github.com/E4-Computer-Engineering/nvme-exporter/blob/main/resources/grafana/dashboard_SMART_OCP.json) - * [smart-log dashboard](https://github.com/E4-Computer-Engineering/nvme-exporter/blob/main/resources/grafana/dashboard_SMART.json) * Prometheus: In [resources](resources/prom/) for recording and alert rules. * Systemd: In [resources](resources/systemd/) for executing the exporter as unit. * Scripts: In [resources](resources/scripts/) for package installation hooks. @@ -54,3 +46,107 @@ nvme_exporter -h |port | Listen port number. Type: String. | `9998` | |ocp | Enable OCP smart log metrics. Type: Bool. 
| `false` | |endpoint | The endpoint to query for metrics. Type: String. | `/metrics` | + +### Systemd + +When installing from a package (RPM or DEB), the systemd unit is automatically deployed and started as `nvme_exporter.service`. +If you are installing from the `tar.gz` archive, the [systemd unit file](resources/systemd/nvme_exporter.service) is provided in this repo. + +> NOTE: to run the exporter with custom flags, you need to modify the unit file. + +### Container + +To run the exporter as a container with, for example, OCP metrics enabled: + +``` bash +podman run --rm -d --network=host --privileged nvme_exporter -ocp +``` + +## Visualization + +This is what the dashboard looks like: + +![OCP metrics](https://raw.githubusercontent.com/E4-Computer-Engineering/nvme-exporter/refs/heads/main/resources/grafana/nvme_ocp.png) + +![Endurance metrics](https://raw.githubusercontent.com/E4-Computer-Engineering/nvme-exporter/refs/heads/main/resources/grafana/nvme_endurance.png) + +![Stats metrics](https://raw.githubusercontent.com/E4-Computer-Engineering/nvme-exporter/refs/heads/main/resources/grafana/nvme_stats.png) + +![Errors metrics](https://raw.githubusercontent.com/E4-Computer-Engineering/nvme-exporter/refs/heads/main/resources/grafana/nvme_errors.png) + +## Metrics + +This collector exports the output of the following `nvme` CLI commands: + +``` bash +nvme list +nvme smart-log +nvme ocp smart-add-log +``` + +|metric_name|description| +|---|---| +|nvme_avail_spare|---| +|nvme_bad_system_nand_blocks_normalized|---| +|nvme_bad_system_nand_blocks_raw|---| +|nvme_bad_user_nand_blocks_normalized|---| +|nvme_bad_user_nand_blocks_raw|---| +|nvme_capacitor_health|---| +|nvme_controller_busy_time|---| +|nvme_critical_comp_time|---| +|nvme_critical_warning|---| +|nvme_current_throttling_status|---| +|nvme_data_units_read|---| +|nvme_data_units_written|---| +|nvme_end_to_end_corrected_errors|---| +|nvme_end_to_end_detected_errors|---| +|nvme_endurance_estimate|---| 
+|nvme_endurance_grp_critical_warning_summary|---| +|nvme_errata_version_field|---| +|nvme_host_read_commands|---| +|nvme_host_write_commands|---| +|nvme_incomplete_shutdowns|---| +|nvme_log_page_guid|---| +|nvme_log_page_version|---| +|nvme_major_version_field|---| +|nvme_maximum_lba|---| +|nvme_max_user_data_erase_counts|---| +|nvme_media_errors|---| +|nvme_minor_version_field|---| +|nvme_min_user_data_erase_counts|---| +|nvme_namespace|---| +|nvme_number_of_thermal_throttling_events|---| +|nvme_num_err_log_entries|---| +|nvme_nuse_namespace_utilization|---| +|nvme_nvme_errata_version|---| +|nvme_pcie_correctable_error_count|---| +|nvme_pcie_link_retraining_count|---| +|nvme_percent_free_blocks|---| +|nvme_percent_used|---| +|nvme_physical_media_units_read_hi|---| +|nvme_physical_media_units_read_lo|---| +|nvme_physical_media_units_written_hi|---| +|nvme_physical_media_units_written_lo|---| +|nvme_physical_size|---| +|nvme_plp_start_count|---| +|nvme_point_version_field|---| +|nvme_power_cycles|---| +|nvme_power_on_hours|---| +|nvme_power_state_change_count|---| +|nvme_refresh_counts|---| +|nvme_sector_size|---| +|nvme_security_version_number|---| +|nvme_soft_ecc_error_count|---| +|nvme_spare_thresh|---| +|nvme_system_data_percent_used|---| +|nvme_temperature|---| +|nvme_thm_temp1_trans_count|---| +|nvme_thm_temp1_trans_time|---| +|nvme_thm_temp2_trans_count|---| +|nvme_thm_temp2_trans_time|---| +|nvme_unaligned_io|---| +|nvme_uncorrectable_uead_error_count|---| +|nvme_unsafe_shutdowns|---| +|nvme_used_bytes|---| +|nvme_warning_temp_time|---| +|nvme_xor_recovery_count|---| diff --git a/cmd/nvme_exporter/main.go b/cmd/nvme_exporter/main.go index 1301c46..55481a4 100644 --- a/cmd/nvme_exporter/main.go +++ b/cmd/nvme_exporter/main.go @@ -17,7 +17,9 @@ import ( ) var _supportedVersions = map[string]bool{ - "2.9": true, + "2.9": true, + "2.10": true, + "2.11": true, } func isSupportedVersion(version string) bool { diff --git a/resources/grafana/dashboard_SMART.json 
b/resources/grafana/dashboard_SMART.json deleted file mode 100644 index b83113d..0000000 --- a/resources/grafana/dashboard_SMART.json +++ /dev/null @@ -1,1113 +0,0 @@ -{ - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "Prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "7.0.1" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" - }, - { - "type": "panel", - "id": "stat", - "name": "Stat", - "version": "" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "links": [], - "panels": [ - { - "datasource": "${DS_PROMETHEUS}", - "description": "Number of current critical warnings", - "fieldConfig": { - "defaults": { - "custom": {}, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 0 - }, - "id": 10, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "values": false - } - }, - "pluginVersion": "7.0.1", - "targets": [ - { - "expr": "sum(nvme_critical_warning)", - "format": "time_series", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "NVMe Current Critical Warning", - "type": "stat" - }, - { - "datasource": "${DS_PROMETHEUS}", - "description": "Number 
of devices with nvme_percent_used > 80", - "fieldConfig": { - "defaults": { - "custom": {}, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 0 - }, - "id": 13, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "values": false - } - }, - "pluginVersion": "7.0.1", - "targets": [ - { - "expr": "count(nvme_percent_used > 80) or (1 - absent(nvme_percent_used > 80))", - "format": "time_series", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "NVMe Percentage Used > 80", - "type": "stat" - }, - { - "datasource": "${DS_PROMETHEUS}", - "description": "Number of critical temperature warnings over last 24 hours", - "fieldConfig": { - "defaults": { - "custom": {}, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 0 - }, - "id": 11, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "values": false - } - }, - "pluginVersion": "7.0.1", - "targets": [ - { - "expr": "sum(rate(nvme_critical_comp_time[5m]))", - "format": "time_series", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "NVMe Recent Critical Temp ", - "type": "stat" - }, - { - "datasource": "${DS_PROMETHEUS}", - "description": "Number of media errors over last 24 hours", - "fieldConfig": { - "defaults": { - "custom": {}, - "mappings": [], - "thresholds": { - "mode": "absolute", - 
"steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 0 - }, - "id": 12, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "values": false - } - }, - "pluginVersion": "7.0.1", - "targets": [ - { - "expr": "sum(rate(nvme_media_errors[24h]))", - "format": "time_series", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "NVMe Recent Media Error", - "type": "stat" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": null, - "description": "Vendor specific estimate of the percentage of life used", - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 7 - }, - "hiddenSeries": false, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "nvme_percent_used{job=\"nvme_exporter\"}", - "interval": "", - "legendFormat": "nvme_percent_used{device={{ device }}, instance={{ instance }}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "NVMe Percent Used", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": 
true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:79", - "decimals": 0, - "format": "percent", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:80", - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": null, - "description": "Normalized percentage of remaining spare capacity available and warning threshold", - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 7 - }, - "hiddenSeries": false, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "nvme_avail_spare{job=\"nvme_exporter\"}", - "interval": "", - "legendFormat": "nvme_avail_spare{device={{ device }}, instance={{ instance }}}", - "refId": "A" - }, - { - "expr": "nvme_spare_thresh{job=\"nvme_exporter\"}", - "interval": "", - "legendFormat": "nvme_spare_thresh{device={{ device }}, instance={{ instance }}}", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "NVMe Available Spare and Spare Threshold", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - 
"yaxes": [ - { - "$$hashKey": "object:79", - "decimals": 0, - "format": "percent", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "$$hashKey": "object:80", - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": null, - "description": "Critical warnings for the state of the controller", - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 16 - }, - "hiddenSeries": false, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "nvme_critical_warning{job=\"nvme_exporter\"}", - "interval": "", - "legendFormat": "nvme_critical_warning{device={{ device }}, instance={{ instance }}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "NVMe Critical Warnings", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:79", - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:80", - "decimals": null, - "format": "short", - "label": null, - 
"logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": null, - "description": "Number of unrecovered data integrity errors", - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 16 - }, - "hiddenSeries": false, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "nvme_media_errors{job=\"nvme_exporter\"}", - "interval": "", - "legendFormat": "nvme_media_errors{device={{ device }}, instance={{ instance }}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "NVMe Media Errors", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:79", - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:80", - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": null, - 
"description": "Temperature in degrees Fahrenheit", - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 25 - }, - "hiddenSeries": false, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "nvme_temperature{job=\"nvme_exporter\"}", - "interval": "", - "legendFormat": "nvme_temperature{device={{ device }}, instance={{ instance }}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "NVMe Temperature", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:79", - "decimals": 0, - "format": "fahrenheit", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:80", - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": null, - "description": "Amount of time in minutes temperature > warning and critical threshold", - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 25 - }, - "hiddenSeries": 
false, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "nvme_warning_temp_time{job=\"nvme_exporter\"}", - "interval": "", - "legendFormat": "nvme_warning_temp_time{device={{ device }}, instance={{ instance }}}", - "refId": "A" - }, - { - "expr": "nvme_critical_comp_time{job=\"nvme_exporter\"}", - "interval": "", - "legendFormat": "nvme_critical_comp_time{device={{ device }}, instance={{ instance }}}", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "NVMe Warning and Critical Temp Time", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:79", - "decimals": 0, - "format": "m", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:80", - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": null, - "description": "Lifetime number of error log entries", - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 34 - }, - "hiddenSeries": false, - "id": 8, - "legend": { - "avg": false, - "current": false, 
- "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "nvme_num_err_log_entries{job=\"nvme_exporter\"}", - "interval": "", - "legendFormat": "nvme_media_errors{device={{ device }}, instance={{ instance }}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "NVMe Number of Error Log Entries", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:79", - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:80", - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": null, - "description": "Critical warnings for the state of endurance groups", - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 34 - }, - "hiddenSeries": false, - "id": 15, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 2, - "points": 
false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "nvme_endurance_grp_critical_warning_summary{job=\"nvme_exporter\"}", - "interval": "", - "legendFormat": "nvme_endurance_grp_critical_warning_summary{device={{ device }}, instance={{ instance }}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "NVMe Endurance Group Critical Warning Summary", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:79", - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:80", - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "schemaVersion": 25, - "style": "dark", - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "now-6h", - "to": "now" - }, - "timepicker": {}, - "timezone": "", - "title": "NVMe Exporter", - "uid": "lBqX37i7z", - "version": 53 -} \ No newline at end of file diff --git a/resources/grafana/nvme_endurance.png b/resources/grafana/nvme_endurance.png new file mode 100644 index 0000000..79d3d97 Binary files /dev/null and b/resources/grafana/nvme_endurance.png differ diff --git a/resources/grafana/nvme_errors.png b/resources/grafana/nvme_errors.png new file mode 100644 index 0000000..f9b7cb0 Binary files /dev/null and b/resources/grafana/nvme_errors.png differ diff --git a/resources/grafana/nvme_ocp.png b/resources/grafana/nvme_ocp.png new file mode 100644 index 0000000..64b54a1 Binary files /dev/null and b/resources/grafana/nvme_ocp.png differ diff --git 
a/resources/grafana/nvme_stats.png b/resources/grafana/nvme_stats.png new file mode 100644 index 0000000..79d3d97 Binary files /dev/null and b/resources/grafana/nvme_stats.png differ