You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard. Expand all lines: metrics/actor.go
+44 Lines changed: 44 additions & 0 deletions
Original file line number
Diff line number
Diff line change
@@ -235,6 +235,17 @@ func (a *Actor) initializeErgoMetrics() error {
235
235
registerInternalGaugeVec(cm, a.registry, "ergo_remote_messages_out_total", "Total number of messages sent to remote node", nodeLabels, []string{"remote_node"})
236
236
registerInternalGaugeVec(cm, a.registry, "ergo_remote_bytes_in_total", "Total number of bytes received from remote node", nodeLabels, []string{"remote_node"})
237
237
registerInternalGaugeVec(cm, a.registry, "ergo_remote_bytes_out_total", "Total number of bytes sent to remote node", nodeLabels, []string{"remote_node"})
238
+
registerInternalGaugeVec(cm, a.registry, "ergo_remote_fragments_sent_total", "Total fragments sent to remote node", nodeLabels, []string{"remote_node"})
239
+
registerInternalGaugeVec(cm, a.registry, "ergo_remote_fragment_messages_sent_total", "Total fragmented messages sent to remote node", nodeLabels, []string{"remote_node"})
240
+
registerInternalGaugeVec(cm, a.registry, "ergo_remote_fragments_received_total", "Total fragments received from remote node", nodeLabels, []string{"remote_node"})
registerInternalGaugeVec(cm, a.registry, "ergo_remote_fragment_timeouts_total", "Total fragment assembly timeouts for remote node", nodeLabels, []string{"remote_node"})
243
+
registerInternalGaugeVec(cm, a.registry, "ergo_remote_compressed_sent_total", "Total compressed messages sent to remote node", nodeLabels, []string{"remote_node"})
244
+
registerInternalGaugeVec(cm, a.registry, "ergo_remote_compressed_bytes_sent_total", "Total bytes after compression sent to remote node", nodeLabels, []string{"remote_node"})
245
+
registerInternalGaugeVec(cm, a.registry, "ergo_remote_compressed_orig_bytes_sent_total", "Total bytes before compression sent to remote node", nodeLabels, []string{"remote_node"})
246
+
registerInternalGaugeVec(cm, a.registry, "ergo_remote_decompressed_recv_total", "Total decompressed messages received from remote node", nodeLabels, []string{"remote_node"})
247
+
registerInternalGaugeVec(cm, a.registry, "ergo_remote_decompressed_bytes_recv_total", "Total bytes before decompression from remote node", nodeLabels, []string{"remote_node"})
248
+
registerInternalGaugeVec(cm, a.registry, "ergo_remote_decompressed_orig_recv_total", "Total bytes after decompression from remote node", nodeLabels, []string{"remote_node"})
"description": "Compression effectiveness across the cluster. Ratio shows how much data is reduced (higher = better). Percentage shows what fraction of messages are compressed. A low percentage with high ratio means only large messages are compressed (expected). See https://docs.ergo.services/networking/network-stack for details."
3558
+
},
3559
+
{
3560
+
"id": 123,
3561
+
"title": "Compression Savings per Node",
3562
+
"type": "timeseries",
3563
+
"gridPos": {
3564
+
"h": 8,
3565
+
"w": 12,
3566
+
"x": 12,
3567
+
"y": 70
3568
+
},
3569
+
"targets": [
3570
+
{
3571
+
"expr": "sum by (node) (rate(ergo_remote_compressed_orig_bytes_sent_total{node=~\"$node\"}[1m]) - rate(ergo_remote_compressed_bytes_sent_total{node=~\"$node\"}[1m]))",
3572
+
"legendFormat": "{{node}} saved"
3573
+
}
3574
+
],
3575
+
"fieldConfig": {
3576
+
"defaults": {
3577
+
"unit": "Bps",
3578
+
"custom": {
3579
+
"lineInterpolation": "smooth",
3580
+
"lineWidth": 1
3581
+
}
3582
+
}
3583
+
},
3584
+
"options": {
3585
+
"legend": {
3586
+
"displayMode": "table",
3587
+
"placement": "right"
3588
+
}
3589
+
},
3590
+
"description": "Bytes per second saved by compression on each node. Shows which nodes benefit most from compression in absolute terms. Zero means compression is disabled or messages are below the compression threshold."
"description": "Cluster-wide fragment rate. Shows how many messages are being fragmented and reassembled. Assembly timeouts indicate incomplete deliveries (sender crash, connection drop). See https://docs.ergo.services/networking/network-stack#message-fragmentation for details."
3640
+
},
3641
+
{
3642
+
"id": 121,
3643
+
"title": "Fragmentation per Node",
3644
+
"type": "timeseries",
3645
+
"gridPos": {
3646
+
"h": 8,
3647
+
"w": 12,
3648
+
"x": 12,
3649
+
"y": 78
3650
+
},
3651
+
"targets": [
3652
+
{
3653
+
"expr": "sum by (node) (rate(ergo_remote_fragment_messages_sent_total{node=~\"$node\"}[1m]))",
3654
+
"legendFormat": "{{node}} fragmented (sent)"
3655
+
},
3656
+
{
3657
+
"expr": "sum by (node) (rate(ergo_remote_fragment_messages_received_total{node=~\"$node\"}[1m]))",
3658
+
"legendFormat": "{{node}} reassembled"
3659
+
},
3660
+
{
3661
+
"expr": "sum by (node) (rate(ergo_remote_fragment_timeouts_total{node=~\"$node\"}[1m]))",
3662
+
"legendFormat": "{{node}} timeouts"
3663
+
}
3664
+
],
3665
+
"fieldConfig": {
3666
+
"defaults": {
3667
+
"unit": "ops",
3668
+
"custom": {
3669
+
"lineInterpolation": "smooth",
3670
+
"lineWidth": 1
3671
+
}
3672
+
}
3673
+
},
3674
+
"options": {
3675
+
"legend": {
3676
+
"displayMode": "table",
3677
+
"placement": "right"
3678
+
}
3679
+
},
3680
+
"description": "Per-node fragmentation activity. Shows which nodes are sending or receiving fragmented messages. Timeouts on a specific node may indicate that node is losing connections mid-transfer."
0 commit comments