Skip to content

Commit 43b5243

Browse files
committed
update ergo deps
1 parent 301e3b3 commit 43b5243

6 files changed

Lines changed: 239 additions & 9 deletions

File tree

health/go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@ module ergo.services/actor/health
22

33
go 1.20
44

5-
require ergo.services/ergo v1.999.321-0.20260305211829-909f6f11d916
5+
require ergo.services/ergo v1.999.321-0.20260310130400-c2d8d8287c0a

health/go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
ergo.services/ergo v1.999.321-0.20260305211829-909f6f11d916 h1:RjQnfp7dsOeyIQvb7zNAMj7+0RbMuXUF1oPil9+Lmrg=
2-
ergo.services/ergo v1.999.321-0.20260305211829-909f6f11d916/go.mod h1:bLQ6PoO6Mz/8gVuzvPv3xfMfo1P9w6rZV1WnMXMeMdg=
1+
ergo.services/ergo v1.999.321-0.20260310130400-c2d8d8287c0a h1:aMo3LDtVMJQ1/cAoKOf34vd+cl74BpIiMKUimntlrTc=
2+
ergo.services/ergo v1.999.321-0.20260310130400-c2d8d8287c0a/go.mod h1:bLQ6PoO6Mz/8gVuzvPv3xfMfo1P9w6rZV1WnMXMeMdg=

metrics/actor.go

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,17 @@ func (a *Actor) initializeErgoMetrics() error {
235235
registerInternalGaugeVec(cm, a.registry, "ergo_remote_messages_out_total", "Total number of messages sent to remote node", nodeLabels, []string{"remote_node"})
236236
registerInternalGaugeVec(cm, a.registry, "ergo_remote_bytes_in_total", "Total number of bytes received from remote node", nodeLabels, []string{"remote_node"})
237237
registerInternalGaugeVec(cm, a.registry, "ergo_remote_bytes_out_total", "Total number of bytes sent to remote node", nodeLabels, []string{"remote_node"})
238+
registerInternalGaugeVec(cm, a.registry, "ergo_remote_fragments_sent_total", "Total fragments sent to remote node", nodeLabels, []string{"remote_node"})
239+
registerInternalGaugeVec(cm, a.registry, "ergo_remote_fragment_messages_sent_total", "Total fragmented messages sent to remote node", nodeLabels, []string{"remote_node"})
240+
registerInternalGaugeVec(cm, a.registry, "ergo_remote_fragments_received_total", "Total fragments received from remote node", nodeLabels, []string{"remote_node"})
241+
registerInternalGaugeVec(cm, a.registry, "ergo_remote_fragment_messages_received_total", "Total fragmented messages reassembled from remote node", nodeLabels, []string{"remote_node"})
242+
registerInternalGaugeVec(cm, a.registry, "ergo_remote_fragment_timeouts_total", "Total fragment assembly timeouts for remote node", nodeLabels, []string{"remote_node"})
243+
registerInternalGaugeVec(cm, a.registry, "ergo_remote_compressed_sent_total", "Total compressed messages sent to remote node", nodeLabels, []string{"remote_node"})
244+
registerInternalGaugeVec(cm, a.registry, "ergo_remote_compressed_bytes_sent_total", "Total bytes after compression sent to remote node", nodeLabels, []string{"remote_node"})
245+
registerInternalGaugeVec(cm, a.registry, "ergo_remote_compressed_orig_bytes_sent_total", "Total bytes before compression sent to remote node", nodeLabels, []string{"remote_node"})
246+
registerInternalGaugeVec(cm, a.registry, "ergo_remote_decompressed_recv_total", "Total decompressed messages received from remote node", nodeLabels, []string{"remote_node"})
247+
registerInternalGaugeVec(cm, a.registry, "ergo_remote_decompressed_bytes_recv_total", "Total bytes before decompression from remote node", nodeLabels, []string{"remote_node"})
248+
registerInternalGaugeVec(cm, a.registry, "ergo_remote_decompressed_orig_recv_total", "Total bytes after decompression from remote node", nodeLabels, []string{"remote_node"})
238249

239250
// Network health metrics
240251
registerInternalGauge(cm, a.registry, "ergo_connections_established_total", "Cumulative connections established", nodeLabels)
@@ -369,13 +380,35 @@ func (a *Actor) collectBaseMetrics() error {
369380
remoteMsgOut := gaugeVecFromMap(cm, "ergo_remote_messages_out_total")
370381
remoteBytIn := gaugeVecFromMap(cm, "ergo_remote_bytes_in_total")
371382
remoteBytOut := gaugeVecFromMap(cm, "ergo_remote_bytes_out_total")
383+
remoteFragSent := gaugeVecFromMap(cm, "ergo_remote_fragments_sent_total")
384+
remoteFragMsgSent := gaugeVecFromMap(cm, "ergo_remote_fragment_messages_sent_total")
385+
remoteFragRecv := gaugeVecFromMap(cm, "ergo_remote_fragments_received_total")
386+
remoteFragMsgRecv := gaugeVecFromMap(cm, "ergo_remote_fragment_messages_received_total")
387+
remoteFragTimeouts := gaugeVecFromMap(cm, "ergo_remote_fragment_timeouts_total")
388+
remoteCompSent := gaugeVecFromMap(cm, "ergo_remote_compressed_sent_total")
389+
remoteCompBytesSent := gaugeVecFromMap(cm, "ergo_remote_compressed_bytes_sent_total")
390+
remoteCompOrigSent := gaugeVecFromMap(cm, "ergo_remote_compressed_orig_bytes_sent_total")
391+
remoteDecompRecv := gaugeVecFromMap(cm, "ergo_remote_decompressed_recv_total")
392+
remoteDecompBytesRecv := gaugeVecFromMap(cm, "ergo_remote_decompressed_bytes_recv_total")
393+
remoteDecompOrigRecv := gaugeVecFromMap(cm, "ergo_remote_decompressed_orig_recv_total")
372394

373395
// Reset remote node metrics before updating
374396
remoteUptime.Reset()
375397
remoteMsgIn.Reset()
376398
remoteMsgOut.Reset()
377399
remoteBytIn.Reset()
378400
remoteBytOut.Reset()
401+
remoteFragSent.Reset()
402+
remoteFragMsgSent.Reset()
403+
remoteFragRecv.Reset()
404+
remoteFragMsgRecv.Reset()
405+
remoteFragTimeouts.Reset()
406+
remoteCompSent.Reset()
407+
remoteCompBytesSent.Reset()
408+
remoteCompOrigSent.Reset()
409+
remoteDecompRecv.Reset()
410+
remoteDecompBytesRecv.Reset()
411+
remoteDecompOrigRecv.Reset()
379412

380413
// Update per-node metrics
381414
for _, nodeName := range connectedNodes {
@@ -392,6 +425,17 @@ func (a *Actor) collectBaseMetrics() error {
392425
remoteMsgOut.WithLabelValues(nodeNameStr).Set(float64(remoteInfo.MessagesOut))
393426
remoteBytIn.WithLabelValues(nodeNameStr).Set(float64(remoteInfo.BytesIn))
394427
remoteBytOut.WithLabelValues(nodeNameStr).Set(float64(remoteInfo.BytesOut))
428+
remoteFragSent.WithLabelValues(nodeNameStr).Set(float64(remoteInfo.FragmentsSent))
429+
remoteFragMsgSent.WithLabelValues(nodeNameStr).Set(float64(remoteInfo.FragmentMessagesSent))
430+
remoteFragRecv.WithLabelValues(nodeNameStr).Set(float64(remoteInfo.FragmentsReceived))
431+
remoteFragMsgRecv.WithLabelValues(nodeNameStr).Set(float64(remoteInfo.FragmentMessagesRecv))
432+
remoteFragTimeouts.WithLabelValues(nodeNameStr).Set(float64(remoteInfo.FragmentTimeouts))
433+
remoteCompSent.WithLabelValues(nodeNameStr).Set(float64(remoteInfo.CompressedSent))
434+
remoteCompBytesSent.WithLabelValues(nodeNameStr).Set(float64(remoteInfo.CompressedBytesSent))
435+
remoteCompOrigSent.WithLabelValues(nodeNameStr).Set(float64(remoteInfo.CompressedOrigBytesSent))
436+
remoteDecompRecv.WithLabelValues(nodeNameStr).Set(float64(remoteInfo.DecompressedRecv))
437+
remoteDecompBytesRecv.WithLabelValues(nodeNameStr).Set(float64(remoteInfo.DecompressedBytesRecv))
438+
remoteDecompOrigRecv.WithLabelValues(nodeNameStr).Set(float64(remoteInfo.DecompressedOrigRecv))
395439
}
396440

397441
// Collect per-process metrics in a single pass

metrics/ergo-cluster.json

Lines changed: 189 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3493,6 +3493,192 @@
34933493
},
34943494
"description": "Byte rate between each pair of connected nodes. Identifies which specific node-to-node link is saturated or carrying the most data."
34953495
},
3496+
{
3497+
"id": 122,
3498+
"title": "Compression Overview",
3499+
"type": "timeseries",
3500+
"gridPos": {
3501+
"h": 8,
3502+
"w": 12,
3503+
"x": 0,
3504+
"y": 70
3505+
},
3506+
"targets": [
3507+
{
3508+
"expr": "sum(rate(ergo_remote_compressed_orig_bytes_sent_total{node=~\"$node\"}[1m])) / clamp_min(sum(rate(ergo_remote_compressed_bytes_sent_total{node=~\"$node\"}[1m])), 1)",
3509+
"legendFormat": "Compression Ratio"
3510+
},
3511+
{
3512+
"expr": "sum(rate(ergo_remote_compressed_sent_total{node=~\"$node\"}[1m]))",
3513+
"legendFormat": "Compressed msg/s"
3514+
},
3515+
{
3516+
"expr": "sum(rate(ergo_remote_compressed_sent_total{node=~\"$node\"}[1m])) / clamp_min(sum(rate(ergo_remote_messages_out_total{node=~\"$node\"}[1m])), 1) * 100",
3517+
"legendFormat": "% Messages Compressed"
3518+
}
3519+
],
3520+
"fieldConfig": {
3521+
"defaults": {
3522+
"custom": {
3523+
"lineInterpolation": "smooth",
3524+
"lineWidth": 1
3525+
}
3526+
},
3527+
"overrides": [
3528+
{
3529+
"matcher": { "id": "byName", "options": "Compression Ratio" },
3530+
"properties": [
3531+
{ "id": "unit", "value": "x" },
3532+
{ "id": "custom.axisPlacement", "value": "left" }
3533+
]
3534+
},
3535+
{
3536+
"matcher": { "id": "byName", "options": "Compressed msg/s" },
3537+
"properties": [
3538+
{ "id": "unit", "value": "ops" },
3539+
{ "id": "custom.axisPlacement", "value": "right" }
3540+
]
3541+
},
3542+
{
3543+
"matcher": { "id": "byName", "options": "% Messages Compressed" },
3544+
"properties": [
3545+
{ "id": "unit", "value": "percent" },
3546+
{ "id": "custom.axisPlacement", "value": "right" }
3547+
]
3548+
}
3549+
]
3550+
},
3551+
"options": {
3552+
"legend": {
3553+
"displayMode": "table",
3554+
"placement": "right"
3555+
}
3556+
},
3557+
"description": "Compression effectiveness across the cluster. Ratio shows how much data is reduced (higher = better). Percentage shows what fraction of messages are compressed. A low percentage with high ratio means only large messages are compressed (expected). See https://docs.ergo.services/networking/network-stack for details."
3558+
},
3559+
{
3560+
"id": 123,
3561+
"title": "Compression Savings per Node",
3562+
"type": "timeseries",
3563+
"gridPos": {
3564+
"h": 8,
3565+
"w": 12,
3566+
"x": 12,
3567+
"y": 70
3568+
},
3569+
"targets": [
3570+
{
3571+
"expr": "sum by (node) (rate(ergo_remote_compressed_orig_bytes_sent_total{node=~\"$node\"}[1m]) - rate(ergo_remote_compressed_bytes_sent_total{node=~\"$node\"}[1m]))",
3572+
"legendFormat": "{{node}} saved"
3573+
}
3574+
],
3575+
"fieldConfig": {
3576+
"defaults": {
3577+
"unit": "Bps",
3578+
"custom": {
3579+
"lineInterpolation": "smooth",
3580+
"lineWidth": 1
3581+
}
3582+
}
3583+
},
3584+
"options": {
3585+
"legend": {
3586+
"displayMode": "table",
3587+
"placement": "right"
3588+
}
3589+
},
3590+
"description": "Bytes per second saved by compression on each node. Shows which nodes benefit most from compression in absolute terms. Zero means compression is disabled or messages are below the compression threshold."
3591+
},
3592+
{
3593+
"id": 120,
3594+
"title": "Fragmentation (Cluster Total)",
3595+
"type": "timeseries",
3596+
"gridPos": {
3597+
"h": 8,
3598+
"w": 12,
3599+
"x": 0,
3600+
"y": 78
3601+
},
3602+
"targets": [
3603+
{
3604+
"expr": "sum(rate(ergo_remote_fragments_sent_total{node=~\"$node\"}[1m]))",
3605+
"legendFormat": "Fragments Sent"
3606+
},
3607+
{
3608+
"expr": "sum(rate(ergo_remote_fragments_received_total{node=~\"$node\"}[1m]))",
3609+
"legendFormat": "Fragments Received"
3610+
},
3611+
{
3612+
"expr": "sum(rate(ergo_remote_fragment_messages_sent_total{node=~\"$node\"}[1m]))",
3613+
"legendFormat": "Messages Fragmented (sent)"
3614+
},
3615+
{
3616+
"expr": "sum(rate(ergo_remote_fragment_messages_received_total{node=~\"$node\"}[1m]))",
3617+
"legendFormat": "Messages Reassembled"
3618+
},
3619+
{
3620+
"expr": "sum(rate(ergo_remote_fragment_timeouts_total{node=~\"$node\"}[1m]))",
3621+
"legendFormat": "Assembly Timeouts"
3622+
}
3623+
],
3624+
"fieldConfig": {
3625+
"defaults": {
3626+
"unit": "ops",
3627+
"custom": {
3628+
"lineInterpolation": "smooth",
3629+
"lineWidth": 1
3630+
}
3631+
}
3632+
},
3633+
"options": {
3634+
"legend": {
3635+
"displayMode": "table",
3636+
"placement": "right"
3637+
}
3638+
},
3639+
"description": "Cluster-wide fragment rate. Shows how many messages are being fragmented and reassembled. Assembly timeouts indicate incomplete deliveries (sender crash, connection drop). See https://docs.ergo.services/networking/network-stack#message-fragmentation for details."
3640+
},
3641+
{
3642+
"id": 121,
3643+
"title": "Fragmentation per Node",
3644+
"type": "timeseries",
3645+
"gridPos": {
3646+
"h": 8,
3647+
"w": 12,
3648+
"x": 12,
3649+
"y": 78
3650+
},
3651+
"targets": [
3652+
{
3653+
"expr": "sum by (node) (rate(ergo_remote_fragment_messages_sent_total{node=~\"$node\"}[1m]))",
3654+
"legendFormat": "{{node}} fragmented (sent)"
3655+
},
3656+
{
3657+
"expr": "sum by (node) (rate(ergo_remote_fragment_messages_received_total{node=~\"$node\"}[1m]))",
3658+
"legendFormat": "{{node}} reassembled"
3659+
},
3660+
{
3661+
"expr": "sum by (node) (rate(ergo_remote_fragment_timeouts_total{node=~\"$node\"}[1m]))",
3662+
"legendFormat": "{{node}} timeouts"
3663+
}
3664+
],
3665+
"fieldConfig": {
3666+
"defaults": {
3667+
"unit": "ops",
3668+
"custom": {
3669+
"lineInterpolation": "smooth",
3670+
"lineWidth": 1
3671+
}
3672+
}
3673+
},
3674+
"options": {
3675+
"legend": {
3676+
"displayMode": "table",
3677+
"placement": "right"
3678+
}
3679+
},
3680+
"description": "Per-node fragmentation activity. Shows which nodes are sending or receiving fragmented messages. Timeouts on a specific node may indicate that node is losing connections mid-transfer."
3681+
},
34963682
{
34973683
"id": 119,
34983684
"title": "Connectivity Strength",
@@ -3502,7 +3688,7 @@
35023688
"h": 8,
35033689
"w": 2,
35043690
"x": 0,
3505-
"y": 70
3691+
"y": 86
35063692
},
35073693
"targets": [
35083694
{
@@ -3560,7 +3746,7 @@
35603746
"h": 8,
35613747
"w": 10,
35623748
"x": 2,
3563-
"y": 70
3749+
"y": 86
35643750
},
35653751
"targets": [
35663752
{
@@ -3619,7 +3805,7 @@
36193805
"h": 8,
36203806
"w": 12,
36213807
"x": 12,
3622-
"y": 70
3808+
"y": 86
36233809
},
36243810
"targets": [
36253811
{

metrics/go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ module ergo.services/actor/metrics
33
go 1.20
44

55
require (
6-
ergo.services/ergo v1.999.321-0.20260305211829-909f6f11d916
6+
ergo.services/ergo v1.999.321-0.20260310130400-c2d8d8287c0a
77
github.com/prometheus/client_golang v1.20.5
88
)
99

metrics/go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
ergo.services/ergo v1.999.321-0.20260305211829-909f6f11d916 h1:RjQnfp7dsOeyIQvb7zNAMj7+0RbMuXUF1oPil9+Lmrg=
2-
ergo.services/ergo v1.999.321-0.20260305211829-909f6f11d916/go.mod h1:bLQ6PoO6Mz/8gVuzvPv3xfMfo1P9w6rZV1WnMXMeMdg=
1+
ergo.services/ergo v1.999.321-0.20260310130400-c2d8d8287c0a h1:aMo3LDtVMJQ1/cAoKOf34vd+cl74BpIiMKUimntlrTc=
2+
ergo.services/ergo v1.999.321-0.20260310130400-c2d8d8287c0a/go.mod h1:bLQ6PoO6Mz/8gVuzvPv3xfMfo1P9w6rZV1WnMXMeMdg=
33
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
44
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
55
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=

0 commit comments

Comments
 (0)