Skip to content
This repository was archived by the owner on Jan 10, 2023. It is now read-only.

Commit 2db4fae

Browse files
authored
Merge pull request #41 from Xaelias/master
[XDR] Update metrics, add DC-specific metrics
2 parents a1998a4 + ff6eb8f commit 2db4fae

File tree

5 files changed

+139
-33
lines changed

5 files changed

+139
-33
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
asprom
22
dist/
3+
.*.swp

main.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,14 @@ import (
2727

2828
const (
2929
namespace = "aerospike"
30+
secondaryIndex = "sindex"
3031
systemNode = "node"
3132
systemNamespace = "ns"
3233
systemLatency = "latency"
3334
systemLatencyHist = "latency_hist" // total number of ops
3435
systemOps = "ops"
3536
systemSet = "set"
36-
secondaryIndex = "sindex"
37+
xdrDC = "xdr"
3738
)
3839

3940
var (
@@ -126,11 +127,12 @@ func newAsCollector(nodeAddr, username, password string) *asCollector {
126127
password: password,
127128
totalScrapes: totalScrapes,
128129
collectors: []collector{
129-
newStatsCollector(),
130-
newNSCollector(),
131130
newLatencyCollector(),
131+
newNSCollector(),
132132
newSetCollector(),
133133
newSindexCollector(),
134+
newStatsCollector(),
135+
newXdrDCCollector(),
134136
},
135137
}
136138
}

namespaces.go

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -160,9 +160,20 @@ var (
160160
counter("udf_sub_udf_complete", "udf sub udf complete"),
161161
counter("udf_sub_udf_error", "udf sub udf error"),
162162
counter("udf_sub_udf_timeout", "udf sub udf timeout"),
163-
counter("xdr_write_error", "xdr write error"),
164-
counter("xdr_write_success", "xdr write success"),
165-
counter("xdr_write_timeout", "xdr write timeout"),
163+
counter("xdr_client_delete_error", "xdr client delete error"),
164+
counter("xdr_client_delete_not_found", "xdr client delete not found"),
165+
counter("xdr_client_delete_success", "xdr client delete success"),
166+
counter("xdr_client_delete_timeout", "xdr client delete timeout"),
167+
counter("xdr_client_write_error", "xdr client write error"),
168+
counter("xdr_client_write_success", "xdr client write success"),
169+
counter("xdr_client_write_timeout", "xdr client write timeout"),
170+
counter("xdr_from_proxy_delete_error", "xdr from proxy delete error"),
171+
counter("xdr_from_proxy_delete_not_found", "xdr from proxy delete not found"),
172+
counter("xdr_from_proxy_delete_success", "xdr from proxy delete success"),
173+
counter("xdr_from_proxy_delete_timeout", "xdr from proxy delete timeout"),
174+
counter("xdr_from_proxy_write_error", "xdr from proxy write error"),
175+
counter("xdr_from_proxy_write_success", "xdr from proxy write success"),
176+
counter("xdr_from_proxy_write_timeout", "xdr from proxy write timeout"),
166177
gauge("available_bin_names", "available bin names"),
167178
gauge("device_available_pct", "device available pct"),
168179
gauge("device_compression_ratio", "device compression ratio"),

stats.go

Lines changed: 38 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -75,41 +75,52 @@ var (
7575
gauge("fabric_meta_recv_rate", "fabric meta recv rate"),
7676
gauge("fabric_rw_send_rate", "fabric rw send rate"),
7777
gauge("fabric_rw_recv_rate", "fabric rw recv rate"),
78-
counter("xdr_ship_success", "xdr ship success"),
79-
counter("xdr_ship_delete_success", "xdr ship delete success"),
80-
counter("xdr_ship_source_error", "xdr ship source error"),
81-
counter("xdr_ship_destination_error", "xdr ship destination error"),
82-
gauge("xdr_ship_bytes", "xdr ship bytes"),
83-
gauge("xdr_ship_latency_avg", "xdr ship latency avg"),
84-
gauge("xdr_ship_compression_avg_pct", "xdr ship compression avg pct"),
85-
gauge("xdr_ship_inflight_objects", "xdr ship inflight objects"),
86-
gauge("xdr_ship_outstanding_objects", "xdr ship outstanding objects"),
87-
counter("xdr_read_success", "xdr read success"),
88-
counter("xdr_read_error", "xdr read error"),
89-
gauge("xdr_read_notfound", "xdr read notfound"),
90-
gauge("xdr_read_latency_avg", "xdr read latency avg"),
78+
// XDR specific metrics
79+
// requires Aerospike EE
80+
gauge("dlog_free_pct", "dlog free pct"),
81+
counter("dlog_logged", "dlog logged"),
82+
counter("dlog_overwritten_error", "dlog overwritten error"),
83+
counter("dlog_processed_link_down", "dlog processed link down"),
84+
counter("dlog_processed_main", "dlog processed main"),
85+
counter("dlog_processed_replica", "dlog processed replica"),
86+
counter("dlog_relogged", "dlog relogged"),
87+
gauge("dlog_used_objects", "dlog used objects"),
88+
counter("local_recs_migration_retry", "Number of records missing in a batch call"),
89+
counter("stat_pipe_reads_diginfo", "Number of digest information read from the named pipe."),
90+
gauge("xdr_active_failed_node_sessions", "Number of active failed node sessions pending."),
91+
gauge("xdr_active_link_down_sessions", "Number of active link down sessions pending."),
92+
gauge("xdr_global_lastshiptime", "The minimum last ship time in millisecond (epoch) for XDR for across the cluster."),
93+
counter("xdr_hotkey_fetch", "xdr hotkey fetch"),
94+
counter("xdr_hotkey_skip", "xdr hotkey skip"),
95+
counter("xdr_queue_overflow_error", "xdr queue overflow error"),
9196
gauge("xdr_read_active_avg_pct", "xdr read active avg pct"),
97+
counter("xdr_read_error", "xdr read error"),
9298
gauge("xdr_read_idle_avg_pct", "xdr read idle avg pct"),
99+
gauge("xdr_read_latency_avg", "xdr read latency avg"),
100+
counter("xdr_read_notfound", "xdr read notfound"),
93101
gauge("xdr_read_reqq_used", "xdr read reqq used"),
94102
gauge("xdr_read_reqq_used_pct", "xdr read reqq used pct"),
95103
gauge("xdr_read_respq_used", "xdr read respq used"),
104+
counter("xdr_read_success", "xdr read success"),
96105
gauge("xdr_read_txnq_used", "xdr read txnq used"),
97106
gauge("xdr_read_txnq_used_pct", "xdr read txnq used pct"),
98-
gauge("xdr_queue_overflow_error", "xdr queue overflow error"),
99-
gauge("xdr_hotkey_fetch", "xdr hotkey fetch"),
100-
gauge("xdr_hotkey_skip", "xdr hotkey skip"),
101-
counter("xdr_unknown_namespace_error", "xdr unknown namespace error"),
102-
counter("xdr_uninitialized_destination_error", "xdr uninitialized destination error"),
103-
gauge("xdr_timelag", "xdr timelag"),
107+
counter("xdr_relogged_incoming", "Number of records relogged into this node's digest log by another node."),
108+
counter("xdr_relogged_outgoing", "Number of records relogged to another node's digest log. "),
109+
counter("xdr_ship_bytes", "xdr ship bytes"),
110+
gauge("xdr_ship_compression_avg_pct", "xdr ship compression avg pct"),
111+
counter("xdr_ship_delete_success", "xdr ship delete success"),
112+
counter("xdr_ship_destination_error", "xdr ship destination error"),
113+
counter("xdr_ship_destination_permanent_error", "xdr ship destination permanent error"),
114+
gauge("xdr_ship_fullrecord", "Number of records that did not take advantage of bin level shipping."),
115+
gauge("xdr_ship_inflight_objects", "xdr ship inflight objects"),
116+
gauge("xdr_ship_latency_avg", "xdr ship latency avg"),
117+
gauge("xdr_ship_outstanding_objects", "xdr ship outstanding objects"),
118+
counter("xdr_ship_source_error", "xdr ship source error"),
119+
counter("xdr_ship_success", "xdr ship success"),
104120
gauge("xdr_throughput", "xdr throughput"),
105-
gauge("dlog_free_pct", "dlog free pct"),
106-
counter("dlog_logged", "dlog logged"),
107-
counter("dlog_overwritten_error", "dlog overwritten error"),
108-
counter("dlog_processed_link_down", "dlog processed link down"),
109-
counter("dlog_processed_main", "dlog processed main"),
110-
counter("dlog_processed_replica", "dlog processed replica"),
111-
counter("dlog_relogged", "dlog relogged"),
112-
counter("dlog_used_objects", "dlog used objects"),
121+
gauge("xdr_timelag", "xdr timelag"),
122+
counter("xdr_uninitialized_destination_error", "xdr uninitialized destination error"),
123+
counter("xdr_unknown_namespace_error", "xdr unknown namespace error"),
113124
}
114125
)
115126

xdr.go

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
package main
2+
3+
import (
4+
"strings"
5+
6+
as "github.com/aerospike/aerospike-client-go"
7+
"github.com/prometheus/client_golang/prometheus"
8+
)
9+
10+
var (
11+
// DCMetrics lists the keys we report from Aerospike's per-DC statistics command.
12+
// See `asinfo -l -v dcs` for a list of XDR DCs.
13+
// See `asinfo -l -v dc/<dc>` for detailed metrics for a given DC.
//
// Each entry is declared through gauge(...) or counter(...), giving the
// Aerospike statistic name followed by its help text.
14+
DCMetrics = []metric{
15+
gauge("dc_as_open_conn", "Number of open connection to the Aerospike DC."),
16+
gauge("dc_as_size", "The cluster size of the destination Aerospike DC."),
17+
gauge("dc_http_good_locations", "Number of URLs that are considered healthy."),
18+
gauge("dc_http_locations", "Number of URLs configured for the HTTP destination."),
19+
counter("dc_ship_attempt", "Number of records that have been attempted to be shipped."),
20+
counter("dc_ship_bytes", "Number of bytes shipped for this DC."),
21+
counter("dc_ship_delete_success", "Number of delete transactions that have been successfully shipped."),
22+
counter("dc_ship_destination_error", "Number of errors from the remote cluster(s) while shipping records for this DC."),
23+
gauge("dc_ship_idle_avg", "Average number of ms of sleep for each record being shipped."),
24+
gauge("dc_ship_idle_avg_pct", "Representation in percent of total time spent for dc_ship_idle_avg."),
25+
gauge("dc_ship_inflight_objects", "Number of records that are inflight."),
26+
gauge("dc_ship_latency_avg", "Moving average of shipping latency for the specific DC."),
27+
counter("dc_ship_source_error", "Number of client layer errors while shipping records for this DC."),
28+
counter("dc_ship_success", "Number of records that have been successfully shipped."),
29+
// dc_state is not reported; see
// https://www.aerospike.com/docs/reference/metrics/?show-removed=0#dc_state
// NOTE(review): presumably skipped because its value is not numeric — confirm.
30+
gauge("dc_timelag", "Time lag for this specific DC."),
31+
}
32+
)
33+
34+
// XdrDCCollector is the collector for per-datacenter XDR statistics. It is a
// cmetrics value, so it can be converted to cmetrics wherever one is expected.
type XdrDCCollector cmetrics
35+
36+
func newXdrDCCollector() XdrDCCollector {
37+
dc := map[string]cmetric {}
38+
for _, m := range DCMetrics {
39+
dc[m.aeroName] = cmetric{
40+
typ: m.typ,
41+
desc: prometheus.NewDesc(
42+
promkey(xdrDC, m.aeroName),
43+
m.desc,
44+
[]string{"dc"},
45+
nil,
46+
),
47+
}
48+
}
49+
return dc
50+
}
51+
52+
func (dcc XdrDCCollector) describe(ch chan<- *prometheus.Desc) {
53+
for _, s := range dcc {
54+
ch <- s.desc
55+
}
56+
}
57+
58+
func (sic XdrDCCollector) collect(conn *as.Connection) ([]prometheus.Metric, error) {
59+
info, err := as.RequestInfo(conn, "dcs")
60+
if err != nil {
61+
return nil, err
62+
}
63+
64+
var metrics []prometheus.Metric
65+
for _, dc := range strings.Split(info["dcs"], ";") {
66+
dcInfo, err := as.RequestInfo(conn, "dc/"+dc)
67+
if err != nil {
68+
return nil, err
69+
}
70+
71+
metrics = append(
72+
metrics,
73+
infoCollect(
74+
cmetrics(sic),
75+
dcInfo["dc/"+dc],
76+
dc,
77+
)...,
78+
)
79+
}
80+
return metrics, nil
81+
}

0 commit comments

Comments
 (0)