-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Kubernetes has a metrics provider interface, add an adapter to be able to get these into our Prometheus registry. This code exists in a similar form inside K8s but against their custom metrics architecture, not plain Prometheus. As these metrics are shared across all workqueues we follow K8s in implementing this with a singleton/global. It's not the prettiest, but otherwise we may get issues with Prometheus and duplicate metrics. Change-Id: I0b6d608d14793e44859166a5a59d446c8f662a25 Reviewed-on: https://review.monogon.dev/c/monogon/+/3829 Reviewed-by: Tim Windelschmidt <[email protected]> Tested-by: Jenkins CI
- Loading branch information
Showing
5 changed files
with
140 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
load("@io_bazel_rules_go//go:def.bzl", "go_library") | ||
|
||
go_library( | ||
name = "metricsprovider", | ||
srcs = ["metricsprovider.go"], | ||
importpath = "source.monogon.dev/metropolis/node/kubernetes/metricsprovider", | ||
visibility = ["//visibility:public"], | ||
deps = [ | ||
"@com_github_prometheus_client_golang//prometheus", | ||
"@io_k8s_client_go//util/workqueue", | ||
], | ||
) |
114 changes: 114 additions & 0 deletions
114
metropolis/node/kubernetes/metricsprovider/metricsprovider.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
// Copyright The Monogon Project Authors. | ||
// Copyright 2019 The Kubernetes Authors. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
// Package metricsprovider provides a Prometheus registry for code in K8s | ||
// client-go capable of providing metrics. Currently it registers itself | ||
// as a metrics backend for workqueues, more can be added in the future. | ||
// The registry with all the metrics is available as `Registry`. | ||
package metricsprovider | ||
|
||
import ( | ||
"github.com/prometheus/client_golang/prometheus" | ||
"k8s.io/client-go/util/workqueue" | ||
) | ||
|
||
// Metrics subsystem and keys used by the workqueue. | ||
const ( | ||
WorkQueueSubsystem = "workqueue" | ||
DepthKey = "depth" | ||
AddsKey = "adds_total" | ||
QueueLatencyKey = "queue_duration_seconds" | ||
WorkDurationKey = "work_duration_seconds" | ||
UnfinishedWorkKey = "unfinished_work_seconds" | ||
LongestRunningProcessorKey = "longest_running_processor_seconds" | ||
RetriesKey = "retries_total" | ||
) | ||
|
||
var Registry = prometheus.NewRegistry() | ||
|
||
var ( | ||
depth = prometheus.NewGaugeVec(prometheus.GaugeOpts{ | ||
Subsystem: WorkQueueSubsystem, | ||
Name: DepthKey, | ||
Help: "Current depth of workqueue", | ||
}, []string{"name"}) | ||
|
||
adds = prometheus.NewCounterVec(prometheus.CounterOpts{ | ||
Subsystem: WorkQueueSubsystem, | ||
Name: AddsKey, | ||
Help: "Total number of adds handled by workqueue", | ||
}, []string{"name"}) | ||
|
||
latency = prometheus.NewHistogramVec(prometheus.HistogramOpts{ | ||
Subsystem: WorkQueueSubsystem, | ||
Name: QueueLatencyKey, | ||
Help: "How long in seconds an item stays in the workqueue before being requested.", | ||
Buckets: prometheus.ExponentialBuckets(10e-9, 10, 10), | ||
}, []string{"name"}) | ||
|
||
workDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ | ||
Subsystem: WorkQueueSubsystem, | ||
Name: WorkDurationKey, | ||
Help: "How long in seconds processing an item from workqueue takes.", | ||
Buckets: prometheus.ExponentialBuckets(10e-9, 10, 10), | ||
}, []string{"name"}) | ||
|
||
unfinished = prometheus.NewGaugeVec(prometheus.GaugeOpts{ | ||
Subsystem: WorkQueueSubsystem, | ||
Name: UnfinishedWorkKey, | ||
Help: "How many seconds of work has done that " + | ||
"is in progress and hasn't been observed by work_duration. Large " + | ||
"values indicate stuck threads. One can deduce the number of stuck " + | ||
"threads by observing the rate at which this increases.", | ||
}, []string{"name"}) | ||
|
||
longestRunningProcessor = prometheus.NewGaugeVec(prometheus.GaugeOpts{ | ||
Subsystem: WorkQueueSubsystem, | ||
Name: LongestRunningProcessorKey, | ||
Help: "How many seconds has the longest running " + | ||
"processor for workqueue been running.", | ||
}, []string{"name"}) | ||
|
||
retries = prometheus.NewCounterVec(prometheus.CounterOpts{ | ||
Subsystem: WorkQueueSubsystem, | ||
Name: RetriesKey, | ||
Help: "Total number of retries handled by workqueue", | ||
}, []string{"name"}) | ||
) | ||
|
||
func init() { | ||
Registry.MustRegister(depth, adds, latency, workDuration, unfinished, longestRunningProcessor, retries) | ||
workqueue.SetProvider(&promProvider{}) | ||
} | ||
|
||
type promProvider struct { | ||
} | ||
|
||
func (promProvider) NewDepthMetric(name string) workqueue.GaugeMetric { | ||
return depth.WithLabelValues(name) | ||
} | ||
|
||
func (promProvider) NewAddsMetric(name string) workqueue.CounterMetric { | ||
return adds.WithLabelValues(name) | ||
} | ||
|
||
func (promProvider) NewLatencyMetric(name string) workqueue.HistogramMetric { | ||
return latency.WithLabelValues(name) | ||
} | ||
|
||
func (promProvider) NewWorkDurationMetric(name string) workqueue.HistogramMetric { | ||
return workDuration.WithLabelValues(name) | ||
} | ||
|
||
func (promProvider) NewUnfinishedWorkSecondsMetric(name string) workqueue.SettableGaugeMetric { | ||
return unfinished.WithLabelValues(name) | ||
} | ||
|
||
func (promProvider) NewLongestRunningProcessorSecondsMetric(name string) workqueue.SettableGaugeMetric { | ||
return longestRunningProcessor.WithLabelValues(name) | ||
} | ||
|
||
func (promProvider) NewRetriesMetric(name string) workqueue.CounterMetric { | ||
return retries.WithLabelValues(name) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters