Skip to content

Monitoring API: Add Metric server config #2322

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -208,3 +208,149 @@ tests:
request: "500m"
limit: "200m"
expectedError: 'spec.alertmanagerConfig.customConfig.resources[0]: Invalid value: "object": limit must be greater than or equal to request'
- name: Should be able to create a minimal MetricsServerConfig
initial: |
apiVersion: config.openshift.io/v1alpha1
kind: ClusterMonitoring
spec:
metricsServerConfig:
verbosity: 1
expected: |
apiVersion: config.openshift.io/v1alpha1
kind: ClusterMonitoring
spec:
metricsServerConfig:
verbosity: 1
- name: Should accept MetricsServerConfig with comprehensive ContainerResource array
initial: |
apiVersion: config.openshift.io/v1alpha1
kind: ClusterMonitoring
spec:
userDefined:
mode: "Disabled"
metricsServerConfig:
resources:
- name: "cpu"
request: "100m"
limit: "500m"
- name: "memory"
request: "128Mi"
limit: "512Mi"
- name: "ephemeral-storage"
request: "1Gi"
limit: "2Gi"
expected: |
apiVersion: config.openshift.io/v1alpha1
kind: ClusterMonitoring
spec:
userDefined:
mode: "Disabled"
metricsServerConfig:
verbosity: 0
resources:
- name: "cpu"
request: "100m"
limit: "500m"
- name: "memory"
request: "128Mi"
limit: "512Mi"
- name: "ephemeral-storage"
request: "1Gi"
limit: "2Gi"
- name: Should accept MetricsServerConfig with only requests
initial: |
apiVersion: config.openshift.io/v1alpha1
kind: ClusterMonitoring
spec:
userDefined:
mode: "Disabled"
metricsServerConfig:
resources:
- name: "cpu"
request: "200m"
- name: "memory"
request: "256Mi"
expected: |
apiVersion: config.openshift.io/v1alpha1
kind: ClusterMonitoring
spec:
userDefined:
mode: "Disabled"
metricsServerConfig:
verbosity: 0
resources:
- name: "cpu"
request: "200m"
- name: "memory"
request: "256Mi"
- name: Should accept MetricsServerConfig with only limits
initial: |
apiVersion: config.openshift.io/v1alpha1
kind: ClusterMonitoring
spec:
userDefined:
mode: "Disabled"
metricsServerConfig:
resources:
- name: "cpu"
limit: "1"
- name: "memory"
limit: "1Gi"
expected: |
apiVersion: config.openshift.io/v1alpha1
kind: ClusterMonitoring
spec:
userDefined:
mode: "Disabled"
metricsServerConfig:
verbosity: 0
resources:
- name: "cpu"
limit: "1"
- name: "memory"
limit: "1Gi"
- name: Should reject MetricsServerConfig with limit less than request
initial: |
apiVersion: config.openshift.io/v1alpha1
kind: ClusterMonitoring
spec:
userDefined:
mode: "Disabled"
metricsServerConfig:
resources:
- name: "cpu"
request: "500m"
limit: "200m"
expectedError: 'spec.metricsServerConfig.resources[0]: Invalid value: "object": limit must be greater than or equal to request'
- name: Should reject MetricsServerConfig with more than 10 resource items
initial: |
apiVersion: config.openshift.io/v1alpha1
kind: ClusterMonitoring
spec:
userDefined:
mode: "Disabled"
metricsServerConfig:
resources:
- name: "cpu"
request: "100m"
- name: "memory"
request: "64Mi"
- name: "hugepages-2Mi"
request: "32Mi"
- name: "hugepages-1Gi"
request: "1Gi"
- name: "ephemeral-storage"
request: "1Gi"
- name: "nvidia.com/gpu"
request: "1"
- name: "example.com/foo"
request: "1"
- name: "example.com/bar"
request: "1"
- name: "example.com/baz"
request: "1"
- name: "example.com/qux"
request: "1"
- name: "example.com/quux"
request: "1"
expectedError: 'spec.metricsServerConfig.resources: Too many: 11: must have at most 10 items'
119 changes: 119 additions & 0 deletions config/v1alpha1/types_cluster_monitoring.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,11 @@ type ClusterMonitoringSpec struct {
// The current default value is `DefaultConfig`.
// +optional
AlertmanagerConfig *AlertmanagerConfig `json:"alertmanagerConfig,omitempty"`
// metricsServerConfig defines the configuration for the Kubernetes Metrics Server.
// metricsServerConfig is optional.
// When omitted, this means no opinion and the platform is left to choose a reasonable default, that is subject to change over time.
// +optional
MetricsServerConfig *MetricsServerConfig `json:"metricsServerConfig,omitempty"`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the metrics server always deployed or is this something users can opt out of deploying like Alertmanager?

}

// UserDefinedMonitoring config for user-defined projects.
Expand Down Expand Up @@ -322,3 +327,117 @@ type ContainerResource struct {
// +kubebuilder:validation:XValidation:rule="!format.dns1123Subdomain().validate(self).hasValue()",message="a lowercase RFC 1123 subdomain must consist of lower case alphanumeric characters, '-' or '.', and must start and end with an alphanumeric character."
// +kubebuilder:validation:MaxLength=63
type SecretName string

// MetricsServerConfig provides configuration options for the Metrics Server instance
// that runs in the `openshift-monitoring` namespace. Use this configuration to control
// how the Metrics Server instance is deployed, how it logs, and how its pods are scheduled.
// At least one field must be set when this struct is specified.
// +kubebuilder:validation:MinProperties=1
type MetricsServerConfig struct {
// audit defines the audit configuration used by the Metrics Server instance.
// audit is optional.
// When omitted, this means no opinion and the platform is left to choose a reasonable default, that is subject to change over time.
// The current default value is `Metadata`.
// +optional
Audit *Audit `json:"audit,omitempty"`
// nodeSelector defines the nodes on which the Pods are scheduled.
// nodeSelector is optional.
//
// When omitted, this means the user has no opinion and the platform is left
// to choose reasonable defaults. These defaults are subject to change over time.
// The current default value is `kubernetes.io/os: linux`.
// When specified, nodeSelector must contain between 1 and 10 entries.
// +optional
// +kubebuilder:validation:MinProperties=1
// +kubebuilder:validation:MaxProperties=10
NodeSelector map[string]string `json:"nodeSelector,omitempty"`
// tolerations defines tolerations for the pods.
// tolerations is optional.
//
// When omitted, this means the user has no opinion and the platform is left
// to choose reasonable defaults. These defaults are subject to change over time.
// Defaults are empty/unset.
// Maximum length for this list is 10.
// Minimum length for this list is 1.
// +kubebuilder:validation:MaxItems=10
// +kubebuilder:validation:MinItems=1
// +listType=atomic
// +optional
Tolerations []v1.Toleration `json:"tolerations,omitempty"`
// verbosity defines the verbosity of log messages for Metrics Server.
// Valid values are integers from 0 to 255; values over 10 are usually unnecessary.
// When omitted, this means no opinion and the platform is left to choose a reasonable default, that is subject to change over time.
// The current default value is `0`.
// The default, `0`, means minimal logging.
// +optional
// +kubebuilder:validation:Minimum=0
// +kubebuilder:validation:Maximum=255
// +kubebuilder:default=0
Verbosity *int32 `json:"verbosity,omitempty"`
// resources defines the compute resource requests and limits for the Metrics Server container.
// This includes CPU, memory and HugePages constraints to help control scheduling and resource usage.
// When not specified, defaults are used by the platform. Requests cannot exceed limits.
// This field is optional.
// More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
// This is a simplified API that maps to Kubernetes ResourceRequirements.
// The current default values are:
// resources:
// - name: cpu
// request: 4m
// limit: null
// - name: memory
// request: 40Mi
// limit: null
// Entries are keyed by name, so each resource name may appear at most once.
// Maximum length for this list is 10.
// Minimum length for this list is 1.
// +optional
// +listType=map
// +listMapKey=name
// +kubebuilder:validation:MaxItems=10
// +kubebuilder:validation:MinItems=1
Resources []ContainerResource `json:"resources,omitempty"`
// topologySpreadConstraints defines rules for how Metrics Server Pods should be distributed
// across topology domains such as zones, nodes, or other user-defined labels.
// topologySpreadConstraints is optional.
// This helps improve high availability and resource efficiency by avoiding placing
// too many replicas in the same failure domain.
//
// When omitted, this means no opinion and the platform is left to choose a default, which is subject to change over time.
// This field maps directly to the `topologySpreadConstraints` field in the Pod spec.
// Default is empty list.
// Maximum length for this list is 10.
// Minimum length for this list is 1.
// Entries must have unique topologyKey and whenUnsatisfiable pairs.
// +kubebuilder:validation:MaxItems=10
// +kubebuilder:validation:MinItems=1
// +listType=map
// +listMapKey=topologyKey
// +listMapKey=whenUnsatisfiable
// +optional
TopologySpreadConstraints []v1.TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty"`
}

// AuditLevel defines the audit log level for the Metrics Server.
// Valid values are `None`, `Metadata`, `Request` and `RequestResponse`.
// +kubebuilder:validation:Enum=None;Metadata;Request;RequestResponse
type AuditLevel string

// Supported AuditLevel values, listed from least to most detailed.
const (
// AuditLevelNone disables audit logging.
AuditLevelNone AuditLevel = "None"
// AuditLevelMetadata logs request metadata (requesting user, timestamp, resource, verb, etc.)
// but not the request or response body.
AuditLevelMetadata AuditLevel = "Metadata"
// AuditLevelRequest logs event metadata and the request body, but not the response body.
AuditLevelRequest AuditLevel = "Request"
// AuditLevelRequestResponse logs event metadata together with the request and response bodies.
AuditLevelRequestResponse AuditLevel = "RequestResponse"
)

// Audit defines the audit log configuration for the Metrics Server.
type Audit struct {
// profile sets the audit log level for the Metrics Server. This currently matches the various
// audit log levels: "None", "Metadata", "Request" and "RequestResponse".
// profile is required; no default is applied at this level.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no default at this level if the field is required.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

correct

//
// see: https://kubernetes.io/docs/tasks/debug-application-cluster/audit/#audit-policy
// for more information about auditing and log levels.
// +required
Profile AuditLevel `json:"profile"`
}
Loading