Copy density test into high-density-config #1026

Merged
merged 1 commit on Feb 6, 2020
251 changes: 251 additions & 0 deletions clusterloader2/testing/density/high-density-config.yaml
@@ -0,0 +1,251 @@
# TODO(https://github.com/kubernetes/perf-tests/issues/1007): Make it possible to run high density as part of the load test.
# ASSUMPTIONS:
# - Underlying cluster should have 100+ nodes.
# - Number of nodes should be divisible by NODES_PER_NAMESPACE (default 100).

#Constants
{{$DENSITY_RESOURCE_CONSTRAINTS_FILE := DefaultParam .DENSITY_RESOURCE_CONSTRAINTS_FILE ""}}
{{$NODE_MODE := DefaultParam .NODE_MODE "allnodes"}}
{{$NODES_PER_NAMESPACE := DefaultParam .NODES_PER_NAMESPACE 100}}
{{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 30}}
{{$DENSITY_TEST_THROUGHPUT := DefaultParam .DENSITY_TEST_THROUGHPUT 20}}
{{$SCHEDULER_THROUGHPUT_THRESHOLD := DefaultParam .CL2_SCHEDULER_THROUGHPUT_THRESHOLD 0}}
# LATENCY_POD_MEMORY and LATENCY_POD_CPU are calculated for 1-core 4GB node.
# Increasing allocation of both memory and cpu by 10%
# decreases the value of priority function in scheduler by one point.
# This results in decreased probability of choosing the same node again.
{{$LATENCY_POD_CPU := DefaultParam .LATENCY_POD_CPU 100}}
{{$LATENCY_POD_MEMORY := DefaultParam .LATENCY_POD_MEMORY 350}}
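# For scale (illustrative arithmetic): on the 1-core 4GB node assumed above, the default
# latency pod requests 100m = 10% of the node's CPU and 350M, roughly 9% of its memory.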
{{$MIN_LATENCY_PODS := 500}}
{{$MIN_SATURATION_PODS_TIMEOUT := 180}}
{{$ENABLE_CHAOSMONKEY := DefaultParam .ENABLE_CHAOSMONKEY false}}
{{$ENABLE_PROMETHEUS_API_RESPONSIVENESS := DefaultParam .ENABLE_PROMETHEUS_API_RESPONSIVENESS false}}
{{$ENABLE_SYSTEM_POD_METRICS:= DefaultParam .ENABLE_SYSTEM_POD_METRICS true}}
{{$USE_SIMPLE_LATENCY_QUERY := DefaultParam .USE_SIMPLE_LATENCY_QUERY false}}
{{$ENABLE_RESTART_COUNT_CHECK := DefaultParam .ENABLE_RESTART_COUNT_CHECK false}}
{{$RESTART_COUNT_THRESHOLD_OVERRIDES:= DefaultParam .RESTART_COUNT_THRESHOLD_OVERRIDES ""}}
#Variables
{{$namespaces := DivideInt .Nodes $NODES_PER_NAMESPACE}}
{{$podsPerNamespace := MultiplyInt $PODS_PER_NODE $NODES_PER_NAMESPACE}}
{{$totalPods := MultiplyInt $podsPerNamespace $namespaces}}
{{$latencyReplicas := DivideInt (MaxInt $MIN_LATENCY_PODS .Nodes) $namespaces}}
{{$totalLatencyPods := MultiplyInt $namespaces $latencyReplicas}}
{{$saturationDeploymentTimeout := DivideFloat $totalPods $DENSITY_TEST_THROUGHPUT | AddInt $MIN_SATURATION_PODS_TIMEOUT}}
# saturationDeploymentHardTimeout must be at least 20m to make sure that ~10m node
# failure won't fail the test. See https://github.com/kubernetes/kubernetes/issues/73461#issuecomment-467338711
{{$saturationDeploymentHardTimeout := MaxInt $saturationDeploymentTimeout 1200}}
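# Worked example with the defaults above and a 100-node cluster (illustrative only):
#   namespaces                      = 100 / 100          = 1
#   podsPerNamespace                = 30 * 100           = 3000
#   totalPods                       = 3000 * 1           = 3000
#   latencyReplicas                 = max(500, 100) / 1  = 500
#   totalLatencyPods                = 1 * 500            = 500
#   saturationDeploymentTimeout     = 3000 / 20 + 180    = 330  (seconds)
#   saturationDeploymentHardTimeout = max(330, 1200)     = 1200 (seconds)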

name: density
automanagedNamespaces: {{$namespaces}}
tuningSets:
- name: Uniform5qps
  qpsLoad:
    qps: 5
{{if $ENABLE_CHAOSMONKEY}}
chaosMonkey:
  nodeFailure:
    failureRate: 0.01
    interval: 1m
    jitterFactor: 10.0
    simulatedDowntime: 10m
{{end}}
steps:
- name: Starting measurements
  measurements:
  - Identifier: APIResponsivenessPrometheus
    Method: APIResponsivenessPrometheus
    Params:
      action: start
  - Identifier: APIResponsivenessPrometheusSimple
    Method: APIResponsivenessPrometheus
    Params:
      action: start
  # TODO(oxddr): figure out how many probers to run as a function of cluster size
  - Identifier: InClusterNetworkLatency
    Method: InClusterNetworkLatency
    Params:
      action: start
      replicasPerProbe: {{AddInt 2 (DivideInt .Nodes 100)}}
  - Identifier: DnsLookupLatency
    Method: DnsLookupLatency
    Params:
      action: start
      replicasPerProbe: {{AddInt 2 (DivideInt .Nodes 100)}}
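  # Illustrative arithmetic: with a 5000-node cluster, each of the two probe
  # measurements above runs 2 + 5000/100 = 52 probe replicas.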
  - Identifier: TestMetrics
    Method: TestMetrics
    Params:
      action: start
      nodeMode: {{$NODE_MODE}}
      resourceConstraints: {{$DENSITY_RESOURCE_CONSTRAINTS_FILE}}
      systemPodMetricsEnabled: {{$ENABLE_SYSTEM_POD_METRICS}}
      restartCountThresholdOverrides: {{YamlQuote $RESTART_COUNT_THRESHOLD_OVERRIDES 4}}
      enableRestartCountCheck: {{$ENABLE_RESTART_COUNT_CHECK}}

- name: Starting saturation pod measurements
  measurements:
  - Identifier: SaturationPodStartupLatency
    Method: PodStartupLatency
    Params:
      action: start
      labelSelector: group = saturation
      threshold: {{$saturationDeploymentTimeout}}s
  - Identifier: WaitForRunningSaturationDeployments
    Method: WaitForControlledPodsRunning
    Params:
      action: start
      apiVersion: apps/v1
      kind: Deployment
      labelSelector: group = saturation
      operationTimeout: {{$saturationDeploymentHardTimeout}}s
  - Identifier: SchedulingThroughput
    Method: SchedulingThroughput
    Params:
      action: start
      labelSelector: group = saturation

- name: Creating saturation pods
  phases:
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: 1
    tuningSet: Uniform5qps
    objectBundle:
    - basename: saturation-deployment
      objectTemplatePath: deployment.yaml
      templateFillMap:
        Replicas: {{$podsPerNamespace}}
        Group: saturation
        CpuRequest: 1m
        MemoryRequest: 10M

- name: Collecting saturation pod measurements
  measurements:
  - Identifier: WaitForRunningSaturationDeployments
    Method: WaitForControlledPodsRunning
    Params:
      action: gather
- measurements:
  - Identifier: SaturationPodStartupLatency
    Method: PodStartupLatency
    Params:
      action: gather
- measurements:
  - Identifier: SchedulingThroughput
    Method: SchedulingThroughput
    Params:
      action: gather
      threshold: {{$SCHEDULER_THROUGHPUT_THRESHOLD}}

- name: Starting latency pod measurements
  measurements:
  - Identifier: PodStartupLatency
    Method: PodStartupLatency
    Params:
      action: start
      labelSelector: group = latency
  - Identifier: WaitForRunningLatencyDeployments
    Method: WaitForControlledPodsRunning
    Params:
      action: start
      apiVersion: apps/v1
      kind: Deployment
      labelSelector: group = latency
      operationTimeout: 15m

- name: Creating latency pods
  phases:
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{$latencyReplicas}}
    tuningSet: Uniform5qps
    objectBundle:
    - basename: latency-deployment
      objectTemplatePath: deployment.yaml
      templateFillMap:
        Replicas: 1
        Group: latency
        CpuRequest: {{$LATENCY_POD_CPU}}m
        MemoryRequest: {{$LATENCY_POD_MEMORY}}M

- name: Waiting for latency pods to be running
  measurements:
  - Identifier: WaitForRunningLatencyDeployments
    Method: WaitForControlledPodsRunning
    Params:
      action: gather

- name: Deleting latency pods
  phases:
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: 0
    tuningSet: Uniform5qps
    objectBundle:
    - basename: latency-deployment
      objectTemplatePath: deployment.yaml

- name: Waiting for latency pods to be deleted
  measurements:
  - Identifier: WaitForRunningLatencyDeployments
    Method: WaitForControlledPodsRunning
    Params:
      action: gather

- name: Collecting pod startup latency
  measurements:
  - Identifier: PodStartupLatency
    Method: PodStartupLatency
    Params:
      action: gather

- name: Deleting saturation pods
  phases:
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: 0
    tuningSet: Uniform5qps
    objectBundle:
    - basename: saturation-deployment
      objectTemplatePath: deployment.yaml

- name: Waiting for saturation pods to be deleted
  measurements:
  - Identifier: WaitForRunningSaturationDeployments
    Method: WaitForControlledPodsRunning
    Params:
      action: gather

- name: Collecting measurements
  measurements:
  - Identifier: APIResponsivenessPrometheusSimple
    Method: APIResponsivenessPrometheus
    Params:
      action: gather
      enableViolations: true
      useSimpleLatencyQuery: true
      summaryName: APIResponsivenessPrometheus_simple
  {{if not $USE_SIMPLE_LATENCY_QUERY}}
  - Identifier: APIResponsivenessPrometheus
    Method: APIResponsivenessPrometheus
    Params:
      action: gather
  {{end}}
  - Identifier: InClusterNetworkLatency
    Method: InClusterNetworkLatency
    Params:
      action: gather
  - Identifier: DnsLookupLatency
    Method: DnsLookupLatency
    Params:
      action: gather
  - Identifier: TestMetrics
    Method: TestMetrics
    Params:
      action: gather
      systemPodMetricsEnabled: {{$ENABLE_SYSTEM_POD_METRICS}}
      restartCountThresholdOverrides: {{YamlQuote $RESTART_COUNT_THRESHOLD_OVERRIDES 4}}
      enableRestartCountCheck: {{$ENABLE_RESTART_COUNT_CHECK}}
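
The parameters read through DefaultParam above come from test overrides, typically a YAML file passed to clusterloader2 via its --testoverrides flag. A minimal overrides sketch for a high-density run; the keys match the DefaultParam lookups in this config, while the concrete values below are purely illustrative:

# overrides.yaml (illustrative values, not part of this PR)
PODS_PER_NODE: 95
NODES_PER_NAMESPACE: 100
DENSITY_TEST_THROUGHPUT: 20
ENABLE_CHAOSMONKEY: false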