diff --git a/cassandra/Docker/Dockerfile b/cassandra/Docker/Dockerfile
new file mode 100644
index 0000000..a733511
--- /dev/null
+++ b/cassandra/Docker/Dockerfile
@@ -0,0 +1,24 @@
+FROM ubuntu:bionic
+
+# Abort a piped RUN as soon as any stage of the pipe fails, so a failed
+# key download cannot be silently swallowed by `apt-key add -`.
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+# Install the tools needed to fetch and register the Cassandra apt repository.
+# The apt lists are removed in the same layer so they do not bloat the image.
+RUN apt-get update \
+ && apt-get --assume-yes install --no-install-recommends ca-certificates curl gnupg2 wget \
+ && rm -rf /var/lib/apt/lists/*
+
+# Add the Cassandra 3.11 repo (the "311x" series) and import the Apache signing keys.
+RUN echo "deb https://downloads.apache.org/cassandra/debian 311x main" \
+      | tee -a /etc/apt/sources.list.d/cassandra.sources.list \
+ && curl -fsSL https://downloads.apache.org/cassandra/KEYS | apt-key add -
+
+# Install the Cassandra packages that supply the tools entrypoint.sh calls (cqlsh, cassandra-stress).
+RUN apt-get update \
+ && DEBIAN_FRONTEND=noninteractive apt-get --assume-yes install cassandra cassandra-tools \
+ && rm -rf /var/lib/apt/lists/*
+
+COPY entrypoint.sh /usr/local/bin/
+ENTRYPOINT ["entrypoint.sh"]
diff --git a/cassandra/Docker/entrypoint.sh b/cassandra/Docker/entrypoint.sh
new file mode 100755
index 0000000..e5f823a
--- /dev/null
+++ b/cassandra/Docker/entrypoint.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Runs the cassandra-stress write and mixed workloads back to back against the
+# node reachable as "cassandra". Because of `set -e`, a failing stress run
+# aborts the script with a non-zero exit status.
+set -e
+
+# Show and then drop any existing keyspace1 before the stress runs.
+# Both steps are best-effort: `|| true` ignores their failures so that
+# `set -e` does not abort the script here.
+cqlsh --request-timeout=60 -e "DESCRIBE KEYSPACE keyspace1;" cassandra || true
+cqlsh --request-timeout=60 -e "DROP KEYSPACE keyspace1;" cassandra || true
+
+cassandra-stress write n=100000 -rate threads=10 -node cassandra
+cassandra-stress mixed n=100000 -rate threads=10 -node cassandra
+
+## NOTES - rm -rf /cassandra_data/data/system/peers*/*
+## DO AS INIT CONTAINER ON STS
+## $env:JVM_OPTS="$JVM_OPTS -Dcassandra.load_ring_state=false"
diff --git a/cassandra/README.md b/cassandra/README.md
new file mode 100644
index 0000000..d628e54
--- /dev/null
+++ b/cassandra/README.md
@@ -0,0 +1,170 @@
+# Cassandra StormForge Example
+Optimizing Cassandra for Cost and Performance using cassandra-stress
+
+## Overview
+As companies start using containerized versions of Cassandra, it can be challenging to tune the environment Cassandra is operating in
+for things like HEAP size, CPU, Memory, etc. Due to this challenge, companies are scaling their infrastructure out to keep up with
+the demand of Cassandra's increasing resource demand in order to remain stable. In this example we show how to use cassandra-stress, the
+apache cassandra load testing utility to run all three stress tests consecutively - Write, Read, Mixed.
+
+The official documentation for cassandra-stress can be found [here](https://cassandra.apache.org/doc/latest/tools/cassandra_stress.html)
+
+
+### Technical Process
+In order to get the cassandra-stress to run all three load tests under one experiment trial, we needed to create a container for that task.
+You can find the Dockerfile and related artifacts [here](https://www.github.com/thecrudge/cstress) or in the Docker folder.
Essentially it's
+an image that runs an entrypoint with a very basic script to run all three load tests consecutively. You can customize your load test parameters
+here in the entrypoint.sh file.
+
+In the experiment spec, you can see the parameters we are using for our experiment, and the experiment budget (or how many trials we want to run) -
+
+```
+spec:
+  optimization:
+  - name: "experimentBudget"
+    value: "120" #number of trials
+  parameters:
+  - name: memory
+    min: 500
+    max: 12000
+  - name: cpu
+    min: 500
+    max: 3000
+  - name: MAX_HEAP_SIZE
+    min: 1000
+    max: 8000
+```
+It is important to remember here to leave some headroom for the max config so as not to run into OOM or resource issues during the trial. Here
+I am running Cassandra in AWS on ec2, t2.xlarge nodes.
+
+Because we never want our HEAP size to be greater than our memory setting, we can configure this in our experiment file by declaring constraints
+like so -
+
+```
+  constraints:
+  - order:
+      lowerParameter: MAX_HEAP_SIZE
+      upperParameter: memory
+```
+
+You can also see that we did the same thing here, but defined them in a different way so that MAX_HEAP_SIZE remains 1500M below memory.
+
+You can find documentation on constraints [here](https://docs.stormforge.io/experiment/parameters/#parameter-constraints)
+
+```
+  constraints:
+  - name: heap_memory
+    isUpperBound: true
+    bound: "-1500"
+    constraintType: sum
+    parameters:
+    - parameterName: memory
+      weight: "-1.0"
+    - parameterName: MAX_HEAP_SIZE
+      weight: "1.0"
+```
+
+Next, we need to define our metrics or objectives we are optimizing for -
+
+```
+  metrics:
+  - name: duration
+    minimize: true
+    query: "{{duration .StartTime .CompletionTime}}"
+  - name: cost
+    minimize: true
+    query: "{{div (add (mul .Values.cpu 22) (mul .Values.memory 3)) 1000}}"
+```
+
+In this example, duration is equal to the amount of time it takes for the cassandra-stress job to complete, and the cost is measured by the
+amount of CPU and Memory we are consuming in that trial.
+
+Finally, we define our patches and our trial template
+
+```
+    patch: |
+      spec:
+        template:
+          spec:
+            containers:
+            - name: cassandra
+              resources:
+                limits:
+                  cpu: "{{ .Values.cpu }}m"
+                  memory: "{{ .Values.memory }}Mi"
+                requests:
+                  cpu: "{{ .Values.cpu }}m"
+                  memory: "{{ .Values.memory }}Mi"
+              env:
+              - name: MAX_HEAP_SIZE
+                value: "{{ .Values.MAX_HEAP_SIZE }}M"
+
+  template: # trial
+    spec:
+      initialDelaySeconds: 15
+      template: # job
+        spec:
+          template: # pod
+            spec:
+              containers:
+              - image: thecrudge/cstress:latest
+                name: cassandra-stress
+```
+
+You can see here how we are patching the cassandra containers for limits and env variables for HEAP sizing. You can also see here that we are
+using the custom cassandra-stress image we discussed at the beginning of this file. We can validate our trial patch by describing a cassandra pod
+and verifying the trial settings by describing the trial -
+
+```
+kubectl describe pod cassandra-0
+Name:         cassandra-0
+...
+Containers:
+  cassandra:
+    Container ID:   docker://835392cb704e7a01c8011c4d69f7b014159a2b3847809f9074689b905f44596e
+    Image:          gcr.io/google-samples/cassandra:v13
+    Image ID:       docker-pullable://gcr.io/google-samples/cassandra@sha256:7a3d20afa0a46ed073a5c587b4f37e21fa860e83c60b9c42fec1e1e739d64007
+    Ports:          7000/TCP, 7001/TCP, 7199/TCP, 9042/TCP
+    Host Ports:     0/TCP, 0/TCP, 0/TCP, 0/TCP
+    State:          Running
+      Started:      Wed, 02 Jun 2021 10:54:42 -0500
+    Ready:          True
+    Restart Count:  0
+    Limits:
+      cpu:     618m
+      memory:  5049Mi
+    Requests:
+      cpu:      618m
+      memory:   5049Mi
+    Readiness:  exec [/bin/bash -c /ready-probe.sh] delay=15s timeout=5s period=10s #success=1 #failure=3
+    Environment:
+      MAX_HEAP_SIZE:           1413M
+      HEAP_NEW_SIZE:           7514M
+      CASSANDRA_SEEDS:         cassandra-0.cassandra.default.svc.cluster.local
+      CASSANDRA_CLUSTER_NAME:  K8Demo
+      CASSANDRA_DC:            DC1-K8Demo
+      CASSANDRA_RACK:          Rack1-K8Demo
+      POD_IP:                   (v1:status.podIP)
+...
+```
+```
+kubectl get trials -w
+
+NAME                                     STATUS      ASSIGNMENTS                                 VALUES
+cassandra-write-read-mixed-example-000   Completed   MAX_HEAP_SIZE=5186, cpu=2309, memory=6622   duration=3411, cost=70
+cassandra-write-read-mixed-example-001   Running     MAX_HEAP_SIZE=1413, cpu=618, memory=5049
+```
+
+## Results
+The image below shows us that the machine learning has recommended trial #98. With this trial we can see we have a cost savings of 34.29%
+compared to our baseline in Trial #1.
+
+![Recommended trial compared to baseline](img/results1.png)
+
+In this image, we can see all of our trials, with the recommended trial highlighted.
+
+![All trials with the recommended trial highlighted](img/results2.png)
+
+And finally, we can get the parameter settings or export the config itself
+
+![Parameter settings of the recommended trial](img/results3.png)
\ No newline at end of file
diff --git a/cassandra/cassandra-full-experiment.yaml b/cassandra/cassandra-full-experiment.yaml
new file mode 100644
index 0000000..57ac128
--- /dev/null
+++ b/cassandra/cassandra-full-experiment.yaml
@@ -0,0 +1,76 @@
+apiVersion: redskyops.dev/v1beta1
+kind: Experiment
+metadata:
+  name: cassandra-rwx
+spec:
+  optimization:
+  - name: "experimentBudget"
+    value: "120" #number of trials
+  parameters:
+  - name: memory
+    min: 500
+    max: 12000
+  - name: cpu
+    min: 500
+    max: 3000
+  - name: MAX_HEAP_SIZE
+    min: 1000
+    max: 8000
+#  - name: HEAP_NEWSIZE
+#    min: 1000
+#    max: 8000
+  constraints:
+  - name: heap_memory
+    sum:
+      bound: "-1500"
+      # Weighted sum is (MAX_HEAP_SIZE - memory); as an upper bound of -1500 this
+      # keeps MAX_HEAP_SIZE at least 1500 below memory, as the README describes.
+      isUpperBound: true
+      parameters:
+      - name: memory
+        weight: "-1.0"
+      - name: MAX_HEAP_SIZE
+        weight: "1.0"
+#  - order:
+#      lowerParameter: MAX_HEAP_SIZE
+#      upperParameter: memory
+  metrics:
+  - name: duration
+    minimize: true
+    query: "{{duration .StartTime .CompletionTime}}"
+  - name: cost
+    minimize: true
+    query: "{{div (add (mul .Values.cpu 22) (mul .Values.memory 3)) 1000}}"
+  patches:
+  - targetRef:
+      kind: StatefulSet
+      apiVersion: apps/v1
+      name: cassandra
+    patch: |
+      spec:
+        template:
+          spec:
+            containers:
+            - name: cassandra
+              resources:
+                limits:
+                  cpu: "{{ .Values.cpu }}m"
+                  memory: "{{ .Values.memory }}Mi"
+                requests:
+                  cpu: "{{ .Values.cpu }}m"
+                  memory: "{{ .Values.memory }}Mi"
+              env:
+              - name: MAX_HEAP_SIZE
+                value: "{{ .Values.MAX_HEAP_SIZE }}M"
+#              - name: HEAP_NEW_SIZE
+#                value: "{{ .Values.HEAP_NEWSIZE }}M"
+  template: # trial
+    spec:
+      initialDelaySeconds: 15
+      template: # job
+        spec:
+          template: # pod
+            spec:
+              containers:
+              - image: thecrudge/cstress:latest
+                name: cassandra-stress
diff --git a/cassandra/cassandra-sts.yaml b/cassandra/cassandra-sts.yaml
new file mode 100644
index 0000000..691a4aa
--- /dev/null
+++ b/cassandra/cassandra-sts.yaml
@@ -0,0 +1,108 @@
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    app: cassandra
+  name: cassandra
+spec:
+  clusterIP: None
+  ports:
+  - port: 9042
+  selector:
+    app: cassandra
+
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: cassandra
+  labels:
+    app: cassandra
+spec:
+  serviceName: cassandra
+  replicas: 3
+  selector:
+    matchLabels:
+      app: cassandra
+  template:
+    metadata:
+      labels:
+        app: cassandra
+    spec:
+      terminationGracePeriodSeconds: 1800
+      containers:
+      - name: cassandra
+        image: gcr.io/google-samples/cassandra:v14
+        imagePullPolicy: Always
+        ports:
+        - containerPort: 7000
+          name: intra-node
+        - containerPort: 7001
+          name: tls-intra-node
+        - containerPort: 7199
+          name: jmx
+        - containerPort: 9042
+          name: cql
+        resources:
+          limits:
+            cpu: "500m"
+            memory: 1Gi
+          requests:
+            cpu: "500m"
+            memory: 1Gi
+        securityContext:
+          capabilities:
+            add:
+              - IPC_LOCK
+        lifecycle:
+          preStop:
+            exec:
+              command:
+              - /bin/sh
+              - -c
+              - nodetool drain
+        env:
+#          - name: JVM_OPTS
+#            value: "$JVM_OPTS -Dcassandra.load_ring_state=false"
+          - name: MAX_HEAP_SIZE
+            value: 512M
+          - name: HEAP_NEWSIZE
+            value: 100M
+          - name: CASSANDRA_SEEDS
+            value: "cassandra-0.cassandra.default.svc.cluster.local"
+          - name: CASSANDRA_CLUSTER_NAME
+            value: "K8Demo"
+          - name: CASSANDRA_DC
+            value: "DC1-K8Demo"
+          - name: CASSANDRA_RACK
+            value: "Rack1-K8Demo"
+          - name: POD_IP
+            valueFrom:
+              fieldRef:
+                fieldPath: status.podIP
+        readinessProbe:
+          exec:
+            command:
+            - /bin/bash
+            - -c
+            - /ready-probe.sh
+          initialDelaySeconds: 15
+          timeoutSeconds: 5
+        # These volume mounts are persistent. They are like inline claims,
+        # but not exactly because the names need to match exactly one of
+        # the stateful pod volumes.
+        volumeMounts:
+        - name: cassandra-data
+          mountPath: /cassandra_data
+  # These are converted to volume claims by the controller
+  # and mounted at the paths mentioned above.
+  # do not use these in production until ssd GCEPersistentDisk or other ssd pd
+  volumeClaimTemplates:
+  - metadata:
+      name: cassandra-data
+    spec:
+      accessModes: [ "ReadWriteOnce" ]
+      storageClassName: gp2
+      resources:
+        requests:
+          storage: 80Gi
\ No newline at end of file
diff --git a/cassandra/img/results1.png b/cassandra/img/results1.png
new file mode 100644
index 0000000..40382c9
Binary files /dev/null and b/cassandra/img/results1.png differ
diff --git a/cassandra/img/results2.png b/cassandra/img/results2.png
new file mode 100644
index 0000000..632bb41
Binary files /dev/null and b/cassandra/img/results2.png differ
diff --git a/cassandra/img/results3.png b/cassandra/img/results3.png
new file mode 100644
index 0000000..2b92836
Binary files /dev/null and b/cassandra/img/results3.png differ
diff --git a/hpa/locust/kustomization.yaml b/hpa/locust/kustomization.yaml
index 3bce7f5..a97ae3a 100644
--- a/hpa/locust/kustomization.yaml
+++ b/hpa/locust/kustomization.yaml
@@ -4,7 +4,7 @@ kind: Kustomization
 namespace: recipe
 
 resources:
-  - github.com/thestormforge/examples/voting-webapp/application
+  - ../../webserver/voting-webapp/application
   - hpa.yaml
   - experiment.yaml
 
diff --git a/hpa/sf-perftest/application/kustomization.yaml b/hpa/sf-perftest/application/kustomization.yaml
index 32efec9..db803e6 100644
--- a/hpa/sf-perftest/application/kustomization.yaml
+++ b/hpa/sf-perftest/application/kustomization.yaml
@@ -2,7 +2,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 
 resources:
-- github.com/thestormforge/examples/voting-webapp/application
+- ../../../webserver/voting-webapp/application
 - ingress.yaml
 - hpa.yaml
 
diff --git a/webserver/vhs-sf-perftest/application/kustomization.yaml b/webserver/vhs-sf-perftest/application/kustomization.yaml
index 2ac0a94..58b01d9 100644
--- a/webserver/vhs-sf-perftest/application/kustomization.yaml
+++ b/webserver/vhs-sf-perftest/application/kustomization.yaml
@@ -2,7 +2,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 
 resources:
-- github.com/thestormforge/examples/voting-webapp/application
+- ../../voting-webapp/application
 - ingress-alb.yaml
 
 patchesStrategicMerge: