From a5656b43d788986b3cf46f8124a845d5bad30ef3 Mon Sep 17 00:00:00 2001 From: Karthik Satchitanand Date: Fri, 15 Nov 2019 18:21:23 +0530 Subject: [PATCH] (refactor)kafka: add updated liveness checks (#945) Signed-off-by: ksatchit --- .../kafka-broker-disk-failure-ansible-logic.yml | 1 + .../kafka-broker-disk-failure-k8s-job.yml | 4 ++++ .../kafka-broker-pod-failure-ansible-logic.yml | 1 + .../kafka-broker-pod-failure-k8s-job.yml | 4 ++++ utils/apps/kafka/kafka_liveness.j2 | 7 +++++-- utils/common/status_app_pod.yml | 10 +++++----- 6 files changed, 20 insertions(+), 7 deletions(-) diff --git a/experiments/kafka/kafka-broker-disk-failure/kafka-broker-disk-failure-ansible-logic.yml b/experiments/kafka/kafka-broker-disk-failure/kafka-broker-disk-failure-ansible-logic.yml index 41456529ca7..ef9fd7c24bb 100644 --- a/experiments/kafka/kafka-broker-disk-failure/kafka-broker-disk-failure-ansible-logic.yml +++ b/experiments/kafka/kafka-broker-disk-failure/kafka-broker-disk-failure-ansible-logic.yml @@ -15,6 +15,7 @@ kafka_instance: "{{ lookup('env','KAFKA_INSTANCE_NAME') }}" kafka_broker: "{{ lookup('env','KAFKA_BROKER') }}" kafka_stream: "{{ lookup('env','KAFKA_LIVENESS_STREAM') }}" + kafka_consumer_timeout: "{{ lookup('env','KAFKA_CONSUMER_TIMEOUT') }}" kafka_service: "{{ lookup('env','KAFKA_SERVICE') }}" kafka_port: "{{ lookup('env','KAFKA_PORT') }}" kafka_replication_factor: "{{ lookup('env','KAFKA_REPLICATION_FACTOR') }}" diff --git a/experiments/kafka/kafka-broker-disk-failure/kafka-broker-disk-failure-k8s-job.yml b/experiments/kafka/kafka-broker-disk-failure/kafka-broker-disk-failure-k8s-job.yml index 657f6c36977..954170241e1 100644 --- a/experiments/kafka/kafka-broker-disk-failure/kafka-broker-disk-failure-k8s-job.yml +++ b/experiments/kafka/kafka-broker-disk-failure/kafka-broker-disk-failure-k8s-job.yml @@ -28,6 +28,10 @@ spec: - name: KAFKA_LIVENESS_STREAM value: 'enabled' + # in milliseconds + - name: KAFKA_CONSUMER_TIMEOUT + value: '30000' + - name: TOTAL_CHAOS_DURATION value: '30' diff --git a/experiments/kafka/kafka-broker-pod-failure/kafka-broker-pod-failure-ansible-logic.yml b/experiments/kafka/kafka-broker-pod-failure/kafka-broker-pod-failure-ansible-logic.yml index f33b4b4bf7f..05c669dcb19 100644 --- a/experiments/kafka/kafka-broker-pod-failure/kafka-broker-pod-failure-ansible-logic.yml +++ b/experiments/kafka/kafka-broker-pod-failure/kafka-broker-pod-failure-ansible-logic.yml @@ -14,6 +14,7 @@ kafka_kind: "{{ lookup('env','KAFKA_KIND') }}" kafka_broker: "{{ lookup('env','KAFKA_BROKER') }}" kafka_stream: "{{ lookup('env','KAFKA_LIVENESS_STREAM') }}" + kafka_consumer_timeout: "{{ lookup('env','KAFKA_CONSUMER_TIMEOUT') }}" kafka_service: "{{ lookup('env','KAFKA_SERVICE') }}" kafka_port: "{{ lookup('env','KAFKA_PORT') }}" kafka_replication_factor: "{{ lookup('env','KAFKA_REPLICATION_FACTOR') }}" diff --git a/experiments/kafka/kafka-broker-pod-failure/kafka-broker-pod-failure-k8s-job.yml b/experiments/kafka/kafka-broker-pod-failure/kafka-broker-pod-failure-k8s-job.yml index d739d78d57f..387e8d62914 100644 --- a/experiments/kafka/kafka-broker-pod-failure/kafka-broker-pod-failure-k8s-job.yml +++ b/experiments/kafka/kafka-broker-pod-failure/kafka-broker-pod-failure-k8s-job.yml @@ -27,6 +27,10 @@ spec: - name: KAFKA_LIVENESS_STREAM value: 'enabled' + # in milliseconds + - name: KAFKA_CONSUMER_TIMEOUT + value: '30000' + - name: TOTAL_CHAOS_DURATION value: '15' diff --git a/utils/apps/kafka/kafka_liveness.j2 b/utils/apps/kafka/kafka_liveness.j2 index 18261e0462c..eb2c1398383 100644 --- a/utils/apps/kafka/kafka_liveness.j2 +++ b/utils/apps/kafka/kafka_liveness.j2 @@ -6,6 +6,7 @@ metadata: labels: name: kafka-liveness spec: + restartPolicy: Never initContainers: - name: kafka-topic-creator image: litmuschaos/kafka-client:ci @@ -39,11 +40,13 @@ spec: command: - sh - -c - - "./producer.sh" + - "stdbuf -oL ./producer.sh" - name: kafka-consumer image: litmuschaos/kafka-client:ci imagePullPolicy: Always env: + - name: KAFKA_CONSUMER_TIMEOUT + value: "{{ kafka_consumer_timeout }}" - name: TOPIC_NAME value: {{ kafka_topic }} - name: KAFKA_SERVICE @@ -53,4 +56,4 @@ spec: command: - sh - -c - - "./consumer.sh" + - "stdbuf -oL ./consumer.sh" diff --git a/utils/common/status_app_pod.yml b/utils/common/status_app_pod.yml index 5a9183a6a85..541497fb00a 100644 --- a/utils/common/status_app_pod.yml +++ b/utils/common/status_app_pod.yml @@ -8,14 +8,14 @@ - name: Get the container status of application. shell: > - kubectl get pod -n {{ a_ns }} -l {{ a_label }} - -o custom-columns=:..containerStatuses[].state --no-headers | grep -w "running" + kubectl get pod -n {{ a_ns }} -l {{ a_label }} --no-headers + -o jsonpath='{.items[*].status.containerStatuses[*].ready}' | tr ' ' '\n' | uniq args: executable: /bin/bash register: containerStatus - until: "'running' in containerStatus.stdout" - delay: 2 - retries: 150 + until: "containerStatus.stdout == 'true'" + delay: '{{ delay }}' + retries: '{{ retries }}'