Skip to content

Commit 2e549ff

Browse files
authored
Make sure all nodepools are in healthy status after rolling out before the health check (#71986)
1 parent 85fdc44 commit 2e549ff

File tree

1 file changed

+27
-1
lines changed

1 file changed

+27
-1
lines changed

ci-operator/step-registry/cucushift/hypershift-extended/enable-qe/pull-secret/cucushift-hypershift-extended-enable-qe-pull-secret-commands.sh

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,27 @@ set -e
44
set -u
55
set -o pipefail
66

7+
# retry_until_success <retries> <sleep_time> <command> [args...]
# Runs <command> [args...] repeatedly until it exits 0 or the attempts run out.
#   - retries    : max number of attempts
#   - sleep_time : seconds to sleep between two attempts
#   - command    : the function or command to call, followed by its arguments
# Returns 0 as soon as one attempt succeeds, 1 when every attempt has failed.
# Progress messages are written to stdout.
# NOTE: the command runs as an `if` condition, so `set -e` is not in effect
# inside it; it must report failure through its own exit status.
function retry_until_success() {
    local retries="$1"
    local sleep_time="$2"
    local i # keep the loop counter out of the caller's scope
    shift 2 # drop retries and sleep_time
    for ((i = 1; i <= retries; i++)); do
        echo "Attempt $i/$retries: running $*"
        if "$@"; then
            echo "Success on attempt $i"
            return 0
        fi
        # Only announce a retry and wait when another attempt actually follows.
        if ((i < retries)); then
            echo "Failed attempt $i, retrying in $sleep_time seconds..."
            sleep "$sleep_time"
        else
            echo "Failed attempt $i"
        fi
    done
    echo "$* did not succeed after $retries attempts"
    return 1
}
27+
728
function check_node() {
829
local node_number ready_number
930
node_number=$(oc get node --no-headers | grep -cv STATUS)
@@ -41,7 +62,7 @@ EOT
4162
oc wait clusterversion/version --for='condition=Available=True' --timeout=15m
4263

4364
echo "Step #2: Make sure every machine is in 'Ready' status"
44-
check_node
65+
retry_until_success 60 10 check_node
4566

4667
echo "Step #3: Check all pods are in status running or complete"
4768
check_pod
@@ -114,6 +135,11 @@ rm /tmp/global-pull-secret.json
114135
echo "{\"spec\":{\"pullSecret\":{\"name\":\"$CLUSTER_NAME-pull-secret-new\"}}}" > /tmp/patch.json
115136
oc patch hostedclusters -n "$HYPERSHIFT_NAMESPACE" "$CLUSTER_NAME" --type=merge -p="$(cat /tmp/patch.json)"
116137

138+
echo "Make sure all nodepools are in healthy status after rolling out"
139+
oc wait nodepool -n "$HYPERSHIFT_NAMESPACE" --all --for='condition=UpdatingConfig=True' --timeout=15m
140+
oc wait nodepool -n "$HYPERSHIFT_NAMESPACE" --all --for='condition=UpdatingConfig=False' --timeout=30m
141+
oc wait nodepool -n "$HYPERSHIFT_NAMESPACE" --all --for='condition=AllNodesHealthy=True' --timeout=30m
142+
117143
echo "check day-2 pull-secret update"
118144
export KUBECONFIG="${SHARED_DIR}/nested_kubeconfig"
119145
RETRIES=45

0 commit comments

Comments
 (0)