|
4 | 4 | set -u |
5 | 5 | set -o pipefail |
6 | 6 |
|
# retry_until_success <retries> <sleep_time> <command> [args...]
#   Runs a command repeatedly until it exits with status 0 or the attempt
#   budget is exhausted.
#   - retries    : max number of attempts (non-negative integer)
#   - sleep_time : seconds to wait between two consecutive attempts
#   - command    : the function (or command) to call, followed by its arguments
# Progress messages go to stdout; usage/validation errors go to stderr.
# Returns 0 as soon as one attempt succeeds, 1 otherwise.
function retry_until_success() {
    if (( $# < 3 )); then
        echo "usage: retry_until_success <retries> <sleep_time> <command> [args...]" >&2
        return 1
    fi

    local retries="$1"
    local sleep_time="$2"
    shift 2 # drop retries and sleep_time; "$@" is now the command to run

    if ! [[ "$retries" =~ ^[0-9]+$ ]]; then
        echo "retry_until_success: retries must be a non-negative integer, got '$retries'" >&2
        return 1
    fi
    # Force base 10 so a value such as "08" is not rejected as invalid octal.
    retries=$((10#$retries))

    # Keep the counter local so the caller's variables are never clobbered.
    local attempt
    for ((attempt = 1; attempt <= retries; attempt++)); do
        echo "Attempt $attempt/$retries: running $*"
        # Running the command as an `if` condition keeps a failure from
        # aborting the script when errexit is enabled.
        if "$@"; then
            echo "Success on attempt $attempt"
            return 0
        fi
        # Only wait when another attempt will actually follow.
        if ((attempt < retries)); then
            echo "Failed attempt $attempt, retrying in $sleep_time seconds..."
            sleep "$sleep_time"
        fi
    done

    echo "$* did not succeed after $retries attempts"
    return 1
}
| 27 | + |
7 | 28 | function check_node() { |
8 | 29 | local node_number ready_number |
9 | 30 | node_number=$(oc get node --no-headers | grep -cv STATUS) |
|
41 | 62 | oc wait clusterversion/version --for='condition=Available=True' --timeout=15m |
42 | 63 |
|
43 | 64 | echo "Step #2: Make sure every machine is in 'Ready' status" |
44 | | - check_node |
| 65 | + retry_until_success 60 10 check_node |
45 | 66 |
|
46 | 67 | echo "Step #3: Check all pods are in status running or complete" |
47 | 68 | check_pod |
@@ -114,6 +135,11 @@ rm /tmp/global-pull-secret.json |
114 | 135 | echo "{\"spec\":{\"pullSecret\":{\"name\":\"$CLUSTER_NAME-pull-secret-new\"}}}" > /tmp/patch.json |
115 | 136 | oc patch hostedclusters -n "$HYPERSHIFT_NAMESPACE" "$CLUSTER_NAME" --type=merge -p="$(cat /tmp/patch.json)" |
116 | 137 |
|
# Block until every nodepool in the hosted-cluster namespace has gone through
# a config update (presumably the one triggered by the pull-secret patch -
# confirm against the surrounding script): first the update must start, then
# finish, and finally all nodes must report healthy.
echo "Make sure all nodepools are in healthy status after rolling out"
# Each entry is "<condition>=<status> <timeout>", processed in order.
for nodepool_wait in \
    "UpdatingConfig=True 15m" \
    "UpdatingConfig=False 30m" \
    "AllNodesHealthy=True 30m"; do
    oc wait nodepool -n "$HYPERSHIFT_NAMESPACE" --all \
        --for="condition=${nodepool_wait% *}" \
        --timeout="${nodepool_wait#* }"
done
| 142 | + |
117 | 143 | echo "check day-2 pull-secret update" |
118 | 144 | export KUBECONFIG="${SHARED_DIR}/nested_kubeconfig" |
119 | 145 | RETRIES=45 |
|
0 commit comments