From df7e0a1a0d38baca34a39c587f0d751fa270420a Mon Sep 17 00:00:00 2001 From: Stuart Grace Date: Mon, 23 Dec 2024 19:33:40 +0000 Subject: [PATCH] Improve status_reason when cluster creation is stalled If, during cluster creation or update, one or more nodes cannot be created by Nova (because, for example, the project runs out of quota, or Nova runs out of suitable hypervisors), the cluster stalls with CREATE_IN_PROGRESS status. The status_reason in this case is not helpful as it only reports the previous step that succeeded. This patch adds a check of each individual machine's status, looking for machines which are not ready, and reports any reason found. This can result in useful status_reason messages such as 'error creating Openstack instance ... Quota exceeded for instances'. --- magnum_cluster_api/driver.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/magnum_cluster_api/driver.py b/magnum_cluster_api/driver.py index a8f2e082..291242a6 100644 --- a/magnum_cluster_api/driver.py +++ b/magnum_cluster_api/driver.py @@ -192,6 +192,25 @@ def update_cluster_status( cluster.save() return + # Check reason if an individual machine is not ready + machines = objects.Machine.objects(self.k8s_api).filter( + namespace="magnum-system", + selector={ + "cluster.x-k8s.io/cluster-name": cluster.stack_id, + }, + ) + + for machine in machines: + for cond in machine.obj["status"]["conditions"]: + if ( + cond.get("type") == "InfrastructureReady" + and cond.get("status") == "False" + ): + messagetext = cond.get("message") + if messagetext: + cluster.status_reason = messagetext + cluster.save() + api_endpoint = capi_cluster.obj["spec"]["controlPlaneEndpoint"] cluster.api_address = ( f"https://{api_endpoint['host']}:{api_endpoint['port']}"