diff --git a/test/extended/machine_config/pinnedimages.go b/test/extended/machine_config/pinnedimages.go index 3be048b3eb73..865c44e283fe 100644 --- a/test/extended/machine_config/pinnedimages.go +++ b/test/extended/machine_config/pinnedimages.go @@ -24,7 +24,13 @@ import ( "sigs.k8s.io/yaml" ) -const emptyImagePin string = "localhost.localdomain/emptyimagepin" +var ( + emptyImagePin = "localhost.localdomain/emptyimagepin" + + // these represent the expected rendered config prefixes for worker and custom MCP nodes + workerConfigPrefix = "rendered-worker" + customConfigPrefix = "rendered-custom" +) // This test is [Serial] because it modifies the state of the images present on Node in each test. var _ = g.Describe("[Suite:openshift/machine-config-operator/disruptive][Suite:openshift/conformance/serial][sig-mco][OCPFeatureGate:PinnedImages][OCPFeatureGate:MachineConfigNodes][Serial]", func() { @@ -58,35 +64,41 @@ var _ = g.Describe("[Suite:openshift/machine-config-operator/disruptive][Suite:o }) g.It("All Nodes in a custom Pool should have the PinnedImages even after Garbage Collection [apigroup:machineconfiguration.openshift.io]", func() { - - // skip this test on single node platforms + // Skip this test on single node and two-node platforms since custom MCPs are not supported + // for clusters with only a master MCP skipOnSingleNodeTopology(oc) + skipOnTwoNodeTopology(oc) + // Get the MCP, PIS, and KubeletConfig fixtures needed for this test pisFixture := customGCMCPpinnedImageSetFixture mcpFixture := customMCPFixture kcFixture := customGcKCFixture + // Get the PIS from the PIS fixture pis, err := getPISFromFixture(pisFixture) - o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error getting PIS from fixture `%v`: %v", pisFixture, err)) pisDiverged := false + // Create kube clients & MC clientset for test kubeClient, err := kubernetes.NewForConfig(oc.KubeFramework().ClientConfig()) - o.Expect(err).NotTo(o.HaveOccurred(), "Get KubeClient") - + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error getting kube client: %v", err)) clientSet, err := mcClient.NewForConfig(oc.KubeFramework().ClientConfig()) - o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error getting client set: %v", err)) // Create custom MCP defer deleteMCP(oc, "custom") err = oc.Run("apply").Args("-f", mcpFixture).Execute() - o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error creating MCP `custom`: %v", err)) - // Add node to pool + // Add node to custom MCP & wait for the node to be ready in the MCP optedNodes, err := addWorkerNodesToCustomPool(oc, kubeClient, 1, "custom") - o.Expect(err).NotTo(o.HaveOccurred(), "Label node") - defer waitTillNodeReadyWithConfig(kubeClient, optedNodes[0]) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error adding node to `custom` MCP: %v", err)) + defer waitTillNodeReadyWithConfig(kubeClient, optedNodes[0], workerConfigPrefix) defer unlabelNode(oc, optedNodes[0]) + framework.Logf("Waiting for `%v` node to be ready in `custom` MCP.", optedNodes[0]) + waitTillNodeReadyWithConfig(kubeClient, optedNodes[0], customConfigPrefix) + // Handle disconnected metal cluster environment isMetalDisconnected := false gcImage := alpineImage if IsMetal(oc) && IsDisconnected(oc, optedNodes[0]) { @@ -108,6 +120,7 @@ var _ = g.Describe("[Suite:openshift/machine-config-operator/disruptive][Suite:o waitTillImageDownload(oc, optedNodes[0], gcImage) } + // Get images
defined in PIS var pinnedImages []string for _, img := range pis.Spec.PinnedImages { pinnedImages = append(pinnedImages, string(img.Name)) } @@ -119,38 +132,45 @@ var _ = g.Describe("[Suite:openshift/machine-config-operator/disruptive][Suite:o err = applyPIS(oc, pisFixture, pis, pisDiverged) o.Expect(err).NotTo(o.HaveOccurred(), "Applied PIS") + // Test the images applied in the PIS exist on the node after garbage collection. GCPISTest(oc, kubeClient, clientSet, true, optedNodes[0], kcFixture, gcImage, pis.Name, isMetalDisconnected) }) g.It("All Nodes in a Custom Pool should have the PinnedImages in PIS [apigroup:machineconfiguration.openshift.io]", func() { - - // skip this test on single node platforms + // Skip this test on single node and two-node platforms since custom MCPs are not supported + // for clusters with only a master MCP skipOnSingleNodeTopology(oc) + skipOnTwoNodeTopology(oc) + // Get the MCP & PIS fixtures needed for this test pisFixture := customMCPpinnedImageSetFixture mcpFixture := customMCPFixture + // Get the PIS from the PIS fixture pis, err := getPISFromFixture(pisFixture) - o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error getting PIS from fixture `%v`: %v", pisFixture, err)) pisDiverged := false + // Create kube clients & MC clientset for test kubeClient, err := kubernetes.NewForConfig(oc.KubeFramework().ClientConfig()) - o.Expect(err).NotTo(o.HaveOccurred(), "Get KubeClient") - + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error getting kube client: %v", err)) clientSet, err := mcClient.NewForConfig(oc.KubeFramework().ClientConfig()) - o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error getting client set: %v", err)) // Create custom MCP defer deleteMCP(oc, "custom") err = oc.Run("apply").Args("-f", mcpFixture).Execute() - o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error creating MCP `custom`: %v", err)) - // Add node to pool + // Add node to custom MCP & wait for the node to be ready in the MCP optedNodes, err := addWorkerNodesToCustomPool(oc, kubeClient, 1, "custom") - o.Expect(err).NotTo(o.HaveOccurred(), "Label node") - defer waitTillNodeReadyWithConfig(kubeClient, optedNodes[0]) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error adding node to `custom` MCP: %v", err)) + defer waitTillNodeReadyWithConfig(kubeClient, optedNodes[0], workerConfigPrefix) defer unlabelNode(oc, optedNodes[0]) + framework.Logf("Waiting for `%v` node to be ready in `custom` MCP.", optedNodes[0]) + waitTillNodeReadyWithConfig(kubeClient, optedNodes[0], customConfigPrefix) + // Handle disconnected metal cluster environment isMetalDisconnected := false if IsMetal(oc) && IsDisconnected(oc, optedNodes[0]) { isMetalDisconnected = true @@ -163,6 +183,7 @@ var _ = g.Describe("[Suite:openshift/machine-config-operator/disruptive][Suite:o pisDiverged = true } + // Get images defined in PIS var pinnedImages []string for _, img := range pis.Spec.PinnedImages { pinnedImages = append(pinnedImages, string(img.Name)) @@ -174,35 +195,38 @@ var _ = g.Describe("[Suite:openshift/machine-config-operator/disruptive][Suite:o err = applyPIS(oc, pisFixture, pis, pisDiverged) o.Expect(err).NotTo(o.HaveOccurred(), "Applied PIS") + // Test the PIS apply & corresponding MCN updates to ensure the PIS application is successful.
SimplePISTest(oc, kubeClient, clientSet, true, pis.Name, isMetalDisconnected) }) g.It("All Nodes in a standard Pool should have the PinnedImages PIS [apigroup:machineconfiguration.openshift.io]", func() { + // Create kube client and client set for test + kubeClient, err := kubernetes.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error getting kube client: %v", err)) + clientSet, err := mcClient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error getting client set: %v", err)) - // Since the node in a SNO cluster is a part of the master MCP, the PIS for this test on a - // single node topology should target `master`. + // Get the PIS for this test. For clusters with only a master MCP with nodes, use a PIS + // targeting `master`, otherwise use a PIS targeting `worker`. pisFixture := pinnedImageSetFixture - if IsSingleNode(oc) { + mcpsToTest := GetRolesToTest(oc, clientSet) + mcpToTest := "worker" + if len(mcpsToTest) == 1 && mcpsToTest[0] == "master" { pisFixture = masterPinnedImageSetFixture + mcpToTest = "master" } - pis, err := getPISFromFixture(pisFixture) - o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error getting PIS from fixture `%v`: %v", pisFixture, err)) pisDiverged := false - kubeClient, err := kubernetes.NewForConfig(oc.KubeFramework().ClientConfig()) - o.Expect(err).NotTo(o.HaveOccurred(), "Get KubeClient") - - clientSet, err := mcClient.NewForConfig(oc.KubeFramework().ClientConfig()) - o.Expect(err).NotTo(o.HaveOccurred()) - + // Get the nodes targeted by the PIS var optedNodes []string - nodes, err := kubeClient.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{LabelSelector: labels.SelectorFromSet(labels.Set{"node-role.kubernetes.io/worker": ""}).String()}) + nodes, err := kubeClient.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{LabelSelector: labels.SelectorFromSet(labels.Set{fmt.Sprintf("node-role.kubernetes.io/%s", mcpToTest): ""}).String()}) o.Expect(err).NotTo(o.HaveOccurred(), "Getting nodes from standard pool") for _, node := range nodes.Items { optedNodes = append(optedNodes, node.Name) } + // Handle disconnected metal cluster environment isMetalDisconnected := false if IsMetal(oc) && IsDisconnected(oc, optedNodes[0]) { isMetalDisconnected = true @@ -217,6 +241,7 @@ var _ = g.Describe("[Suite:openshift/machine-config-operator/disruptive][Suite:o pisDiverged = true } + // Get images defined in PIS var pinnedImages []string for _, img := range pis.Spec.PinnedImages { pinnedImages = append(pinnedImages, string(img.Name)) @@ -228,73 +253,83 @@ var _ = g.Describe("[Suite:openshift/machine-config-operator/disruptive][Suite:o err = applyPIS(oc, pisFixture, pis, pisDiverged) o.Expect(err).NotTo(o.HaveOccurred(), "Applied PIS") + // Test the PIS apply & corresponding MCN updates to ensure the PIS application is successful.
SimplePISTest(oc, kubeClient, clientSet, true, pis.Name, isMetalDisconnected) }) g.It("Invalid PIS leads to degraded MCN in a standard Pool [apigroup:machineconfiguration.openshift.io]", func() { + // Create kube client and client set for test + kubeClient, err := kubernetes.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error getting kube client: %v", err)) + clientSet, err := mcClient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error getting client set: %v", err)) - // Since the node in a SNO cluster is a part of the master MCP, the PIS for this test on a - // single node topology should target `master`. + // Get the PIS for this test. For clusters with only a master MCP with nodes, use a PIS + // targeting `master`, otherwise use a PIS targeting `worker`. pisFixture := invalidPinnedImageSetFixture - if IsSingleNode(oc) { + mcpsToTest := GetRolesToTest(oc, clientSet) + if len(mcpsToTest) == 1 && mcpsToTest[0] == "master" { pisFixture = masterInvalidPinnedImageSetFixture } - pis, err := getPISFromFixture(pisFixture) - o.Expect(err).NotTo(o.HaveOccurred()) - - kubeClient, err := kubernetes.NewForConfig(oc.KubeFramework().ClientConfig()) - o.Expect(err).NotTo(o.HaveOccurred(), "Get KubeClient") - - clientSet, err := mcClient.NewForConfig(oc.KubeFramework().ClientConfig()) - o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error getting PIS from fixture `%v`: %v", pisFixture, err)) + framework.Logf("Using PIS `%v` for this test.", pis.Name) // Apply PIS defer deletePIS(oc, pis.Name) err = oc.Run("apply").Args("-f", pisFixture).Execute() - o.Expect(err).NotTo(o.HaveOccurred(), "Applied PIS") + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error applying PIS `%v`: %v", pisFixture, err)) + // Test the PIS apply & corresponding MCN updates to ensure the PIS application is not + // successful & the PIS Degraded condition in the MCN is populated as `True`. 
SimplePISTest(oc, kubeClient, clientSet, false, pis.Name, false) }) g.It("Invalid PIS leads to degraded MCN in a custom Pool [apigroup:machineconfiguration.openshift.io]", func() { - - // skip this test on single node platforms + // Skip this test on single node and two-node platforms since custom MCPs are not supported + // for clusters with only a master MCP skipOnSingleNodeTopology(oc) + skipOnTwoNodeTopology(oc) + // Get the MCP & PIS fixtures needed for this test pisFixture := customInvalidPinnedImageSetFixture mcpFixture := customMCPFixture + // Get the PIS from the PIS fixture pis, err := getPISFromFixture(pisFixture) - o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error getting PIS from fixture `%v`: %v", pisFixture, err)) + // Create kube client and client set for test kubeClient, err := kubernetes.NewForConfig(oc.KubeFramework().ClientConfig()) - o.Expect(err).NotTo(o.HaveOccurred(), "Get KubeClient") - + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error getting kube client: %v", err)) clientSet, err := mcClient.NewForConfig(oc.KubeFramework().ClientConfig()) - o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error getting client set: %v", err)) // Create custom MCP defer deleteMCP(oc, "custom") err = oc.Run("apply").Args("-f", mcpFixture).Execute() - o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error creating MCP `custom`: %v", err)) - // Add node to pool + // Add node to custom MCP & wait for the node to be ready in the MCP optedNodes, err := addWorkerNodesToCustomPool(oc, kubeClient, 1, "custom") - o.Expect(err).NotTo(o.HaveOccurred(), "Label node") - defer waitTillNodeReadyWithConfig(kubeClient, optedNodes[0]) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error adding node to `custom` MCP: %v", err)) + defer waitTillNodeReadyWithConfig(kubeClient, optedNodes[0], workerConfigPrefix) defer unlabelNode(oc, optedNodes[0]) + framework.Logf("Waiting for `%v` node to be ready in `custom` MCP.", optedNodes[0]) + waitTillNodeReadyWithConfig(kubeClient, optedNodes[0], customConfigPrefix) // Apply PIS defer deletePIS(oc, pis.Name) err = oc.Run("apply").Args("-f", pisFixture).Execute() - o.Expect(err).NotTo(o.HaveOccurred(), "Applied PIS") + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error applying PIS `%v`: %v", pisFixture, err)) + // Test the PIS apply & corresponding MCN updates to ensure the PIS application is not + // successful & the PIS Degraded condition in the MCN is populated as `True`. SimplePISTest(oc, kubeClient, clientSet, false, pis.Name, false) }) - }) +// `applyPIS` runs the oc command necessary for applying a provided PIS.
func applyPIS(oc *exutil.CLI, pisFixture string, pis *mcfgv1.PinnedImageSet, pisDiverged bool) error { if pisDiverged { yamlData, err := yaml.Marshal(&pis) @@ -314,14 +349,20 @@ func applyPIS(oc *exutil.CLI, pisFixture string, pis *mcfgv1.PinnedImageSet, pis return nil } +// `addWorkerNodesToCustomPool` labels the desired number of worker nodes with the MCP role +// selector so that the nodes become part of the desired custom MCP func addWorkerNodesToCustomPool(oc *exutil.CLI, kubeClient *kubernetes.Clientset, numberOfNodes int, customMCP string) ([]string, error) { + // Get the worker nodes nodes, err := kubeClient.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{LabelSelector: labels.SelectorFromSet(labels.Set{"node-role.kubernetes.io/worker": ""}).String()}) if err != nil { return nil, err } + // Return an error if there are fewer worker nodes in the cluster than the desired number of nodes to add to the custom MCP if len(nodes.Items) < numberOfNodes { return nil, fmt.Errorf("Node in Worker MCP %d < Number of nodes needed in %d MCP", len(nodes.Items), numberOfNodes) } + + // Label the nodes with the custom MCP role selector var optedNodes []string for node_i := 0; node_i < numberOfNodes; node_i++ { err = oc.AsAdmin().Run("label").Args("node", nodes.Items[node_i].Name, fmt.Sprintf("node-role.kubernetes.io/%s=", customMCP)).Execute() @@ -333,79 +374,92 @@ func addWorkerNodesToCustomPool(oc *exutil.CLI, kubeClient *kubernetes.Clientset return optedNodes, nil } +// `GCPISTest` completes the body of a PIS test including the garbage collection step func GCPISTest(oc *exutil.CLI, kubeClient *kubernetes.Clientset, clientSet *mcClient.Clientset, success bool, nodeName, customGcKCFixture, gcImage, pisName string, isMetalDisconnected bool) { - // Apply KC to Pool defer deleteKC(oc, "custom-gc-config") err := oc.Run("apply").Args("-f", customGcKCFixture).Execute() - o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error applying garbage collection kubelet config: %s", err)) + // Wait for the node to reboot & for the garbage collection to complete waitForReboot(kubeClient, nodeName) waitTillImageGC(oc, nodeName, gcImage) + // Set overall timeout for PIS application validation waitTime := time.Minute * 20 ctx, cancel := context.WithTimeout(context.Background(), waitTime) defer cancel() + // Check that the PIS conditions are still met err = waitForPISStatusX(ctx, oc, kubeClient, clientSet, pisName, success, isMetalDisconnected) - o.Expect(err).NotTo(o.HaveOccurred(), "Checking status of PIS") + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error checking status of PIS `%s`: %v", pisName, err)) } +// `SimplePISTest` completes the body of a PIS test, validating that the conditions are met for cases +// where the PIS is expected to be successful and cases where the PIS applied was invalid func SimplePISTest(oc *exutil.CLI, kubeClient *kubernetes.Clientset, clientSet *mcClient.Clientset, success bool, pisName string, isMetalDisconnected bool) { - + // Set overall timeout for PIS application validation waitTime := time.Minute * 20 ctx, cancel := context.WithTimeout(context.Background(), waitTime) defer cancel() + // Check that the PIS conditions are met, depending on the expected validity of the PIS err := waitForPISStatusX(ctx, oc, kubeClient, clientSet, pisName, success, isMetalDisconnected) - o.Expect(err).NotTo(o.HaveOccurred(), "Checking status of PIS") + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error checking status of PIS `%s`: %v",
pisName, err)) } -func detectXCondition(oc *exutil.CLI, node corev1.Node, mcn *mcfgv1.MachineConfigNode, appliedPIS *mcfgv1.PinnedImageSet, detectingSuccess bool, isMetalDisconnected bool) (bool, bool, error) { - if detectingSuccess { - if isMetalDisconnected { - crictlStatus, err := exutil.DebugNodeRetryWithOptionsAndChroot(oc, node.Name, "openshift-machine-config-operator", "crictl", "inspecti", emptyImagePin) - if err != nil { - return false, false, fmt.Errorf("failed to execute `crictl inspecti %s` on node %s: %w", emptyImagePin, node.Name, err) - } - if !strings.Contains(crictlStatus, "imageSpec") { - return false, false, fmt.Errorf("Image %s not present on node %s: %w", emptyImagePin, node.Name, err) - } - return true, false, nil +// `detectXCondition` checks if a valid PIS has been successfully applied. For disconnected metal +// environments, this checks that the `localhost.localdomain/emptyimagepin` image spec is present +// on the node. For all other cluster types, this checks that the MCN conditions are populating as +// expected and inspects the node to confirm each desired image is pinned. +func detectXCondition(oc *exutil.CLI, node corev1.Node, mcn *mcfgv1.MachineConfigNode, appliedPIS *mcfgv1.PinnedImageSet, isMetalDisconnected bool) (bool, bool, error) { + // Handle disconnected metal clusters + if isMetalDisconnected { + crictlStatus, err := exutil.DebugNodeRetryWithOptionsAndChroot(oc, node.Name, "openshift-machine-config-operator", "crictl", "inspecti", emptyImagePin) + if err != nil { + return false, false, fmt.Errorf("failed to execute `crictl inspecti %s` on node %s: %w", emptyImagePin, node.Name, err) } + if !strings.Contains(crictlStatus, "imageSpec") { + return false, false, fmt.Errorf("Image %s not present on node %s: %w", emptyImagePin, node.Name, err) + } + return true, false, nil + } - for _, cond := range mcn.Status.Conditions { - if mcfgv1.StateProgress(cond.Type) == mcfgv1.MachineConfigNodePinnedImageSetsDegraded && cond.Status == "True" { - return false, true, fmt.Errorf("PIS degraded for MCN %s with reason: %s and message: %s", mcn.Name, cond.Reason, cond.Message) - } + // Loop through the MCN conditions, ensuring the "PinnedImageSetsDegraded" condition is not + // `True` and retrying while the "PinnedImageSetsProgressing" condition is still `True`.
+ for _, cond := range mcn.Status.Conditions { + if mcfgv1.StateProgress(cond.Type) == mcfgv1.MachineConfigNodePinnedImageSetsDegraded && cond.Status == "True" { + return false, true, fmt.Errorf("PIS degraded for MCN %s with reason: %s and message: %s", mcn.Name, cond.Reason, cond.Message) + } - if mcfgv1.StateProgress(cond.Type) == mcfgv1.MachineConfigNodePinnedImageSetsProgressing && cond.Status == "True" { - return false, false, nil - } + if mcfgv1.StateProgress(cond.Type) == mcfgv1.MachineConfigNodePinnedImageSetsProgressing && cond.Status == "True" { + return false, false, nil } - for _, img := range appliedPIS.Spec.PinnedImages { - crictlStatus, err := exutil.DebugNodeRetryWithOptionsAndChroot(oc, node.Name, "openshift-machine-config-operator", "crictl", "inspecti", string(img.Name)) - if err != nil { - return false, false, fmt.Errorf("failed to execute `crictl inspecti %s` on node %s: %w", img.Name, node.Name, err) - } - if !strings.Contains(crictlStatus, "imageSpec") { - return false, false, fmt.Errorf("Image %s not present on node %s: %w", img.Name, node.Name, err) - } + } + + // Loop through the images defined in the PIS & ensure they are pinned on the associated node + for _, img := range appliedPIS.Spec.PinnedImages { + crictlStatus, err := exutil.DebugNodeRetryWithOptionsAndChroot(oc, node.Name, "openshift-machine-config-operator", "crictl", "inspecti", string(img.Name)) + if err != nil { + return false, false, fmt.Errorf("failed to execute `crictl inspecti %s` on node %s: %w", img.Name, node.Name, err) } - return true, false, nil - } else { - for _, cond := range mcn.Status.Conditions { - if mcfgv1.StateProgress(cond.Type) == mcfgv1.MachineConfigNodePinnedImageSetsDegraded && cond.Status == "True" { - continue - } - if mcfgv1.StateProgress(cond.Type) == mcfgv1.MachineConfigNodePinnedImageSetsProgressing && cond.Status == "True" { - return false, false, nil - } + if !strings.Contains(crictlStatus, "imageSpec") { + return false, false, fmt.Errorf("Image %s not present on node %s: %w", img.Name, node.Name, err) } - return true, false, nil } + return true, false, nil } +// `waitForPISStatusX` defines the retry function to use to validate if the PIS application has +// completed as intended. The steps include: +// 1. Wait for the PIS object to be created +// 2. Get the MCP targeted by the PIS +// 3. Get the nodes that are part of the MCP from step 2 +// 4.
Loop through the nodes to see if the desired conditions are met +// - If the PIS is expected to be invalid, it checks that the degraded condition in the +// corresponding MCN becomes "true" +// - If the PIS is expected to be valid, it checks that the desired images are pinned on the +// corresponding nodes and that the MCN conditions properly report the success func waitForPISStatusX(ctx context.Context, oc *exutil.CLI, kubeClient *kubernetes.Clientset, clientSet *mcClient.Clientset, pisName string, success bool, isMetalDisconnected bool) error { return wait.PollUntilContextCancel(ctx, time.Second, true, func(ctx context.Context) (done bool, err error) { // Wait for PIS object to get created @@ -414,33 +468,44 @@ func waitForPISStatusX(ctx context.Context, oc *exutil.CLI, kubeClient *kubernet return false, fmt.Errorf("PIS Object not created yet: %w", err) } + // Get the MCP targeted by the PIS pool, err := clientSet.MachineconfigurationV1().MachineConfigPools().Get(ctx, appliedPIS.Labels["machineconfiguration.openshift.io/role"], metav1.GetOptions{}) if err != nil { return false, fmt.Errorf("failed to get MCP mentioned in PIS: %w", err) } + // Get the nodes from the MCP nodes, err := getNodesForPool(ctx, oc, kubeClient, pool) if err != nil { return false, fmt.Errorf("failed to get Nodes from MCP %q mentioned in PIS: %w", pool.Name, err) } + // Loop through nodes to see if the conditions required to consider the PIS application "done" are met doneNodes := 0 for _, node := range nodes.Items { - mcn, err := clientSet.MachineconfigurationV1().MachineConfigNodes().Get(ctx, node.Name, metav1.GetOptions{}) - if err != nil { - return false, fmt.Errorf("failed to get mcn: %w", err) - } - toContinue, isFatal, err := detectXCondition(oc, node, mcn, appliedPIS, success, isMetalDisconnected) - if !toContinue { - if isFatal { - return true, err - } else { - if err != nil { - framework.Logf("Retrying PIS Status with non-fatal error: %s", err) + if !success { // handle case when we are expecting the PIS application to fail, so the PIS degraded condition should become true + framework.Logf("Waiting for PinnedImageSetsDegraded=True") + conditionMet, err := WaitForMCNConditionStatus(clientSet, node.Name, mcfgv1.MachineConfigNodePinnedImageSetsDegraded, metav1.ConditionTrue, 2*time.Minute, 5*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error occurred while waiting for PinnedImageSetsDegraded=True: %v", err)) + o.Expect(conditionMet).To(o.BeTrue(), "Error, could not detect PinnedImageSetsDegraded=True.") + } else { // handle cases where we are expecting the PIS application to succeed + mcn, err := clientSet.MachineconfigurationV1().MachineConfigNodes().Get(ctx, node.Name, metav1.GetOptions{}) + if err != nil { + return false, fmt.Errorf("failed to get mcn: %w", err) + } + toContinue, isFatal, err := detectXCondition(oc, node, mcn, appliedPIS, isMetalDisconnected) + if !toContinue { + if isFatal { + return true, err + } else { + if err != nil { + framework.Logf("Retrying PIS Status with non-fatal error: %s", err) + } + return false, nil } - return false, nil } } + // If we make it here, it means no fatal or non-fatal error has occurred & the PIS application conditions were met doneNodes += 1 } if doneNodes == len(nodes.Items) { @@ -451,9 +516,12 @@ func waitForPISStatusX(ctx context.Context, oc *exutil.CLI, kubeClient *kubernet }) } +// `deletePinnedImages` loops through each node targeted by a PIS and removes each image defined in +// the provided array of images, which represents the images
defined in the previously applied PIS. func deletePinnedImages(oc *exutil.CLI, kubeClient *kubernetes.Clientset, clientSet *mcClient.Clientset, optedNodes []string, images []string, isMetalDisconnected bool) { for _, nodeName := range optedNodes { for _, img := range images { + // Handle disconnected metal cluster environments if isMetalDisconnected { _, _ = exutil.DebugNodeRetryWithOptionsAndChroot(oc, nodeName, "openshift-machine-config-operator", "podman", "rmi", emptyImagePin) break @@ -463,6 +531,7 @@ func deletePinnedImages(oc *exutil.CLI, kubeClient *kubernetes.Clientset, client } } +// `waitTillImageDownload` waits for up to 10 minutes for the desired image to download onto a node func waitTillImageDownload(oc *exutil.CLI, nodeName, imageName string) { o.Eventually(func() bool { crictlstatus, err := exutil.DebugNodeRetryWithOptionsAndChroot(oc, nodeName, "openshift-machine-config-operator", "crictl", "inspecti", imageName) @@ -477,6 +546,7 @@ func waitTillImageDownload(oc *exutil.CLI, nodeName, imageName string) { }, 10*time.Minute, 10*time.Second).Should(o.BeTrue(), "Timed out waiting for Node '%s' to download image '%s'.", nodeName, imageName) } +// `waitTillImageGC` waits for up to 10 minutes for the garbage collection of the desired image on the input node func waitTillImageGC(oc *exutil.CLI, nodeName, imageName string) { o.Eventually(func() bool { _, err := exutil.DebugNodeRetryWithOptionsAndChroot(oc, nodeName, "openshift-machine-config-operator", "crictl", "inspecti", imageName) @@ -488,6 +558,8 @@ func waitTillImageGC(oc *exutil.CLI, nodeName, imageName string) { }, 10*time.Minute, 10*time.Second).Should(o.BeTrue(), "Timed out waiting for Node '%s' to garbage collect '%s'.", nodeName, imageName) } +// `waitForReboot` waits for up to 5 minutes for the input node to start a reboot and then up to 15 +// minutes for the node to complete its reboot. func waitForReboot(kubeClient *kubernetes.Clientset, nodeName string) { o.Eventually(func() bool { node, err := kubeClient.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) @@ -516,37 +588,48 @@ func waitForReboot(kubeClient *kubernetes.Clientset, nodeName string) { }, 15*time.Minute, 10*time.Second).Should(o.BeTrue(), "Timed out waiting for Node '%s' to finish reboot.", nodeName) } -func waitTillNodeReadyWithConfig(kubeClient *kubernetes.Clientset, nodeName string) { +// `waitTillNodeReadyWithConfig` loops for up to 5 minutes to check whether the input node reaches +// the desired rendered config. The node is considered ready when its current config contains the +// provided prefix (e.g. `rendered-worker`) and its machine config state is `Done`.
+func waitTillNodeReadyWithConfig(kubeClient *kubernetes.Clientset, nodeName, currentConfigPrefix string) { o.Eventually(func() bool { node, err := kubeClient.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) if err != nil { framework.Logf("Failed to grab Node '%s', error :%s", nodeName, err) return false } - if strings.Contains(node.Annotations["machineconfiguration.openshift.io/currentConfig"], "rendered-worker") && node.Annotations["machineconfiguration.openshift.io/state"] == "Done" { - framework.Logf("Node '%s' has rendered-worker config", nodeName) + currentConfig := node.Annotations["machineconfiguration.openshift.io/currentConfig"] + if strings.Contains(currentConfig, currentConfigPrefix) && node.Annotations["machineconfiguration.openshift.io/state"] == "Done" { + framework.Logf("Node '%s' has current config `%v`", nodeName, currentConfig) return true } + framework.Logf("Node '%s' is not yet ready and has the current config `%v`", nodeName, currentConfig) return false - }, 15*time.Minute, 10*time.Second).Should(o.BeTrue(), "Timed out waiting for Node '%s' to have rendered-worker config.", nodeName) + }, 5*time.Minute, 10*time.Second).Should(o.BeTrue(), "Timed out waiting for Node '%s' to have a config with prefix '%s'.", nodeName, currentConfigPrefix) } +// `unlabelNode` removes the `node-role.kubernetes.io/custom` label from the node with the input +// name. This triggers the node's removal from the custom MCP named `custom`. func unlabelNode(oc *exutil.CLI, name string) error { return oc.AsAdmin().Run("label").Args("node", name, "node-role.kubernetes.io/custom-").Execute() } +// `deleteKC` deletes the KubeletConfig with the input name func deleteKC(oc *exutil.CLI, name string) error { return oc.Run("delete").Args("kubeletconfig", name).Execute() } +// `deleteMCP` deletes the MachineConfigPool with the input name func deleteMCP(oc *exutil.CLI, name string) error { return oc.Run("delete").Args("mcp", name).Execute() } +// `deletePIS` deletes the PinnedImageSet with the input name func deletePIS(oc *exutil.CLI, name string) error { return oc.Run("delete").Args("pinnedimageset", name).Execute() } +// `getPISFromFixture` extracts the PinnedImageSet object as defined in the provided fixture func getPISFromFixture(path string) (*mcfgv1.PinnedImageSet, error) { data, err := ioutil.ReadFile(path) if err != nil {