diff --git a/test/e2e/config_test.go b/test/e2e/config_test.go
index 12535a708..6cccb401e 100644
--- a/test/e2e/config_test.go
+++ b/test/e2e/config_test.go
@@ -63,6 +63,7 @@ var (
 	lvmOperatorUninstall bool
 	scheme               = runtime.NewScheme()
 	crClient             crclient.Client
+	config               *rest.Config
 	deserializer         runtime.Decoder
 	contentTester        *PodRunner
 	summaryFile          string
@@ -87,8 +88,10 @@ func init() {
 	utilruntime.Must(monitoringv1.AddToScheme(scheme))
 	utilruntime.Must(apiextensionsv1.AddToScheme(scheme))
 
+	var err error
+
 	kubeconfig := os.Getenv("KUBECONFIG")
-	config, err := getKubeconfig(kubeconfig)
+	config, err = getKubeconfig(kubeconfig)
 	if err != nil {
 		panic(fmt.Sprintf("Failed to set kubeconfig: %v", err))
 	}
diff --git a/test/e2e/disk_setup_test.go b/test/e2e/disk_setup_test.go
index 7e8a2323e..9297367da 100644
--- a/test/e2e/disk_setup_test.go
+++ b/test/e2e/disk_setup_test.go
@@ -62,7 +62,7 @@ func getNodeEnvironmentFromNodeList(nodeList *corev1.NodeList) ([]NodeDisks, err
 			Node: node.GetName(),
 			Disks: []Disk{
 				{Size: 10},
-				{Size: 20},
+				{Size: 30},
 			},
 			AWSNodeInfo: nodeInfo,
 		}
diff --git a/test/e2e/helper_test.go b/test/e2e/helper_test.go
index c53e94b7c..29557f64f 100644
--- a/test/e2e/helper_test.go
+++ b/test/e2e/helper_test.go
@@ -25,7 +25,7 @@ import (
 	. "github.com/onsi/gomega"
 	"k8s.io/apimachinery/pkg/api/meta"
 
-	k8sv1 "k8s.io/api/core/v1"
+	corev1 "k8s.io/api/core/v1"
 	storagev1 "k8s.io/api/storage/v1"
 	k8serrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -118,7 +118,7 @@ func DeleteResource(ctx context.Context, obj client.Object) {
 }
 
 func lvmNamespaceCleanup(ctx context.Context) {
-	DeleteResource(ctx, &k8sv1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: testNamespace}})
+	DeleteResource(ctx, &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: testNamespace}})
 }
 
 // afterTestSuiteCleanup is the function called to tear down the test environment.
@@ -171,7 +171,7 @@ func createNamespace(ctx context.Context, namespace string) {
 	// annotation required for workload partitioning
 	annotations["workload.openshift.io/allowed"] = "management"
 
-	ns := &k8sv1.Namespace{
+	ns := &corev1.Namespace{
 		ObjectMeta: metav1.ObjectMeta{
 			Name:        namespace,
 			Annotations: annotations,
@@ -181,6 +181,12 @@ func createNamespace(ctx context.Context, namespace string) {
 	CreateResource(ctx, ns)
 }
 
+func IsSNO(ctx context.Context) bool {
+	nodeList := &corev1.NodeList{}
+	Expect(crClient.List(ctx, nodeList, client.HasLabels{labelNodeRoleWorker})).To(Succeed())
+	return len(nodeList.Items) == 1
+}
+
 // DeleteResources in concurrent rows with sequential elements in each row
 // for 3 rows with 3 objects each there will be 3 goroutines running deletions for
 // 3 elements each
diff --git a/test/e2e/lvm_cluster_test.go b/test/e2e/lvm_cluster_test.go
index 72301a553..360e9aa40 100644
--- a/test/e2e/lvm_cluster_test.go
+++ b/test/e2e/lvm_cluster_test.go
@@ -17,13 +17,17 @@ limitations under the License.
 package e2e
 
 import (
+	"context"
 	"fmt"
 
 	. "github.com/onsi/ginkgo/v2"
 	ginkgotypes "github.com/onsi/ginkgo/v2/types"
 	. "github.com/onsi/gomega"
"github.com/onsi/gomega" + k8sv1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" "github.com/openshift/lvm-operator/v4/api/v1alpha1" ) @@ -90,4 +94,102 @@ func lvmClusterTest() { VerifyLVMSSetup(ctx, cluster) }) }) + + Describe("Device Removal", Serial, func() { + BeforeEach(func(ctx SpecContext) { + if !IsSNO(ctx) { + Skip("Device removal tests run only on SNO instances") + } + }) + + It("should remove devices from volume group successfully", func(ctx SpecContext) { + // Configure cluster with multiple devices for removal testing + cluster.Spec.Storage.DeviceClasses[0].DeviceSelector = &v1alpha1.DeviceSelector{ + Paths: []v1alpha1.DevicePath{"/dev/nvme3n1", "/dev/nvme4n1"}, + } + + By("Creating cluster with multiple devices") + CreateResource(ctx, cluster) + VerifyLVMSSetup(ctx, cluster) + + By("Executing pvmove to migrate data from device to be removed") + err := executePvmoveOnVGManagerPods(ctx, "/dev/nvme4n1", "/dev/nvme3n1") + Expect(err).NotTo(HaveOccurred(), "pvmove should succeed before device removal") + + By("Removing one device from the volume group") + // Update cluster to remove /dev/nvme1n1 + cluster.Spec.Storage.DeviceClasses[0].DeviceSelector.Paths = []v1alpha1.DevicePath{ + "/dev/nvme3n1", + } + + err = crClient.Update(ctx, cluster) + Expect(err).NotTo(HaveOccurred()) + + By("Verifying device removal completed successfully") + Eventually(func(ctx SpecContext) bool { + return validateDeviceRemovalSuccess(ctx, cluster, 1) + }, 5*timeout, interval).WithContext(ctx).Should(BeTrue()) + }) + + It("should handle optional device removal", func(ctx SpecContext) { + // Configure cluster with both required and optional devices + cluster.Spec.Storage.DeviceClasses[0].DeviceSelector = &v1alpha1.DeviceSelector{ + Paths: []v1alpha1.DevicePath{"/dev/nvme4n1"}, + OptionalPaths: []v1alpha1.DevicePath{"/dev/nvme1n1", "/dev/nvme3n1"}, + } + + By("Creating cluster with required and optional devices") + CreateResource(ctx, cluster) + VerifyLVMSSetup(ctx, cluster) + + By("Executing pvmove to migrate data from optional devices before removal") + err := executePvmoveOnVGManagerPods(ctx, "/dev/nvme3n1", "/dev/nvme4n1") + Expect(err).NotTo(HaveOccurred(), "pvmove should succeed for optional device") + + By("Removing optional devices") + cluster.Spec.Storage.DeviceClasses[0].DeviceSelector.OptionalPaths = []v1alpha1.DevicePath{} + + err = crClient.Update(ctx, cluster) + Expect(err).NotTo(HaveOccurred(), "Should allow removal of optional devices") + + By("Verifying cluster remains Ready with required device only") + Eventually(func(ctx SpecContext) bool { + return validateClusterReady(ctx, cluster) + }, timeout, interval).WithContext(ctx).Should(BeTrue()) + }) + }) +} + +// executePvmoveOnVGManagerPods executes pvmove command on all vg-manager pods +func executePvmoveOnVGManagerPods(ctx context.Context, sourceDevice, targetDevice string) error { + // Get all vg-manager pods + podList := &k8sv1.PodList{} + err := crClient.List(ctx, podList, &client.ListOptions{ + Namespace: installNamespace, + LabelSelector: labels.Set{ + "app.kubernetes.io/name": "vg-manager", + }.AsSelector(), + }) + if err != nil { + return fmt.Errorf("failed to list vg-manager pods: %w", err) + } + + // Create pod runner for command execution + podRunner, err := NewPodRunner(config, scheme) + if err != nil { + return fmt.Errorf("failed to create pod runner: %w", err) + } + + // Execute pvmove on each vg-manager pod that has the device + for _, pod 
+		// Execute pvmove to migrate data away from the source device
+		pvmoveCmd := fmt.Sprintf("nsenter -m -u -i -n -p -t 1 pvmove %s %s", sourceDevice, targetDevice)
+		stdout, stderr, err := podRunner.ExecCommandInFirstPodContainer(ctx, &pod, pvmoveCmd)
+		if err != nil {
+			return fmt.Errorf("pvmove failed on pod %s (node: %s): %w\nstdout: %s\nstderr: %s",
+				pod.Name, pod.Spec.NodeName, err, stdout, stderr)
+		}
+	}
+
+	return nil
 }
diff --git a/test/e2e/pod_runner_test.go b/test/e2e/pod_runner_test.go
index 6f4573c20..6917e9d2a 100644
--- a/test/e2e/pod_runner_test.go
+++ b/test/e2e/pod_runner_test.go
@@ -97,10 +97,10 @@ func (t *PodRunner) ExecWithOptions(ctx context.Context, options ExecOptions) (s
 	return strings.TrimSpace(stdout.String()), strings.TrimSpace(stderr.String()), err
 }
 
-func (t *PodRunner) ExecCommand(ctx context.Context, pod, container string, command []string) (string, string, error) {
+func (t *PodRunner) ExecCommand(ctx context.Context, pod, container, namespace string, command []string) (string, string, error) {
 	return t.ExecWithOptions(ctx, ExecOptions{
 		Command:       command,
-		Namespace:     testNamespace,
+		Namespace:     namespace,
 		PodName:       pod,
 		ContainerName: container,
 		Stdin:         nil,
@@ -111,8 +111,8 @@ func (t *PodRunner) ExecCommand(ctx context.Context, pod, container string, comm
 }
 
 // ExecShellCommand executes the command in container through a shell.
-func (t *PodRunner) ExecShellCommand(ctx context.Context, pod, container string, command string) (string, string, error) {
-	stdout, stderr, err := t.ExecCommand(ctx, pod, container, []string{"sh", "-c", command})
+func (t *PodRunner) ExecShellCommand(ctx context.Context, pod, container string, namespace string, command string) (string, string, error) {
+	stdout, stderr, err := t.ExecCommand(ctx, pod, container, namespace, []string{"sh", "-c", command})
 	if err != nil {
 		return stdout, stderr,
 			fmt.Errorf("failed to execute shell command in pod %v, container %v: %v", pod, container, err)
@@ -125,7 +125,7 @@ func (t *PodRunner) ExecCommandInFirstPodContainer(ctx context.Context, pod *k8s
 	if len(pod.Spec.Containers) < 1 {
 		return "", "", fmt.Errorf("found %v containers, but expected at least 1", pod.Spec.Containers)
 	}
-	return t.ExecShellCommand(ctx, pod.Name, pod.Spec.Containers[0].Name, cmd)
+	return t.ExecShellCommand(ctx, pod.Name, pod.Spec.Containers[0].Name, pod.Namespace, cmd)
 }
 
 // WriteDataInPod writes the data to pod.
diff --git a/test/e2e/validation_test.go b/test/e2e/validation_test.go
index 075ad130d..75eca67a5 100644
--- a/test/e2e/validation_test.go
+++ b/test/e2e/validation_test.go
@@ -61,6 +61,7 @@ func validateLVMCluster(ctx context.Context, cluster *v1alpha1.LVMCluster) bool
 			return err
 		}
 		if currentCluster.Status.State == v1alpha1.LVMStatusReady {
+			GinkgoLogr.V(0).Info("Current LVM cluster devices are", "devices", currentCluster.Status.DeviceClassStatuses)
 			return nil
 		}
 		return fmt.Errorf("cluster is not ready: %v", currentCluster.Status)
@@ -369,3 +370,39 @@ func GenericGetItemsFromList(list client.ObjectList) ([]client.Object, error) {
 
 	return result, nil
 }
+
+func validateDeviceRemovalSuccess(ctx context.Context, cluster *v1alpha1.LVMCluster, expectedDeviceCount int) bool {
+	vgStatus := getVGStatusForCluster(ctx, cluster)
+	return len(vgStatus.Devices) == expectedDeviceCount && vgStatus.Status == v1alpha1.VGStatusReady
+}
+
+func validateClusterReady(ctx context.Context, cluster *v1alpha1.LVMCluster) bool {
+	vgStatus := getVGStatusForCluster(ctx, cluster)
+	return vgStatus.Status == v1alpha1.VGStatusReady
+}
+
+func getVGStatusForCluster(ctx context.Context, cluster *v1alpha1.LVMCluster) v1alpha1.VGStatus {
+	currentCluster := &v1alpha1.LVMCluster{}
+	err := crClient.Get(ctx, client.ObjectKeyFromObject(cluster), currentCluster)
+	if err != nil {
+		return v1alpha1.VGStatus{}
+	}
+
+	if len(currentCluster.Status.DeviceClassStatuses) == 0 {
+		return v1alpha1.VGStatus{}
+	}
+
+	// Search every device class status for the volume group created by this test
+	for _, deviceClassStatus := range currentCluster.Status.DeviceClassStatuses {
+		if len(deviceClassStatus.NodeStatus) > 0 {
+			// Return the first node's VG status whose name matches lvmVolumeGroupName
+			for _, nodeStatus := range deviceClassStatus.NodeStatus {
+				if nodeStatus.Name == lvmVolumeGroupName {
+					return nodeStatus.VGStatus
+				}
+			}
+		}
+	}
+
+	return v1alpha1.VGStatus{}
+}