diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index b39d889404..a2ed671ffb 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -83,6 +83,7 @@ rules: - machinepools - machinepools/status verbs: + - create - get - list - patch @@ -162,7 +163,6 @@ rules: - awsmanagedclusters - awsmanagedmachinepools - rosaclusters - - rosamachinepools verbs: - delete - get @@ -176,7 +176,6 @@ rules: - awsclusters/status - awsfargateprofiles/status - rosaclusters/status - - rosamachinepools/status verbs: - get - patch @@ -198,6 +197,7 @@ rules: - infrastructure.cluster.x-k8s.io resources: - awsmachines + - rosamachinepools verbs: - create - delete @@ -212,3 +212,14 @@ rules: - rosamachinepools/finalizers verbs: - update +- apiGroups: + - infrastructure.cluster.x-k8s.io + resources: + - rosamachinepools/status + verbs: + - create + - get + - list + - patch + - update + - watch diff --git a/controllers/rosacluster_controller.go b/controllers/rosacluster_controller.go index 2207180c17..8d228ba0f1 100644 --- a/controllers/rosacluster_controller.go +++ b/controllers/rosacluster_controller.go @@ -20,11 +20,17 @@ import ( "context" "fmt" + cmv1 "github.com/openshift-online/ocm-sdk-go/clustersmgmt/v1" "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + kerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/client-go/tools/record" "k8s.io/klog/v2" + "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" @@ -35,9 +41,15 @@ import ( infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" rosacontrolplanev1 "sigs.k8s.io/cluster-api-provider-aws/v2/controlplane/rosa/api/v1beta2" expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2" + "sigs.k8s.io/cluster-api-provider-aws/v2/exp/utils" + "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud" + "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope" + stsservice "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/sts" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/logger" + "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/rosa" "sigs.k8s.io/cluster-api-provider-aws/v2/util/paused" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/patch" "sigs.k8s.io/cluster-api/util/predicates" @@ -48,16 +60,20 @@ type ROSAClusterReconciler struct { client.Client Recorder record.EventRecorder WatchFilterValue string + NewStsClient func(cloud.ScopeUsage, cloud.Session, logger.Wrapper, runtime.Object) stsservice.STSClient + NewOCMClient func(ctx context.Context, rosaScope *scope.ROSAControlPlaneScope) (rosa.OCMClient, error) } // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=rosaclusters,verbs=get;list;watch;update;patch;delete // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=rosaclusters/status,verbs=get;update;patch // +kubebuilder:rbac:groups=controlplane.cluster.x-k8s.io,resources=rosacontrolplanes;rosacontrolplanes/status,verbs=get;list;watch // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status,verbs=get;list;watch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status,verbs=get;list;watch;create +// 
+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=rosamachinepools;rosamachinepools/status,verbs=get;list;watch;create
 // +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch;create;update;patch;delete
 
 func (r *ROSAClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
-	log := ctrl.LoggerFrom(ctx)
+	log := logger.FromContext(ctx)
 	log.Info("Reconciling ROSACluster")
 
 	// Fetch the ROSACluster instance
@@ -70,11 +86,17 @@ func (r *ROSAClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 		return reconcile.Result{}, err
 	}
 
+	if !rosaCluster.DeletionTimestamp.IsZero() {
+		log.Info("Deleting ROSACluster")
+		return reconcile.Result{}, nil
+	}
+
 	// Fetch the Cluster.
 	cluster, err := util.GetOwnerCluster(ctx, r.Client, rosaCluster.ObjectMeta)
 	if err != nil {
 		return reconcile.Result{}, err
 	}
+
 	if cluster == nil {
 		log.Info("Cluster Controller has not yet set OwnerRef")
 		return reconcile.Result{}, nil
@@ -111,6 +133,23 @@ func (r *ROSAClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 		return reconcile.Result{}, fmt.Errorf("failed to patch ROSACluster: %w", err)
 	}
 
+	rosaScope, err := scope.NewROSAControlPlaneScope(scope.ROSAControlPlaneScopeParams{
+		Client:         r.Client,
+		Cluster:        cluster,
+		ControlPlane:   controlPlane,
+		ControllerName: "",
+		Logger:         log,
+		NewStsClient:   r.NewStsClient,
+	})
+	if err != nil {
+		return ctrl.Result{}, fmt.Errorf("failed to create rosa controlplane scope: %w", err)
+	}
+
+	err = r.syncROSAClusterNodePools(ctx, controlPlane, rosaScope)
+	if err != nil {
+		return ctrl.Result{}, fmt.Errorf("failed to sync ROSA cluster nodePools: %w", err)
+	}
+
 	log.Info("Successfully reconciled ROSACluster")
 
 	return reconcile.Result{}, nil
@@ -118,6 +157,8 @@ func (r *ROSAClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 
 func (r *ROSAClusterReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
 	log := logger.FromContext(ctx)
+	r.NewOCMClient = rosa.NewWrappedOCMClient
+	r.NewStsClient = scope.NewSTSClient
 
 	rosaCluster := &expinfrav1.ROSACluster{}
 
@@ -196,3 +237,132 @@ func (r *ROSAClusterReconciler) rosaControlPlaneToManagedCluster(log *logger.Log
 		}
 	}
 }
+
+// getRosaMachinePoolNames returns a map of the node pool names associated with the cluster's ROSAMachinePools.
+func (r *ROSAClusterReconciler) getRosaMachinePoolNames(ctx context.Context, cluster *clusterv1.Cluster) (map[string]bool, error) {
+	selectors := []client.ListOption{
+		client.InNamespace(cluster.GetNamespace()),
+		client.MatchingLabels{
+			clusterv1.ClusterNameLabel: cluster.GetName(),
+		},
+	}
+
+	rosaMachinePoolList := &expinfrav1.ROSAMachinePoolList{}
+	err := r.Client.List(ctx, rosaMachinePoolList, selectors...)
+	if err != nil {
+		return nil, err
+	}
+
+	rosaMPNames := make(map[string]bool)
+	for _, rosaMP := range rosaMachinePoolList.Items {
+		rosaMPNames[rosaMP.Spec.NodePoolName] = true
+	}
+
+	return rosaMPNames, nil
+}
+
+// buildROSAMachinePool returns a ROSAMachinePool and its corresponding MachinePool.
+func (r *ROSAClusterReconciler) buildROSAMachinePool(nodePoolName string, clusterName string, namespace string, nodePool *cmv1.NodePool) (*expinfrav1.ROSAMachinePool, *expclusterv1.MachinePool) {
+	rosaMPSpec := utils.NodePoolToRosaMachinePoolSpec(nodePool)
+	rosaMachinePool := &expinfrav1.ROSAMachinePool{
+		TypeMeta: metav1.TypeMeta{
+			APIVersion: expinfrav1.GroupVersion.String(),
+			Kind:       "ROSAMachinePool",
+		},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      nodePoolName,
+			Namespace: namespace,
+			Labels: map[string]string{
+				clusterv1.ClusterNameLabel: clusterName,
+			},
+		},
+		Spec: rosaMPSpec,
+	}
+	machinePool := &expclusterv1.MachinePool{
+		TypeMeta: metav1.TypeMeta{
+			APIVersion: expclusterv1.GroupVersion.String(),
+			Kind:       "MachinePool",
+		},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      nodePoolName,
+			Namespace: namespace,
+			Labels: map[string]string{
+				clusterv1.ClusterNameLabel: clusterName,
+			},
+		},
+		Spec: expclusterv1.MachinePoolSpec{
+			ClusterName: clusterName,
+			Replicas:    ptr.To(int32(1)),
+			Template: clusterv1.MachineTemplateSpec{
+				Spec: clusterv1.MachineSpec{
+					ClusterName: clusterName,
+					Bootstrap: clusterv1.Bootstrap{
+						DataSecretName: ptr.To(""),
+					},
+					InfrastructureRef: corev1.ObjectReference{
+						APIVersion: expinfrav1.GroupVersion.String(),
+						Kind:       "ROSAMachinePool",
+						Name:       rosaMachinePool.Name,
+					},
+				},
+			},
+		},
+	}
+
+	return rosaMachinePool, machinePool
+}
+
+// syncROSAClusterNodePools ensures every NodePool has a MachinePool, creating a corresponding MachinePool when one does not exist.
+func (r *ROSAClusterReconciler) syncROSAClusterNodePools(ctx context.Context, controlPlane *rosacontrolplanev1.ROSAControlPlane, rosaScope *scope.ROSAControlPlaneScope) error {
+	if controlPlane.Status.Ready {
+		if r.NewOCMClient == nil {
+			return fmt.Errorf("failed to create OCM client: NewOCMClient is nil")
+		}
+
+		ocmClient, err := r.NewOCMClient(ctx, rosaScope)
+		if err != nil || ocmClient == nil {
+			return fmt.Errorf("failed to create OCM client: %w", err)
+		}
+
+		// List the ROSA-HCP nodePools and the existing ROSAMachinePool names.
+		nodePools, err := ocmClient.GetNodePools(rosaScope.ControlPlane.Status.ID)
+		if err != nil {
+			return fmt.Errorf("failed to get nodePools: %w", err)
+		}
+
+		rosaMPNames, err := r.getRosaMachinePoolNames(ctx, rosaScope.Cluster)
+		if err != nil {
+			return fmt.Errorf("failed to get ROSA machinePool names: %w", err)
+		}
+
+		// Ensure every NodePool has a MachinePool; create a corresponding MachinePool where one does not exist.
+		var errs []error
+		for _, nodePool := range nodePools {
+			// Skip the nodePool if it is not in a ready state.
+			if !rosa.IsNodePoolReady(nodePool) {
+				continue
+			}
+			// Skip the nodePool if a ROSAMachinePool for it already exists.
+			if rosaMPNames[nodePool.ID()] {
+				continue
+			}
+			// Create the ROSAMachinePool & MachinePool.
+			rosaMachinePool, machinePool := r.buildROSAMachinePool(nodePool.ID(), rosaScope.Cluster.Name, rosaScope.Cluster.Namespace, nodePool)
+
+			rosaScope.Logger.Info(fmt.Sprintf("Creating ROSAMachinePool %s", rosaMachinePool.Name))
+			if err = r.Client.Create(ctx, rosaMachinePool); err != nil {
+				errs = append(errs, err)
+			}
+
+			rosaScope.Logger.Info(fmt.Sprintf("Creating MachinePool %s", machinePool.Name))
+			if err = r.Client.Create(ctx, machinePool); err != nil {
+				errs = append(errs, err)
+			}
+		}
+
+		if len(errs) > 0 {
+			return kerrors.NewAggregate(errs)
+		}
+	}
+	return nil
+}
diff --git a/controllers/rosacluster_controller_test.go b/controllers/rosacluster_controller_test.go
new file mode 100644
index 0000000000..0a7dd42c0b
--- /dev/null
+++ b/controllers/rosacluster_controller_test.go
@@ -0,0 +1,302 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+	http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package controllers provides a way to reconcile ROSA resources.
+package controllers
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/golang/mock/gomock"
+	. "github.com/onsi/gomega"
+	cmv1 "github.com/openshift-online/ocm-sdk-go/clustersmgmt/v1"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/client-go/tools/record"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2"
+	rosacontrolplanev1 "sigs.k8s.io/cluster-api-provider-aws/v2/controlplane/rosa/api/v1beta2"
+	expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2"
+	"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud"
+	"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope"
+	stsiface "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/sts"
+	"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/sts/mock_stsiface"
+	"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/logger"
+	"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/rosa"
+	"sigs.k8s.io/cluster-api-provider-aws/v2/test/mocks"
+	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+	expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
+	"sigs.k8s.io/cluster-api/util/patch"
+)
+
+func TestRosaClusterReconcile(t *testing.T) {
+	t.Run("Reconcile Rosa Cluster", func(t *testing.T) {
+		g := NewWithT(t)
+		ns, err := testEnv.CreateNamespace(ctx, "test-namespace")
+		g.Expect(err).ToNot(HaveOccurred())
+
+		secret := &corev1.Secret{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      "rosa-secret",
+				Namespace: ns.Name,
+			},
+			Data: map[string][]byte{
+				"ocmToken": []byte("secret-ocm-token-string"),
+			},
+		}
+
+		identity := &infrav1.AWSClusterControllerIdentity{
+			ObjectMeta: metav1.ObjectMeta{
+				Name: "default",
+			},
+			Spec: infrav1.AWSClusterControllerIdentitySpec{
+				AWSClusterIdentitySpec: infrav1.AWSClusterIdentitySpec{
+ AllowedNamespaces: &infrav1.AllowedNamespaces{}, + }, + }, + } + identity.SetGroupVersionKind(infrav1.GroupVersion.WithKind("AWSClusterStaticIdentity")) + + rosaClusterName := "rosa-controlplane-1" + rosaControlPlane := &rosacontrolplanev1.ROSAControlPlane{ + ObjectMeta: metav1.ObjectMeta{ + Name: rosaClusterName, + Namespace: ns.Name, + }, + TypeMeta: metav1.TypeMeta{ + Kind: "ROSAControlPlane", + APIVersion: rosacontrolplanev1.GroupVersion.String(), + }, + Spec: rosacontrolplanev1.RosaControlPlaneSpec{ + RosaClusterName: rosaClusterName, + Subnets: []string{"subnet-0ac99a6230b408813", "subnet-1ac99a6230b408811"}, + AvailabilityZones: []string{"az-1", "az-2"}, + Network: &rosacontrolplanev1.NetworkSpec{ + MachineCIDR: "10.0.0.0/16", + PodCIDR: "10.128.0.0/14", + ServiceCIDR: "172.30.0.0/16", + }, + Region: "us-east-1", + Version: "4.19.20", + ChannelGroup: "stable", + RolesRef: rosacontrolplanev1.AWSRolesRef{}, + OIDCID: "oidcid1", + InstallerRoleARN: "arn1", + WorkerRoleARN: "arn2", + SupportRoleARN: "arn3", + CredentialsSecretRef: &corev1.LocalObjectReference{ + Name: secret.Name, + }, + VersionGate: "Acknowledge", + IdentityRef: &infrav1.AWSIdentityReference{ + Name: identity.Name, + Kind: infrav1.ControllerIdentityKind, + }, + }, + } + + rosaCluster := &expinfrav1.ROSACluster{ + TypeMeta: metav1.TypeMeta{ + Kind: "ROSACluster", + APIVersion: expinfrav1.GroupVersion.String(), + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "rosa-cluster", + Namespace: ns.Name, + }, + } + + capiCluster := &clusterv1.Cluster{ + TypeMeta: metav1.TypeMeta{ + Kind: "Cluster", + APIVersion: clusterv1.GroupVersion.String(), + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "capi-cluster-1", + Namespace: ns.Name, + UID: types.UID("capi-cluster-1"), + }, + Spec: clusterv1.ClusterSpec{ + InfrastructureRef: &corev1.ObjectReference{ + Name: rosaCluster.Name, + Kind: "ROSACluster", + APIVersion: expinfrav1.GroupVersion.String(), + Namespace: ns.Name, + }, + ControlPlaneRef: &corev1.ObjectReference{ + Name: rosaControlPlane.Name, + Kind: "ROSAControlPlane", + APIVersion: rosacontrolplanev1.GroupVersion.String(), + Namespace: ns.Name, + }, + Paused: false, + }, + } + + rosaCluster.OwnerReferences = []metav1.OwnerReference{ + { + Name: capiCluster.Name, + Kind: "Cluster", + APIVersion: clusterv1.GroupVersion.String(), + UID: capiCluster.UID, + }, + } + + createObject(g, secret, ns.Name) + createObject(g, identity, ns.Name) + createObject(g, capiCluster, ns.Name) + createObject(g, rosaControlPlane, ns.Name) + createObject(g, rosaCluster, ns.Name) + + // set controlplane status + rosaCPPatch, err := patch.NewHelper(rosaControlPlane, testEnv) + rosaControlPlane.Status.Ready = true + rosaControlPlane.Status.Version = "4.19.20" + rosaControlPlane.Status.ID = rosaClusterName + g.Expect(rosaCPPatch.Patch(ctx, rosaControlPlane)).To(Succeed()) + g.Expect(err).ShouldNot(HaveOccurred()) + + // set rosaCluster pause conditions + rosaClsPatch, err := patch.NewHelper(rosaCluster, testEnv) + rosaCluster.Status.Conditions = clusterv1.Conditions{ + clusterv1.Condition{ + Type: clusterv1.PausedV1Beta2Condition, + Status: corev1.ConditionFalse, + Reason: clusterv1.NotPausedV1Beta2Reason, + Message: "", + }, + } + g.Expect(rosaClsPatch.Patch(ctx, rosaCluster)).To(Succeed()) + g.Expect(err).ShouldNot(HaveOccurred()) + + // set capiCluster pause condition + clsPatch, err := patch.NewHelper(capiCluster, testEnv) + capiCluster.Status.Conditions = clusterv1.Conditions{ + clusterv1.Condition{ + Type: clusterv1.PausedV1Beta2Condition, + Status: 
corev1.ConditionFalse,
+				Reason:  clusterv1.NotPausedV1Beta2Reason,
+				Message: "",
+			},
+		}
+		g.Expect(clsPatch.Patch(ctx, capiCluster)).To(Succeed())
+		g.Expect(err).ShouldNot(HaveOccurred())
+
+		// Patching is not reliably synchronous; wait briefly for the cache to observe the updates.
+		time.Sleep(50 * time.Millisecond)
+
+		mockCtrl := gomock.NewController(t)
+		recorder := record.NewFakeRecorder(10)
+		ctx := context.TODO()
+		ocmMock := mocks.NewMockOCMClient(mockCtrl)
+		stsMock := mock_stsiface.NewMockSTSClient(mockCtrl)
+		stsMock.EXPECT().GetCallerIdentity(gomock.Any(), gomock.Any()).AnyTimes()
+
+		nodePoolName := "nodepool-1"
+		expect := func(m *mocks.MockOCMClientMockRecorder) {
+			m.GetNodePools(gomock.Any()).AnyTimes().DoAndReturn(func(clusterId string) ([]*cmv1.NodePool, error) {
+				// Build a NodePool.
+				builder := cmv1.NewNodePool().
+					ID(nodePoolName).
+					Version(cmv1.NewVersion().ID("openshift-v4.15.0")).
+					AvailabilityZone("us-east-1a").
+					Subnet("subnet-12345").
+					Labels(map[string]string{"role": "worker"}).
+					AutoRepair(true).
+					TuningConfigs("tuning1").
+					AWSNodePool(
+						cmv1.NewAWSNodePool().
+							InstanceType("m5.large").
+							AdditionalSecurityGroupIds("sg-123", "sg-456").
+							RootVolume(cmv1.NewAWSVolume().Size(120)),
+					).
+					Taints(
+						cmv1.NewTaint().Key("dedicated").Value("gpu").Effect(string(corev1.TaintEffectNoSchedule)),
+					).
+					NodeDrainGracePeriod(
+						cmv1.NewValue().Value(10),
+					).
+					ManagementUpgrade(
+						cmv1.NewNodePoolManagementUpgrade().
+							MaxSurge("1").
+							MaxUnavailable("2"),
+					).
+					Replicas(2).
+					Status(
+						cmv1.NewNodePoolStatus().
+							Message("").
+							CurrentReplicas(2),
+					)
+
+				nodePool, err := builder.Build()
+				g.Expect(err).ToNot(HaveOccurred())
+				return []*cmv1.NodePool{nodePool}, err
+			})
+		}
+		expect(ocmMock.EXPECT())
+
+		r := ROSAClusterReconciler{
+			Recorder:         recorder,
+			WatchFilterValue: "",
+			Client:           testEnv,
+			NewStsClient: func(cloud.ScopeUsage, cloud.Session, logger.Wrapper, runtime.Object) stsiface.STSClient {
+				return stsMock
+			},
+			NewOCMClient: func(ctx context.Context, rosaScope *scope.ROSAControlPlaneScope) (rosa.OCMClient, error) {
+				return ocmMock, nil
+			},
+		}
+
+		req := ctrl.Request{
+			NamespacedName: types.NamespacedName{
+				Namespace: rosaCluster.Namespace,
+				Name:      rosaCluster.Name,
+			},
+		}
+
+		_, err = r.Reconcile(ctx, req)
+		g.Expect(err).ToNot(HaveOccurred())
+
+		// Check that the ROSAMachinePool & MachinePool are created.
+		rosaMachinePool := &expinfrav1.ROSAMachinePool{}
+		keyRosaMP := client.ObjectKey{Name: nodePoolName, Namespace: ns.Name}
+		errRosaMP := testEnv.Get(ctx, keyRosaMP, rosaMachinePool)
+		g.Expect(errRosaMP).ToNot(HaveOccurred())
+
+		machinePool := &expclusterv1.MachinePool{}
+		keyMP := client.ObjectKey{Name: nodePoolName, Namespace: ns.Name}
+		errMP := testEnv.Get(ctx, keyMP, machinePool)
+		g.Expect(errMP).ToNot(HaveOccurred())
+
+		// Test getRosaMachinePoolNames.
+		rosaMachinePools, err := r.getRosaMachinePoolNames(ctx, capiCluster)
+		g.Expect(err).ToNot(HaveOccurred())
+		g.Expect(len(rosaMachinePools)).To(Equal(1))
+
+		cleanupObject(g, rosaMachinePool)
+		cleanupObject(g, machinePool)
+		cleanupObject(g, rosaCluster)
+		cleanupObject(g, rosaControlPlane)
+		cleanupObject(g, capiCluster)
+		mockCtrl.Finish()
+	})
+}
diff --git a/controllers/suite_test.go b/controllers/suite_test.go
index c299f19f7f..4ee71f9d07 100644
--- a/controllers/suite_test.go
+++ b/controllers/suite_test.go
@@ -21,15 +21,19 @@ import (
 	"path"
 	"testing"
 
+	// +kubebuilder:scaffold:imports
+	corev1 "k8s.io/api/core/v1"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	"k8s.io/client-go/kubernetes/scheme"
 	ctrl "sigs.k8s.io/controller-runtime"
 
-	// +kubebuilder:scaffold:imports
 	infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2"
+	rosacontrolplanev1 "sigs.k8s.io/cluster-api-provider-aws/v2/controlplane/rosa/api/v1beta2"
+	expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2"
 	"sigs.k8s.io/cluster-api-provider-aws/v2/test/helpers"
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
 	kubeadmv1beta1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
+	expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
 )
 
 var (
@@ -47,6 +51,11 @@ func setup() {
 	utilruntime.Must(infrav1.AddToScheme(scheme.Scheme))
 	utilruntime.Must(clusterv1.AddToScheme(scheme.Scheme))
 	utilruntime.Must(kubeadmv1beta1.AddToScheme(scheme.Scheme))
+	utilruntime.Must(rosacontrolplanev1.AddToScheme(scheme.Scheme))
+	utilruntime.Must(expinfrav1.AddToScheme(scheme.Scheme))
+	utilruntime.Must(corev1.AddToScheme(scheme.Scheme))
+	utilruntime.Must(expclusterv1.AddToScheme(scheme.Scheme))
+
 	testEnvConfig := helpers.NewTestEnvironmentConfiguration([]string{
 		path.Join("config", "crd", "bases"),
 	},
@@ -68,6 +77,12 @@ func setup() {
 	if err := (&infrav1.AWSClusterControllerIdentity{}).SetupWebhookWithManager(testEnv); err != nil {
 		panic(fmt.Sprintf("Unable to setup AWSClusterControllerIdentity webhook: %v", err))
 	}
+	if err := (&expinfrav1.ROSAMachinePool{}).SetupWebhookWithManager(testEnv); err != nil {
+		panic(fmt.Sprintf("Unable to setup ROSAMachinePool webhook: %v", err))
+	}
+	if err := (&rosacontrolplanev1.ROSAControlPlane{}).SetupWebhookWithManager(testEnv); err != nil {
+		panic(fmt.Sprintf("Unable to setup ROSAControlPlane webhook: %v", err))
+	}
 	go func() {
 		fmt.Println("Starting the manager")
 		if err := testEnv.StartManager(ctx); err != nil {
diff --git a/controlplane/rosa/controllers/rosacontrolplane_controller.go b/controlplane/rosa/controllers/rosacontrolplane_controller.go
index 37143bc3f9..90426d2aab 100644
--- a/controlplane/rosa/controllers/rosacontrolplane_controller.go
+++ b/controlplane/rosa/controllers/rosacontrolplane_controller.go
@@ -421,6 +421,15 @@ func (r *ROSAControlPlaneReconciler) deleteMachinePools(ctx context.Context, ros
 		}
 	}
 
+	// Workaround for the case where the last machinePool cannot be deleted without deleting the ROSA control plane.
+	// In Cluster API (CAPI), machine pools (MPs) are normally deleted before the control plane is removed.
+	// However, in ROSA-HCP, deleting the final MP results in an error because the control plane cannot exist without at least one MP.
+	// To handle this, when only one MP remains, we ignore the deletion error and proceed with deleting the control plane.
+	// Also, OCM cascade-deletes the remaining MPs when the control plane is deleted, so it is safe to skip the last MP and remove the control plane.
+	if len(errs) == 0 && len(machinePools) == 1 {
+		return true, nil
+	}
+
 	if len(errs) > 0 {
 		return false, kerrors.NewAggregate(errs)
 	}
diff --git a/exp/controllers/rosamachinepool_controller.go b/exp/controllers/rosamachinepool_controller.go
index 4750369a9b..9aebd92622 100644
--- a/exp/controllers/rosamachinepool_controller.go
+++ b/exp/controllers/rosamachinepool_controller.go
@@ -15,10 +15,8 @@ import (
 	"github.com/pkg/errors"
 	corev1 "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/runtime/schema"
-	"k8s.io/apimachinery/pkg/util/intstr"
 	"k8s.io/client-go/tools/record"
 	"k8s.io/klog/v2"
 	"k8s.io/utils/ptr"
@@ -32,6 +30,7 @@ import (
 
 	rosacontrolplanev1 "sigs.k8s.io/cluster-api-provider-aws/v2/controlplane/rosa/api/v1beta2"
 	expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2"
+	"sigs.k8s.io/cluster-api-provider-aws/v2/exp/utils"
 	"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud"
 	"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope"
 	stsservice "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/sts"
@@ -43,6 +42,7 @@ import (
 	"sigs.k8s.io/cluster-api/util"
 	"sigs.k8s.io/cluster-api/util/annotations"
 	"sigs.k8s.io/cluster-api/util/conditions"
+	"sigs.k8s.io/cluster-api/util/patch"
 	"sigs.k8s.io/cluster-api/util/predicates"
 )
 
@@ -130,6 +130,18 @@ func (r *ROSAMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Requ
 	}
 	controlPlane := &rosacontrolplanev1.ROSAControlPlane{}
 	if err := r.Client.Get(ctx, controlPlaneKey, controlPlane); err != nil {
+		if apierrors.IsNotFound(err) && !rosaMachinePool.DeletionTimestamp.IsZero() {
+			log.Info("RosaControlPlane not found, RosaMachinePool is deleted")
+			patchHelper, err := patch.NewHelper(rosaMachinePool, r.Client)
+			if err != nil {
+				return ctrl.Result{}, errors.Wrap(err, "failed to init RosaMachinePool patch helper")
+			}
+
+			controllerutil.RemoveFinalizer(rosaMachinePool, expinfrav1.RosaMachinePoolFinalizer)
+			return ctrl.Result{}, patchHelper.Patch(ctx, rosaMachinePool, patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
+				expinfrav1.RosaMachinePoolReadyCondition}})
+		}
+
 		log.Info("Failed to retrieve ControlPlane from MachinePool")
 		return ctrl.Result{}, err
 	}
@@ -410,7 +422,7 @@ func (r *ROSAMachinePoolReconciler) updateNodePool(machinePoolScope *scope.RosaM
 }
 
 func computeSpecDiff(desiredSpec expinfrav1.RosaMachinePoolSpec, nodePool *cmv1.NodePool) string {
-	currentSpec := nodePoolToRosaMachinePoolSpec(nodePool)
+	currentSpec := utils.NodePoolToRosaMachinePoolSpec(nodePool)
 
 	ignoredFields := []string{
 		"ProviderIDList", // providerIDList is set by the controller.
@@ -521,60 +533,6 @@ func nodePoolBuilder(rosaMachinePoolSpec expinfrav1.RosaMachinePoolSpec, machine
 	return npBuilder
 }
 
-func nodePoolToRosaMachinePoolSpec(nodePool *cmv1.NodePool) expinfrav1.RosaMachinePoolSpec {
-	spec := expinfrav1.RosaMachinePoolSpec{
-		NodePoolName:             nodePool.ID(),
-		Version:                  rosa.RawVersionID(nodePool.Version()),
-		AvailabilityZone:         nodePool.AvailabilityZone(),
-		Subnet:                   nodePool.Subnet(),
-		Labels:                   nodePool.Labels(),
-		AutoRepair:               nodePool.AutoRepair(),
-		InstanceType:             nodePool.AWSNodePool().InstanceType(),
-		TuningConfigs:            nodePool.TuningConfigs(),
-		AdditionalSecurityGroups: nodePool.AWSNodePool().AdditionalSecurityGroupIds(),
-		VolumeSize:               nodePool.AWSNodePool().RootVolume().Size(),
-		// nodePool.AWSNodePool().Tags() returns all tags including "system" tags if "fetchUserTagsOnly" parameter is not specified.
-		// TODO: enable when AdditionalTags day2 changes is supported.
-		// AdditionalTags: nodePool.AWSNodePool().Tags(),
-	}
-
-	if nodePool.Autoscaling() != nil {
-		spec.Autoscaling = &expinfrav1.RosaMachinePoolAutoScaling{
-			MinReplicas: nodePool.Autoscaling().MinReplica(),
-			MaxReplicas: nodePool.Autoscaling().MaxReplica(),
-		}
-	}
-	if nodePool.Taints() != nil {
-		rosaTaints := make([]expinfrav1.RosaTaint, 0, len(nodePool.Taints()))
-		for _, taint := range nodePool.Taints() {
-			rosaTaints = append(rosaTaints, expinfrav1.RosaTaint{
-				Key:    taint.Key(),
-				Value:  taint.Value(),
-				Effect: corev1.TaintEffect(taint.Effect()),
-			})
-		}
-		spec.Taints = rosaTaints
-	}
-	if nodePool.NodeDrainGracePeriod() != nil {
-		spec.NodeDrainGracePeriod = &metav1.Duration{
-			Duration: time.Minute * time.Duration(nodePool.NodeDrainGracePeriod().Value()),
-		}
-	}
-	if nodePool.ManagementUpgrade() != nil {
-		spec.UpdateConfig = &expinfrav1.RosaUpdateConfig{
-			RollingUpdate: &expinfrav1.RollingUpdate{},
-		}
-		if nodePool.ManagementUpgrade().MaxSurge() != "" {
-			spec.UpdateConfig.RollingUpdate.MaxSurge = ptr.To(intstr.Parse(nodePool.ManagementUpgrade().MaxSurge()))
-		}
-		if nodePool.ManagementUpgrade().MaxUnavailable() != "" {
-			spec.UpdateConfig.RollingUpdate.MaxUnavailable = ptr.To(intstr.Parse(nodePool.ManagementUpgrade().MaxUnavailable()))
-		}
-	}
-
-	return spec
-}
-
 func (r *ROSAMachinePoolReconciler) reconcileProviderIDList(ctx context.Context, machinePoolScope *scope.RosaMachinePoolScope, nodePool *cmv1.NodePool) error {
 	tags := nodePool.AWSNodePool().Tags()
 	if len(tags) == 0 {
diff --git a/exp/utils/rosa_helper.go b/exp/utils/rosa_helper.go
new file mode 100644
index 0000000000..fc08747874
--- /dev/null
+++ b/exp/utils/rosa_helper.go
@@ -0,0 +1,86 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+	http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package utils provides helper functions.
+package utils
+
+import (
+	"time"
+
+	cmv1 "github.com/openshift-online/ocm-sdk-go/clustersmgmt/v1"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/intstr"
+	"k8s.io/utils/ptr"
+
+	expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2"
+	"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/rosa"
+)
+
+// NodePoolToRosaMachinePoolSpec converts an OCM NodePool into a RosaMachinePoolSpec.
+func NodePoolToRosaMachinePoolSpec(nodePool *cmv1.NodePool) expinfrav1.RosaMachinePoolSpec {
+	spec := expinfrav1.RosaMachinePoolSpec{
+		NodePoolName:             nodePool.ID(),
+		Version:                  rosa.RawVersionID(nodePool.Version()),
+		AvailabilityZone:         nodePool.AvailabilityZone(),
+		Subnet:                   nodePool.Subnet(),
+		Labels:                   nodePool.Labels(),
+		AutoRepair:               nodePool.AutoRepair(),
+		InstanceType:             nodePool.AWSNodePool().InstanceType(),
+		TuningConfigs:            nodePool.TuningConfigs(),
+		AdditionalSecurityGroups: nodePool.AWSNodePool().AdditionalSecurityGroupIds(),
+		VolumeSize:               nodePool.AWSNodePool().RootVolume().Size(),
+		// nodePool.AWSNodePool().Tags() returns all tags, including "system" tags, if the "fetchUserTagsOnly" parameter is not specified.
+		// TODO: enable when AdditionalTags day-2 changes are supported.
+		// AdditionalTags: nodePool.AWSNodePool().Tags(),
+	}
+
+	if nodePool.Autoscaling() != nil {
+		spec.Autoscaling = &expinfrav1.RosaMachinePoolAutoScaling{
+			MinReplicas: nodePool.Autoscaling().MinReplica(),
+			MaxReplicas: nodePool.Autoscaling().MaxReplica(),
+		}
+	}
+	if nodePool.Taints() != nil {
+		rosaTaints := make([]expinfrav1.RosaTaint, 0, len(nodePool.Taints()))
+		for _, taint := range nodePool.Taints() {
+			rosaTaints = append(rosaTaints, expinfrav1.RosaTaint{
+				Key:    taint.Key(),
+				Value:  taint.Value(),
+				Effect: corev1.TaintEffect(taint.Effect()),
+			})
+		}
+		spec.Taints = rosaTaints
+	}
+	if nodePool.NodeDrainGracePeriod() != nil {
+		spec.NodeDrainGracePeriod = &metav1.Duration{
+			Duration: time.Minute * time.Duration(nodePool.NodeDrainGracePeriod().Value()),
+		}
+	}
+	if nodePool.ManagementUpgrade() != nil {
+		spec.UpdateConfig = &expinfrav1.RosaUpdateConfig{
+			RollingUpdate: &expinfrav1.RollingUpdate{},
+		}
+		if nodePool.ManagementUpgrade().MaxSurge() != "" {
+			spec.UpdateConfig.RollingUpdate.MaxSurge = ptr.To(intstr.Parse(nodePool.ManagementUpgrade().MaxSurge()))
+		}
+		if nodePool.ManagementUpgrade().MaxUnavailable() != "" {
+			spec.UpdateConfig.RollingUpdate.MaxUnavailable = ptr.To(intstr.Parse(nodePool.ManagementUpgrade().MaxUnavailable()))
+		}
+	}
+
+	return spec
+}
diff --git a/exp/utils/rosa_helper_test.go b/exp/utils/rosa_helper_test.go
new file mode 100644
index 0000000000..f298ea9be4
--- /dev/null
+++ b/exp/utils/rosa_helper_test.go
@@ -0,0 +1,105 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+	http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package utils
+
+import (
+	"testing"
+	"time"
+
+	. 
"github.com/onsi/gomega" + cmv1 "github.com/openshift-online/ocm-sdk-go/clustersmgmt/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/utils/ptr" + + expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2" +) + +func TestNodePoolToRosaMachinePoolSpec(t *testing.T) { + g := NewWithT(t) + + // Build a NodePool with various fields populated + builder := cmv1.NewNodePool(). + ID("test-nodepool"). + Version(cmv1.NewVersion().ID("openshift-v4.15.0")). + AvailabilityZone("us-east-1a"). + Subnet("subnet-12345"). + Labels(map[string]string{"role": "worker"}). + AutoRepair(true). + TuningConfigs("tuning1"). + AWSNodePool( + cmv1.NewAWSNodePool(). + InstanceType("m5.large"). + AdditionalSecurityGroupIds("sg-123", "sg-456"). + RootVolume(cmv1.NewAWSVolume().Size(120)), + ). + Autoscaling( + cmv1.NewNodePoolAutoscaling(). + MinReplica(2). + MaxReplica(5), + ). + Taints( + cmv1.NewTaint().Key("dedicated").Value("gpu").Effect(string(corev1.TaintEffectNoSchedule)), + ). + NodeDrainGracePeriod( + cmv1.NewValue().Value(10), + ). + ManagementUpgrade( + cmv1.NewNodePoolManagementUpgrade(). + MaxSurge("1"). + MaxUnavailable("2"), + ) + + nodePool, err := builder.Build() + g.Expect(err).ToNot(HaveOccurred()) + + actualSpec := NodePoolToRosaMachinePoolSpec(nodePool) + // Build the expected spec + expectedSpec := expinfrav1.RosaMachinePoolSpec{ + NodePoolName: "test-nodepool", + Version: "4.15.0", + AvailabilityZone: "us-east-1a", + Subnet: "subnet-12345", + Labels: map[string]string{"role": "worker"}, + AutoRepair: true, + InstanceType: "m5.large", + TuningConfigs: []string{"tuning1"}, + AdditionalSecurityGroups: []string{"sg-123", "sg-456"}, + VolumeSize: 120, + Autoscaling: &expinfrav1.RosaMachinePoolAutoScaling{ + MinReplicas: 2, + MaxReplicas: 5, + }, + Taints: []expinfrav1.RosaTaint{ + { + Key: "dedicated", + Value: "gpu", + Effect: corev1.TaintEffectNoSchedule, + }, + }, + NodeDrainGracePeriod: &metav1.Duration{Duration: 10 * time.Minute}, + UpdateConfig: &expinfrav1.RosaUpdateConfig{ + RollingUpdate: &expinfrav1.RollingUpdate{ + MaxSurge: ptr.To(intstr.FromInt32(1)), + MaxUnavailable: ptr.To(intstr.FromInt32(2)), + }, + }, + } + + g.Expect(expectedSpec).To(Equal(actualSpec)) +} diff --git a/pkg/rosa/ocmclient.go b/pkg/rosa/ocmclient.go index ca7fa81fb1..04c4fa700a 100644 --- a/pkg/rosa/ocmclient.go +++ b/pkg/rosa/ocmclient.go @@ -33,6 +33,7 @@ type OCMClient interface { GetIdentityProviders(clusterID string) ([]*v1.IdentityProvider, error) GetMissingGateAgreementsHypershift(clusterID string, upgradePolicy *v1.ControlPlaneUpgradePolicy) ([]*v1.VersionGate, error) GetNodePool(clusterID string, nodePoolID string) (*v1.NodePool, bool, error) + GetNodePools(clusterID string) ([]*v1.NodePool, error) GetHypershiftNodePoolUpgrade(clusterID string, clusterKey string, nodePoolID string) (*v1.NodePool, *v1.NodePoolUpgradePolicy, error) GetUser(clusterID string, group string, username string) (*v1.User, error) ScheduleHypershiftControlPlaneUpgrade(clusterID string, upgradePolicy *v1.ControlPlaneUpgradePolicy) (*v1.ControlPlaneUpgradePolicy, error) @@ -95,6 +96,10 @@ func (c *ocmclient) GetNodePool(clusterID string, nodePoolID string) (*v1.NodePo return c.ocmClient.GetNodePool(clusterID, nodePoolID) } +func (c *ocmclient) GetNodePools(clusterID string) ([]*v1.NodePool, error) { + return c.ocmClient.GetNodePools(clusterID) +} + func (c *ocmclient) GetHypershiftNodePoolUpgrade(clusterID string, clusterKey string, nodePoolID 
string) (*v1.NodePool, *v1.NodePoolUpgradePolicy, error) { return c.ocmClient.GetHypershiftNodePoolUpgrade(clusterID, clusterKey, nodePoolID) } diff --git a/test/mocks/ocm_client_mock.go b/test/mocks/ocm_client_mock.go index 4e948cf639..38da1767aa 100644 --- a/test/mocks/ocm_client_mock.go +++ b/test/mocks/ocm_client_mock.go @@ -290,6 +290,21 @@ func (mr *MockOCMClientMockRecorder) GetNodePool(arg0, arg1 interface{}) *gomock return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetNodePool", reflect.TypeOf((*MockOCMClient)(nil).GetNodePool), arg0, arg1) } +// GetNodePools mocks base method. +func (m *MockOCMClient) GetNodePools(arg0 string) ([]*v1.NodePool, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetNodePools", arg0) + ret0, _ := ret[0].([]*v1.NodePool) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetNodePools indicates an expected call of GetNodePools. +func (mr *MockOCMClientMockRecorder) GetNodePools(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetNodePools", reflect.TypeOf((*MockOCMClient)(nil).GetNodePools), arg0) +} + // GetUser mocks base method. func (m *MockOCMClient) GetUser(arg0, arg1, arg2 string) (*v1.User, error) { m.ctrl.T.Helper()
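
A note on the sync loop added in `rosacluster_controller.go`: `syncROSAClusterNodePools` only materializes a ROSAMachinePool/MachinePool pair for a node pool that is both ready and not already tracked by an existing ROSAMachinePool. The selection rule reduces to a small pure function; the sketch below is illustrative only, and `nodePool` here is a hypothetical trimmed stand-in for `*cmv1.NodePool`, not the SDK type.

```go
package main

import "fmt"

// nodePool is a hypothetical, trimmed stand-in for *cmv1.NodePool carrying
// only the two properties the sync loop consults.
type nodePool struct {
	id    string
	ready bool
}

// poolsToCreate mirrors the decision made by syncROSAClusterNodePools:
// create CAPI objects only for ready node pools that have no existing
// ROSAMachinePool with a matching NodePoolName.
func poolsToCreate(pools []nodePool, existing map[string]bool) []string {
	var toCreate []string
	for _, np := range pools {
		if !np.ready || existing[np.id] {
			continue
		}
		toCreate = append(toCreate, np.id)
	}
	return toCreate
}

func main() {
	pools := []nodePool{
		{id: "workers-a", ready: true},
		{id: "workers-b", ready: false}, // skipped: not ready yet
		{id: "workers-c", ready: true},  // skipped: already tracked below
	}
	existing := map[string]bool{"workers-c": true}
	fmt.Println(poolsToCreate(pools, existing)) // [workers-a]
}
```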
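Similarly, the `deleteMachinePools` workaround in `rosacontrolplane_controller.go` boils down to a single predicate: with no deletion errors and exactly one machine pool remaining, report success and leave the final pool to OCM's cascade delete. A minimal sketch of that rule (the function name is hypothetical, not the controller's API):

```go
package main

import (
	"errors"
	"fmt"
)

// lastPoolCanBeSkipped captures the workaround: ROSA-HCP rejects deleting the
// final node pool while the control plane exists, and OCM cascade-deletes it
// together with the control plane, so one remaining pool with no other
// deletion errors counts as "done".
func lastPoolCanBeSkipped(remainingPools int, errs []error) bool {
	return len(errs) == 0 && remainingPools == 1
}

func main() {
	fmt.Println(lastPoolCanBeSkipped(1, nil))                         // true: hand off to OCM's cascade delete
	fmt.Println(lastPoolCanBeSkipped(3, nil))                         // false: keep deleting pools first
	fmt.Println(lastPoolCanBeSkipped(1, []error{errors.New("boom")})) // false: surface real failures
}
```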
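Finally, a usage sketch for the relocated `exp/utils.NodePoolToRosaMachinePoolSpec` helper. The builder calls mirror those in the new unit test, and the expected output follows that test (`rosa.RawVersionID` strips the `openshift-v` prefix); the `main` wrapper and printed values are assumptions, and the snippet presumes a module where the provider packages resolve.

```go
package main

import (
	"fmt"

	cmv1 "github.com/openshift-online/ocm-sdk-go/clustersmgmt/v1"

	"sigs.k8s.io/cluster-api-provider-aws/v2/exp/utils"
)

func main() {
	// Minimal NodePool built with the same OCM SDK builders the unit test uses.
	nodePool, err := cmv1.NewNodePool().
		ID("workers").
		Version(cmv1.NewVersion().ID("openshift-v4.15.0")).
		Subnet("subnet-12345").
		AutoRepair(true).
		AWSNodePool(cmv1.NewAWSNodePool().InstanceType("m5.large")).
		Build()
	if err != nil {
		panic(err)
	}

	spec := utils.NodePoolToRosaMachinePoolSpec(nodePool)
	// Per the accompanying test, Version is the raw ID without the "openshift-v" prefix.
	fmt.Println(spec.NodePoolName, spec.Version, spec.InstanceType) // workers 4.15.0 m5.large
}
```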