From f3fa12165cb0b10cb2d97e4abf9d861ada526d1b Mon Sep 17 00:00:00 2001 From: ihor-hrytskiv <39990360+ihor-hrytskiv@users.noreply.github.com> Date: Mon, 17 Mar 2025 08:52:12 +0200 Subject: [PATCH 1/2] fix: dropping data and some refactoring (#5) --- Makefile | 4 +- cmd/main.go | 16 +++-- e2e/dragonfly_controller_test.go | 12 ++-- ...dragonfly_pod_lifecycle_controller_test.go | 6 +- internal/controller/base_controller.go | 63 +++++++++++++++++++ internal/controller/dragonfly_controller.go | 37 +++++------ internal/controller/dragonfly_instance.go | 47 ++------------ .../dragonfly_pod_lifecycle_controller.go | 29 ++++----- internal/controller/util.go | 35 ++++++++++- internal/resources/const.go | 4 ++ internal/resources/resources.go | 12 ++-- 11 files changed, 163 insertions(+), 102 deletions(-) create mode 100644 internal/controller/base_controller.go diff --git a/Makefile b/Makefile index 7034e160..666826ee 100644 --- a/Makefile +++ b/Makefile @@ -58,7 +58,7 @@ vet: ## Run go vet against code. .PHONY: test test: manifests generate fmt vet envtest ## Run tests. - GOBIN=$(LOCALBIN) go install github.com/onsi/ginkgo/v2/ginkgo@v2.12.1 + GOBIN=$(LOCALBIN) go install github.com/onsi/ginkgo/v2/ginkgo@v2.17.2 KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" $(GINKGO) -vv -r -p -coverprofile cover.out ##@ Build @@ -168,4 +168,4 @@ $(CONTROLLER_GEN): $(LOCALBIN) .PHONY: envtest envtest: $(ENVTEST) ## Download envtest-setup locally if necessary. $(ENVTEST): $(LOCALBIN) - test -s $(LOCALBIN)/setup-envtest || GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-runtime/tools/setup-envtest@latest + test -s $(LOCALBIN)/setup-envtest || GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-runtime/tools/setup-envtest@release-0.18 diff --git a/cmd/main.go b/cmd/main.go index 87486d9a..5e47ebc2 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -144,18 +144,22 @@ func main() { defer eventBroadcaster.Shutdown() if err = (&controller.DragonflyReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - EventRecorder: eventRecorder, + Reconciler: controller.Reconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + EventRecorder: eventRecorder, + }, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "Dragonfly") os.Exit(1) } if err = (&controller.DfPodLifeCycleReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - EventRecorder: eventRecorder, + Reconciler: controller.Reconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + EventRecorder: eventRecorder, + }, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "Health") os.Exit(1) diff --git a/e2e/dragonfly_controller_test.go b/e2e/dragonfly_controller_test.go index d3f8d6eb..a62be23f 100644 --- a/e2e/dragonfly_controller_test.go +++ b/e2e/dragonfly_controller_test.go @@ -177,7 +177,7 @@ var _ = Describe("Dragonfly Lifecycle tests", Ordered, FlakeAttempts(3), func() // Check if there are relevant pods with expected roles var pods corev1.PodList err := k8sClient.List(ctx, &pods, client.InNamespace(namespace), client.MatchingLabels{ - "app": name, + resources.DragonflyNameLabelKey: name, resources.KubernetesPartOfLabelKey: "dragonfly", }) Expect(err).To(BeNil()) @@ -231,7 +231,7 @@ var _ = Describe("Dragonfly Lifecycle tests", Ordered, FlakeAttempts(3), func() // Check if there are relevant pods with expected roles var pods corev1.PodList err = k8sClient.List(ctx, &pods, client.InNamespace(namespace), client.MatchingLabels{ - "app": name, + resources.DragonflyNameLabelKey: name, resources.KubernetesPartOfLabelKey: "dragonfly", }) Expect(err).To(BeNil()) @@ -274,7 +274,7 @@ var _ = Describe("Dragonfly Lifecycle tests", Ordered, FlakeAttempts(3), func() // Check if there are relevant pods with expected roles var pods corev1.PodList err = k8sClient.List(ctx, &pods, client.InNamespace(namespace), client.MatchingLabels{ - "app": name, + resources.DragonflyNameLabelKey: name, resources.KubernetesPartOfLabelKey: "dragonfly", }) Expect(err).To(BeNil()) @@ -331,7 +331,7 @@ var _ = Describe("Dragonfly Lifecycle tests", Ordered, FlakeAttempts(3), func() // Check if there are relevant pods with expected roles var pods corev1.PodList err = k8sClient.List(ctx, &pods, client.InNamespace(namespace), client.MatchingLabels{ - "app": name, + resources.DragonflyNameLabelKey: name, resources.KubernetesPartOfLabelKey: "dragonfly", }) Expect(err).To(BeNil()) @@ -456,7 +456,7 @@ var _ = Describe("Dragonfly Lifecycle tests", Ordered, FlakeAttempts(3), func() // check for pods too var pods corev1.PodList err = k8sClient.List(ctx, &pods, client.InNamespace(namespace), client.MatchingLabels{ - "app": name, + resources.DragonflyNameLabelKey: name, resources.KubernetesPartOfLabelKey: "dragonfly", }) Expect(err).To(BeNil()) @@ -716,7 +716,7 @@ var _ = Describe("Dragonfly PVC Test with single replica", Ordered, FlakeAttempt // check if the pvc is created var pvcs corev1.PersistentVolumeClaimList err = k8sClient.List(ctx, &pvcs, client.InNamespace(namespace), client.MatchingLabels{ - "app": name, + resources.DragonflyNameLabelKey: name, resources.KubernetesPartOfLabelKey: "dragonfly", }) Expect(err).To(BeNil()) diff --git a/e2e/dragonfly_pod_lifecycle_controller_test.go b/e2e/dragonfly_pod_lifecycle_controller_test.go index 6955b7dc..0eca8de5 100644 --- a/e2e/dragonfly_pod_lifecycle_controller_test.go +++ b/e2e/dragonfly_pod_lifecycle_controller_test.go @@ -85,7 +85,7 @@ var _ = Describe("DF Pod Lifecycle Reconciler", Ordered, FlakeAttempts(3), func( // Check if there are relevant pods with expected roles var pods corev1.PodList err = k8sClient.List(ctx, &pods, client.InNamespace(namespace), client.MatchingLabels{ - "app": name, + resources.DragonflyNameLabelKey: name, resources.KubernetesPartOfLabelKey: "dragonfly", }) Expect(err).To(BeNil()) @@ -136,7 +136,7 @@ var _ = Describe("DF Pod Lifecycle Reconciler", Ordered, FlakeAttempts(3), func( // Check if there are relevant pods with expected roles var pods corev1.PodList err = k8sClient.List(ctx, &pods, client.InNamespace(namespace), client.MatchingLabels{ - "app": name, + resources.DragonflyNameLabelKey: name, resources.KubernetesPartOfLabelKey: "dragonfly", }) Expect(err).To(BeNil()) @@ -184,7 +184,7 @@ var _ = Describe("DF Pod Lifecycle Reconciler", Ordered, FlakeAttempts(3), func( // Check if there are relevant pods with expected roles var pods corev1.PodList err = k8sClient.List(ctx, &pods, client.InNamespace(namespace), client.MatchingLabels{ - "app": name, + resources.DragonflyNameLabelKey: name, resources.KubernetesPartOfLabelKey: "dragonfly", }) Expect(err).To(BeNil()) diff --git a/internal/controller/base_controller.go b/internal/controller/base_controller.go new file mode 100644 index 00000000..1539e048 --- /dev/null +++ b/internal/controller/base_controller.go @@ -0,0 +1,63 @@ +/* +Copyright 2023 DragonflyDB authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + dfv1alpha1 "github.com/dragonflydb/dragonfly-operator/api/v1alpha1" + "github.com/go-logr/logr" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/record" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type ( + Reconciler struct { + Client client.Client // Explicitly named + Scheme *runtime.Scheme + EventRecorder record.EventRecorder + } + // DragonflyInstance is an abstraction over the `Dragonfly` CRD + // and provides methods to handle replication. + DragonflyInstance struct { + // Dragonfly is the relevant Dragonfly CRD that it performs actions over + df *dfv1alpha1.Dragonfly + + client client.Client + scheme *runtime.Scheme + eventRecorder record.EventRecorder + log logr.Logger + } +) + +func (r *Reconciler) getDragonflyInstance(ctx context.Context, namespacedName types.NamespacedName, log logr.Logger) (*DragonflyInstance, error) { + // Retrieve the relevant Dragonfly object + var df dfv1alpha1.Dragonfly + err := r.Client.Get(ctx, namespacedName, &df) + if err != nil { + return nil, err + } + + return &DragonflyInstance{ + df: &df, + client: r.Client, + scheme: r.Scheme, + eventRecorder: r.EventRecorder, + log: log, + }, nil +} diff --git a/internal/controller/dragonfly_controller.go b/internal/controller/dragonfly_controller.go index 9ec7ad40..a126078a 100644 --- a/internal/controller/dragonfly_controller.go +++ b/internal/controller/dragonfly_controller.go @@ -26,8 +26,6 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" @@ -37,10 +35,7 @@ import ( // DragonflyReconciler reconciles a Dragonfly object type DragonflyReconciler struct { - client.Client - Scheme *runtime.Scheme - - EventRecorder record.EventRecorder + Reconciler } //+kubebuilder:rbac:groups=dragonflydb.io,resources=dragonflies,verbs=get;list;watch;create;update;patch;delete @@ -59,7 +54,7 @@ type DragonflyReconciler struct { func (r *DragonflyReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { log := log.FromContext(ctx) var df dfv1alpha1.Dragonfly - if err := r.Get(ctx, req.NamespacedName, &df); err != nil { + if err := r.Client.Get(ctx, req.NamespacedName, &df); err != nil { log.Info(fmt.Sprintf("could not get the Dragonfly object: %s", req.NamespacedName)) return ctrl.Result{}, client.IgnoreNotFound(err) } @@ -76,7 +71,7 @@ func (r *DragonflyReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( // create all resources for _, resource := range resources { - if err := r.Create(ctx, resource); err != nil { + if err := r.Client.Create(ctx, resource); err != nil { log.Error(err, fmt.Sprintf("could not create resource %s/%s/%s", resource.GetObjectKind(), resource.GetNamespace(), resource.GetName())) return ctrl.Result{}, err } @@ -85,7 +80,7 @@ func (r *DragonflyReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( // Update Status df.Status.Phase = PhaseResourcesCreated log.Info("Created resources for object") - if err := r.Status().Update(ctx, &df); err != nil { + if err := r.Client.Status().Update(ctx, &df); err != nil { log.Error(err, "could not update the Dragonfly object") return ctrl.Result{}, err } @@ -102,14 +97,14 @@ func (r *DragonflyReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( } for _, resource := range missingResources { // recreate missing resources - if err := r.Create(ctx, resource); err != nil { + if err := r.Client.Create(ctx, resource); err != nil { log.Error(err, fmt.Sprintf("could not create resource %s/%s/%s", resource.GetObjectKind(), resource.GetNamespace(), resource.GetName())) return ctrl.Result{}, err } } var statefulSet appsv1.StatefulSet - if err := r.Get(ctx, client.ObjectKey{Namespace: df.Namespace, Name: df.Name}, &statefulSet); err != nil { + if err := r.Client.Get(ctx, client.ObjectKey{Namespace: df.Namespace, Name: df.Name}, &statefulSet); err != nil { log.Error(err, "could not get statefulset") return ctrl.Result{}, err } @@ -125,7 +120,7 @@ func (r *DragonflyReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( // update all resources for _, resource := range newResources { - if err := r.Update(ctx, resource); err != nil { + if err := r.Client.Update(ctx, resource); err != nil { log.Error(err, fmt.Sprintf("could not update resource %s/%s/%s", resource.GetObjectKind(), resource.GetNamespace(), resource.GetName())) return ctrl.Result{}, err } @@ -138,15 +133,15 @@ func (r *DragonflyReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( // This is a Rollout log.Info("Rolling out new version") var updatedStatefulset appsv1.StatefulSet - if err := r.Get(ctx, client.ObjectKey{Namespace: df.Namespace, Name: df.Name}, &updatedStatefulset); err != nil { + if err := r.Client.Get(ctx, client.ObjectKey{Namespace: df.Namespace, Name: df.Name}, &updatedStatefulset); err != nil { log.Error(err, "could not get statefulset") return ctrl.Result{Requeue: true}, err } // get pods of the statefulset var pods corev1.PodList - if err := r.List(ctx, &pods, client.InNamespace(df.Namespace), client.MatchingLabels(map[string]string{ - "app": df.Name, + if err := r.Client.List(ctx, &pods, client.InNamespace(df.Namespace), client.MatchingLabels(map[string]string{ + resources.DragonflyNameLabelKey: df.Name, resources.KubernetesAppNameLabelKey: "dragonfly", })); err != nil { log.Error(err, "could not list pods") @@ -173,7 +168,7 @@ func (r *DragonflyReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( if isFailedToStart(&pod) { // This is a new pod which is trying to be ready, but couldn't start due to misconfig. // Delete the pod and create a new one. - if err := r.Delete(ctx, &pod); err != nil { + if err := r.Client.Delete(ctx, &pod); err != nil { log.Error(err, "could not delete pod") return ctrl.Result{RequeueAfter: 5 * time.Second}, err } @@ -228,7 +223,7 @@ func (r *DragonflyReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( // delete the replica log.Info("deleting replica", "pod", replica.Name) r.EventRecorder.Event(&df, corev1.EventTypeNormal, "Rollout", "Deleting replica") - if err := r.Delete(ctx, &replica); err != nil { + if err := r.Client.Delete(ctx, &replica); err != nil { log.Error(err, "could not delete pod") return ctrl.Result{RequeueAfter: 5 * time.Second}, err } @@ -268,7 +263,7 @@ func (r *DragonflyReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( // delete the old master, so that it gets recreated with the new version log.Info("deleting master", "pod", master.Name) - if err := r.Delete(ctx, &master); err != nil { + if err := r.Client.Delete(ctx, &master); err != nil { log.Error(err, "could not delete pod") return ctrl.Result{RequeueAfter: 5 * time.Second}, err } @@ -279,7 +274,7 @@ func (r *DragonflyReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( // update status df.Status.IsRollingUpdate = false - if err := r.Status().Update(ctx, &df); err != nil { + if err := r.Client.Status().Update(ctx, &df); err != nil { log.Error(err, "could not update the Dragonfly object") return ctrl.Result{Requeue: true}, err } @@ -295,7 +290,7 @@ func (r *DragonflyReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( // Start rollout and update status // update status so that we can track progress df.Status.IsRollingUpdate = true - if err := r.Status().Update(ctx, &df); err != nil { + if err := r.Client.Status().Update(ctx, &df); err != nil { log.Error(err, "could not update the Dragonfly object") return ctrl.Result{Requeue: true}, err } @@ -332,7 +327,7 @@ func (r *DragonflyReconciler) getMissingResources(ctx context.Context, df *dfv1a for _, resource := range resources { obj := resource.DeepCopyObject().(client.Object) - err := r.Get(ctx, client.ObjectKey{ + err := r.Client.Get(ctx, client.ObjectKey{ Namespace: df.Namespace, Name: resource.GetName(), }, obj) diff --git a/internal/controller/dragonfly_instance.go b/internal/controller/dragonfly_instance.go index 8c6796d1..8c0cb3c4 100644 --- a/internal/controller/dragonfly_instance.go +++ b/internal/controller/dragonfly_instance.go @@ -24,48 +24,13 @@ import ( "strconv" "strings" - dfv1alpha1 "github.com/dragonflydb/dragonfly-operator/api/v1alpha1" "github.com/dragonflydb/dragonfly-operator/internal/resources" - "github.com/go-logr/logr" "github.com/redis/go-redis/v9" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" ) -// DragonflyInstance is an abstraction over the `Dragonfly` CRD -// and provides methods to handle replication. -type DragonflyInstance struct { - // Dragonfly is the relevant Dragonfly CRD that it performs actions over - df *dfv1alpha1.Dragonfly - - client client.Client - log logr.Logger -} - -func GetDragonflyInstanceFromPod(ctx context.Context, c client.Client, pod *corev1.Pod, log logr.Logger) (*DragonflyInstance, error) { - dfName, ok := pod.Labels["app"] - if !ok { - return nil, errors.New("can't find the `app` label") - } - - // Retrieve the relevant Dragonfly object - var df dfv1alpha1.Dragonfly - err := c.Get(ctx, types.NamespacedName{ - Name: dfName, - Namespace: pod.Namespace, - }, &df) - if err != nil { - return nil, err - } - - return &DragonflyInstance{ - df: &df, - client: c, - log: log, - }, nil -} - func (dfi *DragonflyInstance) configureReplication(ctx context.Context) error { dfi.log.Info("Configuring replication") @@ -90,7 +55,7 @@ func (dfi *DragonflyInstance) configureReplication(ctx context.Context) error { var master string var masterIp string for _, pod := range pods.Items { - if pod.Status.Phase == corev1.PodRunning && pod.Status.ContainerStatuses[0].Ready && pod.DeletionTimestamp == nil && pod.Status.PodIP != "" { + if isPodReady(pod) { master = pod.Name masterIp = pod.Status.PodIP dfi.log.Info("Marking pod as master", "podName", master, "ip", masterIp) @@ -112,7 +77,7 @@ func (dfi *DragonflyInstance) configureReplication(ctx context.Context) error { for _, pod := range pods.Items { // only mark the running non-master pods dfi.log.Info("Checking pod", "podName", pod.Name, "ip", pod.Status.PodIP, "status", pod.Status.Phase, "deletiontimestamp", pod.DeletionTimestamp) - if pod.Name != master && pod.Status.Phase == corev1.PodRunning && pod.DeletionTimestamp == nil && pod.Status.PodIP != "" { + if pod.Name != master && isPodReady(pod) { dfi.log.Info("Marking pod as replica", "podName", pod.Name, "ip", pod.Status.PodIP, "status", pod.Status.Phase) if err := dfi.replicaOf(ctx, &pod, masterIp); err != nil { // TODO: Why does this fail every now and then? @@ -159,7 +124,7 @@ func (dfi *DragonflyInstance) masterExists(ctx context.Context) (bool, error) { } for _, pod := range pods.Items { - if pod.Status.Phase == corev1.PodRunning && pod.Status.ContainerStatuses[0].Ready && pod.Labels[resources.Role] == resources.Master { + if isPodReady(pod) && pod.Labels[resources.Role] == resources.Master { return true, nil } } @@ -175,7 +140,7 @@ func (dfi *DragonflyInstance) getMasterIp(ctx context.Context) (string, error) { } for _, pod := range pods.Items { - if pod.Status.Phase == corev1.PodRunning && pod.Status.ContainerStatuses[0].Ready && pod.Labels[resources.Role] == resources.Master && pod.DeletionTimestamp == nil { + if isPodReady(pod) && pod.Labels[resources.Role] == resources.Master { return pod.Status.PodIP, nil } } @@ -278,7 +243,7 @@ func (dfi *DragonflyInstance) checkAndConfigureReplication(ctx context.Context) // configure non replica pods as replicas for _, pod := range pods.Items { if pod.Labels[resources.Role] == "" { - if pod.Status.Phase == corev1.PodRunning && pod.Status.ContainerStatuses[0].Ready && pod.Status.PodIP != "" { + if isPodReady(pod) { if err := dfi.configureReplica(ctx, &pod); err != nil { return err } @@ -317,7 +282,7 @@ func (dfi *DragonflyInstance) getPods(ctx context.Context) (*corev1.PodList, err dfi.log.Info("getting all pods relevant to the instance") var pods corev1.PodList if err := dfi.client.List(ctx, &pods, client.InNamespace(dfi.df.Namespace), client.MatchingLabels{ - "app": dfi.df.Name, + resources.DragonflyNameLabelKey: dfi.df.Name, resources.KubernetesPartOfLabelKey: "dragonfly", }, ); err != nil { diff --git a/internal/controller/dragonfly_pod_lifecycle_controller.go b/internal/controller/dragonfly_pod_lifecycle_controller.go index 6c622a7a..6dce5d8a 100644 --- a/internal/controller/dragonfly_pod_lifecycle_controller.go +++ b/internal/controller/dragonfly_pod_lifecycle_controller.go @@ -19,12 +19,11 @@ package controller import ( "context" "fmt" + "k8s.io/apimachinery/pkg/types" "time" "github.com/dragonflydb/dragonfly-operator/internal/resources" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/event" @@ -33,9 +32,7 @@ import ( ) type DfPodLifeCycleReconciler struct { - client.Client - Scheme *runtime.Scheme - EventRecorder record.EventRecorder + Reconciler } // +kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch;create;update;patch;delete @@ -57,16 +54,16 @@ func (r *DfPodLifeCycleReconciler) Reconcile(ctx context.Context, req ctrl.Reque return ctrl.Result{}, client.IgnoreNotFound(err) } - // check for pod readiness - isPodReady := false - for _, condition := range pod.Status.Conditions { - if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { - isPodReady = true - break - } + dfName, ok := pod.Labels[resources.DragonflyNameLabelKey] + if !ok { + log.Info("failed to get Dragonfly name from pod labels") + return ctrl.Result{}, nil } - dfi, err := GetDragonflyInstanceFromPod(ctx, r.Client, &pod, log) + dfi, err := r.getDragonflyInstance(ctx, types.NamespacedName{ + Name: dfName, + Namespace: pod.Namespace, + }, log) if err != nil { log.Info("Pod does not belong to a Dragonfly instance") return ctrl.Result{}, nil @@ -74,8 +71,8 @@ func (r *DfPodLifeCycleReconciler) Reconcile(ctx context.Context, req ctrl.Reque // Get the role of the pod role, roleExists := pod.Labels[resources.Role] - if !isPodReady { - if roleExists && role == "master" { + if !isPodReady(pod) { + if roleExists && role == resources.Master { log.Info("Master pod is not ready, initiating failover", "pod", req.NamespacedName) err := dfi.configureReplication(ctx) if err != nil { @@ -147,7 +144,7 @@ func (r *DfPodLifeCycleReconciler) Reconcile(ctx context.Context, req ctrl.Reque r.EventRecorder.Event(dfi.df, corev1.EventTypeNormal, "Replication", "Configured a new replica") } } - } else if pod.DeletionTimestamp != nil { + } else if isPodMarkedForDeletion(pod) { // pod deletion event // configure replication if its a master pod // do nothing if its a replica pod diff --git a/internal/controller/util.go b/internal/controller/util.go index 562be1cb..105e5579 100644 --- a/internal/controller/util.go +++ b/internal/controller/util.go @@ -61,7 +61,7 @@ func getLatestReplica(ctx context.Context, c client.Client, statefulSet *appsv1. err := c.List(ctx, podList, &client.ListOptions{ Namespace: statefulSet.Namespace, LabelSelector: labels.SelectorFromValidatedSet(map[string]string{ - "app": statefulSet.Name, + resources.DragonflyNameLabelKey: statefulSet.Name, resources.KubernetesPartOfLabelKey: "dragonfly", }), }) @@ -158,3 +158,36 @@ func isStableState(ctx context.Context, pod *corev1.Pod) (bool, error) { return true, nil } + +func isPodReady(pod corev1.Pod) bool { + if isPodMarkedForDeletion(pod) { + return false + } + + for _, c := range pod.Status.Conditions { + if c.Type == corev1.PodReady && c.Status == corev1.ConditionTrue && pod.Status.PodIP != "" { + return isDragonflyContainerReady(pod.Status.ContainerStatuses) + } + } + + return false +} + +func isDragonflyContainerReady(containerStatuses []corev1.ContainerStatus) bool { + for _, cs := range containerStatuses { + if cs.Name == resources.DragonflyContainerName && cs.Ready { + return true + } + } + + return false +} + +func isPodMarkedForDeletion(pod corev1.Pod) bool { + for _, c := range pod.Status.Conditions { + if !pod.DeletionTimestamp.IsZero() || (c.Type == corev1.DisruptionTarget && c.Status == corev1.ConditionTrue) { + return true + } + } + return false +} diff --git a/internal/resources/const.go b/internal/resources/const.go index ec6a3b5b..8a250372 100644 --- a/internal/resources/const.go +++ b/internal/resources/const.go @@ -56,6 +56,8 @@ const ( // KubernetesPartOfLabel is the name of a higher level application this one is part of KubernetesPartOfLabelKey = "app.kubernetes.io/part-of" + DragonflyNameLabelKey = "app" + MasterIp string = "master-ip" Role string = "role" @@ -63,6 +65,8 @@ const ( Master string = "master" Replica string = "replica" + + DragonflyContainerName = "dragonfly" ) var DefaultDragonflyArgs = []string{ diff --git a/internal/resources/resources.go b/internal/resources/resources.go index 043d4929..472f0c24 100644 --- a/internal/resources/resources.go +++ b/internal/resources/resources.go @@ -74,7 +74,7 @@ func GetDragonflyResources(ctx context.Context, df *resourcesv1.Dragonfly) ([]cl KubernetesAppVersionLabelKey: Version, KubernetesPartOfLabelKey: "dragonfly", KubernetesManagedByLabelKey: DragonflyOperatorName, - "app": df.Name, + DragonflyNameLabelKey: df.Name, }, }, Spec: appsv1.StatefulSetSpec{ @@ -82,7 +82,7 @@ func GetDragonflyResources(ctx context.Context, df *resourcesv1.Dragonfly) ([]cl ServiceName: df.Name, Selector: &metav1.LabelSelector{ MatchLabels: map[string]string{ - "app": df.Name, + DragonflyNameLabelKey: df.Name, KubernetesPartOfLabelKey: "dragonfly", KubernetesAppNameLabelKey: "dragonfly", }, @@ -93,7 +93,7 @@ func GetDragonflyResources(ctx context.Context, df *resourcesv1.Dragonfly) ([]cl Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{ - "app": df.Name, + DragonflyNameLabelKey: df.Name, KubernetesPartOfLabelKey: "dragonfly", KubernetesAppNameLabelKey: "dragonfly", }, @@ -236,7 +236,7 @@ func GetDragonflyResources(ctx context.Context, df *resourcesv1.Dragonfly) ([]cl ObjectMeta: metav1.ObjectMeta{ Name: "df", Labels: map[string]string{ - "app": df.Name, + DragonflyNameLabelKey: df.Name, KubernetesPartOfLabelKey: "dragonfly", KubernetesAppNameLabelKey: "dragonfly", }, @@ -383,12 +383,12 @@ func GetDragonflyResources(ctx context.Context, df *resourcesv1.Dragonfly) ([]cl KubernetesAppVersionLabelKey: Version, KubernetesPartOfLabelKey: "dragonfly", KubernetesManagedByLabelKey: DragonflyOperatorName, - "app": df.Name, + DragonflyNameLabelKey: df.Name, }, }, Spec: corev1.ServiceSpec{ Selector: map[string]string{ - "app": df.Name, + DragonflyNameLabelKey: df.Name, KubernetesAppNameLabelKey: "dragonfly", Role: Master, }, From 998d7abb1505051b20a6a4cd1b03f93977d728ab Mon Sep 17 00:00:00 2001 From: Ihor Hrytskiv Date: Mon, 17 Mar 2025 11:20:19 +0200 Subject: [PATCH 2/2] fix: container name --- internal/resources/resources.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/resources/resources.go b/internal/resources/resources.go index 472f0c24..a5688280 100644 --- a/internal/resources/resources.go +++ b/internal/resources/resources.go @@ -102,7 +102,7 @@ func GetDragonflyResources(ctx context.Context, df *resourcesv1.Dragonfly) ([]cl ImagePullSecrets: df.Spec.ImagePullSecrets, Containers: []corev1.Container{ { - Name: "dragonfly", + Name: DragonflyContainerName, Image: image, Ports: []corev1.ContainerPort{ {