Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 57 additions & 8 deletions controllers/maascluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,18 @@ import (
"github.com/spectrocloud/cluster-api-provider-maas/pkg/maas/lxd"
"github.com/spectrocloud/cluster-api-provider-maas/pkg/maas/scope"
infrautil "github.com/spectrocloud/cluster-api-provider-maas/pkg/util"
"github.com/spectrocloud/maas-client-go/maasclient"
"sigs.k8s.io/controller-runtime/pkg/controller"
)

// dnsServicer is the subset of dns.Service used by reconcileDNSAttachments.
// Extracted as an interface to allow unit testing without a live MAAS endpoint.
type dnsServicer interface {
GetDNSResource() (maasclient.DNSResource, error)
UpdateDNSAttachmentsWithResource(maasclient.DNSResource, []string) (bool, error)
IsDriftDetected(maasclient.DNSResource, []string) bool
}

const lastAppliedAnn = "infrastructure.cluster.x-k8s.io/last-applied-dns-hash"

// MaasClusterReconciler reconciles a MaasCluster object
Expand Down Expand Up @@ -155,7 +164,7 @@ func (r *MaasClusterReconciler) reconcileDelete(ctx context.Context, clusterScop
return reconcile.Result{}, nil
}

func (r *MaasClusterReconciler) reconcileDNSAttachments(clusterScope *scope.ClusterScope, dnssvc *dns.Service) error {
func (r *MaasClusterReconciler) reconcileDNSAttachments(clusterScope *scope.ClusterScope, dnssvc dnsServicer) error {

if clusterScope.IsCustomEndpoint() {
return nil
Expand All @@ -171,34 +180,62 @@ func (r *MaasClusterReconciler) reconcileDNSAttachments(clusterScope *scope.Clus
return errors.Wrapf(err, "unable to find preferred subnets")
}

// Build desired IPs first (no MAAS call)
// Build desired IPs first (no MAAS call). Track running CP count separately so we can
// distinguish "machines powered off" from "machines running but IPs filtered by subnets".
var runningCPCount int
var runningIpAddresses []string
for _, m := range machines {
if !IsControlPlaneMachine(m) {
continue
}
machineIP := getExternalMachineIP(clusterScope.Logger, preferredSubnets, m)
isRunningHealthy := IsRunning(m)
if !m.DeletionTimestamp.IsZero() || !isRunningHealthy {
continue
}
runningCPCount++
machineIP := getExternalMachineIP(clusterScope.Logger, preferredSubnets, m)
if machineIP != "" {
runningIpAddresses = append(runningIpAddresses, machineIP)
}
}

// Early-exit gate using last-applied hash
desiredHash := infrautil.StableHashStringSlice(runningIpAddresses)
if clusterScope.MaasCluster.Annotations != nil && clusterScope.MaasCluster.Annotations[lastAppliedAnn] == desiredHash {
// Never wipe DNS when all CP machines are transiently unavailable (e.g. powered off during
// a flap). An empty desired set would clear the MAAS A records and the hash would be cached
// as SHA-256(""), preventing self-recovery until the controller pod itself is reachable again.
if len(runningIpAddresses) == 0 {
if runningCPCount > 0 {
clusterScope.Info("CP machines are running but no IPs match preferred subnets; preserving existing DNS records",
"runningCPs", runningCPCount)
} else {
clusterScope.Info("No running CP machines found; preserving existing DNS records")
}
return nil
}

// Only now fetch DNS resource once
// Deduplicate before hashing so the annotation always represents the applied IP *set*.
// updateResourceIPs deduplicates via a set internally; without this step the hash could
// differ between reconciles if duplicate IPs appear/disappear while MAAS state is unchanged,
// causing spurious re-syncs.
runningIpAddresses = dedupStringSlice(runningIpAddresses)

// Fetch DNS resource once — needed for both drift check and potential update.
dnsResource, err := dnssvc.GetDNSResource()
if err != nil {
return errors.Wrap(err, "Unable to get the dns resource")
}

// Early-exit only when the annotation hash matches AND MAAS state agrees with desired IPs.
// Verifying MAAS state catches external drift (e.g. DNS records manually wiped while the
// desired set was unchanged, so the hash never changed).
desiredHash := infrautil.StableHashStringSlice(runningIpAddresses)
if clusterScope.MaasCluster.Annotations != nil && clusterScope.MaasCluster.Annotations[lastAppliedAnn] == desiredHash {
Comment thread
AmitSahastra marked this conversation as resolved.
if !dnssvc.IsDriftDetected(dnsResource, runningIpAddresses) {
return nil
}
clusterScope.Info("DNS drift detected: MAAS state diverges from last-applied annotation; forcing re-sync",
"desiredIPs", runningIpAddresses)
}

// Use optimized update with in-resource idempotency
updated, err := dnssvc.UpdateDNSAttachmentsWithResource(dnsResource, runningIpAddresses)
if err != nil {
Expand All @@ -211,14 +248,26 @@ func (r *MaasClusterReconciler) reconcileDNSAttachments(clusterScope *scope.Clus
}
clusterScope.MaasCluster.Annotations[lastAppliedAnn] = desiredHash

// Best-effort requeue hint remains optional; we skip computing current vs attached
if updated {
clusterScope.Info("DNS attachments updated; will continue monitoring for changes")
}

return nil
}

// dedupStringSlice returns a new slice with duplicate elements removed, preserving order.
func dedupStringSlice(in []string) []string {
seen := make(map[string]struct{}, len(in))
out := make([]string, 0, len(in))
for _, s := range in {
if _, ok := seen[s]; !ok {
seen[s] = struct{}{}
out = append(out, s)
}
}
return out
}

// IsControlPlaneMachine checks machine is a control plane node.
func IsControlPlaneMachine(m *infrav1beta1.MaasMachine) bool {
_, ok := m.ObjectMeta.Labels[clusterv1.MachineControlPlaneLabel]
Expand Down
Loading