diff --git a/Makefile b/Makefile index 702e23cc5..882008806 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ RELEASE_DIR := _build/release DEV_DIR := _build/dev REPO_ROOT := $(shell git rev-parse --show-toplevel) FIPS_ENABLE ?= "" -BUILDER_GOLANG_VERSION ?= 1.24.11 +BUILDER_GOLANG_VERSION ?= 1.24.13 BUILD_ARGS = --build-arg CRYPTO_LIB=${FIPS_ENABLE} --build-arg BUILDER_GOLANG_VERSION=${BUILDER_GOLANG_VERSION} ARCH ?= amd64 ALL_ARCH = amd64 arm64 @@ -26,7 +26,7 @@ endif # Image URL to use all building/pushing image targets IMAGE_NAME := cluster-api-provider-maas-controller REGISTRY ?= "us-east1-docker.pkg.dev/spectro-images/dev/${USER}/cluster-api" -SPECTRO_VERSION ?= 4.8.3-dev-12112025 +SPECTRO_VERSION ?= 4.8.3-dev-tmo-19022026 IMG_TAG ?= v0.6.1-spectro-${SPECTRO_VERSION} CONTROLLER_IMG ?= ${REGISTRY}/${IMAGE_NAME} diff --git a/api/v1beta1/maasmachine_types.go b/api/v1beta1/maasmachine_types.go index 8d29fa7c2..9d96a65dd 100644 --- a/api/v1beta1/maasmachine_types.go +++ b/api/v1beta1/maasmachine_types.go @@ -130,6 +130,12 @@ type VMConfig struct { // +optional Network string `json:"network,omitempty"` + // InterfaceLinkModes sets the MAAS link mode per interface (e.g. eth0, eth1, eth2). + // Keys are interface names ("eth0", "eth1", ...); values: "auto", "dhcp", "static", "link_up". + // When unset for an interface: eth0 defaults to "auto", others to "dhcp". Extensible for future interfaces. + // +optional + InterfaceLinkModes map[string]string `json:"interfaceLinkModes,omitempty"` + // AutoStart specifies whether the VM should automatically start // +kubebuilder:default=true // +optional diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go index 411d0db29..3c14aec5b 100644 --- a/api/v1beta1/zz_generated.deepcopy.go +++ b/api/v1beta1/zz_generated.deepcopy.go @@ -253,6 +253,11 @@ func (in *MaasMachineSpec) DeepCopyInto(out *MaasMachineSpec) { *out = new(string) **out = **in } + if in.Parent != nil { + in, out := &in.Parent, &out.Parent + *out = new(string) + **out = **in + } if in.ProviderID != nil { in, out := &in.ProviderID, &out.ProviderID *out = new(string) @@ -530,6 +535,13 @@ func (in *VMConfig) DeepCopyInto(out *VMConfig) { *out = new(int) **out = **in } + if in.InterfaceLinkModes != nil { + in, out := &in.InterfaceLinkModes, &out.InterfaceLinkModes + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } if in.AutoStart != nil { in, out := &in.AutoStart, &out.AutoStart *out = new(bool) diff --git a/controllers/maasmachine_controller.go b/controllers/maasmachine_controller.go index 79d1261f8..971b984f9 100644 --- a/controllers/maasmachine_controller.go +++ b/controllers/maasmachine_controller.go @@ -439,12 +439,14 @@ func (r *MaasMachineReconciler) reconcileNormal(ctx context.Context, machineScop // TODO(saamalik) confirm that we'll never "recreate" a m; e.g: findMachine should always return err // if there used to be a m if m == nil || !(m.State == infrav1beta1.MachineStateDeployed || m.State == infrav1beta1.MachineStateDeploying) { - // If machine is in Ready state, verify network interfaces before deploying - // This ensures correct subnet assignment before deployment starts - if m != nil && m.State == infrav1beta1.MachineStateReady && machineScope.GetDynamicLXD() { - machineScope.Info("Machine is in Ready state, verifying network interfaces before deployment", "machineID", m.ID) + // If machine is in Ready or Allocated state, verify network interfaces before deploying. + // This ensures correct subnet assignment before deployment starts (avoids wrong eth0 subnet + // on 2nd VM when MAAS compose links eth0 to a different subnet than requested). + if m != nil && machineScope.GetDynamicLXD() && + (m.State == infrav1beta1.MachineStateReady || m.State == infrav1beta1.MachineStateAllocated) { + machineScope.Info("Verifying VM network interfaces before deployment", "machineID", m.ID, "state", m.State) if err := machineSvc.VerifyVMNetworkInterfaces(ctx, m.ID); err != nil { - machineScope.Error(err, "Failed to verify VM network interfaces in Ready state, requeuing", "machineID", m.ID) + machineScope.Error(err, "Failed to verify VM network interfaces before deploy, requeuing", "machineID", m.ID) conditions.MarkFalse(machineScope.MaasMachine, infrav1beta1.MachineDeployedCondition, infrav1beta1.MachineDeployingReason, clusterv1.ConditionSeverityWarning, "verifying network interfaces") return ctrl.Result{RequeueAfter: 10 * time.Second}, nil } @@ -502,14 +504,14 @@ func (r *MaasMachineReconciler) reconcileNormal(ctx context.Context, machineScop } switch s := m.State; { - case s == infrav1beta1.MachineStateReady: + case s == infrav1beta1.MachineStateReady, s == infrav1beta1.MachineStateAllocated: machineScope.SetNotReady() conditions.MarkFalse(machineScope.MaasMachine, infrav1beta1.MachineDeployedCondition, infrav1beta1.MachineDeployingReason, clusterv1.ConditionSeverityWarning, "") // Note: Network interface verification happens before deployment is triggered (see above). - // This is a safety check in case the machine reached Ready state through a different path. + // This is a safety check in case the machine reached Ready/Allocated through a different path. if machineScope.GetDynamicLXD() { if err := machineSvc.VerifyVMNetworkInterfaces(ctx, m.ID); err != nil { - machineScope.Error(err, "Failed to verify VM network interfaces in Ready state (safety check)", "machineID", m.ID) + machineScope.Error(err, "Failed to verify VM network interfaces (safety check)", "machineID", m.ID, "state", s) // Requeue to retry verification - deployment should not proceed until interfaces are correct return ctrl.Result{RequeueAfter: 10 * time.Second}, nil } @@ -534,7 +536,7 @@ func (r *MaasMachineReconciler) reconcileNormal(ctx context.Context, machineScop machineScope.SetNotReady() machineScope.Info("Machine is powered off!") conditions.MarkFalse(machineScope.MaasMachine, infrav1beta1.MachineDeployedCondition, infrav1beta1.MachinePoweredOffReason, clusterv1.ConditionSeverityWarning, "") - case s == infrav1beta1.MachineStateDeploying, s == infrav1beta1.MachineStateAllocated: + case s == infrav1beta1.MachineStateDeploying: machineScope.SetNotReady() conditions.MarkFalse(machineScope.MaasMachine, infrav1beta1.MachineDeployedCondition, infrav1beta1.MachineDeployingReason, clusterv1.ConditionSeverityWarning, "") case s == infrav1beta1.MachineStateDeployed: diff --git a/go.mod b/go.mod index b4977862f..d00669054 100644 --- a/go.mod +++ b/go.mod @@ -2,7 +2,7 @@ module github.com/spectrocloud/cluster-api-provider-maas go 1.24.2 -toolchain go1.24.11 +toolchain go1.24.13 require ( github.com/go-logr/logr v1.4.2 @@ -11,7 +11,7 @@ require ( github.com/onsi/ginkgo v1.16.5 github.com/onsi/gomega v1.36.3 github.com/pkg/errors v0.9.1 - github.com/spectrocloud/maas-client-go v1.0.0-beta1.0.20251203075453-87a316457f38 + github.com/spectrocloud/maas-client-go v0.1.5-pre github.com/spf13/pflag v1.0.6 k8s.io/api v0.32.3 k8s.io/apiextensions-apiserver v0.32.3 @@ -82,5 +82,6 @@ require ( replace ( github.com/prometheus/common v0.32.1 => github.com/prometheus/common v0.26.0 + github.com/spectrocloud/maas-client-go => github.com/spectrocloud/maas-client-go v0.1.5-pre sigs.k8s.io/structured-merge-diff/v6 => sigs.k8s.io/structured-merge-diff/v4 v4.4.3 ) diff --git a/go.sum b/go.sum index db1bbc82c..cd36a65dc 100644 --- a/go.sum +++ b/go.sum @@ -168,8 +168,8 @@ github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k= github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME= -github.com/spectrocloud/maas-client-go v1.0.0-beta1.0.20251203075453-87a316457f38 h1:szvztoXh1SrvZtediy5N7oisH53rhf7sTJ1F4bqNMSA= -github.com/spectrocloud/maas-client-go v1.0.0-beta1.0.20251203075453-87a316457f38/go.mod h1:CaqAAlh6/xfzc/cDpU8eMG0wqnwx1ODSyXcH86uV7Ww= +github.com/spectrocloud/maas-client-go v0.1.5-pre h1:j/4JimqO4/T4d5VkbmYJp60TO+gtpIRnA5RBesKFkCA= +github.com/spectrocloud/maas-client-go v0.1.5-pre/go.mod h1:CaqAAlh6/xfzc/cDpU8eMG0wqnwx1ODSyXcH86uV7Ww= github.com/spf13/cast v1.7.1 h1:cuNEagBQEHWN1FnbGEjCXL2szYEXqfJPbP2HNUaca9Y= github.com/spf13/cast v1.7.1/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= diff --git a/pkg/maas/machine/machine.go b/pkg/maas/machine/machine.go index 20eddad1c..c87d601eb 100644 --- a/pkg/maas/machine/machine.go +++ b/pkg/maas/machine/machine.go @@ -1,6 +1,7 @@ package machine import ( + "bytes" "context" "crypto/sha256" "encoding/hex" @@ -286,6 +287,11 @@ func (s *Service) createVMViaMAAS(ctx context.Context, userDataB64 string) (*inf if err != nil { return nil, errors.Wrap(err, "failed to get existing VM by system-id") } + // Verify and fix network interfaces (eth0/eth1 subnets) before deploy so that even if MAAS + // selected the wrong subnet for eth0 after commissioning/acquire, we correct it before deploy. + if err := s.VerifyVMNetworkInterfaces(ctx, m.SystemID()); err != nil { + return nil, errors.Wrap(err, "failed to verify VM network interfaces before deploy") + } // Best-effort: set hostname and static IP before deploy machineName := s.scope.Machine.Name vmName := fmt.Sprintf("vm-%s", machineName) @@ -455,7 +461,10 @@ func (s *Service) PrepareLXDVM(ctx context.Context) (*infrav1beta1.Machine, erro if subnet0 == "" || subnet1 == "" { s.scope.Info("Skipping setting network interfaces due to empty subnet name(s)", "subnet0", subnet0, "subnet1", subnet1) } else { - // Build interfaces parameter: eth0 gets first subnet (MAAS management), eth1 gets second subnet + // Build interfaces parameter: eth0 = first subnet (typically PXE), eth1 = second subnet (typically management). + // eth0 is sent without ip= so it uses DHCP; eth1 gets static IP when configured. Using DHCP on eth0 + // can cause DHCP-provided routes (e.g. to MAAS controller) to be installed on eth0; if the controller + // is on the eth1 network, ensure PXE subnet DHCP does not push those routes, or use static IP for eth0. // Format: "eth0:subnet=;eth1:subnet=" or // "eth0:subnet=;eth1:subnet=,ip=" if static IP is configured interfacesParam := fmt.Sprintf("eth0:subnet=%s;eth1:subnet=%s", subnet0, subnet1) @@ -555,6 +564,24 @@ func (s *Service) getSubnetCIDR(subnet maasclient.Subnet) string { return subnet.CIDR() } +// resolveLinkMode returns the MAAS link mode for an interface: from VMConfig.InterfaceLinkModes, else default (eth0=auto, others=dhcp). +func (s *Service) resolveLinkMode(ifaceName string) string { + defaultMode := maasclient.ModeDHCP + if ifaceName == "eth0" { + defaultMode = maasclient.ModeAuto + } + mm := s.scope.MaasMachine + if mm.Spec.LXD == nil || mm.Spec.LXD.VMConfig == nil || len(mm.Spec.LXD.VMConfig.InterfaceLinkModes) == 0 { + return defaultMode + } + m := strings.TrimSpace(mm.Spec.LXD.VMConfig.InterfaceLinkModes[ifaceName]) + allowed := map[string]bool{maasclient.ModeAuto: true, maasclient.ModeDHCP: true, maasclient.ModeStatic: true, maasclient.ModeLinkUp: true} + if m != "" && allowed[m] { + return m + } + return defaultMode +} + // VerifyVMNetworkInterfaces verifies and fixes LXD VM network interfaces to have expected subnets after commissioning. func (s *Service) VerifyVMNetworkInterfaces(ctx context.Context, systemID string) error { mm := s.scope.MaasMachine @@ -585,7 +612,7 @@ func (s *Service) VerifyVMNetworkInterfaces(ctx context.Context, systemID string return nil } - s.scope.Info("Verifying VM network interfaces", "system-id", systemID, "state", machineState, "expected-subnets", fmt.Sprintf("%s,%s", expected0, expected1)) + s.scope.Info("Verifying VM network interfaces", "system-id", systemID, "state", machineState, "expected-subnets", fmt.Sprintf("%s,%s", expected0, expected1), "eth0-mode", s.resolveLinkMode("eth0"), "eth1-mode", s.resolveLinkMode("eth1")) // Fetch network interfaces - refetch if we encounter issues with subnet data interfaces, err := s.maasClient.NetworkInterfaces().Get(ctx, systemID) @@ -667,11 +694,29 @@ func (s *Service) VerifyVMNetworkInterfaces(ctx context.Context, systemID string } } + // subnetsMatch returns true if actual and expected represent the same subnet (CIDR or name). + subnetsMatch := func(actual, expected string) bool { + if actual == "" || expected == "" { + return false + } + if strings.EqualFold(actual, expected) { + return true + } + // If both parse as CIDR, compare by network equality (robust to formatting) + _, actualNet, err1 := net.ParseCIDR(actual) + _, expectedNet, err2 := net.ParseCIDR(expected) + if err1 == nil && err2 == nil && actualNet != nil && expectedNet != nil { + return actualNet.IP.Equal(expectedNet.IP) && actualNet.Mask != nil && expectedNet.Mask != nil && + bytes.Equal(actualNet.Mask, expectedNet.Mask) + } + return false + } + var aggErr error if eth0Iface != nil { - if eth0Subnet == "" || !strings.EqualFold(eth0Subnet, expected0) { + if eth0Subnet == "" || !subnetsMatch(eth0Subnet, expected0) { s.scope.Info("Fixing eth0 subnet mismatch", "system-id", systemID, "expected", expected0, "actual", eth0Subnet) - if err := s.fixInterfaceSubnet(ctx, systemID, eth0Iface, expected0, "eth0"); err != nil { + if err := s.fixInterfaceSubnet(ctx, systemID, eth0Iface, expected0, "eth0", s.resolveLinkMode("eth0")); err != nil { s.scope.Error(err, "Failed to fix eth0 subnet", "system-id", systemID, "expected", expected0, "actual", eth0Subnet) aggErr = errors.Wrap(err, "eth0 subnet correction failed") } else { @@ -685,9 +730,9 @@ func (s *Service) VerifyVMNetworkInterfaces(ctx context.Context, systemID string } if eth1Iface != nil { - if eth1Subnet == "" || !strings.EqualFold(eth1Subnet, expected1) { + if eth1Subnet == "" || !subnetsMatch(eth1Subnet, expected1) { s.scope.Info("Fixing eth1 subnet mismatch", "system-id", systemID, "expected", expected1, "actual", eth1Subnet) - if err := s.fixInterfaceSubnet(ctx, systemID, eth1Iface, expected1, "eth1"); err != nil { + if err := s.fixInterfaceSubnet(ctx, systemID, eth1Iface, expected1, "eth1", s.resolveLinkMode("eth1")); err != nil { s.scope.Error(err, "Failed to fix eth1 subnet", "system-id", systemID, "expected", expected1, "actual", eth1Subnet) if aggErr != nil { aggErr = errors.Wrap(aggErr, err.Error()) @@ -711,7 +756,7 @@ func (s *Service) VerifyVMNetworkInterfaces(ctx context.Context, systemID string return aggErr } -func (s *Service) fixInterfaceSubnet(ctx context.Context, systemID string, iface maasclient.NetworkInterface, expectedSubnetIdentifier, ifaceName string) error { +func (s *Service) fixInterfaceSubnet(ctx context.Context, systemID string, iface maasclient.NetworkInterface, expectedSubnetIdentifier, ifaceName string, linkMode string) error { interfaceID := iface.ID() ifaceClient := s.maasClient.NetworkInterfaces().Interface(systemID, interfaceID) @@ -724,13 +769,19 @@ func (s *Service) fixInterfaceSubnet(ctx context.Context, systemID string, iface } } - // Link the subnet with auto IP assignment (empty string means auto/DHCP) - // The MAAS API accepts subnet names, CIDRs, or IDs directly - if err := ifaceClient.LinkSubnet(ctx, expectedSubnetIdentifier, ""); err != nil { - return fmt.Errorf("failed to link subnet %s to %s: %w", expectedSubnetIdentifier, ifaceName, err) + // Link the subnet with the given mode (from VMConfig.Eth0LinkMode/Eth1LinkMode or defaults). + // When linkMode is empty we use LinkSubnet (client default: auto when no IP). + if linkMode != "" { + if err := ifaceClient.LinkSubnetWithMode(ctx, expectedSubnetIdentifier, linkMode, ""); err != nil { + return fmt.Errorf("failed to link subnet %s to %s (mode=%s): %w", expectedSubnetIdentifier, ifaceName, linkMode, err) + } + } else { + if err := ifaceClient.LinkSubnet(ctx, expectedSubnetIdentifier, ""); err != nil { + return fmt.Errorf("failed to link subnet %s to %s: %w", expectedSubnetIdentifier, ifaceName, err) + } } - s.scope.Info("Fixed subnet on interface", "system-id", systemID, "interface", ifaceName, "subnet", expectedSubnetIdentifier) + s.scope.Info("Fixed subnet on interface", "system-id", systemID, "interface", ifaceName, "subnet", expectedSubnetIdentifier, "link-mode", linkMode) return nil }