Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ RELEASE_DIR := _build/release
DEV_DIR := _build/dev
REPO_ROOT := $(shell git rev-parse --show-toplevel)
FIPS_ENABLE ?= ""
BUILDER_GOLANG_VERSION ?= 1.24.11
BUILDER_GOLANG_VERSION ?= 1.24.13
BUILD_ARGS = --build-arg CRYPTO_LIB=${FIPS_ENABLE} --build-arg BUILDER_GOLANG_VERSION=${BUILDER_GOLANG_VERSION}
ARCH ?= amd64
ALL_ARCH = amd64 arm64
Expand All @@ -26,7 +26,7 @@ endif
# Image URL to use all building/pushing image targets
IMAGE_NAME := cluster-api-provider-maas-controller
REGISTRY ?= "us-east1-docker.pkg.dev/spectro-images/dev/${USER}/cluster-api"
SPECTRO_VERSION ?= 4.8.3-dev-12112025
SPECTRO_VERSION ?= 4.8.3-dev-tmo-19022026
IMG_TAG ?= v0.6.1-spectro-${SPECTRO_VERSION}
CONTROLLER_IMG ?= ${REGISTRY}/${IMAGE_NAME}

Expand Down
6 changes: 6 additions & 0 deletions api/v1beta1/maasmachine_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,12 @@ type VMConfig struct {
// +optional
Network string `json:"network,omitempty"`

// InterfaceLinkModes sets the MAAS link mode per interface (e.g. eth0, eth1, eth2).
// Keys are interface names ("eth0", "eth1", ...); values: "auto", "dhcp", "static", "link_up".
// When unset for an interface: eth0 defaults to "auto", others to "dhcp". Extensible for future interfaces.
// +optional
InterfaceLinkModes map[string]string `json:"interfaceLinkModes,omitempty"`

// AutoStart specifies whether the VM should automatically start
// +kubebuilder:default=true
// +optional
Expand Down
12 changes: 12 additions & 0 deletions api/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 11 additions & 9 deletions controllers/maasmachine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -439,12 +439,14 @@ func (r *MaasMachineReconciler) reconcileNormal(ctx context.Context, machineScop
// TODO(saamalik) confirm that we'll never "recreate" a m; e.g: findMachine should always return err
// if there used to be a m
if m == nil || !(m.State == infrav1beta1.MachineStateDeployed || m.State == infrav1beta1.MachineStateDeploying) {
// If machine is in Ready state, verify network interfaces before deploying
// This ensures correct subnet assignment before deployment starts
if m != nil && m.State == infrav1beta1.MachineStateReady && machineScope.GetDynamicLXD() {
machineScope.Info("Machine is in Ready state, verifying network interfaces before deployment", "machineID", m.ID)
// If machine is in Ready or Allocated state, verify network interfaces before deploying.
// This ensures correct subnet assignment before deployment starts (avoids wrong eth0 subnet
// on 2nd VM when MAAS compose links eth0 to a different subnet than requested).
if m != nil && machineScope.GetDynamicLXD() &&
(m.State == infrav1beta1.MachineStateReady || m.State == infrav1beta1.MachineStateAllocated) {
machineScope.Info("Verifying VM network interfaces before deployment", "machineID", m.ID, "state", m.State)
if err := machineSvc.VerifyVMNetworkInterfaces(ctx, m.ID); err != nil {
machineScope.Error(err, "Failed to verify VM network interfaces in Ready state, requeuing", "machineID", m.ID)
machineScope.Error(err, "Failed to verify VM network interfaces before deploy, requeuing", "machineID", m.ID)
conditions.MarkFalse(machineScope.MaasMachine, infrav1beta1.MachineDeployedCondition, infrav1beta1.MachineDeployingReason, clusterv1.ConditionSeverityWarning, "verifying network interfaces")
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
}
Expand Down Expand Up @@ -502,14 +504,14 @@ func (r *MaasMachineReconciler) reconcileNormal(ctx context.Context, machineScop
}

switch s := m.State; {
case s == infrav1beta1.MachineStateReady:
case s == infrav1beta1.MachineStateReady, s == infrav1beta1.MachineStateAllocated:
machineScope.SetNotReady()
conditions.MarkFalse(machineScope.MaasMachine, infrav1beta1.MachineDeployedCondition, infrav1beta1.MachineDeployingReason, clusterv1.ConditionSeverityWarning, "")
// Note: Network interface verification happens before deployment is triggered (see above).
// This is a safety check in case the machine reached Ready state through a different path.
// This is a safety check in case the machine reached Ready/Allocated through a different path.
if machineScope.GetDynamicLXD() {
if err := machineSvc.VerifyVMNetworkInterfaces(ctx, m.ID); err != nil {
machineScope.Error(err, "Failed to verify VM network interfaces in Ready state (safety check)", "machineID", m.ID)
machineScope.Error(err, "Failed to verify VM network interfaces (safety check)", "machineID", m.ID, "state", s)
// Requeue to retry verification - deployment should not proceed until interfaces are correct
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
}
Expand All @@ -534,7 +536,7 @@ func (r *MaasMachineReconciler) reconcileNormal(ctx context.Context, machineScop
machineScope.SetNotReady()
machineScope.Info("Machine is powered off!")
conditions.MarkFalse(machineScope.MaasMachine, infrav1beta1.MachineDeployedCondition, infrav1beta1.MachinePoweredOffReason, clusterv1.ConditionSeverityWarning, "")
case s == infrav1beta1.MachineStateDeploying, s == infrav1beta1.MachineStateAllocated:
case s == infrav1beta1.MachineStateDeploying:
machineScope.SetNotReady()
conditions.MarkFalse(machineScope.MaasMachine, infrav1beta1.MachineDeployedCondition, infrav1beta1.MachineDeployingReason, clusterv1.ConditionSeverityWarning, "")
case s == infrav1beta1.MachineStateDeployed:
Expand Down
5 changes: 3 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ module github.com/spectrocloud/cluster-api-provider-maas

go 1.24.2

toolchain go1.24.11
toolchain go1.24.13

require (
github.com/go-logr/logr v1.4.2
Expand All @@ -11,7 +11,7 @@ require (
github.com/onsi/ginkgo v1.16.5
github.com/onsi/gomega v1.36.3
github.com/pkg/errors v0.9.1
github.com/spectrocloud/maas-client-go v1.0.0-beta1.0.20251203075453-87a316457f38
github.com/spectrocloud/maas-client-go v0.1.5-pre
github.com/spf13/pflag v1.0.6
k8s.io/api v0.32.3
k8s.io/apiextensions-apiserver v0.32.3
Expand Down Expand Up @@ -82,5 +82,6 @@ require (

replace (
github.com/prometheus/common v0.32.1 => github.com/prometheus/common v0.26.0
github.com/spectrocloud/maas-client-go => github.com/spectrocloud/maas-client-go v0.1.5-pre
sigs.k8s.io/structured-merge-diff/v6 => sigs.k8s.io/structured-merge-diff/v4 v4.4.3
)
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,8 @@ github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
github.com/spectrocloud/maas-client-go v1.0.0-beta1.0.20251203075453-87a316457f38 h1:szvztoXh1SrvZtediy5N7oisH53rhf7sTJ1F4bqNMSA=
github.com/spectrocloud/maas-client-go v1.0.0-beta1.0.20251203075453-87a316457f38/go.mod h1:CaqAAlh6/xfzc/cDpU8eMG0wqnwx1ODSyXcH86uV7Ww=
github.com/spectrocloud/maas-client-go v0.1.5-pre h1:j/4JimqO4/T4d5VkbmYJp60TO+gtpIRnA5RBesKFkCA=
github.com/spectrocloud/maas-client-go v0.1.5-pre/go.mod h1:CaqAAlh6/xfzc/cDpU8eMG0wqnwx1ODSyXcH86uV7Ww=
github.com/spf13/cast v1.7.1 h1:cuNEagBQEHWN1FnbGEjCXL2szYEXqfJPbP2HNUaca9Y=
github.com/spf13/cast v1.7.1/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo=
Expand Down
75 changes: 63 additions & 12 deletions pkg/maas/machine/machine.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package machine

import (
"bytes"
"context"
"crypto/sha256"
"encoding/hex"
Expand Down Expand Up @@ -286,6 +287,11 @@ func (s *Service) createVMViaMAAS(ctx context.Context, userDataB64 string) (*inf
if err != nil {
return nil, errors.Wrap(err, "failed to get existing VM by system-id")
}
// Verify and fix network interfaces (eth0/eth1 subnets) before deploy so that even if MAAS
// selected the wrong subnet for eth0 after commissioning/acquire, we correct it before deploy.
if err := s.VerifyVMNetworkInterfaces(ctx, m.SystemID()); err != nil {
return nil, errors.Wrap(err, "failed to verify VM network interfaces before deploy")
}
// Best-effort: set hostname and static IP before deploy
machineName := s.scope.Machine.Name
vmName := fmt.Sprintf("vm-%s", machineName)
Expand Down Expand Up @@ -455,7 +461,10 @@ func (s *Service) PrepareLXDVM(ctx context.Context) (*infrav1beta1.Machine, erro
if subnet0 == "" || subnet1 == "" {
s.scope.Info("Skipping setting network interfaces due to empty subnet name(s)", "subnet0", subnet0, "subnet1", subnet1)
} else {
// Build interfaces parameter: eth0 gets first subnet (MAAS management), eth1 gets second subnet
// Build interfaces parameter: eth0 = first subnet (typically PXE), eth1 = second subnet (typically management).
// eth0 is sent without ip= so it uses DHCP; eth1 gets static IP when configured. Using DHCP on eth0
// can cause DHCP-provided routes (e.g. to MAAS controller) to be installed on eth0; if the controller
// is on the eth1 network, ensure PXE subnet DHCP does not push those routes, or use static IP for eth0.
// Format: "eth0:subnet=<subnet-name>;eth1:subnet=<subnet-name>" or
// "eth0:subnet=<subnet-name>;eth1:subnet=<subnet-name>,ip=<static-ip>" if static IP is configured
interfacesParam := fmt.Sprintf("eth0:subnet=%s;eth1:subnet=%s", subnet0, subnet1)
Expand Down Expand Up @@ -555,6 +564,24 @@ func (s *Service) getSubnetCIDR(subnet maasclient.Subnet) string {
return subnet.CIDR()
}

// resolveLinkMode returns the MAAS link mode for an interface: from VMConfig.InterfaceLinkModes, else default (eth0=auto, others=dhcp).
func (s *Service) resolveLinkMode(ifaceName string) string {
defaultMode := maasclient.ModeDHCP
if ifaceName == "eth0" {
defaultMode = maasclient.ModeAuto
}
mm := s.scope.MaasMachine
if mm.Spec.LXD == nil || mm.Spec.LXD.VMConfig == nil || len(mm.Spec.LXD.VMConfig.InterfaceLinkModes) == 0 {
return defaultMode
}
m := strings.TrimSpace(mm.Spec.LXD.VMConfig.InterfaceLinkModes[ifaceName])
allowed := map[string]bool{maasclient.ModeAuto: true, maasclient.ModeDHCP: true, maasclient.ModeStatic: true, maasclient.ModeLinkUp: true}
if m != "" && allowed[m] {
return m
}
return defaultMode
}
Comment on lines +567 to +583
Copy link

Copilot AI Feb 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new resolveLinkMode function and the modified VerifyVMNetworkInterfaces function that now uses it lack test coverage. Consider adding unit tests to verify: 1) default mode selection for eth0 (auto) vs other interfaces (dhcp), 2) custom mode selection from InterfaceLinkModes map, 3) validation of allowed modes, and 4) handling of invalid/empty mode values.

Copilot uses AI. Check for mistakes.

// VerifyVMNetworkInterfaces verifies and fixes LXD VM network interfaces to have expected subnets after commissioning.
func (s *Service) VerifyVMNetworkInterfaces(ctx context.Context, systemID string) error {
mm := s.scope.MaasMachine
Expand Down Expand Up @@ -585,7 +612,7 @@ func (s *Service) VerifyVMNetworkInterfaces(ctx context.Context, systemID string
return nil
}

s.scope.Info("Verifying VM network interfaces", "system-id", systemID, "state", machineState, "expected-subnets", fmt.Sprintf("%s,%s", expected0, expected1))
s.scope.Info("Verifying VM network interfaces", "system-id", systemID, "state", machineState, "expected-subnets", fmt.Sprintf("%s,%s", expected0, expected1), "eth0-mode", s.resolveLinkMode("eth0"), "eth1-mode", s.resolveLinkMode("eth1"))

// Fetch network interfaces - refetch if we encounter issues with subnet data
interfaces, err := s.maasClient.NetworkInterfaces().Get(ctx, systemID)
Expand Down Expand Up @@ -667,11 +694,29 @@ func (s *Service) VerifyVMNetworkInterfaces(ctx context.Context, systemID string
}
}

// subnetsMatch returns true if actual and expected represent the same subnet (CIDR or name).
subnetsMatch := func(actual, expected string) bool {
if actual == "" || expected == "" {
return false
}
if strings.EqualFold(actual, expected) {
return true
}
// If both parse as CIDR, compare by network equality (robust to formatting)
_, actualNet, err1 := net.ParseCIDR(actual)
_, expectedNet, err2 := net.ParseCIDR(expected)
if err1 == nil && err2 == nil && actualNet != nil && expectedNet != nil {
return actualNet.IP.Equal(expectedNet.IP) && actualNet.Mask != nil && expectedNet.Mask != nil &&
bytes.Equal(actualNet.Mask, expectedNet.Mask)
}
return false
}

var aggErr error
if eth0Iface != nil {
if eth0Subnet == "" || !strings.EqualFold(eth0Subnet, expected0) {
if eth0Subnet == "" || !subnetsMatch(eth0Subnet, expected0) {
s.scope.Info("Fixing eth0 subnet mismatch", "system-id", systemID, "expected", expected0, "actual", eth0Subnet)
if err := s.fixInterfaceSubnet(ctx, systemID, eth0Iface, expected0, "eth0"); err != nil {
if err := s.fixInterfaceSubnet(ctx, systemID, eth0Iface, expected0, "eth0", s.resolveLinkMode("eth0")); err != nil {
s.scope.Error(err, "Failed to fix eth0 subnet", "system-id", systemID, "expected", expected0, "actual", eth0Subnet)
aggErr = errors.Wrap(err, "eth0 subnet correction failed")
} else {
Expand All @@ -685,9 +730,9 @@ func (s *Service) VerifyVMNetworkInterfaces(ctx context.Context, systemID string
}

if eth1Iface != nil {
if eth1Subnet == "" || !strings.EqualFold(eth1Subnet, expected1) {
if eth1Subnet == "" || !subnetsMatch(eth1Subnet, expected1) {
s.scope.Info("Fixing eth1 subnet mismatch", "system-id", systemID, "expected", expected1, "actual", eth1Subnet)
if err := s.fixInterfaceSubnet(ctx, systemID, eth1Iface, expected1, "eth1"); err != nil {
if err := s.fixInterfaceSubnet(ctx, systemID, eth1Iface, expected1, "eth1", s.resolveLinkMode("eth1")); err != nil {
s.scope.Error(err, "Failed to fix eth1 subnet", "system-id", systemID, "expected", expected1, "actual", eth1Subnet)
if aggErr != nil {
aggErr = errors.Wrap(aggErr, err.Error())
Expand All @@ -711,7 +756,7 @@ func (s *Service) VerifyVMNetworkInterfaces(ctx context.Context, systemID string
return aggErr
}

func (s *Service) fixInterfaceSubnet(ctx context.Context, systemID string, iface maasclient.NetworkInterface, expectedSubnetIdentifier, ifaceName string) error {
func (s *Service) fixInterfaceSubnet(ctx context.Context, systemID string, iface maasclient.NetworkInterface, expectedSubnetIdentifier, ifaceName string, linkMode string) error {
interfaceID := iface.ID()
ifaceClient := s.maasClient.NetworkInterfaces().Interface(systemID, interfaceID)

Expand All @@ -724,13 +769,19 @@ func (s *Service) fixInterfaceSubnet(ctx context.Context, systemID string, iface
}
}

// Link the subnet with auto IP assignment (empty string means auto/DHCP)
// The MAAS API accepts subnet names, CIDRs, or IDs directly
if err := ifaceClient.LinkSubnet(ctx, expectedSubnetIdentifier, ""); err != nil {
return fmt.Errorf("failed to link subnet %s to %s: %w", expectedSubnetIdentifier, ifaceName, err)
// Link the subnet with the given mode (from VMConfig.Eth0LinkMode/Eth1LinkMode or defaults).
// When linkMode is empty we use LinkSubnet (client default: auto when no IP).
if linkMode != "" {
if err := ifaceClient.LinkSubnetWithMode(ctx, expectedSubnetIdentifier, linkMode, ""); err != nil {
return fmt.Errorf("failed to link subnet %s to %s (mode=%s): %w", expectedSubnetIdentifier, ifaceName, linkMode, err)
}
} else {
if err := ifaceClient.LinkSubnet(ctx, expectedSubnetIdentifier, ""); err != nil {
return fmt.Errorf("failed to link subnet %s to %s: %w", expectedSubnetIdentifier, ifaceName, err)
}
Comment on lines +773 to +781
Copy link

Copilot AI Feb 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The resolveLinkMode function always returns a non-empty string (either a configured mode or a default mode). This means the condition "if linkMode != ''" on line 774 will always be true, making the else block (lines 778-782) unreachable dead code. Consider removing the conditional check and the else branch, keeping only the LinkSubnetWithMode call.

Suggested change
// When linkMode is empty we use LinkSubnet (client default: auto when no IP).
if linkMode != "" {
if err := ifaceClient.LinkSubnetWithMode(ctx, expectedSubnetIdentifier, linkMode, ""); err != nil {
return fmt.Errorf("failed to link subnet %s to %s (mode=%s): %w", expectedSubnetIdentifier, ifaceName, linkMode, err)
}
} else {
if err := ifaceClient.LinkSubnet(ctx, expectedSubnetIdentifier, ""); err != nil {
return fmt.Errorf("failed to link subnet %s to %s: %w", expectedSubnetIdentifier, ifaceName, err)
}
// resolveLinkMode always returns a non-empty mode, so we always use LinkSubnetWithMode.
if err := ifaceClient.LinkSubnetWithMode(ctx, expectedSubnetIdentifier, linkMode, ""); err != nil {
return fmt.Errorf("failed to link subnet %s to %s (mode=%s): %w", expectedSubnetIdentifier, ifaceName, linkMode, err)

Copilot uses AI. Check for mistakes.
}

s.scope.Info("Fixed subnet on interface", "system-id", systemID, "interface", ifaceName, "subnet", expectedSubnetIdentifier)
s.scope.Info("Fixed subnet on interface", "system-id", systemID, "interface", ifaceName, "subnet", expectedSubnetIdentifier, "link-mode", linkMode)
return nil
}

Expand Down