From 8917637dd69ed26c766132310ea60a24a79ecd28 Mon Sep 17 00:00:00 2001 From: Tom Wieczorek Date: Tue, 28 Jan 2025 17:37:05 +0100 Subject: [PATCH] Share node name detection between controller and worker The controller uses the node name solely when counting the number of active controllers. Albeit it's not super-important to match the actual worker node name in any case, it is probably a good practice and avoids weird edge cases. Use the Kubernetes API machinery's NodeName newtype to "tag" the node name in a type safe manner, i.e. so that it can't be mixed with other arbitrary strings. Signed-off-by: Tom Wieczorek --- cmd/controller/controller.go | 14 +++++-- cmd/worker/worker.go | 39 +++++++++++++++-- pkg/autopilot/common/hostname.go | 7 ++-- .../controller/controllersleasecounter.go | 12 ++---- pkg/component/worker/kubelet.go | 42 +++++++++---------- pkg/component/worker/utils.go | 22 +--------- pkg/node/nodename.go | 13 +++--- pkg/node/nodename_other.go | 2 +- pkg/node/nodename_test.go | 14 ++++--- pkg/node/nodename_windows.go | 2 +- pkg/node/nodename_windows_test.go | 15 ++++--- 11 files changed, 102 insertions(+), 80 deletions(-) diff --git a/cmd/controller/controller.go b/cmd/controller/controller.go index faab52ee93bd..d07e4c5d27d4 100644 --- a/cmd/controller/controller.go +++ b/cmd/controller/controller.go @@ -37,6 +37,7 @@ import ( "github.com/k0sproject/k0s/internal/pkg/dir" "github.com/k0sproject/k0s/internal/pkg/file" internallog "github.com/k0sproject/k0s/internal/pkg/log" + "github.com/k0sproject/k0s/internal/pkg/stringmap" "github.com/k0sproject/k0s/internal/pkg/sysinfo" "github.com/k0sproject/k0s/internal/sync/value" "github.com/k0sproject/k0s/pkg/apis/k0s/v1beta1" @@ -64,6 +65,7 @@ import ( "github.com/sirupsen/logrus" "github.com/spf13/cobra" "k8s.io/apimachinery/pkg/fields" + apitypes "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/rest" ) @@ -293,8 +295,14 @@ func (c *command) start(ctx context.Context) error { DisableEndpointReconciler: enableK0sEndpointReconciler, }) + nodeName, kubeletExtraArgs, err := workercmd.GetNodeName(&c.WorkerOptions) + if err != nil { + return fmt.Errorf("failed to determine node name: %w", err) + } + if !c.SingleNode { nodeComponents.Add(ctx, &controller.K0sControllersLeaseCounter{ + NodeName: nodeName, InvocationID: c.K0sVars.InvocationID, ClusterConfig: nodeConfig, KubeClientFactory: adminClientFactory, @@ -622,7 +630,7 @@ func (c *command) start(ctx context.Context) error { if c.EnableWorker { perfTimer.Checkpoint("starting-worker") - if err := c.startWorker(ctx, c.WorkerProfile, nodeConfig); err != nil { + if err := c.startWorker(ctx, nodeName, kubeletExtraArgs, c.WorkerProfile, nodeConfig); err != nil { logrus.WithError(err).Error("Failed to start controller worker") } else { perfTimer.Checkpoint("started-worker") @@ -641,7 +649,7 @@ func (c *command) start(ctx context.Context) error { return nil } -func (c *command) startWorker(ctx context.Context, profile string, nodeConfig *v1beta1.ClusterConfig) error { +func (c *command) startWorker(ctx context.Context, nodeName apitypes.NodeName, kubeletExtraArgs stringmap.StringMap, profile string, nodeConfig *v1beta1.ClusterConfig) error { var bootstrapConfig string if !file.Exists(c.K0sVars.KubeletAuthConfigPath) { // wait for controller to start up @@ -684,7 +692,7 @@ func (c *command) startWorker(ctx context.Context, profile string, nodeConfig *v taint := fields.OneTermEqualSelector(key, ":NoSchedule") wc.Taints = append(wc.Taints, taint.String()) } - return wc.Start(ctx) + return wc.Start(ctx, nodeName, kubeletExtraArgs) } // If we've got an etcd data directory in place for embedded etcd, or a ca for diff --git a/cmd/worker/worker.go b/cmd/worker/worker.go index 2f1fcea6d771..c49ed02b4338 100644 --- a/cmd/worker/worker.go +++ b/cmd/worker/worker.go @@ -25,7 +25,9 @@ import ( "runtime" "syscall" + "github.com/k0sproject/k0s/internal/pkg/flags" internallog "github.com/k0sproject/k0s/internal/pkg/log" + "github.com/k0sproject/k0s/internal/pkg/stringmap" "github.com/k0sproject/k0s/internal/pkg/sysinfo" "github.com/k0sproject/k0s/pkg/component/iptables" "github.com/k0sproject/k0s/pkg/component/manager" @@ -36,6 +38,9 @@ import ( "github.com/k0sproject/k0s/pkg/component/worker/nllb" "github.com/k0sproject/k0s/pkg/config" "github.com/k0sproject/k0s/pkg/kubernetes" + "github.com/k0sproject/k0s/pkg/node" + + apitypes "k8s.io/apimachinery/pkg/types" "github.com/sirupsen/logrus" "github.com/spf13/cobra" @@ -77,6 +82,11 @@ func NewWorkerCmd() *cobra.Command { return errors.New("you can only pass one token argument either as a CLI argument 'k0s worker [token]' or as a flag 'k0s worker --token-file [path]'") } + nodeName, kubeletExtraArgs, err := GetNodeName(&c.WorkerOptions) + if err != nil { + return fmt.Errorf("failed to determine node name: %w", err) + } + if err := (&sysinfo.K0sSysinfoSpec{ ControllerRoleEnabled: false, WorkerRoleEnabled: true, @@ -89,7 +99,7 @@ func NewWorkerCmd() *cobra.Command { ctx, cancel := signal.NotifyContext(cmd.Context(), os.Interrupt, syscall.SIGINT, syscall.SIGTERM) defer cancel() - return c.Start(ctx) + return c.Start(ctx, nodeName, kubeletExtraArgs) }, } @@ -101,9 +111,29 @@ func NewWorkerCmd() *cobra.Command { return cmd } +func GetNodeName(opts *config.WorkerOptions) (apitypes.NodeName, stringmap.StringMap, error) { + // The node name used during bootstrapping needs to match the node name + // selected by kubelet. Otherwise, kubelet will have problems interacting + // with a Node object that doesn't match the name in the certificates. + // https://kubernetes.io/docs/reference/access-authn-authz/node/ + + // Kubelet still has some deprecated support for cloud providers, which may + // completely bypass the "standard" node name detection as it's done here. + // K0s only supports external cloud providers, which seems to be a dead code + // path anyways in kubelet. So it's safe to assume that the following code + // exactly matches the behavior of kubelet. + + kubeletExtraArgs := flags.Split(opts.KubeletExtraArgs) + nodeName, err := node.GetNodeName(kubeletExtraArgs["--hostname-override"]) + if err != nil { + return "", nil, err + } + return nodeName, kubeletExtraArgs, nil +} + // Start starts the worker components based on the given [config.CLIOptions]. -func (c *Command) Start(ctx context.Context) error { - if err := worker.BootstrapKubeletKubeconfig(ctx, c.K0sVars, &c.WorkerOptions); err != nil { +func (c *Command) Start(ctx context.Context, nodeName apitypes.NodeName, kubeletExtraArgs stringmap.StringMap) error { + if err := worker.BootstrapKubeletKubeconfig(ctx, c.K0sVars, nodeName, &c.WorkerOptions); err != nil { return err } @@ -156,6 +186,7 @@ func (c *Command) Start(ctx context.Context) error { } componentManager.Add(ctx, &worker.Kubelet{ + NodeName: nodeName, CRISocket: c.CriSocket, EnableCloudProvider: c.CloudProvider, K0sVars: c.K0sVars, @@ -165,7 +196,7 @@ func (c *Command) Start(ctx context.Context) error { LogLevel: c.LogLevels.Kubelet, Labels: c.Labels, Taints: c.Taints, - ExtraArgs: c.KubeletExtraArgs, + ExtraArgs: kubeletExtraArgs, DualStackEnabled: workerConfig.DualStackEnabled, }) diff --git a/pkg/autopilot/common/hostname.go b/pkg/autopilot/common/hostname.go index 502f11eca827..425031c3efe1 100644 --- a/pkg/autopilot/common/hostname.go +++ b/pkg/autopilot/common/hostname.go @@ -29,11 +29,12 @@ const ( // for an AUTOPILOT_HOSTNAME environment variable, falling back to whatever the OS // returns. func FindEffectiveHostname() (string, error) { - return node.GetNodename(os.Getenv(envAutopilotHostname)) + nodeName, err := node.GetNodeName(os.Getenv(envAutopilotHostname)) + return string(nodeName), err } func FindKubeletHostname(kubeletExtraArgs string) string { - defaultNodename, _ := node.GetNodename("") + defaultNodename, _ := node.GetNodeName("") if kubeletExtraArgs != "" { extras := flags.Split(kubeletExtraArgs) nodeName, ok := extras["--hostname-override"] @@ -42,5 +43,5 @@ func FindKubeletHostname(kubeletExtraArgs string) string { } } - return defaultNodename + return string(defaultNodename) } diff --git a/pkg/component/controller/controllersleasecounter.go b/pkg/component/controller/controllersleasecounter.go index 639e755ded08..28a0ee82b013 100644 --- a/pkg/component/controller/controllersleasecounter.go +++ b/pkg/component/controller/controllersleasecounter.go @@ -25,8 +25,8 @@ import ( "github.com/k0sproject/k0s/pkg/component/manager" kubeutil "github.com/k0sproject/k0s/pkg/kubernetes" "github.com/k0sproject/k0s/pkg/leaderelection" - "github.com/k0sproject/k0s/pkg/node" + apitypes "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" @@ -36,6 +36,7 @@ import ( // K0sControllersLeaseCounter implements a component that manages a lease per controller. // The per-controller leases are used to determine the amount of currently running controllers type K0sControllersLeaseCounter struct { + NodeName apitypes.NodeName InvocationID string ClusterConfig *v1beta1.ClusterConfig KubeClientFactory kubeutil.ClientFactoryInterface @@ -59,13 +60,8 @@ func (l *K0sControllersLeaseCounter) Start(context.Context) error { return fmt.Errorf("can't create kubernetes rest client for lease pool: %w", err) } - // hostname used to make the lease names be clear to which controller they belong to - // follow kubelet convention for naming so we e.g. use lowercase hostname etc. - nodeName, err := node.GetNodename("") - if err != nil { - return nil - } - leaseName := "k0s-ctrl-" + nodeName + // Use the node name to make the lease names be clear to which controller they belong to + leaseName := "k0s-ctrl-" + string(l.NodeName) leasePool, err := leaderelection.NewLeasePool(client, leaseName, l.InvocationID, leaderelection.WithLogger(log)) diff --git a/pkg/component/worker/kubelet.go b/pkg/component/worker/kubelet.go index 5c7ecd7f608e..33a96d746bcd 100644 --- a/pkg/component/worker/kubelet.go +++ b/pkg/component/worker/kubelet.go @@ -30,16 +30,15 @@ import ( "github.com/k0sproject/k0s/internal/pkg/dir" "github.com/k0sproject/k0s/internal/pkg/file" - "github.com/k0sproject/k0s/internal/pkg/flags" "github.com/k0sproject/k0s/internal/pkg/stringmap" "github.com/k0sproject/k0s/pkg/assets" "github.com/k0sproject/k0s/pkg/component/manager" "github.com/k0sproject/k0s/pkg/config" "github.com/k0sproject/k0s/pkg/constant" - "github.com/k0sproject/k0s/pkg/node" "github.com/k0sproject/k0s/pkg/supervisor" corev1 "k8s.io/api/core/v1" + apitypes "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/validation" kubeletv1beta1 "k8s.io/kubelet/config/v1beta1" @@ -49,6 +48,7 @@ import ( // Kubelet is the component implementation to manage kubelet type Kubelet struct { + NodeName apitypes.NodeName CRISocket string EnableCloudProvider bool K0sVars *config.CfgVars @@ -59,7 +59,7 @@ type Kubelet struct { ClusterDNS string Labels []string Taints []string - ExtraArgs string + ExtraArgs stringmap.StringMap DualStackEnabled bool configPath string @@ -100,8 +100,8 @@ func (k *Kubelet) Init(_ context.Context) error { return nil } -func lookupHostname(ctx context.Context, hostname string) (ipv4 net.IP, ipv6 net.IP, _ error) { - ipaddrs, err := net.DefaultResolver.LookupIPAddr(ctx, hostname) +func lookupNodeName(ctx context.Context, nodeName apitypes.NodeName) (ipv4 net.IP, ipv6 net.IP, _ error) { + ipaddrs, err := net.DefaultResolver.LookupIPAddr(ctx, string(nodeName)) if err != nil { return nil, nil, err } @@ -142,28 +142,24 @@ func (k *Kubelet) Start(ctx context.Context) error { args["--node-labels"] = strings.Join(k.Labels, ",") } - extras := flags.Split(k.ExtraArgs) - nodename, err := node.GetNodename(extras["--hostname-override"]) - if err != nil { - return fmt.Errorf("failed to get nodename: %w", err) - } - - if k.DualStackEnabled && extras["--node-ip"] == "" { - // Kubelet uses hostname lookup to autodetect the ip address, but - // will only pick one for a single family. Do something similar as - // kubelet but for both ipv6 and ipv6. - // https://github.com/kubernetes/kubernetes/blob/0cc57258c3f8545c8250f0e7a1307fd01b0d283d/pkg/kubelet/nodestatus/setters.go#L196 - ipv4, ipv6, err := lookupHostname(ctx, nodename) + if k.DualStackEnabled && k.ExtraArgs["--node-ip"] == "" { + // Kubelet uses a DNS lookup of the node name to figure out the node IP, + // but will only pick one for a single family. Do something similar as + // kubelet, but for both IPv4 and IPv6. + // https://github.com/kubernetes/kubernetes/blob/v1.32.1/pkg/kubelet/nodestatus/setters.go#L202-L230 + ipv4, ipv6, err := lookupNodeName(ctx, k.NodeName) if err != nil { - logrus.WithError(err).Errorf("failed to lookup %q", nodename) + logrus.WithError(err).Errorf("failed to lookup %q", k.NodeName) } else if ipv4 != nil && ipv6 != nil { + // The kubelet will perform some extra validations on the discovered IP + // addresses in the private function k8s.io/kubernetes/pkg/kubelet.validateNodeIP + // which won't be replicated here. args["--node-ip"] = ipv4.String() + "," + ipv6.String() } } if runtime.GOOS == "windows" { args["--enforce-node-allocatable"] = "" - args["--hostname-override"] = nodename args["--hairpin-mode"] = "promiscuous-bridge" args["--cert-dir"] = "C:\\var\\lib\\k0s\\kubelet_certs" } @@ -181,9 +177,11 @@ func (k *Kubelet) Start(ctx context.Context) error { } // Handle the extra args as last so they can be used to override some k0s "hardcodings" - if k.ExtraArgs != "" { - args.Merge(extras) - } + args.Merge(k.ExtraArgs) + + // Pin the node name that has been figured out by k0s + args["--hostname-override"] = string(k.NodeName) + logrus.Debugf("starting kubelet with args: %v", args) k.supervisor = supervisor.Supervisor{ Name: cmd, diff --git a/pkg/component/worker/utils.go b/pkg/component/worker/utils.go index 369681cc9cd5..a5a7cdb17e52 100644 --- a/pkg/component/worker/utils.go +++ b/pkg/component/worker/utils.go @@ -29,10 +29,8 @@ import ( "github.com/k0sproject/k0s/internal/pkg/dir" "github.com/k0sproject/k0s/internal/pkg/file" - "github.com/k0sproject/k0s/internal/pkg/flags" "github.com/k0sproject/k0s/pkg/config" "github.com/k0sproject/k0s/pkg/constant" - "github.com/k0sproject/k0s/pkg/node" "github.com/k0sproject/k0s/pkg/token" apitypes "k8s.io/apimachinery/pkg/types" @@ -44,7 +42,7 @@ import ( "github.com/sirupsen/logrus" ) -func BootstrapKubeletKubeconfig(ctx context.Context, k0sVars *config.CfgVars, workerOpts *config.WorkerOptions) error { +func BootstrapKubeletKubeconfig(ctx context.Context, k0sVars *config.CfgVars, nodeName apitypes.NodeName, workerOpts *config.WorkerOptions) error { bootstrapKubeconfigPath := filepath.Join(k0sVars.DataDir, "kubelet-bootstrap.conf") // When using `k0s install` along with a join token, that join token @@ -149,22 +147,6 @@ func BootstrapKubeletKubeconfig(ctx context.Context, k0sVars *config.CfgVars, wo return fmt.Errorf("failed to initialize kubelet certificate directory: %w", err) } - // The node name used during bootstrapping needs to match the node name - // selected by kubelet. Otherwise, kubelet will have problems interacting - // with a Node object that doesn't match the name in the certificates. - // https://kubernetes.io/docs/reference/access-authn-authz/node/ - - // Kubelet still has some deprecated support for cloud providers, which may - // completely bypass the "standard" node name detection as it's done here. - // K0s only supports external cloud providers, which seems to be a dead code - // path anyways in kubelet. So it's safe to assume that the following code - // exactly matches the behavior of kubelet. - - nodeName, err := node.GetNodename(flags.Split(workerOpts.KubeletExtraArgs)["--hostname-override"]) - if err != nil { - return fmt.Errorf("failed to determine node name: %w", err) - } - logrus.Infof("Bootstrapping kubelet client configuration using %s as node name", nodeName) if err := retry.Do( @@ -174,7 +156,7 @@ func BootstrapKubeletKubeconfig(ctx context.Context, k0sVars *config.CfgVars, wo k0sVars.KubeletAuthConfigPath, bootstrapKubeconfigPath, certDir, - apitypes.NodeName(nodeName), + nodeName, ) }, retry.Context(ctx), diff --git a/pkg/node/nodename.go b/pkg/node/nodename.go index e8f0b710aa9d..3fba8f76acaa 100644 --- a/pkg/node/nodename.go +++ b/pkg/node/nodename.go @@ -20,18 +20,19 @@ import ( "context" "fmt" + apitypes "k8s.io/apimachinery/pkg/types" nodeutil "k8s.io/component-helpers/node/util" ) -// GetNodename returns the node name for the node taking OS, cloud provider and override into account -func GetNodename(override string) (string, error) { - return getNodename(context.TODO(), override) +// GetNodeName returns the node name for the node taking OS, cloud provider and override into account +func GetNodeName(override string) (apitypes.NodeName, error) { + return getNodeName(context.TODO(), override) } -func getNodename(ctx context.Context, override string) (string, error) { +func getNodeName(ctx context.Context, override string) (apitypes.NodeName, error) { if override == "" { var err error - override, err = defaultNodenameOverride(ctx) + override, err = defaultNodeNameOverride(ctx) if err != nil { return "", err } @@ -40,5 +41,5 @@ func getNodename(ctx context.Context, override string) (string, error) { if err != nil { return "", fmt.Errorf("failed to determine node name: %w", err) } - return nodeName, nil + return apitypes.NodeName(nodeName), nil } diff --git a/pkg/node/nodename_other.go b/pkg/node/nodename_other.go index 57fcb36b2aa0..b649d6d79344 100644 --- a/pkg/node/nodename_other.go +++ b/pkg/node/nodename_other.go @@ -22,6 +22,6 @@ import ( "context" ) -func defaultNodenameOverride(context.Context) (string, error) { +func defaultNodeNameOverride(context.Context) (string, error) { return "", nil // no default override } diff --git a/pkg/node/nodename_test.go b/pkg/node/nodename_test.go index a6e5ee99945c..a9196e202db0 100644 --- a/pkg/node/nodename_test.go +++ b/pkg/node/nodename_test.go @@ -20,16 +20,18 @@ import ( "runtime" "testing" + apitypes "k8s.io/apimachinery/pkg/types" + nodeutil "k8s.io/component-helpers/node/util" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - nodeutil "k8s.io/component-helpers/node/util" ) -func TestGetNodename(t *testing.T) { +func TestGetNodeName(t *testing.T) { t.Run("should_always_return_override_if_given", func(t *testing.T) { - name, err := GetNodename("override") + name, err := GetNodeName("override") if assert.NoError(t, err) { - assert.Equal(t, "override", name) + assert.Equal(t, apitypes.NodeName("override"), name) } }) @@ -38,9 +40,9 @@ func TestGetNodename(t *testing.T) { require.NoError(t, err) t.Run("should_call_kubernetes_hostname_helper_on_linux", func(t *testing.T) { - name, err := GetNodename("") + name, err := GetNodeName("") if assert.NoError(t, err) { - assert.Equal(t, kubeHostname, name) + assert.Equal(t, apitypes.NodeName(kubeHostname), name) } }) } diff --git a/pkg/node/nodename_windows.go b/pkg/node/nodename_windows.go index f70367aef95f..53a8afb7381f 100644 --- a/pkg/node/nodename_windows.go +++ b/pkg/node/nodename_windows.go @@ -28,7 +28,7 @@ import ( // A URL that may be retrieved to determine the nodename. type nodenameURL string -func defaultNodenameOverride(ctx context.Context) (string, error) { +func defaultNodeNameOverride(ctx context.Context) (string, error) { // we need to check if we have EC2 dns name available url := k0scontext.ValueOr[nodenameURL](ctx, "http://169.254.169.254/latest/meta-data/local-hostname") diff --git a/pkg/node/nodename_windows_test.go b/pkg/node/nodename_windows_test.go index 58d611bca585..6d618364e810 100644 --- a/pkg/node/nodename_windows_test.go +++ b/pkg/node/nodename_windows_test.go @@ -24,29 +24,32 @@ import ( "testing" "github.com/k0sproject/k0s/pkg/k0scontext" + + apitypes "k8s.io/apimachinery/pkg/types" + nodeutil "k8s.io/component-helpers/node/util" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - nodeutil "k8s.io/component-helpers/node/util" ) -func TestGetNodenameWindows(t *testing.T) { +func TestGetNodeNameWindows(t *testing.T) { kubeHostname, err := nodeutil.GetHostname("") require.NoError(t, err) baseURL := startFakeMetadataServer(t) t.Run("no_metadata_service_available", func(t *testing.T) { ctx := k0scontext.WithValue(context.TODO(), nodenameURL(baseURL)) - name, err := getNodename(ctx, "") + name, err := getNodeName(ctx, "") if assert.NoError(t, err) { - assert.Equal(t, kubeHostname, name) + assert.Equal(t, apitypes.NodeName(kubeHostname), name) } }) t.Run("metadata_service_is_available", func(t *testing.T) { ctx := k0scontext.WithValue(context.TODO(), nodenameURL(baseURL+"/latest/meta-data/local-hostname")) - name, err := getNodename(ctx, "") + name, err := getNodeName(ctx, "") if assert.NoError(t, err) { - assert.Equal(t, "some-hostname from aws_metadata", name) + assert.Equal(t, apitypes.NodeName("some-hostname from aws_metadata"), name) } }) }