diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
index 4b9f31ba47f36..9986ee0204f49 100644
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -38,3 +38,29 @@ jobs:
       with:
         name: tests-e2e-scenarios-bare-metal
         path: /tmp/artifacts/
+
+  tests-e2e-scenarios-bare-metal-ipv6:
+    runs-on: ubuntu-24.04
+    timeout-minutes: 70
+    steps:
+    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+      with:
+        path: ${{ env.GOPATH }}/src/k8s.io/kops
+
+    - name: Set up go
+      uses: actions/setup-go@41dfa10bad2bb2ae585af6ee5bb4d7d973ad74ed
+      with:
+        go-version-file: '${{ env.GOPATH }}/src/k8s.io/kops/go.mod'
+
+    - name: tests/e2e/scenarios/bare-metal/scenario-ipv6
+      working-directory: ${{ env.GOPATH }}/src/k8s.io/kops
+      run: |
+        timeout 60m tests/e2e/scenarios/bare-metal/scenario-ipv6
+      env:
+        ARTIFACTS: /tmp/artifacts
+    - name: Archive production artifacts
+      if: always()
+      uses: actions/upload-artifact@v4
+      with:
+        name: tests-e2e-scenarios-bare-metal-ipv6
+        path: /tmp/artifacts/
diff --git a/cmd/kops-controller/controllers/gceipam.go b/cmd/kops-controller/controllers/gceipam.go
index 0a750f4c0b1de..37a8e8c9ad485 100644
--- a/cmd/kops-controller/controllers/gceipam.go
+++ b/cmd/kops-controller/controllers/gceipam.go
@@ -56,7 +56,7 @@ func NewGCEIPAMReconciler(mgr manager.Manager) (*GCEIPAMReconciler, error) {
 	return r, nil
 }
 
-// GCEIPAMReconciler observes Node objects, assigning their`PodCIDRs` from the instance's `ExternalIpv6`.
+// GCEIPAMReconciler observes Node objects, assigning their `PodCIDRs` from the instance's `ExternalIpv6`.
 type GCEIPAMReconciler struct {
 	// client is the controller-runtime client
 	client client.Client
diff --git a/cmd/kops-controller/controllers/metalipam.go b/cmd/kops-controller/controllers/metalipam.go
new file mode 100644
index 0000000000000..9f4c55181ffa5
--- /dev/null
+++ b/cmd/kops-controller/controllers/metalipam.go
@@ -0,0 +1,103 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package controllers
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/go-logr/logr"
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/types"
+	corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
+	"k8s.io/klog/v2"
+	kopsapi "k8s.io/kops/pkg/apis/kops/v1alpha2"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/manager"
+)
+
+// NewMetalIPAMReconciler builds a MetalIPAMReconciler from the manager's client and REST config; ctx is currently unused.
+func NewMetalIPAMReconciler(ctx context.Context, mgr manager.Manager) (*MetalIPAMReconciler, error) {
+	klog.Info("starting metal ipam controller")
+	r := &MetalIPAMReconciler{
+		client: mgr.GetClient(),
+		log:    ctrl.Log.WithName("controllers").WithName("metal_ipam"),
+	}
+
+	coreClient, err := corev1client.NewForConfig(mgr.GetConfig())
+	if err != nil {
+		return nil, fmt.Errorf("building corev1 client: %w", err)
+	}
+	r.coreV1Client = coreClient
+
+	return r, nil
+}
+
+// MetalIPAMReconciler observes Node objects, assigning their `PodCIDRs` from the `PodCIDRs` of the same-named Host object.
+type MetalIPAMReconciler struct {
+	// client is the controller-runtime client
+	client client.Client
+
+	// log is a logr
+	log logr.Logger
+
+	// coreV1Client is a client-go client for patching nodes
+	coreV1Client *corev1client.CoreV1Client
+}
+
+// +kubebuilder:rbac:groups=,resources=nodes,verbs=get;list;watch;patch
+// Reconcile copies PodCIDRs from the kops-system Host named after the node onto nodes that have none yet.
+func (r *MetalIPAMReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+	node := &corev1.Node{}
+	if err := r.client.Get(ctx, req.NamespacedName, node); err != nil {
+		if apierrors.IsNotFound(err) {
+			// we'll ignore not-found errors, since they can't be fixed by an immediate
+			// requeue (we'll need to wait for a new notification), and we can get them
+			// on deleted requests.
+			return ctrl.Result{}, nil
+		}
+		// node is not populated when Get fails, so log the requested name instead of node.Name.
+		klog.Warningf("unable to fetch node %s: %v", req.NamespacedName, err)
+		return ctrl.Result{}, err
+	}
+
+	// Already assigned: return before the Host lookup, so a missing Host object cannot fail the reconcile.
+	if len(node.Spec.PodCIDRs) != 0 {
+		return ctrl.Result{}, nil
+	}
+
+	id := types.NamespacedName{Namespace: "kops-system", Name: node.Name}
+	host := &kopsapi.Host{}
+	if err := r.client.Get(ctx, id, host); err != nil {
+		klog.Warningf("unable to fetch host %s: %v", id, err)
+		return ctrl.Result{}, err
+	}
+
+	if err := patchNodePodCIDRs(r.coreV1Client, ctx, node, host.Spec.PodCIDRs); err != nil {
+		return ctrl.Result{}, err
+	}
+	return ctrl.Result{}, nil
+}
+
+func (r *MetalIPAMReconciler) SetupWithManager(mgr ctrl.Manager) error {
+	return ctrl.NewControllerManagedBy(mgr).
+		Named("metal_ipam").
+		For(&corev1.Node{}).
+ Complete(r) +} diff --git a/cmd/kops-controller/main.go b/cmd/kops-controller/main.go index 1ac9d498dacc3..f722e6aabac45 100644 --- a/cmd/kops-controller/main.go +++ b/cmd/kops-controller/main.go @@ -392,6 +392,12 @@ func setupCloudIPAM(ctx context.Context, mgr manager.Manager, opt *config.Option return fmt.Errorf("creating gce IPAM controller: %w", err) } controller = ipamController + case "metal": + ipamController, err := controllers.NewMetalIPAMReconciler(ctx, mgr) + if err != nil { + return fmt.Errorf("creating metal IPAM controller: %w", err) + } + controller = ipamController default: return fmt.Errorf("kOps IPAM controller is not supported on cloud %q", opt.Cloud) } diff --git a/cmd/kops/toolbox_enroll.go b/cmd/kops/toolbox_enroll.go index 55d3600655be0..4fc7f648214e4 100644 --- a/cmd/kops/toolbox_enroll.go +++ b/cmd/kops/toolbox_enroll.go @@ -45,10 +45,13 @@ func NewCmdToolboxEnroll(f commandutils.Factory, out io.Writer) *cobra.Command { cmd.Flags().StringVar(&options.ClusterName, "cluster", options.ClusterName, "Name of cluster to join") cmd.Flags().StringVar(&options.InstanceGroup, "instance-group", options.InstanceGroup, "Name of instance-group to join") + cmd.Flags().StringSliceVar(&options.PodCIDRs, "pod-cidr", options.PodCIDRs, "IP Address range to use for pods that run on this node") cmd.Flags().StringVar(&options.Host, "host", options.Host, "IP/hostname for machine to add") cmd.Flags().StringVar(&options.SSHUser, "ssh-user", options.SSHUser, "user for ssh") cmd.Flags().IntVar(&options.SSHPort, "ssh-port", options.SSHPort, "port for ssh") + cmd.Flags().BoolVar(&options.BuildHost, "build-host", options.BuildHost, "only build the host resource, don't apply it or enroll the node") + return cmd } diff --git a/docs/cli/kops_toolbox_enroll.md b/docs/cli/kops_toolbox_enroll.md index 2f68d815ddab3..796f4686f5cbc 100644 --- a/docs/cli/kops_toolbox_enroll.md +++ b/docs/cli/kops_toolbox_enroll.md @@ -22,10 +22,12 @@ kops toolbox enroll [CLUSTER] [flags] ### 
Options ``` + --build-host only build the host resource, don't apply it or enroll the node --cluster string Name of cluster to join -h, --help help for enroll --host string IP/hostname for machine to add --instance-group string Name of instance-group to join + --pod-cidr strings IP Address range to use for pods that run on this node --ssh-port int port for ssh (default 22) --ssh-user string user for ssh (default "root") ``` diff --git a/k8s/crds/kops.k8s.io_hosts.yaml b/k8s/crds/kops.k8s.io_hosts.yaml index 84ac033ee7191..10d95c221a50b 100644 --- a/k8s/crds/kops.k8s.io_hosts.yaml +++ b/k8s/crds/kops.k8s.io_hosts.yaml @@ -42,6 +42,12 @@ spec: properties: instanceGroup: type: string + podCIDRs: + description: PodCIDRs configures the IP ranges to be used for pods + on this node/host. + items: + type: string + type: array publicKey: type: string type: object diff --git a/nodeup/pkg/model/kube_apiserver.go b/nodeup/pkg/model/kube_apiserver.go index f1636a44b4674..5fe43eea27dcb 100644 --- a/nodeup/pkg/model/kube_apiserver.go +++ b/nodeup/pkg/model/kube_apiserver.go @@ -19,10 +19,12 @@ package model import ( "context" "fmt" + "net" "path/filepath" "sort" "strings" + "k8s.io/klog/v2" "k8s.io/kops/pkg/apis/kops" "k8s.io/kops/pkg/flagbuilder" "k8s.io/kops/pkg/k8scodecs" @@ -77,6 +79,55 @@ func (b *KubeAPIServerBuilder) Build(c *fi.NodeupModelBuilderContext) error { } } + if b.CloudProvider() == kops.CloudProviderMetal { + // Workaround for https://github.com/kubernetes/kubernetes/issues/111671 + if b.IsIPv6Only() { + interfaces, err := net.Interfaces() + if err != nil { + return fmt.Errorf("getting local network interfaces: %w", err) + } + var ipv6s []net.IP + for _, intf := range interfaces { + addresses, err := intf.Addrs() + if err != nil { + return fmt.Errorf("getting addresses for network interface %q: %w", intf.Name, err) + } + for _, addr := range addresses { + ip, _, err := net.ParseCIDR(addr.String()) + if ip == nil { + return fmt.Errorf("parsing ip address %q 
(bound to network %q): %w", addr.String(), intf.Name, err) + } + if ip.To4() != nil { + // We're only looking for ipv6 + continue + } + if ip.IsLinkLocalUnicast() { + klog.V(4).Infof("ignoring link-local unicast addr %v", addr) + continue + } + if ip.IsLinkLocalMulticast() { + klog.V(4).Infof("ignoring link-local multicast addr %v", addr) + continue + } + if ip.IsLoopback() { + klog.V(4).Infof("ignoring loopback addr %v", addr) + continue + } + ipv6s = append(ipv6s, ip) + } + } + if len(ipv6s) > 1 { + klog.Warningf("found multiple ipv6s, choosing first: %v", ipv6s) + } + if len(ipv6s) == 0 { + klog.Warningf("did not find ipv6 address for kube-apiserver --advertise-address") + } + if len(ipv6s) > 0 { + kubeAPIServer.AdvertiseAddress = ipv6s[0].String() + } + } + } + b.configureOIDC(&kubeAPIServer) if err := b.writeAuthenticationConfig(c, &kubeAPIServer); err != nil { return err diff --git a/nodeup/pkg/model/prefix.go b/nodeup/pkg/model/prefix.go index de796341cfc72..0381b44672d41 100644 --- a/nodeup/pkg/model/prefix.go +++ b/nodeup/pkg/model/prefix.go @@ -41,6 +41,8 @@ func (b *PrefixBuilder) Build(c *fi.NodeupModelBuilderContext) error { }) case kops.CloudProviderGCE: // Prefix is assigned by GCE + case kops.CloudProviderMetal: + // IPv6 must be configured externally (not by nodeup) default: return fmt.Errorf("kOps IPAM controller not supported on cloud %q", b.CloudProvider()) } diff --git a/pkg/apis/kops/v1alpha2/host.go b/pkg/apis/kops/v1alpha2/host.go index 7e1295ca167ec..9aa242d3b62ff 100644 --- a/pkg/apis/kops/v1alpha2/host.go +++ b/pkg/apis/kops/v1alpha2/host.go @@ -36,6 +36,9 @@ type Host struct { type HostSpec struct { PublicKey string `json:"publicKey,omitempty"` InstanceGroup string `json:"instanceGroup,omitempty"` + + // PodCIDRs configures the IP ranges to be used for pods on this node/host. 
+ PodCIDRs []string `json:"podCIDRs,omitempty"` } // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object diff --git a/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go b/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go index db9773eec5318..56727ad9ee3e6 100644 --- a/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go +++ b/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go @@ -2334,7 +2334,7 @@ func (in *Host) DeepCopyInto(out *Host) { *out = *in out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - out.Spec = in.Spec + in.Spec.DeepCopyInto(&out.Spec) return } @@ -2392,6 +2392,11 @@ func (in *HostList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *HostSpec) DeepCopyInto(out *HostSpec) { *out = *in + if in.PodCIDRs != nil { + in, out := &in.PodCIDRs, &out.PodCIDRs + *out = make([]string, len(*in)) + copy(*out, *in) + } return } diff --git a/pkg/commands/toolbox_enroll.go b/pkg/commands/toolbox_enroll.go index 268563d6a409d..adf18d2eb3bb0 100644 --- a/pkg/commands/toolbox_enroll.go +++ b/pkg/commands/toolbox_enroll.go @@ -65,6 +65,12 @@ type ToolboxEnrollOptions struct { SSHUser string SSHPort int + + // BuildHost is a flag to only build the host resource, don't apply it or enroll the node + BuildHost bool + + // PodCIDRs is the list of IP Address ranges to use for pods that run on this node + PodCIDRs []string } func (o *ToolboxEnrollOptions) InitDefaults() { @@ -97,14 +103,6 @@ func RunToolboxEnroll(ctx context.Context, f commandutils.Factory, out io.Writer if err != nil { return err } - fullInstanceGroup, err := configBuilder.GetFullInstanceGroup(ctx) - if err != nil { - return err - } - bootstrapData, err := configBuilder.GetBootstrapData(ctx) - if err != nil { - return err - } // Enroll the node over SSH. 
if options.Host != "" { @@ -113,37 +111,50 @@ func RunToolboxEnroll(ctx context.Context, f commandutils.Factory, out io.Writer return err } - if err := enrollHost(ctx, fullInstanceGroup, options, bootstrapData, restConfig); err != nil { + sudo := true + if options.SSHUser == "root" { + sudo = false + } + + sshTarget, err := NewSSHHost(ctx, options.Host, options.SSHPort, options.SSHUser, sudo) + if err != nil { return err } - } + defer sshTarget.Close() - return nil -} + hostData, err := buildHostData(ctx, sshTarget, options) + if err != nil { + return err + } -func enrollHost(ctx context.Context, ig *kops.InstanceGroup, options *ToolboxEnrollOptions, bootstrapData *BootstrapData, restConfig *rest.Config) error { - scheme := runtime.NewScheme() - if err := v1alpha2.AddToScheme(scheme); err != nil { - return fmt.Errorf("building kubernetes scheme: %w", err) - } - kubeClient, err := client.New(restConfig, client.Options{ - Scheme: scheme, - }) - if err != nil { - return fmt.Errorf("building kubernetes client: %w", err) - } + if options.BuildHost { + klog.Infof("building host data for %+v", hostData) + b, err := yaml.Marshal(hostData) + if err != nil { + return fmt.Errorf("error marshalling host data: %w", err) + } + fmt.Fprintf(out, "%s\n", string(b)) + } else { + fullInstanceGroup, err := configBuilder.GetFullInstanceGroup(ctx) + if err != nil { + return err + } + bootstrapData, err := configBuilder.GetBootstrapData(ctx) + if err != nil { + return err + } - sudo := true - if options.SSHUser == "root" { - sudo = false + if err := enrollHost(ctx, fullInstanceGroup, bootstrapData, restConfig, hostData, sshTarget); err != nil { + return err + } + } } - sshTarget, err := NewSSHHost(ctx, options.Host, options.SSHPort, options.SSHUser, sudo) - if err != nil { - return err - } - defer sshTarget.Close() + return nil +} +// buildHostData builds an instance of the Host CRD, based on information in the options and by SSHing to the target host. 
+func buildHostData(ctx context.Context, sshTarget *SSHHost, options *ToolboxEnrollOptions) (*v1alpha2.Host, error) { publicKeyPath := "/etc/kubernetes/kops/pki/machine/public.pem" publicKeyBytes, err := sshTarget.readFile(ctx, publicKeyPath) @@ -151,19 +162,20 @@ func enrollHost(ctx context.Context, ig *kops.InstanceGroup, options *ToolboxEnr if errors.Is(err, fs.ErrNotExist) { publicKeyBytes = nil } else { - return fmt.Errorf("error reading public key %q: %w", publicKeyPath, err) + return nil, fmt.Errorf("error reading public key %q: %w", publicKeyPath, err) } } + // Create the key if it doesn't exist publicKeyBytes = bytes.TrimSpace(publicKeyBytes) if len(publicKeyBytes) == 0 { - if _, err := sshTarget.runScript(ctx, scriptCreateKey, ExecOptions{Sudo: sudo, Echo: true}); err != nil { - return err + if _, err := sshTarget.runScript(ctx, scriptCreateKey, ExecOptions{Echo: true}); err != nil { + return nil, err } b, err := sshTarget.readFile(ctx, publicKeyPath) if err != nil { - return fmt.Errorf("error reading public key %q (after creation): %w", publicKeyPath, err) + return nil, fmt.Errorf("error reading public key %q (after creation): %w", publicKeyPath, err) } publicKeyBytes = b } @@ -171,14 +183,37 @@ func enrollHost(ctx context.Context, ig *kops.InstanceGroup, options *ToolboxEnr hostname, err := sshTarget.getHostname(ctx) if err != nil { - return err + return nil, err + } + + host := &v1alpha2.Host{} + host.SetGroupVersionKind(v1alpha2.SchemeGroupVersion.WithKind("Host")) + host.Namespace = "kops-system" + host.Name = hostname + host.Spec.InstanceGroup = options.InstanceGroup + host.Spec.PublicKey = string(publicKeyBytes) + host.Spec.PodCIDRs = options.PodCIDRs + + return host, nil +} + +func enrollHost(ctx context.Context, ig *kops.InstanceGroup, bootstrapData *BootstrapData, restConfig *rest.Config, hostData *v1alpha2.Host, sshTarget *SSHHost) error { + scheme := runtime.NewScheme() + if err := v1alpha2.AddToScheme(scheme); err != nil { + return 
fmt.Errorf("building kubernetes scheme: %w", err) + } + kubeClient, err := client.New(restConfig, client.Options{ + Scheme: scheme, + }) + if err != nil { + return fmt.Errorf("building kubernetes client: %w", err) } // We can't create the host resource in the API server for control-plane nodes, // because the API server (likely) isn't running yet. if !ig.IsControlPlane() { - if err := createHostResourceInAPIServer(ctx, options, hostname, publicKeyBytes, kubeClient); err != nil { - return err + if err := kubeClient.Create(ctx, hostData); err != nil { + return fmt.Errorf("failed to create host %s/%s: %w", hostData.Namespace, hostData.Name, err) } } @@ -189,27 +224,13 @@ func enrollHost(ctx context.Context, ig *kops.InstanceGroup, options *ToolboxEnr } if len(bootstrapData.NodeupScript) != 0 { - if _, err := sshTarget.runScript(ctx, string(bootstrapData.NodeupScript), ExecOptions{Sudo: sudo, Echo: true}); err != nil { + if _, err := sshTarget.runScript(ctx, string(bootstrapData.NodeupScript), ExecOptions{Echo: true}); err != nil { return err } } return nil } -func createHostResourceInAPIServer(ctx context.Context, options *ToolboxEnrollOptions, nodeName string, publicKey []byte, client client.Client) error { - host := &v1alpha2.Host{} - host.Namespace = "kops-system" - host.Name = nodeName - host.Spec.InstanceGroup = options.InstanceGroup - host.Spec.PublicKey = string(publicKey) - - if err := client.Create(ctx, host); err != nil { - return fmt.Errorf("failed to create host %s/%s: %w", host.Namespace, host.Name, err) - } - - return nil -} - const scriptCreateKey = ` #!/bin/bash set -o errexit @@ -310,7 +331,7 @@ func (s *SSHHost) runScript(ctx context.Context, script string, options ExecOpti p := vfs.NewSSHPath(s.sshClient, s.hostname, scriptPath, s.sudo) defer func() { - if _, err := s.runCommand(ctx, "rm -rf "+tempDir, ExecOptions{Sudo: s.sudo, Echo: false}); err != nil { + if _, err := s.runCommand(ctx, "rm -rf "+tempDir, ExecOptions{Echo: false}); err != nil { 
klog.Warningf("error cleaning up temp directory %q: %v", tempDir, err) } }() @@ -331,7 +352,6 @@ type CommandOutput struct { // ExecOptions holds options for running a command remotely. type ExecOptions struct { - Sudo bool Echo bool } @@ -348,10 +368,11 @@ func (s *SSHHost) runCommand(ctx context.Context, command string, options ExecOp session.Stderr = &output.Stderr if options.Echo { - session.Stdout = io.MultiWriter(os.Stdout, session.Stdout) + // We send both to stderr, so we don't "corrupt" stdout + session.Stdout = io.MultiWriter(os.Stderr, session.Stdout) session.Stderr = io.MultiWriter(os.Stderr, session.Stderr) } - if options.Sudo { + if s.sudo { command = "sudo " + command } if err := session.Run(command); err != nil { @@ -363,7 +384,7 @@ func (s *SSHHost) runCommand(ctx context.Context, command string, options ExecOp // getHostname gets the hostname of the SSH target. // This is used as the node name when registering the node. func (s *SSHHost) getHostname(ctx context.Context) (string, error) { - output, err := s.runCommand(ctx, "hostname", ExecOptions{Sudo: false, Echo: true}) + output, err := s.runCommand(ctx, "hostname", ExecOptions{Echo: true}) if err != nil { return "", fmt.Errorf("failed to get hostname: %w", err) } diff --git a/pkg/kubeconfig/create_kubecfg.go b/pkg/kubeconfig/create_kubecfg.go index 51bf933249703..af4aae2214dff 100644 --- a/pkg/kubeconfig/create_kubecfg.go +++ b/pkg/kubeconfig/create_kubecfg.go @@ -20,6 +20,7 @@ import ( "context" "crypto/x509/pkix" "fmt" + "net" "os/user" "sort" "time" @@ -57,7 +58,7 @@ func BuildKubecfg(ctx context.Context, cluster *kops.Cluster, keyStore fi.Keysto server = "https://" + cluster.APIInternalName() } else { if cluster.Spec.API.PublicName != "" { - server = "https://" + cluster.Spec.API.PublicName + server = "https://" + wrapIPv6Address(cluster.Spec.API.PublicName) } else { server = "https://api." 
+ clusterName
 		}
@@ -98,7 +99,7 @@ func BuildKubecfg(ctx context.Context, cluster *kops.Cluster, keyStore fi.Keysto
 				if len(targets) != 1 {
 					klog.Warningf("Found multiple API endpoints (%v), choosing arbitrarily", targets)
 				}
-				server = "https://" + targets[0]
+				server = "https://" + wrapIPv6Address(targets[0])
 			}
 		}
 	}
@@ -187,3 +188,23 @@ func BuildKubecfg(ctx context.Context, cluster *kops.Cluster, keyStore fi.Keysto
 
 	return b, nil
 }
+
+// wrapIPv6Address will wrap IPv6 addresses in square brackets,
+// for use in URLs; other endpoints are unchanged.
+//
+// An endpoint counts as IPv6 when it parses as an IP address and is written
+// with colons. That includes IPv4-mapped literals such as "::ffff:10.0.0.1":
+// ip.To4() reports those as IPv4, but unbracketed their colons would still be
+// read as the port separator in a URL.
+func wrapIPv6Address(endpoint string) string {
+	if net.ParseIP(endpoint) == nil {
+		// Not a bare IP literal (hostname, host:port, already bracketed): leave as-is.
+		return endpoint
+	}
+	for i := 0; i < len(endpoint); i++ {
+		if endpoint[i] == ':' {
+			return "[" + endpoint + "]"
+		}
+	}
+	return endpoint
+}
diff --git a/pkg/model/components/kubelet.go b/pkg/model/components/kubelet.go
index ce0c0823ad70c..df9859eb30dfd 100644
--- a/pkg/model/components/kubelet.go
+++ b/pkg/model/components/kubelet.go
@@ -161,7 +161,14 @@ func (b *KubeletOptionsBuilder) configureKubelet(cluster *kops.Cluster, kubelet
 	}
 
 	if cluster.Spec.ExternalCloudControllerManager != nil {
-		kubelet.CloudProvider = "external"
+		if cloudProvider == kops.CloudProviderMetal {
+			// metal does not (yet) have a cloud-controller-manager, so we don't need to set the cloud-provider flag
+			// If we do set it to external, kubelet will taint the node with the node.kops.k8s.io/uninitialized taint
+			// and there is no cloud-controller-manager to remove it
+			kubelet.CloudProvider = ""
+		} else {
+			kubelet.CloudProvider = "external"
+		}
 	}
 
 	// Prevent image GC from pruning the pause image
diff --git a/tests/e2e/scenarios/bare-metal/cleanup b/tests/e2e/scenarios/bare-metal/cleanup
index 60c52b8566b91..2061a961f1ef0 100755
--- a/tests/e2e/scenarios/bare-metal/cleanup
+++ b/tests/e2e/scenarios/bare-metal/cleanup
@@ -38,6 +38,8 @@ sudo ip link del dev tap-vm0 || true
 sudo ip link del dev tap-vm1 || true
 sudo ip link del dev tap-vm2 || true
 
+sudo ip link del
dev br0 || true
+
 rm -rf .build/vm0
 rm -rf .build/vm1
 rm -rf .build/vm2
diff --git a/tests/e2e/scenarios/bare-metal/e2e_test.go b/tests/e2e/scenarios/bare-metal/e2e_test.go
index 2c596569c6dc8..216fff05f6c9b 100644
--- a/tests/e2e/scenarios/bare-metal/e2e_test.go
+++ b/tests/e2e/scenarios/bare-metal/e2e_test.go
@@ -54,6 +54,41 @@ func TestNodeAddresses(t *testing.T) {
 		}
 	}
 }
+
+// TestNodesNotTainted checks that the cluster has nodes and that none of them
+// carries a taint other than node-role.kubernetes.io/control-plane.
+func TestNodesNotTainted(t *testing.T) {
+	h := NewHarness(context.Background(), t)
+
+	nodes := h.Nodes()
+
+	// Quick check that we have some nodes
+	if len(nodes) == 0 {
+		t.Errorf("expected some nodes, got 0 nodes")
+	}
+
+	// Verify that the nodes aren't tainted.
+	// node.kops.k8s.io/uninitialized gets its own case so the failure can explain the likely cause;
+	// every other unexpected taint is reported by the default case.
+	// NOTE(review): kubelet's own --cloud-provider=external taint is presumably
+	// node.cloudprovider.kubernetes.io/uninitialized (default case here) - confirm which key the messages should name.
+	for _, node := range nodes {
+		t.Logf("node %s has taints: %v", node.Name, node.Spec.Taints)
+		for _, taint := range node.Spec.Taints {
+			switch taint.Key {
+			case "node.kops.k8s.io/uninitialized":
+				t.Errorf("unexpected taint for node %s: %s", node.Name, taint.Key)
+				t.Errorf("if we pass the --cloud-provider=external flag to kubelet, the node will be tainted with the node.kops.k8s.io/uninitialize taint")
+				t.Errorf("the taint is expected to be removed by the cloud-contoller-manager")
+				t.Errorf("(likely should be running a cloud-controller-manager in the cluster, or we should not pass the --cloud-provider=external flag to kubelet)")
+			case "node-role.kubernetes.io/control-plane":
+				// expected for control-plane nodes
+			default:
+				t.Errorf("unexpected taint for node %s: %s", node.Name, taint.Key)
+			}
+		}
+	}
+}
 
 // Harness is a test harness for our bare-metal e2e tests
 type Harness struct {
diff --git a/tests/e2e/scenarios/bare-metal/run-test b/tests/e2e/scenarios/bare-metal/run-test
index cdd85dec4ef45..ae5b29e25ffe9 100755
--- a/tests/e2e/scenarios/bare-metal/run-test
+++ b/tests/e2e/scenarios/bare-metal/run-test
@@ -48,22 +48,33 @@ fi
 rm -rf ${WORKDIR}/s3
 mkdir -p ${WORKDIR}/s3/
 
+IPV4_PREFIX=10.123.45.
+ +VM0_IP=${IPV4_PREFIX}10 +VM1_IP=${IPV4_PREFIX}11 +VM2_IP=${IPV4_PREFIX}12 + # Start our VMs ${REPO_ROOT}/tests/e2e/scenarios/bare-metal/start-vms +# Start an SSH agent; enroll assumes SSH connectivity to the VMs with the key in the agent +eval $(ssh-agent) +ssh-add ${REPO_ROOT}/.build/.ssh/id_ed25519 + . hack/dev-build-metal.sh echo "Waiting 10 seconds for VMs to start" sleep 10 # Remove from known-hosts in case of reuse -ssh-keygen -f ~/.ssh/known_hosts -R 10.123.45.10 || true -ssh-keygen -f ~/.ssh/known_hosts -R 10.123.45.11 || true -ssh-keygen -f ~/.ssh/known_hosts -R 10.123.45.12 || true +ssh-keygen -f ~/.ssh/known_hosts -R ${VM0_IP} || true +ssh-keygen -f ~/.ssh/known_hosts -R ${VM1_IP} || true +ssh-keygen -f ~/.ssh/known_hosts -R ${VM2_IP} || true -ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.10 uptime -ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.11 uptime -ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.12 uptime +# Check SSH is working and accept the keys +ssh -o StrictHostKeyChecking=accept-new root@${VM0_IP} uptime +ssh -o StrictHostKeyChecking=accept-new root@${VM1_IP} uptime +ssh -o StrictHostKeyChecking=accept-new root@${VM2_IP} uptime cd ${REPO_ROOT} @@ -93,7 +104,7 @@ ${KOPS} create cluster --cloud=metal metal.k8s.local --zones main --networking c # Set the IP ingress, required for metal cloud # TODO: is this the best option? 
-${KOPS} edit cluster metal.k8s.local --set spec.api.publicName=10.123.45.10 +${KOPS} edit cluster metal.k8s.local --set spec.api.publicName=${VM0_IP} # Use latest etcd-manager image (while we're adding features) ${KOPS} edit cluster metal.k8s.local --set 'spec.etcdClusters[*].manager.image=us-central1-docker.pkg.dev/k8s-staging-images/etcd-manager/etcd-manager-static:latest' @@ -114,28 +125,24 @@ ${KOPS} get ig --name metal.k8s.local -oyaml ${KOPS} update cluster metal.k8s.local ${KOPS} update cluster metal.k8s.local --yes --admin -# Start an SSH agent; enroll assumes SSH connectivity to the VMs with the key in the agent -eval $(ssh-agent) -ssh-add ${REPO_ROOT}/.build/.ssh/id_ed25519 - # Enroll the control-plane VM -${KOPS} toolbox enroll --cluster metal.k8s.local --instance-group control-plane-main --host 10.123.45.10 --v=2 +${KOPS} toolbox enroll --cluster metal.k8s.local --instance-group control-plane-main --host ${VM0_IP} --v=2 # Manual creation of "volumes" for etcd, and setting up peer nodes -cat <