Skip to content

Commit 693802f

Browse files
author
Klavs Klavsen
committed
Merge pull request 'Ready for releasing v0.4' (#22) from feat/retry into main
Reviewed-on: https://gitea.obmondo.com/EnableIT/kubeaid-bootstrap-script/pulls/22
2 parents 603c858 + e02f748 commit 693802f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+746
-226
lines changed

.github/workflows/release.yaml

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,11 @@ jobs:
2828
username: obmondo
2929
password: ${{ secrets.GITHUB_TOKEN }}
3030

31-
- name: Build and push AMD64 and ARM64 container images
31+
- name: Build and push KubeAid Bootstrap Script AMD64 and ARM64 container images
3232
uses: docker/build-push-action@v4
3333
with:
3434
context: .
35-
file: build/Dockerfile
35+
file: build/kubeaid-bootstrap-script/Dockerfile
3636
# NOTE : It takes pretty long to build container images for the ARM64 platform (even when
3737
# using QEMU).
3838
platforms: linux/amd64,linux/arm64
@@ -43,3 +43,19 @@ jobs:
4343
# builds.
4444
cache-from: type=gha
4545
cache-to: type=gha,mode=max
46+
47+
- name: Build and push Hetzner Failover Script AMD64 and ARM64 container images
48+
uses: docker/build-push-action@v4
49+
with:
50+
context: .
51+
file: build/hetzner-failover-script/Dockerfile
52+
# NOTE : It takes pretty long to build container images for the ARM64 platform (even when
53+
# using QEMU).
54+
platforms: linux/amd64,linux/arm64
55+
tags: ghcr.io/obmondo/hetzner-failover-script:${{ github.event.release.tag_name }}
56+
push: true
57+
# Experimental cache exporter for GitHub Actions provided by buildx and BuildKit.
58+
# It uses the GitHub Cache API to fetch and load the Docker layer cache blobs across
59+
# builds.
60+
cache-from: type=gha
61+
cache-to: type=gha,mode=max

Makefile

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ IMAGE_NAME=kubeaid-bootstrap-script-dev:latest
77

88
.PHONY: build-image-dev
99
build-image-dev:
10-
@docker build -f ./build/Dockerfile.dev --build-arg CPU_ARCHITECTURE=arm64 -t $(IMAGE_NAME) .
10+
@docker build -f ./build/kubeaid-bootstrap-script/Dockerfile.dev --build-arg CPU_ARCHITECTURE=arm64 -t $(IMAGE_NAME) .
1111

1212
.PHONY: remove-image-dev
1313
remove-image-dev:
@@ -43,22 +43,22 @@ remove-container-dev: stop-container-dev
4343

4444
.PHONY: generate-sample-config-aws-dev
4545
generate-sample-config-aws-dev:
46-
@go run ./cmd config generate aws
46+
@go run ./cmd/kubeaid-bootstrap-script/ config generate aws
4747

4848
.PHONY: bootstrap-cluster-dev-aws
4949
bootstrap-cluster-dev-aws:
50-
@go run ./cmd cluster bootstrap aws \
50+
@go run ./cmd/kubeaid-bootstrap-script/ cluster bootstrap aws \
5151
--debug \
52-
--config /app/outputs/kubeaid-bootstrap-script.config.yaml \
53-
--skip-clusterctl-move
52+
--config /app/outputs/kubeaid-bootstrap-script.aws.config.yaml
5453
# --skip-kubeaid-config-setup
54+
# --skip-clusterctl-move
5555

5656
.PHONY: bootstrap-cluster-dev-hetzner
5757
bootstrap-cluster-dev-hetzner:
58-
@go run ./cmd cluster bootstrap hetzner \
58+
@go run ./cmd/kubeaid-bootstrap-script/ cluster bootstrap hetzner \
5959
--debug \
60-
--config /app/outputs/kubeaid-bootstrap-script.config.yaml \
61-
--skip-clusterctl-move
60+
--config /app/outputs/kubeaid-bootstrap-script.hetzner.config.yaml \
61+
--skip-clusterctl-move
6262
# --skip-kubeaid-config-setup
6363

6464
.PHONY: use-management-cluster
@@ -69,10 +69,15 @@ use-management-cluster:
6969
use-provisioned-cluster:
7070
export KUBECONFIG=./outputs/provisioned-cluster.kubeconfig.yaml
7171

72-
.PHONY: delete-provisioned-cluster
73-
delete-provisioned-cluster-dev:
74-
@go run ./cmd cluster delete \
75-
--config /app/outputs/kubeaid-bootstrap-script.config.yaml
72+
.PHONY: delete-provisioned-cluster-dev-aws
73+
delete-provisioned-cluster-dev-aws:
74+
@go run ./cmd/kubeaid-bootstrap-script/ cluster delete \
75+
--config /app/outputs/kubeaid-bootstrap-script.aws.config.yaml
76+
77+
.PHONY: delete-provisioned-cluster-dev-hetzner
78+
delete-provisioned-cluster-dev-hetzner:
79+
@go run ./cmd/kubeaid-bootstrap-script/ cluster delete \
80+
--config /app/outputs/kubeaid-bootstrap-script.hetzner.config.yaml
7681

7782
.PHONY: delete-management-cluster
7883
delete-management-cluster:

README.md

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ The `KubeAid Bootstrap Script` is used to bootstrap Kubernetes clusters using Cl
88

99
- [Bootstrapping a self-managed cluster in AWS](https://github.com/Obmondo/KubeAid/blob/master/docs/aws/capi/cluster.md)
1010

11-
## Developer Guide
11+
## Developer Guide (AWS)
1212

1313
> Make sure you have Docker installed on your system.
1414
@@ -20,7 +20,31 @@ In a separate terminal window, use `make exec-container-dev` to execute into the
2020

2121
Once you're inside the container, use `make generate-sample-config-aws-dev` to generate a sample config file at [./outputs/kubeaid-bootstrap-script.config.yaml](./outputs/kubeaid-bootstrap-script.config.yaml), targeting the AWS cloud provider. Adjust the config file according to your needs.
2222

23-
Then run `make bootstrap-cluster-dev` to bootstrap the cluster!
23+
Export your AWS credentials as environment variables, like so:
24+
25+
```sh
26+
export AWS_REGION=""
27+
export AWS_ACCESS_KEY_ID=""
28+
export AWS_SECRET_ACCESS_KEY=""
29+
export AWS_SESSION_TOKEN=""
30+
```
31+
32+
Then run `make bootstrap-cluster-dev-aws` to bootstrap the cluster!
33+
34+
> [!NOTE]
35+
> If the `clusterawsadm bootstrap iam create-cloudformation-stack` command errors out with this message :
36+
>
37+
> the IAM CloudFormation Stack create / update failed and it's currently in a `ROLLBACK_COMPLETE` state
38+
>
39+
> then there are probably pre-existing IAM resources with overlapping names. Delete them manually from the AWS Console first, and then retry running the script. To find them, filter the IAM roles and policies in the corresponding region with the keywords `cluster` / `clusterapi`.
40+
41+
If cluster provisioning gets stuck, debug it by:
42+
43+
- checking the logs of the ClusterAPI-related pods.
44+
45+
- SSHing into the control-plane node. You can view cloud-init output logs stored at `/var/log/cloud-init-output.log`.
46+
47+
If you want to delete the provisioned cluster, run `make delete-provisioned-cluster-dev-aws`.
2448

2549
## TODOs
2650

@@ -30,6 +54,13 @@ Then run `make bootstrap-cluster-dev` to bootstrap the cluster!
3054
- [ ] Support adding admin SSH keys via config file.
3155
- [ ] Support using HTTPS for ArgoCD apps.
3256
- [ ] Use ArgoCD sync waves so that we don't need to explicitly sync the Infrastructure Provider component first.
57+
- [x] Support enabling `Audit Logging`.
58+
- [x] Switch to IAM Role from (temporary) credentials after cluster bootstrap.
59+
- [x] ETCD metrics enabled.
60+
- [x] Support scale to / from zero for the node-groups.
61+
> Currently, I have added extra ClusterRole and ClusterRoleBinding in the KubeAid [cluster-autoscaler Helm chart](https://github.com/Obmondo/kubeaid/tree/master/argocd-helm-charts/cluster-autoscaler) to support this feature.
62+
> But I have also opened an issue in the kubernetes-sigs/autoscaler repository regarding this : [Allow adding extra rules to the Role / ClusterRole template of the Cluster AutoScaler Helm chart](https://github.com/kubernetes/autoscaler/issues/7680).
63+
- [ ] `recover cluster` command
3364

3465
## REFERENCES
3566

@@ -46,3 +77,15 @@ Then run `make bootstrap-cluster-dev` to bootstrap the cluster!
4677
- [Secret Rotation](https://github.com/bitnami-labs/sealed-secrets?tab=readme-ov-file#secret-rotation)
4778

4879
- [Kubernetes Backups, Upgrades, Migrations - with Velero](https://youtu.be/zybLTQER0yY?si=qOZcizBqPOeouJ7y)
80+
81+
- [Failover](https://docs.hetzner.com/robot/dedicated-server/ip/failover/)
82+
83+
- [Auditing](https://kubernetes.io/docs/tasks/debug/debug-cluster/audit/)
84+
85+
- [Kube API server args](https://kubernetes.io/docs/reference/command-line-tools-reference/kube-apiserver/)
86+
87+
- [Using IAM roles in management cluster instead of AWS credentials](https://cluster-api-aws.sigs.k8s.io/topics/using-iam-roles-in-mgmt-cluster)
88+
89+
- [KubeadmControlPlane CRD](https://github.com/kubernetes-sigs/cluster-api/blob/main/controlplane/kubeadm/config/crd/bases/controlplane.cluster.x-k8s.io_kubeadmcontrolplanes.yaml)
90+
91+
- [How can you call a helm 'helper' template from a subchart with the correct context?](https://stackoverflow.com/questions/47791971/how-can-you-call-a-helm-helper-template-from-a-subchart-with-the-correct-conte)
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# syntax=docker/dockerfile:1
2+
3+
#--- Builder stage ---
4+
5+
FROM golang:1.23.0 AS builder
6+
7+
WORKDIR /app
8+
9+
COPY go.mod go.sum ./
10+
RUN go mod download
11+
12+
COPY . .
13+
RUN go build -o hetzner-failover-script ./cmd/hetzner-failover-script
14+
15+
#--- Packager stage ---
16+
17+
FROM golang:1.23.0 AS packages
18+
19+
# Set the maintainer label
20+
LABEL org.opencontainers.image.authors="[email protected], [email protected]"
21+
22+
RUN apt-get update && apt-get install -y --no-install-recommends procps && rm -rf /var/lib/apt/lists/*
23+
24+
WORKDIR /root/
25+
26+
COPY --from=builder /app/hetzner-failover-script .
27+
28+
CMD ["./hetzner-failover-script"]

build/Dockerfile renamed to build/kubeaid-bootstrap-script/Dockerfile

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,21 @@ COPY go.mod go.sum ./
1010
RUN go mod download
1111

1212
COPY . .
13-
RUN go build -o kubeaid-bootstrap-script ./cmd
13+
RUN go build -o kubeaid-bootstrap-script ./cmd/kubeaid-bootstrap-script
1414

1515
#--- Packager stage ---
1616

1717
FROM golang:1.23.0 AS packages
1818

19+
# Set the maintainer label
20+
LABEL org.opencontainers.image.authors="[email protected], [email protected]"
21+
1922
WORKDIR /
2023

2124
COPY ./scripts/install-prerequisites.sh /install-prerequisites.sh
2225
RUN chmod +x /install-prerequisites.sh
2326
RUN CPU_ARCHITECTURE=$([ "$(uname -m)" = "x86_64" ] && echo "amd64" || echo "arm64") \
24-
/install-prerequisites.sh
27+
/install-prerequisites.sh
2528

2629
COPY --from=builder /app/kubeaid-bootstrap-script /usr/local/bin/kubeaid-bootstrap-script
2730

build/Dockerfile.dev renamed to build/kubeaid-bootstrap-script/Dockerfile.dev

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ WORKDIR /
77
COPY ./scripts/install-prerequisites.sh /install-prerequisites.sh
88
RUN chmod +x /install-prerequisites.sh
99
RUN CPU_ARCHITECTURE=$([ "$(uname -m)" = "x86_64" ] && echo "amd64" || echo "arm64") \
10-
/install-prerequisites.sh
10+
/install-prerequisites.sh
1111

1212
WORKDIR /app
1313

cmd/hetzner-failover-script/main.go

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
package main
2+
3+
import (
4+
"context"
5+
"log"
6+
"log/slog"
7+
"os"
8+
"time"
9+
10+
"github.com/Obmondo/kubeaid-bootstrap-script/utils"
11+
"github.com/Obmondo/kubeaid-bootstrap-script/utils/assert"
12+
"github.com/floshodan/hrobot-go/hrobot"
13+
)
14+
15+
func main() {
16+
ctx := context.Background()
17+
18+
// Read required environment variables.
19+
var (
20+
failoverIP = utils.GetEnv("FAILOVER_IP")
21+
22+
nodeIP = utils.GetEnv("NODE_IP")
23+
24+
username = os.Getenv("API_USERNAME") // (optional).
25+
password = os.Getenv("API_PASSWORD") // (optional).
26+
27+
apiToken = os.Getenv("API_TOKEN") // (optional).
28+
)
29+
30+
// Construct Hetzner Robot API client.
31+
var hetznerRobotClient *hrobot.Client
32+
switch {
33+
case len(username) > 0 && len(password) > 0:
34+
hetznerRobotClient = hrobot.NewClient(hrobot.WithBasicAuth(username, password))
35+
36+
case len(apiToken) > 0:
37+
hetznerRobotClient = hrobot.NewClient(hrobot.WithToken(apiToken))
38+
39+
default:
40+
log.Fatalf("Either provide username and password / api token as credentials, to communicate with the Hetzner Robot API")
41+
}
42+
43+
/*
44+
A Failover IP is an additional IP that you can switch from one server to another. You can order
45+
it for any Hetzner dedicated root server, and you can switch it to any other Hetzner dedicated
46+
root server, regardless of location.
47+
48+
Switching a failover IP takes between 90 and 110 seconds.
49+
50+
REFERENCE : https://docs.hetzner.com/robot/dedicated-server/ip/failover/.
51+
*/
52+
// Hetzner Robot Failover IP API spec : API REFERENCE : https://robot.hetzner.com/doc/webservice/en.html#failover.
53+
54+
// Get the Failover IP's current active server IP.
55+
failoverIPDetails, _, err := hetznerRobotClient.Failover.GetFailoverIP(ctx, failoverIP)
56+
assert.AssertErrNil(ctx, err, "Failed getting Failover IP details")
57+
58+
activeServerIP := failoverIPDetails.ActiveServerIP
59+
slog.InfoContext(ctx, "Detected active server", slog.String("ip", activeServerIP))
60+
61+
if activeServerIP == nodeIP {
62+
slog.InfoContext(ctx, "Active server IP is already same as the current server IP")
63+
return
64+
}
65+
66+
// Update Failover IP to the current node's IP (the current node, on which this script is
67+
// running)
68+
// NOTE : Contributed :
69+
// https://github.com/floshodan/hrobot-go/commit/700f8ef9fdac565129608b3a50583b4b6564ff34.
70+
_, _, err = hetznerRobotClient.Failover.SwitchFailover(ctx, failoverIP, nodeIP)
71+
assert.AssertErrNil(ctx, err, "Failed switching Failover IP to the current node IP")
72+
73+
// Wait for the update to complete.
74+
for {
75+
failoverIPDetails, _, err := hetznerRobotClient.Failover.GetFailoverIP(ctx, failoverIP)
76+
assert.AssertErrNil(ctx, err, "Failed getting Failover IP details")
77+
78+
if failoverIPDetails.ActiveServerIP == nodeIP {
79+
slog.InfoContext(ctx, "Successfully updated Failover IP", slog.String("active-server-ip", nodeIP))
80+
break
81+
}
82+
83+
slog.InfoContext(ctx, "Waiting for the Failover IP update to complete. Sleeping for a minute....")
84+
time.Sleep(time.Minute)
85+
}
86+
}

cmd/cluster/bootstrap/aws.go renamed to cmd/kubeaid-bootstrap-script/cluster/bootstrap/aws.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ import (
88
)
99

1010
var AWSCmd = &cobra.Command{
11-
Use: "aws",
11+
Use: "aws",
12+
Short: "Bootstrap a self-managed Kubernetes cluster in AWS",
1213
Run: func(cmd *cobra.Command, args []string) {
1314
core.BootstrapCluster(cmd.Context(), skipKubeAidConfigSetup, skipClusterctlMove, aws.NewAWSCloudProvider(), false)
1415
},

cmd/cluster/bootstrap/hetzner.go renamed to cmd/kubeaid-bootstrap-script/cluster/bootstrap/hetzner.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ import (
88
)
99

1010
var HetznerCmd = &cobra.Command{
11-
Use: "hetzner",
11+
Use: "hetzner",
12+
Short: "Bootstrap a self-managed Kubernetes cluster in Hetzner (bare-metal)",
1213
Run: func(cmd *cobra.Command, args []string) {
1314
core.BootstrapCluster(cmd.Context(), skipKubeAidConfigSetup, skipClusterctlMove, hetzner.NewHetznerCloudProvider(), false)
1415
},

cmd/cluster/cluster.go renamed to cmd/kubeaid-bootstrap-script/cluster/cluster.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
package cluster
22

33
import (
4-
"github.com/Obmondo/kubeaid-bootstrap-script/cmd/cluster/bootstrap"
5-
delete_ "github.com/Obmondo/kubeaid-bootstrap-script/cmd/cluster/delete"
4+
"github.com/Obmondo/kubeaid-bootstrap-script/cmd/kubeaid-bootstrap-script/cluster/bootstrap"
5+
delete_ "github.com/Obmondo/kubeaid-bootstrap-script/cmd/kubeaid-bootstrap-script/cluster/delete"
66
"github.com/Obmondo/kubeaid-bootstrap-script/config"
77
"github.com/Obmondo/kubeaid-bootstrap-script/utils"
88
"github.com/spf13/cobra"

cmd/cluster/delete/delete.go renamed to cmd/kubeaid-bootstrap-script/cluster/delete/delete.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ import (
66
)
77

88
var DeleteCmd = &cobra.Command{
9-
Use: "delete",
9+
Use: "delete",
10+
Short: "Delete a provisioned cluster",
1011
Run: func(cmd *cobra.Command, args []string) {
1112
core.DeleteCluster(cmd.Context())
1213
},

cmd/config/config.go renamed to cmd/kubeaid-bootstrap-script/config/config.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
package config
22

33
import (
4-
"github.com/Obmondo/kubeaid-bootstrap-script/cmd/config/generate"
4+
"github.com/Obmondo/kubeaid-bootstrap-script/cmd/kubeaid-bootstrap-script/config/generate"
55
"github.com/Obmondo/kubeaid-bootstrap-script/constants"
66
"github.com/spf13/cobra"
77
)
@@ -13,9 +13,7 @@ var ConfigCmd = &cobra.Command{
1313
},
1414
}
1515

16-
var (
17-
ConfigFilePath string
18-
)
16+
var ConfigFilePath string
1917

2018
func init() {
2119
// Subcommands.

cmd/main.go renamed to cmd/kubeaid-bootstrap-script/main.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ import (
44
"log/slog"
55
"os"
66

7-
"github.com/Obmondo/kubeaid-bootstrap-script/cmd/cluster"
8-
"github.com/Obmondo/kubeaid-bootstrap-script/cmd/config"
7+
"github.com/Obmondo/kubeaid-bootstrap-script/cmd/kubeaid-bootstrap-script/cluster"
8+
"github.com/Obmondo/kubeaid-bootstrap-script/cmd/kubeaid-bootstrap-script/config"
99
"github.com/Obmondo/kubeaid-bootstrap-script/constants"
1010
"github.com/Obmondo/kubeaid-bootstrap-script/utils/logger"
1111
"github.com/spf13/cobra"

0 commit comments

Comments
 (0)