Skip to content

Commit

Permalink
Merge pull request #322 from jaxesn/jgw/capture-boot-log
Browse files Browse the repository at this point in the history
Capture EC2 serial console output of nodes
  • Loading branch information
g-gaston authored Feb 7, 2025
2 parents 735f9be + b68ba10 commit 126cf65
Show file tree
Hide file tree
Showing 12 changed files with 588 additions and 62 deletions.
6 changes: 5 additions & 1 deletion buildspecs/test-nodeadm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,14 @@ phases:
build:
commands:
- SANITIZED_CODEBUILD_BUILD_ID=$(echo $CODEBUILD_BUILD_ID | tr ':' '-')
- ./hack/run-e2e.sh $SANITIZED_CODEBUILD_BUILD_ID $AWS_REGION $KUBERNETES_VERSION $CNI s3://$ARTIFACTS_BUCKET/latest-pre/linux/amd64/nodeadm s3://$ARTIFACTS_BUCKET/latest-pre/linux/arm64/nodeadm $LOGS_BUCKET $ENDPOINT
- ./hack/run-e2e.sh $SANITIZED_CODEBUILD_BUILD_ID $AWS_REGION $KUBERNETES_VERSION $CNI s3://$ARTIFACTS_BUCKET/latest-pre/linux/amd64/nodeadm s3://$ARTIFACTS_BUCKET/latest-pre/linux/arm64/nodeadm $LOGS_BUCKET e2e-artifacts $ENDPOINT

reports:
e2e-reports:
files:
- e2e-reports/junit-nodeadm.xml
file-format: "JUNITXML"

artifacts:
files:
- "e2e-artifacts/*"
6 changes: 4 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ require (
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.27
github.com/aws/aws-sdk-go-v2/service/cloudformation v1.56.10
github.com/aws/aws-sdk-go-v2/service/ec2 v1.202.2
github.com/aws/aws-sdk-go-v2/service/ec2instanceconnect v1.27.12
github.com/aws/aws-sdk-go-v2/service/ecr v1.40.1
github.com/aws/aws-sdk-go-v2/service/eks v1.57.2
github.com/aws/aws-sdk-go-v2/service/iam v1.38.10
Expand All @@ -27,6 +28,7 @@ require (
github.com/stretchr/testify v1.10.0
github.com/tredoe/osutil v1.5.0
go.uber.org/zap v1.27.0
golang.org/x/crypto v0.31.0
golang.org/x/mod v0.22.0
k8s.io/apimachinery v0.32.1
k8s.io/client-go v0.32.1
Expand Down Expand Up @@ -119,8 +121,8 @@ require (
golang.org/x/net v0.33.0 // indirect
golang.org/x/oauth2 v0.23.0 // indirect
golang.org/x/sync v0.10.0 // indirect
golang.org/x/sys v0.28.0 // indirect
golang.org/x/term v0.27.0 // indirect
golang.org/x/sys v0.29.0 // indirect
golang.org/x/term v0.28.0 // indirect
golang.org/x/text v0.21.0 // indirect
golang.org/x/time v0.7.0 // indirect
golang.org/x/tools v0.28.0 // indirect
Expand Down
12 changes: 8 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ github.com/aws/aws-sdk-go-v2/service/cloudformation v1.56.10 h1:5asGyAEAXc1A5FWC
github.com/aws/aws-sdk-go-v2/service/cloudformation v1.56.10/go.mod h1:synt0SxqCGxfepb1DUAMK7TxtRvucnmk80qXbp9Cfcs=
github.com/aws/aws-sdk-go-v2/service/ec2 v1.202.2 h1:qas57zkkMX8OM+MVz+4sMaOaD9HRmeFJRb8nzMdYkx0=
github.com/aws/aws-sdk-go-v2/service/ec2 v1.202.2/go.mod h1:2omfxRebtpbbFqQGqeurDzlyB7Txa2e1xe9rCDFqlwA=
github.com/aws/aws-sdk-go-v2/service/ec2instanceconnect v1.27.12 h1:UK36TdbN1GB8FwZYTT0ZPlsg7ZqVpMHMcee0b6BYpSY=
github.com/aws/aws-sdk-go-v2/service/ec2instanceconnect v1.27.12/go.mod h1:qvHbeq93l0rsTolEGpOZ12yHcVQ/yfHiqVEmh8AVw1U=
github.com/aws/aws-sdk-go-v2/service/ecr v1.40.1 h1:1C4JhM1VGzObZBPY0t+ymBRvF9BuCFToJ/j8RaW/9p8=
github.com/aws/aws-sdk-go-v2/service/ecr v1.40.1/go.mod h1:XGy5jWOLlZK6nN3NWkoux6vN0QfaEIB91xbPCiX8gao=
github.com/aws/aws-sdk-go-v2/service/eks v1.57.2 h1:Uxm6iUIEaRtyvcp8Gj45viJmM2KksMLNBRCd8DBxuJA=
Expand Down Expand Up @@ -237,6 +239,8 @@ go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U=
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.22.0 h1:D4nJWe9zXqHOmWqj4VMOJhvzj7bEZg4wEYa759z1pH4=
Expand All @@ -258,10 +262,10 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q=
golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU=
golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.28.0 h1:/Ts8HFuMR2E6IP/jlo7QVLZHggjKQbhu/7H0LJFr3Gg=
golang.org/x/term v0.28.0/go.mod h1:Sw/lC2IAUZ92udQNf3WodGtn4k/XoLyZoh8v/8uiwek=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
Expand Down
12 changes: 8 additions & 4 deletions hack/run-e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@ KUBERNETES_VERSION="${3?Please specify the Kubernetes version}"
CNI="${4?Please specify the cni}"
NODEADM_AMD_URL="${5?Please specify the nodeadm amd url}"
NODEADM_ARM_URL="${6?Please specify the nodeadm arm url}"
LOGS_BUCKET="${7-}"
ENDPOINT="${8-}"
LOGS_BUCKET="${7-?Please specify the bucket for logs}"
ARTIFACTS_FOLDER="${8-?Please specify the folder for artifacts}"
ENDPOINT="${9-}"

CONFIG_DIR="$REPO_ROOT/e2e-config"
ARCH="$([ "x86_64" = "$(uname -m)" ] && echo amd64 || echo arm64)"
Expand Down Expand Up @@ -65,22 +66,25 @@ trap "cleanup" EXIT

$BIN_DIR/e2e-test setup -s $RESOURCES_YAML

mkdir -p e2e-reports
mkdir -p "$ARTIFACTS_FOLDER"
ARTIFACTS_FOLDER=$(realpath "$ARTIFACTS_FOLDER")

cat <<EOF > $CONFIG_DIR/e2e-param.yaml
clusterName: "$CLUSTER_NAME"
clusterRegion: "$REGION"
nodeadmUrlAMD: "$NODEADM_AMD_URL"
nodeadmUrlARM: "$NODEADM_ARM_URL"
logsBucket: "$LOGS_BUCKET"
endpoint: "$ENDPOINT"
artifactsFolder: "$ARTIFACTS_FOLDER"
EOF


SKIP_FILE=$REPO_ROOT/hack/SKIPPED_TESTS.yaml
# Extract skipped_tests field from SKIP_FILE file and join entries with ' || '
skip=$(yq '.skipped_tests | join("|")' ${SKIP_FILE})

mkdir -p e2e-reports

# We explicitly specify procs instead of letting ginkgo decide (with -p) because in if not
# ginkgo will use all available CPUs, which could be a small number depending
# on how the CI runner has been configured. However, even if only one CPU is available,
Expand Down
1 change: 1 addition & 0 deletions hybrid-nodes-cdk/lib/nodeadm-stack.ts
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,7 @@ export class NodeadmBuildStack extends cdk.Stack {
'ec2:RebootInstances',
'ec2:StopInstances',
'ec2:TerminateInstances',
'ec2-instance-connect:SendSerialConsoleSSHPublicKey',
],
resources: ['*'],
effect: iam.Effect.ALLOW,
Expand Down
2 changes: 1 addition & 1 deletion test/e2e/ec2/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ func DeleteEC2Instance(ctx context.Context, client *ec2.Client, instanceID strin

func WaitForEC2InstanceRunning(ctx context.Context, ec2Client *ec2.Client, instanceID string) error {
waiter := ec2.NewInstanceRunningWaiter(ec2Client, func(isowo *ec2.InstanceRunningWaiterOptions) {
isowo.LogWaitAttempts = true
isowo.MinDelay = 1 * time.Second
})
return waiter.Wait(ctx, &ec2.DescribeInstancesInput{InstanceIds: []string{instanceID}}, nodeRunningTimeout)
}
Expand Down
48 changes: 30 additions & 18 deletions test/e2e/kubernetes/verify.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"strings"

"github.com/go-logr/logr"
corev1 "k8s.io/api/core/v1"
clientgo "k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
)
Expand All @@ -24,32 +25,43 @@ type VerifyNode struct {
NodeIPAddress string
}

func (t VerifyNode) Run(ctx context.Context) error {
func (v VerifyNode) WaitForNodeReady(ctx context.Context) (*corev1.Node, error) {
// get the hybrid node registered using nodeadm by the internal IP of an EC2 Instance
node, err := WaitForNode(ctx, t.K8s, t.NodeIPAddress, t.Logger)
node, err := WaitForNode(ctx, v.K8s, v.NodeIPAddress, v.Logger)
if err != nil {
return err
return nil, err
}
if node == nil {
return fmt.Errorf("returned node is nil")
return nil, fmt.Errorf("returned node is nil")
}

nodeName := node.Name

t.Logger.Info("Waiting for hybrid node to be ready...")
if err = WaitForHybridNodeToBeReady(ctx, t.K8s, nodeName, t.Logger); err != nil {
v.Logger.Info("Waiting for hybrid node to be ready...")
if err = WaitForHybridNodeToBeReady(ctx, v.K8s, nodeName, v.Logger); err != nil {
return nil, err
}

return node, nil
}

func (v VerifyNode) Run(ctx context.Context) error {
node, err := v.WaitForNodeReady(ctx)
if err != nil {
return err
}

t.Logger.Info("Creating a test pod on the hybrid node...")
nodeName := node.Name

v.Logger.Info("Creating a test pod on the hybrid node...")
podName := GetNginxPodName(nodeName)
if err = CreateNginxPodInNode(ctx, t.K8s, nodeName, testPodNamespace, t.Region, t.Logger); err != nil {
if err = CreateNginxPodInNode(ctx, v.K8s, nodeName, testPodNamespace, v.Region, v.Logger); err != nil {
return err
}
t.Logger.Info(fmt.Sprintf("Pod %s created and running on node %s", podName, nodeName))
v.Logger.Info(fmt.Sprintf("Pod %s created and running on node %s", podName, nodeName))

t.Logger.Info("Exec-ing nginx -version", "pod", podName)
stdout, stderr, err := ExecPodWithRetries(ctx, t.ClientConfig, t.K8s, podName, testPodNamespace, "/sbin/nginx", "-version")
v.Logger.Info("Exec-ing nginx -version", "pod", podName)
stdout, stderr, err := ExecPodWithRetries(ctx, v.ClientConfig, v.K8s, podName, testPodNamespace, "/sbin/nginx", "-version")
if err != nil {
return err
}
Expand All @@ -59,23 +71,23 @@ func (t VerifyNode) Run(ctx context.Context) error {
if stderr != "" {
return fmt.Errorf("pod exec stderr should be empty %s", stderr)
}
t.Logger.Info("Successfully exec'd nginx -version", "pod", podName)
v.Logger.Info("Successfully exec'd nginx -version", "pod", podName)

t.Logger.Info("Checking logs for nginx output", "pod", podName)
logs, err := GetPodLogsWithRetries(ctx, t.K8s, podName, testPodNamespace)
v.Logger.Info("Checking logs for nginx output", "pod", podName)
logs, err := GetPodLogsWithRetries(ctx, v.K8s, podName, testPodNamespace)
if err != nil {
return err
}
if !strings.Contains(logs, "nginx") {
return fmt.Errorf("pod log does not contain expected value %s: %s", logs, "nginx")
}
t.Logger.Info("Successfully validated log output", "pod", podName)
v.Logger.Info("Successfully validated log output", "pod", podName)

t.Logger.Info("Deleting test pod", "pod", podName)
if err = DeletePod(ctx, t.K8s, podName, testPodNamespace); err != nil {
v.Logger.Info("Deleting test pod", "pod", podName)
if err = DeletePod(ctx, v.K8s, podName, testPodNamespace); err != nil {
return err
}
t.Logger.Info("Pod deleted successfully", "pod", podName)
v.Logger.Info("Pod deleted successfully", "pod", podName)

return nil
}
118 changes: 117 additions & 1 deletion test/e2e/logger.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
package e2e

import (
"bytes"
"io"
"os"
"sync"

"github.com/go-logr/logr"
"github.com/go-logr/zapr"
Expand All @@ -11,10 +14,14 @@ import (

type LoggerConfig struct {
NoColor bool
syncer zapcore.WriteSyncer
}

func (c LoggerConfig) Apply(opts *LoggerConfig) {
opts.NoColor = c.NoColor
if c.syncer != nil {
opts.syncer = c.syncer
}
}

type LoggerOption interface {
Expand All @@ -27,13 +34,122 @@ func NewLogger(opts ...LoggerOption) logr.Logger {
opt.Apply(cfg)
}

if cfg.syncer == nil {
cfg.syncer = zapcore.AddSync(os.Stdout)
}

encoderCfg := zap.NewDevelopmentEncoderConfig()
if !cfg.NoColor {
encoderCfg.EncodeLevel = zapcore.CapitalColorLevelEncoder
}

consoleEncoder := zapcore.NewConsoleEncoder(encoderCfg)
core := zapcore.NewCore(consoleEncoder, zapcore.AddSync(os.Stdout), zapcore.DebugLevel)
core := zapcore.NewCore(consoleEncoder, cfg.syncer, zapcore.DebugLevel)
log := zap.New(core)

return zapr.NewLogger(log)
}

// NewPausableLogger returns a logger that can be paused and resumed.
func NewPausableLogger(opts ...LoggerOption) PausableLogger {
cfg := &LoggerConfig{}
for _, opt := range opts {
opt.Apply(cfg)
}

activeSyncer := zapcore.AddSync(os.Stdout)
syncer := newSwitchSyncer(activeSyncer)
cfg.syncer = syncer

return PausableLogger{
Logger: NewLogger(cfg),
syncer: syncer,
}
}

// PausableLogger can be paused and resumed. It wraps a logr.Logger.
// When paused, writes go to a buffer; when resumed, writes go to stdout.
// After it's resumed, the buffer is flushed to stdout.
type PausableLogger struct {
logr.Logger
syncer *switchSyncer
}

func (l PausableLogger) Pause() {
l.syncer.Pause()
}

func (l PausableLogger) Resume() error {
return l.syncer.Resume()
}

// switchSyncer implements zapcore.WriteSyncer.
// When paused, writes go to a buffer; when resumed, writes go to the active writer.
type switchSyncer struct {
*SwitchWriter
active zapcore.WriteSyncer // normally stdout
}

func newSwitchSyncer(active zapcore.WriteSyncer) *switchSyncer {
return &switchSyncer{
active: active,
SwitchWriter: NewSwitchWriter(active),
}
}

func (s *switchSyncer) Sync() error {
s.mu.Lock()
defer s.mu.Unlock()
if s.paused {
return nil
}
return s.active.Sync()
}

// SwitchWriter implements io.Writer.
// When paused, writes go to a buffer; when resumed, writes go to the active writer.
type SwitchWriter struct {
mu sync.Mutex
active io.Writer // actual writer where we want to output
buffer *bytes.Buffer // holds bytes while paused
paused bool
}

var _ io.Writer = &SwitchWriter{}

func NewSwitchWriter(active io.Writer) *SwitchWriter {
return &SwitchWriter{
active: active,
buffer: &bytes.Buffer{},
}
}

func (s *SwitchWriter) Write(p []byte) (int, error) {
s.mu.Lock()
defer s.mu.Unlock()
if s.paused {
return s.buffer.Write(p)
}
return s.active.Write(p)
}

// Pause causes subsequent writes to be buffered.
func (s *SwitchWriter) Pause() {
s.mu.Lock()
s.paused = true
s.mu.Unlock()
}

// Resume flushes the buffer to the active writer and resumes normal output.
func (s *SwitchWriter) Resume() error {
s.mu.Lock()
defer s.mu.Unlock()
if s.buffer.Len() > 0 {
if _, err := s.active.Write(s.buffer.Bytes()); err != nil {
return err
}
s.buffer.Reset()
}
s.paused = false
return nil
}
Loading

0 comments on commit 126cf65

Please sign in to comment.