From b6a8a7f6357388167762adf7f04150c08b7b6b22 Mon Sep 17 00:00:00 2001 From: Ramon Quitales Date: Thu, 18 Jan 2024 15:30:17 -0800 Subject: [PATCH] Fix deployment await logic for accurate rollout detection (#2756) ### Proposed changes This pull request enhances the deployment await logic to accurately identify whether a deployment has triggered a rollout. In the previous implementation, the logic relied on flagging a rollout if the pod template spec had changed. However, this approach proved inaccurate, as instances arose where a pod template spec update did not necessarily result in a replicaset rollout. Specifically, when users explicitly defined a default field, the await logic would run until timeout, erroneously reporting that the deployment failed to complete. The revised logic now assesses whether the new deployment revision has been incremented compared to the previous deployment. #### Changes made: - Updated logic for determining Deployment rollout triggers - Added unit tests - Addressed data race conditions in existing unit tests - Added additional step to existing e2e test case to validate CUJ is fixed (test fails on older provider version, but passes on new version) ### Related issues (optional) Fixes: #2662 --- CHANGELOG.md | 2 + provider/pkg/await/deployment.go | 16 +- provider/pkg/await/deployment_test.go | 186 ++++++++++++++++-- provider/pkg/await/statefulset_test.go | 15 +- .../nodejs/deployment-rollout/step3/index.ts | 39 ++++ tests/sdk/nodejs/nodejs_test.go | 32 +++ 6 files changed, 256 insertions(+), 34 deletions(-) create mode 100644 tests/sdk/nodejs/deployment-rollout/step3/index.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index dee54d117f..812378f662 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ ## Unreleased +- Fix deployment await logic for accurate rollout detection + ## 4.7.0 (January 17, 2024) - Fix JSON encoding of KubeVersion and Version on Chart resource (.NET SDK) (https://github.com/pulumi/pulumi-kubernetes/pull/2740) - Fix option propagation in component resources (Python SDK) (https://github.com/pulumi/pulumi-kubernetes/pull/2717) diff --git a/provider/pkg/await/deployment.go b/provider/pkg/await/deployment.go index ee55a7f7e0..8ea3b10f5e 100644 --- a/provider/pkg/await/deployment.go +++ b/provider/pkg/await/deployment.go @@ -685,23 +685,15 @@ func (dia *deploymentInitAwaiter) checkReplicaSetStatus() { } } +// changeTriggeredRollout returns true if the current deployment has a different revision than the last deployment. +// This is used to determine whether the deployment is rolling out a new revision, which in turn, creates/updates a +// replica set. func (dia *deploymentInitAwaiter) changeTriggeredRollout() bool { if dia.config.lastInputs == nil { return true } - fields, err := openapi.PropertiesChanged( - dia.config.lastInputs.Object, dia.config.currentInputs.Object, - []string{ - ".spec.template.spec", - }) - if err != nil { - logger.V(3).Infof("Failed to check whether Pod template for Deployment %q changed", - dia.config.currentInputs.GetName()) - return false - } - - return len(fields) > 0 + return dia.deployment.GetAnnotations()[revision] != dia.config.lastOutputs.GetAnnotations()[revision] } func (dia *deploymentInitAwaiter) checkPersistentVolumeClaimStatus() { diff --git a/provider/pkg/await/deployment_test.go b/provider/pkg/await/deployment_test.go index ee9c3c539c..81dde01f87 100644 --- a/provider/pkg/await/deployment_test.go +++ b/provider/pkg/await/deployment_test.go @@ -702,29 +702,26 @@ func Test_Apps_Deployment_Without_PersistentVolumeClaims(t *testing.T) { } } -type setLastInputs func(obj *unstructured.Unstructured) - func Test_Apps_Deployment_MultipleUpdates(t *testing.T) { tests := []struct { - description string - inputs func() *unstructured.Unstructured - firstUpdate func(deployments, replicaSets, pods chan watch.Event, timeout chan time.Time, - setLast setLastInputs) + description string + inputs func() *unstructured.Unstructured + outputs func() *unstructured.Unstructured + firstUpdate func(deployments, replicaSets, pods chan watch.Event, timeout chan time.Time) secondUpdate func(deployments, replicaSets, pods chan watch.Event, timeout chan time.Time) expectedError error }{ { description: "Should succeed if replicas are scaled", inputs: regressionDeploymentScaled3Input, + outputs: regressionDeploymentScaled3Output, firstUpdate: func( deployments, replicaSets, pods chan watch.Event, timeout chan time.Time, - setLast setLastInputs, ) { computed := regressionDeploymentScaled3() deployments <- watchAddedEvent(computed) replicaSets <- watchAddedEvent(regressionReplicaSetScaled3()) - setLast(regressionDeploymentScaled3Input()) // Timeout. Success. timeout <- time.Now() }, @@ -732,6 +729,28 @@ func Test_Apps_Deployment_MultipleUpdates(t *testing.T) { deployments <- watchAddedEvent(regressionDeploymentScaled5()) replicaSets <- watchAddedEvent(regressionReplicaSetScaled5()) + // Timeout. Success. + timeout <- time.Now() + }, + }, + { + description: "Should succeed if deployment spec has a no-op replicaset change that doesn't trigger a rollout", + inputs: regressionDeploymentScaled3Input, + outputs: regressionDeploymentScaled3Output, + firstUpdate: func( + deployments, replicaSets, pods chan watch.Event, timeout chan time.Time, + ) { + computed := regressionDeploymentScaled3() + deployments <- watchAddedEvent(computed) + replicaSets <- watchAddedEvent(regressionReplicaSetScaled3()) + + // Timeout. Success. + timeout <- time.Now() + }, + secondUpdate: func(deployments, replicaSets, pods chan watch.Event, timeout chan time.Time) { + deployments <- watchAddedEvent(regressionDeploymentScaled3ExplicitDefault()) + replicaSets <- watchAddedEvent(regressionReplicaSetScaled3()) // ReplicaSet is still the same as previous step. + // Timeout. Success. timeout <- time.Now() }, @@ -742,6 +761,8 @@ func Test_Apps_Deployment_MultipleUpdates(t *testing.T) { awaiter := makeDeploymentInitAwaiter( updateAwaitConfig{ createAwaitConfig: mockAwaitConfig(test.inputs()), + lastOutputs: test.outputs(), + lastInputs: test.inputs(), }) deployments := make(chan watch.Event) replicaSets := make(chan watch.Event) @@ -750,10 +771,7 @@ func Test_Apps_Deployment_MultipleUpdates(t *testing.T) { timeout := make(chan time.Time) period := make(chan time.Time) - go test.firstUpdate(deployments, replicaSets, pods, timeout, - func(obj *unstructured.Unstructured) { - awaiter.config.lastInputs = obj - }) + go test.firstUpdate(deployments, replicaSets, pods, timeout) err := awaiter.await(deployments, replicaSets, pods, pvcs, timeout, period) assert.Nil(t, err, test.description) @@ -2150,6 +2168,38 @@ func regressionDeploymentScaled3Input() *unstructured.Unstructured { return obj } +func regressionDeploymentScaled3Output() *unstructured.Unstructured { + obj, err := decodeUnstructured(`{ + "apiVersion": "apps/v1", + "kind": "Deployment", + "metadata": { + "name": "frontend-ur1fwk62", + "namespace": "default", + "annotations": { + "deployment.kubernetes.io/revision": "1" + } + }, + "spec": { + "selector": { "matchLabels": { "app": "frontend" } }, + "replicas": 3, + "template": { + "metadata": { "labels": { "app": "frontend" } }, + "spec": { "containers": [{ + "name": "php-redis", + "image": "us-docker.pkg.dev/google-samples/containers/gke/gb-frontend:v5", + "resources": { "requests": { "cpu": "100m", "memory": "100Mi" } }, + "env": [{ "name": "GET_HOSTS_FROM", "value": "dns" }], + "ports": [{ "containerPort": 80 }] + }] } + } + } +}`) + if err != nil { + panic(err) + } + return obj +} + func regressionDeploymentScaled3() *unstructured.Unstructured { obj, err := decodeUnstructured(`{ "apiVersion": "apps/v1", @@ -2262,6 +2312,118 @@ func regressionDeploymentScaled3() *unstructured.Unstructured { return obj } +func regressionDeploymentScaled3ExplicitDefault() *unstructured.Unstructured { + obj, err := decodeUnstructured(`{ + "apiVersion": "apps/v1", + "kind": "Deployment", + "metadata": { + "annotations": { + "deployment.kubernetes.io/revision": "1", + "pulumi.com/autonamed": "true" + }, + "creationTimestamp": "2018-08-21T21:55:11Z", + "generation": 2, + "labels": { + "app": "frontend" + }, + "name": "frontend-ur1fwk62", + "namespace": "default", + "resourceVersion": "917821", + "selfLink": "/apis/apps/v1/namespaces/default/deployments/frontend-ur1fwk62", + "uid": "e0a51d3c-a58c-11e8-8cb4-080027bd9056" + }, + "spec": { + "progressDeadlineSeconds": 600, + "replicas": 3, + "revisionHistoryLimit": 2, + "selector": { + "matchLabels": { + "app": "frontend" + } + }, + "strategy": { + "rollingUpdate": { + "maxSurge": "25%", + "maxUnavailable": "25%" + }, + "type": "RollingUpdate" + }, + "template": { + "metadata": { + "creationTimestamp": null, + "labels": { + "app": "frontend" + } + }, + "spec": { + "containers": [ + { + "env": [ + { + "name": "GET_HOSTS_FROM", + "value": "dns" + } + ], + "image": "us-docker.pkg.dev/google-samples/containers/gke/gb-frontend:v5", + "imagePullPolicy": "IfNotPresent", + "name": "php-redis", + "ports": [ + { + "containerPort": 80, + "protocol": "TCP" + } + ], + "resources": { + "requests": { + "cpu": "100m", + "memory": "100Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File" + } + ], + "dnsPolicy": "ClusterFirst", + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "terminationGracePeriodSeconds": 30 + } + } + }, + "status": { + "availableReplicas": 3, + "conditions": [ + { + "lastTransitionTime": "2018-08-21T21:55:16Z", + "lastUpdateTime": "2018-08-21T21:55:16Z", + "message": "Deployment has minimum availability.", + "reason": "MinimumReplicasAvailable", + "status": "True", + "type": "Available" + }, + { + "lastTransitionTime": "2018-08-21T21:55:11Z", + "lastUpdateTime": "2018-08-21T21:55:16Z", + "message": "ReplicaSet \"frontend-ur1fwk62-777d669468\" has successfully progressed.", + "reason": "NewReplicaSetAvailable", + "status": "True", + "type": "Progressing" + } + ], + "observedGeneration": 2, + "readyReplicas": 3, + "replicas": 3, + "updatedReplicas": 3 + } +}`) + + if err != nil { + panic(err) + } + return obj +} + func regressionDeploymentScaled5() *unstructured.Unstructured { obj, err := decodeUnstructured(`{ "apiVersion": "apps/v1", diff --git a/provider/pkg/await/statefulset_test.go b/provider/pkg/await/statefulset_test.go index 677a988f76..e38b473a3c 100644 --- a/provider/pkg/await/statefulset_test.go +++ b/provider/pkg/await/statefulset_test.go @@ -263,10 +263,9 @@ func Test_Apps_StatefulSet(t *testing.T) { func Test_Apps_StatefulSet_MultipleUpdates(t *testing.T) { tests := []struct { - description string - inputs func() *unstructured.Unstructured - firstUpdate func(statefulsets, pods chan watch.Event, timeout chan time.Time, - setLast setLastInputs) + description string + inputs func() *unstructured.Unstructured + firstUpdate func(statefulsets, pods chan watch.Event, timeout chan time.Time) secondUpdate func(statefulsets, pods chan watch.Event, timeout chan time.Time) firstExpectedError error secondExpectedError error @@ -276,11 +275,9 @@ func Test_Apps_StatefulSet_MultipleUpdates(t *testing.T) { inputs: statefulsetFailed, firstUpdate: func( statefulsets, pods chan watch.Event, timeout chan time.Time, - setLast setLastInputs, ) { statefulsets <- watchAddedEvent(statefulsetFailed()) - setLast(statefulsetFailed()) // Timeout. Failed. timeout <- time.Now() }, @@ -303,16 +300,14 @@ func Test_Apps_StatefulSet_MultipleUpdates(t *testing.T) { awaiter := makeStatefulSetInitAwaiter( updateAwaitConfig{ createAwaitConfig: mockAwaitConfig(test.inputs()), + lastInputs: statefulsetFailed(), }) statefulsets := make(chan watch.Event) pods := make(chan watch.Event) timeout := make(chan time.Time) period := make(chan time.Time) - go test.firstUpdate(statefulsets, pods, timeout, - func(obj *unstructured.Unstructured) { - awaiter.config.lastInputs = obj - }) + go test.firstUpdate(statefulsets, pods, timeout) err := awaiter.await(statefulsets, pods, timeout, period) assert.Equal(t, test.firstExpectedError, err, test.description) diff --git a/tests/sdk/nodejs/deployment-rollout/step3/index.ts b/tests/sdk/nodejs/deployment-rollout/step3/index.ts new file mode 100644 index 0000000000..40b4ec0267 --- /dev/null +++ b/tests/sdk/nodejs/deployment-rollout/step3/index.ts @@ -0,0 +1,39 @@ +// Copyright 2016-2024, Pulumi Corporation. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import * as k8s from "@pulumi/kubernetes"; + +export const namespace = new k8s.core.v1.Namespace("test-namespace"); + +const appLabels = { app: "nginx" }; +new k8s.apps.v1.Deployment("nginx", { + metadata: { namespace: namespace.metadata.name }, + spec: { + selector: { matchLabels: appLabels }, + replicas: 1, + template: { + metadata: { labels: appLabels }, + spec: { + containers: [ + { + name: "nginx", + image: "nginx:stable", + ports: [{ containerPort: 80 }], + }, + ], + schedulerName: "default-scheduler", // No-op update that doesn't result in a new replica set rollout. + }, + }, + }, +}); diff --git a/tests/sdk/nodejs/nodejs_test.go b/tests/sdk/nodejs/nodejs_test.go index 088c632f92..1da97ca911 100644 --- a/tests/sdk/nodejs/nodejs_test.go +++ b/tests/sdk/nodejs/nodejs_test.go @@ -456,6 +456,38 @@ func TestDeploymentRollout(t *testing.T) { // Assert deployment is updated successfully. // + name, _ := openapi.Pluck(appsv1Deploy.Outputs, "metadata", "name") + assert.True(t, strings.Contains(name.(string), "nginx")) + containers, _ := openapi.Pluck(appsv1Deploy.Outputs, "spec", "template", "spec", "containers") + containerStatus := containers.([]any)[0].(map[string]any) + image := containerStatus["image"] + assert.Equal(t, image.(string), "nginx:stable") + }, + }, + { + // This is a deployment spec update that causes a no-op replica set update. + Dir: filepath.Join("deployment-rollout", "step3"), + Additive: true, + ExtraRuntimeValidation: func(t *testing.T, stackInfo integration.RuntimeValidationStackInfo) { + assert.NotNil(t, stackInfo.Deployment) + assert.Equal(t, 4, len(stackInfo.Deployment.Resources)) + + tests.SortResourcesByURN(stackInfo) + + appsv1Deploy := stackInfo.Deployment.Resources[0] + namespace := stackInfo.Deployment.Resources[1] + provRes := stackInfo.Deployment.Resources[2] + stackRes := stackInfo.Deployment.Resources[3] + + assert.Equal(t, resource.RootStackType, stackRes.URN.Type()) + assert.True(t, providers.IsProviderType(provRes.URN.Type())) + + assert.Equal(t, tokens.Type("kubernetes:core/v1:Namespace"), namespace.URN.Type()) + + // + // Assert deployment is updated successfully. + // + name, _ := openapi.Pluck(appsv1Deploy.Outputs, "metadata", "name") assert.True(t, strings.Contains(name.(string), "nginx")) containers, _ := openapi.Pluck(appsv1Deploy.Outputs, "spec", "template", "spec", "containers")