Skip to content

Commit 6d80452

Browse files
authored
Only Delete Deployments of NotRegistered versions if TemporalState is non-empty (#147)
<!--- Note to EXTERNAL Contributors --> <!-- Thanks for opening a PR! If it is a significant code change, please **make sure there is an open issue** for this. We work best with you when we have accepted the idea first before you code. --> <!--- For ALL Contributors 👇 --> ## What was changed? Only Delete Deployments of NotRegistered versions if TemporalState is non-empty ## Why? TemporalState can be empty on the initial rollout of a Worker Deployment, if no versioned workers have ever polled. In that case we need to continue the reconcile loop to create the worker pods which will poll and create the Worker Deployment. TemporalState can also, rarely, be empty if the server returns a transient NotFound error, which can happen at any time in a Worker Deployment's life. If TemporalState is empty when there are Deprecated Versions around, it is important that we do not take an empty TemporalState to mean that those Deprecated Versions are NotRegistered and should be deleted, because that would cause the controller to delete those Deployments despite the fact that they are actually draining. This change prevents that. ## Checklist <!--- add/delete as needed ---> 1. Closes <!-- add issue number here --> 2. How was this tested: Unit test 3. Any docs updates needed? <!--- update README if applicable or point out where to update docs.temporal.io -->
1 parent 1340184 commit 6d80452

File tree

3 files changed

+59
-15
lines changed

3 files changed

+59
-15
lines changed

internal/planner/planner.go

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,13 @@ func GeneratePlan(
7575
ScaleDeployments: make(map[*corev1.ObjectReference]uint32),
7676
}
7777

78+
// If Deployment was not found in temporal, which always happens on the first worker deployment version
79+
// and sometimes happens transiently thereafter, the versions list will be empty. If the deployment
80+
// exists and was found, there will always be at least one version in the list.
81+
foundDeploymentInTemporal := temporalState != nil && len(temporalState.Versions) > 0
82+
7883
// Add delete/scale operations based on version status
79-
plan.DeleteDeployments = getDeleteDeployments(k8sState, status, spec)
84+
plan.DeleteDeployments = getDeleteDeployments(k8sState, status, spec, foundDeploymentInTemporal)
8085
plan.ScaleDeployments = getScaleDeployments(k8sState, status, spec)
8186
plan.ShouldCreateDeployment = shouldCreateDeployment(status, maxVersionsIneligibleForDeletion)
8287
plan.UpdateDeployments = getUpdateDeployments(k8sState, status, connection)
@@ -191,6 +196,7 @@ func getDeleteDeployments(
191196
k8sState *k8s.DeploymentState,
192197
status *temporaliov1alpha1.TemporalWorkerDeploymentStatus,
193198
spec *temporaliov1alpha1.TemporalWorkerDeploymentSpec,
199+
foundDeploymentInTemporal bool,
194200
) []*appsv1.Deployment {
195201
var deleteDeployments []*appsv1.Deployment
196202

@@ -215,9 +221,11 @@ func getDeleteDeployments(
215221
deleteDeployments = append(deleteDeployments, d)
216222
}
217223
case temporaliov1alpha1.VersionStatusNotRegistered:
218-
// NotRegistered versions are versions that the server doesn't know about.
219-
// Only delete if it's not the target version.
220-
if status.TargetVersion.BuildID != version.BuildID {
224+
// Only delete Deployments of NotRegistered versions if temporalState was not empty
225+
if foundDeploymentInTemporal &&
226+
// NotRegistered versions are versions that the server doesn't know about.
227+
// Only delete if it's not the target version.
228+
status.TargetVersion.BuildID != version.BuildID {
221229
deleteDeployments = append(deleteDeployments, d)
222230
}
223231
}

internal/planner/planner_test.go

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -426,12 +426,13 @@ func TestGeneratePlan(t *testing.T) {
426426

427427
func TestGetDeleteDeployments(t *testing.T) {
428428
testCases := []struct {
429-
name string
430-
k8sState *k8s.DeploymentState
431-
status *temporaliov1alpha1.TemporalWorkerDeploymentStatus
432-
spec *temporaliov1alpha1.TemporalWorkerDeploymentSpec
433-
config *Config
434-
expectDeletes int
429+
name string
430+
k8sState *k8s.DeploymentState
431+
status *temporaliov1alpha1.TemporalWorkerDeploymentStatus
432+
spec *temporaliov1alpha1.TemporalWorkerDeploymentSpec
433+
config *Config
434+
expectDeletes int
435+
foundDeploymentInTemporal bool
435436
}{
436437
{
437438
name: "drained version should be deleted",
@@ -463,7 +464,8 @@ func TestGetDeleteDeployments(t *testing.T) {
463464
config: &Config{
464465
RolloutStrategy: temporaliov1alpha1.RolloutStrategy{},
465466
},
466-
expectDeletes: 1,
467+
expectDeletes: 1,
468+
foundDeploymentInTemporal: true,
467469
},
468470
{
469471
name: "not yet drained long enough",
@@ -492,7 +494,8 @@ func TestGetDeleteDeployments(t *testing.T) {
492494
},
493495
Replicas: func() *int32 { r := int32(1); return &r }(),
494496
},
495-
expectDeletes: 0,
497+
expectDeletes: 0,
498+
foundDeploymentInTemporal: true,
496499
},
497500
{
498501
name: "not registered version should be deleted",
@@ -523,15 +526,48 @@ func TestGetDeleteDeployments(t *testing.T) {
523526
spec: &temporaliov1alpha1.TemporalWorkerDeploymentSpec{
524527
Replicas: func() *int32 { r := int32(1); return &r }(),
525528
},
526-
expectDeletes: 1,
529+
expectDeletes: 1,
530+
foundDeploymentInTemporal: true,
531+
},
532+
{
533+
name: "not registered version should NOT be deleted, deployment not found in temporal",
534+
k8sState: &k8s.DeploymentState{
535+
Deployments: map[string]*appsv1.Deployment{
536+
"123": createDeploymentWithDefaultConnectionSpecHash(1),
537+
"456": createDeploymentWithDefaultConnectionSpecHash(1),
538+
},
539+
},
540+
status: &temporaliov1alpha1.TemporalWorkerDeploymentStatus{
541+
TargetVersion: temporaliov1alpha1.TargetWorkerDeploymentVersion{
542+
BaseWorkerDeploymentVersion: temporaliov1alpha1.BaseWorkerDeploymentVersion{
543+
BuildID: "123",
544+
Status: temporaliov1alpha1.VersionStatusCurrent,
545+
Deployment: &corev1.ObjectReference{Name: "test-123"},
546+
},
547+
},
548+
DeprecatedVersions: []*temporaliov1alpha1.DeprecatedWorkerDeploymentVersion{
549+
{
550+
BaseWorkerDeploymentVersion: temporaliov1alpha1.BaseWorkerDeploymentVersion{
551+
BuildID: "456",
552+
Status: temporaliov1alpha1.VersionStatusNotRegistered,
553+
Deployment: &corev1.ObjectReference{Name: "test-456"},
554+
},
555+
},
556+
},
557+
},
558+
spec: &temporaliov1alpha1.TemporalWorkerDeploymentSpec{
559+
Replicas: func() *int32 { r := int32(1); return &r }(),
560+
},
561+
expectDeletes: 0,
562+
foundDeploymentInTemporal: false,
527563
},
528564
}
529565

530566
for _, tc := range testCases {
531567
t.Run(tc.name, func(t *testing.T) {
532568
err := tc.spec.Default(context.Background())
533569
require.NoError(t, err)
534-
deletes := getDeleteDeployments(tc.k8sState, tc.status, tc.spec)
570+
deletes := getDeleteDeployments(tc.k8sState, tc.status, tc.spec, tc.foundDeploymentInTemporal)
535571
assert.Equal(t, tc.expectDeletes, len(deletes), "unexpected number of deletes")
536572
})
537573
}

internal/temporal/worker_deployment.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ func GetWorkerDeploymentState(
8888
if err != nil {
8989
var notFound *serviceerror.NotFound
9090
if errors.As(err, &notFound) {
91-
// If deployment not found, return empty state
91+
// If deployment not found, return empty state. Need to scale up workers in order to create Deployment Temporal-side
9292
return state, nil
9393
}
9494
return nil, fmt.Errorf("unable to describe worker deployment %s: %w", workerDeploymentName, err)

0 commit comments

Comments
 (0)