diff --git a/CHANGELOG.md b/CHANGELOG.md index 13098cf0e..c6cce1701 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,7 +27,7 @@ This changelog keeps track of work items that have been completed and are ready ### New -- **General**: TODO ([#TODO](https://github.com/kedacore/http-add-on/issues/TODO)) +- **General**: Add environment variables for leader election timing configuration ([#1365](https://github.com/kedacore/http-add-on/pull/1365)) ### Improvements diff --git a/docs/operate.md b/docs/operate.md index 266d4f7fe..ec7a838ae 100644 --- a/docs/operate.md +++ b/docs/operate.md @@ -71,3 +71,24 @@ Optional variables `OTEL_EXPORTER_OTLP_TRACES_TIMEOUT` - The batcher timeout in seconds to send batch of data points (`5` by default) ### Configuring Service Failover + +# Configuring the KEDA HTTP Add-on Operator + +## Leader Election Timing + +When running multiple replicas of the operator for high availability, you can tune the leader election timing with the environment variables below. Every value that is set must be greater than zero, and the values must satisfy lease duration > renew deadline > retry period; the operator reports an invalid combination and exits at startup. + +- **`KEDA_HTTP_OPERATOR_LEADER_ELECTION_LEASE_DURATION`** - Duration that non-leader candidates will wait to force acquire leadership. Default: `15s` (Kubernetes default) +- **`KEDA_HTTP_OPERATOR_LEADER_ELECTION_RENEW_DEADLINE`** - Duration that the acting leader will retry refreshing leadership before giving up. Default: `10s` (Kubernetes default) +- **`KEDA_HTTP_OPERATOR_LEADER_ELECTION_RETRY_PERIOD`** - Duration the LeaderElector clients should wait between tries of actions. 
Default: `2s` (Kubernetes default) + +Example usage in deployment: +```yaml +env: +- name: KEDA_HTTP_OPERATOR_LEADER_ELECTION_LEASE_DURATION + value: "30s" +- name: KEDA_HTTP_OPERATOR_LEADER_ELECTION_RENEW_DEADLINE + value: "20s" +- name: KEDA_HTTP_OPERATOR_LEADER_ELECTION_RETRY_PERIOD + value: "5s" +``` diff --git a/operator/main.go b/operator/main.go index 4844a268a..f02e40e71 100644 --- a/operator/main.go +++ b/operator/main.go @@ -34,6 +34,7 @@ import ( httpv1alpha1 "github.com/kedacore/http-add-on/operator/apis/http/v1alpha1" httpcontrollers "github.com/kedacore/http-add-on/operator/controllers/http" "github.com/kedacore/http-add-on/operator/controllers/http/config" + "github.com/kedacore/http-add-on/pkg/util" // +kubebuilder:scaffold:imports ) @@ -86,6 +87,29 @@ func main() { os.Exit(1) } + leaseDuration, err := util.ResolveOsEnvDuration("KEDA_HTTP_OPERATOR_LEADER_ELECTION_LEASE_DURATION") + if err != nil { + setupLog.Error(err, "invalid KEDA_HTTP_OPERATOR_LEADER_ELECTION_LEASE_DURATION") + os.Exit(1) + } + + renewDeadline, err := util.ResolveOsEnvDuration("KEDA_HTTP_OPERATOR_LEADER_ELECTION_RENEW_DEADLINE") + if err != nil { + setupLog.Error(err, "invalid KEDA_HTTP_OPERATOR_LEADER_ELECTION_RENEW_DEADLINE") + os.Exit(1) + } + + retryPeriod, err := util.ResolveOsEnvDuration("KEDA_HTTP_OPERATOR_LEADER_ELECTION_RETRY_PERIOD") + if err != nil { + setupLog.Error(err, "invalid KEDA_HTTP_OPERATOR_LEADER_ELECTION_RETRY_PERIOD") + os.Exit(1) + } + + if err := util.ValidateLeaderElectionConfig(leaseDuration, renewDeadline, retryPeriod); err != nil { + setupLog.Error(err, "invalid leader election configuration") + os.Exit(1) + } + var namespaces map[string]cache.Config if baseConfig.WatchNamespace != "" { namespaces = map[string]cache.Config{ @@ -103,6 +127,9 @@ func main() { LeaderElection: enableLeaderElection, LeaderElectionID: "http-add-on.keda.sh", LeaderElectionReleaseOnCancel: true, + LeaseDuration: leaseDuration, + RenewDeadline: renewDeadline, + RetryPeriod: 
retryPeriod, Cache: cache.Options{ DefaultNamespaces: namespaces, }, diff --git a/operator/main_test.go b/operator/main_test.go new file mode 100644 index 000000000..1cd7fbbfc --- /dev/null +++ b/operator/main_test.go @@ -0,0 +1,128 @@ +/* +Copyright 2025 The KEDA Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package main + +import ( + "testing" + "time" + + "github.com/kedacore/http-add-on/pkg/util" + "github.com/stretchr/testify/assert" +) + +func TestLeaderElectionEnvVarsIntegration(t *testing.T) { + tests := []struct { + name string + envVars map[string]string + expectedLease *time.Duration + expectedRenew *time.Duration + expectedRetry *time.Duration + expectError bool + }{ + { + name: "all environment variables set with valid values", + envVars: map[string]string{ + "KEDA_HTTP_OPERATOR_LEADER_ELECTION_LEASE_DURATION": "30s", + "KEDA_HTTP_OPERATOR_LEADER_ELECTION_RENEW_DEADLINE": "20s", + "KEDA_HTTP_OPERATOR_LEADER_ELECTION_RETRY_PERIOD": "5s", + }, + expectedLease: durationPtr(30 * time.Second), + expectedRenew: durationPtr(20 * time.Second), + expectedRetry: durationPtr(5 * time.Second), + expectError: false, + }, + { + name: "no environment variables set - should return nil for defaults", + envVars: map[string]string{}, + expectedLease: nil, + expectedRenew: nil, + expectedRetry: nil, + expectError: false, + }, + { + name: "invalid lease duration", + envVars: map[string]string{ + "KEDA_HTTP_OPERATOR_LEADER_ELECTION_LEASE_DURATION": "invalid", + }, + expectError: 
true, + }, + { + name: "invalid renew deadline", + envVars: map[string]string{ + "KEDA_HTTP_OPERATOR_LEADER_ELECTION_RENEW_DEADLINE": "not-a-duration", + }, + expectError: true, + }, + { + name: "invalid retry period", + envVars: map[string]string{ + "KEDA_HTTP_OPERATOR_LEADER_ELECTION_RETRY_PERIOD": "xyz", + }, + expectError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + + for key, value := range tt.envVars { + t.Setenv(key, value) + } + + leaseDuration, leaseErr := util.ResolveOsEnvDuration("KEDA_HTTP_OPERATOR_LEADER_ELECTION_LEASE_DURATION") + renewDeadline, renewErr := util.ResolveOsEnvDuration("KEDA_HTTP_OPERATOR_LEADER_ELECTION_RENEW_DEADLINE") + retryPeriod, retryErr := util.ResolveOsEnvDuration("KEDA_HTTP_OPERATOR_LEADER_ELECTION_RETRY_PERIOD") + + if tt.expectError { + // At least one of the errors should be non-nil + hasError := false + if _, ok := tt.envVars["KEDA_HTTP_OPERATOR_LEADER_ELECTION_LEASE_DURATION"]; ok && leaseErr != nil { + hasError = true + } + if _, ok := tt.envVars["KEDA_HTTP_OPERATOR_LEADER_ELECTION_RENEW_DEADLINE"]; ok && renewErr != nil { + hasError = true + } + if _, ok := tt.envVars["KEDA_HTTP_OPERATOR_LEADER_ELECTION_RETRY_PERIOD"]; ok && retryErr != nil { + hasError = true + } + if !hasError { + t.Errorf("expected error but got none") + } + } else { + // No errors expected + if leaseErr != nil { + t.Errorf("unexpected error for lease duration: %v", leaseErr) + } + if renewErr != nil { + t.Errorf("unexpected error for renew deadline: %v", renewErr) + } + if retryErr != nil { + t.Errorf("unexpected error for retry period: %v", retryErr) + } + + // Verify the parsed values match expectations + assert.Equal(t, tt.expectedLease, leaseDuration) + assert.Equal(t, tt.expectedRenew, renewDeadline) + assert.Equal(t, tt.expectedRetry, retryPeriod) + } + }) + } +} + +func durationPtr(d time.Duration) *time.Duration { + return &d +} diff --git a/pkg/util/env_resolver.go b/pkg/util/env_resolver.go index 
// ValidateLeaderElectionConfig checks the leader election timing overrides
// before they are passed to the controller manager.
//
// A nil argument means "not configured". When all three arguments are nil the
// function returns nil. Otherwise every configured value must be greater than
// zero, and the effective values must satisfy
// LeaseDuration > RenewDeadline > RetryPeriod, where an unset value is taken
// to be the documented default (15s, 10s and 2s respectively). Comparing
// against the defaults rejects partial overrides, such as a 5s lease duration
// with no renew deadline, that contradict the values left at their default.
//
// The returned error describes the first violated rule; a value that came from
// a default is marked with "[default]" in the message.
func ValidateLeaderElectionConfig(leaseDuration, renewDeadline, retryPeriod *time.Duration) error {
	// Documented defaults, substituted for any value that is left unset.
	const (
		defaultLeaseDuration = 15 * time.Second
		defaultRenewDeadline = 10 * time.Second
		defaultRetryPeriod   = 2 * time.Second
	)

	if leaseDuration == nil && renewDeadline == nil && retryPeriod == nil {
		return nil
	}

	// Each explicitly configured value must be strictly positive.
	if leaseDuration != nil && *leaseDuration <= 0 {
		return fmt.Errorf("lease duration must be greater than 0, got %v", *leaseDuration)
	}
	if renewDeadline != nil && *renewDeadline <= 0 {
		return fmt.Errorf("renew deadline must be greater than 0, got %v", *renewDeadline)
	}
	if retryPeriod != nil && *retryPeriod <= 0 {
		return fmt.Errorf("retry period must be greater than 0, got %v", *retryPeriod)
	}

	// effective returns the configured value, or the fallback together with a
	// marker for the error message when nothing was configured.
	effective := func(configured *time.Duration, fallback time.Duration) (time.Duration, string) {
		if configured == nil {
			return fallback, " [default]"
		}
		return *configured, ""
	}
	lease, leaseNote := effective(leaseDuration, defaultLeaseDuration)
	renew, renewNote := effective(renewDeadline, defaultRenewDeadline)
	retry, retryNote := effective(retryPeriod, defaultRetryPeriod)

	// Ordering is checked on the effective values so that a single override
	// cannot silently conflict with a default.
	if lease <= renew {
		return fmt.Errorf("lease duration (%v)%s must be greater than renew deadline (%v)%s",
			lease, leaseNote, renew, renewNote)
	}
	if renew <= retry {
		return fmt.Errorf("renew deadline (%v)%s must be greater than retry period (%v)%s",
			renew, renewNote, retry, retryNote)
	}

	return nil
}
[]struct { + name string + leaseDuration *time.Duration + renewDeadline *time.Duration + retryPeriod *time.Duration + wantErr bool + errContains string + }{ + { + name: "all nil is valid (uses defaults)", + leaseDuration: nil, + renewDeadline: nil, + retryPeriod: nil, + wantErr: false, + }, + { + name: "valid configuration", + leaseDuration: ptr(15 * time.Second), + renewDeadline: ptr(10 * time.Second), + retryPeriod: ptr(2 * time.Second), + wantErr: false, + }, + { + name: "lease duration <= 0", + leaseDuration: ptr(0 * time.Second), + renewDeadline: ptr(10 * time.Second), + retryPeriod: ptr(2 * time.Second), + wantErr: true, + errContains: "lease duration must be greater than 0", + }, + { + name: "renew deadline <= 0", + leaseDuration: ptr(15 * time.Second), + renewDeadline: ptr(0 * time.Second), + retryPeriod: ptr(2 * time.Second), + wantErr: true, + errContains: "renew deadline must be greater than 0", + }, + { + name: "retry period <= 0", + leaseDuration: ptr(15 * time.Second), + renewDeadline: ptr(10 * time.Second), + retryPeriod: ptr(0 * time.Second), + wantErr: true, + errContains: "retry period must be greater than 0", + }, + { + name: "lease duration <= renew deadline", + leaseDuration: ptr(10 * time.Second), + renewDeadline: ptr(10 * time.Second), + retryPeriod: ptr(2 * time.Second), + wantErr: true, + errContains: "lease duration", + }, + { + name: "renew deadline <= retry period", + leaseDuration: ptr(15 * time.Second), + renewDeadline: ptr(2 * time.Second), + retryPeriod: ptr(2 * time.Second), + wantErr: true, + errContains: "renew deadline", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := ValidateLeaderElectionConfig(tt.leaseDuration, tt.renewDeadline, tt.retryPeriod) + if tt.wantErr { + assert.Error(t, err) + if tt.errContains != "" { + assert.Contains(t, err.Error(), tt.errContains) + } + } else { + assert.NoError(t, err) + } + }) + } +} + +func ptr(d time.Duration) *time.Duration { + return &d +}