Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions pkg/operator/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (

"github.com/openshift/cluster-dns-operator/pkg/manifests"
operatorconfig "github.com/openshift/cluster-dns-operator/pkg/operator/config"
retryable "github.com/openshift/cluster-dns-operator/pkg/util/retryableerror"
"github.com/openshift/cluster-dns-operator/pkg/util/slice"

"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -122,11 +123,11 @@ func New(mgr manager.Manager, config Config) (controller.Controller, error) {
DeleteFunc: func(e event.DeleteEvent) bool { return nodePredicate(e.Object) },
UpdateFunc: func(e event.UpdateEvent) bool {
old := e.ObjectOld.(*corev1.Node)
new := e.ObjectNew.(*corev1.Node)
if ignoreNodeForTopologyAwareHints(old) != ignoreNodeForTopologyAwareHints(new) {
nu := e.ObjectNew.(*corev1.Node)
if ignoreNodeForTopologyAwareHints(old) != ignoreNodeForTopologyAwareHints(nu) {
return true
}
if !ignoreNodeForTopologyAwareHints(new) && nodeIsValidForTopologyAwareHints(old) != nodeIsValidForTopologyAwareHints(new) {
if !ignoreNodeForTopologyAwareHints(nu) && nodeIsValidForTopologyAwareHints(old) != nodeIsValidForTopologyAwareHints(nu) {
return true
}
return false
Expand Down Expand Up @@ -263,6 +264,11 @@ func (r *reconciler) Reconcile(ctx context.Context, request reconcile.Request) (
} else {
// Handle everything else.
if err := r.ensureDNS(dns, &result); err != nil {
switch e := err.(type) {
case retryable.Error:
logrus.Error(e, "got retryable error; requeueing", "after", e.After())
return reconcile.Result{RequeueAfter: e.After()}, nil
}
errs = append(errs, fmt.Errorf("failed to ensure dns %s: %v", dns.Name, err))
} else if err := r.ensureExternalNameForOpenshiftService(); err != nil {
errs = append(errs, fmt.Errorf("failed to ensure external name for openshift service: %v", err))
Expand Down
163 changes: 101 additions & 62 deletions pkg/operator/controller/dns_status.go

Large diffs are not rendered by default.

541 changes: 376 additions & 165 deletions pkg/operator/controller/dns_status_test.go

Large diffs are not rendered by default.

64 changes: 40 additions & 24 deletions pkg/operator/controller/status/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ func (r *reconciler) Reconcile(ctx context.Context, request reconcile.Request) (
co.Status.Conditions = mergeConditions(co.Status.Conditions,
computeOperatorAvailableCondition(state.haveDNS, &state.dns),
operatorProgressingCondition,
computeOperatorDegradedCondition(state.haveDNS, &state.dns),
computeOperatorDegradedCondition(state.haveDNS, &state.dns, oldVersions, newVersions, curVersions),
)
co.Status.Versions = computeOperatorStatusVersions(curVersions)
co.Status.Conditions = mergeConditions(co.Status.Conditions, computeOperatorUpgradeableCondition(&state.dns))
Expand Down Expand Up @@ -401,7 +401,9 @@ func computeOperatorUpgradeableCondition(dns *operatorv1.DNS) configv1.ClusterOp
}

// computeOperatorDegradedCondition computes the operator's current Degraded status state.
func computeOperatorDegradedCondition(haveDNS bool, dns *operatorv1.DNS) configv1.ClusterOperatorStatusCondition {
func computeOperatorDegradedCondition(haveDNS bool, dns *operatorv1.DNS, oldVersions, newVersions, curVersions map[string]string) configv1.ClusterOperatorStatusCondition {
var messages []string

if !haveDNS {
return configv1.ClusterOperatorStatusCondition{
Type: configv1.OperatorDegraded,
Expand All @@ -411,18 +413,23 @@ func computeOperatorDegradedCondition(haveDNS bool, dns *operatorv1.DNS) configv
}
}

var degraded bool
for _, cond := range dns.Status.Conditions {
if cond.Type == operatorv1.OperatorStatusTypeDegraded && cond.Status == operatorv1.ConditionTrue {
degraded = true
// See OCPBUGS-14346. If the operator is upgrading, we can't consider it as degraded.
upgrading, _ := isUpgrading(curVersions, oldVersions, newVersions)
if !upgrading {
var degraded bool
for _, cond := range dns.Status.Conditions {
if cond.Type == operatorv1.OperatorStatusTypeDegraded && cond.Status == operatorv1.ConditionTrue {
degraded = true
messages = append(messages, cond.Message)
}
}
}
if degraded {
return configv1.ClusterOperatorStatusCondition{
Type: configv1.OperatorDegraded,
Status: configv1.ConditionTrue,
Reason: "DNSDegraded",
Message: fmt.Sprintf("DNS %s is degraded", dns.Name),
if degraded {
return configv1.ClusterOperatorStatusCondition{
Type: configv1.OperatorDegraded,
Status: configv1.ConditionTrue,
Reason: "DNSDegraded",
Message: fmt.Sprintf("DNS %s is degraded: %s", dns.Name, strings.Join(messages, "\n")),
}
}
}
return configv1.ClusterOperatorStatusCondition{
Expand Down Expand Up @@ -468,24 +475,17 @@ func computeOperatorProgressingCondition(haveDNS bool, dns *operatorv1.DNS, oldV
}
}

upgrading := false
for name, curVersion := range curVersions {
if oldVersion, ok := oldVersions[name]; ok && oldVersion != curVersion {
messages = append(messages, fmt.Sprintf("Upgraded %s to %q.", name, curVersion))
}
if newVersion, ok := newVersions[name]; ok && curVersion != newVersion {
upgrading = true
messages = append(messages, fmt.Sprintf("Upgrading %s to %q.", name, newVersion))
}
}
// If the operator is upgrading, note it as a Progressing reason and add the upgrading messages
upgrading, upgradingMessages := isUpgrading(curVersions, oldVersions, newVersions)
if upgrading {
status = configv1.ConditionTrue
messages = append(messages, strings.Join(upgradingMessages, "And "))
progressingReasons = append(progressingReasons, "Upgrading")
}

if len(progressingReasons) != 0 {
progressingCondition.Status = status
progressingCondition.Reason = strings.Join(progressingReasons, "And")
progressingCondition.Reason = strings.Join(progressingReasons, "And ")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bentito found this can produce a reason like DNSReportsProgressingIsTrueAnd Upgrading (extra space).

progressingCondition.Message = strings.Join(messages, "\n")
} else {
progressingCondition.Status = configv1.ConditionFalse
Expand All @@ -496,6 +496,22 @@ func computeOperatorProgressingCondition(haveDNS bool, dns *operatorv1.DNS, oldV
return progressingCondition
}

// isUpgrading reports whether any operand or operator version in curVersions
// is still transitioning toward its target in newVersions.  For every entry it
// records an "Upgraded … " message when the current version differs from the
// old version (a completed transition) and an "Upgrading … " message when the
// current version differs from the new target (a pending transition).  It
// returns true when at least one transition is still pending, together with
// the accumulated messages.
//
// NOTE(review): messages are collected in map-iteration order, which is
// nondeterministic across calls when more than one component is transitioning.
func isUpgrading(curVersions, oldVersions, newVersions map[string]string) (bool, []string) {
	var messages []string
	pending := false

	for name, cur := range curVersions {
		if prev, ok := oldVersions[name]; ok && prev != cur {
			messages = append(messages, fmt.Sprintf("Upgraded %s to %q.", name, cur))
		}
		if target, ok := newVersions[name]; ok && cur != target {
			pending = true
			messages = append(messages, fmt.Sprintf("Upgrading %s to %q.", name, target))
		}
	}
	return pending, messages
}

// computeOldVersions returns a map of operand name to version computed from the
// given clusteroperator status.
func computeOldVersions(oldVersions []configv1.OperandVersion) map[string]string {
Expand Down
154 changes: 154 additions & 0 deletions pkg/operator/controller/status/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -631,3 +631,157 @@ func TestComputeCurrentVersions(t *testing.T) {
}
}
}

// TestComputeOperatorDegradedCondition verifies that the Degraded condition is
// True when the dns CR is missing or reports Degraded, and that a degraded dns
// is tolerated (condition stays False) while a version upgrade is still in
// progress (see OCPBUGS-14346).
func TestComputeOperatorDegradedCondition(t *testing.T) {
	type versions struct {
		operator, operand string
	}

	testCases := []struct {
		description string
		// dnsMissing simulates the absence of the dns CR entirely.
		dnsMissing bool
		// dnsDegraded sets the dns CR's Degraded condition to True.
		dnsDegraded    bool
		oldVersions    versions
		newVersions    versions
		curVersions    versions
		expectDegraded configv1.ConditionStatus
	}{
		{
			description:    "dns does not exist",
			dnsMissing:     true,
			expectDegraded: configv1.ConditionTrue,
		},
		{
			description:    "versions match",
			oldVersions:    versions{"v1", "dns-v1"},
			newVersions:    versions{"v1", "dns-v1"},
			curVersions:    versions{"v1", "dns-v1"},
			expectDegraded: configv1.ConditionFalse,
		},
		{
			description:    "operator upgrade in progress",
			oldVersions:    versions{"v1", "dns-v1"},
			newVersions:    versions{"v2", "dns-v1"},
			curVersions:    versions{"v1", "dns-v1"},
			expectDegraded: configv1.ConditionFalse,
		},
		{
			description:    "operand upgrade in progress",
			oldVersions:    versions{"v1", "dns-v1"},
			newVersions:    versions{"v1", "dns-v2"},
			curVersions:    versions{"v1", "dns-v1"},
			expectDegraded: configv1.ConditionFalse,
		},
		{
			description:    "operator and operand upgrade in progress",
			oldVersions:    versions{"v1", "dns-v1"},
			newVersions:    versions{"v2", "dns-v2"},
			curVersions:    versions{"v1", "dns-v1"},
			expectDegraded: configv1.ConditionFalse,
		},
		{
			description:    "operator upgrade done",
			oldVersions:    versions{"v1", "dns-v1"},
			newVersions:    versions{"v2", "dns-v1"},
			curVersions:    versions{"v2", "dns-v1"},
			expectDegraded: configv1.ConditionFalse,
		},
		{
			description:    "operand upgrade done",
			oldVersions:    versions{"v1", "dns-v1"},
			newVersions:    versions{"v1", "dns-v2"},
			curVersions:    versions{"v1", "dns-v2"},
			expectDegraded: configv1.ConditionFalse,
		},
		{
			description:    "operator and operand upgrade done",
			oldVersions:    versions{"v1", "dns-v1"},
			newVersions:    versions{"v2", "dns-v2"},
			curVersions:    versions{"v2", "dns-v2"},
			expectDegraded: configv1.ConditionFalse,
		},
		{
			description:    "operator upgrade in progress, operand upgrade done",
			oldVersions:    versions{"v1", "dns-v1"},
			newVersions:    versions{"v2", "dns-v2"},
			curVersions:    versions{"v1", "dns-v2"},
			expectDegraded: configv1.ConditionFalse,
		},
		{
			description:    "operator upgrade in progress but no dns",
			dnsMissing:     true,
			oldVersions:    versions{"v1", "dns-v1"},
			newVersions:    versions{"v2", "dns-v1"},
			curVersions:    versions{"v1", "dns-v1"},
			expectDegraded: configv1.ConditionTrue,
		},
		{
			description:    "operand upgrade in progress, but no dns",
			dnsMissing:     true,
			oldVersions:    versions{"v1", "dns-v1"},
			newVersions:    versions{"v1", "dns-v2"},
			curVersions:    versions{"v1", "dns-v1"},
			expectDegraded: configv1.ConditionTrue,
		},
		{
			description:    "operator and operand upgrade in progress, but no dns",
			dnsMissing:     true,
			oldVersions:    versions{"v1", "dns-v1"},
			newVersions:    versions{"v2", "dns-v2"},
			curVersions:    versions{"v1", "dns-v1"},
			expectDegraded: configv1.ConditionTrue,
		},
		{
			description:    "dns degraded, no upgrade in progress",
			dnsDegraded:    true,
			oldVersions:    versions{"v1", "dns-v1"},
			newVersions:    versions{"v1", "dns-v1"},
			curVersions:    versions{"v1", "dns-v1"},
			expectDegraded: configv1.ConditionTrue,
		},
		{
			// The OCPBUGS-14346 case: a degraded dns must not degrade the
			// operator while an upgrade is still rolling out.
			description:    "dns degraded while upgrade in progress",
			dnsDegraded:    true,
			oldVersions:    versions{"v1", "dns-v1"},
			newVersions:    versions{"v2", "dns-v2"},
			curVersions:    versions{"v1", "dns-v1"},
			expectDegraded: configv1.ConditionFalse,
		},
	}

	for _, tc := range testCases {
		var (
			haveDNS bool
			dns     *operatorv1.DNS
		)
		if !tc.dnsMissing {
			haveDNS = true
			// Drive the dns CR's Degraded condition from tc.dnsDegraded.
			// (Previously this checked tc.dnsMissing, which is always false
			// in this branch, so the degraded path was never exercised.)
			degradedStatus := operatorv1.ConditionFalse
			if tc.dnsDegraded {
				degradedStatus = operatorv1.ConditionTrue
			}
			dns = &operatorv1.DNS{
				Status: operatorv1.DNSStatus{
					Conditions: []operatorv1.OperatorCondition{{
						Type:   operatorv1.OperatorStatusTypeDegraded,
						Status: degradedStatus,
					}},
				},
			}
		}
		oldVersions := map[string]string{
			OperatorVersionName:     tc.oldVersions.operator,
			CoreDNSVersionName:      tc.oldVersions.operand,
			OpenshiftCLIVersionName: tc.oldVersions.operand,
			KubeRBACProxyName:       tc.oldVersions.operand,
		}
		newVersions := map[string]string{
			OperatorVersionName:     tc.newVersions.operator,
			CoreDNSVersionName:      tc.newVersions.operand,
			OpenshiftCLIVersionName: tc.newVersions.operand,
			KubeRBACProxyName:       tc.newVersions.operand,
		}
		curVersions := map[string]string{
			OperatorVersionName:     tc.curVersions.operator,
			CoreDNSVersionName:      tc.curVersions.operand,
			OpenshiftCLIVersionName: tc.curVersions.operand,
			KubeRBACProxyName:       tc.curVersions.operand,
		}

		expected := configv1.ClusterOperatorStatusCondition{
			Type:   configv1.OperatorDegraded,
			Status: tc.expectDegraded,
		}

		actual := computeOperatorDegradedCondition(haveDNS, dns, oldVersions, newVersions, curVersions)
		conditionsCmpOpts := []cmp.Option{
			cmpopts.IgnoreFields(configv1.ClusterOperatorStatusCondition{}, "LastTransitionTime", "Reason", "Message"),
		}
		if !cmp.Equal(actual, expected, conditionsCmpOpts...) {
			t.Errorf("%q: expected %#v, got %#v", tc.description, expected, actual)
		}
	}
}
32 changes: 32 additions & 0 deletions pkg/util/conditions/conditions.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Package conditions is copied in from openshift/cluster-ingress-operator and should be moved to library-go to be shared.
package conditions

import (
"fmt"

operatorv1 "github.com/openshift/api/operator/v1"

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
)

// FormatConditions renders a list of operator conditions as one
// human-readable, comma-separated string of the form
// "Type=Status (Reason: Message)".  A nil or empty slice yields "".
func FormatConditions(conditions []*operatorv1.OperatorCondition) string {
	if len(conditions) == 0 {
		return ""
	}
	formatted := ""
	separator := ""
	for _, cond := range conditions {
		formatted += separator + fmt.Sprintf("%s=%s (%s: %s)", cond.Type, cond.Status, cond.Reason, cond.Message)
		separator = ", "
	}
	return formatted
}

// ConditionsEqual reports whether two condition lists are semantically equal:
// ordering is ignored (conditions are compared sorted by Type), and a nil
// slice is treated the same as an empty one.
func ConditionsEqual(a, b []operatorv1.OperatorCondition) bool {
	byType := func(x, y operatorv1.OperatorCondition) bool { return x.Type < y.Type }
	return cmp.Equal(a, b,
		cmpopts.EquateEmpty(),
		cmpopts.SortSlices(byType),
	)
}
60 changes: 60 additions & 0 deletions pkg/util/retryableerror/retryableerror.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// Package retryableerror is copied in from openshift/cluster-ingress-operator and should be moved to library-go to be shared.
package retryableerror

import (
"time"

utilerrors "k8s.io/apimachinery/pkg/util/errors"
)

// Error represents an error for an operation that should be retried after the
// specified duration.
type Error interface {
error
// After is the time period after which the operation that caused the
// error should be retried.
After() time.Duration
}

// New returns a new RetryableError with the given error and time period.
func New(err error, after time.Duration) Error {
return retryableError{err, after}
}

type retryableError struct {
error
after time.Duration
}

// After returns the time period after which the operation that caused the error
// should be retried.
func (r retryableError) After() time.Duration {
return r.after
}

// NewMaybeRetryableAggregate converts a slice of errors into a single error
// value.  Nil entries are filtered out; if nothing remains, nil is returned.
// If every remaining error is a retryable Error, the result is itself a
// retryable Error whose After() is the minimum of the individual delays.
// Otherwise, the plain Aggregate is returned.
func NewMaybeRetryableAggregate(errs []error) error {
	aggregate := utilerrors.NewAggregate(errs)
	if aggregate == nil {
		return nil
	}
	var (
		after     time.Duration
		haveAfter bool
	)
	for _, err := range aggregate.Errors() {
		re, ok := err.(Error)
		if !ok {
			// One non-retryable error makes the whole aggregate
			// non-retryable.
			return aggregate
		}
		if !haveAfter || re.After() < after {
			after = re.After()
		}
		haveAfter = true
	}
	return New(aggregate, after)
}
Loading