Skip to content

Commit 7739806

Browse files
committed
feat: implement problem reporting via status
Signed-off-by: Bence Csati <[email protected]>
1 parent 2da4c2c commit 7739806

File tree

4 files changed

+74
-76
lines changed

4 files changed

+74
-76
lines changed

controllers/telemetry/collector_controller.go

Lines changed: 29 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -145,27 +145,41 @@ func (r *CollectorReconciler) buildConfigInputForCollector(ctx context.Context,
145145
}
146146

147147
func (r *CollectorReconciler) populateSecretForOutput(ctx context.Context, queriedOutput *v1alpha1.Output, outputWithSecret *components.OutputWithSecretData) error {
148-
logger := log.FromContext(ctx)
148+
originalOutputStatus := queriedOutput.Status.DeepCopy()
149149

150150
if queriedOutput.Spec.Authentication != nil {
151151
if queriedOutput.Spec.Authentication.BasicAuth != nil && queriedOutput.Spec.Authentication.BasicAuth.SecretRef != nil {
152152
queriedSecret := &corev1.Secret{}
153153
if err := r.Get(ctx, types.NamespacedName{Namespace: queriedOutput.Spec.Authentication.BasicAuth.SecretRef.Namespace, Name: queriedOutput.Spec.Authentication.BasicAuth.SecretRef.Name}, queriedSecret); err != nil {
154-
logger.Error(errors.WithStack(err), "failed getting secrets for output", "output", queriedOutput.NamespacedName().String())
155-
return err
154+
queriedOutput.Status.Problems = append(queriedOutput.Status.Problems, fmt.Sprintf("failed getting secrets for output %s: %v", queriedOutput.Name, err))
156155
}
157156
outputWithSecret.Secret = *queriedSecret
158157
}
159158
if queriedOutput.Spec.Authentication.BearerAuth != nil && queriedOutput.Spec.Authentication.BearerAuth.SecretRef != nil {
160159
queriedSecret := &corev1.Secret{}
161160
if err := r.Get(ctx, types.NamespacedName{Namespace: queriedOutput.Spec.Authentication.BearerAuth.SecretRef.Namespace, Name: queriedOutput.Spec.Authentication.BearerAuth.SecretRef.Name}, queriedSecret); err != nil {
162-
logger.Error(errors.WithStack(err), "failed getting secrets for output", "output", queriedOutput.NamespacedName().String())
163-
return err
161+
queriedOutput.Status.Problems = append(queriedOutput.Status.Problems, fmt.Sprintf("failed getting secrets for output %s: %v", queriedOutput.Name, err))
164162
}
165163
outputWithSecret.Secret = *queriedSecret
166164
}
167165
}
168166

167+
if !reflect.DeepEqual(originalOutputStatus, queriedOutput.Status) {
168+
if len(queriedOutput.Status.Problems) > 0 {
169+
queriedOutput.Status.State = state.StateFailed
170+
queriedOutput.Status.ProblemsCount = len(queriedOutput.Status.Problems)
171+
}
172+
if updateErr := r.updateStatus(ctx, queriedOutput); updateErr != nil {
173+
log.FromContext(ctx).Error(errors.WithStack(updateErr), "failed updating output status", "output", queriedOutput.Name)
174+
return updateErr
175+
}
176+
}
177+
178+
// the built-in config validation will not catch issues related to missing secrets
179+
if queriedOutput.Status.ProblemsCount > 0 {
180+
return fmt.Errorf("output %s has problems: %v", queriedOutput.Name, strings.Join(queriedOutput.Status.Problems, ", "))
181+
}
182+
169183
return nil
170184
}
171185

@@ -206,8 +220,10 @@ func (r *CollectorReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
206220
return ctrl.Result{}, nil
207221
}
208222
logger.Error(errors.WithStack(err), "invalid otel config input")
209-
223+
collector.Status.Problems = append(collector.Status.Problems, fmt.Sprintf("invalid otel config input: %s", err.Error()))
224+
collector.Status.ProblemsCount = len(collector.Status.Problems)
210225
collector.Status.State = state.StateFailed
226+
211227
if updateErr := r.updateStatus(ctx, collector); updateErr != nil {
212228
logger.Error(errors.WithStack(updateErr), "failed updating collector status")
213229
return ctrl.Result{}, errors.Append(err, updateErr)
@@ -219,8 +235,10 @@ func (r *CollectorReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
219235
otelConfig, additionalArgs := otelConfigInput.AssembleConfig(ctx)
220236
if err := validator.ValidateAssembledConfig(otelConfig); err != nil {
221237
logger.Error(errors.WithStack(err), "invalid otel config")
222-
238+
collector.Status.Problems = append(collector.Status.Problems, fmt.Sprintf("invalid otel config: %s", err.Error()))
239+
collector.Status.ProblemsCount = len(collector.Status.Problems)
223240
collector.Status.State = state.StateFailed
241+
224242
if updateErr := r.updateStatus(ctx, collector); updateErr != nil {
225243
logger.Error(errors.WithStack(updateErr), "failed updating collector status")
226244
return ctrl.Result{}, errors.Append(err, updateErr)
@@ -427,11 +445,8 @@ func (r *CollectorReconciler) otelCollector(collector *v1alpha1.Collector, otelC
427445

428446
func (r *CollectorReconciler) reconcileRBAC(ctx context.Context, collector *v1alpha1.Collector) (v1alpha1.NamespacedName, error) {
429447
errCR := r.reconcileClusterRole(ctx, collector)
430-
431448
sa, errSA := r.reconcileServiceAccount(ctx, collector)
432-
433449
errCRB := r.reconcileClusterRoleBinding(ctx, collector)
434-
435450
if allErr := errors.Combine(errCR, errSA, errCRB); allErr != nil {
436451
return v1alpha1.NamespacedName{}, allErr
437452
}
@@ -440,22 +455,18 @@ func (r *CollectorReconciler) reconcileRBAC(ctx context.Context, collector *v1al
440455
}
441456

442457
func (r *CollectorReconciler) reconcileServiceAccount(ctx context.Context, collector *v1alpha1.Collector) (v1alpha1.NamespacedName, error) {
443-
logger := log.FromContext(ctx)
444-
445458
serviceAccount := corev1.ServiceAccount{
446459
TypeMeta: metav1.TypeMeta{},
447460
ObjectMeta: metav1.ObjectMeta{
448461
Name: fmt.Sprintf("%s-sa", collector.Name),
449462
Namespace: collector.Spec.ControlNamespace,
450463
},
451464
}
452-
453465
if err := ctrl.SetControllerReference(collector, &serviceAccount, r.Scheme); err != nil {
454466
return v1alpha1.NamespacedName{}, err
455467
}
456468

457-
resourceReconciler := reconciler.NewReconcilerWith(r.Client, reconciler.WithLog(logger))
458-
469+
resourceReconciler := reconciler.NewReconcilerWith(r.Client, reconciler.WithLog(log.FromContext(ctx)))
459470
_, err := resourceReconciler.ReconcileResource(&serviceAccount, reconciler.StatePresent)
460471
if err != nil {
461472
return v1alpha1.NamespacedName{}, err
@@ -465,8 +476,6 @@ func (r *CollectorReconciler) reconcileServiceAccount(ctx context.Context, colle
465476
}
466477

467478
func (r *CollectorReconciler) reconcileClusterRoleBinding(ctx context.Context, collector *v1alpha1.Collector) error {
468-
logger := log.FromContext(ctx)
469-
470479
clusterRoleBinding := rbacv1.ClusterRoleBinding{
471480
TypeMeta: metav1.TypeMeta{},
472481
ObjectMeta: metav1.ObjectMeta{
@@ -482,20 +491,17 @@ func (r *CollectorReconciler) reconcileClusterRoleBinding(ctx context.Context, c
482491
Name: fmt.Sprintf("%s-pod-association-reader", collector.Name),
483492
},
484493
}
485-
486494
if err := ctrl.SetControllerReference(collector, &clusterRoleBinding, r.Scheme); err != nil {
487495
return err
488496
}
489497

490-
resourceReconciler := reconciler.NewReconcilerWith(r.Client, reconciler.WithLog(logger))
491-
498+
resourceReconciler := reconciler.NewReconcilerWith(r.Client, reconciler.WithLog(log.FromContext(ctx)))
492499
_, err := resourceReconciler.ReconcileResource(&clusterRoleBinding, reconciler.StatePresent)
500+
493501
return err
494502
}
495503

496504
func (r *CollectorReconciler) reconcileClusterRole(ctx context.Context, collector *v1alpha1.Collector) error {
497-
logger := log.FromContext(ctx)
498-
499505
clusterRole := rbacv1.ClusterRole{
500506
TypeMeta: metav1.TypeMeta{},
501507
ObjectMeta: metav1.ObjectMeta{
@@ -514,13 +520,11 @@ func (r *CollectorReconciler) reconcileClusterRole(ctx context.Context, collecto
514520
},
515521
},
516522
}
517-
518523
if err := ctrl.SetControllerReference(collector, &clusterRole, r.Scheme); err != nil {
519524
return err
520525
}
521526

522-
resourceReconciler := reconciler.NewReconcilerWith(r.Client, reconciler.WithLog(logger))
523-
527+
resourceReconciler := reconciler.NewReconcilerWith(r.Client, reconciler.WithLog(log.FromContext(ctx)))
524528
_, err := resourceReconciler.ReconcileResource(&clusterRole, reconciler.StatePresent)
525529

526530
return err

controllers/telemetry/route_controller.go

Lines changed: 34 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"reflect"
2121
"slices"
2222
"sort"
23+
"strings"
2324

2425
"emperror.dev/errors"
2526
apiv1 "k8s.io/api/core/v1"
@@ -71,6 +72,7 @@ func (r *RouteReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
7172

7273
if err := handleOwnedResources(ctx, baseManager.GetTenantResourceManager(), tenant); err != nil {
7374
tenant.Status.State = state.StateFailed
75+
tenant.Status.ProblemsCount = len(tenant.Status.Problems)
7476
baseManager.Error(errors.WithStack(err), "failed to handle resources owned by tenant", "tenant", tenant.Name)
7577
if updateErr := r.updateStatus(ctx, tenant); updateErr != nil {
7678
baseManager.Error(errors.WithStack(updateErr), "failed updating tenant status", "tenant", tenant.Name)
@@ -80,6 +82,7 @@ func (r *RouteReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
8082

8183
if err := handleBridgeResources(ctx, baseManager.GetBridgeManager(), tenant); err != nil {
8284
tenant.Status.State = state.StateFailed
85+
tenant.Status.ProblemsCount = len(tenant.Status.Problems)
8386
baseManager.Error(errors.WithStack(err), "failed to handle bridge resources", "tenant", tenant.Name)
8487
if updateErr := r.updateStatus(ctx, tenant); updateErr != nil {
8588
baseManager.Error(errors.WithStack(updateErr), "failed updating tenant status", "tenant", tenant.Name)
@@ -98,7 +101,6 @@ func (r *RouteReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
98101
return ctrl.Result{}, nil
99102
}
100103

101-
baseManager.Info("tenant reconciliation complete", "tenant", tenant.Name)
102104
return ctrl.Result{}, nil
103105
}
104106

@@ -234,26 +236,18 @@ func handleOwnedResources(ctx context.Context, tenantResManager *manager.TenantR
234236
logsourceNamespacesForTenant, err := tenantResManager.GetLogsourceNamespaceNamesForTenant(ctx, tenant)
235237
if err != nil {
236238
tenant.Status.State = state.StateFailed
237-
tenantResManager.Error(errors.WithStack(err), "failed to get logsource namespaces for tenant", "tenant", tenant.Name)
238-
if updateErr := tenantResManager.Status().Update(ctx, tenant); updateErr != nil {
239-
tenantResManager.Error(errors.WithStack(updateErr), "failed updating tenant status", "tenant", tenant.Name)
240-
return errors.Append(err, updateErr)
241-
}
242-
239+
tenant.Status.Problems = append(tenant.Status.Problems,
240+
fmt.Sprintf("failed to get logsource namespaces for tenant %s: %v", tenant.Name, err))
243241
return err
244242
}
245243
slices.Sort(logsourceNamespacesForTenant)
246244
tenant.Status.LogSourceNamespaces = logsourceNamespacesForTenant
247245

248246
subscriptionsForTenant, subscriptionUpdateList, err := tenantResManager.GetResourceOwnedByTenant(ctx, &v1alpha1.Subscription{}, tenant)
249247
if err != nil {
250-
tenantResManager.Error(errors.WithStack(err), "failed to get subscriptions for tenant", "tenant", tenant.Name)
251-
252248
tenant.Status.State = state.StateFailed
253-
if updateErr := tenantResManager.Status().Update(ctx, tenant); updateErr != nil {
254-
tenantResManager.Error(errors.WithStack(updateErr), "failed updating tenant status", "tenant", tenant.Name)
255-
return errors.Append(err, updateErr)
256-
}
249+
tenant.Status.Problems = append(tenant.Status.Problems,
250+
fmt.Sprintf("failed to get subscriptions for tenant %s: %v", tenant.Name, err))
257251

258252
return err
259253
}
@@ -262,7 +256,8 @@ func handleOwnedResources(ctx context.Context, tenantResManager *manager.TenantR
262256
subscriptionsForTenant = append(subscriptionsForTenant, tenantResManager.UpdateResourcesForTenant(ctx, tenant.Name, subscriptionUpdateList)...)
263257
subscriptionsToDisown, err := tenantResManager.GetResourcesReferencingTenantButNotSelected(ctx, tenant, &v1alpha1.Subscription{}, subscriptionsForTenant)
264258
if err != nil {
265-
tenantResManager.Error(errors.WithStack(err), "failed to get subscriptions to disown", "tenant", tenant.Name)
259+
tenant.Status.Problems = append(tenant.Status.Problems,
260+
fmt.Sprintf("failed to get subscriptions to disown for tenant %s: %v", tenant.Name, err))
266261
}
267262
tenantResManager.DisownResources(ctx, subscriptionsToDisown)
268263

@@ -273,13 +268,9 @@ func handleOwnedResources(ctx context.Context, tenantResManager *manager.TenantR
273268
// Check outputs for tenant
274269
outputsForTenant, outputUpdateList, err := tenantResManager.GetResourceOwnedByTenant(ctx, &v1alpha1.Output{}, tenant)
275270
if err != nil {
276-
tenantResManager.Error(errors.WithStack(err), "failed to get outputs for tenant", "tenant", tenant.Name)
277-
278271
tenant.Status.State = state.StateFailed
279-
if updateErr := tenantResManager.Status().Update(ctx, tenant); updateErr != nil {
280-
tenantResManager.Error(errors.WithStack(updateErr), "failed updating tenant status", "tenant", tenant.Name)
281-
return errors.Append(err, updateErr)
282-
}
272+
tenant.Status.Problems = append(tenant.Status.Problems,
273+
fmt.Sprintf("failed to get outputs for tenant %s: %v", tenant.Name, err))
283274

284275
return err
285276
}
@@ -288,7 +279,8 @@ func handleOwnedResources(ctx context.Context, tenantResManager *manager.TenantR
288279
outputsForTenant = append(outputsForTenant, tenantResManager.UpdateResourcesForTenant(ctx, tenant.Name, outputUpdateList)...)
289280
outputsToDisown, err := tenantResManager.GetResourcesReferencingTenantButNotSelected(ctx, tenant, &v1alpha1.Output{}, outputsForTenant)
290281
if err != nil {
291-
tenantResManager.Error(errors.WithStack(err), "failed to get outputs to disown", "tenant", tenant.Name)
282+
tenant.Status.Problems = append(tenant.Status.Problems,
283+
fmt.Sprintf("failed to get outputs to disown for tenant %s: %v", tenant.Name, err))
292284
}
293285
tenantResManager.DisownResources(ctx, outputsToDisown)
294286

@@ -316,11 +308,8 @@ func handleBridgeResources(ctx context.Context, bridgeManager *manager.BridgeMan
316308
bridgesForTenant, err := bridgeManager.GetBridgesForTenant(ctx, tenant.Name)
317309
if err != nil {
318310
tenant.Status.State = state.StateFailed
319-
bridgeManager.Error(errors.WithStack(err), "failed to get bridges for tenant", "tenant", tenant.Name)
320-
if updateErr := bridgeManager.Status().Update(ctx, tenant); updateErr != nil {
321-
bridgeManager.Error(errors.WithStack(updateErr), "failed updating tenant status", "tenant", tenant.Name)
322-
return errors.Append(err, updateErr)
323-
}
311+
tenant.Status.Problems = append(tenant.Status.Problems,
312+
fmt.Sprintf("failed to get bridges for tenant %s: %v", tenant.Name, err))
324313

325314
return err
326315
}
@@ -330,15 +319,28 @@ func handleBridgeResources(ctx context.Context, bridgeManager *manager.BridgeMan
330319
tenant.Status.ConnectedBridges = bridgesForTenantNames
331320

332321
for _, bridge := range bridgesForTenant {
333-
if err := bridgeManager.CheckBridgeConnection(ctx, tenant.Name, &bridge); err != nil {
322+
originalBridgeStatus := bridge.Status.DeepCopy()
323+
bridge.Status.State = state.StateReady
324+
325+
err := bridgeManager.ValidateBridgeConnection(ctx, tenant.Name, &bridge)
326+
if err != nil {
327+
errorMsg := fmt.Sprintf("validation failed for bridge %s: %v", bridge.Name, err)
334328
tenant.Status.State = state.StateFailed
335-
bridgeManager.Error(errors.WithStack(err), "failed to check bridge connection", "bridge", bridge.Name)
336-
if updateErr := bridgeManager.Status().Update(ctx, tenant); updateErr != nil {
337-
bridgeManager.Error(errors.WithStack(updateErr), "failed updating tenant status", "tenant", tenant.Name)
338-
return errors.Append(err, updateErr)
329+
tenant.Status.Problems = append(tenant.Status.Problems, errorMsg)
330+
bridge.Status.State = state.StateFailed
331+
bridge.Status.Problems = append(bridge.Status.Problems, errorMsg)
332+
bridge.Status.ProblemsCount = len(bridge.Status.Problems)
333+
}
334+
335+
if !reflect.DeepEqual(originalBridgeStatus, bridge.Status) {
336+
if updateErr := bridgeManager.Status().Update(ctx, &bridge); updateErr != nil {
337+
bridgeManager.Error(errors.WithStack(updateErr), "failed updating bridge status", "bridge", bridge.Name)
338+
return updateErr
339339
}
340+
}
340341

341-
return err
342+
if bridge.Status.ProblemsCount > 0 {
343+
return fmt.Errorf("bridge %s has problems: %v", bridge.Name, strings.Join(bridge.Status.Problems, ", "))
342344
}
343345
}
344346

pkg/resources/manager/bridge_manager.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ func (b *BridgeManager) getTenants(ctx context.Context, listOpts *client.ListOpt
7575
return tenants.Items, nil
7676
}
7777

78-
func (b *BridgeManager) CheckBridgeConnection(ctx context.Context, tenantName string, bridge *v1alpha1.Bridge) error {
78+
func (b *BridgeManager) ValidateBridgeConnection(ctx context.Context, tenantName string, bridge *v1alpha1.Bridge) error {
7979
for _, tenantReference := range []string{bridge.Spec.SourceTenant, bridge.Spec.TargetTenant} {
8080
if tenantReference != tenantName {
8181
listOpts := &client.ListOptions{

pkg/resources/manager/tenant_resource_manager.go

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import (
2121
"slices"
2222
"strings"
2323

24-
"emperror.dev/errors"
2524
apiv1 "k8s.io/api/core/v1"
2625
apierrors "k8s.io/apimachinery/pkg/api/errors"
2726
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -190,31 +189,25 @@ func (t *TenantResourceManager) DisownResources(ctx context.Context, resourceToD
190189
// ValidateSubscriptionOutputs validates the output references of a subscription
191190
func (t *TenantResourceManager) ValidateSubscriptionOutputs(ctx context.Context, subscription *v1alpha1.Subscription) []v1alpha1.NamespacedName {
192191
validOutputs := []v1alpha1.NamespacedName{}
193-
invalidOutputs := []v1alpha1.NamespacedName{}
194192

195193
for _, outputRef := range subscription.Spec.Outputs {
196194
checkedOutput := &v1alpha1.Output{}
197195
if err := t.Get(ctx, types.NamespacedName(outputRef), checkedOutput); err != nil {
198-
t.Error(err, "referred output invalid", "output", outputRef.String())
199-
200-
invalidOutputs = append(invalidOutputs, outputRef)
196+
subscription.Status.Problems = append(subscription.Status.Problems,
197+
fmt.Sprintf("failed to get output (%s/%s): %v", outputRef.Namespace, outputRef.Name, err))
201198
continue
202199
}
203200

204-
// Ensure the output belongs to the same tenant
205201
if checkedOutput.Status.Tenant != subscription.Status.Tenant {
206-
t.Error(errors.New("output and subscription tenants mismatch"),
207-
"output and subscription tenants mismatch",
208-
"output", checkedOutput.NamespacedName().String(),
209-
"output's tenant", checkedOutput.Status.Tenant,
210-
"subscription", subscription.NamespacedName().String(),
211-
"subscription's tenant", subscription.Status.Tenant)
212-
213-
invalidOutputs = append(invalidOutputs, outputRef)
202+
subscription.Status.Problems = append(subscription.Status.Problems,
203+
fmt.Sprintf("output (%s/%s) belongs to different tenant", outputRef.Namespace, outputRef.Name))
204+
checkedOutput.Status.Problems = append(checkedOutput.Status.Problems,
205+
fmt.Sprintf("output (%s/%s) belongs to different tenant", outputRef.Namespace, outputRef.Name))
206+
checkedOutput.Status.ProblemsCount = len(checkedOutput.Status.Problems)
214207
continue
215208
}
216209

217-
// update the output state if validation was successful
210+
// update output state
218211
checkedOutput.SetState(state.StateReady)
219212
if updateErr := t.Status().Update(ctx, checkedOutput); updateErr != nil {
220213
checkedOutput.SetState(state.StateFailed)
@@ -223,9 +216,8 @@ func (t *TenantResourceManager) ValidateSubscriptionOutputs(ctx context.Context,
223216

224217
validOutputs = append(validOutputs, outputRef)
225218
}
226-
227-
if len(invalidOutputs) > 0 {
228-
t.Error(errors.New("some outputs are invalid"), "some outputs are invalid", "invalidOutputs", invalidOutputs, "subscription", subscription.NamespacedName().String())
219+
if len(subscription.Status.Problems) > 0 {
220+
subscription.Status.ProblemsCount = len(subscription.Status.Problems)
229221
}
230222

231223
return validOutputs

0 commit comments

Comments
 (0)