diff --git a/flytepropeller/pkg/controller/handler.go b/flytepropeller/pkg/controller/handler.go index 9f78b231100..0fcecf32bd2 100644 --- a/flytepropeller/pkg/controller/handler.go +++ b/flytepropeller/pkg/controller/handler.go @@ -104,7 +104,9 @@ func (p *Propeller) TryMutateWorkflow(ctx context.Context, originalW *v1alpha1.F ctx = contextutils.WithResourceVersion(ctx, mutableW.GetResourceVersion()) maxRetries := uint32(p.cfg.MaxWorkflowRetries) // #nosec G115 - if !mutableW.GetDeletionTimestamp().IsZero() || mutableW.Status.FailedAttempts > maxRetries { + if !mutableW.GetDeletionTimestamp().IsZero() || + (mutableW.Status.FailedAttempts > maxRetries && + mutableW.GetExecutionStatus().GetPhase() != v1alpha1.WorkflowPhaseHandlingFailureNode) { var err error func() { defer func() { diff --git a/flytepropeller/pkg/controller/nodes/dynamic/handler.go b/flytepropeller/pkg/controller/nodes/dynamic/handler.go index d97b46d98f6..bc9e9cdaf03 100644 --- a/flytepropeller/pkg/controller/nodes/dynamic/handler.go +++ b/flytepropeller/pkg/controller/nodes/dynamic/handler.go @@ -98,7 +98,7 @@ func (d dynamicNodeTaskNodeHandler) produceDynamicWorkflow(ctx context.Context, if stdErrors.IsCausedBy(err, utils.ErrorCodeUser) { return handler.DoTransition(handler.TransitionTypeEphemeral, handler.PhaseInfoFailure(core.ExecutionError_USER, "DynamicWorkflowBuildFailed", err.Error(), nil), - ), handler.DynamicNodeState{Phase: v1alpha1.DynamicNodePhaseFailing, Reason: err.Error()}, nil + ), handler.DynamicNodeState{Phase: v1alpha1.DynamicNodePhaseFailing, Reason: err.Error(), IsFailurePermanent: true}, nil } return handler.Transition{}, handler.DynamicNodeState{}, err } @@ -125,7 +125,7 @@ func (d dynamicNodeTaskNodeHandler) handleDynamicSubNodes(ctx context.Context, n if stdErrors.IsCausedBy(err, utils.ErrorCodeUser) { return handler.DoTransition(handler.TransitionTypeEphemeral, handler.PhaseInfoFailure(core.ExecutionError_USER, "DynamicWorkflowBuildFailed", err.Error(), nil), - ), handler.DynamicNodeState{Phase: v1alpha1.DynamicNodePhaseFailing, Reason: err.Error()}, nil + ), handler.DynamicNodeState{Phase: v1alpha1.DynamicNodePhaseFailing, Reason: err.Error(), IsFailurePermanent: true}, nil } // Mostly a system error or unknown return handler.Transition{}, handler.DynamicNodeState{}, err diff --git a/flytepropeller/pkg/controller/workflow/executor.go b/flytepropeller/pkg/controller/workflow/executor.go index 1d5be1b6fb6..01916b136d0 100644 --- a/flytepropeller/pkg/controller/workflow/executor.go +++ b/flytepropeller/pkg/controller/workflow/executor.go @@ -499,17 +499,28 @@ func (c *workflowExecutor) HandleAbortedWorkflow(ctx context.Context, w *v1alpha } var status Status - if err != nil { - // This workflow failed, record that phase and corresponding error message. - status = StatusFailed(&core.ExecutionError{ - Code: "Workflow abort failed", + if w.GetDeletionTimestamp() != nil { + // Explicit deletion: abort without running failure node. + if err != nil { + status = StatusFailed(&core.ExecutionError{ + Code: "Workflow abort failed", + Message: err.Error(), + Kind: core.ExecutionError_SYSTEM, + }) + } else { + status = Status{TransitionToPhase: v1alpha1.WorkflowPhaseAborted} + } + } else { + // Max system retries exhausted: give on_failure a chance to run. + execErr := &core.ExecutionError{ + Code: errors.RuntimeExecutionError.String(), Message: err.Error(), Kind: core.ExecutionError_SYSTEM, - }) - } else { - // Otherwise, this workflow is aborted. - status = Status{ - TransitionToPhase: v1alpha1.WorkflowPhaseAborted, + } + if failureNode := w.GetOnFailureNode(); failureNode != nil { + status = StatusFailureNode(execErr) + } else { + status = StatusFailed(execErr) } } if err := c.TransitionToPhase(ctx, w.ExecutionID.WorkflowExecutionIdentifier, w.GetExecutionStatus(), status); err != nil {