From cdca264067b3a5055b28da9315aaba3e46125af6 Mon Sep 17 00:00:00 2001 From: Miguel Covarrubias Date: Fri, 25 Jul 2025 16:07:23 -0400 Subject: [PATCH 1/6] oomkill --- .../standard/StandardAsyncExecutionActor.scala | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala index 8f8312023f..46c846dff9 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala @@ -965,6 +965,10 @@ trait StandardAsyncExecutionActor */ def isAbort(returnCode: Int): Boolean = returnCode == SIGINT || returnCode == SIGTERM || returnCode == SIGKILL + // 247 return codes have been observed in the wild for OOMKilled jobs during GVS Mars work, but unfortunately we don't + // have a test case to reproduce this yet. + def isOOMKill(returnCode: Int): Boolean = returnCode == SIGKILL || returnCode == 247 + /** * Custom behavior to run after an abort signal is processed. * @@ -1216,12 +1220,14 @@ trait StandardAsyncExecutionActor // See executeOrRecoverSuccess private var missedAbort = false + private var abortRequested = false private case class CheckMissedAbort(jobId: StandardAsyncJob) context.become(kvClientReceive orElse standardReceiveBehavior(None) orElse slowJobWarningReceive orElse receive) def standardReceiveBehavior(jobIdOption: Option[StandardAsyncJob]): Receive = LoggingReceive { case AbortJobCommand => + abortRequested = true jobIdOption match { case Some(jobId) => Try(tryAbort(jobId)) match { @@ -1445,9 +1451,11 @@ trait StandardAsyncExecutionActor retryElseFail(executionHandle) case Success(returnCodeAsInt) if continueOnReturnCode.continueFor(returnCodeAsInt) => handleExecutionSuccess(status, oldHandle, returnCodeAsInt) - // It's important that we check retryWithMoreMemory case before isAbort. RC could be 137 in either case; - // if it was caused by OOM killer, want to handle as OOM and not job abort. - case Success(returnCodeAsInt) if outOfMemoryDetected && memoryRetryRequested => + // Check abort first, but only if abort was requested. There could be a SIGKILL rc (137) for either abort or + // an OOM kill. + case Success(returnCodeAsInt) if abortRequested && isAbort(returnCodeAsInt) => + Future.successful(AbortedExecutionHandle) + case Success(returnCodeAsInt) if (isOOMKill(returnCodeAsInt) || outOfMemoryDetected) && memoryRetryRequested => val executionHandle = Future.successful( FailedNonRetryableExecutionHandle( RetryWithMoreMemory(jobDescriptor.key.tag, stderrAsOption, memoryRetryErrorKeys, log), @@ -1458,8 +1466,6 @@ trait StandardAsyncExecutionActor retryElseFail(executionHandle, MemoryRetryResult(outOfMemoryDetected, memoryRetryFactor, previousMemoryMultiplier) ) - case Success(returnCodeAsInt) if isAbort(returnCodeAsInt) => - Future.successful(AbortedExecutionHandle) case Success(returnCodeAsInt) => val executionHandle = Future.successful( FailedNonRetryableExecutionHandle(WrongReturnCode(jobDescriptor.key.tag, returnCodeAsInt, stderrAsOption), From a8974eebb4ed04b266bae949093f867d40b9da5e Mon Sep 17 00:00:00 2001 From: Miguel Covarrubias Date: Fri, 25 Jul 2025 16:44:04 -0400 Subject: [PATCH 2/6] centaur --- .../retry_with_more_memory_assorted_ooms.wdl | 37 +++++++++++++++++++ .../retry_with_more_memory_assorted_ooms.test | 19 ++++++++++ 2 files changed, 56 insertions(+) create mode 100644 centaur/src/main/resources/standardTestCases/retry_with_more_memory/retry_with_more_memory_assorted_ooms.wdl create mode 100644 centaur/src/main/resources/standardTestCases/retry_with_more_memory_assorted_ooms.test diff --git a/centaur/src/main/resources/standardTestCases/retry_with_more_memory/retry_with_more_memory_assorted_ooms.wdl b/centaur/src/main/resources/standardTestCases/retry_with_more_memory/retry_with_more_memory_assorted_ooms.wdl new file mode 100644 index 0000000000..f1183ae531 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/retry_with_more_memory/retry_with_more_memory_assorted_ooms.wdl @@ -0,0 +1,37 @@ +version 1.0 + +workflow retry_with_more_memory_assorted_ooms { + call run_app +} + +# A Java-based task that sets the maximum heap size to 64 GB on a VM that has nowhere near that much memory. This task +# should be OOM killed and retried with more memory. +task run_app { + command <<< + echo "MEM_SIZE=$MEM_SIZE" >&2 + echo "MEM_UNIT=$MEM_UNIT" >&2 + + cat > Mem.java << EOF + class Mem { + public static void main(String[] args) throws Exception { + int gb = (int)Math.pow(2, 30); + System.out.println("Allocating memory..."); + byte[][] byteArr = new byte[32][]; + for (int i = 0; i < byteArr.length; i++) { + byteArr[i] = new byte[gb]; + } + System.out.println("Sleeping a minute..."); + Thread.sleep(60_000); + System.out.printf("Heap size: %,.2f%n", (double)Runtime.getRuntime().totalMemory() / gb); + System.out.println(byteArr.hashCode()); + } + } + EOF + java -Xms64g -Xmx64g Mem.java + >>> + runtime { + docker: "eclipse-temurin:21" + memory: "1 GB" + maxRetries: 1 + } +} \ No newline at end of file diff --git a/centaur/src/main/resources/standardTestCases/retry_with_more_memory_assorted_ooms.test b/centaur/src/main/resources/standardTestCases/retry_with_more_memory_assorted_ooms.test new file mode 100644 index 0000000000..72cb9b8572 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/retry_with_more_memory_assorted_ooms.test @@ -0,0 +1,19 @@ +name: retry_with_more_memory_assorted_ooms +testFormat: workflowfailure +backends: [GCPBATCH] + +files { + workflow: retry_with_more_memory/retry_with_more_memory_assorted_ooms.wdl + options: retry_with_more_memory/retry_with_more_memory_2x.options +} + +metadata { + workflowName: retry_with_more_memory_assorted_ooms + status: Failed + "failures.0.message": "Workflow failed" + "failures.0.causedBy.0.message": "stderr for job `retry_with_more_memory_assorted_ooms.run_app:NA:2` contained one of the `memory-retry-error-keys: [OutOfMemory,Killed]` specified in the Cromwell config. Job might have run out of memory." + "retry_with_more_memory_assorted_ooms.run_app.-1.1.runtimeAttributes.memory": "1 GB" + "retry_with_more_memory_assorted_ooms.run_app.-1.1.executionStatus": "RetryableFailure" + "retry_with_more_memory_assorted_ooms.run_app.-1.2.runtimeAttributes.memory": "2 GB" + "retry_with_more_memory_assorted_ooms.run_app.-1.2.executionStatus": "Failed" +} From 1933d5292670d383cef2acbf8128338cccddf604 Mon Sep 17 00:00:00 2001 From: Miguel Covarrubias Date: Fri, 25 Jul 2025 18:09:42 -0400 Subject: [PATCH 3/6] wip --- .../StandardAsyncExecutionActor.scala | 21 +++++++----- .../retry_with_more_memory_assorted_ooms.wdl | 34 +++++++++++++++++-- .../retry_with_more_memory_assorted_ooms.test | 20 ++++++++--- 3 files changed, 60 insertions(+), 15 deletions(-) diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala index 46c846dff9..ea169b07db 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala @@ -96,6 +96,12 @@ trait StandardAsyncExecutionActor val SIGTERM = 143 val SIGINT = 130 val SIGKILL = 137 + // The Variants team have observed 247 exit codes in the wild for what we suspect were OOM-killed jobs, but + // unfortunately we do not yet have a test case that reproduces this. From Gemini: + // + // An exit code of 247, particularly in the context of process execution in Linux or containerized environments like + // Docker, often indicates a process termination due to resource limitations, most commonly insufficient memory (RAM). + val SIGCONTAINERKILL = 247 // `CheckingForMemoryRetry` action exits with code 0 if the stderr file contains keys mentioned in `memory-retry` config. val StderrContainsRetryKeysCode = 0 @@ -965,9 +971,7 @@ trait StandardAsyncExecutionActor */ def isAbort(returnCode: Int): Boolean = returnCode == SIGINT || returnCode == SIGTERM || returnCode == SIGKILL - // 247 return codes have been observed in the wild for OOMKilled jobs during GVS Mars work, but unfortunately we don't - // have a test case to reproduce this yet. - def isOOMKill(returnCode: Int): Boolean = returnCode == SIGKILL || returnCode == 247 + def isOomKill(returnCode: Int): Boolean = returnCode == SIGKILL || returnCode == SIGCONTAINERKILL /** * Custom behavior to run after an abort signal is processed. @@ -1436,8 +1440,9 @@ trait StandardAsyncExecutionActor outOfMemoryDetected <- memoryRetryRC } yield (stderrSize, returnCodeAsString, outOfMemoryDetected) - stderrSizeAndReturnCodeAndMemoryRetry flatMap { case (stderrSize, returnCodeAsString, outOfMemoryDetected) => + stderrSizeAndReturnCodeAndMemoryRetry flatMap { case (stderrSize, returnCodeAsString, stderrOomDetected) => val tryReturnCodeAsInt = Try(returnCodeAsString.trim.toInt) + def oomDetected(rc: Int): Boolean = isOomKill(rc) || stderrOomDetected if (isDone(status)) { tryReturnCodeAsInt match { @@ -1455,7 +1460,7 @@ trait StandardAsyncExecutionActor // an OOM kill. case Success(returnCodeAsInt) if abortRequested && isAbort(returnCodeAsInt) => Future.successful(AbortedExecutionHandle) - case Success(returnCodeAsInt) if (isOOMKill(returnCodeAsInt) || outOfMemoryDetected) && memoryRetryRequested => + case Success(returnCodeAsInt) if oomDetected(returnCodeAsInt) && memoryRetryRequested => val executionHandle = Future.successful( FailedNonRetryableExecutionHandle( RetryWithMoreMemory(jobDescriptor.key.tag, stderrAsOption, memoryRetryErrorKeys, log), @@ -1464,7 +1469,7 @@ trait StandardAsyncExecutionActor ) ) retryElseFail(executionHandle, - MemoryRetryResult(outOfMemoryDetected, memoryRetryFactor, previousMemoryMultiplier) + MemoryRetryResult(oomDetected(returnCodeAsInt), memoryRetryFactor, previousMemoryMultiplier) ) case Success(returnCodeAsInt) => val executionHandle = Future.successful( @@ -1485,7 +1490,7 @@ trait StandardAsyncExecutionActor } else { tryReturnCodeAsInt match { case Success(returnCodeAsInt) - if outOfMemoryDetected && memoryRetryRequested && !continueOnReturnCode.continueFor(returnCodeAsInt) => + if oomDetected(returnCodeAsInt) && memoryRetryRequested && !continueOnReturnCode.continueFor(returnCodeAsInt) => val executionHandle = Future.successful( FailedNonRetryableExecutionHandle( RetryWithMoreMemory(jobDescriptor.key.tag, stderrAsOption, memoryRetryErrorKeys, log), @@ -1494,7 +1499,7 @@ trait StandardAsyncExecutionActor ) ) retryElseFail(executionHandle, - MemoryRetryResult(outOfMemoryDetected, memoryRetryFactor, previousMemoryMultiplier) + MemoryRetryResult(oomDetected(returnCodeAsInt), memoryRetryFactor, previousMemoryMultiplier) ) case _ => val failureStatus = handleExecutionFailure(status, tryReturnCodeAsInt.toOption) diff --git a/centaur/src/main/resources/standardTestCases/retry_with_more_memory/retry_with_more_memory_assorted_ooms.wdl b/centaur/src/main/resources/standardTestCases/retry_with_more_memory/retry_with_more_memory_assorted_ooms.wdl index f1183ae531..2d959a5494 100644 --- a/centaur/src/main/resources/standardTestCases/retry_with_more_memory/retry_with_more_memory_assorted_ooms.wdl +++ b/centaur/src/main/resources/standardTestCases/retry_with_more_memory/retry_with_more_memory_assorted_ooms.wdl @@ -1,12 +1,14 @@ version 1.0 workflow retry_with_more_memory_assorted_ooms { - call run_app + call java_oom_kill + call python_oom_kill + call tail_dev_zero_oom_kill } # A Java-based task that sets the maximum heap size to 64 GB on a VM that has nowhere near that much memory. This task # should be OOM killed and retried with more memory. -task run_app { +task java_oom_kill { command <<< echo "MEM_SIZE=$MEM_SIZE" >&2 echo "MEM_UNIT=$MEM_UNIT" >&2 @@ -34,4 +36,32 @@ task run_app { memory: "1 GB" maxRetries: 1 } +} + + +task python_oom_kill { + command <<< + echo "MEM_SIZE=$MEM_SIZE" >&2 + echo "MEM_UNIT=$MEM_UNIT" >&2 + python3 -c 'print(len([0] * (2**34)))' + >>> + runtime { + docker: "google/cloud-sdk:slim" + memory: "1 GB" + maxRetries: 1 + } +} + + +task tail_dev_zero_oom_kill { + command <<< + echo "MEM_SIZE=$MEM_SIZE" >&2 + echo "MEM_UNIT=$MEM_UNIT" >&2 + tail /dev/zero + >>> + runtime { + docker: "ubuntu:latest" + memory: "1 GB" + maxRetries: 1 + } } \ No newline at end of file diff --git a/centaur/src/main/resources/standardTestCases/retry_with_more_memory_assorted_ooms.test b/centaur/src/main/resources/standardTestCases/retry_with_more_memory_assorted_ooms.test index 72cb9b8572..8724a8f00f 100644 --- a/centaur/src/main/resources/standardTestCases/retry_with_more_memory_assorted_ooms.test +++ b/centaur/src/main/resources/standardTestCases/retry_with_more_memory_assorted_ooms.test @@ -11,9 +11,19 @@ metadata { workflowName: retry_with_more_memory_assorted_ooms status: Failed "failures.0.message": "Workflow failed" - "failures.0.causedBy.0.message": "stderr for job `retry_with_more_memory_assorted_ooms.run_app:NA:2` contained one of the `memory-retry-error-keys: [OutOfMemory,Killed]` specified in the Cromwell config. Job might have run out of memory." - "retry_with_more_memory_assorted_ooms.run_app.-1.1.runtimeAttributes.memory": "1 GB" - "retry_with_more_memory_assorted_ooms.run_app.-1.1.executionStatus": "RetryableFailure" - "retry_with_more_memory_assorted_ooms.run_app.-1.2.runtimeAttributes.memory": "2 GB" - "retry_with_more_memory_assorted_ooms.run_app.-1.2.executionStatus": "Failed" + + "retry_with_more_memory_assorted_ooms.java_oom_kill.-1.1.runtimeAttributes.memory": "1 GB" + "retry_with_more_memory_assorted_ooms.java_oom_kill.-1.1.executionStatus": "RetryableFailure" + "retry_with_more_memory_assorted_ooms.java_oom_kill.-1.2.runtimeAttributes.memory": "2 GB" + "retry_with_more_memory_assorted_ooms.java_oom_kill.-1.2.executionStatus": "Failed" + + "retry_with_more_memory_assorted_ooms.python_oom_kill.-1.1.runtimeAttributes.memory": "1 GB" + "retry_with_more_memory_assorted_ooms.python_oom_kill.-1.1.executionStatus": "RetryableFailure" + "retry_with_more_memory_assorted_ooms.python_oom_kill.-1.2.runtimeAttributes.memory": "2 GB" + "retry_with_more_memory_assorted_ooms.python_oom_kill.-1.2.executionStatus": "Failed" + + "retry_with_more_memory_assorted_ooms.tail_dev_zero_oom_kill.-1.1.runtimeAttributes.memory": "1 GB" + "retry_with_more_memory_assorted_ooms.tail_dev_zero_oom_kill.-1.1.executionStatus": "RetryableFailure" + "retry_with_more_memory_assorted_ooms.tail_dev_zero_oom_kill.-1.2.runtimeAttributes.memory": "2 GB" + "retry_with_more_memory_assorted_ooms.tail_dev_zero_oom_kill.-1.2.executionStatus": "Failed" } From a0757884be013f1183996ca378ff2ade3b98f4ee Mon Sep 17 00:00:00 2001 From: Miguel Covarrubias Date: Fri, 25 Jul 2025 19:09:44 -0400 Subject: [PATCH 4/6] cleanup --- .../backend/standard/StandardAsyncExecutionActor.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala index ea169b07db..5bac82979d 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala @@ -96,9 +96,7 @@ trait StandardAsyncExecutionActor val SIGTERM = 143 val SIGINT = 130 val SIGKILL = 137 - // The Variants team have observed 247 exit codes in the wild for what we suspect were OOM-killed jobs, but - // unfortunately we do not yet have a test case that reproduces this. From Gemini: - // + // From Gemini: // An exit code of 247, particularly in the context of process execution in Linux or containerized environments like // Docker, often indicates a process termination due to resource limitations, most commonly insufficient memory (RAM). val SIGCONTAINERKILL = 247 @@ -1490,7 +1488,9 @@ trait StandardAsyncExecutionActor } else { tryReturnCodeAsInt match { case Success(returnCodeAsInt) - if oomDetected(returnCodeAsInt) && memoryRetryRequested && !continueOnReturnCode.continueFor(returnCodeAsInt) => + if oomDetected(returnCodeAsInt) && memoryRetryRequested && !continueOnReturnCode.continueFor( + returnCodeAsInt + ) => val executionHandle = Future.successful( FailedNonRetryableExecutionHandle( RetryWithMoreMemory(jobDescriptor.key.tag, stderrAsOption, memoryRetryErrorKeys, log), From 095b864bb2f68f36e51efc3bfcd02b9b773418f1 Mon Sep 17 00:00:00 2001 From: Miguel Covarrubias Date: Mon, 28 Jul 2025 09:55:21 -0400 Subject: [PATCH 5/6] cleanup --- .../backend/standard/StandardAsyncExecutionActor.scala | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala index 5bac82979d..e8b0f787ec 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala @@ -1222,6 +1222,7 @@ trait StandardAsyncExecutionActor // See executeOrRecoverSuccess private var missedAbort = false + // Records whether abort has been requested for this job. private var abortRequested = false private case class CheckMissedAbort(jobId: StandardAsyncJob) @@ -1249,7 +1250,9 @@ trait StandardAsyncExecutionActor val executeOrRecoverFuture = mode match { case Reconnect(jobId: StandardAsyncJob @unchecked) => reconnectAsync(jobId) - case ReconnectToAbort(jobId: StandardAsyncJob @unchecked) => reconnectToAbortAsync(jobId) + case ReconnectToAbort(jobId: StandardAsyncJob @unchecked) => + abortRequested = true + reconnectToAbortAsync(jobId) case Recover(jobId: StandardAsyncJob @unchecked) => recoverAsync(jobId) case _ => tellMetadata(startMetadataKeyValues) @@ -1454,8 +1457,8 @@ trait StandardAsyncExecutionActor retryElseFail(executionHandle) case Success(returnCodeAsInt) if continueOnReturnCode.continueFor(returnCodeAsInt) => handleExecutionSuccess(status, oldHandle, returnCodeAsInt) - // Check abort first, but only if abort was requested. There could be a SIGKILL rc (137) for either abort or - // an OOM kill. + // A job can receive a SIGKILL (137) if it was aborted or OOM killed. Abort must have been requested for this + // to actually be an abort. case Success(returnCodeAsInt) if abortRequested && isAbort(returnCodeAsInt) => Future.successful(AbortedExecutionHandle) case Success(returnCodeAsInt) if oomDetected(returnCodeAsInt) && memoryRetryRequested => From 284782b4c42331f6d652f8f04f818713bbd91815 Mon Sep 17 00:00:00 2001 From: Miguel Covarrubias Date: Mon, 28 Jul 2025 12:23:30 -0400 Subject: [PATCH 6/6] cleanup --- .../standard/StandardAsyncExecutionActor.scala | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala index e8b0f787ec..9861456731 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala @@ -967,9 +967,9 @@ trait StandardAsyncExecutionActor * @param returnCode The return code. * @return True if the return code is for an abort. */ - def isAbort(returnCode: Int): Boolean = returnCode == SIGINT || returnCode == SIGTERM || returnCode == SIGKILL + def isAbortRc(returnCode: Int): Boolean = returnCode == SIGINT || returnCode == SIGTERM || returnCode == SIGKILL - def isOomKill(returnCode: Int): Boolean = returnCode == SIGKILL || returnCode == SIGCONTAINERKILL + def isOomKillRc(returnCode: Int): Boolean = returnCode == SIGKILL || returnCode == SIGCONTAINERKILL /** * Custom behavior to run after an abort signal is processed. @@ -1441,9 +1441,9 @@ trait StandardAsyncExecutionActor outOfMemoryDetected <- memoryRetryRC } yield (stderrSize, returnCodeAsString, outOfMemoryDetected) - stderrSizeAndReturnCodeAndMemoryRetry flatMap { case (stderrSize, returnCodeAsString, stderrOomDetected) => + stderrSizeAndReturnCodeAndMemoryRetry flatMap { case (stderrSize, returnCodeAsString, isStderrOomDetected) => val tryReturnCodeAsInt = Try(returnCodeAsString.trim.toInt) - def oomDetected(rc: Int): Boolean = isOomKill(rc) || stderrOomDetected + def isOomDetected(rc: Int): Boolean = isOomKillRc(rc) || isStderrOomDetected if (isDone(status)) { tryReturnCodeAsInt match { @@ -1459,9 +1459,9 @@ trait StandardAsyncExecutionActor handleExecutionSuccess(status, oldHandle, returnCodeAsInt) // A job can receive a SIGKILL (137) if it was aborted or OOM killed. Abort must have been requested for this // to actually be an abort. - case Success(returnCodeAsInt) if abortRequested && isAbort(returnCodeAsInt) => + case Success(returnCodeAsInt) if abortRequested && isAbortRc(returnCodeAsInt) => Future.successful(AbortedExecutionHandle) - case Success(returnCodeAsInt) if oomDetected(returnCodeAsInt) && memoryRetryRequested => + case Success(returnCodeAsInt) if memoryRetryRequested && isOomDetected(returnCodeAsInt) => val executionHandle = Future.successful( FailedNonRetryableExecutionHandle( RetryWithMoreMemory(jobDescriptor.key.tag, stderrAsOption, memoryRetryErrorKeys, log), @@ -1470,7 +1470,7 @@ trait StandardAsyncExecutionActor ) ) retryElseFail(executionHandle, - MemoryRetryResult(oomDetected(returnCodeAsInt), memoryRetryFactor, previousMemoryMultiplier) + MemoryRetryResult(isOomDetected(returnCodeAsInt), memoryRetryFactor, previousMemoryMultiplier) ) case Success(returnCodeAsInt) => val executionHandle = Future.successful( @@ -1491,7 +1491,7 @@ trait StandardAsyncExecutionActor } else { tryReturnCodeAsInt match { case Success(returnCodeAsInt) - if oomDetected(returnCodeAsInt) && memoryRetryRequested && !continueOnReturnCode.continueFor( + if isOomDetected(returnCodeAsInt) && memoryRetryRequested && !continueOnReturnCode.continueFor( returnCodeAsInt ) => val executionHandle = Future.successful( @@ -1502,7 +1502,7 @@ trait StandardAsyncExecutionActor ) ) retryElseFail(executionHandle, - MemoryRetryResult(oomDetected(returnCodeAsInt), memoryRetryFactor, previousMemoryMultiplier) + MemoryRetryResult(isOomDetected(returnCodeAsInt), memoryRetryFactor, previousMemoryMultiplier) ) case _ => val failureStatus = handleExecutionFailure(status, tryReturnCodeAsInt.toOption)