Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ ignore =
W503,
# N818: exception name should be named with an Error suffix
N818
# B042: Exception class with `__init__` should pass all args to `super().__init__()` in order to work with `copy.copy()`.
# Affected by false positive, https://github.com/PyCQA/flake8-bugbear/issues/525
B042
exclude =
.tox,
.git,
Expand Down
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@ aws-parallelcluster-node CHANGELOG

This file is used to list changes made in each version of the aws-parallelcluster-node package.

3.15.0
------

**CHANGES**
- Direct users to the slurm_resume log to see EC2 error codes if no instances are launched.

3.14.0
------

Expand Down
3 changes: 2 additions & 1 deletion src/slurm_plugin/clustermgtd.py
Original file line number Diff line number Diff line change
Expand Up @@ -1262,7 +1262,8 @@ def _reset_timeout_expired_compute_resources(
return
log.info(
"The following compute resources are in down state due to insufficient capacity: %s, "
"compute resources will be reset after insufficient capacity timeout (%s seconds) expired",
"compute resources will be reset after insufficient capacity timeout (%s seconds) expired. "
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Test] Can we reflect this change in the corresponding unit test, the same way you did for the resume script?

"Check the slurm_resume log for EC2 error codes.",
self._insufficient_capacity_compute_resources,
self._config.insufficient_capacity_timeout,
)
Expand Down
6 changes: 5 additions & 1 deletion src/slurm_plugin/resume.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,11 @@ def _resume(arg_nodes, resume_config, slurm_resume):
print_with_count(failed_nodes),
)
for error_code, node_list in instance_manager.failed_nodes.items():
_handle_failed_nodes(node_list, reason=f"(Code:{error_code})Failure when resuming nodes")
_handle_failed_nodes(
node_list,
reason=f"(Code:{error_code})Failure when resuming nodes - "
f"Check the slurm_resume log for EC2 error codes",
)

event_publisher = ClusterEventPublisher.create_with_default_publisher(
event_logger,
Expand Down
7 changes: 7 additions & 0 deletions tests/slurm_plugin/test_clustermgtd.py
Original file line number Diff line number Diff line change
Expand Up @@ -3533,6 +3533,13 @@ def test_reset_timeout_expired_compute_resources(
assert_that(cluster_manager._insufficient_capacity_compute_resources).is_equal_to(
expected_insufficient_capacity_compute_resources
)

if expected_insufficient_capacity_compute_resources:
assert (
"compute resources will be reset after insufficient capacity timeout (20 seconds) expired. "
"Check the slurm_resume log for EC2 error codes."
) in caplog.text

if expected_power_save_node_list:
power_save_mock.assert_called_with(
expected_power_save_node_list, reason="Enabling node since insufficient capacity timeout expired"
Expand Down
6 changes: 5 additions & 1 deletion tests/slurm_plugin/test_resume.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,11 @@ def test_resume_launch(
if expected_failed_nodes:
for error_code, nodeset in expected_failed_nodes.items():
mock_handle_failed_nodes_calls.append(
call(nodeset, reason=f"(Code:{error_code})Failure when resuming nodes")
call(
nodeset,
reason=f"(Code:{error_code})Failure when resuming nodes - "
f"Check the slurm_resume log for EC2 error codes",
)
)
mock_handle_failed_nodes.assert_has_calls(mock_handle_failed_nodes_calls)
mock_terminate_instances.assert_called_with(ANY, mock_resume_config.terminate_max_batch_size)
Expand Down