From ec9d278001739dbc7ad86c5aa726668985828971 Mon Sep 17 00:00:00 2001 From: Aitor Miguel Blanco Date: Thu, 21 May 2020 02:10:06 +0000 Subject: [PATCH 1/2] Added tool to request transition to ErrorState Signed-off-by: Aitor Miguel Blanco --- .../include/nav2_util/lifecycle_utils.hpp | 27 ++++++++ nav2_util/src/lifecycle_utils.cpp | 62 +++++++++++++++++++ 2 files changed, 89 insertions(+) diff --git a/nav2_util/include/nav2_util/lifecycle_utils.hpp b/nav2_util/include/nav2_util/lifecycle_utils.hpp index 91517753d49..b0b374cec67 100644 --- a/nav2_util/include/nav2_util/lifecycle_utils.hpp +++ b/nav2_util/include/nav2_util/lifecycle_utils.hpp @@ -77,6 +77,33 @@ void reset_lifecycle_nodes( reset_lifecycle_nodes(split(nodes, ':'), service_call_timeout, retries); } +/// Transition the given lifecycle nodes to the ErrorProcessing state in order +/** At this time, service calls frequently hang for unknown reasons. The only + * way to combat that is to timeout the service call and retry it. To use this + * function, estimate how long your nodes should take to at each transition and + * set your timeout accordingly. + * \param[in] node_names A vector of the fully qualified node names to reset. + * \param[in] service_call_timeout The maximum amount of time to wait for a + * service call. + * \param[in] retries The number of times to try a state transition service call + */ +void process_error_lifecycle_nodes( + const std::vector & node_names, + const std::chrono::seconds service_call_timeout = std::chrono::seconds::max(), + const int retries = 3); + +/// Transition the given lifecycle nodes to the ErrorProcessing state in order. +/** + * \param[in] nodes A ':' seperated list of node names. eg. "/node1:/node2" + */ +void process_error_lifecycle_nodes( + const std::string & nodes, + const std::chrono::seconds service_call_timeout = std::chrono::seconds::max(), + const int retries = 3) +{ + process_error_lifecycle_nodes(split(nodes, ':'), service_call_timeout, retries); +} + } // namespace nav2_util #endif // NAV2_UTIL__LIFECYCLE_UTILS_HPP_ diff --git a/nav2_util/src/lifecycle_utils.cpp b/nav2_util/src/lifecycle_utils.cpp index c778e2abdf7..59d7b59152d 100644 --- a/nav2_util/src/lifecycle_utils.cpp +++ b/nav2_util/src/lifecycle_utils.cpp @@ -23,6 +23,7 @@ using std::string; using lifecycle_msgs::msg::Transition; +using lifecycle_msgs::msg::State; namespace nav2_util { @@ -98,4 +99,65 @@ void reset_lifecycle_nodes( } } +static void processErrorLifecycleNode( + const std::string & node_name, + const std::chrono::seconds service_call_timeout, + const int retries) +{ + LifecycleServiceClient sc(node_name); + uint8_t transition; + // Each state of the lifecycle node has a specific transition + // to the ErrorProcessing state. Some states do not support transitions + // to ErrorProcessing. + switch (sc.get_state(service_call_timeout)) { + case State::PRIMARY_STATE_UNKNOWN: + return; + case State::PRIMARY_STATE_UNCONFIGURED: + return; + case State::PRIMARY_STATE_INACTIVE: + return; + case State::PRIMARY_STATE_ACTIVE: + return; + case State::PRIMARY_STATE_FINALIZED: + return; + case State::TRANSITION_STATE_CONFIGURING: + transition = Transition::TRANSITION_ON_CONFIGURE_ERROR; + break; + case State::TRANSITION_STATE_CLEANINGUP: + transition = Transition::TRANSITION_ON_CLEANUP_ERROR; + break; + case State::TRANSITION_STATE_SHUTTINGDOWN: + transition = Transition::TRANSITION_ON_SHUTDOWN_ERROR; + break; + case State::TRANSITION_STATE_ACTIVATING: + transition = Transition::TRANSITION_ON_ACTIVATE_ERROR; + break; + case State::TRANSITION_STATE_DEACTIVATING: + transition = Transition::TRANSITION_ON_DEACTIVATE_ERROR; + break; + case State::TRANSITION_STATE_ERRORPROCESSING: + transition = Transition::TRANSITION_ON_ERROR_ERROR; + break; + default: + return; + } + + // Despite waiting for the service to be available and using reliable transport + // service calls still frequently hang. To get reliable reset it's necessary + // to timeout the service call and retry it when that happens. + RETRY( + sc.change_state(transition, service_call_timeout), + retries); +} + +void process_error_lifecycle_nodes( + const std::vector & node_names, + const std::chrono::seconds service_call_timeout, + const int retries) +{ + for (const auto & node_name : node_names) { + processErrorLifecycleNode(node_name, service_call_timeout, retries); + } +} + } // namespace nav2_util From 03a84705f02f36dc5c3dcaecc23dcc6e2713d21d Mon Sep 17 00:00:00 2001 From: Aitor Miguel Blanco Date: Tue, 9 Jun 2020 05:51:56 +0000 Subject: [PATCH 2/2] Added exception for invalid states and test for the tool. Signed-off-by: Aitor Miguel Blanco --- nav2_util/src/lifecycle_utils.cpp | 15 ++++----------- nav2_util/test/test_lifecycle_utils.cpp | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/nav2_util/src/lifecycle_utils.cpp b/nav2_util/src/lifecycle_utils.cpp index 59d7b59152d..b324fafe3a0 100644 --- a/nav2_util/src/lifecycle_utils.cpp +++ b/nav2_util/src/lifecycle_utils.cpp @@ -99,6 +99,9 @@ void reset_lifecycle_nodes( } } +// TODO(gimait): This should be updated with the missing transitions to error after +// PRs ros2/rcl_interfaces#97, ros2/rcl#618 and ros2/rclcpp#1064 are merged. +// This tool should not be used for until then. static void processErrorLifecycleNode( const std::string & node_name, const std::chrono::seconds service_call_timeout, @@ -110,16 +113,6 @@ static void processErrorLifecycleNode( // to the ErrorProcessing state. Some states do not support transitions // to ErrorProcessing. switch (sc.get_state(service_call_timeout)) { - case State::PRIMARY_STATE_UNKNOWN: - return; - case State::PRIMARY_STATE_UNCONFIGURED: - return; - case State::PRIMARY_STATE_INACTIVE: - return; - case State::PRIMARY_STATE_ACTIVE: - return; - case State::PRIMARY_STATE_FINALIZED: - return; case State::TRANSITION_STATE_CONFIGURING: transition = Transition::TRANSITION_ON_CONFIGURE_ERROR; break; @@ -139,7 +132,7 @@ static void processErrorLifecycleNode( transition = Transition::TRANSITION_ON_ERROR_ERROR; break; default: - return; + throw std::runtime_error("Invalid state."); } // Despite waiting for the service to be available and using reliable transport diff --git a/nav2_util/test/test_lifecycle_utils.cpp b/nav2_util/test/test_lifecycle_utils.cpp index f8724ccf1c2..8b4b0e3fe43 100644 --- a/nav2_util/test/test_lifecycle_utils.cpp +++ b/nav2_util/test/test_lifecycle_utils.cpp @@ -22,6 +22,7 @@ using nav2_util::startup_lifecycle_nodes; using nav2_util::reset_lifecycle_nodes; +using nav2_util::process_error_lifecycle_nodes; class RclCppFixture { @@ -58,3 +59,22 @@ TEST(Lifecycle, interface) node_thread.join(); SUCCEED(); } + +TEST(Lifecycle, error_transition) +{ + std::vector nodes; + nodes.push_back(rclcpp_lifecycle::LifecycleNode::make_shared("foo")); + nodes.push_back(rclcpp_lifecycle::LifecycleNode::make_shared("bar")); + + std::atomic done(false); + std::thread node_thread(SpinNodesUntilDone, nodes, &done); + startup_lifecycle_nodes("/foo:/bar"); + + // At the moment, the lifecycle transitions to ErrorProcessing are not exposed + // to client nodes, so process_error_lifecycle_nodes will always return an exception. + EXPECT_THROW(process_error_lifecycle_nodes("/foo:/bar"), std::runtime_error); + + done = true; + node_thread.join(); + SUCCEED(); +}