From 02b1a18c1130c7913547a586ee0e4501ad633d4c Mon Sep 17 00:00:00 2001 From: Chris Lumens Date: Mon, 6 Jan 2025 12:59:40 -0500 Subject: [PATCH 1/8] Refactor: libcrmservice: systemd_init should return a bool. --- lib/services/systemd.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/services/systemd.c b/lib/services/systemd.c index 7de850b2c05..b1ab9ea49c2 100644 --- a/lib/services/systemd.c +++ b/lib/services/systemd.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -133,7 +134,7 @@ systemd_call_simple_method(const char *method) return reply; } -static gboolean +static bool systemd_init(void) { static int need_init = 1; @@ -152,9 +153,9 @@ systemd_init(void) systemd_proxy = pcmk_dbus_connect(); } if (systemd_proxy == NULL) { - return FALSE; + return false; } - return TRUE; + return true; } static inline char * @@ -548,7 +549,7 @@ systemd_unit_listall(void) DBusMessageIter elem; DBusMessage *reply = NULL; - if (systemd_init() == FALSE) { + if (!systemd_init()) { return NULL; } From 9138d96d027b42e95aa93b837479f3040158b95f Mon Sep 17 00:00:00 2001 From: Chris Lumens Date: Tue, 7 Jan 2025 11:55:58 -0500 Subject: [PATCH 2/8] Refactor: daemons: Fix whitespace problems in execd_commands.c. Most importantly, replace a tab with spaces so the else blocks line up. --- daemons/execd/execd_commands.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c index 4506effceba..03cde196fde 100644 --- a/daemons/execd/execd_commands.c +++ b/daemons/execd/execd_commands.c @@ -883,7 +883,7 @@ action_complete(svc_action_t * action) goagain = true; cmd->real_action = cmd->action; cmd->action = pcmk__str_copy(PCMK_ACTION_MONITOR); - } else if (cmd->real_action != NULL) { + } else if (cmd->real_action != NULL) { // This is follow-up monitor to check whether start/stop/probe(monitor) completed if (cmd->result.execution_status == PCMK_EXEC_PENDING) { goagain = true; @@ -917,8 +917,8 @@ action_complete(svc_action_t * action) } } } - } else if (pcmk__str_any_of(cmd->action, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL) && - (cmd->interval_ms > 0)) { + } else if (pcmk__str_any_of(cmd->action, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL) + && (cmd->interval_ms > 0)) { /* For monitors, excluding follow-up monitors, */ /* if the pending state persists from the first notification until its timeout, */ /* it will be treated as a timeout. */ From 9151b51c50b16608df0b11102f047f2766841911 Mon Sep 17 00:00:00 2001 From: Chris Lumens Date: Mon, 13 Jan 2025 14:34:10 -0500 Subject: [PATCH 3/8] Refactor: daemons: Get rid of an unnecessary #endif/#ifdef. If you scroll up far enough, you'll see that the #endif closes out the same #ifdef that is immediately used again. --- daemons/execd/execd_commands.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c index 03cde196fde..923cff18345 100644 --- a/daemons/execd/execd_commands.c +++ b/daemons/execd/execd_commands.c @@ -940,9 +940,7 @@ action_complete(svc_action_t * action) } } } -#endif -#ifdef PCMK__TIME_USE_CGT if (goagain) { int time_sum = time_diff_ms(NULL, &(cmd->t_first_run)); int timeout_left = cmd->timeout_orig - time_sum; From 96e9f97e4926b126ebe731270c456a5dc6935290 Mon Sep 17 00:00:00 2001 From: Chris Lumens Date: Mon, 13 Jan 2025 14:36:52 -0500 Subject: [PATCH 4/8] Refactor: daemons: Unindent a block of code in action_complete. First, invert the test to see if this is a systemd resource. If it's not, there's no way goagain was set so we can just bail out to the end of the function. Then having done that, everything that was in the block can be unindented. No other code changes. --- daemons/execd/execd_commands.c | 141 +++++++++++++++++---------------- 1 file changed, 72 insertions(+), 69 deletions(-) diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c index 923cff18345..8582f9b8a6d 100644 --- a/daemons/execd/execd_commands.c +++ b/daemons/execd/execd_commands.c @@ -862,81 +862,83 @@ action_complete(svc_action_t * action) #endif } - if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) { - if (pcmk__result_ok(&(cmd->result)) - && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START, - PCMK_ACTION_STOP, NULL)) { - /* systemd returns from start and stop actions after the action - * begins, not after it completes. We have to jump through a few - * hoops so that we don't report 'complete' to the rest of pacemaker - * until it's actually done. - */ + if (!pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) { + goto finalize; + } + + if (pcmk__result_ok(&(cmd->result)) + && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START, + PCMK_ACTION_STOP, NULL)) { + /* systemd returns from start and stop actions after the action + * begins, not after it completes. We have to jump through a few + * hoops so that we don't report 'complete' to the rest of pacemaker + * until it's actually done. + */ + goagain = true; + cmd->real_action = cmd->action; + cmd->action = pcmk__str_copy(PCMK_ACTION_MONITOR); + + } else if (cmd->result.execution_status == PCMK_EXEC_PENDING && + pcmk__str_any_of(cmd->action, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL) && + cmd->interval_ms == 0 && + cmd->real_action == NULL) { + /* If the state is Pending at the time of probe, execute follow-up monitor. */ + goagain = true; + cmd->real_action = cmd->action; + cmd->action = pcmk__str_copy(PCMK_ACTION_MONITOR); + } else if (cmd->real_action != NULL) { + // This is follow-up monitor to check whether start/stop/probe(monitor) completed + if (cmd->result.execution_status == PCMK_EXEC_PENDING) { goagain = true; - cmd->real_action = cmd->action; - cmd->action = pcmk__str_copy(PCMK_ACTION_MONITOR); - - } else if (cmd->result.execution_status == PCMK_EXEC_PENDING && - pcmk__str_any_of(cmd->action, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL) && - cmd->interval_ms == 0 && - cmd->real_action == NULL) { - /* If the state is Pending at the time of probe, execute follow-up monitor. */ + + } else if (pcmk__result_ok(&(cmd->result)) + && pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP, + pcmk__str_casei)) { goagain = true; - cmd->real_action = cmd->action; - cmd->action = pcmk__str_copy(PCMK_ACTION_MONITOR); - } else if (cmd->real_action != NULL) { - // This is follow-up monitor to check whether start/stop/probe(monitor) completed - if (cmd->result.execution_status == PCMK_EXEC_PENDING) { - goagain = true; - - } else if (pcmk__result_ok(&(cmd->result)) - && pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP, - pcmk__str_casei)) { - goagain = true; - } else { - int time_sum = time_diff_ms(NULL, &(cmd->t_first_run)); - int timeout_left = cmd->timeout_orig - time_sum; - - crm_debug("%s systemd %s is now complete (elapsed=%dms, " - "remaining=%dms): %s (%d)", - cmd->rsc_id, cmd->real_action, time_sum, timeout_left, - crm_exit_str(cmd->result.exit_status), - cmd->result.exit_status); - cmd_original_times(cmd); + } else { + int time_sum = time_diff_ms(NULL, &(cmd->t_first_run)); + int timeout_left = cmd->timeout_orig - time_sum; + + crm_debug("%s systemd %s is now complete (elapsed=%dms, " + "remaining=%dms): %s (%d)", + cmd->rsc_id, cmd->real_action, time_sum, timeout_left, + crm_exit_str(cmd->result.exit_status), + cmd->result.exit_status); + cmd_original_times(cmd); + + // Monitors may return "not running", but start/stop shouldn't + if ((cmd->result.execution_status == PCMK_EXEC_DONE) + && (cmd->result.exit_status == PCMK_OCF_NOT_RUNNING)) { - // Monitors may return "not running", but start/stop shouldn't - if ((cmd->result.execution_status == PCMK_EXEC_DONE) - && (cmd->result.exit_status == PCMK_OCF_NOT_RUNNING)) { - - if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_START, - pcmk__str_casei)) { - cmd->result.exit_status = PCMK_OCF_UNKNOWN_ERROR; - } else if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP, - pcmk__str_casei)) { - cmd->result.exit_status = PCMK_OCF_OK; - } + if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_START, + pcmk__str_casei)) { + cmd->result.exit_status = PCMK_OCF_UNKNOWN_ERROR; + } else if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP, + pcmk__str_casei)) { + cmd->result.exit_status = PCMK_OCF_OK; } } - } else if (pcmk__str_any_of(cmd->action, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL) - && (cmd->interval_ms > 0)) { - /* For monitors, excluding follow-up monitors, */ - /* if the pending state persists from the first notification until its timeout, */ - /* it will be treated as a timeout. */ - - if ((cmd->result.execution_status == PCMK_EXEC_PENDING) && - (cmd->last_notify_op_status == PCMK_EXEC_PENDING)) { - int time_left = time(NULL) - (cmd->epoch_rcchange + (cmd->timeout_orig/1000)); - - if (time_left >= 0) { - crm_notice("Giving up on %s %s (rc=%d): monitor pending timeout (first pending notification=%s timeout=%ds)", - cmd->rsc_id, cmd->action, - cmd->result.exit_status, pcmk__trim(ctime(&cmd->epoch_rcchange)), cmd->timeout_orig); - pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, - PCMK_EXEC_TIMEOUT, - "Investigate reason for timeout, and adjust " - "configured operation timeout if necessary"); - cmd_original_times(cmd); - } + } + } else if (pcmk__str_any_of(cmd->action, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL) + && (cmd->interval_ms > 0)) { + /* For monitors, excluding follow-up monitors, */ + /* if the pending state persists from the first notification until its timeout, */ + /* it will be treated as a timeout. */ + + if ((cmd->result.execution_status == PCMK_EXEC_PENDING) && + (cmd->last_notify_op_status == PCMK_EXEC_PENDING)) { + int time_left = time(NULL) - (cmd->epoch_rcchange + (cmd->timeout_orig/1000)); + + if (time_left >= 0) { + crm_notice("Giving up on %s %s (rc=%d): monitor pending timeout (first pending notification=%s timeout=%ds)", + cmd->rsc_id, cmd->action, + cmd->result.exit_status, pcmk__trim(ctime(&cmd->epoch_rcchange)), cmd->timeout_orig); + pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, + PCMK_EXEC_TIMEOUT, + "Investigate reason for timeout, and adjust " + "configured operation timeout if necessary"); + cmd_original_times(cmd); } } } @@ -996,6 +998,7 @@ action_complete(svc_action_t * action) } #endif +finalize: pcmk__set_result_output(&(cmd->result), services__grab_stdout(action), services__grab_stderr(action)); cmd_finalize(cmd, rsc); From 7874bce94ba8b33b734a8edde41c0a6800c771c7 Mon Sep 17 00:00:00 2001 From: Chris Lumens Date: Mon, 13 Jan 2025 14:40:16 -0500 Subject: [PATCH 5/8] Refactor: daemons: Unindent the goagain block in action_complete. If goagain is not set, just bail to the end of the function. Having done that, everything that was in the previous block can be unindented. I think this is a little more legible. No other code changes. --- daemons/execd/execd_commands.c | 95 ++++++++++++++++++---------------- 1 file changed, 50 insertions(+), 45 deletions(-) diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c index 8582f9b8a6d..cdc5b39032e 100644 --- a/daemons/execd/execd_commands.c +++ b/daemons/execd/execd_commands.c @@ -828,6 +828,9 @@ action_complete(svc_action_t * action) #ifdef PCMK__TIME_USE_CGT const char *rclass = NULL; bool goagain = false; + int time_sum = 0; + int timeout_left = 0; + int delay = 0; #endif if (!cmd) { @@ -943,58 +946,60 @@ action_complete(svc_action_t * action) } } - if (goagain) { - int time_sum = time_diff_ms(NULL, &(cmd->t_first_run)); - int timeout_left = cmd->timeout_orig - time_sum; - int delay = cmd->timeout_orig / 10; + if (!goagain) { + goto finalize; + } - if(delay >= timeout_left && timeout_left > 20) { - delay = timeout_left/2; - } + time_sum = time_diff_ms(NULL, &(cmd->t_first_run)); + timeout_left = cmd->timeout_orig - time_sum; + delay = cmd->timeout_orig / 10; - delay = QB_MIN(2000, delay); - if (delay < timeout_left) { - cmd->start_delay = delay; - cmd->timeout = timeout_left; - - if (pcmk__result_ok(&(cmd->result))) { - crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", - cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay); - - } else if (cmd->result.execution_status == PCMK_EXEC_PENDING) { - crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", - cmd->rsc_id, cmd->action, time_sum, timeout_left, delay); - - } else { - crm_notice("%s %s failed: %s: Re-scheduling (remaining " - "timeout %s) " QB_XS - " exitstatus=%d elapsed=%dms start_delay=%dms)", - cmd->rsc_id, cmd->action, - crm_exit_str(cmd->result.exit_status), - pcmk__readable_interval(timeout_left), - cmd->result.exit_status, time_sum, delay); - } + if (delay >= timeout_left && timeout_left > 20) { + delay = timeout_left/2; + } - cmd_reset(cmd); - if(rsc) { - rsc->active = NULL; - } - schedule_lrmd_cmd(rsc, cmd); + delay = QB_MIN(2000, delay); + if (delay < timeout_left) { + cmd->start_delay = delay; + cmd->timeout = timeout_left; - /* Don't finalize cmd, we're not done with it yet */ - return; + if (pcmk__result_ok(&(cmd->result))) { + crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", + cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay); + + } else if (cmd->result.execution_status == PCMK_EXEC_PENDING) { + crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", + cmd->rsc_id, cmd->action, time_sum, timeout_left, delay); } else { - crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)", - cmd->rsc_id, - (cmd->real_action? cmd->real_action : cmd->action), - cmd->result.exit_status, time_sum, timeout_left); - pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, - PCMK_EXEC_TIMEOUT, - "Investigate reason for timeout, and adjust " - "configured operation timeout if necessary"); - cmd_original_times(cmd); + crm_notice("%s %s failed: %s: Re-scheduling (remaining " + "timeout %s) " QB_XS + " exitstatus=%d elapsed=%dms start_delay=%dms)", + cmd->rsc_id, cmd->action, + crm_exit_str(cmd->result.exit_status), + pcmk__readable_interval(timeout_left), + cmd->result.exit_status, time_sum, delay); } + + cmd_reset(cmd); + if (rsc) { + rsc->active = NULL; + } + schedule_lrmd_cmd(rsc, cmd); + + /* Don't finalize cmd, we're not done with it yet */ + return; + + } else { + crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)", + cmd->rsc_id, + (cmd->real_action? cmd->real_action : cmd->action), + cmd->result.exit_status, time_sum, timeout_left); + pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, + PCMK_EXEC_TIMEOUT, + "Investigate reason for timeout, and adjust " + "configured operation timeout if necessary"); + cmd_original_times(cmd); } #endif From 96e9bff3ff7b140c5fb208389b50f852039f3a16 Mon Sep 17 00:00:00 2001 From: Chris Lumens Date: Mon, 3 Feb 2025 12:28:27 -0500 Subject: [PATCH 6/8] Refactor: daemons: Improve whitespace in action_complete. --- daemons/execd/execd_commands.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c index cdc5b39032e..d6325745698 100644 --- a/daemons/execd/execd_commands.c +++ b/daemons/execd/execd_commands.c @@ -934,13 +934,14 @@ action_complete(svc_action_t * action) int time_left = time(NULL) - (cmd->epoch_rcchange + (cmd->timeout_orig/1000)); if (time_left >= 0) { - crm_notice("Giving up on %s %s (rc=%d): monitor pending timeout (first pending notification=%s timeout=%ds)", - cmd->rsc_id, cmd->action, - cmd->result.exit_status, pcmk__trim(ctime(&cmd->epoch_rcchange)), cmd->timeout_orig); + crm_notice("Giving up on %s %s (rc=%d): monitor pending timeout " + "(first pending notification=%s timeout=%ds)", + cmd->rsc_id, cmd->action, cmd->result.exit_status, + pcmk__trim(ctime(&cmd->epoch_rcchange)), cmd->timeout_orig); pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, - PCMK_EXEC_TIMEOUT, - "Investigate reason for timeout, and adjust " - "configured operation timeout if necessary"); + PCMK_EXEC_TIMEOUT, + "Investigate reason for timeout, and adjust " + "configured operation timeout if necessary"); cmd_original_times(cmd); } } From 51a93e7716566f78ba6f3b8fda34d0547e49d449 Mon Sep 17 00:00:00 2001 From: Chris Lumens Date: Mon, 3 Feb 2025 12:25:30 -0500 Subject: [PATCH 7/8] Low: libcrmservices: Don't leak msg if systemd_proxy is NULL. --- lib/services/systemd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/services/systemd.c b/lib/services/systemd.c index b1ab9ea49c2..282e588fdd1 100644 --- a/lib/services/systemd.c +++ b/lib/services/systemd.c @@ -104,13 +104,15 @@ systemd_send_recv(DBusMessage *msg, DBusError *error, int timeout) static DBusMessage * systemd_call_simple_method(const char *method) { - DBusMessage *msg = systemd_new_method(method); + DBusMessage *msg = NULL; DBusMessage *reply = NULL; DBusError error; /* Don't call systemd_init() here, because that calls this */ CRM_CHECK(systemd_proxy, return NULL); + msg = systemd_new_method(method); + if (msg == NULL) { crm_err("Could not create message to send %s to systemd", method); return NULL; From 4a4e721520a7c72afc42ab2ffa944327edab7325 Mon Sep 17 00:00:00 2001 From: Chris Lumens Date: Mon, 3 Feb 2025 12:46:41 -0500 Subject: [PATCH 8/8] Refactor: libcrmservices: Unref the dbus connection... ...when we disconnect from the bus. We aren't allowed to close the connection since we acquired it with dbus_bus_get which makes it a shared connection. So, this is the best cleanup we can do. --- lib/services/dbus.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/services/dbus.c b/lib/services/dbus.c index 8befef8d2eb..9033d3cea64 100644 --- a/lib/services/dbus.c +++ b/lib/services/dbus.c @@ -294,12 +294,12 @@ pcmk_dbus_connect(void) void pcmk_dbus_disconnect(DBusConnection *connection) { - /* Per the DBus documentation, connections created with - * dbus_connection_open() are owned by libdbus and should never be closed. - * - * @TODO Should we call dbus_connection_unref() here? + /* We acquire our dbus connection with dbus_bus_get(), which makes it a + * shared connection. Therefore, we can't close or free it here. The + * best we can do is decrement the reference count so dbus knows when + * there are no more clients connected to it. */ - return; + dbus_connection_unref(connection); } // Custom DBus error names to use