Skip to content

Commit

Permalink
Merge pull request #3816 from clumens/dbus-systemd-prep
Browse files Browse the repository at this point in the history
Prepare for the systemd+dbus PR
  • Loading branch information
nrwahl2 authored Feb 5, 2025
2 parents 5efdc9d + 4a4e721 commit 88e9ec1
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 127 deletions.
241 changes: 124 additions & 117 deletions daemons/execd/execd_commands.c
Original file line number Diff line number Diff line change
Expand Up @@ -828,6 +828,9 @@ action_complete(svc_action_t * action)
#ifdef PCMK__TIME_USE_CGT
const char *rclass = NULL;
bool goagain = false;
int time_sum = 0;
int timeout_left = 0;
int delay = 0;
#endif

if (!cmd) {
Expand Down Expand Up @@ -862,142 +865,146 @@ action_complete(svc_action_t * action)
#endif
}

if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) {
if (pcmk__result_ok(&(cmd->result))
&& pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START,
PCMK_ACTION_STOP, NULL)) {
/* systemd returns from start and stop actions after the action
* begins, not after it completes. We have to jump through a few
* hoops so that we don't report 'complete' to the rest of pacemaker
* until it's actually done.
*/
if (!pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) {
goto finalize;
}

if (pcmk__result_ok(&(cmd->result))
&& pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START,
PCMK_ACTION_STOP, NULL)) {
/* systemd returns from start and stop actions after the action
* begins, not after it completes. We have to jump through a few
* hoops so that we don't report 'complete' to the rest of pacemaker
* until it's actually done.
*/
goagain = true;
cmd->real_action = cmd->action;
cmd->action = pcmk__str_copy(PCMK_ACTION_MONITOR);

} else if (cmd->result.execution_status == PCMK_EXEC_PENDING &&
pcmk__str_any_of(cmd->action, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL) &&
cmd->interval_ms == 0 &&
cmd->real_action == NULL) {
/* If the state is Pending at the time of probe, execute follow-up monitor. */
goagain = true;
cmd->real_action = cmd->action;
cmd->action = pcmk__str_copy(PCMK_ACTION_MONITOR);
} else if (cmd->real_action != NULL) {
// This is follow-up monitor to check whether start/stop/probe(monitor) completed
if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
goagain = true;
cmd->real_action = cmd->action;
cmd->action = pcmk__str_copy(PCMK_ACTION_MONITOR);

} else if (cmd->result.execution_status == PCMK_EXEC_PENDING &&
pcmk__str_any_of(cmd->action, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL) &&
cmd->interval_ms == 0 &&
cmd->real_action == NULL) {
/* If the state is Pending at the time of probe, execute follow-up monitor. */

} else if (pcmk__result_ok(&(cmd->result))
&& pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP,
pcmk__str_casei)) {
goagain = true;
cmd->real_action = cmd->action;
cmd->action = pcmk__str_copy(PCMK_ACTION_MONITOR);
} else if (cmd->real_action != NULL) {
// This is follow-up monitor to check whether start/stop/probe(monitor) completed
if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
goagain = true;

} else if (pcmk__result_ok(&(cmd->result))
&& pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP,
pcmk__str_casei)) {
goagain = true;

} else {
int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
int timeout_left = cmd->timeout_orig - time_sum;

crm_debug("%s systemd %s is now complete (elapsed=%dms, "
"remaining=%dms): %s (%d)",
cmd->rsc_id, cmd->real_action, time_sum, timeout_left,
crm_exit_str(cmd->result.exit_status),
cmd->result.exit_status);
cmd_original_times(cmd);

// Monitors may return "not running", but start/stop shouldn't
if ((cmd->result.execution_status == PCMK_EXEC_DONE)
&& (cmd->result.exit_status == PCMK_OCF_NOT_RUNNING)) {

if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_START,
pcmk__str_casei)) {
cmd->result.exit_status = PCMK_OCF_UNKNOWN_ERROR;
} else if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP,
pcmk__str_casei)) {
cmd->result.exit_status = PCMK_OCF_OK;
}
} else {
int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
int timeout_left = cmd->timeout_orig - time_sum;

crm_debug("%s systemd %s is now complete (elapsed=%dms, "
"remaining=%dms): %s (%d)",
cmd->rsc_id, cmd->real_action, time_sum, timeout_left,
crm_exit_str(cmd->result.exit_status),
cmd->result.exit_status);
cmd_original_times(cmd);

// Monitors may return "not running", but start/stop shouldn't
if ((cmd->result.execution_status == PCMK_EXEC_DONE)
&& (cmd->result.exit_status == PCMK_OCF_NOT_RUNNING)) {

if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_START,
pcmk__str_casei)) {
cmd->result.exit_status = PCMK_OCF_UNKNOWN_ERROR;
} else if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP,
pcmk__str_casei)) {
cmd->result.exit_status = PCMK_OCF_OK;
}
}
} else if (pcmk__str_any_of(cmd->action, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL) &&
(cmd->interval_ms > 0)) {
/* For monitors, excluding follow-up monitors, */
/* if the pending state persists from the first notification until its timeout, */
/* it will be treated as a timeout. */

if ((cmd->result.execution_status == PCMK_EXEC_PENDING) &&
(cmd->last_notify_op_status == PCMK_EXEC_PENDING)) {
int time_left = time(NULL) - (cmd->epoch_rcchange + (cmd->timeout_orig/1000));

if (time_left >= 0) {
crm_notice("Giving up on %s %s (rc=%d): monitor pending timeout (first pending notification=%s timeout=%ds)",
cmd->rsc_id, cmd->action,
cmd->result.exit_status, pcmk__trim(ctime(&cmd->epoch_rcchange)), cmd->timeout_orig);
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_TIMEOUT,
"Investigate reason for timeout, and adjust "
"configured operation timeout if necessary");
cmd_original_times(cmd);
}
}
} else if (pcmk__str_any_of(cmd->action, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL)
&& (cmd->interval_ms > 0)) {
/* For monitors, excluding follow-up monitors, */
/* if the pending state persists from the first notification until its timeout, */
/* it will be treated as a timeout. */

if ((cmd->result.execution_status == PCMK_EXEC_PENDING) &&
(cmd->last_notify_op_status == PCMK_EXEC_PENDING)) {
int time_left = time(NULL) - (cmd->epoch_rcchange + (cmd->timeout_orig/1000));

if (time_left >= 0) {
crm_notice("Giving up on %s %s (rc=%d): monitor pending timeout "
"(first pending notification=%s timeout=%ds)",
cmd->rsc_id, cmd->action, cmd->result.exit_status,
pcmk__trim(ctime(&cmd->epoch_rcchange)), cmd->timeout_orig);
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_TIMEOUT,
"Investigate reason for timeout, and adjust "
"configured operation timeout if necessary");
cmd_original_times(cmd);
}
}
}
#endif

#ifdef PCMK__TIME_USE_CGT
if (goagain) {
int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
int timeout_left = cmd->timeout_orig - time_sum;
int delay = cmd->timeout_orig / 10;
if (!goagain) {
goto finalize;
}

if(delay >= timeout_left && timeout_left > 20) {
delay = timeout_left/2;
}
time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
timeout_left = cmd->timeout_orig - time_sum;
delay = cmd->timeout_orig / 10;

delay = QB_MIN(2000, delay);
if (delay < timeout_left) {
cmd->start_delay = delay;
cmd->timeout = timeout_left;

if (pcmk__result_ok(&(cmd->result))) {
crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay);

} else if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
cmd->rsc_id, cmd->action, time_sum, timeout_left, delay);

} else {
crm_notice("%s %s failed: %s: Re-scheduling (remaining "
"timeout %s) " QB_XS
" exitstatus=%d elapsed=%dms start_delay=%dms)",
cmd->rsc_id, cmd->action,
crm_exit_str(cmd->result.exit_status),
pcmk__readable_interval(timeout_left),
cmd->result.exit_status, time_sum, delay);
}
if (delay >= timeout_left && timeout_left > 20) {
delay = timeout_left/2;
}

cmd_reset(cmd);
if(rsc) {
rsc->active = NULL;
}
schedule_lrmd_cmd(rsc, cmd);
delay = QB_MIN(2000, delay);
if (delay < timeout_left) {
cmd->start_delay = delay;
cmd->timeout = timeout_left;

/* Don't finalize cmd, we're not done with it yet */
return;
if (pcmk__result_ok(&(cmd->result))) {
crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay);

} else if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
cmd->rsc_id, cmd->action, time_sum, timeout_left, delay);

} else {
crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)",
cmd->rsc_id,
(cmd->real_action? cmd->real_action : cmd->action),
cmd->result.exit_status, time_sum, timeout_left);
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_TIMEOUT,
"Investigate reason for timeout, and adjust "
"configured operation timeout if necessary");
cmd_original_times(cmd);
crm_notice("%s %s failed: %s: Re-scheduling (remaining "
"timeout %s) " QB_XS
" exitstatus=%d elapsed=%dms start_delay=%dms)",
cmd->rsc_id, cmd->action,
crm_exit_str(cmd->result.exit_status),
pcmk__readable_interval(timeout_left),
cmd->result.exit_status, time_sum, delay);
}

cmd_reset(cmd);
if (rsc) {
rsc->active = NULL;
}
schedule_lrmd_cmd(rsc, cmd);

/* Don't finalize cmd, we're not done with it yet */
return;

} else {
crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)",
cmd->rsc_id,
(cmd->real_action? cmd->real_action : cmd->action),
cmd->result.exit_status, time_sum, timeout_left);
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_TIMEOUT,
"Investigate reason for timeout, and adjust "
"configured operation timeout if necessary");
cmd_original_times(cmd);
}
#endif

finalize:
pcmk__set_result_output(&(cmd->result), services__grab_stdout(action),
services__grab_stderr(action));
cmd_finalize(cmd, rsc);
Expand Down
10 changes: 5 additions & 5 deletions lib/services/dbus.c
Original file line number Diff line number Diff line change
Expand Up @@ -294,12 +294,12 @@ pcmk_dbus_connect(void)
void
pcmk_dbus_disconnect(DBusConnection *connection)
{
/* Per the DBus documentation, connections created with
* dbus_connection_open() are owned by libdbus and should never be closed.
*
* @TODO Should we call dbus_connection_unref() here?
/* We acquire our dbus connection with dbus_bus_get(), which makes it a
* shared connection. Therefore, we can't close or free it here. The
* best we can do is decrement the reference count so dbus knows when
* there are no more clients connected to it.
*/
return;
dbus_connection_unref(connection);
}

// Custom DBus error names to use
Expand Down
13 changes: 8 additions & 5 deletions lib/services/systemd.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <crm/services_internal.h>
#include <crm/common/mainloop.h>

#include <stdbool.h>
#include <sys/stat.h>
#include <gio/gio.h>
#include <services_private.h>
Expand Down Expand Up @@ -103,13 +104,15 @@ systemd_send_recv(DBusMessage *msg, DBusError *error, int timeout)
static DBusMessage *
systemd_call_simple_method(const char *method)
{
DBusMessage *msg = systemd_new_method(method);
DBusMessage *msg = NULL;
DBusMessage *reply = NULL;
DBusError error;

/* Don't call systemd_init() here, because that calls this */
CRM_CHECK(systemd_proxy, return NULL);

msg = systemd_new_method(method);

if (msg == NULL) {
crm_err("Could not create message to send %s to systemd", method);
return NULL;
Expand All @@ -133,7 +136,7 @@ systemd_call_simple_method(const char *method)
return reply;
}

static gboolean
static bool
systemd_init(void)
{
static int need_init = 1;
Expand All @@ -152,9 +155,9 @@ systemd_init(void)
systemd_proxy = pcmk_dbus_connect();
}
if (systemd_proxy == NULL) {
return FALSE;
return false;
}
return TRUE;
return true;
}

static inline char *
Expand Down Expand Up @@ -548,7 +551,7 @@ systemd_unit_listall(void)
DBusMessageIter elem;
DBusMessage *reply = NULL;

if (systemd_init() == FALSE) {
if (!systemd_init()) {
return NULL;
}

Expand Down

0 comments on commit 88e9ec1

Please sign in to comment.