Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Logging improvements #3803

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 29 additions & 28 deletions daemons/based/based_callbacks.c
Original file line number Diff line number Diff line change
Expand Up @@ -861,9 +861,8 @@ cib_process_request(xmlNode *request, gboolean privileged,

if (cib_status != pcmk_ok) {
rc = cib_status;
- crm_err("Operation ignored, cluster configuration is invalid."
- " Please repair and restart: %s", pcmk_strerror(cib_status));
-
+ crm_err("Ignoring request because cluster configuration is invalid "
+ "(please repair and restart): %s", pcmk_strerror(rc));
op_reply = create_cib_reply(op, call_id, client_id, call_options, rc,
the_cib);

Expand Down Expand Up @@ -1242,8 +1241,9 @@ cib_peer_callback(xmlNode * msg, void *private_data)
static gboolean
cib_force_exit(gpointer data)
{
- crm_notice("Forcing exit!");
- terminate_cib(__func__, CRM_EX_ERROR);
+ crm_notice("Exiting immediately after %s without shutdown acknowledgment",
+ pcmk__readable_interval(EXIT_ESCALATION_MS));
+ terminate_cib(CRM_EX_ERROR);
return FALSE;
}

Expand All @@ -1264,7 +1264,8 @@ initiate_exit(void)

active = pcmk__cluster_num_active_nodes();
if (active < 2) { // This is the last active node
- terminate_cib(__func__, 0);
+ crm_info("Exiting without sending shutdown request (no active peers)");
+ terminate_cib(CRM_EX_OK);
return;
}

Expand Down Expand Up @@ -1353,15 +1354,12 @@ extern int remote_tls_fd;
* \internal
* \brief Close remote sockets, free the global CIB and quit
*
- * \param[in] caller Name of calling function (for log message)
- * \param[in] fast If -1, skip disconnect; if positive, exit that
+ * \param[in] exit_status What exit status to use (if -1, use CRM_EX_OK, but
+ * skip disconnecting from the cluster layer)
*/
void
- terminate_cib(const char *caller, int fast)
+ terminate_cib(int exit_status)
{
- crm_info("%s: Exiting%s...", caller,
- (fast > 0)? " fast" : mainloop ? " from mainloop" : "");
-
if (remote_fd > 0) {
close(remote_fd);
remote_fd = 0;
Expand All @@ -1373,27 +1371,30 @@ terminate_cib(const char *caller, int fast)

uninitializeCib();

- if (fast > 0) {
- /* Quit fast on error */
+ // Exit immediately on error
+ if (exit_status > CRM_EX_OK) {
pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm);
- crm_exit(fast);
+ crm_exit(exit_status);
+ return;
+ }

- } else if ((mainloop != NULL) && g_main_loop_is_running(mainloop)) {
- /* Quit via returning from the main loop. If fast == -1, we skip the
- * disconnect here, and it will be done when the main loop returns
- * (this allows the peer status callback to avoid messing with the
- * peer caches).
+ if ((mainloop != NULL) && g_main_loop_is_running(mainloop)) {
+ /* Quit via returning from the main loop. If exit_status has the special
+ * value -1, we skip the disconnect here, and it will be done when the
+ * main loop returns (this allows the peer status callback to avoid
+ * messing with the peer caches).
*/
- if (fast == 0) {
+ if (exit_status == CRM_EX_OK) {
pcmk_cluster_disconnect(crm_cluster);
}
g_main_loop_quit(mainloop);
-
- } else {
- /* Quit via clean exit. Even the peer status callback can disconnect
- * here, because we're not returning control to the caller. */
- pcmk_cluster_disconnect(crm_cluster);
- pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm);
- crm_exit(CRM_EX_OK);
+ return;
}
+
+ /* Exit cleanly. Even the peer status callback can disconnect here, because
+ * we're not returning control to the caller.
+ */
+ pcmk_cluster_disconnect(crm_cluster);
+ pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm);
+ crm_exit(CRM_EX_OK);
}
64 changes: 26 additions & 38 deletions daemons/based/based_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,20 +70,18 @@ static xmlNode *
retrieveCib(const char *filename, const char *sigfile)
{
xmlNode *root = NULL;
+ int rc = cib_file_read_and_verify(filename, sigfile, &root);

- crm_info("Reading cluster configuration file %s (digest: %s)",
- filename, sigfile);
- switch (cib_file_read_and_verify(filename, sigfile, &root)) {
- case -pcmk_err_cib_corrupt:
- crm_warn("Continuing but %s will NOT be used.", filename);
- break;
-
- case -pcmk_err_cib_modified:
- /* Archive the original files so the contents are not lost */
- crm_warn("Continuing but %s will NOT be used.", filename);
+ if (rc == pcmk_ok) {
+ crm_info("Loaded CIB from %s (with digest %s)", filename, sigfile);
+ } else {
+ crm_warn("Continuing but NOT using CIB from %s (with digest %s): %s",
+ filename, sigfile, pcmk_strerror(rc));
+ if (rc == -pcmk_err_cib_modified) {
+ // Archive the original files so the contents are not lost
cib_rename(filename);
cib_rename(sigfile);
- break;
+ }
}
return root;
}
Expand Down Expand Up @@ -215,27 +213,29 @@ readCibXmlFile(const char *dir, const char *file, gboolean discard_status)
free(sigfilepath);

if (root == NULL) {
- crm_warn("Primary configuration corrupt or unusable, trying backups in %s", cib_root);
lpc = scandir(cib_root, &namelist, cib_archive_filter, cib_archive_sort);
if (lpc < 0) {
- crm_err("scandir(%s) failed: %s", cib_root, pcmk_rc_str(errno));
+ crm_err("Could not check for CIB backups in %s: %s",
+ cib_root, pcmk_rc_str(errno));
}
}

while (root == NULL && lpc > 1) {
- crm_debug("Testing %d candidates", lpc);
+ int rc = pcmk_ok;

lpc--;

filename = crm_strdup_printf("%s/%s", cib_root, namelist[lpc]->d_name);
sigfile = crm_strdup_printf("%s.sig", filename);

- crm_info("Reading cluster configuration file %s (digest: %s)",
- filename, sigfile);
- if (cib_file_read_and_verify(filename, sigfile, &root) < 0) {
- crm_warn("Continuing but %s will NOT be used.", filename);
+ rc = cib_file_read_and_verify(filename, sigfile, &root);
+ if (rc == pcmk_ok) {
+ crm_notice("Loaded CIB from last valid backup %s (with digest %s)",
+ filename, sigfile);
} else {
- crm_notice("Continuing with last valid configuration archive: %s", filename);
+ crm_warn("Not using next most recent CIB backup from %s "
+ "(with digest %s): %s",
+ filename, sigfile, pcmk_strerror(rc));
}

free(namelist[lpc]);
Expand All @@ -246,7 +246,7 @@ readCibXmlFile(const char *dir, const char *file, gboolean discard_status)

if (root == NULL) {
root = createEmptyCib(0);
- crm_warn("Continuing with an empty configuration.");
+ crm_warn("Continuing with an empty configuration");
}

if (cib_writes_enabled && (use_valgrind != NULL)
Expand All @@ -270,16 +270,12 @@ readCibXmlFile(const char *dir, const char *file, gboolean discard_status)
/* Do this before schema validation happens */

/* fill in some defaults */
- name = PCMK_XA_ADMIN_EPOCH;
- value = crm_element_value(root, name);
- if (value == NULL) {
- crm_warn("No value for %s was specified in the configuration.", name);
- crm_warn("The recommended course of action is to shutdown,"
- " run crm_verify and fix any errors it reports.");
- crm_warn("We will default to zero and continue but may get"
- " confused about which configuration to use if"
- " multiple nodes are powered up at the same time.");
- crm_xml_add_int(root, name, 0);
+ value = crm_element_value(root, PCMK_XA_ADMIN_EPOCH);
+ if (value == NULL) { // Not possible with schema validation enabled
+ crm_warn("Defaulting missing " PCMK_XA_ADMIN_EPOCH " to 0, but "
+ "cluster may get confused about which node's configuration "
+ "is most recent");
+ crm_xml_add_int(root, PCMK_XA_ADMIN_EPOCH, 0);
}

name = PCMK_XA_EPOCH;
Expand Down Expand Up @@ -313,18 +309,10 @@ uninitializeCib(void)
xmlNode *tmp_cib = the_cib;

if (tmp_cib == NULL) {
crm_debug("The CIB has already been deallocated.");
return FALSE;
}

the_cib = NULL;

crm_debug("Deallocating the CIB.");

pcmk__xml_free(tmp_cib);

crm_debug("The CIB has been deallocated.");

return TRUE;
}

Expand Down
4 changes: 2 additions & 2 deletions daemons/based/based_messages.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ cib_process_shutdown_req(const char *op, int options, const char *section, xmlNo
return -EINVAL;
}

- crm_info("Peer %s has acknowledged our shutdown request", host);
- terminate_cib(__func__, 0);
+ crm_info("Exiting after %s acknowledged our shutdown request", host);
+ terminate_cib(CRM_EX_OK);
return pcmk_ok;
}

Expand Down
11 changes: 6 additions & 5 deletions daemons/based/pacemaker-based.c
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ main(int argc, char **argv)
g_main_loop_run(mainloop);

/* If main loop returned, clean up and exit. We disconnect in case
- * terminate_cib() was called with fast=-1.
+ * terminate_cib(-1) was called.
*/
pcmk_cluster_disconnect(crm_cluster);
pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm);
Expand Down Expand Up @@ -345,8 +345,9 @@ cib_cs_destroy(gpointer user_data)
if (cib_shutdown_flag) {
crm_info("Corosync disconnection complete");
} else {
- crm_crit("Lost connection to cluster layer, shutting down");
- terminate_cib(__func__, CRM_EX_DISCONNECT);
+ crm_crit("Exiting immediately after losing connection "
+ "to cluster layer");
+ terminate_cib(CRM_EX_DISCONNECT);
}
}
#endif
Expand All @@ -361,8 +362,8 @@ cib_peer_update_callback(enum pcmk__node_update type,
if (cib_shutdown_flag && (pcmk__cluster_num_active_nodes() < 2)
&& (pcmk__ipc_client_count() == 0)) {

- crm_info("No more peers");
- terminate_cib(__func__, -1);
+ crm_info("Exiting after no more peers or clients remain");
+ terminate_cib(-1);
}
break;

Expand Down
2 changes: 1 addition & 1 deletion daemons/based/pacemaker-based.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ void cib_common_callback_worker(uint32_t id, uint32_t flags,
int cib_process_request(xmlNode *request, gboolean privileged,
const pcmk__client_t *cib_client);
void cib_shutdown(int nsig);
- void terminate_cib(const char *caller, int fast);
+ void terminate_cib(int exit_status);

gboolean uninitializeCib(void);
xmlNode *readCibXmlFile(const char *dir, const char *file,
Expand Down
22 changes: 10 additions & 12 deletions daemons/controld/controld_execd.c
Original file line number Diff line number Diff line change
Expand Up @@ -1954,20 +1954,13 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg,
}
}

- /* now do the op */
- crm_notice("Requesting local execution of %s operation for %s on %s "
- QB_XS " transition_key=%s op_key=" PCMK__OP_FMT,
- pcmk__readable_action(op->op_type, op->interval_ms), rsc->id,
- lrm_state->node_name, pcmk__s(transition, ""), rsc->id,
- operation, op->interval_ms);
-
nack_reason = should_nack_action(operation);
if (nack_reason != NULL) {
- crm_notice("Discarding attempt to perform action %s on %s in state %s "
- "(shutdown=%s)", operation, rsc->id,
- fsa_state2string(controld_globals.fsa_state),
- pcmk__flag_text(controld_globals.fsa_input_register,
- R_SHUTDOWN));
+ crm_notice("Not requesting local execution of %s operation for %s on %s"
+ " in state %s: %s",
+ pcmk__readable_action(op->op_type, op->interval_ms), rsc->id,
+ lrm_state->node_name,
+ fsa_state2string(controld_globals.fsa_state), nack_reason);

lrmd__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_INVALID,
nack_reason);
Expand All @@ -1977,6 +1970,11 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg,
return;
}

+ crm_notice("Requesting local execution of %s operation for %s on %s "
+ QB_XS " transition %s",
+ pcmk__readable_action(op->op_type, op->interval_ms), rsc->id,
+ lrm_state->node_name, pcmk__s(transition, ""));
+
controld_record_pending_op(lrm_state->node_name, rsc, op);

op_id = pcmk__op_key(rsc->id, op->op_type, op->interval_ms);
Expand Down
Loading