1414#include <crm/services_internal.h>
1515#include <crm/common/mainloop.h>
1616
17+ #include <inttypes.h> // PRIu32
1718#include <stdbool.h>
19+ #include <stdint.h> // uint32_t
1820#include <stdio.h> // fopen(), NULL, etc.
1921#include <sys/stat.h>
2022#include <gio/gio.h>
@@ -785,6 +787,8 @@ systemd_unit_metadata(const char *name, int timeout)
785787static void
786788process_unit_method_reply (DBusMessage * reply , svc_action_t * op )
787789{
790+ bool start_stop = pcmk__strcase_any_of (op -> action , PCMK_ACTION_START ,
791+ PCMK_ACTION_STOP , NULL );
788792 DBusError error ;
789793
790794 dbus_error_init (& error );
@@ -798,11 +802,25 @@ process_unit_method_reply(DBusMessage *reply, svc_action_t *op)
798802
799803 } else if (!pcmk_dbus_type_check (reply , NULL , DBUS_TYPE_OBJECT_PATH ,
800804 __func__ , __LINE__ )) {
805+ const char * reason = "systemd D-Bus method had unexpected reply" ;
806+
801807 crm_info ("DBus request for %s of %s succeeded but "
802808 "return type was unexpected" ,
803809 op -> action , pcmk__s (op -> rsc , "unknown resource" ));
804- services__set_result (op , PCMK_OCF_OK , PCMK_EXEC_DONE ,
805- "systemd DBus method had unexpected reply" );
810+
811+ if (!op -> synchronous && start_stop ) {
812+ /* The start or stop job is enqueued but is not complete. We need a
813+ * job path to detect completion in job_removed_filter().
814+ */
815+ services__set_result (op , PCMK_OCF_UNKNOWN_ERROR , PCMK_EXEC_ERROR ,
816+ reason );
817+
818+ } else {
819+ /* Something weird happened, but the action is finished and there
820+ * was no D-Bus error. So call it a success.
821+ */
822+ services__set_result (op , PCMK_OCF_OK , PCMK_EXEC_DONE , reason );
823+ }
806824
807825 } else {
808826 const char * path = NULL ;
@@ -813,9 +831,114 @@ process_unit_method_reply(DBusMessage *reply, svc_action_t *op)
813831
814832 crm_debug ("DBus request for %s of %s using %s succeeded" ,
815833 op -> action , pcmk__s (op -> rsc , "unknown resource" ), path );
816- pcmk__str_update (& (op -> opaque -> job_path ), path );
817- services__set_result (op , PCMK_OCF_OK , PCMK_EXEC_DONE , NULL );
834+
835+ if (!op -> synchronous && start_stop ) {
836+ // Should be set to unknown/pending already
837+ services__set_result (op , PCMK_OCF_UNKNOWN , PCMK_EXEC_PENDING , NULL );
838+ pcmk__str_update (& (op -> opaque -> job_path ), path );
839+
840+ } else {
841+ services__set_result (op , PCMK_OCF_OK , PCMK_EXEC_DONE , NULL );
842+ }
843+ }
844+ }
845+
846+ /*!
847+ * \internal
848+ * \brief Process a systemd \c JobRemoved signal for a given service action
849+ *
850+ * This filter is expected to be added with \c finalize_async_action_dbus() as
851+ * the \c free_data_function. Then if \p message is a \c JobRemoved signal for
852+ * the action specified by \p user_data, the action's result is set, the filter
853+ * is removed, and the action is finalized.
854+ *
855+ * \param[in,out] connection D-Bus connection
856+ * \param[in] message D-Bus message
857+ * \param[in,out] user_data Service action (\c svc_action_t)
858+ *
859+ * \retval \c DBUS_HANDLER_RESULT_HANDLED if \p message is a \c JobRemoved
860+ * signal for \p user_data
861+ * \retval \c DBUS_HANDLER_RESULT_NOT_YET_HANDLED otherwise (on error, if
862+ * \p message is not a \c JobRemoved signal, or if the signal is for
863+ * some other action's job)
864+ */
865+ static DBusHandlerResult
866+ job_removed_filter (DBusConnection * connection , DBusMessage * message ,
867+ void * user_data )
868+ {
869+ svc_action_t * action = user_data ;
870+ const char * action_name = NULL ;
871+ uint32_t job_id = 0 ;
872+ const char * bus_path = NULL ;
873+ const char * unit_name = NULL ;
874+ const char * result = NULL ;
875+ DBusError error ;
876+
877+ CRM_CHECK ((connection != NULL ) && (message != NULL ),
878+ return DBUS_HANDLER_RESULT_NOT_YET_HANDLED );
879+
880+ // action should always be set when the filter is added
881+ if ((action == NULL )
882+ || !dbus_message_is_signal (message , BUS_NAME_MANAGER , "JobRemoved" )) {
883+ return DBUS_HANDLER_RESULT_NOT_YET_HANDLED ;
884+ }
885+
886+ dbus_error_init (& error );
887+ if (!dbus_message_get_args (message , & error ,
888+ DBUS_TYPE_UINT32 , & job_id ,
889+ DBUS_TYPE_OBJECT_PATH , & bus_path ,
890+ DBUS_TYPE_STRING , & unit_name ,
891+ DBUS_TYPE_STRING , & result ,
892+ DBUS_TYPE_INVALID )) {
893+ crm_err ("Could not interpret systemd DBus signal: %s " QB_XS " (%s)" ,
894+ error .message , error .name );
895+ dbus_error_free (& error );
896+ return DBUS_HANDLER_RESULT_NOT_YET_HANDLED ;
897+ }
898+
899+ if (!pcmk__str_eq (bus_path , action -> opaque -> job_path , pcmk__str_none )) {
900+ // This filter is not for this job
901+ return DBUS_HANDLER_RESULT_NOT_YET_HANDLED ;
818902 }
903+
904+ action_name = pcmk__s (action -> action , "(unknown)" );
905+
906+ crm_trace ("Setting %s result for %s (JobRemoved id=%" PRIu32 ", result=%s" ,
907+ action_name , unit_name , job_id , result );
908+
909+ if (pcmk__str_eq (result , "done" , pcmk__str_none )) {
910+ services__set_result (action , PCMK_OCF_OK , PCMK_EXEC_DONE , NULL );
911+
912+ } else if (pcmk__str_eq (result , "timeout" , pcmk__str_none )) {
913+ services__format_result (action , PCMK_OCF_UNKNOWN_ERROR , PCMK_EXEC_TIMEOUT ,
914+ "systemd %s job for %s timed out" ,
915+ action_name , unit_name );
916+
917+ } else {
918+ services__format_result (action , PCMK_OCF_UNKNOWN_ERROR , PCMK_EXEC_ERROR ,
919+ "systemd %s job for %s failed with result '%s'" ,
920+ action_name , unit_name , result );
921+ }
922+
923+ /* This instance of the filter was specifically for the given action.
924+ *
925+ * The action gets finalized by services__finalize_async_op() via the
926+ * filter's free_data_function.
927+ */
928+ dbus_connection_remove_filter (systemd_proxy , job_removed_filter , action );
929+ return DBUS_HANDLER_RESULT_HANDLED ;
930+ }
931+
932+ /*!
933+ * \internal
934+ * \brief \c DBusFreeFunction wrapper for \c services__finalize_async_op()
935+ *
936+ * \param[in,out] action Asynchronous service action to finalize
937+ */
938+ static void
939+ finalize_async_action_dbus (void * action )
940+ {
941+ services__finalize_async_op ((svc_action_t * ) action );
819942}
820943
821944/*!
@@ -842,12 +965,34 @@ unit_method_complete(DBusPendingCall *pending, void *user_data)
842965 CRM_LOG_ASSERT (pending == op -> opaque -> pending );
843966 services_set_op_pending (op , NULL );
844967
845- // Determine result and finalize action
846968 process_unit_method_reply (reply , op );
847- services__finalize_async_op ( op );
969+
848970 if (reply != NULL ) {
849971 dbus_message_unref (reply );
850972 }
973+
974+ if ((op -> status == PCMK_EXEC_PENDING )
975+ && pcmk__strcase_any_of (op -> action , PCMK_ACTION_START , PCMK_ACTION_STOP ,
976+ NULL )) {
977+ /* Start and stop method calls return when the job is enqueued, not when
978+ * it's complete. Start and stop actions must be finalized after the job
979+ * is complete, because the action callback function may use it. We add
980+ * a message filter to process the JobRemoved signal, which indicates
981+ * completion.
982+ *
983+ * The filter takes ownership of op, which will be finalized when the
984+ * filter is later removed.
985+ */
986+ if (dbus_connection_add_filter (systemd_proxy , job_removed_filter , op ,
987+ finalize_async_action_dbus )) {
988+ return ;
989+ }
990+ crm_err ("Could not add D-Bus filter for systemd JobRemoved signals" );
991+ services__set_result (op , PCMK_OCF_UNKNOWN_ERROR , PCMK_EXEC_ERROR ,
992+ "Failed to add D-Bus filter for systemd "
993+ "JobRemoved signal" );
994+ }
995+ services__finalize_async_op (op );
851996}
852997
853998/* When the cluster manages a systemd resource, we create a unit file override
@@ -1193,7 +1338,14 @@ systemd_timeout_callback(gpointer p)
11931338 services__format_result (op , PCMK_OCF_UNKNOWN_ERROR , PCMK_EXEC_TIMEOUT ,
11941339 "%s action for systemd unit %s "
11951340 "did not complete in time" , op -> action , op -> agent );
1196- services__finalize_async_op (op );
1341+
1342+ if (op -> opaque -> job_path != NULL ) {
1343+ // A filter owns this op
1344+ dbus_connection_remove_filter (systemd_proxy , job_removed_filter , op );
1345+
1346+ } else {
1347+ services__finalize_async_op (op );
1348+ }
11971349 return FALSE;
11981350}
11991351
@@ -1247,6 +1399,7 @@ services__execute_systemd(svc_action_t *op)
12471399 "Bug in service library" );
12481400
12491401 if (invoke_unit_by_name (op -> agent , op , NULL ) == pcmk_rc_ok ) {
1402+ // @TODO Why plus 5000? No explanation in fccd046.
12501403 op -> opaque -> timerid = pcmk__create_timer (op -> timeout + 5000 ,
12511404 systemd_timeout_callback , op );
12521405 services_add_inflight_op (op );
0 commit comments