1414#include <crm/services_internal.h>
1515#include <crm/common/mainloop.h>
1616
17+ #include <inttypes.h> // PRIu32
1718#include <stdbool.h>
19+ #include <stdint.h> // uint32_t
1820#include <sys/stat.h>
1921#include <gio/gio.h>
2022#include <services_private.h>
@@ -787,6 +789,8 @@ systemd_unit_metadata(const char *name, int timeout)
787789static void
788790process_unit_method_reply (DBusMessage * reply , svc_action_t * op )
789791{
792+ bool start_stop = pcmk__strcase_any_of (op -> action , PCMK_ACTION_START ,
793+ PCMK_ACTION_STOP , NULL );
790794 DBusError error ;
791795
792796 dbus_error_init (& error );
@@ -800,11 +804,25 @@ process_unit_method_reply(DBusMessage *reply, svc_action_t *op)
800804
801805 } else if (!pcmk_dbus_type_check (reply , NULL , DBUS_TYPE_OBJECT_PATH ,
802806 __func__ , __LINE__ )) {
807+ const char * reason = "systemd D-Bus method had unexpected reply" ;
808+
803809 crm_info ("DBus request for %s of %s succeeded but "
804810 "return type was unexpected" ,
805811 op -> action , pcmk__s (op -> rsc , "unknown resource" ));
806- services__set_result (op , PCMK_OCF_OK , PCMK_EXEC_DONE ,
807- "systemd DBus method had unexpected reply" );
812+
813+ if (!op -> synchronous && start_stop ) {
814+ /* The start or stop job is enqueued but is not complete. We need a
815+ * job path to detect completion in job_removed_filter().
816+ */
817+ services__set_result (op , PCMK_OCF_UNKNOWN_ERROR , PCMK_EXEC_ERROR ,
818+ reason );
819+
820+ } else {
821+ /* Something weird happened, but the action is finished and there
822+ * was no D-Bus error. So call it a success.
823+ */
824+ services__set_result (op , PCMK_OCF_OK , PCMK_EXEC_DONE , reason );
825+ }
808826
809827 } else {
810828 const char * path = NULL ;
@@ -815,11 +833,115 @@ process_unit_method_reply(DBusMessage *reply, svc_action_t *op)
815833
816834 crm_debug ("DBus request for %s of %s using %s succeeded" ,
817835 op -> action , pcmk__s (op -> rsc , "unknown resource" ), path );
818- pcmk__str_update (& (op -> opaque -> job_path ), path );
819- services__set_result (op , PCMK_OCF_OK , PCMK_EXEC_DONE , NULL );
836+
837+ if (!op -> synchronous && start_stop ) {
838+ // Should be set to unknown/pending already
839+ services__set_result (op , PCMK_OCF_UNKNOWN , PCMK_EXEC_PENDING , NULL );
840+ pcmk__str_update (& (op -> opaque -> job_path ), path );
841+
842+ } else {
843+ services__set_result (op , PCMK_OCF_OK , PCMK_EXEC_DONE , NULL );
844+ }
820845 }
821846}
822847
848+ /*!
849+ * \internal
850+ * \brief Process a systemd \c JobRemoved signal for a given service action
851+ *
852+ * This filter is expected to be added with \c finalize_async_action_dbus() as
853+ * the \c free_data_function. Then if \p message is a \c JobRemoved signal for
854+ * the action specified by \p user_data, the action's result is set, the filter
855+ * is removed, and the action is finalized.
856+ *
857+ * \param[in,out] connection D-Bus connection
858+ * \param[in] message D-Bus message
859+ * \param[in,out] user_data Service action (\c svc_action_t)
860+ *
861+ * \retval \c DBUS_HANDLER_RESULT_HANDLED if \p message is a \c JobRemoved
862+ * signal for \p user_data
863+ * \retval \c DBUS_HANDLER_RESULT_NOT_YET_HANDLED otherwise (on error, if
864+ * \p message is not a \c JobRemoved signal, or if the signal is for
865+ * some other action's job)
866+ */
867+ static DBusHandlerResult
868+ job_removed_filter (DBusConnection * connection , DBusMessage * message ,
869+ void * user_data )
870+ {
871+ svc_action_t * action = user_data ;
872+ uint32_t job_id = 0 ;
873+ const char * bus_path = NULL ;
874+ const char * unit_name = NULL ;
875+ const char * result = NULL ;
876+ DBusError error ;
877+
878+ CRM_CHECK ((connection != NULL ) && (message != NULL ),
879+ return DBUS_HANDLER_RESULT_NOT_YET_HANDLED );
880+
881+ // action should always be set when the filter is added
882+ if ((action == NULL )
883+ || !dbus_message_is_signal (message , BUS_NAME_MANAGER , "JobRemoved" )) {
884+ return DBUS_HANDLER_RESULT_NOT_YET_HANDLED ;
885+ }
886+
887+ dbus_error_init (& error );
888+ if (!dbus_message_get_args (message , & error ,
889+ DBUS_TYPE_UINT32 , & job_id ,
890+ DBUS_TYPE_OBJECT_PATH , & bus_path ,
891+ DBUS_TYPE_STRING , & unit_name ,
892+ DBUS_TYPE_STRING , & result ,
893+ DBUS_TYPE_INVALID )) {
894+ crm_err ("Could not interpret systemd DBus signal: %s " QB_XS " (%s)" ,
895+ error .message , error .name );
896+ dbus_error_free (& error );
897+ return DBUS_HANDLER_RESULT_NOT_YET_HANDLED ;
898+ }
899+
900+ if (!pcmk__str_eq (bus_path , action -> opaque -> job_path , pcmk__str_none )) {
901+ // This filter is not for this job
902+ return DBUS_HANDLER_RESULT_NOT_YET_HANDLED ;
903+ }
904+
905+ crm_trace ("Setting %s result for %s (JobRemoved id=%" PRIu32 ", result=%s" ,
906+ pcmk__s (action -> action , "(unknown)" ), unit_name , job_id , result );
907+
908+ if (pcmk__str_eq (result , "done" , pcmk__str_none )) {
909+ services__set_result (action , PCMK_OCF_OK , PCMK_EXEC_DONE , NULL );
910+
911+ } else if (pcmk__str_eq (result , "timeout" , pcmk__str_none )) {
912+ services__set_result (action , PCMK_OCF_UNKNOWN_ERROR , PCMK_EXEC_TIMEOUT ,
913+ "Investigate reason for timeout, and adjust "
914+ "configured operation timeout if necessary" );
915+
916+ } else {
917+ /* @FIXME Should we handle additional results specially, instead of
918+ * mapping them all to a generic error with no exit reason?
919+ */
920+ services__set_result (action , PCMK_OCF_UNKNOWN_ERROR , PCMK_EXEC_ERROR ,
921+ NULL );
922+ }
923+
924+ /* This instance of the filter was specifically for the given action.
925+ *
926+ * The action gets finalized by services__finalize_async_op() via the
927+ * filter's free_data_function.
928+ */
929+ dbus_connection_remove_filter (systemd_proxy , job_removed_filter , action );
930+ return DBUS_HANDLER_RESULT_HANDLED ;
931+ }
932+
933+ /*!
934+ * \internal
935+ * \brief \c DBusFreeFunction wrapper for \c services__finalize_async_op()
936+ *
937+ * \param[in,out] action Asynchronous service action to finalize
938+ */
939+ static void
940+ finalize_async_action_dbus (void * action )
941+ {
942+ services__finalize_async_op ((svc_action_t * ) action );
943+ }
944+
823945/*!
824946 * \internal
825947 * \brief Process the completion of an asynchronous unit start, stop, or restart
@@ -844,12 +966,34 @@ unit_method_complete(DBusPendingCall *pending, void *user_data)
844966 CRM_LOG_ASSERT (pending == op -> opaque -> pending );
845967 services_set_op_pending (op , NULL );
846968
847- // Determine result and finalize action
848969 process_unit_method_reply (reply , op );
849- services__finalize_async_op ( op );
970+
850971 if (reply != NULL ) {
851972 dbus_message_unref (reply );
852973 }
974+
975+ if ((op -> status == PCMK_EXEC_PENDING )
976+ && pcmk__strcase_any_of (op -> action , PCMK_ACTION_START , PCMK_ACTION_STOP ,
977+ NULL )) {
978+ /* Start and stop method calls return when the job is enqueued, not when
979+ * it's complete. Start and stop actions must be finalized after the job
980+ * is complete, because the action callback function may use it. We add
981+ * a message filter to process the JobRemoved signal, which indicates
982+ * completion.
983+ *
984+ * The filter takes ownership of op, which will be finalized when the
985+ * filter is later removed.
986+ */
987+ if (dbus_connection_add_filter (systemd_proxy , job_removed_filter , op ,
988+ finalize_async_action_dbus )) {
989+ return ;
990+ }
991+ crm_err ("Could not add D-Bus filter for systemd JobRemoved signals" );
992+ services__set_result (op , PCMK_OCF_UNKNOWN_ERROR , PCMK_EXEC_ERROR ,
993+ "Failed to add D-Bus filter for systemd "
994+ "JobRemoved signal" );
995+ }
996+ services__finalize_async_op (op );
853997}
854998
855999#define SYSTEMD_OVERRIDE_ROOT "/run/systemd/system/"
@@ -1111,7 +1255,14 @@ systemd_timeout_callback(gpointer p)
11111255 services__format_result (op , PCMK_OCF_UNKNOWN_ERROR , PCMK_EXEC_TIMEOUT ,
11121256 "%s action for systemd unit %s "
11131257 "did not complete in time" , op -> action , op -> agent );
1114- services__finalize_async_op (op );
1258+
1259+ if (op -> opaque -> job_path != NULL ) {
1260+ // A filter owns this op
1261+ dbus_connection_remove_filter (systemd_proxy , job_removed_filter , op );
1262+
1263+ } else {
1264+ services__finalize_async_op (op );
1265+ }
11151266 return FALSE;
11161267}
11171268
0 commit comments