diff --git a/common/src/unifyfs_configurator.h b/common/src/unifyfs_configurator.h index f85a7359..1c96d71a 100644 --- a/common/src/unifyfs_configurator.h +++ b/common/src/unifyfs_configurator.h @@ -90,6 +90,7 @@ UNIFYFS_CFG(margo, client_pool_size, INT, UNIFYFS_MARGO_POOL_SZ, "size of server's ULT pool for client-server RPCs", NULL) \ UNIFYFS_CFG(margo, client_timeout, INT, UNIFYFS_MARGO_CLIENT_SERVER_TIMEOUT_MSEC, "timeout in milliseconds for client-server RPCs", NULL) \ UNIFYFS_CFG(margo, lazy_connect, BOOL, on, "wait until first communication with server to resolve its connection address", NULL) \ + UNIFYFS_CFG(margo, server_address, STRING, NULLSTRING, "Mercury address string to use for Margo init of server-server communication", NULL) \ UNIFYFS_CFG(margo, server_pool_size, INT, UNIFYFS_MARGO_POOL_SZ, "size of server's ULT pool for server-server RPCs", NULL) \ UNIFYFS_CFG(margo, server_timeout, INT, UNIFYFS_MARGO_SERVER_SERVER_TIMEOUT_MSEC, "timeout in milliseconds for server-server RPCs", NULL) \ UNIFYFS_CFG(margo, tcp, BOOL, on, "use TCP for server-to-server margo RPCs", NULL) \ diff --git a/docs/configuration.rst b/docs/configuration.rst index a861168f..40a5772a 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -128,13 +128,14 @@ files. .. table:: ``[margo]`` section - margo server NA settings :widths: auto - ============== ==== ================================================================================= - Key Type Description - ============== ==== ================================================================================= - tcp BOOL Use TCP for server-to-server rpcs (default: on, turn off to enable libfabric RMA) - client_timeout INT timeout in milliseconds for rpcs between client and server (default: 5000) - server_timeout INT timeout in milliseconds for rpcs between servers (default: 15000) - ============== ==== ================================================================================= + ============== ====== ==================================================================================== + Key Type Description + ============== ====== ==================================================================================== + tcp BOOL Use TCP for server-to-server rpcs (default: on, turn off to enable libfabric RMA) + client_timeout INT timeout in milliseconds for rpcs between client and server (default: 5000) + server_timeout INT timeout in milliseconds for rpcs between servers (default: 15000) + server_address STRING Mercury address string to use for Margo initialization of inter-server communication + ============== ====== ==================================================================================== ----------- diff --git a/server/src/margo_server.c b/server/src/margo_server.c index d3ce6651..fee2bddf 100644 --- a/server/src/margo_server.c +++ b/server/src/margo_server.c @@ -26,6 +26,8 @@ // global variables ServerRpcContext_t* unifyfsd_rpc_context; +char* margo_init_address; // = NULL +bool margo_use_address; // = false bool margo_use_tcp = true; bool margo_lazy_connect; // = false int margo_client_server_pool_sz = UNIFYFS_MARGO_POOL_SZ; @@ -99,31 +101,37 @@ static char* get_margo_addr_str(margo_instance_id mid) /* setup_remote_target - Initializes the server-server margo target */ static margo_instance_id setup_remote_target(void) { - /* by default we try to use ofi */ - const char* margo_protocol = margo_use_tcp ? + const char* server_addr = NULL; + + if (margo_use_address) { + server_addr = margo_init_address; + } else { + /* by default we try to use ofi */ + server_addr = margo_use_tcp ? PROTOCOL_MARGO_OFI_TCP : PROTOCOL_MARGO_OFI_RMA; - if (!margo_protocol) { - /* when ofi is not available, fallback to using bmi */ - LOGWARN("OFI is not available, using BMI for margo rpc"); - margo_protocol = PROTOCOL_MARGO_BMI_TCP; + if (!server_addr) { + /* when ofi is not available, fallback to using bmi */ + LOGWARN("OFI is not available, using BMI for margo rpc"); + server_addr = PROTOCOL_MARGO_BMI_TCP; + } } /* initialize margo */ - margo_instance_id mid = margo_init(margo_protocol, MARGO_SERVER_MODE, + margo_instance_id mid = margo_init(server_addr, MARGO_SERVER_MODE, margo_use_progress_thread, margo_server_server_pool_sz); if (mid == MARGO_INSTANCE_NULL) { LOGERR("margo_init(%s, SERVER_MODE, %d, %d) failed", - margo_protocol, margo_use_progress_thread, + server_addr, margo_use_progress_thread, margo_server_server_pool_sz); - if (margo_protocol == PROTOCOL_MARGO_OFI_TCP) { + if (server_addr == PROTOCOL_MARGO_OFI_TCP) { /* try "ofi+sockets" instead */ - margo_protocol = PROTOCOL_MARGO_OFI_SOCKETS; - mid = margo_init(margo_protocol, MARGO_SERVER_MODE, + server_addr = PROTOCOL_MARGO_OFI_SOCKETS; + mid = margo_init(server_addr, MARGO_SERVER_MODE, margo_use_progress_thread, margo_server_server_pool_sz); if (mid == MARGO_INSTANCE_NULL) { LOGERR("margo_init(%s, SERVER_MODE, %d, %d) failed", - margo_protocol, margo_use_progress_thread, + server_addr, margo_use_progress_thread, margo_server_server_pool_sz); return mid; } diff --git a/server/src/margo_server.h b/server/src/margo_server.h index 3265adef..c89516ab 100644 --- a/server/src/margo_server.h +++ b/server/src/margo_server.h @@ -68,7 +68,8 @@ typedef struct ServerRpcContext { } ServerRpcContext_t; extern ServerRpcContext_t* unifyfsd_rpc_context; - +extern char* margo_init_address; +extern bool margo_use_address; extern bool margo_use_tcp; extern bool margo_lazy_connect; extern int margo_client_server_pool_sz; diff --git a/server/src/unifyfs_server.c b/server/src/unifyfs_server.c index f226f492..9d087d0a 100644 --- a/server/src/unifyfs_server.c +++ b/server/src/unifyfs_server.c @@ -427,9 +427,16 @@ int main(int argc, char* argv[]) margo_lazy_connect = b; } - rc = configurator_bool_val(server_cfg.margo_tcp, &b); - if (0 == rc) { - margo_use_tcp = b; + if (server_cfg.margo_server_address != NULL) { + margo_use_address = true; + margo_init_address = strdup(server_cfg.margo_server_address); + } + + if (!margo_use_address) { + rc = configurator_bool_val(server_cfg.margo_tcp, &b); + if (0 == rc) { + margo_use_tcp = b; + } } rc = margo_server_rpc_init();