From 0d574d99a04c1740b182cee390d0a578d0b1135b Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Thu, 23 Aug 2018 11:22:50 +0200 Subject: [PATCH 1/4] Add msk_fork_init(): wrapper around ibv_fork_init() --- include/mooshika.h | 2 ++ src/trans_rdma.c | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/include/mooshika.h b/include/mooshika.h index f606491..07240bf 100644 --- a/include/mooshika.h +++ b/include/mooshika.h @@ -226,6 +226,8 @@ static inline int msk_wait_write(msk_trans_t *trans, msk_data_t *data, msk_rloc_ int msk_init(msk_trans_t **ptrans, msk_trans_attr_t *attr); +int msk_fork_init(void); + // server specific: int msk_bind_server(msk_trans_t *trans); msk_trans_t *msk_accept_one_wait(msk_trans_t *trans, int msleep); diff --git a/src/trans_rdma.c b/src/trans_rdma.c index 4c3f356..5b843f1 100644 --- a/src/trans_rdma.c +++ b/src/trans_rdma.c @@ -212,6 +212,17 @@ static inline int msk_cond_timedwait(int debug, } +/** + * msk_fork_init: wrapper around ibv_fork_init() + * Initialize libibverbs to support fork(). + * + * @return 0 on success, the value of errno on failure + */ +int msk_fork_init(void) +{ + return ibv_fork_init(); +} + /** * msk_getpd: helper function to get the right pd for a given trans * From a4b7e084a99265332eb60921b1d0dad5cd5400fe Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Thu, 23 Aug 2018 11:49:01 +0200 Subject: [PATCH 2/4] Add interface to reinitialize the library global data Add msk_lib_reset() interface that allows reinitializing the library global structure in a child process after a fork. It doesn't free the rdma resources that may have been allocated by the parent and haven't been freed before fork (they are leaked in the child process), but prevents some hangs and crashes if the process doesn't exec() after fork(). Better than nothing. msk_lib_reset() also calls Mellanox-specific routine rdma_lib_reset() to reset librdmacm state if Mooshika has been compiled with Mellanox's librdmacm. 
--- configure.ac | 5 +++++ include/mooshika.h | 1 + src/trans_rdma.c | 19 +++++++++++++++++++ 3 files changed, 25 insertions(+) diff --git a/configure.ac b/configure.ac index 16351e0..1b07cca 100644 --- a/configure.ac +++ b/configure.ac @@ -19,6 +19,11 @@ AC_SEARCH_LIBS([rdma_create_id], [rdmacm], [], [ ]) AC_CHECK_HEADERS([rdma/rdma_cma.h],[], [AC_MSG_ERROR(missing rdma headers)]) +# Check if RDMA_CM Libs supports rdma_lib_reset() +AC_CHECK_FUNC([rdma_lib_reset], [ + AC_DEFINE([HAVE_RDMA_LIB_RESET], [1], [Define if rdma_lib_reset() is supported.]) +]) + AC_DEFINE( [VERSION_COMMENT], ["libmooshika and examples"], [No Comment]) # Git latest commit diff --git a/include/mooshika.h b/include/mooshika.h index 07240bf..c2cfeb1 100644 --- a/include/mooshika.h +++ b/include/mooshika.h @@ -226,6 +226,7 @@ static inline int msk_wait_write(msk_trans_t *trans, msk_data_t *data, msk_rloc_ int msk_init(msk_trans_t **ptrans, msk_trans_attr_t *attr); +void msk_lib_reset(void); int msk_fork_init(void); // server specific: diff --git a/src/trans_rdma.c b/src/trans_rdma.c index 5b843f1..b4bf684 100644 --- a/src/trans_rdma.c +++ b/src/trans_rdma.c @@ -175,6 +175,25 @@ void __attribute__ ((destructor)) msk_internals_fini(void) { } } +/** + * Reset global state. + * Should (only) be called to reset global data in child process after a fork. 
+ */ +void msk_lib_reset(void) +{ + /* Brutal re-initialization of global state */ + memset(msk_global_state, 0, sizeof(*msk_global_state)); + + msk_global_state->run_threads = 0; + if (pthread_mutex_init(&msk_global_state->lock, NULL)) + ERROR_LOG("pthread_mutex_init failed?!"); + +#ifdef HAVE_RDMA_LIB_RESET + /* Reset librdmacm (Mellanox MOFED interface) */ + rdma_lib_reset(); +#endif +} + /* forward declarations */ static void *msk_cq_thread(void *arg); From 3b1837fb61ef2406062ea02413ccd7947b4c5aaa Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Mon, 10 Sep 2018 16:11:30 +0200 Subject: [PATCH 3/4] msk_lib_reset(): close global epoll file descriptors Leak fewer fds in the child process by closing the epoll fds stored in the global structure. --- src/trans_rdma.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/trans_rdma.c b/src/trans_rdma.c index b4bf684..1d9e328 100644 --- a/src/trans_rdma.c +++ b/src/trans_rdma.c @@ -181,6 +181,16 @@ void __attribute__ ((destructor)) msk_internals_fini(void) { */ void msk_lib_reset(void) { + /* Close epoll fd's */ + if (msk_global_state->cm_epollfd != 0) + close(msk_global_state->cm_epollfd); + + if (msk_global_state->cq_epollfd != 0) + close(msk_global_state->cq_epollfd); + + if (msk_global_state->stats_epollfd != 0) + close(msk_global_state->stats_epollfd); + /* Brutal re-initialization of global state */ memset(msk_global_state, 0, sizeof(*msk_global_state)); From 6f16397d8c028bf33b3ca57ef50d305981c12466 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Mon, 10 Sep 2018 16:36:54 +0200 Subject: [PATCH 4/4] msk_lib_reset(): free worker pool resources eventfds + some memory --- src/trans_rdma.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/trans_rdma.c b/src/trans_rdma.c index 1d9e328..531cf51 100644 --- a/src/trans_rdma.c +++ b/src/trans_rdma.c @@ -191,6 +191,19 @@ void msk_lib_reset(void) if (msk_global_state->stats_epollfd != 0) close(msk_global_state->stats_epollfd); + /* Free worker pool 
resources */ + if (msk_global_state->worker_pool.w_efd != 0) + close(msk_global_state->worker_pool.w_efd); + + if (msk_global_state->worker_pool.m_efd != 0) + close(msk_global_state->worker_pool.m_efd); + + if (msk_global_state->worker_pool.thrids) + free(msk_global_state->worker_pool.thrids); + + if (msk_global_state->worker_pool.wd_queue) + free(msk_global_state->worker_pool.wd_queue); + /* Brutal re-initialization of global state */ memset(msk_global_state, 0, sizeof(*msk_global_state));