Skip to content

Commit a16908d

Browse files
authored
Merge pull request #13319 from bwbarrett/backports/v5.0-rdma-when-mtl-fix
osc: Fix rdma component when not using ob1
2 parents 66fbfa5 + ba42e9d commit a16908d

File tree

5 files changed: +41 additions, -6 deletions.

ompi/instance/instance.c

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -535,6 +535,10 @@ static int ompi_mpi_instance_init_common (int argc, char **argv)
535535
return ompi_instance_print_error ("mca_pml_base_select() failed", ret);
536536
}
537537

538+
if (OMPI_SUCCESS != (ret = ompi_osc_base_find_available (OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) {
539+
return ompi_instance_print_error ("ompi_osc_base_find_available() failed", ret);
540+
}
541+
538542
OMPI_TIMING_IMPORT_OPAL("orte_init");
539543
OMPI_TIMING_NEXT("rte_init-commit");
540544

@@ -616,10 +620,6 @@ static int ompi_mpi_instance_init_common (int argc, char **argv)
616620
return ompi_instance_print_error ("mca_coll_base_find_available() failed", ret);
617621
}
618622

619-
if (OMPI_SUCCESS != (ret = ompi_osc_base_find_available (OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) {
620-
return ompi_instance_print_error ("ompi_osc_base_find_available() failed", ret);
621-
}
622-
623623
/* io and topo components are not selected here -- see comment
624624
above about the io and topo frameworks being loaded lazily */
625625

@@ -653,7 +653,8 @@ static int ompi_mpi_instance_init_common (int argc, char **argv)
653653
return ompi_instance_print_error ("ompi_attr_create_predefined_keyvals() failed", ret);
654654
}
655655

656-
if (mca_pml_base_requires_world ()) {
656+
if (mca_pml_base_requires_world() ||
657+
mca_osc_base_requires_world()) {
657658
/* need to set up comm world for this instance -- XXX -- FIXME -- probably won't always
658659
* be the case. */
659660
if (OMPI_SUCCESS != (ret = ompi_comm_init_mpi3 ())) {
@@ -702,7 +703,8 @@ static int ompi_mpi_instance_init_common (int argc, char **argv)
702703
/* some btls/mtls require we call add_procs with all procs in the job.
703704
* since the btls/mtls have no visibility here it is up to the pml to
704705
* convey this requirement */
705-
if (mca_pml_base_requires_world ()) {
706+
if (mca_pml_base_requires_world() ||
707+
mca_osc_base_requires_world()) {
706708
if (NULL == (procs = ompi_proc_world (&nprocs))) {
707709
return ompi_instance_print_error ("ompi_proc_get_allocated () failed", ret);
708710
}

ompi/mca/osc/base/osc_base_init.c

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,8 @@
3030
#include "ompi/communicator/communicator.h"
3131
#include "ompi/win/win.h"
3232

33+
bool ompi_osc_base_requires_world = false;
34+
3335
int
3436
ompi_osc_base_select(ompi_win_t *win,
3537
void **base,

ompi/mca/osc/osc.h

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,9 @@ struct ompi_datatype_t;
5353
struct ompi_op_t;
5454
struct ompi_request_t;
5555

56+
57+
extern bool ompi_osc_base_requires_world;
58+
5659
/* ******************************************************************** */
5760

5861

@@ -419,6 +422,11 @@ typedef ompi_osc_base_module_3_0_0_t ompi_osc_base_module_t;
419422

420423
/* ******************************************************************** */
421424

425+
static inline bool mca_osc_base_requires_world (void)
426+
{
427+
return ompi_osc_base_requires_world;
428+
}
429+
422430

423431
END_C_DECLS
424432

ompi/mca/osc/portals4/osc_portals4_component.c

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -348,6 +348,8 @@ component_init(bool enable_progress_threads, bool enable_mpi_threads)
348348
return ret;
349349
}
350350

351+
ompi_osc_base_requires_world = true;
352+
351353
return OMPI_SUCCESS;
352354
}
353355

ompi/mca/osc/rdma/osc_rdma_component.c

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -344,6 +344,27 @@ static int ompi_osc_rdma_component_init (bool enable_progress_threads,
344344
__FILE__, __LINE__, ret);
345345
}
346346

347+
ret = mca_bml_base_init(enable_progress_threads, enable_mpi_threads);
348+
if (OPAL_SUCCESS != ret) {
349+
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
350+
"%s:%d: bml_base_init() failed: %d",
351+
__FILE__, __LINE__, ret);
352+
return ret;
353+
}
354+
355+
/* check if any btls do not support dynamic add_procs */
356+
mca_btl_base_selected_module_t* selected_btl;
357+
OPAL_LIST_FOREACH(selected_btl, &mca_btl_base_modules_initialized,
358+
mca_btl_base_selected_module_t) {
359+
mca_btl_base_module_t *btl = selected_btl->btl_module;
360+
361+
if (btl->btl_flags & MCA_BTL_FLAGS_SINGLE_ADD_PROCS) {
362+
ompi_osc_base_requires_world = true;
363+
break;
364+
}
365+
366+
}
367+
347368
return ret;
348369
}
349370

0 commit comments

Comments
 (0)