Skip to content

Commit

Permalink
op/cuda: Lazily initialize the CUDA information
Browse files Browse the repository at this point in the history
Signed-off-by: Joseph Schuchart <[email protected]>
  • Loading branch information
devreal committed Jun 19, 2024
1 parent 53336c3 commit 3d1ef9c
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 27 deletions.
2 changes: 2 additions & 0 deletions ompi/mca/op/cuda/op_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ ompi_op_base_stream_handler_fn_t ompi_op_cuda_functions[OMPI_OP_BASE_FORTRAN_OP_
extern
ompi_op_base_3buff_stream_handler_fn_t ompi_op_cuda_3buff_functions[OMPI_OP_BASE_FORTRAN_OP_MAX][OMPI_OP_BASE_TYPE_MAX];

/*
 * Lazily initialize the CUDA information used by this component.
 *
 * Declared with an explicit (void) parameter list so that C compilers see a
 * full prototype and reject calls that pass arguments.
 *
 * NOTE(review): presumably safe to call repeatedly (only the first call is
 * expected to do work) -- confirm against the definition.
 */
void ompi_op_cuda_lazy_init(void);

END_C_DECLS

#endif /* MCA_OP_CUDA_EXPORT_H */
58 changes: 31 additions & 27 deletions ompi/mca/op/cuda/op_cuda_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,36 @@ cuda_component_register(void)
static int
cuda_component_init_query(bool enable_progress_threads,
                          bool enable_mpi_thread_multiple)
{
    /* Neither threading flag is consulted here: the query does no work
     * of its own and unconditionally reports success. */
    (void) enable_progress_threads;
    (void) enable_mpi_thread_multiple;

    return OMPI_SUCCESS;
}

/*
 * Query whether this component can be used for a specific op.
 *
 * Allocates a new op module, marks it as device-enabled, and fills its
 * 2-buffer and 3-buffer stream handler tables from the CUDA function tables,
 * using the row selected by op->o_f_to_c_index.
 *
 * op:       the operation being queried; only o_f_to_c_index is read here.
 * priority: out-parameter; set to 50 on success, left untouched on failure.
 *
 * Returns the newly created module, or NULL if it could not be allocated.
 */
static struct ompi_op_base_module_1_0_0_t*
cuda_component_op_query(struct ompi_op_t *op, int *priority)
{
    ompi_op_base_module_t *module = OBJ_NEW(ompi_op_base_module_t);

    if (NULL == module) {
        /* Allocation failed: report "no module" instead of dereferencing
         * a NULL pointer below. */
        return NULL;
    }

    module->opm_device_enabled = true;
    for (int i = 0; i < OMPI_OP_BASE_TYPE_MAX; ++i) {
        module->opm_stream_fns[i] = ompi_op_cuda_functions[op->o_f_to_c_index][i];
        module->opm_3buff_stream_fns[i] = ompi_op_cuda_3buff_functions[op->o_f_to_c_index][i];

        /* Take one extra reference on the module per non-NULL handler slot.
         * NOTE(review): these checks read opm_fns / opm_3buff_fns, while the
         * assignments above write opm_stream_fns / opm_3buff_stream_fns;
         * presumably the two names alias the same storage -- confirm against
         * the module struct definition. */
        if( NULL != module->opm_fns[i] ) {
            OBJ_RETAIN(module);
        }
        if( NULL != module->opm_3buff_fns[i] ) {
            OBJ_RETAIN(module);
        }
    }
    *priority = 50;
    return (ompi_op_base_module_1_0_0_t *) module;
}

void ompi_op_cuda_lazy_init()
{
int num_devices;
int rc;
Expand Down Expand Up @@ -166,30 +196,4 @@ cuda_component_init_query(bool enable_progress_threads,
}
}

return OMPI_SUCCESS;
}

/*
 * Query whether this component can be used for a specific op.
 *
 * Allocates a new op module, marks it as device-enabled, and fills its
 * 2-buffer and 3-buffer stream handler tables from the CUDA function tables,
 * using the row selected by op->o_f_to_c_index.
 *
 * op:       the operation being queried; only o_f_to_c_index is read here.
 * priority: out-parameter; set to 50 on success, left untouched on failure.
 *
 * Returns the newly created module, or NULL if it could not be allocated.
 */
static struct ompi_op_base_module_1_0_0_t*
cuda_component_op_query(struct ompi_op_t *op, int *priority)
{
    ompi_op_base_module_t *module = OBJ_NEW(ompi_op_base_module_t);

    if (NULL == module) {
        /* Allocation failed: report "no module" instead of dereferencing
         * a NULL pointer below. */
        return NULL;
    }

    module->opm_device_enabled = true;
    for (int i = 0; i < OMPI_OP_BASE_TYPE_MAX; ++i) {
        module->opm_stream_fns[i] = ompi_op_cuda_functions[op->o_f_to_c_index][i];
        module->opm_3buff_stream_fns[i] = ompi_op_cuda_3buff_functions[op->o_f_to_c_index][i];

        /* Take one extra reference on the module per non-NULL handler slot.
         * NOTE(review): these checks read opm_fns / opm_3buff_fns, while the
         * assignments above write opm_stream_fns / opm_3buff_stream_fns;
         * presumably the two names alias the same storage -- confirm against
         * the module struct definition. */
        if( NULL != module->opm_fns[i] ) {
            OBJ_RETAIN(module);
        }
        if( NULL != module->opm_3buff_fns[i] ) {
            OBJ_RETAIN(module);
        }
    }
    *priority = 50;
    return (ompi_op_base_module_1_0_0_t *) module;
}
}
2 changes: 2 additions & 0 deletions ompi/mca/op/cuda/op_cuda_functions.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ static inline void device_op_pre(const void *orig_source1,
uint64_t target_flags = -1, source1_flags = -1, source2_flags = -1;
int target_rc, source1_rc, source2_rc = -1;

ompi_op_cuda_lazy_init();

*target = orig_target;
*source1 = (void*)orig_source1;
if (NULL != orig_source2) {
Expand Down

0 comments on commit 3d1ef9c

Please sign in to comment.