Skip to content

Commit 3d1ef9c

Browse files
committed
op/cuda: Lazily initialize the CUDA information
Signed-off-by: Joseph Schuchart <[email protected]>
1 parent 53336c3 commit 3d1ef9c

File tree

3 files changed

+35
-27
lines changed

3 files changed

+35
-27
lines changed

ompi/mca/op/cuda/op_cuda.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ ompi_op_base_stream_handler_fn_t ompi_op_cuda_functions[OMPI_OP_BASE_FORTRAN_OP_
7575
extern
7676
ompi_op_base_3buff_stream_handler_fn_t ompi_op_cuda_3buff_functions[OMPI_OP_BASE_FORTRAN_OP_MAX][OMPI_OP_BASE_TYPE_MAX];
7777

78+
void ompi_op_cuda_lazy_init();
79+
7880
END_C_DECLS
7981

8082
#endif /* MCA_OP_CUDA_EXPORT_H */

ompi/mca/op/cuda/op_cuda_component.c

Lines changed: 31 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,36 @@ cuda_component_register(void)
127127
static int
128128
cuda_component_init_query(bool enable_progress_threads,
129129
bool enable_mpi_thread_multiple)
130+
{
131+
return OMPI_SUCCESS;
132+
}
133+
134+
/*
135+
* Query whether this component can be used for a specific op
136+
*/
137+
static struct ompi_op_base_module_1_0_0_t*
138+
cuda_component_op_query(struct ompi_op_t *op, int *priority)
139+
{
140+
ompi_op_base_module_t *module = NULL;
141+
142+
module = OBJ_NEW(ompi_op_base_module_t);
143+
module->opm_device_enabled = true;
144+
for (int i = 0; i < OMPI_OP_BASE_TYPE_MAX; ++i) {
145+
module->opm_stream_fns[i] = ompi_op_cuda_functions[op->o_f_to_c_index][i];
146+
module->opm_3buff_stream_fns[i] = ompi_op_cuda_3buff_functions[op->o_f_to_c_index][i];
147+
148+
if( NULL != module->opm_fns[i] ) {
149+
OBJ_RETAIN(module);
150+
}
151+
if( NULL != module->opm_3buff_fns[i] ) {
152+
OBJ_RETAIN(module);
153+
}
154+
}
155+
*priority = 50;
156+
return (ompi_op_base_module_1_0_0_t *) module;
157+
}
158+
159+
void ompi_op_cuda_lazy_init()
130160
{
131161
int num_devices;
132162
int rc;
@@ -166,30 +196,4 @@ cuda_component_init_query(bool enable_progress_threads,
166196
}
167197
}
168198

169-
return OMPI_SUCCESS;
170-
}
171-
172-
/*
173-
* Query whether this component can be used for a specific op
174-
*/
175-
static struct ompi_op_base_module_1_0_0_t*
176-
cuda_component_op_query(struct ompi_op_t *op, int *priority)
177-
{
178-
ompi_op_base_module_t *module = NULL;
179-
180-
module = OBJ_NEW(ompi_op_base_module_t);
181-
module->opm_device_enabled = true;
182-
for (int i = 0; i < OMPI_OP_BASE_TYPE_MAX; ++i) {
183-
module->opm_stream_fns[i] = ompi_op_cuda_functions[op->o_f_to_c_index][i];
184-
module->opm_3buff_stream_fns[i] = ompi_op_cuda_3buff_functions[op->o_f_to_c_index][i];
185-
186-
if( NULL != module->opm_fns[i] ) {
187-
OBJ_RETAIN(module);
188-
}
189-
if( NULL != module->opm_3buff_fns[i] ) {
190-
OBJ_RETAIN(module);
191-
}
192-
}
193-
*priority = 50;
194-
return (ompi_op_base_module_1_0_0_t *) module;
195-
}
199+
}

ompi/mca/op/cuda/op_cuda_functions.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ static inline void device_op_pre(const void *orig_source1,
5555
uint64_t target_flags = -1, source1_flags = -1, source2_flags = -1;
5656
int target_rc, source1_rc, source2_rc = -1;
5757

58+
ompi_op_cuda_lazy_init();
59+
5860
*target = orig_target;
5961
*source1 = (void*)orig_source1;
6062
if (NULL != orig_source2) {

0 commit comments

Comments
 (0)