Skip to content

Commit a25dd5f

Browse files
authored
Merge pull request #12354 from edgargabriel/topic/smcuda-delayed-sae-init
btl/smcuda: add delayed stream initialization
2 parents ecd1f42 + 835eef5 commit a25dd5f

File tree

3 files changed

+74
-25
lines changed

3 files changed

+74
-25
lines changed

opal/mca/btl/smcuda/btl_smcuda.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
* Copyright (c) 2010-2015 Los Alamos National Security, LLC.
1616
* All rights reserved.
1717
* Copyright (c) 2012-2023 NVIDIA Corporation. All rights reserved.
18+
* Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights reserved.
1819
* $COPYRIGHT$
1920
*
2021
* Additional copyrights may follow
@@ -204,6 +205,9 @@ struct mca_btl_smcuda_component_t {
204205
int use_cuda_ipc;
205206
int use_cuda_ipc_same_gpu;
206207

208+
int accelerator_delayed_ipc_init;
209+
int accelerator_max_ipc_events;
210+
207211
unsigned long mpool_min_size;
208212
char *allocator;
209213
};

opal/mca/btl/smcuda/btl_smcuda_accelerator.c

Lines changed: 53 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
* Copyright (c) 2022 IBM Corporation. All rights reserved.
44
* Copyright (c) 2023 Triad National Security, LLC. All rights
55
* reserved.
6+
* Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights reserved.
67
* $COPYRIGHT$
78
*
89
* Additional copyrights may follow
@@ -33,62 +34,80 @@ static int accelerator_event_ipc_first_used;
3334
static volatile int accelerator_event_ipc_num_used;
3435

3536
/* Size of array holding events */
36-
static int accelerator_event_max = 400;
3737
static int accelerator_event_ipc_most = 0;
3838
static bool smcuda_accelerator_initialized = false;
3939

4040
void mca_btl_smcuda_accelerator_fini(void);
4141

42-
int mca_btl_smcuda_accelerator_init(void)
42+
/* Create the internal IPC stream and the reusable IPC events on the current device */
43+
static int mca_btl_smcuda_accelerator_ipc_init(void)
4344
{
4445
int rc = OPAL_SUCCESS;
4546
int i;
46-
OBJ_CONSTRUCT(&btl_smcuda_accelerator_ipc_lock, opal_mutex_t);
47-
/* The first available status index is 0. Make an empty frag
48-
array. */
47+
int device_id;
48+
49+
rc = opal_accelerator.get_device(&device_id);
50+
if (OPAL_SUCCESS != rc) {
51+
opal_output_verbose(1, mca_btl_smcuda_component.cuda_ipc_output, "Failed to retrieve current device.");
52+
return OPAL_ERROR;
53+
}
4954

50-
rc = opal_accelerator.create_stream(MCA_ACCELERATOR_NO_DEVICE_ID, &ipc_stream);
55+
rc = opal_accelerator.create_stream(device_id, &ipc_stream);
5156
if (OPAL_SUCCESS != rc) {
5257
opal_output_verbose(1, mca_btl_smcuda_component.cuda_ipc_output, "Failed to create accelerator ipc_stream stream.");
53-
goto cleanup_and_error;
58+
return OPAL_ERROR;
5459
}
5560

61+
/* Create the events since they can be reused. */
62+
for (i = 0; i < mca_btl_smcuda_component.accelerator_max_ipc_events; i++) {
63+
rc = opal_accelerator.create_event(device_id, &accelerator_event_ipc_array[i], opal_accelerator_use_sync_memops ? false : true);
64+
if (OPAL_SUCCESS != rc) {
65+
opal_output_verbose(1, mca_btl_smcuda_component.cuda_ipc_output, "Accelerator create event failed.");
66+
return OPAL_ERROR;
67+
}
68+
}
69+
70+
return OPAL_SUCCESS;
71+
}
72+
73+
int mca_btl_smcuda_accelerator_init(void)
74+
{
75+
int rc = OPAL_SUCCESS;
76+
int i;
77+
78+
OBJ_CONSTRUCT(&btl_smcuda_accelerator_ipc_lock, opal_mutex_t);
79+
5680
accelerator_event_ipc_num_used = 0;
5781
accelerator_event_ipc_first_avail = 0;
5882
accelerator_event_ipc_first_used = 0;
5983

60-
accelerator_event_ipc_array = calloc(accelerator_event_max, sizeof(opal_accelerator_event_t *));
84+
accelerator_event_ipc_array = calloc(mca_btl_smcuda_component.accelerator_max_ipc_events, sizeof(opal_accelerator_event_t *));
6185
if (NULL == accelerator_event_ipc_array) {
6286
opal_output_verbose(1, mca_btl_smcuda_component.cuda_ipc_output, "No memory.");
6387
rc = OPAL_ERROR;
6488
goto cleanup_and_error;
6589
}
66-
/* Create the events since they can be reused. */
67-
for (i = 0; i < accelerator_event_max; i++) {
68-
rc = opal_accelerator.create_event(MCA_ACCELERATOR_NO_DEVICE_ID, &accelerator_event_ipc_array[i], opal_accelerator_use_sync_memops ? false : true);
69-
if (OPAL_SUCCESS != rc) {
70-
opal_output_verbose(1, mca_btl_smcuda_component.cuda_ipc_output, "Accelerator create event failed.");
71-
rc = OPAL_ERROR;
72-
goto cleanup_and_error;
73-
}
74-
}
7590

7691
/* The first available status index is 0. Make an empty frag
7792
array. */
78-
79-
accelerator_event_ipc_frag_array = (struct mca_btl_base_descriptor_t **) malloc(sizeof(struct mca_btl_base_descriptor_t *) * accelerator_event_max);
93+
accelerator_event_ipc_frag_array = (struct mca_btl_base_descriptor_t **) malloc(sizeof(struct mca_btl_base_descriptor_t *) *
94+
mca_btl_smcuda_component.accelerator_max_ipc_events);
8095
if (NULL == accelerator_event_ipc_frag_array) {
8196
opal_output_verbose(1, mca_btl_smcuda_component.cuda_ipc_output, "No memory.");
8297
rc = OPAL_ERROR;
8398
goto cleanup_and_error;
8499
}
85100

101+
if (!mca_btl_smcuda_component.accelerator_delayed_ipc_init) {
102+
mca_btl_smcuda_accelerator_ipc_init();
103+
}
104+
86105
smcuda_accelerator_initialized = true;
87106

88107
cleanup_and_error:
89108
if (OPAL_SUCCESS != rc) {
90109
if (NULL != accelerator_event_ipc_array) {
91-
for (i = 0; i < accelerator_event_max; i++) {
110+
for (i = 0; i < mca_btl_smcuda_component.accelerator_max_ipc_events; i++) {
92111
if (NULL != accelerator_event_ipc_array[i]) {
93112
OBJ_RELEASE(accelerator_event_ipc_array[i]);
94113
}
@@ -117,7 +136,7 @@ void mca_btl_smcuda_accelerator_fini(void)
117136
}
118137

119138
if (NULL != accelerator_event_ipc_array) {
120-
for (i = 0; i < accelerator_event_max; i++) {
139+
for (i = 0; i < mca_btl_smcuda_component.accelerator_max_ipc_events; i++) {
121140
if (NULL != accelerator_event_ipc_array[i]) {
122141
OBJ_RELEASE(accelerator_event_ipc_array[i]);
123142
}
@@ -129,7 +148,9 @@ void mca_btl_smcuda_accelerator_fini(void)
129148
free(accelerator_event_ipc_frag_array);
130149
}
131150

132-
OBJ_RELEASE(ipc_stream);
151+
if (NULL != ipc_stream) {
152+
OBJ_RELEASE(ipc_stream);
153+
}
133154

134155
OBJ_DESTRUCT(&btl_smcuda_accelerator_ipc_lock);
135156
smcuda_accelerator_initialized = false;
@@ -175,7 +196,7 @@ int mca_btl_smcuda_progress_one_ipc_event(struct mca_btl_base_descriptor_t **fra
175196
/* Bump counters, loop around the circular buffer if necessary */
176197
--accelerator_event_ipc_num_used;
177198
++accelerator_event_ipc_first_used;
178-
if (accelerator_event_ipc_first_used >= accelerator_event_max) {
199+
if (accelerator_event_ipc_first_used >= mca_btl_smcuda_component.accelerator_max_ipc_events) {
179200
accelerator_event_ipc_first_used = 0;
180201
}
181202
/* A return value of 1 indicates an event completed and a frag was returned */
@@ -196,10 +217,17 @@ int mca_btl_smcuda_memcpy(void *dst, void *src, size_t amount, char *msg,
196217
int result;
197218
OPAL_THREAD_LOCK(&btl_smcuda_accelerator_ipc_lock);
198219

220+
if (NULL == ipc_stream) {
221+
result = mca_btl_smcuda_accelerator_ipc_init();
222+
if (OPAL_SUCCESS != result) {
223+
return result;
224+
}
225+
}
226+
199227
/* First make sure there is room to store the event. If not, then
200228
* return an error. The error message will tell the user to try and
201229
* run again, but with a larger array for storing events. */
202-
if (accelerator_event_ipc_num_used == accelerator_event_max) {
230+
if (accelerator_event_ipc_num_used == mca_btl_smcuda_component.accelerator_max_ipc_events) {
203231
opal_output_verbose(1, mca_btl_smcuda_component.cuda_ipc_output, "smcuda: Out of event handles");
204232
OPAL_THREAD_UNLOCK(&btl_smcuda_accelerator_ipc_lock);
205233
return OPAL_ERR_OUT_OF_RESOURCE;
@@ -237,7 +265,7 @@ int mca_btl_smcuda_memcpy(void *dst, void *src, size_t amount, char *msg,
237265

238266
/* Bump up the first available slot and number used by 1 */
239267
accelerator_event_ipc_first_avail++;
240-
if (accelerator_event_ipc_first_avail >= accelerator_event_max) {
268+
if (accelerator_event_ipc_first_avail >= mca_btl_smcuda_component.accelerator_max_ipc_events) {
241269
accelerator_event_ipc_first_avail = 0;
242270
}
243271
accelerator_event_ipc_num_used++;

opal/mca/btl/smcuda/btl_smcuda_component.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
* Copyright (c) 2022 IBM Corporation. All rights reserved.
2121
* Copyright (c) 2023 Triad National Security, LLC. All rights
2222
* reserved.
23+
* Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights reserved.
2324
* $COPYRIGHT$
2425
*
2526
* Additional copyrights may follow
@@ -169,6 +170,22 @@ static int smcuda_register(void)
169170
mca_btl_smcuda_param_register_uint("fifo_lazy_free", 120, OPAL_INFO_LVL_5,
170171
&mca_btl_smcuda_component.fifo_lazy_free);
171172

173+
/* Delay the creation of the IPC stream and events until they are first needed.
174+
* This also covers the case where the user did not select the accelerator device
175+
* before MPI_Init while the stream/events internally keep a reference to the
176+
* device that was current when they were created. */
177+
mca_btl_smcuda_component.accelerator_delayed_ipc_init = 1;
178+
(void) mca_base_component_var_register(&mca_btl_smcuda_component.super.btl_version, "delayed_stream_init",
179+
"Delay the initialization of the ipc stream and internal events",
180+
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_5,
181+
MCA_BASE_VAR_SCOPE_READONLY, &mca_btl_smcuda_component.accelerator_delayed_ipc_init);
182+
183+
mca_btl_smcuda_component.accelerator_max_ipc_events = 400;
184+
(void) mca_base_component_var_register(&mca_btl_smcuda_component.super.btl_version, "max_ipc_events",
185+
"Number of events created by the smcuda components internally",
186+
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_5,
187+
MCA_BASE_VAR_SCOPE_READONLY, &mca_btl_smcuda_component.accelerator_max_ipc_events);
188+
172189
/* default number of extra procs to allow for future growth */
173190
mca_btl_smcuda_param_register_int("sm_extra_procs", 0, OPAL_INFO_LVL_9,
174191
&mca_btl_smcuda_component.sm_extra_procs);

0 commit comments

Comments
 (0)