3
3
* Copyright (c) 2022 IBM Corporation. All rights reserved.
4
4
* Copyright (c) 2023 Triad National Security, LLC. All rights
5
5
* reserved.
6
+ * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights reserved.
6
7
* $COPYRIGHT$
7
8
*
8
9
* Additional copyrights may follow
@@ -33,62 +34,80 @@ static int accelerator_event_ipc_first_used;
33
34
static volatile int accelerator_event_ipc_num_used ;
34
35
35
36
/* Size of array holding events */
36
- static int accelerator_event_max = 400 ;
37
37
static int accelerator_event_ipc_most = 0 ;
38
38
static bool smcuda_accelerator_initialized = false;
39
39
40
40
void mca_btl_smcuda_accelerator_fini (void );
41
41
42
- int mca_btl_smcuda_accelerator_init (void )
42
+ /* Initialize the internal ipc stream and the events (s&e) */
43
+ static int mca_btl_smcuda_accelerator_ipc_init (void )
43
44
{
44
45
int rc = OPAL_SUCCESS ;
45
46
int i ;
46
- OBJ_CONSTRUCT (& btl_smcuda_accelerator_ipc_lock , opal_mutex_t );
47
- /* The first available status index is 0. Make an empty frag
48
- array. */
47
+ int device_id ;
48
+
49
+ rc = opal_accelerator .get_device (& device_id );
50
+ if (OPAL_SUCCESS != rc ) {
51
+ opal_output_verbose (1 , mca_btl_smcuda_component .cuda_ipc_output , "Failed to retrieve current device." );
52
+ return OPAL_ERROR ;
53
+ }
49
54
50
- rc = opal_accelerator .create_stream (MCA_ACCELERATOR_NO_DEVICE_ID , & ipc_stream );
55
+ rc = opal_accelerator .create_stream (device_id , & ipc_stream );
51
56
if (OPAL_SUCCESS != rc ) {
52
57
opal_output_verbose (1 , mca_btl_smcuda_component .cuda_ipc_output , "Failed to create accelerator ipc_stream stream." );
53
- goto cleanup_and_error ;
58
+ return OPAL_ERROR ;
54
59
}
55
60
61
+ /* Create the events since they can be reused. */
62
+ for (i = 0 ; i < mca_btl_smcuda_component .accelerator_max_ipc_events ; i ++ ) {
63
+ rc = opal_accelerator .create_event (device_id , & accelerator_event_ipc_array [i ], opal_accelerator_use_sync_memops ? false : true);
64
+ if (OPAL_SUCCESS != rc ) {
65
+ opal_output_verbose (1 , mca_btl_smcuda_component .cuda_ipc_output , "Accelerator create event failed." );
66
+ return OPAL_ERROR ;
67
+ }
68
+ }
69
+
70
+ return OPAL_SUCCESS ;
71
+ }
72
+
73
+ int mca_btl_smcuda_accelerator_init (void )
74
+ {
75
+ int rc = OPAL_SUCCESS ;
76
+ int i ;
77
+
78
+ OBJ_CONSTRUCT (& btl_smcuda_accelerator_ipc_lock , opal_mutex_t );
79
+
56
80
accelerator_event_ipc_num_used = 0 ;
57
81
accelerator_event_ipc_first_avail = 0 ;
58
82
accelerator_event_ipc_first_used = 0 ;
59
83
60
- accelerator_event_ipc_array = calloc (accelerator_event_max , sizeof (opal_accelerator_event_t * ));
84
+ accelerator_event_ipc_array = calloc (mca_btl_smcuda_component . accelerator_max_ipc_events , sizeof (opal_accelerator_event_t * ));
61
85
if (NULL == accelerator_event_ipc_array ) {
62
86
opal_output_verbose (1 , mca_btl_smcuda_component .cuda_ipc_output , "No memory." );
63
87
rc = OPAL_ERROR ;
64
88
goto cleanup_and_error ;
65
89
}
66
- /* Create the events since they can be reused. */
67
- for (i = 0 ; i < accelerator_event_max ; i ++ ) {
68
- rc = opal_accelerator .create_event (MCA_ACCELERATOR_NO_DEVICE_ID , & accelerator_event_ipc_array [i ], opal_accelerator_use_sync_memops ? false : true);
69
- if (OPAL_SUCCESS != rc ) {
70
- opal_output_verbose (1 , mca_btl_smcuda_component .cuda_ipc_output , "Accelerator create event failed." );
71
- rc = OPAL_ERROR ;
72
- goto cleanup_and_error ;
73
- }
74
- }
75
90
76
91
/* The first available status index is 0. Make an empty frag
77
92
array. */
78
-
79
- accelerator_event_ipc_frag_array = ( struct mca_btl_base_descriptor_t * * ) malloc ( sizeof ( struct mca_btl_base_descriptor_t * ) * accelerator_event_max );
93
+ accelerator_event_ipc_frag_array = ( struct mca_btl_base_descriptor_t * * ) malloc ( sizeof ( struct mca_btl_base_descriptor_t * ) *
94
+ mca_btl_smcuda_component . accelerator_max_ipc_events );
80
95
if (NULL == accelerator_event_ipc_frag_array ) {
81
96
opal_output_verbose (1 , mca_btl_smcuda_component .cuda_ipc_output , "No memory." );
82
97
rc = OPAL_ERROR ;
83
98
goto cleanup_and_error ;
84
99
}
85
100
101
+ if (!mca_btl_smcuda_component .accelerator_delayed_ipc_init ) {
102
+ mca_btl_smcuda_accelerator_ipc_init ();
103
+ }
104
+
86
105
smcuda_accelerator_initialized = true;
87
106
88
107
cleanup_and_error :
89
108
if (OPAL_SUCCESS != rc ) {
90
109
if (NULL != accelerator_event_ipc_array ) {
91
- for (i = 0 ; i < accelerator_event_max ; i ++ ) {
110
+ for (i = 0 ; i < mca_btl_smcuda_component . accelerator_max_ipc_events ; i ++ ) {
92
111
if (NULL != accelerator_event_ipc_array [i ]) {
93
112
OBJ_RELEASE (accelerator_event_ipc_array [i ]);
94
113
}
@@ -117,7 +136,7 @@ void mca_btl_smcuda_accelerator_fini(void)
117
136
}
118
137
119
138
if (NULL != accelerator_event_ipc_array ) {
120
- for (i = 0 ; i < accelerator_event_max ; i ++ ) {
139
+ for (i = 0 ; i < mca_btl_smcuda_component . accelerator_max_ipc_events ; i ++ ) {
121
140
if (NULL != accelerator_event_ipc_array [i ]) {
122
141
OBJ_RELEASE (accelerator_event_ipc_array [i ]);
123
142
}
@@ -129,7 +148,9 @@ void mca_btl_smcuda_accelerator_fini(void)
129
148
free (accelerator_event_ipc_frag_array );
130
149
}
131
150
132
- OBJ_RELEASE (ipc_stream );
151
+ if (NULL != ipc_stream ) {
152
+ OBJ_RELEASE (ipc_stream );
153
+ }
133
154
134
155
OBJ_DESTRUCT (& btl_smcuda_accelerator_ipc_lock );
135
156
smcuda_accelerator_initialized = false;
@@ -175,7 +196,7 @@ int mca_btl_smcuda_progress_one_ipc_event(struct mca_btl_base_descriptor_t **fra
175
196
/* Bump counters, loop around the circular buffer if necessary */
176
197
-- accelerator_event_ipc_num_used ;
177
198
++ accelerator_event_ipc_first_used ;
178
- if (accelerator_event_ipc_first_used >= accelerator_event_max ) {
199
+ if (accelerator_event_ipc_first_used >= mca_btl_smcuda_component . accelerator_max_ipc_events ) {
179
200
accelerator_event_ipc_first_used = 0 ;
180
201
}
181
202
/* A return value of 1 indicates an event completed and a frag was returned */
@@ -196,10 +217,17 @@ int mca_btl_smcuda_memcpy(void *dst, void *src, size_t amount, char *msg,
196
217
int result ;
197
218
OPAL_THREAD_LOCK (& btl_smcuda_accelerator_ipc_lock );
198
219
220
+ if (NULL == ipc_stream ) {
221
+ result = mca_btl_smcuda_accelerator_ipc_init ();
222
+ if (OPAL_SUCCESS != result ) {
223
+ return result ;
224
+ }
225
+ }
226
+
199
227
/* First make sure there is room to store the event. If not, then
200
228
* return an error. The error message will tell the user to try and
201
229
* run again, but with a larger array for storing events. */
202
- if (accelerator_event_ipc_num_used == accelerator_event_max ) {
230
+ if (accelerator_event_ipc_num_used == mca_btl_smcuda_component . accelerator_max_ipc_events ) {
203
231
opal_output_verbose (1 , mca_btl_smcuda_component .cuda_ipc_output , "smcuda: Out of event handles" );
204
232
OPAL_THREAD_UNLOCK (& btl_smcuda_accelerator_ipc_lock );
205
233
return OPAL_ERR_OUT_OF_RESOURCE ;
@@ -237,7 +265,7 @@ int mca_btl_smcuda_memcpy(void *dst, void *src, size_t amount, char *msg,
237
265
238
266
/* Bump up the first available slot and number used by 1 */
239
267
accelerator_event_ipc_first_avail ++ ;
240
- if (accelerator_event_ipc_first_avail >= accelerator_event_max ) {
268
+ if (accelerator_event_ipc_first_avail >= mca_btl_smcuda_component . accelerator_max_ipc_events ) {
241
269
accelerator_event_ipc_first_avail = 0 ;
242
270
}
243
271
accelerator_event_ipc_num_used ++ ;
0 commit comments