Skip to content

Commit e58222b

Browse files
committed
accelerator/rocm: some minor fixes
- memset the IPC handles to zero before calling hipIpcGetMemHandle (and its event counterpart, hipIpcGetEventHandle).
- get_buffer_id: fix the buffer_id argument passed to the hip call.
- correctly set device_id in check_addr().

Signed-off-by: Edgar Gabriel <[email protected]>
1 parent 4c972c1 commit e58222b

File tree

1 file changed

+11
-7
lines changed

1 file changed

+11
-7
lines changed

opal/mca/accelerator/rocm/accelerator_rocm_module.c

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -117,20 +117,17 @@ static int mca_accelerator_rocm_check_addr (const void *addr, int *dev_id, uint6
117117
#else
118118
if (hipMemoryTypeDevice == srcAttr.memoryType) {
119119
#endif
120-
//We might want to set additional flags in a later iteration.
121-
//*flags |= MCA_ACCELERATOR_FLAGS_HOST_LDSTR;
122-
//*flags |= MCA_ACCELERATOR_FLAGS_HOST_ATOMICS;
123-
/* First access on a device pointer triggers ROCM support lazy initialization. */
124120
opal_accelerator_rocm_lazy_init();
121+
*dev_id = srcAttr.device;
125122
ret = 1;
126123
#if HIP_VERSION >= 50731921
127124
} else if (hipMemoryTypeUnified == srcAttr.type) {
128125
#else
129126
} else if (hipMemoryTypeUnified == srcAttr.memoryType) {
130127
#endif
131128
*flags |= MCA_ACCELERATOR_FLAGS_UNIFIED_MEMORY;
132-
//*flags |= MCA_ACCELERATOR_FLAGS_HOST_LDSTR;
133-
//*flags |= MCA_ACCELERATOR_FLAGS_HOST_ATOMICS;
129+
opal_accelerator_rocm_lazy_init();
130+
*dev_id = srcAttr.device;
134131
ret = 1;
135132
}
136133
}
@@ -527,6 +524,7 @@ static int mca_accelerator_rocm_get_ipc_handle(int dev_id, void *dev_ptr,
527524
OBJ_CONSTRUCT(rocm_handle, opal_accelerator_rocm_ipc_handle_t);
528525
rocm_handle->base.dev_ptr = NULL;
529526

527+
memset(rocm_ipc_handle.reserved, 0, HIP_IPC_HANDLE_SIZE);
530528
hipError_t err = hipIpcGetMemHandle(&rocm_ipc_handle,
531529
(hipDeviceptr_t)dev_ptr);
532530
if (hipSuccess != err) {
@@ -596,6 +594,7 @@ static int mca_accelerator_rocm_get_ipc_event_handle(opal_accelerator_event_t *e
596594
opal_accelerator_rocm_ipc_event_handle_t *rocm_handle = (opal_accelerator_rocm_ipc_event_handle_t *) handle;
597595
OBJ_CONSTRUCT(rocm_handle, opal_accelerator_rocm_ipc_event_handle_t);
598596

597+
memset(rocm_ipc_handle.reserved, 0, HIP_IPC_HANDLE_SIZE);
599598
hipError_t err = hipIpcGetEventHandle(&rocm_ipc_handle,
600599
*((hipEvent_t *)event->event));
601600
if (hipSuccess != err) {
@@ -738,6 +737,11 @@ static int mca_accelerator_rocm_device_can_access_peer(int *access, int dev1, in
738737
return OPAL_ERR_BAD_PARAM;
739738
}
740739

740+
if (dev1 == dev2) {
741+
*access = 1;
742+
return OPAL_SUCCESS;
743+
}
744+
741745
hipError_t err = hipDeviceCanAccessPeer(access, dev1, dev2);
742746
if (hipSuccess != err) {
743747
opal_output_verbose(10, opal_accelerator_base_framework.framework_output,
@@ -753,7 +757,7 @@ static int mca_accelerator_rocm_get_buffer_id(int dev_id, const void *addr, opal
753757
*buf_id = 0;
754758

755759
#if HIP_VERSION >= 50120531
756-
hipError_t result = hipPointerGetAttribute((unsigned long long *)&buf_id, HIP_POINTER_ATTRIBUTE_BUFFER_ID,
760+
hipError_t result = hipPointerGetAttribute((unsigned long long *)buf_id, HIP_POINTER_ATTRIBUTE_BUFFER_ID,
757761
(hipDeviceptr_t)addr);
758762
if (hipSuccess != result) {
759763
opal_output_verbose(10, opal_accelerator_base_framework.framework_output,

0 commit comments

Comments
 (0)