Skip to content

Commit

Permalink
SHMEM/MCA/SSHMEM/UCX: Fixing DEVICE_NIC_MEM support to use RDMA memory type
Browse files Browse the repository at this point in the history

Added a fallback for RDMA allocation failure: host memory is allocated instead.

Signed-off-by: Roie Danino <[email protected]>
(cherry picked from commit b192a78)

Conflicts:
	config/ompi_check_ucx.m4
	oshmem/mca/sshmem/ucx/configure.m4
	oshmem/mca/sshmem/ucx/sshmem_ucx_module.c
  • Loading branch information
roiedanino committed Oct 8, 2023
1 parent 547d0e4 commit 61ccb13
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 114 deletions.
3 changes: 2 additions & 1 deletion config/ompi_check_ucx.m4
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,8 @@ AC_DEFUN([OMPI_CHECK_UCX],[
UCP_ATOMIC_FETCH_OP_FXOR,
UCP_PARAM_FIELD_ESTIMATED_NUM_PPN,
UCP_WORKER_FLAG_IGNORE_REQUEST_LEAK,
UCP_OP_ATTR_FLAG_MULTI_SEND],
UCP_OP_ATTR_FLAG_MULTI_SEND,
UCS_MEMORY_TYPE_RDMA],
[], [],
[#include <ucp/api/ucp.h>])
AC_CHECK_DECLS([UCP_WORKER_ATTR_FIELD_ADDRESS_FLAGS],
Expand Down
27 changes: 2 additions & 25 deletions oshmem/mca/sshmem/ucx/configure.m4
Original file line number Diff line number Diff line change
Expand Up @@ -27,31 +27,9 @@ AC_DEFUN([MCA_oshmem_sshmem_ucx_CONFIG],[
save_LIBS="$LIBS"
save_CPPFLAGS="$CPPFLAGS"
alloc_dm_LDFLAGS=" -L$ompi_check_ucx_libdir/ucx"
alloc_dm_LIBS=" -luct_ib"
CPPFLAGS+=" $sshmem_ucx_CPPFLAGS"
LDFLAGS+=" $sshmem_ucx_LDFLAGS $alloc_dm_LDFLAGS"
LIBS+=" $sshmem_ucx_LIBS $alloc_dm_LIBS"
AC_LANG_PUSH([C])
AC_LINK_IFELSE([AC_LANG_PROGRAM(
[[
#include <ucp/core/ucp_resource.h>
#include <uct/ib/base/ib_alloc.h>
]],
[[
uct_md_h md = ucp_context_find_tl_md((ucp_context_h)NULL, "");
(void)uct_ib_md_alloc_device_mem(md, NULL, NULL, 0, "", NULL);
uct_ib_md_release_device_mem(NULL);
]])],
[
AC_MSG_NOTICE([UCX device memory allocation is supported])
AC_DEFINE([HAVE_UCX_DEVICE_MEM], [1], [Support for device memory allocation])
sshmem_ucx_LIBS+=" $alloc_dm_LIBS"
sshmem_ucx_LDFLAGS+=" $alloc_dm_LDFLAGS"
],
[AC_MSG_NOTICE([UCX device memory allocation is not supported])])
AC_LANG_POP([C])
LDFLAGS+=" $sshmem_ucx_LDFLAGS"
LIBS+=" $sshmem_ucx_LIBS"
CPPFLAGS="$save_CPPFLAGS"
LDFLAGS="$save_LDFLAGS"
Expand All @@ -63,4 +41,3 @@ AC_DEFUN([MCA_oshmem_sshmem_ucx_CONFIG],[
AC_SUBST([sshmem_ucx_LDFLAGS])
AC_SUBST([sshmem_ucx_LIBS])
])dnl

1 change: 0 additions & 1 deletion oshmem/mca/sshmem/ucx/sshmem_ucx.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ OSHMEM_MODULE_DECLSPEC extern mca_sshmem_ucx_component_t
mca_sshmem_ucx_component;

typedef struct mca_sshmem_ucx_segment_context {
void *dev_mem;
sshmem_ucx_shadow_allocator_t *shadow_allocator;
ucp_mem_h ucp_memh;
} mca_sshmem_ucx_segment_context_t;
Expand Down
116 changes: 29 additions & 87 deletions oshmem/mca/sshmem/ucx/sshmem_ucx_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,6 @@

#include "sshmem_ucx.h"

//#include <ucs/sys/math.h>

#if HAVE_UCX_DEVICE_MEM
#include <ucp/core/ucp_resource.h>
#include <uct/ib/base/ib_alloc.h>
#endif

#define ALLOC_ELEM_SIZE sizeof(uint64_t)
#define min(a,b) ((a) < (b) ? (a) : (b))
#define max(a,b) ((a) > (b) ? (a) : (b))
Expand Down Expand Up @@ -103,7 +96,7 @@ static segment_allocator_t sshmem_ucx_allocator = {

static int
segment_create_internal(map_segment_t *ds_buf, void *address, size_t size,
unsigned flags, long hint, void *dev_mem)
unsigned flags, ucs_memory_type_t mem_type, int err_level)
{
mca_sshmem_ucx_segment_context_t *ctx;
int rc = OSHMEM_SUCCESS;
Expand All @@ -119,15 +112,19 @@ segment_create_internal(map_segment_t *ds_buf, void *address, size_t size,

mem_map_params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS |
UCP_MEM_MAP_PARAM_FIELD_LENGTH |
UCP_MEM_MAP_PARAM_FIELD_FLAGS;
UCP_MEM_MAP_PARAM_FIELD_FLAGS |
UCP_MEM_MAP_PARAM_FIELD_MEMORY_TYPE;

mem_map_params.address = address;
mem_map_params.length = size;
mem_map_params.flags = flags;
mem_map_params.address = address;
mem_map_params.length = size;
mem_map_params.flags = flags;
mem_map_params.memory_type = mem_type;

status = ucp_mem_map(spml->ucp_context, &mem_map_params, &mem_h);
if (UCS_OK != status) {
SSHMEM_ERROR("ucp_mem_map() failed: %s\n", ucs_status_string(status));
SSHMEM_VERBOSE(err_level, "ucp_mem_map(memory_type=%s) failed: %s\n",
ucs_memory_type_names[mem_type],
ucs_status_string(status));
rc = OSHMEM_ERROR;
goto out;
}
Expand Down Expand Up @@ -160,12 +157,7 @@ segment_create_internal(map_segment_t *ds_buf, void *address, size_t size,
ds_buf->super.va_end = (void*)((uintptr_t)ds_buf->super.va_base + ds_buf->seg_size);
ds_buf->context = ctx;
ds_buf->type = MAP_SEGMENT_ALLOC_UCX;
ds_buf->alloc_hints = hint;
ctx->ucp_memh = mem_h;
ctx->dev_mem = dev_mem;
if (hint) {
ds_buf->allocator = &sshmem_ucx_allocator;
}

out:
OPAL_OUTPUT_VERBOSE(
Expand All @@ -180,81 +172,37 @@ segment_create_internal(map_segment_t *ds_buf, void *address, size_t size,
return rc;
}

#if HAVE_UCX_DEVICE_MEM
static uct_ib_device_mem_h alloc_device_mem(mca_spml_ucx_t *spml, size_t size,
void **address_p)
{
uct_ib_device_mem_h dev_mem = NULL;
ucs_status_t status;
uct_md_h uct_md;
void *address;
size_t length;

uct_md = ucp_context_find_tl_md(spml->ucp_context, "mlx5");
if (uct_md == NULL) {
SSHMEM_VERBOSE(1, "ucp_context_find_tl_md() returned NULL\n");
return NULL;
}

/* If found a matching memory domain, allocate device memory on it */
length = size;
address = NULL;
status = uct_ib_md_alloc_device_mem(uct_md, &length, &address,
UCT_MD_MEM_ACCESS_ALL, "sshmem_seg",
&dev_mem);
if (status != UCS_OK) {
/* If could not allocate device memory - fallback to mmap (since some
* PEs in the job may succeed and while others failed */
SSHMEM_VERBOSE(1, "uct_ib_md_alloc_dm() failed: %s\n",
ucs_status_string(status));
return NULL;
}

SSHMEM_VERBOSE(3, "uct_ib_md_alloc_dm() returned address %p\n", address);
*address_p = address;
return dev_mem;
}
#endif

static int
segment_create(map_segment_t *ds_buf,
const char *file_name,
size_t size, long hint)
{
mca_spml_ucx_t *spml = (mca_spml_ucx_t*)mca_spml.self;
unsigned flags;
unsigned flags = UCP_MEM_MAP_ALLOCATE;
int status;

#if HAVE_UCX_DEVICE_MEM
int ret = OSHMEM_ERROR;
if (hint & SHMEM_HINT_DEVICE_NIC_MEM) {
if (size > UINT_MAX) {
return OSHMEM_ERR_BAD_PARAM;
#if HAVE_DECL_UCS_MEMORY_TYPE_RDMA
status = segment_create_internal(ds_buf, NULL, size, flags,
UCS_MEMORY_TYPE_RDMA, 3);
if (status == OSHMEM_SUCCESS) {
ds_buf->alloc_hints = hint;
ds_buf->allocator = &sshmem_ucx_allocator;
return OSHMEM_SUCCESS;
}

void *dev_mem_address;
uct_ib_device_mem_h dev_mem = alloc_device_mem(spml, size,
&dev_mem_address);
if (dev_mem != NULL) {
ret = segment_create_internal(ds_buf, dev_mem_address, size, 0,
hint, dev_mem);
if (ret == OSHMEM_SUCCESS) {
return OSHMEM_SUCCESS;
} else if (dev_mem != NULL) {
uct_ib_md_release_device_mem(dev_mem);
/* fallback to regular allocation */
}
}
}
#else
SSHMEM_VERBOSE(3, "DEVICE_NIC_MEM hint ignored since UCX does not "
"support MEMORY_TYPE_RDMA");
#endif
return OSHMEM_ERR_NOT_IMPLEMENTED;
}

flags = UCP_MEM_MAP_ALLOCATE | (spml->heap_reg_nb ? UCP_MEM_MAP_NONBLOCK : 0);
if (hint) {
return segment_create_internal(ds_buf, NULL, size, flags, hint, NULL);
} else {
return segment_create_internal(ds_buf, mca_sshmem_base_start_address,
size, flags | UCP_MEM_MAP_FIXED, hint,
NULL);
flags |= UCP_MEM_MAP_FIXED;
if (spml->heap_reg_nb) {
flags |= UCP_MEM_MAP_NONBLOCK;
}
return segment_create_internal(ds_buf, mca_sshmem_base_start_address, size,
flags, UCS_MEMORY_TYPE_HOST, 0);
}

static void *
Expand Down Expand Up @@ -301,12 +249,6 @@ segment_unlink(map_segment_t *ds_buf)

ucp_mem_unmap(spml->ucp_context, ctx->ucp_memh);

#if HAVE_UCX_DEVICE_MEM
if (ctx->dev_mem) {
uct_ib_md_release_device_mem(ctx->dev_mem);
}
#endif

ds_buf->context = NULL;
free(ctx);

Expand Down

0 comments on commit 61ccb13

Please sign in to comment.