diff --git a/ompi/mca/coll/accelerator/Makefile.am b/ompi/mca/coll/accelerator/Makefile.am new file mode 100644 index 00000000000..eaf81137602 --- /dev/null +++ b/ompi/mca/coll/accelerator/Makefile.am @@ -0,0 +1,40 @@ +# +# Copyright (c) 2014 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2014 NVIDIA Corporation. All rights reserved. +# Copyright (c) 2017 IBM Corporation. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +dist_ompidata_DATA = help-mpi-coll-accelerator.txt + +sources = coll_accelerator_module.c coll_accelerator_reduce.c coll_accelerator_allreduce.c \ + coll_accelerator_reduce_scatter_block.c coll_accelerator_component.c \ + coll_accelerator_scan.c coll_accelerator_exscan.c coll_accelerator.h + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if MCA_BUILD_ompi_coll_accelerator_DSO +component_noinst = +component_install = mca_coll_accelerator.la +else +component_noinst = libmca_coll_accelerator.la +component_install = +endif + +mcacomponentdir = $(ompilibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_coll_accelerator_la_SOURCES = $(sources) +mca_coll_accelerator_la_LDFLAGS = -module -avoid-version +mca_coll_accelerator_la_LIBADD = $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la + +noinst_LTLIBRARIES = $(component_noinst) +libmca_coll_accelerator_la_SOURCES =$(sources) +libmca_coll_accelerator_la_LDFLAGS = -module -avoid-version + diff --git a/ompi/mca/coll/cuda/coll_cuda.h b/ompi/mca/coll/accelerator/coll_accelerator.h similarity index 69% rename from ompi/mca/coll/cuda/coll_cuda.h rename to ompi/mca/coll/accelerator/coll_accelerator.h index 58f9ec4ae5b..c840a3c2d27 100644 --- a/ompi/mca/coll/cuda/coll_cuda.h +++ b/ompi/mca/coll/accelerator/coll_accelerator.h @@ -3,6 +3,7 @@ * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2024 Triad National Security, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -10,8 +11,8 @@ * $HEADER$ */ -#ifndef MCA_COLL_CUDA_EXPORT_H -#define MCA_COLL_CUDA_EXPORT_H +#ifndef MCA_COLL_ACCELERATOR_EXPORT_H +#define MCA_COLL_ACCELERATOR_EXPORT_H #include "ompi_config.h" @@ -31,43 +32,43 @@ BEGIN_C_DECLS /* API functions */ -int mca_coll_cuda_init_query(bool enable_progress_threads, +int mca_coll_accelerator_init_query(bool enable_progress_threads, bool enable_mpi_threads); mca_coll_base_module_t -*mca_coll_cuda_comm_query(struct ompi_communicator_t *comm, +*mca_coll_accelerator_comm_query(struct ompi_communicator_t *comm, int *priority); -int mca_coll_cuda_module_enable(mca_coll_base_module_t *module, +int mca_coll_accelerator_module_enable(mca_coll_base_module_t *module, struct ompi_communicator_t *comm); int -mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count, +mca_coll_accelerator_allreduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count, +int mca_coll_accelerator_reduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_cuda_exscan(const void *sbuf, void *rbuf, int count, +int mca_coll_accelerator_exscan(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_cuda_scan(const void *sbuf, void *rbuf, int count, +int mca_coll_accelerator_scan(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); int 
-mca_coll_cuda_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount, +mca_coll_accelerator_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, @@ -83,7 +84,7 @@ mca_coll_cuda_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount, * @retval >0 The buffer belongs to a managed buffer in * device memory. */ -static inline int mca_coll_cuda_check_buf(void *addr) +static inline int mca_coll_accelerator_check_buf(void *addr) { uint64_t flags; int dev_id; @@ -94,13 +95,13 @@ static inline int mca_coll_cuda_check_buf(void *addr) } } -static inline void *mca_coll_cuda_memcpy(void *dest, const void *src, size_t size) +static inline void *mca_coll_accelerator_memcpy(void *dest, const void *src, size_t size) { int res; res = opal_accelerator.mem_copy(MCA_ACCELERATOR_NO_DEVICE_ID, MCA_ACCELERATOR_NO_DEVICE_ID, dest, src, size, MCA_ACCELERATOR_TRANSFER_UNSPEC); if (res != 0) { - opal_output(0, "CUDA: Error in cuMemcpy: res=%d, dest=%p, src=%p, size=%d", res, dest, src, + opal_output(0, "coll/accelerator: Error in mem_copy: res=%d, dest=%p, src=%p, size=%d", res, dest, src, (int) size); abort(); } else { @@ -111,28 +112,28 @@ static inline void *mca_coll_cuda_memcpy(void *dest, const void *src, size_t siz /* Types */ /* Module */ -typedef struct mca_coll_cuda_module_t { +typedef struct mca_coll_accelerator_module_t { mca_coll_base_module_t super; /* Pointers to all the "real" collective functions */ mca_coll_base_comm_coll_t c_coll; -} mca_coll_cuda_module_t; +} mca_coll_accelerator_module_t; -OBJ_CLASS_DECLARATION(mca_coll_cuda_module_t); +OBJ_CLASS_DECLARATION(mca_coll_accelerator_module_t); /* Component */ -typedef struct mca_coll_cuda_component_t { +typedef struct mca_coll_accelerator_component_t { mca_coll_base_component_2_4_0_t super; int priority; /* Priority of this component */ - int disable_cuda_coll; /* Force disable of the CUDA collective component */ -} 
mca_coll_cuda_component_t; + int disable_accelerator_coll; /* Force disable of the accelerator collective component */ +} mca_coll_accelerator_component_t; /* Globally exported variables */ -OMPI_DECLSPEC extern mca_coll_cuda_component_t mca_coll_cuda_component; +OMPI_DECLSPEC extern mca_coll_accelerator_component_t mca_coll_accelerator_component; END_C_DECLS -#endif /* MCA_COLL_CUDA_EXPORT_H */ +#endif /* MCA_COLL_ACCELERATOR_EXPORT_H */ diff --git a/ompi/mca/coll/cuda/coll_cuda_allreduce.c b/ompi/mca/coll/accelerator/coll_accelerator_allreduce.c similarity index 77% rename from ompi/mca/coll/cuda/coll_cuda_allreduce.c rename to ompi/mca/coll/accelerator/coll_accelerator_allreduce.c index d206793362f..5546e0abd01 100644 --- a/ompi/mca/coll/cuda/coll_cuda_allreduce.c +++ b/ompi/mca/coll/accelerator/coll_accelerator_allreduce.c @@ -4,6 +4,7 @@ * reserved. * Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2024 Triad National Security, LLC. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -12,7 +13,7 @@ */ #include "ompi_config.h" -#include "coll_cuda.h" +#include "coll_accelerator.h" #include @@ -27,20 +28,20 @@ * Returns: - MPI_SUCCESS or error code */ int -mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count, +mca_coll_accelerator_allreduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module; + mca_coll_accelerator_module_t *s = (mca_coll_accelerator_module_t*) module; ptrdiff_t gap; char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL; size_t bufsize; int rc; bufsize = opal_datatype_span(&dtype->super, count, &gap); - rc = mca_coll_cuda_check_buf((void *)sbuf); + rc = mca_coll_accelerator_check_buf((void *)sbuf); if (rc < 0) { return rc; } @@ -49,10 +50,10 @@ mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count, if (NULL == sbuf1) { return OMPI_ERR_OUT_OF_RESOURCE; } - mca_coll_cuda_memcpy(sbuf1, sbuf, bufsize); + mca_coll_accelerator_memcpy(sbuf1, sbuf, bufsize); sbuf = sbuf1 - gap; } - rc = mca_coll_cuda_check_buf(rbuf); + rc = mca_coll_accelerator_check_buf(rbuf); if (rc < 0) { return rc; } @@ -62,7 +63,7 @@ mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count, if (NULL != sbuf1) free(sbuf1); return OMPI_ERR_OUT_OF_RESOURCE; } - mca_coll_cuda_memcpy(rbuf1, rbuf, bufsize); + mca_coll_accelerator_memcpy(rbuf1, rbuf, bufsize); rbuf2 = rbuf; /* save away original buffer */ rbuf = rbuf1 - gap; } @@ -72,7 +73,7 @@ mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count, } if (NULL != rbuf1) { rbuf = rbuf2; - mca_coll_cuda_memcpy(rbuf, rbuf1, bufsize); + mca_coll_accelerator_memcpy(rbuf, rbuf1, bufsize); free(rbuf1); } return rc; diff --git a/ompi/mca/coll/cuda/coll_cuda_component.c b/ompi/mca/coll/accelerator/coll_accelerator_component.c similarity index 59% rename from 
ompi/mca/coll/cuda/coll_cuda_component.c rename to ompi/mca/coll/accelerator/coll_accelerator_component.c index 88f4ee80fcc..b74f0b09540 100644 --- a/ompi/mca/coll/cuda/coll_cuda_component.c +++ b/ompi/mca/coll/accelerator/coll_accelerator_component.c @@ -6,6 +6,7 @@ * Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2024 Triad National Security, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -19,25 +20,25 @@ #include "mpi.h" #include "ompi/constants.h" -#include "coll_cuda.h" +#include "coll_accelerator.h" /* - * Public string showing the coll ompi_cuda component version number + * Public string showing the coll ompi_accelerator component version number */ -const char *mca_coll_cuda_component_version_string = - "Open MPI cuda collective MCA component version " OMPI_VERSION; +const char *mca_coll_accelerator_component_version_string = + "Open MPI accelerator collective MCA component version " OMPI_VERSION; /* * Local function */ -static int cuda_register(void); +static int accelerator_register(void); /* * Instantiate the public struct with all of our public information * and pointers to our public functions in it */ -mca_coll_cuda_component_t mca_coll_cuda_component = { +mca_coll_accelerator_component_t mca_coll_accelerator_component = { { /* First, the mca_component_t struct containing meta information * about the component itself */ @@ -46,12 +47,12 @@ mca_coll_cuda_component_t mca_coll_cuda_component = { MCA_COLL_BASE_VERSION_2_4_0, /* Component name and version */ - .mca_component_name = "cuda", + .mca_component_name = "accelerator", MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, OMPI_RELEASE_VERSION), /* Component open and close functions */ - .mca_register_component_params = cuda_register, + .mca_register_component_params = accelerator_register, }, .collm_data = { /* The component is 
checkpoint ready */ @@ -60,32 +61,32 @@ mca_coll_cuda_component_t mca_coll_cuda_component = { /* Initialization / querying functions */ - .collm_init_query = mca_coll_cuda_init_query, - .collm_comm_query = mca_coll_cuda_comm_query, + .collm_init_query = mca_coll_accelerator_init_query, + .collm_comm_query = mca_coll_accelerator_comm_query, }, - /* cuda-specific component information */ + /* accelerator-specific component information */ /* Priority: make it above all point to point collectives including self */ .priority = 78, }; -static int cuda_register(void) +static int accelerator_register(void) { - (void) mca_base_component_var_register(&mca_coll_cuda_component.super.collm_version, - "priority", "Priority of the cuda coll component; only relevant if barrier_before or barrier_after is > 0", + (void) mca_base_component_var_register(&mca_coll_accelerator_component.super.collm_version, + "priority", "Priority of the accelerator coll component", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_6, MCA_BASE_VAR_SCOPE_READONLY, - &mca_coll_cuda_component.priority); + &mca_coll_accelerator_component.priority); - (void) mca_base_component_var_register(&mca_coll_cuda_component.super.collm_version, - "disable_cuda_coll", "Automatically handle the CUDA buffers for the MPI collective.", + (void) mca_base_component_var_register(&mca_coll_accelerator_component.super.collm_version, + "disable_accelerator_coll", "Force disable of the accelerator collective component", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_2, MCA_BASE_VAR_SCOPE_READONLY, - &mca_coll_cuda_component.disable_cuda_coll); + &mca_coll_accelerator_component.disable_accelerator_coll); return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/cuda/coll_cuda_exscan.c b/ompi/mca/coll/accelerator/coll_accelerator_exscan.c similarity index 76% rename from ompi/mca/coll/cuda/coll_cuda_exscan.c rename to 
ompi/mca/coll/accelerator/coll_accelerator_exscan.c index 48c3cc65038..f5c53a9a9ca 100644 --- a/ompi/mca/coll/cuda/coll_cuda_exscan.c +++ b/ompi/mca/coll/accelerator/coll_accelerator_exscan.c @@ -4,6 +4,7 @@ * reserved. * Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2024 Triad National Security, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -12,27 +13,27 @@ */ #include "ompi_config.h" -#include "coll_cuda.h" +#include "coll_accelerator.h" #include #include "ompi/op/op.h" #include "opal/datatype/opal_convertor.h" -int mca_coll_cuda_exscan(const void *sbuf, void *rbuf, int count, +int mca_coll_accelerator_exscan(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module; + mca_coll_accelerator_module_t *s = (mca_coll_accelerator_module_t*) module; ptrdiff_t gap; char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL; size_t bufsize; int rc; bufsize = opal_datatype_span(&dtype->super, count, &gap); - rc = mca_coll_cuda_check_buf((void *)sbuf); + rc = mca_coll_accelerator_check_buf((void *)sbuf); if (rc < 0) { return rc; } @@ -42,10 +43,10 @@ int mca_coll_cuda_exscan(const void *sbuf, void *rbuf, int count, if (NULL == sbuf1) { return OMPI_ERR_OUT_OF_RESOURCE; } - mca_coll_cuda_memcpy(sbuf1, sbuf, bufsize); + mca_coll_accelerator_memcpy(sbuf1, sbuf, bufsize); sbuf = sbuf1 - gap; } - rc = mca_coll_cuda_check_buf(rbuf); + rc = mca_coll_accelerator_check_buf(rbuf); if (rc < 0) { return rc; } @@ -55,7 +56,7 @@ int mca_coll_cuda_exscan(const void *sbuf, void *rbuf, int count, if (NULL != sbuf1) free(sbuf1); return OMPI_ERR_OUT_OF_RESOURCE; } - mca_coll_cuda_memcpy(rbuf1, rbuf, bufsize); + mca_coll_accelerator_memcpy(rbuf1, rbuf, bufsize); rbuf2 = rbuf; /* save away 
original buffer */ rbuf = rbuf1 - gap; } @@ -67,7 +68,7 @@ int mca_coll_cuda_exscan(const void *sbuf, void *rbuf, int count, } if (NULL != rbuf1) { rbuf = rbuf2; - mca_coll_cuda_memcpy(rbuf, rbuf1, bufsize); + mca_coll_accelerator_memcpy(rbuf, rbuf1, bufsize); free(rbuf1); } return rc; diff --git a/ompi/mca/coll/cuda/coll_cuda_module.c b/ompi/mca/coll/accelerator/coll_accelerator_module.c similarity index 60% rename from ompi/mca/coll/cuda/coll_cuda_module.c rename to ompi/mca/coll/accelerator/coll_accelerator_module.c index 04ca13f4d72..505d31c1c07 100644 --- a/ompi/mca/coll/cuda/coll_cuda_module.c +++ b/ompi/mca/coll/accelerator/coll_accelerator_module.c @@ -6,6 +6,7 @@ * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2024 Triad National Security, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -18,7 +19,7 @@ #include #include -#include "coll_cuda.h" +#include "coll_accelerator.h" #include "mpi.h" @@ -29,15 +30,15 @@ #include "ompi/communicator/communicator.h" #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/base.h" -#include "coll_cuda.h" +#include "coll_accelerator.h" -static void mca_coll_cuda_module_construct(mca_coll_cuda_module_t *module) +static void mca_coll_accelerator_module_construct(mca_coll_accelerator_module_t *module) { memset(&(module->c_coll), 0, sizeof(module->c_coll)); } -static void mca_coll_cuda_module_destruct(mca_coll_cuda_module_t *module) +static void mca_coll_accelerator_module_destruct(mca_coll_accelerator_module_t *module) { OBJ_RELEASE(module->c_coll.coll_allreduce_module); OBJ_RELEASE(module->c_coll.coll_reduce_module); @@ -52,9 +53,9 @@ static void mca_coll_cuda_module_destruct(mca_coll_cuda_module_t *module) } } -OBJ_CLASS_INSTANCE(mca_coll_cuda_module_t, mca_coll_base_module_t, - mca_coll_cuda_module_construct, - 
mca_coll_cuda_module_destruct); +OBJ_CLASS_INSTANCE(mca_coll_accelerator_module_t, mca_coll_base_module_t, + mca_coll_accelerator_module_construct, + mca_coll_accelerator_module_destruct); /* @@ -62,7 +63,7 @@ OBJ_CLASS_INSTANCE(mca_coll_cuda_module_t, mca_coll_base_module_t, * this component to disqualify itself if it doesn't support the * required level of thread support. */ -int mca_coll_cuda_init_query(bool enable_progress_threads, +int mca_coll_accelerator_init_query(bool enable_progress_threads, bool enable_mpi_threads) { /* Nothing to do */ @@ -77,59 +78,59 @@ int mca_coll_cuda_init_query(bool enable_progress_threads, * priority we want to return. */ mca_coll_base_module_t * -mca_coll_cuda_comm_query(struct ompi_communicator_t *comm, +mca_coll_accelerator_comm_query(struct ompi_communicator_t *comm, int *priority) { - mca_coll_cuda_module_t *cuda_module; + mca_coll_accelerator_module_t *accelerator_module; if (0 == strcmp(opal_accelerator_base_selected_component.base_version.mca_component_name, "null")) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:cuda:comm_query: accelerator component is null: disqualifying myself"); + "coll:accelerator:comm_query: accelerator component is null: disqualifying myself"); return NULL; } - cuda_module = OBJ_NEW(mca_coll_cuda_module_t); - if (NULL == cuda_module) { + accelerator_module = OBJ_NEW(mca_coll_accelerator_module_t); + if (NULL == accelerator_module) { return NULL; } - *priority = mca_coll_cuda_component.priority; + *priority = mca_coll_accelerator_component.priority; /* Choose whether to use [intra|inter] */ - cuda_module->super.coll_module_enable = mca_coll_cuda_module_enable; - - cuda_module->super.coll_allgather = NULL; - cuda_module->super.coll_allgatherv = NULL; - cuda_module->super.coll_allreduce = mca_coll_cuda_allreduce; - cuda_module->super.coll_alltoall = NULL; - cuda_module->super.coll_alltoallv = NULL; - cuda_module->super.coll_alltoallw = NULL; - 
cuda_module->super.coll_barrier = NULL; - cuda_module->super.coll_bcast = NULL; - cuda_module->super.coll_exscan = mca_coll_cuda_exscan; - cuda_module->super.coll_gather = NULL; - cuda_module->super.coll_gatherv = NULL; - cuda_module->super.coll_reduce = mca_coll_cuda_reduce; - cuda_module->super.coll_reduce_scatter = NULL; - cuda_module->super.coll_reduce_scatter_block = mca_coll_cuda_reduce_scatter_block; - cuda_module->super.coll_scan = mca_coll_cuda_scan; - cuda_module->super.coll_scatter = NULL; - cuda_module->super.coll_scatterv = NULL; - - return &(cuda_module->super); + accelerator_module->super.coll_module_enable = mca_coll_accelerator_module_enable; + + accelerator_module->super.coll_allgather = NULL; + accelerator_module->super.coll_allgatherv = NULL; + accelerator_module->super.coll_allreduce = mca_coll_accelerator_allreduce; + accelerator_module->super.coll_alltoall = NULL; + accelerator_module->super.coll_alltoallv = NULL; + accelerator_module->super.coll_alltoallw = NULL; + accelerator_module->super.coll_barrier = NULL; + accelerator_module->super.coll_bcast = NULL; + accelerator_module->super.coll_exscan = mca_coll_accelerator_exscan; + accelerator_module->super.coll_gather = NULL; + accelerator_module->super.coll_gatherv = NULL; + accelerator_module->super.coll_reduce = mca_coll_accelerator_reduce; + accelerator_module->super.coll_reduce_scatter = NULL; + accelerator_module->super.coll_reduce_scatter_block = mca_coll_accelerator_reduce_scatter_block; + accelerator_module->super.coll_scan = mca_coll_accelerator_scan; + accelerator_module->super.coll_scatter = NULL; + accelerator_module->super.coll_scatterv = NULL; + + return &(accelerator_module->super); } /* * Init module on the communicator */ -int mca_coll_cuda_module_enable(mca_coll_base_module_t *module, +int mca_coll_accelerator_module_enable(mca_coll_base_module_t *module, struct ompi_communicator_t *comm) { bool good = true; char *msg = NULL; - mca_coll_cuda_module_t *s = 
(mca_coll_cuda_module_t*) module; + mca_coll_accelerator_module_t *s = (mca_coll_accelerator_module_t*) module; #define CHECK_AND_RETAIN(src, dst, name) \ if (NULL == (src)->c_coll->coll_ ## name ## _module) { \ @@ -155,9 +156,9 @@ int mca_coll_cuda_module_enable(mca_coll_base_module_t *module, if (good) { return OMPI_SUCCESS; } - opal_show_help("help-mpi-coll-cuda.txt", "missing collective", true, + opal_show_help("help-mpi-coll-accelerator.txt", "missing collective", true, ompi_process_info.nodename, - mca_coll_cuda_component.priority, msg); + mca_coll_accelerator_component.priority, msg); return OMPI_ERR_NOT_FOUND; } diff --git a/ompi/mca/coll/cuda/coll_cuda_reduce.c b/ompi/mca/coll/accelerator/coll_accelerator_reduce.c similarity index 78% rename from ompi/mca/coll/cuda/coll_cuda_reduce.c rename to ompi/mca/coll/accelerator/coll_accelerator_reduce.c index d8a6cef1419..e1cd5bfba17 100644 --- a/ompi/mca/coll/cuda/coll_cuda_reduce.c +++ b/ompi/mca/coll/accelerator/coll_accelerator_reduce.c @@ -4,6 +4,7 @@ * reserved. * Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2024 Triad National Security, LLC. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -12,7 +13,7 @@ */ #include "ompi_config.h" -#include "coll_cuda.h" +#include "coll_accelerator.h" #include @@ -27,13 +28,13 @@ * Returns: - MPI_SUCCESS or error code */ int -mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count, +mca_coll_accelerator_reduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module; + mca_coll_accelerator_module_t *s = (mca_coll_accelerator_module_t*) module; int rank = ompi_comm_rank(comm); ptrdiff_t gap; char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL; @@ -42,7 +43,7 @@ mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count, bufsize = opal_datatype_span(&dtype->super, count, &gap); - rc = mca_coll_cuda_check_buf((void *)sbuf); + rc = mca_coll_accelerator_check_buf((void *)sbuf); if (rc < 0) { return rc; } @@ -51,11 +52,11 @@ mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count, if (NULL == sbuf1) { return OMPI_ERR_OUT_OF_RESOURCE; } - mca_coll_cuda_memcpy(sbuf1, sbuf, bufsize); + mca_coll_accelerator_memcpy(sbuf1, sbuf, bufsize); sbuf = sbuf1 - gap; } - rc = mca_coll_cuda_check_buf(rbuf); + rc = mca_coll_accelerator_check_buf(rbuf); if (rc < 0) { return rc; } @@ -65,7 +66,7 @@ mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count, if (NULL != sbuf1) free(sbuf1); return OMPI_ERR_OUT_OF_RESOURCE; } - mca_coll_cuda_memcpy(rbuf1, rbuf, bufsize); + mca_coll_accelerator_memcpy(rbuf1, rbuf, bufsize); rbuf2 = rbuf; /* save away original buffer */ rbuf = rbuf1 - gap; } @@ -78,7 +79,7 @@ mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count, } if (NULL != rbuf1) { rbuf = rbuf2; - mca_coll_cuda_memcpy(rbuf, rbuf1, bufsize); + mca_coll_accelerator_memcpy(rbuf, rbuf1, bufsize); free(rbuf1); } return rc; diff --git a/ompi/mca/coll/cuda/coll_cuda_reduce_scatter_block.c 
b/ompi/mca/coll/accelerator/coll_accelerator_reduce_scatter_block.c similarity index 79% rename from ompi/mca/coll/cuda/coll_cuda_reduce_scatter_block.c rename to ompi/mca/coll/accelerator/coll_accelerator_reduce_scatter_block.c index 100724873f0..affc258a5ab 100644 --- a/ompi/mca/coll/cuda/coll_cuda_reduce_scatter_block.c +++ b/ompi/mca/coll/accelerator/coll_accelerator_reduce_scatter_block.c @@ -4,6 +4,7 @@ * reserved. * Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2024 Triad National Security, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -12,7 +13,7 @@ */ #include "ompi_config.h" -#include "coll_cuda.h" +#include "coll_accelerator.h" #include @@ -31,13 +32,13 @@ * up at some point) */ int -mca_coll_cuda_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount, +mca_coll_accelerator_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module; + mca_coll_accelerator_module_t *s = (mca_coll_accelerator_module_t*) module; ptrdiff_t gap; char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL; size_t sbufsize, rbufsize; @@ -46,7 +47,7 @@ mca_coll_cuda_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount, rbufsize = opal_datatype_span(&dtype->super, rcount, &gap); sbufsize = rbufsize * ompi_comm_size(comm); - rc = mca_coll_cuda_check_buf((void *)sbuf); + rc = mca_coll_accelerator_check_buf((void *)sbuf); if (rc < 0) { return rc; } @@ -55,10 +56,10 @@ mca_coll_cuda_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount, if (NULL == sbuf1) { return OMPI_ERR_OUT_OF_RESOURCE; } - mca_coll_cuda_memcpy(sbuf1, sbuf, sbufsize); + mca_coll_accelerator_memcpy(sbuf1, sbuf, sbufsize); sbuf = sbuf1 - gap; } - rc = 
mca_coll_cuda_check_buf(rbuf); + rc = mca_coll_accelerator_check_buf(rbuf); if (rc < 0) { return rc; } @@ -68,7 +69,7 @@ mca_coll_cuda_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount, if (NULL != sbuf1) free(sbuf1); return OMPI_ERR_OUT_OF_RESOURCE; } - mca_coll_cuda_memcpy(rbuf1, rbuf, rbufsize); + mca_coll_accelerator_memcpy(rbuf1, rbuf, rbufsize); rbuf2 = rbuf; /* save away original buffer */ rbuf = rbuf1 - gap; } @@ -79,7 +80,7 @@ mca_coll_cuda_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount, } if (NULL != rbuf1) { rbuf = rbuf2; - mca_coll_cuda_memcpy(rbuf, rbuf1, rbufsize); + mca_coll_accelerator_memcpy(rbuf, rbuf1, rbufsize); free(rbuf1); } return rc; diff --git a/ompi/mca/coll/cuda/coll_cuda_scan.c b/ompi/mca/coll/accelerator/coll_accelerator_scan.c similarity index 77% rename from ompi/mca/coll/cuda/coll_cuda_scan.c rename to ompi/mca/coll/accelerator/coll_accelerator_scan.c index 08af90bb6c0..07129df867b 100644 --- a/ompi/mca/coll/cuda/coll_cuda_scan.c +++ b/ompi/mca/coll/accelerator/coll_accelerator_scan.c @@ -4,6 +4,7 @@ * reserved. * Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2024 Triad National Security, LLC. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -12,7 +13,7 @@ */ #include "ompi_config.h" -#include "coll_cuda.h" +#include "coll_accelerator.h" #include @@ -26,20 +27,20 @@ * Accepts: - same arguments as MPI_Scan() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_cuda_scan(const void *sbuf, void *rbuf, int count, +int mca_coll_accelerator_scan(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module; + mca_coll_accelerator_module_t *s = (mca_coll_accelerator_module_t*) module; ptrdiff_t gap; char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL; size_t bufsize; int rc; bufsize = opal_datatype_span(&dtype->super, count, &gap); - rc = mca_coll_cuda_check_buf((void *)sbuf); + rc = mca_coll_accelerator_check_buf((void *)sbuf); if (rc < 0) { return rc; } @@ -48,10 +49,10 @@ int mca_coll_cuda_scan(const void *sbuf, void *rbuf, int count, if (NULL == sbuf1) { return OMPI_ERR_OUT_OF_RESOURCE; } - mca_coll_cuda_memcpy(sbuf1, sbuf, bufsize); + mca_coll_accelerator_memcpy(sbuf1, sbuf, bufsize); sbuf = sbuf1 - gap; } - rc = mca_coll_cuda_check_buf(rbuf); + rc = mca_coll_accelerator_check_buf(rbuf); if (rc < 0) { return rc; } @@ -61,7 +62,7 @@ int mca_coll_cuda_scan(const void *sbuf, void *rbuf, int count, if (NULL != sbuf1) free(sbuf1); return OMPI_ERR_OUT_OF_RESOURCE; } - mca_coll_cuda_memcpy(rbuf1, rbuf, bufsize); + mca_coll_accelerator_memcpy(rbuf1, rbuf, bufsize); rbuf2 = rbuf; /* save away original buffer */ rbuf = rbuf1 - gap; } @@ -72,7 +73,7 @@ int mca_coll_cuda_scan(const void *sbuf, void *rbuf, int count, } if (NULL != rbuf1) { rbuf = rbuf2; - mca_coll_cuda_memcpy(rbuf, rbuf1, bufsize); + mca_coll_accelerator_memcpy(rbuf, rbuf1, bufsize); free(rbuf1); } return rc; diff --git a/ompi/mca/coll/cuda/help-mpi-coll-cuda.txt b/ompi/mca/coll/accelerator/help-mpi-coll-accelerator.txt similarity index 
79% rename from ompi/mca/coll/cuda/help-mpi-coll-cuda.txt rename to ompi/mca/coll/accelerator/help-mpi-coll-accelerator.txt index 5537420ab4e..abc39d08e12 100644 --- a/ompi/mca/coll/cuda/help-mpi-coll-cuda.txt +++ b/ompi/mca/coll/accelerator/help-mpi-coll-accelerator.txt @@ -11,17 +11,18 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2014 NVIDIA Corporation. All rights reserved. +# Copyright (c) 2024 Triad National Security, LLC. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # -# This is the US/English general help file for Open MPI's CUDA +# This is the US/English general help file for Open MPI's accelerator # collective reduction component. # [missing collective] -There was a problem while initializing support for the CUDA reduction operations. +There was a problem while initializing support for the accelerator reduction operations. hostname: %s priority: %d collective: %s diff --git a/ompi/mca/coll/cuda/owner.txt b/ompi/mca/coll/accelerator/owner.txt similarity index 100% rename from ompi/mca/coll/cuda/owner.txt rename to ompi/mca/coll/accelerator/owner.txt diff --git a/ompi/mca/coll/base/coll_base_frame.c b/ompi/mca/coll/base/coll_base_frame.c index 5bb6fe38ace..07b7f85cf92 100644 --- a/ompi/mca/coll/base/coll_base_frame.c +++ b/ompi/mca/coll/base/coll_base_frame.c @@ -15,6 +15,7 @@ * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2024 Triad National Security, LLC. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,8 +31,7 @@ #include "ompi/mca/mca.h" #include "opal/util/output.h" #include "opal/mca/base/base.h" - - +#include "opal/mca/base/mca_base_alias.h" #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/base.h" #include "ompi/mca/coll/base/coll_base_functions.h" @@ -130,5 +130,12 @@ ompi_request_t** ompi_coll_base_comm_get_reqs(mca_coll_base_comm_t* data, int nr return data->mcct_reqs; } -MCA_BASE_FRAMEWORK_DECLARE(ompi, coll, "Collectives", NULL, NULL, NULL, +static int mca_coll_base_register(mca_base_register_flag_t flags) +{ + (void) mca_base_alias_register("ompi", "coll", "accelerator", "cuda", MCA_BASE_ALIAS_FLAG_DEPRECATED); + return OMPI_SUCCESS; +} + + +MCA_BASE_FRAMEWORK_DECLARE(ompi, coll, "Collectives", mca_coll_base_register, NULL, NULL, mca_coll_base_static_components, 0); diff --git a/ompi/mca/coll/cuda/Makefile.am b/ompi/mca/coll/cuda/Makefile.am deleted file mode 100644 index 74a6ecfd947..00000000000 --- a/ompi/mca/coll/cuda/Makefile.am +++ /dev/null @@ -1,40 +0,0 @@ -# -# Copyright (c) 2014 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2014 NVIDIA Corporation. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -dist_ompidata_DATA = help-mpi-coll-cuda.txt - -sources = coll_cuda_module.c coll_cuda_reduce.c coll_cuda_allreduce.c \ - coll_cuda_reduce_scatter_block.c coll_cuda_component.c \ - coll_cuda_scan.c coll_cuda_exscan.c coll_cuda.h - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_ompi_coll_cuda_DSO -component_noinst = -component_install = mca_coll_cuda.la -else -component_noinst = libmca_coll_cuda.la -component_install = -endif - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_coll_cuda_la_SOURCES = $(sources) -mca_coll_cuda_la_LDFLAGS = -module -avoid-version -mca_coll_cuda_la_LIBADD = $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_coll_cuda_la_SOURCES =$(sources) -libmca_coll_cuda_la_LDFLAGS = -module -avoid-version -