Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

coll/cuda: rename to accelerator component #12212

Merged
merged 1 commit into from
Jan 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions ompi/mca/coll/accelerator/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#
# Copyright (c) 2014 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
# Copyright (c) 2017 IBM Corporation. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Help/diagnostic message file shipped with the component.
dist_ompidata_DATA = help-mpi-coll-accelerator.txt

# All sources (and the private header) that make up the component.
sources = \
        coll_accelerator.h \
        coll_accelerator_module.c \
        coll_accelerator_component.c \
        coll_accelerator_reduce.c \
        coll_accelerator_allreduce.c \
        coll_accelerator_reduce_scatter_block.c \
        coll_accelerator_scan.c \
        coll_accelerator_exscan.c

# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).  Exactly one of the two variables below is
# non-empty, depending on the configure-time conditional.

if MCA_BUILD_ompi_coll_accelerator_DSO
component_noinst =
component_install = mca_coll_accelerator.la
else
component_noinst = libmca_coll_accelerator.la
component_install =
endif

# DSO build: installed as a loadable module and linked against the
# MPI library, whose name is substituted by configure through
# @OMPI_LIBMPI_NAME@.
mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_coll_accelerator_la_SOURCES = $(sources)
mca_coll_accelerator_la_LDFLAGS = -module -avoid-version
mca_coll_accelerator_la_LIBADD = $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la

# Static build: convenience library that is not installed on its own.
noinst_LTLIBRARIES = $(component_noinst)
libmca_coll_accelerator_la_SOURCES = $(sources)
libmca_coll_accelerator_la_LDFLAGS = -module -avoid-version

Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,16 @@
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2024 Triad National Security, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/

#ifndef MCA_COLL_CUDA_EXPORT_H
#define MCA_COLL_CUDA_EXPORT_H
#ifndef MCA_COLL_ACCELERATOR_EXPORT_H
#define MCA_COLL_ACCELERATOR_EXPORT_H

#include "ompi_config.h"

Expand All @@ -31,43 +32,43 @@ BEGIN_C_DECLS

/* API functions */

int mca_coll_cuda_init_query(bool enable_progress_threads,
int mca_coll_accelerator_init_query(bool enable_progress_threads,
bool enable_mpi_threads);
mca_coll_base_module_t
*mca_coll_cuda_comm_query(struct ompi_communicator_t *comm,
*mca_coll_accelerator_comm_query(struct ompi_communicator_t *comm,
int *priority);

int mca_coll_cuda_module_enable(mca_coll_base_module_t *module,
int mca_coll_accelerator_module_enable(mca_coll_base_module_t *module,
struct ompi_communicator_t *comm);

int
mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count,
mca_coll_accelerator_allreduce(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);

int mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count,
int mca_coll_accelerator_reduce(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);

int mca_coll_cuda_exscan(const void *sbuf, void *rbuf, int count,
int mca_coll_accelerator_exscan(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);

int mca_coll_cuda_scan(const void *sbuf, void *rbuf, int count,
int mca_coll_accelerator_scan(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);

int
mca_coll_cuda_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount,
mca_coll_accelerator_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
Expand All @@ -83,7 +84,7 @@ mca_coll_cuda_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount,
* @retval >0 The buffer belongs to a managed buffer in
* device memory.
*/
static inline int mca_coll_cuda_check_buf(void *addr)
static inline int mca_coll_accelerator_check_buf(void *addr)
{
uint64_t flags;
int dev_id;
Expand All @@ -94,13 +95,13 @@ static inline int mca_coll_cuda_check_buf(void *addr)
}
}

static inline void *mca_coll_cuda_memcpy(void *dest, const void *src, size_t size)
static inline void *mca_coll_accelerator_memcpy(void *dest, const void *src, size_t size)
{
int res;
res = opal_accelerator.mem_copy(MCA_ACCELERATOR_NO_DEVICE_ID, MCA_ACCELERATOR_NO_DEVICE_ID,
dest, src, size, MCA_ACCELERATOR_TRANSFER_UNSPEC);
if (res != 0) {
opal_output(0, "CUDA: Error in cuMemcpy: res=%d, dest=%p, src=%p, size=%d", res, dest, src,
opal_output(0, "coll/accelerator: Error in mem_copy: res=%d, dest=%p, src=%p, size=%d", res, dest, src,
(int) size);
abort();
} else {
Expand All @@ -111,28 +112,28 @@ static inline void *mca_coll_cuda_memcpy(void *dest, const void *src, size_t siz
/* Types */
/* Module */

typedef struct mca_coll_cuda_module_t {
typedef struct mca_coll_accelerator_module_t {
mca_coll_base_module_t super;

/* Pointers to all the "real" collective functions */
mca_coll_base_comm_coll_t c_coll;
} mca_coll_cuda_module_t;
} mca_coll_accelerator_module_t;

OBJ_CLASS_DECLARATION(mca_coll_cuda_module_t);
OBJ_CLASS_DECLARATION(mca_coll_accelerator_module_t);

/* Component */

typedef struct mca_coll_cuda_component_t {
typedef struct mca_coll_accelerator_component_t {
mca_coll_base_component_2_4_0_t super;

int priority; /* Priority of this component */
int disable_cuda_coll; /* Force disable of the CUDA collective component */
} mca_coll_cuda_component_t;
int disable_accelerator_coll; /* Force disable of the accelerator collective component */
} mca_coll_accelerator_component_t;

/* Globally exported variables */

OMPI_DECLSPEC extern mca_coll_cuda_component_t mca_coll_cuda_component;
OMPI_DECLSPEC extern mca_coll_accelerator_component_t mca_coll_accelerator_component;

END_C_DECLS

#endif /* MCA_COLL_CUDA_EXPORT_H */
#endif /* MCA_COLL_ACCELERATOR_EXPORT_H */
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
* reserved.
* Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved.
* Copyright (c) 2024 Triad National Security, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -12,7 +13,7 @@
*/

#include "ompi_config.h"
#include "coll_cuda.h"
#include "coll_accelerator.h"

#include <stdio.h>

Expand All @@ -27,20 +28,20 @@
* Returns: - MPI_SUCCESS or error code
*/
int
mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count,
mca_coll_accelerator_allreduce(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module;
mca_coll_accelerator_module_t *s = (mca_coll_accelerator_module_t*) module;
ptrdiff_t gap;
char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL;
size_t bufsize;
int rc;

bufsize = opal_datatype_span(&dtype->super, count, &gap);
rc = mca_coll_cuda_check_buf((void *)sbuf);
rc = mca_coll_accelerator_check_buf((void *)sbuf);
if (rc < 0) {
return rc;
}
Expand All @@ -49,10 +50,10 @@ mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count,
if (NULL == sbuf1) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
mca_coll_cuda_memcpy(sbuf1, sbuf, bufsize);
mca_coll_accelerator_memcpy(sbuf1, sbuf, bufsize);
sbuf = sbuf1 - gap;
}
rc = mca_coll_cuda_check_buf(rbuf);
rc = mca_coll_accelerator_check_buf(rbuf);
if (rc < 0) {
return rc;
}
Expand All @@ -62,7 +63,7 @@ mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count,
if (NULL != sbuf1) free(sbuf1);
return OMPI_ERR_OUT_OF_RESOURCE;
}
mca_coll_cuda_memcpy(rbuf1, rbuf, bufsize);
mca_coll_accelerator_memcpy(rbuf1, rbuf, bufsize);
rbuf2 = rbuf; /* save away original buffer */
rbuf = rbuf1 - gap;
}
Expand All @@ -72,7 +73,7 @@ mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count,
}
if (NULL != rbuf1) {
rbuf = rbuf2;
mca_coll_cuda_memcpy(rbuf, rbuf1, bufsize);
mca_coll_accelerator_memcpy(rbuf, rbuf1, bufsize);
free(rbuf1);
}
return rc;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
* Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2024 Triad National Security, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -19,25 +20,25 @@

#include "mpi.h"
#include "ompi/constants.h"
#include "coll_cuda.h"
#include "coll_accelerator.h"

/*
* Public string showing the coll ompi_cuda component version number
* Public string showing the coll ompi_accelerator component version number
*/
const char *mca_coll_cuda_component_version_string =
"Open MPI cuda collective MCA component version " OMPI_VERSION;
const char *mca_coll_accelerator_component_version_string =
"Open MPI accelerator collective MCA component version " OMPI_VERSION;

/*
* Local function
*/
static int cuda_register(void);
static int accelerator_register(void);

/*
* Instantiate the public struct with all of our public information
* and pointers to our public functions in it
*/

mca_coll_cuda_component_t mca_coll_cuda_component = {
mca_coll_accelerator_component_t mca_coll_accelerator_component = {
{
/* First, the mca_component_t struct containing meta information
* about the component itself */
Expand All @@ -46,12 +47,12 @@ mca_coll_cuda_component_t mca_coll_cuda_component = {
MCA_COLL_BASE_VERSION_2_4_0,

/* Component name and version */
.mca_component_name = "cuda",
.mca_component_name = "accelerator",
MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION),

/* Component open and close functions */
.mca_register_component_params = cuda_register,
.mca_register_component_params = accelerator_register,
},
.collm_data = {
/* The component is checkpoint ready */
Expand All @@ -60,32 +61,32 @@ mca_coll_cuda_component_t mca_coll_cuda_component = {

/* Initialization / querying functions */

.collm_init_query = mca_coll_cuda_init_query,
.collm_comm_query = mca_coll_cuda_comm_query,
.collm_init_query = mca_coll_accelerator_init_query,
.collm_comm_query = mca_coll_accelerator_comm_query,
},

/* cuda-specific component information */
/* accelerator-specific component information */

/* Priority: make it above all point to point collectives including self */
.priority = 78,
};


static int cuda_register(void)
static int accelerator_register(void)
{
(void) mca_base_component_var_register(&mca_coll_cuda_component.super.collm_version,
"priority", "Priority of the cuda coll component; only relevant if barrier_before or barrier_after is > 0",
(void) mca_base_component_var_register(&mca_coll_accelerator_component.super.collm_version,
"priority", "Priority of the accelerator coll component; only relevant if barrier_before or barrier_after is > 0",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_6,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_cuda_component.priority);
&mca_coll_accelerator_component.priority);

(void) mca_base_component_var_register(&mca_coll_cuda_component.super.collm_version,
"disable_cuda_coll", "Automatically handle the CUDA buffers for the MPI collective.",
(void) mca_base_component_var_register(&mca_coll_accelerator_component.super.collm_version,
"disable_accelerator_coll", "Automatically handle the accelerator buffers for the MPI collective.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_2,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_cuda_component.disable_cuda_coll);
&mca_coll_accelerator_component.disable_accelerator_coll);

return OMPI_SUCCESS;
}
Loading