Skip to content

Commit

Permalink
Merge branch 'open-mpi:main' into allreduce
Browse files Browse the repository at this point in the history
  • Loading branch information
jiaxiyan authored Jan 4, 2024
2 parents e937de8 + ec01df5 commit ab8bcff
Show file tree
Hide file tree
Showing 15 changed files with 181 additions and 165 deletions.
2 changes: 1 addition & 1 deletion 3rd-party/openpmix
2 changes: 1 addition & 1 deletion 3rd-party/prrte
Submodule prrte updated 1 files
+1 −2 src/util/session_dir.c
40 changes: 40 additions & 0 deletions ompi/mca/coll/accelerator/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#
# Copyright (c) 2014 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
# Copyright (c) 2017 IBM Corporation. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Help-message catalogue shipped with this component.
dist_ompidata_DATA = help-mpi-coll-accelerator.txt

# All sources that make up the coll/accelerator component.
sources = coll_accelerator_module.c coll_accelerator_reduce.c coll_accelerator_allreduce.c \
          coll_accelerator_reduce_scatter_block.c coll_accelerator_component.c \
          coll_accelerator_scan.c coll_accelerator_exscan.c coll_accelerator.h

# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).

# Exactly one of component_install / component_noinst is non-empty,
# depending on whether the component is built as a DSO.
if MCA_BUILD_ompi_coll_accelerator_DSO
component_noinst =
component_install = mca_coll_accelerator.la
else
component_noinst = libmca_coll_accelerator.la
component_install =
endif

# DSO build: loadable module installed into $(ompilibdir).
mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_coll_accelerator_la_SOURCES = $(sources)
mca_coll_accelerator_la_LDFLAGS = -module -avoid-version
# @OMPI_LIBMPI_NAME@ is replaced by configure with the MPI library's base name.
mca_coll_accelerator_la_LIBADD = $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la

# Static build: convenience library that is not installed.
noinst_LTLIBRARIES = $(component_noinst)
libmca_coll_accelerator_la_SOURCES = $(sources)
libmca_coll_accelerator_la_LDFLAGS = -module -avoid-version

Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,16 @@
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2024 Triad National Security, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/

#ifndef MCA_COLL_CUDA_EXPORT_H
#define MCA_COLL_CUDA_EXPORT_H
#ifndef MCA_COLL_ACCELERATOR_EXPORT_H
#define MCA_COLL_ACCELERATOR_EXPORT_H

#include "ompi_config.h"

Expand All @@ -31,43 +32,43 @@ BEGIN_C_DECLS

/* API functions */

int mca_coll_cuda_init_query(bool enable_progress_threads,
int mca_coll_accelerator_init_query(bool enable_progress_threads,
bool enable_mpi_threads);
mca_coll_base_module_t
*mca_coll_cuda_comm_query(struct ompi_communicator_t *comm,
*mca_coll_accelerator_comm_query(struct ompi_communicator_t *comm,
int *priority);

int mca_coll_cuda_module_enable(mca_coll_base_module_t *module,
int mca_coll_accelerator_module_enable(mca_coll_base_module_t *module,
struct ompi_communicator_t *comm);

int
mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count,
mca_coll_accelerator_allreduce(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);

int mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count,
int mca_coll_accelerator_reduce(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);

int mca_coll_cuda_exscan(const void *sbuf, void *rbuf, int count,
int mca_coll_accelerator_exscan(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);

int mca_coll_cuda_scan(const void *sbuf, void *rbuf, int count,
int mca_coll_accelerator_scan(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);

int
mca_coll_cuda_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount,
mca_coll_accelerator_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
Expand All @@ -83,7 +84,7 @@ mca_coll_cuda_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount,
* @retval >0 The buffer belongs to a managed buffer in
* device memory.
*/
static inline int mca_coll_cuda_check_buf(void *addr)
static inline int mca_coll_accelerator_check_buf(void *addr)
{
uint64_t flags;
int dev_id;
Expand All @@ -94,13 +95,13 @@ static inline int mca_coll_cuda_check_buf(void *addr)
}
}

static inline void *mca_coll_cuda_memcpy(void *dest, const void *src, size_t size)
static inline void *mca_coll_accelerator_memcpy(void *dest, const void *src, size_t size)
{
int res;
res = opal_accelerator.mem_copy(MCA_ACCELERATOR_NO_DEVICE_ID, MCA_ACCELERATOR_NO_DEVICE_ID,
dest, src, size, MCA_ACCELERATOR_TRANSFER_UNSPEC);
if (res != 0) {
opal_output(0, "CUDA: Error in cuMemcpy: res=%d, dest=%p, src=%p, size=%d", res, dest, src,
opal_output(0, "coll/accelerator: Error in mem_copy: res=%d, dest=%p, src=%p, size=%d", res, dest, src,
(int) size);
abort();
} else {
Expand All @@ -111,28 +112,28 @@ static inline void *mca_coll_cuda_memcpy(void *dest, const void *src, size_t siz
/* Types */
/* Module */

typedef struct mca_coll_cuda_module_t {
typedef struct mca_coll_accelerator_module_t {
mca_coll_base_module_t super;

/* Pointers to all the "real" collective functions */
mca_coll_base_comm_coll_t c_coll;
} mca_coll_cuda_module_t;
} mca_coll_accelerator_module_t;

OBJ_CLASS_DECLARATION(mca_coll_cuda_module_t);
OBJ_CLASS_DECLARATION(mca_coll_accelerator_module_t);

/* Component */

typedef struct mca_coll_cuda_component_t {
typedef struct mca_coll_accelerator_component_t {
mca_coll_base_component_2_4_0_t super;

int priority; /* Priority of this component */
int disable_cuda_coll; /* Force disable of the CUDA collective component */
} mca_coll_cuda_component_t;
int disable_accelerator_coll; /* Force disable of the accelerator collective component */
} mca_coll_accelerator_component_t;

/* Globally exported variables */

OMPI_DECLSPEC extern mca_coll_cuda_component_t mca_coll_cuda_component;
OMPI_DECLSPEC extern mca_coll_accelerator_component_t mca_coll_accelerator_component;

END_C_DECLS

#endif /* MCA_COLL_CUDA_EXPORT_H */
#endif /* MCA_COLL_ACCELERATOR_EXPORT_H */
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
* reserved.
* Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved.
* Copyright (c) 2024 Triad National Security, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -12,7 +13,7 @@
*/

#include "ompi_config.h"
#include "coll_cuda.h"
#include "coll_accelerator.h"

#include <stdio.h>

Expand All @@ -27,20 +28,20 @@
* Returns: - MPI_SUCCESS or error code
*/
int
mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count,
mca_coll_accelerator_allreduce(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module;
mca_coll_accelerator_module_t *s = (mca_coll_accelerator_module_t*) module;
ptrdiff_t gap;
char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL;
size_t bufsize;
int rc;

bufsize = opal_datatype_span(&dtype->super, count, &gap);
rc = mca_coll_cuda_check_buf((void *)sbuf);
rc = mca_coll_accelerator_check_buf((void *)sbuf);
if (rc < 0) {
return rc;
}
Expand All @@ -49,10 +50,10 @@ mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count,
if (NULL == sbuf1) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
mca_coll_cuda_memcpy(sbuf1, sbuf, bufsize);
mca_coll_accelerator_memcpy(sbuf1, sbuf, bufsize);
sbuf = sbuf1 - gap;
}
rc = mca_coll_cuda_check_buf(rbuf);
rc = mca_coll_accelerator_check_buf(rbuf);
if (rc < 0) {
return rc;
}
Expand All @@ -62,7 +63,7 @@ mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count,
if (NULL != sbuf1) free(sbuf1);
return OMPI_ERR_OUT_OF_RESOURCE;
}
mca_coll_cuda_memcpy(rbuf1, rbuf, bufsize);
mca_coll_accelerator_memcpy(rbuf1, rbuf, bufsize);
rbuf2 = rbuf; /* save away original buffer */
rbuf = rbuf1 - gap;
}
Expand All @@ -72,7 +73,7 @@ mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count,
}
if (NULL != rbuf1) {
rbuf = rbuf2;
mca_coll_cuda_memcpy(rbuf, rbuf1, bufsize);
mca_coll_accelerator_memcpy(rbuf, rbuf1, bufsize);
free(rbuf1);
}
return rc;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
* Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2024 Triad National Security, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -19,25 +20,25 @@

#include "mpi.h"
#include "ompi/constants.h"
#include "coll_cuda.h"
#include "coll_accelerator.h"

/*
* Public string showing the coll ompi_cuda component version number
* Public string showing the coll ompi_accelerator component version number
*/
const char *mca_coll_cuda_component_version_string =
"Open MPI cuda collective MCA component version " OMPI_VERSION;
const char *mca_coll_accelerator_component_version_string =
"Open MPI accelerator collective MCA component version " OMPI_VERSION;

/*
* Local function
*/
static int cuda_register(void);
static int accelerator_register(void);

/*
* Instantiate the public struct with all of our public information
* and pointers to our public functions in it
*/

mca_coll_cuda_component_t mca_coll_cuda_component = {
mca_coll_accelerator_component_t mca_coll_accelerator_component = {
{
/* First, the mca_component_t struct containing meta information
* about the component itself */
Expand All @@ -46,12 +47,12 @@ mca_coll_cuda_component_t mca_coll_cuda_component = {
MCA_COLL_BASE_VERSION_2_4_0,

/* Component name and version */
.mca_component_name = "cuda",
.mca_component_name = "accelerator",
MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION),

/* Component open and close functions */
.mca_register_component_params = cuda_register,
.mca_register_component_params = accelerator_register,
},
.collm_data = {
/* The component is checkpoint ready */
Expand All @@ -60,32 +61,32 @@ mca_coll_cuda_component_t mca_coll_cuda_component = {

/* Initialization / querying functions */

.collm_init_query = mca_coll_cuda_init_query,
.collm_comm_query = mca_coll_cuda_comm_query,
.collm_init_query = mca_coll_accelerator_init_query,
.collm_comm_query = mca_coll_accelerator_comm_query,
},

/* cuda-specific component information */
/* accelerator-specific component information */

/* Priority: make it above all point to point collectives including self */
.priority = 78,
};


static int cuda_register(void)
static int accelerator_register(void)
{
(void) mca_base_component_var_register(&mca_coll_cuda_component.super.collm_version,
"priority", "Priority of the cuda coll component; only relevant if barrier_before or barrier_after is > 0",
(void) mca_base_component_var_register(&mca_coll_accelerator_component.super.collm_version,
"priority", "Priority of the accelerator coll component; only relevant if barrier_before or barrier_after is > 0",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_6,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_cuda_component.priority);
&mca_coll_accelerator_component.priority);

(void) mca_base_component_var_register(&mca_coll_cuda_component.super.collm_version,
"disable_cuda_coll", "Automatically handle the CUDA buffers for the MPI collective.",
(void) mca_base_component_var_register(&mca_coll_accelerator_component.super.collm_version,
"disable_accelerator_coll", "Automatically handle the accelerator buffers for the MPI collective.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_2,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_cuda_component.disable_cuda_coll);
&mca_coll_accelerator_component.disable_accelerator_coll);

return OMPI_SUCCESS;
}
Loading

0 comments on commit ab8bcff

Please sign in to comment.