Skip to content

Commit 5fcb328

Browse files
authored
Merge branch 'main' into 17Dec2024_bug_fixes
2 parents e1a70f6 + 0bccfcd commit 5fcb328

25 files changed

+4005
-2468
lines changed

Diff for: docs/developers/sphinx.rst

+4-2
Original file line numberDiff line numberDiff line change
@@ -113,10 +113,12 @@ permissions to run this command):
113113
# Or: python3 -m pip install -r docs/requirements.txt
114114
115115
This will install Sphinx and some Python modules required for building
116-
the Open MPI documentation in a system-wide location.
116+
the Open MPI documentation in a user-specific location, likely
117+
somewhere under ``$HOME``.
117118

118119
You will likely need to find the location where ``sphinx-build`` was
119-
installed and add it to your ``PATH``.
120+
installed and add it to your ``PATH`` (e.g., on macOS, it might appear
121+
under ``$HOME/Library/Python/PYTHON_VERSION/bin/sphinx-build``).
120122

121123
.. note:: On macOS, look for ``sphinx-build`` under
122124
``$HOME/Library/Python/VERSION/bin`` (where ``VERSION`` is

Diff for: docs/index.rst

+8
Original file line numberDiff line numberDiff line change
@@ -82,3 +82,11 @@ Table of contents
8282
history
8383
man-openmpi/index
8484
man-openshmem/index
85+
86+
Contributors
87+
============
88+
89+
A gigantic "thank you!" to all of our contributors:
90+
91+
.. image:: https://contrib.rocks/image?repo=open-mpi/ompi&max=999
92+
:target: https://github.com/open-mpi/ompi/graphs/contributors

Diff for: examples/Makefile.include

+2-1
Original file line numberDiff line numberDiff line change
@@ -56,4 +56,5 @@ EXTRA_DIST += \
5656
examples/oshmem_symmetric_data.c \
5757
examples/Hello.java \
5858
examples/Ring.java \
59-
examples/spc_example.c
59+
examples/spc_example.c \
60+
examples/hello_sessions_c.c

Diff for: ompi/dpm/dpm.c

-16
Original file line numberDiff line numberDiff line change
@@ -100,18 +100,6 @@ int ompi_dpm_init(void)
100100
return OMPI_SUCCESS;
101101
}
102102

103-
static int compare_pmix_proc(const void *a, const void *b)
104-
{
105-
const pmix_proc_t *proc_a = (pmix_proc_t *)a;
106-
const pmix_proc_t *proc_b = (pmix_proc_t *)b;
107-
108-
int nspace_dif = strncmp(proc_a->nspace, proc_b->nspace, PMIX_MAX_NSLEN);
109-
if (nspace_dif != 0)
110-
return nspace_dif;
111-
112-
return proc_a->rank - proc_b->rank;
113-
}
114-
115103
int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
116104
const char *port_string, bool send_first,
117105
ompi_communicator_t **newcomm)
@@ -395,10 +383,6 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
395383
PMIX_INFO_CONSTRUCT(&tinfo);
396384
PMIX_INFO_LOAD(&tinfo, PMIX_TIMEOUT, &ompi_pmix_connect_timeout, PMIX_UINT32);
397385

398-
/*
399-
* sort procs so that all ranks call PMIx_Connect() with the processes in same order
400-
*/
401-
qsort(procs, nprocs, sizeof(pmix_proc_t), compare_pmix_proc);
402386
pret = PMIx_Connect(procs, nprocs, &tinfo, 1);
403387
PMIX_INFO_DESTRUCT(&tinfo);
404388
PMIX_PROC_FREE(procs, nprocs);

Diff for: ompi/mca/coll/accelerator/coll_accelerator.h

+9-6
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
* reserved.
66
* Copyright (c) 2014-2024 NVIDIA Corporation. All rights reserved.
77
* Copyright (c) 2024 Triad National Security, LLC. All rights reserved.
8+
* Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights reserved.
89
* $COPYRIGHT$
910
*
1011
* Additional copyrights may follow
@@ -87,22 +88,24 @@ mca_coll_accelerator_reduce_scatter_block(const void *sbuf, void *rbuf, size_t r
8788
* @retval >0 The buffer belongs to a managed buffer in
8889
* device memory.
8990
*/
90-
static inline int mca_coll_accelerator_check_buf(void *addr)
91+
static inline int mca_coll_accelerator_check_buf(void *addr, int *dev_id)
9192
{
9293
uint64_t flags;
93-
int dev_id;
94+
9495
if (OPAL_LIKELY(NULL != addr)) {
95-
return opal_accelerator.check_addr(addr, &dev_id, &flags);
96+
return opal_accelerator.check_addr(addr, dev_id, &flags);
9697
} else {
98+
*dev_id = MCA_ACCELERATOR_NO_DEVICE_ID;
9799
return 0;
98100
}
99101
}
100102

101-
static inline void *mca_coll_accelerator_memcpy(void *dest, const void *src, size_t size)
103+
static inline void *mca_coll_accelerator_memcpy(void *dest, int dest_dev, const void *src, int src_dev, size_t size,
104+
opal_accelerator_transfer_type_t type)
102105
{
103106
int res;
104-
res = opal_accelerator.mem_copy(MCA_ACCELERATOR_NO_DEVICE_ID, MCA_ACCELERATOR_NO_DEVICE_ID,
105-
dest, src, size, MCA_ACCELERATOR_TRANSFER_UNSPEC);
107+
108+
res = opal_accelerator.mem_copy(dest_dev, src_dev, dest, src, size, type);
106109
if (res != 0) {
107110
opal_output(0, "coll/accelerator: Error in mem_copy: res=%d, dest=%p, src=%p, size=%d", res, dest, src,
108111
(int) size);

Diff for: ompi/mca/coll/accelerator/coll_accelerator_allreduce.c

+10-5
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
* Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved.
66
* Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved.
77
* Copyright (c) 2024 Triad National Security, LLC. All rights reserved.
8+
* Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights reserved.
89
* $COPYRIGHT$
910
*
1011
* Additional copyrights may follow
@@ -37,11 +38,12 @@ mca_coll_accelerator_allreduce(const void *sbuf, void *rbuf, size_t count,
3738
mca_coll_accelerator_module_t *s = (mca_coll_accelerator_module_t*) module;
3839
ptrdiff_t gap;
3940
char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL;
41+
int sbuf_dev, rbuf_dev;
4042
size_t bufsize;
4143
int rc;
4244

4345
bufsize = opal_datatype_span(&dtype->super, count, &gap);
44-
rc = mca_coll_accelerator_check_buf((void *)sbuf);
46+
rc = mca_coll_accelerator_check_buf((void *)sbuf, &sbuf_dev);
4547
if (rc < 0) {
4648
return rc;
4749
}
@@ -50,10 +52,11 @@ mca_coll_accelerator_allreduce(const void *sbuf, void *rbuf, size_t count,
5052
if (NULL == sbuf1) {
5153
return OMPI_ERR_OUT_OF_RESOURCE;
5254
}
53-
mca_coll_accelerator_memcpy(sbuf1, sbuf, bufsize);
55+
mca_coll_accelerator_memcpy(sbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, sbuf, sbuf_dev,
56+
bufsize, MCA_ACCELERATOR_TRANSFER_DTOH);
5457
sbuf = sbuf1 - gap;
5558
}
56-
rc = mca_coll_accelerator_check_buf(rbuf);
59+
rc = mca_coll_accelerator_check_buf(rbuf, &rbuf_dev);
5760
if (rc < 0) {
5861
return rc;
5962
}
@@ -63,7 +66,8 @@ mca_coll_accelerator_allreduce(const void *sbuf, void *rbuf, size_t count,
6366
if (NULL != sbuf1) free(sbuf1);
6467
return OMPI_ERR_OUT_OF_RESOURCE;
6568
}
66-
mca_coll_accelerator_memcpy(rbuf1, rbuf, bufsize);
69+
mca_coll_accelerator_memcpy(rbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, rbuf, rbuf_dev,
70+
bufsize, MCA_ACCELERATOR_TRANSFER_DTOH);
6771
rbuf2 = rbuf; /* save away original buffer */
6872
rbuf = rbuf1 - gap;
6973
}
@@ -73,7 +77,8 @@ mca_coll_accelerator_allreduce(const void *sbuf, void *rbuf, size_t count,
7377
}
7478
if (NULL != rbuf1) {
7579
rbuf = rbuf2;
76-
mca_coll_accelerator_memcpy(rbuf, rbuf1, bufsize);
80+
mca_coll_accelerator_memcpy(rbuf, rbuf_dev, rbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, bufsize,
81+
MCA_ACCELERATOR_TRANSFER_HTOD);
7782
free(rbuf1);
7883
}
7984
return rc;

Diff for: ompi/mca/coll/accelerator/coll_accelerator_exscan.c

+10-5
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
* Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved.
66
* Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved.
77
* Copyright (c) 2024 Triad National Security, LLC. All rights reserved.
8+
* Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights reserved.
89
* $COPYRIGHT$
910
*
1011
* Additional copyrights may follow
@@ -29,11 +30,12 @@ int mca_coll_accelerator_exscan(const void *sbuf, void *rbuf, size_t count,
2930
mca_coll_accelerator_module_t *s = (mca_coll_accelerator_module_t*) module;
3031
ptrdiff_t gap;
3132
char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL;
33+
int sbuf_dev, rbuf_dev;
3234
size_t bufsize;
3335
int rc;
3436

3537
bufsize = opal_datatype_span(&dtype->super, count, &gap);
36-
rc = mca_coll_accelerator_check_buf((void *)sbuf);
38+
rc = mca_coll_accelerator_check_buf((void *)sbuf, &sbuf_dev);
3739
if (rc < 0) {
3840
return rc;
3941
}
@@ -43,10 +45,11 @@ int mca_coll_accelerator_exscan(const void *sbuf, void *rbuf, size_t count,
4345
if (NULL == sbuf1) {
4446
return OMPI_ERR_OUT_OF_RESOURCE;
4547
}
46-
mca_coll_accelerator_memcpy(sbuf1, sbuf, bufsize);
48+
mca_coll_accelerator_memcpy(sbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, sbuf, sbuf_dev, bufsize,
49+
MCA_ACCELERATOR_TRANSFER_DTOH);
4750
sbuf = sbuf1 - gap;
4851
}
49-
rc = mca_coll_accelerator_check_buf(rbuf);
52+
rc = mca_coll_accelerator_check_buf(rbuf, &rbuf_dev);
5053
if (rc < 0) {
5154
return rc;
5255
}
@@ -56,7 +59,8 @@ int mca_coll_accelerator_exscan(const void *sbuf, void *rbuf, size_t count,
5659
if (NULL != sbuf1) free(sbuf1);
5760
return OMPI_ERR_OUT_OF_RESOURCE;
5861
}
59-
mca_coll_accelerator_memcpy(rbuf1, rbuf, bufsize);
62+
mca_coll_accelerator_memcpy(rbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, rbuf, rbuf_dev, bufsize,
63+
MCA_ACCELERATOR_TRANSFER_DTOH);
6064
rbuf2 = rbuf; /* save away original buffer */
6165
rbuf = rbuf1 - gap;
6266
}
@@ -68,7 +72,8 @@ int mca_coll_accelerator_exscan(const void *sbuf, void *rbuf, size_t count,
6872
}
6973
if (NULL != rbuf1) {
7074
rbuf = rbuf2;
71-
mca_coll_accelerator_memcpy(rbuf, rbuf1, bufsize);
75+
mca_coll_accelerator_memcpy(rbuf, rbuf_dev, rbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, bufsize,
76+
MCA_ACCELERATOR_TRANSFER_HTOD);
7277
free(rbuf1);
7378
}
7479
return rc;

Diff for: ompi/mca/coll/accelerator/coll_accelerator_reduce.c

+19-10
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
* Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved.
77
* Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved.
88
* Copyright (c) 2024 Triad National Security, LLC. All rights reserved.
9+
* Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights reserved.
910
* $COPYRIGHT$
1011
*
1112
* Additional copyrights may follow
@@ -39,12 +40,13 @@ mca_coll_accelerator_reduce(const void *sbuf, void *rbuf, size_t count,
3940
int rank = ompi_comm_rank(comm);
4041
ptrdiff_t gap;
4142
char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL;
43+
int rbuf_dev, sbuf_dev;
4244
size_t bufsize;
4345
int rc;
4446

4547
bufsize = opal_datatype_span(&dtype->super, count, &gap);
4648

47-
rc = mca_coll_accelerator_check_buf((void *)sbuf);
49+
rc = mca_coll_accelerator_check_buf((void *)sbuf, &sbuf_dev);
4850
if (rc < 0) {
4951
return rc;
5052
}
@@ -53,11 +55,12 @@ mca_coll_accelerator_reduce(const void *sbuf, void *rbuf, size_t count,
5355
if (NULL == sbuf1) {
5456
return OMPI_ERR_OUT_OF_RESOURCE;
5557
}
56-
mca_coll_accelerator_memcpy(sbuf1, sbuf, bufsize);
58+
mca_coll_accelerator_memcpy(sbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, sbuf, sbuf_dev, bufsize,
59+
MCA_ACCELERATOR_TRANSFER_DTOH);
5760
sbuf = sbuf1 - gap;
5861
}
5962

60-
rc = mca_coll_accelerator_check_buf(rbuf);
63+
rc = mca_coll_accelerator_check_buf(rbuf, &rbuf_dev);
6164
if (rc < 0) {
6265
return rc;
6366
}
@@ -67,7 +70,8 @@ mca_coll_accelerator_reduce(const void *sbuf, void *rbuf, size_t count,
6770
if (NULL != sbuf1) free(sbuf1);
6871
return OMPI_ERR_OUT_OF_RESOURCE;
6972
}
70-
mca_coll_accelerator_memcpy(rbuf1, rbuf, bufsize);
73+
mca_coll_accelerator_memcpy(rbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, rbuf, rbuf_dev, bufsize,
74+
MCA_ACCELERATOR_TRANSFER_DTOH);
7175
rbuf2 = rbuf; /* save away original buffer */
7276
rbuf = rbuf1 - gap;
7377
}
@@ -80,7 +84,8 @@ mca_coll_accelerator_reduce(const void *sbuf, void *rbuf, size_t count,
8084
}
8185
if (NULL != rbuf1) {
8286
rbuf = rbuf2;
83-
mca_coll_accelerator_memcpy(rbuf, rbuf1, bufsize);
87+
mca_coll_accelerator_memcpy(rbuf, rbuf_dev, rbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, bufsize,
88+
MCA_ACCELERATOR_TRANSFER_HTOD);
8489
free(rbuf1);
8590
}
8691
return rc;
@@ -94,12 +99,13 @@ mca_coll_accelerator_reduce_local(const void *sbuf, void *rbuf, size_t count,
9499
{
95100
ptrdiff_t gap;
96101
char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL;
102+
int sbuf_dev, rbuf_dev;
97103
size_t bufsize;
98104
int rc;
99105

100106
bufsize = opal_datatype_span(&dtype->super, count, &gap);
101107

102-
rc = mca_coll_accelerator_check_buf((void *)sbuf);
108+
rc = mca_coll_accelerator_check_buf((void *)sbuf, &sbuf_dev);
103109
if (rc < 0) {
104110
return rc;
105111
}
@@ -109,11 +115,12 @@ mca_coll_accelerator_reduce_local(const void *sbuf, void *rbuf, size_t count,
109115
if (NULL == sbuf1) {
110116
return OMPI_ERR_OUT_OF_RESOURCE;
111117
}
112-
mca_coll_accelerator_memcpy(sbuf1, sbuf, bufsize);
118+
mca_coll_accelerator_memcpy(sbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, sbuf, sbuf_dev, bufsize,
119+
MCA_ACCELERATOR_TRANSFER_DTOH);
113120
sbuf = sbuf1 - gap;
114121
}
115122

116-
rc = mca_coll_accelerator_check_buf(rbuf);
123+
rc = mca_coll_accelerator_check_buf(rbuf, &rbuf_dev);
117124
if (rc < 0) {
118125
return rc;
119126
}
@@ -124,7 +131,8 @@ mca_coll_accelerator_reduce_local(const void *sbuf, void *rbuf, size_t count,
124131
if (NULL != sbuf1) free(sbuf1);
125132
return OMPI_ERR_OUT_OF_RESOURCE;
126133
}
127-
mca_coll_accelerator_memcpy(rbuf1, rbuf, bufsize);
134+
mca_coll_accelerator_memcpy(rbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, rbuf, rbuf_dev, bufsize,
135+
MCA_ACCELERATOR_TRANSFER_DTOH);
128136
rbuf2 = rbuf; /* save away original buffer */
129137
rbuf = rbuf1 - gap;
130138
}
@@ -137,7 +145,8 @@ mca_coll_accelerator_reduce_local(const void *sbuf, void *rbuf, size_t count,
137145
}
138146
if (NULL != rbuf1) {
139147
rbuf = rbuf2;
140-
mca_coll_accelerator_memcpy(rbuf, rbuf1, bufsize);
148+
mca_coll_accelerator_memcpy(rbuf, rbuf_dev, rbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, bufsize,
149+
MCA_ACCELERATOR_TRANSFER_HTOD);
141150
free(rbuf1);
142151
}
143152
return rc;

Diff for: ompi/mca/coll/accelerator/coll_accelerator_reduce_scatter_block.c

+10-5
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
* Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved.
66
* Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved.
77
* Copyright (c) 2024 Triad National Security, LLC. All rights reserved.
8+
* Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights reserved.
89
* $COPYRIGHT$
910
*
1011
* Additional copyrights may follow
@@ -41,13 +42,14 @@ mca_coll_accelerator_reduce_scatter_block(const void *sbuf, void *rbuf, size_t r
4142
mca_coll_accelerator_module_t *s = (mca_coll_accelerator_module_t*) module;
4243
ptrdiff_t gap;
4344
char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL;
45+
int sbuf_dev, rbuf_dev;
4446
size_t sbufsize, rbufsize;
4547
int rc;
4648

4749
rbufsize = opal_datatype_span(&dtype->super, rcount, &gap);
4850

4951
sbufsize = rbufsize * ompi_comm_size(comm);
50-
rc = mca_coll_accelerator_check_buf((void *)sbuf);
52+
rc = mca_coll_accelerator_check_buf((void *)sbuf, &sbuf_dev);
5153
if (rc < 0) {
5254
return rc;
5355
}
@@ -56,10 +58,11 @@ mca_coll_accelerator_reduce_scatter_block(const void *sbuf, void *rbuf, size_t r
5658
if (NULL == sbuf1) {
5759
return OMPI_ERR_OUT_OF_RESOURCE;
5860
}
59-
mca_coll_accelerator_memcpy(sbuf1, sbuf, sbufsize);
61+
mca_coll_accelerator_memcpy(sbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, sbuf, sbuf_dev, sbufsize,
62+
MCA_ACCELERATOR_TRANSFER_DTOH);
6063
sbuf = sbuf1 - gap;
6164
}
62-
rc = mca_coll_accelerator_check_buf(rbuf);
65+
rc = mca_coll_accelerator_check_buf(rbuf, &rbuf_dev);
6366
if (rc < 0) {
6467
return rc;
6568
}
@@ -69,7 +72,8 @@ mca_coll_accelerator_reduce_scatter_block(const void *sbuf, void *rbuf, size_t r
6972
if (NULL != sbuf1) free(sbuf1);
7073
return OMPI_ERR_OUT_OF_RESOURCE;
7174
}
72-
mca_coll_accelerator_memcpy(rbuf1, rbuf, rbufsize);
75+
mca_coll_accelerator_memcpy(rbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, rbuf, rbuf_dev, rbufsize,
76+
MCA_ACCELERATOR_TRANSFER_DTOH);
7377
rbuf2 = rbuf; /* save away original buffer */
7478
rbuf = rbuf1 - gap;
7579
}
@@ -80,7 +84,8 @@ mca_coll_accelerator_reduce_scatter_block(const void *sbuf, void *rbuf, size_t r
8084
}
8185
if (NULL != rbuf1) {
8286
rbuf = rbuf2;
83-
mca_coll_accelerator_memcpy(rbuf, rbuf1, rbufsize);
87+
mca_coll_accelerator_memcpy(rbuf, rbuf_dev, rbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, rbufsize,
88+
MCA_ACCELERATOR_TRANSFER_HTOD);
8489
free(rbuf1);
8590
}
8691
return rc;

0 commit comments

Comments
 (0)