Skip to content

Commit a5ee049

Browse files
committed
MPICH: Configure --with-device=ch4:ofi,ucx
1 parent e0e0b7c commit a5ee049

File tree

3 files changed

+141
-1
lines changed

3 files changed

+141
-1
lines changed

Diff for: cibw-build-mpi.sh

+6 -1
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,16 @@ PREFIX=${PREFIX:-"/opt/$mpiname"}
1313

1414
if test "$mpiname" = "mpich"; then
1515
version=$(sed -n 's/MPICH_VERSION=\(.*\)/\1/p' "$SOURCE"/maint/Version)
16+
case $(uname)-$(uname -m) in
17+
Linux-x86_64) netmod=ofi,ucx ;;
18+
Linux-aarch64) netmod=ofi,ucx ;;
19+
*) netmod=ofi ;;
20+
esac
1621
options=(
1722
CC=cc
1823
CXX=c++
1924
--prefix="$PREFIX"
20-
--with-device=ch4:"${variant:-ofi}"
25+
--with-device="ch4:$netmod"
2126
--with-pm=hydra:gforker
2227
--with-libfabric=embedded
2328
--with-ucx=embedded

Diff for: cibw-check-mpi.sh

+19
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,22 @@ RUN mpicxx helloworld.cxx -o helloworld-cxx
6868
RUN command -v mpiexec
6969
RUN mpiexec -n 3 ./helloworld-c
7070
RUN mpiexec -n 3 ./helloworld-cxx
71+
72+
if test "$mpiname" = "mpich"; then
73+
case $(uname)-$(uname -m) in
74+
Linux-x86_64) ch4netmods=(ofi ucx) ;;
75+
Linux-aarch64) ch4netmods=(ofi ucx) ;;
76+
*) ch4netmods=(ofi) ;;
77+
esac
78+
export MPICH_CH4_OFI_CAPABILITY_DEBUG=1
79+
export MPICH_CH4_UCX_CAPABILITY_DEBUG=1
80+
for netmod in "${ch4netmods[@]}"; do
81+
printf "testing ch4:%s ... " "$netmod"
82+
export MPICH_CH4_NETMOD="$netmod"
83+
./helloworld-c | grep -i "$netmod" > /dev/null
84+
printf "OK\n"
85+
done
86+
unset MPICH_CH4_OFI_CAPABILITY_DEBUG
87+
unset MPICH_CH4_UCX_CAPABILITY_DEBUG
88+
unset MPICH_CH4_NETMOD
89+
fi

Diff for: patches/mpich-4.2.3

+116
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
From 135459663554992589868b9f4e75d040e0cf4942 Mon Sep 17 00:00:00 2001
2+
From: Ken Raffenetti <[email protected]>
3+
Date: Wed, 23 Oct 2024 09:58:33 -0500
4+
Subject: [PATCH] ch4/ucx: Reorder function definitions
5+
6+
When building a non-inline netmod configuration, the compiler will flag
7+
MPIDI_UCX_do_am_recv for implicit declaration. Move its definition
8+
before it is used to resolve this. Fixes pmodels/mpich#7185.
9+
---
10+
src/mpid/ch4/netmod/ucx/ucx_am.c | 86 ++++++++++++++++----------------
11+
1 file changed, 43 insertions(+), 43 deletions(-)
12+
13+
diff --git a/src/mpid/ch4/netmod/ucx/ucx_am.c b/src/mpid/ch4/netmod/ucx/ucx_am.c
14+
index 082d35f9cc8..05f849eac6d 100644
15+
--- a/src/mpid/ch4/netmod/ucx/ucx_am.c
16+
+++ b/src/mpid/ch4/netmod/ucx/ucx_am.c
17+
@@ -70,6 +70,49 @@ void MPIDI_UCX_am_send_callback(void *request, ucs_status_t status)
18+
}
19+
20+
#ifdef HAVE_UCP_AM_NBX
21+
+/* Called when recv buffer is posted */
22+
+int MPIDI_UCX_do_am_recv(MPIR_Request * rreq)
23+
+{
24+
+ void *recv_buf;
25+
+ bool is_contig;
26+
+ MPI_Aint data_sz, in_data_sz;
27+
+ int vci = MPIDI_Request_get_vci(rreq);
28+
+
29+
+ MPIDIG_get_recv_buffer(&recv_buf, &data_sz, &is_contig, &in_data_sz, rreq);
30+
+ if (!is_contig || in_data_sz > data_sz) {
31+
+ /* non-contig datatype, need receive into pack buffer */
32+
+ /* ucx will error out if buffer size is less than the promised data size,
33+
+ * also use a pack buffer in this case */
34+
+ recv_buf = MPL_malloc(in_data_sz, MPL_MEM_OTHER);
35+
+ MPIR_Assert(recv_buf);
36+
+ MPIDI_UCX_AM_RECV_REQUEST(rreq, pack_buffer) = recv_buf;
37+
+ } else {
38+
+ MPIDI_UCX_AM_RECV_REQUEST(rreq, pack_buffer) = NULL;
39+
+ }
40+
+
41+
+ MPIDI_UCX_ucp_request_t *ucp_request;
42+
+ size_t received_length;
43+
+ ucp_request_param_t param = {
44+
+ .op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK | UCP_OP_ATTR_FIELD_RECV_INFO,
45+
+ .cb.recv_am = &MPIDI_UCX_am_recv_callback_nbx,
46+
+ .recv_info.length = &received_length,
47+
+ };
48+
+ void *data_desc = MPIDI_UCX_AM_RECV_REQUEST(rreq, data_desc);
49+
+ /* note: use in_data_sz to match promised data size */
50+
+ ucp_request = ucp_am_recv_data_nbx(MPIDI_UCX_global.ctx[vci].worker,
51+
+ data_desc, recv_buf, in_data_sz, &param);
52+
+ if (ucp_request == NULL) {
53+
+ /* completed immediately */
54+
+ MPIDI_UCX_ucp_request_t tmp_ucp_request;
55+
+ tmp_ucp_request.req = rreq;
56+
+ MPIDI_UCX_am_recv_callback_nbx(&tmp_ucp_request, UCS_OK, received_length, NULL);
57+
+ } else {
58+
+ ucp_request->req = rreq;
59+
+ }
60+
+
61+
+ return MPI_SUCCESS;
62+
+}
63+
+
64+
/* Am handler for messages sent from ucp_am_send_nbx. Registered with
65+
* ucp_worker_set_am_recv_handler.
66+
*/
67+
@@ -116,49 +159,6 @@ ucs_status_t MPIDI_UCX_am_nbx_handler(void *arg, const void *header, size_t head
68+
}
69+
}
70+
71+
-/* Called when recv buffer is posted */
72+
-int MPIDI_UCX_do_am_recv(MPIR_Request * rreq)
73+
-{
74+
- void *recv_buf;
75+
- bool is_contig;
76+
- MPI_Aint data_sz, in_data_sz;
77+
- int vci = MPIDI_Request_get_vci(rreq);
78+
-
79+
- MPIDIG_get_recv_buffer(&recv_buf, &data_sz, &is_contig, &in_data_sz, rreq);
80+
- if (!is_contig || in_data_sz > data_sz) {
81+
- /* non-contig datatype, need receive into pack buffer */
82+
- /* ucx will error out if buffer size is less than the promised data size,
83+
- * also use a pack buffer in this case */
84+
- recv_buf = MPL_malloc(in_data_sz, MPL_MEM_OTHER);
85+
- MPIR_Assert(recv_buf);
86+
- MPIDI_UCX_AM_RECV_REQUEST(rreq, pack_buffer) = recv_buf;
87+
- } else {
88+
- MPIDI_UCX_AM_RECV_REQUEST(rreq, pack_buffer) = NULL;
89+
- }
90+
-
91+
- MPIDI_UCX_ucp_request_t *ucp_request;
92+
- size_t received_length;
93+
- ucp_request_param_t param = {
94+
- .op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK | UCP_OP_ATTR_FIELD_RECV_INFO,
95+
- .cb.recv_am = &MPIDI_UCX_am_recv_callback_nbx,
96+
- .recv_info.length = &received_length,
97+
- };
98+
- void *data_desc = MPIDI_UCX_AM_RECV_REQUEST(rreq, data_desc);
99+
- /* note: use in_data_sz to match promised data size */
100+
- ucp_request = ucp_am_recv_data_nbx(MPIDI_UCX_global.ctx[vci].worker,
101+
- data_desc, recv_buf, in_data_sz, &param);
102+
- if (ucp_request == NULL) {
103+
- /* completed immediately */
104+
- MPIDI_UCX_ucp_request_t tmp_ucp_request;
105+
- tmp_ucp_request.req = rreq;
106+
- MPIDI_UCX_am_recv_callback_nbx(&tmp_ucp_request, UCS_OK, received_length, NULL);
107+
- } else {
108+
- ucp_request->req = rreq;
109+
- }
110+
-
111+
- return MPI_SUCCESS;
112+
-}
113+
-
114+
/* callback for ucp_am_recv_data_nbx */
115+
void MPIDI_UCX_am_recv_callback_nbx(void *request, ucs_status_t status, size_t length,
116+
void *user_data)

0 commit comments

Comments
 (0)