diff --git a/.github/workflows/ompi_mpi4py.yaml b/.github/workflows/ompi_mpi4py.yaml new file mode 100644 index 00000000000..45913bf79c4 --- /dev/null +++ b/.github/workflows/ompi_mpi4py.yaml @@ -0,0 +1,112 @@ +name: GitHub Action CI + +on: [pull_request] + +jobs: + mpi4py: + runs-on: ubuntu-latest + timeout-minutes: 60 + steps: + - name: Configure hostname + run: echo 127.0.0.1 `hostname` | sudo tee -a /etc/hosts > /dev/null + if: ${{ runner.os == 'Linux' || runner.os == 'macOS' }} + + - name: Install dependencies + run: sudo apt-get install -y -q + libnuma-dev + + - name: Checkout Open MPI + uses: actions/checkout@v4 + with: + path: mpi-build + submodules: recursive + + - name: Bootstrap Open MPI + run: ./autogen.pl + working-directory: mpi-build + + - name: Configure Open MPI + run: ./configure + --disable-dependency-tracking + --disable-sphinx + --disable-man-pages + --disable-mpi-fortran + --disable-oshmem + --enable-debug + --enable-mem-debug + LDFLAGS=-Wl,-rpath,/usr/local/lib + working-directory: mpi-build + + - name: Build MPI + run: make -j 2 + working-directory: mpi-build + + - name: Install MPI + run: sudo make install + working-directory: mpi-build + + - name: Tweak MPI + run: | + # Tweak MPI + mca_params="$HOME/.openmpi/mca-params.conf" + mkdir -p "$(dirname "$mca_params")" + echo mpi_param_check = true >> "$mca_params" + echo mpi_show_handle_leaks = true >> "$mca_params" + echo rmaps_base_oversubscribe = true >> "$mca_params" + mca_params="$HOME/.prte/mca-params.conf" + mkdir -p "$(dirname "$mca_params")" + echo rmaps_default_mapping_policy = :oversubscribe >> "$mca_params" + + - name: Show MPI + run: ompi_info + + - name: Show MPICC + run: mpicc -show + + - name: Use Python + uses: actions/setup-python@v5 + with: + python-version: 3 + architecture: x64 + + - name: Install Python packages (build) + run: python -m pip install --upgrade + setuptools pip wheel + + - name: Install Python packages (test) + run: python -m pip install --upgrade + numpy cffi 
pyyaml + + - name: Checkout mpi4py + uses: actions/checkout@v4 + with: + repository: "mpi4py/mpi4py" + + - name: Install mpi4py + run: python -m pip install . + env: + CFLAGS: "-O0" + + - name: Test mpi4py (singleton) + run: python test/main.py -v + if: ${{ true }} + - name: Test mpi4py (np=1) + run: mpiexec -n 1 python test/main.py -v + - name: Test mpi4py (np=2) + run: mpiexec -n 2 python test/main.py -v -f + - name: Test mpi4py (np=3) + run: mpiexec -n 3 python test/main.py -v -f + if: ${{ true }} + timeout-minutes: 20 + - name: Test mpi4py (np=4) + run: mpiexec -n 4 python test/main.py -v -f + if: ${{ true }} + timeout-minutes: 20 + - name: Test mpi4py (np=5) + run: mpiexec -n 5 python test/main.py -v -f + if: ${{ true }} + timeout-minutes: 20 + - name: Test mpi4py.run + run: python demo/test-run/test_run.py -v + if: ${{ true }} + timeout-minutes: 20 diff --git a/ompi/communicator/comm.c b/ompi/communicator/comm.c index 3b66d416cea..84e89a65fe8 100644 --- a/ompi/communicator/comm.c +++ b/ompi/communicator/comm.c @@ -1618,7 +1618,7 @@ int ompi_comm_create_from_group (ompi_group_t *group, const char *tag, opal_info /* * setup predefined keyvals - see MPI Standard for predefined keyvals cached on - * communicators created via MPI_Comm_from_group or MPI_Intercomm_create_from_groups + * communicators created via MPI_Comm_create_from_group or MPI_Intercomm_create_from_groups */ ompi_attr_hash_init(&newcomp->c_keyhash); ompi_attr_set_int(COMM_ATTR, diff --git a/ompi/communicator/comm_cid.c b/ompi/communicator/comm_cid.c index b5e7838d151..be022b8f470 100644 --- a/ompi/communicator/comm_cid.c +++ b/ompi/communicator/comm_cid.c @@ -367,7 +367,7 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu opal_show_help("help-comm.txt", "MPI function not supported", true, - "MPI_Comm_from_group/MPI_Intercomm_from_groups", + "MPI_Comm_create_from_group/MPI_Intercomm_create_from_groups", msg_string); ret = MPI_ERR_UNSUPPORTED_OPERATION; break; @@ 
-490,7 +490,7 @@ int ompi_comm_nextcid_nb (ompi_communicator_t *newcomm, ompi_communicator_t *com /* old CID algorighm */ - /* if we got here and comm is NULL then that means the app is invoking MPI-4 Sessions or later + /* if we got here and comm is NULL then that means the app is invoking MPI-4 Sessions or later functions but the pml does not support these functions so return not supported */ if (NULL == comm) { char msg_string[1024]; @@ -499,7 +499,7 @@ int ompi_comm_nextcid_nb (ompi_communicator_t *newcomm, ompi_communicator_t *com opal_show_help("help-comm.txt", "MPI function not supported", true, - "MPI_Comm_from_group/MPI_Intercomm_from_groups", + "MPI_Comm_create_from_group/MPI_Intercomm_create_from_groups", msg_string); return MPI_ERR_UNSUPPORTED_OPERATION; diff --git a/opal/mca/accelerator/rocm/accelerator_rocm_component.c b/opal/mca/accelerator/rocm/accelerator_rocm_component.c index 317de021565..8f1bbbb53a5 100644 --- a/opal/mca/accelerator/rocm/accelerator_rocm_component.c +++ b/opal/mca/accelerator/rocm/accelerator_rocm_component.c @@ -6,7 +6,7 @@ * reserved. * Copyright (c) 2017-2022 Amazon.com, Inc. or its affiliates. * All Rights reserved. - * Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights reserved. + * Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -17,13 +17,12 @@ #include "opal_config.h" #include -#include #include "opal/mca/dl/base/base.h" #include "opal/runtime/opal_params.h" #include "accelerator_rocm.h" -int opal_accelerator_rocm_memcpy_async = 1; +int opal_accelerator_rocm_memcpy_async = 0; int opal_accelerator_rocm_verbose = 0; size_t opal_accelerator_rocm_memcpyD2H_limit=1024; size_t opal_accelerator_rocm_memcpyH2D_limit=1048576; @@ -149,9 +148,9 @@ static int accelerator_rocm_component_register(void) &opal_accelerator_rocm_memcpyH2D_limit); /* Use this flag to test async vs sync copies */ - opal_accelerator_rocm_memcpy_async = 1; + opal_accelerator_rocm_memcpy_async = 0; (void) mca_base_var_register("ompi", "mpi", "accelerator_rocm", "memcpy_async", - "Set to 0 to force using hipMemcpy instead of hipMemcpyAsync", + "Set to 1 to force using hipMemcpyAsync instead of hipMemcpy", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &opal_accelerator_rocm_memcpy_async);