From 70fd241710283b9d21bda275d9d424691d98bdbb Mon Sep 17 00:00:00 2001 From: Sam Reeve <6740307+streeve@users.noreply.github.com> Date: Fri, 29 Sep 2023 10:50:04 -0400 Subject: [PATCH 1/2] Global particle redistribution based on the grid --- grid/src/CMakeLists.txt | 1 + grid/src/Cabana_Grid.hpp | 1 + grid/src/Cabana_Grid_GlobalParticleComm.hpp | 322 ++++++++++++++++++++ grid/unit_test/CMakeLists.txt | 1 + grid/unit_test/tstGlobalParticleComm.hpp | 242 +++++++++++++++ 5 files changed, 567 insertions(+) create mode 100644 grid/src/Cabana_Grid_GlobalParticleComm.hpp create mode 100644 grid/unit_test/tstGlobalParticleComm.hpp diff --git a/grid/src/CMakeLists.txt b/grid/src/CMakeLists.txt index 975be8735..53bba4259 100644 --- a/grid/src/CMakeLists.txt +++ b/grid/src/CMakeLists.txt @@ -19,6 +19,7 @@ set(HEADERS_PUBLIC Cabana_Grid_GlobalGrid.hpp Cabana_Grid_GlobalGrid_impl.hpp Cabana_Grid_GlobalMesh.hpp + Cabana_Grid_GlobalParticleComm.hpp Cabana_Grid_Halo.hpp Cabana_Grid_IndexConversion.hpp Cabana_Grid_IndexSpace.hpp diff --git a/grid/src/Cabana_Grid.hpp b/grid/src/Cabana_Grid.hpp index e9b498948..5b67499e7 100644 --- a/grid/src/Cabana_Grid.hpp +++ b/grid/src/Cabana_Grid.hpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/grid/src/Cabana_Grid_GlobalParticleComm.hpp b/grid/src/Cabana_Grid_GlobalParticleComm.hpp new file mode 100644 index 000000000..171d9d975 --- /dev/null +++ b/grid/src/Cabana_Grid_GlobalParticleComm.hpp @@ -0,0 +1,322 @@ +/**************************************************************************** + * Copyright (c) 2018-2023 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +/*! + \file Cabana_Grid_GlobalParticleComm.hpp + \brief Global particle communication. +*/ +#ifndef CABANA_GRID_GLOBALPARTICLECOMM_HPP +#define CABANA_GRID_GLOBALPARTICLECOMM_HPP + +#include +#include + +#include +#include + +#include +#include +#include + +namespace Cabana +{ +namespace Grid +{ +//---------------------------------------------------------------------------// +/*! + \brief Global particle communication based on the background grid. +*/ +template +class GlobalParticleComm +{ + public: + //! Spatial dimension. + static constexpr std::size_t num_space_dim = LocalGridType::num_space_dim; + //! Mesh type. + using mesh_type = typename LocalGridType::mesh_type; + //! Global grid. + using global_grid_type = Cabana::Grid::GlobalGrid; + //! Kokkos memory space. + using memory_space = MemorySpace; + + //! Local boundary View type. + using corner_view_type = + Kokkos::View; + //! Particle destination ranks View type. + using destination_view_type = Kokkos::View; + //! Cartesian rank View type. + using rank_view_type = + Kokkos::View; + //! Cartesian rank View type (host). + using host_rank_view_type = + Kokkos::View; + + //! \brief Constructor. + GlobalParticleComm( const LocalGridType local_grid ) + { + auto global_grid = local_grid.globalGrid(); + _destinations = destination_view_type( + Kokkos::ViewAllocateWithoutInitializing( "global_destination" ), + 0 ); + + int max_ranks_per_dim = -1; + for ( std::size_t d = 0; d < num_space_dim; ++d ) + { + _ranks_per_dim[d] = global_grid.dimNumBlock( d ); + if ( _ranks_per_dim[d] > max_ranks_per_dim ) + max_ranks_per_dim = _ranks_per_dim[d]; + } + copyRanks( global_grid ); + + // Purposely using zero-init. Some entries unused in non-cubic + // decompositions. + _local_corners = + corner_view_type( "local_mpi_boundaries", max_ranks_per_dim ); + + _rank_1d = global_grid.blockId(); + for ( std::size_t d = 0; d < num_space_dim; ++d ) + _rank[d] = global_grid.dimBlockId( d ); + + auto local_mesh = createLocalMesh( local_grid ); + storeRanks( local_mesh ); + + // Update local boundaries from all ranks. + auto comm = global_grid.comm(); + // TODO: Could use subcommunicators instead. + MPI_Allreduce( MPI_IN_PLACE, _local_corners.data(), + _local_corners.size(), MPI_DOUBLE, MPI_SUM, comm ); + + scaleRanks(); + } + + //! Store local rank boundaries from the local mesh. + template + void storeRanks( LocalMeshType local_mesh ) + { + auto local_corners = _local_corners; + auto rank = _rank; + auto store_corners = KOKKOS_LAMBDA( const std::size_t d ) + { + local_corners( rank[d], d, 0 ) = + local_mesh.lowCorner( Cabana::Grid::Own(), d ); + local_corners( rank[d], d, 1 ) = + local_mesh.highCorner( Cabana::Grid::Own(), d ); + }; + using exec_space = typename memory_space::execution_space; + Kokkos::RangePolicy policy( 0, num_space_dim ); + Kokkos::parallel_for( "Cabana::Grid::GlobalParticleComm::storeCorners", + policy, store_corners ); + Kokkos::fence(); + } + + //! Scale local rank boundaries based on double counting from MPI reduction. + void scaleRanks() + { + auto scale = getScaling(); + + auto local_corners = _local_corners; + auto ranks_per_dim = _ranks_per_dim; + auto scale_corners = KOKKOS_LAMBDA( const std::size_t d ) + { + for ( int r = 0; r < ranks_per_dim[d]; ++r ) + { + local_corners( r, d, 0 ) /= scale[d]; + local_corners( r, d, 1 ) /= scale[d]; + } + }; + using exec_space = typename memory_space::execution_space; + Kokkos::RangePolicy policy( 0, num_space_dim ); + Kokkos::parallel_for( "Cabana::Grid::GlobalParticleComm::scaleCorners", + policy, scale_corners ); + Kokkos::fence(); + } + + //! Scaling factors due to double counting from MPI reduction. + template + std::enable_if_t<3 == NSD, Kokkos::Array> getScaling() + { + Kokkos::Array scale; + scale[0] = _ranks_per_dim[1] * _ranks_per_dim[2]; + scale[1] = _ranks_per_dim[0] * _ranks_per_dim[2]; + scale[2] = _ranks_per_dim[0] * _ranks_per_dim[1]; + return scale; + } + + //! Scaling factors due to double counting from MPI reduction. + template + std::enable_if_t<2 == NSD, Kokkos::Array> getScaling() + { + Kokkos::Array scale; + scale[0] = _ranks_per_dim[1]; + scale[1] = _ranks_per_dim[0]; + return scale; + } + + //! Store all cartesian MPI ranks. + template + std::enable_if_t<3 == NSD, void> copyRanks( GlobalGridType global_grid ) + { + host_rank_view_type global_ranks_host( + Kokkos::ViewAllocateWithoutInitializing( "ranks_host" ), + _ranks_per_dim[0], _ranks_per_dim[1], _ranks_per_dim[2] ); + for ( int i = 0; i < _ranks_per_dim[0]; ++i ) + for ( int j = 0; j < _ranks_per_dim[1]; ++j ) + for ( int k = 0; k < _ranks_per_dim[2]; ++k ) + // Not device accessible (uses MPI), so must be copied. + global_ranks_host( i, j, k ) = + global_grid.blockRank( i, j, k ); + + _global_ranks = Kokkos::create_mirror_view_and_copy( + memory_space(), global_ranks_host ); + } + + //! Store all cartesian MPI ranks. + template + std::enable_if_t<2 == NSD, void> copyRanks( GlobalGridType global_grid ) + { + // Storing as 3d for convenience. + host_rank_view_type global_ranks_host( + Kokkos::ViewAllocateWithoutInitializing( "ranks_host" ), + _ranks_per_dim[0], _ranks_per_dim[1], 1 ); + for ( int i = 0; i < _ranks_per_dim[0]; ++i ) + for ( int j = 0; j < _ranks_per_dim[1]; ++j ) + // Not device accessible (uses MPI), so must be copied. + global_ranks_host( i, j, 0 ) = global_grid.blockRank( i, j ); + + _global_ranks = Kokkos::create_mirror_view_and_copy( + memory_space(), global_ranks_host ); + } + + /*! + \brief Bin particles across the global grid. + + Because of MPI partitioning, this is not a perfect grid (as the Core + LinkedCellList is), so we use binary search instead of direct 3d->1d + indexing. + */ + template + void build( ExecutionSpace exec_space, PositionType positions, + const std::size_t begin, const std::size_t end ) + { + Kokkos::Profiling::pushRegion( + "Cabana::Grid::GlobalParticleComm::build" ); + + static_assert( is_accessible_from{}, "" ); + assert( end >= begin ); + assert( end <= positions.size() ); + + // Must match the size of all particles, even if some can be ignored in + // this search. + Kokkos::resize( _destinations, positions.size() ); + // Start with everything staying on this rank. + Kokkos::deep_copy( _destinations, _rank_1d ); + + // Local copies for lambda capture. + auto local_corners = _local_corners; + auto ranks_per_dim = _ranks_per_dim; + auto destinations = _destinations; + auto global_ranks = _global_ranks; + auto build_migrate = KOKKOS_LAMBDA( const std::size_t p ) + { + // This is not num_space_dim because global_ranks is always rank-3 + // out of convenience. + int ijk[3] = { 0, 0, 0 }; + + // Find the rank this particle should be moved to. + for ( std::size_t d = 0; d < num_space_dim; ++d ) + { + int min = 0; + int max = ranks_per_dim[d]; + + // Check if outside the box in this dimension. + if ( ( positions( p, d ) < local_corners( min, d, 0 ) ) || + ( positions( p, d ) > local_corners( max - 1, d, 1 ) ) ) + destinations( p ) = -1; + + // Do a binary search for this particle in this dimension. + while ( max - min > 1 ) + { + int center = Kokkos::floor( ( max + min ) / 2.0 ); + if ( positions( p, d ) < local_corners( center, d, 0 ) ) + max = center; + else + min = center; + } + ijk[d] = min; + } + // Keep the destination rank for eventual migration. + destinations( p ) = global_ranks( ijk[0], ijk[1], ijk[2] ); + }; + + Kokkos::RangePolicy policy( exec_space, begin, end ); + Kokkos::parallel_for( "Cabana::Grid::GlobalParticleComm::build", policy, + build_migrate ); + Kokkos::fence(); + + Kokkos::Profiling::popRegion(); + } + + //! Bin particles across the global grid. + template + void build( ExecutionSpace exec_space, PositionType positions ) + { + build( exec_space, positions, 0, positions.size() ); + } + + //! Bin particles across the global grid. + template + void build( PositionType positions ) + { + using execution_space = typename memory_space::execution_space; + // TODO: enable views. + build( execution_space{}, positions, 0, positions.size() ); + } + + //! Migrate particles to the correct rank. + template + void migrate( MPI_Comm comm, AoSoAType& aosoa ) + { + Cabana::Distributor distributor( comm, _destinations ); + Cabana::migrate( distributor, aosoa ); + } + + protected: + //! Current rank. + int _rank_1d; + //! Current cartesian rank. + Kokkos::Array _rank; + //! Total ranks per dimension. + Kokkos::Array _ranks_per_dim; + //! Local boundaries. + corner_view_type _local_corners; + //! All cartesian ranks. + rank_view_type _global_ranks; + //! Particle destination ranks. + destination_view_type _destinations; +}; + +/*! + \brief Create global linked cell binning. + \return Shared pointer to a GlobalParticleComm. +*/ +template +auto createGlobalParticleComm( const LocalGridType& local_grid ) +{ + return std::make_shared>( + local_grid ); +} + +//---------------------------------------------------------------------------// + +} // namespace Grid +} // namespace Cabana + +#endif // end CABANA_GRID_GLOBALPARTICLECOMM_HPP diff --git a/grid/unit_test/CMakeLists.txt b/grid/unit_test/CMakeLists.txt index 3cd61a147..40c3574ca 100644 --- a/grid/unit_test/CMakeLists.txt +++ b/grid/unit_test/CMakeLists.txt @@ -20,6 +20,7 @@ set(SERIAL_TESTS set(MPI_TESTS GlobalGrid + GlobalParticleComm LocalGrid IndexConversion LocalMesh3d diff --git a/grid/unit_test/tstGlobalParticleComm.hpp b/grid/unit_test/tstGlobalParticleComm.hpp new file mode 100644 index 000000000..4c1cbd0ea --- /dev/null +++ b/grid/unit_test/tstGlobalParticleComm.hpp @@ -0,0 +1,242 @@ +/**************************************************************************** + * Copyright (c) 2018-2023 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +namespace Test +{ + +//---------------------------------------------------------------------------// +void testMigrate3d() +{ + std::array is_dim_periodic = { true, true, true }; + + // Create the global mesh. + std::array global_low = { -1.2, 0.1, 1.1 }; + std::array global_high = { -0.3, 9.5, 2.3 }; + double cell_size = 0.05; + auto global_mesh = Cabana::Grid::createUniformGlobalMesh( + global_low, global_high, cell_size ); + int num_particles = 200; + + // Create the global grid. + Cabana::Grid::DimBlockPartitioner<3> partitioner; + auto global_grid = Cabana::Grid::createGlobalGrid( + MPI_COMM_WORLD, global_mesh, is_dim_periodic, partitioner ); + + // Create a local grid + int halo_width = 1; + auto local_grid = Cabana::Grid::createLocalGrid( global_grid, halo_width ); + + // Create the communication object. + auto global_comm = + Cabana::Grid::createGlobalParticleComm( *local_grid ); + + // Create random particles. + using DataTypes = Cabana::MemberTypes; + using AoSoA_t = Cabana::AoSoA; + AoSoA_t particles( "random", num_particles ); + auto position = Cabana::slice<1>( particles ); + + using PoolType = Kokkos::Random_XorShift64_Pool; + using RandomType = Kokkos::Random_XorShift64; + PoolType pool( 174748 ); + + // Copy box bounds to device array. + Kokkos::Array global_low_kokkos; + Kokkos::Array global_high_kokkos; + for ( int d = 0; d < 3; ++d ) + { + global_low_kokkos[d] = global_low[d]; + global_high_kokkos[d] = global_high[d]; + } + + // Create particles randomly in the global domain. + auto random_coord_op = KOKKOS_LAMBDA( const int p ) + { + auto gen = pool.get_state(); + for ( int d = 0; d < 3; ++d ) + { + position( p, d ) = Kokkos::rand::draw( + gen, global_low_kokkos[d], global_high_kokkos[d] ); + } + pool.free_state( gen ); + }; + Kokkos::RangePolicy policy( 0, num_particles ); + Kokkos::parallel_for( policy, random_coord_op ); + Kokkos::fence(); + + // Plan the communication. + global_comm->build( position ); + + // Move particles to the correct rank. + global_comm->migrate( global_grid->comm(), particles ); + + // Get the local domain bounds to check particles. + auto local_mesh = + Cabana::Grid::createLocalMesh( *local_grid ); + std::array local_low = { + local_mesh.lowCorner( Cabana::Grid::Own(), Cabana::Grid::Dim::I ), + local_mesh.lowCorner( Cabana::Grid::Own(), Cabana::Grid::Dim::J ), + local_mesh.lowCorner( Cabana::Grid::Own(), Cabana::Grid::Dim::K ) }; + std::array local_high = { + local_mesh.highCorner( Cabana::Grid::Own(), Cabana::Grid::Dim::I ), + local_mesh.highCorner( Cabana::Grid::Own(), Cabana::Grid::Dim::J ), + local_mesh.highCorner( Cabana::Grid::Own(), Cabana::Grid::Dim::K ) }; + + // Copy particles to the host. + Cabana::AoSoA particles_host( + "migrated", particles.size() ); + Cabana::deep_copy( particles_host, particles ); + auto position_host = Cabana::slice<1>( particles_host ); + + // Make sure the total particles were conserved. + int global_particles; + int local_particles = static_cast( particles.size() ); + MPI_Reduce( &local_particles, &global_particles, 1, MPI_INT, MPI_SUM, 0, + MPI_COMM_WORLD ); + if ( global_grid->blockId() == 0 ) + { + EXPECT_EQ( global_particles, + num_particles * global_grid->totalNumBlock() ); + } + + for ( std::size_t p = 0; p < particles.size(); ++p ) + { + // Check that all of the particles were moved to the correct local rank. + for ( int d = 0; d < 3; ++d ) + { + EXPECT_GE( position_host( p, d ), local_low[d] ); + EXPECT_LE( position_host( p, d ), local_high[d] ); + } + } +} + +void testMigrate2d() +{ + std::array is_dim_periodic = { true, true }; + + // Create the global mesh. + std::array global_low = { -1.2, 0.1 }; + std::array global_high = { -0.3, 9.5 }; + double cell_size = 0.05; + auto global_mesh = Cabana::Grid::createUniformGlobalMesh( + global_low, global_high, cell_size ); + int num_particles = 200; + + // Create the global grid. + Cabana::Grid::DimBlockPartitioner<2> partitioner; + auto global_grid = Cabana::Grid::createGlobalGrid( + MPI_COMM_WORLD, global_mesh, is_dim_periodic, partitioner ); + + // Create a local grid + int halo_width = 1; + auto local_grid = Cabana::Grid::createLocalGrid( global_grid, halo_width ); + + // Create the communication object. + auto global_comm = + Cabana::Grid::createGlobalParticleComm( *local_grid ); + + // Create random particles. + using DataTypes = Cabana::MemberTypes; + using AoSoA_t = Cabana::AoSoA; + AoSoA_t particles( "random", num_particles ); + auto position = Cabana::slice<1>( particles ); + + using PoolType = Kokkos::Random_XorShift64_Pool; + using RandomType = Kokkos::Random_XorShift64; + PoolType pool( 174748 ); + + // Create particles randomly in the global domain. + auto random_coord_op = KOKKOS_LAMBDA( const int p ) + { + auto gen = pool.get_state(); + for ( int d = 0; d < 2; ++d ) + { + position( p, d ) = Kokkos::rand::draw( + gen, global_low[d], global_high[d] ); + } + pool.free_state( gen ); + }; + Kokkos::RangePolicy policy( 0, num_particles ); + Kokkos::parallel_for( policy, random_coord_op ); + Kokkos::fence(); + + // Plan the communication. + global_comm->build( position ); + + // Move particles to the correct rank. + global_comm->migrate( global_grid->comm(), particles ); + + // Get the local domain bounds to check particles. + auto local_mesh = + Cabana::Grid::createLocalMesh( *local_grid ); + std::array local_low = { + local_mesh.lowCorner( Cabana::Grid::Own(), Cabana::Grid::Dim::I ), + local_mesh.lowCorner( Cabana::Grid::Own(), Cabana::Grid::Dim::J ) }; + std::array local_high = { + local_mesh.highCorner( Cabana::Grid::Own(), Cabana::Grid::Dim::I ), + local_mesh.highCorner( Cabana::Grid::Own(), Cabana::Grid::Dim::J ) }; + + // Copy particles to the host. + Cabana::AoSoA particles_host( + "migrated", particles.size() ); + Cabana::deep_copy( particles_host, particles ); + auto position_host = Cabana::slice<1>( particles_host ); + + // Make sure the total particles were conserved. + int global_particles; + int local_particles = static_cast( particles.size() ); + MPI_Reduce( &local_particles, &global_particles, 1, MPI_INT, MPI_SUM, 0, + MPI_COMM_WORLD ); + if ( global_grid->blockId() == 0 ) + { + EXPECT_EQ( global_particles, + num_particles * global_grid->totalNumBlock() ); + } + + for ( std::size_t p = 0; p < particles.size(); ++p ) + { + // Check that all of the particles were moved to the correct local rank. + for ( int d = 0; d < 2; ++d ) + { + EXPECT_GE( position_host( p, d ), local_low[d] ); + EXPECT_LE( position_host( p, d ), local_high[d] ); + } + } +} + +//---------------------------------------------------------------------------// +// RUN TESTS +//---------------------------------------------------------------------------// +TEST( global_particle_comm, migrate_3d_test ) { testMigrate3d(); } + +TEST( global_particle_comm, migrate_2d_test ) { testMigrate2d(); } + +} // namespace Test From 23ca9662ec40b1c643d5de2c279a33e005ce65f4 Mon Sep 17 00:00:00 2001 From: Sam Reeve <6740307+streeve@users.noreply.github.com> Date: Mon, 20 Nov 2023 11:34:31 -0500 Subject: [PATCH 2/2] Disable global comm in CUDA SYCL tests --- .jenkins | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins b/.jenkins index 09d3bb34d..c658ff8b1 100644 --- a/.jenkins +++ b/.jenkins @@ -130,7 +130,7 @@ pipeline { sh ''' . /opt/intel/oneapi/setvars.sh --include-intel-llvm && \ cd build && \ - ctest --output-on-failure -E "Cabana.*MPI|Cabana.*Sort|Cabana_Performance_Comm|Grid_IndexConversion|Grid_Halo|Grid_ParticleGridDistributor|Grid_Interpolation|Grid_BovWriter|valgrind" ''' + ctest --output-on-failure -E "Cabana.*MPI|Cabana.*Sort|Cabana_Performance_Comm|Grid_IndexConversion|Grid_Halo|Grid_ParticleGridDistributor|Grid_Interpolation|Grid_BovWriter|Grid_GlobalParticleComm|valgrind" ''' } post { always {