Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allocating multiple blocks to one mpi rank in LB #5026

Open
wants to merge 38 commits into
base: python
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
e76ecf5
Annotation for pure fluid integration
Nov 6, 2024
067d3fa
Annotation for pure fluid integration 2nd
Nov 6, 2024
6392e3c
Allocating many blocks to mpi rank
hidekb Jan 7, 2025
9e7f3c9
Add test script about domain decomposition for LBM
hidekb Jan 8, 2025
e3ee829
Added unit_tests and python integration tests for allocating multipul…
hidekb Jan 9, 2025
0135af7
Deleted unnecessary comment
hidekb Jan 9, 2025
0793276
Formatting codes for allocating multiple blocks to mpi rank
hidekb Jan 10, 2025
0c33a15
Merge branch 'python' into scale-lbm
hidekb Jan 10, 2025
d40edca
Formatting codes
hidekb Jan 10, 2025
75e9e17
Formatting codes for git style
hidekb Jan 10, 2025
e8d0b1e
Solve the conflict
hidekb Jan 10, 2025
281abc2
Formatting codes and Fix benchmarks script
hidekb Jan 10, 2025
a55c6bf
Responding to Reviews
hidekb Jan 15, 2025
cb1561c
Formatting codes
hidekb Jan 15, 2025
42a24e7
Formatting codes for clang-sanitizer
hidekb Jan 15, 2025
e26d439
Fortting codes in git style
hidekb Jan 15, 2025
a91eaf5
Responding reviews
hidekb Jan 17, 2025
a509615
Formatting codes
hidekb Jan 17, 2025
2d221c1
Fixed problems with debuging option
hidekb Jan 17, 2025
6091226
Formatting codes for clang-sanitizer
hidekb Jan 17, 2025
a6bac85
Responding to Reviews
hidekb Jan 17, 2025
1b0e7c1
Removing unneccessary comments
hidekb Jan 17, 2025
9d9bd13
Formatting codes for git-style
hidekb Jan 17, 2025
a806a06
Avoiding unintentional errors
hidekb Jan 20, 2025
f3a1520
Narrowing the scope of integerisation function
hidekb Jan 22, 2025
2229672
Refactoring
jngrad Jan 22, 2025
75d4564
Merge branch 'python' into scale-lbm
jngrad Jan 23, 2025
5f2993f
Bugfixes (WIP)
jngrad Jan 28, 2025
5e7b48b
Redesign LB composition design pattern
jngrad Jan 29, 2025
ce0de63
Cleanup
jngrad Jan 29, 2025
7acd2ba
Style
jngrad Jan 29, 2025
6a62442
Merge branch 'python' into scale-lbm
jngrad Jan 29, 2025
859c5ae
Responce to a review
hidekb Jan 30, 2025
9099a3d
Style
hidekb Jan 30, 2025
d695178
Style for git-style
hidekb Jan 30, 2025
b307474
Style for code formatting
hidekb Jan 30, 2025
3a44a6c
Style
hidekb Jan 30, 2025
0c8a53d
Style for Pylint
hidekb Jan 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 29 additions & 6 deletions maintainer/benchmarks/lb.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,18 @@
parser.add_argument("--output", metavar="FILEPATH", action="store",
type=str, required=False, default="benchmarks.csv",
help="Output file (default: benchmarks.csv)")
parser.add_argument("--divided_block", action="store",
type=int, default=1, required=False,
jngrad marked this conversation as resolved.
Show resolved Hide resolved
help="blocks^(1/3) per mpi rank")
parser.add_argument("--divided_block_x", action="store",
type=int, default=0, required=False,
help="The number of divided blocks for x direction")
parser.add_argument("--divided_block_y", action="store",
type=int, default=0, required=False,
help="The number of divided blocks for x direction")
parser.add_argument("--divided_block_z", action="store",
type=int, default=0, required=False,
help="The number of divided blocks for x direction")

args = parser.parse_args()

Expand Down Expand Up @@ -85,7 +97,10 @@
n_proc = system.cell_system.get_state()["n_nodes"]
n_part = n_proc * args.particles_per_core
if n_part == 0:
box_l = 3 * args.box_l if len(args.box_l) == 1 else args.box_l
if len(args.box_l) == 1:
box_l = 3 * args.box_l
elif len(args.box_l) == 3:
box_l = args.box_l
agrid = 1.
lb_grid = box_l
measurement_steps = 80
Expand All @@ -101,13 +116,21 @@
lb_grid = 3 * [lb_grid]
box_l = 3 * [box_l]

print(f"box length: {box_l}")
print(f"LB shape: {lb_grid}")
print(f"LB agrid: {agrid:.3f}")
divided_block_x = args.divided_block_x
divided_block_y = args.divided_block_y
divided_block_z = args.divided_block_z
if divided_block_x != 0 and divided_block_y != 0 and divided_block_z != 0:
blocks_per_mpi_rank = [divided_block_x,
divided_block_y, divided_block_z]
else:
divided_block = args.divided_block
blocks_per_mpi_rank = [divided_block] * 3

# System
#############################################################
system.box_l = box_l
system.box_l = box_l * system.cell_system.node_grid
print(f"LB agrid: {agrid:.3f}")
print("LB shape", system.box_l)

# Integration parameters
#############################################################
Expand Down Expand Up @@ -145,7 +168,7 @@
if args.multi_gpu:
system.cuda_init_handle.call_method("set_device_id_per_rank")
lbf = lb_class(agrid=agrid, tau=system.time_step, kinematic_viscosity=1.,
density=1., single_precision=args.single_precision)
density=1., single_precision=args.single_precision, blocks_per_mpi_rank=blocks_per_mpi_rank)
system.lb = lbf
if n_part:
system.thermostat.set_lb(LB_fluid=lbf, gamma=1., seed=42)
Expand Down
6 changes: 6 additions & 0 deletions src/core/integrate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -633,12 +633,18 @@ int System::System::integrate(int n_steps, int reuse_forces) {
ek.propagate();
}
} else if (lb_active) {
#ifdef CALIPER
CALI_MARK_BEGIN("LB.PROPAGATE");
#endif
auto const md_steps_per_lb_step = calc_md_steps_per_tau(lb.get_tau());
propagation.lb_skipped_md_steps += 1;
if (propagation.lb_skipped_md_steps >= md_steps_per_lb_step) {
propagation.lb_skipped_md_steps = 0;
lb.propagate();
}
#ifdef CALIPER
CALI_MARK_END("LB.PROPAGATE");
#endif
} else if (ek_active) {
auto const md_steps_per_ek_step = calc_md_steps_per_tau(ek.get_tau());
propagation.ek_skipped_md_steps += 1;
Expand Down
14 changes: 13 additions & 1 deletion src/core/lb/LBWalberla.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@
#include <optional>
#include <variant>

#ifdef CALIPER
#include <caliper/cali.h>
#endif

namespace LB {

bool LBWalberla::is_gpu() const { return lb_fluid->is_gpu(); }
Expand All @@ -50,7 +54,15 @@ Utils::VectorXd<9> LBWalberla::get_pressure_tensor() const {
return lb_fluid->get_pressure_tensor();
}

void LBWalberla::propagate() { lb_fluid->integrate(); }
void LBWalberla::propagate() {
#ifdef CALIPER
CALI_MARK_BEGIN("LBWalberla.PROPAGATE");
#endif
lb_fluid->integrate();
#ifdef CALIPER
CALI_MARK_END("LBWalberla.PROPAGATE");
#endif
}

void LBWalberla::ghost_communication() { lb_fluid->ghost_communication(); }

Expand Down
10 changes: 10 additions & 0 deletions src/core/lb/Solver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@
#include <variant>
#include <vector>

#ifdef CALIPER
#include <caliper/cali.h>
#endif

namespace LB {

Solver::Solver() { impl = std::make_unique<Implementation>(); }
Expand All @@ -69,8 +73,14 @@ void Solver::reset() {
}

void Solver::propagate() {
#ifdef CALIPER
CALI_MARK_BEGIN("SOLVER.PROPAGATE");
#endif
check_solver(impl);
std::visit([](auto &ptr) { ptr->propagate(); }, *impl->solver);
#ifdef CALIPER
CALI_MARK_END("SOLVER.PROPAGATE");
#endif
}

void Solver::ghost_communication() {
Expand Down
3 changes: 2 additions & 1 deletion src/core/unit_tests/ek_interface_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ static auto make_ek_actor() {
auto constexpr n_ghost_layers = 1u;
auto constexpr single_precision = true;
ek_lattice = std::make_shared<LatticeWalberla>(
params.grid_dimensions, ::communicator.node_grid, n_ghost_layers);
params.grid_dimensions, ::communicator.node_grid,
::communicator.node_grid, n_ghost_layers);
ek_container = std::make_shared<EK::EKWalberla::ek_container_type>(
params.tau, walberla::new_ek_poisson_none(ek_lattice, single_precision));
ek_reactions = std::make_shared<EK::EKWalberla::ek_reactions_type>();
Expand Down
6 changes: 4 additions & 2 deletions src/core/unit_tests/lb_particle_coupling_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ static auto make_lb_actor() {
auto constexpr single_precision = false;
lb_params = std::make_shared<LB::LBWalberlaParams>(params.agrid, params.tau);
lb_lattice = std::make_shared<LatticeWalberla>(
params.grid_dimensions, ::communicator.node_grid, n_ghost_layers);
params.grid_dimensions, ::communicator.node_grid,
::communicator.node_grid, n_ghost_layers);
lb_fluid = new_lb_walberla_cpu(lb_lattice, params.viscosity, params.density,
single_precision);
lb_fluid->set_collision_model(params.kT, params.seed);
Expand Down Expand Up @@ -535,7 +536,8 @@ bool test_lb_domain_mismatch_local() {
auto const params = std::make_shared<LB::LBWalberlaParams>(0.5, 0.01);
::communicator.node_grid = node_grid_reversed;
auto const lattice = std::make_shared<LatticeWalberla>(
Utils::Vector3i{12, 12, 12}, node_grid_original, n_ghost_layers);
Utils::Vector3i{12, 12, 12}, node_grid_original, node_grid_original,
n_ghost_layers);
auto const ptr = new_lb_walberla_cpu(lattice, 1.0, 1.0, false);
ptr->set_collision_model(0.0, 0);
::communicator.node_grid = node_grid_original;
Expand Down
2 changes: 1 addition & 1 deletion src/python/espressomd/detail/walberla.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def __init__(self, *args, **kwargs):
super().__init__(**kwargs)

def valid_keys(self):
return {"agrid", "n_ghost_layers"}
return {"agrid", "n_ghost_layers", "blocks_per_mpi_rank"}

def required_keys(self):
return self.valid_keys()
Expand Down
8 changes: 5 additions & 3 deletions src/python/espressomd/lb.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,14 @@ def validate_params(self, params):

def valid_keys(self):
return {"agrid", "tau", "density", "ext_force_density",
"kinematic_viscosity", "lattice", "kT", "seed"}
"kinematic_viscosity", "lattice", "kT", "seed", "blocks_per_mpi_rank"}

def required_keys(self):
return {"lattice", "density", "kinematic_viscosity", "tau"}

def default_params(self):
return {"lattice": None, "seed": 0, "kT": 0.,
"ext_force_density": [0.0, 0.0, 0.0]}
"ext_force_density": [0.0, 0.0, 0.0], "blocks_per_mpi_rank": [1, 1, 1]}

def mach_limit(self):
"""
Expand Down Expand Up @@ -141,6 +141,8 @@ class LBFluidWalberla(HydrodynamicInteraction,
Required for a thermalized fluid. Must be positive.
single_precision : :obj:`bool`, optional
Use single-precision floating-point arithmetic.
blocks_per_mpi_rank : (3,) array_like of :obj:`int`, optional
Ditribute more than one block to each CPU.
jngrad marked this conversation as resolved.
Show resolved Hide resolved

Methods
-------
Expand Down Expand Up @@ -240,7 +242,7 @@ def validate_params(self, params):
if "agrid" not in params:
raise ValueError("missing argument 'lattice' or 'agrid'")
params["lattice"] = LatticeWalberla(
agrid=params.pop("agrid"), n_ghost_layers=1)
agrid=params.pop("agrid"), n_ghost_layers=1, blocks_per_mpi_rank=params.get("blocks_per_mpi_rank"))
elif "agrid" in params:
raise ValueError("cannot provide both 'lattice' and 'agrid'")

Expand Down
6 changes: 6 additions & 0 deletions src/script_interface/walberla/LBFluid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,12 @@ void LBFluidGPU::make_instance(VariantMap const &params) {
auto const visc = get_value<double>(params, "kinematic_viscosity");
auto const dens = get_value<double>(params, "density");
auto const precision = get_value<bool>(params, "single_precision");
auto const blocks_per_mpi_rank = get_value_or<Utils::Vector3i>(
params, "blocks_per_mpi_rank", Utils::Vector3i{{1, 1, 1}});
jngrad marked this conversation as resolved.
Show resolved Hide resolved
if (blocks_per_mpi_rank != Utils::Vector3i{{1, 1, 1}}) {
throw std::runtime_error(
"GPU architecture PROHIBITED allocating many blocks to 1 CPU.");
}
jngrad marked this conversation as resolved.
Show resolved Hide resolved
auto const lb_lattice = m_lattice->lattice();
auto const lb_visc = m_conv_visc * visc;
auto const lb_dens = m_conv_dens * dens;
Expand Down
14 changes: 13 additions & 1 deletion src/script_interface/walberla/LatticeWalberla.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class LatticeWalberla : public AutoParameters<LatticeWalberla> {
std::shared_ptr<::LatticeWalberla> m_lattice;
double m_agrid;
Utils::Vector3d m_box_l;
Utils::Vector3i m_blocks_per_mpi_rank;

public:
LatticeWalberla() {
Expand All @@ -53,14 +54,25 @@ class LatticeWalberla : public AutoParameters<LatticeWalberla> {
{"shape", AutoParameter::read_only,
[this]() { return m_lattice->get_grid_dimensions(); }},
{"_box_l", AutoParameter::read_only, [this]() { return m_box_l; }},
{"blocks_per_mpi_rank", AutoParameter::read_only,
[this]() { return m_blocks_per_mpi_rank; }},
});
}

void do_construct(VariantMap const &args) override {
auto const &box_geo = *::System::get_system().box_geo;
m_agrid = get_value<double>(args, "agrid");
m_box_l = get_value_or<Utils::Vector3d>(args, "_box_l", box_geo.length());
m_blocks_per_mpi_rank = get_value_or<Utils::Vector3i>(
args, "blocks_per_mpi_rank", Utils::Vector3i{{1, 1, 1}});
auto const n_ghost_layers = get_value<int>(args, "n_ghost_layers");
auto const block_grid =
Utils::Vector3i{{static_cast<int>(::communicator.node_grid[0] *
m_blocks_per_mpi_rank[0]),
static_cast<int>(::communicator.node_grid[1] *
m_blocks_per_mpi_rank[1]),
static_cast<int>(::communicator.node_grid[2] *
m_blocks_per_mpi_rank[2])}};
jngrad marked this conversation as resolved.
Show resolved Hide resolved

context()->parallel_try_catch([&]() {
if (m_agrid <= 0.) {
Expand All @@ -72,7 +84,7 @@ class LatticeWalberla : public AutoParameters<LatticeWalberla> {
auto const grid_dim =
::LatticeWalberla::calc_grid_dimensions(m_box_l, m_agrid);
m_lattice = std::make_shared<::LatticeWalberla>(
grid_dim, ::communicator.node_grid,
grid_dim, ::communicator.node_grid, block_grid,
static_cast<unsigned int>(n_ghost_layers));
});
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class LatticeWalberla {
public:
LatticeWalberla(Utils::Vector3i const &grid_dimensions,
Utils::Vector3i const &node_grid,
Utils::Vector3i const &block_grid,
unsigned int n_ghost_layers);

// Grid, domain, halo
Expand Down
4 changes: 2 additions & 2 deletions src/walberla_bridge/src/BoundaryPackInfo.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class BoundaryPackInfo : public PackInfo<GhostLayerField_T> {
WALBERLA_ASSERT_EQUAL(bSize, buf_size);
#endif

auto const offset = std::get<0>(m_lattice->get_local_grid_range());
auto const offset = to_vector3i(receiver->getAABB().min());
jngrad marked this conversation as resolved.
Show resolved Hide resolved
typename Boundary_T::value_type value;
for (auto it = begin(flag_field); it != flag_field->end(); ++it) {
if (isFlagSet(it, boundary_flag)) {
Expand Down Expand Up @@ -133,7 +133,7 @@ class BoundaryPackInfo : public PackInfo<GhostLayerField_T> {
<< buf_size;
#endif

auto const offset = std::get<0>(m_lattice->get_local_grid_range());
auto const offset = to_vector3i(sender->getAABB().min());
for (auto it = begin(flag_field); it != flag_field->end(); ++it) {
if (isFlagSet(it, boundary_flag)) {
auto const node = offset + Utils::Vector3i{{it.x(), it.y(), it.z()}};
Expand Down
57 changes: 49 additions & 8 deletions src/walberla_bridge/src/LatticeWalberla.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@

LatticeWalberla::LatticeWalberla(Utils::Vector3i const &grid_dimensions,
Utils::Vector3i const &node_grid,
Utils::Vector3i const &block_grid,
unsigned int n_ghost_layers)
: m_grid_dimensions{grid_dimensions}, m_n_ghost_layers{n_ghost_layers} {
using walberla::real_t;
Expand All @@ -50,21 +51,28 @@ LatticeWalberla::LatticeWalberla(Utils::Vector3i const &grid_dimensions,
throw std::runtime_error(
"Lattice grid dimensions and MPI node grid are not compatible.");
}
if (m_grid_dimensions[i] % block_grid[i] != 0) {
throw std::runtime_error(
"Lattice grid dimensions and block grid are not compatible.");
}
}

auto constexpr lattice_constant = real_t{1};
auto const cells_block = Utils::hadamard_division(grid_dimensions, node_grid);
jngrad marked this conversation as resolved.
Show resolved Hide resolved
auto const cells_block =
Utils::hadamard_division(grid_dimensions, block_grid);

m_blocks = walberla::blockforest::createUniformBlockGrid(
// number of blocks in each direction
uint_c(node_grid[0]), uint_c(node_grid[1]), uint_c(node_grid[2]),
uint_c(block_grid[0]), uint_c(block_grid[1]), uint_c(block_grid[2]),
// number of cells per block in each direction
uint_c(cells_block[0]), uint_c(cells_block[1]), uint_c(cells_block[2]),
lattice_constant,
// number of cpus per direction
uint_c(node_grid[0]), uint_c(node_grid[1]), uint_c(node_grid[2]),
// periodicity
true, true, true);
true, true, true,
// keep global block information
jngrad marked this conversation as resolved.
Show resolved Hide resolved
false);
for (IBlock &block : *m_blocks) {
m_cached_blocks.push_back(&block);
}
Expand All @@ -73,11 +81,44 @@ LatticeWalberla::LatticeWalberla(Utils::Vector3i const &grid_dimensions,
[[nodiscard]] std::pair<Utils::Vector3d, Utils::Vector3d>
jngrad marked this conversation as resolved.
Show resolved Hide resolved
LatticeWalberla::get_local_domain() const {
using walberla::to_vector3d;
// We only have one block per mpi rank
assert(++(m_blocks->begin()) == m_blocks->end());

auto const ab = m_blocks->begin()->getAABB();
return {to_vector3d(ab.min()), to_vector3d(ab.max())};
// Get upper and lower corner of BlockForest assigned to a mpi rank.
// Since we can allocate multiple blocks per mpi rank,
// the corners of all Blocks are compared.
int64_t const stride_y = m_grid_dimensions[2];
int64_t const stride_x = m_grid_dimensions[1] * stride_y;
auto aa = m_blocks->begin()->getAABB();
auto bb = m_blocks->begin()->getAABB();
int64_t aa_index = stride_x * static_cast<int>(aa.min()[0]) +
stride_y * static_cast<int>(aa.min()[1]) +
static_cast<int>(aa.min()[2]);
int64_t bb_index = stride_x * static_cast<int>(bb.max()[0]) +
stride_y * static_cast<int>(bb.max()[1]) +
static_cast<int>(bb.max()[2]);
for (auto b = m_blocks->begin(); b != m_blocks->end(); ++b) {
auto cc = b->getAABB();
for (auto const i : {0u, 1u, 2u}) {
if ((cc.max()[i] - cc.min()[i]) != 0) {
assert(m_grid_dimensions[i] %
static_cast<int>(cc.max()[i] - cc.min()[i]) ==
0);
}
}
int64_t min_index = stride_x * static_cast<int>(cc.min()[0]) +
stride_y * static_cast<int>(cc.min()[1]) +
static_cast<int>(cc.min()[2]);
int64_t max_index = stride_x * static_cast<int>(cc.max()[0]) +
stride_y * static_cast<int>(cc.max()[1]) +
static_cast<int>(cc.max()[2]);
if (min_index < aa_index) {
aa = cc;
aa_index = min_index;
}
if (max_index > bb_index) {
bb = cc;
bb_index = max_index;
}
}
return {to_vector3d(aa.min()), to_vector3d(bb.max())};
}

[[nodiscard]] bool
Expand Down
Loading
Loading