Skip to content

Commit

Permalink
Responding to Reviews
Browse files Browse the repository at this point in the history
  • Loading branch information
hidekb committed Jan 15, 2025
1 parent 281abc2 commit a55c6bf
Show file tree
Hide file tree
Showing 11 changed files with 457 additions and 479 deletions.
38 changes: 10 additions & 28 deletions maintainer/benchmarks/lb.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,18 +50,9 @@
parser.add_argument("--output", metavar="FILEPATH", action="store",
type=str, required=False, default="benchmarks.csv",
help="Output file (default: benchmarks.csv)")
parser.add_argument("--divided_block", action="store",
type=int, default=1, required=False,
help="blocks^(1/3) per mpi rank")
parser.add_argument("--divided_block_x", action="store",
type=int, default=0, required=False,
help="The number of divided blocks for x direction")
parser.add_argument("--divided_block_y", action="store",
type=int, default=0, required=False,
help="The number of divided blocks for x direction")
parser.add_argument("--divided_block_z", action="store",
type=int, default=0, required=False,
help="The number of divided blocks for x direction")
parser.add_argument("--blocks_per_mpi_rank", action="store", nargs=3,
type=int, default=[1, 1, 1], required=False,
help="blocks per mpi rank")

args = parser.parse_args()

Expand Down Expand Up @@ -97,10 +88,7 @@
n_proc = system.cell_system.get_state()["n_nodes"]
n_part = n_proc * args.particles_per_core
if n_part == 0:
if len(args.box_l) == 1:
box_l = 3 * args.box_l
elif len(args.box_l) == 3:
box_l = args.box_l
box_l = 3 * args.box_l if len(args.box_l) == 1 else args.box_l
agrid = 1.
lb_grid = box_l
measurement_steps = 80
Expand All @@ -116,21 +104,15 @@
lb_grid = 3 * [lb_grid]
box_l = 3 * [box_l]

divided_block_x = args.divided_block_x
divided_block_y = args.divided_block_y
divided_block_z = args.divided_block_z
if divided_block_x != 0 and divided_block_y != 0 and divided_block_z != 0:
blocks_per_mpi_rank = [divided_block_x,
divided_block_y, divided_block_z]
else:
divided_block = args.divided_block
blocks_per_mpi_rank = [divided_block] * 3
print(f"box length: {box_l}")
print(f"LB shape: {lb_grid}")
print(f"LB agrid: {agrid:.3f}")

blocks_per_mpi_rank = args.blocks_per_mpi_rank

# System
#############################################################
system.box_l = box_l * system.cell_system.node_grid
print(f"LB agrid: {agrid:.3f}")
print("LB shape", system.box_l)
system.box_l = box_l

# Integration parameters
#############################################################
Expand Down
166 changes: 166 additions & 0 deletions maintainer/benchmarks/lb_weakscaling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
#
# Copyright (C) 2013-2022 The ESPResSo project
#
# This file is part of ESPResSo.
#
# ESPResSo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ESPResSo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

"""
Weak-scaling benchmark for a Lattice-Boltzmann fluid, optionally coupled to
Lennard-Jones particles.

The box obtained from ``--box_l`` (or derived from the particle count) is
multiplied by the MPI node grid before it is assigned to the system, so the
simulated domain grows with the number of MPI ranks. The timings are saved
to a CSV file.
"""
import espressomd
import espressomd.lb
import benchmarks
import numpy as np
import argparse

parser = argparse.ArgumentParser(description="Benchmark LB simulations. "
                                 "Save the results to a CSV file.")
parser.add_argument("--particles_per_core", metavar="N", action="store",
                    type=int, default=125, required=False,
                    help="Number of particles per core")
parser.add_argument("--box_l", action="store", nargs="+",
                    type=int, default=argparse.SUPPRESS, required=False,
                    help="Box length (cubic box)")
parser.add_argument("--lb_sites_per_particle", metavar="N_LB", action="store",
                    type=float, default=28, required=False,
                    help="Number of LB sites per particle")
parser.add_argument("--volume_fraction", metavar="FRAC", action="store",
                    type=float, default=0.03, required=False,
                    help="Fraction of the simulation box volume occupied by "
                    "particles (range: [0.01-0.74], default: 0.03)")
parser.add_argument("--single_precision", action="store_true", required=False,
                    help="Using single-precision floating point accuracy")
parser.add_argument("--gpu", action=argparse.BooleanOptionalAction,
                    default=False, required=False, help="Use GPU implementation")
parser.add_argument("--multi-gpu", action=argparse.BooleanOptionalAction,
                    default=False, required=False, help="Use multi-GPU implementation")
parser.add_argument("--output", metavar="FILEPATH", action="store",
                    type=str, required=False, default="benchmarks.csv",
                    help="Output file (default: benchmarks.csv)")
parser.add_argument("--blocks_per_mpi_rank", action="store", nargs=3,
                    type=int, default=[1, 1, 1], required=False,
                    help="blocks per mpi rank")

args = parser.parse_args()

# process and check arguments
n_iterations = 30
assert args.volume_fraction > 0, "--volume_fraction must be a positive number"
assert args.volume_fraction < np.pi / (3 * np.sqrt(2)), \
    "--volume_fraction exceeds the physical limit of sphere packing (~0.74)"
assert "box_l" not in args or args.particles_per_core == 0, \
    "Argument --box_l requires --particles_per_core=0"
# --box_l has no default value (argparse.SUPPRESS): without particles there is
# no other way to size the box, so fail here with a clear message instead of
# an AttributeError further down
assert "box_l" in args or args.particles_per_core != 0, \
    "Argument --particles_per_core=0 requires --box_l"
assert "box_l" not in args or len(args.box_l) in (1, 3), \
    "Argument --box_l takes either 1 or 3 values"
assert all(n_blocks >= 1 for n_blocks in args.blocks_per_mpi_rank), \
    "--blocks_per_mpi_rank must be positive integers"

# both --gpu and --multi-gpu select the GPU implementation of the LB fluid
use_gpu = args.gpu or args.multi_gpu

required_features = ["LENNARD_JONES", "WALBERLA"]
if use_gpu:
    required_features.append("CUDA")
espressomd.assert_features(required_features)

# make simulation deterministic
np.random.seed(42)

# System
#############################################################
system = espressomd.System(box_l=[1, 1, 1])

# Interaction parameters (Lennard-Jones)
#############################################################

lj_eps = 1.0  # LJ epsilon
lj_sig = 1.0  # particle diameter
lj_cut = lj_sig * 2**(1. / 6.)  # cutoff distance

# System parameters
#############################################################
n_proc = system.cell_system.get_state()["n_nodes"]
n_part = n_proc * args.particles_per_core
if n_part == 0:
    # pure LB benchmark: the box is taken from the command line,
    # a single value is expanded to a cubic box
    box_l = 3 * args.box_l if len(args.box_l) == 1 else args.box_l
    agrid = 1.
    lb_grid = box_l
    measurement_steps = 80
else:
    # volume of N spheres with radius r: N * (4/3*pi*r^3)
    # NOTE(review): n_part is the total particle count over all ranks, and the
    # resulting box is multiplied by the node grid again below; confirm that
    # this is the intended sizing for the particle-coupled weak-scaling case.
    box_l = (n_part * 4. / 3. * np.pi * (lj_sig / 2.)**3
             / args.volume_fraction)**(1. / 3.)
    lb_grid = (n_part * args.lb_sites_per_particle)**(1. / 3.)
    # round to the nearest even number of LB sites
    lb_grid = int(2. * round(lb_grid / 2.))
    agrid = box_l / lb_grid
    # fixed step count; a grid-size-dependent value used to be computed here
    # but was always overwritten by this constant
    measurement_steps = 40
    lb_grid = 3 * [lb_grid]
    box_l = 3 * [box_l]

blocks_per_mpi_rank = args.blocks_per_mpi_rank

# System
#############################################################
# weak scaling: the box is multiplied by the MPI node grid
# (presumably an elementwise product with a NumPy array; TODO confirm)
system.box_l = box_l * system.cell_system.node_grid
print(f"box length: {system.box_l}")
print(f"LB shape: {lb_grid}")
print(f"LB agrid: {agrid:.3f}")

# Integration parameters
#############################################################
system.time_step = 0.01
system.cell_system.skin = 0.5

# Interaction and particle setup
#############################################################
if n_part:
    system.non_bonded_inter[0, 0].lennard_jones.set_params(
        epsilon=lj_eps, sigma=lj_sig, cutoff=lj_cut, shift="auto")
    system.part.add(pos=np.random.random((n_part, 3)) * system.box_l)
    benchmarks.minimize(system, n_part / 2.)
    system.integrator.set_vv()
    system.thermostat.set_langevin(kT=1.0, gamma=1.0, seed=42)

    # tuning and equilibration: two rounds of skin tuning, each one
    # followed by an equilibration run
    min_skin = 0.2
    max_skin = 1.0
    for _ in range(2):
        tuned_skin = system.cell_system.tune_skin(
            min_skin=min_skin, max_skin=max_skin, tol=0.05, int_steps=100)
        print(f"Tune skin: {tuned_skin:.3f}")
        print("Equilibration")
        system.integrator.run(500)
    # the Langevin thermostat is only used while equilibrating the particles
    system.thermostat.turn_off()

# LB fluid setup
#############################################################
lb_class = espressomd.lb.LBFluidWalberla
if use_gpu:
    lb_class = espressomd.lb.LBFluidWalberlaGPU
if args.multi_gpu:
    system.cuda_init_handle.call_method("set_device_id_per_rank")
lbf = lb_class(agrid=agrid, tau=system.time_step, kinematic_viscosity=1.,
               density=1., single_precision=args.single_precision,
               blocks_per_mpi_rank=blocks_per_mpi_rank)
system.lb = lbf
if n_part:
    system.thermostat.set_lb(LB_fluid=lbf, gamma=1., seed=42)


# time integration loop
timings = benchmarks.get_timings(system, measurement_steps, n_iterations)

# average time
avg, ci = benchmarks.get_average_time(timings)
print(f"average: {1000 * avg:.2f} +/- {1000 * ci:.2f} ms (95% C.I.)")

# write report
benchmarks.write_report(args.output, n_proc, timings, measurement_steps)
2 changes: 1 addition & 1 deletion src/script_interface/walberla/LBFluid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ void LBFluidGPU::make_instance(VariantMap const &params) {
params, "blocks_per_mpi_rank", Utils::Vector3i{{1, 1, 1}});
if (blocks_per_mpi_rank != Utils::Vector3i{{1, 1, 1}}) {
throw std::runtime_error(
"GPU architecture PROHIBITED allocating many blocks to 1 CPU.");
"Using more than one block per MPI rank is not supported for GPU LB");
}
auto const lb_lattice = m_lattice->lattice();
auto const lb_visc = m_conv_visc * visc;
Expand Down
41 changes: 8 additions & 33 deletions src/walberla_bridge/src/LatticeWalberla.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,15 @@ LatticeWalberla::LatticeWalberla(Utils::Vector3i const &grid_dimensions,
}

auto constexpr lattice_constant = real_t{1};
auto const cells_block =
auto const cells_per_block =
Utils::hadamard_division(grid_dimensions, block_grid);

m_blocks = walberla::blockforest::createUniformBlockGrid(
// number of blocks in each direction
uint_c(block_grid[0]), uint_c(block_grid[1]), uint_c(block_grid[2]),
// number of cells per block in each direction
uint_c(cells_block[0]), uint_c(cells_block[1]), uint_c(cells_block[2]),
lattice_constant,
uint_c(cells_per_block[0]), uint_c(cells_per_block[1]),
uint_c(cells_per_block[2]), lattice_constant,
// number of cpus per direction
uint_c(node_grid[0]), uint_c(node_grid[1]), uint_c(node_grid[2]),
// periodicity
Expand All @@ -84,41 +84,16 @@ LatticeWalberla::get_local_domain() const {
// Get upper and lower corner of the BlockForest assigned to an MPI rank.
// Since we can allocate multiple blocks per MPI rank,
// the corners of all blocks are compared.
int64_t const stride_y = m_grid_dimensions[2];
int64_t const stride_x = m_grid_dimensions[1] * stride_y;
auto aa = m_blocks->begin()->getAABB();
auto bb = m_blocks->begin()->getAABB();
int64_t aa_index = stride_x * static_cast<int>(aa.min()[0]) +
stride_y * static_cast<int>(aa.min()[1]) +
static_cast<int>(aa.min()[2]);
int64_t bb_index = stride_x * static_cast<int>(bb.max()[0]) +
stride_y * static_cast<int>(bb.max()[1]) +
static_cast<int>(bb.max()[2]);
auto aa = to_vector3d(m_blocks->begin()->getAABB().min());
auto bb = to_vector3d(m_blocks->begin()->getAABB().max());
for (auto b = m_blocks->begin(); b != m_blocks->end(); ++b) {
auto cc = b->getAABB();
for (auto const i : {0u, 1u, 2u}) {
if ((cc.max()[i] - cc.min()[i]) != 0) {
assert(m_grid_dimensions[i] %
static_cast<int>(cc.max()[i] - cc.min()[i]) ==
0);
}
}
int64_t min_index = stride_x * static_cast<int>(cc.min()[0]) +
stride_y * static_cast<int>(cc.min()[1]) +
static_cast<int>(cc.min()[2]);
int64_t max_index = stride_x * static_cast<int>(cc.max()[0]) +
stride_y * static_cast<int>(cc.max()[1]) +
static_cast<int>(cc.max()[2]);
if (min_index < aa_index) {
aa = cc;
aa_index = min_index;
}
if (max_index > bb_index) {
bb = cc;
bb_index = max_index;
aa[i] = std::min(aa[i], cc.min()[i]);
bb[i] = std::max(bb[i], cc.max()[i]);
}
}
return {to_vector3d(aa.min()), to_vector3d(bb.max())};
return {aa, bb};
}

[[nodiscard]] bool
Expand Down
Loading

0 comments on commit a55c6bf

Please sign in to comment.