Responding to Reviews

espressomd · Jan 15, 2025 · a55c6bf · a55c6bf
1 parent 281abc2
commit a55c6bf
Show file tree

Hide file tree

Showing 11 changed files with 457 additions and 479 deletions.
diff --git a/maintainer/benchmarks/lb.py b/maintainer/benchmarks/lb.py
@@ -50,18 +50,9 @@
 parser.add_argument("--output", metavar="FILEPATH", action="store",
                     type=str, required=False, default="benchmarks.csv",
                     help="Output file (default: benchmarks.csv)")
-parser.add_argument("--divided_block", action="store",
-                    type=int, default=1, required=False,
-                    help="blocks^(1/3) per mpi rank")
-parser.add_argument("--divided_block_x", action="store",
-                    type=int, default=0, required=False,
-                    help="The number of divided blocks for x direction")
-parser.add_argument("--divided_block_y", action="store",
-                    type=int, default=0, required=False,
-                    help="The number of divided blocks for x direction")
-parser.add_argument("--divided_block_z", action="store",
-                    type=int, default=0, required=False,
-                    help="The number of divided blocks for x direction")
+parser.add_argument("--blocks_per_mpi_rank", action="store", nargs=3,
+                    type=int, default=[1, 1, 1], required=False,
+                    help="blocks per mpi rank")
 
 args = parser.parse_args()
 
@@ -97,10 +88,7 @@
 n_proc = system.cell_system.get_state()["n_nodes"]
 n_part = n_proc * args.particles_per_core
 if n_part == 0:
-    if len(args.box_l) == 1:
-        box_l = 3 * args.box_l
-    elif len(args.box_l) == 3:
-        box_l = args.box_l
+    box_l = 3 * args.box_l if len(args.box_l) == 1 else args.box_l
     agrid = 1.
     lb_grid = box_l
     measurement_steps = 80
@@ -116,21 +104,15 @@
     lb_grid = 3 * [lb_grid]
     box_l = 3 * [box_l]
 
-divided_block_x = args.divided_block_x
-divided_block_y = args.divided_block_y
-divided_block_z = args.divided_block_z
-if divided_block_x != 0 and divided_block_y != 0 and divided_block_z != 0:
-    blocks_per_mpi_rank = [divided_block_x,
-                           divided_block_y, divided_block_z]
-else:
-    divided_block = args.divided_block
-    blocks_per_mpi_rank = [divided_block] * 3
+print(f"box length: {box_l}")
+print(f"LB shape: {lb_grid}")
+print(f"LB agrid: {agrid:.3f}")
+
+blocks_per_mpi_rank = args.blocks_per_mpi_rank
 
 # System
 #############################################################
-system.box_l = box_l * system.cell_system.node_grid
-print(f"LB agrid: {agrid:.3f}")
-print("LB shape", system.box_l)
+system.box_l = box_l
 
 # Integration parameters
 #############################################################

diff --git a/maintainer/benchmarks/lb_weakscaling.py b/maintainer/benchmarks/lb_weakscaling.py
@@ -0,0 +1,166 @@
+#
+# Copyright (C) 2013-2022 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+"""
+Benchmark Lattice-Boltzmann fluid + Lennard-Jones particles.
+"""
+import espressomd
+import espressomd.lb
+import benchmarks
+import numpy as np
+import argparse
+
+# Command-line interface of the benchmark. Options rely on the argparse
+# defaults for the "store" action and for optional (non-required) flags.
+parser = argparse.ArgumentParser(
+    description="Benchmark LB simulations. "
+    "Save the results to a CSV file.")
+parser.add_argument("--particles_per_core", metavar="N", type=int,
+                    default=125, help="Number of particles per core")
+# SUPPRESS: the attribute only exists on the namespace when the flag is given
+parser.add_argument("--box_l", nargs="+", type=int,
+                    default=argparse.SUPPRESS, help="Box length (cubic box)")
+parser.add_argument("--lb_sites_per_particle", metavar="N_LB", type=float,
+                    default=28, help="Number of LB sites per particle")
+parser.add_argument("--volume_fraction", metavar="FRAC", type=float,
+                    default=0.03,
+                    help="Fraction of the simulation box volume occupied by "
+                    "particles (range: [0.01-0.74], default: 0.03)")
+parser.add_argument("--single_precision", action="store_true",
+                    help="Using single-precision floating point accuracy")
+parser.add_argument("--gpu", action=argparse.BooleanOptionalAction,
+                    default=False, help="Use GPU implementation")
+parser.add_argument("--multi-gpu", action=argparse.BooleanOptionalAction,
+                    default=False, help="Use multi-GPU implementation")
+parser.add_argument("--output", metavar="FILEPATH", type=str,
+                    default="benchmarks.csv",
+                    help="Output file (default: benchmarks.csv)")
+# three integers (presumably one per spatial direction -- TODO confirm)
+parser.add_argument("--blocks_per_mpi_rank", nargs=3, type=int,
+                    default=[1, 1, 1], help="blocks per mpi rank")
+
+args = parser.parse_args()
+
+# process and check arguments
+n_iterations = 30  # number of timing iterations passed to get_timings()
+assert args.volume_fraction > 0, "--volume_fraction must be a positive number"
+assert args.volume_fraction < np.pi / (3 * np.sqrt(2)), \
+    "--volume_fraction exceeds the physical limit of sphere packing (~0.74)"
+assert "box_l" not in args or args.particles_per_core == 0, \
+    "Argument --box_l requires --particles_per_core=0"
+# without particles the box size can only come from --box_l, which has
+# no default value
+assert "box_l" in args or args.particles_per_core != 0, \
+    "Argument --particles_per_core=0 requires --box_l"
+
+# both GPU flags select the GPU LB implementation, so both need CUDA
+required_features = ["LENNARD_JONES", "WALBERLA"]
+if args.gpu or args.multi_gpu:
+    required_features.append("CUDA")
+espressomd.assert_features(required_features)
+
+# seed NumPy's global RNG so that the random numbers drawn are reproducible
+np.random.seed(42)
+
+# System
+#############################################################
+# unit box as a placeholder; the box length is assigned further below
+system = espressomd.System(box_l=3 * [1])
+
+# Interaction parameters (Lennard-Jones)
+#############################################################
+
+lj_eps = 1.0  # LJ epsilon
+lj_sig = 1.0  # particle diameter
+lj_cut = 2**(1. / 6.) * lj_sig  # cutoff distance
+
+# System parameters
+#############################################################
+n_proc = system.cell_system.get_state()["n_nodes"]
+n_part = n_proc * args.particles_per_core
+if n_part == 0:
+    # pure-fluid run: the box is taken from --box_l, with a single value
+    # expanded to a cubic box and one LB cell per unit length
+    box_l = 3 * args.box_l if len(args.box_l) == 1 else args.box_l
+    assert len(box_l) == 3, "--box_l takes either 1 or 3 values"
+    agrid = 1.
+    lb_grid = box_l
+    measurement_steps = 80
+else:
+    # volume of N spheres with radius r: N * (4/3*pi*r^3)
+    box_l = (n_part * 4. / 3. * np.pi * (lj_sig / 2.)**3
+             / args.volume_fraction)**(1. / 3.)
+    # number of LB cells per direction, rounded to the nearest even integer
+    lb_grid = (n_part * args.lb_sites_per_particle)**(1. / 3.)
+    lb_grid = int(2. * round(lb_grid / 2.))
+    agrid = box_l / lb_grid
+    measurement_steps = 40
+    lb_grid = 3 * [lb_grid]
+    box_l = 3 * [box_l]
+
+blocks_per_mpi_rank = args.blocks_per_mpi_rank
+
+# System
+#############################################################
+# NOTE(review): the product with node_grid looks like an elementwise
+# scaling of the box by the MPI domain decomposition -- confirm that
+# node_grid is array-like. With particles, box_l was already derived from
+# the total particle count, so this presumably lowers the particle
+# density as well -- confirm this is intended.
+node_grid = system.cell_system.node_grid
+system.box_l = box_l * node_grid
+print(f"box length: {system.box_l}")
+print(f"LB shape: {lb_grid}")
+print(f"LB agrid: {agrid:.3f}")
+
+# Integration parameters
+#############################################################
+system.time_step = 0.01
+system.cell_system.skin = 0.5
+
+# Interaction and particle setup
+#############################################################
+if n_part:
+    lj_inter = system.non_bonded_inter[0, 0].lennard_jones
+    lj_inter.set_params(
+        epsilon=lj_eps, sigma=lj_sig, cutoff=lj_cut, shift="auto")
+    # uniformly random start positions inside the box
+    start_positions = np.random.random((n_part, 3)) * system.box_l
+    system.part.add(pos=start_positions)
+    benchmarks.minimize(system, n_part / 2.)
+    system.integrator.set_vv()
+    system.thermostat.set_langevin(kT=1.0, gamma=1.0, seed=42)
+
+    # two rounds of skin tuning, each followed by an equilibration run
+    min_skin = 0.2
+    max_skin = 1.0
+    for _ in range(2):
+        tuned_skin = system.cell_system.tune_skin(
+            min_skin=min_skin, max_skin=max_skin, tol=0.05, int_steps=100)
+        print("Tune skin: {:.3f}".format(tuned_skin))
+        print("Equilibration")
+        system.integrator.run(500)
+    # the Langevin thermostat is only used while equilibrating
+    system.thermostat.turn_off()
+
+# LB fluid setup
+#############################################################
+# either GPU flag switches to the GPU implementation of the LB class
+use_gpu = args.gpu or args.multi_gpu
+lb_class = (espressomd.lb.LBFluidWalberlaGPU if use_gpu
+            else espressomd.lb.LBFluidWalberla)
+if args.multi_gpu:
+    system.cuda_init_handle.call_method("set_device_id_per_rank")
+lb_kwargs = {
+    "agrid": agrid,
+    "tau": system.time_step,
+    "kinematic_viscosity": 1.,
+    "density": 1.,
+    "single_precision": args.single_precision,
+    "blocks_per_mpi_rank": blocks_per_mpi_rank,
+}
+lbf = lb_class(**lb_kwargs)
+system.lb = lbf
+# the LB thermostat is only enabled when particles exist
+if n_part:
+    system.thermostat.set_lb(LB_fluid=lbf, gamma=1., seed=42)
+
+
+# run the timed integration: n_iterations samples of measurement_steps each
+timings = benchmarks.get_timings(system, measurement_steps, n_iterations)
+
+# mean and confidence interval, reported in milliseconds
+average_time = benchmarks.get_average_time(timings)
+avg, ci = average_time
+print(f"average: {1000 * avg:.2f} +/- {1000 * ci:.2f} ms (95% C.I.)")
+
+# store the raw timings in the output file
+benchmarks.write_report(args.output, n_proc, timings, measurement_steps)
diff --git a/src/script_interface/walberla/LBFluid.cpp b/src/script_interface/walberla/LBFluid.cpp
@@ -143,7 +143,7 @@ void LBFluidGPU::make_instance(VariantMap const &params) {
       params, "blocks_per_mpi_rank", Utils::Vector3i{{1, 1, 1}});
   if (blocks_per_mpi_rank != Utils::Vector3i{{1, 1, 1}}) {
     throw std::runtime_error(
-        "GPU architecture PROHIBITED allocating many blocks to 1 CPU.");
+        "Using more than one block per MPI rank is not supported for GPU LB");
   }
   auto const lb_lattice = m_lattice->lattice();
   auto const lb_visc = m_conv_visc * visc;

diff --git a/src/walberla_bridge/src/LatticeWalberla.cpp b/src/walberla_bridge/src/LatticeWalberla.cpp
@@ -58,15 +58,15 @@ LatticeWalberla::LatticeWalberla(Utils::Vector3i const &grid_dimensions,
   }
 
   auto constexpr lattice_constant = real_t{1};
-  auto const cells_block =
+  auto const cells_per_block =
       Utils::hadamard_division(grid_dimensions, block_grid);
 
   m_blocks = walberla::blockforest::createUniformBlockGrid(
       // number of blocks in each direction
       uint_c(block_grid[0]), uint_c(block_grid[1]), uint_c(block_grid[2]),
       // number of cells per block in each direction
-      uint_c(cells_block[0]), uint_c(cells_block[1]), uint_c(cells_block[2]),
-      lattice_constant,
+      uint_c(cells_per_block[0]), uint_c(cells_per_block[1]),
+      uint_c(cells_per_block[2]), lattice_constant,
       // number of cpus per direction
       uint_c(node_grid[0]), uint_c(node_grid[1]), uint_c(node_grid[2]),
       // periodicity
@@ -84,41 +84,16 @@ LatticeWalberla::get_local_domain() const {
   // Get upper and lower corner of BlockForest assigned to a mpi rank.
   // Since we can allocate multiple blocks per mpi rank,
   // the corners of all Blocks are compared.
-  int64_t const stride_y = m_grid_dimensions[2];
-  int64_t const stride_x = m_grid_dimensions[1] * stride_y;
-  auto aa = m_blocks->begin()->getAABB();
-  auto bb = m_blocks->begin()->getAABB();
-  int64_t aa_index = stride_x * static_cast<int>(aa.min()[0]) +
-                     stride_y * static_cast<int>(aa.min()[1]) +
-                     static_cast<int>(aa.min()[2]);
-  int64_t bb_index = stride_x * static_cast<int>(bb.max()[0]) +
-                     stride_y * static_cast<int>(bb.max()[1]) +
-                     static_cast<int>(bb.max()[2]);
+  auto aa = to_vector3d(m_blocks->begin()->getAABB().min());
+  auto bb = to_vector3d(m_blocks->begin()->getAABB().max());
   for (auto b = m_blocks->begin(); b != m_blocks->end(); ++b) {
     auto cc = b->getAABB();
     for (auto const i : {0u, 1u, 2u}) {
-      if ((cc.max()[i] - cc.min()[i]) != 0) {
-        assert(m_grid_dimensions[i] %
-                   static_cast<int>(cc.max()[i] - cc.min()[i]) ==
-               0);
-      }
-    }
-    int64_t min_index = stride_x * static_cast<int>(cc.min()[0]) +
-                        stride_y * static_cast<int>(cc.min()[1]) +
-                        static_cast<int>(cc.min()[2]);
-    int64_t max_index = stride_x * static_cast<int>(cc.max()[0]) +
-                        stride_y * static_cast<int>(cc.max()[1]) +
-                        static_cast<int>(cc.max()[2]);
-    if (min_index < aa_index) {
-      aa = cc;
-      aa_index = min_index;
-    }
-    if (max_index > bb_index) {
-      bb = cc;
-      bb_index = max_index;
+      aa[i] = std::min(aa[i], cc.min()[i]);
+      bb[i] = std::max(bb[i], cc.max()[i]);
     }
   }
-  return {to_vector3d(aa.min()), to_vector3d(bb.max())};
+  return {aa, bb};
 }
 
 [[nodiscard]] bool