diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 98a0ba8a..3d8c352a 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -15,7 +15,7 @@ jobs: cxx: ['g++', 'clang++'] backend: ['SERIAL', 'OPENMP'] cmake_build_type: ['Debug', 'Release'] - kokkos_ver: ['3.7.02'] + kokkos_ver: ['4.3.01'] arborx: ['ArborX', 'NoArborX'] nnp: ['ON', 'OFF'] layout: ['1'] @@ -27,7 +27,7 @@ jobs: cxx: 'g++' openmp: 'ON' cmake_build_type: 'Release' - kokkos_ver: '3.7.02' + kokkos_ver: '4.3.01' arborx: 'NoArborX' nnp: 'OFF' layout: '2' @@ -38,7 +38,7 @@ jobs: cxx: 'g++' openmp: 'ON' cmake_build_type: 'Release' - kokkos_ver: '3.7.02' + kokkos_ver: '4.3.01' arborx: 'NoArborX' nnp: 'ON' layout: '6' @@ -49,7 +49,7 @@ jobs: cxx: 'g++' openmp: 'ON' cmake_build_type: 'Release' - kokkos_ver: '3.7.02' + kokkos_ver: '4.3.01' arborx: 'NoArborX' nnp: 'OFF' layout: '6' @@ -94,6 +94,15 @@ jobs: -DCMAKE_BUILD_TYPE=${{ matrix.cmake_build_type }} cmake --build build --parallel 2 cmake --install build + - name: Checkout ALL + run: | + git clone --depth 1 --branch master https://gitlab.jsc.fz-juelich.de/SLMS/loadbalancing ALL + - name: Build ALL + working-directory: ALL + run: | + cmake -B build -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$HOME/ALL + cmake --build build --parallel 2 + cmake --install build - name: Checkout Cabana uses: actions/checkout@v3 with: @@ -106,9 +115,8 @@ jobs: cmake -B build \ -DCMAKE_INSTALL_PREFIX=$HOME/Cabana \ -DMPIEXEC_MAX_NUMPROCS=2 -DMPIEXEC_PREFLAGS="--oversubscribe" \ - -DCMAKE_PREFIX_PATH="$HOME/kokkos;$HOME/arborx" \ + -DCMAKE_PREFIX_PATH="$HOME/kokkos;$HOME/arborx;$HOME/ALL" \ -DCMAKE_CXX_COMPILER=${{ matrix.cxx }} \ - -DCabana_DISABLE_CAJITA_DEPRECATION_WARNINGS=ON \ -DCMAKE_BUILD_TYPE=${{ matrix.cmake_build_type }} cmake --build build --parallel 2 cmake --install build @@ -140,6 +148,7 @@ jobs: -DCabanaMD_LAYOUT=${{ matrix.layout }} \ -DCabanaMD_VECTORLENGTH=${{ matrix.vector }} \ -DCabanaMD_ENABLE_NNP=${{ matrix.nnp }} \ + 
-DCabanaMD_ENABLE_LB=ON \ -DN2P2_DIR=$HOME/n2p2 \ -DCabanaMD_LAYOUT_NNP=${{ matrix.layout_nnp }} \ -DCabanaMD_VECTORLENGTH_NNP=${{ matrix.vector_nnp }} diff --git a/CMakeLists.txt b/CMakeLists.txt index d9f8ad2d..8ad0e9f7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.11) +cmake_minimum_required(VERSION 3.14) project(CabanaMD LANGUAGES CXX VERSION 0.1.0) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -18,6 +18,8 @@ set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) ##---------------------------------------------------------------------------## find_package(Cabana REQUIRED) +option(CabanaMD_ENABLE_LB "Utilize Cabana load balancer" OFF) + ##---------------------------------------------------------------------------## # Set up optional libraries ##---------------------------------------------------------------------------## diff --git a/input/in.lj b/input/in.lj index 9b5664ac..20198ebc 100644 --- a/input/in.lj +++ b/input/in.lj @@ -17,7 +17,10 @@ pair_coeff 1 1 1.0 1.0 2.5 neighbor 0.3 bin neigh_modify every 20 one 50 +comm_modify cutoff * 20 fix 1 all nve thermo 10 +dump dmpvtk all vtk 10 dump%_*.vtu + run 100 diff --git a/src/CabanaMD_config.hpp.cmakein b/src/CabanaMD_config.hpp.cmakein index f14e285e..a12cec77 100644 --- a/src/CabanaMD_config.hpp.cmakein +++ b/src/CabanaMD_config.hpp.cmakein @@ -5,6 +5,7 @@ #define CabanaMD_GIT_COMMIT_HASH "@CabanaMD_GIT_COMMIT_HASH@" #cmakedefine CabanaMD_ENABLE_NNP +#cmakedefine CabanaMD_ENABLE_LB #cmakedefine CabanaMD_LAYOUT @CabanaMD_LAYOUT@ #cmakedefine CabanaMD_VECTORLENGTH "@CabanaMD_VECTORLENGTH@" diff --git a/src/cabanamd.h b/src/cabanamd.h index bfb4582f..d5d62224 100644 --- a/src/cabanamd.h +++ b/src/cabanamd.h @@ -57,6 +57,11 @@ #include #include +#ifdef CabanaMD_ENABLE_LB +#include +#include +#endif + class CabanaMD { public: @@ -81,6 +86,10 @@ class CbnMD : public CabanaMD Comm *comm; Binning *binning; InputFile *input; +#ifdef CabanaMD_ENABLE_LB + 
Cabana::Grid::Experimental::LoadBalancer> + *lb; +#endif void init( InputCL cl ) override; void run() override; diff --git a/src/cabanamd_impl.h b/src/cabanamd_impl.h index a2c743da..8b047ab6 100644 --- a/src/cabanamd_impl.h +++ b/src/cabanamd_impl.h @@ -51,15 +51,19 @@ #include #include +#include + #include #include #include #include #include +#include #include #include #include +#include #define MAXPATHLEN 1024 @@ -224,6 +228,12 @@ void CbnMD::init( InputCL commandline ) comm->update_force(); } +#ifdef CabanaMD_ENABLE_LB + lb = new Cabana::Grid::Experimental::LoadBalancer< + Cabana::Grid::UniformMesh>( MPI_COMM_WORLD, + system->global_grid ); +#endif + // Initial output int step = 0; if ( input->thermo_rate > 0 ) @@ -236,10 +246,19 @@ void CbnMD::init( InputCL commandline ) auto KE = kine.compute( system ) / system->N; if ( !_print_lammps ) { +#ifdef CabanaMD_ENABLE_LB + log( out, "\n", std::fixed, std::setprecision( 6 ), + "#Timestep Temperature PotE ETot Time Atomsteps/s " + "LBImbalance\n", + step, " ", T, " ", PE, " ", PE + KE, " ", + std::setprecision( 2 ), 0.0, " ", std::scientific, 0.0, " ", + std::setprecision( 2 ), 0.0 ); +#else log( out, "\n", std::fixed, std::setprecision( 6 ), "#Timestep Temperature PotE ETot Time Atomsteps/s\n", step, " ", T, " ", PE, " ", PE + KE, " ", std::setprecision( 2 ), 0.0, " ", std::scientific, 0.0 ); +#endif } else { @@ -264,6 +283,7 @@ template void CbnMD::run() { std::ofstream out( input->output_file, std::ofstream::app ); + std::ofstream err( input->error_file, std::ofstream::app ); auto neigh_cutoff = input->force_cutoff + input->neighbor_skin; bool half_neigh = input->force_iteration_type == FORCE_ITER_NEIGH_HALF; @@ -272,15 +292,19 @@ void CbnMD::run() PotE pote( comm ); KinE kine( comm ); + std::string vtk_actual_domain_basename( "domain_act" ); + std::string vtk_lb_domain_basename( "domain_lb" ); + double force_time = 0; double comm_time = 0; double neigh_time = 0; double integrate_time = 0; + double lb_time = 0; 
double other_time = 0; double last_time = 0; Kokkos::Timer timer, force_timer, comm_timer, neigh_timer, integrate_timer, - other_timer; + lb_timer, other_timer; // Main timestep loop for ( int step = 1; step <= nsteps; step++ ) @@ -292,6 +316,16 @@ void CbnMD::run() if ( step % input->comm_exchange_rate == 0 && step > 0 ) { + // Update domain decomposition + lb_timer.reset(); +#ifdef CabanaMD_ENABLE_LB + double work = system->N_local + system->N_ghost; + auto new_global_grid = lb->createBalancedGlobalGrid( + system->global_mesh, *system->partitioner, work ); + system->update_global_grid( new_global_grid ); +#endif + lb_time += lb_timer.seconds(); + // Exchange atoms across MPI ranks comm_timer.reset(); comm->exchange(); @@ -361,9 +395,16 @@ void CbnMD::run() double time = timer.seconds(); double rate = 1.0 * system->N * input->thermo_rate / ( time - last_time ); +#ifdef CabanaMD_ENABLE_LB + log( out, std::fixed, std::setprecision( 6 ), step, " ", T, " ", + PE, " ", PE + KE, " ", std::setprecision( 2 ), time, " ", + std::scientific, rate, " ", std::setprecision( 2 ), + lb->getImbalance() ); +#else log( out, std::fixed, std::setprecision( 6 ), step, " ", T, " ", PE, " ", PE + KE, " ", std::setprecision( 2 ), time, " ", std::scientific, rate ); +#endif last_time = time; } else @@ -373,8 +414,22 @@ void CbnMD::run() " ", T, " ", PE, " ", PE + KE, " ", time ); last_time = time; } +#ifdef CabanaMD_ENABLE_LB + double work = system->N_local + system->N_ghost; + std::array vertices; + vertices = lb->getVertices(); + VTKWriter::writeDomain( MPI_COMM_WORLD, step, vertices, work, + vtk_actual_domain_basename ); + vertices = lb->getInternalVertices(); + VTKWriter::writeDomain( MPI_COMM_WORLD, step, vertices, work, + vtk_lb_domain_basename ); +#endif } + if ( input->write_vtk_flag && step % input->vtk_rate == 0 ) + VTKWriter::writeParticles( MPI_COMM_WORLD, step, system, + input->vtk_file, err ); + if ( input->dumpbinaryflag ) dump_binary( step ); @@ -391,16 +446,18 @@ void CbnMD::run() { double
steps_per_sec = 1.0 * nsteps / time; double atom_steps_per_sec = system->N * steps_per_sec; + // todo(sschulz): Properly remove lb timing if not enabled. log( out, std::fixed, std::setprecision( 2 ), - "\n#Procs Atoms | Time T_Force T_Neigh T_Comm T_Int ", + "\n#Procs Atoms | Time T_Force T_Neigh T_Comm T_Int T_lb ", "T_Other |\n", comm->num_processes(), " ", system->N, " | ", time, " ", force_time, " ", neigh_time, " ", comm_time, " ", - integrate_time, " ", other_time, " | PERFORMANCE\n", std::fixed, - comm->num_processes(), " ", system->N, " | ", 1.0, " ", + integrate_time, " ", lb_time, " ", other_time, " | PERFORMANCE\n", + std::fixed, comm->num_processes(), " ", system->N, " | ", 1.0, " ", force_time / time, " ", neigh_time / time, " ", comm_time / time, - " ", integrate_time / time, " ", other_time / time, - " | FRACTION\n\n", "#Steps/s Atomsteps/s Atomsteps/(proc*s)\n", - std::scientific, steps_per_sec, " ", atom_steps_per_sec, " ", + " ", integrate_time / time, " ", lb_time / time, " ", + other_time / time, " | FRACTION\n\n", + "#Steps/s Atomsteps/s Atomsteps/(proc*s)\n", std::scientific, + steps_per_sec, " ", atom_steps_per_sec, " ", atom_steps_per_sec / comm->num_processes() ); } else @@ -409,6 +466,7 @@ void CbnMD::run() " procs for ", nsteps, " steps with ", system->N, " atoms" ); } out.close(); + err.close(); if ( input->write_data_flag ) write_data( system, input->output_data_file ); diff --git a/src/comm_mpi_impl.h b/src/comm_mpi_impl.h index bb7a9243..0a1983e3 100644 --- a/src/comm_mpi_impl.h +++ b/src/comm_mpi_impl.h @@ -230,7 +230,7 @@ void Comm::exchange() // resized as well if ( pack_ranks_migrate_all.extent( 0 ) < x.size() ) { - max_local *= 1.1; + max_local = x.size() * 1.1; Kokkos::realloc( pack_ranks_migrate_all, max_local ); } pack_ranks_migrate = diff --git a/src/inputCL.cpp b/src/inputCL.cpp index b975b291..df96366f 100644 --- a/src/inputCL.cpp +++ b/src/inputCL.cpp @@ -116,6 +116,8 @@ void InputCL::read_args( int argc, char *argv[] ) " 
(N = positive integer)\n", " (PATH = location of ", "directory)\n" ); + log( std::cout, " --sparse : " + "create a vacuum for an unbalanced system\n" ); } // Read Lammps input deck @@ -233,6 +235,11 @@ void InputCL::read_args( int argc, char *argv[] ) i += 3; } + else if ( ( strcmp( argv[i], "--sparse" ) == 0 ) ) + { + sparse = true; + } + else if ( ( strstr( argv[i], "--kokkos-" ) == NULL ) ) { log_err( std::cout, "Unknown command line argument: ", argv[i] ); diff --git a/src/inputCL.h b/src/inputCL.h index c39d3c0b..15a2c64f 100644 --- a/src/inputCL.h +++ b/src/inputCL.h @@ -63,6 +63,7 @@ class InputCL int layout_type; int nnp_layout_type; int device_type; + bool sparse = false; int dumpbinary_rate, correctness_rate; bool dumpbinaryflag, correctnessflag; diff --git a/src/inputFile.h b/src/inputFile.h index 16e90192..d70cd546 100644 --- a/src/inputFile.h +++ b/src/inputFile.h @@ -58,6 +58,7 @@ #include #include +#include #include #include @@ -149,18 +150,20 @@ template class InputFile { private: - bool timestepflag; // input timestep? + bool timestepflag = false; // input timestep? 
public: InputCL commandline; t_System *system; bool _print_rank; - int units_style; - int lattice_style; - double lattice_constant, lattice_offset_x, lattice_offset_y, - lattice_offset_z; + + // defaults match ExaMiniMD LJ example + int units_style = UNITS_LJ; + int lattice_style = LATTICE_FCC; + double lattice_constant = 0.8442, lattice_offset_x = 0.0, + lattice_offset_y = 0.0, lattice_offset_z = 0.0; int lattice_nx, lattice_ny, lattice_nz; - int box[6]; + std::array box = { 0, 40, 0, 40, 0, 40 }; char *data_file; int data_file_type; @@ -168,38 +171,42 @@ class InputFile std::string output_file; std::string error_file; - double temperature_target; - int temperature_seed; + double temperature_target = 1.4; + int temperature_seed = 87287; - int integrator_type; - int nsteps; + int integrator_type = INTEGRATOR_NVE; + int nsteps = 100; - int binning_type; + int binning_type = BINNING_LINKEDCELL; - int comm_type; - int comm_exchange_rate; + int comm_type = COMM_MPI; + int comm_exchange_rate = 20; + double comm_ghost_cutoff; - int force_type; + int force_type = FORCE_LJ; int force_iteration_type; int force_neigh_parallel_type; - T_F_FLOAT force_cutoff; + T_F_FLOAT force_cutoff = 2.5; std::vector> force_coeff_lines; - T_F_FLOAT neighbor_skin; - int neighbor_type; - T_INT max_neigh_guess; + T_F_FLOAT neighbor_skin = 0.0; + int neighbor_type = NEIGH_VERLET_2D; + T_INT max_neigh_guess = 50; int layout_type; int nnp_layout_type; - int thermo_rate, dumpbinary_rate, correctness_rate; - bool dumpbinaryflag, correctnessflag; + int thermo_rate = 10, dumpbinary_rate = 0, correctness_rate = 0; + bool dumpbinaryflag = false, correctnessflag = false; char *dumpbinary_path, *reference_path, *correctness_file; std::string input_data_file; std::string output_data_file; bool read_data_flag = false; bool write_data_flag = false; + bool write_vtk_flag = false; + int vtk_rate; + std::string vtk_file; InputFile( InputCL cl, t_System *s ); void read_file( const char *filename = NULL ); diff 
--git a/src/inputFile_impl.h b/src/inputFile_impl.h index f4edc440..9213a716 100644 --- a/src/inputFile_impl.h +++ b/src/inputFile_impl.h @@ -49,6 +49,7 @@ #include #include +#include #include #include @@ -71,12 +72,6 @@ InputFile::InputFile( InputCL commandline_, t_System *system_ ) : commandline( commandline_ ) , system( system_ ) { - comm_type = COMM_MPI; - integrator_type = INTEGRATOR_NVE; - neighbor_type = NEIGH_VERLET_2D; - force_type = FORCE_LJ; - binning_type = BINNING_LINKEDCELL; - neighbor_type = commandline.neighbor_type; force_iteration_type = commandline.force_iteration_type; force_neigh_parallel_type = commandline.force_neigh_parallel_type; @@ -84,43 +79,8 @@ InputFile::InputFile( InputCL commandline_, t_System *system_ ) output_file = commandline.output_file; error_file = commandline.error_file; - // set defaults (matches ExaMiniMD LJ example) - - nsteps = 0; - - thermo_rate = 0; - dumpbinary_rate = 0; - correctness_rate = 0; - dumpbinaryflag = false; - correctnessflag = false; - timestepflag = false; - - lattice_offset_x = 0.0; - lattice_offset_y = 0.0; - lattice_offset_z = 0.0; - box[0] = 0; - box[2] = 0; - box[4] = 0; - box[1] = 40; - box[3] = 40; - box[5] = 40; - - units_style = UNITS_LJ; - lattice_style = LATTICE_FCC; - lattice_constant = 0.8442; - - temperature_target = 1.4; - temperature_seed = 87287; - - nsteps = 100; - thermo_rate = 10; - - neighbor_skin = 0.3; - neighbor_skin = 0.0; // for metal and real units - max_neigh_guess = 50; - comm_exchange_rate = 20; - - force_cutoff = 2.5; + comm_ghost_cutoff = std::pow( ( 4.0 / lattice_constant ), ( 1.0 / 3.0 ) ) * + 20.0; // 20 lattice constants } template @@ -335,6 +295,38 @@ void InputFile::check_lammps_command( std::string line, write_data_flag = true; output_data_file = words.at( 1 ); } + if ( keyword.compare( "dump" ) == 0 ) + { + if ( words.at( 3 ).compare( "vtk" ) == 0 ) + { + known = true; + write_vtk_flag = true; + vtk_file = words.at( 5 ); + vtk_rate = std::stod( words.at( 4 ) ); + if 
( words.at( 2 ).compare( "all" ) != 0 ) + { + log_err( err, "LAMMPS-Command: 'dump' command only supports " + "dumping 'all' types in CabanaMD" ); + } + size_t pos = 0; + pos = vtk_file.find( "*", pos ); + if ( std::string::npos == pos ) + log_err( err, + "LAMMPS-Command: 'dump' requires '*' in file name, so " + "it can be replaced by the time step in CabanaMD" ); + pos = 0; + pos = vtk_file.find( "%", pos ); + if ( std::string::npos == pos ) + log_err( err, + "LAMMPS-Command: 'dump' requires '%' in file name, so " + "it can be replaced by the rank in CabanaMD" ); + } + else + { + log_err( err, "LAMMPS-Command: 'dump' command only supports 'vtk' " + "in CabanaMD" ); + } + } if ( keyword.compare( "pair_style" ) == 0 ) { if ( words.at( 1 ).compare( "lj/cut" ) == 0 ) @@ -416,6 +408,27 @@ void InputFile::check_lammps_command( std::string line, } } } + if ( keyword.compare( "comm_modify" ) == 0 ) + { + if ( words.at( 1 ).compare( "cutoff" ) == 0 ) + { + if ( words.at( 2 ).compare( "*" ) == 0 ) + { + known = true; + comm_ghost_cutoff = std::stod( words.at( 3 ) ); + } + else + { + log_err( err, "LAMMPS-Command: 'comm_modify' command only " + "supported for all atom types '*' in CabanaMD" ); + } + } + else + { + log_err( err, "LAMMPS-Command: 'comm_modify' command only supports " + "single cutoff 'cutoff' in CabanaMD" ); + } + } if ( keyword.compare( "fix" ) == 0 ) { if ( words.at( 3 ).compare( "nve" ) == 0 ) @@ -499,7 +512,14 @@ void InputFile::create_lattice( Comm *comm ) T_X_FLOAT max_z = lattice_constant * lattice_nz; std::array global_low = { 0.0, 0.0, 0.0 }; std::array global_high = { max_x, max_y, max_z }; - system->create_domain( global_low, global_high ); + if ( commandline.sparse ) + { + // Create a vacuum for an unbalanced system. 
+ global_high[0] *= 2; + global_high[1] *= 2; + global_high[2] *= 2; + } + system->create_domain( global_low, global_high, comm_ghost_cutoff ); s = *system; auto local_mesh_lo_x = s.local_mesh_lo_x; @@ -509,12 +529,27 @@ void InputFile::create_lattice( Comm *comm ) auto local_mesh_hi_y = s.local_mesh_hi_y; auto local_mesh_hi_z = s.local_mesh_hi_z; - T_INT ix_start = local_mesh_lo_x / s.global_mesh_x * lattice_nx - 0.5; - T_INT iy_start = local_mesh_lo_y / s.global_mesh_y * lattice_ny - 0.5; - T_INT iz_start = local_mesh_lo_z / s.global_mesh_z * lattice_nz - 0.5; - T_INT ix_end = local_mesh_hi_x / s.global_mesh_x * lattice_nx + 0.5; - T_INT iy_end = local_mesh_hi_y / s.global_mesh_y * lattice_ny + 0.5; - T_INT iz_end = local_mesh_hi_z / s.global_mesh_z * lattice_nz + 0.5; + T_INT ix_start = local_mesh_lo_x / lattice_constant - 0.5; + T_INT iy_start = local_mesh_lo_y / lattice_constant - 0.5; + T_INT iz_start = local_mesh_lo_z / lattice_constant - 0.5; + T_INT ix_end = + std::max( std::min( static_cast( lattice_nx ), + local_mesh_hi_x / lattice_constant + 0.5 ), + static_cast( ix_start ) ); + T_INT iy_end = + std::max( std::min( static_cast( lattice_ny ), + local_mesh_hi_y / lattice_constant + 0.5 ), + static_cast( iy_start ) ); + T_INT iz_end = + std::max( std::min( static_cast( lattice_nz ), + local_mesh_hi_z / lattice_constant + 0.5 ), + static_cast( iz_start ) ); + if ( ix_start == ix_end ) + ix_end -= 1; + if ( iy_start == iy_end ) + iy_end -= 1; + if ( iz_start == iz_end ) + iz_end -= 1; // Create Simple Cubic Lattice if ( lattice_style == LATTICE_SC ) @@ -535,7 +570,8 @@ void InputFile::create_lattice( Comm *comm ) ( ztmp >= local_mesh_lo_z ) && ( xtmp < local_mesh_hi_x ) && ( ytmp < local_mesh_hi_y ) && - ( ztmp < local_mesh_hi_z ) ) + ( ztmp < local_mesh_hi_z ) && ( xtmp < max_x ) && + ( ytmp < max_y ) && ( ztmp < max_z ) ) { n++; } @@ -568,7 +604,8 @@ void InputFile::create_lattice( Comm *comm ) ( ztmp >= local_mesh_lo_z ) && ( xtmp < local_mesh_hi_x ) && ( 
ytmp < local_mesh_hi_y ) && - ( ztmp < local_mesh_hi_z ) ) + ( ztmp < local_mesh_hi_z ) && ( xtmp < max_x ) && + ( ytmp < max_y ) && ( ztmp < max_z ) ) { x( n, 0 ) = xtmp; x( n, 1 ) = ytmp; @@ -634,7 +671,8 @@ void InputFile::create_lattice( Comm *comm ) ( ztmp >= local_mesh_lo_z ) && ( xtmp < local_mesh_hi_x ) && ( ytmp < local_mesh_hi_y ) && - ( ztmp < local_mesh_hi_z ) ) + ( ztmp < local_mesh_hi_z ) && ( xtmp < max_x ) && + ( ytmp < max_y ) && ( ztmp < max_z ) ) { n++; } @@ -674,7 +712,8 @@ void InputFile::create_lattice( Comm *comm ) ( ztmp >= local_mesh_lo_z ) && ( xtmp < local_mesh_hi_x ) && ( ytmp < local_mesh_hi_y ) && - ( ztmp < local_mesh_hi_z ) ) + ( ztmp < local_mesh_hi_z ) && ( xtmp < max_x ) && + ( ytmp < max_y ) && ( ztmp < max_z ) ) { h_x( n, 0 ) = xtmp; h_x( n, 1 ) = ytmp; diff --git a/src/integrator_nve_impl.h b/src/integrator_nve_impl.h index 564f4fd2..0047456f 100644 --- a/src/integrator_nve_impl.h +++ b/src/integrator_nve_impl.h @@ -64,10 +64,8 @@ void Integrator::initial_integrate( t_System *system ) f = system->f; type = system->type; - static int step = 1; Kokkos::parallel_for( "IntegratorNVE::initial_integrate", t_policy_initial( 0, system->N_local ), *this ); - step++; } template @@ -80,10 +78,8 @@ void Integrator::final_integrate( t_System *system ) f = system->f; type = system->type; - static int step = 1; Kokkos::parallel_for( "IntegratorNVE::final_integrate", t_policy_final( 0, system->N_local ), *this ); - step++; } template diff --git a/src/system.h b/src/system.h index 5185f1f7..9a812856 100644 --- a/src/system.h +++ b/src/system.h @@ -55,7 +55,11 @@ #include +#include +#include +#include #include +#include #include template @@ -90,9 +94,17 @@ class SystemCommon T_X_FLOAT local_mesh_hi_x, local_mesh_hi_y, local_mesh_hi_z; T_X_FLOAT ghost_mesh_lo_x, ghost_mesh_lo_y, ghost_mesh_lo_z; T_X_FLOAT ghost_mesh_hi_x, ghost_mesh_hi_y, ghost_mesh_hi_z; + T_X_FLOAT halo_width; + std::shared_ptr> partitioner; + std::shared_ptr< + 
Cabana::Grid::GlobalMesh>> + global_mesh; std::shared_ptr< Cabana::Grid::LocalGrid>> local_grid; + std::shared_ptr< + Cabana::Grid::GlobalGrid>> + global_grid; // Only needed for current comm std::array ranks_per_dim; @@ -129,13 +141,32 @@ class SystemCommon void create_domain( std::array low_corner, std::array high_corner ) { + double ghost_cutoff = + std::max( std::max( high_corner[0] - low_corner[0], + high_corner[1] - low_corner[1] ), + high_corner[2] - low_corner[2] ); + create_domain( low_corner, high_corner, ghost_cutoff ); + } + void create_domain( std::array low_corner, + std::array high_corner, double ghost_cutoff ) + { + halo_width = ghost_cutoff; // Create the MPI partitions. - Cabana::Grid::DimBlockPartitioner<3> partitioner; - ranks_per_dim = partitioner.ranksPerDimension( MPI_COMM_WORLD, {} ); + partitioner = std::make_shared>(); + ranks_per_dim = partitioner->ranksPerDimension( MPI_COMM_WORLD, {} ); + int cells_per_dim_per_rank = 1; + + // The load balancing will be able to change the local domains with a + // resolution of 1/cells_per_dim_per_rank + cells_per_dim_per_rank = 100; + std::array cells_per_rank = { + cells_per_dim_per_rank * ranks_per_dim[0], + cells_per_dim_per_rank * ranks_per_dim[1], + cells_per_dim_per_rank * ranks_per_dim[2] }; // Create global mesh of MPI partitions. - auto global_mesh = Cabana::Grid::createUniformGlobalMesh( - low_corner, high_corner, ranks_per_dim ); + global_mesh = Cabana::Grid::createUniformGlobalMesh( + low_corner, high_corner, cells_per_rank ); global_mesh_x = global_mesh->extent( 0 ); global_mesh_y = global_mesh->extent( 1 ); @@ -143,8 +174,8 @@ class SystemCommon // Create the global grid.
std::array is_periodic = { true, true, true }; - auto global_grid = Cabana::Grid::createGlobalGrid( - MPI_COMM_WORLD, global_mesh, is_periodic, partitioner ); + global_grid = Cabana::Grid::createGlobalGrid( + MPI_COMM_WORLD, global_mesh, is_periodic, *partitioner ); for ( int d = 0; d < 3; d++ ) { @@ -152,26 +183,26 @@ class SystemCommon } // Create a local mesh - int halo_width = 1; - local_grid = Cabana::Grid::createLocalGrid( global_grid, halo_width ); - auto local_mesh = - Cabana::Grid::createLocalMesh( *local_grid ); + double minimum_cell_size = std::min( + std::min( global_mesh->cellSize( 0 ), global_mesh->cellSize( 1 ) ), + global_mesh->cellSize( 2 ) ); + halo_width = std::ceil( ghost_cutoff / minimum_cell_size ); + // Update local_mesh_* and ghost_mesh_* info as well as create + // local_grid. + update_global_grid( global_grid ); + } - local_mesh_lo_x = local_mesh.lowCorner( Cabana::Grid::Own(), 0 ); - local_mesh_lo_y = local_mesh.lowCorner( Cabana::Grid::Own(), 1 ); - local_mesh_lo_z = local_mesh.lowCorner( Cabana::Grid::Own(), 2 ); - local_mesh_hi_x = local_mesh.highCorner( Cabana::Grid::Own(), 0 ); - local_mesh_hi_y = local_mesh.highCorner( Cabana::Grid::Own(), 1 ); - local_mesh_hi_z = local_mesh.highCorner( Cabana::Grid::Own(), 2 ); - ghost_mesh_lo_x = local_mesh.lowCorner( Cabana::Grid::Ghost(), 0 ); - ghost_mesh_lo_y = local_mesh.lowCorner( Cabana::Grid::Ghost(), 1 ); - ghost_mesh_lo_z = local_mesh.lowCorner( Cabana::Grid::Ghost(), 2 ); - ghost_mesh_hi_x = local_mesh.highCorner( Cabana::Grid::Ghost(), 0 ); - ghost_mesh_hi_y = local_mesh.highCorner( Cabana::Grid::Ghost(), 1 ); - ghost_mesh_hi_z = local_mesh.highCorner( Cabana::Grid::Ghost(), 2 ); - local_mesh_x = local_mesh.extent( Cabana::Grid::Own(), 0 ); - local_mesh_y = local_mesh.extent( Cabana::Grid::Own(), 1 ); - local_mesh_z = local_mesh.extent( Cabana::Grid::Own(), 2 ); + // Update domain info according to new global grid. We assume that the + // number of ranks (per dim) does not change. 
We also assume that the + // position of this rank in the cartesian grid of ranks does not change. + void update_global_grid( + const std::shared_ptr< + Cabana::Grid::GlobalGrid>> + &new_global_grid ) + { + global_grid = new_global_grid; + local_grid = Cabana::Grid::createLocalGrid( global_grid, halo_width ); + update_mesh_info(); } void slice_all() @@ -215,6 +246,30 @@ class SystemCommon migrate( std::shared_ptr> distributor ) = 0; virtual void gather( std::shared_ptr> halo ) = 0; virtual const char *name() { return "SystemNone"; } + + private: + // Update local_mesh_* and ghost_mesh* info from global grid + void update_mesh_info() + { + auto local_mesh = + Cabana::Grid::createLocalMesh( *local_grid ); + + local_mesh_lo_x = local_mesh.lowCorner( Cabana::Grid::Own(), 0 ); + local_mesh_lo_y = local_mesh.lowCorner( Cabana::Grid::Own(), 1 ); + local_mesh_lo_z = local_mesh.lowCorner( Cabana::Grid::Own(), 2 ); + local_mesh_hi_x = local_mesh.highCorner( Cabana::Grid::Own(), 0 ); + local_mesh_hi_y = local_mesh.highCorner( Cabana::Grid::Own(), 1 ); + local_mesh_hi_z = local_mesh.highCorner( Cabana::Grid::Own(), 2 ); + ghost_mesh_lo_x = local_mesh.lowCorner( Cabana::Grid::Ghost(), 0 ); + ghost_mesh_lo_y = local_mesh.lowCorner( Cabana::Grid::Ghost(), 1 ); + ghost_mesh_lo_z = local_mesh.lowCorner( Cabana::Grid::Ghost(), 2 ); + ghost_mesh_hi_x = local_mesh.highCorner( Cabana::Grid::Ghost(), 0 ); + ghost_mesh_hi_y = local_mesh.highCorner( Cabana::Grid::Ghost(), 1 ); + ghost_mesh_hi_z = local_mesh.highCorner( Cabana::Grid::Ghost(), 2 ); + local_mesh_x = local_mesh.extent( Cabana::Grid::Own(), 0 ); + local_mesh_y = local_mesh.extent( Cabana::Grid::Own(), 1 ); + local_mesh_z = local_mesh.extent( Cabana::Grid::Own(), 2 ); + } }; template diff --git a/src/vtk_writer.h b/src/vtk_writer.h new file mode 100644 index 00000000..88d54a6b --- /dev/null +++ b/src/vtk_writer.h @@ -0,0 +1,297 @@ +/**************************************************************************** + * Copyright (c) 
2018-2021 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#ifndef VTK_DOMAIN_WRITER_H +#define VTK_DOMAIN_WRITER_H +#include + +#include +#include +#include + +namespace +{ + +std::string set_width( const int value, const unsigned width = 3 ) +{ + std::ostringstream oss; + oss << std::setw( width ) << std::setfill( '0' ) << value; + return oss.str(); +} + +} // namespace + +namespace VTKWriter +{ +// Write PVTU +void writeDomainParallelFile( MPI_Comm comm, const std::string &time_step, + std::string &basename ) +{ + // Should only be called from a single rank + int size; + MPI_Comm_size( comm, &size ); + std::stringstream filename; + filename << basename << "_" << time_step << ".pvtu"; + FILE *file = fopen( filename.str().c_str(), "w" ); + fprintf( file, "\n" ); + fprintf( file, "\n" ); + fprintf( file, "\n" ); + fprintf( file, "\t\n" ); + fprintf( file, "\t\t\n" ); + fprintf( file, "\t\t\n" ); + fprintf( file, "\t\n" ); + fprintf( file, "\t\n" ); + fprintf( file, "\t\t\n" ); + fprintf( file, "\t\n" ); + for ( int i = 0; i < size; ++i ) + fprintf( file, "\t\n", basename.c_str(), + i, time_step.c_str() ); + fprintf( file, "\n" ); + fprintf( file, "\n" ); + fclose( file ); +} + +// Write VTU for domain (low corner, high corner) +// basename will be appended with the corresponding time step, rank and +// extension +void writeDomain( MPI_Comm comm, int time_step, + std::array &domain_vertices, double work, + std::string &basename ) +{ + int rank; + MPI_Comm_rank( comm, &rank ); + + auto time_step_fixed = set_width( time_step ); + if ( rank == 0 ) + writeDomainParallelFile( comm, time_step_fixed, basename ); + std::stringstream filename; + //
todo(sschulz): properly format, according to max rank + filename << basename << "_" << rank << "_" << time_step_fixed << ".vtu"; + FILE *file = fopen( filename.str().c_str(), "w" ); + fprintf( file, "\n" ); + fprintf( file, "\n" ); + fprintf( file, "\n" ); + std::array vertices; + vertices[0 * 3 + 0] = domain_vertices[0]; + vertices[2 * 3 + 0] = domain_vertices[0]; + vertices[4 * 3 + 0] = domain_vertices[0]; + vertices[6 * 3 + 0] = domain_vertices[0]; + vertices[0 * 3 + 1] = domain_vertices[1]; + vertices[1 * 3 + 1] = domain_vertices[1]; + vertices[4 * 3 + 1] = domain_vertices[1]; + vertices[5 * 3 + 1] = domain_vertices[1]; + vertices[0 * 3 + 2] = domain_vertices[2]; + vertices[1 * 3 + 2] = domain_vertices[2]; + vertices[2 * 3 + 2] = domain_vertices[2]; + vertices[3 * 3 + 2] = domain_vertices[2]; + vertices[1 * 3 + 0] = domain_vertices[3]; + vertices[3 * 3 + 0] = domain_vertices[3]; + vertices[5 * 3 + 0] = domain_vertices[3]; + vertices[7 * 3 + 0] = domain_vertices[3]; + vertices[2 * 3 + 1] = domain_vertices[4]; + vertices[3 * 3 + 1] = domain_vertices[4]; + vertices[6 * 3 + 1] = domain_vertices[4]; + vertices[7 * 3 + 1] = domain_vertices[4]; + vertices[4 * 3 + 2] = domain_vertices[5]; + vertices[5 * 3 + 2] = domain_vertices[5]; + vertices[6 * 3 + 2] = domain_vertices[5]; + vertices[7 * 3 + 2] = domain_vertices[5]; + std::array connectivity = { 0, 1, 2, 3, 4, 5, 6, 7 }; + fprintf( file, "\n" ); + fprintf( file, "\t\n" ); + fprintf( file, "\t\n" ); + fprintf( file, "\t\n" ); + fprintf( file, "\t\t\n" ); + fprintf( file, "%d", rank ); + fprintf( file, "\n\t\t\n" ); + fprintf( file, "\t\t\n" ); + fprintf( file, "%g", work ); + fprintf( file, "\n\t\t\n" ); + fprintf( file, "\t\n" ); + fprintf( file, "\t\n" ); + fprintf( file, "\t\t\n" ); + for ( const double &vert : vertices ) + fprintf( file, "%g ", vert ); + fprintf( file, "\n\t\t\n" ); + fprintf( file, "\t\n" ); + fprintf( file, "\t\n" ); + fprintf( file, "\t\t\n" ); + for ( const int &conn : connectivity ) + 
fprintf( file, "%d ", conn ); + fprintf( file, "\n\t\t\n" ); + fprintf( file, "\t\t\n" ); + fprintf( file, "8\n" ); + fprintf( file, "\n\t\t\n" ); + fprintf( file, "\t\t\n" ); + fprintf( file, "11\n" ); + fprintf( file, "\n\t\t\n" ); + fprintf( file, "\t\n" ); + fprintf( file, "\n" ); + fprintf( file, "\n" ); + fprintf( file, "\n" ); + fclose( file ); +} + +void writeParticlesParallelFile( MPI_Comm comm, const std::string &time_step, + std::string filename ) +{ + // Should only be called from a single rank + int size; + MPI_Comm_size( comm, &size ); + // Prepare actual filename + // todo(sschulz): Also separate filename construction into function + size_t pos = 0; + pos = filename.find( "*", pos ); + filename.replace( pos, 1, time_step ); + std::string parallel_filename( filename ); + pos = 0; + pos = parallel_filename.find( "%", pos ); + parallel_filename.erase( pos, 1 ); + pos = 0; + pos = parallel_filename.find( ".vtu", pos ); + parallel_filename.replace( pos, 4, ".pvtu" ); + FILE *file = fopen( parallel_filename.c_str(), "w" ); + fprintf( file, "\n" ); + fprintf( file, "\n" ); + fprintf( file, "\n" ); + fprintf( file, "\t\n" ); + fprintf( file, "\t\t\n" ); + fprintf( file, "\t\t\n" ); + fprintf( file, "\t\t\n" ); + fprintf( file, "\t\n" ); + fprintf( file, "\t\n" ); + fprintf( file, "\t\n" ); + fprintf( file, "\t\n" ); + fprintf( file, "\t\t\n" ); + fprintf( file, "\t\n" ); + for ( int i = 0; i < size; ++i ) + { + std::string piece_filename( filename ); + pos = 0; + pos = piece_filename.find( "%", pos ); + std::stringstream rank_string; + rank_string << "_" << i; + piece_filename.replace( pos, 1, rank_string.str() ); + fprintf( file, "\t\n", piece_filename.c_str() ); + } + fprintf( file, "\n" ); + fprintf( file, "\n" ); + fclose( file ); +} + +// Write particles to vtu file +// filename must contain * and % which will be replaced by time step and _rank. +// The filename dump_*%.vtu will create the file dump_043_325.vtu in time +// step 43 on rank 325. 
+template +void writeParticles( MPI_Comm comm, const int time_step, t_System *system, + std::string filename, std::ofstream &err ) +{ + int rank; + MPI_Comm_rank( comm, &rank ); + auto time_step_fixed = set_width( time_step ); + // Write parallel file + if ( rank == 0 ) + writeParticlesParallelFile( comm, time_step_fixed, filename ); + // Prepare actual filename + // todo(sschulz): Separate filename construction into function + size_t pos = 0; + pos = filename.find( "*", pos ); + if ( std::string::npos == pos ) + log_err( err, "VTK output file does not contain required '*'" ); + filename.replace( pos, 1, time_step_fixed ); + pos = 0; + pos = filename.find( "%", pos ); + if ( std::string::npos == pos ) + log_err( err, "VTK output file does not contain required '%'" ); + std::stringstream rank_string; + rank_string << "_" << rank; + filename.replace( pos, 1, rank_string.str() ); + // Prepare data + System, + CabanaMD_LAYOUT> + host_system; + system->slice_x(); + auto x = system->x; + host_system.resize( x.size() ); + host_system.slice_x(); + auto host_x = host_system.x; + host_system.deep_copy( *system ); + host_system.slice_all(); + host_x = host_system.x; + auto host_id = host_system.id; + auto host_type = host_system.type; + auto host_v = host_system.v; + FILE *file = fopen( filename.c_str(), "w" ); + fprintf( file, "\n" ); + fprintf( file, "\n" ); + fprintf( file, "\n" ); + fprintf( file, "\n", + system->N_local ); + fprintf( file, "\t\n" ); + fprintf( file, "\t\t\n" ); + for ( int n = 0; n < system->N_local; ++n ) + fprintf( file, "%g %g %g ", host_v( n, 0 ), host_v( n, 1 ), + host_v( n, 2 ) ); + fprintf( file, "\n\t\t\n" ); + fprintf( file, "\t\t\n" ); + for ( int n = 0; n < system->N_local; ++n ) + fprintf( file, "%d ", host_id( n ) ); + fprintf( file, "\n\t\t\n" ); + fprintf( file, "\t\t\n" ); + for ( int n = 0; n < system->N_local; ++n ) + fprintf( file, "%d ", host_type( n ) ); + fprintf( file, "\n\t\t\n" ); + fprintf( file, "\t\n" ); + fprintf( file,
"\t\n" ); + fprintf( file, "\t\n" ); + fprintf( file, "\t\n" ); + fprintf( file, "\t\t\n" ); + for ( int n = 0; n < system->N_local; ++n ) + fprintf( file, "%g %g %g ", host_x( n, 0 ), host_x( n, 1 ), + host_x( n, 2 ) ); + fprintf( file, "\n\t\t\n" ); + fprintf( file, "\t\n" ); + fprintf( file, "\t\n" ); + fprintf( file, "\t\t\n" ); + fprintf( file, "\n\t\t\n" ); + fprintf( file, "\t\t\n" ); + fprintf( file, "\n\t\t\n" ); + fprintf( file, "\t\t\n" ); + fprintf( file, "\n\t\t\n" ); + fprintf( file, "\t\n" ); + fprintf( file, "\n" ); + fprintf( file, "\n" ); + fprintf( file, "\n" ); + fclose( file ); +} + +} // end namespace VTKWriter +#endif