From 7c29655157b5892f1e96e6a2c59eed22ff053347 Mon Sep 17 00:00:00 2001 From: Alan Humphrey Date: Mon, 22 Jul 2019 15:13:58 -0700 Subject: [PATCH 01/36] Update deprecated 'dimension' functions in favor of ISO/C++ vocabulary 'extent'. --- src/examinimd.cpp | 58 +++++++++++++++++++++++------------------------ src/input.cpp | 20 ++++++++-------- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/src/examinimd.cpp b/src/examinimd.cpp index 52d431c..8539a7e 100644 --- a/src/examinimd.cpp +++ b/src/examinimd.cpp @@ -64,13 +64,13 @@ void ExaMiniMD::init(int argc, char* argv[]) { // Lets parse the command line arguments input->read_command_line_args(argc,argv); - // Read input file + // Read input file input->read_file(); // Now we know which integrator type to use if(input->integrator_type == INTEGRATOR_NVE) integrator = new IntegratorNVE(system); - + // Fill some binning if(input->binning_type == BINNING_KKSORT) binning = new BinningKKSort(system); @@ -81,7 +81,7 @@ void ExaMiniMD::init(int argc, char* argv[]) { #include #undef FORCE_MODULES_INSTANTIATION else comm->error("Invalid ForceType"); - for(int line = 0; line < input->force_coeff_lines.dimension_0(); line++) { + for(int line = 0; line < input->force_coeff_lines.extent(0); line++) { //input->input_data.print_line(input->force_coeff_lines(line)); //printf("init_coeff: %i %i\n",line,input->input_data.words_in_line(input->force_coeff_lines(line))); force->init_coeff(input->input_data.words_in_line(input->force_coeff_lines(line)), @@ -117,7 +117,7 @@ void ExaMiniMD::init(int argc, char* argv[]) { input->create_lattice(comm); // Create the Halo - comm->exchange(); + comm->exchange(); // Sort particles T_F_FLOAT neigh_cutoff = input->force_cutoff + input->neighbor_skin; @@ -141,7 +141,7 @@ void ExaMiniMD::init(int argc, char* argv[]) { // Reverse Communicate Force Update on Halo comm->update_force(); } - + // Initial output int step = 0; if(input->thermo_rate > 0) { @@ -189,8 +189,8 @@ void ExaMiniMD::run(int nsteps) { // Timestep Loop for(int step = 1; step <= nsteps; step++ ) { - - // Do first part of the verlet time step integration + + // Do first part of the verlet time step integration other_timer.reset(); integrator->initial_integrate(); other_time += other_timer.seconds(); @@ -198,7 +198,7 @@ void ExaMiniMD::run(int nsteps) { if(step%input->comm_exchange_rate==0 && step >0) { // Exchange particles comm_timer.reset(); - comm->exchange(); + comm->exchange(); comm_time += comm_timer.seconds(); // Sort particles @@ -210,7 +210,7 @@ void ExaMiniMD::run(int nsteps) { comm_timer.reset(); comm->exchange_halo(); comm_time += comm_timer.seconds(); - + // Create binning for neighborlist construction neigh_timer.reset(); binning->create_binning(neigh_cutoff,neigh_cutoff,neigh_cutoff,1,true,true,false); @@ -226,16 +226,16 @@ void ExaMiniMD::run(int nsteps) { comm_time += comm_timer.seconds(); } - // Zero out forces + // Zero out forces force_timer.reset(); Kokkos::deep_copy(system->f,0.0); - + // Compute Short Range Force force->compute(system,binning,neighbor); force_time += force_timer.seconds(); - // This is where Bonds, Angles and KSpace should go eventually - + // This is where Bonds, Angles and KSpace should go eventually + // Reverse Communicate Force Update on Halo if(input->comm_newton) { comm_timer.reset(); @@ -243,7 +243,7 @@ void ExaMiniMD::run(int nsteps) { comm_time += comm_timer.seconds(); } - // Do second part of the verlet time step integration + // Do second part of the verlet time step integration other_timer.reset(); integrator->final_integrate(); @@ -267,7 +267,7 @@ void ExaMiniMD::run(int nsteps) { if(input->dumpbinaryflag) dump_binary(step); - + if(input->correctnessflag) check_correctness(step); @@ -297,10 +297,10 @@ void ExaMiniMD::dump_binary(int step) { // On dump steps print configuration if(step%input->dumpbinary_rate) return; - + FILE* fp; T_INT n = system->N_local; - + char* filename = new char[MAXPATHLEN]; sprintf(filename,"%s%s.%010d.%03d",input->dumpbinary_path, "/output",step,comm->process_rank()); @@ -340,7 +340,7 @@ void ExaMiniMD::dump_binary(int step) { fwrite(o_x.data(),sizeof(T_X_FLOAT),3*n,fp); fwrite(o_v.data(),sizeof(T_V_FLOAT),3*n,fp); fwrite(o_f.data(),sizeof(T_F_FLOAT),3*n,fp); - + fclose(fp); } @@ -358,7 +358,7 @@ void ExaMiniMD::check_correctness(int step) { FILE* fpref; T_INT n = system->N_local; T_INT ntmp; - + char* filename = new char[MAXPATHLEN]; sprintf(filename,"%s%s.%010d.%03d",input->reference_path, "/output",step,comm->process_rank()); @@ -368,25 +368,25 @@ void ExaMiniMD::check_correctness(int step) { sprintf(str,"Cannot open input file %s",filename); comm->error(str); } - + fread(&ntmp,sizeof(T_INT),1,fpref); - if (ntmp != n) + if (ntmp != n) comm->error("Mismatch in current and reference atom counts"); - + t_id idref = t_id("Correctness::id",n); t_type typeref = t_type("Correctness::type",n); t_q qref = t_q("Correctness::q",n); t_x xref = t_x("Correctness::x",n); t_v vref = t_v("Correctness::v",n); t_f fref = t_f("Correctness::f",n); - + fread(idref.data(),sizeof(T_INT),n,fpref); - fread(typeref.data(),sizeof(T_INT),n,fpref); + fread(typeref.data(),sizeof(T_INT),n,fpref); fread(qref.data(),sizeof(T_FLOAT),n,fpref); fread(xref.data(),sizeof(T_X_FLOAT),3*n,fpref); fread(vref.data(),sizeof(T_V_FLOAT),3*n,fpref); fread(fref.data(),sizeof(T_F_FLOAT),3*n,fpref); - + T_FLOAT sumdelrsq = 0.0; T_FLOAT sumdelvsq = 0.0; T_FLOAT sumdelfsq = 0.0; @@ -395,7 +395,7 @@ void ExaMiniMD::check_correctness(int step) { T_FLOAT maxdelf = 0.0; for (int i = 0; i < n; i++) { int ii = -1; - if (system->id(i) != idref(i)) + if (system->id(i) != idref(i)) for (int j = 0; j < n; j++) { if (system->id(j) == idref(i)) { ii = j; @@ -404,7 +404,7 @@ void ExaMiniMD::check_correctness(int step) { } else ii = i; - + if (ii == -1) printf("Unable to find current id matchinf reference id %d \n",idref(i)); else { @@ -417,7 +417,7 @@ void ExaMiniMD::check_correctness(int step) { maxdelr = MAX(fabs(delx),maxdelr); maxdelr = MAX(fabs(dely),maxdelr); maxdelr = MAX(fabs(delz),maxdelr); - + delx = system->v(ii,0)-vref(i,0); dely = system->v(ii,1)-vref(i,1); delz = system->v(ii,2)-vref(i,2); @@ -426,7 +426,7 @@ void ExaMiniMD::check_correctness(int step) { maxdelv = MAX(fabs(delx),maxdelv); maxdelv = MAX(fabs(dely),maxdelv); maxdelv = MAX(fabs(delz),maxdelv); - + delx = system->f(ii,0)-fref(i,0); dely = system->f(ii,1)-fref(i,1); delz = system->f(ii,2)-fref(i,2); diff --git a/src/input.cpp b/src/input.cpp index c174c4f..579d87f 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -90,7 +90,7 @@ void ItemizedFile::print_line(int i) { int ItemizedFile::words_in_line(int i){ int count = 0; - for(int j=0; jdomain_x = lattice_constant * lattice_nx; - system->domain_y = lattice_constant * lattice_ny; - system->domain_z = lattice_constant * lattice_nz; + system->domain_x = lattice_constant * lattice_nx; + system->domain_y = lattice_constant * lattice_ny; + system->domain_z = lattice_constant * lattice_nz; comm->create_domain_decomposition(); s = *system; @@ -727,7 +727,7 @@ void Input::create_lattice(Comm* comm) { // velocity geom option, i.e. uniform random kinetic energies. // zero out momentum of the whole system afterwards, to eliminate // drift (bad for energy statistics) - + { // Scope s System s = *system; T_FLOAT total_mass = 0.0; From 7f4ec408aced68f0df85761cb2f91105263ad808 Mon Sep 17 00:00:00 2001 From: Alan Humphrey Date: Mon, 22 Jul 2019 15:15:09 -0700 Subject: [PATCH 02/36] Kokkos::Max no longer in Experimental namespace for Kokkos v3.0 promotion, remove this usage. --- src/force_types/force_snap_neigh_impl.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/force_types/force_snap_neigh_impl.h b/src/force_types/force_snap_neigh_impl.h index 8d9500d..8a410f3 100644 --- a/src/force_types/force_snap_neigh_impl.h +++ b/src/force_types/force_snap_neigh_impl.h @@ -137,7 +137,7 @@ template struct FindMaxNumNeighs { NeighList neigh_list; - FindMaxNumNeighs(NeighList& nl): neigh_list(nl) {} + FindMaxNumNeighs(NeighList& nl): neigh_list(nl) {} KOKKOS_INLINE_FUNCTION void operator() (const int& i, int& max_neighs) const { @@ -172,7 +172,7 @@ void ForceSNAP::compute(System* system, Binning* binning, Neighbo const int num_neighs = neighs_i.get_num_neighs(); if(max_neighs(neigh_list), Kokkos::Experimental::Max(max_neighs)); + Kokkos::parallel_reduce("ForceSNAP::find_max_neighs",nlocal, FindMaxNumNeighs(neigh_list), Kokkos::Max(max_neighs)); sna.nmax = max_neighs; @@ -272,8 +272,8 @@ void ForceSNAP::init_coeff(int narg, char **arg) // ncoeffall should be (ncoeff+2)*(ncoeff+1)/2 // so, ncoeff = floor(sqrt(2*ncoeffall))-1 - - ncoeff = sqrt(2*ncoeffall)-1; + + ncoeff = sqrt(2*ncoeffall)-1; ncoeffq = (ncoeff*(ncoeff+1))/2; int ntmp = 1+ncoeff+ncoeffq; if (ntmp != ncoeffall) { @@ -384,7 +384,7 @@ void ForceSNAP::read_files(char *coefffilename, char *paramfilena int nelemfile = atoi(words[0]); ncoeffall = atoi(words[1]); - + // Set up element lists radelem = Kokkos::View("pair:radelem",nelements); @@ -504,7 +504,7 @@ void ForceSNAP::read_files(char *coefffilename, char *paramfilena switchflag = 1; bzeroflag = 1; quadraticflag = 0; - + // open SNAP parameter file on proc 0 FILE *fpparam; @@ -537,7 +537,7 @@ void ForceSNAP::read_files(char *coefffilename, char *paramfilena //nwords = atom->count_words(line); if(line[0]!=10) nwords = 2; else nwords = 0; if (nwords == 0) continue; - + if (nwords != 2) Kokkos::abort("Incorrect format in SNAP parameter file"); @@ -548,7 +548,7 @@ void ForceSNAP::read_files(char *coefffilename, char *paramfilena char* keyval = strtok(NULL,"' \t\n\r\f"); //if (comm->me == 0) { - //if (screen) + //if (screen) //if (logfile) fprintf(logfile,"SNAP keyword %s %s \n",keywd,keyval); //} From da6ece60888867c993e4fb4dee312d1d204689f7 Mon Sep 17 00:00:00 2001 From: Alan Humphrey Date: Mon, 22 Jul 2019 15:19:20 -0700 Subject: [PATCH 03/36] Remove compiler warning for unused printf format specifier (examinimd.cpp:162:42: warning: more '%' conversions than data arguments [-Wformat]). --- src/examinimd.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/examinimd.cpp b/src/examinimd.cpp index 8539a7e..b599892 100644 --- a/src/examinimd.cpp +++ b/src/examinimd.cpp @@ -159,7 +159,7 @@ void ExaMiniMD::init(int argc, char* argv[]) { } else { printf("\n"); printf("Step Temp E_pair TotEng CPU\n"); - printf(" %i %lf %lf %lf %lf %e\n",step,T,PE,PE+KE,0.0); + printf(" %i %lf %lf %lf %lf\n",step,T,PE,PE+KE,0.0); } } } From 33d47c3247f51d940c76d1923e790cc8dfac47e5 Mon Sep 17 00:00:00 2001 From: Alan Humphrey Date: Mon, 22 Jul 2019 15:33:45 -0700 Subject: [PATCH 04/36] Template parameters for StaticCrsGraph (Kokkos_StaticCrsGraph.hpp) have been reorganized to be consistent with Kokkos::View. ExaMiniMD not currently using Arg3Type, which if provided corresponds to the MemoryTraits. Because the implementation is not using variadic templates, order of template arguments matters. For now simply passing the default value. May want to use this differently? --- src/neighbor_types/neighbor_csr.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/neighbor_types/neighbor_csr.h b/src/neighbor_types/neighbor_csr.h index 859ee24..0344321 100644 --- a/src/neighbor_types/neighbor_csr.h +++ b/src/neighbor_types/neighbor_csr.h @@ -79,7 +79,7 @@ #include template -struct NeighListCSR : public Kokkos::StaticCrsGraph { +struct NeighListCSR : public Kokkos::StaticCrsGraph { struct NeighViewCSR { private: const T_INT* const ptr; @@ -100,14 +100,14 @@ struct NeighListCSR : public Kokkos::StaticCrsGraph() {} + Kokkos::StaticCrsGraph() {} NeighListCSR (const NeighListCSR& rhs) : - Kokkos::StaticCrsGraph(rhs) { + Kokkos::StaticCrsGraph(rhs) { } template NeighListCSR (const EntriesType& entries_,const RowMapType& row_map_) : - Kokkos::StaticCrsGraph( entries_, row_map_) {} + Kokkos::StaticCrsGraph( entries_, row_map_) {} KOKKOS_INLINE_FUNCTION @@ -274,10 +274,10 @@ class NeighborCSR: public Neighbor { for(int by_j = by-1; by_jby) || ((by_j==by) && (bz_j>bz) )))) && - (bx_j>=nhalo) && (bx_jby) || ((by_j==by) && (bz_j>bz) )))) && + (bx_j>=nhalo) && (bx_j=nhalo) && (by_j=nhalo) && (bz_j=nhalo) && (bz_jby) || ((by_j==by) && (bz_j>bz) )))) && - (bx_j>nhalo) && (bx_jby) || ((by_j==by) && (bz_j>bz) )))) && + (bx_j>nhalo) && (bx_jnhalo) && (by_jnhalo) && (bz_jnhalo) && (bz_j Date: Mon, 22 Jul 2019 16:19:15 -0700 Subject: [PATCH 05/36] Update team_size_max() calls. With kokkos v3.0 promotion, TeamPolicy<>::team_size_max(Functor) changes to a member function of the team policy. The previous variant didn't take all necessary information into account and could potentially result in invalid answers. NOTE: the league and team size arguments passed to the TeamPolicy may warrant inspection (force_types/force_snap_neigh_impl.h:183), e.g., perhaps use team_scratch_size & thread_scratch_size? --- src/force_types/force_snap_neigh_impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/force_types/force_snap_neigh_impl.h b/src/force_types/force_snap_neigh_impl.h index 8a410f3..3eee514 100644 --- a/src/force_types/force_snap_neigh_impl.h +++ b/src/force_types/force_snap_neigh_impl.h @@ -180,7 +180,7 @@ void ForceSNAP::compute(System* system, Binning* binning, Neighbo T_INT thread_scratch_size = sna.size_thread_scratch_arrays(); //printf("Sizes: %i %i\n",team_scratch_size/1024,thread_scratch_size/1024); - int team_size_max = Kokkos::TeamPolicy<>::team_size_max(*this); + int team_size_max = Kokkos::TeamPolicy<>(team_scratch_size,thread_scratch_size).team_size_max(*this,Kokkos::ParallelForTag()); int vector_length = 8; #ifdef KOKKOS_ENABLE_CUDA int team_size = 20;//max_neighs; From 8dc8c451bf00633d0478c392be26777b03304a7a Mon Sep 17 00:00:00 2001 From: Alan Humphrey Date: Mon, 22 Jul 2019 19:00:11 -0600 Subject: [PATCH 06/36] Pass basic league and team size arguments to the TeamPolicy CTOR. --- src/force_types/force_snap_neigh_impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/force_types/force_snap_neigh_impl.h b/src/force_types/force_snap_neigh_impl.h index 3eee514..3e90cdf 100644 --- a/src/force_types/force_snap_neigh_impl.h +++ b/src/force_types/force_snap_neigh_impl.h @@ -180,7 +180,7 @@ void ForceSNAP::compute(System* system, Binning* binning, Neighbo T_INT thread_scratch_size = sna.size_thread_scratch_arrays(); //printf("Sizes: %i %i\n",team_scratch_size/1024,thread_scratch_size/1024); - int team_size_max = Kokkos::TeamPolicy<>(team_scratch_size,thread_scratch_size).team_size_max(*this,Kokkos::ParallelForTag()); + int team_size_max = Kokkos::TeamPolicy<>(1,1).team_size_max(*this,Kokkos::ParallelForTag()); int vector_length = 8; #ifdef KOKKOS_ENABLE_CUDA int team_size = 20;//max_neighs; From b439ad2efa8c7ee0f6441ed90312daf22f450ef8 Mon Sep 17 00:00:00 2001 From: Alan Humphrey Date: Thu, 25 Jul 2019 14:37:31 -0600 Subject: [PATCH 07/36] More precisely determine TeamPolicy CTOR args. --- src/force_types/force_snap_neigh_impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/force_types/force_snap_neigh_impl.h b/src/force_types/force_snap_neigh_impl.h index 3e90cdf..5d4a855 100644 --- a/src/force_types/force_snap_neigh_impl.h +++ b/src/force_types/force_snap_neigh_impl.h @@ -180,8 +180,8 @@ void ForceSNAP::compute(System* system, Binning* binning, Neighbo T_INT thread_scratch_size = sna.size_thread_scratch_arrays(); //printf("Sizes: %i %i\n",team_scratch_size/1024,thread_scratch_size/1024); - int team_size_max = Kokkos::TeamPolicy<>(1,1).team_size_max(*this,Kokkos::ParallelForTag()); int vector_length = 8; + int team_size_max = Kokkos::TeamPolicy<>(nlocal,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelForTag()); #ifdef KOKKOS_ENABLE_CUDA int team_size = 20;//max_neighs; if(team_size*vector_length > team_size_max) From 8baad0fad846f8bf8388de2616ac9ce008e0d656 Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Mon, 18 May 2020 16:10:05 -0600 Subject: [PATCH 08/36] Build with CMake 3.10+ Original makefile was not removed Cuda needs testing --- CMakeLists.txt | 25 ++++++++++++++ input/CMakeLists.txt | 10 ++++++ input/snap/CMakeLists.txt | 14 ++++++++ src/CMakeLists.txt | 55 +++++++++++++++++++++++++++++++ src/binning_types/CMakeLists.txt | 13 ++++++++ src/comm_types/CMakeLists.txt | 15 +++++++++ src/force_types/CMakeLists.txt | 23 +++++++++++++ src/neighbor_types/CMakeLists.txt | 17 ++++++++++ 8 files changed, 172 insertions(+) create mode 100644 CMakeLists.txt create mode 100644 input/CMakeLists.txt create mode 100644 input/snap/CMakeLists.txt create mode 100644 src/CMakeLists.txt create mode 100644 src/binning_types/CMakeLists.txt create mode 100644 src/comm_types/CMakeLists.txt create mode 100644 src/force_types/CMakeLists.txt create mode 100644 src/neighbor_types/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..f25d9a9 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,25 @@ +cmake_minimum_required(VERSION 3.10) + +project(ExaMiniMD LANGUAGES CXX) + +find_package(Kokkos REQUIRED) +option(USE_MPI "MPI" ON) + +if (USE_MPI) + add_definitions(-DEXAMIND_ENABLE_MPI) + find_package(MPI REQUIRED) +else() + string(FIND "${Kokkos_DEVICES}" "Cuda" USING_CUDA) + # string FIND returns -1 if not found + if (NOT USING_CUDA STREQUAL "-1") + if (CMAKE_CXX_COMPILER_ID MATCHES "nvcc") + enable_language(CUDA) + set(USE_CUDA True INTERNAL "") + else() + message(FATAL_ERROR "Kokkos uses CUDA but compiler is not nvcc") + endif() + endif() +endif() + +add_subdirectory(src) +add_subdirectory(input) diff --git a/input/CMakeLists.txt b/input/CMakeLists.txt new file mode 100644 index 0000000..290cb4f --- /dev/null +++ b/input/CMakeLists.txt @@ -0,0 +1,10 @@ +cmake_minimum_required(VERSION 3.10) + +set(FILES + in.lj) + +foreach(FILE ${FILES}) + configure_file(${FILE} ${FILE} COPYONLY) +endforeach() + +add_subdirectory(snap) diff --git a/input/snap/CMakeLists.txt b/input/snap/CMakeLists.txt new file mode 100644 index 0000000..990c5c9 --- /dev/null +++ b/input/snap/CMakeLists.txt @@ -0,0 +1,14 @@ +cmake_minimum_required(VERSION 3.10) + +set(FILES + in.snap.Ta06A + in.snap.W + Ta06A.snap + Ta06A.snapcoeff + Ta06A.snapparam + W.snapcoeff + W.snapparam) + +foreach(FILE ${FILES}) + configure_file(${FILE} ${FILE} COPYONLY) +endforeach() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..0e231ed --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,55 @@ +cmake_minimum_required(VERSION 3.10) + +set(HEADERS + binning.h + comm.h + examinimd.h + force.h + input.h + integrator.h + integrator_nve.h + math_extra.h + modules_binning.h + modules_comm.h + modules_force.h + modules_integrator.h + modules_neighbor.h + modules_property.h + neighbor.h + property_kine.h + property_pote.h + property_temperature.h + system.h + types.h) + +set(SOURCES + binning.cpp + comm.cpp + examinimd.cpp + force.cpp + input.cpp + integrator.cpp + integrator_nve.cpp + neighbor.cpp + property_kine.cpp + property_pote.cpp + property_temperature.cpp + system.cpp) + +set(SUBDIRECTORIES + binning_types + comm_types + force_types + neighbor_types) + +foreach(SUBDIR ${SUBDIRECTORIES}) + add_subdirectory(${SUBDIR}) +endforeach() + +if (USE_CUDA) + set_property(SOURCE ${SOURCES} PROPERTIES LANGUAGE CUDA) +endif() + +add_executable(ExaMiniMD main.cpp ${SOURCES} ${HEADERS}) +target_include_directories(ExaMiniMD PRIVATE ${Kokkos_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${SUBDIRECTORIES}) +target_link_libraries(ExaMiniMD Kokkos::kokkos) diff --git a/src/binning_types/CMakeLists.txt b/src/binning_types/CMakeLists.txt new file mode 100644 index 0000000..a8a987d --- /dev/null +++ b/src/binning_types/CMakeLists.txt @@ -0,0 +1,13 @@ +cmake_minimum_required(VERSION 3.10) + +SET(HEADERS + ${HEADERS} + ${CMAKE_CURRENT_SOURCE_DIR}/binning_kksort.h + + PARENT_SCOPE) + +SET(SOURCES + ${SOURCES} + ${CMAKE_CURRENT_SOURCE_DIR}/binning_kksort.cpp + + PARENT_SCOPE) diff --git a/src/comm_types/CMakeLists.txt b/src/comm_types/CMakeLists.txt new file mode 100644 index 0000000..4be5dda --- /dev/null +++ b/src/comm_types/CMakeLists.txt @@ -0,0 +1,15 @@ +cmake_minimum_required(VERSION 3.10) + +SET(HEADERS + ${HEADERS} + ${CMAKE_CURRENT_SOURCE_DIR}/comm_mpi.h + ${CMAKE_CURRENT_SOURCE_DIR}/comm_serial.h + + PARENT_SCOPE) + +SET(SOURCES + ${SOURCES} + ${CMAKE_CURRENT_SOURCE_DIR}/comm_mpi.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/comm_serial.cpp + + PARENT_SCOPE) diff --git a/src/force_types/CMakeLists.txt b/src/force_types/CMakeLists.txt new file mode 100644 index 0000000..218d382 --- /dev/null +++ b/src/force_types/CMakeLists.txt @@ -0,0 +1,23 @@ +cmake_minimum_required(VERSION 3.10) + +SET(HEADERS + ${HEADERS} + ${CMAKE_CURRENT_SOURCE_DIR}/force_lj_cell.h + ${CMAKE_CURRENT_SOURCE_DIR}/force_lj_idial_neigh.h + ${CMAKE_CURRENT_SOURCE_DIR}/force_lj_idial_neigh_impl.h + ${CMAKE_CURRENT_SOURCE_DIR}/force_lj_neigh.h + ${CMAKE_CURRENT_SOURCE_DIR}/force_lj_neigh_impl.h + ${CMAKE_CURRENT_SOURCE_DIR}/force_snap_neigh.h + ${CMAKE_CURRENT_SOURCE_DIR}/force_snap_neigh_impl.h + ${CMAKE_CURRENT_SOURCE_DIR}/sna.h + + PARENT_SCOPE) + +SET(SOURCES + ${SOURCES} + ${CMAKE_CURRENT_SOURCE_DIR}/force_lj_cell.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/force_lj_idial_neigh.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/force_lj_neigh.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/force_snap_neigh.cpp + + PARENT_SCOPE) diff --git a/src/neighbor_types/CMakeLists.txt b/src/neighbor_types/CMakeLists.txt new file mode 100644 index 0000000..16dc0b8 --- /dev/null +++ b/src/neighbor_types/CMakeLists.txt @@ -0,0 +1,17 @@ +cmake_minimum_required(VERSION 3.10) + +SET(HEADERS + ${HEADERS} + ${CMAKE_CURRENT_SOURCE_DIR}/neighbor_2d.h + ${CMAKE_CURRENT_SOURCE_DIR}/neighbor_csr.h + ${CMAKE_CURRENT_SOURCE_DIR}/neighbor_csr_map_constr.h + + PARENT_SCOPE) + +SET(SOURCES + ${SOURCES} + ${CMAKE_CURRENT_SOURCE_DIR}/neighbor_2d.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/neighbor_csr.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/neighbor_csr_map_constr.cpp + + PARENT_SCOPE) From c0b756472545591ce1f82e9fe37ceceaa69b9295 Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Tue, 19 May 2020 08:37:31 -0600 Subject: [PATCH 09/36] use target_sources instead of setting variable in parent scope --- src/CMakeLists.txt | 4 ++-- src/binning_types/CMakeLists.txt | 12 +++++------- src/comm_types/CMakeLists.txt | 16 +++++++--------- src/force_types/CMakeLists.txt | 32 +++++++++++++++---------------- src/neighbor_types/CMakeLists.txt | 20 +++++++++---------- 5 files changed, 38 insertions(+), 46 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0e231ed..9d7cac0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -36,6 +36,8 @@ set(SOURCES property_temperature.cpp system.cpp) +add_executable(ExaMiniMD main.cpp ${SOURCES} ${HEADERS}) + set(SUBDIRECTORIES binning_types comm_types @@ -49,7 +51,5 @@ endforeach() if (USE_CUDA) set_property(SOURCE ${SOURCES} PROPERTIES LANGUAGE CUDA) endif() - -add_executable(ExaMiniMD main.cpp ${SOURCES} ${HEADERS}) target_include_directories(ExaMiniMD PRIVATE ${Kokkos_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${SUBDIRECTORIES}) target_link_libraries(ExaMiniMD Kokkos::kokkos) diff --git a/src/binning_types/CMakeLists.txt b/src/binning_types/CMakeLists.txt index a8a987d..22a1735 100644 --- a/src/binning_types/CMakeLists.txt +++ b/src/binning_types/CMakeLists.txt @@ -1,13 +1,11 @@ cmake_minimum_required(VERSION 3.10) SET(HEADERS - ${HEADERS} - ${CMAKE_CURRENT_SOURCE_DIR}/binning_kksort.h - - PARENT_SCOPE) + binning_kksort.h) SET(SOURCES - ${SOURCES} - ${CMAKE_CURRENT_SOURCE_DIR}/binning_kksort.cpp + binning_kksort.cpp) - PARENT_SCOPE) +foreach(FILE ${SOURCES} ${HEADERS}) + target_sources(ExaMiniMD PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/${FILE}) +endforeach() diff --git a/src/comm_types/CMakeLists.txt b/src/comm_types/CMakeLists.txt index 4be5dda..e8d4a33 100644 --- a/src/comm_types/CMakeLists.txt +++ b/src/comm_types/CMakeLists.txt @@ -1,15 +1,13 @@ cmake_minimum_required(VERSION 3.10) SET(HEADERS - ${HEADERS} - ${CMAKE_CURRENT_SOURCE_DIR}/comm_mpi.h - ${CMAKE_CURRENT_SOURCE_DIR}/comm_serial.h - - PARENT_SCOPE) + comm_mpi.h + comm_serial.h) SET(SOURCES - ${SOURCES} - ${CMAKE_CURRENT_SOURCE_DIR}/comm_mpi.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/comm_serial.cpp + comm_mpi.cpp + comm_serial.cpp) - PARENT_SCOPE) +foreach(FILE ${SOURCES} ${HEADERS}) + target_sources(ExaMiniMD PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/${FILE}) +endforeach() diff --git a/src/force_types/CMakeLists.txt b/src/force_types/CMakeLists.txt index 218d382..6683bb3 100644 --- a/src/force_types/CMakeLists.txt +++ b/src/force_types/CMakeLists.txt @@ -1,23 +1,21 @@ cmake_minimum_required(VERSION 3.10) SET(HEADERS - ${HEADERS} - ${CMAKE_CURRENT_SOURCE_DIR}/force_lj_cell.h - ${CMAKE_CURRENT_SOURCE_DIR}/force_lj_idial_neigh.h - ${CMAKE_CURRENT_SOURCE_DIR}/force_lj_idial_neigh_impl.h - ${CMAKE_CURRENT_SOURCE_DIR}/force_lj_neigh.h - ${CMAKE_CURRENT_SOURCE_DIR}/force_lj_neigh_impl.h - ${CMAKE_CURRENT_SOURCE_DIR}/force_snap_neigh.h - ${CMAKE_CURRENT_SOURCE_DIR}/force_snap_neigh_impl.h - ${CMAKE_CURRENT_SOURCE_DIR}/sna.h - - PARENT_SCOPE) + force_lj_cell.h + force_lj_idial_neigh.h + force_lj_idial_neigh_impl.h + force_lj_neigh.h + force_lj_neigh_impl.h + force_snap_neigh.h + force_snap_neigh_impl.h + sna.h) SET(SOURCES - ${SOURCES} - ${CMAKE_CURRENT_SOURCE_DIR}/force_lj_cell.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/force_lj_idial_neigh.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/force_lj_neigh.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/force_snap_neigh.cpp + force_lj_cell.cpp + force_lj_idial_neigh.cpp + force_lj_neigh.cpp + force_snap_neigh.cpp) - PARENT_SCOPE) +foreach(FILE ${SOURCES} ${HEADERS}) + target_sources(ExaMiniMD PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/${FILE}) +endforeach() diff --git a/src/neighbor_types/CMakeLists.txt b/src/neighbor_types/CMakeLists.txt index 16dc0b8..096a980 100644 --- a/src/neighbor_types/CMakeLists.txt +++ b/src/neighbor_types/CMakeLists.txt @@ -1,17 +1,15 @@ cmake_minimum_required(VERSION 3.10) SET(HEADERS - ${HEADERS} - ${CMAKE_CURRENT_SOURCE_DIR}/neighbor_2d.h - ${CMAKE_CURRENT_SOURCE_DIR}/neighbor_csr.h - ${CMAKE_CURRENT_SOURCE_DIR}/neighbor_csr_map_constr.h - - PARENT_SCOPE) + neighbor_2d.h + neighbor_csr.h + neighbor_csr_map_constr.h) SET(SOURCES - ${SOURCES} - ${CMAKE_CURRENT_SOURCE_DIR}/neighbor_2d.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/neighbor_csr.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/neighbor_csr_map_constr.cpp + neighbor_2d.cpp + neighbor_csr.cpp + neighbor_csr_map_constr.cpp) - PARENT_SCOPE) +foreach(FILE ${SOURCES} ${HEADERS}) + target_sources(ExaMiniMD PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/${FILE}) +endforeach() From 9cd8e3148deac17e3ff497bd65e6d655eb597759 Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Tue, 19 May 2020 08:38:33 -0600 Subject: [PATCH 10/36] set ExaMiniMD target to use Cuda instead of individual files --- src/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9d7cac0..2cb17d6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -49,7 +49,8 @@ foreach(SUBDIR ${SUBDIRECTORIES}) endforeach() if (USE_CUDA) - set_property(SOURCE ${SOURCES} PROPERTIES LANGUAGE CUDA) + set_target_properties(ExaMiniMD PROPERTIES LANGUAGE CUDA) endif() + target_include_directories(ExaMiniMD PRIVATE ${Kokkos_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${SUBDIRECTORIES}) target_link_libraries(ExaMiniMD Kokkos::kokkos) From 89fc9f3b22c708dabb48ec91b21d36989161d357 Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Tue, 19 May 2020 08:44:59 -0600 Subject: [PATCH 11/36] include and link MPI when used --- src/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2cb17d6..6b2bf71 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -53,4 +53,10 @@ if (USE_CUDA) endif() target_include_directories(ExaMiniMD PRIVATE ${Kokkos_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${SUBDIRECTORIES}) + +if (USE_MPI) + target_include_directories(ExaMiniMD PRIVATE ${MPI_INCLUDE_PATH}) + target_link_libraries(ExaMiniMD ${MPI_LIBRARIES}) +endif() + target_link_libraries(ExaMiniMD Kokkos::kokkos) From 4c18d8c8bb61d5694bda8f32bd44605915e68351 Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Tue, 19 May 2020 09:12:44 -0600 Subject: [PATCH 12/36] cmake_minimum_required at top only --- input/CMakeLists.txt | 2 -- input/snap/CMakeLists.txt | 2 -- src/CMakeLists.txt | 2 -- src/binning_types/CMakeLists.txt | 2 -- src/comm_types/CMakeLists.txt | 2 -- src/force_types/CMakeLists.txt | 2 -- src/neighbor_types/CMakeLists.txt | 2 -- 7 files changed, 14 deletions(-) diff --git a/input/CMakeLists.txt b/input/CMakeLists.txt index 290cb4f..72a74b5 100644 --- a/input/CMakeLists.txt +++ b/input/CMakeLists.txt @@ -1,5 +1,3 @@ -cmake_minimum_required(VERSION 3.10) - set(FILES in.lj) diff --git a/input/snap/CMakeLists.txt b/input/snap/CMakeLists.txt index 990c5c9..ce700e4 100644 --- a/input/snap/CMakeLists.txt +++ b/input/snap/CMakeLists.txt @@ -1,5 +1,3 @@ -cmake_minimum_required(VERSION 3.10) - set(FILES in.snap.Ta06A in.snap.W diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6b2bf71..0fccab1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,5 +1,3 @@ -cmake_minimum_required(VERSION 3.10) - set(HEADERS binning.h comm.h diff --git a/src/binning_types/CMakeLists.txt b/src/binning_types/CMakeLists.txt index 22a1735..2d4ab15 100644 --- a/src/binning_types/CMakeLists.txt +++ b/src/binning_types/CMakeLists.txt @@ -1,5 +1,3 @@ -cmake_minimum_required(VERSION 3.10) - SET(HEADERS binning_kksort.h) diff --git a/src/comm_types/CMakeLists.txt b/src/comm_types/CMakeLists.txt index e8d4a33..080c4cb 100644 --- a/src/comm_types/CMakeLists.txt +++ b/src/comm_types/CMakeLists.txt @@ -1,5 +1,3 @@ -cmake_minimum_required(VERSION 3.10) - SET(HEADERS comm_mpi.h comm_serial.h) diff --git a/src/force_types/CMakeLists.txt b/src/force_types/CMakeLists.txt index 6683bb3..cd380b7 100644 --- a/src/force_types/CMakeLists.txt +++ b/src/force_types/CMakeLists.txt @@ -1,5 +1,3 @@ -cmake_minimum_required(VERSION 3.10) - SET(HEADERS force_lj_cell.h force_lj_idial_neigh.h diff --git a/src/neighbor_types/CMakeLists.txt b/src/neighbor_types/CMakeLists.txt index 096a980..0377117 100644 --- a/src/neighbor_types/CMakeLists.txt +++ b/src/neighbor_types/CMakeLists.txt @@ -1,5 +1,3 @@ -cmake_minimum_required(VERSION 3.10) - SET(HEADERS neighbor_2d.h neighbor_csr.h From 8cce3f8b95875f8851dd8b93d45bca67ff815680 Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Tue, 19 May 2020 09:20:46 -0600 Subject: [PATCH 13/36] use MPI CXX and imported target instead of variables --- CMakeLists.txt | 2 +- src/CMakeLists.txt | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f25d9a9..ee89376 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ option(USE_MPI "MPI" ON) if (USE_MPI) add_definitions(-DEXAMIND_ENABLE_MPI) - find_package(MPI REQUIRED) + find_package(MPI REQUIRED CXX) else() string(FIND "${Kokkos_DEVICES}" "Cuda" USING_CUDA) # string FIND returns -1 if not found diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0fccab1..cd83918 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -52,9 +52,11 @@ endif() target_include_directories(ExaMiniMD PRIVATE ${Kokkos_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${SUBDIRECTORIES}) +set(EXAMINIMD_LIBS + Kokkos::kokkos) + if (USE_MPI) - target_include_directories(ExaMiniMD PRIVATE ${MPI_INCLUDE_PATH}) - target_link_libraries(ExaMiniMD ${MPI_LIBRARIES}) + list(APPEND EXAMINIMD_LIBS MPI::MPI_CXX) endif() -target_link_libraries(ExaMiniMD Kokkos::kokkos) +target_link_libraries(ExaMiniMD ${EXAMINIMD_LIBS}) From d11a228fc1a56de2174d3b405cce4744f63d0019 Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Tue, 19 May 2020 09:30:07 -0600 Subject: [PATCH 14/36] use CMP0076 to resolve paths in target_source immediately --- CMakeLists.txt | 3 +++ src/binning_types/CMakeLists.txt | 4 +--- src/comm_types/CMakeLists.txt | 4 +--- src/force_types/CMakeLists.txt | 4 +--- src/neighbor_types/CMakeLists.txt | 4 +--- 5 files changed, 7 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ee89376..b3e33cb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,5 +21,8 @@ else() endif() endif() +# force target_source to resolve relative paths immediately +cmake_policy(SET CMP0076 NEW) + add_subdirectory(src) add_subdirectory(input) diff --git a/src/binning_types/CMakeLists.txt b/src/binning_types/CMakeLists.txt index 2d4ab15..8bbfdb1 100644 --- a/src/binning_types/CMakeLists.txt +++ b/src/binning_types/CMakeLists.txt @@ -4,6 +4,4 @@ SET(HEADERS SET(SOURCES binning_kksort.cpp) -foreach(FILE ${SOURCES} ${HEADERS}) - target_sources(ExaMiniMD PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/${FILE}) -endforeach() +target_sources(ExaMiniMD PRIVATE ${SOURCES} ${HEADERS}) diff --git a/src/comm_types/CMakeLists.txt b/src/comm_types/CMakeLists.txt index 080c4cb..34cc878 100644 --- a/src/comm_types/CMakeLists.txt +++ b/src/comm_types/CMakeLists.txt @@ -6,6 +6,4 @@ SET(SOURCES comm_mpi.cpp comm_serial.cpp) -foreach(FILE ${SOURCES} ${HEADERS}) - target_sources(ExaMiniMD PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/${FILE}) -endforeach() +target_sources(ExaMiniMD PRIVATE ${SOURCES} ${HEADERS}) diff --git a/src/force_types/CMakeLists.txt b/src/force_types/CMakeLists.txt index cd380b7..e1aee41 100644 --- a/src/force_types/CMakeLists.txt +++ b/src/force_types/CMakeLists.txt @@ -14,6 +14,4 @@ SET(SOURCES force_lj_neigh.cpp force_snap_neigh.cpp) -foreach(FILE ${SOURCES} ${HEADERS}) - target_sources(ExaMiniMD PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/${FILE}) -endforeach() +target_sources(ExaMiniMD PRIVATE ${SOURCES} ${HEADERS}) diff --git a/src/neighbor_types/CMakeLists.txt b/src/neighbor_types/CMakeLists.txt index 0377117..13b155a 100644 --- a/src/neighbor_types/CMakeLists.txt +++ b/src/neighbor_types/CMakeLists.txt @@ -8,6 +8,4 @@ SET(SOURCES neighbor_csr.cpp neighbor_csr_map_constr.cpp) -foreach(FILE ${SOURCES} ${HEADERS}) - target_sources(ExaMiniMD PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/${FILE}) -endforeach() +target_sources(ExaMiniMD PRIVATE ${SOURCES} ${HEADERS}) From aba28eca8aac1f916b1eaecf6d2c45f3fbd2fed2 Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Tue, 19 May 2020 09:54:59 -0600 Subject: [PATCH 15/36] Kokkos 3.0 REQUIRED --- CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b3e33cb..a52af45 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,11 +2,10 @@ cmake_minimum_required(VERSION 3.10) project(ExaMiniMD LANGUAGES CXX) -find_package(Kokkos REQUIRED) +find_package(Kokkos 3.0 REQUIRED) option(USE_MPI "MPI" ON) if (USE_MPI) - add_definitions(-DEXAMIND_ENABLE_MPI) find_package(MPI REQUIRED CXX) else() string(FIND "${Kokkos_DEVICES}" "Cuda" USING_CUDA) From 2e1a0768e05194b0bb872774dff9e17c5ea86e76 Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Tue, 19 May 2020 09:58:07 -0600 Subject: [PATCH 16/36] check for CMP0076 before setting it --- CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a52af45..fd3bd55 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,7 +21,9 @@ else() endif() # force target_source to resolve relative paths immediately -cmake_policy(SET CMP0076 NEW) +if(POLICY CMP0076) + cmake_policy(SET CMP0076 NEW) +endif() add_subdirectory(src) add_subdirectory(input) From 51cd06621d17b6a165f9a3545355ce8a9bcd01f8 Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Tue, 19 May 2020 10:01:28 -0600 Subject: [PATCH 17/36] MPI fixes --- CMakeLists.txt | 2 +- src/CMakeLists.txt | 10 +++------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fd3bd55..80eb78c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.10) project(ExaMiniMD LANGUAGES CXX) find_package(Kokkos 3.0 REQUIRED) -option(USE_MPI "MPI" ON) +option(USE_MPI "Build with MPI" ON) if (USE_MPI) find_package(MPI REQUIRED CXX) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cd83918..1981974 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -50,13 +50,9 @@ if (USE_CUDA) set_target_properties(ExaMiniMD PROPERTIES LANGUAGE CUDA) endif() -target_include_directories(ExaMiniMD PRIVATE ${Kokkos_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${SUBDIRECTORIES}) - -set(EXAMINIMD_LIBS - Kokkos::kokkos) - if (USE_MPI) - list(APPEND EXAMINIMD_LIBS MPI::MPI_CXX) + target_compile_definitions(ExaMiniMD PRIVATE EXAMIND_ENABLE_MPI) endif() -target_link_libraries(ExaMiniMD ${EXAMINIMD_LIBS}) +target_include_directories(ExaMiniMD PRIVATE ${Kokkos_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${SUBDIRECTORIES}) +target_link_libraries(ExaMiniMD PRIVATE $<$:MPI::MPI_CXX> Kokkos::kokkos) From 750d7141820e7a324ecc9d3ea9397746643bd570 Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Tue, 19 May 2020 10:07:40 -0600 Subject: [PATCH 18/36] use kokkos_check instead of string FIND to check for Cuda --- CMakeLists.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 80eb78c..9bb7ad5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,9 +8,8 @@ option(USE_MPI "Build with MPI" ON) if (USE_MPI) find_package(MPI REQUIRED CXX) else() - string(FIND "${Kokkos_DEVICES}" "Cuda" USING_CUDA) - # string FIND returns -1 if not found - if (NOT USING_CUDA STREQUAL "-1") + kokkos_check(DEVICES Cuda RETURN_VALUE USE_CUDA) + if (NOT USING_CUDA STREQUAL "FALSE") if (CMAKE_CXX_COMPILER_ID MATCHES "nvcc") enable_language(CUDA) set(USE_CUDA True INTERNAL "") From b1a81d26ab09ff9f68aa29b7ffad18d9d40eee01 Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Tue, 19 May 2020 10:18:54 -0600 Subject: [PATCH 19/36] fix USE_CUDA --- CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9bb7ad5..c30c775 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,10 +9,9 @@ if (USE_MPI) find_package(MPI REQUIRED CXX) else() kokkos_check(DEVICES Cuda RETURN_VALUE USE_CUDA) - if (NOT USING_CUDA STREQUAL "FALSE") + if (USING_CUDA) if (CMAKE_CXX_COMPILER_ID MATCHES "nvcc") enable_language(CUDA) - set(USE_CUDA True INTERNAL "") else() message(FATAL_ERROR "Kokkos uses CUDA but compiler is not nvcc") endif() From 78ac857da09fec9bb816639d0cc3859792ba0238 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 19 Aug 2020 17:04:57 -0500 Subject: [PATCH 20/36] Changes to allow HIP --- src/force_types/force_snap_neigh_impl.h | 5 ++++- src/neighbor_types/neighbor_2d.cpp | 4 ++-- src/neighbor_types/neighbor_csr.cpp | 4 ++-- src/neighbor_types/neighbor_csr_map_constr.cpp | 4 ++-- src/types.h | 5 +++++ 5 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/force_types/force_snap_neigh_impl.h b/src/force_types/force_snap_neigh_impl.h index 5d4a855..8509ce1 100644 --- a/src/force_types/force_snap_neigh_impl.h +++ b/src/force_types/force_snap_neigh_impl.h @@ -84,6 +84,9 @@ ForceSNAP::ForceSNAP(char** args, System* system_, bool half_neig #if defined(KOKKOS_ENABLE_CUDA) std::is_same::value ? Kokkos::DefaultExecutionSpace::concurrency()/vector_length : +#elif defined(KOKKOS_ENABLE_HIP) + std::is_same::value ? + Kokkos::DefaultExecutionSpace::concurrency()/vector_length : #else Kokkos::DefaultExecutionSpace::concurrency(); #endif @@ -182,7 +185,7 @@ void ForceSNAP::compute(System* system, Binning* binning, Neighbo //printf("Sizes: %i %i\n",team_scratch_size/1024,thread_scratch_size/1024); int vector_length = 8; int team_size_max = Kokkos::TeamPolicy<>(nlocal,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelForTag()); -#ifdef KOKKOS_ENABLE_CUDA +#ifdef EMD_ENABLE_GPU int team_size = 20;//max_neighs; if(team_size*vector_length > team_size_max) team_size = team_size_max/vector_length; diff --git a/src/neighbor_types/neighbor_2d.cpp b/src/neighbor_types/neighbor_2d.cpp index 3cc5c8f..d0b8364 100644 --- a/src/neighbor_types/neighbor_2d.cpp +++ b/src/neighbor_types/neighbor_2d.cpp @@ -38,8 +38,8 @@ #include -#ifdef KOKKOS_ENABLE_CUDA -template struct Neighbor2D; +#ifdef EMD_ENABLE_GPU +template struct Neighbor2D; #endif template struct Neighbor2D; diff --git a/src/neighbor_types/neighbor_csr.cpp b/src/neighbor_types/neighbor_csr.cpp index 76cffd2..429d657 100644 --- a/src/neighbor_types/neighbor_csr.cpp +++ b/src/neighbor_types/neighbor_csr.cpp @@ -38,8 +38,8 @@ #include -#ifdef KOKKOS_ENABLE_CUDA -template struct NeighborCSR; +#ifdef EMD_ENABLE_GPU +template struct NeighborCSR; #endif template struct NeighborCSR; diff --git a/src/neighbor_types/neighbor_csr_map_constr.cpp b/src/neighbor_types/neighbor_csr_map_constr.cpp index 1381431..f79435b 100644 --- a/src/neighbor_types/neighbor_csr_map_constr.cpp +++ b/src/neighbor_types/neighbor_csr_map_constr.cpp @@ -38,8 +38,8 @@ #include -#ifdef KOKKOS_ENABLE_CUDA -template struct NeighborCSRMapConstr; +#ifdef EMD_ENABLE_GPU +template struct NeighborCSRMapConstr; #endif template struct NeighborCSRMapConstr; diff --git a/src/types.h b/src/types.h index 1583e18..b2db5f9 100644 --- a/src/types.h +++ b/src/types.h @@ -178,5 +178,10 @@ t_scalar3 operator * (const Scalar& b, const t_scalar3& a) { return t_scalar3(a.x*b,a.y*b,a.z*b); } + +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) +#define EMD_ENABLE_GPU +#endif + #endif From 2e0d45b4813b7fef0389212a7bf6dea505c97848 Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Fri, 23 Oct 2020 16:11:09 -0700 Subject: [PATCH 21/36] Atempt to make ExaMiniMD work with OpenMPTarget This compiles now (leaving out the SNAP potential which requires, ThreadVector level paralle_scan), but fails to do mandatory offload for a couple kernels (sorting, neighborlist construction) in the LJ example. --- src/examinimd.cpp | 2 +- src/force_types/force_snap_neigh.cpp | 3 +++ src/modules_force.h | 3 +++ src/types.h | 2 +- 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/examinimd.cpp b/src/examinimd.cpp index b599892..719368b 100644 --- a/src/examinimd.cpp +++ b/src/examinimd.cpp @@ -59,7 +59,7 @@ ExaMiniMD::ExaMiniMD() { void ExaMiniMD::init(int argc, char* argv[]) { if(system->do_print) - Kokkos::DefaultExecutionSpace::print_configuration(std::cout); + Kokkos::print_configuration(std::cout); // Lets parse the command line arguments input->read_command_line_args(argc,argv); diff --git a/src/force_types/force_snap_neigh.cpp b/src/force_types/force_snap_neigh.cpp index 1f33395..60b33c8 100644 --- a/src/force_types/force_snap_neigh.cpp +++ b/src/force_types/force_snap_neigh.cpp @@ -36,8 +36,11 @@ // Questions? Contact Christian R. Trott (crtrott@sandia.gov) //************************************************************************ +#include +#ifndef KOKKOS_ENABLE_OPENMPTARGET #include #define FORCETYPE_DECLARE_TEMPLATE_MACRO(NeighType) ForceSNAP #define FORCE_MODULES_TEMPLATE #include #undef FORCE_MODULES_TEMPLATE +#endif diff --git a/src/modules_force.h b/src/modules_force.h index 7ad4702..7d01eb7 100644 --- a/src/modules_force.h +++ b/src/modules_force.h @@ -37,7 +37,10 @@ //************************************************************************ // Include Module header files for force +#include #include #include #include +#ifndef KOKKOS_ENABLE_OPENMPTARGET #include +#endif diff --git a/src/types.h b/src/types.h index b2db5f9..4d70c17 100644 --- a/src/types.h +++ b/src/types.h @@ -179,7 +179,7 @@ t_scalar3 operator * return t_scalar3(a.x*b,a.y*b,a.z*b); } -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || defined(KOKKOS_ENABLE_OPENMPTARGET) || defined(KOKKOS_ENABLE_SYCL) #define EMD_ENABLE_GPU #endif From 62ab8c4e7d5338dfe13facf3b44916f231990513 Mon Sep 17 00:00:00 2001 From: Daniel Arndt Date: Tue, 25 May 2021 20:24:20 +0000 Subject: [PATCH 22/36] Fix running with the SYCL backend --- src/examinimd.cpp | 2 +- src/force_types/force_snap_neigh_impl.h | 3 +++ src/types.h | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/examinimd.cpp b/src/examinimd.cpp index b599892..63f7539 100644 --- a/src/examinimd.cpp +++ b/src/examinimd.cpp @@ -59,7 +59,7 @@ ExaMiniMD::ExaMiniMD() { void ExaMiniMD::init(int argc, char* argv[]) { if(system->do_print) - Kokkos::DefaultExecutionSpace::print_configuration(std::cout); + Kokkos::DefaultExecutionSpace{}.print_configuration(std::cout); // Lets parse the command line arguments input->read_command_line_args(argc,argv); diff --git a/src/force_types/force_snap_neigh_impl.h b/src/force_types/force_snap_neigh_impl.h index 8509ce1..eb89ed3 100644 --- a/src/force_types/force_snap_neigh_impl.h +++ b/src/force_types/force_snap_neigh_impl.h @@ -87,6 +87,9 @@ ForceSNAP::ForceSNAP(char** args, System* system_, bool half_neig #elif defined(KOKKOS_ENABLE_HIP) std::is_same::value ? Kokkos::DefaultExecutionSpace::concurrency()/vector_length : +#elif defined(KOKKOS_ENABLE_SYCL) + std::is_same::value ? + Kokkos::DefaultExecutionSpace::concurrency()/vector_length : #else Kokkos::DefaultExecutionSpace::concurrency(); #endif diff --git a/src/types.h b/src/types.h index b2db5f9..00f1e7c 100644 --- a/src/types.h +++ b/src/types.h @@ -179,7 +179,7 @@ t_scalar3 operator * return t_scalar3(a.x*b,a.y*b,a.z*b); } -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || defined(KOKKOS_ENABLE_SYCL) #define EMD_ENABLE_GPU #endif From 51390b686ff5c5fcdee87a284228afe867e16f16 Mon Sep 17 00:00:00 2001 From: Sam Reeve <6740307+streeve@users.noreply.github.com> Date: Mon, 13 Mar 2023 11:50:59 -0400 Subject: [PATCH 23/36] fixup: MPI CMake configure flag typo --- src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1981974..f6aac5f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -51,7 +51,7 @@ if (USE_CUDA) endif() if (USE_MPI) - target_compile_definitions(ExaMiniMD PRIVATE EXAMIND_ENABLE_MPI) + target_compile_definitions(ExaMiniMD PRIVATE EXAMINIMD_ENABLE_MPI) endif() target_include_directories(ExaMiniMD PRIVATE ${Kokkos_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${SUBDIRECTORIES}) From 1da7489e5b1a1b46dd27846ff15a78a7bfbcf64d Mon Sep 17 00:00:00 2001 From: Sam Reeve <6740307+streeve@users.noreply.github.com> Date: Mon, 13 Mar 2023 12:01:38 -0400 Subject: [PATCH 24/36] Install exe with CMake --- CMakeLists.txt | 2 ++ src/CMakeLists.txt | 2 ++ 2 files changed, 4 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index c30c775..31e2cc4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,8 @@ cmake_minimum_required(VERSION 3.10) project(ExaMiniMD LANGUAGES CXX) +include(GNUInstallDirs) + find_package(Kokkos 3.0 REQUIRED) option(USE_MPI "Build with MPI" ON) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f6aac5f..5a880f7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -56,3 +56,5 @@ endif() target_include_directories(ExaMiniMD PRIVATE ${Kokkos_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${SUBDIRECTORIES}) target_link_libraries(ExaMiniMD PRIVATE $<$:MPI::MPI_CXX> Kokkos::kokkos) + +install(TARGETS ExaMiniMD DESTINATION ${CMAKE_INSTALL_BINDIR}) From 3b50f4bf7e4a62d68ee6c48eb8fce61efd2617ba Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Mon, 9 Oct 2023 16:23:32 -0600 Subject: [PATCH 25/36] Fix compile error --- src/examinimd.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/examinimd.cpp b/src/examinimd.cpp index 63f7539..2d1b625 100644 --- a/src/examinimd.cpp +++ b/src/examinimd.cpp @@ -40,6 +40,7 @@ #include #include #include +#include #define MAXPATHLEN 1024 From fa7e2bedafe8b099353d34057a1565dc1cd88905 Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Thu, 10 May 2018 11:30:58 -0600 Subject: [PATCH 26/36] Initial Support for SHMEM Based halo access --- src/Makefile | 17 +++++-- src/comm_types/comm_mpi.cpp | 18 +++++-- src/comm_types/comm_mpi.h | 8 ++++ src/force_types/force_lj_neigh.h | 13 +++++ src/force_types/force_lj_neigh_impl.h | 68 +++++++++++++++++++++++++-- src/input.cpp | 16 +++++-- src/main.cpp | 3 +- src/system.cpp | 15 ++++++ src/system.h | 14 ++++-- src/types.h | 10 ++++ 10 files changed, 162 insertions(+), 20 deletions(-) diff --git a/src/Makefile b/src/Makefile index e763ae9..83df87c 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,9 +1,10 @@ #Kokkos root path -KOKKOS_PATH = ${HOME}/kokkos +KOKKOS_PATH = ${HOME}/Kokkos/kokkos +KOKKOS_REMOTE_SPACES_PATH = ${HOME}/Kokkos/kokkos-remote-spaces #Backend and architecture configuration KOKKOS_DEVICES=OpenMP -KOKKOS_ARCH = "" +KOKKOS_ARCH = "None" #MPI On or off (1/0) MPI = 1 @@ -16,7 +17,7 @@ default: build EXE = ExaMiniMD -CXXFLAGS = -O3 -g +CXXFLAGS = -O3 -g -DKOKKOS_ENABLE_SHMEMSPACE LINKFLAGS = -O3 -g ifeq ($(MPI), 1) @@ -32,6 +33,11 @@ endif LINK = ${CXX} +#CXXFLAGS += -DSHMEMTESTS_USE_HALO +#CXXFLAGS += -DSHMEMTESTS_USE_HALO_LOCAL +#CXXFLAGS += -DSHMEMTESTS_USE_LOCAL_GLOBAL +CXXFLAGS += -DSHMEMTESTS_USE_GLOBAL + KOKKOS_OPTIONS=aggressive_vectorization DEPFLAGS = -M @@ -67,6 +73,11 @@ LIB = include $(KOKKOS_PATH)/Makefile.kokkos +EXTRA_INC += -I${KOKKOS_REMOTE_SPACES_PATH}/src -I${KOKKOS_REMOTE_SPACES_PATH}/src/SHMEM +vpath %.cpp ${KOKKOS_REMOTE_SPACES_PATH}/src/SHMEM +LIB += -L/home/projects/x86-64-haswell/openmpi/2.1.1/gcc/4.9.3/lib -loshmem +OBJ += Kokkos_SHMEMSpace.o + build: $(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) diff --git a/src/comm_types/comm_mpi.cpp b/src/comm_types/comm_mpi.cpp index 45628df..4780010 100644 --- a/src/comm_types/comm_mpi.cpp +++ b/src/comm_types/comm_mpi.cpp @@ -245,7 +245,9 @@ void CommMPI::exchange() { MPI_Irecv(unpack_buffer.data(),proc_num_recv[phase]*sizeof(Particle)/sizeof(int),MPI_INT, proc_neighbors_recv[phase],100002,MPI_COMM_WORLD,&request); if(proc_num_send[phase]>0) MPI_Send (pack_buffer.data(),proc_num_send[phase]*sizeof(Particle)/sizeof(int),MPI_INT, proc_neighbors_send[phase],100002,MPI_COMM_WORLD); - system->grow(N_local + N_ghost + count); + int global_n_max = N_local + N_ghost + count; + reduce_max_int(&global_n_max,1); + system->grow(global_n_max); s = *system; if(proc_num_recv[phase]>0) MPI_Wait(&request,&status); @@ -292,6 +294,10 @@ void CommMPI::exchange_halo() { Kokkos::Profiling::pushRegion("Comm::exchange_halo"); + Kokkos::parallel_for("CommMPI::halo_exchange_create_global_indicies", + Kokkos::RangePolicy >(0,N_local), + *this); + N_local = system->N_local; N_ghost = 0; @@ -332,7 +338,9 @@ void CommMPI::exchange_halo() { } MPI_Irecv(unpack_buffer.data(),proc_num_recv[phase]*sizeof(Particle)/sizeof(int),MPI_INT, proc_neighbors_recv[phase],100002,MPI_COMM_WORLD,&request); MPI_Send (pack_buffer.data(),proc_num_send[phase]*sizeof(Particle)/sizeof(int),MPI_INT, proc_neighbors_send[phase],100002,MPI_COMM_WORLD); - system->grow(N_local + N_ghost + count); + int global_n_max = N_local + N_ghost + count; + reduce_max_int(&global_n_max,1); + system->grow(global_n_max); s = *system; MPI_Wait(&request,&status); Kokkos::parallel_for("CommMPI::halo_exchange_unpack", @@ -346,8 +354,10 @@ void CommMPI::exchange_halo() { *this); Kokkos::deep_copy(count,pack_count); bool redo = false; - if(N_local+N_ghost+count>s.x.extent(0)) { - system->grow(N_local + N_ghost + count); + int global_n_max = N_local + N_ghost + count; + reduce_max_int(&global_n_max,1); + if(global_n_max>s.x.extent(0)) { + system->grow(global_n_max); s = *system; redo = true; } diff --git a/src/comm_types/comm_mpi.h b/src/comm_types/comm_mpi.h index 40f9f4c..754834f 100644 --- a/src/comm_types/comm_mpi.h +++ b/src/comm_types/comm_mpi.h @@ -115,6 +115,8 @@ class CommMPI: public Comm { struct TagPermuteIndexList {}; + struct TagCreateGlobalIndecies {}; + CommMPI(System* s, T_X_FLOAT comm_depth_); void init(); void create_domain_decomposition(); @@ -496,6 +498,12 @@ class CommMPI: public Comm { s.f(i, 2) += pack_buffer_update(ii, 2); } + KOKKOS_INLINE_FUNCTION + void operator() (const TagCreateGlobalIndecies, + const T_INT& i) const { + s.global_index(i) = N_MAX_MASK * proc_rank + i; + } + const char* name(); int process_rank(); int num_processes(); diff --git a/src/force_types/force_lj_neigh.h b/src/force_types/force_lj_neigh.h index 5737421..3c7f459 100644 --- a/src/force_types/force_lj_neigh.h +++ b/src/force_types/force_lj_neigh.h @@ -67,16 +67,24 @@ class ForceLJNeigh: public Force { private: int N_local,ntypes; t_x_const_rnd x; + t_x_shmem x_shmem; + t_x x_shmem_local; t_f f; t_f_atomic f_a; t_id id; + t_index global_index; t_type_const_rnd type; + + T_X_FLOAT domain_x, domain_y, domain_z; + int proc_rank; + Binning::t_bincount bin_count; Binning::t_binoffsets bin_offsets; T_INT nbinx,nbiny,nbinz,nhalo; int step; bool use_stackparams; + typedef Kokkos::View t_fparams; typedef Kokkos::View> t_fparams_rnd; @@ -115,6 +123,8 @@ class ForceLJNeigh: public Force { typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_full_neigh_pe_stackparams; typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_half_neigh_pe_stackparams; + struct TagCopyLocalXShmem {}; + ForceLJNeigh (char** args, System* system, bool half_neigh_); void init_coeff(int nargs, char** args); @@ -138,6 +148,9 @@ class ForceLJNeigh: public Force { KOKKOS_INLINE_FUNCTION void operator() (TagHalfNeighPE, const T_INT& i, T_V_FLOAT& PE) const; + KOKKOS_INLINE_FUNCTION + void operator() (TagCopyLocalXShmem, const T_INT& i) const; + const char* name(); }; diff --git a/src/force_types/force_lj_neigh_impl.h b/src/force_types/force_lj_neigh_impl.h index e6789c2..f1248d7 100644 --- a/src/force_types/force_lj_neigh_impl.h +++ b/src/force_types/force_lj_neigh_impl.h @@ -105,10 +105,21 @@ void ForceLJNeigh::compute(System* system, Binning* binning, Neig N_local = system->N_local; x = system->x; + x_shmem = system->x_shmem; + x_shmem_local = t_x(x_shmem.data(),x_shmem.extent(1)); f = system->f; f_a = system->f; type = system->type; id = system->id; + global_index = system->global_index; + + domain_x = system->domain_x; + domain_y = system->domain_y; + domain_z = system->domain_z; + + Kokkos::parallel_for("ForceLJNeigh::compute_fill_xshmem", Kokkos::RangePolicy(0,system->N_local), *this); + //Kokkos::SHMEMSpace::fence(); + shmem_barrier_all(); if (use_stackparams) { if(half_neigh) Kokkos::parallel_for("ForceLJNeigh::compute", t_policy_half_neigh_stackparams(0, system->N_local), *this); @@ -121,7 +132,10 @@ void ForceLJNeigh::compute(System* system, Binning* binning, Neig Kokkos::parallel_for("ForceLJNeigh::compute", t_policy_full_neigh(0, system->N_local), *this); } Kokkos::fence(); + shmem_barrier_all(); + //Kokkos::SHMEMSpace::fence(); + x_shmem = t_x_shmem(); step++; } @@ -130,7 +144,7 @@ T_V_FLOAT ForceLJNeigh::compute_energy(System* system, Binning* b // Set internal data handles NeighborClass* neighbor = (NeighborClass*) neighbor_; neigh_list = neighbor->get_neigh_list(); - + MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank); N_local = system->N_local; x = system->x; f = system->f; @@ -177,10 +191,45 @@ void ForceLJNeigh::operator() (TagFullNeigh, const T for(int jj = 0; jj < num_neighs; jj++) { T_INT j = neighs_i(jj); - const T_F_FLOAT dx = x_i - x(j,0); - const T_F_FLOAT dy = y_i - x(j,1); - const T_F_FLOAT dz = z_i - x(j,2); - + //printf("Neigh: %i %i %li %li %i\n",i,j,global_index(i),global_index(j),j>N_local?1:0); + //const T_F_FLOAT dx = x_i - x(j,0); + //const T_F_FLOAT dy = y_i - x(j,1); + //const T_F_FLOAT dz = z_i - x(j,2); + + const T_INDEX jg = global_index(j); + #ifdef SHMEMTESTS_USE_HALO + const T_X_FLOAT xj_shmem = x(j,0);//x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = x(j,1);//x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = x(j,2);//x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #ifdef SHMEMTESTS_USE_HALO_LOCAL + const T_X_FLOAT xj_shmem = jg/N_MAX_MASK==proc_rank?x(j,0):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = jg/N_MAX_MASK==proc_rank?x(j,1):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = jg/N_MAX_MASK==proc_rank?x(j,2):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #ifdef SHMEMTESTS_USE_LOCAL_GLOBAL + const T_X_FLOAT xj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+0]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+1]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+2]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #ifdef SHMEMTESTS_USE_GLOBAL + const T_X_FLOAT xj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + + T_F_FLOAT dx = abs(x_i - xj_shmem)>domain_x/2? + (x_i-xj_shmem<0?x_i-xj_shmem+domain_x:x_i-xj_shmem-domain_x) + :x_i-xj_shmem; + T_F_FLOAT dy = abs(y_i - yj_shmem)>domain_y/2? + (y_i-yj_shmem<0?y_i-yj_shmem+domain_y:y_i-yj_shmem-domain_y) + :y_i-yj_shmem; + T_F_FLOAT dz = abs(z_i - zj_shmem)>domain_z/2? + (z_i-zj_shmem<0?z_i-zj_shmem+domain_z:z_i-zj_shmem-domain_z) + :z_i-zj_shmem; + +// if((abs(dx_shmem-dx)>1e-10) || (abs(dy_shmem-dy)>1e-10) || (abs(dz_shmem-dz)>1e-10)) +// printf("Neigh: %i %i %li %li %i : %lf %lf %lf %lf %lf\n",i,j,global_index(i),global_index(j),j>N_local?1:0,x(j,0),xj_shmem,domain_x,dx,dx_shmem); const int type_j = type(j); const T_F_FLOAT rsq = dx*dx + dy*dy + dz*dz; @@ -341,3 +390,12 @@ void ForceLJNeigh::operator() (TagHalfNeighPE, const } } + +template +KOKKOS_INLINE_FUNCTION +void ForceLJNeigh::operator() (TagCopyLocalXShmem, const T_INT& i) const { + x_shmem_local(i,0) = x(i,0); + x_shmem_local(i,1) = x(i,1); + x_shmem_local(i,2) = x(i,2); +} + diff --git a/src/input.cpp b/src/input.cpp index 579d87f..b38459b 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -508,7 +508,9 @@ void Input::create_lattice(Comm* comm) { } system->N_local = n; system->N = n; - system->grow(n); + int global_n_max = n; + comm->reduce_max_int(&global_n_max,1); + system->grow(global_n_max); s = *system; h_x = Kokkos::create_mirror_view(s.x); h_v = Kokkos::create_mirror_view(s.v); @@ -538,7 +540,9 @@ void Input::create_lattice(Comm* comm) { } } } - system->grow(n); + global_n_max = n; + comm->reduce_max_int(&global_n_max,1); + system->grow(global_n_max); System s = *system; h_x = Kokkos::create_mirror_view(s.x); h_v = Kokkos::create_mirror_view(s.v); @@ -640,7 +644,9 @@ void Input::create_lattice(Comm* comm) { system->N_local = n; system->N = n; - system->grow(n); + int global_n_max = n; + comm->reduce_max_int(&global_n_max,1); + system->grow(global_n_max); s = *system; h_x = Kokkos::create_mirror_view(s.x); h_v = Kokkos::create_mirror_view(s.v); @@ -672,7 +678,9 @@ void Input::create_lattice(Comm* comm) { } } } - system->grow(n); + global_n_max = n; + comm->reduce_max_int(&global_n_max,1); + system->grow(global_n_max); System s = *system; h_x = Kokkos::create_mirror_view(s.x); h_v = Kokkos::create_mirror_view(s.v); diff --git a/src/main.cpp b/src/main.cpp index 98c82d8..c002b1f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -49,6 +49,7 @@ int main(int argc, char* argv[]) { #ifdef EXAMINIMD_ENABLE_MPI MPI_Init(&argc,&argv); + shmem_init(); #endif Kokkos::initialize(argc,argv); @@ -67,7 +68,7 @@ int main(int argc, char* argv[]) { Kokkos::finalize(); #ifdef EXAMINIMD_ENABLE_MPI - MPI_Finalize(); + //MPI_Finalize(); #endif } diff --git a/src/system.cpp b/src/system.cpp index b478aa9..e695cf9 100644 --- a/src/system.cpp +++ b/src/system.cpp @@ -50,6 +50,7 @@ System::System() { v = t_v(); f = t_f(); id = t_id(); + global_index = t_index(); type = t_type(); q = t_q(); mass = t_mass(); @@ -73,6 +74,7 @@ void System::init() { v = t_v("System::v",N_max); f = t_f("System::f",N_max); id = t_id("System::id",N_max); + global_index = t_index("System::global_index",N_max); type = t_type("System::type",N_max); q = t_q("System::q",N_max); mass = t_mass("System::mass",ntypes); @@ -87,6 +89,7 @@ void System::destroy() { v = t_v(); f = t_f(); id = t_id(); + global_index = t_index(); type = t_type(); q = t_q(); mass = t_mass(); @@ -101,10 +104,22 @@ void System::grow(T_INT N_new) { Kokkos::resize(f,N_max); // Forces Kokkos::resize(id,N_max); // Id + Kokkos::resize(global_index,N_max); // Id Kokkos::resize(type,N_max); // Particle Type Kokkos::resize(q,N_max); // Charge + +#ifdef EXAMINIMD_ENABLE_MPI + int num_ranks; + MPI_Comm_size(MPI_COMM_WORLD, &num_ranks); +#else + int num_ranks = 1; +#endif + int* rank_list = new int[num_ranks]; + for(int i=0; i("X_shmem",num_ranks,rank_list,N_max); } } diff --git a/src/system.h b/src/system.h index 017d42d..5c47b22 100644 --- a/src/system.h +++ b/src/system.h @@ -45,12 +45,14 @@ struct Particle { Particle() { x=y=z=vx=vy=vz=mass=q=0.0; id = type = 0; + global_index = 0; } T_X_FLOAT x,y,z; T_V_FLOAT vx,vy,vz,mass; T_FLOAT q; T_INT id; + T_INDEX global_index; int type; }; @@ -68,11 +70,14 @@ class System { t_v v; // Velocities t_f f; // Forces - t_type type; // Particle Type - t_id id; // Particle ID - + t_type type; // Particle Type + t_id id; // Particle ID + t_index global_index; // Index for PGAS indexing + t_q q; // Charge + t_x_shmem x_shmem; + // Per Type Property t_mass mass; @@ -107,6 +112,7 @@ class System { p.q = q(i); p.id = id(i); p.type = type(i); + p.global_index = global_index(i); return p; } @@ -117,6 +123,7 @@ class System { q(i) = p.q; id(i) = p.id; type(i) = p.type; + global_index(i) = p.global_index; } KOKKOS_INLINE_FUNCTION @@ -130,6 +137,7 @@ class System { type(dest) = type(src); id(dest) = id(src); q(dest) = q(src); + global_index(dest) = global_index(src); } KOKKOS_INLINE_FUNCTION diff --git a/src/types.h b/src/types.h index 4d70c17..4788198 100644 --- a/src/types.h +++ b/src/types.h @@ -39,6 +39,7 @@ #ifndef TYPES_H #define TYPES_H #include +#include // Module Types etc // Units to be used @@ -60,6 +61,8 @@ enum {NEIGH_NONE, NEIGH_CSR, NEIGH_CSR_MAPCONSTR, NEIGH_2D}; // Input File Type enum {INPUT_LAMMPS}; +enum INDEX_TYPE: int64_t { N_MAX_MASK = 1024*1024*1024 }; + // Macros to work around the fact that std::max/min is not available on GPUs #define MAX(a,b) (a>b?a:b) #define MIN(a,b) (a t_x; // Positions typedef Kokkos::View t_x_const; // Positions typedef Kokkos::View> t_x_const_rnd; // Positions +typedef Kokkos::View t_x_shmem; // PGAS Positions typedef Kokkos::View t_v; // Velocities typedef Kokkos::View t_f; // Force typedef Kokkos::View> t_type_const_rnd; // Type (int is enough as type) typedef Kokkos::View t_id; // ID typedef Kokkos::View t_id_const; // ID +typedef Kokkos::View t_index; // ID +typedef Kokkos::View t_index_const; // ID typedef Kokkos::View t_q; // Charge typedef Kokkos::View t_q_const; // Charge From 85f41f25ebdbda06ac8fae53f1a61f11c68720f2 Mon Sep 17 00:00:00 2001 From: crtrott Date: Fri, 7 Sep 2018 12:47:39 -0600 Subject: [PATCH 27/36] Update RemoteSpaces Stuff --- input/in.lj | 2 +- src/Makefile | 12 ++++-------- src/force_types/force_lj_neigh_impl.h | 4 ++-- src/main.cpp | 2 ++ src/types.h | 2 +- 5 files changed, 10 insertions(+), 12 deletions(-) diff --git a/input/in.lj b/input/in.lj index 85725b8..95c93a0 100644 --- a/input/in.lj +++ b/input/in.lj @@ -5,7 +5,7 @@ atom_style atomic newton off lattice fcc 0.8442 -region box block 0 40 0 40 0 40 +region box block 0 10 0 10 0 10 create_box 1 box create_atoms 1 box mass 1 2.0 diff --git a/src/Makefile b/src/Makefile index 83df87c..428c3c7 100644 --- a/src/Makefile +++ b/src/Makefile @@ -17,7 +17,7 @@ default: build EXE = ExaMiniMD -CXXFLAGS = -O3 -g -DKOKKOS_ENABLE_SHMEMSPACE +CXXFLAGS = -O3 -g LINKFLAGS = -O3 -g ifeq ($(MPI), 1) @@ -34,9 +34,9 @@ endif LINK = ${CXX} #CXXFLAGS += -DSHMEMTESTS_USE_HALO -#CXXFLAGS += -DSHMEMTESTS_USE_HALO_LOCAL +CXXFLAGS += -DSHMEMTESTS_USE_HALO_LOCAL #CXXFLAGS += -DSHMEMTESTS_USE_LOCAL_GLOBAL -CXXFLAGS += -DSHMEMTESTS_USE_GLOBAL +#CXXFLAGS += -DSHMEMTESTS_USE_GLOBAL KOKKOS_OPTIONS=aggressive_vectorization @@ -72,11 +72,7 @@ OBJ = $(SRC_NOTDIR:.cpp=.o) LIB = include $(KOKKOS_PATH)/Makefile.kokkos - -EXTRA_INC += -I${KOKKOS_REMOTE_SPACES_PATH}/src -I${KOKKOS_REMOTE_SPACES_PATH}/src/SHMEM -vpath %.cpp ${KOKKOS_REMOTE_SPACES_PATH}/src/SHMEM -LIB += -L/home/projects/x86-64-haswell/openmpi/2.1.1/gcc/4.9.3/lib -loshmem -OBJ += Kokkos_SHMEMSpace.o +include $(KOKKOS_REMOTE_SPACES_PATH)/Makefile.kokkos-remote-spaces build: $(EXE) diff --git a/src/force_types/force_lj_neigh_impl.h b/src/force_types/force_lj_neigh_impl.h index f1248d7..27ae244 100644 --- a/src/force_types/force_lj_neigh_impl.h +++ b/src/force_types/force_lj_neigh_impl.h @@ -119,7 +119,7 @@ void ForceLJNeigh::compute(System* system, Binning* binning, Neig Kokkos::parallel_for("ForceLJNeigh::compute_fill_xshmem", Kokkos::RangePolicy(0,system->N_local), *this); //Kokkos::SHMEMSpace::fence(); - shmem_barrier_all(); + Kokkos::DefaultRemoteMemorySpace().fence();; if (use_stackparams) { if(half_neigh) Kokkos::parallel_for("ForceLJNeigh::compute", t_policy_half_neigh_stackparams(0, system->N_local), *this); @@ -132,7 +132,7 @@ void ForceLJNeigh::compute(System* system, Binning* binning, Neig Kokkos::parallel_for("ForceLJNeigh::compute", t_policy_full_neigh(0, system->N_local), *this); } Kokkos::fence(); - shmem_barrier_all(); + Kokkos::DefaultRemoteMemorySpace().fence();; //Kokkos::SHMEMSpace::fence(); x_shmem = t_x_shmem(); diff --git a/src/main.cpp b/src/main.cpp index c002b1f..342797b 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -49,6 +49,8 @@ int main(int argc, char* argv[]) { #ifdef EXAMINIMD_ENABLE_MPI MPI_Init(&argc,&argv); + #endif + #ifdef KOKKOS_ENABLE_SHMEMSPACE shmem_init(); #endif diff --git a/src/types.h b/src/types.h index 4788198..fd21d5d 100644 --- a/src/types.h +++ b/src/types.h @@ -96,7 +96,7 @@ typedef Kokkos::View t_x; // P typedef Kokkos::View t_x_const; // Positions typedef Kokkos::View> t_x_const_rnd; // Positions -typedef Kokkos::View t_x_shmem; // PGAS Positions +typedef Kokkos::View t_x_shmem; // PGAS Positions typedef Kokkos::View t_v; // Velocities typedef Kokkos::View t_f; // Force typedef Kokkos::View Date: Tue, 11 Sep 2018 20:33:09 -0600 Subject: [PATCH 28/36] Update init logic --- src/main.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/main.cpp b/src/main.cpp index 342797b..53154ab 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -44,15 +44,23 @@ #ifdef EXAMINIMD_ENABLE_MPI #include "mpi.h" #endif +#include int main(int argc, char* argv[]) { #ifdef EXAMINIMD_ENABLE_MPI MPI_Init(&argc,&argv); #endif + #ifdef KOKKOS_ENABLE_NVSHMEM + shmemx_init_attr_t attr; + auto mpi_comm = MPI_COMM_WORLD; + attr.mpi_comm = &mpi_comm; + shmemx_init_attr (SHMEMX_INIT_WITH_MPI_COMM, &attr); + #endif #ifdef KOKKOS_ENABLE_SHMEMSPACE shmem_init(); #endif + #endif Kokkos::initialize(argc,argv); From da3cc0acba079ff266ffd521ef774351c26905b1 Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Wed, 12 Sep 2018 17:47:34 -0600 Subject: [PATCH 29/36] Working NVSHMEM --- input/in.lj | 10 +++++----- src/Makefile | 4 ++-- src/comm_types/comm_mpi.cpp | 1 - src/force_types/force_lj_neigh_impl.h | 7 +++++-- src/main.cpp | 3 +-- src/system.cpp | 5 ++++- 6 files changed, 17 insertions(+), 13 deletions(-) diff --git a/input/in.lj b/input/in.lj index 95c93a0..a90f7ec 100644 --- a/input/in.lj +++ b/input/in.lj @@ -5,19 +5,19 @@ atom_style atomic newton off lattice fcc 0.8442 -region box block 0 10 0 10 0 10 +region box block 0 40 0 40 0 40 create_box 1 box create_atoms 1 box mass 1 2.0 velocity all create 1.4 87287 loop geom -pair_style lj/cut 2.5 -pair_coeff 1 1 1.0 1.0 2.5 +pair_style lj/cut 1.5 +pair_coeff 1 1 1.0 1.0 1.5 neighbor 0.3 bin neigh_modify delay 0 every 20 check no fix 1 all nve -thermo 10 +thermo 100 -run 100 +run 1000 diff --git a/src/Makefile b/src/Makefile index 428c3c7..932d62d 100644 --- a/src/Makefile +++ b/src/Makefile @@ -34,9 +34,9 @@ endif LINK = ${CXX} #CXXFLAGS += -DSHMEMTESTS_USE_HALO -CXXFLAGS += -DSHMEMTESTS_USE_HALO_LOCAL +#CXXFLAGS += -DSHMEMTESTS_USE_HALO_LOCAL #CXXFLAGS += -DSHMEMTESTS_USE_LOCAL_GLOBAL -#CXXFLAGS += -DSHMEMTESTS_USE_GLOBAL +CXXFLAGS += -DSHMEMTESTS_USE_GLOBAL KOKKOS_OPTIONS=aggressive_vectorization diff --git a/src/comm_types/comm_mpi.cpp b/src/comm_types/comm_mpi.cpp index 4780010..059317e 100644 --- a/src/comm_types/comm_mpi.cpp +++ b/src/comm_types/comm_mpi.cpp @@ -390,7 +390,6 @@ void CommMPI::exchange_halo() { }; void CommMPI::update_halo() { - Kokkos::Profiling::pushRegion("Comm::update_halo"); N_ghost = 0; diff --git a/src/force_types/force_lj_neigh_impl.h b/src/force_types/force_lj_neigh_impl.h index 27ae244..8cbddc3 100644 --- a/src/force_types/force_lj_neigh_impl.h +++ b/src/force_types/force_lj_neigh_impl.h @@ -116,9 +116,11 @@ void ForceLJNeigh::compute(System* system, Binning* binning, Neig domain_x = system->domain_x; domain_y = system->domain_y; domain_z = system->domain_z; - + Kokkos::fence(); + Kokkos::DefaultRemoteMemorySpace().fence();; Kokkos::parallel_for("ForceLJNeigh::compute_fill_xshmem", Kokkos::RangePolicy(0,system->N_local), *this); //Kokkos::SHMEMSpace::fence(); + Kokkos::fence(); Kokkos::DefaultRemoteMemorySpace().fence();; if (use_stackparams) { if(half_neigh) @@ -217,7 +219,7 @@ void ForceLJNeigh::operator() (TagFullNeigh, const T const T_X_FLOAT yj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); const T_X_FLOAT zj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); #endif - + //printf("DATA: %i %i %i %lf %lf %lf %i %li %i\n",id(i),jj,(int)jg,xj_shmem,yj_shmem,zj_shmem,(int)jg/N_MAX_MASK,N_MAX_MASK,int(jg%N_MAX_MASK)); T_F_FLOAT dx = abs(x_i - xj_shmem)>domain_x/2? (x_i-xj_shmem<0?x_i-xj_shmem+domain_x:x_i-xj_shmem-domain_x) :x_i-xj_shmem; @@ -394,6 +396,7 @@ void ForceLJNeigh::operator() (TagHalfNeighPE, const template KOKKOS_INLINE_FUNCTION void ForceLJNeigh::operator() (TagCopyLocalXShmem, const T_INT& i) const { + //printf("CopyLocal: %i %lf %lf %lf\n",i,x(i,0),x(i,1),x(i,2)); x_shmem_local(i,0) = x(i,0); x_shmem_local(i,1) = x(i,1); x_shmem_local(i,2) = x(i,2); diff --git a/src/main.cpp b/src/main.cpp index 53154ab..ce19b22 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -50,8 +50,7 @@ int main(int argc, char* argv[]) { #ifdef EXAMINIMD_ENABLE_MPI MPI_Init(&argc,&argv); - #endif - #ifdef KOKKOS_ENABLE_NVSHMEM + #ifdef KOKKOS_ENABLE_NVSHMEMSPACE shmemx_init_attr_t attr; auto mpi_comm = MPI_COMM_WORLD; attr.mpi_comm = &mpi_comm; diff --git a/src/system.cpp b/src/system.cpp index e695cf9..7a57389 100644 --- a/src/system.cpp +++ b/src/system.cpp @@ -109,7 +109,7 @@ void System::grow(T_INT N_new) { Kokkos::resize(type,N_max); // Particle Type Kokkos::resize(q,N_max); // Charge - +{ #ifdef EXAMINIMD_ENABLE_MPI int num_ranks; MPI_Comm_size(MPI_COMM_WORLD, &num_ranks); @@ -119,7 +119,10 @@ void System::grow(T_INT N_new) { int* rank_list = new int[num_ranks]; for(int i=0; i("X_shmem",num_ranks,rank_list,N_max); +} + } } From cf54b44cda193cd26de10ab8b69c13fa72316c8e Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Wed, 12 Sep 2018 19:56:01 -0600 Subject: [PATCH 30/36] Skip update_halo when using remote memory spaces --- src/comm_types/comm_mpi.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/comm_types/comm_mpi.cpp b/src/comm_types/comm_mpi.cpp index 059317e..d8bab7e 100644 --- a/src/comm_types/comm_mpi.cpp +++ b/src/comm_types/comm_mpi.cpp @@ -390,6 +390,9 @@ void CommMPI::exchange_halo() { }; void CommMPI::update_halo() { +#ifndef -DSHMEMTESTS_USE_HALO + return; +#else Kokkos::Profiling::pushRegion("Comm::update_halo"); N_ghost = 0; @@ -429,6 +432,7 @@ void CommMPI::update_halo() { } Kokkos::Profiling::popRegion(); +#endif }; void CommMPI::update_force() { From 5ef0acb688d9212a1007e396e10557705c488221 Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Thu, 13 Sep 2018 09:32:05 -0600 Subject: [PATCH 31/36] RemoteSpaces: add double3 variant and SNAP support --- input/in.lj | 2 +- input/snap/in.snap.W | 4 +- src/Makefile | 1 + src/comm_types/comm_mpi.cpp | 2 +- src/force_types/force_lj_neigh.h | 2 +- src/force_types/force_lj_neigh_impl.h | 18 +++- src/force_types/force_snap_neigh.h | 11 ++- src/force_types/force_snap_neigh_impl.h | 125 ++++++++++++++++++++++-- src/types.h | 6 ++ 9 files changed, 154 insertions(+), 17 deletions(-) diff --git a/input/in.lj b/input/in.lj index a90f7ec..1c64c0a 100644 --- a/input/in.lj +++ b/input/in.lj @@ -5,7 +5,7 @@ atom_style atomic newton off lattice fcc 0.8442 -region box block 0 40 0 40 0 40 +region box block 0 80 0 80 0 80 create_box 1 box create_atoms 1 box mass 1 2.0 diff --git a/input/snap/in.snap.W b/input/snap/in.snap.W index b19472d..9e77c54 100644 --- a/input/snap/in.snap.W +++ b/input/snap/in.snap.W @@ -8,7 +8,7 @@ newton on # generate the box and atom positions using a BCC lattice lattice sc 3.1803 -region box block 0 4 0 8 0 8 +region box block 0 8 0 8 0 8 create_box 1 box create_atoms 1 box @@ -27,7 +27,7 @@ thermo 10 # Set up NVE run neighbor 1.0 bin -neigh_modify once no every 1 delay 0 check yes +neigh_modify once no every 20 delay 0 check yes # Run MD diff --git a/src/Makefile b/src/Makefile index 932d62d..6be8818 100644 --- a/src/Makefile +++ b/src/Makefile @@ -33,6 +33,7 @@ endif LINK = ${CXX} +CXXFLAGS += -DSHMEMTESTS_USE_SCALAR #CXXFLAGS += -DSHMEMTESTS_USE_HALO #CXXFLAGS += -DSHMEMTESTS_USE_HALO_LOCAL #CXXFLAGS += -DSHMEMTESTS_USE_LOCAL_GLOBAL diff --git a/src/comm_types/comm_mpi.cpp b/src/comm_types/comm_mpi.cpp index d8bab7e..66bd7fe 100644 --- a/src/comm_types/comm_mpi.cpp +++ b/src/comm_types/comm_mpi.cpp @@ -390,7 +390,7 @@ void CommMPI::exchange_halo() { }; void CommMPI::update_halo() { -#ifndef -DSHMEMTESTS_USE_HALO +#ifndef SHMEMTESTS_USE_HALO return; #else Kokkos::Profiling::pushRegion("Comm::update_halo"); diff --git a/src/force_types/force_lj_neigh.h b/src/force_types/force_lj_neigh.h index 3c7f459..0927d90 100644 --- a/src/force_types/force_lj_neigh.h +++ b/src/force_types/force_lj_neigh.h @@ -68,7 +68,7 @@ class ForceLJNeigh: public Force { int N_local,ntypes; t_x_const_rnd x; t_x_shmem x_shmem; - t_x x_shmem_local; + t_x_shmem_local x_shmem_local; t_f f; t_f_atomic f_a; t_id id; diff --git a/src/force_types/force_lj_neigh_impl.h b/src/force_types/force_lj_neigh_impl.h index 8cbddc3..2003789 100644 --- a/src/force_types/force_lj_neigh_impl.h +++ b/src/force_types/force_lj_neigh_impl.h @@ -106,7 +106,7 @@ void ForceLJNeigh::compute(System* system, Binning* binning, Neig N_local = system->N_local; x = system->x; x_shmem = system->x_shmem; - x_shmem_local = t_x(x_shmem.data(),x_shmem.extent(1)); + x_shmem_local = t_x_shmem_local(x_shmem.data(),x_shmem.extent(1)); f = system->f; f_a = system->f; type = system->type; @@ -137,7 +137,7 @@ void ForceLJNeigh::compute(System* system, Binning* binning, Neig Kokkos::DefaultRemoteMemorySpace().fence();; //Kokkos::SHMEMSpace::fence(); - x_shmem = t_x_shmem(); + //x_shmem = t_x_shmem(); step++; } @@ -199,6 +199,7 @@ void ForceLJNeigh::operator() (TagFullNeigh, const T //const T_F_FLOAT dz = z_i - x(j,2); const T_INDEX jg = global_index(j); + #ifdef SHMEMTESTS_USE_SCALAR #ifdef SHMEMTESTS_USE_HALO const T_X_FLOAT xj_shmem = x(j,0);//x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); const T_X_FLOAT yj_shmem = x(j,1);//x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); @@ -219,6 +220,14 @@ void ForceLJNeigh::operator() (TagFullNeigh, const T const T_X_FLOAT yj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); const T_X_FLOAT zj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); #endif + #else + #ifdef SHMEMTESTS_USE_GLOBAL + const double3 posj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK); + const T_X_FLOAT xj_shmem = posj_shmem.x; + const T_X_FLOAT yj_shmem = posj_shmem.y; + const T_X_FLOAT zj_shmem = posj_shmem.z; + #endif + #endif //printf("DATA: %i %i %i %lf %lf %lf %i %li %i\n",id(i),jj,(int)jg,xj_shmem,yj_shmem,zj_shmem,(int)jg/N_MAX_MASK,N_MAX_MASK,int(jg%N_MAX_MASK)); T_F_FLOAT dx = abs(x_i - xj_shmem)>domain_x/2? (x_i-xj_shmem<0?x_i-xj_shmem+domain_x:x_i-xj_shmem-domain_x) @@ -397,8 +406,13 @@ template KOKKOS_INLINE_FUNCTION void ForceLJNeigh::operator() (TagCopyLocalXShmem, const T_INT& i) const { //printf("CopyLocal: %i %lf %lf %lf\n",i,x(i,0),x(i,1),x(i,2)); + #ifdef SHMEMTESTS_USE_SCALAR x_shmem_local(i,0) = x(i,0); x_shmem_local(i,1) = x(i,1); x_shmem_local(i,2) = x(i,2); + #else + double3 pos = {x(i,0),x(i,1),x(i,2)}; + x_shmem_local(i) = pos; + #endif } diff --git a/src/force_types/force_snap_neigh.h b/src/force_types/force_snap_neigh.h index d6f4e1e..53b2b16 100644 --- a/src/force_types/force_snap_neigh.h +++ b/src/force_types/force_snap_neigh.h @@ -170,14 +170,23 @@ class ForceSNAP : public Force { t_x x; + t_x_shmem x_shmem; + t_x_shmem_local x_shmem_local; t_f_atomic f; t_type type; + t_index global_index; + T_X_FLOAT domain_x, domain_y, domain_z; + int proc_rank; public: + struct TagForceCompute {}; KOKKOS_INLINE_FUNCTION - void operator() (const Kokkos::TeamPolicy<>::member_type& team) const; + void operator() (TagForceCompute, const Kokkos::TeamPolicy<>::member_type& team) const; + struct TagCopyLocalXShmem {}; + KOKKOS_INLINE_FUNCTION + void operator() (TagCopyLocalXShmem, const T_INT& i) const; }; #define FORCE_MODULES_EXTERNAL_TEMPLATE diff --git a/src/force_types/force_snap_neigh_impl.h b/src/force_types/force_snap_neigh_impl.h index eb89ed3..e3fdc50 100644 --- a/src/force_types/force_snap_neigh_impl.h +++ b/src/force_types/force_snap_neigh_impl.h @@ -164,6 +164,12 @@ void ForceSNAP::compute(System* system, Binning* binning, Neighbo Kokkos::abort("ForceSNAP requires 'newton on'"); x = system->x; f = system->f; + x_shmem = system->x_shmem; + x_shmem_local = t_x_shmem_local(x_shmem.data(),x_shmem.extent(1)); + domain_x = system->domain_x; + domain_y = system->domain_y; + domain_z = system->domain_z; + global_index = system->global_index; type = system->type; int nlocal = system->N_local; @@ -178,7 +184,12 @@ void ForceSNAP::compute(System* system, Binning* binning, Neighbo const int num_neighs = neighs_i.get_num_neighs(); if(max_neighs(neigh_list), Kokkos::Max(max_neighs)); + + Kokkos::DefaultRemoteMemorySpace().fence();; + Kokkos::parallel_for("ForceSNAPNeigh::compute_fill_xshmem", Kokkos::RangePolicy(0,system->N_local), *this); + Kokkos::DefaultRemoteMemorySpace().fence();; + + Kokkos::parallel_reduce("ForceSNAP::find_max_neighs",nlocal, FindMaxNumNeighs(neigh_list), Kokkos::Experimental::Max(max_neighs)); sna.nmax = max_neighs; @@ -186,6 +197,7 @@ void ForceSNAP::compute(System* system, Binning* binning, Neighbo T_INT thread_scratch_size = sna.size_thread_scratch_arrays(); //printf("Sizes: %i %i\n",team_scratch_size/1024,thread_scratch_size/1024); + int team_size_max = Kokkos::TeamPolicy::team_size_max(*this); int vector_length = 8; int team_size_max = Kokkos::TeamPolicy<>(nlocal,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelForTag()); #ifdef EMD_ENABLE_GPU @@ -195,12 +207,14 @@ void ForceSNAP::compute(System* system, Binning* binning, Neighbo #else int team_size = 1; #endif - Kokkos::TeamPolicy<> policy(nlocal,team_size,vector_length); + Kokkos::TeamPolicy policy(nlocal,team_size,vector_length); Kokkos::parallel_for("ForceSNAP::compute",policy .set_scratch_size(1,Kokkos::PerThread(thread_scratch_size)) .set_scratch_size(1,Kokkos::PerTeam(team_scratch_size)) ,*this); + Kokkos::fence(); + Kokkos::DefaultRemoteMemorySpace().fence();; //static int step =0; //step++; //if(step%10==0) @@ -588,7 +602,7 @@ void ForceSNAP::read_files(char *coefffilename, char *paramfilena template KOKKOS_INLINE_FUNCTION -void ForceSNAP::operator() (const Kokkos::TeamPolicy<>::member_type& team) const { +void ForceSNAP::operator() (TagForceCompute, const Kokkos::TeamPolicy<>::member_type& team) const { const int i = team.league_rank(); SNA my_sna(sna,team); const double x_i = x(i,0); @@ -614,9 +628,49 @@ void ForceSNAP::operator() (const Kokkos::TeamPolicy<>::member_ty [&] (const int jj, int& count) { Kokkos::single(Kokkos::PerThread(team), [&] (){ T_INT j = neighs_i(jj); - const T_F_FLOAT dx = x(j,0) - x_i; - const T_F_FLOAT dy = x(j,1) - y_i; - const T_F_FLOAT dz = x(j,2) - z_i; + const T_INDEX jg = global_index(j); + #ifdef SHMEMTESTS_USE_SCALAR + #ifdef SHMEMTESTS_USE_HALO + const T_X_FLOAT xj_shmem = x(j,0);//x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = x(j,1);//x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = x(j,2);//x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #ifdef SHMEMTESTS_USE_HALO_LOCAL + const T_X_FLOAT xj_shmem = jg/N_MAX_MASK==proc_rank?x(j,0):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = jg/N_MAX_MASK==proc_rank?x(j,1):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = jg/N_MAX_MASK==proc_rank?x(j,2):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #ifdef SHMEMTESTS_USE_LOCAL_GLOBAL + const T_X_FLOAT xj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+0]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+1]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+2]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #ifdef SHMEMTESTS_USE_GLOBAL + const T_X_FLOAT xj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #else + #ifdef SHMEMTESTS_USE_GLOBAL + const double3 posj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK); + const T_X_FLOAT xj_shmem = posj_shmem.x; + const T_X_FLOAT yj_shmem = posj_shmem.y; + const T_X_FLOAT zj_shmem = posj_shmem.z; + #endif + #endif + + T_F_FLOAT dx = -(abs(x_i - xj_shmem)>domain_x/2? + (x_i-xj_shmem<0?x_i-xj_shmem+domain_x:x_i-xj_shmem-domain_x) + :x_i-xj_shmem); + T_F_FLOAT dy = -(abs(y_i - yj_shmem)>domain_y/2? + (y_i-yj_shmem<0?y_i-yj_shmem+domain_y:y_i-yj_shmem-domain_y) + :y_i-yj_shmem); + T_F_FLOAT dz = -(abs(z_i - zj_shmem)>domain_z/2? + (z_i-zj_shmem<0?z_i-zj_shmem+domain_z:z_i-zj_shmem-domain_z) + :z_i-zj_shmem); + //const T_F_FLOAT dx = xj_shmem - x_i; + //const T_F_FLOAT dy = yj_shmem - y_i; + //const T_F_FLOAT dz = zj_shmem - z_i; const int type_j = type(j); const T_F_FLOAT rsq = dx*dx + dy*dy + dz*dz; @@ -634,9 +688,48 @@ void ForceSNAP::operator() (const Kokkos::TeamPolicy<>::member_ty [&] (const int jj, int& offset, bool final){ //for (int jj = 0; jj < num_neighs; jj++) { T_INT j = neighs_i(jj); - const T_F_FLOAT dx = x(j,0) - x_i; - const T_F_FLOAT dy = x(j,1) - y_i; - const T_F_FLOAT dz = x(j,2) - z_i; + const T_INDEX jg = global_index(j); + #ifdef SHMEMTESTS_USE_SCALAR + #ifdef SHMEMTESTS_USE_HALO + const T_X_FLOAT xj_shmem = x(j,0);//x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = x(j,1);//x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = x(j,2);//x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #ifdef SHMEMTESTS_USE_HALO_LOCAL + const T_X_FLOAT xj_shmem = jg/N_MAX_MASK==proc_rank?x(j,0):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = jg/N_MAX_MASK==proc_rank?x(j,1):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = jg/N_MAX_MASK==proc_rank?x(j,2):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #ifdef SHMEMTESTS_USE_LOCAL_GLOBAL + const T_X_FLOAT xj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+0]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+1]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+2]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #ifdef SHMEMTESTS_USE_GLOBAL + const T_X_FLOAT xj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #else + #ifdef SHMEMTESTS_USE_GLOBAL + const double3 posj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK); + const T_X_FLOAT xj_shmem = posj_shmem.x; + const T_X_FLOAT yj_shmem = posj_shmem.y; + const T_X_FLOAT zj_shmem = posj_shmem.z; + #endif + #endif + T_F_FLOAT dx = -(abs(x_i - xj_shmem)>domain_x/2? + (x_i-xj_shmem<0?x_i-xj_shmem+domain_x:x_i-xj_shmem-domain_x) + :x_i-xj_shmem); + T_F_FLOAT dy = -(abs(y_i - yj_shmem)>domain_y/2? + (y_i-yj_shmem<0?y_i-yj_shmem+domain_y:y_i-yj_shmem-domain_y) + :y_i-yj_shmem); + T_F_FLOAT dz = -(abs(z_i - zj_shmem)>domain_z/2? + (z_i-zj_shmem<0?z_i-zj_shmem+domain_z:z_i-zj_shmem-domain_z) + :z_i-zj_shmem); + //const T_F_FLOAT dx = xj_shmem - x_i; + //const T_F_FLOAT dy = yj_shmem - y_i; + //const T_F_FLOAT dz = zj_shmem - z_i; const int type_j = type(j); const T_F_FLOAT rsq = dx*dx + dy*dy + dz*dz; @@ -723,3 +816,17 @@ void ForceSNAP::operator() (const Kokkos::TeamPolicy<>::member_ty }); //t5 += timer.seconds(); timer.reset(); } + +template +KOKKOS_INLINE_FUNCTION +void ForceSNAP::operator() (TagCopyLocalXShmem, const T_INT& i) const { + #ifdef SHMEMTESTS_USE_SCALAR + x_shmem_local(i,0) = x(i,0); + x_shmem_local(i,1) = x(i,1); + x_shmem_local(i,2) = x(i,2); + #else + double3 pos = {x(i,0),x(i,1),x(i,2)}; + x_shmem_local(i) = pos; + #endif +} + diff --git a/src/types.h b/src/types.h index fd21d5d..ccbb8a3 100644 --- a/src/types.h +++ b/src/types.h @@ -96,7 +96,13 @@ typedef Kokkos::View t_x; // P typedef Kokkos::View t_x_const; // Positions typedef Kokkos::View> t_x_const_rnd; // Positions +#ifdef SHMEMTESTS_USE_SCALAR typedef Kokkos::View t_x_shmem; // PGAS Positions +typedef Kokkos::View t_x_shmem_local; // Local PGAS Positions +#else +typedef Kokkos::View t_x_shmem; // PGAS Positions +typedef Kokkos::View t_x_shmem_local; // Local PGAS Positions +#endif typedef Kokkos::View t_v; // Velocities typedef Kokkos::View t_f; // Force typedef Kokkos::View Date: Thu, 13 Sep 2018 14:56:16 -0600 Subject: [PATCH 32/36] Fixes for QUO Space --- src/force_types/force_lj_neigh_impl.h | 6 ++++++ src/force_types/force_snap_neigh_impl.h | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/src/force_types/force_lj_neigh_impl.h b/src/force_types/force_lj_neigh_impl.h index 2003789..0d9d42d 100644 --- a/src/force_types/force_lj_neigh_impl.h +++ b/src/force_types/force_lj_neigh_impl.h @@ -51,6 +51,7 @@ ForceLJNeigh::ForceLJNeigh(char** args, System* system, bool half N_local = 0; nhalo = 0; step = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank); } template @@ -106,7 +107,12 @@ void ForceLJNeigh::compute(System* system, Binning* binning, Neig N_local = system->N_local; x = system->x; x_shmem = system->x_shmem; + +#ifdef KOKKOS_ENABLE_QUOSPACE + x_shmem_local = t_x_shmem_local(&x_shmem.access(proc_rank,0,0),x_shmem.extent(1)); +#else x_shmem_local = t_x_shmem_local(x_shmem.data(),x_shmem.extent(1)); +#endif f = system->f; f_a = system->f; type = system->type; diff --git a/src/force_types/force_snap_neigh_impl.h b/src/force_types/force_snap_neigh_impl.h index e3fdc50..f7468e6 100644 --- a/src/force_types/force_snap_neigh_impl.h +++ b/src/force_types/force_snap_neigh_impl.h @@ -165,7 +165,11 @@ void ForceSNAP::compute(System* system, Binning* binning, Neighbo x = system->x; f = system->f; x_shmem = system->x_shmem; +#ifdef KOKKOS_ENABLE_QUOSPACE + x_shmem_local = t_x_shmem_local(&x_shmem.access(proc_rank,0,0),x_shmem.extent(1)); +#else x_shmem_local = t_x_shmem_local(x_shmem.data(),x_shmem.extent(1)); +#endif domain_x = system->domain_x; domain_y = system->domain_y; domain_z = system->domain_z; From e78797383ca4cd435f2d0e3e785edb62594ef7d7 Mon Sep 17 00:00:00 2001 From: Jan Ciesko Date: Wed, 12 Jun 2024 15:48:25 -0600 Subject: [PATCH 33/36] - Add support for Kokkos Remote Spaces 1.2 - Removes Makefile - Adds CMake options --- CMakeLists.txt | 36 +++++----- src/CMakeLists.txt | 55 ++++----------- src/Makefile | 90 ------------------------- src/binning_types/CMakeLists.txt | 9 +-- src/comm_types/CMakeLists.txt | 11 +-- src/comm_types/comm_mpi.cpp | 2 +- src/comm_types/comm_mpi.h | 2 +- src/examinimd.h | 6 +- src/force_types/CMakeLists.txt | 18 +---- src/force_types/force_lj_idial_neigh.h | 4 +- src/force_types/force_lj_neigh_impl.h | 19 ++++-- src/force_types/force_snap_neigh.h | 3 + src/force_types/force_snap_neigh_impl.h | 43 ++++++++---- src/input.cpp | 2 +- src/main.cpp | 28 +++----- src/modules_comm.h | 3 +- src/neighbor_types/CMakeLists.txt | 12 +--- src/neighbor_types/neighbor_2d.h | 6 +- src/neighbor_types/neighbor_csr.h | 2 - src/system.cpp | 25 +++---- src/types.h | 8 ++- 21 files changed, 115 insertions(+), 269 deletions(-) delete mode 100644 src/Makefile diff --git a/CMakeLists.txt b/CMakeLists.txt index 31e2cc4..9029b24 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,28 +1,32 @@ -cmake_minimum_required(VERSION 3.10) +cmake_minimum_required(VERSION 3.16) project(ExaMiniMD LANGUAGES CXX) include(GNUInstallDirs) -find_package(Kokkos 3.0 REQUIRED) -option(USE_MPI "Build with MPI" ON) +find_package(Kokkos REQUIRED) -if (USE_MPI) +option(ENABLE_MPI "Whether to build with MPI" OFF) +option(ENABLE_KOKKOS_REMOTE_SPACES "Whether to build with Kokkos Remote Spaces" ON) + +if (ENABLE_MPI) + find_package(MPI REQUIRED CXX) + message(STATUS "Building with MPI support") + set(BACKEND_NAME MPI) + list(APPEND BACKENDS ${BACKEND_NAME}) +endif() + +if (ENABLE_KOKKOS_REMOTE_SPACES) find_package(MPI REQUIRED CXX) -else() - kokkos_check(DEVICES Cuda RETURN_VALUE USE_CUDA) - if (USING_CUDA) - if (CMAKE_CXX_COMPILER_ID MATCHES "nvcc") - enable_language(CUDA) - else() - message(FATAL_ERROR "Kokkos uses CUDA but compiler is not nvcc") - endif() - endif() + find_package(KokkosRemoteSpaces REQUIRED) + message(STATUS "Building with Kokkos Remote Spaces support") + set(BACKEND_NAME KokkosRemoteSpaces) + list(APPEND BACKENDS ${BACKEND_NAME}) endif() -# force target_source to resolve relative paths immediately -if(POLICY CMP0076) - cmake_policy(SET CMP0076 NEW) +list(LENGTH BACKENDS N_BACKENDS) +if (${N_BACKENDS} GREATER_EQUAL "2") + message(FATAL_ERROR "Must specify a single valid communiction implementation. ${N_BACKENDS} given.") endif() add_subdirectory(src) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5a880f7..c72806c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,40 +1,5 @@ -set(HEADERS - binning.h - comm.h - examinimd.h - force.h - input.h - integrator.h - integrator_nve.h - math_extra.h - modules_binning.h - modules_comm.h - modules_force.h - modules_integrator.h - modules_neighbor.h - modules_property.h - neighbor.h - property_kine.h - property_pote.h - property_temperature.h - system.h - types.h) - -set(SOURCES - binning.cpp - comm.cpp - examinimd.cpp - force.cpp - input.cpp - integrator.cpp - integrator_nve.cpp - neighbor.cpp - property_kine.cpp - property_pote.cpp - property_temperature.cpp - system.cpp) - -add_executable(ExaMiniMD main.cpp ${SOURCES} ${HEADERS}) +FILE(GLOB SRCS *.cpp) +add_executable(ExaMiniMD ${SRCS}) set(SUBDIRECTORIES binning_types @@ -46,15 +11,19 @@ foreach(SUBDIR ${SUBDIRECTORIES}) add_subdirectory(${SUBDIR}) endforeach() -if (USE_CUDA) - set_target_properties(ExaMiniMD PROPERTIES LANGUAGE CUDA) +if(ENABLE_MPI) + target_compile_definitions(ExaMiniMD PRIVATE EXAMINIMD_ENABLE_MPI) endif() -if (USE_MPI) - target_compile_definitions(ExaMiniMD PRIVATE EXAMINIMD_ENABLE_MPI) +# Select a default set of options. We can export this as CMake options later +if (ENABLE_KOKKOS_REMOTE_SPACES) + target_compile_definitions(ExaMiniMD PRIVATE EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) + target_compile_definitions(ExaMiniMD PRIVATE SHMEMTESTS_USE_SCALAR) + #target_compile_definitions(ExaMiniMD PRIVATE SHMEMTESTS_USE_HALO) + target_compile_definitions(ExaMiniMD PRIVATE SHMEMTESTS_USE_GLOBAL) endif() target_include_directories(ExaMiniMD PRIVATE ${Kokkos_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${SUBDIRECTORIES}) -target_link_libraries(ExaMiniMD PRIVATE $<$:MPI::MPI_CXX> Kokkos::kokkos) +target_link_libraries(ExaMiniMD PRIVATE $<$:MPI::MPI_CXX> Kokkos::kokkos $<$:Kokkos::kokkosremotespaces>) -install(TARGETS ExaMiniMD DESTINATION ${CMAKE_INSTALL_BINDIR}) +install(TARGETS ExaMiniMD DESTINATION ${CMAKE_INSTALL_BINDIR}) \ No newline at end of file diff --git a/src/Makefile b/src/Makefile deleted file mode 100644 index 6be8818..0000000 --- a/src/Makefile +++ /dev/null @@ -1,90 +0,0 @@ -#Kokkos root path -KOKKOS_PATH = ${HOME}/Kokkos/kokkos -KOKKOS_REMOTE_SPACES_PATH = ${HOME}/Kokkos/kokkos-remote-spaces - -#Backend and architecture configuration -KOKKOS_DEVICES=OpenMP -KOKKOS_ARCH = "None" - -#MPI On or off (1/0) -MPI = 1 - -MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) -EXAMINIMD_PATH = $(subst Makefile,,$(MAKEFILE_PATH))/.. - -default: build - echo "Start Build" - - -EXE = ExaMiniMD -CXXFLAGS = -O3 -g -LINKFLAGS = -O3 -g - -ifeq ($(MPI), 1) - CXX = mpicxx - CXXFLAGS += -DEXAMINIMD_ENABLE_MPI -else - ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) - CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper - else - CXX = g++ - endif -endif - -LINK = ${CXX} - -CXXFLAGS += -DSHMEMTESTS_USE_SCALAR -#CXXFLAGS += -DSHMEMTESTS_USE_HALO -#CXXFLAGS += -DSHMEMTESTS_USE_HALO_LOCAL -#CXXFLAGS += -DSHMEMTESTS_USE_LOCAL_GLOBAL -CXXFLAGS += -DSHMEMTESTS_USE_GLOBAL - -KOKKOS_OPTIONS=aggressive_vectorization - -DEPFLAGS = -M - -vpath %.cpp $(EXAMINIMD_PATH)/src -SRC = $(wildcard $(EXAMINIMD_PATH)/src/*.cpp) -HEADERS = $(wildcard $(EXAMINIMD_PATH)/src/*.h) -CXXFLAGS += -I$(EXAMINIMD_PATH)/src - -vpath %.cpp $(EXAMINIMD_PATH)/src/force_types -SRC += $(wildcard $(EXAMINIMD_PATH)/src/force_types/*.cpp) -HEADERS += $(wildcard $(EXAMINIMD_PATH)/src/force_types/*.h) -CXXFLAGS += -I$(EXAMINIMD_PATH)/src/force_types - -vpath %.cpp $(EXAMINIMD_PATH)/src/comm_types -SRC += $(wildcard $(EXAMINIMD_PATH)/src/comm_types/*.cpp) -HEADERS += $(wildcard $(EXAMINIMD_PATH)/src/comm_types/*.h) -CXXFLAGS += -I$(EXAMINIMD_PATH)/src/comm_types - -vpath %.cpp $(EXAMINIMD_PATH)/src/neighbor_types -SRC += $(wildcard $(EXAMINIMD_PATH)/src/neighbor_types/*.cpp) -HEADERS += $(wildcard $(EXAMINIMD_PATH)/src/neighbor_types/*.h) -CXXFLAGS += -I$(EXAMINIMD_PATH)/src/neighbor_types - -vpath %.cpp $(EXAMINIMD_PATH)/src/binning_types -SRC += $(wildcard $(EXAMINIMD_PATH)/src/binning_types/*.cpp) -HEADERS += $(wildcard $(EXAMINIMD_PATH)/src/binning_types/*.h) -CXXFLAGS += -I$(EXAMINIMD_PATH)/src/binning_types - -SRC_NOTDIR = $(notdir $(SRC)) -OBJ = $(SRC_NOTDIR:.cpp=.o) -LIB = - -include $(KOKKOS_PATH)/Makefile.kokkos -include $(KOKKOS_REMOTE_SPACES_PATH)/Makefile.kokkos-remote-spaces - -build: $(EXE) - -$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) - -clean: kokkos-clean - rm -f *.o $(EXE) - -# Compilation rules - -%.o:%.cpp $(KOKKOS_CPP_DEPENDS) ${HEADERS} - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< - diff --git a/src/binning_types/CMakeLists.txt b/src/binning_types/CMakeLists.txt index 8bbfdb1..242043b 100644 --- a/src/binning_types/CMakeLists.txt +++ b/src/binning_types/CMakeLists.txt @@ -1,7 +1,2 @@ -SET(HEADERS - binning_kksort.h) - -SET(SOURCES - binning_kksort.cpp) - -target_sources(ExaMiniMD PRIVATE ${SOURCES} ${HEADERS}) +FILE(GLOB SRCS *.cpp) +target_sources(ExaMiniMD PRIVATE ${SRCS}) diff --git a/src/comm_types/CMakeLists.txt b/src/comm_types/CMakeLists.txt index 34cc878..242043b 100644 --- a/src/comm_types/CMakeLists.txt +++ b/src/comm_types/CMakeLists.txt @@ -1,9 +1,2 @@ -SET(HEADERS - comm_mpi.h - comm_serial.h) - -SET(SOURCES - comm_mpi.cpp - comm_serial.cpp) - -target_sources(ExaMiniMD PRIVATE ${SOURCES} ${HEADERS}) +FILE(GLOB SRCS *.cpp) +target_sources(ExaMiniMD PRIVATE ${SRCS}) diff --git a/src/comm_types/comm_mpi.cpp b/src/comm_types/comm_mpi.cpp index 66bd7fe..38703c1 100644 --- a/src/comm_types/comm_mpi.cpp +++ b/src/comm_types/comm_mpi.cpp @@ -36,7 +36,7 @@ // Questions? Contact Christian R. Trott (crtrott@sandia.gov) //************************************************************************ -#ifdef EXAMINIMD_ENABLE_MPI +#if defined(EXAMINIMD_ENABLE_MPI) || defined (EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) #include CommMPI::CommMPI(System* s, T_X_FLOAT comm_depth):Comm(s,comm_depth) { diff --git a/src/comm_types/comm_mpi.h b/src/comm_types/comm_mpi.h index 754834f..a82eac3 100644 --- a/src/comm_types/comm_mpi.h +++ b/src/comm_types/comm_mpi.h @@ -52,7 +52,7 @@ #define COMM_MPI_H #include -#ifndef EXAMINIMD_ENABLE_MPI +#if !defined(EXAMINIMD_ENABLE_MPI) && !defined(EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) #error "Trying to compile CommMPI without MPI" #endif diff --git a/src/examinimd.h b/src/examinimd.h index 4738703..1fa6821 100644 --- a/src/examinimd.h +++ b/src/examinimd.h @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -57,16 +58,11 @@ class ExaMiniMD { Binning* binning; ExaMiniMD(); - void init(int argc,char* argv[]); - void run(int nsteps); - void dump_binary(int); void check_correctness(int); - void print_performance(); - void shutdown(); }; diff --git a/src/force_types/CMakeLists.txt b/src/force_types/CMakeLists.txt index e1aee41..d7870d5 100644 --- a/src/force_types/CMakeLists.txt +++ b/src/force_types/CMakeLists.txt @@ -1,17 +1,3 @@ -SET(HEADERS - force_lj_cell.h - force_lj_idial_neigh.h - force_lj_idial_neigh_impl.h - force_lj_neigh.h - force_lj_neigh_impl.h - force_snap_neigh.h - force_snap_neigh_impl.h - sna.h) +FILE(GLOB SRCS *.cpp) +target_sources(ExaMiniMD PRIVATE ${SRCS}) -SET(SOURCES - force_lj_cell.cpp - force_lj_idial_neigh.cpp - force_lj_neigh.cpp - force_snap_neigh.cpp) - -target_sources(ExaMiniMD PRIVATE ${SOURCES} ${HEADERS}) diff --git a/src/force_types/force_lj_idial_neigh.h b/src/force_types/force_lj_idial_neigh.h index c0d298a..4164cd8 100644 --- a/src/force_types/force_lj_idial_neigh.h +++ b/src/force_types/force_lj_idial_neigh.h @@ -39,9 +39,8 @@ #ifdef MODULES_OPTION_CHECK if( (strcmp(argv[i+1], "NEIGH_FULL") == 0) ) force_iteration_type = FORCE_ITER_NEIGH_FULL; - if( (strcmp(argv[i+1], "NEIGH_HALF") == 0) ) { + if( (strcmp(argv[i+1], "NEIGH_HALF") == 0) ) force_iteration_type = FORCE_ITER_NEIGH_HALF; - } #endif #ifdef FORCE_MODULES_INSTANTIATION else if (input->force_type == FORCE_LJ_IDIAL) { @@ -54,7 +53,6 @@ } #endif - #if !defined(MODULES_OPTION_CHECK) && \ !defined(FORCE_MODULES_INSTANTIATION) diff --git a/src/force_types/force_lj_neigh_impl.h b/src/force_types/force_lj_neigh_impl.h index 0d9d42d..005f520 100644 --- a/src/force_types/force_lj_neigh_impl.h +++ b/src/force_types/force_lj_neigh_impl.h @@ -123,11 +123,11 @@ void ForceLJNeigh::compute(System* system, Binning* binning, Neig domain_y = system->domain_y; domain_z = system->domain_z; Kokkos::fence(); - Kokkos::DefaultRemoteMemorySpace().fence();; + Kokkos::Experimental::DefaultRemoteMemorySpace::fence();; Kokkos::parallel_for("ForceLJNeigh::compute_fill_xshmem", Kokkos::RangePolicy(0,system->N_local), *this); //Kokkos::SHMEMSpace::fence(); Kokkos::fence(); - Kokkos::DefaultRemoteMemorySpace().fence();; + Kokkos::Experimental::DefaultRemoteMemorySpace().fence();; if (use_stackparams) { if(half_neigh) Kokkos::parallel_for("ForceLJNeigh::compute", t_policy_half_neigh_stackparams(0, system->N_local), *this); @@ -140,7 +140,7 @@ void ForceLJNeigh::compute(System* system, Binning* binning, Neig Kokkos::parallel_for("ForceLJNeigh::compute", t_policy_full_neigh(0, system->N_local), *this); } Kokkos::fence(); - Kokkos::DefaultRemoteMemorySpace().fence();; + Kokkos::Experimental::DefaultRemoteMemorySpace::fence();; //Kokkos::SHMEMSpace::fence(); //x_shmem = t_x_shmem(); @@ -200,9 +200,8 @@ void ForceLJNeigh::operator() (TagFullNeigh, const T for(int jj = 0; jj < num_neighs; jj++) { T_INT j = neighs_i(jj); //printf("Neigh: %i %i %li %li %i\n",i,j,global_index(i),global_index(j),j>N_local?1:0); - //const T_F_FLOAT dx = x_i - x(j,0); - //const T_F_FLOAT dy = y_i - x(j,1); - //const T_F_FLOAT dz = z_i - x(j,2); + + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES const T_INDEX jg = global_index(j); #ifdef SHMEMTESTS_USE_SCALAR @@ -234,6 +233,7 @@ void ForceLJNeigh::operator() (TagFullNeigh, const T const T_X_FLOAT zj_shmem = posj_shmem.z; #endif #endif + //printf("DATA: %i %i %i %lf %lf %lf %i %li %i\n",id(i),jj,(int)jg,xj_shmem,yj_shmem,zj_shmem,(int)jg/N_MAX_MASK,N_MAX_MASK,int(jg%N_MAX_MASK)); T_F_FLOAT dx = abs(x_i - xj_shmem)>domain_x/2? (x_i-xj_shmem<0?x_i-xj_shmem+domain_x:x_i-xj_shmem-domain_x) @@ -244,7 +244,12 @@ void ForceLJNeigh::operator() (TagFullNeigh, const T T_F_FLOAT dz = abs(z_i - zj_shmem)>domain_z/2? (z_i-zj_shmem<0?z_i-zj_shmem+domain_z:z_i-zj_shmem-domain_z) :z_i-zj_shmem; - + #else //EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + const T_F_FLOAT dx = x_i - x(j,0); + const T_F_FLOAT dy = y_i - x(j,1); + const T_F_FLOAT dz = z_i - x(j,2); + #endif + // if((abs(dx_shmem-dx)>1e-10) || (abs(dy_shmem-dy)>1e-10) || (abs(dz_shmem-dz)>1e-10)) // printf("Neigh: %i %i %li %li %i : %lf %lf %lf %lf %lf\n",i,j,global_index(i),global_index(j),j>N_local?1:0,x(j,0),xj_shmem,domain_x,dx,dx_shmem); const int type_j = type(j); diff --git a/src/force_types/force_snap_neigh.h b/src/force_types/force_snap_neigh.h index 53b2b16..710f5de 100644 --- a/src/force_types/force_snap_neigh.h +++ b/src/force_types/force_snap_neigh.h @@ -183,6 +183,9 @@ class ForceSNAP : public Force { struct TagForceCompute {}; KOKKOS_INLINE_FUNCTION void operator() (TagForceCompute, const Kokkos::TeamPolicy<>::member_type& team) const; + KOKKOS_INLINE_FUNCTION + void operator() (const Kokkos::TeamPolicy<>::member_type& team) const; + struct TagCopyLocalXShmem {}; KOKKOS_INLINE_FUNCTION diff --git a/src/force_types/force_snap_neigh_impl.h b/src/force_types/force_snap_neigh_impl.h index f7468e6..c1faa3a 100644 --- a/src/force_types/force_snap_neigh_impl.h +++ b/src/force_types/force_snap_neigh_impl.h @@ -189,11 +189,11 @@ void ForceSNAP::compute(System* system, Binning* binning, Neighbo if(max_neighs(0,system->N_local), *this); - Kokkos::DefaultRemoteMemorySpace().fence();; + Kokkos::Experimental::DefaultRemoteMemorySpace::fence();; - Kokkos::parallel_reduce("ForceSNAP::find_max_neighs",nlocal, FindMaxNumNeighs(neigh_list), Kokkos::Experimental::Max(max_neighs)); + Kokkos::parallel_reduce("ForceSNAP::find_max_neighs",nlocal, FindMaxNumNeighs(neigh_list), Kokkos::Max(max_neighs)); sna.nmax = max_neighs; @@ -201,7 +201,6 @@ void ForceSNAP::compute(System* system, Binning* binning, Neighbo T_INT thread_scratch_size = sna.size_thread_scratch_arrays(); //printf("Sizes: %i %i\n",team_scratch_size/1024,thread_scratch_size/1024); - int team_size_max = Kokkos::TeamPolicy::team_size_max(*this); int vector_length = 8; int team_size_max = Kokkos::TeamPolicy<>(nlocal,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelForTag()); #ifdef EMD_ENABLE_GPU @@ -218,7 +217,7 @@ void ForceSNAP::compute(System* system, Binning* binning, Neighbo .set_scratch_size(1,Kokkos::PerTeam(team_scratch_size)) ,*this); Kokkos::fence(); - Kokkos::DefaultRemoteMemorySpace().fence();; + Kokkos::Experimental::DefaultRemoteMemorySpace::fence();; //static int step =0; //step++; //if(step%10==0) @@ -604,6 +603,11 @@ void ForceSNAP::read_files(char *coefffilename, char *paramfilena delete[] found; } +template +KOKKOS_INLINE_FUNCTION +void ForceSNAP::operator() (const Kokkos::TeamPolicy<>::member_type& team) const { +} + template KOKKOS_INLINE_FUNCTION void ForceSNAP::operator() (TagForceCompute, const Kokkos::TeamPolicy<>::member_type& team) const { @@ -631,6 +635,9 @@ void ForceSNAP::operator() (TagForceCompute, const Kokkos::TeamPo Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,num_neighs), [&] (const int jj, int& count) { Kokkos::single(Kokkos::PerThread(team), [&] (){ + + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + T_INT j = neighs_i(jj); const T_INDEX jg = global_index(j); #ifdef SHMEMTESTS_USE_SCALAR @@ -672,9 +679,15 @@ void ForceSNAP::operator() (TagForceCompute, const Kokkos::TeamPo T_F_FLOAT dz = -(abs(z_i - zj_shmem)>domain_z/2? (z_i-zj_shmem<0?z_i-zj_shmem+domain_z:z_i-zj_shmem-domain_z) :z_i-zj_shmem); - //const T_F_FLOAT dx = xj_shmem - x_i; - //const T_F_FLOAT dy = yj_shmem - y_i; - //const T_F_FLOAT dz = zj_shmem - z_i; + + #else //EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + + const T_F_FLOAT dx = xj_shmem - x_i; + const T_F_FLOAT dy = yj_shmem - y_i; + const T_F_FLOAT dz = zj_shmem - z_i; + + #endif + const int type_j = type(j); const T_F_FLOAT rsq = dx*dx + dy*dy + dz*dz; @@ -690,9 +703,12 @@ void ForceSNAP::operator() (TagForceCompute, const Kokkos::TeamPo if(team.team_rank() == 0) Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,num_neighs), [&] (const int jj, int& offset, bool final){ - //for (int jj = 0; jj < num_neighs; jj++) { + //for (int jj = 0; jj < num_neighs; jj++) { T_INT j = neighs_i(jj); const T_INDEX jg = global_index(j); + + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + #ifdef SHMEMTESTS_USE_SCALAR #ifdef SHMEMTESTS_USE_HALO const T_X_FLOAT xj_shmem = x(j,0);//x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); @@ -731,9 +747,12 @@ void ForceSNAP::operator() (TagForceCompute, const Kokkos::TeamPo T_F_FLOAT dz = -(abs(z_i - zj_shmem)>domain_z/2? (z_i-zj_shmem<0?z_i-zj_shmem+domain_z:z_i-zj_shmem-domain_z) :z_i-zj_shmem); - //const T_F_FLOAT dx = xj_shmem - x_i; - //const T_F_FLOAT dy = yj_shmem - y_i; - //const T_F_FLOAT dz = zj_shmem - z_i; + + #else //EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + const T_F_FLOAT dx = xj_shmem - x_i; + const T_F_FLOAT dy = yj_shmem - y_i; + const T_F_FLOAT dz = zj_shmem - z_i; + #endif const int type_j = type(j); const T_F_FLOAT rsq = dx*dx + dy*dy + dz*dz; diff --git a/src/input.cpp b/src/input.cpp index b38459b..1de761e 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -124,7 +124,7 @@ Input::Input(System* p):system(p),input_data(ItemizedFile()),integrator_type(INT input_file_type = -1; -#ifdef EXAMINIMD_ENABLE_MPI +#if defined(EXAMINIMD_ENABLE_MPI) && defined(EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) comm_type = COMM_MPI; #else comm_type = COMM_SERIAL; diff --git a/src/main.cpp b/src/main.cpp index ce19b22..9efcf4c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -44,40 +44,28 @@ #ifdef EXAMINIMD_ENABLE_MPI #include "mpi.h" #endif + +#ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES #include +#endif int main(int argc, char* argv[]) { - - #ifdef EXAMINIMD_ENABLE_MPI - MPI_Init(&argc,&argv); - #ifdef KOKKOS_ENABLE_NVSHMEMSPACE - shmemx_init_attr_t attr; - auto mpi_comm = MPI_COMM_WORLD; - attr.mpi_comm = &mpi_comm; - shmemx_init_attr (SHMEMX_INIT_WITH_MPI_COMM, &attr); - #endif - #ifdef KOKKOS_ENABLE_SHMEMSPACE - shmem_init(); - #endif + #if defined (EXAMINIMD_ENABLE_MPI) || defined (EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) + comm_lib_init(argc,argv); #endif Kokkos::initialize(argc,argv); ExaMiniMD examinimd; examinimd.init(argc,argv); - examinimd.run(examinimd.input->nsteps); - - // examinimd.check_correctness(); - + // examinimd.check_correctness(); examinimd.print_performance(); - examinimd.shutdown(); Kokkos::finalize(); - #ifdef EXAMINIMD_ENABLE_MPI - //MPI_Finalize(); + #if defined (EXAMINIMD_ENABLE_MPI) || defined (EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) + comm_lib_finalize(); #endif } - diff --git a/src/modules_comm.h b/src/modules_comm.h index a111d15..4ee8a2d 100644 --- a/src/modules_comm.h +++ b/src/modules_comm.h @@ -37,7 +37,6 @@ //************************************************************************ // Include Module header files for comm -#ifdef EXAMINIMD_ENABLE_MPI + #include -#endif #include diff --git a/src/neighbor_types/CMakeLists.txt b/src/neighbor_types/CMakeLists.txt index 13b155a..d7870d5 100644 --- a/src/neighbor_types/CMakeLists.txt +++ b/src/neighbor_types/CMakeLists.txt @@ -1,11 +1,3 @@ -SET(HEADERS - neighbor_2d.h - neighbor_csr.h - neighbor_csr_map_constr.h) +FILE(GLOB SRCS *.cpp) +target_sources(ExaMiniMD PRIVATE ${SRCS}) -SET(SOURCES - neighbor_2d.cpp - neighbor_csr.cpp - neighbor_csr_map_constr.cpp) - -target_sources(ExaMiniMD PRIVATE ${SOURCES} ${HEADERS}) diff --git a/src/neighbor_types/neighbor_2d.h b/src/neighbor_types/neighbor_2d.h index bca409b..89ba193 100644 --- a/src/neighbor_types/neighbor_2d.h +++ b/src/neighbor_types/neighbor_2d.h @@ -152,7 +152,6 @@ class Neighbor2D: public Neighbor { t_neigh_list neigh_list; - Neighbor2D():neigh_cut(0.0) { neigh_type = NEIGH_2D; @@ -244,7 +243,7 @@ class Neighbor2D: public Neighbor { (by_j>=nhalo) && (by_j=nhalo) && (bz_jpermute_vector; do { - // Resize NeighborList if( neigh_list.neighs.extent(0) < N_local + 1 || neigh_list.neighs.extent(1) < neigh_list.maxneighs ) neigh_list.neighs = Kokkos::View ("Neighbor2D::neighs", N_local + 1, neigh_list.maxneighs); - // Fill the NeighborList Kokkos::deep_copy(neigh_list.num_neighs,0); Kokkos::deep_copy(resize,0); @@ -317,7 +314,6 @@ class Neighbor2D: public Neighbor { else Kokkos::parallel_for("Neighbor2D::fill_neigh_list_full",t_policy_fnlf(nbins,Kokkos::AUTO,8),*this); - Kokkos::fence(); Kokkos::deep_copy(h_resize,resize); diff --git a/src/neighbor_types/neighbor_csr.h b/src/neighbor_types/neighbor_csr.h index 0344321..df27ac0 100644 --- a/src/neighbor_types/neighbor_csr.h +++ b/src/neighbor_types/neighbor_csr.h @@ -145,8 +145,6 @@ class NeighborCSR: public Neighbor { typename Binning::t_bincount bin_count; typename Binning::t_permute_vector permute_vector; - - public: struct TagCreateOffsets {}; struct TagCountNeighsFull {}; diff --git a/src/system.cpp b/src/system.cpp index 7a57389..744c53c 100644 --- a/src/system.cpp +++ b/src/system.cpp @@ -37,9 +37,11 @@ //************************************************************************ #include + #ifdef EXAMINIMD_ENABLE_MPI #include #endif + System::System() { N = 0; N_max = 0; @@ -59,7 +61,7 @@ System::System() { sub_domain_hi_x = sub_domain_hi_y = sub_domain_hi_z = 0.0; sub_domain_lo_x = sub_domain_lo_y = sub_domain_lo_z = 0.0; mvv2e = boltz = dt = 0.0; -#ifdef EXAMINIMD_ENABLE_MPI +#if defined(EXAMINIMD_ENABLE_MPI) || defined (EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) int proc_rank; MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank); do_print = proc_rank == 0; @@ -102,27 +104,16 @@ void System::grow(T_INT N_new) { Kokkos::resize(x,N_max); // Positions Kokkos::resize(v,N_max); // Velocities Kokkos::resize(f,N_max); // Forces - Kokkos::resize(id,N_max); // Id Kokkos::resize(global_index,N_max); // Id - Kokkos::resize(type,N_max); // Particle Type - Kokkos::resize(q,N_max); // Charge -{ -#ifdef EXAMINIMD_ENABLE_MPI - int num_ranks; - MPI_Comm_size(MPI_COMM_WORLD, &num_ranks); -#else - int num_ranks = 1; -#endif - int* rank_list = new int[num_ranks]; - for(int i=0; i("X_shmem",num_ranks,rank_list,N_max); -} + #ifdef EXAMINIMD_ENABLE_USE_KOKKOS_REMOTE_SPACES + int num_ranks; + MPI_Comm_size(MPI_COMM_WORLD, &num_ranks); + x_shmem = t_x_shmem("X_shmem", num_ranks, N_max); + #endif } } diff --git a/src/types.h b/src/types.h index ccbb8a3..92abd3e 100644 --- a/src/types.h +++ b/src/types.h @@ -38,8 +38,12 @@ #ifndef TYPES_H #define TYPES_H + #include + +#ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES #include +#endif // Module Types etc // Units to be used @@ -97,10 +101,10 @@ typedef Kokkos::View t_x_const; // P typedef Kokkos::View> t_x_const_rnd; // Positions #ifdef SHMEMTESTS_USE_SCALAR -typedef Kokkos::View t_x_shmem; // PGAS Positions +typedef Kokkos::View t_x_shmem; // PGAS Positions typedef Kokkos::View t_x_shmem_local; // Local PGAS Positions #else -typedef Kokkos::View t_x_shmem; // PGAS Positions +typedef Kokkos::View t_x_shmem; // PGAS Positions typedef Kokkos::View t_x_shmem_local; // Local PGAS Positions #endif typedef Kokkos::View t_v; // Velocities From a7eaaaf567a2616c89d4e5244aa88624bbfae0bf Mon Sep 17 00:00:00 2001 From: Jan Ciesko Date: Thu, 13 Jun 2024 16:24:05 -0600 Subject: [PATCH 34/36] Temporarily disable non-relevant module backends Use same li file as CabanaMD Fix ifdefs when using USING_MPI or USING_KOKKOS_REMOTE_SPACES --- CMakeLists.txt | 9 +++++++-- input/in.lj | 12 +++++------ src/comm_types/CMakeLists.txt | 7 +++++++ src/comm_types/comm_mpi.h | 2 ++ src/force_types/CMakeLists.txt | 8 ++++++++ src/force_types/force_lj_neigh.cpp | 1 + src/force_types/force_lj_neigh.h | 3 +++ src/force_types/force_lj_neigh_impl.h | 29 ++++++++++++++++++--------- src/force_types/force_snap_neigh.h | 2 ++ src/modules_comm.h | 5 ++++- src/modules_force.h | 11 +++++----- src/system.h | 2 ++ src/types.h | 2 ++ 13 files changed, 70 insertions(+), 23 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9029b24..613bd7e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ include(GNUInstallDirs) find_package(Kokkos REQUIRED) option(ENABLE_MPI "Whether to build with MPI" OFF) -option(ENABLE_KOKKOS_REMOTE_SPACES "Whether to build with Kokkos Remote Spaces" ON) +option(ENABLE_KOKKOS_REMOTE_SPACES "Whether to build with Kokkos Remote Spaces" OFF) if (ENABLE_MPI) find_package(MPI REQUIRED CXX) @@ -26,8 +26,13 @@ endif() list(LENGTH BACKENDS N_BACKENDS) if (${N_BACKENDS} GREATER_EQUAL "2") - message(FATAL_ERROR "Must specify a single valid communiction implementation. ${N_BACKENDS} given.") + message(FATAL_ERROR "Must specify a single valid communiction implementation. ${N_BACKENDS} given") endif() +if (${N_BACKENDS} EQUAL "0") + message(STATUS "Building with serial comm type") +endif() + + add_subdirectory(src) add_subdirectory(input) diff --git a/input/in.lj b/input/in.lj index 1c64c0a..774c548 100644 --- a/input/in.lj +++ b/input/in.lj @@ -1,4 +1,4 @@ -# 3d Lennard-Jones melt +# Example using Lennard-Jones potential units lj atom_style atomic @@ -12,12 +12,12 @@ mass 1 2.0 velocity all create 1.4 87287 loop geom -pair_style lj/cut 1.5 -pair_coeff 1 1 1.0 1.0 1.5 +pair_style lj/cut 2.5 +pair_coeff 1 1 1.0 1.0 2.5 neighbor 0.3 bin -neigh_modify delay 0 every 20 check no +neigh_modify every 20 one 50 fix 1 all nve -thermo 100 +thermo 10 -run 1000 +run 100 diff --git a/src/comm_types/CMakeLists.txt b/src/comm_types/CMakeLists.txt index 242043b..cd13f8b 100644 --- a/src/comm_types/CMakeLists.txt +++ b/src/comm_types/CMakeLists.txt @@ -1,2 +1,9 @@ FILE(GLOB SRCS *.cpp) target_sources(ExaMiniMD PRIVATE ${SRCS}) + +if (!ENABLE_MPI AND !ENABLE_KOKKOS_REMOTE_SPACES) + # Skip MPI module + list(FILTER SRCS EXCLUDE REGEX ".*comm_mpi\\.cpp$") +endif() + +target_sources(ExaMiniMD PRIVATE ${SRCS}) diff --git a/src/comm_types/comm_mpi.h b/src/comm_types/comm_mpi.h index a82eac3..f1ba4dd 100644 --- a/src/comm_types/comm_mpi.h +++ b/src/comm_types/comm_mpi.h @@ -56,7 +56,9 @@ #error "Trying to compile CommMPI without MPI" #endif +#ifdef EXAMINIMD_ENABLE_MPI #include "mpi.h" +#endif class CommMPI: public Comm { diff --git a/src/force_types/CMakeLists.txt b/src/force_types/CMakeLists.txt index d7870d5..9f43539 100644 --- a/src/force_types/CMakeLists.txt +++ b/src/force_types/CMakeLists.txt @@ -1,3 +1,11 @@ FILE(GLOB SRCS *.cpp) + + +#Skip lj_ideal, snap and cell +#TODO: SNAP is outdates and should likely be removed all together as it is +list(FILTER SRCS EXCLUDE REGEX ".*lj_cell\\.cpp$") +list(FILTER SRCS EXCLUDE REGEX ".*lj_idial_neigh\\.cpp$") +list(FILTER SRCS EXCLUDE REGEX ".*snap_neigh\\.cpp$") + target_sources(ExaMiniMD PRIVATE ${SRCS}) diff --git a/src/force_types/force_lj_neigh.cpp b/src/force_types/force_lj_neigh.cpp index 9f297ff..565964f 100644 --- a/src/force_types/force_lj_neigh.cpp +++ b/src/force_types/force_lj_neigh.cpp @@ -37,6 +37,7 @@ //************************************************************************ #include + #define FORCETYPE_DECLARE_TEMPLATE_MACRO(NeighType) ForceLJNeigh #define FORCE_MODULES_TEMPLATE #include diff --git a/src/force_types/force_lj_neigh.h b/src/force_types/force_lj_neigh.h index 0927d90..785a377 100644 --- a/src/force_types/force_lj_neigh.h +++ b/src/force_types/force_lj_neigh.h @@ -67,8 +67,11 @@ class ForceLJNeigh: public Force { private: int N_local,ntypes; t_x_const_rnd x; + + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES t_x_shmem x_shmem; t_x_shmem_local x_shmem_local; + #endif t_f f; t_f_atomic f_a; t_id id; diff --git a/src/force_types/force_lj_neigh_impl.h b/src/force_types/force_lj_neigh_impl.h index 005f520..4740dc1 100644 --- a/src/force_types/force_lj_neigh_impl.h +++ b/src/force_types/force_lj_neigh_impl.h @@ -38,6 +38,10 @@ #include +#ifdef EXAMINIMD_ENABLE_MPI +#include +#endif + template ForceLJNeigh::ForceLJNeigh(char** args, System* system, bool half_neigh_):Force(args,system,half_neigh_) { ntypes = system->ntypes; @@ -51,7 +55,11 @@ ForceLJNeigh::ForceLJNeigh(char** args, System* system, bool half N_local = 0; nhalo = 0; step = 0; + #if defined (EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) || defined (EXAMINIMD_ENABLE_MPI) MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank); + #else + proc_rank=0; + #endif } template @@ -106,13 +114,11 @@ void ForceLJNeigh::compute(System* system, Binning* binning, Neig N_local = system->N_local; x = system->x; + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES x_shmem = system->x_shmem; - -#ifdef KOKKOS_ENABLE_QUOSPACE - x_shmem_local = t_x_shmem_local(&x_shmem.access(proc_rank,0,0),x_shmem.extent(1)); -#else x_shmem_local = t_x_shmem_local(x_shmem.data(),x_shmem.extent(1)); -#endif + #endif + f = system->f; f_a = system->f; type = system->type; @@ -122,12 +128,13 @@ void ForceLJNeigh::compute(System* system, Binning* binning, Neig domain_x = system->domain_x; domain_y = system->domain_y; domain_z = system->domain_z; - Kokkos::fence(); + + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES Kokkos::Experimental::DefaultRemoteMemorySpace::fence();; Kokkos::parallel_for("ForceLJNeigh::compute_fill_xshmem", Kokkos::RangePolicy(0,system->N_local), *this); - //Kokkos::SHMEMSpace::fence(); Kokkos::fence(); Kokkos::Experimental::DefaultRemoteMemorySpace().fence();; + #endif if (use_stackparams) { if(half_neigh) Kokkos::parallel_for("ForceLJNeigh::compute", t_policy_half_neigh_stackparams(0, system->N_local), *this); @@ -140,9 +147,9 @@ void ForceLJNeigh::compute(System* system, Binning* binning, Neig Kokkos::parallel_for("ForceLJNeigh::compute", t_policy_full_neigh(0, system->N_local), *this); } Kokkos::fence(); + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES Kokkos::Experimental::DefaultRemoteMemorySpace::fence();; - //Kokkos::SHMEMSpace::fence(); - + #endif //x_shmem = t_x_shmem(); step++; } @@ -152,7 +159,9 @@ T_V_FLOAT ForceLJNeigh::compute_energy(System* system, Binning* b // Set internal data handles NeighborClass* neighbor = (NeighborClass*) neighbor_; neigh_list = neighbor->get_neigh_list(); + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank); + #endif N_local = system->N_local; x = system->x; f = system->f; @@ -417,6 +426,7 @@ template KOKKOS_INLINE_FUNCTION void ForceLJNeigh::operator() (TagCopyLocalXShmem, const T_INT& i) const { //printf("CopyLocal: %i %lf %lf %lf\n",i,x(i,0),x(i,1),x(i,2)); + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES #ifdef SHMEMTESTS_USE_SCALAR x_shmem_local(i,0) = x(i,0); x_shmem_local(i,1) = x(i,1); @@ -425,5 +435,6 @@ void ForceLJNeigh::operator() (TagCopyLocalXShmem, const T_INT& i double3 pos = {x(i,0),x(i,1),x(i,2)}; x_shmem_local(i) = pos; #endif + #endif //EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES } diff --git a/src/force_types/force_snap_neigh.h b/src/force_types/force_snap_neigh.h index 710f5de..d1edb4f 100644 --- a/src/force_types/force_snap_neigh.h +++ b/src/force_types/force_snap_neigh.h @@ -170,8 +170,10 @@ class ForceSNAP : public Force { t_x x; + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES t_x_shmem x_shmem; t_x_shmem_local x_shmem_local; + #endif t_f_atomic f; t_type type; t_index global_index; diff --git a/src/modules_comm.h b/src/modules_comm.h index 4ee8a2d..32c8cd7 100644 --- a/src/modules_comm.h +++ b/src/modules_comm.h @@ -38,5 +38,8 @@ // Include Module header files for comm -#include +#if !defined(EXAMINIMD_ENABLE_MPI) && !defined(EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) #include +#else +#include +#endif diff --git a/src/modules_force.h b/src/modules_force.h index 7d01eb7..526d89e 100644 --- a/src/modules_force.h +++ b/src/modules_force.h @@ -38,9 +38,10 @@ // Include Module header files for force #include -#include #include -#include -#ifndef KOKKOS_ENABLE_OPENMPTARGET -#include -#endif + +//#include +//#include +//#ifndef KOKKOS_ENABLE_OPENMPTARGET +//#include +//#endif diff --git a/src/system.h b/src/system.h index 5c47b22..1be1da3 100644 --- a/src/system.h +++ b/src/system.h @@ -76,7 +76,9 @@ class System { t_q q; // Charge + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES t_x_shmem x_shmem; + #endif // Per Type Property t_mass mass; diff --git a/src/types.h b/src/types.h index 92abd3e..b0b17a4 100644 --- a/src/types.h +++ b/src/types.h @@ -100,6 +100,7 @@ typedef Kokkos::View t_x; // P typedef Kokkos::View t_x_const; // Positions typedef Kokkos::View> t_x_const_rnd; // Positions +#ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES #ifdef SHMEMTESTS_USE_SCALAR typedef Kokkos::View t_x_shmem; // PGAS Positions typedef Kokkos::View t_x_shmem_local; // Local PGAS Positions @@ -107,6 +108,7 @@ typedef Kokkos::View t_x_shmem_local; // Loca typedef Kokkos::View t_x_shmem; // PGAS Positions typedef Kokkos::View t_x_shmem_local; // Local PGAS Positions #endif +#endif //EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES typedef Kokkos::View t_v; // Velocities typedef Kokkos::View t_f; // Force typedef Kokkos::View Date: Mon, 17 Jun 2024 13:04:16 -0600 Subject: [PATCH 35/36] Do not silenly drop unsupported comm-type --- src/comm_types/comm_serial.h | 6 ++++++ src/input.cpp | 1 - src/modules_comm.h | 5 +++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/comm_types/comm_serial.h b/src/comm_types/comm_serial.h index 067a91a..386b3cd 100644 --- a/src/comm_types/comm_serial.h +++ b/src/comm_types/comm_serial.h @@ -39,6 +39,12 @@ #ifdef MODULES_OPTION_CHECK if( (strcmp(argv[i+1], "SERIAL") == 0) ) comm_type = COMM_SERIAL; + #if !defined(EXAMINIMD_ENABLE_MPI) && !defined(EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) + if( (strcmp(argv[i+1], "SERIAL") != 0) ){ + printf("ERROR: Unsupported Comm-type selected\n"); + exit(1); + } + #endif #endif #ifdef COMM_MODULES_INSTANTIATION else if(input->comm_type == COMM_SERIAL) { diff --git a/src/input.cpp b/src/input.cpp index 1de761e..336cad7 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -123,7 +123,6 @@ Input::Input(System* p):system(p),input_data(ItemizedFile()),integrator_type(INT force_coeff_lines = Kokkos::View("Input::force_coeff_lines",0); input_file_type = -1; - #if defined(EXAMINIMD_ENABLE_MPI) && defined(EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) comm_type = COMM_MPI; #else diff --git a/src/modules_comm.h b/src/modules_comm.h index 32c8cd7..a62de7a 100644 --- a/src/modules_comm.h +++ b/src/modules_comm.h @@ -38,8 +38,9 @@ // Include Module header files for comm -#if !defined(EXAMINIMD_ENABLE_MPI) && !defined(EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) +#if defined(EXAMINIMD_ENABLE_MPI) || defined(EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) #include -#else #include +#else +#include #endif From 68f4353e68ecca7bd0cee9aebd56cc43db6f3c11 Mon Sep 17 00:00:00 2001 From: Jan Ciesko Date: Mon, 17 Jun 2024 16:06:36 -0600 Subject: [PATCH 36/36] Move Comm into a file Add force module that uses KRS --- input/in.lj | 4 +- src/CMakeLists.txt | 8 - src/binning_types/binning_kksort.cpp | 2 +- src/comm_lib.cpp | 88 ++++ src/comm_lib.h | 47 ++ src/comm_types/CMakeLists.txt | 6 - src/comm_types/comm_mpi.cpp | 15 +- src/comm_types/comm_mpi.h | 5 +- src/comm_types/comm_serial.h | 1 - src/examinimd.cpp | 12 +- src/force_types/CMakeLists.txt | 22 +- src/force_types/force_lj_neigh.cpp | 1 - src/force_types/force_lj_neigh.h | 16 - src/force_types/force_lj_neigh_distrib.cpp | 43 ++ src/force_types/force_lj_neigh_distrib.h | 164 +++++++ src/force_types/force_lj_neigh_distrib_impl.h | 420 ++++++++++++++++++ src/force_types/force_lj_neigh_impl.h | 104 +---- src/force_types/force_snap_neigh_impl.h | 2 +- src/input.cpp | 76 +--- src/main.cpp | 1 - src/modules_force.h | 9 +- src/neighbor_types/neighbor_2d.cpp | 2 +- src/neighbor_types/neighbor_csr.cpp | 2 +- src/neighbor_types/neighbor_csr.h | 2 +- .../neighbor_csr_map_constr.cpp | 2 +- src/system.cpp | 28 +- src/system.h | 18 +- src/types.h | 6 +- 28 files changed, 863 insertions(+), 243 deletions(-) create mode 100644 src/comm_lib.cpp create mode 100644 src/comm_lib.h create mode 100644 src/force_types/force_lj_neigh_distrib.cpp create mode 100644 src/force_types/force_lj_neigh_distrib.h create mode 100644 src/force_types/force_lj_neigh_distrib_impl.h diff --git a/input/in.lj b/input/in.lj index 774c548..c951cf0 100644 --- a/input/in.lj +++ b/input/in.lj @@ -5,7 +5,7 @@ atom_style atomic newton off lattice fcc 0.8442 -region box block 0 80 0 80 0 80 +region box block 0 100 0 100 0 100 create_box 1 box create_atoms 1 box mass 1 2.0 @@ -20,4 +20,4 @@ neigh_modify every 20 one 50 fix 1 all nve thermo 10 -run 100 +run 100 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c72806c..8c81768 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -15,14 +15,6 @@ if(ENABLE_MPI) target_compile_definitions(ExaMiniMD PRIVATE EXAMINIMD_ENABLE_MPI) endif() -# Select a default set of options. We can export this as CMake options later -if (ENABLE_KOKKOS_REMOTE_SPACES) - target_compile_definitions(ExaMiniMD PRIVATE EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) - target_compile_definitions(ExaMiniMD PRIVATE SHMEMTESTS_USE_SCALAR) - #target_compile_definitions(ExaMiniMD PRIVATE SHMEMTESTS_USE_HALO) - target_compile_definitions(ExaMiniMD PRIVATE SHMEMTESTS_USE_GLOBAL) -endif() - target_include_directories(ExaMiniMD PRIVATE ${Kokkos_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${SUBDIRECTORIES}) target_link_libraries(ExaMiniMD PRIVATE $<$:MPI::MPI_CXX> Kokkos::kokkos $<$:Kokkos::kokkosremotespaces>) diff --git a/src/binning_types/binning_kksort.cpp b/src/binning_types/binning_kksort.cpp index a7fdefc..535ab85 100644 --- a/src/binning_types/binning_kksort.cpp +++ b/src/binning_types/binning_kksort.cpp @@ -71,7 +71,7 @@ namespace { void BinningKKSort::create_binning(T_X_FLOAT dx_in, T_X_FLOAT dy_in, T_X_FLOAT dz_in, int halo_depth, bool do_local, bool do_ghost, bool sort) { if(do_local||do_ghost) { nhalo = halo_depth; - std::pair range(do_local?0:system->N_local, + Kokkos::pair range(do_local?0:system->N_local, do_ghost?system->N_local+system->N_ghost:system->N_local); nbinx = T_INT(system->sub_domain_x/dx_in); diff --git a/src/comm_lib.cpp b/src/comm_lib.cpp new file mode 100644 index 0000000..359de92 --- /dev/null +++ b/src/comm_lib.cpp @@ -0,0 +1,88 @@ +//************************************************************************ +// ExaMiniMD v. 1.0 +// Copyright (2018) National Technology & Engineering Solutions of Sandia, +// LLC (NTESS). +// +// Under the terms of Contract DE-NA-0003525 with NTESS, the U.S. Government +// retains certain rights in this software. +// +// ExaMiniMD is licensed under 3-clause BSD terms of use: Redistribution and +// use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL NTESS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +//************************************************************************ + +#include +#include + +#if EXAMINIMD_ENABLE_MPI +#include +#endif + +#ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES +#include +#endif + +void comm_lib_init(int argc, char* argv[]) { +#if defined (EXAMINIMD_ENABLE_MPI) || defined (EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) + int mpi_thread_level_available; + int mpi_thread_level_required = MPI_THREAD_MULTIPLE; + +#ifdef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL + mpi_thread_level_required = MPI_THREAD_SINGLE; +#endif + + MPI_Init_thread(&argc, &argv, mpi_thread_level_required, + &mpi_thread_level_available); + assert(mpi_thread_level_available >= mpi_thread_level_required); + +#ifdef KRS_ENABLE_SHMEMSPACE + shmem_init_thread(mpi_thread_level_required, &mpi_thread_level_available); + assert(mpi_thread_level_available >= mpi_thread_level_required); +#endif + +#ifdef KRS_ENABLE_NVSHMEMSPACE + MPI_Comm mpi_comm; + nvshmemx_init_attr_t attr; + mpi_comm = MPI_COMM_WORLD; + attr.mpi_comm = &mpi_comm; + nvshmemx_init_attr(NVSHMEMX_INIT_WITH_MPI_COMM, &attr); +#endif +} + +void comm_lib_finalize() { +#if defined (EXAMINIMD_ENABLE_MPI) || defined (EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) +#ifdef KRS_ENABLE_SHMEMSPACE + shmem_finalize(); +#endif +#ifdef KRS_ENABLE_NVSHMEMSPACE + nvshmem_finalize(); +#endif + MPI_Finalize(); +#endif +#endif +} diff --git a/src/comm_lib.h b/src/comm_lib.h new file mode 100644 index 0000000..8985856 --- /dev/null +++ b/src/comm_lib.h @@ -0,0 +1,47 @@ +//************************************************************************ +// ExaMiniMD v. 1.0 +// Copyright (2018) National Technology & Engineering Solutions of Sandia, +// LLC (NTESS). +// +// Under the terms of Contract DE-NA-0003525 with NTESS, the U.S. Government +// retains certain rights in this software. +// +// ExaMiniMD is licensed under 3-clause BSD terms of use: Redistribution and +// use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL NTESS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +//************************************************************************ + +#pragma once + +#ifndef COMM_INIT_H +#define COMM_INIT_H + +void comm_lib_init(int argc, char* argv[]); +void comm_lib_finalize(); + +#endif diff --git a/src/comm_types/CMakeLists.txt b/src/comm_types/CMakeLists.txt index cd13f8b..a4b281b 100644 --- a/src/comm_types/CMakeLists.txt +++ b/src/comm_types/CMakeLists.txt @@ -1,9 +1,3 @@ FILE(GLOB SRCS *.cpp) target_sources(ExaMiniMD PRIVATE ${SRCS}) - -if (!ENABLE_MPI AND !ENABLE_KOKKOS_REMOTE_SPACES) - # Skip MPI module - list(FILTER SRCS EXCLUDE REGEX ".*comm_mpi\\.cpp$") -endif() - target_sources(ExaMiniMD PRIVATE ${SRCS}) diff --git a/src/comm_types/comm_mpi.cpp b/src/comm_types/comm_mpi.cpp index 38703c1..4fed510 100644 --- a/src/comm_types/comm_mpi.cpp +++ b/src/comm_types/comm_mpi.cpp @@ -36,6 +36,8 @@ // Questions? Contact Christian R. Trott (crtrott@sandia.gov) //************************************************************************ +#include + #if defined(EXAMINIMD_ENABLE_MPI) || defined (EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) #include @@ -197,14 +199,12 @@ void CommMPI::exchange() { s = *system; N_local = system->N_local; N_ghost = 0; - //printf("System A: %i %lf %lf %lf %i\n",s.N_local,s.x(21,0),s.x(21,1),s.x(21,2),s.type(21)); Kokkos::parallel_for("CommMPI::exchange_self", Kokkos::RangePolicy >(0,N_local), *this); T_INT N_total_recv = 0; T_INT N_total_send = 0; - //printf("System B: %i %lf %lf %lf %i\n",s.N_local,s.x(21,0),s.x(21,1),s.x(21,2),s.type(21)); for(phase = 0; phase < 6; phase ++) { proc_num_send[phase] = 0; proc_num_recv[phase] = 0; @@ -390,7 +390,8 @@ void CommMPI::exchange_halo() { }; void CommMPI::update_halo() { -#ifndef SHMEMTESTS_USE_HALO + +#if !defined(SHMEMTESTS_USE_HALO) && defined(EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) return; #else Kokkos::Profiling::pushRegion("Comm::update_halo"); @@ -478,7 +479,13 @@ void CommMPI::update_force() { Kokkos::Profiling::popRegion(); }; -const char* CommMPI::name() { return "CommMPI"; } +const char* CommMPI::name() { + comm_name = std::string("CommMPI"); + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + comm_name += "Distrib"; + #endif + return comm_name.c_str(); +} int CommMPI::process_rank() { return proc_rank; } int CommMPI::num_processes() { return proc_size; } diff --git a/src/comm_types/comm_mpi.h b/src/comm_types/comm_mpi.h index f1ba4dd..3515479 100644 --- a/src/comm_types/comm_mpi.h +++ b/src/comm_types/comm_mpi.h @@ -70,7 +70,6 @@ class CommMPI: public Comm { System s; // Owned Variables - int phase; // Communication Phase int proc_neighbors_recv[6]; // Neighbor for each phase int proc_neighbors_send[6]; // Neighbor for each phase @@ -81,6 +80,8 @@ class CommMPI: public Comm { int proc_rank; // My Process rank int proc_size; // Number of processes + std::string comm_name; + T_INT num_ghost[6]; T_INT ghost_offsets[6]; @@ -503,7 +504,9 @@ class CommMPI: public Comm { KOKKOS_INLINE_FUNCTION void operator() (const TagCreateGlobalIndecies, const T_INT& i) const { + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES s.global_index(i) = N_MAX_MASK * proc_rank + i; + #endif } const char* name(); diff --git a/src/comm_types/comm_serial.h b/src/comm_types/comm_serial.h index 386b3cd..dd11508 100644 --- a/src/comm_types/comm_serial.h +++ b/src/comm_types/comm_serial.h @@ -52,7 +52,6 @@ } #endif - #if !defined(MODULES_OPTION_CHECK) && !defined(COMM_MODULES_INSTANTIATION) #ifndef COMM_SERIAL_H #define COMM_SERIAL_H diff --git a/src/examinimd.cpp b/src/examinimd.cpp index 2d1b625..7ea9e4b 100644 --- a/src/examinimd.cpp +++ b/src/examinimd.cpp @@ -83,8 +83,6 @@ void ExaMiniMD::init(int argc, char* argv[]) { #undef FORCE_MODULES_INSTANTIATION else comm->error("Invalid ForceType"); for(int line = 0; line < input->force_coeff_lines.extent(0); line++) { - //input->input_data.print_line(input->force_coeff_lines(line)); - //printf("init_coeff: %i %i\n",line,input->input_data.words_in_line(input->force_coeff_lines(line))); force->init_coeff(input->input_data.words_in_line(input->force_coeff_lines(line)), input->input_data.words[input->force_coeff_lines(line)]); } @@ -108,7 +106,6 @@ void ExaMiniMD::init(int argc, char* argv[]) { if(neighbor) neighbor->comm_newton = input->comm_newton; - // system->print_particles(); if(system->do_print) { printf("Using: %s %s %s %s\n",force->name(),neighbor->name(),comm->name(),binning->name()); } @@ -160,7 +157,7 @@ void ExaMiniMD::init(int argc, char* argv[]) { } else { printf("\n"); printf("Step Temp E_pair TotEng CPU\n"); - printf(" %i %lf %lf %lf %lf\n",step,T,PE,PE+KE,0.0); + printf("%i %lf %lf %lf %lf\n",step,T,PE,PE+KE,0.0); } } } @@ -170,7 +167,6 @@ void ExaMiniMD::init(int argc, char* argv[]) { if(input->correctnessflag) check_correctness(step); - } void ExaMiniMD::run(int nsteps) { @@ -190,7 +186,6 @@ void ExaMiniMD::run(int nsteps) { // Timestep Loop for(int step = 1; step <= nsteps; step++ ) { - // Do first part of the verlet time step integration other_timer.reset(); integrator->initial_integrate(); @@ -221,11 +216,12 @@ void ExaMiniMD::run(int nsteps) { neighbor->create_neigh_list(system,binning,force->half_neigh,false); neigh_time += neigh_timer.seconds(); } else { - // Exchange Halo + // Exchange Halo data comm_timer.reset(); comm->update_halo(); comm_time += comm_timer.seconds(); } + Kokkos::Experimental::DefaultRemoteMemorySpace::fence(); // Zero out forces force_timer.reset(); @@ -260,7 +256,7 @@ void ExaMiniMD::run(int nsteps) { last_time = time; } else { double time = timer.seconds(); - printf(" %i %lf %lf %lf %lf\n",step, T, PE, PE+KE, timer.seconds()); + printf("%i %lf %lf %lf %lf\n",step, T, PE, PE+KE, timer.seconds()); last_time = time; } } diff --git a/src/force_types/CMakeLists.txt b/src/force_types/CMakeLists.txt index 9f43539..9c06a2c 100644 --- a/src/force_types/CMakeLists.txt +++ b/src/force_types/CMakeLists.txt @@ -1,11 +1,25 @@ FILE(GLOB SRCS *.cpp) - -#Skip lj_ideal, snap and cell +#Skip snap and cell #TODO: SNAP is outdates and should likely be removed all together as it is list(FILTER SRCS EXCLUDE REGEX ".*lj_cell\\.cpp$") -list(FILTER SRCS EXCLUDE REGEX ".*lj_idial_neigh\\.cpp$") list(FILTER SRCS EXCLUDE REGEX ".*snap_neigh\\.cpp$") -target_sources(ExaMiniMD PRIVATE ${SRCS}) +# Skip force-type module if Kokkos Remote Spaces is not enabled +if (ENABLE_KOKKOS_REMOTE_SPACES) + message(STATUS "Building with support for force_lj_neigh_distrib") + list(FILTER SRCS EXCLUDE REGEX ".*lj_neigh\\.cpp$") + target_compile_definitions(ExaMiniMD PRIVATE EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) + target_compile_definitions(ExaMiniMD PRIVATE SHMEMTESTS_USE_SCALAR) + #target_compile_definitions(ExaMiniMD PRIVATE SHMEMTESTS_USE_HALO) + #target_compile_definitions(ExaMiniMD PRIVATE SHMEMTESTS_USE_HALO_LOCAL) + #target_compile_definitions(ExaMiniMD PRIVATE SHMEMTESTS_USE_LOCAL_GLOBAL) + target_compile_definitions(ExaMiniMD PRIVATE SHMEMTESTS_USE_GLOBAL) +else() + #Otherwise exclude + list(FILTER SRCS EXCLUDE REGEX ".*distrib\\.cpp$") +endif() + + +target_sources(ExaMiniMD PRIVATE ${SRCS}) diff --git a/src/force_types/force_lj_neigh.cpp b/src/force_types/force_lj_neigh.cpp index 565964f..9f297ff 100644 --- a/src/force_types/force_lj_neigh.cpp +++ b/src/force_types/force_lj_neigh.cpp @@ -37,7 +37,6 @@ //************************************************************************ #include - #define FORCETYPE_DECLARE_TEMPLATE_MACRO(NeighType) ForceLJNeigh #define FORCE_MODULES_TEMPLATE #include diff --git a/src/force_types/force_lj_neigh.h b/src/force_types/force_lj_neigh.h index 785a377..5737421 100644 --- a/src/force_types/force_lj_neigh.h +++ b/src/force_types/force_lj_neigh.h @@ -67,27 +67,16 @@ class ForceLJNeigh: public Force { private: int N_local,ntypes; t_x_const_rnd x; - - #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES - t_x_shmem x_shmem; - t_x_shmem_local x_shmem_local; - #endif t_f f; t_f_atomic f_a; t_id id; - t_index global_index; t_type_const_rnd type; - - T_X_FLOAT domain_x, domain_y, domain_z; - int proc_rank; - Binning::t_bincount bin_count; Binning::t_binoffsets bin_offsets; T_INT nbinx,nbiny,nbinz,nhalo; int step; bool use_stackparams; - typedef Kokkos::View t_fparams; typedef Kokkos::View> t_fparams_rnd; @@ -126,8 +115,6 @@ class ForceLJNeigh: public Force { typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_full_neigh_pe_stackparams; typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_half_neigh_pe_stackparams; - struct TagCopyLocalXShmem {}; - ForceLJNeigh (char** args, System* system, bool half_neigh_); void init_coeff(int nargs, char** args); @@ -151,9 +138,6 @@ class ForceLJNeigh: public Force { KOKKOS_INLINE_FUNCTION void operator() (TagHalfNeighPE, const T_INT& i, T_V_FLOAT& PE) const; - KOKKOS_INLINE_FUNCTION - void operator() (TagCopyLocalXShmem, const T_INT& i) const; - const char* name(); }; diff --git a/src/force_types/force_lj_neigh_distrib.cpp b/src/force_types/force_lj_neigh_distrib.cpp new file mode 100644 index 0000000..c286e43 --- /dev/null +++ b/src/force_types/force_lj_neigh_distrib.cpp @@ -0,0 +1,43 @@ +//************************************************************************ +// ExaMiniMD v. 1.0 +// Copyright (2018) National Technology & Engineering Solutions of Sandia, +// LLC (NTESS). +// +// Under the terms of Contract DE-NA-0003525 with NTESS, the U.S. Government +// retains certain rights in this software. +// +// ExaMiniMD is licensed under 3-clause BSD terms of use: Redistribution and +// use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL NTESS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +//************************************************************************ + +#include +#define FORCETYPE_DECLARE_TEMPLATE_MACRO(NeighType) ForceLJNeigh +#define FORCE_MODULES_TEMPLATE +#include +#undef FORCE_MODULES_TEMPLATE diff --git a/src/force_types/force_lj_neigh_distrib.h b/src/force_types/force_lj_neigh_distrib.h new file mode 100644 index 0000000..d2e8a11 --- /dev/null +++ b/src/force_types/force_lj_neigh_distrib.h @@ -0,0 +1,164 @@ +//************************************************************************ +// ExaMiniMD v. 1.0 +// Copyright (2018) National Technology & Engineering Solutions of Sandia, +// LLC (NTESS). +// +// Under the terms of Contract DE-NA-0003525 with NTESS, the U.S. Government +// retains certain rights in this software. +// +// ExaMiniMD is licensed under 3-clause BSD terms of use: Redistribution and +// use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL NTESS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +//************************************************************************ + +#ifdef MODULES_OPTION_CHECK + if( (strcmp(argv[i+1], "NEIGH_FULL") == 0) ) + force_iteration_type = FORCE_ITER_NEIGH_FULL; + if( (strcmp(argv[i+1], "NEIGH_HALF") == 0) ) { + force_iteration_type = FORCE_ITER_NEIGH_HALF; + } +#endif +#ifdef FORCE_MODULES_INSTANTIATION + else if (input->force_type == FORCE_LJ) { + bool half_neigh = input->force_iteration_type == FORCE_ITER_NEIGH_HALF; + switch ( input->neighbor_type ) { + #define FORCETYPE_ALLOCATION_MACRO(NeighType) ForceLJNeigh(input->input_data.words[input->force_line],system,half_neigh) + #include + #undef FORCETYPE_ALLOCATION_MACRO + } + } +#endif + +#if !defined(MODULES_OPTION_CHECK) && \ + !defined(FORCE_MODULES_INSTANTIATION) + +#ifndef FORCE_LJ_NEIGH_H +#define FORCE_LJ_NEIGH_H +#include + +template +class ForceLJNeigh: public Force { +private: + int N_local,ntypes; + t_x_const_rnd x; + t_x_shmem x_shmem; + t_x_shmem_local x_shmem_local; + t_index global_index; + t_f f; + t_f_atomic f_a; + t_id id; + t_type_const_rnd type; + + T_X_FLOAT domain_x, domain_y, domain_z; + int proc_rank; + + Binning::t_bincount bin_count; + Binning::t_binoffsets bin_offsets; + T_INT nbinx,nbiny,nbinz,nhalo; + int step; + bool use_stackparams; + + + typedef Kokkos::View t_fparams; + typedef Kokkos::View> t_fparams_rnd; + t_fparams lj1,lj2,cutsq; + t_fparams_rnd rnd_lj1,rnd_lj2,rnd_cutsq; + + T_F_FLOAT stack_lj1[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; // hardwired space for 12 atom types + T_F_FLOAT stack_lj2[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + T_F_FLOAT stack_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + + typedef typename NeighborClass::t_neigh_list t_neigh_list; + t_neigh_list neigh_list; + +public: + typedef T_V_FLOAT value_type; + + template + struct TagFullNeigh {}; + + template + struct TagHalfNeigh {}; + + template + struct TagFullNeighPE {}; + + template + struct TagHalfNeighPE {}; + + struct TagCopyLocalXShmem {}; + + typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_full_neigh; + typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_half_neigh; + typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_full_neigh_pe; + typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_half_neigh_pe; + + typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_full_neigh_stackparams; + typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_half_neigh_stackparams; + typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_full_neigh_pe_stackparams; + typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_half_neigh_pe_stackparams; + + typedef Kokkos::RangePolicy > t_policy_compute_fill_xshmem; + + ForceLJNeigh (char** args, System* system, bool half_neigh_); + + void init_coeff(int nargs, char** args); + + void compute(System* system, Binning* binning, Neighbor* neighbor ); + T_F_FLOAT compute_energy(System* system, Binning* binning, Neighbor* neighbor); + + template + KOKKOS_INLINE_FUNCTION + void operator() (TagFullNeigh, const T_INT& i) const; + + template + KOKKOS_INLINE_FUNCTION + void operator() (TagHalfNeigh, const T_INT& i) const; + + template + KOKKOS_INLINE_FUNCTION + void operator() (TagFullNeighPE, const T_INT& i, T_V_FLOAT& PE) const; + + template + KOKKOS_INLINE_FUNCTION + void operator() (TagHalfNeighPE, const T_INT& i, T_V_FLOAT& PE) const; + + KOKKOS_INLINE_FUNCTION + void operator() (TagCopyLocalXShmem, const T_INT& i) const; + + const char* name(); +}; + +#define FORCE_MODULES_EXTERNAL_TEMPLATE +#define FORCETYPE_DECLARE_TEMPLATE_MACRO(NeighType) ForceLJNeigh +#include +#undef FORCETYPE_DECLARE_TEMPLATE_MACRO +#undef FORCE_MODULES_EXTERNAL_TEMPLATE +#endif +#endif diff --git a/src/force_types/force_lj_neigh_distrib_impl.h b/src/force_types/force_lj_neigh_distrib_impl.h new file mode 100644 index 0000000..bb24ce4 --- /dev/null +++ b/src/force_types/force_lj_neigh_distrib_impl.h @@ -0,0 +1,420 @@ +//************************************************************************ +// ExaMiniMD v. 1.0 +// Copyright (2018) National Technology & Engineering Solutions of Sandia, +// LLC (NTESS). +// +// Under the terms of Contract DE-NA-0003525 with NTESS, the U.S. Government +// retains certain rights in this software. +// +// ExaMiniMD is licensed under 3-clause BSD terms of use: Redistribution and +// use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL NTESS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +//************************************************************************ + +#include +#include + +template +ForceLJNeigh::ForceLJNeigh(char** args, System* system, bool half_neigh_):Force(args,system,half_neigh_) { + ntypes = system->ntypes; + use_stackparams = (ntypes <= MAX_TYPES_STACKPARAMS); + if (!use_stackparams) { + lj1 = t_fparams("ForceLJNeigh::lj1",ntypes,ntypes); + lj2 = t_fparams("ForceLJNeigh::lj2",ntypes,ntypes); + cutsq = t_fparams("ForceLJNeigh::cutsq",ntypes,ntypes); + } + nbinx = nbiny = nbinz = 0; + N_local = 0; + nhalo = 0; + step = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank); + +} + +template +void ForceLJNeigh::init_coeff(int nargs, char** args) { + step = 0; + + int one_based_type = 1; + int t1 = atoi(args[1])-one_based_type; + int t2 = atoi(args[2])-one_based_type; + double eps = atof(args[3]); + double sigma = atof(args[4]); + double cut = atof(args[5]); + + if (use_stackparams) { + for (int i = 0; i < ntypes; i++) { + for (int j = 0; j < ntypes; j++) { + stack_lj1[i][j] = 48.0 * eps * pow(sigma,12.0); + stack_lj2[i][j] = 24.0 * eps * pow(sigma,6.0); + stack_cutsq[i][j] = cut*cut; + } + } + } else { + t_fparams::HostMirror h_lj1 = Kokkos::create_mirror_view(lj1); + t_fparams::HostMirror h_lj2 = Kokkos::create_mirror_view(lj2); + t_fparams::HostMirror h_cutsq = Kokkos::create_mirror_view(cutsq); + Kokkos::deep_copy(h_lj1,lj1); + Kokkos::deep_copy(h_lj2,lj2); + Kokkos::deep_copy(h_cutsq,cutsq); + + h_lj1(t1,t2) = 48.0 * eps * pow(sigma,12.0); + h_lj2(t1,t2) = 24.0 * eps * pow(sigma,6.0); + h_lj1(t2,t1) = h_lj1(t1,t2); + h_lj2(t2,t1) = h_lj2(t1,t2); + h_cutsq(t1,t2) = cut*cut; + h_cutsq(t2,t1) = cut*cut; + + Kokkos::deep_copy(lj1,h_lj1); + Kokkos::deep_copy(lj2,h_lj2); + Kokkos::deep_copy(cutsq,h_cutsq); + + rnd_lj1 = lj1; + rnd_lj2 = lj2; + rnd_cutsq = cutsq; + } +}; + +template +void ForceLJNeigh::compute(System* system, Binning* binning, Neighbor* neighbor_ ) { + // Set internal data handles + NeighborClass* neighbor = (NeighborClass*) neighbor_; + neigh_list = neighbor->get_neigh_list(); + N_local = system->N_local; + x = system->x; + x_shmem = system->x_shmem; + x_shmem_local = t_x_shmem_local(x_shmem.data(),x_shmem.extent(1)); + f = system->f; + f_a = system->f; + type = system->type; + id = system->id; + global_index = system->global_index; + + domain_x = system->domain_x; + domain_y = system->domain_y; + domain_z = system->domain_z; + + #ifdef SHMEMTESTS_USE_HALO + #else + Kokkos::Experimental::DefaultRemoteMemorySpace::fence(); + Kokkos::parallel_for("ForceLJNeigh::compute_fill_xshmem", t_policy_compute_fill_xshmem(0,system->N_local), *this); + Kokkos::fence(); + Kokkos::Experimental::DefaultRemoteMemorySpace().fence(); + #endif + + if (use_stackparams) { + if(half_neigh) + Kokkos::parallel_for("ForceLJNeigh::compute", t_policy_half_neigh_stackparams(0, system->N_local), *this); + else + Kokkos::parallel_for("ForceLJNeigh::compute", t_policy_full_neigh_stackparams(0, system->N_local), *this); + } else { + if(half_neigh) + Kokkos::parallel_for("ForceLJNeigh::compute", t_policy_half_neigh(0, system->N_local), *this); + else + Kokkos::parallel_for("ForceLJNeigh::compute", t_policy_full_neigh(0, system->N_local), *this); + } + Kokkos::fence(); + Kokkos::Experimental::DefaultRemoteMemorySpace::fence(); + step++; +} + +template +T_V_FLOAT ForceLJNeigh::compute_energy(System* system, Binning* binning, Neighbor* neighbor_ ) { + // Set internal data handles + NeighborClass* neighbor = (NeighborClass*) neighbor_; + neigh_list = neighbor->get_neigh_list(); + MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank); + N_local = system->N_local; + x = system->x; + f = system->f; + f_a = system->f; + type = system->type; + id = system->id; + T_V_FLOAT energy; + + if (use_stackparams) { + if(half_neigh) + Kokkos::parallel_reduce("ForceLJNeigh::compute_energy", t_policy_half_neigh_pe_stackparams(0, system->N_local), *this, energy); + else + Kokkos::parallel_reduce("ForceLJNeigh::compute_energy", t_policy_full_neigh_pe_stackparams(0, system->N_local), *this, energy); + } else { + if(half_neigh) + Kokkos::parallel_reduce("ForceLJNeigh::compute_energy", t_policy_half_neigh_pe(0, system->N_local), *this, energy); + else + Kokkos::parallel_reduce("ForceLJNeigh::compute_energy", t_policy_full_neigh_pe(0, system->N_local), *this, energy); + } + Kokkos::fence(); + Kokkos::Experimental::DefaultRemoteMemorySpace::fence(); + + step++; + return energy; +} + +template +const char* ForceLJNeigh::name() { return half_neigh?"ForceLJNeighHalf":"ForceLJNeighFull"; } + +template +template +KOKKOS_INLINE_FUNCTION +void ForceLJNeigh::operator() (TagFullNeigh, const T_INT& i) const { + const T_F_FLOAT x_i = x(i,0); + const T_F_FLOAT y_i = x(i,1); + const T_F_FLOAT z_i = x(i,2); + const int type_i = type(i); + + typename t_neigh_list::t_neighs neighs_i = neigh_list.get_neighs(i); + const int num_neighs = neighs_i.get_num_neighs(); + + T_F_FLOAT fxi = 0.0; + T_F_FLOAT fyi = 0.0; + T_F_FLOAT fzi = 0.0; + + for(int jj = 0; jj < num_neighs; jj++) { + T_INT j = neighs_i(jj); + const T_INDEX jg = global_index(j); + #ifdef SHMEMTESTS_USE_SCALAR + #ifdef SHMEMTESTS_USE_HALO + const T_X_FLOAT xj_shmem = x(j,0); + const T_X_FLOAT yj_shmem = x(j,1); + const T_X_FLOAT zj_shmem = x(j,2); + #endif + #ifdef SHMEMTESTS_USE_HALO_LOCAL + const T_X_FLOAT xj_shmem = jg/N_MAX_MASK==proc_rank?x(j,0):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = jg/N_MAX_MASK==proc_rank?x(j,1):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = jg/N_MAX_MASK==proc_rank?x(j,2):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #ifdef SHMEMTESTS_USE_LOCAL_GLOBAL + const T_X_FLOAT xj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+0]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+1]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+2]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #ifdef SHMEMTESTS_USE_GLOBAL + const T_X_FLOAT xj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #else + #ifdef SHMEMTESTS_USE_GLOBAL + const double3 posj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK); + const T_X_FLOAT xj_shmem = posj_shmem.x; + const T_X_FLOAT yj_shmem = posj_shmem.y; + const T_X_FLOAT zj_shmem = posj_shmem.z; + #else + #error "Unknown configuration" + #endif + #endif + + #ifdef SHMEMTESTS_USE_HALO + const T_F_FLOAT dx = x_i - x(j,0); + const T_F_FLOAT dy = y_i - x(j,1); + const T_F_FLOAT dz = z_i - x(j,2); + #else + T_F_FLOAT dx = abs(x_i - xj_shmem)>domain_x/2? + (x_i-xj_shmem<0?x_i-xj_shmem+domain_x:x_i-xj_shmem-domain_x) + :x_i-xj_shmem; + T_F_FLOAT dy = abs(y_i - yj_shmem)>domain_y/2? + (y_i-yj_shmem<0?y_i-yj_shmem+domain_y:y_i-yj_shmem-domain_y) + :y_i-yj_shmem; + T_F_FLOAT dz = abs(z_i - zj_shmem)>domain_z/2? + (z_i-zj_shmem<0?z_i-zj_shmem+domain_z:z_i-zj_shmem-domain_z) + :z_i-zj_shmem; + #endif + + const int type_j = type(j); + const T_F_FLOAT rsq = dx*dx + dy*dy + dz*dz; + + const T_F_FLOAT cutsq_ij = STACKPARAMS?stack_cutsq[type_i][type_j]:rnd_cutsq(type_i,type_j); + + if( rsq < cutsq_ij ) { + const T_F_FLOAT lj1_ij = STACKPARAMS?stack_lj1[type_i][type_j]:rnd_lj1(type_i,type_j); + const T_F_FLOAT lj2_ij = STACKPARAMS?stack_lj2[type_i][type_j]:rnd_lj2(type_i,type_j); + + T_F_FLOAT r2inv = 1.0/rsq; + T_F_FLOAT r6inv = r2inv*r2inv*r2inv; + T_F_FLOAT fpair = (r6inv * (lj1_ij*r6inv - lj2_ij)) * r2inv; + fxi += dx*fpair; + fyi += dy*fpair; + fzi += dz*fpair; + } + } + + f(i,0) += fxi; + f(i,1) += fyi; + f(i,2) += fzi; +} + +template +template +KOKKOS_INLINE_FUNCTION +void ForceLJNeigh::operator() (TagHalfNeigh, const T_INT& i) const { + const T_F_FLOAT x_i = x(i,0); + const T_F_FLOAT y_i = x(i,1); + const T_F_FLOAT z_i = x(i,2); + const int type_i = type(i); + + typename t_neigh_list::t_neighs neighs_i = neigh_list.get_neighs(i); + + const int num_neighs = neighs_i.get_num_neighs(); + + T_F_FLOAT fxi = 0.0; + T_F_FLOAT fyi = 0.0; + T_F_FLOAT fzi = 0.0; + for(int jj = 0; jj < num_neighs; jj++) { + T_INT j = neighs_i(jj); + const T_F_FLOAT dx = x_i - x(j,0); + const T_F_FLOAT dy = y_i - x(j,1); + const T_F_FLOAT dz = z_i - x(j,2); + + const int type_j = type(j); + const T_F_FLOAT rsq = dx*dx + dy*dy + dz*dz; + + const T_F_FLOAT cutsq_ij = STACKPARAMS?stack_cutsq[type_i][type_j]:rnd_cutsq(type_i,type_j); + + if( rsq < cutsq_ij ) { + const T_F_FLOAT lj1_ij = STACKPARAMS?stack_lj1[type_i][type_j]:rnd_lj1(type_i,type_j); + const T_F_FLOAT lj2_ij = STACKPARAMS?stack_lj2[type_i][type_j]:rnd_lj2(type_i,type_j); + + T_F_FLOAT r2inv = 1.0/rsq; + T_F_FLOAT r6inv = r2inv*r2inv*r2inv; + T_F_FLOAT fpair = (r6inv * (lj1_ij*r6inv - lj2_ij)) * r2inv; + fxi += dx*fpair; + fyi += dy*fpair; + fzi += dz*fpair; + f_a(j,0) -= dx*fpair; + f_a(j,1) -= dy*fpair; + f_a(j,2) -= dz*fpair; + } + } + f_a(i,0) += fxi; + f_a(i,1) += fyi; + f_a(i,2) += fzi; + +} + +template +template +KOKKOS_INLINE_FUNCTION +void ForceLJNeigh::operator() (TagFullNeighPE, const T_INT& i, T_V_FLOAT& PE) const { + const T_F_FLOAT x_i = x(i,0); + const T_F_FLOAT y_i = x(i,1); + const T_F_FLOAT z_i = x(i,2); + const int type_i = type(i); + const bool shift_flag = true; + + typename t_neigh_list::t_neighs neighs_i = neigh_list.get_neighs(i); + + const int num_neighs = neighs_i.get_num_neighs(); + + for(int jj = 0; jj < num_neighs; jj++) { + T_INT j = neighs_i(jj); + const T_F_FLOAT dx = x_i - x(j,0); + const T_F_FLOAT dy = y_i - x(j,1); + const T_F_FLOAT dz = z_i - x(j,2); + + const int type_j = type(j); + const T_F_FLOAT rsq = dx*dx + dy*dy + dz*dz; + + const T_F_FLOAT cutsq_ij = STACKPARAMS?stack_cutsq[type_i][type_j]:rnd_cutsq(type_i,type_j); + + if( rsq < cutsq_ij ) { + const T_F_FLOAT lj1_ij = STACKPARAMS?stack_lj1[type_i][type_j]:rnd_lj1(type_i,type_j); + const T_F_FLOAT lj2_ij = STACKPARAMS?stack_lj2[type_i][type_j]:rnd_lj2(type_i,type_j); + + T_F_FLOAT r2inv = 1.0/rsq; + T_F_FLOAT r6inv = r2inv*r2inv*r2inv; + PE += 0.5*r6inv * (0.5*lj1_ij*r6inv - lj2_ij) / 6.0; // optimize later + + if (shift_flag) { + T_F_FLOAT r2invc = 1.0/cutsq_ij; + T_F_FLOAT r6invc = r2invc*r2invc*r2invc; + PE -= 0.5*r6invc * (0.5*lj1_ij*r6invc - lj2_ij) / 6.0; // optimize later + } + } + } +} + +template +template +KOKKOS_INLINE_FUNCTION +void ForceLJNeigh::operator() (TagHalfNeighPE, const T_INT& i, T_V_FLOAT& PE) const { + const T_F_FLOAT x_i = x(i,0); + const T_F_FLOAT y_i = x(i,1); + const T_F_FLOAT z_i = x(i,2); + const int type_i = type(i); + const bool shift_flag = true; + + typename t_neigh_list::t_neighs neighs_i = neigh_list.get_neighs(i); + + const int num_neighs = neighs_i.get_num_neighs(); + + for(int jj = 0; jj < num_neighs; jj++) { + T_INT j = neighs_i(jj); + const T_F_FLOAT dx = x_i - x(j,0); + const T_F_FLOAT dy = y_i - x(j,1); + const T_F_FLOAT dz = z_i - x(j,2); + + const int type_j = type(j); + const T_F_FLOAT rsq = dx*dx + dy*dy + dz*dz; + + const T_F_FLOAT cutsq_ij = STACKPARAMS?stack_cutsq[type_i][type_j]:rnd_cutsq(type_i,type_j); + + if( rsq < cutsq_ij ) { + const T_F_FLOAT lj1_ij = STACKPARAMS?stack_lj1[type_i][type_j]:rnd_lj1(type_i,type_j); + const T_F_FLOAT lj2_ij = STACKPARAMS?stack_lj2[type_i][type_j]:rnd_lj2(type_i,type_j); + + T_F_FLOAT r2inv = 1.0/rsq; + T_F_FLOAT r6inv = r2inv*r2inv*r2inv; + T_F_FLOAT fac; + if(j +KOKKOS_INLINE_FUNCTION +void ForceLJNeigh::operator() (TagCopyLocalXShmem, const T_INT& i) const { + #ifdef SHMEMTESTS_USE_SCALAR + x_shmem_local(i,0) = x(i,0); + x_shmem_local(i,1) = x(i,1); + x_shmem_local(i,2) = x(i,2); + #else + double3 pos = {x(i,0),x(i,1),x(i,2)}; + x_shmem_local(i) = pos; + #endif +} + diff --git a/src/force_types/force_lj_neigh_impl.h b/src/force_types/force_lj_neigh_impl.h index 4740dc1..19e3fbf 100644 --- a/src/force_types/force_lj_neigh_impl.h +++ b/src/force_types/force_lj_neigh_impl.h @@ -38,10 +38,6 @@ #include -#ifdef EXAMINIMD_ENABLE_MPI -#include -#endif - template ForceLJNeigh::ForceLJNeigh(char** args, System* system, bool half_neigh_):Force(args,system,half_neigh_) { ntypes = system->ntypes; @@ -55,11 +51,6 @@ ForceLJNeigh::ForceLJNeigh(char** args, System* system, bool half N_local = 0; nhalo = 0; step = 0; - #if defined (EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) || defined (EXAMINIMD_ENABLE_MPI) - MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank); - #else - proc_rank=0; - #endif } template @@ -88,7 +79,6 @@ void ForceLJNeigh::init_coeff(int nargs, char** args) { Kokkos::deep_copy(h_lj1,lj1); Kokkos::deep_copy(h_lj2,lj2); Kokkos::deep_copy(h_cutsq,cutsq); - h_lj1(t1,t2) = 48.0 * eps * pow(sigma,12.0); h_lj2(t1,t2) = 24.0 * eps * pow(sigma,6.0); h_lj1(t2,t1) = h_lj1(t1,t2); @@ -111,30 +101,13 @@ void ForceLJNeigh::compute(System* system, Binning* binning, Neig // Set internal data handles NeighborClass* neighbor = (NeighborClass*) neighbor_; neigh_list = neighbor->get_neigh_list(); - + N_local = system->N_local; x = system->x; - #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES - x_shmem = system->x_shmem; - x_shmem_local = t_x_shmem_local(x_shmem.data(),x_shmem.extent(1)); - #endif - f = system->f; f_a = system->f; type = system->type; id = system->id; - global_index = system->global_index; - - domain_x = system->domain_x; - domain_y = system->domain_y; - domain_z = system->domain_z; - - #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES - Kokkos::Experimental::DefaultRemoteMemorySpace::fence();; - Kokkos::parallel_for("ForceLJNeigh::compute_fill_xshmem", Kokkos::RangePolicy(0,system->N_local), *this); - Kokkos::fence(); - Kokkos::Experimental::DefaultRemoteMemorySpace().fence();; - #endif if (use_stackparams) { if(half_neigh) Kokkos::parallel_for("ForceLJNeigh::compute", t_policy_half_neigh_stackparams(0, system->N_local), *this); @@ -147,10 +120,7 @@ void ForceLJNeigh::compute(System* system, Binning* binning, Neig Kokkos::parallel_for("ForceLJNeigh::compute", t_policy_full_neigh(0, system->N_local), *this); } Kokkos::fence(); - #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES - Kokkos::Experimental::DefaultRemoteMemorySpace::fence();; - #endif - //x_shmem = t_x_shmem(); + step++; } @@ -159,9 +129,7 @@ T_V_FLOAT ForceLJNeigh::compute_energy(System* system, Binning* b // Set internal data handles NeighborClass* neighbor = (NeighborClass*) neighbor_; neigh_list = neighbor->get_neigh_list(); - #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES - MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank); - #endif + N_local = system->N_local; x = system->x; f = system->f; @@ -208,59 +176,10 @@ void ForceLJNeigh::operator() (TagFullNeigh, const T for(int jj = 0; jj < num_neighs; jj++) { T_INT j = neighs_i(jj); - //printf("Neigh: %i %i %li %li %i\n",i,j,global_index(i),global_index(j),j>N_local?1:0); - - #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES - - const T_INDEX jg = global_index(j); - #ifdef SHMEMTESTS_USE_SCALAR - #ifdef SHMEMTESTS_USE_HALO - const T_X_FLOAT xj_shmem = x(j,0);//x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); - const T_X_FLOAT yj_shmem = x(j,1);//x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); - const T_X_FLOAT zj_shmem = x(j,2);//x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); - #endif - #ifdef SHMEMTESTS_USE_HALO_LOCAL - const T_X_FLOAT xj_shmem = jg/N_MAX_MASK==proc_rank?x(j,0):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); - const T_X_FLOAT yj_shmem = jg/N_MAX_MASK==proc_rank?x(j,1):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); - const T_X_FLOAT zj_shmem = jg/N_MAX_MASK==proc_rank?x(j,2):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); - #endif - #ifdef SHMEMTESTS_USE_LOCAL_GLOBAL - const T_X_FLOAT xj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+0]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); - const T_X_FLOAT yj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+1]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); - const T_X_FLOAT zj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+2]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); - #endif - #ifdef SHMEMTESTS_USE_GLOBAL - const T_X_FLOAT xj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); - const T_X_FLOAT yj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); - const T_X_FLOAT zj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); - #endif - #else - #ifdef SHMEMTESTS_USE_GLOBAL - const double3 posj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK); - const T_X_FLOAT xj_shmem = posj_shmem.x; - const T_X_FLOAT yj_shmem = posj_shmem.y; - const T_X_FLOAT zj_shmem = posj_shmem.z; - #endif - #endif - - //printf("DATA: %i %i %i %lf %lf %lf %i %li %i\n",id(i),jj,(int)jg,xj_shmem,yj_shmem,zj_shmem,(int)jg/N_MAX_MASK,N_MAX_MASK,int(jg%N_MAX_MASK)); - T_F_FLOAT dx = abs(x_i - xj_shmem)>domain_x/2? - (x_i-xj_shmem<0?x_i-xj_shmem+domain_x:x_i-xj_shmem-domain_x) - :x_i-xj_shmem; - T_F_FLOAT dy = abs(y_i - yj_shmem)>domain_y/2? - (y_i-yj_shmem<0?y_i-yj_shmem+domain_y:y_i-yj_shmem-domain_y) - :y_i-yj_shmem; - T_F_FLOAT dz = abs(z_i - zj_shmem)>domain_z/2? - (z_i-zj_shmem<0?z_i-zj_shmem+domain_z:z_i-zj_shmem-domain_z) - :z_i-zj_shmem; - #else //EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES const T_F_FLOAT dx = x_i - x(j,0); const T_F_FLOAT dy = y_i - x(j,1); const T_F_FLOAT dz = z_i - x(j,2); - #endif -// if((abs(dx_shmem-dx)>1e-10) || (abs(dy_shmem-dy)>1e-10) || (abs(dz_shmem-dz)>1e-10)) -// printf("Neigh: %i %i %li %li %i : %lf %lf %lf %lf %lf\n",i,j,global_index(i),global_index(j),j>N_local?1:0,x(j,0),xj_shmem,domain_x,dx,dx_shmem); const int type_j = type(j); const T_F_FLOAT rsq = dx*dx + dy*dy + dz*dz; @@ -421,20 +340,3 @@ void ForceLJNeigh::operator() (TagHalfNeighPE, const } } - -template -KOKKOS_INLINE_FUNCTION -void ForceLJNeigh::operator() (TagCopyLocalXShmem, const T_INT& i) const { - //printf("CopyLocal: %i %lf %lf %lf\n",i,x(i,0),x(i,1),x(i,2)); - #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES - #ifdef SHMEMTESTS_USE_SCALAR - x_shmem_local(i,0) = x(i,0); - x_shmem_local(i,1) = x(i,1); - x_shmem_local(i,2) = x(i,2); - #else - double3 pos = {x(i,0),x(i,1),x(i,2)}; - x_shmem_local(i) = pos; - #endif - #endif //EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES -} - diff --git a/src/force_types/force_snap_neigh_impl.h b/src/force_types/force_snap_neigh_impl.h index c1faa3a..3f8abe5 100644 --- a/src/force_types/force_snap_neigh_impl.h +++ b/src/force_types/force_snap_neigh_impl.h @@ -203,7 +203,7 @@ void ForceSNAP::compute(System* system, Binning* binning, Neighbo //printf("Sizes: %i %i\n",team_scratch_size/1024,thread_scratch_size/1024); int vector_length = 8; int team_size_max = Kokkos::TeamPolicy<>(nlocal,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelForTag()); -#ifdef EMD_ENABLE_GPU +#ifdef EXAMINIMD_HAS_GPU int team_size = 20;//max_neighs; if(team_size*vector_length > team_size_max) team_size = team_size_max/vector_length; diff --git a/src/input.cpp b/src/input.cpp index 336cad7..799e04a 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -373,8 +373,7 @@ void Input::check_lammps_command(int line) { force_type = FORCE_LJ; force_cutoff = atof(input_data.words[line][2]); force_line = line; - } - if(strcmp(input_data.words[line][1],"snap")==0) { + }else if(strcmp(input_data.words[line][1],"snap")==0) { known = true; force_type = FORCE_SNAP; force_cutoff = 4.73442;// atof(input_data.words[line][2]); @@ -505,6 +504,7 @@ void Input::create_lattice(Comm* comm) { } } } + system->N_local = n; system->N = n; int global_n_max = n; @@ -522,40 +522,8 @@ void Input::create_lattice(Comm* comm) { // zero out momentum of the whole system afterwards, to eliminate // drift (bad for energy statistics) - for(T_INT iz=iz_start; iz<=iz_end; iz++) { - T_FLOAT ztmp = lattice_constant * (iz+lattice_offset_z); - for(T_INT iy=iy_start; iy<=iy_end; iy++) { - T_FLOAT ytmp = lattice_constant * (iy+lattice_offset_y); - for(T_INT ix=ix_start; ix<=ix_end; ix++) { - T_FLOAT xtmp = lattice_constant * (ix+lattice_offset_x); - if((xtmp >= s.sub_domain_lo_x) && - (ytmp >= s.sub_domain_lo_y) && - (ztmp >= s.sub_domain_lo_z) && - (xtmp < s.sub_domain_hi_x) && - (ytmp < s.sub_domain_hi_y) && - (ztmp < s.sub_domain_hi_z) ) { - n++; - } - } - } - } - global_n_max = n; - comm->reduce_max_int(&global_n_max,1); - system->grow(global_n_max); - System s = *system; - h_x = Kokkos::create_mirror_view(s.x); - h_v = Kokkos::create_mirror_view(s.v); - h_q = Kokkos::create_mirror_view(s.q); - h_type = Kokkos::create_mirror_view(s.type); - h_id = Kokkos::create_mirror_view(s.id); - n = 0; - // Initialize system using the equivalent of the LAMMPS - // velocity geom option, i.e. uniform random kinetic energies. - // zero out momentum of the whole system afterwards, to eliminate - // drift (bad for energy statistics) - for(T_INT iz=iz_start; iz<=iz_end; iz++) { T_FLOAT ztmp = lattice_constant * (iz+lattice_offset_z); for(T_INT iy=iy_start; iy<=iy_end; iy++) { @@ -658,42 +626,8 @@ void Input::create_lattice(Comm* comm) { // zero out momentum of the whole system afterwards, to eliminate // drift (bad for energy statistics) - for(T_INT iz=iz_start; iz<=iz_end; iz++) { - for(T_INT iy=iy_start; iy<=iy_end; iy++) { - for(T_INT ix=ix_start; ix<=ix_end; ix++) { - for(int k = 0; k<4; k++) { - T_FLOAT xtmp = lattice_constant * (1.0*ix+basis[k][0]); - T_FLOAT ytmp = lattice_constant * (1.0*iy+basis[k][1]); - T_FLOAT ztmp = lattice_constant * (1.0*iz+basis[k][2]); - if((xtmp >= s.sub_domain_lo_x) && - (ytmp >= s.sub_domain_lo_y) && - (ztmp >= s.sub_domain_lo_z) && - (xtmp < s.sub_domain_hi_x) && - (ytmp < s.sub_domain_hi_y) && - (ztmp < s.sub_domain_hi_z) ) { - n++; - } - } - } - } - } - global_n_max = n; - comm->reduce_max_int(&global_n_max,1); - system->grow(global_n_max); - System s = *system; - h_x = Kokkos::create_mirror_view(s.x); - h_v = Kokkos::create_mirror_view(s.v); - h_q = Kokkos::create_mirror_view(s.q); - h_type = Kokkos::create_mirror_view(s.type); - h_id = Kokkos::create_mirror_view(s.id); - n = 0; - // Initialize system using the equivalent of the LAMMPS - // velocity geom option, i.e. uniform random kinetic energies. - // zero out momentum of the whole system afterwards, to eliminate - // drift (bad for energy statistics) - for(T_INT iz=iz_start; iz<=iz_end; iz++) { for(T_INT iy=iy_start; iy<=iy_end; iy++) { for(T_INT ix=ix_start; ix<=ix_end; ix++) { @@ -730,12 +664,8 @@ void Input::create_lattice(Comm* comm) { if(system->do_print) printf("Atoms: %i %i\n",system->N,system->N_local); } - // Initialize velocity using the equivalent of the LAMMPS - // velocity geom option, i.e. uniform random kinetic energies. - // zero out momentum of the whole system afterwards, to eliminate - // drift (bad for energy statistics) - { // Scope s + { //Scope System s = *system; T_FLOAT total_mass = 0.0; T_FLOAT total_momentum_x = 0.0; diff --git a/src/main.cpp b/src/main.cpp index 9efcf4c..b879154 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -59,7 +59,6 @@ int main(int argc, char* argv[]) { ExaMiniMD examinimd; examinimd.init(argc,argv); examinimd.run(examinimd.input->nsteps); - // examinimd.check_correctness(); examinimd.print_performance(); examinimd.shutdown(); diff --git a/src/modules_force.h b/src/modules_force.h index 526d89e..e780654 100644 --- a/src/modules_force.h +++ b/src/modules_force.h @@ -38,10 +38,15 @@ // Include Module header files for force #include +#ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES +#include +#else #include +#endif +#include +#include -//#include -//#include +// SNAP is outdated and likely subject to removal //#ifndef KOKKOS_ENABLE_OPENMPTARGET //#include //#endif diff --git a/src/neighbor_types/neighbor_2d.cpp b/src/neighbor_types/neighbor_2d.cpp index d0b8364..8a54522 100644 --- a/src/neighbor_types/neighbor_2d.cpp +++ b/src/neighbor_types/neighbor_2d.cpp @@ -38,7 +38,7 @@ #include -#ifdef EMD_ENABLE_GPU +#ifdef EXAMINIMD_HAS_GPU template struct Neighbor2D; #endif template struct Neighbor2D; diff --git a/src/neighbor_types/neighbor_csr.cpp b/src/neighbor_types/neighbor_csr.cpp index 429d657..cc5f09a 100644 --- a/src/neighbor_types/neighbor_csr.cpp +++ b/src/neighbor_types/neighbor_csr.cpp @@ -38,7 +38,7 @@ #include -#ifdef EMD_ENABLE_GPU +#ifdef EXAMINIMD_HAS_GPU template struct NeighborCSR; #endif template struct NeighborCSR; diff --git a/src/neighbor_types/neighbor_csr.h b/src/neighbor_types/neighbor_csr.h index df27ac0..68593c1 100644 --- a/src/neighbor_types/neighbor_csr.h +++ b/src/neighbor_types/neighbor_csr.h @@ -427,7 +427,7 @@ class NeighborCSR: public Neighbor { // Create actual CSR NeighList neigh_list = t_neigh_list( - Kokkos::View( neighs, Kokkos::pair(0,total_num_neighs)), + Kokkos::View( neighs, Kokkos::pair(0,total_num_neighs)), Kokkos::View( neigh_offsets, Kokkos::pair(0,N_local+1))); } diff --git a/src/neighbor_types/neighbor_csr_map_constr.cpp b/src/neighbor_types/neighbor_csr_map_constr.cpp index f79435b..adf9a5e 100644 --- a/src/neighbor_types/neighbor_csr_map_constr.cpp +++ b/src/neighbor_types/neighbor_csr_map_constr.cpp @@ -38,7 +38,7 @@ #include -#ifdef EMD_ENABLE_GPU +#ifdef EXAMINIMD_HAS_GPU template struct NeighborCSRMapConstr; #endif template struct NeighborCSRMapConstr; diff --git a/src/system.cpp b/src/system.cpp index 744c53c..c0d7590 100644 --- a/src/system.cpp +++ b/src/system.cpp @@ -52,7 +52,9 @@ System::System() { v = t_v(); f = t_f(); id = t_id(); + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES global_index = t_index(); + #endif type = t_type(); q = t_q(); mass = t_mass(); @@ -61,7 +63,7 @@ System::System() { sub_domain_hi_x = sub_domain_hi_y = sub_domain_hi_z = 0.0; sub_domain_lo_x = sub_domain_lo_y = sub_domain_lo_z = 0.0; mvv2e = boltz = dt = 0.0; -#if defined(EXAMINIMD_ENABLE_MPI) || defined (EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) + #if defined (EXAMINIMD_ENABLE_MPI) || defined (EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) int proc_rank; MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank); do_print = proc_rank == 0; @@ -76,7 +78,9 @@ void System::init() { v = t_v("System::v",N_max); f = t_f("System::f",N_max); id = t_id("System::id",N_max); + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES global_index = t_index("System::global_index",N_max); + #endif type = t_type("System::type",N_max); q = t_q("System::q",N_max); mass = t_mass("System::mass",ntypes); @@ -91,7 +95,9 @@ void System::destroy() { v = t_v(); f = t_f(); id = t_id(); + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES global_index = t_index(); + #endif type = t_type(); q = t_q(); mass = t_mass(); @@ -105,18 +111,34 @@ void System::grow(T_INT N_new) { Kokkos::resize(v,N_max); // Velocities Kokkos::resize(f,N_max); // Forces Kokkos::resize(id,N_max); // Id + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES Kokkos::resize(global_index,N_max); // Id + #endif Kokkos::resize(type,N_max); // Particle Type Kokkos::resize(q,N_max); // Charge - #ifdef EXAMINIMD_ENABLE_USE_KOKKOS_REMOTE_SPACES + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES int num_ranks; MPI_Comm_size(MPI_COMM_WORLD, &num_ranks); - x_shmem = t_x_shmem("X_shmem", num_ranks, N_max); + x_shmem = t_x_shmem("X_shmem", num_ranks, N_max); // Positions (distrib) #endif } } +void System::print_particles_from_device_data() +{ + printf("Print all particles (GPU): \n"); + printf(" Owned: %d\n",N_local); + Kokkos::parallel_for("print_particles_2", N_local, KOKKOS_LAMBDA(int i){ + printf(" %d %lf %lf %lf | %lf %lf %lf | %lf %lf %lf | %d %e\n",i, + double(x(i,0)),double(x(i,1)),double(x(i,2)), + double(v(i,0)),double(v(i,1)),double(v(i,2)), + double(f(i,0)),double(f(i,1)),double(f(i,2)), + type(i),q(i)); + }); + Kokkos::fence(); +} + void System::print_particles() { printf("Print all particles: \n"); printf(" Owned: %d\n",N_local); diff --git a/src/system.h b/src/system.h index 1be1da3..0dae9d4 100644 --- a/src/system.h +++ b/src/system.h @@ -67,18 +67,19 @@ class System { // Per Particle Property t_x x; // Positions + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + t_x_shmem x_shmem; // Positions + #endif t_v v; // Velocities t_f f; // Forces t_type type; // Particle Type t_id id; // Particle ID - t_index global_index; // Index for PGAS indexing - - t_q q; // Charge - #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES - t_x_shmem x_shmem; + t_index global_index; // Index for distibuted view indexing #endif + + t_q q; // Charge // Per Type Property t_mass mass; @@ -114,7 +115,9 @@ class System { p.q = q(i); p.id = id(i); p.type = type(i); + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES p.global_index = global_index(i); + #endif return p; } @@ -125,7 +128,9 @@ class System { q(i) = p.q; id(i) = p.id; type(i) = p.type; + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES global_index(i) = p.global_index; + #endif } KOKKOS_INLINE_FUNCTION @@ -139,7 +144,9 @@ class System { type(dest) = type(src); id(dest) = id(src); q(dest) = q(src); + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES global_index(dest) = global_index(src); + #endif } KOKKOS_INLINE_FUNCTION @@ -150,5 +157,6 @@ class System { } void print_particles(); + void print_particles_from_device_data(); }; #endif diff --git a/src/types.h b/src/types.h index b0b17a4..e476863 100644 --- a/src/types.h +++ b/src/types.h @@ -65,7 +65,9 @@ enum {NEIGH_NONE, NEIGH_CSR, NEIGH_CSR_MAPCONSTR, NEIGH_2D}; // Input File Type enum {INPUT_LAMMPS}; +#ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES enum INDEX_TYPE: int64_t { N_MAX_MASK = 1024*1024*1024 }; +#endif // Macros to work around the fact that std::max/min is not available on GPUs #define MAX(a,b) (a>b?a:b) @@ -126,8 +128,10 @@ typedef Kokkos::View> t_type_const_rnd; // Type (int is enough as type) typedef Kokkos::View t_id; // ID typedef Kokkos::View t_id_const; // ID +#ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES typedef Kokkos::View t_index; // ID typedef Kokkos::View t_index_const; // ID +#endif typedef Kokkos::View t_q; // Charge typedef Kokkos::View t_q_const; // Charge @@ -202,7 +206,7 @@ t_scalar3 operator * } #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || defined(KOKKOS_ENABLE_OPENMPTARGET) || defined(KOKKOS_ENABLE_SYCL) -#define EMD_ENABLE_GPU +#define EXAMINIMD_HAS_GPU #endif #endif