diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..613bd7e --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,38 @@ +cmake_minimum_required(VERSION 3.16) + +project(ExaMiniMD LANGUAGES CXX) + +include(GNUInstallDirs) + +find_package(Kokkos REQUIRED) + +option(ENABLE_MPI "Whether to build with MPI" OFF) +option(ENABLE_KOKKOS_REMOTE_SPACES "Whether to build with Kokkos Remote Spaces" OFF) + +if (ENABLE_MPI) + find_package(MPI REQUIRED CXX) + message(STATUS "Building with MPI support") + set(BACKEND_NAME MPI) + list(APPEND BACKENDS ${BACKEND_NAME}) +endif() + +if (ENABLE_KOKKOS_REMOTE_SPACES) + find_package(MPI REQUIRED CXX) + find_package(KokkosRemoteSpaces REQUIRED) + message(STATUS "Building with Kokkos Remote Spaces support") + set(BACKEND_NAME KokkosRemoteSpaces) + list(APPEND BACKENDS ${BACKEND_NAME}) +endif() + +list(LENGTH BACKENDS N_BACKENDS) +if (${N_BACKENDS} GREATER_EQUAL "2") + message(FATAL_ERROR "Must specify a single valid communiction implementation. ${N_BACKENDS} given") +endif() + +if (${N_BACKENDS} EQUAL "0") + message(STATUS "Building with serial comm type") +endif() + + +add_subdirectory(src) +add_subdirectory(input) diff --git a/input/CMakeLists.txt b/input/CMakeLists.txt new file mode 100644 index 0000000..72a74b5 --- /dev/null +++ b/input/CMakeLists.txt @@ -0,0 +1,8 @@ +set(FILES + in.lj) + +foreach(FILE ${FILES}) + configure_file(${FILE} ${FILE} COPYONLY) +endforeach() + +add_subdirectory(snap) diff --git a/input/in.lj b/input/in.lj index 85725b8..c951cf0 100644 --- a/input/in.lj +++ b/input/in.lj @@ -1,11 +1,11 @@ -# 3d Lennard-Jones melt +# Example using Lennard-Jones potential units lj atom_style atomic newton off lattice fcc 0.8442 -region box block 0 40 0 40 0 40 +region box block 0 100 0 100 0 100 create_box 1 box create_atoms 1 box mass 1 2.0 @@ -16,8 +16,8 @@ pair_style lj/cut 2.5 pair_coeff 1 1 1.0 1.0 2.5 neighbor 0.3 bin -neigh_modify delay 0 every 20 check no +neigh_modify every 20 one 50 fix 1 all nve thermo 10 -run 100 +run 100 diff --git a/input/snap/CMakeLists.txt b/input/snap/CMakeLists.txt new file mode 100644 index 0000000..ce700e4 --- /dev/null +++ b/input/snap/CMakeLists.txt @@ -0,0 +1,12 @@ +set(FILES + in.snap.Ta06A + in.snap.W + Ta06A.snap + Ta06A.snapcoeff + Ta06A.snapparam + W.snapcoeff + W.snapparam) + +foreach(FILE ${FILES}) + configure_file(${FILE} ${FILE} COPYONLY) +endforeach() diff --git a/input/snap/in.snap.W b/input/snap/in.snap.W index b19472d..9e77c54 100644 --- a/input/snap/in.snap.W +++ b/input/snap/in.snap.W @@ -8,7 +8,7 @@ newton on # generate the box and atom positions using a BCC lattice lattice sc 3.1803 -region box block 0 4 0 8 0 8 +region box block 0 8 0 8 0 8 create_box 1 box create_atoms 1 box @@ -27,7 +27,7 @@ thermo 10 # Set up NVE run neighbor 1.0 bin -neigh_modify once no every 1 delay 0 check yes +neigh_modify once no every 20 delay 0 check yes # Run MD diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..8c81768 --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,21 @@ +FILE(GLOB SRCS *.cpp) +add_executable(ExaMiniMD ${SRCS}) + +set(SUBDIRECTORIES + binning_types + comm_types + force_types + neighbor_types) + +foreach(SUBDIR ${SUBDIRECTORIES}) + add_subdirectory(${SUBDIR}) +endforeach() + +if(ENABLE_MPI) + target_compile_definitions(ExaMiniMD PRIVATE EXAMINIMD_ENABLE_MPI) +endif() + +target_include_directories(ExaMiniMD PRIVATE ${Kokkos_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${SUBDIRECTORIES}) +target_link_libraries(ExaMiniMD PRIVATE $<$:MPI::MPI_CXX> Kokkos::kokkos $<$:Kokkos::kokkosremotespaces>) + +install(TARGETS ExaMiniMD DESTINATION ${CMAKE_INSTALL_BINDIR}) \ No newline at end of file diff --git a/src/Makefile b/src/Makefile deleted file mode 100644 index e763ae9..0000000 --- a/src/Makefile +++ /dev/null @@ -1,82 +0,0 @@ -#Kokkos root path -KOKKOS_PATH = ${HOME}/kokkos - -#Backend and architecture configuration -KOKKOS_DEVICES=OpenMP -KOKKOS_ARCH = "" - -#MPI On or off (1/0) -MPI = 1 - -MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) -EXAMINIMD_PATH = $(subst Makefile,,$(MAKEFILE_PATH))/.. - -default: build - echo "Start Build" - - -EXE = ExaMiniMD -CXXFLAGS = -O3 -g -LINKFLAGS = -O3 -g - -ifeq ($(MPI), 1) - CXX = mpicxx - CXXFLAGS += -DEXAMINIMD_ENABLE_MPI -else - ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) - CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper - else - CXX = g++ - endif -endif - -LINK = ${CXX} - -KOKKOS_OPTIONS=aggressive_vectorization - -DEPFLAGS = -M - -vpath %.cpp $(EXAMINIMD_PATH)/src -SRC = $(wildcard $(EXAMINIMD_PATH)/src/*.cpp) -HEADERS = $(wildcard $(EXAMINIMD_PATH)/src/*.h) -CXXFLAGS += -I$(EXAMINIMD_PATH)/src - -vpath %.cpp $(EXAMINIMD_PATH)/src/force_types -SRC += $(wildcard $(EXAMINIMD_PATH)/src/force_types/*.cpp) -HEADERS += $(wildcard $(EXAMINIMD_PATH)/src/force_types/*.h) -CXXFLAGS += -I$(EXAMINIMD_PATH)/src/force_types - -vpath %.cpp $(EXAMINIMD_PATH)/src/comm_types -SRC += $(wildcard $(EXAMINIMD_PATH)/src/comm_types/*.cpp) -HEADERS += $(wildcard $(EXAMINIMD_PATH)/src/comm_types/*.h) -CXXFLAGS += -I$(EXAMINIMD_PATH)/src/comm_types - -vpath %.cpp $(EXAMINIMD_PATH)/src/neighbor_types -SRC += $(wildcard $(EXAMINIMD_PATH)/src/neighbor_types/*.cpp) -HEADERS += $(wildcard $(EXAMINIMD_PATH)/src/neighbor_types/*.h) -CXXFLAGS += -I$(EXAMINIMD_PATH)/src/neighbor_types - -vpath %.cpp $(EXAMINIMD_PATH)/src/binning_types -SRC += $(wildcard $(EXAMINIMD_PATH)/src/binning_types/*.cpp) -HEADERS += $(wildcard $(EXAMINIMD_PATH)/src/binning_types/*.h) -CXXFLAGS += -I$(EXAMINIMD_PATH)/src/binning_types - -SRC_NOTDIR = $(notdir $(SRC)) -OBJ = $(SRC_NOTDIR:.cpp=.o) -LIB = - -include $(KOKKOS_PATH)/Makefile.kokkos - -build: $(EXE) - -$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) - -clean: kokkos-clean - rm -f *.o $(EXE) - -# Compilation rules - -%.o:%.cpp $(KOKKOS_CPP_DEPENDS) ${HEADERS} - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< - diff --git a/src/binning_types/CMakeLists.txt b/src/binning_types/CMakeLists.txt new file mode 100644 index 0000000..242043b --- /dev/null +++ b/src/binning_types/CMakeLists.txt @@ -0,0 +1,2 @@ +FILE(GLOB SRCS *.cpp) +target_sources(ExaMiniMD PRIVATE ${SRCS}) diff --git a/src/binning_types/binning_kksort.cpp b/src/binning_types/binning_kksort.cpp index a7fdefc..535ab85 100644 --- a/src/binning_types/binning_kksort.cpp +++ b/src/binning_types/binning_kksort.cpp @@ -71,7 +71,7 @@ namespace { void BinningKKSort::create_binning(T_X_FLOAT dx_in, T_X_FLOAT dy_in, T_X_FLOAT dz_in, int halo_depth, bool do_local, bool do_ghost, bool sort) { if(do_local||do_ghost) { nhalo = halo_depth; - std::pair range(do_local?0:system->N_local, + Kokkos::pair range(do_local?0:system->N_local, do_ghost?system->N_local+system->N_ghost:system->N_local); nbinx = T_INT(system->sub_domain_x/dx_in); diff --git a/src/comm_lib.cpp b/src/comm_lib.cpp new file mode 100644 index 0000000..359de92 --- /dev/null +++ b/src/comm_lib.cpp @@ -0,0 +1,88 @@ +//************************************************************************ +// ExaMiniMD v. 1.0 +// Copyright (2018) National Technology & Engineering Solutions of Sandia, +// LLC (NTESS). +// +// Under the terms of Contract DE-NA-0003525 with NTESS, the U.S. Government +// retains certain rights in this software. +// +// ExaMiniMD is licensed under 3-clause BSD terms of use: Redistribution and +// use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL NTESS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +//************************************************************************ + +#include +#include + +#if EXAMINIMD_ENABLE_MPI +#include +#endif + +#ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES +#include +#endif + +void comm_lib_init(int argc, char* argv[]) { +#if defined (EXAMINIMD_ENABLE_MPI) || defined (EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) + int mpi_thread_level_available; + int mpi_thread_level_required = MPI_THREAD_MULTIPLE; + +#ifdef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL + mpi_thread_level_required = MPI_THREAD_SINGLE; +#endif + + MPI_Init_thread(&argc, &argv, mpi_thread_level_required, + &mpi_thread_level_available); + assert(mpi_thread_level_available >= mpi_thread_level_required); + +#ifdef KRS_ENABLE_SHMEMSPACE + shmem_init_thread(mpi_thread_level_required, &mpi_thread_level_available); + assert(mpi_thread_level_available >= mpi_thread_level_required); +#endif + +#ifdef KRS_ENABLE_NVSHMEMSPACE + MPI_Comm mpi_comm; + nvshmemx_init_attr_t attr; + mpi_comm = MPI_COMM_WORLD; + attr.mpi_comm = &mpi_comm; + nvshmemx_init_attr(NVSHMEMX_INIT_WITH_MPI_COMM, &attr); +#endif +} + +void comm_lib_finalize() { +#if defined (EXAMINIMD_ENABLE_MPI) || defined (EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) +#ifdef KRS_ENABLE_SHMEMSPACE + shmem_finalize(); +#endif +#ifdef KRS_ENABLE_NVSHMEMSPACE + nvshmem_finalize(); +#endif + MPI_Finalize(); +#endif +#endif +} diff --git a/src/comm_lib.h b/src/comm_lib.h new file mode 100644 index 0000000..8985856 --- /dev/null +++ b/src/comm_lib.h @@ -0,0 +1,47 @@ +//************************************************************************ +// ExaMiniMD v. 1.0 +// Copyright (2018) National Technology & Engineering Solutions of Sandia, +// LLC (NTESS). +// +// Under the terms of Contract DE-NA-0003525 with NTESS, the U.S. Government +// retains certain rights in this software. +// +// ExaMiniMD is licensed under 3-clause BSD terms of use: Redistribution and +// use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL NTESS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +//************************************************************************ + +#pragma once + +#ifndef COMM_INIT_H +#define COMM_INIT_H + +void comm_lib_init(int argc, char* argv[]); +void comm_lib_finalize(); + +#endif diff --git a/src/comm_types/CMakeLists.txt b/src/comm_types/CMakeLists.txt new file mode 100644 index 0000000..a4b281b --- /dev/null +++ b/src/comm_types/CMakeLists.txt @@ -0,0 +1,3 @@ +FILE(GLOB SRCS *.cpp) +target_sources(ExaMiniMD PRIVATE ${SRCS}) +target_sources(ExaMiniMD PRIVATE ${SRCS}) diff --git a/src/comm_types/comm_mpi.cpp b/src/comm_types/comm_mpi.cpp index 45628df..4fed510 100644 --- a/src/comm_types/comm_mpi.cpp +++ b/src/comm_types/comm_mpi.cpp @@ -36,7 +36,9 @@ // Questions? Contact Christian R. Trott (crtrott@sandia.gov) //************************************************************************ -#ifdef EXAMINIMD_ENABLE_MPI +#include + +#if defined(EXAMINIMD_ENABLE_MPI) || defined (EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) #include CommMPI::CommMPI(System* s, T_X_FLOAT comm_depth):Comm(s,comm_depth) { @@ -197,14 +199,12 @@ void CommMPI::exchange() { s = *system; N_local = system->N_local; N_ghost = 0; - //printf("System A: %i %lf %lf %lf %i\n",s.N_local,s.x(21,0),s.x(21,1),s.x(21,2),s.type(21)); Kokkos::parallel_for("CommMPI::exchange_self", Kokkos::RangePolicy >(0,N_local), *this); T_INT N_total_recv = 0; T_INT N_total_send = 0; - //printf("System B: %i %lf %lf %lf %i\n",s.N_local,s.x(21,0),s.x(21,1),s.x(21,2),s.type(21)); for(phase = 0; phase < 6; phase ++) { proc_num_send[phase] = 0; proc_num_recv[phase] = 0; @@ -245,7 +245,9 @@ void CommMPI::exchange() { MPI_Irecv(unpack_buffer.data(),proc_num_recv[phase]*sizeof(Particle)/sizeof(int),MPI_INT, proc_neighbors_recv[phase],100002,MPI_COMM_WORLD,&request); if(proc_num_send[phase]>0) MPI_Send (pack_buffer.data(),proc_num_send[phase]*sizeof(Particle)/sizeof(int),MPI_INT, proc_neighbors_send[phase],100002,MPI_COMM_WORLD); - system->grow(N_local + N_ghost + count); + int global_n_max = N_local + N_ghost + count; + reduce_max_int(&global_n_max,1); + system->grow(global_n_max); s = *system; if(proc_num_recv[phase]>0) MPI_Wait(&request,&status); @@ -292,6 +294,10 @@ void CommMPI::exchange_halo() { Kokkos::Profiling::pushRegion("Comm::exchange_halo"); + Kokkos::parallel_for("CommMPI::halo_exchange_create_global_indicies", + Kokkos::RangePolicy >(0,N_local), + *this); + N_local = system->N_local; N_ghost = 0; @@ -332,7 +338,9 @@ void CommMPI::exchange_halo() { } MPI_Irecv(unpack_buffer.data(),proc_num_recv[phase]*sizeof(Particle)/sizeof(int),MPI_INT, proc_neighbors_recv[phase],100002,MPI_COMM_WORLD,&request); MPI_Send (pack_buffer.data(),proc_num_send[phase]*sizeof(Particle)/sizeof(int),MPI_INT, proc_neighbors_send[phase],100002,MPI_COMM_WORLD); - system->grow(N_local + N_ghost + count); + int global_n_max = N_local + N_ghost + count; + reduce_max_int(&global_n_max,1); + system->grow(global_n_max); s = *system; MPI_Wait(&request,&status); Kokkos::parallel_for("CommMPI::halo_exchange_unpack", @@ -346,8 +354,10 @@ void CommMPI::exchange_halo() { *this); Kokkos::deep_copy(count,pack_count); bool redo = false; - if(N_local+N_ghost+count>s.x.extent(0)) { - system->grow(N_local + N_ghost + count); + int global_n_max = N_local + N_ghost + count; + reduce_max_int(&global_n_max,1); + if(global_n_max>s.x.extent(0)) { + system->grow(global_n_max); s = *system; redo = true; } @@ -381,6 +391,9 @@ void CommMPI::exchange_halo() { void CommMPI::update_halo() { +#if !defined(SHMEMTESTS_USE_HALO) && defined(EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) + return; +#else Kokkos::Profiling::pushRegion("Comm::update_halo"); N_ghost = 0; @@ -420,6 +433,7 @@ void CommMPI::update_halo() { } Kokkos::Profiling::popRegion(); +#endif }; void CommMPI::update_force() { @@ -465,7 +479,13 @@ void CommMPI::update_force() { Kokkos::Profiling::popRegion(); }; -const char* CommMPI::name() { return "CommMPI"; } +const char* CommMPI::name() { + comm_name = std::string("CommMPI"); + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + comm_name += "Distrib"; + #endif + return comm_name.c_str(); +} int CommMPI::process_rank() { return proc_rank; } int CommMPI::num_processes() { return proc_size; } diff --git a/src/comm_types/comm_mpi.h b/src/comm_types/comm_mpi.h index 40f9f4c..3515479 100644 --- a/src/comm_types/comm_mpi.h +++ b/src/comm_types/comm_mpi.h @@ -52,11 +52,13 @@ #define COMM_MPI_H #include -#ifndef EXAMINIMD_ENABLE_MPI +#if !defined(EXAMINIMD_ENABLE_MPI) && !defined(EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) #error "Trying to compile CommMPI without MPI" #endif +#ifdef EXAMINIMD_ENABLE_MPI #include "mpi.h" +#endif class CommMPI: public Comm { @@ -68,7 +70,6 @@ class CommMPI: public Comm { System s; // Owned Variables - int phase; // Communication Phase int proc_neighbors_recv[6]; // Neighbor for each phase int proc_neighbors_send[6]; // Neighbor for each phase @@ -79,6 +80,8 @@ class CommMPI: public Comm { int proc_rank; // My Process rank int proc_size; // Number of processes + std::string comm_name; + T_INT num_ghost[6]; T_INT ghost_offsets[6]; @@ -115,6 +118,8 @@ class CommMPI: public Comm { struct TagPermuteIndexList {}; + struct TagCreateGlobalIndecies {}; + CommMPI(System* s, T_X_FLOAT comm_depth_); void init(); void create_domain_decomposition(); @@ -496,6 +501,14 @@ class CommMPI: public Comm { s.f(i, 2) += pack_buffer_update(ii, 2); } + KOKKOS_INLINE_FUNCTION + void operator() (const TagCreateGlobalIndecies, + const T_INT& i) const { + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + s.global_index(i) = N_MAX_MASK * proc_rank + i; + #endif + } + const char* name(); int process_rank(); int num_processes(); diff --git a/src/comm_types/comm_serial.h b/src/comm_types/comm_serial.h index 067a91a..dd11508 100644 --- a/src/comm_types/comm_serial.h +++ b/src/comm_types/comm_serial.h @@ -39,6 +39,12 @@ #ifdef MODULES_OPTION_CHECK if( (strcmp(argv[i+1], "SERIAL") == 0) ) comm_type = COMM_SERIAL; + #if !defined(EXAMINIMD_ENABLE_MPI) && !defined(EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) + if( (strcmp(argv[i+1], "SERIAL") != 0) ){ + printf("ERROR: Unsupported Comm-type selected\n"); + exit(1); + } + #endif #endif #ifdef COMM_MODULES_INSTANTIATION else if(input->comm_type == COMM_SERIAL) { @@ -46,7 +52,6 @@ } #endif - #if !defined(MODULES_OPTION_CHECK) && !defined(COMM_MODULES_INSTANTIATION) #ifndef COMM_SERIAL_H #define COMM_SERIAL_H diff --git a/src/examinimd.cpp b/src/examinimd.cpp index 52d431c..7ea9e4b 100644 --- a/src/examinimd.cpp +++ b/src/examinimd.cpp @@ -40,6 +40,7 @@ #include #include #include +#include #define MAXPATHLEN 1024 @@ -59,18 +60,18 @@ ExaMiniMD::ExaMiniMD() { void ExaMiniMD::init(int argc, char* argv[]) { if(system->do_print) - Kokkos::DefaultExecutionSpace::print_configuration(std::cout); + Kokkos::DefaultExecutionSpace{}.print_configuration(std::cout); // Lets parse the command line arguments input->read_command_line_args(argc,argv); - // Read input file + // Read input file input->read_file(); // Now we know which integrator type to use if(input->integrator_type == INTEGRATOR_NVE) integrator = new IntegratorNVE(system); - + // Fill some binning if(input->binning_type == BINNING_KKSORT) binning = new BinningKKSort(system); @@ -81,9 +82,7 @@ void ExaMiniMD::init(int argc, char* argv[]) { #include #undef FORCE_MODULES_INSTANTIATION else comm->error("Invalid ForceType"); - for(int line = 0; line < input->force_coeff_lines.dimension_0(); line++) { - //input->input_data.print_line(input->force_coeff_lines(line)); - //printf("init_coeff: %i %i\n",line,input->input_data.words_in_line(input->force_coeff_lines(line))); + for(int line = 0; line < input->force_coeff_lines.extent(0); line++) { force->init_coeff(input->input_data.words_in_line(input->force_coeff_lines(line)), input->input_data.words[input->force_coeff_lines(line)]); } @@ -107,7 +106,6 @@ void ExaMiniMD::init(int argc, char* argv[]) { if(neighbor) neighbor->comm_newton = input->comm_newton; - // system->print_particles(); if(system->do_print) { printf("Using: %s %s %s %s\n",force->name(),neighbor->name(),comm->name(),binning->name()); } @@ -117,7 +115,7 @@ void ExaMiniMD::init(int argc, char* argv[]) { input->create_lattice(comm); // Create the Halo - comm->exchange(); + comm->exchange(); // Sort particles T_F_FLOAT neigh_cutoff = input->force_cutoff + input->neighbor_skin; @@ -141,7 +139,7 @@ void ExaMiniMD::init(int argc, char* argv[]) { // Reverse Communicate Force Update on Halo comm->update_force(); } - + // Initial output int step = 0; if(input->thermo_rate > 0) { @@ -159,7 +157,7 @@ void ExaMiniMD::init(int argc, char* argv[]) { } else { printf("\n"); printf("Step Temp E_pair TotEng CPU\n"); - printf(" %i %lf %lf %lf %lf %e\n",step,T,PE,PE+KE,0.0); + printf("%i %lf %lf %lf %lf\n",step,T,PE,PE+KE,0.0); } } } @@ -169,7 +167,6 @@ void ExaMiniMD::init(int argc, char* argv[]) { if(input->correctnessflag) check_correctness(step); - } void ExaMiniMD::run(int nsteps) { @@ -189,8 +186,7 @@ void ExaMiniMD::run(int nsteps) { // Timestep Loop for(int step = 1; step <= nsteps; step++ ) { - - // Do first part of the verlet time step integration + // Do first part of the verlet time step integration other_timer.reset(); integrator->initial_integrate(); other_time += other_timer.seconds(); @@ -198,7 +194,7 @@ void ExaMiniMD::run(int nsteps) { if(step%input->comm_exchange_rate==0 && step >0) { // Exchange particles comm_timer.reset(); - comm->exchange(); + comm->exchange(); comm_time += comm_timer.seconds(); // Sort particles @@ -210,7 +206,7 @@ void ExaMiniMD::run(int nsteps) { comm_timer.reset(); comm->exchange_halo(); comm_time += comm_timer.seconds(); - + // Create binning for neighborlist construction neigh_timer.reset(); binning->create_binning(neigh_cutoff,neigh_cutoff,neigh_cutoff,1,true,true,false); @@ -220,22 +216,23 @@ void ExaMiniMD::run(int nsteps) { neighbor->create_neigh_list(system,binning,force->half_neigh,false); neigh_time += neigh_timer.seconds(); } else { - // Exchange Halo + // Exchange Halo data comm_timer.reset(); comm->update_halo(); comm_time += comm_timer.seconds(); } + Kokkos::Experimental::DefaultRemoteMemorySpace::fence(); - // Zero out forces + // Zero out forces force_timer.reset(); Kokkos::deep_copy(system->f,0.0); - + // Compute Short Range Force force->compute(system,binning,neighbor); force_time += force_timer.seconds(); - // This is where Bonds, Angles and KSpace should go eventually - + // This is where Bonds, Angles and KSpace should go eventually + // Reverse Communicate Force Update on Halo if(input->comm_newton) { comm_timer.reset(); @@ -243,7 +240,7 @@ void ExaMiniMD::run(int nsteps) { comm_time += comm_timer.seconds(); } - // Do second part of the verlet time step integration + // Do second part of the verlet time step integration other_timer.reset(); integrator->final_integrate(); @@ -259,7 +256,7 @@ void ExaMiniMD::run(int nsteps) { last_time = time; } else { double time = timer.seconds(); - printf(" %i %lf %lf %lf %lf\n",step, T, PE, PE+KE, timer.seconds()); + printf("%i %lf %lf %lf %lf\n",step, T, PE, PE+KE, timer.seconds()); last_time = time; } } @@ -267,7 +264,7 @@ void ExaMiniMD::run(int nsteps) { if(input->dumpbinaryflag) dump_binary(step); - + if(input->correctnessflag) check_correctness(step); @@ -297,10 +294,10 @@ void ExaMiniMD::dump_binary(int step) { // On dump steps print configuration if(step%input->dumpbinary_rate) return; - + FILE* fp; T_INT n = system->N_local; - + char* filename = new char[MAXPATHLEN]; sprintf(filename,"%s%s.%010d.%03d",input->dumpbinary_path, "/output",step,comm->process_rank()); @@ -340,7 +337,7 @@ void ExaMiniMD::dump_binary(int step) { fwrite(o_x.data(),sizeof(T_X_FLOAT),3*n,fp); fwrite(o_v.data(),sizeof(T_V_FLOAT),3*n,fp); fwrite(o_f.data(),sizeof(T_F_FLOAT),3*n,fp); - + fclose(fp); } @@ -358,7 +355,7 @@ void ExaMiniMD::check_correctness(int step) { FILE* fpref; T_INT n = system->N_local; T_INT ntmp; - + char* filename = new char[MAXPATHLEN]; sprintf(filename,"%s%s.%010d.%03d",input->reference_path, "/output",step,comm->process_rank()); @@ -368,25 +365,25 @@ void ExaMiniMD::check_correctness(int step) { sprintf(str,"Cannot open input file %s",filename); comm->error(str); } - + fread(&ntmp,sizeof(T_INT),1,fpref); - if (ntmp != n) + if (ntmp != n) comm->error("Mismatch in current and reference atom counts"); - + t_id idref = t_id("Correctness::id",n); t_type typeref = t_type("Correctness::type",n); t_q qref = t_q("Correctness::q",n); t_x xref = t_x("Correctness::x",n); t_v vref = t_v("Correctness::v",n); t_f fref = t_f("Correctness::f",n); - + fread(idref.data(),sizeof(T_INT),n,fpref); - fread(typeref.data(),sizeof(T_INT),n,fpref); + fread(typeref.data(),sizeof(T_INT),n,fpref); fread(qref.data(),sizeof(T_FLOAT),n,fpref); fread(xref.data(),sizeof(T_X_FLOAT),3*n,fpref); fread(vref.data(),sizeof(T_V_FLOAT),3*n,fpref); fread(fref.data(),sizeof(T_F_FLOAT),3*n,fpref); - + T_FLOAT sumdelrsq = 0.0; T_FLOAT sumdelvsq = 0.0; T_FLOAT sumdelfsq = 0.0; @@ -395,7 +392,7 @@ void ExaMiniMD::check_correctness(int step) { T_FLOAT maxdelf = 0.0; for (int i = 0; i < n; i++) { int ii = -1; - if (system->id(i) != idref(i)) + if (system->id(i) != idref(i)) for (int j = 0; j < n; j++) { if (system->id(j) == idref(i)) { ii = j; @@ -404,7 +401,7 @@ void ExaMiniMD::check_correctness(int step) { } else ii = i; - + if (ii == -1) printf("Unable to find current id matchinf reference id %d \n",idref(i)); else { @@ -417,7 +414,7 @@ void ExaMiniMD::check_correctness(int step) { maxdelr = MAX(fabs(delx),maxdelr); maxdelr = MAX(fabs(dely),maxdelr); maxdelr = MAX(fabs(delz),maxdelr); - + delx = system->v(ii,0)-vref(i,0); dely = system->v(ii,1)-vref(i,1); delz = system->v(ii,2)-vref(i,2); @@ -426,7 +423,7 @@ void ExaMiniMD::check_correctness(int step) { maxdelv = MAX(fabs(delx),maxdelv); maxdelv = MAX(fabs(dely),maxdelv); maxdelv = MAX(fabs(delz),maxdelv); - + delx = system->f(ii,0)-fref(i,0); dely = system->f(ii,1)-fref(i,1); delz = system->f(ii,2)-fref(i,2); diff --git a/src/examinimd.h b/src/examinimd.h index 4738703..1fa6821 100644 --- a/src/examinimd.h +++ b/src/examinimd.h @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -57,16 +58,11 @@ class ExaMiniMD { Binning* binning; ExaMiniMD(); - void init(int argc,char* argv[]); - void run(int nsteps); - void dump_binary(int); void check_correctness(int); - void print_performance(); - void shutdown(); }; diff --git a/src/force_types/CMakeLists.txt b/src/force_types/CMakeLists.txt new file mode 100644 index 0000000..9c06a2c --- /dev/null +++ b/src/force_types/CMakeLists.txt @@ -0,0 +1,25 @@ +FILE(GLOB SRCS *.cpp) + +#Skip snap and cell +#TODO: SNAP is outdates and should likely be removed all together as it is +list(FILTER SRCS EXCLUDE REGEX ".*lj_cell\\.cpp$") +list(FILTER SRCS EXCLUDE REGEX ".*snap_neigh\\.cpp$") + +# Skip force-type module if Kokkos Remote Spaces is not enabled +if (ENABLE_KOKKOS_REMOTE_SPACES) + message(STATUS "Building with support for force_lj_neigh_distrib") + list(FILTER SRCS EXCLUDE REGEX ".*lj_neigh\\.cpp$") + target_compile_definitions(ExaMiniMD PRIVATE EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) + target_compile_definitions(ExaMiniMD PRIVATE SHMEMTESTS_USE_SCALAR) + #target_compile_definitions(ExaMiniMD PRIVATE SHMEMTESTS_USE_HALO) + #target_compile_definitions(ExaMiniMD PRIVATE SHMEMTESTS_USE_HALO_LOCAL) + #target_compile_definitions(ExaMiniMD PRIVATE SHMEMTESTS_USE_LOCAL_GLOBAL) + target_compile_definitions(ExaMiniMD PRIVATE SHMEMTESTS_USE_GLOBAL) +else() + #Otherwise exclude + list(FILTER SRCS EXCLUDE REGEX ".*distrib\\.cpp$") +endif() + + + +target_sources(ExaMiniMD PRIVATE ${SRCS}) diff --git a/src/force_types/force_lj_idial_neigh.h b/src/force_types/force_lj_idial_neigh.h index c0d298a..4164cd8 100644 --- a/src/force_types/force_lj_idial_neigh.h +++ b/src/force_types/force_lj_idial_neigh.h @@ -39,9 +39,8 @@ #ifdef MODULES_OPTION_CHECK if( (strcmp(argv[i+1], "NEIGH_FULL") == 0) ) force_iteration_type = FORCE_ITER_NEIGH_FULL; - if( (strcmp(argv[i+1], "NEIGH_HALF") == 0) ) { + if( (strcmp(argv[i+1], "NEIGH_HALF") == 0) ) force_iteration_type = FORCE_ITER_NEIGH_HALF; - } #endif #ifdef FORCE_MODULES_INSTANTIATION else if (input->force_type == FORCE_LJ_IDIAL) { @@ -54,7 +53,6 @@ } #endif - #if !defined(MODULES_OPTION_CHECK) && \ !defined(FORCE_MODULES_INSTANTIATION) diff --git a/src/force_types/force_lj_neigh_distrib.cpp b/src/force_types/force_lj_neigh_distrib.cpp new file mode 100644 index 0000000..c286e43 --- /dev/null +++ b/src/force_types/force_lj_neigh_distrib.cpp @@ -0,0 +1,43 @@ +//************************************************************************ +// ExaMiniMD v. 1.0 +// Copyright (2018) National Technology & Engineering Solutions of Sandia, +// LLC (NTESS). +// +// Under the terms of Contract DE-NA-0003525 with NTESS, the U.S. Government +// retains certain rights in this software. +// +// ExaMiniMD is licensed under 3-clause BSD terms of use: Redistribution and +// use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL NTESS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +//************************************************************************ + +#include +#define FORCETYPE_DECLARE_TEMPLATE_MACRO(NeighType) ForceLJNeigh +#define FORCE_MODULES_TEMPLATE +#include +#undef FORCE_MODULES_TEMPLATE diff --git a/src/force_types/force_lj_neigh_distrib.h b/src/force_types/force_lj_neigh_distrib.h new file mode 100644 index 0000000..d2e8a11 --- /dev/null +++ b/src/force_types/force_lj_neigh_distrib.h @@ -0,0 +1,164 @@ +//************************************************************************ +// ExaMiniMD v. 1.0 +// Copyright (2018) National Technology & Engineering Solutions of Sandia, +// LLC (NTESS). +// +// Under the terms of Contract DE-NA-0003525 with NTESS, the U.S. Government +// retains certain rights in this software. +// +// ExaMiniMD is licensed under 3-clause BSD terms of use: Redistribution and +// use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL NTESS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +//************************************************************************ + +#ifdef MODULES_OPTION_CHECK + if( (strcmp(argv[i+1], "NEIGH_FULL") == 0) ) + force_iteration_type = FORCE_ITER_NEIGH_FULL; + if( (strcmp(argv[i+1], "NEIGH_HALF") == 0) ) { + force_iteration_type = FORCE_ITER_NEIGH_HALF; + } +#endif +#ifdef FORCE_MODULES_INSTANTIATION + else if (input->force_type == FORCE_LJ) { + bool half_neigh = input->force_iteration_type == FORCE_ITER_NEIGH_HALF; + switch ( input->neighbor_type ) { + #define FORCETYPE_ALLOCATION_MACRO(NeighType) ForceLJNeigh(input->input_data.words[input->force_line],system,half_neigh) + #include + #undef FORCETYPE_ALLOCATION_MACRO + } + } +#endif + +#if !defined(MODULES_OPTION_CHECK) && \ + !defined(FORCE_MODULES_INSTANTIATION) + +#ifndef FORCE_LJ_NEIGH_H +#define FORCE_LJ_NEIGH_H +#include + +template +class ForceLJNeigh: public Force { +private: + int N_local,ntypes; + t_x_const_rnd x; + t_x_shmem x_shmem; + t_x_shmem_local x_shmem_local; + t_index global_index; + t_f f; + t_f_atomic f_a; + t_id id; + t_type_const_rnd type; + + T_X_FLOAT domain_x, domain_y, domain_z; + int proc_rank; + + Binning::t_bincount bin_count; + Binning::t_binoffsets bin_offsets; + T_INT nbinx,nbiny,nbinz,nhalo; + int step; + bool use_stackparams; + + + typedef Kokkos::View t_fparams; + typedef Kokkos::View> t_fparams_rnd; + t_fparams lj1,lj2,cutsq; + t_fparams_rnd rnd_lj1,rnd_lj2,rnd_cutsq; + + T_F_FLOAT stack_lj1[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; // hardwired space for 12 atom types + T_F_FLOAT stack_lj2[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + T_F_FLOAT stack_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + + typedef typename NeighborClass::t_neigh_list t_neigh_list; + t_neigh_list neigh_list; + +public: + typedef T_V_FLOAT value_type; + + template + struct TagFullNeigh {}; + + template + struct TagHalfNeigh {}; + + template + struct TagFullNeighPE {}; + + template + struct TagHalfNeighPE {}; + + struct TagCopyLocalXShmem {}; + + typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_full_neigh; + typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_half_neigh; + typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_full_neigh_pe; + typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_half_neigh_pe; + + typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_full_neigh_stackparams; + typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_half_neigh_stackparams; + typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_full_neigh_pe_stackparams; + typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_half_neigh_pe_stackparams; + + typedef Kokkos::RangePolicy > t_policy_compute_fill_xshmem; + + ForceLJNeigh (char** args, System* system, bool half_neigh_); + + void init_coeff(int nargs, char** args); + + void compute(System* system, Binning* binning, Neighbor* neighbor ); + T_F_FLOAT compute_energy(System* system, Binning* binning, Neighbor* neighbor); + + template + KOKKOS_INLINE_FUNCTION + void operator() (TagFullNeigh, const T_INT& i) const; + + template + KOKKOS_INLINE_FUNCTION + void operator() (TagHalfNeigh, const T_INT& i) const; + + template + KOKKOS_INLINE_FUNCTION + void operator() (TagFullNeighPE, const T_INT& i, T_V_FLOAT& PE) const; + + template + KOKKOS_INLINE_FUNCTION + void operator() (TagHalfNeighPE, const T_INT& i, T_V_FLOAT& PE) const; + + KOKKOS_INLINE_FUNCTION + void operator() (TagCopyLocalXShmem, const T_INT& i) const; + + const char* name(); +}; + +#define FORCE_MODULES_EXTERNAL_TEMPLATE +#define FORCETYPE_DECLARE_TEMPLATE_MACRO(NeighType) ForceLJNeigh +#include +#undef FORCETYPE_DECLARE_TEMPLATE_MACRO +#undef FORCE_MODULES_EXTERNAL_TEMPLATE +#endif +#endif diff --git a/src/force_types/force_lj_neigh_distrib_impl.h b/src/force_types/force_lj_neigh_distrib_impl.h new file mode 100644 index 0000000..bb24ce4 --- /dev/null +++ b/src/force_types/force_lj_neigh_distrib_impl.h @@ -0,0 +1,420 @@ +//************************************************************************ +// ExaMiniMD v. 1.0 +// Copyright (2018) National Technology & Engineering Solutions of Sandia, +// LLC (NTESS). +// +// Under the terms of Contract DE-NA-0003525 with NTESS, the U.S. Government +// retains certain rights in this software. +// +// ExaMiniMD is licensed under 3-clause BSD terms of use: Redistribution and +// use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL NTESS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +//************************************************************************ + +#include +#include + +template +ForceLJNeigh::ForceLJNeigh(char** args, System* system, bool half_neigh_):Force(args,system,half_neigh_) { + ntypes = system->ntypes; + use_stackparams = (ntypes <= MAX_TYPES_STACKPARAMS); + if (!use_stackparams) { + lj1 = t_fparams("ForceLJNeigh::lj1",ntypes,ntypes); + lj2 = t_fparams("ForceLJNeigh::lj2",ntypes,ntypes); + cutsq = t_fparams("ForceLJNeigh::cutsq",ntypes,ntypes); + } + nbinx = nbiny = nbinz = 0; + N_local = 0; + nhalo = 0; + step = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank); + +} + +template +void ForceLJNeigh::init_coeff(int nargs, char** args) { + step = 0; + + int one_based_type = 1; + int t1 = atoi(args[1])-one_based_type; + int t2 = atoi(args[2])-one_based_type; + double eps = atof(args[3]); + double sigma = atof(args[4]); + double cut = atof(args[5]); + + if (use_stackparams) { + for (int i = 0; i < ntypes; i++) { + for (int j = 0; j < ntypes; j++) { + stack_lj1[i][j] = 48.0 * eps * pow(sigma,12.0); + stack_lj2[i][j] = 24.0 * eps * pow(sigma,6.0); + stack_cutsq[i][j] = cut*cut; + } + } + } else { + t_fparams::HostMirror h_lj1 = Kokkos::create_mirror_view(lj1); + t_fparams::HostMirror h_lj2 = Kokkos::create_mirror_view(lj2); + t_fparams::HostMirror h_cutsq = Kokkos::create_mirror_view(cutsq); + Kokkos::deep_copy(h_lj1,lj1); + Kokkos::deep_copy(h_lj2,lj2); + Kokkos::deep_copy(h_cutsq,cutsq); + + h_lj1(t1,t2) = 48.0 * eps * pow(sigma,12.0); + h_lj2(t1,t2) = 24.0 * eps * pow(sigma,6.0); + h_lj1(t2,t1) = h_lj1(t1,t2); + h_lj2(t2,t1) = h_lj2(t1,t2); + h_cutsq(t1,t2) = cut*cut; + h_cutsq(t2,t1) = cut*cut; + + Kokkos::deep_copy(lj1,h_lj1); + Kokkos::deep_copy(lj2,h_lj2); + Kokkos::deep_copy(cutsq,h_cutsq); + + rnd_lj1 = lj1; + rnd_lj2 = lj2; + rnd_cutsq = cutsq; + } +}; + +template +void ForceLJNeigh::compute(System* system, Binning* binning, Neighbor* neighbor_ ) { + // Set internal data handles + NeighborClass* neighbor = (NeighborClass*) neighbor_; + neigh_list = neighbor->get_neigh_list(); + N_local = system->N_local; + x = system->x; + x_shmem = system->x_shmem; + x_shmem_local = t_x_shmem_local(x_shmem.data(),x_shmem.extent(1)); + f = system->f; + f_a = system->f; + type = system->type; + id = system->id; + global_index = system->global_index; + + domain_x = system->domain_x; + domain_y = system->domain_y; + domain_z = system->domain_z; + + #ifdef SHMEMTESTS_USE_HALO + #else + Kokkos::Experimental::DefaultRemoteMemorySpace::fence(); + Kokkos::parallel_for("ForceLJNeigh::compute_fill_xshmem", t_policy_compute_fill_xshmem(0,system->N_local), *this); + Kokkos::fence(); + Kokkos::Experimental::DefaultRemoteMemorySpace().fence(); + #endif + + if (use_stackparams) { + if(half_neigh) + Kokkos::parallel_for("ForceLJNeigh::compute", t_policy_half_neigh_stackparams(0, system->N_local), *this); + else + Kokkos::parallel_for("ForceLJNeigh::compute", t_policy_full_neigh_stackparams(0, system->N_local), *this); + } else { + if(half_neigh) + Kokkos::parallel_for("ForceLJNeigh::compute", t_policy_half_neigh(0, system->N_local), *this); + else + Kokkos::parallel_for("ForceLJNeigh::compute", t_policy_full_neigh(0, system->N_local), *this); + } + Kokkos::fence(); + Kokkos::Experimental::DefaultRemoteMemorySpace::fence(); + step++; +} + +template +T_V_FLOAT ForceLJNeigh::compute_energy(System* system, Binning* binning, Neighbor* neighbor_ ) { + // Set internal data handles + NeighborClass* neighbor = (NeighborClass*) neighbor_; + neigh_list = neighbor->get_neigh_list(); + MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank); + N_local = system->N_local; + x = system->x; + f = system->f; + f_a = system->f; + type = system->type; + id = system->id; + T_V_FLOAT energy; + + if (use_stackparams) { + if(half_neigh) + Kokkos::parallel_reduce("ForceLJNeigh::compute_energy", t_policy_half_neigh_pe_stackparams(0, system->N_local), *this, energy); + else + Kokkos::parallel_reduce("ForceLJNeigh::compute_energy", t_policy_full_neigh_pe_stackparams(0, system->N_local), *this, energy); + } else { + if(half_neigh) + Kokkos::parallel_reduce("ForceLJNeigh::compute_energy", t_policy_half_neigh_pe(0, system->N_local), *this, energy); + else + Kokkos::parallel_reduce("ForceLJNeigh::compute_energy", t_policy_full_neigh_pe(0, system->N_local), *this, energy); + } + Kokkos::fence(); + Kokkos::Experimental::DefaultRemoteMemorySpace::fence(); + + step++; + return energy; +} + +template +const char* ForceLJNeigh::name() { return half_neigh?"ForceLJNeighHalf":"ForceLJNeighFull"; } + +template +template +KOKKOS_INLINE_FUNCTION +void ForceLJNeigh::operator() (TagFullNeigh, const T_INT& i) const { + const T_F_FLOAT x_i = x(i,0); + const T_F_FLOAT y_i = x(i,1); + const T_F_FLOAT z_i = x(i,2); + const int type_i = type(i); + + typename t_neigh_list::t_neighs neighs_i = neigh_list.get_neighs(i); + const int num_neighs = neighs_i.get_num_neighs(); + + T_F_FLOAT fxi = 0.0; + T_F_FLOAT fyi = 0.0; + T_F_FLOAT fzi = 0.0; + + for(int jj = 0; jj < num_neighs; jj++) { + T_INT j = neighs_i(jj); + const T_INDEX jg = global_index(j); + #ifdef SHMEMTESTS_USE_SCALAR + #ifdef SHMEMTESTS_USE_HALO + const T_X_FLOAT xj_shmem = x(j,0); + const T_X_FLOAT yj_shmem = x(j,1); + const T_X_FLOAT zj_shmem = x(j,2); + #endif + #ifdef SHMEMTESTS_USE_HALO_LOCAL + const T_X_FLOAT xj_shmem = jg/N_MAX_MASK==proc_rank?x(j,0):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = jg/N_MAX_MASK==proc_rank?x(j,1):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = jg/N_MAX_MASK==proc_rank?x(j,2):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #ifdef SHMEMTESTS_USE_LOCAL_GLOBAL + const T_X_FLOAT xj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+0]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+1]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+2]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #ifdef SHMEMTESTS_USE_GLOBAL + const T_X_FLOAT xj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #else + #ifdef SHMEMTESTS_USE_GLOBAL + const double3 posj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK); + const T_X_FLOAT xj_shmem = posj_shmem.x; + const T_X_FLOAT yj_shmem = posj_shmem.y; + const T_X_FLOAT zj_shmem = posj_shmem.z; + #else + #error "Unknown configuration" + #endif + #endif + + #ifdef SHMEMTESTS_USE_HALO + const T_F_FLOAT dx = x_i - x(j,0); + const T_F_FLOAT dy = y_i - x(j,1); + const T_F_FLOAT dz = z_i - x(j,2); + #else + T_F_FLOAT dx = abs(x_i - xj_shmem)>domain_x/2? + (x_i-xj_shmem<0?x_i-xj_shmem+domain_x:x_i-xj_shmem-domain_x) + :x_i-xj_shmem; + T_F_FLOAT dy = abs(y_i - yj_shmem)>domain_y/2? + (y_i-yj_shmem<0?y_i-yj_shmem+domain_y:y_i-yj_shmem-domain_y) + :y_i-yj_shmem; + T_F_FLOAT dz = abs(z_i - zj_shmem)>domain_z/2? + (z_i-zj_shmem<0?z_i-zj_shmem+domain_z:z_i-zj_shmem-domain_z) + :z_i-zj_shmem; + #endif + + const int type_j = type(j); + const T_F_FLOAT rsq = dx*dx + dy*dy + dz*dz; + + const T_F_FLOAT cutsq_ij = STACKPARAMS?stack_cutsq[type_i][type_j]:rnd_cutsq(type_i,type_j); + + if( rsq < cutsq_ij ) { + const T_F_FLOAT lj1_ij = STACKPARAMS?stack_lj1[type_i][type_j]:rnd_lj1(type_i,type_j); + const T_F_FLOAT lj2_ij = STACKPARAMS?stack_lj2[type_i][type_j]:rnd_lj2(type_i,type_j); + + T_F_FLOAT r2inv = 1.0/rsq; + T_F_FLOAT r6inv = r2inv*r2inv*r2inv; + T_F_FLOAT fpair = (r6inv * (lj1_ij*r6inv - lj2_ij)) * r2inv; + fxi += dx*fpair; + fyi += dy*fpair; + fzi += dz*fpair; + } + } + + f(i,0) += fxi; + f(i,1) += fyi; + f(i,2) += fzi; +} + +template +template +KOKKOS_INLINE_FUNCTION +void ForceLJNeigh::operator() (TagHalfNeigh, const T_INT& i) const { + const T_F_FLOAT x_i = x(i,0); + const T_F_FLOAT y_i = x(i,1); + const T_F_FLOAT z_i = x(i,2); + const int type_i = type(i); + + typename t_neigh_list::t_neighs neighs_i = neigh_list.get_neighs(i); + + const int num_neighs = neighs_i.get_num_neighs(); + + T_F_FLOAT fxi = 0.0; + T_F_FLOAT fyi = 0.0; + T_F_FLOAT fzi = 0.0; + for(int jj = 0; jj < num_neighs; jj++) { + T_INT j = neighs_i(jj); + const T_F_FLOAT dx = x_i - x(j,0); + const T_F_FLOAT dy = y_i - x(j,1); + const T_F_FLOAT dz = z_i - x(j,2); + + const int type_j = type(j); + const T_F_FLOAT rsq = dx*dx + dy*dy + dz*dz; + + const T_F_FLOAT cutsq_ij = STACKPARAMS?stack_cutsq[type_i][type_j]:rnd_cutsq(type_i,type_j); + + if( rsq < cutsq_ij ) { + const T_F_FLOAT lj1_ij = STACKPARAMS?stack_lj1[type_i][type_j]:rnd_lj1(type_i,type_j); + const T_F_FLOAT lj2_ij = STACKPARAMS?stack_lj2[type_i][type_j]:rnd_lj2(type_i,type_j); + + T_F_FLOAT r2inv = 1.0/rsq; + T_F_FLOAT r6inv = r2inv*r2inv*r2inv; + T_F_FLOAT fpair = (r6inv * (lj1_ij*r6inv - lj2_ij)) * r2inv; + fxi += dx*fpair; + fyi += dy*fpair; + fzi += dz*fpair; + f_a(j,0) -= dx*fpair; + f_a(j,1) -= dy*fpair; + f_a(j,2) -= dz*fpair; + } + } + f_a(i,0) += fxi; + f_a(i,1) += fyi; + f_a(i,2) += fzi; + +} + +template +template +KOKKOS_INLINE_FUNCTION +void ForceLJNeigh::operator() (TagFullNeighPE, const T_INT& i, T_V_FLOAT& PE) const { + const T_F_FLOAT x_i = x(i,0); + const T_F_FLOAT y_i = x(i,1); + const T_F_FLOAT z_i = x(i,2); + const int type_i = type(i); + const bool shift_flag = true; + + typename t_neigh_list::t_neighs neighs_i = neigh_list.get_neighs(i); + + const int num_neighs = neighs_i.get_num_neighs(); + + for(int jj = 0; jj < num_neighs; jj++) { + T_INT j = neighs_i(jj); + const T_F_FLOAT dx = x_i - x(j,0); + const T_F_FLOAT dy = y_i - x(j,1); + const T_F_FLOAT dz = z_i - x(j,2); + + const int type_j = type(j); + const T_F_FLOAT rsq = dx*dx + dy*dy + dz*dz; + + const T_F_FLOAT cutsq_ij = STACKPARAMS?stack_cutsq[type_i][type_j]:rnd_cutsq(type_i,type_j); + + if( rsq < cutsq_ij ) { + const T_F_FLOAT lj1_ij = STACKPARAMS?stack_lj1[type_i][type_j]:rnd_lj1(type_i,type_j); + const T_F_FLOAT lj2_ij = STACKPARAMS?stack_lj2[type_i][type_j]:rnd_lj2(type_i,type_j); + + T_F_FLOAT r2inv = 1.0/rsq; + T_F_FLOAT r6inv = r2inv*r2inv*r2inv; + PE += 0.5*r6inv * (0.5*lj1_ij*r6inv - lj2_ij) / 6.0; // optimize later + + if (shift_flag) { + T_F_FLOAT r2invc = 1.0/cutsq_ij; + T_F_FLOAT r6invc = r2invc*r2invc*r2invc; + PE -= 0.5*r6invc * (0.5*lj1_ij*r6invc - lj2_ij) / 6.0; // optimize later + } + } + } +} + +template +template +KOKKOS_INLINE_FUNCTION +void ForceLJNeigh::operator() (TagHalfNeighPE, const T_INT& i, T_V_FLOAT& PE) const { + const T_F_FLOAT x_i = x(i,0); + const T_F_FLOAT y_i = x(i,1); + const T_F_FLOAT z_i = x(i,2); + const int type_i = type(i); + const bool shift_flag = true; + + typename t_neigh_list::t_neighs neighs_i = neigh_list.get_neighs(i); + + const int num_neighs = neighs_i.get_num_neighs(); + + for(int jj = 0; jj < num_neighs; jj++) { + T_INT j = neighs_i(jj); + const T_F_FLOAT dx = x_i - x(j,0); + const T_F_FLOAT dy = y_i - x(j,1); + const T_F_FLOAT dz = z_i - x(j,2); + + const int type_j = type(j); + const T_F_FLOAT rsq = dx*dx + dy*dy + dz*dz; + + const T_F_FLOAT cutsq_ij = STACKPARAMS?stack_cutsq[type_i][type_j]:rnd_cutsq(type_i,type_j); + + if( rsq < cutsq_ij ) { + const T_F_FLOAT lj1_ij = STACKPARAMS?stack_lj1[type_i][type_j]:rnd_lj1(type_i,type_j); + const T_F_FLOAT lj2_ij = STACKPARAMS?stack_lj2[type_i][type_j]:rnd_lj2(type_i,type_j); + + T_F_FLOAT r2inv = 1.0/rsq; + T_F_FLOAT r6inv = r2inv*r2inv*r2inv; + T_F_FLOAT fac; + if(j +KOKKOS_INLINE_FUNCTION +void ForceLJNeigh::operator() (TagCopyLocalXShmem, const T_INT& i) const { + #ifdef SHMEMTESTS_USE_SCALAR + x_shmem_local(i,0) = x(i,0); + x_shmem_local(i,1) = x(i,1); + x_shmem_local(i,2) = x(i,2); + #else + double3 pos = {x(i,0),x(i,1),x(i,2)}; + x_shmem_local(i) = pos; + #endif +} + diff --git a/src/force_types/force_lj_neigh_impl.h b/src/force_types/force_lj_neigh_impl.h index e6789c2..19e3fbf 100644 --- a/src/force_types/force_lj_neigh_impl.h +++ b/src/force_types/force_lj_neigh_impl.h @@ -79,7 +79,6 @@ void ForceLJNeigh::init_coeff(int nargs, char** args) { Kokkos::deep_copy(h_lj1,lj1); Kokkos::deep_copy(h_lj2,lj2); Kokkos::deep_copy(h_cutsq,cutsq); - h_lj1(t1,t2) = 48.0 * eps * pow(sigma,12.0); h_lj2(t1,t2) = 24.0 * eps * pow(sigma,6.0); h_lj1(t2,t1) = h_lj1(t1,t2); @@ -102,7 +101,7 @@ void ForceLJNeigh::compute(System* system, Binning* binning, Neig // Set internal data handles NeighborClass* neighbor = (NeighborClass*) neighbor_; neigh_list = neighbor->get_neigh_list(); - + N_local = system->N_local; x = system->x; f = system->f; diff --git a/src/force_types/force_snap_neigh.cpp b/src/force_types/force_snap_neigh.cpp index 1f33395..60b33c8 100644 --- a/src/force_types/force_snap_neigh.cpp +++ b/src/force_types/force_snap_neigh.cpp @@ -36,8 +36,11 @@ // Questions? Contact Christian R. Trott (crtrott@sandia.gov) //************************************************************************ +#include +#ifndef KOKKOS_ENABLE_OPENMPTARGET #include #define FORCETYPE_DECLARE_TEMPLATE_MACRO(NeighType) ForceSNAP #define FORCE_MODULES_TEMPLATE #include #undef FORCE_MODULES_TEMPLATE +#endif diff --git a/src/force_types/force_snap_neigh.h b/src/force_types/force_snap_neigh.h index d6f4e1e..d1edb4f 100644 --- a/src/force_types/force_snap_neigh.h +++ b/src/force_types/force_snap_neigh.h @@ -170,14 +170,28 @@ class ForceSNAP : public Force { t_x x; + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + t_x_shmem x_shmem; + t_x_shmem_local x_shmem_local; + #endif t_f_atomic f; t_type type; + t_index global_index; + T_X_FLOAT domain_x, domain_y, domain_z; + int proc_rank; public: + struct TagForceCompute {}; + KOKKOS_INLINE_FUNCTION + void operator() (TagForceCompute, const Kokkos::TeamPolicy<>::member_type& team) const; KOKKOS_INLINE_FUNCTION void operator() (const Kokkos::TeamPolicy<>::member_type& team) const; + + struct TagCopyLocalXShmem {}; + KOKKOS_INLINE_FUNCTION + void operator() (TagCopyLocalXShmem, const T_INT& i) const; }; #define FORCE_MODULES_EXTERNAL_TEMPLATE diff --git a/src/force_types/force_snap_neigh_impl.h b/src/force_types/force_snap_neigh_impl.h index 8d9500d..3f8abe5 100644 --- a/src/force_types/force_snap_neigh_impl.h +++ b/src/force_types/force_snap_neigh_impl.h @@ -84,6 +84,12 @@ ForceSNAP::ForceSNAP(char** args, System* system_, bool half_neig #if defined(KOKKOS_ENABLE_CUDA) std::is_same::value ? Kokkos::DefaultExecutionSpace::concurrency()/vector_length : +#elif defined(KOKKOS_ENABLE_HIP) + std::is_same::value ? + Kokkos::DefaultExecutionSpace::concurrency()/vector_length : +#elif defined(KOKKOS_ENABLE_SYCL) + std::is_same::value ? + Kokkos::DefaultExecutionSpace::concurrency()/vector_length : #else Kokkos::DefaultExecutionSpace::concurrency(); #endif @@ -137,7 +143,7 @@ template struct FindMaxNumNeighs { NeighList neigh_list; - FindMaxNumNeighs(NeighList& nl): neigh_list(nl) {} + FindMaxNumNeighs(NeighList& nl): neigh_list(nl) {} KOKKOS_INLINE_FUNCTION void operator() (const int& i, int& max_neighs) const { @@ -158,6 +164,16 @@ void ForceSNAP::compute(System* system, Binning* binning, Neighbo Kokkos::abort("ForceSNAP requires 'newton on'"); x = system->x; f = system->f; + x_shmem = system->x_shmem; +#ifdef KOKKOS_ENABLE_QUOSPACE + x_shmem_local = t_x_shmem_local(&x_shmem.access(proc_rank,0,0),x_shmem.extent(1)); +#else + x_shmem_local = t_x_shmem_local(x_shmem.data(),x_shmem.extent(1)); +#endif + domain_x = system->domain_x; + domain_y = system->domain_y; + domain_z = system->domain_z; + global_index = system->global_index; type = system->type; int nlocal = system->N_local; @@ -172,7 +188,12 @@ void ForceSNAP::compute(System* system, Binning* binning, Neighbo const int num_neighs = neighs_i.get_num_neighs(); if(max_neighs(neigh_list), Kokkos::Experimental::Max(max_neighs)); + + Kokkos::Experimental::DefaultRemoteMemorySpace::fence();; + Kokkos::parallel_for("ForceSNAPNeigh::compute_fill_xshmem", Kokkos::RangePolicy(0,system->N_local), *this); + Kokkos::Experimental::DefaultRemoteMemorySpace::fence();; + + Kokkos::parallel_reduce("ForceSNAP::find_max_neighs",nlocal, FindMaxNumNeighs(neigh_list), Kokkos::Max(max_neighs)); sna.nmax = max_neighs; @@ -180,21 +201,23 @@ void ForceSNAP::compute(System* system, Binning* binning, Neighbo T_INT thread_scratch_size = sna.size_thread_scratch_arrays(); //printf("Sizes: %i %i\n",team_scratch_size/1024,thread_scratch_size/1024); - int team_size_max = Kokkos::TeamPolicy<>::team_size_max(*this); int vector_length = 8; -#ifdef KOKKOS_ENABLE_CUDA + int team_size_max = Kokkos::TeamPolicy<>(nlocal,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelForTag()); +#ifdef EXAMINIMD_HAS_GPU int team_size = 20;//max_neighs; if(team_size*vector_length > team_size_max) team_size = team_size_max/vector_length; #else int team_size = 1; #endif - Kokkos::TeamPolicy<> policy(nlocal,team_size,vector_length); + Kokkos::TeamPolicy policy(nlocal,team_size,vector_length); Kokkos::parallel_for("ForceSNAP::compute",policy .set_scratch_size(1,Kokkos::PerThread(thread_scratch_size)) .set_scratch_size(1,Kokkos::PerTeam(team_scratch_size)) ,*this); + Kokkos::fence(); + Kokkos::Experimental::DefaultRemoteMemorySpace::fence();; //static int step =0; //step++; //if(step%10==0) @@ -272,8 +295,8 @@ void ForceSNAP::init_coeff(int narg, char **arg) // ncoeffall should be (ncoeff+2)*(ncoeff+1)/2 // so, ncoeff = floor(sqrt(2*ncoeffall))-1 - - ncoeff = sqrt(2*ncoeffall)-1; + + ncoeff = sqrt(2*ncoeffall)-1; ncoeffq = (ncoeff*(ncoeff+1))/2; int ntmp = 1+ncoeff+ncoeffq; if (ntmp != ncoeffall) { @@ -384,7 +407,7 @@ void ForceSNAP::read_files(char *coefffilename, char *paramfilena int nelemfile = atoi(words[0]); ncoeffall = atoi(words[1]); - + // Set up element lists radelem = Kokkos::View("pair:radelem",nelements); @@ -504,7 +527,7 @@ void ForceSNAP::read_files(char *coefffilename, char *paramfilena switchflag = 1; bzeroflag = 1; quadraticflag = 0; - + // open SNAP parameter file on proc 0 FILE *fpparam; @@ -537,7 +560,7 @@ void ForceSNAP::read_files(char *coefffilename, char *paramfilena //nwords = atom->count_words(line); if(line[0]!=10) nwords = 2; else nwords = 0; if (nwords == 0) continue; - + if (nwords != 2) Kokkos::abort("Incorrect format in SNAP parameter file"); @@ -548,7 +571,7 @@ void ForceSNAP::read_files(char *coefffilename, char *paramfilena char* keyval = strtok(NULL,"' \t\n\r\f"); //if (comm->me == 0) { - //if (screen) + //if (screen) //if (logfile) fprintf(logfile,"SNAP keyword %s %s \n",keywd,keyval); //} @@ -583,6 +606,11 @@ void ForceSNAP::read_files(char *coefffilename, char *paramfilena template KOKKOS_INLINE_FUNCTION void ForceSNAP::operator() (const Kokkos::TeamPolicy<>::member_type& team) const { +} + +template +KOKKOS_INLINE_FUNCTION +void ForceSNAP::operator() (TagForceCompute, const Kokkos::TeamPolicy<>::member_type& team) const { const int i = team.league_rank(); SNA my_sna(sna,team); const double x_i = x(i,0); @@ -607,10 +635,59 @@ void ForceSNAP::operator() (const Kokkos::TeamPolicy<>::member_ty Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,num_neighs), [&] (const int jj, int& count) { Kokkos::single(Kokkos::PerThread(team), [&] (){ + + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + T_INT j = neighs_i(jj); - const T_F_FLOAT dx = x(j,0) - x_i; - const T_F_FLOAT dy = x(j,1) - y_i; - const T_F_FLOAT dz = x(j,2) - z_i; + const T_INDEX jg = global_index(j); + #ifdef SHMEMTESTS_USE_SCALAR + #ifdef SHMEMTESTS_USE_HALO + const T_X_FLOAT xj_shmem = x(j,0);//x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = x(j,1);//x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = x(j,2);//x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #ifdef SHMEMTESTS_USE_HALO_LOCAL + const T_X_FLOAT xj_shmem = jg/N_MAX_MASK==proc_rank?x(j,0):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = jg/N_MAX_MASK==proc_rank?x(j,1):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = jg/N_MAX_MASK==proc_rank?x(j,2):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #ifdef SHMEMTESTS_USE_LOCAL_GLOBAL + const T_X_FLOAT xj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+0]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+1]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+2]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #ifdef SHMEMTESTS_USE_GLOBAL + const T_X_FLOAT xj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #else + #ifdef SHMEMTESTS_USE_GLOBAL + const double3 posj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK); + const T_X_FLOAT xj_shmem = posj_shmem.x; + const T_X_FLOAT yj_shmem = posj_shmem.y; + const T_X_FLOAT zj_shmem = posj_shmem.z; + #endif + #endif + + T_F_FLOAT dx = -(abs(x_i - xj_shmem)>domain_x/2? + (x_i-xj_shmem<0?x_i-xj_shmem+domain_x:x_i-xj_shmem-domain_x) + :x_i-xj_shmem); + T_F_FLOAT dy = -(abs(y_i - yj_shmem)>domain_y/2? + (y_i-yj_shmem<0?y_i-yj_shmem+domain_y:y_i-yj_shmem-domain_y) + :y_i-yj_shmem); + T_F_FLOAT dz = -(abs(z_i - zj_shmem)>domain_z/2? + (z_i-zj_shmem<0?z_i-zj_shmem+domain_z:z_i-zj_shmem-domain_z) + :z_i-zj_shmem); + + #else //EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + + const T_F_FLOAT dx = xj_shmem - x_i; + const T_F_FLOAT dy = yj_shmem - y_i; + const T_F_FLOAT dz = zj_shmem - z_i; + + #endif + const int type_j = type(j); const T_F_FLOAT rsq = dx*dx + dy*dy + dz*dz; @@ -626,11 +703,56 @@ void ForceSNAP::operator() (const Kokkos::TeamPolicy<>::member_ty if(team.team_rank() == 0) Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,num_neighs), [&] (const int jj, int& offset, bool final){ - //for (int jj = 0; jj < num_neighs; jj++) { + //for (int jj = 0; jj < num_neighs; jj++) { T_INT j = neighs_i(jj); - const T_F_FLOAT dx = x(j,0) - x_i; - const T_F_FLOAT dy = x(j,1) - y_i; - const T_F_FLOAT dz = x(j,2) - z_i; + const T_INDEX jg = global_index(j); + + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + + #ifdef SHMEMTESTS_USE_SCALAR + #ifdef SHMEMTESTS_USE_HALO + const T_X_FLOAT xj_shmem = x(j,0);//x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = x(j,1);//x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = x(j,2);//x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #ifdef SHMEMTESTS_USE_HALO_LOCAL + const T_X_FLOAT xj_shmem = jg/N_MAX_MASK==proc_rank?x(j,0):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = jg/N_MAX_MASK==proc_rank?x(j,1):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = jg/N_MAX_MASK==proc_rank?x(j,2):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #ifdef SHMEMTESTS_USE_LOCAL_GLOBAL + const T_X_FLOAT xj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+0]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+1]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+2]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #ifdef SHMEMTESTS_USE_GLOBAL + const T_X_FLOAT xj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); + const T_X_FLOAT yj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); + const T_X_FLOAT zj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); + #endif + #else + #ifdef SHMEMTESTS_USE_GLOBAL + const double3 posj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK); + const T_X_FLOAT xj_shmem = posj_shmem.x; + const T_X_FLOAT yj_shmem = posj_shmem.y; + const T_X_FLOAT zj_shmem = posj_shmem.z; + #endif + #endif + T_F_FLOAT dx = -(abs(x_i - xj_shmem)>domain_x/2? + (x_i-xj_shmem<0?x_i-xj_shmem+domain_x:x_i-xj_shmem-domain_x) + :x_i-xj_shmem); + T_F_FLOAT dy = -(abs(y_i - yj_shmem)>domain_y/2? + (y_i-yj_shmem<0?y_i-yj_shmem+domain_y:y_i-yj_shmem-domain_y) + :y_i-yj_shmem); + T_F_FLOAT dz = -(abs(z_i - zj_shmem)>domain_z/2? + (z_i-zj_shmem<0?z_i-zj_shmem+domain_z:z_i-zj_shmem-domain_z) + :z_i-zj_shmem); + + #else //EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + const T_F_FLOAT dx = xj_shmem - x_i; + const T_F_FLOAT dy = yj_shmem - y_i; + const T_F_FLOAT dz = zj_shmem - z_i; + #endif const int type_j = type(j); const T_F_FLOAT rsq = dx*dx + dy*dy + dz*dz; @@ -717,3 +839,17 @@ void ForceSNAP::operator() (const Kokkos::TeamPolicy<>::member_ty }); //t5 += timer.seconds(); timer.reset(); } + +template +KOKKOS_INLINE_FUNCTION +void ForceSNAP::operator() (TagCopyLocalXShmem, const T_INT& i) const { + #ifdef SHMEMTESTS_USE_SCALAR + x_shmem_local(i,0) = x(i,0); + x_shmem_local(i,1) = x(i,1); + x_shmem_local(i,2) = x(i,2); + #else + double3 pos = {x(i,0),x(i,1),x(i,2)}; + x_shmem_local(i) = pos; + #endif +} + diff --git a/src/input.cpp b/src/input.cpp index c174c4f..799e04a 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -90,7 +90,7 @@ void ItemizedFile::print_line(int i) { int ItemizedFile::words_in_line(int i){ int count = 0; - for(int j=0; j("Input::force_coeff_lines",0); input_file_type = -1; - -#ifdef EXAMINIMD_ENABLE_MPI +#if defined(EXAMINIMD_ENABLE_MPI) && defined(EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) comm_type = COMM_MPI; #else comm_type = COMM_SERIAL; @@ -203,7 +202,7 @@ void Input::read_command_line_args(int argc, char* argv[]) { dumpbinaryflag = true; i += 2; } - + // Correctness Check else if( (strcmp(argv[i], "--correctness") == 0) ) { correctness_rate = atoi(argv[i+1]); @@ -264,7 +263,7 @@ void Input::read_lammps_file(const char* filename) { void Input::check_lammps_command(int line) { bool known = false; - + if(input_data.words[line][0][0]==0) { known = true; } if(strstr(input_data.words[line][0],"#")) { known = true; } if(strcmp(input_data.words[line][0],"variable")==0) { @@ -374,8 +373,7 @@ void Input::check_lammps_command(int line) { force_type = FORCE_LJ; force_cutoff = atof(input_data.words[line][2]); force_line = line; - } - if(strcmp(input_data.words[line][1],"snap")==0) { + }else if(strcmp(input_data.words[line][1],"snap")==0) { known = true; force_type = FORCE_SNAP; force_cutoff = 4.73442;// atof(input_data.words[line][2]); @@ -386,7 +384,7 @@ void Input::check_lammps_command(int line) { } if(strcmp(input_data.words[line][0],"pair_coeff")==0) { known = true; - int n_coeff_lines = force_coeff_lines.dimension_0(); + int n_coeff_lines = force_coeff_lines.extent(0); Kokkos::resize(force_coeff_lines,n_coeff_lines+1); force_coeff_lines( n_coeff_lines) = line; n_coeff_lines++; @@ -426,7 +424,7 @@ void Input::check_lammps_command(int line) { } if(strcmp(input_data.words[line][0],"run")==0) { known = true; - nsteps = atoi(input_data.words[line][1]); + nsteps = atoi(input_data.words[line][1]); } if(strcmp(input_data.words[line][0],"thermo")==0) { known = true; @@ -472,9 +470,9 @@ void Input::create_lattice(Comm* comm) { // Create Simple Cubic Lattice if(lattice_style == LATTICE_SC) { - system->domain_x = lattice_constant * lattice_nx; - system->domain_y = lattice_constant * lattice_ny; - system->domain_z = lattice_constant * lattice_nz; + system->domain_x = lattice_constant * lattice_nx; + system->domain_y = lattice_constant * lattice_ny; + system->domain_z = lattice_constant * lattice_nz; comm->create_domain_decomposition(); s = *system; @@ -506,9 +504,12 @@ void Input::create_lattice(Comm* comm) { } } } + system->N_local = n; system->N = n; - system->grow(n); + int global_n_max = n; + comm->reduce_max_int(&global_n_max,1); + system->grow(global_n_max); s = *system; h_x = Kokkos::create_mirror_view(s.x); h_v = Kokkos::create_mirror_view(s.v); @@ -521,38 +522,8 @@ void Input::create_lattice(Comm* comm) { // zero out momentum of the whole system afterwards, to eliminate // drift (bad for energy statistics) - for(T_INT iz=iz_start; iz<=iz_end; iz++) { - T_FLOAT ztmp = lattice_constant * (iz+lattice_offset_z); - for(T_INT iy=iy_start; iy<=iy_end; iy++) { - T_FLOAT ytmp = lattice_constant * (iy+lattice_offset_y); - for(T_INT ix=ix_start; ix<=ix_end; ix++) { - T_FLOAT xtmp = lattice_constant * (ix+lattice_offset_x); - if((xtmp >= s.sub_domain_lo_x) && - (ytmp >= s.sub_domain_lo_y) && - (ztmp >= s.sub_domain_lo_z) && - (xtmp < s.sub_domain_hi_x) && - (ytmp < s.sub_domain_hi_y) && - (ztmp < s.sub_domain_hi_z) ) { - n++; - } - } - } - } - system->grow(n); - System s = *system; - h_x = Kokkos::create_mirror_view(s.x); - h_v = Kokkos::create_mirror_view(s.v); - h_q = Kokkos::create_mirror_view(s.q); - h_type = Kokkos::create_mirror_view(s.type); - h_id = Kokkos::create_mirror_view(s.id); - n = 0; - // Initialize system using the equivalent of the LAMMPS - // velocity geom option, i.e. uniform random kinetic energies. - // zero out momentum of the whole system afterwards, to eliminate - // drift (bad for energy statistics) - for(T_INT iz=iz_start; iz<=iz_end; iz++) { T_FLOAT ztmp = lattice_constant * (iz+lattice_offset_z); for(T_INT iy=iy_start; iy<=iy_end; iy++) { @@ -640,7 +611,9 @@ void Input::create_lattice(Comm* comm) { system->N_local = n; system->N = n; - system->grow(n); + int global_n_max = n; + comm->reduce_max_int(&global_n_max,1); + system->grow(global_n_max); s = *system; h_x = Kokkos::create_mirror_view(s.x); h_v = Kokkos::create_mirror_view(s.v); @@ -653,40 +626,8 @@ void Input::create_lattice(Comm* comm) { // zero out momentum of the whole system afterwards, to eliminate // drift (bad for energy statistics) - for(T_INT iz=iz_start; iz<=iz_end; iz++) { - for(T_INT iy=iy_start; iy<=iy_end; iy++) { - for(T_INT ix=ix_start; ix<=ix_end; ix++) { - for(int k = 0; k<4; k++) { - T_FLOAT xtmp = lattice_constant * (1.0*ix+basis[k][0]); - T_FLOAT ytmp = lattice_constant * (1.0*iy+basis[k][1]); - T_FLOAT ztmp = lattice_constant * (1.0*iz+basis[k][2]); - if((xtmp >= s.sub_domain_lo_x) && - (ytmp >= s.sub_domain_lo_y) && - (ztmp >= s.sub_domain_lo_z) && - (xtmp < s.sub_domain_hi_x) && - (ytmp < s.sub_domain_hi_y) && - (ztmp < s.sub_domain_hi_z) ) { - n++; - } - } - } - } - } - system->grow(n); - System s = *system; - h_x = Kokkos::create_mirror_view(s.x); - h_v = Kokkos::create_mirror_view(s.v); - h_q = Kokkos::create_mirror_view(s.q); - h_type = Kokkos::create_mirror_view(s.type); - h_id = Kokkos::create_mirror_view(s.id); - n = 0; - // Initialize system using the equivalent of the LAMMPS - // velocity geom option, i.e. uniform random kinetic energies. - // zero out momentum of the whole system afterwards, to eliminate - // drift (bad for energy statistics) - for(T_INT iz=iz_start; iz<=iz_end; iz++) { for(T_INT iy=iy_start; iy<=iy_end; iy++) { for(T_INT ix=ix_start; ix<=ix_end; ix++) { @@ -723,12 +664,8 @@ void Input::create_lattice(Comm* comm) { if(system->do_print) printf("Atoms: %i %i\n",system->N,system->N_local); } - // Initialize velocity using the equivalent of the LAMMPS - // velocity geom option, i.e. uniform random kinetic energies. - // zero out momentum of the whole system afterwards, to eliminate - // drift (bad for energy statistics) - - { // Scope s + + { //Scope System s = *system; T_FLOAT total_mass = 0.0; T_FLOAT total_momentum_x = 0.0; diff --git a/src/main.cpp b/src/main.cpp index 98c82d8..b879154 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -45,29 +45,26 @@ #include "mpi.h" #endif -int main(int argc, char* argv[]) { +#ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES +#include +#endif - #ifdef EXAMINIMD_ENABLE_MPI - MPI_Init(&argc,&argv); +int main(int argc, char* argv[]) { + #if defined (EXAMINIMD_ENABLE_MPI) || defined (EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) + comm_lib_init(argc,argv); #endif Kokkos::initialize(argc,argv); ExaMiniMD examinimd; examinimd.init(argc,argv); - examinimd.run(examinimd.input->nsteps); - - // examinimd.check_correctness(); - examinimd.print_performance(); - examinimd.shutdown(); Kokkos::finalize(); - #ifdef EXAMINIMD_ENABLE_MPI - MPI_Finalize(); + #if defined (EXAMINIMD_ENABLE_MPI) || defined (EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) + comm_lib_finalize(); #endif } - diff --git a/src/modules_comm.h b/src/modules_comm.h index a111d15..a62de7a 100644 --- a/src/modules_comm.h +++ b/src/modules_comm.h @@ -37,7 +37,10 @@ //************************************************************************ // Include Module header files for comm -#ifdef EXAMINIMD_ENABLE_MPI + +#if defined(EXAMINIMD_ENABLE_MPI) || defined(EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) +#include #include -#endif +#else #include +#endif diff --git a/src/modules_force.h b/src/modules_force.h index 7ad4702..e780654 100644 --- a/src/modules_force.h +++ b/src/modules_force.h @@ -37,7 +37,16 @@ //************************************************************************ // Include Module header files for force -#include +#include +#ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES +#include +#else #include +#endif #include -#include +#include + +// SNAP is outdated and likely subject to removal +//#ifndef KOKKOS_ENABLE_OPENMPTARGET +//#include +//#endif diff --git a/src/neighbor_types/CMakeLists.txt b/src/neighbor_types/CMakeLists.txt new file mode 100644 index 0000000..d7870d5 --- /dev/null +++ b/src/neighbor_types/CMakeLists.txt @@ -0,0 +1,3 @@ +FILE(GLOB SRCS *.cpp) +target_sources(ExaMiniMD PRIVATE ${SRCS}) + diff --git a/src/neighbor_types/neighbor_2d.cpp b/src/neighbor_types/neighbor_2d.cpp index 3cc5c8f..8a54522 100644 --- a/src/neighbor_types/neighbor_2d.cpp +++ b/src/neighbor_types/neighbor_2d.cpp @@ -38,8 +38,8 @@ #include -#ifdef KOKKOS_ENABLE_CUDA -template struct Neighbor2D; +#ifdef EXAMINIMD_HAS_GPU +template struct Neighbor2D; #endif template struct Neighbor2D; diff --git a/src/neighbor_types/neighbor_2d.h b/src/neighbor_types/neighbor_2d.h index bca409b..89ba193 100644 --- a/src/neighbor_types/neighbor_2d.h +++ b/src/neighbor_types/neighbor_2d.h @@ -152,7 +152,6 @@ class Neighbor2D: public Neighbor { t_neigh_list neigh_list; - Neighbor2D():neigh_cut(0.0) { neigh_type = NEIGH_2D; @@ -244,7 +243,7 @@ class Neighbor2D: public Neighbor { (by_j>=nhalo) && (by_j=nhalo) && (bz_jpermute_vector; do { - // Resize NeighborList if( neigh_list.neighs.extent(0) < N_local + 1 || neigh_list.neighs.extent(1) < neigh_list.maxneighs ) neigh_list.neighs = Kokkos::View ("Neighbor2D::neighs", N_local + 1, neigh_list.maxneighs); - // Fill the NeighborList Kokkos::deep_copy(neigh_list.num_neighs,0); Kokkos::deep_copy(resize,0); @@ -317,7 +314,6 @@ class Neighbor2D: public Neighbor { else Kokkos::parallel_for("Neighbor2D::fill_neigh_list_full",t_policy_fnlf(nbins,Kokkos::AUTO,8),*this); - Kokkos::fence(); Kokkos::deep_copy(h_resize,resize); diff --git a/src/neighbor_types/neighbor_csr.cpp b/src/neighbor_types/neighbor_csr.cpp index 76cffd2..cc5f09a 100644 --- a/src/neighbor_types/neighbor_csr.cpp +++ b/src/neighbor_types/neighbor_csr.cpp @@ -38,8 +38,8 @@ #include -#ifdef KOKKOS_ENABLE_CUDA -template struct NeighborCSR; +#ifdef EXAMINIMD_HAS_GPU +template struct NeighborCSR; #endif template struct NeighborCSR; diff --git a/src/neighbor_types/neighbor_csr.h b/src/neighbor_types/neighbor_csr.h index 859ee24..68593c1 100644 --- a/src/neighbor_types/neighbor_csr.h +++ b/src/neighbor_types/neighbor_csr.h @@ -79,7 +79,7 @@ #include template -struct NeighListCSR : public Kokkos::StaticCrsGraph { +struct NeighListCSR : public Kokkos::StaticCrsGraph { struct NeighViewCSR { private: const T_INT* const ptr; @@ -100,14 +100,14 @@ struct NeighListCSR : public Kokkos::StaticCrsGraph() {} + Kokkos::StaticCrsGraph() {} NeighListCSR (const NeighListCSR& rhs) : - Kokkos::StaticCrsGraph(rhs) { + Kokkos::StaticCrsGraph(rhs) { } template NeighListCSR (const EntriesType& entries_,const RowMapType& row_map_) : - Kokkos::StaticCrsGraph( entries_, row_map_) {} + Kokkos::StaticCrsGraph( entries_, row_map_) {} KOKKOS_INLINE_FUNCTION @@ -145,8 +145,6 @@ class NeighborCSR: public Neighbor { typename Binning::t_bincount bin_count; typename Binning::t_permute_vector permute_vector; - - public: struct TagCreateOffsets {}; struct TagCountNeighsFull {}; @@ -274,10 +272,10 @@ class NeighborCSR: public Neighbor { for(int by_j = by-1; by_jby) || ((by_j==by) && (bz_j>bz) )))) && - (bx_j>=nhalo) && (bx_jby) || ((by_j==by) && (bz_j>bz) )))) && + (bx_j>=nhalo) && (bx_j=nhalo) && (by_j=nhalo) && (bz_j=nhalo) && (bz_jby) || ((by_j==by) && (bz_j>bz) )))) && - (bx_j>nhalo) && (bx_jby) || ((by_j==by) && (bz_j>bz) )))) && + (bx_j>nhalo) && (bx_jnhalo) && (by_jnhalo) && (bz_jnhalo) && (bz_j( neighs, Kokkos::pair(0,total_num_neighs)), + Kokkos::View( neighs, Kokkos::pair(0,total_num_neighs)), Kokkos::View( neigh_offsets, Kokkos::pair(0,N_local+1))); } diff --git a/src/neighbor_types/neighbor_csr_map_constr.cpp b/src/neighbor_types/neighbor_csr_map_constr.cpp index 1381431..adf9a5e 100644 --- a/src/neighbor_types/neighbor_csr_map_constr.cpp +++ b/src/neighbor_types/neighbor_csr_map_constr.cpp @@ -38,8 +38,8 @@ #include -#ifdef KOKKOS_ENABLE_CUDA -template struct NeighborCSRMapConstr; +#ifdef EXAMINIMD_HAS_GPU +template struct NeighborCSRMapConstr; #endif template struct NeighborCSRMapConstr; diff --git a/src/system.cpp b/src/system.cpp index b478aa9..c0d7590 100644 --- a/src/system.cpp +++ b/src/system.cpp @@ -37,9 +37,11 @@ //************************************************************************ #include + #ifdef EXAMINIMD_ENABLE_MPI #include #endif + System::System() { N = 0; N_max = 0; @@ -50,6 +52,9 @@ System::System() { v = t_v(); f = t_f(); id = t_id(); + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + global_index = t_index(); + #endif type = t_type(); q = t_q(); mass = t_mass(); @@ -58,7 +63,7 @@ System::System() { sub_domain_hi_x = sub_domain_hi_y = sub_domain_hi_z = 0.0; sub_domain_lo_x = sub_domain_lo_y = sub_domain_lo_z = 0.0; mvv2e = boltz = dt = 0.0; -#ifdef EXAMINIMD_ENABLE_MPI + #if defined (EXAMINIMD_ENABLE_MPI) || defined (EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) int proc_rank; MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank); do_print = proc_rank == 0; @@ -73,6 +78,9 @@ void System::init() { v = t_v("System::v",N_max); f = t_f("System::f",N_max); id = t_id("System::id",N_max); + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + global_index = t_index("System::global_index",N_max); + #endif type = t_type("System::type",N_max); q = t_q("System::q",N_max); mass = t_mass("System::mass",ntypes); @@ -87,6 +95,9 @@ void System::destroy() { v = t_v(); f = t_f(); id = t_id(); + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + global_index = t_index(); + #endif type = t_type(); q = t_q(); mass = t_mass(); @@ -99,15 +110,35 @@ void System::grow(T_INT N_new) { Kokkos::resize(x,N_max); // Positions Kokkos::resize(v,N_max); // Velocities Kokkos::resize(f,N_max); // Forces - Kokkos::resize(id,N_max); // Id - + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + Kokkos::resize(global_index,N_max); // Id + #endif Kokkos::resize(type,N_max); // Particle Type - Kokkos::resize(q,N_max); // Charge + + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + int num_ranks; + MPI_Comm_size(MPI_COMM_WORLD, &num_ranks); + x_shmem = t_x_shmem("X_shmem", num_ranks, N_max); // Positions (distrib) + #endif } } +void System::print_particles_from_device_data() +{ + printf("Print all particles (GPU): \n"); + printf(" Owned: %d\n",N_local); + Kokkos::parallel_for("print_particles_2", N_local, KOKKOS_LAMBDA(int i){ + printf(" %d %lf %lf %lf | %lf %lf %lf | %lf %lf %lf | %d %e\n",i, + double(x(i,0)),double(x(i,1)),double(x(i,2)), + double(v(i,0)),double(v(i,1)),double(v(i,2)), + double(f(i,0)),double(f(i,1)),double(f(i,2)), + type(i),q(i)); + }); + Kokkos::fence(); +} + void System::print_particles() { printf("Print all particles: \n"); printf(" Owned: %d\n",N_local); diff --git a/src/system.h b/src/system.h index 017d42d..0dae9d4 100644 --- a/src/system.h +++ b/src/system.h @@ -45,12 +45,14 @@ struct Particle { Particle() { x=y=z=vx=vy=vz=mass=q=0.0; id = type = 0; + global_index = 0; } T_X_FLOAT x,y,z; T_V_FLOAT vx,vy,vz,mass; T_FLOAT q; T_INT id; + T_INDEX global_index; int type; }; @@ -65,12 +67,18 @@ class System { // Per Particle Property t_x x; // Positions + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + t_x_shmem x_shmem; // Positions + #endif t_v v; // Velocities t_f f; // Forces - t_type type; // Particle Type - t_id id; // Particle ID - + t_type type; // Particle Type + t_id id; // Particle ID + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + t_index global_index; // Index for distibuted view indexing + #endif + t_q q; // Charge // Per Type Property @@ -107,6 +115,9 @@ class System { p.q = q(i); p.id = id(i); p.type = type(i); + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + p.global_index = global_index(i); + #endif return p; } @@ -117,6 +128,9 @@ class System { q(i) = p.q; id(i) = p.id; type(i) = p.type; + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + global_index(i) = p.global_index; + #endif } KOKKOS_INLINE_FUNCTION @@ -130,6 +144,9 @@ class System { type(dest) = type(src); id(dest) = id(src); q(dest) = q(src); + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + global_index(dest) = global_index(src); + #endif } KOKKOS_INLINE_FUNCTION @@ -140,5 +157,6 @@ class System { } void print_particles(); + void print_particles_from_device_data(); }; #endif diff --git a/src/types.h b/src/types.h index 1583e18..e476863 100644 --- a/src/types.h +++ b/src/types.h @@ -38,8 +38,13 @@ #ifndef TYPES_H #define TYPES_H + #include +#ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES +#include +#endif + // Module Types etc // Units to be used enum {UNITS_REAL,UNITS_LJ,UNITS_METAL}; @@ -60,6 +65,10 @@ enum {NEIGH_NONE, NEIGH_CSR, NEIGH_CSR_MAPCONSTR, NEIGH_2D}; // Input File Type enum {INPUT_LAMMPS}; +#ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES +enum INDEX_TYPE: int64_t { N_MAX_MASK = 1024*1024*1024 }; +#endif + // Macros to work around the fact that std::max/min is not available on GPUs #define MAX(a,b) (a>b?a:b) #define MIN(a,b) (a t_x; // Positions typedef Kokkos::View t_x_const; // Positions typedef Kokkos::View> t_x_const_rnd; // Positions +#ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES +#ifdef SHMEMTESTS_USE_SCALAR +typedef Kokkos::View t_x_shmem; // PGAS Positions +typedef Kokkos::View t_x_shmem_local; // Local PGAS Positions +#else +typedef Kokkos::View t_x_shmem; // PGAS Positions +typedef Kokkos::View t_x_shmem_local; // Local PGAS Positions +#endif +#endif //EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES typedef Kokkos::View t_v; // Velocities typedef Kokkos::View t_f; // Force typedef Kokkos::View> t_type_const_rnd; // Type (int is enough as type) typedef Kokkos::View t_id; // ID typedef Kokkos::View t_id_const; // ID +#ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES +typedef Kokkos::View t_index; // ID +typedef Kokkos::View t_index_const; // ID +#endif typedef Kokkos::View t_q; // Charge typedef Kokkos::View t_q_const; // Charge @@ -178,5 +204,10 @@ t_scalar3 operator * (const Scalar& b, const t_scalar3& a) { return t_scalar3(a.x*b,a.y*b,a.z*b); } + +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || defined(KOKKOS_ENABLE_OPENMPTARGET) || defined(KOKKOS_ENABLE_SYCL) +#define EXAMINIMD_HAS_GPU +#endif + #endif