Skip to content

Commit

Permalink
Merge branch 'checkpoint-benchmarkIII' into 'master'
Browse files Browse the repository at this point in the history
Checkpoint and Resilient execution benchmark additions

See merge request kokkos-resilience/minimd!1
  • Loading branch information
jeffmiles63 committed Jul 18, 2019
2 parents ce5e74c + 4cd5bef commit 05185b0
Show file tree
Hide file tree
Showing 20 changed files with 548 additions and 16 deletions.
81 changes: 81 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
cmake_minimum_required(VERSION 3.12)
project(minimd)

# The executable is declared without sources here; they are attached by
# kokkos/CMakeLists.txt (pulled in through add_subdirectory(kokkos)) using
# target_sources().
add_executable(minimd)

# Require C++14.
# When the compiler path ends in nvcc_wrapper no compile feature is requested
# on the target; the user is told to configure the standard in the Kokkos
# build instead.
if(CMAKE_CXX_COMPILER MATCHES ".*nvcc_wrapper")
  message("nvcc wrapper requires that CXX standard be set in kokkos build")
else()
  target_compile_features(minimd PUBLIC cxx_std_14)
endif()

# Options
#
# MINIMD_SINGLE_PRECISION selects the PRECISION compile definition:
# PRECISION=1 when ON, PRECISION=2 otherwise.
# (The option name must not be followed by a comma: a trailing comma becomes
# part of the cache variable name, so the option would be registered under the
# wrong name.)
option(MINIMD_SINGLE_PRECISION "Use single precision" OFF)
if(MINIMD_SINGLE_PRECISION)
  target_compile_definitions(minimd PRIVATE PRECISION=1)
else()
  target_compile_definitions(minimd PRIVATE PRECISION=2)
endif()

# Resilience modes. Turning any of these ON makes the "Optional resilience"
# section of this file require and link the resilience package.
option(MINIMD_AUTOMATIC_CHECKPOINT "Use resilience automatic checkpointing" OFF)
option(MINIMD_MANUAL_CHECKPOINT "Use resilience manual checkpointing" OFF)
option(MINIMD_RESILIENT_EXECUTION "Use resilience resilient execution" OFF)

# When ON, the "Dependencies" section links the MPI stub library from
# kokkos/MPI-Stubs instead of calling find_package(MPI).
option(MINIMD_USE_MPISTUB "Link against the bundled MPI stub library instead of MPI" OFF)

# Always defined for the minimd target.
target_compile_definitions(minimd PRIVATE PREC_TIMER)

# Dependencies
#
# MPI: either the stub library located under kokkos/MPI-Stubs or a real MPI
# found through CMake's FindMPI module.
if(MINIMD_USE_MPISTUB)
  find_library(mpi_stub_
    NAMES libmpi_stubs.a libmpi_stubs mpi_stubs
    HINTS "${PROJECT_SOURCE_DIR}/kokkos/MPI-Stubs"
  )
  # Fail right away with an actionable message instead of passing a
  # "-NOTFOUND" value on to target_link_libraries().
  if(NOT mpi_stub_)
    message(FATAL_ERROR
      "MINIMD_USE_MPISTUB is ON but the MPI stub library (mpi_stubs) was not "
      "found; looked in ${PROJECT_SOURCE_DIR}/kokkos/MPI-Stubs and the default "
      "library search paths.")
  endif()
  target_link_libraries(minimd PRIVATE ${mpi_stub_})
  target_include_directories(minimd PUBLIC
    "${PROJECT_SOURCE_DIR}/kokkos/MPI-Stubs"
  )
else()
  find_package(MPI REQUIRED)
  target_link_libraries(minimd PRIVATE MPI::MPI_CXX)
endif()

# Kokkos is mandatory; the CMake user package registry is not consulted.
find_package(Kokkos REQUIRED NO_CMAKE_PACKAGE_REGISTRY)

# Optional resilience
#
# Any of the three resilience switches requires the resilience package, links
# Kokkos::resilience and defines MINIMD_RESILIENCE; each switch additionally
# contributes its own KOKKOS_ENABLE_* definition.
if(MINIMD_AUTOMATIC_CHECKPOINT OR MINIMD_MANUAL_CHECKPOINT OR MINIMD_RESILIENT_EXECUTION)
  find_package(resilience REQUIRED)
  target_link_libraries(minimd PRIVATE Kokkos::resilience)
  target_compile_definitions(minimd PRIVATE MINIMD_RESILIENCE)
  if(MINIMD_AUTOMATIC_CHECKPOINT)
    target_compile_definitions(minimd PRIVATE KOKKOS_ENABLE_AUTOMATIC_CHECKPOINT)
  endif()
  if(MINIMD_MANUAL_CHECKPOINT)
    target_compile_definitions(minimd PRIVATE KOKKOS_ENABLE_MANUAL_CHECKPOINT)
  endif()
  if(MINIMD_RESILIENT_EXECUTION)
    target_compile_definitions(minimd PRIVATE KOKKOS_ENABLE_RESILIENT_EXECUTION)
  endif()
endif()

# Command provided by the Kokkos package found earlier in this file.
TARGET_LINK_KOKKOS(minimd PRIVATE)

# Extra library search directories, one -L flag per directory.
# NOTE(review): resilience_LINK_DIRECTORIES is presumably set by the resilience
# package (or by the user) -- confirm. Iterating the list means that an
# empty/undefined variable adds nothing (rather than a bare "-L"), and a
# multi-entry list yields one well-formed flag per entry.
foreach(resilience_link_dir IN LISTS resilience_LINK_DIRECTORIES)
  target_link_libraries(minimd PRIVATE "-L${resilience_link_dir}")
endforeach()

# Runtime input files are copied from the source tree into the build directory
# whenever the minimd target is built. Per the CMake documentation, PRE_BUILD
# is honoured only by Visual Studio generators and is treated as PRE_LINK by
# the others. VERBATIM keeps argument escaping platform-independent.
add_custom_command(TARGET minimd PRE_BUILD
  # VeloC config
  COMMAND ${CMAKE_COMMAND} -E copy
          "${CMAKE_CURRENT_SOURCE_DIR}/minimd.cfg"
          "${CMAKE_CURRENT_BINARY_DIR}/minimd.cfg"
  # Other inputs
  COMMAND ${CMAKE_COMMAND} -E copy
          "${CMAKE_CURRENT_SOURCE_DIR}/kokkos/in.lj.miniMD"
          "${CMAKE_CURRENT_BINARY_DIR}/in.lj.miniMD"
  COMMAND ${CMAKE_COMMAND} -E copy
          "${CMAKE_CURRENT_SOURCE_DIR}/kokkos/Cu_u6.eam"
          "${CMAKE_CURRENT_BINARY_DIR}/Cu_u6.eam"
  VERBATIM
)

# kokkos/CMakeLists.txt attaches the source files to the minimd target.
add_subdirectory(kokkos)


##get_cmake_property(_variableNames VARIABLES)
##list (SORT _variableNames)
##foreach (_variableName ${_variableNames})
## message(STATUS "${_variableName}=${${_variableName}}")
##endforeach()

14 changes: 14 additions & 0 deletions kokkos/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Attach the miniMD translation units to the minimd executable, in the listed
# order. Every path is absolute, rooted at ${PROJECT_SOURCE_DIR}/kokkos.
# NOTE(review): absolute paths are presumably used because older CMake
# resolves relative target_sources() entries against the directory where the
# target was created, not this one -- confirm before switching to relative
# paths.
foreach(minimd_unit IN ITEMS
    atom
    comm
    force_eam
    force_lj
    input
    integrate
    ljs
    neighbor
    output
    setup
    thermo
    timer)
  target_sources(minimd PRIVATE "${PROJECT_SOURCE_DIR}/kokkos/${minimd_unit}.cpp")
endforeach()
27 changes: 22 additions & 5 deletions kokkos/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,44 @@ MPI_PATH =
HAVE_MPI = yes

#Set the path to Kokkos
KOKKOS_PATH = /home/crtrott/kokkos
#KOKKOS_PATH = /home/crtrott/kokkos
#Set the Devices to compile for
KOKKOS_DEVICES=OpenMP
#Set the Architecture to compile for
KOKKOS_ARCH=SNB
KOKKOS_ARCH=
#Set third party library usage
KOKKOS_USE_TPL=
KOKKOS_OPTIONS=enable_hdf5_parallel

CXXFLAGS = -O3 -g
LINKFLAGS =
CXXFLAGS = -O3 -g
##LINKFLAGS = -lm -L./ --std=c++11
LINKFLAGS = -L./ --std=c++11
LIB =

FILE_EXTENSION=base

ifeq ($(KOKKOS_OPTIONS), enable_stdfile)
FILE_EXTENSION=stdfile
endif
ifeq ($(KOKKOS_OPTIONS), enable_hdf5)
FILE_EXTENSION=hdf5ser
endif
ifeq ($(KOKKOS_OPTIONS), enable_hdf5_parallel)
FILE_EXTENSION=hdf5par
endif

ifeq ($(HAVE_MPI), yes)
##CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper
CXX = mpicxx
LIB += -lmpi
else
CXX = g++
override CXXFLAGS += -I./MPI-Stubs
LIB += MPI-Stubs/libmpi_stubs.a
endif

LINK = ${CXX}
EXE = miniMD
EXE = miniMD.${FILE_EXTENSION}

#CXXFLAGS += -DTEST_LAMBDA_BYCOPY
#CXXFLAGS += -DTEST_LAMBDA_BYPTR
Expand Down Expand Up @@ -75,6 +91,7 @@ default: all

MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))
include $(KOKKOS_PATH)/Makefile.kokkos
include $(KOKKOS_RESILIENCE_PATH)/Makefile.resilience

SRC = $(wildcard $(MAKEFILE_PATH)*.cpp)
HEADERS = $(wildcard $(MAKEFILE_PATH)*.hpp)
Expand Down
13 changes: 10 additions & 3 deletions kokkos/atom.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,14 @@
#include "atom.h"
#include "neighbor.h"

#define DELTA 20000
int Atom::atom_block = 10000;

Atom::Atom(int ntypes_)
// Overwrites the static Atom::atom_block with size_.
// No validation is performed on size_; the value is stored as given.
void Atom::set_atom_block_size(int size_)
{
  Atom::atom_block = size_;
}

Atom::Atom(int ntypes_) : x("atom",0), v("velocity",0), f("force",0)
{
natoms = 0;
nlocal = 0;
Expand All @@ -61,7 +66,9 @@ Atom::~Atom()

void Atom::growarray()
{
nmax += DELTA;
// printf("grow array: %ld\n", nmax);
// fflush(stdout);
nmax += atom_block;
Kokkos::resize(x,nmax);
Kokkos::resize(v,nmax);
Kokkos::resize(f,nmax);
Expand Down
5 changes: 4 additions & 1 deletion kokkos/atom.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ class Atom
struct TagAtomUnpackReverse {};
struct TagAtomSort {};

static int atom_block;
static void set_atom_block_size(int size_);

typedef int value_type;
int natoms;
int nlocal, nghost;
Expand All @@ -82,7 +85,7 @@ class Atom

Box box;

Atom() {};
Atom() : x("atom",0), v("velocity",0), f("force",0) {};
Atom(int ntypes_);
~Atom();

Expand Down
59 changes: 59 additions & 0 deletions kokkos/data/hdf5_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
[
{
"name":"atom",
"Layout_Config":{
"data_set":"atom_dataset",
"rank":2,
"layout":"REGULAR",
"data_extents":["{DATA_SIZE}","{MPI_SIZE}"],
"local_extents":["{DATA_SIZE}","1"],
"offset":["0","{MPI_RANK}"],
"stride":["1","{MPI_SIZE}"],
"count":["1","1","1","1"],
"block":["{DATA_SIZE}","1"],
"local_offset":["0","0","0","0"],
"local_stride":["1","1","1","1"],
"local_count":["1","1","1","1"],
"local_block":["{DATA_SIZE}","1","1","1"],
"view_offset":["0","0"]
}
},
{
"name":"velocity",
"Layout_Config":{
"data_set":"velocity_dataset",
"rank":2,
"layout":"REGULAR",
"data_extents":["{DATA_SIZE}","{MPI_SIZE}"],
"local_extents":["{DATA_SIZE}","1"],
"offset":["0","{MPI_RANK}"],
"stride":["1","{MPI_SIZE}"],
"count":["1","1","1","1"],
"block":["{DATA_SIZE}","1"],
"local_offset":["0","0","0","0"],
"local_stride":["1","1","1","1"],
"local_count":["1","1","1","1"],
"local_block":["{DATA_SIZE}","1","1","1"],
"view_offset":["0","0"]
}
},
{
"name":"force",
"Layout_Config":{
"data_set":"force_dataset",
"rank":2,
"layout":"REGULAR",
"data_extents":["{DATA_SIZE}","{MPI_SIZE}"],
"local_extents":["{DATA_SIZE}","1"],
"offset":["0","{MPI_RANK}"],
"stride":["1","{MPI_SIZE}"],
"count":["1","1","1","1"],
"block":["{DATA_SIZE}","1"],
"local_offset":["0","0","0","0"],
"local_stride":["1","1","1","1"],
"local_count":["1","1","1","1"],
"local_block":["{DATA_SIZE}","1","1","1"],
"view_offset":["0","0"]
}
}
]
75 changes: 71 additions & 4 deletions kokkos/integrate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,27 @@
#include "stdio.h"
#include "integrate.h"
#include "math.h"
#include <cstdlib>

#ifdef KOKKOS_ENABLE_HDF5
#define CHECKPOINT_FILESPACE Kokkos::Experimental::HDF5Space
#else
#define CHECKPOINT_FILESPACE Kokkos::Experimental::StdFileSpace
#endif

#ifdef MINIMD_RESILIENCE
#include <Kokkos_Resilience.hpp>
#endif

#ifdef KOKKOS_ENABLE_RESILIENT_EXECUTION
#define DEVICE_EXECUTION_SPACE Kokkos::ResCuda
#else
#ifdef KOKKOS_ENABLE_CUDA
#define DEVICE_EXECUTION_SPACE Kokkos::Cuda
#else
#define DEVICE_EXECUTION_SPACE Kokkos::OpenMP
#endif
#endif

Integrate::Integrate() {sort_every=20;}
Integrate::~Integrate() {}
Expand All @@ -44,7 +65,7 @@ void Integrate::setup()

void Integrate::initialIntegrate()
{
Kokkos::parallel_for(Kokkos::RangePolicy<TagInitialIntegrate>(0,nlocal), *this);
Kokkos::parallel_for(Kokkos::RangePolicy<DEVICE_EXECUTION_SPACE, TagInitialIntegrate>(0,nlocal), *this);
}

KOKKOS_INLINE_FUNCTION
Expand All @@ -59,7 +80,7 @@ void Integrate::operator() (TagInitialIntegrate, const int& i) const {

void Integrate::finalIntegrate()
{
Kokkos::parallel_for(Kokkos::RangePolicy<TagFinalIntegrate>(0,nlocal), *this);
Kokkos::parallel_for(Kokkos::RangePolicy<DEVICE_EXECUTION_SPACE, TagFinalIntegrate>(0,nlocal), *this);
}

KOKKOS_INLINE_FUNCTION
Expand All @@ -70,7 +91,7 @@ void Integrate::operator() (TagFinalIntegrate, const int& i) const {
}

void Integrate::run(Atom &atom, Force* force, Neighbor &neighbor,
Comm &comm, Thermo &thermo, Timer &timer)
Comm &comm, Thermo &thermo, Timer &timer, const int restart_)
{
int i, n;

Expand All @@ -83,8 +104,32 @@ void Integrate::run(Atom &atom, Force* force, Neighbor &neighbor,
dtforce = dtforce / mass;

int next_sort = sort_every>0?sort_every:ntimes+1;
int nStart = 0;

#ifdef KOKKOS_ENABLE_MANUAL_CHECKPOINT
Kokkos::Experimental::StdFileSpace sfs;
auto x_cp = Kokkos::create_chkpt_mirror( sfs, atom.x );
auto v_cp = Kokkos::create_chkpt_mirror( sfs, atom.v );
auto f_cp = Kokkos::create_chkpt_mirror( sfs, atom.f );
nStart = restart_;

// Load from restart ...
if (nStart > 0) {
if (comm.nprocs > 1)
Kokkos::Experimental::DirectoryManager<CHECKPOINT_FILESPACE>::set_checkpoint_directory(comm.me == 0 ? true : false, "./data", (int)((nStart / 10) * 10));
else
Kokkos::Experimental::DirectoryManager<CHECKPOINT_FILESPACE>::set_checkpoint_directory(comm.me == 0 ? true : false, "./data", (int)((nStart / 10) * 10), comm.me);
// need to resize the views to match the checkpoint files ...
Kokkos::Experimental::StdFileSpace::restore_all_views();
}
#endif

for(n = 0; n < ntimes; n++) {
for(n = nStart; n < ntimes; n++) {
#ifdef KOKKOS_ENABLE_AUTOMATIC_CHECKPOINT
#ifdef KR_ENABLE_TRACING
auto iter_time = KokkosResilience::Util::begin_trace< KokkosResilience::Util::IterTimingTrace< std::string > >( *resilience_context, "step", n );
#endif
#endif

Kokkos::fence();

Expand All @@ -94,7 +139,13 @@ void Integrate::run(Atom &atom, Force* force, Neighbor &neighbor,
xold = atom.xold;
nlocal = atom.nlocal;

#ifdef KOKKOS_ENABLE_AUTOMATIC_CHECKPOINT
KokkosResilience::checkpoint( *resilience_context, "initial", n, [self = *this]() mutable {
self.initialIntegrate();
}, KokkosResilience::filter::nth_iteration_filter< 10 >{} );
#else
initialIntegrate();
#endif

timer.stamp();

Expand Down Expand Up @@ -179,8 +230,24 @@ void Integrate::run(Atom &atom, Force* force, Neighbor &neighbor,

Kokkos::fence();

#ifdef KOKKOS_ENABLE_AUTOMATIC_CHECKPOINT
KokkosResilience::checkpoint( *resilience_context, "final", n, [self = *this]() mutable {
self.finalIntegrate();
}, KokkosResilience::filter::nth_iteration_filter< 10 >{} );
#else
finalIntegrate();
#endif

if(thermo.nstat) thermo.compute(n + 1, atom, neighbor, force, timer, comm);
#ifdef KOKKOS_ENABLE_MANUAL_CHECKPOINT
if ( n % 10 == 0 ) {
Kokkos::fence();
if (comm.nprocs > 1)
Kokkos::Experimental::DirectoryManager<CHECKPOINT_FILESPACE>::set_checkpoint_directory(comm.me == 0 ? true : false, "./data", n);
else
Kokkos::Experimental::DirectoryManager<CHECKPOINT_FILESPACE>::set_checkpoint_directory(comm.me == 0 ? true : false, "./data", n, comm.me);
CHECKPOINT_FILESPACE::checkpoint_views();
}
#endif
}
}
2 changes: 1 addition & 1 deletion kokkos/integrate.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,5 +61,5 @@ class Integrate
void finalIntegrate();
KOKKOS_INLINE_FUNCTION
void operator() (TagFinalIntegrate, const int& i) const;
void run(Atom &, Force*, Neighbor &, Comm &, Thermo &, Timer &);
void run(Atom &, Force*, Neighbor &, Comm &, Thermo &, Timer &, const int);
};
Loading

0 comments on commit 05185b0

Please sign in to comment.