Skip to content

Commit

Permalink
Merge branch 'develop' into collected-small-changes
Browse files Browse the repository at this point in the history
  • Loading branch information
akohlmey committed Oct 1, 2024
2 parents 2749e09 + 75f86a6 commit b12aeb7
Show file tree
Hide file tree
Showing 47 changed files with 1,743 additions and 925 deletions.
8 changes: 7 additions & 1 deletion lib/kokkos/Makefile.kokkos
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ KOKKOS_DEVICES ?= "OpenMP"
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace
# IBM: Power8,Power9
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
# Intel-GPUs: Intel_Gen,Intel_Gen9,Intel_Gen11,Intel_Gen12LP,Intel_DG1,Intel_XeHP,Intel_PVC
KOKKOS_ARCH ?= ""
Expand Down Expand Up @@ -465,6 +465,7 @@ KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100)
endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1103)

# Any AVX?
KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
Expand Down Expand Up @@ -1158,6 +1159,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1100
endif
# AMD_GFX1103 selected in KOKKOS_ARCH: pass the arch-specific define and the
# generic KOKKOS_ARCH_AMD_GPU define to kokkos_append_header, and set the HIP
# offload architecture flag to gfx1103.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1103")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1103
endif


ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
Expand Down
1 change: 1 addition & 0 deletions lib/kokkos/cmake/KokkosCore_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@
#cmakedefine KOKKOS_ARCH_AMD_GFX942
#cmakedefine KOKKOS_ARCH_AMD_GFX1030
#cmakedefine KOKKOS_ARCH_AMD_GFX1100
#cmakedefine KOKKOS_ARCH_AMD_GFX1103
#cmakedefine KOKKOS_ARCH_AMD_GPU
#cmakedefine KOKKOS_ARCH_VEGA // deprecated
#cmakedefine KOKKOS_ARCH_VEGA906 // deprecated
Expand Down
6 changes: 3 additions & 3 deletions lib/kokkos/cmake/kokkos_arch.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,9 @@ LIST(APPEND CORRESPONDING_AMD_FLAGS gfx90a gfx90a gfx908 gfx908)
LIST(APPEND SUPPORTED_AMD_GPUS MI50/60 MI50/60)
LIST(APPEND SUPPORTED_AMD_ARCHS VEGA906 AMD_GFX906)
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx906 gfx906)
LIST(APPEND SUPPORTED_AMD_GPUS RX7900XTX RX7900XTX V620/W6800 V620/W6800)
LIST(APPEND SUPPORTED_AMD_ARCHS NAVI1100 AMD_GFX1100 NAVI1030 AMD_GFX1030)
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx1100 gfx1100 gfx1030 gfx1030)
LIST(APPEND SUPPORTED_AMD_GPUS PHOENIX RX7900XTX V620/W6800 V620/W6800)
LIST(APPEND SUPPORTED_AMD_ARCHS AMD_GFX1103 AMD_GFX1100 NAVI1030 AMD_GFX1030)
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx1103 gfx1100 gfx1030 gfx1030)

#FIXME CAN BE REPLACED WITH LIST_ZIP IN CMAKE 3.17
FOREACH(ARCH IN LISTS SUPPORTED_AMD_ARCHS)
Expand Down
3 changes: 2 additions & 1 deletion lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ struct HIPTraits {
static constexpr int WarpSize = 64;
static constexpr int WarpIndexMask = 0x003f; /* hexadecimal for 63 */
static constexpr int WarpIndexShift = 6; /* WarpSize == 1 << WarpShift*/
#elif defined(KOKKOS_ARCH_AMD_GFX1030) || defined(KOKKOS_ARCH_AMD_GFX1100)
#elif defined(KOKKOS_ARCH_AMD_GFX1030) || defined(KOKKOS_ARCH_AMD_GFX1100) || \
defined(KOKKOS_ARCH_AMD_GFX1103)
static constexpr int WarpSize = 32;
static constexpr int WarpIndexMask = 0x001f; /* hexadecimal for 31 */
static constexpr int WarpIndexShift = 5; /* WarpSize == 1 << WarpShift*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>,
local_offset_value = element_values(team_id, i - 1);
// FIXME_OPENMPTARGET We seem to access memory illegally on AMD GPUs
#if defined(KOKKOS_ARCH_AMD_GPU) && !defined(KOKKOS_ARCH_AMD_GFX1030) && \
!defined(KOKKOS_ARCH_AMD_GFX1100)
!defined(KOKKOS_ARCH_AMD_GFX1100) && !defined(KOKKOS_ARCH_AMD_GFX1103)
if constexpr (Analysis::Reducer::has_join_member_function()) {
if constexpr (std::is_void_v<WorkTag>)
a_functor_reducer.get_functor().join(local_offset_value,
Expand Down
3 changes: 3 additions & 0 deletions lib/kokkos/core/src/impl/Kokkos_Core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,9 @@ void pre_initialize_internal(const Kokkos::InitializationSettings& settings) {
#elif defined(KOKKOS_ARCH_AMD_GFX1100)
declare_configuration_metadata("architecture", "GPU architecture",
"AMD_GFX1100");
#elif defined(KOKKOS_ARCH_AMD_GFX1103)
declare_configuration_metadata("architecture", "GPU architecture",
"AMD_GFX1103");

#else
declare_configuration_metadata("architecture", "GPU architecture", "none");
Expand Down
1 change: 1 addition & 0 deletions lib/kokkos/generate_makefile.bash
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ display_help_text() {
echo " AMD_GFX942 = AMD GPU MI300 GFX942"
echo " AMD_GFX1030 = AMD GPU V620/W6800 GFX1030"
echo " AMD_GFX1100 = AMD GPU RX 7900 XT(X) GFX1100"
echo " AMD_GFX1103 = AMD APU Radeon 740M/760M/780M/880M/890M GFX1103"
echo " [ARM]"
echo " ARMV80 = ARMv8.0 Compatible CPU"
echo " ARMV81 = ARMv8.1 Compatible CPU"
Expand Down
56 changes: 54 additions & 2 deletions src/KOKKOS/domain_kokkos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,6 @@ void DomainKokkos::operator()(TagDomain_image_flip, const int &i) const {
void DomainKokkos::lamda2x(int n)
{
atomKK->sync(Device,X_MASK);

x = atomKK->k_x.view<LMPDeviceType>();

copymode = 1;
Expand All @@ -573,6 +572,21 @@ void DomainKokkos::lamda2x(int n)
atomKK->modified(Device,X_MASK);
}

/* ----------------------------------------------------------------------
   convert triclinic 0-1 lamda coords to box coords for the first n atoms,
   but only for atoms whose mask shares a bit with groupbit_in
   x = H lamda + x0
   n           = upper bound of the atom index range [0,n) that is processed
   groupbit_in = bitmask tested against each atom's mask in the kernel
------------------------------------------------------------------------- */

void DomainKokkos::lamda2x(int n, int groupbit_in)
{
  // NOTE(review): only X_MASK is synced here although the kernel also reads
  // mask on the device - confirm the mask data is already current there
  atomKK->sync(Device,X_MASK);
  x = atomKK->k_x.view<LMPDeviceType>();
  mask = atomKK->k_mask.view<LMPDeviceType>();
  groupbit = groupbit_in;

  // copymode is set to 1 for the duration of the launch, which passes *this
  // to Kokkos as the functor
  copymode = 1;
  Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagDomain_lamda2x_group>(0,n),*this);
  copymode = 0;

  // positions were overwritten on the device
  atomKK->modified(Device,X_MASK);
}

KOKKOS_INLINE_FUNCTION
void DomainKokkos::operator()(TagDomain_lamda2x, const int &i) const {
const double xi1 = x(i,1);
Expand All @@ -582,6 +596,17 @@ void DomainKokkos::operator()(TagDomain_lamda2x, const int &i) const {
x(i,2) = h[2]*xi2 + boxlo[2];
}

/* ----------------------------------------------------------------------
   per-atom kernel for TagDomain_lamda2x_group
   applies x = H lamda + x0 in place to atom i if its mask matches groupbit
------------------------------------------------------------------------- */

KOKKOS_INLINE_FUNCTION
void DomainKokkos::operator()(TagDomain_lamda2x_group, const int &i) const {
  // atoms outside the group keep their coordinates unchanged
  if (!(mask[i] & groupbit)) return;

  // cache the y and z lamda values before they are overwritten below
  const double lamda_y = x(i,1);
  const double lamda_z = x(i,2);

  x(i,0) = h[0]*x(i,0) + h[5]*lamda_y + h[4]*lamda_z + boxlo[0];
  x(i,1) = h[1]*lamda_y + h[3]*lamda_z + boxlo[1];
  x(i,2) = h[2]*lamda_z + boxlo[2];
}

/* ----------------------------------------------------------------------
convert box coords to triclinic 0-1 lamda coords for all N atoms
lamda = H^-1 (x - x0)
Expand All @@ -590,7 +615,6 @@ void DomainKokkos::operator()(TagDomain_lamda2x, const int &i) const {
void DomainKokkos::x2lamda(int n)
{
atomKK->sync(Device,X_MASK);

x = atomKK->k_x.view<LMPDeviceType>();

copymode = 1;
Expand All @@ -600,6 +624,20 @@ void DomainKokkos::x2lamda(int n)
atomKK->modified(Device,X_MASK);
}

void DomainKokkos::x2lamda(int n, int groupbit_in)
{
atomKK->sync(Device,X_MASK);
x = atomKK->k_x.view<LMPDeviceType>();
mask = atomKK->k_mask.view<LMPDeviceType>();
groupbit = groupbit_in;

copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagDomain_x2lamda_group>(0,n),*this);
copymode = 0;

atomKK->modified(Device,X_MASK);
}

KOKKOS_INLINE_FUNCTION
void DomainKokkos::operator()(TagDomain_x2lamda, const int &i) const {
F_FLOAT delta[3];
Expand All @@ -612,3 +650,17 @@ void DomainKokkos::operator()(TagDomain_x2lamda, const int &i) const {
x(i,2) = h_inv[2]*delta[2];
}

/* ----------------------------------------------------------------------
   per-atom kernel for TagDomain_x2lamda_group
   applies lamda = H^-1 (x - x0) in place to atom i if its mask matches groupbit
------------------------------------------------------------------------- */

KOKKOS_INLINE_FUNCTION
void DomainKokkos::operator()(TagDomain_x2lamda_group, const int &i) const {
  // atoms outside the group keep their coordinates unchanged
  if (!(mask[i] & groupbit)) return;

  // displacement from the lower box corner, taken before x is overwritten
  const F_FLOAT dx = x(i,0) - boxlo[0];
  const F_FLOAT dy = x(i,1) - boxlo[1];
  const F_FLOAT dz = x(i,2) - boxlo[2];

  x(i,0) = h_inv[0]*dx + h_inv[5]*dy + h_inv[4]*dz;
  x(i,1) = h_inv[1]*dy + h_inv[3]*dz;
  x(i,2) = h_inv[2]*dz;
}

12 changes: 12 additions & 0 deletions src/KOKKOS/domain_kokkos.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ namespace LAMMPS_NS {
struct TagDomain_remap_all{};
struct TagDomain_image_flip{};
struct TagDomain_lamda2x{};
struct TagDomain_lamda2x_group{};
struct TagDomain_x2lamda{};
struct TagDomain_x2lamda_group{};

class DomainKokkos : public Domain {
public:
Expand All @@ -35,7 +37,9 @@ class DomainKokkos : public Domain {
void remap_all();
void image_flip(int, int, int);
void x2lamda(int) override;
void x2lamda(int,int) override;
void lamda2x(int) override;
void lamda2x(int,int) override;
// forward remaining x2lamda() and lamda2x() variants to parent class
void x2lamda(double *a, double *b) override { Domain::x2lamda(a,b); }
void lamda2x(double *a, double *b) override { Domain::lamda2x(a,b); }
Expand All @@ -54,18 +58,26 @@ class DomainKokkos : public Domain {
KOKKOS_INLINE_FUNCTION
void operator()(TagDomain_lamda2x, const int&) const;

KOKKOS_INLINE_FUNCTION
void operator()(TagDomain_lamda2x_group, const int&) const;

KOKKOS_INLINE_FUNCTION
void operator()(TagDomain_x2lamda, const int&) const;

KOKKOS_INLINE_FUNCTION
void operator()(TagDomain_x2lamda_group, const int&) const;

static KOKKOS_INLINE_FUNCTION
Few<double,3> unmap(Few<double,3> prd, Few<double,6> h, int triclinic,
Few<double,3> x, imageint image);

private:
int groupbit;
double lo[3],hi[3],period[3];
int n_flip, m_flip, p_flip;
ArrayTypes<LMPDeviceType>::t_x_array x;
ArrayTypes<LMPDeviceType>::t_imageint_1d image;
ArrayTypes<LMPDeviceType>::t_int_1d mask;
};

KOKKOS_INLINE_FUNCTION
Expand Down
Loading

0 comments on commit b12aeb7

Please sign in to comment.