From a5b2fa565bdbc06f5a0eba76830a371efe37f56b Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Thu, 11 Nov 2021 07:49:16 -0700 Subject: [PATCH] sve - initial SVE backend framework --- Makefile | 17 +++- backends/ceed-backend-list.h | 2 + backends/opt/ceed-opt-blocked.c | 2 + backends/sve/ceed-sve-blocked.c | 61 +++++++++++ backends/sve/ceed-sve-serial.c | 61 +++++++++++ backends/sve/ceed-sve-tensor-f32.c | 158 +++++++++++++++++++++++++++++ backends/sve/ceed-sve-tensor-f64.c | 158 +++++++++++++++++++++++++++++ backends/sve/ceed-sve.h | 28 +++++ 8 files changed, 485 insertions(+), 2 deletions(-) create mode 100644 backends/sve/ceed-sve-blocked.c create mode 100644 backends/sve/ceed-sve-serial.c create mode 100644 backends/sve/ceed-sve-tensor-f32.c create mode 100644 backends/sve/ceed-sve-tensor-f64.c create mode 100644 backends/sve/ceed-sve.h diff --git a/Makefile b/Makefile index 705ded82d0..52ce09a789 100644 --- a/Makefile +++ b/Makefile @@ -223,6 +223,7 @@ blocked.c := $(sort $(wildcard backends/blocked/*.c)) ceedmemcheck.c := $(sort $(wildcard backends/memcheck/*.c)) opt.c := $(sort $(wildcard backends/opt/*.c)) avx.c := $(sort $(wildcard backends/avx/*.c)) +sve.c := $(sort $(wildcard backends/sve/*.c)) xsmm.c := $(sort $(wildcard backends/xsmm/*.c)) cuda.c := $(sort $(wildcard backends/cuda/*.c)) cuda-ref.c := $(sort $(wildcard backends/cuda-ref/*.c)) @@ -286,6 +287,7 @@ info: $(info ------------------------------------) $(info MEMCHK_STATUS = $(MEMCHK_STATUS)$(call backend_status,$(MEMCHK_BACKENDS))) $(info AVX_STATUS = $(AVX_STATUS)$(call backend_status,$(AVX_BACKENDS))) + $(info SVE_STATUS = $(SVE_STATUS)$(call backend_status,$(SVE_BACKENDS))) $(info XSMM_DIR = $(XSMM_DIR)$(call backend_status,$(XSMM_BACKENDS))) $(info OCCA_DIR = $(OCCA_DIR)$(call backend_status,$(OCCA_BACKENDS))) $(info MAGMA_DIR = $(MAGMA_DIR)$(call backend_status,$(MAGMA_BACKENDS))) @@ -326,7 +328,7 @@ ifeq ($(MEMCHK),1) BACKENDS_MAKE += $(MEMCHK_BACKENDS) endif -# AVX Backed +# AVX Backends AVX_STATUS = Disabled AVX_FLAG := $(if $(filter clang,$(CC_VENDOR)),+avx,-mavx) AVX := $(filter $(AVX_FLAG),$(shell $(CC) $(OPT) -v -E -x c /dev/null 2>&1)) @@ -337,6 +339,17 @@ ifneq ($(AVX),) BACKENDS_MAKE += $(AVX_BACKENDS) endif +# SVE Backends +SVE_STATUS = Disabled +SVE_FLAG := $(if $(filter clang,$(CC_VENDOR)),+sve,-msve) +SVE ?= +SVE_BACKENDS = /cpu/self/sve/serial /cpu/self/sve/blocked +ifneq ($(SVE),) + SVE_STATUS = Enabled + libceed.c += $(sve.c) + BACKENDS_MAKE += $(SVE_BACKENDS) +endif + # Collect list of libraries and paths for use in linking and pkg-config PKG_LIBS = @@ -413,7 +426,7 @@ ifneq ($(HIP_LIB_DIR),) BACKENDS_MAKE += $(HIP_BACKENDS) endif -# MAGMA Backend +# MAGMA Backends ifneq ($(wildcard $(MAGMA_DIR)/lib/libmagma.*),) MAGMA_ARCH=$(shell nm -g $(MAGMA_DIR)/lib/libmagma.* | grep -c "hipblas") ifeq ($(MAGMA_ARCH), 0) #CUDA MAGMA diff --git a/backends/ceed-backend-list.h b/backends/ceed-backend-list.h index 6eb689d9f5..8107610ec0 100644 --- a/backends/ceed-backend-list.h +++ b/backends/ceed-backend-list.h @@ -22,5 +22,7 @@ MACRO(CeedRegister_Opt_Blocked, 1, "/cpu/self/opt/blocked") MACRO(CeedRegister_Opt_Serial, 1, "/cpu/self/opt/serial") MACRO(CeedRegister_Ref, 1, "/cpu/self/ref/serial") MACRO(CeedRegister_Ref_Blocked, 1, "/cpu/self/ref/blocked") +MACRO(CeedRegister_Sve_Serial, 1, "/cpu/self/sve/serial") +MACRO(CeedRegister_Sve_Blocked, 1, "/cpu/self/sve/blocked") MACRO(CeedRegister_Xsmm_Blocked, 1, "/cpu/self/xsmm/blocked") MACRO(CeedRegister_Xsmm_Serial, 1, "/cpu/self/xsmm/serial") diff --git a/backends/opt/ceed-opt-blocked.c b/backends/opt/ceed-opt-blocked.c index fedb295e53..a794b53eaa 100644 --- a/backends/opt/ceed-opt-blocked.c +++ b/backends/opt/ceed-opt-blocked.c @@ -58,6 +58,8 @@ static int CeedInit_Opt_Blocked(const char *resource, Ceed ceed) { ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "Destroy", CeedDestroy_Opt); CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "TensorContractCreate", + CeedTensorContractCreate_Opt); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "OperatorCreate", CeedOperatorCreate_Opt); CeedChkBackend(ierr); diff --git a/backends/sve/ceed-sve-blocked.c b/backends/sve/ceed-sve-blocked.c new file mode 100644 index 0000000000..905d5f12b7 --- /dev/null +++ b/backends/sve/ceed-sve-blocked.c @@ -0,0 +1,61 @@ +// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. +// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. +// All Rights reserved. See files LICENSE and NOTICE for details. +// +// This file is part of CEED, a collection of benchmarks, miniapps, software +// libraries and APIs for efficient high-order finite element and spectral +// element discretizations for exascale applications. For more information and +// source code availability see http://github.com/ceed. +// +// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, +// a collaborative effort of two U.S. Department of Energy organizations (Office +// of Science and the National Nuclear Security Administration) responsible for +// the planning and preparation of a capable exascale ecosystem, including +// software, applications, hardware, advanced system engineering and early +// testbed platforms, in support of the nation's exascale computing imperative. + +#include +#include +#include +#include +#include "ceed-sve.h" + +//------------------------------------------------------------------------------ +// Backend Init +//------------------------------------------------------------------------------ +static int CeedInit_Sve(const char *resource, Ceed ceed) { + int ierr; + if (strcmp(resource, "/cpu/self") && strcmp(resource, "/cpu/self/sve") && + strcmp(resource, "/cpu/self/sve/blocked")) + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_BACKEND, + "SVE backend cannot use resource: %s", resource); + // LCOV_EXCL_STOP + ierr = CeedSetDeterministic(ceed, true); CeedChkBackend(ierr); + + // Create reference CEED that implementation will be dispatched + // through unless overridden + Ceed ceed_ref; + CeedInit("/cpu/self/opt/blocked", &ceed_ref); + ierr = CeedSetDelegate(ceed, ceed_ref); CeedChkBackend(ierr); + + if (CEED_SCALAR_TYPE == CEED_SCALAR_FP64) { + ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "TensorContractCreate", + CeedTensorContractCreate_f64_Sve); + CeedChkBackend(ierr); + } else { + ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "TensorContractCreate", + CeedTensorContractCreate_f32_Sve); + CeedChkBackend(ierr); + } + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Backend Register +//------------------------------------------------------------------------------ +CEED_INTERN int CeedRegister_Sve_Blocked(void) { + return CeedRegister("/cpu/self/sve/blocked", CeedInit_Sve, 30); +} +//------------------------------------------------------------------------------ diff --git a/backends/sve/ceed-sve-serial.c b/backends/sve/ceed-sve-serial.c new file mode 100644 index 0000000000..4e41837cdf --- /dev/null +++ b/backends/sve/ceed-sve-serial.c @@ -0,0 +1,61 @@ +// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. +// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. +// All Rights reserved. See files LICENSE and NOTICE for details. +// +// This file is part of CEED, a collection of benchmarks, miniapps, software +// libraries and APIs for efficient high-order finite element and spectral +// element discretizations for exascale applications. For more information and +// source code availability see http://github.com/ceed. +// +// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, +// a collaborative effort of two U.S. Department of Energy organizations (Office +// of Science and the National Nuclear Security Administration) responsible for +// the planning and preparation of a capable exascale ecosystem, including +// software, applications, hardware, advanced system engineering and early +// testbed platforms, in support of the nation's exascale computing imperative. + +#include +#include +#include +#include +#include "ceed-sve.h" + +//------------------------------------------------------------------------------ +// Backend Init +//------------------------------------------------------------------------------ +static int CeedInit_Sve(const char *resource, Ceed ceed) { + int ierr; + if (strcmp(resource, "/cpu/self") + && strcmp(resource, "/cpu/self/sve/serial")) + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_BACKEND, + "SVE backend cannot use resource: %s", resource); + // LCOV_EXCL_STOP + ierr = CeedSetDeterministic(ceed, true); CeedChkBackend(ierr); + + // Create reference CEED that implementation will be dispatched + // through unless overridden + Ceed ceed_ref; + CeedInit("/cpu/self/opt/serial", &ceed_ref); + ierr = CeedSetDelegate(ceed, ceed_ref); CeedChkBackend(ierr); + + if (CEED_SCALAR_TYPE == CEED_SCALAR_FP64) { + ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "TensorContractCreate", + CeedTensorContractCreate_f64_Sve); + CeedChkBackend(ierr); + } else { + ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "TensorContractCreate", + CeedTensorContractCreate_f32_Sve); + CeedChkBackend(ierr); + } + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Backend Register +//------------------------------------------------------------------------------ +CEED_INTERN int CeedRegister_Sve_Serial(void) { + return CeedRegister("/cpu/self/sve/serial", CeedInit_Sve, 35); +} +//------------------------------------------------------------------------------ diff --git a/backends/sve/ceed-sve-tensor-f32.c b/backends/sve/ceed-sve-tensor-f32.c new file mode 100644 index 0000000000..0451c7f98a --- /dev/null +++ b/backends/sve/ceed-sve-tensor-f32.c @@ -0,0 +1,158 @@ +// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. +// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. +// All Rights reserved. See files LICENSE and NOTICE for details. +// +// This file is part of CEED, a collection of benchmarks, miniapps, software +// libraries and APIs for efficient high-order finite element and spectral +// element discretizations for exascale applications. For more information and +// source code availability see http://github.com/ceed. +// +// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, +// a collaborative effort of two U.S. Department of Energy organizations (Office +// of Science and the National Nuclear Security Administration) responsible for +// the planning and preparation of a capable exascale ecosystem, including +// software, applications, hardware, advanced system engineering and early +// testbed platforms, in support of the nation's exascale computing imperative. + +#include +#include +#ifdef __ARM_FEATURE_SVE +#include +#endif +#include +#include "ceed-sve.h" + +//------------------------------------------------------------------------------ +// Blocked Tensor Contract +//------------------------------------------------------------------------------ +static inline int CeedTensorContract_Sve_Blocked(CeedTensorContract contract, + CeedInt A, CeedInt B, CeedInt C, CeedInt J, const float *restrict t, + CeedTransposeMode t_mode, const CeedInt add, const float *restrict u, + float *restrict v, const CeedInt JJ) { + CeedInt t_stride_0 = B, t_stride_1 = 1; + if (t_mode == CEED_TRANSPOSE) { + t_stride_0 = 1; t_stride_1 = J; + } + + for (CeedInt a=0; a +#include +#ifdef __ARM_FEATURE_SVE +#include +#endif +#include +#include "ceed-sve.h" + +//------------------------------------------------------------------------------ +// Blocked Tensor Contract +//------------------------------------------------------------------------------ +static inline int CeedTensorContract_Sve_Blocked(CeedTensorContract contract, + CeedInt A, CeedInt B, CeedInt C, CeedInt J, const double *restrict t, + CeedTransposeMode t_mode, const CeedInt add, const double *restrict u, + double *restrict v, const CeedInt JJ) { + CeedInt t_stride_0 = B, t_stride_1 = 1; + if (t_mode == CEED_TRANSPOSE) { + t_stride_0 = 1; t_stride_1 = J; + } + + for (CeedInt a=0; a +#include + +CEED_INTERN int CeedTensorContractCreate_f32_Sve(CeedBasis basis, + CeedTensorContract contract); +CEED_INTERN int CeedTensorContractCreate_f64_Sve(CeedBasis basis, + CeedTensorContract contract); + +#endif // _ceed_sve_h