Skip to content

riscv64 initial import #821

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 53 additions & 41 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64")
endif()
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/sse2neon)
set(DISABLE_WDDM_LINUX TRUE)
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "riscv64")
set(NEO_TARGET_PROCESSOR "riscv64")
if(NOT ${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL ${CMAKE_SYSTEM_PROCESSOR})
set(NEO_DISABLE_LD_LLD TRUE)
set(NEO_DISABLE_LD_GOLD TRUE)
endif()
endif()
message(STATUS "Host processor: ${CMAKE_HOST_SYSTEM_PROCESSOR}")
message(STATUS "Target processor: ${CMAKE_SYSTEM_PROCESSOR}")
Expand Down Expand Up @@ -788,65 +794,71 @@ if(NOT NEO_DISABLE_MITIGATIONS)
message(WARNING "Spectre mitigation is not supported by the compiler")
endif()
else()
if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
check_cxx_compiler_flag(-mretpoline COMPILER_SUPPORTS_RETPOLINE)
if(COMPILER_SUPPORTS_RETPOLINE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mretpoline")
else()
message(WARNING "Spectre mitigation -mretpoline flag is not supported by the compiler")
endif()
else()
check_cxx_compiler_flag(-mindirect-branch=thunk COMPILER_SUPPORTS_INDIRECT_BRANCH_THUNK)
if(COMPILER_SUPPORTS_INDIRECT_BRANCH_THUNK)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mindirect-branch=thunk")
else()
message(WARNING "Spectre mitigation -mindirect-branch=thunk flag is not supported by the compiler")
endif()
check_cxx_compiler_flag(-mfunction-return=thunk COMPILER_SUPPORTS_FUNCTION_RETURN_THUNK)
if(COMPILER_SUPPORTS_FUNCTION_RETURN_THUNK)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfunction-return=thunk")
else()
message(WARNING "Spectre mitigation -mfunction-return=thunk flag is not supported by the compiler")
endif()
check_cxx_compiler_flag(-mindirect-branch-register COMPILER_SUPPORTS_INDIRECT_BRANCH_REGISTER)
if(COMPILER_SUPPORTS_INDIRECT_BRANCH_REGISTER)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mindirect-branch-register")
if(NOT ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "riscv64")
if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
check_cxx_compiler_flag(-mretpoline COMPILER_SUPPORTS_RETPOLINE)
if(COMPILER_SUPPORTS_RETPOLINE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mretpoline")
else()
message(WARNING "Spectre mitigation -mretpoline flag is not supported by the compiler")
endif()
else()
message(WARNING "Spectre mitigation -mindirect-branch-register flag is not supported by the compiler")
check_cxx_compiler_flag(-mindirect-branch=thunk COMPILER_SUPPORTS_INDIRECT_BRANCH_THUNK)
if(COMPILER_SUPPORTS_INDIRECT_BRANCH_THUNK)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mindirect-branch=thunk")
else()
message(WARNING "Spectre mitigation -mindirect-branch=thunk flag is not supported by the compiler")
endif()
check_cxx_compiler_flag(-mfunction-return=thunk COMPILER_SUPPORTS_FUNCTION_RETURN_THUNK)
if(COMPILER_SUPPORTS_FUNCTION_RETURN_THUNK)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfunction-return=thunk")
else()
message(WARNING "Spectre mitigation -mfunction-return=thunk flag is not supported by the compiler")
endif()
check_cxx_compiler_flag(-mindirect-branch-register COMPILER_SUPPORTS_INDIRECT_BRANCH_REGISTER)
if(COMPILER_SUPPORTS_INDIRECT_BRANCH_REGISTER)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mindirect-branch-register")
else()
message(WARNING "Spectre mitigation -mindirect-branch-register flag is not supported by the compiler")
endif()
endif()
endif()
endif()
else()
message(WARNING "Spectre mitigation DISABLED")
endif()

if(NOT MSVC)
check_cxx_compiler_flag(-msse4.2 COMPILER_SUPPORTS_SSE42)
check_cxx_compiler_flag(-mavx2 COMPILER_SUPPORTS_AVX2)
check_cxx_compiler_flag(-march=armv8-a+simd COMPILER_SUPPORTS_NEON)
if(NOT ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "riscv64")
if(NOT MSVC)
check_cxx_compiler_flag(-msse4.2 COMPILER_SUPPORTS_SSE42)
check_cxx_compiler_flag(-mavx2 COMPILER_SUPPORTS_AVX2)
check_cxx_compiler_flag(-march=armv8-a+simd COMPILER_SUPPORTS_NEON)
endif()
endif()

if(NOT MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ftemplate-depth=1024")
endif()

# intrinsics (_mm_clflushopt and waitpkg) support
if(NOT MSVC)
check_cxx_compiler_flag(-mclflushopt SUPPORTS_CLFLUSHOPT)
check_cxx_compiler_flag(-mwaitpkg SUPPORTS_WAITPKG)
if(SUPPORTS_CLFLUSHOPT)
if(NOT ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "riscv64")
if(NOT MSVC)
check_cxx_compiler_flag(-mclflushopt SUPPORTS_CLFLUSHOPT)
check_cxx_compiler_flag(-mwaitpkg SUPPORTS_WAITPKG)
if(SUPPORTS_CLFLUSHOPT)
add_compile_definitions(SUPPORTS_CLFLUSHOPT)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mclflushopt")
endif()
if(SUPPORTS_WAITPKG)
add_compile_definitions(SUPPORTS_WAITPKG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mwaitpkg")
else()
message(WARNING "-mwaitpkg flag is not supported by the compiler")
endif()
else()
add_compile_definitions(SUPPORTS_CLFLUSHOPT)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mclflushopt")
endif()
if(SUPPORTS_WAITPKG)
add_compile_definitions(SUPPORTS_WAITPKG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mwaitpkg")
else()
message(WARNING "-mwaitpkg flag is not supported by the compiler")
endif()
else()
add_compile_definitions(SUPPORTS_CLFLUSHOPT)
add_compile_definitions(SUPPORTS_WAITPKG)
endif()

# Compiler warning flags
Expand Down
4 changes: 4 additions & 0 deletions scripts/packaging/build_deb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,10 @@ EOF
NEO_SKIP_UNIT_TESTS="TRUE"
export NEO_DISABLE_BUILTINS_COMPILATION="TRUE"
fi
if [ "${TARGET_ARCH}" == "riscv64" ]; then
NEO_SKIP_UNIT_TESTS="TRUE"
export NEO_DISABLE_BUILTINS_COMPILATION="TRUE"
fi
export NEO_DISABLE_BUILTINS_COMPILATION
export NEO_SKIP_UNIT_TESTS

Expand Down
4 changes: 4 additions & 0 deletions scripts/packaging/l0_gpu_driver/build_l0_gpu_driver_deb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,10 @@ EOF
NEO_SKIP_UNIT_TESTS="TRUE"
export NEO_DISABLE_BUILTINS_COMPILATION="TRUE"
fi
if [ "${TARGET_ARCH}" == "riscv64" ]; then
NEO_SKIP_UNIT_TESTS="TRUE"
export NEO_DISABLE_BUILTINS_COMPILATION="TRUE"
fi
export NEO_DISABLE_BUILTINS_COMPILATION
export NEO_SKIP_UNIT_TESTS

Expand Down
4 changes: 4 additions & 0 deletions scripts/packaging/opencl/build_opencl_deb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,10 @@ EOF
NEO_SKIP_UNIT_TESTS="TRUE"
export NEO_DISABLE_BUILTINS_COMPILATION="TRUE"
fi
if [ "${TARGET_ARCH}" == "riscv64" ]; then
NEO_SKIP_UNIT_TESTS="TRUE"
export NEO_DISABLE_BUILTINS_COMPILATION="TRUE"
fi
export NEO_DISABLE_BUILTINS_COMPILATION
export NEO_SKIP_UNIT_TESTS

Expand Down
2 changes: 2 additions & 0 deletions shared/source/helpers/local_id_gen.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,11 @@ inline uint32_t getPerThreadSizeLocalIDs(uint32_t simd, uint32_t grfSize, uint32
}

struct LocalIDHelper {
#if !defined(__riscv)
static void (*generateSimd8)(void *buffer, const std::array<uint16_t, 3> &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array<uint8_t, 3> &dimensionsOrder, bool chooseMaxRowSize);
static void (*generateSimd16)(void *buffer, const std::array<uint16_t, 3> &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array<uint8_t, 3> &dimensionsOrder, bool chooseMaxRowSize);
static void (*generateSimd32)(void *buffer, const std::array<uint16_t, 3> &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array<uint8_t, 3> &dimensionsOrder, bool chooseMaxRowSize);
#endif

static LocalIDHelper initializer;

Expand Down
14 changes: 14 additions & 0 deletions shared/source/helpers/riscv64/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#
# Copyright (C) 2019-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

if(${NEO_TARGET_PROCESSOR} STREQUAL "riscv64")
list(APPEND NEO_CORE_HELPERS
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/local_id_gen.cpp
)

set_property(GLOBAL PROPERTY NEO_CORE_HELPERS ${NEO_CORE_HELPERS})
endif()
39 changes: 39 additions & 0 deletions shared/source/helpers/riscv64/local_id_gen.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/

#include "shared/source/helpers/local_id_gen.h"

#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/local_id_gen_special.inl"
#include "shared/source/utilities/cpu_info.h"

namespace NEO {

struct uint16x8_t;
struct uint16x16_t;

// This is the initial value of SIMD for local ID
// computation. It correlates to the SIMD lane.
// Must be 32byte aligned for AVX2 usage
ALIGNAS(32)
const uint16_t initialLocalID[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};

// Initialize the lookup table based on CPU capabilities
LocalIDHelper::LocalIDHelper() {
}

LocalIDHelper LocalIDHelper::initializer;

void generateLocalIDs(void *buffer, uint16_t simd, const std::array<uint16_t, 3> &localWorkgroupSize, const std::array<uint8_t, 3> &dimensionsOrder, bool isImageOnlyKernel, uint32_t grfSize, uint32_t grfCount, const RootDeviceEnvironment &rootDeviceEnvironment) {
generateLocalIDsForSimdOne(buffer, localWorkgroupSize, dimensionsOrder, grfSize);
}

} // namespace NEO
81 changes: 79 additions & 2 deletions shared/source/helpers/uint16_sse4.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@
#include <cstdint>
#if defined(__ARM_ARCH)
#include <sse2neon.h>
#else
#elif defined(__x86_64__) || defined(_M_X64)
#include <immintrin.h>
#elif defined(__riscv)
#include <cstring>
typedef std::uint16_t __attribute__((vector_size(8))) __m128i;
#endif

namespace NEO {
Expand All @@ -24,14 +27,28 @@ struct uint16x8_t { // NOLINT(readability-identifier-naming)
__m128i value;

uint16x8_t() {
#if defined(__riscv)
std::memset(&value, 0, sizeof(std::uint16_t)*8);
#else
value = _mm_setzero_si128();
#endif
}

#if defined(__riscv)
uint16x8_t(__m128i val) {
std::memcpy(&value, &val, sizeof(std::uint16_t)*8);
}
#else
uint16x8_t(__m128i value) : value(value) {
}
#endif

uint16x8_t(uint16_t a) {
#if defined(__riscv)
std::memset(&value, a, sizeof(std::uint16_t)*8);
#else
value = _mm_set1_epi16(a); // SSE2
#endif
}

explicit uint16x8_t(const void *alignedPtr) {
Expand All @@ -57,57 +74,117 @@ struct uint16x8_t { // NOLINT(readability-identifier-naming)

inline void load(const void *alignedPtr) {
DEBUG_BREAK_IF(!isAligned<16>(alignedPtr));
#if defined(__riscv)
std::memcpy(&value, reinterpret_cast<const __m128i *>(alignedPtr), sizeof(std::uint16_t)*8);
#else
value = _mm_load_si128(reinterpret_cast<const __m128i *>(alignedPtr)); // SSE2
#endif
}

inline void loadUnaligned(const void *ptr) {
#if defined(__riscv)
std::memcpy(&value, reinterpret_cast<const __m128i *>(ptr), sizeof(std::uint16_t)*8);
#else
value = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr)); // SSE2
#endif
}

inline void store(void *alignedPtr) {
DEBUG_BREAK_IF(!isAligned<16>(alignedPtr));
#if defined(__riscv)
std::memcpy(alignedPtr, &value, sizeof(std::uint16_t)*8);
#else
_mm_store_si128(reinterpret_cast<__m128i *>(alignedPtr), value); // SSE2
#endif
}

inline void storeUnaligned(void *ptr) {
#if defined(__riscv)
std::memcpy(ptr, &value, sizeof(std::uint16_t)*8);
#else
_mm_storeu_si128(reinterpret_cast<__m128i *>(ptr), value); // SSE2
#endif
}

inline operator bool() const {
#if defined(__riscv)
unsigned int result = 0;
uint16x8_t mask_value = mask();
for(unsigned int i = 0; i < 8; ++i) {
result += value[i] & mask_value.value[i];
}
return (result < 1);
#else
return _mm_test_all_zeros(value, mask().value) ? false : true; // SSE4.1 alternatives?
#endif
}

inline uint16x8_t &operator-=(const uint16x8_t &a) {
#if defined(__riscv)
for(unsigned int i = 0; i < 8; ++i) {
value[i] = value[i] - a.value[i];
}
#else
value = _mm_sub_epi16(value, a.value); // SSE2
#endif
return *this;
}

inline uint16x8_t &operator+=(const uint16x8_t &a) {
#if defined(__riscv)
for(unsigned int i = 0; i < 8; ++i) {
value[i] = value[i] + a.value[i];
}
#else
value = _mm_add_epi16(value, a.value); // SSE2
#endif
return *this;
}

inline friend uint16x8_t operator>=(const uint16x8_t &a, const uint16x8_t &b) {
uint16x8_t result;
#if defined(__riscv)
std::uint16_t mask = 0;
for(unsigned int i = 0; i < 8; ++i) {
mask = ( a.value[i] < b.value[i] ) ? 1 : 0;
result.value[i] = result.value[i] ^ mask;
}
#else
result.value =
_mm_xor_si128(mask().value,
_mm_cmplt_epi16(a.value, b.value)); // SSE2
#endif
return result;
}

inline friend uint16x8_t operator&&(const uint16x8_t &a, const uint16x8_t &b) {
uint16x8_t result;
#if defined(__riscv)
for(unsigned int i = 0; i < 8; ++i) {
result.value[i] = a.value[i] & b.value[i];
}
#else
result.value = _mm_and_si128(a.value, b.value); // SSE2
#endif
return result;
}

// NOTE: uint16x8_t::blend behaves like mask ? a : b
inline friend uint16x8_t blend(const uint16x8_t &a, const uint16x8_t &b, const uint16x8_t &mask) {
uint16x8_t result;

#if defined(__riscv)
for(unsigned int i = 0; i < 8; ++i) {
std::uint8_t mask_values[2] = { static_cast<uint8_t>((mask.value[i] << 8) >> 8), static_cast<uint8_t>(mask.value[i] >> 8) };
std::uint8_t a_values[2] = { static_cast<uint8_t>((a.value[i] << 8) >> 8), static_cast<uint8_t>(a.value[i] >> 8) };
std::uint8_t b_values[2] = { static_cast<uint8_t>((b.value[i] << 8) >> 8), static_cast<uint8_t>(b.value[i] >> 8) };

result.value[i] = (( mask_values[1] ? a_values[1] : b_values[0] ) << 8 ) | ( mask_values[0] ? a_values[0] : b_values[0] );
}
#else
// Have to swap arguments to get intended calling semantics
result.value =
_mm_blendv_epi8(b.value, a.value, mask.value); // SSE4.1 alternatives?
#endif
return result;
}
};
Expand Down
Loading