From 910249372dc866330d8eb6daeb94be5930818a5a Mon Sep 17 00:00:00 2001 From: "ct.clmsn" Date: Sat, 29 Mar 2025 21:43:29 -0400 Subject: [PATCH 1/4] initial import --- CMakeLists.txt | 94 +++++++++++-------- scripts/packaging/build_deb.sh | 4 + .../l0_gpu_driver/build_l0_gpu_driver_deb.sh | 4 + scripts/packaging/opencl/build_opencl_deb.sh | 4 + shared/source/helpers/local_id_gen.h | 2 + shared/source/helpers/riscv64/CMakeLists.txt | 14 +++ shared/source/helpers/uint16_sse4.h | 81 +++++++++++++++- .../windows/wddm_memory_manager.cpp | 52 ++++++++++ .../os_interface/windows/windows_wrapper.h | 2 + shared/source/utilities/cpuintrinsics.cpp | 34 +++++++ .../utilities/linux/riscv64/CMakeLists.txt | 13 +++ .../source/utilities/riscv64/CMakeLists.txt | 12 +++ 12 files changed, 273 insertions(+), 43 deletions(-) create mode 100644 shared/source/helpers/riscv64/CMakeLists.txt create mode 100644 shared/source/utilities/linux/riscv64/CMakeLists.txt create mode 100644 shared/source/utilities/riscv64/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index e00ce36d2421c..81f594eaea31e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,6 +50,12 @@ elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64") endif() include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/sse2neon) set(DISABLE_WDDM_LINUX TRUE) +elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "riscv64") + set(NEO_TARGET_PROCESSOR "riscv64") + if(NOT ${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL ${CMAKE_SYSTEM_PROCESSOR}) + set(NEO_DISABLE_LD_LLD TRUE) + set(NEO_DISABLE_LD_GOLD TRUE) + endif() endif() message(STATUS "Host processor: ${CMAKE_HOST_SYSTEM_PROCESSOR}") message(STATUS "Target processor: ${CMAKE_SYSTEM_PROCESSOR}") @@ -788,31 +794,33 @@ if(NOT NEO_DISABLE_MITIGATIONS) message(WARNING "Spectre mitigation is not supported by the compiler") endif() else() - if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") - check_cxx_compiler_flag(-mretpoline COMPILER_SUPPORTS_RETPOLINE) - if(COMPILER_SUPPORTS_RETPOLINE) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mretpoline") - else() - message(WARNING "Spectre mitigation -mretpoline flag is not supported by the compiler") - endif() - else() - check_cxx_compiler_flag(-mindirect-branch=thunk COMPILER_SUPPORTS_INDIRECT_BRANCH_THUNK) - if(COMPILER_SUPPORTS_INDIRECT_BRANCH_THUNK) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mindirect-branch=thunk") - else() - message(WARNING "Spectre mitigation -mindirect-branch=thunk flag is not supported by the compiler") - endif() - check_cxx_compiler_flag(-mfunction-return=thunk COMPILER_SUPPORTS_FUNCTION_RETURN_THUNK) - if(COMPILER_SUPPORTS_FUNCTION_RETURN_THUNK) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfunction-return=thunk") - else() - message(WARNING "Spectre mitigation -mfunction-return=thunk flag is not supported by the compiler") - endif() - check_cxx_compiler_flag(-mindirect-branch-register COMPILER_SUPPORTS_INDIRECT_BRANCH_REGISTER) - if(COMPILER_SUPPORTS_INDIRECT_BRANCH_REGISTER) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mindirect-branch-register") + if(NOT ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "riscv64") + if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") + check_cxx_compiler_flag(-mretpoline COMPILER_SUPPORTS_RETPOLINE) + if(COMPILER_SUPPORTS_RETPOLINE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mretpoline") + else() + message(WARNING "Spectre mitigation -mretpoline flag is not supported by the compiler") + endif() else() - message(WARNING "Spectre mitigation -mindirect-branch-register flag is not supported by the compiler") + check_cxx_compiler_flag(-mindirect-branch=thunk COMPILER_SUPPORTS_INDIRECT_BRANCH_THUNK) + if(COMPILER_SUPPORTS_INDIRECT_BRANCH_THUNK) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mindirect-branch=thunk") + else() + message(WARNING "Spectre mitigation -mindirect-branch=thunk flag is not supported by the compiler") + endif() + check_cxx_compiler_flag(-mfunction-return=thunk COMPILER_SUPPORTS_FUNCTION_RETURN_THUNK) + if(COMPILER_SUPPORTS_FUNCTION_RETURN_THUNK) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfunction-return=thunk") + else() + message(WARNING "Spectre mitigation -mfunction-return=thunk flag is not supported by the compiler") + endif() + check_cxx_compiler_flag(-mindirect-branch-register COMPILER_SUPPORTS_INDIRECT_BRANCH_REGISTER) + if(COMPILER_SUPPORTS_INDIRECT_BRANCH_REGISTER) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mindirect-branch-register") + else() + message(WARNING "Spectre mitigation -mindirect-branch-register flag is not supported by the compiler") + endif() endif() endif() endif() @@ -820,10 +828,12 @@ else() message(WARNING "Spectre mitigation DISABLED") endif() -if(NOT MSVC) - check_cxx_compiler_flag(-msse4.2 COMPILER_SUPPORTS_SSE42) - check_cxx_compiler_flag(-mavx2 COMPILER_SUPPORTS_AVX2) - check_cxx_compiler_flag(-march=armv8-a+simd COMPILER_SUPPORTS_NEON) +if(NOT ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "riscv64") + if(NOT MSVC) + check_cxx_compiler_flag(-msse4.2 COMPILER_SUPPORTS_SSE42) + check_cxx_compiler_flag(-mavx2 COMPILER_SUPPORTS_AVX2) + check_cxx_compiler_flag(-march=armv8-a+simd COMPILER_SUPPORTS_NEON) + endif() endif() if(NOT MSVC) @@ -831,22 +841,24 @@ if(NOT MSVC) endif() # intrinsics (_mm_clflushopt and waitpkg) support -if(NOT MSVC) - check_cxx_compiler_flag(-mclflushopt SUPPORTS_CLFLUSHOPT) - check_cxx_compiler_flag(-mwaitpkg SUPPORTS_WAITPKG) - if(SUPPORTS_CLFLUSHOPT) +if(NOT ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "riscv64") + if(NOT MSVC) + check_cxx_compiler_flag(-mclflushopt SUPPORTS_CLFLUSHOPT) + check_cxx_compiler_flag(-mwaitpkg SUPPORTS_WAITPKG) + if(SUPPORTS_CLFLUSHOPT) + add_compile_definitions(SUPPORTS_CLFLUSHOPT) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mclflushopt") + endif() + if(SUPPORTS_WAITPKG) + add_compile_definitions(SUPPORTS_WAITPKG) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mwaitpkg") + else() + message(WARNING "-mwaitpkg flag is not supported by the compiler") + endif() + else() add_compile_definitions(SUPPORTS_CLFLUSHOPT) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mclflushopt") - endif() - if(SUPPORTS_WAITPKG) add_compile_definitions(SUPPORTS_WAITPKG) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mwaitpkg") - else() - message(WARNING "-mwaitpkg flag is not supported by the compiler") endif() -else() - add_compile_definitions(SUPPORTS_CLFLUSHOPT) - add_compile_definitions(SUPPORTS_WAITPKG) endif() # Compiler warning flags diff --git a/scripts/packaging/build_deb.sh b/scripts/packaging/build_deb.sh index 9504b8bf08e43..80164db5f76cc 100755 --- a/scripts/packaging/build_deb.sh +++ b/scripts/packaging/build_deb.sh @@ -162,6 +162,10 @@ EOF NEO_SKIP_UNIT_TESTS="TRUE" export NEO_DISABLE_BUILTINS_COMPILATION="TRUE" fi + if [ "${TARGET_ARCH}" == "riscv64" ]; then + NEO_SKIP_UNIT_TESTS="TRUE" + export NEO_DISABLE_BUILTINS_COMPILATION="TRUE" + fi export NEO_DISABLE_BUILTINS_COMPILATION export NEO_SKIP_UNIT_TESTS diff --git a/scripts/packaging/l0_gpu_driver/build_l0_gpu_driver_deb.sh b/scripts/packaging/l0_gpu_driver/build_l0_gpu_driver_deb.sh index f3a9598bc9bc1..76c55e3f9dff3 100755 --- a/scripts/packaging/l0_gpu_driver/build_l0_gpu_driver_deb.sh +++ b/scripts/packaging/l0_gpu_driver/build_l0_gpu_driver_deb.sh @@ -130,6 +130,10 @@ EOF NEO_SKIP_UNIT_TESTS="TRUE" export NEO_DISABLE_BUILTINS_COMPILATION="TRUE" fi + if [ "${TARGET_ARCH}" == "riscv64" ]; then + NEO_SKIP_UNIT_TESTS="TRUE" + export NEO_DISABLE_BUILTINS_COMPILATION="TRUE" + fi export NEO_DISABLE_BUILTINS_COMPILATION export NEO_SKIP_UNIT_TESTS diff --git a/scripts/packaging/opencl/build_opencl_deb.sh b/scripts/packaging/opencl/build_opencl_deb.sh index b0d11a21e4578..fd15834f68759 100755 --- a/scripts/packaging/opencl/build_opencl_deb.sh +++ b/scripts/packaging/opencl/build_opencl_deb.sh @@ -129,6 +129,10 @@ EOF NEO_SKIP_UNIT_TESTS="TRUE" export NEO_DISABLE_BUILTINS_COMPILATION="TRUE" fi + if [ "${TARGET_ARCH}" == "riscv64" ]; then + NEO_SKIP_UNIT_TESTS="TRUE" + export NEO_DISABLE_BUILTINS_COMPILATION="TRUE" + fi export NEO_DISABLE_BUILTINS_COMPILATION export NEO_SKIP_UNIT_TESTS diff --git a/shared/source/helpers/local_id_gen.h b/shared/source/helpers/local_id_gen.h index 461199b707839..706b73143eb31 100644 --- a/shared/source/helpers/local_id_gen.h +++ b/shared/source/helpers/local_id_gen.h @@ -47,9 +47,11 @@ inline uint32_t getPerThreadSizeLocalIDs(uint32_t simd, uint32_t grfSize, uint32 } struct LocalIDHelper { +#if !defined(__riscv) static void (*generateSimd8)(void *buffer, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder, bool chooseMaxRowSize); static void (*generateSimd16)(void *buffer, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder, bool chooseMaxRowSize); static void (*generateSimd32)(void *buffer, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder, bool chooseMaxRowSize); +#endif static LocalIDHelper initializer; diff --git a/shared/source/helpers/riscv64/CMakeLists.txt b/shared/source/helpers/riscv64/CMakeLists.txt new file mode 100644 index 0000000000000..fe380bda50e47 --- /dev/null +++ b/shared/source/helpers/riscv64/CMakeLists.txt @@ -0,0 +1,14 @@ +# +# Copyright (C) 2019-2022 Intel Corporation +# +# SPDX-License-Identifier: MIT +# + +if(${NEO_TARGET_PROCESSOR} STREQUAL "riscv64") + list(APPEND NEO_CORE_HELPERS + ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt + ${CMAKE_CURRENT_SOURCE_DIR}/local_id_gen.cpp + ) + + set_property(GLOBAL PROPERTY NEO_CORE_HELPERS ${NEO_CORE_HELPERS}) +endif() diff --git a/shared/source/helpers/uint16_sse4.h b/shared/source/helpers/uint16_sse4.h index 2909be51e69a5..1e3f96c0b29c9 100644 --- a/shared/source/helpers/uint16_sse4.h +++ b/shared/source/helpers/uint16_sse4.h @@ -12,8 +12,11 @@ #include #if defined(__ARM_ARCH) #include -#else +#elif defined(__x86_64__) || defined(_M_X64) #include +#elif defined(__riscv) +#include +typedef std::uint16_t __attribute__((vector_size(8))) __m128i; #endif namespace NEO { @@ -24,14 +27,28 @@ struct uint16x8_t { // NOLINT(readability-identifier-naming) __m128i value; uint16x8_t() { +#if defined(__riscv) + std::memset(&value, 0, sizeof(std::uint16_t)*8); +#else value = _mm_setzero_si128(); +#endif } +#if defined(__riscv) + uint16x8_t(__m128i val) { + std::memcpy(&value, &val, sizeof(std::uint16_t)*8); + } +#else uint16x8_t(__m128i value) : value(value) { } +#endif uint16x8_t(uint16_t a) { +#if defined(__riscv) + std::memset(&value, a, sizeof(std::uint16_t)*8); +#else value = _mm_set1_epi16(a); // SSE2 +#endif } explicit uint16x8_t(const void *alignedPtr) { @@ -57,57 +74,117 @@ struct uint16x8_t { // NOLINT(readability-identifier-naming) inline void load(const void *alignedPtr) { DEBUG_BREAK_IF(!isAligned<16>(alignedPtr)); +#if defined(__riscv) + std::memcpy(&value, reinterpret_cast(alignedPtr), sizeof(std::uint16_t)*8); +#else value = _mm_load_si128(reinterpret_cast(alignedPtr)); // SSE2 +#endif } inline void loadUnaligned(const void *ptr) { +#if defined(__riscv) + std::memcpy(&value, reinterpret_cast(ptr), sizeof(std::uint16_t)*8); +#else value = _mm_loadu_si128(reinterpret_cast(ptr)); // SSE2 +#endif } inline void store(void *alignedPtr) { DEBUG_BREAK_IF(!isAligned<16>(alignedPtr)); +#if defined(__riscv) + std::memcpy(alignedPtr, &value, sizeof(std::uint16_t)*8); +#else _mm_store_si128(reinterpret_cast<__m128i *>(alignedPtr), value); // SSE2 +#endif } inline void storeUnaligned(void *ptr) { +#if defined(__riscv) + std::memcpy(ptr, &value, sizeof(std::uint16_t)*8); +#else _mm_storeu_si128(reinterpret_cast<__m128i *>(ptr), value); // SSE2 +#endif } inline operator bool() const { +#if defined(__riscv) + unsigned int result = 0; + uint16x8_t mask_value = mask(); + for(unsigned int i = 0; i < 8; ++i) { + result += value[i] & mask_value.value[i]; + } + return (result < 1); +#else return _mm_test_all_zeros(value, mask().value) ? false : true; // SSE4.1 alternatives? +#endif } inline uint16x8_t &operator-=(const uint16x8_t &a) { +#if defined(__riscv) + for(unsigned int i = 0; i < 8; ++i) { + value[i] = value[i] - a.value[i]; + } +#else value = _mm_sub_epi16(value, a.value); // SSE2 +#endif return *this; } inline uint16x8_t &operator+=(const uint16x8_t &a) { +#if defined(__riscv) + for(unsigned int i = 0; i < 8; ++i) { + value[i] = value[i] + a.value[i]; + } +#else value = _mm_add_epi16(value, a.value); // SSE2 +#endif return *this; } inline friend uint16x8_t operator>=(const uint16x8_t &a, const uint16x8_t &b) { uint16x8_t result; +#if defined(__riscv) + std::uint16_t mask = 0; + for(unsigned int i = 0; i < 8; ++i) { + mask = ( a.value[i] < b.value[i] ) ? 1 : 0; + result.value[i] = result.value[i] ^ mask; + } +#else result.value = _mm_xor_si128(mask().value, _mm_cmplt_epi16(a.value, b.value)); // SSE2 +#endif return result; } inline friend uint16x8_t operator&&(const uint16x8_t &a, const uint16x8_t &b) { uint16x8_t result; +#if defined(__riscv) + for(unsigned int i = 0; i < 8; ++i) { + result.value[i] = a.value[i] & b.value[i]; + } +#else result.value = _mm_and_si128(a.value, b.value); // SSE2 +#endif return result; } // NOTE: uint16x8_t::blend behaves like mask ? a : b inline friend uint16x8_t blend(const uint16x8_t &a, const uint16x8_t &b, const uint16x8_t &mask) { uint16x8_t result; - +#if defined(__riscv) + for(unsigned int i = 0; i < 8; ++i) { + std::uint8_t mask_values[2] = { static_cast((mask.value[i] << 8) >> 8), static_cast(mask.value[i] >> 8) }; + std::uint8_t a_values[2] = { static_cast((a.value[i] << 8) >> 8), static_cast(a.value[i] >> 8) }; + std::uint8_t b_values[2] = { static_cast((b.value[i] << 8) >> 8), static_cast(b.value[i] >> 8) }; + + result.value[i] = (( mask_values[1] ? a_values[1] : b_values[0] ) << 8 ) | ( mask_values[0] ? a_values[0] : b_values[0] ); + } +#else // Have to swap arguments to get intended calling semantics result.value = _mm_blendv_epi8(b.value, a.value, mask.value); // SSE4.1 alternatives? +#endif return result; } }; diff --git a/shared/source/os_interface/windows/wddm_memory_manager.cpp b/shared/source/os_interface/windows/wddm_memory_manager.cpp index 649ee02c58c1c..9bc01bbac86e3 100644 --- a/shared/source/os_interface/windows/wddm_memory_manager.cpp +++ b/shared/source/os_interface/windows/wddm_memory_manager.cpp @@ -44,7 +44,9 @@ #include "shared/source/utilities/logger_neo_only.h" #include +#if !defined(__riscv) #include +#endif namespace NEO { @@ -1180,11 +1182,25 @@ bool WddmMemoryManager::isCpuCopyRequired(const void *ptr) { int64_t timestamp0, timestamp1, localVariableReadDelta, inputPointerReadDelta; // compute timing overhead +#if defined(__riscv) + __asm__ __volatile__ ("fence ir, ir" : : : "memory"); + __asm__ __volatile__( + "rdtime %0;\n" + : "=r"(timestamp0) + :: ); + __asm__ __volatile__ ("fence ir, ir" : : : "memory"); + __asm__ __volatile__( + "rdtime %0;\n" + : "=r"(timestamp1) + :: ); + __asm__ __volatile__ ("fence ir, ir" : : : "memory"); +#else _mm_lfence(); timestamp0 = __rdtsc(); _mm_lfence(); timestamp1 = __rdtsc(); _mm_lfence(); +#endif if (timestamp1 - timestamp0 < meassurmentOverhead) { meassurmentOverhead = timestamp1 - timestamp0; @@ -1193,14 +1209,32 @@ bool WddmMemoryManager::isCpuCopyRequired(const void *ptr) { // dummy read cacheable = *localVariablePointer; +#if defined(__riscv) + __asm__ __volatile__ ("fence ir, ir" : : : "memory"); + __asm__ __volatile__( + "rdtime %0;\n" + : "=r"(timestamp0) + :: ); + __asm__ __volatile__ ("fence ir, ir" : : : "memory"); +#else _mm_lfence(); timestamp0 = __rdtsc(); _mm_lfence(); +#endif // do read cacheable = *localVariablePointer; +#if defined(__riscv) + __asm__ __volatile__ ("fence ir, ir" : : : "memory"); + __asm__ __volatile__( + "rdtime %0;\n" + : "=r"(timestamp1) + :: ); + __asm__ __volatile__ ("fence ir, ir" : : : "memory"); +#else _mm_lfence(); timestamp1 = __rdtsc(); _mm_lfence(); +#endif localVariableReadDelta = timestamp1 - timestamp0 - meassurmentOverhead; if (localVariableReadDelta <= 0) { localVariableReadDelta = 1; @@ -1211,13 +1245,31 @@ bool WddmMemoryManager::isCpuCopyRequired(const void *ptr) { // dummy read cacheable = *volatileInputPtr; +#if defined(__riscv) + __asm__ __volatile__ ("fence ir, ir" : : : "memory"); + __asm__ __volatile__( + "rdtime %0;\n" + : "=r"(timestamp0) + :: ); + __asm__ __volatile__ ("fence ir, ir" : : : "memory"); +#else _mm_lfence(); timestamp0 = __rdtsc(); _mm_lfence(); +#endif cacheable = *volatileInputPtr; +#if defined(__riscv) + __asm__ __volatile__ ("fence ir, ir" : : : "memory"); + __asm__ __volatile__( + "rdtime %0;\n" + : "=r"(timestamp1) + :: ); + __asm__ __volatile__ ("fence ir, ir" : : : "memory"); +#else _mm_lfence(); timestamp1 = __rdtsc(); _mm_lfence(); +#endif inputPointerReadDelta = timestamp1 - timestamp0 - meassurmentOverhead; if (inputPointerReadDelta <= 0) { inputPointerReadDelta = 1; diff --git a/shared/source/os_interface/windows/windows_wrapper.h b/shared/source/os_interface/windows/windows_wrapper.h index 937a9d5f19ed8..d2a038f6712ea 100644 --- a/shared/source/os_interface/windows/windows_wrapper.h +++ b/shared/source/os_interface/windows/windows_wrapper.h @@ -49,7 +49,9 @@ #if __clang__ #pragma clang diagnostic pop #endif +#if !defined(__riscv) #include +#endif #define STATUS_GRAPHICS_NO_VIDEO_MEMORY ((NTSTATUS)0xC01E0100L) diff --git a/shared/source/utilities/cpuintrinsics.cpp b/shared/source/utilities/cpuintrinsics.cpp index 6f6ec7bc50a87..304ca793439ae 100644 --- a/shared/source/utilities/cpuintrinsics.cpp +++ b/shared/source/utilities/cpuintrinsics.cpp @@ -11,6 +11,7 @@ #include #include #pragma intrinsic(__rdtsc) +#elif defined(__riscv) #else #if defined(__ARM_ARCH) extern "C" uint64_t __rdtsc(); @@ -22,6 +23,7 @@ extern "C" uint64_t __rdtsc(); #if defined(__ARM_ARCH) #include +#elif defined(__riscv) #else #include #endif @@ -30,23 +32,46 @@ namespace NEO { namespace CpuIntrinsics { void clFlush(void const *ptr) { +#if defined(__riscv) + return; +#else _mm_clflush(ptr); +#endif } void clFlushOpt(void *ptr) { #ifdef SUPPORTS_CLFLUSHOPT _mm_clflushopt(ptr); +#elif defined(__riscv) + return; #else _mm_clflush(ptr); #endif } void sfence() { +#if defined(__riscv) +// According to: +// +// https://blog.jiejiss.com/Rust-is-incompatible-with-LLVM-at-least-partially/ +// https://github.com/riscv/riscv-isa-manual/issues/43 +// https://stackoverflow.com/questions/68537854/pause-instruction-unrecognized-opcode-pause-in-risc-v +// +// gcc/clang assembler will not currently (2022) accept `fence w,unknown`; +// `hand-rolling` the instruction (see below) does the job. +// + __asm__ __volatile__(".insn i 0x0F, 0, x0, x0, 0x010"); +#else _mm_sfence(); +#endif } void pause() { +#if defined(__riscv) + __asm__ volatile("fence"::); +#else _mm_pause(); +#endif } uint8_t tpause(uint32_t control, uint64_t counter) { @@ -72,7 +97,16 @@ void umonitor(void *a) { } uint64_t rdtsc() { +#if defined(__riscv) + std::uint64_t val = 0; + __asm__ __volatile__( + "rdtime %0;\n" + : "=r"(val) + :: ); + return val; +#else return __rdtsc(); +#endif } } // namespace CpuIntrinsics diff --git a/shared/source/utilities/linux/riscv64/CMakeLists.txt b/shared/source/utilities/linux/riscv64/CMakeLists.txt new file mode 100644 index 0000000000000..bf75f5bc1a7c7 --- /dev/null +++ b/shared/source/utilities/linux/riscv64/CMakeLists.txt @@ -0,0 +1,13 @@ +# +# Copyright (C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT +# + +if(${NEO_TARGET_PROCESSOR} STREQUAL "riscv64") + list(APPEND NEO_CORE_UTILITIES_LINUX + ${CMAKE_CURRENT_SOURCE_DIR}/cpu_info.cpp + ) + + set_property(GLOBAL PROPERTY NEO_CORE_UTILITIES_LINUX ${NEO_CORE_UTILITIES_LINUX}) +endif() diff --git a/shared/source/utilities/riscv64/CMakeLists.txt b/shared/source/utilities/riscv64/CMakeLists.txt new file mode 100644 index 0000000000000..413ca2f0e6708 --- /dev/null +++ b/shared/source/utilities/riscv64/CMakeLists.txt @@ -0,0 +1,12 @@ +# +# Copyright (C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT +# + +if(${NEO_TARGET_PROCESSOR} STREQUAL "riscv64") + set_property(GLOBAL APPEND PROPERTY NEO_CORE_UTILITIES + ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt + ${CMAKE_CURRENT_SOURCE_DIR}/cpu_info_riscv64.cpp + ) +endif() From 5025995a3109c8dd3f45300926082260c957053c Mon Sep 17 00:00:00 2001 From: "ct.clmsn" Date: Mon, 31 Mar 2025 21:11:09 -0400 Subject: [PATCH 2/4] adding missing file --- .../utilities/riscv64/cpu_info_riscv64.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 shared/source/utilities/riscv64/cpu_info_riscv64.cpp diff --git a/shared/source/utilities/riscv64/cpu_info_riscv64.cpp b/shared/source/utilities/riscv64/cpu_info_riscv64.cpp new file mode 100644 index 0000000000000..8d0c49888f720 --- /dev/null +++ b/shared/source/utilities/riscv64/cpu_info_riscv64.cpp @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2021-2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/utilities/cpu_info.h" + +#include + +namespace NEO { +void CpuInfo::detect() const { + uint32_t cpuInfo[4] = {}; + cpuid(cpuInfo, 0u); + features |= featureNone; +} +} // namespace NEO From 517c7eb9e15e6c4f97298e3e042e59821b000c27 Mon Sep 17 00:00:00 2001 From: "ct.clmsn" Date: Mon, 31 Mar 2025 21:13:09 -0400 Subject: [PATCH 3/4] adding missing file --- .../utilities/linux/riscv64/cpu_info.cpp | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 shared/source/utilities/linux/riscv64/cpu_info.cpp diff --git a/shared/source/utilities/linux/riscv64/cpu_info.cpp b/shared/source/utilities/linux/riscv64/cpu_info.cpp new file mode 100644 index 0000000000000..e8d7c0bd9cd68 --- /dev/null +++ b/shared/source/utilities/linux/riscv64/cpu_info.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2019-2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/utilities/cpu_info.h" + +#include "shared/source/os_interface/linux/os_inc.h" + +#include +#include +#include + +namespace NEO { + +void cpuidLinuxWrapper(int cpuInfo[4], int functionId) { + cpuInfo[0] = static_cast(getauxval(AT_HWCAP)); +} + +void cpuidexLinuxWrapper(int *cpuInfo, int functionId, int subfunctionId) { +} + +void getCpuFlagsLinux(std::string &cpuFlags) { + std::ifstream cpuinfo(std::string(Os::sysFsProcPathPrefix) + "/cpuinfo"); + std::string line; + while (std::getline(cpuinfo, line)) { + if (line.substr(0, 8) == "Features") { + cpuFlags = line; + break; + } + } +} + +void (*CpuInfo::cpuidexFunc)(int *, int, int) = cpuidexLinuxWrapper; +void (*CpuInfo::cpuidFunc)(int[4], int) = cpuidLinuxWrapper; +void (*CpuInfo::getCpuFlagsFunc)(std::string &) = getCpuFlagsLinux; + +const CpuInfo CpuInfo::instance; + +void CpuInfo::cpuid( + uint32_t cpuInfo[4], + uint32_t functionId) const { + cpuidFunc(reinterpret_cast(cpuInfo), functionId); +} + +void CpuInfo::cpuidex( + uint32_t cpuInfo[4], + uint32_t functionId, + uint32_t subfunctionId) const { + cpuidexFunc(reinterpret_cast(cpuInfo), functionId, subfunctionId); +} + +} // namespace NEO From b39f1c72bbc15c0848e6277e9e090fde5b625b67 Mon Sep 17 00:00:00 2001 From: "ct.clmsn" Date: Mon, 31 Mar 2025 21:14:23 -0400 Subject: [PATCH 4/4] adding missing file --- .../source/helpers/riscv64/local_id_gen.cpp | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 shared/source/helpers/riscv64/local_id_gen.cpp diff --git a/shared/source/helpers/riscv64/local_id_gen.cpp b/shared/source/helpers/riscv64/local_id_gen.cpp new file mode 100644 index 0000000000000..c4f4e9712f28c --- /dev/null +++ b/shared/source/helpers/riscv64/local_id_gen.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2018-2024 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/helpers/local_id_gen.h" + +#include "shared/source/execution_environment/root_device_environment.h" +#include "shared/source/helpers/aligned_memory.h" +#include "shared/source/helpers/gfx_core_helper.h" +#include "shared/source/helpers/local_id_gen_special.inl" +#include "shared/source/utilities/cpu_info.h" + +namespace NEO { + +struct uint16x8_t; +struct uint16x16_t; + +// This is the initial value of SIMD for local ID +// computation. It correlates to the SIMD lane. +// Must be 32byte aligned for AVX2 usage +ALIGNAS(32) +const uint16_t initialLocalID[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; + +// Initialize the lookup table based on CPU capabilities +LocalIDHelper::LocalIDHelper() { +} + +LocalIDHelper LocalIDHelper::initializer; + +void generateLocalIDs(void *buffer, uint16_t simd, const std::array &localWorkgroupSize, const std::array &dimensionsOrder, bool isImageOnlyKernel, uint32_t grfSize, uint32_t grfCount, const RootDeviceEnvironment &rootDeviceEnvironment) { + generateLocalIDsForSimdOne(buffer, localWorkgroupSize, dimensionsOrder, grfSize); +} + +} // namespace NEO