From fc0ceddecb7e64073356f64c0290080cb20bb6d7 Mon Sep 17 00:00:00 2001 From: Malcolm Roberts Date: Fri, 20 Sep 2024 15:47:20 -0600 Subject: [PATCH] Improve address sanitizer build When building with the address sanitizer option turned on: Compile for xnack+ architectures when specifying address sanitizer build. Add address sanitizer flags to runtime-compiled kernels. Do not create a kernel cache. --- CHANGELOG.md | 8 ++++++ CMakeLists.txt | 55 ++++++++++++++++++++++++++++++------- library/src/rtc_cache.cpp | 9 ++++-- library/src/rtc_compile.cpp | 4 ++- 4 files changed, 63 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index da86c649..7b89a0bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,14 @@ Documentation for rocFFT is available at ## rocFFT 1.0.29 (unreleased) +### Changes + +* Building with the address sanitizer option sets xnack+ on relevant GPU + architectures and address-sanitizer support is added to runtime-compiled + kernels. + +## rocFFT 1.0.31 for ROCm 6.3.0 + ### Additions * Implemented experimental APIs to allow computing FFTs on data diff --git a/CMakeLists.txt b/CMakeLists.txt index fc439ca5..deb249e5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -116,11 +116,6 @@ option(ROCFFT_RUNTIME_COMPILE_DEFAULT "Compile kernels at runtime by default" OF # Set default to OFF since users are not likely to tune option(ROCFFT_BUILD_OFFLINE_TUNER "Build with offline tuner executable rocfft_offline_tuner" OFF) -if(BUILD_ADDRESS_SANITIZER) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -shared-libasan") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -shared-libasan") - add_link_options(-fuse-ld=lld) -endif() # FOR HANDLING ENABLE/DISABLE OPTIONAL BACKWARD COMPATIBILITY for FILE/FOLDER REORG option(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY "Build with file/folder reorg with backward compatibility enabled" OFF) @@ -138,12 +133,52 @@ if( WERROR ) set( WARNING_FLAGS ${WARNING_FLAGS} -Werror ) endif( ) -# Use target ID syntax if supported for AMDGPU_TARGETS -rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS - TARGETS "gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201") -set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for library to target") -list(LENGTH AMDGPU_TARGETS AMDGPU_TARGETS_LENGTH) +set(DEFAULT_GPUS + gfx803 + gfx900 + gfx906 + gfx908 + gfx90a + gfx940 + gfx941 + gfx942 + gfx1030 + gfx1100 + gfx1101 + gfx1102 + gfx1151 + gfx1200 + gfx1201) + +if(BUILD_ADDRESS_SANITIZER) + add_compile_options(-fsanitize=address) + add_link_options(-fsanitize=address) + add_link_options(-shared-libasan) + SET(DEFAULT_GPUS + gfx908:xnack+ + gfx90a:xnack+ + gfx940:xnack+ + gfx941:xnack+ + gfx942:xnack+) + add_link_options(-fuse-ld=lld) + set(ROCFFT_KERNEL_CACHE_ENABLE off) + add_compile_definitions(ADDRESS_SANITIZER) +endif() + +# Build only for local GPU architecture +if (BUILD_LOCAL_GPU_TARGET_ONLY) + message(STATUS "Building only for local GPU target") + if (COMMAND rocm_local_targets) + rocm_local_targets(DEFAULT_GPUS) + else() + message(WARNING "Unable to determine local GPU targets. Falling back to default GPUs.") + endif() +endif() + +set(AMDGPU_TARGETS "${DEFAULT_GPUS}" CACHE STRING "Target default GPUs if AMDGPU_TARGETS is not defined.") +rocm_check_target_ids(AMDGPU_TARGETS TARGETS "${AMDGPU_TARGETS}") + # HIP is required - library and clients use HIP to access the device find_package( HIP REQUIRED ) diff --git a/library/src/rtc_cache.cpp b/library/src/rtc_cache.cpp index 7a0d7a46..e5a86987 100644 --- a/library/src/rtc_cache.cpp +++ b/library/src/rtc_cache.cpp @@ -562,6 +562,11 @@ std::vector RTCCache::cached_compile(const std::string& kernel_na kernel_src_gen_t generate_src, const std::array& generator_sum) { +#ifdef ADDRESS_SANITIZER + // The address sanitizer is reported to work better when we include xnack+, so don't strip this + // from the architecture string when building: + const std::string gpu_arch = gpu_arch_with_flags; +#else // Supplied gpu arch may have extra flags on it // (e.g. gfx90a:sramecc+:xnack-), Strip those from the arch name // since omitting them will generate code that handles either @@ -570,8 +575,8 @@ std::vector RTCCache::cached_compile(const std::string& kernel_na // As of this writing, there are no known performance benefits to // including the flags. If that changes, we may need to be more // selective about which flags to strip. - std::string gpu_arch = gpu_arch_strip_flags(gpu_arch_with_flags); - + const std::string gpu_arch = gpu_arch_strip_flags(gpu_arch_with_flags); +#endif std::shared_future> result; const pending_key key{kernel_name, gpu_arch}; diff --git a/library/src/rtc_compile.cpp b/library/src/rtc_compile.cpp index 711eafd3..2e831f7f 100644 --- a/library/src/rtc_compile.cpp +++ b/library/src/rtc_compile.cpp @@ -36,11 +36,13 @@ std::vector compile_inprocess(const std::string& kernel_src, const std::st std::string gpu_arch_arg = "--gpu-architecture=" + gpu_arch; std::vector options; - options.reserve(4); options.push_back("-O3"); options.push_back("-std=c++14"); options.push_back(gpu_arch_arg.c_str()); options.push_back("-mcumode"); +#ifdef ADDRESS_SANITIZER + options.push_back("-fsanitize=address"); +#endif auto compileResult = hiprtcCompileProgram(prog, options.size(), options.data()); if(compileResult != HIPRTC_SUCCESS)