diff --git a/rocclr/device/rocm/rocsettings.cpp b/rocclr/device/rocm/rocsettings.cpp index e1de3a142..9982b2eb8 100644 --- a/rocclr/device/rocm/rocsettings.cpp +++ b/rocclr/device/rocm/rocsettings.cpp @@ -256,6 +256,8 @@ void Settings::setKernelArgImpl(const amd::Isa& isa, bool isXgmi, bool hasValidH auto kernelArgImpl = KernelArgImpl::HostKernelArgs; + hasValidHDPFlush &= DEBUG_CLR_KERNARG_HDP_FLUSH_WA; + if (isXgmi) { // The XGMI-connected path does not require the manual memory ordering // workarounds that the PCIe connected path requires @@ -284,6 +286,8 @@ void Settings::setKernelArgImpl(const amd::Isa& isa, bool isXgmi, bool hasValidH if (!flagIsDefault(HIP_FORCE_DEV_KERNARG)) { kernel_arg_impl_ = kernelArgImpl & (HIP_FORCE_DEV_KERNARG ? 0xF : 0x0); } + + ClPrint(amd::LOG_INFO, amd::LOG_INIT, "Using dev kernel arg wa = %d", kernel_arg_impl_); } } // namespace amd::roc diff --git a/rocclr/utils/flags.hpp b/rocclr/utils/flags.hpp index 30d286d73..72c643bf4 100644 --- a/rocclr/utils/flags.hpp +++ b/rocclr/utils/flags.hpp @@ -251,6 +251,8 @@ release(bool, DEBUG_HIP_GRAPH_DOT_PRINT, false, \ "Enable/Disable graph debug dot print dump") \ release(bool, HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, false, \ "Force to always use new comgr unbundling action") \ +release(bool, DEBUG_CLR_KERNARG_HDP_FLUSH_WA, false, \ + "Toggle kernel arg copy workaround") \ namespace amd {