Skip to content

Commit

Permalink
Make global workitems multiple of local workitems (#1976)
Browse files Browse the repository at this point in the history
HIP requires global work items in multiple of local work items. If it is not it is not guaranteed to generate correct results all the time.
Fixes #1977
Fixes #1644
MIGraphX CI has moved to rocm-5.6 which doesn't require hipRTC workarounds
  • Loading branch information
umangyadav authored Jul 21, 2023
1 parent 04ae9b8 commit 3216fe5
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 8 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
set +x
gh extension install actions/gh-actions-cache --pin v1.0.1
gh actions-cache delete ${{ steps.ccache_restore.outputs.cache-matched-key }} --confirm
Expand Down Expand Up @@ -439,6 +440,7 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
set +x
gh extension install actions/gh-actions-cache
gh actions-cache delete ${{ steps.ccache_restore_fpga.outputs.cache-matched-key }} --confirm
continue-on-error: true
Expand Down
4 changes: 1 addition & 3 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,11 @@ def rocmtestnode(Map conf) {
def compiler = bconf.get("compiler", "/opt/rocm/llvm/bin/clang++")
def flags = bconf.get("flags", "")
def gpu_debug = bconf.get("gpu_debug", "0")
def hiprtc_workarounds = bconf.get("hiprtc_workarounds", "0")
def cmd = """
ulimit -c unlimited
echo "leak:dnnl::impl::malloc" > suppressions.txt
export LSAN_OPTIONS="suppressions=\$(pwd)/suppressions.txt"
export MIGRAPHX_GPU_DEBUG=${gpu_debug}
export MIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS=${hiprtc_workarounds}
export CXX=${compiler}
export CXXFLAGS='-Werror'
env
Expand Down Expand Up @@ -109,7 +107,7 @@ rocmtest clang_debug: rocmnode('cdna') { cmake_build ->
stage('hipRTC Debug') {
def sanitizers = "undefined"
def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize-recover=${sanitizers}"
cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}' -DMIGRAPHX_USE_HIPRTC=On", gpu_debug: true, hiprtc_workarounds: true)
cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}' -DMIGRAPHX_USE_HIPRTC=On", gpu_debug: true)
}
}, clang_release: rocmnode('cdna') { cmake_build ->
stage('Hip Clang Release') {
Expand Down
9 changes: 4 additions & 5 deletions src/targets/gpu/compile_hip_code_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,14 +135,13 @@ compute_global_for(context& ctx, std::size_t n, std::size_t over)
std::size_t max_global = ctx.get_current_device().get_cu_count() *
ctx.get_current_device().get_max_workitems_per_cu();
return [n, over, max_global](std::size_t local) {
std::size_t num_elements = n;
// hip require global workitems multiple of local workitems. It may degrade performance.
// [TODO]: consider adding "fno-hip-uniform-block" flag when it becomes available.
// https://reviews.llvm.org/D155213
std::size_t num_elements = ((n + local - 1) / local) * local;
std::size_t groups = (num_elements + local - 1) / local;
std::size_t max_blocks = max_global / local;
std::size_t nglobal = std::min(max_blocks * over, groups) * local;
#ifdef MIGRAPHX_USE_HIPRTC
if(enabled(MIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS{}))
num_elements = ((num_elements + local - 1) / local) * local;
#endif
return std::min(nglobal, num_elements);
};
}
Expand Down

0 comments on commit 3216fe5

Please sign in to comment.