Skip to content

Commit

Permalink
Merge pull request #146 from ROCmSoftwarePlatform/mad_u64_u32_on_gfx8_9
Browse files Browse the repository at this point in the history
Reenable Mad u64 u32 on gfx8-9
  • Loading branch information
saadrahim authored Sep 23, 2020
2 parents 4563be6 + ed697c4 commit e1ac257
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 4 deletions.
2 changes: 1 addition & 1 deletion .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ variables:
# Local build options
LOCAL_CXXFLAGS: ""
LOCAL_CMAKE_OPTIONS: ""
ROCM_LATEST_PATH: "/opt/rocm-3.7.0/"
ROCM_LATEST_PATH: "/opt/rocm-3.8.0/"

# ROCm

Expand Down
39 changes: 36 additions & 3 deletions library/include/rocrand_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,43 @@
namespace rocrand_device {
namespace detail {

#if ( defined(__gfx801__) || \
defined(__gfx802__) || \
defined(__gfx803__) || \
defined(__gfx810__) || \
defined(__gfx900__) || \
defined(__gfx902__) || \
defined(__gfx904__) || \
defined(__gfx906__) || \
defined(__gfx908__) || \
defined(__gfx909__) )
#ifndef ROCRAND_ENABLE_INLINE_ASM
#define ROCRAND_ENABLE_INLINE_ASM
#endif
#else
#ifdef ROCRAND_ENABLE_INLINE_ASM
#undef ROCRAND_ENABLE_INLINE_ASM
#pragma warning "Disabled inline asm, because the build target does not support it."
#endif
#endif

FQUALIFIERS
unsigned long long mad_u64_u32(const unsigned int x, const unsigned int y, const unsigned long long z)
{
#if defined(__HIP_PLATFORM_NVCC__) && defined(__HIP_DEVICE_COMPILE__) \
#if defined(__HIP_PLATFORM_HCC__) && defined(__HIP_DEVICE_COMPILE__) \
&& defined(ROCRAND_ENABLE_INLINE_ASM)

unsigned long long r;
unsigned long long c; // carry bits, SGPR, unused
// x has "r" constraint. This allows to use both VGPR and SGPR
// (to save VGPR) as input.
// y and z have "v" constraints, because only one SGPR or literal
// can be read by the instruction.
asm volatile("v_mad_u64_u32 %0, %1, %2, %3, %4"
: "=v"(r), "=s"(c) : "r"(x), "v"(y), "v"(z)
);
return r;
#elif defined(__HIP_PLATFORM_NVCC__) && defined(__HIP_DEVICE_COMPILE__) \
&& defined(ROCRAND_ENABLE_INLINE_ASM)

unsigned long long r;
Expand All @@ -58,11 +91,11 @@ unsigned long long mad_u64_u32(const unsigned int x, const unsigned int y, const
);
return r;

#else // host code
#else // host code

return static_cast<unsigned long long>(x) * static_cast<unsigned long long>(y) + z;

#endif
#endif
}

// This helps access fields of engine's internal state which
Expand Down

0 comments on commit e1ac257

Please sign in to comment.