Merge pull request #1796 from emankov/HIPIFY

[HIPIFY][Device][tests][fix] Fix device test failures on CUDA 11.8.0, 12.0.x, and 12.1.x
ROCm · Dec 17, 2024 · a5bc7cd · a5bc7cd
2 parents 6ae8908 + 97dd6f9
commit a5bc7cd
Show file tree

Hide file tree

Showing 3 changed files with 64 additions and 20 deletions.
diff --git a/tests/lit.cfg b/tests/lit.cfg
@@ -129,6 +129,10 @@ if config.cuda_version_major >= 11:
 
 if config.cuda_version_major < 11 or (config.cuda_version_major == 11 and config.cuda_version_minor < 8):  # CUDA older than 11.8
     config.excludes.append('cudevice2hipdevice.cu')
+    config.excludes.append('cudevice2hipdevice_before_11080_after_12011.cu')  # the new test is skipped here too, despite "before_11080" in its name
+
+if (config.cuda_version_major == 11 and config.cuda_version_minor >=8) or (config.cuda_version_major == 12 and config.cuda_version_minor < 2):  # CUDA 11.8 up to, but not including, 12.2
+    config.excludes.append('cudevice2hipdevice_before_11080_after_12011.cu')  # NOTE(review): with the branch above, this test is excluded for every CUDA version below 12.2; the two conditions could be one check - confirm intent
 
 if config.cuda_version_major < 12:
     config.excludes.append('headers_test_06_12000.cu')

diff --git a/tests/unit_tests/synthetic/libraries/cudevice2hipdevice.cu b/tests/unit_tests/synthetic/libraries/cudevice2hipdevice.cu
@@ -28,6 +28,8 @@ int main() {
   __nv_bfloat16_raw bf16r = { 0 };
 
   // CHECK: __hip_bfloat162 bf162 = { 0, 0 };
+  // CHECK-NEXT: __hip_bfloat162 bf162a = { 0, 0 };
+  // CHECK-NEXT: __hip_bfloat162 bf162b = { 0, 0 };
   __nv_bfloat162 bf162 = { 0, 0 };
   __nv_bfloat162 bf162a = { 0, 0 };
   __nv_bfloat162 bf162b = { 0, 0 };
@@ -54,26 +56,6 @@ int main() {
   // HIP: __BF16_HOST_DEVICE_STATIC__ float2 __bfloat1622float2(const __hip_bfloat162 a);
   // CHECK: f2 = __bfloat1622float2(bf162);
   f2 = __bfloat1622float2(bf162);
-
-  // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __bfloat162bfloat162(const __nv_bfloat16 a);
-  // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __bfloat162bfloat162(const __hip_bfloat16 a);
-  // CHECK: bf162 = __bfloat162bfloat162(bf16);
-  bf162 = __bfloat162bfloat162(bf16);
-
-  // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __lows2bfloat162(const __nv_bfloat162 a, const __nv_bfloat162 b);
-  // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __lows2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b);
-  // CHECK: bf162 = __lows2bfloat162(bf162a, bf162b);
-  bf162 = __lows2bfloat162(bf162a, bf162b);
-
-  // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __highs2bfloat162(const __nv_bfloat162 a, const __nv_bfloat162 b);
-  // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __highs2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b);
-  // CHECK: bf162 = __highs2bfloat162(bf162a, bf162b);
-  bf162 = __highs2bfloat162(bf162a, bf162b);
-
-  // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __high2bfloat16(const __nv_bfloat162 a);
-  // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __high2bfloat16(const __hip_bfloat162 a);
-  // CHECK: bf16 = __high2bfloat16(bf162a);
-  bf16 = __high2bfloat16(bf162a);
 #endif
 
 #if CUDA_VERSION >= 11080

diff --git a/tests/unit_tests/synthetic/libraries/cudevice2hipdevice_before_11080_after_12011.cu b/tests/unit_tests/synthetic/libraries/cudevice2hipdevice_before_11080_after_12011.cu
@@ -0,0 +1,58 @@
+// RUN: %run_test hipify "%s" "%t" %hipify_args 3 --amap --skip-excluded-preprocessor-conditional-blocks --experimental %clang_args -ferror-limit=500
+
+// CHECK: #include <hip/hip_runtime.h>
+#include <cuda_runtime.h>
+#include <stdio.h>
+// CHECK: #include "hip/hip_fp8.h"
+#include "cuda_fp8.h"
+// CHECK-NOT: #include "hip/hip_fp8.h"
+// CHECK-NOT: #include "cuda_fp8.h"
+
+int main() {  // hipify input for a synthetic test: each CUDA statement below is paired with a FileCheck directive giving its expected HIP spelling
+  printf("24.before_11080_after_12011, CUDA Device API to HIP Device API synthetic test\n");
+
+  double da = 0.0f;  // da through h2rx are declared but never referenced again in this file - presumably carried over from cudevice2hipdevice.cu
+  double dx = 0.0f;
+  float fa = 0.0f;
+  float fx = 0.0f;
+  double2 d2 = { 0.0f, 0.0f };
+  float2 f2 = { 0.0f, 0.0f };
+  __half_raw hrx = { 0 };
+  __half2_raw h2rx = { 0, 0 };
+
+#if CUDA_VERSION >= 11000  // the bfloat16 declarations and calls below are compiled only for CUDA 11.0 and newer
+  // CHECK: __hip_bfloat16 bf16 = { 0 };
+  __nv_bfloat16 bf16 = { 0 };
+  // CHECK: __hip_bfloat162 bf162 = { 0, 0 };
+  // CHECK-NEXT: __hip_bfloat162 bf162a = { 0, 0 };
+  // CHECK-NEXT: __hip_bfloat162 bf162b = { 0, 0 };
+  __nv_bfloat162 bf162 = { 0, 0 };
+  __nv_bfloat162 bf162a = { 0, 0 };
+  __nv_bfloat162 bf162b = { 0, 0 };
+
+#if CUDA_VERSION < 11080 || CUDA_VERSION >= 12000  // NOTE(review): tests/lit.cfg excludes this file for every CUDA version below 12.2, so the "< 11080" arm and CUDA 12.0/12.1 are never exercised under lit - confirm the intended lower bound
+  // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __bfloat162bfloat162(const __nv_bfloat16 a);
+  // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __bfloat162bfloat162(const __hip_bfloat16 a);
+  // CHECK: bf162 = __bfloat162bfloat162(bf16);
+  bf162 = __bfloat162bfloat162(bf16);
+
+  // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __lows2bfloat162(const __nv_bfloat162 a, const __nv_bfloat162 b);
+  // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __lows2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b);
+  // CHECK: bf162 = __lows2bfloat162(bf162a, bf162b);
+  bf162 = __lows2bfloat162(bf162a, bf162b);
+
+  // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __highs2bfloat162(const __nv_bfloat162 a, const __nv_bfloat162 b);
+  // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __highs2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b);
+  // CHECK: bf162 = __highs2bfloat162(bf162a, bf162b);
+  bf162 = __highs2bfloat162(bf162a, bf162b);
+
+  // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __high2bfloat16(const __nv_bfloat162 a);
+  // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __high2bfloat16(const __hip_bfloat162 a);
+  // CHECK: bf16 = __high2bfloat16(bf162a);
+  bf16 = __high2bfloat16(bf162a);
+#endif  // CUDA_VERSION < 11080 || CUDA_VERSION >= 12000
+
+#endif  // CUDA_VERSION >= 11000
+
+  return 0;
+}