@@ -58,15 +58,19 @@ std::string CodeGenCUDA::Finish() {
58
58
<< " {\n return __hgt(__half(a), __half(b)) ? a : b;\n }\n " ;
59
59
decl_stream << " __device__ half min(half a, half b)\n "
60
60
<< " {\n return __hlt(__half(a), __half(b)) ? a : b;\n }\n " ;
61
- decl_stream << " __device__ half operator<="
62
- << " (__half a, __half b)\n "
63
- << " {\n return __hlt(a, b);\n }\n " ;
64
- decl_stream << " __device__ half operator+"
65
- << " (__half a, __half &b)\n "
66
- <<" {\n return __hadd(a, b);\n }\n " ;
67
- decl_stream << " __device__ half operator*"
68
- << " (__half a, __half b)\n "
69
- << " {\n return __hmul(a, b);\n }\n " ;
61
+ // FIXME(tvm-team): "volatile" is used to enable cross thread reduction,
62
+ // which is needed by operations such as softmax.
63
+ // However, volatile overloading is not supported in NVRTC and CUDA < 9.2.
64
+ // We need to figure out a solution which can satisfy both scenarios.
65
+ // decl_stream << "__device__ half operator<="
66
+ // << "(const volatile __half &a, const volatile __half &b)\n"
67
+ // << "{\n return __hlt(a, b);\n}\n";
68
+ // decl_stream << "__device__ half operator+"
69
+ // << "(const volatile __half &a, const volatile __half &b)\n"
70
+ // <<"{\n return __hadd(a, b);\n}\n";
71
+ // decl_stream << "__device__ half operator*"
72
+ // << "(const volatile __half &a, const volatile __half &b)\n"
73
+ // << "{\n return __hmul(a, b);\n}\n";
70
74
// otherwise simulate computation via float32
71
75
decl_stream << " #else\n " ;
72
76
decl_stream << _cuda_half_t_def;
0 commit comments