|
131 | 131 |
|
# Device-side `exp2` for Float64: forwards the argument to the external symbol
# `__nv_exp2` through an `llvmcall`-convention `ccall` (Cdouble in, Cdouble out).
# NOTE(review): the `__nv_` prefix suggests NVIDIA libdevice — confirm.
132 | 132 | @device_override Base.exp2(x::Float64) = ccall("extern __nv_exp2", llvmcall, Cdouble, (Cdouble,), x)
|
# Device-side `exp2` for Float32: same shape as the Float64 method, but calls the
# external symbol `__nv_exp2f` with a Cfloat argument and Cfloat result.
133 | 133 | @device_override Base.exp2(x::Float32) = ccall("extern __nv_exp2f", llvmcall, Cfloat, (Cfloat,), x)
|
# `exp2_fast` for Float32/Float64 has no separate fast implementation here: it
# simply calls plain `exp2(x)`.
# NOTE(review): presumably this resolves to the device `exp2` overrides defined
# for the same types — confirm against how `@device_override` dispatch works.
| 134 | +@device_override FastMath.exp2_fast(x::Union{Float32, Float64}) = exp2(x) |
134 | 135 | # TODO: enable once PTX > 7.0 is supported
|
135 | 136 | # @device_override Base.exp2(x::Float16) = @asmcall("ex2.approx.f16 \$0, \$1", "=h,h", Float16, Tuple{Float16}, x)
|
136 | 137 |
|
|
221 | 222 |
|
# Device-side `sqrt` for Float64: forwards to the external symbol `__nv_sqrt`
# through an `llvmcall`-convention `ccall` (Cdouble in, Cdouble out).
222 | 223 | @device_override Base.sqrt(x::Float64) = ccall("extern __nv_sqrt", llvmcall, Cdouble, (Cdouble,), x)
|
# Device-side `sqrt` for Float32: calls the external symbol `__nv_sqrtf` with a
# Cfloat argument and Cfloat result.
223 | 224 | @device_override Base.sqrt(x::Float32) = ccall("extern __nv_sqrtf", llvmcall, Cfloat, (Cfloat,), x)
|
# `sqrt_fast` for Float32/Float64 is defined as a plain call to `sqrt(x)`; no
# distinct fast-math variant is used in this definition.
# NOTE(review): presumably this lands on the device `sqrt` overrides for the same
# types — confirm.
| 225 | +@device_override FastMath.sqrt_fast(x::Union{Float32, Float64}) = sqrt(x) |
224 | 226 |
|
# `rsqrt` for Float64: declared with `@device_function` (not `@device_override`,
# and not qualified with `Base.`), forwarding to the external symbol `__nv_rsqrt`.
# NOTE(review): the name suggests reciprocal square root — confirm against the
# documentation of the external symbol.
225 | 227 | @device_function rsqrt(x::Float64) = ccall("extern __nv_rsqrt", llvmcall, Cdouble, (Cdouble,), x)
|
# `rsqrt` for Float32: same shape as the Float64 method, calling the external
# symbol `__nv_rsqrtf` with a Cfloat argument and Cfloat result.
226 | 228 | @device_function rsqrt(x::Float32) = ccall("extern __nv_rsqrtf", llvmcall, Cfloat, (Cfloat,), x)
|
|
306 | 308 |
|
# `div_fast` for a pair of Float32 values: passes `x` and `y` as two Cfloat
# arguments to the external symbol `__nv_fast_fdividef` and returns its Cfloat
# result. Only the Float32/Float32 method is defined on this line.
307 | 309 | @device_override FastMath.div_fast(x::Float32, y::Float32) = ccall("extern __nv_fast_fdividef", llvmcall, Cfloat, (Cfloat, Cfloat), x, y)
|
308 | 310 |
|
# Device-side `inv` for Float32: forwards to the external symbol `__nv_frcp_rn`
# (Cfloat in, Cfloat out).
# NOTE(review): the `_rn` suffix presumably denotes round-to-nearest — confirm.
# No Float64 counterpart is defined on this line.
| 311 | +@device_override Base.inv(x::Float32) = ccall("extern __nv_frcp_rn", llvmcall, Cfloat, (Cfloat,), x) |
# `inv_fast` for Float32/Float64: computes `one(x) / x` under `@fastmath`, so the
# division is rewritten to its fast-math form rather than calling an intrinsic
# directly; `one(x)` keeps the numerator in the same type as `x`.
# NOTE(review): for Float32 this presumably reaches the device `div_fast`
# override — confirm.
| 312 | +@device_override FastMath.inv_fast(x::Union{Float32, Float64}) = @fastmath one(x) / x |
309 | 313 |
|
310 | 314 | ## distributions
|
311 | 315 |
|
|
0 commit comments