This repository was archived by the owner on May 27, 2021. It is now read-only.

Commit d98e60e

Authored by bors[bot], benchislett, and PhilipVinc

Merge #466
466: Add some more complex operations - Take 2 r=maleadt a=PhilipVinc

This is the commit from @benchislett in PR #445, with the addition of the `angle` and `log` operations and several tests for all functions included in the PR.

@benchislett If you prefer to take my commits into your PR, please feel free to do so. I'm doing this only because I have some urgency in getting these merged.

On a side note: I would love to add more functions (notably `log1p`, `expm1`, and `sort`), but I am not sure how to do this. Looking at [thrust](https://github.com/thrust/thrust/blob/7df7efe3542a0ab549530bc478467320467e0094/thrust/detail/complex/csqrt.h), they have a bunch of if/else logic like in base Julia. Is this even a good thing to do on the GPU?

cc @maleadt

Co-authored-by: benchislett <[email protected]>
Co-authored-by: Filippo Vicentini <[email protected]>
2 parents a4a56bd + 0a6fe26 commit d98e60e
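
For context, here is a minimal sketch of how the merged operations would be used from device code. This is illustrative only and not part of the commit; the kernel name, array setup, and launch configuration are assumptions (a CUDAnative-era setup with CuArrays):

    using CUDAnative, CuArrays

    # Hypothetical kernel: compute the complex log of each element and
    # store the phase separately, using the ops added in this PR.
    function logangle_kernel(out_log, out_angle, xs)
        i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
        if i <= length(xs)
            @inbounds out_log[i] = CUDAnative.log(xs[i])
            @inbounds out_angle[i] = CUDAnative.angle(xs[i])
        end
        return
    end

    xs = CuArray(rand(ComplexF32, 1024))
    out_log = similar(xs)
    out_angle = CuArray(zeros(Float32, 1024))
    @cuda threads=256 blocks=4 logangle_kernel(out_log, out_angle, xs)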

File tree: 2 files changed (+137, -1 lines)


src/device/cuda/math.jl

Lines changed: 17 additions & 0 deletions
@@ -32,9 +32,14 @@
 @inline atan(x::Float64) = @wrap __nv_atan(x::double)::double
 @inline atan(x::Float32) = @wrap __nv_atanf(x::float)::float
 
+# ! CUDAnative.atan2 is equivalent to Base.atan
 @inline atan2(x::Float64, y::Float64) = @wrap __nv_atan2(x::double, y::double)::double
 @inline atan2(x::Float32, y::Float32) = @wrap __nv_atan2f(x::float, y::float)::float
 
+@inline angle(x::ComplexF64) = atan2(x.im, x.re)
+@inline angle(x::ComplexF32) = atan2(x.im, x.re)
+@inline angle(x::Float64) = signbit(x) * 3.141592653589793
+@inline angle(x::Float32) = signbit(x) * 3.1415927f0
 
 ## hyperbolic
 
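A note on the scalar `angle` methods above: `signbit(x)` is a `Bool`, which promotes to `0` or `1` under multiplication, so the result is π for negative reals and `0.0` otherwise, matching `Base.angle` on real arguments. A quick CPU-side sanity check (plain Julia, not part of this diff):

    # signbit(x)::Bool promotes to 0 or 1, so negative reals map to π
    # and non-negative reals to 0, like Base.angle for real inputs.
    angle_real(x::Float64) = signbit(x) * 3.141592653589793

    @assert angle_real(-2.5) == angle(-2.5)  # π
    @assert angle_real(2.5)  == angle(2.5)   # 0.0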
@@ -66,6 +71,10 @@
 @inline log(x::Float32) = @wrap __nv_logf(x::float)::float
 @inline log_fast(x::Float32) = @wrap __nv_fast_logf(x::float)::float
 
+@inline log(x::ComplexF64) = log(abs(x)) + im * angle(x)
+@inline log(x::ComplexF32) = log(abs(x)) + im * angle(x)
+@inline log_fast(x::ComplexF32) = log_fast(abs(x)) + im * angle(x)
+
 @inline log10(x::Float64) = @wrap __nv_log10(x::double)::double
 @inline log10(x::Float32) = @wrap __nv_log10f(x::float)::float
 @inline log10_fast(x::Float32) = @wrap __nv_fast_log10f(x::float)::float
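
These complex methods implement the standard polar-form identity: writing $z = |z| e^{i\theta}$,

$$\log z = \log|z| + i\,\arg(z),$$

so the device-side complex log reduces to the real `log`, the complex `abs`, and `angle`, all defined in this file.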
@@ -103,6 +112,9 @@
 @inline ldexp(x::Float64, y::Int32) = @wrap __nv_ldexp(x::double, y::i32)::double
 @inline ldexp(x::Float32, y::Int32) = @wrap __nv_ldexpf(x::float, y::i32)::float
 
+@inline exp(x::Complex{Float64}) = exp(x.re) * (cos(x.im) + 1.0im * sin(x.im))
+@inline exp(x::Complex{Float32}) = exp(x.re) * (cos(x.im) + 1.0im * sin(x.im))
+@inline exp_fast(x::Complex{Float32}) = exp_fast(x.re) * (cos_fast(x.im) + 1.0im * sin_fast(x.im))
 
 ## error
 
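The complex exponential here is Euler's formula,

$$e^{x+iy} = e^{x}(\cos y + i\sin y),$$

so it decomposes into the real `exp`, `cos`, and `sin` intrinsics wrapped earlier in this file (and their `_fast` variants for the fast path).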
@@ -170,6 +182,8 @@
 @inline abs(f::Float32) = @wrap __nv_fabsf(f::float)::float
 @inline abs(x::Int64) = @wrap __nv_llabs(x::i64)::i64
 
+@inline abs(x::Complex{Float64}) = hypot(x.re, x.im)
+@inline abs(x::Complex{Float32}) = hypot(x.re, x.im)
 
 ## roots and powers
 
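Defining `abs` via `hypot` rather than `sqrt(x.re^2 + x.im^2)` avoids overflow and underflow in the intermediate squares. A CPU-side illustration (plain Julia, not from this diff):

    z = 1e300 + 1e300im
    sqrt(real(z)^2 + imag(z)^2)   # Inf: the squares overflow Float64
    hypot(real(z), imag(z))       # ≈ 1.4142e300, the correct modulus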
@@ -192,6 +206,9 @@
 @inline pow(x::Float32, y::Int32) = @wrap __nv_powif(x::float, y::i32)::float
 @inline pow(x::Union{Float32, Float64}, y::Int64) = pow(x, Int32(y))
 
+@inline abs2(x::Complex{Float64}) = x.re * x.re + x.im * x.im
+@inline abs2(x::Complex{Float32}) = x.re * x.re + x.im * x.im
+
 ## rounding and selection
 
 # TODO: differentiate in return type, map correctly
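
By contrast with `abs`, `abs2` computes the squared modulus directly,

$$|z|^2 = \operatorname{Re}(z)^2 + \operatorname{Im}(z)^2,$$

avoiding both the square root and the overflow guard when only $|z|^2$ is needed; this matches the contract of `Base.abs2`.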

test/device/cuda.jl

Lines changed: 120 additions & 1 deletion
@@ -145,8 +145,127 @@ end
         Tuple{Float64}, optimize=false)[1])
     @test !occursin("Float32", codeinfo_str)
 end
-end
 
+@testset "angle" begin
+    buf = CuTestArray(zeros(Float32))
+    cbuf = CuTestArray(zeros(Float32))
+
+    function cuda_kernel(a, x)
+        a[] = CUDAnative.angle(x)
+        return
+    end
+
+    # op(::Float32)
+    x = rand(Float32)
+    @cuda cuda_kernel(buf, x)
+    val = Array(buf)
+    @test val[] ≈ angle(x)
+    @cuda cuda_kernel(buf, -x)
+    val = Array(buf)
+    @test val[] ≈ angle(-x)
+
+    # op(::ComplexF32)
+    x = rand(ComplexF32)
+    @cuda cuda_kernel(cbuf, x)
+    val = Array(cbuf)
+    @test val[] ≈ angle(x)
+    @cuda cuda_kernel(cbuf, -x)
+    val = Array(cbuf)
+    @test val[] ≈ angle(-x)
+
+    # op(::Float64)
+    x = rand(Float64)
+    @cuda cuda_kernel(buf, x)
+    val = Array(buf)
+    @test val[] ≈ angle(x)
+    @cuda cuda_kernel(buf, -x)
+    val = Array(buf)
+    @test val[] ≈ angle(-x)
+
+    # op(::ComplexF64)
+    x = rand(ComplexF64)
+    @cuda cuda_kernel(cbuf, x)
+    val = Array(cbuf)
+    @test val[] ≈ angle(x)
+    @cuda cuda_kernel(cbuf, -x)
+    val = Array(cbuf)
+    @test val[] ≈ angle(-x)
+end
+
+# dictionary of key=>tuple, where the tuple should
+# contain the cpu function and the cuda function to test.
+ops = Dict("exp"=>(exp, CUDAnative.exp),
+           "angle"=>(angle, CUDAnative.angle),
+           "exp2"=>(exp2, CUDAnative.exp2),
+           "exp10"=>(exp10, CUDAnative.exp10),
+           "expm1"=>(expm1, CUDAnative.expm1))
+
+@testset "$key" for key in keys(ops)
+    cpu_op, cuda_op = ops[key]
+
+    buf = CuTestArray(zeros(Float32))
+
+    function cuda_kernel(a, x)
+        a[] = cuda_op(x)
+        return
+    end
+
+    # op(::Float32)
+    x = rand(Float32)
+    @cuda cuda_kernel(buf, x)
+    val = Array(buf)
+    @test val[] ≈ cpu_op(x)
+    @cuda cuda_kernel(buf, -x)
+    val = Array(buf)
+    @test val[] ≈ cpu_op(-x)
+
+    # op(::Float64)
+    x = rand(Float64)
+    @cuda cuda_kernel(buf, x)
+    val = Array(buf)
+    @test val[] ≈ cpu_op(x)
+    @cuda cuda_kernel(buf, -x)
+    val = Array(buf)
+    @test val[] ≈ cpu_op(-x)
+end
+
+# dictionary of key=>tuple, where the tuple should
+# contain the cpu function and the cuda function to test.
+ops = Dict("exp"=>(exp, CUDAnative.exp),
+           "abs"=>(abs, CUDAnative.abs),
+           "abs2"=>(abs2, CUDAnative.abs2),
+           "angle"=>(angle, CUDAnative.angle),
+           "log"=>(log, CUDAnative.log))
+
+@testset "Complex - $key" for key in keys(ops)
+    cpu_op, cuda_op = ops[key]
+
+    buf = CuTestArray(zeros(Complex{Float32}))
+
+    function cuda_kernel(a, x)
+        a[] = cuda_op(x)
+        return
+    end
+
+    # op(::ComplexF32)
+    x = rand(ComplexF32)
+    @cuda cuda_kernel(buf, x)
+    val = Array(buf)
+    @test val[] ≈ cpu_op(x)
+    @cuda cuda_kernel(buf, -x)
+    val = Array(buf)
+    @test val[] ≈ cpu_op(-x)
+
+    # op(::ComplexF64)
+    x = rand(ComplexF64)
+    @cuda cuda_kernel(buf, x)
+    val = Array(buf)
+    @test val[] ≈ cpu_op(x)
+    @cuda cuda_kernel(buf, -x)
+    val = Array(buf)
+    @test val[] ≈ cpu_op(-x)
+end
+end
 
 
 ############################################################################################
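
A side note on the test pattern above (not part of the diff): the kernels return scalars through a zero-dimensional device array, written with `a[] = ...` on the device and read back with `Array(buf)[]` on the host. A plain-Julia analogue of the indexing:

    buf = zeros(Float32)   # zero-dimensional array holding one Float32
    buf[] = 1.5f0          # scalar indexing writes the single element
    @assert buf[] == 1.5f0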
