This repository was archived by the owner on May 27, 2021. It is now read-only.

Commit d98e60e

Authored by bors[bot], benchislett, and PhilipVinc

Merge #466
466: Add some more complex operations - Take 2 r=maleadt a=PhilipVinc

This is the commit from @benchislett in PR #445, with the addition of the `angle` and `log` operations and several tests for all functions included in the PR.

@benchislett If you prefer to take my commits into your PR, please feel free to do so. I'm doing this only because I have some urgency in getting these merged.

On a side note: I would love to add more functions (notably `log1p`, `expm1`, and `sort`), but I am not sure how to do this. Looking at [thrust](https://github.com/thrust/thrust/blob/7df7efe3542a0ab549530bc478467320467e0094/thrust/detail/complex/csqrt.h), they have a bunch of if/else logic like in base Julia. Is this even a good thing to do on the GPU?

cc @maleadt

Co-authored-by: benchislett <[email protected]>
Co-authored-by: Filippo Vicentini <[email protected]>
2 parents a4a56bd + 0a6fe26 commit d98e60e
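
For context, here is a minimal sketch of how the merged operations would be used from device code. This is illustrative only and not part of the commit; the kernel name, array setup, and launch configuration are assumptions (a CUDAnative-era setup with CuArrays):

    using CUDAnative, CuArrays

    # Hypothetical kernel: compute the complex log of each element and
    # store the phase separately, using the ops added in this PR.
    function logangle_kernel(out_log, out_angle, xs)
        i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
        if i <= length(xs)
            @inbounds out_log[i] = CUDAnative.log(xs[i])
            @inbounds out_angle[i] = CUDAnative.angle(xs[i])
        end
        return
    end

    xs = CuArray(rand(ComplexF32, 1024))
    out_log = similar(xs)
    out_angle = CuArray(zeros(Float32, 1024))
    @cuda threads=256 blocks=4 logangle_kernel(out_log, out_angle, xs)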

File tree: 2 files changed (+137, -1 lines)


src/device/cuda/math.jl

Lines changed: 17 additions & 0 deletions
@@ -32,9 +32,14 @@
 @inline atan(x::Float64) = @wrap __nv_atan(x::double)::double
 @inline atan(x::Float32) = @wrap __nv_atanf(x::float)::float
 
+# ! CUDAnative.atan2 is equivalent to Base.atan
 @inline atan2(x::Float64, y::Float64) = @wrap __nv_atan2(x::double, y::double)::double
 @inline atan2(x::Float32, y::Float32) = @wrap __nv_atan2f(x::float, y::float)::float
 
+@inline angle(x::ComplexF64) = atan2(x.im, x.re)
+@inline angle(x::ComplexF32) = atan2(x.im, x.re)
+@inline angle(x::Float64) = signbit(x) * 3.141592653589793
+@inline angle(x::Float32) = signbit(x) * 3.1415927f0
 
 ## hyperbolic
 
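A note on the scalar `angle` methods above: `signbit(x)` is a `Bool`, which promotes to `0` or `1` under multiplication, so the result is π for negative reals and `0.0` otherwise, matching `Base.angle` on real arguments. A quick CPU-side sanity check (plain Julia, not part of this diff):

    # signbit(x)::Bool promotes to 0 or 1, so negative reals map to π
    # and non-negative reals to 0, like Base.angle for real inputs.
    angle_real(x::Float64) = signbit(x) * 3.141592653589793

    @assert angle_real(-2.5) == angle(-2.5)  # π
    @assert angle_real(2.5)  == angle(2.5)   # 0.0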
@@ -66,6 +71,10 @@
 @inline log(x::Float32) = @wrap __nv_logf(x::float)::float
 @inline log_fast(x::Float32) = @wrap __nv_fast_logf(x::float)::float
 
+@inline log(x::ComplexF64) = log(abs(x)) + im * angle(x)
+@inline log(x::ComplexF32) = log(abs(x)) + im * angle(x)
+@inline log_fast(x::ComplexF32) = log_fast(abs(x)) + im * angle(x)
+
 @inline log10(x::Float64) = @wrap __nv_log10(x::double)::double
 @inline log10(x::Float32) = @wrap __nv_log10f(x::float)::float
 @inline log10_fast(x::Float32) = @wrap __nv_fast_log10f(x::float)::float
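
These complex methods implement the standard polar-form identity: writing $z = |z| e^{i\theta}$,

$$\log z = \log|z| + i\,\arg(z),$$

so the device-side complex log reduces to the real `log`, the complex `abs`, and `angle`, all defined in this file.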
@@ -103,6 +112,9 @@
 @inline ldexp(x::Float64, y::Int32) = @wrap __nv_ldexp(x::double, y::i32)::double
 @inline ldexp(x::Float32, y::Int32) = @wrap __nv_ldexpf(x::float, y::i32)::float
 
+@inline exp(x::Complex{Float64}) = exp(x.re) * (cos(x.im) + 1.0im * sin(x.im))
+@inline exp(x::Complex{Float32}) = exp(x.re) * (cos(x.im) + 1.0im * sin(x.im))
+@inline exp_fast(x::Complex{Float32}) = exp_fast(x.re) * (cos_fast(x.im) + 1.0im * sin_fast(x.im))
 
 ## error
 
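The complex exponential here is Euler's formula,

$$e^{x+iy} = e^{x}(\cos y + i\sin y),$$

so it decomposes into the real `exp`, `cos`, and `sin` intrinsics wrapped earlier in this file (and their `_fast` variants for the fast path).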
@@ -170,6 +182,8 @@
 @inline abs(f::Float32) = @wrap __nv_fabsf(f::float)::float
 @inline abs(x::Int64) = @wrap __nv_llabs(x::i64)::i64
 
+@inline abs(x::Complex{Float64}) = hypot(x.re, x.im)
+@inline abs(x::Complex{Float32}) = hypot(x.re, x.im)
 
 ## roots and powers
 
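Defining `abs` via `hypot` rather than `sqrt(x.re^2 + x.im^2)` avoids overflow and underflow in the intermediate squares. A CPU-side illustration (plain Julia, not from this diff):

    z = 1e300 + 1e300im
    sqrt(real(z)^2 + imag(z)^2)   # Inf: the squares overflow Float64
    hypot(real(z), imag(z))       # ≈ 1.4142e300, the correct modulus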
@@ -192,6 +206,9 @@
 @inline pow(x::Float32, y::Int32) = @wrap __nv_powif(x::float, y::i32)::float
 @inline pow(x::Union{Float32, Float64}, y::Int64) = pow(x, Int32(y))
 
+@inline abs2(x::Complex{Float64}) = x.re * x.re + x.im * x.im
+@inline abs2(x::Complex{Float32}) = x.re * x.re + x.im * x.im
+
 ## rounding and selection
 
 # TODO: differentiate in return type, map correctly
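
By contrast with `abs`, `abs2` computes the squared modulus directly,

$$|z|^2 = \operatorname{Re}(z)^2 + \operatorname{Im}(z)^2,$$

avoiding both the square root and the overflow guard when only $|z|^2$ is needed; this matches the contract of `Base.abs2`.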

test/device/cuda.jl

Lines changed: 120 additions & 1 deletion
@@ -145,8 +145,127 @@ end
         Tuple{Float64}, optimize=false)[1])
     @test !occursin("Float32", codeinfo_str)
 end
-end
 
+@testset "angle" begin
+    buf = CuTestArray(zeros(Float32))
+    cbuf = CuTestArray(zeros(Float32))
+
+    function cuda_kernel(a, x)
+        a[] = CUDAnative.angle(x)
+        return
+    end
+
+    # op(::Float32)
+    x = rand(Float32)
+    @cuda cuda_kernel(buf, x)
+    val = Array(buf)
+    @test val[] ≈ angle(x)
+    @cuda cuda_kernel(buf, -x)
+    val = Array(buf)
+    @test val[] ≈ angle(-x)
+
+    # op(::ComplexF32)
+    x = rand(ComplexF32)
+    @cuda cuda_kernel(cbuf, x)
+    val = Array(cbuf)
+    @test val[] ≈ angle(x)
+    @cuda cuda_kernel(cbuf, -x)
+    val = Array(cbuf)
+    @test val[] ≈ angle(-x)
+
+    # op(::Float64)
+    x = rand(Float64)
+    @cuda cuda_kernel(buf, x)
+    val = Array(buf)
+    @test val[] ≈ angle(x)
+    @cuda cuda_kernel(buf, -x)
+    val = Array(buf)
+    @test val[] ≈ angle(-x)
+
+    # op(::ComplexF64)
+    x = rand(ComplexF64)
+    @cuda cuda_kernel(cbuf, x)
+    val = Array(cbuf)
+    @test val[] ≈ angle(x)
+    @cuda cuda_kernel(cbuf, -x)
+    val = Array(cbuf)
+    @test val[] ≈ angle(-x)
+end
+
+# dictionary of key=>tuple, where the tuple should
+# contain the cpu function and the cuda function to test.
+ops = Dict("exp"=>(exp, CUDAnative.exp),
+           "angle"=>(angle, CUDAnative.angle),
+           "exp2"=>(exp2, CUDAnative.exp2),
+           "exp10"=>(exp10, CUDAnative.exp10),
+           "expm1"=>(expm1, CUDAnative.expm1))
+
+@testset "$key" for key in keys(ops)
+    cpu_op, cuda_op = ops[key]
+
+    buf = CuTestArray(zeros(Float32))
+
+    function cuda_kernel(a, x)
+        a[] = cuda_op(x)
+        return
+    end
+
+    # op(::Float32)
+    x = rand(Float32)
+    @cuda cuda_kernel(buf, x)
+    val = Array(buf)
+    @test val[] ≈ cpu_op(x)
+    @cuda cuda_kernel(buf, -x)
+    val = Array(buf)
+    @test val[] ≈ cpu_op(-x)
+
+    # op(::Float64)
+    x = rand(Float64)
+    @cuda cuda_kernel(buf, x)
+    val = Array(buf)
+    @test val[] ≈ cpu_op(x)
+    @cuda cuda_kernel(buf, -x)
+    val = Array(buf)
+    @test val[] ≈ cpu_op(-x)
+end
+
+# dictionary of key=>tuple, where the tuple should
+# contain the cpu function and the cuda function to test.
+ops = Dict("exp"=>(exp, CUDAnative.exp),
+           "abs"=>(abs, CUDAnative.abs),
+           "abs2"=>(abs2, CUDAnative.abs2),
+           "angle"=>(angle, CUDAnative.angle),
+           "log"=>(log, CUDAnative.log))
+
+@testset "Complex - $key" for key in keys(ops)
+    cpu_op, cuda_op = ops[key]
+
+    buf = CuTestArray(zeros(Complex{Float32}))
+
+    function cuda_kernel(a, x)
+        a[] = cuda_op(x)
+        return
+    end
+
+    # op(::ComplexF32)
+    x = rand(ComplexF32)
+    @cuda cuda_kernel(buf, x)
+    val = Array(buf)
+    @test val[] ≈ cpu_op(x)
+    @cuda cuda_kernel(buf, -x)
+    val = Array(buf)
+    @test val[] ≈ cpu_op(-x)
+
+    # op(::ComplexF64)
+    x = rand(ComplexF64)
+    @cuda cuda_kernel(buf, x)
+    val = Array(buf)
+    @test val[] ≈ cpu_op(x)
+    @cuda cuda_kernel(buf, -x)
+    val = Array(buf)
+    @test val[] ≈ cpu_op(-x)
+end
+end
 
 
 ############################################################################################
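
A side note on the test pattern above (not part of the diff): the kernels return scalars through a zero-dimensional device array, written with `a[] = ...` on the device and read back with `Array(buf)[]` on the host. A plain-Julia analogue of the indexing:

    buf = zeros(Float32)   # zero-dimensional array holding one Float32
    buf[] = 1.5f0          # scalar indexing writes the single element
    @assert buf[] == 1.5f0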
