diff --git a/src/AppleAccelerate.jl b/src/AppleAccelerate.jl
index 1681629..b973ec3 100644
--- a/src/AppleAccelerate.jl
+++ b/src/AppleAccelerate.jl
@@ -92,9 +92,9 @@ function __init__()
 end
 
 if Sys.isapple()
+    include("Util.jl")
     include("Array.jl")
     include("DSP.jl")
-    include("Util.jl")
 end
 
 end # module
diff --git a/src/Array.jl b/src/Array.jl
index be0d8b3..37b744f 100644
--- a/src/Array.jl
+++ b/src/Array.jl
@@ -116,6 +116,9 @@ for (T, suff) in ((Float64, ""), (Float32, "f"))
             Base.copy(bc::Base.Broadcast.Broadcasted{Style, Axes, typeof($f), Tuple{Array{$T, N}}}) where {Style, Axes, N} = ($f)(bc.args...)
             Base.copyto!(dest::Array{$T, N}, bc::Base.Broadcast.Broadcasted{Style, Axes, typeof($f), Tuple{Array{$T, N}}}) where {Style, Axes, N} = ($f!)(dest, bc.args...)
         end
+        if T == Float32
+            @eval Base.broadcasted(::typeof($f), arg::Union{Array{F,N},Base.Broadcast.Broadcasted}) where {N,F<:Union{Float32,Float64}} = ($f)(maybecopy(arg))
+        end
     end
     for (f, fa) in (twoarg_funcs...,(:pow,:pow))
         f! = Symbol("$(f)!")
@@ -124,6 +127,9 @@ for (T, suff) in ((Float64, ""), (Float32, "f"))
             Base.copy(bc::Base.Broadcast.Broadcasted{Style, Axes, typeof($f), Tuple{Array{$T, N},Array{$T,N}}}) where {Style, Axes, N} = ($f)(bc.args...)
             Base.copyto!(dest::Array{$T, N}, bc::Base.Broadcast.Broadcasted{Style, Axes, typeof($f), Tuple{Array{$T,N},Array{$T,N}}}) where {Style, Axes, N} = ($f!)(dest, bc.args...)
         end
+        if T == Float32
+            @eval Base.broadcasted(::typeof($f), arg1::Union{Array{F, N},Base.Broadcast.Broadcasted}, arg2::Union{Array{F, N},Base.Broadcast.Broadcasted}) where {N,F<:Union{Float32,Float64}} = ($f)(maybecopy(arg1), maybecopy(arg2))
+        end
     end
 end
 
@@ -131,7 +137,9 @@ end
 
 for (T, suff) in ((Float32, ""), (Float64, "D"))
     for (f, fa) in ((:maximum, :maxv), (:minimum, :minv), (:mean, :meanv),
-                    (:meansqr, :measqv), (:meanmag, :meamgv), (:sum, :sve))
+                    (:meanmag, :meamgv), (:meansqr, :measqv), (:meanssqr, :mvessq),
+                    (:sum, :sve), (:summag, :svemg), (:sumsqr, :svesq),
+                    (:sumssqr, :svs))
         @eval begin
             function ($f)(X::Vector{$T})
                 val = Ref{$T}(0.0)
@@ -192,7 +200,133 @@ for (T, suff) in ((Float32, ""), (Float64, "D"))
                 return result
             end
         end
+
+        @eval begin
+            # Broadcasting override such that f.(X) turns into f(X)
+            Base.copy(bc::Base.Broadcast.Broadcasted{Style, Axes, typeof($f), Tuple{Array{$T, N},Array{$T,N}}}) where {Style, Axes, N} = ($f)(bc.args...)
+            Base.copyto!(dest::Array{$T, N}, bc::Base.Broadcast.Broadcasted{Style, Axes, typeof($f), Tuple{Array{$T,N},Array{$T,N}}}) where {Style, Axes, N} = ($f!)(dest, bc.args...)
+            Base.broadcasted(::typeof($f), arg1::Union{Array{$T, N},Base.Broadcast.Broadcasted}, arg2::Union{Array{$T, N},Base.Broadcast.Broadcasted}) where {N} = ($f)(maybecopy(arg1), maybecopy(arg2))
+        end
     end
 end
+
+# Element-wise operations over a vector and a scalar
+for (T, suff) in ((Float32, ""), (Float64, "D"))
+
+    for (f, name) in ((:vsadd, "addition"), (:vsdiv, "division"), (:vsmul, "multiplication"))
+        f! = Symbol("$(f)!")
+
+        @eval begin
+            @doc """
+            `$($f!)(result::Vector{$($T)}, X::Vector{$($T)}, c::$($T))`
+
+            Implements vector-scalar **$($name)** over **Vector{$($T)}** and $($T) and overwrites
+            the result vector with the computed value. *Returns:* **Vector{$($T)}** `result`
+            """ ->
+            function ($f!)(result::Vector{$T}, X::Vector{$T}, c::$T)
+                ccall(($(string("vDSP_", f, suff), libacc)), Cvoid,
+                      (Ptr{$T}, Int64, Ptr{$T}, Ptr{$T}, Int64, UInt64),
+                      X, 1, Ref(c), result, 1, length(result))
+                return result
+            end
+        end
+
+        @eval begin
+            @doc """
+            `$($f)(X::Vector{$($T)}, c::$($T))`
+
+            Implements vector-scalar **$($name)** over **Vector{$($T)}** and $($T). Allocates
+            memory to store the result. *Returns:* **Vector{$($T)}**
+            """ ->
+            function ($f)(X::Vector{$T}, c::$T)
+                result = similar(X)
+                ($f!)(result, X, c)
+                return result
+            end
+        end
+    end
+
+    f = :vssub
+    f! = Symbol("$(f)!")
+
+    @eval begin
+        @doc """
+        `$($f!)(result::Vector{$($T)}, X::Vector{$($T)}, c::$($T))`
+
+        Implements vector-scalar **subtraction** over **Vector{$($T)}** and $($T) and overwrites
+        the result vector with the computed value. *Returns:* **Vector{$($T)}** `result`
+        """ ->
+        function ($f!)(result::Vector{$T}, X::Vector{$T}, c::$T)
+            ccall(($(string("vDSP_vsadd", suff), libacc)), Cvoid,
+                  (Ptr{$T}, Int64, Ptr{$T}, Ptr{$T}, Int64, UInt64),
+                  X, 1, Ref(-c), result, 1, length(result))
+            return result
+        end
+    end
+
+    @eval begin
+        @doc """
+        `$($f)(X::Vector{$($T)}, c::$($T))`
+
+        Implements vector-scalar **subtraction** over **Vector{$($T)}** and $($T). Allocates
+        memory to store the result. *Returns:* **Vector{$($T)}**
+        """ ->
+        function ($f)(X::Vector{$T}, c::$T)
+            result = similar(X)
+            ($f!)(result, X, c)
+            return result
+        end
+    end
+
+    f = :svsub
+    f! = Symbol("$(f)!")
+
+    @eval begin
+        @doc """
+        `$($f!)(result::Vector{$($T)}, X::Vector{$($T)}, c::$($T))`
+
+        Implements scalar-vector **subtraction** over $($T) and **Vector{$($T)}** and overwrites
+        the result vector with the computed value. *Returns:* **Vector{$($T)}** `result`
+        """ ->
+        function ($f!)(result::Vector{$T}, X::Vector{$T}, c::$T)
+            ccall(($(string("vDSP_vsadd", suff), libacc)), Cvoid,
+                  (Ptr{$T}, Int64, Ptr{$T}, Ptr{$T}, Int64, UInt64),
+                  -X, 1, Ref(c), result, 1, length(result))
+            return result
+        end
+    end
+
+    @eval begin
+        @doc """
+        `$($f)(X::Vector{$($T)}, c::$($T))`
+
+        Implements scalar-vector **subtraction** over $($T) and **Vector{$($T)}**. Allocates
+        memory to store the result. *Returns:* **Vector{$($T)}**
+        """ ->
+        function ($f)(X::Vector{$T}, c::$T)
+            result = similar(X)
+            ($f!)(result, X, c)
+            return result
+        end
+    end
+
+    for f in (:vsadd, :vssub, :vsdiv, :vsmul)
+        f! = Symbol("$(f)!")
+        @eval begin
+            # Broadcasting override such that f.(X) turns into f(X)
+            Base.copy(bc::Base.Broadcast.Broadcasted{Style, Axes, typeof($f), Tuple{Array{$T, N},$T}}) where {Style, Axes, N} = ($f)(bc.args...)
+            Base.copyto!(dest::Array{$T, N}, bc::Base.Broadcast.Broadcasted{Style, Axes, typeof($f), Tuple{Array{$T,N},$T}}) where {Style, Axes, N} = ($f!)(dest, bc.args...)
+            Base.broadcasted(::typeof($f), arg1::Union{Array{$T, N},Base.Broadcast.Broadcasted}, arg2::$T) where {N} = ($f)(maybecopy(arg1), arg2)
+        end
+    end
+
+    f = :svsub
+    f! = Symbol("$(f)!")
+
+    @eval begin
+        # Broadcasting override such that f.(X) turns into f(X)
+        Base.copy(bc::Base.Broadcast.Broadcasted{Style, Axes, typeof($f), Tuple{Array{$T, N}, $T}}) where {Style, Axes, N} = ($f)(bc.args...)
+        Base.copyto!(dest::Array{$T, N}, bc::Base.Broadcast.Broadcasted{Style, Axes, typeof($f), Tuple{Array{$T,N}, $T}}) where {Style, Axes, N} = ($f!)(dest, bc.args...)
+        Base.broadcasted(::typeof($f), arg1::Union{Array{$T, N},Base.Broadcast.Broadcasted}, arg2::$T) where {N} = ($f)(maybecopy(arg1), arg2)
+    end
+end
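The Array.jl hunks above generate the new vector-scalar wrappers and their broadcast overrides. As a rough usage sketch, assuming this patch is applied (the function names come from the diff itself and are not exported):

    using AppleAccelerate
    X = randn(Float64, 8)
    AppleAccelerate.vsadd(X, 2.0)          # X .+ 2.0, allocates the result
    AppleAccelerate.vssub(X, 2.0)          # X .- 2.0 (vDSP_vsadd called with -c)
    AppleAccelerate.svsub(X, 2.0)          # 2.0 .- X (X negated, then vDSP_vsadd with c)
    out = similar(X)
    AppleAccelerate.vsmul!(out, X, 2.0)    # in-place X .* 2.0, written into `out`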
diff --git a/src/Util.jl b/src/Util.jl
index 05afdd1..23d5fc7 100644
--- a/src/Util.jl
+++ b/src/Util.jl
@@ -2,6 +2,13 @@
 
 tupletypelength(a)=length(a.parameters)
 
+@inline maybecopy(x::T) where {T <: Base.Broadcast.Broadcasted} = copy(x)
+@inline maybecopy(x::T) where {T <: Array} = x
+
+const OPS = Dict{Symbol,Tuple{Symbol, Symbol, Symbol}}(:+ => (:vadd, :vsadd, :vsadd),
+                                                       :- => (:vsub, :vssub, :svsub),
+                                                       :* => (:vmul, :vsmul, :vsmul),
+                                                       :/ => (:vdiv, :vsdiv, :vsdiv))
 
 macro replaceBase(fs...)
     b = Expr(:block)
@@ -28,16 +35,29 @@ macro replaceBase(fs...)
             e = quote
                 (Base.$f)(X::Array{T}) where {T <: Union{Float64,Float32}} = ($fa)(X)
                 (Base.$f)(X::Union{Float64,Float32}) = ($fa)([X])[1]
+                Base.broadcasted(::typeof(Base.$f), arg::Union{Array{F,N},Base.Broadcast.Broadcasted}) where {N,F<:Union{Float32,Float64}} = ($fa)(maybecopy(arg))
             end
             arg_consumed = true
         end
-        if fa in (:copysign,:atan,:pow,:rem,:div_float, :vadd, :vsub, :vmul)
+        if fa in (:copysign,:atan,:pow,:rem)
             e = quote
                 (Base.$f)(X::Array{T},Y::Array{T}) where {T <: Union{Float32,Float64}} = ($fa)(X,Y)
                 (Base.$f)(X::T,Y::T) where {T <: Union{Float32,Float64}} = ($fa)([X],[Y])[1]
+                Base.broadcasted(::typeof(Base.$f), arg1::Union{Array{F, N},Base.Broadcast.Broadcasted}, arg2::Union{Array{F, N},Base.Broadcast.Broadcasted}) where {N,F<:Union{Float32,Float64}} = ($fa)(maybecopy(arg1), maybecopy(arg2))
             end
             arg_consumed = true
         end
+        if f in (:+,:-,:*,:/)
+            e = quote
+                (Base.$f)(X::Array{T},Y::Array{T}) where {T <: Union{Float32,Float64}} = ($(OPS[f][1]))(X,Y)
+                (Base.$f)(X::T,Y::T) where {T <: Union{Float32,Float64}} = ($(OPS[f][1]))([X],[Y])[1]
+                Base.broadcasted(::typeof(Base.$f), arg1::Union{Array{F, N},Base.Broadcast.Broadcasted}, arg2::Union{Array{F, N},Base.Broadcast.Broadcasted}) where {N,F<:Union{Float32,Float64}} = ($(OPS[f][1]))(maybecopy(arg1), maybecopy(arg2))
+
+                Base.broadcasted(::typeof(Base.$f), arg1::Union{Array{T, N},Base.Broadcast.Broadcasted}, arg2::T) where {N, T <: Union{Float32,Float64}} = ($(OPS[f][2]))(maybecopy(arg1), arg2)
+                Base.broadcasted(::typeof(Base.$f), arg1::T, arg2::Union{Array{T, N},Base.Broadcast.Broadcasted}) where {N, T <: Union{Float32,Float64}} = ($(OPS[f][3]))(maybecopy(arg2), arg1)
+            end
+            arg_consumed = true
+        end
         if !arg_consumed
             error("Function $f not defined by AppleAccelerate.jl")
         end
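For context on the Util.jl additions: `maybecopy` is what lets the `Base.broadcasted` overrides accept either a plain `Array` or a not-yet-materialized `Broadcasted` argument. A sketch of how a fused expression resolves once `AppleAccelerate.@replaceBase(+)` has been run (illustrative only; the lowering is Julia's, the dispatch targets are this patch's):

    # X .+ Y .+ Y lowers to nested broadcasted calls:
    #   Base.broadcasted(+, X, Base.broadcasted(+, Y, Y))
    # The inner call already hits the override and eagerly returns vadd(Y, Y)::Vector,
    # so the outer call dispatches to vadd(X, vadd(Y, Y)).
    #
    # maybecopy only matters when an argument is still a lazy Broadcasted, e.g.
    #   AppleAccelerate.vadd.(X, Y .+ Y)        # without @replaceBase
    # where the inner `Y .+ Y` stays lazy and is copy()-ed into an Array before
    # the vDSP call; plain Arrays pass through maybecopy untouched.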
diff --git a/test/runtests.jl b/test/runtests.jl
index d0da0a5..b15415e 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -10,14 +10,62 @@ end
 Random.seed!(7)
 N = 1_000
 
+@testset "AppleAccelerate.jl" begin
 for T in (Float32, Float64)
     @testset "Element-wise Operators::$T" begin
         X::Vector{T} = randn(N)
         Y::Vector{T} = randn(N)
+        Z::Vector{T} = similar(X)
+
+        # Vector-vector
         @test (X .+ Y) ≈ AppleAccelerate.vadd(X, Y)
         @test (X .- Y) ≈ AppleAccelerate.vsub(X, Y)
         @test (X .* Y) ≈ AppleAccelerate.vmul(X, Y)
         @test (X ./ Y) ≈ AppleAccelerate.vdiv(X, Y)
+
+        # Vector-vector non-allocating
+        AppleAccelerate.vadd!(Z, X, Y)
+        @test (X .+ Y) ≈ Z
+        AppleAccelerate.vsub!(Z, X, Y)
+        @test (X .- Y) ≈ Z
+        AppleAccelerate.vmul!(Z, X, Y)
+        @test (X .* Y) ≈ Z
+        AppleAccelerate.vdiv!(Z, X, Y)
+        @test (X ./ Y) ≈ Z
+
+        # Vector-vector broadcasting
+        @test (X .+ Y) ≈ AppleAccelerate.vadd.(X, Y)
+        @test (X .- Y) ≈ AppleAccelerate.vsub.(X, Y)
+        @test (X .* Y) ≈ AppleAccelerate.vmul.(X, Y)
+        @test (X ./ Y) ≈ AppleAccelerate.vdiv.(X, Y)
+
+        # Vector-scalar
+        c::T = randn()
+        @test (X .+ c) ≈ AppleAccelerate.vsadd(X, c)
+        @test (X .- c) ≈ AppleAccelerate.vssub(X, c)
+        @test (c .- X) ≈ AppleAccelerate.svsub(X, c)
+        @test (X .* c) ≈ AppleAccelerate.vsmul(X, c)
+        @test (X ./ c) ≈ AppleAccelerate.vsdiv(X, c)
+
+        # Vector-scalar non-allocating
+        AppleAccelerate.vsadd!(Y, X, c)
+        @test (X .+ c) ≈ Y
+        AppleAccelerate.vssub!(Y, X, c)
+        @test (X .- c) ≈ Y
+        AppleAccelerate.svsub!(Y, X, c)
+        @test (c .- X) ≈ Y
+        AppleAccelerate.vsmul!(Y, X, c)
+        @test (X .* c) ≈ Y
+        AppleAccelerate.vsdiv!(Y, X, c)
+        @test (X ./ c) ≈ Y
+
+        # Vector-scalar broadcasting
+        @test (X .+ c) ≈ AppleAccelerate.vsadd.(X, c)
+        @test (X .- c) ≈ AppleAccelerate.vssub.(X, c)
+        @test (c .- X) ≈ AppleAccelerate.svsub.(X, c)
+        @test (X .* c) ≈ AppleAccelerate.vsmul.(X, c)
+        @test (X ./ c) ≈ AppleAccelerate.vsdiv.(X, c)
+
+        @test (X .+ Y .+ Y) ≈ AppleAccelerate.vadd.(X, Y .+ Y)
     end
 end
 
@@ -198,12 +246,28 @@ for T in (Float32, Float64)
         @test fa(X)[2] ≈ fb(X)[2]
     end
 
+    @testset "Testing meanmag::$T" begin
+        @test AppleAccelerate.meanmag(X) ≈ mean(abs, X)
+    end
+
     @testset "Testing meansqr::$T" begin
-        @test AppleAccelerate.meansqr(X) ≈ mean(X .*X)
+        @test AppleAccelerate.meansqr(X) ≈ mean(X .* X)
     end
 
-    @testset "Testing meanmag::$T" begin
-        @test AppleAccelerate.meanmag(X) ≈ mean(abs.(X))
+    @testset "Testing meanssqr::$T" begin
+        @test AppleAccelerate.meanssqr(X) ≈ mean(X .* abs.(X))
+    end
+
+    @testset "Testing summag::$T" begin
+        @test AppleAccelerate.summag(X) ≈ sum(abs, X)
+    end
+
+    @testset "Testing sumsqr::$T" begin
+        @test AppleAccelerate.sumsqr(X) ≈ sum(abs2, X)
+    end
+
+    @testset "Testing sumssqr::$T" begin
+        @test AppleAccelerate.sumssqr(X) ≈ sum(X .* abs.(X))
     end
 end
 
@@ -270,6 +334,7 @@ Y::Array{T} = abs.(randn(N))
     @test X ./ Y == AppleAccelerate.div_float(X, Y)
 end
 =#
+end
 
 if AppleAccelerate.get_macos_version() < v"13.3"
     @info("AppleAccelerate.jl needs macOS >= 13.3 for BLAS forwarding. Not testing forwarding capabilities.")
@@ -338,6 +403,6 @@ end
     end
 
     run(`$(Base.julia_cmd()) --project=$(Base.active_project()) $(dir)/runtests.jl LinearAlgebra/blas LinearAlgebra/lapack`)
-end;
+end; end
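Finally, a sketch of the end-user effect of the new `@replaceBase` arithmetic branch, using the OPS table from Util.jl (names per this patch; treat it as illustrative rather than documented API):

    using AppleAccelerate
    AppleAccelerate.@replaceBase(+, -, *, /)
    X, Y = randn(Float64, 100), randn(Float64, 100)
    X + Y      # -> vadd(X, Y)      (OPS[:+][1])
    X .- 2.0   # -> vssub(X, 2.0)   (OPS[:-][2], vector-scalar)
    2.0 .- X   # -> svsub(X, 2.0)   (OPS[:-][3], scalar-vector)
    # The new reductions mirror vDSP naming: summag(X) == sum(abs, X),
    # sumsqr(X) == sum(abs2, X), and sumssqr/meanssqr use signed squares (x * |x|).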