diff --git a/src/AppleAccelerate.jl b/src/AppleAccelerate.jl
index 1681629..b973ec3 100644
--- a/src/AppleAccelerate.jl
+++ b/src/AppleAccelerate.jl
@@ -92,9 +92,9 @@ function __init__()
 end
 
 if Sys.isapple()
+    include("Util.jl")
     include("Array.jl")
     include("DSP.jl")
-    include("Util.jl")
 end
 
 end # module
diff --git a/src/Array.jl b/src/Array.jl
index be0d8b3..37b744f 100644
--- a/src/Array.jl
+++ b/src/Array.jl
@@ -116,6 +116,9 @@ for (T, suff) in ((Float64, ""), (Float32, "f"))
             Base.copy(bc::Base.Broadcast.Broadcasted{Style, Axes, typeof($f), Tuple{Array{$T, N}}}) where {Style, Axes, N} = ($f)(bc.args...)
             Base.copyto!(dest::Array{$T, N}, bc::Base.Broadcast.Broadcasted{Style, Axes, typeof($f), Tuple{Array{$T, N}}}) where {Style, Axes, N} = ($f!)(dest, bc.args...)
         end
+        if T == Float32
+            @eval Base.broadcasted(::typeof($f), arg::Union{Array{F,N},Base.Broadcast.Broadcasted}) where {N,F<:Union{Float32,Float64}} = ($f)(maybecopy(arg))
+        end
     end
     for (f, fa) in (twoarg_funcs...,(:pow,:pow))
         f! = Symbol("$(f)!")
@@ -124,6 +127,9 @@ for (T, suff) in ((Float64, ""), (Float32, "f"))
             Base.copy(bc::Base.Broadcast.Broadcasted{Style, Axes, typeof($f), Tuple{Array{$T, N},Array{$T,N}}}) where {Style, Axes, N} = ($f)(bc.args...)
             Base.copyto!(dest::Array{$T, N}, bc::Base.Broadcast.Broadcasted{Style, Axes, typeof($f), Tuple{Array{$T,N},Array{$T,N}}}) where {Style, Axes, N} = ($f!)(dest, bc.args...)
         end
+        if T == Float32
+            @eval Base.broadcasted(::typeof($f), arg1::Union{Array{F, N},Base.Broadcast.Broadcasted}, arg2::Union{Array{F, N},Base.Broadcast.Broadcasted}) where {N,F<:Union{Float32,Float64}} = ($f)(maybecopy(arg1), maybecopy(arg2))
+        end
     end
 end
 
@@ -131,7 +137,9 @@ end
 
 for (T, suff) in ((Float32, ""), (Float64, "D"))
     for (f, fa) in ((:maximum, :maxv), (:minimum, :minv), (:mean, :meanv),
-                    (:meansqr, :measqv), (:meanmag, :meamgv), (:sum, :sve))
+                    (:meanmag, :meamgv), (:meansqr, :measqv), (:meanssqr, :mvessq),
+                    (:sum, :sve), (:summag, :svemg), (:sumsqr, :svesq),
+                    (:sumssqr, :svs))
         @eval begin
             function ($f)(X::Vector{$T})
                 val = Ref{$T}(0.0)
@@ -192,7 +200,133 @@ for (T, suff) in ((Float32, ""), (Float64, "D"))
                 return result
             end
         end
+
+        @eval begin
+            # Broadcasting override such that f.(X) turns into f(X)
+            Base.copy(bc::Base.Broadcast.Broadcasted{Style, Axes, typeof($f), Tuple{Array{$T, N},Array{$T,N}}}) where {Style, Axes, N} = ($f)(bc.args...)
+            Base.copyto!(dest::Array{$T, N}, bc::Base.Broadcast.Broadcasted{Style, Axes, typeof($f), Tuple{Array{$T,N},Array{$T,N}}}) where {Style, Axes, N} = ($f!)(dest, bc.args...)
+            Base.broadcasted(::typeof($f), arg1::Union{Array{$T, N},Base.Broadcast.Broadcasted}, arg2::Union{Array{$T, N},Base.Broadcast.Broadcasted}) where {N} = ($f)(maybecopy(arg1), maybecopy(arg2))
+        end
     end
 end
+
+# Element-wise operations over a vector and a scalar
+for (T, suff) in ((Float32, ""), (Float64, "D"))
+
+    for (f, name) in ((:vsadd, "addition"), (:vsdiv, "division"), (:vsmul, "multiplication"))
+        f! = Symbol("$(f)!")
+
+        @eval begin
+            @doc """
+            `$($f!)(result::Vector{$($T)}, X::Vector{$($T)}, c::$($T))`
+
+            Implements vector-scalar **$($name)** over **Vector{$($T)}** and $($T) and overwrites
+            the result vector with the computed value. *Returns:* **Vector{$($T)}** `result`
+            """ ->
+            function ($f!)(result::Vector{$T}, X::Vector{$T}, c::$T)
+                ccall(($(string("vDSP_", f, suff), libacc)), Cvoid,
+                      (Ptr{$T}, Int64, Ptr{$T}, Ptr{$T}, Int64, UInt64),
+                      X, 1, Ref(c), result, 1, length(result))
+                return result
+            end
+        end
+
+        @eval begin
+            @doc """
+            `$($f)(X::Vector{$($T)}, c::$($T))`
+
+            Implements vector-scalar **$($name)** over **Vector{$($T)}** and $($T). Allocates
+            memory to store the result. *Returns:* **Vector{$($T)}**
+            """ ->
+            function ($f)(X::Vector{$T}, c::$T)
+                result = similar(X)
+                ($f!)(result, X, c)
+                return result
+            end
+        end
+    end
+
+    f = :vssub
+    f! = Symbol("$(f)!")
+
+    @eval begin
+        @doc """
+        `$($f!)(result::Vector{$($T)}, X::Vector{$($T)}, c::$($T))`
+
+        Implements vector-scalar **subtraction** over **Vector{$($T)}** and $($T) and overwrites
+        the result vector with the computed value. *Returns:* **Vector{$($T)}** `result`
+        """ ->
+        function ($f!)(result::Vector{$T}, X::Vector{$T}, c::$T)
+            ccall(($(string("vDSP_vsadd", suff), libacc)), Cvoid,
+                  (Ptr{$T}, Int64, Ptr{$T}, Ptr{$T}, Int64, UInt64),
+                  X, 1, Ref(-c), result, 1, length(result))
+            return result
+        end
+    end
+
+    @eval begin
+        @doc """
+        `$($f)(X::Vector{$($T)}, c::$($T))`
+
+        Implements vector-scalar **subtraction** over **Vector{$($T)}** and $($T). Allocates
+        memory to store the result. *Returns:* **Vector{$($T)}**
+        """ ->
+        function ($f)(X::Vector{$T}, c::$T)
+            result = similar(X)
+            ($f!)(result, X, c)
+            return result
+        end
+    end
+
+    f = :svsub
+    f! = Symbol("$(f)!")
+
+    @eval begin
+        @doc """
+        `$($f!)(result::Vector{$($T)}, X::Vector{$($T)}, c::$($T))`
+
+        Implements scalar-vector **subtraction** over $($T) and **Vector{$($T)}** and overwrites
+        the result vector with the computed value. *Returns:* **Vector{$($T)}** `result`
+        """ ->
+        function ($f!)(result::Vector{$T}, X::Vector{$T}, c::$T)
+            ccall(($(string("vDSP_vsadd", suff), libacc)), Cvoid,
+                  (Ptr{$T}, Int64, Ptr{$T}, Ptr{$T}, Int64, UInt64),
+                  -X, 1, Ref(c), result, 1, length(result))
+            return result
+        end
+    end
+
+    @eval begin
+        @doc """
+        `$($f)(X::Vector{$($T)}, c::$($T))`
+
+        Implements scalar-vector **subtraction** over $($T) and **Vector{$($T)}**. Allocates
+        memory to store the result. *Returns:* **Vector{$($T)}**
+        """ ->
+        function ($f)(X::Vector{$T}, c::$T)
+            result = similar(X)
+            ($f!)(result, X, c)
+            return result
+        end
+    end
+
+    for f in (:vsadd, :vssub, :vsdiv, :vsmul)
+        f! = Symbol("$(f)!")
+        @eval begin
+            # Broadcasting override such that f.(X) turns into f(X)
+            Base.copy(bc::Base.Broadcast.Broadcasted{Style, Axes, typeof($f), Tuple{Array{$T, N},$T}}) where {Style, Axes, N} = ($f)(bc.args...)
+            Base.copyto!(dest::Array{$T, N}, bc::Base.Broadcast.Broadcasted{Style, Axes, typeof($f), Tuple{Array{$T,N},$T}}) where {Style, Axes, N} = ($f!)(dest, bc.args...)
+            Base.broadcasted(::typeof($f), arg1::Union{Array{$T, N},Base.Broadcast.Broadcasted}, arg2::$T) where {N} = ($f)(maybecopy(arg1), arg2)
+        end
+    end
+
+    f = :svsub
+    f! = Symbol("$(f)!")
+
+    @eval begin
+        # Broadcasting override such that f.(X) turns into f(X)
+        Base.copy(bc::Base.Broadcast.Broadcasted{Style, Axes, typeof($f), Tuple{Array{$T, N}, $T}}) where {Style, Axes, N} = ($f)(bc.args...)
+        Base.copyto!(dest::Array{$T, N}, bc::Base.Broadcast.Broadcasted{Style, Axes, typeof($f), Tuple{Array{$T,N}, $T}}) where {Style, Axes, N} = ($f!)(dest, bc.args...)
+        Base.broadcasted(::typeof($f), arg1::Union{Array{$T, N},Base.Broadcast.Broadcasted}, arg2::$T) where {N} = ($f)(maybecopy(arg1), arg2)
+    end
+end
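The Array.jl hunks above generate the new vector-scalar wrappers and their broadcast overrides. As a rough usage sketch, assuming this patch is applied (the function names come from the diff itself and are not exported):

    using AppleAccelerate
    X = randn(Float64, 8)
    AppleAccelerate.vsadd(X, 2.0)          # X .+ 2.0, allocates the result
    AppleAccelerate.vssub(X, 2.0)          # X .- 2.0 (vDSP_vsadd called with -c)
    AppleAccelerate.svsub(X, 2.0)          # 2.0 .- X (X negated, then vDSP_vsadd with c)
    out = similar(X)
    AppleAccelerate.vsmul!(out, X, 2.0)    # in-place X .* 2.0, written into `out`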
diff --git a/src/Util.jl b/src/Util.jl
index 05afdd1..23d5fc7 100644
--- a/src/Util.jl
+++ b/src/Util.jl
@@ -2,6 +2,13 @@
 
 tupletypelength(a)=length(a.parameters)
 
+@inline maybecopy(x::T) where {T <: Base.Broadcast.Broadcasted} = copy(x)
+@inline maybecopy(x::T) where {T <: Array} = x
+
+const OPS = Dict{Symbol,Tuple{Symbol, Symbol, Symbol}}(:+ => (:vadd, :vsadd, :vsadd),
+                                                       :- => (:vsub, :vssub, :svsub),
+                                                       :* => (:vmul, :vsmul, :vsmul),
+                                                       :/ => (:vdiv, :vsdiv, :vsdiv))
 
 macro replaceBase(fs...)
     b = Expr(:block)
@@ -28,16 +35,29 @@ macro replaceBase(fs...)
             e = quote
                 (Base.$f)(X::Array{T}) where {T <: Union{Float64,Float32}} = ($fa)(X)
                 (Base.$f)(X::Union{Float64,Float32}) = ($fa)([X])[1]
+                Base.broadcasted(::typeof(Base.$f), arg::Union{Array{F,N},Base.Broadcast.Broadcasted}) where {N,F<:Union{Float32,Float64}} = ($fa)(maybecopy(arg))
             end
             arg_consumed = true
         end
-        if fa in (:copysign,:atan,:pow,:rem,:div_float, :vadd, :vsub, :vmul)
+        if fa in (:copysign,:atan,:pow,:rem)
             e = quote
                 (Base.$f)(X::Array{T},Y::Array{T}) where {T <: Union{Float32,Float64}} = ($fa)(X,Y)
                 (Base.$f)(X::T,Y::T) where {T <: Union{Float32,Float64}} = ($fa)([X],[Y])[1]
+                Base.broadcasted(::typeof(Base.$f), arg1::Union{Array{F, N},Base.Broadcast.Broadcasted}, arg2::Union{Array{F, N},Base.Broadcast.Broadcasted}) where {N,F<:Union{Float32,Float64}} = ($fa)(maybecopy(arg1), maybecopy(arg2))
             end
             arg_consumed = true
         end
+        if f in (:+,:-,:*,:/)
+            e = quote
+                (Base.$f)(X::Array{T},Y::Array{T}) where {T <: Union{Float32,Float64}} = ($(OPS[f][1]))(X,Y)
+                (Base.$f)(X::T,Y::T) where {T <: Union{Float32,Float64}} = ($(OPS[f][1]))([X],[Y])[1]
+                Base.broadcasted(::typeof(Base.$f), arg1::Union{Array{F, N},Base.Broadcast.Broadcasted}, arg2::Union{Array{F, N},Base.Broadcast.Broadcasted}) where {N,F<:Union{Float32,Float64}} = ($(OPS[f][1]))(maybecopy(arg1), maybecopy(arg2))
+
+                Base.broadcasted(::typeof(Base.$f), arg1::Union{Array{T, N},Base.Broadcast.Broadcasted}, arg2::T) where {N, T <: Union{Float32,Float64}} = ($(OPS[f][2]))(maybecopy(arg1), arg2)
+                Base.broadcasted(::typeof(Base.$f), arg1::T, arg2::Union{Array{T, N},Base.Broadcast.Broadcasted}) where {N, T <: Union{Float32,Float64}} = ($(OPS[f][3]))(maybecopy(arg2), arg1)
+            end
+            arg_consumed = true
+        end
         if !arg_consumed
             error("Function $f not defined by AppleAccelerate.jl")
         end
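For context on the Util.jl additions: `maybecopy` is what lets the `Base.broadcasted` overrides accept either a plain `Array` or a not-yet-materialized `Broadcasted` argument. A sketch of how a fused expression resolves once `AppleAccelerate.@replaceBase(+)` has been run (illustrative only; the lowering is Julia's, the dispatch targets are this patch's):

    # X .+ Y .+ Y lowers to nested broadcasted calls:
    #   Base.broadcasted(+, X, Base.broadcasted(+, Y, Y))
    # The inner call already hits the override and eagerly returns vadd(Y, Y)::Vector,
    # so the outer call dispatches to vadd(X, vadd(Y, Y)).
    #
    # maybecopy only matters when an argument is still a lazy Broadcasted, e.g.
    #   AppleAccelerate.vadd.(X, Y .+ Y)        # without @replaceBase
    # where the inner `Y .+ Y` stays lazy and is copy()-ed into an Array before
    # the vDSP call; plain Arrays pass through maybecopy untouched.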
diff --git a/test/runtests.jl b/test/runtests.jl
index d0da0a5..b15415e 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -10,14 +10,62 @@ end
 Random.seed!(7)
 N = 1_000
 
+@testset "AppleAccelerate.jl" begin
 for T in (Float32, Float64)
     @testset "Element-wise Operators::$T" begin
         X::Vector{T} = randn(N)
         Y::Vector{T} = randn(N)
+        Z::Vector{T} = similar(X)
+
+        # Vector-vector
         @test (X .+ Y) ≈ AppleAccelerate.vadd(X, Y)
         @test (X .- Y) ≈ AppleAccelerate.vsub(X, Y)
         @test (X .* Y) ≈ AppleAccelerate.vmul(X, Y)
         @test (X ./ Y) ≈ AppleAccelerate.vdiv(X, Y)
+
+        # Vector-vector non-allocating
+        AppleAccelerate.vadd!(Z, X, Y)
+        @test (X .+ Y) ≈ Z
+        AppleAccelerate.vsub!(Z, X, Y)
+        @test (X .- Y) ≈ Z
+        AppleAccelerate.vmul!(Z, X, Y)
+        @test (X .* Y) ≈ Z
+        AppleAccelerate.vdiv!(Z, X, Y)
+        @test (X ./ Y) ≈ Z
+
+        # Vector-vector broadcasting
+        @test (X .+ Y) ≈ AppleAccelerate.vadd.(X, Y)
+        @test (X .- Y) ≈ AppleAccelerate.vsub.(X, Y)
+        @test (X .* Y) ≈ AppleAccelerate.vmul.(X, Y)
+        @test (X ./ Y) ≈ AppleAccelerate.vdiv.(X, Y)
+
+        # Vector-scalar
+        c::T = randn()
+        @test (X .+ c) ≈ AppleAccelerate.vsadd(X, c)
+        @test (X .- c) ≈ AppleAccelerate.vssub(X, c)
+        @test (c .- X) ≈ AppleAccelerate.svsub(X, c)
+        @test (X .* c) ≈ AppleAccelerate.vsmul(X, c)
+        @test (X ./ c) ≈ AppleAccelerate.vsdiv(X, c)
+
+        # Vector-scalar non-allocating
+        AppleAccelerate.vsadd!(Y, X, c)
+        @test (X .+ c) ≈ Y
+        AppleAccelerate.vssub!(Y, X, c)
+        @test (X .- c) ≈ Y
+        AppleAccelerate.svsub!(Y, X, c)
+        @test (c .- X) ≈ Y
+        AppleAccelerate.vsmul!(Y, X, c)
+        @test (X .* c) ≈ Y
+        AppleAccelerate.vsdiv!(Y, X, c)
+        @test (X ./ c) ≈ Y
+
+        # Vector-scalar broadcasting
+        @test (X .+ c) ≈ AppleAccelerate.vsadd.(X, c)
+        @test (X .- c) ≈ AppleAccelerate.vssub.(X, c)
+        @test (c .- X) ≈ AppleAccelerate.svsub.(X, c)
+        @test (X .* c) ≈ AppleAccelerate.vsmul.(X, c)
+        @test (X ./ c) ≈ AppleAccelerate.vsdiv.(X, c)
+
+        @test (X .+ Y .+ Y) ≈ AppleAccelerate.vadd.(X, Y .+ Y)
     end
 end
 
@@ -198,12 +246,28 @@ for T in (Float32, Float64)
         @test fa(X)[2] ≈ fb(X)[2]
     end
 
+    @testset "Testing meanmag::$T" begin
+        @test AppleAccelerate.meanmag(X) ≈ mean(abs, X)
+    end
+
     @testset "Testing meansqr::$T" begin
-        @test AppleAccelerate.meansqr(X) ≈ mean(X .*X)
+        @test AppleAccelerate.meansqr(X) ≈ mean(X .* X)
     end
 
-    @testset "Testing meanmag::$T" begin
-        @test AppleAccelerate.meanmag(X) ≈ mean(abs.(X))
+    @testset "Testing meanssqr::$T" begin
+        @test AppleAccelerate.meanssqr(X) ≈ mean(X .* abs.(X))
+    end
+
+    @testset "Testing summag::$T" begin
+        @test AppleAccelerate.summag(X) ≈ sum(abs, X)
+    end
+
+    @testset "Testing sumsqr::$T" begin
+        @test AppleAccelerate.sumsqr(X) ≈ sum(abs2, X)
+    end
+
+    @testset "Testing sumssqr::$T" begin
+        @test AppleAccelerate.sumssqr(X) ≈ sum(X .* abs.(X))
     end
 end
 
@@ -270,6 +334,7 @@ Y::Array{T} = abs.(randn(N))
     @test X ./ Y == AppleAccelerate.div_float(X, Y)
 end
 =#
+end
 
 if AppleAccelerate.get_macos_version() < v"13.3"
     @info("AppleAccelerate.jl needs macOS >= 13.3 for BLAS forwarding. Not testing forwarding capabilities.")
@@ -338,6 +403,6 @@ end
     end
 
     run(`$(Base.julia_cmd()) --project=$(Base.active_project()) $(dir)/runtests.jl LinearAlgebra/blas LinearAlgebra/lapack`)
-end;
+end; end
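Finally, a sketch of the end-user effect of the new `@replaceBase` arithmetic branch, using the OPS table from Util.jl (names per this patch; treat it as illustrative rather than documented API):

    using AppleAccelerate
    AppleAccelerate.@replaceBase(+, -, *, /)
    X, Y = randn(Float64, 100), randn(Float64, 100)
    X + Y      # -> vadd(X, Y)      (OPS[:+][1])
    X .- 2.0   # -> vssub(X, 2.0)   (OPS[:-][2], vector-scalar)
    2.0 .- X   # -> svsub(X, 2.0)   (OPS[:-][3], scalar-vector)
    # The new reductions mirror vDSP naming: summag(X) == sum(abs, X),
    # sumsqr(X) == sum(abs2, X), and sumssqr/meanssqr use signed squares (x * |x|).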