From d0c962cb898b452c840105eafeeb8ea41e0782a4 Mon Sep 17 00:00:00 2001 From: "C. Brenhin Keller" Date: Wed, 12 Jun 2024 18:28:19 -0400 Subject: [PATCH 1/8] De-lv ArrayStats.jl --- src/ArrayStats/ArrayStats.jl | 113 ++++++++--------------------------- 1 file changed, 24 insertions(+), 89 deletions(-) diff --git a/src/ArrayStats/ArrayStats.jl b/src/ArrayStats/ArrayStats.jl index f5f8a96..78bd847 100644 --- a/src/ArrayStats/ArrayStats.jl +++ b/src/ArrayStats/ArrayStats.jl @@ -11,16 +11,9 @@ ``` Return the number of elements of `A` that are `NaN`s. """ - function countnans(A::StridedArray{T}) where T<:PrimitiveNumber - n = 0 - @turbo check_empty=true for i ∈ eachindex(A) - n += A[i]!=A[i] - end - return n - end function countnans(A) n = 0 - @inbounds for i ∈ eachindex(A) + @inbounds @simd ivdep for i ∈ eachindex(A) n += A[i]!=A[i] end return n @@ -35,7 +28,7 @@ """ function countnotnans(A) n = 0 - @turbo check_empty=true for i ∈ eachindex(A) + @inbounds @simd ivdep for i ∈ eachindex(A) n += A[i]==A[i] end return n @@ -57,14 +50,8 @@ ``` Fill a Boolean mask of dimensions `size(A)` that is false wherever `A` is `NaN` """ - function nanmask!(mask::StridedArray, A::StridedArray{T}) where T<:PrimitiveNumber - @turbo for i ∈ eachindex(A) - mask[i] = A[i]==A[i] - end - return mask - end function nanmask!(mask, A) - @inbounds for i ∈ eachindex(A) + @inbounds @simd ivdep for i ∈ eachindex(A) mask[i] = A[i]==A[i] end return mask @@ -81,21 +68,11 @@ ``` Replace all `NaN`s in A with zeros of the same type """ - function zeronan!(A::StridedArray{T}) where T<:PrimitiveNumber - ∅ = zero(T) - @turbo for i ∈ eachindex(A) - Aᵢ = A[i] - A[i] = ifelse(Aᵢ==Aᵢ, Aᵢ, ∅) - end - return A - end function zeronan!(A::AbstractArray{T}) where T ∅ = zero(T) - @inbounds for i ∈ eachindex(A) + @inbounds @simd ivdep for i ∈ eachindex(A) Aᵢ = A[i] - if isnan(Aᵢ) - A[i] = ∅ - end + A[i] = ifelse(Aᵢ==Aᵢ, Aᵢ, ∅) end return A end @@ -153,9 +130,8 @@ function nanadd(A::AbstractArray, B::AbstractArray) result_type = promote_type(eltype(A), eltype(B)) result = similar(A, result_type) - @inbounds @simd for i ∈ eachindex(A) - Aᵢ = A[i] - Bᵢ = B[i] + @inbounds @simd ivdep for i ∈ eachindex(A) + Aᵢ, Bᵢ = A[i], B[i] result[i] = (Aᵢ * (Aᵢ==Aᵢ)) + (Bᵢ * (Bᵢ==Bᵢ)) end return result @@ -170,8 +146,7 @@ """ function nanadd!(A::Array, B::AbstractArray) @inbounds @simd for i ∈ eachindex(A) - Aᵢ = A[i] - Bᵢ = B[i] + Aᵢ, Bᵢ = A[i], B[i] A[i] = (Aᵢ * (Aᵢ==Aᵢ)) + (Bᵢ * (Bᵢ==Bᵢ)) end return A @@ -198,7 +173,6 @@ __nanminimum(A, ::Colon, region) = reducedims(_nanminimum(A, region), region) _nanminimum(A, region) = reduce(nanmin, A, dims=region, init=float(eltype(A))(NaN)) _nanminimum(A, ::Colon) = reduce(nanmin, A, init=float(eltype(A))(NaN)) - _nanminimum(A::Array{<:Number}, ::Colon) = vreduce(nanmin, A) export nanminimum @@ -219,7 +193,6 @@ __nanmaximum(A, ::Colon, region) = reducedims(_nanmaximum(A, region), region) _nanmaximum(A, region) = reduce(nanmax, A, dims=region, init=float(eltype(A))(NaN)) _nanmaximum(A, ::Colon) = reduce(nanmax, A, init=float(eltype(A))(NaN)) - _nanmaximum(A::Array{<:Number}, ::Colon) = vreduce(nanmax, A) export nanmaximum """ @@ -312,41 +285,25 @@ mask = nanmask(A) return sum(A.*W.*mask, dims=region) ./ sum(W.*mask, dims=region) end - # Fallback method for non-StridedArrays - function _nanmean(A, W, ::Colon) - n = zero(eltype(W)) - m = zero(promote_type(eltype(W), eltype(A))) - @inbounds @simd for i ∈ eachindex(A) - Aᵢ = A[i] - Wᵢ = W[i] - t = Aᵢ == Aᵢ - n += Wᵢ * t - m += Wᵢ * Aᵢ * t - end - return m / n - end # Can't have NaNs if array is all Integers - function _nanmean(A::StridedArray{<:Integer}, W, ::Colon) + function _nanmean(A::AbstractArray{<:Integer}, W, ::Colon) n = zero(eltype(W)) m = zero(promote_type(eltype(W), eltype(A))) - @turbo for i ∈ eachindex(A) + @inbounds @simd ivdep for i ∈ eachindex(A) Wᵢ = W[i] n += Wᵢ m += Wᵢ * A[i] end return m / n end - function _nanmean(A::StridedArray, W, ::Colon) - T1 = eltype(W) - T2 = promote_type(eltype(W), eltype(A)) - n = zero(T1) - m = zero(T2) - @turbo for i ∈ eachindex(A) - Aᵢ = A[i] - Wᵢ = W[i] + function _nanmean(A, W, ::Colon) + n = ∅ₙ = zero(eltype(W)) + m = ∅ₘ = zero(promote_type(eltype(W), eltype(A))) + @inbounds @simd ivdep for i ∈ eachindex(A) + Aᵢ, Wᵢ = A[i], W[i] t = Aᵢ==Aᵢ - n += ifelse(t, Wᵢ, zero(T1)) - m += ifelse(t, Wᵢ * Aᵢ, zero(T2)) + n += ifelse(t, Wᵢ, ∅ₙ) + m += ifelse(t, Wᵢ * Aᵢ, ∅ₘ) end return m / n end @@ -369,28 +326,29 @@ __nanstd(A, W, region, ::Colon) = _nanstd(A, W, region) __nanstd(A, W, ::Colon, region) = reducedims(_nanstd(A, W, region), region) function _nanstd(A, W, region) + @assert eachindex(A) == eachindex(W) mask = nanmask(A) n = sum(mask, dims=region) w = sum(W.*mask, dims=region) s = sum(A.*W.*mask, dims=region) ./ w d = A .- s # Subtract mean, using broadcasting - @turbo for i ∈ eachindex(d) + @inbounds @simd ivdep for i ∈ eachindex(d) dᵢ = d[i] d[i] = (dᵢ * dᵢ * W[i]) * mask[i] end s .= sum(d, dims=region) - @turbo for i ∈ eachindex(s) + @inbounds @simd ivdep for i ∈ eachindex(s) s[i] = sqrt((s[i] * n[i]) / (w[i] * (n[i] - 1))) end return s end function _nanstd(A, W, ::Colon) + @assert eachindex(A) == eachindex(W) n = 0 w = zero(eltype(W)) m = zero(promote_type(eltype(W), eltype(A))) - @inbounds @simd for i ∈ eachindex(A) - Aᵢ = A[i] - Wᵢ = W[i] + @inbounds @simd ivdep for i ∈ eachindex(A) + Aᵢ, Wᵢ = A[i], W[i] t = Aᵢ == Aᵢ n += t w += Wᵢ * t @@ -398,36 +356,13 @@ end mu = m / w s = zero(typeof(mu)) - @inbounds @simd for i ∈ eachindex(A) + @inbounds @simd ivdep for i ∈ eachindex(A) Aᵢ = A[i] d = Aᵢ - mu s += (d * d * W[i]) * (Aᵢ == Aᵢ) # Zero if Aᵢ is NaN end return sqrt(s / w * n / (n-1)) end - function _nanstd(A::StridedArray{Ta}, W::StridedArray{Tw}, ::Colon) where {Ta<:PrimitiveNumber, Tw<:PrimitiveNumber} - n = 0 - Tm = promote_type(Tw, Ta) - w = zero(Tw) - m = zero(Tm) - @turbo for i ∈ eachindex(A) - Aᵢ = A[i] - Wᵢ = W[i] - t = Aᵢ==Aᵢ - n += t - w += ifelse(t, Wᵢ, zero(Tw)) - m += ifelse(t, Wᵢ * Aᵢ, zero(Tm)) - end - mu = m / w - Tmu = typeof(mu) - s = zero(Tmu) - @turbo for i ∈ eachindex(A) - Aᵢ = A[i] - d = Aᵢ - mu - s += ifelse(Aᵢ==Aᵢ, d * d * W[i], zero(Tmu)) - end - return sqrt(s / w * n / (n-1)) - end export nanstd From ea59d266eb89888da8630c365ec01eb3566ad496 Mon Sep 17 00:00:00 2001 From: "C. Brenhin Keller" Date: Wed, 12 Jun 2024 18:59:08 -0400 Subject: [PATCH 2/8] Improve dimensionality-checking in ArrayStats functions --- src/ArrayStats/ArrayStats.jl | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/ArrayStats/ArrayStats.jl b/src/ArrayStats/ArrayStats.jl index 78bd847..dc7d832 100644 --- a/src/ArrayStats/ArrayStats.jl +++ b/src/ArrayStats/ArrayStats.jl @@ -130,7 +130,7 @@ function nanadd(A::AbstractArray, B::AbstractArray) result_type = promote_type(eltype(A), eltype(B)) result = similar(A, result_type) - @inbounds @simd ivdep for i ∈ eachindex(A) + @inbounds @simd ivdep for i ∈ eachindex(A,B) Aᵢ, Bᵢ = A[i], B[i] result[i] = (Aᵢ * (Aᵢ==Aᵢ)) + (Bᵢ * (Bᵢ==Bᵢ)) end @@ -145,7 +145,7 @@ Add the non-NaN elements of `B` to `A`, treating `NaN`s as zeros """ function nanadd!(A::Array, B::AbstractArray) - @inbounds @simd for i ∈ eachindex(A) + @inbounds @simd for i ∈ eachindex(A,B) Aᵢ, Bᵢ = A[i], B[i] A[i] = (Aᵢ * (Aᵢ==Aᵢ)) + (Bᵢ * (Bᵢ==Bᵢ)) end @@ -289,7 +289,7 @@ function _nanmean(A::AbstractArray{<:Integer}, W, ::Colon) n = zero(eltype(W)) m = zero(promote_type(eltype(W), eltype(A))) - @inbounds @simd ivdep for i ∈ eachindex(A) + @inbounds @simd ivdep for i ∈ eachindex(A,W) Wᵢ = W[i] n += Wᵢ m += Wᵢ * A[i] @@ -299,7 +299,7 @@ function _nanmean(A, W, ::Colon) n = ∅ₙ = zero(eltype(W)) m = ∅ₘ = zero(promote_type(eltype(W), eltype(A))) - @inbounds @simd ivdep for i ∈ eachindex(A) + @inbounds @simd ivdep for i ∈ eachindex(A,W) Aᵢ, Wᵢ = A[i], W[i] t = Aᵢ==Aᵢ n += ifelse(t, Wᵢ, ∅ₙ) @@ -326,18 +326,17 @@ __nanstd(A, W, region, ::Colon) = _nanstd(A, W, region) __nanstd(A, W, ::Colon, region) = reducedims(_nanstd(A, W, region), region) function _nanstd(A, W, region) - @assert eachindex(A) == eachindex(W) mask = nanmask(A) n = sum(mask, dims=region) w = sum(W.*mask, dims=region) s = sum(A.*W.*mask, dims=region) ./ w d = A .- s # Subtract mean, using broadcasting - @inbounds @simd ivdep for i ∈ eachindex(d) + @inbounds @simd ivdep for i ∈ eachindex(d, W) dᵢ = d[i] d[i] = (dᵢ * dᵢ * W[i]) * mask[i] end s .= sum(d, dims=region) - @inbounds @simd ivdep for i ∈ eachindex(s) + @inbounds @simd ivdep for i ∈ eachindex(s,n,w) s[i] = sqrt((s[i] * n[i]) / (w[i] * (n[i] - 1))) end return s @@ -347,7 +346,7 @@ n = 0 w = zero(eltype(W)) m = zero(promote_type(eltype(W), eltype(A))) - @inbounds @simd ivdep for i ∈ eachindex(A) + @inbounds @simd ivdep for i ∈ eachindex(A,W) Aᵢ, Wᵢ = A[i], W[i] t = Aᵢ == Aᵢ n += t @@ -356,7 +355,7 @@ end mu = m / w s = zero(typeof(mu)) - @inbounds @simd ivdep for i ∈ eachindex(A) + @inbounds @simd ivdep for i ∈ eachindex(A,W) Aᵢ = A[i] d = Aᵢ - mu s += (d * d * W[i]) * (Aᵢ == Aᵢ) # Zero if Aᵢ is NaN From 8ac5d8d6c79e6526b5b4c92212562a4fcec57819 Mon Sep 17 00:00:00 2001 From: "C. Brenhin Keller" Date: Wed, 12 Jun 2024 20:45:55 -0400 Subject: [PATCH 3/8] De-lv other summary statistics --- src/ArrayStats/nancov.jl | 50 ++------- src/ArrayStats/nanmean.jl | 134 +++--------------------- src/ArrayStats/nansem.jl | 116 ++++++--------------- src/ArrayStats/nanstd.jl | 8 +- src/ArrayStats/nansum.jl | 130 ++---------------------- src/ArrayStats/nanvar.jl | 207 +++++--------------------------------- 6 files changed, 89 insertions(+), 556 deletions(-) diff --git a/src/ArrayStats/nancov.jl b/src/ArrayStats/nancov.jl index 6f09d95..c668949 100644 --- a/src/ArrayStats/nancov.jl +++ b/src/ArrayStats/nancov.jl @@ -2,7 +2,7 @@ function _nancov(x::AbstractVector, y::AbstractVector, corrected::Bool, μᵪ::N # Calculate covariance σᵪᵧ = ∅ = zero(promote_type(typeof(μᵪ), typeof(μᵧ), Int)) n = 0 - @turbo check_empty=true for i ∈ eachindex(x,y) + @inbounds @simd ivdep for i ∈ eachindex(x,y) δᵪ = x[i] - μᵪ δᵧ = y[i] - μᵧ δ² = δᵪ * δᵧ @@ -19,7 +19,7 @@ function _nancov(x::AbstractVector, y::AbstractVector, corrected::Bool) n = 0 Σᵪ = ∅ᵪ = zero(eltype(x)) Σᵧ = ∅ᵧ = zero(eltype(y)) - @turbo check_empty=true for i ∈ eachindex(x,y) + @inbounds @simd ivdep for i ∈ eachindex(x,y) xᵢ, yᵢ = x[i], y[i] notnan = (xᵢ==xᵢ) & (yᵢ==yᵢ) n += notnan @@ -103,49 +103,13 @@ function nancor(x::AbstractVector, y::AbstractVector; corrected::Bool=true) @assert eachindex(x) == eachindex(y) return _nancor(x, y, corrected) end - # Pair-wise nan-covariance -function _nancor(x::StridedVector{T}, y::StridedVector{T}, corrected::Bool) where T<:PrimitiveNumber +function _nancor(x::AbstractVector{Tx}, y::AbstractVector{Ty}, corrected::Bool) where {Tx, Ty} # Parwise nan-means n = 0 - Σᵪ = ∅ᵪ = zero(T) - Σᵧ = ∅ᵧ = zero(T) - @turbo check_empty=true for i ∈ eachindex(x,y) - xᵢ, yᵢ = x[i], y[i] - notnan = (xᵢ==xᵢ) & (yᵢ==yᵢ) - n += notnan - Σᵪ += ifelse(notnan, xᵢ, ∅ᵪ) - Σᵧ += ifelse(notnan, yᵢ, ∅ᵧ) - end - μᵪ = Σᵪ/n - μᵧ = Σᵧ/n - n == 0 && return (∅ᵪ+∅ᵧ)/0 # Return appropriate NaN if no data - - # Pairwise nan-variances - σ²ᵪ = ∅ᵪ = zero(typeof(μᵪ)) - σ²ᵧ = ∅ᵧ = zero(typeof(μᵧ)) - @turbo check_empty=true for i ∈ eachindex(x,y) - δᵪ = x[i] - μᵪ - δᵧ = y[i] - μᵧ - notnan = (δᵪ==δᵪ) & (δᵧ==δᵧ) - σ²ᵪ += ifelse(notnan, δᵪ * δᵪ, ∅ᵪ) - σ²ᵧ += ifelse(notnan, δᵧ * δᵧ, ∅ᵧ) - end - σᵪ = sqrt(σ²ᵪ / max(n-corrected, 0)) - σᵧ = sqrt(σ²ᵧ / max(n-corrected, 0)) - - # Covariance and correlation - σᵪᵧ = _nancov(x, y, corrected, μᵪ, μᵧ) - ρᵪᵧ = σᵪᵧ / (σᵪ * σᵧ) - - return ρᵪᵧ -end -function _nancor(x::AbstractVector, y::AbstractVector, corrected::Bool) - # Parwise nan-means - n = 0 - Σᵪ = ∅ᵪ = zero(eltype(x)) - Σᵧ = ∅ᵧ = zero(eltype(y)) - @inbounds for i ∈ eachindex(x,y) + Σᵪ = ∅ᵪ = zero(Tx) + Σᵧ = ∅ᵧ = zero(Ty) + @inbounds @simd ivdep for i ∈ eachindex(x,y) xᵢ, yᵢ = x[i], y[i] notnan = (xᵢ==xᵢ) & (yᵢ==yᵢ) n += notnan @@ -159,7 +123,7 @@ function _nancor(x::AbstractVector, y::AbstractVector, corrected::Bool) # Pairwise nan-variances σ²ᵪ = ∅ᵪ = zero(typeof(μᵪ)) σ²ᵧ = ∅ᵧ = zero(typeof(μᵧ)) - @inbounds for i ∈ eachindex(x,y) + @inbounds @simd ivdep for i ∈ eachindex(x,y) δᵪ = x[i] - μᵪ δᵧ = y[i] - μᵧ notnan = (δᵪ==δᵪ) & (δᵧ==δᵧ) diff --git a/src/ArrayStats/nanmean.jl b/src/ArrayStats/nanmean.jl index 4ec214e..48238fe 100644 --- a/src/ArrayStats/nanmean.jl +++ b/src/ArrayStats/nanmean.jl @@ -50,11 +50,11 @@ function _nanmean(A::AbstractArray{T,N}, dims::Tuple) where {T,N} end # Reduce all the dims! -function _nanmean(A::StridedArray{T}, ::Colon) where T<:PrimitiveFloat - Tₒ = Base.promote_op(/, T, Int) +function _nanmean(A, ::Colon) + Tₒ = Base.promote_op(/, eltype(A), Int) n = 0 Σ = ∅ = zero(Tₒ) - @turbo check_empty=true for i ∈ eachindex(A) + @inbounds @simd ivdep for i ∈ eachindex(A) Aᵢ = A[i] notnan = Aᵢ==Aᵢ n += notnan @@ -62,33 +62,20 @@ function _nanmean(A::StridedArray{T}, ::Colon) where T<:PrimitiveFloat end return Σ / n end -function _nanmean(A::StridedArray{T}, ::Colon) where T<:PrimitiveInteger +function _nanmean(A::AbstractArray{T}, ::Colon) where T<:Integer Tₒ = Base.promote_op(/, T, Int) Σ = zero(Tₒ) - @turbo check_empty=true for i ∈ eachindex(A) + @inbounds @simd ivdep for i ∈ eachindex(A) Σ += A[i] end return Σ / length(A) end -# Fallback method for non-StridedArrays -function _nanmean(A, ::Colon) - Tₒ = Base.promote_op(/, eltype(A), Int) - n = 0 - Σ = ∅ = zero(Tₒ) - @inbounds for i ∈ eachindex(A) - Aᵢ = A[i] - notnan = Aᵢ==Aᵢ - n += notnan - Σ += ifelse(notnan, A[i], ∅) - end - return Σ / n -end # Metaprogramming magic adapted from Chris Elrod example: # Generate customized set of loops for a given ndims and a vector # `static_dims` of dimensions to reduce over -function staticdim_nanmean_quote_turbo(static_dims::Vector{Int}, N::Int) +function staticdim_nanmean_quote(static_dims::Vector{Int}, N::Int) M = length(static_dims) # `static_dims` now contains every dim we're taking the mean over. Bᵥ = Expr(:call, :view, :B) @@ -129,111 +116,11 @@ function staticdim_nanmean_quote_turbo(static_dims::Vector{Int}, N::Int) # Build the reduction loop for n ∈ reduct_inds newblock = Expr(:block) - push!(block.args, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock)) - block = newblock - end - # Push more things here if you want them in the innermost loop - push!(block.args, :(Aᵢ = $Aind)) - push!(block.args, :(notnan = Aᵢ==Aᵢ)) - push!(block.args, :(n += notnan)) - push!(block.args, :(Σ += ifelse(notnan, Aᵢ, ∅))) - # Push more things here if you want them at the end of the reduction loop - push!(rblock.args, :($Bind = Σ * inv(n))) - # Put it all together - quote - ∅ = zero(eltype(B)) - Bᵥ = $Bᵥ - @turbo $loops - return B - end -end - -# Chris Elrod metaprogramming magic: -# Turn non-static integers in `dims` tuple into `StaticInt`s -# so we can construct `static_dims` vector within @generated code -function branches_nanmean_quote_turbo(N::Int, M::Int, D) - static_dims = Int[] - for m ∈ 1:M - param = D.parameters[m] - if param <: StaticInt - new_dim = _dim(param)::Int - @assert new_dim ∉ static_dims - push!(static_dims, new_dim) + if n==last(reduct_inds) + push!(block.args, Expr(:macrocall, Symbol("@simd"), :ivdep, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock))) else - t = Expr(:tuple) - for n ∈ static_dims - push!(t.args, :(StaticInt{$n}())) - end - q = Expr(:block, :(dimm = dims[$m])) - qold = q - ifsym = :if - for n ∈ 1:N - n ∈ static_dims && continue - tc = copy(t) - push!(tc.args, :(StaticInt{$n}())) - qnew = Expr(ifsym, :(dimm == $n), :(return _nanmean!(B, A, $tc))) - for r ∈ m+1:M - push!(tc.args, :(dims[$r])) - end - push!(qold.args, qnew) - qold = qnew - ifsym = :elseif - end - push!(qold.args, Expr(:block, :(throw("Dimension `$dimm` not found.")))) - return q - end - end - staticdim_nanmean_quote_turbo(static_dims, N) -end - -# Efficient @generated in-place mean -@generated function _nanmean!(B::StridedArray{Tₒ,N}, A::StridedArray{T,N}, dims::D) where {Tₒ<:PrimitiveNumber,T,N,M,D<:Tuple{Vararg{IntOrStaticInt,M}}} - N == M && return :(B[1] = _nanmean(A, :); B) - branches_nanmean_quote_turbo(N, M, D) -end - -function staticdim_nanmean_quote(static_dims::Vector{Int}, N::Int) - M = length(static_dims) - # `static_dims` now contains every dim we're taking the mean over. - Bᵥ = Expr(:call, :view, :B) - reduct_inds = Int[] - nonreduct_inds = Int[] - # Firstly, build our expressions for indexing each array - Aind = :(A[]) - Bind = :(Bᵥ[]) - inds = Vector{Symbol}(undef, N) - for n ∈ 1:N - ind = Symbol(:i_,n) - inds[n] = ind - push!(Aind.args, ind) - if n ∈ static_dims - push!(reduct_inds, n) - push!(Bᵥ.args, :(firstindex(B,$n))) - else - push!(nonreduct_inds, n) - push!(Bᵥ.args, :) - push!(Bind.args, ind) - end - end - firstn = first(nonreduct_inds) - # Secondly, build up our set of loops - block = Expr(:block) - loops = Expr(:for, :($(inds[firstn]) = indices((A,B),$firstn)), block) - if length(nonreduct_inds) > 1 - for n ∈ @view(nonreduct_inds[2:end]) - newblock = Expr(:block) - push!(block.args, Expr(:for, :($(inds[n]) = indices((A,B),$n)), newblock)) - block = newblock + push!(block.args, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock)) end - end - rblock = block - # Push more things here if you want them at the beginning of the reduction loop - push!(rblock.args, :(n = 0)) - push!(rblock.args, :(Σ = ∅)) - # Build the reduction loop - for n ∈ reduct_inds - newblock = Expr(:block) - push!(block.args, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock)) block = newblock end # Push more things here if you want them in the innermost loop @@ -252,6 +139,8 @@ function staticdim_nanmean_quote(static_dims::Vector{Int}, N::Int) end end +# Turn non-static integers in `dims` tuple into `StaticInt`s +# so we can construct `static_dims` vector within @generated code function branches_nanmean_quote(N::Int, M::Int, D) static_dims = Int[] for m ∈ 1:M @@ -287,6 +176,7 @@ function branches_nanmean_quote(N::Int, M::Int, D) staticdim_nanmean_quote(static_dims, N) end +# Efficient @generated in-place mean @generated function _nanmean!(B::AbstractArray{Tₒ,N}, A::AbstractArray{T,N}, dims::D) where {Tₒ,T,N,M,D<:Tuple{Vararg{IntOrStaticInt,M}}} N == M && return :(B[1] = _nanmean(A, :); B) branches_nanmean_quote(N, M, D) diff --git a/src/ArrayStats/nansem.jl b/src/ArrayStats/nansem.jl index c0d86af..64b51a8 100644 --- a/src/ArrayStats/nansem.jl +++ b/src/ArrayStats/nansem.jl @@ -45,60 +45,40 @@ _nansem(μ, corrected::Bool, A, dims::Int) = _nansem(μ, corrected, A, (dims,)) # If the mean isn't known, compute it _nansem(::Nothing, corrected::Bool, A, dims::Tuple) = _nansem!(_nanmean(A, dims), corrected, A, dims) # Reduce all the dims! -function _nansem(::Nothing, corrected::Bool, A::StridedArray{T}, ::Colon) where T<:PrimitiveFloat - Tₒ = Base.promote_op(/, T, Int) - n = 0 - Σ = ∅ = zero(Tₒ) - @turbo check_empty=true for i ∈ eachindex(A) - Aᵢ = A[i] - notnan = Aᵢ==Aᵢ - n += notnan - Σ += ifelse(notnan, Aᵢ, ∅) - end - μ = Σ / n - σ² = ∅ = zero(typeof(μ)) - @turbo check_empty=true for i ∈ eachindex(A) - δ = A[i] - μ - notnan = δ==δ - σ² += ifelse(notnan, δ * δ, ∅) - end - return sqrt(σ² / max(n-corrected,0) / n) +function _nansem(::Nothing, corrected::Bool, A, ::Colon) + Tₒ = Base.promote_op(/, eltype(A), Int) + n = 0 + Σ = ∅ = zero(Tₒ) + @inbounds @simd ivdep for i ∈ eachindex(A) + Aᵢ = A[i] + notnan = Aᵢ==Aᵢ + n += notnan + Σ += ifelse(notnan, Aᵢ, ∅) + end + μ = Σ / n + σ² = ∅ = zero(typeof(μ)) + @inbounds @simd ivdep for i ∈ eachindex(A) + δ = A[i] - μ + notnan = δ==δ + σ² += ifelse(notnan, δ * δ, ∅) + end + return sqrt(σ² / max(n-corrected,0) / n) end -function _nansem(::Nothing, corrected::Bool, A::StridedArray{T}, ::Colon) where T<:PrimitiveInteger +function _nansem(::Nothing, corrected::Bool, A::AbstractArray{T}, ::Colon) where T<:Integer Tₒ = Base.promote_op(/, T, Int) n = length(A) Σ = zero(Tₒ) - @turbo check_empty=true for i ∈ eachindex(A) + @inbounds @simd ivdep for i ∈ eachindex(A) Σ += A[i] end μ = Σ / n σ² = zero(typeof(μ)) - @turbo check_empty=true for i ∈ eachindex(A) + @inbounds @simd ivdep for i ∈ eachindex(A) δ = A[i] - μ σ² += δ * δ end return sqrt(σ² / max(n-corrected,0) / n) end -# Fallback method for non-StridedArrays -function _nansem(::Nothing, corrected::Bool, A, ::Colon) - Tₒ = Base.promote_op(/, eltype(A), Int) - n = 0 - Σ = ∅ = zero(Tₒ) - @inbounds for i ∈ eachindex(A) - Aᵢ = A[i] - notnan = Aᵢ==Aᵢ - n += notnan - Σ += ifelse(notnan, Aᵢ, ∅) - end - μ = Σ / n - σ² = ∅ = zero(typeof(μ)) - @inbounds for i ∈ eachindex(A) - δ = A[i] - μ - notnan = δ==δ - σ² += ifelse(notnan, δ * δ, ∅) - end - return sqrt(σ² / max(n-corrected,0) / n) -end # If the mean is known, pass it on in the appropriate form @@ -106,21 +86,21 @@ _nansem(μ, corrected::Bool, A, dims::Tuple) = _nansem!(collect(μ), corrected, _nansem(μ::Array, corrected::Bool, A, dims::Tuple) = _nansem!(copy(μ), corrected, A, dims) _nansem(μ::Number, corrected::Bool, A, dims::Tuple) = _nansem!([μ], corrected, A, dims) # Reduce all the dims! -function _nansem(μ::Number, corrected::Bool, A::StridedArray{T}, ::Colon) where T<:PrimitiveFloat - n = 0 - σ² = ∅ = zero(typeof(μ)) - @turbo check_empty=true for i ∈ eachindex(A) - δ = A[i] - μ - notnan = δ==δ - n += notnan - σ² += ifelse(notnan, δ * δ, ∅) - end - return sqrt(σ² / max(n-corrected, 0) / n) +function _nansem(μ::Number, corrected::Bool, A, ::Colon) + n = 0 + σ² = ∅ = zero(typeof(μ)) + @inbounds @simd ivdep for i ∈ eachindex(A) + δ = A[i] - μ + notnan = δ==δ + n += notnan + σ² += ifelse(notnan, δ * δ, ∅) + end + return sqrt(σ² / max(n-corrected, 0) / n) end -function _nansem(μ::Number, corrected::Bool, A::StridedArray{T}, ::Colon) where T<:PrimitiveInteger +function _nansem(μ::Number, corrected::Bool, A::AbstractArray{T}, ::Colon) where T<:Integer σ² = zero(typeof(μ)) if μ==μ - @turbo check_empty=true for i ∈ eachindex(A) + @inbounds @simd ivdep for i ∈ eachindex(A) δ = A[i] - μ σ² += δ * δ end @@ -130,36 +110,6 @@ function _nansem(μ::Number, corrected::Bool, A::StridedArray{T}, ::Colon) where end return sqrt(σ² / max(n-corrected, 0) / n) end -# Fallback method for non-strided-arrays -function _nansem(μ::Number, corrected::Bool, A, ::Colon) - n = 0 - σ² = ∅ = zero(typeof(μ)) - @inbounds for i ∈ eachindex(A) - δ = A[i] - μ - notnan = δ==δ - n += notnan - σ² += ifelse(notnan, δ * δ, ∅) - end - return sqrt(σ² / max(n-corrected, 0) / n) -end - -# # Fallback method for overly-complex reductions -# function _nansem_fallback!(B::AbstractArray, corrected::Bool, A::AbstractArray,region) -# mask = nanmask(A) -# N = sum(mask, dims=region) -# Σ = sum(A.*mask, dims=region)./N -# δ = A .- Σ # Subtract mean, using broadcasting -# @turbo check_empty=true for i ∈ eachindex(δ) -# δᵢ = δ[i] -# δ[i] = ifelse(mask[i], δᵢ * δᵢ, 0) -# end -# B .= sum(δ, dims=region) -# @turbo check_empty=true for i ∈ eachindex(B) -# B[i] = B[i] / max(N[i] - corrected, 0) -# end -# return B -# end - function staticdim_nansem_quote(static_dims::Vector{Int}, N::Int) diff --git a/src/ArrayStats/nanstd.jl b/src/ArrayStats/nanstd.jl index 147ef67..6b75e8a 100644 --- a/src/ArrayStats/nanstd.jl +++ b/src/ArrayStats/nanstd.jl @@ -36,14 +36,8 @@ nanstd(A; dims=:, dim=:, mean=nothing, corrected=true) = sqrt!(__nanvar(mean, co export nanstd sqrt!(x::Number) = sqrt(x) -function sqrt!(A::StridedArray{<:PrimitiveNumber}) - @turbo check_empty=true for i ∈ eachindex(A) - A[i] = sqrt(A[i]) - end - return A -end function sqrt!(A::AbstractArray) - @inbounds for i ∈ eachindex(A) + @inbounds @simd ivdep for i ∈ eachindex(A) A[i] = sqrt(A[i]) end return A diff --git a/src/ArrayStats/nansum.jl b/src/ArrayStats/nansum.jl index 022e430..0e8e4a7 100644 --- a/src/ArrayStats/nansum.jl +++ b/src/ArrayStats/nansum.jl @@ -50,41 +50,30 @@ function _nansum(A::AbstractArray{T,N}, dims::Tuple) where {T,N} end # Reduce all the dims! -function _nansum(A::StridedArray{T}, ::Colon) where T<:PrimitiveFloat - Tₒ = Base.promote_op(+, T, Int) +function _nansum(A, ::Colon) + Tₒ = Base.promote_op(+, eltype(A), Int) Σ = ∅ = zero(Tₒ) - @turbo check_empty=true for i ∈ eachindex(A) + @inbounds @simd ivdep for i ∈ eachindex(A) Aᵢ = A[i] notnan = Aᵢ==Aᵢ Σ += ifelse(notnan, Aᵢ, ∅) end return Σ end -function _nansum(A::StridedArray{T}, ::Colon) where T<:PrimitiveInteger +function _nansum(A::AbstractArray{T}, ::Colon) where T<:Integer Tₒ = Base.promote_op(+, T, Int) Σ = zero(Tₒ) - @turbo check_empty=true for i ∈ eachindex(A) + @inbounds @simd ivdep for i ∈ eachindex(A) Σ += A[i] end return Σ end -# Fallback method for non-StridedArrays -function _nansum(A, ::Colon) - Tₒ = Base.promote_op(+, eltype(A), Int) - Σ = ∅ = zero(Tₒ) - @inbounds for i ∈ eachindex(A) - Aᵢ = A[i] - notnan = Aᵢ==Aᵢ - Σ += ifelse(notnan, Aᵢ, ∅) - end - return Σ -end # Metaprogramming magic adapted from Chris Elrod example: # Generate customized set of loops for a given ndims and a vector # `static_dims` of dimensions to reduce over -function staticdim_nansum_quote_turbo(static_dims::Vector{Int}, N::Int) +function staticdim_nansum_quote(static_dims::Vector{Int}, N::Int) M = length(static_dims) # `static_dims` now contains every dim we're taking the sum over. Bᵥ = Expr(:call, :view, :B) @@ -124,110 +113,11 @@ function staticdim_nansum_quote_turbo(static_dims::Vector{Int}, N::Int) # Build the reduction loop for n ∈ reduct_inds newblock = Expr(:block) - push!(block.args, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock)) - block = newblock - end - # Push more things here if you want them in the innermost loop - push!(block.args, :(Aᵢ = $Aind)) - push!(block.args, :(notnan = Aᵢ==Aᵢ)) - push!(block.args, :(Σ += ifelse(notnan, Aᵢ, ∅))) - # Push more things here if you want them at the end of the reduction loop - push!(rblock.args, :($Bind = Σ)) - # Put it all together - quote - ∅ = zero(eltype(B)) - Bᵥ = $Bᵥ - @turbo $loops - return B - end -end - -# Chris Elrod metaprogramming magic: -# Turn non-static integers in `dims` tuple into `StaticInt`s -# so we can construct `static_dims` vector within @generated code -function branches_nansum_quote_turbo(N::Int, M::Int, D) - static_dims = Int[] - for m ∈ 1:M - param = D.parameters[m] - if param <: StaticInt - new_dim = _dim(param)::Int - @assert new_dim ∉ static_dims - push!(static_dims, new_dim) + if n==last(reduct_inds) + push!(block.args, Expr(:macrocall, Symbol("@simd"), :ivdep, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock))) else - t = Expr(:tuple) - for n ∈ static_dims - push!(t.args, :(StaticInt{$n}())) - end - q = Expr(:block, :(dimm = dims[$m])) - qold = q - ifsym = :if - for n ∈ 1:N - n ∈ static_dims && continue - tc = copy(t) - push!(tc.args, :(StaticInt{$n}())) - qnew = Expr(ifsym, :(dimm == $n), :(return _nansum!(B, A, $tc))) - for r ∈ m+1:M - push!(tc.args, :(dims[$r])) - end - push!(qold.args, qnew) - qold = qnew - ifsym = :elseif - end - push!(qold.args, Expr(:block, :(throw("Dimension `$dimm` not found.")))) - return q - end - end - staticdim_nansum_quote_turbo(static_dims, N) -end - -# Efficient @generated in-place sum -@generated function _nansum!(B::StridedArray{Tₒ,N}, A::StridedArray{T,N}, dims::D) where {Tₒ<:PrimitiveNumber,T,N,M,D<:Tuple{Vararg{IntOrStaticInt,M}}} - N == M && return :(B[1] = _nansum(A, :); B) - branches_nansum_quote_turbo(N, M, D) -end - - -function staticdim_nansum_quote(static_dims::Vector{Int}, N::Int) - M = length(static_dims) - # `static_dims` now contains every dim we're taking the sum over. - Bᵥ = Expr(:call, :view, :B) - reduct_inds = Int[] - nonreduct_inds = Int[] - # Firstly, build our expressions for indexing each array - Aind = :(A[]) - Bind = :(Bᵥ[]) - inds = Vector{Symbol}(undef, N) - for n ∈ 1:N - ind = Symbol(:i_,n) - inds[n] = ind - push!(Aind.args, ind) - if n ∈ static_dims - push!(reduct_inds, n) - push!(Bᵥ.args, :(firstindex(B,$n))) - else - push!(nonreduct_inds, n) - push!(Bᵥ.args, :) - push!(Bind.args, ind) + push!(block.args, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock)) end - end - firstn = first(nonreduct_inds) - # Secondly, build up our set of loops - block = Expr(:block) - loops = Expr(:for, :($(inds[firstn]) = indices((A,B),$firstn)), block) - if length(nonreduct_inds) > 1 - for n ∈ @view(nonreduct_inds[2:end]) - newblock = Expr(:block) - push!(block.args, Expr(:for, :($(inds[n]) = indices((A,B),$n)), newblock)) - block = newblock - end - end - rblock = block - # Push more things here if you want them at the beginning of the reduction loop - push!(rblock.args, :(Σ = ∅)) - # Build the reduction loop - for n ∈ reduct_inds - newblock = Expr(:block) - push!(block.args, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock)) block = newblock end # Push more things here if you want them in the innermost loop @@ -245,6 +135,8 @@ function staticdim_nansum_quote(static_dims::Vector{Int}, N::Int) end end +# Turn non-static integers in `dims` tuple into `StaticInt`s +# so we can construct `static_dims` vector within @generated code function branches_nansum_quote(N::Int, M::Int, D) static_dims = Int[] for m ∈ 1:M diff --git a/src/ArrayStats/nanvar.jl b/src/ArrayStats/nanvar.jl index 0cbdc6c..fe4b62f 100644 --- a/src/ArrayStats/nanvar.jl +++ b/src/ArrayStats/nanvar.jl @@ -45,11 +45,11 @@ _nanvar(μ, corrected::Bool, A, dims::Int) = _nanvar(μ, corrected, A, (dims,)) # If the mean isn't known, compute it _nanvar(::Nothing, corrected::Bool, A, dims::Tuple) = _nanvar!(_nanmean(A, dims), corrected, A, dims) # Reduce all the dims! -function _nanvar(::Nothing, corrected::Bool, A::StridedArray{T}, ::Colon) where T<:PrimitiveFloat - Tₒ = Base.promote_op(/, T, Int) +function _nanvar(::Nothing, corrected::Bool, A, ::Colon) + Tₒ = Base.promote_op(/, eltype(A), Int) n = 0 Σ = ∅ = zero(Tₒ) - @turbo check_empty=true for i ∈ eachindex(A) + @inbounds @simd ivdep for i ∈ eachindex(A) Aᵢ = A[i] notnan = Aᵢ==Aᵢ n += notnan @@ -57,48 +57,28 @@ function _nanvar(::Nothing, corrected::Bool, A::StridedArray{T}, ::Colon) where end μ = Σ / n σ² = ∅ = zero(typeof(μ)) - @turbo check_empty=true for i ∈ eachindex(A) + @inbounds @simd ivdep for i ∈ eachindex(A) δ = A[i] - μ notnan = δ==δ σ² += ifelse(notnan, δ * δ, ∅) end return σ² / max(n-corrected,0) end -function _nanvar(::Nothing, corrected::Bool, A::StridedArray{T}, ::Colon) where T<:PrimitiveInteger +function _nanvar(::Nothing, corrected::Bool, A::AbstractArray{T}, ::Colon) where T<:PrimitiveInteger Tₒ = Base.promote_op(/, T, Int) n = length(A) Σ = zero(Tₒ) - @turbo check_empty=true for i ∈ eachindex(A) + @inbounds @simd ivdep for i ∈ eachindex(A) Σ += A[i] end μ = Σ / n σ² = zero(typeof(μ)) - @turbo check_empty=true for i ∈ eachindex(A) + @inbounds @simd ivdep for i ∈ eachindex(A) δ = A[i] - μ σ² += δ * δ end return σ² / max(n-corrected,0) end -# Fallback method for non-StridedArrays -function _nanvar(::Nothing, corrected::Bool, A, ::Colon) - Tₒ = Base.promote_op(/, eltype(A), Int) - n = 0 - Σ = ∅ = zero(Tₒ) - @inbounds for i ∈ eachindex(A) - Aᵢ = A[i] - notnan = Aᵢ==Aᵢ - n += notnan - Σ += ifelse(notnan, Aᵢ, ∅) - end - μ = Σ / n - σ² = ∅ = zero(typeof(μ)) - @inbounds for i ∈ eachindex(A) - δ = A[i] - μ - notnan = δ==δ - σ² += ifelse(notnan, δ * δ, ∅) - end - return σ² / max(n-corrected,0) -end # If the mean is known, pass it on in the appropriate form @@ -106,21 +86,21 @@ _nanvar(μ, corrected::Bool, A, dims::Tuple) = _nanvar!(collect(μ), corrected, _nanvar(μ::Array, corrected::Bool, A, dims::Tuple) = _nanvar!(copy(μ), corrected, A, dims) _nanvar(μ::Number, corrected::Bool, A, dims::Tuple) = _nanvar!([μ], corrected, A, dims) # Reduce all the dims! -function _nanvar(μ::Number, corrected::Bool, A::StridedArray{T}, ::Colon) where T<:PrimitiveFloat - n = 0 - σ² = ∅ = zero(typeof(μ)) - @turbo check_empty=true for i ∈ eachindex(A) - δ = A[i] - μ - notnan = δ==δ - n += notnan - σ² += ifelse(notnan, δ * δ, ∅) - end - return σ² / max(n-corrected, 0) +function _nanvar(μ::Number, corrected::Bool, A, ::Colon) + n = 0 + σ² = ∅ = zero(typeof(μ)) + @inbounds @simd ivdep for i ∈ eachindex(A) + δ = A[i] - μ + notnan = δ==δ + n += notnan + σ² += ifelse(notnan, δ * δ, ∅) + end + return σ² / max(n-corrected, 0) end -function _nanvar(μ::Number, corrected::Bool, A::StridedArray{T}, ::Colon) where T<:PrimitiveInteger +function _nanvar(μ::Number, corrected::Bool, A::AbstractArray{T}, ::Colon) where T<:PrimitiveInteger σ² = zero(typeof(μ)) if μ==μ - @turbo check_empty=true for i ∈ eachindex(A) + @inbounds @simd ivdep for i ∈ eachindex(A) δ = A[i] - μ σ² += δ * δ end @@ -130,41 +110,11 @@ function _nanvar(μ::Number, corrected::Bool, A::StridedArray{T}, ::Colon) where end return σ² / max(n-corrected, 0) end -# Fallback method for non-strided-arrays -function _nanvar(μ::Number, corrected::Bool, A, ::Colon) - n = 0 - σ² = ∅ = zero(typeof(μ)) - @inbounds for i ∈ eachindex(A) - δ = A[i] - μ - notnan = δ==δ - n += notnan - σ² += ifelse(notnan, δ * δ, ∅) - end - return σ² / max(n-corrected, 0) -end - -# # Fallback method for overly-complex reductions -# function _nanvar_fallback!(B::AbstractArray, corrected::Bool, A::AbstractArray,region) -# mask = nanmask(A) -# N = sum(mask, dims=region) -# Σ = sum(A.*mask, dims=region)./N -# δ = A .- Σ # Subtract mean, using broadcasting -# @turbo check_empty=true for i ∈ eachindex(δ) -# δᵢ = δ[i] -# δ[i] = ifelse(mask[i], δᵢ * δᵢ, 0) -# end -# B .= sum(δ, dims=region) -# @turbo check_empty=true for i ∈ eachindex(B) -# B[i] = B[i] / max(N[i] - corrected, 0) -# end -# return B -# end - # Metaprogramming magic adapted from Chris Elrod example: # Generate customized set of loops for a given ndims and a vector # `static_dims` of dimensions to reduce over -function staticdim_nanvar_quote_turbo(static_dims::Vector{Int}, N::Int) +function staticdim_nanvar_quote(static_dims::Vector{Int}, N::Int) M = length(static_dims) # `static_dims` now contains every dim we're taking the var over. Bᵥ = Expr(:call, :view, :B) @@ -206,120 +156,11 @@ function staticdim_nanvar_quote_turbo(static_dims::Vector{Int}, N::Int) # Build the reduction loop for n ∈ reduct_inds newblock = Expr(:block) - push!(block.args, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock)) - block = newblock - end - # Push more things here if you want them in the innermost loop - push!(block.args, :(δ = $Aind - μ)) - push!(block.args, :(notnan = δ==δ)) - push!(block.args, :(n += notnan)) - push!(block.args, :(σ² += ifelse(notnan, δ * δ, ∅))) - # Push more things here if you want them at the end of the reduction loop - push!(rblock.args, :($Bind = σ² * inv(max(n-corrected,0)))) - - # Put it all together - quote - ∅ = zero(eltype(B)) - Bᵥ = $Bᵥ - @turbo $loops - return B - end -end - -# Chris Elrod metaprogramming magic: -# Turn non-static integers in `dims` tuple into `StaticInt`s -# so we can construct `static_dims` vector within @generated code -function branches_nanvar_quote_turbo(N::Int, M::Int, D) - static_dims = Int[] - for m ∈ 1:M - param = D.parameters[m] - if param <: StaticInt - new_dim = _dim(param)::Int - @assert new_dim ∉ static_dims - push!(static_dims, new_dim) + if n==last(reduct_inds) + push!(block.args, Expr(:macrocall, Symbol("@simd"), :ivdep, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock))) else - t = Expr(:tuple) - for n ∈ static_dims - push!(t.args, :(StaticInt{$n}())) - end - q = Expr(:block, :(dimm = dims[$m])) - qold = q - ifsym = :if - for n ∈ 1:N - n ∈ static_dims && continue - tc = copy(t) - push!(tc.args, :(StaticInt{$n}())) - qnew = Expr(ifsym, :(dimm == $n), :(return _nanvar!(B, corrected, A, $tc))) - for r ∈ m+1:M - push!(tc.args, :(dims[$r])) - end - push!(qold.args, qnew) - qold = qnew - ifsym = :elseif - end - push!(qold.args, Expr(:block, :(throw("Dimension `$dimm` not found.")))) - return q - end - end - staticdim_nanvar_quote_turbo(static_dims, N) -end - -# Efficient @generated in-place var -@generated function _nanvar!(B::StridedArray{Tₒ,N}, corrected::Bool, A::StridedArray{T,N}, dims::D) where {Tₒ<:PrimitiveNumber,T,N,M,D<:Tuple{Vararg{IntOrStaticInt,M}}} - N == M && return :(B[1] = _nanvar(B[1], corrected, A, :); B) - # total_combinations = binomial(N,M) - # if total_combinations > 6 - # # Fallback, for overly-complex reductions - # return :(_nanvar_fallback!(B, corrected, A, dims)) - # else - branches_nanvar_quote_turbo(N, M, D) - # end -end - - -function staticdim_nanvar_quote(static_dims::Vector{Int}, N::Int) - M = length(static_dims) - # `static_dims` now contains every dim we're taking the var over. - Bᵥ = Expr(:call, :view, :B) - reduct_inds = Int[] - nonreduct_inds = Int[] - # Firstly, build our expressions for indexing each array - Aind = :(A[]) - Bind = :(Bᵥ[]) - inds = Vector{Symbol}(undef, N) - for n ∈ 1:N - ind = Symbol(:i_,n) - inds[n] = ind - push!(Aind.args, ind) - if n ∈ static_dims - push!(reduct_inds, n) - push!(Bᵥ.args, :(firstindex(B,$n))) - else - push!(nonreduct_inds, n) - push!(Bᵥ.args, :) - push!(Bind.args, ind) + push!(block.args, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock)) end - end - firstn = first(nonreduct_inds) - # Secondly, build up our set of loops - block = Expr(:block) - loops = Expr(:for, :($(inds[firstn]) = indices((A,B),$firstn)), block) - if length(nonreduct_inds) > 1 - for n ∈ @view(nonreduct_inds[2:end]) - newblock = Expr(:block) - push!(block.args, Expr(:for, :($(inds[n]) = indices((A,B),$n)), newblock)) - block = newblock - end - end - rblock = block - # Push more things here if you want them at the beginning of the reduction loop - push!(rblock.args, :(μ = $Bind)) - push!(rblock.args, :(n = 0)) - push!(rblock.args, :(σ² = ∅)) - # Build the reduction loop - for n ∈ reduct_inds - newblock = Expr(:block) - push!(block.args, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock)) block = newblock end # Push more things here if you want them in the innermost loop @@ -339,6 +180,8 @@ function staticdim_nanvar_quote(static_dims::Vector{Int}, N::Int) end end +# Turn non-static integers in `dims` tuple into `StaticInt`s +# so we can construct `static_dims` vector within @generated code function branches_nanvar_quote(N::Int, M::Int, D) static_dims = Int[] for m ∈ 1:M From 359b3679171e2b4278e32266496b5d1c861d583c Mon Sep 17 00:00:00 2001 From: "C. Brenhin Keller" Date: Wed, 12 Jun 2024 20:52:45 -0400 Subject: [PATCH 4/8] De-lv `quicksort` --- src/Sorting/quicksort.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Sorting/quicksort.jl b/src/Sorting/quicksort.jl index e353641..2f740ff 100644 --- a/src/Sorting/quicksort.jl +++ b/src/Sorting/quicksort.jl @@ -5,7 +5,7 @@ function sortnans!(A, iₗ=firstindex(A), iᵤ=lastindex(A)) # Count up NaNs Nₙₐₙ = 0 - @turbo for i = iₗ:iᵤ + @inbounds @simd ivdep for i = iₗ:iᵤ Nₙₐₙ += A[i] != A[i] end # If none, return early @@ -61,7 +61,7 @@ end A[𝔦ₗ], A[𝔦ᵤ] = A[𝔦ᵤ], A[𝔦ₗ] end else - @turbo for i ∈ 0:n + @inbounds @simd ivdep for i ∈ 0:n 𝔦ₗ = iₗ+i 𝔦ᵤ = iᵤ-i l = A[𝔦ₗ] @@ -115,7 +115,7 @@ function quicksort!(A, iₗ=firstindex(A), iᵤ=lastindex(A)) # Count up elements that must be moved to upper partition Nᵤ = 0 - @turbo for i = (iₗ+1):iᵤ + @inbounds @simd ivdep for i = (iₗ+1):iᵤ Nᵤ += A[i] >= pivot end Nₗ = N - Nᵤ From a815f87c71ebed83dd41d0f47132376bcb88a3cc Mon Sep 17 00:00:00 2001 From: "C. Brenhin Keller" Date: Wed, 12 Jun 2024 21:05:44 -0400 Subject: [PATCH 5/8] Relax type constraints in `nanvar` --- src/ArrayStats/nanvar.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ArrayStats/nanvar.jl b/src/ArrayStats/nanvar.jl index fe4b62f..12a68cc 100644 --- a/src/ArrayStats/nanvar.jl +++ b/src/ArrayStats/nanvar.jl @@ -64,7 +64,7 @@ function _nanvar(::Nothing, corrected::Bool, A, ::Colon) end return σ² / max(n-corrected,0) end -function _nanvar(::Nothing, corrected::Bool, A::AbstractArray{T}, ::Colon) where T<:PrimitiveInteger +function _nanvar(::Nothing, corrected::Bool, A::AbstractArray{T}, ::Colon) where T<:Integer Tₒ = Base.promote_op(/, T, Int) n = length(A) Σ = zero(Tₒ) @@ -97,7 +97,7 @@ function _nanvar(μ::Number, corrected::Bool, A, ::Colon) end return σ² / max(n-corrected, 0) end -function _nanvar(μ::Number, corrected::Bool, A::AbstractArray{T}, ::Colon) where T<:PrimitiveInteger +function _nanvar(μ::Number, corrected::Bool, A::AbstractArray{T}, ::Colon) where T<:Integer σ² = zero(typeof(μ)) if μ==μ @inbounds @simd ivdep for i ∈ eachindex(A) From 29ed673aa8f8bf24ade027e506b8d25b31153640 Mon Sep 17 00:00:00 2001 From: "C. Brenhin Keller" Date: Wed, 12 Jun 2024 21:07:33 -0400 Subject: [PATCH 6/8] Remove LoopVectorization dependency --- Project.toml | 11 ++++------- src/NaNStatistics.jl | 9 ++------- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/Project.toml b/Project.toml index 9663ca1..916411b 100644 --- a/Project.toml +++ b/Project.toml @@ -4,16 +4,13 @@ authors = ["C. Brenhin Keller"] version = "0.6.35" [deps] -IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" -LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" +ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" -VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" +StaticArrayInterface = "0d7ed370-da01-4f52-bd93-41d350b8b718" [compat] -IfElse = "0.1" -LoopVectorization = "0.12.113" -VectorizationBase = "0.21.0 - 0.21.64" PrecompileTools = "1" Static = "0.8" -julia = "1.8" +StaticArrayInterface = "1" +julia = "1.10" diff --git a/src/NaNStatistics.jl b/src/NaNStatistics.jl index 6399f42..4c15cf0 100644 --- a/src/NaNStatistics.jl +++ b/src/NaNStatistics.jl @@ -1,16 +1,11 @@ module NaNStatistics - # AVX vectorziation tools - using IfElse: ifelse - using LoopVectorization - using Static const IntOrStaticInt = Union{Integer, StaticInt} - const PrimitiveFloat = Union{Float16, Float32, Float64} - const PrimitiveInteger = Union{Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128} - const PrimitiveNumber = Union{PrimitiveFloat, PrimitiveInteger} _dim(::Type{StaticInt{N}}) where {N} = N::Int + using StaticArrayInterface: indices + include("ArrayStats/ArrayStats.jl") include("ArrayStats/nanmean.jl") include("ArrayStats/nansum.jl") From b1c2ca7127f6d7000a29562c11564f82f3bc93c1 Mon Sep 17 00:00:00 2001 From: "C. Brenhin Keller" Date: Thu, 13 Jun 2024 00:34:10 -0400 Subject: [PATCH 7/8] Don't assume that `partialsort!` partitions around the selected index --- src/Sorting/nanmedian.jl | 10 +++++----- src/Sorting/nanpctile.jl | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/Sorting/nanmedian.jl b/src/Sorting/nanmedian.jl index 7968e9a..9be0aa6 100644 --- a/src/Sorting/nanmedian.jl +++ b/src/Sorting/nanmedian.jl @@ -108,20 +108,20 @@ function _nanmedian!(A::AbstractArray{T}, ::Colon) where {T} n < 2 && return float(T)(A[iₗ]) i½ = (iₗ + iᵤ) ÷ 2 if iseven(n) - if n < 384 + Aᵢ₋, Aᵢ₊ = if n < 384 quicksort!(A, iₗ, iᵤ) + A[i½], A[i½+1] else - quickselect!(A, iₗ, iᵤ, i½) - quickselect!(A, i½+1, iᵤ, i½+1) + quickselect!(A, iₗ, iᵤ, i½), quickselect!(A, iₗ, iᵤ, i½+1) end - return (A[i½] + A[i½+1]) / 2 + return float(T)(0.5*Aᵢ₋ + 0.5*Aᵢ₊) else if n < 192 quicksort!(A, iₗ, iᵤ) else quickselect!(A, iₗ, iᵤ, i½) end - return A[i½] / 1 + return float(T)(A[i½]) end end diff --git a/src/Sorting/nanpctile.jl b/src/Sorting/nanpctile.jl index d3271bf..3b8db57 100644 --- a/src/Sorting/nanpctile.jl +++ b/src/Sorting/nanpctile.jl @@ -182,14 +182,14 @@ function _nanquantile!(A::AbstractArray{T}, q::Real, ::Colon) where {T} iₚ = q*n₋ + iₗ iₚ₋ = floor(Int, iₚ) iₚ₊ = ceil(Int, iₚ) - if n₋ < 384 + f = iₚ - iₚ₋ + Aᵢ₋, Aᵢ₊ = if n₋ < 384 quicksort!(A, iₗ, iᵤ) + A[iₚ₋], A[iₚ₊] else - quickselect!(A, iₗ, iᵤ, iₚ₋) - quickselect!(A, iₚ₊, iᵤ, iₚ₊) + quickselect!(A, iₗ, iᵤ, iₚ₋), quickselect!(A, iₗ, iᵤ, iₚ₊) end - f = iₚ - iₚ₋ - return f*A[iₚ₊] + (1-f)*A[iₚ₋] + return f*Aᵢ₊ + (1-f)*Aᵢ₋ end # Generate customized set of loops for a given ndims and a vector From 0af934cbf96fdc5654c9a8b727dc9074c6090703 Mon Sep 17 00:00:00 2001 From: "C. Brenhin Keller" Date: Thu, 13 Jun 2024 13:07:30 -0400 Subject: [PATCH 8/8] Bump version to 0.6.36 --- Project.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index 916411b..5a010e9 100644 --- a/Project.toml +++ b/Project.toml @@ -1,10 +1,9 @@ name = "NaNStatistics" uuid = "b946abbf-3ea7-4610-9019-9858bfdeaf2d" authors = ["C. Brenhin Keller"] -version = "0.6.35" +version = "0.6.36" [deps] -ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" StaticArrayInterface = "0d7ed370-da01-4f52-bd93-41d350b8b718"