From d18762ce03fc92028f9ec9cd51d5ef026eda5e06 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Sun, 2 May 2021 16:37:13 +0200 Subject: [PATCH] Add pairwise (#627) This generic method takes iterators of vectors and supports skipping missing values. It is a more general version of `pairwise` in Distances.jl. Since methods are compatible, both packages can override a common empty function defined in StatsAPI. --- Project.toml | 2 + docs/src/misc.md | 2 + src/StatsBase.jl | 4 + src/pairwise.jl | 313 +++++++++++++++++++++++++++++++++++++++++++++++ test/pairwise.jl | 261 +++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 1 + 6 files changed, 583 insertions(+) create mode 100644 src/pairwise.jl create mode 100644 test/pairwise.jl diff --git a/Project.toml b/Project.toml index f1a4b3a6a..8b7b3137b 100644 --- a/Project.toml +++ b/Project.toml @@ -13,12 +13,14 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SortingAlgorithms = "a2af1166-a08f-5f64-846c-94a0d3cef48c" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0" [compat] DataAPI = "1" DataStructures = "0.10, 0.11, 0.12, 0.13, 0.14, 0.17, 0.18" Missings = "0.3, 0.4, 1.0" SortingAlgorithms = "0.3, 1.0" +StatsAPI = "1" julia = "1" [extras] diff --git a/docs/src/misc.md b/docs/src/misc.md index 79b55e069..66c840289 100644 --- a/docs/src/misc.md +++ b/docs/src/misc.md @@ -7,4 +7,6 @@ levelsmap indexmap indicatormat StatsBase.midpoints +pairwise +pairwise! ``` diff --git a/src/StatsBase.jl b/src/StatsBase.jl index 70c812c00..2d2344497 100644 --- a/src/StatsBase.jl +++ b/src/StatsBase.jl @@ -19,6 +19,7 @@ import LinearAlgebra: BlasReal, BlasFloat import Statistics: mean, mean!, var, varm, varm!, std, stdm, cov, covm, cor, corm, cov2cor!, unscaled_covzm, quantile, sqrt!, median, middle +import StatsAPI: pairwise, pairwise! 
## tackle compatibility issues @@ -157,6 +158,8 @@ export indexmap, # construct a map from element to index levelsmap, # construct a map from n unique elements to [1, ..., n] indicatormat, # construct indicator matrix + pairwise, # pairwise application of functions + pairwise!, # in-place pairwise application of functions # statistical models CoefTable, @@ -228,6 +231,7 @@ include("signalcorr.jl") include("partialcor.jl") include("empirical.jl") include("hist.jl") +include("pairwise.jl") include("misc.jl") include("sampling.jl") diff --git a/src/pairwise.jl b/src/pairwise.jl new file mode 100644 index 000000000..97e51a3f1 --- /dev/null +++ b/src/pairwise.jl @@ -0,0 +1,313 @@ +function _pairwise!(::Val{:none}, f, dest::AbstractMatrix, x, y, symmetric::Bool) + @inbounds for (i, xi) in enumerate(x), (j, yj) in enumerate(y) + symmetric && i > j && continue # only cells with i <= j are computed when symmetric + + # For performance, diagonal is special-cased + if f === cor && eltype(dest) !== Union{} && i == j && xi === yj + # TODO: float() will not be needed after JuliaLang/Statistics.jl#61 + dest[i, j] = float(cor(xi)) + else + dest[i, j] = f(xi, yj) + end + end + if symmetric + m, n = size(dest) + @inbounds for j in 1:n, i in (j+1):m + dest[i, j] = dest[j, i] # mirror the computed cells into the cells below the diagonal + end + end + return dest +end + +function check_vectors(x, y, skipmissing::Symbol) + m = length(x) + n = length(y) + if !(all(xi -> xi isa AbstractVector, x) && all(yi -> yi isa AbstractVector, y)) + throw(ArgumentError("All entries in x and y must be vectors " * + "when skipmissing=:$skipmissing")) + end + if m > 1 + indsx = keys(first(x)) + for i in 2:m + keys(x[i]) == indsx || + throw(ArgumentError("All input vectors must have the same indices")) + end + end + if n > 1 + indsy = keys(first(y)) + for j in 2:n + keys(y[j]) == indsy || + throw(ArgumentError("All input vectors must have the same indices")) + end + end + if m > 0 && n > 0 # cross-check x against y even when either holds a single vector + keys(first(x)) == keys(first(y)) || + throw(ArgumentError("All input vectors must have the same indices")) + end +end + +function 
_pairwise!(::Val{:pairwise}, f, dest::AbstractMatrix, x, y, symmetric::Bool) + check_vectors(x, y, :pairwise) + @inbounds for (j, yj) in enumerate(y) + ynminds = .!ismissing.(yj) + @inbounds for (i, xi) in enumerate(x) + symmetric && i > j && continue + + if xi === yj + ynm = view(yj, ynminds) + # For performance, diagonal is special-cased + if f === cor && eltype(dest) !== Union{} && i == j + # TODO: float() will not be needed after JuliaLang/Statistics.jl#61 + dest[i, j] = float(cor(xi)) + else + dest[i, j] = f(ynm, ynm) + end + else + nminds = .!ismissing.(xi) .& ynminds + xnm = view(xi, nminds) + ynm = view(yj, nminds) + dest[i, j] = f(xnm, ynm) + end + end + end + if symmetric + m, n = size(dest) + @inbounds for j in 1:n, i in (j+1):m + dest[i, j] = dest[j, i] + end + end + return dest +end + +function _pairwise!(::Val{:listwise}, f, dest::AbstractMatrix, x, y, symmetric::Bool) + check_vectors(x, y, :listwise) + isempty(dest) && return dest # no pairs to fill; also avoids first(x) on an empty x + nminds = .!ismissing.(first(x)) + @inbounds for xi in Iterators.drop(x, 1) + nminds .&= .!ismissing.(xi) + end + if x !== y + @inbounds for yj in y + nminds .&= .!ismissing.(yj) + end + end + + # Computing integer indices once for all vectors is faster + nminds′ = findall(nminds) + # TODO: check whether wrapping views in a custom array type which asserts + # that entries cannot be `missing` (similar to `skipmissing`) + # could offer better performance + return _pairwise!(Val(:none), f, dest, + [view(xi, nminds′) for xi in x], + [view(yi, nminds′) for yi in y], + symmetric) +end + +function _pairwise!(f, dest::AbstractMatrix, x, y; + symmetric::Bool=false, skipmissing::Symbol=:none) + if !(skipmissing in (:none, :pairwise, :listwise)) + throw(ArgumentError("skipmissing must be one of :none, :pairwise or :listwise")) + end + + x′ = x isa Union{AbstractArray, Tuple, NamedTuple} ? x : collect(x) + y′ = y isa Union{AbstractArray, Tuple, NamedTuple} ? 
y : collect(y) + m = length(x′) + n = length(y′) + + size(dest) != (m, n) && + throw(DimensionMismatch("dest has dimensions $(size(dest)) but expected ($m, $n)")) + + Base.has_offset_axes(dest) && throw(ArgumentError("dest indices must start at 1")) + + return _pairwise!(Val(skipmissing), f, dest, x′, y′, symmetric) +end + +function _pairwise(::Val{skipmissing}, f, x, y, symmetric::Bool) where {skipmissing} + x′ = x isa Union{AbstractArray, Tuple, NamedTuple} ? x : collect(x) + y′ = y isa Union{AbstractArray, Tuple, NamedTuple} ? y : collect(y) + m = length(x′) + n = length(y′) + + T = Core.Compiler.return_type(f, Tuple{eltype(x′), eltype(y′)}) + Tsm = Core.Compiler.return_type((x, y) -> f(disallowmissing(x), disallowmissing(y)), + Tuple{eltype(x′), eltype(y′)}) + + if skipmissing === :none + dest = Matrix{T}(undef, m, n) + elseif skipmissing in (:pairwise, :listwise) + dest = Matrix{Tsm}(undef, m, n) + else + throw(ArgumentError("skipmissing must be one of :none, :pairwise or :listwise")) + end + + # Preserve inferred element type + isempty(dest) && return dest + + _pairwise!(f, dest, x′, y′, symmetric=symmetric, skipmissing=skipmissing) + + if isconcretetype(eltype(dest)) + return dest + else + # Final eltype depends on actual contents (consistent with map and broadcast) + U = mapreduce(typeof, promote_type, dest) + # V is inferred (contrary to U), but it only gives an upper bound for U + V = promote_type(T, Tsm) + return convert(Matrix{U}, dest)::Matrix{<:V} + end +end + +""" + pairwise!(f, dest::AbstractMatrix, x[, y]; + symmetric::Bool=false, skipmissing::Symbol=:none) + +Store in matrix `dest` the result of applying `f` to all possible pairs +of entries in iterators `x` and `y`, and return it. Rows correspond to +entries in `x` and columns to entries in `y`, and `dest` must therefore +be of size `length(x) × length(y)`. +If `y` is omitted then `x` is crossed with itself. 
+ +As a special case, if `f` is `cor`, diagonal cells for which entries +from `x` and `y` are identical (according to `===`) are set to one even +in the presence of `missing`, `NaN` or `Inf` entries. + +# Keyword arguments +- `symmetric::Bool=false`: If `true`, `f` is only called to compute + the upper triangle of the matrix (diagonal included), and these values are + copied to fill the lower triangle. Only allowed when `y` is omitted or `y === x`. + Defaults to `true` when `f` is `cor` or `cov` and `y` is omitted. +- `skipmissing::Symbol=:none`: If `:none` (the default), missing values + in inputs are passed to `f` without any modification. + Use `:pairwise` to skip entries with a `missing` value in either + of the two vectors passed to `f` for a given pair of vectors in `x` and `y`. + Use `:listwise` to skip entries with a `missing` value in any of the + vectors in `x` or `y`; note that this might drop a large part of entries. + Only allowed when entries in `x` and `y` are vectors. + +# Examples +```jldoctest +julia> using StatsBase, Statistics + +julia> dest = zeros(3, 3); + +julia> x = [1 3 7 + 2 5 6 + 3 8 4 + 4 6 2]; + +julia> pairwise!(cor, dest, eachcol(x)); + +julia> dest +3×3 Matrix{Float64}: + 1.0 0.744208 -0.989778 + 0.744208 1.0 -0.68605 + -0.989778 -0.68605 1.0 + +julia> y = [1 3 missing + 2 5 6 + 3 missing 2 + 4 6 2]; + +julia> pairwise!(cor, dest, eachcol(y), skipmissing=:pairwise); + +julia> dest +3×3 Matrix{Float64}: + 1.0 0.928571 -0.866025 + 0.928571 1.0 -1.0 + -0.866025 -1.0 1.0 +``` +""" +function pairwise!(f, dest::AbstractMatrix, x, y=x; + symmetric::Bool=false, skipmissing::Symbol=:none) + if symmetric && x !== y + throw(ArgumentError("symmetric=true only makes sense passing " * + "a single set of variables (x === y)")) + end + + return _pairwise!(f, dest, x, y, symmetric=symmetric, skipmissing=skipmissing) +end + +""" + pairwise(f, x[, y]; + symmetric::Bool=false, skipmissing::Symbol=:none) + +Return a matrix holding the result of applying `f` to all possible pairs +of entries in iterators 
`x` and `y`. Rows correspond to +entries in `x` and columns to entries in `y`. If `y` is omitted then a +square matrix crossing `x` with itself is returned. + +As a special case, if `f` is `cor`, diagonal cells for which entries +from `x` and `y` are identical (according to `===`) are set to one even +in the presence of `missing`, `NaN` or `Inf` entries. + +# Keyword arguments +- `symmetric::Bool=false`: If `true`, `f` is only called to compute + the upper triangle of the matrix (diagonal included), and these values are + copied to fill the lower triangle. Only allowed when `y` is omitted or `y === x`. + Defaults to `true` when `f` is `cor` or `cov` and `y` is omitted. +- `skipmissing::Symbol=:none`: If `:none` (the default), missing values + in inputs are passed to `f` without any modification. + Use `:pairwise` to skip entries with a `missing` value in either + of the two vectors passed to `f` for a given pair of vectors in `x` and `y`. + Use `:listwise` to skip entries with a `missing` value in any of the + vectors in `x` or `y`; note that this might drop a large part of entries. + Only allowed when entries in `x` and `y` are vectors. + +# Examples +```jldoctest +julia> using StatsBase, Statistics + +julia> x = [1 3 7 + 2 5 6 + 3 8 4 + 4 6 2]; + +julia> pairwise(cor, eachcol(x)) +3×3 Matrix{Float64}: + 1.0 0.744208 -0.989778 + 0.744208 1.0 -0.68605 + -0.989778 -0.68605 1.0 + +julia> y = [1 3 missing + 2 5 6 + 3 missing 2 + 4 6 2]; + +julia> pairwise(cor, eachcol(y), skipmissing=:pairwise) +3×3 Matrix{Float64}: + 1.0 0.928571 -0.866025 + 0.928571 1.0 -1.0 + -0.866025 -1.0 1.0 +``` +""" +function pairwise(f, x, y=x; symmetric::Bool=false, skipmissing::Symbol=:none) + if symmetric && x !== y + throw(ArgumentError("symmetric=true only makes sense passing " * + "a single set of variables (x === y)")) + end + + return _pairwise(Val(skipmissing), f, x, y, symmetric) +end + +# cov(x) is faster than cov(x, x) +_cov(x, y) = x === y ? 
cov(x) : cov(x, y) + +pairwise!(::typeof(cov), dest::AbstractMatrix, x, y; + symmetric::Bool=false, skipmissing::Symbol=:none) = + pairwise!(_cov, dest, x, y, symmetric=symmetric, skipmissing=skipmissing) + +pairwise(::typeof(cov), x, y; symmetric::Bool=false, skipmissing::Symbol=:none) = + pairwise(_cov, x, y, symmetric=symmetric, skipmissing=skipmissing) + +pairwise!(::typeof(cov), dest::AbstractMatrix, x; + symmetric::Bool=true, skipmissing::Symbol=:none) = + pairwise!(_cov, dest, x, x, symmetric=symmetric, skipmissing=skipmissing) + +pairwise(::typeof(cov), x; symmetric::Bool=true, skipmissing::Symbol=:none) = + pairwise(_cov, x, x, symmetric=symmetric, skipmissing=skipmissing) + +pairwise!(::typeof(cor), dest::AbstractMatrix, x; + symmetric::Bool=true, skipmissing::Symbol=:none) = + pairwise!(cor, dest, x, x, symmetric=symmetric, skipmissing=skipmissing) + +pairwise(::typeof(cor), x; symmetric::Bool=true, skipmissing::Symbol=:none) = + pairwise(cor, x, x, symmetric=symmetric, skipmissing=skipmissing) diff --git a/test/pairwise.jl b/test/pairwise.jl new file mode 100644 index 000000000..d31209655 --- /dev/null +++ b/test/pairwise.jl @@ -0,0 +1,261 @@ +using StatsBase +using Test, Random, Statistics, LinearAlgebra +using Missings + +const ≅ = isequal + +Random.seed!(1) + +# to avoid using specialized method +arbitrary_fun(x, y) = cor(x, y) + +@testset "pairwise and pairwise! 
with $f" for f in (arbitrary_fun, cor, cov) + @testset "basic interface" begin + x = [rand(10) for _ in 1:4] + y = [rand(Float32, 10) for _ in 1:5] + # to test case where inference of returned eltype fails + z = [Vector{Any}(rand(Float32, 10)) for _ in 1:5] + + res = @inferred pairwise(f, x, y) + @test res isa Matrix{Float64} + res2 = zeros(Float64, size(res)) + @test pairwise!(f, res2, x, y) === res2 + @test res == res2 == [f(xi, yi) for xi in x, yi in y] + + res = pairwise(f, y, z) + @test res isa Matrix{Float32} + res2 = zeros(Float32, size(res)) + @test pairwise!(f, res2, y, z) === res2 + @test res == res2 == [f(yi, zi) for yi in y, zi in z] + + res = pairwise(f, Any[[1.0, 2.0, 3.0], [1.0f0, 3.0f0, 10.5f0]]) + @test res isa Matrix{Float64} + res2 = zeros(AbstractFloat, size(res)) + @test pairwise!(f, res2, Any[[1.0, 2.0, 3.0], [1.0f0, 3.0f0, 10.5f0]]) === res2 + @test res == res2 == + [f(xi, yi) for xi in ([1.0, 2.0, 3.0], [1.0f0, 3.0f0, 10.5f0]), + yi in ([1.0, 2.0, 3.0], [1.0f0, 3.0f0, 10.5f0])] + @test res isa Matrix{Float64} + + @inferred pairwise(f, x, y) + + @test_throws ArgumentError pairwise(f, [Int[]], [Int[]]) + @test_throws ArgumentError pairwise!(f, zeros(1, 1), [Int[]], [Int[]]) + + res = pairwise(f, [], []) + @test size(res) == (0, 0) + @test res isa Matrix{Any} + res2 = zeros(0, 0) + @test pairwise!(f, res2, [], []) === res2 + + res = pairwise(f, Vector{Int}[], Vector{Int}[]) + @test size(res) == (0, 0) + @test res isa Matrix{Float64} + res2 = zeros(0, 0) + @test pairwise!(f, res2, Vector{Int}[], Vector{Int}[]) === res2 + + res = pairwise(f, [[1, 2]], Vector{Int}[]) + @test size(res) == (1, 0) + @test res isa Matrix{Float64} + res2 = zeros(1, 0) + @test pairwise!(f, res2, [[1, 2]], Vector{Int}[]) === res2 + + res = pairwise(f, Vector{Int}[], [[1, 2], [2, 3]]) + @test size(res) == (0, 2) + @test res isa Matrix{Float64} + res2 = zeros(0, 2) + @test pairwise!(f, res2, [], [[1, 2], [2, 3]]) === res2 + + @test_throws DimensionMismatch pairwise!(f, 
zeros(1, 2), x, y) + @test_throws DimensionMismatch pairwise!(f, zeros(1, 2), [], []) + @test_throws DimensionMismatch pairwise!(f, zeros(0, 0), + [], [[1, 2], [2, 3]]) + end + + @testset "missing values handling interface" begin + xm = [ifelse.(rand(100) .> 0.9, missing, rand(100)) for _ in 1:4] + ym = [ifelse.(rand(100) .> 0.9, missing, rand(Float32, 100)) for _ in 1:4] + zm = [ifelse.(rand(100) .> 0.9, missing, rand(Float32, 100)) for _ in 1:4] + + res = pairwise(f, xm, ym) + @test res isa Matrix{Missing} + res2 = zeros(Union{Float64, Missing}, size(res)) + @test pairwise!(f, res2, xm, ym) === res2 + @test res ≅ res2 ≅ [missing for xi in xm, yi in ym] + + res = pairwise(f, xm, ym, skipmissing=:pairwise) + @test res isa Matrix{Float64} + res2 = zeros(Union{Float64, Missing}, size(res)) + @test pairwise!(f, res2, xm, ym, skipmissing=:pairwise) === res2 + @test res ≅ res2 + @test isapprox(res, [f(collect.(skipmissings(xi, yi))...) for xi in xm, yi in ym], + rtol=1e-6) + + res = pairwise(f, ym, zm, skipmissing=:pairwise) + @test res isa Matrix{Float32} + res2 = zeros(Union{Float32, Missing}, size(res)) + @test pairwise!(f, res2, ym, zm, skipmissing=:pairwise) === res2 + @test res ≅ res2 + @test isapprox(res, [f(collect.(skipmissings(yi, zi))...) 
for yi in ym, zi in zm], + rtol=1e-6) + + nminds = mapreduce(x -> .!ismissing.(x), + (x, y) -> x .& y, + [xm; ym]) + res = pairwise(f, xm, ym, skipmissing=:listwise) + @test res isa Matrix{Float64} + res2 = zeros(Union{Float64, Missing}, size(res)) + @test pairwise!(f, res2, xm, ym, skipmissing=:listwise) === res2 + @test res ≅ res2 + @test isapprox(res, [f(view(xi, nminds), view(yi, nminds)) for xi in xm, yi in ym], + rtol=1e-6) + + if VERSION >= v"1.6.0-DEV" + # inference of cor fails so use an inferrable function + # to check that pairwise itself is inferrable + for skipmissing in (:none, :pairwise, :listwise) + g(x, y=x) = pairwise((x, y) -> x[1] * y[1], x, y, skipmissing=skipmissing) + @test Core.Compiler.return_type(g, Tuple{Vector{Vector{Union{Float64, Missing}}}}) == + Core.Compiler.return_type(g, Tuple{Vector{Vector{Union{Float64, Missing}}}, + Vector{Vector{Union{Float64, Missing}}}}) == + Matrix{<: Union{Float64, Missing}} + if skipmissing in (:pairwise, :listwise) + @test_broken Core.Compiler.return_type(g, Tuple{Vector{Vector{Union{Float64, Missing}}}}) == + Core.Compiler.return_type(g, Tuple{Vector{Vector{Union{Float64, Missing}}}, + Vector{Vector{Union{Float64, Missing}}}}) == + Matrix{Float64} + end + end + end + + @test_throws ArgumentError pairwise(f, xm, ym, skipmissing=:something) + @test_throws ArgumentError pairwise!(f, zeros(Union{Float64, Missing}, + length(xm), length(ym)), xm, ym, + skipmissing=:something) + + # variable with only missings + xm = [fill(missing, 10), rand(10)] + ym = [rand(10), rand(10)] + + res = pairwise(f, xm, ym) + @test res isa Matrix{Union{Float64, Missing}} + res2 = zeros(Union{Float64, Missing}, size(res)) + @test pairwise!(f, res2, xm, ym) === res2 + @test res ≅ res2 ≅ [f(xi, yi) for xi in xm, yi in ym] + + if VERSION >= v"1.5" # Fails with UndefVarError on Julia 1.0 + @test_throws ArgumentError pairwise(f, xm, ym, skipmissing=:pairwise) + @test_throws ArgumentError pairwise(f, xm, ym, skipmissing=:listwise) + + 
res = zeros(Union{Float64, Missing}, length(xm), length(ym)) + @test_throws ArgumentError pairwise!(f, res, xm, ym, skipmissing=:pairwise) + @test_throws ArgumentError pairwise!(f, res, xm, ym, skipmissing=:listwise) + end + + for sm in (:pairwise, :listwise) + @test_throws ArgumentError pairwise(f, [[1, 2]], [1], skipmissing=sm) + @test_throws ArgumentError pairwise(f, [1], [[1, 2]], skipmissing=sm) + @test_throws ArgumentError pairwise(f, [1], [1], skipmissing=sm) + end + end + + @testset "iterators" begin + x = (v for v in [rand(10) for _ in 1:4]) + y = (v for v in [rand(10) for _ in 1:4]) + + res = @inferred pairwise(f, x, y) + res2 = zeros(size(res)) + @test pairwise!(f, res2, x, y) === res2 + @test res == res2 == pairwise(f, collect(x), collect(y)) + + res = @inferred(pairwise(f, x)) + res2 = zeros(size(res)) + @test pairwise!(f, res2, x) === res2 + @test res == res2 == pairwise(f, collect(x)) + end + + @testset "non-vector entries" begin + x = (Iterators.drop(v, 1) for v in [rand(10) for _ in 1:4]) + y = (Iterators.drop(v, 1) for v in [rand(10) for _ in 1:4]) + + @test pairwise((x, y) -> f(collect(x), collect(y)), x, y) == + [f(collect(xi), collect(yi)) for xi in x, yi in y] + @test pairwise((x, y) -> f(collect(x), collect(y)), x) == + [f(collect(xi1), collect(xi2)) for xi1 in x, xi2 in x] + @test_throws ArgumentError pairwise((x, y) -> f(collect(x), collect(y)), x, y, + skipmissing=:pairwise) + @test_throws ArgumentError pairwise((x, y) -> f(collect(x), collect(y)), x, y, + skipmissing=:listwise) + end + + @testset "two-argument method" begin + x = [rand(10) for _ in 1:4] + res = pairwise(f, x) + res2 = zeros(size(res)) + @test pairwise!(f, res2, x) === res2 + @test res == res2 == pairwise(f, x, x) + end + + @testset "symmetric" begin + x = [rand(10) for _ in 1:4] + y = [rand(10) for _ in 1:4] + + @test pairwise(f, x, x, symmetric=true) == + pairwise(f, x, symmetric=true) == + Symmetric(pairwise(f, x, x), :U) + + res = zeros(4, 4) + res2 = zeros(4, 4) + 
@test pairwise!(f, res, x, x, symmetric=true) === res + @test pairwise!(f, res2, x, symmetric=true) === res2 + @test res == res2 == Symmetric(pairwise(f, x, x), :U) + + @test_throws ArgumentError pairwise(f, x, y, symmetric=true) + @test_throws ArgumentError pairwise!(f, res, x, y, symmetric=true) + end + + @testset "cor corner cases" begin + # Integer inputs must give a Float64 output + res = pairwise(cor, [[1, 2, 3], [1, 5, 2]]) + @test res isa Matrix{Float64} + @test res == [cor(xi, yi) for xi in ([1, 2, 3], [1, 5, 2]), + yi in ([1, 2, 3], [1, 5, 2])] + + # NaNs are ignored for the diagonal + res = pairwise(cor, [[1, 2, NaN], [1, 5, 2]]) + @test res isa Matrix{Float64} + @test res ≅ [1.0 NaN + NaN 1.0] + + # missings are ignored for the diagonal + res = pairwise(cor, [[1, 2, 7], [1, 5, missing]]) + @test res isa Matrix{Union{Float64, Missing}} + @test res ≅ [1.0 missing + missing 1.0] + res = pairwise(cor, Vector{Union{Int, Missing}}[[missing, missing, missing], + [missing, missing, missing]]) + @test res isa Matrix{Union{Float64, Missing}} + @test res ≅ [1.0 missing + missing 1.0] + if VERSION >= v"1.5" + # except when eltype is Missing + res = pairwise(cor, [[missing, missing, missing], + [missing, missing, missing]]) + @test res isa Matrix{Missing} + @test res ≅ [missing missing + missing missing] + end + + for sm in (:pairwise, :listwise) + res = pairwise(cor, [[1, 2, NaN, 4], [1, 5, 5, missing]], skipmissing=sm) + @test res isa Matrix{Float64} + @test res ≅ [1.0 NaN + NaN 1.0] + if VERSION >= v"1.5" + @test_throws ArgumentError pairwise(cor, [[missing, missing, missing], + [missing, missing, missing]], + skipmissing=sm) + end + end + end +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 500539c74..ca7be4b86 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -17,6 +17,7 @@ tests = ["ambiguous", "rankcorr", "signalcorr", "misc", + "pairwise", "robust", "sampling", "wsampling",