From c8535c1022165469a43b32094e3863e43a874dd8 Mon Sep 17 00:00:00 2001 From: Art Wild Date: Sat, 13 Mar 2021 19:17:08 -0500 Subject: [PATCH 1/3] fixed `pairwise` for non-symmetric kernels (fixed #143) --- src/common.jl | 17 +++++++++-------- test/kpca.jl | 19 +++++++++++++++---- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/src/common.jl b/src/common.jl index 9d6d094..096fc7b 100644 --- a/src/common.jl +++ b/src/common.jl @@ -122,29 +122,30 @@ end # calculate pairwise kernel function pairwise!(K::AbstractVecOrMat{<:Real}, kernel::Function, - X::AbstractVecOrMat{<:Real}, Y::AbstractVecOrMat{<:Real}) + X::AbstractVecOrMat{<:Real}, Y::AbstractVecOrMat{<:Real}; + force_symmetry=false) n = size(X, 2) m = size(Y, 2) for j = 1:m aj = view(Y, :, j) - for i in j:n + for i in (force_symmetry ? j : 1):n @inbounds K[i, j] = kernel(view(X, :, i), aj)[] end - j <= n && for i in 1:(j - 1) + force_symmetry && j <= n && for i in 1:(j - 1) @inbounds K[i, j] = K[j, i] # leveraging the symmetry end end K end -pairwise!(K::AbstractVecOrMat{<:Real}, kernel::Function, X::AbstractVecOrMat{<:Real}) = - pairwise!(K, kernel, X, X) +pairwise!(K::AbstractVecOrMat{<:Real}, kernel::Function, X::AbstractVecOrMat{<:Real}; kwargs...) = + pairwise!(K, kernel, X, X; force_symmetry=true) -function pairwise(kernel::Function, X::AbstractVecOrMat{<:Real}, Y::AbstractVecOrMat{<:Real}) +function pairwise(kernel::Function, X::AbstractVecOrMat{<:Real}, Y::AbstractVecOrMat{<:Real}; kwargs...) n = size(X, 2) m = size(Y, 2) K = similar(X, n, m) - pairwise!(K, kernel, X, Y) + pairwise!(K, kernel, X, Y; kwargs...) 
end -pairwise(kernel::Function, X::AbstractVecOrMat{<:Real}) = pairwise(kernel, X, X) +pairwise(kernel::Function, X::AbstractVecOrMat{<:Real}) = pairwise(kernel, X, X; force_symmetry=true) diff --git a/test/kpca.jl b/test/kpca.jl index 71f5bdc..cf633c7 100644 --- a/test/kpca.jl +++ b/test/kpca.jl @@ -34,18 +34,29 @@ import Random end # kernel calculations - K = MultivariateStats.pairwise((x,y)->norm(x-y), X, X[:,1:2]) + ker1 = (x,y)->x'y + ker2 = (x,y)->norm(x-y) + + K = MultivariateStats.pairwise(ker1, X) + @test size(K) == (n,n) + @test K[1,2] == K[2,1] + + K = MultivariateStats.pairwise(ker1, X, X.+1) + @test size(K) == (n,n) + @test K[1,2] != K[2,1] + + K = MultivariateStats.pairwise(ker2, X, X[:,1:2]) @test size(K) == (n, 2) @test K[1,1] == 0 @test K[3,2] == norm(X[:,3] - X[:,2]) - K = MultivariateStats.pairwise((x,y)->norm(x-y), X[:,1:3], X) + K = MultivariateStats.pairwise(ker2, X[:,1:3], X) @test size(K) == (3, n) @test K[1,1] == 0 @test K[3,2] == norm(X[:,2] - X[:,3]) K = similar(X, n, n) - MultivariateStats.pairwise!(K, (x,y)->norm(x-y), X) + MultivariateStats.pairwise!(K, ker2, X) @test size(K) == (n, n) @test K[1,1] == 0 @test K[2,1] == norm(X[:,2] - X[:,1]) @@ -54,7 +65,7 @@ import Random Iₙ = ones(n,n)/n @test MultivariateStats.transform!(KC, copy(K)) ≈ K - Iₙ*K - K*Iₙ + Iₙ*K*Iₙ - K = MultivariateStats.pairwise((x,y)->norm(x-y), X, X[:,1]) + K = MultivariateStats.pairwise(ker2, X, X[:,1]) @test size(K) == (n, 1) @test K[1,1] == 0 @test K[2,1] == norm(X[:,2] - X[:,1]) From cdcec59efa8da2b318b01af9b5f8777409ca4d44 Mon Sep 17 00:00:00 2001 From: Art Wild Date: Mon, 31 May 2021 22:20:18 -0400 Subject: [PATCH 2/3] use StatsBase.pairwise --- Project.toml | 2 +- src/MultivariateStats.jl | 4 ++-- src/cmds.jl | 8 ++++---- src/common.jl | 30 ++---------------------------- src/kpca.jl | 8 ++++---- test/cmds.jl | 14 +++++++------- test/kpca.jl | 31 ++++++------------------------- 7 files changed, 26 insertions(+), 71 deletions(-) diff --git a/Project.toml 
b/Project.toml index 66c22fb..d9fa1e2 100644 --- a/Project.toml +++ b/Project.toml @@ -14,7 +14,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] -Arpack = "0.3, 0.4" +Arpack = "0.3, 0.4, 0.5" StatsBase = "0.29, 0.30, 0.31, 0.32, 0.33" julia = "1" diff --git a/src/MultivariateStats.jl b/src/MultivariateStats.jl index 21ace39..34372e8 100644 --- a/src/MultivariateStats.jl +++ b/src/MultivariateStats.jl @@ -1,9 +1,9 @@ module MultivariateStats using LinearAlgebra - using StatsBase: SimpleCovariance, CovarianceEstimator + using StatsBase: SimpleCovariance, CovarianceEstimator, pairwise, pairwise! import Statistics: mean, var, cov, covm import Base: length, size, show, dump - import StatsBase: RegressionModel, fit, predict, ConvergenceException, dof, coef, weights + import StatsBase: RegressionModel, fit, predict, ConvergenceException, dof, coef, weights, pairwise import SparseArrays import LinearAlgebra: eigvals diff --git a/src/cmds.jl b/src/cmds.jl index 63e5dff..9895e70 100644 --- a/src/cmds.jl +++ b/src/cmds.jl @@ -95,7 +95,7 @@ function transform(M::MDS{T}, x::AbstractVector{<:Real}; distances=false) where end # get distance matrix - D = isnan(M.d) ? M.X : pairwise((x,y)->norm(x-y), M.X) + D = isnan(M.d) ? M.X : pairwise((x,y)->norm(x-y), eachcol(M.X), symmetric=true) d = d.^2 # b = 0.5*(ones(n,n)*d./n - d + D*ones(n,1)./n - ones(n,n)*D*ones(n,1)./n^2) @@ -142,7 +142,7 @@ function fit(::Type{MDS}, X::AbstractMatrix{T}; # get distance matrix and space dimension D, d = if !distances - pairwise((x,y)->norm(x-y), X), size(X,1) + pairwise((x,y)->norm(x-y), eachcol(X), symmetric=true), size(X,1) else X, NaN end @@ -203,8 +203,8 @@ end function stress(M::MDS) # calculate distances if original data was stored - DX = isnan(M.d) ? M.X : pairwise((x,y)->norm(x-y), M.X) - DY = pairwise((x,y)->norm(x-y), transform(M)) + DX = isnan(M.d) ? 
M.X : pairwise((x,y)->norm(x-y), eachcol(M.X), symmetric=true) + DY = pairwise((x,y)->norm(x-y), eachcol(transform(M)), symmetric=true) n = size(DX,1) return sqrt(2*sum((DX - DY).^2)/sum(DX.^2)); end diff --git a/src/common.jl b/src/common.jl index 096fc7b..b54b8eb 100644 --- a/src/common.jl +++ b/src/common.jl @@ -121,31 +121,5 @@ function calcscattermat(Z::DenseMatrix) end # calculate pairwise kernel -function pairwise!(K::AbstractVecOrMat{<:Real}, kernel::Function, - X::AbstractVecOrMat{<:Real}, Y::AbstractVecOrMat{<:Real}; - force_symmetry=false) - n = size(X, 2) - m = size(Y, 2) - for j = 1:m - aj = view(Y, :, j) - for i in (force_symmetry ? j : 1):n - @inbounds K[i, j] = kernel(view(X, :, i), aj)[] - end - force_symmetry && j <= n && for i in 1:(j - 1) - @inbounds K[i, j] = K[j, i] # leveraging the symmetry - end - end - K -end - -pairwise!(K::AbstractVecOrMat{<:Real}, kernel::Function, X::AbstractVecOrMat{<:Real}; kwargs...) = - pairwise!(K, kernel, X, X; force_symmetry=true) - -function pairwise(kernel::Function, X::AbstractVecOrMat{<:Real}, Y::AbstractVecOrMat{<:Real}; kwargs...) - n = size(X, 2) - m = size(Y, 2) - K = similar(X, n, m) - pairwise!(K, kernel, X, Y; kwargs...) -end - -pairwise(kernel::Function, X::AbstractVecOrMat{<:Real}) = pairwise(kernel, X, X; force_symmetry=true) +pairwise(kernel::Function, X::AbstractMatrix, x::AbstractVector; kwargs...) 
= + [kernel(y,x) for y in eachcol(X)] diff --git a/src/kpca.jl b/src/kpca.jl index eb94dce..108e30e 100644 --- a/src/kpca.jl +++ b/src/kpca.jl @@ -50,7 +50,7 @@ principalvars(M::KernelPCA) = M.λ """Calculate transformation to kernel space""" function transform(M::KernelPCA, x::AbstractVecOrMat{<:Real}) - k = pairwise(M.ker, M.X, x) + k = pairwise(M.ker, eachcol(M.X), eachcol(x)) transform!(M.center, k) return projection(M)'*k end @@ -63,7 +63,7 @@ function reconstruct(M::KernelPCA, y::AbstractVecOrMat{<:Real}) throw(ArgumentError("Inverse transformation coefficients are not available, set `inverse` parameter when fitting data")) end Pᵗ = M.α' .* sqrt.(M.λ) - k = pairwise(M.ker, Pᵗ, y) + k = pairwise(M.ker, eachcol(Pᵗ), eachcol(y)) return M.inv*k end @@ -88,7 +88,7 @@ function fit(::Type{KernelPCA}, X::AbstractMatrix{T}; # set kernel function if available K = if isa(kernel, Function) - pairwise(kernel, X) + pairwise(kernel, eachcol(X), symmetric=true) elseif kernel === nothing @assert issymmetric(X) "Precomputed kernel matrix must be symmetric." 
inverse = false @@ -126,7 +126,7 @@ function fit(::Type{KernelPCA}, X::AbstractMatrix{T}; Q = zeros(T, 0, 0) if inverse Pᵗ = α' .* sqrt.(λ) - KT = pairwise(kernel, Pᵗ) + KT = pairwise(kernel, eachcol(Pᵗ), symmetric=true) Q = (KT + diagm(0 => fill(β, size(KT,1)))) \ X' end diff --git a/test/cmds.jl b/test/cmds.jl index 895deb7..77531f0 100644 --- a/test/cmds.jl +++ b/test/cmds.jl @@ -9,7 +9,7 @@ using Test n = 10 X0 = randn(d, n) G0 = X0'X0 - D0 = MultivariateStats.pairwise((x,y)->norm(x-y), X0) + D0 = MultivariateStats.pairwise((x,y)->norm(x-y), eachcol(X0), symmetric=true) ## conversion between dmat and gram @@ -35,7 +35,7 @@ using Test X = transform(M) @test size(X) == (3,n) - @test MultivariateStats.pairwise((x,y)->norm(x-y), X) ≈ D0 + @test MultivariateStats.pairwise((x,y)->norm(x-y), eachcol(X), symmetric=true) ≈ D0 @test_throws DimensionMismatch transform(M, rand(d+1)) y = transform(M, X0[:, 1]) @@ -49,11 +49,11 @@ using Test X = transform(M) @test size(X) == (3,n) - @test MultivariateStats.pairwise((x,y)->norm(x-y), X) ≈ D0 + @test MultivariateStats.pairwise((x,y)->norm(x-y), eachcol(X), symmetric=true) ≈ D0 @test_throws AssertionError transform(M, X0[:, 1]) @test_throws DimensionMismatch transform(M, rand(d+1); distances = true) - d = MultivariateStats.pairwise((x,y)->norm(x-y), X0, X0[:,2]) |> vec + d = MultivariateStats.pairwise((x,y)->norm(x-y), X0, X0[:,2]) #|> vec y = transform(M, d, distances=true) @test X[:, 2] ≈ y @@ -95,18 +95,18 @@ using Test M = fit(MDS, sqrt.(D), maxoutdim=2, distances=true) X = transform(M) - @test D ≈ MultivariateStats.pairwise((x,y)->sum(abs2, x-y), X) + @test D ≈ MultivariateStats.pairwise((x,y)->sum(abs2, x-y), eachcol(X), symmetric=true) @test eltype(X) == Float32 a = Float32[0.5, 0.5, 0.5, 0.5] A = vcat(hcat(D, a), hcat(a', zeros(Float32, 1, 1))) M⁺ = fit(MDS, sqrt.(A), maxoutdim=2, distances=true) X⁺ = transform(M⁺) - @test A ≈ 
MultivariateStats.pairwise((x,y)->sum(abs2, x-y), eachcol(X⁺), symmetric=true) y = transform(M, a, distances=true) Y = [X y] - @test A ≈ MultivariateStats.pairwise((x,y)->sum(abs2, x-y), Y) + @test A ≈ MultivariateStats.pairwise((x,y)->sum(abs2, x-y), eachcol(Y), symmetric=true) @test eltype(Y) == Float32 # different input types diff --git a/test/kpca.jl b/test/kpca.jl index cf633c7..fe86bd8 100644 --- a/test/kpca.jl +++ b/test/kpca.jl @@ -34,29 +34,10 @@ import Random end # kernel calculations - ker1 = (x,y)->x'y - ker2 = (x,y)->norm(x-y) - - K = MultivariateStats.pairwise(ker1, X) - @test size(K) == (n,n) - @test K[1,2] == K[2,1] - - K = MultivariateStats.pairwise(ker1, X, X.+1) - @test size(K) == (n,n) - @test K[1,2] != K[2,1] - - K = MultivariateStats.pairwise(ker2, X, X[:,1:2]) - @test size(K) == (n, 2) - @test K[1,1] == 0 - @test K[3,2] == norm(X[:,3] - X[:,2]) - - K = MultivariateStats.pairwise(ker2, X[:,1:3], X) - @test size(K) == (3, n) - @test K[1,1] == 0 - @test K[3,2] == norm(X[:,2] - X[:,3]) + ker = (x,y)->norm(x-y) K = similar(X, n, n) - MultivariateStats.pairwise!(K, ker2, X) + MultivariateStats.pairwise!(ker, K, eachcol(X)) @test size(K) == (n, n) @test K[1,1] == 0 @test K[2,1] == norm(X[:,2] - X[:,1]) @@ -65,7 +46,7 @@ import Random Iₙ = ones(n,n)/n @test MultivariateStats.transform!(KC, copy(K)) ≈ K - Iₙ*K - K*Iₙ + Iₙ*K*Iₙ - K = MultivariateStats.pairwise(ker2, X, X[:,1]) + K = MultivariateStats.pairwise(ker, X, X[:,1])[:,1:1] @test size(K) == (n, 1) @test K[1,1] == 0 @test K[2,1] == norm(X[:,2] - X[:,1]) @@ -99,7 +80,7 @@ import Random M = fit(KernelPCA, X, inverse=true) @test all(isapprox.(reconstruct(M, transform(M)), X, atol=0.75)) - # use rbf kernel + # use RBF kernel γ = 10. 
rbf=(x,y)->exp(-γ*norm(x-y)^2.0) M = fit(KernelPCA, X, kernel=rbf) @@ -107,7 +88,7 @@ import Random @test outdim(M) == d # use precomputed kernel - K = MultivariateStats.pairwise((x,y)->x'*y, X) + K = MultivariateStats.pairwise((x,y)->x'*y, eachcol(X), symmetric=true) @test_throws AssertionError fit(KernelPCA, rand(1,10), kernel=nothing) # symmetric kernel M = fit(KernelPCA, K, maxoutdim = 5, kernel=nothing, inverse=true) # use precomputed kernel M2 = fit(PCA, X, method=:cov, pratio=1.0) @@ -128,7 +109,7 @@ import Random @test indim(MM) == d @test outdim(MM) == d - @test eltype(transform(MM, X[:,1])) == Float32 + @test eltype(transform(MM, XX[:,1])) == Float32 for func in (projection, principalvars) @test eltype(func(M)) == Float64 From ed349d9aa6e12e7039170a7862f79fe737d933a3 Mon Sep 17 00:00:00 2001 From: Art Wild Date: Mon, 31 May 2021 22:32:19 -0400 Subject: [PATCH 3/3] bump julia version compat --- .github/workflows/ci.yml | 2 +- Project.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 723e2c5..dc395cd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,7 +13,7 @@ jobs: fail-fast: false matrix: version: - - '1.0' + - '1.1' - '1' # automatically expands to the latest stable 1.x release of Julia - 'nightly' os: diff --git a/Project.toml b/Project.toml index d9fa1e2..26cf49d 100644 --- a/Project.toml +++ b/Project.toml @@ -16,7 +16,7 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] Arpack = "0.3, 0.4, 0.5" StatsBase = "0.29, 0.30, 0.31, 0.32, 0.33" -julia = "1" +julia = "1.1" [extras] Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"