Skip to content

Commit

Permalink
use StatsBase.pairwise
Browse files Browse the repository at this point in the history
  • Loading branch information
wildart committed Jun 1, 2021
1 parent c8535c1 commit cdcec59
Show file tree
Hide file tree
Showing 7 changed files with 26 additions and 71 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"

[compat]
Arpack = "0.3, 0.4"
Arpack = "0.3, 0.4, 0.5"
StatsBase = "0.29, 0.30, 0.31, 0.32, 0.33"
julia = "1"

Expand Down
4 changes: 2 additions & 2 deletions src/MultivariateStats.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
module MultivariateStats
using LinearAlgebra
using StatsBase: SimpleCovariance, CovarianceEstimator
using StatsBase: SimpleCovariance, CovarianceEstimator, pairwise, pairwise!
import Statistics: mean, var, cov, covm
import Base: length, size, show, dump
import StatsBase: RegressionModel, fit, predict, ConvergenceException, dof, coef, weights
import StatsBase: RegressionModel, fit, predict, ConvergenceException, dof, coef, weights, pairwise
import SparseArrays
import LinearAlgebra: eigvals

Expand Down
8 changes: 4 additions & 4 deletions src/cmds.jl
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ function transform(M::MDS{T}, x::AbstractVector{<:Real}; distances=false) where
end

# get distance matrix
D = isnan(M.d) ? M.X : pairwise((x,y)->norm(x-y), M.X)
D = isnan(M.d) ? M.X : pairwise((x,y)->norm(x-y), eachcol(M.X), symmetric=true)
d = d.^2

# b = 0.5*(ones(n,n)*d./n - d + D*ones(n,1)./n - ones(n,n)*D*ones(n,1)./n^2)
Expand Down Expand Up @@ -142,7 +142,7 @@ function fit(::Type{MDS}, X::AbstractMatrix{T};

# get distance matrix and space dimension
D, d = if !distances
pairwise((x,y)->norm(x-y), X), size(X,1)
pairwise((x,y)->norm(x-y), eachcol(X), symmetric=true), size(X,1)
else
X, NaN
end
Expand Down Expand Up @@ -203,8 +203,8 @@ end

"""
    stress(M::MDS)

Return `sqrt(2*sum((DX - DY).^2)/sum(DX.^2))`, where `DX` is `M.X` itself when `M.d`
is `NaN` and otherwise the matrix of Euclidean distances between the columns of `M.X`,
and `DY` is the matrix of Euclidean distances between the columns of `transform(M)`.
"""
function stress(M::MDS)
# calculate distances if original data was stored
# NOTE(review): `DX` and `DY` are each assigned twice in a row, so the second
# assignment (the `eachcol(...)`, `symmetric=true` form) overwrites the first.
# These look like the old and new lines of a diff; confirm and drop the stale ones.
DX = isnan(M.d) ? M.X : pairwise((x,y)->norm(x-y), M.X)
DY = pairwise((x,y)->norm(x-y), transform(M))
DX = isnan(M.d) ? M.X : pairwise((x,y)->norm(x-y), eachcol(M.X), symmetric=true)
DY = pairwise((x,y)->norm(x-y), eachcol(transform(M)), symmetric=true)
# NOTE(review): `n` is not referenced again in this function.
n = size(DX,1)
return sqrt(2*sum((DX - DY).^2)/sum(DX.^2));
end
30 changes: 2 additions & 28 deletions src/common.jl
Original file line number Diff line number Diff line change
Expand Up @@ -121,31 +121,5 @@ function calcscattermat(Z::DenseMatrix)
end

# calculate pairwise kernel
"""
    pairwise!(K, kernel, X, Y; force_symmetry=false)

Fill `K` in place so that `K[i, j]` holds `kernel(X[:, i], Y[:, j])[]`, passing the
columns as views, and return `K`.

With `force_symmetry=true` only the entries with `i >= j` are evaluated; the entries
above the diagonal are copied from their mirrored positions `K[j, i]`.
"""
function pairwise!(K::AbstractVecOrMat{<:Real}, kernel::Function,
                   X::AbstractVecOrMat{<:Real}, Y::AbstractVecOrMat{<:Real};
                   force_symmetry=false)
    nx = size(X, 2)
    ny = size(Y, 2)
    for col in 1:ny
        ycol = view(Y, :, col)
        # with symmetry forced, start at the diagonal and mirror the rest below
        firstrow = force_symmetry ? col : 1
        for row in firstrow:nx
            @inbounds K[row, col] = kernel(view(X, :, row), ycol)[]
        end
        if force_symmetry && col <= nx
            for row in 1:(col - 1)
                # K[col, row] was already computed while filling column `row`
                @inbounds K[row, col] = K[col, row]
            end
        end
    end
    return K
end

"""
    pairwise!(K, kernel, X; kwargs...)

Fill `K` with the kernel values between the columns of `X` and themselves, always
with `force_symmetry=true`. Any keyword arguments passed in are ignored.
"""
function pairwise!(K::AbstractVecOrMat{<:Real}, kernel::Function,
                   X::AbstractVecOrMat{<:Real}; kwargs...)
    return pairwise!(K, kernel, X, X; force_symmetry=true)
end

"""
    pairwise(kernel, X, Y; kwargs...)

Allocate a `size(X, 2)`-by-`size(Y, 2)` array with `similar(X, ...)` and fill it via
`pairwise!(K, kernel, X, Y; kwargs...)`, returning the result of that call.
"""
function pairwise(kernel::Function, X::AbstractVecOrMat{<:Real},
                  Y::AbstractVecOrMat{<:Real}; kwargs...)
    K = similar(X, size(X, 2), size(Y, 2))
    return pairwise!(K, kernel, X, Y; kwargs...)
end

"""
    pairwise(kernel, X)

Kernel values between the columns of `X` and themselves, computed with
`force_symmetry=true`.
"""
function pairwise(kernel::Function, X::AbstractVecOrMat{<:Real})
    return pairwise(kernel, X, X; force_symmetry=true)
end
"""
    pairwise(kernel, X::AbstractMatrix, x::AbstractVector; kwargs...)

Evaluate `kernel` between every column of `X` and the single vector `x`.

Returns a vector whose `i`-th entry is `kernel(X[:, i], x)`: the column of `X` is the
first kernel argument and `x` the second, matching the `f(x_i, y_j)` argument order of
the two-collection form of `pairwise`. Keyword arguments are accepted for call-site
compatibility and ignored.
"""
pairwise(kernel::Function, X::AbstractMatrix, x::AbstractVector; kwargs...) =
    [kernel(col, x) for col in eachcol(X)]
8 changes: 4 additions & 4 deletions src/kpca.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ principalvars(M::KernelPCA) = M.λ

"""
    transform(M::KernelPCA, x::AbstractVecOrMat{<:Real})

Calculate transformation to kernel space: evaluate `M.ker` between `M.X` and `x`,
apply `transform!(M.center, k)` to the resulting kernel values, and return
`projection(M)'*k`.
"""
function transform(M::KernelPCA, x::AbstractVecOrMat{<:Real})
# NOTE(review): `k` is assigned twice in a row, so the `eachcol(...)` form overwrites
# the first result. These look like the old and new lines of a diff; confirm and drop
# the stale one.
k = pairwise(M.ker, M.X, x)
k = pairwise(M.ker, eachcol(M.X), eachcol(x))
# presumably centers `k` in place (the `!` suffix) — confirm against `transform!`
transform!(M.center, k)
return projection(M)'*k
end
Expand All @@ -63,7 +63,7 @@ function reconstruct(M::KernelPCA, y::AbstractVecOrMat{<:Real})
throw(ArgumentError("Inverse transformation coefficients are not available, set `inverse` parameter when fitting data"))
end
Pᵗ = M.α' .* sqrt.(M.λ)
k = pairwise(M.ker, Pᵗ, y)
k = pairwise(M.ker, eachcol(Pᵗ), eachcol(y))
return M.inv*k
end

Expand All @@ -88,7 +88,7 @@ function fit(::Type{KernelPCA}, X::AbstractMatrix{T};

# set kernel function if available
K = if isa(kernel, Function)
pairwise(kernel, X)
pairwise(kernel, eachcol(X), symmetric=true)
elseif kernel === nothing
@assert issymmetric(X) "Precomputed kernel matrix must be symmetric."
inverse = false
Expand Down Expand Up @@ -126,7 +126,7 @@ function fit(::Type{KernelPCA}, X::AbstractMatrix{T};
Q = zeros(T, 0, 0)
if inverse
Pᵗ = α' .* sqrt.(λ)
KT = pairwise(kernel, Pᵗ)
KT = pairwise(kernel, eachcol(Pᵗ), symmetric=true)
Q = (KT + diagm(0 => fill(β, size(KT,1)))) \ X'
end

Expand Down
14 changes: 7 additions & 7 deletions test/cmds.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ using Test
n = 10
X0 = randn(d, n)
G0 = X0'X0
D0 = MultivariateStats.pairwise((x,y)->norm(x-y), X0)
D0 = MultivariateStats.pairwise((x,y)->norm(x-y), eachcol(X0), symmetric=true)

## conversion between dmat and gram

Expand All @@ -35,7 +35,7 @@ using Test

X = transform(M)
@test size(X) == (3,n)
@test MultivariateStats.pairwise((x,y)->norm(x-y), X) ≈ D0
@test MultivariateStats.pairwise((x,y)->norm(x-y), eachcol(X0), symmetric=true) ≈ D0

@test_throws DimensionMismatch transform(M, rand(d+1))
y = transform(M, X0[:, 1])
Expand All @@ -49,11 +49,11 @@ using Test

X = transform(M)
@test size(X) == (3,n)
@test MultivariateStats.pairwise((x,y)->norm(x-y), X) ≈ D0
@test MultivariateStats.pairwise((x,y)->norm(x-y), eachcol(X0), symmetric=true) ≈ D0

@test_throws AssertionError transform(M, X0[:, 1])
@test_throws DimensionMismatch transform(M, rand(d+1); distances = true)
d = MultivariateStats.pairwise((x,y)->norm(x-y), X0, X0[:,2]) |> vec
d = MultivariateStats.pairwise((x,y)->norm(x-y), X0, X0[:,2]) #|> vec
y = transform(M, d, distances=true)
@test X[:, 2] ≈ y

Expand Down Expand Up @@ -95,18 +95,18 @@ using Test

M = fit(MDS, sqrt.(D), maxoutdim=2, distances=true)
X = transform(M)
@test D ≈ MultivariateStats.pairwise((x,y)->sum(abs2, x-y), X)
@test D ≈ MultivariateStats.pairwise((x,y)->sum(abs2, x-y), eachcol(X), symmetric=true)
@test eltype(X) == Float32

a = Float32[0.5, 0.5, 0.5, 0.5]
A = vcat(hcat(D, a), hcat(a', zeros(Float32, 1, 1)))
M⁺ = fit(MDS, sqrt.(A), maxoutdim=2, distances=true)
X⁺ = transform(M⁺)
@test A ≈ MultivariateStats.pairwise((x,y)->sum(abs2, x-y), X⁺)
@test A ≈ MultivariateStats.pairwise((x,y)->sum(abs2, x-y), eachcol(X⁺), symmetric=true)

y = transform(M, a, distances=true)
Y = [X y]
@test A ≈ MultivariateStats.pairwise((x,y)->sum(abs2, x-y), Y)
@test A ≈ MultivariateStats.pairwise((x,y)->sum(abs2, x-y), eachcol(Y), symmetric=true)
@test eltype(Y) == Float32

# different input types
Expand Down
31 changes: 6 additions & 25 deletions test/kpca.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,29 +34,10 @@ import Random
end

# kernel calculations
ker1 = (x,y)->x'y
ker2 = (x,y)->norm(x-y)

K = MultivariateStats.pairwise(ker1, X)
@test size(K) == (n,n)
@test K[1,2] == K[2,1]

K = MultivariateStats.pairwise(ker1, X, X.+1)
@test size(K) == (n,n)
@test K[1,2] != K[2,1]

K = MultivariateStats.pairwise(ker2, X, X[:,1:2])
@test size(K) == (n, 2)
@test K[1,1] == 0
@test K[3,2] == norm(X[:,3] - X[:,2])

K = MultivariateStats.pairwise(ker2, X[:,1:3], X)
@test size(K) == (3, n)
@test K[1,1] == 0
@test K[3,2] == norm(X[:,2] - X[:,3])
ker = (x,y)->norm(x-y)

K = similar(X, n, n)
MultivariateStats.pairwise!(K, ker2, X)
MultivariateStats.pairwise!(ker, K, eachcol(X))
@test size(K) == (n, n)
@test K[1,1] == 0
@test K[2,1] == norm(X[:,2] - X[:,1])
Expand All @@ -65,7 +46,7 @@ import Random
Iₙ = ones(n,n)/n
@test MultivariateStats.transform!(KC, copy(K)) ≈ K - Iₙ*K - K*Iₙ + Iₙ*K*Iₙ

K = MultivariateStats.pairwise(ker2, X, X[:,1])
K = MultivariateStats.pairwise(ker, X, X[:,1])[:,1:1]
@test size(K) == (n, 1)
@test K[1,1] == 0
@test K[2,1] == norm(X[:,2] - X[:,1])
Expand Down Expand Up @@ -99,15 +80,15 @@ import Random
M = fit(KernelPCA, X, inverse=true)
@test all(isapprox.(reconstruct(M, transform(M)), X, atol=0.75))

# use rbf kernel
# use RBF kernel
γ = 10.
rbf=(x,y)->exp(-γ*norm(x-y)^2.0)
M = fit(KernelPCA, X, kernel=rbf)
@test indim(M) == d
@test outdim(M) == d

# use precomputed kernel
K = MultivariateStats.pairwise((x,y)->x'*y, X)
K = MultivariateStats.pairwise((x,y)->x'*y, eachcol(X), symmetric=true)
@test_throws AssertionError fit(KernelPCA, rand(1,10), kernel=nothing) # symmetric kernel
M = fit(KernelPCA, K, maxoutdim = 5, kernel=nothing, inverse=true) # use precomputed kernel
M2 = fit(PCA, X, method=:cov, pratio=1.0)
Expand All @@ -128,7 +109,7 @@ import Random

@test indim(MM) == d
@test outdim(MM) == d
@test eltype(transform(MM, X[:,1])) == Float32
@test eltype(transform(MM, XX[:,1])) == Float32

for func in (projection, principalvars)
@test eltype(func(M)) == Float64
Expand Down

0 comments on commit cdcec59

Please sign in to comment.