Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fixed pairwise for non-symmetric kernels #148

Merged
merged 3 commits into from
Jun 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
fail-fast: false
matrix:
version:
- '1.0'
- '1.1'
- '1' # automatically expands to the latest stable 1.x release of Julia
- 'nightly'
os:
Expand Down
4 changes: 2 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"

[compat]
Arpack = "0.3, 0.4"
Arpack = "0.3, 0.4, 0.5"
StatsBase = "0.29, 0.30, 0.31, 0.32, 0.33"
julia = "1"
julia = "1.1"

[extras]
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Expand Down
4 changes: 2 additions & 2 deletions src/MultivariateStats.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
module MultivariateStats
using LinearAlgebra
using StatsBase: SimpleCovariance, CovarianceEstimator
using StatsBase: SimpleCovariance, CovarianceEstimator, pairwise, pairwise!
import Statistics: mean, var, cov, covm
import Base: length, size, show, dump
import StatsBase: RegressionModel, fit, predict, ConvergenceException, dof, coef, weights
import StatsBase: RegressionModel, fit, predict, ConvergenceException, dof, coef, weights, pairwise
import SparseArrays
import LinearAlgebra: eigvals

Expand Down
8 changes: 4 additions & 4 deletions src/cmds.jl
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ function transform(M::MDS{T}, x::AbstractVector{<:Real}; distances=false) where
end

# get distance matrix
D = isnan(M.d) ? M.X : pairwise((x,y)->norm(x-y), M.X)
D = isnan(M.d) ? M.X : pairwise((x,y)->norm(x-y), eachcol(M.X), symmetric=true)
d = d.^2

# b = 0.5*(ones(n,n)*d./n - d + D*ones(n,1)./n - ones(n,n)*D*ones(n,1)./n^2)
Expand Down Expand Up @@ -142,7 +142,7 @@ function fit(::Type{MDS}, X::AbstractMatrix{T};

# get distance matrix and space dimension
D, d = if !distances
pairwise((x,y)->norm(x-y), X), size(X,1)
pairwise((x,y)->norm(x-y), eachcol(X), symmetric=true), size(X,1)
else
X, NaN
end
Expand Down Expand Up @@ -203,8 +203,8 @@ end

"""
    stress(M::MDS)

Return `sqrt(2 * sum((DX - DY).^2) / sum(DX.^2))`, where `DX` is the distance matrix of
the fitted data and `DY` holds the pairwise Euclidean distances between the columns of
`transform(M)`.
"""
function stress(M::MDS)
    euclidean = (x, y) -> norm(x - y)
    # When `M.d` is NaN, `M.X` is used as-is as the distance matrix; otherwise the
    # distances are computed between the columns of `M.X`.
    DX = isnan(M.d) ? M.X : pairwise(euclidean, eachcol(M.X), symmetric=true)
    DY = pairwise(euclidean, eachcol(transform(M)), symmetric=true)
    return sqrt(2 * sum((DX - DY).^2) / sum(DX.^2))
end
29 changes: 2 additions & 27 deletions src/common.jl
Original file line number Diff line number Diff line change
Expand Up @@ -121,30 +121,5 @@ function calcscattermat(Z::DenseMatrix)
end

# calculate pairwise kernel
"""
    pairwise!(K, kernel, X, Y; symmetric=false)

Fill `K[i, j]` with `kernel(view(X, :, i), view(Y, :, j))[]` for every column `i` of `X`
and every column `j` of `Y`, and return `K`.

# Keywords
- `symmetric::Bool=false`: when `true`, only the entries with `i >= j` are evaluated and
  the remaining ones are copied from their mirrored position. This is only valid for a
  symmetric kernel evaluated on the same columns (`X` and `Y` holding equal data).

# Throws
- `DimensionMismatch` if `K` is too small to hold the result.
- `ArgumentError` if `symmetric=true` while `X` and `Y` have different column counts.
"""
function pairwise!(K::AbstractVecOrMat{<:Real}, kernel::Function,
                   X::AbstractVecOrMat{<:Real}, Y::AbstractVecOrMat{<:Real};
                   symmetric::Bool=false)
    n = size(X, 2)
    m = size(Y, 2)
    # Guard the `@inbounds` writes below.
    if size(K, 1) < n || size(K, 2) < m
        throw(DimensionMismatch(
            "K has size $(size(K)) but at least ($n, $m) is required"))
    end
    if symmetric && n != m
        throw(ArgumentError(
            "symmetric=true requires X ($n columns) and Y ($m columns) to match"))
    end
    for j in 1:m
        yj = view(Y, :, j)
        # Without `symmetric` every entry is evaluated, so non-symmetric kernels and
        # X != Y give correct results and no column of K is left unwritten.
        for i in (symmetric ? j : 1):n
            @inbounds K[i, j] = kernel(view(X, :, i), yj)[]
        end
        if symmetric
            for i in 1:(j - 1)
                @inbounds K[i, j] = K[j, i] # mirror the already computed lower triangle
            end
        end
    end
    return K
end

"""
    pairwise!(K, kernel, X)

Forward to the four-argument `pairwise!`, using `X` for both column collections.
"""
function pairwise!(K::AbstractVecOrMat{<:Real}, kernel::Function,
                   X::AbstractVecOrMat{<:Real})
    return pairwise!(K, kernel, X, X)
end

"""
    pairwise(kernel, X, Y)

Allocate a `size(X, 2)`-by-`size(Y, 2)` array with `similar(X, ...)`, fill it through
`pairwise!(K, kernel, X, Y)`, and return the result of that call.
"""
function pairwise(kernel::Function, X::AbstractVecOrMat{<:Real},
                  Y::AbstractVecOrMat{<:Real})
    out = similar(X, size(X, 2), size(Y, 2))
    return pairwise!(out, kernel, X, Y)
end

"""
    pairwise(kernel, X)

Forward to `pairwise(kernel, X, X)`.
"""
function pairwise(kernel::Function, X::AbstractVecOrMat{<:Real})
    return pairwise(kernel, X, X)
end
"""
    pairwise(kernel, X::AbstractMatrix, x::AbstractVector; kwargs...)

Evaluate `kernel` between every column of `X` and the vector `x`, returning a vector
with one entry per column of `X`.

Entry `i` is `kernel(X[:, i], x)`: the column of `X` is the first kernel argument, so a
non-symmetric kernel sees its arguments in the same order as the positional arguments
of this method. Keyword arguments are accepted and ignored.
"""
pairwise(kernel::Function, X::AbstractMatrix, x::AbstractVector; kwargs...) =
    [kernel(col, x) for col in eachcol(X)]
8 changes: 4 additions & 4 deletions src/kpca.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ principalvars(M::KernelPCA) = M.λ

"""Calculate transformation to kernel space"""
function transform(M::KernelPCA, x::AbstractVecOrMat{<:Real})
    # Kernel values between the columns of the stored `M.X` and the columns of `x`
    # (a single assignment; the superseded matrix-based call is dropped).
    k = pairwise(M.ker, eachcol(M.X), eachcol(x))
    # Apply the stored centering to `k` in place before projecting.
    transform!(M.center, k)
    return projection(M)'*k
end
Expand All @@ -63,7 +63,7 @@ function reconstruct(M::KernelPCA, y::AbstractVecOrMat{<:Real})
throw(ArgumentError("Inverse transformation coefficients are not available, set `inverse` parameter when fitting data"))
end
Pᵗ = M.α' .* sqrt.(M.λ)
k = pairwise(M.ker, Pᵗ, y)
k = pairwise(M.ker, eachcol(Pᵗ), eachcol(y))
return M.inv*k
end

Expand All @@ -88,7 +88,7 @@ function fit(::Type{KernelPCA}, X::AbstractMatrix{T};

# set kernel function if available
K = if isa(kernel, Function)
pairwise(kernel, X)
pairwise(kernel, eachcol(X), symmetric=true)
elseif kernel === nothing
@assert issymmetric(X) "Precomputed kernel matrix must be symmetric."
inverse = false
Expand Down Expand Up @@ -126,7 +126,7 @@ function fit(::Type{KernelPCA}, X::AbstractMatrix{T};
Q = zeros(T, 0, 0)
if inverse
Pᵗ = α' .* sqrt.(λ)
KT = pairwise(kernel, Pᵗ)
KT = pairwise(kernel, eachcol(Pᵗ), symmetric=true)
Q = (KT + diagm(0 => fill(β, size(KT,1)))) \ X'
end

Expand Down
14 changes: 7 additions & 7 deletions test/cmds.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ using Test
n = 10
X0 = randn(d, n)
G0 = X0'X0
D0 = MultivariateStats.pairwise((x,y)->norm(x-y), X0)
D0 = MultivariateStats.pairwise((x,y)->norm(x-y), eachcol(X0), symmetric=true)

## conversion between dmat and gram

Expand All @@ -35,7 +35,7 @@ using Test

X = transform(M)
@test size(X) == (3,n)
@test MultivariateStats.pairwise((x,y)->norm(x-y), X) ≈ D0
# Check the embedding `X`, not the input `X0`: `D0` is itself computed from `X0`,
# so comparing distances of `X0` against `D0` would pass regardless of the MDS result.
@test MultivariateStats.pairwise((x,y)->norm(x-y), eachcol(X), symmetric=true) ≈ D0

@test_throws DimensionMismatch transform(M, rand(d+1))
y = transform(M, X0[:, 1])
Expand All @@ -49,11 +49,11 @@ using Test

X = transform(M)
@test size(X) == (3,n)
@test MultivariateStats.pairwise((x,y)->norm(x-y), X) ≈ D0
# Check the embedding `X`, not the input `X0`: `D0` is itself computed from `X0`,
# so comparing distances of `X0` against `D0` would pass regardless of the MDS result.
@test MultivariateStats.pairwise((x,y)->norm(x-y), eachcol(X), symmetric=true) ≈ D0

@test_throws AssertionError transform(M, X0[:, 1])
@test_throws DimensionMismatch transform(M, rand(d+1); distances = true)
d = MultivariateStats.pairwise((x,y)->norm(x-y), X0, X0[:,2]) |> vec
d = MultivariateStats.pairwise((x,y)->norm(x-y), X0, X0[:,2]) #|> vec
y = transform(M, d, distances=true)
@test X[:, 2] ≈ y

Expand Down Expand Up @@ -95,18 +95,18 @@ using Test

M = fit(MDS, sqrt.(D), maxoutdim=2, distances=true)
X = transform(M)
@test D ≈ MultivariateStats.pairwise((x,y)->sum(abs2, x-y), X)
@test D ≈ MultivariateStats.pairwise((x,y)->sum(abs2, x-y), eachcol(X), symmetric=true)
@test eltype(X) == Float32

a = Float32[0.5, 0.5, 0.5, 0.5]
A = vcat(hcat(D, a), hcat(a', zeros(Float32, 1, 1)))
M⁺ = fit(MDS, sqrt.(A), maxoutdim=2, distances=true)
X⁺ = transform(M⁺)
@test A ≈ MultivariateStats.pairwise((x,y)->sum(abs2, x-y), X⁺)
@test A ≈ MultivariateStats.pairwise((x,y)->sum(abs2, x-y), eachcol(X⁺), symmetric=true)

y = transform(M, a, distances=true)
Y = [X y]
@test A ≈ MultivariateStats.pairwise((x,y)->sum(abs2, x-y), Y)
@test A ≈ MultivariateStats.pairwise((x,y)->sum(abs2, x-y), eachcol(Y), symmetric=true)
@test eltype(Y) == Float32

# different input types
Expand Down
20 changes: 6 additions & 14 deletions test/kpca.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,10 @@ import Random
end

# kernel calculations
K = MultivariateStats.pairwise((x,y)->norm(x-y), X, X[:,1:2])
@test size(K) == (n, 2)
@test K[1,1] == 0
@test K[3,2] == norm(X[:,3] - X[:,2])

K = MultivariateStats.pairwise((x,y)->norm(x-y), X[:,1:3], X)
@test size(K) == (3, n)
@test K[1,1] == 0
@test K[3,2] == norm(X[:,2] - X[:,3])
ker = (x,y)->norm(x-y)

K = similar(X, n, n)
MultivariateStats.pairwise!(K, (x,y)->norm(x-y), X)
MultivariateStats.pairwise!(ker, K, eachcol(X))
@test size(K) == (n, n)
@test K[1,1] == 0
@test K[2,1] == norm(X[:,2] - X[:,1])
Expand All @@ -54,7 +46,7 @@ import Random
Iₙ = ones(n,n)/n
@test MultivariateStats.transform!(KC, copy(K)) ≈ K - Iₙ*K - K*Iₙ + Iₙ*K*Iₙ

K = MultivariateStats.pairwise((x,y)->norm(x-y), X, X[:,1])
K = MultivariateStats.pairwise(ker, X, X[:,1])[:,1:1]
@test size(K) == (n, 1)
@test K[1,1] == 0
@test K[2,1] == norm(X[:,2] - X[:,1])
Expand Down Expand Up @@ -88,15 +80,15 @@ import Random
M = fit(KernelPCA, X, inverse=true)
@test all(isapprox.(reconstruct(M, transform(M)), X, atol=0.75))

# use rbf kernel
# use RBF kernel
γ = 10.
rbf=(x,y)->exp(-γ*norm(x-y)^2.0)
M = fit(KernelPCA, X, kernel=rbf)
@test indim(M) == d
@test outdim(M) == d

# use precomputed kernel
K = MultivariateStats.pairwise((x,y)->x'*y, X)
K = MultivariateStats.pairwise((x,y)->x'*y, eachcol(X), symmetric=true)
@test_throws AssertionError fit(KernelPCA, rand(1,10), kernel=nothing) # symmetric kernel
M = fit(KernelPCA, K, maxoutdim = 5, kernel=nothing, inverse=true) # use precomputed kernel
M2 = fit(PCA, X, method=:cov, pratio=1.0)
Expand All @@ -117,7 +109,7 @@ import Random

@test indim(MM) == d
@test outdim(MM) == d
@test eltype(transform(MM, X[:,1])) == Float32
@test eltype(transform(MM, XX[:,1])) == Float32

for func in (projection, principalvars)
@test eltype(func(M)) == Float64
Expand Down