diff --git a/Project.toml b/Project.toml index 2cf97f9..29ffccc 100644 --- a/Project.toml +++ b/Project.toml @@ -4,18 +4,20 @@ keywords = ["multivariate statistics", "dimensionality reduction"] license = "MIT" desc = "A Julia package for multivariate statistics and data analysis" repository = "https://github.com/JuliaStats/MultivariateStats.jl.git" -version = "0.9.0" +version = "0.9.1" [deps] Arpack = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] Arpack = "0.3, 0.4, 0.5" -StatsBase = "0.29, 0.30, 0.31, 0.32, 0.33" +StatsBase = "~0.33" +StatsAPI = "~1.2" julia = "1.1" [extras] diff --git a/src/MultivariateStats.jl b/src/MultivariateStats.jl index 3bc91fa..1f058d4 100644 --- a/src/MultivariateStats.jl +++ b/src/MultivariateStats.jl @@ -1,12 +1,15 @@ module MultivariateStats + using LinearAlgebra - using StatsBase: SimpleCovariance, CovarianceEstimator, RegressionModel, - AbstractDataTransform, pairwise! 
+ using SparseArrays + using Statistics: middle + using StatsAPI: RegressionModel + using StatsBase: SimpleCovariance, CovarianceEstimator, AbstractDataTransform, + ConvergenceException, pairwise, CoefTable + import Statistics: mean, var, cov, covm, cor - import Base: length, size, show, dump - import StatsBase: fit, predict, predict!, ConvergenceException, coef, weights, - dof, pairwise, r2, CoefTable - import SparseArrays + import Base: length, size, show + import StatsAPI: fit, predict, coef, weights, dof, r2 import LinearAlgebra: eigvals, eigvecs export @@ -118,17 +121,17 @@ module MultivariateStats include("fa.jl") ## deprecations - @deprecate indim(f) size(f)[1] - @deprecate outdim(f) size(f)[2] - @deprecate transform(f, x) predict(f, x) #ex=false + @deprecate indim(f) size(f,1) + @deprecate outdim(f) size(f,2) + @deprecate transform(f, x) predict(f, x) @deprecate indim(f::Whitening) length(f::Whitening) @deprecate outdim(f::Whitening) length(f::Whitening) @deprecate tvar(f::PCA) var(f::PCA) @deprecate classical_mds(D::AbstractMatrix, p::Int) predict(fit(MDS, D, maxoutdim=p, distances=true)) @deprecate transform(f::MDS) predict(f::MDS) - @deprecate xindim(M::CCA) size(M)[1] - @deprecate yindim(M::CCA) size(M)[2] - @deprecate outdim(M::CCA) size(M)[3] + @deprecate xindim(M::CCA) size(M,1) + @deprecate yindim(M::CCA) size(M,2) + @deprecate outdim(M::CCA) size(M,3) @deprecate correlations(M::CCA) cor(M) @deprecate xmean(M::CCA) mean(M, :x) @deprecate ymean(M::CCA) mean(M, :y) diff --git a/src/cmds.jl b/src/cmds.jl index 6d6f21f..0c2f82a 100644 --- a/src/cmds.jl +++ b/src/cmds.jl @@ -175,7 +175,7 @@ function predict(M::MDS, x::AbstractVector{T}; distances=false) where {T<:Real} else size(x, 1) != size(M.X, 1) && throw( DimensionMismatch("Points and original data must have same dimensionality.")) - pairwise((x,y)->norm(x-y), M.X, x) + pairwise((x,y)->norm(x-y), eachcol(M.X), eachcol(x)) end end diff --git a/src/common.jl b/src/common.jl index 51becac..b6d23f2 
100644 --- a/src/common.jl +++ b/src/common.jl @@ -120,6 +120,3 @@ function calcscattermat(Z::DenseMatrix) return calcscattermat(SimpleCovariance(), Z) end -# calculate pairwise kernel -pairwise(kernel::Function, X::AbstractMatrix, x::AbstractVector; kwargs...) = - [kernel(x,y) for y in eachcol(X)] diff --git a/src/pca.jl b/src/pca.jl index b7d78f4..a1f8495 100644 --- a/src/pca.jl +++ b/src/pca.jl @@ -134,7 +134,9 @@ gives the principal components for an observation, and \$\\mathbf{P}\$ is the pr """ reconstruct(M::PCA, y::AbstractVecOrMat{T}) where {T<:Real} = decentralize(M.proj * y, M.mean) -## show & dump + +## show + function show(io::IO, M::PCA) idim, odim = size(M) print(io, "PCA(indim = $idim, outdim = $odim, principalratio = $(r2(M)))") @@ -240,7 +242,7 @@ function pcasvd(Z::AbstractMatrix{T}, mean::AbstractVector{T}, n::Real; v = Svd.S::Vector{T} U = Svd.U::Matrix{T} for i = 1:length(v) - @inbounds v[i] = abs2(v[i]) / n + @inbounds v[i] = abs2(v[i]) / (n-1) end ord = sortperm(v; rev=true) vsum = sum(v) diff --git a/src/types.jl b/src/types.jl index 6ff1372..dbaecb1 100644 --- a/src/types.jl +++ b/src/types.jl @@ -1,5 +1,17 @@ abstract type AbstractDimensionalityReduction <: RegressionModel end +""" + size(model::AbstractDimensionalityReduction, d::Integer) + +Returns the `d`-th entry of `size(model)`: the dimension of the input data if `d == 1`, +the dimension of the output data if `d == 2`. Throws an `AssertionError` if `size(model)` has fewer than `d` entries. 
+""" +function size(model::AbstractDimensionalityReduction, d::Integer) + dims = size(model) + @assert length(dims) >= d "Cannot access dimensional information" + return dims[d] +end + """ projection(model::AbstractDimensionalityReduction) diff --git a/test/cmds.jl b/test/cmds.jl index dd22ce0..cdc3127 100644 --- a/test/cmds.jl +++ b/test/cmds.jl @@ -52,7 +52,7 @@ using Test @test_throws AssertionError predict(M, X0[:, 1]) @test_throws DimensionMismatch predict(M, rand(d+1); distances = true) - d = MultivariateStats.pairwise((x,y)->norm(x-y), X0, X0[:,2]) #|> vec + d = MultivariateStats.pairwise((x,y)->norm(x-y), eachcol(X0), eachcol(X0[:,2])) |> vec y = predict(M, d, distances=true) @test X[:, 2] ≈ y diff --git a/test/kpca.jl b/test/kpca.jl index edf03e5..1142706 100644 --- a/test/kpca.jl +++ b/test/kpca.jl @@ -36,8 +36,7 @@ import Statistics: mean, cov # kernel calculations ker = (x,y)->norm(x-y) - K = similar(X, n, n) - MultivariateStats.pairwise!(ker, K, eachcol(X)) + K = MultivariateStats.pairwise(ker, eachcol(X), symmetric=true) @test size(K) == (n, n) @test K[1,1] == 0 @test K[2,1] == norm(X[:,2] - X[:,1]) @@ -46,7 +45,7 @@ import Statistics: mean, cov Iₙ = ones(n,n)/n @test MultivariateStats.transform!(KC, copy(K)) ≈ K - Iₙ*K - K*Iₙ + Iₙ*K*Iₙ - K = MultivariateStats.pairwise(ker, X, X[:,1])[:,1:1] + K = MultivariateStats.pairwise(ker, eachcol(X), eachcol(X[:,1]))[:,1:1] @test size(K) == (n, 1) @test K[1,1] == 0 @test K[2,1] == norm(X[:,2] - X[:,1]) diff --git a/test/pca.jl b/test/pca.jl index db5de3d..5fb1559 100644 --- a/test/pca.jl +++ b/test/pca.jl @@ -24,6 +24,9 @@ import SparseArrays M = PCA(Float64[], P, pvars, 15.0) @test size(M) == (5, 3) + @test size(M,1) == 5 + @test size(M,2) == 3 + @test_throws AssertionError size(M, 3) @test mean(M) == zeros(5) @test projection(M) == P @test principalvars(M) == pvars