Skip to content

Commit

Permalink
use API imports from StatsAPI & pairwise from StatsBase (#184)
Browse files Browse the repository at this point in the history
* use API imports from StatsAPI & `pairwise` from StatsBase

* added `size` with a dimension parameter

* drop `pairwise!` test

* fix eigvals normalization in SVD-PCA (fixes #183)
  • Loading branch information
wildart committed Mar 3, 2022
1 parent 48b2b50 commit 1863cd5
Show file tree
Hide file tree
Showing 9 changed files with 42 additions and 24 deletions.
6 changes: 4 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,20 @@ keywords = ["multivariate statistics", "dimensionality reduction"]
license = "MIT"
desc = "A Julia package for multivariate statistics and data analysis"
repository = "https://github.com/JuliaStats/MultivariateStats.jl.git"
version = "0.9.0"
version = "0.9.1"

[deps]
Arpack = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"

[compat]
Arpack = "0.3, 0.4, 0.5"
StatsBase = "0.29, 0.30, 0.31, 0.32, 0.33"
StatsBase = "~0.33"
StatsAPI = "~1.2"
julia = "1.1"

[extras]
Expand Down
27 changes: 15 additions & 12 deletions src/MultivariateStats.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
module MultivariateStats

using LinearAlgebra
using StatsBase: SimpleCovariance, CovarianceEstimator, RegressionModel,
AbstractDataTransform, pairwise!
using SparseArrays
using Statistics: middle
using StatsAPI: RegressionModel
using StatsBase: SimpleCovariance, CovarianceEstimator, AbstractDataTransform,
ConvergenceException, pairwise, CoefTable

import Statistics: mean, var, cov, covm, cor
import Base: length, size, show, dump
import StatsBase: fit, predict, predict!, ConvergenceException, coef, weights,
dof, pairwise, r2, CoefTable
import SparseArrays
import Base: length, size, show
import StatsAPI: fit, predict, coef, weights, dof, r2
import LinearAlgebra: eigvals, eigvecs

export
Expand Down Expand Up @@ -118,17 +121,17 @@ module MultivariateStats
include("fa.jl")

## deprecations
@deprecate indim(f) size(f)[1]
@deprecate outdim(f) size(f)[2]
@deprecate transform(f, x) predict(f, x) #ex=false
@deprecate indim(f) size(f,1)
@deprecate outdim(f) size(f,2)
@deprecate transform(f, x) predict(f, x)
@deprecate indim(f::Whitening) length(f::Whitening)
@deprecate outdim(f::Whitening) length(f::Whitening)
@deprecate tvar(f::PCA) var(f::PCA)
@deprecate classical_mds(D::AbstractMatrix, p::Int) predict(fit(MDS, D, maxoutdim=p, distances=true))
@deprecate transform(f::MDS) predict(f::MDS)
@deprecate xindim(M::CCA) size(M)[1]
@deprecate yindim(M::CCA) size(M)[2]
@deprecate outdim(M::CCA) size(M)[3]
@deprecate xindim(M::CCA) size(M,1)
@deprecate yindim(M::CCA) size(M,2)
@deprecate outdim(M::CCA) size(M,3)
@deprecate correlations(M::CCA) cor(M)
@deprecate xmean(M::CCA) mean(M, :x)
@deprecate ymean(M::CCA) mean(M, :y)
Expand Down
2 changes: 1 addition & 1 deletion src/cmds.jl
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ function predict(M::MDS, x::AbstractVector{T}; distances=false) where {T<:Real}
else
size(x, 1) != size(M.X, 1) && throw(
DimensionMismatch("Points and original data must have same dimensionality."))
pairwise((x,y)->norm(x-y), M.X, x)
pairwise((x,y)->norm(x-y), eachcol(M.X), eachcol(x))
end
end

Expand Down
3 changes: 0 additions & 3 deletions src/common.jl
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,3 @@ function calcscattermat(Z::DenseMatrix)
return calcscattermat(SimpleCovariance(), Z)
end

# Evaluate `kernel(x, col)` for every column `col` of `X` and collect the results,
# one entry per column. Extra keyword arguments are accepted and ignored.
function pairwise(kernel::Function, X::AbstractMatrix, x::AbstractVector; kwargs...)
    return collect(kernel(x, col) for col in eachcol(X))
end
6 changes: 4 additions & 2 deletions src/pca.jl
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,9 @@ gives the principal components for an observation, and \$\\mathbf{P}\$ is the pr
"""
reconstruct(M::PCA, y::AbstractVecOrMat{T}) where {T<:Real} = decentralize(M.proj * y, M.mean)

## show & dump

## show

function show(io::IO, M::PCA)
idim, odim = size(M)
print(io, "PCA(indim = $idim, outdim = $odim, principalratio = $(r2(M)))")
Expand Down Expand Up @@ -240,7 +242,7 @@ function pcasvd(Z::AbstractMatrix{T}, mean::AbstractVector{T}, n::Real;
v = Svd.S::Vector{T}
U = Svd.U::Matrix{T}
for i = 1:length(v)
@inbounds v[i] = abs2(v[i]) / n
@inbounds v[i] = abs2(v[i]) / (n-1)
end
ord = sortperm(v; rev=true)
vsum = sum(v)
Expand Down
12 changes: 12 additions & 0 deletions src/types.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
abstract type AbstractDimensionalityReduction <: RegressionModel end

"""
    size(model::AbstractDimensionalityReduction, d::Integer)

Return the `d`-th entry of `size(model)`.

For a model whose `size` is an `(input, output)` pair, `d == 1` gives the dimension of the
input data and `d == 2` gives the dimension of the output data.

# Throws
- `AssertionError` if `d` is greater than the number of entries in `size(model)`.
"""
function size(model::AbstractDimensionalityReduction, d::Integer)
    dims = size(model)
    # Explicit `throw` instead of `@assert`: assertions may be compiled out, and this
    # check validates a caller-supplied argument. The exception type and message are
    # kept so existing `AssertionError` handlers continue to work.
    d <= length(dims) || throw(AssertionError("Cannot access dimensional information"))
    return dims[d]
end

"""
projection(model::AbstractDimensionalityReduction)
Expand Down
2 changes: 1 addition & 1 deletion test/cmds.jl
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ using Test

@test_throws AssertionError predict(M, X0[:, 1])
@test_throws DimensionMismatch predict(M, rand(d+1); distances = true)
d = MultivariateStats.pairwise((x,y)->norm(x-y), X0, X0[:,2]) #|> vec
d = MultivariateStats.pairwise((x,y)->norm(x-y), eachcol(X0), eachcol(X0[:,2])) |> vec
y = predict(M, d, distances=true)
@test X[:, 2] ≈ y

Expand Down
5 changes: 2 additions & 3 deletions test/kpca.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@ import Statistics: mean, cov
# kernel calculations
ker = (x,y)->norm(x-y)

K = similar(X, n, n)
MultivariateStats.pairwise!(ker, K, eachcol(X))
K = MultivariateStats.pairwise(ker, eachcol(X), symmetric=true)
@test size(K) == (n, n)
@test K[1,1] == 0
@test K[2,1] == norm(X[:,2] - X[:,1])
Expand All @@ -46,7 +45,7 @@ import Statistics: mean, cov
Iₙ = ones(n,n)/n
@test MultivariateStats.transform!(KC, copy(K)) ≈ K - Iₙ*K - K*Iₙ + Iₙ*K*Iₙ

K = MultivariateStats.pairwise(ker, X, X[:,1])[:,1:1]
K = MultivariateStats.pairwise(ker, eachcol(X), eachcol(X[:,1]))[:,1:1]
@test size(K) == (n, 1)
@test K[1,1] == 0
@test K[2,1] == norm(X[:,2] - X[:,1])
Expand Down
3 changes: 3 additions & 0 deletions test/pca.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ import SparseArrays
M = PCA(Float64[], P, pvars, 15.0)

@test size(M) == (5, 3)
@test size(M,1) == 5
@test size(M,2) == 3
@test_throws AssertionError size(M, 3)
@test mean(M) == zeros(5)
@test projection(M) == P
@test principalvars(M) == pvars
Expand Down

2 comments on commit 1863cd5

@wildart
Copy link
Collaborator Author

@wildart wildart commented on 1863cd5 Mar 3, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/55929

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.9.1 -m "<description of version>" 1863cd56fd6063687e550f2e81a6504583f03e8e
git push origin v0.9.1

Please sign in to comment.