Skip to content

Commit

Permalink
Merge pull request #8 from KrishnaswamyLab/dev
Browse files Browse the repository at this point in the history
Fix data_pca
  • Loading branch information
stanleyjs authored Jun 7, 2018
2 parents 6ea6270 + 7422c97 commit 6a28922
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 105 deletions.
2 changes: 1 addition & 1 deletion graphtools/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def Graph(data,
elif len(parent_classes) == 2:
class Graph(parent_classes[0], parent_classes[1]):
pass
elif len(parent_classes) == 2:
elif len(parent_classes) == 3:
class Graph(parent_classes[0], parent_classes[1], parent_classes[2]):
pass
else:
Expand Down
96 changes: 13 additions & 83 deletions graphtools/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
import numpy as np
import abc
import pygsp
from sklearn.decomposition import PCA
from sklearn.utils.extmath import randomized_svd
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.preprocessing import normalize
from scipy import sparse
import warnings
Expand Down Expand Up @@ -58,14 +57,8 @@ class Data(Base):
data_nu : array-like, shape=[n_samples,n_pca]
Reduced data matrix
U : array-like, shape=[n_samples, n_pca]
Left singular vectors from PCA calculation
S : array-like, shape=[n_pca]
Singular values from PCA calculation
V : array-like, shape=[n_features, n_pca]
Right singular vectors from SVD calculation
data_pca : sklearn.decomposition.PCA or sklearn.decomposition.TruncatedSVD
sklearn PCA operator
"""

def __init__(self, data, n_pca=None, random_state=None, **kwargs):
Expand All @@ -90,7 +83,6 @@ def __init__(self, data, n_pca=None, random_state=None, **kwargs):
self.data = data
self.n_pca = n_pca
self.random_state = random_state

self.data_nu = self._reduce_data()
super().__init__(**kwargs)

Expand All @@ -108,17 +100,16 @@ def _reduce_data(self):
if self.n_pca is not None and self.n_pca < self.data.shape[1]:
log_start("PCA")
if sparse.issparse(self.data):
_, _, VT = randomized_svd(self.data, self.n_pca,
self.data_pca = TruncatedSVD(self.n_pca,
random_state=self.random_state)
V = VT.T
self._right_singular_vectors = V
data_nu = self.data.dot(V)
self.data_pca.fit(self.data)
data_nu = self.data_pca.transform(self.data)
else:
self.pca = PCA(self.n_pca,
self.data_pca = PCA(self.n_pca,
svd_solver='randomized',
random_state=self.random_state)
self.pca.fit(self.data)
data_nu = self.pca.transform(self.data)
self.data_pca.fit(self.data)
data_nu = self.data_pca.transform(self.data)
log_complete("PCA")
return data_nu
else:
Expand Down Expand Up @@ -153,58 +144,6 @@ def set_params(self, **params):
self.random_state = params['random_state']
return self

@property
def U(self):
"""Left singular vectors

Reads `components_` from the `self.pca` operator.
NOTE(review): scikit-learn documents `PCA.components_` as the principal
axes with shape (n_components, n_features), which does not match the
"left singular vectors, shape=[n_samples, n_pca]" description kept
below -- confirm which quantity callers expect.

Returns
-------
Left singular vectors from PCA calculation, shape=[n_samples, n_pca],
or None if the attribute lookup raises AttributeError
(e.g. PCA was not performed)
"""
try:
return self.pca.components_
except AttributeError:
# AttributeError is swallowed here: absence is reported as None
return None

@property
def S(self):
"""Singular values

Reads `singular_values_` from the `self.pca` operator.

Returns
-------
Singular values from PCA calculation, shape=[n_pca], or None if the
attribute lookup raises AttributeError (e.g. PCA was not performed)
"""
try:
return self.pca.singular_values_
except AttributeError:
# AttributeError is swallowed here: absence is reported as None
return None

@property
def V(self):
"""Right singular vectors

Returns the stored `self._right_singular_vectors` attribute.

TODO: can we get this from PCA as well?

Returns
-------
Right singular vectors from SVD calculation, shape=[n_features, n_pca],
or None if `self._right_singular_vectors` is not set
(e.g. SVD was not performed)
"""
try:
return self._right_singular_vectors
except AttributeError:
# AttributeError is swallowed here: absence is reported as None
return None

def transform(self, Y):
"""Transform input data `Y` to reduced data space defined by `self.data`
Expand All @@ -227,13 +166,9 @@ def transform(self, Y):
"""
try:
# try PCA first
return self.pca.transform(Y)
except AttributeError:
# no PCA - try SVD instead
try:
return Y.dot(self._right_singular_vectors)
except AttributeError:
# no SVD either - check if we can just return as is

return self.data_pca.transform(Y)
except AttributeError: #no pca, try to return data
try:
if Y.shape[1] != self.data.shape[1]:
# shape is wrong
Expand Down Expand Up @@ -269,13 +204,8 @@ def inverse_transform(self, Y):
"""
try:
# try PCA first
return self.pca.inverse_transform(Y)
return self.data_pca.inverse_transform(Y)
except AttributeError:
# no PCA - try SVD instead
try:
return Y.dot(self._right_singular_vectors.T)
except AttributeError:
# no SVD either - check if we can just return as is
try:
if Y.shape[1] != self.data_nu.shape[1]:
# shape is wrong
Expand Down
14 changes: 1 addition & 13 deletions graphtools/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,19 +172,7 @@ def build_kernel(self):
symmetric matrix with ones down the diagonal
with no negative entries.
"""
if self.decay is None or self.thresh == 1:
# binary connectivity matrix
# sklearn has a function for this
log_start("KNN search")
K = kneighbors_graph(self.knn_tree,
n_neighbors=self.knn,
metric=self.distance,
mode='connectivity',
include_self=True)
log_complete("KNN search")
else:
# sparse fast alpha decay
K = self.build_kernel_to_data(self.data_nu)
K = self.build_kernel_to_data(self.data_nu)
# symmetrize
K = (K + K.T) / 2
return K
Expand Down
8 changes: 0 additions & 8 deletions test/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,14 +113,6 @@ def test_inverse_transform_dense_no_pca():
assert_raises(ValueError, G.inverse_transform, G.data[:, :15])


def test_inverse_transform_sparse_pca():
"""Check inverse_transform on a sparse graph built with n_pca set."""
# request one fewer component than there are features
G = build_graph(data, sparse=True, n_pca=data.shape[1] - 1)
# mapping the reduced data back is expected to reproduce the original data
assert(np.allclose(G.data.toarray(), G.inverse_transform(G.data_nu)))
# column slices of the original data are expected to raise ValueError
assert_raises(ValueError, G.inverse_transform, sp.csr_matrix(G.data)[:, 0])
assert_raises(ValueError, G.inverse_transform,
sp.csr_matrix(G.data)[:, :15])


def test_inverse_transform_sparse_no_pca():
G = build_graph(data, sparse=True, n_pca=None)
assert(np.sum(G.data != G.inverse_transform(G.data_nu)) == 0)
Expand Down

0 comments on commit 6a28922

Please sign in to comment.