diff --git a/.travis.yml b/.travis.yml
index d4accf9..64d5223 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,6 +6,8 @@
sudo: required
+ cache: pip
+
addons:
apt:
packages:
diff --git a/README.rst b/README.rst
index 0b8ccee..7e3dd0b 100644
--- a/README.rst
+++ b/README.rst
@@ -5,6 +5,9 @@ graphtools
.. image:: https://img.shields.io/pypi/v/graphtools.svg
:target: https://pypi.org/project/graphtools/
:alt: Latest PyPi version
+.. image:: https://anaconda.org/conda-forge/graphtools/badges/version.svg
+ :target: https://anaconda.org/conda-forge/graphtools/
+ :alt: Latest Conda version
.. image:: https://api.travis-ci.com/KrishnaswamyLab/graphtools.svg?branch=master
:target: https://travis-ci.com/KrishnaswamyLab/graphtools
:alt: Travis CI Build
@@ -28,7 +31,11 @@ Installation
graphtools is available on `pip`. Install by running the following in a terminal::
- pip install --user graphtools
+ pip install --user graphtools
+
+Alternatively, graphtools can be installed using `Conda <https://conda.io/docs/>`_ (most easily obtained via the `Miniconda Python distribution <https://conda.io/miniconda.html>`_)::
+
+ conda install -c conda-forge graphtools
Or, to install the latest version from github::
@@ -45,14 +52,14 @@ The `graphtools.Graph` class provides an all-in-one interface for k-nearest neig
Use it as follows::
- from sklearn import datasets
- import graphtools
- digits = datasets.load_digits()
- G = graphtools.Graph(digits['data'])
- K = G.kernel
- P = G.diff_op
- G = graphtools.Graph(digits['data'], n_landmark=300)
- L = G.landmark_op
+ from sklearn import datasets
+ import graphtools
+ digits = datasets.load_digits()
+ G = graphtools.Graph(digits['data'])
+ K = G.kernel
+ P = G.diff_op
+ G = graphtools.Graph(digits['data'], n_landmark=300)
+ L = G.landmark_op
Help
----
diff --git a/graphtools/__init__.py b/graphtools/__init__.py
index 05d693d..8fc8a50 100644
--- a/graphtools/__init__.py
+++ b/graphtools/__init__.py
@@ -1,2 +1,2 @@
-from .api import Graph
+from .api import Graph, from_igraph
from .version import __version__
diff --git a/graphtools/api.py b/graphtools/api.py
index ede4f39..9e5d31b 100644
--- a/graphtools/api.py
+++ b/graphtools/api.py
@@ -1,6 +1,7 @@
import numpy as np
import warnings
import tasklogger
+from scipy import sparse
from . import base
from . import graphs
@@ -9,14 +10,15 @@
def Graph(data,
n_pca=None,
sample_idx=None,
- adaptive_k='sqrt',
+ adaptive_k=None,
precomputed=None,
knn=5,
decay=10,
+ bandwidth=None,
distance='euclidean',
thresh=1e-4,
kernel_symm='+',
- gamma=None,
+ theta=None,
n_landmark=None,
n_svd=100,
beta=1,
@@ -61,6 +63,11 @@ def Graph(data,
decay : `int` or `None`, optional (default: 10)
Rate of alpha decay to use. If `None`, alpha decay is not used.
+ bandwidth : `float`, list-like or `None`, optional (default: `None`)
+ Fixed bandwidth to use. If given, overrides `knn`. Can be a single
+ bandwidth or a list-like (shape=[n_samples]) of bandwidths for each
+ sample.
+
distance : `str`, optional (default: `'euclidean'`)
Any metric from `scipy.spatial.distance` can be used
distance metric for building kNN graph.
@@ -75,12 +82,12 @@ def Graph(data,
Defines method of MNN symmetrization.
'+' : additive
'*' : multiplicative
- 'gamma' : min-max
+ 'theta' : min-max
'none' : no symmetrization
- gamma: float (default: None)
- Min-max symmetrization constant or matrix. Only used if kernel_symm='gamma'.
- K = `gamma * min(K, K.T) + (1 - gamma) * max(K, K.T)`
+ theta: float (default: None)
+ Min-max symmetrization constant or matrix. Only used if kernel_symm='theta'.
+ K = `theta * min(K, K.T) + (1 - theta) * max(K, K.T)`
precomputed : {'distance', 'affinity', 'adjacency', `None`}, optional (default: `None`)
If the graph is precomputed, this variable denotes which graph
@@ -88,12 +95,12 @@ def Graph(data,
Only one of `precomputed` and `n_pca` can be set.
beta: float, optional(default: 1)
- Multiply within - batch connections by(1 - beta)
+ Multiply between-batch connections by beta
sample_idx: array-like
Batch index for MNN kernel
- adaptive_k : `{'min', 'mean', 'sqrt', 'none'}` (default: 'sqrt')
+ adaptive_k : `{'min', 'mean', 'sqrt', 'none'}` (default: None)
Weights MNN kernel adaptively using the number of cells in
each sample according to the selected method.
@@ -221,3 +228,31 @@ def Graph(data,
for key, value in params.items()
if key != "data"])))
return Graph(**params)
+
+
+def from_igraph(G, **kwargs):
+ """Convert an igraph.Graph to a graphtools.Graph
+
+ Creates a graphtools.graphs.TraditionalGraph with a
+ precomputed adjacency matrix
+
+ Parameters
+ ----------
+ G : igraph.Graph
+ Graph to be converted
+ kwargs
+ keyword arguments for graphtools.Graph
+
+ Returns
+ -------
+ G : graphtools.graphs.TraditionalGraph
+ """
+ if 'precomputed' in kwargs:
+ if kwargs['precomputed'] != 'adjacency':
+ warnings.warn(
+ "Cannot build graph from igraph with precomputed={}. "
+ "Use 'adjacency' instead.".format(kwargs['precomputed']),
+ UserWarning)
+ del kwargs['precomputed']
+ return Graph(sparse.coo_matrix(G.get_adjacency().data),
+ precomputed='adjacency', **kwargs)
diff --git a/graphtools/base.py b/graphtools/base.py
index edd678a..5f1a2ab 100644
--- a/graphtools/base.py
+++ b/graphtools/base.py
@@ -311,12 +311,12 @@ class BaseGraph(with_metaclass(abc.ABCMeta, Base)):
Defines method of MNN symmetrization.
'+' : additive
'*' : multiplicative
- 'gamma' : min-max
+ 'theta' : min-max
'none' : no symmetrization
- gamma: float (default: 0.5)
+ theta: float (default: 0.5)
Min-max symmetrization constant.
- K = `gamma * min(K, K.T) + (1 - gamma) * max(K, K.T)`
+ K = `theta * min(K, K.T) + (1 - theta) * max(K, K.T)`
initialize : `bool`, optional (default : `True`)
if false, don't create the kernel matrix.
@@ -337,11 +337,20 @@ class BaseGraph(with_metaclass(abc.ABCMeta, Base)):
"""
def __init__(self, kernel_symm='+',
+ theta=None,
gamma=None,
initialize=True, **kwargs):
+ if gamma is not None:
+ warnings.warn("gamma is deprecated. "
+ "Setting theta={}".format(gamma), FutureWarning)
+ theta = gamma
+ if kernel_symm == 'gamma':
+ warnings.warn("kernel_symm='gamma' is deprecated. "
+ "Setting kernel_symm='theta'", FutureWarning)
+ kernel_symm = 'theta'
self.kernel_symm = kernel_symm
- self.gamma = gamma
- self._check_symmetrization(kernel_symm, gamma)
+ self.theta = theta
+ self._check_symmetrization(kernel_symm, theta)
if initialize:
tasklogger.log_debug("Initializing kernel...")
@@ -350,25 +359,25 @@ def __init__(self, kernel_symm='+',
tasklogger.log_debug("Not initializing kernel.")
super().__init__(**kwargs)
- def _check_symmetrization(self, kernel_symm, gamma):
- if kernel_symm not in ['+', '*', 'gamma', None]:
+ def _check_symmetrization(self, kernel_symm, theta):
+ if kernel_symm not in ['+', '*', 'theta', None]:
raise ValueError(
"kernel_symm '{}' not recognized. Choose from "
- "'+', '*', 'gamma', or 'none'.".format(kernel_symm))
- elif kernel_symm != 'gamma' and gamma is not None:
- warnings.warn("kernel_symm='{}' but gamma is not None. "
- "Setting kernel_symm='gamma'.".format(kernel_symm))
- self.kernel_symm = kernel_symm = 'gamma'
-
- if kernel_symm == 'gamma':
- if gamma is None:
- warnings.warn("kernel_symm='gamma' but gamma not given. "
- "Defaulting to gamma=0.5.")
- self.gamma = gamma = 0.5
- elif not isinstance(gamma, numbers.Number) or \
- gamma < 0 or gamma > 1:
- raise ValueError("gamma {} not recognized. Expected "
- "a float between 0 and 1".format(gamma))
+ "'+', '*', 'theta', or 'none'.".format(kernel_symm))
+ elif kernel_symm != 'theta' and theta is not None:
+ warnings.warn("kernel_symm='{}' but theta is not None. "
+ "Setting kernel_symm='theta'.".format(kernel_symm))
+ self.kernel_symm = kernel_symm = 'theta'
+
+ if kernel_symm == 'theta':
+ if theta is None:
+ warnings.warn("kernel_symm='theta' but theta not given. "
+ "Defaulting to theta=0.5.")
+ self.theta = theta = 0.5
+ elif not isinstance(theta, numbers.Number) or \
+ theta < 0 or theta > 1:
+ raise ValueError("theta {} not recognized. Expected "
+ "a float between 0 and 1".format(theta))
def _build_kernel(self):
"""Private method to build kernel matrix
@@ -400,26 +409,26 @@ def symmetrize_kernel(self, K):
elif self.kernel_symm == "*":
tasklogger.log_debug("Using multiplication symmetrization.")
K = K.multiply(K.T)
- elif self.kernel_symm == 'gamma':
+ elif self.kernel_symm == 'theta':
tasklogger.log_debug(
- "Using gamma symmetrization (gamma = {}).".format(self.gamma))
- K = self.gamma * elementwise_minimum(K, K.T) + \
- (1 - self.gamma) * elementwise_maximum(K, K.T)
+ "Using theta symmetrization (theta = {}).".format(self.theta))
+ K = self.theta * elementwise_minimum(K, K.T) + \
+ (1 - self.theta) * elementwise_maximum(K, K.T)
elif self.kernel_symm is None:
tasklogger.log_debug("Using no symmetrization.")
pass
else:
# this should never happen
raise ValueError(
- "Expected kernel_symm in ['+', '*', 'gamma' or None]. "
- "Got {}".format(self.gamma))
+ "Expected kernel_symm in ['+', '*', 'theta' or None]. "
+ "Got {}".format(self.theta))
return K
def get_params(self):
"""Get parameters from this object
"""
return {'kernel_symm': self.kernel_symm,
- 'gamma': self.gamma}
+ 'theta': self.theta}
def set_params(self, **params):
"""Set parameters on this object
@@ -429,7 +438,7 @@ def set_params(self, **params):
Valid parameters:
Invalid parameters: (these would require modifying the kernel matrix)
- kernel_symm
- - gamma
+ - theta
Parameters
----------
@@ -439,8 +448,8 @@ def set_params(self, **params):
-------
self
"""
- if 'gamma' in params and params['gamma'] != self.gamma:
- raise ValueError("Cannot update gamma. Please create a new graph")
+ if 'theta' in params and params['theta'] != self.theta:
+ raise ValueError("Cannot update theta. Please create a new graph")
if 'kernel_symm' in params and \
params['kernel_symm'] != self.kernel_symm:
raise ValueError(
@@ -535,6 +544,42 @@ def build_kernel(self):
"""
raise NotImplementedError
+ def to_pygsp(self, **kwargs):
+ """Convert to a PyGSP graph
+
+ For use only when the user means to create the graph using
+ the flag `use_pygsp=True`, and doesn't wish to recompute the kernel.
+ Creates a graphtools.graphs.TraditionalGraph with a precomputed
+ affinity matrix which also inherits from pygsp.graphs.Graph.
+
+ Parameters
+ ----------
+ kwargs
+ keyword arguments for graphtools.Graph
+
+ Returns
+ -------
+ G : graphtools.base.PyGSPGraph, graphtools.graphs.TraditionalGraph
+ """
+ from . import api
+ if 'precomputed' in kwargs:
+ if kwargs['precomputed'] != 'affinity':
+ warnings.warn(
+ "Cannot build PyGSPGraph with precomputed={}. "
+ "Using 'affinity' instead.".format(kwargs['precomputed']),
+ UserWarning)
+ del kwargs['precomputed']
+ if 'use_pygsp' in kwargs:
+ if kwargs['use_pygsp'] is not True:
+ warnings.warn(
+ "Cannot build PyGSPGraph with use_pygsp={}. "
+ "Use True instead.".format(kwargs['use_pygsp']),
+ UserWarning)
+ del kwargs['use_pygsp']
+ return api.Graph(self.K,
+ precomputed="affinity", use_pygsp=True,
+ **kwargs)
+
class PyGSPGraph(with_metaclass(abc.ABCMeta, pygsp.graphs.Graph, Base)):
"""Interface between BaseGraph and PyGSP.
diff --git a/graphtools/graphs.py b/graphtools/graphs.py
index ce8f9ae..a0810da 100644
--- a/graphtools/graphs.py
+++ b/graphtools/graphs.py
@@ -1,3 +1,4 @@
+from __future__ import division
from builtins import super
import numpy as np
from sklearn.neighbors import NearestNeighbors
@@ -35,6 +36,12 @@ class kNNGraph(DataGraph):
decay : `int` or `None`, optional (default: `None`)
Rate of alpha decay to use. If `None`, alpha decay is not used.
+ bandwidth : `float`, list-like or `None`, optional (default: `None`)
+ Fixed bandwidth to use. If given, overrides `knn`. Can be a single
+ bandwidth or a list-like (shape=[n_samples]) of bandwidths for each
+ sample.
+ TODO: implement `callable` bandwidth
+
distance : `str`, optional (default: `'euclidean'`)
Any metric from `scipy.spatial.distance` can be used
distance metric for building kNN graph.
@@ -55,25 +62,27 @@ class kNNGraph(DataGraph):
"""
def __init__(self, data, knn=5, decay=None,
- distance='euclidean',
+ bandwidth=None, distance='euclidean',
thresh=1e-4, n_pca=None, **kwargs):
- self.knn = knn
- self.decay = decay
- self.distance = distance
- self.thresh = thresh
if decay is not None and thresh <= 0:
raise ValueError("Cannot instantiate a kNNGraph with `decay=None` "
"and `thresh=0`. Use a TraditionalGraph instead.")
if knn > data.shape[0]:
warnings.warn("Cannot set knn ({k}) to be greater than "
- "data.shape[0] ({n}). Setting knn={n}".format(
+ "n_samples ({n}). Setting knn={n}".format(
k=knn, n=data.shape[0]))
+ knn = data.shape[0]
if n_pca is None and data.shape[1] > 500:
warnings.warn("Building a kNNGraph on data of shape {} is "
"expensive. Consider setting n_pca.".format(
data.shape), UserWarning)
+ self.knn = knn
+ self.decay = decay
+ self.bandwidth = bandwidth
+ self.distance = distance
+ self.thresh = thresh
super().__init__(data, n_pca=n_pca, **kwargs)
def get_params(self):
@@ -82,6 +91,7 @@ def get_params(self):
params = super().get_params()
params.update({'knn': self.knn,
'decay': self.decay,
+ 'bandwidth': self.bandwidth,
'distance': self.distance,
'thresh': self.thresh,
'n_jobs': self.n_jobs,
@@ -101,6 +111,7 @@ def set_params(self, **params):
Invalid parameters: (these would require modifying the kernel matrix)
- knn
- decay
+ - bandwidth
- distance
- thresh
@@ -116,6 +127,9 @@ def set_params(self, **params):
raise ValueError("Cannot update knn. Please create a new graph")
if 'decay' in params and params['decay'] != self.decay:
raise ValueError("Cannot update decay. Please create a new graph")
+ if 'bandwidth' in params and params['bandwidth'] != self.bandwidth:
+ raise ValueError(
+ "Cannot update bandwidth. Please create a new graph")
if 'distance' in params and params['distance'] != self.distance:
raise ValueError("Cannot update distance. "
"Please create a new graph")
@@ -184,7 +198,7 @@ def build_kernel(self):
K = self.build_kernel_to_data(self.data_nu)
return K
- def build_kernel_to_data(self, Y, knn=None):
+ def build_kernel_to_data(self, Y, knn=None, bandwidth=None):
"""Build a kernel from new input data `Y` to the `self.data`
Parameters
@@ -198,6 +212,9 @@ def build_kernel_to_data(self, Y, knn=None):
knn : `int` or `None`, optional (default: `None`)
If `None`, defaults to `self.knn`
+ bandwidth : `float`, list-like or `None`, optional (default: `None`)
+ If `None`, defaults to `self.bandwidth`
+
Returns
-------
@@ -212,9 +229,11 @@ def build_kernel_to_data(self, Y, knn=None):
"""
if knn is None:
knn = self.knn
+ if bandwidth is None:
+ bandwidth = self.bandwidth
if knn > self.data.shape[0]:
warnings.warn("Cannot set knn ({k}) to be greater than "
- "data.shape[0] ({n}). Setting knn={n}".format(
+ "n_samples ({n}). Setting knn={n}".format(
k=knn, n=self.data.shape[0]))
Y = self._check_extension_shape(Y)
@@ -247,7 +266,8 @@ def build_kernel_to_data(self, Y, knn=None):
RuntimeWarning)
tasklogger.log_complete("KNN search")
tasklogger.log_start("affinities")
- bandwidth = distances[:, knn - 1]
+ if bandwidth is None:
+ bandwidth = distances[:, knn - 1]
radius = bandwidth * np.power(-1 * np.log(self.thresh),
1 / self.decay)
update_idx = np.argwhere(
@@ -266,8 +286,9 @@ def build_kernel_to_data(self, Y, knn=None):
for i, idx in enumerate(update_idx):
distances[idx] = dist_new[i]
indices[idx] = ind_new[i]
- update_idx = [i for i, d in enumerate(distances)
- if np.max(d) < radius[i]]
+ update_idx = [i for i, d in enumerate(distances) if np.max(d) <
+ (radius if isinstance(bandwidth, numbers.Number)
+ else radius[i])]
tasklogger.log_debug("search_knn = {}; {} remaining".format(
search_knn,
len(update_idx)))
@@ -281,12 +302,18 @@ def build_kernel_to_data(self, Y, knn=None):
# give up - radius search
dist_new, ind_new = knn_tree.radius_neighbors(
Y[update_idx, :],
- radius=np.max(radius[update_idx]))
+ radius=radius
+ if isinstance(bandwidth, numbers.Number)
+ else np.max(radius[update_idx]))
for i, idx in enumerate(update_idx):
distances[idx] = dist_new[i]
indices[idx] = ind_new[i]
- data = np.concatenate([distances[i] / bandwidth[i]
- for i in range(len(distances))])
+ if isinstance(bandwidth, numbers.Number):
+ data = np.concatenate(distances) / bandwidth
+ else:
+ data = np.concatenate([distances[i] / bandwidth[i]
+ for i in range(len(distances))])
+
indices = np.concatenate(indices)
indptr = np.concatenate(
[[0], np.cumsum([len(d) for d in distances])])
@@ -335,8 +362,14 @@ class LandmarkGraph(DataGraph):
transitions : array-like, shape=[n_samples, n_landmark]
Transition probabilities between samples and landmarks.
- _clusters : array-like, shape=[n_samples]
+ clusters : array-like, shape=[n_samples]
Private attribute. Cluster assignments for each sample.
+
+ Examples
+ --------
+ >>> G = graphtools.Graph(data, n_landmark=1000)
+ >>> X_landmark = transform(G.landmark_op)
+ >>> X_full = G.interpolate(X_landmark)
"""
def __init__(self, data, n_landmark=2000, n_svd=100, **kwargs):
@@ -431,6 +464,23 @@ def landmark_op(self):
self.build_landmark_op()
return self._landmark_op
+ @property
+ def clusters(self):
+ """Cluster assignments for each sample.
+
+ Compute or return the cluster assignments
+
+ Returns
+ -------
+ clusters : list-like, shape=[n_samples]
+ Cluster assignments for each sample.
+ """
+ try:
+ return self._clusters
+ except AttributeError:
+ self.build_landmark_op()
+ return self._clusters
+
@property
def transitions(self):
"""Transition matrix from samples to landmarks
@@ -450,13 +500,13 @@ def transitions(self):
return self._transitions
def _landmarks_to_data(self):
- landmarks = np.unique(self._clusters)
+ landmarks = np.unique(self.clusters)
if sparse.issparse(self.kernel):
pmn = sparse.vstack(
- [sparse.csr_matrix(self.kernel[self._clusters == i, :].sum(
+ [sparse.csr_matrix(self.kernel[self.clusters == i, :].sum(
axis=0)) for i in landmarks])
else:
- pmn = np.array([np.sum(self.kernel[self._clusters == i, :], axis=0)
+ pmn = np.array([np.sum(self.kernel[self.clusters == i, :], axis=0)
for i in landmarks])
return pmn
@@ -532,12 +582,12 @@ def extend_to_data(self, data, **kwargs):
kernel = self.build_kernel_to_data(data, **kwargs)
if sparse.issparse(kernel):
pnm = sparse.hstack(
- [sparse.csr_matrix(kernel[:, self._clusters == i].sum(
- axis=1)) for i in np.unique(self._clusters)])
+ [sparse.csr_matrix(kernel[:, self.clusters == i].sum(
+ axis=1)) for i in np.unique(self.clusters)])
else:
pnm = np.array([np.sum(
- kernel[:, self._clusters == i],
- axis=1).T for i in np.unique(self._clusters)]).transpose()
+ kernel[:, self.clusters == i],
+ axis=1).T for i in np.unique(self.clusters)]).transpose()
pnm = normalize(pnm, norm='l1', axis=1)
return pnm
@@ -590,6 +640,12 @@ class TraditionalGraph(DataGraph):
decay : `int` or `None`, optional (default: `None`)
Rate of alpha decay to use. If `None`, alpha decay is not used.
+ bandwidth : `float`, list-like or `None`, optional (default: `None`)
+ Fixed bandwidth to use. If given, overrides `knn`. Can be a single
+ bandwidth or a list-like (shape=[n_samples]) of bandwidths for each
+ sample.
+ TODO: implement `callable` bandwidth
+
distance : `str`, optional (default: `'euclidean'`)
Any metric from `scipy.spatial.distance` can be used
distance metric for building kNN graph.
@@ -613,19 +669,27 @@ class TraditionalGraph(DataGraph):
Only one of `precomputed` and `n_pca` can be set.
"""
- def __init__(self, data, knn=5, decay=10,
- distance='euclidean', n_pca=None,
+ def __init__(self, data,
+ knn=5, decay=10,
+ bandwidth=None,
+ distance='euclidean',
+ n_pca=None,
thresh=1e-4,
precomputed=None, **kwargs):
+ if decay is None and precomputed not in ['affinity', 'adjacency']:
+ # decay high enough is basically a binary kernel
+ raise ValueError("`decay` must be provided for a TraditionalGraph"
+ ". For kNN kernel, use kNNGraph.")
if precomputed is not None and n_pca is not None:
# the data itself is a matrix of distances / affinities
n_pca = None
warnings.warn("n_pca cannot be given on a precomputed graph."
" Setting n_pca=None", RuntimeWarning)
- if decay is None and precomputed not in ['affinity', 'adjacency']:
- # decay high enough is basically a binary kernel
- raise ValueError("`decay` must be provided for a TraditionalGraph"
- ". For kNN kernel, use kNNGraph.")
+ if knn > data.shape[0]:
+ warnings.warn("Cannot set knn ({k}) to be greater than or equal to"
+ " n_samples ({n}). Setting knn={n}".format(
+ k=knn, n=data.shape[0] - 1))
+ knn = data.shape[0] - 1
if precomputed is not None:
if precomputed not in ["distance", "affinity", "adjacency"]:
raise ValueError("Precomputed value {} not recognized. "
@@ -640,6 +704,7 @@ def __init__(self, data, knn=5, decay=10,
"non-negative".format(precomputed))
self.knn = knn
self.decay = decay
+ self.bandwidth = bandwidth
self.distance = distance
self.thresh = thresh
self.precomputed = precomputed
@@ -653,6 +718,7 @@ def get_params(self):
params = super().get_params()
params.update({'knn': self.knn,
'decay': self.decay,
+ 'bandwidth': self.bandwidth,
'distance': self.distance,
'precomputed': self.precomputed})
return params
@@ -667,6 +733,7 @@ def set_params(self, **params):
- distance
- knn
- decay
+ - bandwidth
Parameters
----------
@@ -690,6 +757,10 @@ def set_params(self, **params):
if 'decay' in params and params['decay'] != self.decay and \
self.precomputed is None:
raise ValueError("Cannot update decay. Please create a new graph")
+ if 'bandwidth' in params and params['bandwidth'] != self.bandwidth and \
+ self.precomputed is None:
+ raise ValueError(
+ "Cannot update bandwidth. Please create a new graph")
# update superclass parameters
super().set_params(**params)
return self
@@ -752,9 +823,12 @@ def build_kernel(self):
"precomputed='{}' not recognized. "
"Choose from ['affinity', 'adjacency', 'distance', "
"None]".format(self.precomputed))
- knn_dist = np.partition(pdx, self.knn, axis=1)[:, :self.knn]
- epsilon = np.max(knn_dist, axis=1)
- pdx = (pdx.T / epsilon).T
+ if self.bandwidth is None:
+ knn_dist = np.partition(pdx, self.knn, axis=1)[:, :self.knn]
+ bandwidth = np.max(knn_dist, axis=1)
+ else:
+ bandwidth = self.bandwidth
+ pdx = (pdx.T / bandwidth).T
K = np.exp(-1 * np.power(pdx, self.decay))
# handle nan
K = np.where(np.isnan(K), 1, K)
@@ -773,7 +847,7 @@ def build_kernel(self):
K[K < self.thresh] = 0
return K
- def build_kernel_to_data(self, Y, knn=None):
+ def build_kernel_to_data(self, Y, knn=None, bandwidth=None):
"""Build transition matrix from new data to the graph
Creates a transition matrix such that `Y` can be approximated by
@@ -805,15 +879,18 @@ def build_kernel_to_data(self, Y, knn=None):
"""
if knn is None:
knn = self.knn
+ if bandwidth is None:
+ bandwidth = self.bandwidth
if self.precomputed is not None:
raise ValueError("Cannot extend kernel on precomputed graph")
else:
tasklogger.log_start("affinities")
Y = self._check_extension_shape(Y)
pdx = cdist(Y, self.data_nu, metric=self.distance)
- knn_dist = np.partition(pdx, knn, axis=1)[:, :knn]
- epsilon = np.max(knn_dist, axis=1)
- pdx = (pdx.T / epsilon).T
+ if bandwidth is None:
+ knn_dist = np.partition(pdx, knn, axis=1)[:, :knn]
+ bandwidth = np.max(knn_dist, axis=1)
+ pdx = (pdx.T / bandwidth).T
K = np.exp(-1 * pdx**self.decay)
# handle nan
K = np.where(np.isnan(K), 1, K)
@@ -841,9 +918,9 @@ class MNNGraph(DataGraph):
Batch index
beta : `float`, optional (default: 1)
- Downweight within-batch affinities by beta
+ Downweight between-batch affinities by beta
- adaptive_k : {'min', 'mean', 'sqrt', `None`} (default: 'sqrt')
+ adaptive_k : {'min', 'mean', 'sqrt', `None`} (default: None)
Weights MNN kernel adaptively using the number of cells in
each sample according to the selected method.
@@ -855,8 +932,9 @@ class MNNGraph(DataGraph):
def __init__(self, data, sample_idx,
knn=5, beta=1, n_pca=None,
- adaptive_k='sqrt',
+ adaptive_k=None,
decay=None,
+ bandwidth=None,
distance='euclidean',
thresh=1e-4,
n_jobs=1,
@@ -869,6 +947,7 @@ def __init__(self, data, sample_idx,
self.knn = knn
self.decay = decay
self.distance = distance
+ self.bandwidth = bandwidth
self.thresh = thresh
self.n_jobs = n_jobs
self.weighted_knn = self._weight_knn()
@@ -886,33 +965,33 @@ def __init__(self, data, sample_idx,
super().__init__(data, n_pca=n_pca, **kwargs)
- def _check_symmetrization(self, kernel_symm, gamma):
- if kernel_symm == 'gamma' and gamma is not None and \
- not isinstance(gamma, numbers.Number):
- # matrix gamma
+ def _check_symmetrization(self, kernel_symm, theta):
+ if kernel_symm == 'theta' and theta is not None and \
+ not isinstance(theta, numbers.Number):
+ # matrix theta
try:
- gamma.shape
+ theta.shape
except AttributeError:
- raise ValueError("gamma {} not recognized. "
+ raise ValueError("theta {} not recognized. "
"Expected a float between 0 and 1 "
"or a [n_batch,n_batch] matrix of "
- "floats between 0 and 1".format(gamma))
- if not np.shape(gamma) == (len(self.samples),
+ "floats between 0 and 1".format(theta))
+ if not np.shape(theta) == (len(self.samples),
len(self.samples)):
raise ValueError(
- "Matrix gamma must be of shape "
+ "Matrix theta must be of shape "
"({}), got ({})".format(
(len(self.samples),
- len(self.samples)), gamma.shape))
- elif np.max(gamma) > 1 or np.min(gamma) < 0:
+ len(self.samples)), theta.shape))
+ elif np.max(theta) > 1 or np.min(theta) < 0:
raise ValueError(
- "Values in matrix gamma must be between"
+ "Values in matrix theta must be between"
" 0 and 1, got values between {} and {}".format(
- np.max(gamma), np.min(gamma)))
- elif np.any(gamma != gamma.T):
- raise ValueError("gamma must be a symmetric matrix")
+ np.max(theta), np.min(theta)))
+ elif np.any(theta != theta.T):
+ raise ValueError("theta must be a symmetric matrix")
else:
- super()._check_symmetrization(kernel_symm, gamma)
+ super()._check_symmetrization(kernel_symm, theta)
def _weight_knn(self, sample_size=None):
"""Select adaptive values of knn
@@ -957,6 +1036,7 @@ def get_params(self):
'adaptive_k': self.adaptive_k,
'knn': self.knn,
'decay': self.decay,
+ 'bandwidth': self.bandwidth,
'distance': self.distance,
'thresh': self.thresh,
'n_jobs': self.n_jobs})
@@ -995,7 +1075,7 @@ def set_params(self, **params):
"Cannot update adaptive_k. Please create a new graph")
# knn arguments
- knn_kernel_args = ['knn', 'decay', 'distance', 'thresh']
+ knn_kernel_args = ['knn', 'decay', 'distance', 'thresh', 'bandwidth']
knn_other_args = ['n_jobs', 'random_state', 'verbose']
for arg in knn_kernel_args:
if arg in params and params[arg] != getattr(self, arg):
@@ -1037,12 +1117,13 @@ def build_kernel(self):
graph = Graph(data, n_pca=None,
knn=self.weighted_knn[i],
decay=self.decay,
+ bandwidth=self.bandwidth,
distance=self.distance,
thresh=self.thresh,
verbose=self.verbose,
random_state=self.random_state,
n_jobs=self.n_jobs,
- initialize=False)
+ initialize=True)
self.subgraphs.append(graph) # append to list of subgraphs
tasklogger.log_complete("subgraphs")
@@ -1052,16 +1133,25 @@ def build_kernel(self):
else:
K = np.zeros([self.data_nu.shape[0], self.data_nu.shape[0]])
for i, X in enumerate(self.subgraphs):
+ K = set_submatrix(K, self.sample_idx == self.samples[i],
+ self.sample_idx == self.samples[i], X.K)
+ within_batch_norm = np.array(np.sum(X.K, 1)).flatten()
for j, Y in enumerate(self.subgraphs):
+ if i == j:
+ continue
tasklogger.log_start(
"kernel from sample {} to {}".format(self.samples[i],
self.samples[j]))
Kij = Y.build_kernel_to_data(
X.data_nu,
knn=self.weighted_knn[i])
- if i == j:
- # downweight within-batch affinities by beta
- Kij = Kij * self.beta
+ between_batch_norm = np.array(np.sum(Kij, 1)).flatten()
+ scale = np.minimum(1, within_batch_norm /
+ between_batch_norm) * self.beta
+ if sparse.issparse(Kij):
+ Kij = Kij.multiply(scale[:, None])
+ else:
+ Kij = Kij * scale[:, None]
K = set_submatrix(K, self.sample_idx == self.samples[i],
self.sample_idx == self.samples[j], Kij)
tasklogger.log_complete(
@@ -1070,14 +1160,14 @@ def build_kernel(self):
return K
def symmetrize_kernel(self, K):
- if self.kernel_symm == 'gamma' and self.gamma is not None and \
- not isinstance(self.gamma, numbers.Number):
- # matrix gamma
- # Gamma can be a matrix with specific values transitions for
+ if self.kernel_symm == 'theta' and self.theta is not None and \
+ not isinstance(self.theta, numbers.Number):
+ # matrix theta
+ # Theta can be a matrix with specific values transitions for
# each batch. This allows for technical replicates and
# experimental samples to be corrected simultaneously
- tasklogger.log_debug("Using gamma symmetrization. "
- "Gamma:\n{}".format(self.gamma))
+ tasklogger.log_debug("Using theta symmetrization. "
+ "Theta:\n{}".format(self.theta))
for i, sample_i in enumerate(self.samples):
for j, sample_j in enumerate(self.samples):
if j < i:
@@ -1086,9 +1176,9 @@ def symmetrize_kernel(self, K):
self.sample_idx == sample_j)]
Kji = K[np.ix_(self.sample_idx == sample_j,
self.sample_idx == sample_i)]
- Kij_symm = self.gamma[i, j] * \
+ Kij_symm = self.theta[i, j] * \
elementwise_minimum(Kij, Kji.T) + \
- (1 - self.gamma[i, j]) * \
+ (1 - self.theta[i, j]) * \
elementwise_maximum(Kij, Kji.T)
K = set_submatrix(K, self.sample_idx == sample_i,
self.sample_idx == sample_j, Kij_symm)
@@ -1100,7 +1190,7 @@ def symmetrize_kernel(self, K):
K = super().symmetrize_kernel(K)
return K
- def build_kernel_to_data(self, Y, gamma=None):
+ def build_kernel_to_data(self, Y, theta=None):
"""Build transition matrix from new data to the graph
Creates a transition matrix such that `Y` can be approximated by
@@ -1120,8 +1210,8 @@ def build_kernel_to_data(self, Y, gamma=None):
to the existing data. `n_features` must match
either the ambient or PCA dimensions
- gamma : array-like or `None`, optional (default: `None`)
- if `self.gamma` is a matrix, gamma values must be explicitly
+ theta : array-like or `None`, optional (default: `None`)
+ if `self.theta` is a matrix, theta values must be explicitly
specified between `Y` and each sample in `self.data`
Returns
@@ -1131,15 +1221,15 @@ def build_kernel_to_data(self, Y, gamma=None):
Transition matrix from `Y` to `self.data`
"""
raise NotImplementedError
- tasklogger.log_warning("building MNN kernel to gamma is experimental")
- if not isinstance(self.gamma, str) and \
- not isinstance(self.gamma, numbers.Number):
- if gamma is None:
+ tasklogger.log_warning("building MNN kernel to theta is experimental")
+ if not isinstance(self.theta, str) and \
+ not isinstance(self.theta, numbers.Number):
+ if theta is None:
raise ValueError(
- "self.gamma is a matrix but gamma is not provided.")
- elif len(gamma) != len(self.samples):
+ "self.theta is a matrix but theta is not provided.")
+ elif len(theta) != len(self.samples):
raise ValueError(
- "gamma should have one value for every sample")
+ "theta should have one value for every sample")
Y = self._check_extension_shape(Y)
kernel_xy = []
@@ -1156,26 +1246,26 @@ def build_kernel_to_data(self, Y, gamma=None):
kernel_yx = sparse.vstack(kernel_yx) # n_cells_x x n_cells_y
# symmetrize
- if gamma is not None:
+ if theta is not None:
# Gamma can be a vector with specific values transitions for
# each batch. This allows for technical replicates and
# experimental samples to be corrected simultaneously
K = np.empty_like(kernel_xy)
for i, sample in enumerate(self.samples):
sample_idx = self.sample_idx == sample
- K[:, sample_idx] = gamma[i] * \
+ K[:, sample_idx] = theta[i] * \
kernel_xy[:, sample_idx].minimum(
kernel_yx[sample_idx, :].T) + \
- (1 - gamma[i]) * \
+ (1 - theta[i]) * \
kernel_xy[:, sample_idx].maximum(
kernel_yx[sample_idx, :].T)
- if self.gamma == "+":
+ if self.theta == "+":
K = (kernel_xy + kernel_yx.T) / 2
- elif self.gamma == "*":
+ elif self.theta == "*":
K = kernel_xy.multiply(kernel_yx.T)
else:
- K = self.gamma * kernel_xy.minimum(kernel_yx.T) + \
- (1 - self.gamma) * kernel_xy.maximum(kernel_yx.T)
+ K = self.theta * kernel_xy.minimum(kernel_yx.T) + \
+ (1 - self.theta) * kernel_xy.maximum(kernel_yx.T)
return K
diff --git a/graphtools/version.py b/graphtools/version.py
index 569b121..d3ec452 100644
--- a/graphtools/version.py
+++ b/graphtools/version.py
@@ -1 +1 @@
-__version__ = "0.1.10"
+__version__ = "0.2.0"
diff --git a/requirements.txt b/requirements.txt
index c19f67d..08e1515 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,4 +3,4 @@ scipy>=1.1.0
-pygsp>=>=0.5.1
+pygsp>=0.5.1
scikit-learn>=0.19.1
future
-tasklogger>=0.2
+tasklogger>=0.4.0
diff --git a/setup.py b/setup.py
index fca28af..f67b380 100644
--- a/setup.py
+++ b/setup.py
@@ -8,14 +8,15 @@
'pygsp>=0.5.1',
'scikit-learn>=0.19.1',
'future',
- 'tasklogger>=0.2',
+ 'tasklogger>=0.4.0',
]
test_requires = [
'nose2',
'pandas',
'coverage',
- 'coveralls'
+ 'coveralls',
+ 'python-igraph'
]
if sys.version_info[0] == 3:
diff --git a/test/load_tests/__init__.py b/test/load_tests/__init__.py
index 3f62c9d..f018957 100644
--- a/test/load_tests/__init__.py
+++ b/test/load_tests/__init__.py
@@ -16,6 +16,7 @@ def reset_warnings():
warnings.resetwarnings()
warnings.simplefilter("error")
ignore_numpy_warning()
+ ignore_igraph_warning()
def ignore_numpy_warning():
@@ -25,6 +26,14 @@ def ignore_numpy_warning():
"matrices or deal with linear algebra ")
+def ignore_igraph_warning():
+ warnings.filterwarnings(
+ "ignore", category=DeprecationWarning,
+ message="The SafeConfigParser class has been renamed to ConfigParser "
+ "in Python 3.2. This alias will be removed in future versions. Use "
+ "ConfigParser directly instead")
+
+
reset_warnings()
global digits
@@ -38,7 +47,7 @@ def generate_swiss_roll(n_samples=1000, noise=0.5, seed=42):
t = 1.5 * np.pi * (1 + 2 * generator.rand(1, n_samples))
x = t * np.cos(t)
y = t * np.sin(t)
- sample_idx = np.random.choice([0, 1], n_samples, replace=True)
+ sample_idx = generator.choice([0, 1], n_samples, replace=True)
z = sample_idx
t = np.squeeze(t)
X = np.concatenate((x, y))
diff --git a/test/test_api.py b/test/test_api.py
index c099086..49d2126 100644
--- a/test/test_api.py
+++ b/test/test_api.py
@@ -1,10 +1,60 @@
+from __future__ import print_function
from load_tests import (
nose2,
data,
build_graph,
raises,
+ warns,
)
+import warnings
+import igraph
+import numpy as np
+import graphtools
+
+
+def test_from_igraph():
+ n = 100
+ m = 500
+ K = np.zeros((n, n))
+ for _ in range(m):
+ e = np.random.choice(n, 2, replace=False)
+ K[e[0], e[1]] = K[e[1], e[0]] = 1
+ g = igraph.Graph.Adjacency(K.tolist())
+ G = graphtools.from_igraph(g)
+ G2 = graphtools.Graph(K, precomputed='adjacency')
+ assert np.all(G.K == G2.K)
+
+
+@warns(UserWarning)
+def test_from_igraph_invalid_precomputed():
+ n = 100
+ m = 500
+ K = np.zeros((n, n))
+ for _ in range(m):
+ e = np.random.choice(n, 2, replace=False)
+ K[e[0], e[1]] = K[e[1], e[0]] = 1
+ g = igraph.Graph.Adjacency(K.tolist())
+ G = graphtools.from_igraph(g, precomputed='affinity')
+
+
+def test_to_pygsp():
+ G = build_graph(data)
+ G2 = G.to_pygsp()
+ assert isinstance(G2, graphtools.graphs.PyGSPGraph)
+ assert np.all(G2.K == G.K)
+
+
+@warns(UserWarning)
+def test_to_pygsp_invalid_precomputed():
+ G = build_graph(data)
+ G2 = G.to_pygsp(precomputed='adjacency')
+
+
+@warns(UserWarning)
+def test_to_pygsp_invalid_use_pygsp():
+ G = build_graph(data)
+ G2 = G.to_pygsp(use_pygsp=False)
#####################################################
# Check parameters
diff --git a/test/test_data.py b/test/test_data.py
index 39d7966..dfa0889 100644
--- a/test/test_data.py
+++ b/test/test_data.py
@@ -1,3 +1,4 @@
+from __future__ import print_function
from load_tests import (
np,
sp,
diff --git a/test/test_exact.py b/test/test_exact.py
index 542e625..80d84f0 100644
--- a/test/test_exact.py
+++ b/test/test_exact.py
@@ -1,3 +1,4 @@
+from __future__ import print_function
from load_tests import (
graphtools,
np,
@@ -68,6 +69,13 @@ def test_precomputed_negative():
n_pca=None)
+@raises(ValueError)
+def test_precomputed_invalid():
+ build_graph(np.random.uniform(0, 1, [200, 200]),
+ precomputed='invalid',
+ n_pca=None)
+
+
@warns(RuntimeWarning)
def test_duplicate_data():
build_graph(np.vstack([data, data[:10]]),
@@ -76,6 +84,15 @@ def test_duplicate_data():
thresh=0)
+@warns(UserWarning)
+def test_k_too_large():
+ build_graph(data,
+ n_pca=20,
+ decay=10,
+ knn=len(data) + 1,
+ thresh=0)
+
+
#####################################################
# Check kernel
#####################################################
@@ -289,6 +306,48 @@ def test_truncated_exact_graph_no_pca():
assert(isinstance(G2, graphtools.graphs.TraditionalGraph))
+def test_exact_graph_fixed_bandwidth():
+ decay = 5
+ bandwidth = 2
+ n_pca = 20
+ pca = PCA(n_pca, svd_solver='randomized', random_state=42).fit(data)
+ data_nu = pca.transform(data)
+ pdx = squareform(pdist(data_nu, metric='euclidean'))
+ K = np.exp(-1 * (pdx / bandwidth)**decay)
+ K = K + K.T
+ W = np.divide(K, 2)
+ np.fill_diagonal(W, 0)
+ G = pygsp.graphs.Graph(W)
+ G2 = build_graph(data, n_pca=n_pca,
+ graphtype='exact',
+ decay=decay, bandwidth=bandwidth,
+ random_state=42,
+ thresh=0,
+ use_pygsp=True)
+ assert(isinstance(G2, graphtools.graphs.TraditionalGraph))
+ assert(G.N == G2.N)
+ assert(np.all(G.d == G2.d))
+ assert((G2.W != G.W).sum() == 0)
+ assert((G.W != G2.W).nnz == 0)
+ bandwidth = np.random.gamma(5, 0.5, len(data))
+ K = np.exp(-1 * (pdx.T / bandwidth).T**decay)
+ K = K + K.T
+ W = np.divide(K, 2)
+ np.fill_diagonal(W, 0)
+ G = pygsp.graphs.Graph(W)
+ G2 = build_graph(data, n_pca=n_pca,
+ graphtype='exact',
+ decay=decay, bandwidth=bandwidth,
+ random_state=42,
+ thresh=0,
+ use_pygsp=True)
+ assert(isinstance(G2, graphtools.graphs.TraditionalGraph))
+ assert(G.N == G2.N)
+ assert(np.all(G.d == G2.d))
+ assert((G2.W != G.W).sum() == 0)
+ assert((G.W != G2.W).nnz == 0)
+
+
#####################################################
# Check interpolation
#####################################################
@@ -348,15 +407,17 @@ def test_set_params():
assert G.get_params() == {'n_pca': 20,
'random_state': 42,
'kernel_symm': '+',
- 'gamma': None,
+ 'theta': None,
'knn': 3,
'decay': 10,
+ 'bandwidth': None,
'distance': 'euclidean',
'precomputed': None}
assert_raises(ValueError, G.set_params, knn=15)
assert_raises(ValueError, G.set_params, decay=15)
assert_raises(ValueError, G.set_params, distance='manhattan')
assert_raises(ValueError, G.set_params, precomputed='distance')
+ assert_raises(ValueError, G.set_params, bandwidth=5)
G.set_params(knn=G.knn,
decay=G.decay,
distance=G.distance,
diff --git a/test/test_knn.py b/test/test_knn.py
index b8682c2..7d15b0d 100644
--- a/test/test_knn.py
+++ b/test/test_knn.py
@@ -1,3 +1,4 @@
+from __future__ import print_function
from load_tests import (
graphtools,
np,
@@ -45,6 +46,24 @@ def test_duplicate_data():
thresh=1e-4)
+@warns(UserWarning)
+def test_balltree_cosine():
+ build_graph(data,
+ n_pca=20,
+ decay=10,
+ distance='cosine',
+ thresh=1e-4)
+
+
+@warns(UserWarning)
+def test_k_too_large():
+ build_graph(data,
+ n_pca=20,
+ decay=10,
+ knn=len(data) + 1,
+ thresh=1e-4)
+
+
#####################################################
# Check kernel
#####################################################
@@ -124,6 +143,52 @@ def test_sparse_alpha_knn_graph():
assert(isinstance(G2, graphtools.graphs.kNNGraph))
+def test_knn_graph_fixed_bandwidth():
+ k = 3
+ decay = 5
+ bandwidth = 10
+ n_pca = 20
+ thresh = 1e-4
+ pca = PCA(n_pca, svd_solver='randomized', random_state=42).fit(data)
+ data_nu = pca.transform(data)
+ pdx = squareform(pdist(data_nu, metric='euclidean'))
+ K = np.exp(-1 * np.power(pdx / bandwidth, decay))
+ K[K < thresh] = 0
+ K = K + K.T
+ W = np.divide(K, 2)
+ np.fill_diagonal(W, 0)
+ G = pygsp.graphs.Graph(W)
+ G2 = build_graph(data, n_pca=n_pca,
+ decay=decay, bandwidth=bandwidth,
+ knn=k, random_state=42,
+ thresh=thresh,
+ use_pygsp=True)
+ assert(isinstance(G2, graphtools.graphs.kNNGraph))
+ np.testing.assert_array_equal(G.N, G2.N)
+ np.testing.assert_array_equal(G.d, G2.d)
+ np.testing.assert_allclose(
+ (G.W - G2.W).data,
+ np.zeros_like((G.W - G2.W).data), atol=1e-14)
+ bandwidth = np.random.gamma(20, 0.5, len(data))
+ K = np.exp(-1 * (pdx.T / bandwidth).T**decay)
+ K[K < thresh] = 0
+ K = K + K.T
+ W = np.divide(K, 2)
+ np.fill_diagonal(W, 0)
+ G = pygsp.graphs.Graph(W)
+ G2 = build_graph(data, n_pca=n_pca,
+ decay=decay, bandwidth=bandwidth,
+ knn=k, random_state=42,
+ thresh=thresh,
+ use_pygsp=True)
+ assert(isinstance(G2, graphtools.graphs.kNNGraph))
+ np.testing.assert_array_equal(G.N, G2.N)
+ np.testing.assert_allclose(G.dw, G2.dw, atol=1e-14)
+ np.testing.assert_allclose(
+ (G.W - G2.W).data,
+ np.zeros_like((G.W - G2.W).data), atol=1e-14)
+
+
@warns(UserWarning)
def test_knn_graph_sparse_no_pca():
build_graph(sp.coo_matrix(data), n_pca=None, # n_pca,
@@ -184,9 +249,10 @@ def test_set_params():
'n_pca': 20,
'random_state': 42,
'kernel_symm': '+',
- 'gamma': None,
+ 'theta': None,
'knn': 3,
'decay': None,
+ 'bandwidth': None,
'distance': 'euclidean',
'thresh': 0,
'n_jobs': -1,
@@ -204,11 +270,12 @@ def test_set_params():
assert_raises(ValueError, G.set_params, decay=10)
assert_raises(ValueError, G.set_params, distance='manhattan')
assert_raises(ValueError, G.set_params, thresh=1e-3)
- assert_raises(ValueError, G.set_params, gamma=0.99)
+ assert_raises(ValueError, G.set_params, theta=0.99)
assert_raises(ValueError, G.set_params, kernel_symm='*')
+ assert_raises(ValueError, G.set_params, bandwidth=5)
G.set_params(knn=G.knn,
decay=G.decay,
thresh=G.thresh,
distance=G.distance,
- gamma=G.gamma,
+ theta=G.theta,
kernel_symm=G.kernel_symm)
diff --git a/test/test_landmark.py b/test/test_landmark.py
index 42d9025..da0fbbc 100644
--- a/test/test_landmark.py
+++ b/test/test_landmark.py
@@ -1,3 +1,4 @@
+from __future__ import print_function
from load_tests import (
graphtools,
np,
@@ -42,6 +43,15 @@ def test_landmark_exact_graph():
assert(G.landmark_op.shape == (n_landmark, n_landmark))
assert(isinstance(G, graphtools.graphs.TraditionalGraph))
assert(isinstance(G, graphtools.graphs.LandmarkGraph))
+ assert(G.transitions.shape == (data.shape[0], n_landmark))
+ assert(G.clusters.shape == (data.shape[0],))
+ assert(len(np.unique(G.clusters)) <= n_landmark)
+ signal = np.random.normal(0, 1, [n_landmark, 10])
+ interpolated_signal = G.interpolate(signal)
+ assert interpolated_signal.shape == (data.shape[0], signal.shape[1])
+ G._reset_landmarks()
+ # no error on double delete
+ G._reset_landmarks()
def test_landmark_knn_graph():
@@ -49,6 +59,7 @@ def test_landmark_knn_graph():
# knn graph
G = build_graph(data, n_landmark=n_landmark, n_pca=20,
decay=None, knn=5, random_state=42)
+ assert(G.transitions.shape == (data.shape[0], n_landmark))
assert(G.landmark_op.shape == (n_landmark, n_landmark))
assert(isinstance(G, graphtools.graphs.kNNGraph))
assert(isinstance(G, graphtools.graphs.LandmarkGraph))
@@ -62,6 +73,7 @@ def test_landmark_mnn_graph():
thresh=1e-5, n_pca=None,
decay=10, knn=5, random_state=42,
sample_idx=sample_idx)
+ assert(G.clusters.shape == (X.shape[0],))
assert(G.landmark_op.shape == (n_landmark, n_landmark))
assert(isinstance(G, graphtools.graphs.MNNGraph))
assert(isinstance(G, graphtools.graphs.LandmarkGraph))
@@ -132,18 +144,19 @@ def test_verbose():
def test_set_params():
G = build_graph(data, n_landmark=500, decay=None)
G.landmark_op
- assert G.get_params() == {'n_pca': 20,
- 'random_state': 42,
- 'kernel_symm': '+',
- 'gamma': None,
- 'n_landmark': 500,
- 'knn': 3,
- 'decay': None,
- 'distance':
- 'euclidean',
- 'thresh': 0,
- 'n_jobs': -1,
- 'verbose': 0}
+ assert G.get_params() == {
+ 'n_pca': 20,
+ 'random_state': 42,
+ 'kernel_symm': '+',
+ 'theta': None,
+ 'n_landmark': 500,
+ 'knn': 3,
+ 'decay': None,
+ 'bandwidth': None,
+ 'distance': 'euclidean',
+ 'thresh': 0,
+ 'n_jobs': -1,
+ 'verbose': 0}
G.set_params(n_landmark=300)
assert G.landmark_op.shape == (300, 300)
G.set_params(n_landmark=G.n_landmark, n_svd=G.n_svd)
diff --git a/test/test_mnn.py b/test/test_mnn.py
index 330a4e4..be78437 100644
--- a/test/test_mnn.py
+++ b/test/test_mnn.py
@@ -1,3 +1,4 @@
+from __future__ import print_function
from load_tests import (
graphtools,
np,
@@ -10,8 +11,10 @@
generate_swiss_roll,
assert_raises,
raises,
+ warns,
cdist,
)
+from scipy.linalg import norm
#####################################################
@@ -49,38 +52,97 @@ def test_build_mnn_with_precomputed():
@raises(ValueError)
-def test_mnn_with_square_gamma_wrong_length():
+def test_mnn_with_square_theta_wrong_length():
n_sample = len(np.unique(digits['target']))
- # square matrix gamma of the wrong size
+ # square matrix theta of the wrong size
build_graph(
data, thresh=0, n_pca=20,
decay=10, knn=5, random_state=42,
sample_idx=digits['target'],
- kernel_symm='gamma',
- gamma=np.tile(np.linspace(0, 1, n_sample - 1),
+ kernel_symm='theta',
+ theta=np.tile(np.linspace(0, 1, n_sample - 1),
n_sample).reshape(n_sample - 1, n_sample))
@raises(ValueError)
-def test_mnn_with_vector_gamma():
+def test_mnn_with_vector_theta():
n_sample = len(np.unique(digits['target']))
- # vector gamma
+ # vector theta
+ build_graph(
+ data, thresh=0, n_pca=20,
+ decay=10, knn=5, random_state=42,
+ sample_idx=digits['target'],
+ kernel_symm='theta',
+ theta=np.linspace(0, 1, n_sample - 1))
+
+
+@raises(ValueError)
+def test_mnn_with_unbounded_theta():
+ build_graph(
+ data, thresh=0, n_pca=20,
+ decay=10, knn=5, random_state=42,
+ sample_idx=digits['target'],
+ kernel_symm='theta',
+ theta=2)
+
+
+@raises(ValueError)
+def test_mnn_with_string_theta():
+ build_graph(
+ data, thresh=0, n_pca=20,
+ decay=10, knn=5, random_state=42,
+ sample_idx=digits['target'],
+ kernel_symm='theta',
+ theta='invalid')
+
+
+@warns(FutureWarning)
+def test_mnn_with_gamma():
+ build_graph(
+ data, thresh=0, n_pca=20,
+ decay=10, knn=5, random_state=42,
+ sample_idx=digits['target'],
+ kernel_symm='theta',
+ gamma=0.9)
+
+
+@warns(FutureWarning)
+def test_mnn_with_kernel_symm_gamma():
build_graph(
data, thresh=0, n_pca=20,
decay=10, knn=5, random_state=42,
sample_idx=digits['target'],
kernel_symm='gamma',
- gamma=np.linspace(0, 1, n_sample - 1))
+ theta=0.9)
+
+
+@warns(UserWarning)
+def test_mnn_with_theta_and_kernel_symm_not_theta():
+ build_graph(
+ data, thresh=0, n_pca=20,
+ decay=10, knn=5, random_state=42,
+ sample_idx=digits['target'],
+ kernel_symm='+',
+ theta=0.9)
+
+
+@warns(UserWarning)
+def test_mnn_with_kernel_symmm_theta_and_no_theta():
+ build_graph(
+ data, thresh=0, n_pca=20,
+ decay=10, knn=5, random_state=42,
+ sample_idx=digits['target'],
+ kernel_symm='theta')
def test_mnn_with_non_zero_indexed_sample_idx():
X, sample_idx = generate_swiss_roll()
G = build_graph(X, sample_idx=sample_idx,
- kernel_symm='gamma', gamma=0.5,
+ kernel_symm='theta', theta=0.5,
n_pca=None, use_pygsp=True)
sample_idx += 1
G2 = build_graph(X, sample_idx=sample_idx,
- kernel_symm='gamma', gamma=0.5,
+ kernel_symm='theta', theta=0.5,
n_pca=None, use_pygsp=True)
assert G.N == G2.N
assert np.all(G.d == G2.d)
@@ -92,11 +154,11 @@ def test_mnn_with_non_zero_indexed_sample_idx():
def test_mnn_with_string_sample_idx():
X, sample_idx = generate_swiss_roll()
G = build_graph(X, sample_idx=sample_idx,
- kernel_symm='gamma', gamma=0.5,
+ kernel_symm='theta', theta=0.5,
n_pca=None, use_pygsp=True)
sample_idx = np.where(sample_idx == 0, 'a', 'b')
G2 = build_graph(X, sample_idx=sample_idx,
- kernel_symm='gamma', gamma=0.5,
+ kernel_symm='theta', theta=0.5,
n_pca=None, use_pygsp=True)
assert G.N == G2.N
assert np.all(G.d == G2.d)
@@ -110,13 +172,13 @@ def test_mnn_with_string_sample_idx():
#####################################################
-def test_mnn_graph_float_gamma():
+def test_mnn_graph_float_theta():
X, sample_idx = generate_swiss_roll()
- gamma = 0.9
+ theta = 0.9
k = 10
a = 20
metric = 'euclidean'
- beta = 0
+ beta = 0.5
samples = np.unique(sample_idx)
K = np.zeros((len(X), len(X)))
@@ -133,31 +195,46 @@ def test_mnn_graph_float_gamma():
pdxe_ij = pdx_ij / e_ij[:, np.newaxis] # normalize
k_ij = np.exp(-1 * (pdxe_ij ** a)) # apply alpha-decaying kernel
if si == sj:
- K.iloc[sample_idx == si, sample_idx == sj] = k_ij * \
- (1 - beta) # fill out values in K for NN on diagonal
+ K.iloc[sample_idx == si, sample_idx == sj] = (
+ k_ij + k_ij.T) / 2
else:
- # fill out values in K for NN on diagonal
+ # fill out values in K for NN off diagonal
K.iloc[sample_idx == si, sample_idx == sj] = k_ij
-
- W = np.array((gamma * np.minimum(K, K.T)) +
- ((1 - gamma) * np.maximum(K, K.T)))
+ Kn = K.copy()
+ for i in samples:
+ curr_K = K.iloc[sample_idx == i, sample_idx == i]
+ i_norm = norm(curr_K, 1, axis=1)
+ for j in samples:
+ if i == j:
+ continue
+ else:
+ curr_K = K.iloc[sample_idx == i, sample_idx == j]
+ curr_norm = norm(curr_K, 1, axis=1)
+ scale = np.minimum(
+ np.ones(len(curr_norm)), i_norm / curr_norm) * beta
+ Kn.iloc[sample_idx == i, sample_idx == j] = (
+ curr_K.T * scale).T
+
+ K = Kn
+ W = np.array((theta * np.minimum(K, K.T)) +
+ ((1 - theta) * np.maximum(K, K.T)))
np.fill_diagonal(W, 0)
G = pygsp.graphs.Graph(W)
- G2 = graphtools.Graph(X, knn=k + 1, decay=a, beta=1 - beta,
- kernel_symm='gamma', gamma=gamma,
+ G2 = graphtools.Graph(X, knn=k + 1, decay=a, beta=beta,
+ kernel_symm='theta', theta=theta,
distance=metric, sample_idx=sample_idx, thresh=0,
use_pygsp=True)
assert G.N == G2.N
- assert np.all(G.d == G2.d)
+ np.testing.assert_array_equal(G.dw, G2.dw)
assert (G.W != G2.W).nnz == 0
assert (G2.W != G.W).sum() == 0
assert isinstance(G2, graphtools.graphs.MNNGraph)
-def test_mnn_graph_matrix_gamma():
+def test_mnn_graph_matrix_theta():
X, sample_idx = generate_swiss_roll()
bs = 0.8
- gamma = np.array([[1, bs], # 0
+ theta = np.array([[1, bs], # 0
[bs, 1]]) # 3
k = 10
a = 20
@@ -179,30 +256,46 @@ def test_mnn_graph_matrix_gamma():
pdxe_ij = pdx_ij / e_ij[:, np.newaxis] # normalize
k_ij = np.exp(-1 * (pdxe_ij ** a)) # apply alpha-decaying kernel
if si == sj:
- K.iloc[sample_idx == si, sample_idx == sj] = k_ij * \
- (1 - beta) # fill out values in K for NN on diagonal
+ K.iloc[sample_idx == si, sample_idx == sj] = (
+ k_ij + k_ij.T) / 2
else:
- # fill out values in K for NN on diagonal
+ # fill out values in K for NN off diagonal
K.iloc[sample_idx == si, sample_idx == sj] = k_ij
+ Kn = K.copy()
+ for i in samples:
+ curr_K = K.iloc[sample_idx == i, sample_idx == i]
+ i_norm = norm(curr_K, 1, axis=1)
+ for j in samples:
+ if i == j:
+ continue
+ else:
+ curr_K = K.iloc[sample_idx == i, sample_idx == j]
+ curr_norm = norm(curr_K, 1, axis=1)
+ scale = np.minimum(
+ np.ones(len(curr_norm)), i_norm / curr_norm) * beta
+ Kn.iloc[sample_idx == i, sample_idx == j] = (
+ curr_K.T * scale).T
+
+ K = Kn
K = np.array(K)
- matrix_gamma = pd.DataFrame(np.zeros((len(sample_idx), len(sample_idx))))
+ matrix_theta = pd.DataFrame(np.zeros((len(sample_idx), len(sample_idx))))
for ix, si in enumerate(set(sample_idx)):
for jx, sj in enumerate(set(sample_idx)):
- matrix_gamma.iloc[sample_idx == si,
- sample_idx == sj] = gamma[ix, jx]
+ matrix_theta.iloc[sample_idx == si,
+ sample_idx == sj] = theta[ix, jx]
- W = np.array((matrix_gamma * np.minimum(K, K.T)) +
- ((1 - matrix_gamma) * np.maximum(K, K.T)))
+ W = np.array((matrix_theta * np.minimum(K, K.T)) +
+ ((1 - matrix_theta) * np.maximum(K, K.T)))
np.fill_diagonal(W, 0)
G = pygsp.graphs.Graph(W)
- G2 = graphtools.Graph(X, knn=k + 1, decay=a, beta=1 - beta,
- kernel_symm='gamma', gamma=gamma,
+ G2 = graphtools.Graph(X, knn=k + 1, decay=a, beta=beta,
+ kernel_symm='theta', theta=theta,
distance=metric, sample_idx=sample_idx, thresh=0,
use_pygsp=True)
assert G.N == G2.N
- assert np.all(G.d == G2.d)
+ np.testing.assert_array_equal(G.dw, G2.dw)
assert (G.W != G2.W).nnz == 0
assert (G2.W != G.W).sum() == 0
assert isinstance(G2, graphtools.graphs.MNNGraph)
@@ -220,25 +313,26 @@ def test_verbose():
print()
print("Verbose test: MNN")
build_graph(X, sample_idx=sample_idx,
- kernel_symm='gamma', gamma=0.5,
+ kernel_symm='theta', theta=0.5,
n_pca=None, verbose=True)
def test_set_params():
X, sample_idx = generate_swiss_roll()
G = build_graph(X, sample_idx=sample_idx,
- kernel_symm='gamma', gamma=0.5,
+ kernel_symm='theta', theta=0.5,
n_pca=None,
thresh=1e-4)
assert G.get_params() == {
'n_pca': None,
'random_state': 42,
- 'kernel_symm': 'gamma',
- 'gamma': 0.5,
+ 'kernel_symm': 'theta',
+ 'theta': 0.5,
'beta': 1,
- 'adaptive_k': 'sqrt',
+ 'adaptive_k': None,
'knn': 3,
'decay': 10,
+ 'bandwidth': None,
'distance': 'euclidean',
'thresh': 1e-4,
'n_jobs': 1
diff --git a/unittest.cfg b/unittest.cfg
index 0f1a4ec..85c81ba 100644
--- a/unittest.cfg
+++ b/unittest.cfg
@@ -3,4 +3,4 @@ verbose = True
[coverage]
always-on = True
-coverage = graphtools
\ No newline at end of file
+coverage = graphtools