diff --git a/README.rst b/README.rst index 7e3dd0b..2859790 100644 --- a/README.rst +++ b/README.rst @@ -5,8 +5,8 @@ graphtools .. image:: https://img.shields.io/pypi/v/graphtools.svg :target: https://pypi.org/project/graphtools/ :alt: Latest PyPi version -.. image:: https://anaconda.org/conda-forge/tasklogger/badges/version.svg - :target: https://anaconda.org/conda-forge/tasklogger/ +.. image:: https://anaconda.org/conda-forge/graphtools/badges/version.svg + :target: https://anaconda.org/conda-forge/graphtools/ :alt: Latest Conda version .. image:: https://api.travis-ci.com/KrishnaswamyLab/graphtools.svg?branch=master :target: https://travis-ci.com/KrishnaswamyLab/graphtools @@ -39,11 +39,7 @@ Alternatively, graphtools can be installed using `Conda Or, to install the latest version from github:: - pip install --user git+git://github.com/KrishnaswamyLab/graphtools.git - -Alternatively, graphtools can be installed using [Conda](https://conda.io/docs/) (most easily obtained via the [Miniconda Python distribution](https://conda.io/miniconda.html)): - - conda install -c conda-forge graphtools + pip install --user git+git://github.com/KrishnaswamyLab/graphtools.git Usage example ------------- diff --git a/doc/source/index.rst b/doc/source/index.rst index 16a6ab7..0cddedf 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -6,6 +6,10 @@ graphtools Latest PyPi version +.. raw:: html + + Latest Conda version + .. raw:: html Travis CI Build diff --git a/graphtools/api.py b/graphtools/api.py index 9e5d31b..16c2e8b 100644 --- a/graphtools/api.py +++ b/graphtools/api.py @@ -15,6 +15,7 @@ def Graph(data, knn=5, decay=10, bandwidth=None, + anisotropy=0, distance='euclidean', thresh=1e-4, kernel_symm='+', @@ -68,6 +69,10 @@ def Graph(data, bandwidth or a list-like (shape=[n_samples]) of bandwidths for each sample. + anisotropy : float, optional (default: 0) + Level of anisotropy between 0 and 1 + (alpha in Coifman & Lafon, 2006) + distance : `str`, optional (default: `'euclidean'`) Any metric from `scipy.spatial.distance` can be used distance metric for building kNN graph. @@ -230,7 +235,7 @@ def Graph(data, return Graph(**params) -def from_igraph(G, **kwargs): +def from_igraph(G, attribute="weight", **kwargs): """Convert an igraph.Graph to a graphtools.Graph Creates a graphtools.graphs.TraditionalGraph with a @@ -240,6 +245,9 @@ def from_igraph(G, **kwargs): ---------- G : igraph.Graph Graph to be converted + attribute : str, optional (default: "weight") + attribute containing edge weights, if any. + If None, unweighted graph is built kwargs keyword arguments for graphtools.Graph @@ -254,5 +262,13 @@ def from_igraph(G, **kwargs): "Use 'adjacency' instead.".format(kwargs['precomputed']), UserWarning) del kwargs['precomputed'] - return Graph(sparse.coo_matrix(G.get_adjacency().data), + try: + K = G.get_adjacency(attribute=attribute).data + except ValueError as e: + if str(e) == "Attribute does not exist": + warnings.warn("Edge attribute {} not found. " + "Returning unweighted graph".format(attribute), + UserWarning) + K = G.get_adjacency(attribute=None).data + return Graph(sparse.coo_matrix(K), precomputed='adjacency', **kwargs) diff --git a/graphtools/base.py b/graphtools/base.py index 5f1a2ab..c084a55 100644 --- a/graphtools/base.py +++ b/graphtools/base.py @@ -23,9 +23,7 @@ # anndata not installed pass -from .utils import (elementwise_minimum, - elementwise_maximum, - set_diagonal) +from . import utils class Base(object): @@ -318,6 +316,10 @@ class BaseGraph(with_metaclass(abc.ABCMeta, Base)): Min-max symmetrization constant. K = `theta * min(K, K.T) + (1 - theta) * max(K, K.T)` + anisotropy : float, optional (default: 0) + Level of anisotropy between 0 and 1 + (alpha in Coifman & Lafon, 2006) + initialize : `bool`, optional (default : `True`) if false, don't create the kernel matrix. @@ -336,8 +338,10 @@ class BaseGraph(with_metaclass(abc.ABCMeta, Base)): diff_op : synonym for `P` """ - def __init__(self, kernel_symm='+', + def __init__(self, + kernel_symm='+', theta=None, + anisotropy=0, gamma=None, initialize=True, **kwargs): if gamma is not None: @@ -351,6 +355,10 @@ def __init__(self, kernel_symm='+', self.kernel_symm = kernel_symm self.theta = theta self._check_symmetrization(kernel_symm, theta) + if not (isinstance(anisotropy, numbers.Real) and 0 <= anisotropy <= 1): + raise ValueError("Expected 0 <= anisotropy <= 1. " + "Got {}".format(anisotropy)) + self.anisotropy = anisotropy if initialize: tasklogger.log_debug("Initializing kernel...") @@ -395,6 +403,7 @@ def _build_kernel(self): """ kernel = self.build_kernel() kernel = self.symmetrize_kernel(kernel) + kernel = self.apply_anisotropy(kernel) if (kernel - kernel.T).max() > 1e-5: warnings.warn("K should be symmetric", RuntimeWarning) if np.any(kernel.diagonal == 0): @@ -412,8 +421,8 @@ def symmetrize_kernel(self, K): elif self.kernel_symm == 'theta': tasklogger.log_debug( "Using theta symmetrization (theta = {}).".format(self.theta)) - K = self.theta * elementwise_minimum(K, K.T) + \ - (1 - self.theta) * elementwise_maximum(K, K.T) + K = self.theta * utils.elementwise_minimum(K, K.T) + \ + (1 - self.theta) * utils.elementwise_maximum(K, K.T) elif self.kernel_symm is None: tasklogger.log_debug("Using no symmetrization.") pass @@ -424,11 +433,27 @@ def symmetrize_kernel(self, K): "Got {}".format(self.theta)) return K + def apply_anisotropy(self, K): + if self.anisotropy == 0: + # do nothing + return K + else: + if sparse.issparse(K): + d = np.array(K.sum(1)).flatten() + K = K.tocoo() + K.data = K.data / ((d[K.row] * d[K.col]) ** self.anisotropy) + K = K.tocsr() + else: + d = K.sum(1) + K = K / (np.outer(d, d) ** self.anisotropy) + return K + def get_params(self): """Get parameters from this object """ return {'kernel_symm': self.kernel_symm, - 'theta': self.theta} + 'theta': self.theta, + 'anisotropy': self.anisotropy} def set_params(self, **params): """Set parameters on this object @@ -450,6 +475,9 @@ def set_params(self, **params): """ if 'theta' in params and params['theta'] != self.theta: raise ValueError("Cannot update theta. Please create a new graph") + if 'anisotropy' in params and params['anisotropy'] != self.anisotropy: + raise ValueError( + "Cannot update anisotropy. Please create a new graph") if 'kernel_symm' in params and \ params['kernel_symm'] != self.kernel_symm: raise ValueError( @@ -580,6 +608,30 @@ def to_pygsp(self, **kwargs): precomputed="affinity", use_pygsp=True, **kwargs) + def to_igraph(self, attribute="weight", **kwargs): + """Convert to an igraph Graph + + Uses the igraph.Graph.Weighted_Adjacency constructor + + Parameters + ---------- + attribute : str, optional (default: "weight") + kwargs : additional arguments for igraph.Graph.Weighted_Adjacency + """ + try: + import igraph as ig + except ImportError: + raise ImportError("Please install igraph with " + "`pip install --user python-igraph`.") + try: + W = self.W + except AttributeError: + # not a pygsp graph + W = self.K.copy() + W = utils.set_diagonal(W, 0) + return ig.Graph.Weighted_Adjacency(utils.to_dense(W).tolist(), + attr=attribute, **kwargs) + class PyGSPGraph(with_metaclass(abc.ABCMeta, pygsp.graphs.Graph, Base)): """Interface between BaseGraph and PyGSP. @@ -634,7 +686,7 @@ def _build_weight_from_kernel(self, kernel): weight = kernel.copy() self._diagonal = weight.diagonal().copy() - weight = set_diagonal(weight, 0) + weight = utils.set_diagonal(weight, 0) return weight diff --git a/graphtools/graphs.py b/graphtools/graphs.py index a0810da..e67ef5d 100644 --- a/graphtools/graphs.py +++ b/graphtools/graphs.py @@ -44,7 +44,8 @@ class kNNGraph(DataGraph): distance : `str`, optional (default: `'euclidean'`) Any metric from `scipy.spatial.distance` can be used - distance metric for building kNN graph. + distance metric for building kNN graph. Custom distance + functions of form `f(x, y) = d` are also accepted. TODO: actually sklearn.neighbors has even more choices thresh : `float`, optional (default: `1e-4`) diff --git a/graphtools/utils.py b/graphtools/utils.py index c7fbabd..e2d9290 100644 --- a/graphtools/utils.py +++ b/graphtools/utils.py @@ -44,3 +44,9 @@ def set_diagonal(X, diag): def set_submatrix(X, i, j, values): X[np.ix_(i, j)] = values return X + + +def to_dense(X): + if sparse.issparse(X): + X = X.toarray() + return X diff --git a/graphtools/version.py b/graphtools/version.py index d3ec452..3ced358 100644 --- a/graphtools/version.py +++ b/graphtools/version.py @@ -1 +1 @@ -__version__ = "0.2.0" +__version__ = "0.2.1" diff --git a/requirements.txt b/requirements.txt index 08e1515..f584815 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ numpy>=1.14.0 scipy>=1.1.0 pygsp>=>=0.5.1 -scikit-learn>=0.19.1 +scikit-learn>=0.20.0 future tasklogger>=0.4.0 diff --git a/setup.py b/setup.py index f67b380..f995b52 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ 'numpy>=1.14.0', 'scipy>=1.1.0', 'pygsp>=0.5.1', - 'scikit-learn>=0.19.1', + 'scikit-learn>=0.20.0', 'future', 'tasklogger>=0.4.0', ] diff --git a/test/load_tests/__init__.py b/test/load_tests/__init__.py index f018957..ce12fee 100644 --- a/test/load_tests/__init__.py +++ b/test/load_tests/__init__.py @@ -17,6 +17,7 @@ def reset_warnings(): warnings.simplefilter("error") ignore_numpy_warning() ignore_igraph_warning() + ignore_joblib_warning() def ignore_numpy_warning(): @@ -34,6 +35,13 @@ def ignore_igraph_warning(): "ConfigParser directly instead") +def ignore_joblib_warning(): + warnings.filterwarnings( + "ignore", category=DeprecationWarning, + message="check_pickle is deprecated in joblib 0.12 and will be removed" + " in 0.13") + + reset_warnings() global digits diff --git a/test/test_api.py b/test/test_api.py index 49d2126..e8134af 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -1,12 +1,10 @@ from __future__ import print_function from load_tests import ( - nose2, data, build_graph, raises, warns, ) -import warnings import igraph import numpy as np @@ -21,6 +19,19 @@ def test_from_igraph(): e = np.random.choice(n, 2, replace=False) K[e[0], e[1]] = K[e[1], e[0]] = 1 g = igraph.Graph.Adjacency(K.tolist()) + G = graphtools.from_igraph(g, attribute=None) + G2 = graphtools.Graph(K, precomputed='adjacency') + assert np.all(G.K == G2.K) + + +def test_from_igraph_weighted(): + n = 100 + m = 500 + K = np.zeros((n, n)) + for _ in range(m): + e = np.random.choice(n, 2, replace=False) + K[e[0], e[1]] = K[e[1], e[0]] = np.random.uniform(0, 1) + g = igraph.Graph.Weighted_Adjacency(K.tolist()) G = graphtools.from_igraph(g) G2 = graphtools.Graph(K, precomputed='adjacency') assert np.all(G.K == G2.K) @@ -35,7 +46,19 @@ def test_from_igraph_invalid_precomputed(): e = np.random.choice(n, 2, replace=False) K[e[0], e[1]] = K[e[1], e[0]] = 1 g = igraph.Graph.Adjacency(K.tolist()) - G = graphtools.from_igraph(g, precomputed='affinity') + G = graphtools.from_igraph(g, attribute=None, precomputed='affinity') + + +@warns(UserWarning) +def test_from_igraph_invalid_attribute(): + n = 100 + m = 500 + K = np.zeros((n, n)) + for _ in range(m): + e = np.random.choice(n, 2, replace=False) + K[e[0], e[1]] = K[e[1], e[0]] = 1 + g = igraph.Graph.Adjacency(K.tolist()) + G = graphtools.from_igraph(g, attribute="invalid") def test_to_pygsp(): @@ -45,6 +68,14 @@ def test_to_pygsp(): assert np.all(G2.K == G.K) +def test_to_igraph(): + G = build_graph(data, use_pygsp=True) + G2 = G.to_igraph() + assert isinstance(G2, igraph.Graph) + assert np.all(np.array(G2.get_adjacency( + attribute="weight").data) == G.W) + + @warns(UserWarning) def test_to_pygsp_invalid_precomputed(): G = build_graph(data) diff --git a/test/test_exact.py b/test/test_exact.py index 80d84f0..1a0ceb3 100644 --- a/test/test_exact.py +++ b/test/test_exact.py @@ -348,6 +348,51 @@ def test_exact_graph_fixed_bandwidth(): assert((G.W != G2.W).nnz == 0) +##################################################### +# Check anisotropy +##################################################### + +def test_exact_graph_anisotropy(): + k = 3 + a = 13 + n_pca = 20 + anisotropy = 0.9 + data_small = data[np.random.choice( + len(data), len(data) // 2, replace=False)] + pca = PCA(n_pca, svd_solver='randomized', random_state=42).fit(data_small) + data_small_nu = pca.transform(data_small) + pdx = squareform(pdist(data_small_nu, metric='euclidean')) + knn_dist = np.partition(pdx, k, axis=1)[:, :k] + epsilon = np.max(knn_dist, axis=1) + weighted_pdx = (pdx.T / epsilon).T + K = np.exp(-1 * weighted_pdx**a) + K = K + K.T + K = np.divide(K, 2) + d = K.sum(1) + W = K / (np.outer(d, d) ** anisotropy) + np.fill_diagonal(W, 0) + G = pygsp.graphs.Graph(W) + G2 = build_graph(data_small, thresh=0, n_pca=n_pca, + decay=a, knn=k, random_state=42, + use_pygsp=True, anisotropy=anisotropy) + assert(isinstance(G2, graphtools.graphs.TraditionalGraph)) + assert(G.N == G2.N) + assert(np.all(G.d == G2.d)) + assert((G2.W != G.W).sum() == 0) + assert((G.W != G2.W).nnz == 0) + assert_raises(ValueError, build_graph, + data_small, thresh=0, n_pca=n_pca, + decay=a, knn=k, random_state=42, + use_pygsp=True, anisotropy=-1) + assert_raises(ValueError, build_graph, + data_small, thresh=0, n_pca=n_pca, + decay=a, knn=k, random_state=42, + use_pygsp=True, anisotropy=2) + assert_raises(ValueError, build_graph, + data_small, thresh=0, n_pca=n_pca, + decay=a, knn=k, random_state=42, + use_pygsp=True, anisotropy='invalid') + ##################################################### # Check interpolation ##################################################### @@ -409,6 +454,7 @@ def test_set_params(): 'kernel_symm': '+', 'theta': None, 'knn': 3, + 'anisotropy': 0, 'decay': 10, 'bandwidth': None, 'distance': 'euclidean', diff --git a/test/test_knn.py b/test/test_knn.py index 7d15b0d..f552bec 100644 --- a/test/test_knn.py +++ b/test/test_knn.py @@ -1,4 +1,4 @@ -from __future__ import print_function +from __future__ import print_function, division from load_tests import ( graphtools, np, @@ -196,6 +196,42 @@ def test_knn_graph_sparse_no_pca(): random_state=42, use_pygsp=True) +##################################################### +# Check anisotropy +##################################################### + +def test_knn_graph_anisotropy(): + k = 3 + a = 13 + n_pca = 20 + anisotropy = 0.9 + thresh = 1e-4 + data_small = data[np.random.choice( + len(data), len(data) // 2, replace=False)] + pca = PCA(n_pca, svd_solver='randomized', random_state=42).fit(data_small) + data_small_nu = pca.transform(data_small) + pdx = squareform(pdist(data_small_nu, metric='euclidean')) + knn_dist = np.partition(pdx, k, axis=1)[:, :k] + epsilon = np.max(knn_dist, axis=1) + weighted_pdx = (pdx.T / epsilon).T + K = np.exp(-1 * weighted_pdx**a) + K[K < thresh] = 0 + K = K + K.T + K = np.divide(K, 2) + d = K.sum(1) + W = K / (np.outer(d, d) ** anisotropy) + np.fill_diagonal(W, 0) + G = pygsp.graphs.Graph(W) + G2 = build_graph(data_small, n_pca=n_pca, + thresh=thresh, + decay=a, knn=k, random_state=42, + use_pygsp=True, anisotropy=anisotropy) + assert(isinstance(G2, graphtools.graphs.kNNGraph)) + assert(G.N == G2.N) + assert(np.all(G.d == G2.d)) + np.testing.assert_allclose((G2.W - G.W).data, 0, atol=1e-14, rtol=1e-14) + + ##################################################### # Check interpolation ##################################################### @@ -250,6 +286,7 @@ def test_set_params(): 'random_state': 42, 'kernel_symm': '+', 'theta': None, + 'anisotropy': 0, 'knn': 3, 'decay': None, 'bandwidth': None, @@ -272,10 +309,12 @@ def test_set_params(): assert_raises(ValueError, G.set_params, thresh=1e-3) assert_raises(ValueError, G.set_params, theta=0.99) assert_raises(ValueError, G.set_params, kernel_symm='*') + assert_raises(ValueError, G.set_params, anisotropy=0.7) assert_raises(ValueError, G.set_params, bandwidth=5) G.set_params(knn=G.knn, decay=G.decay, thresh=G.thresh, distance=G.distance, theta=G.theta, + anisotropy=G.anisotropy, kernel_symm=G.kernel_symm) diff --git a/test/test_landmark.py b/test/test_landmark.py index da0fbbc..7333207 100644 --- a/test/test_landmark.py +++ b/test/test_landmark.py @@ -150,6 +150,7 @@ def test_set_params(): 'kernel_symm': '+', 'theta': None, 'n_landmark': 500, + 'anisotropy': 0, 'knn': 3, 'decay': None, 'bandwidth': None, diff --git a/test/test_mnn.py b/test/test_mnn.py index be78437..217913d 100644 --- a/test/test_mnn.py +++ b/test/test_mnn.py @@ -328,6 +328,7 @@ def test_set_params(): 'random_state': 42, 'kernel_symm': 'theta', 'theta': 0.5, + 'anisotropy': 0, 'beta': 1, 'adaptive_k': None, 'knn': 3,