diff --git a/graphtools/base.py b/graphtools/base.py index 4fabcb0..4789b8a 100644 --- a/graphtools/base.py +++ b/graphtools/base.py @@ -15,19 +15,7 @@ import sys import tasklogger -try: - import pandas as pd -except ImportError: - # pandas not installed - pass - -try: - import anndata -except (ImportError, SyntaxError): - # anndata not installed - pass - -from . import utils +from . import matrix, utils _logger = tasklogger.get_tasklogger("graphtools") @@ -126,28 +114,19 @@ def __init__( self._check_data(data) n_pca, rank_threshold = self._parse_n_pca_threshold(data, n_pca, rank_threshold) - try: - pd - except NameError: - # pandas not installed - pass - else: - if utils.is_SparseDataFrame(data): - data = data.to_coo() - elif isinstance(data, pd.DataFrame): - try: - data = data.sparse.to_coo() - except AttributeError: - data = np.array(data) - try: - anndata - except NameError: - # anndata not installed - pass - else: - if isinstance(data, anndata.AnnData): - data = data.X + if utils.is_SparseDataFrame(data): + data = data.to_coo() + elif utils.is_DataFrame(data): + try: + # sparse data + data = data.sparse.to_coo() + except AttributeError: + # dense data + data = np.array(data) + elif utils.is_Anndata(data): + data = data.X + self.data = data self.n_pca = n_pca self.rank_threshold = rank_threshold @@ -584,18 +563,14 @@ def symmetrize_kernel(self, K): K = K.multiply(K.T) elif self.kernel_symm == "mnn": _logger.debug("Using mnn symmetrization (theta = {}).".format(self.theta)) - K = self.theta * utils.elementwise_minimum(K, K.T) + ( + K = self.theta * matrix.elementwise_minimum(K, K.T) + ( 1 - self.theta - ) * utils.elementwise_maximum(K, K.T) + ) * matrix.elementwise_maximum(K, K.T) elif self.kernel_symm is None: _logger.debug("Using no symmetrization.") pass else: - # this should never happen - raise ValueError( - "Expected kernel_symm in ['+', '*', 'mnn' or None]. 
" - "Got {}".format(self.theta) - ) + raise NotImplementedError return K def apply_anisotropy(self, K): @@ -683,7 +658,9 @@ def kernel_degree(self): try: return self._kernel_degree except AttributeError: - self._kernel_degree = utils.to_array(self.kernel.sum(axis=1)).reshape(-1, 1) + self._kernel_degree = matrix.to_array(self.kernel.sum(axis=1)).reshape( + -1, 1 + ) return self._kernel_degree @property @@ -814,7 +791,7 @@ def to_igraph(self, attribute="weight", **kwargs): """ try: import igraph as ig - except ImportError: + except ImportError: # pragma: no cover raise ImportError( "Please install igraph with " "`pip install --user python-igraph`." ) @@ -823,12 +800,12 @@ def to_igraph(self, attribute="weight", **kwargs): except AttributeError: # not a pygsp graph W = self.K.copy() - W = utils.set_diagonal(W, 0) + W = matrix.set_diagonal(W, 0) sources, targets = W.nonzero() edgelist = list(zip(sources, targets)) g = ig.Graph(W.shape[0], edgelist, **kwargs) weights = W[W.nonzero()] - weights = utils.to_array(weights) + weights = matrix.to_array(weights) g.es[attribute] = weights.flatten().tolist() return g @@ -987,7 +964,7 @@ def _build_weight_from_kernel(self, kernel): weight = kernel.copy() self._diagonal = weight.diagonal().copy() - weight = utils.set_diagonal(weight, 0) + weight = matrix.set_diagonal(weight, 0) return weight diff --git a/graphtools/estimator.py b/graphtools/estimator.py new file mode 100644 index 0000000..5cb130f --- /dev/null +++ b/graphtools/estimator.py @@ -0,0 +1,422 @@ +import numpy as np +import tasklogger +import pygsp +import abc + +from functools import partial +from scipy import sparse + +from . 
import api, graphs, base, utils, matrix + + +def attribute(attr, default=None, doc=None, on_set=None): + def getter(self, attr): + try: + return getattr(self, "_" + attr) + except AttributeError: + return default + + def setter(self, value, attr, on_set=None): + if on_set is not None: + if callable(on_set): + on_set = [on_set] + for fn in on_set: + fn(**{attr: value}) + setattr(self, "_" + attr, value) + + return property( + fget=partial(getter, attr=attr), + fset=partial(setter, attr=attr, on_set=on_set), + doc=doc, + ) + + +_logger = tasklogger.get_tasklogger("graphtools") + + +class GraphEstimator(object, metaclass=abc.ABCMeta): + """Estimator which builds a graphtools Graph + + Parameters + ---------- + + knn : int, optional, default: 5 + number of nearest neighbors on which to build kernel + + decay : int, optional, default: 40 + sets decay rate of kernel tails. + If None, alpha decaying kernel is not used + + n_landmark : int, optional, default: None + number of landmarks to use in graph construction + + n_pca : int, optional, default: 100 + Number of principal components to use for calculating + neighborhoods. For extremely large datasets, using + n_pca < 20 allows neighborhoods to be calculated in + roughly log(n_samples) time. + + distance : string, optional, default: 'euclidean' + recommended values: 'euclidean', 'cosine', 'precomputed' + Any metric from `scipy.spatial.distance` can be used + distance metric for building kNN graph. Custom distance + functions of form `f(x, y) = d` are also accepted. If 'precomputed', + `data` should be an n_samples x n_samples distance or + affinity matrix. Distance matrices are assumed to have zeros + down the diagonal, while affinity matrices are assumed to have + non-zero values down the diagonal. This is detected automatically using + `data[0,0]`. You can override this detection with + `distance='precomputed_distance'` or `distance='precomputed_affinity'`. 
+ + n_jobs : integer, optional, default: 1 + The number of jobs to use for the computation. + If -1 all CPUs are used. If 1 is given, no parallel computing code is + used at all, which is useful for debugging. + For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for + n_jobs = -2, all CPUs but one are used + + random_state : integer or numpy.RandomState, optional, default: None + If an integer is given, it fixes the seed + Defaults to the global `numpy` random number generator + + verbose : `int` or `boolean`, optional (default: 1) + If `True` or `> 0`, print status messages + + n_svd : int, optional (default: 100) + number of singular vectors to compute for landmarking + + thresh : float, optional (default: 1e-4) + threshold below which to truncate kernel + + kwargs : additional arguments for graphtools.Graph + + Attributes + ---------- + + graph : graphtools.Graph + """ + + X = attribute("X", doc="Stored input data") + graph = attribute("graph", doc="graphtools Graph object") + + @graph.setter + def graph(self, G): + self._graph = G + if G is None: + self._reset_graph() + + n_pca = attribute( + "n_pca", + default=100, + on_set=partial(utils.check_if_not, None, utils.check_positive, utils.check_int), + ) + random_state = attribute("random_state") + + knn = attribute("knn", default=5, on_set=[utils.check_positive, utils.check_int]) + decay = attribute("decay", default=40, on_set=utils.check_positive) + distance = attribute( + "distance", + default="euclidean", + on_set=partial( + utils.check_in, + [ + "euclidean", + "precomputed", + "cosine", + "correlation", + "cityblock", + "l1", + "l2", + "manhattan", + "braycurtis", + "canberra", + "chebyshev", + "dice", + "hamming", + "jaccard", + "kulsinski", + "mahalanobis", + "matching", + "minkowski", + "rogerstanimoto", + "russellrao", + "seuclidean", + "sokalmichener", + "sokalsneath", + "sqeuclidean", + "yule", + "precomputed_affinity", + "precomputed_distance", + ], + ), + ) + n_svd = attribute( + "n_svd", + 
default=100, + on_set=partial(utils.check_if_not, None, utils.check_positive, utils.check_int), + ) + n_jobs = attribute( + "n_jobs", on_set=partial(utils.check_if_not, None, utils.check_int) + ) + verbose = attribute("verbose", default=0) + thresh = attribute( + "thresh", + default=1e-4, + on_set=partial(utils.check_if_not, 0, utils.check_positive), + ) + + n_landmark = attribute("n_landmark") + + @n_landmark.setter + def n_landmark(self, n_landmark): + self._n_landmark = n_landmark + utils.check_if_not( + None, utils.check_positive, utils.check_int, n_landmark=n_landmark + ) + self._update_n_landmark(n_landmark) + + def _update_n_landmark(self, n_landmark): + if self.graph is not None: + n_landmark = self._parse_n_landmark(self.graph.data_nu, n_landmark) + if ( + n_landmark is None and isinstance(self.graph, graphs.LandmarkGraph) + ) or ( + n_landmark is not None + and not isinstance(self.graph, graphs.LandmarkGraph) + ): + # new graph but the same kernel + # there may be a better way to do this + kernel = self.graph.kernel + self.graph = None + self.fit(self.X, initialize=False) + self.graph._kernel = kernel + + def __init__( + self, + knn=5, + decay=40, + n_pca=100, + n_landmark=None, + random_state=None, + distance="euclidean", + n_svd=100, + n_jobs=1, + verbose=1, + thresh=1e-4, + **kwargs + ): + + if verbose is True: + verbose = 1 + elif verbose is False: + verbose = 0 + + self.n_pca = n_pca + self.n_landmark = n_landmark + self.random_state = random_state + self.knn = knn + self.decay = decay + self.distance = distance + self.n_svd = n_svd + self.n_jobs = n_jobs + self.verbose = verbose + self.thresh = thresh + self.kwargs = kwargs + self.logger = _logger + _logger.set_level(self.verbose) + + def set_params(self, **params): + for p in params: + if not getattr(self, p) == params[p]: + setattr(self, p, params[p]) + self._set_graph_params(**params) + + def _set_graph_params(self, **params): + if self.graph is not None: + try: + if "n_pca" in params: + 
params["n_pca"] = self._parse_n_pca( + self.graph.data_nu, params["n_pca"] + ) + if "n_svd" in params: + params["n_svd"] = self._parse_n_svd( + self.graph.data_nu, params["n_svd"] + ) + if "n_landmark" in params: + params["n_landmark"] = self._parse_n_landmark( + self.graph.data_nu, params["n_landmark"] + ) + self.graph.set_params(**params) + except ValueError as e: + _logger.debug("Reset graph due to {}".format(str(e))) + self.graph = None + + @abc.abstractmethod + def _reset_graph(self): + """Trigger a reset of self.graph + + Any downstream effects of resetting the graph should override this function + """ + raise NotImplementedError + + def _detect_precomputed_matrix_type(self, X): + if isinstance(X, (sparse.coo_matrix, sparse.dia_matrix)): + X = X.tocsr() + if X[0, 0] == 0: + return "distance" + else: + return "affinity" + + @staticmethod + def _parse_n_landmark(X, n_landmark): + if n_landmark is not None and n_landmark >= X.shape[0]: + return None + else: + return n_landmark + + @staticmethod + def _parse_n_pca(X, n_pca): + if n_pca is not None and n_pca >= min(X.shape): + return None + else: + return n_pca + + @staticmethod + def _parse_n_svd(X, n_svd): + if n_svd is not None and n_svd >= X.shape[0]: + return X.shape[0] - 1 + else: + return n_svd + + def _parse_input(self, X): + # passing graphs as input + if isinstance(X, base.BaseGraph): + # we can keep this graph + self.graph = X + X = X.data + # immutable graph properties override operator + n_pca = self.graph.n_pca + self.knn = self.graph.knn + self.decay = self.graph.decay + self.distance = self.graph.distance + self.thresh = self.graph.thresh + update_graph = False + if isinstance(self.graph, graphs.TraditionalGraph): + precomputed = self.graph.precomputed + else: + precomputed = None + elif isinstance(X, pygsp.graphs.Graph): + # convert pygsp to graphtools + self.graph = None + X = X.W + precomputed = "adjacency" + update_graph = False + n_pca = None + else: + # data matrix + update_graph = True + if 
utils.is_Anndata(X): + X = X.X + if not callable(self.distance) and self.distance.startswith("precomputed"): + if self.distance == "precomputed": + # automatic detection + precomputed = self._detect_precomputed_matrix_type(X) + elif self.distance in ["precomputed_affinity", "precomputed_distance"]: + precomputed = self.distance.split("_")[1] + else: + raise NotImplementedError + n_pca = None + else: + precomputed = None + n_pca = self._parse_n_pca(X, self.n_pca) + return ( + X, + n_pca, + self._parse_n_landmark(X, self.n_landmark), + precomputed, + update_graph, + ) + + def _update_graph(self, X, precomputed, n_pca, n_landmark, **kwargs): + if self.X is not None and not matrix.matrix_is_equivalent(X, self.X): + """ + If the same data is used, we can reuse existing kernel and + diffusion matrices. Otherwise we have to recompute. + """ + self.graph = None + else: + self._update_n_landmark(n_landmark) + self._set_graph_params( + n_pca=n_pca, + precomputed=precomputed, + n_landmark=n_landmark, + random_state=self.random_state, + knn=self.knn, + decay=self.decay, + distance=self.distance, + n_svd=self._parse_n_svd(self.X, self.n_svd), + n_jobs=self.n_jobs, + thresh=self.thresh, + verbose=self.verbose, + **(self.kwargs) + ) + if self.graph is not None: + _logger.info("Using precomputed graph and diffusion operator...") + + def fit(self, X, **kwargs): + """Computes the graph + + Parameters + ---------- + X : array, shape=[n_samples, n_features] + input data with `n_samples` samples and `n_dimensions` + dimensions. Accepted data types: `numpy.ndarray`, + `scipy.sparse.spmatrix`, `pd.DataFrame`, `anndata.AnnData`. 
If + `distance` is 'precomputed', `X` should be an n_samples x + n_samples distance or affinity matrix + + kwargs : additional arguments for graphtools.Graph + + Returns + ------- + self : graphtools.estimator.GraphEstimator + """ + X, n_pca, n_landmark, precomputed, update_graph = self._parse_input(X) + + if precomputed is None: + _logger.info( + "Building graph on {} samples and {} features.".format( + X.shape[0], X.shape[1] + ) + ) + else: + _logger.info( + "Building graph on precomputed {} matrix with {} samples.".format( + precomputed, X.shape[0] + ) + ) + + if self.graph is not None and update_graph: + self._update_graph(X, precomputed, n_pca, n_landmark) + + self.X = X + + if self.graph is None: + with _logger.task("graph and diffusion operator"): + self.graph = api.Graph( + X, + n_pca=n_pca, + precomputed=precomputed, + n_landmark=n_landmark, + random_state=self.random_state, + knn=self.knn, + decay=self.decay, + distance=self.distance, + n_svd=self._parse_n_svd(self.X, self.n_svd), + n_jobs=self.n_jobs, + thresh=self.thresh, + verbose=self.verbose, + **(self.kwargs), + **kwargs + ) + return self diff --git a/graphtools/graphs.py b/graphtools/graphs.py index c51851c..3d6fbe7 100644 --- a/graphtools/graphs.py +++ b/graphtools/graphs.py @@ -12,7 +12,7 @@ import warnings import tasklogger -from . import utils +from . 
import matrix, utils from .base import DataGraph, PyGSPGraph _logger = tasklogger.get_tasklogger("graphtools") @@ -983,7 +983,7 @@ def build_kernel(self): isinstance(K, sparse.dok_matrix) or isinstance(K, sparse.lil_matrix) ): K = K.tolil() - K = utils.set_diagonal(K, 1) + K = matrix.set_diagonal(K, 1) else: with _logger.task("affinities"): if sparse.issparse(self.data_nu): @@ -1110,7 +1110,7 @@ def build_kernel_to_data(self, Y, knn=None, bandwidth=None, bandwidth_scale=None @property def weighted(self): if self.precomputed is not None: - return not utils.nonzero_discrete(self.K, [0.5, 1]) + return not matrix.nonzero_discrete(self.K, [0.5, 1]) else: return super().weighted @@ -1333,7 +1333,7 @@ def build_kernel(self): else: K = np.zeros([self.data_nu.shape[0], self.data_nu.shape[0]]) for i, X in enumerate(self.subgraphs): - K = utils.set_submatrix( + K = matrix.set_submatrix( K, self.sample_idx == self.samples[i], self.sample_idx == self.samples[i], @@ -1358,7 +1358,7 @@ def build_kernel(self): Kij = Kij.multiply(scale[:, None]) else: Kij = Kij * scale[:, None] - K = utils.set_submatrix( + K = matrix.set_submatrix( K, self.sample_idx == self.samples[i], self.sample_idx == self.samples[j], diff --git a/graphtools/matrix.py b/graphtools/matrix.py new file mode 100644 index 0000000..8c818f2 --- /dev/null +++ b/graphtools/matrix.py @@ -0,0 +1,93 @@ +import numpy as np +import numbers + +from scipy import sparse + + +def if_sparse(sparse_func, dense_func, *args, **kwargs): + if sparse.issparse(args[0]): + for arg in args[1:]: + assert sparse.issparse(arg) + return sparse_func(*args, **kwargs) + else: + return dense_func(*args, **kwargs) + + +def sparse_minimum(X, Y): + return X.minimum(Y) + + +def sparse_maximum(X, Y): + return X.maximum(Y) + + +def elementwise_minimum(X, Y): + return if_sparse(sparse_minimum, np.minimum, X, Y) + + +def elementwise_maximum(X, Y): + return if_sparse(sparse_maximum, np.maximum, X, Y) + + +def dense_set_diagonal(X, diag): + 
X[np.diag_indices(X.shape[0])] = diag + return X + + +def sparse_set_diagonal(X, diag): + cls = type(X) + if not isinstance(X, (sparse.lil_matrix, sparse.dia_matrix)): + X = X.tocoo() + X.setdiag(diag) + return cls(X) + + +def set_diagonal(X, diag): + return if_sparse(sparse_set_diagonal, dense_set_diagonal, X, diag=diag) + + +def set_submatrix(X, i, j, values): + X[np.ix_(i, j)] = values + return X + + +def sparse_nonzero_discrete(X, values): + if isinstance( + X, (sparse.bsr_matrix, sparse.dia_matrix, sparse.dok_matrix, sparse.lil_matrix) + ): + X = X.tocsr() + return dense_nonzero_discrete(X.data, values) + + +def dense_nonzero_discrete(X, values): + result = np.full_like(X, False, dtype=bool) + for value in values: + result = np.logical_or(result, X == value) + return np.all(result) + + +def nonzero_discrete(X, values): + if isinstance(values, numbers.Number): + values = [values] + if 0 not in values: + values.append(0) + return if_sparse(sparse_nonzero_discrete, dense_nonzero_discrete, X, values=values) + + +def to_array(X): + if sparse.issparse(X): + X = X.toarray() + elif isinstance(X, np.matrix): + X = X.A + return X + + +def matrix_is_equivalent(X, Y): + """ + Checks matrix equivalence with numpy, scipy and pandas + """ + return X is Y or ( + isinstance(X, Y.__class__) + and X.shape == Y.shape + and np.sum((X != Y).sum()) == 0 + ) diff --git a/graphtools/utils.py b/graphtools/utils.py index 8a0d08c..55e2bd3 100644 --- a/graphtools/utils.py +++ b/graphtools/utils.py @@ -1,106 +1,240 @@ -import numpy as np -from scipy import sparse import numbers import warnings +from deprecated import deprecated +from . 
import matrix try: import pandas as pd -except ImportError: +except ImportError: # pragma: no cover # pandas not installed pass +try: + import anndata +except ImportError: # pragma: no cover + # anndata not installed + pass + -def if_sparse(sparse_func, dense_func, *args, **kwargs): - if sparse.issparse(args[0]): - for arg in args[1:]: - assert sparse.issparse(arg) - return sparse_func(*args, **kwargs) - else: - return dense_func(*args, **kwargs) +def is_DataFrame(X): + try: + return isinstance(X, pd.DataFrame) + except NameError: # pragma: no cover + # pandas not installed + return False -def sparse_minimum(X, Y): - return X.minimum(Y) +def is_SparseDataFrame(X): + try: + pd + except NameError: # pragma: no cover + # pandas not installed + return False + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "The SparseDataFrame class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version", + FutureWarning, + ) + try: + return isinstance(X, pd.SparseDataFrame) + except AttributeError: + return False -def sparse_maximum(X, Y): - return X.maximum(Y) +def is_Anndata(X): + try: + return isinstance(X, anndata.AnnData) + except NameError: # pragma: no cover + # anndata not installed + return False -def elementwise_minimum(X, Y): - return if_sparse(sparse_minimum, np.minimum, X, Y) +def check_greater(x, **params): + """Check that parameters are greater than x as expected + Parameters + ---------- -def elementwise_maximum(X, Y): - return if_sparse(sparse_maximum, np.maximum, X, Y) + x : exclusive lower bound + ValueError is raised unless each parameter is a number greater than x + Raises + ------ + ValueError : unacceptable choice of parameters + """ + for p in params: + if not isinstance(params[p], numbers.Number) or params[p] <= x: + raise ValueError("Expected {} > {}, got {}".format(p, x, params[p])) -def dense_set_diagonal(X, diag): - X[np.diag_indices(X.shape[0])] = diag - return X +def check_positive(**params): + """Check 
that parameters are positive as expected -def sparse_set_diagonal(X, diag): - cls = type(X) - if not isinstance(X, (sparse.lil_matrix, sparse.dia_matrix)): - X = X.tocoo() - X.setdiag(diag) - return cls(X) + Raises + ------ + ValueError : unacceptable choice of parameters + """ + return check_greater(0, **params) -def set_diagonal(X, diag): - return if_sparse(sparse_set_diagonal, dense_set_diagonal, X, diag=diag) +def check_int(**params): + """Check that parameters are integers as expected + Raises + ------ + ValueError : unacceptable choice of parameters + """ + for p in params: + if not isinstance(params[p], numbers.Integral): + raise ValueError("Expected {} integer, got {}".format(p, params[p])) -def set_submatrix(X, i, j, values): - X[np.ix_(i, j)] = values - return X +def check_if_not(x, *checks, **params): + """Run checks only if parameters are not equal to a specified value -def sparse_nonzero_discrete(X, values): - if isinstance( - X, (sparse.bsr_matrix, sparse.dia_matrix, sparse.dok_matrix, sparse.lil_matrix) - ): - X = X.tocsr() - return dense_nonzero_discrete(X.data, values) + Parameters + ---------- + x : excepted value + Checks not run if parameters equal x -def dense_nonzero_discrete(X, values): - result = np.full_like(X, False, dtype=bool) - for value in values: - result = np.logical_or(result, X == value) - return np.all(result) + checks : function + Unnamed arguments, check functions to be run + params : object + Named arguments, parameters to be checked -def nonzero_discrete(X, values): - if isinstance(values, numbers.Number): - values = [values] - if 0 not in values: - values.append(0) - return if_sparse(sparse_nonzero_discrete, dense_nonzero_discrete, X, values=values) + Raises + ------ + ValueError : unacceptable choice of parameters + """ + for p in params: + if params[p] is not x and params[p] != x: + [check(**{p: params[p]}) for check in checks] -def to_array(X): - if sparse.issparse(X): - X = X.toarray() - elif isinstance(X, np.matrix): - X 
= X.A - return X +def check_in(choices, **params): + """Checks parameters are in a list of allowed parameters + Parameters + ---------- -def is_SparseDataFrame(X): - try: - pd - except NameError: - # pandas not installed - return False - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - "The SparseDataFrame class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version", - FutureWarning, - ) - try: - return isinstance(X, pd.SparseDataFrame) - except AttributeError: - return False + choices : array-like, accepted values + + params : object + Named arguments, parameters to be checked + + Raises + ------ + ValueError : unacceptable choice of parameters + """ + for p in params: + if params[p] not in choices: + raise ValueError( + "{} value {} not recognized. Choose from {}".format( + p, params[p], choices + ) + ) + + +def check_between(v_min, v_max, **params): + """Checks parameters are in a specified range + + Parameters + ---------- + + v_min : float, minimum allowed value (inclusive) + + v_max : float, maximum allowed value (inclusive) + + params : object + Named arguments, parameters to be checked + + Raises + ------ + ValueError : unacceptable choice of parameters + """ + check_greater(v_min, v_max=v_max) + for p in params: + if params[p] < v_min or params[p] > v_max: + raise ValueError( + "Expected {} between {} and {}, " + "got {}".format(p, v_min, v_max, params[p]) + ) + + +@deprecated(version="1.5.0", reason="Use graphtools.matrix.if_sparse instead") +def if_sparse(*args, **kwargs): + return matrix.if_sparse(*args, **kwargs) + + +@deprecated(version="1.5.0", reason="Use graphtools.matrix.sparse_minimum instead") +def sparse_minimum(*args, **kwargs): + return matrix.sparse_minimum(*args, **kwargs) + + +@deprecated(version="1.5.0", reason="Use graphtools.matrix.sparse_maximum instead") +def sparse_maximum(*args, **kwargs): + return matrix.sparse_maximum(*args, **kwargs) + + 
+@deprecated(version="1.5.0", reason="Use graphtools.matrix.elementwise_minimum instead") +def elementwise_minimum(*args, **kwargs): + return matrix.elementwise_minimum(*args, **kwargs) + + +@deprecated(version="1.5.0", reason="Use graphtools.matrix.elementwise_maximum instead") +def elementwise_maximum(*args, **kwargs): + return matrix.elementwise_maximum(*args, **kwargs) + + +@deprecated(version="1.5.0", reason="Use graphtools.matrix.dense_set_diagonal instead") +def dense_set_diagonal(*args, **kwargs): + return matrix.dense_set_diagonal(*args, **kwargs) + + +@deprecated(version="1.5.0", reason="Use graphtools.matrix.sparse_set_diagonal instead") +def sparse_set_diagonal(*args, **kwargs): + return matrix.sparse_set_diagonal(*args, **kwargs) + + +@deprecated(version="1.5.0", reason="Use graphtools.matrix.set_diagonal instead") +def set_diagonal(*args, **kwargs): + return matrix.set_diagonal(*args, **kwargs) + + +@deprecated(version="1.5.0", reason="Use graphtools.matrix.set_submatrix instead") +def set_submatrix(*args, **kwargs): + return matrix.set_submatrix(*args, **kwargs) + + +@deprecated( + version="1.5.0", reason="Use graphtools.matrix.sparse_nonzero_discrete instead" +) +def sparse_nonzero_discrete(*args, **kwargs): + return matrix.sparse_nonzero_discrete(*args, **kwargs) + + +@deprecated( + version="1.5.0", reason="Use graphtools.matrix.dense_nonzero_discrete instead" +) +def dense_nonzero_discrete(*args, **kwargs): + return matrix.dense_nonzero_discrete(*args, **kwargs) + + +@deprecated(version="1.5.0", reason="Use graphtools.matrix.nonzero_discrete instead") +def nonzero_discrete(*args, **kwargs): + return matrix.nonzero_discrete(*args, **kwargs) + + +@deprecated(version="1.5.0", reason="Use graphtools.matrix.to_array instead") +def to_array(*args, **kwargs): + return matrix.to_array(*args, **kwargs) + + +@deprecated( + version="1.5.0", reason="Use graphtools.matrix.matrix_is_equivalent instead" +) +def matrix_is_equivalent(*args, **kwargs): + return 
matrix.matrix_is_equivalent(*args, **kwargs) diff --git a/graphtools/version.py b/graphtools/version.py index daa50c7..5b60188 100644 --- a/graphtools/version.py +++ b/graphtools/version.py @@ -1 +1 @@ -__version__ = "1.4.2" +__version__ = "1.5.0" diff --git a/requirements.txt b/requirements.txt index 08fcd1b..c31163d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ pygsp>=>=0.5.1 scikit-learn>=0.20.0 future tasklogger>=1.0 +Deprecated diff --git a/setup.py b/setup.py index 4d7be50..0586273 100644 --- a/setup.py +++ b/setup.py @@ -9,6 +9,7 @@ "scikit-learn>=0.20.0", "future", "tasklogger>=1.0", + "Deprecated", ] test_requires = [ @@ -19,6 +20,7 @@ "coveralls", "python-igraph", "parameterized", + "anndata", ] if sys.version_info[0] == 3: diff --git a/test/test_estimator.py b/test/test_estimator.py new file mode 100644 index 0000000..179abda --- /dev/null +++ b/test/test_estimator.py @@ -0,0 +1,124 @@ +import graphtools +import graphtools.estimator +import pygsp +import anndata +import warnings +import numpy as np +from load_tests import data, assert_raises_message +from scipy import sparse +from parameterized import parameterized + + +class Estimator(graphtools.estimator.GraphEstimator): + def _reset_graph(self): + self.reset = True + + +def test_estimator(): + E = Estimator(verbose=True) + assert E.verbose == 1 + E = Estimator(verbose=False) + assert E.verbose == 0 + E.fit(data) + assert np.all(E.X == data) + assert isinstance(E.graph, graphtools.graphs.kNNGraph) + assert not isinstance(E.graph, graphtools.graphs.LandmarkGraph) + assert not hasattr(E, "reset") + # convert non landmark to landmark + E.set_params(n_landmark=data.shape[0] // 2) + assert E.reset + assert isinstance(E.graph, graphtools.graphs.LandmarkGraph) + del E.reset + # convert landmark to non landmark + E.set_params(n_landmark=None) + assert E.reset + assert not isinstance(E.graph, graphtools.graphs.LandmarkGraph) + del E.reset + # change parameters that force reset + 
E.set_params(knn=E.knn * 2) + assert E.reset + assert E.graph is None + + +@parameterized( + [ + ("precomputed", 1 - np.eye(10), "distance"), + ("precomputed", np.eye(10), "affinity"), + ("precomputed", sparse.coo_matrix(1 - np.eye(10)), "distance"), + ("precomputed", sparse.eye(10), "affinity"), + ("precomputed_affinity", 1 - np.eye(10), "affinity"), + ("precomputed_distance", np.ones((10, 10)), "distance"), + ] +) +def test_precomputed(distance, X, precomputed): + E = Estimator(verbose=False, distance=distance) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message="K should have a non-zero diagonal") + E.fit(X) + assert isinstance(E.graph, graphtools.graphs.TraditionalGraph) + assert E.graph.precomputed == precomputed + + +def test_graph_input(): + X = np.random.normal(0, 1, (10, 2)) + E = Estimator(verbose=0) + G = graphtools.Graph(X) + E.fit(G) + assert E.graph == G + G = graphtools.Graph(X, knn=2, decay=5, distance="cosine", thresh=0) + E.fit(G) + assert E.graph == G + assert E.knn == G.knn + assert E.decay == G.decay + assert E.distance == G.distance + assert E.thresh == G.thresh + W = G.K - np.eye(X.shape[0]) + G = pygsp.graphs.Graph(W) + E.fit(G, use_pygsp=True) + assert np.all(E.graph.W.toarray() == W) + + +def test_pca(): + X = np.random.normal(0, 1, (10, 6)) + E = Estimator(verbose=0) + E.fit(X) + G = E.graph + E.set_params(n_pca=100) + E.fit(X) + assert E.graph is G + E.set_params(n_pca=3) + E.fit(X) + assert E.graph is not G + assert E.graph.n_pca == 3 + + +def test_anndata_input(): + X = np.random.normal(0, 1, (10, 2)) + E = Estimator(verbose=0) + E.fit(X) + E2 = Estimator(verbose=0) + E2.fit(anndata.AnnData(X)) + np.testing.assert_allclose( + E.graph.K.toarray(), E2.graph.K.toarray(), rtol=1e-6, atol=2e-7 + ) + + +def test_new_input(): + X = np.random.normal(0, 1, (10, 2)) + X2 = np.random.normal(0, 1, (10, 2)) + E = Estimator(verbose=0) + E.fit(X) + G = E.graph + E.fit(X) + assert E.graph is G + E.fit(X.copy()) + assert 
E.graph is G + E.n_landmark = 500 + E.fit(X) + assert E.graph is G + E.n_landmark = 5 + E.fit(X) + assert np.all(E.graph.K.toarray() == G.K.toarray()) + G = E.graph + E.fit(X2) + assert E.graph is not G diff --git a/test/test_matrix.py b/test/test_matrix.py new file mode 100644 index 0000000..aac45a7 --- /dev/null +++ b/test/test_matrix.py @@ -0,0 +1,166 @@ +import graphtools.matrix +import graphtools.utils +from parameterized import parameterized +from scipy import sparse +import numpy as np +import graphtools +from load_tests import data +from load_tests import assert_warns_message + + +@parameterized( + [ + (np.array,), + (sparse.csr_matrix,), + (sparse.csc_matrix,), + (sparse.bsr_matrix,), + (sparse.lil_matrix,), + (sparse.coo_matrix,), + ] +) +def test_nonzero_discrete(matrix_class): + X = np.random.choice([0, 1, 2], p=[0.95, 0.025, 0.025], size=(100, 100)) + X = matrix_class(X) + assert graphtools.matrix.nonzero_discrete(X, [1, 2]) + assert not graphtools.matrix.nonzero_discrete(X, [1, 3]) + + +@parameterized([(0,), (1e-4,)]) +def test_nonzero_discrete_knngraph(thresh): + G = graphtools.Graph(data, n_pca=10, knn=5, decay=None, thresh=thresh) + assert graphtools.matrix.nonzero_discrete(G.K, [0.5, 1]) + + +@parameterized([(0,), (1e-4,)]) +def test_nonzero_discrete_decay_graph(thresh): + G = graphtools.Graph(data, n_pca=10, knn=5, decay=15, thresh=thresh) + assert not graphtools.matrix.nonzero_discrete(G.K, [0.5, 1]) + + +def test_nonzero_discrete_constant(): + assert graphtools.matrix.nonzero_discrete(2, [1, 2]) + assert not graphtools.matrix.nonzero_discrete(2, [1, 3]) + + +def test_if_sparse_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) if_sparse. 
(Use graphtools.matrix.if_sparse instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.if_sparse(lambda x: x, lambda x: x, np.zeros((4, 4))) + + +def test_sparse_minimum_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) sparse_minimum. (Use graphtools.matrix.sparse_minimum instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.sparse_minimum( + sparse.csr_matrix((4, 4)), sparse.bsr_matrix((4, 4)) + ) + + +def test_sparse_maximum_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) sparse_maximum. (Use graphtools.matrix.sparse_maximum instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.sparse_maximum( + sparse.csr_matrix((4, 4)), sparse.bsr_matrix((4, 4)) + ) + + +def test_elementwise_minimum_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) elementwise_minimum. (Use graphtools.matrix.elementwise_minimum instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.elementwise_minimum( + sparse.csr_matrix((4, 4)), sparse.bsr_matrix((4, 4)) + ) + + +def test_elementwise_maximum_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) elementwise_maximum. (Use graphtools.matrix.elementwise_maximum instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.elementwise_maximum( + sparse.csr_matrix((4, 4)), sparse.bsr_matrix((4, 4)) + ) + + +def test_dense_set_diagonal_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) dense_set_diagonal. 
(Use graphtools.matrix.dense_set_diagonal instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.dense_set_diagonal(np.zeros((4, 4)), 1) + + +def test_sparse_set_diagonal_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) sparse_set_diagonal. (Use graphtools.matrix.sparse_set_diagonal instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.sparse_set_diagonal(sparse.csr_matrix((4, 4)), 1) + + +def test_set_diagonal_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) set_diagonal. (Use graphtools.matrix.set_diagonal instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.set_diagonal(np.zeros((4, 4)), 1) + + +def test_set_submatrix_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) set_submatrix. (Use graphtools.matrix.set_submatrix instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.set_submatrix( + sparse.lil_matrix((4, 4)), [1, 2], [0, 1], np.array([[1, 2], [3, 4]]) + ) + + +def test_sparse_nonzero_discrete_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) sparse_nonzero_discrete. (Use graphtools.matrix.sparse_nonzero_discrete instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.sparse_nonzero_discrete(sparse.csr_matrix((4, 4)), [1]) + + +def test_dense_nonzero_discrete_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) dense_nonzero_discrete. (Use graphtools.matrix.dense_nonzero_discrete instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.dense_nonzero_discrete(np.zeros((4, 4)), [1]) + + +def test_nonzero_discrete_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) nonzero_discrete. 
(Use graphtools.matrix.nonzero_discrete instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.nonzero_discrete(np.zeros((4, 4)), [1]) + + +def test_to_array_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) to_array. (Use graphtools.matrix.to_array instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.to_array([1]) + + +def test_matrix_is_equivalent_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) matrix_is_equivalent. (Use graphtools.matrix.matrix_is_equivalent instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.matrix_is_equivalent( + sparse.csr_matrix((4, 4)), sparse.bsr_matrix((4, 4)) + ) diff --git a/test/test_utils.py b/test/test_utils.py index 0d72174..1aadd82 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,40 +1,39 @@ -import graphtools.utils -from parameterized import parameterized -from scipy import sparse -import numpy as np import graphtools -from load_tests import data +from load_tests import assert_raises_message -@parameterized( - [ - (np.array,), - (sparse.csr_matrix,), - (sparse.csc_matrix,), - (sparse.bsr_matrix,), - (sparse.lil_matrix,), - (sparse.coo_matrix,), - ] -) -def test_nonzero_discrete(matrix_class): - X = np.random.choice([0, 1, 2], p=[0.95, 0.025, 0.025], size=(100, 100)) - X = matrix_class(X) - assert graphtools.utils.nonzero_discrete(X, [1, 2]) - assert not graphtools.utils.nonzero_discrete(X, [1, 3]) +def test_check_in(): + graphtools.utils.check_in(["hello", "world"], foo="hello") + with assert_raises_message( + ValueError, "foo value bar not recognized. 
Choose from ['hello', 'world']" + ): + graphtools.utils.check_in(["hello", "world"], foo="bar") -@parameterized([(0,), (1e-4,)]) -def test_nonzero_discrete_knngraph(thresh): - G = graphtools.Graph(data, n_pca=10, knn=5, decay=None, thresh=thresh) - assert graphtools.utils.nonzero_discrete(G.K, [0.5, 1]) +def test_check_int(): + graphtools.utils.check_int(foo=5) + graphtools.utils.check_int(foo=-5) + with assert_raises_message(ValueError, "Expected foo integer, got 5.3"): + graphtools.utils.check_int(foo=5.3) -@parameterized([(0,), (1e-4,)]) -def test_nonzero_discrete_decay_graph(thresh): - G = graphtools.Graph(data, n_pca=10, knn=5, decay=15, thresh=thresh) - assert not graphtools.utils.nonzero_discrete(G.K, [0.5, 1]) +def test_check_positive(): + graphtools.utils.check_positive(foo=5) + with assert_raises_message(ValueError, "Expected foo > 0, got -5"): + graphtools.utils.check_positive(foo=-5) + with assert_raises_message(ValueError, "Expected foo > 0, got 0"): + graphtools.utils.check_positive(foo=0) -def test_nonzero_discrete_constant(): - assert graphtools.utils.nonzero_discrete(2, [1, 2]) - assert not graphtools.utils.nonzero_discrete(2, [1, 3]) +def test_check_if_not(): + graphtools.utils.check_if_not(-5, graphtools.utils.check_positive, foo=-5) + with assert_raises_message(ValueError, "Expected foo > 0, got -5"): + graphtools.utils.check_if_not(-4, graphtools.utils.check_positive, foo=-5) + + +def test_check_between(): + graphtools.utils.check_between(-5, -3, foo=-4) + with assert_raises_message(ValueError, "Expected foo between -5 and -3, got -6"): + graphtools.utils.check_between(-5, -3, foo=-6) + with assert_raises_message(ValueError, "Expected v_max > -3, got -5"): + graphtools.utils.check_between(-3, -5, foo=-6)