diff --git a/.coveragerc b/.coveragerc
index 82ed0b8..b88aee5 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -15,5 +15,3 @@ exclude_lines =
     # Don't complain if non-runnable code isn't run:
     if 0:
     if __name__ == .__main__.
-
-
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
new file mode 100644
index 0000000..df20a5f
--- /dev/null
+++ b/.github/workflows/pre-commit.yml
@@ -0,0 +1,62 @@
+name: pre-commit
+on:
+  push:
+    branches-ignore:
+      - 'master'
+  pull_request:
+    types: [opened, synchronize, reopened, ready_for_review]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  pre-commit:
+    runs-on: ubuntu-latest
+
+    if: >-
+      !endsWith(github.event.head_commit.message, '# ci skip') &&
+      (
+        startsWith(github.ref, 'refs/heads') ||
+        github.event.pull_request.draft == false
+      )
+
+    steps:
+
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Cache pre-commit
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pre-commit
+          key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }}-
+
+      - name: Run pre-commit
+        id: precommit
+        uses: pre-commit/action@v3.0.0
+        continue-on-error: true
+
+      - name: Commit files
+        if: steps.precommit.outcome == 'failure' && startsWith(github.ref, 'refs/heads')
+        run: |
+          if [[ `git status --porcelain --untracked-files=no` ]]; then
+            git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
+            git config --local user.name "github-actions[bot]"
+            git add .
+            git checkout -- .github/workflows
+            git commit -m "pre-commit" -a
+          fi
+        shell: bash -ex {0}
+
+      - name: Push changes
+        if: steps.precommit.outcome == 'failure' && startsWith(github.ref, 'refs/heads')
+        uses: ad-m/github-push-action@master
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          branch: ${{ github.ref }}
+
+      - name: Check pre-commit
+        if: steps.precommit.outcome == 'failure'
+        uses: pre-commit/action@v3.0.0
diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml
new file mode 100644
index 0000000..cc5dac8
--- /dev/null
+++ b/.github/workflows/run_tests.yml
@@ -0,0 +1,77 @@
+name: Unit Tests
+
+on:
+  push:
+    branches-ignore:
+      - 'test_deploy'
+  pull_request:
+    branches:
+      - '*'
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+
+  run_tester:
+    runs-on: ${{ matrix.config.os }}
+    if: "!contains(github.event.head_commit.message, 'ci skip')"
+
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - {name: '3.10', os: ubuntu-latest, python: '3.10' }
+          - {name: '3.9', os: ubuntu-latest, python: '3.9' }
+          - {name: '3.8', os: ubuntu-latest, python: '3.8' }
+          - {name: '3.7', os: ubuntu-latest, python: '3.7' }
+
+    steps:
+
+      - uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+
+      - name: Install system dependencies
+        if: runner.os == 'Linux'
+        run: |
+          sudo apt-get update -qq
+          sudo apt-get install -y libhdf5-dev libhdf5-serial-dev pandoc gfortran libblas-dev liblapack-dev llvm-dev
+
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.config.python }}
+
+      - name: Cache Python packages
+        uses: actions/cache@v2
+        with:
+          path: ${{ env.pythonLocation }}
+          key: ${{runner.os}}-${{ matrix.config.python }}-pip-${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}
+          restore-keys: ${{runner.os}}-${{ matrix.config.python }}-pip-${{ env.pythonLocation }}-
+
+      - name: Install package & dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -U wheel setuptools
+          pip install -U .[test]
+          python -c "import graphtools"
+
+      - name: Run tests
+        run: |
+          nose2 -vvv
+
+      - name: Coveralls
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          COVERALLS_SERVICE_NAME: github
+        run: |
+          coveralls
+
+      - name: Upload check results on fail
+        if: failure()
+        uses: actions/upload-artifact@master
+        with:
+          name: ${{ matrix.config.name }}_results
+          path: check
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..152000b
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,26 @@
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v3.3.0
+    hooks:
+      - id: check-yaml
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
+        exclude: \.(ai|gz)$
+  - repo: https://github.com/timothycrosley/isort
+    rev: 5.6.4
+    hooks:
+      - id: isort
+  - repo: https://github.com/psf/black
+    rev: 22.3.0
+    hooks:
+      - id: black
+        args: ['--target-version=py36']
+  - repo: https://github.com/pre-commit/mirrors-autopep8
+    rev: v1.5.4
+    hooks:
+      - id: autopep8
+#  - repo: https://gitlab.com/pycqa/flake8
+#    rev: 3.8.4
+#    hooks:
+#      - id: flake8
+#        additional_dependencies: ['hacking']
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 3fd9f36..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,32 +0,0 @@
-language: python
-python:
-  - '3.5'
-  - '3.6'
-  - '3.7'
-  - '3.8'
-cache:
-  - pip
-  - apt
-addons:
-  apt:
-    packages: libjs-mathjax
-script:
-  - python -c "import graphtools"
-  - 'pip install -U .[test]'
-  - 'if [ "$TRAVIS_PYTHON_VERSION" != "3.5" ]; then black . --check --diff; fi'
-  - python setup.py test
-  - 'pip install -U .[doc]'
-  - cd doc; make html
-  - cd ..
-deploy:
-  provider: pypi
-  user: scottgigante
-  password: '${PYPI_PASSWORD}'
-  distributions: sdist bdist_wheel
-  skip_existing: true
-  cleanup: false
-  'on':
-    tags: true
-    branch: master
-after_success:
-  - coveralls
diff --git a/README.rst b/README.rst
index 133409a..62ae08f 100644
--- a/README.rst
+++ b/README.rst
@@ -8,9 +8,9 @@ graphtools
 .. image:: https://anaconda.org/conda-forge/graphtools/badges/version.svg
    :target: https://anaconda.org/conda-forge/graphtools/
    :alt: Latest Conda version
-.. image:: https://api.travis-ci.com/KrishnaswamyLab/graphtools.svg?branch=master
-   :target: https://travis-ci.com/KrishnaswamyLab/graphtools
-   :alt: Travis CI Build
+.. image:: https://img.shields.io/github/workflow/status/KrishnaswamyLab/graphtools/Unit%20Tests/master?label=Github%20Actions
+   :target: https://github.com/KrishnaswamyLab/graphtools/actions
+   :alt: Github Actions Build
 .. image:: https://img.shields.io/readthedocs/graphtools.svg
    :target: https://graphtools.readthedocs.io/
    :alt: Read the Docs
diff --git a/autoblack.sh b/autoblack.sh
index cfbaf2b..3642ac8 100644
--- a/autoblack.sh
+++ b/autoblack.sh
@@ -11,4 +11,3 @@ for file in \$files; do
 done
 EOF
 chmod +x .git/hooks/pre-commit
-
diff --git a/doc/Makefile b/doc/Makefile
index acdb12e..c596553 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -17,4 +17,4 @@ help:
 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
 %: Makefile
-	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
\ No newline at end of file
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
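Throughout the Python changes below, grouped imports are split one per line and re-sorted. That style appears to be driven by the [isort] settings added to setup.cfg later in this diff (profile = black, force_single_line = true, force_alphabetical_sort = true). A minimal before/after sketch, using an import that is rewritten in test/load_tests/__init__.py:

    # before: grouped, in as-written order
    from scipy.spatial.distance import pdist, cdist, squareform

    # after force_single_line + force_alphabetical_sort:
    from scipy.spatial.distance import cdist
    from scipy.spatial.distance import pdist
    from scipy.spatial.distance import squareform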
diff --git a/graphtools/__init__.py b/graphtools/__init__.py
index 7384afc..0518e98 100644
--- a/graphtools/__init__.py
+++ b/graphtools/__init__.py
@@ -1,2 +1,4 @@
-from .api import Graph, from_igraph, read_pickle
+from .api import from_igraph
+from .api import Graph
+from .api import read_pickle
 from .version import __version__
diff --git a/graphtools/api.py b/graphtools/api.py
index b9a4b1b..d61e512 100644
--- a/graphtools/api.py
+++ b/graphtools/api.py
@@ -1,11 +1,12 @@
-import numpy as np
-import warnings
+from . import base
+from . import graphs
 from scipy import sparse
+
+import numpy as np
 import pickle
 import pygsp
 import tasklogger
-
-from . import base, graphs
+import warnings

 _logger = tasklogger.get_tasklogger("graphtools")
@@ -36,7 +37,7 @@ def Graph(
     graphtype="auto",
     use_pygsp=False,
     initialize=True,
-    **kwargs
+    **kwargs,
 ):
     """Create a graph built on data.
@@ -255,7 +256,7 @@ def Graph(
     else:
         msg = msg + " and PyGSP inheritance"

-    _logger.debug(msg)
+    _logger.log_debug(msg)

     class_names = [p.__name__.replace("Graph", "") for p in parent_classes]
     try:
@@ -273,7 +274,7 @@ def Graph(
         pass

     # build graph and return
-    _logger.debug(
+    _logger.log_debug(
         "Initializing {} with arguments {}".format(
             parent_classes,
             ", ".join(
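The api.py hunks above rename the tasklogger calls from debug/info to log_debug/log_info (and task to log_task below); the same rename recurs in base.py, estimator.py, and graphs.py. A minimal sketch of the renamed calls as this patch uses them, assuming the tasklogger API the diff targets:

    import tasklogger

    _logger = tasklogger.get_tasklogger("graphtools")

    _logger.log_debug("a debug message")  # formerly _logger.debug(...)
    _logger.log_info("a status message")  # formerly _logger.info(...)
    with _logger.log_task("PCA"):  # formerly _logger.task(...)
        pass  # the block is timed and logged as a named task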
diff --git a/graphtools/base.py b/graphtools/base.py
index 4789b8a..1ed702a 100644
--- a/graphtools/base.py
+++ b/graphtools/base.py
@@ -1,21 +1,23 @@
-from future.utils import with_metaclass
+from . import matrix
+from . import utils
 from builtins import super
 from copy import copy as shallow_copy
-import numpy as np
-import abc
-import pygsp
+from future.utils import with_metaclass
 from inspect import signature
-from sklearn.decomposition import PCA, TruncatedSVD
-from sklearn.preprocessing import normalize
-from sklearn.utils.graph import graph_shortest_path
 from scipy import sparse
-import warnings
+from scipy.sparse.csgraph import shortest_path
+from sklearn.decomposition import PCA
+from sklearn.decomposition import TruncatedSVD
+from sklearn.preprocessing import normalize
+
+import abc
 import numbers
+import numpy as np
 import pickle
+import pygsp
 import sys
 import tasklogger
-
-from . import matrix, utils
+import warnings

 _logger = tasklogger.get_tasklogger("graphtools")
@@ -173,7 +175,7 @@ def _parse_n_pca_threshold(self, data, n_pca, rank_threshold):
             n_pca = None
         elif n_pca is True:  # notify that we're going to estimate rank.
             n_pca = "auto"
-            _logger.info(
+            _logger.log_info(
                 "Estimating n_pca from matrix rank. "
                 "Supply an integer n_pca "
                 "for fixed amount."
@@ -237,7 +239,7 @@ def _reduce_data(self):
         if self.n_pca is not None and (
             self.n_pca == "auto" or self.n_pca < self.data.shape[1]
         ):
-            with _logger.task("PCA"):
+            with _logger.log_task("PCA"):
                 n_pca = self.data.shape[1] - 1 if self.n_pca == "auto" else self.n_pca
                 if sparse.issparse(self.data):
                     if (
@@ -269,7 +271,7 @@ def _reduce_data(self):
                         "maximum singular value {} "
                         "for the data matrix".format(threshold, smax)
                     )
-                    _logger.info(
+                    _logger.log_info(
                         "Using rank estimate of {} as n_pca".format(self.n_pca)
                     )
                     # reset the sklearn operator
@@ -292,8 +294,7 @@ def _reduce_data(self):
         return data_nu

     def get_params(self):
-        """Get parameters from this object
-        """
+        """Get parameters from this object"""
         return {"n_pca": self.n_pca, "random_state": self.random_state}

     def set_params(self, **params):
@@ -469,7 +470,7 @@ def __init__(
         anisotropy=0,
         gamma=None,
         initialize=True,
-        **kwargs
+        **kwargs,
     ):
         if gamma is not None:
             warnings.warn(
@@ -498,10 +499,10 @@ def __init__(
         self.anisotropy = anisotropy

         if initialize:
-            _logger.debug("Initializing kernel...")
+            _logger.log_debug("Initializing kernel...")
             self.K
         else:
-            _logger.debug("Not initializing kernel.")
+            _logger.log_debug("Not initializing kernel.")
         super().__init__(**kwargs)

     def _check_symmetrization(self, kernel_symm, theta):
@@ -556,18 +557,20 @@ def _build_kernel(self):
     def symmetrize_kernel(self, K):
         # symmetrize
         if self.kernel_symm == "+":
-            _logger.debug("Using addition symmetrization.")
+            _logger.log_debug("Using addition symmetrization.")
             K = (K + K.T) / 2
         elif self.kernel_symm == "*":
-            _logger.debug("Using multiplication symmetrization.")
+            _logger.log_debug("Using multiplication symmetrization.")
             K = K.multiply(K.T)
         elif self.kernel_symm == "mnn":
-            _logger.debug("Using mnn symmetrization (theta = {}).".format(self.theta))
+            _logger.log_debug(
+                "Using mnn symmetrization (theta = {}).".format(self.theta)
+            )
             K = self.theta * matrix.elementwise_minimum(K, K.T) + (
                 1 - self.theta
             ) * matrix.elementwise_maximum(K, K.T)
         elif self.kernel_symm is None:
-            _logger.debug("Using no symmetrization.")
+            _logger.log_debug("Using no symmetrization.")
             pass
         else:
             raise NotImplementedError
@@ -589,8 +592,7 @@ def apply_anisotropy(self, K):
         return K

     def get_params(self):
-        """Get parameters from this object
-        """
+        """Get parameters from this object"""
         return {
             "kernel_symm": self.kernel_symm,
             "theta": self.theta,
@@ -697,8 +699,7 @@ def diff_aff(self):

     @property
     def diff_op(self):
-        """Synonym for P
-        """
+        """Synonym for P"""
         return self.P

     @property
@@ -719,8 +720,7 @@ def K(self):

     @property
     def kernel(self):
-        """Synonym for K
-        """
+        """Synonym for K"""
         return self.K

     @property
@@ -850,10 +850,10 @@ def _check_shortest_path_distance(self, distance):

     def _default_shortest_path_distance(self):
         if not self.weighted:
             distance = "data"
-            _logger.info("Using ambient data distances.")
+            _logger.log_info("Using ambient data distances.")
         else:
             distance = "affinity"
-            _logger.info("Using negative log affinity distances.")
+            _logger.log_info("Using negative log affinity distances.")
         return distance

     def shortest_path(self, method="auto", distance=None):
@@ -903,7 +903,7 @@ def shortest_path(self, method="auto", distance=None):
                 "Got {}".format(distance)
             )

-        P = graph_shortest_path(D, method=method)
+        P = shortest_path(D, method=method)
         # symmetrize for numerical error
         P = (P + P.T) / 2
         # sklearn returns 0 if no path exists
@@ -1019,8 +1019,7 @@ def __init__(self, data, verbose=True, n_jobs=1, **kwargs):
         super().__init__(data, **kwargs)

     def get_params(self):
-        """Get parameters from this object
-        """
+        """Get parameters from this object"""
         params = Data.get_params(self)
         params.update(BaseGraph.get_params(self))
         return params
diff --git a/graphtools/estimator.py b/graphtools/estimator.py
index 5cb130f..b072693 100644
--- a/graphtools/estimator.py
+++ b/graphtools/estimator.py
@@ -1,12 +1,15 @@
-import numpy as np
-import tasklogger
-import pygsp
-import abc
-
+from . import api
+from . import base
+from . import graphs
+from . import matrix
+from . import utils
 from functools import partial
 from scipy import sparse

-from . import api, graphs, base, utils, matrix
+import abc
+import numpy as np
+import pygsp
+import tasklogger


 def attribute(attr, default=None, doc=None, on_set=None):
@@ -81,18 +84,18 @@ class GraphEstimator(object, metaclass=abc.ABCMeta):
     verbose : `int` or `boolean`, optional (default: 1)
         If `True` or `> 0`, print status messages
-
+
     n_svd : int, optional (default: 100)
         number of singular vectors to compute for landmarking
-
+
     thresh : float, optional (default: 1e-4)
         threshold below which to truncate kernel
-
+
     kwargs : additional arguments for graphtools.Graph
-
+
     Attributes
     ----------
-
+
     graph : graphtools.Graph
     """
@@ -203,7 +206,7 @@ def __init__(
         n_jobs=1,
         verbose=1,
         thresh=1e-4,
-        **kwargs
+        **kwargs,
     ):

         if verbose is True:
@@ -248,13 +251,13 @@ def _set_graph_params(self, **params):
             )
             self.graph.set_params(**params)
         except ValueError as e:
-            _logger.debug("Reset graph due to {}".format(str(e)))
+            _logger.log_debug("Reset graph due to {}".format(str(e)))
             self.graph = None

     @abc.abstractmethod
     def _reset_graph(self):
         """Trigger a reset of self.graph
-
+
         Any downstream effects of resetting the graph should override this function
         """
         raise NotImplementedError
@@ -358,10 +361,10 @@ def _update_graph(self, X, precomputed, n_pca, n_landmark, **kwargs):
                 n_jobs=self.n_jobs,
                 thresh=self.thresh,
                 verbose=self.verbose,
-                **(self.kwargs)
+                **(self.kwargs),
             )
             if self.graph is not None:
-                _logger.info("Using precomputed graph and diffusion operator...")
+                _logger.log_info("Using precomputed graph and diffusion operator...")
@@ -384,13 +387,13 @@ def fit(self, X, **kwargs):
         X, n_pca, n_landmark, precomputed, update_graph = self._parse_input(X)

         if precomputed is None:
-            _logger.info(
+            _logger.log_info(
                 "Building graph on {} samples and {} features.".format(
                     X.shape[0], X.shape[1]
                 )
             )
         else:
-            _logger.info(
+            _logger.log_info(
                 "Building graph on precomputed {} matrix with {} samples.".format(
                     precomputed, X.shape[0]
                 )
@@ -402,7 +405,7 @@ def fit(self, X, **kwargs):
         self.X = X

         if self.graph is None:
-            with _logger.task("graph and diffusion operator"):
+            with _logger.log_task("graph and diffusion operator"):
                 self.graph = api.Graph(
                     X,
                     n_pca=n_pca,
@@ -417,6 +420,6 @@ def fit(self, X, **kwargs):
                     thresh=self.thresh,
                     verbose=self.verbose,
                     **(self.kwargs),
-                    **kwargs
+                    **kwargs,
                 )
         return self
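Besides the logging rename, base.py above replaces sklearn.utils.graph.graph_shortest_path, which recent scikit-learn releases removed, with scipy.sparse.csgraph.shortest_path; the test files below make the same substitution. A small self-contained sketch of the replacement call, with a hypothetical 3-node distance matrix standing in for the D built in shortest_path():

    import numpy as np
    from scipy.sparse.csgraph import shortest_path

    # hypothetical graph: in a dense input, zero entries are treated as missing edges
    D = np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 2.0], [0.0, 2.0, 0.0]])
    P = shortest_path(D, method="auto")  # drop-in for graph_shortest_path
    P = (P + P.T) / 2  # symmetrize away numerical error, as in the method above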
""" - with _logger.task("landmark operator"): + with _logger.log_task("landmark operator"): is_sparse = sparse.issparse(self.kernel) # spectral clustering - with _logger.task("SVD"): + with _logger.log_task("SVD"): _, _, VT = randomized_svd( self.diff_aff, n_components=self.n_svd, random_state=self.random_state, ) - with _logger.task("KMeans"): + with _logger.log_task("KMeans"): kmeans = MiniBatchKMeans( self.n_landmark, init_size=3 * self.n_landmark, + n_init=1, batch_size=10000, random_state=self.random_state, ) @@ -678,7 +681,8 @@ def build_landmark_op(self): pnm = pmn.transpose() pmn = normalize(pmn, norm="l1", axis=1) pnm = normalize(pnm, norm="l1", axis=1) - landmark_op = pmn.dot(pnm) # sparsity agnostic matrix multiplication + # sparsity agnostic matrix multiplication + landmark_op = pmn.dot(pnm) if is_sparse: # no need to have a sparse landmark operator landmark_op = landmark_op.toarray() @@ -834,7 +838,7 @@ def __init__( n_pca=None, thresh=1e-4, precomputed=None, - **kwargs + **kwargs, ): if decay is None and precomputed not in ["affinity", "adjacency"]: # decay high enough is basically a binary kernel @@ -886,8 +890,7 @@ def __init__( super().__init__(data, n_pca=n_pca, **kwargs) def get_params(self): - """Get parameters from this object - """ + """Get parameters from this object""" params = super().get_params() params.update( { @@ -985,7 +988,7 @@ def build_kernel(self): K = K.tolil() K = matrix.set_diagonal(K, 1) else: - with _logger.task("affinities"): + with _logger.log_task("affinities"): if sparse.issparse(self.data_nu): self.data_nu = self.data_nu.toarray() if self.precomputed == "distance": @@ -1091,7 +1094,7 @@ def build_kernel_to_data(self, Y, knn=None, bandwidth=None, bandwidth_scale=None if self.precomputed is not None: raise ValueError("Cannot extend kernel on precomputed graph") else: - with _logger.task("affinities"): + with _logger.log_task("affinities"): Y = self._check_extension_shape(Y) pdx = cdist(Y, self.data_nu, metric=self.distance) if bandwidth is None: @@ -1101,7 +1104,7 @@ def build_kernel_to_data(self, Y, knn=None, bandwidth=None, bandwidth_scale=None bandwidth = bandwidth(pdx) bandwidth = bandwidth_scale * bandwidth pdx = (pdx.T / bandwidth).T - K = np.exp(-1 * pdx ** self.decay) + K = np.exp(-1 * pdx**self.decay) # handle nan K = np.where(np.isnan(K), 1, K) K[K < self.thresh] = 0 @@ -1128,7 +1131,7 @@ def _check_shortest_path_distance(self, distance): def _default_shortest_path_distance(self): if self.precomputed is not None and not self.weighted: distance = "constant" - _logger.info("Using constant distances.") + _logger.log_info("Using constant distances.") else: distance = super()._default_shortest_path_distance() return distance @@ -1178,7 +1181,7 @@ def __init__( distance="euclidean", thresh=1e-4, n_jobs=1, - **kwargs + **kwargs, ): self.beta = beta self.sample_idx = sample_idx @@ -1222,8 +1225,7 @@ def _check_symmetrization(self, kernel_symm, theta): super()._check_symmetrization(kernel_symm, theta) def get_params(self): - """Get parameters from this object - """ + """Get parameters from this object""" params = super().get_params() params.update( { @@ -1296,13 +1298,13 @@ def build_kernel(self): symmetric matrix with ones down the diagonal with no non-negative entries. 
""" - with _logger.task("subgraphs"): + with _logger.log_task("subgraphs"): self.subgraphs = [] from .api import Graph # iterate through sample ids for i, idx in enumerate(self.samples): - _logger.debug( + _logger.log_debug( "subgraph {}: sample {}, " "n = {}, knn = {}".format( i, idx, np.sum(self.sample_idx == idx), self.knn @@ -1327,7 +1329,7 @@ def build_kernel(self): ) self.subgraphs.append(graph) # append to list of subgraphs - with _logger.task("MNN kernel"): + with _logger.log_task("MNN kernel"): if self.thresh > 0 or self.decay is None: K = sparse.lil_matrix((self.data_nu.shape[0], self.data_nu.shape[0])) else: @@ -1343,7 +1345,7 @@ def build_kernel(self): for j, Y in enumerate(self.subgraphs): if i == j: continue - with _logger.task( + with _logger.log_task( "kernel from sample {} to {}".format( self.samples[i], self.samples[j] ) diff --git a/graphtools/matrix.py b/graphtools/matrix.py index 8c818f2..490155f 100644 --- a/graphtools/matrix.py +++ b/graphtools/matrix.py @@ -1,8 +1,8 @@ -import numpy as np -import numbers - from scipy import sparse +import numbers +import numpy as np + def if_sparse(sparse_func, dense_func, *args, **kwargs): if sparse.issparse(args[0]): diff --git a/graphtools/utils.py b/graphtools/utils.py index 55e2bd3..96a992c 100644 --- a/graphtools/utils.py +++ b/graphtools/utils.py @@ -1,7 +1,8 @@ +from . import matrix +from deprecated import deprecated + import numbers import warnings -from deprecated import deprecated -from . import matrix try: import pandas as pd diff --git a/graphtools/version.py b/graphtools/version.py index 5197c5f..a06ff4e 100644 --- a/graphtools/version.py +++ b/graphtools/version.py @@ -1 +1 @@ -__version__ = "1.5.2" +__version__ = "1.5.3" diff --git a/setup.cfg b/setup.cfg index 6372926..c4ba0c6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,29 @@ [metadata] license-file = LICENSE + +[flake8] +ignore = + # top-level module docstring + D100, D104, + # space before: conflicts with black + E203, + # import not in alphabetical: conflicts with isort + H306 +per-file-ignores = + # imported but unused + __init__.py: F401 + # missing docstring in public function for methods, metrics, datasets + openproblems/tasks/*/*/*.py: D103, E203 + openproblems/tasks/*/*/__init__.py: F401, D103 +max-line-length = 88 +exclude = + .git, + __pycache__, + build, + dist, + Snakefile + +[isort] +profile = black +force_single_line = true +force_alphabetical_sort = true diff --git a/setup.py b/setup.py index 0586273..e548184 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ +from setuptools import setup + import os import sys -from setuptools import setup install_requires = [ "numpy>=1.14.0", @@ -44,7 +45,9 @@ description="graphtools", author="Scott Gigante, Daniel Burkhardt, and Jay Stanley, Yale University", author_email="scott.gigante@yale.edu", - packages=["graphtools",], + packages=[ + "graphtools", + ], license="GNU General Public License Version 2", install_requires=install_requires, extras_require={"test": test_requires, "doc": doc_requires}, @@ -54,7 +57,12 @@ download_url="https://github.com/KrishnaswamyLab/graphtools/archive/v{}.tar.gz".format( version ), - keywords=["graphs", "big-data", "signal processing", "manifold-learning",], + keywords=[ + "graphs", + "big-data", + "signal processing", + "manifold-learning", + ], classifiers=[ "Development Status :: 4 - Beta", "Environment :: Console", diff --git a/test/load_tests/__init__.py b/test/load_tests/__init__.py index 1c6213f..8e8765c 100644 --- a/test/load_tests/__init__.py +++ 
diff --git a/test/load_tests/__init__.py b/test/load_tests/__init__.py
index 1c6213f..8e8765c 100644
--- a/test/load_tests/__init__.py
+++ b/test/load_tests/__init__.py
@@ -1,16 +1,20 @@
-from sklearn.decomposition import PCA, TruncatedSVD
+from nose.tools import assert_raises_regex
+from nose.tools import assert_warns_regex
+from scipy.spatial.distance import cdist
+from scipy.spatial.distance import pdist
+from scipy.spatial.distance import squareform
 from sklearn import datasets
-from scipy.spatial.distance import pdist, cdist, squareform
-import pygsp
+from sklearn.decomposition import PCA
+from sklearn.decomposition import TruncatedSVD
+
 import graphtools
+import nose2
 import numpy as np
-import scipy.sparse as sp
-import warnings
 import pandas as pd
-
-import nose2
-from nose.tools import assert_raises_regex, assert_warns_regex
+import pygsp
 import re
+import scipy.sparse as sp
+import warnings


 def assert_warns_message(expected_warning, expected_message, *args, **kwargs):
@@ -110,7 +114,7 @@ def build_graph(
     sparse=False,
     graph_class=graphtools.Graph,
     verbose=0,
-    **kwargs
+    **kwargs,
 ):
     if sparse:
         data = sp.coo_matrix(data)
@@ -122,5 +126,5 @@ def build_graph(
         knn=knn,
         random_state=42,
         verbose=verbose,
-        **kwargs
+        **kwargs,
     )
diff --git a/test/test_api.py b/test/test_api.py
index 64d5ae9..1ff7dfe 100644
--- a/test/test_api.py
+++ b/test/test_api.py
@@ -1,13 +1,16 @@
 from __future__ import print_function
-from load_tests import data, build_graph, assert_raises_message, assert_warns_message

+from load_tests import assert_raises_message
+from load_tests import assert_warns_message
+from load_tests import build_graph
+from load_tests import data
+
+import graphtools
 import igraph
 import numpy as np
-import graphtools
-import tempfile
 import os
 import pickle
+import tempfile


 def test_from_igraph():
diff --git a/test/test_data.py b/test/test_data.py
index 24f6dd2..09e34cb 100644
--- a/test/test_data.py
+++ b/test/test_data.py
@@ -1,16 +1,16 @@
 from __future__ import print_function
-from load_tests import (
-    np,
-    sp,
-    pd,
-    graphtools,
-    nose2,
-    data,
-    build_graph,
-    squareform,
-    pdist,
-)
-from load_tests import assert_raises_message, assert_warns_message

+from load_tests import assert_raises_message
+from load_tests import assert_warns_message
+from load_tests import build_graph
+from load_tests import data
+from load_tests import graphtools
+from load_tests import nose2
+from load_tests import np
+from load_tests import pd
+from load_tests import pdist
+from load_tests import sp
+from load_tests import squareform
 from nose.tools import assert_raises_regex

 import numbers
@@ -219,7 +219,7 @@ def test_anndata():
     except NameError:
         # not installed
         return
-    G = build_graph(anndata.AnnData(data))
+    G = build_graph(anndata.AnnData(data, dtype=data.dtype))
     assert isinstance(G, graphtools.base.BaseGraph)
     assert isinstance(G.data, np.ndarray)
@@ -230,7 +230,7 @@ def test_anndata_sparse():
     except NameError:
         # not installed
         return
-    G = build_graph(anndata.AnnData(sp.csr_matrix(data)))
+    G = build_graph(anndata.AnnData(sp.csr_matrix(data), dtype=data.dtype))
     assert isinstance(G, graphtools.base.BaseGraph)
     assert isinstance(G.data, sp.csr_matrix)
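test_data.py above starts passing an explicit dtype when constructing AnnData objects (test_estimator.py below does the same), presumably to keep newer anndata releases, which warn about implicit dtype conversion, from altering the matrix under test. A minimal sketch of the pattern:

    import anndata
    import numpy as np

    X = np.random.normal(size=(60, 500)).astype(np.float32)
    adata = anndata.AnnData(X, dtype=X.dtype)  # dtype made explicit, as in the tests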
diff --git a/test/test_estimator.py b/test/test_estimator.py
index c66aceb..f3120cb 100644
--- a/test/test_estimator.py
+++ b/test/test_estimator.py
@@ -1,12 +1,14 @@
+from load_tests import assert_raises_message
+from load_tests import data
+from parameterized import parameterized
+from scipy import sparse
+
+import anndata
 import graphtools
 import graphtools.estimator
+import numpy as np
 import pygsp
-import anndata
 import warnings
-import numpy as np
-from load_tests import data, assert_raises_message
-from scipy import sparse
-from parameterized import parameterized


 class Estimator(graphtools.estimator.GraphEstimator):
@@ -97,7 +99,7 @@ def test_anndata_input():
     E = Estimator(verbose=0)
     E.fit(X.astype(np.float32))
     E2 = Estimator(verbose=0)
-    E2.fit(anndata.AnnData(X))
+    E2.fit(anndata.AnnData(X, dtype=X.dtype))
     np.testing.assert_allclose(
         E.graph.K.toarray(), E2.graph.K.toarray(), rtol=1e-6, atol=2e-7
     )
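The shortest-path tests in test_exact.py below gain a fixed seed and explicit atol/rtol tolerances, presumably because scipy's shortest_path and the removed sklearn helper can differ at floating-point level. A tiny runnable illustration of the comparison pattern the tests now use:

    import numpy as np

    a = np.array([1.0, 2.0, 3.0])
    b = a + 1e-5  # small numerical discrepancy
    np.testing.assert_allclose(a, b, atol=1e-4, rtol=1e-3)  # passes within tolerance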
diff --git a/test/test_exact.py b/test/test_exact.py
index 07faab0..0044359 100644
--- a/test/test_exact.py
+++ b/test/test_exact.py
@@ -1,21 +1,20 @@
 from __future__ import print_function
-from sklearn.utils.graph import graph_shortest_path
-from load_tests import (
-    graphtools,
-    np,
-    sp,
-    pygsp,
-    nose2,
-    data,
-    build_graph,
-    squareform,
-    pdist,
-    PCA,
-    TruncatedSVD,
-    assert_raises_message,
-    assert_warns_message,
-)

+from load_tests import assert_raises_message
+from load_tests import assert_warns_message
+from load_tests import build_graph
+from load_tests import data
+from load_tests import graphtools
+from load_tests import nose2
+from load_tests import np
+from load_tests import PCA
+from load_tests import pdist
+from load_tests import pygsp
+from load_tests import sp
+from load_tests import squareform
+from load_tests import TruncatedSVD
 from nose.tools import assert_warns_regex
+from scipy.sparse.csgraph import shortest_path

 #####################################################
 # Check parameters
@@ -141,7 +140,7 @@ def test_exact_graph():
     knn_dist = np.partition(pdx, k, axis=1)[:, :k]
     epsilon = np.max(knn_dist, axis=1) * bandwidth_scale
     weighted_pdx = (pdx.T / epsilon).T
-    K = np.exp(-1 * weighted_pdx ** a)
+    K = np.exp(-1 * weighted_pdx**a)
     W = K + K.T
     W = np.divide(W, 2)
     np.fill_diagonal(W, 0)
@@ -218,7 +217,7 @@ def test_truncated_exact_graph():
     knn_dist = np.partition(pdx, k, axis=1)[:, :k]
     epsilon = np.max(knn_dist, axis=1)
     weighted_pdx = (pdx.T / epsilon).T
-    K = np.exp(-1 * weighted_pdx ** a)
+    K = np.exp(-1 * weighted_pdx**a)
     K[K < thresh] = 0
     W = K + K.T
     W = np.divide(W, 2)
@@ -289,7 +288,7 @@ def test_truncated_exact_graph_sparse():
     knn_dist = np.partition(pdx, k, axis=1)[:, :k]
     epsilon = np.max(knn_dist, axis=1)
     weighted_pdx = (pdx.T / epsilon).T
-    K = np.exp(-1 * weighted_pdx ** a)
+    K = np.exp(-1 * weighted_pdx**a)
     K[K < thresh] = 0
     W = K + K.T
     W = np.divide(W, 2)
@@ -360,7 +359,7 @@ def test_truncated_exact_graph_no_pca():
     knn_dist = np.partition(pdx, k, axis=1)[:, :k]
     epsilon = np.max(knn_dist, axis=1)
     weighted_pdx = (pdx.T / epsilon).T
-    K = np.exp(-1 * weighted_pdx ** a)
+    K = np.exp(-1 * weighted_pdx**a)
     K[K < thresh] = 0
     W = K + K.T
     W = np.divide(W, 2)
@@ -525,7 +524,7 @@ def test_exact_graph_anisotropy():
     knn_dist = np.partition(pdx, k, axis=1)[:, :k]
     epsilon = np.max(knn_dist, axis=1)
     weighted_pdx = (pdx.T / epsilon).T
-    K = np.exp(-1 * weighted_pdx ** a)
+    K = np.exp(-1 * weighted_pdx**a)
     K = K + K.T
     K = np.divide(K, 2)
     d = K.sum(1)
@@ -590,30 +589,36 @@ def test_exact_graph_anisotropy():
 def test_shortest_path_affinity():
+    np.random.seed(42)
     data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)]
     G = build_graph(data_small, knn=5, decay=15)
     D = -1 * np.where(G.K != 0, np.log(np.where(G.K != 0, G.K, np.nan)), 0)
-    P = graph_shortest_path(D)
+    P = shortest_path(D)
     # sklearn returns 0 if no path exists
     P[np.where(P == 0)] = np.inf
     # diagonal should actually be zero
     np.fill_diagonal(P, 0)
-    np.testing.assert_allclose(P, G.shortest_path(distance="affinity"))
-    np.testing.assert_allclose(P, G.shortest_path())
+    np.testing.assert_allclose(
+        P, G.shortest_path(distance="affinity"), atol=1e-4, rtol=1e-3
+    )
+    np.testing.assert_allclose(P, G.shortest_path(), atol=1e-4, rtol=1e-3)


 def test_shortest_path_affinity_precomputed():
+    np.random.seed(42)
     data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)]
     G = build_graph(data_small, knn=5, decay=15)
     G = graphtools.Graph(G.K, precomputed="affinity")
     D = -1 * np.where(G.K != 0, np.log(np.where(G.K != 0, G.K, np.nan)), 0)
-    P = graph_shortest_path(D)
+    P = shortest_path(D)
     # sklearn returns 0 if no path exists
     P[np.where(P == 0)] = np.inf
     # diagonal should actually be zero
     np.fill_diagonal(P, 0)
-    np.testing.assert_allclose(P, G.shortest_path(distance="affinity"))
-    np.testing.assert_allclose(P, G.shortest_path())
+    np.testing.assert_allclose(
+        P, G.shortest_path(distance="affinity"), atol=1e-4, rtol=1e-3
+    )
+    np.testing.assert_allclose(P, G.shortest_path(), atol=1e-4, rtol=1e-3)


 def test_shortest_path_decay_constant():
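The pdx ** a style edits in test_exact.py above and test_knn.py below come from black 22.x, which hugs the power operator when both operands are simple. A runnable before/after sketch:

    import numpy as np

    weighted_pdx, a = np.array([1.0, 2.0]), 3.0
    K = np.exp(-1 * weighted_pdx ** a)  # before black 22.x
    K = np.exp(-1 * weighted_pdx**a)  # after: no spaces around ** for simple operands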
diff --git a/test/test_knn.py b/test/test_knn.py
index fe47c07..9dae5f9 100644
--- a/test/test_knn.py
+++ b/test/test_knn.py
@@ -1,21 +1,24 @@
-from __future__ import print_function, division
-from sklearn.utils.graph import graph_shortest_path
-from scipy.spatial.distance import pdist, squareform
-from load_tests import assert_raises_message, assert_warns_message
-from nose.tools import assert_raises_regex, assert_warns_regex
-import warnings
-from load_tests import (
-    graphtools,
-    np,
-    sp,
-    pygsp,
-    data,
-    datasets,
-    build_graph,
-    PCA,
-    TruncatedSVD,
-)
+from __future__ import division
+from __future__ import print_function
+
+from load_tests import assert_raises_message
+from load_tests import assert_warns_message
+from load_tests import build_graph
+from load_tests import data
+from load_tests import datasets
+from load_tests import graphtools
+from load_tests import np
+from load_tests import PCA
+from load_tests import pygsp
+from load_tests import sp
+from load_tests import TruncatedSVD
+from nose.tools import assert_raises_regex
+from nose.tools import assert_warns_regex
+from scipy.sparse.csgraph import shortest_path
+from scipy.spatial.distance import pdist
+from scipy.spatial.distance import squareform
+
+import warnings

 #####################################################
 # Check parameters
@@ -51,7 +54,7 @@ def test_duplicate_data():
         RuntimeWarning,
         r"Detected zero distance between samples ([0-9and,\s]*). Consider removing duplicates to avoid errors in downstream processing.",
     ):
-        build_graph(np.vstack([data, data[:9]]), n_pca=20, decay=10, thresh=1e-4)
+        build_graph(np.vstack([data, data[:9]]), n_pca=None, decay=10, thresh=1e-4)


 def test_duplicate_data_many():
@@ -59,7 +62,7 @@ def test_duplicate_data_many():
         RuntimeWarning,
         "Detected zero distance between ([0-9]*) pairs of samples. Consider removing duplicates to avoid errors in downstream processing.",
     ):
-        build_graph(np.vstack([data, data[:21]]), n_pca=20, decay=10, thresh=1e-4)
+        build_graph(np.vstack([data, data[:21]]), n_pca=None, decay=10, thresh=1e-4)


 def test_balltree_cosine():
@@ -156,7 +159,8 @@ def test_knn_graph():
         ),
     ):
         G2.build_kernel_to_data(
-            Y=G2.data_nu, knn=data.shape[0] + 1,
+            Y=G2.data_nu,
+            knn=data.shape[0] + 1,
         )
@@ -232,7 +236,7 @@ def test_sparse_alpha_knn_graph():
     knn_dist = np.partition(pdx, k, axis=1)[:, :k]
     epsilon = np.max(knn_dist, axis=1) * bandwidth_scale
     pdx = (pdx.T / epsilon).T
-    K = np.exp(-1 * pdx ** a)
+    K = np.exp(-1 * pdx**a)
     K = K + K.T
     W = np.divide(K, 2)
     np.fill_diagonal(W, 0)
@@ -278,7 +282,7 @@ def test_knnmax():
     knn_max_dist = np.max(np.partition(pdx, k_max, axis=1)[:, :k_max], axis=1)
     epsilon = np.max(knn_dist, axis=1)
     pdx_scale = (pdx.T / epsilon).T
-    K = np.where(pdx <= knn_max_dist[:, None], np.exp(-1 * pdx_scale ** a), 0)
+    K = np.where(pdx <= knn_max_dist[:, None], np.exp(-1 * pdx_scale**a), 0)
     K = K + K.T
     W = np.divide(K, 2)
     np.fill_diagonal(W, 0)
@@ -430,7 +434,7 @@ def test_knn_graph_anisotropy():
     knn_dist = np.partition(pdx, k, axis=1)[:, :k]
     epsilon = np.max(knn_dist, axis=1)
     weighted_pdx = (pdx.T / epsilon).T
-    K = np.exp(-1 * weighted_pdx ** a)
+    K = np.exp(-1 * weighted_pdx**a)
     K[K < thresh] = 0
     K = K + K.T
     K = np.divide(K, 2)
@@ -525,7 +529,7 @@ def test_knn_interpolate_wrong_shape():
 def test_shortest_path_constant():
     data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)]
     G = build_graph(data_small, knn=5, decay=None)
-    P = graph_shortest_path(G.K)
+    P = shortest_path(G.K)
     # sklearn returns 0 if no path exists
     P[np.where(P == 0)] = np.inf
     # diagonal should actually be zero
@@ -537,7 +541,7 @@ def test_shortest_path_precomputed_constant():
     data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)]
     G = build_graph(data_small, knn=5, decay=None)
     G = graphtools.Graph(G.K, precomputed="affinity")
-    P = graph_shortest_path(G.K)
+    P = shortest_path(G.K)
     # sklearn returns 0 if no path exists
     P[np.where(P == 0)] = np.inf
     # diagonal should actually be zero
@@ -550,7 +554,7 @@ def test_shortest_path_data():
     data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)]
     G = build_graph(data_small, knn=5, decay=None)
     D = squareform(pdist(G.data_nu)) * np.where(G.K.toarray() > 0, 1, 0)
-    P = graph_shortest_path(D)
+    P = shortest_path(D)
     # sklearn returns 0 if no path exists
     P[np.where(P == 0)] = np.inf
     # diagonal should actually be zero
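test_landmark.py below no longer requires exactly n_landmark landmarks: mini-batch k-means can leave a few clusters empty, so the landmark operator may come out slightly smaller, and the test now accepts anything in [n_landmark - 3, n_landmark] (the 3-cluster slack appears to be an empirical choice). A short runnable demonstration of the underlying behavior:

    import numpy as np
    from sklearn.cluster import MiniBatchKMeans

    X = np.random.default_rng(0).random((100, 2))
    labels = MiniBatchKMeans(n_clusters=20, n_init=1, random_state=0).fit_predict(X)
    print(len(np.unique(labels)))  # may be fewer than 20 distinct clusters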
diff --git a/test/test_landmark.py b/test/test_landmark.py
index aa2f9cc..489d864 100644
--- a/test/test_landmark.py
+++ b/test/test_landmark.py
@@ -1,17 +1,16 @@
 from __future__ import print_function
-from load_tests import (
-    graphtools,
-    np,
-    nose2,
-    data,
-    digits,
-    build_graph,
-    generate_swiss_roll,
-    assert_raises_message,
-    assert_warns_message,
-)
-import pygsp

+from load_tests import assert_raises_message
+from load_tests import assert_warns_message
+from load_tests import build_graph
+from load_tests import data
+from load_tests import digits
+from load_tests import generate_swiss_roll
+from load_tests import graphtools
+from load_tests import nose2
+from load_tests import np
+
+import pygsp

 #####################################################
 # Check parameters
@@ -68,13 +67,17 @@ def test_landmark_exact_graph():


 def test_landmark_knn_graph():
+    np.random.seed(42)
     n_landmark = 500
     # knn graph
     G = build_graph(
         data, n_landmark=n_landmark, n_pca=20, decay=None, knn=5 - 1, random_state=42
     )
-    assert G.transitions.shape == (data.shape[0], n_landmark)
-    assert G.landmark_op.shape == (n_landmark, n_landmark)
+    n_landmark_out = G.landmark_op.shape[0]
+    assert n_landmark_out <= n_landmark
+    assert n_landmark_out >= n_landmark - 3
+    assert G.transitions.shape == (data.shape[0], n_landmark_out), G.transitions.shape
+    assert G.landmark_op.shape == (n_landmark_out, n_landmark_out)
     assert isinstance(G, graphtools.graphs.kNNGraph)
     assert isinstance(G, graphtools.graphs.LandmarkGraph)
diff --git a/test/test_matrix.py b/test/test_matrix.py
index aac45a7..41cf757 100644
--- a/test/test_matrix.py
+++ b/test/test_matrix.py
@@ -1,11 +1,12 @@
-import graphtools.matrix
-import graphtools.utils
+from load_tests import assert_warns_message
+from load_tests import data
 from parameterized import parameterized
 from scipy import sparse
-import numpy as np
+
 import graphtools
-from load_tests import data
-from load_tests import assert_warns_message
+import graphtools.matrix
+import graphtools.utils
+import numpy as np


 @parameterized(
diff --git a/test/test_mnn.py b/test/test_mnn.py
index 30e3a77..592b227 100644
--- a/test/test_mnn.py
+++ b/test/test_mnn.py
@@ -1,21 +1,20 @@
 from __future__ import print_function
-import warnings
-from load_tests import (
-    graphtools,
-    np,
-    pd,
-    pygsp,
-    nose2,
-    data,
-    digits,
-    build_graph,
-    generate_swiss_roll,
-    assert_raises_message,
-    assert_warns_message,
-    cdist,
-)

+from load_tests import assert_raises_message
+from load_tests import assert_warns_message
+from load_tests import build_graph
+from load_tests import cdist
+from load_tests import data
+from load_tests import digits
+from load_tests import generate_swiss_roll
+from load_tests import graphtools
+from load_tests import nose2
+from load_tests import np
+from load_tests import pd
+from load_tests import pygsp
 from scipy.linalg import norm
+
+import warnings

 #####################################################
 # Check parameters
@@ -420,7 +419,7 @@ def test_mnn_graph_decay():
                 kdx_ij = np.sort(pdx_ij, axis=1)  # get kNN
                 e_ij = kdx_ij[:, batch_k]  # dist to kNN
                 pdxe_ij = pdx_ij / e_ij[:, np.newaxis]  # normalize
-                k_ij = np.exp(-1 * (pdxe_ij ** a))  # apply alpha-decaying kernel
+                k_ij = np.exp(-1 * (pdxe_ij**a))  # apply alpha-decaying kernel
                 if si == sj:
                     K.iloc[sample_idx == si, sample_idx == sj] = (k_ij + k_ij.T) / 2
                 else:
diff --git a/test/test_utils.py b/test/test_utils.py
index 1aadd82..da379b4 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1,6 +1,7 @@
-import graphtools
 from load_tests import assert_raises_message

+import graphtools
+

 def test_check_in():
     graphtools.utils.check_in(["hello", "world"], foo="hello")