From a91fe0a7411ac94a8ade7d5514dbe746839ab9cb Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:13:00 -0500 Subject: [PATCH 01/41] Use `scipy.sparse.csgraph.shortest_path` --- graphtools/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/graphtools/base.py b/graphtools/base.py index 4789b8a..5692f87 100644 --- a/graphtools/base.py +++ b/graphtools/base.py @@ -7,7 +7,7 @@ from inspect import signature from sklearn.decomposition import PCA, TruncatedSVD from sklearn.preprocessing import normalize -from sklearn.utils.graph import graph_shortest_path +from scipy.sparse.csgraph import shortest_path from scipy import sparse import warnings import numbers @@ -903,7 +903,7 @@ def shortest_path(self, method="auto", distance=None): "Got {}".format(distance) ) - P = graph_shortest_path(D, method=method) + P = shortest_path(D, method=method) # symmetrize for numerical error P = (P + P.T) / 2 # sklearn returns 0 if no path exists From 5466bd1ffeeb374ccbd600acbd724e27c4454ba5 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:14:55 -0500 Subject: [PATCH 02/41] Create run_tests.yml --- .github/workflows/run_tests.yml | 118 ++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 .github/workflows/run_tests.yml diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml new file mode 100644 index 0000000..6777de2 --- /dev/null +++ b/.github/workflows/run_tests.yml @@ -0,0 +1,118 @@ +name: Unit Tests + +on: + push: + branches-ignore: + - 'test_deploy' + pull_request: + branches: + - '*' + +jobs: + run_linter: + runs-on: ${{ matrix.config.os }} + if: "!contains(github.event.head_commit.message, 'ci skip')" + + strategy: + fail-fast: false + matrix: + config: + - {name: 'current', os: ubuntu-latest, python: '3.8' } + + steps: + - name: Cancel Previous Runs + uses: styfle/cancel-workflow-action@0.6.0 + with: + access_token: ${{ github.token }} + + - uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.config.python }} + + - name: Install tools + run: | + python -m pip install --upgrade pip + pip install -U wheel setuptools + pip install -U black flake8 + + - name: Lint with Black + run: | + cd Python + black . --check --diff + + - name: Lint with flake8 + run: | + flake8 phate || true + + run_tester: + runs-on: ${{ matrix.config.os }} + if: "!contains(github.event.head_commit.message, 'ci skip')" + + strategy: + fail-fast: false + matrix: + config: + - {name: '3.11', os: ubuntu-latest, python: '3.11' } + - {name: '3.10', os: ubuntu-latest, python: '3.10' } + - {name: '3.9', os: ubuntu-latest, python: '3.9' } + - {name: '3.8', os: ubuntu-latest, python: '3.8' } + - {name: '3.7', os: ubuntu-latest, python: '3.7' } + - {name: '3.6', os: ubuntu-latest, python: '3.6' } + + steps: + - name: Cancel Previous Runs + uses: styfle/cancel-workflow-action@0.6.0 + with: + access_token: ${{ github.token }} + + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Install system dependencies + if: runner.os == 'Linux' + run: | + sudo apt-get update -qq + sudo apt-get install -y libhdf5-dev libhdf5-serial-dev pandoc gfortran libblas-dev liblapack-dev llvm-dev + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.config.python }} + + - name: Cache Python packages + uses: actions/cache@v2 + with: + path: ${{ env.pythonLocation }} + key: ${{runner.os}}-${{ matrix.config.python }}-pip-${{ env.pythonLocation }}-${{ hashFiles('setup.py') }} + restore-keys: ${{runner.os}}-${{ matrix.config.python }}-pip-${{ env.pythonLocation }}- + + - name: Install package & dependencies + run: | + cd Python + python -m pip install --upgrade pip + pip install -U wheel setuptools + pip install -U .[test] + python -c "import graphtools" + + - name: Run tests + run: | + cd Python + nose2 -vvv + + - name: Coveralls + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + COVERALLS_SERVICE_NAME: github + run: | + coveralls + + - name: Upload check results on fail + if: failure() + uses: actions/upload-artifact@master + with: + name: ${{ matrix.config.name }}_results + path: check From ab3ff4573b9666a61eab5387559d70737faf9791 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:16:34 -0500 Subject: [PATCH 03/41] bugfix --- .github/workflows/run_tests.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 6777de2..17abff6 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -40,12 +40,11 @@ jobs: - name: Lint with Black run: | - cd Python black . --check --diff - name: Lint with flake8 run: | - flake8 phate || true + flake8 graphtools || true run_tester: runs-on: ${{ matrix.config.os }} @@ -92,7 +91,6 @@ jobs: - name: Install package & dependencies run: | - cd Python python -m pip install --upgrade pip pip install -U wheel setuptools pip install -U .[test] @@ -100,7 +98,6 @@ jobs: - name: Run tests run: | - cd Python nose2 -vvv - name: Coveralls From 473a2754100a91d3864c039d7f2bdb410ee67158 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:21:26 -0500 Subject: [PATCH 04/41] Update test_exact.py --- test/test_exact.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_exact.py b/test/test_exact.py index 07faab0..3a03594 100644 --- a/test/test_exact.py +++ b/test/test_exact.py @@ -1,5 +1,5 @@ from __future__ import print_function -from sklearn.utils.graph import graph_shortest_path +from scipy.sparse.csgraph import shortest_path from load_tests import ( graphtools, np, @@ -593,7 +593,7 @@ def test_shortest_path_affinity(): data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=15) D = -1 * np.where(G.K != 0, np.log(np.where(G.K != 0, G.K, np.nan)), 0) - P = graph_shortest_path(D) + P = shortest_path(D) # sklearn returns 0 if no path exists P[np.where(P == 0)] = np.inf # diagonal should actually be zero @@ -607,7 +607,7 @@ def test_shortest_path_affinity_precomputed(): G = build_graph(data_small, knn=5, decay=15) G = graphtools.Graph(G.K, precomputed="affinity") D = -1 * np.where(G.K != 0, np.log(np.where(G.K != 0, G.K, np.nan)), 0) - P = graph_shortest_path(D) + P = shortest_path(D) # sklearn returns 0 if no path exists P[np.where(P == 0)] = np.inf # diagonal should actually be zero From 725c3f710111a011103851bc7a3eb511b26f523a Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:23:28 -0500 Subject: [PATCH 05/41] Create pre-commit.yml --- .github/workflows/pre-commit.yml | 61 ++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 .github/workflows/pre-commit.yml diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 0000000..4b6a475 --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,61 @@ +name: pre-commit +on: + push: + branches-ignore: + - 'master' + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + pre-commit: + runs-on: ubuntu-latest + + if: >- + !endsWith(github.event.head_commit.message, '# ci skip') && + ( + startsWith(github.ref, 'refs/heads') || + github.event.pull_request.draft == false + ) + + steps: + + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Cache pre-commit + uses: actions/cache@v3 + with: + path: ~/.cache/pre-commit + key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }}- + + - name: Run pre-commit + id: precommit + uses: pre-commit/action@v3.0.0 + continue-on-error: true + + - name: Commit files + if: steps.precommit.outcome == 'failure' && startsWith(github.ref, 'refs/heads') + run: | + if [[ `git status --porcelain --untracked-files=no` ]]; then + git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com" + git config --local user.name "github-actions[bot]" + git checkout -- .github/workflows + git commit -m "pre-commit" -a + fi + shell: bash -e {0} + + - name: Push changes + if: steps.precommit.outcome == 'failure' && startsWith(github.ref, 'refs/heads') + uses: ad-m/github-push-action@master + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + branch: ${{ github.ref }} + + - name: Check pre-commit + if: steps.precommit.outcome == 'failure' + uses: pre-commit/action@v3.0.0 From 6fb231db6ae49a288914f92b6e6121c0acba44a2 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:24:15 -0500 Subject: [PATCH 06/41] use concurrency --- .github/workflows/run_tests.yml | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 17abff6..c3d45ab 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -8,6 +8,10 @@ on: branches: - '*' +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: run_linter: runs-on: ${{ matrix.config.os }} @@ -20,11 +24,6 @@ jobs: - {name: 'current', os: ubuntu-latest, python: '3.8' } steps: - - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.6.0 - with: - access_token: ${{ github.token }} - - uses: actions/checkout@v2 - name: Set up Python @@ -62,10 +61,6 @@ jobs: - {name: '3.6', os: ubuntu-latest, python: '3.6' } steps: - - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.6.0 - with: - access_token: ${{ github.token }} - uses: actions/checkout@v2 with: From 7d8178d8c4aafc19d4b3c7188774fee75420957c Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:25:44 -0500 Subject: [PATCH 07/41] Create .pre-commit-config.yaml --- .pre-commit-config.yaml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..152000b --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,26 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.3.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace + exclude: \.(ai|gz)$ + - repo: https://github.com/timothycrosley/isort + rev: 5.6.4 + hooks: + - id: isort + - repo: https://github.com/psf/black + rev: 22.3.0 + hooks: + - id: black + args: ['--target-version=py36'] + - repo: https://github.com/pre-commit/mirrors-autopep8 + rev: v1.5.4 + hooks: + - id: autopep8 +# - repo: https://gitlab.com/pycqa/flake8 +# rev: 3.8.4 +# hooks: +# - id: flake8 +# additional_dependencies: ['hacking'] From 508d321994dc013ff7f785fa10fd2f0e794d58a8 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 3 Jan 2023 05:26:36 +0000 Subject: [PATCH 08/41] pre-commit --- .coveragerc | 2 - autoblack.sh | 1 - doc/Makefile | 2 +- doc/source/conf.py | 6 +- graphtools/api.py | 12 ++-- graphtools/base.py | 83 +++++++++++---------- graphtools/estimator.py | 43 ++++++----- graphtools/graphs.py | 139 ++++++++++++++++++++++-------------- graphtools/matrix.py | 5 +- graphtools/utils.py | 8 ++- setup.py | 18 +++-- test/load_tests/__init__.py | 23 +++--- test/test_api.py | 9 +-- test/test_data.py | 37 ++++++---- test/test_estimator.py | 17 +++-- test/test_exact.py | 73 +++++++++++-------- test/test_knn.py | 77 ++++++++++++-------- test/test_landmark.py | 16 ++--- test/test_matrix.py | 16 +++-- test/test_mnn.py | 42 ++++++----- test/test_utils.py | 6 +- 21 files changed, 377 insertions(+), 258 deletions(-) diff --git a/.coveragerc b/.coveragerc index 82ed0b8..b88aee5 100644 --- a/.coveragerc +++ b/.coveragerc @@ -15,5 +15,3 @@ exclude_lines = # Don't complain if non-runnable code isn't run: if 0: if __name__ == .__main__. - - diff --git a/autoblack.sh b/autoblack.sh index cfbaf2b..3642ac8 100644 --- a/autoblack.sh +++ b/autoblack.sh @@ -11,4 +11,3 @@ for file in \$files; do done EOF chmod +x .git/hooks/pre-commit - diff --git a/doc/Makefile b/doc/Makefile index acdb12e..c596553 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -17,4 +17,4 @@ help: # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/doc/source/conf.py b/doc/source/conf.py index e7303a3..afa4a9f 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -67,7 +67,8 @@ # version_py = os.path.join(root_dir, "graphtools", "version.py") # The full version, including alpha/beta/rc tags. -release = open(version_py).read().strip().split("=")[-1].replace('"', "").strip() +release = open(version_py).read().strip().split( + "=")[-1].replace('"', "").strip() # The short X.Y version. version = release.split("-")[0] @@ -150,7 +151,8 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [(master_doc, "graphtools", "graphtools Documentation", [author], 1)] +man_pages = [(master_doc, "graphtools", + "graphtools Documentation", [author], 1)] # -- Options for Texinfo output ------------------------------------------- diff --git a/graphtools/api.py b/graphtools/api.py index b9a4b1b..dfa1b70 100644 --- a/graphtools/api.py +++ b/graphtools/api.py @@ -1,9 +1,10 @@ -import numpy as np -import warnings -from scipy import sparse import pickle +import warnings + +import numpy as np import pygsp import tasklogger +from scipy import sparse from . import base, graphs @@ -36,7 +37,7 @@ def Graph( graphtype="auto", use_pygsp=False, initialize=True, - **kwargs + **kwargs, ): """Create a graph built on data. @@ -341,7 +342,8 @@ def read_pickle(path): G = pickle.load(f) if not isinstance(G, base.BaseGraph): - warnings.warn("Returning object that is not a graphtools.base.BaseGraph") + warnings.warn( + "Returning object that is not a graphtools.base.BaseGraph") elif isinstance(G, base.PyGSPGraph) and isinstance(G.logger, str): G.logger = pygsp.utils.build_logger(G.logger) return G diff --git a/graphtools/base.py b/graphtools/base.py index 5692f87..11b6515 100644 --- a/graphtools/base.py +++ b/graphtools/base.py @@ -1,19 +1,20 @@ -from future.utils import with_metaclass +import abc +import numbers +import pickle +import sys +import warnings from builtins import super from copy import copy as shallow_copy +from inspect import signature + import numpy as np -import abc import pygsp -from inspect import signature +import tasklogger +from future.utils import with_metaclass +from scipy import sparse +from scipy.sparse.csgraph import shortest_path from sklearn.decomposition import PCA, TruncatedSVD from sklearn.preprocessing import normalize -from scipy.sparse.csgraph import shortest_path -from scipy import sparse -import warnings -import numbers -import pickle -import sys -import tasklogger from . import matrix, utils @@ -113,7 +114,8 @@ def __init__( ): self._check_data(data) - n_pca, rank_threshold = self._parse_n_pca_threshold(data, n_pca, rank_threshold) + n_pca, rank_threshold = self._parse_n_pca_threshold( + data, n_pca, rank_threshold) if utils.is_SparseDataFrame(data): data = data.to_coo() @@ -238,7 +240,8 @@ def _reduce_data(self): self.n_pca == "auto" or self.n_pca < self.data.shape[1] ): with _logger.task("PCA"): - n_pca = self.data.shape[1] - 1 if self.n_pca == "auto" else self.n_pca + n_pca = self.data.shape[1] - \ + 1 if self.n_pca == "auto" else self.n_pca if sparse.issparse(self.data): if ( isinstance(self.data, sparse.coo_matrix) @@ -246,7 +249,8 @@ def _reduce_data(self): or isinstance(self.data, sparse.dok_matrix) ): self.data = self.data.tocsr() - self.data_pca = TruncatedSVD(n_pca, random_state=self.random_state) + self.data_pca = TruncatedSVD( + n_pca, random_state=self.random_state) else: self.data_pca = PCA( n_pca, svd_solver="randomized", random_state=self.random_state @@ -257,7 +261,8 @@ def _reduce_data(self): smax = s.max() if self.rank_threshold == "auto": threshold = ( - smax * np.finfo(self.data.dtype).eps * max(self.data.shape) + smax * np.finfo(self.data.dtype).eps * + max(self.data.shape) ) self.rank_threshold = threshold threshold = self.rank_threshold @@ -276,7 +281,8 @@ def _reduce_data(self): op = self.data_pca # for line-width brevity.. op.components_ = op.components_[gate, :] op.explained_variance_ = op.explained_variance_[gate] - op.explained_variance_ratio_ = op.explained_variance_ratio_[gate] + op.explained_variance_ratio_ = op.explained_variance_ratio_[ + gate] op.singular_values_ = op.singular_values_[gate] self.data_pca = ( op # im not clear if this is needed due to assignment rules @@ -286,14 +292,14 @@ def _reduce_data(self): else: data_nu = self.data if sparse.issparse(data_nu) and not isinstance( - data_nu, (sparse.csr_matrix, sparse.csc_matrix, sparse.bsr_matrix) + data_nu, (sparse.csr_matrix, + sparse.csc_matrix, sparse.bsr_matrix) ): data_nu = data_nu.tocsr() return data_nu def get_params(self): - """Get parameters from this object - """ + """Get parameters from this object""" return {"n_pca": self.n_pca, "random_state": self.random_state} def set_params(self, **params): @@ -469,11 +475,12 @@ def __init__( anisotropy=0, gamma=None, initialize=True, - **kwargs + **kwargs, ): if gamma is not None: warnings.warn( - "gamma is deprecated. " "Setting theta={}".format(gamma), FutureWarning + "gamma is deprecated. " "Setting theta={}".format( + gamma), FutureWarning ) theta = gamma if kernel_symm == "gamma": @@ -562,7 +569,8 @@ def symmetrize_kernel(self, K): _logger.debug("Using multiplication symmetrization.") K = K.multiply(K.T) elif self.kernel_symm == "mnn": - _logger.debug("Using mnn symmetrization (theta = {}).".format(self.theta)) + _logger.debug( + "Using mnn symmetrization (theta = {}).".format(self.theta)) K = self.theta * matrix.elementwise_minimum(K, K.T) + ( 1 - self.theta ) * matrix.elementwise_maximum(K, K.T) @@ -589,8 +597,7 @@ def apply_anisotropy(self, K): return K def get_params(self): - """Get parameters from this object - """ + """Get parameters from this object""" return { "kernel_symm": self.kernel_symm, "theta": self.theta, @@ -618,9 +625,11 @@ def set_params(self, **params): if "theta" in params and params["theta"] != self.theta: raise ValueError("Cannot update theta. Please create a new graph") if "anisotropy" in params and params["anisotropy"] != self.anisotropy: - raise ValueError("Cannot update anisotropy. Please create a new graph") + raise ValueError( + "Cannot update anisotropy. Please create a new graph") if "kernel_symm" in params and params["kernel_symm"] != self.kernel_symm: - raise ValueError("Cannot update kernel_symm. Please create a new graph") + raise ValueError( + "Cannot update kernel_symm. Please create a new graph") super().set_params(**params) return self @@ -697,8 +706,7 @@ def diff_aff(self): @property def diff_op(self): - """Synonym for P - """ + """Synonym for P""" return self.P @property @@ -719,8 +727,7 @@ def K(self): @property def kernel(self): - """Synonym for K - """ + """Synonym for K""" return self.K @property @@ -819,7 +826,8 @@ def to_pickle(self, path): """ pickle_obj = shallow_copy(self) is_oldpygsp = all( - [isinstance(self, pygsp.graphs.Graph), int(sys.version.split(".")[1]) < 7] + [isinstance(self, pygsp.graphs.Graph), int( + sys.version.split(".")[1]) < 7] ) if is_oldpygsp: pickle_obj.logger = pickle_obj.logger.name @@ -892,7 +900,8 @@ def shortest_path(self, method="auto", distance=None): elif distance == "data": D = sparse.coo_matrix(self.K) D.data = np.sqrt( - np.sum((self.data_nu[D.row] - self.data_nu[D.col]) ** 2, axis=1) + np.sum((self.data_nu[D.row] - + self.data_nu[D.col]) ** 2, axis=1) ) elif distance == "affinity": D = sparse.csr_matrix(self.K) @@ -1019,8 +1028,7 @@ def __init__(self, data, verbose=True, n_jobs=1, **kwargs): super().__init__(data, **kwargs) def get_params(self): - """Get parameters from this object - """ + """Get parameters from this object""" params = Data.get_params(self) params.update(BaseGraph.get_params(self)) return params @@ -1071,7 +1079,8 @@ def _check_extension_shape(self, Y): `self.n_pca`. """ if len(Y.shape) != 2: - raise ValueError("Expected a 2D matrix. Y has shape {}".format(Y.shape)) + raise ValueError( + "Expected a 2D matrix. Y has shape {}".format(Y.shape)) if not Y.shape[1] == self.data_nu.shape[1]: # try PCA transform if Y.shape[1] == self.data.shape[1]: @@ -1085,7 +1094,8 @@ def _check_extension_shape(self, Y): ) else: # no PCA, only one choice of shape - msg = "Y must be of shape (n, {})".format(self.data.shape[1]) + msg = "Y must be of shape (n, {})".format( + self.data.shape[1]) raise ValueError(msg) return Y @@ -1148,7 +1158,8 @@ def interpolate(self, transform, transitions=None, Y=None): """ if transitions is None: if Y is None: - raise ValueError("Either `transitions` or `Y` must be provided.") + raise ValueError( + "Either `transitions` or `Y` must be provided.") else: transitions = self.extend_to_data(Y) Y_transform = transitions.dot(transform) diff --git a/graphtools/estimator.py b/graphtools/estimator.py index 5cb130f..575323b 100644 --- a/graphtools/estimator.py +++ b/graphtools/estimator.py @@ -1,12 +1,12 @@ -import numpy as np -import tasklogger -import pygsp import abc - from functools import partial + +import numpy as np +import pygsp +import tasklogger from scipy import sparse -from . import api, graphs, base, utils, matrix +from . import api, base, graphs, matrix, utils def attribute(attr, default=None, doc=None, on_set=None): @@ -81,18 +81,18 @@ class GraphEstimator(object, metaclass=abc.ABCMeta): verbose : `int` or `boolean`, optional (default: 1) If `True` or `> 0`, print status messages - + n_svd : int, optional (default: 100) number of singular vectors to compute for landmarking - + thresh : float, optional (default: 1e-4) threshold below which to truncate kernel - + kwargs : additional arguments for graphtools.Graph - + Attributes ---------- - + graph : graphtools.Graph """ @@ -108,11 +108,13 @@ def graph(self, G): n_pca = attribute( "n_pca", default=100, - on_set=partial(utils.check_if_not, None, utils.check_positive, utils.check_int), + on_set=partial(utils.check_if_not, None, + utils.check_positive, utils.check_int), ) random_state = attribute("random_state") - knn = attribute("knn", default=5, on_set=[utils.check_positive, utils.check_int]) + knn = attribute("knn", default=5, on_set=[ + utils.check_positive, utils.check_int]) decay = attribute("decay", default=40, on_set=utils.check_positive) distance = attribute( "distance", @@ -153,7 +155,8 @@ def graph(self, G): n_svd = attribute( "n_svd", default=100, - on_set=partial(utils.check_if_not, None, utils.check_positive, utils.check_int), + on_set=partial(utils.check_if_not, None, + utils.check_positive, utils.check_int), ) n_jobs = attribute( "n_jobs", on_set=partial(utils.check_if_not, None, utils.check_int) @@ -179,7 +182,8 @@ def _update_n_landmark(self, n_landmark): if self.graph is not None: n_landmark = self._parse_n_landmark(self.graph.data_nu, n_landmark) if ( - n_landmark is None and isinstance(self.graph, graphs.LandmarkGraph) + n_landmark is None and isinstance( + self.graph, graphs.LandmarkGraph) ) or ( n_landmark is not None and not isinstance(self.graph, graphs.LandmarkGraph) @@ -203,7 +207,7 @@ def __init__( n_jobs=1, verbose=1, thresh=1e-4, - **kwargs + **kwargs, ): if verbose is True: @@ -254,7 +258,7 @@ def _set_graph_params(self, **params): @abc.abstractmethod def _reset_graph(self): """Trigger a reset of self.graph - + Any downstream effects of resetting the graph should override this function """ raise NotImplementedError @@ -358,10 +362,11 @@ def _update_graph(self, X, precomputed, n_pca, n_landmark, **kwargs): n_jobs=self.n_jobs, thresh=self.thresh, verbose=self.verbose, - **(self.kwargs) + **(self.kwargs), ) if self.graph is not None: - _logger.info("Using precomputed graph and diffusion operator...") + _logger.info( + "Using precomputed graph and diffusion operator...") def fit(self, X, **kwargs): """Computes the graph @@ -417,6 +422,6 @@ def fit(self, X, **kwargs): thresh=self.thresh, verbose=self.verbose, **(self.kwargs), - **kwargs + **kwargs, ) return self diff --git a/graphtools/graphs.py b/graphtools/graphs.py index 2caa431..34afe73 100644 --- a/graphtools/graphs.py +++ b/graphtools/graphs.py @@ -1,16 +1,17 @@ from __future__ import division + +import numbers +import warnings from builtins import super + import numpy as np +import tasklogger +from scipy import sparse +from scipy.spatial.distance import cdist, pdist, squareform +from sklearn.cluster import MiniBatchKMeans from sklearn.neighbors import NearestNeighbors -from sklearn.utils.extmath import randomized_svd from sklearn.preprocessing import normalize -from sklearn.cluster import MiniBatchKMeans -from scipy.spatial.distance import pdist, cdist -from scipy.spatial.distance import squareform -from scipy import sparse -import numbers -import warnings -import tasklogger +from sklearn.utils.extmath import randomized_svd from . import matrix, utils from .base import DataGraph, PyGSPGraph @@ -76,7 +77,7 @@ def __init__( distance="euclidean", thresh=1e-4, n_pca=None, - **kwargs + **kwargs, ): if decay is not None: @@ -99,7 +100,8 @@ def __init__( # implementation requires a knn value knn = 5 if decay is None and bandwidth is not None: - warnings.warn("`bandwidth` is not used when `decay=None`.", UserWarning) + warnings.warn( + "`bandwidth` is not used when `decay=None`.", UserWarning) if knn > data.shape[0] - 2: warnings.warn( "Cannot set knn ({k}) to be greater than " @@ -111,7 +113,8 @@ def __init__( if knn_max is not None and knn_max < knn: warnings.warn( "Cannot set knn_max ({knn_max}) to be less than " - "knn ({knn}). Setting knn_max={knn}".format(knn=knn, knn_max=knn_max) + "knn ({knn}). Setting knn_max={knn}".format( + knn=knn, knn_max=knn_max) ) knn_max = knn if n_pca in [None, 0, False] and data.shape[1] > 500: @@ -132,8 +135,7 @@ def __init__( super().__init__(data, n_pca=n_pca, **kwargs) def get_params(self): - """Get parameters from this object - """ + """Get parameters from this object""" params = super().get_params() params.update( { @@ -180,18 +182,22 @@ def set_params(self, **params): if "knn" in params and params["knn"] != self.knn: raise ValueError("Cannot update knn. Please create a new graph") if "knn_max" in params and params["knn_max"] != self.knn: - raise ValueError("Cannot update knn_max. Please create a new graph") + raise ValueError( + "Cannot update knn_max. Please create a new graph") if "decay" in params and params["decay"] != self.decay: raise ValueError("Cannot update decay. Please create a new graph") if "bandwidth" in params and params["bandwidth"] != self.bandwidth: - raise ValueError("Cannot update bandwidth. Please create a new graph") + raise ValueError( + "Cannot update bandwidth. Please create a new graph") if ( "bandwidth_scale" in params and params["bandwidth_scale"] != self.bandwidth_scale ): - raise ValueError("Cannot update bandwidth_scale. Please create a new graph") + raise ValueError( + "Cannot update bandwidth_scale. Please create a new graph") if "distance" in params and params["distance"] != self.distance: - raise ValueError("Cannot update distance. " "Please create a new graph") + raise ValueError( + "Cannot update distance. " "Please create a new graph") if "thresh" in params and params["thresh"] != self.thresh and self.decay != 0: raise ValueError("Cannot update thresh. Please create a new graph") if "n_jobs" in params: @@ -258,7 +264,8 @@ def build_kernel(self): with no non-negative entries. """ knn_max = self.knn_max + 1 if self.knn_max else None - K = self.build_kernel_to_data(self.data_nu, knn=self.knn + 1, knn_max=knn_max) + K = self.build_kernel_to_data( + self.data_nu, knn=self.knn + 1, knn_max=knn_max) return K def _check_duplicates(self, distances, indices): @@ -357,7 +364,8 @@ def build_kernel_to_data( # sparse fast alpha decay knn_tree = self.knn_tree search_knn = min(knn * self.search_multiplier, knn_max) - distances, indices = knn_tree.kneighbors(Y, n_neighbors=search_knn) + distances, indices = knn_tree.kneighbors( + Y, n_neighbors=search_knn) self._check_duplicates(distances, indices) with _logger.task("affinities"): if bandwidth is None: @@ -368,10 +376,13 @@ def build_kernel_to_data( # check for zero bandwidth bandwidth = np.maximum(bandwidth, np.finfo(float).eps) - radius = bandwidth * np.power(-1 * np.log(self.thresh), 1 / self.decay) - update_idx = np.argwhere(np.max(distances, axis=1) < radius).reshape(-1) + radius = bandwidth * \ + np.power(-1 * np.log(self.thresh), 1 / self.decay) + update_idx = np.argwhere( + np.max(distances, axis=1) < radius).reshape(-1) _logger.debug( - "search_knn = {}; {} remaining".format(search_knn, len(update_idx)) + "search_knn = {}; {} remaining".format( + search_knn, len(update_idx)) ) if len(update_idx) > 0: distances = [d for d in distances] @@ -405,7 +416,8 @@ def build_kernel_to_data( ) ) # increase the knn search - search_knn = min(search_knn * self.search_multiplier, knn_max) + search_knn = min( + search_knn * self.search_multiplier, knn_max) if search_knn > self.data_nu.shape[0] / 2: knn_tree = NearestNeighbors( n_neighbors=search_knn, algorithm="brute", n_jobs=self.n_jobs @@ -425,7 +437,8 @@ def build_kernel_to_data( distances[idx] = dist_new[i] indices[idx] = ind_new[i] else: - _logger.debug("radius search on {}".format(len(update_idx))) + _logger.debug( + "radius search on {}".format(len(update_idx))) # give up - radius search dist_new, ind_new = knn_tree.radius_neighbors( Y[update_idx, :], @@ -440,13 +453,16 @@ def build_kernel_to_data( data = np.concatenate(distances) / bandwidth else: data = np.concatenate( - [distances[i] / bandwidth[i] for i in range(len(distances))] + [distances[i] / bandwidth[i] + for i in range(len(distances))] ) indices = np.concatenate(indices) - indptr = np.concatenate([[0], np.cumsum([len(d) for d in distances])]) + indptr = np.concatenate( + [[0], np.cumsum([len(d) for d in distances])]) K = sparse.csr_matrix( - (data, indices, indptr), shape=(Y.shape[0], self.data_nu.shape[0]) + (data, indices, indptr), shape=( + Y.shape[0], self.data_nu.shape[0]) ) K.data = np.exp(-1 * np.power(K.data, self.decay)) # handle nan @@ -524,8 +540,7 @@ def __init__(self, data, n_landmark=2000, n_svd=100, **kwargs): super().__init__(data, **kwargs) def get_params(self): - """Get parameters from this object - """ + """Get parameters from this object""" params = super().get_params() params.update({"n_landmark": self.n_landmark, "n_pca": self.n_pca}) return params @@ -633,13 +648,15 @@ def _landmarks_to_data(self): if sparse.issparse(self.kernel): pmn = sparse.vstack( [ - sparse.csr_matrix(self.kernel[self.clusters == i, :].sum(axis=0)) + sparse.csr_matrix( + self.kernel[self.clusters == i, :].sum(axis=0)) for i in landmarks ] ) else: pmn = np.array( - [np.sum(self.kernel[self.clusters == i, :], axis=0) for i in landmarks] + [np.sum(self.kernel[self.clusters == i, :], axis=0) + for i in landmarks] ) return pmn @@ -678,7 +695,8 @@ def build_landmark_op(self): pnm = pmn.transpose() pmn = normalize(pmn, norm="l1", axis=1) pnm = normalize(pnm, norm="l1", axis=1) - landmark_op = pmn.dot(pnm) # sparsity agnostic matrix multiplication + # sparsity agnostic matrix multiplication + landmark_op = pmn.dot(pnm) if is_sparse: # no need to have a sparse landmark operator landmark_op = landmark_op.toarray() @@ -714,7 +732,8 @@ def extend_to_data(self, data, **kwargs): if sparse.issparse(kernel): pnm = sparse.hstack( [ - sparse.csr_matrix(kernel[:, self.clusters == i].sum(axis=1)) + sparse.csr_matrix( + kernel[:, self.clusters == i].sum(axis=1)) for i in np.unique(self.clusters) ] ) @@ -834,7 +853,7 @@ def __init__( n_pca=None, thresh=1e-4, precomputed=None, - **kwargs + **kwargs, ): if decay is None and precomputed not in ["affinity", "adjacency"]: # decay high enough is basically a binary kernel @@ -873,7 +892,8 @@ def __init__( ) elif (data < 0).sum() > 0: raise ValueError( - "Precomputed {} should be " "non-negative".format(precomputed) + "Precomputed {} should be " "non-negative".format( + precomputed) ) self.knn = knn self.decay = decay @@ -886,8 +906,7 @@ def __init__( super().__init__(data, n_pca=n_pca, **kwargs) def get_params(self): - """Get parameters from this object - """ + """Get parameters from this object""" params = super().get_params() params.update( { @@ -923,13 +942,15 @@ def set_params(self, **params): self """ if "precomputed" in params and params["precomputed"] != self.precomputed: - raise ValueError("Cannot update precomputed. " "Please create a new graph") + raise ValueError( + "Cannot update precomputed. " "Please create a new graph") if ( "distance" in params and params["distance"] != self.distance and self.precomputed is None ): - raise ValueError("Cannot update distance. " "Please create a new graph") + raise ValueError( + "Cannot update distance. " "Please create a new graph") if "knn" in params and params["knn"] != self.knn and self.precomputed is None: raise ValueError("Cannot update knn. Please create a new graph") if ( @@ -943,12 +964,14 @@ def set_params(self, **params): and params["bandwidth"] != self.bandwidth and self.precomputed is None ): - raise ValueError("Cannot update bandwidth. Please create a new graph") + raise ValueError( + "Cannot update bandwidth. Please create a new graph") if ( "bandwidth_scale" in params and params["bandwidth_scale"] != self.bandwidth_scale ): - raise ValueError("Cannot update bandwidth_scale. Please create a new graph") + raise ValueError( + "Cannot update bandwidth_scale. Please create a new graph") # update superclass parameters super().set_params(**params) return self @@ -980,7 +1003,8 @@ def build_kernel(self): # need to set diagonal to one to make it an affinity matrix K = self.data_nu if sparse.issparse(K) and not ( - isinstance(K, sparse.dok_matrix) or isinstance(K, sparse.lil_matrix) + isinstance(K, sparse.dok_matrix) or isinstance( + K, sparse.lil_matrix) ): K = K.tolil() K = matrix.set_diagonal(K, 1) @@ -999,19 +1023,22 @@ def build_kernel(self): ) if len(duplicate_ids) < 20: duplicate_names = ", ".join( - ["{} and {}".format(i[0], i[1]) for i in duplicate_ids] + ["{} and {}".format(i[0], i[1]) + for i in duplicate_ids] ) warnings.warn( "Detected zero distance between samples {}. " "Consider removing duplicates to avoid errors in " - "downstream processing.".format(duplicate_names), + "downstream processing.".format( + duplicate_names), RuntimeWarning, ) else: warnings.warn( "Detected zero distance between {} pairs of samples. " "Consider removing duplicates to avoid errors in " - "downstream processing.".format(len(duplicate_ids)), + "downstream processing.".format( + len(duplicate_ids)), RuntimeWarning, ) else: @@ -1101,7 +1128,7 @@ def build_kernel_to_data(self, Y, knn=None, bandwidth=None, bandwidth_scale=None bandwidth = bandwidth(pdx) bandwidth = bandwidth_scale * bandwidth pdx = (pdx.T / bandwidth).T - K = np.exp(-1 * pdx ** self.decay) + K = np.exp(-1 * pdx**self.decay) # handle nan K = np.where(np.isnan(K), 1, K) K[K < self.thresh] = 0 @@ -1178,11 +1205,12 @@ def __init__( distance="euclidean", thresh=1e-4, n_jobs=1, - **kwargs + **kwargs, ): self.beta = beta self.sample_idx = sample_idx - self.samples, self.n_cells = np.unique(self.sample_idx, return_counts=True) + self.samples, self.n_cells = np.unique( + self.sample_idx, return_counts=True) self.knn = knn self.decay = decay self.distance = distance @@ -1201,7 +1229,8 @@ def __init__( "data ({})".format(len(sample_idx), data.shape[0]) ) elif len(self.samples) == 1: - raise ValueError("sample_idx must contain more than one unique value") + raise ValueError( + "sample_idx must contain more than one unique value") if adaptive_k is not None: warnings.warn( "`adaptive_k` has been deprecated. Using fixed knn.", DeprecationWarning @@ -1222,8 +1251,7 @@ def _check_symmetrization(self, kernel_symm, theta): super()._check_symmetrization(kernel_symm, theta) def get_params(self): - """Get parameters from this object - """ + """Get parameters from this object""" params = super().get_params() params.update( { @@ -1273,7 +1301,8 @@ def set_params(self, **params): for arg in knn_kernel_args: if arg in params and params[arg] != getattr(self, arg): raise ValueError( - "Cannot update {}. " "Please create a new graph".format(arg) + "Cannot update {}. " "Please create a new graph".format( + arg) ) for arg in knn_other_args: if arg in params: @@ -1329,7 +1358,8 @@ def build_kernel(self): with _logger.task("MNN kernel"): if self.thresh > 0 or self.decay is None: - K = sparse.lil_matrix((self.data_nu.shape[0], self.data_nu.shape[0])) + K = sparse.lil_matrix( + (self.data_nu.shape[0], self.data_nu.shape[0])) else: K = np.zeros([self.data_nu.shape[0], self.data_nu.shape[0]]) for i, X in enumerate(self.subgraphs): @@ -1351,7 +1381,8 @@ def build_kernel(self): Kij = Y.build_kernel_to_data(X.data_nu, knn=self.knn) between_batch_norm = np.array(np.sum(Kij, 1)).flatten() scale = ( - np.minimum(1, within_batch_norm / between_batch_norm) + np.minimum(1, within_batch_norm / + between_batch_norm) * self.beta ) if sparse.issparse(Kij): diff --git a/graphtools/matrix.py b/graphtools/matrix.py index 8c818f2..6e83f48 100644 --- a/graphtools/matrix.py +++ b/graphtools/matrix.py @@ -1,6 +1,6 @@ -import numpy as np import numbers +import numpy as np from scipy import sparse @@ -53,7 +53,8 @@ def set_submatrix(X, i, j, values): def sparse_nonzero_discrete(X, values): if isinstance( - X, (sparse.bsr_matrix, sparse.dia_matrix, sparse.dok_matrix, sparse.lil_matrix) + X, (sparse.bsr_matrix, sparse.dia_matrix, + sparse.dok_matrix, sparse.lil_matrix) ): X = X.tocsr() return dense_nonzero_discrete(X.data, values) diff --git a/graphtools/utils.py b/graphtools/utils.py index 55e2bd3..84ed04a 100644 --- a/graphtools/utils.py +++ b/graphtools/utils.py @@ -1,6 +1,8 @@ import numbers import warnings + from deprecated import deprecated + from . import matrix try: @@ -65,7 +67,8 @@ def check_greater(x, **params): """ for p in params: if not isinstance(params[p], numbers.Number) or params[p] <= x: - raise ValueError("Expected {} > {}, got {}".format(p, x, params[p])) + raise ValueError( + "Expected {} > {}, got {}".format(p, x, params[p])) def check_positive(**params): @@ -87,7 +90,8 @@ def check_int(**params): """ for p in params: if not isinstance(params[p], numbers.Integral): - raise ValueError("Expected {} integer, got {}".format(p, params[p])) + raise ValueError( + "Expected {} integer, got {}".format(p, params[p])) def check_if_not(x, *checks, **params): diff --git a/setup.py b/setup.py index 0586273..4a7ed69 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,6 @@ import os import sys + from setuptools import setup install_requires = [ @@ -33,8 +34,10 @@ elif sys.version_info[:2] >= (3, 6): test_requires += ["black"] -version_py = os.path.join(os.path.dirname(__file__), "graphtools", "version.py") -version = open(version_py).read().strip().split("=")[-1].replace('"', "").strip() +version_py = os.path.join(os.path.dirname( + __file__), "graphtools", "version.py") +version = open(version_py).read().strip().split( + "=")[-1].replace('"', "").strip() readme = open("README.rst").read() @@ -44,7 +47,9 @@ description="graphtools", author="Scott Gigante, Daniel Burkhardt, and Jay Stanley, Yale University", author_email="scott.gigante@yale.edu", - packages=["graphtools",], + packages=[ + "graphtools", + ], license="GNU General Public License Version 2", install_requires=install_requires, extras_require={"test": test_requires, "doc": doc_requires}, @@ -54,7 +59,12 @@ download_url="https://github.com/KrishnaswamyLab/graphtools/archive/v{}.tar.gz".format( version ), - keywords=["graphs", "big-data", "signal processing", "manifold-learning",], + keywords=[ + "graphs", + "big-data", + "signal processing", + "manifold-learning", + ], classifiers=[ "Development Status :: 4 - Beta", "Environment :: Console", diff --git a/test/load_tests/__init__.py b/test/load_tests/__init__.py index 1c6213f..4cdb4ef 100644 --- a/test/load_tests/__init__.py +++ b/test/load_tests/__init__.py @@ -1,16 +1,17 @@ -from sklearn.decomposition import PCA, TruncatedSVD -from sklearn import datasets -from scipy.spatial.distance import pdist, cdist, squareform -import pygsp -import graphtools -import numpy as np -import scipy.sparse as sp +import re import warnings -import pandas as pd import nose2 +import numpy as np +import pandas as pd +import pygsp +import scipy.sparse as sp from nose.tools import assert_raises_regex, assert_warns_regex -import re +from scipy.spatial.distance import cdist, pdist, squareform +from sklearn import datasets +from sklearn.decomposition import PCA, TruncatedSVD + +import graphtools def assert_warns_message(expected_warning, expected_message, *args, **kwargs): @@ -110,7 +111,7 @@ def build_graph( sparse=False, graph_class=graphtools.Graph, verbose=0, - **kwargs + **kwargs, ): if sparse: data = sp.coo_matrix(data) @@ -122,5 +123,5 @@ def build_graph( knn=knn, random_state=42, verbose=verbose, - **kwargs + **kwargs, ) diff --git a/test/test_api.py b/test/test_api.py index 64d5ae9..88ede82 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -1,13 +1,14 @@ from __future__ import print_function -from load_tests import data, build_graph, assert_raises_message, assert_warns_message +import os +import pickle +import tempfile import igraph import numpy as np +from load_tests import assert_raises_message, assert_warns_message, build_graph, data + import graphtools -import tempfile -import os -import pickle def test_from_igraph(): diff --git a/test/test_data.py b/test/test_data.py index 24f6dd2..8d783b6 100644 --- a/test/test_data.py +++ b/test/test_data.py @@ -1,21 +1,23 @@ from __future__ import print_function + +import numbers +import warnings + from load_tests import ( - np, - sp, - pd, + assert_raises_message, + assert_warns_message, + build_graph, + data, graphtools, nose2, - data, - build_graph, - squareform, + np, + pd, pdist, + sp, + squareform, ) -from load_tests import assert_raises_message, assert_warns_message from nose.tools import assert_raises_regex -import numbers -import warnings - try: import anndata except (ImportError, SyntaxError): @@ -33,7 +35,8 @@ def test_1d_data(): with assert_raises_message( ValueError, - "Expected 2D array, got 1D array instead (shape: ({},).)".format(data.shape[0]), + "Expected 2D array, got 1D array instead (shape: ({},).)".format( + data.shape[0]), ): build_graph(data[:, 0]) with assert_raises_message( @@ -337,7 +340,8 @@ def test_transform_sparse_no_pca(): def test_inverse_transform_dense_pca(): G = build_graph(data, n_pca=data.shape[1] - 1) - np.testing.assert_allclose(G.data, G.inverse_transform(G.data_nu), atol=1e-12) + np.testing.assert_allclose( + G.data, G.inverse_transform(G.data_nu), atol=1e-12) np.testing.assert_allclose( G.data[:, -1, None], G.inverse_transform(G.data_nu, columns=-1), atol=1e-12 ) @@ -346,7 +350,8 @@ def test_inverse_transform_dense_pca(): ) with assert_raises_message( IndexError, - "index {0} is out of bounds for axis 1 with size {0}".format(G.data.shape[1]), + "index {0} is out of bounds for axis 1 with size {0}".format( + G.data.shape[1]), ): G.inverse_transform(G.data_nu, columns=data.shape[1]) with assert_raises_message( @@ -374,7 +379,8 @@ def test_inverse_transform_dense_pca(): def test_inverse_transform_sparse_svd(): G = build_graph(data, sparse=True, n_pca=data.shape[1] - 1) - np.testing.assert_allclose(data, G.inverse_transform(G.data_nu), atol=1e-12) + np.testing.assert_allclose( + data, G.inverse_transform(G.data_nu), atol=1e-12) np.testing.assert_allclose( data[:, -1, None], G.inverse_transform(G.data_nu, columns=-1), atol=1e-12 ) @@ -489,7 +495,8 @@ def test_transform_adaptive_pca(): ): G.transform(G.data[:, :15]) - G2 = build_graph(data, n_pca=True, rank_threshold=G.rank_threshold, random_state=42) + G2 = build_graph(data, n_pca=True, + rank_threshold=G.rank_threshold, random_state=42) assert np.allclose(G2.data_nu, G2.transform(G2.data)) assert np.allclose(G2.data_nu, G.transform(G.data)) diff --git a/test/test_estimator.py b/test/test_estimator.py index c66aceb..f96bf47 100644 --- a/test/test_estimator.py +++ b/test/test_estimator.py @@ -1,12 +1,14 @@ -import graphtools -import graphtools.estimator -import pygsp -import anndata import warnings + +import anndata import numpy as np -from load_tests import data, assert_raises_message -from scipy import sparse +import pygsp +from load_tests import assert_raises_message, data from parameterized import parameterized +from scipy import sparse + +import graphtools +import graphtools.estimator class Estimator(graphtools.estimator.GraphEstimator): @@ -53,7 +55,8 @@ def test_estimator(): def test_precomputed(distance, X, precomputed): E = Estimator(verbose=False, distance=distance) with warnings.catch_warnings(): - warnings.filterwarnings("ignore", message="K should have a non-zero diagonal") + warnings.filterwarnings( + "ignore", message="K should have a non-zero diagonal") E.fit(X) assert isinstance(E.graph, graphtools.graphs.TraditionalGraph) assert E.graph.precomputed == precomputed diff --git a/test/test_exact.py b/test/test_exact.py index 3a03594..1e7f11c 100644 --- a/test/test_exact.py +++ b/test/test_exact.py @@ -1,21 +1,22 @@ from __future__ import print_function -from scipy.sparse.csgraph import shortest_path + from load_tests import ( - graphtools, - np, - sp, - pygsp, - nose2, - data, - build_graph, - squareform, - pdist, PCA, TruncatedSVD, assert_raises_message, assert_warns_message, + build_graph, + data, + graphtools, + nose2, + np, + pdist, + pygsp, + sp, + squareform, ) from nose.tools import assert_warns_regex +from scipy.sparse.csgraph import shortest_path ##################################################### # Check parameters @@ -48,7 +49,8 @@ def test_build_exact_with_sample_idx(): ValueError, "TraditionalGraph does not support batch correction. Use `graphtype='mnn'` or `sample_idx=None`", ): - build_graph(data, graphtype="exact", sample_idx=np.arange(len(data)), decay=10) + build_graph(data, graphtype="exact", + sample_idx=np.arange(len(data)), decay=10) def test_precomputed_with_pca(): @@ -56,7 +58,8 @@ def test_precomputed_with_pca(): RuntimeWarning, "n_pca cannot be given on a precomputed graph. Setting n_pca=None", ): - build_graph(squareform(pdist(data)), precomputed="distance", n_pca=20, decay=10) + build_graph(squareform(pdist(data)), + precomputed="distance", n_pca=20, decay=10) def test_exact_no_decay(): @@ -134,14 +137,15 @@ def test_exact_graph(): a = 13 n_pca = 20 bandwidth_scale = 1.3 - data_small = data[np.random.choice(len(data), len(data) // 2, replace=False)] + data_small = data[np.random.choice( + len(data), len(data) // 2, replace=False)] pca = PCA(n_pca, svd_solver="randomized", random_state=42).fit(data_small) data_small_nu = pca.transform(data_small) pdx = squareform(pdist(data_small_nu, metric="euclidean")) knn_dist = np.partition(pdx, k, axis=1)[:, :k] epsilon = np.max(knn_dist, axis=1) * bandwidth_scale weighted_pdx = (pdx.T / epsilon).T - K = np.exp(-1 * weighted_pdx ** a) + K = np.exp(-1 * weighted_pdx**a) W = K + K.T W = np.divide(W, 2) np.fill_diagonal(W, 0) @@ -211,14 +215,15 @@ def test_truncated_exact_graph(): a = 13 n_pca = 20 thresh = 1e-4 - data_small = data[np.random.choice(len(data), len(data) // 2, replace=False)] + data_small = data[np.random.choice( + len(data), len(data) // 2, replace=False)] pca = PCA(n_pca, svd_solver="randomized", random_state=42).fit(data_small) data_small_nu = pca.transform(data_small) pdx = squareform(pdist(data_small_nu, metric="euclidean")) knn_dist = np.partition(pdx, k, axis=1)[:, :k] epsilon = np.max(knn_dist, axis=1) weighted_pdx = (pdx.T / epsilon).T - K = np.exp(-1 * weighted_pdx ** a) + K = np.exp(-1 * weighted_pdx**a) K[K < thresh] = 0 W = K + K.T W = np.divide(W, 2) @@ -282,14 +287,15 @@ def test_truncated_exact_graph_sparse(): a = 13 n_pca = 20 thresh = 1e-4 - data_small = data[np.random.choice(len(data), len(data) // 2, replace=False)] + data_small = data[np.random.choice( + len(data), len(data) // 2, replace=False)] pca = TruncatedSVD(n_pca, random_state=42).fit(data_small) data_small_nu = pca.transform(data_small) pdx = squareform(pdist(data_small_nu, metric="euclidean")) knn_dist = np.partition(pdx, k, axis=1)[:, :k] epsilon = np.max(knn_dist, axis=1) weighted_pdx = (pdx.T / epsilon).T - K = np.exp(-1 * weighted_pdx ** a) + K = np.exp(-1 * weighted_pdx**a) K[K < thresh] = 0 W = K + K.T W = np.divide(W, 2) @@ -355,12 +361,13 @@ def test_truncated_exact_graph_no_pca(): a = 13 n_pca = None thresh = 1e-4 - data_small = data[np.random.choice(len(data), len(data) // 10, replace=False)] + data_small = data[np.random.choice( + len(data), len(data) // 10, replace=False)] pdx = squareform(pdist(data_small, metric="euclidean")) knn_dist = np.partition(pdx, k, axis=1)[:, :k] epsilon = np.max(knn_dist, axis=1) weighted_pdx = (pdx.T / epsilon).T - K = np.exp(-1 * weighted_pdx ** a) + K = np.exp(-1 * weighted_pdx**a) K[K < thresh] = 0 W = K + K.T W = np.divide(W, 2) @@ -518,14 +525,15 @@ def test_exact_graph_anisotropy(): a = 13 n_pca = 20 anisotropy = 0.9 - data_small = data[np.random.choice(len(data), len(data) // 2, replace=False)] + data_small = data[np.random.choice( + len(data), len(data) // 2, replace=False)] pca = PCA(n_pca, svd_solver="randomized", random_state=42).fit(data_small) data_small_nu = pca.transform(data_small) pdx = squareform(pdist(data_small_nu, metric="euclidean")) knn_dist = np.partition(pdx, k, axis=1)[:, :k] epsilon = np.max(knn_dist, axis=1) weighted_pdx = (pdx.T / epsilon).T - K = np.exp(-1 * weighted_pdx ** a) + K = np.exp(-1 * weighted_pdx**a) K = K + K.T K = np.divide(K, 2) d = K.sum(1) @@ -590,7 +598,8 @@ def test_exact_graph_anisotropy(): def test_shortest_path_affinity(): - data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice( + len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=15) D = -1 * np.where(G.K != 0, np.log(np.where(G.K != 0, G.K, np.nan)), 0) P = shortest_path(D) @@ -603,7 +612,8 @@ def test_shortest_path_affinity(): def test_shortest_path_affinity_precomputed(): - data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice( + len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=15) G = graphtools.Graph(G.K, precomputed="affinity") D = -1 * np.where(G.K != 0, np.log(np.where(G.K != 0, G.K, np.nan)), 0) @@ -621,7 +631,8 @@ def test_shortest_path_decay_constant(): NotImplementedError, "Graph shortest path with constant distance only implemented for unweighted graphs. For weighted graphs, use `distance='affinity'`.", ): - data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice( + len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=15) G.shortest_path(distance="constant") @@ -631,7 +642,8 @@ def test_shortest_path_precomputed_decay_constant(): NotImplementedError, "Graph shortest path with constant distance only implemented for unweighted graphs. For weighted graphs, use `distance='affinity'`.", ): - data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice( + len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=15) G = graphtools.Graph(G.K, precomputed="affinity") G.shortest_path(distance="constant") @@ -642,7 +654,8 @@ def test_shortest_path_decay_data(): NotImplementedError, "Graph shortest path with constant or data distance only implemented for unweighted graphs. For weighted graphs, use `distance='affinity'`.", ): - data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice( + len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=15) G.shortest_path(distance="data") @@ -652,7 +665,8 @@ def test_shortest_path_precomputed_data(): ValueError, "Graph shortest path with data distance not valid for precomputed graphs. For precomputed graphs, use `distance='constant'` for unweighted graphs and `distance='affinity'` for weighted graphs.", ): - data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice( + len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=15) G = graphtools.Graph(G.K, precomputed="affinity") G.shortest_path(distance="data") @@ -712,7 +726,8 @@ def test_exact_interpolate(): def test_precomputed_interpolate(): with assert_raises_message(ValueError, "Cannot extend kernel on precomputed graph"): - G = build_graph(squareform(pdist(data)), n_pca=None, precomputed="distance") + G = build_graph(squareform(pdist(data)), + n_pca=None, precomputed="distance") G.build_kernel_to_data(data) diff --git a/test/test_knn.py b/test/test_knn.py index fe47c07..9274fdf 100644 --- a/test/test_knn.py +++ b/test/test_knn.py @@ -1,21 +1,23 @@ -from __future__ import print_function, division -from sklearn.utils.graph import graph_shortest_path -from scipy.spatial.distance import pdist, squareform -from load_tests import assert_raises_message, assert_warns_message -from nose.tools import assert_raises_regex, assert_warns_regex +from __future__ import division, print_function + import warnings + from load_tests import ( + PCA, + TruncatedSVD, + assert_raises_message, + assert_warns_message, + build_graph, + data, + datasets, graphtools, np, - sp, pygsp, - data, - datasets, - build_graph, - PCA, - TruncatedSVD, + sp, ) - +from nose.tools import assert_raises_regex, assert_warns_regex +from scipy.spatial.distance import pdist, squareform +from sklearn.utils.graph import graph_shortest_path ##################################################### # Check parameters @@ -51,7 +53,8 @@ def test_duplicate_data(): RuntimeWarning, r"Detected zero distance between samples ([0-9and,\s]*). Consider removing duplicates to avoid errors in downstream processing.", ): - build_graph(np.vstack([data, data[:9]]), n_pca=20, decay=10, thresh=1e-4) + build_graph(np.vstack([data, data[:9]]), + n_pca=20, decay=10, thresh=1e-4) def test_duplicate_data_many(): @@ -59,7 +62,8 @@ def test_duplicate_data_many(): RuntimeWarning, "Detected zero distance between ([0-9]*) pairs of samples. Consider removing duplicates to avoid errors in downstream processing.", ): - build_graph(np.vstack([data, data[:21]]), n_pca=20, decay=10, thresh=1e-4) + build_graph(np.vstack([data, data[:21]]), + n_pca=20, decay=10, thresh=1e-4) def test_balltree_cosine(): @@ -99,7 +103,8 @@ def test_knn_no_knn_no_bandwidth(): with assert_raises_message( ValueError, "Either `knn` or `bandwidth` must be provided." ): - build_graph(data, graphtype="knn", knn=None, bandwidth=None, thresh=1e-4) + build_graph(data, graphtype="knn", knn=None, + bandwidth=None, thresh=1e-4) def test_knn_graph_invalid_symm(): @@ -107,7 +112,8 @@ def test_knn_graph_invalid_symm(): ValueError, "kernel_symm 'invalid' not recognized. Choose from '+', '*', 'mnn', or 'none'.", ): - build_graph(data, graphtype="knn", knn=5, thresh=1e-4, kernel_symm="invalid") + build_graph(data, graphtype="knn", knn=5, + thresh=1e-4, kernel_symm="invalid") ##################################################### @@ -156,7 +162,8 @@ def test_knn_graph(): ), ): G2.build_kernel_to_data( - Y=G2.data_nu, knn=data.shape[0] + 1, + Y=G2.data_nu, + knn=data.shape[0] + 1, ) @@ -232,7 +239,7 @@ def test_sparse_alpha_knn_graph(): knn_dist = np.partition(pdx, k, axis=1)[:, :k] epsilon = np.max(knn_dist, axis=1) * bandwidth_scale pdx = (pdx.T / epsilon).T - K = np.exp(-1 * pdx ** a) + K = np.exp(-1 * pdx**a) K = K + K.T W = np.divide(K, 2) np.fill_diagonal(W, 0) @@ -260,7 +267,8 @@ def test_knnmax(): thresh = 0 with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "K should be symmetric", RuntimeWarning) + warnings.filterwarnings( + "ignore", "K should be symmetric", RuntimeWarning) G = build_graph( data, n_pca=None, # n_pca, @@ -278,7 +286,7 @@ def test_knnmax(): knn_max_dist = np.max(np.partition(pdx, k_max, axis=1)[:, :k_max], axis=1) epsilon = np.max(knn_dist, axis=1) pdx_scale = (pdx.T / epsilon).T - K = np.where(pdx <= knn_max_dist[:, None], np.exp(-1 * pdx_scale ** a), 0) + K = np.where(pdx <= knn_max_dist[:, None], np.exp(-1 * pdx_scale**a), 0) K = K + K.T W = np.divide(K, 2) np.fill_diagonal(W, 0) @@ -423,14 +431,15 @@ def test_knn_graph_anisotropy(): n_pca = 20 anisotropy = 0.9 thresh = 1e-4 - data_small = data[np.random.choice(len(data), len(data) // 2, replace=False)] + data_small = data[np.random.choice( + len(data), len(data) // 2, replace=False)] pca = PCA(n_pca, svd_solver="randomized", random_state=42).fit(data_small) data_small_nu = pca.transform(data_small) pdx = squareform(pdist(data_small_nu, metric="euclidean")) knn_dist = np.partition(pdx, k, axis=1)[:, :k] epsilon = np.max(knn_dist, axis=1) weighted_pdx = (pdx.T / epsilon).T - K = np.exp(-1 * weighted_pdx ** a) + K = np.exp(-1 * weighted_pdx**a) K[K < thresh] = 0 K = K + K.T K = np.divide(K, 2) @@ -498,7 +507,8 @@ def test_knn_interpolate(): def test_knn_interpolate_wrong_shape(): G = build_graph(data, n_pca=10, decay=None) with assert_raises_message( - ValueError, "Expected a 2D matrix. Y has shape ({},)".format(data.shape[0]) + ValueError, "Expected a 2D matrix. Y has shape ({},)".format( + data.shape[0]) ): G.extend_to_data(data[:, 0]) with assert_raises_message( @@ -523,7 +533,8 @@ def test_knn_interpolate_wrong_shape(): def test_shortest_path_constant(): - data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice( + len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=None) P = graph_shortest_path(G.K) # sklearn returns 0 if no path exists @@ -534,7 +545,8 @@ def test_shortest_path_constant(): def test_shortest_path_precomputed_constant(): - data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice( + len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=None) G = graphtools.Graph(G.K, precomputed="affinity") P = graph_shortest_path(G.K) @@ -547,7 +559,8 @@ def test_shortest_path_precomputed_constant(): def test_shortest_path_data(): - data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice( + len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=None) D = squareform(pdist(G.data_nu)) * np.where(G.K.toarray() > 0, 1, 0) P = graph_shortest_path(D) @@ -564,7 +577,8 @@ def test_shortest_path_no_decay_affinity(): ValueError, "Graph shortest path with affinity distance only valid for weighted graphs. For unweighted graphs, use `distance='constant'` or `distance='data'`.", ): - data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice( + len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=None) G.shortest_path(distance="affinity") @@ -574,7 +588,8 @@ def test_shortest_path_precomputed_no_decay_affinity(): ValueError, "Graph shortest path with affinity distance only valid for weighted graphs. For unweighted graphs, use `distance='constant'` or `distance='data'`.", ): - data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice( + len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=None) G = graphtools.Graph(G.K, precomputed="affinity") G.shortest_path(distance="affinity") @@ -585,7 +600,8 @@ def test_shortest_path_precomputed_no_decay_data(): ValueError, "Graph shortest path with data distance not valid for precomputed graphs. For precomputed graphs, use `distance='constant'` for unweighted graphs and `distance='affinity'` for weighted graphs.", ): - data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice( + len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=None) G = graphtools.Graph(G.K, precomputed="affinity") G.shortest_path(distance="data") @@ -596,7 +612,8 @@ def test_shortest_path_invalid(): ValueError, "Expected `distance` in ['constant', 'data', 'affinity']. Got invalid", ): - data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice( + len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=None) G.shortest_path(distance="invalid") diff --git a/test/test_landmark.py b/test/test_landmark.py index aa2f9cc..e365c9a 100644 --- a/test/test_landmark.py +++ b/test/test_landmark.py @@ -1,17 +1,17 @@ from __future__ import print_function + +import pygsp from load_tests import ( - graphtools, - np, - nose2, + assert_raises_message, + assert_warns_message, + build_graph, data, digits, - build_graph, generate_swiss_roll, - assert_raises_message, - assert_warns_message, + graphtools, + nose2, + np, ) -import pygsp - ##################################################### # Check parameters diff --git a/test/test_matrix.py b/test/test_matrix.py index aac45a7..647c93d 100644 --- a/test/test_matrix.py +++ b/test/test_matrix.py @@ -1,11 +1,11 @@ -import graphtools.matrix -import graphtools.utils +import numpy as np +from load_tests import assert_warns_message, data from parameterized import parameterized from scipy import sparse -import numpy as np + import graphtools -from load_tests import data -from load_tests import assert_warns_message +import graphtools.matrix +import graphtools.utils @parameterized( @@ -120,7 +120,8 @@ def test_set_submatrix_deprecated(): "Call to deprecated function (or staticmethod) set_submatrix. (Use graphtools.matrix.set_submatrix instead) -- Deprecated since version 1.5.0.", ): graphtools.utils.set_submatrix( - sparse.lil_matrix((4, 4)), [1, 2], [0, 1], np.array([[1, 2], [3, 4]]) + sparse.lil_matrix((4, 4)), [1, 2], [ + 0, 1], np.array([[1, 2], [3, 4]]) ) @@ -129,7 +130,8 @@ def test_sparse_nonzero_discrete_deprecated(): DeprecationWarning, "Call to deprecated function (or staticmethod) sparse_nonzero_discrete. (Use graphtools.matrix.sparse_nonzero_discrete instead) -- Deprecated since version 1.5.0.", ): - graphtools.utils.sparse_nonzero_discrete(sparse.csr_matrix((4, 4)), [1]) + graphtools.utils.sparse_nonzero_discrete( + sparse.csr_matrix((4, 4)), [1]) def test_dense_nonzero_discrete_deprecated(): diff --git a/test/test_mnn.py b/test/test_mnn.py index 30e3a77..8f5a13c 100644 --- a/test/test_mnn.py +++ b/test/test_mnn.py @@ -1,22 +1,23 @@ from __future__ import print_function + import warnings + from load_tests import ( + assert_raises_message, + assert_warns_message, + build_graph, + cdist, + data, + digits, + generate_swiss_roll, graphtools, + nose2, np, pd, pygsp, - nose2, - data, - digits, - build_graph, - generate_swiss_roll, - assert_raises_message, - assert_warns_message, - cdist, ) from scipy.linalg import norm - ##################################################### # Check parameters ##################################################### @@ -27,13 +28,15 @@ def test_sample_idx_and_precomputed(): ValueError, "MNNGraph does not support precomputed values. Use `graphtype='exact'` and `sample_idx=None` or `precomputed=None`", ): - build_graph(data, n_pca=None, sample_idx=np.arange(10), precomputed="distance") + build_graph(data, n_pca=None, sample_idx=np.arange( + 10), precomputed="distance") def test_sample_idx_wrong_length(): with assert_raises_message( ValueError, - "sample_idx (10) must be the same length as data ({})".format(data.shape[0]), + "sample_idx (10) must be the same length as data ({})".format( + data.shape[0]), ): build_graph(data, graphtype="mnn", sample_idx=np.arange(10)) @@ -43,7 +46,8 @@ def test_sample_idx_unique(): ValueError, "sample_idx must contain more than one unique value" ): build_graph( - data, graph_class=graphtools.graphs.MNNGraph, sample_idx=np.ones(len(data)) + data, graph_class=graphtools.graphs.MNNGraph, sample_idx=np.ones( + len(data)) ) with assert_warns_message( UserWarning, "Only one unique sample. Not using MNNGraph" @@ -356,7 +360,8 @@ def test_mnn_graph_no_decay(): e_ij = kdx_ij[:, batch_k - 1] # dist to kNN k_ij = np.where(pdx_ij <= e_ij[:, None], 1, 0) # apply knn kernel if si == sj: - K.iloc[sample_idx == si, sample_idx == sj] = (k_ij + k_ij.T) / 2 + K.iloc[sample_idx == si, sample_idx == sj] = ( + k_ij + k_ij.T) / 2 else: # fill out values in K for NN on diagonal K.iloc[sample_idx == si, sample_idx == sj] = k_ij @@ -377,7 +382,8 @@ def test_mnn_graph_no_decay(): ) K = Kn - W = np.array((theta * np.minimum(K, K.T)) + ((1 - theta) * np.maximum(K, K.T))) + W = np.array((theta * np.minimum(K, K.T)) + + ((1 - theta) * np.maximum(K, K.T))) np.fill_diagonal(W, 0) G = pygsp.graphs.Graph(W) G2 = graphtools.Graph( @@ -420,9 +426,10 @@ def test_mnn_graph_decay(): kdx_ij = np.sort(pdx_ij, axis=1) # get kNN e_ij = kdx_ij[:, batch_k] # dist to kNN pdxe_ij = pdx_ij / e_ij[:, np.newaxis] # normalize - k_ij = np.exp(-1 * (pdxe_ij ** a)) # apply alpha-decaying kernel + k_ij = np.exp(-1 * (pdxe_ij**a)) # apply alpha-decaying kernel if si == sj: - K.iloc[sample_idx == si, sample_idx == sj] = (k_ij + k_ij.T) / 2 + K.iloc[sample_idx == si, sample_idx == sj] = ( + k_ij + k_ij.T) / 2 else: # fill out values in K for NN on diagonal K.iloc[sample_idx == si, sample_idx == sj] = k_ij @@ -443,7 +450,8 @@ def test_mnn_graph_decay(): ) K = Kn - W = np.array((theta * np.minimum(K, K.T)) + ((1 - theta) * np.maximum(K, K.T))) + W = np.array((theta * np.minimum(K, K.T)) + + ((1 - theta) * np.maximum(K, K.T))) np.fill_diagonal(W, 0) G = pygsp.graphs.Graph(W) G2 = graphtools.Graph( diff --git a/test/test_utils.py b/test/test_utils.py index 1aadd82..601d843 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,6 +1,7 @@ -import graphtools from load_tests import assert_raises_message +import graphtools + def test_check_in(): graphtools.utils.check_in(["hello", "world"], foo="hello") @@ -28,7 +29,8 @@ def test_check_positive(): def test_check_if_not(): graphtools.utils.check_if_not(-5, graphtools.utils.check_positive, foo=-5) with assert_raises_message(ValueError, "Expected foo > 0, got -5"): - graphtools.utils.check_if_not(-4, graphtools.utils.check_positive, foo=-5) + graphtools.utils.check_if_not(-4, + graphtools.utils.check_positive, foo=-5) def test_check_between(): From 9e50b173691921b69d9e21a179008bd81fa0c37e Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:27:19 -0500 Subject: [PATCH 09/41] Update api.py --- graphtools/api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/graphtools/api.py b/graphtools/api.py index dfa1b70..3e3ed44 100644 --- a/graphtools/api.py +++ b/graphtools/api.py @@ -256,7 +256,7 @@ def Graph( else: msg = msg + " and PyGSP inheritance" - _logger.debug(msg) + _logger.log_debug(msg) class_names = [p.__name__.replace("Graph", "") for p in parent_classes] try: @@ -274,7 +274,7 @@ def Graph( pass # build graph and return - _logger.debug( + _logger.log_debug( "Initializing {} with arguments {}".format( parent_classes, ", ".join( From 0b3aa0c15692494a287b400ca9fcc228cedf191a Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:30:57 -0500 Subject: [PATCH 10/41] Update base.py --- graphtools/base.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/graphtools/base.py b/graphtools/base.py index 11b6515..600d9fb 100644 --- a/graphtools/base.py +++ b/graphtools/base.py @@ -175,7 +175,7 @@ def _parse_n_pca_threshold(self, data, n_pca, rank_threshold): n_pca = None elif n_pca is True: # notify that we're going to estimate rank. n_pca = "auto" - _logger.info( + _logger.log_info( "Estimating n_pca from matrix rank. " "Supply an integer n_pca " "for fixed amount." @@ -239,7 +239,7 @@ def _reduce_data(self): if self.n_pca is not None and ( self.n_pca == "auto" or self.n_pca < self.data.shape[1] ): - with _logger.task("PCA"): + with _logger.log_task("PCA"): n_pca = self.data.shape[1] - \ 1 if self.n_pca == "auto" else self.n_pca if sparse.issparse(self.data): @@ -274,7 +274,7 @@ def _reduce_data(self): "maximum singular value {} " "for the data matrix".format(threshold, smax) ) - _logger.info( + _logger.log_info( "Using rank estimate of {} as n_pca".format(self.n_pca) ) # reset the sklearn operator @@ -505,10 +505,10 @@ def __init__( self.anisotropy = anisotropy if initialize: - _logger.debug("Initializing kernel...") + _logger.log_debug("Initializing kernel...") self.K else: - _logger.debug("Not initializing kernel.") + _logger.log_debug("Not initializing kernel.") super().__init__(**kwargs) def _check_symmetrization(self, kernel_symm, theta): @@ -563,19 +563,19 @@ def _build_kernel(self): def symmetrize_kernel(self, K): # symmetrize if self.kernel_symm == "+": - _logger.debug("Using addition symmetrization.") + _logger.log_debug("Using addition symmetrization.") K = (K + K.T) / 2 elif self.kernel_symm == "*": - _logger.debug("Using multiplication symmetrization.") + _logger.log_debug("Using multiplication symmetrization.") K = K.multiply(K.T) elif self.kernel_symm == "mnn": - _logger.debug( + _logger.log_debug( "Using mnn symmetrization (theta = {}).".format(self.theta)) K = self.theta * matrix.elementwise_minimum(K, K.T) + ( 1 - self.theta ) * matrix.elementwise_maximum(K, K.T) elif self.kernel_symm is None: - _logger.debug("Using no symmetrization.") + _logger.log_debug("Using no symmetrization.") pass else: raise NotImplementedError @@ -858,10 +858,10 @@ def _check_shortest_path_distance(self, distance): def _default_shortest_path_distance(self): if not self.weighted: distance = "data" - _logger.info("Using ambient data distances.") + _logger.log_info("Using ambient data distances.") else: distance = "affinity" - _logger.info("Using negative log affinity distances.") + _logger.log_info("Using negative log affinity distances.") return distance def shortest_path(self, method="auto", distance=None): From 0da8dae2d3fec1efacf1ff33d70769134445b537 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:31:35 -0500 Subject: [PATCH 11/41] git add --- .github/workflows/pre-commit.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 4b6a475..df20a5f 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -44,10 +44,11 @@ jobs: if [[ `git status --porcelain --untracked-files=no` ]]; then git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com" git config --local user.name "github-actions[bot]" + git add . git checkout -- .github/workflows git commit -m "pre-commit" -a fi - shell: bash -e {0} + shell: bash -ex {0} - name: Push changes if: steps.precommit.outcome == 'failure' && startsWith(github.ref, 'refs/heads') From 8c085b5f772065978890748a5249c5106f06124e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 3 Jan 2023 05:32:25 +0000 Subject: [PATCH 12/41] pre-commit --- graphtools/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/graphtools/base.py b/graphtools/base.py index 600d9fb..14aac48 100644 --- a/graphtools/base.py +++ b/graphtools/base.py @@ -570,7 +570,8 @@ def symmetrize_kernel(self, K): K = K.multiply(K.T) elif self.kernel_symm == "mnn": _logger.log_debug( - "Using mnn symmetrization (theta = {}).".format(self.theta)) + "Using mnn symmetrization (theta = {}).".format(self.theta) + ) K = self.theta * matrix.elementwise_minimum(K, K.T) + ( 1 - self.theta ) * matrix.elementwise_maximum(K, K.T) From 1061e9f8e271973a5ca738462dd9146efb01f4c6 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:34:18 -0500 Subject: [PATCH 13/41] Update setup.cfg --- setup.cfg | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/setup.cfg b/setup.cfg index 6372926..c4ba0c6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,29 @@ [metadata] license-file = LICENSE + +[flake8] +ignore = + # top-level module docstring + D100, D104, + # space before: conflicts with black + E203, + # import not in alphabetical: conflicts with isort + H306 +per-file-ignores = + # imported but unused + __init__.py: F401 + # missing docstring in public function for methods, metrics, datasets + openproblems/tasks/*/*/*.py: D103, E203 + openproblems/tasks/*/*/__init__.py: F401, D103 +max-line-length = 88 +exclude = + .git, + __pycache__, + build, + dist, + Snakefile + +[isort] +profile = black +force_single_line = true +force_alphabetical_sort = true From 89983a35786e1c9497bb0c352639bceb4dc0d1f2 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 3 Jan 2023 05:35:14 +0000 Subject: [PATCH 14/41] pre-commit --- doc/source/conf.py | 6 +- graphtools/__init__.py | 4 +- graphtools/api.py | 13 ++-- graphtools/base.py | 67 +++++++++----------- graphtools/estimator.py | 26 ++++---- graphtools/graphs.py | 118 ++++++++++++++---------------------- graphtools/matrix.py | 7 +-- graphtools/utils.py | 13 ++-- setup.py | 10 ++- test/load_tests/__init__.py | 19 +++--- test/test_api.py | 14 +++-- test/test_data.py | 43 ++++++------- test/test_estimator.py | 15 +++-- test/test_exact.py | 70 +++++++++------------ test/test_knn.py | 79 ++++++++++-------------- test/test_landmark.py | 21 +++---- test/test_matrix.py | 11 ++-- test/test_mnn.py | 51 +++++++--------- test/test_utils.py | 3 +- 19 files changed, 247 insertions(+), 343 deletions(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index afa4a9f..e7303a3 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -67,8 +67,7 @@ # version_py = os.path.join(root_dir, "graphtools", "version.py") # The full version, including alpha/beta/rc tags. -release = open(version_py).read().strip().split( - "=")[-1].replace('"', "").strip() +release = open(version_py).read().strip().split("=")[-1].replace('"', "").strip() # The short X.Y version. version = release.split("-")[0] @@ -151,8 +150,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [(master_doc, "graphtools", - "graphtools Documentation", [author], 1)] +man_pages = [(master_doc, "graphtools", "graphtools Documentation", [author], 1)] # -- Options for Texinfo output ------------------------------------------- diff --git a/graphtools/__init__.py b/graphtools/__init__.py index 7384afc..0518e98 100644 --- a/graphtools/__init__.py +++ b/graphtools/__init__.py @@ -1,2 +1,4 @@ -from .api import Graph, from_igraph, read_pickle +from .api import from_igraph +from .api import Graph +from .api import read_pickle from .version import __version__ diff --git a/graphtools/api.py b/graphtools/api.py index 3e3ed44..d61e512 100644 --- a/graphtools/api.py +++ b/graphtools/api.py @@ -1,12 +1,12 @@ -import pickle -import warnings +from . import base +from . import graphs +from scipy import sparse import numpy as np +import pickle import pygsp import tasklogger -from scipy import sparse - -from . import base, graphs +import warnings _logger = tasklogger.get_tasklogger("graphtools") @@ -342,8 +342,7 @@ def read_pickle(path): G = pickle.load(f) if not isinstance(G, base.BaseGraph): - warnings.warn( - "Returning object that is not a graphtools.base.BaseGraph") + warnings.warn("Returning object that is not a graphtools.base.BaseGraph") elif isinstance(G, base.PyGSPGraph) and isinstance(G.logger, str): G.logger = pygsp.utils.build_logger(G.logger) return G diff --git a/graphtools/base.py b/graphtools/base.py index 14aac48..1ed702a 100644 --- a/graphtools/base.py +++ b/graphtools/base.py @@ -1,22 +1,23 @@ -import abc -import numbers -import pickle -import sys -import warnings +from . import matrix +from . import utils from builtins import super from copy import copy as shallow_copy -from inspect import signature - -import numpy as np -import pygsp -import tasklogger from future.utils import with_metaclass +from inspect import signature from scipy import sparse from scipy.sparse.csgraph import shortest_path -from sklearn.decomposition import PCA, TruncatedSVD +from sklearn.decomposition import PCA +from sklearn.decomposition import TruncatedSVD from sklearn.preprocessing import normalize -from . import matrix, utils +import abc +import numbers +import numpy as np +import pickle +import pygsp +import sys +import tasklogger +import warnings _logger = tasklogger.get_tasklogger("graphtools") @@ -114,8 +115,7 @@ def __init__( ): self._check_data(data) - n_pca, rank_threshold = self._parse_n_pca_threshold( - data, n_pca, rank_threshold) + n_pca, rank_threshold = self._parse_n_pca_threshold(data, n_pca, rank_threshold) if utils.is_SparseDataFrame(data): data = data.to_coo() @@ -240,8 +240,7 @@ def _reduce_data(self): self.n_pca == "auto" or self.n_pca < self.data.shape[1] ): with _logger.log_task("PCA"): - n_pca = self.data.shape[1] - \ - 1 if self.n_pca == "auto" else self.n_pca + n_pca = self.data.shape[1] - 1 if self.n_pca == "auto" else self.n_pca if sparse.issparse(self.data): if ( isinstance(self.data, sparse.coo_matrix) @@ -249,8 +248,7 @@ def _reduce_data(self): or isinstance(self.data, sparse.dok_matrix) ): self.data = self.data.tocsr() - self.data_pca = TruncatedSVD( - n_pca, random_state=self.random_state) + self.data_pca = TruncatedSVD(n_pca, random_state=self.random_state) else: self.data_pca = PCA( n_pca, svd_solver="randomized", random_state=self.random_state @@ -261,8 +259,7 @@ def _reduce_data(self): smax = s.max() if self.rank_threshold == "auto": threshold = ( - smax * np.finfo(self.data.dtype).eps * - max(self.data.shape) + smax * np.finfo(self.data.dtype).eps * max(self.data.shape) ) self.rank_threshold = threshold threshold = self.rank_threshold @@ -281,8 +278,7 @@ def _reduce_data(self): op = self.data_pca # for line-width brevity.. op.components_ = op.components_[gate, :] op.explained_variance_ = op.explained_variance_[gate] - op.explained_variance_ratio_ = op.explained_variance_ratio_[ - gate] + op.explained_variance_ratio_ = op.explained_variance_ratio_[gate] op.singular_values_ = op.singular_values_[gate] self.data_pca = ( op # im not clear if this is needed due to assignment rules @@ -292,8 +288,7 @@ def _reduce_data(self): else: data_nu = self.data if sparse.issparse(data_nu) and not isinstance( - data_nu, (sparse.csr_matrix, - sparse.csc_matrix, sparse.bsr_matrix) + data_nu, (sparse.csr_matrix, sparse.csc_matrix, sparse.bsr_matrix) ): data_nu = data_nu.tocsr() return data_nu @@ -479,8 +474,7 @@ def __init__( ): if gamma is not None: warnings.warn( - "gamma is deprecated. " "Setting theta={}".format( - gamma), FutureWarning + "gamma is deprecated. " "Setting theta={}".format(gamma), FutureWarning ) theta = gamma if kernel_symm == "gamma": @@ -626,11 +620,9 @@ def set_params(self, **params): if "theta" in params and params["theta"] != self.theta: raise ValueError("Cannot update theta. Please create a new graph") if "anisotropy" in params and params["anisotropy"] != self.anisotropy: - raise ValueError( - "Cannot update anisotropy. Please create a new graph") + raise ValueError("Cannot update anisotropy. Please create a new graph") if "kernel_symm" in params and params["kernel_symm"] != self.kernel_symm: - raise ValueError( - "Cannot update kernel_symm. Please create a new graph") + raise ValueError("Cannot update kernel_symm. Please create a new graph") super().set_params(**params) return self @@ -827,8 +819,7 @@ def to_pickle(self, path): """ pickle_obj = shallow_copy(self) is_oldpygsp = all( - [isinstance(self, pygsp.graphs.Graph), int( - sys.version.split(".")[1]) < 7] + [isinstance(self, pygsp.graphs.Graph), int(sys.version.split(".")[1]) < 7] ) if is_oldpygsp: pickle_obj.logger = pickle_obj.logger.name @@ -901,8 +892,7 @@ def shortest_path(self, method="auto", distance=None): elif distance == "data": D = sparse.coo_matrix(self.K) D.data = np.sqrt( - np.sum((self.data_nu[D.row] - - self.data_nu[D.col]) ** 2, axis=1) + np.sum((self.data_nu[D.row] - self.data_nu[D.col]) ** 2, axis=1) ) elif distance == "affinity": D = sparse.csr_matrix(self.K) @@ -1080,8 +1070,7 @@ def _check_extension_shape(self, Y): `self.n_pca`. """ if len(Y.shape) != 2: - raise ValueError( - "Expected a 2D matrix. Y has shape {}".format(Y.shape)) + raise ValueError("Expected a 2D matrix. Y has shape {}".format(Y.shape)) if not Y.shape[1] == self.data_nu.shape[1]: # try PCA transform if Y.shape[1] == self.data.shape[1]: @@ -1095,8 +1084,7 @@ def _check_extension_shape(self, Y): ) else: # no PCA, only one choice of shape - msg = "Y must be of shape (n, {})".format( - self.data.shape[1]) + msg = "Y must be of shape (n, {})".format(self.data.shape[1]) raise ValueError(msg) return Y @@ -1159,8 +1147,7 @@ def interpolate(self, transform, transitions=None, Y=None): """ if transitions is None: if Y is None: - raise ValueError( - "Either `transitions` or `Y` must be provided.") + raise ValueError("Either `transitions` or `Y` must be provided.") else: transitions = self.extend_to_data(Y) Y_transform = transitions.dot(transform) diff --git a/graphtools/estimator.py b/graphtools/estimator.py index 575323b..ae1652b 100644 --- a/graphtools/estimator.py +++ b/graphtools/estimator.py @@ -1,12 +1,15 @@ -import abc +from . import api +from . import base +from . import graphs +from . import matrix +from . import utils from functools import partial +from scipy import sparse +import abc import numpy as np import pygsp import tasklogger -from scipy import sparse - -from . import api, base, graphs, matrix, utils def attribute(attr, default=None, doc=None, on_set=None): @@ -108,13 +111,11 @@ def graph(self, G): n_pca = attribute( "n_pca", default=100, - on_set=partial(utils.check_if_not, None, - utils.check_positive, utils.check_int), + on_set=partial(utils.check_if_not, None, utils.check_positive, utils.check_int), ) random_state = attribute("random_state") - knn = attribute("knn", default=5, on_set=[ - utils.check_positive, utils.check_int]) + knn = attribute("knn", default=5, on_set=[utils.check_positive, utils.check_int]) decay = attribute("decay", default=40, on_set=utils.check_positive) distance = attribute( "distance", @@ -155,8 +156,7 @@ def graph(self, G): n_svd = attribute( "n_svd", default=100, - on_set=partial(utils.check_if_not, None, - utils.check_positive, utils.check_int), + on_set=partial(utils.check_if_not, None, utils.check_positive, utils.check_int), ) n_jobs = attribute( "n_jobs", on_set=partial(utils.check_if_not, None, utils.check_int) @@ -182,8 +182,7 @@ def _update_n_landmark(self, n_landmark): if self.graph is not None: n_landmark = self._parse_n_landmark(self.graph.data_nu, n_landmark) if ( - n_landmark is None and isinstance( - self.graph, graphs.LandmarkGraph) + n_landmark is None and isinstance(self.graph, graphs.LandmarkGraph) ) or ( n_landmark is not None and not isinstance(self.graph, graphs.LandmarkGraph) @@ -365,8 +364,7 @@ def _update_graph(self, X, precomputed, n_pca, n_landmark, **kwargs): **(self.kwargs), ) if self.graph is not None: - _logger.info( - "Using precomputed graph and diffusion operator...") + _logger.info("Using precomputed graph and diffusion operator...") def fit(self, X, **kwargs): """Computes the graph diff --git a/graphtools/graphs.py b/graphtools/graphs.py index 34afe73..935520e 100644 --- a/graphtools/graphs.py +++ b/graphtools/graphs.py @@ -1,20 +1,23 @@ from __future__ import division -import numbers -import warnings +from . import matrix +from . import utils +from .base import DataGraph +from .base import PyGSPGraph from builtins import super - -import numpy as np -import tasklogger from scipy import sparse -from scipy.spatial.distance import cdist, pdist, squareform +from scipy.spatial.distance import cdist +from scipy.spatial.distance import pdist +from scipy.spatial.distance import squareform from sklearn.cluster import MiniBatchKMeans from sklearn.neighbors import NearestNeighbors from sklearn.preprocessing import normalize from sklearn.utils.extmath import randomized_svd -from . import matrix, utils -from .base import DataGraph, PyGSPGraph +import numbers +import numpy as np +import tasklogger +import warnings _logger = tasklogger.get_tasklogger("graphtools") @@ -100,8 +103,7 @@ def __init__( # implementation requires a knn value knn = 5 if decay is None and bandwidth is not None: - warnings.warn( - "`bandwidth` is not used when `decay=None`.", UserWarning) + warnings.warn("`bandwidth` is not used when `decay=None`.", UserWarning) if knn > data.shape[0] - 2: warnings.warn( "Cannot set knn ({k}) to be greater than " @@ -113,8 +115,7 @@ def __init__( if knn_max is not None and knn_max < knn: warnings.warn( "Cannot set knn_max ({knn_max}) to be less than " - "knn ({knn}). Setting knn_max={knn}".format( - knn=knn, knn_max=knn_max) + "knn ({knn}). Setting knn_max={knn}".format(knn=knn, knn_max=knn_max) ) knn_max = knn if n_pca in [None, 0, False] and data.shape[1] > 500: @@ -182,22 +183,18 @@ def set_params(self, **params): if "knn" in params and params["knn"] != self.knn: raise ValueError("Cannot update knn. Please create a new graph") if "knn_max" in params and params["knn_max"] != self.knn: - raise ValueError( - "Cannot update knn_max. Please create a new graph") + raise ValueError("Cannot update knn_max. Please create a new graph") if "decay" in params and params["decay"] != self.decay: raise ValueError("Cannot update decay. Please create a new graph") if "bandwidth" in params and params["bandwidth"] != self.bandwidth: - raise ValueError( - "Cannot update bandwidth. Please create a new graph") + raise ValueError("Cannot update bandwidth. Please create a new graph") if ( "bandwidth_scale" in params and params["bandwidth_scale"] != self.bandwidth_scale ): - raise ValueError( - "Cannot update bandwidth_scale. Please create a new graph") + raise ValueError("Cannot update bandwidth_scale. Please create a new graph") if "distance" in params and params["distance"] != self.distance: - raise ValueError( - "Cannot update distance. " "Please create a new graph") + raise ValueError("Cannot update distance. " "Please create a new graph") if "thresh" in params and params["thresh"] != self.thresh and self.decay != 0: raise ValueError("Cannot update thresh. Please create a new graph") if "n_jobs" in params: @@ -264,8 +261,7 @@ def build_kernel(self): with no non-negative entries. """ knn_max = self.knn_max + 1 if self.knn_max else None - K = self.build_kernel_to_data( - self.data_nu, knn=self.knn + 1, knn_max=knn_max) + K = self.build_kernel_to_data(self.data_nu, knn=self.knn + 1, knn_max=knn_max) return K def _check_duplicates(self, distances, indices): @@ -364,8 +360,7 @@ def build_kernel_to_data( # sparse fast alpha decay knn_tree = self.knn_tree search_knn = min(knn * self.search_multiplier, knn_max) - distances, indices = knn_tree.kneighbors( - Y, n_neighbors=search_knn) + distances, indices = knn_tree.kneighbors(Y, n_neighbors=search_knn) self._check_duplicates(distances, indices) with _logger.task("affinities"): if bandwidth is None: @@ -376,13 +371,10 @@ def build_kernel_to_data( # check for zero bandwidth bandwidth = np.maximum(bandwidth, np.finfo(float).eps) - radius = bandwidth * \ - np.power(-1 * np.log(self.thresh), 1 / self.decay) - update_idx = np.argwhere( - np.max(distances, axis=1) < radius).reshape(-1) + radius = bandwidth * np.power(-1 * np.log(self.thresh), 1 / self.decay) + update_idx = np.argwhere(np.max(distances, axis=1) < radius).reshape(-1) _logger.debug( - "search_knn = {}; {} remaining".format( - search_knn, len(update_idx)) + "search_knn = {}; {} remaining".format(search_knn, len(update_idx)) ) if len(update_idx) > 0: distances = [d for d in distances] @@ -416,8 +408,7 @@ def build_kernel_to_data( ) ) # increase the knn search - search_knn = min( - search_knn * self.search_multiplier, knn_max) + search_knn = min(search_knn * self.search_multiplier, knn_max) if search_knn > self.data_nu.shape[0] / 2: knn_tree = NearestNeighbors( n_neighbors=search_knn, algorithm="brute", n_jobs=self.n_jobs @@ -437,8 +428,7 @@ def build_kernel_to_data( distances[idx] = dist_new[i] indices[idx] = ind_new[i] else: - _logger.debug( - "radius search on {}".format(len(update_idx))) + _logger.debug("radius search on {}".format(len(update_idx))) # give up - radius search dist_new, ind_new = knn_tree.radius_neighbors( Y[update_idx, :], @@ -453,16 +443,13 @@ def build_kernel_to_data( data = np.concatenate(distances) / bandwidth else: data = np.concatenate( - [distances[i] / bandwidth[i] - for i in range(len(distances))] + [distances[i] / bandwidth[i] for i in range(len(distances))] ) indices = np.concatenate(indices) - indptr = np.concatenate( - [[0], np.cumsum([len(d) for d in distances])]) + indptr = np.concatenate([[0], np.cumsum([len(d) for d in distances])]) K = sparse.csr_matrix( - (data, indices, indptr), shape=( - Y.shape[0], self.data_nu.shape[0]) + (data, indices, indptr), shape=(Y.shape[0], self.data_nu.shape[0]) ) K.data = np.exp(-1 * np.power(K.data, self.decay)) # handle nan @@ -648,15 +635,13 @@ def _landmarks_to_data(self): if sparse.issparse(self.kernel): pmn = sparse.vstack( [ - sparse.csr_matrix( - self.kernel[self.clusters == i, :].sum(axis=0)) + sparse.csr_matrix(self.kernel[self.clusters == i, :].sum(axis=0)) for i in landmarks ] ) else: pmn = np.array( - [np.sum(self.kernel[self.clusters == i, :], axis=0) - for i in landmarks] + [np.sum(self.kernel[self.clusters == i, :], axis=0) for i in landmarks] ) return pmn @@ -732,8 +717,7 @@ def extend_to_data(self, data, **kwargs): if sparse.issparse(kernel): pnm = sparse.hstack( [ - sparse.csr_matrix( - kernel[:, self.clusters == i].sum(axis=1)) + sparse.csr_matrix(kernel[:, self.clusters == i].sum(axis=1)) for i in np.unique(self.clusters) ] ) @@ -892,8 +876,7 @@ def __init__( ) elif (data < 0).sum() > 0: raise ValueError( - "Precomputed {} should be " "non-negative".format( - precomputed) + "Precomputed {} should be " "non-negative".format(precomputed) ) self.knn = knn self.decay = decay @@ -942,15 +925,13 @@ def set_params(self, **params): self """ if "precomputed" in params and params["precomputed"] != self.precomputed: - raise ValueError( - "Cannot update precomputed. " "Please create a new graph") + raise ValueError("Cannot update precomputed. " "Please create a new graph") if ( "distance" in params and params["distance"] != self.distance and self.precomputed is None ): - raise ValueError( - "Cannot update distance. " "Please create a new graph") + raise ValueError("Cannot update distance. " "Please create a new graph") if "knn" in params and params["knn"] != self.knn and self.precomputed is None: raise ValueError("Cannot update knn. Please create a new graph") if ( @@ -964,14 +945,12 @@ def set_params(self, **params): and params["bandwidth"] != self.bandwidth and self.precomputed is None ): - raise ValueError( - "Cannot update bandwidth. Please create a new graph") + raise ValueError("Cannot update bandwidth. Please create a new graph") if ( "bandwidth_scale" in params and params["bandwidth_scale"] != self.bandwidth_scale ): - raise ValueError( - "Cannot update bandwidth_scale. Please create a new graph") + raise ValueError("Cannot update bandwidth_scale. Please create a new graph") # update superclass parameters super().set_params(**params) return self @@ -1003,8 +982,7 @@ def build_kernel(self): # need to set diagonal to one to make it an affinity matrix K = self.data_nu if sparse.issparse(K) and not ( - isinstance(K, sparse.dok_matrix) or isinstance( - K, sparse.lil_matrix) + isinstance(K, sparse.dok_matrix) or isinstance(K, sparse.lil_matrix) ): K = K.tolil() K = matrix.set_diagonal(K, 1) @@ -1023,22 +1001,19 @@ def build_kernel(self): ) if len(duplicate_ids) < 20: duplicate_names = ", ".join( - ["{} and {}".format(i[0], i[1]) - for i in duplicate_ids] + ["{} and {}".format(i[0], i[1]) for i in duplicate_ids] ) warnings.warn( "Detected zero distance between samples {}. " "Consider removing duplicates to avoid errors in " - "downstream processing.".format( - duplicate_names), + "downstream processing.".format(duplicate_names), RuntimeWarning, ) else: warnings.warn( "Detected zero distance between {} pairs of samples. " "Consider removing duplicates to avoid errors in " - "downstream processing.".format( - len(duplicate_ids)), + "downstream processing.".format(len(duplicate_ids)), RuntimeWarning, ) else: @@ -1209,8 +1184,7 @@ def __init__( ): self.beta = beta self.sample_idx = sample_idx - self.samples, self.n_cells = np.unique( - self.sample_idx, return_counts=True) + self.samples, self.n_cells = np.unique(self.sample_idx, return_counts=True) self.knn = knn self.decay = decay self.distance = distance @@ -1229,8 +1203,7 @@ def __init__( "data ({})".format(len(sample_idx), data.shape[0]) ) elif len(self.samples) == 1: - raise ValueError( - "sample_idx must contain more than one unique value") + raise ValueError("sample_idx must contain more than one unique value") if adaptive_k is not None: warnings.warn( "`adaptive_k` has been deprecated. Using fixed knn.", DeprecationWarning @@ -1301,8 +1274,7 @@ def set_params(self, **params): for arg in knn_kernel_args: if arg in params and params[arg] != getattr(self, arg): raise ValueError( - "Cannot update {}. " "Please create a new graph".format( - arg) + "Cannot update {}. " "Please create a new graph".format(arg) ) for arg in knn_other_args: if arg in params: @@ -1358,8 +1330,7 @@ def build_kernel(self): with _logger.task("MNN kernel"): if self.thresh > 0 or self.decay is None: - K = sparse.lil_matrix( - (self.data_nu.shape[0], self.data_nu.shape[0])) + K = sparse.lil_matrix((self.data_nu.shape[0], self.data_nu.shape[0])) else: K = np.zeros([self.data_nu.shape[0], self.data_nu.shape[0]]) for i, X in enumerate(self.subgraphs): @@ -1381,8 +1352,7 @@ def build_kernel(self): Kij = Y.build_kernel_to_data(X.data_nu, knn=self.knn) between_batch_norm = np.array(np.sum(Kij, 1)).flatten() scale = ( - np.minimum(1, within_batch_norm / - between_batch_norm) + np.minimum(1, within_batch_norm / between_batch_norm) * self.beta ) if sparse.issparse(Kij): diff --git a/graphtools/matrix.py b/graphtools/matrix.py index 6e83f48..490155f 100644 --- a/graphtools/matrix.py +++ b/graphtools/matrix.py @@ -1,7 +1,7 @@ -import numbers +from scipy import sparse +import numbers import numpy as np -from scipy import sparse def if_sparse(sparse_func, dense_func, *args, **kwargs): @@ -53,8 +53,7 @@ def set_submatrix(X, i, j, values): def sparse_nonzero_discrete(X, values): if isinstance( - X, (sparse.bsr_matrix, sparse.dia_matrix, - sparse.dok_matrix, sparse.lil_matrix) + X, (sparse.bsr_matrix, sparse.dia_matrix, sparse.dok_matrix, sparse.lil_matrix) ): X = X.tocsr() return dense_nonzero_discrete(X.data, values) diff --git a/graphtools/utils.py b/graphtools/utils.py index 84ed04a..96a992c 100644 --- a/graphtools/utils.py +++ b/graphtools/utils.py @@ -1,9 +1,8 @@ -import numbers -import warnings - +from . import matrix from deprecated import deprecated -from . import matrix +import numbers +import warnings try: import pandas as pd @@ -67,8 +66,7 @@ def check_greater(x, **params): """ for p in params: if not isinstance(params[p], numbers.Number) or params[p] <= x: - raise ValueError( - "Expected {} > {}, got {}".format(p, x, params[p])) + raise ValueError("Expected {} > {}, got {}".format(p, x, params[p])) def check_positive(**params): @@ -90,8 +88,7 @@ def check_int(**params): """ for p in params: if not isinstance(params[p], numbers.Integral): - raise ValueError( - "Expected {} integer, got {}".format(p, params[p])) + raise ValueError("Expected {} integer, got {}".format(p, params[p])) def check_if_not(x, *checks, **params): diff --git a/setup.py b/setup.py index 4a7ed69..e548184 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,8 @@ +from setuptools import setup + import os import sys -from setuptools import setup - install_requires = [ "numpy>=1.14.0", "scipy>=1.1.0", @@ -34,10 +34,8 @@ elif sys.version_info[:2] >= (3, 6): test_requires += ["black"] -version_py = os.path.join(os.path.dirname( - __file__), "graphtools", "version.py") -version = open(version_py).read().strip().split( - "=")[-1].replace('"', "").strip() +version_py = os.path.join(os.path.dirname(__file__), "graphtools", "version.py") +version = open(version_py).read().strip().split("=")[-1].replace('"', "").strip() readme = open("README.rst").read() diff --git a/test/load_tests/__init__.py b/test/load_tests/__init__.py index 4cdb4ef..8e8765c 100644 --- a/test/load_tests/__init__.py +++ b/test/load_tests/__init__.py @@ -1,17 +1,20 @@ -import re -import warnings +from nose.tools import assert_raises_regex +from nose.tools import assert_warns_regex +from scipy.spatial.distance import cdist +from scipy.spatial.distance import pdist +from scipy.spatial.distance import squareform +from sklearn import datasets +from sklearn.decomposition import PCA +from sklearn.decomposition import TruncatedSVD +import graphtools import nose2 import numpy as np import pandas as pd import pygsp +import re import scipy.sparse as sp -from nose.tools import assert_raises_regex, assert_warns_regex -from scipy.spatial.distance import cdist, pdist, squareform -from sklearn import datasets -from sklearn.decomposition import PCA, TruncatedSVD - -import graphtools +import warnings def assert_warns_message(expected_warning, expected_message, *args, **kwargs): diff --git a/test/test_api.py b/test/test_api.py index 88ede82..1ff7dfe 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -1,14 +1,16 @@ from __future__ import print_function -import os -import pickle -import tempfile +from load_tests import assert_raises_message +from load_tests import assert_warns_message +from load_tests import build_graph +from load_tests import data +import graphtools import igraph import numpy as np -from load_tests import assert_raises_message, assert_warns_message, build_graph, data - -import graphtools +import os +import pickle +import tempfile def test_from_igraph(): diff --git a/test/test_data.py b/test/test_data.py index 8d783b6..afec27c 100644 --- a/test/test_data.py +++ b/test/test_data.py @@ -1,23 +1,21 @@ from __future__ import print_function +from load_tests import assert_raises_message +from load_tests import assert_warns_message +from load_tests import build_graph +from load_tests import data +from load_tests import graphtools +from load_tests import nose2 +from load_tests import np +from load_tests import pd +from load_tests import pdist +from load_tests import sp +from load_tests import squareform +from nose.tools import assert_raises_regex + import numbers import warnings -from load_tests import ( - assert_raises_message, - assert_warns_message, - build_graph, - data, - graphtools, - nose2, - np, - pd, - pdist, - sp, - squareform, -) -from nose.tools import assert_raises_regex - try: import anndata except (ImportError, SyntaxError): @@ -35,8 +33,7 @@ def test_1d_data(): with assert_raises_message( ValueError, - "Expected 2D array, got 1D array instead (shape: ({},).)".format( - data.shape[0]), + "Expected 2D array, got 1D array instead (shape: ({},).)".format(data.shape[0]), ): build_graph(data[:, 0]) with assert_raises_message( @@ -340,8 +337,7 @@ def test_transform_sparse_no_pca(): def test_inverse_transform_dense_pca(): G = build_graph(data, n_pca=data.shape[1] - 1) - np.testing.assert_allclose( - G.data, G.inverse_transform(G.data_nu), atol=1e-12) + np.testing.assert_allclose(G.data, G.inverse_transform(G.data_nu), atol=1e-12) np.testing.assert_allclose( G.data[:, -1, None], G.inverse_transform(G.data_nu, columns=-1), atol=1e-12 ) @@ -350,8 +346,7 @@ def test_inverse_transform_dense_pca(): ) with assert_raises_message( IndexError, - "index {0} is out of bounds for axis 1 with size {0}".format( - G.data.shape[1]), + "index {0} is out of bounds for axis 1 with size {0}".format(G.data.shape[1]), ): G.inverse_transform(G.data_nu, columns=data.shape[1]) with assert_raises_message( @@ -379,8 +374,7 @@ def test_inverse_transform_dense_pca(): def test_inverse_transform_sparse_svd(): G = build_graph(data, sparse=True, n_pca=data.shape[1] - 1) - np.testing.assert_allclose( - data, G.inverse_transform(G.data_nu), atol=1e-12) + np.testing.assert_allclose(data, G.inverse_transform(G.data_nu), atol=1e-12) np.testing.assert_allclose( data[:, -1, None], G.inverse_transform(G.data_nu, columns=-1), atol=1e-12 ) @@ -495,8 +489,7 @@ def test_transform_adaptive_pca(): ): G.transform(G.data[:, :15]) - G2 = build_graph(data, n_pca=True, - rank_threshold=G.rank_threshold, random_state=42) + G2 = build_graph(data, n_pca=True, rank_threshold=G.rank_threshold, random_state=42) assert np.allclose(G2.data_nu, G2.transform(G2.data)) assert np.allclose(G2.data_nu, G.transform(G.data)) diff --git a/test/test_estimator.py b/test/test_estimator.py index f96bf47..ba3d10a 100644 --- a/test/test_estimator.py +++ b/test/test_estimator.py @@ -1,14 +1,14 @@ -import warnings - -import anndata -import numpy as np -import pygsp -from load_tests import assert_raises_message, data +from load_tests import assert_raises_message +from load_tests import data from parameterized import parameterized from scipy import sparse +import anndata import graphtools import graphtools.estimator +import numpy as np +import pygsp +import warnings class Estimator(graphtools.estimator.GraphEstimator): @@ -55,8 +55,7 @@ def test_estimator(): def test_precomputed(distance, X, precomputed): E = Estimator(verbose=False, distance=distance) with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", message="K should have a non-zero diagonal") + warnings.filterwarnings("ignore", message="K should have a non-zero diagonal") E.fit(X) assert isinstance(E.graph, graphtools.graphs.TraditionalGraph) assert E.graph.precomputed == precomputed diff --git a/test/test_exact.py b/test/test_exact.py index 1e7f11c..6cefc9d 100644 --- a/test/test_exact.py +++ b/test/test_exact.py @@ -1,20 +1,18 @@ from __future__ import print_function -from load_tests import ( - PCA, - TruncatedSVD, - assert_raises_message, - assert_warns_message, - build_graph, - data, - graphtools, - nose2, - np, - pdist, - pygsp, - sp, - squareform, -) +from load_tests import assert_raises_message +from load_tests import assert_warns_message +from load_tests import build_graph +from load_tests import data +from load_tests import graphtools +from load_tests import nose2 +from load_tests import np +from load_tests import PCA +from load_tests import pdist +from load_tests import pygsp +from load_tests import sp +from load_tests import squareform +from load_tests import TruncatedSVD from nose.tools import assert_warns_regex from scipy.sparse.csgraph import shortest_path @@ -49,8 +47,7 @@ def test_build_exact_with_sample_idx(): ValueError, "TraditionalGraph does not support batch correction. Use `graphtype='mnn'` or `sample_idx=None`", ): - build_graph(data, graphtype="exact", - sample_idx=np.arange(len(data)), decay=10) + build_graph(data, graphtype="exact", sample_idx=np.arange(len(data)), decay=10) def test_precomputed_with_pca(): @@ -58,8 +55,7 @@ def test_precomputed_with_pca(): RuntimeWarning, "n_pca cannot be given on a precomputed graph. Setting n_pca=None", ): - build_graph(squareform(pdist(data)), - precomputed="distance", n_pca=20, decay=10) + build_graph(squareform(pdist(data)), precomputed="distance", n_pca=20, decay=10) def test_exact_no_decay(): @@ -137,8 +133,7 @@ def test_exact_graph(): a = 13 n_pca = 20 bandwidth_scale = 1.3 - data_small = data[np.random.choice( - len(data), len(data) // 2, replace=False)] + data_small = data[np.random.choice(len(data), len(data) // 2, replace=False)] pca = PCA(n_pca, svd_solver="randomized", random_state=42).fit(data_small) data_small_nu = pca.transform(data_small) pdx = squareform(pdist(data_small_nu, metric="euclidean")) @@ -215,8 +210,7 @@ def test_truncated_exact_graph(): a = 13 n_pca = 20 thresh = 1e-4 - data_small = data[np.random.choice( - len(data), len(data) // 2, replace=False)] + data_small = data[np.random.choice(len(data), len(data) // 2, replace=False)] pca = PCA(n_pca, svd_solver="randomized", random_state=42).fit(data_small) data_small_nu = pca.transform(data_small) pdx = squareform(pdist(data_small_nu, metric="euclidean")) @@ -287,8 +281,7 @@ def test_truncated_exact_graph_sparse(): a = 13 n_pca = 20 thresh = 1e-4 - data_small = data[np.random.choice( - len(data), len(data) // 2, replace=False)] + data_small = data[np.random.choice(len(data), len(data) // 2, replace=False)] pca = TruncatedSVD(n_pca, random_state=42).fit(data_small) data_small_nu = pca.transform(data_small) pdx = squareform(pdist(data_small_nu, metric="euclidean")) @@ -361,8 +354,7 @@ def test_truncated_exact_graph_no_pca(): a = 13 n_pca = None thresh = 1e-4 - data_small = data[np.random.choice( - len(data), len(data) // 10, replace=False)] + data_small = data[np.random.choice(len(data), len(data) // 10, replace=False)] pdx = squareform(pdist(data_small, metric="euclidean")) knn_dist = np.partition(pdx, k, axis=1)[:, :k] epsilon = np.max(knn_dist, axis=1) @@ -525,8 +517,7 @@ def test_exact_graph_anisotropy(): a = 13 n_pca = 20 anisotropy = 0.9 - data_small = data[np.random.choice( - len(data), len(data) // 2, replace=False)] + data_small = data[np.random.choice(len(data), len(data) // 2, replace=False)] pca = PCA(n_pca, svd_solver="randomized", random_state=42).fit(data_small) data_small_nu = pca.transform(data_small) pdx = squareform(pdist(data_small_nu, metric="euclidean")) @@ -598,8 +589,7 @@ def test_exact_graph_anisotropy(): def test_shortest_path_affinity(): - data_small = data[np.random.choice( - len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=15) D = -1 * np.where(G.K != 0, np.log(np.where(G.K != 0, G.K, np.nan)), 0) P = shortest_path(D) @@ -612,8 +602,7 @@ def test_shortest_path_affinity(): def test_shortest_path_affinity_precomputed(): - data_small = data[np.random.choice( - len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=15) G = graphtools.Graph(G.K, precomputed="affinity") D = -1 * np.where(G.K != 0, np.log(np.where(G.K != 0, G.K, np.nan)), 0) @@ -631,8 +620,7 @@ def test_shortest_path_decay_constant(): NotImplementedError, "Graph shortest path with constant distance only implemented for unweighted graphs. For weighted graphs, use `distance='affinity'`.", ): - data_small = data[np.random.choice( - len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=15) G.shortest_path(distance="constant") @@ -642,8 +630,7 @@ def test_shortest_path_precomputed_decay_constant(): NotImplementedError, "Graph shortest path with constant distance only implemented for unweighted graphs. For weighted graphs, use `distance='affinity'`.", ): - data_small = data[np.random.choice( - len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=15) G = graphtools.Graph(G.K, precomputed="affinity") G.shortest_path(distance="constant") @@ -654,8 +641,7 @@ def test_shortest_path_decay_data(): NotImplementedError, "Graph shortest path with constant or data distance only implemented for unweighted graphs. For weighted graphs, use `distance='affinity'`.", ): - data_small = data[np.random.choice( - len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=15) G.shortest_path(distance="data") @@ -665,8 +651,7 @@ def test_shortest_path_precomputed_data(): ValueError, "Graph shortest path with data distance not valid for precomputed graphs. For precomputed graphs, use `distance='constant'` for unweighted graphs and `distance='affinity'` for weighted graphs.", ): - data_small = data[np.random.choice( - len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=15) G = graphtools.Graph(G.K, precomputed="affinity") G.shortest_path(distance="data") @@ -726,8 +711,7 @@ def test_exact_interpolate(): def test_precomputed_interpolate(): with assert_raises_message(ValueError, "Cannot extend kernel on precomputed graph"): - G = build_graph(squareform(pdist(data)), - n_pca=None, precomputed="distance") + G = build_graph(squareform(pdist(data)), n_pca=None, precomputed="distance") G.build_kernel_to_data(data) diff --git a/test/test_knn.py b/test/test_knn.py index 9274fdf..3c01eda 100644 --- a/test/test_knn.py +++ b/test/test_knn.py @@ -1,24 +1,25 @@ -from __future__ import division, print_function +from __future__ import division +from __future__ import print_function + +from load_tests import assert_raises_message +from load_tests import assert_warns_message +from load_tests import build_graph +from load_tests import data +from load_tests import datasets +from load_tests import graphtools +from load_tests import np +from load_tests import PCA +from load_tests import pygsp +from load_tests import sp +from load_tests import TruncatedSVD +from nose.tools import assert_raises_regex +from nose.tools import assert_warns_regex +from scipy.spatial.distance import pdist +from scipy.spatial.distance import squareform +from sklearn.utils.graph import graph_shortest_path import warnings -from load_tests import ( - PCA, - TruncatedSVD, - assert_raises_message, - assert_warns_message, - build_graph, - data, - datasets, - graphtools, - np, - pygsp, - sp, -) -from nose.tools import assert_raises_regex, assert_warns_regex -from scipy.spatial.distance import pdist, squareform -from sklearn.utils.graph import graph_shortest_path - ##################################################### # Check parameters ##################################################### @@ -53,8 +54,7 @@ def test_duplicate_data(): RuntimeWarning, r"Detected zero distance between samples ([0-9and,\s]*). Consider removing duplicates to avoid errors in downstream processing.", ): - build_graph(np.vstack([data, data[:9]]), - n_pca=20, decay=10, thresh=1e-4) + build_graph(np.vstack([data, data[:9]]), n_pca=20, decay=10, thresh=1e-4) def test_duplicate_data_many(): @@ -62,8 +62,7 @@ def test_duplicate_data_many(): RuntimeWarning, "Detected zero distance between ([0-9]*) pairs of samples. Consider removing duplicates to avoid errors in downstream processing.", ): - build_graph(np.vstack([data, data[:21]]), - n_pca=20, decay=10, thresh=1e-4) + build_graph(np.vstack([data, data[:21]]), n_pca=20, decay=10, thresh=1e-4) def test_balltree_cosine(): @@ -103,8 +102,7 @@ def test_knn_no_knn_no_bandwidth(): with assert_raises_message( ValueError, "Either `knn` or `bandwidth` must be provided." ): - build_graph(data, graphtype="knn", knn=None, - bandwidth=None, thresh=1e-4) + build_graph(data, graphtype="knn", knn=None, bandwidth=None, thresh=1e-4) def test_knn_graph_invalid_symm(): @@ -112,8 +110,7 @@ def test_knn_graph_invalid_symm(): ValueError, "kernel_symm 'invalid' not recognized. Choose from '+', '*', 'mnn', or 'none'.", ): - build_graph(data, graphtype="knn", knn=5, - thresh=1e-4, kernel_symm="invalid") + build_graph(data, graphtype="knn", knn=5, thresh=1e-4, kernel_symm="invalid") ##################################################### @@ -267,8 +264,7 @@ def test_knnmax(): thresh = 0 with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", "K should be symmetric", RuntimeWarning) + warnings.filterwarnings("ignore", "K should be symmetric", RuntimeWarning) G = build_graph( data, n_pca=None, # n_pca, @@ -431,8 +427,7 @@ def test_knn_graph_anisotropy(): n_pca = 20 anisotropy = 0.9 thresh = 1e-4 - data_small = data[np.random.choice( - len(data), len(data) // 2, replace=False)] + data_small = data[np.random.choice(len(data), len(data) // 2, replace=False)] pca = PCA(n_pca, svd_solver="randomized", random_state=42).fit(data_small) data_small_nu = pca.transform(data_small) pdx = squareform(pdist(data_small_nu, metric="euclidean")) @@ -507,8 +502,7 @@ def test_knn_interpolate(): def test_knn_interpolate_wrong_shape(): G = build_graph(data, n_pca=10, decay=None) with assert_raises_message( - ValueError, "Expected a 2D matrix. Y has shape ({},)".format( - data.shape[0]) + ValueError, "Expected a 2D matrix. Y has shape ({},)".format(data.shape[0]) ): G.extend_to_data(data[:, 0]) with assert_raises_message( @@ -533,8 +527,7 @@ def test_knn_interpolate_wrong_shape(): def test_shortest_path_constant(): - data_small = data[np.random.choice( - len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=None) P = graph_shortest_path(G.K) # sklearn returns 0 if no path exists @@ -545,8 +538,7 @@ def test_shortest_path_constant(): def test_shortest_path_precomputed_constant(): - data_small = data[np.random.choice( - len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=None) G = graphtools.Graph(G.K, precomputed="affinity") P = graph_shortest_path(G.K) @@ -559,8 +551,7 @@ def test_shortest_path_precomputed_constant(): def test_shortest_path_data(): - data_small = data[np.random.choice( - len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=None) D = squareform(pdist(G.data_nu)) * np.where(G.K.toarray() > 0, 1, 0) P = graph_shortest_path(D) @@ -577,8 +568,7 @@ def test_shortest_path_no_decay_affinity(): ValueError, "Graph shortest path with affinity distance only valid for weighted graphs. For unweighted graphs, use `distance='constant'` or `distance='data'`.", ): - data_small = data[np.random.choice( - len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=None) G.shortest_path(distance="affinity") @@ -588,8 +578,7 @@ def test_shortest_path_precomputed_no_decay_affinity(): ValueError, "Graph shortest path with affinity distance only valid for weighted graphs. For unweighted graphs, use `distance='constant'` or `distance='data'`.", ): - data_small = data[np.random.choice( - len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=None) G = graphtools.Graph(G.K, precomputed="affinity") G.shortest_path(distance="affinity") @@ -600,8 +589,7 @@ def test_shortest_path_precomputed_no_decay_data(): ValueError, "Graph shortest path with data distance not valid for precomputed graphs. For precomputed graphs, use `distance='constant'` for unweighted graphs and `distance='affinity'` for weighted graphs.", ): - data_small = data[np.random.choice( - len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=None) G = graphtools.Graph(G.K, precomputed="affinity") G.shortest_path(distance="data") @@ -612,8 +600,7 @@ def test_shortest_path_invalid(): ValueError, "Expected `distance` in ['constant', 'data', 'affinity']. Got invalid", ): - data_small = data[np.random.choice( - len(data), len(data) // 4, replace=False)] + data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=None) G.shortest_path(distance="invalid") diff --git a/test/test_landmark.py b/test/test_landmark.py index e365c9a..a6b4bd8 100644 --- a/test/test_landmark.py +++ b/test/test_landmark.py @@ -1,17 +1,16 @@ from __future__ import print_function +from load_tests import assert_raises_message +from load_tests import assert_warns_message +from load_tests import build_graph +from load_tests import data +from load_tests import digits +from load_tests import generate_swiss_roll +from load_tests import graphtools +from load_tests import nose2 +from load_tests import np + import pygsp -from load_tests import ( - assert_raises_message, - assert_warns_message, - build_graph, - data, - digits, - generate_swiss_roll, - graphtools, - nose2, - np, -) ##################################################### # Check parameters diff --git a/test/test_matrix.py b/test/test_matrix.py index 647c93d..41cf757 100644 --- a/test/test_matrix.py +++ b/test/test_matrix.py @@ -1,11 +1,12 @@ -import numpy as np -from load_tests import assert_warns_message, data +from load_tests import assert_warns_message +from load_tests import data from parameterized import parameterized from scipy import sparse import graphtools import graphtools.matrix import graphtools.utils +import numpy as np @parameterized( @@ -120,8 +121,7 @@ def test_set_submatrix_deprecated(): "Call to deprecated function (or staticmethod) set_submatrix. (Use graphtools.matrix.set_submatrix instead) -- Deprecated since version 1.5.0.", ): graphtools.utils.set_submatrix( - sparse.lil_matrix((4, 4)), [1, 2], [ - 0, 1], np.array([[1, 2], [3, 4]]) + sparse.lil_matrix((4, 4)), [1, 2], [0, 1], np.array([[1, 2], [3, 4]]) ) @@ -130,8 +130,7 @@ def test_sparse_nonzero_discrete_deprecated(): DeprecationWarning, "Call to deprecated function (or staticmethod) sparse_nonzero_discrete. (Use graphtools.matrix.sparse_nonzero_discrete instead) -- Deprecated since version 1.5.0.", ): - graphtools.utils.sparse_nonzero_discrete( - sparse.csr_matrix((4, 4)), [1]) + graphtools.utils.sparse_nonzero_discrete(sparse.csr_matrix((4, 4)), [1]) def test_dense_nonzero_discrete_deprecated(): diff --git a/test/test_mnn.py b/test/test_mnn.py index 8f5a13c..592b227 100644 --- a/test/test_mnn.py +++ b/test/test_mnn.py @@ -1,23 +1,21 @@ from __future__ import print_function -import warnings - -from load_tests import ( - assert_raises_message, - assert_warns_message, - build_graph, - cdist, - data, - digits, - generate_swiss_roll, - graphtools, - nose2, - np, - pd, - pygsp, -) +from load_tests import assert_raises_message +from load_tests import assert_warns_message +from load_tests import build_graph +from load_tests import cdist +from load_tests import data +from load_tests import digits +from load_tests import generate_swiss_roll +from load_tests import graphtools +from load_tests import nose2 +from load_tests import np +from load_tests import pd +from load_tests import pygsp from scipy.linalg import norm +import warnings + ##################################################### # Check parameters ##################################################### @@ -28,15 +26,13 @@ def test_sample_idx_and_precomputed(): ValueError, "MNNGraph does not support precomputed values. Use `graphtype='exact'` and `sample_idx=None` or `precomputed=None`", ): - build_graph(data, n_pca=None, sample_idx=np.arange( - 10), precomputed="distance") + build_graph(data, n_pca=None, sample_idx=np.arange(10), precomputed="distance") def test_sample_idx_wrong_length(): with assert_raises_message( ValueError, - "sample_idx (10) must be the same length as data ({})".format( - data.shape[0]), + "sample_idx (10) must be the same length as data ({})".format(data.shape[0]), ): build_graph(data, graphtype="mnn", sample_idx=np.arange(10)) @@ -46,8 +42,7 @@ def test_sample_idx_unique(): ValueError, "sample_idx must contain more than one unique value" ): build_graph( - data, graph_class=graphtools.graphs.MNNGraph, sample_idx=np.ones( - len(data)) + data, graph_class=graphtools.graphs.MNNGraph, sample_idx=np.ones(len(data)) ) with assert_warns_message( UserWarning, "Only one unique sample. Not using MNNGraph" @@ -360,8 +355,7 @@ def test_mnn_graph_no_decay(): e_ij = kdx_ij[:, batch_k - 1] # dist to kNN k_ij = np.where(pdx_ij <= e_ij[:, None], 1, 0) # apply knn kernel if si == sj: - K.iloc[sample_idx == si, sample_idx == sj] = ( - k_ij + k_ij.T) / 2 + K.iloc[sample_idx == si, sample_idx == sj] = (k_ij + k_ij.T) / 2 else: # fill out values in K for NN on diagonal K.iloc[sample_idx == si, sample_idx == sj] = k_ij @@ -382,8 +376,7 @@ def test_mnn_graph_no_decay(): ) K = Kn - W = np.array((theta * np.minimum(K, K.T)) + - ((1 - theta) * np.maximum(K, K.T))) + W = np.array((theta * np.minimum(K, K.T)) + ((1 - theta) * np.maximum(K, K.T))) np.fill_diagonal(W, 0) G = pygsp.graphs.Graph(W) G2 = graphtools.Graph( @@ -428,8 +421,7 @@ def test_mnn_graph_decay(): pdxe_ij = pdx_ij / e_ij[:, np.newaxis] # normalize k_ij = np.exp(-1 * (pdxe_ij**a)) # apply alpha-decaying kernel if si == sj: - K.iloc[sample_idx == si, sample_idx == sj] = ( - k_ij + k_ij.T) / 2 + K.iloc[sample_idx == si, sample_idx == sj] = (k_ij + k_ij.T) / 2 else: # fill out values in K for NN on diagonal K.iloc[sample_idx == si, sample_idx == sj] = k_ij @@ -450,8 +442,7 @@ def test_mnn_graph_decay(): ) K = Kn - W = np.array((theta * np.minimum(K, K.T)) + - ((1 - theta) * np.maximum(K, K.T))) + W = np.array((theta * np.minimum(K, K.T)) + ((1 - theta) * np.maximum(K, K.T))) np.fill_diagonal(W, 0) G = pygsp.graphs.Graph(W) G2 = graphtools.Graph( diff --git a/test/test_utils.py b/test/test_utils.py index 601d843..da379b4 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -29,8 +29,7 @@ def test_check_positive(): def test_check_if_not(): graphtools.utils.check_if_not(-5, graphtools.utils.check_positive, foo=-5) with assert_raises_message(ValueError, "Expected foo > 0, got -5"): - graphtools.utils.check_if_not(-4, - graphtools.utils.check_positive, foo=-5) + graphtools.utils.check_if_not(-4, graphtools.utils.check_positive, foo=-5) def test_check_between(): From 465582ce4154292e56c603e3f90c248ab6edd182 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:37:38 -0500 Subject: [PATCH 15/41] Update graphs.py --- graphtools/graphs.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/graphtools/graphs.py b/graphtools/graphs.py index 935520e..2e13fff 100644 --- a/graphtools/graphs.py +++ b/graphtools/graphs.py @@ -350,19 +350,19 @@ def build_kernel_to_data( Y = self._check_extension_shape(Y) if self.decay is None or self.thresh == 1: - with _logger.task("KNN search"): + with _logger.log_task("KNN search"): # binary connectivity matrix K = self.knn_tree.kneighbors_graph( Y, n_neighbors=knn, mode="connectivity" ) else: - with _logger.task("KNN search"): + with _logger.log_task("KNN search"): # sparse fast alpha decay knn_tree = self.knn_tree search_knn = min(knn * self.search_multiplier, knn_max) distances, indices = knn_tree.kneighbors(Y, n_neighbors=search_knn) self._check_duplicates(distances, indices) - with _logger.task("affinities"): + with _logger.log_task("affinities"): if bandwidth is None: bandwidth = distances[:, knn - 1] @@ -373,7 +373,7 @@ def build_kernel_to_data( radius = bandwidth * np.power(-1 * np.log(self.thresh), 1 / self.decay) update_idx = np.argwhere(np.max(distances, axis=1) < radius).reshape(-1) - _logger.debug( + _logger.log_debug( "search_knn = {}; {} remaining".format(search_knn, len(update_idx)) ) if len(update_idx) > 0: @@ -402,7 +402,7 @@ def build_kernel_to_data( else radius[i] ) ] - _logger.debug( + _logger.log_debug( "search_knn = {}; {} remaining".format( search_knn, len(update_idx) ) @@ -415,7 +415,7 @@ def build_kernel_to_data( ).fit(self.data_nu) if len(update_idx) > 0: if search_knn == knn_max: - _logger.debug( + _logger.log_debug( "knn search to knn_max ({}) on {}".format( knn_max, len(update_idx) ) @@ -428,7 +428,7 @@ def build_kernel_to_data( distances[idx] = dist_new[i] indices[idx] = ind_new[i] else: - _logger.debug("radius search on {}".format(len(update_idx))) + _logger.log_debug("radius search on {}".format(len(update_idx))) # give up - radius search dist_new, ind_new = knn_tree.radius_neighbors( Y[update_idx, :], @@ -655,16 +655,16 @@ def build_landmark_op(self): probabilities between cluster centers by using transition probabilities between samples assigned to each cluster. """ - with _logger.task("landmark operator"): + with _logger.log_task("landmark operator"): is_sparse = sparse.issparse(self.kernel) # spectral clustering - with _logger.task("SVD"): + with _logger.log_task("SVD"): _, _, VT = randomized_svd( self.diff_aff, n_components=self.n_svd, random_state=self.random_state, ) - with _logger.task("KMeans"): + with _logger.log_task("KMeans"): kmeans = MiniBatchKMeans( self.n_landmark, init_size=3 * self.n_landmark, @@ -987,7 +987,7 @@ def build_kernel(self): K = K.tolil() K = matrix.set_diagonal(K, 1) else: - with _logger.task("affinities"): + with _logger.log_task("affinities"): if sparse.issparse(self.data_nu): self.data_nu = self.data_nu.toarray() if self.precomputed == "distance": @@ -1093,7 +1093,7 @@ def build_kernel_to_data(self, Y, knn=None, bandwidth=None, bandwidth_scale=None if self.precomputed is not None: raise ValueError("Cannot extend kernel on precomputed graph") else: - with _logger.task("affinities"): + with _logger.log_task("affinities"): Y = self._check_extension_shape(Y) pdx = cdist(Y, self.data_nu, metric=self.distance) if bandwidth is None: @@ -1130,7 +1130,7 @@ def _check_shortest_path_distance(self, distance): def _default_shortest_path_distance(self): if self.precomputed is not None and not self.weighted: distance = "constant" - _logger.info("Using constant distances.") + _logger.log_info("Using constant distances.") else: distance = super()._default_shortest_path_distance() return distance @@ -1297,13 +1297,13 @@ def build_kernel(self): symmetric matrix with ones down the diagonal with no non-negative entries. """ - with _logger.task("subgraphs"): + with _logger.log_task("subgraphs"): self.subgraphs = [] from .api import Graph # iterate through sample ids for i, idx in enumerate(self.samples): - _logger.debug( + _logger.log_debug( "subgraph {}: sample {}, " "n = {}, knn = {}".format( i, idx, np.sum(self.sample_idx == idx), self.knn @@ -1328,7 +1328,7 @@ def build_kernel(self): ) self.subgraphs.append(graph) # append to list of subgraphs - with _logger.task("MNN kernel"): + with _logger.log_task("MNN kernel"): if self.thresh > 0 or self.decay is None: K = sparse.lil_matrix((self.data_nu.shape[0], self.data_nu.shape[0])) else: @@ -1344,7 +1344,7 @@ def build_kernel(self): for j, Y in enumerate(self.subgraphs): if i == j: continue - with _logger.task( + with _logger.log_task( "kernel from sample {} to {}".format( self.samples[i], self.samples[j] ) From b75ba8626afac8acec2163ca657c8c0be068c8b0 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:37:49 -0500 Subject: [PATCH 16/41] Delete .travis.yml --- .travis.yml | 32 -------------------------------- 1 file changed, 32 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 3fd9f36..0000000 --- a/.travis.yml +++ /dev/null @@ -1,32 +0,0 @@ -language: python -python: - - '3.5' - - '3.6' - - '3.7' - - '3.8' -cache: - - pip - - apt -addons: - apt: - packages: libjs-mathjax -script: - - python -c "import graphtools" - - 'pip install -U .[test]' - - 'if [ "$TRAVIS_PYTHON_VERSION" != "3.5" ]; then black . --check --diff; fi' - - python setup.py test - - 'pip install -U .[doc]' - - cd doc; make html - - cd .. -deploy: - provider: pypi - user: scottgigante - password: '${PYPI_PASSWORD}' - distributions: sdist bdist_wheel - skip_existing: true - cleanup: false - 'on': - tags: true - branch: master -after_success: - - coveralls From 17df9e8e0233b21bace328227834ab578305e9c6 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:39:14 -0500 Subject: [PATCH 17/41] Update README.rst --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 133409a..62ae08f 100644 --- a/README.rst +++ b/README.rst @@ -8,9 +8,9 @@ graphtools .. image:: https://anaconda.org/conda-forge/graphtools/badges/version.svg :target: https://anaconda.org/conda-forge/graphtools/ :alt: Latest Conda version -.. image:: https://api.travis-ci.com/KrishnaswamyLab/graphtools.svg?branch=master +.. image:: https://img.shields.io/github/workflow/status/KrishnaswamyLab/graphtools/Unit%20Tests/master?label=Github%20Actions :target: https://travis-ci.com/KrishnaswamyLab/graphtools - :alt: Travis CI Build + :alt: Github Actions Build .. image:: https://img.shields.io/readthedocs/graphtools.svg :target: https://graphtools.readthedocs.io/ :alt: Read the Docs From 19442d8d7ea78e80984e3df0b0f255581badf2d5 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:39:38 -0500 Subject: [PATCH 18/41] Update version.py --- graphtools/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphtools/version.py b/graphtools/version.py index 5197c5f..a06ff4e 100644 --- a/graphtools/version.py +++ b/graphtools/version.py @@ -1 +1 @@ -__version__ = "1.5.2" +__version__ = "1.5.3" From 4bf0c4842939da843323a8cfbd1859a085bb48bf Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:41:31 -0500 Subject: [PATCH 19/41] Update run_tests.yml --- .github/workflows/run_tests.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index c3d45ab..4c5c258 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -58,7 +58,6 @@ jobs: - {name: '3.9', os: ubuntu-latest, python: '3.9' } - {name: '3.8', os: ubuntu-latest, python: '3.8' } - {name: '3.7', os: ubuntu-latest, python: '3.7' } - - {name: '3.6', os: ubuntu-latest, python: '3.6' } steps: From 6f402d3f2e26847d0180116f6aa811cfb42ec92a Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:42:23 -0500 Subject: [PATCH 20/41] Update test_knn.py --- test/test_knn.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_knn.py b/test/test_knn.py index 3c01eda..3d0b4be 100644 --- a/test/test_knn.py +++ b/test/test_knn.py @@ -16,7 +16,7 @@ from nose.tools import assert_warns_regex from scipy.spatial.distance import pdist from scipy.spatial.distance import squareform -from sklearn.utils.graph import graph_shortest_path +from scipy.sparse.csgraph import shortest_path import warnings @@ -529,7 +529,7 @@ def test_knn_interpolate_wrong_shape(): def test_shortest_path_constant(): data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=None) - P = graph_shortest_path(G.K) + P = shortest_path(G.K) # sklearn returns 0 if no path exists P[np.where(P == 0)] = np.inf # diagonal should actually be zero @@ -541,7 +541,7 @@ def test_shortest_path_precomputed_constant(): data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=None) G = graphtools.Graph(G.K, precomputed="affinity") - P = graph_shortest_path(G.K) + P = shortest_path(G.K) # sklearn returns 0 if no path exists P[np.where(P == 0)] = np.inf # diagonal should actually be zero @@ -554,7 +554,7 @@ def test_shortest_path_data(): data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=None) D = squareform(pdist(G.data_nu)) * np.where(G.K.toarray() > 0, 1, 0) - P = graph_shortest_path(D) + P = shortest_path(D) # sklearn returns 0 if no path exists P[np.where(P == 0)] = np.inf # diagonal should actually be zero From 6e505ba24e6c6ee06e34e4f578814428ed5822f0 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 3 Jan 2023 05:42:46 +0000 Subject: [PATCH 21/41] pre-commit --- test/test_knn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_knn.py b/test/test_knn.py index 3d0b4be..35670c7 100644 --- a/test/test_knn.py +++ b/test/test_knn.py @@ -14,9 +14,9 @@ from load_tests import TruncatedSVD from nose.tools import assert_raises_regex from nose.tools import assert_warns_regex +from scipy.sparse.csgraph import shortest_path from scipy.spatial.distance import pdist from scipy.spatial.distance import squareform -from scipy.sparse.csgraph import shortest_path import warnings From b2fc021359723cfb7c8b81859c18017643bed46f Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:43:40 -0500 Subject: [PATCH 22/41] set n_init --- graphtools/graphs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/graphtools/graphs.py b/graphtools/graphs.py index 2e13fff..8f512a9 100644 --- a/graphtools/graphs.py +++ b/graphtools/graphs.py @@ -668,6 +668,7 @@ def build_landmark_op(self): kmeans = MiniBatchKMeans( self.n_landmark, init_size=3 * self.n_landmark, + n_init=1, batch_size=10000, random_state=self.random_state, ) From f42706dc588dd711e44e428b5c6091a3a438c522 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:44:20 -0500 Subject: [PATCH 23/41] Update estimator.py --- graphtools/estimator.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/graphtools/estimator.py b/graphtools/estimator.py index ae1652b..b072693 100644 --- a/graphtools/estimator.py +++ b/graphtools/estimator.py @@ -251,7 +251,7 @@ def _set_graph_params(self, **params): ) self.graph.set_params(**params) except ValueError as e: - _logger.debug("Reset graph due to {}".format(str(e))) + _logger.log_debug("Reset graph due to {}".format(str(e))) self.graph = None @abc.abstractmethod @@ -364,7 +364,7 @@ def _update_graph(self, X, precomputed, n_pca, n_landmark, **kwargs): **(self.kwargs), ) if self.graph is not None: - _logger.info("Using precomputed graph and diffusion operator...") + _logger.log_info("Using precomputed graph and diffusion operator...") def fit(self, X, **kwargs): """Computes the graph @@ -387,13 +387,13 @@ def fit(self, X, **kwargs): X, n_pca, n_landmark, precomputed, update_graph = self._parse_input(X) if precomputed is None: - _logger.info( + _logger.log_info( "Building graph on {} samples and {} features.".format( X.shape[0], X.shape[1] ) ) else: - _logger.info( + _logger.log_info( "Building graph on precomputed {} matrix with {} samples.".format( precomputed, X.shape[0] ) @@ -405,7 +405,7 @@ def fit(self, X, **kwargs): self.X = X if self.graph is None: - with _logger.task("graph and diffusion operator"): + with _logger.log_task("graph and diffusion operator"): self.graph = api.Graph( X, n_pca=n_pca, From c030bb220d8f5e626d57d7cc00b2fdd231c6de1d Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:45:19 -0500 Subject: [PATCH 24/41] Set dtype --- test/test_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_data.py b/test/test_data.py index afec27c..09e34cb 100644 --- a/test/test_data.py +++ b/test/test_data.py @@ -219,7 +219,7 @@ def test_anndata(): except NameError: # not installed return - G = build_graph(anndata.AnnData(data)) + G = build_graph(anndata.AnnData(data, dtype=data.dtype)) assert isinstance(G, graphtools.base.BaseGraph) assert isinstance(G.data, np.ndarray) @@ -230,7 +230,7 @@ def test_anndata_sparse(): except NameError: # not installed return - G = build_graph(anndata.AnnData(sp.csr_matrix(data))) + G = build_graph(anndata.AnnData(sp.csr_matrix(data), dtype=data.dtype)) assert isinstance(G, graphtools.base.BaseGraph) assert isinstance(G.data, sp.csr_matrix) From 9b29c1e17ac708b60a57853ef63df627d9824df6 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:45:51 -0500 Subject: [PATCH 25/41] Update run_tests.yml --- .github/workflows/run_tests.yml | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 4c5c258..6f3a9e8 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -13,37 +13,6 @@ concurrency: cancel-in-progress: true jobs: - run_linter: - runs-on: ${{ matrix.config.os }} - if: "!contains(github.event.head_commit.message, 'ci skip')" - - strategy: - fail-fast: false - matrix: - config: - - {name: 'current', os: ubuntu-latest, python: '3.8' } - - steps: - - uses: actions/checkout@v2 - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.config.python }} - - - name: Install tools - run: | - python -m pip install --upgrade pip - pip install -U wheel setuptools - pip install -U black flake8 - - - name: Lint with Black - run: | - black . --check --diff - - - name: Lint with flake8 - run: | - flake8 graphtools || true run_tester: runs-on: ${{ matrix.config.os }} From a3da450aab278033a4050dc1439d30093f7c666c Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:49:40 -0500 Subject: [PATCH 26/41] Update test_estimator.py --- test/test_estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_estimator.py b/test/test_estimator.py index ba3d10a..f3120cb 100644 --- a/test/test_estimator.py +++ b/test/test_estimator.py @@ -99,7 +99,7 @@ def test_anndata_input(): E = Estimator(verbose=0) E.fit(X.astype(np.float32)) E2 = Estimator(verbose=0) - E2.fit(anndata.AnnData(X)) + E2.fit(anndata.AnnData(X, dtype=X.dtype)) np.testing.assert_allclose( E.graph.K.toarray(), E2.graph.K.toarray(), rtol=1e-6, atol=2e-7 ) From ef34e994242980ccd87623c72aa674b645db5020 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:50:43 -0500 Subject: [PATCH 27/41] Update test_exact.py --- test/test_exact.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_exact.py b/test/test_exact.py index 6cefc9d..4f27756 100644 --- a/test/test_exact.py +++ b/test/test_exact.py @@ -611,7 +611,7 @@ def test_shortest_path_affinity_precomputed(): P[np.where(P == 0)] = np.inf # diagonal should actually be zero np.fill_diagonal(P, 0) - np.testing.assert_allclose(P, G.shortest_path(distance="affinity")) + np.testing.assert_allclose(P, G.shortest_path(distance="affinity"), atol=1e-6) np.testing.assert_allclose(P, G.shortest_path()) From a1c9cedf6bf017ace04632d4258ddbc277f85c9d Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:53:56 -0500 Subject: [PATCH 28/41] Update test_knn.py --- test/test_knn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_knn.py b/test/test_knn.py index 35670c7..65bebad 100644 --- a/test/test_knn.py +++ b/test/test_knn.py @@ -54,7 +54,7 @@ def test_duplicate_data(): RuntimeWarning, r"Detected zero distance between samples ([0-9and,\s]*). Consider removing duplicates to avoid errors in downstream processing.", ): - build_graph(np.vstack([data, data[:9]]), n_pca=20, decay=10, thresh=1e-4) + build_graph(np.vstack([data, data[:9]]), n_pca=50, decay=10, thresh=1e-4) def test_duplicate_data_many(): @@ -62,7 +62,7 @@ def test_duplicate_data_many(): RuntimeWarning, "Detected zero distance between ([0-9]*) pairs of samples. Consider removing duplicates to avoid errors in downstream processing.", ): - build_graph(np.vstack([data, data[:21]]), n_pca=20, decay=10, thresh=1e-4) + build_graph(np.vstack([data, data[:21]]), n_pca=50, decay=10, thresh=1e-4) def test_balltree_cosine(): From dfd271988a2f244d8d0e16916f0c67900b065748 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:54:51 -0500 Subject: [PATCH 29/41] Update test_exact.py --- test/test_exact.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_exact.py b/test/test_exact.py index 4f27756..46741b7 100644 --- a/test/test_exact.py +++ b/test/test_exact.py @@ -611,8 +611,8 @@ def test_shortest_path_affinity_precomputed(): P[np.where(P == 0)] = np.inf # diagonal should actually be zero np.fill_diagonal(P, 0) - np.testing.assert_allclose(P, G.shortest_path(distance="affinity"), atol=1e-6) - np.testing.assert_allclose(P, G.shortest_path()) + np.testing.assert_allclose(P, G.shortest_path(distance="affinity"), atol=1e-5, rtol=1e-4) + np.testing.assert_allclose(P, G.shortest_path(), atol=1e-5, rtol=1e-4) def test_shortest_path_decay_constant(): From a9203a4a91e3980c363c3b2c0aa41f392c127419 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 3 Jan 2023 05:55:13 +0000 Subject: [PATCH 30/41] pre-commit --- test/test_exact.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_exact.py b/test/test_exact.py index 46741b7..1e1f339 100644 --- a/test/test_exact.py +++ b/test/test_exact.py @@ -611,7 +611,9 @@ def test_shortest_path_affinity_precomputed(): P[np.where(P == 0)] = np.inf # diagonal should actually be zero np.fill_diagonal(P, 0) - np.testing.assert_allclose(P, G.shortest_path(distance="affinity"), atol=1e-5, rtol=1e-4) + np.testing.assert_allclose( + P, G.shortest_path(distance="affinity"), atol=1e-5, rtol=1e-4 + ) np.testing.assert_allclose(P, G.shortest_path(), atol=1e-5, rtol=1e-4) From ff2a4367896abe2eff18d712a9e137be5e7ad219 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:58:55 -0500 Subject: [PATCH 31/41] Update test_landmark.py --- test/test_landmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_landmark.py b/test/test_landmark.py index a6b4bd8..90ed109 100644 --- a/test/test_landmark.py +++ b/test/test_landmark.py @@ -72,7 +72,7 @@ def test_landmark_knn_graph(): G = build_graph( data, n_landmark=n_landmark, n_pca=20, decay=None, knn=5 - 1, random_state=42 ) - assert G.transitions.shape == (data.shape[0], n_landmark) + assert G.transitions.shape == (data.shape[0], n_landmark), G.transitions.shape assert G.landmark_op.shape == (n_landmark, n_landmark) assert isinstance(G, graphtools.graphs.kNNGraph) assert isinstance(G, graphtools.graphs.LandmarkGraph) From 2c1316da42b8ae09d0cae6c5e65efc51c9d8be23 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 00:59:20 -0500 Subject: [PATCH 32/41] Update test_exact.py --- test/test_exact.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_exact.py b/test/test_exact.py index 1e1f339..6de8c68 100644 --- a/test/test_exact.py +++ b/test/test_exact.py @@ -597,8 +597,8 @@ def test_shortest_path_affinity(): P[np.where(P == 0)] = np.inf # diagonal should actually be zero np.fill_diagonal(P, 0) - np.testing.assert_allclose(P, G.shortest_path(distance="affinity")) - np.testing.assert_allclose(P, G.shortest_path()) + np.testing.assert_allclose(P, G.shortest_path(distance="affinity"), atol=1e-5, rtol=1e-4) + np.testing.assert_allclose(P, G.shortest_path(), atol=1e-5, rtol=1e-4) def test_shortest_path_affinity_precomputed(): From ea850bcc2add9b81a1f4ed9f54ebfc1edf1f1333 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 3 Jan 2023 06:00:00 +0000 Subject: [PATCH 33/41] pre-commit --- test/test_exact.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_exact.py b/test/test_exact.py index 6de8c68..72ed2f0 100644 --- a/test/test_exact.py +++ b/test/test_exact.py @@ -597,7 +597,9 @@ def test_shortest_path_affinity(): P[np.where(P == 0)] = np.inf # diagonal should actually be zero np.fill_diagonal(P, 0) - np.testing.assert_allclose(P, G.shortest_path(distance="affinity"), atol=1e-5, rtol=1e-4) + np.testing.assert_allclose( + P, G.shortest_path(distance="affinity"), atol=1e-5, rtol=1e-4 + ) np.testing.assert_allclose(P, G.shortest_path(), atol=1e-5, rtol=1e-4) From 7f54209d26c471d404ee416575d840a4c9d2d149 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 01:06:53 -0500 Subject: [PATCH 34/41] Update test_exact.py --- test/test_exact.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_exact.py b/test/test_exact.py index 72ed2f0..7428cdd 100644 --- a/test/test_exact.py +++ b/test/test_exact.py @@ -598,9 +598,9 @@ def test_shortest_path_affinity(): # diagonal should actually be zero np.fill_diagonal(P, 0) np.testing.assert_allclose( - P, G.shortest_path(distance="affinity"), atol=1e-5, rtol=1e-4 + P, G.shortest_path(distance="affinity"), atol=1e-4, rtol=1e-3 ) - np.testing.assert_allclose(P, G.shortest_path(), atol=1e-5, rtol=1e-4) + np.testing.assert_allclose(P, G.shortest_path(), atol=1e-4, rtol=1e-3) def test_shortest_path_affinity_precomputed(): @@ -614,9 +614,9 @@ def test_shortest_path_affinity_precomputed(): # diagonal should actually be zero np.fill_diagonal(P, 0) np.testing.assert_allclose( - P, G.shortest_path(distance="affinity"), atol=1e-5, rtol=1e-4 + P, G.shortest_path(distance="affinity"), atol=1e-4, rtol=1e-3 ) - np.testing.assert_allclose(P, G.shortest_path(), atol=1e-5, rtol=1e-4) + np.testing.assert_allclose(P, G.shortest_path(), atol=1e-4, rtol=1e-3) def test_shortest_path_decay_constant(): From e2b509efc388bf0cd4850a69af28f6d83dcb87cc Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 01:07:13 -0500 Subject: [PATCH 35/41] Update test_knn.py --- test/test_knn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_knn.py b/test/test_knn.py index 65bebad..9dae5f9 100644 --- a/test/test_knn.py +++ b/test/test_knn.py @@ -54,7 +54,7 @@ def test_duplicate_data(): RuntimeWarning, r"Detected zero distance between samples ([0-9and,\s]*). Consider removing duplicates to avoid errors in downstream processing.", ): - build_graph(np.vstack([data, data[:9]]), n_pca=50, decay=10, thresh=1e-4) + build_graph(np.vstack([data, data[:9]]), n_pca=None, decay=10, thresh=1e-4) def test_duplicate_data_many(): @@ -62,7 +62,7 @@ def test_duplicate_data_many(): RuntimeWarning, "Detected zero distance between ([0-9]*) pairs of samples. Consider removing duplicates to avoid errors in downstream processing.", ): - build_graph(np.vstack([data, data[:21]]), n_pca=50, decay=10, thresh=1e-4) + build_graph(np.vstack([data, data[:21]]), n_pca=None, decay=10, thresh=1e-4) def test_balltree_cosine(): From ff3e9a5cbef656be3b55abfdda9b77810818fbbe Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 01:12:09 -0500 Subject: [PATCH 36/41] Update run_tests.yml --- .github/workflows/run_tests.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 6f3a9e8..cc5dac8 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -22,7 +22,6 @@ jobs: fail-fast: false matrix: config: - - {name: '3.11', os: ubuntu-latest, python: '3.11' } - {name: '3.10', os: ubuntu-latest, python: '3.10' } - {name: '3.9', os: ubuntu-latest, python: '3.9' } - {name: '3.8', os: ubuntu-latest, python: '3.8' } From 1040ec9419496e45541a2e6350c4c5fcd9936a87 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 01:13:05 -0500 Subject: [PATCH 37/41] Update test_exact.py --- test/test_exact.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_exact.py b/test/test_exact.py index 7428cdd..0044359 100644 --- a/test/test_exact.py +++ b/test/test_exact.py @@ -589,6 +589,7 @@ def test_exact_graph_anisotropy(): def test_shortest_path_affinity(): + np.random.seed(42) data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=15) D = -1 * np.where(G.K != 0, np.log(np.where(G.K != 0, G.K, np.nan)), 0) @@ -604,6 +605,7 @@ def test_shortest_path_affinity(): def test_shortest_path_affinity_precomputed(): + np.random.seed(42) data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)] G = build_graph(data_small, knn=5, decay=15) G = graphtools.Graph(G.K, precomputed="affinity") From c560eebd0f98e8edc0336b4314af0b26ce928941 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 01:13:30 -0500 Subject: [PATCH 38/41] Update test_landmark.py --- test/test_landmark.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_landmark.py b/test/test_landmark.py index 90ed109..9af4efb 100644 --- a/test/test_landmark.py +++ b/test/test_landmark.py @@ -67,6 +67,7 @@ def test_landmark_exact_graph(): def test_landmark_knn_graph(): + np.random.seed(42) n_landmark = 500 # knn graph G = build_graph( From 87434ea81684c6ef59f5d9fb0dd2aa40fdae4c55 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 01:19:12 -0500 Subject: [PATCH 39/41] Update graphs.py --- graphtools/graphs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphtools/graphs.py b/graphtools/graphs.py index 8f512a9..5e3d299 100644 --- a/graphtools/graphs.py +++ b/graphtools/graphs.py @@ -668,7 +668,7 @@ def build_landmark_op(self): kmeans = MiniBatchKMeans( self.n_landmark, init_size=3 * self.n_landmark, - n_init=1, + n_init=2, batch_size=10000, random_state=self.random_state, ) From 7138a62957b4a2b5314bdf91567cdfc801d07974 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 01:22:28 -0500 Subject: [PATCH 40/41] Update graphs.py --- graphtools/graphs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphtools/graphs.py b/graphtools/graphs.py index 5e3d299..8f512a9 100644 --- a/graphtools/graphs.py +++ b/graphtools/graphs.py @@ -668,7 +668,7 @@ def build_landmark_op(self): kmeans = MiniBatchKMeans( self.n_landmark, init_size=3 * self.n_landmark, - n_init=2, + n_init=1, batch_size=10000, random_state=self.random_state, ) From 07d05d965586aaefc0841e292c43ea89a2658c55 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 3 Jan 2023 01:24:46 -0500 Subject: [PATCH 41/41] Update test_landmark.py --- test/test_landmark.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/test_landmark.py b/test/test_landmark.py index 9af4efb..489d864 100644 --- a/test/test_landmark.py +++ b/test/test_landmark.py @@ -73,8 +73,11 @@ def test_landmark_knn_graph(): G = build_graph( data, n_landmark=n_landmark, n_pca=20, decay=None, knn=5 - 1, random_state=42 ) - assert G.transitions.shape == (data.shape[0], n_landmark), G.transitions.shape - assert G.landmark_op.shape == (n_landmark, n_landmark) + n_landmark_out = G.landmark_op.shape[0] + assert n_landmark_out <= n_landmark + assert n_landmark_out >= n_landmark - 3 + assert G.transitions.shape == (data.shape[0], n_landmark_out), G.transitions.shape + assert G.landmark_op.shape == (n_landmark_out, n_landmark_out) assert isinstance(G, graphtools.graphs.kNNGraph) assert isinstance(G, graphtools.graphs.LandmarkGraph)