[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
YosefLab · Dec 13, 2024
commit 4e1f2cd
1 parent 9c791b4
Show file tree

Hide file tree

Showing 29 changed files with 177 additions and 162 deletions.
diff --git a/docs/Makefile b/docs/Makefile
@@ -17,4 +17,4 @@ help:
 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
 %: Makefile
-	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/api.md b/docs/api.md
@@ -46,4 +46,3 @@ from popv import algorithms
     "knn_on_scanorama",
     "celltypist",
     "base_algorithm",
-
diff --git a/docs/conf.py b/docs/conf.py
@@ -16,10 +16,6 @@
 from importlib.metadata import metadata
 from pathlib import Path
 import importlib.util
-import inspect
-import os
-import re
-import subprocess
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:

diff --git a/...e/jupyter_execute/jupyter_execute/jupyter_execute/notebooks/tabula_sapiens_tutorial.ipynb b/...e/jupyter_execute/jupyter_execute/jupyter_execute/notebooks/tabula_sapiens_tutorial.ipynb
@@ -309,7 +309,7 @@
     "        query_adata = sc.read(input_file[\"path\"])\n",
     "    except:\n",
     "        raise Exception(\n",
-    "            f\"Default download failed with wget. Use custom downloader or check provided link \"\n",
+    "            \"Default download failed with wget. Use custom downloader or check provided link \"\n",
     "            + input_file[\"link\"]\n",
     "        )\n",
     "\n",
@@ -1348,4 +1348,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
+}
diff --git a/...e/jupyter_execute/jupyter_execute/jupyter_execute/notebooks/tabula_sapiens_tutorial.ipynb b/...e/jupyter_execute/jupyter_execute/jupyter_execute/notebooks/tabula_sapiens_tutorial.ipynb
@@ -309,7 +309,7 @@
     "        query_adata = sc.read(input_file[\"path\"])\n",
     "    except:\n",
     "        raise Exception(\n",
-    "            f\"Default download failed with wget. Use custom downloader or check provided link \"\n",
+    "            \"Default download failed with wget. Use custom downloader or check provided link \"\n",
     "            + input_file[\"link\"]\n",
     "        )\n",
     "\n",
@@ -1335,4 +1335,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
+}
diff --git a/...e/jupyter_execute/jupyter_execute/jupyter_execute/notebooks/tabula_sapiens_tutorial.ipynb b/...e/jupyter_execute/jupyter_execute/jupyter_execute/notebooks/tabula_sapiens_tutorial.ipynb
@@ -309,7 +309,7 @@
     "        query_adata = sc.read(input_file[\"path\"])\n",
     "    except:\n",
     "        raise Exception(\n",
-    "            f\"Default download failed with wget. Use custom downloader or check provided link \"\n",
+    "            \"Default download failed with wget. Use custom downloader or check provided link \"\n",
     "            + input_file[\"link\"]\n",
     "        )\n",
     "\n",
@@ -1322,4 +1322,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
+}
diff --git a/...e/jupyter_execute/jupyter_execute/jupyter_execute/notebooks/tabula_sapiens_tutorial.ipynb b/...e/jupyter_execute/jupyter_execute/jupyter_execute/notebooks/tabula_sapiens_tutorial.ipynb
@@ -309,7 +309,7 @@
     "        query_adata = sc.read(input_file[\"path\"])\n",
     "    except:\n",
     "        raise Exception(\n",
-    "            f\"Default download failed with wget. Use custom downloader or check provided link \"\n",
+    "            \"Default download failed with wget. Use custom downloader or check provided link \"\n",
     "            + input_file[\"link\"]\n",
     "        )\n",
     "\n",
@@ -1309,4 +1309,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
+}
diff --git a/docs/jupyter_execute/jupyter_execute/jupyter_execute/notebooks/tabula_sapiens_tutorial.ipynb b/docs/jupyter_execute/jupyter_execute/jupyter_execute/notebooks/tabula_sapiens_tutorial.ipynb
@@ -309,7 +309,7 @@
     "        query_adata = sc.read(input_file[\"path\"])\n",
     "    except:\n",
     "        raise Exception(\n",
-    "            f\"Default download failed with wget. Use custom downloader or check provided link \"\n",
+    "            \"Default download failed with wget. Use custom downloader or check provided link \"\n",
     "            + input_file[\"link\"]\n",
     "        )\n",
     "\n",
@@ -1296,4 +1296,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
+}
diff --git a/docs/jupyter_execute/jupyter_execute/notebooks/tabula_sapiens_tutorial.ipynb b/docs/jupyter_execute/jupyter_execute/notebooks/tabula_sapiens_tutorial.ipynb
@@ -309,7 +309,7 @@
     "        query_adata = sc.read(input_file[\"path\"])\n",
     "    except:\n",
     "        raise Exception(\n",
-    "            f\"Default download failed with wget. Use custom downloader or check provided link \"\n",
+    "            \"Default download failed with wget. Use custom downloader or check provided link \"\n",
     "            + input_file[\"link\"]\n",
     "        )\n",
     "\n",
@@ -1283,4 +1283,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
+}
diff --git a/docs/jupyter_execute/notebooks/tabula_sapiens_tutorial.ipynb b/docs/jupyter_execute/notebooks/tabula_sapiens_tutorial.ipynb
@@ -309,7 +309,7 @@
     "        query_adata = sc.read(input_file[\"path\"])\n",
     "    except:\n",
     "        raise Exception(\n",
-    "            f\"Default download failed with wget. Use custom downloader or check provided link \"\n",
+    "            \"Default download failed with wget. Use custom downloader or check provided link \"\n",
     "            + input_file[\"link\"]\n",
     "        )\n",
     "\n",
@@ -1270,4 +1270,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
+}
diff --git a/docs/reference/popv.algorithms.knn_on_scvi.rst b/docs/reference/popv.algorithms.knn_on_scvi.rst
@@ -29,7 +29,3 @@ Methods table
 
 Methods
 ~~~~~~~
-
-
-
-
diff --git a/docs/reference/popv.algorithms.scanvi.rst b/docs/reference/popv.algorithms.scanvi.rst
@@ -29,7 +29,3 @@ Methods table
 
 Methods
 ~~~~~~~
-
-
-
-
diff --git a/docs/tutorials.md b/docs/tutorials.md
@@ -6,4 +6,4 @@
 :maxdepth: 1
 
 notebooks/tabula_sapiens_tutorial.ipynb
-```
+```
diff --git a/popv/__init__.py b/popv/__init__.py
@@ -5,8 +5,8 @@
 
 import scanpy as sc
 
-from ._settings import settings
 from . import algorithms, annotation, preprocessing, visualization
+from ._settings import settings
 
 try:
     import importlib.metadata as importlib_metadata
@@ -22,10 +22,4 @@
 popv_logger.propagate = False
 
 
-__all__ = [
-    "settings",
-    "algorithms",
-    "annotation",
-    "preprocessing",
-    "visualization"
-]
+__all__ = ["algorithms", "annotation", "preprocessing", "settings", "visualization"]
diff --git a/popv/_settings.py b/popv/_settings.py
@@ -47,7 +47,7 @@ def __init__(
         logging_dir: str = "./popv_log/",
         n_jobs: int = 1,
         cuml: bool = False,
-        shard_size: int = 100000
+        shard_size: int = 100000,
     ):
         """Set up Config manager for PopV."""
         self.seed = seed
@@ -170,4 +170,5 @@ def return_probabilities(self) -> bool:
     def return_probabilities(self, return_probabilities: bool):
         self._return_probabilities = return_probabilities
 
+
 settings = Config()
diff --git a/popv/algorithms/__init__.py b/popv/algorithms/__init__.py
@@ -9,14 +9,14 @@
 from ._svm import SVM as svm
 
 __all__ = [
-    "knn_on_scvi",
-    "scanvi",
+    "base_algorithm",
+    "celltypist",
     "knn_on_bbknn",
     "knn_on_harmony",
-    "svm",
-    "rf",
-    "onclass",
     "knn_on_scanorama",
-    "celltypist",
-    "base_algorithm",
+    "knn_on_scvi",
+    "onclass",
+    "rf",
+    "scanvi",
+    "svm",
 ]
diff --git a/popv/algorithms/_bbknn.py b/popv/algorithms/_bbknn.py
@@ -56,10 +56,10 @@ def __init__(
 
         self.method_kwargs = {
             "metric": "euclidean" if self.enable_cuml else "cosine",
-            "approx": not self.enable_cuml, # FAISS if cuml
+            "approx": not self.enable_cuml,  # FAISS if cuml
             "n_pcs": 50,
             "neighbors_within_batch": 3 if self.enable_cuml else 8,
-            "use_annoy": False, #pynndescent
+            "use_annoy": False,  # pynndescent
         }
         if method_kwargs is not None:
             self.method_kwargs.update(method_kwargs)
@@ -68,15 +68,16 @@ def __init__(
         if classifier_dict is not None:
             self.classifier_dict.update(classifier_dict)
 
-        self.embedding_kwargs = {
-            "min_dist": 0.1}
+        self.embedding_kwargs = {"min_dist": 0.1}
         self.embedding_kwargs.update(embedding_kwargs)
 
     def _compute_integration(self, adata):
         logging.info("Integrating data with bbknn")
         if len(adata.obs[self.batch_key].unique()) > 100 and self.enable_cuml:
-            logging.warning('Using PyNNDescent instead of RAPIDS as high number of batches leads to OOM.')
-            self.method_kwargs['approx'] = True
+            logging.warning(
+                "Using PyNNDescent instead of RAPIDS as high number of batches leads to OOM."
+            )
+            self.method_kwargs["approx"] = True
         sc.external.pp.bbknn(adata, batch_key=self.batch_key, **self.method_kwargs)
 
     def _predict(self, adata):
@@ -109,7 +110,9 @@ def _predict(self, adata):
         knn = KNeighborsClassifier(metric="precomputed", **self.classifier_dict)
         knn.fit(train_distances, y=train_y)
 
-        adata.obs[self.result_key] = adata.obs[self.labels_key].cat.categories[knn.predict(test_distances)]
+        adata.obs[self.result_key] = adata.obs[self.labels_key].cat.categories[
+            knn.predict(test_distances)
+        ]
 
         if self.return_probabilities:
             adata.obs[self.result_key + "_probabilities"] = np.max(
@@ -122,10 +125,12 @@ def _compute_embedding(self, adata):
                 f'Saving UMAP of bbknn results to adata.obs["{self.embedding_key}"]'
             )
             if len(adata.obs[self.batch_key]) < 30 and settings.cuml:
-                method = 'rapids'
+                method = "rapids"
             else:
-                logging.warning('Using UMAP instead of RAPIDS as high number of batches leads to OOM.')
-                method = 'umap'
+                logging.warning(
+                    "Using UMAP instead of RAPIDS as high number of batches leads to OOM."
+                )
+                method = "umap"
                 # RAPIDS not possible here as number of batches drastically increases GPU RAM.
             adata.obsm[self.embedding_key] = sc.tl.umap(
                 adata, copy=True, method=method, **self.embedding_kwargs

diff --git a/popv/algorithms/_celltypist.py b/popv/algorithms/_celltypist.py
@@ -57,20 +57,27 @@ def __init__(
     def _predict(self, adata):
         logging.info(f'Saving celltypist results to adata.obs["{self.result_key}"]')
 
-        flavor = 'rapids' if settings.cuml else 'vtraag'
-        method = 'rapids' if settings.cuml else 'umap'
-        sc.pp.neighbors(adata, n_neighbors=15, use_rep='X_pca', method=method)
-        sc.tl.louvain(adata, resolution=25., key_added='over_clustering', flavor=flavor)
+        flavor = "rapids" if settings.cuml else "vtraag"
+        method = "rapids" if settings.cuml else "umap"
+        sc.pp.neighbors(adata, n_neighbors=15, use_rep="X_pca", method=method)
+        sc.tl.louvain(
+            adata, resolution=25.0, key_added="over_clustering", flavor=flavor
+        )
 
         if adata.uns["_prediction_mode"] == "retrain":
             train_idx = adata.obs["_ref_subsample"]
             print(len(train_idx))
-            if len(train_idx) > 100000 and not True: # settings.cuml:
-                self.method_kwargs['use_SGD'] = True
-                self.method_kwargs['mini_batch'] = True
+            if len(train_idx) > 100000 and not True:  # settings.cuml:
+                self.method_kwargs["use_SGD"] = True
+                self.method_kwargs["mini_batch"] = True
 
             train_adata = adata[train_idx].copy()
-            model = celltypist.train(train_adata, self.labels_key, use_GPU=settings.cuml, **self.method_kwargs,)
+            model = celltypist.train(
+                train_adata,
+                self.labels_key,
+                use_GPU=settings.cuml,
+                **self.method_kwargs,
+            )
 
             if adata.uns["_save_path_trained_models"]:
                 model.write(adata.uns["_save_path_trained_models"] + "celltypist.pkl")
@@ -79,7 +86,7 @@ def _predict(self, adata):
         predictions = celltypist.annotate(
             adata,
             model=adata.uns["_save_path_trained_models"] + "celltypist.pkl",
-            over_clustering=adata.obs['over_clustering'],
+            over_clustering=adata.obs["over_clustering"],
             **self.classifier_dict,
         )
         out_column = (
@@ -90,6 +97,6 @@ def _predict(self, adata):
 
         adata.obs[self.result_key] = predictions.predicted_labels[out_column]
         if self.return_probabilities:
-            adata.obs[
-                self.result_key + "_probabilities"
-            ] = predictions.probability_matrix.max(axis=1).values
+            adata.obs[self.result_key + "_probabilities"] = (
+                predictions.probability_matrix.max(axis=1).values
+            )
diff --git a/popv/algorithms/_harmony.py b/popv/algorithms/_harmony.py
@@ -77,7 +77,7 @@ def _compute_integration(self, adata):
             adata.obsm["X_pca"],
             adata.obs,
             batch_key=self.batch_key,
-            use_gpu=settings.accelerator=="gpu",
+            use_gpu=settings.accelerator == "gpu",
         )
 
     def _predict(self, adata, result_key="popv_knn_on_harmony_prediction"):
@@ -89,6 +89,7 @@ def _predict(self, adata, result_key="popv_knn_on_harmony_prediction"):
 
         if settings.cuml:
             from cuml.neighbors import KNeighborsClassifier as cuKNeighbors
+
             knn = cuKNeighbors(n_neighbors=self.classifier_dict["n_neighbors"])
         else:
             knn = make_pipeline(
@@ -117,7 +118,7 @@ def _compute_embedding(self, adata):
             logging.info(
                 f'Saving UMAP of harmony results to adata.obs["{self.embedding_key}"]'
             )
-            method = 'rapids' if settings.cuml else 'umap'
+            method = "rapids" if settings.cuml else "umap"
             sc.pp.neighbors(adata, use_rep="X_pca_harmony", method=method)
             adata.obsm[self.embedding_key] = sc.tl.umap(
                 adata, copy=True, method=method, **self.embedding_kwargs
diff --git a/docs/api.md b/docs/api.md
@@ -46,4 +46,3 @@ from popv import algorithms
     "knn_on_scanorama",
     "celltypist",
     "base_algorithm",
-
diff --git a/docs/tutorials.md b/docs/tutorials.md
@@ -6,4 +6,4 @@
 :maxdepth: 1
 
 notebooks/tabula_sapiens_tutorial.ipynb
-```
+```