Skip to content

Commit

Permalink
[pre-commit.ci] auto fixes from pre-commit.com hooks
Browse files: browse the repository at this point in the history
for more information, see https://pre-commit.ci
  • Loading branch information
pre-commit-ci[bot] committed Dec 13, 2024
1 parent: 9c791b4 · commit: 4e1f2cd
Show file tree
Hide file tree
Showing 29 changed files with 177 additions and 162 deletions.
2 changes: 1 addition & 1 deletion docs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ help:
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
1 change: 0 additions & 1 deletion docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,3 @@ from popv import algorithms
"knn_on_scanorama",
"celltypist",
"base_algorithm",
4 changes: 0 additions & 4 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@
from importlib.metadata import metadata
from pathlib import Path
import importlib.util
import inspect
import os
import re
import subprocess
from typing import TYPE_CHECKING

if TYPE_CHECKING:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@
" query_adata = sc.read(input_file[\"path\"])\n",
" except:\n",
" raise Exception(\n",
" f\"Default download failed with wget. Use custom downloader or check provided link \"\n",
" \"Default download failed with wget. Use custom downloader or check provided link \"\n",
" + input_file[\"link\"]\n",
" )\n",
"\n",
Expand Down Expand Up @@ -1348,4 +1348,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@
" query_adata = sc.read(input_file[\"path\"])\n",
" except:\n",
" raise Exception(\n",
" f\"Default download failed with wget. Use custom downloader or check provided link \"\n",
" \"Default download failed with wget. Use custom downloader or check provided link \"\n",
" + input_file[\"link\"]\n",
" )\n",
"\n",
Expand Down Expand Up @@ -1335,4 +1335,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@
" query_adata = sc.read(input_file[\"path\"])\n",
" except:\n",
" raise Exception(\n",
" f\"Default download failed with wget. Use custom downloader or check provided link \"\n",
" \"Default download failed with wget. Use custom downloader or check provided link \"\n",
" + input_file[\"link\"]\n",
" )\n",
"\n",
Expand Down Expand Up @@ -1322,4 +1322,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@
" query_adata = sc.read(input_file[\"path\"])\n",
" except:\n",
" raise Exception(\n",
" f\"Default download failed with wget. Use custom downloader or check provided link \"\n",
" \"Default download failed with wget. Use custom downloader or check provided link \"\n",
" + input_file[\"link\"]\n",
" )\n",
"\n",
Expand Down Expand Up @@ -1309,4 +1309,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@
" query_adata = sc.read(input_file[\"path\"])\n",
" except:\n",
" raise Exception(\n",
" f\"Default download failed with wget. Use custom downloader or check provided link \"\n",
" \"Default download failed with wget. Use custom downloader or check provided link \"\n",
" + input_file[\"link\"]\n",
" )\n",
"\n",
Expand Down Expand Up @@ -1296,4 +1296,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@
" query_adata = sc.read(input_file[\"path\"])\n",
" except:\n",
" raise Exception(\n",
" f\"Default download failed with wget. Use custom downloader or check provided link \"\n",
" \"Default download failed with wget. Use custom downloader or check provided link \"\n",
" + input_file[\"link\"]\n",
" )\n",
"\n",
Expand Down Expand Up @@ -1283,4 +1283,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}
4 changes: 2 additions & 2 deletions docs/jupyter_execute/notebooks/tabula_sapiens_tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@
" query_adata = sc.read(input_file[\"path\"])\n",
" except:\n",
" raise Exception(\n",
" f\"Default download failed with wget. Use custom downloader or check provided link \"\n",
" \"Default download failed with wget. Use custom downloader or check provided link \"\n",
" + input_file[\"link\"]\n",
" )\n",
"\n",
Expand Down Expand Up @@ -1270,4 +1270,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}
4 changes: 0 additions & 4 deletions docs/reference/popv.algorithms.knn_on_scvi.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,3 @@ Methods table

Methods
~~~~~~~




4 changes: 0 additions & 4 deletions docs/reference/popv.algorithms.scanvi.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,3 @@ Methods table

Methods
~~~~~~~




2 changes: 1 addition & 1 deletion docs/tutorials.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
:maxdepth: 1

notebooks/tabula_sapiens_tutorial.ipynb
```
```
10 changes: 2 additions & 8 deletions popv/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

import scanpy as sc

from ._settings import settings
from . import algorithms, annotation, preprocessing, visualization
from ._settings import settings

try:
import importlib.metadata as importlib_metadata
Expand All @@ -22,10 +22,4 @@
popv_logger.propagate = False


__all__ = [
"settings",
"algorithms",
"annotation",
"preprocessing",
"visualization"
]
__all__ = ["algorithms", "annotation", "preprocessing", "settings", "visualization"]
3 changes: 2 additions & 1 deletion popv/_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def __init__(
logging_dir: str = "./popv_log/",
n_jobs: int = 1,
cuml: bool = False,
shard_size: int = 100000
shard_size: int = 100000,
):
"""Set up Config manager for PopV."""
self.seed = seed
Expand Down Expand Up @@ -170,4 +170,5 @@ def return_probabilities(self) -> bool:
def return_probabilities(self, return_probabilities: bool):
self._return_probabilities = return_probabilities


settings = Config()
14 changes: 7 additions & 7 deletions popv/algorithms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@
from ._svm import SVM as svm

__all__ = [
"knn_on_scvi",
"scanvi",
"base_algorithm",
"celltypist",
"knn_on_bbknn",
"knn_on_harmony",
"svm",
"rf",
"onclass",
"knn_on_scanorama",
"celltypist",
"base_algorithm",
"knn_on_scvi",
"onclass",
"rf",
"scanvi",
"svm",
]
25 changes: 15 additions & 10 deletions popv/algorithms/_bbknn.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ def __init__(

self.method_kwargs = {
"metric": "euclidean" if self.enable_cuml else "cosine",
"approx": not self.enable_cuml, # FAISS if cuml
"approx": not self.enable_cuml, # FAISS if cuml
"n_pcs": 50,
"neighbors_within_batch": 3 if self.enable_cuml else 8,
"use_annoy": False, #pynndescent
"use_annoy": False, # pynndescent
}
if method_kwargs is not None:
self.method_kwargs.update(method_kwargs)
Expand All @@ -68,15 +68,16 @@ def __init__(
if classifier_dict is not None:
self.classifier_dict.update(classifier_dict)

self.embedding_kwargs = {
"min_dist": 0.1}
self.embedding_kwargs = {"min_dist": 0.1}
self.embedding_kwargs.update(embedding_kwargs)

def _compute_integration(self, adata):
logging.info("Integrating data with bbknn")
if len(adata.obs[self.batch_key].unique()) > 100 and self.enable_cuml:
logging.warning('Using PyNNDescent instead of RAPIDS as high number of batches leads to OOM.')
self.method_kwargs['approx'] = True
logging.warning(
"Using PyNNDescent instead of RAPIDS as high number of batches leads to OOM."
)
self.method_kwargs["approx"] = True
sc.external.pp.bbknn(adata, batch_key=self.batch_key, **self.method_kwargs)

def _predict(self, adata):
Expand Down Expand Up @@ -109,7 +110,9 @@ def _predict(self, adata):
knn = KNeighborsClassifier(metric="precomputed", **self.classifier_dict)
knn.fit(train_distances, y=train_y)

adata.obs[self.result_key] = adata.obs[self.labels_key].cat.categories[knn.predict(test_distances)]
adata.obs[self.result_key] = adata.obs[self.labels_key].cat.categories[
knn.predict(test_distances)
]

if self.return_probabilities:
adata.obs[self.result_key + "_probabilities"] = np.max(
Expand All @@ -122,10 +125,12 @@ def _compute_embedding(self, adata):
f'Saving UMAP of bbknn results to adata.obs["{self.embedding_key}"]'
)
if len(adata.obs[self.batch_key]) < 30 and settings.cuml:
method = 'rapids'
method = "rapids"
else:
logging.warning('Using UMAP instead of RAPIDS as high number of batches leads to OOM.')
method = 'umap'
logging.warning(
"Using UMAP instead of RAPIDS as high number of batches leads to OOM."
)
method = "umap"
# RAPIDS not possible here as number of batches drastically increases GPU RAM.
adata.obsm[self.embedding_key] = sc.tl.umap(
adata, copy=True, method=method, **self.embedding_kwargs
Expand Down
31 changes: 19 additions & 12 deletions popv/algorithms/_celltypist.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,20 +57,27 @@ def __init__(
def _predict(self, adata):
logging.info(f'Saving celltypist results to adata.obs["{self.result_key}"]')

flavor = 'rapids' if settings.cuml else 'vtraag'
method = 'rapids' if settings.cuml else 'umap'
sc.pp.neighbors(adata, n_neighbors=15, use_rep='X_pca', method=method)
sc.tl.louvain(adata, resolution=25., key_added='over_clustering', flavor=flavor)
flavor = "rapids" if settings.cuml else "vtraag"
method = "rapids" if settings.cuml else "umap"
sc.pp.neighbors(adata, n_neighbors=15, use_rep="X_pca", method=method)
sc.tl.louvain(
adata, resolution=25.0, key_added="over_clustering", flavor=flavor
)

if adata.uns["_prediction_mode"] == "retrain":
train_idx = adata.obs["_ref_subsample"]
print(len(train_idx))
if len(train_idx) > 100000 and not True: # settings.cuml:
self.method_kwargs['use_SGD'] = True
self.method_kwargs['mini_batch'] = True
if len(train_idx) > 100000 and not True: # settings.cuml:
self.method_kwargs["use_SGD"] = True
self.method_kwargs["mini_batch"] = True

train_adata = adata[train_idx].copy()
model = celltypist.train(train_adata, self.labels_key, use_GPU=settings.cuml, **self.method_kwargs,)
model = celltypist.train(
train_adata,
self.labels_key,
use_GPU=settings.cuml,
**self.method_kwargs,
)

if adata.uns["_save_path_trained_models"]:
model.write(adata.uns["_save_path_trained_models"] + "celltypist.pkl")
Expand All @@ -79,7 +86,7 @@ def _predict(self, adata):
predictions = celltypist.annotate(
adata,
model=adata.uns["_save_path_trained_models"] + "celltypist.pkl",
over_clustering=adata.obs['over_clustering'],
over_clustering=adata.obs["over_clustering"],
**self.classifier_dict,
)
out_column = (
Expand All @@ -90,6 +97,6 @@ def _predict(self, adata):

adata.obs[self.result_key] = predictions.predicted_labels[out_column]
if self.return_probabilities:
adata.obs[
self.result_key + "_probabilities"
] = predictions.probability_matrix.max(axis=1).values
adata.obs[self.result_key + "_probabilities"] = (
predictions.probability_matrix.max(axis=1).values
)
5 changes: 3 additions & 2 deletions popv/algorithms/_harmony.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def _compute_integration(self, adata):
adata.obsm["X_pca"],
adata.obs,
batch_key=self.batch_key,
use_gpu=settings.accelerator=="gpu",
use_gpu=settings.accelerator == "gpu",
)

def _predict(self, adata, result_key="popv_knn_on_harmony_prediction"):
Expand All @@ -89,6 +89,7 @@ def _predict(self, adata, result_key="popv_knn_on_harmony_prediction"):

if settings.cuml:
from cuml.neighbors import KNeighborsClassifier as cuKNeighbors

knn = cuKNeighbors(n_neighbors=self.classifier_dict["n_neighbors"])
else:
knn = make_pipeline(
Expand Down Expand Up @@ -117,7 +118,7 @@ def _compute_embedding(self, adata):
logging.info(
f'Saving UMAP of harmony results to adata.obs["{self.embedding_key}"]'
)
method = 'rapids' if settings.cuml else 'umap'
method = "rapids" if settings.cuml else "umap"
sc.pp.neighbors(adata, use_rep="X_pca_harmony", method=method)
adata.obsm[self.embedding_key] = sc.tl.umap(
adata, copy=True, method=method, **self.embedding_kwargs
Expand Down
Loading

0 comments on commit 4e1f2cd

Please sign in to comment.