Skip to content

Commit

Permalink
Merge pull request #48 from scverse-bot/template-update-YosefLab-PopV…
Browse files Browse the repository at this point in the history
…-v0.4.0

Update template to v0.4.0
  • Loading branch information
canergen authored Dec 13, 2024
2 parents 4d17385 + 6c1749a commit 5a89ed6
Show file tree
Hide file tree
Showing 26 changed files with 510 additions and 150 deletions.
7 changes: 4 additions & 3 deletions .cruft.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"template": "https://github.com/scverse/cookiecutter-scverse",
"commit": "8e96abb5c3e2d5078c44713958da672711cf2a48",
"checkout": null,
"commit": "87a407a65408d75a949c0b54b19fd287475a56f8",
"checkout": "v0.4.0",
"context": {
"cookiecutter": {
"project_name": "PopV",
Expand All @@ -13,7 +13,8 @@
"project_repo": "https://github.com/YosefLab/PopV.git",
"license": "MIT License",
"_copy_without_render": [
".github/workflows/**.yaml",
".github/workflows/build.yaml",
".github/workflows/test.yaml",
"docs/_templates/autosummary/**.rst"
],
"_render_devdocs": false,
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ __pycache__/
/.pytest_cache/
/.cache/
/data/
/node_modules/

# docs
/docs/generated/
Expand Down
34 changes: 34 additions & 0 deletions .pre-commit-config.yaml.rej
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
diff a/.pre-commit-config.yaml b/.pre-commit-config.yaml (rejected hunks)
@@ -6,29 +6,18 @@ default_stages:
- push
minimum_pre_commit_version: 2.16.0
repos:
- - repo: https://github.com/psf/black
- rev: "24.4.2"
- hooks:
- - id: black
- - repo: https://github.com/asottile/blacken-docs
- rev: 1.16.0
- hooks:
- - id: blacken-docs
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v4.0.0-alpha.8
hooks:
- id: prettier
- # Newer versions of node don't work on systems that have an older version of GLIBC
- # (in particular Ubuntu 18.04 and Centos 7)
- # EOL of Centos 7 is in 2024-06, we can probably get rid of this then.
- # See https://github.com/scverse/cookiecutter-scverse/issues/143 and
- # https://github.com/jupyterlab/jupyterlab/issues/12675
- language_version: "17.9.1"
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.4
hooks:
- id: ruff
+ types_or: [python, pyi, jupyter]
args: [--fix, --exit-non-zero-on-fix]
+ - id: ruff-format
+ types_or: [python, pyi, jupyter]
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
10 changes: 10 additions & 0 deletions README.md.rej
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
diff a/README.md b/README.md (rejected hunks)
@@ -17,7 +17,7 @@ Please refer to the [documentation][link-docs]. In particular, the

## Installation

-You need to have Python 3.9 or newer installed on your system. If you don't have
+You need to have Python 3.10 or newer installed on your system. If you don't have
Python installed, we recommend installing [Mambaforge](https://github.com/conda-forge/miniforge#mambaforge).

There are several alternative options to install PopV:
10 changes: 5 additions & 5 deletions docs/conf.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Configuration file for the Sphinx documentation builder.
#

# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
Expand Down Expand Up @@ -45,10 +45,10 @@

html_context = {
"display_github": True, # Integrate GitHub
"github_user": "cane11", # Username
"github_repo": project_name, # Repo name
"github_version": "main", # Version
"conf_py_path": "/docs/", # Path in the checkout to the docs root
"github_user": "cane11",
"github_repo": "https://github.com/YosefLab/PopV.git",
"github_version": "main",
"conf_py_path": "/docs/",
}

# -- General configuration ---------------------------------------------------
Expand Down
5 changes: 3 additions & 2 deletions docs/contributing.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ and [prettier][prettier-editors].
## Writing tests

```{note}
Remember to first install the package with `pip install '-e[dev,test]'`
Remember to first install the package with `pip install -e '.[dev,test]'`
```

This package uses [pytest][] for automated testing. Please [write tests][scanpy-test-docs] for every function added
Expand Down Expand Up @@ -93,7 +93,7 @@ Before making a release, you need to update the version number in the `pyproject
> Additional labels for pre-release and build metadata are available as extensions to the MAJOR.MINOR.PATCH format.
Once you are done, commit and push your changes and navigate to the "Releases" page of this project on GitHub.
Specify `vX.X.X` as a tag name and create a release. For more information, see [managing Github releases][]. This will automatically create a git tag and trigger a Github workflow that creates a release on PyPI.
Specify `vX.X.X` as a tag name and create a release. For more information, see [managing GitHub releases][]. This will automatically create a git tag and trigger a GitHub workflow that creates a release on PyPI.

## Writing documentation

Expand Down Expand Up @@ -157,3 +157,4 @@ open _build/html/index.html
[numpydoc]: https://numpydoc.readthedocs.io/en/latest/format.html
[sphinx autodoc typehints]: https://github.com/tox-dev/sphinx-autodoc-typehints
[pypi]: https://pypi.org/
[managing GitHub releases]: https://docs.github.com/en/repositories/releasing-projects-on-github/managing-releases-in-a-repository
4 changes: 3 additions & 1 deletion popv/_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,9 @@ def verbosity(self, level: str | int):
console = Console(force_terminal=True)
if console.is_jupyter is True:
console.is_jupyter = False
ch = RichHandler(level=level, show_path=False, console=console, show_time=False)
ch = RichHandler(
level=level, show_path=False, console=console, show_time=False
)
formatter = logging.Formatter("%(message)s")
ch.setFormatter(formatter)
popv_logger.addHandler(ch)
Expand Down
22 changes: 17 additions & 5 deletions popv/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ def subsample_dataset(
if labels_counts[label] < n_samples_per_label:
sample_idx.append(label_locs)
else:
label_subset = np.random.choice(label_locs, n_samples_per_label, replace=False)
label_subset = np.random.choice(
label_locs, n_samples_per_label, replace=False
)
sample_idx.append(label_subset)
sample_idx = np.concatenate(sample_idx)
return adata.obs_names[sample_idx]
Expand Down Expand Up @@ -79,7 +81,9 @@ def check_genes_is_subset(ref_genes, query_genes):
logging.info("All ref genes are in query dataset. Can use pretrained models.")
is_subset = True
else:
logging.info("Not all reference genes are in query dataset. Set 'prediction_mode' to 'retrain'.")
logging.info(
"Not all reference genes are in query dataset. Set 'prediction_mode' to 'retrain'."
)
is_subset = False
return is_subset

Expand All @@ -95,7 +99,9 @@ def make_batch_covariate(adata, batch_keys, new_batch_key):
batch_keys
List of keys in adat.obs corresponding to batches
"""
adata.obs[new_batch_key] = adata.obs[batch_keys].astype(str).sum(1).astype("category")
adata.obs[new_batch_key] = (
adata.obs[batch_keys].astype(str).sum(1).astype("category")
)


def calculate_depths(g):
Expand Down Expand Up @@ -142,7 +148,9 @@ def make_ontology_dag(obofile, lowercase=False):
"""
co = obonet.read_obo(obofile, encoding="utf-8")
id_to_name = {id_: data.get("name") for id_, data in co.nodes(data=True)}
name_to_id = {data["name"]: id_ for id_, data in co.nodes(data=True) if ("name" in data)}
name_to_id = {
data["name"]: id_ for id_, data in co.nodes(data=True) if ("name" in data)
}

# get all node ids that are celltypes (start with CL)
cl_ids = {id_: True for _, id_ in name_to_id.items() if id_.startswith("CL:")}
Expand All @@ -160,7 +168,11 @@ def make_ontology_dag(obofile, lowercase=False):
for node in co.nodes():
if node in cl_ids:
for child, parent, key in co.out_edges(node, keys=True):
if child.startswith("CL:") and parent.startswith("CL:") and key == "is_a":
if (
child.startswith("CL:")
and parent.startswith("CL:")
and key == "is_a"
):
childname = id_to_name[child]
parentname = id_to_name[parent]
g.add_edge(childname, parentname, key=key)
Expand Down
16 changes: 12 additions & 4 deletions popv/algorithms/_bbknn.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,9 @@ def predict(self, adata):
]
)
if smallest_neighbor_graph < 15:
logging.warning(f"BBKNN found only {smallest_neighbor_graph} neighbors. Reduced neighbors in KNN.")
logging.warning(
f"BBKNN found only {smallest_neighbor_graph} neighbors. Reduced neighbors in KNN."
)
self.classifier_dict["n_neighbors"] = smallest_neighbor_graph

knn = KNeighborsClassifier(metric="precomputed", **self.classifier_dict)
Expand All @@ -95,9 +97,15 @@ def predict(self, adata):
adata.obs[self.result_key] = knn.predict(test_distances)

if adata.uns["_return_probabilities"]:
adata.obs[self.result_key + "_probabilities"] = np.max(knn.predict_proba(test_distances), axis=1)
adata.obs[self.result_key + "_probabilities"] = np.max(
knn.predict_proba(test_distances), axis=1
)

def compute_embedding(self, adata):
if adata.uns["_compute_embedding"]:
logging.info(f'Saving UMAP of bbknn results to adata.obs["{self.embedding_key}"]')
adata.obsm[self.embedding_key] = sc.tl.umap(adata, copy=True, **self.embedding_dict).obsm["X_umap"]
logging.info(
f'Saving UMAP of bbknn results to adata.obs["{self.embedding_key}"]'
)
adata.obsm[self.embedding_key] = sc.tl.umap(
adata, copy=True, **self.embedding_dict
).obsm["X_umap"]
8 changes: 6 additions & 2 deletions popv/algorithms/_celltypist.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,16 @@ def predict(self, adata):
**self.classifier_dict,
)
out_column = (
"majority_voting" if "majority_voting" in predictions.predicted_labels.columns else "predicted_labels"
"majority_voting"
if "majority_voting" in predictions.predicted_labels.columns
else "predicted_labels"
)

adata.obs[self.result_key] = predictions.predicted_labels[out_column]
if adata.uns["_return_probabilities"]:
adata.obs[self.result_key + "_probabilities"] = predictions.probability_matrix.max(axis=1).values
adata.obs[self.result_key + "_probabilities"] = (
predictions.probability_matrix.max(axis=1).values
)

def compute_embedding(self, adata):
pass
16 changes: 12 additions & 4 deletions popv/algorithms/_harmony.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@ def __init__(
def compute_integration(self, adata):
logging.info("Integrating data with harmony")

adata.obsm["X_pca_harmony"] = harmonize(adata.obsm["X_pca"], adata.obs, batch_key=self.batch_key)
adata.obsm["X_pca_harmony"] = harmonize(
adata.obsm["X_pca"], adata.obs, batch_key=self.batch_key
)

def predict(self, adata, result_key="popv_knn_on_harmony_prediction"):
logging.info(f'Saving knn on harmony results to adata.obs["{result_key}"]')
Expand All @@ -75,7 +77,9 @@ def predict(self, adata, result_key="popv_knn_on_harmony_prediction"):
n_neighbors=self.classifier_dict["n_neighbors"],
parallel_batch_queries=True,
),
KNeighborsClassifier(metric="precomputed", weights=self.classifier_dict["weights"]),
KNeighborsClassifier(
metric="precomputed", weights=self.classifier_dict["weights"]
),
)

knn.fit(train_X, train_Y)
Expand All @@ -91,6 +95,10 @@ def predict(self, adata, result_key="popv_knn_on_harmony_prediction"):

def compute_embedding(self, adata):
if adata.uns["_compute_embedding"]:
logging.info(f'Saving UMAP of harmony results to adata.obs["{self.embedding_key}"]')
logging.info(
f'Saving UMAP of harmony results to adata.obs["{self.embedding_key}"]'
)
sc.pp.neighbors(adata, use_rep="X_pca_harmony")
adata.obsm[self.embedding_key] = sc.tl.umap(adata, copy=True, **self.embedding_dict).obsm["X_umap"]
adata.obsm[self.embedding_key] = sc.tl.umap(
adata, copy=True, **self.embedding_dict
).obsm["X_umap"]
38 changes: 27 additions & 11 deletions popv/algorithms/_onclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,10 +106,12 @@ def compute_integration(self, adata):
pass

def predict(self, adata):
logging.info(f'Computing Onclass. Storing prediction in adata.obs["{self.result_key}"]')
adata.obs.loc[adata.obs["_dataset"] == "query", self.cell_ontology_obs_key] = adata.uns[
"unknown_celltype_label"
]
logging.info(
f'Computing Onclass. Storing prediction in adata.obs["{self.result_key}"]'
)
adata.obs.loc[adata.obs["_dataset"] == "query", self.cell_ontology_obs_key] = (
adata.uns["unknown_celltype_label"]
)

train_idx = adata.obs["_dataset"] == "ref"

Expand All @@ -127,10 +129,14 @@ def predict(self, adata):
cl_ontology_file = adata.uns["_cl_ontology_file"]
nlp_emb_file = adata.uns["_nlp_emb_file"]

celltype_dict, clid_2_name = self.make_celltype_to_cell_ontology_id_dict(cl_obo_file)
celltype_dict, clid_2_name = self.make_celltype_to_cell_ontology_id_dict(
cl_obo_file
)
self.make_cell_ontology_id(adata, celltype_dict, self.cell_ontology_obs_key)

train_model = OnClassModel(cell_type_nlp_emb_file=nlp_emb_file, cell_type_network_file=cl_ontology_file)
train_model = OnClassModel(
cell_type_nlp_emb_file=nlp_emb_file, cell_type_network_file=cl_ontology_file
)

if adata.uns["_save_path_trained_models"] is not None:
model_path = adata.uns["_save_path_trained_models"] + "/OnClass"
Expand Down Expand Up @@ -175,13 +181,17 @@ def predict(self, adata):
)

if adata.uns["_prediction_mode"] == "fast":
onclass_seen = np.argmax(train_model.model.predict(corr_test_feature), axis=1)
onclass_seen = np.argmax(
train_model.model.predict(corr_test_feature), axis=1
)
pred_label = [train_model.i2co[ind] for ind in onclass_seen]
pred_label_str = [clid_2_name[ind] for ind in pred_label]
adata.obs[self.result_key] = pred_label_str
adata.obs[self.seen_result_key] = pred_label_str
else:
onclass_pred = train_model.Predict(corr_test_feature, use_normalize=False, refine=True, unseen_ratio=-1.0)
onclass_pred = train_model.Predict(
corr_test_feature, use_normalize=False, refine=True, unseen_ratio=-1.0
)
pred_label = [train_model.i2co[ind] for ind in onclass_pred[2]]
pred_label_str = [clid_2_name[ind] for ind in pred_label]
adata.obs[self.result_key] = pred_label_str
Expand All @@ -192,9 +202,15 @@ def predict(self, adata):
adata.obs[self.seen_result_key] = pred_label_str

if adata.uns["_return_probabilities"]:
adata.obs[self.result_key + "_probabilities"] = np.max(onclass_pred[1], axis=1) / onclass_pred[1].sum(1)
adata.obsm["onclass_probabilities"] = onclass_pred[1] / onclass_pred[1].sum(1, keepdims=True)
adata.obs["popv_onclass_seen" + "_probabilities"] = np.max(onclass_pred[0], axis=1)
adata.obs[self.result_key + "_probabilities"] = np.max(
onclass_pred[1], axis=1
) / onclass_pred[1].sum(1)
adata.obsm["onclass_probabilities"] = onclass_pred[1] / onclass_pred[
1
].sum(1, keepdims=True)
adata.obs["popv_onclass_seen" + "_probabilities"] = np.max(
onclass_pred[0], axis=1
)

def compute_embedding(self, adata):
return None
18 changes: 14 additions & 4 deletions popv/algorithms/_rf.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,19 @@ def compute_integration(self, adata):
pass

def predict(self, adata):
logging.info(f'Computing random forest classifier. Storing prediction in adata.obs["{self.result_key}"]')
logging.info(
f'Computing random forest classifier. Storing prediction in adata.obs["{self.result_key}"]'
)

test_x = adata.layers[self.layers_key] if self.layers_key else adata.X

if adata.uns["_prediction_mode"] == "retrain":
train_idx = adata.obs["_ref_subsample"]
train_x = adata[train_idx].layers[self.layers_key] if self.layers_key else adata[train_idx].X
train_x = (
adata[train_idx].layers[self.layers_key]
if self.layers_key
else adata[train_idx].X
)
train_y = adata[train_idx].obs[self.labels_key].to_numpy()
rf = RandomForestClassifier(**self.classifier_dict)
rf.fit(train_x, train_y)
Expand All @@ -67,10 +73,14 @@ def predict(self, adata):
),
)
else:
rf = pickle.load(open(adata.uns["_save_path_trained_models"] + "rf_classifier.pkl", "rb"))
rf = pickle.load(
open(adata.uns["_save_path_trained_models"] + "rf_classifier.pkl", "rb")
)
adata.obs[self.result_key] = rf.predict(test_x)
if adata.uns["_return_probabilities"]:
adata.obs[self.result_key + "_probabilities"] = np.max(rf.predict_proba(test_x), axis=1)
adata.obs[self.result_key + "_probabilities"] = np.max(
rf.predict_proba(test_x), axis=1
)

def compute_embedding(self, adata):
pass
Loading

0 comments on commit 5a89ed6

Please sign in to comment.