Skip to content

Commit

Permalink
Merge pull request #48 from scverse-bot/template-update-YosefLab-PopV…
Browse files Browse the repository at this point in the history
…-v0.4.0

Update template to v0.4.0
  • Loading branch information
canergen authored Dec 13, 2024
2 parents 9d8c67e + 71f8e02 commit bc8b455
Showing 26 changed files with 510 additions and 150 deletions.
7 changes: 4 additions & 3 deletions .cruft.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"template": "https://github.com/scverse/cookiecutter-scverse",
"commit": "8e96abb5c3e2d5078c44713958da672711cf2a48",
"checkout": null,
"commit": "87a407a65408d75a949c0b54b19fd287475a56f8",
"checkout": "v0.4.0",
"context": {
"cookiecutter": {
"project_name": "PopV",
@@ -13,7 +13,8 @@
"project_repo": "https://github.com/YosefLab/PopV.git",
"license": "MIT License",
"_copy_without_render": [
".github/workflows/**.yaml",
".github/workflows/build.yaml",
".github/workflows/test.yaml",
"docs/_templates/autosummary/**.rst"
],
"_render_devdocs": false,
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -19,6 +19,7 @@ __pycache__/
/.pytest_cache/
/.cache/
/data/
/node_modules/

# docs
/docs/generated/
34 changes: 34 additions & 0 deletions .pre-commit-config.yaml.rej
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
diff a/.pre-commit-config.yaml b/.pre-commit-config.yaml (rejected hunks)
@@ -6,29 +6,18 @@ default_stages:
- push
minimum_pre_commit_version: 2.16.0
repos:
- - repo: https://github.com/psf/black
- rev: "24.4.2"
- hooks:
- - id: black
- - repo: https://github.com/asottile/blacken-docs
- rev: 1.16.0
- hooks:
- - id: blacken-docs
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v4.0.0-alpha.8
hooks:
- id: prettier
- # Newer versions of node don't work on systems that have an older version of GLIBC
- # (in particular Ubuntu 18.04 and Centos 7)
- # EOL of Centos 7 is in 2024-06, we can probably get rid of this then.
- # See https://github.com/scverse/cookiecutter-scverse/issues/143 and
- # https://github.com/jupyterlab/jupyterlab/issues/12675
- language_version: "17.9.1"
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.4
hooks:
- id: ruff
+ types_or: [python, pyi, jupyter]
args: [--fix, --exit-non-zero-on-fix]
+ - id: ruff-format
+ types_or: [python, pyi, jupyter]
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
10 changes: 10 additions & 0 deletions README.md.rej
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
diff a/README.md b/README.md (rejected hunks)
@@ -17,7 +17,7 @@ Please refer to the [documentation][link-docs]. In particular, the

## Installation

-You need to have Python 3.9 or newer installed on your system. If you don't have
+You need to have Python 3.10 or newer installed on your system. If you don't have
Python installed, we recommend installing [Mambaforge](https://github.com/conda-forge/miniforge#mambaforge).

There are several alternative options to install PopV:
10 changes: 5 additions & 5 deletions docs/conf.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Configuration file for the Sphinx documentation builder.
#

# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
@@ -45,10 +45,10 @@

html_context = {
"display_github": True, # Integrate GitHub
"github_user": "cane11", # Username
"github_repo": project_name, # Repo name
"github_version": "main", # Version
"conf_py_path": "/docs/", # Path in the checkout to the docs root
"github_user": "cane11",
"github_repo": "https://github.com/YosefLab/PopV.git",
"github_version": "main",
"conf_py_path": "/docs/",
}

# -- General configuration ---------------------------------------------------
5 changes: 3 additions & 2 deletions docs/contributing.md
Original file line number Diff line number Diff line change
@@ -51,7 +51,7 @@ and [prettier][prettier-editors].
## Writing tests

```{note}
Remember to first install the package with `pip install '-e[dev,test]'`
Remember to first install the package with `pip install -e '.[dev,test]'`
```

This package uses [pytest][] for automated testing. Please [write tests][scanpy-test-docs] for every function added
@@ -93,7 +93,7 @@ Before making a release, you need to update the version number in the `pyproject
> Additional labels for pre-release and build metadata are available as extensions to the MAJOR.MINOR.PATCH format.
Once you are done, commit and push your changes and navigate to the "Releases" page of this project on GitHub.
Specify `vX.X.X` as a tag name and create a release. For more information, see [managing Github releases][]. This will automatically create a git tag and trigger a Github workflow that creates a release on PyPI.
Specify `vX.X.X` as a tag name and create a release. For more information, see [managing GitHub releases][]. This will automatically create a git tag and trigger a GitHub workflow that creates a release on PyPI.

## Writing documentation

@@ -157,3 +157,4 @@ open _build/html/index.html
[numpydoc]: https://numpydoc.readthedocs.io/en/latest/format.html
[sphinx autodoc typehints]: https://github.com/tox-dev/sphinx-autodoc-typehints
[pypi]: https://pypi.org/
[managing GitHub releases]: https://docs.github.com/en/repositories/releasing-projects-on-github/managing-releases-in-a-repository
4 changes: 3 additions & 1 deletion popv/_settings.py
Original file line number Diff line number Diff line change
@@ -101,7 +101,9 @@ def verbosity(self, level: str | int):
console = Console(force_terminal=True)
if console.is_jupyter is True:
console.is_jupyter = False
ch = RichHandler(level=level, show_path=False, console=console, show_time=False)
ch = RichHandler(
level=level, show_path=False, console=console, show_time=False
)
formatter = logging.Formatter("%(message)s")
ch.setFormatter(formatter)
popv_logger.addHandler(ch)
22 changes: 17 additions & 5 deletions popv/_utils.py
Original file line number Diff line number Diff line change
@@ -49,7 +49,9 @@ def subsample_dataset(
if labels_counts[label] < n_samples_per_label:
sample_idx.append(label_locs)
else:
label_subset = np.random.choice(label_locs, n_samples_per_label, replace=False)
label_subset = np.random.choice(
label_locs, n_samples_per_label, replace=False
)
sample_idx.append(label_subset)
sample_idx = np.concatenate(sample_idx)
return adata.obs_names[sample_idx]
@@ -79,7 +81,9 @@ def check_genes_is_subset(ref_genes, query_genes):
logging.info("All ref genes are in query dataset. Can use pretrained models.")
is_subset = True
else:
logging.info("Not all reference genes are in query dataset. Set 'prediction_mode' to 'retrain'.")
logging.info(
"Not all reference genes are in query dataset. Set 'prediction_mode' to 'retrain'."
)
is_subset = False
return is_subset

@@ -95,7 +99,9 @@ def make_batch_covariate(adata, batch_keys, new_batch_key):
batch_keys
List of keys in adata.obs corresponding to batches
"""
adata.obs[new_batch_key] = adata.obs[batch_keys].astype(str).sum(1).astype("category")
adata.obs[new_batch_key] = (
adata.obs[batch_keys].astype(str).sum(1).astype("category")
)


def calculate_depths(g):
@@ -142,7 +148,9 @@ def make_ontology_dag(obofile, lowercase=False):
"""
co = obonet.read_obo(obofile, encoding="utf-8")
id_to_name = {id_: data.get("name") for id_, data in co.nodes(data=True)}
name_to_id = {data["name"]: id_ for id_, data in co.nodes(data=True) if ("name" in data)}
name_to_id = {
data["name"]: id_ for id_, data in co.nodes(data=True) if ("name" in data)
}

# get all node ids that are celltypes (start with CL)
cl_ids = {id_: True for _, id_ in name_to_id.items() if id_.startswith("CL:")}
@@ -160,7 +168,11 @@ def make_ontology_dag(obofile, lowercase=False):
for node in co.nodes():
if node in cl_ids:
for child, parent, key in co.out_edges(node, keys=True):
if child.startswith("CL:") and parent.startswith("CL:") and key == "is_a":
if (
child.startswith("CL:")
and parent.startswith("CL:")
and key == "is_a"
):
childname = id_to_name[child]
parentname = id_to_name[parent]
g.add_edge(childname, parentname, key=key)
16 changes: 12 additions & 4 deletions popv/algorithms/_bbknn.py
Original file line number Diff line number Diff line change
@@ -86,7 +86,9 @@ def predict(self, adata):
]
)
if smallest_neighbor_graph < 15:
logging.warning(f"BBKNN found only {smallest_neighbor_graph} neighbors. Reduced neighbors in KNN.")
logging.warning(
f"BBKNN found only {smallest_neighbor_graph} neighbors. Reduced neighbors in KNN."
)
self.classifier_dict["n_neighbors"] = smallest_neighbor_graph

knn = KNeighborsClassifier(metric="precomputed", **self.classifier_dict)
@@ -95,9 +97,15 @@ def predict(self, adata):
adata.obs[self.result_key] = knn.predict(test_distances)

if adata.uns["_return_probabilities"]:
adata.obs[self.result_key + "_probabilities"] = np.max(knn.predict_proba(test_distances), axis=1)
adata.obs[self.result_key + "_probabilities"] = np.max(
knn.predict_proba(test_distances), axis=1
)

def compute_embedding(self, adata):
if adata.uns["_compute_embedding"]:
logging.info(f'Saving UMAP of bbknn results to adata.obs["{self.embedding_key}"]')
adata.obsm[self.embedding_key] = sc.tl.umap(adata, copy=True, **self.embedding_dict).obsm["X_umap"]
logging.info(
f'Saving UMAP of bbknn results to adata.obs["{self.embedding_key}"]'
)
adata.obsm[self.embedding_key] = sc.tl.umap(
adata, copy=True, **self.embedding_dict
).obsm["X_umap"]
8 changes: 6 additions & 2 deletions popv/algorithms/_celltypist.py
Original file line number Diff line number Diff line change
@@ -63,12 +63,16 @@ def predict(self, adata):
**self.classifier_dict,
)
out_column = (
"majority_voting" if "majority_voting" in predictions.predicted_labels.columns else "predicted_labels"
"majority_voting"
if "majority_voting" in predictions.predicted_labels.columns
else "predicted_labels"
)

adata.obs[self.result_key] = predictions.predicted_labels[out_column]
if adata.uns["_return_probabilities"]:
adata.obs[self.result_key + "_probabilities"] = predictions.probability_matrix.max(axis=1).values
adata.obs[self.result_key + "_probabilities"] = (
predictions.probability_matrix.max(axis=1).values
)

def compute_embedding(self, adata):
pass
16 changes: 12 additions & 4 deletions popv/algorithms/_harmony.py
Original file line number Diff line number Diff line change
@@ -61,7 +61,9 @@ def __init__(
def compute_integration(self, adata):
logging.info("Integrating data with harmony")

adata.obsm["X_pca_harmony"] = harmonize(adata.obsm["X_pca"], adata.obs, batch_key=self.batch_key)
adata.obsm["X_pca_harmony"] = harmonize(
adata.obsm["X_pca"], adata.obs, batch_key=self.batch_key
)

def predict(self, adata, result_key="popv_knn_on_harmony_prediction"):
logging.info(f'Saving knn on harmony results to adata.obs["{result_key}"]')
@@ -75,7 +77,9 @@ def predict(self, adata, result_key="popv_knn_on_harmony_prediction"):
n_neighbors=self.classifier_dict["n_neighbors"],
parallel_batch_queries=True,
),
KNeighborsClassifier(metric="precomputed", weights=self.classifier_dict["weights"]),
KNeighborsClassifier(
metric="precomputed", weights=self.classifier_dict["weights"]
),
)

knn.fit(train_X, train_Y)
@@ -91,6 +95,10 @@ def predict(self, adata, result_key="popv_knn_on_harmony_prediction"):

def compute_embedding(self, adata):
if adata.uns["_compute_embedding"]:
logging.info(f'Saving UMAP of harmony results to adata.obs["{self.embedding_key}"]')
logging.info(
f'Saving UMAP of harmony results to adata.obs["{self.embedding_key}"]'
)
sc.pp.neighbors(adata, use_rep="X_pca_harmony")
adata.obsm[self.embedding_key] = sc.tl.umap(adata, copy=True, **self.embedding_dict).obsm["X_umap"]
adata.obsm[self.embedding_key] = sc.tl.umap(
adata, copy=True, **self.embedding_dict
).obsm["X_umap"]
38 changes: 27 additions & 11 deletions popv/algorithms/_onclass.py
Original file line number Diff line number Diff line change
@@ -106,10 +106,12 @@ def compute_integration(self, adata):
pass

def predict(self, adata):
logging.info(f'Computing Onclass. Storing prediction in adata.obs["{self.result_key}"]')
adata.obs.loc[adata.obs["_dataset"] == "query", self.cell_ontology_obs_key] = adata.uns[
"unknown_celltype_label"
]
logging.info(
f'Computing Onclass. Storing prediction in adata.obs["{self.result_key}"]'
)
adata.obs.loc[adata.obs["_dataset"] == "query", self.cell_ontology_obs_key] = (
adata.uns["unknown_celltype_label"]
)

train_idx = adata.obs["_dataset"] == "ref"

@@ -127,10 +129,14 @@ def predict(self, adata):
cl_ontology_file = adata.uns["_cl_ontology_file"]
nlp_emb_file = adata.uns["_nlp_emb_file"]

celltype_dict, clid_2_name = self.make_celltype_to_cell_ontology_id_dict(cl_obo_file)
celltype_dict, clid_2_name = self.make_celltype_to_cell_ontology_id_dict(
cl_obo_file
)
self.make_cell_ontology_id(adata, celltype_dict, self.cell_ontology_obs_key)

train_model = OnClassModel(cell_type_nlp_emb_file=nlp_emb_file, cell_type_network_file=cl_ontology_file)
train_model = OnClassModel(
cell_type_nlp_emb_file=nlp_emb_file, cell_type_network_file=cl_ontology_file
)

if adata.uns["_save_path_trained_models"] is not None:
model_path = adata.uns["_save_path_trained_models"] + "/OnClass"
@@ -175,13 +181,17 @@ def predict(self, adata):
)

if adata.uns["_prediction_mode"] == "fast":
onclass_seen = np.argmax(train_model.model.predict(corr_test_feature), axis=1)
onclass_seen = np.argmax(
train_model.model.predict(corr_test_feature), axis=1
)
pred_label = [train_model.i2co[ind] for ind in onclass_seen]
pred_label_str = [clid_2_name[ind] for ind in pred_label]
adata.obs[self.result_key] = pred_label_str
adata.obs[self.seen_result_key] = pred_label_str
else:
onclass_pred = train_model.Predict(corr_test_feature, use_normalize=False, refine=True, unseen_ratio=-1.0)
onclass_pred = train_model.Predict(
corr_test_feature, use_normalize=False, refine=True, unseen_ratio=-1.0
)
pred_label = [train_model.i2co[ind] for ind in onclass_pred[2]]
pred_label_str = [clid_2_name[ind] for ind in pred_label]
adata.obs[self.result_key] = pred_label_str
@@ -192,9 +202,15 @@ def predict(self, adata):
adata.obs[self.seen_result_key] = pred_label_str

if adata.uns["_return_probabilities"]:
adata.obs[self.result_key + "_probabilities"] = np.max(onclass_pred[1], axis=1) / onclass_pred[1].sum(1)
adata.obsm["onclass_probabilities"] = onclass_pred[1] / onclass_pred[1].sum(1, keepdims=True)
adata.obs["popv_onclass_seen" + "_probabilities"] = np.max(onclass_pred[0], axis=1)
adata.obs[self.result_key + "_probabilities"] = np.max(
onclass_pred[1], axis=1
) / onclass_pred[1].sum(1)
adata.obsm["onclass_probabilities"] = onclass_pred[1] / onclass_pred[
1
].sum(1, keepdims=True)
adata.obs["popv_onclass_seen" + "_probabilities"] = np.max(
onclass_pred[0], axis=1
)

def compute_embedding(self, adata):
return None
18 changes: 14 additions & 4 deletions popv/algorithms/_rf.py
Original file line number Diff line number Diff line change
@@ -48,13 +48,19 @@ def compute_integration(self, adata):
pass

def predict(self, adata):
logging.info(f'Computing random forest classifier. Storing prediction in adata.obs["{self.result_key}"]')
logging.info(
f'Computing random forest classifier. Storing prediction in adata.obs["{self.result_key}"]'
)

test_x = adata.layers[self.layers_key] if self.layers_key else adata.X

if adata.uns["_prediction_mode"] == "retrain":
train_idx = adata.obs["_ref_subsample"]
train_x = adata[train_idx].layers[self.layers_key] if self.layers_key else adata[train_idx].X
train_x = (
adata[train_idx].layers[self.layers_key]
if self.layers_key
else adata[train_idx].X
)
train_y = adata[train_idx].obs[self.labels_key].to_numpy()
rf = RandomForestClassifier(**self.classifier_dict)
rf.fit(train_x, train_y)
@@ -67,10 +73,14 @@ def predict(self, adata):
),
)
else:
rf = pickle.load(open(adata.uns["_save_path_trained_models"] + "rf_classifier.pkl", "rb"))
rf = pickle.load(
open(adata.uns["_save_path_trained_models"] + "rf_classifier.pkl", "rb")
)
adata.obs[self.result_key] = rf.predict(test_x)
if adata.uns["_return_probabilities"]:
adata.obs[self.result_key + "_probabilities"] = np.max(rf.predict_proba(test_x), axis=1)
adata.obs[self.result_key + "_probabilities"] = np.max(
rf.predict_proba(test_x), axis=1
)

def compute_embedding(self, adata):
pass
Loading

0 comments on commit bc8b455

Please sign in to comment.