Skip to content

Commit

Permalink
Add python autoformatting using ruff (#921)
Browse files Browse the repository at this point in the history
  • Loading branch information
DriesSchaumont authored Dec 5, 2024
1 parent 30eea21 commit 9e87b86
Show file tree
Hide file tree
Showing 201 changed files with 12,877 additions and 6,747 deletions.
19 changes: 19 additions & 0 deletions .github/workflows/viash-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,25 @@ concurrency:
cancel-in-progress: ${{ !contains(github.ref, 'main')}}

jobs:
# Lint job: checks out the repository, installs Ruff with pip and runs
# `ruff check` over the whole tree.
linting:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4
with:
# 0 = fetch the full history instead of a shallow clone.
fetch-depth: 0
- name: Install Python
uses: actions/setup-python@v5
with:
python-version: "3.12"
- name: Install dependencies
# NOTE(review): Ruff is installed unpinned here, while .pre-commit-config.yaml
# pins rev v0.8.1 — CI and local hooks may run different Ruff versions; confirm
# whether that is intended.
run: |
python -m pip install --upgrade pip
pip install ruff
- name: Run Ruff
# `--output-format=github` reports violations as GitHub Actions annotations.
# NOTE(review): only the linter (`ruff check`) runs here; formatting is not
# verified in CI (no `ruff format --check` step in this job).
run: ruff check --output-format=github .


# phase 1
list:
env:
Expand Down
7 changes: 7 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# pre-commit configuration: run Ruff's linter and formatter on commit.
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.8.1
hooks:
# Run the linter (no `--fix` argument is passed, so violations are reported
# rather than auto-fixed).
- id: ruff
# Run the formatter.
- id: ruff-format
43 changes: 43 additions & 0 deletions ruff.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Ruff configuration shared by the linter (`ruff check`) and the formatter
# (`ruff format`).

# Exclude a variety of commonly ignored directories.
exclude = [
".git",
".pyenv",
".pytest_cache",
".ruff_cache",
".venv",
".vscode",
"__pypackages__",
"_build",
"build",
"dist",
"node_modules",
"site-packages",
]

# Treat `meta` as a defined builtin so references to it are not reported as
# undefined names.
# NOTE(review): presumably needed because component scripts use a `meta` dict
# that is supplied at runtime (see the `## VIASH END` placeholder blocks) —
# confirm.
builtins = ["meta"]




[format]
# Like Black, use double quotes for strings.
quote-style = "double"

# Like Black, indent with spaces, rather than tabs.
indent-style = "space"

# Like Black, respect magic trailing commas.
skip-magic-trailing-comma = false

# Like Black, automatically detect the appropriate line ending.
line-ending = "auto"

[lint.flake8-pytest-style]
# Expect `@pytest.fixture` and `@pytest.mark.<name>` to be written without
# empty trailing parentheses.
fixture-parentheses = false
mark-parentheses = false

[lint]
ignore = [
# module level import not at top of file
"E402"
]
77 changes: 53 additions & 24 deletions src/annotate/celltypist/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@
"output_obs_predictions": "celltypist_pred",
"output_obs_probabilities": "celltypist_probability",
}
meta = {
}
meta = {}
## VIASH END

sys.path.append(meta["resources_dir"])
Expand All @@ -37,16 +36,20 @@

logger = setup_logger()


def check_celltypist_format(indata):
    """Report whether the first row of ``indata`` looks log1p-normalised to 10,000.

    The first row (``indata[0]``) is passed through ``np.expm1`` and summed;
    the data passes the check when that total is within 1 of 10000.
    Only the first row is inspected, so this is a cheap heuristic rather
    than a validation of the whole matrix.

    Args:
        indata: Row-indexable expression matrix for which
            ``np.expm1(indata[0]).sum()`` is defined.

    Returns:
        ``False`` if the back-transformed first-row total differs from
        10000 by more than 1, ``True`` otherwise.
    """
    if np.abs(np.expm1(indata[0]).sum() - 10000) > 1:
        return False
    return True


def main(par):

if (not par["model"] and not par["reference"]) or (par["model"] and par["reference"]):
raise ValueError("Make sure to provide either 'model' or 'reference', but not both.")
if (not par["model"] and not par["reference"]) or (
par["model"] and par["reference"]
):
raise ValueError(
"Make sure to provide either 'model' or 'reference', but not both."
)

input_mudata = mu.read_h5mu(par["input"])
input_adata = input_mudata.mod[par["modality"]]
Expand All @@ -59,29 +62,53 @@ def main(par):
if par["model"]:
logger.info("Loading CellTypist model")
model = celltypist.models.Model.load(par["model"])
cross_check_genes(input_modality.var.index, model.features, min_gene_overlap=par["input_reference_gene_overlap"])
cross_check_genes(
input_modality.var.index,
model.features,
min_gene_overlap=par["input_reference_gene_overlap"],
)

elif par["reference"]:
reference_modality = mu.read_h5mu(par["reference"]).mod[par["modality"]]

# subset to HVG if required
if par["reference_var_input"]:
reference_modality = subset_vars(reference_modality, par["reference_var_input"])
reference_modality = subset_vars(
reference_modality, par["reference_var_input"]
)

# Set var names to the desired gene name format (gene symbol, ensembl id, etc.)
# Set var names to the desired gene name format (gene symbol, ensembl id, etc.)
# CellTypist requires query gene names to be in index
reference_modality = set_var_index(reference_modality, par["reference_var_gene_names"])
reference_modality = set_var_index(
reference_modality, par["reference_var_gene_names"]
)

# Ensure enough overlap between genes in query and reference
cross_check_genes(input_modality.var.index, reference_modality.var.index, min_gene_overlap=par["input_reference_gene_overlap"])
cross_check_genes(
input_modality.var.index,
reference_modality.var.index,
min_gene_overlap=par["input_reference_gene_overlap"],
)

input_matrix = input_modality.layers[par["input_layer"]] if par["input_layer"] else input_modality.X
reference_matrix = reference_modality.layers[par["reference_layer"]] if par["reference_layer"] else reference_modality.X
input_matrix = (
input_modality.layers[par["input_layer"]]
if par["input_layer"]
else input_modality.X
)
reference_matrix = (
reference_modality.layers[par["reference_layer"]]
if par["reference_layer"]
else reference_modality.X
)

if not check_celltypist_format(input_matrix):
logger.warning("Input data is not in the reccommended format for CellTypist.")
logger.warning(
"Input data is not in the reccommended format for CellTypist."
)
if not check_celltypist_format(reference_matrix):
logger.warning("Reference data is not in the reccommended format for CellTypist.")
logger.warning(
"Reference data is not in the reccommended format for CellTypist."
)

labels = reference_modality.obs[par["reference_obs_target"]]

Expand All @@ -94,21 +121,23 @@ def main(par):
max_iter=par["max_iter"],
use_SGD=par["use_SGD"],
feature_selection=par["feature_selection"],
check_expression=par["check_expression"]
)
check_expression=par["check_expression"],
)

logger.info("Predicting CellTypist annotations")
predictions = celltypist.annotate(
input_modality,
model,
majority_voting=par["majority_voting"]
)
input_adata.obs[par["output_obs_predictions"]] = predictions.predicted_labels["predicted_labels"]
input_adata.obs[par["output_obs_probability"]] = predictions.probability_matrix.max(axis=1).values
input_modality, model, majority_voting=par["majority_voting"]
)
input_adata.obs[par["output_obs_predictions"]] = predictions.predicted_labels[
"predicted_labels"
]
input_adata.obs[par["output_obs_probability"]] = predictions.probability_matrix.max(
axis=1
).values

# copy observations back to input data (with full set of features)
input_mudata.write_h5mu(par["output"], compression=par["output_compression"])


if __name__ == '__main__':
if __name__ == "__main__":
main(par)
Loading

0 comments on commit 9e87b86

Please sign in to comment.