Add python autoformatting using ruff (#921)

openpipelines-bio · Dec 5, 2024 · 9e87b86 · 9e87b86
1 parent 30eea21
commit 9e87b86
Show file tree

Hide file tree

Showing 201 changed files with 12,877 additions and 6,747 deletions.
diff --git a/.github/workflows/viash-test.yml b/.github/workflows/viash-test.yml
@@ -10,6 +10,25 @@ concurrency:
   cancel-in-progress: ${{ !contains(github.ref, 'main')}}
 
 jobs:
+  linting:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Install Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install ruff
+      - name: Run Ruff
+        run: ruff check --output-format=github .
+
+
   # phase 1
   list:
     env:

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,7 @@
+repos:
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  # Ruff version.
+  rev: v0.8.1
+  hooks:
+    - id: ruff
+    - id: ruff-format
diff --git a/ruff.toml b/ruff.toml
@@ -0,0 +1,43 @@
+# Exclude a variety of commonly ignored directories.
+exclude = [
+    ".git",
+    ".pyenv",
+    ".pytest_cache",
+    ".ruff_cache",
+    ".venv",
+    ".vscode",
+    "__pypackages__",
+    "_build",
+    "build",
+    "dist",
+    "node_modules",
+    "site-packages",
+]
+
+builtins = ["meta"]
+
+
+
+
+[format]
+# Like Black, use double quotes for strings.
+quote-style = "double"
+
+# Like Black, indent with spaces, rather than tabs.
+indent-style = "space"
+
+# Like Black, respect magic trailing commas.
+skip-magic-trailing-comma = false
+
+# Like Black, automatically detect the appropriate line ending.
+line-ending = "auto"
+
+[lint.flake8-pytest-style]
+fixture-parentheses = false
+mark-parentheses = false
+
+[lint]
+ignore = [
+  # module-level import not at top of file
+  "E402"
+]
diff --git a/src/annotate/celltypist/script.py b/src/annotate/celltypist/script.py
@@ -25,8 +25,7 @@
     "output_obs_predictions": "celltypist_pred",
     "output_obs_probabilities": "celltypist_probability",
 }
-meta = {
-}
+meta = {}
 ## VIASH END
 
 sys.path.append(meta["resources_dir"])
@@ -37,16 +36,20 @@
 
 logger = setup_logger()
 
+
 def check_celltypist_format(indata):
-    if np.abs(np.expm1(indata[0]).sum()-10000) > 1:
+    if np.abs(np.expm1(indata[0]).sum() - 10000) > 1:
         return False
     return True
 
 
 def main(par):
-
-    if (not par["model"] and not par["reference"]) or (par["model"] and par["reference"]):
-        raise ValueError("Make sure to provide either 'model' or 'reference', but not both.")
+    if (not par["model"] and not par["reference"]) or (
+        par["model"] and par["reference"]
+    ):
+        raise ValueError(
+            "Make sure to provide either 'model' or 'reference', but not both."
+        )
 
     input_mudata = mu.read_h5mu(par["input"])
     input_adata = input_mudata.mod[par["modality"]]
@@ -59,29 +62,53 @@ def main(par):
     if par["model"]:
         logger.info("Loading CellTypist model")
         model = celltypist.models.Model.load(par["model"])
-        cross_check_genes(input_modality.var.index, model.features, min_gene_overlap=par["input_reference_gene_overlap"])
+        cross_check_genes(
+            input_modality.var.index,
+            model.features,
+            min_gene_overlap=par["input_reference_gene_overlap"],
+        )
 
     elif par["reference"]:
         reference_modality = mu.read_h5mu(par["reference"]).mod[par["modality"]]
 
         # subset to HVG if required
         if par["reference_var_input"]:
-            reference_modality = subset_vars(reference_modality, par["reference_var_input"])  
+            reference_modality = subset_vars(
+                reference_modality, par["reference_var_input"]
+            )
 
-        # Set var names to the desired gene name format (gene symbol, ensembl id, etc.)  
+        # Set var names to the desired gene name format (gene symbol, ensembl id, etc.)
         # CellTypist requires query gene names to be in index
-        reference_modality = set_var_index(reference_modality, par["reference_var_gene_names"])
+        reference_modality = set_var_index(
+            reference_modality, par["reference_var_gene_names"]
+        )
 
         # Ensure enough overlap between genes in query and reference
-        cross_check_genes(input_modality.var.index, reference_modality.var.index, min_gene_overlap=par["input_reference_gene_overlap"])
+        cross_check_genes(
+            input_modality.var.index,
+            reference_modality.var.index,
+            min_gene_overlap=par["input_reference_gene_overlap"],
+        )
 
-        input_matrix = input_modality.layers[par["input_layer"]] if par["input_layer"] else input_modality.X
-        reference_matrix = reference_modality.layers[par["reference_layer"]] if par["reference_layer"] else reference_modality.X
+        input_matrix = (
+            input_modality.layers[par["input_layer"]]
+            if par["input_layer"]
+            else input_modality.X
+        )
+        reference_matrix = (
+            reference_modality.layers[par["reference_layer"]]
+            if par["reference_layer"]
+            else reference_modality.X
+        )
 
         if not check_celltypist_format(input_matrix):
-            logger.warning("Input data is not in the reccommended format for CellTypist.")
+            logger.warning(
+                "Input data is not in the reccommended format for CellTypist."
+            )
         if not check_celltypist_format(reference_matrix):
-            logger.warning("Reference data is not in the reccommended format for CellTypist.")
+            logger.warning(
+                "Reference data is not in the reccommended format for CellTypist."
+            )
 
         labels = reference_modality.obs[par["reference_obs_target"]]
 
@@ -94,21 +121,23 @@ def main(par):
             max_iter=par["max_iter"],
             use_SGD=par["use_SGD"],
             feature_selection=par["feature_selection"],
-            check_expression=par["check_expression"]
-            )
+            check_expression=par["check_expression"],
+        )
 
     logger.info("Predicting CellTypist annotations")
     predictions = celltypist.annotate(
-        input_modality,
-        model,
-        majority_voting=par["majority_voting"]
-        )
-    input_adata.obs[par["output_obs_predictions"]] = predictions.predicted_labels["predicted_labels"]
-    input_adata.obs[par["output_obs_probability"]] = predictions.probability_matrix.max(axis=1).values
+        input_modality, model, majority_voting=par["majority_voting"]
+    )
+    input_adata.obs[par["output_obs_predictions"]] = predictions.predicted_labels[
+        "predicted_labels"
+    ]
+    input_adata.obs[par["output_obs_probability"]] = predictions.probability_matrix.max(
+        axis=1
+    ).values
 
     # copy observations back to input data (with full set of features)
     input_mudata.write_h5mu(par["output"], compression=par["output_compression"])
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main(par)