Commit
Co-authored-by: Vladimir Shitov <[email protected]>
Showing 6 changed files with 449 additions and 1 deletion.
@@ -0,0 +1,10 @@
name: Sarah Ouologuem
info:
  role: Contributor
  links:
    github: SarahOuologuem
    orcid: 0009-0005-3398-1700
  organizations:
    - name: Helmholtz Munich
      href: https://www.helmholtz-munich.de
      role: Student Assistant
@@ -0,0 +1,124 @@
name: lsi
namespace: "dimred"
description: |
  Runs Latent Semantic Indexing. Computes cell embeddings, feature loadings and singular values. Uses the scipy implementation.
authors:
  - __merge__: /src/authors/sarah_ouologuem.yaml
    roles: [ contributor ]
  - __merge__: /src/authors/vladimir_shitov.yaml
    roles: [ contributor ]
argument_groups:
  - name: Inputs
    arguments:
      - name: "--input"
        alternatives: ["-i"]
        type: file
        description: Path to input h5mu file.
        direction: input
        required: true
        example: input.h5mu

      - name: "--modality"
        type: string
        default: "atac"
        description: Which modality to run LSI on.
        required: false

      - name: "--layer"
        type: string
        description: Use specified layer for expression values. If not specified, uses adata.X.
        required: false

      - name: "--var_input"
        type: string
        description: Column name in .var that is used to select which features to run the LSI on. If not specified, uses all features.
        required: false

  - name: LSI options
    arguments:
      - name: "--num_components"
        type: integer
        default: 50
        description: Number of components to compute.
        required: false
        min: 2

      - name: "--scale_embeddings"
        type: boolean
        default: true
        description: Scale embeddings to zero mean and unit variance.

  - name: Outputs
    arguments:
      - name: "--output"
        alternatives: ["-o"]
        type: file
        description: Output h5mu file.
        direction: output
        required: true
        example: output.h5mu

      - name: "--output_compression"
        type: string
        default: "gzip"
        description: The compression format to be used on the output h5mu object.
        choices: ["gzip", "lzf"]
        required: false

      - name: "--obsm_output"
        type: string
        default: "X_lsi"
        description: In which .obsm slot to store the resulting embedding.
        required: false

      - name: "--varm_output"
        type: string
        default: "lsi"
        description: In which .varm slot to store the resulting loadings matrix.
        required: false

      - name: "--uns_output"
        type: string
        default: "lsi"
        description: In which .uns slot to store the stdev.
        required: false

      - name: "--overwrite"
        type: boolean_true
        description: Allow overwriting .obsm, .varm and .uns slots.

resources:
  - type: python_script
    path: script.py
  - path: ../../utils/subset_vars.py
  - path: /src/utils/setup_logger.py
test_resources:
  - type: python_script
    path: test.py
  - path: ../../utils/subset_vars.py
  - path: ../../../resources_test/concat_test_data

engines:
  - type: docker
    image: python:3.11-slim
    setup:
      - type: apt
        packages:
          - procps
          - pkg-config # Otherwise h5py installation fails, which is required for scanpy
          - libhdf5-dev
          - gcc
      - type: python
        __merge__: [../../../src/base/requirements/anndata_mudata.yaml, .]
        packages:
          - muon~=0.1.6
    test_setup:
      - type: python
        __merge__: [ /src/base/requirements/python_test_setup.yaml, .]
runners:
  - type: executable
  - type: nextflow
    directives:
      label:
        - highcpu
        - highmem
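
The LSI this component wraps (via mu.atac.tl.lsi in the script below) boils down to TF-IDF weighting of the count matrix followed by a truncated SVD. The snippet below is a minimal illustrative sketch of that recipe on toy data with scipy; it is not the exact muon implementation (normalization details may differ), and the slot names in the comments are simply the defaults declared in the config above.

import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds

rng = np.random.default_rng(0)
counts = csr_matrix(rng.poisson(0.2, size=(100, 500)).astype(float))  # toy cells-by-peaks matrix

# TF-IDF: per-cell term frequency, weighted by an inverse document frequency per peak
row_sums = np.asarray(counts.sum(axis=1)).ravel()
row_sums[row_sums == 0] = 1  # guard against empty cells
tf = csr_matrix(counts.multiply(1.0 / row_sums[:, None]))
idf = np.log1p(counts.shape[0] / (1.0 + np.asarray(counts.sum(axis=0)).ravel()))
tfidf = csr_matrix(tf.multiply(idf))

# Truncated SVD; svds returns singular values in ascending order, so re-sort descending
u, s, vt = svds(tfidf, k=50)
order = np.argsort(-s)
u, s, vt = u[:, order], s[order], vt[order]

embeddings = u * s       # cell embeddings, analogous to what lands in .obsm (default "X_lsi")
loadings = vt.T          # feature loadings, analogous to .varm (default "lsi")
singular_values = s      # singular values / stdev info, analogous to .uns (default "lsi")

# --scale_embeddings: zero mean and unit variance per component
embeddings = (embeddings - embeddings.mean(axis=0)) / embeddings.std(axis=0)

Keeping the matrix sparse end to end is what makes this tractable for large peak-by-cell matrices, which is why the component relies on the scipy/muon sparse SVD rather than a dense decomposition.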
@@ -0,0 +1,108 @@
import muon as mu
import mudata as md
from anndata import AnnData
import numpy as np
import sys

## VIASH START
par = {
    "input": "input.h5mu",  # placeholder input path for local debugging
    "num_components": 50,  # number of components to calculate with SVD
    "scale_embeddings": True,  # scale embeddings to zero mean and unit variance
    "modality": "atac",  # on which modality the LSI should be run
    "layer": None,  # on which layer to run the LSI, if None, will run it on anndata.X
    "var_input": None,  # column in anndata.var of the highly variable features

    "overwrite": True,
    "obsm_output": "X_lsi",
    "varm_output": "LSI",
    "uns_output": "lsi",
    "output": "output.h5mu",
    "output_compression": "gzip"
}
## VIASH END

sys.path.append(meta["resources_dir"])  # meta is provided by viash at runtime
from subset_vars import subset_vars

# START TEMPORARY WORKAROUND setup_logger
# reason: resources aren't available when using Nextflow fusion
# from setup_logger import setup_logger
def setup_logger():
    import logging
    from sys import stdout

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    console_handler = logging.StreamHandler(stdout)
    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
    console_handler.setFormatter(logFormatter)
    logger.addHandler(console_handler)

    return logger
# END TEMPORARY WORKAROUND setup_logger
logger = setup_logger()

# 1. Read in mudata
logger.info("Reading %s.", par["input"])
mdata = md.read_h5mu(par["input"])

# 2. Subset on modality
if par["modality"] not in mdata.mod:
    raise ValueError(f"Modality '{par['modality']}' was not found in mudata {par['input']}.")
adata = mdata.mod[par['modality']]

# 3. Specify layer
if par['layer'] and par["layer"] not in adata.layers:
    raise ValueError(f"Layer '{par['layer']}' was not found in modality '{par['modality']}'.")
layer = adata.X if not par['layer'] else adata.layers[par['layer']]
adata_input_layer = AnnData(layer, var=adata.var)

if not par["layer"]:
    logger.info("Using modality '%s' and adata.X for LSI computation", par['modality'])
else:
    logger.info("Using modality '%s' and layer '%s' for LSI computation", par['modality'], par["layer"])

# 4. Subset on highly variable features if applicable
if par["var_input"]:
    adata_input_layer = subset_vars(adata_input_layer, par["var_input"])

# 5. Run LSI
logger.info("Computing %s LSI components on %s features", par["num_components"], adata_input_layer.X.shape[1])
mu.atac.tl.lsi(adata_input_layer, scale_embeddings=par["scale_embeddings"], n_comps=par["num_components"])

# 6. Store output in object
check_exist_dict = {
    "obsm_output": "obsm",
    "varm_output": "varm",
    "uns_output": "uns"
}
for parameter_name, field in check_exist_dict.items():
    if par[parameter_name] in getattr(adata, field):
        if not par["overwrite"]:
            raise ValueError(f"Requested to create field {par[parameter_name]} in .{field} "
                             f"for modality {par['modality']}, but field already exists.")
        del getattr(adata, field)[par[parameter_name]]

adata.obsm[par["obsm_output"]] = adata_input_layer.obsm['X_lsi']
adata.uns[par["uns_output"]] = adata_input_layer.uns['lsi']
if par["var_input"]:
    # LSI was computed on a subset of features: pad the loadings back to the
    # full .var space, leaving zeros for features that were not used
    adata.varm[par["varm_output"]] = np.zeros(shape=(adata.n_vars, adata_input_layer.varm["LSI"].shape[1]))
    adata.varm[par["varm_output"]][adata.var[par["var_input"]]] = adata_input_layer.varm['LSI']
else:
    adata.varm[par["varm_output"]] = adata_input_layer.varm['LSI']

logger.info("Writing to %s.", par["output"])
mdata.write(filename=par["output"], compression=par["output_compression"])

logger.info("Finished")