|
| 1 | +import muon as mu |
| 2 | +import mudata as md |
| 3 | +from anndata import AnnData |
| 4 | +import numpy as np |
| 5 | +import sys |
| 6 | + |
| 7 | + |
## VIASH START
# Debug defaults used only when running this script directly, outside viash.
# NOTE(review): viash is expected to replace everything between the VIASH START
# and VIASH END markers with the real `par` and `meta` values at build time.
par = {
    "input": "input.h5mu",  # placeholder path to the input MuData file, adjust for local runs
    "num_components": 50,  # number of components to calculate with SVD
    "scale_embeddings": True,  # scale embeddings to zero mean and unit variance
    "modality": "atac",  # on which modality the LSI should be run
    "layer": None,  # on which layer to run the LSI, if None, will run it on anndata.X
    "var_input": None,  # column in anndata.var of the highly variable features

    "overwrite": True,  # replace already existing output fields instead of raising
    "obsm_output": "X_lsi",  # key in .obsm receiving the LSI embedding
    "varm_output": "LSI",  # key in .varm receiving the feature loadings
    "uns_output": "lsi",  # key in .uns receiving the LSI metadata
    "output": "output.h5mu",  # path the resulting MuData file is written to
    "output_compression": "gzip",  # compression passed to the MuData writer
}
meta = {
    # placeholder: directory that contains subset_vars.py, adjust for local runs
    "resources_dir": ".",
}
## VIASH END
| 24 | + |
| 25 | + |
# Put the component's resources directory on the module search path so the
# helper module shipped alongside this script can be imported.
_resources_dir = meta["resources_dir"]
sys.path.append(_resources_dir)
from subset_vars import subset_vars
| 28 | + |
| 29 | + |
# START TEMPORARY WORKAROUND setup_logger
# reason: resources aren't available when using Nextflow fusion
# from setup_logger import setup_logger
def setup_logger():
    """Configure the root logger to emit INFO (and above) messages on stdout.

    Records are formatted as "<timestamp> <level> <message>". The function is
    idempotent: when the root logger already has a stream handler writing to
    the current stdout, no additional handler is attached, so calling it more
    than once does not duplicate every log line.

    Returns:
        The configured root logger.
    """
    import logging
    from sys import stdout

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Only attach a console handler when no handler already writes to stdout.
    already_attached = any(
        isinstance(handler, logging.StreamHandler)
        and getattr(handler, "stream", None) is stdout
        for handler in logger.handlers
    )
    if not already_attached:
        console_handler = logging.StreamHandler(stdout)
        log_formatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
        console_handler.setFormatter(log_formatter)
        logger.addHandler(console_handler)

    return logger
# END TEMPORARY WORKAROUND setup_logger
logger = setup_logger()
| 47 | + |
| 48 | + |
# 1. Load the MuData object from the input file.
input_path = par["input"]
logger.info("Reading %s.", input_path)
mdata = md.read_h5mu(input_path)

# 2. Pick the requested modality; stop early when it is not present.
available_modalities = mdata.mod
if par["modality"] not in available_modalities:
    raise ValueError(f"Modality '{par['modality']}' was not found in mudata {par['input']}.")
adata = available_modalities[par["modality"]]
| 57 | + |
| 58 | + |
# 3. Select the matrix LSI is computed on: a named layer when one is requested,
#    otherwise adata.X. A requested layer that does not exist is an error.
layer_name = par["layer"]
if layer_name:
    if layer_name not in adata.layers:
        raise ValueError(f"Layer '{par['layer']}' was not found in modality '{par['modality']}'.")
    layer = adata.layers[layer_name]
else:
    layer = adata.X

# Wrap the chosen matrix in a separate AnnData that carries the same .var
# table; the LSI computation further down operates on this object.
adata_input_layer = AnnData(layer, var=adata.var)

if layer_name:
    logger.info("Using modality '%s' and layer '%s' for LSI computation", par['modality'], par["layer"])
else:
    logger.info("Using modality '%s' and adata.X for LSI computation", par['modality'])

# 4. Optionally restrict the features using the column named by `var_input`.
var_column = par["var_input"]
if var_column:
    adata_input_layer = subset_vars(adata_input_layer, var_column)
| 75 | + |
| 76 | + |
| 77 | + |
# 5. Validate the output slots BEFORE the expensive computation, so that a
#    clash with an existing field (when overwriting is disabled) fails fast
#    instead of after LSI has already been computed.
output_slots = {
    "obsm_output": "obsm",
    "varm_output": "varm",
    "uns_output": "uns",
}
for parameter_name, field in output_slots.items():
    container = getattr(adata, field)
    if par[parameter_name] in container:
        if not par["overwrite"]:
            raise ValueError(f"Requested to create field {par[parameter_name]} in .{field} "
                             f"for modality {par['modality']}, but field already exists.")
        # Overwriting was requested: drop the stale entry before storing the new one.
        del container[par[parameter_name]]

# 6. Run LSI on the (possibly feature-subsetted) working copy. The results are
#    read back below from .obsm['X_lsi'], .uns['lsi'] and .varm['LSI'].
logger.info("Computing %s LSI components on %s features", par["num_components"], adata_input_layer.n_vars)
mu.atac.tl.lsi(adata_input_layer, scale_embeddings=par["scale_embeddings"], n_comps=par["num_components"])

# 7. Store the results in the original modality under the requested keys.
adata.obsm[par["obsm_output"]] = adata_input_layer.obsm["X_lsi"]
adata.uns[par["uns_output"]] = adata_input_layer.uns["lsi"]

loadings = adata_input_layer.varm["LSI"]
if par["var_input"]:
    # LSI only saw the selected features, so its loadings have fewer rows than
    # adata has features. Build the full-size matrix locally (rows of features
    # that were not selected stay zero) and assign it in one go, using an
    # explicit boolean NumPy mask rather than indexing with a pandas Series.
    selected_features = adata.var[par["var_input"]].to_numpy(dtype=bool)
    padded_loadings = np.zeros(shape=(adata.n_vars, loadings.shape[1]))
    padded_loadings[selected_features] = loadings
    loadings = padded_loadings
adata.varm[par["varm_output"]] = loadings

# 8. Write the updated MuData object to disk.
logger.info("Writing to %s.", par["output"])
mdata.write(filename=par["output"], compression=par["output_compression"])

logger.info("Finished")
0 commit comments