diff --git a/CHANGELOG.md b/CHANGELOG.md
index a2e1f09955c..7faa308b741 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -117,6 +117,8 @@
 
 * `transform/regress_out`: Allow providing 'input' and 'output' layers for scanpy regress_out functionality (PR #863).
 
+* `dimred/lsi` component: Added a component to run Latent Semantic Indexing (LSI) (PR #552).
+
 * `metadata/copy_obs` component: Added a component to copy an .obs column from a MuData object to another (PR #874).
 
 * `workflows/annotation/scgpt_annotation` workflow: Added a scGPT transformer-based cell type annotation workflow (PR #832).
diff --git a/src/authors/sarah_ouologuem.yaml b/src/authors/sarah_ouologuem.yaml
new file mode 100644
index 00000000000..5ed8795a5d2
--- /dev/null
+++ b/src/authors/sarah_ouologuem.yaml
@@ -0,0 +1,10 @@
+name: Sarah Ouologuem
+info:
+  role: Contributor
+  links:
+    github: SarahOuologuem
+    orcid: 0009-0005-3398-1700
+  organizations:
+    - name: Helmholtz Munich
+      href: https://www.helmholtz-munich.de
+      role: Student Assistant
\ No newline at end of file
diff --git a/src/dimred/lsi/config.vsh.yaml b/src/dimred/lsi/config.vsh.yaml
new file mode 100644
index 00000000000..fd2ac8df0b1
--- /dev/null
+++ b/src/dimred/lsi/config.vsh.yaml
@@ -0,0 +1,124 @@
+name: lsi
+namespace: "dimred"
+description: |
+  Runs Latent Semantic Indexing (LSI). Computes cell embeddings, feature loadings and singular values. Uses the implementation of muon (`muon.atac.tl.lsi`).
+authors:
+  - __merge__: /src/authors/sarah_ouologuem.yaml
+    roles: [ contributor ]
+  - __merge__: /src/authors/vladimir_shitov.yaml
+    roles: [ contributor ]
+argument_groups:
+  - name: Inputs
+    arguments:
+      - name: "--input"
+        alternatives: ["-i"]
+        type: file
+        description: Path to input h5mu file
+        direction: input
+        required: true
+        example: input.h5mu
+
+      - name: "--modality"
+        type: string
+        default: "atac"
+        description: Modality on which to run LSI.
+        required: false
+
+      - name: "--layer"
+        type: string
+        description: Use specified layer for expression values. If not specified, uses adata.X.
+        required: false
+
+      - name: "--var_input"
+        type: string
+        description: Name of a boolean column in .var that selects the features to run LSI on. If not specified, uses all features.
+        required: false
+
+  - name: LSI options
+    arguments:
+      - name: "--num_components"
+        type: integer
+        default: 50
+        description: Number of components to compute.
+        required: false
+        min: 2
+
+      - name: "--scale_embeddings"
+        type: boolean
+        default: true
+        description: Scale embeddings to zero mean and unit variance.
+
+  - name: Outputs
+    arguments:
+      - name: "--output"
+        alternatives: ["-o"]
+        type: file
+        description: Output h5mu file.
+        direction: output
+        required: true
+        example: output.h5mu
+
+      - name: "--output_compression"
+        type: string
+        default: "gzip"
+        description: The compression format to be used on the output h5mu object.
+        choices: ["gzip", "lzf"]
+        required: false
+
+      - name: "--obsm_output"
+        type: string
+        default: "X_lsi"
+        description: In which .obsm slot to store the resulting embedding.
+        required: false
+
+      - name: "--varm_output"
+        type: string
+        default: "lsi"
+        description: In which .varm slot to store the resulting loadings matrix.
+        required: false
+
+      - name: "--uns_output"
+        type: string
+        default: "lsi"
+        description: In which .uns slot to store the stdev.
+        required: false
+
+      - name: "--overwrite"
+        type: boolean_true
+        description: Allow overwriting .obsm, .varm and .uns slots.
+
+    
+resources:
+  - type: python_script
+    path: script.py
+  - path: ../../utils/subset_vars.py
+  - path: /src/utils/setup_logger.py
+test_resources:
+  - type: python_script
+    path: test.py
+  - path: ../../utils/subset_vars.py
+  - path: ../../../resources_test/concat_test_data
+  
+
+engines:
+  - type: docker
+    image: python:3.11-slim
+    setup:
+      - type: apt
+        packages:
+          - procps
+          - pkg-config  # Otherwise h5py installation fails, which is required for scanpy
+          - libhdf5-dev
+          - gcc
+      - type: python
+        __merge__: [../../../src/base/requirements/anndata_mudata.yaml, .]
+        packages:
+          - muon~=0.1.6
+    __merge__: [ /src/base/requirements/python_test_setup.yaml, .]
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: 
+        - highcpu
+        - highmem
diff --git a/src/dimred/lsi/script.py b/src/dimred/lsi/script.py
new file mode 100644
index 00000000000..8a1f5328823
--- /dev/null
+++ b/src/dimred/lsi/script.py
@@ -0,0 +1,108 @@
+import muon as mu
+import mudata as md
+from anndata import AnnData
+import numpy as np
+import sys
+
+
+## VIASH START
+par = {
+    "input": "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu", # h5mu file to read
+    "num_components": 50, # number of components to calculate with SVD
+    "scale_embeddings": True, # scale embeddings to zero mean and unit variance
+    "modality": "atac", # on which modality the LSI should be run
+    "layer": None, # on which layer to run the LSI, if None, will run it on anndata.X
+    "var_input": None, # column in anndata.var of the highly variable features
+    "overwrite": True, # replace existing .obsm/.varm/.uns slots instead of raising
+    "obsm_output": "X_lsi",
+    "varm_output": "lsi", # same value as the default declared in config.vsh.yaml
+    "uns_output": "lsi",
+    "output": "output.h5mu",
+    "output_compression": "gzip"
+}
+## VIASH END
+
+
+sys.path.append(meta["resources_dir"])
+from subset_vars import subset_vars
+
+
+# START TEMPORARY WORKAROUND setup_logger
+# reason: resources aren't available when using Nextflow fusion
+# from setup_logger import setup_logger
+def setup_logger():
+    """Set the root logger to INFO, attach a stdout handler to it and return it."""
+    import logging
+    from sys import stdout
+
+    # Each record shows the timestamp, the level name padded to 8 characters and the message.
+    stream = logging.StreamHandler(stdout)
+    stream.setFormatter(logging.Formatter("%(asctime)s %(levelname)-8s %(message)s"))
+    root = logging.getLogger()
+    root.setLevel(logging.INFO)
+    root.addHandler(stream)
+    return root
+# END TEMPORARY WORKAROUND setup_logger
+logger = setup_logger()
+
+
+# 1. Load the MuData object.
+logger.info("Reading %s.", par["input"])
+mdata = md.read_h5mu(par["input"])
+
+# 2. Select the requested modality; abort when the object does not contain it.
+modality_name = par["modality"]
+if modality_name not in mdata.mod:
+    raise ValueError(f"Modality '{par['modality']}' was not found in mudata {par['input']}.")
+adata = mdata.mod[modality_name]
+
+# 3. Resolve the input matrix: the named layer when one is requested, adata.X otherwise.
+layer_name = par["layer"]
+if layer_name and layer_name not in adata.layers:
+    raise ValueError(f"Layer '{par['layer']}' was not found in modality '{par['modality']}'.")
+input_matrix = adata.layers[layer_name] if layer_name else adata.X
+
+# LSI runs on a separate AnnData holding only the matrix and .var; the results
+# are copied into the modality afterwards.
+adata_input_layer = AnnData(input_matrix, var=adata.var)
+if layer_name:
+    logger.info("Using modality '%s' and layer '%s' for LSI computation", modality_name, layer_name)
+else:
+    logger.info("Using modality '%s' and adata.X for LSI computation", modality_name)
+
+# 4. Optionally restrict the features to those selected by the --var_input column.
+if par["var_input"]:
+    adata_input_layer = subset_vars(adata_input_layer, par["var_input"])
+
+# 5. Run LSI; the results are read back from adata_input_layer below.
+n_features = adata_input_layer.X.shape[1]
+logger.info("Computing %s LSI components on %s features", par["num_components"], n_features)
+mu.atac.tl.lsi(adata_input_layer, scale_embeddings=par["scale_embeddings"], n_comps=par["num_components"])
+
+
+
+# 6. Store output in object; existing slots are only replaced when --overwrite is set.
+for parameter_name, field in (("obsm_output", "obsm"), ("varm_output", "varm"), ("uns_output", "uns")):
+    slot = getattr(adata, field)
+    if par[parameter_name] in slot:
+        if not par["overwrite"]:
+            raise ValueError(f"Requested to create field {par[parameter_name]} in .{field} "
+                             f"for modality {par['modality']}, but field already exists.")
+        del slot[par[parameter_name]]
+
+adata.obsm[par["obsm_output"]] = adata_input_layer.obsm['X_lsi']
+adata.uns[par["uns_output"]] = adata_input_layer.uns['lsi']
+loadings = adata_input_layer.varm['LSI']
+if par["var_input"]:
+    # LSI only saw the selected features: build the full matrix (zero rows for unselected
+    # features) first and store it once, instead of patching it in place through adata.varm.
+    selected = adata.var[par["var_input"]].to_numpy(dtype=bool)
+    padded = np.zeros(shape=(adata.n_vars, loadings.shape[1]))
+    padded[selected] = loadings
+    loadings = padded
+adata.varm[par["varm_output"]] = loadings
+
+logger.info("Writing to %s.", par["output"])
+mdata.write(filename=par["output"], compression=par["output_compression"])
+
+logger.info("Finished")
diff --git a/src/dimred/lsi/test.py b/src/dimred/lsi/test.py
new file mode 100644
index 00000000000..f6d293c6535
--- /dev/null
+++ b/src/dimred/lsi/test.py
@@ -0,0 +1,201 @@
+import sys
+import pytest
+import subprocess
+import mudata as mu
+import numpy as np
+
+## VIASH START
+meta = {
+    'resources_dir': 'resources_test',
+    'executable': './target/docker/dimred/lsi/lsi',
+    'config': './src/dimred/lsi/config.vsh.yaml'
+}
+## VIASH END
+# h5mu test object from the test resources; the tests below run LSI on its "atac" modality.
+input_path = f"{meta['resources_dir']}/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu"
+
+
+'''
+Tests: 
+1. general test 
+2. test HVF
+3. test modality
+4. test layer
+5. test overwrite 
+'''
+
+@pytest.fixture
+def atac_mudata(tmp_path):
+    """Write the test data with an extra 'counts' layer and a boolean 'highly_variable' column; return the path."""
+    mdata = mu.read_h5mu(input_path)
+    atac = mdata.mod["atac"]
+    atac.layers["counts"] = atac.X
+    # Seeded generator: the same features are selected on every run, so a failure can be reproduced.
+    atac.var["highly_variable"] = np.random.default_rng(0).choice([True, False], size=atac.n_vars)
+    fixture_path = tmp_path / "atac_mudata.h5mu"
+    mdata.write(fixture_path)
+    return fixture_path
+
+# 1.general test
+def test_lsi(run_component, tmp_path):
+    """Default run on the 'atac' modality with a custom .obsm key and 30 components."""
+    output_path = tmp_path / "output_lsi.h5mu"
+    run_component([
+        "--input", input_path,
+        "--output", str(output_path),
+        "--obsm_output", "X_test",
+        "--num_components", "30",
+    ])
+
+    assert output_path.is_file()
+    atac = mu.read_h5mu(output_path).mod["atac"]
+    assert "X_test" in atac.obsm
+    # One row per observation, one column per requested component.
+    assert atac.obsm["X_test"].shape == (atac.n_obs, 30)
+    assert "lsi" in atac.uns
+    assert "lsi" in atac.varm
+
+
+
+# 2.test HVF 
+def test_select_highly_variable_column(run_component, random_h5mu_path, atac_mudata):
+    """LSI restricted to a boolean .var column still stores loadings with one row per feature."""
+    output_path = random_h5mu_path()
+    run_component([
+        "--input", str(atac_mudata),
+        "--output", str(output_path),
+        "--var_input", "highly_variable",
+    ])
+
+    assert output_path.is_file()
+    atac = mu.read_h5mu(output_path).mod["atac"]
+    assert "X_lsi" in atac.obsm
+    assert atac.obsm["X_lsi"].shape == (atac.n_obs, 50)
+    assert "highly_variable" in atac.var.columns
+    assert "lsi" in atac.uns
+    assert "lsi" in atac.varm
+    assert atac.varm["lsi"].shape == (atac.n_vars, 50)
+    # Features outside the selection are not passed to LSI; the component stores zero loadings for them.
+    assert not atac.varm["lsi"][~atac.var["highly_variable"].to_numpy(dtype=bool)].any()
+
+
+def test_highly_variable_column_does_not_exist_raises(run_component):
+    """A --var_input column that is absent from .var makes the component fail."""
+    cmd_args = [
+        "--input", input_path,
+        "--output", "output_lsi.h5mu",
+        "--var_input", "does_not_exist",
+    ]
+    with pytest.raises(subprocess.CalledProcessError) as err:
+        run_component(cmd_args)
+    expected = "ValueError: Requested to use .var column 'does_not_exist' as a selection of genes, but the column is not available."
+    assert expected in err.value.stdout.decode('utf-8')
+        
+
+# 3.test modality
+def test_modality_does_not_exist_raises(run_component):
+    """Requesting a modality that the input does not contain makes the component fail."""
+    cmd_args = [
+        "--input", input_path,
+        "--output", "output_lsi.h5mu",
+        "--modality", "does_not_exist",
+    ]
+    with pytest.raises(subprocess.CalledProcessError) as err:
+        run_component(cmd_args)
+    expected = "ValueError: Modality 'does_not_exist' was not found in mudata " + input_path + "."
+    assert expected in err.value.stdout.decode('utf-8')
+
+
+
+# 4.test layer 
+def test_selecting_input_layer(run_component, atac_mudata, tmp_path):
+    """LSI can be computed from a named layer instead of .X."""
+    output_path = tmp_path / "output_lsi.h5mu"
+
+    # The atac_mudata fixture stores .X under layers["counts"].
+    run_component([
+        "--input", str(atac_mudata),
+        "--output", str(output_path),
+        "--num_components", "20",
+        "--layer", "counts",
+    ])
+
+    assert output_path.is_file()
+    atac = mu.read_h5mu(output_path).mod["atac"]
+    # The input layer must still be present next to the new LSI slots.
+    assert "counts" in atac.layers
+    assert "X_lsi" in atac.obsm
+    assert atac.obsm["X_lsi"].shape == (atac.n_obs, 20)
+    assert "lsi" in atac.uns
+    assert "lsi" in atac.varm
+
+
+
+def test_raise_if_input_layer_is_missing(run_component):
+    """Requesting a layer that the modality does not contain makes the component fail."""
+    cmd_args = [
+        "--input", input_path,
+        "--output", "output.h5mu",
+        "--layer", "does_not_exist",
+    ]
+    with pytest.raises(subprocess.CalledProcessError) as err:
+        run_component(cmd_args)
+    expected = "ValueError: Layer 'does_not_exist' was not found in modality 'atac'."
+    assert expected in err.value.stdout.decode('utf-8')
+
+
+
+# 5.test overwrite 
+
+def test_output_field_already_present_raises(run_component, tmp_path):
+    """Without --overwrite, pre-existing output slots make the component fail."""
+    output_path = tmp_path / "output_lsi.h5mu"
+
+    # Pre-populate every slot the component writes to by default.
+    input_data = mu.read_h5mu(input_path)
+    atac = input_data.mod["atac"]
+    atac.varm["lsi"] = np.zeros(shape=(atac.n_vars, 50))
+    atac.obsm["X_lsi"] = np.zeros(shape=(atac.n_obs, 50))
+    atac.uns['lsi'] = "test"
+    tmp_file = tmp_path / "input_data_adjusted.h5mu"
+    input_data.write_h5mu(tmp_file)
+
+    cmd_args = [
+        "--input", str(tmp_file),
+        "--output", str(output_path),
+        "--output_compression", "gzip",
+    ]
+    with pytest.raises(subprocess.CalledProcessError) as err:
+        run_component(cmd_args)
+    expected = "ValueError: Requested to create field X_lsi in .obsm for modality atac, but field already exists."
+    assert expected in err.value.stdout.decode('utf-8')
+
+def test_output_field_already_present_overwrite(run_component, tmp_path):
+    """With --overwrite, the component runs even though the output slots already exist."""
+    output_path = tmp_path / "output_lsi.h5mu"
+    # Pre-populate every default output slot with placeholder content (50 columns).
+    input_data = mu.read_h5mu(input_path)
+    atac_in = input_data.mod["atac"]
+    atac_in.varm["lsi"] = np.zeros(shape=(atac_in.n_vars, 50))
+    atac_in.obsm["X_lsi"] = np.zeros(shape=(atac_in.n_obs, 50))
+    atac_in.uns['lsi'] = "test"
+    tmp_file = tmp_path / "input_data_adjusted.h5mu"
+    input_data.write_h5mu(tmp_file)
+
+    run_component([
+        "--input", str(tmp_file),
+        "--output", str(output_path),
+        "--output_compression", "gzip",
+        "--overwrite",
+        "--num_components", "30",
+    ])
+
+    assert output_path.is_file()
+    atac_out = mu.read_h5mu(output_path).mod["atac"]
+    assert "X_lsi" in atac_out.obsm
+    assert atac_out.obsm["X_lsi"].shape == (atac_out.n_obs, 30)
+    assert "lsi" in atac_out.uns
+    assert "lsi" in atac_out.varm
+
+if __name__ == "__main__":
+    raise SystemExit(pytest.main([__file__]))
\ No newline at end of file
diff --git a/src/utils/subset_vars.py b/src/utils/subset_vars.py
index 10011c8fcca..64071e6d41a 100644
--- a/src/utils/subset_vars.py
+++ b/src/utils/subset_vars.py
@@ -1,5 +1,5 @@
 def subset_vars(adata, subset_col):
-    """Subset highly variable genes from AnnData object
+    """Subset AnnData object on highly variable genes
     
     Parameters
     ----------
@@ -13,4 +13,7 @@ def subset_vars(adata, subset_col):
     AnnData
         Copy of `adata` with subsetted features
     """
+    if not subset_col in adata.var.columns:
+        raise ValueError(f"Requested to use .var column '{subset_col}' as a selection of genes, but the column is not available.")
+
     return adata[:, adata.var[subset_col]].copy()