Merge pull request #5 from NFDI4BIOIMAGE/dev

Adds molecules, status and description panels
NFDI4BIOIMAGE · Aug 27, 2024 · 739b47c · 739b47c
2 parents 349d72a + 9111686
commit 739b47c
Show file tree

Hide file tree

Showing 5 changed files with 310 additions and 201 deletions.
diff --git a/README.md b/README.md
@@ -72,7 +72,7 @@ This will open the vitessce viewer in a new tab using the first configuration fi
 
 ### Viewer window
 After autogenerating or selecting a config file, the Vitessce viewer is opened in a new tab:
-![Viewer GIF](https://github.com/user-attachments/assets/719a3093-2a6a-480d-afc9-522b4873310e)
+![Viewer GIF](https://github.com/user-attachments/assets/0f1effac-e681-4782-a69e-76702451fcbb)
 
 
 
@@ -91,21 +91,30 @@ The attachements must be in `.csv` format and the column with the cell identifie
 Only files attached to the dataset/image are available in the form.
 
 The following fields are available:
-`Config file name`    (required, "VitessceConfig-YYYY.MM.DD_HHMMSS.json"): Name of the config file to attach, a ".json" extension is added if missing. 	
-`Image`               (required): 	                                       OMERO Image to view, OME-NGFF images only.
-`Segmentation`        (optional, `None`): 	                               Segmentation to overlay on the image, OME-NGFF images only.
-`Cell identities`     (optional, `None`): 	                               `.csv` file with at least 2 columns: `Cell id column` and `Label column` defined in the 2 fileds below. 
-`Cell id column`      (optional, "cell_id"):   	                           Name of the `Cell id column` used in `Cell identities`, `Expression`, `Embeddings`.
-`Label column`        (optional, "label"): 	                               Name of the `Label` used in `Cell identities`.
-`Expression` 	      (optional, `None`):                                  `.csv` file with the `Cell id column` all other columns are considered as expression values and should be numerical.
-`Embeddings`          (optional, `None`):                                  `.csv` file with the `Cell id column` and the `Embedding x` and `Embedding y` columns defined in the 2 fileds below. 
-`Embedding x`         (optional, "UMAP_1"): 	                            Name of the `Embedding x` used in `Embeddings`.
-`Embedding y`         (optional, "UMAP_2"): 	                            Name of the `Embedding y` used in `Embeddings`.
-`Histograms`          (required , `True`): 	                                Add 3 plots showing: The number of transcripts per cell, the number of cells in each set, gene expression in each set.
-`Heatmap`             (required , `True`):                                  Adds an heatmap.
+- `Config file name`    (required, "VitessceConfig-YYYY.MM.DD_HHMMSS.json"): Name of the config file to attach, a ".json" extension is added if missing. 	
+- `Image`               (required): 	                                       OMERO Image to view, OME-NGFF images only.
+- `Segmentation`        (optional, `None`): 	                               Segmentation to overlay on the image, OME-NGFF images only.
+- `Cell identities`     (optional, `None`): 	                               `.csv` file with at least 2 columns: `Cell id column` and `Label column` defined in the 2 fileds below. 
+- `Cell id column`      (optional, "cell_id"):   	                           Name of the `Cell id column` used in `Cell identities`, `Expression`, `Embeddings`.
+- `Label column`        (optional, "label"): 	                               Name of the `Label` used in `Cell identities`.
+- `Expression` 	      (optional, `None`):                                  `.csv` file with the `Cell id column` all other columns are considered as expression values and should be numerical.
+- `Embeddings`          (optional, `None`):                                  `.csv` file with the `Cell id column` and the `Embedding x` and `Embedding y` columns defined in the 2 fileds below. 
+- `Embedding x`         (optional, "UMAP_1"): 	                            Name of the `Embedding x` used in `Embeddings`.
+- `Embedding y`         (optional, "UMAP_2"): 	                            Name of the `Embedding y` used in `Embeddings`.
+- `Molecules`           (optional, `None`): 	                               `.csv` file with at least 4 columns: Molecule id, label, x, y (headers in the fields below). 
+- `Molecule id`         (optional, "id"):   	                                Name of the `Molecule id column` used in `Molecules`.
+- `Molecule label`      (optional, "gene"):   	                            Name of the `Molecule label column` used in `Molecules`.
+- `Molecule x`          (optional, "x"):   	                                Name of the `Molecule x column` used in `Molecules`.
+- `Molecule y`          (optional, "y"):   	                                Name of the `Molecule y column` used in `Molecules`.
+- `Histograms`          (required , `True`): 	                                Add 3 plots showing: The number of transcripts per cell, the number of cells in each set, gene expression in each set.
+- `Heatmap`             (required , `True`):                                  Adds an heatmap.
+- `Status`              (required , `False`):                                 Adds a status panel to display info on the selected cell.
+- `Description`         (required , `False`):                                 Adds a description panel to display info on the dataset/image (taken from the description metadata field from OMERO).
 
 The `Expression` and `Cell identities` files are required to show the histograms.  
 The `Embeddings` file is necessary to show the cells in a scatterplot.
+The `Molecules` file is used to overlay molecules on the image. All molecules are displayed and selecting by gene is not yet possible.  
+The `Status` panel will be empty unless a `Segmentation` or `Embeddings` are provided.  
 
 #### Attaching preexisting config files
 Custom config files should have a `.json` extension and added as attachements to a dataset or an image.

diff --git a/omero_vitessce/forms.py b/omero_vitessce/forms.py
@@ -12,6 +12,10 @@ class ConfigForm(forms.Form):
     default_label_col = "label"
     default_embedding_x_col = "UMAP_1"
     default_embedding_y_col = "UMAP_2"
+    default_molecule_id = "id"
+    default_molecule_label = "gene"
+    default_molecule_x_col = "x"
+    default_molecule_y_col = "y"
 
     def __init__(self, file_names, file_urls,
                  img_names, img_urls, *args, **kwargs):
@@ -58,17 +62,45 @@ def __init__(self, file_names, file_urls,
                 empty_value=ConfigForm.default_embedding_y_col, strip=True,
                 min_length=1, max_length=20, required=False)
 
+        self.fields["molecules"] = forms.ChoiceField(
+                choices=self.text_choices, required=False)
+
+        self.fields["molecule id"] = forms.CharField(
+                empty_value=ConfigForm.default_molecule_id, strip=True,
+                min_length=1, max_length=20, required=False)
+
+        self.fields["molecule label"] = forms.CharField(
+                empty_value=ConfigForm.default_molecule_label, strip=True,
+                min_length=1, max_length=20, required=False)
+
+        self.fields["molecule x"] = forms.CharField(
+                empty_value=ConfigForm.default_molecule_x_col, strip=True,
+                min_length=1, max_length=20, required=False)
+
+        self.fields["molecule y"] = forms.CharField(
+                empty_value=ConfigForm.default_molecule_y_col, strip=True,
+                min_length=1, max_length=20, required=False)
+
         self.fields["histograms"] = forms.BooleanField(initial=True,
                                                        required=False)
         self.fields["heatmap"] = forms.BooleanField(initial=True,
                                                     required=False)
+        self.fields["status"] = forms.BooleanField(initial=False,
+                                                   required=False)
+        self.fields["description"] = forms.BooleanField(initial=False,
+                                                        required=False)
 
         # Set default values for CharField fields
         self.fields["config file name"].initial = filename
         self.fields["cell id column"].initial = ConfigForm.default_cell_id_col
         self.fields["label column"].initial = ConfigForm.default_label_col
         self.fields["embedding x"].initial = ConfigForm.default_embedding_x_col
         self.fields["embedding y"].initial = ConfigForm.default_embedding_y_col
+        self.fields["molecule id"].initial = ConfigForm.default_molecule_id
+        self.fields[
+                "molecule label"].initial = ConfigForm.default_molecule_label
+        self.fields["molecule x"].initial = ConfigForm.default_molecule_x_col
+        self.fields["molecule y"].initial = ConfigForm.default_molecule_y_col
 
     def make_config_file_name(self):
         """ Creates the default config file name with a timestamp:

diff --git a/omero_vitessce/utils.py b/omero_vitessce/utils.py
@@ -0,0 +1,252 @@
+import json
+from pathlib import Path
+
+from omero.util.temp_files import create_path
+
+from . import omero_vitessce_settings
+
+from vitessce import VitessceConfig, OmeZarrWrapper, MultiImageWrapper
+from vitessce import ViewType as Vt, FileType as Ft, CoordinationType as Ct
+from vitessce import hconcat, vconcat
+
+# Get the address of omeroweb from the config
+SERVER = omero_vitessce_settings.SERVER_ADDRESS[1:-1]
+
+
+def get_files_images(obj_type, obj_id, conn):
+    """ Gets all the non config files attached to an object,
+    and images if the object is a dataset,
+    and returns a list of file names and a list of urls
+    for the files and eventually the images
+    """
+    obj = conn.getObject(obj_type, obj_id)
+    file_names = [
+            i for i in obj.listAnnotations()
+            if i.OMERO_TYPE().NAME ==
+            "ome.model.annotations.FileAnnotation_name"]
+    file_names = [i for i in file_names
+                  if i.getFileName().endswith(".csv")]
+    file_urls = [i.getId() for i in file_names]
+    file_names = [i.getFileName() for i in file_names]
+    file_urls = [SERVER + "/webclient/annotation/" + str(i) for i in file_urls]
+
+    if obj_type == "dataset":
+        imgs = list(obj.listChildren())
+        img_urls = [build_zarr_image_url(i.getId()) for i in imgs]
+        img_names = [i.getName() for i in imgs]
+    else:
+        img_urls = [build_zarr_image_url(obj_id)]
+        img_names = [obj.getName()]
+
+    return file_names, file_urls, img_names, img_urls
+
+
+def build_viewer_url(config_id):
+    """ Generates urls like:
+    http://localhost:4080/omero_vitessce/?config=http://localhost:4080/webclient/annotation/999
+    """
+    return SERVER + "/omero_vitessce/?config=" + SERVER + \
+        "/webclient/annotation/" + str(config_id)
+
+
+def build_zarr_image_url(image_id):
+    """ Generates urls like:
+    http://localhost:4080/zarr/v0.4/image/99999.zarr/
+    """
+    return SERVER + "/zarr/v0.4/image/" + str(image_id) + ".zarr"
+
+
+def get_attached_configs(obj_type, obj_id, conn):
+    """ Gets all the ".json" files attached to an object
+    and returns a list of file names and a list of urls
+    generated with build_viewer_url
+    """
+    obj = conn.getObject(obj_type, obj_id)
+    config_files = [i for i in obj.listAnnotations()
+                    if i.OMERO_TYPE().NAME ==
+                    "ome.model.annotations.FileAnnotation_name"]
+    config_urls = [i.getId() for i in config_files
+                   if i.getFileName().endswith(".json")]
+    config_files = [i.getFileName() for i in config_files
+                    if i.getFileName().endswith(".json")]
+    config_urls = [build_viewer_url(i) for i in config_urls]
+    return config_files, config_urls
+
+
+def get_details(obj_type, obj_id, conn):
+    """ Gets all the ".json" files attached to an object
+    and returns a list of file names and a list of urls
+    generated with build_viewer_url
+    """
+    obj = conn.getObject(obj_type, obj_id)
+    name = obj.getName()
+    description = obj.getDescription()
+    if not description:
+        description = "Generated with omero-vitessce"
+    return description, name
+
+
+def add_molecules(config_args, vc_dataset):
+    """
+    Adds a file with molecule labels and locations to a vitessce dataset
+    """
+    vc_dataset = vc_dataset.add_file(
+        url=config_args.get("molecules"),
+        file_type=Ft.OBS_LOCATIONS_CSV,
+        coordination_values={
+            "obsType": "molecule"},
+        options={
+            "obsIndex": config_args.get("molecule id"),
+            "obsLocations": [config_args.get("molecule x"),
+                             config_args.get("molecule y")]})
+    vc_dataset = vc_dataset.add_file(
+        url=config_args.get("molecules"),
+        file_type=Ft.OBS_LABELS_CSV,
+        coordination_values={
+            "obsType": "molecule"},
+        options={
+            "obsIndex": config_args.get("molecule id"),
+            "obsLabels": config_args.get("molecule label")})
+    return vc_dataset
+
+
+def add_embeddings(config_args, vc_dataset):
+    """
+    Adds a file with cell embeddings to a vitessce dataset
+    """
+    vc_dataset = vc_dataset.add_file(
+        url=config_args.get("embeddings"),
+        file_type=Ft.OBS_EMBEDDING_CSV,
+        coordination_values={
+            "obsType": "cell",
+            "embeddingType": "cell"},
+        options={
+            "obsIndex": config_args.get("cell id column"),
+            "obsEmbedding": [config_args.get("embedding x"),
+                             config_args.get("embedding y")]})
+    return vc_dataset
+
+
+def add_cell_identities(config_args, vc_dataset):
+    """
+    Adds a file with cell identities to a vitessce dataset
+    """
+    vc_dataset = vc_dataset.add_file(
+        url=config_args.get("cell identities"),
+        file_type=Ft.OBS_SETS_CSV,
+        coordination_values={"obsType": "cell"},
+        options={
+            "obsIndex": config_args.get("cell id column"),
+            "obsSets": [
+                {"name": "Clustering",
+                 "column": config_args.get("label column")}]})
+    return vc_dataset
+
+
+def create_config(config_args, obj_type, obj_id, conn):
+    """
+    Generates a Vitessce config and returns it,
+    the results from the form are used as args.
+    """
+
+    description, name = get_details(obj_type, obj_id, conn)
+
+    vc = VitessceConfig(schema_version="1.0.16",
+                        name=name, description=description)
+    vc_dataset = vc.add_dataset()
+
+    img_url = config_args.get("image")
+    images = [OmeZarrWrapper(img_url=img_url, name="Image")]
+
+    sp = vc.add_view(Vt.SPATIAL, dataset=vc_dataset)
+    lc = vc.add_view(Vt.LAYER_CONTROLLER, dataset=vc_dataset)
+
+    displays = [sp]     # Heatmap, scatterplot and image
+    controllers = [lc]  # Spatial layer, gene and cell set selectors
+    hists = []          # Histograms and violin plots
+    texts = []          # Status and description
+
+    if config_args.get("cell identities"):
+        vc_dataset = add_cell_identities(config_args, vc_dataset)
+        os = vc.add_view(Vt.OBS_SETS, dataset=vc_dataset)
+        controllers.append(os)
+    if config_args.get("expression"):
+        vc_dataset = vc_dataset.add_file(
+            url=config_args.get("expression"),
+            file_type=Ft.OBS_FEATURE_MATRIX_CSV)
+        fl = vc.add_view(Vt.FEATURE_LIST, dataset=vc_dataset)
+        controllers.append(fl)
+    if config_args.get("embeddings"):
+        vc_dataset = add_embeddings(config_args, vc_dataset)
+        e_type = vc.add_coordination(Ct.EMBEDDING_TYPE)[0]
+        e_type.set_value("cell")
+        sc = vc.add_view(Vt.SCATTERPLOT, dataset=vc_dataset)
+        sc.use_coordination(e_type)
+        displays.append(sc)
+    if config_args.get("expression") and config_args.get("cell identities"):
+        if config_args.get("histograms"):
+            fh = vc.add_view(Vt.FEATURE_VALUE_HISTOGRAM, dataset=vc_dataset)
+            oh = vc.add_view(Vt.OBS_SET_SIZES, dataset=vc_dataset)
+            fd = vc.add_view(Vt.OBS_SET_FEATURE_VALUE_DISTRIBUTION,
+                             dataset=vc_dataset)
+            hists.extend([fh, oh, fd])
+        if config_args.get("heatmap"):
+            hm = vc.add_view(Vt.HEATMAP, dataset=vc_dataset)
+            displays.append(hm)
+    if config_args.get("segmentation"):
+        segmentation = OmeZarrWrapper(
+                img_url=config_args.get("segmentation"),
+                name="Segmentation", is_bitmask=True)
+        images.append(segmentation)
+    if config_args.get("molecules"):
+        vc_dataset = add_molecules(config_args, vc_dataset)
+        vc.link_views([sp, lc], c_types=[Ct.SPATIAL_POINT_LAYER],
+                      c_values=[{"opacity": 1, "radius": 2, "visible": True}])
+    if config_args.get("description"):
+        de = vc.add_view(Vt.DESCRIPTION, dataset=vc_dataset)
+        texts.append(de)
+    if config_args.get("status"):
+        st = vc.add_view(Vt.STATUS, dataset=vc_dataset)
+        texts.append(st)
+
+    vc_dataset.add_object(MultiImageWrapper(image_wrappers=images,
+                                            use_physical_size_scaling=True))
+    vc.add_coordination_by_dict({
+        Ct.SPATIAL_ZOOM: 2,
+        Ct.SPATIAL_TARGET_X: 0,
+        Ct.SPATIAL_TARGET_Y: 0
+    })
+
+    displays = hconcat(*displays)
+    controllers = hconcat(*controllers)
+    if texts:
+        texts = vconcat(*texts)
+        controllers = hconcat(texts, controllers)
+    if hists:
+        hists = hconcat(*hists)
+        controllers = hconcat(controllers, hists)
+    vc.layout(vconcat(displays, controllers))
+
+    return vc
+
+
+def attach_config(vc, obj_type, obj_id, filename, conn):
+    """
+    Generates a Vitessce config for an OMERO image and returns it.
+    Assumes the images is an OME NGFF v0.4 file
+    which can be served with omero-web-zarr.
+    """
+    config_path = create_path("omero-vitessce", ".tmp", folder=True)
+    if not filename.endswith(".json"):
+        filename = filename + ".json"
+    filename = Path(filename).name  # Sanitize filename
+
+    config_path = Path(config_path).joinpath(filename)
+    with open(config_path, "w") as outfile:
+        json.dump(vc.to_dict(), outfile, indent=4, sort_keys=False)
+
+    file_ann = conn.createFileAnnfromLocalFile(
+        config_path, mimetype="text/plain")
+    obj = conn.getObject(obj_type, obj_id)
+    obj.linkAnnotation(file_ann)
+    return file_ann.getId()