Skip to content

Commit

Permalink
Vitessce config generator (#40)
Browse files Browse the repository at this point in the history
* WIP

* path args

* derive name

* Docs

* Fix docs

* suppress instead

* specify name in both locations

* Add obs_sets and obs_embeddings params

* Fix for tests

* add partial lamin test

* fix docs again

* redirect parameter types

* Better return docs

* Fix mod link

* pin zarr

* pin skimage

* fix deps

* more undeclared deps

* more

* more

* pin vitessce min instead

* unpin skimage (incompatible with vitessce)

* Update pyproject.toml
  • Loading branch information
flying-sheep authored Oct 10, 2024
1 parent d5f0041 commit 932d2d6
Show file tree
Hide file tree
Showing 6 changed files with 182 additions and 3 deletions.
1 change: 1 addition & 0 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,4 +84,5 @@
pl.basic_plot
pl.BasicClass
pl.vitessce.gen_config
```
27 changes: 27 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,19 @@
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

from __future__ import annotations

# -- Path setup --------------------------------------------------------------
from contextlib import suppress
import sys
from datetime import datetime
from importlib.metadata import metadata
from pathlib import Path
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from sphinx.application import Sphinx


HERE = Path(__file__).parent
sys.path.insert(0, str(HERE / "extensions"))
Expand Down Expand Up @@ -59,6 +67,8 @@
"sphinx.ext.mathjax",
"IPython.sphinxext.ipython_console_highlighting",
"sphinxext.opengraph",
"scanpydoc.elegant_typehints",
"scanpydoc.definition_list_typed_field",
*[p.stem for p in (HERE / "extensions").glob("*.py")],
]

Expand Down Expand Up @@ -96,6 +106,9 @@
"anndata": ("https://anndata.readthedocs.io/en/stable", None),
"scanpy": ("https://scanpy.readthedocs.io/en/stable", None),
"numpy": ("https://numpy.org/doc/stable", None),
"zarr": ("https://zarr.readthedocs.io/en/stable", None),
"vitessce": ("https://python-docs.vitessce.io", None),
"lamin": ("https://docs.lamin.ai", None),
}

# List of patterns, relative to source directory, that match files and
Expand Down Expand Up @@ -130,3 +143,17 @@
# https://github.com/duckdb/duckdb-web/issues/3806
("py:class", "duckdb.duckdb.DuckDBPyConnection"),
]

# Redirect broken parameter annotation classes.
# Each key is a qualified name as it shows up in an annotation; the value is
# the name that should be documented/linked in its place.
# NOTE(review): presumably read by the `scanpydoc.elegant_typehints`
# extension listed in `extensions` — confirm against the scanpydoc docs.
qualname_overrides = {
    "zarr._storage.store.Store": "zarr.storage.MemoryStore",
    "lnschema_core.models.Artifact": "lamindb.Artifact",
}


def setup(app: Sphinx) -> None:
    """Initialise a lamindb instance so the docs can be built on CI.

    Parameters
    ----------
    app
        The Sphinx application object; not used by this hook.

    Notes
    -----
    A ``RuntimeError`` raised by the initialisation is deliberately ignored
    (presumably it signals an already-initialised instance — TODO confirm).
    """
    # Imported lazily so that merely loading this module does not need lamindb.
    import lamindb

    try:
        lamindb.setup.init(storage="/tmp/lamindb")
    except RuntimeError:
        pass
7 changes: 5 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,12 @@ optional-dependencies.dev = [
]
optional-dependencies.doc = [
"docutils>=0.8,!=0.18.*,!=0.19.*",
"ehrdata[lamin]",
"ehrdata[lamin,vitessce]",
"ipykernel",
"ipython",
"myst-nb>=1.1",
"pandas",
"scanpydoc",
# Until pybtex >0.23.0 releases: https://bitbucket.org/pybtex-devs/pybtex/issues/169/
"setuptools",
"sphinx>=4",
Expand All @@ -62,10 +63,12 @@ optional-dependencies.lamin = [
]
optional-dependencies.test = [
"coverage",
"ehrdata[vitessce,lamin]",
"pytest",
]
optional-dependencies.vitessce = [
"vitessce[all]",
"vitessce[all]>=3.4", # the actual dependency
"zarr<3", # vitessce does not support zarr>=3
]
# https://docs.pypi.org/project_metadata/#project-urls
urls.Documentation = "https://ehrdata.readthedocs.io/"
Expand Down
7 changes: 6 additions & 1 deletion src/ehrdata/pl/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
__all__ = ["BasicClass", "basic_plot"]
from importlib.util import find_spec

# Names that are always exported; optional submodules are appended below.
__all__ = ["BasicClass", "basic_plot"]

from .basic import BasicClass, basic_plot

# `vitessce` is an optional dependency. Only import *and* export the plotting
# submodule when the package can be found: listing it in `__all__`
# unconditionally makes `from ehrdata.pl import *` fail when it is missing.
if find_spec("vitessce"):
    from . import vitessce

    __all__ += ["vitessce"]
118 changes: 118 additions & 0 deletions src/ehrdata/pl/vitessce.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
from __future__ import annotations

from functools import reduce
from operator import or_, truediv
from pathlib import Path
from types import MappingProxyType
from typing import TYPE_CHECKING

from vitessce import AnnDataWrapper, VitessceConfig
from vitessce import Component as cm

if TYPE_CHECKING:
from collections.abc import Mapping

from lamindb import Artifact
from zarr.storage import Store


def gen_config(
    path: Path | None = None,
    *,
    store: Path | Store | None = None,
    url: str | None = None,
    artifact: Artifact | None = None,
    # arguments not about how the store goes in:
    name: str | None = None,
    obs_sets: Mapping[str, str] = MappingProxyType({"obs/gender_concept_id": "Gender Concept ID"}),
    obs_embeddings: Mapping[str, str] = MappingProxyType({"obsm/X_pca": "PCA"}),
) -> VitessceConfig:
    r"""Generate a VitessceConfig for EHRData.

    Parameters
    ----------
    path
        Path to the data’s Zarr store directory.
    store
        The data’s Zarr store or a path to it.
    url
        URL pointing to the data’s remote Zarr store.
    artifact
        Lamin artifact representing the data.
    name
        Name of the dataset.
        If `None`, derived from `artifact` (its description) or,
        failing that, from `path` (its stem).
    obs_sets
        Mapping of observation set paths to names, e.g.
        `{"obs/some_annotation": "My cool annotation"}`
    obs_embeddings
        Mapping of observation embedding paths to names, e.g.
        `{"obsm/X_pca": "PCA"}`.
        The scatterplot view displays the first embedding listed here.

    Returns
    -------
    A :doc:`Vitessce <vitessce:index>` configuration object.
    Call .\ :meth:`~vitessce.config.VitessceConfig.widget` on it to display it.

    Raises
    ------
    ValueError
        If `name` is `None` and neither `artifact` nor `path` is given.
    """
    obs_type = "person"
    feature_type = "variable"

    if name is None:
        if artifact is not None:
            name = artifact.description
        elif path is not None:
            name = path.stem
        else:
            msg = "`name` needs to be specified or derived from `path` or `artifact`."
            raise ValueError(msg)

    coordination = {
        "obsType": obs_type,
        "featureType": feature_type,
    }

    wrapper = AnnDataWrapper(
        adata_path=path,
        adata_url=url,
        # vitessce is old and doesn’t deal with proper Paths
        adata_store=str(store) if isinstance(store, Path) else store,
        adata_artifact=artifact,
        obs_set_paths=list(obs_sets),
        obs_set_names=list(obs_sets.values()),
        obs_embedding_paths=list(obs_embeddings),
        obs_embedding_names=list(obs_embeddings.values()),
        obs_feature_matrix_path="X",
        coordination_values=coordination,
    )

    vc = VitessceConfig(schema_version="1.0.15", name=name)
    dataset = vc.add_dataset(name=name).add_object(wrapper)

    # Point the scatterplot at an embedding that was actually registered
    # instead of a hard-coded "PCA"; `None` (no embeddings) leaves it unset.
    scatter_mapping = next(iter(obs_embeddings.values()), None)

    # One inner tuple per layout column (see the layout expression below).
    views = (
        (
            vc.add_view(cm.OBS_SETS, dataset=dataset),
            vc.add_view(cm.OBS_SET_SIZES, dataset=dataset),
            vc.add_view(cm.OBS_SET_FEATURE_VALUE_DISTRIBUTION, dataset=dataset),
        ),
        (
            vc.add_view(cm.FEATURE_LIST, dataset=dataset),
            vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=scatter_mapping),
            vc.add_view(cm.FEATURE_VALUE_HISTOGRAM, dataset=dataset),
        ),
        (
            vc.add_view(cm.DESCRIPTION, dataset=dataset),
            vc.add_view(cm.STATUS, dataset=dataset),
            vc.add_view(cm.HEATMAP, dataset=dataset),
        ),
    )

    # Make every view share the same obsType/featureType coordination values.
    vc.link_views(
        [view for row in views for view in row],
        list(coordination),
        list(coordination.values()),
    )

    # (a / b / c) | (d / e / f) | ...
    vc.layout(reduce(or_, (reduce(truediv, row) for row in views)))

    return vc
25 changes: 25 additions & 0 deletions tests/test_vitessce.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import numpy as np
import pytest
from anndata import AnnData

from ehrdata.pl.vitessce import gen_config


@pytest.fixture
def adata() -> AnnData:
    """Build a tiny AnnData (2 observations × 3 variables) for the tests.

    It carries a ``gender_concept_id`` obs column and an ``X_pca`` obsm entry.
    """
    matrix = np.array([[1, 2, 3], [4, 5, 6]])
    obs_columns = {"gender_concept_id": ["M", "F"]}
    embeddings = {"X_pca": np.array([[1, 2], [3, 4]])}
    return AnnData(X=matrix, obs=obs_columns, obsm=embeddings)


def test_gen_config(adata, tmp_path):
    """Smoke test: ``gen_config`` runs on a Zarr store written to disk."""
    zarr_path = tmp_path / "test.zarr"
    adata.write_zarr(zarr_path)
    gen_config(zarr_path)


# needs more setup until it works
# def test_gen_config_lamin(adata):
# artifact = ln.Artifact.from_anndata(adata, description="Test AnnData")
# gen_config(artifact=artifact)

0 comments on commit 932d2d6

Please sign in to comment.