created bia-export base package (#125)

* created bia-export base package * updated README
BioImage-Archive · Jul 23, 2024 · 07e87d0 · 07e87d0
1 parent 66527f0
commit 07e87d0
Show file tree

Hide file tree

Showing 16 changed files with 447 additions and 0 deletions.
diff --git a/bia-export/.vscode/settings.json b/bia-export/.vscode/settings.json
@@ -0,0 +1,15 @@
+{
+    "python.testing.pytestArgs": [
+        "."
+    ],
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true,
+    "[python]": {
+        "editor.defaultFormatter": "ms-python.black-formatter",
+        "editor.formatOnSave": true
+    },
+    "[json]": {
+        "editor.insertSpaces": true,
+        "editor.tabSize": 2
+    }
+}
diff --git a/bia-export/README.md b/bia-export/README.md
@@ -0,0 +1,36 @@
+BIA Export
+==========
+
+Export data from the BIA to feed static pages, and other downstream consumers. This:
+
+* Selects attributes for studies stored in local files
+* Transforms to a specific export format
+* Writes the result to a JSON file
+
+The expectation is to use this on the output from the bia-ingest package, which can cache the documents that will be uploaded to the API as local files.
+
+This does not yet:
+
+* Cover images, or even complete study metadata
+* Pull data from the BIA Integrator API
+* Derive information from OME-Zarr representations (physical dimensions, axis sizes)
+
+Installation
+------------
+
+1. Clone the repository.
+2. Run `poetry install`
+
+Setup
+-----
+
+None required post installation
+
+Usage
+-----
+
+Run:
+
+    poetry run bia-export website-study S-BIADTEST -o bia_export.json -r test/input_data 
+
+This will create `bia_export.json` using the example test data.
diff --git a/bia-export/bia_export/__init__.py b/bia-export/bia_export/__init__.py
diff --git a/bia-export/bia_export/cli.py b/bia-export/bia_export/cli.py
@@ -0,0 +1,35 @@
+import typer
+import logging
+from rich.logging import RichHandler
+from typing_extensions import Annotated
+from pathlib import Path
+from .website_conversion import create_study
+
+# Configure the root logger at import time: level NOTSET lets every record
+# through, and RichHandler is the only handler that receives them.
+logging.basicConfig(
+    level="NOTSET", format="%(message)s", datefmt="[%X]", handlers=[RichHandler()]
+)
+# Root logger (getLogger() is called without a name).
+logger = logging.getLogger()
+
+# Typer application object; the commands below register themselves on it
+# through the @app.command() decorator.
+app = typer.Typer()
+
+
+@app.command()
+def website_study(
+    accession_id: Annotated[str, typer.Argument(help="Accession ID of the study to export")],
+    root_directory: Annotated[Path, typer.Option("--root", "-r", help="If root directory specified then use files there, rather than calling API")] = None,
+    output_filename: Annotated[Path, typer.Option("--out_file", "-o",)] = Path("bia-images-export.json")
+    ):
+
+    abs_root = root_directory.resolve()
+    study = create_study(accession_id, abs_root)
+
+    with open(output_filename, "w") as output:
+        output.write(study.model_dump_json(indent=4)) 
+
+
+@app.command()
+def website_image():
+    pass
+
+# Run the Typer application when this module is executed directly.
+if __name__ == "__main__":
+    app()
diff --git a/bia-export/bia_export/website_conversion.py b/bia-export/bia_export/website_conversion.py
@@ -0,0 +1,50 @@
+
+from pathlib import Path
+import json
+import logging
+from .website_models import (
+    Study,
+    ExperimentalImagingDataset
+)
+from glob import glob
+from typing import List
+
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+def create_study(
+        accession_id: str,
+        root_directory: Path
+) -> Study:
+    if root_directory:
+        study_path = root_directory.joinpath(f'studies/{accession_id}.json')
+
+        logger.info(f'Loading study from {study_path}')
+
+        with open(study_path, "r") as study_file:
+            study_dict = json.load(study_file)
+
+        study_dict["experimental_imaging_component"] = convert_experimental_imaging_datasets(accession_id, root_directory)
+
+        study = Study(**study_dict)
+
+        return study
+
+
+def convert_experimental_imaging_datasets(accession_id: str, root_directory: Path = None) -> List[ExperimentalImagingDataset]:
+    datasets = []
+    if root_directory:
+
+        eid_directory = root_directory.joinpath(f'experimental_imaging_datasets/{accession_id}/*.json')
+        eid_paths = glob(str(eid_directory))
+
+        for eid_path in eid_paths:
+
+            logger.info(f'Loading study from {eid_path}')
+
+            with open(eid_path, "r") as eid_file:
+                eid_dict = json.load(eid_file)
+            eid = ExperimentalImagingDataset(**eid_dict)
+            datasets.append(eid)
+
+    return datasets
+
diff --git a/bia-export/bia_export/website_models.py b/bia-export/bia_export/website_models.py
@@ -0,0 +1,11 @@
+from __future__ import annotations
+from pydantic import Field
+from typing import List, Optional
+from bia_shared_datamodels import bia_data_model, semantic_models
+
+
+class Study(semantic_models.Study, bia_data_model.DocumentMixin):
+    experimental_imaging_component: Optional[List[ExperimentalImagingDataset]] = Field(default_factory=list, description="""A dataset of that is associated with the study.""")
+
+# Export-side dataset model: combines the shared ExperimentalImagingDataset
+# with the document mixin and adds no fields of its own.
+class ExperimentalImagingDataset(semantic_models.ExperimentalImagingDataset, bia_data_model.DocumentMixin):
+    pass
diff --git a/bia-export/pyproject.toml b/bia-export/pyproject.toml
@@ -0,0 +1,33 @@
+[tool.poetry]
+name = "bia-export"
+version = "0.1.0"
+description = "BIA export functionality"
+authors = ["Matthew Hartley <[email protected]>"]
+readme = "README.md"
+packages = [{include = "bia_export"}]
+
+[tool.poetry.scripts]
+bia-export = "bia_export.cli:app"
+
+[tool.poetry.dependencies]
+python = "^3.10,<3.12"
+pydantic = "^2"
+bia-shared-datamodels = { path = "../bia-shared-datamodels", develop = true }
+zarr = "^2.16.1"
+fsspec = "^2023.10.0"
+requests = "^2.31.0"
+aiohttp = "^3.9.1"
+python-dotenv = "^1.0.0"
+typer = "^0.9.0"
+rich = "^13.7.0"
+ruamel-yaml = "^0.18.5"
+
+
+[tool.poetry.group.dev.dependencies]
+ipython = "^8.22.2"
+pytest = "^7.4.3"
+pytest-mock = "^3.14.0"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
diff --git a/bia-export/test/conftest.py b/bia-export/test/conftest.py
@@ -0,0 +1 @@
+import pytest
diff --git a/bia-export/test/input_data/biosample/S-BIADTEST/64a67727-4e7c-469a-91c4-6219ae072e99.json b/bia-export/test/input_data/biosample/S-BIADTEST/64a67727-4e7c-469a-91c4-6219ae072e99.json
@@ -0,0 +1,21 @@
+{
+    "title_id": "Test Biosample 1",
+    "uuid": "64a67727-4e7c-469a-91c4-6219ae072e99",
+    "organism_classification": [
+        {
+            "common_name": "human",
+            "scientific_name": "Homo sapiens",
+            "ncbi_id": null
+        }
+    ],
+    "biological_entity_description": "Test biological entity 1",
+    "experimental_variable_description": [
+        "Test experimental entity 1"
+    ],
+    "extrinsic_variable_description": [
+        "Test extrinsic variable 1"
+    ],
+    "intrinsic_variable_description": [
+        "Test intrinsic variable 1\\nwith escaped character"
+    ]
+}
diff --git a/bia-export/test/input_data/biosample/S-BIADTEST/6950718c-4917-47a1-a807-11b874e80a23.json b/bia-export/test/input_data/biosample/S-BIADTEST/6950718c-4917-47a1-a807-11b874e80a23.json
@@ -0,0 +1,21 @@
+{
+    "title_id": "Test Biosample 2 ",
+    "uuid": "6950718c-4917-47a1-a807-11b874e80a23",
+    "organism_classification": [
+        {
+            "common_name": "mouse",
+            "scientific_name": "Mus musculus",
+            "ncbi_id": null
+        }
+    ],
+    "biological_entity_description": "Test biological entity 2",
+    "experimental_variable_description": [
+        "Test experimental entity 2"
+    ],
+    "extrinsic_variable_description": [
+        "Test extrinsic variable 2"
+    ],
+    "intrinsic_variable_description": [
+        "Test intrinsic variable 2"
+    ]
+}
diff --git a/...t_data/experimental_imaging_datasets/S-BIADTEST/47a4ab60-c76d-4424-bfaa-c2a024de720c.json b/...t_data/experimental_imaging_datasets/S-BIADTEST/47a4ab60-c76d-4424-bfaa-c2a024de720c.json
@@ -0,0 +1,26 @@
+{
+    "title_id": "Study Component 1",
+    "uuid": "47a4ab60-c76d-4424-bfaa-c2a024de720c",
+    "file_reference_count": 4,
+    "description": "Description of study component 1",
+    "acquisition_process": [
+        "c2e44a1b-a43c-476e-8ddf-8587f4c955b3"
+    ],
+    "specimen_imaging_preparation_protocol": [
+        "7199d730-29f1-4ad8-b599-e9089cbb2d7b"
+    ],
+    "biological_entity": [
+        "64a67727-4e7c-469a-91c4-6219ae072e99",
+        "6950718c-4917-47a1-a807-11b874e80a23"
+    ],
+    "specimen_growth_protocol": [],
+    "analysis_method": [
+        {
+            "protocol_description": "Test image analysis",
+            "features_analysed": "Test image analysis overview"
+        }
+    ],
+    "correlation_method": [],
+    "example_image_uri": [],
+    "image_count": 0
+}
diff --git a/...t/test/input_data/image_acquisitions/S-BIADTEST/c2e44a1b-a43c-476e-8ddf-8587f4c955b3.json b/...t/test/input_data/image_acquisitions/S-BIADTEST/c2e44a1b-a43c-476e-8ddf-8587f4c955b3.json
@@ -0,0 +1,8 @@
+{
+    "title_id": "Test Primary Screen Image Acquisition",
+    "uuid": "c2e44a1b-a43c-476e-8ddf-8587f4c955b3",
+    "protocol_description": "Test image acquisition parameters 1",
+    "imaging_instrument_description": "Test imaging instrument 1",
+    "fbbi_id": [],
+    "imaging_method_name": "confocal microscopy"
+}
diff --git a/...ecimen_imaging_preparation_protocols/S-BIADTEST/7199d730-29f1-4ad8-b599-e9089cbb2d7b.json b/...ecimen_imaging_preparation_protocols/S-BIADTEST/7199d730-29f1-4ad8-b599-e9089cbb2d7b.json
@@ -0,0 +1,6 @@
+{
+    "title_id": "Test specimen 1",
+    "uuid": "7199d730-29f1-4ad8-b599-e9089cbb2d7b",
+    "protocol_description": "Test sample preparation protocol 1",
+    "signal_channel_information": []
+}
diff --git a/bia-export/test/input_data/studies/S-BIADTEST.json b/bia-export/test/input_data/studies/S-BIADTEST.json
@@ -0,0 +1,75 @@
+{
+    "uuid": "a2fdbd58-ee11-4cd9-bc6a-f3d3da7fff71",
+    "accession_id": "S-BIADTEST",
+    "licence": "CC0",
+    "author": [
+        {
+            "rorid": null,
+            "address": null,
+            "website": null,
+            "orcid": "0000-0000-0000-0000",
+            "display_name": "Test Author1",
+            "affiliation": [
+                {
+                    "rorid": null,
+                    "address": null,
+                    "website": null,
+                    "display_name": "Test College 1"
+                }
+            ],
+            "contact_email": "[email protected]",
+            "role": "corresponding author"
+        },
+        {
+            "rorid": null,
+            "address": null,
+            "website": null,
+            "orcid": "1111-1111-1111-1111",
+            "display_name": "Test Author2",
+            "affiliation": [
+                {
+                    "rorid": null,
+                    "address": null,
+                    "website": null,
+                    "display_name": "Test College 2"
+                }
+            ],
+            "contact_email": "[email protected]",
+            "role": "first author"
+        }
+    ],
+    "title": "A test submission with title greater than 25 characters",
+    "release_date": "2024-02-13",
+    "keyword": [
+        "Test keyword1",
+        "Test keyword2",
+        "Test keyword3"
+    ],
+    "acknowledgement": "We thank you",
+    "description": "A test submission to allow testing without retrieving from bia server",
+    "see_also": [],
+    "related_publication": [],
+    "grant": [
+        {
+            "id": "TESTFUNDS1",
+            "funder": [
+                {
+                    "display_name": "Test funding body1",
+                    "id": null
+                }
+            ]
+        },
+        {
+            "id": "TESTFUNDS2",
+            "funder": [
+                {
+                    "display_name": "Test funding body2",
+                    "id": null
+                }
+            ]
+        }
+    ],
+    "funding_statement": "This work was funded by the EBI",
+    "annotation_component": [],
+    "attribute": {}
+}