From b4f218456e95afcaec73d27ac5bf44e37f9f738b Mon Sep 17 00:00:00 2001 From: sherwoodf <161822064+sherwoodf@users.noreply.github.com> Date: Tue, 23 Jul 2024 16:47:55 +0100 Subject: [PATCH] created bia-export base package (#125) * created bia-export base package * updated README --- bia-export/.vscode/settings.json | 15 ++++ bia-export/README.md | 36 ++++++++ bia-export/bia_export/__init__.py | 0 bia-export/bia_export/cli.py | 35 ++++++++ bia-export/bia_export/website_conversion.py | 50 +++++++++++ bia-export/bia_export/website_models.py | 11 +++ bia-export/pyproject.toml | 33 +++++++ bia-export/test/conftest.py | 1 + .../64a67727-4e7c-469a-91c4-6219ae072e99.json | 21 +++++ .../6950718c-4917-47a1-a807-11b874e80a23.json | 21 +++++ .../47a4ab60-c76d-4424-bfaa-c2a024de720c.json | 26 ++++++ .../c2e44a1b-a43c-476e-8ddf-8587f4c955b3.json | 8 ++ .../7199d730-29f1-4ad8-b599-e9089cbb2d7b.json | 6 ++ .../test/input_data/studies/S-BIADTEST.json | 75 ++++++++++++++++ bia-export/test/output_data/bia_export.json | 88 +++++++++++++++++++ bia-export/test/test_local_convert.py | 21 +++++ 16 files changed, 447 insertions(+) create mode 100644 bia-export/.vscode/settings.json create mode 100644 bia-export/README.md create mode 100644 bia-export/bia_export/__init__.py create mode 100644 bia-export/bia_export/cli.py create mode 100644 bia-export/bia_export/website_conversion.py create mode 100644 bia-export/bia_export/website_models.py create mode 100644 bia-export/pyproject.toml create mode 100644 bia-export/test/conftest.py create mode 100644 bia-export/test/input_data/biosample/S-BIADTEST/64a67727-4e7c-469a-91c4-6219ae072e99.json create mode 100644 bia-export/test/input_data/biosample/S-BIADTEST/6950718c-4917-47a1-a807-11b874e80a23.json create mode 100644 bia-export/test/input_data/experimental_imaging_datasets/S-BIADTEST/47a4ab60-c76d-4424-bfaa-c2a024de720c.json create mode 100644 bia-export/test/input_data/image_acquisitions/S-BIADTEST/c2e44a1b-a43c-476e-8ddf-8587f4c955b3.json 
create mode 100644 bia-export/test/input_data/specimen_imaging_preparation_protocols/S-BIADTEST/7199d730-29f1-4ad8-b599-e9089cbb2d7b.json create mode 100644 bia-export/test/input_data/studies/S-BIADTEST.json create mode 100644 bia-export/test/output_data/bia_export.json create mode 100644 bia-export/test/test_local_convert.py diff --git a/bia-export/.vscode/settings.json b/bia-export/.vscode/settings.json new file mode 100644 index 00000000..9ff30146 --- /dev/null +++ b/bia-export/.vscode/settings.json @@ -0,0 +1,15 @@ +{ + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": true + }, + "[json]": { + "editor.insertSpaces": true, + "editor.tabSize": 2 + } +} \ No newline at end of file diff --git a/bia-export/README.md b/bia-export/README.md new file mode 100644 index 00000000..8fcea7bc --- /dev/null +++ b/bia-export/README.md @@ -0,0 +1,36 @@ +BIA Export +========== + +Export data from the BIA to feed static pages, and other downstream consumers. This: + +* Selects attributes for studies stored in local files +* Transforms to a specific export format +* Writes the result to a JSON file + +The expectation is to use this on the output from the bia-ingest package, which can cache the documents that will be uploaded to the API as local files. + +This does not yet: + +* Cover images, or even complete study metadata +* Pull data from the BIA Integrator API +* Derive information from OME-Zarr representations (physical dimensions, axis sizes) + +Installation +------------ + +1. Clone the repository. +2. Run `poetry install` + +Setup +----- + +None required post installation + +Usage +----- + +Run: + + poetry run bia-export website-study S-BIADTEST -o bia_export.json -r test/input_data + +This will create `bia_export.json` using the example test data. 
@app.command()
def website_study(
    accession_id: Annotated[
        str, typer.Argument(help="Accession ID of the study to export")
    ],
    root_directory: Annotated[
        Path,
        typer.Option(
            "--root",
            "-r",
            help="If root directory specified then use files there, rather than calling API",
        ),
    ] = None,
    output_filename: Annotated[
        Path,
        typer.Option(
            "--out_file",
            "-o",
        ),
    ] = Path("bia-images-export.json"),
):
    """Export a single study, read from local files, as website JSON.

    The study is built by create_study() from the documents under the
    root directory and written to the output file as indented JSON.
    """
    # Only the local-file source is implemented. Without this guard a missing
    # --root crashed with AttributeError on `None.resolve()`; report it as a
    # normal CLI usage error instead.
    if root_directory is None:
        raise typer.BadParameter(
            "a root directory of local files is required; "
            "exporting directly from the API is not implemented yet",
            param_hint="--root",
        )

    abs_root = root_directory.resolve()
    study = create_study(accession_id, abs_root)

    # Explicit encoding so the exported JSON does not depend on the platform's
    # default locale encoding.
    with open(output_filename, "w", encoding="utf-8") as output:
        output.write(study.model_dump_json(indent=4))
def convert_experimental_imaging_datasets(
    accession_id: str, root_directory: Path = None
) -> List[ExperimentalImagingDataset]:
    """Load every experimental imaging dataset document stored for a study.

    Reads each ``experimental_imaging_datasets/<accession_id>/*.json`` file
    under ``root_directory`` and builds an ExperimentalImagingDataset from
    its contents.

    Args:
        accession_id: Accession ID of the study; used as the sub-directory name.
        root_directory: Directory holding the locally cached documents. When
            it is None (or otherwise falsy) nothing is read.

    Returns:
        The datasets ordered by file path; an empty list when no root
        directory is given or no matching files exist.
    """
    datasets = []
    if not root_directory:
        return datasets

    eid_pattern = root_directory.joinpath(
        f"experimental_imaging_datasets/{accession_id}/*.json"
    )

    # glob() returns matches in a filesystem-dependent order; sort them so the
    # exported list (and anything compared against it) is reproducible.
    for eid_path in sorted(glob(str(eid_pattern))):
        logger.info(f"Loading experimental imaging dataset from {eid_path}")

        with open(eid_path, "r", encoding="utf-8") as eid_file:
            eid_dict = json.load(eid_file)
        datasets.append(ExperimentalImagingDataset(**eid_dict))

    return datasets
+[tool.poetry.scripts] +bia-export = "bia_export.cli:app" + +[tool.poetry.dependencies] +python = "^3.10,<3.12" +pydantic = "^2" +bia-shared-datamodels = { path = "../bia-shared-datamodels", develop = true } +zarr = "^2.16.1" +fsspec = "^2023.10.0" +requests = "^2.31.0" +aiohttp = "^3.9.1" +python-dotenv = "^1.0.0" +typer = "^0.9.0" +rich = "^13.7.0" +ruamel-yaml = "^0.18.5" + + +[tool.poetry.group.dev.dependencies] +ipython = "^8.22.2" +pytest = "^7.4.3" +pytest-mock = "^3.14.0" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/bia-export/test/conftest.py b/bia-export/test/conftest.py new file mode 100644 index 00000000..5871ed8e --- /dev/null +++ b/bia-export/test/conftest.py @@ -0,0 +1 @@ +import pytest diff --git a/bia-export/test/input_data/biosample/S-BIADTEST/64a67727-4e7c-469a-91c4-6219ae072e99.json b/bia-export/test/input_data/biosample/S-BIADTEST/64a67727-4e7c-469a-91c4-6219ae072e99.json new file mode 100644 index 00000000..a2ec6aa4 --- /dev/null +++ b/bia-export/test/input_data/biosample/S-BIADTEST/64a67727-4e7c-469a-91c4-6219ae072e99.json @@ -0,0 +1,21 @@ +{ + "title_id": "Test Biosample 1", + "uuid": "64a67727-4e7c-469a-91c4-6219ae072e99", + "organism_classification": [ + { + "common_name": "human", + "scientific_name": "Homo sapiens", + "ncbi_id": null + } + ], + "biological_entity_description": "Test biological entity 1", + "experimental_variable_description": [ + "Test experimental entity 1" + ], + "extrinsic_variable_description": [ + "Test extrinsic variable 1" + ], + "intrinsic_variable_description": [ + "Test intrinsic variable 1\\nwith escaped character" + ] +} \ No newline at end of file diff --git a/bia-export/test/input_data/biosample/S-BIADTEST/6950718c-4917-47a1-a807-11b874e80a23.json b/bia-export/test/input_data/biosample/S-BIADTEST/6950718c-4917-47a1-a807-11b874e80a23.json new file mode 100644 index 00000000..d74e079e --- /dev/null +++ 
b/bia-export/test/input_data/biosample/S-BIADTEST/6950718c-4917-47a1-a807-11b874e80a23.json @@ -0,0 +1,21 @@ +{ + "title_id": "Test Biosample 2 ", + "uuid": "6950718c-4917-47a1-a807-11b874e80a23", + "organism_classification": [ + { + "common_name": "mouse", + "scientific_name": "Mus musculus", + "ncbi_id": null + } + ], + "biological_entity_description": "Test biological entity 2", + "experimental_variable_description": [ + "Test experimental entity 2" + ], + "extrinsic_variable_description": [ + "Test extrinsic variable 2" + ], + "intrinsic_variable_description": [ + "Test intrinsic variable 2" + ] +} \ No newline at end of file diff --git a/bia-export/test/input_data/experimental_imaging_datasets/S-BIADTEST/47a4ab60-c76d-4424-bfaa-c2a024de720c.json b/bia-export/test/input_data/experimental_imaging_datasets/S-BIADTEST/47a4ab60-c76d-4424-bfaa-c2a024de720c.json new file mode 100644 index 00000000..7d62a574 --- /dev/null +++ b/bia-export/test/input_data/experimental_imaging_datasets/S-BIADTEST/47a4ab60-c76d-4424-bfaa-c2a024de720c.json @@ -0,0 +1,26 @@ +{ + "title_id": "Study Component 1", + "uuid": "47a4ab60-c76d-4424-bfaa-c2a024de720c", + "file_reference_count": 4, + "description": "Description of study component 1", + "acquisition_process": [ + "c2e44a1b-a43c-476e-8ddf-8587f4c955b3" + ], + "specimen_imaging_preparation_protocol": [ + "7199d730-29f1-4ad8-b599-e9089cbb2d7b" + ], + "biological_entity": [ + "64a67727-4e7c-469a-91c4-6219ae072e99", + "6950718c-4917-47a1-a807-11b874e80a23" + ], + "specimen_growth_protocol": [], + "analysis_method": [ + { + "protocol_description": "Test image analysis", + "features_analysed": "Test image analysis overview" + } + ], + "correlation_method": [], + "example_image_uri": [], + "image_count": 0 +} \ No newline at end of file diff --git a/bia-export/test/input_data/image_acquisitions/S-BIADTEST/c2e44a1b-a43c-476e-8ddf-8587f4c955b3.json 
b/bia-export/test/input_data/image_acquisitions/S-BIADTEST/c2e44a1b-a43c-476e-8ddf-8587f4c955b3.json new file mode 100644 index 00000000..175e697d --- /dev/null +++ b/bia-export/test/input_data/image_acquisitions/S-BIADTEST/c2e44a1b-a43c-476e-8ddf-8587f4c955b3.json @@ -0,0 +1,8 @@ +{ + "title_id": "Test Primary Screen Image Acquisition", + "uuid": "c2e44a1b-a43c-476e-8ddf-8587f4c955b3", + "protocol_description": "Test image acquisition parameters 1", + "imaging_instrument_description": "Test imaging instrument 1", + "fbbi_id": [], + "imaging_method_name": "confocal microscopy" +} \ No newline at end of file diff --git a/bia-export/test/input_data/specimen_imaging_preparation_protocols/S-BIADTEST/7199d730-29f1-4ad8-b599-e9089cbb2d7b.json b/bia-export/test/input_data/specimen_imaging_preparation_protocols/S-BIADTEST/7199d730-29f1-4ad8-b599-e9089cbb2d7b.json new file mode 100644 index 00000000..34de5ae8 --- /dev/null +++ b/bia-export/test/input_data/specimen_imaging_preparation_protocols/S-BIADTEST/7199d730-29f1-4ad8-b599-e9089cbb2d7b.json @@ -0,0 +1,6 @@ +{ + "title_id": "Test specimen 1", + "uuid": "7199d730-29f1-4ad8-b599-e9089cbb2d7b", + "protocol_description": "Test sample preparation protocol 1", + "signal_channel_information": [] +} \ No newline at end of file diff --git a/bia-export/test/input_data/studies/S-BIADTEST.json b/bia-export/test/input_data/studies/S-BIADTEST.json new file mode 100644 index 00000000..02c9edc1 --- /dev/null +++ b/bia-export/test/input_data/studies/S-BIADTEST.json @@ -0,0 +1,75 @@ +{ + "uuid": "a2fdbd58-ee11-4cd9-bc6a-f3d3da7fff71", + "accession_id": "S-BIADTEST", + "licence": "CC0", + "author": [ + { + "rorid": null, + "address": null, + "website": null, + "orcid": "0000-0000-0000-0000", + "display_name": "Test Author1", + "affiliation": [ + { + "rorid": null, + "address": null, + "website": null, + "display_name": "Test College 1" + } + ], + "contact_email": "test_author1@ebi.ac.uk", + "role": "corresponding author" + }, + { + 
"rorid": null, + "address": null, + "website": null, + "orcid": "1111-1111-1111-1111", + "display_name": "Test Author2", + "affiliation": [ + { + "rorid": null, + "address": null, + "website": null, + "display_name": "Test College 2" + } + ], + "contact_email": "test_author2@ebi.ac.uk", + "role": "first author" + } + ], + "title": "A test submission with title greater than 25 characters", + "release_date": "2024-02-13", + "keyword": [ + "Test keyword1", + "Test keyword2", + "Test keyword3" + ], + "acknowledgement": "We thank you", + "description": "A test submission to allow testing without retrieving from bia server", + "see_also": [], + "related_publication": [], + "grant": [ + { + "id": "TESTFUNDS1", + "funder": [ + { + "display_name": "Test funding body1", + "id": null + } + ] + }, + { + "id": "TESTFUNDS2", + "funder": [ + { + "display_name": "Test funding body2", + "id": null + } + ] + } + ], + "funding_statement": "This work was funded by the EBI", + "annotation_component": [], + "attribute": {} +} \ No newline at end of file diff --git a/bia-export/test/output_data/bia_export.json b/bia-export/test/output_data/bia_export.json new file mode 100644 index 00000000..b74268d8 --- /dev/null +++ b/bia-export/test/output_data/bia_export.json @@ -0,0 +1,88 @@ +{ + "uuid": "a2fdbd58-ee11-4cd9-bc6a-f3d3da7fff71", + "accession_id": "S-BIADTEST", + "licence": "CC0", + "author": [ + { + "rorid": null, + "address": null, + "website": null, + "orcid": "0000-0000-0000-0000", + "display_name": "Test Author1", + "affiliation": [ + { + "rorid": null, + "address": null, + "website": null, + "display_name": "Test College 1" + } + ], + "contact_email": "test_author1@ebi.ac.uk", + "role": "corresponding author" + }, + { + "rorid": null, + "address": null, + "website": null, + "orcid": "1111-1111-1111-1111", + "display_name": "Test Author2", + "affiliation": [ + { + "rorid": null, + "address": null, + "website": null, + "display_name": "Test College 2" + } + ], + "contact_email": 
def test_cli_export_website_studies(tmp_path):
    """Run `website-study` on the bundled test data and compare the output.

    The command is invoked through Typer's CliRunner against the files in
    ``input_data`` and must exit with code 0; the file it writes must be
    byte-identical to ``output_data/bia_export.json``.
    """
    test_dir = Path(__file__).parent
    input_root_path = test_dir.joinpath("input_data")
    expected_output = test_dir.joinpath("output_data/bia_export.json")
    outfile = tmp_path.joinpath("bia_export.json").resolve()

    # Command-line arguments are strings; pass them as such instead of relying
    # on Path objects surviving the option parser untouched.
    cli_args = [
        "website-study",
        "S-BIADTEST",
        "-o",
        str(outfile),
        "-r",
        str(input_root_path),
    ]
    result = runner.invoke(app, cli_args)

    # Include the captured CLI output so a failing run explains itself.
    assert result.exit_code == 0, result.output
    assert filecmp.cmp(expected_output, outfile, shallow=False)