Skip to content

Commit

Permalink
created bia-export base package (#125)
Browse files Browse the repository at this point in the history
* created bia-export base package

* updated README
  • Loading branch information
sherwoodf authored Jul 23, 2024
1 parent 66527f0 commit 07e87d0
Show file tree
Hide file tree
Showing 16 changed files with 447 additions and 0 deletions.
15 changes: 15 additions & 0 deletions bia-export/.vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"python.testing.pytestArgs": [
"."
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true
},
"[json]": {
"editor.insertSpaces": true,
"editor.tabSize": 2
}
}
36 changes: 36 additions & 0 deletions bia-export/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
BIA Export
==========

Export data from the BIA to feed static pages, and other downstream consumers. This:

* Selects attributes for studies stored in local files
* Transforms to a specific export format
* Writes the result to a JSON file

This is expected to be run on the output of the bia-ingest package, which can cache the documents that would be uploaded to the API as local files.

This does not yet:

* Cover images, or even complete study metadata
* Pull data from the BIA Integrator API
* Derive information from OME-Zarr representations (physical dimensions, axis sizes)

Installation
------------

1. Clone the repository.
2. Run `poetry install`

Setup
-----

None required post installation

Usage
-----

Run:

poetry run bia-export website-study S-BIADTEST -o bia_export.json -r test/input_data

This will create `bia_export.json` using the example test data.
Empty file.
35 changes: 35 additions & 0 deletions bia-export/bia_export/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import typer
import logging
from rich.logging import RichHandler
from typing_extensions import Annotated
from pathlib import Path
from .website_conversion import create_study

# Configure the root logger when this module is imported: level "NOTSET"
# lets every record through, and output is rendered by rich's RichHandler
# (the format string is just the message; RichHandler is the only handler).
logging.basicConfig(
    level="NOTSET", format="%(message)s", datefmt="[%X]", handlers=[RichHandler()]
)
# getLogger() with no name returns the root logger configured above.
logger = logging.getLogger()

# Typer application object; the functions decorated with @app.command()
# below are registered on it as CLI sub-commands.
app = typer.Typer()


@app.command()
def website_study(
    accession_id: Annotated[
        str, typer.Argument(help="Accession ID of the study to export")
    ],
    root_directory: Annotated[
        Path,
        typer.Option(
            "--root",
            "-r",
            help="If root directory specified then use files there, rather than calling API",
        ),
    ] = None,
    output_filename: Annotated[
        Path,
        typer.Option(
            "--out_file",
            "-o",
        ),
    ] = Path("bia-images-export.json"),
):
    """Export one study, with its experimental imaging datasets, to a JSON file.

    The study is built by ``create_study`` from the files under the root
    directory and written to the output file as indented JSON.
    """
    # --root is declared optional, but only the local-file code path exists.
    # Without this check the command would crash with an AttributeError on
    # ``None.resolve()``; report a normal CLI usage error instead.
    if root_directory is None:
        raise typer.BadParameter(
            "A root directory is required: exporting without local files "
            "(i.e. calling the API) is not supported yet.",
            param_hint="--root",
        )

    abs_root = root_directory.resolve()
    study = create_study(accession_id, abs_root)

    # Write explicitly as UTF-8 so the JSON does not depend on the platform's
    # default locale encoding.
    with open(output_filename, "w", encoding="utf-8") as output:
        output.write(study.model_dump_json(indent=4))


@app.command()
def website_image():
    # Placeholder: registered as a CLI command but currently does nothing.
    pass

# Run the Typer app when this module is executed directly as a script.
if __name__ == "__main__":
    app()
50 changes: 50 additions & 0 deletions bia-export/bia_export/website_conversion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@

from pathlib import Path
import json
import logging
from .website_models import (
Study,
ExperimentalImagingDataset
)
from glob import glob
from typing import List

logger = logging.getLogger(__name__)

def create_study(
    accession_id: str,
    root_directory: Path
) -> Study:
    """Build the export ``Study`` for an accession from locally cached files.

    Reads ``studies/<accession_id>.json`` under ``root_directory``, attaches
    the datasets returned by ``convert_experimental_imaging_datasets`` under
    the ``experimental_imaging_component`` key, and constructs a ``Study``
    from the resulting dictionary.

    Args:
        accession_id: Accession ID of the study; used to build the file name.
        root_directory: Directory containing the ``studies`` sub-directory.

    Returns:
        The ``Study`` model populated from the study file and its datasets.

    Raises:
        NotImplementedError: If no root directory is given; only the
            local-file source is implemented.
        FileNotFoundError: If the study JSON file does not exist.
    """
    # Guard clause: previously a missing root directory fell through to code
    # that used an unassigned ``study_dict`` and failed with UnboundLocalError.
    if not root_directory:
        raise NotImplementedError(
            "Creating a study without a root directory is not supported yet; "
            "only local files can be used as a source."
        )

    study_path = root_directory.joinpath(f'studies/{accession_id}.json')

    logger.info(f'Loading study from {study_path}')

    # JSON is read as UTF-8 explicitly rather than the platform default.
    with open(study_path, "r", encoding="utf-8") as study_file:
        study_dict = json.load(study_file)

    study_dict["experimental_imaging_component"] = convert_experimental_imaging_datasets(accession_id, root_directory)

    return Study(**study_dict)


def convert_experimental_imaging_datasets(accession_id: str, root_directory: Path = None) -> List[ExperimentalImagingDataset]:
    """Load the experimental imaging datasets of a study from local files.

    Every ``*.json`` file in
    ``experimental_imaging_datasets/<accession_id>/`` under
    ``root_directory`` is parsed and turned into an
    ``ExperimentalImagingDataset``.

    Args:
        accession_id: Accession ID of the study that owns the datasets.
        root_directory: Directory containing the
            ``experimental_imaging_datasets`` sub-directory. If not given,
            nothing is loaded.

    Returns:
        The datasets ordered by file path; an empty list when no root
        directory is given or no files match.
    """
    datasets = []
    if not root_directory:
        return datasets

    eid_pattern = root_directory.joinpath(f'experimental_imaging_datasets/{accession_id}/*.json')

    # glob() yields matches in arbitrary order; sort them so the exported
    # dataset list is reproducible between runs and machines.
    for eid_path in sorted(glob(str(eid_pattern))):

        logger.info(f'Loading experimental imaging dataset from {eid_path}')

        # JSON is read as UTF-8 explicitly rather than the platform default.
        with open(eid_path, "r", encoding="utf-8") as eid_file:
            eid_dict = json.load(eid_file)

        datasets.append(ExperimentalImagingDataset(**eid_dict))

    return datasets

11 changes: 11 additions & 0 deletions bia-export/bia_export/website_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from __future__ import annotations
from pydantic import Field
from typing import List, Optional
from bia_shared_datamodels import bia_data_model, semantic_models


# Export model for a study: extends the shared semantic Study model and
# DocumentMixin with the list of datasets attached to the study.
class Study(semantic_models.Study, bia_data_model.DocumentMixin):
    # Optional list of datasets; defaults to a new empty list. The annotation
    # names a class defined later in this module, which works because
    # annotations are lazy (`from __future__ import annotations`).
    experimental_imaging_component: Optional[List[ExperimentalImagingDataset]] = Field(default_factory=list, description="""A dataset of that is associated with the study.""")

# Export model for an experimental imaging dataset: combines the shared
# semantic model with DocumentMixin and adds no fields of its own.
class ExperimentalImagingDataset(semantic_models.ExperimentalImagingDataset, bia_data_model.DocumentMixin):
    pass
33 changes: 33 additions & 0 deletions bia-export/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
[tool.poetry]
name = "bia-export"
version = "0.1.0"
description = "BIA export functionality"
authors = ["Matthew Hartley <[email protected]>"]
readme = "README.md"
packages = [{include = "bia_export"}]

[tool.poetry.scripts]
bia-export = "bia_export.cli:app"

[tool.poetry.dependencies]
python = "^3.10,<3.12"
pydantic = "^2"
bia-shared-datamodels = { path = "../bia-shared-datamodels", develop = true }
zarr = "^2.16.1"
fsspec = "^2023.10.0"
requests = "^2.31.0"
aiohttp = "^3.9.1"
python-dotenv = "^1.0.0"
typer = "^0.9.0"
rich = "^13.7.0"
ruamel-yaml = "^0.18.5"


[tool.poetry.group.dev.dependencies]
ipython = "^8.22.2"
pytest = "^7.4.3"
pytest-mock = "^3.14.0"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
1 change: 1 addition & 0 deletions bia-export/test/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
import pytest
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"title_id": "Test Biosample 1",
"uuid": "64a67727-4e7c-469a-91c4-6219ae072e99",
"organism_classification": [
{
"common_name": "human",
"scientific_name": "Homo sapiens",
"ncbi_id": null
}
],
"biological_entity_description": "Test biological entity 1",
"experimental_variable_description": [
"Test experimental entity 1"
],
"extrinsic_variable_description": [
"Test extrinsic variable 1"
],
"intrinsic_variable_description": [
"Test intrinsic variable 1\\nwith escaped character"
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"title_id": "Test Biosample 2 ",
"uuid": "6950718c-4917-47a1-a807-11b874e80a23",
"organism_classification": [
{
"common_name": "mouse",
"scientific_name": "Mus musculus",
"ncbi_id": null
}
],
"biological_entity_description": "Test biological entity 2",
"experimental_variable_description": [
"Test experimental entity 2"
],
"extrinsic_variable_description": [
"Test extrinsic variable 2"
],
"intrinsic_variable_description": [
"Test intrinsic variable 2"
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"title_id": "Study Component 1",
"uuid": "47a4ab60-c76d-4424-bfaa-c2a024de720c",
"file_reference_count": 4,
"description": "Description of study component 1",
"acquisition_process": [
"c2e44a1b-a43c-476e-8ddf-8587f4c955b3"
],
"specimen_imaging_preparation_protocol": [
"7199d730-29f1-4ad8-b599-e9089cbb2d7b"
],
"biological_entity": [
"64a67727-4e7c-469a-91c4-6219ae072e99",
"6950718c-4917-47a1-a807-11b874e80a23"
],
"specimen_growth_protocol": [],
"analysis_method": [
{
"protocol_description": "Test image analysis",
"features_analysed": "Test image analysis overview"
}
],
"correlation_method": [],
"example_image_uri": [],
"image_count": 0
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"title_id": "Test Primary Screen Image Acquisition",
"uuid": "c2e44a1b-a43c-476e-8ddf-8587f4c955b3",
"protocol_description": "Test image acquisition parameters 1",
"imaging_instrument_description": "Test imaging instrument 1",
"fbbi_id": [],
"imaging_method_name": "confocal microscopy"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"title_id": "Test specimen 1",
"uuid": "7199d730-29f1-4ad8-b599-e9089cbb2d7b",
"protocol_description": "Test sample preparation protocol 1",
"signal_channel_information": []
}
75 changes: 75 additions & 0 deletions bia-export/test/input_data/studies/S-BIADTEST.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
{
"uuid": "a2fdbd58-ee11-4cd9-bc6a-f3d3da7fff71",
"accession_id": "S-BIADTEST",
"licence": "CC0",
"author": [
{
"rorid": null,
"address": null,
"website": null,
"orcid": "0000-0000-0000-0000",
"display_name": "Test Author1",
"affiliation": [
{
"rorid": null,
"address": null,
"website": null,
"display_name": "Test College 1"
}
],
"contact_email": "[email protected]",
"role": "corresponding author"
},
{
"rorid": null,
"address": null,
"website": null,
"orcid": "1111-1111-1111-1111",
"display_name": "Test Author2",
"affiliation": [
{
"rorid": null,
"address": null,
"website": null,
"display_name": "Test College 2"
}
],
"contact_email": "[email protected]",
"role": "first author"
}
],
"title": "A test submission with title greater than 25 characters",
"release_date": "2024-02-13",
"keyword": [
"Test keyword1",
"Test keyword2",
"Test keyword3"
],
"acknowledgement": "We thank you",
"description": "A test submission to allow testing without retrieving from bia server",
"see_also": [],
"related_publication": [],
"grant": [
{
"id": "TESTFUNDS1",
"funder": [
{
"display_name": "Test funding body1",
"id": null
}
]
},
{
"id": "TESTFUNDS2",
"funder": [
{
"display_name": "Test funding body2",
"id": null
}
]
}
],
"funding_statement": "This work was funded by the EBI",
"annotation_component": [],
"attribute": {}
}
Loading

0 comments on commit 07e87d0

Please sign in to comment.