From b8a10ac7cce8d501d74fd4e7d40eae3f6f0cea87 Mon Sep 17 00:00:00 2001 From: Francois Date: Tue, 6 Aug 2024 14:27:35 +0100 Subject: [PATCH 1/5] added default_open field to details section of datasets --- bia-export/bia_export/website_conversion.py | 162 +++++++++++++++----- bia-export/bia_export/website_models.py | 44 +++++- bia-export/test/output_data/bia_export.json | 53 +++++++ 3 files changed, 210 insertions(+), 49 deletions(-) diff --git a/bia-export/bia_export/website_conversion.py b/bia-export/bia_export/website_conversion.py index 3ca135bf..56e5ccb1 100644 --- a/bia-export/bia_export/website_conversion.py +++ b/bia-export/bia_export/website_conversion.py @@ -1,10 +1,13 @@ - from pathlib import Path import json import logging from .website_models import ( Study, - ExperimentalImagingDataset + ExperimentalImagingDataset, + ImageAcquisition, + BioSample, + SpecimenGrowthProtocol, + SpecimenImagingPrepartionProtocol, ) from glob import glob from typing import List, Type @@ -20,11 +23,13 @@ def read_api_json_file(file_path: Path, object_type: Type[BaseModel]) -> BaseMod """ with open(file_path, "r") as object_file: object_dict = json.load(object_file) - + return object_type(**object_dict) -def read_all_json(directory_path: Path, object_type: Type[BaseModel]) -> List[BaseModel]: +def read_all_json( + directory_path: Path, object_type: Type[BaseModel] +) -> List[BaseModel]: object_list = [] file_paths = glob(str(directory_path)) for file_path in file_paths: @@ -32,76 +37,151 @@ def read_all_json(directory_path: Path, object_type: Type[BaseModel]) -> List[Ba return object_list -def find_associated_objects(typed_associations: set, directory_path: Path, object_type: Type[BaseModel]) -> List[bia_data_model.UserIdentifiedObject]: +def find_associated_objects( + typed_associations: set, + directory_path: Path, + object_type: Type[bia_data_model.UserIdentifiedObject], +) -> List[dict]: linked_object = [] if len(typed_associations) == 0: return linked_object - - typed_object_in_study: List[bia_data_model.UserIdentifiedObject] = read_all_json(directory_path, object_type) + + typed_object_in_study: List[bia_data_model.UserIdentifiedObject] = read_all_json( + directory_path, object_type + ) for object in typed_object_in_study: if object.title_id in typed_associations: - linked_object.append(object) + linked_object.append(object.model_dump()) return linked_object -def create_study( - accession_id: str, - root_directory: Path -) -> Study: - +def create_study(accession_id: str, root_directory: Path) -> Study: + if root_directory: - study_path = root_directory.joinpath(f'studies/{accession_id}.json') + study_path = root_directory.joinpath(f"studies/{accession_id}.json") + + logger.info(f"Loading study from {study_path}") - logger.info(f'Loading study from {study_path}') - api_study = read_api_json_file(study_path, bia_data_model.Study) else: - #TODO: use client + # TODO: use client raise NotImplementedError study_dict = api_study.model_dump() - study_dict["experimental_imaging_component"] = create_experimental_imaging_datasets(accession_id, root_directory) + study_dict["experimental_imaging_component"] = create_experimental_imaging_datasets( + accession_id, root_directory + ) study = Study(**study_dict) return study -def create_experimental_imaging_datasets(accession_id: str, root_directory: Path = None) -> List[ExperimentalImagingDataset]: +def create_experimental_imaging_datasets( + accession_id: str, root_directory: Path = None +) -> List[ExperimentalImagingDataset]: eid_list = [] if root_directory: 
- - eid_directory = root_directory.joinpath(f'experimental_imaging_datasets/{accession_id}/*.json') - api_eids: List[bia_data_model.ExperimentalImagingDataset] = read_all_json(eid_directory, bia_data_model.ExperimentalImagingDataset) - + eid_directory = root_directory.joinpath( + f"experimental_imaging_datasets/{accession_id}/*.json" + ) + + api_eids: List[bia_data_model.ExperimentalImagingDataset] = read_all_json( + eid_directory, bia_data_model.ExperimentalImagingDataset + ) + + detail_map = { + ImageAcquisition: { + "source_directory": "image_acquisitions", + "association_field": "image_acquisition", + "bia_type": bia_data_model.ImageAcquisition, + "previously_displayed": set(), + }, + BioSample: { + "source_directory": "biosamples", + "association_field": "biosample", + "bia_type": bia_data_model.BioSample, + "previously_displayed": set(), + }, + SpecimenImagingPrepartionProtocol: { + "source_directory": "specimen_imaging_preparation_protocols", + "association_field": "specimen", + "bia_type": bia_data_model.SpecimenImagingPrepartionProtocol, + "previously_displayed": set(), + }, + SpecimenGrowthProtocol: { + "source_directory": "specimen_growth_protocols", + "association_field": "specimen", + "bia_type": bia_data_model.SpecimenGrowthProtocol, + "previously_displayed": set(), + }, + } + + def process_details_section( + root_directory: Path, + accession_id: str, + detail_map_info: dict, + typed_associations: set, + ): + + detail_path = root_directory.joinpath( + f"{detail_map_info['source_directory']}/{accession_id}/*.json" + ) + detail_linked_to_dataset = find_associated_objects( + typed_associations, + detail_path, + detail_map_info["bia_type"], + ) + + for detail in detail_linked_to_dataset: + if detail["uuid"] not in detail_map_info["previously_displayed"]: + detail["default_open"] = True + detail_map_info["previously_displayed"].add(detail["uuid"]) + else: + detail["default_open"] = False + + return detail_linked_to_dataset + for eid in api_eids: eid_dict = eid.model_dump() associations = eid.attribute["associations"] - association_by_type = {"biosample": set(), "image_acquisition": set(), "specimen": set()} + association_by_type = { + "biosample": set(), + "image_acquisition": set(), + "specimen": set(), + } for association in associations: for key in association_by_type.keys(): association_by_type[key].add(association[key]) - - biosample_directory = root_directory.joinpath(f'biosamples/{accession_id}/*.json') - sipp_directory = root_directory.joinpath(f'specimen_imaging_preparation_protocols/{accession_id}/*.json') - sgp_directory = root_directory.joinpath(f'specimen_growth_protocols/{accession_id}/*.json') - ia_directory = root_directory.joinpath(f'image_acquisitions/{accession_id}/*.json') - - biosample_linked_to_dataset = find_associated_objects(association_by_type["biosample"], biosample_directory, bia_data_model.BioSample) - sipp_linked_to_dataset = find_associated_objects(association_by_type["specimen"], sipp_directory, bia_data_model.SpecimenImagingPrepartionProtocol) - sgps_linked_to_dataset = find_associated_objects(association_by_type["specimen"], sgp_directory, bia_data_model.SpecimenGrowthProtocol) - ia_linked_to_dataset = find_associated_objects(association_by_type["image_acquisition"], ia_directory, bia_data_model.ImageAcquisition) - - eid_dict["biological_entity"] = biosample_linked_to_dataset - eid_dict["specimen_imaging_preparation_protocol"] = sipp_linked_to_dataset - eid_dict["specimen_growth_protocol"] = sgps_linked_to_dataset - eid_dict["acquisition_process"] 
= ia_linked_to_dataset
+            eid_dict["biological_entity"] = process_details_section(
+                root_directory,
+                accession_id,
+                detail_map[BioSample],
+                association_by_type["biosample"],
+            )
+            eid_dict["specimen_imaging_preparation_protocol"] = process_details_section(
+                root_directory,
+                accession_id,
+                detail_map[SpecimenImagingPrepartionProtocol],
+                association_by_type["specimen"],
+            )
+            eid_dict["specimen_growth_protocol"] = process_details_section(
+                root_directory,
+                accession_id,
+                detail_map[SpecimenGrowthProtocol],
+                association_by_type["specimen"],
+            )
+            eid_dict["acquisition_process"] = process_details_section(
+                root_directory,
+                accession_id,
+                detail_map[ImageAcquisition],
+                association_by_type["image_acquisition"],
+            )
             eid = ExperimentalImagingDataset(**eid_dict)
             eid_list.append(eid)
-    
+
     return eid_list
-
diff --git a/bia-export/bia_export/website_models.py b/bia-export/bia_export/website_models.py
index 91f4d0bd..bcb72ea3 100644
--- a/bia-export/bia_export/website_models.py
+++ b/bia-export/bia_export/website_models.py
@@ -1,23 +1,51 @@
 from __future__ import annotations
-from pydantic import Field
+from pydantic import Field, BaseModel
 from typing import List, Optional
 from bia_shared_datamodels import bia_data_model


 class Study(bia_data_model.Study):
-    experimental_imaging_component: Optional[List[ExperimentalImagingDataset]] = Field(default_factory=list, description="""A dataset of that is associated with the study.""")
+    experimental_imaging_component: Optional[List[ExperimentalImagingDataset]] = Field(
+        default_factory=list,
+        description="""A dataset that is associated with the study.""",
+    )
+

 class ExperimentalImagingDataset(bia_data_model.ExperimentalImagingDataset):
-    acquisition_process: list[bia_data_model.ImageAcquisition] = Field(
+    acquisition_process: list[ImageAcquisition] = Field(
         description="""Processes involved in the creation of the images and files in this dataset."""
     )
-    specimen_imaging_preparation_protocol: list[bia_data_model.SpecimenImagingPrepartionProtocol] = Field(
-        description="""Processes involved in the preprapartion of the samples for imaged."""
+    specimen_imaging_preparation_protocol: list[SpecimenImagingPrepartionProtocol] = (
+        Field(
+            description="""Processes involved in the preparation of the samples for imaging."""
+        )
     )
-    biological_entity: list[bia_data_model.BioSample] = Field(
+    biological_entity: list[BioSample] = Field(
         description="""The biological entity or entities that were imaged."""
     )
-    specimen_growth_protocol: Optional[list[bia_data_model.SpecimenGrowthProtocol]] = Field(
-        description="""Processes involved in the growth of the samples that were then imaged."""
+    specimen_growth_protocol: Optional[list[SpecimenGrowthProtocol]] = Field(
+        default_factory=list,
+        description="""Processes involved in the growth of the samples that were then imaged.""",
     )

+
+class DetailSection(BaseModel):
+    default_open: bool = Field()
+
+
+class ImageAcquisition(bia_data_model.ImageAcquisition, DetailSection):
+    pass
+
+
+class BioSample(bia_data_model.BioSample, DetailSection):
+    pass
+
+
+class SpecimenGrowthProtocol(bia_data_model.SpecimenGrowthProtocol, DetailSection):
+    pass
+
+
+class SpecimenImagingPrepartionProtocol(
+    bia_data_model.SpecimenImagingPrepartionProtocol, DetailSection
+):
+    pass
diff --git a/bia-export/test/output_data/bia_export.json b/bia-export/test/output_data/bia_export.json
index 0f04e1f3..f7da1f20 100644
--- a/bia-export/test/output_data/bia_export.json
+++ b/bia-export/test/output_data/bia_export.json
@@ -111,6 +111,7 @@
"submitted_in_study_uuid": "a2fdbd58-ee11-4cd9-bc6a-f3d3da7fff71", "acquisition_process": [ { + "default_open": true, "title_id": "Test Primary Screen Image Acquisition", "uuid": "c2e44a1b-a43c-476e-8ddf-8587f4c955b3", "version": 1, @@ -126,6 +127,7 @@ ], "specimen_imaging_preparation_protocol": [ { + "default_open": true, "title_id": "Test specimen 1", "uuid": "7199d730-29f1-4ad8-b599-e9089cbb2d7b", "version": 1, @@ -139,6 +141,7 @@ ], "biological_entity": [ { + "default_open": true, "title_id": "Test Biosample 1", "uuid": "64a67727-4e7c-469a-91c4-6219ae072e99", "version": 1, @@ -165,6 +168,7 @@ ] }, { + "default_open": true, "title_id": "Test Biosample 2", "uuid": "6950718c-4917-47a1-a807-11b874e80a23", "version": 1, @@ -192,6 +196,55 @@ } ], "specimen_growth_protocol": [] + }, + { + "title_id": "Study Component 2", + "uuid": "850a1ca3-9681-4a8a-b625-477936fcb954", + "version": 1, + "model": { + "type_name": "ExperimentalImagingDataset", + "version": 1 + }, + "description": "Description of study component 2", + "attribute": { + "associations": [ + { + "image_analysis": "Test image analysis", + "image_correlation": null, + "biosample": "Test Biosample 2 ", + "image_acquisition": "Test Primary Screen Image Acquisition", + "specimen": "Test specimen 2" + } + ] + }, + "analysis_method": [ + { + "protocol_description": "Test image analysis", + "features_analysed": "Test image analysis overview" + } + ], + "correlation_method": [], + "example_image_uri": [], + "submitted_in_study_uuid": "a2fdbd58-ee11-4cd9-bc6a-f3d3da7fff71", + "acquisition_process": [ + { + "default_open": false, + "title_id": "Test Primary Screen Image Acquisition", + "uuid": "c2e44a1b-a43c-476e-8ddf-8587f4c955b3", + "version": 1, + "model": { + "type_name": "ImageAcquisition", + "version": 1 + }, + "protocol_description": "Test image acquisition parameters 1", + "imaging_instrument_description": "Test imaging instrument 1", + "fbbi_id": [], + "imaging_method_name": "confocal microscopy" + } + ], + "specimen_imaging_preparation_protocol": [], + "biological_entity": [], + "specimen_growth_protocol": [] } ] } \ No newline at end of file From 799fac4d0d9f50000fd266c379a6a2f0f6b670cd Mon Sep 17 00:00:00 2001 From: Francois Date: Tue, 6 Aug 2024 14:37:20 +0100 Subject: [PATCH 2/5] forgot to add additional experimental imaging dataset file --- .../850a1ca3-9681-4a8a-b625-477936fcb954.json | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 bia-export/test/input_data/experimental_imaging_datasets/S-BIADTEST/850a1ca3-9681-4a8a-b625-477936fcb954.json diff --git a/bia-export/test/input_data/experimental_imaging_datasets/S-BIADTEST/850a1ca3-9681-4a8a-b625-477936fcb954.json b/bia-export/test/input_data/experimental_imaging_datasets/S-BIADTEST/850a1ca3-9681-4a8a-b625-477936fcb954.json new file mode 100644 index 00000000..1a1944b9 --- /dev/null +++ b/bia-export/test/input_data/experimental_imaging_datasets/S-BIADTEST/850a1ca3-9681-4a8a-b625-477936fcb954.json @@ -0,0 +1,30 @@ +{ + "title_id": "Study Component 2", + "uuid": "850a1ca3-9681-4a8a-b625-477936fcb954", + "version": 1, + "model": { + "type_name": "ExperimentalImagingDataset", + "version": 1 + }, + "description": "Description of study component 2", + "attribute": { + "associations": [ + { + "image_analysis": "Test image analysis", + "image_correlation": null, + "biosample": "Test Biosample 2 ", + "image_acquisition": "Test Primary Screen Image Acquisition", + "specimen": "Test specimen 2" + } + ] + }, + "analysis_method": [ + { + "protocol_description": 
"Test image analysis", + "features_analysed": "Test image analysis overview" + } + ], + "correlation_method": [], + "example_image_uri": [], + "submitted_in_study_uuid": "a2fdbd58-ee11-4cd9-bc6a-f3d3da7fff71" +} \ No newline at end of file From 2571efcb3da9d0fc2040f4d35c317e2c3195605e Mon Sep 17 00:00:00 2001 From: Francois Date: Tue, 6 Aug 2024 15:18:54 +0100 Subject: [PATCH 3/5] test pytest testing. Revert me --- .github/workflows/core.yaml | 2 +- bia-export/test/test_local_convert.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/.github/workflows/core.yaml b/.github/workflows/core.yaml index 14bd31dc..86aca18e 100644 --- a/.github/workflows/core.yaml +++ b/.github/workflows/core.yaml @@ -39,4 +39,4 @@ jobs: - name: Check to see if poetry can build run: poetry build - name: Run pytest - run: poetry run pytest + run: poetry run pytest -s diff --git a/bia-export/test/test_local_convert.py b/bia-export/test/test_local_convert.py index 4e85a250..95ddbd9e 100644 --- a/bia-export/test/test_local_convert.py +++ b/bia-export/test/test_local_convert.py @@ -3,10 +3,11 @@ import pytest from bia_export.cli import app import filecmp +import logging -runner = CliRunner() - +LOGGER = logging.getLogger(__name__) +runner = CliRunner() def test_cli_export_website_studies(tmp_path): input_root_path = Path(__file__).parent.joinpath("input_data") @@ -15,8 +16,11 @@ def test_cli_export_website_studies(tmp_path): result = runner.invoke(app, ["website-study", "S-BIADTEST", "-o", outfile, "-r", input_root_path]) - - assert result.exit_code == 0 # Note this tests for exact equivance in files, i.e. order of fields and indentation matters + with open(expected_output) as expected_json: + print(expected_json.read()) + with open(outfile) as out_json: + print(out_json.read()) + assert filecmp.cmp(expected_output, outfile, shallow=False) From 89990099c2ef889b8127ae2ce4eed1ffbddb661e Mon Sep 17 00:00:00 2001 From: Francois Date: Tue, 6 Aug 2024 15:41:47 +0100 Subject: [PATCH 4/5] Revert "test pytest testing. Revert me" This reverts commit 2571efcb3da9d0fc2040f4d35c317e2c3195605e. --- .github/workflows/core.yaml | 2 +- bia-export/test/test_local_convert.py | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/.github/workflows/core.yaml b/.github/workflows/core.yaml index 86aca18e..14bd31dc 100644 --- a/.github/workflows/core.yaml +++ b/.github/workflows/core.yaml @@ -39,4 +39,4 @@ jobs: - name: Check to see if poetry can build run: poetry build - name: Run pytest - run: poetry run pytest -s + run: poetry run pytest diff --git a/bia-export/test/test_local_convert.py b/bia-export/test/test_local_convert.py index 95ddbd9e..4e85a250 100644 --- a/bia-export/test/test_local_convert.py +++ b/bia-export/test/test_local_convert.py @@ -3,12 +3,11 @@ import pytest from bia_export.cli import app import filecmp -import logging - -LOGGER = logging.getLogger(__name__) runner = CliRunner() + + def test_cli_export_website_studies(tmp_path): input_root_path = Path(__file__).parent.joinpath("input_data") expected_output = Path(__file__).parent.joinpath("output_data/bia_export.json") @@ -16,11 +15,8 @@ def test_cli_export_website_studies(tmp_path): result = runner.invoke(app, ["website-study", "S-BIADTEST", "-o", outfile, "-r", input_root_path]) + + assert result.exit_code == 0 # Note this tests for exact equivance in files, i.e. 
order of fields and indentation matters
-    with open(expected_output) as expected_json:
-        print(expected_json.read())
-    with open(outfile) as out_json:
-        print(out_json.read())
-
     assert filecmp.cmp(expected_output, outfile, shallow=False)

From 8432beae5e12557c0310f7bc5e88a9e2590db137 Mon Sep 17 00:00:00 2001
From: Francois
Date: Tue, 6 Aug 2024 15:57:21 +0100
Subject: [PATCH 5/5] sort glob output to improve reproducibility of output json

---
 bia-export/bia_export/website_conversion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bia-export/bia_export/website_conversion.py b/bia-export/bia_export/website_conversion.py
index 56e5ccb1..e9ba2339 100644
--- a/bia-export/bia_export/website_conversion.py
+++ b/bia-export/bia_export/website_conversion.py
@@ -31,7 +31,7 @@ def read_all_json(
     directory_path: Path, object_type: Type[BaseModel]
 ) -> List[BaseModel]:
     object_list = []
-    file_paths = glob(str(directory_path))
+    file_paths = sorted(glob(str(directory_path)))
     for file_path in file_paths:
         object_list.append(read_api_json_file(file_path, object_type))
     return object_list
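
Note on the mechanism introduced by this series (illustrative only, not the actual bia-export code): a detail object (image acquisition, biosample, specimen preparation or growth protocol) gets default_open = true the first time its UUID appears for a study and false on any repeat appearance in a later dataset, so the website only auto-expands the first occurrence. A minimal standalone sketch of that rule, using hypothetical names (mark_default_open, previously_displayed):

    # Sketch only: details are plain dicts carrying a "uuid" key, as produced by model_dump().
    def mark_default_open(details: list[dict], previously_displayed: set) -> list[dict]:
        for detail in details:
            # First occurrence of this UUID is opened by default; repeats are collapsed.
            detail["default_open"] = detail["uuid"] not in previously_displayed
            previously_displayed.add(detail["uuid"])
        return details

    seen = set()
    dataset_1 = mark_default_open([{"uuid": "a"}, {"uuid": "b"}], seen)  # both default_open True
    dataset_2 = mark_default_open([{"uuid": "a"}], seen)                 # "a" now default_open False

In the patches themselves this state lives in the per-type "previously_displayed" sets inside detail_map, so first-occurrence tracking is kept separately for acquisitions, biosamples and the two specimen protocol types.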