Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Model updates, and some conversion logic #123

Merged
merged 8 commits into from
Jul 22, 2024
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import logging
from typing import List, Any, Dict
from .utils import (
dicts_to_api_models,
find_sections_recursive,
dict_to_uuid,
persist
)
from ..biostudies import (
Submission,
attributes_to_dict,
)
from src.bia_models import bia_data_model

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# NOTE(review): basicConfig runs at import time and touches the root logger;
# confirm this is intended for a library module rather than an entry point.
logging.basicConfig(level=logging.INFO)


def get_annotation_method(
    submission: Submission, persist_artefacts: bool = False
) -> List[bia_data_model.AnnotationMethod]:
    """Build AnnotationMethod models from a submission, optionally persisting them.

    Args:
        submission: Submission whose sections are searched for annotation
            method information (see ``extract_annotation_method_dicts``).
        persist_artefacts: When true and at least one model was produced,
            the models are handed to ``persist`` under the
            "annotation_method" key together with the submission accno.

    Returns:
        Whatever ``dicts_to_api_models`` produces for the extracted dicts
        using ``bia_data_model.AnnotationMethod`` as the target model.
    """
    annotation_method_model_dicts = extract_annotation_method_dicts(submission)
    annotation_methods = dicts_to_api_models(
        annotation_method_model_dicts, bia_data_model.AnnotationMethod
    )

    # Skip persisting when nothing was extracted.
    if persist_artefacts and annotation_methods:
        persist(annotation_methods, "annotation_method", submission.accno)

    return annotation_methods


def extract_annotation_method_dicts(submission: Submission) -> List[Dict[str, Any]]:
    """Return one dict per "Annotations" section found in the submission.

    Each dict holds the mapped section attributes, an empty
    ``source_dataset`` list, the section ``accno`` (empty string if the
    section has none), the submission accno as ``accession_id`` and a
    ``uuid`` computed by ``generate_annotation_method_uuid``.
    """
    # (model field, section attribute name, value used when attribute is absent)
    field_specs = [
        ("title_id", "Name", ""),
        ("protocol_description", "Annotation overview", ""),
        ("annotation_criteria", "Annotation criteria", ""),
        ("annotation_coverage", "Annotation coverage", ""),
        ("method_type", "Annotation method", "other"),
    ]

    sections = find_sections_recursive(submission.section, ["Annotations"], [])

    results = []
    for section in sections:
        attributes = attributes_to_dict(section.attributes)

        record = {}
        for field, attribute_name, fallback in field_specs:
            record[field] = attributes.get(attribute_name, fallback)

        # TODO: change template to get source dataset information
        record["source_dataset"] = []

        record["accno"] = section.__dict__.get("accno", "")
        record["accession_id"] = submission.accno
        # The uuid is derived last so it can see every field set above.
        record["uuid"] = generate_annotation_method_uuid(record)
        results.append(record)

    return results


def generate_annotation_method_uuid(protocol_dict: Dict[str, Any]) -> str:
    """Return the result of ``dict_to_uuid`` for the annotation method's identifying fields."""
    # Fields handed to dict_to_uuid; order kept as in the original list.
    return dict_to_uuid(
        protocol_dict,
        [
            "accession_id",
            "accno",
            "title_id",
            "protocol_description",
            "annotation_criteria",
            "annotation_coverage",
            "method_type",
            "source_dataset",
        ],
    )
20 changes: 5 additions & 15 deletions bia-ingest-shared-models/bia_ingest_sm/conversion/biosample.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import logging
from pathlib import Path
from typing import List, Any, Dict
from .utils import (
dicts_to_api_models,
find_sections_recursive,
dict_to_uuid
dict_to_uuid,
persist
)
from ..biostudies import (
Submission,
attributes_to_dict,
)
from ..config import settings
from src.bia_models import bia_data_model, semantic_models

logger = logging.getLogger(__name__)
Expand All @@ -25,14 +24,7 @@ def get_biosample(
biosamples = dicts_to_api_models(biosample_model_dicts, bia_data_model.BioSample)

if persist_artefacts and biosamples:
output_dir = Path(settings.bia_data_dir) / "biosamples" / submission.accno
if not output_dir.is_dir():
output_dir.mkdir(parents=True)
logger.info(f"Created {output_dir}")
for biosample in biosamples:
output_path = output_dir / f"{biosample.uuid}.json"
output_path.write_text(biosample.model_dump_json(indent=2))
logger.info(f"Written {output_path}")
persist(biosamples, "biosamples", submission.accno)
return biosamples


Expand All @@ -41,7 +33,7 @@ def extract_biosample_dicts(submission: Submission) -> List[Dict[str, Any]]:

key_mapping = [
("title_id", "Title", ""),
("description", "Description", ""),
("biological_entity_description", "Biological entity", ""),
("organism", "Organism", ""),
]

Expand Down Expand Up @@ -93,9 +85,7 @@ def generate_biosample_uuid(biosample_dict: Dict[str, Any]) -> str:
"accno",
"title_id",
"organism_classification",
"description",
# TODO: Discuss including below in semantic_models.BioSample
# "biological_entity",
"biological_entity_description",
"intrinsic_variable_description",
"extrinsic_variable_description",
"experimental_variable_description",
Expand Down
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need to revisit the logic of this file in light of the fact that we are not storing the list of file_references anymore. We may have to trigger the generation of file_references after obtaining the uuid for the experimental dataset, so we can pass this to the function that creates file_references, allowing them to point to their parent experimental imaging dataset.

Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
find_sections_recursive,
get_generic_section_as_list,
dict_to_uuid,
get_generic_section_as_dict
get_generic_section_as_dict,
persist
)
import bia_ingest_sm.conversion.biosample as biosample_conversion
import bia_ingest_sm.conversion.study as study_conversion
Expand Down Expand Up @@ -69,10 +70,11 @@ def get_experimental_imaging_dataset(
)

analysis_method_list = []
biosample_list = []
image_acquisition_method_list = []
correlation_method_list = []
specimen_preparation_method_list = []
biosample_list = []
image_acquisition_list = []
specimen_preparation_protocol_list = []
specimen_growth_protocol_list = []

if len(associations) > 0:
# Image Analysis Method
Expand All @@ -81,7 +83,7 @@ def get_experimental_imaging_dataset(
]
for analysis_method in analysis_method_dict.values():
if (
analysis_method.method_description
analysis_method.protocol_description
in analysis_methods_from_associations
):
analysis_method_list.append(analysis_method)
Expand All @@ -96,12 +98,11 @@ def get_experimental_imaging_dataset(
study_component_file_references = file_reference_uuids.get(section_name, [])
model_dict = {
"title_id": section_name,
# "description": attr_dict["Description"],
"description": attr_dict["Description"],
"submitted_in_study": study_conversion.get_study_uuid(submission),
"file": study_component_file_references,
"image": [],
"specimen_preparation_method": specimen_preparation_method_list,
"acquisition_method": image_acquisition_method_list,
"specimen_imaging_preparation_protocol": specimen_preparation_protocol_list,
"acquisition_process": image_acquisition_list,
"specimen_growth_protocol": specimen_growth_protocol_list,
"biological_entity": biosample_list,
"analysis_method": analysis_method_list,
"correlation_method": correlation_method_list,
Expand All @@ -118,18 +119,7 @@ def get_experimental_imaging_dataset(
)

if persist_artefacts and experimental_imaging_dataset:
output_dir = (
Path(settings.bia_data_dir)
/ "experimental_imaging_datasets"
/ submission.accno
)
if not output_dir.is_dir():
output_dir.mkdir(parents=True)
logger.info(f"Created {output_dir}")
for dataset in experimental_imaging_dataset:
output_path = output_dir / f"{dataset.uuid}.json"
output_path.write_text(dataset.model_dump_json(indent=2))
logger.info(f"Written {output_path}")
persist(experimental_imaging_dataset, "experimental_imaging_dataset", submission.accno)

return experimental_imaging_dataset

Expand All @@ -139,7 +129,7 @@ def get_image_analysis_method(
) -> Dict[str, semantic_models.ImageAnalysisMethod]:

key_mapping = [
("method_description", "Title", None,),
("protocol_description", "Title", None,),
("features_analysed", "Image analysis overview", None,),
]

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import logging
from typing import List, Any, Dict
from .utils import (
dicts_to_api_models,
find_sections_recursive,
dict_to_uuid,
persist
)
from ..biostudies import (
Submission,
attributes_to_dict,
)
from src.bia_models import bia_data_model

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# NOTE(review): basicConfig runs at import time and touches the root logger;
# confirm this is intended for a library module rather than an entry point.
logging.basicConfig(level=logging.INFO)


def get_image_acquisition(
    submission: Submission, persist_artefacts: bool = False
) -> List[bia_data_model.ImageAcquisition]:
    """Build ImageAcquisition models from a submission, optionally persisting them.

    Args:
        submission: Submission whose sections are searched for image
            acquisition information (see ``extract_image_acquisition_dicts``).
        persist_artefacts: When true and at least one model was produced,
            the models are handed to ``persist`` under the
            "image_acquisition" key together with the submission accno.

    Returns:
        Whatever ``dicts_to_api_models`` produces for the extracted dicts
        using ``bia_data_model.ImageAcquisition`` as the target model.
    """
    image_acquisition_model_dicts = extract_image_acquisition_dicts(submission)
    image_acquisitions = dicts_to_api_models(
        image_acquisition_model_dicts, bia_data_model.ImageAcquisition
    )

    # Skip persisting when nothing was extracted.
    if persist_artefacts and image_acquisitions:
        # Previously persisted under "specimen_growth_protocol" (copy-paste
        # slip), which mixed these artefacts with the growth protocol ones.
        persist(image_acquisitions, "image_acquisition", submission.accno)

    return image_acquisitions


def extract_image_acquisition_dicts(submission: Submission) -> List[Dict[str, Any]]:
    """Return one dict per "Image acquisition" section found in the submission.

    Each dict holds the mapped section attributes, an empty ``fbbi_id``
    list, the section ``accno`` (empty string if the section has none),
    the submission accno as ``accession_id`` and a ``uuid`` computed by
    ``generate_image_acquisition_uuid``.
    """
    # (model field, section attribute name, value used when attribute is absent)
    field_specs = [
        ("title_id", "Title", ""),
        ("protocol_description", "Image acquisition parameters", ""),
        ("imaging_instrument_description", "Imaging instrument", ""),
        ("imaging_method_name", "Imaging method", ""),
    ]

    sections = find_sections_recursive(
        submission.section, ["Image acquisition"], []
    )

    results = []
    for section in sections:
        attributes = attributes_to_dict(section.attributes)

        record = {}
        for field, attribute_name, fallback in field_specs:
            record[field] = attributes.get(attribute_name, fallback)

        # TODO: change template / create logic to lookup the fbbi ID
        record["fbbi_id"] = []

        record["accno"] = section.__dict__.get("accno", "")
        record["accession_id"] = submission.accno
        # The uuid is derived last so it can see every field set above.
        record["uuid"] = generate_image_acquisition_uuid(record)
        results.append(record)

    return results


def generate_image_acquisition_uuid(protocol_dict: Dict[str, Any]) -> str:
    """Return the result of ``dict_to_uuid`` for the image acquisition's identifying fields."""
    # Fields handed to dict_to_uuid; order kept as in the original list.
    return dict_to_uuid(
        protocol_dict,
        [
            "accession_id",
            "accno",
            "title_id",
            "protocol_description",
            "imaging_instrument_description",
            "imaging_method_name",
            "fbbi_id",
        ],
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import logging
from typing import List, Any, Dict
from .utils import (
dicts_to_api_models,
find_sections_recursive,
dict_to_uuid,
persist
)
from ..biostudies import (
Submission,
attributes_to_dict,
)
from src.bia_models import bia_data_model

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# NOTE(review): basicConfig runs at import time and touches the root logger;
# confirm this is intended for a library module rather than an entry point.
logging.basicConfig(level=logging.INFO)


def get_specimen_growth_protocol(
    submission: Submission, persist_artefacts: bool = False
) -> List[bia_data_model.SpecimenGrowthProtocol]:
    """Build SpecimenGrowthProtocol models from a submission, optionally persisting them.

    Args:
        submission: Submission whose sections are searched for growth
            protocol information (see
            ``extract_specimen_growth_protocol_dicts``).
        persist_artefacts: When true and at least one model was produced,
            the models are handed to ``persist`` under the
            "specimen_growth_protocol" key together with the submission accno.

    Returns:
        Whatever ``dicts_to_api_models`` produces for the extracted dicts
        using ``bia_data_model.SpecimenGrowthProtocol`` as the target model.
    """
    specimen_growth_protocol_model_dicts = extract_specimen_growth_protocol_dicts(
        submission
    )
    specimen_growth_protocols = dicts_to_api_models(
        specimen_growth_protocol_model_dicts, bia_data_model.SpecimenGrowthProtocol
    )

    # Skip persisting when nothing was extracted.
    if persist_artefacts and specimen_growth_protocols:
        persist(
            specimen_growth_protocols, "specimen_growth_protocol", submission.accno
        )

    return specimen_growth_protocols


def extract_specimen_growth_protocol_dicts(submission: Submission) -> List[Dict[str, Any]]:
    """Return one dict per "Specimen" section found in the submission.

    Each dict holds the mapped section attributes, the section ``accno``
    (empty string if the section has none), the submission accno as
    ``accession_id`` and a ``uuid`` computed by
    ``generate_specimen_growth_protocol_uuid``.
    """
    # (model field, section attribute name, value used when attribute is absent)
    field_specs = [
        ("title_id", "Title", ""),
        ("protocol_description", "Growth protocol", ""),
    ]

    sections = find_sections_recursive(submission.section, ["Specimen"], [])

    results = []
    for section in sections:
        attributes = attributes_to_dict(section.attributes)

        record = {}
        for field, attribute_name, fallback in field_specs:
            record[field] = attributes.get(attribute_name, fallback)

        record["accno"] = section.__dict__.get("accno", "")
        record["accession_id"] = submission.accno
        # The uuid is derived last so it can see every field set above.
        record["uuid"] = generate_specimen_growth_protocol_uuid(record)
        results.append(record)

    return results


def generate_specimen_growth_protocol_uuid(protocol_dict: Dict[str, Any]) -> str:
    """Return the result of ``dict_to_uuid`` for the growth protocol's identifying fields."""
    # Fields handed to dict_to_uuid; order kept as in the original list.
    return dict_to_uuid(
        protocol_dict,
        [
            "accession_id",
            "accno",
            "title_id",
            "protocol_description",
        ],
    )
Loading
Loading