diff --git a/bia-ingest/bia_ingest/bia_object_creation_utils.py b/bia-ingest/bia_ingest/bia_object_creation_utils.py index 9611a588..501f8945 100644 --- a/bia-ingest/bia_ingest/bia_object_creation_utils.py +++ b/bia-ingest/bia_ingest/bia_object_creation_utils.py @@ -1,12 +1,9 @@ -from pathlib import Path -from uuid import UUID from pydantic import BaseModel, ValidationError import hashlib import uuid from typing import Any, Dict, List, Type from bia_ingest.cli_logging import IngestionResult, log_failed_model_creation -from bia_ingest.config import settings def filter_model_dictionary(dictionary: dict, target_model: Type[BaseModel]): @@ -30,7 +27,7 @@ def dicts_to_api_models( dicts: List[Dict[str, Any]], api_model_class: Type[BaseModel], valdiation_error_tracking: IngestionResult, -) -> BaseModel: +) -> List[BaseModel]: """ This function instantiates any API model given a dict of its attributes Hence the use of the pydantic BaseModel which all API models are derived from in the type hinting diff --git a/bia-ingest/bia_ingest/cli.py b/bia-ingest/bia_ingest/cli.py index cac708c5..e11c7930 100644 --- a/bia-ingest/bia_ingest/cli.py +++ b/bia-ingest/bia_ingest/cli.py @@ -7,7 +7,6 @@ from bia_ingest.ingest.study import get_study from bia_ingest.ingest.dataset import get_dataset from bia_ingest.ingest.file_reference import get_file_reference_by_dataset -from bia_ingest.ingest.specimen import get_specimen from bia_ingest.ingest.image_acquisition_protocol import get_image_acquisition_protocol from bia_ingest.ingest.annotation_method import get_annotation_method from bia_ingest.persistence_strategy import ( @@ -76,6 +75,9 @@ def ingest( get_study(submission, result_summary, persister=persister) + # Specimen, BioSample and Protocol (specimen growth protocol) depend on Dataset + # Specimen (note - this is very different from Biostudies.Specimen) artefacts are processed as part of bia_data_models.Dataset + # BioSamples are processed as part of Specimen and specimen growth 
protocol (Protocol) are processed as part of BioSample datasets = get_dataset(submission, result_summary, persister=persister) process_files = determine_file_processing( @@ -95,10 +97,6 @@ def ingest( get_image_acquisition_protocol(submission, result_summary, persister=persister) - # Specimen - # Biosample and Specimen artefacts are processed as part of bia_data_models.Specimen (note - this is very different from Biostudies.Specimen) - get_specimen(submission, result_summary, persister=persister) - get_annotation_method(submission, result_summary, persister=persister) # typer.echo(study.model_dump_json(indent=2)) diff --git a/bia-ingest/bia_ingest/ingest/biosample.py b/bia-ingest/bia_ingest/ingest/biosample.py index ce7661df..023be286 100644 --- a/bia-ingest/bia_ingest/ingest/biosample.py +++ b/bia-ingest/bia_ingest/ingest/biosample.py @@ -1,6 +1,11 @@ import logging from typing import List, Any, Dict, Optional +from bia_ingest.ingest.generic_conversion_utils import get_associations_for_section +from bia_ingest.ingest.specimen_growth_protocol import ( + get_specimen_growth_protocol, +) + from ..bia_object_creation_utils import ( dict_to_uuid, dicts_to_api_models, @@ -30,10 +35,6 @@ def get_biosample( persister: Optional[PersistenceStrategy] = None, ) -> List[bia_data_model.BioSample]: biosample_model_dicts = extract_biosample_dicts(submission) - # - # growth_protocols = get_specimen_growth_protocol( - # submission, result_summary, persister - # ) biosamples = dicts_to_api_models( biosample_model_dicts, @@ -52,7 +53,113 @@ def get_biosample( return biosamples -def extract_biosample_dicts(submission: Submission) -> List[Dict[str, Any]]: +# TODO: Rewrite this function. 
What we need is +# get_biosample_for_association https://app.clickup.com/t/8696nan92 +def get_biosample_by_study_component( + submission: Submission, + result_summary: dict, + persister: Optional[PersistenceStrategy] = None, +) -> Dict[str, bia_data_model.BioSample]: + """Return biosample associated with growth protocol for s.component + + Return a dict with study component title as key and biosample as + value. The biosample will be associated with the growth protocol + for the study component if one exists. + """ + + biosample_model_dicts = extract_biosample_dicts(submission, filter_dict=False) + + # Get growth protocols as UUIDs needed in biosample + # If we are persisting this call ensures the growth protocols + # are created and persisted. + growth_protocols = get_specimen_growth_protocol( + submission, result_summary, persister + ) + growth_protocol_title_to_uuid_map = { + gp.title_id: gp.uuid for gp in growth_protocols + } + + # Get associations to allow mapping to biosample + study_components = find_sections_recursive( + submission.section, + [ + "Study Component", + ], + [], + ) + + biosample_by_study_component = {} + for study_component in study_components: + study_component_name = next( + attr.value for attr in study_component.attributes if attr.name == "Name" + ) + if study_component_name not in biosample_by_study_component: + biosample_by_study_component[study_component_name] = [] + associations = get_associations_for_section(study_component) + for association in associations: + biosample_title = association.get("biosample", None) + specimen_title = association.get("specimen", None) + growth_protocol_uuid = None + if biosample_title and specimen_title: + growth_protocol_uuid = growth_protocol_title_to_uuid_map.get( + specimen_title, None + ) + elif biosample_title: + logger.warning( + f"Could not find specimen association for biosample {biosample_title} in study component {study_component_name}" + ) + else: + # This is to be expected in some cases. 
E.g. Annotation datasets ... + logger.warning( + f"Could not find biosample for study component {study_component_name}" + ) + continue + + # Attach specimen growth protocol uuid and recompute biosample uuid + # Currently assuming there should be only one growth protocol + # per biosample AND biosample titles are unique + # TODO: Log warning if above is not true. + biosample_model_dict = next( + model_dict + for model_dict in biosample_model_dicts + if model_dict["title_id"] == biosample_title + ) + if growth_protocol_uuid: + biosample_model_dict["growth_protocol_uuid"] = growth_protocol_uuid + biosample_model_dict["uuid"] = generate_biosample_uuid( + biosample_model_dict + ) + biosample_model_dict = filter_model_dictionary( + biosample_model_dict, bia_data_model.BioSample + ) + biosample_model = dicts_to_api_models( + [ + biosample_model_dict, + ], + bia_data_model.BioSample, + result_summary[submission.accno], + ) + biosample_by_study_component[study_component_name].append( + biosample_model[0] + ) + + # Save unique biosample models + biosamples = {} + for biosample_list in biosample_by_study_component.values(): + biosamples |= {biosample.uuid: biosample for biosample in biosample_list} + biosamples = list(biosamples.values()) + if persister and biosamples: + persister.persist(biosamples) + log_model_creation_count( + bia_data_model.BioSample, len(biosamples), result_summary[submission.accno] + ) + return biosample_by_study_component + + +def extract_biosample_dicts( + submission: Submission, + filter_dict: bool = True, +) -> List[Dict[str, Any]]: biosample_sections = find_sections_recursive(submission.section, ["Biosample"], []) key_mapping = [ @@ -111,9 +218,11 @@ def extract_biosample_dicts(submission: Submission) -> List[Dict[str, Any]]: model_dict[api_key].append(attr_dict[biostudies_key]) model_dict["accession_id"] = submission.accno + model_dict["growth_protocol_uuid"] = None model_dict["uuid"] = generate_biosample_uuid(model_dict) model_dict["version"] = 0 
- model_dict = filter_model_dictionary(model_dict, bia_data_model.BioSample) + if filter_dict: + model_dict = filter_model_dictionary(model_dict, bia_data_model.BioSample) model_dicts.append(model_dict) return model_dicts @@ -129,5 +238,6 @@ def generate_biosample_uuid(biosample_dict: Dict[str, Any]) -> str: "intrinsic_variable_description", "extrinsic_variable_description", "experimental_variable_description", + "growth_protocol_uuid", ] return dict_to_uuid(biosample_dict, attributes_to_consider) diff --git a/bia-ingest/bia_ingest/ingest/dataset.py b/bia-ingest/bia_ingest/ingest/dataset.py index 6cf258b9..8e89871f 100644 --- a/bia-ingest/bia_ingest/ingest/dataset.py +++ b/bia-ingest/bia_ingest/ingest/dataset.py @@ -10,7 +10,7 @@ from ..cli_logging import log_failed_model_creation, log_model_creation_count from .generic_conversion_utils import ( - get_generic_section_as_list, + get_associations_for_section, get_generic_section_as_dict, ) import bia_ingest.ingest.study as study_conversion @@ -58,40 +58,7 @@ def get_dataset( datasets = [] for section in study_components: attr_dict = attributes_to_dict(section.attributes) - key_mapping = [ - ( - "image_analysis", - "Image analysis", - None, - ), - ( - "image_correlation", - "Image correlation", - None, - ), - ( - "biosample", - "Biosample", - None, - ), - ( - "image_acquisition", - "Image acquisition", - None, - ), - ( - "specimen", - "Specimen", - None, - ), - ] - associations = get_generic_section_as_list( - section, - [ - "Associations", - ], - key_mapping, - ) + associations = get_associations_for_section(section) analysis_method_list = [] @@ -163,7 +130,9 @@ def get_dataset( ) dataset.attribute.append(acquisition_process_uuid_as_attr) - subject = get_specimen_for_dataset(submission, dataset, result_summary) + subject = get_specimen_for_dataset( + submission, dataset, result_summary, persister + ) if subject: subject_uuid_attr_dict = { "provenance": semantic_models.AttributeProvenance("bia_ingest"), diff --git 
a/bia-ingest/bia_ingest/ingest/generic_conversion_utils.py b/bia-ingest/bia_ingest/ingest/generic_conversion_utils.py index 9b37effe..16a0d119 100644 --- a/bia-ingest/bia_ingest/ingest/generic_conversion_utils.py +++ b/bia-ingest/bia_ingest/ingest/generic_conversion_utils.py @@ -24,7 +24,7 @@ def get_generic_section_as_list( root: Submission | Section, section_name: List[str], - key_mapping: List[Tuple[str, str, str | None | List]], + key_mapping: List[Tuple[str, str, Union[str, None, List]]], mapped_object: Optional[BaseModel] = None, mapped_attrs_dict: Optional[Dict[str, Any]] = None, valdiation_error_tracking: Optional[IngestionResult] = None, @@ -134,3 +134,44 @@ def object_value_pair_to_dict( object_dict[key].append(obj) return object_dict + + +def get_associations_for_section( + section: Section, +) -> List[BaseModel | Dict[str, str | List[str]]]: + """Return the associations for a section (assume Study Component)""" + key_mapping = [ + ( + "image_analysis", + "Image analysis", + None, + ), + ( + "image_correlation", + "Image correlation", + None, + ), + ( + "biosample", + "Biosample", + None, + ), + ( + "image_acquisition", + "Image acquisition", + None, + ), + ( + "specimen", + "Specimen", + None, + ), + ] + associations = get_generic_section_as_list( + section, + [ + "Associations", + ], + key_mapping, + ) + return associations diff --git a/bia-ingest/bia_ingest/ingest/specimen.py b/bia-ingest/bia_ingest/ingest/specimen.py index cac46c1e..0d28a278 100644 --- a/bia-ingest/bia_ingest/ingest/specimen.py +++ b/bia-ingest/bia_ingest/ingest/specimen.py @@ -9,9 +9,8 @@ filter_model_dictionary, ) -from ..cli_logging import log_failed_model_creation, log_model_creation_count +from ..cli_logging import log_model_creation_count from ..persistence_strategy import PersistenceStrategy -from pydantic import ValidationError from .generic_conversion_utils import ( get_generic_section_as_list, object_value_pair_to_dict, @@ -31,32 +30,35 @@ def get_specimen_for_dataset( 
submission: Submission, dataset: bia_data_model.Dataset, result_summary: dict, + persister: Optional[PersistenceStrategy] = None, ) -> bia_data_model.Specimen: """Return bia_data_model.Specimen for a particular dataset""" - # According to https://app.clickup.com/t/8695fqxpy we want one specimen - # per dataset, so if more than one association we are concatenation - # the required information from each. associations = next( attr.value.get("associations", []) for attr in dataset.attribute if attr.name == "associations" ) + # According to https://app.clickup.com/t/8695fqxpy we want one specimen + # per dataset, so if more than one association we are concatenating + # the required information from each. specimen_titles = set([association["specimen"] for association in associations]) - biosamples = biosample_conversion.get_biosample(submission, result_summary) - # Put UUIDs from assoication in set to prevent duplication - biosample_uuids = set() - for association in associations: - biosample_uuids.add( - *[b.uuid for b in biosamples if b.title_id == association["biosample"]] + # TODO: This function has redundancy - will be re-written: https://app.clickup.com/t/8696nan92 + biosamples_by_study_component = ( + biosample_conversion.get_biosample_by_study_component( + submission, result_summary, persister ) + ) + biosamples = biosamples_by_study_component.get(dataset.title_id, []) + # Put UUIDs of biosamples for study component in set to prevent duplication + biosample_uuids = set([biosample.uuid for biosample in biosamples]) biosample_list = list(biosample_uuids) biosample_list.sort() imaging_preparation_protocols = ( sipp_conversion.get_specimen_imaging_preparation_protocol( - submission, result_summary + submission, result_summary, persister ) ) imaging_preparation_protocol_list = [ @@ -75,16 +77,24 @@ def get_specimen_for_dataset( model_dict = filter_model_dictionary(model_dict, bia_data_model.Specimen) - try: - specimen = 
bia_data_model.Specimen.model_validate(model_dict) - except ValidationError: - log_failed_model_creation( - bia_data_model.Specimen, - result_summary[submission.accno], - ) - specimen = None + specimen = dicts_to_api_models( + [ + model_dict, + ], + bia_data_model.Specimen, + result_summary[submission.accno], + ) + + log_model_creation_count( + bia_data_model.Specimen, len(specimen), result_summary[submission.accno] + ) - return specimen + if specimen: + if persister: + persister.persist(specimen) + return specimen[0] + else: + return None # TODO: Discuss with @FS if we still need this function ( see clickup diff --git a/bia-ingest/bia_ingest/ingest/specimen_growth_protocol.py b/bia-ingest/bia_ingest/ingest/specimen_growth_protocol.py index d2bba25e..a7981809 100644 --- a/bia-ingest/bia_ingest/ingest/specimen_growth_protocol.py +++ b/bia-ingest/bia_ingest/ingest/specimen_growth_protocol.py @@ -51,6 +51,7 @@ def get_specimen_growth_protocol( def extract_specimen_growth_protocol_dicts( submission: Submission, + filter_dict: bool = True, ) -> List[Dict[str, Any]]: specimen_sections = find_sections_recursive(submission.section, ["Specimen"], []) @@ -72,7 +73,13 @@ def extract_specimen_growth_protocol_dicts( model_dict["accession_id"] = submission.accno model_dict["uuid"] = generate_specimen_growth_protocol_uuid(model_dict) model_dict["version"] = 0 - model_dict = filter_model_dictionary(model_dict, bia_data_model.Protocol) + + # Allow return of either filtered or unfiltered dict. + # The unfiltered dict is useful to get more information about + # the specimen growth protocol. E.g. 
the title_id is used in + # identifying parent specimen in biostudies pagetab + if filter_dict: + model_dict = filter_model_dictionary(model_dict, bia_data_model.Protocol) model_dicts.append(model_dict) diff --git a/bia-ingest/test/mock_objects/mock_association.py b/bia-ingest/test/mock_objects/mock_association.py index daf072c8..4f6bb630 100644 --- a/bia-ingest/test/mock_objects/mock_association.py +++ b/bia-ingest/test/mock_objects/mock_association.py @@ -3,7 +3,7 @@ from typing import List, Dict -def get_association_dicts() -> List[List[Dict]]: +def get_association_dicts() -> Dict[str, List[Dict]]: """Return list of List[dict]s for study component associaions Created independently to prevent recursion when computing @@ -11,9 +11,9 @@ def get_association_dicts() -> List[List[Dict]]: are generated looking at associations in dataset """ - return [ + return { # Associations for study component (dataset) 1 - [ + "Study Component 1": [ { "image_analysis": "Test image analysis", "image_correlation": None, @@ -30,7 +30,7 @@ def get_association_dicts() -> List[List[Dict]]: }, ], # Associations for study component (dataset) 2 - [ + "Study Component 2": [ { "image_analysis": "Test image analysis", "image_correlation": None, @@ -39,4 +39,4 @@ def get_association_dicts() -> List[List[Dict]]: "specimen": "Test specimen 2", }, ], - ] + } diff --git a/bia-ingest/test/mock_objects/mock_biosample.py b/bia-ingest/test/mock_objects/mock_biosample.py index cce8b7f6..e44e168d 100644 --- a/bia-ingest/test/mock_objects/mock_biosample.py +++ b/bia-ingest/test/mock_objects/mock_biosample.py @@ -1,21 +1,12 @@ -from typing import List +from typing import Dict, List +import copy +from .mock_specimen_growth_protocol import get_specimen_growth_protocol from bia_shared_datamodels import bia_data_model, semantic_models from bia_ingest.bia_object_creation_utils import dict_to_uuid from .utils import accession_id -def get_biosample() -> List[bia_data_model.BioSample]: - # For UUID - 
attributes_to_consider = [ - "accession_id", - "accno", - "title_id", - "organism_classification", - "biological_entity_description", - "intrinsic_variable_description", - "extrinsic_variable_description", - "experimental_variable_description", - ] +def get_biosample_dicts() -> List[dict]: taxon1 = semantic_models.Taxon.model_validate( { "common_name": "human", @@ -49,6 +40,7 @@ def get_biosample() -> List[bia_data_model.BioSample]: "Test intrinsic variable 1\nwith escaped character", ], "version": 0, + "growth_protocol_uuid": None, }, { "accno": "Biosample-2", @@ -68,13 +60,70 @@ def get_biosample() -> List[bia_data_model.BioSample]: "Test intrinsic variable 2", ], "version": 0, + "growth_protocol_uuid": None, }, ] + return biosample_info + + +def create_biosample(biosample_dict) -> bia_data_model.BioSample: + # For UUID + attributes_to_consider_for_uuid = [ + "accession_id", + "accno", + "title_id", + "organism_classification", + "biological_entity_description", + "intrinsic_variable_description", + "extrinsic_variable_description", + "experimental_variable_description", + "growth_protocol_uuid", + ] + biosample_dict2 = copy.deepcopy(biosample_dict) + biosample_dict2["uuid"] = dict_to_uuid( + biosample_dict, attributes_to_consider_for_uuid + ) + biosample_dict2.pop("accno") + biosample_dict2.pop("accession_id") + return bia_data_model.BioSample.model_validate(biosample_dict2) + +def get_biosample() -> List[bia_data_model.BioSample]: + biosample_dicts = get_biosample_dicts() biosample = [] - for biosample_dict in biosample_info: - biosample_dict["uuid"] = dict_to_uuid(biosample_dict, attributes_to_consider) - biosample_dict.pop("accno") - biosample_dict.pop("accession_id") - biosample.append(bia_data_model.BioSample.model_validate(biosample_dict)) + for biosample_dict in biosample_dicts: + biosample.append(create_biosample(biosample_dict)) return biosample + + +def get_biosample_by_study_component() -> Dict[str, List[bia_data_model.BioSample]]: + biosample_dicts = 
get_biosample_dicts() + biosample_by_study_component = {} + growth_protocol_uuids = [gp.uuid for gp in get_specimen_growth_protocol()] + + # For study component 1, association 1 + study_component_name = "Study Component 1" + biosample_dict = biosample_dicts[0] + biosample_dict["growth_protocol_uuid"] = growth_protocol_uuids[0] + biosample = create_biosample(biosample_dict) + biosample_by_study_component[study_component_name] = [ + biosample, + ] + + # For study component 1, association 2 + study_component_name = "Study Component 1" + biosample_dict = biosample_dicts[1] + biosample_dict["growth_protocol_uuid"] = growth_protocol_uuids[0] + biosample = create_biosample(biosample_dict) + biosample_by_study_component[study_component_name].append(biosample) + + # For study component 2, association 1 + study_component_name = "Study Component 2" + biosample_dict = biosample_dicts[1] + biosample_dict["growth_protocol_uuid"] = growth_protocol_uuids[1] + biosample = create_biosample(biosample_dict) + biosample_by_study_component[study_component_name] = [ + biosample, + ] + + return biosample_by_study_component diff --git a/bia-ingest/test/mock_objects/mock_dataset.py b/bia-ingest/test/mock_objects/mock_dataset.py index 914d41f9..592b0f14 100644 --- a/bia-ingest/test/mock_objects/mock_dataset.py +++ b/bia-ingest/test/mock_objects/mock_dataset.py @@ -17,7 +17,7 @@ def get_dataset() -> List[bia_data_model.Dataset]: "accession_id", ], ) - associations = get_association_dicts() + associations = list(get_association_dicts().values()) specimens = get_test_specimen_for_image() image_acquisition_protocol_uuids = [ str(iap.uuid) for iap in get_image_acquisition_protocol() diff --git a/bia-ingest/test/mock_objects/mock_specimen.py b/bia-ingest/test/mock_objects/mock_specimen.py index 00176df8..ae12eb5b 100644 --- a/bia-ingest/test/mock_objects/mock_specimen.py +++ b/bia-ingest/test/mock_objects/mock_specimen.py @@ -7,7 +7,7 @@ from .mock_specimen_imaging_preparation_protocol import ( 
get_specimen_imaging_preparation_protocol, ) -from .mock_biosample import get_biosample +from .mock_biosample import get_biosample, get_biosample_by_study_component # This function is written to provide test data for bia_data_model.Image. It currently @@ -23,21 +23,21 @@ def get_test_specimen_for_image() -> List[bia_data_model.Specimen]: imaging_preparation_protocols = { ipp.title_id: ipp.uuid for ipp in get_specimen_imaging_preparation_protocol() } - biosamples = {biosample.title_id: biosample.uuid for biosample in get_biosample()} + biosamples_by_study_component = get_biosample_by_study_component() # Specimens correspond to associations in Experimental Imaging Dataset dataset_associations = get_association_dicts() specimens = [] - for associations in dataset_associations: - biosample_titles = [a["biosample"] for a in associations] + for study_component, associations in dataset_associations.items(): + biosamples = biosamples_by_study_component.get(study_component, []) + biosample_uuids = [biosample.uuid for biosample in biosamples] + biosample_uuids.sort() specimen_title = associations[0]["specimen"] specimen_dict = { "imaging_preparation_protocol_uuid": [ imaging_preparation_protocols[specimen_title] ], - "sample_of_uuid": [ - biosamples[biosample_title] for biosample_title in biosample_titles - ], + "sample_of_uuid": biosample_uuids, "accession_id": accession_id, } specimen_dict["uuid"] = dict_to_uuid(specimen_dict, attributes_to_consider) diff --git a/bia-ingest/test/mock_objects/mock_specimen_growth_protocol.py b/bia-ingest/test/mock_objects/mock_specimen_growth_protocol.py index 67edc98a..b1c9d8ed 100644 --- a/bia-ingest/test/mock_objects/mock_specimen_growth_protocol.py +++ b/bia-ingest/test/mock_objects/mock_specimen_growth_protocol.py @@ -34,6 +34,5 @@ def get_specimen_growth_protocol() -> List[bia_data_model.Protocol]: protocol_dict["uuid"] = dict_to_uuid(protocol_dict, attributes_to_consider) protocol_dict.pop("accno") protocol_dict.pop("accession_id") - 
protocol_dict.pop("title_id") protocol.append(bia_data_model.Protocol.model_validate(protocol_dict)) return protocol diff --git a/bia-ingest/test/test_bia_ingest_cli.py b/bia-ingest/test/test_bia_ingest_cli.py index d7f19efc..50d1fa6e 100644 --- a/bia-ingest/test/test_bia_ingest_cli.py +++ b/bia-ingest/test/test_bia_ingest_cli.py @@ -1,3 +1,4 @@ +from test.mock_objects import mock_specimen_growth_protocol from typer.testing import CliRunner from bia_ingest import cli from bia_ingest.ingest.generic_conversion_utils import settings @@ -5,7 +6,6 @@ from bia_shared_datamodels import bia_data_model import pytest from .mock_objects import ( - utils, mock_study, mock_biosample, mock_dataset, @@ -18,20 +18,17 @@ runner = CliRunner() -accession_id = utils.accession_id - @pytest.fixture def expected_objects(): expected_objects_dict = { "study": mock_study.get_study(), "dataset": mock_dataset.get_dataset(), - "specimen": mock_specimen.get_specimen(), - "bio_sample": mock_biosample.get_biosample(), "image_acquisition_protocol": mock_image_acquisition_protocol.get_image_acquisition_protocol(), + "specimen": mock_specimen.get_test_specimen_for_image(), "specimen_imaging_preparation_protocol": mock_specimen_imaging_preparation_protocol.get_specimen_imaging_preparation_protocol(), "annotation_method": mock_annotation_method.get_annotation_method(), - # "protocol": mock_specimen_growth_protocol.get_specimen_growth_protocol(), + "protocol": mock_specimen_growth_protocol.get_specimen_growth_protocol(), } # File references are a special case as they depend on experimental dataset @@ -41,6 +38,13 @@ def expected_objects(): ) expected_objects_dict["file_reference"] = expected_file_references + # Biosamples are also a special case as each study component association can + # potentially create its own biosample. 
+ expected_biosamples = [] + for biosample_list in mock_biosample.get_biosample_by_study_component().values(): + expected_biosamples.extend(biosample_list) + expected_objects_dict["bio_sample"] = expected_biosamples + n_expected_objects = 0 for expected_objects in expected_objects_dict.values(): if isinstance(expected_objects, list): @@ -76,7 +80,7 @@ def _load_submission_table_info(accession_id: str): cli.app, [ "ingest", - accession_id, + test_submission.accno, "--persistence-mode", "disk", "--process-filelist", @@ -90,7 +94,7 @@ def _load_submission_table_info(accession_id: str): assert len(files_written) == n_expected_objects for dir_name, expected_objects in expected_objects_dict.items(): - dir_path = tmp_path / dir_name / accession_id + dir_path = tmp_path / dir_name / test_submission.accno if not isinstance(expected_objects, list): expected_objects = [ diff --git a/bia-ingest/test/test_shared_models.py b/bia-ingest/test/test_shared_models.py index f023a78c..d5fa0c02 100644 --- a/bia-ingest/test/test_shared_models.py +++ b/bia-ingest/test/test_shared_models.py @@ -43,6 +43,10 @@ mock_study.get_study, study.get_study, ), + ( + mock_biosample.get_biosample_by_study_component, + biosample.get_biosample_by_study_component, + ), ( mock_biosample.get_biosample, biosample.get_biosample,