diff --git a/bia-export/bia_export/cli.py b/bia-export/bia_export/cli.py index 564c9010..eaabb5c9 100644 --- a/bia-export/bia_export/cli.py +++ b/bia-export/bia_export/cli.py @@ -3,7 +3,9 @@ from rich.logging import RichHandler from typing_extensions import Annotated from pathlib import Path -from .website_conversion import create_study +from .website_conversion import create_studies +from typing import List +import json logging.basicConfig( level="NOTSET", format="%(message)s", datefmt="[%X]", handlers=[RichHandler()] @@ -15,17 +17,17 @@ @app.command() def website_study( - accession_id: Annotated[str, typer.Argument(help="Accession ID of the study to export")], + accession_id_list: Annotated[List[str], typer.Argument(help="Accession IDs of the studies to export")], root_directory: Annotated[Path, typer.Option("--root", "-r", help="If root directory specified then use files there, rather than calling API")] = None, output_filename: Annotated[Path, typer.Option("--out_file", "-o",)] = Path("bia-images-export.json") ): abs_root = root_directory.resolve() - study = create_study(accession_id, abs_root) + studies_map = create_studies(accession_id_list, abs_root) logging.info(f"Writing study info to {output_filename.absolute()}") with open(output_filename, "w") as output: - output.write(study.model_dump_json(indent=4)) + output.write(json.dumps(studies_map, indent=4)) @app.command() diff --git a/bia-export/bia_export/website_conversion.py b/bia-export/bia_export/website_conversion.py index 2c361981..dee6ae53 100644 --- a/bia-export/bia_export/website_conversion.py +++ b/bia-export/bia_export/website_conversion.py @@ -56,6 +56,14 @@ def find_associated_objects( return linked_object +def create_studies(accession_id_list: str, root_directory: Path) -> dict: + study_map = {} + for accession_id in accession_id_list: + study = create_study(accession_id, root_directory) + study_map[accession_id] = study.model_dump(mode='json') + return study_map + + def create_study(accession_id: str, root_directory: Path) -> Study: if root_directory: @@ -171,7 +179,7 @@ def process_details_section( eid_dict["specimen_growth_protocol"] = process_details_section( root_directory, accession_id, - detail_map[BioSample], + detail_map[SpecimenGrowthProtocol], association_by_type["specimen"], ) eid_dict["acquisition_process"] = process_details_section( diff --git a/bia-export/test/output_data/bia_export.json b/bia-export/test/output_data/bia_export.json index e25b2dde..24e39915 100644 --- a/bia-export/test/output_data/bia_export.json +++ b/bia-export/test/output_data/bia_export.json @@ -1,254 +1,256 @@ { - "uuid": "a2fdbd58-ee11-4cd9-bc6a-f3d3da7fff71", - "version": 1, - "model": { - "type_name": "Study", - "version": 1 - }, - "accession_id": "S-BIADTEST", - "licence": "CC0", - "author": [ - { - "rorid": null, - "address": null, - "website": null, - "orcid": "0000-0000-0000-0000", - "display_name": "Test Author1", - "affiliation": [ - { - "rorid": null, - "address": null, - "website": null, - "display_name": "Test College 1" - } - ], - "contact_email": "test_author1@ebi.ac.uk", - "role": "corresponding author" + "S-BIADTEST": { + "uuid": "a2fdbd58-ee11-4cd9-bc6a-f3d3da7fff71", + "version": 1, + "model": { + "type_name": "Study", + "version": 1 }, - { - "rorid": null, - "address": null, - "website": null, - "orcid": "1111-1111-1111-1111", - "display_name": "Test Author2", - "affiliation": [ - { - "rorid": null, - "address": null, - "website": null, - "display_name": "Test College 2" - } - ], - "contact_email": "test_author2@ebi.ac.uk", - "role": "first author" - } - ], - "title": "A test submission with title greater than 25 characters", - "release_date": "2024-02-13", - "description": "A test submission to allow testing without retrieving from bia server", - "keyword": [ - "Test keyword1", - "Test keyword2", - "Test keyword3" - ], - "acknowledgement": "We thank you", - "see_also": [], - "related_publication": [], - "grant": [ - { - "id": "TESTFUNDS1", - "funder": [ - { - "display_name": "Test funding body1", - "id": null - } - ] - }, - { - "id": "TESTFUNDS2", - "funder": [ - { - "display_name": "Test funding body2", - "id": null - } - ] - } - ], - "funding_statement": "This work was funded by the EBI", - "attribute": {}, - "experimental_imaging_component": [ - { - "title_id": "Study Component 1", - "uuid": "47a4ab60-c76d-4424-bfaa-c2a024de720c", - "version": 1, - "model": { - "type_name": "ExperimentalImagingDataset", - "version": 1 + "accession_id": "S-BIADTEST", + "licence": "CC0", + "author": [ + { + "rorid": null, + "address": null, + "website": null, + "orcid": "0000-0000-0000-0000", + "display_name": "Test Author1", + "affiliation": [ + { + "rorid": null, + "address": null, + "website": null, + "display_name": "Test College 1" + } + ], + "contact_email": "test_author1@ebi.ac.uk", + "role": "corresponding author" }, - "description": "Description of study component 1", - "attribute": { - "associations": [ + { + "rorid": null, + "address": null, + "website": null, + "orcid": "1111-1111-1111-1111", + "display_name": "Test Author2", + "affiliation": [ { - "biosample": "Test Biosample 1", - "image_acquisition": "Test Primary Screen Image Acquisition", - "specimen": "Test specimen 1" - }, + "rorid": null, + "address": null, + "website": null, + "display_name": "Test College 2" + } + ], + "contact_email": "test_author2@ebi.ac.uk", + "role": "first author" + } + ], + "title": "A test submission with title greater than 25 characters", + "release_date": "2024-02-13", + "description": "A test submission to allow testing without retrieving from bia server", + "keyword": [ + "Test keyword1", + "Test keyword2", + "Test keyword3" + ], + "acknowledgement": "We thank you", + "see_also": [], + "related_publication": [], + "grant": [ + { + "id": "TESTFUNDS1", + "funder": [ { - "biosample": "Test Biosample 2", - "image_acquisition": "Test Primary Screen Image Acquisition", - "specimen": "Test specimen 1" + "display_name": "Test funding body1", + "id": null } ] }, - "analysis_method": [ - { - "protocol_description": "Test image analysis", - "features_analysed": "Test image analysis overview" - } - ], - "correlation_method": [], - "example_image_uri": [], - "submitted_in_study_uuid": "a2fdbd58-ee11-4cd9-bc6a-f3d3da7fff71", - "acquisition_process": [ - { - "default_open": true, - "title_id": "Test Primary Screen Image Acquisition", - "uuid": "c2e44a1b-a43c-476e-8ddf-8587f4c955b3", - "version": 1, - "model": { - "type_name": "ImageAcquisition", - "version": 1 - }, - "protocol_description": "Test image acquisition parameters 1", - "imaging_instrument_description": "Test imaging instrument 1", - "fbbi_id": [], - "imaging_method_name": [ - "confocal microscopy" - ] - } - ], - "specimen_imaging_preparation_protocol": [ - { - "default_open": true, - "title_id": "Test specimen 1", - "uuid": "7199d730-29f1-4ad8-b599-e9089cbb2d7b", - "version": 1, - "model": { - "type_name": "SpecimenImagingPreparationProtocol", - "version": 1 - }, - "protocol_description": "Test sample preparation protocol 1", - "signal_channel_information": [] - } - ], - "biological_entity": [ - { - "default_open": true, - "title_id": "Test Biosample 1", - "uuid": "64a67727-4e7c-469a-91c4-6219ae072e99", - "version": 1, - "model": { - "type_name": "BioSample", - "version": 1 - }, - "organism_classification": [ + { + "id": "TESTFUNDS2", + "funder": [ + { + "display_name": "Test funding body2", + "id": null + } + ] + } + ], + "funding_statement": "This work was funded by the EBI", + "attribute": {}, + "experimental_imaging_component": [ + { + "title_id": "Study Component 1", + "uuid": "47a4ab60-c76d-4424-bfaa-c2a024de720c", + "version": 1, + "model": { + "type_name": "ExperimentalImagingDataset", + "version": 1 + }, + "description": "Description of study component 1", + "attribute": { + "associations": [ { - "common_name": "human", - "scientific_name": "Homo sapiens", - "ncbi_id": null + "biosample": "Test Biosample 1", + "image_acquisition": "Test Primary Screen Image Acquisition", + "specimen": "Test specimen 1" + }, + { + "biosample": "Test Biosample 2", + "image_acquisition": "Test Primary Screen Image Acquisition", + "specimen": "Test specimen 1" } - ], - "biological_entity_description": "Test biological entity 1", - "experimental_variable_description": [ - "Test experimental entity 1" - ], - "extrinsic_variable_description": [ - "Test extrinsic variable 1" - ], - "intrinsic_variable_description": [ - "Test intrinsic variable 1\\nwith escaped character" ] }, - { - "default_open": true, - "title_id": "Test Biosample 2", - "uuid": "6950718c-4917-47a1-a807-11b874e80a23", - "version": 1, - "model": { - "type_name": "BioSample", - "version": 1 + "analysis_method": [ + { + "protocol_description": "Test image analysis", + "features_analysed": "Test image analysis overview" + } + ], + "correlation_method": [], + "example_image_uri": [], + "submitted_in_study_uuid": "a2fdbd58-ee11-4cd9-bc6a-f3d3da7fff71", + "acquisition_process": [ + { + "default_open": true, + "title_id": "Test Primary Screen Image Acquisition", + "uuid": "c2e44a1b-a43c-476e-8ddf-8587f4c955b3", + "version": 1, + "model": { + "type_name": "ImageAcquisition", + "version": 1 + }, + "protocol_description": "Test image acquisition parameters 1", + "imaging_instrument_description": "Test imaging instrument 1", + "fbbi_id": [], + "imaging_method_name": [ + "confocal microscopy" + ] + } + ], + "specimen_imaging_preparation_protocol": [ + { + "default_open": true, + "title_id": "Test specimen 1", + "uuid": "7199d730-29f1-4ad8-b599-e9089cbb2d7b", + "version": 1, + "model": { + "type_name": "SpecimenImagingPreparationProtocol", + "version": 1 + }, + "protocol_description": "Test sample preparation protocol 1", + "signal_channel_information": [] + } + ], + "biological_entity": [ + { + "default_open": true, + "title_id": "Test Biosample 1", + "uuid": "64a67727-4e7c-469a-91c4-6219ae072e99", + "version": 1, + "model": { + "type_name": "BioSample", + "version": 1 + }, + "organism_classification": [ + { + "common_name": "human", + "scientific_name": "Homo sapiens", + "ncbi_id": null + } + ], + "biological_entity_description": "Test biological entity 1", + "experimental_variable_description": [ + "Test experimental entity 1" + ], + "extrinsic_variable_description": [ + "Test extrinsic variable 1" + ], + "intrinsic_variable_description": [ + "Test intrinsic variable 1\\nwith escaped character" + ] }, - "organism_classification": [ + { + "default_open": true, + "title_id": "Test Biosample 2", + "uuid": "6950718c-4917-47a1-a807-11b874e80a23", + "version": 1, + "model": { + "type_name": "BioSample", + "version": 1 + }, + "organism_classification": [ + { + "common_name": "mouse", + "scientific_name": "Mus musculus", + "ncbi_id": null + } + ], + "biological_entity_description": "Test biological entity 2", + "experimental_variable_description": [ + "Test experimental entity 2" + ], + "extrinsic_variable_description": [ + "Test extrinsic variable 2" + ], + "intrinsic_variable_description": [ + "Test intrinsic variable 2" + ] + } + ], + "specimen_growth_protocol": [] + }, + { + "title_id": "Study Component 2", + "uuid": "850a1ca3-9681-4a8a-b625-477936fcb954", + "version": 1, + "model": { + "type_name": "ExperimentalImagingDataset", + "version": 1 + }, + "description": "Description of study component 2", + "attribute": { + "associations": [ { - "common_name": "mouse", - "scientific_name": "Mus musculus", - "ncbi_id": null + "image_analysis": "Test image analysis", + "image_correlation": null, + "biosample": "Test Biosample 2 ", + "image_acquisition": "Test Primary Screen Image Acquisition", + "specimen": "Test specimen 2" } - ], - "biological_entity_description": "Test biological entity 2", - "experimental_variable_description": [ - "Test experimental entity 2" - ], - "extrinsic_variable_description": [ - "Test extrinsic variable 2" - ], - "intrinsic_variable_description": [ - "Test intrinsic variable 2" ] - } - ], - "specimen_growth_protocol": [] - }, - { - "title_id": "Study Component 2", - "uuid": "850a1ca3-9681-4a8a-b625-477936fcb954", - "version": 1, - "model": { - "type_name": "ExperimentalImagingDataset", - "version": 1 - }, - "description": "Description of study component 2", - "attribute": { - "associations": [ + }, + "analysis_method": [ { - "image_analysis": "Test image analysis", - "image_correlation": null, - "biosample": "Test Biosample 2 ", - "image_acquisition": "Test Primary Screen Image Acquisition", - "specimen": "Test specimen 2" + "protocol_description": "Test image analysis", + "features_analysed": "Test image analysis overview" } - ] - }, - "analysis_method": [ - { - "protocol_description": "Test image analysis", - "features_analysed": "Test image analysis overview" - } - ], - "correlation_method": [], - "example_image_uri": [], - "submitted_in_study_uuid": "a2fdbd58-ee11-4cd9-bc6a-f3d3da7fff71", - "acquisition_process": [ - { - "default_open": false, - "title_id": "Test Primary Screen Image Acquisition", - "uuid": "c2e44a1b-a43c-476e-8ddf-8587f4c955b3", - "version": 1, - "model": { - "type_name": "ImageAcquisition", - "version": 1 - }, - "protocol_description": "Test image acquisition parameters 1", - "imaging_instrument_description": "Test imaging instrument 1", - "fbbi_id": [], - "imaging_method_name": [ - "confocal microscopy" - ] - } - ], - "specimen_imaging_preparation_protocol": [], - "biological_entity": [], - "specimen_growth_protocol": [] - } - ] + ], + "correlation_method": [], + "example_image_uri": [], + "submitted_in_study_uuid": "a2fdbd58-ee11-4cd9-bc6a-f3d3da7fff71", + "acquisition_process": [ + { + "default_open": false, + "title_id": "Test Primary Screen Image Acquisition", + "uuid": "c2e44a1b-a43c-476e-8ddf-8587f4c955b3", + "version": 1, + "model": { + "type_name": "ImageAcquisition", + "version": 1 + }, + "protocol_description": "Test image acquisition parameters 1", + "imaging_instrument_description": "Test imaging instrument 1", + "fbbi_id": [], + "imaging_method_name": [ + "confocal microscopy" + ] + } + ], + "specimen_imaging_preparation_protocol": [], + "biological_entity": [], + "specimen_growth_protocol": [] + } + ] + } } \ No newline at end of file diff --git a/bia-ingest-shared-models/bia_ingest_sm/biostudies.py b/bia-ingest-shared-models/bia_ingest_sm/biostudies.py index 69db0041..66b3fc1a 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/biostudies.py +++ b/bia-ingest-shared-models/bia_ingest_sm/biostudies.py @@ -9,7 +9,7 @@ from pydantic import BaseModel, TypeAdapter -logger = logging.getLogger(__name__) +logger = logging.getLogger('__main__.'+__name__) STUDY_URL_TEMPLATE = "https://www.ebi.ac.uk/biostudies/api/v1/studies/{accession}" diff --git a/bia-ingest-shared-models/bia_ingest_sm/cli.py b/bia-ingest-shared-models/bia_ingest_sm/cli.py index 4e8fff4e..19d436ba 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/cli.py +++ b/bia-ingest-shared-models/bia_ingest_sm/cli.py @@ -1,5 +1,5 @@ import typer -from typing import Optional +from typing import List from typing_extensions import Annotated from bia_ingest_sm.biostudies import load_submission from bia_ingest_sm.conversion.study import get_study @@ -9,31 +9,63 @@ from bia_ingest_sm.conversion.file_reference import get_file_reference_by_dataset from bia_ingest_sm.conversion.specimen import get_specimen from bia_ingest_sm.conversion.image_acquisition import get_image_acquisition +import logging +from rich import print +from rich.logging import RichHandler +from .cli_logging import tabulate_errors, ObjectValidationResult app = typer.Typer() +logging.basicConfig( + level=logging.INFO, + format="%(message)s", + handlers=[RichHandler(show_time=False)] +) + +logger = logging.getLogger() + @app.command(help="Ingest from biostudies and echo json of bia_data_model.Study") -def ingest(accession_id: Annotated[str, typer.Argument()],) -> None: - submission = load_submission(accession_id) +def ingest(accession_id_list: Annotated[List[str], typer.Argument()], + verbose: Annotated[bool, typer.Option("-v")] = False) -> None: + + + if verbose: + logger.setLevel(logging.DEBUG) + + result_summary = {} + + for accession_id in accession_id_list: + print(f"[blue]-------- Starting ingest of {accession_id} --------[/blue]") + logger.debug(f"starting ingest of {accession_id}") + + result_summary[accession_id] = ObjectValidationResult() + + submission = load_submission(accession_id) + + study = get_study(submission, result_summary, persist_artefacts=True) - study = get_study(submission, persist_artefacts=True) + experimental_imaging_datasets = get_experimental_imaging_dataset( + submission, result_summary, persist_artefacts=True + ) - experimental_imaging_datasets = get_experimental_imaging_dataset( - submission, persist_artefacts=True - ) + file_references = get_file_reference_by_dataset( + submission, experimental_imaging_datasets, result_summary, persist_artefacts=True + ) - file_references = get_file_reference_by_dataset( - submission, experimental_imaging_datasets, persist_artefacts=True - ) + image_acquisitions = get_image_acquisition(submission, result_summary, persist_artefacts=True) - image_acquisitions = get_image_acquisition(submission, persist_artefacts=True) + # Specimen + # Biosample and Specimen artefacts are processed as part of bia_data_models.Specimen (note - this is very different from Biostudies.Specimen) + specimens = get_specimen(submission, result_summary, persist_artefacts=True) - # Specimen - # Biosample and Specimen artefacts are processed as part of bia_data_models.Specimen (note - this is very different from Biostudies.Specimen) - specimens = get_specimen(submission, persist_artefacts=True) + # typer.echo(study.model_dump_json(indent=2)) + + logger.debug(f"COMPLETED: Ingest of: {accession_id}") + print(f"[green]-------- Completed ingest of {accession_id} --------[/green]") + + print(tabulate_errors(result_summary)) - # typer.echo(study.model_dump_json(indent=2)) @app.callback() diff --git a/bia-ingest-shared-models/bia_ingest_sm/cli_logging.py b/bia-ingest-shared-models/bia_ingest_sm/cli_logging.py new file mode 100644 index 00000000..4076fb9e --- /dev/null +++ b/bia-ingest-shared-models/bia_ingest_sm/cli_logging.py @@ -0,0 +1,47 @@ +from rich.table import Table +from rich.text import Text +from pydantic import BaseModel, Field + +class ObjectValidationResult(BaseModel): + StudyValidation_ErrorCount: int = Field(default=0) + ExperimentalImagingDataseta_ValidationErrorCount: int = Field(default=0) + AnnotationDataseta_ValidationErrorCount: int = Field(default=0) + FileReferenceValidation_ErrorCount: int = Field(default=0) + BioSample_ValidationErrorCount: int = Field(default=0) + SpecimenGrowthProtocol_ValidationErrorCount: int = Field(default=0) + SpecimenImagingPreparationProtocol_ValidationErrorCount: int = Field(default=0) + Specimen_ValidationErrorCount: int = Field(default=0) + DerivedImage_ValidationErrorCount: int = Field(default=0) + AnnotationMethod_ValidationErrorCount: int = Field(default=0) + AnnotationDataset_ValidationErrorCount: int = Field(default=0) + AnnotationFile_ValidationErrorCount: int = Field(default=0) + ImageAnalysisMethod_ValidationErrorCount: int = Field(default=0) + ImageCorrelationMethod_ValidationErrorCount: int = Field(default=0) + RenderedView_ValidationErrorCount: int = Field(default=0) + Channel_ValidationErrorCount: int = Field(default=0) + Organism_ValidationErrorCount: int = Field(default=0) + ExternalLink_ValidationErrorCount: int = Field(default=0) + Contributor_ValidationErrorCount: int = Field(default=0) + Organisation_ValidationErrorCount: int = Field(default=0) + +def tabulate_errors(dict_of_results: dict[str, ObjectValidationResult]) -> Table: + table = Table("Accession ID", "Status", "Error: Count;") + for accession_id_key, validation_result in dict_of_results.items(): + error_message = "" + errors = validation_result.model_dump() + for field, value in errors.items(): + if value > 0: + error_message += f"{field}: {value}; " + + if error_message == "": + status = Text("Success") + status.stylize("green") + else: + status = Text("Failures") + status.stylize("red") + error_message = Text(error_message) + error_message.stylize("red") + + table.add_row(accession_id_key, status, error_message) + + return table \ No newline at end of file diff --git a/bia-ingest-shared-models/bia_ingest_sm/config.py b/bia-ingest-shared-models/bia_ingest_sm/config.py index d71f84b5..6914d20f 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/config.py +++ b/bia-ingest-shared-models/bia_ingest_sm/config.py @@ -4,7 +4,6 @@ from pydantic import Field from pydantic_settings import BaseSettings, SettingsConfigDict - default_output_base = ( f"{Path(os.environ.get('HOME', '')) / '.cache' / 'bia-integrator-data-sm'}" ) @@ -27,3 +26,4 @@ class Settings(BaseSettings): settings = Settings() + diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/annotation_method.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/annotation_method.py index 34485f12..dae1ca9e 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/conversion/annotation_method.py +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/annotation_method.py @@ -13,17 +13,16 @@ ) from bia_shared_datamodels import bia_data_model -logger = logging.getLogger(__name__) -logging.basicConfig(level=logging.INFO) +logger = logging.getLogger('__main__.'+__name__) def get_annotation_method( - submission: Submission, persist_artefacts=False + submission: Submission, result_summary: dict, persist_artefacts=False ) -> List[bia_data_model.AnnotationMethod]: annotation_method_model_dicts = extract_annotation_method_dicts(submission) annotation_methods = dicts_to_api_models( - annotation_method_model_dicts, bia_data_model.AnnotationMethod + annotation_method_model_dicts, bia_data_model.AnnotationMethod, result_summary[submission.accno] ) if persist_artefacts and annotation_methods: @@ -64,6 +63,11 @@ def extract_annotation_method_dicts(submission: Submission) -> List[Dict[str, An model_dicts.append(model_dict) + + logger.info( + f"Ingesting: {submission.accno}. Created bia_data_model.AnnotationMethod. Count: {len(model_dicts)}" + ) + return model_dicts diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/biosample.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/biosample.py index 101ebb96..49167dd3 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/conversion/biosample.py +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/biosample.py @@ -13,16 +13,15 @@ ) from bia_shared_datamodels import bia_data_model, semantic_models -logger = logging.getLogger(__name__) -logging.basicConfig(level=logging.INFO) +logger = logging.getLogger('__main__.'+__name__) def get_biosample( - submission: Submission, persist_artefacts=False + submission: Submission, result_summary: dict, persist_artefacts=False ) -> List[bia_data_model.BioSample]: biosample_model_dicts = extract_biosample_dicts(submission) - biosamples = dicts_to_api_models(biosample_model_dicts, bia_data_model.BioSample) + biosamples = dicts_to_api_models(biosample_model_dicts, bia_data_model.BioSample, result_summary[submission.accno]) if persist_artefacts and biosamples: persist(biosamples, "biosamples", submission.accno) @@ -79,6 +78,11 @@ def extract_biosample_dicts(submission: Submission) -> List[Dict[str, Any]]: model_dict = filter_model_dictionary(model_dict, bia_data_model.BioSample) model_dicts.append(model_dict) + + logger.info( + f"Ingesting: {submission.accno}. Created bia_data_model.BioSample. Count: {len(model_dicts)}" + ) + return model_dicts diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/experimental_imaging_dataset.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/experimental_imaging_dataset.py index 473009e2..93a6efad 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/conversion/experimental_imaging_dataset.py +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/experimental_imaging_dataset.py @@ -7,20 +7,22 @@ get_generic_section_as_dict, persist, filter_model_dictionary, + log_failed_model_creation ) import bia_ingest_sm.conversion.study as study_conversion from ..biostudies import ( Submission, attributes_to_dict, ) +from pydantic import ValidationError from bia_shared_datamodels import bia_data_model, semantic_models -logger = logging.getLogger(__name__) -logging.basicConfig(level=logging.INFO) + +logger = logging.getLogger('__main__.'+__name__) def get_experimental_imaging_dataset( - submission: Submission, persist_artefacts=False + submission: Submission, result_summary: dict, persist_artefacts=False ) -> List[bia_data_model.ExperimentalImagingDataset]: """ Map biostudies.Submission study components to bia_data_model.ExperimentalImagingDataset @@ -32,7 +34,7 @@ def get_experimental_imaging_dataset( ], [], ) - analysis_method_dict = get_image_analysis_method(submission) + analysis_method_dict = get_image_analysis_method(submission, result_summary) experimental_imaging_dataset = [] for section in study_components: @@ -105,9 +107,18 @@ def get_experimental_imaging_dataset( model_dict, bia_data_model.ExperimentalImagingDataset ) - experimental_imaging_dataset.append( + + try: + experimental_imaging_dataset.append( bia_data_model.ExperimentalImagingDataset.model_validate(model_dict) - ) + ) + except(ValidationError): + log_failed_model_creation(bia_data_model.ExperimentalImagingDataset, result_summary) + + + logger.info( + f"Ingesting: {submission.accno}. Created bia_data_model.ExperimentalImagingDataset. Count: {len(experimental_imaging_dataset)}" + ) if persist_artefacts and experimental_imaging_dataset: persist( @@ -121,6 +132,7 @@ def get_experimental_imaging_dataset( def get_image_analysis_method( submission: Submission, + result_summary: dict ) -> Dict[str, semantic_models.ImageAnalysisMethod]: key_mapping = [ ( @@ -142,6 +154,7 @@ def get_image_analysis_method( ], key_mapping, semantic_models.ImageAnalysisMethod, + result_summary[submission.accno], ) diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/file_reference.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/file_reference.py index d1f9421c..e962bec9 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/conversion/file_reference.py +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/file_reference.py @@ -1,5 +1,6 @@ import logging from pathlib import Path +from pydantic import ValidationError from typing import List, Dict from .utils import ( dict_to_uuid, @@ -9,7 +10,6 @@ from ..biostudies import ( Submission, attributes_to_dict, - find_file_lists_in_submission, flist_from_flist_fname, file_uri, ) @@ -17,8 +17,7 @@ from ..config import settings from bia_shared_datamodels import bia_data_model -logger = logging.getLogger(__name__) -logging.basicConfig(level=logging.INFO) +logger = logging.getLogger('__main__.'+__name__) def get_file_reference_by_dataset( @@ -27,6 +26,7 @@ def get_file_reference_by_dataset( bia_data_model.ExperimentalImagingDataset | bia_data_model.ImageAnnotationDataset ], + result_summary: dict, persist_artefacts: bool = False, ) -> Dict[str, List[bia_data_model.FileReference]]: """ @@ -75,14 +75,14 @@ def get_file_reference_by_dataset( files_in_fl = flist_from_flist_fname(submission.accno, fname) file_references = get_file_reference_for_submission_dataset( - submission.accno, dataset, files_in_fl + submission.accno, dataset, files_in_fl, result_summary ) if persist_artefacts: for file_reference in file_references: output_path = output_dir / f"{file_reference.uuid}.json" output_path.write_text(file_reference.model_dump_json(indent=2)) - logger.info(f"Written {output_path}") + logger.debug(f"Written {output_path}") fileref_to_datasets[dataset_name].extend(file_references) @@ -95,6 +95,7 @@ def get_file_reference_for_submission_dataset( bia_data_model.ExperimentalImagingDataset | bia_data_model.ImageAnnotationDataset, files_in_file_list: List[biostudies.File], + result_summary: dict ) -> List[bia_data_model.FileReference]: """ Return list of file references for particular submission dataset @@ -117,7 +118,13 @@ def get_file_reference_for_submission_dataset( file_dict["attribute"] = attributes_to_dict(f.attributes) file_dict["version"] = 1 file_dict = filter_model_dictionary(file_dict, bia_data_model.FileReference) - file_reference = bia_data_model.FileReference.model_validate(file_dict) - file_references.append(file_reference) + + try: + file_reference = bia_data_model.FileReference.model_validate(file_dict) + file_references.append(file_reference) + except(ValidationError): + logger.warn(f"Failed to create FileReference") + logger.debug("Pydantic Validation Error:", exc_info=True) + result_summary[accession_id].FileReference_ValidationErrorCount += 1 return file_references diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/image_acquisition.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/image_acquisition.py index 48916ada..c1f089db 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/conversion/image_acquisition.py +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/image_acquisition.py @@ -13,17 +13,16 @@ ) from bia_shared_datamodels import bia_data_model -logger = logging.getLogger(__name__) -logging.basicConfig(level=logging.INFO) +logger = logging.getLogger('__main__.'+__name__) def get_image_acquisition( - submission: Submission, persist_artefacts=False + submission: Submission, result_summary: dict, persist_artefacts=False ) -> List[bia_data_model.ImageAcquisition]: image_acquisition_model_dicts = extract_image_acquisition_dicts(submission) image_acquisitions = dicts_to_api_models( - image_acquisition_model_dicts, bia_data_model.ImageAcquisition + image_acquisition_model_dicts, bia_data_model.ImageAcquisition, result_summary[submission.accno] ) if persist_artefacts and image_acquisitions: @@ -65,6 +64,9 @@ def extract_image_acquisition_dicts(submission: Submission) -> List[Dict[str, An ) model_dicts.append(model_dict) + logger.info( + f"Ingesting: {submission.accno}. Created bia_data_model.ImageAcquisition. Count: {len(model_dicts)}" + ) return model_dicts diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen.py index 01d56a99..48b5aff5 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen.py +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen.py @@ -5,7 +5,6 @@ from .utils import ( dicts_to_api_models, - find_sections_recursive, dict_to_uuid, persist, filter_model_dictionary, @@ -14,7 +13,6 @@ ) from ..biostudies import ( Submission, - attributes_to_dict, ) from . import ( biosample as biosample_conversion, @@ -22,12 +20,11 @@ specimen_growth_protocol as sgp_conversion, ) -logger = logging.getLogger(__name__) -logging.basicConfig(level=logging.INFO) +logger = logging.getLogger('__main__.'+__name__) def get_specimen( - submission: Submission, persist_artefacts: bool = False + submission: Submission, result_summary: dict, persist_artefacts: bool = False ) -> List[bia_data_model.Specimen]: """Create and persist bia_data_model.Specimen and models it depends on @@ -36,14 +33,14 @@ def get_specimen( (specimen) GrowthProtocol. """ - logger.info( + logger.debug( f"Starting creation of bia_shared_models.Specimen models for submission: {submission.accno}" ) # ToDo - when API in operation do we attempt to retreive from # API first before creating biosample, specimen_growth_protocol and # specimen_preparation_protocol? # Biosamples - biosamples = biosample_conversion.get_biosample(submission, persist_artefacts) + biosamples = biosample_conversion.get_biosample(submission, result_summary, persist_artefacts) # Index biosamples by title_id. Makes linking with associations more # straight forward. @@ -55,7 +52,7 @@ def get_specimen( # ImagingPreparationProtocol imaging_preparation_protocols = sipp_conversion.get_specimen_imaging_preparation_protocol( - submission, persist_artefacts + submission, result_summary, persist_artefacts ) imaging_preparation_protocol_uuids = object_value_pair_to_dict( imaging_preparation_protocols, key_attr="title_id", value_attr="uuid" @@ -63,7 +60,7 @@ def get_specimen( # GrowthProtocol growth_protocols = sgp_conversion.get_specimen_growth_protocol( - submission, persist_artefacts + submission, result_summary, persist_artefacts ) growth_protocol_uuids = object_value_pair_to_dict( growth_protocols, key_attr="title_id", value_attr="uuid" @@ -107,7 +104,8 @@ def get_specimen( model_dict = filter_model_dictionary(model_dict, bia_data_model.Specimen) model_dicts.append(model_dict) - specimens = dicts_to_api_models(model_dicts, bia_data_model.Specimen) + + specimens = dicts_to_api_models(model_dicts, bia_data_model.Specimen, result_summary[submission.accno]) if persist_artefacts and specimens: persist(specimens, "specimens", submission.accno) @@ -115,9 +113,9 @@ def get_specimen( # ToDo: How should we deal with situation where specimens for a # submission are exactly the same? E.g. see associations of S-BIAD1287 logger.info( - f"Finished the creation of bia_shared_models.Specimen models for submission: {submission.accno}. {len(model_dicts)} models created." + f"Ingesting: {submission.accno}. Created bia_data_model.Specimen. Count: {len(model_dicts)}" ) - return dicts_to_api_models(model_dicts, bia_data_model.Specimen) + return specimens def generate_specimen_uuid(specimen_dict: Dict[str, Any]) -> str: diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen_growth_protocol.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen_growth_protocol.py index 3ba54aee..1b8f6e84 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen_growth_protocol.py +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen_growth_protocol.py @@ -13,18 +13,17 @@ ) from bia_shared_datamodels import bia_data_model -logger = logging.getLogger(__name__) -logging.basicConfig(level=logging.INFO) +logger = logging.getLogger('__main__.'+__name__) def get_specimen_growth_protocol( - submission: Submission, persist_artefacts=False + submission: Submission, result_summary: dict, persist_artefacts=False ) -> List[bia_data_model.SpecimenGrowthProtocol]: specimen_growth_protocol_model_dicts = extract_specimen_growth_protocol_dicts( submission ) specimen_growth_protocols = dicts_to_api_models( - specimen_growth_protocol_model_dicts, bia_data_model.SpecimenGrowthProtocol + specimen_growth_protocol_model_dicts, bia_data_model.SpecimenGrowthProtocol, result_summary[submission.accno] ) if persist_artefacts and specimen_growth_protocols: @@ -61,6 +60,10 @@ def extract_specimen_growth_protocol_dicts( model_dicts.append(model_dict) + logger.info( + f"Ingesting: {submission.accno}. Created bia_data_model.SpecimenGrowthProtocol. Count: {len(model_dicts)}" + ) + return model_dicts diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen_imaging_preparation_protocol.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen_imaging_preparation_protocol.py index de8d077c..5de05b2b 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen_imaging_preparation_protocol.py +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen_imaging_preparation_protocol.py @@ -13,12 +13,11 @@ ) from bia_shared_datamodels import bia_data_model -logger = logging.getLogger(__name__) -logging.basicConfig(level=logging.INFO) +logger = logging.getLogger('__main__.'+__name__) def get_specimen_imaging_preparation_protocol( - submission: Submission, persist_artefacts=False + submission: Submission, result_summary: dict, persist_artefacts=False ) -> List[bia_data_model.SpecimenImagingPreparationProtocol]: specimen_preparation_protocol_model_dicts = ( extract_specimen_preparation_protocol_dicts(submission) @@ -26,6 +25,7 @@ def get_specimen_imaging_preparation_protocol( specimen_preparation_protocols = dicts_to_api_models( specimen_preparation_protocol_model_dicts, bia_data_model.SpecimenImagingPreparationProtocol, + result_summary[submission.accno], ) if persist_artefacts and specimen_preparation_protocols: @@ -66,6 +66,10 @@ def extract_specimen_preparation_protocol_dicts( ) model_dicts.append(model_dict) + + logger.info( + f"Ingesting: {submission.accno}. Created bia_data_model.SpecimenImagingPrepartionProtocol. Count: {len(model_dicts)}" + ) return model_dicts diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/study.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/study.py index b6b43f75..f6240c9a 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/conversion/study.py +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/study.py @@ -1,5 +1,6 @@ import logging from pathlib import Path +from pydantic import ValidationError import re from typing import List, Any, Dict from .utils import ( @@ -7,6 +8,7 @@ mattributes_to_dict, dict_to_uuid, find_sections_recursive, + log_failed_model_creation ) from ..biostudies import ( Submission, @@ -15,20 +17,19 @@ from ..config import settings from bia_shared_datamodels import bia_data_model, semantic_models -logger = logging.getLogger(__name__) -logging.basicConfig(level=logging.INFO) +logger = logging.getLogger('__main__.'+__name__) def get_study( - submission: Submission, persist_artefacts: bool = False + submission: Submission, result_summary: dict, persist_artefacts: bool = False ) -> bia_data_model.Study: """ Return an API study model populated from the submission """ submission_attributes = attributes_to_dict(submission.attributes) - contributors = get_contributor(submission) - grants = get_grant(submission) + contributors = get_contributor(submission, result_summary) + grants = get_grant(submission, result_summary) study_attributes = attributes_to_dict(submission.section.attributes) @@ -67,7 +68,10 @@ def get_study( } # study_uuid = dict_to_uuid(study_dict, ["accession_id",]) # study_dict["uuid"] = study_uuid - study = bia_data_model.Study.model_validate(study_dict) + try: + study = bia_data_model.Study.model_validate(study_dict) + except(ValidationError): + log_failed_model_creation(bia_data_model.Study, result_summary) if persist_artefacts: output_dir = Path(settings.bia_data_dir) / "studies" @@ -109,7 +113,7 @@ def get_licence(study_attributes: Dict[str, Any]) -> semantic_models.LicenceType def get_external_reference( - submission: Submission, + submission: Submission, RESULT_SUMMARY: dict ) -> List[semantic_models.ExternalReference]: """ Map biostudies.Submission.Link to semantic_models.ExternalReference @@ -133,13 +137,13 @@ def get_external_reference( # TODO: Put comments and docstring -def get_grant(submission: Submission) -> List[semantic_models.Grant]: - funding_body_dict = get_funding_body(submission) +def get_grant(submission: Submission, RESULT_SUMMARY: dict) -> List[semantic_models.Grant]: + funding_body_dict = get_funding_body(submission, RESULT_SUMMARY) key_mapping = [ ("id", "grant_id", None), ] grant_dict = get_generic_section_as_dict( - submission, ["Funding",], key_mapping, semantic_models.Grant + submission, ["Funding",], key_mapping, semantic_models.Grant, RESULT_SUMMARY[submission.accno] ) grant_list = [] @@ -151,18 +155,18 @@ def get_grant(submission: Submission) -> List[semantic_models.Grant]: # TODO: Put comments and docstring -def get_funding_body(submission: Submission) -> semantic_models.FundingBody: +def get_funding_body(submission: Submission, result_summary: dict) -> semantic_models.FundingBody: key_mapping = [ ("display_name", "Agency", None,), ] funding_body = get_generic_section_as_dict( - submission, ["Funding",], key_mapping, semantic_models.FundingBody + submission, ["Funding",], key_mapping, semantic_models.FundingBody, result_summary[submission.accno] ) return funding_body -def get_affiliation(submission: Submission) -> Dict[str, semantic_models.Affiliation]: +def get_affiliation(submission: Submission, result_summary: dict) -> Dict[str, semantic_models.Affiliation]: """ Maps biostudies.Submission.Organisation sections to semantic_models.Affiliations """ @@ -185,14 +189,18 @@ def get_affiliation(submission: Submission) -> Dict[str, semantic_models.Affilia attr_dict = attributes_to_dict(section.attributes) model_dict = {k: attr_dict.get(v, default) for k, v, default in key_mapping} - affiliation_dict[section.accno] = semantic_models.Affiliation.model_validate( + try: + affiliation_dict[section.accno] = semantic_models.Affiliation.model_validate( model_dict - ) + ) + except(ValidationError): + log_failed_model_creation(semantic_models.Affiliation, result_summary) + return affiliation_dict -def get_publication(submission: Submission) -> List[semantic_models.Publication]: +def get_publication(submission: Submission, result_summary: dict) -> List[semantic_models.Publication]: publication_sections = find_sections_recursive( submission.section, ["publication",], [] ) @@ -208,16 +216,19 @@ def get_publication(submission: Submission) -> List[semantic_models.Publication] attr_dict = attributes_to_dict(section.attributes) model_dict = {k: attr_dict.get(v, default) for k, v, default in key_mapping} - publications.append(semantic_models.Publication.model_validate(model_dict)) + try: + publications.append(semantic_models.Publication.model_validate(model_dict)) + except(ValidationError): + log_failed_model_creation(semantic_models.Publication, result_summary) return publications -def get_contributor(submission: Submission) -> List[semantic_models.Contributor]: +def get_contributor(submission: Submission, result_summary: dict) -> List[semantic_models.Contributor]: """ Map authors in submission to semantic_model.Contributors """ - affiliation_dict = get_affiliation(submission) + affiliation_dict = get_affiliation(submission, result_summary) key_mapping = [ ("display_name", "Name", None), ("contact_email", "E-mail", "not@supplied.com"), @@ -238,6 +249,9 @@ def get_contributor(submission: Submission) -> List[semantic_models.Contributor] model_dict["affiliation"] = [ model_dict["affiliation"], ] - contributors.append(semantic_models.Contributor.model_validate(model_dict)) + try: + contributors.append(semantic_models.Contributor.model_validate(model_dict)) + except(ValidationError): + log_failed_model_creation(semantic_models.Contributor, result_summary) return contributors diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/utils.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/utils.py index f6aa5a26..1610d9f8 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/conversion/utils.py +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/utils.py @@ -3,7 +3,7 @@ import hashlib import uuid from typing import List, Any, Dict, Optional, Tuple, Type, Union -from pydantic import BaseModel +from pydantic import BaseModel, ValidationError from ..biostudies import ( Submission, attributes_to_dict, @@ -12,9 +12,16 @@ find_file_lists_in_submission, ) from ..config import settings +from ..cli_logging import ObjectValidationResult -logger = logging.getLogger(__name__) -logging.basicConfig(level=logging.INFO) +logger = logging.getLogger('__main__.'+__name__) + + +def log_failed_model_creation(model_class, valdiation_error_tracking) -> None: + logger.error(f"Failed to create {model_class.__name__}") + logger.debug("Pydantic Validation Error:", exc_info=True) + field_name = f"{model_class.__name__}_ValidationErrorCount" + valdiation_error_tracking.__setattr__(field_name, valdiation_error_tracking.__getattribute__(field_name) + 1) # TODO: Put comments and docstring @@ -22,8 +29,9 @@ def get_generic_section_as_list( root: Submission | Section, section_name: List[str], key_mapping: List[Tuple[str, str, str | None | List]], - mapped_object: Optional[Any] = None, + mapped_object: Optional[BaseModel] = None, mapped_attrs_dict: Optional[Dict[str, Any]] = None, + valdiation_error_tracking: Optional[ObjectValidationResult] = None, ) -> List[Any | Dict[str, str | List[str]]]: """ Map biostudies.Submission objects to either semantic_models or bia_data_model equivalent @@ -43,7 +51,12 @@ def get_generic_section_as_list( if mapped_object is None: return_list.append(model_dict) else: - return_list.append(mapped_object.model_validate(model_dict)) + if not valdiation_error_tracking: + raise RuntimeError("If a mapped_object is provided, valdiation_error_tracking needs to also be provided.") + try: + return_list.append(mapped_object.model_validate(model_dict)) + except(ValidationError): + log_failed_model_creation(mapped_object, valdiation_error_tracking) return return_list @@ -52,7 +65,8 @@ def get_generic_section_as_dict( root: Submission | Section, section_name: List[str], key_mapping: List[Tuple[str, str, Union[str, None, List]]], - mapped_object: Optional[Any] = None, + mapped_object: Optional[BaseModel] = None, + valdiation_error_tracking: Optional[ObjectValidationResult] = None, ) -> Dict[str, Any | Dict[str, Dict[str, str | List[str]]]]: """ Map biostudies.Submission objects to dict containing either semantic_models or bia_data_model equivalent @@ -69,7 +83,12 @@ def get_generic_section_as_dict( if mapped_object is None: return_dict[section.accno] = model_dict else: - return_dict[section.accno] = mapped_object.model_validate(model_dict) + if not valdiation_error_tracking: + raise RuntimeError("If a mapped_object is provided, valdiation_error_tracking needs to also be provided.") + try: + return_dict[section.accno] = mapped_object.model_validate(model_dict) + except(ValidationError): + log_failed_model_creation(mapped_object, valdiation_error_tracking) return return_dict @@ -77,13 +96,15 @@ def get_generic_section_as_dict( # Hence the use of the pydantic BaseModel which all API models # are derived from in the type hinting def dicts_to_api_models( - dicts: List[Dict[str, Any]], api_model_class: Type[BaseModel] + dicts: List[Dict[str, Any]], api_model_class: Type[BaseModel], valdiation_error_tracking: ObjectValidationResult ) -> BaseModel: api_models = [] for model_dict in dicts: - api_models.append(api_model_class.model_validate(model_dict)) - + try: + api_models.append(api_model_class.model_validate(model_dict)) + except(ValidationError): + log_failed_model_creation(api_model_class, valdiation_error_tracking) return api_models @@ -143,15 +164,15 @@ def dict_to_uuid(my_dict: Dict[str, Any], attributes_to_consider: List[str]) -> return str(uuid.UUID(version=4, hex=hexdigest)) -def persist(object_list: List, object_path: str, sumbission_accno: str): +def persist(object_list: List[BaseModel], object_path: str, sumbission_accno: str): output_dir = Path(settings.bia_data_dir) / object_path / sumbission_accno if not output_dir.is_dir(): output_dir.mkdir(parents=True) - logger.info(f"Created {output_dir}") + logger.debug(f"Created {output_dir}") for object in object_list: output_path = output_dir / f"{object.uuid}.json" output_path.write_text(object.model_dump_json(indent=2)) - logger.info(f"Written {output_path}") + logger.debug(f"Written {output_path}") def filter_model_dictionary(dictionary: dict, target_model: Type[BaseModel]): diff --git a/bia-ingest-shared-models/test/conftest.py b/bia-ingest-shared-models/test/conftest.py index db2180d9..61209917 100644 --- a/bia-ingest-shared-models/test/conftest.py +++ b/bia-ingest-shared-models/test/conftest.py @@ -2,7 +2,8 @@ import json import pytest from bia_ingest_sm.biostudies import Submission - +from .utils import accession_id +from bia_ingest_sm.cli_logging import ObjectValidationResult @pytest.fixture def base_path() -> Path: @@ -18,3 +19,8 @@ def test_submission(base_path: Path) -> Submission: json_data = json.loads(submission_path.read_text()) submission = Submission.model_validate(json_data) return submission + + +@pytest.fixture +def result_summary(): + return {accession_id: ObjectValidationResult()} \ No newline at end of file diff --git a/bia-ingest-shared-models/test/test_file_reference.py b/bia-ingest-shared-models/test/test_file_reference.py index 5b0bc25e..52fb5a79 100644 --- a/bia-ingest-shared-models/test/test_file_reference.py +++ b/bia-ingest-shared-models/test/test_file_reference.py @@ -35,9 +35,9 @@ def mock_request_get(flist_url: str) -> Dict[str, str]: ] -def test_get_file_reference_for_submission_dataset(test_submission): - """Test creation of FileReferences for dataset with file list supplied - +def test_get_file_reference_for_submission_dataset(test_submission, result_summary): + """ + Test creation of FileReferences for dataset with file list supplied """ file_list_data = utils.get_test_file_list_data("file_list_study_component_2.json") files_in_filelist = [File.model_validate(f) for f in file_list_data] @@ -47,15 +47,17 @@ def test_get_file_reference_for_submission_dataset(test_submission): accession_id=test_submission.accno, submission_dataset=datasets_in_submission[0], files_in_file_list=files_in_filelist, + result_summary=result_summary ) assert created == expected -def test_create_file_reference_for_study_component(test_submission, caplog): +def test_create_file_reference_for_study_component(test_submission, caplog, result_summary): expected = {datasets_in_submission[0].title_id: utils.get_test_file_reference()} created = file_reference.get_file_reference_by_dataset( - test_submission, datasets_in_submission=datasets_in_submission + test_submission, datasets_in_submission=datasets_in_submission, result_summary=result_summary + ) assert created == expected @@ -64,7 +66,7 @@ def test_create_file_reference_for_study_component(test_submission, caplog): def test_create_file_reference_for_study_component_when_no_matching_sc_in_file_list( - test_submission, caplog + test_submission, caplog, result_summary ): """Test attempted creation of study FileReferences when study components in dataset do not match does in file_list @@ -73,7 +75,7 @@ def test_create_file_reference_for_study_component_when_no_matching_sc_in_file_l dataset = utils.get_test_experimental_imaging_dataset()[0] dataset.title_id = "Test name not in file list" created = file_reference.get_file_reference_by_dataset( - test_submission, datasets_in_submission=[dataset,] + test_submission, datasets_in_submission=[dataset,], result_summary=result_summary ) assert created is None diff --git a/bia-ingest-shared-models/test/test_shared_models.py b/bia-ingest-shared-models/test/test_shared_models.py index ba1f4800..433fb149 100644 --- a/bia-ingest-shared-models/test/test_shared_models.py +++ b/bia-ingest-shared-models/test/test_shared_models.py @@ -44,7 +44,7 @@ # (bia_data_model.Study, conversion.get_study_from_submission,), ), ) -def test_create_models(expected_model_func, model_creation_func, test_submission): +def test_create_models(expected_model_func, model_creation_func, test_submission, result_summary): expected = expected_model_func() - created = model_creation_func(test_submission) + created = model_creation_func(test_submission, result_summary) assert expected == created