Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

disallowed extra fields in models, and updated ingest and export code to handle these #127

Merged
merged 1 commit into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions bia-export/bia_export/website_models.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from __future__ import annotations
from pydantic import Field
from typing import List, Optional
from bia_shared_datamodels import bia_data_model, semantic_models
from bia_shared_datamodels import bia_data_model


class Study(semantic_models.Study, bia_data_model.DocumentMixin):
class Study(bia_data_model.Study):
experimental_imaging_component: Optional[List[ExperimentalImagingDataset]] = Field(default_factory=list, description="""A dataset of that is associated with the study.""")

class ExperimentalImagingDataset(semantic_models.ExperimentalImagingDataset, bia_data_model.DocumentMixin):
class ExperimentalImagingDataset(bia_data_model.ExperimentalImagingDataset):
pass
Original file line number Diff line number Diff line change
@@ -1,26 +1,14 @@
{
"title_id": "Study Component 1",
"uuid": "47a4ab60-c76d-4424-bfaa-c2a024de720c",
"file_reference_count": 4,
"description": "Description of study component 1",
"acquisition_process": [
"c2e44a1b-a43c-476e-8ddf-8587f4c955b3"
],
"specimen_imaging_preparation_protocol": [
"7199d730-29f1-4ad8-b599-e9089cbb2d7b"
],
"biological_entity": [
"64a67727-4e7c-469a-91c4-6219ae072e99",
"6950718c-4917-47a1-a807-11b874e80a23"
],
"specimen_growth_protocol": [],
"analysis_method": [
{
"protocol_description": "Test image analysis",
"features_analysed": "Test image analysis overview"
}
],
"submitted_in_study_uuid": "a2fdbd58-ee11-4cd9-bc6a-f3d3da7fff71",
"correlation_method": [],
"example_image_uri": [],
"image_count": 0
"example_image_uri": []
}
1 change: 0 additions & 1 deletion bia-export/test/input_data/studies/S-BIADTEST.json
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,5 @@
}
],
"funding_statement": "This work was funded by the EBI",
"annotation_component": [],
"attribute": {}
}
4 changes: 3 additions & 1 deletion bia-export/test/output_data/bia_export.json
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
"attribute": {},
"experimental_imaging_component": [
{
"title_id": "Study Component 1",
"uuid": "47a4ab60-c76d-4424-bfaa-c2a024de720c",
"description": "Description of study component 1",
"analysis_method": [
Expand All @@ -82,7 +83,8 @@
}
],
"correlation_method": [],
"example_image_uri": []
"example_image_uri": [],
"submitted_in_study_uuid": "a2fdbd58-ee11-4cd9-bc6a-f3d3da7fff71"
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
dicts_to_api_models,
find_sections_recursive,
dict_to_uuid,
persist
persist,
filter_model_dictionary
)
from ..biostudies import (
Submission,
Expand Down Expand Up @@ -52,6 +53,8 @@ def extract_annotation_method_dicts(submission: Submission) -> List[Dict[str, An
model_dict["accno"] = section.__dict__.get("accno", "")
model_dict["accession_id"] = submission.accno
model_dict["uuid"] = generate_annotation_method_uuid(model_dict)
model_dict = filter_model_dictionary(model_dict, bia_data_model.AnnotationMethod)

model_dicts.append(model_dict)

return model_dicts
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
dicts_to_api_models,
find_sections_recursive,
dict_to_uuid,
persist
persist,
filter_model_dictionary
)
from ..biostudies import (
Submission,
Expand Down Expand Up @@ -74,6 +75,7 @@ def extract_biosample_dicts(submission: Submission) -> List[Dict[str, Any]]:

model_dict["accession_id"] = submission.accno
model_dict["uuid"] = generate_biosample_uuid(model_dict)
model_dict = filter_model_dictionary(model_dict, bia_data_model.BioSample)
model_dicts.append(model_dict)

return model_dicts
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
get_generic_section_as_list,
dict_to_uuid,
get_generic_section_as_dict,
persist
persist,
filter_model_dictionary
)
from .file_reference import get_file_reference_by_study_component
import bia_ingest_sm.conversion.biosample as biosample_conversion
Expand Down Expand Up @@ -97,6 +98,9 @@ def get_experimental_imaging_dataset(
"example_image_uri": [],
}
model_dict["uuid"] = generate_experimental_imaging_dataset_uuid(model_dict)

model_dict = filter_model_dictionary(model_dict, bia_data_model.ExperimentalImagingDataset)

experimental_imaging_dataset.append(
bia_data_model.ExperimentalImagingDataset.model_validate(model_dict)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import List, Dict
from .utils import (
dict_to_uuid,
filter_model_dictionary,
)
from ..biostudies import (
Submission,
Expand Down Expand Up @@ -56,6 +57,7 @@ def get_file_reference_by_study_component(
file_dict["submission_dataset"] = fileref_uuid
file_dict["format"] = f.type
file_dict["attribute"] = attributes_to_dict(f.attributes)
file_dict = filter_model_dictionary(file_dict, bia_data_model.FileReference)
file_reference = bia_data_model.FileReference.model_validate(file_dict)
output_path = output_dir / f"{fileref_uuid}.json"
output_path.write_text(file_reference.model_dump_json(indent=2))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
dicts_to_api_models,
find_sections_recursive,
dict_to_uuid,
persist
persist,
filter_model_dictionary,
)
from ..biostudies import (
Submission,
Expand Down Expand Up @@ -51,6 +52,7 @@ def extract_image_acquisition_dicts(submission: Submission) -> List[Dict[str, An
model_dict["accno"] = section.__dict__.get("accno", "")
model_dict["accession_id"] = submission.accno
model_dict["uuid"] = generate_image_acquisition_uuid(model_dict)
model_dict = filter_model_dictionary(model_dict, bia_data_model.ImageAcquisition)
model_dicts.append(model_dict)

return model_dicts
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
dicts_to_api_models,
find_sections_recursive,
dict_to_uuid,
persist
persist,
filter_model_dictionary
)
from ..biostudies import (
Submission,
Expand Down Expand Up @@ -46,6 +47,8 @@ def extract_specimen_growth_protocol_dicts(submission: Submission) -> List[Dict[
model_dict["accno"] = section.__dict__.get("accno", "")
model_dict["accession_id"] = submission.accno
model_dict["uuid"] = generate_specimen_growth_protocol_uuid(model_dict)
model_dict = filter_model_dictionary(model_dict, bia_data_model.SpecimenGrowthProtocol)

model_dicts.append(model_dict)

return model_dicts
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
dicts_to_api_models,
find_sections_recursive,
dict_to_uuid,
persist
persist,
filter_model_dictionary
)
from ..biostudies import (
Submission,
Expand Down Expand Up @@ -49,6 +50,8 @@ def extract_specimen_preparation_protocol_dicts(submission: Submission) -> List[
model_dict["accno"] = section.__dict__.get("accno", "")
model_dict["accession_id"] = submission.accno
model_dict["uuid"] = generate_specimen_imaging_preparation_uuid(model_dict)
model_dict = filter_model_dictionary(model_dict, bia_data_model.SpecimenImagingPrepartionProtocol)

model_dicts.append(model_dict)

return model_dicts
Expand Down
1 change: 0 additions & 1 deletion bia-ingest-shared-models/bia_ingest_sm/conversion/study.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ def get_study(
"author": [c.model_dump() for c in contributors],
"grant": [g.model_dump() for g in grants],
"attribute": study_attributes,
"annotation_component": [],
}
# study_uuid = dict_to_uuid(study_dict, ["accession_id",])
# study_dict["uuid"] = study_uuid
Expand Down
8 changes: 7 additions & 1 deletion bia-ingest-shared-models/bia_ingest_sm/conversion/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,4 +150,10 @@ def persist(object_list: List, object_path: str, sumbission_accno: str):
for object in object_list:
output_path = output_dir / f"{object.uuid}.json"
output_path.write_text(object.model_dump_json(indent=2))
logger.info(f"Written {output_path}")
logger.info(f"Written {output_path}")


Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder whether this has utility beyond just the export code, and would be better off as a utility in the shared_models package. Thoughts?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Are you referring to persist or filter_model_dictionary?
  2. Do you mean 'import code' in the comment above? It seems both import and export use persist, so having it in a common place sounds good. However, I assume export is only reading data models, so it will not need the filter functionality (but will need the persisting).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. filter_model_dictionary

  2. I meant import. But the export could use it with slightly different data models (e.g. the study model for export is the study model + dataset model + biosample, protocols, Image Acquisitions etc.), so having a filter_model_dictionary method might be helpful.

  3. For persist - While both might want to write out to files, it feels like the directory/file structure might be very different for the export and ingest code, so I don't know how reusable the methods would be. Additionally, the functions related to filtering models make sense to store in the model package, since they are related to it and are going to be imported by both, but it's less obvious where we should put the persisting.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice workaround for the filter!

def filter_model_dictionary(dictionary: dict, target_model: Type[BaseModel]) -> dict:
    """Return a copy of ``dictionary`` restricted to the fields of ``target_model``.

    Keys that are not declared in ``target_model.model_fields`` are dropped,
    so the result can be passed to ``target_model.model_validate`` even when
    the model rejects unknown keys.

    Args:
        dictionary: Source mapping; it is not modified.
        target_model: Model class whose ``model_fields`` names define the
            keys that are kept.

    Returns:
        A new dict holding only the entries of ``dictionary`` whose key is a
        field of ``target_model``.
    """
    # Only copy keys that are actually present: indexing every model field
    # unconditionally raised KeyError for fields missing from the input
    # (e.g. optional fields). Leaving them out lets the model's own
    # validation apply defaults or report the missing required field.
    return {
        key: dictionary[key]
        for key in target_model.model_fields
        if key in dictionary
    }
64 changes: 16 additions & 48 deletions bia-ingest-shared-models/test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@

from typing import Dict, List
from bia_shared_datamodels import bia_data_model, semantic_models
from bia_ingest_sm.conversion.utils import dict_to_uuid

from bia_ingest_sm.conversion.utils import (
dict_to_uuid,
filter_model_dictionary
)

def get_test_annotation_method() -> List[bia_data_model.AnnotationMethod]:
# For UUID
Expand All @@ -21,7 +23,7 @@ def get_test_annotation_method() -> List[bia_data_model.AnnotationMethod]:
"method_type",
"source_dataset",
]
protocol_info = [
annotation_method_info = [
{
"accno": "Annotations-29",
"accession_id": "S-BIADTEST",
Expand All @@ -34,47 +36,12 @@ def get_test_annotation_method() -> List[bia_data_model.AnnotationMethod]:
},
]

protocol = []
for protocol_dict in protocol_info:
protocol_dict["uuid"] = dict_to_uuid(protocol_dict, attributes_to_consider)
protocol.append(bia_data_model.AnnotationMethod.model_validate(protocol_dict))
return protocol


def get_test_specimen_growth_protocol() -> List[bia_data_model.ImageAcquisition]:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This whole method was just a duplicate, so I removed it.

# For UUID
attributes_to_consider = [
"accession_id",
"accno",
"title_id",
"protocol_description",
]
protocol_info = [
{
"accno": "Image acquisition-3",
"accession_id": "S-BIADTEST",
"title_id": "Test Primary Screen Image Acquisition",
"protocol_description": "Test image acquisition parameters 1",
"imaging_instrument_description": "Test imaging instrument 1",
"imaging_method_name": "confocal microscopy",
"fbbi_id": [],
},
{
"accno": "Image acquisition-7",
"accession_id": "S-BIADTEST",
"title_id": "Test Secondary Screen Image Acquisition",
"protocol_description": "Test image acquisition parameters 2",
"imaging_instrument_description": "Test imaging instrument 2",
"imaging_method_name": "fluorescence microscopy",
"fbbi_id": [],
},
]

protocol = []
for protocol_dict in protocol_info:
protocol_dict["uuid"] = dict_to_uuid(protocol_dict, attributes_to_consider)
protocol.append(bia_data_model.ImageAcquisition.model_validate(protocol_dict))
return protocol
annotation_method = []
for annotation_method_dict in annotation_method_info:
annotation_method_dict["uuid"] = dict_to_uuid(annotation_method_dict, attributes_to_consider)
annotation_method_dict = filter_model_dictionary(annotation_method_dict, bia_data_model.AnnotationMethod)
annotation_method.append(bia_data_model.AnnotationMethod.model_validate(annotation_method_dict))
return annotation_method


def get_test_specimen_growth_protocol() -> List[bia_data_model.SpecimenGrowthProtocol]:
Expand Down Expand Up @@ -103,6 +70,7 @@ def get_test_specimen_growth_protocol() -> List[bia_data_model.SpecimenGrowthPro
protocol = []
for protocol_dict in protocol_info:
protocol_dict["uuid"] = dict_to_uuid(protocol_dict, attributes_to_consider)
protocol_dict = filter_model_dictionary(protocol_dict, bia_data_model.SpecimenGrowthProtocol)
protocol.append(
bia_data_model.SpecimenGrowthProtocol.model_validate(protocol_dict)
)
Expand Down Expand Up @@ -139,6 +107,7 @@ def get_test_specimen_imaging_preparation_protocol() -> (
protocol = []
for protocol_dict in protocol_info:
protocol_dict["uuid"] = dict_to_uuid(protocol_dict, attributes_to_consider)
protocol_dict = filter_model_dictionary(protocol_dict, bia_data_model.SpecimenImagingPrepartionProtocol)
protocol.append(
bia_data_model.SpecimenImagingPrepartionProtocol.model_validate(protocol_dict)
)
Expand Down Expand Up @@ -213,6 +182,7 @@ def get_test_biosample() -> List[bia_data_model.BioSample]:
biosample = []
for biosample_dict in biosample_info:
biosample_dict["uuid"] = dict_to_uuid(biosample_dict, attributes_to_consider)
biosample_dict = filter_model_dictionary(biosample_dict, bia_data_model.BioSample)
biosample.append(bia_data_model.BioSample.model_validate(biosample_dict))
return biosample

Expand Down Expand Up @@ -252,6 +222,7 @@ def get_test_image_acquisition() -> List[bia_data_model.ImageAcquisition]:
image_acquisition_dict["uuid"] = dict_to_uuid(
image_acquisition_dict, attributes_to_consider
)
image_acquisition_dict = filter_model_dictionary(image_acquisition_dict, bia_data_model.ImageAcquisition)
image_acquisition.append(
bia_data_model.ImageAcquisition.model_validate(image_acquisition_dict)
)
Expand Down Expand Up @@ -310,6 +281,7 @@ def get_test_experimental_imaging_dataset() -> (
],
)
experimental_imaging_dataset_dict["uuid"] = experimental_imaging_dataset_uuid
experimental_imaging_dataset_dict = filter_model_dictionary(experimental_imaging_dataset_dict, bia_data_model.ExperimentalImagingDataset)
experimental_imaging_dataset1 = (
bia_data_model.ExperimentalImagingDataset.model_validate(
experimental_imaging_dataset_dict
Expand Down Expand Up @@ -523,10 +495,6 @@ def get_test_study() -> bia_data_model.Study:
"Test keyword3",
],
"grant": [g.model_dump() for g in grant],
"experimental_imaging_component": [
e.uuid for e in get_test_experimental_imaging_dataset()
],
"annotation_component": [],
}
study_uuid = dict_to_uuid(
study_dict,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
from __future__ import annotations

from . import semantic_models
from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, ConfigDict
from typing import List, Optional
from uuid import UUID
from enum import Enum




class DocumentMixin(BaseModel):

# Throw error if you try to validate/create model from a dictionary with keys that aren't a field in the model
model_config = ConfigDict(extra="forbid")

uuid: UUID = Field(
description="""Unique ID (across the BIA database) used to refer to and identify a document."""
)
Expand Down
Loading