Skip to content

Commit

Permalink
added try catch & logging function to all attempts at creating models
Browse files Browse the repository at this point in the history
  • Loading branch information
sherwoodf committed Aug 19, 2024
1 parent 87e5cd5 commit 67e2fb2
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,14 @@
get_generic_section_as_dict,
persist,
filter_model_dictionary,
log_failed_model_creation
)
import bia_ingest_sm.conversion.study as study_conversion
from ..biostudies import (
Submission,
attributes_to_dict,
)
from pydantic import ValidationError
from bia_shared_datamodels import bia_data_model, semantic_models


Expand Down Expand Up @@ -105,10 +107,15 @@ def get_experimental_imaging_dataset(
model_dict, bia_data_model.ExperimentalImagingDataset
)

experimental_imaging_dataset.append(

try:
experimental_imaging_dataset.append(
bia_data_model.ExperimentalImagingDataset.model_validate(model_dict)
)

)
except(ValidationError):
log_failed_model_creation(bia_data_model.Study, result_summary)


logger.info(
f"Ingesting: {submission.accno}. Created bia_data_model.ExperimentalImagingDataset. Count: {len(experimental_imaging_dataset)}"
)
Expand Down
37 changes: 26 additions & 11 deletions bia-ingest-shared-models/bia_ingest_sm/conversion/study.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import logging
from pathlib import Path
from pydantic import ValidationError
import re
from typing import List, Any, Dict
from .utils import (
get_generic_section_as_dict,
mattributes_to_dict,
dict_to_uuid,
find_sections_recursive,
log_failed_model_creation
)
from ..biostudies import (
Submission,
Expand Down Expand Up @@ -66,7 +68,10 @@ def get_study(
}
# study_uuid = dict_to_uuid(study_dict, ["accession_id",])
# study_dict["uuid"] = study_uuid
study = bia_data_model.Study.model_validate(study_dict)
try:
study = bia_data_model.Study.model_validate(study_dict)
except(ValidationError):
log_failed_model_creation(bia_data_model.Study, result_summary)

if persist_artefacts:
output_dir = Path(settings.bia_data_dir) / "studies"
Expand Down Expand Up @@ -150,18 +155,18 @@ def get_grant(submission: Submission, RESULT_SUMMARY: dict) -> List[semantic_mod


# TODO: Put comments and docstring
def get_funding_body(submission: Submission, RESULT_SUMMARY: dict) -> semantic_models.FundingBody:
def get_funding_body(submission: Submission, result_summary: dict) -> semantic_models.FundingBody:

key_mapping = [
("display_name", "Agency", None,),
]
funding_body = get_generic_section_as_dict(
submission, ["Funding",], key_mapping, semantic_models.FundingBody, RESULT_SUMMARY[submission.accno]
submission, ["Funding",], key_mapping, semantic_models.FundingBody, result_summary[submission.accno]
)
return funding_body


def get_affiliation(submission: Submission, RESULT_SUMMARY: dict) -> Dict[str, semantic_models.Affiliation]:
def get_affiliation(submission: Submission, result_summary: dict) -> Dict[str, semantic_models.Affiliation]:
"""
Maps biostudies.Submission.Organisation sections to semantic_models.Affiliations
"""
Expand All @@ -184,14 +189,18 @@ def get_affiliation(submission: Submission, RESULT_SUMMARY: dict) -> Dict[str, s
attr_dict = attributes_to_dict(section.attributes)

model_dict = {k: attr_dict.get(v, default) for k, v, default in key_mapping}
affiliation_dict[section.accno] = semantic_models.Affiliation.model_validate(
try:
affiliation_dict[section.accno] = semantic_models.Affiliation.model_validate(
model_dict
)
)
except(ValidationError):
log_failed_model_creation(semantic_models.Contributor, result_summary)


return affiliation_dict


def get_publication(submission: Submission, RESULT_SUMMARY: dict) -> List[semantic_models.Publication]:
def get_publication(submission: Submission, result_summary: dict) -> List[semantic_models.Publication]:
publication_sections = find_sections_recursive(
submission.section, ["publication",], []
)
Expand All @@ -207,16 +216,19 @@ def get_publication(submission: Submission, RESULT_SUMMARY: dict) -> List[semant
attr_dict = attributes_to_dict(section.attributes)

model_dict = {k: attr_dict.get(v, default) for k, v, default in key_mapping}
publications.append(semantic_models.Publication.model_validate(model_dict))
try:
publications.append(semantic_models.Publication.model_validate(model_dict))
except(ValidationError):
log_failed_model_creation(semantic_models.Contributor, result_summary)

return publications


def get_contributor(submission: Submission, RESULT_SUMMARY: dict) -> List[semantic_models.Contributor]:
def get_contributor(submission: Submission, result_summary: dict) -> List[semantic_models.Contributor]:
"""
Map authors in submission to semantic_model.Contributors
"""
affiliation_dict = get_affiliation(submission, RESULT_SUMMARY)
affiliation_dict = get_affiliation(submission, result_summary)
key_mapping = [
("display_name", "Name", None),
("contact_email", "E-mail", "[email protected]"),
Expand All @@ -237,6 +249,9 @@ def get_contributor(submission: Submission, RESULT_SUMMARY: dict) -> List[semant
model_dict["affiliation"] = [
model_dict["affiliation"],
]
contributors.append(semantic_models.Contributor.model_validate(model_dict))
try:
contributors.append(semantic_models.Contributor.model_validate(model_dict))
except(ValidationError):
log_failed_model_creation(semantic_models.Contributor, result_summary)

return contributors
28 changes: 18 additions & 10 deletions bia-ingest-shared-models/bia_ingest_sm/conversion/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,22 @@

logger = logging.getLogger('biaingest')


def log_failed_model_creation(model_class, valdiation_error_tracking) -> None:
    """
    Log that a model could not be created and bump its validation error counter.

    The debug message is logged with ``exc_info=True``, so when this is called
    from inside an ``except`` block the exception being handled is attached to
    the log record.

    Args:
        model_class: Class whose creation failed. Its ``__name__`` is used in
            the log message and to build the counter attribute name.
        valdiation_error_tracking: Object carrying a numeric attribute named
            ``<model_class.__name__>_ValidationErrorCount``; that attribute is
            incremented by one. (The parameter name's spelling is kept as-is
            so existing keyword callers keep working.)

    Raises:
        AttributeError: If the tracking object has no attribute with the
            expected counter name.
    """
    model_name = model_class.__name__
    logger.error("Failed to create %s", model_name)
    logger.debug("Pydantic Validation Error:", exc_info=True)

    # Use the getattr/setattr builtins rather than calling the dunder methods
    # directly; they go through the normal attribute protocol of the object.
    field_name = f"{model_name}_ValidationErrorCount"
    current_count = getattr(valdiation_error_tracking, field_name)
    setattr(valdiation_error_tracking, field_name, current_count + 1)


# TODO: Put comments and docstring
def get_generic_section_as_list(
root: Submission | Section,
section_name: List[str],
key_mapping: List[Tuple[str, str, str | None | List]],
mapped_object: Optional[BaseModel] = None,
mapped_attrs_dict: Optional[Dict[str, Any]] = None,
valdiation_error_tracking: Optional[ObjectValidationResult] = None,
) -> List[Any | Dict[str, str | List[str]]]:
"""
Map biostudies.Submission objects to either semantic_models or bia_data_model equivalent
Expand All @@ -42,7 +51,12 @@ def get_generic_section_as_list(
if mapped_object is None:
return_list.append(model_dict)
else:
return_list.append(mapped_object.model_validate(model_dict))
if not valdiation_error_tracking:
raise RuntimeError("If a mapped_object is provided, valdiation_error_tracking needs to also be provided.")
try:
return_list.append(mapped_object.model_validate(model_dict))
except(ValidationError):
log_failed_model_creation(mapped_object, valdiation_error_tracking)
return return_list


Expand Down Expand Up @@ -74,10 +88,7 @@ def get_generic_section_as_dict(
try:
return_dict[section.accno] = mapped_object.model_validate(model_dict)
except(ValidationError):
logger.warn(f"Failed to create {mapped_object.__name__}")
logger.debug("Pydantic Validation Error:", exc_info=True)
field_name = f"{mapped_object.__name__}_ValidationErrorCount"
valdiation_error_tracking.__setattr__(field_name, valdiation_error_tracking.__getattribute__(field_name) + 1)
log_failed_model_creation(mapped_object, valdiation_error_tracking)
return return_dict


Expand All @@ -93,10 +104,7 @@ def dicts_to_api_models(
try:
api_models.append(api_model_class.model_validate(model_dict))
except(ValidationError):
logger.warn(f"Failed to create {api_model_class.__name__}")
logger.debug("Pydantic Validation Error:", exc_info=True)
field_name = f"{api_model_class.__name__}_ValidationErrorCount"
valdiation_error_tracking.__setattr__(field_name, valdiation_error_tracking.__getattribute__(field_name) + 1)
log_failed_model_creation(api_model_class, valdiation_error_tracking)
return api_models


Expand Down Expand Up @@ -156,7 +164,7 @@ def dict_to_uuid(my_dict: Dict[str, Any], attributes_to_consider: List[str]) ->
return str(uuid.UUID(version=4, hex=hexdigest))


def persist(object_list: List, object_path: str, sumbission_accno: str):
def persist(object_list: List[BaseModel], object_path: str, sumbission_accno: str):
output_dir = Path(settings.bia_data_dir) / object_path / sumbission_accno
if not output_dir.is_dir():
output_dir.mkdir(parents=True)
Expand Down

0 comments on commit 67e2fb2

Please sign in to comment.