-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Model updates, and some conversion logic (#123)
* model updates to standardise names further and to account for which fields will actually be generated endpoints * model updates and added specimen growth, preparation, and image acquisition conversion logic * added logic to generate annotation method objects * tidied up imports * created empty annotation file list to make tests pass * moved file reference conversion to its own file, fixed imports of shared models, and fixed file_name -> file_path as per model change for file references * updated models and ingest code
- Loading branch information
Showing
18 changed files
with
1,138 additions
and
830 deletions.
There are no files selected for viewing
71 changes: 71 additions & 0 deletions
71
bia-ingest-shared-models/bia_ingest_sm/conversion/annotation_method.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
import logging | ||
from typing import List, Any, Dict | ||
from .utils import ( | ||
dicts_to_api_models, | ||
find_sections_recursive, | ||
dict_to_uuid, | ||
persist | ||
) | ||
from ..biostudies import ( | ||
Submission, | ||
attributes_to_dict, | ||
) | ||
from bia_shared_datamodels import bia_data_model | ||
|
||
# Module-scoped logger, named after this module.
logger = logging.getLogger(__name__)
# NOTE(review): this configures the root logger at INFO as an import-time
# side effect; it is kept because callers may rely on it.
logging.basicConfig(level=logging.INFO)
|
||
|
||
def get_annotation_method(
    submission: Submission, persist_artefacts=False
) -> List[bia_data_model.AnnotationMethod]:
    """Build the AnnotationMethod models described by a submission.

    The submission is first reduced to plain dicts by
    ``extract_annotation_method_dicts``; those dicts are then converted
    with ``dicts_to_api_models``.

    Args:
        submission: Submission whose annotation sections are read.
        persist_artefacts: When truthy and at least one model was produced,
            the models are passed to ``persist`` under the
            "annotation_method" label together with ``submission.accno``.

    Returns:
        Whatever ``dicts_to_api_models`` yields for the extracted dicts.
    """
    methods = dicts_to_api_models(
        extract_annotation_method_dicts(submission),
        bia_data_model.AnnotationMethod,
    )

    # Nothing to write out unless persistence was requested and there is
    # something to persist.
    if not (persist_artefacts and methods):
        return methods

    persist(methods, "annotation_method", submission.accno)
    return methods
|
||
|
||
def extract_annotation_method_dicts(submission: Submission) -> List[Dict[str, Any]]:
    """Turn each "Annotations" section of a submission into a model dict.

    Sections are located with ``find_sections_recursive`` starting from
    ``submission.section``. For every section found, its attributes are
    flattened with ``attributes_to_dict`` and copied into a new dict under
    the model field names listed below, using the given fallback when an
    attribute is absent.

    Args:
        submission: Submission to search for "Annotations" sections.

    Returns:
        One dict per section, holding the mapped fields plus
        ``source_dataset`` (currently always an empty list), ``accno``,
        ``accession_id`` and a ``uuid`` computed by
        ``generate_annotation_method_uuid``.
    """
    # (model field, submission attribute name, fallback when absent)
    field_specs = (
        ("title_id", "Name", ""),
        ("protocol_description", "Annotation overview", ""),
        ("annotation_criteria", "Annotation criteria", ""),
        ("annotation_coverage", "Annotation coverage", ""),
        ("method_type", "Annotation method", "other"),
    )

    results = []
    for annotation_section in find_sections_recursive(
        submission.section, ["Annotations"], []
    ):
        attributes = attributes_to_dict(annotation_section.attributes)

        entry = {}
        for field_name, attribute_name, fallback in field_specs:
            entry[field_name] = attributes.get(attribute_name, fallback)

        # TODO: change template to get source dataset information
        entry["source_dataset"] = []

        entry["accno"] = annotation_section.__dict__.get("accno", "")
        entry["accession_id"] = submission.accno
        # The uuid is derived last so that every field it depends on is
        # already present in the dict.
        entry["uuid"] = generate_annotation_method_uuid(entry)
        results.append(entry)

    return results
|
||
|
||
def generate_annotation_method_uuid(protocol_dict: Dict[str, Any]) -> str:
    """Derive the UUID for an annotation method dict.

    Delegates to ``dict_to_uuid``, passing the list of keys below as the
    attributes to consider.

    Args:
        protocol_dict: Annotation method dict containing the listed keys.

    Returns:
        The value returned by ``dict_to_uuid``.
    """
    return dict_to_uuid(
        protocol_dict,
        [
            "accession_id",
            "accno",
            "title_id",
            "protocol_description",
            "annotation_criteria",
            "annotation_coverage",
            "method_type",
            "source_dataset",
        ],
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
64 changes: 64 additions & 0 deletions
64
bia-ingest-shared-models/bia_ingest_sm/conversion/file_reference.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import logging | ||
from pathlib import Path | ||
from typing import List, Dict | ||
from .utils import ( | ||
dict_to_uuid, | ||
) | ||
from ..biostudies import ( | ||
Submission, | ||
attributes_to_dict, | ||
find_file_lists_in_submission, | ||
flist_from_flist_fname, | ||
file_uri, | ||
) | ||
from ..config import settings | ||
from bia_shared_datamodels import bia_data_model | ||
|
||
# Module-scoped logger, named after this module.
logger = logging.getLogger(__name__)
# NOTE(review): this configures the root logger at INFO as an import-time
# side effect; it is kept because callers may rely on it.
logging.basicConfig(level=logging.INFO)
|
||
def get_file_reference_by_study_component(
    submission: Submission, persist_artefacts: bool = False
) -> Dict[str, List[bia_data_model.FileReference]]:
    """
    Return Dict of list of file references in study components.

    Every file list found by ``find_file_lists_in_submission`` is expanded
    with ``flist_from_flist_fname``. For each file a UUID is computed from
    the accession id, file path and size, and collected under the file
    list's "Name" entry.

    NOTE(review): the collected values are the UUIDs returned by
    ``dict_to_uuid``, not ``FileReference`` objects, despite the return
    annotation - confirm which one callers expect.

    Args:
        submission: Submission whose file lists are processed.
        persist_artefacts: When true, each file is also validated as a
            ``FileReference`` and written as ``<uuid>.json`` under
            ``<settings.bia_data_dir>/file_references/<accno>``.

    Returns:
        Mapping of study component name to the UUIDs of its files.
    """
    file_list_dicts = find_file_lists_in_submission(submission)
    fileref_to_study_components = {}

    if persist_artefacts:
        output_dir = Path(settings.bia_data_dir) / "file_references" / submission.accno
        if not output_dir.is_dir():
            # exist_ok=True avoids a FileExistsError if the directory is
            # created by something else between the check and the mkdir.
            output_dir.mkdir(parents=True, exist_ok=True)
            logger.info(f"Created {output_dir}")

    for file_list_dict in file_list_dicts:
        study_component_name = file_list_dict["Name"]
        # Several file lists may share a study component name; they
        # accumulate into the same list.
        fileref_uuids = fileref_to_study_components.setdefault(
            study_component_name, []
        )

        fname = file_list_dict["File List"]
        files_in_fl = flist_from_flist_fname(submission.accno, fname)
        for f in files_in_fl:
            file_dict = {
                "accession_id": submission.accno,
                "file_path": str(f.path),
                "size_in_bytes": str(f.size),
            }
            fileref_uuid = dict_to_uuid(
                file_dict, ["accession_id", "file_path", "size_in_bytes"]
            )
            fileref_uuids.append(fileref_uuid)
            # TODO - Not storing submission_dataset uuid yet!!!
            if persist_artefacts:
                file_dict["uuid"] = fileref_uuid
                file_dict["uri"] = file_uri(submission.accno, f)
                # Placeholder: the file reference's own uuid stands in
                # until the real submission_dataset uuid is available.
                file_dict["submission_dataset"] = fileref_uuid
                file_dict["format"] = f.type
                file_dict["attribute"] = attributes_to_dict(f.attributes)
                file_reference = bia_data_model.FileReference.model_validate(file_dict)
                output_path = output_dir / f"{fileref_uuid}.json"
                # Pin the encoding so the JSON does not depend on the
                # platform's locale default.
                output_path.write_text(
                    file_reference.model_dump_json(indent=2), encoding="utf-8"
                )
                logger.info(f"Written {output_path}")

    return fileref_to_study_components
Oops, something went wrong.