-
Notifications
You must be signed in to change notification settings - Fork 3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Model updates, and some conversion logic #123
Changes from all commits
eb05833
687b69d
76ed1ea
5bc4daa
2f5c33c
204c804
0dc3088
a513a7e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
import logging | ||
from typing import List, Any, Dict | ||
from .utils import ( | ||
dicts_to_api_models, | ||
find_sections_recursive, | ||
dict_to_uuid, | ||
persist | ||
) | ||
from ..biostudies import ( | ||
Submission, | ||
attributes_to_dict, | ||
) | ||
from bia_shared_datamodels import bia_data_model | ||
|
||
logger = logging.getLogger(__name__) | ||
logging.basicConfig(level=logging.INFO) | ||
|
||
|
||
def get_annotation_method(
    submission: Submission, persist_artefacts=False
) -> List[bia_data_model.AnnotationMethod]:
    """Build AnnotationMethod models for a submission.

    The dicts produced by ``extract_annotation_method_dicts`` are converted
    with ``dicts_to_api_models`` using ``bia_data_model.AnnotationMethod``.

    Args:
        submission: Submission to extract annotation methods from.
        persist_artefacts: When truthy, and the conversion produced a
            non-empty result, the models are passed to ``persist`` under
            the "annotation_method" label together with the submission's
            accession number.

    Returns:
        The converted annotation method models.
    """
    methods = dicts_to_api_models(
        extract_annotation_method_dicts(submission),
        bia_data_model.AnnotationMethod,
    )

    # Nothing to persist when persistence is off or no models were produced.
    if not (persist_artefacts and methods):
        return methods

    persist(methods, "annotation_method", submission.accno)
    return methods
|
||
|
||
def extract_annotation_method_dicts(submission: Submission) -> List[Dict[str, Any]]:
    """Collect one model dict per "Annotations" section of a submission.

    Each dict holds the mapped section attributes, an empty
    ``source_dataset`` list, the section's ``accno``, the submission's
    accession number as ``accession_id``, and a ``uuid`` computed by
    ``generate_annotation_method_uuid`` from the other entries.

    Args:
        submission: Submission whose sections are searched.

    Returns:
        A list of dicts, in the order the sections were found.
    """
    # (model field, section attribute name, value used when the attribute is absent)
    field_specs = (
        ("title_id", "Name", ""),
        ("protocol_description", "Annotation overview", ""),
        ("annotation_criteria", "Annotation criteria", ""),
        ("annotation_coverage", "Annotation coverage", ""),
        ("method_type", "Annotation method", "other"),
    )

    sections = find_sections_recursive(submission.section, ["Annotations"], [])

    results = []
    for section in sections:
        attributes = attributes_to_dict(section.attributes)

        entry = {}
        for field_name, attribute_name, fallback in field_specs:
            entry[field_name] = attributes.get(attribute_name, fallback)

        # TODO: change template to get source dataset information
        entry["source_dataset"] = []

        # Read from the instance dict so a section without an accno yields "".
        entry["accno"] = section.__dict__.get("accno", "")
        entry["accession_id"] = submission.accno
        # The uuid is computed last, once every contributing field is set.
        entry["uuid"] = generate_annotation_method_uuid(entry)
        results.append(entry)

    return results
|
||
|
||
def generate_annotation_method_uuid(protocol_dict: Dict[str, Any]) -> str:
    """Return the uuid for an annotation method dict.

    Delegates to ``dict_to_uuid``, passing the list of keys of
    ``protocol_dict`` that should be taken into account.

    Args:
        protocol_dict: Annotation method dict containing the listed keys.

    Returns:
        The value returned by ``dict_to_uuid``.
    """
    return dict_to_uuid(
        protocol_dict,
        [
            "accession_id",
            "accno",
            "title_id",
            "protocol_description",
            "annotation_criteria",
            "annotation_coverage",
            "method_type",
            "source_dataset",
        ],
    )
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import logging | ||
from pathlib import Path | ||
from typing import List, Dict | ||
from .utils import ( | ||
dict_to_uuid, | ||
) | ||
from ..biostudies import ( | ||
Submission, | ||
attributes_to_dict, | ||
find_file_lists_in_submission, | ||
flist_from_flist_fname, | ||
file_uri, | ||
) | ||
from ..config import settings | ||
from bia_shared_datamodels import bia_data_model | ||
|
||
logger = logging.getLogger(__name__) | ||
logging.basicConfig(level=logging.INFO) | ||
|
||
def get_file_reference_by_study_component(
    submission: Submission, persist_artefacts: bool = False
) -> Dict[str, List[str]]:
    """Map each study component name to the uuids of its file references.

    For every file list found in the submission, each file gets a uuid
    derived (via ``dict_to_uuid``) from the submission accession number,
    the file path and the file size. Note that the returned lists contain
    these uuids, not ``FileReference`` model instances.

    Args:
        submission: Submission whose file lists are processed.
        persist_artefacts: When True, a ``FileReference`` is validated for
            every file and written as JSON to
            ``<settings.bia_data_dir>/file_references/<accno>/<uuid>.json``.

    Returns:
        Dict keyed by study component name (the file list's "Name") whose
        values are lists of file reference uuids.
    """
    file_list_dicts = find_file_lists_in_submission(submission)
    fileref_to_study_components: Dict[str, List[str]] = {}

    output_dir = None
    if persist_artefacts:
        output_dir = Path(settings.bia_data_dir) / "file_references" / submission.accno
        if not output_dir.is_dir():
            # exist_ok guards against the directory being created between
            # the is_dir() check and this call.
            output_dir.mkdir(parents=True, exist_ok=True)
            logger.info("Created %s", output_dir)

    for file_list_dict in file_list_dicts:
        study_component_name = file_list_dict["Name"]
        # Components sharing a name accumulate into the same list.
        fileref_uuids = fileref_to_study_components.setdefault(
            study_component_name, []
        )

        fname = file_list_dict["File List"]
        files_in_fl = flist_from_flist_fname(submission.accno, fname)
        for f in files_in_fl:
            file_dict = {
                "accession_id": submission.accno,
                "file_path": str(f.path),
                "size_in_bytes": str(f.size),
            }
            fileref_uuid = dict_to_uuid(
                file_dict, ["accession_id", "file_path", "size_in_bytes"]
            )
            fileref_uuids.append(fileref_uuid)

            if not persist_artefacts:
                continue

            # TODO - Not storing submission_dataset uuid yet!!!
            # NOTE(review): the file reference's own uuid is only a
            # placeholder here. The actual submission_dataset uuid needs to
            # be passed in, especially as it will be the only link to the
            # file reference's parent.
            file_dict["uuid"] = fileref_uuid
            file_dict["uri"] = file_uri(submission.accno, f)
            file_dict["submission_dataset"] = fileref_uuid
            file_dict["format"] = f.type
            file_dict["attribute"] = attributes_to_dict(f.attributes)

            file_reference = bia_data_model.FileReference.model_validate(file_dict)
            output_path = output_dir / f"{fileref_uuid}.json"
            output_path.write_text(file_reference.model_dump_json(indent=2))
            logger.info("Written %s", output_path)

    return fileref_to_study_components
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
With the new approach (file_reference points to its parent EID), we may have to rewrite this function (see the comment on the assignment of submission_dataset).