NOTE(review): line structure restored from a whitespace-collapsed copy of this patch.
Indentation of context lines and the number of blank context lines are inferred from
the hunk headers, not recovered from the original; verify against the target tree.

diff --git a/config.yaml b/config.yaml
index 56aac9b0..7538fbcd 100644
--- a/config.yaml
+++ b/config.yaml
@@ -2,6 +2,7 @@ destination: &dest syn12177492
 staging_path: ./staging
 gx_folder: syn52948668
 gx_table: syn60527066
+team_images_id: syn12861877
 sources:
   - genes_biodomains:
       genes_biodomains_files: &genes_biodomains_files
diff --git a/src/agoradatatools/process.py b/src/agoradatatools/process.py
index 3fd7951a..03b504fd 100644
--- a/src/agoradatatools/process.py
+++ b/src/agoradatatools/process.py
@@ -69,6 +69,40 @@ def apply_custom_transformations(
         return None
 
 
+def upload_dataversion_metadata(
+    syn: synapseclient.Synapse,
+    file_id: str,
+    file_version: str,
+    team_images_id: str,
+    staging_path: str,
+    destination: str,
+) -> None:
+    """Uploads dataversion.json (manifest ID and version, team_images ID) to Synapse
+
+    Args:
+        syn (synapseclient.Synapse): Synapse client session
+        file_id (str): Synapse ID of the manifest file; also passed as provenance
+        file_version (str): Version number of the manifest file
+        team_images_id (str): Synapse ID of the team_images folder
+        staging_path (str): Path to the staging directory
+        destination (str): Synapse ID of the destination folder
+    """
+    dataversion_dict = {
+        "data_file": file_id,
+        "data_version": file_version,
+        "team_images_id": team_images_id,
+    }
+    dataversion_json_path = load.dict_to_json(
+        df=dataversion_dict, staging_path=staging_path, filename="dataversion.json"
+    )
+    load.load(
+        file_path=dataversion_json_path,
+        provenance=[file_id],
+        destination=destination,
+        syn=syn,
+    )
+
+
 @log_time(func_name="process_dataset", logger=logger)
 def process_dataset(
     dataset_obj: dict,
@@ -209,6 +243,7 @@ def create_data_manifest(
         return None
 
     folders = syn.getChildren(parent)
+
     folder = [
         {"id": folder["id"], "version": folder["versionNumber"]} for folder in folders
     ]
@@ -292,6 +327,16 @@ def process_all_files(
             destination=destination,
             syn=syn,
         )
+
+        upload_dataversion_metadata(
+            syn=syn,
+            file_id=file_id,
+            file_version=file_version,
+            team_images_id=config["team_images_id"],
+            staging_path=staging_path,
+            destination=destination,
+        )
+
         reporter.data_manifest_file = file_id
         reporter.data_manifest_version = file_version
         reporter.data_manifest_link = DatasetReport.format_link(
diff --git a/test_config.yaml b/test_config.yaml
index 0e2e69ff..4b617940 100644
--- a/test_config.yaml
+++ b/test_config.yaml
@@ -2,6 +2,7 @@ destination: &dest syn17015333
 staging_path: ./staging
 gx_folder: syn52948670
 gx_table: syn60527065
+team_images_id: syn12861877
 sources:
   - genes_biodomains:
       genes_biodomains_files: &genes_biodomains_files
diff --git a/tests/test_process.py b/tests/test_process.py
index 7dd389d6..c2ddf3d0 100644
--- a/tests/test_process.py
+++ b/tests/test_process.py
@@ -16,6 +16,47 @@ GX_FOLDER = "test_folder"
 
 
 
+class TestUploadDataversionMetadata:
+    file_id = "syn1111111"
+    file_version = "1"
+    team_images_id = "syn12861877"
+    destination = "syn1111113"
+    dataversion_dict = {
+        "data_file": file_id,
+        "data_version": file_version,
+        "team_images_id": team_images_id,
+    }
+
+    def test_upload_dataversion_metadata(self, syn: Any):
+        with patch.object(
+            load, "dict_to_json", return_value="path/to/json"
+        ) as patch_dict_to_json, patch.object(
+            load, "load", return_value=("syn123", 1)
+        ) as patch_load:
+            # WHEN I call upload_dataversion_metadata with the correct arguments
+            process.upload_dataversion_metadata(
+                syn=syn,
+                file_id=self.file_id,
+                file_version=self.file_version,
+                team_images_id=self.team_images_id,
+                staging_path=STAGING_PATH,
+                destination=self.destination,
+            )
+            # THEN I expect the dict_to_json function to be called with the correct arguments
+            patch_dict_to_json.assert_called_once_with(
+                df=self.dataversion_dict,
+                staging_path=STAGING_PATH,
+                filename="dataversion.json",
+            )
+            # AND I expect the load function to be called with the correct arguments
+            patch_load.assert_called_once_with(
+                file_path="path/to/json",
+                provenance=[self.file_id],
+                destination=self.destination,
+                syn=syn,
+            )
+
+
 class TestProcessDataset:
     dataset_object = {
         "neuropath_corr": {