Skip to content

Commit

Permalink
adds dataversion uploading
Browse files: browse the repository at this point in the history
  • Loading branch information
BWMac committed Jan 30, 2025
1 parent 306e9c5 commit 5421cbd
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 0 deletions.
1 change: 1 addition & 0 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ destination: &dest syn12177492
staging_path: ./staging
gx_folder: syn52948668
gx_table: syn60527066
team_images_id: syn12861877
sources:
- genes_biodomains:
genes_biodomains_files: &genes_biodomains_files
Expand Down
45 changes: 45 additions & 0 deletions src/agoradatatools/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,40 @@ def apply_custom_transformations(
return None


def upload_dataversion_metadata(
    syn: synapseclient.Synapse,
    file_id: str,
    file_version: str,
    team_images_id: str,
    staging_path: str,
    destination: str,
) -> None:
    """Write ``dataversion.json`` for the manifest file and upload it to Synapse.

    The JSON payload holds three entries: ``data_file`` (the manifest's
    Synapse ID), ``data_version`` (the manifest's version) and
    ``team_images_id``. It is written through ``load.dict_to_json`` and the
    resulting path is handed to ``load.load`` with the manifest recorded as
    provenance.

    Args:
        syn (synapseclient.Synapse): Synapse client session
        file_id (str): Synapse ID of the manifest file
        file_version (str): Version number of the manifest file
        team_images_id (str): Synapse ID of the team_images folder
        staging_path (str): Path to the staging directory
        destination (str): Synapse ID of the destination folder
    """
    json_path = load.dict_to_json(
        df={
            "data_file": file_id,
            "data_version": file_version,
            "team_images_id": team_images_id,
        },
        staging_path=staging_path,
        filename="dataversion.json",
    )
    # The manifest file is the sole provenance entry for the uploaded JSON.
    load.load(
        file_path=json_path,
        provenance=[file_id],
        destination=destination,
        syn=syn,
    )


@log_time(func_name="process_dataset", logger=logger)
def process_dataset(
dataset_obj: dict,
Expand Down Expand Up @@ -209,6 +243,7 @@ def create_data_manifest(
return None

folders = syn.getChildren(parent)

folder = [
{"id": folder["id"], "version": folder["versionNumber"]} for folder in folders
]
Expand Down Expand Up @@ -292,6 +327,16 @@ def process_all_files(
destination=destination,
syn=syn,
)

upload_dataversion_metadata(
syn=syn,
file_id=file_id,
file_version=file_version,
team_images_id=config["team_images_id"],
staging_path=staging_path,
destination=destination,
)

reporter.data_manifest_file = file_id
reporter.data_manifest_version = file_version
reporter.data_manifest_link = DatasetReport.format_link(
Expand Down
1 change: 1 addition & 0 deletions test_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ destination: &dest syn17015333
staging_path: ./staging
gx_folder: syn52948670
gx_table: syn60527065
team_images_id: syn12861877
sources:
- genes_biodomains:
genes_biodomains_files: &genes_biodomains_files
Expand Down
41 changes: 41 additions & 0 deletions tests/test_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,47 @@
GX_FOLDER = "test_folder"


class TestUploadDataversionMetadata:
    """Tests for ``process.upload_dataversion_metadata``."""

    file_id = "syn1111111"
    file_version = "1"
    team_images_id = "syn12861877"
    destination = "syn1111113"
    # Payload the function is expected to pass to load.dict_to_json
    dataversion_dict = dict(
        data_file=file_id,
        data_version=file_version,
        team_images_id=team_images_id,
    )

    def test_upload_dataversion_metadata(self, syn: Any):
        """Check that the JSON is written and then loaded with the expected arguments."""
        json_path = "path/to/json"
        with patch.object(load, "dict_to_json", return_value=json_path) as mock_to_json:
            with patch.object(load, "load", return_value=("syn123", 1)) as mock_load:
                # WHEN I call upload_dataversion_metadata with the correct arguments
                process.upload_dataversion_metadata(
                    syn=syn,
                    file_id=self.file_id,
                    file_version=self.file_version,
                    team_images_id=self.team_images_id,
                    staging_path=STAGING_PATH,
                    destination=self.destination,
                )

                # THEN dict_to_json receives the dataversion payload and target filename
                mock_to_json.assert_called_once_with(
                    df=self.dataversion_dict,
                    staging_path=STAGING_PATH,
                    filename="dataversion.json",
                )

                # AND load receives the generated JSON path with the manifest as provenance
                mock_load.assert_called_once_with(
                    file_path=json_path,
                    provenance=[self.file_id],
                    destination=self.destination,
                    syn=syn,
                )


class TestProcessDataset:
dataset_object = {
"neuropath_corr": {
Expand Down

0 comments on commit 5421cbd

Please sign in to comment.