Skip to content

Commit

Permalink
fixes manifest version bug
Browse files: browse the repository at this point in the history
  • Loading branch information
BWMac committed Jan 30, 2025
1 parent a98a688 commit 66e7fbb
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 17 deletions.
20 changes: 14 additions & 6 deletions src/agoradatatools/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,9 +227,9 @@ def process_dataset(


def create_data_manifest(
syn: synapseclient.Synapse, parent: synapseclient.Folder = None
syn: synapseclient.Synapse, parent: Union[synapseclient.Folder, str] = None
) -> Union[DataFrame, None]:
"""Creates data manifest (dataframe) that has the IDs and version numbers of child synapse folders
"""Creates data manifest (dataframe) that has the IDs and version numbers of child synapse files
Args:
syn (synapseclient.Synapse): Synapse client session.
Expand All @@ -242,13 +242,21 @@ def create_data_manifest(
if not parent:
return None

folders = syn.getChildren(parent)
files = syn.getChildren(parent)

folder = [
{"id": folder["id"], "version": folder["versionNumber"]} for folder in folders
manifest_rows = [
{
"id": file["id"],
"version": (
file["versionNumber"]
if file["name"] != "data_manifest.csv"
else file["versionNumber"] + 1
),
}
for file in files
]

return DataFrame(folder)
return DataFrame(manifest_rows)


@log_time(func_name="process_all_files", logger=logger)
Expand Down
38 changes: 27 additions & 11 deletions tests/test_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,16 @@
import pandas as pd
import pytest

from synapseclient import File

from agoradatatools import process
from agoradatatools.errors import ADTDataProcessingError
from agoradatatools.etl import load, utils, extract
from agoradatatools.reporter import DatasetReport, ADTGXReporter
from agoradatatools.constants import Platform
from agoradatatools.gx import GreatExpectationsRunner


STAGING_PATH = "./staging"
GX_FOLDER = "test_folder"

Expand Down Expand Up @@ -424,27 +427,40 @@ def test_process_dataset_upload_false_gx_enabled(self, syn: Any):


class TestCreateDataManifest:
files = [
File(id="syn123", name="not_a_manifest", versionNumber=1),
File(id="syn456", name="data_manifest.csv", versionNumber=1),
]
manifest_rows = [
{"id": "syn123", "version": 1},
{"id": "syn456", "version": 2},
]

@pytest.fixture(scope="function", autouse=True)
def setup_method(self, syn: Any):
self.patch_syn_login = patch.object(
utils, "_login_to_synapse", return_value=syn
).start()
self.patch_get_children = patch.object(
syn, "getChildren", return_value=[{"id": "123", "versionNumber": 1}]
syn, "getChildren", return_value=self.files
).start()

def teardown_method(self):
mock.patch.stopall()

def test_create_data_manifest_parent_none(self, syn: Any):
assert process.create_data_manifest(syn=syn, parent=None) is None
self.patch_syn_login.assert_not_called()

def test_create_data_manifest_no_none(self, syn: Any):
df = process.create_data_manifest(syn=syn, parent="syn1111111")
# WHEN I call create_data_manifest with a parent of None
result = process.create_data_manifest(syn=syn, parent=None)
# THEN I expect the result to be None
assert result is None
# AND I expect the getChildren method to not be called
self.patch_get_children.assert_not_called()

def test_create_data_manifest_with_parent(self, syn: Any):
# WHEN I call create_data_manifest with a parent
result_df = process.create_data_manifest(syn=syn, parent="syn1111111")
# THEN I expect the getChildren method to be called with the parent
self.patch_get_children.assert_called_once_with("syn1111111")
self.patch_syn_login.assert_not_called()
assert isinstance(df, pd.DataFrame)
# AND I expect the result to be a dataframe with the correct rows
# Including incrementing the version number for the data_manifest.csv file
pd.testing.assert_frame_equal(result_df, pd.DataFrame(self.manifest_rows))


class TestProcessAllFiles:
Expand Down

0 comments on commit 66e7fbb

Please sign in to comment.