From 762cdfe2d37117556a4a4d1477a76c3322d365bd Mon Sep 17 00:00:00 2001 From: Chia-Hui Lin Date: Mon, 15 Feb 2021 11:59:53 -0800 Subject: [PATCH 01/42] update unit tests --- synapseutils/sync.py | 36 ++++++ .../unit_test_synapseutils_sync.py | 116 +++++++++++++++++- 2 files changed, 147 insertions(+), 5 deletions(-) diff --git a/synapseutils/sync.py b/synapseutils/sync.py index b3006bf40..87e2b60e0 100644 --- a/synapseutils/sync.py +++ b/synapseutils/sync.py @@ -3,6 +3,7 @@ from contextlib import contextmanager import io import os +import re import sys import threading import typing @@ -770,6 +771,18 @@ def readManifestFile(syn, manifestFile): raise ValueError("All rows in manifest must contain a unique file to upload") sys.stdout.write('OK\n') + # Check each size of uploaded file + sys.stdout.write('Validating that all the files are not empty...') + _check_size_each_file(df) + sys.stdout.write('OK\n') + + # check the name of each file should be store on Synapse + name_column = 'name' + if name_column in df.columns: + sys.stdout.write('Validating that all name column are filled... \n') + _check_file_name(df) + sys.stdout.write('OK\n') + sys.stdout.write('Validating provenance...') df = _sortAndFixProvenance(syn, df) sys.stdout.write('OK\n') @@ -869,6 +882,7 @@ def syncToSynapse(syn, manifestFile, dryRun=False, sendMessages=True, retries=MA """ df = readManifestFile(syn, manifestFile) + # have to check all size of single file sizes = [os.stat(os.path.expandvars(os.path.expanduser(f))).st_size for f in df.path if not is_url(f)] # Write output on what is getting pushed and estimated times - send out message. sys.stdout.write('='*50+'\n') @@ -919,3 +933,25 @@ def _manifest_upload(syn, df): uploader.upload(items) return True + + +def _check_file_name(df): + for idx, row in df.iterrows(): + pattern = "^[`\\w \\-\\+\\.\\(\\)]{1,256}$" + if not row['name']: + directory_name = os.path.basename(row['path']) + df.loc[df.path == row['path'], 'name'] = row['name'] = directory_name + sys.stdout.write('The file name you assigned at path: %s is empty, so we set the store name as %s.\n' + % (row['path'], directory_name)) + if not re.match(pattern, row['name']): + raise ValueError('The file name on your local side is invalid to store on Synapse. Names may only contain:' + 'letters, numbers, spaces, underscores, hyphens, periods, plus signs, apostrophes,' + 'and parentheses') + + +def _check_size_each_file(df): + for f in df.path: + single_file_size = os.stat(os.path.expandvars(os.path.expanduser(f))).st_size + if not is_url(f) and single_file_size == 0: + print('\nAll the files uploaded cannot be 0 byte.') + raise ValueError('All the files uploaded cannot be 0 byte.') diff --git a/tests/unit/synapseutils/unit_test_synapseutils_sync.py b/tests/unit/synapseutils/unit_test_synapseutils_sync.py index 72ebe9822..405f6a4dd 100644 --- a/tests/unit/synapseutils/unit_test_synapseutils_sync.py +++ b/tests/unit/synapseutils/unit_test_synapseutils_sync.py @@ -9,9 +9,10 @@ import threading import pytest -from unittest.mock import ANY, patch, create_autospec, Mock, call +from unittest.mock import ANY, patch, create_autospec, Mock, call, MagicMock import synapseutils +from synapseutils import sync from synapseutils.sync import _FolderSync, _PendingProvenance, _SyncUploader, _SyncUploadItem from synapseclient import Activity, File, Folder, Project, Schema, Synapse from synapseclient.core.cumulative_transfer_progress import CumulativeTransferProgress @@ -36,7 +37,8 @@ def test_readManifest__sync_order_with_home_directory(syn): # mock isfile() to always return true to avoid having to create files in the home directory # side effect mocks values for: manfiest file, file1.txt, file2.txt, isfile(project.id) check in syn.get() with patch.object(syn, "get", return_value=Project()), \ - patch.object(os.path, "isfile", side_effect=[True, True, True, False]): + patch.object(os.path, "isfile", side_effect=[True, True, True, False]), \ + patch.object(sync, "_check_size_each_file", return_value=Mock()): manifest_dataframe = synapseutils.sync.readManifestFile(syn, manifest) expected_order = pd.Series([os.path.normpath(os.path.expanduser(file_path2)), os.path.normpath(os.path.expanduser(file_path1))]) @@ -58,8 +60,9 @@ def test_readManifestFile__synapseStore_values_not_set(syn): } manifest = StringIO(header+row1+row2) - with patch.object(syn, "get", return_value=Project()),\ - patch.object(os.path, "isfile", return_value=True): # side effect mocks values for: file1.txt + with patch.object(syn, "get", return_value=Project()), \ + patch.object(os.path, "isfile", return_value=True), \ + patch.object(sync, "_check_size_each_file", return_value=Mock()): # side effect mocks values for: file1.txt manifest_dataframe = synapseutils.sync.readManifestFile(syn, manifest) actual_synapseStore = (manifest_dataframe.set_index('path')['synapseStore'].to_dict()) assert expected_synapseStore == actual_synapseStore @@ -93,7 +96,8 @@ def test_readManifestFile__synapseStore_values_are_set(syn): } manifest = StringIO(header+row1+row2+row3+row4+row5+row6) - with patch.object(syn, "get", return_value=Project()),\ + with patch.object(syn, "get", return_value=Project()), \ + patch.object(sync, "_check_size_each_file", return_value=Mock()), \ patch.object(os.path, "isfile", return_value=True): # mocks values for: file1.txt, file3.txt, file5.txt manifest_dataframe = synapseutils.sync.readManifestFile(syn, manifest) @@ -773,3 +777,105 @@ def test_get_file_entity_provenance_dict__error_not_404(self): self.mock_syn.getProvenance.side_effect = SynapseHTTPError(response=Mock(status_code=400)) pytest.raises(SynapseHTTPError, synapseutils.sync._get_file_entity_provenance_dict, self.mock_syn, "syn123") + + +@patch.object(sync, 'os') +def test_check_size_each_file(mock_os, syn): + + project_id = "syn123" + header = 'path\tparent\n' + path1 = os.path.abspath(os.path.expanduser('~/file1.txt')) + path2 = 'http://www.synapse.org' + path3 = os.path.abspath(os.path.expanduser('~/file3.txt')) + path4 = 'http://www.github.com' + + row1 = '%s\t%s\n' % (path1, project_id) + row2 = '%s\t%s\n' % (path2, project_id) + row3 = '%s\t%s\n' % (path3, project_id) + row4 = '%s\t%s\n' % (path4, project_id) + + manifest = StringIO(header + row1 + row2 + row3 + row4) + mock_os.path.isfile.side_effect = [True, True, True, False] + mock_os.path.abspath.side_effect = [path1, path3] + mock_stat = MagicMock(spec='st_size') + mock_os.stat.return_value = mock_stat + mock_stat.st_size = 5 + + # mock syn.get() to return a project because the final check is making sure parent is a container + # mock isfile() to always return true to avoid having to create files in the home directory + # side effect mocks values for: manfiest file, file1.txt, file2.txt, isfile(project.id) check in syn.get() + with patch.object(syn, "get", return_value=Project()): + sync.readManifestFile(syn, manifest) + mock_os.stat.call_count == 4 + + mock_os.reset_mock() + manifest = StringIO(header + row1 + row2 + row3 + row4) + mock_os.path.isfile.side_effect = [True, True, True, False] + mock_os.path.abspath.side_effect = [path1, path3] + mock_stat = MagicMock(spec='st_size') + mock_os.stat.return_value = mock_stat + mock_stat.st_size = 0 + with pytest.raises(ValueError) as ve: + sync.readManifestFile(syn, manifest) + assert str(ve.value) == "All the files uploaded cannot be 0 byte." + + +@patch.object(sync, 'os') +def test_check_file_name(mock_os, syn): + + project_id = "syn123" + header = 'path\tparent\tname\n' + path1 = os.path.abspath(os.path.expanduser('~/file1.txt')) + path2 = os.path.abspath(os.path.expanduser('~/file2.txt')) + path3 = os.path.abspath(os.path.expanduser('~/file3.txt')) + + # f"{path1}\t{path2} foo" + row1 = '%s\t%s\tTest_file_name.txt\n' % (path1, project_id) + row2 = '%s\t%s\tTest_file_name(-`).txt\n' % (path2, project_id) + row3 = '%s\t%s\t\n' % (path3, project_id) + + manifest = StringIO(header + row1 + row2 + row3) + mock_os.path.isfile.side_effect = [True, True, True] + mock_os.path.abspath.side_effect = [path1, path2, path3] + mock_stat = MagicMock(spec='st_size') + mock_os.stat.return_value = mock_stat + mock_stat.st_size = 5 + mock_os.path.basename.return_value = 'file3.txt' + + + # mock syn.get() to return a project because the final check is making sure parent is a container + # mock isfile() to always return true to avoid having to create files in the home directory + # side effect mocks values for: manfiest file, file1.txt, file2.txt, isfile(project.id) check in syn.get() + with patch.object(syn, "get", return_value=Project()): + sync.readManifestFile(syn, manifest) + + path4 = os.path.abspath(os.path.expanduser('~/file4.txt')) + row4 = '%s\t%s\tTest_file_name_with_#\n' % (path4, project_id) + manifest = StringIO(header + row1 + row2 + row3 + row4) + mock_os.reset_mock() + mock_os.path.isfile.side_effect = [True, True, True, True] + mock_os.path.abspath.side_effect = [path1, path2, path3, path4] + mock_os.path.basename.return_value = 'file3.txt' + + with pytest.raises(ValueError) as ve: + sync.readManifestFile(syn, manifest) + assert str(ve.value) == "The file name on your local side is invalid to store on Synapse. Names may only contain:" \ + "letters, numbers, spaces, underscores, hyphens, periods, plus signs, " \ + "apostrophes,and parentheses" + + path4 = os.path.abspath(os.path.expanduser('~/file4.txt')) + long_file_name = 'test_filename_too_long_test_filename_too_long_test_filename_too_long_test_filename_too_long_' \ + 'test_filename_too_long_test_filename_too_long_test_filename_too_long_test_filename_too_long_' \ + 'test_filename_too_long_test_filename_too_long_test_filename_too_long_test_filename_too_long_' + row4 = '%s\t%s\t%s\n' % (path4, project_id, long_file_name) + manifest = StringIO(header + row1 + row2 + row3 + row4) + mock_os.reset_mock() + mock_os.path.isfile.side_effect = [True, True, True, True] + mock_os.path.abspath.side_effect = [path1, path2, path3, path4] + mock_os.path.basename.return_value = 'file3.txt' + + with pytest.raises(ValueError) as ve: + sync.readManifestFile(syn, manifest) + assert str(ve.value) == "The file name on your local side is invalid to store on Synapse. Names may only contain:" \ + "letters, numbers, spaces, underscores, hyphens, periods, plus signs, " \ + "apostrophes,and parentheses" From ee444e568dba468b9ca2b1b5020cd78dafe5688c Mon Sep 17 00:00:00 2001 From: Chia-Hui Lin Date: Mon, 15 Feb 2021 15:03:21 -0800 Subject: [PATCH 02/42] fix the format of unit tests --- .../unit_test_synapseutils_sync.py | 83 +++++++++++++++---- 1 file changed, 69 insertions(+), 14 deletions(-) diff --git a/tests/unit/synapseutils/unit_test_synapseutils_sync.py b/tests/unit/synapseutils/unit_test_synapseutils_sync.py index 405f6a4dd..cc030608c 100644 --- a/tests/unit/synapseutils/unit_test_synapseutils_sync.py +++ b/tests/unit/synapseutils/unit_test_synapseutils_sync.py @@ -781,6 +781,9 @@ def test_get_file_entity_provenance_dict__error_not_404(self): @patch.object(sync, 'os') def test_check_size_each_file(mock_os, syn): + """ + Verify the check_size_each_file method works correctly + """ project_id = "syn123" header = 'path\tparent\n' @@ -789,10 +792,10 @@ def test_check_size_each_file(mock_os, syn): path3 = os.path.abspath(os.path.expanduser('~/file3.txt')) path4 = 'http://www.github.com' - row1 = '%s\t%s\n' % (path1, project_id) - row2 = '%s\t%s\n' % (path2, project_id) - row3 = '%s\t%s\n' % (path3, project_id) - row4 = '%s\t%s\n' % (path4, project_id) + row1 = f'{path1}\t{project_id}\n' + row2 = f'{path2}\t{project_id}\n' + row3 = f'{path3}\t{project_id}\n' + row4 = f'{path4}\t{project_id}\n' manifest = StringIO(header + row1 + row2 + row3 + row4) mock_os.path.isfile.side_effect = [True, True, True, False] @@ -808,6 +811,25 @@ def test_check_size_each_file(mock_os, syn): sync.readManifestFile(syn, manifest) mock_os.stat.call_count == 4 + +@patch.object(sync, 'os') +def test_check_size_each_file_raise_error(mock_os, syn): + """ + Verify the check_size_each_file method raises the ValueError when the file is empty. + """ + + project_id = "syn123" + header = 'path\tparent\n' + path1 = os.path.abspath(os.path.expanduser('~/file1.txt')) + path2 = 'http://www.synapse.org' + path3 = os.path.abspath(os.path.expanduser('~/file3.txt')) + path4 = 'http://www.github.com' + + row1 = f'{path1}\t{project_id}\n' + row2 = f'{path2}\t{project_id}\n' + row3 = f'{path3}\t{project_id}\n' + row4 = f'{path4}\t{project_id}\n' + mock_os.reset_mock() manifest = StringIO(header + row1 + row2 + row3 + row4) mock_os.path.isfile.side_effect = [True, True, True, False] @@ -822,6 +844,9 @@ def test_check_size_each_file(mock_os, syn): @patch.object(sync, 'os') def test_check_file_name(mock_os, syn): + """ + Verify the check_file_name method works correctly + """ project_id = "syn123" header = 'path\tparent\tname\n' @@ -829,28 +854,40 @@ def test_check_file_name(mock_os, syn): path2 = os.path.abspath(os.path.expanduser('~/file2.txt')) path3 = os.path.abspath(os.path.expanduser('~/file3.txt')) - # f"{path1}\t{path2} foo" - row1 = '%s\t%s\tTest_file_name.txt\n' % (path1, project_id) - row2 = '%s\t%s\tTest_file_name(-`).txt\n' % (path2, project_id) - row3 = '%s\t%s\t\n' % (path3, project_id) + row1 = f"{path1}\t{project_id}\tTest_file_name.txt\n" + row2 = f"{path2}\t{project_id}\tTest_file-name`s(1).txt\n" + row3 = f"{path3}\t{project_id}\t\n" manifest = StringIO(header + row1 + row2 + row3) mock_os.path.isfile.side_effect = [True, True, True] mock_os.path.abspath.side_effect = [path1, path2, path3] - mock_stat = MagicMock(spec='st_size') - mock_os.stat.return_value = mock_stat - mock_stat.st_size = 5 mock_os.path.basename.return_value = 'file3.txt' - # mock syn.get() to return a project because the final check is making sure parent is a container # mock isfile() to always return true to avoid having to create files in the home directory # side effect mocks values for: manfiest file, file1.txt, file2.txt, isfile(project.id) check in syn.get() with patch.object(syn, "get", return_value=Project()): sync.readManifestFile(syn, manifest) + +@patch.object(sync, 'os') +def test_check_file_name_with_illegal_char(mock_os, syn): + """ + Verify the check_file_name method raises the ValueError when the file name contains illegal char + """ + + project_id = "syn123" + header = 'path\tparent\tname\n' + path1 = os.path.abspath(os.path.expanduser('~/file1.txt')) + path2 = os.path.abspath(os.path.expanduser('~/file2.txt')) + path3 = os.path.abspath(os.path.expanduser('~/file3.txt')) path4 = os.path.abspath(os.path.expanduser('~/file4.txt')) - row4 = '%s\t%s\tTest_file_name_with_#\n' % (path4, project_id) + + row1 = f"{path1}\t{project_id}\tTest_file_name.txt\n" + row2 = f"{path2}\t{project_id}\tTest_file-name`s(1).txt\n" + row3 = f"{path3}\t{project_id}\t\n" + row4 = f"{path4}\t{project_id}\tTest_file_name_with_#.txt\n" + manifest = StringIO(header + row1 + row2 + row3 + row4) mock_os.reset_mock() mock_os.path.isfile.side_effect = [True, True, True, True] @@ -863,11 +900,29 @@ def test_check_file_name(mock_os, syn): "letters, numbers, spaces, underscores, hyphens, periods, plus signs, " \ "apostrophes,and parentheses" + +@patch.object(sync, 'os') +def test_check_file_name_with_too_long_filename(mock_os, syn): + """ + Verify the check_file_name method raises the ValueError when the file name is too long + """ + + project_id = "syn123" + header = 'path\tparent\tname\n' + path1 = os.path.abspath(os.path.expanduser('~/file1.txt')) + path2 = os.path.abspath(os.path.expanduser('~/file2.txt')) + path3 = os.path.abspath(os.path.expanduser('~/file3.txt')) path4 = os.path.abspath(os.path.expanduser('~/file4.txt')) + long_file_name = 'test_filename_too_long_test_filename_too_long_test_filename_too_long_test_filename_too_long_' \ 'test_filename_too_long_test_filename_too_long_test_filename_too_long_test_filename_too_long_' \ 'test_filename_too_long_test_filename_too_long_test_filename_too_long_test_filename_too_long_' - row4 = '%s\t%s\t%s\n' % (path4, project_id, long_file_name) + + row1 = f"{path1}\t{project_id}\tTest_file_name.txt\n" + row2 = f"{path2}\t{project_id}\tTest_file-name`s(1).txt\n" + row3 = f"{path3}\t{project_id}\t\n" + row4 = f"{path4}\t{project_id}\t{long_file_name}\n" + manifest = StringIO(header + row1 + row2 + row3 + row4) mock_os.reset_mock() mock_os.path.isfile.side_effect = [True, True, True, True] From ec39bd02562dbdf25a56e1071fb462bfe3c5a9ee Mon Sep 17 00:00:00 2001 From: Chia-Hui Lin Date: Tue, 16 Feb 2021 09:31:27 -0800 Subject: [PATCH 03/42] fix the integration tests --- synapseutils/sync.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapseutils/sync.py b/synapseutils/sync.py index 87e2b60e0..a44726c74 100644 --- a/synapseutils/sync.py +++ b/synapseutils/sync.py @@ -951,7 +951,9 @@ def _check_file_name(df): def _check_size_each_file(df): for f in df.path: + if is_url(f): + continue single_file_size = os.stat(os.path.expandvars(os.path.expanduser(f))).st_size - if not is_url(f) and single_file_size == 0: + if single_file_size == 0: print('\nAll the files uploaded cannot be 0 byte.') raise ValueError('All the files uploaded cannot be 0 byte.') From 1a20c20fcf2561d103b2f5eeeb21e752a08f3119 Mon Sep 17 00:00:00 2001 From: Chia-Hui Lin Date: Fri, 19 Feb 2021 11:57:02 -0800 Subject: [PATCH 04/42] fix the issue and add modify the comment --- synapseutils/sync.py | 30 ++++++++++--------- .../unit_test_synapseutils_sync.py | 27 ++++++++--------- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/synapseutils/sync.py b/synapseutils/sync.py index a44726c74..ace810e12 100644 --- a/synapseutils/sync.py +++ b/synapseutils/sync.py @@ -937,23 +937,25 @@ def _manifest_upload(syn, df): def _check_file_name(df): for idx, row in df.iterrows(): - pattern = "^[`\\w \\-\\+\\.\\(\\)]{1,256}$" - if not row['name']: + compiled = re.compile(r"^[`\w \-\+\.\(\)]{1,256}$") + file_name = row['name'] + if not file_name: directory_name = os.path.basename(row['path']) - df.loc[df.path == row['path'], 'name'] = row['name'] = directory_name - sys.stdout.write('The file name you assigned at path: %s is empty, so we set the store name as %s.\n' - % (row['path'], directory_name)) - if not re.match(pattern, row['name']): - raise ValueError('The file name on your local side is invalid to store on Synapse. Names may only contain:' - 'letters, numbers, spaces, underscores, hyphens, periods, plus signs, apostrophes,' - 'and parentheses') + df.loc[df.path == row['path'], 'name'] = file_name = directory_name + sys.stdout.write('No file name assigned to path: %s, defaulting to %s\n' % (row['path'], directory_name)) + if not compiled.match(file_name): + raise ValueError("File name {} cannot be stored to Synapse. Names may contain letters, numbers, spaces, " + "underscores, hyphens, periods, plus signs, apostrophes, " + "and parentheses".format(file_name)) def _check_size_each_file(df): - for f in df.path: - if is_url(f): + # for f in df.path: + for idx, row in df.iterrows(): + file_path = row['path'] + file_name = row['name'] if 'name' in row else os.path.basename(row['path']) + if is_url(file_path): continue - single_file_size = os.stat(os.path.expandvars(os.path.expanduser(f))).st_size + single_file_size = os.stat(os.path.expandvars(os.path.expanduser(file_path))).st_size if single_file_size == 0: - print('\nAll the files uploaded cannot be 0 byte.') - raise ValueError('All the files uploaded cannot be 0 byte.') + raise ValueError("File {} is empty, empty files cannot be uploaded to Synapse".format(file_name)) diff --git a/tests/unit/synapseutils/unit_test_synapseutils_sync.py b/tests/unit/synapseutils/unit_test_synapseutils_sync.py index cc030608c..eeb5f63ec 100644 --- a/tests/unit/synapseutils/unit_test_synapseutils_sync.py +++ b/tests/unit/synapseutils/unit_test_synapseutils_sync.py @@ -830,16 +830,16 @@ def test_check_size_each_file_raise_error(mock_os, syn): row3 = f'{path3}\t{project_id}\n' row4 = f'{path4}\t{project_id}\n' - mock_os.reset_mock() manifest = StringIO(header + row1 + row2 + row3 + row4) mock_os.path.isfile.side_effect = [True, True, True, False] mock_os.path.abspath.side_effect = [path1, path3] + mock_os.path.basename.return_value = 'file1.txt' mock_stat = MagicMock(spec='st_size') mock_os.stat.return_value = mock_stat mock_stat.st_size = 0 with pytest.raises(ValueError) as ve: sync.readManifestFile(syn, manifest) - assert str(ve.value) == "All the files uploaded cannot be 0 byte." + assert str(ve.value) == "File {} is empty, empty files cannot be uploaded to Synapse".format("file1.txt") @patch.object(sync, 'os') @@ -859,7 +859,7 @@ def test_check_file_name(mock_os, syn): row3 = f"{path3}\t{project_id}\t\n" manifest = StringIO(header + row1 + row2 + row3) - mock_os.path.isfile.side_effect = [True, True, True] + mock_os.path.isfile.return_value = True mock_os.path.abspath.side_effect = [path1, path2, path3] mock_os.path.basename.return_value = 'file3.txt' @@ -886,19 +886,19 @@ def test_check_file_name_with_illegal_char(mock_os, syn): row1 = f"{path1}\t{project_id}\tTest_file_name.txt\n" row2 = f"{path2}\t{project_id}\tTest_file-name`s(1).txt\n" row3 = f"{path3}\t{project_id}\t\n" - row4 = f"{path4}\t{project_id}\tTest_file_name_with_#.txt\n" + illegal_name = "Test_file_name_with_#.txt" + row4 = f"{path4}\t{project_id}\t{illegal_name}\n" manifest = StringIO(header + row1 + row2 + row3 + row4) - mock_os.reset_mock() - mock_os.path.isfile.side_effect = [True, True, True, True] + mock_os.path.isfile.return_value = True mock_os.path.abspath.side_effect = [path1, path2, path3, path4] mock_os.path.basename.return_value = 'file3.txt' with pytest.raises(ValueError) as ve: sync.readManifestFile(syn, manifest) - assert str(ve.value) == "The file name on your local side is invalid to store on Synapse. Names may only contain:" \ - "letters, numbers, spaces, underscores, hyphens, periods, plus signs, " \ - "apostrophes,and parentheses" + assert str(ve.value) == "File name {} cannot be stored to Synapse. Names may contain letters, numbers, spaces, " \ + "underscores, hyphens, periods, plus signs, apostrophes, " \ + "and parentheses".format(illegal_name) @patch.object(sync, 'os') @@ -924,13 +924,12 @@ def test_check_file_name_with_too_long_filename(mock_os, syn): row4 = f"{path4}\t{project_id}\t{long_file_name}\n" manifest = StringIO(header + row1 + row2 + row3 + row4) - mock_os.reset_mock() - mock_os.path.isfile.side_effect = [True, True, True, True] + mock_os.path.isfile.return_value = True mock_os.path.abspath.side_effect = [path1, path2, path3, path4] mock_os.path.basename.return_value = 'file3.txt' with pytest.raises(ValueError) as ve: sync.readManifestFile(syn, manifest) - assert str(ve.value) == "The file name on your local side is invalid to store on Synapse. Names may only contain:" \ - "letters, numbers, spaces, underscores, hyphens, periods, plus signs, " \ - "apostrophes,and parentheses" + assert str(ve.value) == "File name {} cannot be stored to Synapse. Names may contain letters, numbers, spaces, " \ + "underscores, hyphens, periods, plus signs, apostrophes, " \ + "and parentheses".format(long_file_name) From 16ce82beb352fca275090fd2ade5b0b4210e6061 Mon Sep 17 00:00:00 2001 From: Chia-Hui Lin Date: Fri, 5 Mar 2021 12:23:01 -0800 Subject: [PATCH 05/42] fix the issue from reviews --- synapseutils/sync.py | 2 +- tests/unit/synapseutils/unit_test_synapseutils_sync.py | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/synapseutils/sync.py b/synapseutils/sync.py index ace810e12..94fcb3297 100644 --- a/synapseutils/sync.py +++ b/synapseutils/sync.py @@ -779,7 +779,7 @@ def readManifestFile(syn, manifestFile): # check the name of each file should be store on Synapse name_column = 'name' if name_column in df.columns: - sys.stdout.write('Validating that all name column are filled... \n') + sys.stdout.write('Validating file names... \n') _check_file_name(df) sys.stdout.write('OK\n') diff --git a/tests/unit/synapseutils/unit_test_synapseutils_sync.py b/tests/unit/synapseutils/unit_test_synapseutils_sync.py index eeb5f63ec..5b828ae4f 100644 --- a/tests/unit/synapseutils/unit_test_synapseutils_sync.py +++ b/tests/unit/synapseutils/unit_test_synapseutils_sync.py @@ -805,8 +805,6 @@ def test_check_size_each_file(mock_os, syn): mock_stat.st_size = 5 # mock syn.get() to return a project because the final check is making sure parent is a container - # mock isfile() to always return true to avoid having to create files in the home directory - # side effect mocks values for: manfiest file, file1.txt, file2.txt, isfile(project.id) check in syn.get() with patch.object(syn, "get", return_value=Project()): sync.readManifestFile(syn, manifest) mock_os.stat.call_count == 4 @@ -859,13 +857,12 @@ def test_check_file_name(mock_os, syn): row3 = f"{path3}\t{project_id}\t\n" manifest = StringIO(header + row1 + row2 + row3) + # mock isfile() to always return true to avoid having to create files in the home directory mock_os.path.isfile.return_value = True mock_os.path.abspath.side_effect = [path1, path2, path3] mock_os.path.basename.return_value = 'file3.txt' # mock syn.get() to return a project because the final check is making sure parent is a container - # mock isfile() to always return true to avoid having to create files in the home directory - # side effect mocks values for: manfiest file, file1.txt, file2.txt, isfile(project.id) check in syn.get() with patch.object(syn, "get", return_value=Project()): sync.readManifestFile(syn, manifest) From b1ad6aff002044c9fdc28c3fa6905e139740ade2 Mon Sep 17 00:00:00 2001 From: Chia-Hui Lin Date: Tue, 16 Mar 2021 16:41:01 -0700 Subject: [PATCH 06/42] fix the issue and little logic in unit tests --- synapseutils/sync.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/synapseutils/sync.py b/synapseutils/sync.py index 94fcb3297..b86d2cf73 100644 --- a/synapseutils/sync.py +++ b/synapseutils/sync.py @@ -936,8 +936,8 @@ def _manifest_upload(syn, df): def _check_file_name(df): + compiled = re.compile(r"^[`\w \-\+\.\(\)]{1,256}$") for idx, row in df.iterrows(): - compiled = re.compile(r"^[`\w \-\+\.\(\)]{1,256}$") file_name = row['name'] if not file_name: directory_name = os.path.basename(row['path']) @@ -950,12 +950,10 @@ def _check_file_name(df): def _check_size_each_file(df): - # for f in df.path: for idx, row in df.iterrows(): file_path = row['path'] file_name = row['name'] if 'name' in row else os.path.basename(row['path']) - if is_url(file_path): - continue - single_file_size = os.stat(os.path.expandvars(os.path.expanduser(file_path))).st_size - if single_file_size == 0: - raise ValueError("File {} is empty, empty files cannot be uploaded to Synapse".format(file_name)) + if not is_url(file_path): + single_file_size = os.stat(os.path.expandvars(os.path.expanduser(file_path))).st_size + if single_file_size == 0: + raise ValueError("File {} is empty, empty files cannot be uploaded to Synapse".format(file_name)) From 1ad215783f844e1c1c310369e748a30a9f5a2cdc Mon Sep 17 00:00:00 2001 From: Chia-Hui Lin Date: Thu, 18 Mar 2021 10:21:06 -0700 Subject: [PATCH 07/42] fix the issue for blank versionComment --- synapseclient/client.py | 1 + tests/unit/synapseclient/unit_test_client.py | 94 +++++++++++++++++++- 2 files changed, 93 insertions(+), 2 deletions(-) diff --git a/synapseclient/client.py b/synapseclient/client.py index 100f5b61f..27e7f8fce 100644 --- a/synapseclient/client.py +++ b/synapseclient/client.py @@ -1024,6 +1024,7 @@ def store(self, obj, *, createOrUpdate=True, forceVersion=True, versionLabel=Non properties, annotations, local_state = split_entity_namespaces(entity) bundle = None # Anything with a path is treated as a cache-able item + properties['versionComment'] = properties['versionComment'] if 'versionComment' in properties else None if entity.get('path', False): if 'concreteType' not in properties: properties['concreteType'] = File._synapse_entity_type diff --git a/tests/unit/synapseclient/unit_test_client.py b/tests/unit/synapseclient/unit_test_client.py index 8bace0e34..c4fbff83b 100644 --- a/tests/unit/synapseclient/unit_test_client.py +++ b/tests/unit/synapseclient/unit_test_client.py @@ -2046,7 +2046,7 @@ def test_store__existing_processed_as_update(syn): 'concreteType': 'org.sagebionetworks.repo.model.FileEntity', 'dataFileHandleId': file_handle_id, 'parentId': parent_id, - + 'versionComment': None, } expected_annotations = { 'foo': [3], @@ -2135,7 +2135,7 @@ def test_store__409_processed_as_update(syn): 'concreteType': 'org.sagebionetworks.repo.model.FileEntity', 'dataFileHandleId': file_handle_id, 'parentId': parent_id, - + 'versionComment': None, } expected_update_properties = { **expected_create_properties, @@ -2246,6 +2246,96 @@ def test_store__no_need_to_update_annotation(syn): mock_set_annotations.assert_not_called() +def test_store__update_versionComment(syn): + file_handle_id = '123412341234' + returned_file_handle = { + 'id': file_handle_id + } + + parent_id = 'syn122' + synapse_id = 'syn123' + etag = 'db9bc70b-1eb6-4a21-b3e8-9bf51d964031' + file_name = 'fake_file.txt' + + existing_bundle_annotations = { + 'foo': { + 'type': 'LONG', + 'value': ['1'] + }, + + # this annotation is not included in the passed which is interpreted as a deletion + 'bar': { + 'type': 'LONG', + 'value': ['2'] + }, + } + returned_bundle = { + 'entity': { + 'name': file_name, + 'id': synapse_id, + 'etag': etag, + 'concreteType': 'org.sagebionetworks.repo.model.FileEntity', + 'dataFileHandleId': file_handle_id, + }, + 'entityType': 'file', + 'fileHandles': [ + { + 'id': file_handle_id, + 'concreteType': 'org.sagebionetworks.repo.model.file.S3FileHandle', + } + ], + 'annotations': { + 'id': synapse_id, + 'etag': etag, + 'annotations': existing_bundle_annotations + }, + } + + expected_update_properties = { + 'id': synapse_id, + 'etag': etag, + 'name': file_name, + 'concreteType': 'org.sagebionetworks.repo.model.FileEntity', + 'dataFileHandleId': file_handle_id, + 'parentId': parent_id, + 'versionComment': '12345', + } + + with patch.object(syn, '_getEntityBundle') as mock_get_entity_bundle, \ + patch.object(synapseclient.client, 'upload_file_handle', return_value=returned_file_handle), \ + patch.object(syn.cache, 'contains', return_value=True), \ + patch.object(syn, '_createEntity') as mock_createEntity, \ + patch.object(syn, '_updateEntity') as mock_updateEntity, \ + patch.object(syn, 'findEntityId') as mock_findEntityId, \ + patch.object(syn, 'set_annotations') as mock_set_annotations, \ + patch.object(Entity, 'create'), \ + patch.object(syn, 'get'): + mock_get_entity_bundle.return_value = returned_bundle + + f = File(f"/{file_name}", parent=parent_id, versionComment='12345') + syn.store(f) + + assert mock_set_annotations.called + assert not mock_createEntity.called + assert not mock_findEntityId.called + + mock_updateEntity.assert_called_once_with( + expected_update_properties, + True, # createOrUpdate + None, # versionLabel + ) + + # entity that stores on synapse without versionComment + f = File(f"/{file_name}", parent=parent_id) + expected_update_properties['versionComment'] = None + syn.store(f) + + mock_updateEntity.assert_called_with( + expected_update_properties, + True, # createOrUpdate + None, # versionLabel + ) + def test_update_entity_version(syn): """Confirm behavior of entity version incrementing/labeling when invoking syn._updateEntity""" entity_id = 'syn123' From 47355829c6def3ea843c83ddd1ac4ac83b2ea21c Mon Sep 17 00:00:00 2001 From: Chia-Hui Lin Date: Thu, 18 Mar 2021 10:33:08 -0700 Subject: [PATCH 08/42] fix the issue for flake8 --- tests/unit/synapseclient/unit_test_client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/synapseclient/unit_test_client.py b/tests/unit/synapseclient/unit_test_client.py index c4fbff83b..85f9260a7 100644 --- a/tests/unit/synapseclient/unit_test_client.py +++ b/tests/unit/synapseclient/unit_test_client.py @@ -2336,6 +2336,7 @@ def test_store__update_versionComment(syn): None, # versionLabel ) + def test_update_entity_version(syn): """Confirm behavior of entity version incrementing/labeling when invoking syn._updateEntity""" entity_id = 'syn123' From ef789f33f089c69ff22f94d84d79ba2bffcd1fe9 Mon Sep 17 00:00:00 2001 From: Chia-Hui Lin Date: Fri, 19 Mar 2021 09:49:00 -0700 Subject: [PATCH 09/42] fix the review issue --- synapseclient/client.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapseclient/client.py b/synapseclient/client.py index 27e7f8fce..4fd92231c 100644 --- a/synapseclient/client.py +++ b/synapseclient/client.py @@ -1023,8 +1023,11 @@ def store(self, obj, *, createOrUpdate=True, forceVersion=True, versionLabel=Non entity = obj properties, annotations, local_state = split_entity_namespaces(entity) bundle = None - # Anything with a path is treated as a cache-able item + # Explicitly set an empty versionComment property if none is supplied, + # otherwise an existing entity bundle's versionComment will be copied to the update. properties['versionComment'] = properties['versionComment'] if 'versionComment' in properties else None + + # Anything with a path is treated as a cache-able item if entity.get('path', False): if 'concreteType' not in properties: properties['concreteType'] = File._synapse_entity_type From 86334dd35c3ab32b43867c589eb9fb7cf2b0aabe Mon Sep 17 00:00:00 2001 From: Ziming Dong Date: Fri, 19 Mar 2021 17:18:18 -0700 Subject: [PATCH 10/42] add credential provider to check AWS Parameter Store for token --- .../core/credentials/credential_provider.py | 47 +++++++- .../credentials/unit_test_cred_provider.py | 109 +++++++++++++++--- 2 files changed, 137 insertions(+), 19 deletions(-) diff --git a/synapseclient/core/credentials/credential_provider.py b/synapseclient/core/credentials/credential_provider.py index 20cd69d07..e8f178e35 100644 --- a/synapseclient/core/credentials/credential_provider.py +++ b/synapseclient/core/credentials/credential_provider.py @@ -1,9 +1,11 @@ import abc import deprecated.sphinx +import os +import synapseclient.core.utils as utils from synapseclient.core.exceptions import SynapseAuthenticationError -from .cred_data import SynapseApiKeyCredentials, SynapseAuthTokenCredentials -from . import cached_sessions +from synapseclient.core.credentials.cred_data import SynapseApiKeyCredentials, SynapseAuthTokenCredentials +from synapseclient.core.credentials import cached_sessions class SynapseCredentialsProvider(metaclass=abc.ABCMeta): @@ -12,6 +14,7 @@ class SynapseCredentialsProvider(metaclass=abc.ABCMeta): username/api key) from a source(e.g. login args, config file, cached credentials in keyring), and use them to return a ``SynapseCredentials` instance. """ + @abc.abstractmethod def _get_auth_info(self, syn, user_login_args): """ @@ -69,6 +72,7 @@ class UserArgsCredentialsProvider(SynapseCredentialsProvider): """ Retrieves auth info from user_login_args """ + def _get_auth_info(self, syn, user_login_args): return ( user_login_args.username, @@ -103,6 +107,7 @@ class ConfigFileCredentialsProvider(SynapseCredentialsProvider): """ Retrieves auth info from .synapseConfig file """ + def _get_auth_info(self, syn, user_login_args): config_dict = syn._get_config_authentication() # check to make sure we didn't accidentally provide the wrong user @@ -127,6 +132,7 @@ class CachedCredentialsProvider(SynapseCredentialsProvider): """ Retrieves auth info from cached_sessions """ + def _get_auth_info(self, syn, user_login_args): username = None password = None @@ -145,11 +151,44 @@ def _get_auth_info(self, syn, user_login_args): return username, password, api_key, auth_token +class AWSParameterStoreCredentialsProvider(SynapseCredentialsProvider): + """ + Retrieves user's authentication token from AWS SSM Parameter store + """ + + ENVIRONMENT_VAR_NAME = "SYNAPSE_TOKEN_AWS_SSM_PARAMETER_NAME" + + def _get_auth_info(self, syn, user_login_args): + ssm_param_name = os.environ.get(self.ENVIRONMENT_VAR_NAME) + token = None + if ssm_param_name: + boto3 = utils.attempt_import("boto3", + "\n\nThe Synapse client uses boto3 " + "in order to access Systems Manager Parameter Storage.\n") + import botocore # if can import boto3, can import botocore + try: + ssm_client = boto3.client('ssm') + result = ssm_client.get_parameter( + Name=ssm_param_name, + WithDecryption=True, + ) + token = result['Parameter']['Value'] + except botocore.exceptions.ClientError as e: + syn.logger.warn(f'{self.ENVIRONMENT_VAR_NAME} was defined as {ssm_param_name}, ' + 'but the matching parameter name could not be found in AWS Parameter Store. ' + f'Caused by AWS error:\n {e}') + + # if username is included in user's arguments, return it so that + # it may be validated against the username authenticated by the token + return user_login_args.username, None, None, token + + class SynapseCredentialsProviderChain(object): """ Class that has a list of ``SynapseCredentialsProvider`` from which this class attempts to retrieve ``SynapseCredentials``. """ + def __init__(self, cred_providers): """ :param list[``SynapseCredentialsProvider``] cred_providers: list of credential providers @@ -178,7 +217,9 @@ def get_credentials(self, syn, user_login_args): UserArgsSessionTokenCredentialsProvider(), # This provider is DEPRECATED UserArgsCredentialsProvider(), ConfigFileCredentialsProvider(), - CachedCredentialsProvider()]) + CachedCredentialsProvider(), + AWSParameterStoreCredentialsProvider() +]) def get_default_credential_chain(): diff --git a/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py b/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py index b06a3b267..921ba9539 100644 --- a/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py +++ b/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py @@ -1,10 +1,18 @@ import base64 +import os +from unittest.mock import create_autospec, MagicMock, patch +import boto3 import pytest -from unittest.mock import create_autospec, MagicMock, patch +from botocore.stub import Stubber +from pytest_mock import MockerFixture -from synapseclient.core.exceptions import SynapseAuthenticationError from synapseclient.core.credentials import credential_provider +from synapseclient.core.credentials.cred_data import ( + SynapseApiKeyCredentials, + SynapseAuthTokenCredentials, + UserLoginArgs, +) from synapseclient.core.credentials.credential_provider import ( cached_sessions, CachedCredentialsProvider, @@ -13,12 +21,9 @@ SynapseCredentialsProviderChain, UserArgsCredentialsProvider, UserArgsSessionTokenCredentialsProvider, + AWSParameterStoreCredentialsProvider ) -from synapseclient.core.credentials.cred_data import ( - SynapseApiKeyCredentials, - SynapseAuthTokenCredentials, - UserLoginArgs, -) +from synapseclient.core.exceptions import SynapseAuthenticationError class TestSynapseApiKeyCredentialsProviderChain(object): @@ -94,18 +99,19 @@ def setup(self): self.session_token, self.auth_token ) + # SynapseApiKeyCredentialsProvider has abstractmethod so we can't instantiate it unless we overwrite it class SynapseCredProviderTester(SynapseCredentialsProvider): def _get_auth_info(self, syn, user_login_args): pass + self.provider = SynapseCredProviderTester() def test_get_synapse_credentials(self): auth_info = ("username", "password", "api_key") with patch.object(self.provider, "_get_auth_info", return_value=auth_info) as mock_get_auth_info, \ - patch.object(self.provider, "_create_synapse_credential") as mock_create_synapse_credentials: - + patch.object(self.provider, "_create_synapse_credential") as mock_create_synapse_credentials: self.provider.get_synapse_credentials(self.syn, self.user_login_args) mock_get_auth_info.assert_called_once_with(self.syn, self.user_login_args) @@ -134,8 +140,7 @@ def test_create_synapse_credential__username_not_None_password_not_None(self): over api key and auth bearer token)""" session_token = "37842837946" with patch.object(self.syn, "_getSessionToken", return_value=session_token) as mock_get_session_token, \ - patch.object(self.syn, "_getAPIKey", return_value=self.api_key) as mock_get_api_key: - + patch.object(self.syn, "_getAPIKey", return_value=self.api_key) as mock_get_api_key: # even if api key and/or auth_token is provided, password applies first cred = self.provider._create_synapse_credential( self.syn, @@ -244,11 +249,11 @@ def test_get_auth_info(self): returned_tuple = provider._get_auth_info(self.syn, user_login_args) assert ( - user_login_args.username, - user_login_args.password, - user_login_args.api_key, - user_login_args.auth_token - ) == returned_tuple + user_login_args.username, + user_login_args.password, + user_login_args.api_key, + user_login_args.auth_token + ) == returned_tuple class TestConfigFileCredentialsProvider(object): @@ -394,3 +399,75 @@ def test_get_auth_info__user_arg_username_is_not_None(self): self.mock_get_most_recent_user.assert_not_called() self.mock_api_key_credentials.get_from_keyring.assert_called_once_with(self.username) self.mock_auth_token_credentials.get_from_keyring.assert_called_once_with(self.username) + + +class TestAWSParameterStoreCredentialsProvider(object): + @pytest.fixture(autouse=True, scope='function') + def init_syn(self, syn): + self.syn = syn + + @pytest.fixture() + def environ_with_param_name(self): + return {'SYNAPSE_TOKEN_AWS_SSM_PARAMETER_NAME': '/synapse/cred/i-12134312'} + + def stub_ssm(self, mocker: MockerFixture): + # use fake credentials otherwise boto will look for them via http calls + ssm_client = boto3.client('ssm', aws_access_key_id='foo', aws_secret_access_key='bar', region_name='us-east-1') + stubber = Stubber(ssm_client) + mock_boto3_client = mocker.patch.object(boto3, 'client', return_value=ssm_client) + return mock_boto3_client, stubber + + def setup(self): + self.provider = AWSParameterStoreCredentialsProvider() + + def test_get_auth_info__no_environment_variable(self, mocker: MockerFixture, syn): + empty_dict = {} + mocker.patch.object(os, "environ", new=empty_dict) + mock_boto3_client, stubber = self.stub_ssm(mocker) + + user_login_args = UserLoginArgs(username=None, password=None, api_key=None, skip_cache=False, auth_token=None) + + assert (None,) * 4 == self.provider._get_auth_info(syn, user_login_args) + assert not mock_boto3_client.called + stubber.assert_no_pending_responses() + + def test_get_auth_info__parameter_name_not_exist(self, mocker: MockerFixture, syn, environ_with_param_name): + mocker.patch.object(os, 'environ', new=environ_with_param_name) + + mock_boto3_client, stubber = self.stub_ssm(mocker) + stubber.add_client_error('get_parameter', 'ParameterNotFound') + + user_login_args = UserLoginArgs(username=None, password=None, api_key=None, skip_cache=False, auth_token=None) + + with stubber: + assert (None,) * 4 == self.provider._get_auth_info(syn, user_login_args) + mock_boto3_client.assert_called_once_with("ssm") + stubber.assert_no_pending_responses() + + def test_get_auth_info__parameter_name_exists(self, mocker: MockerFixture, syn, environ_with_param_name): + mocker.patch.object(os, 'environ', new=environ_with_param_name) + + mock_boto3_client, stubber = self.stub_ssm(mocker) + token = 'KmhhY2tlciB2b2ljZSogIkknbSBpbiI=' + response = { + 'Parameter': { + 'Name': '/synapse/cred/i-12134312', + 'Type': 'SecureString', + 'Value': token, + 'Version': 502, + 'LastModifiedDate': 'Sun, 20 Apr 1969 16:20:00 GMT', + 'ARN': + 'arn:aws:ssm:us-east-1:123123123:parameter/synapse/cred/i-12134312', + 'DataType': 'text' + }, + } + stubber.add_response('get_parameter', response) + + username = 'foobar' + user_login_args = UserLoginArgs(username=username, password=None, api_key=None, + skip_cache=False, auth_token=None) + + with stubber: + assert (username, None, None, token) == self.provider._get_auth_info(syn, user_login_args) + mock_boto3_client.assert_called_once_with("ssm") + stubber.assert_no_pending_responses() From 346e49071416ccace1ad985ff7bc42d972de73c9 Mon Sep 17 00:00:00 2001 From: Ziming Dong Date: Fri, 19 Mar 2021 17:31:44 -0700 Subject: [PATCH 11/42] revert accidental overindent --- .../core/credentials/unit_test_cred_provider.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py b/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py index 921ba9539..c27bc9e71 100644 --- a/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py +++ b/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py @@ -249,11 +249,11 @@ def test_get_auth_info(self): returned_tuple = provider._get_auth_info(self.syn, user_login_args) assert ( - user_login_args.username, - user_login_args.password, - user_login_args.api_key, - user_login_args.auth_token - ) == returned_tuple + user_login_args.username, + user_login_args.password, + user_login_args.api_key, + user_login_args.auth_token + ) == returned_tuple class TestConfigFileCredentialsProvider(object): From c7c4be7fd5c43e12244987e39b6a8a5bef10c490 Mon Sep 17 00:00:00 2001 From: Ziming Dong Date: Fri, 19 Mar 2021 17:32:05 -0700 Subject: [PATCH 12/42] revert accidental overindent --- .../core/credentials/unit_test_cred_provider.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py b/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py index c27bc9e71..3af94260d 100644 --- a/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py +++ b/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py @@ -249,10 +249,10 @@ def test_get_auth_info(self): returned_tuple = provider._get_auth_info(self.syn, user_login_args) assert ( - user_login_args.username, - user_login_args.password, - user_login_args.api_key, - user_login_args.auth_token + user_login_args.username, + user_login_args.password, + user_login_args.api_key, + user_login_args.auth_token ) == returned_tuple From 01934d78dfd475afbcf356cd8c054babb8cd9409 Mon Sep 17 00:00:00 2001 From: Ziming Dong Date: Mon, 22 Mar 2021 09:52:30 -0700 Subject: [PATCH 13/42] handle boto3 ImportError with a log warning instead of re-raising it. --- .../core/credentials/credential_provider.py | 21 ++++++++++------- .../credentials/unit_test_cred_provider.py | 23 ++++++++++++++----- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/synapseclient/core/credentials/credential_provider.py b/synapseclient/core/credentials/credential_provider.py index e8f178e35..203c07a14 100644 --- a/synapseclient/core/credentials/credential_provider.py +++ b/synapseclient/core/credentials/credential_provider.py @@ -162,21 +162,26 @@ def _get_auth_info(self, syn, user_login_args): ssm_param_name = os.environ.get(self.ENVIRONMENT_VAR_NAME) token = None if ssm_param_name: - boto3 = utils.attempt_import("boto3", - "\n\nThe Synapse client uses boto3 " - "in order to access Systems Manager Parameter Storage.\n") - import botocore # if can import boto3, can import botocore try: + import boto3 + import botocore ssm_client = boto3.client('ssm') result = ssm_client.get_parameter( Name=ssm_param_name, WithDecryption=True, ) token = result['Parameter']['Value'] - except botocore.exceptions.ClientError as e: - syn.logger.warn(f'{self.ENVIRONMENT_VAR_NAME} was defined as {ssm_param_name}, ' - 'but the matching parameter name could not be found in AWS Parameter Store. ' - f'Caused by AWS error:\n {e}') + except ImportError: + syn.logger.warning( + f'{self.ENVIRONMENT_VAR_NAME} was defined as {ssm_param_name}, but "boto3" could not be imported.' + ' The Synapse client uses "boto3" in order to access Systems Manager Parameter Storage.' + ' Please ensure that you have installed "boto3" to enable this feature.') + # this except block must be defined after the ImportError except block + # otherwise, there's no guarantee "botocore" is already imported and defined + except botocore.exceptions.ClientError: + syn.logger.warning(f'{self.ENVIRONMENT_VAR_NAME} was defined as {ssm_param_name}, ' + 'but the matching parameter name could not be found in AWS Parameter Store. ' + f'Caused by AWS error:\n', exc_info=True) # if username is included in user's arguments, return it so that # it may be validated against the username authenticated by the token diff --git a/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py b/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py index 3af94260d..d45eb62b3 100644 --- a/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py +++ b/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py @@ -6,6 +6,7 @@ import pytest from botocore.stub import Stubber from pytest_mock import MockerFixture +import sys from synapseclient.core.credentials import credential_provider from synapseclient.core.credentials.cred_data import ( @@ -421,8 +422,7 @@ def setup(self): self.provider = AWSParameterStoreCredentialsProvider() def test_get_auth_info__no_environment_variable(self, mocker: MockerFixture, syn): - empty_dict = {} - mocker.patch.object(os, "environ", new=empty_dict) + mocker.patch.dict(os.environ, {}, clear=True) mock_boto3_client, stubber = self.stub_ssm(mocker) user_login_args = UserLoginArgs(username=None, password=None, api_key=None, skip_cache=False, auth_token=None) @@ -431,11 +431,13 @@ def test_get_auth_info__no_environment_variable(self, mocker: MockerFixture, syn assert not mock_boto3_client.called stubber.assert_no_pending_responses() - def test_get_auth_info__parameter_name_not_exist(self, mocker: MockerFixture, syn, environ_with_param_name): - mocker.patch.object(os, 'environ', new=environ_with_param_name) + # there could be other errors as well, but we will handle them all in the same way + @pytest.mark.parametrize('error_code', ['UnrecognizedClientException', 'AccessDenied', 'ParameterNotFound']) + def test_get_auth_info__get_parameter_error(self, mocker: MockerFixture, syn, environ_with_param_name, error_code): + mocker.patch.dict(os.environ, environ_with_param_name) mock_boto3_client, stubber = self.stub_ssm(mocker) - stubber.add_client_error('get_parameter', 'ParameterNotFound') + stubber.add_client_error('get_parameter', error_code) user_login_args = UserLoginArgs(username=None, password=None, api_key=None, skip_cache=False, auth_token=None) @@ -445,7 +447,7 @@ def test_get_auth_info__parameter_name_not_exist(self, mocker: MockerFixture, sy stubber.assert_no_pending_responses() def test_get_auth_info__parameter_name_exists(self, mocker: MockerFixture, syn, environ_with_param_name): - mocker.patch.object(os, 'environ', new=environ_with_param_name) + mocker.patch.dict(os.environ, environ_with_param_name) mock_boto3_client, stubber = self.stub_ssm(mocker) token = 'KmhhY2tlciB2b2ljZSogIkknbSBpbiI=' @@ -471,3 +473,12 @@ def test_get_auth_info__parameter_name_exists(self, mocker: MockerFixture, syn, assert (username, None, None, token) == self.provider._get_auth_info(syn, user_login_args) mock_boto3_client.assert_called_once_with("ssm") stubber.assert_no_pending_responses() + + def test_get_auth_info__boto3_ImportError(self, mocker, syn, environ_with_param_name): + mocker.patch.dict(os.environ, environ_with_param_name) + # simulate import error by "removing" boto3 from sys.modules + mocker.patch.dict(sys.modules, {'boto3':None}) + + user_login_args = UserLoginArgs(username=None, password=None, api_key=None, skip_cache=False, auth_token=None) + + assert (None,) * 4 == self.provider._get_auth_info(syn, user_login_args) From 122446fcf2de77d6f3a1721e4de704f3960659f8 Mon Sep 17 00:00:00 2001 From: Ziming Dong Date: Mon, 22 Mar 2021 09:54:29 -0700 Subject: [PATCH 14/42] format imports --- synapseclient/core/credentials/credential_provider.py | 8 ++++---- .../core/credentials/unit_test_cred_provider.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/synapseclient/core/credentials/credential_provider.py b/synapseclient/core/credentials/credential_provider.py index 203c07a14..2a1d8badb 100644 --- a/synapseclient/core/credentials/credential_provider.py +++ b/synapseclient/core/credentials/credential_provider.py @@ -1,11 +1,11 @@ import abc -import deprecated.sphinx import os -import synapseclient.core.utils as utils -from synapseclient.core.exceptions import SynapseAuthenticationError -from synapseclient.core.credentials.cred_data import SynapseApiKeyCredentials, SynapseAuthTokenCredentials +import deprecated.sphinx + from synapseclient.core.credentials import cached_sessions +from synapseclient.core.credentials.cred_data import SynapseApiKeyCredentials, SynapseAuthTokenCredentials +from synapseclient.core.exceptions import SynapseAuthenticationError class SynapseCredentialsProvider(metaclass=abc.ABCMeta): diff --git a/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py b/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py index d45eb62b3..58a3a3fcf 100644 --- a/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py +++ b/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py @@ -1,12 +1,12 @@ import base64 import os +import sys from unittest.mock import create_autospec, MagicMock, patch import boto3 import pytest from botocore.stub import Stubber from pytest_mock import MockerFixture -import sys from synapseclient.core.credentials import credential_provider from synapseclient.core.credentials.cred_data import ( From 18cc0b8f9b150dd9d6bef658b688fd7a74d79537 Mon Sep 17 00:00:00 2001 From: Ziming Dong Date: Mon, 22 Mar 2021 10:01:13 -0700 Subject: [PATCH 15/42] undo indent --- synapseclient/core/credentials/credential_provider.py | 2 +- .../synapseclient/core/credentials/unit_test_cred_provider.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/synapseclient/core/credentials/credential_provider.py b/synapseclient/core/credentials/credential_provider.py index 2a1d8badb..e918effd6 100644 --- a/synapseclient/core/credentials/credential_provider.py +++ b/synapseclient/core/credentials/credential_provider.py @@ -223,7 +223,7 @@ def get_credentials(self, syn, user_login_args): UserArgsCredentialsProvider(), ConfigFileCredentialsProvider(), CachedCredentialsProvider(), - AWSParameterStoreCredentialsProvider() + AWSParameterStoreCredentialsProvider(), ]) diff --git a/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py b/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py index 58a3a3fcf..09e65f5d4 100644 --- a/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py +++ b/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py @@ -112,7 +112,7 @@ def _get_auth_info(self, syn, user_login_args): def test_get_synapse_credentials(self): auth_info = ("username", "password", "api_key") with patch.object(self.provider, "_get_auth_info", return_value=auth_info) as mock_get_auth_info, \ - patch.object(self.provider, "_create_synapse_credential") as mock_create_synapse_credentials: + patch.object(self.provider, "_create_synapse_credential") as mock_create_synapse_credentials: self.provider.get_synapse_credentials(self.syn, self.user_login_args) mock_get_auth_info.assert_called_once_with(self.syn, self.user_login_args) @@ -141,7 +141,7 @@ def test_create_synapse_credential__username_not_None_password_not_None(self): over api key and auth bearer token)""" session_token = "37842837946" with patch.object(self.syn, "_getSessionToken", return_value=session_token) as mock_get_session_token, \ - patch.object(self.syn, "_getAPIKey", return_value=self.api_key) as mock_get_api_key: + patch.object(self.syn, "_getAPIKey", return_value=self.api_key) as mock_get_api_key: # even if api key and/or auth_token is provided, password applies first cred = self.provider._create_synapse_credential( self.syn, From 0e320f09dcd5aa70d319870f3b09f270eb35a098 Mon Sep 17 00:00:00 2001 From: Ziming Dong Date: Mon, 22 Mar 2021 10:03:04 -0700 Subject: [PATCH 16/42] PEP8 --- .../synapseclient/core/credentials/unit_test_cred_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py b/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py index 09e65f5d4..f9346319c 100644 --- a/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py +++ b/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py @@ -477,7 +477,7 @@ def test_get_auth_info__parameter_name_exists(self, mocker: MockerFixture, syn, def test_get_auth_info__boto3_ImportError(self, mocker, syn, environ_with_param_name): mocker.patch.dict(os.environ, environ_with_param_name) # simulate import error by "removing" boto3 from sys.modules - mocker.patch.dict(sys.modules, {'boto3':None}) + mocker.patch.dict(sys.modules, {'boto3': None}) user_login_args = UserLoginArgs(username=None, password=None, api_key=None, skip_cache=False, auth_token=None) From 2778fc7d17e5b69bcdbb90fc76cfacc4af586d80 Mon Sep 17 00:00:00 2001 From: Robert Allaway Date: Tue, 6 Apr 2021 20:49:44 -0700 Subject: [PATCH 17/42] I always forget what the format is And I have to read the code to remember ;) --- synapseclient/core/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapseclient/core/cache.py b/synapseclient/core/cache.py index 774f94384..749d82465 100644 --- a/synapseclient/core/cache.py +++ b/synapseclient/core/cache.py @@ -288,7 +288,7 @@ def purge(self, before_date=None, after_date=None, dry_run=False): the cache. Either the before_date or after_date must be specified. If both are passed, files between the two dates are - selected for removal. + selected for removal. Dates must be formatted as a Unix timestamp (epoch time). :param before_date: if specified, all files before this date will be removed :param after_date: if specified, all files after this date will be removed From 14046fa8a11b1638a2f8a24c3c1dc604c6c5021d Mon Sep 17 00:00:00 2001 From: Robert Allaway Date: Tue, 6 Apr 2021 21:15:18 -0700 Subject: [PATCH 18/42] remove ws --- synapseclient/core/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapseclient/core/cache.py b/synapseclient/core/cache.py index 749d82465..8e4a4d285 100644 --- a/synapseclient/core/cache.py +++ b/synapseclient/core/cache.py @@ -288,7 +288,7 @@ def purge(self, before_date=None, after_date=None, dry_run=False): the cache. Either the before_date or after_date must be specified. If both are passed, files between the two dates are - selected for removal. Dates must be formatted as a Unix timestamp (epoch time). + selected for removal. Dates must be formatted as a Unix timestamp (epoch time). :param before_date: if specified, all files before this date will be removed :param after_date: if specified, all files after this date will be removed From c99d5789ae65278797efc519fbf6ee3666a82d40 Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Wed, 7 Apr 2021 17:11:32 -0700 Subject: [PATCH 19/42] further clarification on date typing --- synapseclient/core/cache.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/synapseclient/core/cache.py b/synapseclient/core/cache.py index 8e4a4d285..1f9ab9f79 100644 --- a/synapseclient/core/cache.py +++ b/synapseclient/core/cache.py @@ -13,11 +13,12 @@ import collections.abc import datetime import json +import math import operator import os import re import shutil -import math +import typing from synapseclient.core.lock import Lock from synapseclient.core import utils @@ -280,15 +281,28 @@ def _cache_dirs(self): if os.path.isdir(path2) and re.match('\\d+', item2): yield path2 - def purge(self, before_date=None, after_date=None, dry_run=False): + def purge( + self, + before_date: typing.Union[datetime.datetime, int] = None, + after_date: typing.Union[datetime.datetime, int] = None, + dry_run: bool = False + ): """ - Purge the cache. Use with caution. Delete files whose cache maps were last updated prior to the given date. + Purge the cache. Use with caution. Deletes files whose cache maps were last updated in a specified period. Deletes .cacheMap files and files stored in the cache.cache_root_dir, but does not delete files stored outside the cache. Either the before_date or after_date must be specified. If both are passed, files between the two dates are - selected for removal. Dates must be formatted as a Unix timestamp (epoch time). + selected for removal. Dates must either be a timezone naive Python datetime.datetime instance or the number + of seconds since the unix epoch. For example to delete all the files modified in January 2021, either of the + following can be used:: + + # using offset naive datetime objects + cache.purge(after_date=datetime.datetime(2021, 1, 1), before_date=datetime.datetime(2021, 2, 1)) + + # using seconds since the unix epoch + cache.purge(after_date=1609459200, before_date=1612137600) :param before_date: if specified, all files before this date will be removed :param after_date: if specified, all files after this date will be removed From e8720f16ce8baa0be577058cf2f252be5136a51c Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Fri, 9 Apr 2021 06:05:29 -0700 Subject: [PATCH 20/42] SYNPY-1130 skip indexing any file that is not an S3FileHandle (no e.g. URLs or Google Cloud Files) --- synapseutils/migrate_functions.py | 54 +++++++++-------- .../unit_test_synapseutils_migrate.py | 60 +++++++++++++++---- 2 files changed, 75 insertions(+), 39 deletions(-) diff --git a/synapseutils/migrate_functions.py b/synapseutils/migrate_functions.py index f3dcb9ad2..c600bc78c 100644 --- a/synapseutils/migrate_functions.py +++ b/synapseutils/migrate_functions.py @@ -905,19 +905,31 @@ def _get_version_numbers(syn, entity_id): def _include_file_storage_location_in_index( + file_handle, source_storage_location_ids, - from_storage_location_id, to_storage_location_id, ): # helper determines whether a file is included in the index depending on its storage location. # if source_storage_location_ids are specified the from storage location must be in it. # if the current storage location already matches the destination location then we also # include it in the index, we'll mark it as already migrated. - return ( - not source_storage_location_ids or - str(from_storage_location_id) in source_storage_location_ids or - str(from_storage_location_id) == str(to_storage_location_id) - ) + + from_storage_location_id = file_handle['storageLocationId'] + if ( + (file_handle['concreteType'] == concrete_types.S3_FILE_HANDLE) and + ( + not source_storage_location_ids or + str(from_storage_location_id) in source_storage_location_ids or + str(from_storage_location_id) == str(to_storage_location_id) + ) + ): + migration_status = _MigrationStatus.INDEXED.value \ + if str(from_storage_location_id) != str(to_storage_location_id) \ + else _MigrationStatus.ALREADY_MIGRATED.value + return migration_status + + # this file is not included in this index + return None def _index_file_entity( @@ -963,17 +975,12 @@ def _index_file_entity( if entity_versions: insert_values = [] for (entity, version) in entity_versions: - from_storage_location_id = entity._file_handle['storageLocationId'] - - if _include_file_storage_location_in_index( + migration_status = _include_file_storage_location_in_index( + entity._file_handle, source_storage_location_ids, - from_storage_location_id, to_storage_location_id - ): - - migration_status = _MigrationStatus.INDEXED.value \ - if str(from_storage_location_id) != str(to_storage_location_id) \ - else _MigrationStatus.ALREADY_MIGRATED.value + ) + if migration_status: file_size = entity._file_handle['contentSize'] insert_values.append(( @@ -981,7 +988,7 @@ def _index_file_entity( _MigrationType.FILE.value, version, parent_id, - from_storage_location_id, + entity._file_handle['storageLocationId'], entity.dataFileHandleId, file_size, migration_status @@ -1060,18 +1067,13 @@ def _insert_row_batch(row_batch): for row_id, row_version, file_handles in _get_file_handle_rows(syn, entity_id): for col_id, file_handle in file_handles.items(): - existing_storage_location_id = file_handle['storageLocationId'] - if _include_file_storage_location_in_index( + migration_status = _include_file_storage_location_in_index( + file_handle, source_storage_location_ids, - existing_storage_location_id, dest_storage_location_id, - ): - - migration_status = _MigrationStatus.INDEXED.value \ - if str(dest_storage_location_id) != str(existing_storage_location_id) \ - else _MigrationStatus.ALREADY_MIGRATED.value - + ) + if migration_status: file_size = file_handle['contentSize'] row_batch.append(( entity_id, @@ -1080,7 +1082,7 @@ def _insert_row_batch(row_batch): row_id, col_id, row_version, - existing_storage_location_id, + file_handle['storageLocationId'], file_handle['id'], file_size, migration_status diff --git a/tests/unit/synapseutils/unit_test_synapseutils_migrate.py b/tests/unit/synapseutils/unit_test_synapseutils_migrate.py index cbc631c37..ab8b22c1e 100644 --- a/tests/unit/synapseutils/unit_test_synapseutils_migrate.py +++ b/tests/unit/synapseutils/unit_test_synapseutils_migrate.py @@ -8,7 +8,13 @@ import synapseclient from synapseclient.core.exceptions import SynapseHTTPError import synapseclient.core.upload -from synapseclient.core.constants.concrete_types import FILE_ENTITY, FOLDER_ENTITY, PROJECT_ENTITY, TABLE_ENTITY +from synapseclient.core.constants.concrete_types import ( + FILE_ENTITY, + FOLDER_ENTITY, + PROJECT_ENTITY, + S3_FILE_HANDLE, + TABLE_ENTITY, +) from synapseclient.core import utils import synapseutils from synapseutils import migrate_functions @@ -224,6 +230,7 @@ def _index_file_entity_version_test(self, conn, file_version_strategy): mock_file._file_handle = { 'contentSize': file_size, 'storageLocationId': from_storage_location_id, + 'concreteType': S3_FILE_HANDLE, } syn.get.return_value = mock_file @@ -291,6 +298,7 @@ def test_index_file_entity__all(self, conn): mock_file_2.dataFileHandleId = 2 mock_file_2_size = 9876 mock_file_2._file_handle = { + 'concreteType': S3_FILE_HANDLE, 'contentSize': mock_file_2_size, 'storageLocationId': from_storage_location_id } @@ -301,6 +309,7 @@ def test_index_file_entity__all(self, conn): mock_file_3.dataFileHandleId = 3 mock_file_3_size = 1234 mock_file_3._file_handle = { + 'concreteType': S3_FILE_HANDLE, 'contentSize': mock_file_3_size, # already migrated @@ -313,6 +322,7 @@ def test_index_file_entity__all(self, conn): mock_file_4.dataFileHandleId = 4 mock_file_4_size = 9876 mock_file_4._file_handle = { + 'concreteType': S3_FILE_HANDLE, 'contentSize': mock_file_4_size, 'storageLocationId': 'not a matching storage location' @@ -414,6 +424,7 @@ def test_index_table_entity(self, mocker, conn): file_handle_1 = { 'fileHandle': { 'id': file_handle_id_1, + 'concreteType': S3_FILE_HANDLE, 'contentSize': file_handle_size_1, 'storageLocationId': from_storage_location_id, } @@ -421,6 +432,7 @@ def test_index_table_entity(self, mocker, conn): file_handle_2 = { 'fileHandle': { 'id': file_handle_id_2, + 'concreteType': S3_FILE_HANDLE, 'contentSize': file_handle_size_2, 'storageLocationId': from_storage_location_id, } @@ -430,6 +442,7 @@ def test_index_table_entity(self, mocker, conn): file_handle_3 = { 'fileHandle': { 'id': file_handle_id_3, + 'concreteType': S3_FILE_HANDLE, 'contentSize': file_handle_size_3, 'storageLocationId': 'not a source storage location', } @@ -439,6 +452,7 @@ def test_index_table_entity(self, mocker, conn): file_handle_4 = { 'fileHandle': { 'id': file_handle_id_4, + 'concreteType': S3_FILE_HANDLE, 'contentSize': file_handle_size_4, 'storageLocationId': to_storage_location_id, } @@ -2175,52 +2189,72 @@ def test_include_file_storage_location_in_index__source_location_ids_not_specifi """Verify that if source_storage_location_ids are not specified then we include the record in the index because no sources means all sources.""" - source_storage_location_ids = None from_storage_location_id = '1234' + file_handle = { + 'concreteType': S3_FILE_HANDLE, + 'storageLocationId': from_storage_location_id, + } + + source_storage_location_ids = None to_storage_location_id = '4321' assert _include_file_storage_location_in_index( + file_handle, source_storage_location_ids, - from_storage_location_id, to_storage_location_id, - ) is True + ) == _MigrationStatus.INDEXED.value def test_include_file_storage_location_in_index__source_location_id_matched(self): """Verify that if source_storage_location_ids is specified then we include the record if the file's current storage location matches.""" - source_storage_location_ids = ['0987', '1234', '8765'] from_storage_location_id = '1234' + file_handle = { + 'concreteType': S3_FILE_HANDLE, + 'storageLocationId': from_storage_location_id, + } + + source_storage_location_ids = ['0987', '1234', '8765'] to_storage_location_id = '4321' assert _include_file_storage_location_in_index( + file_handle, source_storage_location_ids, - from_storage_location_id, to_storage_location_id, - ) is True + ) is _MigrationStatus.INDEXED.value def test_include_file_storage_location_in_index__already_in_destination(self): """Verify that if the file is already in the destination storage location then we include it in the index (it will be marked as ALREADY_MIGRATED). This helps show what happened with the file.""" - source_storage_location_ids = ['0987', '8765'] from_storage_location_id = '1234' + file_handle = { + 'concreteType': S3_FILE_HANDLE, + 'storageLocationId': from_storage_location_id, + } + + source_storage_location_ids = ['0987', '8765'] to_storage_location_id = from_storage_location_id assert _include_file_storage_location_in_index( + file_handle, source_storage_location_ids, - from_storage_location_id, to_storage_location_id, - ) is True + ) == _MigrationStatus.ALREADY_MIGRATED.value def test_include_file_storage_location_in_index__exclude(self): """Verify that if the file's current storage location doesn't match one of the source specified locations and it isn't already in the target destination it is excluded from the index altogether. It's not relevant to the migration.""" - source_storage_location_ids = ['0987', '8765'] from_storage_location_id = '1234' + file_handle = { + 'concreteType': S3_FILE_HANDLE, + 'storageLocationId': from_storage_location_id, + } + + source_storage_location_ids = ['0987', '8765'] to_storage_location_id = '4321' assert _include_file_storage_location_in_index( + file_handle, source_storage_location_ids, - from_storage_location_id, to_storage_location_id, - ) is False + ) is None From 1da701af078e94225475db3fc3b6440c99626511 Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Fri, 9 Apr 2021 06:49:26 -0700 Subject: [PATCH 21/42] SYNPY-1134 retry AWS put part 500s during multipart upload/copy --- synapseclient/core/upload/multipart_upload.py | 11 ++-- .../core/upload/unit_test_multipart_upload.py | 64 ++++++++++++++++--- 2 files changed, 59 insertions(+), 16 deletions(-) diff --git a/synapseclient/core/upload/multipart_upload.py b/synapseclient/core/upload/multipart_upload.py index 17b6015b6..aa88cd342 100644 --- a/synapseclient/core/upload/multipart_upload.py +++ b/synapseclient/core/upload/multipart_upload.py @@ -25,6 +25,7 @@ import time from typing import List, Mapping +from synapseclient.core.retry import with_retry from synapseclient.core import pool_provider from synapseclient.core.constants import concrete_types from synapseclient.core.exceptions import ( @@ -233,13 +234,11 @@ def _handle_part(self, part_number): body = self._part_request_body_provider_fn(part_number) if self._part_request_body_provider_fn else None part_size = len(body) if body else 0 for retry in range(2): + def put_fn(): + return session.put(part_url, body, headers=signed_headers) try: - response = session.put( - part_url, - body, - headers=signed_headers, - ) - + # use our backoff mechanism here, we have encountered 500s on puts to AWS signed urls + response = with_retry(put_fn) _raise_for_status(response) # completed upload part to s3 successfully diff --git a/tests/unit/synapseclient/core/upload/unit_test_multipart_upload.py b/tests/unit/synapseclient/core/upload/unit_test_multipart_upload.py index 5287c4868..7da9355bd 100644 --- a/tests/unit/synapseclient/core/upload/unit_test_multipart_upload.py +++ b/tests/unit/synapseclient/core/upload/unit_test_multipart_upload.py @@ -1,6 +1,7 @@ from concurrent.futures import Future import hashlib import json +import requests import pytest from unittest import mock @@ -231,6 +232,47 @@ def test_handle_part_aborted(self, syn): with pytest.raises(SynapseUploadAbortedException): upload._handle_part(5) + def test_handle_part_500(self, syn): + """Test that we retry if we encounter a 500 from AWS on a PUT to the signed URL""" + + upload = self._init_upload_attempt(syn) + upload._upload_id = '123' + part_number = 1 + chunk = b'1' * TestUploadAttempt.part_size + + pre_signed_url = 'https://foo.com/1' + signed_headers = {'a': 1} + + upload._pre_signed_part_urls = {part_number: (pre_signed_url, signed_headers)} + + mock_500 = mock.MagicMock(spec=requests.Response) + mock_500.status_code = 500 + mock_500.headers = {} + mock_500.reason = '' + + self._handle_part_success_test( + syn, + upload, + part_number, + pre_signed_url, + + + # initial call is expired and results in a 500 + # second call is successful + [ + ( + mock.call(pre_signed_url, chunk, headers=signed_headers), + mock_500 + ), + ( + mock.call(pre_signed_url, chunk, headers=signed_headers), + mock.Mock(status_code=200) + ), + ], + chunk, + None + ) + def _handle_part_success_test( self, syn, @@ -334,22 +376,24 @@ def test_handle_part_expired_url(self, syn): upload._pre_signed_part_urls = {part_number: (pre_signed_url_1, signed_headers_1)} + mock_403 = mock.MagicMock(spec=requests.Response) + mock_403.status_code = 403 + mock_403.headers = {} + mock_403.reason = '' + self._handle_part_success_test( syn, upload, part_number, pre_signed_url_1, + # initial call is expired and results in a 403 # second call is successful [ ( mock.call(pre_signed_url_1, chunk, headers=signed_headers_1), - mock.Mock( - status_code=403, - headers={}, - reason='' - ) + mock_403 ), ( mock.call(pre_signed_url_2, chunk, headers=signed_headers_2), @@ -392,11 +436,11 @@ def test_handle_part__url_expired_twice(self, syn): ] ] - mock_session.put.return_value = mock.Mock( - status_code=403, - headers={}, - reason='' - ) + mock_response = mock.MagicMock(spec=requests.Response) + mock_response.status_code = 403 + mock_response.headers = {} + mock_response.reason = '' + mock_session.put.return_value = mock_response with pytest.raises(SynapseHTTPError): upload._handle_part(1) From 668894d22c8ce18c02b38c138fcaf40c7dafd049 Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Fri, 9 Apr 2021 06:59:23 -0700 Subject: [PATCH 22/42] SYNPY-1136 retry on a ConnectionError PUTting a part to an AWS presigned url --- synapseclient/core/upload/multipart_upload.py | 2 +- .../core/upload/unit_test_multipart_upload.py | 38 ++++++++++++++++++- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/synapseclient/core/upload/multipart_upload.py b/synapseclient/core/upload/multipart_upload.py index aa88cd342..1376995a8 100644 --- a/synapseclient/core/upload/multipart_upload.py +++ b/synapseclient/core/upload/multipart_upload.py @@ -238,7 +238,7 @@ def put_fn(): return session.put(part_url, body, headers=signed_headers) try: # use our backoff mechanism here, we have encountered 500s on puts to AWS signed urls - response = with_retry(put_fn) + response = with_retry(put_fn, retry_exceptions=[requests.exceptions.ConnectionError]) _raise_for_status(response) # completed upload part to s3 successfully diff --git a/tests/unit/synapseclient/core/upload/unit_test_multipart_upload.py b/tests/unit/synapseclient/core/upload/unit_test_multipart_upload.py index 7da9355bd..4a22bfe4f 100644 --- a/tests/unit/synapseclient/core/upload/unit_test_multipart_upload.py +++ b/tests/unit/synapseclient/core/upload/unit_test_multipart_upload.py @@ -232,7 +232,7 @@ def test_handle_part_aborted(self, syn): with pytest.raises(SynapseUploadAbortedException): upload._handle_part(5) - def test_handle_part_500(self, syn): + def test_handle_part__500(self, syn): """Test that we retry if we encounter a 500 from AWS on a PUT to the signed URL""" upload = self._init_upload_attempt(syn) @@ -273,6 +273,42 @@ def test_handle_part_500(self, syn): None ) + def test_handle_part__connection_error(self, syn): + """Test that we retry if we encounter a ConnectionError on a reqeust to PUT to an AWS presigend url""" + + upload = self._init_upload_attempt(syn) + upload._upload_id = '123' + part_number = 1 + chunk = b'1' * TestUploadAttempt.part_size + + pre_signed_url = 'https://foo.com/1' + signed_headers = {'a': 1} + + upload._pre_signed_part_urls = {part_number: (pre_signed_url, signed_headers)} + + self._handle_part_success_test( + syn, + upload, + part_number, + pre_signed_url, + + + # initial call is expired and results in a 500 + # second call is successful + [ + ( + mock.call(pre_signed_url, chunk, headers=signed_headers), + requests.exceptions.ConnectionError('aborted') + ), + ( + mock.call(pre_signed_url, chunk, headers=signed_headers), + mock.Mock(status_code=200) + ), + ], + chunk, + None + ) + def _handle_part_success_test( self, syn, From 031d4d9aa1c2b4a1391f2d7c98cf10ddea8d1c02 Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Fri, 9 Apr 2021 08:13:56 -0700 Subject: [PATCH 23/42] SYNPY-1135 part limits: higher default, round up for integer sizes --- synapseutils/migrate_functions.py | 14 ++++++++++---- .../unit_test_synapseutils_migrate.py | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/synapseutils/migrate_functions.py b/synapseutils/migrate_functions.py index c600bc78c..dd5cfc4a9 100644 --- a/synapseutils/migrate_functions.py +++ b/synapseutils/migrate_functions.py @@ -3,6 +3,7 @@ from enum import Enum import json import logging +import math import sys import traceback import typing @@ -13,7 +14,6 @@ from synapseclient.core import utils from synapseclient.core.upload.multipart_upload import ( MAX_NUMBER_OF_PARTS, - MIN_PART_SIZE, multipart_copy, shared_executor, ) @@ -40,6 +40,14 @@ def test_import_sqlite3(): raise +# we use a much larger default part size for part copies than we would for part uploads. +# with part copies the data transfer is within AWS so don't need to concern ourselves +# with upload failures of the actual bytes. +# this value aligns with what some AWS client libraries use e.g. +# https://github.com/aws/aws-sdk-java/blob/1.11.995/aws-java-sdk-s3/src/main/java/com/amazonaws/services/s3/transfer/TransferManagerConfiguration.java#L46 +DEFAULT_PART_SIZE = 100 * utils.MB + + class _MigrationStatus(Enum): # an internal enum for use within the sqlite db # to track the state of entities as they are indexed @@ -1239,9 +1247,7 @@ def _index_entity( def _get_part_size(file_size): - # recommended part size per - # https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/model/file/MultipartUploadCopyRequest.html - return max(MIN_PART_SIZE, (file_size / MAX_NUMBER_OF_PARTS)) + return max(DEFAULT_PART_SIZE, math.ceil((file_size / MAX_NUMBER_OF_PARTS))) def _create_new_file_version(syn, key, from_file_handle_id, to_file_handle_id, file_size, storage_location_id): diff --git a/tests/unit/synapseutils/unit_test_synapseutils_migrate.py b/tests/unit/synapseutils/unit_test_synapseutils_migrate.py index ab8b22c1e..58f8c3ee9 100644 --- a/tests/unit/synapseutils/unit_test_synapseutils_migrate.py +++ b/tests/unit/synapseutils/unit_test_synapseutils_migrate.py @@ -23,6 +23,7 @@ _create_new_file_version, _confirm_migration, _ensure_schema, + _get_part_size, _get_row_dict, _include_file_storage_location_in_index, _index_container, @@ -38,6 +39,8 @@ _retrieve_index_settings, _verify_index_settings, _verify_storage_location_ownership, + DEFAULT_PART_SIZE, + MAX_NUMBER_OF_PARTS, index_files_for_migration, migrate_indexed_files, ) @@ -2258,3 +2261,16 @@ def test_include_file_storage_location_in_index__exclude(self): source_storage_location_ids, to_storage_location_id, ) is None + + +@pytest.mark.parametrize('file_size,expected_part_size', [ + (1, DEFAULT_PART_SIZE), + (10 * utils.MB, DEFAULT_PART_SIZE), + (5000 * utils.MB, DEFAULT_PART_SIZE), + (DEFAULT_PART_SIZE * MAX_NUMBER_OF_PARTS, DEFAULT_PART_SIZE), + ((DEFAULT_PART_SIZE * MAX_NUMBER_OF_PARTS) + 1, DEFAULT_PART_SIZE + 1), + ((5000000 * utils.MB) + 1, 524288001), +]) +def test_get_part_size(file_size, expected_part_size): + """Verify part size calculations.""" + assert _get_part_size(file_size) == expected_part_size From 6fe0899aac2282799721b60eefb76205e71ad20d Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Fri, 9 Apr 2021 08:26:02 -0700 Subject: [PATCH 24/42] SYNPY-1123 escape table query column names during migration indexing --- synapseutils/migrate_functions.py | 51 ++++--- .../unit_test_synapseutils_migrate.py | 134 +++++++++++++++++- 2 files changed, 162 insertions(+), 23 deletions(-) diff --git a/synapseutils/migrate_functions.py b/synapseutils/migrate_functions.py index dd5cfc4a9..75a90ec8d 100644 --- a/synapseutils/migrate_functions.py +++ b/synapseutils/migrate_functions.py @@ -922,9 +922,9 @@ def _include_file_storage_location_in_index( # if the current storage location already matches the destination location then we also # include it in the index, we'll mark it as already migrated. - from_storage_location_id = file_handle['storageLocationId'] + from_storage_location_id = file_handle.get('storageLocationId') if ( - (file_handle['concreteType'] == concrete_types.S3_FILE_HANDLE) and + (file_handle.get('concreteType') == concrete_types.S3_FILE_HANDLE) and ( not source_storage_location_ids or str(from_storage_location_id) in source_storage_location_ids or @@ -1020,24 +1020,32 @@ def _index_file_entity( ) -def _get_file_handle_rows(syn, table_id): +def _get_table_file_handle_rows(syn, table_id): file_handle_columns = [c for c in syn.restGET("/entity/{id}/column".format(id=table_id))['results'] if c['columnType'] == 'FILEHANDLEID'] - file_column_select = ','.join(c['name'] for c in file_handle_columns) - results = syn.tableQuery("select {} from {}".format(file_column_select, table_id)) - for row in results: - file_handles = {} - - # first two cols are row id and row version, rest are file handle ids from our query - row_id, row_version = row[:2] - - file_handle_ids = row[2:] - for i, file_handle_id in enumerate(file_handle_ids): - col_id = file_handle_columns[i]['id'] - file_handle = syn._getFileHandleDownload(file_handle_id, table_id, objectType='TableEntity')['fileHandle'] - file_handles[col_id] = file_handle - - yield row_id, row_version, file_handles + if file_handle_columns: + # quote column names in case they include whitespace or other special characters + # and double quote escape existing quotes. + file_column_select = '"' + '","'.join(c['name'].replace('"', '""') for c in file_handle_columns) + '"' + results = syn.tableQuery("select {} from {}".format(file_column_select, table_id)) + for row in results: + file_handles = {} + + # first two cols are row id and row version, rest are file handle ids from our query + row_id, row_version = row[:2] + + file_handle_ids = row[2:] + for i, file_handle_id in enumerate(file_handle_ids): + if file_handle_id: + col_id = file_handle_columns[i]['id'] + file_handle = syn._getFileHandleDownload( + file_handle_id, + table_id, + objectType='TableEntity' + )['fileHandle'] + file_handles[col_id] = file_handle + + yield row_id, row_version, file_handles def _index_table_entity( @@ -1073,9 +1081,8 @@ def _insert_row_batch(row_batch): row_batch ) - for row_id, row_version, file_handles in _get_file_handle_rows(syn, entity_id): + for row_id, row_version, file_handles in _get_table_file_handle_rows(syn, entity_id): for col_id, file_handle in file_handles.items(): - migration_status = _include_file_storage_location_in_index( file_handle, source_storage_location_ids, @@ -1120,9 +1127,9 @@ def _index_container( concrete_type = utils.concrete_type_of(container_entity) logging.info('Indexing %s %s', concrete_type[concrete_type.rindex('.') + 1:], entity_id) - include_types = ['folder'] + include_types = [] if file_version_strategy != 'skip': - include_types.append('file') + include_types.extend(('folder', 'file')) if include_table_files: include_types.append('table') diff --git a/tests/unit/synapseutils/unit_test_synapseutils_migrate.py b/tests/unit/synapseutils/unit_test_synapseutils_migrate.py index 58f8c3ee9..adb33af6a 100644 --- a/tests/unit/synapseutils/unit_test_synapseutils_migrate.py +++ b/tests/unit/synapseutils/unit_test_synapseutils_migrate.py @@ -25,6 +25,7 @@ _ensure_schema, _get_part_size, _get_row_dict, + _get_table_file_handle_rows, _include_file_storage_location_in_index, _index_container, _index_entity, @@ -418,11 +419,13 @@ def test_index_table_entity(self, mocker, conn): file_handle_id_2 = 'fh2' file_handle_id_3 = 'fh3' file_handle_id_4 = 'fh4' + file_handle_id_5 = 'fh5' file_handle_size_1 = 100 file_handle_size_2 = 200 file_handle_size_3 = 300 file_handle_size_4 = 400 + file_handle_size_5 = 500 file_handle_1 = { 'fileHandle': { @@ -432,6 +435,7 @@ def test_index_table_entity(self, mocker, conn): 'storageLocationId': from_storage_location_id, } } + file_handle_2 = { 'fileHandle': { 'id': file_handle_id_2, @@ -461,9 +465,19 @@ def test_index_table_entity(self, mocker, conn): } } + # this has no listed storage location + file_handle_5 = { + 'fileHandle': { + 'id': file_handle_id_5, + 'concreteType': S3_FILE_HANDLE, + 'contentSize': file_handle_size_5, + } + } + syn.tableQuery.return_value = [ [1, 1, file_handle_id_1, file_handle_id_2], [2, 1, file_handle_id_3, file_handle_id_4], + [3, 1, file_handle_id_5, None], ] syn._getFileHandleDownload.side_effect = [ @@ -471,6 +485,7 @@ def test_index_table_entity(self, mocker, conn): file_handle_2, file_handle_3, file_handle_4, + file_handle_5, ] _index_table_entity(cursor, syn, table_id, parent_id, to_storage_location_id, [from_storage_location_id, '543']) @@ -519,6 +534,27 @@ def test_index_table_entity(self, mocker, conn): # file 3 is excluded entirely because it wasn't in a relevant storage location + def test_index_table_entity__no_file_handles(self, mocker, syn): + """Verify behavior when there are no file handle columns in an indexed table""" + + entity = 'syn123' + parent_id = 'syn456' + dest_storage_location_id = '12345' + source_storage_location_ids = None + + mock_get_table_file_handle_rows = mocker.patch.object(migrate_functions, '_get_table_file_handle_rows') + + def mock_get_table_file_handle_rows_side_effect(*args, **kwargs): + yield from [] + + mock_get_table_file_handle_rows.side_effect = mock_get_table_file_handle_rows_side_effect + mock_cursor = mock.MagicMock(sqlite3.Cursor) + + _index_table_entity(mock_cursor, syn, entity, parent_id, dest_storage_location_id, source_storage_location_ids) + + # should not have tried to insert anything + assert mock_cursor.executemany.called is False + @mock.patch.object(synapseutils.migrate_functions, '_index_entity') def test_index_container__files(self, mock_index_entity, conn): """Test indexing a project container, including files but not tables, and with one sub folder""" @@ -650,7 +686,7 @@ def test_index_container__tables(self, mock_index_entity, conn): continue_on_error, ) - syn.getChildren.assert_called_once_with(folder_id, includeTypes=['folder', 'table']) + syn.getChildren.assert_called_once_with(folder_id, includeTypes=['table']) expected_calls = [ mock.call( @@ -1899,6 +1935,102 @@ def test_migrate_table_attached_file__use_existing_file_handle(mocker, syn): assert mock_multipart_copy.called is False +def test_get_table_file_handle_rows(mocker, syn): + """Verify behavior of get_table_file_handles. In particular verify proper escaping of columns + names when querying tables for file handle ids""" + + table_id = 'syn123' + + mock_rest_get = mocker.patch.object(syn, 'restGET') + mock_rest_get.return_value = { + 'results': [ + { + 'id': 1, + 'name': 'column_1_file_handle', + 'columnType': 'FILEHANDLEID', + }, + { + 'id': 2, + 'name': 'column_2_not_a_file_handle', + 'columnType': 'STRING', + }, + { + 'id': 3, + 'name': 'column_3_file_handle with spaces and "quotes"', + 'columnType': 'FILEHANDLEID', + }, + ] + } + + row_1_id = 1 + row_1_version = 3 + row_1_col_1_val = '12345' + row_1_col_3_val = '54321' + + row_2_id = 2 + row_2_version = 4 + row_2_col_1_val = '98765' + row_2_col_2_val = '56789' + + mock_table_query = mocker.patch.object(syn, 'tableQuery') + mock_table_query.return_value = [ + (row_1_id, row_1_version, row_1_col_1_val, row_1_col_3_val), + (row_2_id, row_2_version, row_2_col_1_val, row_2_col_2_val), + ] + + file_handles = [ + {'fileHandle': {'id': row_1_col_1_val}}, + {'fileHandle': {'id': row_1_col_3_val}}, + {'fileHandle': {'id': row_2_col_1_val}}, + {'fileHandle': {'id': row_2_col_2_val}}, + ] + + mock_get_file_handle_download = mocker.patch.object(syn, '_getFileHandleDownload') + mock_get_file_handle_download.side_effect = [ + {'fileHandle': file_handle} for file_handle in file_handles + ] + + expected_table_file_rows = [ + (row_1_id, row_1_version, {1: file_handles[0], 3: file_handles[1]}), + (row_2_id, row_2_version, {1: file_handles[2], 3: file_handles[3]}), + ] + table_file_rows = [t for t in _get_table_file_handle_rows(syn, table_id)] + assert table_file_rows == expected_table_file_rows + + mock_rest_get.assert_called_once_with(f"/entity/{table_id}/column") + mock_table_query.assert_called_once_with( + f'select "column_1_file_handle","column_3_file_handle with spaces and ""quotes""" from {table_id}' + ) + assert mock_get_file_handle_download.call_args_list == [ + mock.call(file_handle['fileHandle']['id'], table_id, objectType='TableEntity') for file_handle in file_handles + ] + + +def test_get_table_file_handle_rows__no_file_columns(mocker, syn): + """Verify the behavior of get_table_file_handle_rows when no columns in the table are FILEHANDLEIDS""" + + table_id = 'syn123' + + mock_rest_get = mocker.patch.object(syn, 'restGET') + mock_rest_get.return_value = { + 'results': [ + { + 'id': 1, + 'name': 'column_1', + 'columnType': 'INTEGER', + }, + { + 'id': 2, + 'name': 'column_2', + 'columnType': 'STRING', + }, + ] + } + + assert [i for i in _get_table_file_handle_rows(syn, table_id)] == [] + mock_rest_get.assert_called_once_with(f"/entity/{table_id}/column") + + def _verify_schema(cursor): results = cursor.execute( """ From 3ee5952bf98b1d14c041df05f3df1a0e347ab6f6 Mon Sep 17 00:00:00 2001 From: Ziming Dong Date: Fri, 9 Apr 2021 17:25:10 -0700 Subject: [PATCH 25/42] add ability to login using environment variable --- docs/Credentials.rst | 48 +++++++++++++++---- docs/news.rst | 27 +++++++++++ synapseclient/client.py | 2 + .../core/credentials/credential_provider.py | 13 ++++- .../credentials/unit_test_cred_provider.py | 48 +++++++++++++++---- 5 files changed, 121 insertions(+), 17 deletions(-) diff --git a/docs/Credentials.rst b/docs/Credentials.rst index 685a8bba4..cd57a2355 100644 --- a/docs/Credentials.rst +++ b/docs/Credentials.rst @@ -6,7 +6,7 @@ There are multiple ways one can login to Synapse. We recommend users to choose t One Time Login ============== -Use `username` and `password` to login as follows:: +Use :code:`username` and :code:`password` to login as follows:: import synapseclient syn = synapseclient.login("username", "password") @@ -17,7 +17,38 @@ Alternately you can login using a personal access token obtained from synapse.or syn = synapseclient.login(authToken="authtoken") -Use `.synapseConfig` +Use Environment Variable +========================= + +Setting the :code:`SYNAPSE_ACCESS_TOKEN` environment variable will allow you to login +to synapse with a personal access token. + +The environment variable will take priority over credentials in the user's :code:`.synapseConfig` file +or any credentials saved in a prior login using :code:`syn.login(rememberMe=True)`. + +.. TODO: Once documentation for it is written, link to documentation about generating a personal access token + +In your shell, you can pass an environment variable to Python inline by defining it before the command: + +.. code-block:: bash + + SYNAPSE_ACCESS_TOKEN='' python3 + +Alternatively you may export it first, then start Python + +.. code-block:: bash + + export SYNAPSE_ACCESS_TOKEN='' + python3 +Once you are inside Python, you may simply login without passing any arguments: + +.. code-block:: python3 + + import synapseclient + syn = synapseclient.login() + + +Use :code:`.synapseConfig` ==================== For writing code using the Synapse Python client that is easy to share with others, please do not include your credentials in the code. Instead, please use `.synapseConfig` file to manage your credentials. @@ -28,20 +59,20 @@ When installing the Synapse Python client, the `.synapseConfig` is added to your #password = #authtoken = -To enable this section, uncomment it. You will only need to specify either a `username` and `password` pair, or an `authtoken`. For security purposes, we recommend that you use `authtoken` instead of your `password`. A personal access token generated from your synapse.org Settings can be used as your *.synapseConfig* authtoken. +To enable this section, uncomment it. You will only need to specify either a :code:`username` and :code:`password` pair, or an :code:`authtoken`. For security purposes, we recommend that you use :code:`authtoken` instead of your :code:`password`. A personal access token generated from your synapse.org Settings can be used as your *.synapseConfig* authtoken. :: [authentication] authtoken = -Now, you can login without specifying your `username` and `password`:: +Now, you can login without specifying your :code:`username` and :code:`password`:: import synapseclient syn = synapseclient.login() -The .synapseConfig also supports a legacy `apikey` which can be used with a `username` instead of the `password` or `authtoken`, however api key support in the synapseConfig is considered deprecated in favor of personal access tokens which -can be scoped to certain functions and which are revocable. If needed your legacy `apikey` can also be obtained from your synapse.org Settings. +The .synapseConfig also supports a legacy :code:`apikey` which can be used with a :code:`username` instead of the :code:`password` or :code:`authtoken`, however api key support in the synapseConfig is considered deprecated in favor of personal access tokens which +can be scoped to certain functions and which are revocable. If needed your legacy :code:`apikey` can also be obtained from your synapse.org Settings. Letting the Operating System Manage Your Synapse Credentials ============================================================ @@ -51,9 +82,10 @@ For users who would like to save their credentials and let other OS configured a import synapseclient syn = synapseclient.login("username", "password", rememberMe=True) -The application (keychain in Mac) will then prompt you to allow Python to access these credentials. Please choose “Yes” or “OK”. +The application (keychain in Mac) will then prompt you to allow Python to access these credentials. Please choose "Yes" or "OK". -The second time you login, you will not have to enter your `username` or `password`:: +The second time you login, you will not have to enter your :code:`username` or :code:`password`:: import synapseclient syn = synapseclient.login() + diff --git a/docs/news.rst b/docs/news.rst index aff987e08..a038a717b 100644 --- a/docs/news.rst +++ b/docs/news.rst @@ -2,6 +2,33 @@ Release Notes ============= + +2.4.0 (2021-05-XX) +================== + +Highlights +---------- + +- Added ability to :code:`syn.login()` without arguments if the :code:`SYNAPSE_ACCESS_TOKEN` enviroment variable is set with a valid personal access token + + .. code-block:: bash + + # set environment variable for python + SYNAPSE_ACCESS_TOKEN='' python3 + + .. code-block:: python3 + + import synapseclient + # login() will retrieve the personal access token from environment variable + syn = synapseclient.login() + The environment variable will take priority over credentials in the user's :code:`.synapseConfig` file + or any credentials saved in a prior login using :code:`syn.login(rememberMe=True)`. +New Features +------------ + +- [`SYNPY-1125 `__] - + Allow login with environment variables + 2.3.0 (2021-03-03) ================ diff --git a/synapseclient/client.py b/synapseclient/client.py index 4fd92231c..0160c876d 100644 --- a/synapseclient/client.py +++ b/synapseclient/client.py @@ -343,6 +343,8 @@ def login(self, email=None, password=None, apiKey=None, sessionToken=None, remem If no login arguments are provided or only username is provided, login() will attempt to log in using information from these sources (in order of preference): + #. User's personal access token from environment the variable: SYNAPSE_ACCESS_TOKEN + #. .synapseConfig file (in user home folder unless configured otherwise) #. cached credentials from previous `login()` where `rememberMe=True` was passed as a parameter diff --git a/synapseclient/core/credentials/credential_provider.py b/synapseclient/core/credentials/credential_provider.py index e918effd6..5474c1115 100644 --- a/synapseclient/core/credentials/credential_provider.py +++ b/synapseclient/core/credentials/credential_provider.py @@ -188,6 +188,16 @@ def _get_auth_info(self, syn, user_login_args): return user_login_args.username, None, None, token +class EnvironmentVariableCredentialsProvider(SynapseCredentialsProvider): + """ + Retrieves the user's auth token from an environment variable + """ + ENVIRONMENT_VAR_NAME = "SYNAPSE_ACCESS_TOKEN" + + def _get_auth_info(self, syn, user_login_args): + return user_login_args.username, None, None, os.environ.get(self.ENVIRONMENT_VAR_NAME) + + class SynapseCredentialsProviderChain(object): """ Class that has a list of ``SynapseCredentialsProvider`` from which this class attempts to retrieve @@ -221,9 +231,10 @@ def get_credentials(self, syn, user_login_args): DEFAULT_CREDENTIAL_PROVIDER_CHAIN = SynapseCredentialsProviderChain([ UserArgsSessionTokenCredentialsProvider(), # This provider is DEPRECATED UserArgsCredentialsProvider(), + EnvironmentVariableCredentialsProvider(), ConfigFileCredentialsProvider(), CachedCredentialsProvider(), - AWSParameterStoreCredentialsProvider(), + AWSParameterStoreCredentialsProvider(), # see service catalog issue: SC-260 ]) diff --git a/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py b/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py index f9346319c..2e9206a75 100644 --- a/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py +++ b/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py @@ -22,7 +22,8 @@ SynapseCredentialsProviderChain, UserArgsCredentialsProvider, UserArgsSessionTokenCredentialsProvider, - AWSParameterStoreCredentialsProvider + AWSParameterStoreCredentialsProvider, + EnvironmentVariableCredentialsProvider, ) from synapseclient.core.exceptions import SynapseAuthenticationError @@ -112,7 +113,7 @@ def _get_auth_info(self, syn, user_login_args): def test_get_synapse_credentials(self): auth_info = ("username", "password", "api_key") with patch.object(self.provider, "_get_auth_info", return_value=auth_info) as mock_get_auth_info, \ - patch.object(self.provider, "_create_synapse_credential") as mock_create_synapse_credentials: + patch.object(self.provider, "_create_synapse_credential") as mock_create_synapse_credentials: self.provider.get_synapse_credentials(self.syn, self.user_login_args) mock_get_auth_info.assert_called_once_with(self.syn, self.user_login_args) @@ -141,7 +142,7 @@ def test_create_synapse_credential__username_not_None_password_not_None(self): over api key and auth bearer token)""" session_token = "37842837946" with patch.object(self.syn, "_getSessionToken", return_value=session_token) as mock_get_session_token, \ - patch.object(self.syn, "_getAPIKey", return_value=self.api_key) as mock_get_api_key: + patch.object(self.syn, "_getAPIKey", return_value=self.api_key) as mock_get_api_key: # even if api key and/or auth_token is provided, password applies first cred = self.provider._create_synapse_credential( self.syn, @@ -250,11 +251,11 @@ def test_get_auth_info(self): returned_tuple = provider._get_auth_info(self.syn, user_login_args) assert ( - user_login_args.username, - user_login_args.password, - user_login_args.api_key, - user_login_args.auth_token - ) == returned_tuple + user_login_args.username, + user_login_args.password, + user_login_args.api_key, + user_login_args.auth_token + ) == returned_tuple class TestConfigFileCredentialsProvider(object): @@ -482,3 +483,34 @@ def test_get_auth_info__boto3_ImportError(self, mocker, syn, environ_with_param_ user_login_args = UserLoginArgs(username=None, password=None, api_key=None, skip_cache=False, auth_token=None) assert (None,) * 4 == self.provider._get_auth_info(syn, user_login_args) + + +class TestEnvironmentVariableCredentialsProvider(): + + @pytest.fixture(autouse=True, scope='function') + def init_syn(self, syn): + self.syn = syn + + def setup(self): + self.provider = EnvironmentVariableCredentialsProvider() + + def test_get_auth_info__has_environment_variable(self, mocker: MockerFixture, syn): + token = "aHR0cHM6Ly93d3cueW91dHViZS5jb20vd2F0Y2g/dj1mQzdvVU9VRUVpNA==" + mocker.patch.dict(os.environ, {'SYNAPSE_ACCESS_TOKEN': token}) + + user_login_args = UserLoginArgs(username=None, password=None, api_key=None, skip_cache=False, auth_token=None) + assert (None, None, None, token) == self.provider._get_auth_info(syn, user_login_args) + + def test_get_auth_info__has_environment_variable_user_args_with_username(self, mocker: MockerFixture, syn): + token = "aHR0cHM6Ly93d3cueW91dHViZS5jb20vd2F0Y2g/dj1mQzdvVU9VRUVpNA==" + mocker.patch.dict(os.environ, {'SYNAPSE_ACCESS_TOKEN': token}) + username = "foobar" + user_login_args = UserLoginArgs(username=username, password=None, api_key=None, skip_cache=False, auth_token=None) + assert (username, None, None, token) == self.provider._get_auth_info(syn, user_login_args) + + def test_get_auth_info__no_environment_variable(self, mocker: MockerFixture, syn): + mocker.patch.dict(os.environ, {}, clear=True) + + user_login_args = UserLoginArgs(username=None, password=None, api_key=None, skip_cache=False, auth_token=None) + + assert (None,) * 4 == self.provider._get_auth_info(syn, user_login_args) From 35f61ba67c7363ab8fd143502f78626437aae741 Mon Sep 17 00:00:00 2001 From: Ziming Dong Date: Fri, 9 Apr 2021 17:31:16 -0700 Subject: [PATCH 26/42] PEP8 fixes --- .../core/credentials/credential_provider.py | 2 +- .../core/credentials/unit_test_cred_provider.py | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/synapseclient/core/credentials/credential_provider.py b/synapseclient/core/credentials/credential_provider.py index 5474c1115..3ea0c9712 100644 --- a/synapseclient/core/credentials/credential_provider.py +++ b/synapseclient/core/credentials/credential_provider.py @@ -234,7 +234,7 @@ def get_credentials(self, syn, user_login_args): EnvironmentVariableCredentialsProvider(), ConfigFileCredentialsProvider(), CachedCredentialsProvider(), - AWSParameterStoreCredentialsProvider(), # see service catalog issue: SC-260 + AWSParameterStoreCredentialsProvider(), # see service catalog issue: SC-260 ]) diff --git a/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py b/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py index 2e9206a75..16df0c77b 100644 --- a/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py +++ b/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py @@ -250,12 +250,10 @@ def test_get_auth_info(self): provider = UserArgsCredentialsProvider() returned_tuple = provider._get_auth_info(self.syn, user_login_args) - assert ( - user_login_args.username, - user_login_args.password, - user_login_args.api_key, - user_login_args.auth_token - ) == returned_tuple + assert (user_login_args.username, + user_login_args.password, + user_login_args.api_key, + user_login_args.auth_token) == returned_tuple class TestConfigFileCredentialsProvider(object): @@ -505,7 +503,8 @@ def test_get_auth_info__has_environment_variable_user_args_with_username(self, m token = "aHR0cHM6Ly93d3cueW91dHViZS5jb20vd2F0Y2g/dj1mQzdvVU9VRUVpNA==" mocker.patch.dict(os.environ, {'SYNAPSE_ACCESS_TOKEN': token}) username = "foobar" - user_login_args = UserLoginArgs(username=username, password=None, api_key=None, skip_cache=False, auth_token=None) + user_login_args = UserLoginArgs(username=username, password=None, api_key=None, skip_cache=False, + auth_token=None) assert (username, None, None, token) == self.provider._get_auth_info(syn, user_login_args) def test_get_auth_info__no_environment_variable(self, mocker: MockerFixture, syn): From 46da2c83cef7ddb2d47b11cd3124b7e0a47d381e Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Mon, 12 Apr 2021 11:26:45 -0700 Subject: [PATCH 27/42] rm redundant test (very similar to test_evaluations#test_teams --- .../synapseclient/integration_test.py | 29 +------------------ 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/tests/integration/synapseclient/integration_test.py b/tests/integration/synapseclient/integration_test.py index f9d2c7dd5..0eadd8c13 100644 --- a/tests/integration/synapseclient/integration_test.py +++ b/tests/integration/synapseclient/integration_test.py @@ -3,7 +3,6 @@ import os import filecmp import shutil -import time import uuid import configparser from datetime import datetime @@ -12,7 +11,7 @@ from unittest.mock import patch from synapseclient import client -from synapseclient import Activity, Annotations, File, Folder, login, Project, Synapse, Team +from synapseclient import Activity, Annotations, File, Folder, login, Project, Synapse from synapseclient.core.credentials import credential_provider from synapseclient.core.exceptions import SynapseAuthenticationError, SynapseHTTPError, SynapseNoCredentialsError import synapseclient.core.utils as utils @@ -357,32 +356,6 @@ def test_get_user_profile(syn): assert p2.userName == p1.userName -def test_teams(syn): - unique_name = "Team Gnarly Rad " + str(uuid.uuid4()) - team = Team(name=unique_name, description="A gnarly rad team", canPublicJoin=True) - team = syn.store(team) - - team2 = syn.getTeam(team.id) - assert team == team2 - - # Asynchronously populates index, so wait 'til it's there - retry = 0 - backoff = 0.2 - while retry < 10: - retry += 1 - time.sleep(backoff) - backoff *= 2 - found_teams = list(syn._findTeam(team.name)) - if len(found_teams) > 0: - break - else: - print("Failed to create team. May not be a real error.") - - syn.delete(team) - - assert team == found_teams[0] - - def test_findEntityIdByNameAndParent(syn, schedule_for_cleanup): project_name = str(uuid.uuid1()) project_id = syn.store(Project(name=project_name))['id'] From cb006fce04cf47cdfad9e61c41933df209c94c65 Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Mon, 12 Apr 2021 11:27:17 -0700 Subject: [PATCH 28/42] simplify evaluation test, rm use of deprecated /evaluation/submission/query API --- .../synapseclient/test_evaluations.py | 54 +++++++++---------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/tests/integration/synapseclient/test_evaluations.py b/tests/integration/synapseclient/test_evaluations.py index 71d1eb87c..edcbbfc3b 100644 --- a/tests/integration/synapseclient/test_evaluations.py +++ b/tests/integration/synapseclient/test_evaluations.py @@ -127,38 +127,34 @@ def test_evaluations(syn, project, schedule_for_cleanup): assert a['foo'] == ['bar'] assert a['bogosity'] == [bogosity[submission.id]] - # test query by submission annotations - # These queries run against an eventually consistent index table which is - # populated by an asynchronous worker. Thus, the queries may remain out - # of sync for some unbounded, but assumed to be short time. - attempts = 2 - while attempts > 0: - try: - results = syn.restGET("/evaluation/submission/query?query=SELECT+*+FROM+evaluation_%s" % ev.id) - assert len(results['rows']) == num_of_submissions + 1 - - results = syn.restGET( - "/evaluation/submission/query?query=SELECT+*+FROM+evaluation_%s where bogosity > 200" % ev.id) - assert len(results['rows']) == num_of_submissions - except AssertionError: - attempts -= 1 - time.sleep(2) - else: - attempts = 0 - # Test that we can retrieve submissions with a specific status invalid_submissions = list(syn.getSubmissions(ev, status='INVALID')) assert len(invalid_submissions) == 1, len(invalid_submissions) assert invalid_submissions[0]['name'] == 'Submission 01' - view = SubmissionViewSchema(name="Testing view", scopes=[ev['id']], - parent=project['id']) - view_ent = syn.store(view) - view_table = syn.tableQuery(f"select * from {view_ent.id}") - viewdf = view_table.asDataFrame() - assert viewdf['foo'].tolist() == ["bar", "bar"] - assert viewdf['bogosity'].tolist() == [123, 246] - assert viewdf['id'].astype(str).tolist() == list(bogosity.keys()) + # test that we can retrieve annotations via a submission view + # retry a few times because this may be related to asynchronous worker activity + attempts = 10 + sleep_time = 1 + i = 0 + while True: + try: + view = SubmissionViewSchema(name="Testing view", scopes=[ev['id']], + parent=project['id']) + view_ent = syn.store(view) + view_table = syn.tableQuery(f"select * from {view_ent.id}") + viewdf = view_table.asDataFrame() + assert viewdf['foo'].tolist() == ["bar", "bar"] + assert viewdf['bogosity'].tolist() == [123, 246] + assert viewdf['id'].astype(str).tolist() == list(bogosity.keys()) + break + except (AssertionError, KeyError): + i += 1 + if i >= attempts: + raise + + time.sleep(sleep_time) + sleep_time *= 2 finally: # Clean up @@ -190,6 +186,7 @@ def test_teams(syn, project, schedule_for_cleanup): # needs to be retried 'cause appending to the search index is asynchronous tries = 10 + sleep_time = 1 found_team = None while tries > 0: try: @@ -198,5 +195,6 @@ def test_teams(syn, project, schedule_for_cleanup): except ValueError: tries -= 1 if tries > 0: - time.sleep(1) + time.sleep(sleep_time) + sleep_time *= 2 assert team == found_team From 8c4df30b6796a0b8996f4395f5f020a2640b6381 Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Tue, 13 Apr 2021 09:30:25 -0700 Subject: [PATCH 29/42] create view once, 8 retries with backoff (~4m) --- tests/integration/synapseclient/test_evaluations.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/integration/synapseclient/test_evaluations.py b/tests/integration/synapseclient/test_evaluations.py index edcbbfc3b..a852bd54b 100644 --- a/tests/integration/synapseclient/test_evaluations.py +++ b/tests/integration/synapseclient/test_evaluations.py @@ -132,16 +132,17 @@ def test_evaluations(syn, project, schedule_for_cleanup): assert len(invalid_submissions) == 1, len(invalid_submissions) assert invalid_submissions[0]['name'] == 'Submission 01' + view = SubmissionViewSchema(name="Testing view", scopes=[ev['id']], + parent=project['id']) + view_ent = syn.store(view) + # test that we can retrieve annotations via a submission view # retry a few times because this may be related to asynchronous worker activity - attempts = 10 + attempts = 8 sleep_time = 1 i = 0 while True: try: - view = SubmissionViewSchema(name="Testing view", scopes=[ev['id']], - parent=project['id']) - view_ent = syn.store(view) view_table = syn.tableQuery(f"select * from {view_ent.id}") viewdf = view_table.asDataFrame() assert viewdf['foo'].tolist() == ["bar", "bar"] @@ -185,7 +186,7 @@ def test_teams(syn, project, schedule_for_cleanup): assert found is not None, "Couldn't find user {} in team".format(p.userName) # needs to be retried 'cause appending to the search index is asynchronous - tries = 10 + tries = 8 sleep_time = 1 found_team = None while tries > 0: From f9c6360f3a81a56275ffd8232263d91d7e77956a Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Tue, 13 Apr 2021 15:10:37 -0700 Subject: [PATCH 30/42] add utility function for synapse query column name escaping; use getTableColumns vs a raw restGET call --- synapseclient/table.py | 14 +++++ synapseutils/migrate_functions.py | 8 +-- tests/unit/synapseclient/unit_test_tables.py | 26 ++++++++- .../unit_test_synapseutils_migrate.py | 55 +++++++++---------- 4 files changed, 68 insertions(+), 35 deletions(-) diff --git a/synapseclient/table.py b/synapseclient/table.py index 50f7527eb..92cc19cdb 100644 --- a/synapseclient/table.py +++ b/synapseclient/table.py @@ -531,6 +531,20 @@ def cast_row_set(rowset): return rowset +def escape_column_name(column): + """Escape the name of the given column for use in a Synapse table query statement + :param column: a string or column dictionary object with a 'name' key""" + col_name = column['name'] if isinstance(column, collections.Mapping) else str(column) + escaped_name = col_name.replace('"', '""') + return f'"{escaped_name}"' + + +def join_column_names(columns): + """Join the names of the given columns into a comma delimited list suitable for use in a Synapse table query + :param columns: a sequence of column string names or dictionary objets with column 'name' keys""" + return ",".join(escape_column_name(c) for c in columns) + + def _csv_to_pandas_df(filepath, separator=DEFAULT_SEPARATOR, quote_char=DEFAULT_QUOTE_CHARACTER, diff --git a/synapseutils/migrate_functions.py b/synapseutils/migrate_functions.py index 75a90ec8d..ff4bf6d46 100644 --- a/synapseutils/migrate_functions.py +++ b/synapseutils/migrate_functions.py @@ -12,6 +12,7 @@ from synapseclient.core.constants import concrete_types from synapseclient.core import pool_provider from synapseclient.core import utils +from synapseclient.table import join_column_names from synapseclient.core.upload.multipart_upload import ( MAX_NUMBER_OF_PARTS, multipart_copy, @@ -1021,12 +1022,9 @@ def _index_file_entity( def _get_table_file_handle_rows(syn, table_id): - file_handle_columns = [c for c in syn.restGET("/entity/{id}/column".format(id=table_id))['results'] - if c['columnType'] == 'FILEHANDLEID'] + file_handle_columns = [c for c in syn.getTableColumns(table_id) if c['columnType'] == 'FILEHANDLEID'] if file_handle_columns: - # quote column names in case they include whitespace or other special characters - # and double quote escape existing quotes. - file_column_select = '"' + '","'.join(c['name'].replace('"', '""') for c in file_handle_columns) + '"' + file_column_select = join_column_names(file_handle_columns) results = syn.tableQuery("select {} from {}".format(file_column_select, table_id)) for row in results: file_handles = {} diff --git a/tests/unit/synapseclient/unit_test_tables.py b/tests/unit/synapseclient/unit_test_tables.py index b6f02f6e5..9092cc55e 100644 --- a/tests/unit/synapseclient/unit_test_tables.py +++ b/tests/unit/synapseclient/unit_test_tables.py @@ -20,7 +20,7 @@ from synapseclient.table import Column, Schema, CsvFileTable, TableQueryResult, cast_values, \ as_table_columns, Table, build_table, RowSet, SelectColumn, EntityViewSchema, RowSetTable, Row, PartialRow, \ PartialRowset, SchemaBase, _get_view_type_mask_for_deprecated_type, EntityViewType, _get_view_type_mask, \ - MAX_NUM_TABLE_COLUMNS, SubmissionViewSchema + MAX_NUM_TABLE_COLUMNS, SubmissionViewSchema, escape_column_name, join_column_names from synapseclient.core.utils import from_unix_epoch_time from unittest.mock import patch @@ -1261,3 +1261,27 @@ def test_set_view_types_invalid_input(): view = EntityViewSchema(type='project', properties=properties) assert view['viewTypeMask'] == 2 pytest.raises(ValueError, view.set_entity_types, None) + + +@pytest.mark.parametrize("column,expected_name", ( + ("foo", '"foo"'), # all names are quoted + ('foo"bar', '"foo""bar"'), # quotes are double quoted + ('foo bar', '"foo bar"'), # other special characters e.g. spaces are left alone (within the quoted string) +)) +def test_escape_column_names(column, expected_name): + """Verify column name escaping""" + # test as a string + assert escape_column_name(column) == expected_name + + # test as a dictionary/column object + assert escape_column_name({'name': column}) == expected_name + + +def test_join_column_names(): + """Verify the behavior of join_column_names""" + column_names = ['foo', 'foo"bar', 'foo bar'] + column_dicts = [{'name': n} for n in column_names] + expected = '"foo","foo""bar","foo bar"' + + assert join_column_names(column_names) == expected + assert join_column_names(column_dicts) == expected diff --git a/tests/unit/synapseutils/unit_test_synapseutils_migrate.py b/tests/unit/synapseutils/unit_test_synapseutils_migrate.py index adb33af6a..9a62b083f 100644 --- a/tests/unit/synapseutils/unit_test_synapseutils_migrate.py +++ b/tests/unit/synapseutils/unit_test_synapseutils_migrate.py @@ -406,14 +406,13 @@ def test_index_table_entity(self, mocker, conn): # gets the columns of the table # 4 columns, 2 of the file handles - syn.restGET.return_value = { - 'results': [ - {'columnType': 'STRING'}, - {'columnType': 'FILEHANDLEID', 'id': '1', 'name': 'col1'}, - {'columnType': 'FILEHANDLEID', 'id': '2', 'name': 'col2'}, - {'columnType': 'INTEGER'}, - ] - } + mock_get_table_columns = mocker.patch.object(syn, 'getTableColumns') + mock_get_table_columns.return_value = [ + {'columnType': 'STRING'}, + {'columnType': 'FILEHANDLEID', 'id': '1', 'name': 'col1'}, + {'columnType': 'FILEHANDLEID', 'id': '2', 'name': 'col2'}, + {'columnType': 'INTEGER'}, + ] file_handle_id_1 = 'fh1' file_handle_id_2 = 'fh2' @@ -1941,26 +1940,24 @@ def test_get_table_file_handle_rows(mocker, syn): table_id = 'syn123' - mock_rest_get = mocker.patch.object(syn, 'restGET') - mock_rest_get.return_value = { - 'results': [ - { - 'id': 1, - 'name': 'column_1_file_handle', - 'columnType': 'FILEHANDLEID', - }, - { - 'id': 2, - 'name': 'column_2_not_a_file_handle', - 'columnType': 'STRING', - }, - { - 'id': 3, - 'name': 'column_3_file_handle with spaces and "quotes"', - 'columnType': 'FILEHANDLEID', - }, - ] - } + mock_get_table_columns = mocker.patch.object(syn, 'getTableColumns') + mock_get_table_columns.return_value = [ + { + 'id': 1, + 'name': 'column_1_file_handle', + 'columnType': 'FILEHANDLEID', + }, + { + 'id': 2, + 'name': 'column_2_not_a_file_handle', + 'columnType': 'STRING', + }, + { + 'id': 3, + 'name': 'column_3_file_handle with spaces and "quotes"', + 'columnType': 'FILEHANDLEID', + }, + ] row_1_id = 1 row_1_version = 3 @@ -1997,7 +1994,7 @@ def test_get_table_file_handle_rows(mocker, syn): table_file_rows = [t for t in _get_table_file_handle_rows(syn, table_id)] assert table_file_rows == expected_table_file_rows - mock_rest_get.assert_called_once_with(f"/entity/{table_id}/column") + mock_get_table_columns.assert_called_once_with(table_id) mock_table_query.assert_called_once_with( f'select "column_1_file_handle","column_3_file_handle with spaces and ""quotes""" from {table_id}' ) From f11f53d44c1ead42b4fffaa8596f1c2aaa1c1b01 Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Tue, 13 Apr 2021 15:12:09 -0700 Subject: [PATCH 31/42] fix asserted table test file handle structure --- .../synapseutils/unit_test_synapseutils_migrate.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/unit/synapseutils/unit_test_synapseutils_migrate.py b/tests/unit/synapseutils/unit_test_synapseutils_migrate.py index 9a62b083f..80e078234 100644 --- a/tests/unit/synapseutils/unit_test_synapseutils_migrate.py +++ b/tests/unit/synapseutils/unit_test_synapseutils_migrate.py @@ -1976,10 +1976,10 @@ def test_get_table_file_handle_rows(mocker, syn): ] file_handles = [ - {'fileHandle': {'id': row_1_col_1_val}}, - {'fileHandle': {'id': row_1_col_3_val}}, - {'fileHandle': {'id': row_2_col_1_val}}, - {'fileHandle': {'id': row_2_col_2_val}}, + {'id': row_1_col_1_val}, + {'id': row_1_col_3_val}, + {'id': row_2_col_1_val}, + {'id': row_2_col_2_val}, ] mock_get_file_handle_download = mocker.patch.object(syn, '_getFileHandleDownload') @@ -1999,7 +1999,7 @@ def test_get_table_file_handle_rows(mocker, syn): f'select "column_1_file_handle","column_3_file_handle with spaces and ""quotes""" from {table_id}' ) assert mock_get_file_handle_download.call_args_list == [ - mock.call(file_handle['fileHandle']['id'], table_id, objectType='TableEntity') for file_handle in file_handles + mock.call(file_handle['id'], table_id, objectType='TableEntity') for file_handle in file_handles ] From 61df839d687d8b09fa28caa4e4cbbd714c6b112b Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Tue, 13 Apr 2021 15:22:17 -0700 Subject: [PATCH 32/42] assert file handle not fetched when no file handle columns --- .../unit_test_synapseutils_migrate.py | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/unit/synapseutils/unit_test_synapseutils_migrate.py b/tests/unit/synapseutils/unit_test_synapseutils_migrate.py index 80e078234..09bbdf5e1 100644 --- a/tests/unit/synapseutils/unit_test_synapseutils_migrate.py +++ b/tests/unit/synapseutils/unit_test_synapseutils_migrate.py @@ -2008,24 +2008,24 @@ def test_get_table_file_handle_rows__no_file_columns(mocker, syn): table_id = 'syn123' - mock_rest_get = mocker.patch.object(syn, 'restGET') - mock_rest_get.return_value = { - 'results': [ - { - 'id': 1, - 'name': 'column_1', - 'columnType': 'INTEGER', - }, - { - 'id': 2, - 'name': 'column_2', - 'columnType': 'STRING', - }, - ] - } + mock_get_table_columns = mocker.patch.object(syn, 'getTableColumns') + mock_get_file_handle_download = mocker.patch.object(syn, '_getFileHandleDownload') + mock_get_table_columns.return_value = [ + { + 'id': 1, + 'name': 'column_1', + 'columnType': 'INTEGER', + }, + { + 'id': 2, + 'name': 'column_2', + 'columnType': 'STRING', + }, + ] assert [i for i in _get_table_file_handle_rows(syn, table_id)] == [] - mock_rest_get.assert_called_once_with(f"/entity/{table_id}/column") + mock_get_table_columns.assert_called_once_with(table_id) + assert mock_get_file_handle_download.called is False def _verify_schema(cursor): From 1a519f71831799e44fe2d85a20c33cc2aac27c03 Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Tue, 13 Apr 2021 15:24:40 -0700 Subject: [PATCH 33/42] fix pep8 whitespace --- .../unit_test_synapseutils_migrate.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/unit/synapseutils/unit_test_synapseutils_migrate.py b/tests/unit/synapseutils/unit_test_synapseutils_migrate.py index 09bbdf5e1..18883e80f 100644 --- a/tests/unit/synapseutils/unit_test_synapseutils_migrate.py +++ b/tests/unit/synapseutils/unit_test_synapseutils_migrate.py @@ -2011,16 +2011,16 @@ def test_get_table_file_handle_rows__no_file_columns(mocker, syn): mock_get_table_columns = mocker.patch.object(syn, 'getTableColumns') mock_get_file_handle_download = mocker.patch.object(syn, '_getFileHandleDownload') mock_get_table_columns.return_value = [ - { - 'id': 1, - 'name': 'column_1', - 'columnType': 'INTEGER', - }, - { - 'id': 2, - 'name': 'column_2', - 'columnType': 'STRING', - }, + { + 'id': 1, + 'name': 'column_1', + 'columnType': 'INTEGER', + }, + { + 'id': 2, + 'name': 'column_2', + 'columnType': 'STRING', + }, ] assert [i for i in _get_table_file_handle_rows(syn, table_id)] == [] From bea7828703f619a3dfda1b7f8c2788996c050702 Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Wed, 14 Apr 2021 13:30:09 -0700 Subject: [PATCH 34/42] collections.Mapping -> collections.abc.Mapping --- synapseclient/client.py | 2 +- synapseclient/table.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/synapseclient/client.py b/synapseclient/client.py index 4fd92231c..af4e7b4a0 100644 --- a/synapseclient/client.py +++ b/synapseclient/client.py @@ -3229,7 +3229,7 @@ def _create_table_snapshot(self, table: typing.Union[Schema, str], comment: str # check the activity id or object is provided activity_id = None - if isinstance(activity, collections.Mapping): + if isinstance(activity, collections.abc.Mapping): if 'id' not in activity: activity = self._saveActivity(activity) activity_id = activity['id'] diff --git a/synapseclient/table.py b/synapseclient/table.py index 92cc19cdb..5b0448d6f 100644 --- a/synapseclient/table.py +++ b/synapseclient/table.py @@ -534,7 +534,7 @@ def cast_row_set(rowset): def escape_column_name(column): """Escape the name of the given column for use in a Synapse table query statement :param column: a string or column dictionary object with a 'name' key""" - col_name = column['name'] if isinstance(column, collections.Mapping) else str(column) + col_name = column['name'] if isinstance(column, collections.abc.Mapping) else str(column) escaped_name = col_name.replace('"', '""') return f'"{escaped_name}"' From 574341b060b572ef01765d0cb980f758dbdc80cc Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Wed, 14 Apr 2021 13:55:16 -0700 Subject: [PATCH 35/42] skip unstable test for now, SYNPY-816 --- tests/integration/synapseclient/test_evaluations.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/synapseclient/test_evaluations.py b/tests/integration/synapseclient/test_evaluations.py index a852bd54b..a912787cf 100644 --- a/tests/integration/synapseclient/test_evaluations.py +++ b/tests/integration/synapseclient/test_evaluations.py @@ -5,6 +5,7 @@ import random import pytest +import unittest from synapseclient import Evaluation, File, SubmissionViewSchema, Synapse, Team from synapseclient.core.exceptions import SynapseHTTPError @@ -165,6 +166,7 @@ def test_evaluations(syn, project, schedule_for_cleanup): pytest.raises(SynapseHTTPError, syn.getEvaluation, ev) +@unittest.skip(reason='Unstable timing, particularly on dev stack, SYNPY-816') def test_teams(syn, project, schedule_for_cleanup): name = "My Uniquely Named Team " + str(uuid.uuid4()) team = syn.store(Team(name=name, description="A fake team for testing...")) From 881fdc3ea2dd6a8ac3de1a04b156f79f39f263c5 Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Wed, 14 Apr 2021 13:55:24 -0700 Subject: [PATCH 36/42] merge master --- .github/workflows/build.yml | 65 ++- docs/build/html/.buildinfo | 2 +- docs/build/html/Activity.html | 8 +- docs/build/html/Annotations.html | 8 +- docs/build/html/Client.html | 10 +- docs/build/html/CommandLineClient.html | 15 +- docs/build/html/Credentials.html | 8 +- docs/build/html/Entity.html | 8 +- docs/build/html/Evaluation.html | 8 +- docs/build/html/Multipart_upload.html | 8 +- docs/build/html/S3Storage.html | 8 +- docs/build/html/Table.html | 8 +- docs/build/html/Team.html | 8 +- docs/build/html/Upload.html | 8 +- docs/build/html/Utilities.html | 8 +- docs/build/html/Versions.html | 8 +- docs/build/html/Views.html | 8 +- docs/build/html/Wiki.html | 8 +- docs/build/html/_static/bizstyle.js | 2 +- .../html/_static/documentation_options.js | 2 +- docs/build/html/genindex.html | 8 +- docs/build/html/index.html | 49 +- docs/build/html/news.html | 471 ++++++++++-------- docs/build/html/objects.inv | Bin 2027 -> 2027 bytes docs/build/html/py-modindex.html | 8 +- docs/build/html/search.html | 8 +- docs/build/html/searchindex.js | 2 +- docs/build/html/sftp.html | 8 +- docs/build/html/synapseutils.html | 8 +- docs/news.rst | 33 ++ synapseclient/annotations.py | 13 +- synapseclient/client.py | 2 - synapseclient/synapsePythonClient | 2 +- .../synapseclient/integration_test.py | 6 +- .../synapseclient/test_evaluations.py | 7 +- .../synapseclient/unit_test_annotations.py | 31 +- 36 files changed, 496 insertions(+), 368 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4490c288f..0518a7745 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -123,6 +123,10 @@ jobs: if: github.event_name == 'release' + outputs: + sdist-package-name: ${{ steps.build-package.outputs.sdist-package-name }} + bdist-package-name: ${{ steps.build-package.outputs.bdist-package-name }} + steps: - uses: actions/checkout@v2 @@ -170,33 +174,61 @@ jobs: run: | python3 -m pip install --upgrade pip python3 -m pip install setuptools + python3 -m pip install wheel # install synapseclient python3 setup.py install # create distribution - python3 setup.py sdist + python3 setup.py sdist bdist_wheel + + SDIST_PACKAGE_NAME="synapseclient-${{env.VERSION}}.tar.gz" + BDIST_PACKAGE_NAME="synapseclient-${{env.VERSION}}-py3-none-any.whl" + RELEASE_URL_PREFIX="https://uploads.github.com/repos/${{ github.event.repository.full_name }}/releases/${{ github.event.release.id }}/assets?name=" + + echo "::set-output name=sdist-package-name::$SDIST_PACKAGE_NAME" + echo "::set-output name=bdist-package-name::$BDIST_PACKAGE_NAME" - echo "PACKAGE_NAME=synapseclient-${{env.VERSION}}.tar.gz" >> $GITHUB_ENV + echo "::set-output name=sdist-release-url::${RELEASE_URL_PREFIX}${SDIST_PACKAGE_NAME}" + echo "::set-output name=bdist-release-url::${RELEASE_URL_PREFIX}${BDIST_PACKAGE_NAME}" - # upload the artifact to the GitHub Action - - name: upload-build-artifact + # upload the packages as build artifacts of the GitHub Action + + - name: upload-build-sdist uses: actions/upload-artifact@v2 with: - name: synapseclient - path: dist/${{ env.PACKAGE_NAME }} + name: ${{ steps.build-package.outputs.sdist-package-name }} + path: dist/${{ steps.build-package.outputs.sdist-package-name }} - # upload the artifact as an asset of the GitHub Release - - name: upload-release-asset + - name: upload-build-bdist + uses: actions/upload-artifact@v2 + with: + name: ${{ steps.build-package.outputs.bdist-package-name }} + path: dist/${{ steps.build-package.outputs.bdist-package-name }} + + # upload the packages as artifacts of the GitHub release + + - name: upload-release-sdist uses: actions/upload-release-asset@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: - upload_url: https://uploads.github.com/repos/${{ github.event.repository.full_name }}/releases/${{ github.event.release.id }}/assets?name=${{ env.PACKAGE_NAME }} - asset_name: ${{ env.PACKAGE_NAME }} - asset_path: dist/${{ env.PACKAGE_NAME }} + upload_url: ${{ steps.build-package.outputs.sdist-release-url }} + asset_name: ${{ steps.build-package.outputs.sdist-package-name }} + asset_path: dist/${{ steps.build-package.outputs.sdist-package-name }} asset_content_type: application/gzip + - name: upload-release-bdist + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ steps.build-package.outputs.bdist-release-url }} + asset_name: ${{ steps.build-package.outputs.bdist-package-name }} + asset_path: dist/${{ steps.build-package.outputs.bdist-package-name }} + asset_content_type: application/zip + + # re-download the built package to the appropriate pypi server. # we upload prereleases to test.pypi.org and releases to pypi.org. deploy: @@ -210,9 +242,16 @@ jobs: with: python-version: 3.6 - - uses: actions/download-artifact@v2 + - name: download-sdist + uses: actions/download-artifact@v2 + with: + name: ${{ needs.package.outputs.sdist-package-name }} + path: dist + + - name: download-bdist + uses: actions/download-artifact@v2 with: - name: synapseclient + name: ${{ needs.package.outputs.bdist-package-name }} path: dist - name: deploy-to-pypi diff --git a/docs/build/html/.buildinfo b/docs/build/html/.buildinfo index 7e0aae4d2..608bd26bb 100644 --- a/docs/build/html/.buildinfo +++ b/docs/build/html/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 28b32c8cad55c715a9d6bb541b4c4ce5 +config: 9c1bb04af5f147db0da20ac6db06bb18 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/docs/build/html/Activity.html b/docs/build/html/Activity.html index 03606f20b..4c5730abc 100644 --- a/docs/build/html/Activity.html +++ b/docs/build/html/Activity.html @@ -6,7 +6,7 @@ - Provenance — Synapse Python Client 2.3.0 documentation + Provenance — Synapse Python Client 2.3.1 documentation @@ -46,7 +46,7 @@

Navigation

  • previous |
  • - + @@ -243,13 +243,13 @@

    Navigation

  • previous |
  • - +