From 9526a1424b24e1589637caf820ec9515f256de77 Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Thu, 18 Feb 2021 15:59:42 -0800 Subject: [PATCH 01/16] apply passed provenance when storing a table --- synapseclient/client.py | 42 +++++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/synapseclient/client.py b/synapseclient/client.py index 8eceef240..14629b91f 100644 --- a/synapseclient/client.py +++ b/synapseclient/client.py @@ -970,7 +970,15 @@ def store(self, obj, *, createOrUpdate=True, forceVersion=True, versionLabel=Non # _synapse_store hook # for objects that know how to store themselves if hasattr(obj, '_synapse_store'): - return obj._synapse_store(self) + obj = obj._synapse_store(self) + return self._apply_provenance( + obj, + activity=activity, + used=used, + executed=executed, + activityName=activityName, + activityDescription=activityDescription, + ) # Handle all non-Entity objects if not (isinstance(obj, Entity) or type(obj) == dict): @@ -1121,6 +1129,30 @@ def store(self, obj, *, createOrUpdate=True, forceVersion=True, versionLabel=Non annotations = self.set_annotations(Annotations(properties['id'], properties['etag'], annotations)) properties['etag'] = annotations.etag + properties = self._apply_provenance( + properties, + activity=activity, + used=used, + executed=executed, + activityName=activityName, + activityDescription=activityDescription, + ) + + # Return the updated Entity object + entity = Entity.create(properties, annotations, local_state) + return self.get(entity, downloadFile=False) + + def _apply_provenance( + self, + entity, + activity=None, + used=None, + executed=None, + activityName=None, + activityDescription=None + ): + # apply any provenance passed to via the store method to the entity + # If the parameters 'used' or 'executed' are given, create an Activity object if used or executed: if activity is not None: @@ -1132,14 +1164,12 @@ def store(self, obj, *, createOrUpdate=True, forceVersion=True, versionLabel=Non # If we have an Activity, set it as the Entity's provenance record if activity: - self.setProvenance(properties, activity) + self.setProvenance(entity, activity) # 'etag' has changed, so get the new Entity - properties = self._getEntity(properties) + entity = self._getEntity(entity) - # Return the updated Entity object - entity = Entity.create(properties, annotations, local_state) - return self.get(entity, downloadFile=False) + return entity def _createAccessRequirementIfNone(self, entity): """ From 5406634e7dfefbd3f1dcbdabe9d1dce263d9cf38 Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Thu, 18 Feb 2021 16:35:03 -0800 Subject: [PATCH 02/16] unit tests for _apply_provenance --- tests/unit/synapseclient/unit_test_client.py | 79 +++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/tests/unit/synapseclient/unit_test_client.py b/tests/unit/synapseclient/unit_test_client.py index c9eb95166..f7915ec2c 100644 --- a/tests/unit/synapseclient/unit_test_client.py +++ b/tests/unit/synapseclient/unit_test_client.py @@ -11,7 +11,7 @@ import uuid import pytest -from unittest.mock import ANY, call, create_autospec, Mock, patch +from unittest.mock import ANY, call, create_autospec, MagicMock, Mock, patch import synapseclient from synapseclient.annotations import convert_old_annotation_json @@ -35,6 +35,7 @@ SynapseFileNotFoundError, SynapseHTTPError, SynapseMd5MismatchError, + SynapseProvenanceError, SynapseUnmetAccessRestrictions, ) from synapseclient.core.upload import upload_functions @@ -2133,6 +2134,82 @@ def test_store__existing_no_update(syn): assert not mock_updatentity.called +def test_store_self_stored_obj__provenance_applied(syn): + """Verify that any object with its own _synapse_store mechanism (e.g. a table) will have + any passed provenance applied""" + + obj = Mock() + del obj._before_synapse_store + obj._synapse_store = lambda x: x + + activity_kwargs = { + 'activity': MagicMock(spec=synapseclient.Activity), + 'activityName': 'test name', + 'activityDescription': 'test description', + 'used': ['syn123'], + 'executed': ['syn456'], + } + + with patch.object(syn, '_apply_provenance') as mock_apply_provenance: + stored = syn.store(obj, **activity_kwargs) + assert mock_apply_provenance.called_once_with(obj, **activity_kwargs) + assert stored == mock_apply_provenance.return_value + + +def test_apply_provenance__duplicate_args(syn): + """Verify that a SynapseProvenanceError is raised if both used/executed and an Activity is passed""" + with pytest.raises(SynapseProvenanceError): + syn._apply_provenance( + Mock(), + activity=MagicMock(spec=synapseclient.Activity), + used=['syn123'], + executed=['syn456'], + activityName='test name', + activityDescription='test description', + ) + + +def test_apply_provenance__activity(syn): + """Verify _apply_provenance behavior when an Activity is passed""" + + obj = Mock() + activity = synapseclient.Activity(used=['syn123']) + with patch.object(syn, 'setProvenance') as mock_set_provenance, \ + patch.object(syn, '_getEntity') as mock_get_entity: + result = syn._apply_provenance(obj, activity=activity) + + mock_set_provenance.assert_called_once_with(obj, activity) + mock_get_entity.assert_called_once_with(obj) + assert result is mock_get_entity.return_value + + +def test_apply_provenance__used_executed(syn): + """Verify _apply_provenance behavior with used and executed args""" + + obj = Mock() + + used = ['syn123'] + executed = ['syn456'] + name = 'test name' + description = 'test description' + + expected_activity = synapseclient.Activity(used=used, executed=executed, name=name, description=description) + + with patch.object(syn, 'setProvenance') as mock_set_provenance, \ + patch.object(syn, '_getEntity') as mock_get_entity: + result = syn._apply_provenance( + obj, + used=used, + executed=executed, + activityName=name, + activityDescription=description + ) + + mock_set_provenance.assert_called_once_with(obj, expected_activity) + mock_get_entity.assert_called_once_with(obj) + assert result is mock_get_entity.return_value + + def test_get_submission_with_annotations(syn): """Verify a getSubmission with annotation entityBundleJSON that uses the old style annotations is converted to bundle v2 style From 9385caed4e8929f7ba9c80f1842049e926cc8ec4 Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Wed, 24 Mar 2021 10:25:01 -0700 Subject: [PATCH 03/16] don't stream responses, we already download in range requests and streaming the response can result in connection errors in the file writing thread as the socket is streamed. also attempt to retry ConnectionResetErrors in part downloads --- .../multithread_download/download_threads.py | 34 ++++++--- synapseclient/core/retry.py | 32 +++++--- .../unit_test_download_threads.py | 76 +++++++++++++++++-- .../synapseclient/core/unit_test_retry.py | 21 +++++ 4 files changed, 136 insertions(+), 27 deletions(-) diff --git a/synapseclient/core/multithread_download/download_threads.py b/synapseclient/core/multithread_download/download_threads.py index ff5dfb5ab..0a4e00367 100644 --- a/synapseclient/core/multithread_download/download_threads.py +++ b/synapseclient/core/multithread_download/download_threads.py @@ -17,10 +17,10 @@ from synapseclient.core.exceptions import SynapseError from synapseclient.core.pool_provider import get_executor +from synapseclient.core.retry import with_retry # constants MAX_QUEUE_SIZE: int = 20 -MAX_RETRIES: int = 20 MiB: int = 2 ** 20 SYNAPSE_DEFAULT_DOWNLOAD_PART_SIZE: int = 8 * MiB ISO_AWS_STR_FORMAT: str = '%Y%m%dT%H%M%SZ' @@ -324,16 +324,28 @@ def download_file(self, request): def _get_response_with_retry(presigned_url_provider, start: int, end: int) -> Response: session = _get_thread_session() range_header = {'Range': f'bytes={start}-{end}'} - response = session.get(presigned_url_provider.get_info().url, headers=range_header, stream=True) - # try request until successful or out of retries - try_counter = 1 - while response.status_code != HTTPStatus.PARTIAL_CONTENT: - if try_counter >= MAX_RETRIES: - raise SynapseError( - f'Could not download the file: {presigned_url_provider.get_info().file_name},' - f' please try again.') - response = session.get(presigned_url_provider.get_info().url, headers=range_header, stream=True) - try_counter += 1 + + def session_get(): + return session.get(presigned_url_provider.get_info().url, headers=range_header) + + response = None + cause = None + try: + response = with_retry( + session_get, + retry_status_code_in=False, + retry_status_codes=(HTTPStatus.PARTIAL_CONTENT,), + retry_exceptions=(ConnectionResetError,) + ) + except Exception as ex: + cause = ex + + if not response or response.status_code != HTTPStatus.PARTIAL_CONTENT: + raise SynapseError( + f'Could not download the file: {presigned_url_provider.get_info().file_name},' + f' please try again.' + ) from cause + return start, response @staticmethod diff --git a/synapseclient/core/retry.py b/synapseclient/core/retry.py index 7985ff51d..600e962ce 100644 --- a/synapseclient/core/retry.py +++ b/synapseclient/core/retry.py @@ -6,20 +6,29 @@ from synapseclient.core.utils import is_json from synapseclient.core.dozer import doze +DEFAULT_RETRIES = 3 +DEFAULT_WAIT = 1 +DEFAULT_BACK_OFF = 2 +DEFAULT_MAX_WAIT = 30 + def with_retry(function, verbose=False, - retry_status_codes=[429, 500, 502, 503, 504], retry_errors=[], retry_exceptions=[], - retries=3, wait=1, back_off=2, max_wait=30): + retry_status_codes=[429, 500, 502, 503, 504], retry_status_code_in=True, + retry_errors=[], retry_exceptions=[], + retries=DEFAULT_RETRIES, wait=DEFAULT_WAIT, back_off=DEFAULT_BACK_OFF, max_wait=DEFAULT_MAX_WAIT): """ Retries the given function under certain conditions. - :param function: A function with no arguments. If arguments are needed, use a lambda (see example). - :param retry_status_codes: What status codes to retry upon in the case of a SynapseHTTPError. - :param retry_errors: What reasons to retry upon, if function().response.json()['reason'] exists. - :param retry_exceptions: What types of exceptions, specified as strings, to retry upon. - :param retries: How many times to retry maximum. - :param wait: How many seconds to wait between retries. - :param back_off: Exponential constant to increase wait for between progressive failures. + :param function: A function with no arguments. If arguments are needed, use a lambda (see example). + :param retry_status_codes: What status codes to retry upon in the case of a SynapseHTTPError. + :param retry_status_code_in: True if should retry if the status code is in retry_status_codes, + False if should retry unless the status code is in retry_status_codes + :param retry_errors: What reasons to retry upon, if function().response.json()['reason'] exists. + :param retry_exceptions: What types of exceptions, specified as strings or Exception classes, to retry upon. + :param retries: How many times to retry maximum. + :param wait: How many seconds to wait between retries. + :param back_off: Exponential constant to increase wait for between progressive failures. + :param max_wait: back_off between requests will not exceed this value :returns: function() @@ -55,7 +64,10 @@ def foo(a, b, c): return [a, b, c] # Check if we got a retry-able error if response is not None and hasattr(response, 'status_code'): - if response.status_code in retry_status_codes: + if ( + (retry_status_code_in and response.status_code in retry_status_codes) or + (not retry_status_code_in and response.status_code not in retry_status_codes) + ): response_message = _get_message(response) retry = True logger.debug("retrying on status code: %s" % str(response.status_code)) diff --git a/tests/unit/synapseclient/core/multithread_download/unit_test_download_threads.py b/tests/unit/synapseclient/core/multithread_download/unit_test_download_threads.py index d19dd4645..93bef4662 100644 --- a/tests/unit/synapseclient/core/multithread_download/unit_test_download_threads.py +++ b/tests/unit/synapseclient/core/multithread_download/unit_test_download_threads.py @@ -16,6 +16,7 @@ PresignedUrlProvider, TransferStatus, ) +from synapseclient.core.retry import DEFAULT_RETRIES from synapseclient import Synapse from synapseclient.core.exceptions import SynapseError @@ -478,14 +479,12 @@ def test_get_response_with_retry__exceed_max_retries(self, mock_get_thread_sessi downloader._get_response_with_retry(mock_presigned_url_provider, start, end) expected_call_list = [ - mock.call( - presigned_url_info.url, headers={"Range": "bytes=5-42"}, stream=True - ) - ] * download_threads.MAX_RETRIES + mock.call(presigned_url_info.url, headers={"Range": "bytes=5-42"}) + ] * (DEFAULT_RETRIES + 1) assert expected_call_list == mock_requests_session.get.call_args_list @mock.patch.object(download_threads, "_get_thread_session") - def test_get_response_with_retry__partial_content_reponse(self, mock_get_thread_session): + def test_get_response_with_retry__partial_content_response(self, mock_get_thread_session): mock_requests_response = mock.Mock(status_code=206) mock_requests_session = mock.create_autospec(requests.Session) mock_requests_session.get.return_value = mock_requests_response @@ -510,9 +509,74 @@ def test_get_response_with_retry__partial_content_reponse(self, mock_get_thread_ mock_requests_session.get.assert_called_once_with( presigned_url_info.url, headers={"Range": "bytes=5-42"}, - stream=True ) + @mock.patch.object(download_threads, "_get_thread_session") + def test_get_response_with_retry__connection_reset(self, mock_get_thread_session): + """Verify a ConnectionResetError during a part download will be retried""" + + mock_requests_response = mock.Mock(status_code=206) + mock_requests_session = mock.create_autospec(requests.Session) + mock_requests_session.get.side_effect = [ + ConnectionResetError(), + mock_requests_response + ] + mock_get_thread_session.return_value = mock_requests_session + + mock_presigned_url_provider = mock.create_autospec(download_threads.PresignedUrlProvider) + presigned_url_info = download_threads.PresignedUrlInfo( + "foo.txt", "synapse.org/foo.txt", + datetime.datetime.utcnow() + ) + + mock_presigned_url_provider.get_info.return_value = presigned_url_info + start = 5 + end = 42 + + mock_syn = mock.Mock(spec=Synapse) + mock_executor = mock.Mock(spec=concurrent.futures.Executor) + downloader = _MultithreadedDownloader(mock_syn, mock_executor, 5) + assert ( + (start, mock_requests_response) == + downloader._get_response_with_retry(mock_presigned_url_provider, start, end) + ) + + expected_get_call_args_list = [mock.call(presigned_url_info.url, headers={"Range": "bytes=5-42"})] * 2 + assert mock_requests_session.get.call_args_list == expected_get_call_args_list + + @mock.patch.object(download_threads, "_get_thread_session") + def test_get_response_with_retry__error_status(self, mock_get_thread_session): + """Verify an errored status code during a part download will be retried""" + mock_requests_error_response = mock.Mock(status_code=500) + mock_requests_response = mock.Mock(status_code=206) + mock_requests_session = mock.create_autospec(requests.Session) + mock_requests_session.get.side_effect = [ + mock_requests_error_response, + mock_requests_response, + ] + mock_get_thread_session.return_value = mock_requests_session + + mock_presigned_url_provider = mock.create_autospec(download_threads.PresignedUrlProvider) + presigned_url_info = download_threads.PresignedUrlInfo( + "foo.txt", "synapse.org/foo.txt", + datetime.datetime.utcnow() + ) + + mock_presigned_url_provider.get_info.return_value = presigned_url_info + start = 5 + end = 42 + + mock_syn = mock.Mock(spec=Synapse) + mock_executor = mock.Mock(spec=concurrent.futures.Executor) + downloader = _MultithreadedDownloader(mock_syn, mock_executor, 5) + assert ( + (start, mock_requests_response) == + downloader._get_response_with_retry(mock_presigned_url_provider, start, end) + ) + + expected_get_call_args_list = [mock.call(presigned_url_info.url, headers={"Range": "bytes=5-42"})] * 2 + assert mock_requests_session.get.call_args_list == expected_get_call_args_list + def test_shared_executor(): """Test the shared_executor contextmanager which should set up thread_local Executor""" diff --git a/tests/unit/synapseclient/core/unit_test_retry.py b/tests/unit/synapseclient/core/unit_test_retry.py index e9e93768b..065075821 100644 --- a/tests/unit/synapseclient/core/unit_test_retry.py +++ b/tests/unit/synapseclient/core/unit_test_retry.py @@ -1,3 +1,5 @@ +from requests import Response + import pytest from unittest.mock import MagicMock @@ -55,6 +57,25 @@ def foo(): assert function.call_count == 1 + 4 + 3 + 4 + 1 +def test_with_retry__status_code_not_in(): + """Verify using retry with retry_status_code_in=False to retry unless thee status code matches""" + + non_matching_response = MagicMock(spec=Response) + non_matching_response.status_code = 200 + + matching_response = MagicMock(spec=Response) + matching_response.status_code = 201 + + fn = MagicMock() + fn.side_effect = [ + non_matching_response, + matching_response, + ] + + response = with_retry(fn, retry_status_codes=[201], retry_status_code_in=False) + assert response == matching_response + + def test_with_retry__no_status_code(): """Verify that with_retry can also be used on any function even whose return values don't have status_codes. From b9480546f5a044267b90223b695a8d026b46e387 Mon Sep 17 00:00:00 2001 From: linchiahui Date: Thu, 1 Apr 2021 11:42:09 -0700 Subject: [PATCH 04/16] fix the separator path for Windows --- synapseclient/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapseclient/client.py b/synapseclient/client.py index 4fd92231c..eebc04234 100644 --- a/synapseclient/client.py +++ b/synapseclient/client.py @@ -907,7 +907,7 @@ def _download_file_entity(self, downloadLocation, entity, ifcollision, submissio if downloadPath is None or not os.path.exists(downloadPath): return - entity.path = downloadPath + entity.path = os.path.normpath(downloadPath) entity.files = [os.path.basename(downloadPath)] entity.cacheDir = os.path.dirname(downloadPath) From 436c22fb54a09f894c0bdca8ab71e12b035363c4 Mon Sep 17 00:00:00 2001 From: linchiahui Date: Thu, 1 Apr 2021 15:02:38 -0700 Subject: [PATCH 05/16] fix the issue for unit test test_download_file_entity__correct_local_state --- tests/unit/synapseclient/core/unit_test_download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/synapseclient/core/unit_test_download.py b/tests/unit/synapseclient/core/unit_test_download.py index 5f57c8a4a..f94c950a8 100644 --- a/tests/unit/synapseclient/core/unit_test_download.py +++ b/tests/unit/synapseclient/core/unit_test_download.py @@ -520,7 +520,7 @@ def test_download_file_entity__correct_local_state(syn): with patch.object(syn.cache, 'get', return_value=mock_cache_path): syn._download_file_entity(downloadLocation=None, entity=file_entity, ifcollision="overwrite.local", submission=None) - assert mock_cache_path == file_entity.path + assert mock_cache_path == utils.normalize_path(file_entity.path) assert os.path.dirname(mock_cache_path) == file_entity.cacheDir assert 1 == len(file_entity.files) assert os.path.basename(mock_cache_path) == file_entity.files[0] From 5006f5c7a449bfdb8324e5991a23adfd2d1a42dc Mon Sep 17 00:00:00 2001 From: linchiahui Date: Thu, 1 Apr 2021 16:05:28 -0700 Subject: [PATCH 06/16] fix the issue for integrarion test the get method return file's path property is not properly for Windows assertion --- tests/integration/synapseclient/integration_test_Entity.py | 4 ++-- tests/integration/synapseutils/test_synapseutils_sync.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration/synapseclient/integration_test_Entity.py b/tests/integration/synapseclient/integration_test_Entity.py index 42617c067..5c15c2473 100644 --- a/tests/integration/synapseclient/integration_test_Entity.py +++ b/tests/integration/synapseclient/integration_test_Entity.py @@ -507,7 +507,7 @@ def test_download_local_file_URL_path(syn, project, schedule_for_cleanup): localFileEntity = syn.store(File(dataFileHandleId=filehandle['id'], parent=project)) e = syn.get(localFileEntity.id) - assert path == e.path + assert path == utils.normalize_path(e.path) # SYNPY-424 @@ -566,7 +566,7 @@ def test_store__changing_externalURL_by_changing_path(syn, project, schedule_for assert ext.externalURL != url assert utils.normalize_path(temp_path) == utils.file_url_to_path(ext.externalURL) - assert temp_path == ext.path + assert temp_path == utils.normalize_path(ext.path) assert not ext.synapseStore diff --git a/tests/integration/synapseutils/test_synapseutils_sync.py b/tests/integration/synapseutils/test_synapseutils_sync.py index b488f793a..37cea649e 100644 --- a/tests/integration/synapseutils/test_synapseutils_sync.py +++ b/tests/integration/synapseutils/test_synapseutils_sync.py @@ -173,7 +173,7 @@ def test_syncFromSynapse(test_state): assert len(output) == len(uploaded_paths) for f in output: - assert f.path in uploaded_paths + assert utils.normalize_path(f.path) in uploaded_paths def test_syncFromSynapse__children_contain_non_file(test_state): @@ -240,7 +240,7 @@ def test_syncFromSynapse_Links(test_state): assert len(output) == len(uploaded_paths) for f in output: - assert f.path in uploaded_paths + assert utils.normalize_path(f.path) in uploaded_paths def test_write_manifest_data__unicode_characters_in_rows(test_state): From d78b23fcd51a427432bd9f635f83f43416e8bce8 Mon Sep 17 00:00:00 2001 From: linchiahui Date: Fri, 2 Apr 2021 15:12:52 -0700 Subject: [PATCH 07/16] add comment for the os_path_normpath --- synapseclient/client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapseclient/client.py b/synapseclient/client.py index eebc04234..8100da174 100644 --- a/synapseclient/client.py +++ b/synapseclient/client.py @@ -907,6 +907,7 @@ def _download_file_entity(self, downloadLocation, entity, ifcollision, submissio if downloadPath is None or not os.path.exists(downloadPath): return + # converts the path format from forward slashes back to backward slashes on Windows entity.path = os.path.normpath(downloadPath) entity.files = [os.path.basename(downloadPath)] entity.cacheDir = os.path.dirname(downloadPath) From 56ca7fe5b9fb033087dfdec36410adfddaf7fac5 Mon Sep 17 00:00:00 2001 From: Chia-Hui Lin Date: Thu, 22 Apr 2021 12:02:00 -0700 Subject: [PATCH 08/16] add check method for the synapse id in get function --- synapseclient/__main__.py | 11 +++++++++-- tests/unit/synapseclient/unit_test_commandline.py | 15 +++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/synapseclient/__main__.py b/synapseclient/__main__.py index 296b366f7..6117ac474 100644 --- a/synapseclient/__main__.py +++ b/synapseclient/__main__.py @@ -92,6 +92,7 @@ def get(args, syn): if args.recursive: if args.version is not None: raise ValueError('You cannot specify a version making a recursive download.') + _check_args_with_id(args) synapseutils.syncFromSynapse(syn, args.id, args.downloadLocation, followLink=args.followLink, manifest=args.manifest) elif args.queryString is not None: @@ -101,6 +102,7 @@ def get(args, syn): for id in ids: syn.get(id, downloadLocation=args.downloadLocation) else: + _check_args_with_id(args) # search by MD5 if isinstance(args.id, str) and os.path.isfile(args.id): entity = syn.get(args.id, version=args.version, limitSearch=args.limitSearch, downloadFile=False) @@ -121,6 +123,11 @@ def get(args, syn): syn.logger.info('Creating %s', entity.path) +def _check_args_with_id(args): + if args.id is None: + raise ValueError(f'For the {args.subparser} command, the following synapse ID sucha as syn123 are required') + + def sync(args, syn): synapseutils.syncToSynapse(syn, manifestFile=args.manifestFile, dryRun=args.dryRun, sendMessages=args.sendMessages, @@ -561,7 +568,7 @@ def build_parser(): parser.add_argument('-s', '--skip-checks', dest='skip_checks', action='store_true', help='suppress checking for version upgrade messages and endpoint redirection') - subparsers = parser.add_subparsers(title='commands', + subparsers = parser.add_subparsers(title='commands', dest='subparser', description='The following commands are available:', help='For additional help: "synapse -h"') @@ -585,7 +592,7 @@ def build_parser(): default=True, help='Download file using a multiple threaded implementation. ' 'This flag will be removed in the future when multi-threaded download ' 'is deemed fully stable and becomes the default implementation.') - parser_get.add_argument('id', metavar='syn123', nargs='?', type=str, + parser_get.add_argument('id', metavar='syn123 or local_file_path', nargs='?', type=str, help='Synapse ID of form syn123 of desired data object.') # add no manifest option parser_get.add_argument('--manifest', type=str, choices=['all', 'root', 'suppress'], diff --git a/tests/unit/synapseclient/unit_test_commandline.py b/tests/unit/synapseclient/unit_test_commandline.py index 79c6c96bd..1b029e0ae 100644 --- a/tests/unit/synapseclient/unit_test_commandline.py +++ b/tests/unit/synapseclient/unit_test_commandline.py @@ -572,3 +572,18 @@ def test_get__with_normal_id(self, mock_os): call(mock_entity), call('Downloaded file: %s', './base_tmp_path'), call('Creating %s', './tmp_path')] + + def test_get__without_synapse_id(self): + # test normal get command without synapse ID + parser = cmdline.build_parser() + with pytest.raises(ValueError) as ve: + args = parser.parse_args(['get']) + cmdline.get(args, self.syn) + assert str(ve.value) == "For the get command, the following synapse ID sucha as syn123 are required" + + # test get command with -r but without synapse ID + parser = cmdline.build_parser() + with pytest.raises(ValueError) as ve: + args = parser.parse_args(['get', '-r']) + cmdline.get(args, self.syn) + assert str(ve.value) == "For the get command, the following synapse ID sucha as syn123 are required" From 70a64337935f4860d4449c812a2f9855eb4ca181 Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Thu, 22 Apr 2021 16:05:59 -0700 Subject: [PATCH 09/16] pull standard retry params out of client.py s they can be re-used by other parts of the code --- synapseclient/client.py | 16 +++++++----- .../multithread_download/download_threads.py | 12 +++++++-- synapseclient/core/retry.py | 26 +++++++++++++++++++ 3 files changed, 45 insertions(+), 9 deletions(-) diff --git a/synapseclient/client.py b/synapseclient/client.py index 100f5b61f..b4b029319 100644 --- a/synapseclient/client.py +++ b/synapseclient/client.py @@ -94,7 +94,12 @@ from synapseclient.core.pool_provider import DEFAULT_NUM_THREADS from synapseclient.core.utils import id_of, get_properties, MB, memoize, is_json, extract_synapse_id_from_query, \ find_data_file_handle, extract_zip_file_to_directory, is_integer, require_param -from synapseclient.core.retry import with_retry +from synapseclient.core.retry import ( + with_retry, + DEFAULT_RETRY_STATUS_CODES, + RETRYABLE_CONNECTION_ERRORS, + RETRYABLE_CONNECTION_EXCEPTIONS, +) from synapseclient.core import sts_transfer from synapseclient.core.upload.multipart_upload import multipart_upload_file, multipart_upload_string from synapseclient.core.remote_file_storage_wrappers import S3ClientWrapper, SFTPWrapper @@ -127,12 +132,9 @@ # Defines the standard retry policy applied to the rest methods # The retry period needs to span a minute because sending messages is limited to 10 per 60 seconds. -STANDARD_RETRY_PARAMS = {"retry_status_codes": [429, 500, 502, 503, 504], - "retry_errors": ["proxy error", "slow down", "timeout", "timed out", - "connection reset by peer", "unknown ssl protocol error", - "couldn't connect to host", "slowdown", "try again", - "connection reset by peer"], - "retry_exceptions": ["ConnectionError", "Timeout", "timeout", "ChunkedEncodingError"], +STANDARD_RETRY_PARAMS = {"retry_status_codes": DEFAULT_RETRY_STATUS_CODES, + "retry_errors": RETRYABLE_CONNECTION_ERRORS, + "retry_exceptions": RETRYABLE_CONNECTION_EXCEPTIONS, "retries": 60, # Retries for up to about 30 minutes "wait": 1, "max_wait": 30, diff --git a/synapseclient/core/multithread_download/download_threads.py b/synapseclient/core/multithread_download/download_threads.py index 0a4e00367..f0e0bce3a 100644 --- a/synapseclient/core/multithread_download/download_threads.py +++ b/synapseclient/core/multithread_download/download_threads.py @@ -17,7 +17,11 @@ from synapseclient.core.exceptions import SynapseError from synapseclient.core.pool_provider import get_executor -from synapseclient.core.retry import with_retry +from synapseclient.core.retry import ( + with_retry, + RETRYABLE_CONNECTION_ERRORS, + RETRYABLE_CONNECTION_EXCEPTIONS, +) # constants MAX_QUEUE_SIZE: int = 20 @@ -331,11 +335,15 @@ def session_get(): response = None cause = None try: + # currently when doing a range request to AWS we retry on anything other than a 206. + # this seems a bit excessive (i.e. some 400 statuses would suggest a non-retryable) + # but for now matching previous behavior. response = with_retry( session_get, retry_status_code_in=False, retry_status_codes=(HTTPStatus.PARTIAL_CONTENT,), - retry_exceptions=(ConnectionResetError,) + retry_errors=RETRYABLE_CONNECTION_ERRORS, + retry_exceptions=RETRYABLE_CONNECTION_EXCEPTIONS, ) except Exception as ex: cause = ex diff --git a/synapseclient/core/retry.py b/synapseclient/core/retry.py index 600e962ce..6057ae1de 100644 --- a/synapseclient/core/retry.py +++ b/synapseclient/core/retry.py @@ -11,6 +11,32 @@ DEFAULT_BACK_OFF = 2 DEFAULT_MAX_WAIT = 30 +DEFAULT_RETRY_STATUS_CODES = [429, 500, 502, 503, 504] + +# strings that may appear in responses that suggest a retryable condition +RETRYABLE_CONNECTION_ERRORS = [ + "proxy error", + "slow down", + "timeout", + "timed out", + "connection reset by peer", + "unknown ssl protocol error", + "couldn't connect to host", + "slowdown", + "try again", + "connection reset by peer", +] + +# Exceptions that may be retryable. These are socket level exceptions +# not associated with an HTTP response +RETRYABLE_CONNECTION_EXCEPTIONS = [ + "ChunkedEncodingError", + "ConnectionError", + 'ConnectionResetError', + "Timeout", + "timeout", +] + def with_retry(function, verbose=False, retry_status_codes=[429, 500, 502, 503, 504], retry_status_code_in=True, From 2a72244cbc53948bdc89f02cab16cb7f7fe1d030 Mon Sep 17 00:00:00 2001 From: Chia-Hui Lin Date: Thu, 22 Apr 2021 16:13:58 -0700 Subject: [PATCH 10/16] change some text and function name --- synapseclient/__main__.py | 10 +++++----- tests/unit/synapseclient/unit_test_commandline.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/synapseclient/__main__.py b/synapseclient/__main__.py index 6117ac474..af12644b9 100644 --- a/synapseclient/__main__.py +++ b/synapseclient/__main__.py @@ -92,7 +92,7 @@ def get(args, syn): if args.recursive: if args.version is not None: raise ValueError('You cannot specify a version making a recursive download.') - _check_args_with_id(args) + _validate_id_arg(args) synapseutils.syncFromSynapse(syn, args.id, args.downloadLocation, followLink=args.followLink, manifest=args.manifest) elif args.queryString is not None: @@ -102,7 +102,7 @@ def get(args, syn): for id in ids: syn.get(id, downloadLocation=args.downloadLocation) else: - _check_args_with_id(args) + _validate_id_arg(args) # search by MD5 if isinstance(args.id, str) and os.path.isfile(args.id): entity = syn.get(args.id, version=args.version, limitSearch=args.limitSearch, downloadFile=False) @@ -123,9 +123,9 @@ def get(args, syn): syn.logger.info('Creating %s', entity.path) -def _check_args_with_id(args): +def _validate_id_arg(args): if args.id is None: - raise ValueError(f'For the {args.subparser} command, the following synapse ID sucha as syn123 are required') + raise ValueError(f'Missing expected id argument for use with the {args.subparser} command') def sync(args, syn): @@ -592,7 +592,7 @@ def build_parser(): default=True, help='Download file using a multiple threaded implementation. ' 'This flag will be removed in the future when multi-threaded download ' 'is deemed fully stable and becomes the default implementation.') - parser_get.add_argument('id', metavar='syn123 or local_file_path', nargs='?', type=str, + parser_get.add_argument('id', metavar='local path', nargs='?', type=str, help='Synapse ID of form syn123 of desired data object.') # add no manifest option parser_get.add_argument('--manifest', type=str, choices=['all', 'root', 'suppress'], diff --git a/tests/unit/synapseclient/unit_test_commandline.py b/tests/unit/synapseclient/unit_test_commandline.py index 1b029e0ae..2ec082aa9 100644 --- a/tests/unit/synapseclient/unit_test_commandline.py +++ b/tests/unit/synapseclient/unit_test_commandline.py @@ -579,11 +579,11 @@ def test_get__without_synapse_id(self): with pytest.raises(ValueError) as ve: args = parser.parse_args(['get']) cmdline.get(args, self.syn) - assert str(ve.value) == "For the get command, the following synapse ID sucha as syn123 are required" + assert str(ve.value) == "Missing expected id argument for use with the get command" # test get command with -r but without synapse ID parser = cmdline.build_parser() with pytest.raises(ValueError) as ve: args = parser.parse_args(['get', '-r']) cmdline.get(args, self.syn) - assert str(ve.value) == "For the get command, the following synapse ID sucha as syn123 are required" + assert str(ve.value) == "Missing expected id argument for use with the get command" From ae4bea492bed2e35e4d3b972eae669dcf5d93119 Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Thu, 22 Apr 2021 16:31:03 -0700 Subject: [PATCH 11/16] comment cleanup --- synapseclient/core/multithread_download/download_threads.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapseclient/core/multithread_download/download_threads.py b/synapseclient/core/multithread_download/download_threads.py index f0e0bce3a..ed011e33d 100644 --- a/synapseclient/core/multithread_download/download_threads.py +++ b/synapseclient/core/multithread_download/download_threads.py @@ -336,7 +336,7 @@ def session_get(): cause = None try: # currently when doing a range request to AWS we retry on anything other than a 206. - # this seems a bit excessive (i.e. some 400 statuses would suggest a non-retryable) + # this seems a bit excessive (i.e. some 400 statuses would suggest a non-retryable condition) # but for now matching previous behavior. response = with_retry( session_get, From 37c993fa02f84cb2654dc384236b6008d78c224b Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Thu, 22 Apr 2021 20:29:12 -0700 Subject: [PATCH 12/16] replace boolean inverter with separate expected_status_codes list --- .../core/multithread_download/download_threads.py | 3 +-- synapseclient/core/retry.py | 9 ++++----- tests/unit/synapseclient/core/unit_test_retry.py | 6 +++--- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/synapseclient/core/multithread_download/download_threads.py b/synapseclient/core/multithread_download/download_threads.py index ed011e33d..cce4e7272 100644 --- a/synapseclient/core/multithread_download/download_threads.py +++ b/synapseclient/core/multithread_download/download_threads.py @@ -340,8 +340,7 @@ def session_get(): # but for now matching previous behavior. response = with_retry( session_get, - retry_status_code_in=False, - retry_status_codes=(HTTPStatus.PARTIAL_CONTENT,), + expected_status_codes=(HTTPStatus.PARTIAL_CONTENT,), retry_errors=RETRYABLE_CONNECTION_ERRORS, retry_exceptions=RETRYABLE_CONNECTION_EXCEPTIONS, ) diff --git a/synapseclient/core/retry.py b/synapseclient/core/retry.py index 6057ae1de..ebe65cd77 100644 --- a/synapseclient/core/retry.py +++ b/synapseclient/core/retry.py @@ -39,7 +39,7 @@ def with_retry(function, verbose=False, - retry_status_codes=[429, 500, 502, 503, 504], retry_status_code_in=True, + retry_status_codes=[429, 500, 502, 503, 504], expected_status_codes=[], retry_errors=[], retry_exceptions=[], retries=DEFAULT_RETRIES, wait=DEFAULT_WAIT, back_off=DEFAULT_BACK_OFF, max_wait=DEFAULT_MAX_WAIT): """ @@ -47,8 +47,7 @@ def with_retry(function, verbose=False, :param function: A function with no arguments. If arguments are needed, use a lambda (see example). :param retry_status_codes: What status codes to retry upon in the case of a SynapseHTTPError. - :param retry_status_code_in: True if should retry if the status code is in retry_status_codes, - False if should retry unless the status code is in retry_status_codes + :param expected_status_codes: If specified responses with any other status codes result in a retry. :param retry_errors: What reasons to retry upon, if function().response.json()['reason'] exists. :param retry_exceptions: What types of exceptions, specified as strings or Exception classes, to retry upon. :param retries: How many times to retry maximum. @@ -91,8 +90,8 @@ def foo(a, b, c): return [a, b, c] # Check if we got a retry-able error if response is not None and hasattr(response, 'status_code'): if ( - (retry_status_code_in and response.status_code in retry_status_codes) or - (not retry_status_code_in and response.status_code not in retry_status_codes) + (expected_status_codes and response.status_code not in expected_status_codes) or + response.status_code in retry_status_codes ): response_message = _get_message(response) retry = True diff --git a/tests/unit/synapseclient/core/unit_test_retry.py b/tests/unit/synapseclient/core/unit_test_retry.py index 065075821..23506be4a 100644 --- a/tests/unit/synapseclient/core/unit_test_retry.py +++ b/tests/unit/synapseclient/core/unit_test_retry.py @@ -57,8 +57,8 @@ def foo(): assert function.call_count == 1 + 4 + 3 + 4 + 1 -def test_with_retry__status_code_not_in(): - """Verify using retry with retry_status_code_in=False to retry unless thee status code matches""" +def test_with_retry__expected_status_code(): + """Verify using retry expected_status_codes""" non_matching_response = MagicMock(spec=Response) non_matching_response.status_code = 200 @@ -72,7 +72,7 @@ def test_with_retry__status_code_not_in(): matching_response, ] - response = with_retry(fn, retry_status_codes=[201], retry_status_code_in=False) + response = with_retry(fn, expected_status_codes=[201]) assert response == matching_response From 131c71a314b953e966a870c7a2d7ad5d3e671414 Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Fri, 23 Apr 2021 11:26:57 -0700 Subject: [PATCH 13/16] null check retry_status_codes --- synapseclient/core/retry.py | 2 +- .../synapseclient/core/unit_test_retry.py | 24 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/synapseclient/core/retry.py b/synapseclient/core/retry.py index ebe65cd77..3f022fa97 100644 --- a/synapseclient/core/retry.py +++ b/synapseclient/core/retry.py @@ -91,7 +91,7 @@ def foo(a, b, c): return [a, b, c] if response is not None and hasattr(response, 'status_code'): if ( (expected_status_codes and response.status_code not in expected_status_codes) or - response.status_code in retry_status_codes + (retry_status_codes and response.status_code in retry_status_codes) ): response_message = _get_message(response) retry = True diff --git a/tests/unit/synapseclient/core/unit_test_retry.py b/tests/unit/synapseclient/core/unit_test_retry.py index 23506be4a..a2201b25e 100644 --- a/tests/unit/synapseclient/core/unit_test_retry.py +++ b/tests/unit/synapseclient/core/unit_test_retry.py @@ -57,6 +57,30 @@ def foo(): assert function.call_count == 1 + 4 + 3 + 4 + 1 +@pytest.mark.parametrize('values', ( + None, + [], + tuple(), +)) +def test_with_retry__empty_status_codes(values): + """Verify that passing some Falsey values for the various sequence args is ok""" + response = MagicMock(spec=Response) + response.status_code = 200 + + fn = MagicMock() + fn.return_value = response + + # no unexpected exceptions etc should be raised + returned_response = with_retry( + fn, + retry_status_codes=values, + expected_status_codes=values, + retry_exceptions=values, + retry_errors=values, + ) + assert returned_response == response + + def test_with_retry__expected_status_code(): """Verify using retry expected_status_codes""" From ce84262fbec02795b3c9d59429932a0f751b0ec4 Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Tue, 27 Apr 2021 09:02:49 -0700 Subject: [PATCH 14/16] rm repeated word --- synapseclient/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapseclient/client.py b/synapseclient/client.py index e3c8c98c8..4e75da42b 100644 --- a/synapseclient/client.py +++ b/synapseclient/client.py @@ -3219,7 +3219,7 @@ def create_snapshot_version(self, table: typing.Union[EntityViewSchema, Schema, :param label: Optional snapshot label. :param activity: Optional activity ID applied to snapshot version. :param wait: True if this method should return the snapshot version after waiting for any necessary - asynchronous table updates to complete. If False this method will return return + asynchronous table updates to complete. If False this method will return as soon as any updates are initiated. :return: the snapshot version number if wait=True, None if wait=False """ From e56dca9bf49dabd2c5a2148ce8e80d9ca4e25c48 Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Thu, 29 Apr 2021 11:07:19 -0700 Subject: [PATCH 15/16] rm deprecated status field from Evaluation --- synapseclient/evaluation.py | 3 --- tests/integration/synapseclient/test_evaluations.py | 10 +--------- tests/unit/synapseclient/unit_test_Evaluation.py | 6 +++--- 3 files changed, 4 insertions(+), 15 deletions(-) diff --git a/synapseclient/evaluation.py b/synapseclient/evaluation.py index 4e1ec331f..8038e1045 100644 --- a/synapseclient/evaluation.py +++ b/synapseclient/evaluation.py @@ -130,10 +130,7 @@ def getURI(cls, id): return '/evaluation/%s' % id def __init__(self, **kwargs): - kwargs['status'] = kwargs.get('status', 'OPEN') kwargs['contentSource'] = kwargs.get('contentSource', '') - if kwargs['status'] not in ['OPEN', 'PLANNED', 'CLOSED', 'COMPLETED']: - raise ValueError('Evaluation Status must be one of [OPEN, PLANNED, CLOSED, COMPLETED]') if not kwargs['contentSource'].startswith('syn'): # Verify that synapse Id given raise ValueError('The "contentSource" parameter must be specified as a Synapse Entity when creating an' ' Evaluation') diff --git a/tests/integration/synapseclient/test_evaluations.py b/tests/integration/synapseclient/test_evaluations.py index 95a3f12c8..a6f020cd9 100644 --- a/tests/integration/synapseclient/test_evaluations.py +++ b/tests/integration/synapseclient/test_evaluations.py @@ -15,7 +15,7 @@ def test_evaluations(syn, project, schedule_for_cleanup): # Create an Evaluation name = 'Test Evaluation %s' % str(uuid.uuid4()) ev = Evaluation(name=name, description='Evaluation for testing', - contentSource=project['id'], status='CLOSED') + contentSource=project['id']) ev = syn.store(ev) try: @@ -29,7 +29,6 @@ def test_evaluations(syn, project, schedule_for_cleanup): assert ev['id'] == evalNamed['id'] assert ev['name'] == evalNamed['name'] assert ev['ownerId'] == evalNamed['ownerId'] - assert ev['status'] == evalNamed['status'] # -- Get the Evaluation by project evalProj = syn.getEvaluationByContentSource(project) @@ -41,13 +40,6 @@ def test_evaluations(syn, project, schedule_for_cleanup): assert ev['id'] == evalProj['id'] assert ev['name'] == evalProj['name'] assert ev['ownerId'] == evalProj['ownerId'] - assert ev['status'] == evalProj['status'] - - # Update the Evaluation - ev['status'] = 'OPEN' - ev = syn.store(ev, createOrUpdate=True) - schedule_for_cleanup(ev) - assert ev.status == 'OPEN' # Add the current user as a participant myOwnerId = int(syn.getUserProfile()['ownerId']) diff --git a/tests/unit/synapseclient/unit_test_Evaluation.py b/tests/unit/synapseclient/unit_test_Evaluation.py index 63eba06cb..9d34a5a00 100644 --- a/tests/unit/synapseclient/unit_test_Evaluation.py +++ b/tests/unit/synapseclient/unit_test_Evaluation.py @@ -9,9 +9,9 @@ def test_Evaluation(): """Test the construction and accessors of Evaluation objects.""" - # Status can only be one of ['OPEN', 'PLANNED', 'CLOSED', 'COMPLETED'] - pytest.raises(ValueError, Evaluation, name='foo', description='bar', status='BAH') - pytest.raises(ValueError, Evaluation, name='foo', description='bar', status='OPEN', contentSource='a') + # contentSource must be specified and must be a synapse id + pytest.raises(ValueError, Evaluation, name='foo', description='bar') + pytest.raises(ValueError, Evaluation, name='foo', description='bar', contentSource='a') # Assert that the values are ev = Evaluation(name='foobar2', description='bar', status='OPEN', contentSource='syn1234') From 6f70dc09a71d61d155bb4266d30ba41a38b6fbd7 Mon Sep 17 00:00:00 2001 From: Jordan Kiang Date: Thu, 29 Apr 2021 15:59:49 -0700 Subject: [PATCH 16/16] assert the expected exception is being raised --- tests/unit/synapseclient/unit_test_Evaluation.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/unit/synapseclient/unit_test_Evaluation.py b/tests/unit/synapseclient/unit_test_Evaluation.py index 9d34a5a00..94cadef78 100644 --- a/tests/unit/synapseclient/unit_test_Evaluation.py +++ b/tests/unit/synapseclient/unit_test_Evaluation.py @@ -9,15 +9,18 @@ def test_Evaluation(): """Test the construction and accessors of Evaluation objects.""" - # contentSource must be specified and must be a synapse id - pytest.raises(ValueError, Evaluation, name='foo', description='bar') - pytest.raises(ValueError, Evaluation, name='foo', description='bar', contentSource='a') + content_source_missing_ex = 'The "contentSource" parameter must be specified' + with pytest.raises(ValueError, match=content_source_missing_ex): + # with no contentSource + Evaluation(name='foo', description='bar') + with pytest.raises(ValueError, match=content_source_missing_ex): + # with non-synapse id contentSource + Evaluation(name='foo', description='bar', contentSource='a') # Assert that the values are - ev = Evaluation(name='foobar2', description='bar', status='OPEN', contentSource='syn1234') + ev = Evaluation(name='foobar2', description='bar', contentSource='syn1234') assert ev['name'] == ev.name assert ev['description'] == ev.description - assert ev['status'] == ev.status def test_Submission():