Skip to content

Commit 56fca24

Browse files
merge the develop branch and fix conflict
2 parents 922106c + 53eef1f commit 56fca24

File tree

14 files changed

+354
-65
lines changed

14 files changed

+354
-65
lines changed

synapseclient/__main__.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ def get(args, syn):
9292
if args.recursive:
9393
if args.version is not None:
9494
raise ValueError('You cannot specify a version making a recursive download.')
95+
_validate_id_arg(args)
9596
synapseutils.syncFromSynapse(syn, args.id, args.downloadLocation, followLink=args.followLink,
9697
manifest=args.manifest)
9798
elif args.queryString is not None:
@@ -101,6 +102,7 @@ def get(args, syn):
101102
for id in ids:
102103
syn.get(id, downloadLocation=args.downloadLocation)
103104
else:
105+
_validate_id_arg(args)
104106
# search by MD5
105107
if isinstance(args.id, str) and os.path.isfile(args.id):
106108
entity = syn.get(args.id, version=args.version, limitSearch=args.limitSearch, downloadFile=False)
@@ -121,6 +123,11 @@ def get(args, syn):
121123
syn.logger.info('Creating %s', entity.path)
122124

123125

126+
def _validate_id_arg(args):
127+
if args.id is None:
128+
raise ValueError(f'Missing expected id argument for use with the {args.subparser} command')
129+
130+
124131
def sync(args, syn):
125132
synapseutils.syncToSynapse(syn, manifestFile=args.manifestFile,
126133
dryRun=args.dryRun, sendMessages=args.sendMessages,
@@ -586,7 +593,7 @@ def build_parser():
586593
default=True, help='Download file using a multiple threaded implementation. '
587594
'This flag will be removed in the future when multi-threaded download '
588595
'is deemed fully stable and becomes the default implementation.')
589-
parser_get.add_argument('id', metavar='syn123', nargs='?', type=str,
596+
parser_get.add_argument('id', metavar='local path', nargs='?', type=str,
590597
help='Synapse ID of form syn123 of desired data object.')
591598
# add no manifest option
592599
parser_get.add_argument('--manifest', type=str, choices=['all', 'root', 'suppress'],

synapseclient/client.py

Lines changed: 48 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,12 @@
9494
from synapseclient.core.pool_provider import DEFAULT_NUM_THREADS
9595
from synapseclient.core.utils import id_of, get_properties, MB, memoize, is_json, extract_synapse_id_from_query, \
9696
find_data_file_handle, extract_zip_file_to_directory, is_integer, require_param
97-
from synapseclient.core.retry import with_retry
97+
from synapseclient.core.retry import (
98+
with_retry,
99+
DEFAULT_RETRY_STATUS_CODES,
100+
RETRYABLE_CONNECTION_ERRORS,
101+
RETRYABLE_CONNECTION_EXCEPTIONS,
102+
)
98103
from synapseclient.core import sts_transfer
99104
from synapseclient.core.upload.multipart_upload import multipart_upload_file, multipart_upload_string
100105
from synapseclient.core.remote_file_storage_wrappers import S3ClientWrapper, SFTPWrapper
@@ -127,12 +132,9 @@
127132

128133
# Defines the standard retry policy applied to the rest methods
129134
# The retry period needs to span a minute because sending messages is limited to 10 per 60 seconds.
130-
STANDARD_RETRY_PARAMS = {"retry_status_codes": [429, 500, 502, 503, 504],
131-
"retry_errors": ["proxy error", "slow down", "timeout", "timed out",
132-
"connection reset by peer", "unknown ssl protocol error",
133-
"couldn't connect to host", "slowdown", "try again",
134-
"connection reset by peer"],
135-
"retry_exceptions": ["ConnectionError", "Timeout", "timeout", "ChunkedEncodingError"],
135+
STANDARD_RETRY_PARAMS = {"retry_status_codes": DEFAULT_RETRY_STATUS_CODES,
136+
"retry_errors": RETRYABLE_CONNECTION_ERRORS,
137+
"retry_exceptions": RETRYABLE_CONNECTION_EXCEPTIONS,
136138
"retries": 60, # Retries for up to about 30 minutes
137139
"wait": 1,
138140
"max_wait": 30,
@@ -907,7 +909,8 @@ def _download_file_entity(self, downloadLocation, entity, ifcollision, submissio
907909
if downloadPath is None or not os.path.exists(downloadPath):
908910
return
909911

910-
entity.path = downloadPath
912+
# converts the path format from forward slashes back to backward slashes on Windows
913+
entity.path = os.path.normpath(downloadPath)
911914
entity.files = [os.path.basename(downloadPath)]
912915
entity.cacheDir = os.path.dirname(downloadPath)
913916

@@ -996,7 +999,15 @@ def store(self, obj, *, createOrUpdate=True, forceVersion=True, versionLabel=Non
996999
# _synapse_store hook
9971000
# for objects that know how to store themselves
9981001
if hasattr(obj, '_synapse_store'):
999-
return obj._synapse_store(self)
1002+
obj = obj._synapse_store(self)
1003+
return self._apply_provenance(
1004+
obj,
1005+
activity=activity,
1006+
used=used,
1007+
executed=executed,
1008+
activityName=activityName,
1009+
activityDescription=activityDescription,
1010+
)
10001011

10011012
# Handle all non-Entity objects
10021013
if not (isinstance(obj, Entity) or type(obj) == dict):
@@ -1152,6 +1163,30 @@ def store(self, obj, *, createOrUpdate=True, forceVersion=True, versionLabel=Non
11521163
annotations = self.set_annotations(Annotations(properties['id'], properties['etag'], annotations))
11531164
properties['etag'] = annotations.etag
11541165

1166+
properties = self._apply_provenance(
1167+
properties,
1168+
activity=activity,
1169+
used=used,
1170+
executed=executed,
1171+
activityName=activityName,
1172+
activityDescription=activityDescription,
1173+
)
1174+
1175+
# Return the updated Entity object
1176+
entity = Entity.create(properties, annotations, local_state)
1177+
return self.get(entity, downloadFile=False)
1178+
1179+
def _apply_provenance(
1180+
self,
1181+
entity,
1182+
activity=None,
1183+
used=None,
1184+
executed=None,
1185+
activityName=None,
1186+
activityDescription=None
1187+
):
1188+
# apply any provenance passed to via the store method to the entity
1189+
11551190
# If the parameters 'used' or 'executed' are given, create an Activity object
11561191
if used or executed:
11571192
if activity is not None:
@@ -1163,14 +1198,12 @@ def store(self, obj, *, createOrUpdate=True, forceVersion=True, versionLabel=Non
11631198

11641199
# If we have an Activity, set it as the Entity's provenance record
11651200
if activity:
1166-
self.setProvenance(properties, activity)
1201+
self.setProvenance(entity, activity)
11671202

11681203
# 'etag' has changed, so get the new Entity
1169-
properties = self._getEntity(properties)
1204+
entity = self._getEntity(entity)
11701205

1171-
# Return the updated Entity object
1172-
entity = Entity.create(properties, annotations, local_state)
1173-
return self.get(entity, downloadFile=False)
1206+
return entity
11741207

11751208
def _createAccessRequirementIfNone(self, entity):
11761209
"""
@@ -3187,7 +3220,7 @@ def create_snapshot_version(self, table: typing.Union[EntityViewSchema, Schema,
31873220
:param label: Optional snapshot label.
31883221
:param activity: Optional activity ID applied to snapshot version.
31893222
:param wait: True if this method should return the snapshot version after waiting for any necessary
3190-
asynchronous table updates to complete. If False this method will return return
3223+
asynchronous table updates to complete. If False this method will return
31913224
as soon as any updates are initiated.
31923225
:return: the snapshot version number if wait=True, None if wait=False
31933226
"""

synapseclient/core/multithread_download/download_threads.py

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,14 @@
1717

1818
from synapseclient.core.exceptions import SynapseError
1919
from synapseclient.core.pool_provider import get_executor
20+
from synapseclient.core.retry import (
21+
with_retry,
22+
RETRYABLE_CONNECTION_ERRORS,
23+
RETRYABLE_CONNECTION_EXCEPTIONS,
24+
)
2025

2126
# constants
2227
MAX_QUEUE_SIZE: int = 20
23-
MAX_RETRIES: int = 20
2428
MiB: int = 2 ** 20
2529
SYNAPSE_DEFAULT_DOWNLOAD_PART_SIZE: int = 8 * MiB
2630
ISO_AWS_STR_FORMAT: str = '%Y%m%dT%H%M%SZ'
@@ -324,16 +328,31 @@ def download_file(self, request):
324328
def _get_response_with_retry(presigned_url_provider, start: int, end: int) -> Response:
325329
session = _get_thread_session()
326330
range_header = {'Range': f'bytes={start}-{end}'}
327-
response = session.get(presigned_url_provider.get_info().url, headers=range_header, stream=True)
328-
# try request until successful or out of retries
329-
try_counter = 1
330-
while response.status_code != HTTPStatus.PARTIAL_CONTENT:
331-
if try_counter >= MAX_RETRIES:
332-
raise SynapseError(
333-
f'Could not download the file: {presigned_url_provider.get_info().file_name},'
334-
f' please try again.')
335-
response = session.get(presigned_url_provider.get_info().url, headers=range_header, stream=True)
336-
try_counter += 1
331+
332+
def session_get():
333+
return session.get(presigned_url_provider.get_info().url, headers=range_header)
334+
335+
response = None
336+
cause = None
337+
try:
338+
# currently when doing a range request to AWS we retry on anything other than a 206.
339+
# this seems a bit excessive (i.e. some 400 statuses would suggest a non-retryable condition)
340+
# but for now matching previous behavior.
341+
response = with_retry(
342+
session_get,
343+
expected_status_codes=(HTTPStatus.PARTIAL_CONTENT,),
344+
retry_errors=RETRYABLE_CONNECTION_ERRORS,
345+
retry_exceptions=RETRYABLE_CONNECTION_EXCEPTIONS,
346+
)
347+
except Exception as ex:
348+
cause = ex
349+
350+
if not response or response.status_code != HTTPStatus.PARTIAL_CONTENT:
351+
raise SynapseError(
352+
f'Could not download the file: {presigned_url_provider.get_info().file_name},'
353+
f' please try again.'
354+
) from cause
355+
337356
return start, response
338357

339358
@staticmethod

synapseclient/core/retry.py

Lines changed: 47 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,20 +6,54 @@
66
from synapseclient.core.utils import is_json
77
from synapseclient.core.dozer import doze
88

9+
DEFAULT_RETRIES = 3
10+
DEFAULT_WAIT = 1
11+
DEFAULT_BACK_OFF = 2
12+
DEFAULT_MAX_WAIT = 30
13+
14+
DEFAULT_RETRY_STATUS_CODES = [429, 500, 502, 503, 504]
15+
16+
# strings that may appear in responses that suggest a retryable condition
17+
RETRYABLE_CONNECTION_ERRORS = [
18+
"proxy error",
19+
"slow down",
20+
"timeout",
21+
"timed out",
22+
"connection reset by peer",
23+
"unknown ssl protocol error",
24+
"couldn't connect to host",
25+
"slowdown",
26+
"try again",
27+
"connection reset by peer",
28+
]
29+
30+
# Exceptions that may be retryable. These are socket level exceptions
31+
# not associated with an HTTP response
32+
RETRYABLE_CONNECTION_EXCEPTIONS = [
33+
"ChunkedEncodingError",
34+
"ConnectionError",
35+
'ConnectionResetError',
36+
"Timeout",
37+
"timeout",
38+
]
39+
940

1041
def with_retry(function, verbose=False,
11-
retry_status_codes=[429, 500, 502, 503, 504], retry_errors=[], retry_exceptions=[],
12-
retries=3, wait=1, back_off=2, max_wait=30):
42+
retry_status_codes=[429, 500, 502, 503, 504], expected_status_codes=[],
43+
retry_errors=[], retry_exceptions=[],
44+
retries=DEFAULT_RETRIES, wait=DEFAULT_WAIT, back_off=DEFAULT_BACK_OFF, max_wait=DEFAULT_MAX_WAIT):
1345
"""
1446
Retries the given function under certain conditions.
1547
16-
:param function: A function with no arguments. If arguments are needed, use a lambda (see example).
17-
:param retry_status_codes: What status codes to retry upon in the case of a SynapseHTTPError.
18-
:param retry_errors: What reasons to retry upon, if function().response.json()['reason'] exists.
19-
:param retry_exceptions: What types of exceptions, specified as strings, to retry upon.
20-
:param retries: How many times to retry maximum.
21-
:param wait: How many seconds to wait between retries.
22-
:param back_off: Exponential constant to increase wait for between progressive failures.
48+
:param function: A function with no arguments. If arguments are needed, use a lambda (see example).
49+
:param retry_status_codes: What status codes to retry upon in the case of a SynapseHTTPError.
50+
:param expected_status_codes: If specified responses with any other status codes result in a retry.
51+
:param retry_errors: What reasons to retry upon, if function().response.json()['reason'] exists.
52+
:param retry_exceptions: What types of exceptions, specified as strings or Exception classes, to retry upon.
53+
:param retries: How many times to retry maximum.
54+
:param wait: How many seconds to wait between retries.
55+
:param back_off: Exponential constant to increase wait for between progressive failures.
56+
:param max_wait: back_off between requests will not exceed this value
2357
2458
:returns: function()
2559
@@ -55,7 +89,10 @@ def foo(a, b, c): return [a, b, c]
5589

5690
# Check if we got a retry-able error
5791
if response is not None and hasattr(response, 'status_code'):
58-
if response.status_code in retry_status_codes:
92+
if (
93+
(expected_status_codes and response.status_code not in expected_status_codes) or
94+
(retry_status_codes and response.status_code in retry_status_codes)
95+
):
5996
response_message = _get_message(response)
6097
retry = True
6198
logger.debug("retrying on status code: %s" % str(response.status_code))

synapseclient/evaluation.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -130,10 +130,7 @@ def getURI(cls, id):
130130
return '/evaluation/%s' % id
131131

132132
def __init__(self, **kwargs):
133-
kwargs['status'] = kwargs.get('status', 'OPEN')
134133
kwargs['contentSource'] = kwargs.get('contentSource', '')
135-
if kwargs['status'] not in ['OPEN', 'PLANNED', 'CLOSED', 'COMPLETED']:
136-
raise ValueError('Evaluation Status must be one of [OPEN, PLANNED, CLOSED, COMPLETED]')
137134
if not kwargs['contentSource'].startswith('syn'): # Verify that synapse Id given
138135
raise ValueError('The "contentSource" parameter must be specified as a Synapse Entity when creating an'
139136
' Evaluation')

tests/integration/synapseclient/integration_test_Entity.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -507,7 +507,7 @@ def test_download_local_file_URL_path(syn, project, schedule_for_cleanup):
507507

508508
localFileEntity = syn.store(File(dataFileHandleId=filehandle['id'], parent=project))
509509
e = syn.get(localFileEntity.id)
510-
assert path == e.path
510+
assert path == utils.normalize_path(e.path)
511511

512512

513513
# SYNPY-424
@@ -566,7 +566,7 @@ def test_store__changing_externalURL_by_changing_path(syn, project, schedule_for
566566

567567
assert ext.externalURL != url
568568
assert utils.normalize_path(temp_path) == utils.file_url_to_path(ext.externalURL)
569-
assert temp_path == ext.path
569+
assert temp_path == utils.normalize_path(ext.path)
570570
assert not ext.synapseStore
571571

572572

tests/integration/synapseclient/test_evaluations.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def test_evaluations(syn, project, schedule_for_cleanup):
1515
# Create an Evaluation
1616
name = 'Test Evaluation %s' % str(uuid.uuid4())
1717
ev = Evaluation(name=name, description='Evaluation for testing',
18-
contentSource=project['id'], status='CLOSED')
18+
contentSource=project['id'])
1919
ev = syn.store(ev)
2020

2121
try:
@@ -29,7 +29,6 @@ def test_evaluations(syn, project, schedule_for_cleanup):
2929
assert ev['id'] == evalNamed['id']
3030
assert ev['name'] == evalNamed['name']
3131
assert ev['ownerId'] == evalNamed['ownerId']
32-
assert ev['status'] == evalNamed['status']
3332

3433
# -- Get the Evaluation by project
3534
evalProj = syn.getEvaluationByContentSource(project)
@@ -41,13 +40,6 @@ def test_evaluations(syn, project, schedule_for_cleanup):
4140
assert ev['id'] == evalProj['id']
4241
assert ev['name'] == evalProj['name']
4342
assert ev['ownerId'] == evalProj['ownerId']
44-
assert ev['status'] == evalProj['status']
45-
46-
# Update the Evaluation
47-
ev['status'] = 'OPEN'
48-
ev = syn.store(ev, createOrUpdate=True)
49-
schedule_for_cleanup(ev)
50-
assert ev.status == 'OPEN'
5143

5244
# Add the current user as a participant
5345
myOwnerId = int(syn.getUserProfile()['ownerId'])

tests/integration/synapseutils/test_synapseutils_sync.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ def test_syncFromSynapse(test_state):
173173

174174
assert len(output) == len(uploaded_paths)
175175
for f in output:
176-
assert f.path in uploaded_paths
176+
assert utils.normalize_path(f.path) in uploaded_paths
177177

178178

179179
def test_syncFromSynapse__children_contain_non_file(test_state):
@@ -240,7 +240,7 @@ def test_syncFromSynapse_Links(test_state):
240240

241241
assert len(output) == len(uploaded_paths)
242242
for f in output:
243-
assert f.path in uploaded_paths
243+
assert utils.normalize_path(f.path) in uploaded_paths
244244

245245

246246
def test_write_manifest_data__unicode_characters_in_rows(test_state):

0 commit comments

Comments
 (0)