Skip to content

Commit b5e854a

Browse files
authored
Add retries to result uploading (#2970)
* Put the blob upload and the queue send-message call into a try/except retry loop that attempts each action up to 3 times, with 5 seconds between attempts. * Moved the retry logic into the common performance module so it can be used anywhere a retry on exception is warranted. Then updated the upload and queue steps in upload.py to use this new function, and changed upload to record a failure instead of raising at the first one. This ensures we still collect as much data as possible even if the upload flow is broken.
1 parent 9e21af2 commit b5e854a

File tree

2 files changed

+54
-9
lines changed

2 files changed

+54
-9
lines changed

scripts/performance/common.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import os
1919
import sys
20+
import time
2021

2122

2223
def get_machine_architecture():
@@ -157,6 +158,33 @@ def push_dir(path: str = None) -> None:
157158
else:
158159
yield
159160

161+
def retry_on_exception(function, retry_count=3, retry_delay=5, retry_delay_multiplier=1, retry_on_exception=Exception):
    '''
    Calls the specified function, retrying it if it throws an exception.

    :param function: The zero-argument callable to execute.
    :param retry_count: The maximum number of times to call the function
        (total attempts, including the first one). A value of 0 is treated
        as a single attempt with no retries.
    :param retry_delay: The delay before the first retry (seconds).
    :param retry_delay_multiplier: The multiplier to apply to the retry delay after each failure.
    :param retry_on_exception: The exception type (or tuple of types) to retry on (Defaults to Exception).
    :return: Whatever the function returns on its first successful call.
    :raises ValueError: If retry_count or retry_delay is negative, or if
        retry_delay_multiplier is less than 1.
    :raises Exception: The exception from the final attempt once every
        attempt has failed. Exceptions that do not match retry_on_exception
        are not retried and propagate immediately.
    '''
    if retry_count < 0:
        raise ValueError('retry_count must be >= 0')
    if retry_delay < 0:
        raise ValueError('retry_delay must be >= 0')
    if retry_delay_multiplier < 1:
        raise ValueError('retry_delay_multiplier must be >= 1')

    # retry_count=0 passes validation, so it must still run the function once;
    # an empty loop would silently return None without ever calling it.
    attempts = max(retry_count, 1)

    for attempt in range(attempts):
        try:
            return function()
        except retry_on_exception as e:
            # Out of attempts: let the caller see the original exception.
            if attempt == attempts - 1:
                raise
            getLogger().info('Exception caught: %s', e)
            getLogger().info('Retrying in %d seconds...', retry_delay)
            time.sleep(retry_delay)
            # Grow the wait for the next retry (a multiplier of 1 keeps it constant).
            retry_delay *= retry_delay_multiplier
160188

161189
class RunCommand:
162190
'''

scripts/upload.py

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
from azure.storage.queue import QueueClient, TextBase64EncodePolicy
33
from traceback import format_exc
44
from glob import glob
5+
from performance.common import retry_on_exception
56
import os
7+
import time
68

79
from logging import getLogger
810

@@ -22,23 +24,38 @@ def upload(globpath, container, queue, sas_token_env, storage_account_uri):
2224
return 1
2325

2426
files = glob(globpath, recursive=True)
25-
27+
any_upload_or_queue_failed = False
2628
for infile in files:
2729
blob_name = get_unique_name(infile, os.getenv('HELIX_WORKITEM_ID'))
2830

2931
getLogger().info("uploading {}".format(infile))
3032

3133
blob_client = BlobClient(account_url=storage_account_uri.format('blob'), container_name=container, blob_name=blob_name, credential=sas_token)
3234

35+
upload_succeded = False
3336
with open(infile, "rb") as data:
34-
blob_client.upload_blob(data, blob_type="BlockBlob", content_settings=ContentSettings(content_type="application/json"))
35-
36-
if queue is not None:
37-
queue_client = QueueClient(account_url=storage_account_uri.format('queue'), queue_name=queue, credential=sas_token, message_encode_policy=TextBase64EncodePolicy())
38-
queue_client.send_message(blob_client.url)
39-
40-
getLogger().info("upload complete")
41-
return 0
37+
try:
38+
retry_on_exception(lambda: blob_client.upload_blob(data, blob_type="BlockBlob", content_settings=ContentSettings(content_type="application/json")))
39+
upload_succeded = True
40+
except Exception as ex:
41+
any_upload_or_queue_failed = True
42+
getLogger().error("upload failed")
43+
getLogger().error('{0}: {1}'.format(type(ex), str(ex)))
44+
45+
if upload_succeded:
46+
if queue is not None:
47+
try:
48+
queue_client = QueueClient(account_url=storage_account_uri.format('queue'), queue_name=queue, credential=sas_token, message_encode_policy=TextBase64EncodePolicy())
49+
retry_on_exception(lambda: queue_client.send_message(blob_client.url))
50+
getLogger().info("upload and queue complete")
51+
except Exception as ex:
52+
any_upload_or_queue_failed = True
53+
getLogger().error("queue failed")
54+
getLogger().error('{0}: {1}'.format(type(ex), str(ex)))
55+
else:
56+
getLogger().info("upload complete")
57+
58+
return any_upload_or_queue_failed # 0 (False) if all uploads and queues succeeded, 1 (True) otherwise
4259

4360
except Exception as ex:
4461
getLogger().error('{0}: {1}'.format(type(ex), str(ex)))

0 commit comments

Comments
 (0)