api: Add random exponential backoff to do_api_query. #538

Open: wants to merge 3 commits into main

zulip/zulip/__init__.py (25 changes: 14 additions & 11 deletions)
@@ -94,9 +94,8 @@ def fail(self):
         super(RandomExponentialBackoff, self).fail()
         # Exponential growth with ratio sqrt(2); compute random delay
         # between x and 2x where x is growing exponentially
-        delay_scale = int(2 ** (self.number_of_retries / 2.0 - 1)) + 1
-        delay = min(delay_scale + random.randint(1, delay_scale), self.delay_cap)
-        message = "Sleeping for %ss [max %s] before retrying." % (delay, delay_scale * 2)
+        delay = random.random() * min(self.delay_cap, 2 ** self.number_of_retries)
+        message = "Sleeping for %ss before retrying." % (delay,)
         try:
             logger.warning(message)
         except NameError:
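
The new formula draws each delay uniformly from [0, min(delay_cap, 2 ** number_of_retries)), the "full jitter" style of exponential backoff: only the upper bound doubles per retry, until it hits the cap. A minimal sketch of the resulting schedule, assuming an illustrative delay_cap of 90 seconds (the cap's actual value is not shown in this diff):

    import random

    delay_cap = 90  # assumed for illustration; not specified in this diff
    for number_of_retries in range(1, 8):
        # Full jitter: uniform in [0, min(cap, 2 ** retries)).
        upper = min(delay_cap, 2 ** number_of_retries)
        delay = random.random() * upper
        print("retry %d: sleeping %.2fs (upper bound %ds)"
              % (number_of_retries, delay, upper))

Note that, unlike the old code, a jittered delay can land arbitrarily close to zero; it is the upper bound, not the delay itself, that grows exponentially.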

@@ -510,13 +509,13 @@ def do_api_query(self, orig_request, url, method="POST",
         query_state = {
             'had_error_retry': False,
             'request': request,
-            'failures': 0,
         } # type: Dict[str, Any]
 
+        # Create a RandomExponentialBackoff object to perform the retries.
+        backoff = RandomExponentialBackoff(timeout_success_equivalent=300)
+
         def error_retry(error_string):
             # type: (str) -> bool
-            if not self.retry_on_errors or query_state["failures"] >= 10:
-                return False
             if self.verbose:
                 if not query_state["had_error_retry"]:
                     sys.stdout.write("zulip API(%s): connection error%s -- retrying." %
[... collapsed context ...]
                     sys.stdout.write(".")
                 sys.stdout.flush()
             query_state["request"]["dont_block"] = json.dumps(True)
-            time.sleep(1)
-            query_state["failures"] += 1
-            return True

@timabbott (Member) commented on Mar 5, 2020:
Why not just have this function do the backoff.fail() and return backoff.keep_going()? It'd deduplicate code.

Author (Collaborator) replied:
This is a really nice idea. Implemented this and one other change and now my do_api_query commit is way more readable and has minimal changes.

+            backoff.fail()
+            return backoff.keep_going()
 
         def end_error_retry(succeeded):
             # type: (bool) -> None
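
For readers without the library source at hand, here is a hedged sketch of the fail()/keep_going() contract that error_retry now leans on. It is an illustration of the pattern only; names, defaults, and details of the real zulip.RandomExponentialBackoff may differ:

    import random
    import time

    class BackoffSketch(object):
        # Illustrative stand-in for RandomExponentialBackoff, with assumed
        # defaults: fail() records the failure and sleeps, keep_going()
        # reports whether another retry is still allowed.
        def __init__(self, maximum_retries=10, delay_cap=90):
            self.number_of_retries = 0
            self.maximum_retries = maximum_retries
            self.delay_cap = delay_cap

        def keep_going(self):
            # type: () -> bool
            return self.number_of_retries < self.maximum_retries

        def fail(self):
            # type: () -> None
            self.number_of_retries += 1
            # Same full-jitter formula as the first hunk above.
            time.sleep(random.random() * min(self.delay_cap, 2 ** self.number_of_retries))

Under that contract, error_retry keeps its original meaning: it returns True while retrying is still worthwhile and False once the retry budget is spent, with the fixed time.sleep(1) and manual failure counting now handled by the backoff object.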

@@ -629,6 +627,8 @@ def call_on_each_event(self, callback, event_types=None, narrow=None):
         # type: (Callable[[Dict[str, Any]], None], Optional[List[str]], Optional[List[List[str]]]) -> None
         if narrow is None:
             narrow = []
+        # Use exponential backoff for registering.
+        register_backoff = RandomExponentialBackoff(timeout_success_equivalent=300)
 
         def do_register():
             # type: () -> Tuple[str, int]
@@ -641,14 +641,17 @@ def do_register():
             if 'error' in res['result']:
                 if self.verbose:
                     print("Server returned error:\n%s" % res['msg'])
-                time.sleep(1)
+                register_backoff.fail()
             else:
                 return (res['queue_id'], res['last_event_id'])
 
         queue_id = None
         # Make long-polling requests with `get_events`. Once a request
         # has received an answer, pass its result to the callback before
         # making a new long-polling request.
+
+        # Use exponential backoff for fetching events from the server.
+        fetch_events_backoff = RandomExponentialBackoff(timeout_success_equivalent=300)
         while True:
             if queue_id is None:
                 (queue_id, last_event_id) = do_register()
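
Taken together, the changes give the registration path roughly this shape (a sketch only; do_register_sketch and its parameters are illustrative names, with client assumed to be a zulip.Client):

    def do_register_sketch(client, event_types, narrow):
        # Retry registration until it succeeds, sleeping with jittered
        # exponential backoff after each server-reported error.
        register_backoff = RandomExponentialBackoff(timeout_success_equivalent=300)
        while True:
            res = client.register(event_types=event_types, narrow=narrow)
            if 'error' in res['result']:
                register_backoff.fail()
            else:
                return (res['queue_id'], res['last_event_id'])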

@@ -682,7 +685,7 @@ def do_register():
                 # Add a pause here to cover against potential bugs in this library
                 # causing a DoS attack against a server when getting errors.
                 # TODO: Make this back off exponentially.
-                time.sleep(1)
+                fetch_events_backoff.fail()
                 continue
 
             for event in res['events']:
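
With that TODO resolved, the long-polling loop follows the same pattern. A condensed, hypothetical rendering of the loop this PR converges on (poll_forever and its parameters are illustrative, not part of the diff):

    def poll_forever(client, callback, queue_id, last_event_id):
        # One backoff object per failure domain: this one only governs
        # failed get_events calls; registration has its own above.
        fetch_events_backoff = RandomExponentialBackoff(timeout_success_equivalent=300)
        while True:
            res = client.get_events(queue_id=queue_id, last_event_id=last_event_id)
            if 'error' in res['result']:
                fetch_events_backoff.fail()  # jittered sleep replaces time.sleep(1)
                continue
            for event in res['events']:
                callback(event)
                last_event_id = max(last_event_id, int(event['id']))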