diff --git a/.secrets.baseline b/.secrets.baseline index 3afdeefe..1d880e61 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -3,7 +3,7 @@ "files": ".lock", "lines": null }, - "generated_at": "2020-10-26T20:49:34Z", + "generated_at": "2021-05-27T18:37:20Z", "plugins_used": [ { "name": "AWSKeyDetector" @@ -76,7 +76,7 @@ { "hashed_secret": "1348b145fa1a555461c1b790a2f66614781091e9", "is_verified": false, - "line_number": 389, + "line_number": 285, "type": "Private Key" } ] diff --git a/cirrus/backoff.py b/cirrus/backoff.py new file mode 100644 index 00000000..b7d09ba2 --- /dev/null +++ b/cirrus/backoff.py @@ -0,0 +1,116 @@ +""" +Module to consolidate exponential backoff settings and functions +""" +from cdislogging import get_logger +from googleapiclient.errors import HttpError as GoogleHttpError +import json +import sys + +from cirrus.errors import CirrusError + +logger = get_logger(__name__) + + +def _print_func_name(function): + return "{}.{}".format(function.__module__, function.__name__) + + +def _print_kwargs(kwargs): + return ", ".join("{}={}".format(k, repr(v)) for k, v in list(kwargs.items())) + + +def log_backoff_retry(details): + args_str = ", ".join(map(str, details["args"])) + kwargs_str = ( + (", " + _print_kwargs(details["kwargs"])) if details.get("kwargs") else "" + ) + func_call_log = "{}({}{})".format( + _print_func_name(details["target"]), args_str, kwargs_str + ) + logger.warning( + "backoff: call {func_call} delay {wait:0.1f} seconds after {tries} tries".format( + func_call=func_call_log, **details + ) + ) + + +def log_backoff_giveup(details): + args_str = ", ".join(map(str, details["args"])) + kwargs_str = ( + (", " + _print_kwargs(details["kwargs"])) if details.get("kwargs") else "" + ) + func_call_log = "{}({}{})".format( + _print_func_name(details["target"]), args_str, kwargs_str + ) + logger.error( + "backoff: gave up call {func_call} after {tries} tries; exception: {exc}".format( + func_call=func_call_log, exc=sys.exc_info(), **details + ) + ) + + +def get_reason(http_error): + """ + temporary solution to work around googleapiclient bug that doesn't + parse reason from server response + """ + reason = http_error.resp.reason + try: + data = json.loads(http_error.content.decode("utf-8")) + if isinstance(data, dict): + reason = data["error"].get("reason") + if "errors" in data["error"] and len(data["error"]["errors"]) > 0: + reason = data["error"]["errors"][0]["reason"] + except (ValueError, KeyError, TypeError): + pass + if reason is None: + reason = "" + return reason + + +def exception_do_not_retry(e): + """ + True if we should not retry. + - We should not retry for errors that we raise (CirrusErrors) + - We should not retry for Google errors that are not temporary + and not recoverable by retry + """ + if isinstance(e, GoogleHttpError): + if e.resp.status == 403: + # Then we should return True unless it's a rate limit error. + # Note: There is overlap in the reason codes for these APIs + # which is not ideal. e.g. userRateLimitExceeded is in both + # resource manager API and directory API. + # Fortunately both cases warrant retrying. + # Valid rate limit reasons from CLOUD RESOURCE MANAGER API: + # cloud.google.com/resource-manager/docs/core_errors#FORBIDDEN + # Many limit errors listed; only a few warrant retry. + resource_rlreasons = [ + "concurrentLimitExceeded", + "limitExceeded", + "rateLimitExceeded", + "userRateLimitExceeded", + ] + # Valid rate limit reasons from DIRECTORY API: + # developers.google.com/admin-sdk/directory/v1/limits + directory_rlreasons = ["userRateLimitExceeded", "quotaExceeded"] + # Valid rate limit reasons from IAM API: + # IAM API doesn't seem to return rate-limit 403s. + + reason = get_reason(e) or e.resp.reason + logger.info("Got 403 from google with reason {}".format(reason)) + return ( + reason not in resource_rlreasons and reason not in directory_rlreasons + ) + return False + + return isinstance(e, CirrusError) + + +# Default settings to control usage of backoff library. +BACKOFF_SETTINGS = { + "on_backoff": log_backoff_retry, + "on_giveup": log_backoff_giveup, + "max_tries": 5, + "giveup": exception_do_not_retry, +} diff --git a/cirrus/google_cloud/manager.py b/cirrus/google_cloud/manager.py index 36684e40..c9d88e26 100644 --- a/cirrus/google_cloud/manager.py +++ b/cirrus/google_cloud/manager.py @@ -24,6 +24,7 @@ from requests.exceptions import HTTPError as requestsHttpError from urllib.parse import quote as urlquote +from cirrus.backoff import BACKOFF_SETTINGS from cirrus.config import config from cirrus.core import CloudManager from cirrus.errors import CirrusError, CirrusUserError, CirrusNotFound @@ -80,111 +81,6 @@ ] -def _print_func_name(function): - return "{}.{}".format(function.__module__, function.__name__) - - -def _print_kwargs(kwargs): - return ", ".join("{}={}".format(k, repr(v)) for k, v in list(kwargs.items())) - - -def log_backoff_retry(details): - args_str = ", ".join(map(str, details["args"])) - kwargs_str = ( - (", " + _print_kwargs(details["kwargs"])) if details.get("kwargs") else "" - ) - func_call_log = "{}({}{})".format( - _print_func_name(details["target"]), args_str, kwargs_str - ) - logger.warn( - "backoff: call {func_call} delay {wait:0.1f} seconds after {tries} tries".format( - func_call=func_call_log, **details - ) - ) - - -def log_backoff_giveup(details): - args_str = ", ".join(map(str, details["args"])) - kwargs_str = ( - (", " + _print_kwargs(details["kwargs"])) if details.get("kwargs") else "" - ) - func_call_log = "{}({}{})".format( - _print_func_name(details["target"]), args_str, kwargs_str - ) - logger.error( - "backoff: gave up call {func_call} after {tries} tries; exception: {exc}".format( - func_call=func_call_log, exc=sys.exc_info(), **details - ) - ) - - -def get_reason(http_error): - """ - temporary solution to work around googleapiclient bug that doesn't - parse reason from server response - """ - reason = http_error.resp.reason - try: - data = json.loads(http_error.content.decode("utf-8")) - if isinstance(data, dict): - reason = data["error"].get("reason") - if "errors" in data["error"] and len(data["error"]["errors"]) > 0: - reason = data["error"]["errors"][0]["reason"] - except (ValueError, KeyError, TypeError): - pass - if reason is None: - reason = "" - return reason - - -def exception_do_not_retry(e): - """ - True if we should not retry. - - We should not retry for errors that we raise (CirrusErrors) - - We should not retry for Google errors that are not temporary - and not recoverable by retry - """ - if isinstance(e, GoogleHttpError): - if e.resp.status == 403: - # Then we should return True unless it's a rate limit error. - # Note: There is overlap in the reason codes for these APIs - # which is not ideal. e.g. userRateLimitExceeded is in both - # resource manager API and directory API. - # Fortunately both cases warrant retrying. - # Valid rate limit reasons from CLOUD RESOURCE MANAGER API: - # cloud.google.com/resource-manager/docs/core_errors#FORBIDDEN - # Many limit errors listed; only a few warrant retry. - resource_rlreasons = [ - "concurrentLimitExceeded", - "limitExceeded", - "rateLimitExceeded", - "userRateLimitExceeded", - ] - # Valid rate limit reasons from DIRECTORY API: - # developers.google.com/admin-sdk/directory/v1/limits - directory_rlreasons = ["userRateLimitExceeded", "quotaExceeded"] - # Valid rate limit reasons from IAM API: - # IAM API doesn't seem to return rate-limit 403s. - - reason = get_reason(e) or e.resp.reason - logger.info("Got 403 from google with reason {}".format(reason)) - return ( - reason not in resource_rlreasons and reason not in directory_rlreasons - ) - return False - - return isinstance(e, CirrusError) - - -# Default settings to control usage of backoff library. -BACKOFF_SETTINGS = { - "on_backoff": log_backoff_retry, - "on_giveup": log_backoff_giveup, - "max_tries": 5, - "giveup": exception_do_not_retry, -} - - class GoogleCloudManager(CloudManager): """ Manage a Google Cloud Project (users, groups, resources, and policies) diff --git a/cirrus/google_cloud/services.py b/cirrus/google_cloud/services.py index 8c2fa1c3..30bdd9c1 100644 --- a/cirrus/google_cloud/services.py +++ b/cirrus/google_cloud/services.py @@ -4,7 +4,10 @@ See README for details on different ways to interact with Google's API(s) """ from googleapiclient.discovery import build +import backoff + from cirrus.config import config +from cirrus.google_cloud.manager import BACKOFF_SETTINGS class GoogleService(object): @@ -41,6 +44,7 @@ def use_delegated_credentials(self, user_to_become): delegated_credentials = self.creds.with_subject(user_to_become) self.creds = delegated_credentials + @backoff.on_exception(backoff.expo, Exception, **BACKOFF_SETTINGS) def build_service(self): """ Combines service, version, and creds to give a resource that diff --git a/test/test_google_cloud_manage.py b/test/test_google_cloud_manage.py index efbd1cb8..07088d73 100644 --- a/test/test_google_cloud_manage.py +++ b/test/test_google_cloud_manage.py @@ -1376,10 +1376,10 @@ def test_add_member_backoff_giveup(test_cloud_manager): mock_config = {"members.side_effect": HttpError(MagicMock(), b"test")} test_cloud_manager._admin_service.configure_mock(**mock_config) - warn = cirrus.google_cloud.manager.logger.warn - error = cirrus.google_cloud.manager.logger.error - with patch("cirrus.google_cloud.manager.logger.warn") as logger_warn, patch( - "cirrus.google_cloud.manager.logger.error" + warn = cirrus.backoff.logger.warning + error = cirrus.backoff.logger.error + with patch("cirrus.backoff.logger.warning") as logger_warn, patch( + "cirrus.backoff.logger.error" ) as logger_error: # keep the side effect to actually put logs, so you can see the format with `-s` logger_warn.side_effect = warn @@ -1402,10 +1402,10 @@ def test_authorized_session_retry(test_cloud_manager): mock_config = {"get.side_effect": HttpError(MagicMock(), b"test")} test_cloud_manager._authed_session.configure_mock(**mock_config) - warn = cirrus.google_cloud.manager.logger.warn - error = cirrus.google_cloud.manager.logger.error - with patch("cirrus.google_cloud.manager.logger.warn") as logger_warn, patch( - "cirrus.google_cloud.manager.logger.error" + warn = cirrus.backoff.logger.warning + error = cirrus.backoff.logger.error + with patch("cirrus.backoff.logger.warning") as logger_warn, patch( + "cirrus.backoff.logger.error" ) as logger_error: # keep the side effect to actually put logs, so you can see the format with `-s` logger_warn.side_effect = warn @@ -1427,10 +1427,10 @@ def test_handled_exception_no_retry(test_cloud_manager): mock_config = {"members.side_effect": CirrusError(MagicMock(), b"test")} test_cloud_manager._admin_service.configure_mock(**mock_config) - warn = cirrus.google_cloud.manager.logger.warn - error = cirrus.google_cloud.manager.logger.error - with patch("cirrus.google_cloud.manager.logger.warn") as logger_warn, patch( - "cirrus.google_cloud.manager.logger.error" + warn = cirrus.backoff.logger.warning + error = cirrus.backoff.logger.error + with patch("cirrus.backoff.logger.warning") as logger_warn, patch( + "cirrus.backoff.logger.error" ) as logger_error: # keep the side effect to actually put logs, so you can see the format with `-s` logger_warn.side_effect = warn @@ -1459,10 +1459,10 @@ def test_handled_exception_403_no_retry(test_cloud_manager): http_error = HttpError(resp=response, content=b"") mock_config = {"get.side_effect": http_error} test_cloud_manager._authed_session.configure_mock(**mock_config) - warn = cirrus.google_cloud.manager.logger.warn - error = cirrus.google_cloud.manager.logger.error - with patch("cirrus.google_cloud.manager.logger.warn") as logger_warn, patch( - "cirrus.google_cloud.manager.logger.error" + warn = cirrus.backoff.logger.warning + error = cirrus.backoff.logger.error + with patch("cirrus.backoff.logger.warning") as logger_warn, patch( + "cirrus.backoff.logger.error" ) as logger_error: # keep the side effect to actually put logs, so you can see the format with `-s` logger_warn.side_effect = warn @@ -1490,10 +1490,10 @@ def test_unhandled_exception_403_ratelimit_retry(test_cloud_manager): http_error = HttpError(resp=response, content=b"") mock_config = {"get.side_effect": http_error} test_cloud_manager._authed_session.configure_mock(**mock_config) - warn = cirrus.google_cloud.manager.logger.warn - error = cirrus.google_cloud.manager.logger.error - with patch("cirrus.google_cloud.manager.logger.warn") as logger_warn, patch( - "cirrus.google_cloud.manager.logger.error" + warn = cirrus.backoff.logger.warning + error = cirrus.backoff.logger.error + with patch("cirrus.backoff.logger.warning") as logger_warn, patch( + "cirrus.backoff.logger.error" ) as logger_error: # keep the side effect to actually put logs, so you can see the format with `-s` logger_warn.side_effect = warn @@ -1515,10 +1515,10 @@ def test_unhandled_exception_retry(test_cloud_manager): mock_config = {"members.side_effect": IndexError()} test_cloud_manager._admin_service.configure_mock(**mock_config) - warn = cirrus.google_cloud.manager.logger.warn - error = cirrus.google_cloud.manager.logger.error - with patch("cirrus.google_cloud.manager.logger.warn") as logger_warn, patch( - "cirrus.google_cloud.manager.logger.error" + warn = cirrus.backoff.logger.warning + error = cirrus.backoff.logger.error + with patch("cirrus.backoff.logger.warning") as logger_warn, patch( + "cirrus.backoff.logger.error" ) as logger_error: # keep the side effect to actually put logs, so you can see the format with `-s` logger_warn.side_effect = warn @@ -1541,10 +1541,10 @@ def test_authorized_session_unhandled_exception_retry(test_cloud_manager): mock_config = {"get.side_effect": Exception(MagicMock(), b"test")} test_cloud_manager._authed_session.configure_mock(**mock_config) - warn = cirrus.google_cloud.manager.logger.warn - error = cirrus.google_cloud.manager.logger.error - with patch("cirrus.google_cloud.manager.logger.warn") as logger_warn, patch( - "cirrus.google_cloud.manager.logger.error" + warn = cirrus.backoff.logger.warning + error = cirrus.backoff.logger.error + with patch("cirrus.backoff.logger.warning") as logger_warn, patch( + "cirrus.backoff.logger.error" ) as logger_error: # keep the side effect to actually put logs, so you can see the format with `-s` logger_warn.side_effect = warn