diff --git a/docs/config_reference.rst b/docs/config_reference.rst index 5b7be4138..f8185dc41 100644 --- a/docs/config_reference.rst +++ b/docs/config_reference.rst @@ -1604,6 +1604,32 @@ This handler transmits the whole log record, meaning that all the information wi .. versionadded:: 4.1 +.. py:attribute:: logging.handlers_perflog..httpjson..backoff_intervals + + :required: No + :default: ``[0.1, 0.2, 0.4, 0.8, 1.6, 3.2]`` + + List of wait intervals in seconds when the server responds with HTTP error 429 (``TOO_MANY_REQUESTS``). + + In this case, ReFrame will retry contacting the server after waiting an amount of time that is determined by cyclically iterating this list of intervals. + + ReFrame will keep trying to contact the server until a different HTTP response is received (either success or error) or the corresponding :attr:`~config.logging.handlers_perflog..httpjson..retry_timeout` is exceeded. + + .. versionadded:: 4.7.3 + + +.. py:attribute:: logging.handlers_perflog..httpjson..retry_timeout + + :required: No + :default: ``0`` + + Timeout in seconds for retrying when the server responds with HTTP error 429 (``TOO_MANY_REQUESTS``). + + If set to zero, ReFrame will retry until another HTTP response (success or error) is received. + + + .. versionadded:: 4.7.3 + .. 
_exec-mode-config: diff --git a/reframe/core/logging.py b/reframe/core/logging.py index bf68536c4..aefac32dd 100644 --- a/reframe/core/logging.py +++ b/reframe/core/logging.py @@ -4,6 +4,7 @@ # SPDX-License-Identifier: BSD-3-Clause import abc +import itertools import logging import logging.handlers import numbers @@ -554,6 +555,8 @@ def _create_httpjson_handler(site_config, config_prefix): json_formatter = site_config.get(f'{config_prefix}/json_formatter') extra_headers = site_config.get(f'{config_prefix}/extra_headers') debug = site_config.get(f'{config_prefix}/debug') + backoff_intervals = site_config.get(f'{config_prefix}/backoff_intervals') + retry_timeout = site_config.get(f'{config_prefix}/retry_timeout') parsed_url = urllib.parse.urlparse(url) if parsed_url.scheme not in {'http', 'https'}: @@ -595,7 +598,8 @@ def _create_httpjson_handler(site_config, config_prefix): 'no data will be sent to the server') return HTTPJSONHandler(url, extras, ignore_keys, json_formatter, - extra_headers, debug) + extra_headers, debug, backoff_intervals, + retry_timeout) def _record_to_json(record, extras, ignore_keys): @@ -645,7 +649,7 @@ class HTTPJSONHandler(logging.Handler): def __init__(self, url, extras=None, ignore_keys=None, json_formatter=None, extra_headers=None, - debug=False): + debug=False, backoff_intervals=(1, 2, 3), retry_timeout=0): super().__init__() self._url = url self._extras = extras @@ -669,6 +673,8 @@ def __init__(self, url, extras=None, ignore_keys=None, self._headers.update(extra_headers) self._debug = debug + self._timeout = retry_timeout + self._backoff_intervals = backoff_intervals def emit(self, record): # Convert tags to a list to make them JSON friendly @@ -680,7 +686,6 @@ def emit(self, record): return if self._debug: - import time ts = int(time.time() * 1_000) dump_file = f'httpjson_record_{ts}.json' with open(dump_file, 'w') as fp: @@ -688,11 +693,26 @@ def emit(self, record): return + timeout_time = time.time() + self._timeout try: - 
requests.post( - self._url, data=json_record, - headers=self._headers - ) + backoff_intervals = itertools.cycle(self._backoff_intervals) + while True: + response = requests.post( + self._url, data=json_record, + headers=self._headers + ) + if response.ok: + break + + if (response.status_code == 429 and + (not self._timeout or time.time() < timeout_time)): + time.sleep(next(backoff_intervals)) + continue + + raise LoggingError( + f'HTTPJSONhandler logging failed: HTTP response code ' + f'{response.status_code}' + ) except requests.exceptions.RequestException as e: raise LoggingError('logging failed') from e diff --git a/reframe/frontend/executors/__init__.py b/reframe/frontend/executors/__init__.py index 68d2d223c..8199369c8 100644 --- a/reframe/frontend/executors/__init__.py +++ b/reframe/frontend/executors/__init__.py @@ -20,9 +20,10 @@ import reframe.utility.jsonext as jsonext import reframe.utility.typecheck as typ from reframe.core.exceptions import (AbortTaskError, - JobNotStartedError, FailureLimitError, ForceExitError, + JobNotStartedError, + LoggingError, RunSessionTimeout, SkipTestError, StatisticsError, @@ -480,8 +481,13 @@ def finalize(self): self._current_stage = 'finalize' self._notify_listeners('on_task_success') - self._perflogger.log_performance(logging.INFO, self, - multiline=self._perflog_compat) + try: + self._perflogger.log_performance(logging.INFO, self, + multiline=self._perflog_compat) + except LoggingError as e: + logging.getlogger().warning( + f'could not log performance data for {self.testcase}: {e}' + ) @logging.time_function def cleanup(self, *args, **kwargs): @@ -491,8 +497,13 @@ def fail(self, exc_info=None, callback='on_task_failure'): self._failed_stage = self._current_stage self._exc_info = exc_info or sys.exc_info() self._notify_listeners(callback) - self._perflogger.log_performance(logging.INFO, self, - multiline=self._perflog_compat) + try: + self._perflogger.log_performance(logging.INFO, self, + multiline=self._perflog_compat) + 
except LoggingError as e: + logging.getlogger().warning( + f'could not log performance data for {self.testcase}: {e}' + ) def skip(self, exc_info=None): self._skipped = True diff --git a/reframe/schemas/config.json b/reframe/schemas/config.json index e18b29d49..2f5ce8c90 100644 --- a/reframe/schemas/config.json +++ b/reframe/schemas/config.json @@ -173,7 +173,12 @@ }, "json_formatter": {}, "extra_headers": {"type": "object"}, - "debug": {"type": "boolean"} + "debug": {"type": "boolean"}, + "backoff_intervals": { + "type": "array", + "items": {"type": "number"} + }, + "retry_timeout": {"type": "number"} }, "required": ["url"] } @@ -632,6 +637,8 @@ "logging/handlers_perflog/httpjson_json_formatter": null, "logging/handlers_perflog/httpjson_extra_headers": {}, "logging/handlers_perflog/httpjson_debug": false, + "logging/handlers_perflog/httpjson_backoff_intervals": [0.1, 0.2, 0.4, 0.8, 1.6, 3.2], + "logging/handlers_perflog/httpjson_retry_timeout": 0, "modes/options": [], "modes/target_systems": ["*"], "storage/enable": false,