Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[bugfix] Check response status in the httpjson log handler and retry in case of TOO_MANY_REQUESTS #3356

Merged
merged 11 commits into from
Jan 17, 2025
18 changes: 18 additions & 0 deletions docs/config_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1604,6 +1604,24 @@ This handler transmits the whole log record, meaning that all the information wi
.. versionadded:: 4.1


.. py:attribute:: logging.handlers_perflog..httpjson..backoff_intervals

List of wait intervals in seconds when server responds with HTTP error 429 (``TOO_MANY_REQUESTS``).

In this case, ReFrame will retry contacting the server after waiting an amount of time that is determined by cyclically iterating this list of intervals.

ReFrame will keep trying contacting the server, until a different HTTP resonse is received (either success or error) or the corresponding :attr:`~config.logging.handlers_perflog..httpjson..timeout` is exceeded.

.. versionadded:: 4.7.3


.. py:attribute:: logging.handlers_perflog..httpjson..timeout

Timeout in seconds for retrying when server responds with HTTP error 429 (``TOO_MANY_REQUESTS``).


.. versionadded:: 4.7.3


.. _exec-mode-config:

Expand Down
33 changes: 26 additions & 7 deletions reframe/core/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# SPDX-License-Identifier: BSD-3-Clause

import abc
import itertools
import logging
import logging.handlers
import numbers
Expand Down Expand Up @@ -554,6 +555,8 @@ def _create_httpjson_handler(site_config, config_prefix):
json_formatter = site_config.get(f'{config_prefix}/json_formatter')
extra_headers = site_config.get(f'{config_prefix}/extra_headers')
debug = site_config.get(f'{config_prefix}/debug')
backoff_intervals = site_config.get(f'{config_prefix}/backoff_intervals')
timeout = site_config.get(f'{config_prefix}/timeout')

parsed_url = urllib.parse.urlparse(url)
if parsed_url.scheme not in {'http', 'https'}:
Expand Down Expand Up @@ -595,7 +598,7 @@ def _create_httpjson_handler(site_config, config_prefix):
'no data will be sent to the server')

return HTTPJSONHandler(url, extras, ignore_keys, json_formatter,
extra_headers, debug)
extra_headers, debug, backoff_intervals, timeout)


def _record_to_json(record, extras, ignore_keys):
Expand Down Expand Up @@ -645,7 +648,7 @@ class HTTPJSONHandler(logging.Handler):

def __init__(self, url, extras=None, ignore_keys=None,
json_formatter=None, extra_headers=None,
debug=False):
debug=False, backoff_intervals=(1, 2, 3), timeout=0):
super().__init__()
self._url = url
self._extras = extras
Expand All @@ -669,6 +672,8 @@ def __init__(self, url, extras=None, ignore_keys=None,
self._headers.update(extra_headers)

self._debug = debug
self._timeout = timeout
self._backoff_intervals = backoff_intervals

def emit(self, record):
# Convert tags to a list to make them JSON friendly
Expand All @@ -680,19 +685,33 @@ def emit(self, record):
return

if self._debug:
import time
ts = int(time.time() * 1_000)
dump_file = f'httpjson_record_{ts}.json'
with open(dump_file, 'w') as fp:
fp.write(json_record)

return

timeout_time = time.time() + self._timeout
try:
requests.post(
self._url, data=json_record,
headers=self._headers
)
backoff_intervals = itertools.cycle(self._backoff_intervals)
while True:
response = requests.post(
self._url, data=json_record,
headers=self._headers
)
if response.ok:
break

if (response.status_code == 429 and
(not self._timeout or time.time() < timeout_time)):
time.sleep(next(backoff_intervals))
continue

raise LoggingError(
f'HTTPJSONhandler logging failed: HTTP response code '
f'{response.status_code}'
)
except requests.exceptions.RequestException as e:
raise LoggingError('logging failed') from e

Expand Down
21 changes: 16 additions & 5 deletions reframe/frontend/executors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@
import reframe.utility.jsonext as jsonext
import reframe.utility.typecheck as typ
from reframe.core.exceptions import (AbortTaskError,
JobNotStartedError,
FailureLimitError,
ForceExitError,
JobNotStartedError,
LoggingError,
RunSessionTimeout,
SkipTestError,
StatisticsError,
Expand Down Expand Up @@ -480,8 +481,13 @@ def finalize(self):

self._current_stage = 'finalize'
self._notify_listeners('on_task_success')
self._perflogger.log_performance(logging.INFO, self,
multiline=self._perflog_compat)
try:
self._perflogger.log_performance(logging.INFO, self,
multiline=self._perflog_compat)
except LoggingError as e:
logging.getlogger().warning(
f'could not log performance data for {self.testcase}: {e}'
)

@logging.time_function
def cleanup(self, *args, **kwargs):
Expand All @@ -491,8 +497,13 @@ def fail(self, exc_info=None, callback='on_task_failure'):
self._failed_stage = self._current_stage
self._exc_info = exc_info or sys.exc_info()
self._notify_listeners(callback)
self._perflogger.log_performance(logging.INFO, self,
multiline=self._perflog_compat)
try:
self._perflogger.log_performance(logging.INFO, self,
multiline=self._perflog_compat)
except LoggingError as e:
logging.getlogger().warning(
f'could not log performance data for {self.testcase}: {e}'
)

def skip(self, exc_info=None):
self._skipped = True
Expand Down
9 changes: 8 additions & 1 deletion reframe/schemas/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,12 @@
},
"json_formatter": {},
"extra_headers": {"type": "object"},
"debug": {"type": "boolean"}
"debug": {"type": "boolean"},
"sleep_intervals": {
"type": "array",
"items": {"type": "number"}
},
"timeout": {"type": "number"}
},
"required": ["url"]
}
Expand Down Expand Up @@ -632,6 +637,8 @@
"logging/handlers_perflog/httpjson_json_formatter": null,
"logging/handlers_perflog/httpjson_extra_headers": {},
"logging/handlers_perflog/httpjson_debug": false,
"logging/handlers_perflog/httpjson_sleep_intervals": [0.1, 0.2, 0.4, 0.8, 1.6, 3.2],
"logging/handlers_perflog/httpjson_timeout": 0,
"modes/options": [],
"modes/target_systems": ["*"],
"storage/enable": false,
Expand Down
Loading