From fd001d53965208094a23a2f4c25811ad87f89821 Mon Sep 17 00:00:00 2001 From: Eirini Koutsaniti Date: Fri, 20 Dec 2024 13:51:04 +0100 Subject: [PATCH 1/9] Add simple error checking in httpjson --- reframe/core/logging.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/reframe/core/logging.py b/reframe/core/logging.py index bf68536c4..b050f03c4 100644 --- a/reframe/core/logging.py +++ b/reframe/core/logging.py @@ -689,10 +689,15 @@ def emit(self, record): return try: - requests.post( + response = requests.post( self._url, data=json_record, headers=self._headers ) + if response.status_code != 200: + raise LoggingError( + f'logging failed: HTTP response code ' + f'{response.status_code}' + ) except requests.exceptions.RequestException as e: raise LoggingError('logging failed') from e From 9bfae8a44a197689304dc06be7bdb5476eb855d7 Mon Sep 17 00:00:00 2001 From: Eirini Koutsaniti Date: Fri, 20 Dec 2024 14:28:02 +0100 Subject: [PATCH 2/9] Handle 429 in httpjson logger --- reframe/core/logging.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/reframe/core/logging.py b/reframe/core/logging.py index b050f03c4..e24d2c28d 100644 --- a/reframe/core/logging.py +++ b/reframe/core/logging.py @@ -689,11 +689,18 @@ def emit(self, record): return try: - response = requests.post( - self._url, data=json_record, - headers=self._headers - ) - if response.status_code != 200: + while True: + response = requests.post( + self._url, data=json_record, + headers=self._headers + ) + if response.status_code == 200: + break + + if response.status_code == 429: + time.sleep(1) + continue + raise LoggingError( f'logging failed: HTTP response code ' f'{response.status_code}' From ffae0ec19e0dbd6cd30165c99cbf6d35927479c7 Mon Sep 17 00:00:00 2001 From: Eirini Koutsaniti Date: Fri, 20 Dec 2024 16:26:29 +0100 Subject: [PATCH 3/9] Fix import --- reframe/core/logging.py | 1 - 1 file changed, 1 deletion(-) diff --git a/reframe/core/logging.py b/reframe/core/logging.py index e24d2c28d..8a83b3942 100644 --- a/reframe/core/logging.py +++ b/reframe/core/logging.py @@ -680,7 +680,6 @@ def emit(self, record): return if self._debug: - import time ts = int(time.time() * 1_000) dump_file = f'httpjson_record_{ts}.json' with open(dump_file, 'w') as fp: From eb01cdefa64c50a0383ccca42aa2faedaa39e71d Mon Sep 17 00:00:00 2001 From: Eirini Koutsaniti Date: Wed, 8 Jan 2025 08:45:06 +0100 Subject: [PATCH 4/9] Small improvements --- reframe/core/logging.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/reframe/core/logging.py b/reframe/core/logging.py index 8a83b3942..5691caa7e 100644 --- a/reframe/core/logging.py +++ b/reframe/core/logging.py @@ -4,6 +4,7 @@ # SPDX-License-Identifier: BSD-3-Clause import abc +import itertools import logging import logging.handlers import numbers @@ -688,20 +689,21 @@ def emit(self, record): return try: + sleep_times = itertools.cycle([.1, .2, .4, .8, 1.6, 3.2]) while True: response = requests.post( self._url, data=json_record, headers=self._headers ) - if response.status_code == 200: + if response.ok: break if response.status_code == 429: - time.sleep(1) + time.sleep(next(sleep_times)) continue raise LoggingError( - f'logging failed: HTTP response code ' + f'HTTPJSONhandler logging failed: HTTP response code ' f'{response.status_code}' ) except requests.exceptions.RequestException as e: From 32e6bca32cfb807de36bf31fde365b5a8c59a769 Mon Sep 17 00:00:00 2001 From: Eirini Koutsaniti Date: Wed, 8 Jan 2025 09:10:05 +0100 Subject: [PATCH 5/9] Add warning for logging errors --- reframe/frontend/executors/__init__.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/reframe/frontend/executors/__init__.py b/reframe/frontend/executors/__init__.py index 68d2d223c..8199369c8 100644 --- a/reframe/frontend/executors/__init__.py +++ b/reframe/frontend/executors/__init__.py @@ -20,9 +20,10 @@ import reframe.utility.jsonext as jsonext import reframe.utility.typecheck as typ from reframe.core.exceptions import (AbortTaskError, - JobNotStartedError, FailureLimitError, ForceExitError, + JobNotStartedError, + LoggingError, RunSessionTimeout, SkipTestError, StatisticsError, @@ -480,8 +481,13 @@ def finalize(self): self._current_stage = 'finalize' self._notify_listeners('on_task_success') - self._perflogger.log_performance(logging.INFO, self, - multiline=self._perflog_compat) + try: + self._perflogger.log_performance(logging.INFO, self, + multiline=self._perflog_compat) + except LoggingError as e: + logging.getlogger().warning( + f'could not log performance data for {self.testcase}: {e}' + ) @logging.time_function def cleanup(self, *args, **kwargs): @@ -491,8 +497,13 @@ def fail(self, exc_info=None, callback='on_task_failure'): self._failed_stage = self._current_stage self._exc_info = exc_info or sys.exc_info() self._notify_listeners(callback) - self._perflogger.log_performance(logging.INFO, self, - multiline=self._perflog_compat) + try: + self._perflogger.log_performance(logging.INFO, self, + multiline=self._perflog_compat) + except LoggingError as e: + logging.getlogger().warning( + f'could not log performance data for {self.testcase}: {e}' + ) def skip(self, exc_info=None): self._skipped = True From 958372cbbacecc17b76649be386e5fbd19b7d354 Mon Sep 17 00:00:00 2001 From: Eirini Koutsaniti Date: Tue, 14 Jan 2025 18:32:44 +0100 Subject: [PATCH 6/9] Add sleep_times and timeout parameters in the httpjson logger --- docs/config_reference.rst | 13 +++++++++++++ reframe/core/logging.py | 20 ++++++++++++++++---- reframe/schemas/config.json | 9 ++++++++- 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/docs/config_reference.rst b/docs/config_reference.rst index 5b7be4138..9cdda2375 100644 --- a/docs/config_reference.rst +++ b/docs/config_reference.rst @@ -1603,6 +1603,19 @@ This handler transmits the whole log record, meaning that all the information wi .. versionadded:: 4.1 +.. py:attribute:: logging.handlers_perflog..httpjson..sleep_times + + In the case that the http request gets a response 429 (TOO_MANY_REQUESTS), Reframe will cycle over this list of sleep times until it succeeds, gets a different code or exceeds the timeout (when it is set). + + .. versionadded:: 4.7.3 + +.. py:attribute:: logging.handlers_perflog..httpjson..timeout + + In the case that the http request gets a response 429 (TOO_MANY_REQUESTS), Reframe will keep trying until it succeeds, gets a different error or exceeds the timeout (in seconds). + When this parameter is set to 0, Reframe will keep retrying until it gets a different status code. + + .. versionadded:: 4.7.3 + .. _exec-mode-config: diff --git a/reframe/core/logging.py b/reframe/core/logging.py index 5691caa7e..a70acbbb6 100644 --- a/reframe/core/logging.py +++ b/reframe/core/logging.py @@ -555,6 +555,8 @@ def _create_httpjson_handler(site_config, config_prefix): json_formatter = site_config.get(f'{config_prefix}/json_formatter') extra_headers = site_config.get(f'{config_prefix}/extra_headers') debug = site_config.get(f'{config_prefix}/debug') + sleep_times = site_config.get(f'{config_prefix}/sleep_times') + timeout = site_config.get(f'{config_prefix}/timeout') parsed_url = urllib.parse.urlparse(url) if parsed_url.scheme not in {'http', 'https'}: @@ -596,7 +598,7 @@ def _create_httpjson_handler(site_config, config_prefix): 'no data will be sent to the server') return HTTPJSONHandler(url, extras, ignore_keys, json_formatter, - extra_headers, debug) + extra_headers, debug, sleep_times, timeout) def _record_to_json(record, extras, ignore_keys): @@ -646,7 +648,8 @@ class HTTPJSONHandler(logging.Handler): def __init__(self, url, extras=None, ignore_keys=None, json_formatter=None, extra_headers=None, - debug=False): + debug=False, sleep_times=[.1, .2, .4, .8, 1.6, 3.2], + timeout=0): super().__init__() self._url = url self._extras = extras @@ -670,6 +673,8 @@ def __init__(self, url, extras=None, ignore_keys=None, self._headers.update(extra_headers) self._debug = debug + self._timeout = timeout + self._sleep_times = sleep_times def emit(self, record): # Convert tags to a list to make them JSON friendly @@ -688,8 +693,9 @@ def emit(self, record): return + timeout_time = time.time() + self._timeout try: - sleep_times = itertools.cycle([.1, .2, .4, .8, 1.6, 3.2]) + sleep_times = itertools.cycle(self._sleep_times) while True: response = requests.post( self._url, data=json_record, @@ -698,7 +704,13 @@ def emit(self, record): if response.ok: break - if response.status_code == 429: + if ( + response.status_code == 429 and + ( + not self._timeout or + time.time() < timeout_time + ) + ): time.sleep(next(sleep_times)) continue diff --git a/reframe/schemas/config.json b/reframe/schemas/config.json index e18b29d49..db9d166fd 100644 --- a/reframe/schemas/config.json +++ b/reframe/schemas/config.json @@ -173,7 +173,12 @@ }, "json_formatter": {}, "extra_headers": {"type": "object"}, - "debug": {"type": "boolean"} + "debug": {"type": "boolean"}, + "sleep_times": { + "type": "array", + "items": {"type": "number"} + }, + "timeout": {"type": "number"} }, "required": ["url"] } @@ -632,6 +637,8 @@ "logging/handlers_perflog/httpjson_json_formatter": null, "logging/handlers_perflog/httpjson_extra_headers": {}, "logging/handlers_perflog/httpjson_debug": false, + "logging/handlers_perflog/httpjson_sleep_times": [0.1, 0.2, 0.4, 0.8, 1.6, 3.2], + "logging/handlers_perflog/httpjson_timeout": 0, "modes/options": [], "modes/target_systems": ["*"], "storage/enable": false, From 2b8adff282f3da94336b09bdb94b6c4ec5fe4b7d Mon Sep 17 00:00:00 2001 From: Eirini Koutsaniti Date: Wed, 15 Jan 2025 11:45:38 +0100 Subject: [PATCH 7/9] Rename sleep_times to sleep_intervals --- docs/config_reference.rst | 4 ++-- reframe/core/logging.py | 12 ++++++------ reframe/schemas/config.json | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/config_reference.rst b/docs/config_reference.rst index 9cdda2375..25b36dda9 100644 --- a/docs/config_reference.rst +++ b/docs/config_reference.rst @@ -1603,9 +1603,9 @@ This handler transmits the whole log record, meaning that all the information wi .. versionadded:: 4.1 -.. py:attribute:: logging.handlers_perflog..httpjson..sleep_times +.. py:attribute:: logging.handlers_perflog..httpjson..sleep_intervals - In the case that the http request gets a response 429 (TOO_MANY_REQUESTS), Reframe will cycle over this list of sleep times until it succeeds, gets a different code or exceeds the timeout (when it is set). + In the case that the http request gets a response 429 (TOO_MANY_REQUESTS), Reframe will cycle over this list of sleep waiting intervals until it gets a different code or exceeds the timeout (when it is set). .. versionadded:: 4.7.3 diff --git a/reframe/core/logging.py b/reframe/core/logging.py index a70acbbb6..8b07d613d 100644 --- a/reframe/core/logging.py +++ b/reframe/core/logging.py @@ -555,7 +555,7 @@ def _create_httpjson_handler(site_config, config_prefix): json_formatter = site_config.get(f'{config_prefix}/json_formatter') extra_headers = site_config.get(f'{config_prefix}/extra_headers') debug = site_config.get(f'{config_prefix}/debug') - sleep_times = site_config.get(f'{config_prefix}/sleep_times') + sleep_intervals = site_config.get(f'{config_prefix}/sleep_intervals') timeout = site_config.get(f'{config_prefix}/timeout') parsed_url = urllib.parse.urlparse(url) @@ -598,7 +598,7 @@ def _create_httpjson_handler(site_config, config_prefix): 'no data will be sent to the server') return HTTPJSONHandler(url, extras, ignore_keys, json_formatter, - extra_headers, debug, sleep_times, timeout) + extra_headers, debug, sleep_intervals, timeout) def _record_to_json(record, extras, ignore_keys): @@ -648,7 +648,7 @@ class HTTPJSONHandler(logging.Handler): def __init__(self, url, extras=None, ignore_keys=None, json_formatter=None, extra_headers=None, - debug=False, sleep_times=[.1, .2, .4, .8, 1.6, 3.2], + debug=False, sleep_intervals=[.1, .2, .4, .8, 1.6, 3.2], timeout=0): super().__init__() self._url = url @@ -674,7 +674,7 @@ def __init__(self, url, extras=None, ignore_keys=None, self._debug = debug self._timeout = timeout - self._sleep_times = sleep_times + self._sleep_intervals = sleep_intervals def emit(self, record): # Convert tags to a list to make them JSON friendly @@ -695,7 +695,7 @@ def emit(self, record): timeout_time = time.time() + self._timeout try: - sleep_times = itertools.cycle(self._sleep_times) + sleep_intervals = itertools.cycle(self._sleep_intervals) while True: response = requests.post( self._url, data=json_record, @@ -711,7 +711,7 @@ def emit(self, record): time.time() < timeout_time ) ): - time.sleep(next(sleep_times)) + time.sleep(next(sleep_intervals)) continue raise LoggingError( diff --git a/reframe/schemas/config.json b/reframe/schemas/config.json index db9d166fd..2fbbc4887 100644 --- a/reframe/schemas/config.json +++ b/reframe/schemas/config.json @@ -174,7 +174,7 @@ "json_formatter": {}, "extra_headers": {"type": "object"}, "debug": {"type": "boolean"}, - "sleep_times": { + "sleep_intervals": { "type": "array", "items": {"type": "number"} }, @@ -637,7 +637,7 @@ "logging/handlers_perflog/httpjson_json_formatter": null, "logging/handlers_perflog/httpjson_extra_headers": {}, "logging/handlers_perflog/httpjson_debug": false, - "logging/handlers_perflog/httpjson_sleep_times": [0.1, 0.2, 0.4, 0.8, 1.6, 3.2], + "logging/handlers_perflog/httpjson_sleep_intervals": [0.1, 0.2, 0.4, 0.8, 1.6, 3.2], "logging/handlers_perflog/httpjson_timeout": 0, "modes/options": [], "modes/target_systems": ["*"], From 7bf459d27a01368b35091eb57b53454c21a3f5fa Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Fri, 17 Jan 2025 00:12:25 +0100 Subject: [PATCH 8/9] Rename `sleep_intervals` to `backoff_intervals` and enhance docs --- docs/config_reference.rst | 15 ++++++++++----- reframe/core/logging.py | 22 ++++++++-------------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/docs/config_reference.rst b/docs/config_reference.rst index 25b36dda9..d6c08eb33 100644 --- a/docs/config_reference.rst +++ b/docs/config_reference.rst @@ -1603,19 +1603,24 @@ This handler transmits the whole log record, meaning that all the information wi .. versionadded:: 4.1 -.. py:attribute:: logging.handlers_perflog..httpjson..sleep_intervals - In the case that the http request gets a response 429 (TOO_MANY_REQUESTS), Reframe will cycle over this list of sleep waiting intervals until it gets a different code or exceeds the timeout (when it is set). +.. py:attribute:: logging.handlers_perflog..httpjson..backoff_intervals + + List of wait intervals in seconds when server responds with HTTP error 429 (``TOO_MANY_REQUESTS``). + + In this case, ReFrame will retry contacting the server after waiting an amount of time that is determined by cyclically iterating this list of intervals. + + ReFrame will keep trying contacting the server, until a different HTTP resonse is received (either success or error) or the corresponding :attr:`~config.logging.handlers_perflog..httpjson..timeout` is exceeded. .. versionadded:: 4.7.3 + .. py:attribute:: logging.handlers_perflog..httpjson..timeout - In the case that the http request gets a response 429 (TOO_MANY_REQUESTS), Reframe will keep trying until it succeeds, gets a different error or exceeds the timeout (in seconds). - When this parameter is set to 0, Reframe will keep retrying until it gets a different status code. + Timeout in seconds for retrying when server responds with HTTP error 429 (``TOO_MANY_REQUESTS``). - .. versionadded:: 4.7.3 + .. versionadded:: 4.7.3 .. _exec-mode-config: diff --git a/reframe/core/logging.py b/reframe/core/logging.py index 8b07d613d..4b1c9e048 100644 --- a/reframe/core/logging.py +++ b/reframe/core/logging.py @@ -555,7 +555,7 @@ def _create_httpjson_handler(site_config, config_prefix): json_formatter = site_config.get(f'{config_prefix}/json_formatter') extra_headers = site_config.get(f'{config_prefix}/extra_headers') debug = site_config.get(f'{config_prefix}/debug') - sleep_intervals = site_config.get(f'{config_prefix}/sleep_intervals') + backoff_intervals = site_config.get(f'{config_prefix}/backoff_intervals') timeout = site_config.get(f'{config_prefix}/timeout') parsed_url = urllib.parse.urlparse(url) @@ -598,7 +598,7 @@ def _create_httpjson_handler(site_config, config_prefix): 'no data will be sent to the server') return HTTPJSONHandler(url, extras, ignore_keys, json_formatter, - extra_headers, debug, sleep_intervals, timeout) + extra_headers, debug, backoff_intervals, timeout) def _record_to_json(record, extras, ignore_keys): @@ -648,8 +648,7 @@ class HTTPJSONHandler(logging.Handler): def __init__(self, url, extras=None, ignore_keys=None, json_formatter=None, extra_headers=None, - debug=False, sleep_intervals=[.1, .2, .4, .8, 1.6, 3.2], - timeout=0): + debug=False, backoff_intervals=(1, 2, 3), timeout=0): super().__init__() self._url = url self._extras = extras @@ -674,7 +673,7 @@ def __init__(self, url, extras=None, ignore_keys=None, self._debug = debug self._timeout = timeout - self._sleep_intervals = sleep_intervals + self._backoff_intervals = backoff_intervals def emit(self, record): # Convert tags to a list to make them JSON friendly @@ -695,7 +694,7 @@ def emit(self, record): timeout_time = time.time() + self._timeout try: - sleep_intervals = itertools.cycle(self._sleep_intervals) + backoff_intervals = itertools.cycle(self._backoff_intervals) while True: response = requests.post( self._url, data=json_record, @@ -704,14 +703,9 @@ def emit(self, record): if response.ok: break - if ( - response.status_code == 429 and - ( - not self._timeout or - time.time() < timeout_time - ) - ): - time.sleep(next(sleep_intervals)) + if (response.status_code == 429 and + (not self._timeout or time.time() < timeout_time)): + time.sleep(next(backoff_intervals)) continue raise LoggingError( From 347ef6c059ff901eeed348e2d7ea872e83fa9a36 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Fri, 17 Jan 2025 00:27:55 +0100 Subject: [PATCH 9/9] Rename also httpjson `timeout` to `retry_timeout` --- docs/config_reference.rst | 10 +++++++++- reframe/core/logging.py | 9 +++++---- reframe/schemas/config.json | 8 ++++---- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/docs/config_reference.rst b/docs/config_reference.rst index d6c08eb33..f8185dc41 100644 --- a/docs/config_reference.rst +++ b/docs/config_reference.rst @@ -1606,6 +1606,9 @@ This handler transmits the whole log record, meaning that all the information wi .. py:attribute:: logging.handlers_perflog..httpjson..backoff_intervals + :required: No + :default: ``[0.1, 0.2, 0.4, 0.8, 1.6, 3.2]`` + List of wait intervals in seconds when server responds with HTTP error 429 (``TOO_MANY_REQUESTS``). In this case, ReFrame will retry contacting the server after waiting an amount of time that is determined by cyclically iterating this list of intervals. @@ -1615,10 +1618,15 @@ This handler transmits the whole log record, meaning that all the information wi .. versionadded:: 4.7.3 -.. py:attribute:: logging.handlers_perflog..httpjson..timeout +.. py:attribute:: logging.handlers_perflog..httpjson..retry_timeout + + :required: No + :default: ``0`` Timeout in seconds for retrying when server responds with HTTP error 429 (``TOO_MANY_REQUESTS``). + If set to zero, ReFrame will retry until another HTTP response (success or error) is received. + .. versionadded:: 4.7.3 diff --git a/reframe/core/logging.py b/reframe/core/logging.py index 4b1c9e048..aefac32dd 100644 --- a/reframe/core/logging.py +++ b/reframe/core/logging.py @@ -556,7 +556,7 @@ def _create_httpjson_handler(site_config, config_prefix): extra_headers = site_config.get(f'{config_prefix}/extra_headers') debug = site_config.get(f'{config_prefix}/debug') backoff_intervals = site_config.get(f'{config_prefix}/backoff_intervals') - timeout = site_config.get(f'{config_prefix}/timeout') + retry_timeout = site_config.get(f'{config_prefix}/retry_timeout') parsed_url = urllib.parse.urlparse(url) if parsed_url.scheme not in {'http', 'https'}: @@ -598,7 +598,8 @@ def _create_httpjson_handler(site_config, config_prefix): 'no data will be sent to the server') return HTTPJSONHandler(url, extras, ignore_keys, json_formatter, - extra_headers, debug, backoff_intervals, timeout) + extra_headers, debug, backoff_intervals, + retry_timeout) def _record_to_json(record, extras, ignore_keys): @@ -648,7 +649,7 @@ class HTTPJSONHandler(logging.Handler): def __init__(self, url, extras=None, ignore_keys=None, json_formatter=None, extra_headers=None, - debug=False, backoff_intervals=(1, 2, 3), timeout=0): + debug=False, backoff_intervals=(1, 2, 3), retry_timeout=0): super().__init__() self._url = url self._extras = extras @@ -672,7 +673,7 @@ def __init__(self, url, extras=None, ignore_keys=None, self._headers.update(extra_headers) self._debug = debug - self._timeout = timeout + self._timeout = retry_timeout self._backoff_intervals = backoff_intervals def emit(self, record): diff --git a/reframe/schemas/config.json b/reframe/schemas/config.json index 2fbbc4887..2f5ce8c90 100644 --- a/reframe/schemas/config.json +++ b/reframe/schemas/config.json @@ -174,11 +174,11 @@ "json_formatter": {}, "extra_headers": {"type": "object"}, "debug": {"type": "boolean"}, - "sleep_intervals": { + "backoff_intervals": { "type": "array", "items": {"type": "number"} }, - "timeout": {"type": "number"} + "retry_timeout": {"type": "number"} }, "required": ["url"] } @@ -637,8 +637,8 @@ "logging/handlers_perflog/httpjson_json_formatter": null, "logging/handlers_perflog/httpjson_extra_headers": {}, "logging/handlers_perflog/httpjson_debug": false, - "logging/handlers_perflog/httpjson_sleep_intervals": [0.1, 0.2, 0.4, 0.8, 1.6, 3.2], - "logging/handlers_perflog/httpjson_timeout": 0, + "logging/handlers_perflog/httpjson_backoff_intervals": [0.1, 0.2, 0.4, 0.8, 1.6, 3.2], + "logging/handlers_perflog/httpjson_retry_timeout": 0, "modes/options": [], "modes/target_systems": ["*"], "storage/enable": false,