-
Notifications
You must be signed in to change notification settings - Fork 4
Check the status of providers if they provide a status check endpoint #87
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ | |
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
"""Test suite handler.""" | ||
|
||
import os | ||
import json | ||
import logging | ||
|
@@ -32,6 +33,7 @@ | |
from jsontas.jsontas import JsonTas | ||
import opentelemetry | ||
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator | ||
from requests.exceptions import ConnectionError as RequestsConnectionError, HTTPError | ||
|
||
from .esr_parameters import ESRParameters | ||
from .exceptions import EnvironmentProviderException | ||
|
@@ -121,6 +123,51 @@ def outcome(self) -> dict: | |
return self.test_suite_finished.get("data", {}).get("testSuiteOutcome", {}) | ||
return {} | ||
|
||
def _environment_active(self) -> bool: | ||
"""Check the status of the test environment.""" | ||
status_requests = ( | ||
( | ||
self.environment["iut"].get("provider_id", "iut_provider"), | ||
self.environment["iut"].get("status_request"), | ||
), | ||
( | ||
self.environment["executor"].get("provider_id", "execution_space_provider"), | ||
self.environment["executor"].get("status_request"), | ||
), | ||
( | ||
self.environment["log_area"].get("provider_id", "log_area_provider"), | ||
self.environment["log_area"].get("status_request"), | ||
), | ||
) | ||
executor = Executor(self.etos) | ||
for name, request in status_requests: | ||
if request is None: | ||
continue | ||
try: | ||
response = executor.do_request(request) | ||
response.json() | ||
except RequestsConnectionError: | ||
self.logger.exception( | ||
"Could not communicate with the status check endpoint for %r (%r)", | ||
name, | ||
request.get("url"), | ||
) | ||
return False | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should an error like this return None, to indicate that health check was inconclusive? So that a few more retries could be done? In other words, False should be returned only when we know for sure that the environment is inactive. |
||
except HTTPError as http_error: | ||
self.logger.error( | ||
"Got error code %r when communicating with %r (%r)", | ||
http_error.response.status_code, | ||
name, | ||
request.get("url"), | ||
) | ||
return False | ||
except json.JSONDecodeError: | ||
t-persson marked this conversation as resolved.
Show resolved
Hide resolved
|
||
self.logger.exception( | ||
"Failed to parse the response from for %r (%r)", name, request.get("url") | ||
) | ||
return False | ||
return True | ||
|
||
def _start(self, identifier: str) -> None: | ||
"""Start ETR for this sub suite. | ||
|
||
|
@@ -156,6 +203,12 @@ def _start(self, identifier: str) -> None: | |
if self.finished: | ||
self.logger.info("ETOS test runner has finished", extra={"user_log": True}) | ||
break | ||
if not self._environment_active(): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there risk of a race condition during termination, when self.finished is still False, but self._environment_active() already returns False? |
||
self.logger.error( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this be written to the OpenTelemetry span? |
||
"ETOS test environment is no longer active! Canceling.", | ||
extra={"user_log": True}, | ||
) | ||
break | ||
finally: | ||
self.release(identifier) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will it be robust enough if one failed check leads to termination? Even if there are retries.