From 6b67d19eb01aae1c5822e2d16487f9a34bf5f895 Mon Sep 17 00:00:00 2001 From: Stelios Voutsinas Date: Fri, 17 Jan 2025 11:34:08 -0700 Subject: [PATCH] More information in get_endpoint exceptions and small docstring fixes Added some more test cases for TAP error handling --- CHANGES.rst | 2 + pyvo/dal/query.py | 5 +- pyvo/dal/tap.py | 24 ++-- pyvo/dal/tests/test_tap.py | 250 +++++++++++++++++++++++++++++++++++++ pyvo/dal/vosi.py | 90 +++++++++++-- pyvo/io/vosi/endpoint.py | 11 ++ 6 files changed, 362 insertions(+), 20 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 6232c8aa..091ada93 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,6 +6,8 @@ Enhancements and Fixes - Make deletion of TAP jobs optional via a new ``delete`` kwarg. [#640] +- Provide more informative exception message when requests to endpoints fail. [#641] + - Change AsyncTAPJob.result to return None if no result is found explicitly [#644] diff --git a/pyvo/dal/query.py b/pyvo/dal/query.py index b56e9caf..e135ffcb 100644 --- a/pyvo/dal/query.py +++ b/pyvo/dal/query.py @@ -20,7 +20,8 @@ standard data model. Usually the field names are used to uniquely identify table columns. """ -__all__ = ["DALService", "DALQuery", "DALResults", "Record"] +__all__ = ["DALService", "DALServiceError", "DALQuery", "DALQueryError", + "DALResults", "Record"] import os import shutil @@ -64,7 +65,7 @@ def __init__(self, baseurl, *, session=None, capability_description=None,): the base URL that should be used for forming queries to the service. session : object optional session to use for network requests - description : str, optional + capability_description : str, optional the description of the service. """ self._baseurl = baseurl diff --git a/pyvo/dal/tap.py b/pyvo/dal/tap.py index d4547007..365002a6 100644 --- a/pyvo/dal/tap.py +++ b/pyvo/dal/tap.py @@ -118,14 +118,18 @@ def __init__(self, baseurl, *, capability_description=None, session=None): session : object optional session to use for network requests """ - super().__init__(baseurl, session=session, capability_description=capability_description) - - # Check if the session has an update_from_capabilities attribute. - # This means that the session is aware of IVOA capabilities, - # and can use this information in processing network requests. - # One such use case for this is auth. - if hasattr(self._session, 'update_from_capabilities'): - self._session.update_from_capabilities(self.capabilities) + try: + super().__init__(baseurl, session=session, capability_description=capability_description) + + # Check if the session has an update_from_capabilities attribute. + # This means that the session is aware of IVOA capabilities, + # and can use this information in processing network requests. + # One such use case for this is auth. + if hasattr(self._session, 'update_from_capabilities'): + self._session.update_from_capabilities(self.capabilities) + except DALServiceError as e: + raise DALServiceError(f"Cannot find TAP service at '" + f"{baseurl}'.\n\n{str(e)}") from None def get_tap_capability(self): """ @@ -373,8 +377,6 @@ def create_query( Parameters ---------- - baseurl : str - the base URL for the TAP service query : str the query string / parameters mode : str @@ -943,6 +945,8 @@ def wait(self, *, phases=None, timeout=600.): ---------- phases : list phases to wait for + timeout : float + maximum time to wait in seconds Raises ------ diff --git a/pyvo/dal/tests/test_tap.py b/pyvo/dal/tests/test_tap.py index e5e1adbe..aadc70db 100644 --- a/pyvo/dal/tests/test_tap.py +++ b/pyvo/dal/tests/test_tap.py @@ -841,6 +841,65 @@ def test_job_with_empty_error(self): assert "" in str(excinfo.value) + @pytest.mark.usefixtures('async_fixture') + def test_endpoint_503_with_retry_after(self): + service = TAPService('http://example.com/tap') + + with requests_mock.Mocker() as rm: + rm.get('http://example.com/tap/capabilities', + status_code=503, + headers={'Retry-After': '30'}, + text='Service temporarily unavailable') + + rm.get('http://example.com/capabilities', + status_code=404) + + with pytest.raises(DALServiceError) as excinfo: + service._get_endpoint('capabilities') + + error_msg = str(excinfo.value) + assert "503 Service Unavailable (Retry-After: 30)" in error_msg + assert "404 Not Found" in error_msg + + @pytest.mark.usefixtures('async_fixture') + def test_endpoint_case_insensitive_retry_after(self): + service = TAPService('http://example.com/tap') + + with requests_mock.Mocker() as rm: + rm.get('http://example.com/tap/capabilities', + status_code=503, + headers={'RETRY-AFTER': '60'}, + text='Service temporarily unavailable') + rm.get('http://example.com/capabilities', + status_code=404) + + with pytest.raises(DALServiceError) as excinfo: + service._get_endpoint('capabilities') + + error_msg = str(excinfo.value) + assert "503 Service Unavailable (Retry-After: 60)" in error_msg + + @pytest.mark.usefixtures('async_fixture') + def test_endpoint_stops_on_server_error(self): + service = TAPService('http://example.com/tap') + + with requests_mock.Mocker() as rm: + first_url = rm.get('http://example.com/tap/capabilities', + status_code=500, + text='Internal Server Error') + + second_url = rm.get('http://example.com/capabilities', + text='Success') + + with pytest.raises(DALServiceError) as excinfo: + service._get_endpoint('capabilities') + + assert first_url.call_count == 1 + assert second_url.call_count == 0 + + error_msg = str(excinfo.value) + assert "HTTP Code: 500" in error_msg + @pytest.mark.usefixtures("tapservice") class TestTAPCapabilities: @@ -908,6 +967,197 @@ def test_get_endpoint_candidates(): assert svc._get_endpoint_candidates("capabilities") == expected_urls +def test_timeout_error(): + service = TAPService('http://example.com/tap') + + with requests_mock.Mocker() as rm: + rm.register_uri( + 'GET', + 'http://example.com/tap/capabilities', + exc=requests.Timeout("Request timed out") + ) + rm.register_uri( + 'GET', + 'http://example.com/capabilities', + exc=requests.Timeout("Request timed out") + ) + + with pytest.raises(DALServiceError) as excinfo: + _ = service.capabilities + + error_message = str(excinfo.value) + assert "Request timed out" in error_message + + +def test_generic_request_exception(): + service = TAPService('http://example.com/tap') + + with requests_mock.Mocker() as rm: + rm.register_uri( + 'GET', + 'http://example.com/tap/capabilities', + exc=requests.RequestException("Some request error") + ) + rm.register_uri( + 'GET', + 'http://example.com/capabilities', + exc=requests.RequestException("Some request error") + ) + + with pytest.raises(DALServiceError) as excinfo: + _ = service.capabilities + + error_message = str(excinfo.value) + assert "Some request error" in error_message + + +def test_unexpected_exception(): + service = TAPService('http://example.com/tap') + + class CustomException(Exception): + pass + + with requests_mock.Mocker() as rm: + rm.register_uri( + 'GET', + 'http://example.com/tap/capabilities', + exc=CustomException("Unexpected error occurred") + ) + rm.register_uri( + 'GET', + 'http://example.com/capabilities', + exc=CustomException("Unexpected error occurred") + ) + + with pytest.raises(DALServiceError) as excinfo: + _ = service.capabilities + + error_message = str(excinfo.value) + assert "Unable to access the capabilities endpoint at:" in error_message + + +def test_tap_service_initialization_error(): + with requests_mock.Mocker() as rm: + rm.register_uri( + 'GET', + 'http://example.com/tap/capabilities', + status_code=404 + ) + rm.register_uri( + 'GET', + 'http://example.com/capabilities', + status_code=404 + ) + + service = TAPService('http://example.com/tap') + with pytest.raises(DALServiceError) as excinfo: + _ = service.capabilities + + error_message = str(excinfo.value) + assert "Unable to access the capabilities endpoint at:" in error_message + assert "404" in error_message + + +def test_endpoint_connection_errors(): + service = TAPService('http://example.com/tap') + + with requests_mock.Mocker() as rm: + rm.register_uri( + 'GET', + 'http://example.com/tap/capabilities', + status_code=404 + ) + rm.register_uri( + 'GET', + 'http://example.com/capabilities', + status_code=404 + ) + + with pytest.raises(DALServiceError) as excinfo: + _ = service.capabilities + + error_message = str(excinfo.value) + assert "Unable to access the capabilities endpoint at:" in error_message + assert "404" in error_message + assert "The service URL is incorrect" in error_message + + +def test_invalid_tap_url(): + service = TAPService('not-a-url') + + with requests_mock.Mocker() as rm: + rm.register_uri( + 'GET', + requests_mock.ANY, + exc=requests.exceptions.ConnectionError( + "Failed to establish connection") + ) + + with pytest.raises(DALServiceError) as excinfo: + _ = service.capabilities + + error_message = str(excinfo.value) + assert "Unable to access the capabilities endpoint at:" in error_message + assert "Connection failed" in error_message + + +def test_http_error_responses(): + error_codes = { + 403: "Forbidden", + 500: "Internal Server Error", + 502: "Bad Gateway", + 503: "Service Unavailable" + } + + for code, reason in error_codes.items(): + with requests_mock.Mocker() as rm: + rm.register_uri( + 'GET', + 'http://example.com/tap/capabilities', + status_code=code, + reason=reason + ) + rm.register_uri( + 'GET', + 'http://example.com/capabilities', + status_code=code, + reason=reason + ) + + service = TAPService('http://example.com/tap') + with pytest.raises(DALServiceError) as excinfo: + _ = service.capabilities + + error_message = str(excinfo.value) + assert "Unable to access the capabilities endpoint at:" in error_message + assert f"{code} {reason}" in error_message + + +def test_network_error(): + service = TAPService('http://example.com/tap') + + with requests_mock.Mocker() as rm: + rm.register_uri( + 'GET', + 'http://example.com/tap/capabilities', + exc=requests.exceptions.ConnectionError( + "Failed to establish connection") + ) + rm.register_uri( + 'GET', + 'http://example.com/capabilities', + exc=requests.exceptions.ConnectionError( + "Failed to establish connection") + ) + + with pytest.raises(DALServiceError) as excinfo: + _ = service.capabilities + + error_message = str(excinfo.value) + assert "Unable to access the capabilities endpoint at:" in error_message + assert "Connection failed" in error_message + + @pytest.mark.remote_data @pytest.mark.parametrize('stream_type', [BytesIO, StringIO]) def test_tap_upload_remote(stream_type): diff --git a/pyvo/dal/vosi.py b/pyvo/dal/vosi.py index c5367f0c..2b6f34fe 100644 --- a/pyvo/dal/vosi.py +++ b/pyvo/dal/vosi.py @@ -19,6 +19,7 @@ class EndpointMixin(): def _get_endpoint_candidates(self, endpoint): + """Construct endpoint URLs from base URL and endpoint""" urlcomp = urlparse(self.baseurl) # Include the port number if present netloc = urlcomp.hostname @@ -31,18 +32,92 @@ def _get_endpoint_candidates(self, endpoint): return [f'{curated_baseurl}/{endpoint}', url_sibling(curated_baseurl, endpoint)] + @staticmethod + def _build_error_message(endpoint, attempted_urls): + """Build error message from attempted URLs and their failures""" + return ( + f"Unable to access the {endpoint} endpoint at:\n" + f"{chr(10).join(attempted_urls)}\n\n" + f"This could mean:\n" + f"1. The service URL is incorrect\n" + f"2. The service is temporarily unavailable\n" + f"3. The service doesn't support this protocol\n" + f"4. If a 503 was encountered, retry after the suggested delay.\n" + ) + + @staticmethod + def _get_response_body(response): + """Extract response body for error messagess""" + + max_length = 500 + # Truncate if too long (500 chars), to avoid overwhelming the user + if response is None or not response.headers.get("Content-Type", + "").startswith("text"): + return "" + + return f" Response body: {response.text[:max_length]}" + + def _handle_http_error(self, error, url, attempted_urls): + """Handle HTTP errors and update attempted_urls list""" + + response_body = self._get_response_body(error.response) + + if error.response.status_code == 503: + # Handle Retry-After header, if present + retry_after = None + for header in error.response.headers: + if header.lower() == 'retry-after': + retry_after = error.response.headers[header] + break + + if retry_after: + attempted_urls.append( + f"- {url}: 503 Service Unavailable (Retry-After: " + f"{retry_after}){response_body}") + return True + elif error.response.status_code == 404: + attempted_urls.append(f"- {url}: 404 Not Found") + return True + + status_code = error.response.status_code + attempted_urls.append( + f"- {url}: HTTP Code: {status_code} " + f"{error.response.reason}{response_body}") + return False + def _get_endpoint(self, endpoint): - for ep_url in self._get_endpoint_candidates(endpoint): + """Attempt to connect to service endpoint""" + attempted_urls = [] + try: + candidates = self._get_endpoint_candidates(endpoint) + except (ValueError, AttributeError) as e: + raise DALServiceError( + f"Cannot construct endpoint URL from base '{self.baseurl}' and endpoint '{endpoint}'. " + f"{type(e).__name__}: {str(e)}" + ) from e + + for ep_url in candidates: try: response = self._session.get(ep_url, stream=True) response.raise_for_status() + return response.raw + except requests.HTTPError as e: + if not self._handle_http_error(e, ep_url, attempted_urls): + break + except requests.ConnectionError: + attempted_urls.append( + f"- {ep_url}: Connection failed " + f"(Possible causes: incorrect URL, DNS issue, or service is down)") + break + except requests.Timeout: + attempted_urls.append( + f"- {ep_url}: Request timed out (Service may be overloaded or slow)") + except (requests.RequestException, Exception) as e: + attempted_urls.append(f"- {ep_url}: {str(e)}") break - except requests.RequestException: - continue - else: - raise DALServiceError(f"No working {endpoint} endpoint provided") - return response.raw + raise DALServiceError( + self._build_error_message(endpoint, attempted_urls)) @deprecated(since="1.5") @@ -88,8 +163,7 @@ class CapabilityMixin(EndpointMixin): @stream_decode_content def _capabilities(self): """ - Returns capabilities as a - py:class:`~pyvo.io.vosi.availability.Availability` object + Retrieve the raw capabilities document from the service. """ return self._get_endpoint('capabilities') diff --git a/pyvo/io/vosi/endpoint.py b/pyvo/io/vosi/endpoint.py index 00095b0a..4340cdf2 100644 --- a/pyvo/io/vosi/endpoint.py +++ b/pyvo/io/vosi/endpoint.py @@ -67,6 +67,9 @@ def parse_tables(source, *, pedantic=None, filename=None, then *source* will be used as a filename for error messages. Therefore, *filename* is only required when source is a file-like object. + _debug_python_based_parser : bool, optional + If `True`, use the Python-based parser. This is useful for + debugging purposes. Defaults to False. Returns ------- @@ -113,6 +116,9 @@ def parse_capabilities(source, *, pedantic=None, filename=None, then *source* will be used as a filename for error messages. Therefore, *filename* is only required when source is a file-like object. + _debug_python_based_parser : bool, optional + If `True`, use the Python-based parser. This is useful for + debugging purposes. Defaults to False. Returns ------- @@ -159,6 +165,11 @@ def parse_availability(source, *, pedantic=None, filename=None, then *source* will be used as a filename for error messages. Therefore, *filename* is only required when source is a file-like object. + _debug_python_based_parser : bool, optional + If `True`, use the Python-based parser. This is useful for + debugging purposes. Defaults to False. + + Returns -------