From 95b0074a4ff83ba4b0e44f9a9cb5b0f2d94abd18 Mon Sep 17 00:00:00 2001 From: Miles Yucht Date: Wed, 11 Sep 2024 14:29:33 +0200 Subject: [PATCH 1/9] fix --- databricks/sdk/errors/base.py | 1 + tests/test_errors.py | 1 + 2 files changed, 2 insertions(+) diff --git a/databricks/sdk/errors/base.py b/databricks/sdk/errors/base.py index 973c3644e..a39099ec5 100644 --- a/databricks/sdk/errors/base.py +++ b/databricks/sdk/errors/base.py @@ -85,6 +85,7 @@ def __init__(self, message = f"{scimType} {message}".strip(" ") error_code = f"SCIM_{status}" super().__init__(message if message else error) + self.message = message self.error_code = error_code self.retry_after_secs = retry_after_secs self.details = [ErrorDetail.from_dict(detail) for detail in details] if details else [] diff --git a/tests/test_errors.py b/tests/test_errors.py index 2e19ec897..2125eac5c 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -124,3 +124,4 @@ def test_get_api_error(response, expected_error, expected_message): raise errors.get_api_error(response) assert isinstance(e.value, expected_error) assert str(e.value) == expected_message + assert e.value.message == expected_message From 35b1a3dd6b3fe1e3ba90812a740a8e18a0512d5d Mon Sep 17 00:00:00 2001 From: Miles Yucht Date: Thu, 12 Sep 2024 11:59:42 +0200 Subject: [PATCH 2/9] more work --- databricks/sdk/core.py | 46 ++++++--------- databricks/sdk/errors/__init__.py | 2 +- databricks/sdk/errors/base.py | 1 - databricks/sdk/errors/parser.py | 94 +++++++++++++++++++++++-------- tests/test_errors.py | 1 - 5 files changed, 89 insertions(+), 55 deletions(-) diff --git a/databricks/sdk/core.py b/databricks/sdk/core.py index e028e4b15..888be0584 100644 --- a/databricks/sdk/core.py +++ b/databricks/sdk/core.py @@ -10,7 +10,7 @@ from .config import * # To preserve backwards compatibility (as these definitions were previously in this module) from .credentials_provider import * -from .errors import DatabricksError, get_api_error +from .errors import DatabricksError, _Parser, _ErrorCustomizer from .logger import RoundTrip from .oauth import retrieve_token from .retries import retried @@ -71,6 +71,8 @@ def __init__(self, cfg: Config = None): # Default to 60 seconds self._http_timeout_seconds = cfg.http_timeout_seconds if cfg.http_timeout_seconds else 60 + self._error_parser = _Parser(extra_error_customizers=[_AddDebugErrorCustomizer(cfg)]) + @property def account_id(self) -> str: return self._cfg.account_id @@ -219,27 +221,6 @@ def _is_retryable(err: BaseException) -> Optional[str]: return f'matched {substring}' return None - @classmethod - def _parse_retry_after(cls, response: requests.Response) -> Optional[int]: - retry_after = response.headers.get("Retry-After") - if retry_after is None: - # 429 requests should include a `Retry-After` header, but if it's missing, - # we default to 1 second. - return cls._RETRY_AFTER_DEFAULT - # If the request is throttled, try parse the `Retry-After` header and sleep - # for the specified number of seconds. Note that this header can contain either - # an integer or a RFC1123 datetime string. - # See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Retry-After - # - # For simplicity, we only try to parse it as an integer, as this is what Databricks - # platform returns. Otherwise, we fall back and don't sleep. - try: - return int(retry_after) - except ValueError: - logger.debug(f'Invalid Retry-After header received: {retry_after}. Defaulting to 1') - # defaulting to 1 sleep second to make self._is_retryable() simpler - return cls._RETRY_AFTER_DEFAULT - def _perform(self, method: str, url: str, @@ -261,15 +242,8 @@ def _perform(self, stream=raw, timeout=self._http_timeout_seconds) self._record_request_log(response, raw=raw or data is not None or files is not None) - error = get_api_error(response) + error = self._error_parser.get_api_error(response) if error is not None: - status_code = response.status_code - is_http_unauthorized_or_forbidden = status_code in (401, 403) - is_too_many_requests_or_unavailable = status_code in (429, 503) - if is_http_unauthorized_or_forbidden: - error.message = self._cfg.wrap_debug_info(error.message) - if is_too_many_requests_or_unavailable: - error.retry_after_secs = self._parse_retry_after(response) raise error from None return response @@ -279,6 +253,18 @@ def _record_request_log(self, response: requests.Response, raw: bool = False) -> logger.debug(RoundTrip(response, self._cfg.debug_headers, self._debug_truncate_bytes, raw).generate()) +class _AddDebugErrorCustomizer(_ErrorCustomizer): + def __init__(self, cfg: Config): + self._cfg = cfg + + def customize_error(self, response: requests.Response, kwargs: dict): + status_code = response.status_code + is_http_unauthorized_or_forbidden = status_code in (401, 403) + message = kwargs.get('message', 'request failed') + if is_http_unauthorized_or_forbidden: + kwargs['message'] = self._cfg.wrap_debug_info(message) + + class StreamingResponse(BinaryIO): _response: requests.Response _buffer: bytes diff --git a/databricks/sdk/errors/__init__.py b/databricks/sdk/errors/__init__.py index 578406803..d49608a99 100644 --- a/databricks/sdk/errors/__init__.py +++ b/databricks/sdk/errors/__init__.py @@ -1,6 +1,6 @@ from .base import DatabricksError, ErrorDetail from .mapper import _error_mapper -from .parser import get_api_error +from .parser import _Parser, _ErrorCustomizer from .platform import * from .private_link import PrivateLinkValidationError from .sdk import * diff --git a/databricks/sdk/errors/base.py b/databricks/sdk/errors/base.py index a39099ec5..973c3644e 100644 --- a/databricks/sdk/errors/base.py +++ b/databricks/sdk/errors/base.py @@ -85,7 +85,6 @@ def __init__(self, message = f"{scimType} {message}".strip(" ") error_code = f"SCIM_{status}" super().__init__(message if message else error) - self.message = message self.error_code = error_code self.retry_after_secs = retry_after_secs self.details = [ErrorDetail.from_dict(detail) for detail in details] if details else [] diff --git a/databricks/sdk/errors/parser.py b/databricks/sdk/errors/parser.py index 3d15f1673..77362f66a 100644 --- a/databricks/sdk/errors/parser.py +++ b/databricks/sdk/errors/parser.py @@ -1,8 +1,9 @@ import abc +import functools import json import logging import re -from typing import Optional +from typing import Optional, List import requests @@ -21,6 +22,13 @@ def parse_error(self, response: requests.Response, response_body: bytes) -> Opti """Parses an error from the Databricks REST API. If the error cannot be parsed, returns None.""" +class _ErrorCustomizer(abc.ABC): + """A customizer for errors from the Databricks REST API.""" + + @abc.abstractmethod + def customize_error(self, response: requests.Response, kwargs: dict): + """Customize the error constructor parameters.""" + class _EmptyParser(_ErrorParser): """A parser that handles empty responses.""" @@ -106,10 +114,43 @@ def parse_error(self, response: requests.Response, response_body: bytes) -> Opti return None +class _RetryAfterCustomizer(_ErrorCustomizer): + _RETRY_AFTER_DEFAULT = 1 + + @classmethod + def _parse_retry_after(cls, response: requests.Response) -> Optional[int]: + retry_after = response.headers.get("Retry-After") + if retry_after is None: + logging.debug(f'No Retry-After header received in response with status code 429 or 503. Defaulting to {cls._RETRY_AFTER_DEFAULT}') + # 429 requests should include a `Retry-After` header, but if it's missing, + # we default to 1 second. + return cls._RETRY_AFTER_DEFAULT + # If the request is throttled, try parse the `Retry-After` header and sleep + # for the specified number of seconds. Note that this header can contain either + # an integer or a RFC1123 datetime string. + # See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Retry-After + # + # For simplicity, we only try to parse it as an integer, as this is what Databricks + # platform returns. Otherwise, we fall back and don't sleep. + try: + return int(retry_after) + except ValueError: + logging.debug(f'Invalid Retry-After header received: {retry_after}. Defaulting to {cls._RETRY_AFTER_DEFAULT}') + # defaulting to 1 sleep second to make self._is_retryable() simpler + return cls._RETRY_AFTER_DEFAULT + + def customize_error(self, response: requests.Response, kwargs: dict): + status_code = response.status_code + is_too_many_requests_or_unavailable = status_code in (429, 503) + if is_too_many_requests_or_unavailable: + kwargs['retry_after_secs'] = self._parse_retry_after(response) + + # A list of ErrorParsers that are tried in order to parse an API error from a response body. Most errors should be # parsable by the _StandardErrorParser, but additional parsers can be added here for specific error formats. The order # of the parsers is not important, as the set of errors that can be parsed by each parser should be disjoint. _error_parsers = [_EmptyParser(), _StandardErrorParser(), _StringErrorParser(), _HtmlErrorParser(), ] +_error_customizers = [_RetryAfterCustomizer(), ] def _unknown_error(response: requests.Response) -> str: @@ -124,24 +165,33 @@ def _unknown_error(response: requests.Response) -> str: f'https://github.com/databricks/databricks-sdk-go/issues. Request log:```{request_log}```') -def get_api_error(response: requests.Response) -> Optional[DatabricksError]: - """ - Handles responses from the REST API and returns a DatabricksError if the response indicates an error. - :param response: The response from the REST API. - :return: A DatabricksError if the response indicates an error, otherwise None. - """ - if not response.ok: - content = response.content - for parser in _error_parsers: - try: - error_args = parser.parse_error(response, content) - if error_args: - return _error_mapper(response, error_args) - except Exception as e: - logging.debug(f'Error parsing response with {parser}, continuing', exc_info=e) - return _error_mapper(response, {'message': 'unable to parse response. ' + _unknown_error(response)}) - - # Private link failures happen via a redirect to the login page. From a requests-perspective, the request - # is successful, but the response is not what we expect. We need to handle this case separately. - if _is_private_link_redirect(response): - return _get_private_link_validation_error(response.url) +class _Parser: + def __init__(self, + extra_error_parsers: Optional[List[_ErrorParser]] = None, + extra_error_customizers: Optional[List[_ErrorCustomizer]] = None): + self._error_parsers = _error_parsers + extra_error_parsers + self._error_customizers = _error_customizers + extra_error_customizers + + def get_api_error(self, response: requests.Response) -> Optional[DatabricksError]: + """ + Handles responses from the REST API and returns a DatabricksError if the response indicates an error. + :param response: The response from the REST API. + :return: A DatabricksError if the response indicates an error, otherwise None. + """ + if not response.ok: + content = response.content + for parser in self._error_parsers: + try: + error_args = parser.parse_error(response, content) + if error_args: + for customizer in self._error_customizers: + customizer.customize_error(response, error_args) + return _error_mapper(response, error_args) + except Exception as e: + logging.debug(f'Error parsing response with {parser}, continuing', exc_info=e) + return _error_mapper(response, {'message': 'unable to parse response. ' + _unknown_error(response)}) + + # Private link failures happen via a redirect to the login page. From a requests-perspective, the request + # is successful, but the response is not what we expect. We need to handle this case separately. + if _is_private_link_redirect(response): + return _get_private_link_validation_error(response.url) diff --git a/tests/test_errors.py b/tests/test_errors.py index 2125eac5c..2e19ec897 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -124,4 +124,3 @@ def test_get_api_error(response, expected_error, expected_message): raise errors.get_api_error(response) assert isinstance(e.value, expected_error) assert str(e.value) == expected_message - assert e.value.message == expected_message From 0e56142a0fe7a3ad2e6b703858041d0dc29d2ebf Mon Sep 17 00:00:00 2001 From: Miles Yucht Date: Thu, 12 Sep 2024 13:37:07 +0200 Subject: [PATCH 3/9] fixes --- databricks/sdk/errors/__init__.py | 3 +- databricks/sdk/errors/customizer.py | 47 +++++++++++++++++++++++++++++ databricks/sdk/errors/parser.py | 41 +------------------------ 3 files changed, 50 insertions(+), 41 deletions(-) create mode 100644 databricks/sdk/errors/customizer.py diff --git a/databricks/sdk/errors/__init__.py b/databricks/sdk/errors/__init__.py index d49608a99..2d35a0f36 100644 --- a/databricks/sdk/errors/__init__.py +++ b/databricks/sdk/errors/__init__.py @@ -1,6 +1,7 @@ from .base import DatabricksError, ErrorDetail from .mapper import _error_mapper -from .parser import _Parser, _ErrorCustomizer +from .parser import _Parser +from .customizer import _ErrorCustomizer from .platform import * from .private_link import PrivateLinkValidationError from .sdk import * diff --git a/databricks/sdk/errors/customizer.py b/databricks/sdk/errors/customizer.py new file mode 100644 index 000000000..9199b5200 --- /dev/null +++ b/databricks/sdk/errors/customizer.py @@ -0,0 +1,47 @@ +import abc +import logging +from typing import Optional + +import requests + + +class _ErrorCustomizer(abc.ABC): + """A customizer for errors from the Databricks REST API.""" + + @abc.abstractmethod + def customize_error(self, response: requests.Response, kwargs: dict): + """Customize the error constructor parameters.""" + + +class _RetryAfterCustomizer(_ErrorCustomizer): + _RETRY_AFTER_DEFAULT = 1 + + @classmethod + def _parse_retry_after(cls, response: requests.Response) -> Optional[int]: + retry_after = response.headers.get("Retry-After") + if retry_after is None: + logging.debug(f'No Retry-After header received in response with status code 429 or 503. Defaulting to {cls._RETRY_AFTER_DEFAULT}') + # 429 requests should include a `Retry-After` header, but if it's missing, + # we default to 1 second. + return cls._RETRY_AFTER_DEFAULT + # If the request is throttled, try parse the `Retry-After` header and sleep + # for the specified number of seconds. Note that this header can contain either + # an integer or a RFC1123 datetime string. + # See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Retry-After + # + # For simplicity, we only try to parse it as an integer, as this is what Databricks + # platform returns. Otherwise, we fall back and don't sleep. + try: + return int(retry_after) + except ValueError: + logging.debug(f'Invalid Retry-After header received: {retry_after}. Defaulting to {cls._RETRY_AFTER_DEFAULT}') + # defaulting to 1 sleep second to make self._is_retryable() simpler + return cls._RETRY_AFTER_DEFAULT + + def customize_error(self, response: requests.Response, kwargs: dict): + status_code = response.status_code + is_too_many_requests_or_unavailable = status_code in (429, 503) + if is_too_many_requests_or_unavailable: + kwargs['retry_after_secs'] = self._parse_retry_after(response) + + diff --git a/databricks/sdk/errors/parser.py b/databricks/sdk/errors/parser.py index 77362f66a..42f022e37 100644 --- a/databricks/sdk/errors/parser.py +++ b/databricks/sdk/errors/parser.py @@ -1,5 +1,4 @@ import abc -import functools import json import logging import re @@ -9,6 +8,7 @@ from ..logger import RoundTrip from .base import DatabricksError +from .customizer import _ErrorCustomizer, _RetryAfterCustomizer from .mapper import _error_mapper from .private_link import (_get_private_link_validation_error, _is_private_link_redirect) @@ -22,13 +22,6 @@ def parse_error(self, response: requests.Response, response_body: bytes) -> Opti """Parses an error from the Databricks REST API. If the error cannot be parsed, returns None.""" -class _ErrorCustomizer(abc.ABC): - """A customizer for errors from the Databricks REST API.""" - - @abc.abstractmethod - def customize_error(self, response: requests.Response, kwargs: dict): - """Customize the error constructor parameters.""" - class _EmptyParser(_ErrorParser): """A parser that handles empty responses.""" @@ -114,38 +107,6 @@ def parse_error(self, response: requests.Response, response_body: bytes) -> Opti return None -class _RetryAfterCustomizer(_ErrorCustomizer): - _RETRY_AFTER_DEFAULT = 1 - - @classmethod - def _parse_retry_after(cls, response: requests.Response) -> Optional[int]: - retry_after = response.headers.get("Retry-After") - if retry_after is None: - logging.debug(f'No Retry-After header received in response with status code 429 or 503. Defaulting to {cls._RETRY_AFTER_DEFAULT}') - # 429 requests should include a `Retry-After` header, but if it's missing, - # we default to 1 second. - return cls._RETRY_AFTER_DEFAULT - # If the request is throttled, try parse the `Retry-After` header and sleep - # for the specified number of seconds. Note that this header can contain either - # an integer or a RFC1123 datetime string. - # See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Retry-After - # - # For simplicity, we only try to parse it as an integer, as this is what Databricks - # platform returns. Otherwise, we fall back and don't sleep. - try: - return int(retry_after) - except ValueError: - logging.debug(f'Invalid Retry-After header received: {retry_after}. Defaulting to {cls._RETRY_AFTER_DEFAULT}') - # defaulting to 1 sleep second to make self._is_retryable() simpler - return cls._RETRY_AFTER_DEFAULT - - def customize_error(self, response: requests.Response, kwargs: dict): - status_code = response.status_code - is_too_many_requests_or_unavailable = status_code in (429, 503) - if is_too_many_requests_or_unavailable: - kwargs['retry_after_secs'] = self._parse_retry_after(response) - - # A list of ErrorParsers that are tried in order to parse an API error from a response body. Most errors should be # parsable by the _StandardErrorParser, but additional parsers can be added here for specific error formats. The order # of the parsers is not important, as the set of errors that can be parsed by each parser should be disjoint. From 40eae63b45eadc39bf30c90e9296199a5e1a858e Mon Sep 17 00:00:00 2001 From: Miles Yucht Date: Thu, 12 Sep 2024 13:39:34 +0200 Subject: [PATCH 4/9] work --- databricks/sdk/errors/__init__.py | 1 - databricks/sdk/errors/deserializer.py | 101 +++++++++++++++++++++++++ databricks/sdk/errors/parser.py | 105 ++------------------------ 3 files changed, 106 insertions(+), 101 deletions(-) create mode 100644 databricks/sdk/errors/deserializer.py diff --git a/databricks/sdk/errors/__init__.py b/databricks/sdk/errors/__init__.py index 2d35a0f36..b5c13a016 100644 --- a/databricks/sdk/errors/__init__.py +++ b/databricks/sdk/errors/__init__.py @@ -1,5 +1,4 @@ from .base import DatabricksError, ErrorDetail -from .mapper import _error_mapper from .parser import _Parser from .customizer import _ErrorCustomizer from .platform import * diff --git a/databricks/sdk/errors/deserializer.py b/databricks/sdk/errors/deserializer.py new file mode 100644 index 000000000..9cf9fa75a --- /dev/null +++ b/databricks/sdk/errors/deserializer.py @@ -0,0 +1,101 @@ +import abc +import json +import logging +import re +from typing import Optional, List + +import requests + +class _ErrorDeserializer(abc.ABC): + """A parser for errors from the Databricks REST API.""" + + @abc.abstractmethod + def parse_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]: + """Parses an error from the Databricks REST API. If the error cannot be parsed, returns None.""" + + +class _EmptyDeserializer(_ErrorDeserializer): + """A parser that handles empty responses.""" + + def parse_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]: + if len(response_body) == 0: + return {'message': response.reason} + return None + + +class _StandardErrorDeserializer(_ErrorDeserializer): + """ + Parses errors from the Databricks REST API using the standard error format. + """ + + def parse_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]: + try: + payload_str = response_body.decode('utf-8') + resp: dict = json.loads(payload_str) + except json.JSONDecodeError as e: + logging.debug('_StandardErrorParser: unable to deserialize response as json', exc_info=e) + return None + + error_args = { + 'message': resp.get('message', 'request failed'), + 'error_code': resp.get('error_code'), + 'details': resp.get('details'), + } + + # Handle API 1.2-style errors + if 'error' in resp: + error_args['message'] = resp['error'] + + # Handle SCIM Errors + detail = resp.get('detail') + status = resp.get('status') + scim_type = resp.get('scimType') + if detail: + # Handle SCIM error message details + # @see https://tools.ietf.org/html/rfc7644#section-3.7.3 + if detail == "null": + detail = "SCIM API Internal Error" + error_args['message'] = f"{scim_type} {detail}".strip(" ") + error_args['error_code'] = f"SCIM_{status}" + return error_args + + +class _StringErrorDeserializer(_ErrorDeserializer): + """ + Parses errors from the Databricks REST API in the format "ERROR_CODE: MESSAGE". + """ + + __STRING_ERROR_REGEX = re.compile(r'([A-Z_]+): (.*)') + + def parse_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]: + payload_str = response_body.decode('utf-8') + match = self.__STRING_ERROR_REGEX.match(payload_str) + if not match: + logging.debug('_StringErrorParser: unable to parse response as string') + return None + error_code, message = match.groups() + return {'error_code': error_code, 'message': message, 'status': response.status_code, } + + +class _HtmlErrorDeserializer(_ErrorDeserializer): + """ + Parses errors from the Databricks REST API in HTML format. + """ + + __HTML_ERROR_REGEXES = [re.compile(r'
(.*)
'), re.compile(r'(.*)'), ] + + def parse_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]: + payload_str = response_body.decode('utf-8') + for regex in self.__HTML_ERROR_REGEXES: + match = regex.search(payload_str) + if match: + message = match.group(1) if match.group(1) else response.reason + return { + 'status': response.status_code, + 'message': message, + 'error_code': response.reason.upper().replace(' ', '_') + } + logging.debug('_HtmlErrorParser: no
 tag found in error response')
+        return None
+
+
diff --git a/databricks/sdk/errors/parser.py b/databricks/sdk/errors/parser.py
index 42f022e37..29bc35cde 100644
--- a/databricks/sdk/errors/parser.py
+++ b/databricks/sdk/errors/parser.py
@@ -1,116 +1,21 @@
-import abc
-import json
+import requests
 import logging
-import re
 from typing import Optional, List
 
-import requests
-
 from ..logger import RoundTrip
 from .base import DatabricksError
 from .customizer import _ErrorCustomizer, _RetryAfterCustomizer
+from .deserializer import (_EmptyDeserializer, _ErrorDeserializer, _HtmlErrorDeserializer,
+                             _StandardErrorDeserializer, _StringErrorDeserializer)
 from .mapper import _error_mapper
 from .private_link import (_get_private_link_validation_error,
                            _is_private_link_redirect)
 
 
-class _ErrorParser(abc.ABC):
-    """A parser for errors from the Databricks REST API."""
-
-    @abc.abstractmethod
-    def parse_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]:
-        """Parses an error from the Databricks REST API. If the error cannot be parsed, returns None."""
-
-
-class _EmptyParser(_ErrorParser):
-    """A parser that handles empty responses."""
-
-    def parse_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]:
-        if len(response_body) == 0:
-            return {'message': response.reason}
-        return None
-
-
-class _StandardErrorParser(_ErrorParser):
-    """
-    Parses errors from the Databricks REST API using the standard error format.
-    """
-
-    def parse_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]:
-        try:
-            payload_str = response_body.decode('utf-8')
-            resp: dict = json.loads(payload_str)
-        except json.JSONDecodeError as e:
-            logging.debug('_StandardErrorParser: unable to deserialize response as json', exc_info=e)
-            return None
-
-        error_args = {
-            'message': resp.get('message', 'request failed'),
-            'error_code': resp.get('error_code'),
-            'details': resp.get('details'),
-        }
-
-        # Handle API 1.2-style errors
-        if 'error' in resp:
-            error_args['message'] = resp['error']
-
-        # Handle SCIM Errors
-        detail = resp.get('detail')
-        status = resp.get('status')
-        scim_type = resp.get('scimType')
-        if detail:
-            # Handle SCIM error message details
-            # @see https://tools.ietf.org/html/rfc7644#section-3.7.3
-            if detail == "null":
-                detail = "SCIM API Internal Error"
-            error_args['message'] = f"{scim_type} {detail}".strip(" ")
-            error_args['error_code'] = f"SCIM_{status}"
-        return error_args
-
-
-class _StringErrorParser(_ErrorParser):
-    """
-    Parses errors from the Databricks REST API in the format "ERROR_CODE: MESSAGE".
-    """
-
-    __STRING_ERROR_REGEX = re.compile(r'([A-Z_]+): (.*)')
-
-    def parse_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]:
-        payload_str = response_body.decode('utf-8')
-        match = self.__STRING_ERROR_REGEX.match(payload_str)
-        if not match:
-            logging.debug('_StringErrorParser: unable to parse response as string')
-            return None
-        error_code, message = match.groups()
-        return {'error_code': error_code, 'message': message, 'status': response.status_code, }
-
-
-class _HtmlErrorParser(_ErrorParser):
-    """
-    Parses errors from the Databricks REST API in HTML format.
-    """
-
-    __HTML_ERROR_REGEXES = [re.compile(r'
(.*)
'), re.compile(r'(.*)'), ] - - def parse_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]: - payload_str = response_body.decode('utf-8') - for regex in self.__HTML_ERROR_REGEXES: - match = regex.search(payload_str) - if match: - message = match.group(1) if match.group(1) else response.reason - return { - 'status': response.status_code, - 'message': message, - 'error_code': response.reason.upper().replace(' ', '_') - } - logging.debug('_HtmlErrorParser: no
 tag found in error response')
-        return None
-
-
 # A list of ErrorParsers that are tried in order to parse an API error from a response body. Most errors should be
 # parsable by the _StandardErrorParser, but additional parsers can be added here for specific error formats. The order
 # of the parsers is not important, as the set of errors that can be parsed by each parser should be disjoint.
-_error_parsers = [_EmptyParser(), _StandardErrorParser(), _StringErrorParser(), _HtmlErrorParser(), ]
+_error_parsers = [_EmptyDeserializer(), _StandardErrorDeserializer(), _StringErrorDeserializer(), _HtmlErrorDeserializer(), ]
 _error_customizers = [_RetryAfterCustomizer(), ]
 
 
@@ -128,7 +33,7 @@ def _unknown_error(response: requests.Response) -> str:
 
 class _Parser:
     def __init__(self,
-                 extra_error_parsers: Optional[List[_ErrorParser]] = None,
+                 extra_error_parsers: Optional[List[_ErrorDeserializer]] = None,
                  extra_error_customizers: Optional[List[_ErrorCustomizer]] = None):
         self._error_parsers = _error_parsers + extra_error_parsers
         self._error_customizers = _error_customizers + extra_error_customizers

From b5e2fc3a2ff04efb6a3897111610d4c1566fed35 Mon Sep 17 00:00:00 2001
From: Miles Yucht 
Date: Thu, 12 Sep 2024 15:18:19 +0200
Subject: [PATCH 5/9] fmt

---
 databricks/sdk/core.py                |  3 ++-
 databricks/sdk/errors/__init__.py     |  2 +-
 databricks/sdk/errors/customizer.py   | 10 ++++++----
 databricks/sdk/errors/deserializer.py |  5 ++---
 databricks/sdk/errors/parser.py       | 28 ++++++++++++++++++---------
 tests/test_errors.py                  |  3 ++-
 6 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/databricks/sdk/core.py b/databricks/sdk/core.py
index 888be0584..bb4709df3 100644
--- a/databricks/sdk/core.py
+++ b/databricks/sdk/core.py
@@ -10,7 +10,7 @@
 from .config import *
 # To preserve backwards compatibility (as these definitions were previously in this module)
 from .credentials_provider import *
-from .errors import DatabricksError, _Parser, _ErrorCustomizer
+from .errors import DatabricksError, _ErrorCustomizer, _Parser
 from .logger import RoundTrip
 from .oauth import retrieve_token
 from .retries import retried
@@ -254,6 +254,7 @@ def _record_request_log(self, response: requests.Response, raw: bool = False) ->
 
 
 class _AddDebugErrorCustomizer(_ErrorCustomizer):
+
     def __init__(self, cfg: Config):
         self._cfg = cfg
 
diff --git a/databricks/sdk/errors/__init__.py b/databricks/sdk/errors/__init__.py
index b5c13a016..8ad5ac708 100644
--- a/databricks/sdk/errors/__init__.py
+++ b/databricks/sdk/errors/__init__.py
@@ -1,6 +1,6 @@
 from .base import DatabricksError, ErrorDetail
-from .parser import _Parser
 from .customizer import _ErrorCustomizer
+from .parser import _Parser
 from .platform import *
 from .private_link import PrivateLinkValidationError
 from .sdk import *
diff --git a/databricks/sdk/errors/customizer.py b/databricks/sdk/errors/customizer.py
index 9199b5200..addbfc58c 100644
--- a/databricks/sdk/errors/customizer.py
+++ b/databricks/sdk/errors/customizer.py
@@ -20,7 +20,9 @@ class _RetryAfterCustomizer(_ErrorCustomizer):
     def _parse_retry_after(cls, response: requests.Response) -> Optional[int]:
         retry_after = response.headers.get("Retry-After")
         if retry_after is None:
-            logging.debug(f'No Retry-After header received in response with status code 429 or 503. Defaulting to {cls._RETRY_AFTER_DEFAULT}')
+            logging.debug(
+                f'No Retry-After header received in response with status code 429 or 503. Defaulting to {cls._RETRY_AFTER_DEFAULT}'
+            )
             # 429 requests should include a `Retry-After` header, but if it's missing,
             # we default to 1 second.
             return cls._RETRY_AFTER_DEFAULT
@@ -34,7 +36,9 @@ def _parse_retry_after(cls, response: requests.Response) -> Optional[int]:
         try:
             return int(retry_after)
         except ValueError:
-            logging.debug(f'Invalid Retry-After header received: {retry_after}. Defaulting to {cls._RETRY_AFTER_DEFAULT}')
+            logging.debug(
+                f'Invalid Retry-After header received: {retry_after}. Defaulting to {cls._RETRY_AFTER_DEFAULT}'
+            )
             # defaulting to 1 sleep second to make self._is_retryable() simpler
             return cls._RETRY_AFTER_DEFAULT
 
@@ -43,5 +47,3 @@ def customize_error(self, response: requests.Response, kwargs: dict):
         is_too_many_requests_or_unavailable = status_code in (429, 503)
         if is_too_many_requests_or_unavailable:
             kwargs['retry_after_secs'] = self._parse_retry_after(response)
-
-
diff --git a/databricks/sdk/errors/deserializer.py b/databricks/sdk/errors/deserializer.py
index 9cf9fa75a..53a6edfdb 100644
--- a/databricks/sdk/errors/deserializer.py
+++ b/databricks/sdk/errors/deserializer.py
@@ -2,10 +2,11 @@
 import json
 import logging
 import re
-from typing import Optional, List
+from typing import Optional
 
 import requests
 
+
 class _ErrorDeserializer(abc.ABC):
     """A parser for errors from the Databricks REST API."""
 
@@ -97,5 +98,3 @@ def parse_error(self, response: requests.Response, response_body: bytes) -> Opti
                 }
         logging.debug('_HtmlErrorParser: no 
 tag found in error response')
         return None
-
-
diff --git a/databricks/sdk/errors/parser.py b/databricks/sdk/errors/parser.py
index 29bc35cde..8f9bdb553 100644
--- a/databricks/sdk/errors/parser.py
+++ b/databricks/sdk/errors/parser.py
@@ -1,21 +1,27 @@
-import requests
 import logging
-from typing import Optional, List
+from typing import List, Optional
+
+import requests
 
 from ..logger import RoundTrip
 from .base import DatabricksError
 from .customizer import _ErrorCustomizer, _RetryAfterCustomizer
-from .deserializer import (_EmptyDeserializer, _ErrorDeserializer, _HtmlErrorDeserializer,
-                             _StandardErrorDeserializer, _StringErrorDeserializer)
+from .deserializer import (_EmptyDeserializer, _ErrorDeserializer,
+                           _HtmlErrorDeserializer, _StandardErrorDeserializer,
+                           _StringErrorDeserializer)
 from .mapper import _error_mapper
 from .private_link import (_get_private_link_validation_error,
                            _is_private_link_redirect)
 
-
 # A list of ErrorParsers that are tried in order to parse an API error from a response body. Most errors should be
 # parsable by the _StandardErrorParser, but additional parsers can be added here for specific error formats. The order
 # of the parsers is not important, as the set of errors that can be parsed by each parser should be disjoint.
-_error_parsers = [_EmptyDeserializer(), _StandardErrorDeserializer(), _StringErrorDeserializer(), _HtmlErrorDeserializer(), ]
+_error_parsers = [
+    _EmptyDeserializer(),
+    _StandardErrorDeserializer(),
+    _StringErrorDeserializer(),
+    _HtmlErrorDeserializer(),
+]
 _error_customizers = [_RetryAfterCustomizer(), ]
 
 
@@ -32,11 +38,14 @@ def _unknown_error(response: requests.Response) -> str:
 
 
 class _Parser:
+
     def __init__(self,
                  extra_error_parsers: Optional[List[_ErrorDeserializer]] = None,
                  extra_error_customizers: Optional[List[_ErrorCustomizer]] = None):
-        self._error_parsers = _error_parsers + extra_error_parsers
-        self._error_customizers = _error_customizers + extra_error_customizers
+        self._error_parsers = _error_parsers + (extra_error_parsers
+                                                if extra_error_parsers is not None else [])
+        self._error_customizers = _error_customizers + (extra_error_customizers
+                                                        if extra_error_customizers is not None else [])
 
     def get_api_error(self, response: requests.Response) -> Optional[DatabricksError]:
         """
@@ -55,7 +64,8 @@ def get_api_error(self, response: requests.Response) -> Optional[DatabricksError
                         return _error_mapper(response, error_args)
                 except Exception as e:
                     logging.debug(f'Error parsing response with {parser}, continuing', exc_info=e)
-            return _error_mapper(response, {'message': 'unable to parse response. ' + _unknown_error(response)})
+            return _error_mapper(response,
+                                 {'message': 'unable to parse response. ' + _unknown_error(response)})
 
         # Private link failures happen via a redirect to the login page. From a requests-perspective, the request
         # is successful, but the response is not what we expect. We need to handle this case separately.
diff --git a/tests/test_errors.py b/tests/test_errors.py
index 2e19ec897..12b9c0f92 100644
--- a/tests/test_errors.py
+++ b/tests/test_errors.py
@@ -120,7 +120,8 @@ def make_private_link_response() -> requests.Response:
              })), errors.NotFound, 'None Group with id 1234 is not found'
      ]])
 def test_get_api_error(response, expected_error, expected_message):
+    parser = errors._Parser()
     with pytest.raises(errors.DatabricksError) as e:
-        raise errors.get_api_error(response)
+        raise parser.get_api_error(response)
     assert isinstance(e.value, expected_error)
     assert str(e.value) == expected_message

From 87bca78b64e397e9de313604aaccd8f04d56dc78 Mon Sep 17 00:00:00 2001
From: Miles Yucht 
Date: Thu, 12 Sep 2024 15:46:40 +0200
Subject: [PATCH 6/9] tests

---
 databricks/sdk/core.py              |  2 +
 databricks/sdk/errors/customizer.py |  2 +
 databricks/sdk/errors/parser.py     |  6 +++
 tests/test_core.py                  | 79 +++++++++++++++++++++++------
 4 files changed, 74 insertions(+), 15 deletions(-)

diff --git a/databricks/sdk/core.py b/databricks/sdk/core.py
index bb4709df3..978140f28 100644
--- a/databricks/sdk/core.py
+++ b/databricks/sdk/core.py
@@ -254,6 +254,8 @@ def _record_request_log(self, response: requests.Response, raw: bool = False) ->
 
 
 class _AddDebugErrorCustomizer(_ErrorCustomizer):
+    """An error customizer that adds debug information about the configuration to unauthenticated and
+    unauthorized errors."""
 
     def __init__(self, cfg: Config):
         self._cfg = cfg
diff --git a/databricks/sdk/errors/customizer.py b/databricks/sdk/errors/customizer.py
index addbfc58c..64f0d7f2a 100644
--- a/databricks/sdk/errors/customizer.py
+++ b/databricks/sdk/errors/customizer.py
@@ -14,6 +14,8 @@ def customize_error(self, response: requests.Response, kwargs: dict):
 
 
 class _RetryAfterCustomizer(_ErrorCustomizer):
+    """An error customizer that sets the retry_after_secs parameter based on the Retry-After header."""
+
     _RETRY_AFTER_DEFAULT = 1
 
     @classmethod
diff --git a/databricks/sdk/errors/parser.py b/databricks/sdk/errors/parser.py
index 8f9bdb553..80aad2520 100644
--- a/databricks/sdk/errors/parser.py
+++ b/databricks/sdk/errors/parser.py
@@ -38,6 +38,12 @@ def _unknown_error(response: requests.Response) -> str:
 
 
 class _Parser:
+    """
+    A parser for errors from the Databricks REST API. It attempts to deserialize an error using a sequence of
+    deserializers, and then customizes the deserialized error using a sequence of customizers. If the error cannot be
+    deserialized, it returns a generic error with debugging information and instructions to report the issue to the SDK
+    issue tracker.
+    """
 
     def __init__(self,
                  extra_error_parsers: Optional[List[_ErrorDeserializer]] = None,
diff --git a/tests/test_core.py b/tests/test_core.py
index cc7926a72..7dbc66cdc 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -13,7 +13,7 @@
 import pytest
 import requests
 
-from databricks.sdk import WorkspaceClient
+from databricks.sdk import WorkspaceClient, errors
 from databricks.sdk.core import (ApiClient, Config, DatabricksError,
                                  StreamingResponse)
 from databricks.sdk.credentials_provider import (CliTokenSource,
@@ -359,8 +359,8 @@ def test_deletes(config, requests_mock):
     assert res is None
 
 
-def test_error(config, requests_mock):
-    errorJson = {
+@pytest.mark.parametrize('status_code,headers,body,expected_error', [
+    (400, {}, {
         "message":
         "errorMessage",
         "details": [{
@@ -378,20 +378,69 @@ def test_error(config, requests_mock):
                 "etag": "wrong etag"
             }
         }],
-    }
-
+    },
+     errors.BadRequest('errorMessage',
+                       details=[{
+                           'type': DatabricksError._error_info_type,
+                           'reason': 'error reason',
+                           'domain': 'error domain',
+                           'metadata': {
+                               'etag': 'error etag'
+                           },
+                       }])),
+    (401, {}, {
+        'error_code': 'UNAUTHORIZED',
+        'message': 'errorMessage',
+    },
+     errors.Unauthenticated('errorMessage. Config: host=http://localhost, auth_type=noop',
+                            error_code='UNAUTHORIZED')),
+    (403, {}, {
+        'error_code': 'FORBIDDEN',
+        'message': 'errorMessage',
+    },
+     errors.PermissionDenied('errorMessage. Config: host=http://localhost, auth_type=noop',
+                             error_code='FORBIDDEN')),
+    (429, {}, {
+        'error_code': 'TOO_MANY_REQUESTS',
+        'message': 'errorMessage',
+    }, errors.TooManyRequests('errorMessage', error_code='TOO_MANY_REQUESTS', retry_after_secs=1)),
+    (429, {
+        'Retry-After': '100'
+    }, {
+        'error_code': 'TOO_MANY_REQUESTS',
+        'message': 'errorMessage',
+    }, errors.TooManyRequests('errorMessage', error_code='TOO_MANY_REQUESTS', retry_after_secs=100)),
+    (503, {}, {
+        'error_code': 'TEMPORARILY_UNAVAILABLE',
+        'message': 'errorMessage',
+    }, errors.TemporarilyUnavailable('errorMessage', error_code='TEMPORARILY_UNAVAILABLE',
+                                     retry_after_secs=1)),
+    (503, {
+        'Retry-After': '100'
+    }, {
+        'error_code': 'TEMPORARILY_UNAVAILABLE',
+        'message': 'errorMessage',
+    },
+     errors.TemporarilyUnavailable('errorMessage', error_code='TEMPORARILY_UNAVAILABLE',
+                                   retry_after_secs=100)),
+])
+def test_error(config, requests_mock, status_code, headers, body, expected_error):
     client = ApiClient(config)
-    requests_mock.get("/test", json=errorJson, status_code=400, )
+    requests_mock.get("/test", json=body, status_code=status_code, headers=headers)
     with pytest.raises(DatabricksError) as raised:
-        client.do("GET", "/test", headers={"test": "test"})
-
-    error_infos = raised.value.get_error_info()
-    assert len(error_infos) == 1
-    error_info = error_infos[0]
-    assert error_info.reason == "error reason"
-    assert error_info.domain == "error domain"
-    assert error_info.metadata["etag"] == "error etag"
-    assert error_info.type == DatabricksError._error_info_type
+        client._perform("GET", "http://localhost/test", headers={"test": "test"})
+    actual = raised.value
+    assert isinstance(actual, type(expected_error))
+    assert str(actual) == str(expected_error)
+    assert actual.error_code == expected_error.error_code
+    assert actual.retry_after_secs == expected_error.retry_after_secs
+    expected_error_infos, actual_error_infos = expected_error.get_error_info(), actual.get_error_info()
+    assert len(expected_error_infos) == len(actual_error_infos)
+    for expected, actual in zip(expected_error_infos, actual_error_infos):
+        assert expected.type == actual.type
+        assert expected.reason == actual.reason
+        assert expected.domain == actual.domain
+        assert expected.metadata == actual.metadata
 
 
 def test_error_with_scimType():

From fabfb7484e344fa22870ea942be652c5f5929e3a Mon Sep 17 00:00:00 2001
From: Miles Yucht 
Date: Thu, 12 Sep 2024 15:50:26 +0200
Subject: [PATCH 7/9] improve test coverage for scim errors

---
 tests/test_core.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index 7dbc66cdc..d54563d4e 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -423,6 +423,11 @@ def test_deletes(config, requests_mock):
     },
      errors.TemporarilyUnavailable('errorMessage', error_code='TEMPORARILY_UNAVAILABLE',
                                    retry_after_secs=100)),
+    (404, {}, {
+        'scimType': 'scim type',
+        'detail': 'detail',
+        'status': 'status',
+    }, errors.NotFound('scim type detail', error_code='SCIM_status')),
 ])
 def test_error(config, requests_mock, status_code, headers, body, expected_error):
     client = ApiClient(config)
@@ -443,12 +448,6 @@ def test_error(config, requests_mock, status_code, headers, body, expected_error
         assert expected.metadata == actual.metadata
 
 
-def test_error_with_scimType():
-    args = {"detail": "detail", "scimType": "scim type"}
-    error = DatabricksError(**args)
-    assert str(error) == f"scim type detail"
-
-
 @contextlib.contextmanager
 def http_fixture_server(handler: typing.Callable[[BaseHTTPRequestHandler], None]):
     from http.server import HTTPServer

From bd432ef313988994b487be8dfd370a204baaf8ac Mon Sep 17 00:00:00 2001
From: Miles Yucht 
Date: Fri, 13 Sep 2024 14:31:37 +0200
Subject: [PATCH 8/9] address comments

---
 databricks/sdk/core.py                     |  6 ++---
 databricks/sdk/errors/customizer.py        | 19 +++++++------
 databricks/sdk/errors/deserializer.py      | 18 ++++++++-----
 databricks/sdk/errors/parser.py            | 22 ++++++++-------
 databricks/sdk/logger/round_trip_logger.py |  3 ++-
 tests/test_errors.py                       | 31 +++++++++++++++-------
 6 files changed, 59 insertions(+), 40 deletions(-)

diff --git a/databricks/sdk/core.py b/databricks/sdk/core.py
index 978140f28..77e8c9aac 100644
--- a/databricks/sdk/core.py
+++ b/databricks/sdk/core.py
@@ -261,10 +261,8 @@ def __init__(self, cfg: Config):
         self._cfg = cfg
 
     def customize_error(self, response: requests.Response, kwargs: dict):
-        status_code = response.status_code
-        is_http_unauthorized_or_forbidden = status_code in (401, 403)
-        message = kwargs.get('message', 'request failed')
-        if is_http_unauthorized_or_forbidden:
+        if response.status_code in (401, 403):
+            message = kwargs.get('message', 'request failed')
             kwargs['message'] = self._cfg.wrap_debug_info(message)
 
 
diff --git a/databricks/sdk/errors/customizer.py b/databricks/sdk/errors/customizer.py
index 64f0d7f2a..3c9cccbb5 100644
--- a/databricks/sdk/errors/customizer.py
+++ b/databricks/sdk/errors/customizer.py
@@ -1,6 +1,5 @@
 import abc
 import logging
-from typing import Optional
 
 import requests
 
@@ -16,18 +15,20 @@ def customize_error(self, response: requests.Response, kwargs: dict):
 class _RetryAfterCustomizer(_ErrorCustomizer):
     """An error customizer that sets the retry_after_secs parameter based on the Retry-After header."""
 
-    _RETRY_AFTER_DEFAULT = 1
+    _DEFALT_RETRY_AFTER_SECONDS = 1
+    """The default number of seconds to wait before retrying a request if the Retry-After header is missing or is not
+    a valid integer."""
 
     @classmethod
-    def _parse_retry_after(cls, response: requests.Response) -> Optional[int]:
+    def _parse_retry_after(cls, response: requests.Response) -> int:
         retry_after = response.headers.get("Retry-After")
         if retry_after is None:
             logging.debug(
-                f'No Retry-After header received in response with status code 429 or 503. Defaulting to {cls._RETRY_AFTER_DEFAULT}'
+                f'No Retry-After header received in response with status code 429 or 503. Defaulting to {cls._DEFALT_RETRY_AFTER_SECONDS}'
             )
             # 429 requests should include a `Retry-After` header, but if it's missing,
             # we default to 1 second.
-            return cls._RETRY_AFTER_DEFAULT
+            return cls._DEFALT_RETRY_AFTER_SECONDS
         # If the request is throttled, try parse the `Retry-After` header and sleep
         # for the specified number of seconds. Note that this header can contain either
         # an integer or a RFC1123 datetime string.
@@ -39,13 +40,11 @@ def _parse_retry_after(cls, response: requests.Response) -> Optional[int]:
             return int(retry_after)
         except ValueError:
             logging.debug(
-                f'Invalid Retry-After header received: {retry_after}. Defaulting to {cls._RETRY_AFTER_DEFAULT}'
+                f'Invalid Retry-After header received: {retry_after}. Defaulting to {cls._DEFALT_RETRY_AFTER_SECONDS}'
             )
             # defaulting to 1 sleep second to make self._is_retryable() simpler
-            return cls._RETRY_AFTER_DEFAULT
+            return cls._DEFALT_RETRY_AFTER_SECONDS
 
     def customize_error(self, response: requests.Response, kwargs: dict):
-        status_code = response.status_code
-        is_too_many_requests_or_unavailable = status_code in (429, 503)
-        if is_too_many_requests_or_unavailable:
+        if response.status_code in (429, 503):
             kwargs['retry_after_secs'] = self._parse_retry_after(response)
diff --git a/databricks/sdk/errors/deserializer.py b/databricks/sdk/errors/deserializer.py
index 53a6edfdb..4da01ee68 100644
--- a/databricks/sdk/errors/deserializer.py
+++ b/databricks/sdk/errors/deserializer.py
@@ -11,14 +11,14 @@ class _ErrorDeserializer(abc.ABC):
     """A parser for errors from the Databricks REST API."""
 
     @abc.abstractmethod
-    def parse_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]:
+    def deserialize_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]:
         """Parses an error from the Databricks REST API. If the error cannot be parsed, returns None."""
 
 
 class _EmptyDeserializer(_ErrorDeserializer):
     """A parser that handles empty responses."""
 
-    def parse_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]:
+    def deserialize_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]:
         if len(response_body) == 0:
             return {'message': response.reason}
         return None
@@ -29,13 +29,19 @@ class _StandardErrorDeserializer(_ErrorDeserializer):
     Parses errors from the Databricks REST API using the standard error format.
     """
 
-    def parse_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]:
+    def deserialize_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]:
         try:
             payload_str = response_body.decode('utf-8')
-            resp: dict = json.loads(payload_str)
+            resp = json.loads(payload_str)
+        except UnicodeDecodeError as e:
+            logging.debug('_StandardErrorParser: unable to decode response using utf-8', exc_info=e)
+            return None
         except json.JSONDecodeError as e:
             logging.debug('_StandardErrorParser: unable to deserialize response as json', exc_info=e)
             return None
+        if not isinstance(resp, dict):
+            logging.debug('_StandardErrorParser: response is valid JSON but not a dictionary')
+            return None
 
         error_args = {
             'message': resp.get('message', 'request failed'),
@@ -68,7 +74,7 @@ class _StringErrorDeserializer(_ErrorDeserializer):
 
     __STRING_ERROR_REGEX = re.compile(r'([A-Z_]+): (.*)')
 
-    def parse_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]:
+    def deserialize_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]:
         payload_str = response_body.decode('utf-8')
         match = self.__STRING_ERROR_REGEX.match(payload_str)
         if not match:
@@ -85,7 +91,7 @@ class _HtmlErrorDeserializer(_ErrorDeserializer):
 
     __HTML_ERROR_REGEXES = [re.compile(r'
(.*)
'), re.compile(r'(.*)'), ] - def parse_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]: + def deserialize_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]: payload_str = response_body.decode('utf-8') for regex in self.__HTML_ERROR_REGEXES: match = regex.search(payload_str) diff --git a/databricks/sdk/errors/parser.py b/databricks/sdk/errors/parser.py index 80aad2520..3408964fe 100644 --- a/databricks/sdk/errors/parser.py +++ b/databricks/sdk/errors/parser.py @@ -13,15 +13,19 @@ from .private_link import (_get_private_link_validation_error, _is_private_link_redirect) -# A list of ErrorParsers that are tried in order to parse an API error from a response body. Most errors should be -# parsable by the _StandardErrorParser, but additional parsers can be added here for specific error formats. The order -# of the parsers is not important, as the set of errors that can be parsed by each parser should be disjoint. -_error_parsers = [ +# A list of _ErrorDeserializers that are tried in order to parse an API error from a response body. Most errors should +# be parsable by the _StandardErrorDeserializer, but additional parsers can be added here for specific error formats. +# The order of the parsers is not important, as the set of errors that can be parsed by each parser should be disjoint. +_error_deserializers = [ _EmptyDeserializer(), _StandardErrorDeserializer(), _StringErrorDeserializer(), _HtmlErrorDeserializer(), ] + +# A list of _ErrorCustomizers that are applied to the error arguments after they are parsed. Customizers can modify the +# error arguments in any way, including adding or removing fields. Customizers are applied in order, so later +# customizers can override the changes made by earlier customizers. _error_customizers = [_RetryAfterCustomizer(), ] @@ -46,10 +50,10 @@ class _Parser: """ def __init__(self, - extra_error_parsers: Optional[List[_ErrorDeserializer]] = None, - extra_error_customizers: Optional[List[_ErrorCustomizer]] = None): - self._error_parsers = _error_parsers + (extra_error_parsers - if extra_error_parsers is not None else []) + extra_error_parsers: List[_ErrorDeserializer] = [], + extra_error_customizers: List[_ErrorCustomizer] = []): + self._error_parsers = _error_deserializers + (extra_error_parsers + if extra_error_parsers is not None else []) self._error_customizers = _error_customizers + (extra_error_customizers if extra_error_customizers is not None else []) @@ -63,7 +67,7 @@ def get_api_error(self, response: requests.Response) -> Optional[DatabricksError content = response.content for parser in self._error_parsers: try: - error_args = parser.parse_error(response, content) + error_args = parser.deserialize_error(response, content) if error_args: for customizer in self._error_customizers: customizer.customize_error(response, error_args) diff --git a/databricks/sdk/logger/round_trip_logger.py b/databricks/sdk/logger/round_trip_logger.py index f1d177aaa..1c0a47f08 100644 --- a/databricks/sdk/logger/round_trip_logger.py +++ b/databricks/sdk/logger/round_trip_logger.py @@ -48,7 +48,8 @@ def generate(self) -> str: # Raw streams with `Transfer-Encoding: chunked` do not have `Content-Type` header sb.append("< [raw stream]") elif self._response.content: - sb.append(self._redacted_dump("< ", self._response.content.decode('utf-8'))) + decoded = self._response.content.decode('utf-8', errors='replace') + sb.append(self._redacted_dump("< ", decoded)) return '\n'.join(sb) @staticmethod diff --git a/tests/test_errors.py b/tests/test_errors.py index 12b9c0f92..881f016f3 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -12,13 +12,20 @@ def fake_response(method: str, status_code: int, response_body: str, path: Optional[str] = None) -> requests.Response: + return fake_raw_response(method, status_code, response_body.encode('utf-8'), path) + + +def fake_raw_response(method: str, + status_code: int, + response_body: bytes, + path: Optional[str] = None) -> requests.Response: resp = requests.Response() resp.status_code = status_code resp.reason = http.client.responses.get(status_code, '') if path is None: path = '/api/2.0/service' resp.request = requests.Request(method, f"https://databricks.com{path}").prepare() - resp._content = response_body.encode('utf-8') + resp._content = response_body return resp @@ -110,15 +117,19 @@ def make_private_link_response() -> requests.Response: 'https://github.com/databricks/databricks-sdk-go/issues. Request log:```GET /api/2.0/service\n' '< 400 Bad Request\n' '< this is not a real response```')), - [ - fake_response( - 'GET', 404, - json.dumps({ - 'detail': 'Group with id 1234 is not found', - 'status': '404', - 'schemas': ['urn:ietf:params:scim:api:messages:2.0:Error'] - })), errors.NotFound, 'None Group with id 1234 is not found' - ]]) + (fake_response( + 'GET', 404, + json.dumps({ + 'detail': 'Group with id 1234 is not found', + 'status': '404', + 'schemas': ['urn:ietf:params:scim:api:messages:2.0:Error'] + })), errors.NotFound, 'None Group with id 1234 is not found'), + (fake_response('GET', 404, json.dumps("This is JSON but not a dictionary")), errors.NotFound, + 'unable to parse response. This is likely a bug in the Databricks SDK for Python or the underlying API. Please report this issue with the following debugging information to the SDK issue tracker at https://github.com/databricks/databricks-sdk-go/issues. Request log:```GET /api/2.0/service\n< 404 Not Found\n< "This is JSON but not a dictionary"```' + ), + (fake_raw_response('GET', 404, b'\x80'), errors.NotFound, + 'unable to parse response. This is likely a bug in the Databricks SDK for Python or the underlying API. Please report this issue with the following debugging information to the SDK issue tracker at https://github.com/databricks/databricks-sdk-go/issues. Request log:```GET /api/2.0/service\n< 404 Not Found\n< �```' + )]) def test_get_api_error(response, expected_error, expected_message): parser = errors._Parser() with pytest.raises(errors.DatabricksError) as e: From 2ecfa797b5644cecb775c717a1b9e4b0b545e852 Mon Sep 17 00:00:00 2001 From: Miles Yucht Date: Fri, 13 Sep 2024 15:14:50 +0200 Subject: [PATCH 9/9] fix typo --- databricks/sdk/errors/customizer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/databricks/sdk/errors/customizer.py b/databricks/sdk/errors/customizer.py index 3c9cccbb5..5c895becc 100644 --- a/databricks/sdk/errors/customizer.py +++ b/databricks/sdk/errors/customizer.py @@ -15,7 +15,7 @@ def customize_error(self, response: requests.Response, kwargs: dict): class _RetryAfterCustomizer(_ErrorCustomizer): """An error customizer that sets the retry_after_secs parameter based on the Retry-After header.""" - _DEFALT_RETRY_AFTER_SECONDS = 1 + _DEFAULT_RETRY_AFTER_SECONDS = 1 """The default number of seconds to wait before retrying a request if the Retry-After header is missing or is not a valid integer.""" @@ -24,11 +24,11 @@ def _parse_retry_after(cls, response: requests.Response) -> int: retry_after = response.headers.get("Retry-After") if retry_after is None: logging.debug( - f'No Retry-After header received in response with status code 429 or 503. Defaulting to {cls._DEFALT_RETRY_AFTER_SECONDS}' + f'No Retry-After header received in response with status code 429 or 503. Defaulting to {cls._DEFAULT_RETRY_AFTER_SECONDS}' ) # 429 requests should include a `Retry-After` header, but if it's missing, # we default to 1 second. - return cls._DEFALT_RETRY_AFTER_SECONDS + return cls._DEFAULT_RETRY_AFTER_SECONDS # If the request is throttled, try parse the `Retry-After` header and sleep # for the specified number of seconds. Note that this header can contain either # an integer or a RFC1123 datetime string. @@ -40,10 +40,10 @@ def _parse_retry_after(cls, response: requests.Response) -> int: return int(retry_after) except ValueError: logging.debug( - f'Invalid Retry-After header received: {retry_after}. Defaulting to {cls._DEFALT_RETRY_AFTER_SECONDS}' + f'Invalid Retry-After header received: {retry_after}. Defaulting to {cls._DEFAULT_RETRY_AFTER_SECONDS}' ) # defaulting to 1 sleep second to make self._is_retryable() simpler - return cls._DEFALT_RETRY_AFTER_SECONDS + return cls._DEFAULT_RETRY_AFTER_SECONDS def customize_error(self, response: requests.Response, kwargs: dict): if response.status_code in (429, 503):