From 5a3f83e313e7e696af420ffd184ed570c5a0ad4b Mon Sep 17 00:00:00 2001 From: Jesse Date: Wed, 7 Jun 2023 14:02:38 -0500 Subject: [PATCH 01/40] Use urllib3 for thrift transport + reuse http connections (#131) Signed-off-by: Jesse Whitehouse --- CHANGELOG.md | 2 + src/databricks/sql/auth/thrift_http_client.py | 153 +++++++++++++++++- src/databricks/sql/thrift_backend.py | 4 + tests/e2e/driver_tests.py | 1 + 4 files changed, 152 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d424c7b3..74d278b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## 2.5.x (Unreleased) +- Add support for HTTP 1.1 connections (connection pools) + ## 2.5.2 (2023-05-08) - Fix: SQLAlchemy adapter could not reflect TIMESTAMP or DATETIME columns diff --git a/src/databricks/sql/auth/thrift_http_client.py b/src/databricks/sql/auth/thrift_http_client.py index a924ea63..66a9d196 100644 --- a/src/databricks/sql/auth/thrift_http_client.py +++ b/src/databricks/sql/auth/thrift_http_client.py @@ -1,13 +1,20 @@ +import base64 import logging -from typing import Dict - +import urllib.parse +from typing import Dict, Union +import six import thrift -import urllib.parse, six, base64 - logger = logging.getLogger(__name__) +import ssl +import warnings +from http.client import HTTPResponse +from io import BytesIO + +from urllib3 import HTTPConnectionPool, HTTPSConnectionPool, ProxyManager + class THttpClient(thrift.transport.THttpClient.THttpClient): def __init__( @@ -20,22 +27,152 @@ def __init__( cert_file=None, key_file=None, ssl_context=None, + max_connections: int = 1, ): - super().__init__( - uri_or_host, port, path, cafile, cert_file, key_file, ssl_context - ) + if port is not None: + warnings.warn( + "Please use the THttpClient('http{s}://host:port/path') constructor", + DeprecationWarning, + stacklevel=2, + ) + self.host = uri_or_host + self.port = port + assert path + self.path = path + self.scheme = "http" + else: + parsed = urllib.parse.urlsplit(uri_or_host) + self.scheme = parsed.scheme + assert self.scheme in ("http", "https") + if self.scheme == "https": + self.certfile = cert_file + self.keyfile = key_file + self.context = ( + ssl.create_default_context(cafile=cafile) + if (cafile and not ssl_context) + else ssl_context + ) + self.port = parsed.port + self.host = parsed.hostname + self.path = parsed.path + if parsed.query: + self.path += "?%s" % parsed.query + try: + proxy = urllib.request.getproxies()[self.scheme] + except KeyError: + proxy = None + else: + if urllib.request.proxy_bypass(self.host): + proxy = None + if proxy: + parsed = urllib.parse.urlparse(proxy) + + # realhost and realport are the host and port of the actual request + self.realhost = self.host + self.realport = self.port + + # this is passed to ProxyManager + self.proxy_uri: str = proxy + self.host = parsed.hostname + self.port = parsed.port + self.proxy_auth = self.basic_proxy_auth_header(parsed) + else: + self.realhost = self.realport = self.proxy_auth = None + + self.max_connections = max_connections + + self.__wbuf = BytesIO() + self.__resp: Union[None, HTTPResponse] = None + self.__timeout = None + self.__custom_headers = None + self.__auth_provider = auth_provider def setCustomHeaders(self, headers: Dict[str, str]): self._headers = headers super().setCustomHeaders(headers) + def open(self): + + # self.__pool replaces the self.__http used by the original THttpClient + if self.scheme == "http": + pool_class = HTTPConnectionPool + elif self.scheme == "https": + pool_class = HTTPSConnectionPool + + 
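+        # urllib3 `maxsize` caps how many pooled connections are kept alive for
+        # reuse; even at the default of 1, successive thrift RPCs reuse a live
+        # HTTP connection instead of opening a new one per request.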
_pool_kwargs = {"maxsize": self.max_connections} + + if self.using_proxy(): + proxy_manager = ProxyManager( + self.proxy_uri, + num_pools=1, + headers={"Proxy-Authorization": self.proxy_auth}, + ) + self.__pool = proxy_manager.connection_from_host( + self.host, self.port, pool_kwargs=_pool_kwargs + ) + else: + self.__pool = pool_class(self.host, self.port, **_pool_kwargs) + + def close(self): + self.__resp and self.__resp.release_conn() + self.__resp = None + + def read(self, sz): + return self.__resp.read(sz) + + def isOpen(self): + return self.__resp is not None + def flush(self): + + # Pull data out of buffer that will be sent in this request + data = self.__wbuf.getvalue() + self.__wbuf = BytesIO() + + # Header handling + headers = dict(self._headers) self.__auth_provider.add_headers(headers) self._headers = headers self.setCustomHeaders(self._headers) - super().flush() + + # Note: we don't set User-Agent explicitly in this class because PySQL + # should always provide one. Unlike the original THttpClient class, our version + # doesn't define a default User-Agent and so should raise an exception if one + # isn't provided. + assert self.__custom_headers and "User-Agent" in self.__custom_headers + + headers = { + "Content-Type": "application/x-thrift", + "Content-Length": str(len(data)), + } + + if self.using_proxy() and self.scheme == "http" and self.proxy_auth is not None: + headers["Proxy-Authorization" : self.proxy_auth] + + if self.__custom_headers: + custom_headers = {key: val for key, val in self.__custom_headers.items()} + headers.update(**custom_headers) + + # HTTP request + self.__resp = self.__pool.request( + "POST", + url=self.path, + body=data, + headers=headers, + preload_content=False, + timeout=self.__timeout, + ) + + # Get reply to flush the request + self.code = self.__resp.status + self.message = self.__resp.reason + self.headers = self.__resp.headers + + # Saves the cookie sent by the server response + if "Set-Cookie" in self.headers: + self.setCustomHeaders(dict("Cookie", self.headers["Set-Cookie"])) @staticmethod def basic_proxy_auth_header(proxy): diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index 935c7711..d2fd1001 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -317,6 +317,10 @@ def attempt_request(attempt): try: logger.debug("Sending request: {}".format(request)) response = method(request) + + # Calling `close()` here releases the active HTTP connection back to the pool + self._transport.close() + logger.debug("Received response: {}".format(response)) return response except OSError as err: diff --git a/tests/e2e/driver_tests.py b/tests/e2e/driver_tests.py index 1c09d70e..4cb7be8b 100644 --- a/tests/e2e/driver_tests.py +++ b/tests/e2e/driver_tests.py @@ -458,6 +458,7 @@ def test_temp_view_fetch(self): # once what is being returned has stabilised @skipIf(pysql_has_version('<', '2'), 'requires pysql v2') + @skipIf(True, "Unclear the purpose of this test since urllib3 does not complain when timeout == 0") def test_socket_timeout(self): # We we expect to see a BlockingIO error when the socket is opened # in non-blocking mode, since no poll is done before the read From 9ef50e811b1c36b4ff76e0bcebf1691f282881ea Mon Sep 17 00:00:00 2001 From: mattdeekay <11141331+mattdeekay@users.noreply.github.com> Date: Wed, 7 Jun 2023 14:38:28 -0700 Subject: [PATCH 02/40] Default socket timeout to 15 min (#137) Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> --- 
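Not part of the commit, for context only: a minimal sketch of overriding the new default from application code. It assumes `_socket_timeout` (in seconds) is still accepted as a keyword by `databricks.sql.connect()` and forwarded to the thrift backend as this diff shows; host, path, and token values are placeholders.

    from databricks import sql

    # Placeholder connection parameters; the timeout is any positive int/float
    # in seconds and is converted to milliseconds for the thrift transport.
    with sql.connect(
        server_hostname="....cloud.databricks.com",
        http_path="/sql/1.0/warehouses/...",
        access_token="dapi...",
        _socket_timeout=120,  # override the 900-second default
    ) as connection:
        with connection.cursor() as cursor:
            cursor.execute("SELECT 1")
            print(cursor.fetchone())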
src/databricks/sql/thrift_backend.py | 9 +++++---- tests/e2e/driver_tests.py | 15 +++++++++++++-- tests/unit/test_thrift_backend.py | 2 ++ 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index d2fd1001..c61dc99e 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -37,6 +37,7 @@ DATABRICKS_REASON_HEADER = "x-databricks-reason-phrase" TIMESTAMP_AS_STRING_CONFIG = "spark.thriftserver.arrowBasedRowSet.timestampAsString" +DEFAULT_SOCKET_TIMEOUT = float(900) # see Connection.__init__ for parameter descriptions. # - Min/Max avoids unsustainable configs (sane values are far more constrained) @@ -99,8 +100,8 @@ def __init__( # _retry_stop_after_attempts_count # The maximum number of times we should retry retryable requests (defaults to 24) # _socket_timeout - # The timeout in seconds for socket send, recv and connect operations. Defaults to None for - # no timeout. Should be a positive float or integer. + # The timeout in seconds for socket send, recv and connect operations. Should be a positive float or integer. + # (defaults to 900) port = port or 443 if kwargs.get("_connection_uri"): @@ -152,8 +153,8 @@ def __init__( ssl_context=ssl_context, ) - timeout = kwargs.get("_socket_timeout") - # setTimeout defaults to None (i.e. no timeout), and is expected in ms + timeout = kwargs.get("_socket_timeout", DEFAULT_SOCKET_TIMEOUT) + # setTimeout defaults to 15 minutes and is expected in ms self._transport.setTimeout(timeout and (float(timeout) * 1000.0)) self._transport.setCustomHeaders(dict(http_headers)) diff --git a/tests/e2e/driver_tests.py b/tests/e2e/driver_tests.py index 4cb7be8b..831ed21f 100644 --- a/tests/e2e/driver_tests.py +++ b/tests/e2e/driver_tests.py @@ -18,7 +18,7 @@ import pytest import databricks.sql as sql -from databricks.sql import STRING, BINARY, NUMBER, DATETIME, DATE, DatabaseError, Error, OperationalError +from databricks.sql import STRING, BINARY, NUMBER, DATETIME, DATE, DatabaseError, Error, OperationalError, RequestError from tests.e2e.common.predicates import pysql_has_version, pysql_supports_arrow, compare_dbr_versions, is_thrift_v5_plus from tests.e2e.common.core_tests import CoreTestMixin, SmokeTestMixin from tests.e2e.common.large_queries_mixin import LargeQueriesMixin @@ -460,7 +460,7 @@ def test_temp_view_fetch(self): @skipIf(pysql_has_version('<', '2'), 'requires pysql v2') @skipIf(True, "Unclear the purpose of this test since urllib3 does not complain when timeout == 0") def test_socket_timeout(self): - # We we expect to see a BlockingIO error when the socket is opened + # We expect to see a BlockingIO error when the socket is opened # in non-blocking mode, since no poll is done before the read with self.assertRaises(OperationalError) as cm: with self.cursor({"_socket_timeout": 0}): @@ -468,6 +468,17 @@ def test_socket_timeout(self): self.assertIsInstance(cm.exception.args[1], io.BlockingIOError) + @skipIf(pysql_has_version('<', '2'), 'requires pysql v2') + def test_socket_timeout_user_defined(self): + # We expect to see a TimeoutError when the socket timeout is only + # 1 sec for a query that takes longer than that to process + with self.assertRaises(RequestError) as cm: + with self.cursor({"_socket_timeout": 1}) as cursor: + query = "select * from range(10000000)" + cursor.execute(query) + + self.assertIsInstance(cm.exception.args[1], TimeoutError) + def test_ssp_passthrough(self): for enable_ansi in (True, False): with 
self.cursor({"session_configuration": {"ansi_mode": enable_ansi}}) as cursor: diff --git a/tests/unit/test_thrift_backend.py b/tests/unit/test_thrift_backend.py index 1c2e589b..347bce15 100644 --- a/tests/unit/test_thrift_backend.py +++ b/tests/unit/test_thrift_backend.py @@ -217,6 +217,8 @@ def test_socket_timeout_is_propagated(self, t_http_client_class): self.assertEqual(t_http_client_class.return_value.setTimeout.call_args[0][0], 129 * 1000) ThriftBackend("hostname", 123, "path_value", [], auth_provider=AuthProvider(), _socket_timeout=0) self.assertEqual(t_http_client_class.return_value.setTimeout.call_args[0][0], 0) + ThriftBackend("hostname", 123, "path_value", [], auth_provider=AuthProvider()) + self.assertEqual(t_http_client_class.return_value.setTimeout.call_args[0][0], 900 * 1000) ThriftBackend("hostname", 123, "path_value", [], auth_provider=AuthProvider(), _socket_timeout=None) self.assertEqual(t_http_client_class.return_value.setTimeout.call_args[0][0], None) From dfabbdd89a8299ade657d064c32e245e5e0be6fd Mon Sep 17 00:00:00 2001 From: Jesse Date: Wed, 7 Jun 2023 17:23:19 -0500 Subject: [PATCH 03/40] Bump version to 2.6.0 (#139) Signed-off-by: Jesse Whitehouse --- CHANGELOG.md | 5 ++++- pyproject.toml | 2 +- src/databricks/sql/__init__.py | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 74d278b0..11fc9cb2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,11 @@ # Release History -## 2.5.x (Unreleased) +## 2.6.x (Unreleased) + +## 2.6.0 (2023-06-07) - Add support for HTTP 1.1 connections (connection pools) +- Add a default socket timeout for thrift RPCs ## 2.5.2 (2023-05-08) diff --git a/pyproject.toml b/pyproject.toml index e93dcd1b..16965f67 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "databricks-sql-connector" -version = "2.5.2" +version = "2.6.0" description = "Databricks SQL Connector for Python" authors = ["Databricks "] license = "Apache-2.0" diff --git a/src/databricks/sql/__init__.py b/src/databricks/sql/__init__.py index fdfb3fb6..028a52d9 100644 --- a/src/databricks/sql/__init__.py +++ b/src/databricks/sql/__init__.py @@ -28,7 +28,7 @@ def __repr__(self): DATE = DBAPITypeObject("date") ROWID = DBAPITypeObject() -__version__ = "2.5.2" +__version__ = "2.6.0" USER_AGENT_NAME = "PyDatabricksSqlConnector" # These two functions are pyhive legacy From 3d359bc8cbfa051c4aa906817bba85f9f29afd13 Mon Sep 17 00:00:00 2001 From: Jesse Date: Thu, 8 Jun 2023 18:30:55 -0500 Subject: [PATCH 04/40] Fix: some thrift RPCs failed with BadStatusLine (#141) --------- Signed-off-by: Jesse Whitehouse --- CHANGELOG.md | 2 ++ src/databricks/sql/auth/thrift_http_client.py | 1 + 2 files changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 11fc9cb2..0401365f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## 2.6.x (Unreleased) +- Fix: http.client would raise a BadStatusLine exception in some cases + ## 2.6.0 (2023-06-07) - Add support for HTTP 1.1 connections (connection pools) diff --git a/src/databricks/sql/auth/thrift_http_client.py b/src/databricks/sql/auth/thrift_http_client.py index 66a9d196..89ad66a0 100644 --- a/src/databricks/sql/auth/thrift_http_client.py +++ b/src/databricks/sql/auth/thrift_http_client.py @@ -115,6 +115,7 @@ def open(self): self.__pool = pool_class(self.host, self.port, **_pool_kwargs) def close(self): + self.__resp and self.__resp.drain_conn() self.__resp and self.__resp.release_conn() self.__resp = None From 
537980320068b54eada271c2a656a115ae2b3091 Mon Sep 17 00:00:00 2001 From: Jesse Date: Thu, 8 Jun 2023 18:44:52 -0500 Subject: [PATCH 05/40] Bump version to 2.6.1 (#142) Signed-off-by: Jesse Whitehouse --- CHANGELOG.md | 2 ++ pyproject.toml | 2 +- src/databricks/sql/__init__.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0401365f..b091b425 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## 2.6.x (Unreleased) +## 2.6.1 (2023-06-08) + - Fix: http.client would raise a BadStatusLine exception in some cases ## 2.6.0 (2023-06-07) diff --git a/pyproject.toml b/pyproject.toml index 16965f67..233c943c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "databricks-sql-connector" -version = "2.6.0" +version = "2.6.1" description = "Databricks SQL Connector for Python" authors = ["Databricks "] license = "Apache-2.0" diff --git a/src/databricks/sql/__init__.py b/src/databricks/sql/__init__.py index 028a52d9..7c43a02e 100644 --- a/src/databricks/sql/__init__.py +++ b/src/databricks/sql/__init__.py @@ -28,7 +28,7 @@ def __repr__(self): DATE = DBAPITypeObject("date") ROWID = DBAPITypeObject() -__version__ = "2.6.0" +__version__ = "2.6.1" USER_AGENT_NAME = "PyDatabricksSqlConnector" # These two functions are pyhive legacy From 869803995e07f3280048a27039fdd771e7592e9a Mon Sep 17 00:00:00 2001 From: Jesse Date: Wed, 14 Jun 2023 13:27:21 -0700 Subject: [PATCH 06/40] [ES-706907] Retry GetOperationStatus for http errors (#145) Signed-off-by: Jesse Whitehouse --- src/databricks/sql/thrift_backend.py | 14 ++++++++ tests/unit/test_thrift_backend.py | 48 +++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index c61dc99e..c17da877 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -14,6 +14,8 @@ import thrift.transport.TSocket import thrift.transport.TTransport +import urllib3.exceptions + import databricks.sql.auth.thrift_http_client from databricks.sql.auth.authenticators import AuthProvider from databricks.sql.thrift_api.TCLIService import TCLIService, ttypes @@ -324,6 +326,18 @@ def attempt_request(attempt): logger.debug("Received response: {}".format(response)) return response + + except urllib3.exceptions.HTTPError as err: + # retry on timeout. 
Happens a lot in Azure and it is safe as data has not been sent to server yet + + gos_name = TCLIServiceClient.GetOperationStatus.__name__ + if method.__name__ == gos_name: + retry_delay = bound_retry_delay(attempt, self._retry_delay_default) + logger.info( + f"GetOperationStatus failed with HTTP error and will be retried: {str(err)}" + ) + else: + raise err except OSError as err: error = err error_message = str(err) diff --git a/tests/unit/test_thrift_backend.py b/tests/unit/test_thrift_backend.py index 347bce15..7ef0fa2c 100644 --- a/tests/unit/test_thrift_backend.py +++ b/tests/unit/test_thrift_backend.py @@ -6,6 +6,7 @@ from ssl import CERT_NONE, CERT_REQUIRED import pyarrow +import urllib3 import databricks.sql from databricks.sql.thrift_api.TCLIService import ttypes @@ -1033,7 +1034,7 @@ def test_handle_execute_response_sets_active_op_handle(self): self.assertEqual(mock_resp.operationHandle, mock_cursor.active_op_handle) - @patch("thrift.transport.THttpClient.THttpClient") + @patch("databricks.sql.auth.thrift_http_client.THttpClient") @patch("databricks.sql.thrift_api.TCLIService.TCLIService.Client.GetOperationStatus") @patch("databricks.sql.thrift_backend._retry_policy", new_callable=retry_policy_factory) def test_make_request_will_retry_GetOperationStatus( @@ -1089,6 +1090,51 @@ def test_make_request_will_retry_GetOperationStatus( # The warnings should include this text self.assertIn(f"{this_gos_name} failed with code {errno.EEXIST} and will attempt to retry", cm.output[0]) + @patch("databricks.sql.thrift_api.TCLIService.TCLIService.Client.GetOperationStatus") + @patch("databricks.sql.thrift_backend._retry_policy", new_callable=retry_policy_factory) + def test_make_request_will_retry_GetOperationStatus_for_http_error( + self, mock_retry_policy, mock_gos): + + import urllib3.exceptions + mock_gos.side_effect = urllib3.exceptions.HTTPError("Read timed out") + + import thrift, errno + from databricks.sql.thrift_api.TCLIService.TCLIService import Client + from databricks.sql.exc import RequestError + from databricks.sql.utils import NoRetryReason + from databricks.sql.auth.thrift_http_client import THttpClient + + this_gos_name = "GetOperationStatus" + mock_gos.__name__ = this_gos_name + + protocol = thrift.protocol.TBinaryProtocol.TBinaryProtocol(THttpClient) + client = Client(protocol) + + req = ttypes.TGetOperationStatusReq( + operationHandle=self.operation_handle, + getProgressUpdate=False, + ) + + EXPECTED_RETRIES = 2 + + thrift_backend = ThriftBackend( + "foobar", + 443, + "path", [], + auth_provider=AuthProvider(), + _retry_stop_after_attempts_count=EXPECTED_RETRIES, + _retry_delay_default=1) + + + with self.assertRaises(RequestError) as cm: + thrift_backend.make_request(client.GetOperationStatus, req) + + + self.assertEqual(NoRetryReason.OUT_OF_ATTEMPTS.value, cm.exception.context["no-retry-reason"]) + self.assertEqual(f'{EXPECTED_RETRIES}/{EXPECTED_RETRIES}', cm.exception.context["attempt"]) + + + @patch("thrift.transport.THttpClient.THttpClient") def test_make_request_wont_retry_if_headers_not_present(self, t_transport_class): From bbe539e925d1bbc23e1b6a189ab2da97ebddb965 Mon Sep 17 00:00:00 2001 From: Jesse Date: Wed, 14 Jun 2023 16:40:18 -0700 Subject: [PATCH 07/40] Bump version to 2.6.2 (#147) Signed-off-by: Jesse Whitehouse --- CHANGELOG.md | 4 ++++ pyproject.toml | 2 +- src/databricks/sql/__init__.py | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b091b425..b15ea555 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ 
-2,6 +2,10 @@ ## 2.6.x (Unreleased) +## 2.6.2 (2023-06-14) + +- Fix: Retry GetOperationStatus requests for http errors + ## 2.6.1 (2023-06-08) - Fix: http.client would raise a BadStatusLine exception in some cases diff --git a/pyproject.toml b/pyproject.toml index 233c943c..9d08a688 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "databricks-sql-connector" -version = "2.6.1" +version = "2.6.2" description = "Databricks SQL Connector for Python" authors = ["Databricks "] license = "Apache-2.0" diff --git a/src/databricks/sql/__init__.py b/src/databricks/sql/__init__.py index 7c43a02e..6c7db4d5 100644 --- a/src/databricks/sql/__init__.py +++ b/src/databricks/sql/__init__.py @@ -28,7 +28,7 @@ def __repr__(self): DATE = DBAPITypeObject("date") ROWID = DBAPITypeObject() -__version__ = "2.6.1" +__version__ = "2.6.2" USER_AGENT_NAME = "PyDatabricksSqlConnector" # These two functions are pyhive legacy From 54e3769a495d9b54bc0b0bee9baf5d0753d47785 Mon Sep 17 00:00:00 2001 From: Jacky Hu Date: Tue, 20 Jun 2023 14:24:54 -0700 Subject: [PATCH 08/40] [PECO-626] Support OAuth flow for Databricks Azure (#86) ## Summary Support OAuth flow for Databricks Azure ## Background Some OAuth endpoints (e.g. Open ID Configuration) and scopes are different between Databricks Azure and AWS. Current code only supports OAuth flow on Databricks in AWS ## What changes are proposed in this pull request? - Change `OAuthManager` to decouple Databricks AWS specific configuration from OAuth flow - Add `sql/auth/endpoint.py` that implements cloud specific OAuth endpoint configuration - Change `DatabricksOAuthProvider` to work with the OAuth configurations in different Databricks cloud (AWS, Azure) - Add the corresponding unit tests --- CHANGELOG.md | 2 + src/databricks/sql/auth/auth.py | 16 ++- src/databricks/sql/auth/authenticators.py | 20 +++- src/databricks/sql/auth/endpoint.py | 112 ++++++++++++++++++ src/databricks/sql/auth/oauth.py | 39 +++--- .../sql/experimental/oauth_persistence.py | 11 ++ tests/unit/test_auth.py | 40 +++++++ tests/unit/test_endpoint.py | 57 +++++++++ 8 files changed, 273 insertions(+), 24 deletions(-) create mode 100644 src/databricks/sql/auth/endpoint.py create mode 100644 tests/unit/test_endpoint.py diff --git a/CHANGELOG.md b/CHANGELOG.md index b15ea555..a947be50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## 2.6.x (Unreleased) +- Add support for OAuth on Databricks Azure + ## 2.6.2 (2023-06-14) - Fix: Retry GetOperationStatus requests for http errors diff --git a/src/databricks/sql/auth/auth.py b/src/databricks/sql/auth/auth.py index b56d8f7f..48ffaad3 100644 --- a/src/databricks/sql/auth/auth.py +++ b/src/databricks/sql/auth/auth.py @@ -8,6 +8,7 @@ ExternalAuthProvider, DatabricksOAuthProvider, ) +from databricks.sql.auth.endpoint import infer_cloud_from_host, CloudType from databricks.sql.experimental.oauth_persistence import OAuthPersistence @@ -75,7 +76,9 @@ def get_auth_provider(cfg: ClientContext): PYSQL_OAUTH_SCOPES = ["sql", "offline_access"] PYSQL_OAUTH_CLIENT_ID = "databricks-sql-python" +PYSQL_OAUTH_AZURE_CLIENT_ID = "96eecda7-19ea-49cc-abb5-240097d554f5" PYSQL_OAUTH_REDIRECT_PORT_RANGE = list(range(8020, 8025)) +PYSQL_OAUTH_AZURE_REDIRECT_PORT_RANGE = [8030] def normalize_host_name(hostname: str): @@ -84,7 +87,16 @@ def normalize_host_name(hostname: str): return f"{maybe_scheme}{hostname}{maybe_trailing_slash}" +def get_client_id_and_redirect_port(hostname: str): + return ( + (PYSQL_OAUTH_CLIENT_ID, 
PYSQL_OAUTH_REDIRECT_PORT_RANGE) + if infer_cloud_from_host(hostname) == CloudType.AWS + else (PYSQL_OAUTH_AZURE_CLIENT_ID, PYSQL_OAUTH_AZURE_REDIRECT_PORT_RANGE) + ) + + def get_python_sql_connector_auth_provider(hostname: str, **kwargs): + (client_id, redirect_port_range) = get_client_id_and_redirect_port(hostname) cfg = ClientContext( hostname=normalize_host_name(hostname), auth_type=kwargs.get("auth_type"), @@ -94,10 +106,10 @@ def get_python_sql_connector_auth_provider(hostname: str, **kwargs): use_cert_as_auth=kwargs.get("_use_cert_as_auth"), tls_client_cert_file=kwargs.get("_tls_client_cert_file"), oauth_scopes=PYSQL_OAUTH_SCOPES, - oauth_client_id=kwargs.get("oauth_client_id") or PYSQL_OAUTH_CLIENT_ID, + oauth_client_id=kwargs.get("oauth_client_id") or client_id, oauth_redirect_port_range=[kwargs["oauth_redirect_port"]] if kwargs.get("oauth_client_id") and kwargs.get("oauth_redirect_port") - else PYSQL_OAUTH_REDIRECT_PORT_RANGE, + else redirect_port_range, oauth_persistence=kwargs.get("experimental_oauth_persistence"), credentials_provider=kwargs.get("credentials_provider"), ) diff --git a/src/databricks/sql/auth/authenticators.py b/src/databricks/sql/auth/authenticators.py index eb368e1e..1cd68f90 100644 --- a/src/databricks/sql/auth/authenticators.py +++ b/src/databricks/sql/auth/authenticators.py @@ -4,6 +4,7 @@ from typing import Callable, Dict, List from databricks.sql.auth.oauth import OAuthManager +from databricks.sql.auth.endpoint import get_oauth_endpoints, infer_cloud_from_host # Private API: this is an evolving interface and it will change in the future. # Please must not depend on it in your applications. @@ -70,11 +71,26 @@ def __init__( scopes: List[str], ): try: + cloud_type = infer_cloud_from_host(hostname) + if not cloud_type: + raise NotImplementedError("Cannot infer the cloud type from hostname") + + idp_endpoint = get_oauth_endpoints(cloud_type) + if not idp_endpoint: + raise NotImplementedError( + f"OAuth is not supported for cloud ${cloud_type.value}" + ) + + # Convert to the corresponding scopes in the corresponding IdP + cloud_scopes = idp_endpoint.get_scopes_mapping(scopes) + self.oauth_manager = OAuthManager( - port_range=redirect_port_range, client_id=client_id + port_range=redirect_port_range, + client_id=client_id, + idp_endpoint=idp_endpoint, ) self._hostname = hostname - self._scopes_as_str = DatabricksOAuthProvider.SCOPE_DELIM.join(scopes) + self._scopes_as_str = DatabricksOAuthProvider.SCOPE_DELIM.join(cloud_scopes) self._oauth_persistence = oauth_persistence self._client_id = client_id self._access_token = None diff --git a/src/databricks/sql/auth/endpoint.py b/src/databricks/sql/auth/endpoint.py new file mode 100644 index 00000000..e24f9d75 --- /dev/null +++ b/src/databricks/sql/auth/endpoint.py @@ -0,0 +1,112 @@ +# +# It implements all the cloud specific OAuth configuration/metadata +# +# Azure: It uses AAD +# AWS: It uses Databricks internal IdP +# GCP: Not support yet +# +from abc import ABC, abstractmethod +from enum import Enum +from typing import Optional, List +import os + +OIDC_REDIRECTOR_PATH = "oidc" + + +class OAuthScope: + OFFLINE_ACCESS = "offline_access" + SQL = "sql" + + +class CloudType(Enum): + AWS = "aws" + AZURE = "azure" + + +DATABRICKS_AWS_DOMAINS = [".cloud.databricks.com", ".dev.databricks.com"] +DATABRICKS_AZURE_DOMAINS = [ + ".azuredatabricks.net", + ".databricks.azure.cn", + ".databricks.azure.us", +] + + +# Infer cloud type from Databricks SQL instance hostname +def infer_cloud_from_host(hostname: str) -> 
Optional[CloudType]: + # normalize + host = hostname.lower().replace("https://", "").split("/")[0] + + if any(e for e in DATABRICKS_AZURE_DOMAINS if host.endswith(e)): + return CloudType.AZURE + elif any(e for e in DATABRICKS_AWS_DOMAINS if host.endswith(e)): + return CloudType.AWS + else: + return None + + +def get_databricks_oidc_url(hostname: str): + maybe_scheme = "https://" if not hostname.startswith("https://") else "" + maybe_trailing_slash = "/" if not hostname.endswith("/") else "" + return f"{maybe_scheme}{hostname}{maybe_trailing_slash}{OIDC_REDIRECTOR_PATH}" + + +class OAuthEndpointCollection(ABC): + @abstractmethod + def get_scopes_mapping(self, scopes: List[str]) -> List[str]: + raise NotImplementedError() + + # Endpoint for oauth2 authorization e.g https://idp.example.com/oauth2/v2.0/authorize + @abstractmethod + def get_authorization_url(self, hostname: str) -> str: + raise NotImplementedError() + + # Endpoint for well-known openid configuration e.g https://idp.example.com/oauth2/.well-known/openid-configuration + @abstractmethod + def get_openid_config_url(self, hostname: str) -> str: + raise NotImplementedError() + + +class AzureOAuthEndpointCollection(OAuthEndpointCollection): + DATATRICKS_AZURE_APP = "2ff814a6-3304-4ab8-85cb-cd0e6f879c1d" + + def get_scopes_mapping(self, scopes: List[str]) -> List[str]: + # There is no corresponding scopes in Azure, instead, access control will be delegated to Databricks + tenant_id = os.getenv( + "DATABRICKS_AZURE_TENANT_ID", + AzureOAuthEndpointCollection.DATATRICKS_AZURE_APP, + ) + azure_scope = f"{tenant_id}/user_impersonation" + mapped_scopes = [azure_scope] + if OAuthScope.OFFLINE_ACCESS in scopes: + mapped_scopes.append(OAuthScope.OFFLINE_ACCESS) + return mapped_scopes + + def get_authorization_url(self, hostname: str): + # We need get account specific url, which can be redirected by databricks unified oidc endpoint + return f"{get_databricks_oidc_url(hostname)}/oauth2/v2.0/authorize" + + def get_openid_config_url(self, hostname: str): + return "https://login.microsoftonline.com/organizations/v2.0/.well-known/openid-configuration" + + +class AwsOAuthEndpointCollection(OAuthEndpointCollection): + def get_scopes_mapping(self, scopes: List[str]) -> List[str]: + # No scope mapping in AWS + return scopes.copy() + + def get_authorization_url(self, hostname: str): + idp_url = get_databricks_oidc_url(hostname) + return f"{idp_url}/oauth2/v2.0/authorize" + + def get_openid_config_url(self, hostname: str): + idp_url = get_databricks_oidc_url(hostname) + return f"{idp_url}/.well-known/oauth-authorization-server" + + +def get_oauth_endpoints(cloud: CloudType) -> Optional[OAuthEndpointCollection]: + if cloud == CloudType.AWS: + return AwsOAuthEndpointCollection() + elif cloud == CloudType.AZURE: + return AzureOAuthEndpointCollection() + else: + return None diff --git a/src/databricks/sql/auth/oauth.py b/src/databricks/sql/auth/oauth.py index 0f49aa88..a2b9c6ed 100644 --- a/src/databricks/sql/auth/oauth.py +++ b/src/databricks/sql/auth/oauth.py @@ -14,17 +14,22 @@ from requests.exceptions import RequestException from databricks.sql.auth.oauth_http_handler import OAuthHttpSingleRequestHandler +from databricks.sql.auth.endpoint import OAuthEndpointCollection logger = logging.getLogger(__name__) class OAuthManager: - OIDC_REDIRECTOR_PATH = "oidc" - - def __init__(self, port_range: List[int], client_id: str): + def __init__( + self, + port_range: List[int], + client_id: str, + idp_endpoint: OAuthEndpointCollection, + ): self.port_range = 
port_range self.client_id = client_id self.redirect_port = None + self.idp_endpoint = idp_endpoint @staticmethod def __token_urlsafe(nbytes=32): @@ -34,14 +39,14 @@ def __token_urlsafe(nbytes=32): def __get_redirect_url(redirect_port: int): return f"http://localhost:{redirect_port}" - @staticmethod - def __fetch_well_known_config(idp_url: str): - known_config_url = f"{idp_url}/.well-known/oauth-authorization-server" + def __fetch_well_known_config(self, hostname: str): + known_config_url = self.idp_endpoint.get_openid_config_url(hostname) + try: response = requests.get(url=known_config_url) except RequestException as e: logger.error( - f"Unable to fetch OAuth configuration from {idp_url}.\n" + f"Unable to fetch OAuth configuration from {known_config_url}.\n" "Verify it is a valid workspace URL and that OAuth is " "enabled on this account." ) @@ -50,7 +55,7 @@ def __fetch_well_known_config(idp_url: str): if response.status_code != 200: msg = ( f"Received status {response.status_code} OAuth configuration from " - f"{idp_url}.\n Verify it is a valid workspace URL and " + f"{known_config_url}.\n Verify it is a valid workspace URL and " "that OAuth is enabled on this account." ) logger.error(msg) @@ -59,18 +64,12 @@ def __fetch_well_known_config(idp_url: str): return response.json() except requests.exceptions.JSONDecodeError as e: logger.error( - f"Unable to decode OAuth configuration from {idp_url}.\n" + f"Unable to decode OAuth configuration from {known_config_url}.\n" "Verify it is a valid workspace URL and that OAuth is " "enabled on this account." ) raise e - @staticmethod - def __get_idp_url(host: str): - maybe_scheme = "https://" if not host.startswith("https://") else "" - maybe_trailing_slash = "/" if not host.endswith("/") else "" - return f"{maybe_scheme}{host}{maybe_trailing_slash}{OAuthManager.OIDC_REDIRECTOR_PATH}" - @staticmethod def __get_challenge(): verifier_string = OAuthManager.__token_urlsafe(32) @@ -154,8 +153,7 @@ def __send_token_request(token_request_url, data): return response.json() def __send_refresh_token_request(self, hostname, refresh_token): - idp_url = OAuthManager.__get_idp_url(hostname) - oauth_config = OAuthManager.__fetch_well_known_config(idp_url) + oauth_config = self.__fetch_well_known_config(hostname) token_request_url = oauth_config["token_endpoint"] client = oauthlib.oauth2.WebApplicationClient(self.client_id) token_request_body = client.prepare_refresh_body( @@ -215,14 +213,15 @@ def check_and_refresh_access_token( return fresh_access_token, fresh_refresh_token, True def get_tokens(self, hostname: str, scope=None): - idp_url = self.__get_idp_url(hostname) - oauth_config = self.__fetch_well_known_config(idp_url) + oauth_config = self.__fetch_well_known_config(hostname) # We are going to override oauth_config["authorization_endpoint"] use the # /oidc redirector on the hostname, which may inject additional parameters. 
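+        # The cloud-aware endpoint collection now builds the authorize URL, so
+        # the same flow works for both AWS and Azure workspace hosts.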
- auth_url = f"{hostname}oidc/v1/authorize" + auth_url = self.idp_endpoint.get_authorization_url(hostname) + state = OAuthManager.__token_urlsafe(16) (verifier, challenge) = OAuthManager.__get_challenge() client = oauthlib.oauth2.WebApplicationClient(self.client_id) + try: auth_response = self.__get_authorization_code( client, auth_url, scope, state, challenge diff --git a/src/databricks/sql/experimental/oauth_persistence.py b/src/databricks/sql/experimental/oauth_persistence.py index bd0066d9..13a96612 100644 --- a/src/databricks/sql/experimental/oauth_persistence.py +++ b/src/databricks/sql/experimental/oauth_persistence.py @@ -27,6 +27,17 @@ def read(self, hostname: str) -> Optional[OAuthToken]: pass +class OAuthPersistenceCache(OAuthPersistence): + def __init__(self): + self.tokens = {} + + def persist(self, hostname: str, oauth_token: OAuthToken): + self.tokens[hostname] = oauth_token + + def read(self, hostname: str) -> Optional[OAuthToken]: + return self.tokens.get(hostname) + + # Note this is only intended to be used for development class DevOnlyFilePersistence(OAuthPersistence): def __init__(self, file_path): diff --git a/tests/unit/test_auth.py b/tests/unit/test_auth.py index c52f9790..df4ac9d6 100644 --- a/tests/unit/test_auth.py +++ b/tests/unit/test_auth.py @@ -1,8 +1,15 @@ import unittest +import pytest +from typing import Optional +from unittest.mock import patch from databricks.sql.auth.auth import AccessTokenAuthProvider, BasicAuthProvider, AuthProvider, ExternalAuthProvider from databricks.sql.auth.auth import get_python_sql_connector_auth_provider +from databricks.sql.auth.oauth import OAuthManager +from databricks.sql.auth.authenticators import DatabricksOAuthProvider +from databricks.sql.auth.endpoint import CloudType, AwsOAuthEndpointCollection, AzureOAuthEndpointCollection from databricks.sql.auth.authenticators import CredentialsProvider, HeaderFactory +from databricks.sql.experimental.oauth_persistence import OAuthPersistenceCache class Auth(unittest.TestCase): @@ -38,6 +45,39 @@ def test_noop_auth_provider(self): self.assertEqual(len(http_request.keys()), 1) self.assertEqual(http_request['myKey'], 'myVal') + @patch.object(OAuthManager, "check_and_refresh_access_token") + @patch.object(OAuthManager, "get_tokens") + def test_oauth_auth_provider(self, mock_get_tokens, mock_check_and_refresh): + client_id = "mock-id" + scopes = ["offline_access", "sql"] + access_token = "mock_token" + refresh_token = "mock_refresh_token" + mock_get_tokens.return_value = (access_token, refresh_token) + mock_check_and_refresh.return_value = (access_token, refresh_token, False) + + params = [(CloudType.AWS, "foo.cloud.databricks.com", AwsOAuthEndpointCollection, "offline_access sql"), + (CloudType.AZURE, "foo.1.azuredatabricks.net", AzureOAuthEndpointCollection, + f"{AzureOAuthEndpointCollection.DATATRICKS_AZURE_APP}/user_impersonation offline_access")] + + for cloud_type, host, expected_endpoint_type, expected_scopes in params: + with self.subTest(cloud_type.value): + oauth_persistence = OAuthPersistenceCache() + auth_provider = DatabricksOAuthProvider(hostname=host, + oauth_persistence=oauth_persistence, + redirect_port_range=[8020], + client_id=client_id, + scopes=scopes) + + self.assertIsInstance(auth_provider.oauth_manager.idp_endpoint, expected_endpoint_type) + self.assertEqual(auth_provider.oauth_manager.port_range, [8020]) + self.assertEqual(auth_provider.oauth_manager.client_id, client_id) + self.assertEqual(oauth_persistence.read(host).refresh_token, refresh_token) + 
mock_get_tokens.assert_called_with(hostname=host, scope=expected_scopes) + + headers = {} + auth_provider.add_headers(headers) + self.assertEqual(headers['Authorization'], f"Bearer {access_token}") + def test_external_provider(self): class MyProvider(CredentialsProvider): def auth_type(self) -> str: diff --git a/tests/unit/test_endpoint.py b/tests/unit/test_endpoint.py new file mode 100644 index 00000000..63393039 --- /dev/null +++ b/tests/unit/test_endpoint.py @@ -0,0 +1,57 @@ +import unittest +import os +import pytest + +from unittest.mock import patch + +from databricks.sql.auth.endpoint import infer_cloud_from_host, CloudType, get_oauth_endpoints, \ + AzureOAuthEndpointCollection + +aws_host = "foo-bar.cloud.databricks.com" +azure_host = "foo-bar.1.azuredatabricks.net" + + +class EndpointTest(unittest.TestCase): + def test_infer_cloud_from_host(self): + param_list = [(CloudType.AWS, aws_host), (CloudType.AZURE, azure_host), (None, "foo.example.com")] + + for expected_type, host in param_list: + with self.subTest(expected_type or "None", expected_type=expected_type): + self.assertEqual(infer_cloud_from_host(host), expected_type) + self.assertEqual(infer_cloud_from_host(f"https://{host}/to/path"), expected_type) + + def test_oauth_endpoint(self): + scopes = ["offline_access", "sql", "admin"] + scopes2 = ["sql", "admin"] + azure_scope = f"{AzureOAuthEndpointCollection.DATATRICKS_AZURE_APP}/user_impersonation" + + param_list = [(CloudType.AWS, + aws_host, + f"https://{aws_host}/oidc/oauth2/v2.0/authorize", + f"https://{aws_host}/oidc/.well-known/oauth-authorization-server", + scopes, + scopes2 + ), + ( + CloudType.AZURE, + azure_host, + f"https://{azure_host}/oidc/oauth2/v2.0/authorize", + "https://login.microsoftonline.com/organizations/v2.0/.well-known/openid-configuration", + [azure_scope, "offline_access"], + [azure_scope] + )] + + for cloud_type, host, expected_auth_url, expected_config_url, expected_scopes, expected_scope2 in param_list: + with self.subTest(cloud_type): + endpoint = get_oauth_endpoints(cloud_type) + self.assertEqual(endpoint.get_authorization_url(host), expected_auth_url) + self.assertEqual(endpoint.get_openid_config_url(host), expected_config_url) + self.assertEqual(endpoint.get_scopes_mapping(scopes), expected_scopes) + self.assertEqual(endpoint.get_scopes_mapping(scopes2), expected_scope2) + + @patch.dict(os.environ, {'DATABRICKS_AZURE_TENANT_ID': '052ee82f-b79d-443c-8682-3ec1749e56b0'}) + def test_azure_oauth_scope_mappings_from_different_tenant_id(self): + scopes = ["offline_access", "sql", "all"] + endpoint = get_oauth_endpoints(CloudType.AZURE) + self.assertEqual(endpoint.get_scopes_mapping(scopes), + ['052ee82f-b79d-443c-8682-3ec1749e56b0/user_impersonation', "offline_access"]) From 7fcfa7b62dc2a3a1dfb0ca5ad654d3e3dae82aac Mon Sep 17 00:00:00 2001 From: Jesse Date: Fri, 23 Jun 2023 17:50:34 -0500 Subject: [PATCH 09/40] Use a separate logger for unsafe thrift responses (#153) --------- Signed-off-by: Jesse Whitehouse --- CHANGELOG.md | 1 + src/databricks/sql/thrift_backend.py | 26 ++++++++++++++++++++++++-- tests/e2e/driver_tests.py | 4 ++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a947be50..20d8f8a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## 2.6.x (Unreleased) +- Redact logged thrift responses by default - Add support for OAuth on Databricks Azure ## 2.6.2 (2023-06-14) diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index c17da877..b4afeaa3 
100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -34,6 +34,16 @@ logger = logging.getLogger(__name__) +unsafe_logger = logging.getLogger("databricks.sql.unsafe") +unsafe_logger.setLevel(logging.DEBUG) + +# To capture these logs in client code, add a non-NullHandler. +# See our e2e test suite for an example with logging.FileHandler +unsafe_logger.addHandler(logging.NullHandler()) + +# Disable propagation so that handlers for `databricks.sql` don't pick up these messages +unsafe_logger.propagate = False + THRIFT_ERROR_MESSAGE_HEADER = "x-thriftserver-error-message" DATABRICKS_ERROR_OR_REDIRECT_HEADER = "x-databricks-error-or-redirect-message" DATABRICKS_REASON_HEADER = "x-databricks-reason-phrase" @@ -318,13 +328,25 @@ def attempt_request(attempt): error, error_message, retry_delay = None, None, None try: - logger.debug("Sending request: {}".format(request)) + # The MagicMocks in our unit tests have a `name` property instead of `__name__`. + logger.debug( + "Sending request: {}()".format( + getattr( + method, "__name__", getattr(method, "name", "UnknownMethod") + ) + ) + ) + unsafe_logger.debug("Sending request: {}".format(request)) response = method(request) # Calling `close()` here releases the active HTTP connection back to the pool self._transport.close() - logger.debug("Received response: {}".format(response)) + # We need to call type(response) here because thrift doesn't implement __name__ attributes for thrift responses + logger.debug( + "Received response: {}()".format(type(response).__name__) + ) + unsafe_logger.debug("Received response: {}".format(response)) return response except urllib3.exceptions.HTTPError as err: diff --git a/tests/e2e/driver_tests.py b/tests/e2e/driver_tests.py index 831ed21f..f8350475 100644 --- a/tests/e2e/driver_tests.py +++ b/tests/e2e/driver_tests.py @@ -28,6 +28,10 @@ log = logging.getLogger(__name__) +unsafe_logger = logging.getLogger("databricks.sql.unsafe") +unsafe_logger.setLevel(logging.DEBUG) +unsafe_logger.addHandler(logging.FileHandler("./tests-unsafe.log")) + # manually decorate DecimalTestsMixin to need arrow support for name in loader.getTestCaseNames(DecimalTestsMixin, 'test_'): fn = getattr(DecimalTestsMixin, name) From fecfa88e9188603fa1cb354512f6978f4af2ea11 Mon Sep 17 00:00:00 2001 From: Jesse Date: Fri, 23 Jun 2023 18:00:45 -0500 Subject: [PATCH 10/40] Improve e2e test development ergonomics (#155) --------- Signed-off-by: Jesse Whitehouse --- .gitignore | 5 +- CHANGELOG.md | 1 + CONTRIBUTING.md | 9 + poetry.lock | 1468 ++++++++--------- pyproject.toml | 10 + test.env.example | 7 + tests/e2e/__init__.py | 0 tests/e2e/common/staging_ingestion_tests.py | 288 ++++ tests/e2e/{driver_tests.py => test_driver.py} | 277 +--- 9 files changed, 1046 insertions(+), 1019 deletions(-) create mode 100644 test.env.example create mode 100644 tests/e2e/__init__.py create mode 100644 tests/e2e/common/staging_ingestion_tests.py rename tests/e2e/{driver_tests.py => test_driver.py} (72%) diff --git a/.gitignore b/.gitignore index 66c94734..d89a4116 100644 --- a/.gitignore +++ b/.gitignore @@ -204,4 +204,7 @@ dist/ build/ # vs code stuff -.vscode \ No newline at end of file +.vscode + +# don't commit authentication info to source control +test.env \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 20d8f8a7..4d3f8831 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## 2.6.x (Unreleased) +- Improve e2e test development ergonomics - Redact logged thrift responses by default - Add 
support for OAuth on Databricks Azure diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index aea830eb..53ec9735 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -109,6 +109,15 @@ export http_path="" export access_token="" ``` +Or you can write these into a file called `test.env` in the root of the repository: + +``` +host="****.cloud.databricks.com" +http_path="/sql/1.0/warehouses/***" +access_token="dapi***" +staging_ingestion_user="***@example.com" +``` + There are several e2e test suites available: - `PySQLCoreTestSuite` - `PySQLLargeQueriesSuite` diff --git a/poetry.lock b/poetry.lock index 3c95a628..8fee85b0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,10 +1,15 @@ +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. + [[package]] name = "alembic" -version = "1.10.4" +version = "1.11.1" description = "A database migration tool for SQLAlchemy." -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "alembic-1.11.1-py3-none-any.whl", hash = "sha256:dc871798a601fab38332e38d6ddb38d5e734f60034baeb8e2db5b642fccd8ab8"}, + {file = "alembic-1.11.1.tar.gz", hash = "sha256:6a810a6b012c88b33458fceb869aef09ac75d6ace5291915ba7fae44de372c01"}, +] [package.dependencies] importlib-metadata = {version = "*", markers = "python_version < \"3.9\""} @@ -20,9 +25,12 @@ tz = ["python-dateutil"] name = "astroid" version = "2.11.7" description = "An abstract syntax tree for Python with inference support." -category = "dev" optional = false python-versions = ">=3.6.2" +files = [ + {file = "astroid-2.11.7-py3-none-any.whl", hash = "sha256:86b0a340a512c65abf4368b80252754cda17c02cdbbd3f587dddf98112233e7b"}, + {file = "astroid-2.11.7.tar.gz", hash = "sha256:bb24615c77f4837c707669d16907331374ae8a964650a66999da3f5ca68dc946"}, +] [package.dependencies] lazy-object-proxy = ">=1.4.0" @@ -35,585 +43,9 @@ wrapt = ">=1.11,<2" name = "black" version = "22.12.0" description = "The uncompromising code formatter." -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -click = ">=8.0.0" -mypy-extensions = ">=0.4.3" -pathspec = ">=0.9.0" -platformdirs = ">=2" -tomli = {version = ">=1.1.0", markers = "python_full_version < \"3.11.0a7\""} -typed-ast = {version = ">=1.4.2", markers = "python_version < \"3.8\" and implementation_name == \"cpython\""} -typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} - -[package.extras] -colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.7.4)"] -jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] -uvloop = ["uvloop (>=0.15.2)"] - -[[package]] -name = "certifi" -version = "2023.5.7" -description = "Python package for providing Mozilla's CA Bundle." -category = "main" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "charset-normalizer" -version = "3.1.0" -description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -category = "main" -optional = false -python-versions = ">=3.7.0" - -[[package]] -name = "click" -version = "8.1.3" -description = "Composable command line interface toolkit" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} -importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} - -[[package]] -name = "colorama" -version = "0.4.6" -description = "Cross-platform colored terminal text." 
-category = "dev" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" - -[[package]] -name = "dill" -version = "0.3.6" -description = "serialize all of python" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -graph = ["objgraph (>=1.7.2)"] - -[[package]] -name = "et-xmlfile" -version = "1.1.0" -description = "An implementation of lxml.xmlfile for the standard library" -category = "main" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "exceptiongroup" -version = "1.1.1" -description = "Backport of PEP 654 (exception groups)" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -test = ["pytest (>=6)"] - -[[package]] -name = "greenlet" -version = "2.0.2" -description = "Lightweight in-process concurrent programming" -category = "main" -optional = false -python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" - -[package.extras] -docs = ["Sphinx", "docutils (<0.18)"] -test = ["objgraph", "psutil"] - -[[package]] -name = "idna" -version = "3.4" -description = "Internationalized Domain Names in Applications (IDNA)" -category = "main" -optional = false -python-versions = ">=3.5" - -[[package]] -name = "importlib-metadata" -version = "6.6.0" -description = "Read metadata from Python packages" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} -zipp = ">=0.5" - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -perf = ["ipython"] -testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"] - -[[package]] -name = "importlib-resources" -version = "5.12.0" -description = "Read resources from Python packages" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] - -[[package]] -name = "iniconfig" -version = "2.0.0" -description = "brain-dead simple config-ini parsing" -category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "isort" -version = "5.11.5" -description = "A Python utility / library to sort Python imports." -category = "dev" -optional = false -python-versions = ">=3.7.0" - -[package.extras] -colors = ["colorama (>=0.4.3,<0.5.0)"] -pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib"] -plugins = ["setuptools"] -requirements-deprecated-finder = ["pip-api", "pipreqs"] - -[[package]] -name = "lazy-object-proxy" -version = "1.9.0" -description = "A fast and thorough lazy object proxy." 
-category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "lz4" -version = "4.3.2" -description = "LZ4 Bindings for Python" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.extras] -docs = ["sphinx (>=1.6.0)", "sphinx-bootstrap-theme"] -flake8 = ["flake8"] -tests = ["psutil", "pytest (!=3.3.0)", "pytest-cov"] - -[[package]] -name = "mako" -version = "1.2.4" -description = "A super-fast templating language that borrows the best ideas from the existing templating languages." -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} -MarkupSafe = ">=0.9.2" - -[package.extras] -babel = ["Babel"] -lingua = ["lingua"] -testing = ["pytest"] - -[[package]] -name = "markupsafe" -version = "2.1.2" -description = "Safely add untrusted strings to HTML/XML markup." -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "mccabe" -version = "0.7.0" -description = "McCabe checker, plugin for flake8" -category = "dev" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "mypy" -version = "0.950" -description = "Optional static typing for Python" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -mypy-extensions = ">=0.4.3" -tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} -typed-ast = {version = ">=1.4.0,<2", markers = "python_version < \"3.8\""} -typing-extensions = ">=3.10" - -[package.extras] -dmypy = ["psutil (>=4.0)"] -python2 = ["typed-ast (>=1.4.0,<2)"] -reports = ["lxml"] - -[[package]] -name = "mypy-extensions" -version = "1.0.0" -description = "Type system extensions for programs checked with the mypy type checker." -category = "dev" -optional = false -python-versions = ">=3.5" - -[[package]] -name = "numpy" -version = "1.21.6" -description = "NumPy is the fundamental package for array computing with Python." 
-category = "main" -optional = false -python-versions = ">=3.7,<3.11" - -[[package]] -name = "numpy" -version = "1.24.3" -description = "Fundamental package for array computing in Python" -category = "main" -optional = false -python-versions = ">=3.8" - -[[package]] -name = "oauthlib" -version = "3.2.2" -description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" -category = "main" -optional = false -python-versions = ">=3.6" - -[package.extras] -rsa = ["cryptography (>=3.0.0)"] -signals = ["blinker (>=1.4.0)"] -signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] - -[[package]] -name = "openpyxl" -version = "3.1.2" -description = "A Python library to read/write Excel 2010 xlsx/xlsm files" -category = "main" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -et-xmlfile = "*" - -[[package]] -name = "packaging" -version = "23.1" -description = "Core utilities for Python packages" -category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "pandas" -version = "1.3.5" -description = "Powerful data structures for data analysis, time series, and statistics" -category = "main" -optional = false -python-versions = ">=3.7.1" - -[package.dependencies] -numpy = [ - {version = ">=1.17.3", markers = "platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\""}, - {version = ">=1.19.2", markers = "platform_machine == \"aarch64\" and python_version < \"3.10\""}, - {version = ">=1.20.0", markers = "platform_machine == \"arm64\" and python_version < \"3.10\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, -] -python-dateutil = ">=2.7.3" -pytz = ">=2017.3" - -[package.extras] -test = ["hypothesis (>=3.58)", "pytest (>=6.0)", "pytest-xdist"] - -[[package]] -name = "pathspec" -version = "0.11.1" -description = "Utility library for gitignore style pattern matching of file paths." -category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "platformdirs" -version = "3.5.0" -description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
-category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -typing-extensions = {version = ">=4.5", markers = "python_version < \"3.8\""} - -[package.extras] -docs = ["furo (>=2023.3.27)", "proselint (>=0.13)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"] -test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"] - -[[package]] -name = "pluggy" -version = "1.0.0" -description = "plugin and hook calling mechanisms for python" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} - -[package.extras] -dev = ["pre-commit", "tox"] -testing = ["pytest", "pytest-benchmark"] - -[[package]] -name = "pyarrow" -version = "12.0.0" -description = "Python library for Apache Arrow" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -numpy = ">=1.16.6" - -[[package]] -name = "pylint" -version = "2.13.9" -description = "python code static checker" -category = "dev" -optional = false -python-versions = ">=3.6.2" - -[package.dependencies] -astroid = ">=2.11.5,<=2.12.0-dev0" -colorama = {version = "*", markers = "sys_platform == \"win32\""} -dill = ">=0.2" -isort = ">=4.2.5,<6" -mccabe = ">=0.6,<0.8" -platformdirs = ">=2.2.0" -tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} -typing-extensions = {version = ">=3.10.0", markers = "python_version < \"3.10\""} - -[package.extras] -testutil = ["gitpython (>3)"] - -[[package]] -name = "pytest" -version = "7.3.1" -description = "pytest: simple powerful testing with Python" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -colorama = {version = "*", markers = "sys_platform == \"win32\""} -exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} -importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} -iniconfig = "*" -packaging = "*" -pluggy = ">=0.12,<2.0" -tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} - -[package.extras] -testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] - -[[package]] -name = "python-dateutil" -version = "2.8.2" -description = "Extensions to the standard Python datetime module" -category = "main" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" - -[package.dependencies] -six = ">=1.5" - -[[package]] -name = "pytz" -version = "2023.3" -description = "World timezone definitions, modern and historical" -category = "main" -optional = false -python-versions = "*" - -[[package]] -name = "requests" -version = "2.30.0" -description = "Python HTTP for Humans." 
-category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -certifi = ">=2017.4.17" -charset-normalizer = ">=2,<4" -idna = ">=2.5,<4" -urllib3 = ">=1.21.1,<3" - -[package.extras] -socks = ["PySocks (>=1.5.6,!=1.5.7)"] -use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] - -[[package]] -name = "setuptools" -version = "67.7.2" -description = "Easily download, build, install, upgrade, and uninstall Python packages" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"] -testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] -testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] - -[[package]] -name = "six" -version = "1.16.0" -description = "Python 2 and 3 compatibility utilities" -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" - -[[package]] -name = "sqlalchemy" -version = "1.4.48" -description = "Database Abstraction Library" -category = "main" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" - -[package.dependencies] -greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} -importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} - -[package.extras] -aiomysql = ["aiomysql", "greenlet (!=0.4.17)"] -aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing_extensions (!=3.10.0.1)"] -asyncio = ["greenlet (!=0.4.17)"] -asyncmy = ["asyncmy (>=0.2.3,!=0.2.4)", "greenlet (!=0.4.17)"] -mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2)"] -mssql = ["pyodbc"] -mssql-pymssql = ["pymssql"] -mssql-pyodbc = ["pyodbc"] -mypy = ["mypy (>=0.910)", "sqlalchemy2-stubs"] -mysql = ["mysqlclient (>=1.4.0)", "mysqlclient (>=1.4.0,<2)"] -mysql-connector = ["mysql-connector-python"] -oracle = ["cx_oracle (>=7)", "cx_oracle (>=7,<8)"] -postgresql = ["psycopg2 (>=2.7)"] -postgresql-asyncpg = ["asyncpg", "greenlet (!=0.4.17)"] -postgresql-pg8000 = ["pg8000 (>=1.16.6,!=1.29.0)"] -postgresql-psycopg2binary = ["psycopg2-binary"] -postgresql-psycopg2cffi = ["psycopg2cffi"] -pymysql = ["pymysql", "pymysql (<1)"] -sqlcipher = ["sqlcipher3_binary"] - -[[package]] -name = "thrift" -version = "0.16.0" -description = "Python bindings for the Apache Thrift RPC system" -category = "main" optional = false -python-versions = "*" - -[package.dependencies] -six = ">=1.7.2" - -[package.extras] -all = ["tornado (>=4.0)", "twisted"] -tornado = ["tornado (>=4.0)"] -twisted = ["twisted"] - -[[package]] 
-name = "tomli" -version = "2.0.1" -description = "A lil' TOML parser" -category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "typed-ast" -version = "1.5.4" -description = "a fork of Python 2 and 3 ast modules with type comment support" -category = "dev" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "typing-extensions" -version = "4.5.0" -description = "Backported and Experimental Type Hints for Python 3.7+" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "urllib3" -version = "2.0.2" -description = "HTTP library with thread-safe connection pooling, file post, and more." -category = "main" -optional = false -python-versions = ">=3.7" - -[package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] -secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.1.0)", "urllib3-secure-extra"] -socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] -zstd = ["zstandard (>=0.18.0)"] - -[[package]] -name = "wrapt" -version = "1.15.0" -description = "Module for decorators, wrappers and monkey patching." -category = "dev" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" - -[[package]] -name = "zipp" -version = "3.15.0" -description = "Backport of pathlib-compatible object wrapper for zip files" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] - -[metadata] -lock-version = "1.1" -python-versions = "^3.7.1" -content-hash = "8432ddba9b066e5b1c34ca44918443f1f7566d95e4f0c0a9b630dd95b95bb71e" - -[metadata.files] -alembic = [ - {file = "alembic-1.10.4-py3-none-any.whl", hash = "sha256:43942c3d4bf2620c466b91c0f4fca136fe51ae972394a0cc8b90810d664e4f5c"}, - {file = "alembic-1.10.4.tar.gz", hash = "sha256:295b54bbb92c4008ab6a7dcd1e227e668416d6f84b98b3c4446a2bc6214a556b"}, -] -astroid = [ - {file = "astroid-2.11.7-py3-none-any.whl", hash = "sha256:86b0a340a512c65abf4368b80252754cda17c02cdbbd3f587dddf98112233e7b"}, - {file = "astroid-2.11.7.tar.gz", hash = "sha256:bb24615c77f4837c707669d16907331374ae8a964650a66999da3f5ca68dc946"}, -] -black = [ +python-versions = ">=3.7" +files = [ {file = "black-22.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eedd20838bd5d75b80c9f5487dbcb06836a43833a37846cf1d8c1cc01cef59d"}, {file = "black-22.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:159a46a4947f73387b4d83e87ea006dbb2337eab6c879620a3ba52699b1f4351"}, {file = "black-22.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d30b212bffeb1e252b31dd269dfae69dd17e06d92b87ad26e23890f3efea366f"}, @@ -627,11 +59,40 @@ black = [ {file = "black-22.12.0-py3-none-any.whl", hash = "sha256:436cc9167dd28040ad90d3b404aec22cedf24a6e4d7de221bec2730ec0c97bcf"}, {file = "black-22.12.0.tar.gz", hash = "sha256:229351e5a18ca30f447bf724d007f890f97e13af070bb6ad4c0a441cd7596a2f"}, ] -certifi = [ + +[package.dependencies] +click = ">=8.0.0" +mypy-extensions = ">=0.4.3" +pathspec = ">=0.9.0" +platformdirs = ">=2" +tomli = {version = ">=1.1.0", markers = "python_full_version < \"3.11.0a7\""} +typed-ast = {version = ">=1.4.2", markers = "python_version 
< \"3.8\" and implementation_name == \"cpython\""} +typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} + +[package.extras] +colorama = ["colorama (>=0.4.3)"] +d = ["aiohttp (>=3.7.4)"] +jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] +uvloop = ["uvloop (>=0.15.2)"] + +[[package]] +name = "certifi" +version = "2023.5.7" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +files = [ {file = "certifi-2023.5.7-py3-none-any.whl", hash = "sha256:c6c2e98f5c7869efca1f8916fed228dd91539f9f1b444c314c06eef02980c716"}, {file = "certifi-2023.5.7.tar.gz", hash = "sha256:0f0d56dc5a6ad56fd4ba36484d6cc34451e1c6548c61daad8c320169f91eddc7"}, ] -charset-normalizer = [ + +[[package]] +name = "charset-normalizer" +version = "3.1.0" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +optional = false +python-versions = ">=3.7.0" +files = [ {file = "charset-normalizer-3.1.0.tar.gz", hash = "sha256:34e0a2f9c370eb95597aae63bf85eb5e96826d81e3dcf88b8886012906f509b5"}, {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e0ac8959c929593fee38da1c2b64ee9778733cdf03c482c9ff1d508b6b593b2b"}, {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d7fc3fca01da18fbabe4625d64bb612b533533ed10045a2ac3dd194bfa656b60"}, @@ -708,27 +169,79 @@ charset-normalizer = [ {file = "charset_normalizer-3.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:830d2948a5ec37c386d3170c483063798d7879037492540f10a475e3fd6f244b"}, {file = "charset_normalizer-3.1.0-py3-none-any.whl", hash = "sha256:3d9098b479e78c85080c98e1e35ff40b4a31d8953102bb0fd7d1b6f8a2111a3d"}, ] -click = [ + +[[package]] +name = "click" +version = "8.1.3" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=3.7" +files = [ {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, ] -colorama = [ + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} +importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." 
+optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] -dill = [ + +[[package]] +name = "dill" +version = "0.3.6" +description = "serialize all of python" +optional = false +python-versions = ">=3.7" +files = [ {file = "dill-0.3.6-py3-none-any.whl", hash = "sha256:a07ffd2351b8c678dfc4a856a3005f8067aea51d6ba6c700796a4d9e280f39f0"}, {file = "dill-0.3.6.tar.gz", hash = "sha256:e5db55f3687856d8fbdab002ed78544e1c4559a130302693d839dfe8f93f2373"}, ] -et-xmlfile = [ + +[package.extras] +graph = ["objgraph (>=1.7.2)"] + +[[package]] +name = "et-xmlfile" +version = "1.1.0" +description = "An implementation of lxml.xmlfile for the standard library" +optional = false +python-versions = ">=3.6" +files = [ {file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"}, {file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"}, ] -exceptiongroup = [ + +[[package]] +name = "exceptiongroup" +version = "1.1.1" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ {file = "exceptiongroup-1.1.1-py3-none-any.whl", hash = "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e"}, {file = "exceptiongroup-1.1.1.tar.gz", hash = "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785"}, ] -greenlet = [ + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "greenlet" +version = "2.0.2" +description = "Lightweight in-process concurrent programming" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" +files = [ {file = "greenlet-2.0.2-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:bdfea8c661e80d3c1c99ad7c3ff74e6e87184895bbaca6ee8cc61209f8b9b85d"}, {file = "greenlet-2.0.2-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:9d14b83fab60d5e8abe587d51c75b252bcc21683f24699ada8fb275d7712f5a9"}, {file = "greenlet-2.0.2-cp27-cp27m-win32.whl", hash = "sha256:6c3acb79b0bfd4fe733dff8bc62695283b57949ebcca05ae5c129eb606ff2d74"}, @@ -790,27 +303,95 @@ greenlet = [ {file = "greenlet-2.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:db1a39669102a1d8d12b57de2bb7e2ec9066a6f2b3da35ae511ff93b01b5d564"}, {file = "greenlet-2.0.2.tar.gz", hash = "sha256:e7c8dc13af7db097bed64a051d2dd49e9f0af495c26995c00a9ee842690d34c0"}, ] -idna = [ + +[package.extras] +docs = ["Sphinx", "docutils (<0.18)"] +test = ["objgraph", "psutil"] + +[[package]] +name = "idna" +version = "3.4" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.5" +files = [ {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, ] -importlib-metadata = [ - {file = "importlib_metadata-6.6.0-py3-none-any.whl", hash = "sha256:43dd286a2cd8995d5eaef7fee2066340423b818ed3fd70adf0bad5f1fac53fed"}, - {file = "importlib_metadata-6.6.0.tar.gz", hash = "sha256:92501cdf9cc66ebd3e612f1b4f0c0765dfa42f0fa38ffb319b6bd84dd675d705"}, + +[[package]] +name = "importlib-metadata" +version = "6.7.0" +description = 
"Read metadata from Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "importlib_metadata-6.7.0-py3-none-any.whl", hash = "sha256:cb52082e659e97afc5dac71e79de97d8681de3aa07ff18578330904a9d18e5b5"}, + {file = "importlib_metadata-6.7.0.tar.gz", hash = "sha256:1aaf550d4f73e5d6783e7acb77aec43d49da8017410afae93822cc9cca98c4d4"}, ] -importlib-resources = [ + +[package.dependencies] +typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} +zipp = ">=0.5" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +perf = ["ipython"] +testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)", "pytest-ruff"] + +[[package]] +name = "importlib-resources" +version = "5.12.0" +description = "Read resources from Python packages" +optional = false +python-versions = ">=3.7" +files = [ {file = "importlib_resources-5.12.0-py3-none-any.whl", hash = "sha256:7b1deeebbf351c7578e09bf2f63fa2ce8b5ffec296e0d349139d43cca061a81a"}, {file = "importlib_resources-5.12.0.tar.gz", hash = "sha256:4be82589bf5c1d7999aedf2a45159d10cb3ca4f19b2271f8792bc8e6da7b22f6"}, ] -iniconfig = [ + +[package.dependencies] +zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] -isort = [ + +[[package]] +name = "isort" +version = "5.11.5" +description = "A Python utility / library to sort Python imports." +optional = false +python-versions = ">=3.7.0" +files = [ {file = "isort-5.11.5-py3-none-any.whl", hash = "sha256:ba1d72fb2595a01c7895a5128f9585a5cc4b6d395f1c8d514989b9a7eb2a8746"}, {file = "isort-5.11.5.tar.gz", hash = "sha256:6be1f76a507cb2ecf16c7cf14a37e41609ca082330be4e3436a18ef74add55db"}, ] -lazy-object-proxy = [ + +[package.extras] +colors = ["colorama (>=0.4.3,<0.5.0)"] +pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib"] +plugins = ["setuptools"] +requirements-deprecated-finder = ["pip-api", "pipreqs"] + +[[package]] +name = "lazy-object-proxy" +version = "1.9.0" +description = "A fast and thorough lazy object proxy." 
+optional = false +python-versions = ">=3.7" +files = [ {file = "lazy-object-proxy-1.9.0.tar.gz", hash = "sha256:659fb5809fa4629b8a1ac5106f669cfc7bef26fbb389dda53b3e010d1ac4ebae"}, {file = "lazy_object_proxy-1.9.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b40387277b0ed2d0602b8293b94d7257e17d1479e257b4de114ea11a8cb7f2d7"}, {file = "lazy_object_proxy-1.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8c6cfb338b133fbdbc5cfaa10fe3c6aeea827db80c978dbd13bc9dd8526b7d4"}, @@ -848,7 +429,14 @@ lazy-object-proxy = [ {file = "lazy_object_proxy-1.9.0-cp39-cp39-win32.whl", hash = "sha256:9090d8e53235aa280fc9239a86ae3ea8ac58eff66a705fa6aa2ec4968b95c821"}, {file = "lazy_object_proxy-1.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:db1c1722726f47e10e0b5fdbf15ac3b8adb58c091d12b3ab713965795036985f"}, ] -lz4 = [ + +[[package]] +name = "lz4" +version = "4.3.2" +description = "LZ4 Bindings for Python" +optional = false +python-versions = ">=3.7" +files = [ {file = "lz4-4.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1c4c100d99eed7c08d4e8852dd11e7d1ec47a3340f49e3a96f8dfbba17ffb300"}, {file = "lz4-4.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:edd8987d8415b5dad25e797043936d91535017237f72fa456601be1479386c92"}, {file = "lz4-4.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f7c50542b4ddceb74ab4f8b3435327a0861f06257ca501d59067a6a482535a77"}, @@ -885,67 +473,109 @@ lz4 = [ {file = "lz4-4.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:4caedeb19e3ede6c7a178968b800f910db6503cb4cb1e9cc9221157572139b49"}, {file = "lz4-4.3.2.tar.gz", hash = "sha256:e1431d84a9cfb23e6773e72078ce8e65cad6745816d4cbf9ae67da5ea419acda"}, ] -mako = [ + +[package.extras] +docs = ["sphinx (>=1.6.0)", "sphinx-bootstrap-theme"] +flake8 = ["flake8"] +tests = ["psutil", "pytest (!=3.3.0)", "pytest-cov"] + +[[package]] +name = "mako" +version = "1.2.4" +description = "A super-fast templating language that borrows the best ideas from the existing templating languages." 
+optional = false +python-versions = ">=3.7" +files = [ {file = "Mako-1.2.4-py3-none-any.whl", hash = "sha256:c97c79c018b9165ac9922ae4f32da095ffd3c4e6872b45eded42926deea46818"}, {file = "Mako-1.2.4.tar.gz", hash = "sha256:d60a3903dc3bb01a18ad6a89cdbe2e4eadc69c0bc8ef1e3773ba53d44c3f7a34"}, ] -markupsafe = [ - {file = "MarkupSafe-2.1.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:665a36ae6f8f20a4676b53224e33d456a6f5a72657d9c83c2aa00765072f31f7"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:340bea174e9761308703ae988e982005aedf427de816d1afe98147668cc03036"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22152d00bf4a9c7c83960521fc558f55a1adbc0631fbb00a9471e097b19d72e1"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28057e985dace2f478e042eaa15606c7efccb700797660629da387eb289b9323"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca244fa73f50a800cf8c3ebf7fd93149ec37f5cb9596aa8873ae2c1d23498601"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d9d971ec1e79906046aa3ca266de79eac42f1dbf3612a05dc9368125952bd1a1"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7e007132af78ea9df29495dbf7b5824cb71648d7133cf7848a2a5dd00d36f9ff"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7313ce6a199651c4ed9d7e4cfb4aa56fe923b1adf9af3b420ee14e6d9a73df65"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-win32.whl", hash = "sha256:c4a549890a45f57f1ebf99c067a4ad0cb423a05544accaf2b065246827ed9603"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:835fb5e38fd89328e9c81067fd642b3593c33e1e17e2fdbf77f5676abb14a156"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2ec4f2d48ae59bbb9d1f9d7efb9236ab81429a764dedca114f5fdabbc3788013"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:608e7073dfa9e38a85d38474c082d4281f4ce276ac0010224eaba11e929dd53a"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:65608c35bfb8a76763f37036547f7adfd09270fbdbf96608be2bead319728fcd"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2bfb563d0211ce16b63c7cb9395d2c682a23187f54c3d79bfec33e6705473c6"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:da25303d91526aac3672ee6d49a2f3db2d9502a4a60b55519feb1a4c7714e07d"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:9cad97ab29dfc3f0249b483412c85c8ef4766d96cdf9dcf5a1e3caa3f3661cf1"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:085fd3201e7b12809f9e6e9bc1e5c96a368c8523fad5afb02afe3c051ae4afcc"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1bea30e9bf331f3fef67e0a3877b2288593c98a21ccb2cf29b74c581a4eb3af0"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-win32.whl", hash = "sha256:7df70907e00c970c60b9ef2938d894a9381f38e6b9db73c5be35e59d92e06625"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:e55e40ff0cc8cc5c07996915ad367fa47da6b3fc091fdadca7f5403239c5fec3"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = 
"sha256:a6e40afa7f45939ca356f348c8e23048e02cb109ced1eb8420961b2f40fb373a"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf877ab4ed6e302ec1d04952ca358b381a882fbd9d1b07cccbfd61783561f98a"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63ba06c9941e46fa389d389644e2d8225e0e3e5ebcc4ff1ea8506dce646f8c8a"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f1cd098434e83e656abf198f103a8207a8187c0fc110306691a2e94a78d0abb2"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:55f44b440d491028addb3b88f72207d71eeebfb7b5dbf0643f7c023ae1fba619"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:a6f2fcca746e8d5910e18782f976489939d54a91f9411c32051b4aab2bd7c513"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0b462104ba25f1ac006fdab8b6a01ebbfbce9ed37fd37fd4acd70c67c973e460"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-win32.whl", hash = "sha256:7668b52e102d0ed87cb082380a7e2e1e78737ddecdde129acadb0eccc5423859"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6d6607f98fcf17e534162f0709aaad3ab7a96032723d8ac8750ffe17ae5a0666"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:a806db027852538d2ad7555b203300173dd1b77ba116de92da9afbc3a3be3eed"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a4abaec6ca3ad8660690236d11bfe28dfd707778e2442b45addd2f086d6ef094"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f03a532d7dee1bed20bc4884194a16160a2de9ffc6354b3878ec9682bb623c54"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4cf06cdc1dda95223e9d2d3c58d3b178aa5dacb35ee7e3bbac10e4e1faacb419"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:22731d79ed2eb25059ae3df1dfc9cb1546691cc41f4e3130fe6bfbc3ecbbecfa"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f8ffb705ffcf5ddd0e80b65ddf7bed7ee4f5a441ea7d3419e861a12eaf41af58"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:8db032bf0ce9022a8e41a22598eefc802314e81b879ae093f36ce9ddf39ab1ba"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2298c859cfc5463f1b64bd55cb3e602528db6fa0f3cfd568d3605c50678f8f03"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-win32.whl", hash = "sha256:50c42830a633fa0cf9e7d27664637532791bfc31c731a87b202d2d8ac40c3ea2"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-win_amd64.whl", hash = "sha256:bb06feb762bade6bf3c8b844462274db0c76acc95c52abe8dbed28ae3d44a147"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:99625a92da8229df6d44335e6fcc558a5037dd0a760e11d84be2260e6f37002f"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8bca7e26c1dd751236cfb0c6c72d4ad61d986e9a41bbf76cb445f69488b2a2bd"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40627dcf047dadb22cd25ea7ecfe9cbf3bbbad0482ee5920b582f3809c97654f"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40dfd3fefbef579ee058f139733ac336312663c6706d1163b82b3003fb1925c4"}, - {file = 
"MarkupSafe-2.1.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:090376d812fb6ac5f171e5938e82e7f2d7adc2b629101cec0db8b267815c85e2"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:2e7821bffe00aa6bd07a23913b7f4e01328c3d5cc0b40b36c0bd81d362faeb65"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:c0a33bc9f02c2b17c3ea382f91b4db0e6cde90b63b296422a939886a7a80de1c"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b8526c6d437855442cdd3d87eede9c425c4445ea011ca38d937db299382e6fa3"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-win32.whl", hash = "sha256:137678c63c977754abe9086a3ec011e8fd985ab90631145dfb9294ad09c102a7"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-win_amd64.whl", hash = "sha256:0576fe974b40a400449768941d5d0858cc624e3249dfd1e0c33674e5c7ca7aed"}, - {file = "MarkupSafe-2.1.2.tar.gz", hash = "sha256:abcabc8c2b26036d62d4c746381a6f7cf60aafcc653198ad678306986b09450d"}, + +[package.dependencies] +importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} +MarkupSafe = ">=0.9.2" + +[package.extras] +babel = ["Babel"] +lingua = ["lingua"] +testing = ["pytest"] + +[[package]] +name = "markupsafe" +version = "2.1.3" +description = "Safely add untrusted strings to HTML/XML markup." +optional = false +python-versions = ">=3.7" +files = [ + {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-win32.whl", hash = "sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-win32.whl", hash = "sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3"}, + {file 
= "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-win32.whl", hash = "sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-win32.whl", hash = "sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba"}, + {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"}, ] -mccabe = [ + +[[package]] +name = "mccabe" +version = "0.7.0" +description = "McCabe checker, plugin for flake8" +optional = false +python-versions = ">=3.6" +files = [ {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, ] -mypy = [ + +[[package]] +name = "mypy" +version = "0.950" +description = "Optional static typing for Python" +optional = false +python-versions = ">=3.6" +files = [ {file = "mypy-0.950-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cf9c261958a769a3bd38c3e133801ebcd284ffb734ea12d01457cb09eacf7d7b"}, {file = "mypy-0.950-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5b5bd0ffb11b4aba2bb6d31b8643902c48f990cc92fda4e21afac658044f0c0"}, {file = "mypy-0.950-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5e7647df0f8fc947388e6251d728189cfadb3b1e558407f93254e35abc026e22"}, @@ -970,11 +600,36 @@ mypy = [ {file = "mypy-0.950-py3-none-any.whl", hash = "sha256:a4d9898f46446bfb6405383b57b96737dcfd0a7f25b748e78ef3e8c576bba3cb"}, {file = "mypy-0.950.tar.gz", hash = "sha256:1b333cfbca1762ff15808a0ef4f71b5d3eed8528b23ea1c3fb50543c867d68de"}, ] -mypy-extensions = [ + +[package.dependencies] +mypy-extensions = ">=0.4.3" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typed-ast = {version = ">=1.4.0,<2", markers = 
"python_version < \"3.8\""} +typing-extensions = ">=3.10" + +[package.extras] +dmypy = ["psutil (>=4.0)"] +python2 = ["typed-ast (>=1.4.0,<2)"] +reports = ["lxml"] + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." +optional = false +python-versions = ">=3.5" +files = [ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] -numpy = [ + +[[package]] +name = "numpy" +version = "1.21.6" +description = "NumPy is the fundamental package for array computing with Python." +optional = false +python-versions = ">=3.7,<3.11" +files = [ {file = "numpy-1.21.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8737609c3bbdd48e380d463134a35ffad3b22dc56295eff6f79fd85bd0eeeb25"}, {file = "numpy-1.21.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fdffbfb6832cd0b300995a2b08b8f6fa9f6e856d562800fea9182316d99c4e8e"}, {file = "numpy-1.21.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3820724272f9913b597ccd13a467cc492a0da6b05df26ea09e78b171a0bb9da6"}, @@ -1006,48 +661,90 @@ numpy = [ {file = "numpy-1.21.6-cp39-cp39-win_amd64.whl", hash = "sha256:e31f0bb5928b793169b87e3d1e070f2342b22d5245c755e2b81caa29756246c3"}, {file = "numpy-1.21.6-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:dd1c8f6bd65d07d3810b90d02eba7997e32abbdf1277a481d698969e921a3be0"}, {file = "numpy-1.21.6.zip", hash = "sha256:ecb55251139706669fdec2ff073c98ef8e9a84473e51e716211b41aa0f18e656"}, - {file = "numpy-1.24.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3c1104d3c036fb81ab923f507536daedc718d0ad5a8707c6061cdfd6d184e570"}, - {file = "numpy-1.24.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:202de8f38fc4a45a3eea4b63e2f376e5f2dc64ef0fa692838e31a808520efaf7"}, - {file = "numpy-1.24.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8535303847b89aa6b0f00aa1dc62867b5a32923e4d1681a35b5eef2d9591a463"}, - {file = "numpy-1.24.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d926b52ba1367f9acb76b0df6ed21f0b16a1ad87c6720a1121674e5cf63e2b6"}, - {file = "numpy-1.24.3-cp310-cp310-win32.whl", hash = "sha256:f21c442fdd2805e91799fbe044a7b999b8571bb0ab0f7850d0cb9641a687092b"}, - {file = "numpy-1.24.3-cp310-cp310-win_amd64.whl", hash = "sha256:ab5f23af8c16022663a652d3b25dcdc272ac3f83c3af4c02eb8b824e6b3ab9d7"}, - {file = "numpy-1.24.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9a7721ec204d3a237225db3e194c25268faf92e19338a35f3a224469cb6039a3"}, - {file = "numpy-1.24.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d6cc757de514c00b24ae8cf5c876af2a7c3df189028d68c0cb4eaa9cd5afc2bf"}, - {file = "numpy-1.24.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76e3f4e85fc5d4fd311f6e9b794d0c00e7002ec122be271f2019d63376f1d385"}, - {file = "numpy-1.24.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a1d3c026f57ceaad42f8231305d4653d5f05dc6332a730ae5c0bea3513de0950"}, - {file = "numpy-1.24.3-cp311-cp311-win32.whl", hash = "sha256:c91c4afd8abc3908e00a44b2672718905b8611503f7ff87390cc0ac3423fb096"}, - {file = "numpy-1.24.3-cp311-cp311-win_amd64.whl", hash = "sha256:5342cf6aad47943286afa6f1609cad9b4266a05e7f2ec408e2cf7aea7ff69d80"}, - {file = "numpy-1.24.3-cp38-cp38-macosx_10_9_x86_64.whl", hash 
= "sha256:7776ea65423ca6a15255ba1872d82d207bd1e09f6d0894ee4a64678dd2204078"}, - {file = "numpy-1.24.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ae8d0be48d1b6ed82588934aaaa179875e7dc4f3d84da18d7eae6eb3f06c242c"}, - {file = "numpy-1.24.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ecde0f8adef7dfdec993fd54b0f78183051b6580f606111a6d789cd14c61ea0c"}, - {file = "numpy-1.24.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4749e053a29364d3452c034827102ee100986903263e89884922ef01a0a6fd2f"}, - {file = "numpy-1.24.3-cp38-cp38-win32.whl", hash = "sha256:d933fabd8f6a319e8530d0de4fcc2e6a61917e0b0c271fded460032db42a0fe4"}, - {file = "numpy-1.24.3-cp38-cp38-win_amd64.whl", hash = "sha256:56e48aec79ae238f6e4395886b5eaed058abb7231fb3361ddd7bfdf4eed54289"}, - {file = "numpy-1.24.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4719d5aefb5189f50887773699eaf94e7d1e02bf36c1a9d353d9f46703758ca4"}, - {file = "numpy-1.24.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0ec87a7084caa559c36e0a2309e4ecb1baa03b687201d0a847c8b0ed476a7187"}, - {file = "numpy-1.24.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea8282b9bcfe2b5e7d491d0bf7f3e2da29700cec05b49e64d6246923329f2b02"}, - {file = "numpy-1.24.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:210461d87fb02a84ef243cac5e814aad2b7f4be953b32cb53327bb49fd77fbb4"}, - {file = "numpy-1.24.3-cp39-cp39-win32.whl", hash = "sha256:784c6da1a07818491b0ffd63c6bbe5a33deaa0e25a20e1b3ea20cf0e43f8046c"}, - {file = "numpy-1.24.3-cp39-cp39-win_amd64.whl", hash = "sha256:d5036197ecae68d7f491fcdb4df90082b0d4960ca6599ba2659957aafced7c17"}, - {file = "numpy-1.24.3-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:352ee00c7f8387b44d19f4cada524586f07379c0d49270f87233983bc5087ca0"}, - {file = "numpy-1.24.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a7d6acc2e7524c9955e5c903160aa4ea083736fde7e91276b0e5d98e6332812"}, - {file = "numpy-1.24.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:35400e6a8d102fd07c71ed7dcadd9eb62ee9a6e84ec159bd48c28235bbb0f8e4"}, - {file = "numpy-1.24.3.tar.gz", hash = "sha256:ab344f1bf21f140adab8e47fdbc7c35a477dc01408791f8ba00d018dd0bc5155"}, ] -oauthlib = [ + +[[package]] +name = "numpy" +version = "1.25.0" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "numpy-1.25.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8aa130c3042052d656751df5e81f6d61edff3e289b5994edcf77f54118a8d9f4"}, + {file = "numpy-1.25.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e3f2b96e3b63c978bc29daaa3700c028fe3f049ea3031b58aa33fe2a5809d24"}, + {file = "numpy-1.25.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6b267f349a99d3908b56645eebf340cb58f01bd1e773b4eea1a905b3f0e4208"}, + {file = "numpy-1.25.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4aedd08f15d3045a4e9c648f1e04daca2ab1044256959f1f95aafeeb3d794c16"}, + {file = "numpy-1.25.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6d183b5c58513f74225c376643234c369468e02947b47942eacbb23c1671f25d"}, + {file = "numpy-1.25.0-cp310-cp310-win32.whl", hash = "sha256:d76a84998c51b8b68b40448ddd02bd1081bb33abcdc28beee6cd284fe11036c6"}, + {file = "numpy-1.25.0-cp310-cp310-win_amd64.whl", hash = "sha256:c0dc071017bc00abb7d7201bac06fa80333c6314477b3d10b52b58fa6a6e38f6"}, + {file = 
"numpy-1.25.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c69fe5f05eea336b7a740e114dec995e2f927003c30702d896892403df6dbf0"}, + {file = "numpy-1.25.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9c7211d7920b97aeca7b3773a6783492b5b93baba39e7c36054f6e749fc7490c"}, + {file = "numpy-1.25.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ecc68f11404930e9c7ecfc937aa423e1e50158317bf67ca91736a9864eae0232"}, + {file = "numpy-1.25.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e559c6afbca484072a98a51b6fa466aae785cfe89b69e8b856c3191bc8872a82"}, + {file = "numpy-1.25.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6c284907e37f5e04d2412950960894b143a648dea3f79290757eb878b91acbd1"}, + {file = "numpy-1.25.0-cp311-cp311-win32.whl", hash = "sha256:95367ccd88c07af21b379be1725b5322362bb83679d36691f124a16357390153"}, + {file = "numpy-1.25.0-cp311-cp311-win_amd64.whl", hash = "sha256:b76aa836a952059d70a2788a2d98cb2a533ccd46222558b6970348939e55fc24"}, + {file = "numpy-1.25.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b792164e539d99d93e4e5e09ae10f8cbe5466de7d759fc155e075237e0c274e4"}, + {file = "numpy-1.25.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7cd981ccc0afe49b9883f14761bb57c964df71124dcd155b0cba2b591f0d64b9"}, + {file = "numpy-1.25.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5aa48bebfb41f93043a796128854b84407d4df730d3fb6e5dc36402f5cd594c0"}, + {file = "numpy-1.25.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5177310ac2e63d6603f659fadc1e7bab33dd5a8db4e0596df34214eeab0fee3b"}, + {file = "numpy-1.25.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0ac6edfb35d2a99aaf102b509c8e9319c499ebd4978df4971b94419a116d0790"}, + {file = "numpy-1.25.0-cp39-cp39-win32.whl", hash = "sha256:7412125b4f18aeddca2ecd7219ea2d2708f697943e6f624be41aa5f8a9852cc4"}, + {file = "numpy-1.25.0-cp39-cp39-win_amd64.whl", hash = "sha256:26815c6c8498dc49d81faa76d61078c4f9f0859ce7817919021b9eba72b425e3"}, + {file = "numpy-1.25.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5b1b90860bf7d8a8c313b372d4f27343a54f415b20fb69dd601b7efe1029c91e"}, + {file = "numpy-1.25.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85cdae87d8c136fd4da4dad1e48064d700f63e923d5af6c8c782ac0df8044542"}, + {file = "numpy-1.25.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:cc3fda2b36482891db1060f00f881c77f9423eead4c3579629940a3e12095fe8"}, + {file = "numpy-1.25.0.tar.gz", hash = "sha256:f1accae9a28dc3cda46a91de86acf69de0d1b5f4edd44a9b0c3ceb8036dfff19"}, +] + +[[package]] +name = "oauthlib" +version = "3.2.2" +description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" +optional = false +python-versions = ">=3.6" +files = [ {file = "oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca"}, {file = "oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918"}, ] -openpyxl = [ + +[package.extras] +rsa = ["cryptography (>=3.0.0)"] +signals = ["blinker (>=1.4.0)"] +signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] + +[[package]] +name = "openpyxl" +version = "3.1.2" +description = "A Python library to read/write Excel 2010 xlsx/xlsm files" +optional = false +python-versions = ">=3.6" +files = [ {file = "openpyxl-3.1.2-py2.py3-none-any.whl", hash = 
"sha256:f91456ead12ab3c6c2e9491cf33ba6d08357d802192379bb482f1033ade496f5"}, {file = "openpyxl-3.1.2.tar.gz", hash = "sha256:a6f5977418eff3b2d5500d54d9db50c8277a368436f4e4f8ddb1be3422870184"}, ] -packaging = [ + +[package.dependencies] +et-xmlfile = "*" + +[[package]] +name = "packaging" +version = "23.1" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.7" +files = [ {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"}, {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, ] -pandas = [ + +[[package]] +name = "pandas" +version = "1.3.5" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = false +python-versions = ">=3.7.1" +files = [ {file = "pandas-1.3.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:62d5b5ce965bae78f12c1c0df0d387899dd4211ec0bdc52822373f13a3a022b9"}, {file = "pandas-1.3.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:adfeb11be2d54f275142c8ba9bf67acee771b7186a5745249c7d5a06c670136b"}, {file = "pandas-1.3.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:60a8c055d58873ad81cae290d974d13dd479b82cbb975c3e1fa2cf1920715296"}, @@ -1074,74 +771,260 @@ pandas = [ {file = "pandas-1.3.5-cp39-cp39-win_amd64.whl", hash = "sha256:32e1a26d5ade11b547721a72f9bfc4bd113396947606e00d5b4a5b79b3dcb006"}, {file = "pandas-1.3.5.tar.gz", hash = "sha256:1e4285f5de1012de20ca46b188ccf33521bff61ba5c5ebd78b4fb28e5416a9f1"}, ] -pathspec = [ + +[package.dependencies] +numpy = [ + {version = ">=1.17.3", markers = "(platform_machine != \"aarch64\" and platform_machine != \"arm64\") and python_version < \"3.10\""}, + {version = ">=1.19.2", markers = "platform_machine == \"aarch64\" and python_version < \"3.10\""}, + {version = ">=1.20.0", markers = "platform_machine == \"arm64\" and python_version < \"3.10\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, +] +python-dateutil = ">=2.7.3" +pytz = ">=2017.3" + +[package.extras] +test = ["hypothesis (>=3.58)", "pytest (>=6.0)", "pytest-xdist"] + +[[package]] +name = "pathspec" +version = "0.11.1" +description = "Utility library for gitignore style pattern matching of file paths." +optional = false +python-versions = ">=3.7" +files = [ {file = "pathspec-0.11.1-py3-none-any.whl", hash = "sha256:d8af70af76652554bd134c22b3e8a1cc46ed7d91edcdd721ef1a0c51a84a5293"}, {file = "pathspec-0.11.1.tar.gz", hash = "sha256:2798de800fa92780e33acca925945e9a19a133b715067cf165b8866c15a31687"}, ] -platformdirs = [ - {file = "platformdirs-3.5.0-py3-none-any.whl", hash = "sha256:47692bc24c1958e8b0f13dd727307cff1db103fca36399f457da8e05f222fdc4"}, - {file = "platformdirs-3.5.0.tar.gz", hash = "sha256:7954a68d0ba23558d753f73437c55f89027cf8f5108c19844d4b82e5af396335"}, + +[[package]] +name = "platformdirs" +version = "3.7.0" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "platformdirs-3.7.0-py3-none-any.whl", hash = "sha256:cfd065ba43133ff103ab3bd10aecb095c2a0035fcd1f07217c9376900d94ba07"}, + {file = "platformdirs-3.7.0.tar.gz", hash = "sha256:87fbf6473e87c078d536980ba970a472422e94f17b752cfad17024c18876d481"}, ] -pluggy = [ - {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, - {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, + +[package.dependencies] +typing-extensions = {version = ">=4.6.3", markers = "python_version < \"3.8\""} + +[package.extras] +docs = ["furo (>=2023.5.20)", "proselint (>=0.13)", "sphinx (>=7.0.1)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest-cov (>=4.1)", "pytest-mock (>=3.10)"] + +[[package]] +name = "pluggy" +version = "1.2.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pluggy-1.2.0-py3-none-any.whl", hash = "sha256:c2fd55a7d7a3863cba1a013e4e2414658b1d07b6bc57b3919e0c63c9abb99849"}, + {file = "pluggy-1.2.0.tar.gz", hash = "sha256:d12f0c4b579b15f5e054301bb226ee85eeeba08ffec228092f8defbaa3a4c4b3"}, ] -pyarrow = [ - {file = "pyarrow-12.0.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:3b97649c8a9a09e1d8dc76513054f1331bd9ece78ee39365e6bf6bc7503c1e94"}, - {file = "pyarrow-12.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bc4ea634dacb03936f50fcf59574a8e727f90c17c24527e488d8ceb52ae284de"}, - {file = "pyarrow-12.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d568acfca3faa565d663e53ee34173be8e23a95f78f2abfdad198010ec8f745"}, - {file = "pyarrow-12.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b50bb9a82dca38a002d7cbd802a16b1af0f8c50ed2ec94a319f5f2afc047ee9"}, - {file = "pyarrow-12.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:3d1733b1ea086b3c101427d0e57e2be3eb964686e83c2363862a887bb5c41fa8"}, - {file = "pyarrow-12.0.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:a7cd32fe77f967fe08228bc100433273020e58dd6caced12627bcc0a7675a513"}, - {file = "pyarrow-12.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:92fb031e6777847f5c9b01eaa5aa0c9033e853ee80117dce895f116d8b0c3ca3"}, - {file = "pyarrow-12.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:280289ebfd4ac3570f6b776515baa01e4dcbf17122c401e4b7170a27c4be63fd"}, - {file = "pyarrow-12.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:272f147d4f8387bec95f17bb58dcfc7bc7278bb93e01cb7b08a0e93a8921e18e"}, - {file = "pyarrow-12.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:0846ace49998825eda4722f8d7f83fa05601c832549c9087ea49d6d5397d8cec"}, - {file = "pyarrow-12.0.0-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:993287136369aca60005ee7d64130f9466489c4f7425f5c284315b0a5401ccd9"}, - {file = "pyarrow-12.0.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a7b6a765ee4f88efd7d8348d9a1f804487d60799d0428b6ddf3344eaef37282"}, - {file = "pyarrow-12.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a1c4fce253d5bdc8d62f11cfa3da5b0b34b562c04ce84abb8bd7447e63c2b327"}, - {file = "pyarrow-12.0.0-cp37-cp37m-win_amd64.whl", hash = "sha256:e6be4d85707fc8e7a221c8ab86a40449ce62559ce25c94321df7c8500245888f"}, - {file = 
"pyarrow-12.0.0-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:ea830d9f66bfb82d30b5794642f83dd0e4a718846462d22328981e9eb149cba8"}, - {file = "pyarrow-12.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7b5b9f60d9ef756db59bec8d90e4576b7df57861e6a3d6a8bf99538f68ca15b3"}, - {file = "pyarrow-12.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b99e559d27db36ad3a33868a475f03e3129430fc065accc839ef4daa12c6dab6"}, - {file = "pyarrow-12.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b0810864a593b89877120972d1f7af1d1c9389876dbed92b962ed81492d3ffc"}, - {file = "pyarrow-12.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:23a77d97f4d101ddfe81b9c2ee03a177f0e590a7e68af15eafa06e8f3cf05976"}, - {file = "pyarrow-12.0.0-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:2cc63e746221cddb9001f7281dee95fd658085dd5b717b076950e1ccc607059c"}, - {file = "pyarrow-12.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d8c26912607e26c2991826bbaf3cf2b9c8c3e17566598c193b492f058b40d3a4"}, - {file = "pyarrow-12.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d8b90efc290e99a81d06015f3a46601c259ecc81ffb6d8ce288c91bd1b868c9"}, - {file = "pyarrow-12.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2466be046b81863be24db370dffd30a2e7894b4f9823fb60ef0a733c31ac6256"}, - {file = "pyarrow-12.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:0e36425b1c1cbf5447718b3f1751bf86c58f2b3ad299f996cd9b1aa040967656"}, - {file = "pyarrow-12.0.0.tar.gz", hash = "sha256:19c812d303610ab5d664b7b1de4051ae23565f9f94d04cbea9e50569746ae1ee"}, + +[package.dependencies] +importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pyarrow" +version = "12.0.1" +description = "Python library for Apache Arrow" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pyarrow-12.0.1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:6d288029a94a9bb5407ceebdd7110ba398a00412c5b0155ee9813a40d246c5df"}, + {file = "pyarrow-12.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:345e1828efdbd9aa4d4de7d5676778aba384a2c3add896d995b23d368e60e5af"}, + {file = "pyarrow-12.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8d6009fdf8986332b2169314da482baed47ac053311c8934ac6651e614deacd6"}, + {file = "pyarrow-12.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d3c4cbbf81e6dd23fe921bc91dc4619ea3b79bc58ef10bce0f49bdafb103daf"}, + {file = "pyarrow-12.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:cdacf515ec276709ac8042c7d9bd5be83b4f5f39c6c037a17a60d7ebfd92c890"}, + {file = "pyarrow-12.0.1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:749be7fd2ff260683f9cc739cb862fb11be376de965a2a8ccbf2693b098db6c7"}, + {file = "pyarrow-12.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6895b5fb74289d055c43db3af0de6e16b07586c45763cb5e558d38b86a91e3a7"}, + {file = "pyarrow-12.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1887bdae17ec3b4c046fcf19951e71b6a619f39fa674f9881216173566c8f718"}, + {file = "pyarrow-12.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2c9cb8eeabbadf5fcfc3d1ddea616c7ce893db2ce4dcef0ac13b099ad7ca082"}, + {file = "pyarrow-12.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:ce4aebdf412bd0eeb800d8e47db854f9f9f7e2f5a0220440acf219ddfddd4f63"}, + {file = 
"pyarrow-12.0.1-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:e0d8730c7f6e893f6db5d5b86eda42c0a130842d101992b581e2138e4d5663d3"}, + {file = "pyarrow-12.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:43364daec02f69fec89d2315f7fbfbeec956e0d991cbbef471681bd77875c40f"}, + {file = "pyarrow-12.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:051f9f5ccf585f12d7de836e50965b3c235542cc896959320d9776ab93f3b33d"}, + {file = "pyarrow-12.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:be2757e9275875d2a9c6e6052ac7957fbbfc7bc7370e4a036a9b893e96fedaba"}, + {file = "pyarrow-12.0.1-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:cf812306d66f40f69e684300f7af5111c11f6e0d89d6b733e05a3de44961529d"}, + {file = "pyarrow-12.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:459a1c0ed2d68671188b2118c63bac91eaef6fc150c77ddd8a583e3c795737bf"}, + {file = "pyarrow-12.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85e705e33eaf666bbe508a16fd5ba27ca061e177916b7a317ba5a51bee43384c"}, + {file = "pyarrow-12.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9120c3eb2b1f6f516a3b7a9714ed860882d9ef98c4b17edcdc91d95b7528db60"}, + {file = "pyarrow-12.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:c780f4dc40460015d80fcd6a6140de80b615349ed68ef9adb653fe351778c9b3"}, + {file = "pyarrow-12.0.1-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:a3c63124fc26bf5f95f508f5d04e1ece8cc23a8b0af2a1e6ab2b1ec3fdc91b24"}, + {file = "pyarrow-12.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b13329f79fa4472324f8d32dc1b1216616d09bd1e77cfb13104dec5463632c36"}, + {file = "pyarrow-12.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb656150d3d12ec1396f6dde542db1675a95c0cc8366d507347b0beed96e87ca"}, + {file = "pyarrow-12.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6251e38470da97a5b2e00de5c6a049149f7b2bd62f12fa5dbb9ac674119ba71a"}, + {file = "pyarrow-12.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:3de26da901216149ce086920547dfff5cd22818c9eab67ebc41e863a5883bac7"}, + {file = "pyarrow-12.0.1.tar.gz", hash = "sha256:cce317fc96e5b71107bf1f9f184d5e54e2bd14bbf3f9a3d62819961f0af86fec"}, ] -pylint = [ + +[package.dependencies] +numpy = ">=1.16.6" + +[[package]] +name = "pylint" +version = "2.13.9" +description = "python code static checker" +optional = false +python-versions = ">=3.6.2" +files = [ {file = "pylint-2.13.9-py3-none-any.whl", hash = "sha256:705c620d388035bdd9ff8b44c5bcdd235bfb49d276d488dd2c8ff1736aa42526"}, {file = "pylint-2.13.9.tar.gz", hash = "sha256:095567c96e19e6f57b5b907e67d265ff535e588fe26b12b5ebe1fc5645b2c731"}, ] -pytest = [ - {file = "pytest-7.3.1-py3-none-any.whl", hash = "sha256:3799fa815351fea3a5e96ac7e503a96fa51cc9942c3753cda7651b93c1cfa362"}, - {file = "pytest-7.3.1.tar.gz", hash = "sha256:434afafd78b1d78ed0addf160ad2b77a30d35d4bdf8af234fe621919d9ed15e3"}, + +[package.dependencies] +astroid = ">=2.11.5,<=2.12.0-dev0" +colorama = {version = "*", markers = "sys_platform == \"win32\""} +dill = ">=0.2" +isort = ">=4.2.5,<6" +mccabe = ">=0.6,<0.8" +platformdirs = ">=2.2.0" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = {version = ">=3.10.0", markers = "python_version < \"3.10\""} + +[package.extras] +testutil = ["gitpython (>3)"] + +[[package]] +name = "pytest" +version = "7.3.2" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.7" +files = [ + 
{file = "pytest-7.3.2-py3-none-any.whl", hash = "sha256:cdcbd012c9312258922f8cd3f1b62a6580fdced17db6014896053d47cddf9295"}, + {file = "pytest-7.3.2.tar.gz", hash = "sha256:ee990a3cc55ba808b80795a79944756f315c67c12b56abd3ac993a7b8c17030b"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "pytest-dotenv" +version = "0.5.2" +description = "A py.test plugin that parses environment files before running tests" +optional = false +python-versions = "*" +files = [ + {file = "pytest-dotenv-0.5.2.tar.gz", hash = "sha256:2dc6c3ac6d8764c71c6d2804e902d0ff810fa19692e95fe138aefc9b1aa73732"}, + {file = "pytest_dotenv-0.5.2-py3-none-any.whl", hash = "sha256:40a2cece120a213898afaa5407673f6bd924b1fa7eafce6bda0e8abffe2f710f"}, ] -python-dateutil = [ + +[package.dependencies] +pytest = ">=5.0.0" +python-dotenv = ">=0.9.1" + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, ] -pytz = [ + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "python-dotenv" +version = "0.21.1" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.7" +files = [ + {file = "python-dotenv-0.21.1.tar.gz", hash = "sha256:1c93de8f636cde3ce377292818d0e440b6e45a82f215c3744979151fa8151c49"}, + {file = "python_dotenv-0.21.1-py3-none-any.whl", hash = "sha256:41e12e0318bebc859fcc4d97d4db8d20ad21721a6aa5047dd59f090391cb549a"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + +[[package]] +name = "pytz" +version = "2023.3" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +files = [ {file = "pytz-2023.3-py2.py3-none-any.whl", hash = "sha256:a151b3abb88eda1d4e34a9814df37de2a80e301e68ba0fd856fb9b46bfbbbffb"}, {file = "pytz-2023.3.tar.gz", hash = "sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588"}, ] -requests = [ - {file = "requests-2.30.0-py3-none-any.whl", hash = "sha256:10e94cc4f3121ee6da529d358cdaeaff2f1c409cd377dbc72b825852f2f7e294"}, - {file = "requests-2.30.0.tar.gz", hash = "sha256:239d7d4458afcb28a692cdd298d87542235f4ca8d36d03a15bfc128a6559a2f4"}, + +[[package]] +name = "requests" +version = "2.31.0" +description = "Python HTTP for Humans." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, + {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, ] -setuptools = [ - {file = "setuptools-67.7.2-py3-none-any.whl", hash = "sha256:23aaf86b85ca52ceb801d32703f12d77517b2556af839621c641fca11287952b"}, - {file = "setuptools-67.7.2.tar.gz", hash = "sha256:f104fa03692a2602fa0fec6c6a9e63b6c8a968de13e17c026957dd1f53d80990"}, + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "setuptools" +version = "68.0.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "setuptools-68.0.0-py3-none-any.whl", hash = "sha256:11e52c67415a381d10d6b462ced9cfb97066179f0e871399e006c4ab101fc85f"}, + {file = "setuptools-68.0.0.tar.gz", hash = "sha256:baf1fdb41c6da4cd2eae722e135500da913332ab3f2f5c7d33af9b492acb5235"}, ] -six = [ + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] -sqlalchemy = [ + +[[package]] +name = "sqlalchemy" +version = "1.4.48" +description = "Database Abstraction Library" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +files = [ {file = "SQLAlchemy-1.4.48-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:4bac3aa3c3d8bc7408097e6fe8bf983caa6e9491c5d2e2488cfcfd8106f13b6a"}, {file = "SQLAlchemy-1.4.48-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:dbcae0e528d755f4522cad5842f0942e54b578d79f21a692c44d91352ea6d64e"}, {file = "SQLAlchemy-1.4.48-cp27-cp27m-win32.whl", hash = "sha256:cbbe8b8bffb199b225d2fe3804421b7b43a0d49983f81dc654d0431d2f855543"}, @@ -1184,14 +1067,68 @@ sqlalchemy = [ {file = "SQLAlchemy-1.4.48-cp39-cp39-win_amd64.whl", hash = "sha256:7ad2b0f6520ed5038e795cc2852eb5c1f20fa6831d73301ced4aafbe3a10e1f6"}, {file = "SQLAlchemy-1.4.48.tar.gz", hash = 
"sha256:b47bc287096d989a0838ce96f7d8e966914a24da877ed41a7531d44b55cdb8df"}, ] -thrift = [ + +[package.dependencies] +greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"win32\" or platform_machine == \"WIN32\" or platform_machine == \"AMD64\" or platform_machine == \"amd64\" or platform_machine == \"x86_64\" or platform_machine == \"ppc64le\" or platform_machine == \"aarch64\")"} +importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} + +[package.extras] +aiomysql = ["aiomysql", "greenlet (!=0.4.17)"] +aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing-extensions (!=3.10.0.1)"] +asyncio = ["greenlet (!=0.4.17)"] +asyncmy = ["asyncmy (>=0.2.3,!=0.2.4)", "greenlet (!=0.4.17)"] +mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2)"] +mssql = ["pyodbc"] +mssql-pymssql = ["pymssql"] +mssql-pyodbc = ["pyodbc"] +mypy = ["mypy (>=0.910)", "sqlalchemy2-stubs"] +mysql = ["mysqlclient (>=1.4.0)", "mysqlclient (>=1.4.0,<2)"] +mysql-connector = ["mysql-connector-python"] +oracle = ["cx-oracle (>=7)", "cx-oracle (>=7,<8)"] +postgresql = ["psycopg2 (>=2.7)"] +postgresql-asyncpg = ["asyncpg", "greenlet (!=0.4.17)"] +postgresql-pg8000 = ["pg8000 (>=1.16.6,!=1.29.0)"] +postgresql-psycopg2binary = ["psycopg2-binary"] +postgresql-psycopg2cffi = ["psycopg2cffi"] +pymysql = ["pymysql", "pymysql (<1)"] +sqlcipher = ["sqlcipher3-binary"] + +[[package]] +name = "thrift" +version = "0.16.0" +description = "Python bindings for the Apache Thrift RPC system" +optional = false +python-versions = "*" +files = [ {file = "thrift-0.16.0.tar.gz", hash = "sha256:2b5b6488fcded21f9d312aa23c9ff6a0195d0f6ae26ddbd5ad9e3e25dfc14408"}, ] -tomli = [ + +[package.dependencies] +six = ">=1.7.2" + +[package.extras] +all = ["tornado (>=4.0)", "twisted"] +tornado = ["tornado (>=4.0)"] +twisted = ["twisted"] + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] -typed-ast = [ + +[[package]] +name = "typed-ast" +version = "1.5.4" +description = "a fork of Python 2 and 3 ast modules with type comment support" +optional = false +python-versions = ">=3.6" +files = [ {file = "typed_ast-1.5.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:669dd0c4167f6f2cd9f57041e03c3c2ebf9063d0757dc89f79ba1daa2bfca9d4"}, {file = "typed_ast-1.5.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:211260621ab1cd7324e0798d6be953d00b74e0428382991adfddb352252f1d62"}, {file = "typed_ast-1.5.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:267e3f78697a6c00c689c03db4876dd1efdfea2f251a5ad6555e82a26847b4ac"}, @@ -1217,15 +1154,42 @@ typed-ast = [ {file = "typed_ast-1.5.4-cp39-cp39-win_amd64.whl", hash = "sha256:0fdbcf2fef0ca421a3f5912555804296f0b0960f0418c440f5d6d3abb549f3e1"}, {file = "typed_ast-1.5.4.tar.gz", hash = "sha256:39e21ceb7388e4bb37f4c679d72707ed46c2fbf2a5609b8b8ebc4b067d977df2"}, ] -typing-extensions = [ - {file = "typing_extensions-4.5.0-py3-none-any.whl", hash = "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"}, - {file = "typing_extensions-4.5.0.tar.gz", hash = "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb"}, + +[[package]] +name = "typing-extensions" +version = "4.6.3" +description = 
"Backported and Experimental Type Hints for Python 3.7+" +optional = false +python-versions = ">=3.7" +files = [ + {file = "typing_extensions-4.6.3-py3-none-any.whl", hash = "sha256:88a4153d8505aabbb4e13aacb7c486c2b4a33ca3b3f807914a9b4c844c471c26"}, + {file = "typing_extensions-4.6.3.tar.gz", hash = "sha256:d91d5919357fe7f681a9f2b5b4cb2a5f1ef0a1e9f59c4d8ff0d3491e05c0ffd5"}, ] -urllib3 = [ - {file = "urllib3-2.0.2-py3-none-any.whl", hash = "sha256:d055c2f9d38dc53c808f6fdc8eab7360b6fdbbde02340ed25cfbcd817c62469e"}, - {file = "urllib3-2.0.2.tar.gz", hash = "sha256:61717a1095d7e155cdb737ac7bb2f4324a858a1e2e6466f6d03ff630ca68d3cc"}, + +[[package]] +name = "urllib3" +version = "2.0.3" +description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = false +python-versions = ">=3.7" +files = [ + {file = "urllib3-2.0.3-py3-none-any.whl", hash = "sha256:48e7fafa40319d358848e1bc6809b208340fafe2096f1725d05d67443d0483d1"}, + {file = "urllib3-2.0.3.tar.gz", hash = "sha256:bee28b5e56addb8226c96f7f13ac28cb4c301dd5ea8a6ca179c0b9835e032825"}, ] -wrapt = [ + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.1.0)", "urllib3-secure-extra"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + +[[package]] +name = "wrapt" +version = "1.15.0" +description = "Module for decorators, wrappers and monkey patching." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" +files = [ {file = "wrapt-1.15.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ca1cccf838cd28d5a0883b342474c630ac48cac5df0ee6eacc9c7290f76b11c1"}, {file = "wrapt-1.15.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:e826aadda3cae59295b95343db8f3d965fb31059da7de01ee8d1c40a60398b29"}, {file = "wrapt-1.15.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:5fc8e02f5984a55d2c653f5fea93531e9836abbd84342c1d1e17abc4a15084c2"}, @@ -1302,7 +1266,23 @@ wrapt = [ {file = "wrapt-1.15.0-py3-none-any.whl", hash = "sha256:64b1df0f83706b4ef4cfb4fb0e4c2669100fd7ecacfb59e091fad300d4e04640"}, {file = "wrapt-1.15.0.tar.gz", hash = "sha256:d06730c6aed78cee4126234cf2d071e01b44b915e725a6cb439a879ec9754a3a"}, ] -zipp = [ + +[[package]] +name = "zipp" +version = "3.15.0" +description = "Backport of pathlib-compatible object wrapper for zip files" +optional = false +python-versions = ">=3.7" +files = [ {file = "zipp-3.15.0-py3-none-any.whl", hash = "sha256:48904fc76a60e542af151aded95726c1a5c34ed43ab4134b597665c86d7ad556"}, {file = "zipp-3.15.0.tar.gz", hash = "sha256:112929ad649da941c23de50f356a2b5570c954b65150642bccdd66bf194d224b"}, ] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] + +[metadata] +lock-version = "2.0" +python-versions = "^3.7.1" +content-hash = "4951f349c21ce8306bcf045928c58afc6e1e63e825768aeed358380de2c46b9b" diff --git a/pyproject.toml b/pyproject.toml index 9d08a688..f522d1ce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ pytest = "^7.1.2" mypy = "^0.950" pylint = ">=2.12.0" black = "^22.3.0" +pytest-dotenv = "^0.5.2" [tool.poetry.urls] "Homepage" = "https://github.com/databricks/databricks-sql-python" @@ 
-50,3 +51,12 @@ exclude = ['ttypes\.py$', 'TCLIService\.py$'] [tool.black] exclude = '/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist|thrift_api)/' + +[tool.pytest.ini_options] +minversion = "6.0" +testpaths = [ + "tests" +] +env_files = [ + "test.env" +] diff --git a/test.env.example b/test.env.example new file mode 100644 index 00000000..5ce7eca5 --- /dev/null +++ b/test.env.example @@ -0,0 +1,7 @@ +# Authentication details for running e2e tests +host="" +http_path="" +access_token="" + +# Only required to run the PySQLStagingIngestionTestSuite +staging_ingestion_user="" \ No newline at end of file diff --git a/tests/e2e/__init__.py b/tests/e2e/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/e2e/common/staging_ingestion_tests.py b/tests/e2e/common/staging_ingestion_tests.py new file mode 100644 index 00000000..19d5ba03 --- /dev/null +++ b/tests/e2e/common/staging_ingestion_tests.py @@ -0,0 +1,288 @@ +import os +import tempfile + +import pytest +import databricks.sql as sql +from databricks.sql import Error + +@pytest.fixture(scope="module", autouse=True) +def check_staging_ingestion_user(): + """This fixture verifies that a staging ingestion user email address + is present in the environment and raises an exception if not. The fixture + only evaluates when the test _isn't skipped_. + """ + + staging_ingestion_user = os.getenv("staging_ingestion_user") + + if staging_ingestion_user is None: + raise ValueError( + "To run this test you must designate a `staging_ingestion_user` environment variable. This will be the user associated with the personal access token." + ) + +class PySQLStagingIngestionTestSuiteMixin: + """Simple namespace for ingestion tests. These should be run against DBR >12.x + + In addition to connection credentials (host, path, token) this suite requires an env var + named staging_ingestion_user""" + + staging_ingestion_user = os.getenv("staging_ingestion_user") + + + def test_staging_ingestion_life_cycle(self): + """PUT a file into the staging location + GET the file from the staging location + REMOVE the file from the staging location + Try to GET the file again expecting to raise an exception + """ + + # PUT should succeed + + fh, temp_path = tempfile.mkstemp() + + original_text = "hello world!".encode("utf-8") + + with open(fh, "wb") as fp: + fp.write(original_text) + + with self.connection(extra_params={"staging_allowed_local_path": temp_path}) as conn: + + cursor = conn.cursor() + query = f"PUT '{temp_path}' INTO 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv' OVERWRITE" + cursor.execute(query) + + # GET should succeed + + new_fh, new_temp_path = tempfile.mkstemp() + + with self.connection(extra_params={"staging_allowed_local_path": new_temp_path}) as conn: + cursor = conn.cursor() + query = f"GET 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv' TO '{new_temp_path}'" + cursor.execute(query) + + with open(new_fh, "rb") as fp: + fetched_text = fp.read() + + assert fetched_text == original_text + + # REMOVE should succeed + + remove_query = ( + f"REMOVE 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv'" + ) + + with self.connection(extra_params={"staging_allowed_local_path": "/"}) as conn: + cursor = conn.cursor() + cursor.execute(remove_query) + + # GET after REMOVE should fail + + with pytest.raises(Error): + cursor = conn.cursor() + query = f"GET 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv' TO '{new_temp_path}'" + cursor.execute(query) + + 
os.remove(temp_path) + os.remove(new_temp_path) + + + def test_staging_ingestion_put_fails_without_staging_allowed_local_path(self): + """PUT operations are not supported unless the connection was built with + a parameter called staging_allowed_local_path + """ + + fh, temp_path = tempfile.mkstemp() + + original_text = "hello world!".encode("utf-8") + + with open(fh, "wb") as fp: + fp.write(original_text) + + with pytest.raises(Error): + with self.connection() as conn: + cursor = conn.cursor() + query = f"PUT '{temp_path}' INTO 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv' OVERWRITE" + cursor.execute(query) + + def test_staging_ingestion_put_fails_if_localFile_not_in_staging_allowed_local_path(self): + + + fh, temp_path = tempfile.mkstemp() + + original_text = "hello world!".encode("utf-8") + + with open(fh, "wb") as fp: + fp.write(original_text) + + base_path, filename = os.path.split(temp_path) + + # Add junk to base_path + base_path = os.path.join(base_path, "temp") + + with pytest.raises(Error): + with self.connection(extra_params={"staging_allowed_local_path": base_path}) as conn: + cursor = conn.cursor() + query = f"PUT '{temp_path}' INTO 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv' OVERWRITE" + cursor.execute(query) + + def test_staging_ingestion_put_fails_if_file_exists_and_overwrite_not_set(self): + """PUT a file into the staging location twice. First command should succeed. Second should fail. + """ + + fh, temp_path = tempfile.mkstemp() + + original_text = "hello world!".encode("utf-8") + + with open(fh, "wb") as fp: + fp.write(original_text) + + def perform_put(): + with self.connection(extra_params={"staging_allowed_local_path": temp_path}) as conn: + cursor = conn.cursor() + query = f"PUT '{temp_path}' INTO 'stage://tmp/{self.staging_ingestion_user}/tmp/12/15/file1.csv'" + cursor.execute(query) + + def perform_remove(): + remove_query = ( + f"REMOVE 'stage://tmp/{self.staging_ingestion_user}/tmp/12/15/file1.csv'" + ) + + with self.connection(extra_params={"staging_allowed_local_path": "/"}) as conn: + cursor = conn.cursor() + cursor.execute(remove_query) + + + # Make sure file does not exist + perform_remove() + + # Put the file + perform_put() + + # Try to put it again + with pytest.raises(sql.exc.ServerOperationError, match="FILE_IN_STAGING_PATH_ALREADY_EXISTS"): + perform_put() + + # Clean up after ourselves + perform_remove() + + def test_staging_ingestion_fails_to_modify_another_staging_user(self): + """The server should only allow modification of the staging_ingestion_user's files + """ + + some_other_user = "mary.poppins@databricks.com" + + fh, temp_path = tempfile.mkstemp() + + original_text = "hello world!".encode("utf-8") + + with open(fh, "wb") as fp: + fp.write(original_text) + + def perform_put(): + with self.connection(extra_params={"staging_allowed_local_path": temp_path}) as conn: + cursor = conn.cursor() + query = f"PUT '{temp_path}' INTO 'stage://tmp/{some_other_user}/tmp/12/15/file1.csv' OVERWRITE" + cursor.execute(query) + + def perform_remove(): + remove_query = ( + f"REMOVE 'stage://tmp/{some_other_user}/tmp/12/15/file1.csv'" + ) + + with self.connection(extra_params={"staging_allowed_local_path": "/"}) as conn: + cursor = conn.cursor() + cursor.execute(remove_query) + + def perform_get(): + with self.connection(extra_params={"staging_allowed_local_path": temp_path}) as conn: + cursor = conn.cursor() + query = f"GET 'stage://tmp/{some_other_user}/tmp/11/15/file1.csv' TO '{temp_path}'" + cursor.execute(query) + + # PUT 
should fail with permissions error + with pytest.raises(sql.exc.ServerOperationError, match="PERMISSION_DENIED"): + perform_put() + + # REMOVE should fail with permissions error + with pytest.raises(sql.exc.ServerOperationError, match="PERMISSION_DENIED"): + perform_remove() + + # GET should fail with permissions error + with pytest.raises(sql.exc.ServerOperationError, match="PERMISSION_DENIED"): + perform_get() + + def test_staging_ingestion_put_fails_if_absolute_localFile_not_in_staging_allowed_local_path(self): + """ + This test confirms that staging_allowed_local_path and target_file are resolved into absolute paths. + """ + + # If these two paths are not resolved absolutely, they appear to share a common path of /var/www/html + # after resolution their common path is only /var/www which should raise an exception + # Because the common path must always be equal to staging_allowed_local_path + staging_allowed_local_path = "/var/www/html" + target_file = "/var/www/html/../html1/not_allowed.html" + + with pytest.raises(Error): + with self.connection(extra_params={"staging_allowed_local_path": staging_allowed_local_path}) as conn: + cursor = conn.cursor() + query = f"PUT '{target_file}' INTO 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv' OVERWRITE" + cursor.execute(query) + + def test_staging_ingestion_empty_local_path_fails_to_parse_at_server(self): + staging_allowed_local_path = "/var/www/html" + target_file = "" + + with pytest.raises(Error, match="EMPTY_LOCAL_FILE_IN_STAGING_ACCESS_QUERY"): + with self.connection(extra_params={"staging_allowed_local_path": staging_allowed_local_path}) as conn: + cursor = conn.cursor() + query = f"PUT '{target_file}' INTO 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv' OVERWRITE" + cursor.execute(query) + + def test_staging_ingestion_invalid_staging_path_fails_at_server(self): + staging_allowed_local_path = "/var/www/html" + target_file = "index.html" + + with pytest.raises(Error, match="INVALID_STAGING_PATH_IN_STAGING_ACCESS_QUERY"): + with self.connection(extra_params={"staging_allowed_local_path": staging_allowed_local_path}) as conn: + cursor = conn.cursor() + query = f"PUT '{target_file}' INTO 'stageRANDOMSTRINGOFCHARACTERS://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv' OVERWRITE" + cursor.execute(query) + + def test_staging_ingestion_supports_multiple_staging_allowed_local_path_values(self): + """staging_allowed_local_path may be either a path-like object or a list of path-like objects. + + This test confirms that two configured base paths: + 1 - doesn't raise an exception + 2 - allows uploads from both paths + 3 - doesn't allow uploads from a third path + """ + + def generate_file_and_path_and_queries(): + """ + 1. Makes a temp file with some contents. + 2. Write a query to PUT it into a staging location + 3. 
Write a query to REMOVE it from that location (for cleanup) + """ + fh, temp_path = tempfile.mkstemp() + with open(fh, "wb") as fp: + original_text = "hello world!".encode("utf-8") + fp.write(original_text) + put_query = f"PUT '{temp_path}' INTO 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/{id(temp_path)}.csv' OVERWRITE" + remove_query = f"REMOVE 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/{id(temp_path)}.csv'" + return fh, temp_path, put_query, remove_query + + fh1, temp_path1, put_query1, remove_query1 = generate_file_and_path_and_queries() + fh2, temp_path2, put_query2, remove_query2 = generate_file_and_path_and_queries() + fh3, temp_path3, put_query3, remove_query3 = generate_file_and_path_and_queries() + + with self.connection(extra_params={"staging_allowed_local_path": [temp_path1, temp_path2]}) as conn: + cursor = conn.cursor() + + cursor.execute(put_query1) + cursor.execute(put_query2) + + with pytest.raises(Error, match="Local file operations are restricted to paths within the configured staging_allowed_local_path"): + cursor.execute(put_query3) + + # Then clean up the files we made + cursor.execute(remove_query1) + cursor.execute(remove_query2) \ No newline at end of file diff --git a/tests/e2e/driver_tests.py b/tests/e2e/test_driver.py similarity index 72% rename from tests/e2e/driver_tests.py rename to tests/e2e/test_driver.py index f8350475..37428116 100644 --- a/tests/e2e/driver_tests.py +++ b/tests/e2e/test_driver.py @@ -5,7 +5,6 @@ import logging import os import sys -import tempfile import threading import time from unittest import loader, skipIf, skipUnless, TestCase @@ -25,6 +24,7 @@ from tests.e2e.common.timestamp_tests import TimestampTestsMixin from tests.e2e.common.decimal_tests import DecimalTestsMixin from tests.e2e.common.retry_test_mixins import Client429ResponseMixin, Client503ResponseMixin +from tests.e2e.common.staging_ingestion_tests import PySQLStagingIngestionTestSuiteMixin log = logging.getLogger(__name__) @@ -38,6 +38,7 @@ decorated = skipUnless(pysql_supports_arrow(), 'Decimal tests need arrow support')(fn) setattr(DecimalTestsMixin, name, decorated) + get_args_from_env = True @@ -107,7 +108,7 @@ def get_some_rows(self, cursor, fetchmany_size): # Exclude Retry tests because they require specific setups, and LargeQueries too slow for core # tests class PySQLCoreTestSuite(SmokeTestMixin, CoreTestMixin, DecimalTestsMixin, TimestampTestsMixin, - PySQLTestCase): + PySQLTestCase, PySQLStagingIngestionTestSuiteMixin): validate_row_value_type = True validate_result = True @@ -648,278 +649,6 @@ def test_initial_namespace(self): cursor.execute("select current_database()") self.assertEqual(cursor.fetchone()[0], table_name) -class PySQLStagingIngestionTestSuite(PySQLTestCase): - """Simple namespace for ingestion tests. These should be run against DBR >12.x - - In addition to connection credentials (host, path, token) this suite requires an env var - named staging_ingestion_user""" - - staging_ingestion_user = os.getenv("staging_ingestion_user") - - if staging_ingestion_user is None: - raise ValueError( - "To run these tests you must designate a `staging_ingestion_user` environment variable. This will the user associated with the personal access token." 
- ) - - def test_staging_ingestion_life_cycle(self): - """PUT a file into the staging location - GET the file from the staging location - REMOVE the file from the staging location - Try to GET the file again expecting to raise an exception - """ - - # PUT should succeed - - fh, temp_path = tempfile.mkstemp() - - original_text = "hello world!".encode("utf-8") - - with open(fh, "wb") as fp: - fp.write(original_text) - - with self.connection(extra_params={"staging_allowed_local_path": temp_path}) as conn: - - cursor = conn.cursor() - query = f"PUT '{temp_path}' INTO 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv' OVERWRITE" - cursor.execute(query) - - # GET should succeed - - new_fh, new_temp_path = tempfile.mkstemp() - - with self.connection(extra_params={"staging_allowed_local_path": new_temp_path}) as conn: - cursor = conn.cursor() - query = f"GET 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv' TO '{new_temp_path}'" - cursor.execute(query) - - with open(new_fh, "rb") as fp: - fetched_text = fp.read() - - assert fetched_text == original_text - - # REMOVE should succeed - - remove_query = ( - f"REMOVE 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv'" - ) - - with self.connection(extra_params={"staging_allowed_local_path": "/"}) as conn: - cursor = conn.cursor() - cursor.execute(remove_query) - - # GET after REMOVE should fail - - with pytest.raises(Error): - cursor = conn.cursor() - query = f"GET 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv' TO '{new_temp_path}'" - cursor.execute(query) - - os.remove(temp_path) - os.remove(new_temp_path) - - - def test_staging_ingestion_put_fails_without_staging_allowed_local_path(self): - """PUT operations are not supported unless the connection was built with - a parameter called staging_allowed_local_path - """ - - fh, temp_path = tempfile.mkstemp() - - original_text = "hello world!".encode("utf-8") - - with open(fh, "wb") as fp: - fp.write(original_text) - - with pytest.raises(Error): - with self.connection() as conn: - cursor = conn.cursor() - query = f"PUT '{temp_path}' INTO 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv' OVERWRITE" - cursor.execute(query) - - def test_staging_ingestion_put_fails_if_localFile_not_in_staging_allowed_local_path(self): - - - fh, temp_path = tempfile.mkstemp() - - original_text = "hello world!".encode("utf-8") - - with open(fh, "wb") as fp: - fp.write(original_text) - - base_path, filename = os.path.split(temp_path) - - # Add junk to base_path - base_path = os.path.join(base_path, "temp") - - with pytest.raises(Error): - with self.connection(extra_params={"staging_allowed_local_path": base_path}) as conn: - cursor = conn.cursor() - query = f"PUT '{temp_path}' INTO 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv' OVERWRITE" - cursor.execute(query) - - def test_staging_ingestion_put_fails_if_file_exists_and_overwrite_not_set(self): - """PUT a file into the staging location twice. First command should succeed. Second should fail. 
- """ - - fh, temp_path = tempfile.mkstemp() - - original_text = "hello world!".encode("utf-8") - - with open(fh, "wb") as fp: - fp.write(original_text) - - def perform_put(): - with self.connection(extra_params={"staging_allowed_local_path": temp_path}) as conn: - cursor = conn.cursor() - query = f"PUT '{temp_path}' INTO 'stage://tmp/{self.staging_ingestion_user}/tmp/12/15/file1.csv'" - cursor.execute(query) - - def perform_remove(): - remove_query = ( - f"REMOVE 'stage://tmp/{self.staging_ingestion_user}/tmp/12/15/file1.csv'" - ) - - with self.connection(extra_params={"staging_allowed_local_path": "/"}) as conn: - cursor = conn.cursor() - cursor.execute(remove_query) - - - # Make sure file does not exist - perform_remove() - - # Put the file - perform_put() - - # Try to put it again - with pytest.raises(sql.exc.ServerOperationError, match="FILE_IN_STAGING_PATH_ALREADY_EXISTS"): - perform_put() - - # Clean up after ourselves - perform_remove() - - def test_staging_ingestion_fails_to_modify_another_staging_user(self): - """The server should only allow modification of the staging_ingestion_user's files - """ - - some_other_user = "mary.poppins@databricks.com" - - fh, temp_path = tempfile.mkstemp() - - original_text = "hello world!".encode("utf-8") - - with open(fh, "wb") as fp: - fp.write(original_text) - - def perform_put(): - with self.connection(extra_params={"staging_allowed_local_path": temp_path}) as conn: - cursor = conn.cursor() - query = f"PUT '{temp_path}' INTO 'stage://tmp/{some_other_user}/tmp/12/15/file1.csv' OVERWRITE" - cursor.execute(query) - - def perform_remove(): - remove_query = ( - f"REMOVE 'stage://tmp/{some_other_user}/tmp/12/15/file1.csv'" - ) - - with self.connection(extra_params={"staging_allowed_local_path": "/"}) as conn: - cursor = conn.cursor() - cursor.execute(remove_query) - - def perform_get(): - with self.connection(extra_params={"staging_allowed_local_path": temp_path}) as conn: - cursor = conn.cursor() - query = f"GET 'stage://tmp/{some_other_user}/tmp/11/15/file1.csv' TO '{temp_path}'" - cursor.execute(query) - - # PUT should fail with permissions error - with pytest.raises(sql.exc.ServerOperationError, match="PERMISSION_DENIED"): - perform_put() - - # REMOVE should fail with permissions error - with pytest.raises(sql.exc.ServerOperationError, match="PERMISSION_DENIED"): - perform_remove() - - # GET should fail with permissions error - with pytest.raises(sql.exc.ServerOperationError, match="PERMISSION_DENIED"): - perform_get() - - def test_staging_ingestion_put_fails_if_absolute_localFile_not_in_staging_allowed_local_path(self): - """ - This test confirms that staging_allowed_local_path and target_file are resolved into absolute paths. 
- """ - - # If these two paths are not resolved absolutely, they appear to share a common path of /var/www/html - # after resolution their common path is only /var/www which should raise an exception - # Because the common path must always be equal to staging_allowed_local_path - staging_allowed_local_path = "/var/www/html" - target_file = "/var/www/html/../html1/not_allowed.html" - - with pytest.raises(Error): - with self.connection(extra_params={"staging_allowed_local_path": staging_allowed_local_path}) as conn: - cursor = conn.cursor() - query = f"PUT '{target_file}' INTO 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv' OVERWRITE" - cursor.execute(query) - - def test_staging_ingestion_empty_local_path_fails_to_parse_at_server(self): - staging_allowed_local_path = "/var/www/html" - target_file = "" - - with pytest.raises(Error, match="EMPTY_LOCAL_FILE_IN_STAGING_ACCESS_QUERY"): - with self.connection(extra_params={"staging_allowed_local_path": staging_allowed_local_path}) as conn: - cursor = conn.cursor() - query = f"PUT '{target_file}' INTO 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv' OVERWRITE" - cursor.execute(query) - - def test_staging_ingestion_invalid_staging_path_fails_at_server(self): - staging_allowed_local_path = "/var/www/html" - target_file = "index.html" - - with pytest.raises(Error, match="INVALID_STAGING_PATH_IN_STAGING_ACCESS_QUERY"): - with self.connection(extra_params={"staging_allowed_local_path": staging_allowed_local_path}) as conn: - cursor = conn.cursor() - query = f"PUT '{target_file}' INTO 'stageRANDOMSTRINGOFCHARACTERS://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv' OVERWRITE" - cursor.execute(query) - - def test_staging_ingestion_supports_multiple_staging_allowed_local_path_values(self): - """staging_allowed_local_path may be either a path-like object or a list of path-like objects. - - This test confirms that two configured base paths: - 1 - doesn't raise an exception - 2 - allows uploads from both paths - 3 - doesn't allow uploads from a third path - """ - - def generate_file_and_path_and_queries(): - """ - 1. Makes a temp file with some contents. - 2. Write a query to PUT it into a staging location - 3. 
Write a query to REMOVE it from that location (for cleanup) - """ - fh, temp_path = tempfile.mkstemp() - with open(fh, "wb") as fp: - original_text = "hello world!".encode("utf-8") - fp.write(original_text) - put_query = f"PUT '{temp_path}' INTO 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/{id(temp_path)}.csv' OVERWRITE" - remove_query = f"REMOVE 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/{id(temp_path)}.csv'" - return fh, temp_path, put_query, remove_query - - fh1, temp_path1, put_query1, remove_query1 = generate_file_and_path_and_queries() - fh2, temp_path2, put_query2, remove_query2 = generate_file_and_path_and_queries() - fh3, temp_path3, put_query3, remove_query3 = generate_file_and_path_and_queries() - - with self.connection(extra_params={"staging_allowed_local_path": [temp_path1, temp_path2]}) as conn: - cursor = conn.cursor() - - cursor.execute(put_query1) - cursor.execute(put_query2) - - with pytest.raises(Error, match="Local file operations are restricted to paths within the configured staging_allowed_local_path"): - cursor.execute(put_query3) - - # Then clean up the files we made - cursor.execute(remove_query1) - cursor.execute(remove_query2) - def main(cli_args): global get_args_from_env From 8d70f6c046feda90402c713c8e47125da36ba55a Mon Sep 17 00:00:00 2001 From: Jesse Date: Mon, 26 Jun 2023 15:05:32 -0500 Subject: [PATCH 11/40] Don't raise exception when closing a stale Thrift session (#159) Signed-off-by: Jesse Whitehouse --- CHANGELOG.md | 1 + CONTRIBUTING.md | 2 ++ pyproject.toml | 2 ++ src/databricks/sql/client.py | 25 +++++++++++++++++++++++-- src/databricks/sql/thrift_backend.py | 6 ++++++ test.env.example | 6 +++++- tests/e2e/test_driver.py | 16 ++++++++++++++++ 7 files changed, 55 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d3f8831..35a8e0aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## 2.6.x (Unreleased) +- Fix: connector raised exception when calling close() on a closed Thrift session - Improve e2e test development ergonomics - Redact logged thrift responses by default - Add support for OAuth on Databricks Azure diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 53ec9735..6ab8b45f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -118,6 +118,8 @@ access_token="dapi***" staging_ingestion_user="***@example.com" ``` +To see logging output from pytest while running tests, set `log_cli = "true"` under `tool.pytest.ini_options` in `pyproject.toml`. 
You can also set `log_cli_level` to any of the default Python log levels: `DEBUG`, `INFO`, `WARNING`, `ERROR`, `CRITICAL` + There are several e2e test suites available: - `PySQLCoreTestSuite` - `PySQLLargeQueriesSuite` diff --git a/pyproject.toml b/pyproject.toml index f522d1ce..f651421e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,8 @@ exclude = '/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck [tool.pytest.ini_options] minversion = "6.0" +log_cli = "false" +log_cli_level = "INFO" testpaths = [ "tests" ] diff --git a/src/databricks/sql/client.py b/src/databricks/sql/client.py index 722ed778..14e59df6 100644 --- a/src/databricks/sql/client.py +++ b/src/databricks/sql/client.py @@ -190,7 +190,7 @@ def read(self) -> Optional[OAuthToken]: session_configuration, catalog, schema ) self.open = True - logger.info("Successfully opened session " + str(self.get_session_id())) + logger.info("Successfully opened session " + str(self.get_session_id_hex())) self._cursors = [] # type: List[Cursor] def __enter__(self): @@ -214,6 +214,9 @@ def __del__(self): def get_session_id(self): return self.thrift_backend.handle_to_id(self._session_handle) + def get_session_id_hex(self): + return self.thrift_backend.handle_to_hex_id(self._session_handle) + def cursor( self, arraysize: int = DEFAULT_ARRAY_SIZE, @@ -244,7 +247,25 @@ def _close(self, close_cursors=True) -> None: if close_cursors: for cursor in self._cursors: cursor.close() - self.thrift_backend.close_session(self._session_handle) + + logger.info(f"Closing session {self.get_session_id_hex()}") + if not self.open: + logger.debug("Session appears to have been closed already") + + try: + self.thrift_backend.close_session(self._session_handle) + except DatabaseError as e: + if "Invalid SessionHandle" in str(e): + logger.warning( + f"Attempted to close session that was already closed: {e}" + ) + else: + logger.warning( + f"Attempt to close session raised an exception at the server: {e}" + ) + except Exception as e: + logger.error(f"Attempt to close session raised a local exception: {e}") + self.open = False def commit(self): diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index b4afeaa3..7756c56a 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -3,6 +3,7 @@ import logging import math import time +import uuid import threading import lz4.frame from ssl import CERT_NONE, CERT_REQUIRED, create_default_context @@ -1021,3 +1022,8 @@ def cancel_command(self, active_op_handle): @staticmethod def handle_to_id(session_handle): return session_handle.sessionId.guid + + @staticmethod + def handle_to_hex_id(session_handle: TCLIService.TSessionHandle): + this_uuid = uuid.UUID(bytes=session_handle.sessionId.guid) + return str(this_uuid) diff --git a/test.env.example b/test.env.example index 5ce7eca5..94aed419 100644 --- a/test.env.example +++ b/test.env.example @@ -4,4 +4,8 @@ http_path="" access_token="" # Only required to run the PySQLStagingIngestionTestSuite -staging_ingestion_user="" \ No newline at end of file +staging_ingestion_user="" + +# Only required to run SQLAlchemy tests +catalog="" +schema="" \ No newline at end of file diff --git a/tests/e2e/test_driver.py b/tests/e2e/test_driver.py index 37428116..26b7d186 100644 --- a/tests/e2e/test_driver.py +++ b/tests/e2e/test_driver.py @@ -616,6 +616,22 @@ def test_close_connection_closes_cursors(self): assert "RESOURCE_DOES_NOT_EXIST" in cm.exception.message + def 
test_closing_a_closed_connection_doesnt_fail(self): + + with self.assertLogs("databricks.sql", level="DEBUG",) as cm: + # Second .close() call is when this context manager exits + with self.connection() as conn: + # First .close() call is explicit here + conn.close() + + expected_message_was_found = False + for log in cm.output: + if expected_message_was_found: + break + target = "Session appears to have been closed already" + expected_message_was_found = target in log + + self.assertTrue(expected_message_was_found, "Did not find expected log messages") # use a RetrySuite to encapsulate these tests which we'll typically want to run together; however keep From c351b57f1449def33722effd9a9a4ea26b3fa6cc Mon Sep 17 00:00:00 2001 From: Jesse Date: Mon, 26 Jun 2023 16:45:20 -0500 Subject: [PATCH 12/40] Bump to version 2.7.0 (#161) Signed-off-by: Jesse Whitehouse --- CHANGELOG.md | 4 +++- pyproject.toml | 2 +- src/databricks/sql/__init__.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 35a8e0aa..d9bff868 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # Release History -## 2.6.x (Unreleased) +## 2.7.x (Unreleased) + +## 2.7.0 (2023-06-26) - Fix: connector raised exception when calling close() on a closed Thrift session - Improve e2e test development ergonomics diff --git a/pyproject.toml b/pyproject.toml index f651421e..5d48aba5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "databricks-sql-connector" -version = "2.6.2" +version = "2.7.0" description = "Databricks SQL Connector for Python" authors = ["Databricks "] license = "Apache-2.0" diff --git a/src/databricks/sql/__init__.py b/src/databricks/sql/__init__.py index 6c7db4d5..b72d9421 100644 --- a/src/databricks/sql/__init__.py +++ b/src/databricks/sql/__init__.py @@ -28,7 +28,7 @@ def __repr__(self): DATE = DBAPITypeObject("date") ROWID = DBAPITypeObject() -__version__ = "2.6.2" +__version__ = "2.7.0" USER_AGENT_NAME = "PyDatabricksSqlConnector" # These two functions are pyhive legacy From 64be9bc84ae12ec4564f89402543d8b54c4d4b41 Mon Sep 17 00:00:00 2001 From: mattdeekay <11141331+mattdeekay@users.noreply.github.com> Date: Tue, 27 Jun 2023 16:30:55 -0700 Subject: [PATCH 13/40] Cloud Fetch download handler (#127) * Cloud Fetch download handler Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Issue fix: final result link compressed data has multiple LZ4 end-of-frame markers Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Addressing PR comments - Linting - Type annotations - Use response.ok - Log exception - Remove semaphore and only use threading.event - reset() flags method - Fix tests after removing semaphore - Link expiry logic should be in secs - Decompress data static function - link_expiry_buffer and static public methods - Docstrings and comments Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Changing logger.debug to remove url Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * _reset() comment to docstring Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * link_expiry_buffer -> link_expiry_buffer_secs Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> --------- Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> --- src/databricks/sql/cloudfetch/downloader.py | 151 +++++++++++++++++++ tests/unit/test_downloader.py | 155 ++++++++++++++++++++ 2 files 
changed, 306 insertions(+) create mode 100644 src/databricks/sql/cloudfetch/downloader.py create mode 100644 tests/unit/test_downloader.py diff --git a/src/databricks/sql/cloudfetch/downloader.py b/src/databricks/sql/cloudfetch/downloader.py new file mode 100644 index 00000000..d3c4a480 --- /dev/null +++ b/src/databricks/sql/cloudfetch/downloader.py @@ -0,0 +1,151 @@ +import logging + +import requests +import lz4.frame +import threading +import time + +from databricks.sql.thrift_api.TCLIService.ttypes import TSparkArrowResultLink + +logger = logging.getLogger(__name__) + + +class ResultSetDownloadHandler(threading.Thread): + def __init__( + self, + downloadable_result_settings, + t_spark_arrow_result_link: TSparkArrowResultLink, + ): + super().__init__() + self.settings = downloadable_result_settings + self.result_link = t_spark_arrow_result_link + self.is_download_scheduled = False + self.is_download_finished = threading.Event() + self.is_file_downloaded_successfully = False + self.is_link_expired = False + self.is_download_timedout = False + self.result_file = None + + def is_file_download_successful(self) -> bool: + """ + Check and report if cloud fetch file downloaded successfully. + + This function will block until a file download finishes or until a timeout. + """ + timeout = self.settings.download_timeout + timeout = timeout if timeout and timeout > 0 else None + try: + if not self.is_download_finished.wait(timeout=timeout): + self.is_download_timedout = True + logger.debug( + "Cloud fetch download timed out after {} seconds for link representing rows {} to {}".format( + self.settings.download_timeout, + self.result_link.startRowOffset, + self.result_link.startRowOffset + self.result_link.rowCount, + ) + ) + return False + except Exception as e: + logger.error(e) + return False + return self.is_file_downloaded_successfully + + def run(self): + """ + Download the file described in the cloud fetch link. + + This function checks if the link has or is expiring, gets the file via a requests session, decompresses the + file, and signals to waiting threads that the download is finished and whether it was successful. 
+ """ + self._reset() + + # Check if link is already expired or is expiring + if ResultSetDownloadHandler.check_link_expired( + self.result_link, self.settings.link_expiry_buffer_secs + ): + self.is_link_expired = True + return + + session = requests.Session() + session.timeout = self.settings.download_timeout + + try: + # Get the file via HTTP request + response = session.get(self.result_link.fileLink) + + if not response.ok: + self.is_file_downloaded_successfully = False + return + + # Save (and decompress if needed) the downloaded file + compressed_data = response.content + decompressed_data = ( + ResultSetDownloadHandler.decompress_data(compressed_data) + if self.settings.is_lz4_compressed + else compressed_data + ) + self.result_file = decompressed_data + + # The size of the downloaded file should match the size specified from TSparkArrowResultLink + self.is_file_downloaded_successfully = ( + len(self.result_file) == self.result_link.bytesNum + ) + except Exception as e: + logger.error(e) + self.is_file_downloaded_successfully = False + + finally: + session and session.close() + # Awaken threads waiting for this to be true which signals the run is complete + self.is_download_finished.set() + + def _reset(self): + """ + Reset download-related flags for every retry of run() + """ + self.is_file_downloaded_successfully = False + self.is_link_expired = False + self.is_download_timedout = False + self.is_download_finished = threading.Event() + + @staticmethod + def check_link_expired( + link: TSparkArrowResultLink, expiry_buffer_secs: int + ) -> bool: + """ + Check if a link has expired or will expire. + + Expiry buffer can be set to avoid downloading files that has not expired yet when the function is called, + but may expire before the file has fully downloaded. + """ + current_time = int(time.time()) + if ( + link.expiryTime < current_time + or link.expiryTime - current_time < expiry_buffer_secs + ): + return True + return False + + @staticmethod + def decompress_data(compressed_data: bytes) -> bytes: + """ + Decompress lz4 frame compressed data. + + Decompresses data that has been lz4 compressed, either via the whole frame or by series of chunks. + """ + uncompressed_data, bytes_read = lz4.frame.decompress( + compressed_data, return_bytes_read=True + ) + # The last cloud fetch file of the entire result is commonly punctuated by frequent end-of-frame markers. + # Full frame decompression above will short-circuit, so chunking is necessary + if bytes_read < len(compressed_data): + d_context = lz4.frame.create_decompression_context() + start = 0 + uncompressed_data = bytearray() + while start < len(compressed_data): + data, num_bytes, is_end = lz4.frame.decompress_chunk( + d_context, compressed_data[start:] + ) + uncompressed_data += data + start += num_bytes + return uncompressed_data diff --git a/tests/unit/test_downloader.py b/tests/unit/test_downloader.py new file mode 100644 index 00000000..cee3a83c --- /dev/null +++ b/tests/unit/test_downloader.py @@ -0,0 +1,155 @@ +import unittest +from unittest.mock import Mock, patch, MagicMock + +import databricks.sql.cloudfetch.downloader as downloader + + +class DownloaderTests(unittest.TestCase): + """ + Unit tests for checking downloader logic. 
+ """ + + @patch('time.time', return_value=1000) + def test_run_link_expired(self, mock_time): + settings = Mock() + result_link = Mock() + # Already expired + result_link.expiryTime = 999 + d = downloader.ResultSetDownloadHandler(settings, result_link) + assert not d.is_link_expired + d.run() + assert d.is_link_expired + mock_time.assert_called_once() + + @patch('time.time', return_value=1000) + def test_run_link_past_expiry_buffer(self, mock_time): + settings = Mock(link_expiry_buffer_secs=5) + result_link = Mock() + # Within the expiry buffer time + result_link.expiryTime = 1004 + d = downloader.ResultSetDownloadHandler(settings, result_link) + assert not d.is_link_expired + d.run() + assert d.is_link_expired + mock_time.assert_called_once() + + @patch('requests.Session', return_value=MagicMock(get=MagicMock(return_value=MagicMock(ok=False)))) + @patch('time.time', return_value=1000) + def test_run_get_response_not_ok(self, mock_time, mock_session): + settings = Mock(link_expiry_buffer_secs=0, download_timeout=0) + settings.download_timeout = 0 + settings.use_proxy = False + result_link = Mock(expiryTime=1001) + + d = downloader.ResultSetDownloadHandler(settings, result_link) + d.run() + + assert not d.is_file_downloaded_successfully + assert d.is_download_finished.is_set() + + @patch('requests.Session', + return_value=MagicMock(get=MagicMock(return_value=MagicMock(ok=True, content=b"1234567890" * 9)))) + @patch('time.time', return_value=1000) + def test_run_uncompressed_data_length_incorrect(self, mock_time, mock_session): + settings = Mock(link_expiry_buffer_secs=0, download_timeout=0, use_proxy=False, is_lz4_compressed=False) + result_link = Mock(bytesNum=100, expiryTime=1001) + + d = downloader.ResultSetDownloadHandler(settings, result_link) + d.run() + + assert not d.is_file_downloaded_successfully + assert d.is_download_finished.is_set() + + @patch('requests.Session', return_value=MagicMock(get=MagicMock(return_value=MagicMock(ok=True)))) + @patch('time.time', return_value=1000) + def test_run_compressed_data_length_incorrect(self, mock_time, mock_session): + settings = Mock(link_expiry_buffer_secs=0, download_timeout=0, use_proxy=False) + settings.is_lz4_compressed = True + result_link = Mock(bytesNum=100, expiryTime=1001) + mock_session.return_value.get.return_value.content = \ + b'\x04"M\x18h@Z\x00\x00\x00\x00\x00\x00\x00\xec\x14\x00\x00\x00\xaf1234567890\n\x008P67890\x00\x00\x00\x00' + + d = downloader.ResultSetDownloadHandler(settings, result_link) + d.run() + + assert not d.is_file_downloaded_successfully + assert d.is_download_finished.is_set() + + @patch('requests.Session', + return_value=MagicMock(get=MagicMock(return_value=MagicMock(ok=True, content=b"1234567890" * 10)))) + @patch('time.time', return_value=1000) + def test_run_uncompressed_successful(self, mock_time, mock_session): + settings = Mock(link_expiry_buffer_secs=0, download_timeout=0, use_proxy=False) + settings.is_lz4_compressed = False + result_link = Mock(bytesNum=100, expiryTime=1001) + + d = downloader.ResultSetDownloadHandler(settings, result_link) + d.run() + + assert d.result_file == b"1234567890" * 10 + assert d.is_file_downloaded_successfully + assert d.is_download_finished.is_set() + + @patch('requests.Session', return_value=MagicMock(get=MagicMock(return_value=MagicMock(ok=True)))) + @patch('time.time', return_value=1000) + def test_run_compressed_successful(self, mock_time, mock_session): + settings = Mock(link_expiry_buffer_secs=0, download_timeout=0, use_proxy=False) + settings.is_lz4_compressed 
= True + result_link = Mock(bytesNum=100, expiryTime=1001) + mock_session.return_value.get.return_value.content = \ + b'\x04"M\x18h@d\x00\x00\x00\x00\x00\x00\x00#\x14\x00\x00\x00\xaf1234567890\n\x00BP67890\x00\x00\x00\x00' + + d = downloader.ResultSetDownloadHandler(settings, result_link) + d.run() + + assert d.result_file == b"1234567890" * 10 + assert d.is_file_downloaded_successfully + assert d.is_download_finished.is_set() + + @patch('requests.Session.get', side_effect=ConnectionError('foo')) + @patch('time.time', return_value=1000) + def test_download_connection_error(self, mock_time, mock_session): + settings = Mock(link_expiry_buffer_secs=0, use_proxy=False, is_lz4_compressed=True) + result_link = Mock(bytesNum=100, expiryTime=1001) + mock_session.return_value.get.return_value.content = \ + b'\x04"M\x18h@d\x00\x00\x00\x00\x00\x00\x00#\x14\x00\x00\x00\xaf1234567890\n\x00BP67890\x00\x00\x00\x00' + + d = downloader.ResultSetDownloadHandler(settings, result_link) + d.run() + + assert not d.is_file_downloaded_successfully + assert d.is_download_finished.is_set() + + @patch('requests.Session.get', side_effect=TimeoutError('foo')) + @patch('time.time', return_value=1000) + def test_download_timeout(self, mock_time, mock_session): + settings = Mock(link_expiry_buffer_secs=0, use_proxy=False, is_lz4_compressed=True) + result_link = Mock(bytesNum=100, expiryTime=1001) + mock_session.return_value.get.return_value.content = \ + b'\x04"M\x18h@d\x00\x00\x00\x00\x00\x00\x00#\x14\x00\x00\x00\xaf1234567890\n\x00BP67890\x00\x00\x00\x00' + + d = downloader.ResultSetDownloadHandler(settings, result_link) + d.run() + + assert not d.is_file_downloaded_successfully + assert d.is_download_finished.is_set() + + @patch("threading.Event.wait", return_value=True) + def test_is_file_download_successful_has_finished(self, mock_wait): + for timeout in [None, 0, 1]: + with self.subTest(timeout=timeout): + settings = Mock(download_timeout=timeout) + result_link = Mock() + handler = downloader.ResultSetDownloadHandler(settings, result_link) + + status = handler.is_file_download_successful() + assert status == handler.is_file_downloaded_successfully + + def test_is_file_download_successful_times_outs(self): + settings = Mock(download_timeout=1) + result_link = Mock() + handler = downloader.ResultSetDownloadHandler(settings, result_link) + + status = handler.is_file_download_successful() + assert not status + assert handler.is_download_timedout From 01b7a8ddd3e4fcf09eae076b8e75e31ede0bd1fb Mon Sep 17 00:00:00 2001 From: mattdeekay <11141331+mattdeekay@users.noreply.github.com> Date: Mon, 3 Jul 2023 11:18:12 -0700 Subject: [PATCH 14/40] Cloud Fetch download manager (#146) * Cloud Fetch download manager Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Bug fix: submit handler.run Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Type annotations Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Namedtuple -> dataclass Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Shutdown thread pool and clear handlers Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Docstrings and comments Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * handler.run is the correct call Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Link expiry buffer in secs Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Adding type annotations for 
download_handlers and downloadable_result_settings Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Move DownloadableResultSettings to downloader.py to avoid circular import Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Black linting Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Timeout is never None Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> --------- Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> --- .../sql/cloudfetch/download_manager.py | 166 ++++++++++++++ src/databricks/sql/cloudfetch/downloader.py | 28 ++- tests/unit/test_download_manager.py | 207 ++++++++++++++++++ tests/unit/test_downloader.py | 2 +- 4 files changed, 399 insertions(+), 4 deletions(-) create mode 100644 src/databricks/sql/cloudfetch/download_manager.py create mode 100644 tests/unit/test_download_manager.py diff --git a/src/databricks/sql/cloudfetch/download_manager.py b/src/databricks/sql/cloudfetch/download_manager.py new file mode 100644 index 00000000..aac3ac33 --- /dev/null +++ b/src/databricks/sql/cloudfetch/download_manager.py @@ -0,0 +1,166 @@ +import logging + +from concurrent.futures import ThreadPoolExecutor +from dataclasses import dataclass +from typing import List, Union + +from databricks.sql.cloudfetch.downloader import ( + ResultSetDownloadHandler, + DownloadableResultSettings, +) +from databricks.sql.thrift_api.TCLIService.ttypes import TSparkArrowResultLink + +logger = logging.getLogger(__name__) + + +@dataclass +class DownloadedFile: + """ + Class for the result file and metadata. + + Attributes: + file_bytes (bytes): Downloaded file in bytes. + start_row_offset (int): The offset of the starting row in relation to the full result. + row_count (int): Number of rows the file represents in the result. + """ + + file_bytes: bytes + start_row_offset: int + row_count: int + + +class ResultFileDownloadManager: + def __init__(self, max_download_threads: int, lz4_compressed: bool): + self.download_handlers: List[ResultSetDownloadHandler] = [] + self.thread_pool = ThreadPoolExecutor(max_workers=max_download_threads + 1) + self.downloadable_result_settings = DownloadableResultSettings(lz4_compressed) + self.fetch_need_retry = False + self.num_consecutive_result_file_download_retries = 0 + + def add_file_links( + self, t_spark_arrow_result_links: List[TSparkArrowResultLink] + ) -> None: + """ + Create download handler for each cloud fetch link. + + Args: + t_spark_arrow_result_links: List of cloud fetch links consisting of file URL and metadata. + """ + for link in t_spark_arrow_result_links: + if link.rowCount <= 0: + continue + self.download_handlers.append( + ResultSetDownloadHandler(self.downloadable_result_settings, link) + ) + + def get_next_downloaded_file( + self, next_row_offset: int + ) -> Union[DownloadedFile, None]: + """ + Get next file that starts at given offset. + + This function gets the next downloaded file in which its rows start at the specified next_row_offset + in relation to the full result. File downloads are scheduled if not already, and once the correct + download handler is located, the function waits for the download status and returns the resulting file. + If there are no more downloads, a download was not successful, or the correct file could not be located, + this function shuts down the thread pool and returns None. + + Args: + next_row_offset (int): The offset of the starting row of the next file we want data from. 
+ """ + # No more files to download from this batch of links + if not self.download_handlers: + self._shutdown_manager() + return None + + # Remove handlers we don't need anymore + self._remove_past_handlers(next_row_offset) + + # Schedule the downloads + self._schedule_downloads() + + # Find next file + idx = self._find_next_file_index(next_row_offset) + if idx is None: + self._shutdown_manager() + return None + handler = self.download_handlers[idx] + + # Check (and wait) for download status + if self._check_if_download_successful(handler): + # Buffer should be empty so set buffer to new ArrowQueue with result_file + result = DownloadedFile( + handler.result_file, + handler.result_link.startRowOffset, + handler.result_link.rowCount, + ) + self.download_handlers.pop(idx) + # Return True upon successful download to continue loop and not force a retry + return result + # Download was not successful for next download item, force a retry + self._shutdown_manager() + return None + + def _remove_past_handlers(self, next_row_offset: int): + # Any link in which its start to end range doesn't include the next row to be fetched does not need downloading + i = 0 + while i < len(self.download_handlers): + result_link = self.download_handlers[i].result_link + if result_link.startRowOffset + result_link.rowCount > next_row_offset: + i += 1 + continue + self.download_handlers.pop(i) + + def _schedule_downloads(self): + # Schedule downloads for all download handlers if not already scheduled. + for handler in self.download_handlers: + if handler.is_download_scheduled: + continue + try: + self.thread_pool.submit(handler.run) + except Exception as e: + logger.error(e) + break + handler.is_download_scheduled = True + + def _find_next_file_index(self, next_row_offset: int): + # Get the handler index of the next file in order + next_indices = [ + i + for i, handler in enumerate(self.download_handlers) + if handler.is_download_scheduled + and handler.result_link.startRowOffset == next_row_offset + ] + return next_indices[0] if len(next_indices) > 0 else None + + def _check_if_download_successful(self, handler: ResultSetDownloadHandler): + # Check (and wait until download finishes) if download was successful + if not handler.is_file_download_successful(): + if handler.is_link_expired: + self.fetch_need_retry = True + return False + elif handler.is_download_timedout: + # Consecutive file retries should not exceed threshold in settings + if ( + self.num_consecutive_result_file_download_retries + >= self.downloadable_result_settings.max_consecutive_file_download_retries + ): + self.fetch_need_retry = True + return False + self.num_consecutive_result_file_download_retries += 1 + + # Re-submit handler run to thread pool and recursively check download status + self.thread_pool.submit(handler.run) + return self._check_if_download_successful(handler) + else: + self.fetch_need_retry = True + return False + + self.num_consecutive_result_file_download_retries = 0 + self.fetch_need_retry = False + return True + + def _shutdown_manager(self): + # Clear download handlers and shutdown the thread pool to cancel pending futures + self.download_handlers = [] + self.thread_pool.shutdown(wait=False, cancel_futures=True) diff --git a/src/databricks/sql/cloudfetch/downloader.py b/src/databricks/sql/cloudfetch/downloader.py index d3c4a480..019c4ef9 100644 --- a/src/databricks/sql/cloudfetch/downloader.py +++ b/src/databricks/sql/cloudfetch/downloader.py @@ -1,4 +1,5 @@ import logging +from dataclasses import dataclass import requests 
import lz4.frame @@ -10,10 +11,28 @@ logger = logging.getLogger(__name__) +@dataclass +class DownloadableResultSettings: + """ + Class for settings common to each download handler. + + Attributes: + is_lz4_compressed (bool): Whether file is expected to be lz4 compressed. + link_expiry_buffer_secs (int): Time in seconds to prevent download of a link before it expires. Default 0 secs. + download_timeout (int): Timeout for download requests. Default 60 secs. + max_consecutive_file_download_retries (int): Number of consecutive download retries before shutting down. + """ + + is_lz4_compressed: bool + link_expiry_buffer_secs: int = 0 + download_timeout: int = 60 + max_consecutive_file_download_retries: int = 0 + + class ResultSetDownloadHandler(threading.Thread): def __init__( self, - downloadable_result_settings, + downloadable_result_settings: DownloadableResultSettings, t_spark_arrow_result_link: TSparkArrowResultLink, ): super().__init__() @@ -32,8 +51,11 @@ def is_file_download_successful(self) -> bool: This function will block until a file download finishes or until a timeout. """ - timeout = self.settings.download_timeout - timeout = timeout if timeout and timeout > 0 else None + timeout = ( + self.settings.download_timeout + if self.settings.download_timeout > 0 + else None + ) try: if not self.is_download_finished.wait(timeout=timeout): self.is_download_timedout = True diff --git a/tests/unit/test_download_manager.py b/tests/unit/test_download_manager.py new file mode 100644 index 00000000..97bf407a --- /dev/null +++ b/tests/unit/test_download_manager.py @@ -0,0 +1,207 @@ +import unittest +from unittest.mock import patch, MagicMock + +import databricks.sql.cloudfetch.download_manager as download_manager +import databricks.sql.cloudfetch.downloader as downloader +from databricks.sql.thrift_api.TCLIService.ttypes import TSparkArrowResultLink + + +class DownloadManagerTests(unittest.TestCase): + """ + Unit tests for checking download manager logic. 
+ """ + + def create_download_manager(self): + max_download_threads = 10 + lz4_compressed = True + return download_manager.ResultFileDownloadManager(max_download_threads, lz4_compressed) + + def create_result_link( + self, + file_link: str = "fileLink", + start_row_offset: int = 0, + row_count: int = 8000, + bytes_num: int = 20971520 + ): + return TSparkArrowResultLink(file_link, None, start_row_offset, row_count, bytes_num) + + def create_result_links(self, num_files: int, start_row_offset: int = 0): + result_links = [] + for i in range(num_files): + file_link = "fileLink_" + str(i) + result_link = self.create_result_link(file_link=file_link, start_row_offset=start_row_offset) + result_links.append(result_link) + start_row_offset += result_link.rowCount + return result_links + + def test_add_file_links_zero_row_count(self): + links = [self.create_result_link(row_count=0, bytes_num=0)] + manager = self.create_download_manager() + manager.add_file_links(links) + + assert not manager.download_handlers + + def test_add_file_links_success(self): + links = self.create_result_links(num_files=10) + manager = self.create_download_manager() + manager.add_file_links(links) + + assert len(manager.download_handlers) == 10 + + def test_remove_past_handlers_one(self): + links = self.create_result_links(num_files=10) + manager = self.create_download_manager() + manager.add_file_links(links) + + manager._remove_past_handlers(8000) + assert len(manager.download_handlers) == 9 + + def test_remove_past_handlers_all(self): + links = self.create_result_links(num_files=10) + manager = self.create_download_manager() + manager.add_file_links(links) + + manager._remove_past_handlers(8000*10) + assert len(manager.download_handlers) == 0 + + @patch("concurrent.futures.ThreadPoolExecutor.submit") + def test_schedule_downloads_partial_already_scheduled(self, mock_submit): + links = self.create_result_links(num_files=10) + manager = self.create_download_manager() + manager.add_file_links(links) + + for i in range(5): + manager.download_handlers[i].is_download_scheduled = True + + manager._schedule_downloads() + assert mock_submit.call_count == 5 + assert sum([1 if handler.is_download_scheduled else 0 for handler in manager.download_handlers]) == 10 + + @patch("concurrent.futures.ThreadPoolExecutor.submit") + def test_schedule_downloads_will_not_schedule_twice(self, mock_submit): + links = self.create_result_links(num_files=10) + manager = self.create_download_manager() + manager.add_file_links(links) + + for i in range(5): + manager.download_handlers[i].is_download_scheduled = True + + manager._schedule_downloads() + assert mock_submit.call_count == 5 + assert sum([1 if handler.is_download_scheduled else 0 for handler in manager.download_handlers]) == 10 + + manager._schedule_downloads() + assert mock_submit.call_count == 5 + + @patch("concurrent.futures.ThreadPoolExecutor.submit", side_effect=[True, KeyError("foo")]) + def test_schedule_downloads_submit_fails(self, mock_submit): + links = self.create_result_links(num_files=10) + manager = self.create_download_manager() + manager.add_file_links(links) + + manager._schedule_downloads() + assert mock_submit.call_count == 2 + assert sum([1 if handler.is_download_scheduled else 0 for handler in manager.download_handlers]) == 1 + + @patch("concurrent.futures.ThreadPoolExecutor.submit") + def test_find_next_file_index_all_scheduled_next_row_0(self, mock_submit): + links = self.create_result_links(num_files=10) + manager = self.create_download_manager() + 
manager.add_file_links(links) + manager._schedule_downloads() + + assert manager._find_next_file_index(0) == 0 + + @patch("concurrent.futures.ThreadPoolExecutor.submit") + def test_find_next_file_index_all_scheduled_next_row_7999(self, mock_submit): + links = self.create_result_links(num_files=10) + manager = self.create_download_manager() + manager.add_file_links(links) + manager._schedule_downloads() + + assert manager._find_next_file_index(7999) is None + + @patch("concurrent.futures.ThreadPoolExecutor.submit") + def test_find_next_file_index_all_scheduled_next_row_8000(self, mock_submit): + links = self.create_result_links(num_files=10) + manager = self.create_download_manager() + manager.add_file_links(links) + manager._schedule_downloads() + + assert manager._find_next_file_index(8000) == 1 + + @patch("concurrent.futures.ThreadPoolExecutor.submit", side_effect=[True, KeyError("foo")]) + def test_find_next_file_index_one_scheduled_next_row_8000(self, mock_submit): + links = self.create_result_links(num_files=10) + manager = self.create_download_manager() + manager.add_file_links(links) + manager._schedule_downloads() + + assert manager._find_next_file_index(8000) is None + + @patch("databricks.sql.cloudfetch.downloader.ResultSetDownloadHandler.is_file_download_successful", + return_value=True) + @patch("concurrent.futures.ThreadPoolExecutor.submit") + def test_check_if_download_successful_happy(self, mock_submit, mock_is_file_download_successful): + links = self.create_result_links(num_files=10) + manager = self.create_download_manager() + manager.add_file_links(links) + manager._schedule_downloads() + + status = manager._check_if_download_successful(manager.download_handlers[0]) + assert status + assert manager.num_consecutive_result_file_download_retries == 0 + + @patch("databricks.sql.cloudfetch.downloader.ResultSetDownloadHandler.is_file_download_successful", + return_value=False) + def test_check_if_download_successful_link_expired(self, mock_is_file_download_successful): + manager = self.create_download_manager() + handler = downloader.ResultSetDownloadHandler(manager.downloadable_result_settings, self.create_result_link()) + handler.is_link_expired = True + + status = manager._check_if_download_successful(handler) + mock_is_file_download_successful.assert_called() + assert not status + assert manager.fetch_need_retry + + @patch("databricks.sql.cloudfetch.downloader.ResultSetDownloadHandler.is_file_download_successful", + return_value=False) + def test_check_if_download_successful_download_timed_out_no_retries(self, mock_is_file_download_successful): + manager = self.create_download_manager() + handler = downloader.ResultSetDownloadHandler(manager.downloadable_result_settings, self.create_result_link()) + handler.is_download_timedout = True + + status = manager._check_if_download_successful(handler) + mock_is_file_download_successful.assert_called() + assert not status + assert manager.fetch_need_retry + + @patch("concurrent.futures.ThreadPoolExecutor.submit") + @patch("databricks.sql.cloudfetch.downloader.ResultSetDownloadHandler.is_file_download_successful", + return_value=False) + def test_check_if_download_successful_download_timed_out_1_retry(self, mock_is_file_download_successful, mock_submit): + manager = self.create_download_manager() + manager.downloadable_result_settings = download_manager.DownloadableResultSettings( + is_lz4_compressed=True, + download_timeout=0, + max_consecutive_file_download_retries=1, + ) + handler = 
downloader.ResultSetDownloadHandler(manager.downloadable_result_settings, self.create_result_link()) + handler.is_download_timedout = True + + status = manager._check_if_download_successful(handler) + assert mock_is_file_download_successful.call_count == 2 + assert mock_submit.call_count == 1 + assert not status + assert manager.fetch_need_retry + + @patch("databricks.sql.cloudfetch.downloader.ResultSetDownloadHandler.is_file_download_successful", + return_value=False) + def test_check_if_download_successful_other_reason(self, mock_is_file_download_successful): + manager = self.create_download_manager() + handler = downloader.ResultSetDownloadHandler(manager.downloadable_result_settings, self.create_result_link()) + + status = manager._check_if_download_successful(handler) + mock_is_file_download_successful.assert_called() + assert not status + assert manager.fetch_need_retry diff --git a/tests/unit/test_downloader.py b/tests/unit/test_downloader.py index cee3a83c..6e13c949 100644 --- a/tests/unit/test_downloader.py +++ b/tests/unit/test_downloader.py @@ -136,7 +136,7 @@ def test_download_timeout(self, mock_time, mock_session): @patch("threading.Event.wait", return_value=True) def test_is_file_download_successful_has_finished(self, mock_wait): - for timeout in [None, 0, 1]: + for timeout in [0, 1]: with self.subTest(timeout=timeout): settings = Mock(download_timeout=timeout) result_link = Mock() From 5a34a4a39cbf1ad4f5068ef5ed0781cdbae7460a Mon Sep 17 00:00:00 2001 From: mattdeekay <11141331+mattdeekay@users.noreply.github.com> Date: Wed, 5 Jul 2023 11:03:29 -0700 Subject: [PATCH 15/40] Cloud fetch queue and integration (#151) * Cloud fetch queue and integration Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Enable cloudfetch with direct results Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Typing and style changes Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Client-settable max_download_threads Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Docstrings and comments Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Increase default buffer size bytes to 104857600 Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Move max_download_threads to kwargs of ThriftBackend, fix unit tests Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Fix tests: staticmethod make_arrow_table mock not callable Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * cancel_futures in shutdown() only available in python >=3.9.0 Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Black linting Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Fix typing errors Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> --------- Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> --- src/databricks/sql/client.py | 7 +- .../sql/cloudfetch/download_manager.py | 4 +- src/databricks/sql/thrift_backend.py | 151 +++------ src/databricks/sql/utils.py | 297 +++++++++++++++++- tests/unit/test_cloud_fetch_queue.py | 231 ++++++++++++++ tests/unit/test_thrift_backend.py | 42 ++- 6 files changed, 596 insertions(+), 136 deletions(-) create mode 100644 tests/unit/test_cloud_fetch_queue.py diff --git a/src/databricks/sql/client.py b/src/databricks/sql/client.py index 14e59df6..aa628441 100644 --- a/src/databricks/sql/client.py +++ 
b/src/databricks/sql/client.py @@ -17,7 +17,7 @@ logger = logging.getLogger(__name__) -DEFAULT_RESULT_BUFFER_SIZE_BYTES = 10485760 +DEFAULT_RESULT_BUFFER_SIZE_BYTES = 104857600 DEFAULT_ARRAY_SIZE = 100000 @@ -153,6 +153,8 @@ def read(self) -> Optional[OAuthToken]: # _use_arrow_native_timestamps # Databricks runtime will return native Arrow types for timestamps instead of Arrow strings # (True by default) + # use_cloud_fetch + # Enable use of cloud fetch to extract large query results in parallel via cloud storage if access_token: access_token_kv = {"access_token": access_token} @@ -189,6 +191,7 @@ def read(self) -> Optional[OAuthToken]: self._session_handle = self.thrift_backend.open_session( session_configuration, catalog, schema ) + self.use_cloud_fetch = kwargs.get("use_cloud_fetch", False) self.open = True logger.info("Successfully opened session " + str(self.get_session_id_hex())) self._cursors = [] # type: List[Cursor] @@ -497,6 +500,7 @@ def execute( max_bytes=self.buffer_size_bytes, lz4_compression=self.connection.lz4_compression, cursor=self, + use_cloud_fetch=self.connection.use_cloud_fetch, ) self.active_result_set = ResultSet( self.connection, @@ -822,6 +826,7 @@ def __iter__(self): break def _fill_results_buffer(self): + # At initialization or if the server does not have cloud fetch result links available results, has_more_rows = self.thrift_backend.fetch_results( op_handle=self.command_id, max_rows=self.arraysize, diff --git a/src/databricks/sql/cloudfetch/download_manager.py b/src/databricks/sql/cloudfetch/download_manager.py index aac3ac33..9a997f39 100644 --- a/src/databricks/sql/cloudfetch/download_manager.py +++ b/src/databricks/sql/cloudfetch/download_manager.py @@ -161,6 +161,6 @@ def _check_if_download_successful(self, handler: ResultSetDownloadHandler): return True def _shutdown_manager(self): - # Clear download handlers and shutdown the thread pool to cancel pending futures + # Clear download handlers and shutdown the thread pool self.download_handlers = [] - self.thread_pool.shutdown(wait=False, cancel_futures=True) + self.thread_pool.shutdown(wait=False) diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index 7756c56a..ef225d1f 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -5,7 +5,6 @@ import time import uuid import threading -import lz4.frame from ssl import CERT_NONE, CERT_REQUIRED, create_default_context from typing import List, Union @@ -26,11 +25,14 @@ ) from databricks.sql.utils import ( - ArrowQueue, ExecuteResponse, _bound, RequestErrorInfo, NoRetryReason, + ResultSetQueueFactory, + convert_arrow_based_set_to_arrow_table, + convert_decimals_in_arrow_table, + convert_column_based_set_to_arrow_table, ) logger = logging.getLogger(__name__) @@ -67,7 +69,6 @@ class ThriftBackend: CLOSED_OP_STATE = ttypes.TOperationState.CLOSED_STATE ERROR_OP_STATE = ttypes.TOperationState.ERROR_STATE - BIT_MASKS = [1, 2, 4, 8, 16, 32, 64, 128] def __init__( self, @@ -115,6 +116,8 @@ def __init__( # _socket_timeout # The timeout in seconds for socket send, recv and connect operations. Should be a positive float or integer. # (defaults to 900) + # max_download_threads + # Number of threads for handling cloud fetch downloads. 
Defaults to 10 port = port or 443 if kwargs.get("_connection_uri"): @@ -136,6 +139,9 @@ def __init__( "_use_arrow_native_timestamps", True ) + # Cloud fetch + self.max_download_threads = kwargs.get("max_download_threads", 10) + # Configure tls context ssl_context = create_default_context(cafile=kwargs.get("_tls_trusted_ca_file")) if kwargs.get("_tls_no_verify") is True: @@ -558,108 +564,14 @@ def _create_arrow_table(self, t_row_set, lz4_compressed, schema_bytes, descripti ( arrow_table, num_rows, - ) = ThriftBackend._convert_column_based_set_to_arrow_table( - t_row_set.columns, description - ) + ) = convert_column_based_set_to_arrow_table(t_row_set.columns, description) elif t_row_set.arrowBatches is not None: - ( - arrow_table, - num_rows, - ) = ThriftBackend._convert_arrow_based_set_to_arrow_table( + (arrow_table, num_rows,) = convert_arrow_based_set_to_arrow_table( t_row_set.arrowBatches, lz4_compressed, schema_bytes ) else: raise OperationalError("Unsupported TRowSet instance {}".format(t_row_set)) - return self._convert_decimals_in_arrow_table(arrow_table, description), num_rows - - @staticmethod - def _convert_decimals_in_arrow_table(table, description): - for (i, col) in enumerate(table.itercolumns()): - if description[i][1] == "decimal": - decimal_col = col.to_pandas().apply( - lambda v: v if v is None else Decimal(v) - ) - precision, scale = description[i][4], description[i][5] - assert scale is not None - assert precision is not None - # Spark limits decimal to a maximum scale of 38, - # so 128 is guaranteed to be big enough - dtype = pyarrow.decimal128(precision, scale) - col_data = pyarrow.array(decimal_col, type=dtype) - field = table.field(i).with_type(dtype) - table = table.set_column(i, field, col_data) - return table - - @staticmethod - def _convert_arrow_based_set_to_arrow_table( - arrow_batches, lz4_compressed, schema_bytes - ): - ba = bytearray() - ba += schema_bytes - n_rows = 0 - if lz4_compressed: - for arrow_batch in arrow_batches: - n_rows += arrow_batch.rowCount - ba += lz4.frame.decompress(arrow_batch.batch) - else: - for arrow_batch in arrow_batches: - n_rows += arrow_batch.rowCount - ba += arrow_batch.batch - arrow_table = pyarrow.ipc.open_stream(ba).read_all() - return arrow_table, n_rows - - @staticmethod - def _convert_column_based_set_to_arrow_table(columns, description): - arrow_table = pyarrow.Table.from_arrays( - [ThriftBackend._convert_column_to_arrow_array(c) for c in columns], - # Only use the column names from the schema, the types are determined by the - # physical types used in column based set, as they can differ from the - # mapping used in _hive_schema_to_arrow_schema. - names=[c[0] for c in description], - ) - return arrow_table, arrow_table.num_rows - - @staticmethod - def _convert_column_to_arrow_array(t_col): - """ - Return a pyarrow array from the values in a TColumn instance. - Note that ColumnBasedSet has no native support for complex types, so they will be converted - to strings server-side. 
- """ - field_name_to_arrow_type = { - "boolVal": pyarrow.bool_(), - "byteVal": pyarrow.int8(), - "i16Val": pyarrow.int16(), - "i32Val": pyarrow.int32(), - "i64Val": pyarrow.int64(), - "doubleVal": pyarrow.float64(), - "stringVal": pyarrow.string(), - "binaryVal": pyarrow.binary(), - } - for field in field_name_to_arrow_type.keys(): - wrapper = getattr(t_col, field) - if wrapper: - return ThriftBackend._create_arrow_array( - wrapper, field_name_to_arrow_type[field] - ) - - raise OperationalError("Empty TColumn instance {}".format(t_col)) - - @staticmethod - def _create_arrow_array(t_col_value_wrapper, arrow_type): - result = t_col_value_wrapper.values - nulls = t_col_value_wrapper.nulls # bitfield describing which values are null - assert isinstance(nulls, bytes) - - # The number of bits in nulls can be both larger or smaller than the number of - # elements in result, so take the minimum of both to iterate over. - length = min(len(result), len(nulls) * 8) - - for i in range(length): - if nulls[i >> 3] & ThriftBackend.BIT_MASKS[i & 0x7]: - result[i] = None - - return pyarrow.array(result, type=arrow_type) + return convert_decimals_in_arrow_table(arrow_table, description), num_rows def _get_metadata_resp(self, op_handle): req = ttypes.TGetResultSetMetadataReq(operationHandle=op_handle) @@ -752,6 +664,7 @@ def _results_message_to_execute_response(self, resp, operation_state): if t_result_set_metadata_resp.resultFormat not in [ ttypes.TSparkRowSetType.ARROW_BASED_SET, ttypes.TSparkRowSetType.COLUMN_BASED_SET, + ttypes.TSparkRowSetType.URL_BASED_SET, ]: raise OperationalError( "Expected results to be in Arrow or column based format, " @@ -783,13 +696,14 @@ def _results_message_to_execute_response(self, resp, operation_state): assert direct_results.resultSet.results.startRowOffset == 0 assert direct_results.resultSetMetadata - arrow_results, n_rows = self._create_arrow_table( - direct_results.resultSet.results, - lz4_compressed, - schema_bytes, - description, + arrow_queue_opt = ResultSetQueueFactory.build_queue( + row_set_type=t_result_set_metadata_resp.resultFormat, + t_row_set=direct_results.resultSet.results, + arrow_schema_bytes=schema_bytes, + max_download_threads=self.max_download_threads, + lz4_compressed=lz4_compressed, + description=description, ) - arrow_queue_opt = ArrowQueue(arrow_results, n_rows, 0) else: arrow_queue_opt = None return ExecuteResponse( @@ -843,7 +757,14 @@ def _check_direct_results_for_error(t_spark_direct_results): ) def execute_command( - self, operation, session_handle, max_rows, max_bytes, lz4_compression, cursor + self, + operation, + session_handle, + max_rows, + max_bytes, + lz4_compression, + cursor, + use_cloud_fetch=False, ): assert session_handle is not None @@ -864,7 +785,7 @@ def execute_command( ), canReadArrowResult=True, canDecompressLZ4Result=lz4_compression, - canDownloadResult=False, + canDownloadResult=use_cloud_fetch, confOverlay={ # We want to receive proper Timestamp arrow types. 
"spark.thriftserver.arrowBasedRowSet.timestampAsString": "false" @@ -993,6 +914,7 @@ def fetch_results( maxRows=max_rows, maxBytes=max_bytes, orientation=ttypes.TFetchOrientation.FETCH_NEXT, + includeResultSetMetadata=True, ) resp = self.make_request(self._client.FetchResults, req) @@ -1002,12 +924,17 @@ def fetch_results( expected_row_start_offset, resp.results.startRowOffset ) ) - arrow_results, n_rows = self._create_arrow_table( - resp.results, lz4_compressed, arrow_schema_bytes, description + + queue = ResultSetQueueFactory.build_queue( + row_set_type=resp.resultSetMetadata.resultFormat, + t_row_set=resp.results, + arrow_schema_bytes=arrow_schema_bytes, + max_download_threads=self.max_download_threads, + lz4_compressed=lz4_compressed, + description=description, ) - arrow_queue = ArrowQueue(arrow_results, n_rows) - return arrow_queue, resp.hasMoreRows + return queue, resp.hasMoreRows def close_command(self, op_handle): req = ttypes.TCloseOperationReq(operationHandle=op_handle) diff --git a/src/databricks/sql/utils.py b/src/databricks/sql/utils.py index ed558136..0aefc7a1 100644 --- a/src/databricks/sql/utils.py +++ b/src/databricks/sql/utils.py @@ -1,16 +1,94 @@ +from abc import ABC, abstractmethod from collections import namedtuple, OrderedDict from collections.abc import Iterable -import datetime, decimal +from decimal import Decimal +import datetime +import decimal from enum import Enum -from typing import Dict +import lz4.frame +from typing import Dict, List, Union, Any import pyarrow -from databricks.sql import exc +from databricks.sql import exc, OperationalError +from databricks.sql.cloudfetch.download_manager import ResultFileDownloadManager +from databricks.sql.thrift_api.TCLIService.ttypes import ( + TSparkArrowResultLink, + TSparkRowSetType, + TRowSet, +) + +BIT_MASKS = [1, 2, 4, 8, 16, 32, 64, 128] + + +class ResultSetQueue(ABC): + @abstractmethod + def next_n_rows(self, num_rows: int) -> pyarrow.Table: + pass + + @abstractmethod + def remaining_rows(self) -> pyarrow.Table: + pass + + +class ResultSetQueueFactory(ABC): + @staticmethod + def build_queue( + row_set_type: TSparkRowSetType, + t_row_set: TRowSet, + arrow_schema_bytes: bytes, + max_download_threads: int, + lz4_compressed: bool = True, + description: List[List[Any]] = None, + ) -> ResultSetQueue: + """ + Factory method to build a result set queue. + + Args: + row_set_type (enum): Row set type (Arrow, Column, or URL). + t_row_set (TRowSet): Result containing arrow batches, columns, or cloud fetch links. + arrow_schema_bytes (bytes): Bytes representing the arrow schema. + lz4_compressed (bool): Whether result data has been lz4 compressed. + description (List[List[Any]]): Hive table schema description. + max_download_threads (int): Maximum number of downloader thread pool threads. 
+ + Returns: + ResultSetQueue + """ + if row_set_type == TSparkRowSetType.ARROW_BASED_SET: + arrow_table, n_valid_rows = convert_arrow_based_set_to_arrow_table( + t_row_set.arrowBatches, lz4_compressed, arrow_schema_bytes + ) + converted_arrow_table = convert_decimals_in_arrow_table( + arrow_table, description + ) + return ArrowQueue(converted_arrow_table, n_valid_rows) + elif row_set_type == TSparkRowSetType.COLUMN_BASED_SET: + arrow_table, n_valid_rows = convert_column_based_set_to_arrow_table( + t_row_set.columns, description + ) + converted_arrow_table = convert_decimals_in_arrow_table( + arrow_table, description + ) + return ArrowQueue(converted_arrow_table, n_valid_rows) + elif row_set_type == TSparkRowSetType.URL_BASED_SET: + return CloudFetchQueue( + arrow_schema_bytes, + start_row_offset=t_row_set.startRowOffset, + result_links=t_row_set.resultLinks, + lz4_compressed=lz4_compressed, + description=description, + max_download_threads=max_download_threads, + ) + else: + raise AssertionError("Row set type is not valid") -class ArrowQueue: +class ArrowQueue(ResultSetQueue): def __init__( - self, arrow_table: pyarrow.Table, n_valid_rows: int, start_row_index: int = 0 + self, + arrow_table: pyarrow.Table, + n_valid_rows: int, + start_row_index: int = 0, ): """ A queue-like wrapper over an Arrow table @@ -40,6 +118,119 @@ def remaining_rows(self) -> pyarrow.Table: return slice +class CloudFetchQueue(ResultSetQueue): + def __init__( + self, + schema_bytes, + max_download_threads: int, + start_row_offset: int = 0, + result_links: List[TSparkArrowResultLink] = None, + lz4_compressed: bool = True, + description: List[List[Any]] = None, + ): + """ + A queue-like wrapper over CloudFetch arrow batches. + + Attributes: + schema_bytes (bytes): Table schema in bytes. + max_download_threads (int): Maximum number of downloader thread pool threads. + start_row_offset (int): The offset of the first row of the cloud fetch links. + result_links (List[TSparkArrowResultLink]): Links containing the downloadable URL and metadata. + lz4_compressed (bool): Whether the files are lz4 compressed. + description (List[List[Any]]): Hive table schema description. + """ + self.schema_bytes = schema_bytes + self.max_download_threads = max_download_threads + self.start_row_index = start_row_offset + self.result_links = result_links + self.lz4_compressed = lz4_compressed + self.description = description + + self.download_manager = ResultFileDownloadManager( + self.max_download_threads, self.lz4_compressed + ) + self.download_manager.add_file_links(result_links) + + self.table = self._create_next_table() + self.table_row_index = 0 + + def next_n_rows(self, num_rows: int) -> pyarrow.Table: + """ + Get up to the next n rows of the cloud fetch Arrow dataframes. + + Args: + num_rows (int): Number of rows to retrieve. 
+ + Returns: + pyarrow.Table + """ + if not self.table: + # Return empty pyarrow table to cause retry of fetch + return self._create_empty_table() + results = self.table.slice(0, 0) + while num_rows > 0 and self.table: + # Get remaining of num_rows or the rest of the current table, whichever is smaller + length = min(num_rows, self.table.num_rows - self.table_row_index) + table_slice = self.table.slice(self.table_row_index, length) + results = pyarrow.concat_tables([results, table_slice]) + self.table_row_index += table_slice.num_rows + + # Replace current table with the next table if we are at the end of the current table + if self.table_row_index == self.table.num_rows: + self.table = self._create_next_table() + self.table_row_index = 0 + num_rows -= table_slice.num_rows + return results + + def remaining_rows(self) -> pyarrow.Table: + """ + Get all remaining rows of the cloud fetch Arrow dataframes. + + Returns: + pyarrow.Table + """ + if not self.table: + # Return empty pyarrow table to cause retry of fetch + return self._create_empty_table() + results = self.table.slice(0, 0) + while self.table: + table_slice = self.table.slice( + self.table_row_index, self.table.num_rows - self.table_row_index + ) + results = pyarrow.concat_tables([results, table_slice]) + self.table_row_index += table_slice.num_rows + self.table = self._create_next_table() + self.table_row_index = 0 + return results + + def _create_next_table(self) -> Union[pyarrow.Table, None]: + # Create next table by retrieving the logical next downloaded file, or return None to signal end of queue + downloaded_file = self.download_manager.get_next_downloaded_file( + self.start_row_index + ) + if not downloaded_file: + # None signals no more Arrow tables can be built from the remaining handlers if any remain + return None + arrow_table = create_arrow_table_from_arrow_file( + downloaded_file.file_bytes, self.description + ) + + # The server rarely prepares the exact number of rows requested by the client in cloud fetch. 
+ # Subsequently, we drop the extraneous rows in the last file if more rows are retrieved than requested + if arrow_table.num_rows > downloaded_file.row_count: + self.start_row_index += downloaded_file.row_count + return arrow_table.slice(0, downloaded_file.row_count) + + # At this point, whether the file has extraneous rows or not, the arrow table should have the correct num rows + assert downloaded_file.row_count == arrow_table.num_rows + self.start_row_index += arrow_table.num_rows + return arrow_table + + def _create_empty_table(self) -> pyarrow.Table: + # Create a 0-row table with just the schema bytes + return create_arrow_table_from_arrow_file(self.schema_bytes, self.description) + + ExecuteResponse = namedtuple( "ExecuteResponse", "status has_been_closed_server_side has_more_rows description lz4_compressed is_staging_operation " @@ -183,3 +374,99 @@ def escape_item(self, item): def inject_parameters(operation: str, parameters: Dict[str, str]): return operation % parameters + + +def create_arrow_table_from_arrow_file(file_bytes: bytes, description) -> pyarrow.Table: + arrow_table = convert_arrow_based_file_to_arrow_table(file_bytes) + return convert_decimals_in_arrow_table(arrow_table, description) + + +def convert_arrow_based_file_to_arrow_table(file_bytes: bytes): + try: + return pyarrow.ipc.open_stream(file_bytes).read_all() + except Exception as e: + raise RuntimeError("Failure to convert arrow based file to arrow table", e) + + +def convert_arrow_based_set_to_arrow_table(arrow_batches, lz4_compressed, schema_bytes): + ba = bytearray() + ba += schema_bytes + n_rows = 0 + for arrow_batch in arrow_batches: + n_rows += arrow_batch.rowCount + ba += ( + lz4.frame.decompress(arrow_batch.batch) + if lz4_compressed + else arrow_batch.batch + ) + arrow_table = pyarrow.ipc.open_stream(ba).read_all() + return arrow_table, n_rows + + +def convert_decimals_in_arrow_table(table, description) -> pyarrow.Table: + for (i, col) in enumerate(table.itercolumns()): + if description[i][1] == "decimal": + decimal_col = col.to_pandas().apply( + lambda v: v if v is None else Decimal(v) + ) + precision, scale = description[i][4], description[i][5] + assert scale is not None + assert precision is not None + # Spark limits decimal to a maximum scale of 38, + # so 128 is guaranteed to be big enough + dtype = pyarrow.decimal128(precision, scale) + col_data = pyarrow.array(decimal_col, type=dtype) + field = table.field(i).with_type(dtype) + table = table.set_column(i, field, col_data) + return table + + +def convert_column_based_set_to_arrow_table(columns, description): + arrow_table = pyarrow.Table.from_arrays( + [_convert_column_to_arrow_array(c) for c in columns], + # Only use the column names from the schema, the types are determined by the + # physical types used in column based set, as they can differ from the + # mapping used in _hive_schema_to_arrow_schema. + names=[c[0] for c in description], + ) + return arrow_table, arrow_table.num_rows + + +def _convert_column_to_arrow_array(t_col): + """ + Return a pyarrow array from the values in a TColumn instance. + Note that ColumnBasedSet has no native support for complex types, so they will be converted + to strings server-side. 
+ """ + field_name_to_arrow_type = { + "boolVal": pyarrow.bool_(), + "byteVal": pyarrow.int8(), + "i16Val": pyarrow.int16(), + "i32Val": pyarrow.int32(), + "i64Val": pyarrow.int64(), + "doubleVal": pyarrow.float64(), + "stringVal": pyarrow.string(), + "binaryVal": pyarrow.binary(), + } + for field in field_name_to_arrow_type.keys(): + wrapper = getattr(t_col, field) + if wrapper: + return _create_arrow_array(wrapper, field_name_to_arrow_type[field]) + + raise OperationalError("Empty TColumn instance {}".format(t_col)) + + +def _create_arrow_array(t_col_value_wrapper, arrow_type): + result = t_col_value_wrapper.values + nulls = t_col_value_wrapper.nulls # bitfield describing which values are null + assert isinstance(nulls, bytes) + + # The number of bits in nulls can be both larger or smaller than the number of + # elements in result, so take the minimum of both to iterate over. + length = min(len(result), len(nulls) * 8) + + for i in range(length): + if nulls[i >> 3] & BIT_MASKS[i & 0x7]: + result[i] = None + + return pyarrow.array(result, type=arrow_type) diff --git a/tests/unit/test_cloud_fetch_queue.py b/tests/unit/test_cloud_fetch_queue.py new file mode 100644 index 00000000..e5611ce6 --- /dev/null +++ b/tests/unit/test_cloud_fetch_queue.py @@ -0,0 +1,231 @@ +import pyarrow +import unittest +from unittest.mock import MagicMock, patch + +from databricks.sql.thrift_api.TCLIService.ttypes import TSparkArrowResultLink +import databricks.sql.utils as utils + + +class CloudFetchQueueSuite(unittest.TestCase): + + def create_result_link( + self, + file_link: str = "fileLink", + start_row_offset: int = 0, + row_count: int = 8000, + bytes_num: int = 20971520 + ): + return TSparkArrowResultLink(file_link, None, start_row_offset, row_count, bytes_num) + + def create_result_links(self, num_files: int, start_row_offset: int = 0): + result_links = [] + for i in range(num_files): + file_link = "fileLink_" + str(i) + result_link = self.create_result_link(file_link=file_link, start_row_offset=start_row_offset) + result_links.append(result_link) + start_row_offset += result_link.rowCount + return result_links + + @staticmethod + def make_arrow_table(): + batch = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]] + n_cols = len(batch[0]) if batch else 0 + schema = pyarrow.schema({"col%s" % i: pyarrow.uint32() for i in range(n_cols)}) + cols = [[batch[row][col] for row in range(len(batch))] for col in range(n_cols)] + return pyarrow.Table.from_pydict(dict(zip(schema.names, cols)), schema=schema) + + @staticmethod + def get_schema_bytes(): + schema = pyarrow.schema({"col%s" % i: pyarrow.uint32() for i in range(4)}) + sink = pyarrow.BufferOutputStream() + writer = pyarrow.ipc.RecordBatchStreamWriter(sink, schema) + writer.close() + return sink.getvalue().to_pybytes() + + + @patch("databricks.sql.utils.CloudFetchQueue._create_next_table", return_value=[None, None]) + def test_initializer_adds_links(self, mock_create_next_table): + schema_bytes = MagicMock() + result_links = self.create_result_links(10) + queue = utils.CloudFetchQueue(schema_bytes, result_links=result_links, max_download_threads=10) + + assert len(queue.download_manager.download_handlers) == 10 + mock_create_next_table.assert_called() + + def test_initializer_no_links_to_add(self): + schema_bytes = MagicMock() + result_links = [] + queue = utils.CloudFetchQueue(schema_bytes, result_links=result_links, max_download_threads=10) + + assert len(queue.download_manager.download_handlers) == 0 + assert queue.table is None + + 
@patch("databricks.sql.cloudfetch.download_manager.ResultFileDownloadManager.get_next_downloaded_file", return_value=None) + def test_create_next_table_no_download(self, mock_get_next_downloaded_file): + queue = utils.CloudFetchQueue(MagicMock(), result_links=[], max_download_threads=10) + + assert queue._create_next_table() is None + assert mock_get_next_downloaded_file.called_with(0) + + @patch("databricks.sql.utils.create_arrow_table_from_arrow_file") + @patch("databricks.sql.cloudfetch.download_manager.ResultFileDownloadManager.get_next_downloaded_file", + return_value=MagicMock(file_bytes=b"1234567890", row_count=4)) + def test_initializer_create_next_table_success(self, mock_get_next_downloaded_file, mock_create_arrow_table): + mock_create_arrow_table.return_value = self.make_arrow_table() + schema_bytes, description = MagicMock(), MagicMock() + queue = utils.CloudFetchQueue(schema_bytes, result_links=[], description=description, max_download_threads=10) + expected_result = self.make_arrow_table() + + assert mock_create_arrow_table.called_with(b"1234567890", True, schema_bytes, description) + assert mock_get_next_downloaded_file.called_with(0) + assert queue.table == expected_result + assert queue.table.num_rows == 4 + assert queue.table_row_index == 0 + assert queue.start_row_index == 4 + + table = queue._create_next_table() + assert table == expected_result + assert table.num_rows == 4 + assert queue.start_row_index == 8 + + @patch("databricks.sql.utils.CloudFetchQueue._create_next_table") + def test_next_n_rows_0_rows(self, mock_create_next_table): + mock_create_next_table.return_value = self.make_arrow_table() + schema_bytes, description = MagicMock(), MagicMock() + queue = utils.CloudFetchQueue(schema_bytes, result_links=[], description=description, max_download_threads=10) + assert queue.table == self.make_arrow_table() + assert queue.table.num_rows == 4 + assert queue.table_row_index == 0 + + result = queue.next_n_rows(0) + assert result.num_rows == 0 + assert queue.table_row_index == 0 + assert result == self.make_arrow_table()[0:0] + + @patch("databricks.sql.utils.CloudFetchQueue._create_next_table") + def test_next_n_rows_partial_table(self, mock_create_next_table): + mock_create_next_table.return_value = self.make_arrow_table() + schema_bytes, description = MagicMock(), MagicMock() + queue = utils.CloudFetchQueue(schema_bytes, result_links=[], description=description, max_download_threads=10) + assert queue.table == self.make_arrow_table() + assert queue.table.num_rows == 4 + assert queue.table_row_index == 0 + + result = queue.next_n_rows(3) + assert result.num_rows == 3 + assert queue.table_row_index == 3 + assert result == self.make_arrow_table()[:3] + + @patch("databricks.sql.utils.CloudFetchQueue._create_next_table") + def test_next_n_rows_more_than_one_table(self, mock_create_next_table): + mock_create_next_table.return_value = self.make_arrow_table() + schema_bytes, description = MagicMock(), MagicMock() + queue = utils.CloudFetchQueue(schema_bytes, result_links=[], description=description, max_download_threads=10) + assert queue.table == self.make_arrow_table() + assert queue.table.num_rows == 4 + assert queue.table_row_index == 0 + + result = queue.next_n_rows(7) + assert result.num_rows == 7 + assert queue.table_row_index == 3 + assert result == pyarrow.concat_tables([self.make_arrow_table(), self.make_arrow_table()])[:7] + + @patch("databricks.sql.utils.CloudFetchQueue._create_next_table") + def test_next_n_rows_more_than_one_table(self, mock_create_next_table): 
+ mock_create_next_table.return_value = self.make_arrow_table() + schema_bytes, description = MagicMock(), MagicMock() + queue = utils.CloudFetchQueue(schema_bytes, result_links=[], description=description, max_download_threads=10) + assert queue.table == self.make_arrow_table() + assert queue.table.num_rows == 4 + assert queue.table_row_index == 0 + + result = queue.next_n_rows(7) + assert result.num_rows == 7 + assert queue.table_row_index == 3 + assert result == pyarrow.concat_tables([self.make_arrow_table(), self.make_arrow_table()])[:7] + + @patch("databricks.sql.utils.CloudFetchQueue._create_next_table") + def test_next_n_rows_only_one_table_returned(self, mock_create_next_table): + mock_create_next_table.side_effect = [self.make_arrow_table(), None] + schema_bytes, description = MagicMock(), MagicMock() + queue = utils.CloudFetchQueue(schema_bytes, result_links=[], description=description, max_download_threads=10) + assert queue.table == self.make_arrow_table() + assert queue.table.num_rows == 4 + assert queue.table_row_index == 0 + + result = queue.next_n_rows(7) + assert result.num_rows == 4 + assert result == self.make_arrow_table() + + @patch("databricks.sql.utils.CloudFetchQueue._create_next_table", return_value=None) + def test_next_n_rows_empty_table(self, mock_create_next_table): + schema_bytes = self.get_schema_bytes() + description = MagicMock() + queue = utils.CloudFetchQueue(schema_bytes, result_links=[], description=description, max_download_threads=10) + assert queue.table is None + + result = queue.next_n_rows(100) + assert result == pyarrow.ipc.open_stream(bytearray(schema_bytes)).read_all() + + @patch("databricks.sql.utils.CloudFetchQueue._create_next_table") + def test_remaining_rows_empty_table_fully_returned(self, mock_create_next_table): + mock_create_next_table.side_effect = [self.make_arrow_table(), None, 0] + schema_bytes, description = MagicMock(), MagicMock() + queue = utils.CloudFetchQueue(schema_bytes, result_links=[], description=description, max_download_threads=10) + assert queue.table == self.make_arrow_table() + assert queue.table.num_rows == 4 + queue.table_row_index = 4 + + result = queue.remaining_rows() + assert result.num_rows == 0 + assert result == self.make_arrow_table()[0:0] + + @patch("databricks.sql.utils.CloudFetchQueue._create_next_table") + def test_remaining_rows_partial_table_fully_returned(self, mock_create_next_table): + mock_create_next_table.side_effect = [self.make_arrow_table(), None] + schema_bytes, description = MagicMock(), MagicMock() + queue = utils.CloudFetchQueue(schema_bytes, result_links=[], description=description, max_download_threads=10) + assert queue.table == self.make_arrow_table() + assert queue.table.num_rows == 4 + queue.table_row_index = 2 + + result = queue.remaining_rows() + assert result.num_rows == 2 + assert result == self.make_arrow_table()[2:] + + @patch("databricks.sql.utils.CloudFetchQueue._create_next_table") + def test_remaining_rows_one_table_fully_returned(self, mock_create_next_table): + mock_create_next_table.side_effect = [self.make_arrow_table(), None] + schema_bytes, description = MagicMock(), MagicMock() + queue = utils.CloudFetchQueue(schema_bytes, result_links=[], description=description, max_download_threads=10) + assert queue.table == self.make_arrow_table() + assert queue.table.num_rows == 4 + assert queue.table_row_index == 0 + + result = queue.remaining_rows() + assert result.num_rows == 4 + assert result == self.make_arrow_table() + + 
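# Aside (illustrative sketch, not prescribed by the patch): the next_n_rows /
# remaining_rows assertions above lean on pyarrow's zero-copy Table.slice and
# pyarrow.concat_tables to stitch partial tables back together. A minimal
# standalone example with made-up values:
#
#     import pyarrow
#
#     table = pyarrow.Table.from_pydict({"col0": [0, 3, 6, 9], "col1": [1, 4, 7, 10]})
#     first_three = table.slice(0, 3)   # zero-copy view of rows 0..2
#     rest = table.slice(3)             # remaining rows (row 3 onward)
#     roundtrip = pyarrow.concat_tables([first_three, rest])
#
#     assert first_three.num_rows == 3
#     assert rest.num_rows == 1
#     assert roundtrip.equals(table)    # same rows and schema, chunking aside
#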
@patch("databricks.sql.utils.CloudFetchQueue._create_next_table") + def test_remaining_rows_multiple_tables_fully_returned(self, mock_create_next_table): + mock_create_next_table.side_effect = [self.make_arrow_table(), self.make_arrow_table(), None] + schema_bytes, description = MagicMock(), MagicMock() + queue = utils.CloudFetchQueue(schema_bytes, result_links=[], description=description, max_download_threads=10) + assert queue.table == self.make_arrow_table() + assert queue.table.num_rows == 4 + queue.table_row_index = 3 + + result = queue.remaining_rows() + assert mock_create_next_table.call_count == 3 + assert result.num_rows == 5 + assert result == pyarrow.concat_tables([self.make_arrow_table(), self.make_arrow_table()])[3:] + + @patch("databricks.sql.utils.CloudFetchQueue._create_next_table", return_value=None) + def test_remaining_rows_empty_table(self, mock_create_next_table): + schema_bytes = self.get_schema_bytes() + description = MagicMock() + queue = utils.CloudFetchQueue(schema_bytes, result_links=[], description=description, max_download_threads=10) + assert queue.table is None + + result = queue.remaining_rows() + assert result == pyarrow.ipc.open_stream(bytearray(schema_bytes)).read_all() diff --git a/tests/unit/test_thrift_backend.py b/tests/unit/test_thrift_backend.py index 7ef0fa2c..0a18c39a 100644 --- a/tests/unit/test_thrift_backend.py +++ b/tests/unit/test_thrift_backend.py @@ -6,9 +6,9 @@ from ssl import CERT_NONE, CERT_REQUIRED import pyarrow -import urllib3 import databricks.sql +from databricks.sql import utils from databricks.sql.thrift_api.TCLIService import ttypes from databricks.sql import * from databricks.sql.auth.authenticators import AuthProvider @@ -327,7 +327,8 @@ def test_handle_execute_response_checks_operation_state_in_direct_results(self): thrift_backend._handle_execute_response(t_execute_resp, Mock()) self.assertIn("some information about the error", str(cm.exception)) - def test_handle_execute_response_sets_compression_in_direct_results(self): + @patch("databricks.sql.utils.ResultSetQueueFactory.build_queue", return_value=Mock()) + def test_handle_execute_response_sets_compression_in_direct_results(self, build_queue): for resp_type in self.execute_response_types: lz4Compressed=Mock() resultSet=MagicMock() @@ -589,9 +590,10 @@ def test_fall_back_to_hive_schema_if_no_arrow_schema(self, tcli_service_class): self.assertEqual(hive_schema_mock, thrift_backend._hive_schema_to_arrow_schema.call_args[0][0]) + @patch("databricks.sql.utils.ResultSetQueueFactory.build_queue", return_value=Mock()) @patch("databricks.sql.thrift_backend.TCLIService.Client") def test_handle_execute_response_reads_has_more_rows_in_direct_results( - self, tcli_service_class): + self, tcli_service_class, build_queue): for has_more_rows, resp_type in itertools.product([True, False], self.execute_response_types): with self.subTest(has_more_rows=has_more_rows, resp_type=resp_type): @@ -622,9 +624,10 @@ def test_handle_execute_response_reads_has_more_rows_in_direct_results( self.assertEqual(has_more_rows, execute_response.has_more_rows) + @patch("databricks.sql.utils.ResultSetQueueFactory.build_queue", return_value=Mock()) @patch("databricks.sql.thrift_backend.TCLIService.Client") def test_handle_execute_response_reads_has_more_rows_in_result_response( - self, tcli_service_class): + self, tcli_service_class, build_queue): for has_more_rows, resp_type in itertools.product([True, False], self.execute_response_types): with self.subTest(has_more_rows=has_more_rows, resp_type=resp_type): @@ 
-641,6 +644,9 @@ def test_handle_execute_response_reads_has_more_rows_in_result_response( status=self.okay_status, hasMoreRows=has_more_rows, results=results_mock, + resultSetMetadata=ttypes.TGetResultSetMetadataResp( + resultFormat=ttypes.TSparkRowSetType.ARROW_BASED_SET + ) ) operation_status_resp = ttypes.TGetOperationStatusResp( @@ -677,7 +683,12 @@ def test_arrow_batches_row_count_are_respected(self, tcli_service_class): rows=[], arrowBatches=[ ttypes.TSparkArrowBatch(batch=bytearray(), rowCount=15) for _ in range(10) - ])) + ] + ), + resultSetMetadata=ttypes.TGetResultSetMetadataResp( + resultFormat=ttypes.TSparkRowSetType.ARROW_BASED_SET + ) + ) tcli_service_instance.FetchResults.return_value = t_fetch_results_resp schema = pyarrow.schema([ pyarrow.field("column1", pyarrow.int32()), @@ -875,8 +886,8 @@ def test_create_arrow_table_raises_error_for_unsupported_type(self): with self.assertRaises(OperationalError): thrift_backend._create_arrow_table(t_row_set, Mock(), None, Mock()) - @patch.object(ThriftBackend, "_convert_arrow_based_set_to_arrow_table") - @patch.object(ThriftBackend, "_convert_column_based_set_to_arrow_table") + @patch("databricks.sql.thrift_backend.convert_arrow_based_set_to_arrow_table") + @patch("databricks.sql.thrift_backend.convert_column_based_set_to_arrow_table") def test_create_arrow_table_calls_correct_conversion_method(self, convert_col_mock, convert_arrow_mock): thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) @@ -910,12 +921,11 @@ def test_convert_arrow_based_set_to_arrow_table(self, open_stream_mock, lz4_deco ]).serialize().to_pybytes() arrow_batches = [ttypes.TSparkArrowBatch(batch=bytearray('Testing','utf-8'), rowCount=1) for _ in range(10)] - thrift_backend._convert_arrow_based_set_to_arrow_table(arrow_batches, False, schema) + utils.convert_arrow_based_set_to_arrow_table(arrow_batches, False, schema) lz4_decompress_mock.assert_not_called() - thrift_backend._convert_arrow_based_set_to_arrow_table(arrow_batches, True, schema) + utils.convert_arrow_based_set_to_arrow_table(arrow_batches, True, schema) lz4_decompress_mock.assert_called() - def test_convert_column_based_set_to_arrow_table_without_nulls(self): # Deliberately duplicate the column name to check that dups work @@ -931,7 +941,7 @@ def test_convert_column_based_set_to_arrow_table_without_nulls(self): binaryVal=ttypes.TBinaryColumn(values=[b'\x11', b'\x22', b'\x33'], nulls=bytes(1))) ] - arrow_table, n_rows = ThriftBackend._convert_column_based_set_to_arrow_table( + arrow_table, n_rows = utils.convert_column_based_set_to_arrow_table( t_cols, description) self.assertEqual(n_rows, 3) @@ -967,7 +977,7 @@ def test_convert_column_based_set_to_arrow_table_with_nulls(self): values=[b'\x11', b'\x22', b'\x33'], nulls=bytes([3]))) ] - arrow_table, n_rows = ThriftBackend._convert_column_based_set_to_arrow_table( + arrow_table, n_rows = utils.convert_column_based_set_to_arrow_table( t_cols, description) self.assertEqual(n_rows, 3) @@ -990,7 +1000,7 @@ def test_convert_column_based_set_to_arrow_table_uses_types_from_col_set(self): binaryVal=ttypes.TBinaryColumn(values=[b'\x11', b'\x22', b'\x33'], nulls=bytes(1))) ] - arrow_table, n_rows = ThriftBackend._convert_column_based_set_to_arrow_table( + arrow_table, n_rows = utils.convert_column_based_set_to_arrow_table( t_cols, description) self.assertEqual(n_rows, 3) @@ -1094,7 +1104,7 @@ def test_make_request_will_retry_GetOperationStatus( @patch("databricks.sql.thrift_backend._retry_policy", new_callable=retry_policy_factory) 
def test_make_request_will_retry_GetOperationStatus_for_http_error( self, mock_retry_policy, mock_gos): - + import urllib3.exceptions mock_gos.side_effect = urllib3.exceptions.HTTPError("Read timed out") @@ -1133,7 +1143,7 @@ def test_make_request_will_retry_GetOperationStatus_for_http_error( self.assertEqual(NoRetryReason.OUT_OF_ATTEMPTS.value, cm.exception.context["no-retry-reason"]) self.assertEqual(f'{EXPECTED_RETRIES}/{EXPECTED_RETRIES}', cm.exception.context["attempt"]) - + @patch("thrift.transport.THttpClient.THttpClient") @@ -1252,7 +1262,7 @@ def test_arrow_decimal_conversion(self): table, description = self.make_table_and_desc(height, n_decimal_cols, width, precision, scale, int_constant, decimal_constant) - decimal_converted_table = ThriftBackend._convert_decimals_in_arrow_table( + decimal_converted_table = utils.convert_decimals_in_arrow_table( table, description) for i in range(width): From 759401cc46b69e77a96e58d117dd0bf2934ccc1b Mon Sep 17 00:00:00 2001 From: mattdeekay <11141331+mattdeekay@users.noreply.github.com> Date: Fri, 7 Jul 2023 10:47:23 -0700 Subject: [PATCH 16/40] Cloud Fetch e2e tests (#154) * Cloud Fetch e2e tests Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Test case works for e2-dogfood shared unity catalog Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Moving test to LargeQueriesSuite and setting catalog to hive_metastore Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Align default value of buffer_size_bytes in driver tests Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> * Adding comment to specify what's needed to run successfully Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> --------- Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> --- tests/e2e/test_driver.py | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/tests/e2e/test_driver.py b/tests/e2e/test_driver.py index 26b7d186..c8713bf0 100644 --- a/tests/e2e/test_driver.py +++ b/tests/e2e/test_driver.py @@ -1,3 +1,4 @@ +import itertools from contextlib import contextmanager from collections import OrderedDict import datetime @@ -52,6 +53,7 @@ def __init__(self, method_name): # If running in local mode, just use environment variables for params. self.arguments = os.environ if get_args_from_env else {} self.arraysize = 1000 + self.buffer_size_bytes = 104857600 def connection_params(self, arguments): params = { @@ -84,7 +86,7 @@ def connection(self, extra_params=()): @contextmanager def cursor(self, extra_params=()): with self.connection(extra_params) as conn: - cursor = conn.cursor(arraysize=self.arraysize) + cursor = conn.cursor(arraysize=self.arraysize, buffer_size_bytes=self.buffer_size_bytes) try: yield cursor finally: @@ -104,6 +106,36 @@ def get_some_rows(self, cursor, fetchmany_size): else: return None + @skipUnless(pysql_supports_arrow(), 'needs arrow support') + def test_cloud_fetch(self): + # This test can take several minutes to run + limits = [100000, 300000] + threads = [10, 25] + self.arraysize = 100000 + # This test requires a large table with many rows to properly initiate cloud fetch. + # e2-dogfood host > hive_metastore catalog > main schema has such a table called store_sales. + # If this table is deleted or this test is run on a different host, a different table may need to be used. 
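For readers who want to exercise Cloud Fetch outside this test harness, a minimal sketch of the application-side switch follows. It assumes `use_cloud_fetch` and `max_download_threads` are accepted as keyword arguments by `databricks.sql.connect()`, mirroring the extra connection parameters this e2e test passes through, and it uses a placeholder table name in place of the `store_sales` table mentioned above.

```python
# Hedged sketch (not part of this patch): enabling Cloud Fetch from application code.
# Assumes use_cloud_fetch / max_download_threads pass through sql.connect(),
# just as the extra connection params in this e2e test do.
import os
from databricks import sql

connection = sql.connect(
    server_hostname=os.getenv("DATABRICKS_SERVER_HOSTNAME"),
    http_path=os.getenv("DATABRICKS_HTTP_PATH"),
    access_token=os.getenv("DATABRICKS_TOKEN"),
    use_cloud_fetch=True,        # fetch large results through presigned cloud links
    max_download_threads=10,     # parallel result-file downloads
)
cursor = connection.cursor(arraysize=100000)
# Placeholder query: any sufficiently large table will do; small result sets
# may still come back as inline Arrow batches.
cursor.execute("SELECT * FROM some_catalog.some_schema.some_large_table LIMIT 300000")
rows = cursor.fetchall()
print(len(rows))
cursor.close()
connection.close()
```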
+ base_query = "SELECT * FROM store_sales WHERE ss_sold_date_sk = 2452234 " + for num_limit, num_threads, lz4_compression in itertools.product(limits, threads, [True, False]): + with self.subTest(num_limit=num_limit, num_threads=num_threads, lz4_compression=lz4_compression): + cf_result, noop_result = None, None + query = base_query + "LIMIT " + str(num_limit) + with self.cursor({ + "use_cloud_fetch": True, + "max_download_threads": num_threads, + "catalog": "hive_metastore" + }) as cursor: + cursor.execute(query) + cf_result = cursor.fetchall() + with self.cursor({ + "catalog": "hive_metastore" + }) as cursor: + cursor.execute(query) + noop_result = cursor.fetchall() + assert len(cf_result) == len(noop_result) + for i in range(len(cf_result)): + assert cf_result[i] == noop_result[i] + # Exclude Retry tests because they require specific setups, and LargeQueries too slow for core # tests From 0e5c2447f7514c27f2cbf6b78e6d6b8124c3e03e Mon Sep 17 00:00:00 2001 From: mattdeekay <11141331+mattdeekay@users.noreply.github.com> Date: Mon, 10 Jul 2023 15:24:16 -0700 Subject: [PATCH 17/40] Update changelog for cloudfetch (#172) Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com> --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d9bff868..0542eb82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## 2.7.x (Unreleased) +- Add support for Cloud Fetch + ## 2.7.0 (2023-06-26) - Fix: connector raised exception when calling close() on a closed Thrift session From f45280da2cfa50d1883de576e0251df12968219b Mon Sep 17 00:00:00 2001 From: Jesse Date: Tue, 11 Jul 2023 17:43:10 -0500 Subject: [PATCH 18/40] Improve sqlalchemy backward compatibility with 1.3.24 (#173) Signed-off-by: Jesse Whitehouse --- CHANGELOG.md | 1 + examples/sqlalchemy.py | 37 +++-- src/databricks/sqlalchemy/dialect/__init__.py | 38 ++++-- tests/e2e/sqlalchemy/test_basic.py | 128 +++++++++++++++--- 4 files changed, 166 insertions(+), 38 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0542eb82..6e9d2963 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## 2.7.x (Unreleased) - Add support for Cloud Fetch +- Fix: Revised SQLAlchemy dialect and examples for compatibility with SQLAlchemy==1.3.x ## 2.7.0 (2023-06-26) diff --git a/examples/sqlalchemy.py b/examples/sqlalchemy.py index 2c0b693a..35160658 100644 --- a/examples/sqlalchemy.py +++ b/examples/sqlalchemy.py @@ -42,9 +42,15 @@ """ import os -from sqlalchemy.orm import declarative_base, Session +import sqlalchemy +from sqlalchemy.orm import Session from sqlalchemy import Column, String, Integer, BOOLEAN, create_engine, select +try: + from sqlalchemy.orm import declarative_base +except ImportError: + from sqlalchemy.ext.declarative import declarative_base + host = os.getenv("DATABRICKS_SERVER_HOSTNAME") http_path = os.getenv("DATABRICKS_HTTP_PATH") access_token = os.getenv("DATABRICKS_TOKEN") @@ -59,10 +65,20 @@ "_user_agent_entry": "PySQL Example Script", } -engine = create_engine( - f"databricks://token:{access_token}@{host}?http_path={http_path}&catalog={catalog}&schema={schema}", - connect_args=extra_connect_args, -) +if sqlalchemy.__version__.startswith("1.3"): + # SQLAlchemy 1.3.x fails to parse the http_path, catalog, and schema from our connection string + # Pass these in as connect_args instead + + conn_string = f"databricks://token:{access_token}@{host}" + connect_args = dict(catalog=catalog, schema=schema, http_path=http_path) + all_connect_args = {**extra_connect_args, 
**connect_args} + engine = create_engine(conn_string, connect_args=all_connect_args) +else: + engine = create_engine( + f"databricks://token:{access_token}@{host}?http_path={http_path}&catalog={catalog}&schema={schema}", + connect_args=extra_connect_args, + ) + session = Session(bind=engine) base = declarative_base(bind=engine) @@ -73,7 +89,7 @@ class SampleObject(base): name = Column(String(255), primary_key=True) episodes = Column(Integer) - some_bool = Column(BOOLEAN) + some_bool = Column(BOOLEAN(create_constraint=False)) base.metadata.create_all() @@ -86,9 +102,14 @@ class SampleObject(base): session.commit() -stmt = select(SampleObject).where(SampleObject.name.in_(["Bim Adewunmi", "Miki Meek"])) +# SQLAlchemy 1.3 has slightly different methods +if sqlalchemy.__version__.startswith("1.3"): + stmt = select([SampleObject]).where(SampleObject.name.in_(["Bim Adewunmi", "Miki Meek"])) + output = [i for i in session.execute(stmt)] +else: + stmt = select(SampleObject).where(SampleObject.name.in_(["Bim Adewunmi", "Miki Meek"])) + output = [i for i in session.scalars(stmt)] -output = [i for i in session.scalars(stmt)] assert len(output) == 2 base.metadata.drop_all() diff --git a/src/databricks/sqlalchemy/dialect/__init__.py b/src/databricks/sqlalchemy/dialect/__init__.py index da508bb0..0f96c2bc 100644 --- a/src/databricks/sqlalchemy/dialect/__init__.py +++ b/src/databricks/sqlalchemy/dialect/__init__.py @@ -4,9 +4,10 @@ import decimal, re, datetime from dateutil.parser import parse +import sqlalchemy from sqlalchemy import types, processors, event from sqlalchemy.engine import default, Engine -from sqlalchemy.exc import DatabaseError +from sqlalchemy.exc import DatabaseError, SQLAlchemyError from sqlalchemy.engine import reflection from databricks import sql @@ -153,9 +154,7 @@ def get_columns(self, connection, table_name, schema=None, **kwargs): "date": DatabricksDate, } - with self.get_driver_connection( - connection - )._dbapi_connection.dbapi_connection.cursor() as cur: + with self.get_connection_cursor(connection) as cur: resp = cur.columns( catalog_name=self.catalog, schema_name=schema or self.schema, @@ -244,9 +243,7 @@ def get_indexes(self, connection, table_name, schema=None, **kw): def get_table_names(self, connection, schema=None, **kwargs): TABLE_NAME = 1 - with self.get_driver_connection( - connection - )._dbapi_connection.dbapi_connection.cursor() as cur: + with self.get_connection_cursor(connection) as cur: sql_str = "SHOW TABLES FROM {}".format( ".".join([self.catalog, schema or self.schema]) ) @@ -257,9 +254,7 @@ def get_table_names(self, connection, schema=None, **kwargs): def get_view_names(self, connection, schema=None, **kwargs): VIEW_NAME = 1 - with self.get_driver_connection( - connection - )._dbapi_connection.dbapi_connection.cursor() as cur: + with self.get_connection_cursor(connection) as cur: sql_str = "SHOW VIEWS FROM {}".format( ".".join([self.catalog, schema or self.schema]) ) @@ -292,6 +287,19 @@ def has_table(self, connection, table_name, schema=None, **kwargs) -> bool: else: raise e + def get_connection_cursor(self, connection): + """Added for backwards compatibility with 1.3.x""" + if hasattr(connection, "_dbapi_connection"): + return connection._dbapi_connection.dbapi_connection.cursor() + elif hasattr(connection, "raw_connection"): + return connection.raw_connection().cursor() + elif hasattr(connection, "connection"): + return connection.connection.cursor() + + raise SQLAlchemyError( + "Databricks dialect can't obtain a cursor context manager from the dbapi" 
+ ) + @reflection.cache def get_schema_names(self, connection, **kw): # Equivalent to SHOW DATABASES @@ -314,3 +322,13 @@ def receive_do_connect(dialect, conn_rec, cargs, cparams): new_user_agent = "sqlalchemy" cparams["_user_agent_entry"] = new_user_agent + + if sqlalchemy.__version__.startswith("1.3"): + # SQLAlchemy 1.3.x fails to parse the http_path, catalog, and schema from our connection string + # These should be passed in as connect_args when building the Engine + + if "schema" in cparams: + dialect.schema = cparams["schema"] + + if "catalog" in cparams: + dialect.catalog = cparams["catalog"] diff --git a/tests/e2e/sqlalchemy/test_basic.py b/tests/e2e/sqlalchemy/test_basic.py index 4f4df91b..89ceb07e 100644 --- a/tests/e2e/sqlalchemy/test_basic.py +++ b/tests/e2e/sqlalchemy/test_basic.py @@ -2,29 +2,81 @@ import pytest from unittest import skipIf from sqlalchemy import create_engine, select, insert, Column, MetaData, Table -from sqlalchemy.orm import declarative_base, Session +from sqlalchemy.orm import Session from sqlalchemy.types import SMALLINT, Integer, BOOLEAN, String, DECIMAL, Date +from sqlalchemy.engine import Engine + +from typing import Tuple + +try: + from sqlalchemy.orm import declarative_base +except ImportError: + from sqlalchemy.ext.declarative import declarative_base USER_AGENT_TOKEN = "PySQL e2e Tests" -@pytest.fixture -def db_engine(): +def sqlalchemy_1_3(): + import sqlalchemy + + return sqlalchemy.__version__.startswith("1.3") + + +def version_agnostic_select(object_to_select, *args, **kwargs): + """ + SQLAlchemy==1.3.x requires arguments to select() to be a Python list + + https://docs.sqlalchemy.org/en/20/changelog/migration_14.html#orm-query-is-internally-unified-with-select-update-delete-2-0-style-execution-available + """ + + if sqlalchemy_1_3(): + return select([object_to_select], *args, **kwargs) + else: + return select(object_to_select, *args, **kwargs) + + +def version_agnostic_connect_arguments(catalog=None, schema=None) -> Tuple[str, dict]: HOST = os.environ.get("host") HTTP_PATH = os.environ.get("http_path") ACCESS_TOKEN = os.environ.get("access_token") - CATALOG = os.environ.get("catalog") - SCHEMA = os.environ.get("schema") + CATALOG = catalog or os.environ.get("catalog") + SCHEMA = schema or os.environ.get("schema") + + ua_connect_args = {"_user_agent_entry": USER_AGENT_TOKEN} + + if sqlalchemy_1_3(): + conn_string = f"databricks://token:{ACCESS_TOKEN}@{HOST}" + connect_args = { + **ua_connect_args, + "http_path": HTTP_PATH, + "server_hostname": HOST, + "catalog": CATALOG, + "schema": SCHEMA, + } + + return conn_string, connect_args + else: + return ( + f"databricks://token:{ACCESS_TOKEN}@{HOST}?http_path={HTTP_PATH}&catalog={CATALOG}&schema={SCHEMA}", + ua_connect_args, + ) + + +@pytest.fixture +def db_engine() -> Engine: + conn_string, connect_args = version_agnostic_connect_arguments() + return create_engine(conn_string, connect_args=connect_args) - connect_args = {"_user_agent_entry": USER_AGENT_TOKEN} - engine = create_engine( - f"databricks://token:{ACCESS_TOKEN}@{HOST}?http_path={HTTP_PATH}&catalog={CATALOG}&schema={SCHEMA}", - connect_args=connect_args, +@pytest.fixture +def samples_engine() -> Engine: + + conn_string, connect_args = version_agnostic_connect_arguments( + catalog="samples", schema="nyctaxi" ) - return engine + return create_engine(conn_string, connect_args=connect_args) @pytest.fixture() @@ -62,6 +114,7 @@ def test_connect_args(db_engine): assert expected in user_agent +@pytest.mark.skipif(sqlalchemy_1_3(), reason="Pandas 
requires SQLAlchemy >= 1.4") def test_pandas_upload(db_engine, metadata_obj): import pandas as pd @@ -86,7 +139,7 @@ def test_pandas_upload(db_engine, metadata_obj): db_engine.execute("DROP TABLE mock_data") -def test_create_table_not_null(db_engine, metadata_obj): +def test_create_table_not_null(db_engine, metadata_obj: MetaData): table_name = "PySQLTest_{}".format(datetime.datetime.utcnow().strftime("%s")) @@ -95,7 +148,7 @@ def test_create_table_not_null(db_engine, metadata_obj): metadata_obj, Column("name", String(255)), Column("episodes", Integer), - Column("some_bool", BOOLEAN, nullable=False), + Column("some_bool", BOOLEAN(create_constraint=False), nullable=False), ) metadata_obj.create_all() @@ -135,7 +188,7 @@ def test_bulk_insert_with_core(db_engine, metadata_obj, session): metadata_obj.create_all() db_engine.execute(insert(SampleTable).values(rows)) - rows = db_engine.execute(select(SampleTable)).fetchall() + rows = db_engine.execute(version_agnostic_select(SampleTable)).fetchall() assert len(rows) == num_to_insert @@ -148,7 +201,7 @@ def test_create_insert_drop_table_core(base, db_engine, metadata_obj: MetaData): metadata_obj, Column("name", String(255)), Column("episodes", Integer), - Column("some_bool", BOOLEAN), + Column("some_bool", BOOLEAN(create_constraint=False)), Column("dollars", DECIMAL(10, 2)), ) @@ -161,7 +214,7 @@ def test_create_insert_drop_table_core(base, db_engine, metadata_obj: MetaData): with db_engine.connect() as conn: conn.execute(insert_stmt) - select_stmt = select(SampleTable) + select_stmt = version_agnostic_select(SampleTable) resp = db_engine.execute(select_stmt) result = resp.fetchall() @@ -187,7 +240,7 @@ class SampleObject(base): name = Column(String(255), primary_key=True) episodes = Column(Integer) - some_bool = Column(BOOLEAN) + some_bool = Column(BOOLEAN(create_constraint=False)) base.metadata.create_all() @@ -197,11 +250,15 @@ class SampleObject(base): session.add(sample_object_2) session.commit() - stmt = select(SampleObject).where( + stmt = version_agnostic_select(SampleObject).where( SampleObject.name.in_(["Bim Adewunmi", "Miki Meek"]) ) - output = [i for i in session.scalars(stmt)] + if sqlalchemy_1_3(): + output = [i for i in session.execute(stmt)] + else: + output = [i for i in session.scalars(stmt)] + assert len(output) == 2 base.metadata.drop_all() @@ -215,7 +272,7 @@ def test_dialect_type_mappings(base, db_engine, metadata_obj: MetaData): metadata_obj, Column("string_example", String(255)), Column("integer_example", Integer), - Column("boolean_example", BOOLEAN), + Column("boolean_example", BOOLEAN(create_constraint=False)), Column("decimal_example", DECIMAL(10, 2)), Column("date_example", Date), ) @@ -239,7 +296,7 @@ def test_dialect_type_mappings(base, db_engine, metadata_obj: MetaData): with db_engine.connect() as conn: conn.execute(insert_stmt) - select_stmt = select(SampleTable) + select_stmt = version_agnostic_select(SampleTable) resp = db_engine.execute(select_stmt) result = resp.fetchall() @@ -252,3 +309,34 @@ def test_dialect_type_mappings(base, db_engine, metadata_obj: MetaData): assert this_row["date_example"] == date_example metadata_obj.drop_all() + + +def test_inspector_smoke_test(samples_engine: Engine): + """It does not appear that 3L namespace is supported here""" + + from sqlalchemy.engine.reflection import Inspector + + schema, table = "nyctaxi", "trips" + + try: + inspector = Inspector.from_engine(samples_engine) + except Exception as e: + assert False, f"Could not build inspector: {e}" + + # Expect six columns + 
columns = inspector.get_columns(table, schema=schema) + + # Expect zero views, but the method should return + views = inspector.get_view_names(schema=schema) + + assert ( + len(columns) == 6 + ), "Dialect did not find the expected number of columns in samples.nyctaxi.trips" + assert len(views) == 0, "Views could not be fetched" + + +def test_get_table_names_smoke_test(samples_engine: Engine): + + with samples_engine.connect() as conn: + _names = samples_engine.table_names(schema="nyctaxi", connection=conn) + _names is not None, "get_table_names did not succeed" From 7382631a26d2977e1b400873bdde6172b8088786 Mon Sep 17 00:00:00 2001 From: Jesse Date: Tue, 11 Jul 2023 19:16:59 -0500 Subject: [PATCH 19/40] OAuth: don't override auth headers with contents of .netrc file (#122) Signed-off-by: Jesse Whitehouse --- CHANGELOG.md | 1 + src/databricks/sql/auth/oauth.py | 22 ++++++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e9d2963..961423ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ - Add support for Cloud Fetch - Fix: Revised SQLAlchemy dialect and examples for compatibility with SQLAlchemy==1.3.x +- Fix: oauth would fail if expired credentials appeared in ~/.netrc ## 2.7.0 (2023-06-26) diff --git a/src/databricks/sql/auth/oauth.py b/src/databricks/sql/auth/oauth.py index a2b9c6ed..78f516fe 100644 --- a/src/databricks/sql/auth/oauth.py +++ b/src/databricks/sql/auth/oauth.py @@ -19,6 +19,22 @@ logger = logging.getLogger(__name__) +class IgnoreNetrcAuth(requests.auth.AuthBase): + """This auth method is a no-op. + + We use it to force requestslib to not use .netrc to write auth headers + when making .post() requests to the oauth token endpoints, since these + don't require authentication. + + In cases where .netrc is outdated or corrupt, these requests will fail. 
+ + See issue #121 + """ + + def __call__(self, r): + return r + + class OAuthManager: def __init__( self, @@ -43,7 +59,7 @@ def __fetch_well_known_config(self, hostname: str): known_config_url = self.idp_endpoint.get_openid_config_url(hostname) try: - response = requests.get(url=known_config_url) + response = requests.get(url=known_config_url, auth=IgnoreNetrcAuth()) except RequestException as e: logger.error( f"Unable to fetch OAuth configuration from {known_config_url}.\n" @@ -149,7 +165,9 @@ def __send_token_request(token_request_url, data): "Accept": "application/json", "Content-Type": "application/x-www-form-urlencoded", } - response = requests.post(url=token_request_url, data=data, headers=headers) + response = requests.post( + url=token_request_url, data=data, headers=headers, auth=IgnoreNetrcAuth() + ) return response.json() def __send_refresh_token_request(self, hostname, refresh_token): From 1965df50c9026cd118ea3c1efac1bbca34165cfc Mon Sep 17 00:00:00 2001 From: Sebastian Eckweiler Date: Wed, 12 Jul 2023 02:51:54 +0200 Subject: [PATCH 20/40] Fix proxy connection pool creation (#158) Signed-off-by: Sebastian Eckweiler Signed-off-by: Jesse Whitehouse Co-authored-by: Sebastian Eckweiler Co-authored-by: Jesse Whitehouse --- CHANGELOG.md | 1 + src/databricks/sql/auth/thrift_http_client.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 961423ad..8be708c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ - Add support for Cloud Fetch - Fix: Revised SQLAlchemy dialect and examples for compatibility with SQLAlchemy==1.3.x - Fix: oauth would fail if expired credentials appeared in ~/.netrc +- Fix: Python HTTP proxies were broken after switch to urllib3 ## 2.7.0 (2023-06-26) diff --git a/src/databricks/sql/auth/thrift_http_client.py b/src/databricks/sql/auth/thrift_http_client.py index 89ad66a0..fbae1cc2 100644 --- a/src/databricks/sql/auth/thrift_http_client.py +++ b/src/databricks/sql/auth/thrift_http_client.py @@ -109,7 +109,10 @@ def open(self): headers={"Proxy-Authorization": self.proxy_auth}, ) self.__pool = proxy_manager.connection_from_host( - self.host, self.port, pool_kwargs=_pool_kwargs + host=self.realhost, + port=self.realport, + scheme=self.scheme, + pool_kwargs=_pool_kwargs, ) else: self.__pool = pool_class(self.host, self.port, **_pool_kwargs) From d7f76e4d7a9e91da260373e643a22a1c0aadded3 Mon Sep 17 00:00:00 2001 From: Daniel Segesdi Date: Wed, 12 Jul 2023 21:58:33 +0200 Subject: [PATCH 21/40] Relax pandas dependency constraint to allow ^2.0.0 (#164) Signed-off-by: Daniel Segesdi Signed-off-by: Jesse Whitehouse Co-authored-by: Jesse Whitehouse --- CHANGELOG.md | 9 +++--- poetry.lock | 81 ++++++++++++++++++++++++++++++++++++++++++++++++-- pyproject.toml | 6 +++- 3 files changed, 89 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8be708c9..46b4fecf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,10 +2,11 @@ ## 2.7.x (Unreleased) -- Add support for Cloud Fetch -- Fix: Revised SQLAlchemy dialect and examples for compatibility with SQLAlchemy==1.3.x -- Fix: oauth would fail if expired credentials appeared in ~/.netrc -- Fix: Python HTTP proxies were broken after switch to urllib3 +- Add support for Cloud Fetch (#146, #151, #154) +- Fix: Revised SQLAlchemy dialect and examples for compatibility with SQLAlchemy==1.3.x (#173) +- Fix: oauth would fail if expired credentials appeared in ~/.netrc (#122) +- Fix: Python HTTP proxies were broken after switch to urllib3 (#158) +- Other: 
Relax pandas dependency constraint to allow ^2.0.0 (#164) ## 2.7.0 (2023-06-26) diff --git a/poetry.lock b/poetry.lock index 8fee85b0..2adf1804 100644 --- a/poetry.lock +++ b/poetry.lock @@ -777,7 +777,6 @@ numpy = [ {version = ">=1.17.3", markers = "(platform_machine != \"aarch64\" and platform_machine != \"arm64\") and python_version < \"3.10\""}, {version = ">=1.19.2", markers = "platform_machine == \"aarch64\" and python_version < \"3.10\""}, {version = ">=1.20.0", markers = "platform_machine == \"arm64\" and python_version < \"3.10\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, ] python-dateutil = ">=2.7.3" pytz = ">=2017.3" @@ -785,6 +784,73 @@ pytz = ">=2017.3" [package.extras] test = ["hypothesis (>=3.58)", "pytest (>=6.0)", "pytest-xdist"] +[[package]] +name = "pandas" +version = "2.0.3" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pandas-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8"}, + {file = "pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f"}, + {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce0c6f76a0f1ba361551f3e6dceaff06bde7514a374aa43e33b588ec10420183"}, + {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba619e410a21d8c387a1ea6e8a0e49bb42216474436245718d7f2e88a2f8d7c0"}, + {file = "pandas-2.0.3-cp310-cp310-win32.whl", hash = "sha256:3ef285093b4fe5058eefd756100a367f27029913760773c8bf1d2d8bebe5d210"}, + {file = "pandas-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:9ee1a69328d5c36c98d8e74db06f4ad518a1840e8ccb94a4ba86920986bb617e"}, + {file = "pandas-2.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b084b91d8d66ab19f5bb3256cbd5ea661848338301940e17f4492b2ce0801fe8"}, + {file = "pandas-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:37673e3bdf1551b95bf5d4ce372b37770f9529743d2498032439371fc7b7eb26"}, + {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9cb1e14fdb546396b7e1b923ffaeeac24e4cedd14266c3497216dd4448e4f2d"}, + {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9cd88488cceb7635aebb84809d087468eb33551097d600c6dad13602029c2df"}, + {file = "pandas-2.0.3-cp311-cp311-win32.whl", hash = "sha256:694888a81198786f0e164ee3a581df7d505024fbb1f15202fc7db88a71d84ebd"}, + {file = "pandas-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6a21ab5c89dcbd57f78d0ae16630b090eec626360085a4148693def5452d8a6b"}, + {file = "pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9e4da0d45e7f34c069fe4d522359df7d23badf83abc1d1cef398895822d11061"}, + {file = "pandas-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32fca2ee1b0d93dd71d979726b12b61faa06aeb93cf77468776287f41ff8fdc5"}, + {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:258d3624b3ae734490e4d63c430256e716f488c4fcb7c8e9bde2d3aa46c29089"}, + {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eae3dc34fa1aa7772dd3fc60270d13ced7346fcbcfee017d3132ec625e23bb0"}, + {file = "pandas-2.0.3-cp38-cp38-win32.whl", hash = "sha256:f3421a7afb1a43f7e38e82e844e2bca9a6d793d66c1a7f9f0ff39a795bbc5e02"}, + {file = "pandas-2.0.3-cp38-cp38-win_amd64.whl", 
hash = "sha256:69d7f3884c95da3a31ef82b7618af5710dba95bb885ffab339aad925c3e8ce78"}, + {file = "pandas-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5247fb1ba347c1261cbbf0fcfba4a3121fbb4029d95d9ef4dc45406620b25c8b"}, + {file = "pandas-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:81af086f4543c9d8bb128328b5d32e9986e0c84d3ee673a2ac6fb57fd14f755e"}, + {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1994c789bf12a7c5098277fb43836ce090f1073858c10f9220998ac74f37c69b"}, + {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ec591c48e29226bcbb316e0c1e9423622bc7a4eaf1ef7c3c9fa1a3981f89641"}, + {file = "pandas-2.0.3-cp39-cp39-win32.whl", hash = "sha256:04dbdbaf2e4d46ca8da896e1805bc04eb85caa9a82e259e8eed00254d5e0c682"}, + {file = "pandas-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:1168574b036cd8b93abc746171c9b4f1b83467438a5e45909fed645cf8692dbc"}, + {file = "pandas-2.0.3.tar.gz", hash = "sha256:c02f372a88e0d17f36d3093a644c73cfc1788e876a7c4bcb4020a77512e2043c"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.20.3", markers = "python_version < \"3.10\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, + {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, +] +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.1" + +[package.extras] +all = ["PyQt5 (>=5.15.1)", "SQLAlchemy (>=1.4.16)", "beautifulsoup4 (>=4.9.3)", "bottleneck (>=1.3.2)", "brotlipy (>=0.7.0)", "fastparquet (>=0.6.3)", "fsspec (>=2021.07.0)", "gcsfs (>=2021.07.0)", "html5lib (>=1.1)", "hypothesis (>=6.34.2)", "jinja2 (>=3.0.0)", "lxml (>=4.6.3)", "matplotlib (>=3.6.1)", "numba (>=0.53.1)", "numexpr (>=2.7.3)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pandas-gbq (>=0.15.0)", "psycopg2 (>=2.8.6)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "python-snappy (>=0.6.0)", "pyxlsb (>=1.0.8)", "qtpy (>=2.2.0)", "s3fs (>=2021.08.0)", "scipy (>=1.7.1)", "tables (>=3.6.1)", "tabulate (>=0.8.9)", "xarray (>=0.21.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)", "zstandard (>=0.15.2)"] +aws = ["s3fs (>=2021.08.0)"] +clipboard = ["PyQt5 (>=5.15.1)", "qtpy (>=2.2.0)"] +compression = ["brotlipy (>=0.7.0)", "python-snappy (>=0.6.0)", "zstandard (>=0.15.2)"] +computation = ["scipy (>=1.7.1)", "xarray (>=0.21.0)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pyxlsb (>=1.0.8)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)"] +feather = ["pyarrow (>=7.0.0)"] +fss = ["fsspec (>=2021.07.0)"] +gcp = ["gcsfs (>=2021.07.0)", "pandas-gbq (>=0.15.0)"] +hdf5 = ["tables (>=3.6.1)"] +html = ["beautifulsoup4 (>=4.9.3)", "html5lib (>=1.1)", "lxml (>=4.6.3)"] +mysql = ["SQLAlchemy (>=1.4.16)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.0.0)", "tabulate (>=0.8.9)"] +parquet = ["pyarrow (>=7.0.0)"] +performance = ["bottleneck (>=1.3.2)", "numba (>=0.53.1)", "numexpr (>=2.7.1)"] +plot = ["matplotlib (>=3.6.1)"] +postgresql = ["SQLAlchemy (>=1.4.16)", "psycopg2 (>=2.8.6)"] +spss = ["pyreadstat (>=1.1.2)"] +sql-other = ["SQLAlchemy (>=1.4.16)"] +test = ["hypothesis (>=6.34.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.6.3)"] + [[package]] name = "pathspec" version = "0.11.1" @@ -1166,6 +1232,17 @@ files = [ {file = "typing_extensions-4.6.3.tar.gz", hash = "sha256:d91d5919357fe7f681a9f2b5b4cb2a5f1ef0a1e9f59c4d8ff0d3491e05c0ffd5"}, ] +[[package]] +name = 
"tzdata" +version = "2023.3" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +files = [ + {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"}, + {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"}, +] + [[package]] name = "urllib3" version = "2.0.3" @@ -1285,4 +1362,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more [metadata] lock-version = "2.0" python-versions = "^3.7.1" -content-hash = "4951f349c21ce8306bcf045928c58afc6e1e63e825768aeed358380de2c46b9b" +content-hash = "063bfda7ab42a302be9e025266582e8532582f522e61505c4a90e25345a5638e" diff --git a/pyproject.toml b/pyproject.toml index 5d48aba5..0c44d0de 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,11 @@ include = ["CHANGELOG.md"] [tool.poetry.dependencies] python = "^3.7.1" thrift = "^0.16.0" -pandas = "^1.2.5" +pandas = [ + {version = ">=1.2.5,<1.4.0", python = ">=3.7,<3.8"}, + {version =">=1.2.5,<3.0.0", python = ">=3.8"} +] + pyarrow = [ {version = ">=6.0.0", python = ">=3.7,<3.11"}, {version = ">=10.0.1", python = ">=3.11"} From 207dd7c5f786d30cac5ffde104ba2e7506681d61 Mon Sep 17 00:00:00 2001 From: Jesse Date: Wed, 12 Jul 2023 15:38:49 -0500 Subject: [PATCH 22/40] Use hex string version of operation ID instead of bytes (#170) --------- Signed-off-by: Jesse Whitehouse --- CHANGELOG.md | 2 +- src/databricks/sql/client.py | 2 +- src/databricks/sql/thrift_backend.py | 39 ++++++++++++++++++++++++---- 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 46b4fecf..ba686648 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ - Fix: Revised SQLAlchemy dialect and examples for compatibility with SQLAlchemy==1.3.x (#173) - Fix: oauth would fail if expired credentials appeared in ~/.netrc (#122) - Fix: Python HTTP proxies were broken after switch to urllib3 (#158) -- Other: Relax pandas dependency constraint to allow ^2.0.0 (#164) +- Other: Connector now logs operation handle guids as hexadecimal instead of bytes (#170) ## 2.7.0 (2023-06-26) diff --git a/src/databricks/sql/client.py b/src/databricks/sql/client.py index aa628441..ac782c8d 100644 --- a/src/databricks/sql/client.py +++ b/src/databricks/sql/client.py @@ -206,7 +206,7 @@ def __del__(self): if self.open: logger.debug( "Closing unclosed connection for session " - "{}".format(self.get_session_id()) + "{}".format(self.get_session_id_hex()) ) try: self._close(close_cursors=False) diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index ef225d1f..135b1f44 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -532,7 +532,8 @@ def _check_command_not_in_error_or_closed_state( raise ServerOperationError( get_operations_resp.displayMessage, { - "operation-id": op_handle and op_handle.operationId.guid, + "operation-id": op_handle + and self.guid_to_hex_id(op_handle.operationId.guid), "diagnostic-info": get_operations_resp.diagnosticInfo, }, ) @@ -540,16 +541,20 @@ def _check_command_not_in_error_or_closed_state( raise ServerOperationError( get_operations_resp.errorMessage, { - "operation-id": op_handle and op_handle.operationId.guid, + "operation-id": op_handle + and self.guid_to_hex_id(op_handle.operationId.guid), "diagnostic-info": None, }, ) elif get_operations_resp.operationState == ttypes.TOperationState.CLOSED_STATE: raise 
DatabaseError( "Command {} unexpectedly closed server side".format( - op_handle and op_handle.operationId.guid + op_handle and self.guid_to_hex_id(op_handle.operationId.guid) ), - {"operation-id": op_handle and op_handle.operationId.guid}, + { + "operation-id": op_handle + and self.guid_to_hex_id(op_handle.operationId.guid) + }, ) def _poll_for_status(self, op_handle): @@ -942,7 +947,11 @@ def close_command(self, op_handle): return resp.status def cancel_command(self, active_op_handle): - logger.debug("Cancelling command {}".format(active_op_handle.operationId.guid)) + logger.debug( + "Cancelling command {}".format( + self.guid_to_hex_id(active_op_handle.operationId.guid) + ) + ) req = ttypes.TCancelOperationReq(active_op_handle) self.make_request(self._client.CancelOperation, req) @@ -954,3 +963,23 @@ def handle_to_id(session_handle): def handle_to_hex_id(session_handle: TCLIService.TSessionHandle): this_uuid = uuid.UUID(bytes=session_handle.sessionId.guid) return str(this_uuid) + + @staticmethod + def guid_to_hex_id(guid: bytes) -> str: + """Return a hexadecimal string instead of bytes + + Example: + IN b'\x01\xee\x1d)\xa4\x19\x1d\xb6\xa9\xc0\x8d\xf1\xfe\xbaB\xdd' + OUT '01ee1d29-a419-1db6-a9c0-8df1feba42dd' + + If conversion to hexadecimal fails, the original bytes are returned + """ + + this_uuid: Union[bytes, uuid.UUID] + + try: + this_uuid = uuid.UUID(bytes=guid) + except Exception as e: + logger.debug(f"Unable to convert bytes to UUID: {bytes} -- {str(e)}") + this_uuid = guid + return str(this_uuid) From 22e5aaaa6d5d14341d2112c2f91ae0ce07f04e9b Mon Sep 17 00:00:00 2001 From: Jesse Date: Wed, 12 Jul 2023 15:44:33 -0500 Subject: [PATCH 23/40] SQLAlchemy: fix has_table so it honours schema= argument (#174) --------- Signed-off-by: Jesse Whitehouse --- CHANGELOG.md | 2 + src/databricks/sqlalchemy/dialect/__init__.py | 11 +++-- tests/e2e/sqlalchemy/test_basic.py | 41 +++++++++++++++++++ 3 files changed, 51 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ba686648..7930ba81 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,10 +3,12 @@ ## 2.7.x (Unreleased) - Add support for Cloud Fetch (#146, #151, #154) +- SQLAlchemy has_table function now honours schema= argument and adds catalog= argument (#174) - Fix: Revised SQLAlchemy dialect and examples for compatibility with SQLAlchemy==1.3.x (#173) - Fix: oauth would fail if expired credentials appeared in ~/.netrc (#122) - Fix: Python HTTP proxies were broken after switch to urllib3 (#158) - Other: Connector now logs operation handle guids as hexadecimal instead of bytes (#170) +- Add support for Cloud Fetch ## 2.7.0 (2023-06-26) diff --git a/src/databricks/sqlalchemy/dialect/__init__.py b/src/databricks/sqlalchemy/dialect/__init__.py index 0f96c2bc..cfb7d857 100644 --- a/src/databricks/sqlalchemy/dialect/__init__.py +++ b/src/databricks/sqlalchemy/dialect/__init__.py @@ -267,17 +267,22 @@ def do_rollback(self, dbapi_connection): # Databricks SQL Does not support transactions pass - def has_table(self, connection, table_name, schema=None, **kwargs) -> bool: + def has_table( + self, connection, table_name, schema=None, catalog=None, **kwargs + ) -> bool: """SQLAlchemy docstrings say dialect providers must implement this method""" - schema = schema or "default" + _schema = schema or self.schema + _catalog = catalog or self.catalog # DBR >12.x uses underscores in error messages DBR_LTE_12_NOT_FOUND_STRING = "Table or view not found" DBR_GT_12_NOT_FOUND_STRING = "TABLE_OR_VIEW_NOT_FOUND" try: - res = 
connection.execute(f"DESCRIBE TABLE {table_name}") + res = connection.execute( + f"DESCRIBE TABLE {_catalog}.{_schema}.{table_name}" + ) return True except DatabaseError as e: if DBR_GT_12_NOT_FOUND_STRING in str( diff --git a/tests/e2e/sqlalchemy/test_basic.py b/tests/e2e/sqlalchemy/test_basic.py index 89ceb07e..1d3125f2 100644 --- a/tests/e2e/sqlalchemy/test_basic.py +++ b/tests/e2e/sqlalchemy/test_basic.py @@ -340,3 +340,44 @@ def test_get_table_names_smoke_test(samples_engine: Engine): with samples_engine.connect() as conn: _names = samples_engine.table_names(schema="nyctaxi", connection=conn) _names is not None, "get_table_names did not succeed" + + +def test_has_table_across_schemas(db_engine: Engine, samples_engine: Engine): + """For this test to pass these conditions must be met: + - Table samples.nyctaxi.trips must exist + - Table samples.tpch.customer must exist + - The `catalog` and `schema` environment variables must be set and valid + """ + + with samples_engine.connect() as conn: + + # 1) Check for table within schema declared at engine creation time + assert samples_engine.dialect.has_table(connection=conn, table_name="trips") + + # 2) Check for table within another schema in the same catalog + assert samples_engine.dialect.has_table( + connection=conn, table_name="customer", schema="tpch" + ) + + # 3) Check for a table within a different catalog + other_catalog = os.environ.get("catalog") + other_schema = os.environ.get("schema") + + # Create a table in a different catalog + with db_engine.connect() as conn: + conn.execute("CREATE TABLE test_has_table (numbers_are_cool INT);") + + try: + # Verify that this table is not found in the samples catalog + assert not samples_engine.dialect.has_table( + connection=conn, table_name="test_has_table" + ) + # Verify that this table is found in a separate catalog + assert samples_engine.dialect.has_table( + connection=conn, + table_name="test_has_table", + schema=other_schema, + catalog=other_catalog, + ) + finally: + conn.execute("DROP TABLE test_has_table;") From 1eef43293b745d9796db8118618db1cf98bb0190 Mon Sep 17 00:00:00 2001 From: mattdeekay <11141331+mattdeekay@users.noreply.github.com> Date: Wed, 12 Jul 2023 13:51:37 -0700 Subject: [PATCH 24/40] Fix socket timeout test (#144) Signed-off-by: Jesse Whitehouse Co-authored-by: Jesse Whitehouse --- CHANGELOG.md | 3 ++- tests/e2e/test_driver.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7930ba81..53236afc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,8 +7,9 @@ - Fix: Revised SQLAlchemy dialect and examples for compatibility with SQLAlchemy==1.3.x (#173) - Fix: oauth would fail if expired credentials appeared in ~/.netrc (#122) - Fix: Python HTTP proxies were broken after switch to urllib3 (#158) +- Other: Relax pandas dependency constraint to allow ^2.0.0 (#164) - Other: Connector now logs operation handle guids as hexadecimal instead of bytes (#170) -- Add support for Cloud Fetch +- Other: test_socket_timeout_user_defined e2e test was broken (#144) ## 2.7.0 (2023-06-26) diff --git a/tests/e2e/test_driver.py b/tests/e2e/test_driver.py index c8713bf0..d6e7e1ed 100644 --- a/tests/e2e/test_driver.py +++ b/tests/e2e/test_driver.py @@ -16,6 +16,7 @@ import pytz import thrift import pytest +from urllib3.connectionpool import ReadTimeoutError import databricks.sql as sql from databricks.sql import STRING, BINARY, NUMBER, DATETIME, DATE, DatabaseError, Error, OperationalError, RequestError @@ -509,12 +510,11 @@ def 
test_socket_timeout(self): def test_socket_timeout_user_defined(self): # We expect to see a TimeoutError when the socket timeout is only # 1 sec for a query that takes longer than that to process - with self.assertRaises(RequestError) as cm: + with self.assertRaises(ReadTimeoutError) as cm: with self.cursor({"_socket_timeout": 1}) as cursor: - query = "select * from range(10000000)" + query = "select * from range(1000000000)" cursor.execute(query) - self.assertIsInstance(cm.exception.args[1], TimeoutError) def test_ssp_passthrough(self): for enable_ansi in (True, False): From ec581447ab3f53687bfcf82084dc7e4e6cc0d4bf Mon Sep 17 00:00:00 2001 From: Bogdan Date: Wed, 12 Jul 2023 14:38:02 -0700 Subject: [PATCH 25/40] Disable non_native_boolean_check_constraint (#120) --------- Signed-off-by: Bogdan Kyryliuk Signed-off-by: Jesse Whitehouse Co-authored-by: Jesse Whitehouse --- CHANGELOG.md | 1 + examples/sqlalchemy.py | 2 +- src/databricks/sqlalchemy/dialect/__init__.py | 1 + tests/e2e/sqlalchemy/test_basic.py | 8 ++++---- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 53236afc..991a24ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ - Add support for Cloud Fetch (#146, #151, #154) - SQLAlchemy has_table function now honours schema= argument and adds catalog= argument (#174) +- SQLAlchemy set non_native_boolean_check_constraint False as it's not supported by Databricks (#120) - Fix: Revised SQLAlchemy dialect and examples for compatibility with SQLAlchemy==1.3.x (#173) - Fix: oauth would fail if expired credentials appeared in ~/.netrc (#122) - Fix: Python HTTP proxies were broken after switch to urllib3 (#158) diff --git a/examples/sqlalchemy.py b/examples/sqlalchemy.py index 35160658..efb8f0bf 100644 --- a/examples/sqlalchemy.py +++ b/examples/sqlalchemy.py @@ -89,7 +89,7 @@ class SampleObject(base): name = Column(String(255), primary_key=True) episodes = Column(Integer) - some_bool = Column(BOOLEAN(create_constraint=False)) + some_bool = Column(BOOLEAN) base.metadata.create_all() diff --git a/src/databricks/sqlalchemy/dialect/__init__.py b/src/databricks/sqlalchemy/dialect/__init__.py index cfb7d857..0dde4000 100644 --- a/src/databricks/sqlalchemy/dialect/__init__.py +++ b/src/databricks/sqlalchemy/dialect/__init__.py @@ -81,6 +81,7 @@ class DatabricksDialect(default.DefaultDialect): supports_multivalues_insert: bool = True supports_native_decimal: bool = True supports_sane_rowcount: bool = False + non_native_boolean_check_constraint: bool = False @classmethod def dbapi(cls): diff --git a/tests/e2e/sqlalchemy/test_basic.py b/tests/e2e/sqlalchemy/test_basic.py index 1d3125f2..f17828eb 100644 --- a/tests/e2e/sqlalchemy/test_basic.py +++ b/tests/e2e/sqlalchemy/test_basic.py @@ -148,7 +148,7 @@ def test_create_table_not_null(db_engine, metadata_obj: MetaData): metadata_obj, Column("name", String(255)), Column("episodes", Integer), - Column("some_bool", BOOLEAN(create_constraint=False), nullable=False), + Column("some_bool", BOOLEAN, nullable=False), ) metadata_obj.create_all() @@ -201,7 +201,7 @@ def test_create_insert_drop_table_core(base, db_engine, metadata_obj: MetaData): metadata_obj, Column("name", String(255)), Column("episodes", Integer), - Column("some_bool", BOOLEAN(create_constraint=False)), + Column("some_bool", BOOLEAN), Column("dollars", DECIMAL(10, 2)), ) @@ -240,7 +240,7 @@ class SampleObject(base): name = Column(String(255), primary_key=True) episodes = Column(Integer) - some_bool = 
Column(BOOLEAN(create_constraint=False)) + some_bool = Column(BOOLEAN) base.metadata.create_all() @@ -272,7 +272,7 @@ def test_dialect_type_mappings(base, db_engine, metadata_obj: MetaData): metadata_obj, Column("string_example", String(255)), Column("integer_example", Integer), - Column("boolean_example", BOOLEAN(create_constraint=False)), + Column("boolean_example", BOOLEAN), Column("decimal_example", DECIMAL(10, 2)), Column("date_example", Date), ) From 728d33aa5fd8eed1206222e9d343331ecd1aaf15 Mon Sep 17 00:00:00 2001 From: William Gentry Date: Wed, 12 Jul 2023 17:58:29 -0400 Subject: [PATCH 26/40] Remove unused import for SQLAlchemy 2 compatibility (#128) Signed-off-by: William Gentry Signed-off-by: Jesse Whitehouse Co-authored-by: Jesse Whitehouse --- CHANGELOG.md | 1 + src/databricks/sqlalchemy/dialect/__init__.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 991a24ce..979ed34b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ - Fix: Revised SQLAlchemy dialect and examples for compatibility with SQLAlchemy==1.3.x (#173) - Fix: oauth would fail if expired credentials appeared in ~/.netrc (#122) - Fix: Python HTTP proxies were broken after switch to urllib3 (#158) +- Other: remove unused import in SQLAlchemy dialect - Other: Relax pandas dependency constraint to allow ^2.0.0 (#164) - Other: Connector now logs operation handle guids as hexadecimal instead of bytes (#170) - Other: test_socket_timeout_user_defined e2e test was broken (#144) diff --git a/src/databricks/sqlalchemy/dialect/__init__.py b/src/databricks/sqlalchemy/dialect/__init__.py index 0dde4000..f847531c 100644 --- a/src/databricks/sqlalchemy/dialect/__init__.py +++ b/src/databricks/sqlalchemy/dialect/__init__.py @@ -5,7 +5,7 @@ from dateutil.parser import parse import sqlalchemy -from sqlalchemy import types, processors, event +from sqlalchemy import types, event from sqlalchemy.engine import default, Engine from sqlalchemy.exc import DatabaseError, SQLAlchemyError from sqlalchemy.engine import reflection From 6a1d3b5ea5f3167a137f7bcb85666d4536982f71 Mon Sep 17 00:00:00 2001 From: Jesse Date: Fri, 21 Jul 2023 08:42:26 -0500 Subject: [PATCH 27/40] Bump version to 2.8.0 (#178) Signed-off-by: Jesse Whitehouse --- CHANGELOG.md | 4 +++- pyproject.toml | 2 +- src/databricks/sql/__init__.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 979ed34b..4f5b515d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # Release History -## 2.7.x (Unreleased) +## 2.8.x (Unreleased) + +## 2.8.0 (2023-07-21) - Add support for Cloud Fetch (#146, #151, #154) - SQLAlchemy has_table function now honours schema= argument and adds catalog= argument (#174) diff --git a/pyproject.toml b/pyproject.toml index 0c44d0de..8d89ba2d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "databricks-sql-connector" -version = "2.7.0" +version = "2.8.0" description = "Databricks SQL Connector for Python" authors = ["Databricks "] license = "Apache-2.0" diff --git a/src/databricks/sql/__init__.py b/src/databricks/sql/__init__.py index b72d9421..f9a49281 100644 --- a/src/databricks/sql/__init__.py +++ b/src/databricks/sql/__init__.py @@ -28,7 +28,7 @@ def __repr__(self): DATE = DBAPITypeObject("date") ROWID = DBAPITypeObject() -__version__ = "2.7.0" +__version__ = "2.8.0" USER_AGENT_NAME = "PyDatabricksSqlConnector" # These two functions are pyhive legacy From b894605ba7fe2525f1cac830a330149b09b1d8c1 
Mon Sep 17 00:00:00 2001 From: dbarrundia-tiger <125994996+dbarrundia-tiger@users.noreply.github.com> Date: Wed, 9 Aug 2023 12:05:59 -0400 Subject: [PATCH 28/40] Fix typo in python README quick start example (#186) --------- Co-authored-by: Jesse --- CHANGELOG.md | 2 ++ README.md | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f5b515d..87abcf34 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## 2.8.x (Unreleased) +- Other: Fix typo in README quick start example + ## 2.8.0 (2023-07-21) - Add support for Cloud Fetch (#146, #151, #154) diff --git a/README.md b/README.md index 60c9081c..9913e3d6 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ from databricks import sql host = os.getenv("DATABRICKS_HOST") http_path = os.getenv("DATABRICKS_HTTP_PATH") -access_token = os.getenv("DATABRICKS_ACCESS_TOKEN") +access_token = os.getenv("DATABRICKS_TOKEN") connection = sql.connect( server_hostname=host, From 00a3928d9b888e704d2c540a9b0283c7d92a3759 Mon Sep 17 00:00:00 2001 From: Jesse Date: Wed, 9 Aug 2023 17:02:34 -0400 Subject: [PATCH 29/40] Configure autospec for mocked Client objects (#188) Resolves #187 Signed-off-by: Jesse Whitehouse --- CHANGELOG.md | 1 + src/databricks/sql/thrift_backend.py | 5 +-- tests/unit/test_thrift_backend.py | 56 ++++++++++++++-------------- 3 files changed, 30 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 87abcf34..0eaa9eba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## 2.8.x (Unreleased) - Other: Fix typo in README quick start example +- Other: Add autospec to Client mocks and tidy up `make_request` ## 2.8.0 (2023-07-21) diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index 135b1f44..02b38d95 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -335,12 +335,9 @@ def attempt_request(attempt): error, error_message, retry_delay = None, None, None try: - # The MagicMocks in our unit tests have a `name` property instead of `__name__`. 
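The fallback being deleted here existed only because plain `MagicMock`s do not carry a usable `__name__`; with the test doubles now created via `patch(..., autospec=True)`, the mocked client mirrors the real `TCLIService.Client` much more faithfully. A hedged, standalone illustration of that mock behaviour (the class and method names below are invented for the example):

```python
# Hedged illustration (not connector code): why autospec'd mocks let the
# MagicMock-specific fallback above be removed. An autospec'd mock mirrors the
# real object, so unknown attributes and wrong call signatures fail loudly.
from unittest.mock import create_autospec

class FakeTCLIClient:
    def OpenSession(self, req):
        """Pretend Thrift call."""

client = create_autospec(FakeTCLIClient, instance=True)

client.OpenSession("some-request")        # matches the real signature: accepted
try:
    client.OpenSession()                  # missing argument: rejected
except TypeError as exc:
    print("autospec enforced the signature:", exc)

try:
    client.NotARealMethod                 # attribute not on the spec: rejected
except AttributeError as exc:
    print("autospec restricted attributes:", exc)
```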
logger.debug( "Sending request: {}()".format( - getattr( - method, "__name__", getattr(method, "name", "UnknownMethod") - ) + getattr(method, "__name__") ) ) unsafe_logger.debug("Sending request: {}".format(request)) diff --git a/tests/unit/test_thrift_backend.py b/tests/unit/test_thrift_backend.py index 0a18c39a..3668213c 100644 --- a/tests/unit/test_thrift_backend.py +++ b/tests/unit/test_thrift_backend.py @@ -96,7 +96,7 @@ def test_hive_schema_to_arrow_schema_preserves_column_names(self): self.assertEqual(arrow_schema.field(2).name, "column 2") self.assertEqual(arrow_schema.field(3).name, "") - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_bad_protocol_versions_are_rejected(self, tcli_service_client_cass): t_http_client_instance = tcli_service_client_cass.return_value bad_protocol_versions = [ @@ -123,7 +123,7 @@ def test_bad_protocol_versions_are_rejected(self, tcli_service_client_cass): self.assertIn("expected server to use a protocol version", str(cm.exception)) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_okay_protocol_versions_succeed(self, tcli_service_client_cass): t_http_client_instance = tcli_service_client_cass.return_value good_protocol_versions = [ @@ -351,7 +351,7 @@ def test_handle_execute_response_sets_compression_in_direct_results(self, build_ execute_response = thrift_backend._handle_execute_response(t_execute_resp, Mock()) self.assertEqual(execute_response.lz4_compressed, lz4Compressed) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_handle_execute_response_checks_operation_state_in_polls(self, tcli_service_class): tcli_service_instance = tcli_service_class.return_value @@ -380,7 +380,7 @@ def test_handle_execute_response_checks_operation_state_in_polls(self, tcli_serv if op_state_resp.errorMessage: self.assertIn(op_state_resp.errorMessage, str(cm.exception)) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_get_status_uses_display_message_if_available(self, tcli_service_class): tcli_service_instance = tcli_service_class.return_value @@ -405,7 +405,7 @@ def test_get_status_uses_display_message_if_available(self, tcli_service_class): self.assertEqual(display_message, str(cm.exception)) self.assertIn(diagnostic_info, str(cm.exception.message_with_context())) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_direct_results_uses_display_message_if_available(self, tcli_service_class): tcli_service_instance = tcli_service_class.return_value @@ -477,7 +477,7 @@ def test_handle_execute_response_checks_direct_results_for_error_statuses(self): thrift_backend._handle_execute_response(error_resp, Mock()) self.assertIn("this is a bad error", str(cm.exception)) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_handle_execute_response_can_handle_without_direct_results(self, tcli_service_class): tcli_service_instance = tcli_service_class.return_value @@ -542,7 +542,7 @@ def test_handle_execute_response_can_handle_with_direct_results(self): ttypes.TOperationState.FINISHED_STATE, ) - 
@patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_use_arrow_schema_if_available(self, tcli_service_class): tcli_service_instance = tcli_service_class.return_value arrow_schema_mock = MagicMock(name="Arrow schema mock") @@ -566,7 +566,7 @@ def test_use_arrow_schema_if_available(self, tcli_service_class): self.assertEqual(execute_response.arrow_schema_bytes, arrow_schema_mock) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_fall_back_to_hive_schema_if_no_arrow_schema(self, tcli_service_class): tcli_service_instance = tcli_service_class.return_value hive_schema_mock = MagicMock(name="Hive schema mock") @@ -591,7 +591,7 @@ def test_fall_back_to_hive_schema_if_no_arrow_schema(self, tcli_service_class): thrift_backend._hive_schema_to_arrow_schema.call_args[0][0]) @patch("databricks.sql.utils.ResultSetQueueFactory.build_queue", return_value=Mock()) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_handle_execute_response_reads_has_more_rows_in_direct_results( self, tcli_service_class, build_queue): for has_more_rows, resp_type in itertools.product([True, False], @@ -625,7 +625,7 @@ def test_handle_execute_response_reads_has_more_rows_in_direct_results( self.assertEqual(has_more_rows, execute_response.has_more_rows) @patch("databricks.sql.utils.ResultSetQueueFactory.build_queue", return_value=Mock()) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_handle_execute_response_reads_has_more_rows_in_result_response( self, tcli_service_class, build_queue): for has_more_rows, resp_type in itertools.product([True, False], @@ -671,7 +671,7 @@ def test_handle_execute_response_reads_has_more_rows_in_result_response( self.assertEqual(has_more_rows, has_more_rows_resp) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_arrow_batches_row_count_are_respected(self, tcli_service_class): # make some semi-real arrow batches and check the number of rows is correct in the queue tcli_service_instance = tcli_service_class.return_value @@ -709,7 +709,7 @@ def test_arrow_batches_row_count_are_respected(self, tcli_service_class): self.assertEqual(arrow_queue.n_valid_rows, 15 * 10) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_execute_statement_calls_client_and_handle_execute_response(self, tcli_service_class): tcli_service_instance = tcli_service_class.return_value response = Mock() @@ -727,7 +727,7 @@ def test_execute_statement_calls_client_and_handle_execute_response(self, tcli_s # Check response handling thrift_backend._handle_execute_response.assert_called_with(response, cursor_mock) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_get_catalogs_calls_client_and_handle_execute_response(self, tcli_service_class): tcli_service_instance = tcli_service_class.return_value response = Mock() @@ -744,7 +744,7 @@ def test_get_catalogs_calls_client_and_handle_execute_response(self, tcli_servic # Check response handling 
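These tests pin down the contract that each metadata RPC (GetCatalogs, GetSchemas, GetTables, GetColumns) is sent through the Thrift client and its response routed through `_handle_execute_response`. For orientation, a hedged sketch of the same operations as seen from the public DB-API cursor; `cursor.columns()` is the call the SQLAlchemy dialect earlier in this series already relies on, and the other helpers are assumed to follow the same pattern:

```python
# Illustrative sketch only: user-facing counterparts of the metadata RPCs
# exercised by these unit tests. cursor.columns(...) mirrors the dialect's
# usage earlier in this series; catalogs()/schemas() are assumed analogous.
import os
from databricks import sql

connection = sql.connect(
    server_hostname=os.getenv("DATABRICKS_SERVER_HOSTNAME"),
    http_path=os.getenv("DATABRICKS_HTTP_PATH"),
    access_token=os.getenv("DATABRICKS_TOKEN"),
)
cursor = connection.cursor()

cursor.catalogs()                                # GetCatalogs under the hood
print(cursor.fetchall())

cursor.schemas(catalog_name="samples")           # GetSchemas under the hood
print(cursor.fetchall())

cursor.columns(catalog_name="samples", schema_name="nyctaxi", table_name="trips")
print(cursor.fetchall()[:5])                     # GetColumns under the hood

cursor.close()
connection.close()
```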
thrift_backend._handle_execute_response.assert_called_with(response, cursor_mock) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_get_schemas_calls_client_and_handle_execute_response(self, tcli_service_class): tcli_service_instance = tcli_service_class.return_value response = Mock() @@ -769,7 +769,7 @@ def test_get_schemas_calls_client_and_handle_execute_response(self, tcli_service # Check response handling thrift_backend._handle_execute_response.assert_called_with(response, cursor_mock) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_get_tables_calls_client_and_handle_execute_response(self, tcli_service_class): tcli_service_instance = tcli_service_class.return_value response = Mock() @@ -798,7 +798,7 @@ def test_get_tables_calls_client_and_handle_execute_response(self, tcli_service_ # Check response handling thrift_backend._handle_execute_response.assert_called_with(response, cursor_mock) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_get_columns_calls_client_and_handle_execute_response(self, tcli_service_class): tcli_service_instance = tcli_service_class.return_value response = Mock() @@ -827,7 +827,7 @@ def test_get_columns_calls_client_and_handle_execute_response(self, tcli_service # Check response handling thrift_backend._handle_execute_response.assert_called_with(response, cursor_mock) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_open_session_user_provided_session_id_optional(self, tcli_service_class): tcli_service_instance = tcli_service_class.return_value tcli_service_instance.OpenSession.return_value = self.open_session_resp @@ -836,7 +836,7 @@ def test_open_session_user_provided_session_id_optional(self, tcli_service_class thrift_backend.open_session({}, None, None) self.assertEqual(len(tcli_service_instance.OpenSession.call_args_list), 1) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_op_handle_respected_in_close_command(self, tcli_service_class): tcli_service_instance = tcli_service_class.return_value thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) @@ -844,7 +844,7 @@ def test_op_handle_respected_in_close_command(self, tcli_service_class): self.assertEqual(tcli_service_instance.CloseOperation.call_args[0][0].operationHandle, self.operation_handle) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_session_handle_respected_in_close_session(self, tcli_service_class): tcli_service_instance = tcli_service_class.return_value thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) @@ -852,7 +852,7 @@ def test_session_handle_respected_in_close_session(self, tcli_service_class): self.assertEqual(tcli_service_instance.CloseSession.call_args[0][0].sessionHandle, self.session_handle) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_non_arrow_non_column_based_set_triggers_exception(self, tcli_service_class): tcli_service_instance = 
tcli_service_class.return_value results_mock = Mock() @@ -1021,7 +1021,7 @@ def test_convert_column_based_set_to_arrow_table_uses_types_from_col_set(self): self.assertEqual(arrow_table.column(2).to_pylist(), [1.15, 2.2, 3.3]) self.assertEqual(arrow_table.column(3).to_pylist(), [b'\x11', b'\x22', b'\x33']) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_cancel_command_uses_active_op_handle(self, tcli_service_class): tcli_service_instance = tcli_service_class.return_value @@ -1318,7 +1318,7 @@ def test_retry_args_bounding(self, mock_http_client): for (arg, val) in retry_delay_expected_vals.items(): self.assertEqual(getattr(backend, arg), val) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_configuration_passthrough(self, tcli_client_class): tcli_service_instance = tcli_client_class.return_value tcli_service_instance.OpenSession.return_value = self.open_session_resp @@ -1336,7 +1336,7 @@ def test_configuration_passthrough(self, tcli_client_class): open_session_req = tcli_client_class.return_value.OpenSession.call_args[0][0] self.assertEqual(open_session_req.configuration, expected_config) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_cant_set_timestamp_as_string_to_true(self, tcli_client_class): tcli_service_instance = tcli_client_class.return_value tcli_service_instance.OpenSession.return_value = self.open_session_resp @@ -1355,7 +1355,7 @@ def _construct_open_session_with_namespace(self, can_use_multiple_cats, cat, sch canUseMultipleCatalogs=can_use_multiple_cats, initialNamespace=ttypes.TNamespace(catalogName=cat, schemaName=schem)) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_initial_namespace_passthrough_to_open_session(self, tcli_client_class): tcli_service_instance = tcli_client_class.return_value @@ -1373,7 +1373,7 @@ def test_initial_namespace_passthrough_to_open_session(self, tcli_client_class): self.assertEqual(open_session_req.initialNamespace.catalogName, cat) self.assertEqual(open_session_req.initialNamespace.schemaName, schem) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_can_use_multiple_catalogs_is_set_in_open_session_req(self, tcli_client_class): tcli_service_instance = tcli_client_class.return_value tcli_service_instance.OpenSession.return_value = self.open_session_resp @@ -1384,7 +1384,7 @@ def test_can_use_multiple_catalogs_is_set_in_open_session_req(self, tcli_client_ open_session_req = tcli_client_class.return_value.OpenSession.call_args[0][0] self.assertTrue(open_session_req.canUseMultipleCatalogs) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_can_use_multiple_catalogs_is_false_fails_with_initial_catalog(self, tcli_client_class): tcli_service_instance = tcli_client_class.return_value @@ -1410,7 +1410,7 @@ def test_can_use_multiple_catalogs_is_false_fails_with_initial_catalog(self, tcl self._construct_open_session_with_namespace(False, cat, schem) backend.open_session({}, cat, schem) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + 
@patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) def test_protocol_v3_fails_if_initial_namespace_set(self, tcli_client_class): tcli_service_instance = tcli_client_class.return_value @@ -1430,7 +1430,7 @@ def test_protocol_v3_fails_if_initial_namespace_set(self, tcli_client_class): self.assertIn("Setting initial namespace not supported by the DBR version", str(cm.exception)) - @patch("databricks.sql.thrift_backend.TCLIService.Client") + @patch("databricks.sql.thrift_backend.TCLIService.Client", autospec=True) @patch("databricks.sql.thrift_backend.ThriftBackend._handle_execute_response") def test_execute_command_sets_complex_type_fields_correctly(self, mock_handle_execute_response, tcli_service_class): From 019acd83c4f99ac5dde437b77a0b0b0a5b94f860 Mon Sep 17 00:00:00 2001 From: Jesse Date: Wed, 9 Aug 2023 17:41:43 -0400 Subject: [PATCH 30/40] Use urllib3 for retries (#182) Behaviour is gated behind `enable_v3_retries` config. This will be removed and become the default behaviour in a subsequent release. Signed-off-by: Jesse Whitehouse --- CHANGELOG.md | 1 + src/databricks/sql/auth/retry.py | 410 ++++++++++++++++++ src/databricks/sql/auth/thrift_http_client.py | 24 + src/databricks/sql/client.py | 12 +- src/databricks/sql/exc.py | 22 + src/databricks/sql/thrift_backend.py | 57 ++- src/databricks/sqlalchemy/dialect/__init__.py | 2 +- tests/e2e/common/retry_test_mixins.py | 287 +++++++++++- tests/e2e/test_driver.py | 3 +- 9 files changed, 807 insertions(+), 11 deletions(-) create mode 100644 src/databricks/sql/auth/retry.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 0eaa9eba..0c76f11d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## 2.8.x (Unreleased) +- Replace retry handling with DatabricksRetryPolicy. This is disabled by default. To enable, set `enable_v3_retries=True` when creating `databricks.sql.client` - Other: Fix typo in README quick start example - Other: Add autospec to Client mocks and tidy up `make_request` diff --git a/src/databricks/sql/auth/retry.py b/src/databricks/sql/auth/retry.py new file mode 100644 index 00000000..182c7137 --- /dev/null +++ b/src/databricks/sql/auth/retry.py @@ -0,0 +1,410 @@ +import logging +import time +import typing +from enum import Enum +from typing import List, Optional, Tuple, Union + +from urllib3 import BaseHTTPResponse # type: ignore +from urllib3 import Retry +from urllib3.util.retry import RequestHistory + +from databricks.sql.exc import ( + CursorAlreadyClosedError, + MaxRetryDurationError, + NonRecoverableNetworkError, + OperationalError, + SessionAlreadyClosedError, + UnsafeToRetryError, +) + +logger = logging.getLogger(__name__) + + +class CommandType(Enum): + EXECUTE_STATEMENT = "ExecuteStatement" + CLOSE_SESSION = "CloseSession" + CLOSE_OPERATION = "CloseOperation" + OTHER = "Other" + + @classmethod + def get(cls, value: str): + value_name_map = {i.value: i.name for i in cls} + valid_command = value_name_map.get(value, False) + if valid_command: + return getattr(cls, str(valid_command)) + else: + return cls.OTHER + + +class DatabricksRetryPolicy(Retry): + """ + Implements our v3 retry policy by extending urllib3's robust default retry behaviour. + + Retry logic varies based on the overall wall-clock request time and Thrift CommandType + being issued. ThriftBackend starts a timer and sets the current CommandType prior to + initiating a network request. See `self.should_retry()` for details about what we do + and do not retry. 
+ + :param delay_min: + Float of seconds for the minimum delay between retries. This is an alias for urllib3's + `backoff_factor`. + + :param delay_max: + Float of seconds for the maximum delay between retries. This is an alias for urllib3's + `backoff_max` + + :param stop_after_attempts_count: + Integer maximum number of attempts that will be retried. This is an alias for urllib3's + `total`. + + :param stop_after_attempts_duration: + Float of maximum number of seconds within which a request may be retried starting from + the beginning of the first request. + + :param delay_default: + Float of seconds the connector will wait between sucessive GetOperationStatus + requests. This parameter is not used to retry failed network requests. We include + it in this class to keep all retry behaviour encapsulated in this file. + + :param force_dangerous_codes: + List of integer HTTP status codes that the connector will retry, even for dangerous + commands like ExecuteStatement. This is passed to urllib3 by extending its status_forcelist + + :param urllib3_kwargs: + Dictionary of arguments that are passed to Retry.__init__. Any setting of Retry() that + Databricks does not override or extend may be modified here. + """ + + def __init__( + self, + delay_min: float, + delay_max: float, + stop_after_attempts_count: int, + stop_after_attempts_duration: float, + delay_default: float, + force_dangerous_codes: List[int], + urllib3_kwargs: dict = {}, + ): + # These values do not change from one command to the next + self.delay_max = delay_max + self.delay_min = delay_min + self.stop_after_attempts_count = stop_after_attempts_count + self.stop_after_attempts_duration = stop_after_attempts_duration + self._delay_default = delay_default + self.force_dangerous_codes = force_dangerous_codes + + # the urllib3 kwargs are a mix of configuration (some of which we override) + # and counters like `total` or `connect` which may change between successive retries + # we only care about urllib3 kwargs that we alias, override, or add to in some way + + # the length of _history increases as retries are performed + _history: Optional[Tuple[RequestHistory, ...]] = urllib3_kwargs.get("history") + + if not _history: + # no attempts were made so we can retry the current command as many times as specified + # by the user + _attempts_remaining = self.stop_after_attempts_count + else: + # at least one of our attempts has been consumed, and urllib3 will have set a total + # `total` is a counter that begins equal to self.stop_after_attempts_count and is + # decremented after each unsuccessful request. When `total` is zero, urllib3 raises a + # MaxRetryError + _total: int = urllib3_kwargs.pop("total") + _attempts_remaining = _total + + _urllib_kwargs_we_care_about = dict( + total=_attempts_remaining, + respect_retry_after_header=True, + backoff_factor=self.delay_min, + backoff_max=self.delay_max, + allowed_methods=["POST"], + status_forcelist=[429, 503, *self.force_dangerous_codes], + ) + + urllib3_kwargs.update(**_urllib_kwargs_we_care_about) + + super().__init__( + **urllib3_kwargs, # type: ignore + ) + + @classmethod + def __private_init__( + cls, retry_start_time: float, command_type: Optional[CommandType], **init_kwargs + ): + """ + Returns a new instance of DatabricksRetryPolicy with the _retry_start_time and _command_type + properties already set. This method should only be called by DatabricksRetryPolicy itself between + successive Retry attempts. + + :param retry_start_time: + Float unix timestamp. 
Used to monitor the overall request duration across successive + retries. Never set this value directly. Use self.start_retry_timer() instead. Users + never set this value. It is set by ThriftBackend immediately before issuing a network + request. + + :param command_type: + CommandType of the current request being retried. Used to modify retry behaviour based + on the type of Thrift command being issued. See self.should_retry() for details. Users + never set this value directly. It is set by ThriftBackend immediately before issuing + a network request. + + :param init_kwargs: + A dictionary of parameters that will be passed to __init__ in the new object + """ + + new_object = cls(**init_kwargs) + new_object._retry_start_time = retry_start_time + new_object.command_type = command_type + return new_object + + def new(self, **urllib3_incremented_counters: typing.Any) -> Retry: + """This method is responsible for passing the entire Retry state to its next iteration. + + urllib3 calls Retry.new() between successive requests as part of its `.increment()` method + as shown below: + + ```python + new_retry = self.new( + total=total, + connect=connect, + read=read, + redirect=redirect, + status=status_count, + other=other, + history=history, + ) + ``` + + The arguments it passes to `.new()` (total, connect, read, etc.) are those modified by `.increment()`. + + Since self.__init__ has a different signature than Retry.__init__ , we implement our own `self.new()` + to pipe our Databricks-specific state while preserving the super-class's behaviour. + + """ + + # These arguments will match the function signature for self.__init__ + databricks_init_params = dict( + delay_min=self.delay_min, + delay_max=self.delay_max, + stop_after_attempts_count=self.stop_after_attempts_count, + stop_after_attempts_duration=self.stop_after_attempts_duration, + delay_default=self.delay_default, + force_dangerous_codes=self.force_dangerous_codes, + urllib3_kwargs={}, + ) + + # Gather urllib3's current retry state _before_ increment was called + # These arguments match the function signature for Retry.__init__ + # Note: if we update urllib3 we may need to add/remove arguments from this dict + urllib3_init_params = dict( + total=self.total, + connect=self.connect, + read=self.read, + redirect=self.redirect, + status=self.status, + other=self.other, + allowed_methods=self.allowed_methods, + status_forcelist=self.status_forcelist, + backoff_factor=self.backoff_factor, # type: ignore + backoff_max=self.backoff_max, # type: ignore + raise_on_redirect=self.raise_on_redirect, + raise_on_status=self.raise_on_status, + history=self.history, + remove_headers_on_redirect=self.remove_headers_on_redirect, + respect_retry_after_header=self.respect_retry_after_header, + backoff_jitter=self.backoff_jitter, # type: ignore + ) + + # Update urllib3's current state to reflect the incremented counters + urllib3_init_params.update(**urllib3_incremented_counters) + + # Include urllib3's current state in our __init__ params + databricks_init_params["urllib3_kwargs"].update(**urllib3_init_params) # type: ignore + + return type(self).__private_init__( + retry_start_time=self._retry_start_time, + command_type=self.command_type, + **databricks_init_params, + ) + + @property + def command_type(self) -> Optional[CommandType]: + return self._command_type + + @command_type.setter + def command_type(self, value: CommandType) -> None: + self._command_type = value + + @property + def delay_default(self) -> float: + """Time in seconds the connector will wait 
between requests polling a GetOperationStatus Request + + This property is never read by urllib3 for the purpose of retries. It's stored in this class + to keep all retry logic in one place. + + This property is only set by __init__ and cannot be modified afterward. + """ + return self._delay_default + + def start_retry_timer(self) -> None: + """Timer is used to monitor the overall time across successive requests + + Should only be called by ThriftBackend before sending a Thrift command""" + self._retry_start_time = time.time() + + def check_timer_duration(self) -> float: + """Return time in seconds since the timer was started""" + + if self._retry_start_time is None: + raise OperationalError( + "Cannot check retry timer. Timer was not started for this request." + ) + else: + return time.time() - self._retry_start_time + + def check_proposed_wait(self, proposed_wait: Union[int, float]) -> None: + """Raise an exception if the proposed wait would exceed the configured max_attempts_duration""" + + proposed_overall_time = self.check_timer_duration() + proposed_wait + if proposed_overall_time > self.stop_after_attempts_duration: + raise MaxRetryDurationError( + f"Retry request would exceed Retry policy max retry duration of {self.stop_after_attempts_duration} seconds" + ) + + def sleep_for_retry(self, response: BaseHTTPResponse) -> bool: # type: ignore + """Sleeps for the duration specified in the response Retry-After header, if present + + A MaxRetryDurationError will be raised if doing so would exceed self.max_attempts_duration + + This method is only called by urllib3 internals. + """ + retry_after = self.get_retry_after(response) + if retry_after: + self.check_proposed_wait(retry_after) + time.sleep(retry_after) + return True + + return False + + def get_backoff_time(self) -> float: + """Calls urllib3's built-in get_backoff_time. + + Never returns a value larger than self.delay_max + A MaxRetryDurationError will be raised if the calculated backoff would exceed self.max_attempts_duration + + Note: within urllib3, a backoff is only calculated in cases where a Retry-After header is not present + in the previous unsuccessful request and `self.respect_retry_after_header` is True (which is always true) + """ + + proposed_backoff = super().get_backoff_time() + proposed_backoff = min(proposed_backoff, self.delay_max) + self.check_proposed_wait(proposed_backoff) + + return proposed_backoff + + def should_retry(self, method: str, status_code: int) -> Tuple[bool, str]: + """This method encapsulates the connector's approach to retries. + + We always retry a request unless one of these conditions is met: + + 1. The request received a 200 (Success) status code + Because the request succeeded . + 2. The request received a 501 (Not Implemented) status code + Because this request can never succeed. + 3. The request received a 404 (Not Found) code and the request CommandType + was CloseSession or CloseOperation. This code indicates that the session + or cursor was already closed. Further retries will always return the same + code. + 4. The request CommandType was ExecuteStatement and the HTTP code does not + appear in the default status_forcelist or force_dangerous_codes list. By + default, this means ExecuteStatement is only retried for codes 429 and 503. + This limit prevents automatically retrying non-idempotent commands that could + be destructive. + + + Q: What about OSErrors and Redirects? + A: urllib3 automatically retries in both scenarios + + Returns True if the request should be retried. 
Returns False or raises an exception + if a retry would violate the configured policy. + """ + + # Request succeeded. Don't retry. + if status_code == 200: + return False, "200 codes are not retried" + + # Request failed and server said NotImplemented. This isn't recoverable. Don't retry. + if status_code == 501: + raise NonRecoverableNetworkError("Received code 501 from server.") + + # Request failed and this method is not retryable. We only retry POST requests. + if not self._is_method_retryable(method): # type: ignore + return False, "Only POST requests are retried" + + # Request failed with 404 because CloseSession returns 404 if you repeat the request. + if ( + status_code == 404 + and self.command_type == CommandType.CLOSE_SESSION + and len(self.history) > 0 + ): + raise SessionAlreadyClosedError( + "CloseSession received 404 code from Databricks. Session is already closed." + ) + + # Request failed with 404 because CloseOperation returns 404 if you repeat the request. + if ( + status_code == 404 + and self.command_type == CommandType.CLOSE_OPERATION + and len(self.history) > 0 + ): + raise CursorAlreadyClosedError( + "CloseOperation received 404 code from Databricks. Cursor is already closed." + ) + + # Request failed, was an ExecuteStatement and the command may have reached the server + if ( + self.command_type == CommandType.EXECUTE_STATEMENT + and status_code not in self.status_forcelist + and status_code not in self.force_dangerous_codes + ): + raise UnsafeToRetryError( + "ExecuteStatement command can only be retried for codes 429 and 503" + ) + + # Request failed with a dangerous code, was an ExecuteStatement, but user forced retries for this + # dangerous code. Note that these lines _are not required_ to make these requests retry. They would + # retry automatically. This code is included only so that we can log the exact reason for the retry. + # This gives users signal that their _retry_dangerous_codes setting actually did something. + if ( + self.command_type == CommandType.EXECUTE_STATEMENT + and status_code in self.force_dangerous_codes + ): + return ( + True, + f"Request failed with dangerous code {status_code} that is one of the configured _retry_dangerous_codes.", + ) + + # None of the above conditions applied. Eagerly retry. 
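To make the retry rules above concrete, here is a small sketch that instantiates the policy directly and probes `should_retry` with a few status codes. The constructor values are illustrative only (not defaults), and the import paths assume the modules added in this patch together with urllib3 2.x:

```python
import pytest

from databricks.sql.auth.retry import CommandType, DatabricksRetryPolicy
from databricks.sql.exc import NonRecoverableNetworkError, UnsafeToRetryError

policy = DatabricksRetryPolicy(
    delay_min=1.0,
    delay_max=10.0,
    stop_after_attempts_count=5,
    stop_after_attempts_duration=60.0,
    delay_default=1.0,
    force_dangerous_codes=[],
)
policy.command_type = CommandType.EXECUTE_STATEMENT

# 429 is in the default status_forcelist, so even ExecuteStatement is retried
retry_ok, _ = policy.should_retry("POST", 429)
assert retry_ok is True

# Only POST requests are ever retried
assert policy.should_retry("GET", 503)[0] is False

# A 502 on ExecuteStatement is unsafe to retry unless the user forces it
with pytest.raises(UnsafeToRetryError):
    policy.should_retry("POST", 502)

# A 501 can never succeed, so it is never retried
with pytest.raises(NonRecoverableNetworkError):
    policy.should_retry("POST", 501)
```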
+ logger.debug( + f"This request should be retried: {self.command_type and self.command_type.value}" + ) + return ( + True, + "Failed requests are retried by default per configured DatabricksRetryPolicy", + ) + + def is_retry( + self, method: str, status_code: int, has_retry_after: bool = False + ) -> bool: + """ + Called by urllib3 when determining whether or not to retry + + Logs a debug message if the request will be retried + """ + + should_retry, msg = self.should_retry(method, status_code) + + if should_retry: + logger.debug(msg) + + return should_retry diff --git a/src/databricks/sql/auth/thrift_http_client.py b/src/databricks/sql/auth/thrift_http_client.py index fbae1cc2..11589258 100644 --- a/src/databricks/sql/auth/thrift_http_client.py +++ b/src/databricks/sql/auth/thrift_http_client.py @@ -15,6 +15,8 @@ from urllib3 import HTTPConnectionPool, HTTPSConnectionPool, ProxyManager +from databricks.sql.auth.retry import CommandType, DatabricksRetryPolicy + class THttpClient(thrift.transport.THttpClient.THttpClient): def __init__( @@ -28,6 +30,7 @@ def __init__( key_file=None, ssl_context=None, max_connections: int = 1, + retry_policy: Union[DatabricksRetryPolicy, int] = 0, ): if port is not None: warnings.warn( @@ -81,6 +84,10 @@ def __init__( self.max_connections = max_connections + # If retry_policy == 0 then urllib3 will not retry automatically + # this falls back to the pre-v3 behaviour where thrift_backend.py handles retry logic + self.retry_policy = retry_policy + self.__wbuf = BytesIO() self.__resp: Union[None, HTTPResponse] = None self.__timeout = None @@ -92,6 +99,13 @@ def setCustomHeaders(self, headers: Dict[str, str]): self._headers = headers super().setCustomHeaders(headers) + def startRetryTimer(self): + """Notify DatabricksRetryPolicy of the request start time + + This is used to enforce the retry_stop_after_attempts_duration + """ + self.retry_policy and self.retry_policy.start_retry_timer() + def open(self): # self.__pool replaces the self.__http used by the original THttpClient @@ -167,6 +181,7 @@ def flush(self): headers=headers, preload_content=False, timeout=self.__timeout, + retries=self.retry_policy, ) # Get reply to flush the request @@ -188,3 +203,12 @@ def basic_proxy_auth_header(proxy): ) cr = base64.b64encode(ap.encode()).strip() return "Basic " + six.ensure_str(cr) + + def set_retry_command_type(self, value: CommandType): + """Pass the provided CommandType to the retry policy""" + if isinstance(self.retry_policy, DatabricksRetryPolicy): + self.retry_policy.command_type = value + else: + logger.warning( + "DatabricksRetryPolicy is currently bypassed. The CommandType cannot be set." 
+ ) diff --git a/src/databricks/sql/client.py b/src/databricks/sql/client.py index ac782c8d..eed13f1c 100644 --- a/src/databricks/sql/client.py +++ b/src/databricks/sql/client.py @@ -8,7 +8,11 @@ from databricks.sql import __version__ from databricks.sql import * -from databricks.sql.exc import OperationalError +from databricks.sql.exc import ( + OperationalError, + SessionAlreadyClosedError, + CursorAlreadyClosedError, +) from databricks.sql.thrift_backend import ThriftBackend from databricks.sql.utils import ExecuteResponse, ParamEscaper, inject_parameters from databricks.sql.types import Row @@ -257,6 +261,9 @@ def _close(self, close_cursors=True) -> None: try: self.thrift_backend.close_session(self._session_handle) + except RequestError as e: + if isinstance(e.args[1], SessionAlreadyClosedError): + logger.info("Session was closed by a prior request") except DatabaseError as e: if "Invalid SessionHandle" in str(e): logger.warning( @@ -958,6 +965,9 @@ def close(self) -> None: and self.connection.open ): self.thrift_backend.close_command(self.command_id) + except RequestError as e: + if isinstance(e.args[1], CursorAlreadyClosedError): + logger.info("Operation was canceled by a prior request") finally: self.has_been_closed_server_side = True self.op_state = self.thrift_backend.CLOSED_OP_STATE diff --git a/src/databricks/sql/exc.py b/src/databricks/sql/exc.py index bb1e203e..3b27283a 100644 --- a/src/databricks/sql/exc.py +++ b/src/databricks/sql/exc.py @@ -93,3 +93,25 @@ class RequestError(OperationalError): """ pass + + +class MaxRetryDurationError(RequestError): + """Thrown if the next HTTP request retry would exceed the configured + stop_after_attempts_duration + """ + + +class NonRecoverableNetworkError(RequestError): + """Thrown if an HTTP code 501 is received""" + + +class UnsafeToRetryError(RequestError): + """Thrown if ExecuteStatement request receives a code other than 200, 429, or 503""" + + +class SessionAlreadyClosedError(RequestError): + """Thrown if CloseSession receives a code 404. ThriftBackend should gracefully proceed as this is expected.""" + + +class CursorAlreadyClosedError(RequestError): + """Thrown if CancelOperation receives a code 404. ThriftBackend should gracefully proceed as this is expected.""" diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index 02b38d95..9f54aadb 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -17,9 +17,11 @@ import urllib3.exceptions import databricks.sql.auth.thrift_http_client +from databricks.sql.auth.thrift_http_client import CommandType from databricks.sql.auth.authenticators import AuthProvider from databricks.sql.thrift_api.TCLIService import TCLIService, ttypes from databricks.sql import * +from databricks.sql.exc import MaxRetryDurationError from databricks.sql.thrift_api.TCLIService.TCLIService import ( Client as TCLIServiceClient, ) @@ -70,6 +72,12 @@ class ThriftBackend: CLOSED_OP_STATE = ttypes.TOperationState.CLOSED_STATE ERROR_OP_STATE = ttypes.TOperationState.ERROR_STATE + _retry_delay_min: float + _retry_delay_max: float + _retry_stop_after_attempts_count: int + _retry_stop_after_attempts_duration: float + _retry_delay_default: float + def __init__( self, server_hostname: str, @@ -113,9 +121,15 @@ def __init__( # # _retry_stop_after_attempts_count # The maximum number of times we should retry retryable requests (defaults to 24) + # _retry_dangerous_codes + # An iterable of integer HTTP status codes. 
ExecuteStatement commands will be retried if these codes are received. + # (defaults to []) # _socket_timeout # The timeout in seconds for socket send, recv and connect operations. Should be a positive float or integer. # (defaults to 900) + # _enable_v3_retries + # Whether to use the DatabricksRetryPolicy implemented in urllib3 + # (defaults to False) # max_download_threads # Number of threads for handling cloud fetch downloads. Defaults to 10 @@ -166,10 +180,28 @@ def __init__( self._auth_provider = auth_provider + # Connector version 3 retry approach + self.enable_v3_retries = kwargs.get("_enable_v3_retries", False) + self.force_dangerous_codes = kwargs.get("_retry_dangerous_codes", []) + + additional_transport_args = {} + if self.enable_v3_retries: + self.retry_policy = databricks.sql.auth.thrift_http_client.DatabricksRetryPolicy( + delay_min=self._retry_delay_min, + delay_max=self._retry_delay_max, + stop_after_attempts_count=self._retry_stop_after_attempts_count, + stop_after_attempts_duration=self._retry_stop_after_attempts_duration, + delay_default=self._retry_delay_default, + force_dangerous_codes=self.force_dangerous_codes, + ) + + additional_transport_args["retry_policy"] = self.retry_policy + self._transport = databricks.sql.auth.thrift_http_client.THttpClient( auth_provider=self._auth_provider, uri_or_host=uri, ssl_context=ssl_context, + **additional_transport_args, # type: ignore ) timeout = kwargs.get("_socket_timeout", DEFAULT_SOCKET_TIMEOUT) @@ -188,6 +220,7 @@ def __init__( self._request_lock = threading.RLock() + # TODO: Move this bounding logic into DatabricksRetryPolicy for v3 (PECO-918) def _initialize_retry_args(self, kwargs): # Configure retries & timing: use user-settings or defaults, and bound # by policy. Log.warn when given param gets restricted. @@ -335,12 +368,17 @@ def attempt_request(attempt): error, error_message, retry_delay = None, None, None try: - logger.debug( - "Sending request: {}()".format( - getattr(method, "__name__") - ) - ) + + this_method_name = getattr(method, "__name__") + + logger.debug("Sending request: {}()".format(this_method_name)) unsafe_logger.debug("Sending request: {}".format(request)) + + # These three lines are no-ops if the v3 retry policy is not in use + this_command_type = CommandType.get(this_method_name) + self._transport.set_retry_command_type(this_command_type) + self._transport.startRetryTimer() + response = method(request) # Calling `close()` here releases the active HTTP connection back to the pool @@ -356,9 +394,16 @@ def attempt_request(attempt): except urllib3.exceptions.HTTPError as err: # retry on timeout. Happens a lot in Azure and it is safe as data has not been sent to server yet + # TODO: don't use exception handling for GOS polling... 
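For reference, the underscore-prefixed options documented above are ordinary keyword arguments passed to the public `connect()` entry point and forwarded to `ThriftBackend`. The sketch below shows the opt-in shape with placeholder credentials; it assumes kwargs are forwarded the same way the e2e retry tests later in this patch pass them via `extra_params`:

```python
from databricks import sql

connection = sql.connect(
    server_hostname="example.cloud.databricks.com",  # placeholder host
    http_path="/sql/1.0/warehouses/abc123",          # placeholder HTTP path
    access_token="dapi-example-token",               # placeholder token
    _enable_v3_retries=True,            # use the urllib3-based DatabricksRetryPolicy
    _retry_dangerous_codes=[502, 504],  # also retry ExecuteStatement on these codes
    _retry_stop_after_attempts_count=5,
    _retry_stop_after_attempts_duration=60,
)
```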
+ gos_name = TCLIServiceClient.GetOperationStatus.__name__ if method.__name__ == gos_name: - retry_delay = bound_retry_delay(attempt, self._retry_delay_default) + delay_default = ( + self.enable_v3_retries + and self.retry_policy.delay_default + or self._retry_delay_default + ) + retry_delay = bound_retry_delay(attempt, delay_default) logger.info( f"GetOperationStatus failed with HTTP error and will be retried: {str(err)}" ) diff --git a/src/databricks/sqlalchemy/dialect/__init__.py b/src/databricks/sqlalchemy/dialect/__init__.py index f847531c..a1020a74 100644 --- a/src/databricks/sqlalchemy/dialect/__init__.py +++ b/src/databricks/sqlalchemy/dialect/__init__.py @@ -74,7 +74,7 @@ class DatabricksDialect(default.DefaultDialect): driver: str = "databricks-sql-python" default_schema_name: str = "default" - preparer = DatabricksIdentifierPreparer + preparer = DatabricksIdentifierPreparer # type: ignore type_compiler = DatabricksTypeCompiler ddl_compiler = DatabricksDDLCompiler supports_statement_cache: bool = True diff --git a/tests/e2e/common/retry_test_mixins.py b/tests/e2e/common/retry_test_mixins.py index a088ba1e..dfe15998 100644 --- a/tests/e2e/common/retry_test_mixins.py +++ b/tests/e2e/common/retry_test_mixins.py @@ -1,3 +1,20 @@ +from contextlib import contextmanager +from typing import List +from unittest.mock import MagicMock, PropertyMock, patch + +import pytest +from urllib3.exceptions import MaxRetryError + +from databricks.sql.auth.retry import DatabricksRetryPolicy +from databricks.sql.exc import ( + MaxRetryDurationError, + NonRecoverableNetworkError, + RequestError, + SessionAlreadyClosedError, + UnsafeToRetryError, +) + + class Client429ResponseMixin: def test_client_should_retry_automatically_when_getting_429(self): with self.cursor() as cursor: @@ -15,8 +32,10 @@ def test_client_should_not_retry_429_if_RateLimitRetry_is_0(self): rows = cursor.fetchall() self.assertEqual(len(rows), 1) self.assertEqual(rows[0][0], 1) - expected = "Maximum rate of 1 requests per SECOND has been exceeded. " \ - "Please reduce the rate of requests and try again after 1 seconds." + expected = ( + "Maximum rate of 1 requests per SECOND has been exceeded. " + "Please reduce the rate of requests and try again after 1 seconds." + ) exception_str = str(cm.exception) # FIXME (Ali Smesseim, 7-Jul-2020): ODBC driver does not always return the @@ -36,3 +55,267 @@ def _test_retry_disabled_with_message(self, error_msg_substring, exception_type) with self.connection(self.conf_to_disable_temporarily_unavailable_retries): pass self.assertIn(error_msg_substring, str(cm.exception)) + + +@contextmanager +def mocked_server_response(status: int = 200, headers: dict = {"Retry-After": None}): + """Context manager for patching urllib3 responses""" + + # When mocking mocking a BaseHTTPResponse for urllib3 the mock must include + # 1. A status code + # 2. A headers dict + # 3. 
mock.get_redirect_location() return falsy + mock_response = MagicMock(headers=headers, status=status) + mock_response.get_redirect_location.return_value = False + + with patch("urllib3.connectionpool.HTTPSConnectionPool._get_conn") as getconn_mock: + getconn_mock.return_value.getresponse.return_value = mock_response + try: + yield getconn_mock + finally: + pass + + +@contextmanager +def mock_sequential_server_responses(responses: List[dict]): + """Same as the mocked_server_response context manager but it will yield + the provided responses in the order received + + `responses` should be a list of dictionaries containing these members: + - status: int + - headers: dict + """ + + mock_responses = [] + + # Each resp should have these members: + + for resp in responses: + _mock = MagicMock(headers=resp["headers"], status=resp["status"]) + _mock.get_redirect_location.return_value = False + mock_responses.append(_mock) + + with patch("urllib3.connectionpool.HTTPSConnectionPool._get_conn") as getconn_mock: + getconn_mock.return_value.getresponse.side_effect = mock_responses + try: + yield getconn_mock + finally: + pass + + +class PySQLRetryTestsMixin: + """Home for retry tests where we patch urllib to return different codes and monitor that it tries to retry""" + + # For testing purposes + _retry_policy = { + "_enable_v3_retries": True, + "_retry_delay_min": 0.1, + "_retry_delay_max": 5, + "_retry_stop_after_attempts_count": 5, + "_retry_stop_after_attempts_duration": 10, + "_retry_delay_default": 0.5, + } + + def test_retry_urllib3_settings_are_honored(self): + """Databricks overrides some of urllib3's configuration. This tests confirms that what configuration + we DON'T override is preserved in urllib3's internals + """ + + urllib3_config = {"connect": 10, "read": 11, "redirect": 12} + rp = DatabricksRetryPolicy( + delay_min=0.1, + delay_max=10.0, + stop_after_attempts_count=10, + stop_after_attempts_duration=10.0, + delay_default=1.0, + force_dangerous_codes=[], + urllib3_kwargs=urllib3_config, + ) + + assert rp.connect == 10 + assert rp.read == 11 + assert rp.redirect == 12 + + def test_oserror_retries(self): + """If a network error occurs during make_request, the request is retried according to policy""" + with patch( + "urllib3.connectionpool.HTTPSConnectionPool._validate_conn", + ) as mock_validate_conn: + mock_validate_conn.side_effect = OSError("Some arbitrary network error") + with self.assertRaises(MaxRetryError) as cm: + with self.connection(extra_params=self._retry_policy) as conn: + pass + + assert mock_validate_conn.call_count == 6 + + def test_retry_max_count_not_exceeded(self): + """GIVEN the max_attempts_count is 5 + WHEN the server sends nothing but 429 responses + THEN the connector issues six request (original plus five retries) + before raising an exception + """ + with mocked_server_response(status=404) as mock_obj: + with self.assertRaises(MaxRetryError) as cm: + with self.connection(extra_params=self._retry_policy) as conn: + pass + assert mock_obj.return_value.getresponse.call_count == 6 + + def test_retry_max_duration_not_exceeded(self): + """GIVEN the max attempt duration of 10 seconds + WHEN the server sends a Retry-After header of 60 seconds + THEN the connector raises a MaxRetryDurationError + """ + with mocked_server_response(status=429, headers={"Retry-After": "60"}): + with self.assertRaises(RequestError) as cm: + with self.connection(extra_params=self._retry_policy) as conn: + pass + assert isinstance(cm.exception.args[1], MaxRetryDurationError) + + def 
test_retry_abort_non_recoverable_error(self): + """GIVEN the server returns a code 501 + WHEN the connector receives this response + THEN nothing is retried and an exception is raised + """ + + # Code 501 is a Not Implemented error + with mocked_server_response(status=501): + with self.assertRaises(RequestError) as cm: + with self.connection(extra_params=self._retry_policy) as conn: + pass + assert isinstance(cm.exception.args[1], NonRecoverableNetworkError) + + def test_retry_abort_unsafe_execute_statement_retry_condition(self): + """GIVEN the server sends a code other than 429 or 503 + WHEN the connector sent an ExecuteStatement command + THEN nothing is retried because ExecuteStatement is not idempotent + """ + with self.connection(extra_params=self._retry_policy) as conn: + with conn.cursor() as cursor: + # Code 502 is a Bad Gateway, which we commonly see in production under heavy load + with mocked_server_response(status=502): + with self.assertRaises(RequestError) as cm: + cursor.execute("Not a real query") + assert isinstance(cm.exception.args[1], UnsafeToRetryError) + + def test_retry_dangerous_codes(self): + """GIVEN the server sends a dangerous code and the user forced this to be retryable + WHEN the connector sent an ExecuteStatement command + THEN the command is retried + """ + + # These HTTP codes are not retried by default + # For some applications, idempotency is not important so we give users a way to force retries anyway + DANGEROUS_CODES = [502, 504, 400] + + additional_settings = { + "_retry_dangerous_codes": DANGEROUS_CODES, + "_retry_stop_after_attempts_count": 1, + } + + # Prove that these codes are not retried by default + with self.connection(extra_params={**self._retry_policy}) as conn: + with conn.cursor() as cursor: + for dangerous_code in DANGEROUS_CODES: + with mocked_server_response(status=dangerous_code) as mock_obj: + with self.assertRaises(RequestError) as cm: + cursor.execute("Not a real query") + assert isinstance(cm.exception.args[1], UnsafeToRetryError) + + # Prove that these codes are retried if forced by the user + with self.connection( + extra_params={**self._retry_policy, **additional_settings} + ) as conn: + with conn.cursor() as cursor: + for dangerous_code in DANGEROUS_CODES: + with mocked_server_response(status=dangerous_code) as mock_obj: + with pytest.raises(MaxRetryError) as cm: + cursor.execute("Not a real query") + + def test_retry_safe_execute_statement_retry_condition(self): + """GIVEN the server sends either code 429 or 503 + WHEN the connector sent an ExecuteStatement command + THEN the request is retried because these codes indicate the command did not run + """ + + responses = [ + {"status": 429, "headers": {"Retry-After": "1"}}, + {"status": 503, "headers": {"Retry-After": None}}, + ] + + with self.connection( + extra_params={**self._retry_policy, "_retry_stop_after_attempts_count": 1} + ) as conn: + with conn.cursor() as cursor: + # Codes 429 and 503 are safe to retry even for an ExecuteStatement command + with mock_sequential_server_responses(responses) as mock_obj: + with pytest.raises(MaxRetryError): + cursor.execute("This query never reaches the server") + assert mock_obj.return_value.getresponse.call_count == 2 + + def test_retry_abort_close_session_on_404(self): + """GIVEN the connector sends a CloseSession command + WHEN server sends a 404 (which is normally retried) + THEN nothing is retried because 404 means the session already closed + """ + + # First response is a Bad Gateway -> Result is the command actually goes through + # Second response is a 404
because the session is no longer found + responses = [ + {"status": 502, "headers": {"Retry-After": "1"}}, + {"status": 404, "headers": {"Retry-After": None}}, + ] + + with self.connection(extra_params={**self._retry_policy}) as conn: + with mock_sequential_server_responses(responses): + with self.assertLogs( + "databricks.sql", + level="INFO", + ) as cm: + conn.close() + expected_message_was_found = False + for log in cm.output: + if expected_message_was_found: + break + target = "Session was closed by a prior request" + expected_message_was_found = target in log + self.assertTrue( + expected_message_was_found, "Did not find expected log messages" + ) + + def test_retry_abort_close_operation_on_404(self): + """GIVEN the connector sends a CancelOperation command + WHEN server sends a 404 (which is normally retried) + THEN nothing is retried because 404 means the operation was already canceled + """ + + # First response is a Bad Gateway -> Result is the command actually goes through + # Second response is a 404 because the session is no longer found + responses = [ + {"status": 502, "headers": {"Retry-After": "1"}}, + {"status": 404, "headers": {"Retry-After": None}}, + ] + + with self.connection(extra_params={**self._retry_policy}) as conn: + with conn.cursor() as curs: + with patch( + "databricks.sql.utils.ExecuteResponse.has_been_closed_server_side", + new_callable=PropertyMock, + return_value=False, + ): + # This call guarantees we have an open cursor at the server + curs.execute("SELECT 1") + with mock_sequential_server_responses(responses): + with self.assertLogs( + "databricks.sql", + level="INFO", + ) as cm: + curs.close() + expected_message_was_found = False + for log in cm.output: + if expected_message_was_found: + break + target = "Operation was canceled by a prior request" + expected_message_was_found = target in log + self.assertTrue( + expected_message_was_found, "Did not find expected log messages" + ) diff --git a/tests/e2e/test_driver.py b/tests/e2e/test_driver.py index d6e7e1ed..90bf5c3d 100644 --- a/tests/e2e/test_driver.py +++ b/tests/e2e/test_driver.py @@ -27,6 +27,7 @@ from tests.e2e.common.decimal_tests import DecimalTestsMixin from tests.e2e.common.retry_test_mixins import Client429ResponseMixin, Client503ResponseMixin from tests.e2e.common.staging_ingestion_tests import PySQLStagingIngestionTestSuiteMixin +from tests.e2e.common.retry_test_mixins import PySQLRetryTestsMixin log = logging.getLogger(__name__) @@ -141,7 +142,7 @@ def test_cloud_fetch(self): # Exclude Retry tests because they require specific setups, and LargeQueries too slow for core # tests class PySQLCoreTestSuite(SmokeTestMixin, CoreTestMixin, DecimalTestsMixin, TimestampTestsMixin, - PySQLTestCase, PySQLStagingIngestionTestSuiteMixin): + PySQLTestCase, PySQLStagingIngestionTestSuiteMixin, PySQLRetryTestsMixin): validate_row_value_type = True validate_result = True From af1aae7038dff5197036dbe14eed11231692d3a7 Mon Sep 17 00:00:00 2001 From: Jesse Date: Thu, 10 Aug 2023 11:03:33 -0400 Subject: [PATCH 31/40] Bump version to 2.9.0 (#189) * Add note to changelog about using cloud_fetch Signed-off-by: Jesse Whitehouse --- CHANGELOG.md | 6 ++++-- pyproject.toml | 2 +- src/databricks/sql/__init__.py | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c76f11d..30a060d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # Release History -## 2.8.x (Unreleased) +## 2.10.x (Unreleased) + +## 2.9.0 (2023-08-10) - Replace retry handling with 
DatabricksRetryPolicy. This is disabled by default. To enable, set `enable_v3_retries=True` when creating `databricks.sql.client` - Other: Fix typo in README quick start example @@ -8,7 +10,7 @@ ## 2.8.0 (2023-07-21) -- Add support for Cloud Fetch (#146, #151, #154) +- Add support for Cloud Fetch. Disabled by default. Set `use_cloud_fetch=True` when building `databricks.sql.client` to enable it (#146, #151, #154) - SQLAlchemy has_table function now honours schema= argument and adds catalog= argument (#174) - SQLAlchemy set non_native_boolean_check_constraint False as it's not supported by Databricks (#120) - Fix: Revised SQLAlchemy dialect and examples for compatibility with SQLAlchemy==1.3.x (#173) diff --git a/pyproject.toml b/pyproject.toml index 8d89ba2d..90db58d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "databricks-sql-connector" -version = "2.8.0" +version = "2.9.0" description = "Databricks SQL Connector for Python" authors = ["Databricks "] license = "Apache-2.0" diff --git a/src/databricks/sql/__init__.py b/src/databricks/sql/__init__.py index f9a49281..c55d17ca 100644 --- a/src/databricks/sql/__init__.py +++ b/src/databricks/sql/__init__.py @@ -28,7 +28,7 @@ def __repr__(self): DATE = DBAPITypeObject("date") ROWID = DBAPITypeObject() -__version__ = "2.8.0" +__version__ = "2.9.0" USER_AGENT_NAME = "PyDatabricksSqlConnector" # These two functions are pyhive legacy From 0d99fc738d45042ef19ee753723d12b4833e8812 Mon Sep 17 00:00:00 2001 From: Jacobus Herman Date: Thu, 10 Aug 2023 23:46:35 +0200 Subject: [PATCH 32/40] Explicitly add urllib3 dependency (#191) Signed-off-by: Jacobus Herman Co-authored-by: Jesse Signed-off-by: Jesse Whitehouse --- CHANGELOG.md | 4 +- poetry.lock | 422 +++++++++++++++++++++++++------------------------ pyproject.toml | 28 ++-- 3 files changed, 233 insertions(+), 221 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 30a060d6..195204cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # Release History -## 2.10.x (Unreleased) +## 2.9.x (Unreleased) + +- Other: Explicitly pin urllib3 to ^2.0.0 ## 2.9.0 (2023-08-10) diff --git a/poetry.lock b/poetry.lock index 2adf1804..432907c5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2,13 +2,13 @@ [[package]] name = "alembic" -version = "1.11.1" +version = "1.11.2" description = "A database migration tool for SQLAlchemy." optional = false python-versions = ">=3.7" files = [ - {file = "alembic-1.11.1-py3-none-any.whl", hash = "sha256:dc871798a601fab38332e38d6ddb38d5e734f60034baeb8e2db5b642fccd8ab8"}, - {file = "alembic-1.11.1.tar.gz", hash = "sha256:6a810a6b012c88b33458fceb869aef09ac75d6ace5291915ba7fae44de372c01"}, + {file = "alembic-1.11.2-py3-none-any.whl", hash = "sha256:7981ab0c4fad4fe1be0cf183aae17689fe394ff874fd2464adb774396faf0796"}, + {file = "alembic-1.11.2.tar.gz", hash = "sha256:678f662130dc540dac12de0ea73de9f89caea9dbea138f60ef6263149bf84657"}, ] [package.dependencies] @@ -77,108 +77,108 @@ uvloop = ["uvloop (>=0.15.2)"] [[package]] name = "certifi" -version = "2023.5.7" +version = "2023.7.22" description = "Python package for providing Mozilla's CA Bundle." 
optional = false python-versions = ">=3.6" files = [ - {file = "certifi-2023.5.7-py3-none-any.whl", hash = "sha256:c6c2e98f5c7869efca1f8916fed228dd91539f9f1b444c314c06eef02980c716"}, - {file = "certifi-2023.5.7.tar.gz", hash = "sha256:0f0d56dc5a6ad56fd4ba36484d6cc34451e1c6548c61daad8c320169f91eddc7"}, + {file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"}, + {file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"}, ] [[package]] name = "charset-normalizer" -version = "3.1.0" +version = "3.2.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." optional = false python-versions = ">=3.7.0" files = [ - {file = "charset-normalizer-3.1.0.tar.gz", hash = "sha256:34e0a2f9c370eb95597aae63bf85eb5e96826d81e3dcf88b8886012906f509b5"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e0ac8959c929593fee38da1c2b64ee9778733cdf03c482c9ff1d508b6b593b2b"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d7fc3fca01da18fbabe4625d64bb612b533533ed10045a2ac3dd194bfa656b60"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:04eefcee095f58eaabe6dc3cc2262f3bcd776d2c67005880894f447b3f2cb9c1"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20064ead0717cf9a73a6d1e779b23d149b53daf971169289ed2ed43a71e8d3b0"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1435ae15108b1cb6fffbcea2af3d468683b7afed0169ad718451f8db5d1aff6f"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c84132a54c750fda57729d1e2599bb598f5fa0344085dbde5003ba429a4798c0"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75f2568b4189dda1c567339b48cba4ac7384accb9c2a7ed655cd86b04055c795"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:11d3bcb7be35e7b1bba2c23beedac81ee893ac9871d0ba79effc7fc01167db6c"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:891cf9b48776b5c61c700b55a598621fdb7b1e301a550365571e9624f270c203"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:5f008525e02908b20e04707a4f704cd286d94718f48bb33edddc7d7b584dddc1"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:b06f0d3bf045158d2fb8837c5785fe9ff9b8c93358be64461a1089f5da983137"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:49919f8400b5e49e961f320c735388ee686a62327e773fa5b3ce6721f7e785ce"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:22908891a380d50738e1f978667536f6c6b526a2064156203d418f4856d6e86a"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-win32.whl", hash = "sha256:12d1a39aa6b8c6f6248bb54550efcc1c38ce0d8096a146638fd4738e42284448"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:65ed923f84a6844de5fd29726b888e58c62820e0769b76565480e1fdc3d062f8"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = 
"sha256:9a3267620866c9d17b959a84dd0bd2d45719b817245e49371ead79ed4f710d19"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6734e606355834f13445b6adc38b53c0fd45f1a56a9ba06c2058f86893ae8017"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f8303414c7b03f794347ad062c0516cee0e15f7a612abd0ce1e25caf6ceb47df"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aaf53a6cebad0eae578f062c7d462155eada9c172bd8c4d250b8c1d8eb7f916a"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3dc5b6a8ecfdc5748a7e429782598e4f17ef378e3e272eeb1340ea57c9109f41"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e1b25e3ad6c909f398df8921780d6a3d120d8c09466720226fc621605b6f92b1"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ca564606d2caafb0abe6d1b5311c2649e8071eb241b2d64e75a0d0065107e62"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b82fab78e0b1329e183a65260581de4375f619167478dddab510c6c6fb04d9b6"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bd7163182133c0c7701b25e604cf1611c0d87712e56e88e7ee5d72deab3e76b5"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:11d117e6c63e8f495412d37e7dc2e2fff09c34b2d09dbe2bee3c6229577818be"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:cf6511efa4801b9b38dc5546d7547d5b5c6ef4b081c60b23e4d941d0eba9cbeb"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:abc1185d79f47c0a7aaf7e2412a0eb2c03b724581139193d2d82b3ad8cbb00ac"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cb7b2ab0188829593b9de646545175547a70d9a6e2b63bf2cd87a0a391599324"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-win32.whl", hash = "sha256:c36bcbc0d5174a80d6cccf43a0ecaca44e81d25be4b7f90f0ed7bcfbb5a00909"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:cca4def576f47a09a943666b8f829606bcb17e2bc2d5911a46c8f8da45f56755"}, - {file = "charset_normalizer-3.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0c95f12b74681e9ae127728f7e5409cbbef9cd914d5896ef238cc779b8152373"}, - {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fca62a8301b605b954ad2e9c3666f9d97f63872aa4efcae5492baca2056b74ab"}, - {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac0aa6cd53ab9a31d397f8303f92c42f534693528fafbdb997c82bae6e477ad9"}, - {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c3af8e0f07399d3176b179f2e2634c3ce9c1301379a6b8c9c9aeecd481da494f"}, - {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a5fc78f9e3f501a1614a98f7c54d3969f3ad9bba8ba3d9b438c3bc5d047dd28"}, - {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:628c985afb2c7d27a4800bfb609e03985aaecb42f955049957814e0491d4006d"}, - {file = 
"charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:74db0052d985cf37fa111828d0dd230776ac99c740e1a758ad99094be4f1803d"}, - {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1e8fcdd8f672a1c4fc8d0bd3a2b576b152d2a349782d1eb0f6b8e52e9954731d"}, - {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:04afa6387e2b282cf78ff3dbce20f0cc071c12dc8f685bd40960cc68644cfea6"}, - {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:dd5653e67b149503c68c4018bf07e42eeed6b4e956b24c00ccdf93ac79cdff84"}, - {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d2686f91611f9e17f4548dbf050e75b079bbc2a82be565832bc8ea9047b61c8c"}, - {file = "charset_normalizer-3.1.0-cp37-cp37m-win32.whl", hash = "sha256:4155b51ae05ed47199dc5b2a4e62abccb274cee6b01da5b895099b61b1982974"}, - {file = "charset_normalizer-3.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:322102cdf1ab682ecc7d9b1c5eed4ec59657a65e1c146a0da342b78f4112db23"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e633940f28c1e913615fd624fcdd72fdba807bf53ea6925d6a588e84e1151531"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3a06f32c9634a8705f4ca9946d667609f52cf130d5548881401f1eb2c39b1e2c"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7381c66e0561c5757ffe616af869b916c8b4e42b367ab29fedc98481d1e74e14"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3573d376454d956553c356df45bb824262c397c6e26ce43e8203c4c540ee0acb"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e89df2958e5159b811af9ff0f92614dabf4ff617c03a4c1c6ff53bf1c399e0e1"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:78cacd03e79d009d95635e7d6ff12c21eb89b894c354bd2b2ed0b4763373693b"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de5695a6f1d8340b12a5d6d4484290ee74d61e467c39ff03b39e30df62cf83a0"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c60b9c202d00052183c9be85e5eaf18a4ada0a47d188a83c8f5c5b23252f649"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f645caaf0008bacf349875a974220f1f1da349c5dbe7c4ec93048cdc785a3326"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ea9f9c6034ea2d93d9147818f17c2a0860d41b71c38b9ce4d55f21b6f9165a11"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:80d1543d58bd3d6c271b66abf454d437a438dff01c3e62fdbcd68f2a11310d4b"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:73dc03a6a7e30b7edc5b01b601e53e7fc924b04e1835e8e407c12c037e81adbd"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6f5c2e7bc8a4bf7c426599765b1bd33217ec84023033672c1e9a8b35eaeaaaf8"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-win32.whl", hash = "sha256:12a2b561af122e3d94cdb97fe6fb2bb2b82cef0cdca131646fdb940a1eda04f0"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:3160a0fd9754aab7d47f95a6b63ab355388d890163eb03b2d2b87ab0a30cfa59"}, - {file = 
"charset_normalizer-3.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:38e812a197bf8e71a59fe55b757a84c1f946d0ac114acafaafaf21667a7e169e"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6baf0baf0d5d265fa7944feb9f7451cc316bfe30e8df1a61b1bb08577c554f31"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8f25e17ab3039b05f762b0a55ae0b3632b2e073d9c8fc88e89aca31a6198e88f"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3747443b6a904001473370d7810aa19c3a180ccd52a7157aacc264a5ac79265e"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b116502087ce8a6b7a5f1814568ccbd0e9f6cfd99948aa59b0e241dc57cf739f"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d16fd5252f883eb074ca55cb622bc0bee49b979ae4e8639fff6ca3ff44f9f854"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21fa558996782fc226b529fdd2ed7866c2c6ec91cee82735c98a197fae39f706"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f6c7a8a57e9405cad7485f4c9d3172ae486cfef1344b5ddd8e5239582d7355e"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ac3775e3311661d4adace3697a52ac0bab17edd166087d493b52d4f4f553f9f0"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:10c93628d7497c81686e8e5e557aafa78f230cd9e77dd0c40032ef90c18f2230"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:6f4f4668e1831850ebcc2fd0b1cd11721947b6dc7c00bf1c6bd3c929ae14f2c7"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:0be65ccf618c1e7ac9b849c315cc2e8a8751d9cfdaa43027d4f6624bd587ab7e"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:53d0a3fa5f8af98a1e261de6a3943ca631c526635eb5817a87a59d9a57ebf48f"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-win32.whl", hash = "sha256:a04f86f41a8916fe45ac5024ec477f41f886b3c435da2d4e3d2709b22ab02af1"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:830d2948a5ec37c386d3170c483063798d7879037492540f10a475e3fd6f244b"}, - {file = "charset_normalizer-3.1.0-py3-none-any.whl", hash = "sha256:3d9098b479e78c85080c98e1e35ff40b4a31d8953102bb0fd7d1b6f8a2111a3d"}, + {file = "charset-normalizer-3.2.0.tar.gz", hash = "sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a"}, + {file = 
"charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-win32.whl", hash = "sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6"}, + {file = 
"charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-win32.whl", hash = "sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-win32.whl", hash = "sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-win32.whl", hash = "sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = 
"sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-win32.whl", hash = "sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80"}, + {file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"}, ] [[package]] name = "click" -version = "8.1.3" +version = "8.1.6" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" files = [ - {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, - {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, + {file = "click-8.1.6-py3-none-any.whl", hash = "sha256:fa244bb30b3b5ee2cae3da8f55c9e5e0c0e86093306301fb418eb9dc40fbded5"}, + {file = "click-8.1.6.tar.gz", hash = "sha256:48ee849951919527a045bfe3bf7baa8a959c423134e1a5b98c05c20ba75a1cbd"}, ] [package.dependencies] @@ -198,13 +198,13 @@ files = [ [[package]] name = "dill" -version = "0.3.6" -description = "serialize all of python" +version = "0.3.7" +description = "serialize all of Python" optional = false python-versions = ">=3.7" files = [ - {file = "dill-0.3.6-py3-none-any.whl", hash = "sha256:a07ffd2351b8c678dfc4a856a3005f8067aea51d6ba6c700796a4d9e280f39f0"}, - {file = "dill-0.3.6.tar.gz", hash = "sha256:e5db55f3687856d8fbdab002ed78544e1c4559a130302693d839dfe8f93f2373"}, + {file = "dill-0.3.7-py3-none-any.whl", hash = "sha256:76b122c08ef4ce2eedcd4d1abd8e641114bfc6c2867f49f3c41facf65bf19f5e"}, + {file = "dill-0.3.7.tar.gz", hash = "sha256:cc1c8b182eb3013e24bd475ff2e9295af86c1a38eb1aff128dac8962a9ce3c03"}, ] [package.extras] @@ -223,13 +223,13 @@ files = [ [[package]] name = "exceptiongroup" -version = "1.1.1" +version = "1.1.2" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" files = [ - {file = "exceptiongroup-1.1.1-py3-none-any.whl", hash = "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e"}, - {file = "exceptiongroup-1.1.1.tar.gz", hash = "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785"}, + {file = "exceptiongroup-1.1.2-py3-none-any.whl", hash = "sha256:e346e69d186172ca7cf029c8c1d16235aa0e04035e5750b4b95039e65204328f"}, + {file = "exceptiongroup-1.1.2.tar.gz", hash = "sha256:12c3e887d6485d16943a309616de20ae5582633e0a2eda17f4e10fd61c1e8af5"}, ] [package.extras] @@ -665,36 +665,36 @@ files = [ [[package]] name = "numpy" -version = "1.25.0" +version = "1.25.2" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.9" files = [ - {file = "numpy-1.25.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8aa130c3042052d656751df5e81f6d61edff3e289b5994edcf77f54118a8d9f4"}, - {file = "numpy-1.25.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e3f2b96e3b63c978bc29daaa3700c028fe3f049ea3031b58aa33fe2a5809d24"}, - {file = 
"numpy-1.25.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6b267f349a99d3908b56645eebf340cb58f01bd1e773b4eea1a905b3f0e4208"}, - {file = "numpy-1.25.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4aedd08f15d3045a4e9c648f1e04daca2ab1044256959f1f95aafeeb3d794c16"}, - {file = "numpy-1.25.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6d183b5c58513f74225c376643234c369468e02947b47942eacbb23c1671f25d"}, - {file = "numpy-1.25.0-cp310-cp310-win32.whl", hash = "sha256:d76a84998c51b8b68b40448ddd02bd1081bb33abcdc28beee6cd284fe11036c6"}, - {file = "numpy-1.25.0-cp310-cp310-win_amd64.whl", hash = "sha256:c0dc071017bc00abb7d7201bac06fa80333c6314477b3d10b52b58fa6a6e38f6"}, - {file = "numpy-1.25.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c69fe5f05eea336b7a740e114dec995e2f927003c30702d896892403df6dbf0"}, - {file = "numpy-1.25.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9c7211d7920b97aeca7b3773a6783492b5b93baba39e7c36054f6e749fc7490c"}, - {file = "numpy-1.25.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ecc68f11404930e9c7ecfc937aa423e1e50158317bf67ca91736a9864eae0232"}, - {file = "numpy-1.25.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e559c6afbca484072a98a51b6fa466aae785cfe89b69e8b856c3191bc8872a82"}, - {file = "numpy-1.25.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6c284907e37f5e04d2412950960894b143a648dea3f79290757eb878b91acbd1"}, - {file = "numpy-1.25.0-cp311-cp311-win32.whl", hash = "sha256:95367ccd88c07af21b379be1725b5322362bb83679d36691f124a16357390153"}, - {file = "numpy-1.25.0-cp311-cp311-win_amd64.whl", hash = "sha256:b76aa836a952059d70a2788a2d98cb2a533ccd46222558b6970348939e55fc24"}, - {file = "numpy-1.25.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b792164e539d99d93e4e5e09ae10f8cbe5466de7d759fc155e075237e0c274e4"}, - {file = "numpy-1.25.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7cd981ccc0afe49b9883f14761bb57c964df71124dcd155b0cba2b591f0d64b9"}, - {file = "numpy-1.25.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5aa48bebfb41f93043a796128854b84407d4df730d3fb6e5dc36402f5cd594c0"}, - {file = "numpy-1.25.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5177310ac2e63d6603f659fadc1e7bab33dd5a8db4e0596df34214eeab0fee3b"}, - {file = "numpy-1.25.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0ac6edfb35d2a99aaf102b509c8e9319c499ebd4978df4971b94419a116d0790"}, - {file = "numpy-1.25.0-cp39-cp39-win32.whl", hash = "sha256:7412125b4f18aeddca2ecd7219ea2d2708f697943e6f624be41aa5f8a9852cc4"}, - {file = "numpy-1.25.0-cp39-cp39-win_amd64.whl", hash = "sha256:26815c6c8498dc49d81faa76d61078c4f9f0859ce7817919021b9eba72b425e3"}, - {file = "numpy-1.25.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5b1b90860bf7d8a8c313b372d4f27343a54f415b20fb69dd601b7efe1029c91e"}, - {file = "numpy-1.25.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85cdae87d8c136fd4da4dad1e48064d700f63e923d5af6c8c782ac0df8044542"}, - {file = "numpy-1.25.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:cc3fda2b36482891db1060f00f881c77f9423eead4c3579629940a3e12095fe8"}, - {file = "numpy-1.25.0.tar.gz", hash = "sha256:f1accae9a28dc3cda46a91de86acf69de0d1b5f4edd44a9b0c3ceb8036dfff19"}, + {file = "numpy-1.25.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:db3ccc4e37a6873045580d413fe79b68e47a681af8db2e046f1dacfa11f86eb3"}, + {file = 
"numpy-1.25.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:90319e4f002795ccfc9050110bbbaa16c944b1c37c0baeea43c5fb881693ae1f"}, + {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfe4a913e29b418d096e696ddd422d8a5d13ffba4ea91f9f60440a3b759b0187"}, + {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f08f2e037bba04e707eebf4bc934f1972a315c883a9e0ebfa8a7756eabf9e357"}, + {file = "numpy-1.25.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bec1e7213c7cb00d67093247f8c4db156fd03075f49876957dca4711306d39c9"}, + {file = "numpy-1.25.2-cp310-cp310-win32.whl", hash = "sha256:7dc869c0c75988e1c693d0e2d5b26034644399dd929bc049db55395b1379e044"}, + {file = "numpy-1.25.2-cp310-cp310-win_amd64.whl", hash = "sha256:834b386f2b8210dca38c71a6e0f4fd6922f7d3fcff935dbe3a570945acb1b545"}, + {file = "numpy-1.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c5462d19336db4560041517dbb7759c21d181a67cb01b36ca109b2ae37d32418"}, + {file = "numpy-1.25.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c5652ea24d33585ea39eb6a6a15dac87a1206a692719ff45d53c5282e66d4a8f"}, + {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d60fbae8e0019865fc4784745814cff1c421df5afee233db6d88ab4f14655a2"}, + {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60e7f0f7f6d0eee8364b9a6304c2845b9c491ac706048c7e8cf47b83123b8dbf"}, + {file = "numpy-1.25.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bb33d5a1cf360304754913a350edda36d5b8c5331a8237268c48f91253c3a364"}, + {file = "numpy-1.25.2-cp311-cp311-win32.whl", hash = "sha256:5883c06bb92f2e6c8181df7b39971a5fb436288db58b5a1c3967702d4278691d"}, + {file = "numpy-1.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:5c97325a0ba6f9d041feb9390924614b60b99209a71a69c876f71052521d42a4"}, + {file = "numpy-1.25.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b79e513d7aac42ae918db3ad1341a015488530d0bb2a6abcbdd10a3a829ccfd3"}, + {file = "numpy-1.25.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:eb942bfb6f84df5ce05dbf4b46673ffed0d3da59f13635ea9b926af3deb76926"}, + {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e0746410e73384e70d286f93abf2520035250aad8c5714240b0492a7302fdca"}, + {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7806500e4f5bdd04095e849265e55de20d8cc4b661b038957354327f6d9b295"}, + {file = "numpy-1.25.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8b77775f4b7df768967a7c8b3567e309f617dd5e99aeb886fa14dc1a0791141f"}, + {file = "numpy-1.25.2-cp39-cp39-win32.whl", hash = "sha256:2792d23d62ec51e50ce4d4b7d73de8f67a2fd3ea710dcbc8563a51a03fb07b01"}, + {file = "numpy-1.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:76b4115d42a7dfc5d485d358728cdd8719be33cc5ec6ec08632a5d6fca2ed380"}, + {file = "numpy-1.25.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1a1329e26f46230bf77b02cc19e900db9b52f398d6722ca853349a782d4cff55"}, + {file = "numpy-1.25.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c3abc71e8b6edba80a01a52e66d83c5d14433cbcd26a40c329ec7ed09f37901"}, + {file = "numpy-1.25.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:1b9735c27cea5d995496f46a8b1cd7b408b3f34b6d50459d9ac8fe3a20cc17bf"}, + {file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"}, ] [[package]] @@ -853,32 +853,32 
@@ xml = ["lxml (>=4.6.3)"] [[package]] name = "pathspec" -version = "0.11.1" +version = "0.11.2" description = "Utility library for gitignore style pattern matching of file paths." optional = false python-versions = ">=3.7" files = [ - {file = "pathspec-0.11.1-py3-none-any.whl", hash = "sha256:d8af70af76652554bd134c22b3e8a1cc46ed7d91edcdd721ef1a0c51a84a5293"}, - {file = "pathspec-0.11.1.tar.gz", hash = "sha256:2798de800fa92780e33acca925945e9a19a133b715067cf165b8866c15a31687"}, + {file = "pathspec-0.11.2-py3-none-any.whl", hash = "sha256:1d6ed233af05e679efb96b1851550ea95bbb64b7c490b0f5aa52996c11e92a20"}, + {file = "pathspec-0.11.2.tar.gz", hash = "sha256:e0d8d0ac2f12da61956eb2306b69f9469b42f4deb0f3cb6ed47b9cce9996ced3"}, ] [[package]] name = "platformdirs" -version = "3.7.0" +version = "3.10.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." optional = false python-versions = ">=3.7" files = [ - {file = "platformdirs-3.7.0-py3-none-any.whl", hash = "sha256:cfd065ba43133ff103ab3bd10aecb095c2a0035fcd1f07217c9376900d94ba07"}, - {file = "platformdirs-3.7.0.tar.gz", hash = "sha256:87fbf6473e87c078d536980ba970a472422e94f17b752cfad17024c18876d481"}, + {file = "platformdirs-3.10.0-py3-none-any.whl", hash = "sha256:d7c24979f292f916dc9cbf8648319032f551ea8c49a4c9bf2fb556a02070ec1d"}, + {file = "platformdirs-3.10.0.tar.gz", hash = "sha256:b45696dab2d7cc691a3226759c0d3b00c47c8b6e293d96f6436f733303f77f6d"}, ] [package.dependencies] -typing-extensions = {version = ">=4.6.3", markers = "python_version < \"3.8\""} +typing-extensions = {version = ">=4.7.1", markers = "python_version < \"3.8\""} [package.extras] -docs = ["furo (>=2023.5.20)", "proselint (>=0.13)", "sphinx (>=7.0.1)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"] -test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest-cov (>=4.1)", "pytest-mock (>=3.10)"] +docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.1)", "sphinx-autodoc-typehints (>=1.24)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)"] [[package]] name = "pluggy" @@ -961,13 +961,13 @@ testutil = ["gitpython (>3)"] [[package]] name = "pytest" -version = "7.3.2" +version = "7.4.0" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.7" files = [ - {file = "pytest-7.3.2-py3-none-any.whl", hash = "sha256:cdcbd012c9312258922f8cd3f1b62a6580fdced17db6014896053d47cddf9295"}, - {file = "pytest-7.3.2.tar.gz", hash = "sha256:ee990a3cc55ba808b80795a79944756f315c67c12b56abd3ac993a7b8c17030b"}, + {file = "pytest-7.4.0-py3-none-any.whl", hash = "sha256:78bf16451a2eb8c7a2ea98e32dc119fd2aa758f1d5d66dbf0a59d69a3969df32"}, + {file = "pytest-7.4.0.tar.gz", hash = "sha256:b4bf8c45bd59934ed84001ad51e11b4ee40d40a1229d2c79f9c592b0a3f6bd8a"}, ] [package.dependencies] @@ -1086,52 +1086,49 @@ files = [ [[package]] name = "sqlalchemy" -version = "1.4.48" +version = "1.4.49" description = "Database Abstraction Library" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ - {file = "SQLAlchemy-1.4.48-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:4bac3aa3c3d8bc7408097e6fe8bf983caa6e9491c5d2e2488cfcfd8106f13b6a"}, - {file = "SQLAlchemy-1.4.48-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:dbcae0e528d755f4522cad5842f0942e54b578d79f21a692c44d91352ea6d64e"}, - {file = "SQLAlchemy-1.4.48-cp27-cp27m-win32.whl", hash = 
"sha256:cbbe8b8bffb199b225d2fe3804421b7b43a0d49983f81dc654d0431d2f855543"}, - {file = "SQLAlchemy-1.4.48-cp27-cp27m-win_amd64.whl", hash = "sha256:627e04a5d54bd50628fc8734d5fc6df2a1aa5962f219c44aad50b00a6cdcf965"}, - {file = "SQLAlchemy-1.4.48-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:9af1db7a287ef86e0f5cd990b38da6bd9328de739d17e8864f1817710da2d217"}, - {file = "SQLAlchemy-1.4.48-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:ce7915eecc9c14a93b73f4e1c9d779ca43e955b43ddf1e21df154184f39748e5"}, - {file = "SQLAlchemy-1.4.48-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5381ddd09a99638f429f4cbe1b71b025bed318f6a7b23e11d65f3eed5e181c33"}, - {file = "SQLAlchemy-1.4.48-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:87609f6d4e81a941a17e61a4c19fee57f795e96f834c4f0a30cee725fc3f81d9"}, - {file = "SQLAlchemy-1.4.48-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb0808ad34167f394fea21bd4587fc62f3bd81bba232a1e7fbdfa17e6cfa7cd7"}, - {file = "SQLAlchemy-1.4.48-cp310-cp310-win32.whl", hash = "sha256:d53cd8bc582da5c1c8c86b6acc4ef42e20985c57d0ebc906445989df566c5603"}, - {file = "SQLAlchemy-1.4.48-cp310-cp310-win_amd64.whl", hash = "sha256:4355e5915844afdc5cf22ec29fba1010166e35dd94a21305f49020022167556b"}, - {file = "SQLAlchemy-1.4.48-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:066c2b0413e8cb980e6d46bf9d35ca83be81c20af688fedaef01450b06e4aa5e"}, - {file = "SQLAlchemy-1.4.48-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c99bf13e07140601d111a7c6f1fc1519914dd4e5228315bbda255e08412f61a4"}, - {file = "SQLAlchemy-1.4.48-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ee26276f12614d47cc07bc85490a70f559cba965fb178b1c45d46ffa8d73fda"}, - {file = "SQLAlchemy-1.4.48-cp311-cp311-win32.whl", hash = "sha256:49c312bcff4728bffc6fb5e5318b8020ed5c8b958a06800f91859fe9633ca20e"}, - {file = "SQLAlchemy-1.4.48-cp311-cp311-win_amd64.whl", hash = "sha256:cef2e2abc06eab187a533ec3e1067a71d7bbec69e582401afdf6d8cad4ba3515"}, - {file = "SQLAlchemy-1.4.48-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:3509159e050bd6d24189ec7af373359f07aed690db91909c131e5068176c5a5d"}, - {file = "SQLAlchemy-1.4.48-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fc2ab4d9f6d9218a5caa4121bdcf1125303482a1cdcfcdbd8567be8518969c0"}, - {file = "SQLAlchemy-1.4.48-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e1ddbbcef9bcedaa370c03771ebec7e39e3944782bef49e69430383c376a250b"}, - {file = "SQLAlchemy-1.4.48-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f82d8efea1ca92b24f51d3aea1a82897ed2409868a0af04247c8c1e4fef5890"}, - {file = "SQLAlchemy-1.4.48-cp36-cp36m-win32.whl", hash = "sha256:e3e98d4907805b07743b583a99ecc58bf8807ecb6985576d82d5e8ae103b5272"}, - {file = "SQLAlchemy-1.4.48-cp36-cp36m-win_amd64.whl", hash = "sha256:25887b4f716e085a1c5162f130b852f84e18d2633942c8ca40dfb8519367c14f"}, - {file = "SQLAlchemy-1.4.48-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:0817c181271b0ce5df1aa20949f0a9e2426830fed5ecdcc8db449618f12c2730"}, - {file = "SQLAlchemy-1.4.48-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:fe1dd2562313dd9fe1778ed56739ad5d9aae10f9f43d9f4cf81d65b0c85168bb"}, - {file = "SQLAlchemy-1.4.48-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:68413aead943883b341b2b77acd7a7fe2377c34d82e64d1840860247cec7ff7c"}, - {file = "SQLAlchemy-1.4.48-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fbde5642104ac6e95f96e8ad6d18d9382aa20672008cf26068fe36f3004491df"}, - {file = "SQLAlchemy-1.4.48-cp37-cp37m-win32.whl", hash = "sha256:11c6b1de720f816c22d6ad3bbfa2f026f89c7b78a5c4ffafb220e0183956a92a"}, - {file = "SQLAlchemy-1.4.48-cp37-cp37m-win_amd64.whl", hash = "sha256:eb5464ee8d4bb6549d368b578e9529d3c43265007193597ddca71c1bae6174e6"}, - {file = "SQLAlchemy-1.4.48-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:92e6133cf337c42bfee03ca08c62ba0f2d9695618c8abc14a564f47503157be9"}, - {file = "SQLAlchemy-1.4.48-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44d29a3fc6d9c45962476b470a81983dd8add6ad26fdbfae6d463b509d5adcda"}, - {file = "SQLAlchemy-1.4.48-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:005e942b451cad5285015481ae4e557ff4154dde327840ba91b9ac379be3b6ce"}, - {file = "SQLAlchemy-1.4.48-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c8cfe951ed074ba5e708ed29c45397a95c4143255b0d022c7c8331a75ae61f3"}, - {file = "SQLAlchemy-1.4.48-cp38-cp38-win32.whl", hash = "sha256:2b9af65cc58726129d8414fc1a1a650dcdd594ba12e9c97909f1f57d48e393d3"}, - {file = "SQLAlchemy-1.4.48-cp38-cp38-win_amd64.whl", hash = "sha256:2b562e9d1e59be7833edf28b0968f156683d57cabd2137d8121806f38a9d58f4"}, - {file = "SQLAlchemy-1.4.48-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:a1fc046756cf2a37d7277c93278566ddf8be135c6a58397b4c940abf837011f4"}, - {file = "SQLAlchemy-1.4.48-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d9b55252d2ca42a09bcd10a697fa041e696def9dfab0b78c0aaea1485551a08"}, - {file = "SQLAlchemy-1.4.48-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:6dab89874e72a9ab5462997846d4c760cdb957958be27b03b49cf0de5e5c327c"}, - {file = "SQLAlchemy-1.4.48-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1fd8b5ee5a3acc4371f820934b36f8109ce604ee73cc668c724abb054cebcb6e"}, - {file = "SQLAlchemy-1.4.48-cp39-cp39-win32.whl", hash = "sha256:eee09350fd538e29cfe3a496ec6f148504d2da40dbf52adefb0d2f8e4d38ccc4"}, - {file = "SQLAlchemy-1.4.48-cp39-cp39-win_amd64.whl", hash = "sha256:7ad2b0f6520ed5038e795cc2852eb5c1f20fa6831d73301ced4aafbe3a10e1f6"}, - {file = "SQLAlchemy-1.4.48.tar.gz", hash = "sha256:b47bc287096d989a0838ce96f7d8e966914a24da877ed41a7531d44b55cdb8df"}, + {file = "SQLAlchemy-1.4.49-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2e126cf98b7fd38f1e33c64484406b78e937b1a280e078ef558b95bf5b6895f6"}, + {file = "SQLAlchemy-1.4.49-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:03db81b89fe7ef3857b4a00b63dedd632d6183d4ea5a31c5d8a92e000a41fc71"}, + {file = "SQLAlchemy-1.4.49-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:95b9df9afd680b7a3b13b38adf6e3a38995da5e162cc7524ef08e3be4e5ed3e1"}, + {file = "SQLAlchemy-1.4.49-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:a63e43bf3f668c11bb0444ce6e809c1227b8f067ca1068898f3008a273f52b09"}, + {file = "SQLAlchemy-1.4.49-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f835c050ebaa4e48b18403bed2c0fda986525896efd76c245bdd4db995e51a4c"}, + {file = "SQLAlchemy-1.4.49-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c21b172dfb22e0db303ff6419451f0cac891d2e911bb9fbf8003d717f1bcf91"}, + {file = "SQLAlchemy-1.4.49-cp310-cp310-win32.whl", hash = "sha256:5fb1ebdfc8373b5a291485757bd6431de8d7ed42c27439f543c81f6c8febd729"}, + {file = "SQLAlchemy-1.4.49-cp310-cp310-win_amd64.whl", hash = "sha256:f8a65990c9c490f4651b5c02abccc9f113a7f56fa482031ac8cb88b70bc8ccaa"}, + {file = "SQLAlchemy-1.4.49-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8923dfdf24d5aa8a3adb59723f54118dd4fe62cf59ed0d0d65d940579c1170a4"}, + {file = "SQLAlchemy-1.4.49-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9ab2c507a7a439f13ca4499db6d3f50423d1d65dc9b5ed897e70941d9e135b0"}, + {file = "SQLAlchemy-1.4.49-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5debe7d49b8acf1f3035317e63d9ec8d5e4d904c6e75a2a9246a119f5f2fdf3d"}, + {file = "SQLAlchemy-1.4.49-cp311-cp311-win32.whl", hash = "sha256:82b08e82da3756765c2e75f327b9bf6b0f043c9c3925fb95fb51e1567fa4ee87"}, + {file = "SQLAlchemy-1.4.49-cp311-cp311-win_amd64.whl", hash = "sha256:171e04eeb5d1c0d96a544caf982621a1711d078dbc5c96f11d6469169bd003f1"}, + {file = "SQLAlchemy-1.4.49-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:36e58f8c4fe43984384e3fbe6341ac99b6b4e083de2fe838f0fdb91cebe9e9cb"}, + {file = "SQLAlchemy-1.4.49-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b31e67ff419013f99ad6f8fc73ee19ea31585e1e9fe773744c0f3ce58c039c30"}, + {file = "SQLAlchemy-1.4.49-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c14b29d9e1529f99efd550cd04dbb6db6ba5d690abb96d52de2bff4ed518bc95"}, + {file = "SQLAlchemy-1.4.49-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c40f3470e084d31247aea228aa1c39bbc0904c2b9ccbf5d3cfa2ea2dac06f26d"}, + {file = "SQLAlchemy-1.4.49-cp36-cp36m-win32.whl", hash = "sha256:706bfa02157b97c136547c406f263e4c6274a7b061b3eb9742915dd774bbc264"}, + {file = "SQLAlchemy-1.4.49-cp36-cp36m-win_amd64.whl", hash = "sha256:a7f7b5c07ae5c0cfd24c2db86071fb2a3d947da7bd487e359cc91e67ac1c6d2e"}, + {file = "SQLAlchemy-1.4.49-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:4afbbf5ef41ac18e02c8dc1f86c04b22b7a2125f2a030e25bbb4aff31abb224b"}, + {file = "SQLAlchemy-1.4.49-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:24e300c0c2147484a002b175f4e1361f102e82c345bf263242f0449672a4bccf"}, + {file = "SQLAlchemy-1.4.49-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:201de072b818f8ad55c80d18d1a788729cccf9be6d9dc3b9d8613b053cd4836d"}, + {file = "SQLAlchemy-1.4.49-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7653ed6817c710d0c95558232aba799307d14ae084cc9b1f4c389157ec50df5c"}, + {file = "SQLAlchemy-1.4.49-cp37-cp37m-win32.whl", hash = "sha256:647e0b309cb4512b1f1b78471fdaf72921b6fa6e750b9f891e09c6e2f0e5326f"}, + {file = "SQLAlchemy-1.4.49-cp37-cp37m-win_amd64.whl", hash = 
"sha256:ab73ed1a05ff539afc4a7f8cf371764cdf79768ecb7d2ec691e3ff89abbc541e"}, + {file = "SQLAlchemy-1.4.49-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:37ce517c011560d68f1ffb28af65d7e06f873f191eb3a73af5671e9c3fada08a"}, + {file = "SQLAlchemy-1.4.49-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1878ce508edea4a879015ab5215546c444233881301e97ca16fe251e89f1c55"}, + {file = "SQLAlchemy-1.4.49-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:0e8e608983e6f85d0852ca61f97e521b62e67969e6e640fe6c6b575d4db68557"}, + {file = "SQLAlchemy-1.4.49-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ccf956da45290df6e809ea12c54c02ace7f8ff4d765d6d3dfb3655ee876ce58d"}, + {file = "SQLAlchemy-1.4.49-cp38-cp38-win32.whl", hash = "sha256:f167c8175ab908ce48bd6550679cc6ea20ae169379e73c7720a28f89e53aa532"}, + {file = "SQLAlchemy-1.4.49-cp38-cp38-win_amd64.whl", hash = "sha256:45806315aae81a0c202752558f0df52b42d11dd7ba0097bf71e253b4215f34f4"}, + {file = "SQLAlchemy-1.4.49-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:b6d0c4b15d65087738a6e22e0ff461b407533ff65a73b818089efc8eb2b3e1de"}, + {file = "SQLAlchemy-1.4.49-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a843e34abfd4c797018fd8d00ffffa99fd5184c421f190b6ca99def4087689bd"}, + {file = "SQLAlchemy-1.4.49-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:1c890421651b45a681181301b3497e4d57c0d01dc001e10438a40e9a9c25ee77"}, + {file = "SQLAlchemy-1.4.49-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d26f280b8f0a8f497bc10573849ad6dc62e671d2468826e5c748d04ed9e670d5"}, + {file = "SQLAlchemy-1.4.49-cp39-cp39-win32.whl", hash = "sha256:ec2268de67f73b43320383947e74700e95c6770d0c68c4e615e9897e46296294"}, + {file = "SQLAlchemy-1.4.49-cp39-cp39-win_amd64.whl", hash = "sha256:bbdf16372859b8ed3f4d05f925a984771cd2abd18bd187042f24be4886c2a15f"}, + {file = "SQLAlchemy-1.4.49.tar.gz", hash = "sha256:06ff25cbae30c396c4b7737464f2a7fc37a67b7da409993b182b024cec80aed9"}, ] [package.dependencies] @@ -1190,46 +1187,63 @@ files = [ [[package]] name = "typed-ast" -version = "1.5.4" +version = "1.5.5" description = "a fork of Python 2 and 3 ast modules with type comment support" optional = false python-versions = ">=3.6" files = [ - {file = "typed_ast-1.5.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:669dd0c4167f6f2cd9f57041e03c3c2ebf9063d0757dc89f79ba1daa2bfca9d4"}, - {file = "typed_ast-1.5.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:211260621ab1cd7324e0798d6be953d00b74e0428382991adfddb352252f1d62"}, - {file = "typed_ast-1.5.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:267e3f78697a6c00c689c03db4876dd1efdfea2f251a5ad6555e82a26847b4ac"}, - {file = "typed_ast-1.5.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c542eeda69212fa10a7ada75e668876fdec5f856cd3d06829e6aa64ad17c8dfe"}, - {file = "typed_ast-1.5.4-cp310-cp310-win_amd64.whl", hash = "sha256:a9916d2bb8865f973824fb47436fa45e1ebf2efd920f2b9f99342cb7fab93f72"}, - {file = "typed_ast-1.5.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:79b1e0869db7c830ba6a981d58711c88b6677506e648496b1f64ac7d15633aec"}, - {file = "typed_ast-1.5.4-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:a94d55d142c9265f4ea46fab70977a1944ecae359ae867397757d836ea5a3f47"}, - {file = "typed_ast-1.5.4-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:183afdf0ec5b1b211724dfef3d2cad2d767cbefac291f24d69b00546c1837fb6"}, - {file = "typed_ast-1.5.4-cp36-cp36m-win_amd64.whl", hash = "sha256:639c5f0b21776605dd6c9dbe592d5228f021404dafd377e2b7ac046b0349b1a1"}, - {file = "typed_ast-1.5.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cf4afcfac006ece570e32d6fa90ab74a17245b83dfd6655a6f68568098345ff6"}, - {file = "typed_ast-1.5.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed855bbe3eb3715fca349c80174cfcfd699c2f9de574d40527b8429acae23a66"}, - {file = "typed_ast-1.5.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:6778e1b2f81dfc7bc58e4b259363b83d2e509a65198e85d5700dfae4c6c8ff1c"}, - {file = "typed_ast-1.5.4-cp37-cp37m-win_amd64.whl", hash = "sha256:0261195c2062caf107831e92a76764c81227dae162c4f75192c0d489faf751a2"}, - {file = "typed_ast-1.5.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2efae9db7a8c05ad5547d522e7dbe62c83d838d3906a3716d1478b6c1d61388d"}, - {file = "typed_ast-1.5.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7d5d014b7daa8b0bf2eaef684295acae12b036d79f54178b92a2b6a56f92278f"}, - {file = "typed_ast-1.5.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:370788a63915e82fd6f212865a596a0fefcbb7d408bbbb13dea723d971ed8bdc"}, - {file = "typed_ast-1.5.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4e964b4ff86550a7a7d56345c7864b18f403f5bd7380edf44a3c1fb4ee7ac6c6"}, - {file = "typed_ast-1.5.4-cp38-cp38-win_amd64.whl", hash = "sha256:683407d92dc953c8a7347119596f0b0e6c55eb98ebebd9b23437501b28dcbb8e"}, - {file = "typed_ast-1.5.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4879da6c9b73443f97e731b617184a596ac1235fe91f98d279a7af36c796da35"}, - {file = "typed_ast-1.5.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3e123d878ba170397916557d31c8f589951e353cc95fb7f24f6bb69adc1a8a97"}, - {file = "typed_ast-1.5.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ebd9d7f80ccf7a82ac5f88c521115cc55d84e35bf8b446fcd7836eb6b98929a3"}, - {file = "typed_ast-1.5.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98f80dee3c03455e92796b58b98ff6ca0b2a6f652120c263efdba4d6c5e58f72"}, - {file = "typed_ast-1.5.4-cp39-cp39-win_amd64.whl", hash = "sha256:0fdbcf2fef0ca421a3f5912555804296f0b0960f0418c440f5d6d3abb549f3e1"}, - {file = "typed_ast-1.5.4.tar.gz", hash = "sha256:39e21ceb7388e4bb37f4c679d72707ed46c2fbf2a5609b8b8ebc4b067d977df2"}, + {file = "typed_ast-1.5.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4bc1efe0ce3ffb74784e06460f01a223ac1f6ab31c6bc0376a21184bf5aabe3b"}, + {file = "typed_ast-1.5.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5f7a8c46a8b333f71abd61d7ab9255440d4a588f34a21f126bbfc95f6049e686"}, + {file = "typed_ast-1.5.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:597fc66b4162f959ee6a96b978c0435bd63791e31e4f410622d19f1686d5e769"}, + {file = "typed_ast-1.5.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d41b7a686ce653e06c2609075d397ebd5b969d821b9797d029fccd71fdec8e04"}, + {file = "typed_ast-1.5.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = 
"sha256:5fe83a9a44c4ce67c796a1b466c270c1272e176603d5e06f6afbc101a572859d"}, + {file = "typed_ast-1.5.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d5c0c112a74c0e5db2c75882a0adf3133adedcdbfd8cf7c9d6ed77365ab90a1d"}, + {file = "typed_ast-1.5.5-cp310-cp310-win_amd64.whl", hash = "sha256:e1a976ed4cc2d71bb073e1b2a250892a6e968ff02aa14c1f40eba4f365ffec02"}, + {file = "typed_ast-1.5.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c631da9710271cb67b08bd3f3813b7af7f4c69c319b75475436fcab8c3d21bee"}, + {file = "typed_ast-1.5.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b445c2abfecab89a932b20bd8261488d574591173d07827c1eda32c457358b18"}, + {file = "typed_ast-1.5.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc95ffaaab2be3b25eb938779e43f513e0e538a84dd14a5d844b8f2932593d88"}, + {file = "typed_ast-1.5.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:61443214d9b4c660dcf4b5307f15c12cb30bdfe9588ce6158f4a005baeb167b2"}, + {file = "typed_ast-1.5.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6eb936d107e4d474940469e8ec5b380c9b329b5f08b78282d46baeebd3692dc9"}, + {file = "typed_ast-1.5.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e48bf27022897577d8479eaed64701ecaf0467182448bd95759883300ca818c8"}, + {file = "typed_ast-1.5.5-cp311-cp311-win_amd64.whl", hash = "sha256:83509f9324011c9a39faaef0922c6f720f9623afe3fe220b6d0b15638247206b"}, + {file = "typed_ast-1.5.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:44f214394fc1af23ca6d4e9e744804d890045d1643dd7e8229951e0ef39429b5"}, + {file = "typed_ast-1.5.5-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:118c1ce46ce58fda78503eae14b7664163aa735b620b64b5b725453696f2a35c"}, + {file = "typed_ast-1.5.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be4919b808efa61101456e87f2d4c75b228f4e52618621c77f1ddcaae15904fa"}, + {file = "typed_ast-1.5.5-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:fc2b8c4e1bc5cd96c1a823a885e6b158f8451cf6f5530e1829390b4d27d0807f"}, + {file = "typed_ast-1.5.5-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:16f7313e0a08c7de57f2998c85e2a69a642e97cb32f87eb65fbfe88381a5e44d"}, + {file = "typed_ast-1.5.5-cp36-cp36m-win_amd64.whl", hash = "sha256:2b946ef8c04f77230489f75b4b5a4a6f24c078be4aed241cfabe9cbf4156e7e5"}, + {file = "typed_ast-1.5.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2188bc33d85951ea4ddad55d2b35598b2709d122c11c75cffd529fbc9965508e"}, + {file = "typed_ast-1.5.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0635900d16ae133cab3b26c607586131269f88266954eb04ec31535c9a12ef1e"}, + {file = "typed_ast-1.5.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:57bfc3cf35a0f2fdf0a88a3044aafaec1d2f24d8ae8cd87c4f58d615fb5b6311"}, + {file = "typed_ast-1.5.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:fe58ef6a764de7b4b36edfc8592641f56e69b7163bba9f9c8089838ee596bfb2"}, + {file = "typed_ast-1.5.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d09d930c2d1d621f717bb217bf1fe2584616febb5138d9b3e8cdd26506c3f6d4"}, + {file = "typed_ast-1.5.5-cp37-cp37m-win_amd64.whl", hash = "sha256:d40c10326893ecab8a80a53039164a224984339b2c32a6baf55ecbd5b1df6431"}, + {file = "typed_ast-1.5.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fd946abf3c31fb50eee07451a6aedbfff912fcd13cf357363f5b4e834cc5e71a"}, + {file = "typed_ast-1.5.5-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:ed4a1a42df8a3dfb6b40c3d2de109e935949f2f66b19703eafade03173f8f437"}, + {file = "typed_ast-1.5.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:045f9930a1550d9352464e5149710d56a2aed23a2ffe78946478f7b5416f1ede"}, + {file = "typed_ast-1.5.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:381eed9c95484ceef5ced626355fdc0765ab51d8553fec08661dce654a935db4"}, + {file = "typed_ast-1.5.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:bfd39a41c0ef6f31684daff53befddae608f9daf6957140228a08e51f312d7e6"}, + {file = "typed_ast-1.5.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8c524eb3024edcc04e288db9541fe1f438f82d281e591c548903d5b77ad1ddd4"}, + {file = "typed_ast-1.5.5-cp38-cp38-win_amd64.whl", hash = "sha256:7f58fabdde8dcbe764cef5e1a7fcb440f2463c1bbbec1cf2a86ca7bc1f95184b"}, + {file = "typed_ast-1.5.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:042eb665ff6bf020dd2243307d11ed626306b82812aba21836096d229fdc6a10"}, + {file = "typed_ast-1.5.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:622e4a006472b05cf6ef7f9f2636edc51bda670b7bbffa18d26b255269d3d814"}, + {file = "typed_ast-1.5.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1efebbbf4604ad1283e963e8915daa240cb4bf5067053cf2f0baadc4d4fb51b8"}, + {file = "typed_ast-1.5.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0aefdd66f1784c58f65b502b6cf8b121544680456d1cebbd300c2c813899274"}, + {file = "typed_ast-1.5.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:48074261a842acf825af1968cd912f6f21357316080ebaca5f19abbb11690c8a"}, + {file = "typed_ast-1.5.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:429ae404f69dc94b9361bb62291885894b7c6fb4640d561179548c849f8492ba"}, + {file = "typed_ast-1.5.5-cp39-cp39-win_amd64.whl", hash = "sha256:335f22ccb244da2b5c296e6f96b06ee9bed46526db0de38d2f0e5a6597b81155"}, + {file = "typed_ast-1.5.5.tar.gz", hash = "sha256:94282f7a354f36ef5dbce0ef3467ebf6a258e370ab33d5b40c249fa996e590dd"}, ] [[package]] name = "typing-extensions" -version = "4.6.3" +version = "4.7.1" description = "Backported and Experimental Type Hints for Python 3.7+" optional = false python-versions = ">=3.7" files = [ - {file = "typing_extensions-4.6.3-py3-none-any.whl", hash = "sha256:88a4153d8505aabbb4e13aacb7c486c2b4a33ca3b3f807914a9b4c844c471c26"}, - {file = "typing_extensions-4.6.3.tar.gz", hash = "sha256:d91d5919357fe7f681a9f2b5b4cb2a5f1ef0a1e9f59c4d8ff0d3491e05c0ffd5"}, + {file = "typing_extensions-4.7.1-py3-none-any.whl", hash = "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36"}, + {file = "typing_extensions-4.7.1.tar.gz", hash = "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2"}, ] [[package]] @@ -1245,13 +1259,13 @@ files = [ [[package]] name = "urllib3" -version = "2.0.3" +version = "2.0.4" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.7" files = [ - {file = "urllib3-2.0.3-py3-none-any.whl", hash = "sha256:48e7fafa40319d358848e1bc6809b208340fafe2096f1725d05d67443d0483d1"}, - {file = "urllib3-2.0.3.tar.gz", hash = "sha256:bee28b5e56addb8226c96f7f13ac28cb4c301dd5ea8a6ca179c0b9835e032825"}, + {file = "urllib3-2.0.4-py3-none-any.whl", hash = "sha256:de7df1803967d2c2a98e4b11bb7d6bd9210474c46e8a0401514e3a42a75ebde4"}, + {file = "urllib3-2.0.4.tar.gz", hash = "sha256:8d22f86aae8ef5e410d4f539fde9ce6b2113a001bb4d189e0aed70642d602b11"}, ] [package.extras] @@ -1362,4 +1376,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more [metadata] lock-version = "2.0" python-versions = "^3.7.1" -content-hash = "063bfda7ab42a302be9e025266582e8532582f522e61505c4a90e25345a5638e" +content-hash = "8e61ec31838813ee794b06670a32118e6089b13c99b86a8da6850066cedbac2c" diff --git a/pyproject.toml b/pyproject.toml index 90db58d9..e795dfc5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,31 +5,31 @@ description = "Databricks SQL Connector for Python" authors = ["Databricks "] license = "Apache-2.0" readme = "README.md" -packages = [{include = "databricks", from = "src"}] +packages = [{ include = "databricks", from = "src" }] include = ["CHANGELOG.md"] [tool.poetry.dependencies] python = "^3.7.1" thrift = "^0.16.0" pandas = [ - {version = ">=1.2.5,<1.4.0", python = ">=3.7,<3.8"}, - {version =">=1.2.5,<3.0.0", python = ">=3.8"} + { version = ">=1.2.5,<1.4.0", python = ">=3.7,<3.8" }, + { version = ">=1.2.5,<3.0.0", python = ">=3.8" }, ] - pyarrow = [ - {version = ">=6.0.0", python = ">=3.7,<3.11"}, - {version = ">=10.0.1", python = ">=3.11"} + { version = ">=6.0.0", python = ">=3.7,<3.11" }, + { version = ">=10.0.1", python = ">=3.11" }, ] lz4 = "^4.0.2" -requests="^2.18.1" -oauthlib="^3.1.0" +requests = "^2.18.1" +oauthlib = "^3.1.0" numpy = [ - {version = ">=1.16.6", python = ">=3.7,<3.11"}, - {version = ">=1.23.4", python = ">=3.11"} + { version = ">=1.16.6", python = ">=3.7,<3.11" }, + { version = ">=1.23.4", python = ">=3.11" }, ] sqlalchemy = "^1.3.24" openpyxl = "^3.0.10" alembic = "^1.0.11" +urllib3 = "^2.0.0" [tool.poetry.dev-dependencies] pytest = "^7.1.2" @@ -60,9 +60,5 @@ exclude = '/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck minversion = "6.0" log_cli = "false" log_cli_level = "INFO" -testpaths = [ - "tests" -] -env_files = [ - "test.env" -] +testpaths = ["tests"] +env_files = ["test.env"] From 7aaa014172cb1c1e32a7776f81f6b75d66d4717a Mon Sep 17 00:00:00 2001 From: Jesse Date: Fri, 11 Aug 2023 13:29:58 -0400 Subject: [PATCH 33/40] Bump to 2.9.1 (#195) Signed-off-by: Jesse Whitehouse --- CHANGELOG.md | 2 ++ pyproject.toml | 2 +- src/databricks/sql/__init__.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 195204cf..1fe5476d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## 2.9.x (Unreleased) +## 2.9.1 (2023-08-11) + - Other: Explicitly pin urllib3 to ^2.0.0 ## 2.9.0 (2023-08-10) diff --git a/pyproject.toml b/pyproject.toml index e795dfc5..5f24cdc9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "databricks-sql-connector" -version = "2.9.0" +version = "2.9.1" description = "Databricks SQL Connector for Python" authors = ["Databricks "] license = "Apache-2.0" diff --git a/src/databricks/sql/__init__.py b/src/databricks/sql/__init__.py index c55d17ca..0be19d5b 100644 --- a/src/databricks/sql/__init__.py +++ b/src/databricks/sql/__init__.py 
@@ -28,7 +28,7 @@ def __repr__(self): DATE = DBAPITypeObject("date") ROWID = DBAPITypeObject() -__version__ = "2.9.0" +__version__ = "2.9.1" USER_AGENT_NAME = "PyDatabricksSqlConnector" # These two functions are pyhive legacy From d28a6921a6cbd72b938d739ddc4532ad5a2958d6 Mon Sep 17 00:00:00 2001 From: Jesse Date: Wed, 16 Aug 2023 15:01:17 -0400 Subject: [PATCH 34/40] Make backwards compatible with urllib3~=1.0 (#197) Signed-off-by: Jesse Whitehouse --- pyproject.toml | 2 +- src/databricks/sql/auth/retry.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5f24cdc9..f3c16509 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ numpy = [ sqlalchemy = "^1.3.24" openpyxl = "^3.0.10" alembic = "^1.0.11" -urllib3 = "^2.0.0" +urllib3 = ">=1.0" [tool.poetry.dev-dependencies] pytest = "^7.1.2" diff --git a/src/databricks/sql/auth/retry.py b/src/databricks/sql/auth/retry.py index 182c7137..548b3622 100644 --- a/src/databricks/sql/auth/retry.py +++ b/src/databricks/sql/auth/retry.py @@ -4,7 +4,13 @@ from enum import Enum from typing import List, Optional, Tuple, Union -from urllib3 import BaseHTTPResponse # type: ignore +# We only use this import for type hinting +try: + # If urllib3~=2.0 is installed + from urllib3 import BaseHTTPResponse # type: ignore +except ImportError: + # If urllib3~=1.0 is installed + from urllib3 import HTTPResponse as BaseHTTPResponse from urllib3 import Retry from urllib3.util.retry import RequestHistory From 871294e01b0b532fc3c2274aff56e72d6d252cdc Mon Sep 17 00:00:00 2001 From: Jesse Date: Thu, 17 Aug 2023 08:51:20 -0400 Subject: [PATCH 35/40] Convenience improvements to v3 retry logic (#199) Signed-off-by: Jesse Whitehouse --- CHANGELOG.md | 4 ++++ examples/README.md | 3 ++- examples/v3_retries_query_execute.py | 35 ++++++++++++++++++++++++++++ src/databricks/sql/thrift_backend.py | 7 +++--- 4 files changed, 45 insertions(+), 4 deletions(-) create mode 100644 examples/v3_retries_query_execute.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 1fe5476d..e14d4700 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## 2.9.x (Unreleased) +- Other: Add `examples/v3_retries_query_execute.py` +- Other: suppress log message when `_enable_v3_retries` is not `True` +- Other: make this connector backwards compatible with `urllib3>=1.0.0` + ## 2.9.1 (2023-08-11) - Other: Explicitly pin urllib3 to ^2.0.0 diff --git a/examples/README.md b/examples/README.md index 4fbe8527..bcf031fa 100644 --- a/examples/README.md +++ b/examples/README.md @@ -38,4 +38,5 @@ To run all of these examples you can clone the entire repository to your disk. O this example the string `ExamplePartnerTag` will be added to the the user agent on every request. - **`staging_ingestion.py`** shows how the connector handles Databricks' experimental staging ingestion commands `GET`, `PUT`, and `REMOVE`. - **`sqlalchemy.py`** shows a basic example of connecting to Databricks with [SQLAlchemy](https://www.sqlalchemy.org/). -- **`custom_cred_provider.py`** shows how to pass a custom credential provider to bypass connector authentication. Please install databricks-sdk prior to running this example. \ No newline at end of file +- **`custom_cred_provider.py`** shows how to pass a custom credential provider to bypass connector authentication. Please install databricks-sdk prior to running this example. 
+- **`v3_retries_query_execute.py`** shows how to enable v3 retries in connector version 2.9.x including how to enable retries for non-default retry cases. \ No newline at end of file diff --git a/examples/v3_retries_query_execute.py b/examples/v3_retries_query_execute.py new file mode 100644 index 00000000..377cebfb --- /dev/null +++ b/examples/v3_retries_query_execute.py @@ -0,0 +1,35 @@ +from databricks import sql +import os + +# Users of connector versions >= 2.9.0 and <= 3.0.0 can use the v3 retry behaviour by setting _enable_v3_retries=True +# This flag will be deprecated in databricks-sql-connector~=3.0.0 as it will become the default. +# +# The new retry behaviour is defined in src/databricks/sql/auth/retry.py +# +# The new retry behaviour allows users to force the connector to automatically retry requests that fail with codes +# that are not retried by default (in most cases only codes 429 and 503 are retried by default). Additional HTTP +# codes to retry are specified as a list passed to `_retry_dangerous_codes`. +# +# Note that, as implied in the name, doing this is *dangerous* and should not be configured in all usages. +# With the default behaviour, ExecuteStatement Thrift commands are only retried for codes 429 and 503 because +# we can be certain at run-time that the statement never reached Databricks compute. These codes are returned by +# the SQL gateway / load balancer. So there is no risk that retrying the request would result in a doubled +# (or tripled etc) command execution. These codes are always accompanied by a Retry-After header, which we honour. +# +# However, if your use-case emits idempotent queries such as SELECT statements, it can be helpful to retry +# for 502 (Bad Gateway) codes etc. In these cases, there is a possibility that the initial command _did_ reach +# Databricks compute and retrying it could result in additional executions. Retrying under these conditions uses +# an exponential back-off since a Retry-After header is not present. 
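+#
+# As a rough illustration of that exponential back-off (the authoritative logic lives in
+# DatabricksRetryPolicy and urllib3's Retry, and jitter behaviour differs between urllib3
+# 1.x and 2.x), the wait before successive attempts grows approximately like:
+#
+#   delay = min(delay_max, delay_min * 2 ** (attempt - 1))
+#
+# where delay_min and delay_max are the retry policy's minimum and maximum delays between
+# attempts, i.e. each retry waits roughly twice as long as the previous one, capped at the
+# configured maximum.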
+ +with sql.connect(server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME"), + http_path = os.getenv("DATABRICKS_HTTP_PATH"), + access_token = os.getenv("DATABRICKS_TOKEN"), + _enable_v3_retries = True, + _retry_dangerous_codes=[502,400]) as connection: + + with connection.cursor() as cursor: + cursor.execute("SELECT * FROM default.diamonds LIMIT 2") + result = cursor.fetchall() + + for row in result: + print(row) diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index 9f54aadb..4d07d671 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -375,9 +375,10 @@ def attempt_request(attempt): unsafe_logger.debug("Sending request: {}".format(request)) # These three lines are no-ops if the v3 retry policy is not in use - this_command_type = CommandType.get(this_method_name) - self._transport.set_retry_command_type(this_command_type) - self._transport.startRetryTimer() + if self.enable_v3_retries: + this_command_type = CommandType.get(this_method_name) + self._transport.set_retry_command_type(this_command_type) + self._transport.startRetryTimer() response = method(request) From 54a61026ce2bc6c983fbc1dfab0fa7c881ab271f Mon Sep 17 00:00:00 2001 From: Jesse Date: Fri, 18 Aug 2023 08:02:56 -0400 Subject: [PATCH 36/40] Bump version to 2.9.2 (#201) Signed-off-by: Jesse Whitehouse --- CHANGELOG.md | 2 ++ pyproject.toml | 2 +- src/databricks/sql/__init__.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e14d4700..7f9ea9d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## 2.9.x (Unreleased) +## 2.9.2 (2023-08-17) + - Other: Add `examples/v3_retries_query_execute.py` - Other: suppress log message when `_enable_v3_retries` is not `True` - Other: make this connector backwards compatible with `urllib3>=1.0.0` diff --git a/pyproject.toml b/pyproject.toml index f3c16509..cf5a13de 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "databricks-sql-connector" -version = "2.9.1" +version = "2.9.2" description = "Databricks SQL Connector for Python" authors = ["Databricks "] license = "Apache-2.0" diff --git a/src/databricks/sql/__init__.py b/src/databricks/sql/__init__.py index 0be19d5b..6af6d227 100644 --- a/src/databricks/sql/__init__.py +++ b/src/databricks/sql/__init__.py @@ -28,7 +28,7 @@ def __repr__(self): DATE = DBAPITypeObject("date") ROWID = DBAPITypeObject() -__version__ = "2.9.1" +__version__ = "2.9.2" USER_AGENT_NAME = "PyDatabricksSqlConnector" # These two functions are pyhive legacy From a072574c0cbf4abdd1ad79d57144b8d98213b3f1 Mon Sep 17 00:00:00 2001 From: Jesse Date: Thu, 24 Aug 2023 11:49:14 -0400 Subject: [PATCH 37/40] Github Actions Fix: poetry install fails for python 3.7 tests (#208) snok/install-poetry@v1 installs the latest version of Poetry The latest version of poetry released on 20 August 2023 (four days ago as of this commit) which drops support for Python 3.7, causing our github action to fail. 
Until we complete #207 we need to conditionally install the last version of poetry that supports Python 3.7 (poetry==1.5.1) Signed-off-by: Jesse Whitehouse --- .github/workflows/code-quality-checks.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/code-quality-checks.yml b/.github/workflows/code-quality-checks.yml index fe47eb15..31c2b1fd 100644 --- a/.github/workflows/code-quality-checks.yml +++ b/.github/workflows/code-quality-checks.yml @@ -29,6 +29,7 @@ jobs: - name: Install Poetry uses: snok/install-poetry@v1 with: + version: ${{ matrix.python-version == 3.7 && '1.5.1' || 'latest' }} virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true @@ -80,6 +81,7 @@ jobs: - name: Install Poetry uses: snok/install-poetry@v1 with: + version: ${{ matrix.python-version == 3.7 && '1.5.1' || 'latest' }} virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true @@ -132,6 +134,7 @@ jobs: - name: Install Poetry uses: snok/install-poetry@v1 with: + version: ${{ matrix.python-version == 3.7 && '1.5.1' || 'latest' }} virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true From a918f13b76c07cacfcf6c5c3c90eb124654ae6bb Mon Sep 17 00:00:00 2001 From: Jesse Date: Thu, 24 Aug 2023 11:54:14 -0400 Subject: [PATCH 38/40] Make backwards compatible with urllib3~=1.0 [Follow up #197] (#206) * Make retry policy backwards compatible with urllib3~=1.0.0 We already implement the equivalent of backoff_max so the behaviour will be the same for urllib3==1.x and urllib3==2.x We do not implement backoff jitter so the behaviour for urllib3==1.x will NOT include backoff jitter whereas urllib3==2.x WILL include jitter. --------- Signed-off-by: Jesse Whitehouse --- CHANGELOG.md | 20 ++++++++++++-------- src/databricks/sql/auth/retry.py | 6 +----- tests/e2e/common/retry_test_mixins.py | 16 ++++++++++------ 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f9ea9d9..90096d8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,22 +1,26 @@ # Release History -## 2.9.x (Unreleased) +## 2.9.4 (Unreleased) + +## 2.9.3 (2023-08-24) + +- Fix: Connections failed when urllib3~=1.0.0 is installed (#206) ## 2.9.2 (2023-08-17) -- Other: Add `examples/v3_retries_query_execute.py` -- Other: suppress log message when `_enable_v3_retries` is not `True` -- Other: make this connector backwards compatible with `urllib3>=1.0.0` +- Other: Add `examples/v3_retries_query_execute.py` (#199) +- Other: suppress log message when `_enable_v3_retries` is not `True` (#199) +- Other: make this connector backwards compatible with `urllib3>=1.0.0` (#197) ## 2.9.1 (2023-08-11) -- Other: Explicitly pin urllib3 to ^2.0.0 +- Other: Explicitly pin urllib3 to ^2.0.0 (#191) ## 2.9.0 (2023-08-10) -- Replace retry handling with DatabricksRetryPolicy. This is disabled by default. To enable, set `enable_v3_retries=True` when creating `databricks.sql.client` -- Other: Fix typo in README quick start example -- Other: Add autospec to Client mocks and tidy up `make_request` +- Replace retry handling with DatabricksRetryPolicy. This is disabled by default. 
To enable, set `enable_v3_retries=True` when creating `databricks.sql.client` (#182) +- Other: Fix typo in README quick start example (#186) +- Other: Add autospec to Client mocks and tidy up `make_request` (#188) ## 2.8.0 (2023-07-21) diff --git a/src/databricks/sql/auth/retry.py b/src/databricks/sql/auth/retry.py index 548b3622..0b3ad175 100644 --- a/src/databricks/sql/auth/retry.py +++ b/src/databricks/sql/auth/retry.py @@ -56,8 +56,7 @@ class DatabricksRetryPolicy(Retry): `backoff_factor`. :param delay_max: - Float of seconds for the maximum delay between retries. This is an alias for urllib3's - `backoff_max` + Float of seconds for the maximum delay between retries. :param stop_after_attempts_count: Integer maximum number of attempts that will be retried. This is an alias for urllib3's @@ -122,7 +121,6 @@ def __init__( total=_attempts_remaining, respect_retry_after_header=True, backoff_factor=self.delay_min, - backoff_max=self.delay_max, allowed_methods=["POST"], status_forcelist=[429, 503, *self.force_dangerous_codes], ) @@ -212,13 +210,11 @@ def new(self, **urllib3_incremented_counters: typing.Any) -> Retry: allowed_methods=self.allowed_methods, status_forcelist=self.status_forcelist, backoff_factor=self.backoff_factor, # type: ignore - backoff_max=self.backoff_max, # type: ignore raise_on_redirect=self.raise_on_redirect, raise_on_status=self.raise_on_status, history=self.history, remove_headers_on_redirect=self.remove_headers_on_redirect, respect_retry_after_header=self.respect_retry_after_header, - backoff_jitter=self.backoff_jitter, # type: ignore ) # Update urllib3's current state to reflect the incremented counters diff --git a/tests/e2e/common/retry_test_mixins.py b/tests/e2e/common/retry_test_mixins.py index dfe15998..44b7afbf 100644 --- a/tests/e2e/common/retry_test_mixins.py +++ b/tests/e2e/common/retry_test_mixins.py @@ -58,14 +58,16 @@ def _test_retry_disabled_with_message(self, error_msg_substring, exception_type) @contextmanager -def mocked_server_response(status: int = 200, headers: dict = {"Retry-After": None}): +def mocked_server_response(status: int = 200, headers: dict = {}): """Context manager for patching urllib3 responses""" # When mocking mocking a BaseHTTPResponse for urllib3 the mock must include # 1. A status code # 2. A headers dict # 3. 
mock.get_redirect_location() return falsy - mock_response = MagicMock(headers=headers, status=status) + + # `msg` is included for testing when urllib3~=1.0.0 is installed + mock_response = MagicMock(headers=headers, msg=headers, status=status) mock_response.get_redirect_location.return_value = False with patch("urllib3.connectionpool.HTTPSConnectionPool._get_conn") as getconn_mock: @@ -91,7 +93,9 @@ def mock_sequential_server_responses(responses: List[dict]): # Each resp should have these members: for resp in responses: - _mock = MagicMock(headers=resp["headers"], status=resp["status"]) + _mock = MagicMock( + headers=resp["headers"], msg=resp["headers"], status=resp["status"] + ) _mock.get_redirect_location.return_value = False mock_responses.append(_mock) @@ -239,7 +243,7 @@ def test_retry_safe_execute_statement_retry_condition(self): responses = [ {"status": 429, "headers": {"Retry-After": "1"}}, - {"status": 503, "headers": {"Retry-After": None}}, + {"status": 503, "headers": {}}, ] with self.connection( @@ -262,7 +266,7 @@ def test_retry_abort_close_session_on_404(self): # Second response is a 404 because the session is no longer found responses = [ {"status": 502, "headers": {"Retry-After": "1"}}, - {"status": 404, "headers": {"Retry-After": None}}, + {"status": 404, "headers": {}}, ] with self.connection(extra_params={**self._retry_policy}) as conn: @@ -292,7 +296,7 @@ def test_retry_abort_close_operation_on_404(self): # Second response is a 404 because the session is no longer found responses = [ {"status": 502, "headers": {"Retry-After": "1"}}, - {"status": 404, "headers": {"Retry-After": None}}, + {"status": 404, "headers": {}}, ] with self.connection(extra_params={**self._retry_policy}) as conn: From a737ef3107f41d64803326cfe59f2fcacea88343 Mon Sep 17 00:00:00 2001 From: Jesse Date: Thu, 24 Aug 2023 13:44:36 -0400 Subject: [PATCH 39/40] Bump version to 2.9.3 (#209) --------- Signed-off-by: Jesse Whitehouse --- pyproject.toml | 2 +- src/databricks/sql/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index cf5a13de..1565e0dd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "databricks-sql-connector" -version = "2.9.2" +version = "2.9.3" description = "Databricks SQL Connector for Python" authors = ["Databricks "] license = "Apache-2.0" diff --git a/src/databricks/sql/__init__.py b/src/databricks/sql/__init__.py index 6af6d227..918bd47a 100644 --- a/src/databricks/sql/__init__.py +++ b/src/databricks/sql/__init__.py @@ -28,7 +28,7 @@ def __repr__(self): DATE = DBAPITypeObject("date") ROWID = DBAPITypeObject() -__version__ = "2.9.2" +__version__ = "2.9.3" USER_AGENT_NAME = "PyDatabricksSqlConnector" # These two functions are pyhive legacy From fddc9f936d4e03b8a11cdf0a51d4da8c1a942fd3 Mon Sep 17 00:00:00 2001 From: Javier Asensio Date: Fri, 2 Jun 2023 11:18:15 +0100 Subject: [PATCH 40/40] Add timeout hack to mitigate timeouts --- .github/workflows/code-quality-checks.yml | 166 ------------------ src/databricks/sql/auth/thrift_http_client.py | 1 + src/databricks/sql/thrift_backend.py | 109 ++++-------- 3 files changed, 34 insertions(+), 242 deletions(-) delete mode 100644 .github/workflows/code-quality-checks.yml diff --git a/.github/workflows/code-quality-checks.yml b/.github/workflows/code-quality-checks.yml deleted file mode 100644 index 31c2b1fd..00000000 --- a/.github/workflows/code-quality-checks.yml +++ /dev/null @@ -1,166 +0,0 @@ -name: Code Quality Checks -on: - push: - 
branches: - - main - pull_request: - branches: - - main -jobs: - run-unit-tests: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: [3.7, 3.8, 3.9, "3.10", "3.11"] - steps: - #---------------------------------------------- - # check-out repo and set-up python - #---------------------------------------------- - - name: Check out repository - uses: actions/checkout@v2 - - name: Set up python ${{ matrix.python-version }} - id: setup-python - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - #---------------------------------------------- - # ----- install & configure poetry ----- - #---------------------------------------------- - - name: Install Poetry - uses: snok/install-poetry@v1 - with: - version: ${{ matrix.python-version == 3.7 && '1.5.1' || 'latest' }} - virtualenvs-create: true - virtualenvs-in-project: true - installer-parallel: true - - #---------------------------------------------- - # load cached venv if cache exists - #---------------------------------------------- - - name: Load cached venv - id: cached-poetry-dependencies - uses: actions/cache@v2 - with: - path: .venv - key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ github.event.repository.name }}-${{ hashFiles('**/poetry.lock') }} - #---------------------------------------------- - # install dependencies if cache does not exist - #---------------------------------------------- - - name: Install dependencies - if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction --no-root - #---------------------------------------------- - # install your root project, if required - #---------------------------------------------- - - name: Install library - run: poetry install --no-interaction - #---------------------------------------------- - # run test suite - #---------------------------------------------- - - name: Run tests - run: poetry run python -m pytest tests/unit - check-linting: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: [3.7, 3.8, 3.9, "3.10"] - steps: - #---------------------------------------------- - # check-out repo and set-up python - #---------------------------------------------- - - name: Check out repository - uses: actions/checkout@v2 - - name: Set up python ${{ matrix.python-version }} - id: setup-python - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - #---------------------------------------------- - # ----- install & configure poetry ----- - #---------------------------------------------- - - name: Install Poetry - uses: snok/install-poetry@v1 - with: - version: ${{ matrix.python-version == 3.7 && '1.5.1' || 'latest' }} - virtualenvs-create: true - virtualenvs-in-project: true - installer-parallel: true - - #---------------------------------------------- - # load cached venv if cache exists - #---------------------------------------------- - - name: Load cached venv - id: cached-poetry-dependencies - uses: actions/cache@v2 - with: - path: .venv - key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ github.event.repository.name }}-${{ hashFiles('**/poetry.lock') }} - #---------------------------------------------- - # install dependencies if cache does not exist - #---------------------------------------------- - - name: Install dependencies - if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction --no-root - 
#---------------------------------------------- - # install your root project, if required - #---------------------------------------------- - - name: Install library - run: poetry install --no-interaction - #---------------------------------------------- - # black the code - #---------------------------------------------- - - name: Black - run: poetry run black --check src - - check-types: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: [3.7, 3.8, 3.9, "3.10"] - steps: - #---------------------------------------------- - # check-out repo and set-up python - #---------------------------------------------- - - name: Check out repository - uses: actions/checkout@v2 - - name: Set up python ${{ matrix.python-version }} - id: setup-python - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - #---------------------------------------------- - # ----- install & configure poetry ----- - #---------------------------------------------- - - name: Install Poetry - uses: snok/install-poetry@v1 - with: - version: ${{ matrix.python-version == 3.7 && '1.5.1' || 'latest' }} - virtualenvs-create: true - virtualenvs-in-project: true - installer-parallel: true - - #---------------------------------------------- - # load cached venv if cache exists - #---------------------------------------------- - - name: Load cached venv - id: cached-poetry-dependencies - uses: actions/cache@v2 - with: - path: .venv - key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ github.event.repository.name }}-${{ hashFiles('**/poetry.lock') }} - #---------------------------------------------- - # install dependencies if cache does not exist - #---------------------------------------------- - - name: Install dependencies - if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction --no-root - #---------------------------------------------- - # install your root project, if required - #---------------------------------------------- - - name: Install library - run: poetry install --no-interaction - #---------------------------------------------- - # black the code - #---------------------------------------------- - - name: Mypy - run: poetry run mypy --install-types --non-interactive src diff --git a/src/databricks/sql/auth/thrift_http_client.py b/src/databricks/sql/auth/thrift_http_client.py index 11589258..0a3651b9 100644 --- a/src/databricks/sql/auth/thrift_http_client.py +++ b/src/databricks/sql/auth/thrift_http_client.py @@ -5,6 +5,7 @@ import six import thrift +import thrift.transport.THttpClient logger = logging.getLogger(__name__) diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index 4d07d671..04a719c0 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -1,16 +1,18 @@ -from decimal import Decimal import errno import logging import math +import os import time import uuid import threading from ssl import CERT_NONE, CERT_REQUIRED, create_default_context from typing import List, Union +import databricks.sql.auth.thrift_http_client +import lz4.frame import pyarrow -import thrift.transport.THttpClient import thrift.protocol.TBinaryProtocol +import thrift.transport.THttpClient import thrift.transport.TSocket import thrift.transport.TTransport @@ -54,6 +56,10 @@ DATABRICKS_REASON_HEADER = "x-databricks-reason-phrase" TIMESTAMP_AS_STRING_CONFIG = "spark.thriftserver.arrowBasedRowSet.timestampAsString" + +# HACK! 
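+# When the THRIFT_SOCKET_TIMEOUT environment variable is set, its value (in seconds) takes
+# precedence over any `_socket_timeout` keyword argument passed by the caller; otherwise the
+# kwarg, or DEFAULT_SOCKET_TIMEOUT, is used. The selected value is converted to milliseconds
+# before being passed to the thrift transport's setTimeout() further down.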
+THRIFT_SOCKET_TIMEOUT = os.getenv("THRIFT_SOCKET_TIMEOUT", None) + DEFAULT_SOCKET_TIMEOUT = float(900) # see Connection.__init__ for parameter descriptions. @@ -145,13 +151,9 @@ def __init__( self.staging_allowed_local_path = staging_allowed_local_path self._initialize_retry_args(kwargs) - self._use_arrow_native_complex_types = kwargs.get( - "_use_arrow_native_complex_types", True - ) + self._use_arrow_native_complex_types = kwargs.get("_use_arrow_native_complex_types", True) self._use_arrow_native_decimals = kwargs.get("_use_arrow_native_decimals", True) - self._use_arrow_native_timestamps = kwargs.get( - "_use_arrow_native_timestamps", True - ) + self._use_arrow_native_timestamps = kwargs.get("_use_arrow_native_timestamps", True) # Cloud fetch self.max_download_threads = kwargs.get("max_download_threads", 10) @@ -204,7 +206,7 @@ def __init__( **additional_transport_args, # type: ignore ) - timeout = kwargs.get("_socket_timeout", DEFAULT_SOCKET_TIMEOUT) + timeout = THRIFT_SOCKET_TIMEOUT or kwargs.get("_socket_timeout", DEFAULT_SOCKET_TIMEOUT) # setTimeout defaults to 15 minutes and is expected in ms self._transport.setTimeout(timeout and (float(timeout) * 1000.0)) @@ -228,15 +230,11 @@ def _initialize_retry_args(self, kwargs): given_or_default = type_(kwargs.get(key, default)) bound = _bound(min, max, given_or_default) setattr(self, key, bound) - logger.debug( - "retry parameter: {} given_or_default {}".format(key, given_or_default) - ) + logger.debug("retry parameter: {} given_or_default {}".format(key, given_or_default)) if bound != given_or_default: logger.warning( "Override out of policy retry parameter: " - + "{} given {}, restricted to {}".format( - key, given_or_default, bound - ) + + "{} given {}, restricted to {}".format(key, given_or_default, bound) ) # Fail on retry delay min > max; consider later adding fail on min > duration? @@ -264,9 +262,7 @@ def _extract_error_message_from_headers(headers): if THRIFT_ERROR_MESSAGE_HEADER in headers: err_msg = headers[THRIFT_ERROR_MESSAGE_HEADER] if DATABRICKS_ERROR_OR_REDIRECT_HEADER in headers: - if ( - err_msg - ): # We don't expect both to be set, but log both here just in case + if err_msg: # We don't expect both to be set, but log both here just in case err_msg = "Thriftserver error: {}, Databricks error: {}".format( err_msg, headers[DATABRICKS_ERROR_OR_REDIRECT_HEADER] ) @@ -497,10 +493,7 @@ def _check_initial_namespace(self, catalog, schema, response): if not (catalog or schema): return - if ( - response.serverProtocolVersion - < ttypes.TProtocolVersion.SPARK_CLI_SERVICE_PROTOCOL_V4 - ): + if response.serverProtocolVersion < ttypes.TProtocolVersion.SPARK_CLI_SERVICE_PROTOCOL_V4: raise InvalidServerResponseError( "Setting initial namespace not supported by the DBR version, " "Please use a Databricks SQL endpoint or a cluster with DBR >= 9.0." 
@@ -515,10 +508,7 @@ def _check_initial_namespace(self, catalog, schema, response): def _check_session_configuration(self, session_configuration): # This client expects timetampsAsString to be false, so we do not allow users to modify that - if ( - session_configuration.get(TIMESTAMP_AS_STRING_CONFIG, "false").lower() - != "false" - ): + if session_configuration.get(TIMESTAMP_AS_STRING_CONFIG, "false").lower() != "false": raise Error( "Invalid session configuration: {} cannot be changed " "while using the Databricks SQL connector, it must be false not {}".format( @@ -530,18 +520,14 @@ def _check_session_configuration(self, session_configuration): def open_session(self, session_configuration, catalog, schema): try: self._transport.open() - session_configuration = { - k: str(v) for (k, v) in (session_configuration or {}).items() - } + session_configuration = {k: str(v) for (k, v) in (session_configuration or {}).items()} self._check_session_configuration(session_configuration) # We want to receive proper Timestamp arrow types. # We set it also in confOverlay in TExecuteStatementReq on a per query basic, # but it doesn't hurt to also set for the whole session. session_configuration[TIMESTAMP_AS_STRING_CONFIG] = "false" if catalog or schema: - initial_namespace = ttypes.TNamespace( - catalogName=catalog, schemaName=schema - ) + initial_namespace = ttypes.TNamespace(catalogName=catalog, schemaName=schema) else: initial_namespace = None @@ -567,9 +553,7 @@ def close_session(self, session_handle) -> None: finally: self._transport.close() - def _check_command_not_in_error_or_closed_state( - self, op_handle, get_operations_resp - ): + def _check_command_not_in_error_or_closed_state(self, op_handle, get_operations_resp): if get_operations_resp.operationState == ttypes.TOperationState.ERROR_STATE: if get_operations_resp.displayMessage: raise ServerOperationError( @@ -656,9 +640,7 @@ def map_type(t_type_entry): else: # Current thriftserver implementation should always return a primitiveEntry, # even for complex types - raise OperationalError( - "Thrift protocol error: t_type_entry not a primitiveEntry" - ) + raise OperationalError("Thrift protocol error: t_type_entry not a primitiveEntry") def convert_col(t_column_desc): return pyarrow.field( @@ -676,9 +658,7 @@ def _col_to_description(col): # Drop _TYPE suffix cleaned_type = (name[:-5] if name.endswith("_TYPE") else name).lower() else: - raise OperationalError( - "Thrift protocol error: t_type_entry not a primitiveEntry" - ) + raise OperationalError("Thrift protocol error: t_type_entry not a primitiveEntry") if type_entry.primitiveEntry.type == ttypes.TTypeId.DECIMAL_TYPE: qualifiers = type_entry.primitiveEntry.typeQualifiers.qualifiers @@ -699,9 +679,7 @@ def _col_to_description(col): @staticmethod def _hive_schema_to_description(t_table_schema): - return [ - ThriftBackend._col_to_description(col) for col in t_table_schema.columns - ] + return [ThriftBackend._col_to_description(col) for col in t_table_schema.columns] def _results_message_to_execute_response(self, resp, operation_state): if resp.directResults and resp.directResults.resultSetMetadata: @@ -729,9 +707,7 @@ def _results_message_to_execute_response(self, resp, operation_state): or (not direct_results.resultSet) or direct_results.resultSet.hasMoreRows ) - description = self._hive_schema_to_description( - t_result_set_metadata_resp.schema - ) + description = self._hive_schema_to_description(t_result_set_metadata_resp.schema) schema_bytes = ( t_result_set_metadata_resp.arrowSchema or 
self._hive_schema_to_arrow_schema(t_result_set_metadata_resp.schema) @@ -772,8 +748,7 @@ def _wait_until_command_done(self, op_handle, initial_operation_status_resp): op_handle, initial_operation_status_resp ) operation_state = ( - initial_operation_status_resp - and initial_operation_status_resp.operationState + initial_operation_status_resp and initial_operation_status_resp.operationState ) while not operation_state or operation_state in [ ttypes.TOperationState.RUNNING_STATE, @@ -788,21 +763,13 @@ def _wait_until_command_done(self, op_handle, initial_operation_status_resp): def _check_direct_results_for_error(t_spark_direct_results): if t_spark_direct_results: if t_spark_direct_results.operationStatus: - ThriftBackend._check_response_for_error( - t_spark_direct_results.operationStatus - ) + ThriftBackend._check_response_for_error(t_spark_direct_results.operationStatus) if t_spark_direct_results.resultSetMetadata: - ThriftBackend._check_response_for_error( - t_spark_direct_results.resultSetMetadata - ) + ThriftBackend._check_response_for_error(t_spark_direct_results.resultSetMetadata) if t_spark_direct_results.resultSet: - ThriftBackend._check_response_for_error( - t_spark_direct_results.resultSet - ) + ThriftBackend._check_response_for_error(t_spark_direct_results.resultSet) if t_spark_direct_results.closeOperation: - ThriftBackend._check_response_for_error( - t_spark_direct_results.closeOperation - ) + ThriftBackend._check_response_for_error(t_spark_direct_results.closeOperation) def execute_command( self, @@ -828,9 +795,7 @@ def execute_command( sessionHandle=session_handle, statement=operation, runAsync=True, - getDirectResults=ttypes.TSparkGetDirectResults( - maxRows=max_rows, maxBytes=max_bytes - ), + getDirectResults=ttypes.TSparkGetDirectResults(maxRows=max_rows, maxBytes=max_bytes), canReadArrowResult=True, canDecompressLZ4Result=lz4_compression, canDownloadResult=use_cloud_fetch, @@ -848,9 +813,7 @@ def get_catalogs(self, session_handle, max_rows, max_bytes, cursor): req = ttypes.TGetCatalogsReq( sessionHandle=session_handle, - getDirectResults=ttypes.TSparkGetDirectResults( - maxRows=max_rows, maxBytes=max_bytes - ), + getDirectResults=ttypes.TSparkGetDirectResults(maxRows=max_rows, maxBytes=max_bytes), ) resp = self.make_request(self._client.GetCatalogs, req) return self._handle_execute_response(resp, cursor) @@ -868,9 +831,7 @@ def get_schemas( req = ttypes.TGetSchemasReq( sessionHandle=session_handle, - getDirectResults=ttypes.TSparkGetDirectResults( - maxRows=max_rows, maxBytes=max_bytes - ), + getDirectResults=ttypes.TSparkGetDirectResults(maxRows=max_rows, maxBytes=max_bytes), catalogName=catalog_name, schemaName=schema_name, ) @@ -892,9 +853,7 @@ def get_tables( req = ttypes.TGetTablesReq( sessionHandle=session_handle, - getDirectResults=ttypes.TSparkGetDirectResults( - maxRows=max_rows, maxBytes=max_bytes - ), + getDirectResults=ttypes.TSparkGetDirectResults(maxRows=max_rows, maxBytes=max_bytes), catalogName=catalog_name, schemaName=schema_name, tableName=table_name, @@ -918,9 +877,7 @@ def get_columns( req = ttypes.TGetColumnsReq( sessionHandle=session_handle, - getDirectResults=ttypes.TSparkGetDirectResults( - maxRows=max_rows, maxBytes=max_bytes - ), + getDirectResults=ttypes.TSparkGetDirectResults(maxRows=max_rows, maxBytes=max_bytes), catalogName=catalog_name, schemaName=schema_name, tableName=table_name,