Merge remote-tracking branch 'origin/feature/DOP-16999' into feature/DOP-16999
maxim-lixakov committed Aug 20, 2024
2 parents 4dc8b56 + 81d86f6 · commit 6b83824
Showing 6 changed files with 42 additions and 65 deletions.
3 changes: 0 additions & 3 deletions docs/conf.py
@@ -120,9 +120,6 @@
{"rel": "icon", "href": "icon.svg", "type": "image/svg+xml"},
]

- # TODO: remove after https://github.com/mgeier/sphinx-last-updated-by-git/pull/77
- git_exclude_patterns = ["docs/_static/logo_wide.svg"]
-
# The master toctree document.
master_doc = "index"

4 changes: 2 additions & 2 deletions onetl/connection/db_connection/jdbc_connection/connection.py
@@ -90,7 +90,7 @@ def sql(

query = clear_statement(query)

- log.info("|%s| Detected dialect: '%s'", self.__class__.__name__, self.jdbc_dialect)
+ log.info("|%s| Detected dialect: '%s'", self.__class__.__name__, self._get_spark_dialect_name())
log.info("|%s| Executing SQL query (on executor):", self.__class__.__name__)
log_lines(log, query)

@@ -196,7 +196,7 @@ def get_df_schema(
columns: list[str] | None = None,
options: JDBCReadOptions | None = None,
) -> StructType:
- log.info("|%s| Detected dialect: '%s'", self.__class__.__name__, self.jdbc_dialect)
+ log.info("|%s| Detected dialect: '%s'", self.__class__.__name__, self._get_spark_dialect_name())
log.info("|%s| Fetching schema of table %r ...", self.__class__.__name__, source)

query = self.dialect.get_sql_query(source, columns=columns, limit=0, compact=True)
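The two hunks above switch `sql()` and `get_df_schema()` from the removed public `jdbc_dialect` property to the new private `_get_spark_dialect_name()` helper added in the mixin below. A minimal sketch of what this looks like from the caller's side — the Spark session, connection parameters, and query are placeholders, it assumes the PostgreSQL JDBC driver is already available to Spark, and only the quoted log text is taken from the test assertions in this commit:

```python
import logging

from pyspark.sql import SparkSession
from onetl.connection import Postgres

logging.basicConfig(level=logging.INFO)

# Placeholder session and credentials; a real setup also needs the
# PostgreSQL JDBC driver on the Spark classpath.
spark = SparkSession.builder.appName("dialect-log-sketch").getOrCreate()
postgres = Postgres(
    host="localhost",
    port=5432,
    user="onetl",
    password="secret",
    database="test",
    spark=spark,
)

# sql() now logs the detected Spark JDBC dialect before executing, e.g.:
#   |Postgres| Detected dialect: 'org.apache.spark.sql.jdbc.PostgresDialect'
df = postgres.sql("SELECT 1 AS id")
```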
29 changes: 14 additions & 15 deletions onetl/connection/db_connection/jdbc_mixin/connection.py
@@ -205,7 +205,7 @@ def fetch(

query = clear_statement(query)

- log.info("|%s| Detected dialect: '%s'", self.__class__.__name__, self.jdbc_dialect)
+ log.info("|%s| Detected dialect: '%s'", self.__class__.__name__, self._get_spark_dialect_name())
log.info("|%s| Executing SQL query (on driver):", self.__class__.__name__)
log_lines(log, query)

@@ -278,7 +278,7 @@ def execute(

statement = clear_statement(statement)

- log.info("|%s| Detected dialect: '%s'", self.__class__.__name__, self.jdbc_dialect)
+ log.info("|%s| Detected dialect: '%s'", self.__class__.__name__, self._get_spark_dialect_name())
log.info("|%s| Executing statement (on driver):", self.__class__.__name__)
log_lines(log, statement)

@@ -310,16 +310,6 @@ def execute(
log_lines(log, str(metrics))
return df

- @property
- def jdbc_dialect(self):
-     """
-     Returns the JDBC dialect associated with the connection URL.
-     """
-     jdbc_dialects_package = self.spark._jvm.org.apache.spark.sql.jdbc
-     dialect = jdbc_dialects_package.JdbcDialects.get(self.jdbc_url).toString()
-
-     return dialect.split("$")[0] if "$" in dialect else dialect
-
@validator("spark")
def _check_java_class_imported(cls, spark):
try:
@@ -429,6 +419,17 @@ def _get_jdbc_connection(self, options: JDBCFetchOptions | JDBCExecuteOptions):
self._last_connection_and_options.data = (new_connection, options)
return new_connection

+ def _get_spark_dialect_name(self) -> str:
+     """
+     Returns the name of the JDBC dialect associated with the connection URL.
+     """
+     dialect = self._get_spark_dialect().toString()
+     return dialect.split("$")[0] if "$" in dialect else dialect
+
+ def _get_spark_dialect(self):
+     jdbc_dialects_package = self.spark._jvm.org.apache.spark.sql.jdbc
+     return jdbc_dialects_package.JdbcDialects.get(self.jdbc_url)
+
def _close_connections(self):
with suppress(Exception):
# connection maybe not opened yet
@@ -571,9 +572,7 @@ def _resultset_to_dataframe(self, result_set) -> DataFrame:

from pyspark.sql import DataFrame # noqa: WPS442

- jdbc_dialects_package = self.spark._jvm.org.apache.spark.sql.jdbc  # type: ignore
- jdbc_dialect = jdbc_dialects_package.JdbcDialects.get(self.jdbc_url)
-
+ jdbc_dialect = self._get_spark_dialect()
jdbc_utils_package = self.spark._jvm.org.apache.spark.sql.execution.datasources.jdbc # type: ignore
jdbc_utils = jdbc_utils_package.JdbcUtils

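The mixin now exposes two private helpers: `_get_spark_dialect()` asks Spark's `JdbcDialects` registry (via py4j) for the dialect object matching `jdbc_url`, and `_get_spark_dialect_name()` normalizes its `toString()` output by cutting everything from the first `$`, turning the JVM name of a Scala singleton object into the plain class name that the tests below assert on. A small pure-Python sketch of that trimming step, with illustrative inputs (the exact JVM `toString()` output may differ slightly):

```python
def normalize_dialect_name(dialect: str) -> str:
    # Same trimming rule as _get_spark_dialect_name(): drop the '$...' tail
    # that Scala singleton objects carry in their JVM class name.
    return dialect.split("$")[0] if "$" in dialect else dialect


print(normalize_dialect_name("org.apache.spark.sql.jdbc.PostgresDialect$@1f2e3d"))
# -> org.apache.spark.sql.jdbc.PostgresDialect
print(normalize_dialect_name("org.apache.spark.sql.jdbc.NoopDialect$@4a5b6c"))
# -> org.apache.spark.sql.jdbc.NoopDialect
```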
3 changes: 2 additions & 1 deletion requirements/docs.txt
@@ -9,7 +9,8 @@ sphinx<8
sphinx-copybutton
sphinx-design
sphinx-favicon
- sphinx-last-updated-by-git
+ # https://github.com/mgeier/sphinx-last-updated-by-git/pull/77
+ sphinx-last-updated-by-git>=0.3.8
# TODO: uncomment after https://github.com/zqmillet/sphinx-plantuml/pull/4
# sphinx-plantuml
sphinx-tabs
@@ -62,7 +62,7 @@ def test_clickhouse_connection_check_extra_is_handled_by_driver(spark, processin


@pytest.mark.parametrize("suffix", ["", ";"])
- def test_clickhouse_connection_sql(spark, processing, load_table_data, suffix):
+ def test_clickhouse_connection_sql(spark, processing, load_table_data, suffix, caplog):
clickhouse = Clickhouse(
host=processing.host,
port=processing.port,
@@ -80,6 +80,9 @@ def test_clickhouse_connection_sql(spark, processing, load_table_data, suffix):
order_by="id_int",
)

+ with caplog.at_level(logging.INFO):
+     assert "Detected dialect: 'org.apache.spark.sql.jdbc.NoopDialect'" in caplog.text

processing.assert_equal_df(df=df, other_frame=table_df, order_by="id_int")
df = clickhouse.sql(f"SELECT * FROM {table} WHERE id_int < 50{suffix}")
filtered_df = table_df[table_df.id_int < 50]
@@ -91,7 +94,7 @@ def test_clickhouse_connection_sql(spark, processing, load_table_data, suffix):


@pytest.mark.parametrize("suffix", ["", ";"])
- def test_clickhouse_connection_fetch(spark, processing, load_table_data, suffix):
+ def test_clickhouse_connection_fetch(spark, processing, load_table_data, suffix, caplog):
clickhouse = Clickhouse(
host=processing.host,
port=processing.port,
@@ -105,6 +108,9 @@ def test_clickhouse_connection_fetch(spark, processing, load_table_data, suffix)
table = load_table_data.full_name
df = clickhouse.fetch(f"SELECT * FROM {table}{suffix}")

+ with caplog.at_level(logging.INFO):
+     assert "Detected dialect: 'org.apache.spark.sql.jdbc.NoopDialect'" in caplog.text

table_df = processing.get_expected_dataframe(
schema=load_table_data.schema,
table=load_table_data.table,
@@ -273,6 +279,7 @@ def test_clickhouse_connection_execute_function(
processing,
load_table_data,
suffix,
+ caplog,
):
clickhouse = Clickhouse(
host=processing.host,
@@ -306,6 +313,9 @@ def func_finalizer():

processing.assert_equal_df(df=df, other_frame=other_df)

+ with caplog.at_level(logging.INFO):
+     assert "Detected dialect: 'org.apache.spark.sql.jdbc.NoopDialect'" in caplog.text

# not enough arguments
with pytest.raises(Exception):
clickhouse.fetch(f"SELECT {func}(id_int) FROM {table}{suffix}")
Expand Down Expand Up @@ -338,21 +348,3 @@ def test_clickhouse_connection_no_jdbc_dialect(spark, processing, load_table_dat

with caplog.at_level(logging.INFO):
assert "Detected dialect: 'org.apache.spark.sql.jdbc.NoopDialect'" in caplog.text

- # clear the caplog buffer
- caplog.clear()
- clickhouse.sql("SELECT version()")
- with caplog.at_level(logging.INFO):
-     assert "Detected dialect: 'org.apache.spark.sql.jdbc.NoopDialect'" in caplog.text
-
- # clear the caplog buffer
- caplog.clear()
- clickhouse.fetch("SELECT version()")
- with caplog.at_level(logging.INFO):
-     assert "Detected dialect: 'org.apache.spark.sql.jdbc.NoopDialect'" in caplog.text
-
- # clear the caplog buffer
- caplog.clear()
- clickhouse.execute(f"TRUNCATE TABLE {table}")
- with caplog.at_level(logging.INFO):
-     assert "Detected dialect: 'org.apache.spark.sql.jdbc.NoopDialect'" in caplog.text
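In the integration tests (the ClickHouse tests above and the Postgres tests below), each scenario now takes the `caplog` fixture and checks the dialect log line once, replacing the removed block that repeated the same assertion after `sql()`, `fetch()` and `execute()`. A condensed, hypothetical sketch of the pattern — the `clickhouse` and `load_table_data` fixtures stand in for the real suite's setup, and here the call is wrapped inside `caplog.at_level()` so INFO records are captured regardless of the configured log level:

```python
import logging

import pytest


@pytest.mark.parametrize("suffix", ["", ";"])
def test_detected_dialect_is_logged(clickhouse, load_table_data, suffix, caplog):
    table = load_table_data.full_name

    with caplog.at_level(logging.INFO):
        clickhouse.fetch(f"SELECT * FROM {table}{suffix}")

    # ClickHouse has no bundled Spark JDBC dialect, so lookups fall back to NoopDialect.
    assert "Detected dialect: 'org.apache.spark.sql.jdbc.NoopDialect'" in caplog.text
```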
@@ -48,7 +48,7 @@ def test_postgres_connection_check_fail(spark):


@pytest.mark.parametrize("suffix", ["", ";"])
- def test_postgres_connection_sql(spark, processing, load_table_data, suffix):
+ def test_postgres_connection_sql(spark, processing, load_table_data, suffix, caplog):
postgres = Postgres(
host=processing.host,
port=processing.port,
@@ -67,6 +67,9 @@ def test_postgres_connection_sql(spark, processing, load_table_data, suffix):
order_by="id_int",
)

+ with caplog.at_level(logging.INFO):
+     assert "Detected dialect: 'org.apache.spark.sql.jdbc.PostgresDialect'" in caplog.text

processing.assert_equal_df(df=df, other_frame=table_df)

df = postgres.sql(f"SELECT * FROM {table} WHERE id_int < 50{suffix}")
@@ -79,7 +82,7 @@ def test_postgres_connection_sql(spark, processing, load_table_data, suffix):


@pytest.mark.parametrize("suffix", ["", ";"])
- def test_postgres_connection_fetch(spark, processing, load_table_data, suffix):
+ def test_postgres_connection_fetch(spark, processing, load_table_data, suffix, caplog):
postgres = Postgres(
host=processing.host,
port=processing.port,
@@ -99,6 +102,9 @@ def test_postgres_connection_fetch(spark, processing, load_table_data, suffix):
)
processing.assert_equal_df(df=df, other_frame=table_df)

+ with caplog.at_level(logging.INFO):
+     assert "Detected dialect: 'org.apache.spark.sql.jdbc.PostgresDialect'" in caplog.text

df = postgres.fetch(f"SELECT * FROM {table} WHERE id_int < 50{suffix}")
filtered_df = table_df[table_df.id_int < 50]
processing.assert_equal_df(df=df, other_frame=filtered_df)
@@ -1023,7 +1029,7 @@ def test_postgres_connection_fetch_with_legacy_jdbc_options(spark, processing):
assert df is not None


- def test_postgres_connection_execute_with_legacy_jdbc_options(spark, processing):
+ def test_postgres_connection_execute_with_legacy_jdbc_options(spark, processing, caplog):
postgres = Postgres(
host=processing.host,
port=processing.port,
@@ -1036,6 +1042,9 @@ def test_postgres_connection_execute_with_legacy_jdbc_options(spark, processing)
options = Postgres.JDBCOptions(query_timeout=30)
postgres.execute("DROP TABLE IF EXISTS temp_table;", options=options)

+ with caplog.at_level(logging.INFO):
+     assert "Detected dialect: 'org.apache.spark.sql.jdbc.PostgresDialect'" in caplog.text


def test_postgres_connection_jdbc_dialect_usage(spark, processing, load_table_data, caplog):
postgres = Postgres(
@@ -1052,24 +1061,3 @@ def test_postgres_connection_jdbc_dialect_usage(spark, processing, load_table_da

with caplog.at_level(logging.INFO):
assert "Detected dialect: 'org.apache.spark.sql.jdbc.PostgresDialect'" in caplog.text

- # clear the caplog buffer
- caplog.clear()
- postgres.sql("SELECT version()")
- with caplog.at_level(logging.INFO):
-     assert "Detected dialect: 'org.apache.spark.sql.jdbc.PostgresDialect'" in caplog.text
-
- caplog.clear()
- postgres.fetch("SELECT version()")
- with caplog.at_level(logging.INFO):
-     assert "Detected dialect: 'org.apache.spark.sql.jdbc.PostgresDialect'" in caplog.text
-
- caplog.clear()
- postgres.fetch("SELECT version()")
- with caplog.at_level(logging.INFO):
-     assert "Detected dialect: 'org.apache.spark.sql.jdbc.PostgresDialect'" in caplog.text
-
- caplog.clear()
- postgres.execute(f"TRUNCATE TABLE {table}")
- with caplog.at_level(logging.INFO):
-     assert "Detected dialect: 'org.apache.spark.sql.jdbc.PostgresDialect'" in caplog.text
