diff --git a/tests/tests_integration/test_file_format_integration/test_avro_integration.py b/tests/tests_integration/test_file_format_integration/test_avro_integration.py index cb687776c..eaffd6499 100644 --- a/tests/tests_integration/test_file_format_integration/test_avro_integration.py +++ b/tests/tests_integration/test_file_format_integration/test_avro_integration.py @@ -13,8 +13,7 @@ try: from tests.util.assert_df import assert_equal_df except ImportError: - # pandas and spark can be missing if someone runs tests for file connections only - pass + pytest.skip("Missing pandas", allow_module_level=True) pytestmark = [pytest.mark.local_fs, pytest.mark.file_df_connection, pytest.mark.connection] @@ -72,7 +71,7 @@ def test_avro_reader( assert read_df.count() assert read_df.schema == df.schema - assert_equal_df(read_df, df) + assert_equal_df(read_df, df, order_by="id") @pytest.mark.parametrize( @@ -116,4 +115,4 @@ def test_avro_writer( assert read_df.count() assert read_df.schema == df.schema - assert_equal_df(read_df, df) + assert_equal_df(read_df, df, order_by="id") diff --git a/tests/tests_integration/test_file_format_integration/test_csv_integration.py b/tests/tests_integration/test_file_format_integration/test_csv_integration.py index 289e88273..526536f1c 100644 --- a/tests/tests_integration/test_file_format_integration/test_csv_integration.py +++ b/tests/tests_integration/test_file_format_integration/test_csv_integration.py @@ -16,8 +16,7 @@ from tests.util.assert_df import assert_equal_df from tests.util.spark_df import reset_column_names except ImportError: - # pandas and spark can be missing if someone runs tests for file connections only - pass + pytest.skip("Missing pandas or pyspark", allow_module_level=True) pytestmark = [pytest.mark.local_fs, pytest.mark.file_df_connection, pytest.mark.connection] @@ -56,7 +55,7 @@ def test_csv_reader_with_infer_schema( assert read_df.schema != df.schema assert read_df.schema == expected_df.schema - assert_equal_df(read_df, expected_df) + assert_equal_df(read_df, expected_df, order_by="id") @pytest.mark.parametrize( @@ -89,7 +88,7 @@ def test_csv_reader_with_options( assert read_df.count() assert read_df.schema == df.schema - assert_equal_df(read_df, df) + assert_equal_df(read_df, df, order_by="id") @pytest.mark.parametrize( @@ -131,4 +130,4 @@ def test_csv_writer_with_options( assert read_df.count() assert read_df.schema == df.schema - assert_equal_df(read_df, df) + assert_equal_df(read_df, df, order_by="id") diff --git a/tests/tests_integration/test_file_format_integration/test_excel_integration.py b/tests/tests_integration/test_file_format_integration/test_excel_integration.py index de8cc9cf9..1cb24e96a 100644 --- a/tests/tests_integration/test_file_format_integration/test_excel_integration.py +++ b/tests/tests_integration/test_file_format_integration/test_excel_integration.py @@ -16,8 +16,7 @@ from tests.util.assert_df import assert_equal_df from tests.util.spark_df import reset_column_names except ImportError: - # pandas and spark can be missing if someone runs tests for file connections only - pass + pytest.skip("Missing pandas or pyspark", allow_module_level=True) pytestmark = [pytest.mark.local_fs, pytest.mark.file_df_connection, pytest.mark.connection] @@ -56,7 +55,7 @@ def test_excel_reader_with_infer_schema( assert read_df.schema != df.schema assert read_df.schema == expected_df.schema - assert_equal_df(read_df, expected_df) + assert_equal_df(read_df, expected_df, order_by="id") @pytest.mark.parametrize("format", ["xlsx", "xls"]) @@ -96,7 +95,7 @@ def test_excel_reader_with_options( assert read_df.count() assert read_df.schema == df.schema - assert_equal_df(read_df, df) + assert_equal_df(read_df, df, order_by="id") @pytest.mark.parametrize( @@ -139,4 +138,4 @@ def test_excel_writer( assert read_df.count() assert read_df.schema == df.schema - assert_equal_df(read_df, df) + assert_equal_df(read_df, df, order_by="id") diff --git a/tests/tests_integration/test_file_format_integration/test_json_integration.py b/tests/tests_integration/test_file_format_integration/test_json_integration.py index 9f195233a..f1fbd1380 100644 --- a/tests/tests_integration/test_file_format_integration/test_json_integration.py +++ b/tests/tests_integration/test_file_format_integration/test_json_integration.py @@ -12,8 +12,7 @@ try: from tests.util.assert_df import assert_equal_df except ImportError: - # pandas and spark can be missing if someone runs tests for file connections only - pass + pytest.skip("Missing pandas", allow_module_level=True) pytestmark = [pytest.mark.local_fs, pytest.mark.file_df_connection, pytest.mark.connection] @@ -47,7 +46,7 @@ def test_json_reader( assert read_df.count() assert read_df.schema == df.schema - assert_equal_df(read_df, df) + assert_equal_df(read_df, df, order_by="id") def test_json_writer_is_not_supported( diff --git a/tests/tests_integration/test_file_format_integration/test_jsonline_integration.py b/tests/tests_integration/test_file_format_integration/test_jsonline_integration.py index ce955b261..f4678e17d 100644 --- a/tests/tests_integration/test_file_format_integration/test_jsonline_integration.py +++ b/tests/tests_integration/test_file_format_integration/test_jsonline_integration.py @@ -12,8 +12,7 @@ try: from tests.util.assert_df import assert_equal_df except ImportError: - # pandas and spark can be missing if someone runs tests for file connections only - pass + pytest.skip("Missing pandas", allow_module_level=True) pytestmark = [pytest.mark.local_fs, pytest.mark.file_df_connection, pytest.mark.connection] @@ -47,7 +46,7 @@ def test_jsonline_reader( assert read_df.count() assert read_df.schema == df.schema - assert_equal_df(read_df, df) + assert_equal_df(read_df, df, order_by="id") @pytest.mark.parametrize( @@ -85,4 +84,4 @@ def test_jsonline_writer( assert read_df.count() assert read_df.schema == df.schema - assert_equal_df(read_df, df) + assert_equal_df(read_df, df, order_by="id") diff --git a/tests/tests_integration/test_file_format_integration/test_orc_integration.py b/tests/tests_integration/test_file_format_integration/test_orc_integration.py index 9c11e43fa..a848f0f25 100644 --- a/tests/tests_integration/test_file_format_integration/test_orc_integration.py +++ b/tests/tests_integration/test_file_format_integration/test_orc_integration.py @@ -12,8 +12,7 @@ try: from tests.util.assert_df import assert_equal_df except ImportError: - # pandas and spark can be missing if someone runs tests for file connections only - pass + pytest.skip("Missing pandas", allow_module_level=True) pytestmark = [pytest.mark.local_fs, pytest.mark.file_df_connection, pytest.mark.connection] @@ -47,7 +46,7 @@ def test_orc_reader( assert read_df.count() assert read_df.schema == df.schema - assert_equal_df(read_df, df) + assert_equal_df(read_df, df, order_by="id") @pytest.mark.parametrize( @@ -85,4 +84,4 @@ def test_orc_writer( assert read_df.count() assert read_df.schema == df.schema - assert_equal_df(read_df, df) + assert_equal_df(read_df, df, order_by="id") diff --git a/tests/tests_integration/test_file_format_integration/test_parquet_integration.py b/tests/tests_integration/test_file_format_integration/test_parquet_integration.py index 79065e889..41d492c43 100644 --- a/tests/tests_integration/test_file_format_integration/test_parquet_integration.py +++ b/tests/tests_integration/test_file_format_integration/test_parquet_integration.py @@ -12,8 +12,7 @@ try: from tests.util.assert_df import assert_equal_df except ImportError: - # pandas and spark can be missing if someone runs tests for file connections only - pass + pytest.skip("Missing pandas", allow_module_level=True) pytestmark = [pytest.mark.local_fs, pytest.mark.file_df_connection, pytest.mark.connection] @@ -47,7 +46,7 @@ def test_parquet_reader( assert read_df.count() assert read_df.schema == df.schema - assert_equal_df(read_df, df) + assert_equal_df(read_df, df, order_by="id") @pytest.mark.parametrize( @@ -85,4 +84,4 @@ def test_parquet_writer( assert read_df.count() assert read_df.schema == df.schema - assert_equal_df(read_df, df) + assert_equal_df(read_df, df, order_by="id") diff --git a/tests/tests_integration/test_file_format_integration/test_xml_integration.py b/tests/tests_integration/test_file_format_integration/test_xml_integration.py index d03a6f61d..2be9d33a4 100644 --- a/tests/tests_integration/test_file_format_integration/test_xml_integration.py +++ b/tests/tests_integration/test_file_format_integration/test_xml_integration.py @@ -13,8 +13,7 @@ try: from tests.util.assert_df import assert_equal_df except ImportError: - # pandas and spark can be missing if someone runs tests for file connections only - pass + pytest.skip("Missing pandas", allow_module_level=True) pytestmark = [pytest.mark.local_fs, pytest.mark.file_df_connection, pytest.mark.connection] @@ -60,7 +59,7 @@ def test_xml_reader( read_df = reader.run() assert read_df.count() assert read_df.schema == df.schema - assert_equal_df(read_df, df) + assert_equal_df(read_df, df, order_by="id") def test_xml_reader_with_infer_schema( @@ -90,7 +89,7 @@ def test_xml_reader_with_infer_schema( assert set(read_df.columns) == set( expected_xml_attributes_df.columns, ) # "DataFrames have different column types: StructField('id', IntegerType(), True), StructField('id', LongType(), True), etc." - assert_equal_df(read_df, expected_xml_attributes_df) + assert_equal_df(read_df, expected_xml_attributes_df, order_by="id") @pytest.mark.parametrize( @@ -133,7 +132,7 @@ def test_xml_writer( assert read_df.count() assert read_df.schema == df.schema - assert_equal_df(read_df, df) + assert_equal_df(read_df, df, order_by="id") @pytest.mark.parametrize( @@ -166,4 +165,4 @@ def test_xml_reader_with_attributes( read_df = reader.run() assert read_df.count() assert read_df.schema == expected_xml_attributes_df.schema - assert_equal_df(read_df, expected_xml_attributes_df) + assert_equal_df(read_df, expected_xml_attributes_df, order_by="id") diff --git a/tests/tests_integration/tests_core_integration/test_file_df_reader_integration/test_common_file_df_reader_integration.py b/tests/tests_integration/tests_core_integration/test_file_df_reader_integration/test_common_file_df_reader_integration.py index ad58c0034..31e5f32b6 100644 --- a/tests/tests_integration/tests_core_integration/test_file_df_reader_integration/test_common_file_df_reader_integration.py +++ b/tests/tests_integration/tests_core_integration/test_file_df_reader_integration/test_common_file_df_reader_integration.py @@ -28,8 +28,7 @@ from tests.util.assert_df import assert_equal_df except ImportError: - # pandas and spark can be missing if someone runs tests for file connections only - pass + pytest.skip("Missing pandas or pyspark", allow_module_level=True) def test_file_df_reader_run( diff --git a/tests/tests_integration/tests_core_integration/test_file_df_writer_integration/test_common_file_df_writer_integration.py b/tests/tests_integration/tests_core_integration/test_file_df_writer_integration/test_common_file_df_writer_integration.py index 448710183..91f74bc8f 100644 --- a/tests/tests_integration/tests_core_integration/test_file_df_writer_integration/test_common_file_df_writer_integration.py +++ b/tests/tests_integration/tests_core_integration/test_file_df_writer_integration/test_common_file_df_writer_integration.py @@ -15,8 +15,7 @@ try: from tests.util.assert_df import assert_equal_df except ImportError: - # pandas and spark can be missing if someone runs tests for file connections only - pass + pytest.skip("Missing pandas", allow_module_level=True) @pytest.mark.parametrize( diff --git a/tests/tests_integration/tests_db_connection_integration/test_clickhouse_integration.py b/tests/tests_integration/tests_db_connection_integration/test_clickhouse_integration.py index 5a7f1bf4b..73d8abfb2 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_clickhouse_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_clickhouse_integration.py @@ -6,8 +6,7 @@ try: import pandas except ImportError: - # pandas can be missing if someone runs tests for file connections only - pass + pytest.skip("Missing pandas", allow_module_level=True) from onetl.connection import Clickhouse diff --git a/tests/tests_integration/tests_db_connection_integration/test_greenplum_integration.py b/tests/tests_integration/tests_db_connection_integration/test_greenplum_integration.py index 91a52bb9d..a424282d3 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_greenplum_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_greenplum_integration.py @@ -5,8 +5,7 @@ try: import pandas except ImportError: - # pandas can be missing if someone runs tests for file connections only - pass + pytest.skip("Missing pandas", allow_module_level=True) from onetl.connection import Greenplum diff --git a/tests/tests_integration/tests_db_connection_integration/test_hive_integration.py b/tests/tests_integration/tests_db_connection_integration/test_hive_integration.py index 8a90578cb..69d7bae6b 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_hive_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_hive_integration.py @@ -7,8 +7,7 @@ try: import pandas except ImportError: - # pandas can be missing if someone runs tests for file connections only - pass + pytest.skip("Missing pandas", allow_module_level=True) from onetl.connection import Hive diff --git a/tests/tests_integration/tests_db_connection_integration/test_mssql_integration.py b/tests/tests_integration/tests_db_connection_integration/test_mssql_integration.py index 96e23183b..9a875671a 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_mssql_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_mssql_integration.py @@ -5,8 +5,7 @@ try: import pandas except ImportError: - # pandas can be missing if someone runs tests for file connections only - pass + pytest.skip("Missing pandas", allow_module_level=True) from onetl.connection import MSSQL diff --git a/tests/tests_integration/tests_db_connection_integration/test_mysql_integration.py b/tests/tests_integration/tests_db_connection_integration/test_mysql_integration.py index dad717b1c..72a6b3b8f 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_mysql_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_mysql_integration.py @@ -5,8 +5,7 @@ try: import pandas except ImportError: - # pandas can be missing if someone runs tests for file connections only - pass + pytest.skip("Missing pandas", allow_module_level=True) from onetl.connection import MySQL diff --git a/tests/tests_integration/tests_db_connection_integration/test_oracle_integration.py b/tests/tests_integration/tests_db_connection_integration/test_oracle_integration.py index ce4031c75..6bd96b259 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_oracle_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_oracle_integration.py @@ -6,8 +6,7 @@ try: import pandas except ImportError: - # pandas can be missing if someone runs tests for file connections only - pass + pytest.skip("Missing pandas", allow_module_level=True) from onetl.connection import Oracle diff --git a/tests/tests_integration/tests_db_connection_integration/test_postgres_integration.py b/tests/tests_integration/tests_db_connection_integration/test_postgres_integration.py index 9f2d2253b..1fb74095f 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_postgres_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_postgres_integration.py @@ -5,8 +5,7 @@ try: import pandas except ImportError: - # pandas can be missing if someone runs tests for file connections only - pass + pytest.skip("Missing pandas", allow_module_level=True) from onetl.connection import Postgres diff --git a/tests/tests_integration/tests_strategy_integration/test_strategy_incremental_batch.py b/tests/tests_integration/tests_strategy_integration/test_strategy_incremental_batch.py index febf4d373..5a7c87a0f 100644 --- a/tests/tests_integration/tests_strategy_integration/test_strategy_incremental_batch.py +++ b/tests/tests_integration/tests_strategy_integration/test_strategy_incremental_batch.py @@ -12,8 +12,7 @@ from tests.util.to_pandas import to_pandas except ImportError: - # pandas can be missing if someone runs tests for file connections only - pass + pytest.skip("Missing pandas", allow_module_level=True) from etl_entities.hwm_store import HWMStoreStackManager diff --git a/tests/tests_integration/tests_strategy_integration/test_strategy_snapshot.py b/tests/tests_integration/tests_strategy_integration/test_strategy_snapshot.py index 80360e2c3..b61461b3a 100644 --- a/tests/tests_integration/tests_strategy_integration/test_strategy_snapshot.py +++ b/tests/tests_integration/tests_strategy_integration/test_strategy_snapshot.py @@ -12,8 +12,7 @@ from tests.util.to_pandas import to_pandas except ImportError: - # pandas can be missing if someone runs tests for file connections only - pass + pytest.skip("Missing pandas", allow_module_level=True) from onetl.connection import Postgres from onetl.db import DBReader diff --git a/tests/tests_integration/tests_strategy_integration/tests_incremental_batch_strategy_integration/test_strategy_incremental_batch_mongodb.py b/tests/tests_integration/tests_strategy_integration/tests_incremental_batch_strategy_integration/test_strategy_incremental_batch_mongodb.py index 0aafb2835..2809cdb1b 100644 --- a/tests/tests_integration/tests_strategy_integration/tests_incremental_batch_strategy_integration/test_strategy_incremental_batch_mongodb.py +++ b/tests/tests_integration/tests_strategy_integration/tests_incremental_batch_strategy_integration/test_strategy_incremental_batch_mongodb.py @@ -123,7 +123,7 @@ def test_mongodb_strategy_incremental_batch( # same behavior as SnapshotBatchStrategy, no rows skipped if "int" in hwm_column: # only changed data has been read - processing.assert_equal_df(df=first_df, other_frame=first_span, order_by="id_int") + processing.assert_equal_df(df=first_df, other_frame=first_span, order_by="_id") else: # date and datetime values have a random part # so instead of checking the whole dataframe a partial comparison should be performed @@ -172,7 +172,7 @@ def test_mongodb_strategy_incremental_batch( if "int" in hwm_column: # only changed data has been read - processing.assert_equal_df(df=second_df, other_frame=second_span, order_by="id_int") + processing.assert_equal_df(df=second_df, other_frame=second_span, order_by="_id") else: # date and datetime values have a random part # so instead of checking the whole dataframe a partial comparison should be performed @@ -224,7 +224,7 @@ def test_mongodb_strategy_incremental_batch_where(spark, processing, prepare_sch else: first_df = first_df.union(next_df) - processing.assert_equal_df(df=first_df, other_frame=first_span[:51], order_by="id_int") + processing.assert_equal_df(df=first_df, other_frame=first_span[:51], order_by="_id") # insert second span processing.insert_data( @@ -243,4 +243,4 @@ def test_mongodb_strategy_incremental_batch_where(spark, processing, prepare_sch else: second_df = second_df.union(next_df) - processing.assert_equal_df(df=second_df, other_frame=second_span[:19], order_by="id_int") + processing.assert_equal_df(df=second_df, other_frame=second_span[:19], order_by="_id") diff --git a/tests/tests_integration/tests_strategy_integration/tests_incremental_strategy_integration/test_strategy_increment_common.py b/tests/tests_integration/tests_strategy_integration/tests_incremental_strategy_integration/test_strategy_increment_common.py index 0701bd21e..3497266fa 100644 --- a/tests/tests_integration/tests_strategy_integration/tests_incremental_strategy_integration/test_strategy_increment_common.py +++ b/tests/tests_integration/tests_strategy_integration/tests_incremental_strategy_integration/test_strategy_increment_common.py @@ -11,8 +11,7 @@ try: import pandas except ImportError: - # pandas can be missing if someone runs tests for file connections only - pass + pytest.skip("Missing pandas", allow_module_level=True) from onetl.connection import Postgres from onetl.db import DBReader