fix(datasets): Investigate ManagedTableDataset test failures (#671)
* revert ignore databricks test and coverage

* Update managed_table_dataset.py

* attempt a fix

* revert

Signed-off-by: Sajid Alam <[email protected]>

* Update managed_table_dataset.py

Signed-off-by: Sajid Alam <[email protected]>

* Update managed_table_dataset.py

Signed-off-by: Sajid Alam <[email protected]>

* Update managed_table_dataset.py

Signed-off-by: Sajid Alam <[email protected]>

* fix tests

Signed-off-by: Sajid Alam <[email protected]>

* revert

Signed-off-by: Sajid Alam <[email protected]>

* revert test

Signed-off-by: Sajid Alam <[email protected]>

* add compatibility

Signed-off-by: Sajid Alam <[email protected]>

* Update test_managed_table_dataset.py

Signed-off-by: Sajid Alam <[email protected]>

* Update managed_table_dataset.py

Signed-off-by: Sajid Alam <[email protected]>

* Update test_managed_table_dataset.py

Signed-off-by: Sajid Alam <[email protected]>

* Update test_managed_table_dataset.py

Signed-off-by: Sajid Alam <[email protected]>

* debugging

Signed-off-by: Sajid Alam <[email protected]>

* update tests and save method

Signed-off-by: Sajid Alam <[email protected]>

* lint

Signed-off-by: Sajid Alam <[email protected]>

* fix mypy

Signed-off-by: Sajid Alam <[email protected]>

* test pyspark 3.4 and above

Signed-off-by: Sajid Alam <[email protected]>

* Update pyproject.toml

Signed-off-by: Sajid Alam <[email protected]>

* try DF.items instead

Signed-off-by: Sajid Alam <[email protected]>

* Update managed_table_dataset.py

Signed-off-by: Sajid Alam <[email protected]>

* Update managed_table_dataset.py

Signed-off-by: Sajid Alam <[email protected]>

---------

Signed-off-by: Sajid Alam <[email protected]>
SajidAlamQB authored Jun 6, 2024
1 parent ca46a05 commit 297a648
Showing 3 changed files with 3 additions and 2 deletions.
2 changes: 1 addition & 1 deletion Makefile

@@ -27,7 +27,7 @@ test:

 # Run test_tensorflow_model_dataset separately, because these tests are flaky when run as part of the full test-suite
 dataset-tests: dataset-doctests
-	cd kedro-datasets && pytest tests --cov-config pyproject.toml --numprocesses 4 --dist loadfile --ignore tests/databricks --ignore tests/tensorflow
+	cd kedro-datasets && pytest tests --cov-config pyproject.toml --numprocesses 4 --dist loadfile --ignore tests/tensorflow
 	cd kedro-datasets && pytest tests/tensorflow/test_tensorflow_model_dataset.py --no-cov

 extra_pytest_args-no-spark=--ignore kedro_datasets/databricks --ignore kedro_datasets/spark
1 change: 1 addition & 0 deletions kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py

@@ -22,6 +22,7 @@
 from kedro_datasets.spark.spark_dataset import _get_spark

 logger = logging.getLogger(__name__)
+pd.DataFrame.iteritems = pd.DataFrame.items


 @dataclass(frozen=True)
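The one-line shim added above restores `DataFrame.iteritems`, which pandas 2.0 removed but which PySpark versions before 3.4 still call when converting a pandas DataFrame via `SparkSession.createDataFrame`. A minimal standalone sketch of the idea (the version guard and the sample DataFrame here are illustrative, not part of the commit):

```python
import pandas as pd

# pandas 2.0 removed DataFrame.iteritems; PySpark < 3.4 still calls it
# internally when converting a pandas DataFrame to a Spark DataFrame.
# Aliasing it back to DataFrame.items keeps older PySpark working.
if not hasattr(pd.DataFrame, "iteritems"):
    pd.DataFrame.iteritems = pd.DataFrame.items

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
# iteritems now behaves like items: it yields (column_name, Series) pairs
cols = [name for name, _series in df.iteritems()]
```

The commit applies the alias unconditionally at module import time; a `hasattr` guard, as sketched here, avoids reassigning the attribute on pandas versions that still ship `iteritems`.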
2 changes: 1 addition & 1 deletion kedro-datasets/pyproject.toml

@@ -291,7 +291,7 @@ version = {attr = "kedro_datasets.__version__"}
 fail_under = 100
 show_missing = true
 # temporarily ignore kedro_datasets/__init__.py in coverage report
-omit = ["tests/*", "kedro_datasets/holoviews/*", "kedro_datasets/netcdf/*", "kedro_datasets/snowflake/*", "kedro_datasets/tensorflow/*", "kedro_datasets/__init__.py", "kedro_datasets/conftest.py", "kedro_datasets/databricks/*"]
+omit = ["tests/*", "kedro_datasets/holoviews/*", "kedro_datasets/netcdf/*", "kedro_datasets/snowflake/*", "kedro_datasets/tensorflow/*", "kedro_datasets/__init__.py", "kedro_datasets/conftest.py"]
 exclude_also = ["raise NotImplementedError", "if TYPE_CHECKING:"]

 [tool.pytest.ini_options]
