Resolved conflicts

Signed-off-by: Elena Khaustova <[email protected]>
kedro-org · Jan 13, 2025 · 08c3a01 · 08c3a01
2 parents ded3561 + 630f4ea
commit 08c3a01
Showing 4 changed files with 36 additions and 14 deletions.
diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md
@@ -1,11 +1,15 @@
 # Upcoming Release
 ## Major features and improvements
-* Replaced `trufflehog` with `detect-secrets` for detecting secrets within a code base.
+
+- Replaced `trufflehog` with `detect-secrets` for detecting secrets within a code base.
+
 ## Bug fixes and other changes
 
+- Fixed `polars.CSVDataset` `save` method on Windows by using `utf-8` as the default encoding.
+
 ## Breaking Changes
 
-- Removed `tracking.MetricsDataset` and `tracking.JSONDataset`
+- Removed `tracking.MetricsDataset` and `tracking.JSONDataset`.
 
 ## Community contributions
 

diff --git a/kedro-datasets/docs/source/conf.py b/kedro-datasets/docs/source/conf.py
@@ -14,6 +14,8 @@
 from __future__ import annotations
 
 import importlib
+import inspect
+import os
 import re
 import sys
 from inspect import getmembers, isclass, isfunction
@@ -22,6 +24,8 @@
 from click import secho, style
 from kedro import __version__ as release
 
+import kedro_datasets
+
 # -- Project information -----------------------------------------------------
 
 project = "kedro-datasets"
@@ -47,7 +51,7 @@
     "sphinx_autodoc_typehints",
     "sphinx.ext.doctest",
     "sphinx.ext.ifconfig",
-    "sphinx.ext.viewcode",
+    "sphinx.ext.linkcode",
     "sphinxcontrib.jquery",
     "sphinx_copybutton",
     "myst_parser",
@@ -452,3 +456,25 @@ def setup(app):
 user_agent = "Mozilla/5.0 (X11; Linux x86_64; rv:99.0) Gecko/20100101 Firefox/99.0"
 
 myst_heading_anchors = 5
+
+def linkcode_resolve(domain, info):
+    """Return the GitHub URL of a Python object's source lines, or None if unresolvable."""
+    if domain != 'py':
+        return None
+
+    try:
+        obj = sys.modules[info['module']]
+        for attr in info['fullname'].split('.'):
+            obj = getattr(obj, attr)
+        obj = inspect.unwrap(obj)
+
+        filename = inspect.getsourcefile(obj)
+        source, lineno = inspect.getsourcelines(obj)
+        # Path relative to the package directory; force "/" so the URL is valid on Windows.
+        relpath = os.path.relpath(filename, start=os.path.dirname(
+          kedro_datasets.__file__)).replace(os.sep, '/')
+
+        # GitHub's line-range anchor has the form "#L<start>-L<end>".
+        return f'https://github.com/kedro-org/kedro-plugins/blob/main/kedro-datasets/kedro_datasets/{relpath}#L{lineno}-L{lineno + len(source) - 1}'
+    except (KeyError, ImportError, AttributeError, TypeError, OSError, ValueError):
+        return None
diff --git a/kedro-datasets/kedro_datasets/polars/csv_dataset.py b/kedro-datasets/kedro_datasets/polars/csv_dataset.py
@@ -72,7 +72,9 @@ class CSVDataset(AbstractVersionedDataset[pl.DataFrame, pl.DataFrame]):
 
     DEFAULT_LOAD_ARGS: dict[str, Any] = {"rechunk": True}
     DEFAULT_SAVE_ARGS: dict[str, Any] = {}
-    DEFAULT_FS_ARGS: dict[str, Any] = {"open_args_save": {"mode": "w"}}
+    DEFAULT_FS_ARGS: dict[str, Any] = {
+        "open_args_save": {"mode": "w", "encoding": "utf-8"}
+    }
 
     def __init__(  # noqa: PLR0913
         self,

diff --git a/kedro-datasets/tests/polars/test_csv_dataset.py b/kedro-datasets/tests/polars/test_csv_dataset.py
@@ -88,14 +88,12 @@ def mocked_csv_in_s3(mocked_s3_bucket, mocked_dataframe: pl.DataFrame):
 
 
 class TestCSVDataset:
-    @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8")
     def test_save_and_load(self, csv_dataset, dummy_dataframe):
         """Test saving and reloading the dataset."""
         csv_dataset.save(dummy_dataframe)
         reloaded = csv_dataset.load()
         assert_frame_equal(dummy_dataframe, reloaded)
 
-    @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8")
     def test_exists(self, csv_dataset, dummy_dataframe):
         """Test `exists` method invocation for both existing and
         nonexistent dataset."""
@@ -204,15 +202,13 @@ def test_version_str_repr(self, load_version, save_version):
         assert "load_args={'rechunk': True}" in str(ds)
         assert "load_args={'rechunk': True}" in str(ds_versioned)
 
-    @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8")
     def test_save_and_load(self, versioned_csv_dataset, dummy_dataframe):
         """Test that saved and reloaded data matches the original one for
         the versioned dataset."""
         versioned_csv_dataset.save(dummy_dataframe)
         reloaded_df = versioned_csv_dataset.load()
         assert_frame_equal(dummy_dataframe, reloaded_df)
 
-    @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8")
     def test_multiple_loads(self, versioned_csv_dataset, dummy_dataframe, filepath_csv):
         """Test that if a new version is created mid-run, by an
         external system, it won't be loaded in the current run."""
@@ -236,7 +232,6 @@ def test_multiple_loads(self, versioned_csv_dataset, dummy_dataframe, filepath_c
             ds_new.resolve_load_version() == v_new
         )  # new version is discoverable by a new instance
 
-    @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8")
     def test_multiple_saves(self, dummy_dataframe, filepath_csv):
         """Test multiple cycles of save followed by load for the same dataset"""
         ds_versioned = CSVDataset(filepath=filepath_csv, version=Version(None, None))
@@ -259,7 +254,6 @@ def test_multiple_saves(self, dummy_dataframe, filepath_csv):
         ds_new = CSVDataset(filepath=filepath_csv, version=Version(None, None))
         assert ds_new.resolve_load_version() == second_load_version
 
-    @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8")
     def test_release_instance_cache(self, dummy_dataframe, filepath_csv):
         """Test that cache invalidation does not affect other instances"""
         ds_a = CSVDataset(filepath=filepath_csv, version=Version(None, None))
@@ -288,14 +282,12 @@ def test_no_versions(self, versioned_csv_dataset):
         with pytest.raises(DatasetError, match=pattern):
             versioned_csv_dataset.load()
 
-    @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8")
     def test_exists(self, versioned_csv_dataset, dummy_dataframe):
         """Test `exists` method invocation for versioned dataset."""
         assert not versioned_csv_dataset.exists()
         versioned_csv_dataset.save(dummy_dataframe)
         assert versioned_csv_dataset.exists()
 
-    @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8")
     def test_prevent_overwrite(self, versioned_csv_dataset, dummy_dataframe):
         """Check the error when attempting to override the dataset if the
         corresponding CSV file for a given save version already exists."""
@@ -307,7 +299,6 @@ def test_prevent_overwrite(self, versioned_csv_dataset, dummy_dataframe):
         with pytest.raises(DatasetError, match=pattern):
             versioned_csv_dataset.save(dummy_dataframe)
 
-    @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8")
     @pytest.mark.parametrize(
         "load_version", ["2019-01-01T23.59.59.999Z"], indirect=True
     )
@@ -334,7 +325,6 @@ def test_http_filesystem_no_versioning(self):
                 filepath="https://example.com/file.csv", version=Version(None, None)
             )
 
-    @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8")
     def test_versioning_existing_dataset(
         self, csv_dataset, versioned_csv_dataset, dummy_dataframe
     ):