Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Resolved conflicts
Browse files Browse the repository at this point in the history
Signed-off-by: Elena Khaustova <[email protected]>
ElenaKhaustova committed Jan 13, 2025
2 parents ded3561 + 630f4ea commit 08c3a01
Showing 4 changed files with 36 additions and 14 deletions.
8 changes: 6 additions & 2 deletions kedro-datasets/RELEASE.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
# Upcoming Release
## Major features and improvements
* Replaced `trufflehog` with `detect-secrets` for detecting secrets within a code base.

- Replaced `trufflehog` with `detect-secrets` for detecting secrets within a code base.

## Bug fixes and other changes

- Fix polars.CSVDataset `save` method on Windows using `utf-8` as default encoding.

## Breaking Changes

- Removed `tracking.MetricsDataset` and `tracking.JSONDataset`
- Removed `tracking.MetricsDataset` and `tracking.JSONDataset`.

## Community contributions

28 changes: 27 additions & 1 deletion kedro-datasets/docs/source/conf.py
Original file line number Diff line number Diff line change
@@ -14,6 +14,8 @@
from __future__ import annotations

import importlib
import inspect
import os
import re
import sys
from inspect import getmembers, isclass, isfunction
@@ -22,6 +24,8 @@
from click import secho, style
from kedro import __version__ as release

import kedro_datasets

# -- Project information -----------------------------------------------------

project = "kedro-datasets"
@@ -47,7 +51,7 @@
"sphinx_autodoc_typehints",
"sphinx.ext.doctest",
"sphinx.ext.ifconfig",
"sphinx.ext.viewcode",
"sphinx.ext.linkcode",
"sphinxcontrib.jquery",
"sphinx_copybutton",
"myst_parser",
@@ -452,3 +456,25 @@ def setup(app):
user_agent = "Mozilla/5.0 (X11; Linux x86_64; rv:99.0) Gecko/20100101 Firefox/99.0"

myst_heading_anchors = 5

def linkcode_resolve(domain, info):
    """Resolve a GitHub URL corresponding to a Python object.

    This is the hook looked up by the ``sphinx.ext.linkcode`` extension to
    build the "[source]" link for each documented object.

    Args:
        domain: Sphinx domain of the object; only ``"py"`` is handled.
        info: Mapping with the keys ``"module"`` (dotted module name, which
            must already be imported) and ``"fullname"`` (dotted attribute
            path of the object inside that module).

    Returns:
        The URL of the object's source on GitHub, with a ``#L<start>-L<end>``
        line-range anchor, or ``None`` when the object cannot be mapped to a
        source file inside the ``kedro_datasets`` package.
    """
    if domain != "py":
        return None

    try:
        # Walk from the module down the dotted attribute path to the object.
        obj = sys.modules[info["module"]]
        for attr in info["fullname"].split("."):
            obj = getattr(obj, attr)
        # Look through decorator wrappers so the link targets the real source.
        obj = inspect.unwrap(obj)

        filename = inspect.getsourcefile(obj)
        source, lineno = inspect.getsourcelines(obj)
        relpath = os.path.relpath(
            filename, start=os.path.dirname(kedro_datasets.__file__)
        )
    except (KeyError, ImportError, AttributeError, TypeError, OSError, ValueError):
        # Best effort: objects without retrievable source simply get no link.
        return None

    # An object defined outside the package (e.g. re-exported from another
    # library) has no corresponding file in the repository.
    if relpath == os.pardir or relpath.startswith(os.pardir + os.sep):
        return None

    # URLs always use forward slashes, even when the docs are built on Windows.
    relpath = relpath.replace(os.sep, "/")
    first_line = lineno
    last_line = lineno + len(source) - 1

    # GitHub line ranges are written "#L<start>-L<end>".
    return (
        "https://github.com/kedro-org/kedro-plugins/blob/main/"
        f"kedro-datasets/kedro_datasets/{relpath}#L{first_line}-L{last_line}"
    )
4 changes: 3 additions & 1 deletion kedro-datasets/kedro_datasets/polars/csv_dataset.py
Original file line number Diff line number Diff line change
@@ -72,7 +72,9 @@ class CSVDataset(AbstractVersionedDataset[pl.DataFrame, pl.DataFrame]):

DEFAULT_LOAD_ARGS: dict[str, Any] = {"rechunk": True}
DEFAULT_SAVE_ARGS: dict[str, Any] = {}
DEFAULT_FS_ARGS: dict[str, Any] = {"open_args_save": {"mode": "w"}}
DEFAULT_FS_ARGS: dict[str, Any] = {
"open_args_save": {"mode": "w", "encoding": "utf-8"}
}

def __init__( # noqa: PLR0913
self,
10 changes: 0 additions & 10 deletions kedro-datasets/tests/polars/test_csv_dataset.py
Original file line number Diff line number Diff line change
@@ -88,14 +88,12 @@ def mocked_csv_in_s3(mocked_s3_bucket, mocked_dataframe: pl.DataFrame):


class TestCSVDataset:
@pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8")
def test_save_and_load(self, csv_dataset, dummy_dataframe):
"""Test saving and reloading the dataset."""
csv_dataset.save(dummy_dataframe)
reloaded = csv_dataset.load()
assert_frame_equal(dummy_dataframe, reloaded)

@pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8")
def test_exists(self, csv_dataset, dummy_dataframe):
"""Test `exists` method invocation for both existing and
nonexistent dataset."""
@@ -204,15 +202,13 @@ def test_version_str_repr(self, load_version, save_version):
assert "load_args={'rechunk': True}" in str(ds)
assert "load_args={'rechunk': True}" in str(ds_versioned)

@pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8")
def test_save_and_load(self, versioned_csv_dataset, dummy_dataframe):
"""Test that saved and reloaded data matches the original one for
the versioned dataset."""
versioned_csv_dataset.save(dummy_dataframe)
reloaded_df = versioned_csv_dataset.load()
assert_frame_equal(dummy_dataframe, reloaded_df)

@pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8")
def test_multiple_loads(self, versioned_csv_dataset, dummy_dataframe, filepath_csv):
"""Test that if a new version is created mid-run, by an
external system, it won't be loaded in the current run."""
@@ -236,7 +232,6 @@ def test_multiple_loads(self, versioned_csv_dataset, dummy_dataframe, filepath_c
ds_new.resolve_load_version() == v_new
) # new version is discoverable by a new instance

@pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8")
def test_multiple_saves(self, dummy_dataframe, filepath_csv):
"""Test multiple cycles of save followed by load for the same dataset"""
ds_versioned = CSVDataset(filepath=filepath_csv, version=Version(None, None))
@@ -259,7 +254,6 @@ def test_multiple_saves(self, dummy_dataframe, filepath_csv):
ds_new = CSVDataset(filepath=filepath_csv, version=Version(None, None))
assert ds_new.resolve_load_version() == second_load_version

@pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8")
def test_release_instance_cache(self, dummy_dataframe, filepath_csv):
"""Test that cache invalidation does not affect other instances"""
ds_a = CSVDataset(filepath=filepath_csv, version=Version(None, None))
@@ -288,14 +282,12 @@ def test_no_versions(self, versioned_csv_dataset):
with pytest.raises(DatasetError, match=pattern):
versioned_csv_dataset.load()

@pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8")
def test_exists(self, versioned_csv_dataset, dummy_dataframe):
"""Test `exists` method invocation for versioned dataset."""
assert not versioned_csv_dataset.exists()
versioned_csv_dataset.save(dummy_dataframe)
assert versioned_csv_dataset.exists()

@pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8")
def test_prevent_overwrite(self, versioned_csv_dataset, dummy_dataframe):
"""Check the error when attempting to override the dataset if the
corresponding CSV file for a given save version already exists."""
@@ -307,7 +299,6 @@ def test_prevent_overwrite(self, versioned_csv_dataset, dummy_dataframe):
with pytest.raises(DatasetError, match=pattern):
versioned_csv_dataset.save(dummy_dataframe)

@pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8")
@pytest.mark.parametrize(
"load_version", ["2019-01-01T23.59.59.999Z"], indirect=True
)
@@ -334,7 +325,6 @@ def test_http_filesystem_no_versioning(self):
filepath="https://example.com/file.csv", version=Version(None, None)
)

@pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8")
def test_versioning_existing_dataset(
self, csv_dataset, versioned_csv_dataset, dummy_dataframe
):

0 comments on commit 08c3a01

Please sign in to comment.