Skip to content

Commit

Permalink
Merge branch 'main' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
Kedro committed Jul 18, 2024
2 parents 5e23489 + f54f463 commit 0f005f1
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 7 deletions.
1 change: 1 addition & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
* Fixed a bug in the `DataCatalog` `shallow_copy()` method to ensure it returns the type of the used catalog and doesn't cast it to `DataCatalog`.
* Implemented key completion support for accessing datasets in the `DataCatalog`.
* Made [kedro-telemetry](https://github.com/kedro-org/kedro-plugins/tree/main/kedro-telemetry) a core dependency.
* Implemented dataset pretty printing.

## Breaking changes to the API

Expand Down
10 changes: 7 additions & 3 deletions kedro/io/cached_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,14 @@ def _from_config(config: dict, version: Version | None) -> AbstractDataset:
return AbstractDataset.from_config("_cached", config)

def _describe(self) -> dict[str, Any]:
    """Describe this dataset via the wrapped dataset and its cache.

    Returns:
        A dictionary with the wrapped dataset's description under
        ``"dataset"`` and the cache's description under ``"cache"``.
    """
    # Diff residue removed: the span contained both the old multi-line
    # return and this final one-line form; only the latter is reachable.
    return {"dataset": self._dataset._describe(), "cache": self._cache._describe()}

def __repr__(self) -> str:
    """Return an unambiguous, module-qualified representation.

    Both the wrapped dataset and the cache are rendered with their own
    ``_pretty_repr`` so each entry carries its own class name.
    """
    object_description = {
        "dataset": self._dataset._pretty_repr(self._dataset._describe()),
        # BUG FIX: render the cache via the cache object itself; the original
        # called self._dataset._pretty_repr here, which labels the cache with
        # the wrapped dataset's class name whenever the two types differ.
        "cache": self._cache._pretty_repr(self._cache._describe()),
    }
    return self._pretty_repr(object_description)

def _load(self) -> Any:
data = self._cache.load() if self._cache.exists() else self._dataset.load()
Expand Down
21 changes: 21 additions & 0 deletions kedro/io/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
import copy
import logging
import os
import pprint
import re
import sys
import warnings
from collections import namedtuple
from datetime import datetime, timezone
Expand Down Expand Up @@ -228,6 +230,7 @@ def save(self, data: _DI) -> None:
raise DatasetError(message) from exc

def __str__(self) -> str:
# TODO: Replace with __repr__ implementation in 0.20.0 release.
def _to_str(obj: Any, is_root: bool = False) -> str:
"""Returns a string representation where
1. The root level (i.e. the Dataset.__init__ arguments) are
Expand All @@ -254,6 +257,24 @@ def _to_str(obj: Any, is_root: bool = False) -> str:

return f"{type(self).__name__}({_to_str(self._describe(), True)})"

def _pretty_repr(self, object_description: dict[str, Any]) -> str:
    """Build a fully-qualified repr string from a description mapping.

    Entries whose value is ``None`` are omitted. Remaining values are
    pretty-printed with insertion order preserved (``sort_dicts=False``),
    nesting truncated below depth 2, and no line wrapping
    (``width=sys.maxsize``).
    """
    rendered = [
        f"{name}="
        + pprint.pformat(
            value, sort_dicts=False, compact=True, depth=2, width=sys.maxsize
        )
        for name, value in object_description.items()
        if value is not None
    ]
    cls = type(self)
    return f"{cls.__module__}.{cls.__name__}({', '.join(rendered)})"

def __repr__(self) -> str:
    """Delegate to ``_pretty_repr`` with this dataset's own description."""
    description = self._describe()
    return self._pretty_repr(description)

@abc.abstractmethod
def _load(self) -> _DO:
raise NotImplementedError(
Expand Down
8 changes: 8 additions & 0 deletions tests/io/test_cached_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,14 @@ def test_pickle(self, cached_ds, caplog):
_ = pickle.dumps(cached_ds)
assert caplog.records[0].message == f"{cached_ds}: clearing cache to pickle."

def test_repr(self):
    """repr() shows module-qualified class names for dataset and cache."""
    expected = (
        "kedro.io.cached_dataset.CachedDataset("
        "dataset=\"kedro.io.memory_dataset.MemoryDataset(data='<int>')\", "
        "cache='kedro.io.memory_dataset.MemoryDataset()')"
    )
    assert repr(CachedDataset(MemoryDataset(42))) == expected

def test_str(self):
assert (
str(CachedDataset(MemoryDataset(42))) == "CachedDataset(cache={}, "
Expand Down
15 changes: 13 additions & 2 deletions tests/io/test_core.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import pprint
import shutil
from decimal import Decimal
from fractions import Fraction
Expand Down Expand Up @@ -206,11 +207,21 @@ def dummy_data():
class TestCoreFunctions:
@pytest.mark.parametrize("var", [1, True] + FALSE_BUILTINS)
def test_str_representation(self, var):
    """str() keeps the short unqualified form; repr() is module-qualified
    with pprint-formatted argument values."""
    # Diff residue removed: the superseded `filepath = "."` variable and the
    # old str() assertion were interleaved with this final body.
    var_str = pprint.pformat(var)
    filepath_str = pprint.pformat(PurePosixPath("."))
    assert str(MyDataset(var=var)) == f"MyDataset(filepath=., var={var})"
    assert (
        repr(MyDataset(var=var))
        == f"tests.io.test_core.MyDataset(filepath={filepath_str}, var={var_str})"
    )

def test_str_representation_none(self):
    """Default-constructed dataset: str() is short form, repr() is
    module-qualified with the pprint-formatted filepath."""
    assert str(MyDataset()) == "MyDataset(filepath=.)"
    expected_repr = (
        f"tests.io.test_core.MyDataset(filepath={pprint.pformat(PurePosixPath('.'))})"
    )
    assert repr(MyDataset()) == expected_repr

def test_get_filepath_str(self):
path = get_filepath_str(PurePosixPath("example.com/test.csv"), "http")
Expand Down
25 changes: 25 additions & 0 deletions tests/io/test_lambda_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,32 @@ def _dummy_release():
assert "LambdaDataset(load=<tests.io.test_lambda_dataset._dummy_load>)" in str(
LambdaDataset(_dummy_load, None)
)
assert (
"kedro.io.lambda_dataset.LambdaDataset(load='<tests.io.test_lambda_dataset._dummy_load>')"
in repr(LambdaDataset(_dummy_load, None))
)
assert "LambdaDataset(save=<tests.io.test_lambda_dataset._dummy_save>)" in str(
LambdaDataset(None, _dummy_save)
)
assert (
"kedro.io.lambda_dataset.LambdaDataset(save='<tests.io.test_lambda_dataset._dummy_save>')"
in repr(LambdaDataset(None, _dummy_save))
)
assert "LambdaDataset(exists=<tests.io.test_lambda_dataset._dummy_exists>)" in str(
LambdaDataset(None, None, _dummy_exists)
)
assert (
"kedro.io.lambda_dataset.LambdaDataset(exists='<tests.io.test_lambda_dataset._dummy_exists>')"
in repr(LambdaDataset(None, None, _dummy_exists))
)
assert (
"LambdaDataset(release=<tests.io.test_lambda_dataset._dummy_release>)"
in str(LambdaDataset(None, None, None, _dummy_release))
)
assert (
"kedro.io.lambda_dataset.LambdaDataset(release='<tests.io.test_lambda_dataset._dummy_release>')"
in repr(LambdaDataset(None, None, None, _dummy_release))
)

# __init__ keys alphabetically sorted, None values not shown
expected = (
Expand All @@ -51,6 +67,15 @@ def _dummy_release():
actual = str(LambdaDataset(_dummy_load, _dummy_save, _dummy_exists, None))
assert actual == expected

# __init__ keys remains in the provided order, None values not shown
expected = (
"kedro.io.lambda_dataset.LambdaDataset(load='<tests.io.test_lambda_dataset._dummy_load>', "
"save='<tests.io.test_lambda_dataset._dummy_save>', "
"exists='<tests.io.test_lambda_dataset._dummy_exists>')"
)
actual = repr(LambdaDataset(_dummy_load, _dummy_save, _dummy_exists, None))
assert actual == expected


def test_ephemeral_attribute(mocked_dataset):
assert mocked_dataset._EPHEMERAL is False
Expand Down
28 changes: 26 additions & 2 deletions tests/io/test_memory_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,15 +141,39 @@ def test_saving_none(self):
@pytest.mark.parametrize(
    "input_data,expected",
    [
        (
            "dummy_dataframe",
            "MemoryDataset(data=<DataFrame>)",
        ),
        (
            "dummy_numpy_array",
            "MemoryDataset(data=<ndarray>)",
        ),
    ],
    indirect=["input_data"],
)
def test_str_representation(self, memory_dataset, input_data, expected):
    """Test string representation of the dataset.

    Diff residue removed: each expected value appeared twice (the compact
    pre-change tuples and the expanded post-change ones); only the expanded
    final form is kept.
    """
    assert expected in str(memory_dataset)

@pytest.mark.parametrize(
    "input_data,expected",
    [
        (
            "dummy_dataframe",
            "kedro.io.memory_dataset.MemoryDataset(data='<DataFrame>')",
        ),
        (
            "dummy_numpy_array",
            "kedro.io.memory_dataset.MemoryDataset(data='<ndarray>')",
        ),
    ],
    indirect=["input_data"],
)
def test_repr_representation(self, memory_dataset, input_data, expected):
    """Test object (repr) representation of the dataset."""
    # Docstring fix: the original said "string representation" (copy-paste
    # from test_str_representation) although this test asserts on repr().
    assert expected in repr(memory_dataset)

def test_exists(self, new_data):
"""Test `exists` method invocation"""
dataset = MemoryDataset()
Expand Down

0 comments on commit 0f005f1

Please sign in to comment.