From e2b20a49159d62680d0131d1338f06d84b340a44 Mon Sep 17 00:00:00 2001 From: ElenaKhaustova <157851531+ElenaKhaustova@users.noreply.github.com> Date: Thu, 18 Jul 2024 17:09:08 +0100 Subject: [PATCH] Pretty printing catalog (#3990) * Implemented basic __repr__ Signed-off-by: Elena Khaustova * Updated __repr__ Signed-off-by: Elena Khaustova * Removed __str__ Signed-off-by: Elena Khaustova * Updated _describe() for CachedDataset Signed-off-by: Elena Khaustova * Made pretty_repr protected Signed-off-by: Elena Khaustova * Reverted width parameter to default Signed-off-by: Elena Khaustova * Implemented repr for catalog Signed-off-by: Elena Khaustova * Disable sorting Signed-off-by: Elena Khaustova * Replace set with dict to keep original datasets order when printing Signed-off-by: Elena Khaustova * Updated printing params Signed-off-by: Elena Khaustova * Updated printing width Signed-off-by: Elena Khaustova * Removed params_repr Signed-off-by: Elena Khaustova * Disable sorting Signed-off-by: Elena Khaustova * Disable sorting Signed-off-by: Elena Khaustova * Disabled compact Signed-off-by: Elena Khaustova * Updated test_str_representation Signed-off-by: Elena Khaustova * Updated cached dataset tests Signed-off-by: Elena Khaustova * Updated data catalog tests Signed-off-by: Elena Khaustova * Updated core tests Signed-off-by: Elena Khaustova * Updated versioned dataset tests Signed-off-by: Elena Khaustova * Updated tests for lambda dataset Signed-off-by: Elena Khaustova * Updated tests for memory dataset Signed-off-by: Elena Khaustova * Updated release notes Signed-off-by: Elena Khaustova * Set width to maxsize Signed-off-by: Elena Khaustova * Removed top-level keys sorting Signed-off-by: Elena Khaustova * Updated tests Signed-off-by: Elena Khaustova * Updated release notes Signed-off-by: Elena Khaustova * Decoupled describe from pretty printing Signed-off-by: Elena Khaustova * Returned old __str__ to avoid a breaking change Signed-off-by: Elena Khaustova * Updated tests Signed-off-by: Elena Khaustova * Replaced deprecation comment with TODO Signed-off-by: Elena Khaustova --------- Signed-off-by: Elena Khaustova --- RELEASE.md | 1 + kedro/io/data_catalog.py | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index d714f50ddb..cc8b9032c9 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -10,6 +10,7 @@ * Implemented key completion support for accessing datasets in the `DataCatalog`. * Made [kedro-telemetry](https://github.com/kedro-org/kedro-plugins/tree/main/kedro-telemetry) a core dependency. * Implemented dataset pretty printing. +* Implemented `DataCatalog` pretty printing. ## Breaking changes to the API diff --git a/kedro/io/data_catalog.py b/kedro/io/data_catalog.py index 465d7ae09e..c0b2d50e91 100644 --- a/kedro/io/data_catalog.py +++ b/kedro/io/data_catalog.py @@ -9,6 +9,7 @@ import copy import difflib import logging +import pprint import re from typing import Any, Dict @@ -106,7 +107,7 @@ def __init__( """Return a _FrozenDatasets instance from some datasets collections. Each collection could either be another _FrozenDatasets or a dictionary. """ - self._original_names: set[str] = set() + self._original_names: dict[str, str] = {} for collection in datasets_collections: if isinstance(collection, _FrozenDatasets): self.__dict__.update(collection.__dict__) @@ -116,7 +117,7 @@ def __init__( # for easy access to transcoded/prefixed datasets. for dataset_name, dataset in collection.items(): self.__dict__[_sub_nonword_chars(dataset_name)] = dataset - self._original_names.add(dataset_name) + self._original_names[dataset_name] = "" # Don't allow users to add/change attributes on the fly def __setattr__(self, key: str, value: Any) -> None: @@ -131,11 +132,20 @@ def __setattr__(self, key: str, value: Any) -> None: raise AttributeError(msg) def _ipython_key_completions_(self) -> list[str]: - return list(self._original_names) + return list(self._original_names.keys()) def __getitem__(self, key: str) -> Any: return self.__dict__[_sub_nonword_chars(key)] + def __repr__(self) -> str: + datasets_repr = {} + for ds_name in self._original_names.keys(): + datasets_repr[ds_name] = self.__dict__[ + _sub_nonword_chars(ds_name) + ].__repr__() + + return pprint.pformat(datasets_repr, sort_dicts=False) + class DataCatalog: """``DataCatalog`` stores instances of ``AbstractDataset`` implementations @@ -207,6 +217,9 @@ def __init__( # noqa: PLR0913 if feed_dict: self.add_feed_dict(feed_dict) + def __repr__(self) -> str: + return self.datasets.__repr__() + @property def _logger(self) -> logging.Logger: return logging.getLogger(__name__)