From e2b20a49159d62680d0131d1338f06d84b340a44 Mon Sep 17 00:00:00 2001
From: ElenaKhaustova <157851531+ElenaKhaustova@users.noreply.github.com>
Date: Thu, 18 Jul 2024 17:09:08 +0100
Subject: [PATCH] Pretty printing catalog (#3990)

* Implemented basic __repr__

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Updated __repr__

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Removed __str__

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Updated _describe() for CachedDataset

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Made pretty_repr protected

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Reverted width parameter to default

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Implemented repr for catalog

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Disable sorting

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Replace set with dict to keep original datasets order when printing

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Updated printing params

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Updated printing width

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Removed params_repr

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Disable sorting

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Disable sorting

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Disabled compact

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Updated test_str_representation

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Updated cached dataset tests

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Updated data catalog tests

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Updated core tests

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Updated versioned dataset tests

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Updated tests for lambda dataset

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Updated tests for memory dataset

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Updated release notes

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Set width to maxsize

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Removed top-level keys sorting

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Updated tests

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Updated release notes

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Decoupled describe from pretty printing

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Returned old __str__ to avoid a breaking change

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Updated tests

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Replaced deprecation comment with TODO

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

---------

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>
---
 RELEASE.md               |  1 +
 kedro/io/data_catalog.py | 19 ++++++++++++++++---
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/RELEASE.md b/RELEASE.md
index d714f50ddb..cc8b9032c9 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -10,6 +10,7 @@
 * Implemented key completion support for accessing datasets in the `DataCatalog`.
 * Made [kedro-telemetry](https://github.com/kedro-org/kedro-plugins/tree/main/kedro-telemetry) a core dependency.
 * Implemented dataset pretty printing.
+* Implemented `DataCatalog` pretty printing.
 
 ## Breaking changes to the API
 
diff --git a/kedro/io/data_catalog.py b/kedro/io/data_catalog.py
index 465d7ae09e..c0b2d50e91 100644
--- a/kedro/io/data_catalog.py
+++ b/kedro/io/data_catalog.py
@@ -9,6 +9,7 @@
 import copy
 import difflib
 import logging
+import pprint
 import re
 from typing import Any, Dict
 
@@ -106,7 +107,7 @@ def __init__(
         """Return a _FrozenDatasets instance from some datasets collections.
         Each collection could either be another _FrozenDatasets or a dictionary.
         """
-        self._original_names: set[str] = set()
+        self._original_names: dict[str, str] = {}
         for collection in datasets_collections:
             if isinstance(collection, _FrozenDatasets):
                 self.__dict__.update(collection.__dict__)
@@ -116,7 +117,7 @@ def __init__(
                 # for easy access to transcoded/prefixed datasets.
                 for dataset_name, dataset in collection.items():
                     self.__dict__[_sub_nonword_chars(dataset_name)] = dataset
-                    self._original_names.add(dataset_name)
+                    self._original_names[dataset_name] = ""
 
     # Don't allow users to add/change attributes on the fly
     def __setattr__(self, key: str, value: Any) -> None:
@@ -131,11 +132,20 @@ def __setattr__(self, key: str, value: Any) -> None:
         raise AttributeError(msg)
 
     def _ipython_key_completions_(self) -> list[str]:
-        return list(self._original_names)
+        return list(self._original_names.keys())
 
     def __getitem__(self, key: str) -> Any:
         return self.__dict__[_sub_nonword_chars(key)]
 
+    def __repr__(self) -> str:
+        """Pretty-print ``{original dataset name: repr(dataset)}``, unsorted."""
+        # Keys keep the insertion order of ``_original_names`` (sort_dicts=False).
+        datasets_repr = {
+            ds_name: repr(self.__dict__[_sub_nonword_chars(ds_name)])
+            for ds_name in self._original_names
+        }
+        return pprint.pformat(datasets_repr, sort_dicts=False)
+
 
 class DataCatalog:
     """``DataCatalog`` stores instances of ``AbstractDataset`` implementations
@@ -207,6 +217,9 @@ def __init__(  # noqa: PLR0913
         if feed_dict:
             self.add_feed_dict(feed_dict)
 
+    def __repr__(self) -> str:
+        return repr(self.datasets)  # the catalog prints as its datasets collection
+
     @property
     def _logger(self) -> logging.Logger:
         return logging.getLogger(__name__)