From 6b89335ca4b60737f411d575b777ee32295d67fc Mon Sep 17 00:00:00 2001 From: Victoria <112418493+veni-vidi-vici-dormivi@users.noreply.github.com> Date: Wed, 7 Feb 2024 13:05:15 +0100 Subject: [PATCH] show duplicates on non-unique query (#73) * show duplicates on non-unique query * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update filefinder/_filefinder.py Remove comment Co-authored-by: Mathias Hauser * Update filefinder/_filefinder.py Co-authored-by: Mathias Hauser --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Mathias Hauser --- .gitignore | 3 +++ filefinder/_filefinder.py | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index d0f1579..74201ba 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,9 @@ __pycache__ .pytest_cache/ +# development scripts +devel/* + # C extensions *.so diff --git a/filefinder/_filefinder.py b/filefinder/_filefinder.py index 42c58bf..8f111b6 100644 --- a/filefinder/_filefinder.py +++ b/filefinder/_filefinder.py @@ -115,8 +115,10 @@ def find(self, keys=None, *, _allow_empty=False, **keys_kwargs): len_all = len(fc.df) len_unique = len(fc.combine_by_key().unique()) - msg = "This query leads to non-unique metadata. Please adjust your query." if len_all != len_unique: + duplicated = fc.df[fc.df.duplicated()] + msg = f"This query leads to non-unique metadata. Please adjust your query.\nFirst five duplicates:\n{duplicated.head()}" + raise ValueError(msg) return fc