diff --git a/CHANGELOG.md b/CHANGELOG.md index f1e6f1f..07bbbe3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ ## v0.4.0 - unreleased +- Added two methods to find _exactly_ one file or path (and raise an error otherwise): + `FileFinder.find_single_file` and `FileFinder.find_single_path` + ([#101](https://github.com/mathause/filefinder/pull/101)). - The `FileFinder.find_files` arguments `on_parse_error` and `_allow_empty` can no longer be passed by position ([#99](https://github.com/mathause/filefinder/pull/99)). - `FileFinder` now raises an error if an invalid `"{placeholder}"` is used diff --git a/README.md b/README.md index 9885d95..ee2014c 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,8 @@ file_pattern = "{category}_file_{number}" ff = FileFinder(path_pattern, file_pattern) ``` +## Create file and path names + Everything enclosed in curly brackets is a placeholder. Thus, you can create file and path names like so: @@ -27,6 +29,8 @@ ff.create_full_name(category="a", number=1) >>> /root/a/a_file_1 ``` +## Find files on disk + However, the strength of filefinder is parsing file names on disk. Assuming you have the following folder structure: @@ -73,6 +77,17 @@ ff.find_files(category=["a1", "b2"], number=1) >>> 2 /root/b2/b2_file_1 b2 1 ``` +Often we need to be sure to find _exactly one_ file or path. This can be achieved using: + +```python +ff.find_single_file(category="a1", number=1) +>>> +>>> filename category number +>>> 0 /root/a1/a1_file_1 a1 1 +``` + +If no file or more than one file is found, a `ValueError` is raised. 
+ ## Format syntax You can pass format specifiers to allow more complex formats, see diff --git a/filefinder/_filefinder.py b/filefinder/_filefinder.py index f3d10ff..eea67b3 100644 --- a/filefinder/_filefinder.py +++ b/filefinder/_filefinder.py @@ -144,6 +144,39 @@ def find( return fc + def find_single(self, keys=None, **keys_kwargs): + """ + find exactly one file/ path in the file system using the file and path pattern + + Parameters + ---------- + keys : dict + Dictionary containing keys to create the search pattern. + **keys_kwargs : {key: indexer, ...}, optional + The keyword arguments form of ``keys``. When the same key is passed in + ``keys`` and ``keys_kwargs`` the latter takes priority. + + Notes + ----- + Missing ``keys`` are replaced with ``"*"``. + + Raises + ------ + ValueError : if more or less than one file/ path is found + """ + + fc = self.find(keys, on_parse_error="raise", _allow_empty=False, **keys_kwargs) + + if len(fc) > 1: + n_found = len(fc) + msg = ( + f"Found more than one ({n_found}) files/ paths. 
Please adjust your" + f" query.\nFirst five files/ paths:\n{fc.df.head()}" + ) + raise ValueError(msg) + + return fc + @staticmethod def _glob(pattern): """Return a list of paths matching a pathname pattern @@ -373,15 +406,15 @@ def find_paths( >>> file_pattern = "{category}_file_{number}" >>> ff = FileFinder(path_pattern, file_pattern) - >>> ff.find() # doctest: +SKIP + >>> ff.find_paths() # doctest: +SKIP Looks for - "/root/*/" - >>> ff.find(category="foo") # doctest: +SKIP + >>> ff.find_paths(category="foo") # doctest: +SKIP Looks for - "/root/foo/" - >>> ff.find(dict(category=["foo", "bar"])) # doctest: +SKIP + >>> ff.find_paths(dict(category=["foo", "bar"])) # doctest: +SKIP Looks for - "/root/foo/" - "/root/bar/" @@ -446,6 +479,52 @@ def find_files( **keys_kwargs, ) + def find_single_path(self, keys=None, **keys_kwargs): + """ + find exactly one path in the file system using the path pattern + + Parameters + ---------- + keys : dict + Dictionary containing keys to create the search pattern. + **keys_kwargs : {key: indexer, ...}, optional + The keyword arguments form of ``keys``. When the same key is passed in + ``keys`` and ``keys_kwargs`` the latter takes priority. + + Notes + ----- + Missing ``keys`` are replaced with ``"*"``. + + Raises + ------ + ValueError : if more or less than one path is found + """ + + return self.path.find_single(keys, **keys_kwargs) + + def find_single_file(self, keys=None, **keys_kwargs): + """ + find exactly one file in the file system using the file and path pattern + + Parameters + ---------- + keys : dict + Dictionary containing keys to create the search pattern. + **keys_kwargs : {key: indexer, ...}, optional + The keyword arguments form of ``keys``. When the same key is passed in + ``keys`` and ``keys_kwargs`` the latter takes priority. + + Notes + ----- + Missing ``keys`` are replaced with ``"*"``. 
+ + Raises + ------ + ValueError : if more or less than one file is found + """ + + return self.full.find_single(keys, **keys_kwargs) + def __repr__(self): repr_keys = "', '".join(sorted(self.full.keys)) diff --git a/filefinder/tests/test_filefinder.py b/filefinder/tests/test_filefinder.py index 8177250..f7bc105 100644 --- a/filefinder/tests/test_filefinder.py +++ b/filefinder/tests/test_filefinder.py @@ -317,6 +317,33 @@ def test_find_paths_one_of_several(tmp_path, test_paths, find_kwargs): pd.testing.assert_frame_equal(result.df, expected) +def test_find_single_path(tmp_path, test_paths): + + path_pattern = tmp_path / "{a}/foo" + file_pattern = "file_pattern" + + ff = FileFinder( + path_pattern=path_pattern, file_pattern=file_pattern, test_paths=test_paths + ) + + # error if more than one is found + with pytest.raises(ValueError, match=r"Found more than one \(2\) files/ paths"): + ff.find_single_path() + + # error if none is found + with pytest.raises(ValueError, match="Found no files matching criteria"): + ff.find_single_path(a="a3") + + expected = {"filename": {0: str(tmp_path / "a1/foo/*")}, "a": {0: "a1"}} + expected = pd.DataFrame.from_dict(expected) + + result = ff.find_single_path(a="a1") + pd.testing.assert_frame_equal(result.df, expected) + + result = ff.find_single_path({"a": "a1"}) + pd.testing.assert_frame_equal(result.df, expected) + + def test_find_file_none_found(tmp_path, test_paths): path_pattern = tmp_path / "{a}/foo/" @@ -457,6 +484,33 @@ def test_find_files_one_of_several(tmp_path, test_paths, find_kwargs): pd.testing.assert_frame_equal(result.df, expected) +def test_find_single_file(tmp_path, test_paths): + + path_pattern = tmp_path / "{a}/foo" + file_pattern = "file" + + ff = FileFinder( + path_pattern=path_pattern, file_pattern=file_pattern, test_paths=test_paths + ) + + # error if more than one is found + with pytest.raises(ValueError, match=r"Found more than one \(2\) files/ paths"): + ff.find_single_file() + + # error if none 
is found + with pytest.raises(ValueError, match="Found no files matching criteria"): + ff.find_single_file(a="a3") + + expected = {"filename": {0: str(tmp_path / "a1/foo/file")}, "a": {0: "a1"}} + expected = pd.DataFrame.from_dict(expected) + + result = ff.find_single_file(a="a1") + pd.testing.assert_frame_equal(result.df, expected) + + result = ff.find_single_file({"a": "a1"}) + pd.testing.assert_frame_equal(result.df, expected) + + def test_find_paths_scalar_number(): ff = FileFinder(