Skip to content

Commit

Permalink
add methods to find exactly one file/ path (#101)
Browse files Browse the repository at this point in the history
* find one

* one -> single

* changelog

* docs in readme
  • Loading branch information
mathause authored Oct 11, 2024
1 parent 0cded70 commit 07fba09
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 3 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

## v0.4.0 - unreleased

- Added two methods to find _exactly_ one file or path (and raise an error otherwise):
`FileFinder.find_single_file` and `FileFinder.find_single_path`
([#101](https://github.com/mathause/filefinder/pull/101)).
- The `FileFinder.find_files` arguments `on_parse_error` and `_allow_empty` can no
longer be passed by position ([#99](https://github.com/mathause/filefinder/pull/99)).
- `FileFinder` now raises an error if an invalid `"{placeholder}"` is used
Expand Down
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ file_pattern = "{category}_file_{number}"
ff = FileFinder(path_pattern, file_pattern)
```

## Create file and path names

Everything enclosed in curly brackets is a placeholder. Thus, you can create file and
path names like so:

Expand All @@ -27,6 +29,8 @@ ff.create_full_name(category="a", number=1)
>>> /root/a/a_file_1
```

## Find files on disk

However, the strength of filefinder is parsing file names on disk. Assuming you have the
following folder structure:

Expand Down Expand Up @@ -73,6 +77,17 @@ ff.find_files(category=["a1", "b2"], number=1)
>>> 2 /root/b2/b2_file_1 b2 1
```

Often we need to be sure to find _exactly one_ file or path. This can be achieved using
`find_single_file` (or `find_single_path` for paths):

```python
ff.find_single_file(category="a1", number=1)
>>> <FileContainer>
>>> filename category number
>>> 0 /root/a1/a1_file_1 a1 1
```

If no file or more than one file is found, a `ValueError` is raised.

## Format syntax

You can pass format specifiers to allow more complex formats, see
Expand Down
85 changes: 82 additions & 3 deletions filefinder/_filefinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,39 @@ def find(

return fc

def find_single(self, keys=None, **keys_kwargs):
    """Find exactly one file/ path in the file system using the file and path pattern.

    Parameters
    ----------
    keys : dict
        Dictionary containing keys to create the search pattern.
    **keys_kwargs : {key: indexer, ...}, optional
        The keyword arguments form of ``keys``. When the same key is passed in
        ``keys`` and ``keys_kwargs`` the latter takes priority.

    Notes
    -----
    Missing ``keys`` are replaced with ``"*"``.

    Raises
    ------
    ValueError : if more or less than one file/ path is found
    """

    # Only the "too many matches" case is checked below; the empty case is
    # presumably rejected by ``find`` itself because of ``_allow_empty=False``
    # (NOTE(review): confirm against ``find``).
    found = self.find(
        keys, on_parse_error="raise", _allow_empty=False, **keys_kwargs
    )

    n_found = len(found)
    if n_found <= 1:
        return found

    raise ValueError(
        f"Found more than one ({n_found}) files/ paths. Please adjust your"
        f" query.\nFirst five files/ paths:\n{found.df.head()}"
    )

@staticmethod
def _glob(pattern):
"""Return a list of paths matching a pathname pattern
Expand Down Expand Up @@ -373,15 +406,15 @@ def find_paths(
>>> file_pattern = "{category}_file_{number}"
>>> ff = FileFinder(path_pattern, file_pattern)
>>> ff.find() # doctest: +SKIP
>>> ff.find_paths() # doctest: +SKIP
Looks for
- "/root/*/"
>>> ff.find(category="foo") # doctest: +SKIP
>>> ff.find_paths(category="foo") # doctest: +SKIP
Looks for
- "/root/foo/"
>>> ff.find(dict(category=["foo", "bar"])) # doctest: +SKIP
>>> ff.find_paths(dict(category=["foo", "bar"])) # doctest: +SKIP
Looks for
- "/root/foo/"
- "/root/bar/"
Expand Down Expand Up @@ -446,6 +479,52 @@ def find_files(
**keys_kwargs,
)

def find_single_path(self, keys=None, **keys_kwargs):
    """Find exactly one path in the file system using the path pattern.

    Parameters
    ----------
    keys : dict
        Dictionary containing keys to create the search pattern.
    **keys_kwargs : {key: indexer, ...}, optional
        The keyword arguments form of ``keys``. When the same key is passed in
        ``keys`` and ``keys_kwargs`` the latter takes priority.

    Notes
    -----
    Missing ``keys`` are replaced with ``"*"``.

    Raises
    ------
    ValueError : if more or less than one path is found
    """

    # Delegate to the path-level finder held on ``self.path``.
    path_finder = self.path
    return path_finder.find_single(keys, **keys_kwargs)

def find_single_file(self, keys=None, **keys_kwargs):
    """Find exactly one file in the file system using the file and path pattern.

    Parameters
    ----------
    keys : dict
        Dictionary containing keys to create the search pattern.
    **keys_kwargs : {key: indexer, ...}, optional
        The keyword arguments form of ``keys``. When the same key is passed in
        ``keys`` and ``keys_kwargs`` the latter takes priority.

    Notes
    -----
    Missing ``keys`` are replaced with ``"*"``.

    Raises
    ------
    ValueError : if more or less than one file is found
    """

    # Delegate to the full (path + file) finder held on ``self.full``.
    full_finder = self.full
    return full_finder.find_single(keys, **keys_kwargs)

def __repr__(self):

repr_keys = "', '".join(sorted(self.full.keys))
Expand Down
54 changes: 54 additions & 0 deletions filefinder/tests/test_filefinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,33 @@ def test_find_paths_one_of_several(tmp_path, test_paths, find_kwargs):
pd.testing.assert_frame_equal(result.df, expected)


def test_find_single_path(tmp_path, test_paths):
    """Check ``find_single_path`` for several, no, and exactly one match."""

    path_pattern = tmp_path / "{a}/foo"
    file_pattern = "file_pattern"

    ff = FileFinder(
        path_pattern=path_pattern, file_pattern=file_pattern, test_paths=test_paths
    )

    # error if more than one is found
    with pytest.raises(ValueError, match=r"Found more than one \(2\) files/ paths"):
        ff.find_single_path()

    # error if none is found
    with pytest.raises(ValueError, match="Found no files matching criteria"):
        ff.find_single_path(a="a3")

    expected = {"filename": {0: str(tmp_path / "a1/foo/*")}, "a": {0: "a1"}}
    expected = pd.DataFrame.from_dict(expected)

    # the keys can be passed as keyword arguments ...
    result = ff.find_single_path(a="a1")
    pd.testing.assert_frame_equal(result.df, expected)

    # ... or as a dictionary
    result = ff.find_single_path({"a": "a1"})
    pd.testing.assert_frame_equal(result.df, expected)


def test_find_file_none_found(tmp_path, test_paths):

path_pattern = tmp_path / "{a}/foo/"
Expand Down Expand Up @@ -457,6 +484,33 @@ def test_find_files_one_of_several(tmp_path, test_paths, find_kwargs):
pd.testing.assert_frame_equal(result.df, expected)


def test_find_single_file(tmp_path, test_paths):
    """Check ``find_single_file`` for several, no, and exactly one match."""

    path_pattern = tmp_path / "{a}/foo"
    file_pattern = "file"

    ff = FileFinder(
        path_pattern=path_pattern, file_pattern=file_pattern, test_paths=test_paths
    )

    # error if more than one is found
    with pytest.raises(ValueError, match=r"Found more than one \(2\) files/ paths"):
        ff.find_single_file()

    # error if none is found
    with pytest.raises(ValueError, match="Found no files matching criteria"):
        ff.find_single_file(a="a3")

    expected = {"filename": {0: str(tmp_path / "a1/foo/file")}, "a": {0: "a1"}}
    expected = pd.DataFrame.from_dict(expected)

    # the keys can be passed as keyword arguments ...
    result = ff.find_single_file(a="a1")
    pd.testing.assert_frame_equal(result.df, expected)

    # ... or as a dictionary
    result = ff.find_single_file({"a": "a1"})
    pd.testing.assert_frame_equal(result.df, expected)


def test_find_paths_scalar_number():

ff = FileFinder(
Expand Down

0 comments on commit 07fba09

Please sign in to comment.