Skip to content

Commit

Permalink
Add capability to concat two FileContainers (#126)
Browse files Browse the repository at this point in the history
* add concat method

* test

* avoid copy

* add drop duplicates

* test drop_duplicates

* CHANGELOG

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
1 parent c17e2fd commit c8e8256
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
([#121](https://github.com/mathause/filefinder/pull/121)).
- Deprecated `combine_by_key` ([#115](https://github.com/mathause/filefinder/pull/115)).
- Added the number of paths to the repr ([#116](https://github.com/mathause/filefinder/pull/116)).
- Added `FileContainer.concat` to concatenate two `FileContainer`s, optionally dropping duplicates ([#126](https://github.com/mathause/filefinder/pull/126)).

- Explicitly test on python 3.13 ([#103](https://github.com/mathause/filefinder/pull/103)).
- Drop support for python 3.9 ([#102](https://github.com/mathause/filefinder/pull/102)).
Expand Down
36 changes: 36 additions & 0 deletions filefinder/_filefinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,42 @@ def search(self, **query):
df = self._get_subset(**query)
return type(self)(df)

def concat(self, other, drop_duplicates=True):
    """Concatenate this FileContainer with another one.

    Parameters
    ----------
    other : FileContainer
        The other FileContainer to concatenate.
    drop_duplicates : bool, default True
        Drop duplicates after concatenating.

    Returns
    -------
    FileContainer
        The concatenated FileContainer, created as ``type(self)``.

    Raises
    ------
    ValueError
        If the other object is not a FileContainer.
    ValueError
        If the two FileContainers do not have the same keys.
    """

    if not isinstance(other, FileContainer):
        raise ValueError("Can only concatenate two FileContainers.")

    # Compare the column labels by value. An identity check (``is not``) would
    # reject any two distinct containers, even when their keys match exactly.
    # ``Index.equals`` requires the same labels in the same order.
    if not self.df.columns.equals(other.df.columns):
        raise ValueError("FileContainers must have the same keys.")

    # rows of ``self`` come first, followed by the rows of ``other``
    df = pd.concat([self.df, other.df])

    if drop_duplicates:
        df = df.drop_duplicates()

    return type(self)(df)

def _get_subset(self, **query):
if not query:
return pd.DataFrame(
Expand Down
22 changes: 22 additions & 0 deletions filefinder/tests/test_filecontainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,28 @@ def test_filecontainer_search(example_df, example_fc):
pd.testing.assert_frame_equal(result.df, expected)


def test_filecontainer_concat(example_fc):
    """Check the error handling and both ``drop_duplicates`` modes of ``concat``."""

    with pytest.raises(ValueError, match="Can only concatenate two FileContainers."):
        example_fc.concat("not a FileContainer")

    # Build the container with fewer keys outside of ``pytest.raises`` so that
    # only the ``concat`` call itself can satisfy the expected error.
    different_keys_fc = FileContainer(example_fc.df.loc[:, ["model", "scen"]])

    with pytest.raises(ValueError, match="FileContainers must have the same keys"):
        example_fc.concat(different_keys_fc)

    # without dropping duplicates all rows of both containers are kept
    result = example_fc.concat(example_fc, drop_duplicates=False)
    expected = pd.concat([example_fc.df, example_fc.df])

    pd.testing.assert_frame_equal(result.df, expected)
    assert len(result) == 10

    # concatenating a container with itself and dropping duplicates must give
    # back the original content
    result = example_fc.concat(example_fc, drop_duplicates=True)
    expected = example_fc

    pd.testing.assert_frame_equal(result.df, expected.df)
    assert len(result) == 5


def test_fc_combine_by_key_deprecated(example_fc):

with pytest.warns(FutureWarning, match="`combine_by_key` has been deprecated"):
Expand Down

0 comments on commit c8e8256

Please sign in to comment.