Skip to content

Commit

Permalink
Merge pull request #136 from lincc-frameworks/from_lists
Browse files Browse the repository at this point in the history
add pack_lists class function
  • Loading branch information
dougbrn authored Aug 19, 2024
2 parents 975fbc8 + b087283 commit 6e2f433
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 0 deletions.
63 changes: 63 additions & 0 deletions src/nested_pandas/nestedframe/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,69 @@ def from_flat(cls, df, base_columns, nested_columns=None, index=None, name="nest
nested_columns = [col for col in df.columns if col not in base_columns]
return out_df.add_nested(df[nested_columns], name=name)

@classmethod
def from_lists(cls, df, base_columns=None, list_columns=None, name="nested"):
    """Creates a NestedFrame with base and nested columns from a flat
    dataframe whose nested data is stored in list-valued columns.

    Parameters
    ----------
    df: pd.DataFrame or NestedFrame
        A dataframe with list columns.
    base_columns: list-like, or None
        Any columns that have non-list values in the input df. These will
        simply be kept as identical columns in the result. If None and
        `list_columns` is provided, is defined as all columns not in
        `list_columns`. If both are None, every column is treated as a
        list column and the result has no base columns.
    list_columns: list-like, or None
        The list-value columns that should be packed into a nested column.
        All columns in the list will attempt to be packed into a single
        nested column with the name provided in `name`. If None, is
        defined as all columns not in `base_columns`.
    name: str
        The name of the output column the `list_columns` are packed into.

    Returns
    -------
    NestedFrame
        A NestedFrame with the specified nesting structure.

    Raises
    ------
    ValueError
        If no columns end up assigned as list columns.

    Examples
    --------
    >>> nf = NestedFrame({"c":[1,2,3], "d":[2,4,6],
    ...                   "e":[[1,2,3], [4,5,6], [7,8,9]]},
    ...                  index=[0,1,2])
    >>> nf_packed = NestedFrame.from_lists(nf, base_columns=["c","d"])
    """

    # Resolve base and list columns: whichever side is left undefined is
    # inferred as the complement of the side that was provided.
    if list_columns is None:
        if base_columns is None:
            # with no inputs, assume all columns are list-valued
            list_columns = df.columns
        else:
            # with defined base_columns, assume everything else is list
            list_columns = [col for col in df.columns if col not in base_columns]
    elif base_columns is None:
        # if list_columns are defined, assume everything else is base
        base_columns = [col for col in df.columns if col not in list_columns]

    # len() rather than truthiness: list_columns may be a pandas Index,
    # whose truth value is ambiguous.
    if len(list_columns) == 0:
        raise ValueError("No columns were assigned as list columns.")

    # Pack list columns into a nested column
    packed_df = packer.pack_lists(df[list_columns])
    packed_df.name = name

    # Without base columns, the packed column alone forms the NestedFrame
    if base_columns is None:
        return NestedFrame(packed_df.to_frame())

    # Join the nested column to the base-column frame. A plain pd.DataFrame
    # input would otherwise yield a plain DataFrame from `join`, so convert
    # first to guarantee the documented NestedFrame return type.
    base_df = df[base_columns]
    if not isinstance(base_df, NestedFrame):
        base_df = NestedFrame(base_df)
    return base_df.join(packed_df)

def _split_query(self, expr) -> dict:
"""Splits a pandas query into multiple subqueries for nested and base layers"""
# Ensure query has needed spacing for upcoming split
Expand Down
53 changes: 53 additions & 0 deletions tests/nested_pandas/nestedframe/test_nestedframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,59 @@ def test_recover_from_flat():
assert nf2.equals(nf)


def test_from_lists():
    """Exercise NestedFrame.from_lists over combinations of base/list columns"""
    single_list_nf = NestedFrame(
        {"c": [1, 2, 3], "d": [2, 4, 6], "e": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]},
        index=[0, 1, 2],
    )

    # Only base columns given, with a custom name for the packed column
    packed = NestedFrame.from_lists(single_list_nf, base_columns=["c", "d"], name="nested_e")
    assert list(packed.columns) == ["c", "d", "nested_e"]
    assert list(packed.nested_columns) == ["nested_e"]

    # Both base and list columns given explicitly
    packed = NestedFrame.from_lists(single_list_nf, base_columns=["c", "d"], list_columns=["e"])
    assert list(packed.columns) == ["c", "d", "nested"]
    assert list(packed.nested_columns) == ["nested"]

    # Only list columns given; the remaining columns become base columns
    packed = NestedFrame.from_lists(single_list_nf, list_columns=["e"])
    assert list(packed.columns) == ["c", "d", "nested"]
    assert list(packed.nested_columns) == ["nested"]

    # Claiming every column as a base column leaves nothing to pack
    with pytest.raises(ValueError):
        NestedFrame.from_lists(single_list_nf, base_columns=["c", "d", "e"])

    # Two list columns whose per-row lengths vary from row to row
    ragged_nf = NestedFrame(
        {
            "c": [1, 2, 3],
            "d": [2, 4, 6],
            "e": [[1, 2, 3], [4, 5, 6, 7], [8, 9]],
            "f": [[10, 20, 30], [40, 50, 60, 70], [80, 90]],
        },
        index=[0, 1, 2],
    )

    packed = NestedFrame.from_lists(ragged_nf, list_columns=["e", "f"])
    assert list(packed.columns) == ["c", "d", "nested"]
    assert list(packed.nested_columns) == ["nested"]
    assert list(packed.nested.nest.fields) == ["e", "f"]

    # Selecting a subset of the base columns drops the others
    packed = NestedFrame.from_lists(single_list_nf, base_columns=["c"], list_columns=["e"])
    assert list(packed.columns) == ["c", "nested"]
    assert list(packed.nested_columns) == ["nested"]

    # An explicitly empty set of base columns keeps only the packed column
    packed = NestedFrame.from_lists(single_list_nf, base_columns=[], list_columns=["e"])
    assert list(packed.columns) == ["nested"]
    assert list(packed.nested_columns) == ["nested"]

    # With no column arguments, every column is treated as list-valued
    packed = NestedFrame.from_lists(single_list_nf[["e"]], base_columns=None, list_columns=None)
    assert list(packed.columns) == ["nested"]
    assert list(packed.nested_columns) == ["nested"]

def test_query():
"""Test that NestedFrame.query handles nested queries correctly"""

Expand Down

0 comments on commit 6e2f433

Please sign in to comment.