Skip to content

Custom HTML Rendering for Nested Columns #103

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Feb 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ Nested-Pandas allows data like this:
To instead be represented like this:

<p align="center">
<img src="./docs/intro_images/nestedframe.png" alt="nestedframe" width="400"/>
<img src="./docs/intro_images/nestedframe_example.png" alt="nestedframe" width="300"/>
</p>

Where the nested data is represented as nested dataframes:
Expand Down
2 changes: 1 addition & 1 deletion docs/gettingstarted/quickstart.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@
"source": [
"The above dataframe is a `NestedFrame`, which extends the capabilities of the Pandas `DataFrame` to support columns with nested information. \n",
"\n",
"We now have the top level dataframe with 3 rows, each of which corresponds to a single object. The table has three columns beyond \"id\". Two columns, \"ra\" and \"dec\", have a single value for the object (in this case the position on the sky). The last column \"lightcurve\" contains a nested table with a series of observation times and observation brightnesses for the object. As we will see below, this nested table allows the user to easily access to the all of the observations for a given object.\n",
"We now have the top level dataframe with 3 rows, each of which corresponds to a single object. The table has three columns beyond \"id\". Two columns, \"ra\" and \"dec\", have a single value for the object (in this case the position on the sky). The last column \"lightcurve\" contains a nested table with a series of observation times and observation brightnesses for the object. The first row of this nested table is shown along with its dimensions to give a sense of the contents of the nested data. As we will see below, this nested table allows the user to easily access all of the observations for a given object.\n",
"\n",
"## Accessing Nested Data\n",
"\n",
Expand Down
6 changes: 3 additions & 3 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ Nested-Pandas allows data like this:

To instead be represented like this:

.. image:: ./intro_images/nestedframe.png
:width: 400
.. image:: ./intro_images/nestedframe_example.png
:width: 300
:align: center
:alt: pandas dataframes

Expand All @@ -36,7 +36,7 @@ Where the nested data is represented as nested dataframes:
object_nf.loc[0]["nested_sources"]

.. image:: ./intro_images/loc_into_nested.png
:width: 225
:width: 300
:align: center
:alt: pandas dataframes

Expand Down
Binary file removed docs/intro_images/nestedframe.png
Binary file not shown.
Binary file added docs/intro_images/nestedframe_example.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
5 changes: 3 additions & 2 deletions docs/tutorials/nested_spectra.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
"outputs": [],
"source": [
"# Query SDSS for the corresponding spectra\n",
"SDSS.clear_cache()\n",
"sp = SDSS.get_spectra(matches=xid)\n",
"sp"
]
Expand Down Expand Up @@ -161,7 +162,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "lsdb",
"language": "python",
"name": "python3"
},
Expand All @@ -175,7 +176,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.6"
"version": "3.12.8"
}
},
"nbformat": 4,
Expand Down
49 changes: 49 additions & 0 deletions src/nested_pandas/nestedframe/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
from nested_pandas.series.dtype import NestedDtype
from nested_pandas.series.packer import pack, pack_lists, pack_sorted_df_into_struct

# NOTE(review): these calls change pandas' global display options as a side
# effect of importing this module, so the limits apply to every DataFrame in
# the session, not only to NestedFrame. The same options are read back through
# pd.get_option in NestedFrame._repr_html_ to decide when to truncate.
pd.set_option("display.max_rows", 30)
pd.set_option("display.min_rows", 5)

# Used to identify backtick-protected names in the expressions
# used in NestedFrame.eval() and NestedFrame.query().
_backtick_protected_names = re.compile(r"`[^`]+`", re.MULTILINE)
Expand Down Expand Up @@ -274,6 +277,52 @@
nest_cols.append(column)
return nest_cols

def _repr_html_(self) -> str | None:
    """Render the frame as HTML, previewing the contents of nested columns.

    Frames without nested columns, and frames without rows, are rendered
    with the regular ``DataFrame.to_html`` output. Otherwise every cell of a
    nested column is replaced by a one-row preview of its nested table
    together with that table's dimensions; the cells of the first row also
    carry the nested table's header.

    Truncation follows the pandas ``display.max_rows`` / ``display.min_rows``
    options: when the frame has more rows than ``display.max_rows``, only
    ``display.min_rows`` rows are rendered.

    Returns
    -------
    str
        HTML markup for the frame.
    """
    max_rows = pd.get_option("display.max_rows")
    min_rows = pd.get_option("display.min_rows")
    nested_cols = self.nested_columns

    # pandas allows ``display.max_rows`` to be None (never truncate) and
    # ``display.min_rows`` to be None (follow ``display.max_rows``).
    truncate = max_rows is not None and self.shape[0] > max_rows
    truncated_rows = max_rows if min_rows is None else min_rows

    # Without nested columns, just do representation as normal. An empty
    # frame has no nested cells to preview either, and indexing its first
    # row label below would raise IndexError.
    if len(nested_cols) == 0 or len(self) == 0:
        # This mimics pandas behavior
        return super().to_html(
            max_rows=truncated_rows if truncate else max_rows, show_dimensions=True
        )

    # Nested Column Formatting
    # cells after the first row show only a preview row and the dimensions
    def repack_row(chunk):
        return chunk.to_html(max_rows=1, max_cols=5, show_dimensions=True, index=False, header=False)

    # first cell shows the nested df header and a preview row
    def repack_first_cell(chunk):
        # Render header separately to keep data aligned
        header_html = chunk.head(0).to_html(
            max_rows=0, max_cols=5, show_dimensions=False, index=False, header=True
        )
        # Then add a preview row
        return header_html + repack_row(chunk)

    # Apply repacking to all nested columns
    # NOTE(review): the subsets below are label based; a string-valued or
    # non-unique index may be interpreted differently by Styler.format
    # (e.g. a bare string is treated as a column key) - confirm.
    styler = self.style.format(dict.fromkeys(nested_cols, repack_first_cell), subset=self.index[0])
    # A single-row frame has no second label, so ``self.index[1]`` would
    # raise IndexError; there is nothing left to format in that case.
    if len(self) > 1:
        styler = styler.format(
            dict.fromkeys(nested_cols, repack_row), subset=pd.IndexSlice[self.index[1] :]
        )

    # Recover some truncation formatting, limited to head truncation.
    # When under the max_rows threshold, display all rows (behavior of 0 here)
    html_repr = styler.to_html(max_rows=truncated_rows if truncate else 0)

    # Manually append dimensionality to a styler output
    n_rows, n_cols = styler.data.shape
    html_repr += f"{n_rows} rows x {n_cols} columns"

    return html_repr

def _parse_hierarchical_components(self, delimited_path: str, delimiter: str = ".") -> list[str]:
"""
Given a string that may be a delimited path, parse it into its components,
Expand Down
26 changes: 26 additions & 0 deletions tests/nested_pandas/nestedframe/test_nestedframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,32 @@ def test_nestedseries_construction():
assert (frame[0] == [1, 2, 3]).all()


def test_html_repr():
    """Check that the html representation renders for nested, flat and truncated frames"""

    base = NestedFrame(data={"a": [1, 2, 3], "b": [2, 4, 6]}, index=[0, 1, 2])

    # Sanity check of the fixture before nesting
    assert list(base.all_columns.keys()) == ["base"]
    assert list(base.all_columns["base"]) == list(base.columns)

    nested = pd.DataFrame(
        data={"c": [0, 2, 4, 1, 4, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]},
        index=[0, 0, 0, 1, 1, 1, 2, 2, 2],
    )

    base = base.add_nested(nested, "nested")

    # Check nested repr: the styler path appends the frame dimensions at the end
    nested_html = base._repr_html_()
    assert isinstance(nested_html, str)
    assert nested_html.endswith(f"{base.shape[0]} rows x {base.shape[1]} columns")

    # Check repr path without nested cols
    flat_html = base[["a", "b"]]._repr_html_()
    assert isinstance(flat_html, str)
    assert "<table" in flat_html

    # Check repr truncation for larger nf
    nf = generate_data(100, 2)
    large_html = nf._repr_html_()
    assert isinstance(large_html, str)
    # The reported dimensions describe the full frame even when rows are truncated
    assert f"{nf.shape[0]} rows" in large_html


def test_all_columns():
"""Test the all_columns function"""

Expand Down