Skip to content

Commit 9d4cc6c

Browse files
committed
Quick hack to make some Ibis tables work in `show`
1 parent a5d12fa commit 9d4cc6c

7 files changed

+106
-36
lines changed

docs/changelog.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ ITables ChangeLog
55
----------------------
66

77
**Added**
8-
- Added support for Ibis tables (#215)
8+
- Added support for Ibis tables ([#215](https://github.com/mwouts/itables/issues/215))
99

1010

1111
1.6.3 (2023-12-10)

docs/polars_dataframes.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ dataframes are displayed nicely with the default `itables` settings.
2121
from itables import init_notebook_mode, show
2222
from itables.sample_dfs import get_dict_of_test_dfs
2323
24-
dict_of_test_dfs = get_dict_of_test_dfs(polars=True)
24+
dict_of_test_dfs = get_dict_of_test_dfs(type="polars")
2525
init_notebook_mode(all_interactive=True)
2626
```
2727

itables/downsample.py

+38-7
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,14 @@
33

44
import pandas as pd
55

6+
try:
7+
from ibis.common.exceptions import ExpressionError
8+
except ImportError:
9+
10+
class ExpressionError(Exception):
11+
pass
12+
13+
614
logging.basicConfig()
715
logger = logging.getLogger(__name__)
816

@@ -11,18 +19,37 @@ def nbytes(df):
1119
try:
1220
return sum(x.values.nbytes for _, x in df.items())
1321
except AttributeError:
14-
# Polars DataFrame
15-
return df.estimated_size()
22+
try:
23+
# Polars DataFrame
24+
return df.estimated_size()
25+
except AttributeError:
26+
# Ibis Table
27+
# TODO: find a more direct way to estimate the size of the table
28+
nrows = df.count().execute()
29+
if not nrows:
30+
return 0
31+
return nrows * (nbytes(df.head(5).to_pandas()) / min(nrows, 5))
32+
33+
34+
def nrows(df):
35+
try:
36+
return len(df)
37+
except TypeError:
38+
# Pandas Styler
39+
return len(df.index)
40+
except ExpressionError:
41+
# ibis table
42+
return df.count().execute()
1643

1744

1845
def downsample(df, max_rows=0, max_columns=0, max_bytes=0):
1946
"""Return a subset of the dataframe that fits the limits"""
20-
org_rows, org_columns, org_bytes = len(df), len(df.columns), nbytes(df)
47+
org_rows, org_columns, org_bytes = nrows(df), len(df.columns), nbytes(df)
2148
df = _downsample(
2249
df, max_rows=max_rows, max_columns=max_columns, max_bytes=max_bytes
2350
)
2451

25-
if len(df) < org_rows or len(df.columns) < org_columns:
52+
if nrows(df) < org_rows or len(df.columns) < org_columns:
2653
link = '<a href="https://mwouts.github.io/itables/downsampling.html">downsampled</a>'
2754
reasons = []
2855
if org_rows > max_rows > 0:
@@ -76,7 +103,7 @@ def shrink_towards_target_aspect_ratio(
76103

77104
def _downsample(df, max_rows=0, max_columns=0, max_bytes=0, target_aspect_ratio=None):
78105
"""Implementation of downsample - may be called recursively"""
79-
if len(df) > max_rows > 0:
106+
if nrows(df) > max_rows > 0:
80107
second_half = max_rows // 2
81108
first_half = max_rows - second_half
82109
if second_half:
@@ -134,6 +161,10 @@ def _downsample(df, max_rows=0, max_columns=0, max_bytes=0, target_aspect_ratio=
134161
import polars as pl # noqa
135162

136163
df = pl.DataFrame({df.columns[0]: ["..."]})
137-
return df
138164

139-
return df
165+
try:
166+
len(df)
167+
return df
168+
except ExpressionError:
169+
# Ibis
170+
return df.to_pandas()

itables/javascript.py

+14-6
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
import numpy as np
1212
import pandas as pd
1313

14+
from .downsample import nrows
15+
1416
try:
1517
import pandas.io.formats.style as pd_style
1618
except ImportError:
@@ -22,6 +24,12 @@
2224
# Define pl.Series as pd.Series
2325
import pandas as pl
2426

27+
try:
28+
import ibis.expr.types.relations as ibis_relations
29+
except ImportError:
30+
ibis_relations = None
31+
32+
2533
from IPython.display import HTML, Javascript, display
2634

2735
import itables.options as opt
@@ -102,6 +110,8 @@ def init_notebook_mode(
102110
pd_style.Styler._repr_html_ = _datatables_repr_
103111
pl.DataFrame._repr_html_ = _datatables_repr_
104112
pl.Series._repr_html_ = _datatables_repr_
113+
if ibis_relations is not None:
114+
ibis_relations.Table._repr_html_ = _datatables_repr_
105115
else:
106116
pd.DataFrame._repr_html_ = _ORIGINAL_DATAFRAME_REPR_HTML
107117
if pd_style is not None:
@@ -111,6 +121,9 @@ def init_notebook_mode(
111121
del pd.Series._repr_html_
112122
if hasattr(pl.Series, "_repr_html_"):
113123
del pl.Series._repr_html_
124+
if ibis_relations is not None:
125+
if hasattr(ibis_relations.Table, "_repr_html_"):
126+
del ibis_relations.Table._repr_html_
114127

115128
if not connected:
116129
display(Javascript(read_package_file("external/jquery.min.js")))
@@ -601,12 +614,7 @@ def _min_rows(kwargs):
601614

602615
def _df_fits_in_one_page(df, kwargs):
603616
"""Display just the table (not the search box, etc...) if the rows fit on one 'page'"""
604-
try:
605-
# Pandas DF or Style
606-
return len(df.index) <= _min_rows(kwargs)
607-
except AttributeError:
608-
# Polars
609-
return len(df) <= _min_rows(kwargs)
617+
return nrows(df) <= _min_rows(kwargs)
610618

611619

612620
def safe_reset_index(df):

itables/sample_dfs.py

+36-17
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,7 @@
11
import math
22
import string
33
from datetime import datetime, timedelta
4-
5-
try:
6-
from functools import lru_cache
7-
except ImportError:
8-
from functools32 import lru_cache
9-
4+
from functools import lru_cache
105
from itertools import cycle
116

127
import numpy as np
@@ -105,7 +100,7 @@ def get_df_complex_index():
105100
return df
106101

107102

108-
def get_dict_of_test_dfs(N=100, M=100, polars=False, ibis=False):
103+
def get_dict_of_test_dfs(N=100, M=100, type="pandas"):
109104
NM_values = np.reshape(np.linspace(start=0.0, stop=1.0, num=N * M), (N, M))
110105

111106
test_dfs = {
@@ -266,8 +261,10 @@ def get_dict_of_test_dfs(N=100, M=100, polars=False, ibis=False):
266261
}
267262
),
268263
}
264+
if type == "pandas":
265+
return test_dfs
269266

270-
if polars:
267+
if type == "polars":
271268
import polars as pl
272269
import pyarrow as pa
273270

@@ -279,23 +276,42 @@ def get_dict_of_test_dfs(N=100, M=100, polars=False, ibis=False):
279276
pass
280277
return polars_dfs
281278

282-
if ibis:
283-
import ibis as ib
279+
if type == "ibis_memtable":
280+
import ibis
284281

285-
con = ib.pandas.connect(test_dfs)
286282
ibis_dfs = {}
287-
for key in test_dfs:
283+
for key, df in test_dfs.items():
284+
# Ibis does not support tables with no columns
285+
if not len(df.columns):
286+
continue
287+
try:
288+
ibis_dfs[key] = ibis.memtable(df, name=key)
289+
except (TypeError, ibis.common.exceptions.IbisInputError):
290+
pass
291+
292+
return ibis_dfs
293+
294+
if type == "ibis_connect":
295+
import ibis
296+
297+
con = ibis.pandas.connect(test_dfs)
298+
ibis_dfs = {}
299+
for key, df in test_dfs.items():
300+
# Ibis does not support tables with no columns
301+
if not len(df.columns):
302+
continue
303+
288304
try:
289-
ibis_dfs[key] = con.table(key)
305+
ibis_dfs[f"{key}_connect"] = con.table(key)
290306
except (TypeError, AttributeError):
291307
pass
292308

293309
return ibis_dfs
294310

295-
return test_dfs
311+
raise NotImplementedError(type)
296312

297313

298-
def get_dict_of_test_series(polars=False):
314+
def get_dict_of_test_series(type="pandas"):
299315
series = {}
300316
for df_name, df in get_dict_of_test_dfs().items():
301317
if len(df.columns) > 6:
@@ -306,7 +322,10 @@ def get_dict_of_test_series(polars=False):
306322
continue
307323
series["{}.{}".format(df_name, col)] = df[col]
308324

309-
if polars:
325+
if type == "pandas":
326+
return series
327+
328+
if type == "polars":
310329
import polars as pl
311330
import pyarrow as pa
312331

@@ -325,7 +344,7 @@ def get_dict_of_test_series(polars=False):
325344

326345
return polars_series
327346

328-
return series
347+
raise NotImplementedError(type)
329348

330349

331350
@lru_cache()

tests/test_ibis.py

+14-2
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,21 @@
88
except ImportError as e:
99
pytest.skip(str(e), allow_module_level=True)
1010

11+
# TODO Remove this (and find out how to evaluate count)
12+
ibis.options.interactive = True
13+
14+
15+
@pytest.mark.parametrize(
16+
"name,df",
17+
[(name, df) for name, df in get_dict_of_test_dfs(type="ibis_memtable").items()],
18+
)
19+
def test_show_ibis_memtable(name, df, use_to_html):
20+
to_html_datatable(df, use_to_html)
21+
1122

1223
@pytest.mark.parametrize(
13-
"name,df", [(name, df) for name, df in get_dict_of_test_dfs(ibis=True).items()]
24+
"name,df",
25+
[(name, df) for name, df in get_dict_of_test_dfs(type="ibis_connect").items()],
1426
)
15-
def test_show_ibis_df(name, df, use_to_html):
27+
def test_show_ibis_connect(name, df, use_to_html):
1628
to_html_datatable(df, use_to_html)

tests/test_polars.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,14 @@
1010

1111

1212
@pytest.mark.parametrize(
13-
"name,x", [(name, x) for name, x in get_dict_of_test_series(polars=True).items()]
13+
"name,x", [(name, x) for name, x in get_dict_of_test_series(type="polars").items()]
1414
)
1515
def test_show_polars_series(name, x, use_to_html):
1616
to_html_datatable(x, use_to_html)
1717

1818

1919
@pytest.mark.parametrize(
20-
"name,df", [(name, df) for name, df in get_dict_of_test_dfs(polars=True).items()]
20+
"name,df", [(name, df) for name, df in get_dict_of_test_dfs(type="polars").items()]
2121
)
2222
def test_show_polars_df(name, df, use_to_html):
2323
to_html_datatable(df, use_to_html)

0 commit comments

Comments
 (0)