Skip to content

Commit

Permalink
Summaries as option (#179)
Browse files Browse the repository at this point in the history
  • Loading branch information
cristineguadelupe authored Nov 15, 2024
1 parent 299609e commit 3135698
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 19 deletions.
55 changes: 36 additions & 19 deletions lib/kino/explorer.ex
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ defmodule Kino.Explorer do
depending on the given data
* `:num_rows` - the number of rows to show in the table. Defaults to `10`.
* `:summaries` - whether to show summaries for numeric and categorical columns.
Defaults to `true`.
"""
@spec new(DataFrame.t() | Series.t(), keyword()) :: t()
def new(data, opts \\ [])
Expand Down Expand Up @@ -65,16 +68,25 @@ defmodule Kino.Explorer do

@impl true
def init({df, name, opts}) do
{lazy, groups, df, total_rows, columns} = prepare_data(df, name)
num_rows = Keyword.get(opts, :num_rows)
info = info(columns, lazy, name, num_rows)

{:ok, info, %{df: df, total_rows: total_rows, columns: columns, groups: groups, name: name}}
summaries = Keyword.get(opts, :summaries, true)
{lazy, groups, df, total_rows, columns} = prepare_data(df, name, summaries)
num_rows = Keyword.get(opts, :num_rows, 10)
info = info(columns, lazy, name, num_rows, summaries)

{:ok, info,
%{
df: df,
total_rows: total_rows,
columns: columns,
groups: groups,
name: name,
summaries: summaries
}}
end

@impl true
def on_update(df, state) do
{_lazy, groups, df, total_rows, columns} = prepare_data(df, state.name)
{_lazy, groups, df, total_rows, columns} = prepare_data(df, state.name, state.summaries)

{:ok, %{state | df: df, total_rows: total_rows, columns: columns, groups: groups}}
end
Expand Down Expand Up @@ -103,10 +115,10 @@ defmodule Kino.Explorer do
{:ok, %{data: data, extension: ".parquet", type: "application/x-parquet"}}
end

defp columns(df, lazy, groups) do
defp columns(df, lazy, groups, summaries) do
dtypes = DataFrame.dtypes(df)
sample_data = df |> DataFrame.head(1) |> DataFrame.collect() |> DataFrame.to_columns()
summaries = if !lazy, do: summaries(df, groups)
summaries = if !lazy && summaries, do: summaries(df, groups)

for name <- df.names, dtype = Map.fetch!(dtypes, name) do
%{
Expand All @@ -118,24 +130,29 @@ defmodule Kino.Explorer do
end
end

defp info(columns, lazy, name, num_rows) do
defp info(columns, lazy, name, num_rows, summaries) do
name = if lazy, do: "Lazy - #{name}", else: name
has_composite_type_column? = Enum.any?(columns, &(&1.type == "list" || &1.type == "struct"))
features = [:export, :pagination, :sorting, :relocate]

formats =
if has_composite_type_column?, do: ["NDJSON", "Parquet"], else: ["CSV", "NDJSON", "Parquet"]

info = %{name: name, features: features, export: %{formats: formats}}
if(num_rows, do: Map.put(info, :num_rows, num_rows), else: info)
%{
name: name,
features: features,
export: %{formats: formats},
num_rows: num_rows,
summaries: summaries
}
end

defp get_records(%{df: df, groups: groups}, rows_spec) do
defp get_records(%{df: df, groups: groups, summaries: summaries}, rows_spec) do
lazy = lazy?(df)
df = df |> relocate(rows_spec[:relocates]) |> order_by(rows_spec[:order])
columns = columns(df, lazy, groups)
columns = columns(df, lazy, groups, summaries)
total_rows = if !lazy, do: DataFrame.n_rows(df)
summaries = if total_rows && total_rows > 0, do: summaries(df, groups)
summaries = if total_rows && total_rows > 0 && summaries, do: summaries(df, groups)
df = DataFrame.slice(df, rows_spec.offset, rows_spec.limit)
records = df |> DataFrame.collect() |> DataFrame.to_columns()
{columns, records, total_rows, summaries}
Expand Down Expand Up @@ -262,20 +279,20 @@ defmodule Kino.Explorer do
df |> relocate(rows_spec[:relocates]) |> order_by(rows_spec[:order]) |> DataFrame.collect()
end

defp prepare_data(%DataFrame{} = df, _name), do: prepare_data(df)
defp prepare_data(%DataFrame{} = df, _name, summaries), do: prepare_data(df, summaries)

defp prepare_data(%Series{} = s, name) do
defp prepare_data(%Series{} = s, name, summaries) do
column_name = name |> String.replace(" ", "_") |> String.downcase()
df = DataFrame.new([{column_name, s}])
prepare_data(df)
prepare_data(df, summaries)
end

defp prepare_data(df) do
defp prepare_data(df, summaries) do
lazy = lazy?(df)
groups = df.groups
df = DataFrame.ungroup(df)
total_rows = if !lazy, do: DataFrame.n_rows(df)
columns = columns(df, lazy, groups)
columns = columns(df, lazy, groups, summaries)

{lazy, groups, df, total_rows, columns}
end
Expand Down
12 changes: 12 additions & 0 deletions test/kino/explorer_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,18 @@ defmodule Kino.ExplorerTest do
} = data
end

# With `summaries: false`, none of the columns in the connected data may carry a summary.
test "summaries as an option" do
kino = Kino.Explorer.new(people_df(), summaries: false)
data = connect(kino)
# Fails if even one column has a truthy `summary` value.
refute Enum.any?(data.content.columns, & &1.summary)
end

# Without an explicit `:summaries` option, every column is expected to carry a summary.
test "summaries as an option (default to true)" do
kino = Kino.Explorer.new(people_df())
data = connect(kino)
# Fails if any column has a `nil`/`false` `summary` value.
assert Enum.all?(data.content.columns, & &1.summary)
end

test "column definitions include type" do
kino = Kino.Explorer.new(people_df())
data = connect(kino)
Expand Down

0 comments on commit 3135698

Please sign in to comment.