Skip to content

Commit

Permalink
Add DataFrame.frequencies/2 (#637)
Browse files Browse the repository at this point in the history
  • Loading branch information
anthony-khong authored Jul 2, 2023
1 parent 2b73718 commit 3f66bd2
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 0 deletions.
25 changes: 25 additions & 0 deletions lib/explorer/data_frame.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5030,6 +5030,31 @@ defmodule Explorer.DataFrame do
@spec nil_count(df :: DataFrame.t()) :: DataFrame.t()
def nil_count(df), do: Shared.apply_impl(df, :nil_count)

@doc """
Counts how often each distinct combination of values occurs in `columns`.

The resulting dataframe has one row per unique combination found in the
given columns, plus a `counts` column with the number of occurrences.
Rows are ordered by `counts`, most frequent first.

Raises `ArgumentError` when `columns` is an empty list.

## Examples

    iex> df = Explorer.DataFrame.new(a: ["a", "a", "b"], b: [1, 1, nil])
    iex> Explorer.DataFrame.frequencies(df, [:a, :b])
    #Explorer.DataFrame<
      Polars[2 x 3]
      a string ["a", "b"]
      b integer [1, nil]
      counts integer [2, 1]
    >
"""
@doc type: :single
@spec frequencies(df :: DataFrame.t(), columns :: column_names()) :: DataFrame.t()
def frequencies(%DataFrame{} = df, [col | _] = columns) do
  # The per-group count is taken over the first requested column.
  count_per_group = fn grouped -> [counts: Series.count(grouped[col])] end
  most_frequent_first = fn summary -> [desc: summary[:counts]] end

  grouped_df = group_by(df, columns)
  summary_df = summarise_with(grouped_df, count_per_group)

  arrange_with(summary_df, most_frequent_first)
end

def frequencies(_df, []) do
  raise ArgumentError, "columns cannot be empty"
end

# Helpers

defp backend_from_options!(opts) do
Expand Down
27 changes: 27 additions & 0 deletions test/explorer/data_frame_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -2973,6 +2973,33 @@ defmodule Explorer.DataFrameTest do
end
end

describe "frequencies/2" do
  test "multiple columns with and without nils" do
    df =
      DF.new(
        a: [1, 1, 1, 2, 3, 3],
        b: [true, true, true, false, false, false],
        c: ["a", nil, "a", nil, "a", nil]
      )

    # Rows come back ordered by descending count.
    assert DF.frequencies(df, [:a, :b]) |> DF.to_columns(atom_keys: true) ==
             %{a: [1, 3, 2], b: [true, false, false], counts: [3, 2, 1]}

    # A nil in a requested column forms its own combination and is counted.
    assert DF.frequencies(df, [:a, :c]) |> DF.to_columns(atom_keys: true) ==
             %{a: [1, 1, 2, 3, 3], c: ["a", nil, nil, "a", nil], counts: [2, 1, 1, 1, 1]}
  end

  test "invalid columns args" do
    # An empty column list is rejected explicitly by frequencies/2.
    assert_raise ArgumentError,
                 ~r/cannot be empty/,
                 fn -> DF.new(a: [1]) |> DF.frequencies([]) end

    # A column name that is not in the dataframe raises as well.
    assert_raise ArgumentError,
                 ~r/could not find column name/,
                 fn -> DF.new(a: [1]) |> DF.frequencies([:x]) end
  end
end

describe "nil_count/1" do
test "various dtypes" do
require Explorer.DataFrame, as: DF
Expand Down

0 comments on commit 3f66bd2

Please sign in to comment.