Skip to content

Commit

Permalink
Add mode and frequency functions
Browse files Browse the repository at this point in the history
  • Loading branch information
lbarasti committed Mar 30, 2020
1 parent 0299c15 commit 97da7c6
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 2 deletions.
12 changes: 12 additions & 0 deletions spec/statistics_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -88,4 +88,16 @@ describe Statistics do
quantile([42], 0.2).should eq 42
quantile([42], 1).should eq 42
end

it "can compute the mode and frequency hash of a sample" do
  # 7 occurs four times and is the only most-common value in this sample,
  # so the expected mode is unambiguous.
  sample = [1, 1, 7, 7, 1, 5, 3, 6, 7, 6, 7, 10]

  modal_value, modal_count = mode(sample)
  modal_value.should eq 7
  modal_count.should eq 4

  counts = frequency(sample)
  counts[1].should eq 3
  counts[10].should eq 1

  # The most frequent entry of the frequency hash must agree with `mode`.
  most_common = counts.max_by { |entry| entry.last }.first
  most_common.should eq modal_value
end
end
30 changes: 28 additions & 2 deletions src/statistics.cr
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,19 @@ module Statistics
}
end

# Counts how many times each distinct value occurs in the dataset.
#
# Returns a Hash whose keys are the values found in the dataset and whose
# values are the number of times each key appears. Looking up a key that
# never occurred yields `0`, since the hash is created with a default of `0`.
#
# Parameters
# - `values`: a one-dimensional dataset
def frequency(values : Enumerable(T)) forall T
  counts = Hash(T, Int32).new(0)
  values.each do |item|
    counts[item] += 1
  end
  counts
end

# Computes the kurtosis of a dataset.
#
# Parameters
Expand Down Expand Up @@ -60,8 +73,9 @@ module Statistics
values.reduce(0) { |acc, v| acc + v } / values.size
end

# Computes the median of all elements in a dataset. For an even number of
# elements the mean of the two median elements will be computed.
# Computes the median of all elements in a dataset.
#
# For an even number of elements the mean of the two median elements will be computed.
#
# Parameters
# - `values`: a one-dimensional dataset.
Expand Down Expand Up @@ -98,6 +112,18 @@ module Statistics
0.5 * (a + b)
end

# Finds the modal (most frequently occurring) value of a dataset.
#
# Returns a pair `{value, count}` holding the modal value and the number of
# times it occurs. When several values share the highest count, which of them
# is returned is unspecified.
# Raises `Enumerable::EmptyError` if the dataset is empty (propagated from `max_by`).
# NOTE: the whole dataset is traversed to build the frequency table.
#
# Parameters
# - `values`: a one-dimensional dataset.
def mode(values : Enumerable)
  counts = frequency(values)
  # Each hash entry is a `{value, count}` tuple; pick the one with the largest count.
  counts.max_by { |entry| entry.last }
end

# Calculates the n-th moment about the mean for a sample.
#
# Parameters
Expand Down

0 comments on commit 97da7c6

Please sign in to comment.