-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathministat.rb
125 lines (110 loc) · 2.97 KB
/
ministat.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
require 'mathn'
module MiniStat
  VERSION = '1.0.0'

  # Descriptive statistics over a numeric dataset.
  #
  # The values handed to the constructor are converted with +to_f+ and stored
  # in ascending order. Most derived statistics are memoized in instance
  # variables, so the array returned by #data should be treated as read-only:
  # mutating it would leave the cached results stale.
  class Data
    # The dataset as an ascending array of Floats.
    attr_reader :data

    # Builds a dataset.
    #
    # data - a collection responding to +collect+ whose elements respond
    #        to +to_f+.
    def initialize(data)
      @data = data.collect { |value| value.to_f }.sort
    end

    # Return the median of a dataset (the stored one by default).
    #
    # data - array of numbers; it does not need to be sorted and is never
    #        modified (a sorted copy is used instead).
    #
    # Returns the middle element for an odd-sized dataset, the average of the
    # two middle elements for an even-sized one, and 0 for an empty one.
    def median(data = @data)
      return 0 if data.empty?

      # @data is sorted once in the constructor; anything else is sorted into
      # a copy so the caller's array is left alone.
      sorted = data.equal?(@data) ? data : data.sort

      # Integer#div keeps the index an Integer even if `/` has been redefined
      # to produce Rationals (as the mathn library does).
      middle = sorted.size.div(2)
      return sorted[middle] if sorted.size.odd?

      (sorted[middle - 1] + sorted[middle]) / 2.0
    end

    # Split a dataset around a pivot value.
    #
    # pivot - value to compare every element against.
    # data  - array of numbers (the stored dataset by default).
    #
    # Returns a Hash with the elements strictly below the pivot under :low
    # and those strictly above it under :high. Elements equal to the pivot
    # appear in neither list.
    def partition(pivot, data = @data)
      low = data.select { |value| value < pivot }
      high = data.select { |value| value > pivot }
      { :low => low, :high => high }
    end

    # First quartile: the median of the values below the overall median.
    def q1
      @q1 ||= median(partition(median(@data), @data)[:low])
    end

    # Third quartile: the median of the values above the overall median.
    def q3
      @q3 ||= median(partition(median(@data), @data)[:high])
    end

    # Interquartile range, i.e. the spread of the middle 50% of the data.
    def iqr
      @iqr ||= q3 - q1
    end

    # Returns an array of outlying data points: those more than 1.5 * IQR
    # below the first quartile or above the third quartile.
    def outliers
      @outliers ||= begin
        reach = 1.5 * iqr
        lower_fence = q1 - reach
        upper_fence = q3 + reach
        @data.select { |value| value < lower_fence || value > upper_fence }
      end
    end

    # Computes the arithmetic mean (the most common average).
    #
    # data - array of numbers (the stored dataset by default).
    #
    # Returns a Float; 0.0 for an empty dataset, mirroring #median.
    def mean(data = @data)
      return 0.0 if data.empty?

      # Accumulate from 0.0 so Integer input is not subject to integer division.
      data.inject(0.0) { |sum, value| sum + value } / data.size
    end

    # Computes the mode and, as a by-product, a histogram of the data.
    #
    # Returns the most frequent value. On ties the value that reaches the
    # highest count first while walking the sorted data wins. Returns nil
    # for an empty dataset.
    def mode
      return @mode unless @mode.nil?

      # Start from a clean slate so a recomputation never double-counts.
      @hist = Hash.new(0)
      @max_freq = 0
      @data.each do |value|
        @hist[value] += 1
        next unless @hist[value] > @max_freq

        @max_freq = @hist[value]
        @mode = value
      end
      @mode
    end

    # Computes the variance (sum of squared deviations from the mean divided
    # by the number of elements). Used to measure the degree of spread in the
    # dataset. Returns 0.0 for an empty dataset.
    def variance
      return @variance if @variance
      return @variance = 0.0 if @data.empty?

      # The mean is computed once up front rather than once per element.
      average = mean
      squared_deviations =
        @data.inject(0.0) { |sum, value| sum + (value - average)**2 }
      @variance = squared_deviations / @data.size
    end

    # Standard deviation. Square root of the variance; a measure of the
    # spread of the data about the mean.
    def std_dev
      @std_dev ||= Math.sqrt(variance)
    end

    # Geometric mean: the n-th root of the product of the n values. Only
    # applies to non-negative numbers, and relates to the log-normal
    # distribution.
    def geometric_mean
      @geometric_mean ||=
        @data.inject(1) { |product, value| product * value }**(1.0 / @data.size)
    end

    # Harmonic or subcontrary mean: the number of values divided by the sum
    # of their reciprocals. Tends strongly toward the least elements of the
    # dataset.
    def harmonic_mean
      @harmonic_mean ||=
        @data.size.to_f / @data.inject(0) { |sum, value| sum + (1.0 / value) }
    end

    # Return a string with statistical info about the dataset.
    #
    # The fields are, in order: minimum, first quartile, median, third
    # quartile, maximum, mean, variance and standard deviation. Each one is
    # formatted with three decimals and followed by a single space.
    def to_s
      fields = [data.min, q1, median, q3, data.max, mean, variance, std_dev]
      fields.map { |value| format('%.3f ', value) }.join
    end
  end
end