-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
New descriptive statistics function for numerical slices
- Loading branch information
1 parent
09da71b
commit 1b5064b
Showing
9 changed files
with
377 additions
and
54 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
1.79.1 | ||
1.80.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
package sliceutil_test | ||
|
||
import ( | ||
"fmt" | ||
"log" | ||
|
||
"github.com/Vonage/gosrvlib/pkg/sliceutil" | ||
) | ||
|
||
func ExampleStats() { | ||
data := []int{53, 83, 13, 79, 13, 37, 83, 29, 37, 13, 83, 83} | ||
|
||
ds, err := sliceutil.Stats(data) | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
|
||
fmt.Printf("Count: %d\n", ds.Count) | ||
fmt.Printf("Entropy: %.3f\n", ds.Entropy) | ||
fmt.Printf("ExKurtosis: %.3f\n", ds.ExKurtosis) | ||
fmt.Printf("Max: %d\n", ds.Max) | ||
fmt.Printf("MaxID: %d\n", ds.MaxID) | ||
fmt.Printf("Mean: %.3f\n", ds.Mean) | ||
fmt.Printf("MeanDev: %.3f\n", ds.MeanDev) | ||
fmt.Printf("Median: %.3f\n", ds.Median) | ||
fmt.Printf("Min: %d\n", ds.Min) | ||
fmt.Printf("MinID: %d\n", ds.MinID) | ||
fmt.Printf("Mode: %d\n", ds.Mode) | ||
fmt.Printf("ModeFreq: %d\n", ds.ModeFreq) | ||
fmt.Printf("Range: %d\n", ds.Range) | ||
fmt.Printf("Skewness: %.3f\n", ds.Skewness) | ||
fmt.Printf("StdDev: %.3f\n", ds.StdDev) | ||
fmt.Printf("Sum: %d\n", ds.Sum) | ||
fmt.Printf("Variance: %.3f\n", ds.Variance) | ||
|
||
// Output: | ||
// Count: 12 | ||
// Entropy: -2277.134 | ||
// ExKurtosis: -1.910 | ||
// Max: 83 | ||
// MaxID: 1 | ||
// Mean: 50.500 | ||
// MeanDev: 0.000 | ||
// Median: 45.000 | ||
// Min: 13 | ||
// MinID: 2 | ||
// Mode: 83 | ||
// ModeFreq: 4 | ||
// Range: 70 | ||
// Skewness: -0.049 | ||
// StdDev: 30.285 | ||
// Sum: 606 | ||
// Variance: 917.182 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,179 @@ | ||
package sliceutil | ||
|
||
import ( | ||
"fmt" | ||
"math" | ||
"slices" | ||
|
||
"github.com/Vonage/gosrvlib/pkg/typeutil" | ||
) | ||
|
||
// DescStats contains descriptive statistics items for a data set. | ||
type DescStats[V typeutil.Number] struct { | ||
// Count is the total number of items in the data set. | ||
Count int `json:"count"` | ||
|
||
// Entropy computes the Shannon entropy of a distribution. | ||
Entropy float64 `json:"entropy"` | ||
|
||
// ExKurtosis is the population excess kurtosis of the data set. | ||
// The kurtosis is defined by the 4th moment of the mean divided by the squared variance. | ||
// The excess kurtosis subtracts 3.0 so that the excess kurtosis of the normal distribution is zero. | ||
ExKurtosis float64 `json:"exkurtosis"` | ||
|
||
// Max is the maximum value of the data. | ||
Max V `json:"max"` | ||
|
||
// MaxID is the index (key) of the Max malue in a data set. | ||
MaxID int `json:"maxid"` | ||
|
||
// Mean or Average is a central tendency of the data. | ||
Mean float64 `json:"mean"` | ||
|
||
// MeanDev is the Mean Deviation or Mean Absolute Deviation. | ||
// It is an average of absolute differences between each value in the data, and the average of all values. | ||
MeanDev float64 `json:"meandev"` | ||
|
||
// Median is the value that divides the data into 2 equal parts. | ||
// When the data is sorted, the number of terms on the left and right side of median is the same. | ||
Median float64 `json:"median"` | ||
|
||
// Min is the minimal value of the data. | ||
Min V `json:"min"` | ||
|
||
// MinID is the index (key) of the Min malue in a data set. | ||
MinID int `json:"minid"` | ||
|
||
// Mode is the term appearing maximum time in data set. | ||
// It is the term that has the highest frequency. | ||
Mode V `json:"mode"` | ||
|
||
// ModeFreq is the frequency of the Mode value. | ||
ModeFreq int `json:"modefreq"` | ||
|
||
// Range is the difference between the highest (Max) and lowest (Min) value. | ||
Range V `json:"range"` | ||
|
||
// Skewness is a measure of the asymmetry of the probability distribution of a real-valued random variable about its mean. | ||
// Provides the adjusted Fisher-Pearson standardized moment coefficient. | ||
Skewness float64 `json:"skewness"` | ||
|
||
// StdDev is the Standard deviation of the data. | ||
// It measures the average distance between each quantity and mean. | ||
StdDev float64 `json:"stddev"` | ||
|
||
// Sum of all the values in the data. | ||
Sum V `json:"sum"` | ||
|
||
// Variance is a square of average distance between each quantity and Mean. | ||
Variance float64 `json:"variance"` | ||
} | ||
|
||
// Stats returns descriptive statistics parameters to summarize the input data set. | ||
// | ||
//nolint:gocognit,gocyclo | ||
func Stats[S ~[]V, V typeutil.Number](s S) (*DescStats[V], error) { | ||
n := len(s) | ||
|
||
if n < 1 { | ||
return nil, fmt.Errorf("input slice is empty") | ||
} | ||
|
||
ord := slices.Clone(s) | ||
slices.Sort(ord) | ||
|
||
ds := &DescStats[V]{ | ||
Count: len(s), | ||
Max: s[0], | ||
Median: float64(s[0]), | ||
Min: s[0], | ||
Mode: s[0], | ||
ModeFreq: 1, | ||
Sum: s[0], | ||
Mean: float64(s[0]), | ||
} | ||
|
||
if n == 1 { | ||
return ds, nil | ||
} | ||
|
||
nf := float64(n) | ||
freq := 1 | ||
|
||
for i := 1; i < n; i++ { | ||
v := s[i] | ||
vf := float64(s[i]) | ||
|
||
ds.Sum += v | ||
|
||
if v < ds.Min { | ||
ds.Min = v | ||
ds.MinID = i | ||
} | ||
|
||
if v > ds.Max { | ||
ds.Max = v | ||
ds.MaxID = i | ||
} | ||
|
||
if v != 0 { | ||
ds.Entropy -= vf * math.Log(vf) | ||
} | ||
|
||
if ord[i] == ord[i-1] { | ||
freq++ | ||
} else { | ||
if freq > ds.ModeFreq { | ||
ds.Mode = ord[i] | ||
ds.ModeFreq = freq | ||
} | ||
freq = 1 | ||
} | ||
} | ||
|
||
if freq > ds.ModeFreq { | ||
ds.Mode = ord[n-1] | ||
ds.ModeFreq = freq | ||
} | ||
|
||
ds.Range = ds.Max - ds.Min | ||
ds.Mean = float64(ds.Sum) / nf | ||
|
||
midpos := n / 2 | ||
if n%2 != 0 { | ||
ds.Median = float64(ord[midpos]) | ||
} else { | ||
ds.Median = (float64(ord[midpos-1]) + float64(ord[midpos])) / 2 | ||
} | ||
|
||
for i := 0; i < n; i++ { | ||
d := float64(ord[i]) - ds.Mean | ||
ds.MeanDev += d | ||
ds.Variance += d * d | ||
} | ||
|
||
ds.MeanDev /= nf | ||
ds.Variance /= (nf - 1) | ||
ds.StdDev = math.Sqrt(ds.Variance) | ||
|
||
if n < 3 { | ||
return ds, nil | ||
} | ||
|
||
for i := 0; i < n; i++ { | ||
d := (float64(ord[i]) - ds.Mean) / ds.StdDev | ||
d3 := d * d * d | ||
ds.Skewness += d3 | ||
ds.ExKurtosis += d3 * d | ||
} | ||
|
||
ds.Skewness *= (nf / ((nf - 1) * (nf - 2))) // adjusted Fisher-Pearson standardized moment coefficient | ||
|
||
if n < 4 { | ||
ds.ExKurtosis = 0 | ||
} else { | ||
ds.ExKurtosis = (ds.ExKurtosis * (((nf + 1) / (nf - 1)) * (nf / (nf - 2)) * (1 / (nf - 3)))) - (3 * ((nf - 1) / (nf - 2)) * ((nf - 1) / (nf - 3))) | ||
} | ||
|
||
return ds, nil | ||
} |
Oops, something went wrong.