-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathkmv_test.go
122 lines (97 loc) · 2.34 KB
/
kmv_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
package gokmv
import (
"fmt"
"math/rand"
"testing"
)
func TestKMVSize(t *testing.T) {
kmv := NewKMV(2)
if kmv.Size() != 0 {
t.Error("An empty table should have size 0")
}
kmv.InsertUint64(1)
if kmv.Size() != 1 {
t.Error("We only added one element")
}
for i := 0; i < 10; i++ {
kmv.InsertUint64(uint64(i))
}
if kmv.Size() != 2 {
t.Error("Size should not be bigger than maxSize")
}
}
func TestKMVSeed(t *testing.T) {
kmv := NewKMV(2)
if kmv.Seed() == 0 {
t.Error("Seed should be a random number when it is not passed as a parameter")
}
kmv = NewKMVWithSeed(2, 12)
if kmv.Seed() != 12 {
t.Error("Seed was specified in the constructor")
}
}
func TestKMVElementsAdded(t *testing.T) {
kmv := NewKMV(2)
if kmv.ElementsAdded() != 0 {
t.Error("We did not add any element")
}
kmv.InsertUint64(1)
if kmv.ElementsAdded() != 1 {
t.Error("We added one element")
}
for i := 0; i < 10; i++ {
kmv.InsertUint64(uint64(i))
}
if kmv.ElementsAdded() != 11 {
t.Error("We added 11 elements")
}
}
func TestKMVInsertString(t *testing.T) {
kmv := NewKMV(2)
kmv.InsertString("Golang")
if kmv.ElementsAdded() != 1 || kmv.Size() != 1 {
t.Error("We added element one")
}
}
func inBounds(relativeError float64, approximation, real int) bool {
fApprox := float64(approximation)
fReal := float64(real)
if fApprox < (1-relativeError)*fReal {
return false
}
if fApprox > (1+relativeError)*fReal {
return false
}
return true
}
func TestKMVEstimateCardinality(t *testing.T) {
data := make(map[uint64]bool)
dataSize := 1000000
rand.Seed(42)
for len(data) != dataSize {
n := rand.Uint64()
data[n] = true
}
// We have a sample of `dataSize` random uint64
// We will estimate the carinality of the sample
// `iterations` times and check that the `avgEstimation` is
// not off by more than a factor of `relativeErr`
avgEstimation := 0
avgSize := 0
iterations := 10
for i := 0; i < iterations; i++ {
kmv := NewKMV(64)
for key := range data {
kmv.InsertString(fmt.Sprint(key))
}
avgEstimation += int(kmv.EstimateCardinality())
avgSize += kmv.Size()
}
avgEstimation /= iterations
avgSize /= iterations
relativeErr := 0.04
if !inBounds(relativeErr, avgEstimation, dataSize) {
errMsg := fmt.Sprintf("The kmv estimation was not in the theoretical bounds: %d out of %d", avgEstimation, dataSize)
t.Error(errMsg)
}
}