forked from dgryski/go-topk
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtopk_test.go
88 lines (65 loc) · 1.54 KB
/
topk_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
package topk
import (
"bufio"
"bytes"
"encoding/gob"
"log"
"os"
"reflect"
"sort"
"testing"
)
type freqs struct {
keys []string
counts map[string]int
}
func (f freqs) Len() int { return len(f.keys) }
// Actually 'Greater', since we want decreasing
func (f *freqs) Less(i, j int) bool {
return f.counts[f.keys[i]] >= f.counts[f.keys[j]] || f.counts[f.keys[i]] == f.counts[f.keys[j]] && f.keys[i] < f.keys[j]
}
func (f *freqs) Swap(i, j int) { f.keys[i], f.keys[j] = f.keys[j], f.keys[i] }
func TestTopK(t *testing.T) {
f, err := os.Open("testdata/domains.txt")
if err != nil {
t.Fatal(err)
}
scanner := bufio.NewScanner(f)
tk := New(100)
exact := make(map[string]int)
for scanner.Scan() {
item := scanner.Text()
exact[item]++
tk.Insert(item, 1)
}
if err := scanner.Err(); err != nil {
log.Println("error during scan: ", err)
}
var keys []string
for k, _ := range exact {
keys = append(keys, k)
}
freq := &freqs{keys: keys, counts: exact}
sort.Sort(freq)
top := tk.Keys()
// at least the top 25 must be in order
for i := 0; i < 25; i++ {
if top[i].Key != freq.keys[i] {
t.Errorf("key mismatch: idx=%d top=%s (%d) exact=%s (%d)", i, top[i].Key, top[i].Count, freq.keys[i], freq.counts[freq.keys[i]])
}
}
// gob
var buf bytes.Buffer
enc := gob.NewEncoder(&buf)
if err := enc.Encode(tk); err != nil {
t.Error(err)
}
decoded := New(100)
dec := gob.NewDecoder(&buf)
if err := dec.Decode(decoded); err != nil {
t.Error(err)
}
if !reflect.DeepEqual(tk, decoded) {
t.Error("they are not equal.")
}
}