forked from ekzhu/lshensemble
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlshensemble_benchmark_test.go
65 lines (59 loc) · 1.77 KB
/
lshensemble_benchmark_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
package lshensemble
import (
"log"
"sort"
"time"
)
// benchmark_lshensemble runs one end-to-end LSH Ensemble benchmark pass.
//
// It minhashes rawDomains and rawQueries with a fixed number of hash
// functions, builds an index over the domain records, queries that index once
// per query record at the given containment threshold, and streams the
// per-query results (key, duration, candidates) to outputQueryResults together
// with outputFilename.
//
// Progress and timings are reported through the standard logger. The function
// panics if the index cannot be built, because there is nothing meaningful to
// query in that case.
func benchmark_lshensemble(rawDomains []rawDomain, rawQueries []rawDomain, threshold float64, outputFilename string) {
	numHash := 256
	numPart := 32
	maxK := 4

	// Minhash domains
	start := time.Now()
	domainRecords := minhashDomains(rawDomains, numHash)
	log.Printf("Minhash %d domains in %s", len(domainRecords),
		time.Since(start).String())

	// Minhash queries
	start = time.Now()
	queries := minhashDomains(rawQueries, numHash)
	log.Printf("Minhash %d query domains in %s", len(queries),
		time.Since(start).String())

	// Start main body of lsh ensemble
	// Indexing
	log.Print("Start building LSH Ensemble index")
	// The records are ordered with BySize before being streamed to the
	// bootstrap call (presumably it expects size-ordered input — confirm
	// against BootstrapLshEnsemblePlus).
	sort.Sort(BySize(domainRecords))
	index, err := BootstrapLshEnsemblePlus(numPart, numHash, maxK, len(domainRecords),
		Recs2Chan(domainRecords))
	if err != nil {
		// Do not continue with an index that failed to build: every query
		// result produced from it would be meaningless.
		log.Panicf("building LSH Ensemble index: %v", err)
	}
	log.Print("Finished building LSH Ensemble index")

	// Querying
	log.Printf("Start querying LSH Ensemble index with %d queries", len(queries))
	results := make(chan queryResult)
	// Produce results in a separate goroutine so that outputQueryResults can
	// consume them as they arrive; the channel is closed once every query has
	// been issued so the consumer can tell that no more results will be sent.
	go func() {
		for _, query := range queries {
			r, d := index.QueryTimed(query.Signature, query.Size, threshold)
			results <- queryResult{
				queryKey:   query.Key,
				duration:   d,
				candidates: r,
			}
		}
		close(results)
	}()
	outputQueryResults(results, outputFilename)
	log.Printf("Finished querying LSH Ensemble index, output %s", outputFilename)
}
// minhashDomains converts each raw domain into a DomainRecord.
//
// For every element of rawDomains a fresh Minhash is created from
// benchmarkSeed and numHash, each value ranged over from the domain's values
// is pushed into it as bytes, and the resulting signature is stored in the
// record alongside the domain's key and its number of values.
//
// The returned slice has one record per input domain, in input order; it is
// non-nil and empty when rawDomains is empty.
func minhashDomains(rawDomains []rawDomain, numHash int) []*DomainRecord {
	// Exactly one record is appended per input domain, so reserve the full
	// capacity up front instead of growing the slice repeatedly.
	domainRecords := make([]*DomainRecord, 0, len(rawDomains))
	for _, domain := range rawDomains {
		mh := NewMinhash(benchmarkSeed, numHash)
		for v := range domain.values {
			mh.Push([]byte(v))
		}
		domainRecords = append(domainRecords, &DomainRecord{
			Key:       domain.key,
			Size:      len(domain.values),
			Signature: mh.Signature(),
		})
	}
	return domainRecords
}