forked from ekzhu/lshensemble
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaccuracy_benchmark_test.go
116 lines (110 loc) · 2.67 KB
/
accuracy_benchmark_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
package lshensemble
import (
"bufio"
"encoding/csv"
"log"
"os"
"strconv"
"strings"
"time"
)
// benchmark_accuracy compares the query results stored in queryResultFilename
// against the ground truth stored in groundTruthFilename, logs the mean
// precision and mean recall over all queries, and writes a per-query CSV
// report (columns: Query, Precision, Recall) to outputFilename.
//
// Results and ground truths are paired by position: the i-th query result is
// scored against the i-th ground-truth entry. Extra trailing ground-truth
// entries are ignored.
//
// The function panics if either input file cannot be read, if the ground
// truth has fewer entries than the query results, if a query key is not a
// string, or if the report cannot be created or fully written.
func benchmark_accuracy(groundTruthFilename, queryResultFilename, outputFilename string) {
	groundTruths := readQueryResultFile(groundTruthFilename)
	queryResults := readQueryResultFile(queryResultFilename)

	// Pairing is positional, so every query result needs a ground-truth
	// partner. Fail with a descriptive message instead of an opaque
	// index-out-of-range panic inside the scoring loop.
	if len(groundTruths) < len(queryResults) {
		panic("ground truth has " + strconv.Itoa(len(groundTruths)) +
			" entries but query results have " + strconv.Itoa(len(queryResults)))
	}

	precisions := make([]float64, 0, len(queryResults))
	recalls := make([]float64, 0, len(queryResults))
	for i := range queryResults {
		recall, precision := recallPrecision(queryResults[i], groundTruths[i])
		precisions = append(precisions, precision)
		recalls = append(recalls, recall)
	}
	log.Printf("Mean Precision = %.4f", mean(precisions))
	log.Printf("Mean Recall = %.4f", mean(recalls))

	// Output results
	file, err := os.Create(outputFilename)
	if err != nil {
		panic(err)
	}
	// Safety net for the panic paths below; on the success path the file is
	// closed explicitly so that the close error can be checked, and this
	// second Close is a harmless no-op error.
	defer file.Close()

	out := csv.NewWriter(file)
	if err := out.Write([]string{"Query", "Precision", "Recall"}); err != nil {
		panic(err)
	}
	for i := range queryResults {
		line := []string{
			queryResults[i].queryKey.(string),
			strconv.FormatFloat(precisions[i], 'f', -1, 64),
			strconv.FormatFloat(recalls[i], 'f', -1, 64),
		}
		if err := out.Write(line); err != nil {
			panic(err)
		}
	}
	// The csv writer is buffered: Flush pushes the remaining rows to the file
	// and Error reports any failure that happened while doing so.
	out.Flush()
	if err := out.Error(); err != nil {
		panic(err)
	}
	if err := file.Close(); err != nil {
		panic(err)
	}
	log.Printf("Accuracy report output to %s", outputFilename)
}
// recallPrecision scores the candidates of result against the candidates of
// groundTruth.
//
// recall is the fraction of ground-truth candidates that appear in result,
// and precision is the fraction of result candidates that appear in the
// ground truth. Two shortcuts are applied before any counting:
//   - an empty ground truth scores (1.0, 1.0) regardless of result;
//   - otherwise, an empty result scores (0.0, 0.0).
//
// The function panics if either candidate list contains duplicates.
func recallPrecision(result, groundTruth queryResult) (recall, precision float64) {
	expected := groundTruth.candidates
	returned := result.candidates

	switch {
	case len(expected) == 0:
		return 1.0, 1.0
	case len(returned) == 0:
		return 0.0, 0.0
	}

	// Build both membership sets first; the duplicate checks below rely on
	// the set sizes.
	expectedSet := make(map[interface{}]bool)
	for i := 0; i < len(expected); i++ {
		expectedSet[expected[i]] = true
	}
	returnedSet := make(map[interface{}]bool)
	for i := 0; i < len(returned); i++ {
		returnedSet[returned[i]] = true
	}

	// A set smaller than its source list means some candidate was repeated.
	if len(expectedSet) != len(expected) {
		panic("Ground truth contains duplicates")
	}
	if len(returnedSet) != len(returned) {
		panic("Query result contain duplicates!")
	}

	// Count the returned candidates that are also expected.
	hits := 0
	for candidate := range returnedSet {
		if expectedSet[candidate] {
			hits++
		}
	}

	recall = float64(hits) / float64(len(expectedSet))
	precision = float64(hits) / float64(len(returnedSet))
	return recall, precision
}
// readQueryResultFile loads query results from the text file at
// queryResultFile and returns them in file order.
//
// Each line is split on tab characters into:
//
//	field 0    the query key (stored as a string)
//	field 1    the query duration, in time.ParseDuration syntax
//	field 2..  zero or more candidate keys (stored as strings)
//
// The function panics if the file cannot be opened or read, if a line has
// fewer than two fields, or if a duration cannot be parsed. A file with no
// lines yields an empty, non-nil slice.
func readQueryResultFile(queryResultFile string) []queryResult {
	file, err := os.Open(queryResultFile)
	if err != nil {
		panic(err)
	}
	// Deferred so the file is also released when a malformed line panics.
	defer file.Close()

	results := make([]queryResult, 0)
	scanner := bufio.NewScanner(file)
	for lineNo := 1; scanner.Scan(); lineNo++ {
		raw := strings.Split(scanner.Text(), "\t")
		// The key and the duration are mandatory; report a short line with
		// its position instead of failing on a bare slice-bounds panic.
		if len(raw) < 2 {
			panic("malformed query result at " + queryResultFile + ":" +
				strconv.Itoa(lineNo) + ": expected at least 2 tab-separated fields")
		}
		dur, err := time.ParseDuration(raw[1])
		if err != nil {
			panic(err)
		}
		candidates := make([]interface{}, len(raw)-2)
		for i, c := range raw[2:] {
			candidates[i] = c
		}
		results = append(results, queryResult{
			queryKey:   raw[0],
			duration:   dur,
			candidates: candidates,
		})
	}
	// Scan returns false both at end of file and on failure (I/O error, or a
	// line exceeding the scanner's token limit), so the error must be checked
	// after the loop; otherwise a truncated read would pass silently.
	if err := scanner.Err(); err != nil {
		panic(err)
	}
	return results
}
// mean returns the arithmetic average of the values in a, summing them in
// slice order. For an empty (or nil) slice the result is 0/0, i.e. NaN.
func mean(a []float64) float64 {
	var total float64
	for i := 0; i < len(a); i++ {
		total += a[i]
	}
	count := float64(len(a))
	return total / count
}