-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.go
152 lines (130 loc) · 3.45 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
package main
import (
"bufio"
"flag"
"fmt"
"os"
"os/exec"
"sync"
"time"
"github.com/influxdata/tdigest"
)
// globalMutex guards the statistics shared between the read loop in main and
// the periodic printer: main holds the write lock while it updates them, and
// printStatistics holds the read lock while it prints them.
// It is package-level mutable state, which the original author flags as a
// shortcut that should not be imitated.
var globalMutex sync.RWMutex
// Tunables for reporting and bucketing.
const (
// refreshInterval is the period of the ticker in main that triggers a statistics print.
refreshInterval = 10 * time.Second
// bucketSize is the width of each bucket main appends, i.e. 1000-2000, 2000-3000, etc.
bucketSize = 1000
)
// Category is one histogram bucket of line lengths. It covers the half-open
// interval [start, end) and records how many lines fell into it.
type Category struct {
	name              string // label used when printing and in the CSV output
	start, end, count int    // inclusive lower bound, exclusive upper bound, hits
}

// inRange reports whether val lies in [start, end) for this category.
func (c *Category) inRange(val int) bool {
	if val < c.start {
		return false
	}
	return val < c.end
}
// main reads lines from stdin, counts their byte lengths into
// bucketSize-wide categories, feeds the lengths into a t-digest, and prints a
// summary every refreshInterval and once more at end of input. When -f is
// given, the per-category counts are also written to that file as CSV.
//
// The process exits with status 1 if reading stdin or writing the CSV fails.
func main() {
	var outputFile string
	flag.StringVar(&outputFile, "f", "", "Filename to write the output in CSV format. If not provided, will print to stdout.")
	flag.Parse()

	// 10000 is the compression factor; the higher, the more accurate the results.
	td := tdigest.NewWithCompression(10000)

	// Start with the first bucket; wider inputs append further buckets on demand.
	categories := []*Category{
		{fmt.Sprintf("0-%d chars", bucketSize), 0, bucketSize, 0},
	}

	scanner := bufio.NewScanner(os.Stdin)
	// Allow lines of up to 100MB; a longer line ends the scan with an error,
	// which is reported after the loop.
	buf := make([]byte, 0, 100*1024*1024)
	scanner.Buffer(buf, cap(buf))

	totalLines := 0

	// Print statistics every refreshInterval from a background goroutine.
	ticker := time.NewTicker(refreshInterval)
	done := make(chan bool)
	go func() {
		for {
			select {
			case <-ticker.C:
				// categories and totalLines are written by the main loop, so
				// copy them under the read lock instead of reading them
				// unsynchronized while building the call arguments.
				globalMutex.RLock()
				cats, lines := categories, totalLines
				globalMutex.RUnlock()
				printStatistics(cats, lines, td)
			case <-done:
				return
			}
		}
	}()

	// main loop
	for scanner.Scan() {
		// Bytes avoids allocating a string per line just to measure it.
		length := len(scanner.Bytes())

		globalMutex.Lock()
		td.Add(float64(length), 1)
		totalLines++

		// Grow the bucket list until the last bucket can hold this length.
		lastCategory := categories[len(categories)-1]
		for length >= lastCategory.end {
			newEnd := lastCategory.end + bucketSize
			categories = append(categories, &Category{fmt.Sprintf("%d-%d chars", lastCategory.end, newEnd), lastCategory.end, newEnd, 0})
			lastCategory = categories[len(categories)-1]
		}

		for _, cat := range categories {
			if cat.inRange(length) {
				cat.count++
				break
			}
		}
		globalMutex.Unlock()
	}

	// The send is unbuffered, so it completes only once the printer goroutine
	// is back in its select; no periodic print can overlap the final one.
	done <- true
	ticker.Stop()

	// print statistics one last time
	printStatistics(categories, totalLines, td)

	// Report failures after the final print, which clears the screen.
	exitCode := 0
	if err := scanner.Err(); err != nil {
		fmt.Fprintf(os.Stderr, "Error reading input: %v\n", err)
		exitCode = 1
	}

	if outputFile != "" {
		if err := writeToCSV(outputFile, categories); err != nil {
			fmt.Fprintf(os.Stderr, "Error writing to output file: %v\n", err)
			exitCode = 1
		}
	}

	if exitCode != 0 {
		os.Exit(exitCode)
	}
}
// clearScreen wipes the terminal by running the external "clear" command with
// its output attached to this process's stdout.
func clearScreen() {
	// An ANSI escape sequence is not used here because it did not work over ssh.
	clearCmd := exec.Command("clear")
	clearCmd.Stdout = os.Stdout
	// Best effort: if the command fails, the statistics are still printed.
	_ = clearCmd.Run()
}
// printStatistics clears the screen and prints the count of every non-empty
// category, the total number of lines, and the 50th/90th/95th/99th
// percentiles reported by td. It holds globalMutex's read lock for the whole
// call.
func printStatistics(categories []*Category, totalLines int, td *tdigest.TDigest) {
	globalMutex.RLock()
	defer globalMutex.RUnlock()

	clearScreen()

	fmt.Println("Current statistics:")
	for _, bucket := range categories {
		// Empty buckets are omitted to keep the listing short.
		if bucket.count <= 0 {
			continue
		}
		fmt.Printf("%s - %d\n", bucket.name, bucket.count)
	}
	fmt.Printf("\nTotal lines: %d\n", totalLines)
	fmt.Println("------------------")

	fmt.Print("\nGlobal Summary:\n")
	summary := []struct {
		format   string
		quantile float64
	}{
		{"50th percentile: %f\n", 0.5},
		{"90th percentile: %f\n", 0.9},
		{"95th percentile: %f\n", 0.95},
		{"99th percentile: %f\n", 0.99},
	}
	for _, row := range summary {
		fmt.Printf(row.format, td.Quantile(row.quantile))
	}
}
// writeToCSV writes every category with a non-zero count to filename as CSV,
// preceded by a "Range,Count" header. The file is created, or truncated if it
// already exists.
//
// It returns the first error encountered while creating, writing, flushing,
// or closing the file.
func writeToCSV(filename string, categories []*Category) (err error) {
	file, err := os.Create(filename)
	if err != nil {
		return err
	}
	// A failed Close on a file that was written to can mean lost data, so
	// surface it unless an earlier error is already being returned.
	defer func() {
		if cerr := file.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	// bufio.Writer remembers the first write error and returns it from Flush,
	// so the individual writes below need no checks of their own.
	writer := bufio.NewWriter(file)
	writer.WriteString("Range,Count\n")
	for _, cat := range categories {
		if cat.count > 0 {
			fmt.Fprintf(writer, "%s,%d\n", cat.name, cat.count)
		}
	}
	return writer.Flush()
}