// maxLineBytes caps the length of a single input line. bufio.Scanner's
// default limit is 64 KiB, and a longer line would otherwise abort the
// whole run with bufio.ErrTooLong.
const maxLineBytes = 16 * 1024 * 1024

// parseFraction converts a command-line argument into a sampling fraction.
// It reports false when arg is not a number or lies outside [0.0, 1.0].
func parseFraction(arg string) (float64, bool) {
	target, err := strconv.ParseFloat(arg, 64)
	// The range test is written positively on purpose: ParseFloat accepts
	// "NaN", and NaN fails every comparison, so "target < 0 || target > 1"
	// would let it through.
	if err != nil || !(target >= 0.0 && target <= 1.0) {
		return 0, false
	}
	return target, true
}

// sampleLines reads lines from in and writes each one, followed by "\n", to
// out when the value drawn from next is below target. next must return values
// in [0.0, 1.0).
//
// It returns the number of lines read, the number of lines written, and the
// first write error encountered. Read errors are not reported here; the
// caller should check in.Err() afterwards. out is not flushed.
func sampleLines(in *bufio.Scanner, out *bufio.Writer, target float64, next func() float64) (total, emitted int64, err error) {
	for in.Scan() {
		total++
		// With draws in [0.0, 1.0), ">=" drops every line for target 0.0
		// (even when the draw is exactly 0) and keeps every line for 1.0.
		if next() >= target {
			continue
		}
		// Bytes avoids the per-line string allocation of Text; the slice is
		// only valid until the next Scan, and it is consumed right here.
		if _, err := out.Write(in.Bytes()); err != nil {
			return total, emitted, err
		}
		if err := out.WriteByte('\n'); err != nil {
			return total, emitted, err
		}
		emitted++
	}
	return total, emitted, nil
}

// main samples lines from standard input to standard output. Its single
// argument is the fraction of lines to keep, between 0.0 and 1.0. A summary
// of the line counts is printed to standard error.
//
// Exit status is 1 for a bad argument and 2 for an I/O failure.
func main() {
	if len(os.Args) != 2 {
		// "%%" is required: a bare "% s" in a format string is parsed as a verb.
		fmt.Fprintf(os.Stderr, "invalid argument. use a fraction to sample between 0.0 (no sampling) and 1.0 (100%% sampling)\n")
		os.Exit(1)
	}

	target, ok := parseFraction(os.Args[1])
	if !ok {
		fmt.Fprintf(os.Stderr, "Unable to convert %q to a float between 0.0 and 1.0\n", os.Args[1])
		os.Exit(1)
	}

	out := bufio.NewWriterSize(os.Stdout, 1024*512)
	scanner := bufio.NewScanner(os.Stdin)
	scanner.Buffer(make([]byte, 0, 64*1024), maxLineBytes)

	// A private generator avoids seeding the shared global source.
	rng := rand.New(rand.NewSource(time.Now().UnixNano()))

	total, emitted, err := sampleLines(scanner, out, target, rng.Float64)
	// Flush even if reading failed part-way, so lines already sampled are
	// not lost in the buffer.
	if err == nil {
		err = out.Flush()
	}
	if err != nil {
		fmt.Fprintln(os.Stderr, "Error writing standard output:", err)
		os.Exit(2)
	}
	if err := scanner.Err(); err != nil {
		fmt.Fprintln(os.Stderr, "Error reading standard input:", err)
		os.Exit(2)
	}
	fmt.Fprintf(os.Stderr, "Total of %d lines. Sampled to %d\n", total, emitted)
}