From 90cc69f3c72dda101c378066e5ca9ccffca76cb9 Mon Sep 17 00:00:00 2001
From: Maarten van Gompel <proycon@anaproy.nl>
Date: Mon, 1 Jul 2024 21:06:30 +0200
Subject: [PATCH] documentation and fixes for coverage

---
 README.md   |  8 ++++++++
 src/main.rs | 17 +++++++++++++++--
 2 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index a0cf556..d716158 100644
--- a/README.md
+++ b/README.md
@@ -116,6 +116,14 @@ bad     3488    3491
 
 Unlike before, you will find the matches are now returned in reading order.
 
+If you add `--coverage` (which requires `--tokens` or `--cjk`), an extra final
+line with coverage statistics is printed. This is useful to see how much of the
+text is covered by your lexicon.
+
+```
+#coverage (tokens) = 7/627 = 0.011164274322169059
+```
+
 When using ``--tokens`` we rely on whitespace and punctuation to delimit
 tokens. This does not work for languages such as Chinese, Japanese and Korean
 that are not delimited in such a way. For such languages, similar linear search
diff --git a/src/main.rs b/src/main.rs
index b1e5e02..2e4763c 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -127,6 +127,11 @@ fn main() {
         exit(1);
     }
 
+    if (!args.is_present("tokens") && !args.is_present("cjk")) && args.is_present("coverage") {
+        eprintln!("ERROR: --coverage can only be used with --tokens or --cjk");
+        exit(1);
+    }
+
     let mut lexicons: Vec<Lexicon> = if args.is_present("lexicon") {
         args.get_many("lexicon")
             .unwrap()
@@ -237,7 +242,11 @@ fn main() {
                     "#coverage (tokens) = {}/{} = {}",
                     matchcount,
                     totalcount,
-                    matchcount as f64 / totalcount as f64
+                    if totalcount == 0 {
+                        0.0
+                    } else {
+                        matchcount as f64 / totalcount as f64
+                    }
                 );
             }
         } else if args.is_present("cjk") {
@@ -281,7 +290,11 @@ fn main() {
                         "#coverage (chars) = {}/{} = {}",
                         matchcount,
                         totalcount,
-                        matchcount as f64 / totalcount as f64
+                        if totalcount == 0 {
+                            0.0
+                        } else {
+                            matchcount as f64 / totalcount as f64
+                        }
                     );
                 }
             }