From 3c58415e373f17608a986997106a79b66977ba34 Mon Sep 17 00:00:00 2001
From: Ryusuke Chiba <chiba.r221@gmail.com>
Date: Tue, 14 Jun 2022 11:21:03 +0900
Subject: [PATCH] Add distance-rankings command

Signed-off-by: Ryusuke Chiba <chiba.r221@gmail.com>
---
 README.md   | 62 +++++++++++++++++++++++++++++++++++++++++++++++------
 esqa/cli.py | 14 ++++++++++++
 2 files changed, 69 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index b4c3af2..bd8d2f9 100644
--- a/README.md
+++ b/README.md
@@ -29,11 +29,11 @@ When we run Esqa, the following steps are executed.
 
 The following is the image.
 
-![Esqa overiew](doc/esqa-behavior.png "overivew")
+![Esqa overview](doc/esqa-behavior.png "overview")
 
 ## Functions
 
-Specfically esqa provides two functions, **assertion** and **compute distance**
+Specifically, Esqa provides two functions, **assertion** and **compute distance**
 between rankings from two index and query settings.
 
 With assertion function, we can check if the results ranking satisfy the expectation for the specified queries.
@@ -118,7 +118,7 @@ with **variables**.
 
 The following is an example of template file. As we can see, `query`
 block contains a variable `${query_str}`. The variables are injected
-from the Esqa configuraiton file.
+from the Esqa configuration file.
 
 ```json
 {
@@ -174,7 +174,7 @@ Esqa computes the comparison between the rankings in the current settings and pr
 
 Before we run the command we prepare the configuration for the esqa distance function.
 The format is the almost the same as validation settings except that the settings for
-distance function does not have asseert blocks.
+the distance function do not have assert blocks.
 
 
 ```json
@@ -190,13 +190,13 @@ distance function does not have asseert blocks.
 }
 ```
 
-Before we chagnge the Es settings, we run the ranking command specifying the configuration file.
+Before changing the Es settings, we run the save command to preserve the current ranking.
 
 ```bash
 esqa save --config sample/ranking.json --index sample > output/ranking_before_change.json
 ```
 
-Then we change the Es index or query settings and run distance command specifing the ranking file.
+Then we change the Es index or query settings and run the distance command, specifying the ranking file.
 
 ```bash
 esqa distance --config sample/compared_ranking.json --index sample --ranking output/ranking.json
@@ -244,4 +244,52 @@ esqa distance --config sample/compared_ranking.json --index sample --ranking out
 ]
 ```
 
-We get the query cases which change the rankings compared with the rankings before change the settings.
+Alternatively, we can compare two saved rankings directly with the `distance-rankings` command.
+
+```bash
+esqa distance-rankings --ranking1 output/ranking1.json --ranking2 output/ranking2.json
+[
+  {
+    "name": "Windows PC",
+    "similarity": 0.5,
+    "ranking_pair": [
+      [
+        "4",
+        "6"
+      ],
+      [
+        "5",
+        "4"
+      ],
+      [
+        "6",
+        "5"
+      ]
+    ]
+  },
+  {
+    "name": "Tablet",
+    "similarity": 0.5416666666666666,
+    "ranking_pair": [
+      [
+        "22",
+        "21"
+      ],
+      [
+        "23",
+        "22"
+      ],
+      [
+        "3",
+        "23"
+      ],
+      [
+        "21",
+        "3"
+      ]
+    ]
+  }
+]
+```
+
+The output lists the query cases whose rankings changed significantly between the two files.
diff --git a/esqa/cli.py b/esqa/cli.py
index 19ca22b..dd3022d 100644
--- a/esqa/cli.py
+++ b/esqa/cli.py
@@ -67,5 +67,19 @@ def distance(ranking, config, threshold, target_field, index):
     print(_dump(results))
 
 
+@main.command()
+@click.option("-r1", "--ranking1", type=str, help="first ranking file")
+@click.option("-r2", "--ranking2", type=str, help="second ranking file")
+@click.option("-t", "--threshold", type=float, help="threshold", default=0.7)
+@click.option(
+    "-f", "--target-field", type=str, help="field to compare the document", default="id"
+)
+def distance_rankings(ranking1, ranking2, threshold, target_field):
+    """Load two saved ranking files, compare them and print the dumped result."""
+    first = load_rankings(ranking1)
+    second = load_rankings(ranking2)
+    print(_dump(compare_rankings(first, second, threshold, target_field)))
+
+
 if __name__ == "__main__":
     main()