From 1e08c073b095ec11ee0a57d012bb1a3142fa034e Mon Sep 17 00:00:00 2001 From: Dongge Liu Date: Fri, 3 Mar 2023 00:19:03 +1100 Subject: [PATCH] Fix ranking (#1765) 1. Fix average rank ranking: * Need to replace `NaN` in the `experiment_df` with `0`; otherwise, it may cause counterintuitive ranking in rare cases (e.g., two fuzzers both found zero bugs on one benchmark and `NaN` on the rest. They are ranked differently because one got 0 on benchmark `A` and the other got 0 on `B`, where some other fuzzer found some bugs in `A` but no one found any bugs in `B`.) * Use `min` as the ranking method. * Do not need `na_option` as `NaN` is now `0`. Otherwise, it should at least be changed to `bottom`. 2. Fix average score ranking: * Replace `NaN` in the `experiment_df` with `0`. --- analysis/data_utils.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/analysis/data_utils.py b/analysis/data_utils.py index bc2114a17..f6a31218e 100644 --- a/analysis/data_utils.py +++ b/analysis/data_utils.py @@ -362,9 +362,9 @@ def experiment_rank_by_average_rank(experiment_pivot_df): (smaller is better). """ # Rank fuzzers in each benchmark block. - pivot_ranked = experiment_pivot_df.rank('columns', - na_option='keep', - ascending=False) + pivot_ranked = experiment_pivot_df.fillna(0).rank('columns', + method='min', + ascending=False) average_ranks = pivot_ranked.mean().sort_values() return average_ranks.rename('average rank') @@ -373,9 +373,9 @@ def experiment_rank_by_num_firsts(experiment_pivot_df): """Creates experiment level ranking by number of first places in per benchmark rankings (higher is better).""" # Rank fuzzers in each benchmark block. - pivot_ranked = experiment_pivot_df.rank('columns', - na_option='keep', - ascending=False) + pivot_ranked = experiment_pivot_df.fillna(0).rank('columns', + method='min', + ascending=False) # Count first places for each fuzzer. 
firsts = pivot_ranked[pivot_ranked == 1] num_firsts = firsts.sum().sort_values(ascending=False) @@ -386,9 +386,9 @@ def experiment_rank_by_average_normalized_score(experiment_pivot_df): """Creates experiment level ranking by taking the average of normalized per benchmark scores from 0 to 100, where 100 is the highest reach coverage.""" # Normalize coverage values. - benchmark_maximum = experiment_pivot_df.max(axis='columns') - normalized_score = experiment_pivot_df.div(benchmark_maximum, - axis='index').mul(100) + benchmark_maximum = experiment_pivot_df.fillna(0).max(axis='columns') + normalized_score = experiment_pivot_df.fillna(0).div(benchmark_maximum, + axis='index').mul(100) average_score = normalized_score.mean().sort_values(ascending=False) return average_score.rename('average normalized score')