From d97255dc3aea1227de84a422d46e361981bec47c Mon Sep 17 00:00:00 2001
From: Dmitry Ustalov <dmitry.ustalov@gmail.com>
Date: Sun, 15 Sep 2024 20:32:43 +0200
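Subject: [PATCH] Do not rely on the global random seed

Remove the module-level np.random.seed(42) call so that importing
utils_math no longer mutates NumPy's global random state.

get_bootstrap_result now passes the round index as random_state to
DataFrame.sample, and get_bootstrap_result_style_control draws its
indices from a local np.random.default_rng(42) generator. Both
bootstraps stay reproducible, but the resampled draws are not the same
as the ones obtained under the old global seed.

Also replace tqdm(range(...)) with tqdm.auto.trange.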
---
 utils_math.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/utils_math.py b/utils_math.py
index 8d2c7aa..12731f3 100644
--- a/utils_math.py
+++ b/utils_math.py
@@ -3,12 +3,10 @@
 import math
 import inspect
 
-from tqdm import tqdm
+from tqdm.auto import trange
 from sklearn.linear_model import LogisticRegression
 from collections import defaultdict
 
-np.random.seed(42)
-
 STYLE_CONTROL_ELEMENTS = [
     "sum_assistant_a_tokens",
     "header_count_a",
@@ -74,8 +72,8 @@ def get_bootstrap_result(battles, func_compute_elo, num_round, baseline_model="g
     kwargs = {}
     if baseline_model in inspect.signature(func_compute_elo).parameters:
         kwargs[baseline_model] = baseline_model
-    for _ in tqdm(range(num_round), desc="bootstrap"):
-        rows.append(func_compute_elo(battles.sample(frac=1.0, replace=True), **kwargs))
+    for round in trange(num_round, desc="bootstrap"):
+        rows.append(func_compute_elo(battles.sample(frac=1.0, replace=True, random_state=round), **kwargs))
     df = pd.DataFrame(rows)
     return df[df.median().sort_values(ascending=False).index]
 
@@ -206,9 +204,11 @@ def get_bootstrap_result_style_control(X, Y, battles, models, func_compute_elo,
         X.shape[0] / 2
     )  # Since we duplicate the battles when constructing X and Y, we don't want to sample the duplicates
 
+    rng = np.random.default_rng(42)
+
     battles_tie_idx = (battles["winner"] == "tie") | (battles["winner"] == "tie (bothbad)")
-    for _ in tqdm(range(num_round), desc="bootstrap"):
-        indices = np.random.choice(list(range(k)), size=(k), replace=True)
+    for _ in trange(num_round, desc="bootstrap"):
+        indices = rng.choice(list(range(k)), size=(k), replace=True)
 
         index2tie = np.zeros(k, dtype=bool)
         index2tie[battles_tie_idx] = True