From 8284a72fd649a23f58c510c3e7641eff40d29cbe Mon Sep 17 00:00:00 2001
From: Chris <53177842+ChristopherMarais@users.noreply.github.com>
Date: Tue, 15 Aug 2023 12:57:30 -0400
Subject: [PATCH 01/15] removed dropped module from docs

---
 docs/pc_mouseparty.md | 4 ----
 mkdocs.yml            | 1 -
 2 files changed, 5 deletions(-)
 delete mode 100644 docs/pc_mouseparty.md

diff --git a/docs/pc_mouseparty.md b/docs/pc_mouseparty.md
deleted file mode 100644
index 9b9e42a..0000000
--- a/docs/pc_mouseparty.md
+++ /dev/null
@@ -1,4 +0,0 @@
-
-# pc_mouseparty module
-
-::: pc_mouseparty.pc_mouseparty
\ No newline at end of file
diff --git a/mkdocs.yml b/mkdocs.yml
index 98dac36..bd55272 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -82,5 +82,4 @@ nav:
     - Examples:
       - examples/intro.ipynb
     - API Reference:
-      - pc_mouseparty module: pc_mouseparty.md
       - test_functions module: test_functions.md

From 6e233dede3f132db36b4c93c63161b5dd779d71e Mon Sep 17 00:00:00 2001
From: mcum96
Date: Fri, 18 Aug 2023 14:27:55 -0400
Subject: [PATCH 02/15] Added 3 functions to extract bouts from boris excels

---
 docs/pc_mouseparty.md                |  58 +++++++++-
 .../vid_behavior/boris_extraction.py | 102 ++++++++++++++++++
 2 files changed, 158 insertions(+), 2 deletions(-)
 create mode 100644 pc_mouseparty/vid_behavior/boris_extraction.py

diff --git a/docs/pc_mouseparty.md b/docs/pc_mouseparty.md
index 9b9e42a..4f62ecc 100644
--- a/docs/pc_mouseparty.md
+++ b/docs/pc_mouseparty.md
@@ -1,4 +1,58 @@
-
+
 # pc_mouseparty module
 
-::: pc_mouseparty.pc_mouseparty
\ No newline at end of file
+::: pc_mouseparty.pc_mouseparty
+
+::: pc_mouseparty.vid_behavior.boris_extraction
+
function threshold_bouts(start_stop_array, min_iti, min_bout):
    """
    thresholds behavior bouts
    by combining behavior bouts with interbout intervals of < min_iti
    and then removing remaining bouts of < min_bout

    Args (3 total):
        start_stop_array: numpy array of dim (# of bouts, 2)
        min_iti: float, min interbout interval in seconds
        min_bout: float, min bout length in seconds

    Returns (1):
        start_stop_array: numpy array (ndim=(n bouts, 2))
        of start&stop times (s)
    """

function get_behavior_bouts(boris_df, subject, behavior, min_iti=0, min_bout=0):
    """
    extracts behavior bout start and stop times from a boris df
    thresholds individually by subject and behavior
    returns start_stop_array ordered by start values

    Args (5 total, 3 required):
        boris_df: pandas dataframe of a boris file (aggregated event table)
        subject: list of strings, desired subject(s) (as written in boris_df)
        behavior: list of strings, desired behavior(s) (as written in boris_df)
        min_iti: float, default=0, bouts w/ itis(s) < min_iti will be combined
        min_bout: float, default=0, bouts < min_bout(s) will be deleted

    Returns (1):
        numpy array (ndim=(n bouts, 2)) of start&stop times (ms)
    """

function save_behavior_bouts(directory, boris_df, subject, behavior, min_bout=0,
                             min_iti=0, filename=None):
    """
    saves a numpy array of start&stop times (ms)
    as filename: subject_behavior_bouts.npy

    Args (7 total, 4 required):
        directory: path to folder where filename.npy will be saved
            path format: './folder/folder/'
        boris_df: pandas dataframe of a boris file (aggregated event table)
        subject: list of strings, desired subjects (as written in boris_df)
        behavior: list of strings, desired behaviors (as written in boris_df)
        min_iti: float, default=0, bouts w/ itis(s) < min_iti will be combined
        min_bout: float, default=0, bouts < min_bout(s) will be deleted
        filename: string,
            default=None, must end in .npy

    Returns:
        none
    """
\ No newline at end of file
diff --git a/pc_mouseparty/vid_behavior/boris_extraction.py b/pc_mouseparty/vid_behavior/boris_extraction.py
new file mode 100644
index 0000000..99f862f
--- /dev/null
+++ b/pc_mouseparty/vid_behavior/boris_extraction.py
@@ -0,0 +1,102 @@
+
import numpy as np


def threshold_bouts(start_stop_array, min_iti, min_bout):
    """
    thresholds behavior bouts
    by combining behavior bouts with interbout intervals of < min_iti
    and then removing remaining bouts of < min_bout

    Args (3 total):
        start_stop_array: numpy array of dim (# of bouts, 2)
        min_iti: float, min interbout interval in seconds
        min_bout: float, min bout length in seconds

    Returns (1):
        start_stop_array: numpy array (ndim=(n bouts, 2))
        of start&stop times (s)
    """

    start_stop_array = np.sort(start_stop_array.flatten())
    times_to_delete = []
    if min_iti > 0:
        for i in range(1, len(start_stop_array)-1, 2):
            if (start_stop_array[i+1] - start_stop_array[i]) < min_iti:
                times_to_delete.extend([i, i+1])
    start_stop_array = np.delete(start_stop_array, times_to_delete)
    bouts_to_delete = []
    if min_bout > 0:
        for i in range(0, len(start_stop_array)-1, 2):
            if start_stop_array[i+1] - start_stop_array[i] < min_bout:
                bouts_to_delete.extend([i, i+1])
    start_stop_array = np.delete(start_stop_array, bouts_to_delete)
    no_bouts = len(start_stop_array)/2
    start_stop_array = np.reshape(start_stop_array, (int(no_bouts), 2))

    return start_stop_array


def get_behavior_bouts(boris_df, subject, behavior, min_iti=0, min_bout=0):
    """
    extracts behavior bout start and stop times from a boris df
    thresholds individually by subject and behavior
    returns start_stop_array ordered by start values

    Args (5 total, 3 required):
        boris_df: pandas dataframe of a boris file (aggregated event table)
        subject: list of strings, desired subject(s) (as written in boris_df)
        behavior: list of strings, desired behavior(s) (as written in boris_df)
        min_iti: float, default=0, bouts w/ itis(s) < min_iti will be combined
        min_bout: float, default=0, bouts < min_bout(s) will be deleted

    Returns (1):
        numpy array (ndim=(n bouts, 2)) of start&stop times (ms)
    """
    start_stop_arrays = []
    for mouse in subject:
        subject_df = boris_df[boris_df['Subject'] == mouse]
        for act in behavior:
            behavior_df = subject_df[subject_df['Behavior'] == act]
            start_stop_array = behavior_df[['Start (s)',
                                            'Stop (s)']].to_numpy()
            start_stop_arrays.append(threshold_bouts(start_stop_array,
                                                     min_iti, min_bout))
    start_stop_array = np.concatenate(start_stop_arrays)
    organizer = np.argsort(start_stop_array[:, 0])
    start_stop_array = start_stop_array[organizer]

    return start_stop_array * 1000


def save_behavior_bouts(directory, boris_df, subject, behavior, min_bout=0,
                        min_iti=0, filename=None):
    """
    saves a numpy array of start&stop times (ms)
    as filename: subject_behavior_bouts.npy

    Args (7 total, 4 required):
        directory: path to folder where filename.npy will be saved
            path format: './folder/folder/'
        boris_df: pandas dataframe of a boris file (aggregated event table)
        subject: list of strings, desired subjects (as written in boris_df)
        behavior: list of strings, desired behaviors (as written in boris_df)
        min_iti: float, default=0, bouts w/ itis(s) < min_iti will be combined
        min_bout: float, default=0, bouts < min_bout(s) will be deleted
        filename: string, default=None, must end in .npy

    Returns:
        none
    """
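    # note: get_behavior_bouts takes min_iti before min_bout,
    # matching its signature above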
    bouts_array = get_behavior_bouts(boris_df, subject,
                                     behavior, min_iti, min_bout)
    if filename is None:
        if type(subject) == list:
            subject = '_'.join(subject)
        if type(behavior) == list:
            behavior = '_'.join(behavior)
        subject = subject.replace(" ", "")
        behavior = behavior.replace(" ", "")
        filename = f"{subject}_{behavior}_bouts.npy"

    np.save(directory+filename, bouts_array)

From 66bf52c7d7b3ecee3837e7a1bfe7473e0c9ec6dc Mon Sep 17 00:00:00 2001
From: Chris <53177842+ChristopherMarais@users.noreply.github.com>
Date: Fri, 25 Aug 2023 10:12:31 -0400
Subject: [PATCH 03/15] added basic deepnote function

---
 pc_mouseparty/medpc/medpc_extraction.py | 70 +++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 pc_mouseparty/medpc/medpc_extraction.py

diff --git a/pc_mouseparty/medpc/medpc_extraction.py b/pc_mouseparty/medpc/medpc_extraction.py
new file mode 100644
index 0000000..7f78bac
--- /dev/null
+++ b/pc_mouseparty/medpc/medpc_extraction.py
@@ -0,0 +1,70 @@
import re
import pandas as pd

def medpc_txt2df(text_file_path):
    """
    docstring
    """
    # NOTE: text_file_path is handled as a file-like object with a
    # .name attribute (the gradio app case); the commented-out call
    # below is the plain string-path version used for the package
    #
Open the medpc text file # with open(text_file_path, "r") as file: # use this for package - with open(text_file_path.name) as file: # use this for gradio app + with open(text_file_path.name) as file: # use this for gradio app medpc_txt_file = file.read() - + # split the file with each new line an element in a list medpc_txt_file_lst = medpc_txt_file.split('\n') - + # remove all empty elements in the list medpc_txt_file_lst = list(filter(None, medpc_txt_file_lst)) @@ -25,12 +26,14 @@ def medpc_txt2df(text_file_path): temp.append(item) else: if temp: - floats = [float(x) for x in re.findall(r'\d+\.\d+', ''.join(temp))] + floats = [float(x) for x in re.findall(r'\d+\.\d+', + ''.join(temp))] result.append(floats) temp = [] result.append(item) if temp: - floats = [float(x) for x in re.findall(r'\d+\.\d+', ''.join(temp))] + floats = [float(x) for x in re.findall(r'\d+\.\d+', + ''.join(temp))] result.append(floats) # convert the list of lists and strings to @@ -52,9 +55,9 @@ def medpc_txt2df(text_file_path): # values are of unequal length # convert all values to lists pd_series_lst = [] - for i,j in result_dict.items(): + for i, j in result_dict.items(): if type(j) != list: - result_dict[i] = [j] + result_dict[i] = [j] else: result_dict[i] = j pd_series_lst.append(pd.Series(j)) @@ -64,7 +67,7 @@ def medpc_txt2df(text_file_path): df.columns = result_dict.keys() df.to_csv("medpc_converted_file.csv") - return( + return ( # df.head(5).to_html(), "medpc_converted_file.csv" - ) \ No newline at end of file + ) From 9190fa49c3f16c4a5dd9b0a8e1262f3730935aa0 Mon Sep 17 00:00:00 2001 From: Chris <53177842+ChristopherMarais@users.noreply.github.com> Date: Fri, 25 Aug 2023 23:03:57 -0400 Subject: [PATCH 05/15] add function to remove zeros --- pc_mouseparty/medpc/medpc_extraction.py | 41 +++++++++++++++++++------ 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/pc_mouseparty/medpc/medpc_extraction.py b/pc_mouseparty/medpc/medpc_extraction.py index 88f4414..83c11c2 100644 --- a/pc_mouseparty/medpc/medpc_extraction.py +++ b/pc_mouseparty/medpc/medpc_extraction.py @@ -4,14 +4,22 @@ def medpc_txt2df(text_file_path): """ - docstring + This function reads a medpc text data file into a pandas dataframe. + + Args (2 total, 1 required): + par_1: 1D numpy array, Values observed in the field (counts). + par_2: int, default = 0, Additional value to add. + + Return (1): + output_1 : str, The total sum as a string with a chosen suffix + added on. 
""" # Open the medpc text file # with open(text_file_path, "r") as file: # use this for package with open(text_file_path.name) as file: # use this for gradio app medpc_txt_file = file.read() - # split the file with each new line an element in a list + # split the file with each new line an element in a list medpc_txt_file_lst = medpc_txt_file.split('\n') # remove all empty elements in the list @@ -32,11 +40,11 @@ def medpc_txt2df(text_file_path): temp = [] result.append(item) if temp: - floats = [float(x) for x in re.findall(r'\d+\.\d+', + floats = [float(x) for x in re.findall(r'\d+\.\d+', ''.join(temp))] result.append(floats) - # convert the list of lists and strings to + # convert the list of lists and strings to # a dictionary with everything before ":" # as a key and everything after as the value result_dict = {} @@ -65,9 +73,24 @@ def medpc_txt2df(text_file_path): # add list to dataframe df = pd.concat(pd_series_lst, axis=1) df.columns = result_dict.keys() - df.to_csv("medpc_converted_file.csv") - return ( - # df.head(5).to_html(), - "medpc_converted_file.csv" - ) + return (df) + + +def cut_zeros(df): + """ + This function removes all trailing zeros of the medpc dataframe. + + Args (2 total, 1 required): + par_1: 1D numpy array, Values observed in the field (counts). + par_2: int, default = 0, Additional value to add. + + Return (1): + output_1 : str, The total sum as a string with a chosen suffix + added on. + """ + # find index of last row that does not only ahve 0 and Nan + last_idx = df[df.sum(axis=1).ne(0)].index[-1] + df = df[:last_idx+1] + + return (df) From 9120359fc50bd5253ca301cf69f7c506c8db17c3 Mon Sep 17 00:00:00 2001 From: Chris <53177842+ChristopherMarais@users.noreply.github.com> Date: Fri, 25 Aug 2023 23:32:17 -0400 Subject: [PATCH 06/15] added functions for medpc data txt parsing --- pc_mouseparty/medpc/medpc_extraction.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/pc_mouseparty/medpc/medpc_extraction.py b/pc_mouseparty/medpc/medpc_extraction.py index 83c11c2..3b3ea79 100644 --- a/pc_mouseparty/medpc/medpc_extraction.py +++ b/pc_mouseparty/medpc/medpc_extraction.py @@ -6,13 +6,11 @@ def medpc_txt2df(text_file_path): """ This function reads a medpc text data file into a pandas dataframe. - Args (2 total, 1 required): - par_1: 1D numpy array, Values observed in the field (counts). - par_2: int, default = 0, Additional value to add. + Args (1 total, 1 required): + text_file_path : str, a path to a medpc text file as a string. Return (1): - output_1 : str, The total sum as a string with a chosen suffix - added on. + df : pandas dataframe, a dataframe with the medpc data. """ # Open the medpc text file # with open(text_file_path, "r") as file: # use this for package @@ -82,12 +80,11 @@ def cut_zeros(df): This function removes all trailing zeros of the medpc dataframe. Args (2 total, 1 required): - par_1: 1D numpy array, Values observed in the field (counts). - par_2: int, default = 0, Additional value to add. + df: pandas dataframe, a dataframe with the medpc data. Return (1): - output_1 : str, The total sum as a string with a chosen suffix - added on. + df : pandas dataframe, a dataframe with the medpc data + with trailing zeros removed. 
""" # find index of last row that does not only ahve 0 and Nan last_idx = df[df.sum(axis=1).ne(0)].index[-1] From 301f1b06987b793489de4df7b60b1d10d91fa70b Mon Sep 17 00:00:00 2001 From: Chaitra Peddireddy Date: Sat, 30 Sep 2023 20:34:26 -0400 Subject: [PATCH 07/15] Elo Score Functions --- .gitignore | 3 +- pc_mouseparty/rank/elo_score.py | 369 ++++++++++++++++++++ pc_mouseparty/rank/elorating/calculation.py | 198 +++++++++++ requirements.txt | 4 + 4 files changed, 573 insertions(+), 1 deletion(-) create mode 100644 pc_mouseparty/rank/elo_score.py create mode 100644 pc_mouseparty/rank/elorating/calculation.py diff --git a/.gitignore b/.gitignore index 631004a..f8e6b96 100644 --- a/.gitignore +++ b/.gitignore @@ -103,4 +103,5 @@ ENV/ .mypy_cache/ # IDE settings -.vscode/ \ No newline at end of file +.vscode/ +.idea/ diff --git a/pc_mouseparty/rank/elo_score.py b/pc_mouseparty/rank/elo_score.py new file mode 100644 index 0000000..5cb846d --- /dev/null +++ b/pc_mouseparty/rank/elo_score.py @@ -0,0 +1,369 @@ +import os +import pandas as pd +import matplotlib.pyplot as plt +import re +from collections import defaultdict +from elorating import calculation + +# Suppress all warnings +import warnings + +warnings.filterwarnings("ignore") + + +def _reward_competition(df, output_dir, plot_flag=True): + """ + This private function takes in a dataframe and processes the elo score for reward + competition protocol + Unedited used the reward_competition jupyter notebook + Args (3 total, 2 required): + df (pandas dataframe): dataframe to be processed + output_dir (str): path to output directory + plot_flag (bool): flag to plot data, default True + + Return(None): + None + """ + + for col in df.columns.tolist(): + formatted_col_name = "_".join(str(col).lower().strip().split(" ")) + df.rename(columns={col: formatted_col_name}, inplace=True) + + # removing columns from given list of strings + to_remove = ["wins", "ties", "time"] + cols_to_keep = [col for col in df.columns if all(word not in col for word + in to_remove)] + df = df[cols_to_keep] + df["animal_ids"] = df["match"].apply( + lambda x: tuple(sorted([all_ids.strip() for all_ids in re.findall(r"[-+]?(?:\d*\.\d+|\d+)", x)]))) + df["cohort"] = "TODO" + cage_to_strain = {} + df["strain"] = df["cage"].astype(str).map(cage_to_strain) + all_cages = "_".join([str(cage) for cage in sorted(df["cage"].unique())]) + df["index"] = df.index + reward_competition_df = df.reset_index(drop=True) + + melted_reward_competition_df = reward_competition_df.melt( + id_vars=["index", "date", "cage", "box", "match", "animal_ids"], + var_name="trial", + value_name="winner") + + melted_reward_competition_df = melted_reward_competition_df.dropna( + subset="winner") + melted_reward_competition_df["keep_row"] = \ + melted_reward_competition_df["winner"].apply( + lambda x: True if "tie" in str(x).lower() or + re.match(r'^-?\d+(?:\.\d+)$', str(x)) else False + ) + + melted_reward_competition_df = \ + melted_reward_competition_df[melted_reward_competition_df["keep_row"]] + + melted_reward_competition_df["winner"] = \ + melted_reward_competition_df["winner"].astype(str).apply( + lambda x: x.lower().strip() + ) + + melted_reward_competition_df["match_is_tie"] = \ + melted_reward_competition_df["winner"].apply( + lambda x: True if "tie" in x.lower().strip() else False + ) + + melted_reward_competition_df["winner"] = \ + melted_reward_competition_df.apply( + lambda x: x["animal_ids"][0] if x["match_is_tie"] else x["winner"], + axis=1 + ) + + 
melted_reward_competition_df[melted_reward_competition_df["match_is_tie"]] + + melted_reward_competition_df = melted_reward_competition_df[ + melted_reward_competition_df["trial"].str.contains('trial')] + + melted_reward_competition_df["trial_number"] = \ + melted_reward_competition_df["trial"].apply( + lambda x: int(x.lower().strip("trial").strip("winner").strip("_")) + ) + + melted_reward_competition_df = \ + melted_reward_competition_df.sort_values( + ["index", "trial_number"]).reset_index(drop=True) + + melted_reward_competition_df["loser"] = melted_reward_competition_df.apply( + lambda x: (list(set(x["animal_ids"]) - set([x["winner"]]))[0]), axis=1) + + melted_reward_competition_df["session_number_difference"] = \ + melted_reward_competition_df["date"].astype( + 'category').cat.codes.diff() + + cage_to_elo_rating_dict = defaultdict(dict) + + for cage in melted_reward_competition_df["cage"].unique(): + cage_df = \ + melted_reward_competition_df[melted_reward_competition_df["cage"] == cage] + cage_to_elo_rating_dict[cage] = \ + calculation.iterate_elo_rating_calculation_for_dataframe( + dataframe=cage_df, + winner_id_column="winner", + loser_id_column="loser", + additional_columns=melted_reward_competition_df.columns, + tie_column="match_is_tie" + ) + + cage_to_elo_rating_dict[list(cage_to_elo_rating_dict.keys())[0]][0] + + all_cage_elo_rating_list = [] + + for key in cage_to_elo_rating_dict.keys(): + cage_elo_rating_df = pd.DataFrame.from_dict(cage_to_elo_rating_dict[key], orient="index") + cage_elo_rating_df.insert( + 0, 'total_trial_number', range(0, 0 + len(cage_elo_rating_df)) + ) + + all_cage_elo_rating_list.append(cage_elo_rating_df) + + all_cage_elo_rating_df = pd.concat(all_cage_elo_rating_list) + + all_cage_elo_rating_df[all_cage_elo_rating_df["match_is_tie"]] + + if cage_to_strain: + all_cage_elo_rating_df["strain"] = \ + all_cage_elo_rating_df["cage"].astype(str).map(cage_to_strain) + + all_cage_elo_rating_df["experiment_type"] = "Reward Competition" + all_cage_elo_rating_df["cohort"] = "TODO" + all_cage_elo_rating_df[all_cage_elo_rating_df["win_draw_loss"] == 0.5] + + id_to_final_elo_rating_dict = defaultdict(dict) + sorted_func = enumerate(sorted(all_cage_elo_rating_df["subject_id"].unique())) + for index, subject_id in sorted_func: + per_subject_df = \ + all_cage_elo_rating_df[ + all_cage_elo_rating_df["subject_id"] == subject_id + ] + id_to_final_elo_rating_dict[index]["subject_id"] = subject_id + + id_to_final_elo_rating_dict[index]["final_elo_rating"] = \ + per_subject_df.iloc[-1]["updated_elo_rating"] + id_to_final_elo_rating_dict[index]["cohort"] = \ + per_subject_df.iloc[-1]["cohort"] + id_to_final_elo_rating_dict[index]["cage"] = \ + per_subject_df.iloc[-1]["cage"] + + id_to_final_elo_rating_df = pd.DataFrame.from_dict( + id_to_final_elo_rating_dict, orient="index" + ) + # Adding protocol name + id_to_final_elo_rating_df["experiment_type"] = "Reward Competition" + # Adding rank + id_to_final_elo_rating_df["rank"] = \ + id_to_final_elo_rating_df.groupby("cage")["final_elo_rating"].rank( + "dense", ascending=False + ) + # Sorting by cage and then id + id_to_final_elo_rating_df = id_to_final_elo_rating_df.sort_values( + by=['cage', "subject_id"], ascending=True).reset_index(drop=True) + id_to_final_elo_rating_df["rank"] = \ + id_to_final_elo_rating_df.groupby("cage")["final_elo_rating"].rank( + "dense", ascending=False + ) + id_to_final_elo_rating_df = \ + id_to_final_elo_rating_df.sort_values( + by=['cage', "subject_id"], ascending=True).reset_index(drop=True) + + if 
plot_flag: + for cage in all_cage_elo_rating_df["cage"].unique(): + fig, ax = plt.subplots() + plt.rcParams["figure.figsize"] = (18, 10) + per_cage_df = \ + all_cage_elo_rating_df[all_cage_elo_rating_df["cage"] == cage] + + for index in per_cage_df["index"].unique(): + first_session_in_trial = \ + per_cage_df[per_cage_df["index"] == index].iloc[0]["total_trial_number"] + plt.vlines(x=[first_session_in_trial - 0.5], + ymin=700, + ymax=1300, + colors='black', + linestyle='dashed' + ) + + # Drawing a line for each subject + for subject in sorted(per_cage_df["subject_id"].unique()): + # Getting all the rows with the current subject + subject_df = per_cage_df[per_cage_df["subject_id"] == subject] + # Making the dates into days after the first session by + # subtracting all the dates by the first date + plt.plot(subject_df["total_trial_number"], + subject_df["updated_elo_rating"], + '-o', + label=subject + ) + + # Labeling the X/Y Axis and the title + ax.set_xlabel("Trial Number") + ax.set_ylabel("Elo Score") + ax.set_title( + "{} Elo Rating for {} {}".format("Rewards Competition", "TODO", str(cage))) + # To show the legend + ax.legend(loc="upper left") + plt.xticks(rotation=90) + plt.ylim(700, 1300) + + # Checking if out dir exists + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + plt.savefig( + os.path.join(output_dir, + "reward_competition_cage" + str(cage) + ".png")) + + path = os.path.join( + output_dir, "reward_competition_cage" + all_cages + ".csv") + + id_to_final_elo_rating_df.to_csv(path, index=False) + + return None + +def general_processing(file_info, output_dir, plot_flag=True): + """ + This function takes in a dataframe and processes elo score for home_cage_observation, urine_marking, + or test_tube protocols + Args (3 total, 3 required): + file_info (dict): dictionary with file names as key and value as a dictionary of + file information with the following properties: + file_path (str): path to file + protocol (str): protocol name + sheet (list): list of sheet names + cohort (str): cohort name + output_dir (str): path to output directory + plot_flag (bool): flag to plot data, default True + + Return(None): + None + """ + def process(df, protocol, cohort, output_dir, plot_flag): + # Initializing column names + + find_col_names = df[df.apply(lambda row: 'winner' in row.values, axis=1)] + + if not find_col_names.empty: + df.columns = find_col_names.iloc[0] + df = df[df.index != find_col_names.index[0]] + + # check if there is a cage number col + mode_cage_val = None + cage_num = False + # finding column names for winner, loser, and tie + winner_col, tie_col, loser_col = None, None, None + for col in df.columns.tolist(): + if "cage" in col.lower(): + # filling all cage values with mode + mode_cage_val = df['cage #'].mode().iloc[0] + df['cage#'] = mode_cage_val + cage_num = True + if "winner" in col.lower(): + winner_col = col + if "loser" in col.lower(): + loser_col = col + if "tie" in col.lower(): + tie_col = col + + if not winner_col or not loser_col: + print("Winner or Loser column not found") + return None + + if not cage_num: + try: + new_sheet_name = sheet.lower().replace("cage", "") + mode_cage_val = int(new_sheet_name) + df['cage#'] = mode_cage_val + except: + print("Cage# cannot be determined") + return None + + # drop cols if winner & loss is NaN + df = df.dropna(subset=['winner', 'loser'], how='all') + + # Autofill dates + df['date'] = pd.to_datetime(df['date'], errors='coerce') + df['date'].fillna(method='ffill', inplace=True) + + # Identify sessions based 
on date values + df['session_number_difference'] = 0 + previous_date = None + for index, row in df.iterrows(): + current_date = row['date'] + # check for session change + if not previous_date: + df.at[index, 'session_number_difference'] = 1 + elif previous_date is not None and current_date != previous_date: + df.at[index, 'session_number_difference'] = 1 + previous_date = current_date + # Elo Score from calculation.py + if tie_col: + df[tie_col] = df[tie_col].notna() + + elo_calc = calculation.iterate_elo_rating_calculation_for_dataframe(dataframe=df, winner_id_column=winner_col, + loser_id_column=loser_col, + tie_column=tie_col) + elo_df = pd.DataFrame.from_dict(elo_calc, orient='index') + elo_df.groupby("subject_id").count() + + cage_to_strain = {} + if cage_to_strain: + elo_df["subject_strain"] = elo_df["cage_num_of_subject"].map(cage_to_strain) + elo_df["agent_strain"] = elo_df["cage_num_of_agent"].map(cage_to_strain) + elo_df["experiment_type"] = protocol + elo_df["cohort"] = cohort + + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + if plot_flag: + max_elo_rating = elo_df["updated_elo_rating"].max() + min_elo_rating = elo_df["updated_elo_rating"].min() + + plt.rcParams["figure.figsize"] = (13.5, 7.5) + fig, ax = plt.subplots() + + # adjusting session number difference + elo_df['session_number_difference'] = \ + df['session_number_difference'].repeat(2).reset_index(drop=True) + + for index, row in elo_df[elo_df['session_number_difference'].astype(bool)].iterrows(): + # Offsetting by 0.5 to avoid drawing the line on the dot + # Drawing the lines a little above the max and a little below the minimum + plt.vlines(x=[row["total_match_number"] - 0.5], ymin=min_elo_rating - 50, ymax=max_elo_rating + 50, + colors='black', linestyle='dashed') + for subject in sorted(elo_df["subject_id"].unique()): + # Getting all the rows with the current subject + subject_dataframe = elo_df[elo_df["subject_id"] == subject] + # Making the current match number the X-Axis + plt.plot(subject_dataframe["total_match_number"], subject_dataframe["updated_elo_rating"], '-o', + label=subject) + # plt.show() + ax.set_xlabel("Trial Number") + ax.set_ylabel("Elo rating") + + ax.set_title( + "{} Elo Rating for {} {}".format(protocol, cohort, "Cage #" + str(mode_cage_val))) + ax.legend(loc="upper left") + plt.ylim(min_elo_rating - 50, max_elo_rating + 50) + fig.savefig(os.path.join(output_dir, protocol + "_cage" + str(mode_cage_val) + ".png")) + + # Saving df csv to output dir + elo_df.to_csv(os.path.join(output_dir, protocol + "_cage" + str(mode_cage_val) + ".csv"), index=False) + + for file_name, file_data in file_info.items(): + file_path = file_data["file_path"] + protocol = file_data["protocol"] + sheets = file_data["sheet"] + cohort = file_data["cohort"] + xls = pd.ExcelFile(file_path) + for sheet in sheets: + data = pd.read_excel(xls, sheet_name=sheet) + if protocol == "reward_competition": + _reward_competition(df=data, output_dir=output_dir, plot_flag=plot_flag) + else: + process(df=data, protocol=protocol, cohort=cohort, output_dir=output_dir, plot_flag=plot_flag) diff --git a/pc_mouseparty/rank/elorating/calculation.py b/pc_mouseparty/rank/elorating/calculation.py new file mode 100644 index 0000000..35f175c --- /dev/null +++ b/pc_mouseparty/rank/elorating/calculation.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python3 +"""Elo Rating Calculator + +Based on: https://www.omnicalculator.com/sports/elo +""" +import operator +from collections import defaultdict +import pandas as pd + + +def 
calculate_elo_rating(subject_elo_rating, agent_elo_rating, k_factor=20, score=1, number_of_decimals=1):
    """
    Calculates the Elo rating of a given subject given its original score, its opponent,
    the K-Factor, and whether or not it won.
    The calculation is based on: https://www.omnicalculator.com/sports/elo

    Args:
        subject_elo_rating(float): The original Elo rating for the subject
        agent_elo_rating(float): The original Elo rating for the agent
        k_factor(int): k-factor, or development coefficient.
            - It usually takes values between 10 and 40, depending on player's strength
        score(int): the actual outcome of the game.
            - In chess, a win counts as 1 point, a draw is equal to 0.5, and a loss gives 0.
        number_of_decimals(int): Number of decimals to round to

    Returns:
        int: Updated Elo rating of the subject
    """
    # Calculating the Elo rating
    rating_difference = agent_elo_rating - subject_elo_rating
    # e.g. with equal ratings the difference is 0, so expected_score is 0.5
    expected_score = 1 / (1 + 10 ** (rating_difference / 400))
    new_elo_rating = subject_elo_rating + k_factor * (score - expected_score)
    # Rounding to `number_of_decimals`
    return round(new_elo_rating, number_of_decimals)


def update_elo_rating(winner_id, loser_id, id_to_elo_rating=None, default_elo_rating=1000,
                      winner_score=1, loser_score=0, **calculate_elo_rating_params):
    """
    Updates the Elo rating in a dictionary that contains the ID of the subject as keys,
    and the Elo rating as the values. You can also adjust how the Elo rating is calculated
    with 'calculate_elo_rating_params'.

    Args:
        winner_id(str): ID of the winner
        loser_id(str): ID of the loser
        id_to_elo_rating(dict): Dict that has the ID of the subjects as keys to the Elo Score as values
        default_elo_rating(int): The default Elo rating to be used if there is no Elo score for the specified ID
        **calculate_elo_rating_params(kwargs): Other params for calculate_elo_rating to change how the Elo rating is calculated

    Returns:
        Dict: Dict that has the ID of the subjects as keys to the Elo Score as values
    """
    if id_to_elo_rating is None:
        id_to_elo_rating = defaultdict(lambda: default_elo_rating)

    # Getting the current Elo Score
    current_winner_rating = id_to_elo_rating[winner_id]
    current_loser_rating = id_to_elo_rating[loser_id]

    # Calculating Elo rating
    id_to_elo_rating[winner_id] = calculate_elo_rating(subject_elo_rating=current_winner_rating,
                                                       agent_elo_rating=current_loser_rating, score=winner_score,
                                                       **calculate_elo_rating_params)
    id_to_elo_rating[loser_id] = calculate_elo_rating(subject_elo_rating=current_loser_rating,
                                                      agent_elo_rating=current_winner_rating, score=loser_score,
                                                      **calculate_elo_rating_params)

    return id_to_elo_rating


def get_ranking_from_elo_rating_dictionary(input_dict, subject_id):
    """
    Orders a dictionary of subject ID keys to ELO score values by ELO score.
    And then gets the rank of the subject with the inputted ID.
    Lower ranks like 1 would represent those subjects with higher ELO scores and vice versa.
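    For example, with ratings {'a': 1100, 'b': 1000}, subject 'a' has rank 1 and subject 'b' has rank 2.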

    Args:
        input_dict(dict):
            Dictionary of subject ID keys to ELO score values
        subject_id(str, int, or any value that's a key in input dict):
            The ID of the subject that you want the ranking of

    Returns:
        int:
            Ranking of the subject with the ID inputted
    """
    # Sorting the subject IDs by ELO score
    sorted_subject_to_elo_rating = sorted(input_dict.items(), key=operator.itemgetter(1), reverse=True)
    # Getting the rank of the subject based on ELO score
    return [subject_tuple[0] for subject_tuple in sorted_subject_to_elo_rating].index(subject_id) + 1


def iterate_elo_rating_calculation_for_dataframe(dataframe, winner_id_column, loser_id_column, tie_column=None,
                                                 additional_columns=None):
    """
    Iterates through a dataframe that has the ID of winners and losers for a given event.
    A dictionary will be created that contains the information of the event,
    which can then be turned into a dataframe. Each key value pair is either from the winner's or the loser's perspective.

    Args:
        dataframe(Pandas DataFrame): The dataframe with one row per match/event to iterate over
        winner_id_column(str): The name of the column that has the winner's ID
        loser_id_column(str): The name of the column that has the loser's ID
        tie_column(str): The name of the column that marks whether the match was a tie
        additional_columns(list): Additional columns to take from the dataframe

    Returns:
        Dict: With a key value pair for each event either from the winner or loser's perspective.
        This can be turned into a dataframe with each key value pair being a row.
    """
    if additional_columns is None:
        additional_columns = []

    # Dictionary that keeps track of the current Elo rating of the subject
    id_to_elo_rating = defaultdict(lambda: 1000)
    # Dictionary that will be converted to a DataFrame
    index_to_elo_rating_and_meta_data = defaultdict(dict)

    # Indexes that will identify which row the dictionary key value pair will be
    # The number of the index has no significance other than being the number of the row
    all_indexes = iter(range(0, 99999))

    # Keeping track of the number of matches
    total_match_number = 1

    # Making a copy in case there is an error with changing the type of the tie column
    copied_dataframe = dataframe.copy()
    # Changing the tie column type to bool
    # So that we can filter out for booleans including False and 0
    try:
        copied_dataframe[tie_column] = copied_dataframe[tie_column].astype(bool)
    except (KeyError, TypeError, ValueError):
        copied_dataframe = dataframe.copy()

    for index, row in copied_dataframe.dropna(subset=winner_id_column).iterrows():
        # Getting the ID of the winner subject
        winner_id = row[winner_id_column]
        # Getting the ID of the loser subject
        loser_id = row[loser_id_column]

        # Getting the current Elo Score
        current_winner_rating = id_to_elo_rating[winner_id]
        current_loser_rating = id_to_elo_rating[loser_id]

        if tie_column:
            # When there is nothing in the tie column
            # Or when there is a false value indicating that it is not a tie
            if pd.isna(copied_dataframe[tie_column][index]) or ~(copied_dataframe[tie_column][index]).any():
                winner_score = 1
                loser_score = 0
            # When there is value in the tie column
            else:
                winner_score = 0.5
                loser_score = 0.5
        # When there is no tie column
        else:
            winner_score = 1
            loser_score = 0

        # Updating the dictionary with ID keys and Elo Score values
        update_elo_rating(winner_id=winner_id, loser_id=loser_id, id_to_elo_rating=id_to_elo_rating,
                          winner_score=winner_score, loser_score=loser_score)

        # Saving all the data for the winner
        winner_index = next(all_indexes)
        index_to_elo_rating_and_meta_data[winner_index]["total_match_number"] = total_match_number
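        # the winner row and the loser row of one match share this match number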
index_to_elo_rating_and_meta_data[winner_index]["subject_id"] = winner_id + index_to_elo_rating_and_meta_data[winner_index]["agent_id"] = loser_id + index_to_elo_rating_and_meta_data[winner_index]["original_elo_rating"] = current_winner_rating + index_to_elo_rating_and_meta_data[winner_index]["updated_elo_rating"] = id_to_elo_rating[winner_id] + index_to_elo_rating_and_meta_data[winner_index]["win_draw_loss"] = winner_score + index_to_elo_rating_and_meta_data[winner_index]["subject_ranking"] = get_ranking_from_elo_rating_dictionary( + id_to_elo_rating, winner_id) + index_to_elo_rating_and_meta_data[winner_index]["agent_ranking"] = get_ranking_from_elo_rating_dictionary( + id_to_elo_rating, loser_id) + index_to_elo_rating_and_meta_data[winner_index]["pairing_index"] = 0 + for column in additional_columns: + index_to_elo_rating_and_meta_data[winner_index][column] = row[column] + + # Saving all the data for the loser + loser_index = next(all_indexes) + index_to_elo_rating_and_meta_data[loser_index]["total_match_number"] = total_match_number + index_to_elo_rating_and_meta_data[loser_index]["subject_id"] = loser_id + index_to_elo_rating_and_meta_data[loser_index]["agent_id"] = winner_id + index_to_elo_rating_and_meta_data[loser_index]["original_elo_rating"] = current_loser_rating + index_to_elo_rating_and_meta_data[loser_index]["updated_elo_rating"] = id_to_elo_rating[loser_id] + index_to_elo_rating_and_meta_data[loser_index]["win_draw_loss"] = loser_score + index_to_elo_rating_and_meta_data[loser_index]["subject_ranking"] = get_ranking_from_elo_rating_dictionary( + id_to_elo_rating, loser_id) + index_to_elo_rating_and_meta_data[loser_index]["agent_ranking"] = get_ranking_from_elo_rating_dictionary( + id_to_elo_rating, winner_id) + index_to_elo_rating_and_meta_data[loser_index]["pairing_index"] = 1 + for column in additional_columns: + index_to_elo_rating_and_meta_data[loser_index][column] = row[column] + + # Updating the match number + total_match_number += 1 + + return index_to_elo_rating_and_meta_data diff --git a/requirements.txt b/requirements.txt index e69de29..16e07a6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -0,0 +1,4 @@ +setuptools~=60.2.0 +pandas~=2.1.1 +matplotlib~=3.8.0 +numpy~=1.26.0 From 53b9815c81b160f2def7c45d0d30b2565062bee2 Mon Sep 17 00:00:00 2001 From: Chaitra Peddireddy Date: Sun, 1 Oct 2023 16:09:28 -0400 Subject: [PATCH 08/15] Elo Score Functions --- pc_mouseparty/rank/__init__.py | 2 + pc_mouseparty/rank/elo_score.py | 297 ++++++++++++++++++-------------- 2 files changed, 168 insertions(+), 131 deletions(-) diff --git a/pc_mouseparty/rank/__init__.py b/pc_mouseparty/rank/__init__.py index e69de29..bd20cc5 100644 --- a/pc_mouseparty/rank/__init__.py +++ b/pc_mouseparty/rank/__init__.py @@ -0,0 +1,2 @@ +from .elo_score import generate_elo_scores +__all__ = ['generate_elo_scores'] diff --git a/pc_mouseparty/rank/elo_score.py b/pc_mouseparty/rank/elo_score.py index 5cb846d..ff778ec 100644 --- a/pc_mouseparty/rank/elo_score.py +++ b/pc_mouseparty/rank/elo_score.py @@ -5,19 +5,20 @@ from collections import defaultdict from elorating import calculation -# Suppress all warnings import warnings +# Suppress all warnings warnings.filterwarnings("ignore") -def _reward_competition(df, output_dir, plot_flag=True): +def __reward_competition(df, cohort, output_dir, plot_flag=True): """ - This private function takes in a dataframe and processes the elo score for reward - competition protocol + This private function takes in a dataframe and processes the elo score + for 
reward competition protocol Unedited used the reward_competition jupyter notebook - Args (3 total, 2 required): + Args (4 total, 3 required): df (pandas dataframe): dataframe to be processed + cohort (str): cohort name output_dir (str): path to output directory plot_flag (bool): flag to plot data, default True @@ -31,82 +32,80 @@ def _reward_competition(df, output_dir, plot_flag=True): # removing columns from given list of strings to_remove = ["wins", "ties", "time"] - cols_to_keep = [col for col in df.columns if all(word not in col for word - in to_remove)] + cols_to_keep = \ + [col for col in df.columns if all(word not in col + for word in to_remove)] df = df[cols_to_keep] df["animal_ids"] = df["match"].apply( - lambda x: tuple(sorted([all_ids.strip() for all_ids in re.findall(r"[-+]?(?:\d*\.\d+|\d+)", x)]))) + lambda x: tuple(sorted([all_ids.strip() + for all_ids in + re.findall(r"[-+]?(?:\d*\.\d+|\d+)", x)]))) df["cohort"] = "TODO" cage_to_strain = {} df["strain"] = df["cage"].astype(str).map(cage_to_strain) - all_cages = "_".join([str(cage) for cage in sorted(df["cage"].unique())]) + all_cages = "_".join([str(cage) + for cage in sorted(df["cage"].unique())]) df["index"] = df.index reward_competition_df = df.reset_index(drop=True) - melted_reward_competition_df = reward_competition_df.melt( + melted_rc_df = reward_competition_df.melt( id_vars=["index", "date", "cage", "box", "match", "animal_ids"], var_name="trial", value_name="winner") - melted_reward_competition_df = melted_reward_competition_df.dropna( - subset="winner") - melted_reward_competition_df["keep_row"] = \ - melted_reward_competition_df["winner"].apply( - lambda x: True if "tie" in str(x).lower() or - re.match(r'^-?\d+(?:\.\d+)$', str(x)) else False - ) + melted_rc_df = melted_rc_df.dropna(subset="winner") + melted_rc_df["keep_row"] = melted_rc_df["winner"].apply( + lambda x: True if "tie" in str(x).lower() or + re.match(r'^-?\d+(?:\.\d+)$', str(x)) else False + ) - melted_reward_competition_df = \ - melted_reward_competition_df[melted_reward_competition_df["keep_row"]] + melted_rc_df = melted_rc_df[melted_rc_df["keep_row"]] - melted_reward_competition_df["winner"] = \ - melted_reward_competition_df["winner"].astype(str).apply( - lambda x: x.lower().strip() - ) + melted_rc_df["winner"] = melted_rc_df["winner"].astype(str).apply( + lambda x: x.lower().strip() + ) - melted_reward_competition_df["match_is_tie"] = \ - melted_reward_competition_df["winner"].apply( - lambda x: True if "tie" in x.lower().strip() else False - ) + melted_rc_df["match_is_tie"] = melted_rc_df["winner"].apply( + lambda x: True if "tie" in x.lower().strip() else False + ) - melted_reward_competition_df["winner"] = \ - melted_reward_competition_df.apply( - lambda x: x["animal_ids"][0] if x["match_is_tie"] else x["winner"], - axis=1 + melted_rc_df["winner"] = \ + melted_rc_df.apply( + lambda x: x["animal_ids"][0] if x["match_is_tie"] + else x["winner"], axis=1 ) - melted_reward_competition_df[melted_reward_competition_df["match_is_tie"]] + melted_rc_df[melted_rc_df["match_is_tie"]] - melted_reward_competition_df = melted_reward_competition_df[ - melted_reward_competition_df["trial"].str.contains('trial')] + melted_rc_df = \ + melted_rc_df[melted_rc_df["trial"].str.contains('trial')] - melted_reward_competition_df["trial_number"] = \ - melted_reward_competition_df["trial"].apply( - lambda x: int(x.lower().strip("trial").strip("winner").strip("_")) - ) + melted_rc_df["trial_number"] = melted_rc_df["trial"].apply( + lambda x: + 
int(x.lower().strip("trial").strip("winner").strip("_")) + ) - melted_reward_competition_df = \ - melted_reward_competition_df.sort_values( - ["index", "trial_number"]).reset_index(drop=True) + melted_rc_df = melted_rc_df.sort_values( + ["index", "trial_number"]).reset_index(drop=True) - melted_reward_competition_df["loser"] = melted_reward_competition_df.apply( - lambda x: (list(set(x["animal_ids"]) - set([x["winner"]]))[0]), axis=1) + melted_rc_df["loser"] = melted_rc_df.apply( + lambda x: + (list(set(x["animal_ids"]) - set([x["winner"]]))[0]), axis=1 + ) - melted_reward_competition_df["session_number_difference"] = \ - melted_reward_competition_df["date"].astype( - 'category').cat.codes.diff() + melted_rc_df["session_number_difference"] = \ + melted_rc_df["date"].astype('category').cat.codes.diff() cage_to_elo_rating_dict = defaultdict(dict) - for cage in melted_reward_competition_df["cage"].unique(): - cage_df = \ - melted_reward_competition_df[melted_reward_competition_df["cage"] == cage] + for cage in melted_rc_df["cage"].unique(): + cage_df = melted_rc_df[melted_rc_df["cage"] == cage] cage_to_elo_rating_dict[cage] = \ calculation.iterate_elo_rating_calculation_for_dataframe( dataframe=cage_df, winner_id_column="winner", loser_id_column="loser", - additional_columns=melted_reward_competition_df.columns, + additional_columns=melted_rc_df.columns, tie_column="match_is_tie" ) @@ -115,72 +114,69 @@ def _reward_competition(df, output_dir, plot_flag=True): all_cage_elo_rating_list = [] for key in cage_to_elo_rating_dict.keys(): - cage_elo_rating_df = pd.DataFrame.from_dict(cage_to_elo_rating_dict[key], orient="index") + cage_elo_rating_df = \ + pd.DataFrame.from_dict( + cage_to_elo_rating_dict[key], orient="index") cage_elo_rating_df.insert( 0, 'total_trial_number', range(0, 0 + len(cage_elo_rating_df)) ) all_cage_elo_rating_list.append(cage_elo_rating_df) - all_cage_elo_rating_df = pd.concat(all_cage_elo_rating_list) + all_elo_df = pd.concat(all_cage_elo_rating_list) - all_cage_elo_rating_df[all_cage_elo_rating_df["match_is_tie"]] + all_elo_df[all_elo_df["match_is_tie"]] if cage_to_strain: - all_cage_elo_rating_df["strain"] = \ - all_cage_elo_rating_df["cage"].astype(str).map(cage_to_strain) + all_elo_df["strain"] = \ + all_elo_df["cage"].astype(str).map(cage_to_strain) - all_cage_elo_rating_df["experiment_type"] = "Reward Competition" - all_cage_elo_rating_df["cohort"] = "TODO" - all_cage_elo_rating_df[all_cage_elo_rating_df["win_draw_loss"] == 0.5] + all_elo_df["experiment_type"] = "Reward Competition" + all_elo_df["cohort"] = "TODO" + all_elo_df[all_elo_df["win_draw_loss"] == 0.5] - id_to_final_elo_rating_dict = defaultdict(dict) - sorted_func = enumerate(sorted(all_cage_elo_rating_df["subject_id"].unique())) + id_to_elo_dict = defaultdict(dict) + sorted_func = enumerate(sorted(all_elo_df["subject_id"].unique())) for index, subject_id in sorted_func: - per_subject_df = \ - all_cage_elo_rating_df[ - all_cage_elo_rating_df["subject_id"] == subject_id - ] - id_to_final_elo_rating_dict[index]["subject_id"] = subject_id + per_subject_df = all_elo_df[all_elo_df["subject_id"] == subject_id] + id_to_elo_dict[index]["subject_id"] = subject_id - id_to_final_elo_rating_dict[index]["final_elo_rating"] = \ + id_to_elo_dict[index]["final_elo_rating"] = \ per_subject_df.iloc[-1]["updated_elo_rating"] - id_to_final_elo_rating_dict[index]["cohort"] = \ - per_subject_df.iloc[-1]["cohort"] - id_to_final_elo_rating_dict[index]["cage"] = \ - per_subject_df.iloc[-1]["cage"] + id_to_elo_dict[index]["cohort"] 
= per_subject_df.iloc[-1]["cohort"] + id_to_elo_dict[index]["cage"] = per_subject_df.iloc[-1]["cage"] - id_to_final_elo_rating_df = pd.DataFrame.from_dict( - id_to_final_elo_rating_dict, orient="index" + id_to_elo_df = pd.DataFrame.from_dict( + id_to_elo_dict, orient="index" ) # Adding protocol name - id_to_final_elo_rating_df["experiment_type"] = "Reward Competition" + id_to_elo_df["experiment_type"] = "Reward Competition" # Adding rank - id_to_final_elo_rating_df["rank"] = \ - id_to_final_elo_rating_df.groupby("cage")["final_elo_rating"].rank( + id_to_elo_df["rank"] = \ + id_to_elo_df.groupby("cage")["final_elo_rating"].rank( "dense", ascending=False ) # Sorting by cage and then id - id_to_final_elo_rating_df = id_to_final_elo_rating_df.sort_values( + id_to_elo_df = id_to_elo_df.sort_values( by=['cage', "subject_id"], ascending=True).reset_index(drop=True) - id_to_final_elo_rating_df["rank"] = \ - id_to_final_elo_rating_df.groupby("cage")["final_elo_rating"].rank( + id_to_elo_df["rank"] = \ + id_to_elo_df.groupby("cage")["final_elo_rating"].rank( "dense", ascending=False ) - id_to_final_elo_rating_df = \ - id_to_final_elo_rating_df.sort_values( - by=['cage', "subject_id"], ascending=True).reset_index(drop=True) + id_to_elo_df = id_to_elo_df.sort_values( + by=['cage', "subject_id"], ascending=True).reset_index(drop=True) if plot_flag: - for cage in all_cage_elo_rating_df["cage"].unique(): + for cage in all_elo_df["cage"].unique(): fig, ax = plt.subplots() plt.rcParams["figure.figsize"] = (18, 10) per_cage_df = \ - all_cage_elo_rating_df[all_cage_elo_rating_df["cage"] == cage] + all_elo_df[all_elo_df["cage"] == cage] for index in per_cage_df["index"].unique(): + col = "total_trial_number" first_session_in_trial = \ - per_cage_df[per_cage_df["index"] == index].iloc[0]["total_trial_number"] + per_cage_df[per_cage_df["index"] == index].iloc[0][col] plt.vlines(x=[first_session_in_trial - 0.5], ymin=700, ymax=1300, @@ -191,7 +187,8 @@ def _reward_competition(df, output_dir, plot_flag=True): # Drawing a line for each subject for subject in sorted(per_cage_df["subject_id"].unique()): # Getting all the rows with the current subject - subject_df = per_cage_df[per_cage_df["subject_id"] == subject] + col = "subject_id" + subject_df = per_cage_df[per_cage_df[col] == subject] # Making the dates into days after the first session by # subtracting all the dates by the first date plt.plot(subject_df["total_trial_number"], @@ -204,7 +201,9 @@ def _reward_competition(df, output_dir, plot_flag=True): ax.set_xlabel("Trial Number") ax.set_ylabel("Elo Score") ax.set_title( - "{} Elo Rating for {} {}".format("Rewards Competition", "TODO", str(cage))) + "{} Elo Rating for {} {}".format( + "Rewards Competition", cohort, str(cage)) + ) # To show the legend ax.legend(loc="upper left") plt.xticks(rotation=90) @@ -214,53 +213,49 @@ def _reward_competition(df, output_dir, plot_flag=True): if not os.path.exists(output_dir): os.makedirs(output_dir) - plt.savefig( - os.path.join(output_dir, - "reward_competition_cage" + str(cage) + ".png")) + file_name = "reward_competition_cage" + str(cage) + ".png" + plt.savefig(os.path.join(output_dir, file_name)) - path = os.path.join( - output_dir, "reward_competition_cage" + all_cages + ".csv") + file_name = "reward_competition_cage" + all_cages + ".csv" + path = os.path.join(output_dir, file_name) - id_to_final_elo_rating_df.to_csv(path, index=False) + id_to_elo_df.to_csv(path, index=False) return None -def general_processing(file_info, output_dir, plot_flag=True): - """ - This 
function takes in a dataframe and processes elo score for home_cage_observation, urine_marking, - or test_tube protocols - Args (3 total, 3 required): - file_info (dict): dictionary with file names as key and value as a dictionary of - file information with the following properties: - file_path (str): path to file - protocol (str): protocol name - sheet (list): list of sheet names - cohort (str): cohort name +def __process(df, protocol, cohort, sheet, output_dir, plot_flag=True): + """ + This private function takes in a dataframe and processes the elo score + for home_cage_observation, urine_marking, or test_tube protocols + Args (6 total, 5 required): + df (pandas dataframe): dataframe to be processed + protocol (str): protocol name + cohort (str): cohort name + sheet (str): sheet name output_dir (str): path to output directory plot_flag (bool): flag to plot data, default True - Return(None): None - """ - def process(df, protocol, cohort, output_dir, plot_flag): + """ # Initializing column names - find_col_names = df[df.apply(lambda row: 'winner' in row.values, axis=1)] + find_col_names = df[df.apply( + lambda row: 'winner' in row.values, axis=1)] if not find_col_names.empty: df.columns = find_col_names.iloc[0] df = df[df.index != find_col_names.index[0]] # check if there is a cage number col - mode_cage_val = None + mode_cage = None cage_num = False # finding column names for winner, loser, and tie winner_col, tie_col, loser_col = None, None, None for col in df.columns.tolist(): if "cage" in col.lower(): # filling all cage values with mode - mode_cage_val = df['cage #'].mode().iloc[0] - df['cage#'] = mode_cage_val + mode_cage = df['cage #'].mode().iloc[0] + df['cage#'] = mode_cage cage_num = True if "winner" in col.lower(): winner_col = col @@ -276,9 +271,9 @@ def process(df, protocol, cohort, output_dir, plot_flag): if not cage_num: try: new_sheet_name = sheet.lower().replace("cage", "") - mode_cage_val = int(new_sheet_name) - df['cage#'] = mode_cage_val - except: + mode_cage = int(new_sheet_name) + df['cage#'] = mode_cage + except ValueError: print("Cage# cannot be determined") return None @@ -304,16 +299,20 @@ def process(df, protocol, cohort, output_dir, plot_flag): if tie_col: df[tie_col] = df[tie_col].notna() - elo_calc = calculation.iterate_elo_rating_calculation_for_dataframe(dataframe=df, winner_id_column=winner_col, - loser_id_column=loser_col, - tie_column=tie_col) + elo_calc = calculation.iterate_elo_rating_calculation_for_dataframe( + dataframe=df, winner_id_column=winner_col, + loser_id_column=loser_col, + tie_column=tie_col + ) elo_df = pd.DataFrame.from_dict(elo_calc, orient='index') elo_df.groupby("subject_id").count() cage_to_strain = {} if cage_to_strain: - elo_df["subject_strain"] = elo_df["cage_num_of_subject"].map(cage_to_strain) - elo_df["agent_strain"] = elo_df["cage_num_of_agent"].map(cage_to_strain) + elo_df["subject_strain"] = \ + elo_df["cage_num_of_subject"].map(cage_to_strain) + elo_df["agent_strain"] = \ + elo_df["cage_num_of_agent"].map(cage_to_strain) elo_df["experiment_type"] = protocol elo_df["cohort"] = cohort @@ -328,32 +327,60 @@ def process(df, protocol, cohort, output_dir, plot_flag): fig, ax = plt.subplots() # adjusting session number difference - elo_df['session_number_difference'] = \ - df['session_number_difference'].repeat(2).reset_index(drop=True) + col = "session_number_difference" + elo_df[col] = df[col].repeat(2).reset_index(drop=True) - for index, row in elo_df[elo_df['session_number_difference'].astype(bool)].iterrows(): + for index, row 
in elo_df[elo_df[col].astype(bool)].iterrows(): # Offsetting by 0.5 to avoid drawing the line on the dot - # Drawing the lines a little above the max and a little below the minimum - plt.vlines(x=[row["total_match_number"] - 0.5], ymin=min_elo_rating - 50, ymax=max_elo_rating + 50, - colors='black', linestyle='dashed') + # Drawing the lines above the max and below the minimum + plt.vlines(x=[row["total_match_number"] - 0.5], + ymin=min_elo_rating - 50, + ymax=max_elo_rating + 50, + colors='black', + linestyle='dashed') for subject in sorted(elo_df["subject_id"].unique()): # Getting all the rows with the current subject subject_dataframe = elo_df[elo_df["subject_id"] == subject] # Making the current match number the X-Axis - plt.plot(subject_dataframe["total_match_number"], subject_dataframe["updated_elo_rating"], '-o', + plt.plot(subject_dataframe["total_match_number"], + subject_dataframe["updated_elo_rating"], + '-o', label=subject) # plt.show() ax.set_xlabel("Trial Number") ax.set_ylabel("Elo rating") - ax.set_title( - "{} Elo Rating for {} {}".format(protocol, cohort, "Cage #" + str(mode_cage_val))) + tite = "{} Elo Rating for {} {}".format(protocol, + cohort, + "Cage #" + str(mode_cage)) + ax.set_title(tite) ax.legend(loc="upper left") plt.ylim(min_elo_rating - 50, max_elo_rating + 50) - fig.savefig(os.path.join(output_dir, protocol + "_cage" + str(mode_cage_val) + ".png")) + file_name = protocol + "_cage" + str(mode_cage) + ".png" + fig.savefig(os.path.join(output_dir, file_name)) # Saving df csv to output dir - elo_df.to_csv(os.path.join(output_dir, protocol + "_cage" + str(mode_cage_val) + ".csv"), index=False) + file_name = protocol + "_cage" + str(mode_cage) + ".csv" + elo_df.to_csv(os.path.join(output_dir, file_name), index=False) + +def generate_elo_scores(file_info, output_dir, plot_flag=True): + """ + This function takes in a dataframe and processes elo score for + home_cage_observation, urine_marking, or test_tube protocols + Args (3 total, 3 required): + file_info (dict): + dictionary with file names as key and value as a dictionary of + file information with the following properties: + file_path (str): path to file + protocol (str): protocol name + sheet (list): list of sheet names + cohort (str): cohort name + output_dir (str): path to output directory + plot_flag (bool): flag to plot data, default True + + Return(None): + None + """ for file_name, file_data in file_info.items(): file_path = file_data["file_path"] @@ -364,6 +391,14 @@ def process(df, protocol, cohort, output_dir, plot_flag): for sheet in sheets: data = pd.read_excel(xls, sheet_name=sheet) if protocol == "reward_competition": - _reward_competition(df=data, output_dir=output_dir, plot_flag=plot_flag) + __reward_competition(df=data, + cohort=cohort, + output_dir=output_dir, + plot_flag=plot_flag) else: - process(df=data, protocol=protocol, cohort=cohort, output_dir=output_dir, plot_flag=plot_flag) + __process(df=data, + protocol=protocol, + cohort=cohort, + sheet=sheet, + output_dir=output_dir, + plot_flag=plot_flag) From b8958a918f034f3467c9507099da1817fca3a750 Mon Sep 17 00:00:00 2001 From: Chaitra Peddireddy Date: Wed, 11 Oct 2023 10:40:22 -0400 Subject: [PATCH 09/15] Fix issue import issue in __init__.py --- pc_mouseparty/rank/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pc_mouseparty/rank/__init__.py b/pc_mouseparty/rank/__init__.py index bd20cc5..e69de29 100644 --- a/pc_mouseparty/rank/__init__.py +++ b/pc_mouseparty/rank/__init__.py @@ -1,2 +0,0 @@ -from .elo_score import 
generate_elo_scores -__all__ = ['generate_elo_scores'] From 51f645bd6c4427e0132fe447b62670727f5fd1de Mon Sep 17 00:00:00 2001 From: Chaitra Peddireddy Date: Mon, 13 Nov 2023 09:04:40 -0500 Subject: [PATCH 10/15] Changed File Struct for Rank Dir --- pc_mouseparty/rank/{elorating => }/calculation.py | 0 pc_mouseparty/rank/elo_score.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename pc_mouseparty/rank/{elorating => }/calculation.py (100%) diff --git a/pc_mouseparty/rank/elorating/calculation.py b/pc_mouseparty/rank/calculation.py similarity index 100% rename from pc_mouseparty/rank/elorating/calculation.py rename to pc_mouseparty/rank/calculation.py diff --git a/pc_mouseparty/rank/elo_score.py b/pc_mouseparty/rank/elo_score.py index ff778ec..628c6db 100644 --- a/pc_mouseparty/rank/elo_score.py +++ b/pc_mouseparty/rank/elo_score.py @@ -3,7 +3,7 @@ import matplotlib.pyplot as plt import re from collections import defaultdict -from elorating import calculation +from pc_mouseparty.rank import calculation import warnings From f8b4d32135f348e8afd556e2e4dcfd7b64c5e5fd Mon Sep 17 00:00:00 2001 From: Chaitra Peddireddy Date: Wed, 20 Dec 2023 13:28:58 -0500 Subject: [PATCH 11/15] Changed rank module structure --- .gitignore | 2 + pc_mouseparty/rank/elo_score.py | 2 +- pc_mouseparty/rank/elorating/calculation.py | 198 -------------------- 3 files changed, 3 insertions(+), 199 deletions(-) delete mode 100644 pc_mouseparty/rank/elorating/calculation.py diff --git a/.gitignore b/.gitignore index f8e6b96..1001a93 100644 --- a/.gitignore +++ b/.gitignore @@ -105,3 +105,5 @@ ENV/ # IDE settings .vscode/ .idea/ + +.DS_Store diff --git a/pc_mouseparty/rank/elo_score.py b/pc_mouseparty/rank/elo_score.py index 628c6db..732f6a3 100644 --- a/pc_mouseparty/rank/elo_score.py +++ b/pc_mouseparty/rank/elo_score.py @@ -3,7 +3,7 @@ import matplotlib.pyplot as plt import re from collections import defaultdict -from pc_mouseparty.rank import calculation +from . import calculation import warnings diff --git a/pc_mouseparty/rank/elorating/calculation.py b/pc_mouseparty/rank/elorating/calculation.py deleted file mode 100644 index 35f175c..0000000 --- a/pc_mouseparty/rank/elorating/calculation.py +++ /dev/null @@ -1,198 +0,0 @@ -#!/usr/bin/env python3 -"""Elo Rating Calculator - -Based on: https://www.omnicalculator.com/sports/elo -""" -import operator -from collections import defaultdict -import pandas as pd - - -def calculate_elo_rating(subject_elo_rating, agent_elo_rating, k_factor=20, score=1, number_of_decimals=1): - """ - Calculates the Elo rating of a given subject given it's original score, it's opponent, - the K-Factor, and whether or not it has won or not. - The calculation is based on: https://www.omnicalculator.com/sports/elo - - Args: - subject_elo_rating(float): The original Elo rating for the subject - agent_elo_rating(float): The original Elo rating for the agent - k_factor(int): k-factor, or development coefficient. - - It usually takes values between 10 and 40, depending on player's strength - score(int): the actual outcome of the game. - - In chess, a win counts as 1 point, a draw is equal to 0.5, and a lose gives 0. 
- number_of_decimals(int): Number of decimals to round to - - Returns: - int: Updated Elo rating of the subject - """ - # Calculating the Elo rating - rating_difference = agent_elo_rating - subject_elo_rating - expected_score = 1 / (1 + 10 ** (rating_difference / 400)) - new_elo_rating = subject_elo_rating + k_factor * (score - expected_score) - # Rounding to `number_of_decimals` - return round(new_elo_rating, number_of_decimals) - - -def update_elo_rating(winner_id, loser_id, id_to_elo_rating=None, default_elo_rating=1000, \ - winner_score=1, loser_score=0, **calculate_elo_rating_params): - """ - Updates the Elo rating in a dictionary that contains the ID of the subject as keys, - and the Elo rating as the values. You can also adjust how the Elo rating is calculated with 'calculate_elo_rating_params'. - - Args: - winner_id(str): ID of the winner - loser_id(str): ID of the loser - id_to_elo_rating(dict): Dict that has the ID of the subjects as keys to the Elo Score as values - default_elo_rating(int): The default Elo rating to be used if there is not elo score for the specified ID - **calculate_elo_rating_params(kwargs): Other params for the calculate_elo_rating to change how the Elo rating is calculated - - Returns: - Dict: Dict that has the ID of the subjects as keys to the Elo Score as values - """ - if id_to_elo_rating is None: - id_to_elo_rating = defaultdict(lambda: default_elo_rating) - - # Getting the current Elo Score - current_winner_rating = id_to_elo_rating[winner_id] - current_loser_rating = id_to_elo_rating[loser_id] - - # Calculating Elo rating - id_to_elo_rating[winner_id] = calculate_elo_rating(subject_elo_rating=current_winner_rating, \ - agent_elo_rating=current_loser_rating, score=winner_score, - **calculate_elo_rating_params) - id_to_elo_rating[loser_id] = calculate_elo_rating(subject_elo_rating=current_loser_rating, \ - agent_elo_rating=current_winner_rating, score=loser_score, - **calculate_elo_rating_params) - - return id_to_elo_rating - - -def get_ranking_from_elo_rating_dictionary(input_dict, subject_id): - """ - Orders a dictionary of subject ID keys to ELO score values by ELO score. - And then gets the rank of the subject with the inputted ID. - Lower ranks like 1 would represent those subjects with higher ELO scores and vice versa. - - Args: - input_dict(dict): - Dictionary of subject ID keys to ELO score values - subject_id(str, int, or any value that's a key in input dict): - The ID of the subject that you want the ranking of - - Returns: - int: - Ranking of the subject with the ID inputted - """ - # Sorting the subject ID's by ELO score - sorted_subject_to_elo_rating = sorted(input_dict.items(), key=operator.itemgetter(1), reverse=True) - # Getting the rank of the subject based on ELO score - return [subject_tuple[0] for subject_tuple in sorted_subject_to_elo_rating].index(subject_id) + 1 - - -def iterate_elo_rating_calculation_for_dataframe(dataframe, winner_id_column, loser_id_column, tie_column=None, - additional_columns=None): - """ - Iterates through a dataframe that has the ID of winners and losers for a given event. - A dictionary will be created that contains the information of the event, - which can then be turned into a dataframe. Each key is either from winner or loser's perspective. 
- - Args: - dataframe(Pandas DataFrame): - winner_id_column(str): The name of the column that has the winner's ID - loser_id_column(str): The name of the column that has the loser's ID - additional_columns(list): Additional columns to take from the - - Returns: - Dict: With a key value pair for each event either from the winner or loser's perspective. - This can be turned into a dataframe with each key value pair being a row. - """ - if additional_columns is None: - additional_columns = [] - - # Dictionary that keeps track of the current Elo rating of the subject - id_to_elo_rating = defaultdict(lambda: 1000) - # Dictionary that will be converted to a DataFrame - index_to_elo_rating_and_meta_data = defaultdict(dict) - - # Indexes that will identify which row the dictionary key value pair will be - # The number of the index has no significance other than being the number of the row - all_indexes = iter(range(0, 99999)) - - # Keeping track of the number of matches - total_match_number = 1 - - # Making a copy in case there is an error with changing the type of the tie column - copied_dataframe = dataframe.copy() - # Changing the tie column type to bool - # So that we can filter out for booleans including False and 0 - try: - copied_dataframe[tie_column] = copied_dataframe[tie_column].astype(bool) - except: - copied_dataframe = dataframe.copy() - - for index, row in copied_dataframe.dropna(subset=winner_id_column).iterrows(): - # Getting the ID of the winner subject - winner_id = row[winner_id_column] - # Getting the ID of the loser subject - loser_id = row[loser_id_column] - - # Getting the current Elo Score - current_winner_rating = id_to_elo_rating[winner_id] - current_loser_rating = id_to_elo_rating[loser_id] - - if tie_column: - # When there is nothing in the tie column - # Or when there is a false value indicating that it is not a tie - if pd.isna(copied_dataframe[tie_column][index]) or ~(copied_dataframe[tie_column][index]).any(): - winner_score = 1 - loser_score = 0 - # When there is value in the tie column - else: - winner_score = 0.5 - loser_score = 0.5 - # When there is no tie column - else: - winner_score = 1 - loser_score = 0 - - # Updating the dictionary with ID keys and Elo Score values - update_elo_rating(winner_id=winner_id, loser_id=loser_id, id_to_elo_rating=id_to_elo_rating, \ - winner_score=winner_score, loser_score=loser_score) - - # Saving all the data for the winner - winner_index = next(all_indexes) - index_to_elo_rating_and_meta_data[winner_index]["total_match_number"] = total_match_number - index_to_elo_rating_and_meta_data[winner_index]["subject_id"] = winner_id - index_to_elo_rating_and_meta_data[winner_index]["agent_id"] = loser_id - index_to_elo_rating_and_meta_data[winner_index]["original_elo_rating"] = current_winner_rating - index_to_elo_rating_and_meta_data[winner_index]["updated_elo_rating"] = id_to_elo_rating[winner_id] - index_to_elo_rating_and_meta_data[winner_index]["win_draw_loss"] = winner_score - index_to_elo_rating_and_meta_data[winner_index]["subject_ranking"] = get_ranking_from_elo_rating_dictionary( - id_to_elo_rating, winner_id) - index_to_elo_rating_and_meta_data[winner_index]["agent_ranking"] = get_ranking_from_elo_rating_dictionary( - id_to_elo_rating, loser_id) - index_to_elo_rating_and_meta_data[winner_index]["pairing_index"] = 0 - for column in additional_columns: - index_to_elo_rating_and_meta_data[winner_index][column] = row[column] - - # Saving all the data for the loser - loser_index = next(all_indexes) - 
index_to_elo_rating_and_meta_data[loser_index]["total_match_number"] = total_match_number
-    index_to_elo_rating_and_meta_data[loser_index]["subject_id"] = loser_id
-    index_to_elo_rating_and_meta_data[loser_index]["agent_id"] = winner_id
-    index_to_elo_rating_and_meta_data[loser_index]["original_elo_rating"] = current_loser_rating
-    index_to_elo_rating_and_meta_data[loser_index]["updated_elo_rating"] = id_to_elo_rating[loser_id]
-    index_to_elo_rating_and_meta_data[loser_index]["win_draw_loss"] = loser_score
-    index_to_elo_rating_and_meta_data[loser_index]["subject_ranking"] = get_ranking_from_elo_rating_dictionary(
-        id_to_elo_rating, loser_id)
-    index_to_elo_rating_and_meta_data[loser_index]["agent_ranking"] = get_ranking_from_elo_rating_dictionary(
-        id_to_elo_rating, winner_id)
-    index_to_elo_rating_and_meta_data[loser_index]["pairing_index"] = 1
-    for column in additional_columns:
-        index_to_elo_rating_and_meta_data[loser_index][column] = row[column]
-
-    # Updating the match number
-    total_match_number += 1
-
-    return index_to_elo_rating_and_meta_data

From 786c112ba1913cd2284b5b02425f398c811f7893 Mon Sep 17 00:00:00 2001
From: Chaitra Peddireddy
Date: Fri, 29 Dec 2023 22:34:32 -0500
Subject: [PATCH 12/15] Flake8 for rank dir

---
 pc_mouseparty/rank/calculation.py | 167 ++++++++++++++++++------------
 1 file changed, 103 insertions(+), 64 deletions(-)

diff --git a/pc_mouseparty/rank/calculation.py b/pc_mouseparty/rank/calculation.py
index 35f175c..ad75b94 100644
--- a/pc_mouseparty/rank/calculation.py
+++ b/pc_mouseparty/rank/calculation.py
@@ -8,19 +8,24 @@
 import pandas as pd
 
 
-def calculate_elo_rating(subject_elo_rating, agent_elo_rating, k_factor=20, score=1, number_of_decimals=1):
+def calculate_elo_rating(subject_elo_rating,
+                         agent_elo_rating,
+                         k_factor=20, score=1,
+                         number_of_decimals=1):
     """
-    Calculates the Elo rating of a given subject given it's original score, it's opponent,
-    the K-Factor, and whether or not it has won or not.
+    Calculates the Elo rating of a given subject given its original score,
+    its opponent, the K-factor, and whether or not it won.
     The calculation is based on: https://www.omnicalculator.com/sports/elo
 
     Args:
         subject_elo_rating(float): The original Elo rating for the subject
         agent_elo_rating(float): The original Elo rating for the agent
         k_factor(int): k-factor, or development coefficient.
-        - It usually takes values between 10 and 40, depending on player's strength
+        - It usually takes values between 10 and 40, depending on
+        player's strength
         score(int): the actual outcome of the game.
-        - In chess, a win counts as 1 point, a draw is equal to 0.5, and a lose gives 0.
+        - In chess, a win counts as 1 point, a draw is equal to 0.5,
+        and a loss gives 0.
         number_of_decimals(int): Number of decimals to round to
 
     Returns:
@@ -34,21 +39,30 @@ def calculate_elo_rating(subject_elo_rating, agent_elo_rating, k_factor=20, scor
     return round(new_elo_rating, number_of_decimals)
 
 
-def update_elo_rating(winner_id, loser_id, id_to_elo_rating=None, default_elo_rating=1000, \
-                      winner_score=1, loser_score=0, **calculate_elo_rating_params):
+def update_elo_rating(winner_id,
+                      loser_id,
+                      id_to_elo_rating=None,
+                      default_elo_rating=1000,
+                      winner_score=1,
+                      loser_score=0,
+                      **calculate_elo_rating_params):
     """
-    Updates the Elo rating in a dictionary that contains the ID of the subject as keys,
-    and the Elo rating as the values. You can also adjust how the Elo rating is calculated with 'calculate_elo_rating_params'. 
+    Updates the Elo rating in a dictionary that contains the ID of the subject
+    as keys, and the Elo rating as the values. You can also adjust how the Elo
+    rating is calculated with 'calculate_elo_rating_params'.
 
     Args:
         winner_id(str): ID of the winner
         loser_id(str): ID of the loser
-        id_to_elo_rating(dict): Dict that has the ID of the subjects as keys to the Elo Score as values
-        default_elo_rating(int): The default Elo rating to be used if there is not elo score for the specified ID
-        **calculate_elo_rating_params(kwargs): Other params for the calculate_elo_rating to change how the Elo rating is calculated
+        id_to_elo_rating(dict): Dict that has the ID of the subjects as keys
+        to the Elo Score as values
+        default_elo_rating(int): The default Elo rating to be used if there is
+        no Elo score for the specified ID
+        **calculate_elo_rating_params(kwargs): Other params for the
+        calculate_elo_rating to change how the Elo rating is calculated
 
     Returns:
-        Dict: Dict that has the ID of the subjects as keys to the Elo Score as values
+        Dict: Dict mapping subject IDs (keys) to Elo scores (values)
     """
     if id_to_elo_rating is None:
         id_to_elo_rating = defaultdict(lambda: default_elo_rating)
@@ -58,12 +72,17 @@ def update_elo_rating(winner_id, loser_id, id_to_elo_rating=None, default_elo_ra
     current_loser_rating = id_to_elo_rating[loser_id]
 
     # Calculating Elo rating
-    id_to_elo_rating[winner_id] = calculate_elo_rating(subject_elo_rating=current_winner_rating, \
-                                                       agent_elo_rating=current_loser_rating, score=winner_score,
-                                                       **calculate_elo_rating_params)
-    id_to_elo_rating[loser_id] = calculate_elo_rating(subject_elo_rating=current_loser_rating, \
-                                                      agent_elo_rating=current_winner_rating, score=loser_score,
-                                                      **calculate_elo_rating_params)
+    id_to_elo_rating[winner_id] = calculate_elo_rating(
+        subject_elo_rating=current_winner_rating,
+        agent_elo_rating=current_loser_rating,
+        score=winner_score,
+        **calculate_elo_rating_params)
+
+    id_to_elo_rating[loser_id] = \
+        calculate_elo_rating(subject_elo_rating=current_loser_rating,
+                             agent_elo_rating=current_winner_rating,
+                             score=loser_score,
+                             **calculate_elo_rating_params)
 
     return id_to_elo_rating
@@ -72,7 +91,8 @@ def get_ranking_from_elo_rating_dictionary(input_dict, subject_id):
     """
     Orders a dictionary of subject ID keys to ELO score values by ELO score.
    And then gets the rank of the subject with the inputted ID.
-    Lower ranks like 1 would represent those subjects with higher ELO scores and vice versa.
+    Lower ranks like 1 would represent those subjects with higher ELO scores
+    and vice versa.
Args: input_dict(dict): @@ -85,17 +105,26 @@ def get_ranking_from_elo_rating_dictionary(input_dict, subject_id): Ranking of the subject with the ID inputted """ # Sorting the subject ID's by ELO score - sorted_subject_to_elo_rating = sorted(input_dict.items(), key=operator.itemgetter(1), reverse=True) + sorted_elo = sorted(input_dict.items(), + key=operator.itemgetter(1), + reverse=True) # Getting the rank of the subject based on ELO score - return [subject_tuple[0] for subject_tuple in sorted_subject_to_elo_rating].index(subject_id) + 1 + rank = [subject_tuple[0] for subject_tuple in sorted_elo].index(subject_id) + rank += 1 + return rank -def iterate_elo_rating_calculation_for_dataframe(dataframe, winner_id_column, loser_id_column, tie_column=None, +def iterate_elo_rating_calculation_for_dataframe(dataframe, + winner_id_column, + loser_id_column, + tie_column=None, additional_columns=None): """ - Iterates through a dataframe that has the ID of winners and losers for a given event. + Iterates through a dataframe that has the ID of winners and losers for + a given event. A dictionary will be created that contains the information of the event, - which can then be turned into a dataframe. Each key is either from winner or loser's perspective. + which can then be turned into a dataframe. Each key is either from winner + or loser's perspective. Args: dataframe(Pandas DataFrame): @@ -104,8 +133,10 @@ def iterate_elo_rating_calculation_for_dataframe(dataframe, winner_id_column, lo additional_columns(list): Additional columns to take from the Returns: - Dict: With a key value pair for each event either from the winner or loser's perspective. - This can be turned into a dataframe with each key value pair being a row. + Dict: With a key value pair for each event either from the winner or + loser's perspective. + This can be turned into a dataframe with each key value pair being + a row. 
""" if additional_columns is None: additional_columns = [] @@ -113,29 +144,30 @@ def iterate_elo_rating_calculation_for_dataframe(dataframe, winner_id_column, lo # Dictionary that keeps track of the current Elo rating of the subject id_to_elo_rating = defaultdict(lambda: 1000) # Dictionary that will be converted to a DataFrame - index_to_elo_rating_and_meta_data = defaultdict(dict) + elo_metadata = defaultdict(dict) - # Indexes that will identify which row the dictionary key value pair will be - # The number of the index has no significance other than being the number of the row + # Indexes that will identify which row the dictionary key value pair + # The num of index has no significance other than being the number of row all_indexes = iter(range(0, 99999)) # Keeping track of the number of matches total_match_number = 1 - # Making a copy in case there is an error with changing the type of the tie column + # Making a copy in case there is an error with changing the type of the tie copied_dataframe = dataframe.copy() # Changing the tie column type to bool # So that we can filter out for booleans including False and 0 try: - copied_dataframe[tie_column] = copied_dataframe[tie_column].astype(bool) - except: + copied_dataframe[tie_column] = \ + copied_dataframe[tie_column].astype(bool) + except KeyError: copied_dataframe = dataframe.copy() - for index, row in copied_dataframe.dropna(subset=winner_id_column).iterrows(): + for idx, rw in copied_dataframe.dropna(subset=winner_id_column).iterrows(): # Getting the ID of the winner subject - winner_id = row[winner_id_column] + winner_id = rw[winner_id_column] # Getting the ID of the loser subject - loser_id = row[loser_id_column] + loser_id = rw[loser_id_column] # Getting the current Elo Score current_winner_rating = id_to_elo_rating[winner_id] @@ -144,7 +176,8 @@ def iterate_elo_rating_calculation_for_dataframe(dataframe, winner_id_column, lo if tie_column: # When there is nothing in the tie column # Or when there is a false value indicating that it is not a tie - if pd.isna(copied_dataframe[tie_column][index]) or ~(copied_dataframe[tie_column][index]).any(): + if (pd.isna(copied_dataframe[tie_column][idx]) or + ~(copied_dataframe[tie_column][idx]).any()): winner_score = 1 loser_score = 0 # When there is value in the tie column @@ -157,42 +190,48 @@ def iterate_elo_rating_calculation_for_dataframe(dataframe, winner_id_column, lo loser_score = 0 # Updating the dictionary with ID keys and Elo Score values - update_elo_rating(winner_id=winner_id, loser_id=loser_id, id_to_elo_rating=id_to_elo_rating, \ - winner_score=winner_score, loser_score=loser_score) + update_elo_rating(winner_id=winner_id, + loser_id=loser_id, + id_to_elo_rating=id_to_elo_rating, + winner_score=winner_score, + loser_score=loser_score) # Saving all the data for the winner winner_index = next(all_indexes) - index_to_elo_rating_and_meta_data[winner_index]["total_match_number"] = total_match_number - index_to_elo_rating_and_meta_data[winner_index]["subject_id"] = winner_id - index_to_elo_rating_and_meta_data[winner_index]["agent_id"] = loser_id - index_to_elo_rating_and_meta_data[winner_index]["original_elo_rating"] = current_winner_rating - index_to_elo_rating_and_meta_data[winner_index]["updated_elo_rating"] = id_to_elo_rating[winner_id] - index_to_elo_rating_and_meta_data[winner_index]["win_draw_loss"] = winner_score - index_to_elo_rating_and_meta_data[winner_index]["subject_ranking"] = get_ranking_from_elo_rating_dictionary( - id_to_elo_rating, winner_id) - 
index_to_elo_rating_and_meta_data[winner_index]["agent_ranking"] = get_ranking_from_elo_rating_dictionary( - id_to_elo_rating, loser_id) - index_to_elo_rating_and_meta_data[winner_index]["pairing_index"] = 0 + elo_metadata[winner_index]["total_match_number"] = total_match_number + elo_metadata[winner_index]["subject_id"] = winner_id + elo_metadata[winner_index]["agent_id"] = loser_id + elo_metadata[winner_index]["original_elo_rating"] = \ + current_winner_rating + elo_metadata[winner_index]["updated_elo_rating"] = \ + id_to_elo_rating[winner_id] + elo_metadata[winner_index]["win_draw_loss"] = winner_score + elo_metadata[winner_index]["subject_ranking"] = \ + get_ranking_from_elo_rating_dictionary(id_to_elo_rating, winner_id) + elo_metadata[winner_index]["agent_ranking"] = \ + get_ranking_from_elo_rating_dictionary(id_to_elo_rating, loser_id) + elo_metadata[winner_index]["pairing_index"] = 0 for column in additional_columns: - index_to_elo_rating_and_meta_data[winner_index][column] = row[column] + elo_metadata[winner_index][column] = rw[column] # Saving all the data for the loser loser_index = next(all_indexes) - index_to_elo_rating_and_meta_data[loser_index]["total_match_number"] = total_match_number - index_to_elo_rating_and_meta_data[loser_index]["subject_id"] = loser_id - index_to_elo_rating_and_meta_data[loser_index]["agent_id"] = winner_id - index_to_elo_rating_and_meta_data[loser_index]["original_elo_rating"] = current_loser_rating - index_to_elo_rating_and_meta_data[loser_index]["updated_elo_rating"] = id_to_elo_rating[loser_id] - index_to_elo_rating_and_meta_data[loser_index]["win_draw_loss"] = loser_score - index_to_elo_rating_and_meta_data[loser_index]["subject_ranking"] = get_ranking_from_elo_rating_dictionary( - id_to_elo_rating, loser_id) - index_to_elo_rating_and_meta_data[loser_index]["agent_ranking"] = get_ranking_from_elo_rating_dictionary( - id_to_elo_rating, winner_id) - index_to_elo_rating_and_meta_data[loser_index]["pairing_index"] = 1 + elo_metadata[loser_index]["total_match_number"] = total_match_number + elo_metadata[loser_index]["subject_id"] = loser_id + elo_metadata[loser_index]["agent_id"] = winner_id + elo_metadata[loser_index]["original_elo_rating"] = current_loser_rating + elo_metadata[loser_index]["updated_elo_rating"] = \ + id_to_elo_rating[loser_id] + elo_metadata[loser_index]["win_draw_loss"] = loser_score + elo_metadata[loser_index]["subject_ranking"] = \ + get_ranking_from_elo_rating_dictionary(id_to_elo_rating, loser_id) + elo_metadata[loser_index]["agent_ranking"] = \ + get_ranking_from_elo_rating_dictionary(id_to_elo_rating, winner_id) + elo_metadata[loser_index]["pairing_index"] = 1 for column in additional_columns: - index_to_elo_rating_and_meta_data[loser_index][column] = row[column] + elo_metadata[loser_index][column] = rw[column] # Updating the match number total_match_number += 1 - return index_to_elo_rating_and_meta_data + return elo_metadata From 8bc4dac12a32b80975b444af4f28bce0d2dd61ba Mon Sep 17 00:00:00 2001 From: Chaitra Peddireddy Date: Fri, 29 Dec 2023 22:40:42 -0500 Subject: [PATCH 13/15] Fixed import error --- pc_mouseparty/rank/elo_score.py | 265 ++++++++++++++++---------------- 1 file changed, 133 insertions(+), 132 deletions(-) diff --git a/pc_mouseparty/rank/elo_score.py b/pc_mouseparty/rank/elo_score.py index 732f6a3..7bb3162 100644 --- a/pc_mouseparty/rank/elo_score.py +++ b/pc_mouseparty/rank/elo_score.py @@ -223,145 +223,146 @@ def __reward_competition(df, cohort, output_dir, plot_flag=True): return None + def __process(df, 
protocol, cohort, sheet, output_dir, plot_flag=True): - """ - This private function takes in a dataframe and processes the elo score - for home_cage_observation, urine_marking, or test_tube protocols - Args (6 total, 5 required): - df (pandas dataframe): dataframe to be processed - protocol (str): protocol name - cohort (str): cohort name - sheet (str): sheet name - output_dir (str): path to output directory - plot_flag (bool): flag to plot data, default True - Return(None): - None - """ - # Initializing column names - - find_col_names = df[df.apply( - lambda row: 'winner' in row.values, axis=1)] - - if not find_col_names.empty: - df.columns = find_col_names.iloc[0] - df = df[df.index != find_col_names.index[0]] - - # check if there is a cage number col - mode_cage = None - cage_num = False - # finding column names for winner, loser, and tie - winner_col, tie_col, loser_col = None, None, None - for col in df.columns.tolist(): - if "cage" in col.lower(): - # filling all cage values with mode - mode_cage = df['cage #'].mode().iloc[0] - df['cage#'] = mode_cage - cage_num = True - if "winner" in col.lower(): - winner_col = col - if "loser" in col.lower(): - loser_col = col - if "tie" in col.lower(): - tie_col = col - - if not winner_col or not loser_col: - print("Winner or Loser column not found") + """ + This private function takes in a dataframe and processes the elo score + for home_cage_observation, urine_marking, or test_tube protocols + Args (6 total, 5 required): + df (pandas dataframe): dataframe to be processed + protocol (str): protocol name + cohort (str): cohort name + sheet (str): sheet name + output_dir (str): path to output directory + plot_flag (bool): flag to plot data, default True + Return(None): + None + """ + # Initializing column names + find_col_names = df[df.apply( + lambda row: 'winner' in row.values, axis=1)] + + if not find_col_names.empty: + df.columns = find_col_names.iloc[0] + df = df[df.index != find_col_names.index[0]] + + # check if there is a cage number col + mode_cage = None + cage_num = False + # finding column names for winner, loser, and tie + winner_col, tie_col, loser_col = None, None, None + for col in df.columns.tolist(): + if "cage" in col.lower(): + # filling all cage values with mode + mode_cage = df['cage #'].mode().iloc[0] + df['cage#'] = mode_cage + cage_num = True + if "winner" in col.lower(): + winner_col = col + if "loser" in col.lower(): + loser_col = col + if "tie" in col.lower(): + tie_col = col + + if not winner_col or not loser_col: + print("Winner or Loser column not found") + return None + + if not cage_num: + try: + new_sheet_name = sheet.lower().replace("cage", "") + mode_cage = int(new_sheet_name) + df['cage#'] = mode_cage + except ValueError: + print("Cage# cannot be determined") return None - if not cage_num: - try: - new_sheet_name = sheet.lower().replace("cage", "") - mode_cage = int(new_sheet_name) - df['cage#'] = mode_cage - except ValueError: - print("Cage# cannot be determined") - return None - - # drop cols if winner & loss is NaN - df = df.dropna(subset=['winner', 'loser'], how='all') - - # Autofill dates - df['date'] = pd.to_datetime(df['date'], errors='coerce') - df['date'].fillna(method='ffill', inplace=True) - - # Identify sessions based on date values - df['session_number_difference'] = 0 - previous_date = None - for index, row in df.iterrows(): - current_date = row['date'] - # check for session change - if not previous_date: - df.at[index, 'session_number_difference'] = 1 - elif previous_date is not None and 
current_date != previous_date: - df.at[index, 'session_number_difference'] = 1 - previous_date = current_date - # Elo Score from calculation.py - if tie_col: - df[tie_col] = df[tie_col].notna() - - elo_calc = calculation.iterate_elo_rating_calculation_for_dataframe( - dataframe=df, winner_id_column=winner_col, - loser_id_column=loser_col, - tie_column=tie_col - ) - elo_df = pd.DataFrame.from_dict(elo_calc, orient='index') - elo_df.groupby("subject_id").count() - - cage_to_strain = {} - if cage_to_strain: - elo_df["subject_strain"] = \ - elo_df["cage_num_of_subject"].map(cage_to_strain) - elo_df["agent_strain"] = \ - elo_df["cage_num_of_agent"].map(cage_to_strain) - elo_df["experiment_type"] = protocol - elo_df["cohort"] = cohort - - if not os.path.exists(output_dir): - os.makedirs(output_dir) - - if plot_flag: - max_elo_rating = elo_df["updated_elo_rating"].max() - min_elo_rating = elo_df["updated_elo_rating"].min() - - plt.rcParams["figure.figsize"] = (13.5, 7.5) - fig, ax = plt.subplots() + # drop cols if winner & loss is NaN + df = df.dropna(subset=['winner', 'loser'], how='all') + + # Autofill dates + df['date'] = pd.to_datetime(df['date'], errors='coerce') + df['date'].fillna(method='ffill', inplace=True) + + # Identify sessions based on date values + df['session_number_difference'] = 0 + previous_date = None + for index, row in df.iterrows(): + current_date = row['date'] + # check for session change + if not previous_date: + df.at[index, 'session_number_difference'] = 1 + elif previous_date is not None and current_date != previous_date: + df.at[index, 'session_number_difference'] = 1 + previous_date = current_date + # Elo Score from calculation.py + if tie_col: + df[tie_col] = df[tie_col].notna() + + elo_calc = calculation.iterate_elo_rating_calculation_for_dataframe( + dataframe=df, winner_id_column=winner_col, + loser_id_column=loser_col, + tie_column=tie_col + ) + elo_df = pd.DataFrame.from_dict(elo_calc, orient='index') + elo_df.groupby("subject_id").count() - # adjusting session number difference - col = "session_number_difference" - elo_df[col] = df[col].repeat(2).reset_index(drop=True) + cage_to_strain = {} + if cage_to_strain: + elo_df["subject_strain"] = \ + elo_df["cage_num_of_subject"].map(cage_to_strain) + elo_df["agent_strain"] = \ + elo_df["cage_num_of_agent"].map(cage_to_strain) + elo_df["experiment_type"] = protocol + elo_df["cohort"] = cohort - for index, row in elo_df[elo_df[col].astype(bool)].iterrows(): - # Offsetting by 0.5 to avoid drawing the line on the dot - # Drawing the lines above the max and below the minimum - plt.vlines(x=[row["total_match_number"] - 0.5], - ymin=min_elo_rating - 50, - ymax=max_elo_rating + 50, - colors='black', - linestyle='dashed') - for subject in sorted(elo_df["subject_id"].unique()): - # Getting all the rows with the current subject - subject_dataframe = elo_df[elo_df["subject_id"] == subject] - # Making the current match number the X-Axis - plt.plot(subject_dataframe["total_match_number"], - subject_dataframe["updated_elo_rating"], - '-o', - label=subject) - # plt.show() - ax.set_xlabel("Trial Number") - ax.set_ylabel("Elo rating") + if not os.path.exists(output_dir): + os.makedirs(output_dir) - tite = "{} Elo Rating for {} {}".format(protocol, - cohort, - "Cage #" + str(mode_cage)) - ax.set_title(tite) - ax.legend(loc="upper left") - plt.ylim(min_elo_rating - 50, max_elo_rating + 50) - file_name = protocol + "_cage" + str(mode_cage) + ".png" - fig.savefig(os.path.join(output_dir, file_name)) + if plot_flag: + max_elo_rating = 
elo_df["updated_elo_rating"].max() + min_elo_rating = elo_df["updated_elo_rating"].min() + + plt.rcParams["figure.figsize"] = (13.5, 7.5) + fig, ax = plt.subplots() + + # adjusting session number difference + col = "session_number_difference" + elo_df[col] = df[col].repeat(2).reset_index(drop=True) + + for index, row in elo_df[elo_df[col].astype(bool)].iterrows(): + # Offsetting by 0.5 to avoid drawing the line on the dot + # Drawing the lines above the max and below the minimum + plt.vlines(x=[row["total_match_number"] - 0.5], + ymin=min_elo_rating - 50, + ymax=max_elo_rating + 50, + colors='black', + linestyle='dashed') + for subject in sorted(elo_df["subject_id"].unique()): + # Getting all the rows with the current subject + subject_dataframe = elo_df[elo_df["subject_id"] == subject] + # Making the current match number the X-Axis + plt.plot(subject_dataframe["total_match_number"], + subject_dataframe["updated_elo_rating"], + '-o', + label=subject) + # plt.show() + ax.set_xlabel("Trial Number") + ax.set_ylabel("Elo rating") + + tite = "{} Elo Rating for {} {}".format(protocol, + cohort, + "Cage #" + str(mode_cage)) + ax.set_title(tite) + ax.legend(loc="upper left") + plt.ylim(min_elo_rating - 50, max_elo_rating + 50) + file_name = protocol + "_cage" + str(mode_cage) + ".png" + fig.savefig(os.path.join(output_dir, file_name)) + + # Saving df csv to output dir + file_name = protocol + "_cage" + str(mode_cage) + ".csv" + elo_df.to_csv(os.path.join(output_dir, file_name), index=False) - # Saving df csv to output dir - file_name = protocol + "_cage" + str(mode_cage) + ".csv" - elo_df.to_csv(os.path.join(output_dir, file_name), index=False) def generate_elo_scores(file_info, output_dir, plot_flag=True): """ From 05e2d1ca0ca8abc5e13f7c105225b7be69aedd83 Mon Sep 17 00:00:00 2001 From: Chaitra Peddireddy Date: Fri, 29 Dec 2023 22:46:03 -0500 Subject: [PATCH 14/15] Fix Deepsourc py errors --- pc_mouseparty/rank/calculation.py | 14 +++++++++++--- pc_mouseparty/rank/elo_score.py | 1 - 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/pc_mouseparty/rank/calculation.py b/pc_mouseparty/rank/calculation.py index ad75b94..7008f0f 100644 --- a/pc_mouseparty/rank/calculation.py +++ b/pc_mouseparty/rank/calculation.py @@ -197,7 +197,11 @@ def iterate_elo_rating_calculation_for_dataframe(dataframe, loser_score=loser_score) # Saving all the data for the winner - winner_index = next(all_indexes) + try: + winner_index = next(all_indexes) + except StopIteration: + print("There are more than 99999 rows in the dataframe. ") + continue elo_metadata[winner_index]["total_match_number"] = total_match_number elo_metadata[winner_index]["subject_id"] = winner_id elo_metadata[winner_index]["agent_id"] = loser_id @@ -214,8 +218,12 @@ def iterate_elo_rating_calculation_for_dataframe(dataframe, for column in additional_columns: elo_metadata[winner_index][column] = rw[column] - # Saving all the data for the loser - loser_index = next(all_indexes) + # Saving all the data for the loser + try: + loser_index = next(all_indexes) + except StopIteration: + print("There are more than 99999 rows in the dataframe. 
") + continue elo_metadata[loser_index]["total_match_number"] = total_match_number elo_metadata[loser_index]["subject_id"] = loser_id elo_metadata[loser_index]["agent_id"] = winner_id diff --git a/pc_mouseparty/rank/elo_score.py b/pc_mouseparty/rank/elo_score.py index 7bb3162..9cde9e0 100644 --- a/pc_mouseparty/rank/elo_score.py +++ b/pc_mouseparty/rank/elo_score.py @@ -346,7 +346,6 @@ def __process(df, protocol, cohort, sheet, output_dir, plot_flag=True): subject_dataframe["updated_elo_rating"], '-o', label=subject) - # plt.show() ax.set_xlabel("Trial Number") ax.set_ylabel("Elo rating") From 667da74ce4b4d4fd14dcd2167d34b7369315562c Mon Sep 17 00:00:00 2001 From: Chaitra Peddireddy Date: Fri, 29 Dec 2023 22:50:43 -0500 Subject: [PATCH 15/15] fix deepsource py suggestions --- pc_mouseparty/rank/calculation.py | 2 +- pc_mouseparty/rank/elo_score.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pc_mouseparty/rank/calculation.py b/pc_mouseparty/rank/calculation.py index 7008f0f..e04daf6 100644 --- a/pc_mouseparty/rank/calculation.py +++ b/pc_mouseparty/rank/calculation.py @@ -197,7 +197,7 @@ def iterate_elo_rating_calculation_for_dataframe(dataframe, loser_score=loser_score) # Saving all the data for the winner - try: + try: winner_index = next(all_indexes) except StopIteration: print("There are more than 99999 rows in the dataframe. ") diff --git a/pc_mouseparty/rank/elo_score.py b/pc_mouseparty/rank/elo_score.py index 9cde9e0..e419c75 100644 --- a/pc_mouseparty/rank/elo_score.py +++ b/pc_mouseparty/rank/elo_score.py @@ -349,10 +349,9 @@ def __process(df, protocol, cohort, sheet, output_dir, plot_flag=True): ax.set_xlabel("Trial Number") ax.set_ylabel("Elo rating") - tite = "{} Elo Rating for {} {}".format(protocol, - cohort, - "Cage #" + str(mode_cage)) - ax.set_title(tite) + title = f"{protocol} Elo Rating for {cohort} Cage #{mode_cage}" + + ax.set_title(title) ax.legend(loc="upper left") plt.ylim(min_elo_rating - 50, max_elo_rating + 50) file_name = protocol + "_cage" + str(mode_cage) + ".png"