From 8284a72fd649a23f58c510c3e7641eff40d29cbe Mon Sep 17 00:00:00 2001
From: Chris <53177842+ChristopherMarais@users.noreply.github.com>
Date: Tue, 15 Aug 2023 12:57:30 -0400
Subject: [PATCH 01/15] removed dropped module from docs

---
 docs/pc_mouseparty.md | 4 ----
 mkdocs.yml            | 1 -
 2 files changed, 5 deletions(-)
 delete mode 100644 docs/pc_mouseparty.md

diff --git a/docs/pc_mouseparty.md b/docs/pc_mouseparty.md
deleted file mode 100644
index 9b9e42a..0000000
--- a/docs/pc_mouseparty.md
+++ /dev/null
@@ -1,4 +0,0 @@
-
-# pc_mouseparty module
-
-::: pc_mouseparty.pc_mouseparty
\ No newline at end of file
diff --git a/mkdocs.yml b/mkdocs.yml
index 98dac36..bd55272 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -82,5 +82,4 @@ nav:
     - Examples:
       - examples/intro.ipynb
     - API Reference:
-      - pc_mouseparty module: pc_mouseparty.md
       - test_functions module: test_functions.md

From 6e233dede3f132db36b4c93c63161b5dd779d71e Mon Sep 17 00:00:00 2001
From: mcum96
Date: Fri, 18 Aug 2023 14:27:55 -0400
Subject: [PATCH 02/15] Added 3 functions to extract bouts from boris excels

---
 docs/pc_mouseparty.md                |  58 +++++++++-
 .../vid_behavior/boris_extraction.py | 102 ++++++++++++++++++
 2 files changed, 158 insertions(+), 2 deletions(-)
 create mode 100644 pc_mouseparty/vid_behavior/boris_extraction.py

diff --git a/docs/pc_mouseparty.md b/docs/pc_mouseparty.md
index 9b9e42a..4f62ecc 100644
--- a/docs/pc_mouseparty.md
+++ b/docs/pc_mouseparty.md
@@ -1,4 +1,58 @@
-
+
 # pc_mouseparty module
 
-::: pc_mouseparty.pc_mouseparty
\ No newline at end of file
+::: pc_mouseparty.pc_mouseparty
+
+::: pc_mouseparty.vid_behavior.boris_extraction
+
function threshold_bouts(start_stop_array, min_iti, min_bout):
    """
    thresholds behavior bouts
    by combining behavior bouts with interbout intervals of < min_iti
    and then removing remaining bouts of < min_bout

    Args (3 total):
        start_stop_array: numpy array of dim (# of bouts, 2)
        min_iti: float, min interbout interval in seconds
        min_bout: float, min bout length in seconds

    Returns (1):
        start_stop_array: numpy array (ndim=(n bouts, 2))
        of start&stop times (s)
    """

function get_behavior_bouts(boris_df, subject, behavior, min_iti=0, min_bout=0):
    """
    extracts behavior bout start and stop times from a boris df
    thresholds individually by subject and behavior
    returns start_stop_array ordered by start values

    Args (5 total, 3 required):
        boris_df: pandas dataframe of a boris file (aggregated event table)
        subject: list of strings, desired subject(s) (as written in boris_df)
        behavior: list of strings, desired behavior(s) (as written in boris_df)
        min_iti: float, default=0, bouts w/ itis(s) < min_iti will be combined
        min_bout: float, default=0, bouts < min_bout(s) will be deleted

    Returns (1):
        numpy array (ndim=(n bouts, 2)) of start&stop times (ms)
    """

function save_behavior_bouts(directory, boris_df, subject, behavior, min_bout=0,
                             min_iti=0, filename=None):
    """
    saves a numpy array of start&stop times (ms)
    as filename: subject_behavior_bouts.npy

    Args (7 total, 4 required):
        directory: path to folder where filename.npy will be saved
            path format: './folder/folder/'
        boris_df: pandas dataframe of a boris file (aggregated event table)
        subject: list of strings, desired subjects (as written in boris_df)
        behavior: list of strings, desired behaviors (as written in boris_df)
        min_iti: float, default=0, bouts w/ itis(s) < min_iti will be combined
        min_bout: float, default=0, bouts < min_bout(s) will be deleted
        filename: string,
            default=None, must end in .npy

    Returns:
        none
    """
\ No newline at end of file
diff --git a/pc_mouseparty/vid_behavior/boris_extraction.py b/pc_mouseparty/vid_behavior/boris_extraction.py
new file mode 100644
index 0000000..99f862f
--- /dev/null
+++ b/pc_mouseparty/vid_behavior/boris_extraction.py
@@ -0,0 +1,102 @@
+
import numpy as np


def threshold_bouts(start_stop_array, min_iti, min_bout):
    """
    thresholds behavior bouts
    by combining behavior bouts with interbout intervals of < min_iti
    and then removing remaining bouts of < min_bout

    Args (3 total):
        start_stop_array: numpy array of dim (# of bouts, 2)
        min_iti: float, min interbout interval in seconds
        min_bout: float, min bout length in seconds

    Returns (1):
        start_stop_array: numpy array (ndim=(n bouts, 2))
        of start&stop times (s)
    """

    start_stop_array = np.sort(start_stop_array.flatten())
    times_to_delete = []
    if min_iti > 0:
        for i in range(1, len(start_stop_array)-1, 2):
            if (start_stop_array[i+1] - start_stop_array[i]) < min_iti:
                times_to_delete.extend([i, i+1])
    start_stop_array = np.delete(start_stop_array, times_to_delete)
    bouts_to_delete = []
    if min_bout > 0:
        for i in range(0, len(start_stop_array)-1, 2):
            if start_stop_array[i+1] - start_stop_array[i] < min_bout:
                bouts_to_delete.extend([i, i+1])
    start_stop_array = np.delete(start_stop_array, bouts_to_delete)
    no_bouts = len(start_stop_array)/2
    start_stop_array = np.reshape(start_stop_array, (int(no_bouts), 2))

    return start_stop_array


def get_behavior_bouts(boris_df, subject, behavior, min_iti=0, min_bout=0):
    """
    extracts behavior bout start and stop times from a boris df
    thresholds individually by subject and behavior
    returns start_stop_array ordered by start values

    Args (5 total, 3 required):
        boris_df: pandas dataframe of a boris file (aggregated event table)
        subject: list of strings, desired subject(s) (as written in boris_df)
        behavior: list of strings, desired behavior(s) (as written in boris_df)
        min_iti: float, default=0, bouts w/ itis(s) < min_iti will be combined
        min_bout: float, default=0, bouts < min_bout(s) will be deleted

    Returns (1):
        numpy array (ndim=(n bouts, 2)) of start&stop times (ms)
    """
    start_stop_arrays = []
    for mouse in subject:
        subject_df = boris_df[boris_df['Subject'] == mouse]
        for act in behavior:
            behavior_df = subject_df[subject_df['Behavior'] == act]
            start_stop_array = behavior_df[['Start (s)',
                                            'Stop (s)']].to_numpy()
            start_stop_arrays.append(threshold_bouts(start_stop_array,
                                                     min_iti, min_bout))
    start_stop_array = np.concatenate(start_stop_arrays)
    organizer = np.argsort(start_stop_array[:, 0])
    start_stop_array = start_stop_array[organizer]

    return start_stop_array * 1000


def save_behavior_bouts(directory, boris_df, subject, behavior, min_bout=0,
                        min_iti=0, filename=None):
    """
    saves a numpy array of start&stop times (ms)
    as filename: subject_behavior_bouts.npy

    Args (7 total, 4 required):
        directory: path to folder where filename.npy will be saved
            path format: './folder/folder/'
        boris_df: pandas dataframe of a boris file (aggregated event table)
        subject: list of strings, desired subjects (as written in boris_df)
        behavior: list of strings, desired behaviors (as written in boris_df)
        min_iti: float, default=0, bouts w/ itis(s) < min_iti will be combined
        min_bout: float, default=0, bouts < min_bout(s) will be deleted
        filename: string, default=None, must end in .npy

    Returns:
        none
    """
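    # note: get_behavior_bouts takes min_iti before min_bout,
    # matching its signature above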
    bouts_array = get_behavior_bouts(boris_df, subject,
                                     behavior, min_iti, min_bout)
    if filename is None:
        if type(subject) == list:
            subject = '_'.join(subject)
        if type(behavior) == list:
            behavior = '_'.join(behavior)
        subject = subject.replace(" ", "")
        behavior = behavior.replace(" ", "")
        filename = f"{subject}_{behavior}_bouts.npy"

    np.save(directory+filename, bouts_array)

From 66bf52c7d7b3ecee3837e7a1bfe7473e0c9ec6dc Mon Sep 17 00:00:00 2001
From: Chris <53177842+ChristopherMarais@users.noreply.github.com>
Date: Fri, 25 Aug 2023 10:12:31 -0400
Subject: [PATCH 03/15] added basic deepnote function

---
 pc_mouseparty/medpc/medpc_extraction.py | 70 +++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 pc_mouseparty/medpc/medpc_extraction.py

diff --git a/pc_mouseparty/medpc/medpc_extraction.py b/pc_mouseparty/medpc/medpc_extraction.py
new file mode 100644
index 0000000..7f78bac
--- /dev/null
+++ b/pc_mouseparty/medpc/medpc_extraction.py
@@ -0,0 +1,70 @@
import re
import pandas as pd

def medpc_txt2df(text_file_path):
    """
    docstring
    """
    # NOTE: text_file_path is handled as a file-like object with a
    # .name attribute (the gradio app case); the commented-out call
    # below is the plain string-path version used for the package
    #
Open the medpc text file # with open(text_file_path, "r") as file: # use this for package - with open(text_file_path.name) as file: # use this for gradio app + with open(text_file_path.name) as file: # use this for gradio app medpc_txt_file = file.read() - + # split the file with each new line an element in a list medpc_txt_file_lst = medpc_txt_file.split('\n') - + # remove all empty elements in the list medpc_txt_file_lst = list(filter(None, medpc_txt_file_lst)) @@ -25,12 +26,14 @@ def medpc_txt2df(text_file_path): temp.append(item) else: if temp: - floats = [float(x) for x in re.findall(r'\d+\.\d+', ''.join(temp))] + floats = [float(x) for x in re.findall(r'\d+\.\d+', + ''.join(temp))] result.append(floats) temp = [] result.append(item) if temp: - floats = [float(x) for x in re.findall(r'\d+\.\d+', ''.join(temp))] + floats = [float(x) for x in re.findall(r'\d+\.\d+', + ''.join(temp))] result.append(floats) # convert the list of lists and strings to @@ -52,9 +55,9 @@ def medpc_txt2df(text_file_path): # values are of unequal length # convert all values to lists pd_series_lst = [] - for i,j in result_dict.items(): + for i, j in result_dict.items(): if type(j) != list: - result_dict[i] = [j] + result_dict[i] = [j] else: result_dict[i] = j pd_series_lst.append(pd.Series(j)) @@ -64,7 +67,7 @@ def medpc_txt2df(text_file_path): df.columns = result_dict.keys() df.to_csv("medpc_converted_file.csv") - return( + return ( # df.head(5).to_html(), "medpc_converted_file.csv" - ) \ No newline at end of file + ) From 9190fa49c3f16c4a5dd9b0a8e1262f3730935aa0 Mon Sep 17 00:00:00 2001 From: Chris <53177842+ChristopherMarais@users.noreply.github.com> Date: Fri, 25 Aug 2023 23:03:57 -0400 Subject: [PATCH 05/15] add function to remove zeros --- pc_mouseparty/medpc/medpc_extraction.py | 41 +++++++++++++++++++------ 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/pc_mouseparty/medpc/medpc_extraction.py b/pc_mouseparty/medpc/medpc_extraction.py index 88f4414..83c11c2 100644 --- a/pc_mouseparty/medpc/medpc_extraction.py +++ b/pc_mouseparty/medpc/medpc_extraction.py @@ -4,14 +4,22 @@ def medpc_txt2df(text_file_path): """ - docstring + This function reads a medpc text data file into a pandas dataframe. + + Args (2 total, 1 required): + par_1: 1D numpy array, Values observed in the field (counts). + par_2: int, default = 0, Additional value to add. + + Return (1): + output_1 : str, The total sum as a string with a chosen suffix + added on. 
""" # Open the medpc text file # with open(text_file_path, "r") as file: # use this for package with open(text_file_path.name) as file: # use this for gradio app medpc_txt_file = file.read() - # split the file with each new line an element in a list + # split the file with each new line an element in a list medpc_txt_file_lst = medpc_txt_file.split('\n') # remove all empty elements in the list @@ -32,11 +40,11 @@ def medpc_txt2df(text_file_path): temp = [] result.append(item) if temp: - floats = [float(x) for x in re.findall(r'\d+\.\d+', + floats = [float(x) for x in re.findall(r'\d+\.\d+', ''.join(temp))] result.append(floats) - # convert the list of lists and strings to + # convert the list of lists and strings to # a dictionary with everything before ":" # as a key and everything after as the value result_dict = {} @@ -65,9 +73,24 @@ def medpc_txt2df(text_file_path): # add list to dataframe df = pd.concat(pd_series_lst, axis=1) df.columns = result_dict.keys() - df.to_csv("medpc_converted_file.csv") - return ( - # df.head(5).to_html(), - "medpc_converted_file.csv" - ) + return (df) + + +def cut_zeros(df): + """ + This function removes all trailing zeros of the medpc dataframe. + + Args (2 total, 1 required): + par_1: 1D numpy array, Values observed in the field (counts). + par_2: int, default = 0, Additional value to add. + + Return (1): + output_1 : str, The total sum as a string with a chosen suffix + added on. + """ + # find index of last row that does not only ahve 0 and Nan + last_idx = df[df.sum(axis=1).ne(0)].index[-1] + df = df[:last_idx+1] + + return (df) From 9120359fc50bd5253ca301cf69f7c506c8db17c3 Mon Sep 17 00:00:00 2001 From: Chris <53177842+ChristopherMarais@users.noreply.github.com> Date: Fri, 25 Aug 2023 23:32:17 -0400 Subject: [PATCH 06/15] added functions for medpc data txt parsing --- pc_mouseparty/medpc/medpc_extraction.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/pc_mouseparty/medpc/medpc_extraction.py b/pc_mouseparty/medpc/medpc_extraction.py index 83c11c2..3b3ea79 100644 --- a/pc_mouseparty/medpc/medpc_extraction.py +++ b/pc_mouseparty/medpc/medpc_extraction.py @@ -6,13 +6,11 @@ def medpc_txt2df(text_file_path): """ This function reads a medpc text data file into a pandas dataframe. - Args (2 total, 1 required): - par_1: 1D numpy array, Values observed in the field (counts). - par_2: int, default = 0, Additional value to add. + Args (1 total, 1 required): + text_file_path : str, a path to a medpc text file as a string. Return (1): - output_1 : str, The total sum as a string with a chosen suffix - added on. + df : pandas dataframe, a dataframe with the medpc data. """ # Open the medpc text file # with open(text_file_path, "r") as file: # use this for package @@ -82,12 +80,11 @@ def cut_zeros(df): This function removes all trailing zeros of the medpc dataframe. Args (2 total, 1 required): - par_1: 1D numpy array, Values observed in the field (counts). - par_2: int, default = 0, Additional value to add. + df: pandas dataframe, a dataframe with the medpc data. Return (1): - output_1 : str, The total sum as a string with a chosen suffix - added on. + df : pandas dataframe, a dataframe with the medpc data + with trailing zeros removed. 
""" # find index of last row that does not only ahve 0 and Nan last_idx = df[df.sum(axis=1).ne(0)].index[-1] From 301f1b06987b793489de4df7b60b1d10d91fa70b Mon Sep 17 00:00:00 2001 From: Chaitra Peddireddy Date: Sat, 30 Sep 2023 20:34:26 -0400 Subject: [PATCH 07/15] Elo Score Functions --- .gitignore | 3 +- pc_mouseparty/rank/elo_score.py | 369 ++++++++++++++++++++ pc_mouseparty/rank/elorating/calculation.py | 198 +++++++++++ requirements.txt | 4 + 4 files changed, 573 insertions(+), 1 deletion(-) create mode 100644 pc_mouseparty/rank/elo_score.py create mode 100644 pc_mouseparty/rank/elorating/calculation.py diff --git a/.gitignore b/.gitignore index 631004a..f8e6b96 100644 --- a/.gitignore +++ b/.gitignore @@ -103,4 +103,5 @@ ENV/ .mypy_cache/ # IDE settings -.vscode/ \ No newline at end of file +.vscode/ +.idea/ diff --git a/pc_mouseparty/rank/elo_score.py b/pc_mouseparty/rank/elo_score.py new file mode 100644 index 0000000..5cb846d --- /dev/null +++ b/pc_mouseparty/rank/elo_score.py @@ -0,0 +1,369 @@ +import os +import pandas as pd +import matplotlib.pyplot as plt +import re +from collections import defaultdict +from elorating import calculation + +# Suppress all warnings +import warnings + +warnings.filterwarnings("ignore") + + +def _reward_competition(df, output_dir, plot_flag=True): + """ + This private function takes in a dataframe and processes the elo score for reward + competition protocol + Unedited used the reward_competition jupyter notebook + Args (3 total, 2 required): + df (pandas dataframe): dataframe to be processed + output_dir (str): path to output directory + plot_flag (bool): flag to plot data, default True + + Return(None): + None + """ + + for col in df.columns.tolist(): + formatted_col_name = "_".join(str(col).lower().strip().split(" ")) + df.rename(columns={col: formatted_col_name}, inplace=True) + + # removing columns from given list of strings + to_remove = ["wins", "ties", "time"] + cols_to_keep = [col for col in df.columns if all(word not in col for word + in to_remove)] + df = df[cols_to_keep] + df["animal_ids"] = df["match"].apply( + lambda x: tuple(sorted([all_ids.strip() for all_ids in re.findall(r"[-+]?(?:\d*\.\d+|\d+)", x)]))) + df["cohort"] = "TODO" + cage_to_strain = {} + df["strain"] = df["cage"].astype(str).map(cage_to_strain) + all_cages = "_".join([str(cage) for cage in sorted(df["cage"].unique())]) + df["index"] = df.index + reward_competition_df = df.reset_index(drop=True) + + melted_reward_competition_df = reward_competition_df.melt( + id_vars=["index", "date", "cage", "box", "match", "animal_ids"], + var_name="trial", + value_name="winner") + + melted_reward_competition_df = melted_reward_competition_df.dropna( + subset="winner") + melted_reward_competition_df["keep_row"] = \ + melted_reward_competition_df["winner"].apply( + lambda x: True if "tie" in str(x).lower() or + re.match(r'^-?\d+(?:\.\d+)$', str(x)) else False + ) + + melted_reward_competition_df = \ + melted_reward_competition_df[melted_reward_competition_df["keep_row"]] + + melted_reward_competition_df["winner"] = \ + melted_reward_competition_df["winner"].astype(str).apply( + lambda x: x.lower().strip() + ) + + melted_reward_competition_df["match_is_tie"] = \ + melted_reward_competition_df["winner"].apply( + lambda x: True if "tie" in x.lower().strip() else False + ) + + melted_reward_competition_df["winner"] = \ + melted_reward_competition_df.apply( + lambda x: x["animal_ids"][0] if x["match_is_tie"] else x["winner"], + axis=1 + ) + + 
melted_reward_competition_df[melted_reward_competition_df["match_is_tie"]] + + melted_reward_competition_df = melted_reward_competition_df[ + melted_reward_competition_df["trial"].str.contains('trial')] + + melted_reward_competition_df["trial_number"] = \ + melted_reward_competition_df["trial"].apply( + lambda x: int(x.lower().strip("trial").strip("winner").strip("_")) + ) + + melted_reward_competition_df = \ + melted_reward_competition_df.sort_values( + ["index", "trial_number"]).reset_index(drop=True) + + melted_reward_competition_df["loser"] = melted_reward_competition_df.apply( + lambda x: (list(set(x["animal_ids"]) - set([x["winner"]]))[0]), axis=1) + + melted_reward_competition_df["session_number_difference"] = \ + melted_reward_competition_df["date"].astype( + 'category').cat.codes.diff() + + cage_to_elo_rating_dict = defaultdict(dict) + + for cage in melted_reward_competition_df["cage"].unique(): + cage_df = \ + melted_reward_competition_df[melted_reward_competition_df["cage"] == cage] + cage_to_elo_rating_dict[cage] = \ + calculation.iterate_elo_rating_calculation_for_dataframe( + dataframe=cage_df, + winner_id_column="winner", + loser_id_column="loser", + additional_columns=melted_reward_competition_df.columns, + tie_column="match_is_tie" + ) + + cage_to_elo_rating_dict[list(cage_to_elo_rating_dict.keys())[0]][0] + + all_cage_elo_rating_list = [] + + for key in cage_to_elo_rating_dict.keys(): + cage_elo_rating_df = pd.DataFrame.from_dict(cage_to_elo_rating_dict[key], orient="index") + cage_elo_rating_df.insert( + 0, 'total_trial_number', range(0, 0 + len(cage_elo_rating_df)) + ) + + all_cage_elo_rating_list.append(cage_elo_rating_df) + + all_cage_elo_rating_df = pd.concat(all_cage_elo_rating_list) + + all_cage_elo_rating_df[all_cage_elo_rating_df["match_is_tie"]] + + if cage_to_strain: + all_cage_elo_rating_df["strain"] = \ + all_cage_elo_rating_df["cage"].astype(str).map(cage_to_strain) + + all_cage_elo_rating_df["experiment_type"] = "Reward Competition" + all_cage_elo_rating_df["cohort"] = "TODO" + all_cage_elo_rating_df[all_cage_elo_rating_df["win_draw_loss"] == 0.5] + + id_to_final_elo_rating_dict = defaultdict(dict) + sorted_func = enumerate(sorted(all_cage_elo_rating_df["subject_id"].unique())) + for index, subject_id in sorted_func: + per_subject_df = \ + all_cage_elo_rating_df[ + all_cage_elo_rating_df["subject_id"] == subject_id + ] + id_to_final_elo_rating_dict[index]["subject_id"] = subject_id + + id_to_final_elo_rating_dict[index]["final_elo_rating"] = \ + per_subject_df.iloc[-1]["updated_elo_rating"] + id_to_final_elo_rating_dict[index]["cohort"] = \ + per_subject_df.iloc[-1]["cohort"] + id_to_final_elo_rating_dict[index]["cage"] = \ + per_subject_df.iloc[-1]["cage"] + + id_to_final_elo_rating_df = pd.DataFrame.from_dict( + id_to_final_elo_rating_dict, orient="index" + ) + # Adding protocol name + id_to_final_elo_rating_df["experiment_type"] = "Reward Competition" + # Adding rank + id_to_final_elo_rating_df["rank"] = \ + id_to_final_elo_rating_df.groupby("cage")["final_elo_rating"].rank( + "dense", ascending=False + ) + # Sorting by cage and then id + id_to_final_elo_rating_df = id_to_final_elo_rating_df.sort_values( + by=['cage', "subject_id"], ascending=True).reset_index(drop=True) + id_to_final_elo_rating_df["rank"] = \ + id_to_final_elo_rating_df.groupby("cage")["final_elo_rating"].rank( + "dense", ascending=False + ) + id_to_final_elo_rating_df = \ + id_to_final_elo_rating_df.sort_values( + by=['cage', "subject_id"], ascending=True).reset_index(drop=True) + + if 
plot_flag: + for cage in all_cage_elo_rating_df["cage"].unique(): + fig, ax = plt.subplots() + plt.rcParams["figure.figsize"] = (18, 10) + per_cage_df = \ + all_cage_elo_rating_df[all_cage_elo_rating_df["cage"] == cage] + + for index in per_cage_df["index"].unique(): + first_session_in_trial = \ + per_cage_df[per_cage_df["index"] == index].iloc[0]["total_trial_number"] + plt.vlines(x=[first_session_in_trial - 0.5], + ymin=700, + ymax=1300, + colors='black', + linestyle='dashed' + ) + + # Drawing a line for each subject + for subject in sorted(per_cage_df["subject_id"].unique()): + # Getting all the rows with the current subject + subject_df = per_cage_df[per_cage_df["subject_id"] == subject] + # Making the dates into days after the first session by + # subtracting all the dates by the first date + plt.plot(subject_df["total_trial_number"], + subject_df["updated_elo_rating"], + '-o', + label=subject + ) + + # Labeling the X/Y Axis and the title + ax.set_xlabel("Trial Number") + ax.set_ylabel("Elo Score") + ax.set_title( + "{} Elo Rating for {} {}".format("Rewards Competition", "TODO", str(cage))) + # To show the legend + ax.legend(loc="upper left") + plt.xticks(rotation=90) + plt.ylim(700, 1300) + + # Checking if out dir exists + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + plt.savefig( + os.path.join(output_dir, + "reward_competition_cage" + str(cage) + ".png")) + + path = os.path.join( + output_dir, "reward_competition_cage" + all_cages + ".csv") + + id_to_final_elo_rating_df.to_csv(path, index=False) + + return None + +def general_processing(file_info, output_dir, plot_flag=True): + """ + This function takes in a dataframe and processes elo score for home_cage_observation, urine_marking, + or test_tube protocols + Args (3 total, 3 required): + file_info (dict): dictionary with file names as key and value as a dictionary of + file information with the following properties: + file_path (str): path to file + protocol (str): protocol name + sheet (list): list of sheet names + cohort (str): cohort name + output_dir (str): path to output directory + plot_flag (bool): flag to plot data, default True + + Return(None): + None + """ + def process(df, protocol, cohort, output_dir, plot_flag): + # Initializing column names + + find_col_names = df[df.apply(lambda row: 'winner' in row.values, axis=1)] + + if not find_col_names.empty: + df.columns = find_col_names.iloc[0] + df = df[df.index != find_col_names.index[0]] + + # check if there is a cage number col + mode_cage_val = None + cage_num = False + # finding column names for winner, loser, and tie + winner_col, tie_col, loser_col = None, None, None + for col in df.columns.tolist(): + if "cage" in col.lower(): + # filling all cage values with mode + mode_cage_val = df['cage #'].mode().iloc[0] + df['cage#'] = mode_cage_val + cage_num = True + if "winner" in col.lower(): + winner_col = col + if "loser" in col.lower(): + loser_col = col + if "tie" in col.lower(): + tie_col = col + + if not winner_col or not loser_col: + print("Winner or Loser column not found") + return None + + if not cage_num: + try: + new_sheet_name = sheet.lower().replace("cage", "") + mode_cage_val = int(new_sheet_name) + df['cage#'] = mode_cage_val + except: + print("Cage# cannot be determined") + return None + + # drop cols if winner & loss is NaN + df = df.dropna(subset=['winner', 'loser'], how='all') + + # Autofill dates + df['date'] = pd.to_datetime(df['date'], errors='coerce') + df['date'].fillna(method='ffill', inplace=True) + + # Identify sessions based 
on date values + df['session_number_difference'] = 0 + previous_date = None + for index, row in df.iterrows(): + current_date = row['date'] + # check for session change + if not previous_date: + df.at[index, 'session_number_difference'] = 1 + elif previous_date is not None and current_date != previous_date: + df.at[index, 'session_number_difference'] = 1 + previous_date = current_date + # Elo Score from calculation.py + if tie_col: + df[tie_col] = df[tie_col].notna() + + elo_calc = calculation.iterate_elo_rating_calculation_for_dataframe(dataframe=df, winner_id_column=winner_col, + loser_id_column=loser_col, + tie_column=tie_col) + elo_df = pd.DataFrame.from_dict(elo_calc, orient='index') + elo_df.groupby("subject_id").count() + + cage_to_strain = {} + if cage_to_strain: + elo_df["subject_strain"] = elo_df["cage_num_of_subject"].map(cage_to_strain) + elo_df["agent_strain"] = elo_df["cage_num_of_agent"].map(cage_to_strain) + elo_df["experiment_type"] = protocol + elo_df["cohort"] = cohort + + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + if plot_flag: + max_elo_rating = elo_df["updated_elo_rating"].max() + min_elo_rating = elo_df["updated_elo_rating"].min() + + plt.rcParams["figure.figsize"] = (13.5, 7.5) + fig, ax = plt.subplots() + + # adjusting session number difference + elo_df['session_number_difference'] = \ + df['session_number_difference'].repeat(2).reset_index(drop=True) + + for index, row in elo_df[elo_df['session_number_difference'].astype(bool)].iterrows(): + # Offsetting by 0.5 to avoid drawing the line on the dot + # Drawing the lines a little above the max and a little below the minimum + plt.vlines(x=[row["total_match_number"] - 0.5], ymin=min_elo_rating - 50, ymax=max_elo_rating + 50, + colors='black', linestyle='dashed') + for subject in sorted(elo_df["subject_id"].unique()): + # Getting all the rows with the current subject + subject_dataframe = elo_df[elo_df["subject_id"] == subject] + # Making the current match number the X-Axis + plt.plot(subject_dataframe["total_match_number"], subject_dataframe["updated_elo_rating"], '-o', + label=subject) + # plt.show() + ax.set_xlabel("Trial Number") + ax.set_ylabel("Elo rating") + + ax.set_title( + "{} Elo Rating for {} {}".format(protocol, cohort, "Cage #" + str(mode_cage_val))) + ax.legend(loc="upper left") + plt.ylim(min_elo_rating - 50, max_elo_rating + 50) + fig.savefig(os.path.join(output_dir, protocol + "_cage" + str(mode_cage_val) + ".png")) + + # Saving df csv to output dir + elo_df.to_csv(os.path.join(output_dir, protocol + "_cage" + str(mode_cage_val) + ".csv"), index=False) + + for file_name, file_data in file_info.items(): + file_path = file_data["file_path"] + protocol = file_data["protocol"] + sheets = file_data["sheet"] + cohort = file_data["cohort"] + xls = pd.ExcelFile(file_path) + for sheet in sheets: + data = pd.read_excel(xls, sheet_name=sheet) + if protocol == "reward_competition": + _reward_competition(df=data, output_dir=output_dir, plot_flag=plot_flag) + else: + process(df=data, protocol=protocol, cohort=cohort, output_dir=output_dir, plot_flag=plot_flag) diff --git a/pc_mouseparty/rank/elorating/calculation.py b/pc_mouseparty/rank/elorating/calculation.py new file mode 100644 index 0000000..35f175c --- /dev/null +++ b/pc_mouseparty/rank/elorating/calculation.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python3 +"""Elo Rating Calculator + +Based on: https://www.omnicalculator.com/sports/elo +""" +import operator +from collections import defaultdict +import pandas as pd + + +def 
calculate_elo_rating(subject_elo_rating, agent_elo_rating, k_factor=20, score=1, number_of_decimals=1):
    """
    Calculates the Elo rating of a given subject given its original score, its opponent,
    the K-Factor, and whether or not it won.
    The calculation is based on: https://www.omnicalculator.com/sports/elo

    Args:
        subject_elo_rating(float): The original Elo rating for the subject
        agent_elo_rating(float): The original Elo rating for the agent
        k_factor(int): k-factor, or development coefficient.
            - It usually takes values between 10 and 40, depending on player's strength
        score(int): the actual outcome of the game.
            - In chess, a win counts as 1 point, a draw is equal to 0.5, and a loss gives 0.
        number_of_decimals(int): Number of decimals to round to

    Returns:
        int: Updated Elo rating of the subject
    """
    # Calculating the Elo rating
    rating_difference = agent_elo_rating - subject_elo_rating
    # e.g. with equal ratings the difference is 0, so expected_score is 0.5
    expected_score = 1 / (1 + 10 ** (rating_difference / 400))
    new_elo_rating = subject_elo_rating + k_factor * (score - expected_score)
    # Rounding to `number_of_decimals`
    return round(new_elo_rating, number_of_decimals)


def update_elo_rating(winner_id, loser_id, id_to_elo_rating=None, default_elo_rating=1000,
                      winner_score=1, loser_score=0, **calculate_elo_rating_params):
    """
    Updates the Elo rating in a dictionary that contains the ID of the subject as keys,
    and the Elo rating as the values. You can also adjust how the Elo rating is calculated
    with 'calculate_elo_rating_params'.

    Args:
        winner_id(str): ID of the winner
        loser_id(str): ID of the loser
        id_to_elo_rating(dict): Dict that has the ID of the subjects as keys to the Elo Score as values
        default_elo_rating(int): The default Elo rating to be used if there is no Elo score for the specified ID
        **calculate_elo_rating_params(kwargs): Other params for calculate_elo_rating to change how the Elo rating is calculated

    Returns:
        Dict: Dict that has the ID of the subjects as keys to the Elo Score as values
    """
    if id_to_elo_rating is None:
        id_to_elo_rating = defaultdict(lambda: default_elo_rating)

    # Getting the current Elo Score
    current_winner_rating = id_to_elo_rating[winner_id]
    current_loser_rating = id_to_elo_rating[loser_id]

    # Calculating Elo rating
    id_to_elo_rating[winner_id] = calculate_elo_rating(subject_elo_rating=current_winner_rating,
                                                       agent_elo_rating=current_loser_rating, score=winner_score,
                                                       **calculate_elo_rating_params)
    id_to_elo_rating[loser_id] = calculate_elo_rating(subject_elo_rating=current_loser_rating,
                                                      agent_elo_rating=current_winner_rating, score=loser_score,
                                                      **calculate_elo_rating_params)

    return id_to_elo_rating


def get_ranking_from_elo_rating_dictionary(input_dict, subject_id):
    """
    Orders a dictionary of subject ID keys to ELO score values by ELO score.
    And then gets the rank of the subject with the inputted ID.
    Lower ranks like 1 would represent those subjects with higher ELO scores and vice versa.
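    For example, with ratings {'a': 1100, 'b': 1000}, subject 'a' has rank 1 and subject 'b' has rank 2.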

    Args:
        input_dict(dict):
            Dictionary of subject ID keys to ELO score values
        subject_id(str, int, or any value that's a key in input dict):
            The ID of the subject that you want the ranking of

    Returns:
        int:
            Ranking of the subject with the ID inputted
    """
    # Sorting the subject IDs by ELO score
    sorted_subject_to_elo_rating = sorted(input_dict.items(), key=operator.itemgetter(1), reverse=True)
    # Getting the rank of the subject based on ELO score
    return [subject_tuple[0] for subject_tuple in sorted_subject_to_elo_rating].index(subject_id) + 1


def iterate_elo_rating_calculation_for_dataframe(dataframe, winner_id_column, loser_id_column, tie_column=None,
                                                 additional_columns=None):
    """
    Iterates through a dataframe that has the ID of winners and losers for a given event.
    A dictionary will be created that contains the information of the event,
    which can then be turned into a dataframe. Each key value pair is either from the winner's or the loser's perspective.

    Args:
        dataframe(Pandas DataFrame): The dataframe with one row per match/event to iterate over
        winner_id_column(str): The name of the column that has the winner's ID
        loser_id_column(str): The name of the column that has the loser's ID
        tie_column(str): The name of the column that marks whether the match was a tie
        additional_columns(list): Additional columns to take from the dataframe

    Returns:
        Dict: With a key value pair for each event either from the winner or loser's perspective.
        This can be turned into a dataframe with each key value pair being a row.
    """
    if additional_columns is None:
        additional_columns = []

    # Dictionary that keeps track of the current Elo rating of the subject
    id_to_elo_rating = defaultdict(lambda: 1000)
    # Dictionary that will be converted to a DataFrame
    index_to_elo_rating_and_meta_data = defaultdict(dict)

    # Indexes that will identify which row the dictionary key value pair will be
    # The number of the index has no significance other than being the number of the row
    all_indexes = iter(range(0, 99999))

    # Keeping track of the number of matches
    total_match_number = 1

    # Making a copy in case there is an error with changing the type of the tie column
    copied_dataframe = dataframe.copy()
    # Changing the tie column type to bool
    # So that we can filter out for booleans including False and 0
    try:
        copied_dataframe[tie_column] = copied_dataframe[tie_column].astype(bool)
    except (KeyError, TypeError, ValueError):
        copied_dataframe = dataframe.copy()

    for index, row in copied_dataframe.dropna(subset=winner_id_column).iterrows():
        # Getting the ID of the winner subject
        winner_id = row[winner_id_column]
        # Getting the ID of the loser subject
        loser_id = row[loser_id_column]

        # Getting the current Elo Score
        current_winner_rating = id_to_elo_rating[winner_id]
        current_loser_rating = id_to_elo_rating[loser_id]

        if tie_column:
            # When there is nothing in the tie column
            # Or when there is a false value indicating that it is not a tie
            if pd.isna(copied_dataframe[tie_column][index]) or ~(copied_dataframe[tie_column][index]).any():
                winner_score = 1
                loser_score = 0
            # When there is value in the tie column
            else:
                winner_score = 0.5
                loser_score = 0.5
        # When there is no tie column
        else:
            winner_score = 1
            loser_score = 0

        # Updating the dictionary with ID keys and Elo Score values
        update_elo_rating(winner_id=winner_id, loser_id=loser_id, id_to_elo_rating=id_to_elo_rating,
                          winner_score=winner_score, loser_score=loser_score)

        # Saving all the data for the winner
        winner_index = next(all_indexes)
        index_to_elo_rating_and_meta_data[winner_index]["total_match_number"] = total_match_number
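        # the winner row and the loser row of one match share this match number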
index_to_elo_rating_and_meta_data[winner_index]["subject_id"] = winner_id + index_to_elo_rating_and_meta_data[winner_index]["agent_id"] = loser_id + index_to_elo_rating_and_meta_data[winner_index]["original_elo_rating"] = current_winner_rating + index_to_elo_rating_and_meta_data[winner_index]["updated_elo_rating"] = id_to_elo_rating[winner_id] + index_to_elo_rating_and_meta_data[winner_index]["win_draw_loss"] = winner_score + index_to_elo_rating_and_meta_data[winner_index]["subject_ranking"] = get_ranking_from_elo_rating_dictionary( + id_to_elo_rating, winner_id) + index_to_elo_rating_and_meta_data[winner_index]["agent_ranking"] = get_ranking_from_elo_rating_dictionary( + id_to_elo_rating, loser_id) + index_to_elo_rating_and_meta_data[winner_index]["pairing_index"] = 0 + for column in additional_columns: + index_to_elo_rating_and_meta_data[winner_index][column] = row[column] + + # Saving all the data for the loser + loser_index = next(all_indexes) + index_to_elo_rating_and_meta_data[loser_index]["total_match_number"] = total_match_number + index_to_elo_rating_and_meta_data[loser_index]["subject_id"] = loser_id + index_to_elo_rating_and_meta_data[loser_index]["agent_id"] = winner_id + index_to_elo_rating_and_meta_data[loser_index]["original_elo_rating"] = current_loser_rating + index_to_elo_rating_and_meta_data[loser_index]["updated_elo_rating"] = id_to_elo_rating[loser_id] + index_to_elo_rating_and_meta_data[loser_index]["win_draw_loss"] = loser_score + index_to_elo_rating_and_meta_data[loser_index]["subject_ranking"] = get_ranking_from_elo_rating_dictionary( + id_to_elo_rating, loser_id) + index_to_elo_rating_and_meta_data[loser_index]["agent_ranking"] = get_ranking_from_elo_rating_dictionary( + id_to_elo_rating, winner_id) + index_to_elo_rating_and_meta_data[loser_index]["pairing_index"] = 1 + for column in additional_columns: + index_to_elo_rating_and_meta_data[loser_index][column] = row[column] + + # Updating the match number + total_match_number += 1 + + return index_to_elo_rating_and_meta_data diff --git a/requirements.txt b/requirements.txt index e69de29..16e07a6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -0,0 +1,4 @@ +setuptools~=60.2.0 +pandas~=2.1.1 +matplotlib~=3.8.0 +numpy~=1.26.0 From 53b9815c81b160f2def7c45d0d30b2565062bee2 Mon Sep 17 00:00:00 2001 From: Chaitra Peddireddy Date: Sun, 1 Oct 2023 16:09:28 -0400 Subject: [PATCH 08/15] Elo Score Functions --- pc_mouseparty/rank/__init__.py | 2 + pc_mouseparty/rank/elo_score.py | 297 ++++++++++++++++++-------------- 2 files changed, 168 insertions(+), 131 deletions(-) diff --git a/pc_mouseparty/rank/__init__.py b/pc_mouseparty/rank/__init__.py index e69de29..bd20cc5 100644 --- a/pc_mouseparty/rank/__init__.py +++ b/pc_mouseparty/rank/__init__.py @@ -0,0 +1,2 @@ +from .elo_score import generate_elo_scores +__all__ = ['generate_elo_scores'] diff --git a/pc_mouseparty/rank/elo_score.py b/pc_mouseparty/rank/elo_score.py index 5cb846d..ff778ec 100644 --- a/pc_mouseparty/rank/elo_score.py +++ b/pc_mouseparty/rank/elo_score.py @@ -5,19 +5,20 @@ from collections import defaultdict from elorating import calculation -# Suppress all warnings import warnings +# Suppress all warnings warnings.filterwarnings("ignore") -def _reward_competition(df, output_dir, plot_flag=True): +def __reward_competition(df, cohort, output_dir, plot_flag=True): """ - This private function takes in a dataframe and processes the elo score for reward - competition protocol + This private function takes in a dataframe and processes the elo score + for 
reward competition protocol Unedited used the reward_competition jupyter notebook - Args (3 total, 2 required): + Args (4 total, 3 required): df (pandas dataframe): dataframe to be processed + cohort (str): cohort name output_dir (str): path to output directory plot_flag (bool): flag to plot data, default True @@ -31,82 +32,80 @@ def _reward_competition(df, output_dir, plot_flag=True): # removing columns from given list of strings to_remove = ["wins", "ties", "time"] - cols_to_keep = [col for col in df.columns if all(word not in col for word - in to_remove)] + cols_to_keep = \ + [col for col in df.columns if all(word not in col + for word in to_remove)] df = df[cols_to_keep] df["animal_ids"] = df["match"].apply( - lambda x: tuple(sorted([all_ids.strip() for all_ids in re.findall(r"[-+]?(?:\d*\.\d+|\d+)", x)]))) + lambda x: tuple(sorted([all_ids.strip() + for all_ids in + re.findall(r"[-+]?(?:\d*\.\d+|\d+)", x)]))) df["cohort"] = "TODO" cage_to_strain = {} df["strain"] = df["cage"].astype(str).map(cage_to_strain) - all_cages = "_".join([str(cage) for cage in sorted(df["cage"].unique())]) + all_cages = "_".join([str(cage) + for cage in sorted(df["cage"].unique())]) df["index"] = df.index reward_competition_df = df.reset_index(drop=True) - melted_reward_competition_df = reward_competition_df.melt( + melted_rc_df = reward_competition_df.melt( id_vars=["index", "date", "cage", "box", "match", "animal_ids"], var_name="trial", value_name="winner") - melted_reward_competition_df = melted_reward_competition_df.dropna( - subset="winner") - melted_reward_competition_df["keep_row"] = \ - melted_reward_competition_df["winner"].apply( - lambda x: True if "tie" in str(x).lower() or - re.match(r'^-?\d+(?:\.\d+)$', str(x)) else False - ) + melted_rc_df = melted_rc_df.dropna(subset="winner") + melted_rc_df["keep_row"] = melted_rc_df["winner"].apply( + lambda x: True if "tie" in str(x).lower() or + re.match(r'^-?\d+(?:\.\d+)$', str(x)) else False + ) - melted_reward_competition_df = \ - melted_reward_competition_df[melted_reward_competition_df["keep_row"]] + melted_rc_df = melted_rc_df[melted_rc_df["keep_row"]] - melted_reward_competition_df["winner"] = \ - melted_reward_competition_df["winner"].astype(str).apply( - lambda x: x.lower().strip() - ) + melted_rc_df["winner"] = melted_rc_df["winner"].astype(str).apply( + lambda x: x.lower().strip() + ) - melted_reward_competition_df["match_is_tie"] = \ - melted_reward_competition_df["winner"].apply( - lambda x: True if "tie" in x.lower().strip() else False - ) + melted_rc_df["match_is_tie"] = melted_rc_df["winner"].apply( + lambda x: True if "tie" in x.lower().strip() else False + ) - melted_reward_competition_df["winner"] = \ - melted_reward_competition_df.apply( - lambda x: x["animal_ids"][0] if x["match_is_tie"] else x["winner"], - axis=1 + melted_rc_df["winner"] = \ + melted_rc_df.apply( + lambda x: x["animal_ids"][0] if x["match_is_tie"] + else x["winner"], axis=1 ) - melted_reward_competition_df[melted_reward_competition_df["match_is_tie"]] + melted_rc_df[melted_rc_df["match_is_tie"]] - melted_reward_competition_df = melted_reward_competition_df[ - melted_reward_competition_df["trial"].str.contains('trial')] + melted_rc_df = \ + melted_rc_df[melted_rc_df["trial"].str.contains('trial')] - melted_reward_competition_df["trial_number"] = \ - melted_reward_competition_df["trial"].apply( - lambda x: int(x.lower().strip("trial").strip("winner").strip("_")) - ) + melted_rc_df["trial_number"] = melted_rc_df["trial"].apply( + lambda x: + 
int(x.lower().strip("trial").strip("winner").strip("_")) + ) - melted_reward_competition_df = \ - melted_reward_competition_df.sort_values( - ["index", "trial_number"]).reset_index(drop=True) + melted_rc_df = melted_rc_df.sort_values( + ["index", "trial_number"]).reset_index(drop=True) - melted_reward_competition_df["loser"] = melted_reward_competition_df.apply( - lambda x: (list(set(x["animal_ids"]) - set([x["winner"]]))[0]), axis=1) + melted_rc_df["loser"] = melted_rc_df.apply( + lambda x: + (list(set(x["animal_ids"]) - set([x["winner"]]))[0]), axis=1 + ) - melted_reward_competition_df["session_number_difference"] = \ - melted_reward_competition_df["date"].astype( - 'category').cat.codes.diff() + melted_rc_df["session_number_difference"] = \ + melted_rc_df["date"].astype('category').cat.codes.diff() cage_to_elo_rating_dict = defaultdict(dict) - for cage in melted_reward_competition_df["cage"].unique(): - cage_df = \ - melted_reward_competition_df[melted_reward_competition_df["cage"] == cage] + for cage in melted_rc_df["cage"].unique(): + cage_df = melted_rc_df[melted_rc_df["cage"] == cage] cage_to_elo_rating_dict[cage] = \ calculation.iterate_elo_rating_calculation_for_dataframe( dataframe=cage_df, winner_id_column="winner", loser_id_column="loser", - additional_columns=melted_reward_competition_df.columns, + additional_columns=melted_rc_df.columns, tie_column="match_is_tie" ) @@ -115,72 +114,69 @@ def _reward_competition(df, output_dir, plot_flag=True): all_cage_elo_rating_list = [] for key in cage_to_elo_rating_dict.keys(): - cage_elo_rating_df = pd.DataFrame.from_dict(cage_to_elo_rating_dict[key], orient="index") + cage_elo_rating_df = \ + pd.DataFrame.from_dict( + cage_to_elo_rating_dict[key], orient="index") cage_elo_rating_df.insert( 0, 'total_trial_number', range(0, 0 + len(cage_elo_rating_df)) ) all_cage_elo_rating_list.append(cage_elo_rating_df) - all_cage_elo_rating_df = pd.concat(all_cage_elo_rating_list) + all_elo_df = pd.concat(all_cage_elo_rating_list) - all_cage_elo_rating_df[all_cage_elo_rating_df["match_is_tie"]] + all_elo_df[all_elo_df["match_is_tie"]] if cage_to_strain: - all_cage_elo_rating_df["strain"] = \ - all_cage_elo_rating_df["cage"].astype(str).map(cage_to_strain) + all_elo_df["strain"] = \ + all_elo_df["cage"].astype(str).map(cage_to_strain) - all_cage_elo_rating_df["experiment_type"] = "Reward Competition" - all_cage_elo_rating_df["cohort"] = "TODO" - all_cage_elo_rating_df[all_cage_elo_rating_df["win_draw_loss"] == 0.5] + all_elo_df["experiment_type"] = "Reward Competition" + all_elo_df["cohort"] = "TODO" + all_elo_df[all_elo_df["win_draw_loss"] == 0.5] - id_to_final_elo_rating_dict = defaultdict(dict) - sorted_func = enumerate(sorted(all_cage_elo_rating_df["subject_id"].unique())) + id_to_elo_dict = defaultdict(dict) + sorted_func = enumerate(sorted(all_elo_df["subject_id"].unique())) for index, subject_id in sorted_func: - per_subject_df = \ - all_cage_elo_rating_df[ - all_cage_elo_rating_df["subject_id"] == subject_id - ] - id_to_final_elo_rating_dict[index]["subject_id"] = subject_id + per_subject_df = all_elo_df[all_elo_df["subject_id"] == subject_id] + id_to_elo_dict[index]["subject_id"] = subject_id - id_to_final_elo_rating_dict[index]["final_elo_rating"] = \ + id_to_elo_dict[index]["final_elo_rating"] = \ per_subject_df.iloc[-1]["updated_elo_rating"] - id_to_final_elo_rating_dict[index]["cohort"] = \ - per_subject_df.iloc[-1]["cohort"] - id_to_final_elo_rating_dict[index]["cage"] = \ - per_subject_df.iloc[-1]["cage"] + id_to_elo_dict[index]["cohort"] 
= per_subject_df.iloc[-1]["cohort"] + id_to_elo_dict[index]["cage"] = per_subject_df.iloc[-1]["cage"] - id_to_final_elo_rating_df = pd.DataFrame.from_dict( - id_to_final_elo_rating_dict, orient="index" + id_to_elo_df = pd.DataFrame.from_dict( + id_to_elo_dict, orient="index" ) # Adding protocol name - id_to_final_elo_rating_df["experiment_type"] = "Reward Competition" + id_to_elo_df["experiment_type"] = "Reward Competition" # Adding rank - id_to_final_elo_rating_df["rank"] = \ - id_to_final_elo_rating_df.groupby("cage")["final_elo_rating"].rank( + id_to_elo_df["rank"] = \ + id_to_elo_df.groupby("cage")["final_elo_rating"].rank( "dense", ascending=False ) # Sorting by cage and then id - id_to_final_elo_rating_df = id_to_final_elo_rating_df.sort_values( + id_to_elo_df = id_to_elo_df.sort_values( by=['cage', "subject_id"], ascending=True).reset_index(drop=True) - id_to_final_elo_rating_df["rank"] = \ - id_to_final_elo_rating_df.groupby("cage")["final_elo_rating"].rank( + id_to_elo_df["rank"] = \ + id_to_elo_df.groupby("cage")["final_elo_rating"].rank( "dense", ascending=False ) - id_to_final_elo_rating_df = \ - id_to_final_elo_rating_df.sort_values( - by=['cage', "subject_id"], ascending=True).reset_index(drop=True) + id_to_elo_df = id_to_elo_df.sort_values( + by=['cage', "subject_id"], ascending=True).reset_index(drop=True) if plot_flag: - for cage in all_cage_elo_rating_df["cage"].unique(): + for cage in all_elo_df["cage"].unique(): fig, ax = plt.subplots() plt.rcParams["figure.figsize"] = (18, 10) per_cage_df = \ - all_cage_elo_rating_df[all_cage_elo_rating_df["cage"] == cage] + all_elo_df[all_elo_df["cage"] == cage] for index in per_cage_df["index"].unique(): + col = "total_trial_number" first_session_in_trial = \ - per_cage_df[per_cage_df["index"] == index].iloc[0]["total_trial_number"] + per_cage_df[per_cage_df["index"] == index].iloc[0][col] plt.vlines(x=[first_session_in_trial - 0.5], ymin=700, ymax=1300, @@ -191,7 +187,8 @@ def _reward_competition(df, output_dir, plot_flag=True): # Drawing a line for each subject for subject in sorted(per_cage_df["subject_id"].unique()): # Getting all the rows with the current subject - subject_df = per_cage_df[per_cage_df["subject_id"] == subject] + col = "subject_id" + subject_df = per_cage_df[per_cage_df[col] == subject] # Making the dates into days after the first session by # subtracting all the dates by the first date plt.plot(subject_df["total_trial_number"], @@ -204,7 +201,9 @@ def _reward_competition(df, output_dir, plot_flag=True): ax.set_xlabel("Trial Number") ax.set_ylabel("Elo Score") ax.set_title( - "{} Elo Rating for {} {}".format("Rewards Competition", "TODO", str(cage))) + "{} Elo Rating for {} {}".format( + "Rewards Competition", cohort, str(cage)) + ) # To show the legend ax.legend(loc="upper left") plt.xticks(rotation=90) @@ -214,53 +213,49 @@ def _reward_competition(df, output_dir, plot_flag=True): if not os.path.exists(output_dir): os.makedirs(output_dir) - plt.savefig( - os.path.join(output_dir, - "reward_competition_cage" + str(cage) + ".png")) + file_name = "reward_competition_cage" + str(cage) + ".png" + plt.savefig(os.path.join(output_dir, file_name)) - path = os.path.join( - output_dir, "reward_competition_cage" + all_cages + ".csv") + file_name = "reward_competition_cage" + all_cages + ".csv" + path = os.path.join(output_dir, file_name) - id_to_final_elo_rating_df.to_csv(path, index=False) + id_to_elo_df.to_csv(path, index=False) return None -def general_processing(file_info, output_dir, plot_flag=True): - """ - This 
function takes in a dataframe and processes elo score for home_cage_observation, urine_marking, - or test_tube protocols - Args (3 total, 3 required): - file_info (dict): dictionary with file names as key and value as a dictionary of - file information with the following properties: - file_path (str): path to file - protocol (str): protocol name - sheet (list): list of sheet names - cohort (str): cohort name +def __process(df, protocol, cohort, sheet, output_dir, plot_flag=True): + """ + This private function takes in a dataframe and processes the elo score + for home_cage_observation, urine_marking, or test_tube protocols + Args (6 total, 5 required): + df (pandas dataframe): dataframe to be processed + protocol (str): protocol name + cohort (str): cohort name + sheet (str): sheet name output_dir (str): path to output directory plot_flag (bool): flag to plot data, default True - Return(None): None - """ - def process(df, protocol, cohort, output_dir, plot_flag): + """ # Initializing column names - find_col_names = df[df.apply(lambda row: 'winner' in row.values, axis=1)] + find_col_names = df[df.apply( + lambda row: 'winner' in row.values, axis=1)] if not find_col_names.empty: df.columns = find_col_names.iloc[0] df = df[df.index != find_col_names.index[0]] # check if there is a cage number col - mode_cage_val = None + mode_cage = None cage_num = False # finding column names for winner, loser, and tie winner_col, tie_col, loser_col = None, None, None for col in df.columns.tolist(): if "cage" in col.lower(): # filling all cage values with mode - mode_cage_val = df['cage #'].mode().iloc[0] - df['cage#'] = mode_cage_val + mode_cage = df['cage #'].mode().iloc[0] + df['cage#'] = mode_cage cage_num = True if "winner" in col.lower(): winner_col = col @@ -276,9 +271,9 @@ def process(df, protocol, cohort, output_dir, plot_flag): if not cage_num: try: new_sheet_name = sheet.lower().replace("cage", "") - mode_cage_val = int(new_sheet_name) - df['cage#'] = mode_cage_val - except: + mode_cage = int(new_sheet_name) + df['cage#'] = mode_cage + except ValueError: print("Cage# cannot be determined") return None @@ -304,16 +299,20 @@ def process(df, protocol, cohort, output_dir, plot_flag): if tie_col: df[tie_col] = df[tie_col].notna() - elo_calc = calculation.iterate_elo_rating_calculation_for_dataframe(dataframe=df, winner_id_column=winner_col, - loser_id_column=loser_col, - tie_column=tie_col) + elo_calc = calculation.iterate_elo_rating_calculation_for_dataframe( + dataframe=df, winner_id_column=winner_col, + loser_id_column=loser_col, + tie_column=tie_col + ) elo_df = pd.DataFrame.from_dict(elo_calc, orient='index') elo_df.groupby("subject_id").count() cage_to_strain = {} if cage_to_strain: - elo_df["subject_strain"] = elo_df["cage_num_of_subject"].map(cage_to_strain) - elo_df["agent_strain"] = elo_df["cage_num_of_agent"].map(cage_to_strain) + elo_df["subject_strain"] = \ + elo_df["cage_num_of_subject"].map(cage_to_strain) + elo_df["agent_strain"] = \ + elo_df["cage_num_of_agent"].map(cage_to_strain) elo_df["experiment_type"] = protocol elo_df["cohort"] = cohort @@ -328,32 +327,60 @@ def process(df, protocol, cohort, output_dir, plot_flag): fig, ax = plt.subplots() # adjusting session number difference - elo_df['session_number_difference'] = \ - df['session_number_difference'].repeat(2).reset_index(drop=True) + col = "session_number_difference" + elo_df[col] = df[col].repeat(2).reset_index(drop=True) - for index, row in elo_df[elo_df['session_number_difference'].astype(bool)].iterrows(): + for index, row 
in elo_df[elo_df[col].astype(bool)].iterrows(): # Offsetting by 0.5 to avoid drawing the line on the dot - # Drawing the lines a little above the max and a little below the minimum - plt.vlines(x=[row["total_match_number"] - 0.5], ymin=min_elo_rating - 50, ymax=max_elo_rating + 50, - colors='black', linestyle='dashed') + # Drawing the lines above the max and below the minimum + plt.vlines(x=[row["total_match_number"] - 0.5], + ymin=min_elo_rating - 50, + ymax=max_elo_rating + 50, + colors='black', + linestyle='dashed') for subject in sorted(elo_df["subject_id"].unique()): # Getting all the rows with the current subject subject_dataframe = elo_df[elo_df["subject_id"] == subject] # Making the current match number the X-Axis - plt.plot(subject_dataframe["total_match_number"], subject_dataframe["updated_elo_rating"], '-o', + plt.plot(subject_dataframe["total_match_number"], + subject_dataframe["updated_elo_rating"], + '-o', label=subject) # plt.show() ax.set_xlabel("Trial Number") ax.set_ylabel("Elo rating") - ax.set_title( - "{} Elo Rating for {} {}".format(protocol, cohort, "Cage #" + str(mode_cage_val))) + tite = "{} Elo Rating for {} {}".format(protocol, + cohort, + "Cage #" + str(mode_cage)) + ax.set_title(tite) ax.legend(loc="upper left") plt.ylim(min_elo_rating - 50, max_elo_rating + 50) - fig.savefig(os.path.join(output_dir, protocol + "_cage" + str(mode_cage_val) + ".png")) + file_name = protocol + "_cage" + str(mode_cage) + ".png" + fig.savefig(os.path.join(output_dir, file_name)) # Saving df csv to output dir - elo_df.to_csv(os.path.join(output_dir, protocol + "_cage" + str(mode_cage_val) + ".csv"), index=False) + file_name = protocol + "_cage" + str(mode_cage) + ".csv" + elo_df.to_csv(os.path.join(output_dir, file_name), index=False) + +def generate_elo_scores(file_info, output_dir, plot_flag=True): + """ + This function takes in a dataframe and processes elo score for + home_cage_observation, urine_marking, or test_tube protocols + Args (3 total, 3 required): + file_info (dict): + dictionary with file names as key and value as a dictionary of + file information with the following properties: + file_path (str): path to file + protocol (str): protocol name + sheet (list): list of sheet names + cohort (str): cohort name + output_dir (str): path to output directory + plot_flag (bool): flag to plot data, default True + + Return(None): + None + """ for file_name, file_data in file_info.items(): file_path = file_data["file_path"] @@ -364,6 +391,14 @@ def process(df, protocol, cohort, output_dir, plot_flag): for sheet in sheets: data = pd.read_excel(xls, sheet_name=sheet) if protocol == "reward_competition": - _reward_competition(df=data, output_dir=output_dir, plot_flag=plot_flag) + __reward_competition(df=data, + cohort=cohort, + output_dir=output_dir, + plot_flag=plot_flag) else: - process(df=data, protocol=protocol, cohort=cohort, output_dir=output_dir, plot_flag=plot_flag) + __process(df=data, + protocol=protocol, + cohort=cohort, + sheet=sheet, + output_dir=output_dir, + plot_flag=plot_flag) From b8958a918f034f3467c9507099da1817fca3a750 Mon Sep 17 00:00:00 2001 From: Chaitra Peddireddy Date: Wed, 11 Oct 2023 10:40:22 -0400 Subject: [PATCH 09/15] Fix issue import issue in __init__.py --- pc_mouseparty/rank/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pc_mouseparty/rank/__init__.py b/pc_mouseparty/rank/__init__.py index bd20cc5..e69de29 100644 --- a/pc_mouseparty/rank/__init__.py +++ b/pc_mouseparty/rank/__init__.py @@ -1,2 +0,0 @@ -from .elo_score import 
generate_elo_scores -__all__ = ['generate_elo_scores'] From 51f645bd6c4427e0132fe447b62670727f5fd1de Mon Sep 17 00:00:00 2001 From: Chaitra Peddireddy Date: Mon, 13 Nov 2023 09:04:40 -0500 Subject: [PATCH 10/15] Changed File Struct for Rank Dir --- pc_mouseparty/rank/{elorating => }/calculation.py | 0 pc_mouseparty/rank/elo_score.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename pc_mouseparty/rank/{elorating => }/calculation.py (100%) diff --git a/pc_mouseparty/rank/elorating/calculation.py b/pc_mouseparty/rank/calculation.py similarity index 100% rename from pc_mouseparty/rank/elorating/calculation.py rename to pc_mouseparty/rank/calculation.py diff --git a/pc_mouseparty/rank/elo_score.py b/pc_mouseparty/rank/elo_score.py index ff778ec..628c6db 100644 --- a/pc_mouseparty/rank/elo_score.py +++ b/pc_mouseparty/rank/elo_score.py @@ -3,7 +3,7 @@ import matplotlib.pyplot as plt import re from collections import defaultdict -from elorating import calculation +from pc_mouseparty.rank import calculation import warnings From f8b4d32135f348e8afd556e2e4dcfd7b64c5e5fd Mon Sep 17 00:00:00 2001 From: Chaitra Peddireddy Date: Wed, 20 Dec 2023 13:28:58 -0500 Subject: [PATCH 11/15] Changed rank module structure --- .gitignore | 2 + pc_mouseparty/rank/elo_score.py | 2 +- pc_mouseparty/rank/elorating/calculation.py | 198 -------------------- 3 files changed, 3 insertions(+), 199 deletions(-) delete mode 100644 pc_mouseparty/rank/elorating/calculation.py diff --git a/.gitignore b/.gitignore index f8e6b96..1001a93 100644 --- a/.gitignore +++ b/.gitignore @@ -105,3 +105,5 @@ ENV/ # IDE settings .vscode/ .idea/ + +.DS_Store diff --git a/pc_mouseparty/rank/elo_score.py b/pc_mouseparty/rank/elo_score.py index 628c6db..732f6a3 100644 --- a/pc_mouseparty/rank/elo_score.py +++ b/pc_mouseparty/rank/elo_score.py @@ -3,7 +3,7 @@ import matplotlib.pyplot as plt import re from collections import defaultdict -from pc_mouseparty.rank import calculation +from . import calculation import warnings diff --git a/pc_mouseparty/rank/elorating/calculation.py b/pc_mouseparty/rank/elorating/calculation.py deleted file mode 100644 index 35f175c..0000000 --- a/pc_mouseparty/rank/elorating/calculation.py +++ /dev/null @@ -1,198 +0,0 @@ -#!/usr/bin/env python3 -"""Elo Rating Calculator - -Based on: https://www.omnicalculator.com/sports/elo -""" -import operator -from collections import defaultdict -import pandas as pd - - -def calculate_elo_rating(subject_elo_rating, agent_elo_rating, k_factor=20, score=1, number_of_decimals=1): - """ - Calculates the Elo rating of a given subject given it's original score, it's opponent, - the K-Factor, and whether or not it has won or not. - The calculation is based on: https://www.omnicalculator.com/sports/elo - - Args: - subject_elo_rating(float): The original Elo rating for the subject - agent_elo_rating(float): The original Elo rating for the agent - k_factor(int): k-factor, or development coefficient. - - It usually takes values between 10 and 40, depending on player's strength - score(int): the actual outcome of the game. - - In chess, a win counts as 1 point, a draw is equal to 0.5, and a lose gives 0. 
- number_of_decimals(int): Number of decimals to round to - - Returns: - int: Updated Elo rating of the subject - """ - # Calculating the Elo rating - rating_difference = agent_elo_rating - subject_elo_rating - expected_score = 1 / (1 + 10 ** (rating_difference / 400)) - new_elo_rating = subject_elo_rating + k_factor * (score - expected_score) - # Rounding to `number_of_decimals` - return round(new_elo_rating, number_of_decimals) - - -def update_elo_rating(winner_id, loser_id, id_to_elo_rating=None, default_elo_rating=1000, \ - winner_score=1, loser_score=0, **calculate_elo_rating_params): - """ - Updates the Elo rating in a dictionary that contains the ID of the subject as keys, - and the Elo rating as the values. You can also adjust how the Elo rating is calculated with 'calculate_elo_rating_params'. - - Args: - winner_id(str): ID of the winner - loser_id(str): ID of the loser - id_to_elo_rating(dict): Dict that has the ID of the subjects as keys to the Elo Score as values - default_elo_rating(int): The default Elo rating to be used if there is not elo score for the specified ID - **calculate_elo_rating_params(kwargs): Other params for the calculate_elo_rating to change how the Elo rating is calculated - - Returns: - Dict: Dict that has the ID of the subjects as keys to the Elo Score as values - """ - if id_to_elo_rating is None: - id_to_elo_rating = defaultdict(lambda: default_elo_rating) - - # Getting the current Elo Score - current_winner_rating = id_to_elo_rating[winner_id] - current_loser_rating = id_to_elo_rating[loser_id] - - # Calculating Elo rating - id_to_elo_rating[winner_id] = calculate_elo_rating(subject_elo_rating=current_winner_rating, \ - agent_elo_rating=current_loser_rating, score=winner_score, - **calculate_elo_rating_params) - id_to_elo_rating[loser_id] = calculate_elo_rating(subject_elo_rating=current_loser_rating, \ - agent_elo_rating=current_winner_rating, score=loser_score, - **calculate_elo_rating_params) - - return id_to_elo_rating - - -def get_ranking_from_elo_rating_dictionary(input_dict, subject_id): - """ - Orders a dictionary of subject ID keys to ELO score values by ELO score. - And then gets the rank of the subject with the inputted ID. - Lower ranks like 1 would represent those subjects with higher ELO scores and vice versa. - - Args: - input_dict(dict): - Dictionary of subject ID keys to ELO score values - subject_id(str, int, or any value that's a key in input dict): - The ID of the subject that you want the ranking of - - Returns: - int: - Ranking of the subject with the ID inputted - """ - # Sorting the subject ID's by ELO score - sorted_subject_to_elo_rating = sorted(input_dict.items(), key=operator.itemgetter(1), reverse=True) - # Getting the rank of the subject based on ELO score - return [subject_tuple[0] for subject_tuple in sorted_subject_to_elo_rating].index(subject_id) + 1 - - -def iterate_elo_rating_calculation_for_dataframe(dataframe, winner_id_column, loser_id_column, tie_column=None, - additional_columns=None): - """ - Iterates through a dataframe that has the ID of winners and losers for a given event. - A dictionary will be created that contains the information of the event, - which can then be turned into a dataframe. Each key is either from winner or loser's perspective. 
- - Args: - dataframe(Pandas DataFrame): - winner_id_column(str): The name of the column that has the winner's ID - loser_id_column(str): The name of the column that has the loser's ID - additional_columns(list): Additional columns to take from the - - Returns: - Dict: With a key value pair for each event either from the winner or loser's perspective. - This can be turned into a dataframe with each key value pair being a row. - """ - if additional_columns is None: - additional_columns = [] - - # Dictionary that keeps track of the current Elo rating of the subject - id_to_elo_rating = defaultdict(lambda: 1000) - # Dictionary that will be converted to a DataFrame - index_to_elo_rating_and_meta_data = defaultdict(dict) - - # Indexes that will identify which row the dictionary key value pair will be - # The number of the index has no significance other than being the number of the row - all_indexes = iter(range(0, 99999)) - - # Keeping track of the number of matches - total_match_number = 1 - - # Making a copy in case there is an error with changing the type of the tie column - copied_dataframe = dataframe.copy() - # Changing the tie column type to bool - # So that we can filter out for booleans including False and 0 - try: - copied_dataframe[tie_column] = copied_dataframe[tie_column].astype(bool) - except: - copied_dataframe = dataframe.copy() - - for index, row in copied_dataframe.dropna(subset=winner_id_column).iterrows(): - # Getting the ID of the winner subject - winner_id = row[winner_id_column] - # Getting the ID of the loser subject - loser_id = row[loser_id_column] - - # Getting the current Elo Score - current_winner_rating = id_to_elo_rating[winner_id] - current_loser_rating = id_to_elo_rating[loser_id] - - if tie_column: - # When there is nothing in the tie column - # Or when there is a false value indicating that it is not a tie - if pd.isna(copied_dataframe[tie_column][index]) or ~(copied_dataframe[tie_column][index]).any(): - winner_score = 1 - loser_score = 0 - # When there is value in the tie column - else: - winner_score = 0.5 - loser_score = 0.5 - # When there is no tie column - else: - winner_score = 1 - loser_score = 0 - - # Updating the dictionary with ID keys and Elo Score values - update_elo_rating(winner_id=winner_id, loser_id=loser_id, id_to_elo_rating=id_to_elo_rating, \ - winner_score=winner_score, loser_score=loser_score) - - # Saving all the data for the winner - winner_index = next(all_indexes) - index_to_elo_rating_and_meta_data[winner_index]["total_match_number"] = total_match_number - index_to_elo_rating_and_meta_data[winner_index]["subject_id"] = winner_id - index_to_elo_rating_and_meta_data[winner_index]["agent_id"] = loser_id - index_to_elo_rating_and_meta_data[winner_index]["original_elo_rating"] = current_winner_rating - index_to_elo_rating_and_meta_data[winner_index]["updated_elo_rating"] = id_to_elo_rating[winner_id] - index_to_elo_rating_and_meta_data[winner_index]["win_draw_loss"] = winner_score - index_to_elo_rating_and_meta_data[winner_index]["subject_ranking"] = get_ranking_from_elo_rating_dictionary( - id_to_elo_rating, winner_id) - index_to_elo_rating_and_meta_data[winner_index]["agent_ranking"] = get_ranking_from_elo_rating_dictionary( - id_to_elo_rating, loser_id) - index_to_elo_rating_and_meta_data[winner_index]["pairing_index"] = 0 - for column in additional_columns: - index_to_elo_rating_and_meta_data[winner_index][column] = row[column] - - # Saving all the data for the loser - loser_index = next(all_indexes) - 
index_to_elo_rating_and_meta_data[loser_index]["total_match_number"] = total_match_number
-    index_to_elo_rating_and_meta_data[loser_index]["subject_id"] = loser_id
-    index_to_elo_rating_and_meta_data[loser_index]["agent_id"] = winner_id
-    index_to_elo_rating_and_meta_data[loser_index]["original_elo_rating"] = current_loser_rating
-    index_to_elo_rating_and_meta_data[loser_index]["updated_elo_rating"] = id_to_elo_rating[loser_id]
-    index_to_elo_rating_and_meta_data[loser_index]["win_draw_loss"] = loser_score
-    index_to_elo_rating_and_meta_data[loser_index]["subject_ranking"] = get_ranking_from_elo_rating_dictionary(
-        id_to_elo_rating, loser_id)
-    index_to_elo_rating_and_meta_data[loser_index]["agent_ranking"] = get_ranking_from_elo_rating_dictionary(
-        id_to_elo_rating, winner_id)
-    index_to_elo_rating_and_meta_data[loser_index]["pairing_index"] = 1
-    for column in additional_columns:
-        index_to_elo_rating_and_meta_data[loser_index][column] = row[column]
-
-    # Updating the match number
-    total_match_number += 1
-
-    return index_to_elo_rating_and_meta_data

From 786c112ba1913cd2284b5b02425f398c811f7893 Mon Sep 17 00:00:00 2001
From: Chaitra Peddireddy
Date: Fri, 29 Dec 2023 22:34:32 -0500
Subject: [PATCH 12/15] Flake8 for rank dir

---
 pc_mouseparty/rank/calculation.py | 167 ++++++++++++++++++------------
 1 file changed, 103 insertions(+), 64 deletions(-)

diff --git a/pc_mouseparty/rank/calculation.py b/pc_mouseparty/rank/calculation.py
index 35f175c..ad75b94 100644
--- a/pc_mouseparty/rank/calculation.py
+++ b/pc_mouseparty/rank/calculation.py
@@ -8,19 +8,24 @@
 import pandas as pd
 
 
-def calculate_elo_rating(subject_elo_rating, agent_elo_rating, k_factor=20, score=1, number_of_decimals=1):
+def calculate_elo_rating(subject_elo_rating,
+                         agent_elo_rating,
+                         k_factor=20, score=1,
+                         number_of_decimals=1):
     """
-    Calculates the Elo rating of a given subject given it's original score, it's opponent,
-    the K-Factor, and whether or not it has won or not.
+    Calculates the Elo rating of a given subject given its original score,
+    its opponent, the K-factor, and whether or not it won.
     The calculation is based on: https://www.omnicalculator.com/sports/elo
 
     Args:
         subject_elo_rating(float): The original Elo rating for the subject
         agent_elo_rating(float): The original Elo rating for the agent
         k_factor(int): k-factor, or development coefficient.
-        - It usually takes values between 10 and 40, depending on player's strength
+        - It usually takes values between 10 and 40, depending on
+        player's strength
         score(int): the actual outcome of the game.
-        - In chess, a win counts as 1 point, a draw is equal to 0.5, and a lose gives 0.
+        - In chess, a win counts as 1 point, a draw is equal to 0.5,
+        and a loss gives 0.
         number_of_decimals(int): Number of decimals to round to
 
     Returns:
@@ -34,21 +39,30 @@ def calculate_elo_rating(subject_elo_rating, agent_elo_rating, k_factor=20, scor
     return round(new_elo_rating, number_of_decimals)
 
 
-def update_elo_rating(winner_id, loser_id, id_to_elo_rating=None, default_elo_rating=1000, \
-                      winner_score=1, loser_score=0, **calculate_elo_rating_params):
+def update_elo_rating(winner_id,
+                      loser_id,
+                      id_to_elo_rating=None,
+                      default_elo_rating=1000,
+                      winner_score=1,
+                      loser_score=0,
+                      **calculate_elo_rating_params):
     """
-    Updates the Elo rating in a dictionary that contains the ID of the subject as keys,
-    and the Elo rating as the values. You can also adjust how the Elo rating is calculated with 'calculate_elo_rating_params'. 
+    Updates the Elo rating in a dictionary that contains the ID of the subject
+    as keys, and the Elo rating as the values. You can also adjust how the Elo
+    rating is calculated with 'calculate_elo_rating_params'.
 
     Args:
         winner_id(str): ID of the winner
         loser_id(str): ID of the loser
-        id_to_elo_rating(dict): Dict that has the ID of the subjects as keys to the Elo Score as values
-        default_elo_rating(int): The default Elo rating to be used if there is not elo score for the specified ID
-        **calculate_elo_rating_params(kwargs): Other params for the calculate_elo_rating to change how the Elo rating is calculated
+        id_to_elo_rating(dict): Dict that has the ID of the subjects as keys
+        to the Elo Score as values
+        default_elo_rating(int): The default Elo rating to be used if there is
+        no Elo score for the specified ID
+        **calculate_elo_rating_params(kwargs): Other params for the
+        calculate_elo_rating to change how the Elo rating is calculated
 
     Returns:
-        Dict: Dict that has the ID of the subjects as keys to the Elo Score as values
+        Dict: Dict mapping subject IDs (keys) to Elo scores (values)
     """
     if id_to_elo_rating is None:
         id_to_elo_rating = defaultdict(lambda: default_elo_rating)
@@ -58,12 +72,17 @@ def update_elo_rating(winner_id, loser_id, id_to_elo_rating=None, default_elo_ra
     current_loser_rating = id_to_elo_rating[loser_id]
 
     # Calculating Elo rating
-    id_to_elo_rating[winner_id] = calculate_elo_rating(subject_elo_rating=current_winner_rating, \
-                                                       agent_elo_rating=current_loser_rating, score=winner_score,
-                                                       **calculate_elo_rating_params)
-    id_to_elo_rating[loser_id] = calculate_elo_rating(subject_elo_rating=current_loser_rating, \
-                                                      agent_elo_rating=current_winner_rating, score=loser_score,
-                                                      **calculate_elo_rating_params)
+    id_to_elo_rating[winner_id] = calculate_elo_rating(
+        subject_elo_rating=current_winner_rating,
+        agent_elo_rating=current_loser_rating,
+        score=winner_score,
+        **calculate_elo_rating_params)
+
+    id_to_elo_rating[loser_id] = \
+        calculate_elo_rating(subject_elo_rating=current_loser_rating,
+                             agent_elo_rating=current_winner_rating,
+                             score=loser_score,
+                             **calculate_elo_rating_params)
 
     return id_to_elo_rating
@@ -72,7 +91,8 @@ def get_ranking_from_elo_rating_dictionary(input_dict, subject_id):
     """
     Orders a dictionary of subject ID keys to ELO score values by ELO score.
    And then gets the rank of the subject with the inputted ID.
-    Lower ranks like 1 would represent those subjects with higher ELO scores and vice versa.
+    Lower ranks like 1 would represent those subjects with higher ELO scores
+    and vice versa.
Args: input_dict(dict): @@ -85,17 +105,26 @@ def get_ranking_from_elo_rating_dictionary(input_dict, subject_id): Ranking of the subject with the ID inputted """ # Sorting the subject ID's by ELO score - sorted_subject_to_elo_rating = sorted(input_dict.items(), key=operator.itemgetter(1), reverse=True) + sorted_elo = sorted(input_dict.items(), + key=operator.itemgetter(1), + reverse=True) # Getting the rank of the subject based on ELO score - return [subject_tuple[0] for subject_tuple in sorted_subject_to_elo_rating].index(subject_id) + 1 + rank = [subject_tuple[0] for subject_tuple in sorted_elo].index(subject_id) + rank += 1 + return rank -def iterate_elo_rating_calculation_for_dataframe(dataframe, winner_id_column, loser_id_column, tie_column=None, +def iterate_elo_rating_calculation_for_dataframe(dataframe, + winner_id_column, + loser_id_column, + tie_column=None, additional_columns=None): """ - Iterates through a dataframe that has the ID of winners and losers for a given event. + Iterates through a dataframe that has the ID of winners and losers for + a given event. A dictionary will be created that contains the information of the event, - which can then be turned into a dataframe. Each key is either from winner or loser's perspective. + which can then be turned into a dataframe. Each key is either from winner + or loser's perspective. Args: dataframe(Pandas DataFrame): @@ -104,8 +133,10 @@ def iterate_elo_rating_calculation_for_dataframe(dataframe, winner_id_column, lo additional_columns(list): Additional columns to take from the Returns: - Dict: With a key value pair for each event either from the winner or loser's perspective. - This can be turned into a dataframe with each key value pair being a row. + Dict: With a key value pair for each event either from the winner or + loser's perspective. + This can be turned into a dataframe with each key value pair being + a row. 
""" if additional_columns is None: additional_columns = [] @@ -113,29 +144,30 @@ def iterate_elo_rating_calculation_for_dataframe(dataframe, winner_id_column, lo # Dictionary that keeps track of the current Elo rating of the subject id_to_elo_rating = defaultdict(lambda: 1000) # Dictionary that will be converted to a DataFrame - index_to_elo_rating_and_meta_data = defaultdict(dict) + elo_metadata = defaultdict(dict) - # Indexes that will identify which row the dictionary key value pair will be - # The number of the index has no significance other than being the number of the row + # Indexes that will identify which row the dictionary key value pair + # The num of index has no significance other than being the number of row all_indexes = iter(range(0, 99999)) # Keeping track of the number of matches total_match_number = 1 - # Making a copy in case there is an error with changing the type of the tie column + # Making a copy in case there is an error with changing the type of the tie copied_dataframe = dataframe.copy() # Changing the tie column type to bool # So that we can filter out for booleans including False and 0 try: - copied_dataframe[tie_column] = copied_dataframe[tie_column].astype(bool) - except: + copied_dataframe[tie_column] = \ + copied_dataframe[tie_column].astype(bool) + except KeyError: copied_dataframe = dataframe.copy() - for index, row in copied_dataframe.dropna(subset=winner_id_column).iterrows(): + for idx, rw in copied_dataframe.dropna(subset=winner_id_column).iterrows(): # Getting the ID of the winner subject - winner_id = row[winner_id_column] + winner_id = rw[winner_id_column] # Getting the ID of the loser subject - loser_id = row[loser_id_column] + loser_id = rw[loser_id_column] # Getting the current Elo Score current_winner_rating = id_to_elo_rating[winner_id] @@ -144,7 +176,8 @@ def iterate_elo_rating_calculation_for_dataframe(dataframe, winner_id_column, lo if tie_column: # When there is nothing in the tie column # Or when there is a false value indicating that it is not a tie - if pd.isna(copied_dataframe[tie_column][index]) or ~(copied_dataframe[tie_column][index]).any(): + if (pd.isna(copied_dataframe[tie_column][idx]) or + ~(copied_dataframe[tie_column][idx]).any()): winner_score = 1 loser_score = 0 # When there is value in the tie column @@ -157,42 +190,48 @@ def iterate_elo_rating_calculation_for_dataframe(dataframe, winner_id_column, lo loser_score = 0 # Updating the dictionary with ID keys and Elo Score values - update_elo_rating(winner_id=winner_id, loser_id=loser_id, id_to_elo_rating=id_to_elo_rating, \ - winner_score=winner_score, loser_score=loser_score) + update_elo_rating(winner_id=winner_id, + loser_id=loser_id, + id_to_elo_rating=id_to_elo_rating, + winner_score=winner_score, + loser_score=loser_score) # Saving all the data for the winner winner_index = next(all_indexes) - index_to_elo_rating_and_meta_data[winner_index]["total_match_number"] = total_match_number - index_to_elo_rating_and_meta_data[winner_index]["subject_id"] = winner_id - index_to_elo_rating_and_meta_data[winner_index]["agent_id"] = loser_id - index_to_elo_rating_and_meta_data[winner_index]["original_elo_rating"] = current_winner_rating - index_to_elo_rating_and_meta_data[winner_index]["updated_elo_rating"] = id_to_elo_rating[winner_id] - index_to_elo_rating_and_meta_data[winner_index]["win_draw_loss"] = winner_score - index_to_elo_rating_and_meta_data[winner_index]["subject_ranking"] = get_ranking_from_elo_rating_dictionary( - id_to_elo_rating, winner_id) - 
index_to_elo_rating_and_meta_data[winner_index]["agent_ranking"] = get_ranking_from_elo_rating_dictionary( - id_to_elo_rating, loser_id) - index_to_elo_rating_and_meta_data[winner_index]["pairing_index"] = 0 + elo_metadata[winner_index]["total_match_number"] = total_match_number + elo_metadata[winner_index]["subject_id"] = winner_id + elo_metadata[winner_index]["agent_id"] = loser_id + elo_metadata[winner_index]["original_elo_rating"] = \ + current_winner_rating + elo_metadata[winner_index]["updated_elo_rating"] = \ + id_to_elo_rating[winner_id] + elo_metadata[winner_index]["win_draw_loss"] = winner_score + elo_metadata[winner_index]["subject_ranking"] = \ + get_ranking_from_elo_rating_dictionary(id_to_elo_rating, winner_id) + elo_metadata[winner_index]["agent_ranking"] = \ + get_ranking_from_elo_rating_dictionary(id_to_elo_rating, loser_id) + elo_metadata[winner_index]["pairing_index"] = 0 for column in additional_columns: - index_to_elo_rating_and_meta_data[winner_index][column] = row[column] + elo_metadata[winner_index][column] = rw[column] # Saving all the data for the loser loser_index = next(all_indexes) - index_to_elo_rating_and_meta_data[loser_index]["total_match_number"] = total_match_number - index_to_elo_rating_and_meta_data[loser_index]["subject_id"] = loser_id - index_to_elo_rating_and_meta_data[loser_index]["agent_id"] = winner_id - index_to_elo_rating_and_meta_data[loser_index]["original_elo_rating"] = current_loser_rating - index_to_elo_rating_and_meta_data[loser_index]["updated_elo_rating"] = id_to_elo_rating[loser_id] - index_to_elo_rating_and_meta_data[loser_index]["win_draw_loss"] = loser_score - index_to_elo_rating_and_meta_data[loser_index]["subject_ranking"] = get_ranking_from_elo_rating_dictionary( - id_to_elo_rating, loser_id) - index_to_elo_rating_and_meta_data[loser_index]["agent_ranking"] = get_ranking_from_elo_rating_dictionary( - id_to_elo_rating, winner_id) - index_to_elo_rating_and_meta_data[loser_index]["pairing_index"] = 1 + elo_metadata[loser_index]["total_match_number"] = total_match_number + elo_metadata[loser_index]["subject_id"] = loser_id + elo_metadata[loser_index]["agent_id"] = winner_id + elo_metadata[loser_index]["original_elo_rating"] = current_loser_rating + elo_metadata[loser_index]["updated_elo_rating"] = \ + id_to_elo_rating[loser_id] + elo_metadata[loser_index]["win_draw_loss"] = loser_score + elo_metadata[loser_index]["subject_ranking"] = \ + get_ranking_from_elo_rating_dictionary(id_to_elo_rating, loser_id) + elo_metadata[loser_index]["agent_ranking"] = \ + get_ranking_from_elo_rating_dictionary(id_to_elo_rating, winner_id) + elo_metadata[loser_index]["pairing_index"] = 1 for column in additional_columns: - index_to_elo_rating_and_meta_data[loser_index][column] = row[column] + elo_metadata[loser_index][column] = rw[column] # Updating the match number total_match_number += 1 - return index_to_elo_rating_and_meta_data + return elo_metadata From 8bc4dac12a32b80975b444af4f28bce0d2dd61ba Mon Sep 17 00:00:00 2001 From: Chaitra Peddireddy Date: Fri, 29 Dec 2023 22:40:42 -0500 Subject: [PATCH 13/15] Fixed import error --- pc_mouseparty/rank/elo_score.py | 265 ++++++++++++++++---------------- 1 file changed, 133 insertions(+), 132 deletions(-) diff --git a/pc_mouseparty/rank/elo_score.py b/pc_mouseparty/rank/elo_score.py index 732f6a3..7bb3162 100644 --- a/pc_mouseparty/rank/elo_score.py +++ b/pc_mouseparty/rank/elo_score.py @@ -223,145 +223,146 @@ def __reward_competition(df, cohort, output_dir, plot_flag=True): return None + def __process(df, 
protocol, cohort, sheet, output_dir, plot_flag=True): - """ - This private function takes in a dataframe and processes the elo score - for home_cage_observation, urine_marking, or test_tube protocols - Args (6 total, 5 required): - df (pandas dataframe): dataframe to be processed - protocol (str): protocol name - cohort (str): cohort name - sheet (str): sheet name - output_dir (str): path to output directory - plot_flag (bool): flag to plot data, default True - Return(None): - None - """ - # Initializing column names - - find_col_names = df[df.apply( - lambda row: 'winner' in row.values, axis=1)] - - if not find_col_names.empty: - df.columns = find_col_names.iloc[0] - df = df[df.index != find_col_names.index[0]] - - # check if there is a cage number col - mode_cage = None - cage_num = False - # finding column names for winner, loser, and tie - winner_col, tie_col, loser_col = None, None, None - for col in df.columns.tolist(): - if "cage" in col.lower(): - # filling all cage values with mode - mode_cage = df['cage #'].mode().iloc[0] - df['cage#'] = mode_cage - cage_num = True - if "winner" in col.lower(): - winner_col = col - if "loser" in col.lower(): - loser_col = col - if "tie" in col.lower(): - tie_col = col - - if not winner_col or not loser_col: - print("Winner or Loser column not found") + """ + This private function takes in a dataframe and processes the elo score + for home_cage_observation, urine_marking, or test_tube protocols + Args (6 total, 5 required): + df (pandas dataframe): dataframe to be processed + protocol (str): protocol name + cohort (str): cohort name + sheet (str): sheet name + output_dir (str): path to output directory + plot_flag (bool): flag to plot data, default True + Return(None): + None + """ + # Initializing column names + find_col_names = df[df.apply( + lambda row: 'winner' in row.values, axis=1)] + + if not find_col_names.empty: + df.columns = find_col_names.iloc[0] + df = df[df.index != find_col_names.index[0]] + + # check if there is a cage number col + mode_cage = None + cage_num = False + # finding column names for winner, loser, and tie + winner_col, tie_col, loser_col = None, None, None + for col in df.columns.tolist(): + if "cage" in col.lower(): + # filling all cage values with mode + mode_cage = df['cage #'].mode().iloc[0] + df['cage#'] = mode_cage + cage_num = True + if "winner" in col.lower(): + winner_col = col + if "loser" in col.lower(): + loser_col = col + if "tie" in col.lower(): + tie_col = col + + if not winner_col or not loser_col: + print("Winner or Loser column not found") + return None + + if not cage_num: + try: + new_sheet_name = sheet.lower().replace("cage", "") + mode_cage = int(new_sheet_name) + df['cage#'] = mode_cage + except ValueError: + print("Cage# cannot be determined") return None - if not cage_num: - try: - new_sheet_name = sheet.lower().replace("cage", "") - mode_cage = int(new_sheet_name) - df['cage#'] = mode_cage - except ValueError: - print("Cage# cannot be determined") - return None - - # drop cols if winner & loss is NaN - df = df.dropna(subset=['winner', 'loser'], how='all') - - # Autofill dates - df['date'] = pd.to_datetime(df['date'], errors='coerce') - df['date'].fillna(method='ffill', inplace=True) - - # Identify sessions based on date values - df['session_number_difference'] = 0 - previous_date = None - for index, row in df.iterrows(): - current_date = row['date'] - # check for session change - if not previous_date: - df.at[index, 'session_number_difference'] = 1 - elif previous_date is not None and 
current_date != previous_date: - df.at[index, 'session_number_difference'] = 1 - previous_date = current_date - # Elo Score from calculation.py - if tie_col: - df[tie_col] = df[tie_col].notna() - - elo_calc = calculation.iterate_elo_rating_calculation_for_dataframe( - dataframe=df, winner_id_column=winner_col, - loser_id_column=loser_col, - tie_column=tie_col - ) - elo_df = pd.DataFrame.from_dict(elo_calc, orient='index') - elo_df.groupby("subject_id").count() - - cage_to_strain = {} - if cage_to_strain: - elo_df["subject_strain"] = \ - elo_df["cage_num_of_subject"].map(cage_to_strain) - elo_df["agent_strain"] = \ - elo_df["cage_num_of_agent"].map(cage_to_strain) - elo_df["experiment_type"] = protocol - elo_df["cohort"] = cohort - - if not os.path.exists(output_dir): - os.makedirs(output_dir) - - if plot_flag: - max_elo_rating = elo_df["updated_elo_rating"].max() - min_elo_rating = elo_df["updated_elo_rating"].min() - - plt.rcParams["figure.figsize"] = (13.5, 7.5) - fig, ax = plt.subplots() + # drop cols if winner & loss is NaN + df = df.dropna(subset=['winner', 'loser'], how='all') + + # Autofill dates + df['date'] = pd.to_datetime(df['date'], errors='coerce') + df['date'].fillna(method='ffill', inplace=True) + + # Identify sessions based on date values + df['session_number_difference'] = 0 + previous_date = None + for index, row in df.iterrows(): + current_date = row['date'] + # check for session change + if not previous_date: + df.at[index, 'session_number_difference'] = 1 + elif previous_date is not None and current_date != previous_date: + df.at[index, 'session_number_difference'] = 1 + previous_date = current_date + # Elo Score from calculation.py + if tie_col: + df[tie_col] = df[tie_col].notna() + + elo_calc = calculation.iterate_elo_rating_calculation_for_dataframe( + dataframe=df, winner_id_column=winner_col, + loser_id_column=loser_col, + tie_column=tie_col + ) + elo_df = pd.DataFrame.from_dict(elo_calc, orient='index') + elo_df.groupby("subject_id").count() - # adjusting session number difference - col = "session_number_difference" - elo_df[col] = df[col].repeat(2).reset_index(drop=True) + cage_to_strain = {} + if cage_to_strain: + elo_df["subject_strain"] = \ + elo_df["cage_num_of_subject"].map(cage_to_strain) + elo_df["agent_strain"] = \ + elo_df["cage_num_of_agent"].map(cage_to_strain) + elo_df["experiment_type"] = protocol + elo_df["cohort"] = cohort - for index, row in elo_df[elo_df[col].astype(bool)].iterrows(): - # Offsetting by 0.5 to avoid drawing the line on the dot - # Drawing the lines above the max and below the minimum - plt.vlines(x=[row["total_match_number"] - 0.5], - ymin=min_elo_rating - 50, - ymax=max_elo_rating + 50, - colors='black', - linestyle='dashed') - for subject in sorted(elo_df["subject_id"].unique()): - # Getting all the rows with the current subject - subject_dataframe = elo_df[elo_df["subject_id"] == subject] - # Making the current match number the X-Axis - plt.plot(subject_dataframe["total_match_number"], - subject_dataframe["updated_elo_rating"], - '-o', - label=subject) - # plt.show() - ax.set_xlabel("Trial Number") - ax.set_ylabel("Elo rating") + if not os.path.exists(output_dir): + os.makedirs(output_dir) - tite = "{} Elo Rating for {} {}".format(protocol, - cohort, - "Cage #" + str(mode_cage)) - ax.set_title(tite) - ax.legend(loc="upper left") - plt.ylim(min_elo_rating - 50, max_elo_rating + 50) - file_name = protocol + "_cage" + str(mode_cage) + ".png" - fig.savefig(os.path.join(output_dir, file_name)) + if plot_flag: + max_elo_rating = 
elo_df["updated_elo_rating"].max() + min_elo_rating = elo_df["updated_elo_rating"].min() + + plt.rcParams["figure.figsize"] = (13.5, 7.5) + fig, ax = plt.subplots() + + # adjusting session number difference + col = "session_number_difference" + elo_df[col] = df[col].repeat(2).reset_index(drop=True) + + for index, row in elo_df[elo_df[col].astype(bool)].iterrows(): + # Offsetting by 0.5 to avoid drawing the line on the dot + # Drawing the lines above the max and below the minimum + plt.vlines(x=[row["total_match_number"] - 0.5], + ymin=min_elo_rating - 50, + ymax=max_elo_rating + 50, + colors='black', + linestyle='dashed') + for subject in sorted(elo_df["subject_id"].unique()): + # Getting all the rows with the current subject + subject_dataframe = elo_df[elo_df["subject_id"] == subject] + # Making the current match number the X-Axis + plt.plot(subject_dataframe["total_match_number"], + subject_dataframe["updated_elo_rating"], + '-o', + label=subject) + # plt.show() + ax.set_xlabel("Trial Number") + ax.set_ylabel("Elo rating") + + tite = "{} Elo Rating for {} {}".format(protocol, + cohort, + "Cage #" + str(mode_cage)) + ax.set_title(tite) + ax.legend(loc="upper left") + plt.ylim(min_elo_rating - 50, max_elo_rating + 50) + file_name = protocol + "_cage" + str(mode_cage) + ".png" + fig.savefig(os.path.join(output_dir, file_name)) + + # Saving df csv to output dir + file_name = protocol + "_cage" + str(mode_cage) + ".csv" + elo_df.to_csv(os.path.join(output_dir, file_name), index=False) - # Saving df csv to output dir - file_name = protocol + "_cage" + str(mode_cage) + ".csv" - elo_df.to_csv(os.path.join(output_dir, file_name), index=False) def generate_elo_scores(file_info, output_dir, plot_flag=True): """ From 05e2d1ca0ca8abc5e13f7c105225b7be69aedd83 Mon Sep 17 00:00:00 2001 From: Chaitra Peddireddy Date: Fri, 29 Dec 2023 22:46:03 -0500 Subject: [PATCH 14/15] Fix Deepsourc py errors --- pc_mouseparty/rank/calculation.py | 14 +++++++++++--- pc_mouseparty/rank/elo_score.py | 1 - 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/pc_mouseparty/rank/calculation.py b/pc_mouseparty/rank/calculation.py index ad75b94..7008f0f 100644 --- a/pc_mouseparty/rank/calculation.py +++ b/pc_mouseparty/rank/calculation.py @@ -197,7 +197,11 @@ def iterate_elo_rating_calculation_for_dataframe(dataframe, loser_score=loser_score) # Saving all the data for the winner - winner_index = next(all_indexes) + try: + winner_index = next(all_indexes) + except StopIteration: + print("There are more than 99999 rows in the dataframe. ") + continue elo_metadata[winner_index]["total_match_number"] = total_match_number elo_metadata[winner_index]["subject_id"] = winner_id elo_metadata[winner_index]["agent_id"] = loser_id @@ -214,8 +218,12 @@ def iterate_elo_rating_calculation_for_dataframe(dataframe, for column in additional_columns: elo_metadata[winner_index][column] = rw[column] - # Saving all the data for the loser - loser_index = next(all_indexes) + # Saving all the data for the loser + try: + loser_index = next(all_indexes) + except StopIteration: + print("There are more than 99999 rows in the dataframe. 
") + continue elo_metadata[loser_index]["total_match_number"] = total_match_number elo_metadata[loser_index]["subject_id"] = loser_id elo_metadata[loser_index]["agent_id"] = winner_id diff --git a/pc_mouseparty/rank/elo_score.py b/pc_mouseparty/rank/elo_score.py index 7bb3162..9cde9e0 100644 --- a/pc_mouseparty/rank/elo_score.py +++ b/pc_mouseparty/rank/elo_score.py @@ -346,7 +346,6 @@ def __process(df, protocol, cohort, sheet, output_dir, plot_flag=True): subject_dataframe["updated_elo_rating"], '-o', label=subject) - # plt.show() ax.set_xlabel("Trial Number") ax.set_ylabel("Elo rating") From 667da74ce4b4d4fd14dcd2167d34b7369315562c Mon Sep 17 00:00:00 2001 From: Chaitra Peddireddy Date: Fri, 29 Dec 2023 22:50:43 -0500 Subject: [PATCH 15/15] fix deepsource py suggestions --- pc_mouseparty/rank/calculation.py | 2 +- pc_mouseparty/rank/elo_score.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pc_mouseparty/rank/calculation.py b/pc_mouseparty/rank/calculation.py index 7008f0f..e04daf6 100644 --- a/pc_mouseparty/rank/calculation.py +++ b/pc_mouseparty/rank/calculation.py @@ -197,7 +197,7 @@ def iterate_elo_rating_calculation_for_dataframe(dataframe, loser_score=loser_score) # Saving all the data for the winner - try: + try: winner_index = next(all_indexes) except StopIteration: print("There are more than 99999 rows in the dataframe. ") diff --git a/pc_mouseparty/rank/elo_score.py b/pc_mouseparty/rank/elo_score.py index 9cde9e0..e419c75 100644 --- a/pc_mouseparty/rank/elo_score.py +++ b/pc_mouseparty/rank/elo_score.py @@ -349,10 +349,9 @@ def __process(df, protocol, cohort, sheet, output_dir, plot_flag=True): ax.set_xlabel("Trial Number") ax.set_ylabel("Elo rating") - tite = "{} Elo Rating for {} {}".format(protocol, - cohort, - "Cage #" + str(mode_cage)) - ax.set_title(tite) + title = f"{protocol} Elo Rating for {cohort} Cage #{mode_cage}" + + ax.set_title(title) ax.legend(loc="upper left") plt.ylim(min_elo_rating - 50, max_elo_rating + 50) file_name = protocol + "_cage" + str(mode_cage) + ".png"