From dcf3528d6d8f20fe7ec1b0bdfaadcf6963198457 Mon Sep 17 00:00:00 2001
From: sronilsson <sronilsson@gmail.com>
Date: Fri, 17 Jan 2025 15:40:06 -0500
Subject: [PATCH] roi movemstats

---
 docs/nb/shap_example_2.ipynb                  |  20 +-
 setup.py                                      |   2 +-
 simba/data_processors/cuda/create_shap_log.py |   4 +
 simba/data_processors/cuda/statistics.py      | 104 ++++++-
 simba/data_processors/movement_calculator.py  | 176 +++---------
 .../timebins_movement_calculator.py           |  93 ++-----
 .../feature_extraction_supplement_mixin.py    |  41 +--
 simba/mixins/statistics_mixin.py              | 260 +++++++++++++++++-
 simba/mixins/timeseries_features_mixin.py     |   6 +-
 simba/mixins/train_model_mixin.py             |  22 +-
 simba/model/grid_search_rf.py                 |   4 +-
 simba/roi_tools/ROI_analyzer.py               |  92 +++----
 simba/roi_tools/ROI_time_bin_calculator.py    |  38 +--
 .../pop_ups/check_videos_seekable_pop_up.py   |   2 +-
 simba/utils/read_write.py                     |  18 +-
 simba/utils/warnings.py                       |   5 +
 16 files changed, 524 insertions(+), 363 deletions(-)

diff --git a/docs/nb/shap_example_2.ipynb b/docs/nb/shap_example_2.ipynb
index f7eeed327..8def1cfa7 100644
--- a/docs/nb/shap_example_2.ipynb
+++ b/docs/nb/shap_example_2.ipynb
@@ -25,7 +25,7 @@
    "source": [
     "from simba.mixins.train_model_mixin import TrainModelMixin\n",
     "from simba.mixins.config_reader import ConfigReader\n",
-    "from simba.utils.read_write import read_df, read_config_file\n",
+    "from simba.utils.read_write import read_config_file, read_pickle\n",
     "import glob"
    ]
   },
@@ -54,7 +54,7 @@
     "# READ IN THE CONFIG AND THE CLASSIFIER\n",
     "config = read_config_file(config_path=CONFIG_PATH)\n",
     "config_object = ConfigReader(config_path=CONFIG_PATH)\n",
-    "clf = read_df(file_path=CLASSIFIER_PATH, file_type='pickle')"
+    "clf = read_pickle(data_path=CLASSIFIER_PATH)"
    ]
   },
   {
@@ -192,15 +192,19 @@
     }
    ],
    "source": [
-    "TrainModelMixin().create_shap_log_mp(ini_file_path=CONFIG_PATH,\n",
-    "                                     rf_clf=clf,\n",
-    "                                     x_df=data,\n",
-    "                                     y_df=target_df,\n",
-    "                                     x_names=data.columns,\n",
+    "TrainModelMixin().create_shap_log_mp(rf_clf=clf,\n",
+    "                                     x=data,\n",
+    "                                     y=target_df,\n",
+    "                                     x_names=list(data.columns),\n",
     "                                     clf_name=CLASSIFIER_NAME,\n",
     "                                     cnt_present=COUNT_PRESENT,\n",
     "                                     cnt_absent=COUNT_ABSENT,\n",
-    "                                     save_path=config_object.logs_path)"
+    "                                     core_cnt=2,\n",
+    "                                     chunk_size=100,\n",
+    "                                     verbose=True,\n",
+    "                                     save_dir=config_object.logs_path,\n",
+    "                                     save_file_suffix=1,\n",
+    "                                     plot=True)"
    ]
   },
   {
diff --git a/setup.py b/setup.py
index 9d1fea2f4..b141880c3 100644
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,7 @@
 # Setup configuration
 setuptools.setup(
     name="simba-uw-tf-dev",
-    version="2.4.8",
+    version="2.5.1",
     author="Simon Nilsson, Jia Jie Choong, Sophia Hwang",
     author_email="sronilsson@gmail.com",
     description="Toolkit for computer classification and analysis of behaviors in experimental animals",
diff --git a/simba/data_processors/cuda/create_shap_log.py b/simba/data_processors/cuda/create_shap_log.py
index f8eab416a..10f2df683 100644
--- a/simba/data_processors/cuda/create_shap_log.py
+++ b/simba/data_processors/cuda/create_shap_log.py
@@ -38,6 +38,10 @@ def create_shap_log(rf_clf: Union[str, os.PathLike, RandomForestClassifier],
        :width: 500
        :align: center
 
+    .. note::
+       (i) The SHAP library has to be installed from the git repository rather than from PyPI: `pip install git+https://github.com/slundberg/shap.git`.
+       (ii) The scikit-learn model cannot be built using max_depth > 31. You can set this in the SimBA config under [create ensemble settings][rf_max_depth], or as `rf_max_depth` in the config CSVs.
+
     :param Union[str, os.PathLike, RandomForestClassifier] rf_clf: Trained RandomForestClassifier model or path to the saved model. Can be a string, os.PathLike object, or an instance of RandomForestClassifier.
     :param Union[pd.DataFrame, np.ndarray] x: Input features used for SHAP value computation. Can be a pandas DataFrame or numpy ndarray.
     :param Union[pd.DataFrame, pd.Series, np.ndarray] y:  Target labels corresponding to the input features. Can be a pandas DataFrame, pandas Series, or numpy ndarray with 0 and 1 values.
diff --git a/simba/data_processors/cuda/statistics.py b/simba/data_processors/cuda/statistics.py
index 7622addeb..3d81100e3 100644
--- a/simba/data_processors/cuda/statistics.py
+++ b/simba/data_processors/cuda/statistics.py
@@ -9,14 +9,16 @@
 import numpy as np
 from numba import cuda
 from scipy.spatial import ConvexHull
-
-from simba.utils.read_write import read_df
-
+from simba.utils.read_write import read_df, get_unique_values_in_iterable
+from simba.utils.warnings import GPUToolsWarning
 try:
     import cupy as cp
     from cupyx.scipy.spatial.distance import cdist
 except:
+    GPUToolsWarning(msg='GPU tools not detected, reverting to CPU')
+    from scipy.spatial.distance import cdist
     import numpy as cp
+
 try:
    from cuml.cluster import KMeans
 except:
@@ -500,6 +502,7 @@ def euclidean_distance_to_static_point(data: np.ndarray,
     :param pixels_per_millimeter: A scaling factor that indicates how many pixels correspond to one millimeter. Defaults to 1 if no scaling is necessary.
     :param centimeter:  A flag to indicate whether the output distances should be converted from millimeters to centimeters. If True, the result is divided by 10. Defaults to False (millimeters).
     :param batch_size: The number of points to process in each batch to avoid memory overflow on the GPU. The default  batch size is set to 65 million points (6.5e+7). Adjust this parameter based on GPU memory capacity.
+    :param batch_size: The number of points to process in each batch to avoid memory overflow on the GPU. The default  batch size is set to 65 million points (6.5e+7). Adjust this parameter based on GPU memory capacity.
     :return: A 1D array of distances between each point in `data` and the static `point`, either in millimeters or centimeters depending on the `centimeter` flag.
     :rtype: np.ndarray
     """
@@ -514,7 +517,7 @@ def euclidean_distance_to_static_point(data: np.ndarray,
         results[l:r]  = cdist(batch_data, point).astype(np.float32) / pixels_per_millimeter
     if centimeter:
         results = results / 10
-    return results.get
+    return results.get()
 
 
 def dunn_index(x: np.ndarray, y: np.ndarray) -> float:
@@ -654,3 +657,96 @@ def kmeans_cuml(data: np.ndarray,
 
     return (mdl.cluster_centers_, mdl.predict(data))
 
+
+
+def xie_beni(x: np.ndarray, y: np.ndarray) -> float:
+    """
+    Computes the Xie-Beni index for clustering evaluation.
+
+    The score is calculated as the ratio between the average intra-cluster variance and the squared minimum distance between cluster centroids. This ensures that the index penalizes both loosely packed clusters and clusters that are too close to each other.
+
+    A lower Xie-Beni index indicates better clustering quality, signifying well-separated and compact clusters.
+
+    .. seealso::
+       To compute Xie-Beni on the CPU, use :func:`~simba.mixins.statistics_mixin.Statistics.xie_beni`
+       Significant GPU savings detected at about 1m features, 25 clusters.
+
+    :param np.ndarray x: The dataset as a 2D NumPy array of shape (n_samples, n_features).
+    :param np.ndarray y: Cluster labels for each data point as a 1D NumPy array of shape (n_samples,).
+    :returns: The Xie-Beni score for the dataset.
+    :rtype: float
+
+    :example:
+    >>> from sklearn.datasets import make_blobs
+    >>> X, y = make_blobs(n_samples=100000, centers=40, n_features=600, random_state=0, cluster_std=0.3)
+    >>> xie_beni(x=X, y=y)
+
+    :references:
+    .. [1] X. L. Xie, G. Beni (1991). A validity measure for fuzzy clustering.
+           In: IEEE Transactions on Pattern Analysis and Machine Intelligence 13(8), 841 - 847. DOI: 10.1109/34.85677
+    """
+    check_valid_array(data=x, accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+    check_valid_array(data=y, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value, accepted_axis_0_shape=[x.shape[0], ])
+    _ = get_unique_values_in_iterable(data=y, name=xie_beni.__name__, min=2)
+    x, y = cp.array(x), cp.array(y)
+    cluster_ids = cp.unique(y)
+    centroids = cp.full(shape=(cluster_ids.shape[0], x.shape[1]), fill_value=-1.0, dtype=cp.float32)
+    intra_centroid_distances = cp.full(shape=(y.shape[0]), fill_value=-1.0, dtype=cp.float32)
+    obs_cnt = 0
+    for cnt, cluster_id in enumerate(cluster_ids):
+        cluster_obs = x[cp.argwhere(y == cluster_id).flatten()]
+        centroids[cnt] = cp.mean(cluster_obs, axis=0)
+        intra_dist = cp.linalg.norm(cluster_obs - centroids[cnt], axis=1)
+        intra_centroid_distances[obs_cnt: cluster_obs.shape[0] + obs_cnt] = intra_dist
+        obs_cnt += cluster_obs.shape[0]
+    compactness = cp.mean(cp.square(intra_centroid_distances))
+    cluster_dists = cdist(centroids, centroids).flatten()
+    d = cp.sqrt(cluster_dists[cp.argwhere(cluster_dists > 0).flatten()])
+    separation = cp.min(d)
+    xb = compactness / separation
+    return xb
+
+
+def i_index(x: np.ndarray, y: np.ndarray):
+    """
+    Calculate the I-Index for evaluating clustering quality.
+
+    The I-Index is a metric that measures the compactness and separation of clusters.
+    A higher I-Index indicates better clustering with compact and well-separated clusters.
+
+    .. seealso::
+       To compute the I-Index on the CPU, use :func:`~simba.mixins.statistics_mixin.Statistics.i_index`
+
+    :param np.ndarray x: The dataset as a 2D NumPy array of shape (n_samples, n_features).
+    :param np.ndarray y: Cluster labels for each data point as a 1D NumPy array of shape (n_samples,).
+    :returns: The I-index score for the dataset.
+    :rtype: float
+
+    :references:
+        .. [1] Zhao, Q., Xu, M., Fränti, P. (2009). Sum-of-Squares Based Cluster Validity Index and Significance Analysis.
+               In: Kolehmainen, M., Toivanen, P., Beliczynski, B. (eds) Adaptive and Natural Computing Algorithms. ICANNGA 2009.
+                Lecture Notes in Computer Science, vol 5495. Springer, Berlin, Heidelberg. https://doi.org/10.1007/978-3-642-04921-7_32
+
+    :example:
+    >>> X, y = make_blobs(n_samples=5000, centers=20, n_features=3, random_state=0, cluster_std=0.1)
+    >>> i_index(x=X, y=y)
+    """
+    check_valid_array(data=x, accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+    check_valid_array(data=y, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value,
+                      accepted_axis_0_shape=[x.shape[0], ])
+    _ = get_unique_values_in_iterable(data=y, name=i_index.__name__, min=2)
+    x, y = cp.array(x), cp.array(y)
+    unique_y = cp.unique(y)
+    n_y = unique_y.shape[0]
+    global_centroid = cp.mean(x, axis=0)
+    sst = cp.sum(cp.linalg.norm(x - global_centroid, axis=1) ** 2)
+
+    swc = 0
+    for cluster_cnt, cluster_id in enumerate(unique_y):
+        cluster_obs = x[cp.argwhere(y == cluster_id).flatten()]
+        cluster_centroid = cp.mean(cluster_obs, axis=0)
+        swc += cp.sum(cp.linalg.norm(cluster_obs - cluster_centroid, axis=1) ** 2)
+
+    i_idx = sst / (n_y * swc)
+
+    return i_idx
\ No newline at end of file
diff --git a/simba/data_processors/movement_calculator.py b/simba/data_processors/movement_calculator.py
index 7f11c4545..908b11ff5 100644
--- a/simba/data_processors/movement_calculator.py
+++ b/simba/data_processors/movement_calculator.py
@@ -2,18 +2,19 @@
 
 import os
 from statistics import mean
-from typing import List, Optional
+from typing import List, Optional, Tuple
 
 import numpy as np
 import pandas as pd
 
+from simba.mixins.feature_extraction_supplement_mixin import FeatureExtractionSupplemental
 from simba.feature_extractors.perimeter_jit import jitted_centroid
 from simba.mixins.config_reader import ConfigReader
 from simba.mixins.feature_extraction_mixin import FeatureExtractionMixin
-from simba.utils.checks import (check_if_filepath_list_is_empty,
-                                check_that_column_exist)
+from simba.utils.checks import (check_if_filepath_list_is_empty,check_that_column_exist, check_valid_array, check_float)
 from simba.utils.printing import stdout_success
 from simba.utils.read_write import get_fn_ext, read_df
+from simba.utils.enums import Formats
 
 
 class MovementCalculator(ConfigReader, FeatureExtractionMixin):
@@ -36,28 +37,17 @@ class MovementCalculator(ConfigReader, FeatureExtractionMixin):
 
     """
 
-    def __init__(
-        self,
-        config_path: str,
-        body_parts: List[str],
-        threshold: float = 0.00,
-        file_paths: Optional[List[str]] = None,
-    ):
+    def __init__(self,
+                 config_path: str,
+                 body_parts: List[str],
+                 threshold: float = 0.00,
+                 file_paths: Optional[List[str]] = None):
         ConfigReader.__init__(self, config_path=config_path)
         FeatureExtractionMixin.__init__(self)
-        self.save_path = os.path.join(
-            self.logs_path, "Movement_log_{}.csv".format(self.datetime)
-        )
-        self.file_paths, self.body_parts, self.threshold = (
-            file_paths,
-            body_parts,
-            threshold,
-        )
+        self.save_path = os.path.join(self.logs_path, "Movement_log_{}.csv".format(self.datetime))
+        self.file_paths, self.body_parts, self.threshold = (file_paths, body_parts, threshold)
         if not self.file_paths:
-            check_if_filepath_list_is_empty(
-                filepaths=self.outlier_corrected_paths,
-                error_msg=f"SIMBA ERROR: Cannot process movement. ZERO data files found in the {self.outlier_corrected_dir} directory.",
-            )
+            check_if_filepath_list_is_empty(filepaths=self.outlier_corrected_paths, error_msg=f"SIMBA ERROR: Cannot process movement. ZERO data files found in the {self.outlier_corrected_dir} directory.")
             self.file_paths = self.outlier_corrected_paths
         print(f"Processing {len(self.file_paths)} video(s)...")
 
@@ -65,29 +55,14 @@ def __find_body_part_columns(self):
         self.body_parts_dict, self.bp_list = {}, []
         for bp_cnt, bp_name in enumerate(self.body_parts):
             if not bp_name.endswith("CENTER OF GRAVITY"):
-                animal_name = self.find_animal_name_from_body_part_name(
-                    bp_name=bp_name, bp_dict=self.animal_bp_dict
-                )
-                self.body_parts_dict[bp_cnt] = {
-                    "ANIMAL NAME": animal_name,
-                    "BODY-PART": bp_name,
-                    "BODY-PART HEADERS": [
-                        f"{bp_name}_x",
-                        f"{bp_name}_y",
-                        f"{bp_name}_p",
-                    ],
-                }
+                animal_name = self.find_animal_name_from_body_part_name(bp_name=bp_name, bp_dict=self.animal_bp_dict)
+                self.body_parts_dict[bp_cnt] = {"ANIMAL NAME": animal_name, "BODY-PART": bp_name, "BODY-PART HEADERS": [f"{bp_name}_x", f"{bp_name}_y", f"{bp_name}_p"]}
                 self.bp_list.extend((self.body_parts_dict[bp_cnt]["BODY-PART HEADERS"]))
             else:
                 pass
 
-    def __find_polygons(self, data):
-        print(data.shape)
-
     def run(self):
-        self.results = pd.DataFrame(
-            columns=["VIDEO", "ANIMAL", "BODY-PART", "MEASURE", "VALUE"]
-        )
+        self.results = pd.DataFrame(columns=["VIDEO", "ANIMAL", "BODY-PART", "MEASURE", "VALUE"])
         self.movement_dfs = {}
         for file_path in self.file_paths:
             self.__find_body_part_columns()
@@ -102,116 +77,37 @@ def run(self):
                 for animal_cnt, animal_data in self.body_parts_dict.items():
                     animal_df = self.data_df[animal_data["BODY-PART HEADERS"]]
                     if self.threshold > 0.00:
-                        animal_df = animal_df[
-                            animal_df[animal_data["BODY-PART HEADERS"][-1]]
-                            >= self.threshold
-                        ]
+                        animal_df = animal_df[animal_df[animal_data["BODY-PART HEADERS"][-1]] >= self.threshold]
                     animal_df = animal_df.iloc[:, 0:2].reset_index(drop=True)
-                    animal_df = self.create_shifted_df(df=animal_df)
-                    bp_time_1 = animal_df[
-                        [
-                            animal_data["BODY-PART HEADERS"][0],
-                            animal_data["BODY-PART HEADERS"][1],
-                        ]
-                    ].values.astype(float)
-                    bp_time_2 = animal_df[
-                        [
-                            animal_data["BODY-PART HEADERS"][0] + "_shifted",
-                            animal_data["BODY-PART HEADERS"][1] + "_shifted",
-                        ]
-                    ].values.astype(float)
-                    self.movement = pd.Series(
-                        self.framewise_euclidean_distance(
-                            location_1=bp_time_1,
-                            location_2=bp_time_2,
-                            px_per_mm=self.px_per_mm,
-                        )
-                    )
-                    self.movement.loc[0] = 0
-                    self.movement_dfs[video_name][
-                        f'{animal_data["ANIMAL NAME"]} {animal_data["BODY-PART"]}'
-                    ] = self.movement
-                    distance = round((self.movement.sum() / 10), 4)
-                    velocity_lst = []
-                    for df in np.array_split(
-                        self.movement, int(len(self.movement) / self.fps)
-                    ):
-                        velocity_lst.append(df.sum())
-                    self.results.loc[len(self.results)] = [
-                        video_name,
-                        animal_data["ANIMAL NAME"],
-                        animal_data["BODY-PART"],
-                        "Distance (cm)",
-                        distance,
-                    ]
-                    self.results.loc[len(self.results)] = [
-                        video_name,
-                        animal_data["ANIMAL NAME"],
-                        animal_data["BODY-PART"],
-                        "Velocity (cm/s)",
-                        round((mean(velocity_lst) / 10), 4),
-                    ]
+                    distance, velocity = FeatureExtractionSupplemental.distance_and_velocity(x=animal_df.values, fps=self.fps, pixels_per_mm=self.px_per_mm, centimeters=True)
+                    self.results.loc[len(self.results)] = [video_name, animal_data["ANIMAL NAME"], animal_data["BODY-PART"], "Distance (cm)", distance]
+                    self.results.loc[len(self.results)] = [ video_name, animal_data["ANIMAL NAME"], animal_data["BODY-PART"], "Velocity (cm/s)", velocity]
 
             else:
                 for animal in self.body_parts:
                     animal_name = animal.split("CENTER OF GRAVITY")[0].strip()
-                    x, y = (
-                        self.data_df[self.animal_bp_dict[animal_name]["X_bps"]],
-                        self.data_df[self.animal_bp_dict[animal_name]["Y_bps"]],
-                    )
-                    z = pd.concat([x, y], axis=1)[
-                        [item for items in zip(x.columns, y.columns) for item in items]
-                    ]
-                    df = pd.DataFrame(
-                        jitted_centroid(
-                            points=np.reshape(z.values, (len(z / 2), -1, 2)).astype(
-                                np.float32
-                            )
-                        ),
-                        columns=["X", "Y"],
-                    )
+                    x, y = (self.data_df[self.animal_bp_dict[animal_name]["X_bps"]], self.data_df[self.animal_bp_dict[animal_name]["Y_bps"]])
+                    z = pd.concat([x, y], axis=1)[[item for items in zip(x.columns, y.columns) for item in items]]
+                    df = pd.DataFrame(jitted_centroid(points=np.reshape(z.values, (len(z / 2), -1, 2)).astype(np.float32)), columns=["X", "Y"])
                     df = self.dataframe_savgol_smoother(df=df, fps=self.fps).astype(int)
-                    df_shifted = df.shift(1)
-                    df_shifted = df_shifted.combine_first(df).add_suffix("_shifted")
-                    self.movement = pd.Series(
-                        self.framewise_euclidean_distance(
-                            location_1=df.values.astype(np.float32),
-                            location_2=df_shifted.values.astype(np.float32),
-                            px_per_mm=self.px_per_mm,
-                        )
-                    )
-                    self.movement.loc[0] = 0
-                    self.movement_dfs[video_name][
-                        f'{animal_name} {"GRAVITY CENTER"}'
-                    ] = self.movement
-                    distance = round((self.movement.sum() / 10), 4)
-                    velocity_lst = []
-                    for df in np.array_split(
-                        self.movement, int(len(self.movement) / self.fps)
-                    ):
-                        velocity_lst.append(df.sum())
-                    self.results.loc[len(self.results)] = [
-                        video_name,
-                        animal_name,
-                        "GRAVITY CENTER",
-                        "Distance (cm)",
-                        distance,
-                    ]
-                    self.results.loc[len(self.results)] = [
-                        video_name,
-                        animal_name,
-                        "GRAVITY CENTER",
-                        "Velocity (cm/s)",
-                        round((mean(velocity_lst) / 10), 4),
-                    ]
+                    distance, velocity = FeatureExtractionSupplemental.distance_and_velocity(x=df.values, fps=self.fps, pixels_per_mm=self.px_per_mm, centimeters=True)
+                    self.results.loc[len(self.results)] = [video_name, animal_name, "GRAVITY CENTER", "Distance (cm)", distance]
+                    self.results.loc[len(self.results)] = [video_name, animal_name, "GRAVITY CENTER", "Velocity (cm/s)", velocity]
 
     def save(self):
         self.results.set_index("VIDEO").to_csv(self.save_path)
         self.timer.stop_timer()
-        stdout_success(
-            msg=f"Movement log saved in {self.save_path}",
-            elapsed_time=self.timer.elapsed_time_str,
-        )
+        stdout_success(msg=f"Movement log saved in {self.save_path}", elapsed_time=self.timer.elapsed_time_str)
+
+
+# test = MovementCalculator(config_path=r"C:\troubleshooting\ROI_movement_test\project_folder\project_config.ini",
+#                           body_parts=['Animal_1 CENTER OF GRAVITY'], #['Simon CENTER OF GRAVITY', 'JJ CENTER OF GRAVITY', 'Animal_1 CENTER OF GRAVITY']
+#                           threshold=0.00)
+# test.run()
+
+
+
+
 
 
 # test = MovementCalculator(config_path='/Users/simon/Desktop/envs/troubleshooting/two_black_animals_14bp/project_folder/project_config.ini',
diff --git a/simba/data_processors/timebins_movement_calculator.py b/simba/data_processors/timebins_movement_calculator.py
index 6feaa83e1..87c18fc50 100644
--- a/simba/data_processors/timebins_movement_calculator.py
+++ b/simba/data_processors/timebins_movement_calculator.py
@@ -41,42 +41,29 @@ class TimeBinsMovementCalculator(ConfigReader, FeatureExtractionMixin):
     >>> calculator.run()
     """
 
-    def __init__(
-        self,
-        config_path: str,
-        bin_length: Union[int, float],
-        body_parts: List[str],
-        plots: Optional[bool] = False,
-    ):
+    def __init__(self,
+                 config_path: str,
+                 bin_length: Union[int, float],
+                 body_parts: List[str],
+                 plots: Optional[bool] = False):
 
         ConfigReader.__init__(self, config_path=config_path)
-        log_event(
-            logger_name=str(self.__class__.__name__),
-            log_type=TagNames.CLASS_INIT.value,
-            msg=self.create_log_msg_from_init_args(locals=locals()),
-        )
+        log_event(logger_name=str(self.__class__.__name__), log_type=TagNames.CLASS_INIT.value, msg=self.create_log_msg_from_init_args(locals=locals()),)
         self.bin_length, self.plots = bin_length, plots
         check_float(name="TIME BIN", value=bin_length, min_value=10e-6)
         self.col_headers, self.bp_dict = [], {}
         for bp_cnt, bp in enumerate(body_parts):
             self.col_headers.extend((f"{bp}_x", f"{bp}_y"))
-            animal_name = self.find_animal_name_from_body_part_name(
-                bp_name=bp, bp_dict=self.animal_bp_dict
-            )
+            animal_name = self.find_animal_name_from_body_part_name(bp_name=bp, bp_dict=self.animal_bp_dict)
             self.bp_dict[bp_cnt] = {animal_name: [f"{bp}_x", f"{bp}_y"]}
-        check_if_filepath_list_is_empty(
-            filepaths=self.outlier_corrected_paths,
-            error_msg=f"SIMBA ERROR: Cannot analyze movement in time-bins, data directory {self.outlier_corrected_dir} is empty.",
-        )
+        check_if_filepath_list_is_empty(filepaths=self.outlier_corrected_paths, error_msg=f"SIMBA ERROR: Cannot analyze movement in time-bins, data directory {self.outlier_corrected_dir} is empty.",)
         self.animal_combinations = list(itertools.combinations(self.animal_bp_dict, 2))
         print(f"Processing {len(self.outlier_corrected_paths)} video(s)...")
 
     def __create_plots(self):
         timer = SimbaTimer(start=True)
         print("Creating time-bin movement plots...")
-        plots_dir = os.path.join(
-            self.project_path, "logs", f"time_bin_movement_plots_{self.datetime}"
-        )
+        plots_dir = os.path.join( self.project_path, "logs", f"time_bin_movement_plots_{self.datetime}")
         if not os.path.exists(plots_dir):
             os.makedirs(plots_dir)
         for video_name in self.results["VIDEO"].unique():
@@ -112,61 +99,29 @@ def __create_plots(self):
     def run(self):
         video_dict, self.out_df_lst = {}, []
         self.movement_dict = {}
-        self.save_path = os.path.join(
-            self.project_path,
-            "logs",
-            f"Time_bins_{self.bin_length}s_movement_results_{self.datetime}.csv",
-        )
-        check_all_file_names_are_represented_in_video_log(
-            video_info_df=self.video_info_df, data_paths=self.outlier_corrected_paths
-        )
+        self.save_path = os.path.join( self.project_path, "logs", f"Time_bins_{self.bin_length}s_movement_results_{self.datetime}.csv")
+        check_all_file_names_are_represented_in_video_log(video_info_df=self.video_info_df, data_paths=self.outlier_corrected_paths)
         for file_cnt, file_path in enumerate(self.outlier_corrected_paths):
             video_timer = SimbaTimer(start=True)
             _, video_name, _ = get_fn_ext(file_path)
-            print(
-                f"Processing time-bin movements for video {video_name} ({str(file_cnt+1)}/{str(len(self.outlier_corrected_paths))})..."
-            )
+            print(f"Processing time-bin movements for video {video_name} ({str(file_cnt+1)}/{str(len(self.outlier_corrected_paths))})...")
             video_dict[video_name] = {}
             video_settings, px_per_mm, fps = self.read_video_info(video_name=video_name)
             fps, self.movement_cols, self.velocity_cols = int(fps), set(), set()
             bin_length_frames = int(fps * self.bin_length)
             if bin_length_frames == 0:
-                raise FrameRangeError(
-                    msg=f"The specified time-bin length of {self.bin_length} is TOO SHORT for video {video_name} which has a specified FPS of {fps}. This results in time bins that are LESS THAN a single frame.",
-                    source=self.__class__.__name__,
-                )
+                raise FrameRangeError(msg=f"The specified time-bin length of {self.bin_length} is TOO SHORT for video {video_name} which has a specified FPS of {fps}. This results in time bins that are LESS THAN a single frame.", source=self.__class__.__name__,)
             self.data_df = read_df(file_path, self.file_type, usecols=self.col_headers)
             self.data_df = self.create_shifted_df(df=self.data_df)
             results = []
             for animal_data in self.bp_dict.values():
                 name, bps = list(animal_data.keys())[0], list(animal_data.values())[0]
-                bp_time_1, bp_time_2 = (
-                    self.data_df[bps].values,
-                    self.data_df[[f"{bps[0]}_shifted", f"{bps[1]}_shifted"]].values,
-                )
-                movement_data = pd.DataFrame(
-                    self.framewise_euclidean_distance(
-                        location_1=bp_time_1,
-                        location_2=bp_time_2,
-                        px_per_mm=px_per_mm,
-                        centimeter=True,
-                    ),
-                    columns=["VALUE"],
-                )
+                bp_time_1, bp_time_2 = (self.data_df[bps].values, self.data_df[[f"{bps[0]}_shifted", f"{bps[1]}_shifted"]].values,)
+                movement_data = pd.DataFrame(self.framewise_euclidean_distance(location_1=bp_time_1, location_2=bp_time_2, px_per_mm=px_per_mm, centimeter=True), columns=["VALUE"])
                 self.movement_dict[video_name] = movement_data
-                movement_df_lists = [
-                    movement_data[i : i + bin_length_frames]
-                    for i in range(0, movement_data.shape[0], bin_length_frames)
-                ]
+                movement_df_lists = [movement_data[i : i + bin_length_frames] for i in range(0, movement_data.shape[0], bin_length_frames)]
                 for bin, movement_df in enumerate(movement_df_lists):
-                    movement, velocity = (
-                        FeatureExtractionSupplemental.distance_and_velocity(
-                            x=movement_df["VALUE"].values,
-                            fps=fps,
-                            pixels_per_mm=1,
-                            centimeters=False,
-                        )
-                    )
+                    movement, velocity = (FeatureExtractionSupplemental.distance_and_velocity(x=movement_df["VALUE"].values, fps=fps, pixels_per_mm=1, centimeters=False))
                     results.append(
                         {
                             "VIDEO": video_name,
@@ -190,21 +145,13 @@ def run(self):
             results = pd.DataFrame(results).reset_index(drop=True)
             self.out_df_lst.append(results)
             video_timer.stop_timer()
-            print(
-                f"Video {video_name} complete (elapsed time: {video_timer.elapsed_time_str}s)..."
-            )
+            print(f"Video {video_name} complete (elapsed time: {video_timer.elapsed_time_str}s)...")
 
     def save(self):
-        self.results = pd.concat(self.out_df_lst, axis=0).sort_values(
-            by=["VIDEO", "TIME BIN #", "MEASUREMENT", "ANIMAL"]
-        )[["VIDEO", "TIME BIN #", "ANIMAL", "BODY-PART", "MEASUREMENT", "VALUE"]]
+        self.results = pd.concat(self.out_df_lst, axis=0).sort_values(by=["VIDEO", "TIME BIN #", "MEASUREMENT", "ANIMAL"])[["VIDEO", "TIME BIN #", "ANIMAL", "BODY-PART", "MEASUREMENT", "VALUE"]]
         self.results.set_index("VIDEO").to_csv(self.save_path)
         self.timer.stop_timer()
-        stdout_success(
-            msg=f"Movement time-bins results saved at {self.save_path}",
-            elapsed_time=self.timer.elapsed_time_str,
-            source=self.__class__.__name__,
-        )
+        stdout_success(msg=f"Movement time-bins results saved at {self.save_path}", elapsed_time=self.timer.elapsed_time_str, source=self.__class__.__name__)
         if self.plots:
             self.__create_plots()
 
diff --git a/simba/mixins/feature_extraction_supplement_mixin.py b/simba/mixins/feature_extraction_supplement_mixin.py
index fcbeb6ecb..a17f00a3d 100644
--- a/simba/mixins/feature_extraction_supplement_mixin.py
+++ b/simba/mixins/feature_extraction_supplement_mixin.py
@@ -747,7 +747,7 @@ def distance_and_velocity(x: np.ndarray,
         """
         Calculate total movement and mean velocity from a sequence of position data.
 
-        :param x: Array containing movement data. For example, created by ``simba.mixins.FeatureExtractionMixin.framewise_euclidean_distance``.
+        :param x: Array containing movement data. For example, created by ``simba.mixins.FeatureExtractionMixin.framewise_euclidean_distance``. If it is a 2-dimensional array, it is assumed to hold pixel coordinates. If it is a 1-dimensional array, it is assumed to hold frame-wise Euclidean distances.
         :param fps: Frames per second of the data.
         :param pixels_per_mm: Conversion factor from pixels to millimeters.
         :param Optional[bool] centimeters: If True, results are returned in centimeters and centimeters per second. Defaults to True.
@@ -759,40 +759,25 @@ def distance_and_velocity(x: np.ndarray,
         >>> sum_movement, avg_velocity = FeatureExtractionSupplemental.distance_and_velocity(x=x, fps=10, pixels_per_mm=10, centimeters=True)
         """
 
-        check_valid_array(
-            data=x,
-            source=FeatureExtractionSupplemental.distance_and_velocity.__name__,
-            accepted_ndims=(1, 2),
-            accepted_dtypes=(np.float32, np.float64, np.int32, np.int64, int, float),
-        )
-        check_float(
-            name=f"{FeatureExtractionSupplemental.distance_and_velocity.__name__} fps",
-            value=fps,
-            min_value=1,
-        )
-        check_float(
-            name=f"{FeatureExtractionSupplemental.distance_and_velocity.__name__} pixels_per_mm",
-            value=pixels_per_mm,
-            min_value=10e-6,
-        )
+        check_valid_array(data=x, source=FeatureExtractionSupplemental.distance_and_velocity.__name__, accepted_ndims=(1, 2), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+        check_float(name=f"{FeatureExtractionSupplemental.distance_and_velocity.__name__} fps", value=fps, min_value=1)
+        check_float(name=f"{FeatureExtractionSupplemental.distance_and_velocity.__name__} pixels_per_mm", value=pixels_per_mm, min_value=10e-6)
         if x.ndim == 2:
-            check_valid_array(
-                data=x,
-                source=FeatureExtractionSupplemental.distance_and_velocity.__name__,
-                accepted_axis_1_shape=(2,),
-            )
-            t = np.full((x.shape[0]), 0.0)
+            check_valid_array(data=x, source=FeatureExtractionSupplemental.distance_and_velocity.__name__, accepted_axis_1_shape=[2, ])
+            framewise_px_movement = np.full((x.shape[0]), 0.0, dtype=np.float64)
             for i in range(1, x.shape[0]):
-                t[i] = np.linalg.norm(x[i] - x[i - 1])
-            x = np.copy(t) / pixels_per_mm
-        movement = np.sum(x) / pixels_per_mm
+                framewise_px_movement[i] = np.linalg.norm(x[i] - x[i - 1])
+        else:
+            framewise_px_movement = x
+        movement = np.sum(framewise_px_movement) / pixels_per_mm
         v = []
-        for i in range(0, x.shape[0], int(fps)):
-            w = x[i : (i + int(fps))]
+        for i in range(1, framewise_px_movement.shape[0], int(fps)):
+            w = framewise_px_movement[i: (i + int(fps))]
             v.append((np.sum(w) / pixels_per_mm) * (1 / (w.shape[0] / int(fps))))
         if centimeters:
             v = [vi / 10 for vi in v]
             movement = movement / 10
+
         return movement, np.mean(v)
 
 
diff --git a/simba/mixins/statistics_mixin.py b/simba/mixins/statistics_mixin.py
index c4bfb2505..639d8f5a8 100644
--- a/simba/mixins/statistics_mixin.py
+++ b/simba/mixins/statistics_mixin.py
@@ -3978,6 +3978,14 @@ def xie_beni(x: np.ndarray, y: np.ndarray) -> float:
         """
         Computes the Xie-Beni index for clustering evaluation.
 
+        The score is calculated as the ratio between the average intra-cluster variance and the squared minimum distance between cluster centroids. This ensures that the index penalizes both loosely packed clusters and clusters that are too close to each other.
+
+        A lower Xie-Beni index indicates better clustering quality, signifying well-separated and compact clusters.
+
+        .. seealso::
+           To compute Xie-Beni on the GPU, use :func:`~simba.data_processors.cuda.statistics.xie_beni`
+
+
         :param np.ndarray x: The dataset as a 2D NumPy array of shape (n_samples, n_features).
         :param np.ndarray y: Cluster labels for each data point as a 1D NumPy array of shape (n_samples,).
         :returns: The Xie-Beni score for the dataset.
@@ -4057,6 +4065,9 @@ def i_index(x: np.ndarray, y: np.ndarray):
         The I-Index is a metric that measures the compactness and separation of clusters.
         A higher I-Index indicates better clustering with compact and well-separated clusters.
 
+        .. seealso::
+           To compute I-index on GPU, use :func:`~simba.data_processors.cuda.statistics.i_index`
+
         :param np.ndarray x: The dataset as a 2D NumPy array of shape (n_samples, n_features).
         :param np.ndarray y: Cluster labels for each data point as a 1D NumPy array of shape (n_samples,).
         :returns: The I-index score for the dataset.
@@ -4073,6 +4084,7 @@ def i_index(x: np.ndarray, y: np.ndarray):
         """
         check_valid_array(data=x, accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
         check_valid_array(data=y, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value, accepted_axis_0_shape=[x.shape[0], ])
+        _ = get_unique_values_in_iterable(data=y, name=Statistics.i_index.__name__, min=2)
         unique_y = np.unique(y)
         n_y = unique_y.shape[0]
         global_centroid = np.mean(x, axis=0)
@@ -4091,6 +4103,12 @@ def sd_index(x: np.ndarray, y: np.ndarray) -> float:
         """
         Compute the SD (Scatter and Discriminant) Index for evaluating the quality of a clustering solution.
 
+        The SD Index combines two components to measure clustering quality:
+        1. **Scatter (SCAT)**: Evaluates the compactness of clusters by measuring the ratio of intra-cluster variance to the global standard deviation.
+        2. **Discriminant (DIS)**: Measures the separation between clusters relative to their distance from the global mean.
+
+        A lower SD Index indicates better clustering quality, reflecting compact and well-separated clusters.
+
         :param np.ndarray x: A 2D array of shape (n_samples, n_features) representing the feature vectors of the data points.
         :param np.ndarray y: A 1D array of shape (n_samples,) containing the cluster labels for each data point.
         :returns: The SD Index value. Lower values indicate better clustering quality with more compact and well-separated clusters.
@@ -4106,6 +4124,7 @@ def sd_index(x: np.ndarray, y: np.ndarray) -> float:
         """
         check_valid_array(data=x, accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
         check_valid_array(data=y, accepted_ndims=(1,), accepted_dtypes=(int,), accepted_axis_0_shape=[x.shape[0], ])
+        _ = get_unique_values_in_iterable(data=y, name=Statistics.sd_index.__name__, min=2)
         global_std = np.std(x)
         global_m = np.mean(x, axis=0)
         unique_clusters = np.unique(y)
@@ -4143,6 +4162,10 @@ def c_index(x: np.ndarray, y: np.ndarray) -> float:
            - 0 indicates perfect clustering (clusters are as compact as possible).
            - 1 indicates worst clustering (clusters are highly spread out).
 
+        :references:
+        .. [1] Hubert, L. J., & Levin, J. R. (1976). A general statistical framework for assessing categorical clustering in free recall. Psychological Bulletin, 83(5), 1072–1080.
+
+
         :example:
         >>> X, y = make_blobs(n_samples=800, centers=2, n_features=3, random_state=0, cluster_std=0.1)
         >>> Statistics.c_index(x=X, y=y)
@@ -4310,7 +4333,7 @@ def cop_index(x: np.ndarray, y: np.ndarray, epsilon: float = 1e-16) -> float:
 
         :example:
         >>> X, y = make_blobs(n_samples=50000, centers=10, n_features=3, random_state=0, cluster_std=1)
-        >>> cop_index(x=X, y=y)
+        >>> Statistics.cop_index(x=X, y=y)
         """
 
         unique_clusters = np.unique(y)
@@ -4350,6 +4373,7 @@ def pbm_index(x: np.ndarray, y: np.ndarray) -> float:
         :references:
         .. [1] Pakhira, M. K., Bandyopadhyay, S., & Maulik, U. (2004). Validity index for crisp and fuzzy clusters.
                Pattern Recognition, 37(4), 487–501. https://doi.org/10.1016/j.patcog.2003.09.021
+        .. [2] Bernard Desgraupes, University Paris Ouest Lab Modal’X, https://cran.r-project.org/web/packages/clusterCrit/vignettes/clusterCrit.pdf
 
         :example:
         >>> X, y = make_blobs(n_samples=5, centers=2, n_features=3, random_state=0, cluster_std=5)
@@ -4380,6 +4404,240 @@ def pbm_index(x: np.ndarray, y: np.ndarray) -> float:
 
         return (((1 / N_clusters) * E1) ** 2) / (EK * Dmin)
 
+    @staticmethod
+    def banfeld_raftery_index(x: np.ndarray, y: np.ndarray) -> float:
+        """
+        Computes the Banfeld-Raftery index for clustering evaluation.
+
+        Sums, over clusters, the cluster size multiplied by the log-determinant of the cluster covariance matrix, with determinants floored at 1e-10. Smaller values represent better clustering. Values can be negative.
+
+        :param x: 2D NumPy array of shape (n_samples, n_features) representing the dataset.
+        :param y: 1D NumPy array of shape (n_samples,) containing cluster labels for each data point.
+        :return: The Banfeld-Raftery index.
+        :rtype: float
+
+        :references:
+           .. [1] Banfield, J. D., & Raftery, A. E. (1993). Model-based Gaussian and non-Gaussian clustering. Biometrics, 49(3), 803-821. https://doi.org/10.2307/2532201
+
+        """
+        check_valid_array(data=x, accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+        check_valid_array(data=y, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value, accepted_axis_0_shape=[x.shape[0], ])
+        _ = get_unique_values_in_iterable(data=y, name=Statistics.banfeld_raftery_index.__name__, min=2)
+        unique_labels = np.unique(y)
+        val = 0.0
+        for cluster_label in unique_labels:
+            cluster_data = x[y == cluster_label]
+            n_k = cluster_data.shape[0]
+            # np.cov collapses to a 0-d array for single-feature data; lift it back to 2D so the determinant is defined.
+            covariance_matrix = np.atleast_2d(np.cov(cluster_data, rowvar=False))
+            determinant = max(np.linalg.det(covariance_matrix), 1e-10)
+            val += n_k * np.log(determinant)
+
+        return val
+
+    @staticmethod
+    def scott_symons_index(x: np.ndarray, y: np.ndarray) -> float:
+        """
+        Compute the Scott-Symons index for clustering evaluation.
+
+        Sums, over clusters, ``n_k * log(det(cov_k) / n_k)`` where ``n_k`` is the cluster size and ``cov_k`` the cluster covariance matrix. Smaller values represent better clustering. Values can be negative.
+
+        :param np.ndarray x: The dataset as a 2D NumPy array of shape (n_samples, n_features).
+        :param np.ndarray y: Cluster labels for each data point as a 1D NumPy array of shape (n_samples,).
+        :returns: The Scott-Symons index score.
+        :rtype: float
+
+        :references:
+           .. [1] A. J. Scott and M. J. Symons. Clustering methods based on likelihood ratio criteria. Biometrics, 27:387–397, 1971.
+        """
+
+        check_valid_array(data=x, accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+        check_valid_array(data=y, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value, accepted_axis_0_shape=[x.shape[0], ])
+        _ = get_unique_values_in_iterable(data=y, name=Statistics.scott_symons_index.__name__, min=2)
+        unique_labels = np.unique(y)
+        val = 0.0
+
+        for label in unique_labels:
+            cluster_points = x[y == label]
+            n_k = cluster_points.shape[0]
+            # Lift the 0-d covariance of single-feature data to 2D; floor only non-positive determinants so a degenerate cluster cannot produce log(<=0).
+            cov_matrix = np.atleast_2d(np.cov(cluster_points, rowvar=False))
+            det_cov = max(np.linalg.det(cov_matrix), np.finfo(np.float64).tiny)
+            val += n_k * np.log(det_cov / n_k)
+        return val
+
+    @staticmethod
+    def wemmert_gancarski_index(x: np.ndarray, y: np.ndarray) -> float:
+        """
+        Compute the Wemmert-Gançarski index for clustering evaluation.
+
+        For every observation, the distance to its own cluster centroid is divided by the distance to the nearest other centroid. Each cluster scores ``max(0, 1 - mean ratio)`` and the index is the size-weighted mean of the cluster scores.
+        The best case is when the index approaches 1, indicating good clustering. The worst case is when the index approaches 0, indicating poor clustering.
+
+        :param np.ndarray x: The dataset as a 2D NumPy array of shape (n_samples, n_features).
+        :param np.ndarray y: Cluster labels for each data point as a 1D NumPy array of shape (n_samples,).
+        :returns: The Wemmert-Gançarski index score.
+        :rtype: float
+
+        :references:
+           .. [1] Bernard Desgraupes, University Paris Ouest Lab Modal’X, https://cran.r-project.org/web/packages/clusterCrit/vignettes/clusterCrit.pdf
+        """
+
+        check_valid_array(data=x, accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+        check_valid_array(data=y, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value, accepted_axis_0_shape=[x.shape[0], ])
+        _ = get_unique_values_in_iterable(data=y, name=Statistics.wemmert_gancarski_index.__name__, min=2)
+        unique_labels = np.unique(y)
+        # Centroids are loop-invariant: compute each one once instead of once per observation.
+        centroids = np.array([np.mean(x[y == label], axis=0) for label in unique_labels])
+        total_score = 0.0
+
+        for cluster_idx, label in enumerate(unique_labels):
+            cluster_points = x[y == label]
+            n_k = cluster_points.shape[0]
+            # (n_k, n_clusters) matrix of distances from each cluster member to every centroid.
+            centroid_dists = np.linalg.norm(cluster_points[:, np.newaxis, :] - centroids[np.newaxis, :, :], axis=2)
+            dist_to_own = centroid_dists[:, cluster_idx]
+            min_dist_to_other = np.min(np.delete(centroid_dists, cluster_idx, axis=1), axis=1)
+            R_values = dist_to_own / min_dist_to_other
+
+            J_k = max(0, 1 - (1 / n_k) * np.sum(R_values))
+            total_score += n_k * J_k
+
+        return total_score / x.shape[0]
+
+    @staticmethod
+    def mclain_rao_index(x: np.ndarray, y: np.ndarray) -> float:
+        """
+        Compute the McClain-Rao index: the average, across clusters, of the mean within-cluster distance divided by
+        the mean between-cluster distance.
+
+        For each cluster, the mean pairwise distance among its members (self-distances excluded) is divided by the
+        mean distance from its members to all observations outside the cluster. The per-cluster ratios are then
+        averaged. A lower value indicates a better clustering result, with clusters being compact and well-separated.
+
+        :param np.ndarray x: The dataset as a 2D NumPy array of shape (n_samples, n_features).
+        :param np.ndarray y: Cluster labels for each data point as a 1D NumPy array of shape (n_samples,).
+        :returns: The McClain-Rao Index score, a lower value indicates better clustering quality.
+        :rtype: float
+
+        :references:
+           .. [1] McClain, J. O., & Rao, V. R. (1975). CLUSTISZ: A program to test for the quality of clustering of a set of objects.  *Journal of Marketing Research, 12*(4), 456-460. https://doi.org/10.1177/002224377501200410
+        """
+
+        check_valid_array(data=x, accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+        check_valid_array(data=y, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value, accepted_axis_0_shape=[x.shape[0], ])
+        _ = get_unique_values_in_iterable(data=y, name=Statistics.mclain_rao_index.__name__, min=2)
+        labels = np.unique(y)
+        cluster_ratios = np.full(shape=(len(labels)), fill_value=np.nan, dtype=np.float64)
+        for idx, label in enumerate(labels):
+            in_cluster = y == label
+            members, outsiders = x[in_cluster], x[~in_cluster]
+            within = cdist(members, members)
+            # Blank out the zero self-distances so they do not pull the within-cluster mean down.
+            np.fill_diagonal(within, np.nan)
+            between = cdist(members, outsiders)
+            cluster_ratios[idx] = np.nanmean(within) / np.mean(between)
+
+        return np.mean(cluster_ratios)
+
+    @staticmethod
+    def s_dbw_index(x: np.ndarray, y: np.ndarray) -> float:
+        """
+        Compute the S_Dbw index for evaluating the clustering quality.
+
+        For every pair of clusters, counts the pair's observations lying within ``sigma`` of the midpoint between the two centroids and divides by the larger of the counts within ``sigma`` of either centroid. The ratios are summed and divided by the number of cluster pairs. A lower value indicates a better clustering result.
+
+        :param np.ndarray x: The dataset as a 2D NumPy array of shape (n_samples, n_features).
+        :param np.ndarray y: Cluster labels for each data point as a 1D NumPy array of shape (n_samples,).
+        :returns: The S_Dbw index score.
+        :rtype: float
+
+        .. note::
+           May behave unexpectedly as the number of dimensions increases (> 20).
+           NOTE(review): only the between-cluster density ratio is computed here; no separate scatter term is added - confirm against the reference.
+
+        :example:
+        >>> from sklearn.datasets import make_blobs
+        >>> X, labels = make_blobs(n_samples=5000, centers=5, random_state=42, n_features=3, cluster_std=2)
+        >>> score = Statistics.s_dbw_index(X, labels)
+
+        :references:
+           .. [1]  M. Halkidi and M. Vazirgiannis. Clustering validity assessment: Finding the optimal partitioning of a data set. Proceedings IEEE International Conference on Data Mining, pages 187–194, 2001.
+        """
+
+        check_valid_array(data=x, accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+        check_valid_array(data=y, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value, accepted_axis_0_shape=[x.shape[0], ])
+        _ = get_unique_values_in_iterable(data=y, name=Statistics.s_dbw_index.__name__, min=2)
+        cluster_ids = np.unique(y)
+        K = len(cluster_ids)
+        # Slice each cluster once up front; the pairwise loop below reuses these slices.
+        members = [x[y == cluster_id] for cluster_id in cluster_ids]
+        centroids = np.array([obs.mean(axis=0) for obs in members])
+        variances = np.array([np.var(obs, axis=0) for obs in members])
+        sigma = np.sqrt(np.sum(np.linalg.norm(variances, axis=1)) / K)
+        ratio_sum = 0.0
+        for k in range(K):
+            for k_prime in range(k + 1, K):
+                pair_obs = np.vstack((members[k], members[k_prime]))
+                midpoint = (centroids[k] + centroids[k_prime]) / 2
+                # Number of observations from the two clusters lying within sigma of each reference location.
+                near_k = np.sum(np.linalg.norm(pair_obs - centroids[k], axis=1) < sigma)
+                near_k_prime = np.sum(np.linalg.norm(pair_obs - centroids[k_prime], axis=1) < sigma)
+                near_midpoint = np.sum(np.linalg.norm(pair_obs - midpoint, axis=1) < sigma)
+                densest = max(near_k, near_k_prime)
+                if densest == 0:
+                    # Neither centroid has neighbours within sigma: the pair adds nothing to the sum.
+                    continue
+                ratio_sum += near_midpoint / densest
+
+        # Normalise by the number of unordered cluster pairs.
+        n_pairs = (K * (K - 1)) / 2
+        return ratio_sum / n_pairs
+
+    @staticmethod
+    def ray_turi_index(x: np.ndarray, y: np.ndarray) -> float:
+        """
+        Compute the Ray-Turi index for evaluating the clustering quality.
+
+        The index is the mean squared distance between each observation and its cluster centroid, divided by the smallest squared distance between any two cluster centroids. A lower value indicates a better clustering result.
+
+        :param np.ndarray x: The dataset as a 2D NumPy array of shape (n_samples, n_features).
+        :param np.ndarray y: Cluster labels for each data point as a 1D NumPy array of shape (n_samples,).
+        :returns: The Ray-Turi index score.
+        :rtype: float
+
+        :example:
+        >>> from sklearn.datasets import make_blobs
+        >>> X, labels = make_blobs(n_samples=5000, centers=5, random_state=42, n_features=3, cluster_std=2)
+        >>> score = Statistics.ray_turi_index(X, labels)
+
+        :references:
+           .. [1] Ray, S., & Turi, R. H. (1999). Determination of number of clusters in k-means clustering and application in colour image segmentation. Proceedings of the 4th International Conference on Advances in Pattern Recognition and Digital Techniques, 137–143.
+        """
+
+        check_valid_array(data=x, accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+        check_valid_array(data=y, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value, accepted_axis_0_shape=[x.shape[0], ])
+        _ = get_unique_values_in_iterable(data=y, name=Statistics.ray_turi_index.__name__, min=2)
+        unique_labels = np.unique(y)
+        # Take the cluster count from the label array that also orders ``centroids`` so the loop bounds below always match it.
+        n_clusters = unique_labels.shape[0]
+        centroids = np.array([x[y == label].mean(axis=0) for label in unique_labels])
+        intra_dists = np.full(shape=(x.shape[0]), fill_value=np.nan, dtype=np.float64)  # float64 so squared distances keep full precision
+        min_cluster_distance = np.inf
+        obs_cnt = 0
+        for cnt, cluster_id in enumerate(unique_labels):
+            cluster_obs = x[y == cluster_id]
+            dists = np.linalg.norm(cluster_obs - centroids[cnt], axis=1) ** 2
+            intra_dists[obs_cnt: obs_cnt + dists.shape[0]] = dists
+            obs_cnt += dists.shape[0]
+
+        for i in range(n_clusters):
+            for j in range(i + 1, n_clusters):
+                distance = np.sum((centroids[i] - centroids[j]) ** 2)
+                min_cluster_distance = min(min_cluster_distance, distance)
+
+        return np.mean(intra_dists) / min_cluster_distance
+
     @staticmethod
     def fowlkes_mallows(x: np.ndarray, y: np.ndarray) -> float:
         """
diff --git a/simba/mixins/timeseries_features_mixin.py b/simba/mixins/timeseries_features_mixin.py
index 21013a262..3f0dd3ca1 100644
--- a/simba/mixins/timeseries_features_mixin.py
+++ b/simba/mixins/timeseries_features_mixin.py
@@ -801,9 +801,7 @@ def line_length(data: np.ndarray) -> float:
 
     @staticmethod
     @njit("(float32[:], float64[:], int64)", fastmath=True)
-    def sliding_line_length(
-        data: np.ndarray, window_sizes: np.ndarray, sample_rate: int
-    ) -> np.ndarray:
+    def sliding_line_length(data: np.ndarray, window_sizes: np.ndarray, sample_rate: int) -> np.ndarray:
         """
         Jitted compute of  sliding line length for a given time series using different window sizes.
 
@@ -1096,7 +1094,7 @@ def sliding_longest_strike(
 
                 results[r1 - 1, i] = result
 
-            return results
+        return results
 
     @staticmethod
     @njit(
diff --git a/simba/mixins/train_model_mixin.py b/simba/mixins/train_model_mixin.py
index f0f9f79d0..fae18b0ca 100644
--- a/simba/mixins/train_model_mixin.py
+++ b/simba/mixins/train_model_mixin.py
@@ -1721,13 +1721,21 @@ def check_raw_dataset_integrity(self, df: pd.DataFrame, logs_path: Optional[Unio
     def _create_shap_mp_helper(data: Tuple[int, pd.DataFrame],
                                explainer: shap.TreeExplainer,
                                clf_name: str,
-                               verbose: bool) -> Tuple[int, pd.DataFrame]:
+                               verbose: bool) -> Tuple[pd.DataFrame, int]:
 
         if verbose:
-            print(f'Processing SHAP batch {data[0] + 1}... ({len(data[1])} observations)')
+            print(f'Processing SHAP core batch {data[0] + 1}... ({len(data[1])} observations)')
         _ = data[1].pop(clf_name).values.reshape(-1, 1)
-        shap_results = explainer.shap_values(data[1].values, check_additivity=False)[1]
-        return shap_results, data[0]
+        shap_batch_results = np.full(shape=(len(data[1]), len(data[1].columns)), fill_value=np.nan, dtype=np.float32)
+        for idx in range(len(data[1])):
+            timer = SimbaTimer(start=True)
+            obs = data[1].iloc[idx, :].values
+            shap_batch_results[idx] = explainer.shap_values(obs, check_additivity=False)[1]
+            timer.stop_timer()
+            if verbose:
+                print(f'SHAP frame complete (core batch: {data[0] + 1}, core batch frame: {idx+1}/{len(data[1])}, frame processing time: {timer.elapsed_time_str}s)')
+
+        return shap_batch_results, data[0]
 
     def create_shap_log_mp(self,
                            rf_clf: RandomForestClassifier,
@@ -1798,7 +1806,7 @@ def create_shap_log_mp(self,
         check_int(name=f'{TrainModelMixin.create_shap_log_mp.__name__} core_cnt', value=core_cnt, min_value=-1, unaccepted_vals=[0])
         check_int(name=f'{TrainModelMixin.create_shap_log_mp.__name__} chunk_size', value=chunk_size, min_value=1)
         check_valid_boolean(value=[verbose, plot], source=f'{TrainModelMixin.create_shap_log_mp.__name__} verbose, plot')
-        core_cnt = [find_core_cnt()[0] if core_cnt is -1 or core_cnt > find_core_cnt()[0] else core_cnt][0]
+        core_cnt = [find_core_cnt()[0] if core_cnt == -1 or core_cnt > find_core_cnt()[0] else core_cnt][0]
         df = pd.DataFrame(np.hstack((x, y.reshape(-1, 1))), columns=x_names + [clf_name])
         del x; del y
         present_df, absent_df = df[df[clf_name] == 1], df[df[clf_name] == 0]
@@ -1822,14 +1830,14 @@ def create_shap_log_mp(self,
         print(f"Computing {cnt_present + cnt_absent} SHAP values. Follow progress in OS terminal... (CORES: {core_cnt}, CHUNK SIZE: {chunk_size})")
         with multiprocessing.Pool(core_cnt, maxtasksperchild=Defaults.MAXIMUM_MAX_TASK_PER_CHILD.value) as pool:
             constants = functools.partial(TrainModelMixin._create_shap_mp_helper, explainer=explainer, clf_name=clf_name, verbose=verbose)
-            for cnt, result in enumerate(pool.imap_unordered(constants, shap_data, chunksize=1)):
+            for cnt, result in enumerate(pool.imap(constants, shap_data, chunksize=1)):
                 proba = TrainModelMixin().clf_predict_proba(clf=rf_clf, x_df=shap_data[result[1]][1].drop(clf_name, axis=1), model_name=clf_name).reshape(-1, 1)
                 shap_sum = np.sum(result[0], axis=1).reshape(-1, 1)
                 batch_shap_results = np.hstack((result[0], np.full((result[0].shape[0]), expected_value).reshape(-1, 1), shap_sum + expected_value, proba, shap_data[result[1]][1][clf_name].values.reshape(-1, 1))).astype(np.float32)
                 shap_results.append(batch_shap_results)
                 shap_raw.append(shap_data[result[1]][1].drop(clf_name, axis=1))
                 if verbose:
-                    print(f"Completed SHAP batch (Batch {result[1] + 1}/{len(shap_data)}).")
+                    print(f"Completed SHAP care batch (Batch {result[1] + 1}/{len(shap_data)}).")
 
         pool.terminate(); pool.join()
         shap_df = pd.DataFrame(data=np.row_stack(shap_results), columns=list(x_names) + ["Expected_value", "Sum", "Prediction_probability", clf_name])
diff --git a/simba/model/grid_search_rf.py b/simba/model/grid_search_rf.py
index 7c927c1c0..54f705159 100644
--- a/simba/model/grid_search_rf.py
+++ b/simba/model/grid_search_rf.py
@@ -42,10 +42,10 @@ def __init__(self, config_path: Union[str, os.PathLike]):
         check_if_filepath_list_is_empty(filepaths=self.target_file_paths, error_msg=f"Zero data files found in {self.targets_folder}, cannot create models.")
         if not os.path.exists(self.configs_meta_dir): os.makedirs(self.configs_meta_dir)
         self.meta_file_lst = sorted(read_simba_meta_files(self.configs_meta_dir))
-        print(f"Reading in {len(self.target_file_paths)} annotated files...")
+        print(f"Reading in {len(self.target_file_paths)} annotated files found in the {self.targets_folder} directory...")
         self.data_df, self.frm_idx = self.read_all_files_in_folder_mp_futures(self.target_file_paths, self.file_type)
         self.frm_idx = pd.DataFrame({"VIDEO": list(self.data_df.index), "FRAME_IDX": self.frm_idx})
-        self.data_df = self.check_raw_dataset_integrity(self.data_df, logs_path=self.logs_path)
+        _ = self.check_raw_dataset_integrity(self.data_df, logs_path=self.logs_path)
         self.data_df = self.drop_bp_cords(df=self.data_df)
 
     def perform_sampling(self, meta_dict: dict):
diff --git a/simba/roi_tools/ROI_analyzer.py b/simba/roi_tools/ROI_analyzer.py
index 95ad52364..1f18425f1 100644
--- a/simba/roi_tools/ROI_analyzer.py
+++ b/simba/roi_tools/ROI_analyzer.py
@@ -1,28 +1,26 @@
 __author__ = "Simon Nilsson"
 
 import os
-from typing import List, Optional, Union
+from typing import List, Optional, Union, Tuple
+
 
 import numpy as np
 import pandas as pd
 
 from simba.mixins.config_reader import ConfigReader
 from simba.mixins.feature_extraction_mixin import FeatureExtractionMixin
-from simba.mixins.feature_extraction_supplement_mixin import \
-    FeatureExtractionSupplemental
+from simba.mixins.feature_extraction_supplement_mixin import FeatureExtractionSupplemental
 from simba.utils.checks import (
     check_all_file_names_are_represented_in_video_log,
     check_file_exist_and_readable, check_float, check_that_column_exist,
     check_valid_lst)
 from simba.utils.data import detect_bouts, slice_roi_dict_for_video
 from simba.utils.enums import Keys
-from simba.utils.errors import (CountError, MissingColumnsError,
-                                ROICoordinatesNotFoundError)
+from simba.utils.errors import (CountError, MissingColumnsError, ROICoordinatesNotFoundError)
 from simba.utils.printing import stdout_success
 from simba.utils.read_write import get_fn_ext, read_data_paths, read_df
 from simba.utils.warnings import NoDataFoundWarning
 
-
 class ROIAnalyzer(ConfigReader, FeatureExtractionMixin):
     """
     Analyze movements, entries, exits, and time-spent-in user-defined ROIs. Results are stored in the
@@ -47,9 +45,9 @@ class ROIAnalyzer(ConfigReader, FeatureExtractionMixin):
     def __init__(self,
                  config_path: Union[str, os.PathLike],
                  data_path: Optional[Union[str, os.PathLike, List[str]]] = None,
-                 detailed_bout_data: Optional[bool] = False,
-                 calculate_distances: Optional[bool] = False,
-                 threshold: Optional[float] = 0.0,
+                 detailed_bout_data: bool = False,
+                 calculate_distances: bool = False,
+                 threshold: float = 0.0,
                  body_parts: Optional[List[str]] = None):
 
         check_file_exist_and_readable(file_path=config_path)
@@ -58,11 +56,7 @@ def __init__(self,
             raise ROICoordinatesNotFoundError(expected_file_path=self.roi_coordinates_path)
         self.read_roi_data()
         FeatureExtractionMixin.__init__(self)
-        self.data_paths = read_data_paths(path=data_path,
-                                          default=self.outlier_corrected_paths,
-                                          default_name=self.outlier_corrected_dir,
-                                          file_type=self.file_type)
-
+        self.data_paths = read_data_paths(path=data_path, default=self.outlier_corrected_paths, default_name=self.outlier_corrected_dir, file_type=self.file_type)
         check_float(name="Body-part probability threshold", value=threshold, min_value=0.0, max_value=1.0)
         check_valid_lst(data=body_parts, source=f"{self.__class__.__name__} body-parts", valid_dtypes=(str,))
         if len(set(body_parts)) != len(body_parts):
@@ -110,7 +104,7 @@ def run(self):
                         roi_bouts["VIDEO"] = video_name
                         self.roi_bout_results.append(roi_bouts)
                         animal_bout_results[row["Name"]] = roi_bouts
-                        self.entry_results.loc[len(self.entry_results)] = [video_name,animal_name,row["Name"],len(roi_bouts)]
+                        self.entry_results.loc[len(self.entry_results)] = [video_name,animal_name,row["Name"], len(roi_bouts)]
                         self.time_results.loc[len(self.time_results)] = [video_name,animal_name,row["Name"],roi_bouts["Bout_time"].sum()]
                     for _, row in self.sliced_roi_dict[Keys.ROI_CIRCLES.value].iterrows():
 
@@ -159,58 +153,22 @@ def run(self):
                     if self.calculate_distances:
                         for roi_name, roi_data in animal_bout_results.items():
                             if len(roi_data) == 0:
-                                self.movements_df.loc[len(self.movements_df)] = [
-                                    video_name,
-                                    animal_name,
-                                    roi_name,
-                                    "Movement (cm)",
-                                    0,
-                                ]
-                                self.movements_df.loc[len(self.movements_df)] = [
-                                    video_name,
-                                    animal_name,
-                                    roi_name,
-                                    "Average velocity (cm/s)",
-                                    "None",
-                                ]
+                                self.movements_df.loc[len(self.movements_df)] = [video_name, animal_name, roi_name, "Movement (cm)", 0,]
+                                self.movements_df.loc[len(self.movements_df)] = [video_name, animal_name, roi_name, "Average velocity (cm/s)", "None",]
                             else:
                                 distances, velocities = [], []
-                                roi_frames = roi_data[
-                                    ["Start_frame", "End_frame"]
-                                ].values
+                                roi_frames = roi_data[["Start_frame", "End_frame"]].values
                                 for event in roi_frames:
-                                    event_pose = animal_df.loc[
-                                        np.arange(event[0], event[1] + 1), bp_names
-                                    ]
-                                    event_pose = event_pose[
-                                        event_pose[bp_names[2]] > self.threshold
-                                    ][bp_names[:2]].values
+                                    event_pose = animal_df.loc[np.arange(event[0], event[1] + 1), bp_names]
+                                    event_pose = event_pose[event_pose[bp_names[2]] > self.threshold][bp_names[:2]].values
                                     if event_pose.shape[0] > 1:
-                                        distance, velocity = (
-                                            FeatureExtractionSupplemental.distance_and_velocity(
-                                                x=event_pose,
-                                                fps=self.fps,
-                                                pixels_per_mm=pix_per_mm,
-                                                centimeters=True,
-                                            )
-                                        )
+                                        distance, velocity = (FeatureExtractionSupplemental.distance_and_velocity(x=event_pose, fps=self.fps, pixels_per_mm=pix_per_mm, centimeters=True))
                                         distances.append(distance)
                                         velocities.append(velocity)
-                                self.movements_df.loc[len(self.movements_df)] = [
-                                    video_name,
-                                    animal_name,
-                                    roi_name,
-                                    "Movement (cm)",
-                                    sum(distances),
-                                ]
-                                self.movements_df.loc[len(self.movements_df)] = [
-                                    video_name,
-                                    animal_name,
-                                    roi_name,
-                                    "Average velocity (cm/s)",
-                                    np.average(velocities),
-                                ]
-        if len(self.roi_bout_results) > 1:
+                                self.movements_df.loc[len(self.movements_df)] = [video_name, animal_name, roi_name, "Movement (cm)", sum(distances)]
+                                self.movements_df.loc[len(self.movements_df)] = [video_name, animal_name, roi_name, "Average velocity (cm/s)", np.average(velocities)]
+
+        if len(self.roi_bout_results) > 0:
             self.detailed_df = pd.concat(self.roi_bout_results, axis=0)
             self.detailed_df = self.detailed_df.rename(columns={"Event": "SHAPE NAME", "Start_time": "START TIME", "End Time": "END TIME", "Start_frame": "START FRAME", "End_frame": "END FRAME", "Bout_time": "DURATION (S)"})
             self.detailed_df["BODY-PART"] = self.detailed_df["ANIMAL"].map(self.bp_lk)
@@ -236,6 +194,17 @@ def save(self):
         stdout_success(msg=f"ROI time and ROI entry saved in the {self.logs_path} directory in CSV format.")
 
 
+# test = ROIAnalyzer(config_path = r"C:\troubleshooting\ROI_movement_test\project_folder\project_config.ini",
+#                    data_path=None,
+#                    calculate_distances=True,
+#                    detailed_bout_data=True,
+#                    body_parts=['Head'],
+#                    threshold=0.0)
+# test.run()
+
+
+
+
 # test = ROIAnalyzer(config_path = r"/Users/simon/Desktop/envs/simba/troubleshooting/two_black_animals_14bp/project_folder/project_config.ini",
 #                    data_path=None,
 #                    calculate_distances=True,
diff --git a/simba/roi_tools/ROI_time_bin_calculator.py b/simba/roi_tools/ROI_time_bin_calculator.py
index 9bdb15db8..d597a9cef 100644
--- a/simba/roi_tools/ROI_time_bin_calculator.py
+++ b/simba/roi_tools/ROI_time_bin_calculator.py
@@ -67,7 +67,7 @@ def __init__(self,
                 raise BodypartColumnNotFoundError(msg=f'The body-part {bp} is not a valid body-part in the SimBA project. Options: {self.body_parts_lst}', source=self.__class__.__name__)
         if len(set(body_parts)) != len(body_parts):
             raise DuplicationError(msg=f'All body-part entries have to be unique. Got {body_parts}', source=self.__class__.__name__)
-        self.roi_analyzer = ROIAnalyzer(config_path=self.config_path, data_path=self.outlier_corrected_dir, calculate_distances=False, threshold=threshold, body_parts=body_parts)
+        self.roi_analyzer = ROIAnalyzer(config_path=self.config_path, data_path=self.outlier_corrected_dir, calculate_distances=False, threshold=threshold, body_parts=body_parts, detailed_bout_data=True)
         self.roi_analyzer.run()
         self.animal_names = list(self.roi_analyzer.bp_dict.keys())
         self.bp_dict = self.roi_analyzer.bp_dict
@@ -107,22 +107,10 @@ def run(self):
                     if self.movement:
                         if len(frms_inside_roi_in_timebin) > 0:
                             bin_move = (self.movement_timebins.movement_dict[self.video_name].iloc[frms_inside_roi_in_timebin].values.flatten().astype(np.float32))
-                            _, velocity = (FeatureExtractionSupplemental.distance_and_velocity(x=bin_move,fps=fps, pixels_per_mm=1, centimeters=True))
-                            self.results_movement_velocity.loc[len(self.results_movement_velocity)] = [self.video_name,
-                                                                                                       shape_name,
-                                                                                                       animal_name,
-                                                                                                       body_part,
-                                                                                                       bin_cnt,
-                                                                                                       bin_move[1:].sum() / 10,
-                                                                                                       velocity]
+                            _, velocity = (FeatureExtractionSupplemental.distance_and_velocity(x=bin_move,fps=fps, pixels_per_mm=1, centimeters=False))  # distance return value is unused; the movement column is derived from bin_move directly
+                            self.results_movement_velocity.loc[len(self.results_movement_velocity)] = [self.video_name, shape_name, animal_name, body_part, bin_cnt, bin_move[1:].sum() / 10, velocity]
                         else:
-                            self.results_movement_velocity.loc[len(self.results_movement_velocity)] = [self.video_name,
-                                                                                                       shape_name,
-                                                                                                       animal_name,
-                                                                                                       body_part,
-                                                                                                       bin_cnt,
-                                                                                                       0,
-                                                                                                       0]
+                            self.results_movement_velocity.loc[len(self.results_movement_velocity)] = [self.video_name, shape_name, animal_name, body_part, bin_cnt, 0, 0]
             video_timer.stop_timer()
             print(f"Video {self.video_name} complete (elapsed time {video_timer.elapsed_time_str}s)")
 
@@ -133,15 +121,19 @@ def save(self):
         stdout_success(msg=f"ROI time bin entry data saved at {self.save_path_entries}", elapsed_time=self.timer.elapsed_time_str)
         stdout_success(msg=f"ROI time bin time data saved at {self.save_path_time}", elapsed_time=self.timer.elapsed_time_str)
         if self.movement:
-            self.results_movement_velocity.sort_values(
-                by=["VIDEO", "SHAPE", "ANIMAL", "TIME BIN #"]
-            ).set_index("VIDEO").to_csv(self.save_path_movement_velocity)
-            stdout_success(
-                msg=f"ROI time-bin movement data saved at {self.save_path_movement_velocity}",
-                elapsed_time=self.timer.elapsed_time_str,
-            )
+            self.results_movement_velocity.sort_values(by=["VIDEO", "SHAPE", "ANIMAL", "TIME BIN #"]).set_index("VIDEO").to_csv(self.save_path_movement_velocity)
+            stdout_success(msg=f"ROI time-bin movement data saved at {self.save_path_movement_velocity}", elapsed_time=self.timer.elapsed_time_str)
 
 
+
+# test = ROITimebinCalculator(config_path=r"C:\troubleshooting\ROI_movement_test\project_folder\project_config.ini",
+#                             bin_length=0.5,
+#                             body_parts=['Head'],
+#                             threshold=0.00,
+#                             movement=True)
+# test.run()
+# test.save()
+
 # test = ROITimebinCalculator(config_path=r"/Users/simon/Desktop/envs/simba/troubleshooting/two_black_animals_14bp/project_folder/project_config.ini",
 #                             bin_length=1,
 #                             body_parts=['Nose_1'],
diff --git a/simba/ui/pop_ups/check_videos_seekable_pop_up.py b/simba/ui/pop_ups/check_videos_seekable_pop_up.py
index 75d31cbaa..50dc5e1f6 100644
--- a/simba/ui/pop_ups/check_videos_seekable_pop_up.py
+++ b/simba/ui/pop_ups/check_videos_seekable_pop_up.py
@@ -78,4 +78,4 @@ def run(self, directory: bool):
                               save_path=save_path)
 
 
-CheckVideoSeekablePopUp()
\ No newline at end of file
+#CheckVideoSeekablePopUp()
\ No newline at end of file
diff --git a/simba/utils/read_write.py b/simba/utils/read_write.py
index d631eb0b7..c0c23457b 100644
--- a/simba/utils/read_write.py
+++ b/simba/utils/read_write.py
@@ -72,16 +72,14 @@
 READ_OPTIONS = csv.ReadOptions(encoding="utf8")
 
 
-def read_df(
-    file_path: Union[str, os.PathLike],
-    file_type: Union[str, os.PathLike],
-    has_index: Optional[bool] = True,
-    remove_columns: Optional[List[str]] = None,
-    usecols: Optional[List[str]] = None,
-    anipose_data: Optional[bool] = False,
-    check_multiindex: Optional[bool] = False,
-    multi_index_headers_to_keep: Optional[int] = None,
-) -> pd.DataFrame:
+def read_df(file_path: Union[str, os.PathLike],
+            file_type: Union[str, os.PathLike],
+            has_index: Optional[bool] = True,
+            remove_columns: Optional[List[str]] = None,
+            usecols: Optional[List[str]] = None,
+            anipose_data: Optional[bool] = False,
+            check_multiindex: Optional[bool] = False,
+            multi_index_headers_to_keep: Optional[int] = None) -> Union[pd.DataFrame, dict]:
     """
     Read single tabular data file or pickle
 
diff --git a/simba/utils/warnings.py b/simba/utils/warnings.py
index 00703a514..3bc42bf8d 100644
--- a/simba/utils/warnings.py
+++ b/simba/utils/warnings.py
@@ -259,3 +259,8 @@ def CorruptedFileWarning(msg: str, source: str = ""):
 @log_warning
 def ResolutionWarning(msg: str, source: str = ""):
     pass
+
+
+@log_warning
+def GPUToolsWarning(msg: str, source: str = ""):
+    pass