Commit

uml
sronilsson committed Dec 19, 2024
1 parent 422d5b4 commit dbb6e85
Showing 21 changed files with 699 additions and 806 deletions.
2 changes: 1 addition & 1 deletion setup.py
@@ -29,7 +29,7 @@
# Setup configuration
setuptools.setup(
name="simba-uw-tf-dev",
version="2.4.4",
version="2.4.5",
author="Simon Nilsson, Jia Jie Choong, Sophia Hwang",
author_email="[email protected]",
description="Toolkit for computer classification and analysis of behaviors in experimental animals",
113 changes: 97 additions & 16 deletions simba/data_processors/cuda/utils.py
@@ -26,12 +26,28 @@ def _cuda_cos(x, t):
t[i] = v
return t

@cuda.jit(device=True)
def _cuda_min(x: np.ndarray):
    return min(x)

@cuda.jit(device=True)
def _cuda_max(x: np.ndarray):
    return max(x)

@cuda.jit(device=True)
def _cuda_standard_deviation(x):
    m = _cuda_mean(x)
    std_sum = 0
    for i in range(x.shape[0]):
        std_sum += (x[i] - m) ** 2
    return math.sqrt(std_sum / x.shape[0])

@cuda.jit(device=True)
def _cuda_std(x: np.ndarray, x_hat: float):
    std = 0
    for i in range(x.shape[0]):
        std += (x[i] - x_hat) ** 2
    return math.sqrt(std / x.shape[0])

@cuda.jit(device=True)
def _rad2deg(x):
@@ -116,6 +132,33 @@ def _cuda_add_2d(x: np.ndarray, vals: np.ndarray) -> np.ndarray:
x[i][j] = x[i][j] + vals[j]
return x


@cuda.jit(device=True)
def _cuda_variance(x: np.ndarray):
    mean = _cuda_mean(x)
    num = 0
    for i in range(x.shape[0]):
        num += (x[i] - mean) ** 2
    return num / (x.shape[0] - 1)


@cuda.jit(device=True)
def _cuda_mac(x: np.ndarray):
    """ Mean absolute change in a 1d array (max size 512)."""
    diff = cuda.local.array(shape=512, dtype=np.float64)
    for i in range(512):
        diff[i] = np.inf
    for j in range(1, x.shape[0]):
        diff[j] = abs(x[j] - x[j - 1])
    s, cnt = 0, 0
    for p in range(diff.shape[0]):
        if diff[p] != np.inf:
            s += diff[p]
            cnt += 1
    val = s / cnt
    cuda.syncthreads()
    return val

def _cuda_available() -> Tuple[bool, Dict[int, Any]]:
"""
Check if a CUDA GPU is available. If so, returns the available GPUs with their model names, physical slots, and compute capabilities.
@@ -137,18 +180,56 @@ def _cuda_available() -> Tuple[bool, Dict[int, Any]]:
return is_available, devices
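A minimal usage sketch (not part of this commit) for the helper above: gate GPU work on the returned flag before launching any CUDA kernels. The import path simply mirrors the file touched in this diff, and the contents of the device dictionary are whatever `_cuda_available` collects.

    # Hedged sketch: check for a usable GPU before dispatching CUDA work.
    from simba.data_processors.cuda.utils import _cuda_available

    is_available, devices = _cuda_available()
    if is_available:
        for device_id, device_info in devices.items():
            print(f"GPU {device_id}: {device_info}")
    else:
        print("No CUDA-capable GPU detected; falling back to CPU implementations.")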


# @guvectorize([(float64[:], float64[:])], '(n) -> (n)', target='cuda')
# def _cuda_bubble_sort(arr, out):
# """
# :example:
# >>> a = np.random.randint(5, 50, (5, 200)).astype('float64')
# >>> d_a = cuda.to_device(a)
# >>> _cuda_bubble_sort(d_a)
# >>> d = d_a.copy_to_host()
# """
#
# for i in range(len(arr)):
# for j in range(len(arr) - 1 - i):
# if arr[j] > arr[j + 1]:
# arr[j], arr[j + 1] = arr[j + 1], arr[j]
# out = arr

@cuda.jit(device=True)
def _cuda_bubble_sort(x):
    n = x.shape[0]
    for i in range(n - 1):
        for j in range(n - i - 1):
            if x[j] > x[j + 1]:
                x[j], x[j + 1] = x[j + 1], x[j]
    return x


@cuda.jit(device=True)
def _cuda_median(x):
    sorted_arr = _cuda_bubble_sort(x)
    if not x.shape[0] % 2 == 0:
        return sorted_arr[int(math.floor(x.shape[0] / 2))]
    else:
        loc_1, loc_2 = int((x.shape[0] / 2) - 1), int(x.shape[0] / 2)
        return (sorted_arr[loc_1] + sorted_arr[loc_2]) / 2


@cuda.jit(device=True)
def _cuda_mad(x):
    diff = cuda.local.array(shape=512, dtype=np.float32)
    for i in range(512):
        diff[i] = np.inf
    m = _cuda_median(x)
    for j in range(x.shape[0]):
        diff[j] = abs(x[j] - m)
    return _cuda_median(diff[0:x.shape[0]])

@cuda.jit(device=True)
def _cuda_rms(x: np.ndarray):
    squared = cuda.local.array(shape=512, dtype=np.float64)
    for i in range(512):
        squared[i] = np.inf
    for j in range(x.shape[0]):
        squared[j] = x[j] ** 2
    m = _cuda_mean(squared[0:x.shape[0]])
    return math.sqrt(m)


@cuda.jit(device=True)
def _cuda_range(x: np.ndarray):
    return _cuda_max(x) - _cuda_min(x)

@cuda.jit(device=True)
def _cuda_abs_energy(x):
    squared = cuda.local.array(shape=512, dtype=np.float64)
    for i in range(512):
        squared[i] = np.inf
    for j in range(x.shape[0]):
        squared[j] = x[j] ** 2
    m = _cuda_sum(squared[0:x.shape[0]])
    return math.sqrt(m)
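For orientation, a self-contained sketch (not part of this commit) of how device helpers like those above are typically consumed: a `@cuda.jit` kernel assigns one thread per row and calls the device function on that row. `_device_mean` and `_row_mean_kernel` below are illustrative stand-ins, not functions from this repository, and running the snippet requires a CUDA-capable GPU.

    import numpy as np
    from numba import cuda

    @cuda.jit(device=True)
    def _device_mean(x):
        # Same pattern as the helpers above: plain loops over a 1D view.
        s = 0.0
        for i in range(x.shape[0]):
            s += x[i]
        return s / x.shape[0]

    @cuda.jit
    def _row_mean_kernel(data, results):
        r = cuda.grid(1)                      # one thread per row
        if r < data.shape[0]:
            results[r] = _device_mean(data[r, :])

    data = np.random.random((1024, 128)).astype(np.float32)
    d_data = cuda.to_device(data)
    d_results = cuda.device_array(shape=(data.shape[0],), dtype=np.float32)
    threads_per_block = 256
    blocks = (data.shape[0] + threads_per_block - 1) // threads_per_block
    _row_mean_kernel[blocks, threads_per_block](d_data, d_results)
    row_means = d_results.copy_to_host()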
67 changes: 21 additions & 46 deletions simba/mixins/geometry_mixin.py
@@ -845,7 +845,7 @@ def view_shapes(shapes: List[Union[LineString, Polygon, MultiPolygon, MultiLineS
bg_img: Optional[np.ndarray] = None,
bg_clr: Optional[Tuple[int, int, int]] = None,
size: Optional[int] = None,
color_palette: Union[str, List[Tuple[int, int, int]]] = 'Set1',
color_palette: Union[str, List[Tuple[int, ...]]] = 'Set1',
fill_shapes: Optional[bool] = False,
thickness: Optional[int] = 2,
pixel_buffer: Optional[int] = 200,
@@ -864,7 +864,7 @@ def view_shapes(shapes: List[Union[LineString, Polygon, MultiPolygon, MultiLineS
:param Optional[np.ndarray] bg_img: Optional. An image array (in np.ndarray format) to use as the background. If not provided, a blank canvas will be created.
:param Optional[Tuple[int, int, int]] bg_clr: A tuple representing the RGB color of the background (e.g., (255, 255, 255) for white). This is ignored if bg_img is provided. If None the background is white.
:param Optional[int] size: Optional. An integer to specify the size of the canvas (width and height). Only applicable if bg_img is not provided.
:param Optional[str] color_palette: Optional. A string specifying the color palette to be used for the shapes. Default is 'Set1', which uses distinct colors.
:param Union[str, List[Tuple[int, ...]]] color_palette: Optional. A string specifying the color palette to be used for the shapes. Default is 'Set1', which uses distinct colors. Alternatively, a list of RGB value tuples of the same length as `shapes`.
:param Optional[int] thickness: Optional. An integer specifying the thickness of the lines when rendering LineString or Polygon borders. Default is 2.
:param Optional[int] pixel_buffer: Optional. An integer specifying the number of pixels to add around the bounding box of the shapes for padding. Default is 200.
:return: An image (np.ndarray) with the rendered shapes.
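A hedged usage sketch for the updated `color_palette` signature, assuming `view_shapes` is exposed as a static method on `GeometryMixin` (its self-less signature suggests so); the polygon coordinates are made up for illustration.

    from shapely.geometry import Polygon
    from simba.mixins.geometry_mixin import GeometryMixin

    shapes = [Polygon([(10, 10), (110, 10), (110, 110), (10, 110)]),
              Polygon([(150, 150), (250, 150), (250, 250), (150, 250)])]
    # One explicit RGB tuple per shape instead of a named palette.
    img = GeometryMixin.view_shapes(shapes=shapes,
                                    color_palette=[(255, 0, 0), (0, 255, 0)],
                                    thickness=2,
                                    pixel_buffer=200)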
@@ -3233,13 +3233,13 @@ def cumsum_coord_geometries(self,
return np.cumsum(img_arr, axis=0) / fps

@staticmethod
def _cumsum_bool_helper(
data: np.ndarray, geometries: Dict[Tuple[int, int], Polygon]
):
def _cumsum_bool_helper(data: np.ndarray,
geometries: Dict[Tuple[int, int], Polygon],
verbose: bool = True):

data_point = Point(data[1:3])
print(
f"Processing animal grid square location for boolean in frame {int(data[0])}..."
)
if verbose:
print(f"Processing animal grid square location for boolean in frame {int(data[0])}...")
for k, r in geometries.items():
if r.contains(data_point):
return (int(data[0]), k[0], k[1])
@@ -3250,6 +3250,7 @@ def cumsum_bool_geometries(self,
geometries: Dict[Tuple[int, int], Polygon],
bool_data: np.ndarray,
fps: Optional[float] = None,
verbose: bool = True,
core_cnt: Optional[int] = -1) -> np.ndarray:
"""
Compute the cumulative sums of boolean events within polygon geometries over time using multiprocessing. For example, compute the cumulative time of classified events within spatial locations at all time-points of the video.
@@ -3262,6 +3263,7 @@
:param Dict[Tuple[int, int], Polygon] geometries: Dictionary of polygons representing spatial regions. E.g., created by :func:`simba.mixins.geometry_mixin.GeometryMixin.bucket_img_into_grid_square` or :func:`simba.mixins.geometry_mixin.GeometryMixin.bucket_img_into_grid_hexagon`.
:param np.ndarray bool_data: Boolean array with shape (data.shape[0],) or (data.shape[0], 1) indicating the presence or absence in each frame.
:param Optional[float] fps: Frames per second. If provided, the result is normalized by the frame rate.
:param bool verbose: If True, prints progress. Default: True.
:param Optional[int] core_cnt: Number of CPU cores to use for parallel processing. Default is -1, which means using all available cores.
:returns: Matrix of size (frames x horizontal bins x vertical bins) with times in seconds (if fps passed) or frames (if fps not passed)
:rtype: np.ndarray
@@ -3275,39 +3277,14 @@
>>> (500, 4, 4)
"""

check_valid_array(
data=data,
accepted_sizes=[2],
source=f"{GeometryMixin.cumsum_bool_geometries.__name__} data",
)
check_instance(
source=f"{GeometryMixin.cumsum_bool_geometries.__name__} geometries",
instance=geometries,
accepted_types=dict,
)
check_valid_array(
data=bool_data,
accepted_shapes=[(data.shape[0], 1), (data.shape[0],)],
source=f"{GeometryMixin.cumsum_bool_geometries.__name__} bool_data",
)
check_valid_array(data=data, accepted_sizes=[2], source=f"{GeometryMixin.cumsum_bool_geometries.__name__} data")
check_instance(source=f"{GeometryMixin.cumsum_bool_geometries.__name__} geometries",instance=geometries,accepted_types=dict)
check_valid_array(data=bool_data,accepted_shapes=[(data.shape[0], 1), (data.shape[0],)],source=f"{GeometryMixin.cumsum_bool_geometries.__name__} bool_data")
if fps is not None:
check_float(
name=f"{GeometryMixin.cumsum_bool_geometries.__name__} fps",
value=fps,
min_value=1.0,
)
check_int(
name=f"{GeometryMixin.cumsum_bool_geometries.__name__} core_cnt",
value=core_cnt,
min_value=-1,
)
if not np.array_equal(
np.sort(np.unique(bool_data)).astype(int), np.array([0, 1])
):
raise InvalidInputError(
msg=f"Invalid boolean data. Expected {np.array([0, 1])} but found {np.sort(np.unique(bool_data)).astype(int)}",
source=GeometryMixin.cumsum_bool_geometries.__name__,
)
check_float(name=f"{GeometryMixin.cumsum_bool_geometries.__name__} fps", value=fps, min_value=1.0)
check_int(name=f"{GeometryMixin.cumsum_bool_geometries.__name__} core_cnt", value=core_cnt, min_value=-1)
if not np.array_equal(np.sort(np.unique(bool_data)).astype(int), np.array([0, 1])):
raise InvalidInputError(msg=f"Invalid boolean data. Expected {np.array([0, 1])} but found {np.sort(np.unique(bool_data)).astype(int)}", source=GeometryMixin.cumsum_bool_geometries.__name__)
if core_cnt == -1:
core_cnt = find_core_cnt()[0]
w, h = 0, 0
@@ -3320,12 +3297,10 @@
data = np.hstack((frm_id, data))
img_arr = np.zeros((data.shape[0], h + 1, w + 1))
data = data[np.argwhere((data[:, 3] == 1))].reshape(-1, 4)
with multiprocessing.Pool(
core_cnt, maxtasksperchild=Defaults.LARGE_MAX_TASK_PER_CHILD.value
) as pool:
constants = functools.partial(
self._cumsum_bool_helper, geometries=geometries
)
with multiprocessing.Pool(core_cnt, maxtasksperchild=Defaults.LARGE_MAX_TASK_PER_CHILD.value) as pool:
constants = functools.partial(self._cumsum_bool_helper,
geometries=geometries,
verbose=verbose)
for cnt, result in enumerate(pool.imap(constants, data, chunksize=1)):
if result[1] != -1:
img_arr[result[0], result[2] - 1, result[1] - 1] = 1
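A hedged sketch of the new `verbose` flag (not part of the committed docstring): two hand-built grid squares stand in for the output of `bucket_img_into_grid_square`, and the `(column, row)` key convention is an assumption made only for illustration.

    import numpy as np
    from shapely.geometry import Polygon
    from simba.mixins.geometry_mixin import GeometryMixin

    geometries = {(1, 1): Polygon([(0, 0), (100, 0), (100, 100), (0, 100)]),
                  (2, 1): Polygon([(100, 0), (200, 0), (200, 100), (100, 100)])}
    data = np.random.randint(0, 200, (500, 2)).astype(np.float64)    # animal x, y per frame
    bool_data = np.random.randint(0, 2, (500,))                      # classified event per frame
    cum_sums = GeometryMixin().cumsum_bool_geometries(data=data,
                                                      geometries=geometries,
                                                      bool_data=bool_data,
                                                      fps=15.0,
                                                      verbose=False)  # silence per-frame prints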
2 changes: 1 addition & 1 deletion simba/mixins/plotting_mixin.py
@@ -497,7 +497,7 @@ def make_location_heatmap_plot(frm_data: np.array,
canvas.draw()
mat = np.array(canvas.renderer._renderer)
image = cv2.cvtColor(mat, cv2.COLOR_RGB2BGR)
image = cv2.resize(mat, img_size)
image = cv2.resize(image, img_size)
image = np.uint8(image)
plt.close("all")
if file_name is not None:
46 changes: 40 additions & 6 deletions simba/mixins/statistics_mixin.py
@@ -1958,15 +1958,17 @@ def sliding_kendall_tau(sample_1: np.ndarray, sample_2: np.ndarray, time_windows
return results

@staticmethod
def find_collinear_features(
df: pd.DataFrame,
threshold: float,
method: Optional[Literal["pearson", "spearman", "kendall"]] = "pearson",
verbose: Optional[bool] = False,
) -> List[str]:
def find_collinear_features(df: pd.DataFrame,
threshold: float,
method: Optional[Literal["pearson", "spearman", "kendall"]] = "pearson",
verbose: Optional[bool] = False) -> List[str]:

"""
Identify collinear features in the dataframe based on the specified correlation method and threshold.
.. seealso::
For multicore numba accelerated method, see :func:`simba.mixins.train_model_mixin.TrainModelMixin.find_highly_correlated_fields`.
:param pd.DataFrame df: Input DataFrame containing features.
:param float threshold: Threshold value to determine collinearity.
:param Optional[Literal['pearson', 'spearman', 'kendall']] method: Method for calculating correlation. Defaults to 'pearson'.
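A hedged usage sketch: build a frame with one deliberately near-collinear pair and drop whatever names the method returns (which member of a correlated pair ends up in the returned list is left to the implementation).

    import numpy as np
    import pandas as pd
    from simba.mixins.statistics_mixin import Statistics

    x = np.random.random(1000)
    df = pd.DataFrame({'feature_1': x,
                       'feature_2': x * 2 + np.random.random(1000) * 1e-3,   # near-collinear with feature_1
                       'feature_3': np.random.random(1000)})
    redundant = Statistics.find_collinear_features(df=df, threshold=0.95, method='pearson', verbose=True)
    df_reduced = df.drop(columns=redundant)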
@@ -4343,3 +4345,35 @@ def symmetry_index(x: np.ndarray, y: np.ndarray, agg_type: Literal['mean', 'medi
else:
return np.float32(np.nanmedian(si_values))

@staticmethod
@njit("(float32[:], float64, float64)")
def sliding_iqr(x: np.ndarray, window_size: float, sample_rate: float) -> np.ndarray:
"""
Compute the sliding interquartile range (IQR) for a 1D array of feature values.
:param ndarray x: 1D array representing the feature values for which the IQR will be calculated.
:param float window_size: Size of the sliding window, in seconds. This value determines how many samples are included in each window.
:param float sample_rate: The sampling rate in samples per second, e.g., fps.
:returns: Sliding IQR values
:rtype: np.ndarray
:references:
.. [1] Hession, Leinani E., Gautam S. Sabnis, Gary A. Churchill, and Vivek Kumar. “A Machine-Vision-Based Frailty Index for Mice.” Nature Aging 2, no. 8 (August 16, 2022): 756–66. https://doi.org/10.1038/s43587-022-00266-0.
:example:
>>> data = np.random.randint(0, 50, (90,)).astype(np.float32)
>>> window_size = 0.5
>>> Statistics.sliding_iqr(x=data, window_size=0.5, sample_rate=10.0)
"""

frm_win = max(1, int(window_size * sample_rate))
results = np.full(shape=(x.shape[0],), dtype=np.float32, fill_value=-1.0)
for r in range(frm_win, x.shape[0] + 1):
sorted_sample = np.sort(x[r - frm_win:r])
lower_idx = sorted_sample.shape[0] // 4
upper_idx = (3 * sorted_sample.shape[0]) // 4
lower_val = sorted_sample[lower_idx]
upper_val = sorted_sample[upper_idx]
results[r - 1] = upper_val - lower_val
return results
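For readers without numba, a plain-NumPy reference (not from the repository) that mirrors the index-based quartile definition above (sorted[3n//4] - sorted[n//4], rather than interpolated percentiles).

    import numpy as np

    def sliding_iqr_np(x: np.ndarray, window_size: float, sample_rate: float) -> np.ndarray:
        # Mirror of the numba implementation above, kept for readability and verification.
        frm_win = max(1, int(window_size * sample_rate))
        results = np.full(x.shape[0], -1.0, dtype=np.float32)
        for r in range(frm_win, x.shape[0] + 1):
            s = np.sort(x[r - frm_win:r])
            results[r - 1] = s[(3 * s.shape[0]) // 4] - s[s.shape[0] // 4]
        return results

    data = np.random.randint(0, 50, (90,)).astype(np.float32)
    iqr_vals = sliding_iqr_np(data, window_size=0.5, sample_rate=10.0)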

15 changes: 12 additions & 3 deletions simba/mixins/timeseries_features_mixin.py
@@ -428,7 +428,16 @@ def sliding_percent_beyond_n_std(data: np.ndarray, n: float, window_sizes: np.nd
(int64[:], float64[:], int64),
]
)
def sliding_unique(x: np.ndarray, time_windows: np.ndarray, fps: int):
def sliding_unique(x: np.ndarray, time_windows: np.ndarray, fps: int) -> np.ndarray:
"""
Compute the number of unique values in a sliding window over an array of feature values.
:param np.ndarray x: 1D array of feature values for which the unique values are to be counted.
:param np.ndarray time_windows: Array of window sizes (in seconds) for which the unique values are counted.
:param int fps: The frame rate in frames per second, which is used to calculate the window size in samples.
:return: A 2D array of shape (x.shape[0], time_windows.shape[0]) where each column corresponds to a time window and each element is the count of unique values in the sliding window of `x` ending at that frame.
:rtype: np.ndarray
"""
results = np.full((x.shape[0], time_windows.shape[0]), -1)
for i in prange(time_windows.shape[0]):
window_size = int(time_windows[i] * fps)
@@ -922,10 +931,10 @@ def sliding_descriptive_statistics(data: np.ndarray, window_sizes: np.ndarray, s
results[j, r - 1, i] = np.median(sample)
elif statistics[j] == "mean":
results[j, r - 1, i] = np.mean(sample)
elif statistics[j] == "mad":
results[j, r - 1, i] = np.median(np.abs(sample - np.median(sample)))
elif statistics[j] == "sum":
results[j, r - 1, i] = np.sum(sample)
elif statistics[j] == "mad":
results[j, r - 1, i] = np.median(np.abs(sample - np.median(sample)))
elif statistics[j] == "mac":
results[j, r - 1, i] = np.mean(np.abs(sample[1:] - sample[:-1]))
elif statistics[j] == "rms":
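A hedged usage sketch for the `sliding_unique` helper documented above; dtypes follow the njit signature in that hunk ((int64[:], float64[:], int64)), and the class name `TimeseriesFeatureMixin` is assumed from the module name.

    import numpy as np
    from simba.mixins.timeseries_features_mixin import TimeseriesFeatureMixin

    x = np.random.randint(0, 10, (300,)).astype(np.int64)     # e.g., discretized feature values
    time_windows = np.array([0.5, 1.0], dtype=np.float64)     # window sizes in seconds
    unique_counts = TimeseriesFeatureMixin.sliding_unique(x, time_windows, 30)
    # unique_counts has shape (300, 2): one column per window; -1 before the first full window.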
