Commit

uml
sronilsson committed Dec 19, 2024
1 parent 422d5b4 commit dbb6e85
Showing 21 changed files with 699 additions and 806 deletions.
2 changes: 1 addition & 1 deletion setup.py
@@ -29,7 +29,7 @@
# Setup configuration
setuptools.setup(
name="simba-uw-tf-dev",
version="2.4.4",
version="2.4.5",
author="Simon Nilsson, Jia Jie Choong, Sophia Hwang",
author_email="[email protected]",
description="Toolkit for computer classification and analysis of behaviors in experimental animals",
113 changes: 97 additions & 16 deletions simba/data_processors/cuda/utils.py
@@ -26,12 +26,28 @@ def _cuda_cos(x, t):
t[i] = v
return t

@cuda.jit(device=True)
def _cuda_min(x: np.ndarray):
    return min(x)

@cuda.jit(device=True)
def _cuda_max(x: np.ndarray):
    return max(x)

@cuda.jit(device=True)
def _cuda_standard_deviation(x):
    m = _cuda_mean(x)
    std_sum = 0
    for i in range(x.shape[0]):
        std_sum += (x[i] - m) ** 2
    return math.sqrt(std_sum / x.shape[0])

@cuda.jit(device=True)
def _cuda_std(x: np.ndarray, x_hat: float):
    std = 0
    for i in range(x.shape[0]):
        std += (x[i] - x_hat) ** 2
    return math.sqrt(std / x.shape[0])

@cuda.jit(device=True)
def _rad2deg(x):
@@ -116,6 +132,33 @@ def _cuda_add_2d(x: np.ndarray, vals: np.ndarray) -> np.ndarray:
x[i][j] = x[i][j] + vals[j]
return x


@cuda.jit(device=True)
def _cuda_variance(x: np.ndarray):
    mean = _cuda_mean(x)
    num = 0
    for i in range(x.shape[0]):
        num += (x[i] - mean) ** 2
    return num / (x.shape[0] - 1)


@cuda.jit(device=True)
def _cuda_mac(x: np.ndarray):
    """ Mean absolute change in a 1d array (max size 512)."""
    diff = cuda.local.array(shape=512, dtype=np.float64)
    for i in range(512):
        diff[i] = np.inf
    for j in range(1, x.shape[0]):
        diff[j] = abs(x[j] - x[j - 1])
    s, cnt = 0, 0
    for p in range(diff.shape[0]):
        if diff[p] != np.inf:
            s += diff[p]
            cnt += 1
    val = s / cnt
    cuda.syncthreads()
    return val

def _cuda_available() -> Tuple[bool, Dict[int, Any]]:
"""
Check if a CUDA GPU is available. If so, returns the available GPUs with their model names, physical slots, and compute capabilities.
@@ -137,18 +180,56 @@ def _cuda_available() -> Tuple[bool, Dict[int, Any]]:
return is_available, devices
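A minimal usage sketch (not part of this commit) for the helper above: gate GPU work on the returned flag before launching any CUDA kernels. The import path simply mirrors the file touched in this diff, and the contents of the device dictionary are whatever `_cuda_available` collects.

    # Hedged sketch: check for a usable GPU before dispatching CUDA work.
    from simba.data_processors.cuda.utils import _cuda_available

    is_available, devices = _cuda_available()
    if is_available:
        for device_id, device_info in devices.items():
            print(f"GPU {device_id}: {device_info}")
    else:
        print("No CUDA-capable GPU detected; falling back to CPU implementations.")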


# @guvectorize([(float64[:], float64[:])], '(n) -> (n)', target='cuda')
# def _cuda_bubble_sort(arr, out):
# """
# :example:
# >>> a = np.random.randint(5, 50, (5, 200)).astype('float64')
# >>> d_a = cuda.to_device(a)
# >>> _cuda_bubble_sort(d_a)
# >>> d = d_a.copy_to_host()
# """
#
# for i in range(len(arr)):
# for j in range(len(arr) - 1 - i):
# if arr[j] > arr[j + 1]:
# arr[j], arr[j + 1] = arr[j + 1], arr[j]
# out = arr

@cuda.jit(device=True)
def _cuda_bubble_sort(x):
    n = x.shape[0]
    for i in range(n - 1):
        for j in range(n - i - 1):
            if x[j] > x[j + 1]:
                x[j], x[j + 1] = x[j + 1], x[j]
    return x


@cuda.jit(device=True)
def _cuda_median(x):
    sorted_arr = _cuda_bubble_sort(x)
    if not x.shape[0] % 2 == 0:
        return sorted_arr[int(math.floor(x.shape[0] / 2))]
    else:
        loc_1, loc_2 = int((x.shape[0] / 2) - 1), int(x.shape[0] / 2)
        return (sorted_arr[loc_1] + sorted_arr[loc_2]) / 2


@cuda.jit(device=True)
def _cuda_mad(x):
    diff = cuda.local.array(shape=512, dtype=np.float32)
    for i in range(512):
        diff[i] = np.inf
    m = _cuda_median(x)
    for j in range(x.shape[0]):
        diff[j] = abs(x[j] - m)
    return _cuda_median(diff[0:x.shape[0]])

@cuda.jit(device=True)
def _cuda_rms(x: np.ndarray):
    squared = cuda.local.array(shape=512, dtype=np.float64)
    for i in range(512):
        squared[i] = np.inf
    for j in range(x.shape[0]):
        squared[j] = x[j] ** 2
    m = _cuda_mean(squared[0:x.shape[0]])
    return math.sqrt(m)


@cuda.jit(device=True)
def _cuda_range(x: np.ndarray):
    return _cuda_max(x) - _cuda_min(x)

@cuda.jit(device=True)
def _cuda_abs_energy(x):
    squared = cuda.local.array(shape=512, dtype=np.float64)
    for i in range(512):
        squared[i] = np.inf
    for j in range(x.shape[0]):
        squared[j] = x[j] ** 2
    m = _cuda_sum(squared[0:x.shape[0]])
    return math.sqrt(m)
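For orientation, a self-contained sketch (not part of this commit) of how device helpers like those above are typically consumed: a `@cuda.jit` kernel assigns one thread per row and calls the device function on that row. `_device_mean` and `_row_mean_kernel` below are illustrative stand-ins, not functions from this repository, and running the snippet requires a CUDA-capable GPU.

    import numpy as np
    from numba import cuda

    @cuda.jit(device=True)
    def _device_mean(x):
        # Same pattern as the helpers above: plain loops over a 1D view.
        s = 0.0
        for i in range(x.shape[0]):
            s += x[i]
        return s / x.shape[0]

    @cuda.jit
    def _row_mean_kernel(data, results):
        r = cuda.grid(1)                      # one thread per row
        if r < data.shape[0]:
            results[r] = _device_mean(data[r, :])

    data = np.random.random((1024, 128)).astype(np.float32)
    d_data = cuda.to_device(data)
    d_results = cuda.device_array(shape=(data.shape[0],), dtype=np.float32)
    threads_per_block = 256
    blocks = (data.shape[0] + threads_per_block - 1) // threads_per_block
    _row_mean_kernel[blocks, threads_per_block](d_data, d_results)
    row_means = d_results.copy_to_host()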
67 changes: 21 additions & 46 deletions simba/mixins/geometry_mixin.py
@@ -845,7 +845,7 @@ def view_shapes(shapes: List[Union[LineString, Polygon, MultiPolygon, MultiLineS
bg_img: Optional[np.ndarray] = None,
bg_clr: Optional[Tuple[int, int, int]] = None,
size: Optional[int] = None,
color_palette: Union[str, List[Tuple[int, int, int]]] = 'Set1',
color_palette: Union[str, List[Tuple[int, ...]]] = 'Set1',
fill_shapes: Optional[bool] = False,
thickness: Optional[int] = 2,
pixel_buffer: Optional[int] = 200,
@@ -864,7 +864,7 @@ def view_shapes(shapes: List[Union[LineString, Polygon, MultiPolygon, MultiLineS
:param Optional[np.ndarray] bg_img: Optional. An image array (in np.ndarray format) to use as the background. If not provided, a blank canvas will be created.
:param Optional[Tuple[int, int, int]] bg_clr: A tuple representing the RGB color of the background (e.g., (255, 255, 255) for white). This is ignored if bg_img is provided. If None the background is white.
:param Optional[int] size: Optional. An integer to specify the size of the canvas (width and height). Only applicable if bg_img is not provided.
:param Optional[str] color_palette: Optional. A string specifying the color palette to be used for the shapes. Default is 'Set1', which uses distinct colors.
:param Union[str, List[Tuple[int, ...]]] color_palette: Optional. A string specifying the color palette to be used for the shapes. Default is 'Set1', which uses distinct colors. Alternatively, a list of RGB value tuples of the same length as `shapes`.
:param Optional[int] thickness: Optional. An integer specifying the thickness of the lines when rendering LineString or Polygon borders. Default is 2.
:param Optional[int] pixel_buffer: Optional. An integer specifying the number of pixels to add around the bounding box of the shapes for padding. Default is 200.
:return: An image (np.ndarray) with the rendered shapes.
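A hedged usage sketch for the updated `color_palette` signature, assuming `view_shapes` is exposed as a static method on `GeometryMixin` (its self-less signature suggests so); the polygon coordinates are made up for illustration.

    from shapely.geometry import Polygon
    from simba.mixins.geometry_mixin import GeometryMixin

    shapes = [Polygon([(10, 10), (110, 10), (110, 110), (10, 110)]),
              Polygon([(150, 150), (250, 150), (250, 250), (150, 250)])]
    # One explicit RGB tuple per shape instead of a named palette.
    img = GeometryMixin.view_shapes(shapes=shapes,
                                    color_palette=[(255, 0, 0), (0, 255, 0)],
                                    thickness=2,
                                    pixel_buffer=200)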
@@ -3233,13 +3233,13 @@ def cumsum_coord_geometries(self,
return np.cumsum(img_arr, axis=0) / fps

@staticmethod
def _cumsum_bool_helper(
data: np.ndarray, geometries: Dict[Tuple[int, int], Polygon]
):
def _cumsum_bool_helper(data: np.ndarray,
geometries: Dict[Tuple[int, int], Polygon],
verbose: bool = True):

data_point = Point(data[1:3])
print(
f"Processing animal grid square location for boolean in frame {int(data[0])}..."
)
if verbose:
print(f"Processing animal grid square location for boolean in frame {int(data[0])}...")
for k, r in geometries.items():
if r.contains(data_point):
return (int(data[0]), k[0], k[1])
@@ -3250,6 +3250,7 @@ def cumsum_bool_geometries(self,
geometries: Dict[Tuple[int, int], Polygon],
bool_data: np.ndarray,
fps: Optional[float] = None,
verbose: bool = True,
core_cnt: Optional[int] = -1) -> np.ndarray:
"""
Compute the cumulative sums of boolean events within polygon geometries over time using multiprocessing. For example, compute the cumulative time of classified events within spatial locations at all time-points of the video.
@@ -3262,6 +3263,7 @@
:param Dict[Tuple[int, int], Polygon] geometries: Dictionary of polygons representing spatial regions. E.g., created by :func:`simba.mixins.geometry_mixin.GeometryMixin.bucket_img_into_grid_square` or :func:`simba.mixins.geometry_mixin.GeometryMixin.bucket_img_into_grid_hexagon`.
:param np.ndarray bool_data: Boolean array with shape (data.shape[0],) or (data.shape[0], 1) indicating the presence or absence in each frame.
:param Optional[float] fps: Frames per second. If provided, the result is normalized by the frame rate.
:param bool verbose: If True, prints progress. Default: True.
:param Optional[int] core_cnt: Number of CPU cores to use for parallel processing. Default is -1, which means using all available cores.
:returns: Matrix of size (frames x horizontal bins x vertical bins) with times in seconds (if fps passed) or frames (if fps not passed)
:rtype: np.ndarray
@@ -3275,39 +3277,14 @@
>>> (500, 4, 4)
"""

check_valid_array(
data=data,
accepted_sizes=[2],
source=f"{GeometryMixin.cumsum_bool_geometries.__name__} data",
)
check_instance(
source=f"{GeometryMixin.cumsum_bool_geometries.__name__} geometries",
instance=geometries,
accepted_types=dict,
)
check_valid_array(
data=bool_data,
accepted_shapes=[(data.shape[0], 1), (data.shape[0],)],
source=f"{GeometryMixin.cumsum_bool_geometries.__name__} bool_data",
)
check_valid_array(data=data, accepted_sizes=[2], source=f"{GeometryMixin.cumsum_bool_geometries.__name__} data")
check_instance(source=f"{GeometryMixin.cumsum_bool_geometries.__name__} geometries",instance=geometries,accepted_types=dict)
check_valid_array(data=bool_data,accepted_shapes=[(data.shape[0], 1), (data.shape[0],)],source=f"{GeometryMixin.cumsum_bool_geometries.__name__} bool_data")
if fps is not None:
check_float(
name=f"{GeometryMixin.cumsum_bool_geometries.__name__} fps",
value=fps,
min_value=1.0,
)
check_int(
name=f"{GeometryMixin.cumsum_bool_geometries.__name__} core_cnt",
value=core_cnt,
min_value=-1,
)
if not np.array_equal(
np.sort(np.unique(bool_data)).astype(int), np.array([0, 1])
):
raise InvalidInputError(
msg=f"Invalid boolean data. Expected {np.array([0, 1])} but found {np.sort(np.unique(bool_data)).astype(int)}",
source=GeometryMixin.cumsum_bool_geometries.__name__,
)
check_float(name=f"{GeometryMixin.cumsum_bool_geometries.__name__} fps", value=fps, min_value=1.0)
check_int(name=f"{GeometryMixin.cumsum_bool_geometries.__name__} core_cnt", value=core_cnt, min_value=-1)
if not np.array_equal(np.sort(np.unique(bool_data)).astype(int), np.array([0, 1])):
raise InvalidInputError(msg=f"Invalid boolean data. Expected {np.array([0, 1])} but found {np.sort(np.unique(bool_data)).astype(int)}", source=GeometryMixin.cumsum_bool_geometries.__name__)
if core_cnt == -1:
core_cnt = find_core_cnt()[0]
w, h = 0, 0
@@ -3320,12 +3297,10 @@
data = np.hstack((frm_id, data))
img_arr = np.zeros((data.shape[0], h + 1, w + 1))
data = data[np.argwhere((data[:, 3] == 1))].reshape(-1, 4)
with multiprocessing.Pool(
core_cnt, maxtasksperchild=Defaults.LARGE_MAX_TASK_PER_CHILD.value
) as pool:
constants = functools.partial(
self._cumsum_bool_helper, geometries=geometries
)
with multiprocessing.Pool(core_cnt, maxtasksperchild=Defaults.LARGE_MAX_TASK_PER_CHILD.value) as pool:
constants = functools.partial(self._cumsum_bool_helper,
geometries=geometries,
verbose=verbose)
for cnt, result in enumerate(pool.imap(constants, data, chunksize=1)):
if result[1] != -1:
img_arr[result[0], result[2] - 1, result[1] - 1] = 1
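A hedged sketch of the new `verbose` flag (not part of the committed docstring): two hand-built grid squares stand in for the output of `bucket_img_into_grid_square`, and the `(column, row)` key convention is an assumption made only for illustration.

    import numpy as np
    from shapely.geometry import Polygon
    from simba.mixins.geometry_mixin import GeometryMixin

    geometries = {(1, 1): Polygon([(0, 0), (100, 0), (100, 100), (0, 100)]),
                  (2, 1): Polygon([(100, 0), (200, 0), (200, 100), (100, 100)])}
    data = np.random.randint(0, 200, (500, 2)).astype(np.float64)    # animal x, y per frame
    bool_data = np.random.randint(0, 2, (500,))                      # classified event per frame
    cum_sums = GeometryMixin().cumsum_bool_geometries(data=data,
                                                      geometries=geometries,
                                                      bool_data=bool_data,
                                                      fps=15.0,
                                                      verbose=False)  # silence per-frame prints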
2 changes: 1 addition & 1 deletion simba/mixins/plotting_mixin.py
@@ -497,7 +497,7 @@ def make_location_heatmap_plot(frm_data: np.array,
canvas.draw()
mat = np.array(canvas.renderer._renderer)
image = cv2.cvtColor(mat, cv2.COLOR_RGB2BGR)
image = cv2.resize(mat, img_size)
image = cv2.resize(image, img_size)
image = np.uint8(image)
plt.close("all")
if file_name is not None:
46 changes: 40 additions & 6 deletions simba/mixins/statistics_mixin.py
@@ -1958,15 +1958,17 @@ def sliding_kendall_tau(sample_1: np.ndarray, sample_2: np.ndarray, time_windows
return results

@staticmethod
def find_collinear_features(
df: pd.DataFrame,
threshold: float,
method: Optional[Literal["pearson", "spearman", "kendall"]] = "pearson",
verbose: Optional[bool] = False,
) -> List[str]:
def find_collinear_features(df: pd.DataFrame,
threshold: float,
method: Optional[Literal["pearson", "spearman", "kendall"]] = "pearson",
verbose: Optional[bool] = False) -> List[str]:

"""
Identify collinear features in the dataframe based on the specified correlation method and threshold.
.. seealso::
For multicore numba accelerated method, see :func:`simba.mixins.train_model_mixin.TrainModelMixin.find_highly_correlated_fields`.
:param pd.DataFrame df: Input DataFrame containing features.
:param float threshold: Threshold value to determine collinearity.
:param Optional[Literal['pearson', 'spearman', 'kendall']] method: Method for calculating correlation. Defaults to 'pearson'.
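A hedged usage sketch: build a frame with one deliberately near-collinear pair and drop whatever names the method returns (which member of a correlated pair ends up in the returned list is left to the implementation).

    import numpy as np
    import pandas as pd
    from simba.mixins.statistics_mixin import Statistics

    x = np.random.random(1000)
    df = pd.DataFrame({'feature_1': x,
                       'feature_2': x * 2 + np.random.random(1000) * 1e-3,   # near-collinear with feature_1
                       'feature_3': np.random.random(1000)})
    redundant = Statistics.find_collinear_features(df=df, threshold=0.95, method='pearson', verbose=True)
    df_reduced = df.drop(columns=redundant)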
@@ -4343,3 +4345,35 @@ def symmetry_index(x: np.ndarray, y: np.ndarray, agg_type: Literal['mean', 'medi
else:
return np.float32(np.nanmedian(si_values))

@staticmethod
@njit("(float32[:], float64, float64)")
def sliding_iqr(x: np.ndarray, window_size: float, sample_rate: float) -> np.ndarray:
"""
Compute the sliding interquartile range (IQR) for a 1D array of feature values.
:param ndarray x: 1D array representing the feature values for which the IQR will be calculated.
:param float window_size: Size of the sliding window, in seconds. This value determines how many samples are included in each window.
:param float sample_rate: The sampling rate in samples per second, e.g., fps.
:returns: Sliding IQR values
:rtype: np.ndarray
:references:
.. [1] Hession, Leinani E., Gautam S. Sabnis, Gary A. Churchill, and Vivek Kumar. “A Machine-Vision-Based Frailty Index for Mice.” Nature Aging 2, no. 8 (August 16, 2022): 756–66. https://doi.org/10.1038/s43587-022-00266-0.
:example:
>>> data = np.random.randint(0, 50, (90,)).astype(np.float32)
>>> window_size = 0.5
>>> Statistics.sliding_iqr(x=data, window_size=0.5, sample_rate=10.0)
"""

frm_win = max(1, int(window_size * sample_rate))
results = np.full(shape=(x.shape[0],), dtype=np.float32, fill_value=-1.0)
for r in range(frm_win, x.shape[0] + 1):
sorted_sample = np.sort(x[r - frm_win:r])
lower_idx = sorted_sample.shape[0] // 4
upper_idx = (3 * sorted_sample.shape[0]) // 4
lower_val = sorted_sample[lower_idx]
upper_val = sorted_sample[upper_idx]
results[r - 1] = upper_val - lower_val
return results
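For readers without numba, a plain-NumPy reference (not from the repository) that mirrors the index-based quartile definition above (sorted[3n//4] - sorted[n//4], rather than interpolated percentiles).

    import numpy as np

    def sliding_iqr_np(x: np.ndarray, window_size: float, sample_rate: float) -> np.ndarray:
        # Mirror of the numba implementation above, kept for readability and verification.
        frm_win = max(1, int(window_size * sample_rate))
        results = np.full(x.shape[0], -1.0, dtype=np.float32)
        for r in range(frm_win, x.shape[0] + 1):
            s = np.sort(x[r - frm_win:r])
            results[r - 1] = s[(3 * s.shape[0]) // 4] - s[s.shape[0] // 4]
        return results

    data = np.random.randint(0, 50, (90,)).astype(np.float32)
    iqr_vals = sliding_iqr_np(data, window_size=0.5, sample_rate=10.0)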

15 changes: 12 additions & 3 deletions simba/mixins/timeseries_features_mixin.py
@@ -428,7 +428,16 @@ def sliding_percent_beyond_n_std(data: np.ndarray, n: float, window_sizes: np.nd
(int64[:], float64[:], int64),
]
)
def sliding_unique(x: np.ndarray, time_windows: np.ndarray, fps: int):
def sliding_unique(x: np.ndarray, time_windows: np.ndarray, fps: int) -> np.ndarray:
"""
Compute the number of unique values in a sliding window over an array of feature values.
:param np.ndarray x: 1D array of feature values for which the unique values are to be counted.
:param np.ndarray time_windows: Array of window sizes (in seconds) for which the unique values are counted.
:param int fps: The frame rate in frames per second, which is used to calculate the window size in samples.
:return: A 2D array of shape (x.shape[0], time_windows.shape[0]) where each column corresponds to a time window and each element is the count of unique values in the sliding window of `x` ending at that frame.
:rtype: np.ndarray
"""
results = np.full((x.shape[0], time_windows.shape[0]), -1)
for i in prange(time_windows.shape[0]):
window_size = int(time_windows[i] * fps)
@@ -922,10 +931,10 @@ def sliding_descriptive_statistics(data: np.ndarray, window_sizes: np.ndarray, s
results[j, r - 1, i] = np.median(sample)
elif statistics[j] == "mean":
results[j, r - 1, i] = np.mean(sample)
elif statistics[j] == "mad":
results[j, r - 1, i] = np.median(np.abs(sample - np.median(sample)))
elif statistics[j] == "sum":
results[j, r - 1, i] = np.sum(sample)
elif statistics[j] == "mad":
results[j, r - 1, i] = np.median(np.abs(sample - np.median(sample)))
elif statistics[j] == "mac":
results[j, r - 1, i] = np.mean(np.abs(sample[1:] - sample[:-1]))
elif statistics[j] == "rms":
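A hedged usage sketch for the `sliding_unique` helper documented above; dtypes follow the njit signature in that hunk ((int64[:], float64[:], int64)), and the class name `TimeseriesFeatureMixin` is assumed from the module name.

    import numpy as np
    from simba.mixins.timeseries_features_mixin import TimeseriesFeatureMixin

    x = np.random.randint(0, 10, (300,)).astype(np.int64)     # e.g., discretized feature values
    time_windows = np.array([0.5, 1.0], dtype=np.float64)     # window sizes in seconds
    unique_counts = TimeseriesFeatureMixin.sliding_unique(x, time_windows, 30)
    # unique_counts has shape (300, 2): one column per window; -1 before the first full window.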
