diff --git a/docs/src/references/selection.rst b/docs/src/references/selection.rst index a9149d4ab..db451df9b 100644 --- a/docs/src/references/selection.rst +++ b/docs/src/references/selection.rst @@ -80,7 +80,7 @@ Farthest Point Sampling is a common selection technique intended to exploit the diversity of the input space. In FPS, the selection of the first point is made at random or by a separate metric. Each -subsequent selection is made to maximize the Haussdorf distance, i.e. the minimum +subsequent selection is made to maximize the Hausdorff distance, i.e. the minimum distance between a point and all previous selections. It is common to use the Euclidean distance, however other distance metrics may be employed. diff --git a/src/skmatter/_selection.py b/src/skmatter/_selection.py index fc0bf0113..0b382b07c 100644 --- a/src/skmatter/_selection.py +++ b/src/skmatter/_selection.py @@ -965,11 +965,11 @@ def __init__( def score(self, X, y=None): """ - Returns the Haussdorf distances of all samples to previous selections + Returns the Hausdorff distances of all samples to previous selections NOTE: This function does not compute the importance score each time it - is called, in order to avoid unnecessary computations. The haussdorf - distance is updated in :py:func:`self._update_haussdorf` + is called, in order to avoid unnecessary computations. The hausdorff + distance is updated in :py:func:`self._update_hausdorff` Parameters ---------- @@ -978,9 +978,9 @@ def score(self, X, y=None): Returns ------- - haussdorf : Haussdorf distances + hausdorff : Hausdorff distances """ - return self.haussdorf_ + return self.hausdorff_ def get_distance(self): """ @@ -1002,13 +1002,13 @@ def get_distance(self): Returns ------- - haussdorf : ndarray of shape (`n_to_select_from_`) + hausdorff : ndarray of shape (`n_to_select_from_`) the minimum distance from each point to the set of selected points. 
once a point is selected, the distance is not updated; the final list will reflect the distances when selected. """ - return self.haussdorf_ + return self.hausdorff_ def get_select_distance(self): """ @@ -1016,26 +1016,26 @@ def get_select_distance(self): Returns ------- - haussdorf_at_select : ndarray of shape (`n_to_select`) + hausdorff_at_select : ndarray of shape (`n_to_select`) at the time of selection, the minimum distance from each selected point to the set of previously selected points. """ mask = self.get_support(indices=True, ordered=True) - return self.haussdorf_at_select_[mask] + return self.hausdorff_at_select_[mask] def _init_greedy_search(self, X, y, n_to_select): """ Initializes the search. Prepares an array to store the selections, makes the initial selection (unless provided), and - computes the starting haussdorf distances. + computes the starting hausdorff distances. """ super()._init_greedy_search(X, y, n_to_select) self.norms_ = (X**2).sum(axis=abs(self._axis - 1)) - self.haussdorf_ = np.full(X.shape[self._axis], np.inf) - self.haussdorf_at_select_ = np.full(X.shape[self._axis], np.inf) + self.hausdorff_ = np.full(X.shape[self._axis], np.inf) + self.hausdorff_at_select_ = np.full(X.shape[self._axis], np.inf) if self.initialize == "random": random_state = check_random_state(self.random_state) @@ -1055,8 +1055,8 @@ def _init_greedy_search(self, X, y, n_to_select): else: raise ValueError("Invalid value of the initialize parameter") - def _update_haussdorf(self, X, y, last_selected): - self.haussdorf_at_select_[last_selected] = self.haussdorf_[last_selected] + def _update_hausdorff(self, X, y, last_selected): + self.hausdorff_at_select_[last_selected] = self.hausdorff_[last_selected] # distances of all points to the new point if self._axis == 1: @@ -1068,15 +1068,15 @@ def _update_haussdorf(self, X, y, last_selected): self.norms_ + self.norms_[last_selected] - 2 * X[last_selected] @ X.T ) - # update in-place the Haussdorf distance list - 
np.minimum(self.haussdorf_, new_dist, self.haussdorf_) + # update in-place the Hausdorff distance list + np.minimum(self.hausdorff_, new_dist, self.hausdorff_) def _update_post_selection(self, X, y, last_selected): """ Saves the most recent selections, increments the counter, - and, recomputes haussdorf distances. + and, recomputes hausdorff distances. """ - self._update_haussdorf(X, y, last_selected) + self._update_hausdorff(X, y, last_selected) super()._update_post_selection(X, y, last_selected) @@ -1135,11 +1135,11 @@ def __init__( def score(self, X, y=None): """ - Returns the Haussdorf distances of all samples to previous selections + Returns the Hausdorff distances of all samples to previous selections NOTE: This function does not compute the importance score each time it - is called, in order to avoid unnecessary computations. The haussdorf - distance is updated in :py:func:`self._update_haussdorf` + is called, in order to avoid unnecessary computations. The hausdorff + distance is updated in :py:func:`self._update_hausdorff` Parameters ---------- @@ -1148,9 +1148,9 @@ def score(self, X, y=None): Returns ------- - haussdorf : Haussdorf distances + hausdorff : Hausdorff distances """ - return self.haussdorf_ + return self.hausdorff_ def get_distance(self): """ @@ -1158,13 +1158,13 @@ def get_distance(self): Returns ------- - haussdorf : ndarray of shape (`n_to_select_from_`) + hausdorff : ndarray of shape (`n_to_select_from_`) the minimum distance from each point to the set of selected points. once a point is selected, the distance is not updated; the final list will reflect the distances when selected. 
""" - return self.haussdorf_ + return self.hausdorff_ def get_select_distance(self): """ @@ -1172,19 +1172,19 @@ def get_select_distance(self): Returns ------- - haussdorf_at_select : ndarray of shape (`n_to_select`) + hausdorff_at_select : ndarray of shape (`n_to_select`) at the time of selection, the minimum distance from each selected point to the set of previously selected points. """ mask = self.get_support(indices=True, ordered=True) - return self.haussdorf_at_select_[mask] + return self.hausdorff_at_select_[mask] def _init_greedy_search(self, X, y, n_to_select): """ Initializes the search. Prepares an array to store the selections, makes the initial selection (unless provided), and - computes the starting haussdorf distances. + computes the starting hausdorff distances. """ super()._init_greedy_search(X, y, n_to_select) @@ -1205,12 +1205,12 @@ def _init_greedy_search(self, X, y, n_to_select): raise ValueError("Invalid value of the initialize parameter") self.selected_idx_[0] = initialize - self.haussdorf_ = np.full(X.shape[self._axis], np.inf) - self.haussdorf_at_select_ = np.full(X.shape[self._axis], np.inf) + self.hausdorff_ = np.full(X.shape[self._axis], np.inf) + self.hausdorff_at_select_ = np.full(X.shape[self._axis], np.inf) self._update_post_selection(X, y, self.selected_idx_[0]) - def _update_haussdorf(self, X, y, last_selected): - self.haussdorf_at_select_[last_selected] = self.haussdorf_[last_selected] + def _update_hausdorff(self, X, y, last_selected): + self.hausdorff_at_select_[last_selected] = self.hausdorff_[last_selected] # distances of all points to the new point new_dist = ( @@ -1219,15 +1219,15 @@ def _update_haussdorf(self, X, y, last_selected): - 2 * np.take(self.pcovr_distance_, last_selected, axis=self._axis) ) - # update in-place the Haussdorf distance list - np.minimum(self.haussdorf_, new_dist, self.haussdorf_) + # update in-place the Hausdorff distance list + np.minimum(self.hausdorff_, new_dist, self.hausdorff_) def 
_update_post_selection(self, X, y, last_selected): """ Saves the most recent selections, increments the counter, - and, recomputes haussdorf distances. + and, recomputes hausdorff distances. """ - self._update_haussdorf(X, y, last_selected) + self._update_hausdorff(X, y, last_selected) super()._update_post_selection(X, y, last_selected) def _more_tags(self): diff --git a/src/skmatter/sample_selection/_voronoi_fps.py b/src/skmatter/sample_selection/_voronoi_fps.py index fd8ac297d..6490bda19 100644 --- a/src/skmatter/sample_selection/_voronoi_fps.py +++ b/src/skmatter/sample_selection/_voronoi_fps.py @@ -93,10 +93,10 @@ def __init__( def score(self, X=None, y=None): """ - Returns the Haussdorf distances of all samples to previous selections + Returns the Hausdorff distances of all samples to previous selections NOTE: This function does not compute the importance score each time it - is called, in order to avoid unnecessary computations. The haussdorf + is called, in order to avoid unnecessary computations. The hausdorff distance is updated in :py:func:`self._update_post_selection` Parameters @@ -106,9 +106,9 @@ def score(self, X=None, y=None): Returns ------- - haussdorf : Haussdorf distances + hausdorff : Hausdorff distances """ - return self.haussdorf_ + return self.hausdorff_ def get_distance(self): """ @@ -130,13 +130,13 @@ def get_distance(self): Returns ------- - haussdorf : ndarray of shape (`n_to_select_from_`) + hausdorff : ndarray of shape (`n_to_select_from_`) the minimum distance from each point to the set of selected points. once a point is selected, the distance is not updated; the final list will reflect the distances when selected. 
""" - return self.haussdorf_ + return self.hausdorff_ def get_select_distance(self): """ @@ -144,13 +144,13 @@ def get_select_distance(self): Returns ------- - haussdorf_at_select : ndarray of shape (`n_to_select`) + hausdorff_at_select : ndarray of shape (`n_to_select`) at the time of selection, the minimum distance from each selected point to the set of previously selected points. """ mask = self.get_support(indices=True, ordered=True) - return self.haussdorf_at_select_[mask] + return self.hausdorff_at_select_[mask] def _init_greedy_search(self, X, y, n_to_select): """ @@ -233,8 +233,8 @@ def _init_greedy_search(self, X, y, n_to_select): raise ValueError("Invalid value of the initialize parameter") self.selected_idx_[0] = initialize - self.haussdorf_ = np.full(X.shape[self._axis], np.inf) - self.haussdorf_at_select_ = np.full(X.shape[self._axis], np.inf) + self.hausdorff_ = np.full(X.shape[self._axis], np.inf) + self.hausdorff_at_select_ = np.full(X.shape[self._axis], np.inf) self._update_post_selection(X, y, self.selected_idx_[0]) def _continue_greedy_search(self, X, y, n_to_select): @@ -264,7 +264,7 @@ def _get_active(self, X, last_selected): S are the selected points from before this iteration; X are the candidates; The logic here is that we want to check if d(XL) can be smaller than - min(d(X,S)) (which is stored in self.haussdorf_) + min(d(X,S)) (which is stored in self.hausdorff_) now, if a point belongs to the Voronoi cell of S then min(d(X,S_i))=d(X,S). 
Triangle inequality implies that d(S,L) < |d(X,S) + d(L,X)| so we just need to check if @@ -284,7 +284,7 @@ def _get_active(self, X, last_selected): # calculation in a single block active_points = np.where( - self.dSL_[self.vlocation_of_idx] < self.haussdorf_ + self.dSL_[self.vlocation_of_idx] < self.hausdorff_ )[0] return active_points @@ -292,7 +292,7 @@ def _get_active(self, X, last_selected): def _update_post_selection(self, X, y, last_selected): """ Saves the most recently selected feature, increments the feature counter - and update the haussdorf distances + and update the hausdorff distances Let: L is the last point selected; S are the selected points from before this iteration; @@ -303,7 +303,7 @@ def _update_post_selection(self, X, y, last_selected): the distances between L and all the points in the dataset. """ - self.haussdorf_at_select_[last_selected] = self.haussdorf_[last_selected] + self.hausdorff_at_select_[last_selected] = self.hausdorff_[last_selected] active_points = self._get_active(X, last_selected) if len(active_points) > 0: @@ -314,7 +314,7 @@ def _update_post_selection(self, X, y, last_selected): - 2 * X[last_selected] @ X.T ) else: - self.new_dist_ = self.haussdorf_.copy() + self.new_dist_ = self.hausdorff_.copy() self.new_dist_[active_points] = ( self.norms_[active_points] @@ -323,9 +323,9 @@ def _update_post_selection(self, X, y, last_selected): ) self.new_dist_[last_selected] = 0 - updated_points = np.where(self.new_dist_ < self.haussdorf_)[0] + updated_points = np.where(self.new_dist_ < self.hausdorff_)[0] np.minimum( - self.haussdorf_, self.new_dist_, self.haussdorf_, casting="unsafe" + self.hausdorff_, self.new_dist_, self.hausdorff_, casting="unsafe" ) else: updated_points = np.array([]) diff --git a/tests/test_feature_simple_fps.py b/tests/test_feature_simple_fps.py index 50408e676..b29a2bc7b 100644 --- a/tests/test_feature_simple_fps.py +++ b/tests/test_feature_simple_fps.py @@ -49,7 +49,7 @@ def test_initialize(self): def 
test_get_distances(self): """ - This test checks that the haussdorf distances are returnable after fitting + This test checks that the hausdorff distances are returnable after fitting """ selector = FPS(n_to_select=7) selector.fit(self.X) diff --git a/tests/test_sample_simple_fps.py b/tests/test_sample_simple_fps.py index 7d192159c..ca7ee4bee 100644 --- a/tests/test_sample_simple_fps.py +++ b/tests/test_sample_simple_fps.py @@ -52,7 +52,7 @@ def test_initialize(self): def test_get_distances(self): """ - This test checks that the haussdorf distances are returnable after fitting + This test checks that the hausdorff distances are returnable after fitting """ selector = FPS(n_to_select=1) selector.fit(self.X) diff --git a/tests/test_voronoi_fps.py b/tests/test_voronoi_fps.py index 41fb63ddc..1a2c6b314 100644 --- a/tests/test_voronoi_fps.py +++ b/tests/test_voronoi_fps.py @@ -95,7 +95,7 @@ def test_switching_point(self): def test_get_distances(self): """ - This test checks that the haussdorf distances are returnable after fitting + This test checks that the hausdorff distances are returnable after fitting """ selector = VoronoiFPS(n_to_select=1) selector.fit(self.X) @@ -141,7 +141,7 @@ def test_calculate_dSL(self): selector.fit(self.X) active_points = np.where( - selector.dSL_[selector.vlocation_of_idx] < selector.haussdorf_ + selector.dSL_[selector.vlocation_of_idx] < selector.hausdorff_ )[0] ap = selector._get_active(self.X, selector.selected_idx_[-1]) @@ -165,13 +165,13 @@ def test_calculate_dSL(self): ) def test_score(self): - """This test check that function score return haussdorf distance""" + """This test check that function score return hausdorff distance""" selector = VoronoiFPS(n_to_select=3, initialize=0) selector.fit(self.X) self.assertTrue( np.allclose( - selector.haussdorf_, + selector.hausdorff_, selector.score(self.X, selector.selected_idx_[-1]), ) )