diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 20a81fd..0000000 --- a/.travis.yml +++ /dev/null @@ -1,30 +0,0 @@ -dist: xenial -language: python - -# some configuration for llvm (numba) -env: - global: - PIP_PREFER_BINARY=true # https://github.com/numba/llvmlite/issues/471 - -# Existing Python versions -python: - - "3.5" - - "3.6" - - "3.7" - - "3.8" - -# command to install dependencies -install: - - pip install -r requirements.txt - - python setup.py -q install - - pip install -r requirements_ci.txt - -# run tests -script: - # first disable Numba so the Python coverage reporting is correct - - export NUMBA_DISABLE_JIT=1 - - python -m pytest --cov=PyNomaly - -# report results -after_success: - - coveralls \ No newline at end of file diff --git a/MANIFEST b/MANIFEST deleted file mode 100644 index 03fc5a3..0000000 --- a/MANIFEST +++ /dev/null @@ -1,5 +0,0 @@ -# file GENERATED by distutils, do NOT edit -setup.cfg -setup.py -PyNomaly/__init__.py -PyNomaly/loop.py diff --git a/PyNomaly/loop.py b/PyNomaly/loop.py index 8746dfb..95d0ca7 100644 --- a/PyNomaly/loop.py +++ b/PyNomaly/loop.py @@ -10,13 +10,12 @@ except ImportError: pass -__author__ = 'Valentino Constantinou' -__version__ = '0.3.3' -__license__ = 'Apache License, Version 2.0' +__author__ = "Valentino Constantinou" +__version__ = "0.3.4" +__license__ = "Apache License, Version 2.0" class Utils: - @staticmethod def emit_progress_bar(progress: str, index: int, total: int) -> str: """ @@ -55,7 +54,7 @@ class LocalOutlierProbability(object): :param cluster_labels: a numpy array of cluster assignments w.r.t. each sample (optional, default None) :return: - """""" + """ """ Based on the work of Kriegel, Kröger, Schubert, and Zimek (2009) in LoOP: Local Outlier Probabilities. 
@@ -93,7 +92,7 @@ class Validate: """ @staticmethod - def _data(obj: Union['pd.DataFrame', np.ndarray]) -> np.ndarray: + def _data(obj: Union["pd.DataFrame", np.ndarray]) -> np.ndarray: """ Validates the input data to ensure it is either a Pandas DataFrame or Numpy array. @@ -101,24 +100,25 @@ def _data(obj: Union['pd.DataFrame', np.ndarray]) -> np.ndarray: :return: a vector of values to be used in calculating the local outlier probability. """ - if obj.__class__.__name__ == 'DataFrame': + if obj.__class__.__name__ == "DataFrame": points_vector = obj.values return points_vector - elif obj.__class__.__name__ == 'ndarray': + elif obj.__class__.__name__ == "ndarray": points_vector = obj return points_vector else: warnings.warn( "Provided data or distance matrix must be in ndarray " "or DataFrame.", - UserWarning) + UserWarning, + ) if isinstance(obj, list): points_vector = np.array(obj) return points_vector points_vector = np.array([obj]) return points_vector - def _inputs(self, obj: 'LocalOutlierProbability'): + def _inputs(self, obj: "LocalOutlierProbability"): """ Validates the inputs provided during initialization to ensure that the needed objects are provided. 
@@ -134,35 +134,43 @@ def _inputs(self, obj: 'LocalOutlierProbability'): elif all(v is not None for v in [obj.data, obj.distance_matrix]): warnings.warn( "Only one of the following may be provided: data or a " - "distance matrix (not both).", UserWarning + "distance matrix (not both).", + UserWarning, ) return False if obj.data is not None: points_vector = self._data(obj.data) return points_vector, obj.distance_matrix, obj.neighbor_matrix - if all(matrix is not None for matrix in [obj.neighbor_matrix, - obj.distance_matrix]): + if all( + matrix is not None + for matrix in [obj.neighbor_matrix, obj.distance_matrix] + ): dist_vector = self._data(obj.distance_matrix) neigh_vector = self._data(obj.neighbor_matrix) else: warnings.warn( "A neighbor index matrix and distance matrix must both be " - "provided when not using raw input data.", UserWarning + "provided when not using raw input data.", + UserWarning, ) return False if obj.distance_matrix.shape != obj.neighbor_matrix.shape: warnings.warn( "The shape of the distance and neighbor " - "index matrices must match.", UserWarning + "index matrices must match.", + UserWarning, ) return False - elif (obj.distance_matrix.shape[1] != obj.n_neighbors) \ - or (obj.neighbor_matrix.shape[1] != - obj.n_neighbors): - warnings.warn("The shape of the distance or " - "neighbor index matrix does not " - "match the number of neighbors " - "specified.", UserWarning) + elif (obj.distance_matrix.shape[1] != obj.n_neighbors) or ( + obj.neighbor_matrix.shape[1] != obj.n_neighbors + ): + warnings.warn( + "The shape of the distance or " + "neighbor index matrix does not " + "match the number of neighbors " + "specified.", + UserWarning, + ) return False return obj.data, dist_vector, neigh_vector @@ -184,7 +192,8 @@ def _cluster_size(obj) -> bool: "cluster. 
Specify a number of neighbors smaller than " "the smallest cluster size (observations in smallest " "cluster minus one).", - UserWarning) + UserWarning, + ) return False return True @@ -199,17 +208,19 @@ def _n_neighbors(obj) -> bool: """ if not obj.n_neighbors > 0: obj.n_neighbors = 10 - warnings.warn("n_neighbors must be greater than 0." - " Fit with " + str(obj.n_neighbors) + - " instead.", - UserWarning) + warnings.warn( + "n_neighbors must be greater than 0." + " Fit with " + str(obj.n_neighbors) + " instead.", + UserWarning, + ) return False elif obj.n_neighbors >= obj._n_observations(): obj.n_neighbors = obj._n_observations() - 1 warnings.warn( "n_neighbors must be less than the number of observations." " Fit with " + str(obj.n_neighbors) + " instead.", - UserWarning) + UserWarning, + ) return True @staticmethod @@ -222,8 +233,8 @@ def _extent(obj) -> bool: """ if obj.extent not in [1, 2, 3]: warnings.warn( - "extent parameter (lambda) must be 1, 2, or 3.", - UserWarning) + "extent parameter (lambda) must be 1, 2, or 3.", UserWarning + ) return False return True @@ -237,8 +248,8 @@ def _missing_values(obj) -> bool: """ if np.any(np.isnan(obj.data)): warnings.warn( - "Method does not support missing values in input data.", - UserWarning) + "Method does not support missing values in input data.", UserWarning + ) return False return True @@ -254,7 +265,8 @@ def _fit(obj) -> bool: warnings.warn( "Must fit on historical data by calling fit() prior to " "calling stream(x).", - UserWarning) + UserWarning, + ) return False return True @@ -272,7 +284,8 @@ def _no_cluster_labels(obj) -> bool: warnings.warn( "Stream approach does not support clustered data. 
" "Automatically refit using single cluster of points.", - UserWarning) + UserWarning, + ) return False return True @@ -294,43 +307,35 @@ def decorator(f): assert len(types) == f.__code__.co_argcount def new_f(*args, **kwds): - for (a, t) in zip(args, types): - if type(a).__name__ == 'DataFrame': + for a, t in zip(args, types): + if type(a).__name__ == "DataFrame": a = np.array(a) if isinstance(a, t) is False: - warnings.warn("Argument %r is not of type %s" % (a, t), - UserWarning) + warnings.warn( + "Argument %r is not of type %s" % (a, t), UserWarning + ) opt_types = { - 'distance_matrix': { - 'type': types[2] - }, - 'neighbor_matrix': { - 'type': types[3] - }, - 'extent': { - 'type': types[4] - }, - 'n_neighbors': { - 'type': types[5] - }, - 'cluster_labels': { - 'type': types[6] - }, - 'use_numba': { - 'type': types[7] - }, - 'progress_bar': { - 'type': types[8] - } + "distance_matrix": {"type": types[2]}, + "neighbor_matrix": {"type": types[3]}, + "extent": {"type": types[4]}, + "n_neighbors": {"type": types[5]}, + "cluster_labels": {"type": types[6]}, + "use_numba": {"type": types[7]}, + "progress_bar": {"type": types[8]}, } for x in kwds: - opt_types[x]['value'] = kwds[x] + opt_types[x]["value"] = kwds[x] for k in opt_types: try: - if isinstance(opt_types[k]['value'], - opt_types[k]['type']) is False: - warnings.warn("Argument %r is not of type %s." % ( - k, opt_types[k]['type']), UserWarning) + if ( + isinstance(opt_types[k]["value"], opt_types[k]["type"]) + is False + ): + warnings.warn( + "Argument %r is not of type %s." 
+ % (k, opt_types[k]["type"]), + UserWarning, + ) except KeyError: pass return f(*args, **kwds) @@ -340,11 +345,28 @@ def new_f(*args, **kwds): return decorator - @accepts(object, np.ndarray, np.ndarray, np.ndarray, (int, np.integer), - (int, np.integer), list, bool, bool) - def __init__(self, data=None, distance_matrix=None, neighbor_matrix=None, - extent=3, n_neighbors=10, cluster_labels=None, - use_numba=False, progress_bar=False) -> None: + @accepts( + object, + np.ndarray, + np.ndarray, + np.ndarray, + (int, np.integer), + (int, np.integer), + list, + bool, + bool, + ) + def __init__( + self, + data=None, + distance_matrix=None, + neighbor_matrix=None, + extent=3, + n_neighbors=10, + cluster_labels=None, + use_numba=False, + progress_bar=False, + ) -> None: self.data = data self.distance_matrix = distance_matrix self.neighbor_matrix = neighbor_matrix @@ -361,11 +383,11 @@ def __init__(self, data=None, distance_matrix=None, neighbor_matrix=None, self.progress_bar = progress_bar self.is_fit = False - if self.use_numba is True and 'numba' not in sys.modules: + if self.use_numba is True and "numba" not in sys.modules: self.use_numba = False warnings.warn( - "Numba is not available, falling back to pure python mode.", - UserWarning) + "Numba is not available, falling back to pure python mode.", UserWarning + ) self.Validate()._inputs(self) self.Validate._extent(self) @@ -375,15 +397,14 @@ def __init__(self, data=None, distance_matrix=None, neighbor_matrix=None, """ @staticmethod - def _standard_distance(cardinality: float, sum_squared_distance: float) \ - -> float: + def _standard_distance(cardinality: float, sum_squared_distance: float) -> float: """ Calculates the standard distance of an observation. :param cardinality: the cardinality of the input observation. :param sum_squared_distance: the sum squared distance between all neighbors of the input observation. :return: the standard distance. 
- # """ + #""" division_result = sum_squared_distance / cardinality st_dist = sqrt(division_result) return st_dist @@ -400,8 +421,9 @@ def _prob_distance(extent: int, standard_distance: float) -> float: return extent * standard_distance @staticmethod - def _prob_outlier_factor(probabilistic_distance: np.ndarray, ev_prob_dist: - np.ndarray) -> np.ndarray: + def _prob_outlier_factor( + probabilistic_distance: np.ndarray, ev_prob_dist: np.ndarray + ) -> np.ndarray: """ Calculates the probabilistic outlier factor of an observation. :param probabilistic_distance: the probabilistic distance of the @@ -412,14 +434,14 @@ def _prob_outlier_factor(probabilistic_distance: np.ndarray, ev_prob_dist: if np.all(probabilistic_distance == ev_prob_dist): return np.zeros(probabilistic_distance.shape) else: - ev_prob_dist[ev_prob_dist == 0.] = 1.e-8 - result = np.divide(probabilistic_distance, ev_prob_dist) - 1. + ev_prob_dist[ev_prob_dist == 0.0] = 1.0e-8 + result = np.divide(probabilistic_distance, ev_prob_dist) - 1.0 return result @staticmethod - def _norm_prob_outlier_factor(extent: float, - ev_probabilistic_outlier_factor: list) \ - -> list: + def _norm_prob_outlier_factor( + extent: float, ev_probabilistic_outlier_factor: list + ) -> list: """ Calculates the normalized probabilistic outlier factor of an observation. @@ -434,8 +456,9 @@ def _norm_prob_outlier_factor(extent: float, return npofs @staticmethod - def _local_outlier_probability(plof_val: np.ndarray, nplof_val: np.ndarray) \ - -> np.ndarray: + def _local_outlier_probability( + plof_val: np.ndarray, nplof_val: np.ndarray + ) -> np.ndarray: """ Calculates the local outlier probability of an observation. 
:param plof_val: the probabilistic outlier factor of the input @@ -448,7 +471,7 @@ def _local_outlier_probability(plof_val: np.ndarray, nplof_val: np.ndarray) \ if np.all(plof_val == nplof_val): return np.zeros(plof_val.shape) else: - return np.maximum(0, erf_vec(plof_val / (nplof_val * np.sqrt(2.)))) + return np.maximum(0, erf_vec(plof_val / (nplof_val * np.sqrt(2.0)))) def _n_observations(self) -> int: """ @@ -502,8 +525,9 @@ def _assign_distances(self, data_store: np.ndarray) -> np.ndarray: :return: the updated storage matrix that collects information on each observation. """ - for vec, cluster_id in zip(range(self.distance_matrix.shape[0]), - self._cluster_labels()): + for vec, cluster_id in zip( + range(self.distance_matrix.shape[0]), self._cluster_labels() + ): data_store[vec][0] = cluster_id data_store[vec][1] = self.distance_matrix[vec] data_store[vec][2] = self.neighbor_matrix[vec] @@ -511,10 +535,10 @@ def _assign_distances(self, data_store: np.ndarray) -> np.ndarray: @staticmethod def _compute_distance_and_neighbor_matrix( - clust_points_vector: np.ndarray, - indices: np.ndarray, - distances: np.ndarray, - indexes: np.ndarray + clust_points_vector: np.ndarray, + indices: np.ndarray, + distances: np.ndarray, + indexes: np.ndarray, ) -> Tuple[np.ndarray, np.ndarray, int]: """ This helper method provides the heavy lifting for the _distances @@ -522,27 +546,27 @@ def _compute_distance_and_neighbor_matrix( written so that it can make full use of Numba's jit capabilities if desired. 
""" - for i in range(clust_points_vector.shape[0]): for j in range(i + 1, clust_points_vector.shape[0]): - p = ((i,), (j,)) + # Global index of the points + global_i = indices[0][i] + global_j = indices[0][j] - diff = clust_points_vector[p[0]] - clust_points_vector[p[1]] + # Compute Euclidean distance + diff = clust_points_vector[i] - clust_points_vector[j] d = np.dot(diff, diff) ** 0.5 - idx = indices[0][p[0]] - idx_max = distances[idx].argmax() + # Update distance and neighbor index for global_i + idx_max = distances[global_i].argmax() + if d < distances[global_i][idx_max]: + distances[global_i][idx_max] = d + indexes[global_i][idx_max] = global_j - if d < distances[idx][idx_max]: - distances[idx][idx_max] = d - indexes[idx][idx_max] = p[1][0] - - idx = indices[0][p[1]] - idx_max = distances[idx].argmax() - - if d < distances[idx][idx_max]: - distances[idx][idx_max] = d - indexes[idx][idx_max] = p[0][0] + # Update distance and neighbor index for global_j + idx_max = distances[global_j].argmax() + if d < distances[global_j][idx_max]: + distances[global_j][idx_max] = d + indexes[global_j][idx_max] = global_i yield distances, indexes, i @@ -555,20 +579,21 @@ def _distances(self, progress_bar: bool = False) -> None: :return: the updated storage matrix that collects information on each observation. 
""" - distances = np.full([self._n_observations(), self.n_neighbors], 9e10, - dtype=float) - indexes = np.full([self._n_observations(), self.n_neighbors], 9e10, - dtype=float) + distances = np.full( + [self._n_observations(), self.n_neighbors], 9e10, dtype=float + ) + indexes = np.full([self._n_observations(), self.n_neighbors], 9e10, dtype=float) self.points_vector = self.Validate._data(self.data) - compute = numba.jit(self._compute_distance_and_neighbor_matrix, - cache=True) if self.use_numba else \ - self._compute_distance_and_neighbor_matrix + compute = ( + numba.jit(self._compute_distance_and_neighbor_matrix, cache=True) + if self.use_numba + else self._compute_distance_and_neighbor_matrix + ) progress = "=" for cluster_id in set(self._cluster_labels()): indices = np.where(self._cluster_labels() == cluster_id) clust_points_vector = np.array( - self.points_vector.take(indices, axis=0)[0], - dtype=np.float64 + self.points_vector.take(indices, axis=0)[0], dtype=np.float64 ) # a generator that yields an updated distance matrix on each loop for c in compute(clust_points_vector, indices, distances, indexes): @@ -576,7 +601,8 @@ def _distances(self, progress_bar: bool = False) -> None: # update the progress bar if progress_bar is True: progress = Utils.emit_progress_bar( - progress, i+1, clust_points_vector.shape[0]) + progress, i + 1, clust_points_vector.shape[0] + ) self.distance_matrix = distances self.neighbor_matrix = indexes @@ -630,11 +656,10 @@ def _prob_distances(self, data_store: np.ndarray) -> np.ndarray: """ prob_distances = [] for i in range(data_store[:, 4].shape[0]): - prob_distances.append( - self._prob_distance(self.extent, data_store[:, 4][i])) + prob_distances.append(self._prob_distance(self.extent, data_store[:, 4][i])) return np.hstack((data_store, np.array([prob_distances]).T)) - def _prob_distances_ev(self, data_store: np.ndarray) -> np.ndarray: + def _prob_distances_ev(self, data_store) -> np.ndarray: """ Calculates the expected value of the 
probabilistic distance for each observation in the input data with respect to the cluster the @@ -648,19 +673,20 @@ def _prob_distances_ev(self, data_store: np.ndarray) -> np.ndarray: for cluster_id in self.cluster_labels_u: indices = np.where(data_store[:, 0] == cluster_id)[0] for index in indices: - nbrhood = data_store[index][2].astype(int) - nbrhood_prob_distances = np.take(data_store[:, 5], - nbrhood).astype(float) + # Global neighbor indices for the current point + nbrhood = data_store[index][2].astype(int) # Ensure global indices + nbrhood_prob_distances = np.take(data_store[:, 5], nbrhood).astype( + float + ) nbrhood_prob_distances_nonan = nbrhood_prob_distances[ - np.logical_not(np.isnan(nbrhood_prob_distances))] - prob_set_distance_ev[index] = \ - nbrhood_prob_distances_nonan.mean() + np.logical_not(np.isnan(nbrhood_prob_distances)) + ] + prob_set_distance_ev[index] = nbrhood_prob_distances_nonan.mean() + self.prob_distances_ev = prob_set_distance_ev - data_store = np.hstack((data_store, prob_set_distance_ev)) - return data_store + return np.hstack((data_store, prob_set_distance_ev)) - def _prob_local_outlier_factors(self, - data_store: np.ndarray) -> np.ndarray: + def _prob_local_outlier_factors(self, data_store: np.ndarray) -> np.ndarray: """ Calculates the probabilistic local outlier factor for each observation in the input data. @@ -670,13 +696,22 @@ def _prob_local_outlier_factors(self, each observation. 
""" return np.hstack( - (data_store, - np.array([np.apply_along_axis(self._prob_outlier_factor, 0, - data_store[:, 5], - data_store[:, 6])]).T)) + ( + data_store, + np.array( + [ + np.apply_along_axis( + self._prob_outlier_factor, + 0, + data_store[:, 5], + data_store[:, 6], + ) + ] + ).T, + ) + ) - def _prob_local_outlier_factors_ev(self, - data_store: np.ndarray) -> np.ndarray: + def _prob_local_outlier_factors_ev(self, data_store: np.ndarray) -> np.ndarray: """ Calculates the expected value of the probabilistic local outlier factor for each observation in the input data with respect to the cluster the @@ -689,21 +724,31 @@ def _prob_local_outlier_factors_ev(self, prob_local_outlier_factor_ev_dict = {} for cluster_id in self.cluster_labels_u: indices = np.where(data_store[:, 0] == cluster_id) - prob_local_outlier_factors = np.take(data_store[:, 7], - indices).astype(float) - prob_local_outlier_factors_nonan = prob_local_outlier_factors[ - np.logical_not(np.isnan(prob_local_outlier_factors))] - prob_local_outlier_factor_ev_dict[cluster_id] = ( - np.power(prob_local_outlier_factors_nonan, 2).sum() / - float(prob_local_outlier_factors_nonan.size) + prob_local_outlier_factors = np.take(data_store[:, 7], indices).astype( + float ) + prob_local_outlier_factors_nonan = prob_local_outlier_factors[ + np.logical_not(np.isnan(prob_local_outlier_factors)) + ] + prob_local_outlier_factor_ev_dict[cluster_id] = np.power( + prob_local_outlier_factors_nonan, 2 + ).sum() / float(prob_local_outlier_factors_nonan.size) data_store = np.hstack( - (data_store, np.array([[prob_local_outlier_factor_ev_dict[x] for x - in data_store[:, 0].tolist()]]).T)) + ( + data_store, + np.array( + [ + [ + prob_local_outlier_factor_ev_dict[x] + for x in data_store[:, 0].tolist() + ] + ] + ).T, + ) + ) return data_store - def _norm_prob_local_outlier_factors(self, data_store: np.ndarray) \ - -> np.ndarray: + def _norm_prob_local_outlier_factors(self, data_store: np.ndarray) -> np.ndarray: """ Calculates 
the normalized probabilistic local outlier factor for each observation in the input data. @@ -712,11 +757,20 @@ def _norm_prob_local_outlier_factors(self, data_store: np.ndarray) \ :return: the updated storage matrix that collects information on each observation. """ - return np.hstack((data_store, np.array([self._norm_prob_outlier_factor( - self.extent, data_store[:, 8].tolist())]).T)) + return np.hstack( + ( + data_store, + np.array( + [ + self._norm_prob_outlier_factor( + self.extent, data_store[:, 8].tolist() + ) + ] + ).T, + ) + ) - def _local_outlier_probabilities(self, - data_store: np.ndarray) -> np.ndarray: + def _local_outlier_probabilities(self, data_store: np.ndarray) -> np.ndarray: """ Calculates the local outlier probability for each observation in the input data. @@ -726,17 +780,26 @@ def _local_outlier_probabilities(self, each observation. """ return np.hstack( - (data_store, - np.array([np.apply_along_axis(self._local_outlier_probability, 0, - data_store[:, 7], - data_store[:, 9])]).T)) + ( + data_store, + np.array( + [ + np.apply_along_axis( + self._local_outlier_probability, + 0, + data_store[:, 7], + data_store[:, 9], + ) + ] + ).T, + ) + ) """ Public methods """ - def fit(self) -> 'LocalOutlierProbability': - + def fit(self) -> "LocalOutlierProbability": """ Calculates the local outlier probability for each observation in the input data according to the input parameters extent, n_neighbors, and @@ -748,8 +811,7 @@ def fit(self) -> 'LocalOutlierProbability': self.Validate._n_neighbors(self) if self.Validate._cluster_size(self) is False: sys.exit() - if self.data is not None and self.Validate._missing_values( - self) is False: + if self.data is not None and self.Validate._missing_values(self) is False: sys.exit() store = self._store() @@ -773,7 +835,6 @@ def fit(self) -> 'LocalOutlierProbability': return self def stream(self, x: np.ndarray) -> np.ndarray: - """ Calculates the local outlier probability for an individual sample according to the 
input parameters extent, n_neighbors, and @@ -812,12 +873,12 @@ def stream(self, x: np.ndarray) -> np.ndarray: ssd = np.power(distances, 2).sum() std_dist = np.sqrt(np.divide(ssd, self.n_neighbors)) prob_dist = self._prob_distance(self.extent, std_dist) - plof = self._prob_outlier_factor(np.array(prob_dist), - np.array( - self.prob_distances_ev.mean()) - ) + plof = self._prob_outlier_factor( + np.array(prob_dist), np.array(self.prob_distances_ev.mean()) + ) loop = self._local_outlier_probability( - plof, self.norm_prob_local_outlier_factor) + plof, self.norm_prob_local_outlier_factor + ) if orig_cluster_labels is not None: self.cluster_labels = orig_cluster_labels diff --git a/readme.md b/readme.md index d595187..0abfa6a 100644 --- a/readme.md +++ b/readme.md @@ -2,11 +2,13 @@ PyNomaly is a Python 3 implementation of LoOP (Local Outlier Probabilities). LoOP is a local density based outlier detection method by Kriegel, Kröger, Schubert, and Zimek which provides outlier -scores in the range of [0,1] that are directly interpretable as the probability of a sample being an outlier. +scores in the range of [0,1] that are directly interpretable as the probability of a sample being an outlier. + +PyNomaly is a core library of [deepchecks](https://github.com/deepchecks/deepchecks) and [pysad](https://github.com/selimfirat/pysad). 
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) -[![PyPi](https://img.shields.io/badge/pypi-0.3.3-blue.svg)](https://pypi.python.org/pypi/PyNomaly/0.3.3) -![](https://img.shields.io/pypi/dm/PyNomaly.svg?logoColor=blue) +[![PyPi](https://img.shields.io/badge/pypi-0.3.4-blue.svg)](https://pypi.python.org/pypi/PyNomaly/0.3.4) +[![Downloads](https://img.shields.io/pypi/dm/PyNomaly.svg?logoColor=blue)](https://pypistats.org/packages/pynomaly) ![Tests](https://github.com/vc1492a/PyNomaly/actions/workflows/tests.yml/badge.svg) [![Coverage Status](https://coveralls.io/repos/github/vc1492a/PyNomaly/badge.svg?branch=main)](https://coveralls.io/github/vc1492a/PyNomaly?branch=main) [![JOSS](http://joss.theoj.org/papers/f4d2cfe680768526da7c1f6a2c103266/status.svg)](http://joss.theoj.org/papers/f4d2cfe680768526da7c1f6a2c103266) @@ -29,16 +31,6 @@ The authors' 2009 paper detailing LoOP's theory, formulation, and application is Ludwig-Maximilians University Munich - Institute for Informatics; [LoOP: Local Outlier Probabilities](http://www.dbs.ifi.lmu.de/Publikationen/Papers/LoOP1649.pdf). -## PyNomaly Seeks Maintainers! :sparkles: - -Love using PyNomaly? Want to develop your open source software (OSS) experience and credentials? - -PyNomaly is looking for maintainers! PyNomaly doesn't need much on a day to day basis, but needs some attention. - -On the flip side, the sky is the limit... Have you seen [Mojo](https://docs.modular.com/mojo/notebooks/Matmul.html) and what it can do with matrix multiplication? Would definitely speed things up. - -Interested? Send an email to [vc1492a@gmail.com](vc1492a@gmail.com). - ## Implementation This Python 3 implementation uses Numpy and the formulas outlined in @@ -46,7 +38,7 @@ This Python 3 implementation uses Numpy and the formulas outlined in to calculate the Local Outlier Probability of each sample. 
## Dependencies -- Python 3.5 - 3.8 +- Python 3.6 - 3.12 - numpy >= 1.16.3 - python-utils >= 2.3.0 - (optional) numba >= 0.45.1 diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 224a779..0000000 --- a/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[metadata] -description-file = README.md \ No newline at end of file diff --git a/setup.py b/setup.py index b66544a..9b9efc6 100644 --- a/setup.py +++ b/setup.py @@ -3,14 +3,14 @@ setup( name='PyNomaly', packages=['PyNomaly'], - version='0.3.3', + version='0.3.4', description='A Python 3 implementation of LoOP: Local Outlier ' 'Probabilities, a local density based outlier detection ' 'method providing an outlier score in the range of [0,1].', author='Valentino Constantinou', author_email='vc@valentino.io', url='https://github.com/vc1492a/PyNomaly', - download_url='https://github.com/vc1492a/PyNomaly/archive/0.3.3.tar.gz', + download_url='https://github.com/vc1492a/PyNomaly/archive/0.3.4.tar.gz', keywords=['outlier', 'anomaly', 'detection', 'machine', 'learning', 'probability'], classifiers=[], diff --git a/tests/test_loop.py b/tests/test_loop.py index bb214da..ba453e9 100644 --- a/tests/test_loop.py +++ b/tests/test_loop.py @@ -21,13 +21,16 @@ NUMBA = False if NUMBA is False: - logging.info("Numba is disabled. Coverage statistics are reflective of " - "testing native Python code. Consider also testing with numba" - " enabled.") + logging.info( + "Numba is disabled. Coverage statistics are reflective of " + "testing native Python code. Consider also testing with numba" + " enabled." + ) else: logging.warning( "Numba is enabled. Coverage statistics will be impacted (reduced) to" - " due the just-in-time compilation of native Python code.") + " due the just-in-time compilation of native Python code." + ) # load the iris dataset # and randomly permute it @@ -47,8 +50,9 @@ def X_n8() -> np.ndarray: :return: a Numpy array. 
""" # Toy sample (the last two samples are outliers): - X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 2], [1, 2], [2, 1], [5, 3], - [-4, 2]]) + X = np.array( + [[-2, -1], [-1, -1], [-1, -2], [1, 2], [1, 2], [2, 1], [5, 3], [-4, 2]] + ) return X @@ -59,18 +63,55 @@ def X_n20_scores() -> Tuple[np.ndarray, np.ndarray]: and the precalculated loOP scores based on that array. :return: tuple(input_data,exptected_scores) """ - input_data = np.array([0.02059752, 0.32629926, 0.63036653, 0.94409321, - 0.63251097, 0.47598494, 0.80204026, 0.34845067, - 0.81556468, 0.89183, 0.25210317, 0.11460502, - 0.19953434, 0.36955067, 0.06038041, 0.34527368, - 0.56621582, 0.90533649, 0.33773613, 0.71573306]) - - expected_scores = np.array([0.6356276742921594, 0.0, 0.0, - 0.48490790006974044, 0.0, 0.0, 0.0, 0.0, - 0.021728288376168012, 0.28285086151683225, - 0.0, 0.18881886507113213, 0.0, 0.0, - 0.45350246469681843, 0.0, 0.07886635748113013, - 0.3349068501560546, 0.0, 0.0]) + input_data = np.array( + [ + 0.02059752, + 0.32629926, + 0.63036653, + 0.94409321, + 0.63251097, + 0.47598494, + 0.80204026, + 0.34845067, + 0.81556468, + 0.89183, + 0.25210317, + 0.11460502, + 0.19953434, + 0.36955067, + 0.06038041, + 0.34527368, + 0.56621582, + 0.90533649, + 0.33773613, + 0.71573306, + ] + ) + + expected_scores = np.array( + [ + 0.6356276742921594, + 0.0, + 0.0, + 0.48490790006974044, + 0.0, + 0.0, + 0.0, + 0.0, + 0.021728288376168012, + 0.28285086151683225, + 0.0, + 0.18881886507113213, + 0.0, + 0.0, + 0.45350246469681843, + 0.0, + 0.07886635748113013, + 0.3349068501560546, + 0.0, + 0.0, + ] + ) return (input_data, expected_scores) @@ -124,7 +165,7 @@ def test_loop(X_n8) -> None: # Test LocalOutlierProbability: clf = loop.LocalOutlierProbability(X_n8, n_neighbors=5, use_numba=NUMBA) score = clf.fit().local_outlier_probabilities - share_outlier = 2. / 8. 
+ share_outlier = 2.0 / 8.0 predictions = [-1 if s > share_outlier else 1 for s in score] assert_array_equal(predictions, 6 * [1] + 2 * [-1]) @@ -137,7 +178,7 @@ def test_loop(X_n8) -> None: # Test LocalOutlierProbability: clf = loop.LocalOutlierProbability(X_df, n_neighbors=5, use_numba=NUMBA) score = clf.fit().local_outlier_probabilities - share_outlier = 2. / 8. + share_outlier = 2.0 / 8.0 predictions = [-1 if s > share_outlier else 1 for s in score] assert_array_equal(predictions, 6 * [1] + 2 * [-1]) @@ -168,8 +209,7 @@ def test_loop_performance(X_n120) -> None: # Generate some abnormal novel observations X_outliers = rng.uniform(low=-4, high=4, size=(20, 2)) X_test = np.r_[X_n120, X_outliers] - X_labels = np.r_[ - np.repeat(1, X_n120.shape[0]), np.repeat(-1, X_outliers.shape[0])] + X_labels = np.r_[np.repeat(1, X_n120.shape[0]), np.repeat(-1, X_outliers.shape[0])] # fit the model clf = loop.LocalOutlierProbability( @@ -177,7 +217,7 @@ def test_loop_performance(X_n120) -> None: n_neighbors=X_test.shape[0] - 1, # test the progress bar progress_bar=True, - use_numba=NUMBA + use_numba=NUMBA, ) # predict scores (the lower, the more normal) @@ -186,7 +226,7 @@ def test_loop_performance(X_n120) -> None: X_pred = [-1 if s > share_outlier else 1 for s in score] # check that roc_auc is good - assert roc_auc_score(X_pred, X_labels) >= .98 + assert roc_auc_score(X_pred, X_labels) >= 0.98 def test_input_nodata(X_n140_outliers) -> None: @@ -198,14 +238,14 @@ def test_input_nodata(X_n140_outliers) -> None: """ with pytest.warns(UserWarning) as record: # attempt to fit loop without data or a distance matrix - loop.LocalOutlierProbability(n_neighbors=X_n140_outliers.shape[0] - 1, - use_numba=NUMBA) + loop.LocalOutlierProbability( + n_neighbors=X_n140_outliers.shape[0] - 1, use_numba=NUMBA + ) # check that only one warning was raised assert len(record) == 1 # check that the message matches - assert record[0].message.args[ - 0] == "Data or a distance matrix must be provided." 
+ assert record[0].message.args[0] == "Data or a distance matrix must be provided." def test_input_incorrect_type(X_n140_outliers) -> None: @@ -217,18 +257,20 @@ def test_input_incorrect_type(X_n140_outliers) -> None: """ with pytest.warns(UserWarning) as record: # attempt to fit loop with a string input for n_neighbors - loop.LocalOutlierProbability(X_n140_outliers, - n_neighbors=str( - X_n140_outliers.shape[0] - 1), - use_numba=NUMBA - ) + loop.LocalOutlierProbability( + X_n140_outliers, + n_neighbors=str(X_n140_outliers.shape[0] - 1), + use_numba=NUMBA, + ) # check that only one warning was raised assert len(record) == 1 # check that the message matches - assert record[0].message.args[ - 0] == "Argument 'n_neighbors' is not of type (, " \ + assert ( + record[0].message.args[0] + == "Argument 'n_neighbors' is not of type (, " ")." + ) def test_input_neighbor_zero(X_n120) -> None: @@ -247,8 +289,10 @@ def test_input_neighbor_zero(X_n120) -> None: # check that only one warning was raised assert len(record) == 1 # check that the message matches - assert record[0].message.args[ - 0] == "n_neighbors must be greater than 0. Fit with 10 instead." + assert ( + record[0].message.args[0] + == "n_neighbors must be greater than 0. Fit with 10 instead." 
+ ) def test_input_distonly(X_n120) -> None: @@ -259,7 +303,7 @@ def test_input_distonly(X_n120) -> None: :return: None """ # generate distance and neighbor indices - neigh = NearestNeighbors(metric='euclidean') + neigh = NearestNeighbors(metric="euclidean") neigh.fit(X_n120) d, idx = neigh.kneighbors(X_n120, n_neighbors=10, return_distance=True) @@ -270,9 +314,11 @@ def test_input_distonly(X_n120) -> None: # check that only one warning was raised assert len(record) == 1 # check that the message matches - assert record[0].message.args[ - 0] == "A neighbor index matrix and distance matrix must both " \ + assert ( + record[0].message.args[0] + == "A neighbor index matrix and distance matrix must both " "be provided when not using raw input data." + ) def test_input_neighboronly(X_n120) -> None: @@ -283,7 +329,7 @@ def test_input_neighboronly(X_n120) -> None: :return: None """ # generate distance and neighbor indices - neigh = NearestNeighbors(metric='euclidean') + neigh = NearestNeighbors(metric="euclidean") neigh.fit(X_n120) d, idx = neigh.kneighbors(X_n120, n_neighbors=10, return_distance=True) @@ -294,8 +340,7 @@ def test_input_neighboronly(X_n120) -> None: # check that only one warning was raised assert len(record) == 1 # check that the message matches - assert record[0].message.args[ - 0] == "Data or a distance matrix must be provided." + assert record[0].message.args[0] == "Data or a distance matrix must be provided." 
def test_input_too_many(X_n120) -> None: @@ -306,21 +351,24 @@ def test_input_too_many(X_n120) -> None: :return: None """ # generate distance and neighbor indices - neigh = NearestNeighbors(metric='euclidean') + neigh = NearestNeighbors(metric="euclidean") neigh.fit(X_n120) d, idx = neigh.kneighbors(X_n120, n_neighbors=10, return_distance=True) with pytest.warns(UserWarning) as record: # attempt to fit loop with data and a distance matrix - loop.LocalOutlierProbability(X_n120, distance_matrix=d, - neighbor_matrix=idx, use_numba=NUMBA) + loop.LocalOutlierProbability( + X_n120, distance_matrix=d, neighbor_matrix=idx, use_numba=NUMBA + ) # check that only one warning was raised assert len(record) == 1 # check that the message matches - assert record[0].message.args[ - 0] == "Only one of the following may be provided: data or a " \ + assert ( + record[0].message.args[0] + == "Only one of the following may be provided: data or a " "distance matrix (not both)." + ) def test_distance_neighbor_shape_mismatch(X_n120) -> None: @@ -331,30 +379,28 @@ def test_distance_neighbor_shape_mismatch(X_n120) -> None: :return: None """ # generate distance and neighbor indices - neigh = NearestNeighbors(metric='euclidean') + neigh = NearestNeighbors(metric="euclidean") neigh.fit(X_n120) d, idx = neigh.kneighbors(X_n120, n_neighbors=10, return_distance=True) # generate distance and neighbor indices of a different shape - neigh_2 = NearestNeighbors(metric='euclidean') + neigh_2 = NearestNeighbors(metric="euclidean") neigh_2.fit(X_n120) d_2, idx_2 = neigh.kneighbors(X_n120, n_neighbors=5, return_distance=True) with pytest.warns(UserWarning) as record: # attempt to fit loop with a mismatch in shapes loop.LocalOutlierProbability( - distance_matrix=d, - neighbor_matrix=idx_2, - n_neighbors=5, - use_numba=NUMBA + distance_matrix=d, neighbor_matrix=idx_2, n_neighbors=5, use_numba=NUMBA ) # check that only one warning was raised assert len(record) == 1 # check that the message matches - assert 
record[0].message.args[ - 0] == "The shape of the distance and neighbor " \ + assert ( + record[0].message.args[0] == "The shape of the distance and neighbor " "index matrices must match." + ) def test_input_neighbor_mismatch(X_n120) -> None: @@ -365,25 +411,25 @@ def test_input_neighbor_mismatch(X_n120) -> None: :return: None """ # generate distance and neighbor indices - neigh = NearestNeighbors(metric='euclidean') + neigh = NearestNeighbors(metric="euclidean") neigh.fit(X_n120) d, idx = neigh.kneighbors(X_n120, n_neighbors=5, return_distance=True) with pytest.warns(UserWarning) as record: # attempt to fit loop with a neighbor size mismatch - loop.LocalOutlierProbability(distance_matrix=d, - neighbor_matrix=idx, - n_neighbors=10, - use_numba=NUMBA) + loop.LocalOutlierProbability( + distance_matrix=d, neighbor_matrix=idx, n_neighbors=10, use_numba=NUMBA + ) # check that only one warning was raised assert len(record) == 1 # check that the message matches - assert record[0].message.args[ - 0] == "The shape of the distance or " \ - "neighbor index matrix does not " \ - "match the number of neighbors " \ + assert ( + record[0].message.args[0] == "The shape of the distance or " + "neighbor index matrix does not " + "match the number of neighbors " "specified." 
+ ) def test_loop_dist_matrix(X_n120) -> None: @@ -394,14 +440,15 @@ def test_loop_dist_matrix(X_n120) -> None: :return: None """ # generate distance and neighbor indices - neigh = NearestNeighbors(metric='euclidean') + neigh = NearestNeighbors(metric="euclidean") neigh.fit(X_n120) d, idx = neigh.kneighbors(X_n120, n_neighbors=10, return_distance=True) # fit loop using data and distance matrix clf1 = loop.LocalOutlierProbability(X_n120, use_numba=NUMBA) - clf2 = loop.LocalOutlierProbability(distance_matrix=d, neighbor_matrix=idx, - use_numba=NUMBA) + clf2 = loop.LocalOutlierProbability( + distance_matrix=d, neighbor_matrix=idx, use_numba=NUMBA + ) scores1 = clf1.fit().local_outlier_probabilities scores2 = clf2.fit().local_outlier_probabilities @@ -418,12 +465,9 @@ def test_lambda_values(X_n140_outliers) -> None: :return: None """ # Fit the model with different extent (lambda) values - clf1 = loop.LocalOutlierProbability(X_n140_outliers, extent=1, - use_numba=NUMBA) - clf2 = loop.LocalOutlierProbability(X_n140_outliers, extent=2, - use_numba=NUMBA) - clf3 = loop.LocalOutlierProbability(X_n140_outliers, extent=3, - use_numba=NUMBA) + clf1 = loop.LocalOutlierProbability(X_n140_outliers, extent=1, use_numba=NUMBA) + clf2 = loop.LocalOutlierProbability(X_n140_outliers, extent=2, use_numba=NUMBA) + clf3 = loop.LocalOutlierProbability(X_n140_outliers, extent=3, use_numba=NUMBA) # predict scores (the lower, the more normal) score1 = clf1.fit().local_outlier_probabilities @@ -452,20 +496,19 @@ def test_parameters(X_n120) -> None: clf = loop.LocalOutlierProbability(X_n120, use_numba=NUMBA).fit() # check that the model has attributes post fit - assert (hasattr(clf, 'n_neighbors') and - clf.n_neighbors is not None) - assert (hasattr(clf, 'extent') and - clf.extent is not None) - assert (hasattr(clf, 'cluster_labels') and - clf._cluster_labels() is not None) - assert (hasattr(clf, 'prob_distances') and - clf.prob_distances is not None) - assert (hasattr(clf, 
'prob_distances_ev') and - clf.prob_distances_ev is not None) - assert (hasattr(clf, 'norm_prob_local_outlier_factor') and - clf.norm_prob_local_outlier_factor is not None) - assert (hasattr(clf, 'local_outlier_probabilities') and - clf.local_outlier_probabilities is not None) + assert hasattr(clf, "n_neighbors") and clf.n_neighbors is not None + assert hasattr(clf, "extent") and clf.extent is not None + assert hasattr(clf, "cluster_labels") and clf._cluster_labels() is not None + assert hasattr(clf, "prob_distances") and clf.prob_distances is not None + assert hasattr(clf, "prob_distances_ev") and clf.prob_distances_ev is not None + assert ( + hasattr(clf, "norm_prob_local_outlier_factor") + and clf.norm_prob_local_outlier_factor is not None + ) + assert ( + hasattr(clf, "local_outlier_probabilities") + and clf.local_outlier_probabilities is not None + ) def test_n_neighbors() -> None: @@ -476,8 +519,7 @@ def test_n_neighbors() -> None: :return: None """ X = iris.data - clf = loop.LocalOutlierProbability(X, n_neighbors=500, - use_numba=NUMBA).fit() + clf = loop.LocalOutlierProbability(X, n_neighbors=500, use_numba=NUMBA).fit() assert clf.n_neighbors == X.shape[0] - 1 clf = loop.LocalOutlierProbability(X, n_neighbors=500, use_numba=NUMBA) @@ -498,8 +540,7 @@ def test_extent() -> None: :return: None """ X = np.array([[1, 1], [1, 0]]) - clf = loop.LocalOutlierProbability(X, n_neighbors=2, extent=4, - use_numba=NUMBA) + clf = loop.LocalOutlierProbability(X, n_neighbors=2, extent=4, use_numba=NUMBA) with pytest.warns(UserWarning) as record: clf.fit() @@ -534,8 +575,7 @@ def test_missing_values() -> None: X = np.array([1.3, 1.1, 0.9, 1.4, 1.5, np.nan, 3.2]) clf = loop.LocalOutlierProbability(X, n_neighbors=3, use_numba=NUMBA) - with pytest.raises(SystemExit) as record_a, pytest.warns( - UserWarning) as record_b: + with pytest.raises(SystemExit) as record_a, pytest.warns(UserWarning) as record_b: clf.fit() assert record_a.type == SystemExit @@ -543,8 +583,10 @@ def 
test_missing_values() -> None: # check that only one warning was raised assert len(record_b) == 1 # check that the message matches - assert record_b[0].message.args[ - 0] == "Method does not support missing values in input data." + assert ( + record_b[0].message.args[0] + == "Method does not support missing values in input data." + ) def test_small_cluster_size(X_n140_outliers) -> None: @@ -560,14 +602,10 @@ def test_small_cluster_size(X_n140_outliers) -> None: cluster_labels = a + b clf = loop.LocalOutlierProbability( - X_n140_outliers, - n_neighbors=50, - cluster_labels=cluster_labels, - use_numba=NUMBA + X_n140_outliers, n_neighbors=50, cluster_labels=cluster_labels, use_numba=NUMBA ) - with pytest.raises(SystemExit) as record_a, pytest.warns( - UserWarning) as record_b: + with pytest.raises(SystemExit) as record_a, pytest.warns(UserWarning) as record_b: clf.fit() assert record_a.type == SystemExit @@ -575,11 +613,13 @@ def test_small_cluster_size(X_n140_outliers) -> None: # check that only one warning was raised assert len(record_b) == 1 # check that the message matches - assert record_b[0].message.args[ - 0] == "Number of neighbors specified larger than smallest " \ - "cluster. Specify a number of neighbors smaller than " \ - "the smallest cluster size (observations in smallest " \ + assert ( + record_b[0].message.args[0] + == "Number of neighbors specified larger than smallest " + "cluster. Specify a number of neighbors smaller than " + "the smallest cluster size (observations in smallest " "cluster minus one)." + ) def test_stream_fit(X_n140_outliers) -> None: @@ -599,8 +639,10 @@ def test_stream_fit(X_n140_outliers) -> None: # check that the message matches messages = [i.message.args[0] for i in record] - assert "Must fit on historical data by calling fit() prior to " \ - "calling stream(x)." in messages + assert ( + "Must fit on historical data by calling fit() prior to " + "calling stream(x)." 
in messages + ) def test_stream_distance(X_n140_outliers) -> None: @@ -615,15 +657,15 @@ def test_stream_distance(X_n140_outliers) -> None: X_test = X_n140_outliers[100:140] # generate distance and neighbor indices - neigh = NearestNeighbors(metric='euclidean') + neigh = NearestNeighbors(metric="euclidean") neigh.fit(X_train) d, idx = neigh.kneighbors(X_train, n_neighbors=10, return_distance=True) # Fit the models in standard and distance matrix form m = loop.LocalOutlierProbability(X_train, use_numba=NUMBA).fit() - m_dist = loop.LocalOutlierProbability(distance_matrix=d, - neighbor_matrix=idx, - use_numba=NUMBA).fit() + m_dist = loop.LocalOutlierProbability( + distance_matrix=d, neighbor_matrix=idx, use_numba=NUMBA + ).fit() # Collect the scores X_test_scores = [] @@ -658,9 +700,9 @@ def test_stream_cluster(X_n140_outliers) -> None: # Fit the model X_train = X_n140_outliers[0:138] X_test = X_n140_outliers[139] - clf = loop.LocalOutlierProbability(X_train, - cluster_labels=cluster_labels, - use_numba=NUMBA).fit() + clf = loop.LocalOutlierProbability( + X_train, cluster_labels=cluster_labels, use_numba=NUMBA + ).fit() with pytest.warns(UserWarning) as record: clf.stream(X_test) @@ -668,9 +710,10 @@ def test_stream_cluster(X_n140_outliers) -> None: # check that only one warning was raised assert len(record) == 1 # check that the message matches - assert record[0].message.args[ - 0] == "Stream approach does not support clustered data. " \ + assert ( + record[0].message.args[0] == "Stream approach does not support clustered data. " "Automatically refit using single cluster of points." 
+ ) def test_stream_performance(X_n140_outliers) -> None: @@ -715,5 +758,35 @@ def test_progress_bar(X_n8) -> None: """ # attempt to use the progress bar on a small number of observations - loop.LocalOutlierProbability(X_n8, use_numba=NUMBA, - progress_bar=True).fit() + loop.LocalOutlierProbability(X_n8, use_numba=NUMBA, progress_bar=True).fit() + + +def test_data_flipping() -> None: + """ + Tests that flipping the data and cluster labels does not change the scores. + :return: None + """ + np.random.seed(1) + n = 9 + data = np.append( + np.random.normal(2, 1, [n, 2]), np.random.normal(8, 1, [n, 2]), axis=0 + ) + clus = np.append(np.ones(n), 2 * np.ones(n)).tolist() + model = loop.LocalOutlierProbability(data, n_neighbors=5, cluster_labels=clus) + fit = model.fit() + res = fit.local_outlier_probabilities + + data_flipped = np.flipud(data) + clus_flipped = np.flipud(clus).tolist() + model2 = loop.LocalOutlierProbability( + data_flipped, n_neighbors=5, cluster_labels=clus_flipped + ) + fit2 = model2.fit() + res2 = np.flipud(fit2.local_outlier_probabilities) + + assert_array_almost_equal(res, res2, decimal=6) + assert_array_almost_equal( + fit.norm_prob_local_outlier_factor, + fit2.norm_prob_local_outlier_factor, + decimal=6, + )