diff --git a/docs/src/conf.py b/docs/src/conf.py index 02bd041db..7c72f4cef 100644 --- a/docs/src/conf.py +++ b/docs/src/conf.py @@ -54,7 +54,7 @@ "sphinx_toggleprompt", ] -example_subdirs = ["pcovr", "selection", "regression", "reconstruction"] +example_subdirs = ["pcovr", "selection", "regression", "reconstruction", "neighbors"] sphinx_gallery_conf = { "filename_pattern": "/*", "examples_dirs": [f"../../examples/{p}" for p in example_subdirs], diff --git a/docs/src/references/index.rst b/docs/src/references/index.rst index 52125238b..8e48470de 100644 --- a/docs/src/references/index.rst +++ b/docs/src/references/index.rst @@ -12,5 +12,6 @@ API Reference linear_models decomposition metrics + neighbors datasets utils diff --git a/docs/src/references/metrics.rst b/docs/src/references/metrics.rst index 2ea0bb634..d5025b7b0 100644 --- a/docs/src/references/metrics.rst +++ b/docs/src/references/metrics.rst @@ -40,3 +40,18 @@ Component-wise Prediction Rigidity ---------------------------------- .. autofunction:: skmatter.metrics.componentwise_prediction_rigidity + + +.. _pairwise-euclidian-api: + +Pairwise Euclidean Distances +---------------------------- + +.. autofunction:: skmatter.metrics.pairwise_euclidean_distances + +.. _pairwise-mahalanobis-api: + +Pairwise Mahalanobis Distances +------------------------------ + +.. autofunction:: skmatter.metrics.pairwise_mahalanobis_distances diff --git a/docs/src/references/neighbors.rst b/docs/src/references/neighbors.rst new file mode 100644 index 000000000..73d5af9c4 --- /dev/null +++ b/docs/src/references/neighbors.rst @@ -0,0 +1,11 @@ +Neighbors +========= + +.. automodule:: skmatter.neighbors + +.. _sparse-kde-api: + +Sparse Kernel Density Estimation +-------------------------------- + +.. 
autoclass:: skmatter.neighbors.SparseKDE diff --git a/docs/src/references/utils.rst b/docs/src/references/utils.rst index 41a017156..3e6fc1e66 100644 --- a/docs/src/references/utils.rst +++ b/docs/src/references/utils.rst @@ -30,3 +30,9 @@ Random Partitioning with Overlaps --------------------------------- .. autofunction:: skmatter.model_selection.train_test_split + + +Nearest Grid Assigner +--------------------- + +.. autoclass:: skmatter.utils.NearestGridAssigner diff --git a/docs/src/tutorials.rst b/docs/src/tutorials.rst index fa3461fd0..96381ea39 100644 --- a/docs/src/tutorials.rst +++ b/docs/src/tutorials.rst @@ -6,3 +6,4 @@ examples/selection/index examples/regression/index examples/reconstruction/index + examples/neighbors/index diff --git a/examples/neighbors/README.rst b/examples/neighbors/README.rst new file mode 100644 index 000000000..bdbac52bf --- /dev/null +++ b/examples/neighbors/README.rst @@ -0,0 +1,2 @@ +Neighbors +========= diff --git a/examples/neighbors/sparse-kde.py b/examples/neighbors/sparse-kde.py new file mode 100644 index 000000000..e6ae419d8 --- /dev/null +++ b/examples/neighbors/sparse-kde.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python +# coding: utf-8 + +""" +Sparse KDE examples +==================================== + +We start by importing our modules +""" +# %% +# + +import numpy as np + +from skmatter.datasets import load_roy_dataset # TODO add a dataset + + +# %% +# +# After importing we start with the actual calculations and examples. + +roy_data = load_roy_dataset() + +a = np.array(2) +print(a) diff --git a/src/skmatter/metrics/__init__.py b/src/skmatter/metrics/__init__.py index e141e5991..d313a658f 100644 --- a/src/skmatter/metrics/__init__.py +++ b/src/skmatter/metrics/__init__.py @@ -37,6 +37,12 @@ kernel model. * :ref:`CPR-api` (CPR) computes the component-wise prediction rigidity of a linear or kernel model. + +TODO: Add some explanation what the pairwise metrics do. 
+ +* :ref:`pairwise-euclidian-api` computes TODO +* :ref:`pairwise-mahalanobis-api` computes TODO + """ from ._reconstruction_measures import ( @@ -55,7 +61,7 @@ componentwise_prediction_rigidity, ) -from .pairwise import ( +from ._pairwise import ( pairwise_euclidean_distances, pairwise_mahalanobis_distances, ) diff --git a/src/skmatter/metrics/pairwise.py b/src/skmatter/metrics/_pairwise.py similarity index 90% rename from src/skmatter/metrics/pairwise.py rename to src/skmatter/metrics/_pairwise.py index 31cb5620c..eef4b27df 100644 --- a/src/skmatter/metrics/pairwise.py +++ b/src/skmatter/metrics/_pairwise.py @@ -14,20 +14,23 @@ def pairwise_euclidean_distances( """ Compute the pairwise distance matrix between each pair from a vector array X and Y. + .. math:: + d = XXX + For efficiency reasons, the euclidean distance between a pair of row vector x and y is computed as:: dist(x, y) = sqrt(dot(x, x) - 2 * dot(x, y) + dot(y, y)) - This formulation has two advantages over other ways of computing distances. - First, it is computationally efficient when dealing with sparse data. - Second, if one argument varies but the other remains unchanged, then - `dot(x, x)` and/or `dot(y, y)` can be pre-computed. + This formulation has two advantages over other ways of computing distances. First, + it is computationally efficient when dealing with sparse data. Second, if one + argument varies but the other remains unchanged, then `dot(x, x)` and/or `dot(y, y)` + can be pre-computed. - However, this is not the most precise way of doing this computation, - because this equation potentially suffers from "catastrophic cancellation". - Also, the distance matrix returned by this function may not be exactly - symmetric as required by, e.g., ``scipy.spatial.distance`` functions. + However, this is not the most precise way of doing this computation, because this + equation potentially suffers from "catastrophic cancellation". 
Also, the distance + matrix returned by this function may not be exactly symmetric as required by, e.g., + ``scipy.spatial.distance`` functions. Read more in the :ref:`User Guide `. @@ -68,7 +71,8 @@ def pairwise_euclidean_distances( See Also -------- - paired_distances : Distances between pairs of elements of X and Y. + :func:`sklearn.metrics.pairwise.paired_distances` : Distances between pairs of + elements of X and Y. Notes ----- @@ -143,6 +147,9 @@ def pairwise_mahalanobis_distances( """ Calculate the pairwise Mahalanobis distance between two arrays. + .. math:: + d = XXX + Parameters: x : np.ndarray The first input array. diff --git a/src/skmatter/neighbors/__init__.py b/src/skmatter/neighbors/__init__.py index ea26ec380..58cd23bff 100644 --- a/src/skmatter/neighbors/__init__.py +++ b/src/skmatter/neighbors/__init__.py @@ -1,3 +1,10 @@ -from ._sparsekde import SparseKDE, covariance, effdim, oas +""" +Some introduction what the neighbors directory is about. +Get some inspiration from sklearn. 
-__all__ = ["SparseKDE", "covariance", "effdim", "oas"] +* :ref:`sparse-kde-api` TODO +""" + +from ._sparsekde import SparseKDE + +__all__ = ["SparseKDE"] diff --git a/src/skmatter/neighbors/_sparsekde.py b/src/skmatter/neighbors/_sparsekde.py index 3ece66950..e61b4c38f 100644 --- a/src/skmatter/neighbors/_sparsekde.py +++ b/src/skmatter/neighbors/_sparsekde.py @@ -7,7 +7,7 @@ from sklearn.utils.validation import check_is_fitted, check_random_state from tqdm import tqdm -from ..metrics.pairwise import ( +from ..metrics._pairwise import ( pairwise_euclidean_distances, pairwise_mahalanobis_distances, ) @@ -105,8 +105,8 @@ class SparseKDE(BaseEstimator): [ 4.08667637, 3.42457743]]), fpoints=0.5, qs=0.85, weights=array([5.e-05, 5.e-05, 5.e-05, ..., 5.e-05, 5.e-05, 5.e-05])) - >>> estimator.score(result) - 2.7671739267690363 + >>> round(estimator.score(result), 3) + 2.767 >>> estimator.sample() array([[3.32383366, 3.51779084]]) """ diff --git a/src/skmatter/utils/__init__.py b/src/skmatter/utils/__init__.py index 459e1caa4..a7f01591f 100644 --- a/src/skmatter/utils/__init__.py +++ b/src/skmatter/utils/__init__.py @@ -19,6 +19,18 @@ no_progress_bar, ) +from ._sparsekde import ( + NearestGridAssigner, + GaussianMixtureModel, + covariance, + local_population, + effdim, + oas, + quick_shift, + get_gabriel_graph, + rij, +) + __all__ = [ "get_progress_bar", "no_progress_bar", @@ -29,4 +41,13 @@ "X_orthogonalizer", "Y_sample_orthogonalizer", "Y_feature_orthogonalizer", + "NearestGridAssigner", + "GaussianMixtureModel", + "covariance", + "local_population", + "effdim", + "oas", + "quick_shift", + "get_gabriel_graph", + "rij", ] diff --git a/src/skmatter/utils/_sparsekde.py b/src/skmatter/utils/_sparsekde.py index 2ff4ec132..0e0a023ed 100644 --- a/src/skmatter/utils/_sparsekde.py +++ b/src/skmatter/utils/_sparsekde.py @@ -8,9 +8,7 @@ class NearestGridAssigner: - """NearestGridAssigner Class - Assign descriptor to its nearest grid. This is an auxilirary class. 
- + """Assign descriptor to its nearest grid. This is an auxiliary class. Parameters ----------