Clean up the docstrings
FanwangM committed Sep 8, 2024
1 parent 0722a34 commit e452ea9
Showing 7 changed files with 14 additions and 13 deletions.
2 changes: 1 addition & 1 deletion book/content/api_measures_similarity.rst
@@ -1,7 +1,7 @@
.. _measures.similarity:

:mod:`selector.measures.similarity`
==================================
===================================

.. automodule:: selector.measures.similarity
:members:
2 changes: 1 addition & 1 deletion book/content/api_methods_utils.rst
@@ -1,7 +1,7 @@
.. _methods.utils:

:mod:`selector.methods.utils`
============================
=============================

.. automodule:: selector.methods.utils
:members:
1 change: 0 additions & 1 deletion selector/measures/__init__.py
@@ -21,4 +21,3 @@
# along with this program; if not, see <http://www.gnu.org/licenses/>
#
# --

3 changes: 1 addition & 2 deletions selector/measures/converter.py
@@ -294,8 +294,7 @@ def co_occurrence(x: np.ndarray) -> np.ndarray:
r"""Calculate distance array from frequency using the co-occurrence method.
.. math::
\delta_{ij} = \left(1 + \frac{f_{ij}\sum_{i,j}{f_{ij}}}
{\sum_{i}{f_{ij}}\sum_{j}{f_{ij}}} \right)^{-1}
\delta_{ij} = \left(1 + \frac{f_{ij}\sum_{i,j}{f_{ij}}}{\sum_{i}{f_{ij}}\sum_{j}{f_{ij}}} \right)^{-1}
where :math:`\delta_{ij}` is the distance between points :math:`i`
and :math:`j`, and :math:`f_{ij}` is their frequency.
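For readers skimming this hunk, the formula above maps larger co-occurrence frequencies to smaller distances. Below is a minimal sketch of how the documented formula can be evaluated with NumPy; it is an illustration only, not selector's implementation, and the name co_occurrence_sketch and the toy frequency matrix are invented for this example.

import numpy as np

def co_occurrence_sketch(f: np.ndarray) -> np.ndarray:
    # delta_ij = (1 + f_ij * sum(f) / (row_sum_i * col_sum_j)) ** -1
    total = f.sum()
    row_sums = f.sum(axis=1, keepdims=True)   # sum over j for each i
    col_sums = f.sum(axis=0, keepdims=True)   # sum over i for each j
    return 1.0 / (1.0 + f * total / (row_sums * col_sums))

freq = np.array([[4.0, 1.0], [1.0, 3.0]])
print(co_occurrence_sketch(freq))  # larger frequencies give smaller distances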
3 changes: 3 additions & 0 deletions selector/measures/diversity.py
@@ -200,6 +200,8 @@ def shannon_entropy(x: np.ndarray, normalize=True, truncation=False) -> float:
But please note, when `completeness` is False and `normalize` is True, the formula has not been
used in any literature. It is just a simple normalization of the entropy and the user can use it at their own risk.
References
----------
.. [1] Wang, Y., Geppert, H., & Bajorath, J. (2009). Shannon entropy-based fingerprint similarity
search strategy. Journal of Chemical Information and Modeling, 49(7), 1687-1691.
.. [2] Leguy, J., Glavatskikh, M., Cauchy, T., & Da Mota, B. (2021). Scalable estimator of the
@@ -284,6 +286,7 @@ def wdud(x: np.ndarray) -> float:
r"""Compute the Wasserstein Distance to Uniform Distribution(WDUD).
The equation for the Wasserstein Distance for a single feature to uniform distribution is
.. math::
WDUD(x) = \int_{0}^{1} |U(x) - V(x)|dx
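As a quick illustration of the WDUD integral documented above, here is a hedged numerical sketch for a single feature: it min-max scales the values, builds the empirical CDF V(x), and integrates |U(x) - V(x)| against the uniform CDF U(x) on a grid. The helper name wdud_1d_sketch and the sample data are invented for this example; selector's wdud may handle multiple features and edge cases differently.

import numpy as np

def wdud_1d_sketch(values: np.ndarray, n_grid: int = 10_000) -> float:
    v = (values - values.min()) / (values.max() - values.min())        # scale to [0, 1]
    grid = np.linspace(0.0, 1.0, n_grid)
    u_cdf = grid                                                        # CDF of the uniform distribution on [0, 1]
    v_cdf = np.searchsorted(np.sort(v), grid, side="right") / v.size   # empirical CDF V(x)
    return float(np.trapz(np.abs(u_cdf - v_cdf), grid))                # integral of |U(x) - V(x)|

print(wdud_1d_sketch(np.array([0.0, 0.1, 0.2, 1.0])))  # clustered values give a larger WDUD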
6 changes: 3 additions & 3 deletions selector/measures/similarity.py
@@ -121,7 +121,7 @@ def modified_tanimoto(a: np.array, b: np.array) -> float:
shorter vectors using a Bernoulli probability model.
.. math::
mt = \frac{2-p}{3} T_1 + \frac{1+p}{3} T_0
{mt} = \frac{2-p}{3} T_1 + \frac{1+p}{3} T_0
where :math:`p` is success probability of independent trials,
:math:`T_1` is the number of common '1' bits between data points
@@ -146,10 +146,10 @@ def modified_tanimoto(a: np.array, b: np.array) -> float:
The equation above has been derived from
.. math::
mt_{\alpha} = {\alpha}T_1 + (1-\alpha)T_0
{mt}_{\alpha} = {\alpha}T_1 + (1-\alpha)T_0
where :math:`\alpha = \frac{2-p}{3}`. This is done so that the expected value
of the modified tanimoto, :math:`E(MT)`, remains constant even as the number of
of the modified tanimoto, :math:`E(mt)`, remains constant even as the number of
trials :math:`p` grows larger.
Fligner, M. A., Verducci, J. S., and Blower, P. E.. (2002)
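The two changes above only adjust notation ({mt} instead of mt, E(mt) instead of E(MT)); the underlying definition is unchanged. For context, here is a hedged sketch of the formula using the definitions from the cited Fligner, Verducci, and Blower (2002) paper, where T_1 is the Tanimoto coefficient over the 1-bits, T_0 the Tanimoto coefficient over the 0-bits, and p the mean fraction of on-bits. These definitions are assumptions drawn from that reference, not copied from selector's code, and the function name is made up for this example.

import numpy as np

def modified_tanimoto_sketch(a: np.ndarray, b: np.ndarray) -> float:
    # mt = (2 - p)/3 * T_1 + (1 + p)/3 * T_0
    n = a.size
    n_11 = np.sum((a == 1) & (b == 1))        # common "on" bits
    n_00 = np.sum((a == 0) & (b == 0))        # common "off" bits
    t_1 = n_11 / (n - n_00)                   # Tanimoto over the 1-bits
    t_0 = n_00 / (n - n_11)                   # Tanimoto over the 0-bits
    p = (a.sum() + b.sum()) / (2 * n)         # mean fraction of on-bits
    return (2 - p) / 3 * t_1 + (1 + p) / 3 * t_0

a = np.array([1, 1, 0, 0, 1])
b = np.array([1, 0, 0, 0, 1])
print(modified_tanimoto_sketch(a, b))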
10 changes: 5 additions & 5 deletions selector/methods/similarity.py
@@ -187,6 +187,7 @@ def _scale_data(self, X: np.ndarray):
r"""Scales the data between so it can be used with the similarity indexes.
First each data point is normalized to be between 0 and 1.
.. math::
x_{ij} = \frac{x_{ij} - min(x_j)}{max(x_j) - min(x_j)}
@@ -196,14 +197,14 @@ def _scale_data(self, X: np.ndarray):
.. math::
w_{ij} = 1 - | x_{ij} - a_j |
where :math:`x_{ij}` is the element of the normalized array, and :math:`a_j` is the average of the j-th
where :math:`x_{ij}` is the element of the normalized array,
and :math:`a_j` is the average of the j-th
column of the normalized array.
Parameters
----------
X: np.ndarray
Array of features (columns) for each sample (rows).
"""
min_value = np.min(X)
max_value = np.max(X)
@@ -428,9 +429,8 @@ def select_from_cluster(
Array of integers or strings representing the points ids of the data that belong to the
current cluster. If `None`, all the samples in the data are treated as one cluster.
start: str or list
srt: key on what is used to start the selection
{'medoid', 'random', 'outlier'}
list: indices of points that are included in the selection since the beginning
srt: key on what is used to start the selection {'medoid', 'random', 'outlier'}.
list: indices of points that are included in the selection since the beginning.
Returns
-------
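The _scale_data hunk above mostly reflows the docstring, but the documented transformation is worth seeing end to end: min-max scale each column, then weight each entry by its closeness to the column average. The sketch below follows those two documented formulas; the name scale_data_sketch and the toy array are invented here, and selector's actual _scale_data may differ in its details.

import numpy as np

def scale_data_sketch(x: np.ndarray) -> np.ndarray:
    col_min = x.min(axis=0)
    col_max = x.max(axis=0)
    x_norm = (x - col_min) / (col_max - col_min)   # x_ij = (x_ij - min(x_j)) / (max(x_j) - min(x_j))
    col_avg = x_norm.mean(axis=0)                  # a_j, the column average
    return 1.0 - np.abs(x_norm - col_avg)          # w_ij = 1 - |x_ij - a_j|

data = np.array([[0.0, 10.0], [5.0, 20.0], [10.0, 30.0]])
print(scale_data_sketch(data))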
