Clean up and fix equations in docstrings

theochem · Sep 8, 2024 · 63d9de7 · 63d9de7
1 parent c4d2df0
commit 63d9de7
Show file tree

Hide file tree

Showing 4 changed files with 23 additions and 21 deletions.
diff --git a/.github/workflows/website_auto.yaml b/.github/workflows/website_auto.yaml
@@ -45,6 +45,7 @@ jobs:
       # Build the book
       - name: Build the book
         run: |
+          cp notebooks/*.ipynb book/content/.
           jupyter-book build ./book/content
 
       # Push the book's HTML to github-pages

diff --git a/selector/measures/converter.py b/selector/measures/converter.py
@@ -133,7 +133,7 @@ def reverse(x: np.ndarray) -> np.ndarray:
     r"""Calculate distance array from similarity using the reverse method.
 
     .. math::
-    \delta_{ij} = min(s_{ij}) + max(s_{ij}) - s_{ij}
+        \delta_{ij} = min(s_{ij}) + max(s_{ij}) - s_{ij}
 
     where :math:`\delta_{ij}` is the distance between points :math:`i`
     and :math:`j`, :math:`s_{ij}` is their similarity coefficient,
@@ -159,7 +159,7 @@ def reciprocal(x: np.ndarray) -> np.ndarray:
     r"""Calculate distance array from similarity using the reciprocal method.
 
     .. math::
-    \delta_{ij} = \frac{1}{s_{ij}}
+        \delta_{ij} = \frac{1}{s_{ij}}
 
     where :math:`\delta_{ij}` is the distance between points :math:`i`
     and :math:`j`, and :math:`s_{ij}` is their similarity coefficient.
@@ -186,7 +186,7 @@ def exponential(x: np.ndarray) -> np.ndarray:
     r"""Calculate distance matrix from similarity using the exponential method.
 
     .. math::
-    \delta_{ij} = -\ln{\frac{s_{ij}}{max(s_{ij})}}
+        \delta_{ij} = -\ln{\frac{s_{ij}}{max(s_{ij})}}
 
     where :math:`\delta_{ij}` is the distance between points :math:`i`
     and :math:`j`, and :math:`s_{ij}` is their similarity coefficient.
@@ -213,7 +213,7 @@ def gaussian(x: np.ndarray) -> np.ndarray:
     r"""Calculate distance matrix from similarity using the Gaussian method.
 
     .. math::
-    \delta_{ij} = \sqrt{-\ln{\frac{s_{ij}}{max(s_{ij})}}}
+        \delta_{ij} = \sqrt{-\ln{\frac{s_{ij}}{max(s_{ij})}}}
 
     where :math:`\delta_{ij}` is the distance between points :math:`i`
     and :math:`j`, and :math:`s_{ij}` is their similarity coefficient.
@@ -241,7 +241,7 @@ def correlation(x: np.ndarray) -> np.ndarray:
     r"""Calculate distance array from correlation array.
 
     .. math::
-    \delta_{ij} = \sqrt{1 - r_{ij}}
+        \delta_{ij} = \sqrt{1 - r_{ij}}
 
     where :math:`\delta_{ij}` is the distance between points :math:`i`
     and :math:`j`, and :math:`r_{ij}` is their correlation.
@@ -270,7 +270,7 @@ def transition(x: np.ndarray) -> np.ndarray:
     r"""Calculate distance array from frequency using the transition method.
 
     .. math::
-    \delta_{ij} = \frac{1}{\sqrt{f_{ij}}}
+        \delta_{ij} = \frac{1}{\sqrt{f_{ij}}}
 
     where :math:`\delta_{ij}` is the distance between points :math:`i`
     and :math:`j`, and :math:`f_{ij}` is their frequency.
@@ -294,8 +294,8 @@ def co_occurrence(x: np.ndarray) -> np.ndarray:
     r"""Calculate distance array from frequency using the co-occurrence method.
 
     .. math::
-    \delta_{ij} =  \left(1 + \frac{f_{ij}\sum_{i,j}{f_{ij}}}
-    {\sum_{i}{f_{ij}}\sum_{j}{f_{ij}}} \right)^{-1}
+        \delta_{ij} =  \left(1 + \frac{f_{ij}\sum_{i,j}{f_{ij}}}
+                       {\sum_{i}{f_{ij}}\sum_{j}{f_{ij}}} \right)^{-1}
 
     where :math:`\delta_{ij}` is the distance between points :math:`i`
     and :math:`j`, and :math:`f_{ij}` is their frequency.
@@ -360,7 +360,7 @@ def probability(x: np.ndarray) -> np.ndarray:
     r"""Calculate distance array from probability array.
 
     .. math::
-    \delta_{ij} = \sqrt{-\ln{\frac{s_{ij}}{max(s_{ij})}}}
+        \delta_{ij} = \sqrt{-\ln{\frac{s_{ij}}{max(s_{ij})}}}
 
     where :math:`\delta_{ij}` is the distance between points :math:`i`
     and :math:`j`, and :math:`p_{ij}` is their probability.
@@ -390,7 +390,7 @@ def covariance(x: np.ndarray) -> np.ndarray:
     r"""Calculate distance array from similarity using the covariance method.
 
     .. math::
-    \delta_{ij} = \sqrt{s_{ii}+s_{jj}-2s_{ij}}
+        \delta_{ij} = \sqrt{s_{ii}+s_{jj}-2s_{ij}}
 
     where :math:`\delta_{ij}` is the distance between points :math:`i`
     and :math:`j`, :math:`s_{ii}` and :math:`s_{jj}` are the variances

diff --git a/selector/measures/similarity.py b/selector/measures/similarity.py
@@ -80,7 +80,7 @@ def tanimoto(a: np.array, b: np.array) -> float:
     For two binary or non-binary arrays :math:`A` and :math:`B`, Tanimoto coefficient
     is defined as the size of their intersection divided by the size of their union:
 
-    ..math::
+    .. math::
         T(A, B) = \frac{| A \cap B|}{| A \cup B |} =
         \frac{| A \cap B|}{|A| + |B| - | A \cap B|} =
         \frac{A \cdot B}{\|A\|^2 + \|B\|^2 - A \cdot B}
@@ -120,8 +120,8 @@ def modified_tanimoto(a: np.array, b: np.array) -> float:
     Adjusts calculation of the Tanimoto coefficient to counter its natural bias towards
     shorter vectors using a Bernoulli probability model.
 
-    ..math::
-    MT = \frac{2-p}{3}T_1 + \frac{1+p}{3}T_0
+    .. math::
+        mt = \frac{2-p}{3} T_1 + \frac{1+p}{3} T_0
 
     where :math:`p` is success probability of independent trials,
     :math:`T_1` is the number of common '1' bits between data points
@@ -145,8 +145,8 @@ def modified_tanimoto(a: np.array, b: np.array) -> float:
     -----
     The equation above has been derived from
 
-    ..math::
-    MT_\alpha= {\alpha}T_1 + (1-\alpha)T_0
+    .. math::
+       mt_{\alpha} = {\alpha}T_1 + (1-\alpha)T_0
 
     where :math:`\alpha = \frac{2-p}{3}`. This is done so that the expected value
     of the modified tanimoto, :math:`E(mt)`, remains constant even as the number of
@@ -196,10 +196,10 @@ def modified_tanimoto(a: np.array, b: np.array) -> float:
 
 
 def scaled_similarity_matrix(X: np.array) -> np.ndarray:
-    """Compute the scaled similarity matrix.
+    r"""Compute the scaled similarity matrix.
 
-    ..math::
-    X(i,j)=\frac{X(i,j)}{\\sqrt{X(i,i)X(j,j)}}
+    .. math::
+        X(i,j) = \frac{X(i,j)}{\sqrt{X(i,i)X(j,j)}}
 
     Parameters
     ----------

diff --git a/selector/methods/similarity.py b/selector/methods/similarity.py
@@ -188,21 +188,22 @@ def _scale_data(self, X: np.ndarray):
 
         First each data point is normalized to be between 0 and 1.
         .. math::
-            x_{ij} = \\frac{x_{ij} - min(x_j)}{max(x_j) - min(x_j)}
+            x_{ij} = \frac{x_{ij} - min(x_j)}{max(x_j) - min(x_j)}
 
         Then, the average of each column is calculated. Finally, each element of the final working
         array will be defined as
 
         .. math::
-            w_ij = 1 - | x_ij - a_j |
+            w_{ij} = 1 - | x_{ij} - a_j |
 
-        where $x_ij$ is the element of the normalized array, and $a_j$ is the average of the j-th
+        where :math:`x_{ij}` is the element of the normalized array, and :math:`a_j` is the average of the j-th
         column of the normalized array.
 
         Parameters
         ----------
         X: np.ndarray
             Array of features (columns) for each sample (rows).
+
         """
         min_value = np.min(X)
         max_value = np.max(X)