improvements to assessing curve fit (r2 and rmsd) (#57)

Improvements to metrics for assessing curve fit (see [here](https://github.com/jbloomlab/neutcurve/issues/55#issuecomment-2016975219)): - The coefficient of determination (``r2``) now is one if all points are fit by a straight line, rather than negative infinity. - A root-mean-square deviation (square root of the mean squared residual) is now calculated as the ``rmsd`` attribute of ``HillCurve`` objects and reported in fit parameter summaries from ``CurveFits``.
jbloomlab · Mar 25, 2024 · bc76d62 · bc76d62
1 parent 60b54ad
commit bc76d62
Show file tree

Hide file tree

Showing 8 changed files with 623 additions and 444 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -15,6 +15,10 @@ The format is based on `Keep a Changelog <https://keepachangelog.com>`_.
 
 - Add ``no_curve_fit_first`` argument to ``HillCurve`` to aid debugging/development.
 
+- Improvements to metrics for assessing curve fit (see `here <https://github.com/jbloomlab/neutcurve/issues/55#issuecomment-2016975219>`_):
+  - The coefficient of determination (``r2``) now is one if all points are fit by a straight line, rather than negative infinity.
+  - A root-mean-square deviation (square root of the mean squared residual) is now calculated as the ``rmsd`` attribute of ``HillCurve`` objects and reported in fit parameter summaries from ``CurveFits``.
+
 1.1.2
 -----
 

diff --git a/neutcurve/curvefits.py b/neutcurve/curvefits.py
@@ -537,6 +537,7 @@ def fitParams(
               - 'top': top of curve.
               - 'bottom': bottom of curve.
               - 'r2': coefficient of determination of fit
+              - 'rmsd': root-mean-square deviation of fit
 
         """
         if ic50_error not in {None, "fit_stdev"}:
@@ -566,6 +567,7 @@ def fitParams(
                 "top",
                 "bottom",
                 "r2",
+                "rmsd",
             ]
             for serum in self.sera:
                 for virus in self.viruses[serum]:

diff --git a/neutcurve/hillcurve.py b/neutcurve/hillcurve.py
@@ -120,6 +120,9 @@ class HillCurve:
         `r2` (float)
             Coefficient of determination indicating how well the curve fits the
             data (https://en.wikipedia.org/wiki/Coefficient_of_determination).
+        `rmsd` (float)
+            Root-mean-square deviation of fitted from actual values (square root of
+            the mean squared residual).
         `params_stdev` (dict or `None`)
             If standard deviations can be estimated on the fit
             parameters, keyed by 'bottom', 'top', 'midpoint',
@@ -322,6 +325,10 @@ class HillCurve:
     >>> round(neut.r2, 3)
     1.0
 
+    We can also quantify the goodness of fit with :attr:`HillCurve.rmsd`:
+    >>> round(neut.rmsd, 3)
+    0.0
+
     Now fit with bounds on the parameters. First, we make bounds cover the true values:
 
     >>> neut_bounds_cover = HillCurve(
@@ -337,6 +344,8 @@ class HillCurve:
     True
     >>> round(neut_bounds_cover.r2, 3)
     1.0
+    >>> round(neut_bounds_cover.rmsd, 3)
+    0.0
 
     Next fit with bounds that do not cover the true parameters:
     >>> neut_bounds_nocover = HillCurve(
@@ -352,6 +361,8 @@ class HillCurve:
     0.05
     >>> round(neut_bounds_nocover.r2, 2)
     0.99
+    >>> round(neut_bounds_nocover.rmsd, 3)
+    0.045
 
     Now fit with `infectivity_or_neutralized='neutralized'`, which is useful
     when the signal **increases** rather than decreases with increasing
@@ -629,7 +640,16 @@ def __init__(
         ssres = (
             (numpy.array([self.fracinfectivity(c) for c in self.cs]) - self.fs) ** 2
         ).sum()
-        self.r2 = 1 - ssres / sstot
+        if sstot == 0:
+            if ssres == 0:
+                self.r2 = 1.0
+            else:
+                self.r2 = 0.0
+        else:
+            self.r2 = 1.0 - ssres / sstot
+
+        # compute rmsd
+        self.rmsd = math.sqrt(ssres / len(self.cs))
 
     def _fit_curve(
         self,

diff --git a/notebooks/combine_curvefits.ipynb b/notebooks/combine_curvefits.ipynb
@@ -17,11 +17,11 @@
    "execution_count": 1,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-03-23T12:50:21.759483Z",
-     "iopub.status.busy": "2024-03-23T12:50:21.758464Z",
-     "iopub.status.idle": "2024-03-23T12:50:33.647118Z",
-     "shell.execute_reply": "2024-03-23T12:50:33.645350Z",
-     "shell.execute_reply.started": "2024-03-23T12:50:21.759449Z"
+     "iopub.execute_input": "2024-03-24T23:23:59.705113Z",
+     "iopub.status.busy": "2024-03-24T23:23:59.704634Z",
+     "iopub.status.idle": "2024-03-24T23:24:02.093882Z",
+     "shell.execute_reply": "2024-03-24T23:24:02.092535Z",
+     "shell.execute_reply.started": "2024-03-24T23:23:59.705078Z"
     }
    },
    "outputs": [],
@@ -43,11 +43,11 @@
    "execution_count": 2,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-03-23T12:50:33.649985Z",
-     "iopub.status.busy": "2024-03-23T12:50:33.648874Z",
-     "iopub.status.idle": "2024-03-23T12:50:33.661552Z",
-     "shell.execute_reply": "2024-03-23T12:50:33.660523Z",
-     "shell.execute_reply.started": "2024-03-23T12:50:33.649938Z"
+     "iopub.execute_input": "2024-03-24T23:24:02.099466Z",
+     "iopub.status.busy": "2024-03-24T23:24:02.099124Z",
+     "iopub.status.idle": "2024-03-24T23:24:02.108968Z",
+     "shell.execute_reply": "2024-03-24T23:24:02.108253Z",
+     "shell.execute_reply.started": "2024-03-24T23:24:02.099428Z"
     }
    },
    "outputs": [],
@@ -68,11 +68,11 @@
    "execution_count": 3,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-03-23T12:50:33.664360Z",
-     "iopub.status.busy": "2024-03-23T12:50:33.663975Z",
-     "iopub.status.idle": "2024-03-23T12:50:34.022519Z",
-     "shell.execute_reply": "2024-03-23T12:50:34.021361Z",
-     "shell.execute_reply.started": "2024-03-23T12:50:33.664331Z"
+     "iopub.execute_input": "2024-03-24T23:24:02.113186Z",
+     "iopub.status.busy": "2024-03-24T23:24:02.112951Z",
+     "iopub.status.idle": "2024-03-24T23:24:02.455126Z",
+     "shell.execute_reply": "2024-03-24T23:24:02.454289Z",
+     "shell.execute_reply.started": "2024-03-24T23:24:02.113160Z"
     },
     "tags": []
    },
@@ -94,11 +94,11 @@
    "execution_count": 4,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-03-23T12:50:34.026980Z",
-     "iopub.status.busy": "2024-03-23T12:50:34.026650Z",
-     "iopub.status.idle": "2024-03-23T12:50:34.581382Z",
-     "shell.execute_reply": "2024-03-23T12:50:34.580282Z",
-     "shell.execute_reply.started": "2024-03-23T12:50:34.026955Z"
+     "iopub.execute_input": "2024-03-24T23:24:02.462043Z",
+     "iopub.status.busy": "2024-03-24T23:24:02.461682Z",
+     "iopub.status.idle": "2024-03-24T23:24:03.071796Z",
+     "shell.execute_reply": "2024-03-24T23:24:03.070966Z",
+     "shell.execute_reply.started": "2024-03-24T23:24:02.462015Z"
     },
     "tags": []
    },
@@ -132,11 +132,11 @@
    "execution_count": 5,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-03-23T12:50:34.582583Z",
-     "iopub.status.busy": "2024-03-23T12:50:34.582359Z",
-     "iopub.status.idle": "2024-03-23T12:50:34.677224Z",
-     "shell.execute_reply": "2024-03-23T12:50:34.676140Z",
-     "shell.execute_reply.started": "2024-03-23T12:50:34.582562Z"
+     "iopub.execute_input": "2024-03-24T23:24:03.075999Z",
+     "iopub.status.busy": "2024-03-24T23:24:03.075781Z",
+     "iopub.status.idle": "2024-03-24T23:24:03.182389Z",
+     "shell.execute_reply": "2024-03-24T23:24:03.181609Z",
+     "shell.execute_reply.started": "2024-03-24T23:24:03.075975Z"
     },
     "tags": []
    },
@@ -162,11 +162,11 @@
    "execution_count": 6,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-03-23T12:50:34.678443Z",
-     "iopub.status.busy": "2024-03-23T12:50:34.678228Z",
-     "iopub.status.idle": "2024-03-23T12:50:34.752574Z",
-     "shell.execute_reply": "2024-03-23T12:50:34.751838Z",
-     "shell.execute_reply.started": "2024-03-23T12:50:34.678421Z"
+     "iopub.execute_input": "2024-03-24T23:24:03.186844Z",
+     "iopub.status.busy": "2024-03-24T23:24:03.186527Z",
+     "iopub.status.idle": "2024-03-24T23:24:03.257297Z",
+     "shell.execute_reply": "2024-03-24T23:24:03.256546Z",
+     "shell.execute_reply.started": "2024-03-24T23:24:03.186819Z"
     },
     "scrolled": true
    },
@@ -206,6 +206,7 @@
        "      <th>top</th>\n",
        "      <th>bottom</th>\n",
        "      <th>r2</th>\n",
+       "      <th>rmsd</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -225,6 +226,7 @@
        "      <td>1.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.996</td>\n",
+       "      <td>0.028</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -242,6 +244,7 @@
        "      <td>1.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.986</td>\n",
+       "      <td>0.053</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -259,6 +262,7 @@
        "      <td>1.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.982</td>\n",
+       "      <td>0.060</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -276,6 +280,7 @@
        "      <td>1.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.992</td>\n",
+       "      <td>0.039</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -293,6 +298,7 @@
        "      <td>1.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.980</td>\n",
+       "      <td>0.069</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
@@ -310,6 +316,7 @@
        "      <td>1.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.994</td>\n",
+       "      <td>0.035</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
@@ -327,6 +334,7 @@
        "      <td>1.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.990</td>\n",
+       "      <td>0.047</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -342,14 +350,14 @@
        "5  FI6v3  P80D         3         <NA>  0.013  interpolated   0.0128     0.013   \n",
        "6  FI6v3  P80D   average            2  0.012  interpolated   0.0125     0.012   \n",
        "\n",
-       "   midpoint_bound midpoint_bound_type  slope  top  bottom     r2  \n",
-       "0           0.017        interpolated  2.505  1.0     0.0  0.996  \n",
-       "1           0.019        interpolated  2.513  1.0     0.0  0.986  \n",
-       "2           0.015        interpolated  1.878  1.0     0.0  0.982  \n",
-       "3           0.017        interpolated  2.279  1.0     0.0  0.992  \n",
-       "4           0.012        interpolated  2.025  1.0     0.0  0.980  \n",
-       "5           0.013        interpolated  2.059  1.0     0.0  0.994  \n",
-       "6           0.012        interpolated  2.035  1.0     0.0  0.990  "
+       "   midpoint_bound midpoint_bound_type  slope  top  bottom     r2   rmsd  \n",
+       "0           0.017        interpolated  2.505  1.0     0.0  0.996  0.028  \n",
+       "1           0.019        interpolated  2.513  1.0     0.0  0.986  0.053  \n",
+       "2           0.015        interpolated  1.878  1.0     0.0  0.982  0.060  \n",
+       "3           0.017        interpolated  2.279  1.0     0.0  0.992  0.039  \n",
+       "4           0.012        interpolated  2.025  1.0     0.0  0.980  0.069  \n",
+       "5           0.013        interpolated  2.059  1.0     0.0  0.994  0.035  \n",
+       "6           0.012        interpolated  2.035  1.0     0.0  0.990  0.047  "
       ]
      },
      "execution_count": 6,
@@ -395,11 +403,11 @@
    "execution_count": 7,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-03-23T12:50:34.753685Z",
-     "iopub.status.busy": "2024-03-23T12:50:34.753493Z",
-     "iopub.status.idle": "2024-03-23T12:50:35.875072Z",
-     "shell.execute_reply": "2024-03-23T12:50:35.873283Z",
-     "shell.execute_reply.started": "2024-03-23T12:50:34.753666Z"
+     "iopub.execute_input": "2024-03-24T23:24:03.261594Z",
+     "iopub.status.busy": "2024-03-24T23:24:03.261369Z",
+     "iopub.status.idle": "2024-03-24T23:24:04.077435Z",
+     "shell.execute_reply": "2024-03-24T23:24:04.076116Z",
+     "shell.execute_reply.started": "2024-03-24T23:24:03.261569Z"
     }
    },
    "outputs": [
@@ -411,7 +419,7 @@
       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
       "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
       "Cell \u001b[0;32mIn[7], line 3\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;66;03m# NBVAL_RAISES_EXCEPTION\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m \u001b[43mneutcurve\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mCurveFits\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcombineCurveFits\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43mfit1\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfit2_invalid\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m~/neutcurve/neutcurve/curvefits.py:204\u001b[0m, in \u001b[0;36mCurveFits.combineCurveFits\u001b[0;34m(curvefits_list, sera, viruses, serum_virus_replicates_to_drop)\u001b[0m\n\u001b[1;32m    193\u001b[0m combined_fits\u001b[38;5;241m.\u001b[39mdf \u001b[38;5;241m=\u001b[39m combined_fits\u001b[38;5;241m.\u001b[39m_get_avg_and_stderr_df(combined_fits\u001b[38;5;241m.\u001b[39mdf)\n\u001b[1;32m    194\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(combined_fits\u001b[38;5;241m.\u001b[39mdf) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mlen\u001b[39m(\n\u001b[1;32m    195\u001b[0m     combined_fits\u001b[38;5;241m.\u001b[39mdf\u001b[38;5;241m.\u001b[39mgroupby(\n\u001b[1;32m    196\u001b[0m         [\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    202\u001b[0m     )\n\u001b[1;32m    203\u001b[0m ):\n\u001b[0;32m--> 204\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mduplicated sera/virus/replicate in `curvefits_list`\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    206\u001b[0m \u001b[38;5;66;03m# combine sera\u001b[39;00m\n\u001b[1;32m    207\u001b[0m combined_fits\u001b[38;5;241m.\u001b[39msera \u001b[38;5;241m=\u001b[39m combined_fits\u001b[38;5;241m.\u001b[39mdf[combined_fits\u001b[38;5;241m.\u001b[39mserum_col]\u001b[38;5;241m.\u001b[39munique()\u001b[38;5;241m.\u001b[39mtolist()\n",
+      "File \u001b[0;32m~/neutcurve/neutcurve/curvefits.py:207\u001b[0m, in \u001b[0;36mCurveFits.combineCurveFits\u001b[0;34m(curvefits_list, sera, viruses, serum_virus_replicates_to_drop)\u001b[0m\n\u001b[1;32m    196\u001b[0m combined_fits\u001b[38;5;241m.\u001b[39mdf \u001b[38;5;241m=\u001b[39m combined_fits\u001b[38;5;241m.\u001b[39m_get_avg_and_stderr_df(combined_fits\u001b[38;5;241m.\u001b[39mdf)\n\u001b[1;32m    197\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(combined_fits\u001b[38;5;241m.\u001b[39mdf) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mlen\u001b[39m(\n\u001b[1;32m    198\u001b[0m     combined_fits\u001b[38;5;241m.\u001b[39mdf\u001b[38;5;241m.\u001b[39mgroupby(\n\u001b[1;32m    199\u001b[0m         [\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    205\u001b[0m     )\n\u001b[1;32m    206\u001b[0m ):\n\u001b[0;32m--> 207\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mduplicated sera/virus/replicate in `curvefits_list`\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    209\u001b[0m \u001b[38;5;66;03m# combine sera\u001b[39;00m\n\u001b[1;32m    210\u001b[0m combined_fits\u001b[38;5;241m.\u001b[39msera \u001b[38;5;241m=\u001b[39m combined_fits\u001b[38;5;241m.\u001b[39mdf[combined_fits\u001b[38;5;241m.\u001b[39mserum_col]\u001b[38;5;241m.\u001b[39munique()\u001b[38;5;241m.\u001b[39mtolist()\n",
       "\u001b[0;31mValueError\u001b[0m: duplicated sera/virus/replicate in `curvefits_list`"
      ]
     }