Skip to content

Commit

Permalink
improvements to assessing curve fit (r2 and rmsd) (#57)
Browse files Browse the repository at this point in the history
Improvements to metrics for assessing curve fit (see [here](https://github.com/jbloomlab/neutcurve/issues/55#issuecomment-2016975219)):
  - The coefficient of determination (``r2``) is now one if all points are fit by a straight line, rather than negative infinity.
  - A root-mean-square deviation (square root of the mean squared residual) is now calculated as the ``rmsd`` attribute of ``HillCurve`` objects and reported in fit parameter summaries from ``CurveFits``.
  • Loading branch information
jbloom committed Mar 25, 2024
1 parent 60b54ad commit bc76d62
Show file tree
Hide file tree
Showing 8 changed files with 623 additions and 444 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ The format is based on `Keep a Changelog <https://keepachangelog.com>`_.

- Add ``no_curve_fit_first`` argument to ``HillCurve`` to aid debugging/development.

- Improvements to metrics for assessing curve fit (see `here <https://github.com/jbloomlab/neutcurve/issues/55#issuecomment-2016975219>`_):
- The coefficient of determination (``r2``) is now one if all points are fit by a straight line, rather than negative infinity.
- A root-mean-square deviation (square root of the mean squared residual) is now calculated as the ``rmsd`` attribute of ``HillCurve`` objects and reported in fit parameter summaries from ``CurveFits``.

1.1.2
-----

Expand Down
2 changes: 2 additions & 0 deletions neutcurve/curvefits.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,7 @@ def fitParams(
- 'top': top of curve.
- 'bottom': bottom of curve.
- 'r2': coefficient of determination of fit
- 'rmsd': root-mean-square deviation of fit
"""
if ic50_error not in {None, "fit_stdev"}:
Expand Down Expand Up @@ -566,6 +567,7 @@ def fitParams(
"top",
"bottom",
"r2",
"rmsd",
]
for serum in self.sera:
for virus in self.viruses[serum]:
Expand Down
22 changes: 21 additions & 1 deletion neutcurve/hillcurve.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@ class HillCurve:
`r2` (float)
Coefficient of determination indicating how well the curve fits the
data (https://en.wikipedia.org/wiki/Coefficient_of_determination).
`rmsd` (float)
Root-mean-square deviation of fitted from actual values (square root of the
mean squared residual).
`params_stdev` (dict or `None`)
If standard deviations can be estimated on the fit
parameters, keyed by 'bottom', 'top', 'midpoint',
Expand Down Expand Up @@ -322,6 +325,10 @@ class HillCurve:
>>> round(neut.r2, 3)
1.0
We can also quantify the goodness of fit with :attr:`HillCurve.rmsd`:
>>> round(neut.rmsd, 3)
0.0
Now fit with bounds on the parameters. First, we make bounds cover the true values:
>>> neut_bounds_cover = HillCurve(
Expand All @@ -337,6 +344,8 @@ class HillCurve:
True
>>> round(neut_bounds_cover.r2, 3)
1.0
>>> round(neut_bounds_cover.rmsd, 3)
0.0
Next fit with bounds that do not cover the true parameters:
>>> neut_bounds_nocover = HillCurve(
Expand All @@ -352,6 +361,8 @@ class HillCurve:
0.05
>>> round(neut_bounds_nocover.r2, 2)
0.99
>>> round(neut_bounds_nocover.rmsd, 3)
0.045
Now fit with `infectivity_or_neutralized='neutralized'`, which is useful
when the signal **increases** rather than decreases with increasing
Expand Down Expand Up @@ -629,7 +640,16 @@ def __init__(
ssres = (
(numpy.array([self.fracinfectivity(c) for c in self.cs]) - self.fs) ** 2
).sum()
self.r2 = 1 - ssres / sstot
if sstot == 0:
if ssres == 0:
self.r2 = 1.0
else:
self.r2 = 0.0
else:
self.r2 = 1.0 - ssres / sstot

# compute rmsd
self.rmsd = math.sqrt(ssres / len(self.cs))

def _fit_curve(
self,
Expand Down
96 changes: 52 additions & 44 deletions notebooks/combine_curvefits.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@
"execution_count": 1,
"metadata": {
"execution": {
"iopub.execute_input": "2024-03-23T12:50:21.759483Z",
"iopub.status.busy": "2024-03-23T12:50:21.758464Z",
"iopub.status.idle": "2024-03-23T12:50:33.647118Z",
"shell.execute_reply": "2024-03-23T12:50:33.645350Z",
"shell.execute_reply.started": "2024-03-23T12:50:21.759449Z"
"iopub.execute_input": "2024-03-24T23:23:59.705113Z",
"iopub.status.busy": "2024-03-24T23:23:59.704634Z",
"iopub.status.idle": "2024-03-24T23:24:02.093882Z",
"shell.execute_reply": "2024-03-24T23:24:02.092535Z",
"shell.execute_reply.started": "2024-03-24T23:23:59.705078Z"
}
},
"outputs": [],
Expand All @@ -43,11 +43,11 @@
"execution_count": 2,
"metadata": {
"execution": {
"iopub.execute_input": "2024-03-23T12:50:33.649985Z",
"iopub.status.busy": "2024-03-23T12:50:33.648874Z",
"iopub.status.idle": "2024-03-23T12:50:33.661552Z",
"shell.execute_reply": "2024-03-23T12:50:33.660523Z",
"shell.execute_reply.started": "2024-03-23T12:50:33.649938Z"
"iopub.execute_input": "2024-03-24T23:24:02.099466Z",
"iopub.status.busy": "2024-03-24T23:24:02.099124Z",
"iopub.status.idle": "2024-03-24T23:24:02.108968Z",
"shell.execute_reply": "2024-03-24T23:24:02.108253Z",
"shell.execute_reply.started": "2024-03-24T23:24:02.099428Z"
}
},
"outputs": [],
Expand All @@ -68,11 +68,11 @@
"execution_count": 3,
"metadata": {
"execution": {
"iopub.execute_input": "2024-03-23T12:50:33.664360Z",
"iopub.status.busy": "2024-03-23T12:50:33.663975Z",
"iopub.status.idle": "2024-03-23T12:50:34.022519Z",
"shell.execute_reply": "2024-03-23T12:50:34.021361Z",
"shell.execute_reply.started": "2024-03-23T12:50:33.664331Z"
"iopub.execute_input": "2024-03-24T23:24:02.113186Z",
"iopub.status.busy": "2024-03-24T23:24:02.112951Z",
"iopub.status.idle": "2024-03-24T23:24:02.455126Z",
"shell.execute_reply": "2024-03-24T23:24:02.454289Z",
"shell.execute_reply.started": "2024-03-24T23:24:02.113160Z"
},
"tags": []
},
Expand All @@ -94,11 +94,11 @@
"execution_count": 4,
"metadata": {
"execution": {
"iopub.execute_input": "2024-03-23T12:50:34.026980Z",
"iopub.status.busy": "2024-03-23T12:50:34.026650Z",
"iopub.status.idle": "2024-03-23T12:50:34.581382Z",
"shell.execute_reply": "2024-03-23T12:50:34.580282Z",
"shell.execute_reply.started": "2024-03-23T12:50:34.026955Z"
"iopub.execute_input": "2024-03-24T23:24:02.462043Z",
"iopub.status.busy": "2024-03-24T23:24:02.461682Z",
"iopub.status.idle": "2024-03-24T23:24:03.071796Z",
"shell.execute_reply": "2024-03-24T23:24:03.070966Z",
"shell.execute_reply.started": "2024-03-24T23:24:02.462015Z"
},
"tags": []
},
Expand Down Expand Up @@ -132,11 +132,11 @@
"execution_count": 5,
"metadata": {
"execution": {
"iopub.execute_input": "2024-03-23T12:50:34.582583Z",
"iopub.status.busy": "2024-03-23T12:50:34.582359Z",
"iopub.status.idle": "2024-03-23T12:50:34.677224Z",
"shell.execute_reply": "2024-03-23T12:50:34.676140Z",
"shell.execute_reply.started": "2024-03-23T12:50:34.582562Z"
"iopub.execute_input": "2024-03-24T23:24:03.075999Z",
"iopub.status.busy": "2024-03-24T23:24:03.075781Z",
"iopub.status.idle": "2024-03-24T23:24:03.182389Z",
"shell.execute_reply": "2024-03-24T23:24:03.181609Z",
"shell.execute_reply.started": "2024-03-24T23:24:03.075975Z"
},
"tags": []
},
Expand All @@ -162,11 +162,11 @@
"execution_count": 6,
"metadata": {
"execution": {
"iopub.execute_input": "2024-03-23T12:50:34.678443Z",
"iopub.status.busy": "2024-03-23T12:50:34.678228Z",
"iopub.status.idle": "2024-03-23T12:50:34.752574Z",
"shell.execute_reply": "2024-03-23T12:50:34.751838Z",
"shell.execute_reply.started": "2024-03-23T12:50:34.678421Z"
"iopub.execute_input": "2024-03-24T23:24:03.186844Z",
"iopub.status.busy": "2024-03-24T23:24:03.186527Z",
"iopub.status.idle": "2024-03-24T23:24:03.257297Z",
"shell.execute_reply": "2024-03-24T23:24:03.256546Z",
"shell.execute_reply.started": "2024-03-24T23:24:03.186819Z"
},
"scrolled": true
},
Expand Down Expand Up @@ -206,6 +206,7 @@
" <th>top</th>\n",
" <th>bottom</th>\n",
" <th>r2</th>\n",
" <th>rmsd</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
Expand All @@ -225,6 +226,7 @@
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.996</td>\n",
" <td>0.028</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
Expand All @@ -242,6 +244,7 @@
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.986</td>\n",
" <td>0.053</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
Expand All @@ -259,6 +262,7 @@
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.982</td>\n",
" <td>0.060</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
Expand All @@ -276,6 +280,7 @@
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.992</td>\n",
" <td>0.039</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
Expand All @@ -293,6 +298,7 @@
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.980</td>\n",
" <td>0.069</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
Expand All @@ -310,6 +316,7 @@
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.994</td>\n",
" <td>0.035</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
Expand All @@ -327,6 +334,7 @@
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.990</td>\n",
" <td>0.047</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
Expand All @@ -342,14 +350,14 @@
"5 FI6v3 P80D 3 <NA> 0.013 interpolated 0.0128 0.013 \n",
"6 FI6v3 P80D average 2 0.012 interpolated 0.0125 0.012 \n",
"\n",
" midpoint_bound midpoint_bound_type slope top bottom r2 \n",
"0 0.017 interpolated 2.505 1.0 0.0 0.996 \n",
"1 0.019 interpolated 2.513 1.0 0.0 0.986 \n",
"2 0.015 interpolated 1.878 1.0 0.0 0.982 \n",
"3 0.017 interpolated 2.279 1.0 0.0 0.992 \n",
"4 0.012 interpolated 2.025 1.0 0.0 0.980 \n",
"5 0.013 interpolated 2.059 1.0 0.0 0.994 \n",
"6 0.012 interpolated 2.035 1.0 0.0 0.990 "
" midpoint_bound midpoint_bound_type slope top bottom r2 rmsd \n",
"0 0.017 interpolated 2.505 1.0 0.0 0.996 0.028 \n",
"1 0.019 interpolated 2.513 1.0 0.0 0.986 0.053 \n",
"2 0.015 interpolated 1.878 1.0 0.0 0.982 0.060 \n",
"3 0.017 interpolated 2.279 1.0 0.0 0.992 0.039 \n",
"4 0.012 interpolated 2.025 1.0 0.0 0.980 0.069 \n",
"5 0.013 interpolated 2.059 1.0 0.0 0.994 0.035 \n",
"6 0.012 interpolated 2.035 1.0 0.0 0.990 0.047 "
]
},
"execution_count": 6,
Expand Down Expand Up @@ -395,11 +403,11 @@
"execution_count": 7,
"metadata": {
"execution": {
"iopub.execute_input": "2024-03-23T12:50:34.753685Z",
"iopub.status.busy": "2024-03-23T12:50:34.753493Z",
"iopub.status.idle": "2024-03-23T12:50:35.875072Z",
"shell.execute_reply": "2024-03-23T12:50:35.873283Z",
"shell.execute_reply.started": "2024-03-23T12:50:34.753666Z"
"iopub.execute_input": "2024-03-24T23:24:03.261594Z",
"iopub.status.busy": "2024-03-24T23:24:03.261369Z",
"iopub.status.idle": "2024-03-24T23:24:04.077435Z",
"shell.execute_reply": "2024-03-24T23:24:04.076116Z",
"shell.execute_reply.started": "2024-03-24T23:24:03.261569Z"
}
},
"outputs": [
Expand All @@ -411,7 +419,7 @@
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[7], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# NBVAL_RAISES_EXCEPTION\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m \u001b[43mneutcurve\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mCurveFits\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcombineCurveFits\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43mfit1\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfit2_invalid\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/neutcurve/neutcurve/curvefits.py:204\u001b[0m, in \u001b[0;36mCurveFits.combineCurveFits\u001b[0;34m(curvefits_list, sera, viruses, serum_virus_replicates_to_drop)\u001b[0m\n\u001b[1;32m 193\u001b[0m combined_fits\u001b[38;5;241m.\u001b[39mdf \u001b[38;5;241m=\u001b[39m combined_fits\u001b[38;5;241m.\u001b[39m_get_avg_and_stderr_df(combined_fits\u001b[38;5;241m.\u001b[39mdf)\n\u001b[1;32m 194\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(combined_fits\u001b[38;5;241m.\u001b[39mdf) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mlen\u001b[39m(\n\u001b[1;32m 195\u001b[0m combined_fits\u001b[38;5;241m.\u001b[39mdf\u001b[38;5;241m.\u001b[39mgroupby(\n\u001b[1;32m 196\u001b[0m [\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 202\u001b[0m )\n\u001b[1;32m 203\u001b[0m ):\n\u001b[0;32m--> 204\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mduplicated sera/virus/replicate in `curvefits_list`\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 206\u001b[0m \u001b[38;5;66;03m# combine sera\u001b[39;00m\n\u001b[1;32m 207\u001b[0m combined_fits\u001b[38;5;241m.\u001b[39msera \u001b[38;5;241m=\u001b[39m combined_fits\u001b[38;5;241m.\u001b[39mdf[combined_fits\u001b[38;5;241m.\u001b[39mserum_col]\u001b[38;5;241m.\u001b[39munique()\u001b[38;5;241m.\u001b[39mtolist()\n",
"File \u001b[0;32m~/neutcurve/neutcurve/curvefits.py:207\u001b[0m, in \u001b[0;36mCurveFits.combineCurveFits\u001b[0;34m(curvefits_list, sera, viruses, serum_virus_replicates_to_drop)\u001b[0m\n\u001b[1;32m 196\u001b[0m combined_fits\u001b[38;5;241m.\u001b[39mdf \u001b[38;5;241m=\u001b[39m combined_fits\u001b[38;5;241m.\u001b[39m_get_avg_and_stderr_df(combined_fits\u001b[38;5;241m.\u001b[39mdf)\n\u001b[1;32m 197\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(combined_fits\u001b[38;5;241m.\u001b[39mdf) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mlen\u001b[39m(\n\u001b[1;32m 198\u001b[0m combined_fits\u001b[38;5;241m.\u001b[39mdf\u001b[38;5;241m.\u001b[39mgroupby(\n\u001b[1;32m 199\u001b[0m [\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 205\u001b[0m )\n\u001b[1;32m 206\u001b[0m ):\n\u001b[0;32m--> 207\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mduplicated sera/virus/replicate in `curvefits_list`\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 209\u001b[0m \u001b[38;5;66;03m# combine sera\u001b[39;00m\n\u001b[1;32m 210\u001b[0m combined_fits\u001b[38;5;241m.\u001b[39msera \u001b[38;5;241m=\u001b[39m combined_fits\u001b[38;5;241m.\u001b[39mdf[combined_fits\u001b[38;5;241m.\u001b[39mserum_col]\u001b[38;5;241m.\u001b[39munique()\u001b[38;5;241m.\u001b[39mtolist()\n",
"\u001b[0;31mValueError\u001b[0m: duplicated sera/virus/replicate in `curvefits_list`"
]
}
Expand Down
Loading

0 comments on commit bc76d62

Please sign in to comment.