Merge pull request #623 from stan-dev/fix/json-encoding

Fix json encoding of NaN/infinity
stan-dev · Sep 23, 2022 · fb3dfe2 · fb3dfe2
2 parents c2bab85 + 92f5ab3
commit fb3dfe2
Show file tree

Hide file tree

Showing 12 changed files with 98 additions and 98 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -25,7 +25,7 @@ repos:
       - id: mypy
         # Copied from setup.cfg
         exclude: ^test/
-        additional_dependencies: [ numpy >= 1.22, types-ujson ]
+        additional_dependencies: [ numpy >= 1.22]
   # local uses the user-installed pylint, this allows dependency checking
   - repo: local
     hooks:

diff --git a/.pylintrc b/.pylintrc
@@ -3,7 +3,7 @@
 # A comma-separated list of package or module names from where C extensions may
 # be loaded. Extensions are loading into the active Python interpreter and may
 # run arbitrary code.
-extension-pkg-whitelist=ujson
+extension-pkg-whitelist=
 
 # Add files or directories to the blacklist. They should be base names, not
 # paths.

diff --git a/cmdstanpy/model.py b/cmdstanpy/model.py
@@ -1,22 +1,22 @@
 """CmdStanModel"""
 
 import io
+import json
 import os
 import platform
 import re
 import shutil
 import subprocess
 import sys
+import threading
 from collections import OrderedDict
 from concurrent.futures import ThreadPoolExecutor
 from datetime import datetime
 from io import StringIO
 from multiprocessing import cpu_count
 from pathlib import Path
-import threading
 from typing import Any, Callable, Dict, Iterable, List, Mapping, Optional, Union
 
-import ujson as json
 from tqdm.auto import tqdm
 
 from cmdstanpy import _CMDSTAN_REFRESH, _CMDSTAN_SAMPLING, _CMDSTAN_WARMUP
@@ -1587,6 +1587,7 @@ def _run_cmdstan(
                 env=os.environ,
                 universal_newlines=True,
             )
+            timer: Optional[threading.Timer]
             if timeout:
 
                 def _timer_target() -> None:

diff --git a/cmdstanpy/utils/__init__.py b/cmdstanpy/utils/__init__.py
@@ -85,7 +85,7 @@ def show_versions(output: bool = True) -> str:
     except Exception:
         deps_info.append(('cmdstan', 'NOT FOUND'))
 
-    deps = ['cmdstanpy', 'pandas', 'xarray', 'tqdm', 'numpy', 'ujson']
+    deps = ['cmdstanpy', 'pandas', 'xarray', 'tqdm', 'numpy']
     for module in deps:
         try:
             if module in sys.modules:

diff --git a/cmdstanpy/utils/json.py b/cmdstanpy/utils/json.py
@@ -2,30 +2,10 @@
 Utilities for writing Stan Json files
 """
 import json
-import math
 from collections.abc import Collection
-from typing import Any, List, Mapping, Union
+from typing import Any, List, Mapping
 
 import numpy as np
-import ujson
-
-from .logging import get_logger
-
-
-def rewrite_inf_nan(
-    data: Union[float, int, List[Any]]
-) -> Union[str, int, float, List[Any]]:
-    """Replaces NaN and Infinity with string representations"""
-    if isinstance(data, float):
-        if math.isnan(data):
-            return 'NaN'
-        if math.isinf(data):
-            return ('+' if data > 0 else '-') + 'inf'
-        return data
-    elif isinstance(data, list):
-        return [rewrite_inf_nan(item) for item in data]
-    else:
-        return data
 
 
 def serialize_complex(c: Any) -> List[float]:
@@ -56,7 +36,6 @@ def write_stan_json(path: str, data: Mapping[str, Any]) -> None:
     """
     data_out = {}
     for key, val in data.items():
-        handle_nan_inf = False
         if val is not None:
             if isinstance(val, (str, bytes)) or (
                 type(val).__module__ != 'numpy'
@@ -67,9 +46,9 @@ def write_stan_json(path: str, data: Mapping[str, Any]) -> None:
                     + f"write_stan_json for key '{key}'"
                 )
             try:
-                handle_nan_inf = not np.all(np.isfinite(val))
-            except TypeError:
                 # handles cases like val == ['hello']
+                np.isfinite(val)
+            except TypeError:
                 # pylint: disable=raise-missing-from
                 raise ValueError(
                     "Invalid type provided to "
@@ -86,12 +65,5 @@ def write_stan_json(path: str, data: Mapping[str, Any]) -> None:
         else:
             data_out[key] = val
 
-        if handle_nan_inf:
-            data_out[key] = rewrite_inf_nan(data_out[key])
-
     with open(path, 'w') as fd:
-        try:
-            ujson.dump(data_out, fd)
-        except TypeError as e:
-            get_logger().debug(e)
-            json.dump(data_out, fd, default=serialize_complex)
+        json.dump(data_out, fd, default=serialize_complex)
diff --git a/cmdstanpy/utils/stancsv.py b/cmdstanpy/utils/stancsv.py
@@ -1,6 +1,7 @@
 """
 Utility functions for reading the Stan CSV format
 """
+import json
 import math
 import re
 from enum import Enum, auto
@@ -17,7 +18,6 @@
 
 import numpy as np
 import pandas as pd
-import ujson
 
 from cmdstanpy import _CMDSTAN_SAMPLING, _CMDSTAN_THIN, _CMDSTAN_WARMUP
 
@@ -453,7 +453,7 @@ def read_metric(path: str) -> List[int]:
     """
     if path.endswith('.json'):
         with open(path, 'r') as fd:
-            metric_dict = ujson.load(fd)
+            metric_dict = json.load(fd)
         if 'inv_metric' in metric_dict:
             dims_np: np.ndarray = np.asarray(metric_dict['inv_metric'])
             return list(dims_np.shape)

diff --git a/docsrc/users-guide/examples/Run Generated Quantities.ipynb b/docsrc/users-guide/examples/Run Generated Quantities.ipynb
@@ -2,6 +2,7 @@
  "cells": [
   {
    "cell_type": "markdown",
+   "metadata": {},
    "source": [
     "#  Generating new quantities of interest.\n",
     "\n",
@@ -19,11 +20,11 @@
     "-  transform parameters for reporting\n",
     "-  apply full Bayesian decision theory\n",
     "-  calculate log likelihoods, deviances, etc. for model comparison"
-   ],
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "markdown",
+   "metadata": {},
    "source": [
     "## Example:  add posterior predictive checks to `bernoulli.stan`\n",
     "\n",
@@ -34,12 +35,13 @@
     "We instantiate the model `bernoulli`,\n",
     "as in the \"Hello World\" section\n",
     "of the CmdStanPy [tutorial](https://github.com/stan-dev/cmdstanpy/blob/develop/cmdstanpy_tutorial.ipynb) notebook."
-   ],
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "import os\n",
     "from cmdstanpy import cmdstan_path, CmdStanModel, CmdStanMCMC, CmdStanGQ\n",
@@ -51,153 +53,151 @@
     "# instantiate, compile bernoulli model\n",
     "model = CmdStanModel(stan_file=stan_file)\n",
     "print(model.code())"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "markdown",
+   "metadata": {},
    "source": [
     "The input data consists of `N` - the number of bernoulli trials and `y` - the list of observed outcomes.\n",
     "Inspection of the data shows that on average, there is a 20% chance of success for any given Bernoulli trial."
-   ],
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "# examine bernoulli data\n",
-    "import ujson\n",
+    "import json\n",
     "import statistics\n",
     "with open(data_file,'r') as fp:\n",
-    "    data_dict = ujson.load(fp)\n",
+    "    data_dict = json.load(fp)\n",
     "print(data_dict)\n",
     "print('mean of y: {}'.format(statistics.mean(data_dict['y'])))"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "markdown",
+   "metadata": {},
    "source": [
     "As in the \"Hello World\" tutorial, we produce a sample from the posterior of the model conditioned on the data:"
-   ],
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "# fit the model to the data\n",
     "fit = model.sample(data=data_file)"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "markdown",
+   "metadata": {},
    "source": [
     "The fitted model produces an estimate of `theta` - the chance of success"
-   ],
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "fit.summary()"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "markdown",
+   "metadata": {},
    "source": [
     "To run a posterior predictive check, we add a `generated quantities` block to the model, in which we generate a new data vector `y_rep` using the current estimate of theta.  The resulting model is in file [bernoulli_ppc.stan](https://github.com/stan-dev/cmdstanpy/blob/master/test/data/bernoulli_ppc.stan)"
-   ],
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "model_ppc = CmdStanModel(stan_file='bernoulli_ppc.stan')\n",
     "print(model_ppc.code())"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "markdown",
+   "metadata": {},
    "source": [
     "We run the `generate_quantities` method on `bernoulli_ppc` using existing sample `fit` as input.  The `generate_quantities` method takes the values of `theta` in the `fit` sample as the set of draws from the posterior used to generate the corresponding `y_rep` quantities of interest.\n",
     "\n",
     "The arguments to the `generate_quantities` method are:\n",
     " + `data`  - the data used to fit the model\n",
     " + `mcmc_sample` - either a `CmdStanMCMC` object or a list of stan-csv files\n"
-   ],
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "new_quantities = model_ppc.generate_quantities(data=data_file, mcmc_sample=fit)"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "markdown",
+   "metadata": {},
    "source": [
     "The `generate_quantities` method returns a `CmdStanGQ` object which contains the values for all variables in the generated quantities block of the program ``bernoulli_ppc.stan``.  Unlike the output from the ``sample`` method, it doesn't contain any information on the joint log probability density, sampler state, or parameters or transformed parameter values.\n",
     "\n",
     "In this example, each draw consists of an N-length array of replicates of the `bernoulli` model's input variable  `y`, which is an N-length array of Bernoulli outcomes."
-   ],
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "print(new_quantities.draws().shape, new_quantities.column_names)\n",
     "for i in range(3):\n",
     "    print (new_quantities.draws()[i,:])"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "markdown",
+   "metadata": {},
    "source": [
     "We can also use ``draws_pd(inc_sample=True)`` to get a pandas DataFrame which combines the input drawset with the generated quantities."
-   ],
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "sample_plus = new_quantities.draws_pd(inc_sample=True)\n",
     "print(type(sample_plus),sample_plus.shape)\n",
     "names = list(sample_plus.columns.values[7:18])\n",
     "sample_plus.iloc[0:3, :]"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "markdown",
+   "metadata": {},
    "source": [
     "For models as simple as the bernoulli models here, it would be trivial to re-run the sampler and generate a new sample which contains both the estimate of the parameters `theta` as well as `y_rep` values. For models which are difficult to fit, i.e., when producing a sample is computationally expensive, the `generate_quantities` method is preferred."
-   ],
-   "metadata": {}
+   ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3.9.5 ('stan')",
    "language": "python",
    "name": "python3"
   },
@@ -212,8 +212,13 @@
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.9.5"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "8765ce46b013071999fc1966b52035a7309a0da7551e066cc0f0fa23e83d4f60"
+   }
   }
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}