Skip to content

Commit 361cffb

Browse files
committed
Merge branch 'develop' of https://github.com/stan-dev/cmdstanpy into develop
2 parents fa664ba + 3e483cd commit 361cffb

6 files changed

Lines changed: 119 additions & 63 deletions

File tree

.github/workflows/main.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name: CmdStanPy
22

33
on:
4-
push:
4+
push:
55
branches:
66
- 'develop'
77
- 'master'
@@ -28,7 +28,7 @@ jobs:
2828
strategy:
2929
matrix:
3030
os: [ubuntu-latest, macos-latest, windows-latest]
31-
python-version: [3.6, 3.7, 3.8, 3.9]
31+
python-version: [3.6, 3.7, 3.8, 3.9, "3.10"]
3232
fail-fast: false
3333
env:
3434
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
@@ -65,7 +65,7 @@ jobs:
6565
if: matrix.os == 'windows-latest'
6666
run: |
6767
$whl = Get-ChildItem -Path dist -Filter *.whl | Select-Object -First 1
68-
pip install "$whl"
68+
pip install "$whl"
6969
7070
- name: Show libraries
7171
run: python -m pip freeze
@@ -101,4 +101,4 @@ jobs:
101101
- name: Submit codecov
102102
run: |
103103
cd run_tests
104-
codecov
104+
codecov

cmdstanpy/utils.py

Lines changed: 36 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,22 @@ def cxx_toolchain_path(
409409
return compiler_path, tool_path
410410

411411

412+
def rewrite_inf_nan(
    data: Union[float, int, List[Any]]
) -> Union[str, int, float, List[Any]]:
    """Replaces NaN and Infinity with string representations.

    Floats that are NaN become the string ``'NaN'``; positive and
    negative infinity become ``'+inf'`` and ``'-inf'``. Lists are
    processed recursively; every other value is returned untouched.
    """
    # Recurse into lists first; each element is handled independently.
    if isinstance(data, list):
        return [rewrite_inf_nan(entry) for entry in data]
    # Only floats can carry NaN/inf; ints, strings, etc. pass through.
    if not isinstance(data, float):
        return data
    if math.isnan(data):
        return 'NaN'
    if math.isinf(data):
        return '-inf' if data < 0 else '+inf'
    return data
426+
427+
412428
def write_stan_json(path: str, data: Mapping[str, Any]) -> None:
413429
"""
414430
Dump a mapping of strings to data to a JSON file.
@@ -430,6 +446,7 @@ def write_stan_json(path: str, data: Mapping[str, Any]) -> None:
430446
"""
431447
data_out = {}
432448
for key, val in data.items():
449+
handle_nan_inf = False
433450
if val is not None:
434451
if isinstance(val, (str, bytes)) or (
435452
type(val).__module__ != 'numpy'
@@ -440,18 +457,14 @@ def write_stan_json(path: str, data: Mapping[str, Any]) -> None:
440457
+ f"write_stan_json for key '{key}'"
441458
)
442459
try:
443-
if not np.all(np.isfinite(val)):
444-
raise ValueError(
445-
"Input to write_stan_json has nan or infinite "
446-
+ f"values for key '{key}'"
447-
)
460+
handle_nan_inf = not np.all(np.isfinite(val))
448461
except TypeError:
449462
# handles cases like val == ['hello']
450463
# pylint: disable=raise-missing-from
451464
raise ValueError(
452465
"Invalid type provided to "
453-
+ f"write_stan_json for key '{key}' "
454-
+ f"as part of collection {type(val)}"
466+
f"write_stan_json for key '{key}' "
467+
f"as part of collection {type(val)}"
455468
)
456469

457470
if type(val).__module__ == 'numpy':
@@ -463,6 +476,9 @@ def write_stan_json(path: str, data: Mapping[str, Any]) -> None:
463476
else:
464477
data_out[key] = val
465478

479+
if handle_nan_inf:
480+
data_out[key] = rewrite_inf_nan(data_out[key])
481+
466482
with open(path, 'w') as fd:
467483
json.dump(data_out, fd)
468484

@@ -591,12 +607,15 @@ def scan_sampler_csv(path: str, is_fixed_param: bool = False) -> Dict[str, Any]:
591607
dict: Dict[str, Any] = {}
592608
lineno = 0
593609
with open(path, 'r') as fd:
594-
lineno = scan_config(fd, dict, lineno)
595-
lineno = scan_column_names(fd, dict, lineno)
596-
if not is_fixed_param:
597-
lineno = scan_warmup_iters(fd, dict, lineno)
598-
lineno = scan_hmc_params(fd, dict, lineno)
599-
lineno = scan_sampling_iters(fd, dict, lineno)
610+
try:
611+
lineno = scan_config(fd, dict, lineno)
612+
lineno = scan_column_names(fd, dict, lineno)
613+
if not is_fixed_param:
614+
lineno = scan_warmup_iters(fd, dict, lineno)
615+
lineno = scan_hmc_params(fd, dict, lineno)
616+
lineno = scan_sampling_iters(fd, dict, lineno)
617+
except ValueError as e:
618+
raise ValueError("Error in reading csv file: " + path) from e
600619
return dict
601620

602621

@@ -894,9 +913,12 @@ def scan_sampling_iters(
894913
data = line.split(',')
895914
if len(data) != num_cols:
896915
raise ValueError(
897-
'line {}: bad draw, expecting {} items, found {}'.format(
916+
'line {}: bad draw, expecting {} items, found {}\n'.format(
898917
lineno, num_cols, len(line.split(','))
899918
)
919+
+ 'This error could be caused by running out of disk space.\n'
920+
'Try clearing up TEMP or setting output_dir to a path'
921+
' on another drive.',
900922
)
901923
cur_pos = fd.tell()
902924
line = fd.readline().strip()

test/__init__.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
"""Testing utilities for CmdStanPy."""

import contextlib
import unittest
# NOTE: ``import unittest`` alone does not import the ``unittest.mock``
# submodule; the explicit import below is required for
# ``unittest.mock.patch`` to be reliably available.
import unittest.mock
from importlib import reload


class CustomTestCase(unittest.TestCase):
    """TestCase with helpers shared across the CmdStanPy test suite."""

    # pylint: disable=invalid-name
    @contextlib.contextmanager
    def assertRaisesRegexNested(self, exc, msg):
        """A version of assertRaisesRegex that checks the full traceback.

        Useful for when an exception is raised from another and you wish to
        inspect the inner exception.

        :param exc: exception type the enclosed body must raise
        :param msg: regex that must match somewhere in the chained
            exception messages (outermost first, joined by newlines)
        """
        with self.assertRaises(exc) as ctx:
            yield
        # Walk the __cause__ chain, accumulating each message so the
        # regex can match against any exception in the chain.
        exception = ctx.exception
        exn_string = str(ctx.exception)
        while exception.__cause__ is not None:
            exception = exception.__cause__
            exn_string += "\n" + str(exception)
        self.assertRegex(exn_string, msg)

    # pylint: disable=no-self-use
    @contextlib.contextmanager
    def without_import(self, library, module):
        """Temporarily make ``library`` unimportable and reload ``module``.

        Patching the entry in ``sys.modules`` to None makes any
        ``import library`` inside ``module`` raise ImportError; the
        final reload restores ``module`` to its normal state.
        """
        with unittest.mock.patch.dict('sys.modules', {library: None}):
            reload(module)
            yield
        reload(module)

test/test_generate_quantities.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import logging
77
import os
88
import unittest
9-
from importlib import reload
9+
from test import CustomTestCase
1010

1111
import numpy as np
1212
import pandas as pd
@@ -21,15 +21,7 @@
2121
DATAFILES_PATH = os.path.join(HERE, 'data')
2222

2323

24-
@contextlib.contextmanager
25-
def without_import(library, module):
26-
with unittest.mock.patch.dict('sys.modules', {library: None}):
27-
reload(module)
28-
yield
29-
reload(module)
30-
31-
32-
class GenerateQuantitiesTest(unittest.TestCase):
24+
class GenerateQuantitiesTest(CustomTestCase):
3325
def test_from_csv_files(self):
3426
# fitted_params sample - list of filenames
3527
goodfiles_path = os.path.join(DATAFILES_PATH, 'runset-good', 'bern')
@@ -357,7 +349,7 @@ def test_sample_plus_quantities_dedup(self):
357349
self.assertEqual(y_rep[0, i], bern_data['y'][i])
358350

359351
def test_no_xarray(self):
360-
with without_import('xarray', cmdstanpy.stanfit):
352+
with self.without_import('xarray', cmdstanpy.stanfit):
361353
with self.assertRaises(ImportError):
362354
# if this fails the testing framework is the problem
363355
import xarray as _ # noqa

test/test_sample.py

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99
import stat
1010
import tempfile
1111
import unittest
12-
from importlib import reload
1312
from multiprocessing import cpu_count
13+
from test import CustomTestCase
1414
from time import time
1515

1616
import numpy as np
@@ -47,14 +47,6 @@
4747
BERNOULLI_COLS = SAMPLER_STATE + ['theta']
4848

4949

50-
@contextlib.contextmanager
51-
def without_import(library, module):
52-
with unittest.mock.patch.dict('sys.modules', {library: None}):
53-
reload(module)
54-
yield
55-
reload(module)
56-
57-
5850
class SampleTest(unittest.TestCase):
5951
def test_bernoulli_good(self, stanfile='bernoulli.stan'):
6052
stan = os.path.join(DATAFILES_PATH, stanfile)
@@ -584,7 +576,7 @@ def test_show_progress(self, stanfile='bernoulli.stan'):
584576
self.assertTrue('Sampling completed' in console)
585577

586578

587-
class CmdStanMCMCTest(unittest.TestCase):
579+
class CmdStanMCMCTest(CustomTestCase):
588580
# pylint: disable=too-many-public-methods
589581
def test_validate_good_run(self):
590582
# construct fit using existing sampler output
@@ -1092,7 +1084,9 @@ def test_validate_bad_run(self):
10921084
os.path.join(DATAFILES_PATH, 'runset-bad', 'bad-hdr-bern-3.csv'),
10931085
os.path.join(DATAFILES_PATH, 'runset-bad', 'bad-hdr-bern-4.csv'),
10941086
]
1095-
with self.assertRaisesRegex(ValueError, 'CmdStan config mismatch'):
1087+
with self.assertRaisesRegexNested(
1088+
ValueError, 'CmdStan config mismatch'
1089+
):
10961090
CmdStanMCMC(runset)
10971091

10981092
# bad draws
@@ -1102,7 +1096,7 @@ def test_validate_bad_run(self):
11021096
os.path.join(DATAFILES_PATH, 'runset-bad', 'bad-draws-bern-3.csv'),
11031097
os.path.join(DATAFILES_PATH, 'runset-bad', 'bad-draws-bern-4.csv'),
11041098
]
1105-
with self.assertRaisesRegex(ValueError, 'draws'):
1099+
with self.assertRaisesRegexNested(ValueError, 'draws'):
11061100
CmdStanMCMC(runset)
11071101

11081102
# mismatch - column headers, draws
@@ -1112,7 +1106,7 @@ def test_validate_bad_run(self):
11121106
os.path.join(DATAFILES_PATH, 'runset-bad', 'bad-cols-bern-3.csv'),
11131107
os.path.join(DATAFILES_PATH, 'runset-bad', 'bad-cols-bern-4.csv'),
11141108
]
1115-
with self.assertRaisesRegex(
1109+
with self.assertRaisesRegexNested(
11161110
ValueError, 'bad draw, expecting 9 items, found 8'
11171111
):
11181112
CmdStanMCMC(runset)
@@ -1604,7 +1598,7 @@ def test_xarray_draws(self):
16041598
self.assertEqual(xr_var.theta.values.shape, (1, 100, 1))
16051599

16061600
def test_no_xarray(self):
1607-
with without_import('xarray', cmdstanpy.stanfit):
1601+
with self.without_import('xarray', cmdstanpy.stanfit):
16081602
with self.assertRaises(ImportError):
16091603
# if this fails the testing framework is the problem
16101604
import xarray as _ # noqa

0 commit comments

Comments
 (0)