Merge pull request #489 from stan-dev/disk-warning

WardBrian · web-flow · commit b6874cf6e8a0 · 2021-11-02T12:32:52.000-04:00
Give more informative error for bad draws (highlight disk space as possible culprit)
diff --git a/cmdstanpy/utils.py b/cmdstanpy/utils.py
@@ -607,12 +607,15 @@ def scan_sampler_csv(path: str, is_fixed_param: bool = False) -> Dict[str, Any]:
     dict: Dict[str, Any] = {}
     lineno = 0
     with open(path, 'r') as fd:
-        lineno = scan_config(fd, dict, lineno)
-        lineno = scan_column_names(fd, dict, lineno)
-        if not is_fixed_param:
-            lineno = scan_warmup_iters(fd, dict, lineno)
-            lineno = scan_hmc_params(fd, dict, lineno)
-        lineno = scan_sampling_iters(fd, dict, lineno)
+        try:
+            lineno = scan_config(fd, dict, lineno)
+            lineno = scan_column_names(fd, dict, lineno)
+            if not is_fixed_param:
+                lineno = scan_warmup_iters(fd, dict, lineno)
+                lineno = scan_hmc_params(fd, dict, lineno)
+            lineno = scan_sampling_iters(fd, dict, lineno)
+        except ValueError as e:
+            raise ValueError("Error in reading csv file: " + path) from e
     return dict
 
 
@@ -910,9 +913,12 @@ def scan_sampling_iters(
         data = line.split(',')
         if len(data) != num_cols:
             raise ValueError(
-                'line {}: bad draw, expecting {} items, found {}'.format(
+                'line {}: bad draw, expecting {} items, found {}\n'.format(
                     lineno, num_cols, len(line.split(','))
                 )
+                + 'This error could be caused by running out of disk space.\n'
+                'Try clearing up TEMP or setting output_dir to a path'
+                ' on another drive.',
             )
         cur_pos = fd.tell()
         line = fd.readline().strip()
diff --git a/test/__init__.py b/test/__init__.py
@@ -0,0 +1,53 @@
+"""Testing utilities for CmdStanPy."""
+
+import contextlib
+import unittest
+import unittest.mock
+from importlib import reload
+
+
+class CustomTestCase(unittest.TestCase):
+    """unittest.TestCase with helper context managers for the test suite."""
+
+    # pylint: disable=invalid-name
+    @contextlib.contextmanager
+    def assertRaisesRegexNested(self, exc, msg):
+        """A version of assertRaisesRegex that checks the whole cause chain.
+
+        Useful for when an exception is raised from another and you wish to
+        inspect the inner exception. The message of the raised exception and
+        of each exception reachable through ``__cause__`` are joined with
+        newlines, and ``msg`` is searched for in the combined text.
+
+        :param exc: Exception class expected from the ``with`` body.
+        :param msg: Regular expression to search for in the combined text.
+        """
+        with self.assertRaises(exc) as ctx:
+            yield
+        messages = []
+        exception = ctx.exception
+        while exception is not None:
+            messages.append(str(exception))
+            exception = exception.__cause__
+        self.assertRegex("\n".join(messages), msg)
+
+    # pylint: disable=no-self-use
+    @contextlib.contextmanager
+    def without_import(self, library, module):
+        """Reload ``module`` with ``library`` made unimportable.
+
+        Inside the ``with`` body, ``sys.modules[library]`` is ``None`` and
+        ``module`` has been reloaded under that condition. On exit the
+        ``sys.modules`` entry is restored and ``module`` is reloaded again.
+
+        :param library: Name of the module to hide, e.g. ``'xarray'``.
+        :param module: Already-imported module object to reload.
+        """
+        try:
+            with unittest.mock.patch.dict('sys.modules', {library: None}):
+                reload(module)
+                yield
+        finally:
+            # Runs even when the body raises, so a failing test cannot
+            # leave ``module`` loaded without ``library`` for later tests.
+            reload(module)
diff --git a/test/test_generate_quantities.py b/test/test_generate_quantities.py
@@ -6,7 +6,7 @@
 import logging
 import os
 import unittest
-from importlib import reload
+from test import CustomTestCase
 
 import numpy as np
 import pandas as pd
@@ -21,15 +21,7 @@
 DATAFILES_PATH = os.path.join(HERE, 'data')
 
 
-@contextlib.contextmanager
-def without_import(library, module):
-    with unittest.mock.patch.dict('sys.modules', {library: None}):
-        reload(module)
-        yield
-    reload(module)
-
-
-class GenerateQuantitiesTest(unittest.TestCase):
+class GenerateQuantitiesTest(CustomTestCase):
     def test_from_csv_files(self):
         # fitted_params sample - list of filenames
         goodfiles_path = os.path.join(DATAFILES_PATH, 'runset-good', 'bern')
@@ -357,7 +349,7 @@ def test_sample_plus_quantities_dedup(self):
             self.assertEqual(y_rep[0, i], bern_data['y'][i])
 
     def test_no_xarray(self):
-        with without_import('xarray', cmdstanpy.stanfit):
+        with self.without_import('xarray', cmdstanpy.stanfit):
             with self.assertRaises(ImportError):
                 # if this fails the testing framework is the problem
                 import xarray as _  # noqa
diff --git a/test/test_sample.py b/test/test_sample.py
@@ -9,8 +9,8 @@
 import stat
 import tempfile
 import unittest
-from importlib import reload
 from multiprocessing import cpu_count
+from test import CustomTestCase
 from time import time
 
 import numpy as np
@@ -47,14 +47,6 @@
 BERNOULLI_COLS = SAMPLER_STATE + ['theta']
 
 
-@contextlib.contextmanager
-def without_import(library, module):
-    with unittest.mock.patch.dict('sys.modules', {library: None}):
-        reload(module)
-        yield
-    reload(module)
-
-
 class SampleTest(unittest.TestCase):
     def test_bernoulli_good(self, stanfile='bernoulli.stan'):
         stan = os.path.join(DATAFILES_PATH, stanfile)
@@ -584,7 +576,7 @@ def test_show_progress(self, stanfile='bernoulli.stan'):
         self.assertTrue('Sampling completed' in console)
 
 
-class CmdStanMCMCTest(unittest.TestCase):
+class CmdStanMCMCTest(CustomTestCase):
     # pylint: disable=too-many-public-methods
     def test_validate_good_run(self):
         # construct fit using existing sampler output
@@ -1092,7 +1084,9 @@ def test_validate_bad_run(self):
             os.path.join(DATAFILES_PATH, 'runset-bad', 'bad-hdr-bern-3.csv'),
             os.path.join(DATAFILES_PATH, 'runset-bad', 'bad-hdr-bern-4.csv'),
         ]
-        with self.assertRaisesRegex(ValueError, 'CmdStan config mismatch'):
+        with self.assertRaisesRegexNested(
+            ValueError, 'CmdStan config mismatch'
+        ):
             CmdStanMCMC(runset)
 
         # bad draws
@@ -1102,7 +1096,7 @@ def test_validate_bad_run(self):
             os.path.join(DATAFILES_PATH, 'runset-bad', 'bad-draws-bern-3.csv'),
             os.path.join(DATAFILES_PATH, 'runset-bad', 'bad-draws-bern-4.csv'),
         ]
-        with self.assertRaisesRegex(ValueError, 'draws'):
+        with self.assertRaisesRegexNested(ValueError, 'draws'):
             CmdStanMCMC(runset)
 
         # mismatch - column headers, draws
@@ -1112,7 +1106,7 @@ def test_validate_bad_run(self):
             os.path.join(DATAFILES_PATH, 'runset-bad', 'bad-cols-bern-3.csv'),
             os.path.join(DATAFILES_PATH, 'runset-bad', 'bad-cols-bern-4.csv'),
         ]
-        with self.assertRaisesRegex(
+        with self.assertRaisesRegexNested(
             ValueError, 'bad draw, expecting 9 items, found 8'
         ):
             CmdStanMCMC(runset)
@@ -1604,7 +1598,7 @@ def test_xarray_draws(self):
         self.assertEqual(xr_var.theta.values.shape, (1, 100, 1))
 
     def test_no_xarray(self):
-        with without_import('xarray', cmdstanpy.stanfit):
+        with self.without_import('xarray', cmdstanpy.stanfit):
             with self.assertRaises(ImportError):
                 # if this fails the testing framework is the problem
                 import xarray as _  # noqa
diff --git a/test/test_utils.py b/test/test_utils.py
@@ -14,6 +14,7 @@
 import tempfile
 import unittest
 from pathlib import Path
+from test import CustomTestCase
 
 import numpy as np
 import pandas as pd
@@ -371,7 +372,7 @@ def test_write_stan_json_bad(self):
             write_stan_json(file_bad, dict_badtype_nested)
 
 
-class ReadStanCsvTest(unittest.TestCase):
+class ReadStanCsvTest(CustomTestCase):
     def test_check_sampler_csv_1(self):
         csv_good = os.path.join(DATAFILES_PATH, 'bernoulli_output_1.csv')
         dict = check_sampler_csv(
@@ -386,13 +387,13 @@ def test_check_sampler_csv_1(self):
         self.assertEqual(10, dict['draws_sampling'])
         self.assertEqual(8, len(dict['column_names']))
 
-        with self.assertRaisesRegex(
+        with self.assertRaisesRegexNested(
             ValueError, 'config error, expected thin = 2'
         ):
             check_sampler_csv(
                 path=csv_good, iter_warmup=100, iter_sampling=20, thin=2
             )
-        with self.assertRaisesRegex(
+        with self.assertRaisesRegexNested(
             ValueError, 'config error, expected save_warmup'
         ):
             check_sampler_csv(
@@ -401,7 +402,7 @@ def test_check_sampler_csv_1(self):
                 iter_sampling=10,
                 save_warmup=True,
             )
-        with self.assertRaisesRegex(ValueError, 'expected 1000 draws'):
+        with self.assertRaisesRegexNested(ValueError, 'expected 1000 draws'):
             check_sampler_csv(path=csv_good, iter_warmup=100)
 
     def test_check_sampler_csv_2(self):
@@ -411,34 +412,34 @@ def test_check_sampler_csv_2(self):
 
     def test_check_sampler_csv_3(self):
         csv_bad = os.path.join(DATAFILES_PATH, 'output_bad_cols.csv')
-        with self.assertRaisesRegex(Exception, '8 items'):
+        with self.assertRaisesRegexNested(Exception, '8 items'):
             check_sampler_csv(csv_bad)
 
     def test_check_sampler_csv_4(self):
         csv_bad = os.path.join(DATAFILES_PATH, 'output_bad_rows.csv')
-        with self.assertRaisesRegex(Exception, 'found 9'):
+        with self.assertRaisesRegexNested(Exception, 'found 9'):
             check_sampler_csv(csv_bad)
 
     def test_check_sampler_csv_metric_1(self):
         csv_bad = os.path.join(DATAFILES_PATH, 'output_bad_metric_1.csv')
-        with self.assertRaisesRegex(Exception, 'expecting metric'):
+        with self.assertRaisesRegexNested(Exception, 'expecting metric'):
             check_sampler_csv(csv_bad)
 
     def test_check_sampler_csv_metric_2(self):
         csv_bad = os.path.join(DATAFILES_PATH, 'output_bad_metric_2.csv')
-        with self.assertRaisesRegex(Exception, 'invalid step size'):
+        with self.assertRaisesRegexNested(Exception, 'invalid step size'):
             check_sampler_csv(csv_bad)
 
     def test_check_sampler_csv_metric_3(self):
         csv_bad = os.path.join(DATAFILES_PATH, 'output_bad_metric_3.csv')
-        with self.assertRaisesRegex(
+        with self.assertRaisesRegexNested(
             Exception, 'invalid or missing mass matrix specification'
         ):
             check_sampler_csv(csv_bad)
 
     def test_check_sampler_csv_metric_4(self):
         csv_bad = os.path.join(DATAFILES_PATH, 'output_bad_metric_4.csv')
-        with self.assertRaisesRegex(
+        with self.assertRaisesRegexNested(
             Exception, 'invalid or missing mass matrix specification'
         ):
             check_sampler_csv(csv_bad)
@@ -474,15 +475,17 @@ def test_check_sampler_csv_thin(self):
         self.assertEqual(dict['max_depth'], 11)
         self.assertEqual(dict['delta'], 0.98)
 
-        with self.assertRaisesRegex(ValueError, 'config error'):
+        with self.assertRaisesRegexNested(ValueError, 'config error'):
             check_sampler_csv(
                 path=csv_file,
                 is_fixed_param=False,
                 iter_sampling=490,
                 iter_warmup=490,
                 thin=9,
             )
-        with self.assertRaisesRegex(ValueError, 'expected 490 draws, found 70'):
+        with self.assertRaisesRegexNested(
+            ValueError, 'expected 490 draws, found 70'
+        ):
             check_sampler_csv(
                 path=csv_file,
                 is_fixed_param=False,