Skip sampler diagnostics for fixed_param runs; move the check into _check_sampler_diagnostics (changes per code review)

mitzimorris · mitzimorris · commit bda136ff77b0 · 2022-06-19T16:48:30.000-04:00
diff --git a/cmdstanpy/stanfit/mcmc.py b/cmdstanpy/stanfit/mcmc.py
@@ -95,33 +95,14 @@ def __init__(
         # only valid when not is_fixed_param
         self._metric: np.ndarray = np.array(())
         self._step_size: np.ndarray = np.array(())
-        self._divergences: np.ndarray = np.zeros(self.runset.chains, int)
-        self._max_treedepths: np.ndarray = np.zeros(self.runset.chains, int)
+        self._divergences: np.ndarray = np.array(())
+        self._max_treedepths: np.ndarray = np.array(())
 
         # info from CSV initial comments and header
         config = self._validate_csv_files()
         self._metadata: InferenceMetadata = InferenceMetadata(config)
-        # prelim diagnostics
-        if np.any(self._divergences) or np.any(self._max_treedepths):
-            diagnostics = ['Some chains may have failed to converge.']
-            ct_iters = config['num_samples']  # pylint: disable=unused-variable
-            for i in range(self.runset._chains):
-                if self._divergences[i] > 0:
-                    diagnostics.append(
-                        f'Chain {i + 1} had {self._divergences[i]} '
-                        'divergent transitions '
-                        f'({((self._divergences[i]/ct_iters)*100):.1f}%)'
-                    )
-                if self._max_treedepths[i] > 0:
-                    diagnostics.append(
-                        f'Chain {i + 1} had {self._max_treedepths[i]} '
-                        'iterations at max treedepth '
-                        f'({((self._max_treedepths[i]/ct_iters)*100):.1f}%)'
-                    )
-            diagnostics.append(
-                'Use function "diagnose()" to see further information.'
-            )
-            get_logger().warning('\n\t'.join(diagnostics))
+        if not self._is_fixed_param:
+            self._check_sampler_diagnostics()
 
     def __repr__(self) -> str:
         repr = 'CmdStanMCMC: model={} chains={}{}'.format(
@@ -304,6 +285,10 @@ def _validate_csv_files(self) -> Dict[str, Any]:
         Tabulates sampling iters which are divergent or at max treedepth
         Raises exception when inconsistencies detected.
         """
+        if not self._is_fixed_param:
+            self._divergences: np.ndarray = np.zeros(self.runset.chains, dtype=int)
+            self._max_treedepths: np.ndarray = np.zeros(self.runset.chains, dtype=int)
+
         dzero = {}
         for i in range(self.chains):
             if i == 0:
@@ -359,6 +344,32 @@ def _validate_csv_files(self) -> Dict[str, Any]:
                     self._max_treedepths[i] = drest['ct_max_treedepth']
         return dzero
 
+    # pylint: disable=unused-variable
+    def _check_sampler_diagnostics(self) -> None:
+        """
+        Log a warning listing per-chain divergences and max treedepth hits.
+        """
+        if np.any(self._divergences) or np.any(self._max_treedepths):
+            diagnostics = ['Some chains may have failed to converge.']
+            ct_iters = self.metadata.cmdstan_config['num_samples']
+            for i in range(self.runset.chains):
+                if self._divergences[i] > 0:
+                    diagnostics.append(
+                        f'Chain {i + 1} had {self._divergences[i]} '
+                        'divergent transitions '
+                        f'({((self._divergences[i]/ct_iters)*100):.1f}%)'
+                    )
+                if self._max_treedepths[i] > 0:
+                    diagnostics.append(
+                        f'Chain {i + 1} had {self._max_treedepths[i]} '
+                        'iterations at max treedepth '
+                        f'({((self._max_treedepths[i]/ct_iters)*100):.1f}%)'
+                    )
+            diagnostics.append(
+                'Use function "diagnose()" to see further information.'
+            )
+            get_logger().warning('\n\t'.join(diagnostics))
+
     def _assemble_draws(self) -> None:
         """
         Allocates and populates the step size, metric, and sample arrays
diff --git a/cmdstanpy/utils.py b/cmdstanpy/utils.py
@@ -654,7 +654,7 @@ def scan_sampler_csv(path: str, is_fixed_param: bool = False) -> Dict[str, Any]:
             if not is_fixed_param:
                 lineno = scan_warmup_iters(fd, dict, lineno)
                 lineno = scan_hmc_params(fd, dict, lineno)
-            lineno = scan_sampling_iters(fd, dict, lineno)
+            lineno = scan_sampling_iters(fd, dict, lineno, is_fixed_param)
         except ValueError as e:
             raise ValueError("Error in reading csv file: " + path) from e
     return dict
@@ -957,25 +957,21 @@ def scan_hmc_params(
 
 
 def scan_sampling_iters(
-    fd: TextIO, config_dict: Dict[str, Any], lineno: int
+    fd: TextIO, config_dict: Dict[str, Any], lineno: int, is_fixed_param: bool
 ) -> int:
     """
     Parse sampling iteration, save number of iterations to config_dict.
     Also save number of divergences, max_treedepth hits
     """
     draws_found = 0
     num_cols = len(config_dict['column_names'])
-    idx_divergent = None
-    idx_treedepth = None
-    max_treedepth = None
-    ct_divergences = 0
-    ct_max_treedepth = 0
-    try:
+    if not is_fixed_param:
         idx_divergent = config_dict['column_names'].index('divergent__')
         idx_treedepth = config_dict['column_names'].index('treedepth__')
         max_treedepth = config_dict['max_depth']
-    except ValueError:
-        pass
+        ct_divergences = 0
+        ct_max_treedepth = 0
+
     cur_pos = fd.tell()
     line = fd.readline().strip()
     while len(line) > 0 and not line.startswith('#'):
@@ -991,17 +987,18 @@ def scan_sampling_iters(
                 'Try clearing up TEMP or setting output_dir to a path'
                 ' on another drive.',
             )
-        if max_treedepth:
+        cur_pos = fd.tell()
+        line = fd.readline().strip()
+        if not is_fixed_param:
             ct_divergences += int(data[idx_divergent])  # type: ignore
             if int(data[idx_treedepth]) == max_treedepth:  # type: ignore
                 ct_max_treedepth += 1
-        cur_pos = fd.tell()
-        line = fd.readline().strip()
+
+    fd.seek(cur_pos)
     config_dict['draws_sampling'] = draws_found
-    if max_treedepth:
+    if not is_fixed_param:
         config_dict['ct_divergences'] = ct_divergences
         config_dict['ct_max_treedepth'] = ct_max_treedepth
-    fd.seek(cur_pos)
     return lineno
 
 
diff --git a/test/test_sample.py b/test/test_sample.py
@@ -14,7 +14,7 @@
 from time import time
 
 import numpy as np
-from testfixtures import LogCapture
+from testfixtures import LogCapture, StringComparison
 
 try:
     import ujson as json
@@ -488,6 +488,8 @@ def test_fixed_param_good(self):
         self.assertEqual(datagen_fit.metric_type, None)
         self.assertEqual(datagen_fit.metric, None)
         self.assertEqual(datagen_fit.step_size, None)
+        self.assertEqual(datagen_fit.divergences, None)
+        self.assertEqual(datagen_fit.max_treedepths, None)
 
         for i in range(datagen_fit.runset.chains):
             csv_file = datagen_fit.runset.csv_files[i]
@@ -1805,13 +1807,12 @@ def test_diagnostics(self):
             fit = model.sample(
                 data=rdata,
                 seed=55157,
-                show_progress=False,
-                show_console=False,
-            )
-            msg = log.actual()[-1][-1]
-            self.assertTrue(
-                msg.startswith('Some chains may have failed to converge.')
             )
+            log.check_present((
+                'cmdstanpy',
+                'WARNING',
+                StringComparison(r'(?s).*Some chains may have failed to converge.*')
+            ))
             self.assertFalse(np.all(fit.divergences == 0))
 
         with LogCapture(level=logging.WARNING) as log:
@@ -1821,8 +1822,11 @@ def test_diagnostics(self):
                 seed=40508,
                 max_treedepth=3,
             )
-            msg = log.actual()[-1][-1]
-            self.assertTrue('max treedepth' in msg)
+            log.check_present((
+                'cmdstanpy',
+                'WARNING',
+                StringComparison(r'(?s).*max treedepth*')
+            ))
             self.assertFalse(np.all(fit.max_treedepths == 0))
 
         stan = os.path.join(DATAFILES_PATH, 'bernoulli.stan')