changes per code review

mitzimorris · mitzimorris · commit 2b1ff634e9c9 · 2021-11-10T19:50:23.000-05:00
diff --git a/cmdstanpy/model.py b/cmdstanpy/model.py
@@ -434,8 +434,8 @@ def exe_info(self) -> Dict[str, str]:
         """
         Run model with option 'info'. Parse output statements, which all
         have form 'key = value' into a Dict.
-        If exe file compiled with CmdStan < 2.27, calling model with
-        option 'info'  fail and method returns None.
+        If the exe file was compiled with CmdStan < 2.27, option 'info'
+        isn't available and the method returns an empty dictionary.
         """
         result: Dict[str, Any] = {}
         if self.exe_file is None:
@@ -941,19 +941,22 @@ def sample(
             parallel_procs = parallel_chains
             num_threads = threads_per_chain
             one_process_per_chain = True
+            assert isinstance(self.exe_file, str)  # make typechecker happy
+            info_dict = self.exe_info()
+            stan_threads = info_dict.get('STAN_THREADS')
+            if stan_threads is not None:
+                stan_threads = stan_threads.lower()
             if (
                 force_one_process_per_chain is None
-                and not cmdstan_version_before(2, 28)
+                and not cmdstan_version_before(2, 28, info_dict)
+                and stan_threads == 'true'
             ):
-                assert isinstance(self.exe_file, str)  # make typechecker happy
-                info_dict = self.exe_info()
-                if info_dict.get('STAN_THREADS') == 'true':
-                    one_process_per_chain = False
-                    num_threads = parallel_chains * num_threads
-                    parallel_procs = 1
+                one_process_per_chain = False
+                num_threads = parallel_chains * num_threads
+                parallel_procs = 1
             elif (
                 force_one_process_per_chain is False
-                and cmdstan_version_before(2, 28)
+                and cmdstan_version_before(2, 28, info_dict)
             ):
                 get_logger().warning(
                     'Installed version of CmdStan cannot multi-process chains, '
@@ -985,9 +988,7 @@ def sample(
                 iter_total = iter_total // refresh + 2
 
                 progress_hook = self._wrap_sampler_progress_hook(
-                    one_process_per_chain=one_process_per_chain,
-                    chains=chains,
-                    offset=chain_ids[0],
+                    chain_ids=chain_ids,
                     total=iter_total,
                 )
             runset = RunSet(
@@ -1007,7 +1008,9 @@ def sample(
                         show_console=show_console,
                         progress_hook=progress_hook,
                     )
-            if show_progress:
+            if show_progress and progress_hook is not None:
+                progress_hook("Done", -1)  # -1 == all chains finished
+
                 # advance terminal window cursor past progress bars
                 term_size: os.terminal_size = shutil.get_terminal_size(
                     fallback=(80, 24)
@@ -1381,6 +1384,7 @@ def _run_cmdstan(
         Args 'show_progress' and 'show_console' allow use of progress bar,
         streaming output to console, respectively.
         """
+        get_logger().debug('idx %d', idx)
         get_logger().debug(
             'running CmdStan, num_threads: %s',
             str(os.environ.get('STAN_NUM_THREADS')),
@@ -1389,8 +1393,8 @@ def _run_cmdstan(
         logger_prefix = 'CmdStan'
         console_prefix = ''
         if runset.one_process_per_chain:
-            logger_prefix = 'Chain [{}]'.format(idx + 1)
-            console_prefix = 'Chain [{}] '.format(idx + 1)
+            logger_prefix = 'Chain [{}]'.format(idx + runset.chain_ids[0])
+            console_prefix = 'Chain [{}] '.format(idx + runset.chain_ids[0])
 
         cmd = runset.cmd(idx)
         get_logger().debug('CmdStan args: %s', cmd)
@@ -1418,10 +1422,10 @@ def _run_cmdstan(
                     elif progress_hook is not None:
                         progress_hook(line, idx)
 
-            if progress_hook is not None and proc.returncode == 0:
-                progress_hook("Done", idx)
-
             stdout, _ = proc.communicate()
+            retcode = proc.returncode
+            runset._set_retcode(idx, retcode)
+
             if stdout:
                 fd_out.write(stdout)
                 if show_console:
@@ -1434,15 +1438,15 @@ def _run_cmdstan(
             raise RuntimeError(msg) from e
         finally:
             fd_out.close()
+
         if not show_progress:
             get_logger().info('%s done processing', logger_prefix)
 
-        runset._set_retcode(idx, proc.returncode)
-        if proc.returncode != 0:
-            retcode_summary = returncode_msg(proc.returncode)
+        if retcode != 0:
+            retcode_summary = returncode_msg(retcode)
             serror = ''
             try:
-                serror = os.strerror(proc.returncode)
+                serror = os.strerror(retcode)
             except (ArithmeticError, ValueError):
                 pass
             get_logger().error(
@@ -1462,9 +1466,7 @@ def _run_cmdstan(
     @staticmethod
     @progbar.wrap_callback
     def _wrap_sampler_progress_hook(
-        one_process_per_chain: bool,
-        chains: int,
-        offset: int,
+        chain_ids: List[int],
         total: int,
     ) -> Optional[Callable[[str, int], None]]:
         """
@@ -1473,39 +1475,34 @@ def _wrap_sampler_progress_hook(
         process, "Chain [id] Iteration" for multi-chain processing.
         For the latter, manage array of pbars, update accordingly.
         """
-        do_match = chains > 1 and not one_process_per_chain
         pat = re.compile(r'Chain \[(\d*)\] (Iteration.*)')
-
-        pbars: List[tqdm] = [
-            tqdm(
+        pbars: Dict[int, tqdm] = {
+            chain_id: tqdm(
                 total=total,
                 bar_format="{desc} |{bar}| {elapsed} {postfix[0][value]}",
                 postfix=[dict(value="Status")],
-                desc=f'chain {offset + i}',
+                desc=f'chain {chain_id}',
                 colour='yellow',
             )
-            for i in range(chains)
-        ]
+            for chain_id in chain_ids
+        }
 
         def progress_hook(line: str, idx: int) -> None:
             if line == "Done":
-                for i in range(chains):
-                    pbars[i].postfix[0]["value"] = 'Sampling completed'
-                    pbars[i].update(total - pbars[i].n)
-                    pbars[i].close()
+                for pbar in pbars.values():
+                    pbar.postfix[0]["value"] = 'Sampling completed'
+                    pbar.update(total - pbar.n)
+                    pbar.close()
             else:
-                if do_match:
-                    match = pat.match(line)
-                    if match:
-                        idx = int(match.group(1)) - offset
-                        mline = match.group(2).strip()
-                    else:
-                        return
+                match = pat.match(line)
+                if match:
+                    idx = int(match.group(1))
+                    mline = match.group(2).strip()
+                elif line.startswith("Iteration"):
+                    mline = line
+                    idx = chain_ids[idx]
                 else:
-                    if line.startswith("Iteration"):
-                        mline = line
-                    else:
-                        return
+                    return
                 if 'Sampling' in mline:
                     pbars[idx].colour = 'blue'
                 pbars[idx].update(1)
diff --git a/cmdstanpy/utils.py b/cmdstanpy/utils.py
@@ -225,7 +225,9 @@ def cmdstan_version() -> Optional[Tuple[int, ...]]:
     return tuple(int(x) for x in splits[0:2])
 
 
-def cmdstan_version_before(major: int, minor: int) -> bool:
+def cmdstan_version_before(
+    major: int, minor: int, info: Optional[Dict[str, str]] = None
+) -> bool:
     """
     Check that CmdStan version is less than Major.minor version.
 
@@ -234,7 +236,18 @@ def cmdstan_version_before(major: int, minor: int) -> bool:
 
     :return: True if version at or above major.minor, else False.
     """
-    cur_version = cmdstan_version()
+    cur_version = None
+    if info is None:
+        cur_version = cmdstan_version()
+    else:
+        if (
+            info['stan_version_major'] is not None
+            and info['stan_version_minor'] is not None
+        ):
+            cur_version = (
+                int(info['stan_version_major']),
+                int(info['stan_version_minor']),
+            )
     if cur_version is None:
         get_logger().info(
             'Cannot determine whether version is before %d.%d.', major, minor
diff --git a/docsrc/installation.rst b/docsrc/installation.rst
@@ -144,7 +144,7 @@ can be used to override these defaults:
 
 .. code-block:: bash
 
-    install_cmdstan -d my_local_cmdstan -v 2.20.0
+    install_cmdstan -d my_local_cmdstan -v 2.27.0
     ls -F my_local_cmdstan
 
 DIY Installation
diff --git a/docsrc/overview.rst b/docsrc/overview.rst
@@ -9,16 +9,20 @@ It wraps the
 command line interface in a small set of
 Python classes which provide methods to do analysis and manage the resulting
 set of model, data, and posterior estimates.
-
-CmdStanPy is a lightweight interface in that it is designed to use minimal
-memory beyond what is used by CmdStan itself to do inference given
-and model and data.It runs and records an analysis, but the user chooses
-whether or not to instantiate the results in memory,
-thus CmdStanPy has the potential to fit more complex models
+It is lightweight in that it uses minimal
+memory beyond the memory used by CmdStan.
+CmdStanPy runs CmdStan, but only instantiates the resulting inference
+objects in memory upon request.
+Thus CmdStanPy has the potential to fit more complex models
 to larger datasets than might be possible in PyStan or RStan.
-It manages the set of CmdStan input and output files and provides
-methods and options which allow the user to save these files
-to a specific filepath.
-By default, CmdStan output files are written to a temporary directory
-in order to avoid filling up the user's filesystem.
+
+CmdStan is a file-based interface.
+CmdStanPy manages the Stan program files and the CmdStan output files.
+The user can specify the output directory for the CmdStan outputs;
+otherwise the files will be written to a
+temporary filesystem which persists throughout the session.
+This allows the user to test and develop models prospectively,
+following the Bayesian workflow.
+
+
 
diff --git a/test/test_sample.py b/test/test_sample.py
@@ -680,7 +680,7 @@ def test_show_progress(self, stanfile='bernoulli.stan'):
             bern_model.sample(
                 data=jdata,
                 chains=2,
-                chain_ids=[6,7],
+                chain_ids=[6, 7],
                 iter_warmup=100,
                 iter_sampling=100,
                 force_one_process_per_chain=True,