From 8798a8f38be817cb1d23d70b16b2508e3d4dd5c8 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 18:31:28 +0200 Subject: [PATCH 01/33] Add bayesian-resume-and-mcmc-sidecar ADR (accepted) --- .../bayesian-resume-and-mcmc-sidecar.md | 189 ++++++++++++++++++ docs/dev/adrs/index.md | 1 + 2 files changed, 190 insertions(+) create mode 100644 docs/dev/adrs/accepted/bayesian-resume-and-mcmc-sidecar.md diff --git a/docs/dev/adrs/accepted/bayesian-resume-and-mcmc-sidecar.md b/docs/dev/adrs/accepted/bayesian-resume-and-mcmc-sidecar.md new file mode 100644 index 000000000..82761167d --- /dev/null +++ b/docs/dev/adrs/accepted/bayesian-resume-and-mcmc-sidecar.md @@ -0,0 +1,189 @@ +# ADR: Bayesian Resume (DREAM) and MCMC Sidecar Naming + +## Status + +Accepted. + +## Date + +2026-06-15 + +## Group + +Analysis and fitting. + +## Context + +EasyDiffraction supports two Bayesian MCMC minimizers: **emcee** and +**bumps DREAM**. Resume (continue/extend a previously sampled chain +across sessions) is implemented for **emcee only**: + +- `MinimizerFitOptions.resume` / `extra_steps` and the matching + `FitterFitOptions` already exist and are engine-agnostic. +- `MinimizerBase.fit()` raises `NotImplementedError("…does not support + resume")`; `EmceeMinimizer` overrides `fit()` to implement it. +- emcee persists its raw chain **live during sampling** via + `emcee.backends.HDFBackend(name='emcee_chain')` into the project's + `analysis/results.h5` sidecar. Resume reads the last state from that + HDF5 group and runs `extra_steps` more iterations. +- `BumpsDreamMinimizer` runs `FitDriver.fit()`, captures + `driver.fitter.state` (a bumps `MCMCDraw`), but **discards** it. Only + the *derived* posterior arrays reach the sidecar via + `write_analysis_results_sidecar()`. The raw sampler state is never + persisted, so there is nothing to resume from. 
+ +bumps DREAM **does** support resume — `FitDriver.fit(fit_state=…)` plus +`bumps.dream.state.save_state`/`load_state` (gzipped `.mc` text files) or +`DreamFit.h5dump`/`h5load` (HDF5). The capability is unused because the +caller must persist the state explicitly; emcee only looks "automatic" +because its backend streams to disk during the run. + +`easyscience/core` PR #257 ("Bayesian extend/resume") is a reference +implementation for the DREAM-side mechanics: it surfaces the `MCMCDraw` +state in the result, accepts a `resume_state`, recovers the population +scale factor from `state.Npop`, validates parameter count/order, +deep-copies the state before fitting (bumps mutates it in place), and +documents the **ring-buffer contract** — DREAM keeps only the last +`samples` draws, so extending an M-draw chain by N means +`samples = M + N, burn = 0`. + +Separately, the sidecar filename `results.h5` is misleading: the file +only ever holds MCMC/Bayesian content (posterior arrays, distribution +and pair caches, posterior-predictive sets, and emcee's raw chain) and +is created only for Bayesian minimizers — deterministic least-squares +results live in CIF, not here. + +Two accepted ADRs currently fix the sidecar name and the +one-file rule: + +- [`analysis-cif-fit-state.md`](../accepted/analysis-cif-fit-state.md) + — "The sidecar filename is fixed to `results.h5`". +- [`minimizer-category-consolidation.md`](../accepted/minimizer-category-consolidation.md) + — "There is exactly **one** sidecar file per fit, regardless of + minimizer: `analysis/results.h5`". + +## Decision + +### 1. Extend resume to bumps DREAM, consistent with emcee + +`BumpsDreamMinimizer` gains resume parity with `EmceeMinimizer` behind +the existing engine-agnostic API: `analysis.fit(resume=True, +extra_steps=N)`. The owner-level surface and `MinimizerFitOptions` +do not change. Internally: + +- `BumpsDreamMinimizer` overrides `fit()` (like emcee) instead of + inheriting the `NotImplementedError` guard.
+- On a fresh run the resumable `MCMCDraw` state is **captured** and + written to the sidecar at `project.save()`. +- On resume the state is loaded, deep-copied (bumps mutates it in + place), validated against the current model, and passed as + `FitDriver.fit(fit_state=…)`. +- The unified `extra_steps=N` is translated to DREAM's ring-buffer + semantics: request `samples = current_draws + N` with `burn = 0`, + recovering the population scale factor from `state.Npop` so the + population is unchanged. emcee keeps its existing append semantics; + the user-facing contract (`resume=True, extra_steps=N` adds N) is the + same for both engines. + +Resume validation mirrors emcee's `_validate_resume` but for DREAM +quirks: matching fitted-parameter count and population, and — because we +control persistence — **name-based** parameter validation (we store our +parameter names alongside the state, avoiding core's positional-only +fallback). + +The DREAM minimizer also gains a user-facing **`chains` alias** for the +bumps `population` setting (an approved API addition): `chains` is the +discoverable name, `population` is accepted for parity with bumps, and +supplying both with different values raises. The documentation states +that `population` is a *scale factor* — bumps creates +`ceil(population · n_parameters)` chains. + +### 2. Persist resumable raw sampler state per engine, in one sidecar + +Both engines write their raw, resumable state into a **single** sidecar +file, distinguished by an **engine-keyed HDF5 group**: + +- emcee: the existing `emcee_chain` group (unchanged mechanics). +- DREAM: a new top-level **`dream_state`** group containing the + `MCMCDraw` written by `DreamFit.h5dump(group, state)`, plus a + **`param_names`** dataset (the fitted-parameter names, in order) for + name-based resume validation. This layout is fixed by this ADR, not + deferred to implementation. 
+ +The derived posterior arrays (`/posterior/*`, caches, predictive sets) +continue to be written by `write_analysis_results_sidecar()` as today. + +**State lifecycle (one sidecar, several engines).** A *fresh* +(non-resume) fit clears **all** raw sampler-state groups (both +`emcee_chain` and `dream_state`) before writing — consistent with the +existing rule that `analysis.fit()` truncates the sidecar (see +`minimizer-category-consolidation.md` §4). Clearing *every* group, not +just the active engine's, is what prevents the stale-state trap: an +emcee fit, then a fresh DREAM fit, then an emcee `resume=True` fit must **not** +resume the original emcee chain. Resume detection and resume then read +**only** the group matching the active minimizer +(`analysis.minimizer.type`). On explicit `resume=True` a missing or +malformed active-engine group is a clear error; otherwise it is ignored +and the fit starts fresh. `undo_fit` clears the raw-state group(s) the +same way it already clears the sidecar. + +### 3. Rename the sidecar `results.h5` → `mcmc.h5` + +The sidecar is renamed to reflect its content. It remains **one file per +fit** (the consolidation ADR's invariant holds; only the name changes). +The filename is defined once as a single constant and the two duplicated +string literals in `analysis/fitting.py` and `analysis/analysis.py` are +replaced by that constant / a shared path helper. + +### 4. Amendments to accepted ADRs + +The rename touches **every** repository reference to `results.h5`, not +only the two ADRs that fix the name. On implementation, a +`git grep -n 'results\.h5'` sweep updates all source, docs, tests, and +tracked fixtures (excluding generated/transient outputs).
Concretely +this currently spans: + +- Accepted ADRs: `analysis-cif-fit-state.md` (filename `results.h5` → + `mcmc.h5`), `minimizer-category-consolidation.md` (filename + the + per-engine-state-groups clarification above), `undo-fit.md`, + `minimizer-input-output-split.md`, `runtime-fit-results.md`, + `edstar-project-persistence.md`, and the `docs/dev/adrs/index.md` rows. +- Suggestion ADR `fit-output-files-and-data-exports.md`. +- User docs: `docs/docs/cli/index.md`, + `docs/docs/user-guide/{concept,data-format}.md`, + `docs/docs/user-guide/analysis-workflow/{analysis,project}.md`. +- Source: `io/results_sidecar.py`, `analysis/fitting.py`, + `analysis/analysis.py`, `__main__.py`. +- Tests referencing the literal, and any tracked project fixtures. + +## Consequences + +- Resume/extend works identically for emcee and DREAM from the user's + point of view; the long-running Bayesian tutorials gain a DREAM resume + page mirroring the emcee one. +- The sidecar name communicates intent (`mcmc.h5`), and the filename + lives in exactly one place. +- The project is in beta (no legacy shims): committed project fixtures, + tutorials, and any test referencing `results.h5` are regenerated / + updated to `mcmc.h5`; old saved projects must be re-saved. +- No new third-party dependency: `bumps` is already a dependency and its + `MCMCDraw`/`h5dump`/`h5load` API is used directly. +- DREAM resume persists a second representation of the chain (raw state) + alongside the derived posterior; the sidecar grows modestly. + +## Alternatives Considered + +- **Per-engine filenames (`mcmc_emcee.h5`, `mcmc_bumps-dream.h5`).** + Rejected: forces the load/undo/clear paths to resolve "which file" + from the active minimizer and breaks the one-sidecar invariant for + little gain over engine-keyed groups in one file. 
+- **Persist DREAM state as bumps `.mc.gz` text triples (as + `easyscience/core` does).** Rejected: three extra files per fit and a + dependence on bumps' text parser, which has a known 1.0.4 regression + (`load_state` collapses single-row buffers to 1-D). `DreamFit.h5dump` + into the existing sidecar avoids both. +- **Keep the `results.h5` name.** Rejected: misleading; the file is + MCMC-only. +- **In-memory-only resume (no disk persistence).** Rejected: resume must + survive `project.save()`/load across sessions, which is the whole + point. diff --git a/docs/dev/adrs/index.md b/docs/dev/adrs/index.md index 33f1f9d76..c1c5ced05 100644 --- a/docs/dev/adrs/index.md +++ b/docs/dev/adrs/index.md @@ -24,6 +24,7 @@ folders. | Analysis and fitting | Accepted | Minimizer Input/Output Split | Keeps `analysis.minimizer` input-only and moves scalar fit outputs to paired `analysis.fit_result` classes. | [`minimizer-input-output-split.md`](accepted/minimizer-input-output-split.md) | | Analysis and fitting | Superseded | Parameter-Level Posterior Projection | Superseded by minimizer-category consolidation; kept as historical context for `parameter.posterior`. | [`parameter-posterior-summary.md`](suggestions/parameter-posterior-summary.md) | | Analysis and fitting | Accepted | Undo Fit | Builds rollback semantics and CLI behavior on already-persisted pre-fit scalar snapshots. | [`undo-fit.md`](accepted/undo-fit.md) | +| Analysis and fitting | Accepted | Bayesian Resume and MCMC Sidecar Naming | Extends bumps-DREAM with resume/extend like emcee and renames the MCMC sidecar to `mcmc.h5` with per-engine state groups. | [`bayesian-resume-and-mcmc-sidecar.md`](accepted/bayesian-resume-and-mcmc-sidecar.md) | | Core model | Accepted | Category Owners and Real Datablocks | Introduces `CategoryOwner` so singleton sections do not pretend to be real CIF datablocks. 
| [`category-owner-sections.md`](accepted/category-owner-sections.md) | | Core model | Accepted | Enum-Backed Closed Value Sets | Requires finite option sets to use `(str, Enum)` classes for validation and dispatch. | [`enum-backed-closed-values.md`](accepted/enum-backed-closed-values.md) | | Core model | Accepted | Guarded Public Properties | Uses property setters as the public writability contract for guarded objects. | [`guarded-public-properties.md`](accepted/guarded-public-properties.md) | From 2bf61fb3987908016565792b22a4caabae8d8b19 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 18:31:28 +0200 Subject: [PATCH 02/33] Add bayesian-resume-and-mcmc-sidecar implementation plan --- .../plans/bayesian-resume-and-mcmc-sidecar.md | 212 ++++++++++++++++++ 1 file changed, 212 insertions(+) create mode 100644 docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md diff --git a/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md b/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md new file mode 100644 index 000000000..1049320db --- /dev/null +++ b/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md @@ -0,0 +1,212 @@ +# Implementation Plan: Bayesian Resume (DREAM) and MCMC Sidecar Naming + +This plan follows the conventions in [`AGENTS.md`](../../../AGENTS.md). +No deliberate exceptions are taken. + +## ADR + +Implements +[`bayesian-resume-and-mcmc-sidecar.md`](../adrs/accepted/bayesian-resume-and-mcmc-sidecar.md) +(Status: Accepted; promoted from `suggestions/` in Phase A). This plan +**owns** that ADR. 
+ +Accepted ADRs this work **substantively amends** — the filename and the +single-sidecar/per-engine-groups wording: + +- [`analysis-cif-fit-state.md`](../adrs/accepted/analysis-cif-fit-state.md) +- [`minimizer-category-consolidation.md`](../adrs/accepted/minimizer-category-consolidation.md) + +The `results.h5` → `mcmc.h5` rename additionally updates **plain +references** in other accepted ADRs, the ADR index, the +`fit-output-files-and-data-exports` suggestion, user/CLI docs, and tests +— the full set is enumerated in ADR §4 and under *Concrete files* / P1.1. + +Accepted ADRs this work must **respect**: + +- [`minimizer-input-output-split.md`](../adrs/accepted/minimizer-input-output-split.md) + — `analysis.minimizer` (input) / `analysis.fit_result` (output) pairing. +- [`switchable-category-owned-selectors.md`](../adrs/accepted/switchable-category-owned-selectors.md) + — the `minimizer.type` selector surface. +- [`undo-fit.md`](../adrs/accepted/undo-fit.md) — undo clears the sidecar. + +## Branch and PR + +Flat-slug branch `bayesian-resume-and-mcmc-sidecar` off `develop` +(created by `/draft-impl-1` setup). PR targets `develop`. + +## Reference implementation + +`easyscience/core` PR #257 ("Bayesian extend/resume", branch +`bayesian_extend`) is the blueprint for the DREAM sampler mechanics: +`src/easyscience/fitting/minimizers/minimizer_bumps.py` — +`mcmc_sample(resume_state=…)`, `_resolve_population_alias`, +`save_sampler_state`/`load_sampler_state`, and the ring-buffer docstring. + +## Decisions (already made) + +- One sidecar per fit, **renamed `results.h5` → `mcmc.h5`**; filename in + one constant + a shared path helper (no duplicated literals). +- Per-engine **HDF5 groups** hold resumable raw state: emcee's existing + `emcee_chain`, plus a new DREAM state group (`DreamFit.h5dump` + + stored fitted-parameter names). +- Unified API unchanged: `analysis.fit(resume=True, extra_steps=N)`. 
+ DREAM translates `extra_steps=N` to `samples = current + N, burn = 0` + (ring-buffer extend); population scale recovered from `state.Npop`; + state deep-copied before fitting. +- DREAM resume validates parameter count, population, and **names** + (we persist names, so no positional-only fallback). +- Beta project: no legacy shim; regenerate fixtures/tutorials/tests that + reference `results.h5`. + +## Resolved decisions (no open questions blocking `/draft-impl-1`) + +1. **DREAM state layout is fixed in the ADR** — a top-level `dream_state` + HDF5 group holding the `MCMCDraw` (`DreamFit.h5dump`) plus a + `param_names` dataset. Not deferred; P1.2/P1.3 implement exactly this. +2. **The `chains` alias is included** as an approved user-facing API + addition (ADR §1). P1.5 stays in scope; it is not optional. +3. **Unified `extra_steps` semantics** — emcee appends, DREAM extends its + ring buffer (`samples = current + N`, `burn = 0`); both yield the same + "added N draws". The Phase 2 cross-engine parity test enforces this. + +## Concrete files likely to change + +- `src/easydiffraction/io/results_sidecar.py` (filename constant, path + helper; DREAM state group read/write). +- `src/easydiffraction/analysis/minimizers/bumps_dream.py` (capture + state, `_sidecar_path`, `fit()` override, resume load/validate/extend, + `chains` alias). +- `src/easydiffraction/analysis/minimizers/emcee.py` (only if shared + resume/detection helpers are factored out; otherwise untouched). +- `src/easydiffraction/analysis/minimizers/base.py` (any shared resume + validation/detection helper). +- `src/easydiffraction/analysis/fitting.py`, + `src/easydiffraction/analysis/analysis.py` (filename literal → helper; + DREAM resume-detection alongside `_has_resumable_emcee_sidecar`). +- `src/easydiffraction/__main__.py` (CLI messages naming the sidecar). 
+- **Rename sweep — every tracked `results.h5` reference** (from + `git grep -n 'results\.h5'`), excluding generated/transient outputs: + - Accepted ADRs: `analysis-cif-fit-state.md`, + `minimizer-category-consolidation.md`, `undo-fit.md`, + `minimizer-input-output-split.md`, `runtime-fit-results.md`, + `edstar-project-persistence.md`, and `docs/dev/adrs/index.md` rows. + - Suggestion ADR: `fit-output-files-and-data-exports.md`. + - User docs: `docs/docs/cli/index.md`, + `docs/docs/user-guide/{concept,data-format}.md`, + `docs/docs/user-guide/analysis-workflow/{analysis,project}.md`. + - Tests: `tests/unit/easydiffraction/io/test_results_sidecar*.py`, + `analysis/test_analysis_coverage.py`, + `analysis/test_fitting_coverage.py`, + `analysis/minimizers/test_emcee.py`, + `test___main__*.py`, `tests/integration/fitting/test_emcee.py`, + `test_bayesian_dream.py`, and any tracked project fixtures. +- DREAM resume tutorial + its registration artifacts: + `docs/docs/tutorials/bayesian-dream-resume-*.py` (+ regenerated + `.ipynb`), `docs/docs/tutorials/index.md`, + `docs/docs/tutorials/index.json`, `tests/tutorials/baseline.json`, + `docs/mkdocs.yml` nav, and `docs/docs/verification/ci_skip.txt` if the + page is heavy. (The emcee resume page appears in index.md, index.json, + and baseline.json — the new page must too.) +- Unit tests under `tests/unit/easydiffraction/analysis/minimizers/` and + `io/`; integration test under `tests/integration/fitting/`. + +## Implementation steps (Phase 1) + +**Commit discipline (required of any AI agent following this plan).** +Each step below is one atomic change. Complete the step, edit its +`- [ ]` checkbox to `- [x]`, stage **only** that step's files with +explicit paths (per [`AGENTS.md`](../../../AGENTS.md) §Commits — no +`git add -A`, no unrelated dirty files), and make the local commit with +the step's `Commit:` message **before** starting the next step or the +Phase 1 review gate. Do not batch multiple steps into one commit. 
+ +- [ ] **P1.1 — Rename sidecar `results.h5` → `mcmc.h5`, single-source + the name, sweep all references.** Update `SIDECAR_FILE_NAME`, replace + the duplicated literals in `fitting.py` / `analysis.py` with the + constant/helper, update `__main__.py` messages. Then run + `git grep -n 'results\.h5'` and update **every** tracked reference — + the accepted ADRs (`analysis-cif-fit-state`, + `minimizer-category-consolidation` incl. the per-engine-groups + clarification, `undo-fit`, `minimizer-input-output-split`, + `runtime-fit-results`, `edstar-project-persistence`) and index rows, + the `fit-output-files-and-data-exports` suggestion, the user-guide and + CLI docs, and the tests listed in Concrete files — excluding generated + outputs. End on zero non-historical `results.h5` hits. + Commit: `Rename Bayesian sidecar to mcmc.h5 and single-source it`. +- [ ] **P1.2 — Persist the DREAM raw sampler state.** Capture the + `MCMCDraw` in `BumpsDreamMinimizer`, add `_sidecar_path` (wired by the + existing `Fitter._set_minimizer_sidecar_path`), and write a + `dream_state` HDF5 group (`DreamFit.h5dump` + `param_names`) on save. + Commit: `Persist bumps-dream sampler state to the mcmc sidecar`. +- [ ] **P1.3 — DREAM resume: load, validate, extend.** Override `fit()`; + load + deep-copy the state; validate count/population/names; translate + `extra_steps` to `samples = current + N, burn = 0`; pass `fit_state` + to the driver; add a DREAM resume-detection helper. + Commit: `Implement bumps-dream resume via saved sampler state`. +- [ ] **P1.4 — Reconcile unified resume semantics.** Ensure + `resume=True, extra_steps=N` behaves consistently for emcee and DREAM + at the `Fitter`/`analysis.fit` layer; share validation/detection + helpers where clean. Commit: `Unify emcee and dream resume semantics`. +- [ ] **P1.5 — Add `chains` alias for DREAM `population`.** User-facing + `chains` alias with conflict detection and "population = scale factor" + documentation. 
Commit: `Add chains alias for bumps-dream population`. +- [ ] **P1.6 — DREAM resume tutorial (+ registration).** Add + `bayesian-dream-resume-lbco-hrpt.py` mirroring the emcee resume + tutorial; `pixi run notebook-prepare`. Register it everywhere the + emcee resume page is registered: `docs/mkdocs.yml` nav, + `docs/docs/tutorials/index.md`, `docs/docs/tutorials/index.json`, and + `tests/tutorials/baseline.json`; add to `ci_skip.txt` if heavy. + Commit: `Add bumps-dream resume tutorial`. +- [ ] **P1.7 — Regenerate sidecar-referencing fixtures/tutorials.** + Re-save committed project fixtures and tutorial outputs so the sidecar + is `mcmc.h5`. Commit: `Regenerate fixtures for mcmc.h5 sidecar`. +- [ ] **P1.8 — Phase 1 review gate (no code).** Mark `[x]` and commit the + checklist update alone. Commit: `Reach Phase 1 review gate`. + +## Phase 2 — Verification + +Use the zsh-safe capture pattern when saving output: + +```bash +pixi run fix > /tmp/ed-fix.log 2>&1; fix_exit_code=$?; tail -n 40 /tmp/ed-fix.log; exit $fix_exit_code +pixi run check > /tmp/ed-check.log 2>&1; check_exit_code=$?; tail -n 60 /tmp/ed-check.log; exit $check_exit_code +pixi run unit-tests > /tmp/ed-unit.log 2>&1; unit_exit_code=$?; tail -n 40 /tmp/ed-unit.log; exit $unit_exit_code +pixi run integration-tests > /tmp/ed-int.log 2>&1; int_exit_code=$?; tail -n 40 /tmp/ed-int.log; exit $int_exit_code +pixi run script-tests > /tmp/ed-script.log 2>&1; script_exit_code=$?; tail -n 40 /tmp/ed-script.log; exit $script_exit_code +``` + +New tests required: +- Unit: DREAM state round-trips through the `mcmc.h5` `dream_state` + group; resume validation rejects mismatched count/population/names; + `extra_steps` → `samples=current+N` translation; `chains`/`population` + alias conflict. 
+- Unit — raw-state lifecycle (one sidecar, several engines): + - a fresh (non-resume) fit clears **all** raw sampler-state groups + (every engine), so no prior chain survives — including the + emcee→fresh-DREAM→emcee-`resume=True` path, which must **not** resume + the original emcee chain; + - resume detection and resume read **only** the active minimizer's + group; + - explicit `resume=True` with a missing or malformed `dream_state` + group raises a clear error; without `resume`, it is ignored and the + fit starts fresh; + - `undo_fit` clears the raw-state group(s). +- Integration: a small DREAM fit, `project.save()`, reload, + `fit(resume=True, extra_steps=…)`, assert the chain grew by the + expected number of draws and parity with a single longer run (mirror + `test_emcee_resume_matches_small_dream_posterior`). +- Confirm `pixi run check` (link-check) passes after the tutorial/nav + and ADR edits. + +## Suggested Pull Request + +**Title:** Resume and extend Bayesian (bumps-DREAM) refinements; clearer +MCMC results file + +**Description:** You can now pause a Bayesian analysis run with the +bumps-DREAM sampler and later continue it for more steps — exactly as +already works for emcee — without losing the samples collected so far. +Saved sampling state is stored in the project's analysis folder in a +file now named `mcmc.h5` (previously `results.h5`), which better +reflects that it holds MCMC sampling output. Reloading a saved project +and asking for more steps simply extends the existing chains. 
From b695b00b782549db7719963059bfa36c8791f5a2 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 18:35:31 +0200 Subject: [PATCH 03/33] Rename Bayesian sidecar to mcmc.h5 and single-source it --- .../adrs/accepted/analysis-cif-fit-state.md | 8 ++--- .../accepted/edstar-project-persistence.md | 4 +-- .../minimizer-category-consolidation.md | 35 ++++++++++++------- .../accepted/minimizer-input-output-split.md | 2 +- docs/dev/adrs/accepted/runtime-fit-results.md | 2 +- docs/dev/adrs/accepted/undo-fit.md | 15 ++++---- docs/dev/adrs/index.md | 4 +-- .../fit-output-files-and-data-exports.md | 16 ++++----- .../plans/bayesian-resume-and-mcmc-sidecar.md | 2 +- docs/dev/plans/edstar-project-persistence.md | 4 +-- docs/docs/cli/index.md | 2 +- .../user-guide/analysis-workflow/analysis.md | 2 +- .../user-guide/analysis-workflow/project.md | 4 +-- docs/docs/user-guide/concept.md | 2 +- docs/docs/user-guide/data-format.md | 2 +- src/easydiffraction/__main__.py | 4 +-- src/easydiffraction/analysis/analysis.py | 4 ++- src/easydiffraction/analysis/fitting.py | 6 +++- src/easydiffraction/io/results_sidecar.py | 6 ++-- .../fitting/test_bayesian_dream.py | 2 +- tests/integration/fitting/test_emcee.py | 2 +- .../analysis/minimizers/test_emcee.py | 2 +- .../analysis/test_analysis_coverage.py | 6 ++-- .../analysis/test_fitting_coverage.py | 2 +- .../io/test_results_sidecar.py | 8 ++--- .../io/test_results_sidecar_coverage.py | 14 ++++---- tests/unit/easydiffraction/test___main__.py | 2 +- .../easydiffraction/test___main___coverage.py | 4 +-- 28 files changed, 91 insertions(+), 75 deletions(-) diff --git a/docs/dev/adrs/accepted/analysis-cif-fit-state.md b/docs/dev/adrs/accepted/analysis-cif-fit-state.md index 7aca93d82..a158ce25a 100644 --- a/docs/dev/adrs/accepted/analysis-cif-fit-state.md +++ b/docs/dev/adrs/accepted/analysis-cif-fit-state.md @@ -29,7 +29,7 @@ Analysis-owned fit state needs to persist: - deterministic correlation summaries - minimizer-specific fit 
outputs on the paired `_fit_result.*` category - per-parameter posterior summaries on `_fit_parameter` -- large posterior arrays and plot caches in `analysis/results.h5` +- large posterior arrays and plot caches in `analysis/mcmc.h5` Committed model parameter values and uncertainties already persist in structure and experiment CIF files through the accepted free-flag CIF @@ -44,7 +44,7 @@ projection. This ADR defines that narrower saved projection. Persist analysis-owned fit state as explicit analysis categories in `analysis/analysis.cif`, with large posterior arrays stored in -`analysis/results.h5`. +`analysis/mcmc.h5`. Do not add a dedicated `_fit_state` category or `_fit_state.schema_version`. Persisted fit state is detected from @@ -235,12 +235,12 @@ metadata from `_minimizer.*`. ### Posterior sidecar -Persist large posterior arrays in `analysis/results.h5` using `h5py`. +Persist large posterior arrays in `analysis/mcmc.h5` using `h5py`. This includes canonical posterior arrays and saved distribution, pair, and predictive cache arrays. The HDF5 file is self-describing; no CIF manifest rows or sidecar filename tags are persisted. -The sidecar filename is fixed to `results.h5` inside the project +The sidecar filename is fixed to `mcmc.h5` inside the project `analysis/` directory. If the sidecar is missing on load, summary rows in diff --git a/docs/dev/adrs/accepted/edstar-project-persistence.md b/docs/dev/adrs/accepted/edstar-project-persistence.md index 634f3ef8b..129790bf2 100644 --- a/docs/dev/adrs/accepted/edstar-project-persistence.md +++ b/docs/dev/adrs/accepted/edstar-project-persistence.md @@ -97,7 +97,7 @@ project_dir/ |-- analysis/ | |-- analysis.edi | |-- results.csv -| `-- results.h5 +| `-- mcmc.h5 `-- reports/ `-- .cif ``` @@ -108,7 +108,7 @@ continue to use `_easydiffraction_*` extension categories inside report CIF. Edi governs the `*.edi` files only. 
Existing non-STAR analysis artifacts -keep their current formats: `analysis/results.h5` remains the binary +keep their current formats: `analysis/mcmc.h5` remains the binary fit-result sidecar, and `analysis/results.csv` remains the tabular sequential-fit output used by plotting and user inspection. diff --git a/docs/dev/adrs/accepted/minimizer-category-consolidation.md b/docs/dev/adrs/accepted/minimizer-category-consolidation.md index b0f374f19..42aba2bf2 100644 --- a/docs/dev/adrs/accepted/minimizer-category-consolidation.md +++ b/docs/dev/adrs/accepted/minimizer-category-consolidation.md @@ -137,46 +137,55 @@ populated by deterministic or Bayesian fits as appropriate. Per- parameter posterior order is the order of the `_fit_parameter` rows themselves; no separate parallel loop is needed. -### 4. Heavy posterior arrays live in `analysis/results.h5`, not in CIF +### 4. Heavy posterior arrays live in `analysis/mcmc.h5`, not in CIF Posterior chains, KDE / distribution caches, pair-plot caches, and predictive datasets are large arrays unsuited to CIF. The existing -`analysis/results.h5` sidecar absorbs all of them. The corresponding +`analysis/mcmc.h5` sidecar absorbs all of them. The corresponding manifest categories (`_bayesian_distribution_cache`, `_bayesian_pair_cache`, `_bayesian_predictive_dataset`) are removed from CIF entirely — the HDF5 file is self-describing. There is exactly **one** sidecar file per fit, regardless of minimizer: -`analysis/results.h5`. No CIF tag stores the sidecar path. The file uses +`analysis/mcmc.h5`. No CIF tag stores the sidecar path. 
The file uses namespaced top-level groups: ``` -analysis/results.h5 +analysis/mcmc.h5 ├── /posterior/ # canonical posterior chains, log-prob (all Bayesian samplers) ├── /distribution_cache/ # KDE / 1-D distribution plots ├── /pair_cache/ # pair-plot grids ├── /predictive/ # posterior-predictive datasets -└── /emcee_chain/ # emcee HDFBackend live state (emcee runs only) +├── /emcee_chain/ # emcee HDFBackend live state (emcee runs only) +└── /dream_state/ # bumps-DREAM MCMCDraw + param_names (dream runs only) ``` +Each engine persists its **resumable raw sampler state** in its own +top-level group (`/emcee_chain/`, `/dream_state/`); see +[`bayesian-resume-and-mcmc-sidecar.md`](bayesian-resume-and-mcmc-sidecar.md). + **Lifecycle rule: a new fit overwrites the file.** Mixing partial results from different minimizers — or from the same minimizer with different settings or a different free-parameter set — is the most common source of "stale plot" confusion. To prevent this, calling -`analysis.fit()` truncates `analysis/results.h5` (recreating it with the +`analysis.fit()` truncates `analysis/mcmc.h5` (recreating it with the new run's groups). The user is shown a `log.warn(...)` message the first time a fit is started while a populated sidecar exists, naming the file and stating that previous results will be overwritten. Resume is the only exception: `analysis.fit(resume=True, extra_steps=N)` -opens the existing file in append mode and extends the chain. Resume is -rejected with a clear error if the active minimizer does not support it, -if `results.h5` is missing, or if the stored chain's parameter set does -not match the current one. +opens the existing file in append mode and extends the chain. It is +supported by both emcee and bumps-DREAM +([`bayesian-resume-and-mcmc-sidecar.md`](bayesian-resume-and-mcmc-sidecar.md)), +each reading only its own state group. 
Resume is rejected with a clear +error if the active minimizer does not support it, if `mcmc.h5` is +missing, or if the stored chain's parameter set does not match the +current one. Because a fresh (non-resume) fit truncates the file, no +stale raw-state group from a previous engine can be resumed by accident. For deterministic runs the Bayesian groups are absent and the sidecar -file may not exist at all. For non-emcee Bayesian runs the -`/emcee_chain` group is absent. +file may not exist at all. Only the active engine's raw-state group is +present after a run (`/emcee_chain` for emcee, `/dream_state` for DREAM). ### 5. Unified, verbose attribute names with internal mapping @@ -377,7 +386,7 @@ _fit_result.best_log_posterior -1237.89 ``` emcee's resumable chain state lives in the `/emcee_chain` group of the -same `analysis/results.h5` file (see §4). No sidecar path appears in +same `analysis/mcmc.h5` file (see §4). No sidecar path appears in CIF. ## Superseded Selector Layout diff --git a/docs/dev/adrs/accepted/minimizer-input-output-split.md b/docs/dev/adrs/accepted/minimizer-input-output-split.md index a36bf1355..d66e4b61f 100644 --- a/docs/dev/adrs/accepted/minimizer-input-output-split.md +++ b/docs/dev/adrs/accepted/minimizer-input-output-split.md @@ -419,7 +419,7 @@ under `_minimizer.*`, the duplications with `fit_result` remain, and the ### C. Move outputs into the runtime `fit_results` object, not a CIF category Persist only settings in CIF; outputs live in `analysis.fit_results` at -runtime and `analysis/results.h5` on disk. Rejected because the small +runtime and `analysis/mcmc.h5` on disk. Rejected because the small scalar outputs (success, χ², runtime, R̂) are exactly what users want to read from CIF without unpacking HDF5, and the consolidation ADR explicitly puts them in CIF (`_minimizer.*` today). 
diff --git a/docs/dev/adrs/accepted/runtime-fit-results.md b/docs/dev/adrs/accepted/runtime-fit-results.md index c0cbf7b29..b3a90f1bb 100644 --- a/docs/dev/adrs/accepted/runtime-fit-results.md +++ b/docs/dev/adrs/accepted/runtime-fit-results.md @@ -33,7 +33,7 @@ persisted projection. The accepted ADRs, as amended by [`minimizer-input-output-split.md`](minimizer-input-output-split.md), define the current compact projection for fit headers, paired fit-result -outputs, parameter posterior summaries, and the `analysis/results.h5` +outputs, parameter posterior summaries, and the `analysis/mcmc.h5` sidecar. ## Consequences diff --git a/docs/dev/adrs/accepted/undo-fit.md b/docs/dev/adrs/accepted/undo-fit.md index c58e8d588..e90022b37 100644 --- a/docs/dev/adrs/accepted/undo-fit.md +++ b/docs/dev/adrs/accepted/undo-fit.md @@ -64,10 +64,11 @@ After `undo_fit()`: the anchors needed for idempotence (§6). Undo therefore leaves these rows in place; the next fit rewrites them via `_capture_fit_parameter_state()`. -- `analysis/results.h5` is cleared in memory only: the +- `analysis/mcmc.h5` is cleared in memory only: the `Analysis._persisted_fit_state_sidecar` dict is reset to empty. All canonical groups (`/posterior`, `/distribution_cache`, `/pair_cache`, - `/predictive`, plus `/emcee_chain` for emcee fits) belong to the + `/predictive`, plus the raw sampler-state group — `/emcee_chain` for + emcee or `/dream_state` for bumps-DREAM) belong to the discarded fit, so the next save writes an empty sidecar and truncates the file. This is the same truncation that runs at the start of a new fit — see @@ -169,7 +170,7 @@ This command should: - execute `project.analysis.undo_fit()` - save the recovered state back to the same project directory by default — the rewritten `analysis/analysis.cif` reflects the rolled-back - scalars and `analysis/results.h5` is truncated + scalars and `analysis/mcmc.h5` is truncated - support `--dry` to preview the rollback without writing any file. 
The in-memory rollback still runs (so the summary numbers are real), but `project.save()` is skipped. This mirrors the existing @@ -242,12 +243,12 @@ project.analysis.fit_results # None project.structures['lbco'].cell.length_a.value # 3.8800 (start_value) project.structures['lbco'].cell.length_a.uncertainty # 0.0000 (start_uncertainty) -# Persist the rollback to disk; analysis/results.h5 is cleared too: +# Persist the rollback to disk; analysis/mcmc.h5 is cleared too: project.save() ``` For Bayesian fits, the same call also clears `parameter.posterior` on -every fitted parameter and truncates `analysis/results.h5` (the +every fitted parameter and truncates `analysis/mcmc.h5` (the `/posterior`, `/distribution_cache`, `/pair_cache`, `/predictive`, and `/emcee_chain` groups). @@ -261,7 +262,7 @@ $ python -m easydiffraction projects/lbco_hrpt undo Undoing last fit for 'lbco_hrpt'... ✅ Restored 8 parameters to their pre-fit values. ✅ Cleared analysis.fit_results. -✅ Cleared analysis/results.h5 (Bayesian sidecar). +✅ Cleared analysis/mcmc.h5 (Bayesian sidecar). ✅ Saved project to projects/lbco_hrpt. ``` @@ -272,7 +273,7 @@ $ python -m easydiffraction projects/lbco_hrpt undo --dry Would undo last fit for 'lbco_hrpt' (dry run, no files written): - 8 parameters would be restored to pre-fit values - analysis.fit_results would be cleared - - analysis/results.h5 (Bayesian sidecar) would be cleared + - analysis/mcmc.h5 (Bayesian sidecar) would be cleared ``` No-op cases — the project has nothing to undo. All three sub-cases exit diff --git a/docs/dev/adrs/index.md b/docs/dev/adrs/index.md index c1c5ced05..18abc3c0b 100644 --- a/docs/dev/adrs/index.md +++ b/docs/dev/adrs/index.md @@ -17,9 +17,9 @@ folders. 
| -------------------- | ---------- | --------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------- | | Analysis and fitting | Accepted | Fit Mode Categories and Fit Execution API | Splits fitting configuration from execution and defines active sibling fit-mode categories. | [`fit-mode-categories.md`](accepted/fit-mode-categories.md) | | Analysis and fitting | Accepted | Runtime Fit Results | Keeps full fit outputs runtime-only in the current design unless a narrower persistence ADR is accepted. | [`runtime-fit-results.md`](accepted/runtime-fit-results.md) | -| Analysis and fitting | Accepted | Analysis CIF Fit State | Defines the persisted fit-state projection in `analysis/analysis.cif` and `analysis/results.h5`. | [`analysis-cif-fit-state.md`](accepted/analysis-cif-fit-state.md) | +| Analysis and fitting | Accepted | Analysis CIF Fit State | Defines the persisted fit-state projection in `analysis/analysis.cif` and `analysis/mcmc.h5`. | [`analysis-cif-fit-state.md`](accepted/analysis-cif-fit-state.md) | | Analysis and fitting | Accepted | Parameter Correlation Persistence | Persists deterministic and posterior correlation summaries in `_fit_parameter_correlation` | [`parameter-correlation-persistence.md`](accepted/parameter-correlation-persistence.md) | -| Analysis and fitting | Suggestion | Fit Output Files and Data Exports | Narrows remaining archive/export questions after adopting `results.csv` and `results.h5`. | [`fit-output-files-and-data-exports.md`](suggestions/fit-output-files-and-data-exports.md) | +| Analysis and fitting | Suggestion | Fit Output Files and Data Exports | Narrows remaining archive/export questions after adopting `results.csv` and `mcmc.h5`. 
| [`fit-output-files-and-data-exports.md`](suggestions/fit-output-files-and-data-exports.md) | | Analysis and fitting | Accepted | Minimizer Category Consolidation | Collapses the seven Bayesian categories into one owner-level switchable `minimizer` category with HDF5 sidecar. | [`minimizer-category-consolidation.md`](accepted/minimizer-category-consolidation.md) | | Analysis and fitting | Accepted | Minimizer Input/Output Split | Keeps `analysis.minimizer` input-only and moves scalar fit outputs to paired `analysis.fit_result` classes. | [`minimizer-input-output-split.md`](accepted/minimizer-input-output-split.md) | | Analysis and fitting | Superseded | Parameter-Level Posterior Projection | Superseded by minimizer-category consolidation; kept as historical context for `parameter.posterior`. | [`parameter-posterior-summary.md`](suggestions/parameter-posterior-summary.md) | diff --git a/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md b/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md index 8705fe355..539279926 100644 --- a/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md +++ b/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md @@ -7,7 +7,7 @@ The current branch already adopts two pieces of this naming scheme: - sequential deterministic results stay in `analysis/results.csv` -- Bayesian arrays and plot caches use `analysis/results.h5` +- Bayesian arrays and plot caches use `analysis/mcmc.h5` Those decisions now live in [Analysis CIF Fit State](../accepted/analysis-cif-fit-state.md). This @@ -29,7 +29,7 @@ Different fit modes still produce different kinds of reusable output: covariance/correlation summaries The accepted fit-state ADR already standardizes the canonical saved fit -projection in `analysis/analysis.cif` plus `analysis/results.h5` for +projection in `analysis/analysis.cif` plus `analysis/mcmc.h5` for Bayesian sidecars. 
What remains open here is whether project save should also produce optional archives or user-facing export files beyond that accepted baseline. @@ -47,7 +47,7 @@ and large numerical arrays should not be embedded in The accepted baseline is: - `analysis/results.csv` for sequential deterministic fit tables -- `analysis/results.h5` for large Bayesian arrays and result-derived +- `analysis/mcmc.h5` for large Bayesian arrays and result-derived caches Any future change to those canonical filenames would need a follow-up @@ -81,11 +81,11 @@ Sequential measured input data may optionally be archived in `analysis/data.h5`, but that archive is data, not results. It must not replace `analysis/results.csv`. -### 4. Bayesian arrays use `analysis/results.h5` +### 4. Bayesian arrays use `analysis/mcmc.h5` Single Bayesian fits should store posterior samples, log posterior arrays, predictive arrays, and prepared plot caches in -`analysis/results.h5`. +`analysis/mcmc.h5`. The previous candidate name `analysis/bayesian_data.h5` remains rejected because it mixes fit type with file role and blurs result arrays with @@ -112,7 +112,7 @@ Suggested first layout: analysis/ analysis.cif results.csv # sequential deterministic only, when applicable - results.h5 # Bayesian and other structured result arrays + mcmc.h5 # Bayesian and other structured result arrays data.h5 # optional archived measured/input data exports/ _measured.csv @@ -128,7 +128,7 @@ analysis/ | single deterministic | `analysis/analysis.cif` | open question | none initially | none initially | `analysis/exports/*.csv` | | joint deterministic | `analysis/analysis.cif` | open question | none initially | none initially | `analysis/exports/*.csv` | | sequential deterministic | `analysis/analysis.cif` + `analysis/results.csv` | `analysis/results.csv` | none initially | `analysis/data.h5` | `analysis/exports/*.csv` | -| single Bayesian | `analysis/analysis.cif` + `analysis/results.h5` | optional summary export only | 
`analysis/results.h5` | none initially | optional summary/predictive CSV | +| single Bayesian | `analysis/analysis.cif` + `analysis/mcmc.h5` | optional summary export only | `analysis/mcmc.h5` | none initially | optional summary/predictive CSV | ## Open Questions @@ -145,7 +145,7 @@ analysis/ opt-in, automatic below a size threshold, or always disabled unless requested? - What size threshold and compression policy should control the optional - `analysis/data.h5`, and does `analysis/results.h5` need a matching + `analysis/data.h5`, and does `analysis/mcmc.h5` need a matching convention? - Should external CSV exports be regenerated from canonical CIF/HDF5 on demand rather than stored persistently? diff --git a/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md b/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md index 1049320db..0be068072 100644 --- a/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md +++ b/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md @@ -120,7 +120,7 @@ explicit paths (per [`AGENTS.md`](../../../AGENTS.md) §Commits — no the step's `Commit:` message **before** starting the next step or the Phase 1 review gate. Do not batch multiple steps into one commit. -- [ ] **P1.1 — Rename sidecar `results.h5` → `mcmc.h5`, single-source +- [x] **P1.1 — Rename sidecar `results.h5` → `mcmc.h5`, single-source the name, sweep all references.** Update `SIDECAR_FILE_NAME`, replace the duplicated literals in `fitting.py` / `analysis.py` with the constant/helper, update `__main__.py` messages. Then run diff --git a/docs/dev/plans/edstar-project-persistence.md b/docs/dev/plans/edstar-project-persistence.md index 532c2cc73..a5f4803c5 100644 --- a/docs/dev/plans/edstar-project-persistence.md +++ b/docs/dev/plans/edstar-project-persistence.md @@ -37,7 +37,7 @@ to the old suggestions path. - Edi becomes the project persistence format: `project.edi`, `structures/.edi`, `experiments/.edi`, and `analysis/analysis.edi`. 
-- `analysis/results.csv`, `analysis/results.h5`, and +- `analysis/results.csv`, `analysis/mcmc.h5`, and `reports/.cif` keep their current locations and purposes. - Report CIF generation stays strict IUCr/pdCIF export. Regular project save/load must not treat report CIF as round-trippable project state. @@ -220,7 +220,7 @@ code/Edi/CIF reference. experiments/.edi analysis/analysis.edi analysis/results.csv - analysis/results.h5 + analysis/mcmc.h5 reports/.cif ``` diff --git a/docs/docs/cli/index.md b/docs/docs/cli/index.md index 0ad0b7f43..7c839d91e 100644 --- a/docs/docs/cli/index.md +++ b/docs/docs/cli/index.md @@ -174,7 +174,7 @@ python -m easydiffraction PROJECT_DIR undo The command restores each refined parameter to its saved pre-fit `start_value` / `start_uncertainty`, clears `analysis.fit_results`, -truncates `analysis/results.h5` (the Bayesian sidecar), and **saves the +truncates `analysis/mcmc.h5` (the Bayesian sidecar), and **saves the rolled-back state back** to the project directory by default. Use the `--dry` flag to preview the rollback **without overwriting** any diff --git a/docs/docs/user-guide/analysis-workflow/analysis.md b/docs/docs/user-guide/analysis-workflow/analysis.md index 894bfa221..82fad40ea 100644 --- a/docs/docs/user-guide/analysis-workflow/analysis.md +++ b/docs/docs/user-guide/analysis-workflow/analysis.md @@ -329,7 +329,7 @@ project.display.posterior.predictive(expt_name='hrpt') ``` When posterior or posterior-predictive arrays are persisted, they are -stored in `analysis/results.h5`. Scalar summaries remain in +stored in `analysis/mcmc.h5`. Scalar summaries remain in `analysis/analysis.edi`. 
## Constraints diff --git a/docs/docs/user-guide/analysis-workflow/project.md b/docs/docs/user-guide/analysis-workflow/project.md index f9d467653..a493db7fd 100644 --- a/docs/docs/user-guide/analysis-workflow/project.md +++ b/docs/docs/user-guide/analysis-workflow/project.md @@ -86,7 +86,7 @@ The example below illustrates a typical **project structure** for a │ └── ... ├── 📁 analysis - analysis │ ├── 📄 analysis.edi - fit state -│ └── 📄 results.h5 - Bayesian arrays +│ └── 📄 mcmc.h5 - Bayesian arrays └── 📁 reports - reports ├── 📄 La0.5Ba0.5CoO3.cif - IUCr └── 📄 La0.5Ba0.5CoO3.html - HTML @@ -282,7 +282,7 @@ occ_Ba "occ_Ba = 1 - occ_La" When a Bayesian fit stores persisted posterior or predictive arrays, the -same `analysis/` directory also contains `results.h5`. +same `analysis/` directory also contains `mcmc.h5`.
diff --git a/docs/docs/user-guide/concept.md b/docs/docs/user-guide/concept.md index dbdee2f2c..3d28b3a88 100644 --- a/docs/docs/user-guide/concept.md +++ b/docs/docs/user-guide/concept.md @@ -100,7 +100,7 @@ credible intervals, correlations, posterior-predictive patterns, and diagnostics such as r-hat and effective sample size. EasyDiffraction currently exposes Bayesian workflows through `bumps (dream)` and `emcee`; their posterior and predictive arrays are stored in -`analysis/results.h5` when available. +`analysis/mcmc.h5` when available. ![](../assets/images/user-guide/data-analysis_refinement.png){ width="450", loading=lazy } diff --git a/docs/docs/user-guide/data-format.md b/docs/docs/user-guide/data-format.md index 5d797d512..5c5f556ef 100644 --- a/docs/docs/user-guide/data-format.md +++ b/docs/docs/user-guide/data-format.md @@ -221,7 +221,7 @@ EasyDiffraction saves projects as a directory of Edi files and sidecars: - `structures/.edi`: structure models - `experiments/.edi`: experiment setup and data - `analysis/analysis.edi`: fitting and analysis settings -- `analysis/results.csv` and `analysis/results.h5`: fit result sidecars +- `analysis/results.csv` and `analysis/mcmc.h5`: fit result sidecars - `reports/.*`: generated reports when enabled through `project.report` diff --git a/src/easydiffraction/__main__.py b/src/easydiffraction/__main__.py index f740e7431..937c9db6e 100644 --- a/src/easydiffraction/__main__.py +++ b/src/easydiffraction/__main__.py @@ -122,7 +122,7 @@ def _display_undo_summary( if outcome.cleared_fit_result: typer.echo(' - analysis.fit_results would be cleared') if outcome.cleared_sidecar: - typer.echo(' - analysis/results.h5 (Bayesian sidecar) would be cleared') + typer.echo(' - analysis/mcmc.h5 (Bayesian sidecar) would be cleared') return typer.echo(f"Undoing last fit for '{project_name}'...") @@ -130,7 +130,7 @@ def _display_undo_summary( if outcome.cleared_fit_result: typer.echo('✅ Cleared analysis.fit_results.') if 
outcome.cleared_sidecar: - typer.echo('✅ Cleared analysis/results.h5 (Bayesian sidecar).') + typer.echo('✅ Cleared analysis/mcmc.h5 (Bayesian sidecar).') project.save() typer.echo(f'✅ Saved project to {project_dir}.') diff --git a/src/easydiffraction/analysis/analysis.py b/src/easydiffraction/analysis/analysis.py index 3d303a5f4..b3145874f 100644 --- a/src/easydiffraction/analysis/analysis.py +++ b/src/easydiffraction/analysis/analysis.py @@ -1542,11 +1542,13 @@ def _resolved_resume_extra_steps(self, extra_steps: int | None) -> int: def _has_resumable_emcee_sidecar(self) -> bool: """Return whether the saved project has a resumable chain.""" + from easydiffraction.io.results_sidecar import SIDECAR_FILE_NAME # noqa: PLC0415 + project_path = self.project.metadata.path if project_path is None: return False - sidecar_path = project_path / 'analysis' / 'results.h5' + sidecar_path = project_path / 'analysis' / SIDECAR_FILE_NAME if not sidecar_path.is_file(): return False diff --git a/src/easydiffraction/analysis/fitting.py b/src/easydiffraction/analysis/fitting.py index 9888cf210..7730bd60c 100644 --- a/src/easydiffraction/analysis/fitting.py +++ b/src/easydiffraction/analysis/fitting.py @@ -301,9 +301,13 @@ def _set_minimizer_sidecar_path(self, analysis: object) -> None: if analysis is None or not hasattr(self.minimizer, '_sidecar_path'): return + from easydiffraction.io.results_sidecar import SIDECAR_FILE_NAME # noqa: PLC0415 + project_metadata = getattr(getattr(analysis, 'project', None), 'metadata', None) project_path = getattr(project_metadata, 'path', None) - sidecar_path = None if project_path is None else project_path / 'analysis' / 'results.h5' + sidecar_path = ( + None if project_path is None else project_path / 'analysis' / SIDECAR_FILE_NAME + ) self.minimizer._sidecar_path = sidecar_path def _backfill_persisted_fitting_time(self, analysis: object) -> None: diff --git a/src/easydiffraction/io/results_sidecar.py b/src/easydiffraction/io/results_sidecar.py index 
920bc947b..c76059c55 100644 --- a/src/easydiffraction/io/results_sidecar.py +++ b/src/easydiffraction/io/results_sidecar.py @@ -15,7 +15,7 @@ from pathlib import Path SidecarPayload = dict[str, dict[str, object]] -SIDECAR_FILE_NAME = 'results.h5' +SIDECAR_FILE_NAME = 'mcmc.h5' _POSTERIOR_PARAMETER_SAMPLES_PATH = '/posterior/parameter_samples' _POSTERIOR_LOG_POSTERIOR_PATH = '/posterior/log_posterior' _POSTERIOR_DRAW_INDEX_PATH = '/posterior/draw_index' @@ -310,7 +310,7 @@ def write_analysis_results_sidecar( analysis_dir: Path, ) -> None: """ - Write persisted Bayesian arrays to ``analysis/results.h5``. + Write persisted Bayesian arrays to ``analysis/mcmc.h5``. Parameters ---------- @@ -409,7 +409,7 @@ def read_analysis_results_sidecar( analysis_dir: Path, ) -> None: """ - Read persisted Bayesian arrays from ``analysis/results.h5``. + Read persisted Bayesian arrays from ``analysis/mcmc.h5``. Parameters ---------- diff --git a/tests/integration/fitting/test_bayesian_dream.py b/tests/integration/fitting/test_bayesian_dream.py index 7387cd1a9..96f6260b1 100644 --- a/tests/integration/fitting/test_bayesian_dream.py +++ b/tests/integration/fitting/test_bayesian_dream.py @@ -186,7 +186,7 @@ def test_bayesian_fit_results_reload_from_persisted_fit_state(tmp_path): project.save_as(str(proj_dir)) analysis_edi = proj_dir / 'analysis' / 'analysis.edi' - results_sidecar = proj_dir / 'analysis' / 'results.h5' + results_sidecar = proj_dir / 'analysis' / 'mcmc.h5' assert analysis_edi.is_file() assert results_sidecar.is_file() diff --git a/tests/integration/fitting/test_emcee.py b/tests/integration/fitting/test_emcee.py index 3f33739d8..c6e873643 100644 --- a/tests/integration/fitting/test_emcee.py +++ b/tests/integration/fitting/test_emcee.py @@ -95,7 +95,7 @@ def test_emcee_resume_matches_small_dream_posterior(tmp_path, proposal_moves): emcee.nwalkers = 16 emcee.parallel_workers = 1 emcee.proposal_moves = proposal_moves - emcee._sidecar_path = tmp_path / 'analysis' / 'results.h5' 
+ emcee._sidecar_path = tmp_path / 'analysis' / 'mcmc.h5' emcee_results = emcee.fit( _toy_parameters(), _mapping_residuals, diff --git a/tests/unit/easydiffraction/analysis/minimizers/test_emcee.py b/tests/unit/easydiffraction/analysis/minimizers/test_emcee.py index 46c0e2dd2..75623c9d5 100644 --- a/tests/unit/easydiffraction/analysis/minimizers/test_emcee.py +++ b/tests/unit/easydiffraction/analysis/minimizers/test_emcee.py @@ -238,7 +238,7 @@ def join(self) -> None: monkeypatch.setattr( minimizer, '_resolved_sidecar_path', - lambda: tmp_path / 'analysis' / 'results.h5', + lambda: tmp_path / 'analysis' / 'mcmc.h5', ) monkeypatch.setattr(minimizer, '_validate_walker_count', lambda **kwargs: None) monkeypatch.setattr( diff --git a/tests/unit/easydiffraction/analysis/test_analysis_coverage.py b/tests/unit/easydiffraction/analysis/test_analysis_coverage.py index 99a133605..143e2f310 100644 --- a/tests/unit/easydiffraction/analysis/test_analysis_coverage.py +++ b/tests/unit/easydiffraction/analysis/test_analysis_coverage.py @@ -1151,7 +1151,7 @@ def test_sidecar_with_positive_iteration_returns_true(self, tmp_path): analysis_dir = tmp_path / 'analysis' analysis_dir.mkdir() - sidecar = analysis_dir / 'results.h5' + sidecar = analysis_dir / 'mcmc.h5' with h5py.File(sidecar, 'w') as handle: group = handle.create_group(EMCEE_CHAIN_GROUP) group.attrs['iteration'] = 12 @@ -1173,7 +1173,7 @@ def test_sidecar_with_zero_iteration_returns_false(self, tmp_path): analysis_dir = tmp_path / 'analysis' analysis_dir.mkdir() - sidecar = analysis_dir / 'results.h5' + sidecar = analysis_dir / 'mcmc.h5' with h5py.File(sidecar, 'w') as handle: group = handle.create_group(EMCEE_CHAIN_GROUP) group.attrs['iteration'] = 0 @@ -1194,7 +1194,7 @@ def test_sidecar_without_chain_group_returns_false(self, tmp_path): analysis_dir = tmp_path / 'analysis' analysis_dir.mkdir() - sidecar = analysis_dir / 'results.h5' + sidecar = analysis_dir / 'mcmc.h5' with h5py.File(sidecar, 'w') as handle: 
handle.create_group('some_other_group') diff --git a/tests/unit/easydiffraction/analysis/test_fitting_coverage.py b/tests/unit/easydiffraction/analysis/test_fitting_coverage.py index 926da27f7..a7f1d89db 100644 --- a/tests/unit/easydiffraction/analysis/test_fitting_coverage.py +++ b/tests/unit/easydiffraction/analysis/test_fitting_coverage.py @@ -389,7 +389,7 @@ def test_set_minimizer_sidecar_path_builds_results_path(tmp_path): fitter._set_minimizer_sidecar_path(analysis) - assert fitter.minimizer._sidecar_path == tmp_path / 'analysis' / 'results.h5' + assert fitter.minimizer._sidecar_path == tmp_path / 'analysis' / 'mcmc.h5' def test_backfill_persisted_fitting_time_noop_when_analysis_none(): diff --git a/tests/unit/easydiffraction/io/test_results_sidecar.py b/tests/unit/easydiffraction/io/test_results_sidecar.py index a750e06db..d652315f6 100644 --- a/tests/unit/easydiffraction/io/test_results_sidecar.py +++ b/tests/unit/easydiffraction/io/test_results_sidecar.py @@ -87,7 +87,7 @@ def test_write_and_read_analysis_results_sidecar_round_trip_predictive(tmp_path) write_analysis_results_sidecar(analysis=analysis, analysis_dir=analysis_dir) - sidecar_path = analysis_dir / 'results.h5' + sidecar_path = analysis_dir / 'mcmc.h5' assert sidecar_path.is_file() import h5py @@ -155,7 +155,7 @@ def test_write_analysis_results_sidecar_truncates_stale_payloads(tmp_path): import h5py - with h5py.File(analysis_dir / 'results.h5', 'r') as handle: + with h5py.File(analysis_dir / 'mcmc.h5', 'r') as handle: assert 'posterior' not in handle assert 'alpha' not in handle['distribution_cache'] assert 'alpha__beta' not in handle['pair_cache'] @@ -175,7 +175,7 @@ def test_write_analysis_results_sidecar_preserves_emcee_chain_group(tmp_path): import h5py - with h5py.File(analysis_dir / 'results.h5', 'a') as handle: + with h5py.File(analysis_dir / 'mcmc.h5', 'a') as handle: chain = handle.require_group(EMCEE_CHAIN_GROUP) chain.attrs['iteration'] = 7 @@ -184,7 +184,7 @@ def 
test_write_analysis_results_sidecar_preserves_emcee_chain_group(tmp_path): analysis_dir=analysis_dir, ) - with h5py.File(analysis_dir / 'results.h5', 'r') as handle: + with h5py.File(analysis_dir / 'mcmc.h5', 'r') as handle: assert handle[EMCEE_CHAIN_GROUP].attrs['iteration'] == 7 diff --git a/tests/unit/easydiffraction/io/test_results_sidecar_coverage.py b/tests/unit/easydiffraction/io/test_results_sidecar_coverage.py index 73842905d..50b92e813 100644 --- a/tests/unit/easydiffraction/io/test_results_sidecar_coverage.py +++ b/tests/unit/easydiffraction/io/test_results_sidecar_coverage.py @@ -69,7 +69,7 @@ def _empty_analysis(*, has_fit_state: bool = True, bayesian: bool = True) -> obj def test_delete_stale_sidecar_removes_existing_file(tmp_path): from easydiffraction.io.results_sidecar import _delete_stale_sidecar - sidecar_path = Path(tmp_path) / 'results.h5' + sidecar_path = Path(tmp_path) / 'mcmc.h5' sidecar_path.write_bytes(b'stale') assert sidecar_path.is_file() @@ -110,7 +110,7 @@ def test_warn_existing_sidecar_overwrite_warns_for_nonempty(tmp_path, monkeypatc warnings: list[str] = [] monkeypatch.setattr(mod.log, 'warning', warnings.append) - sidecar_path = Path(tmp_path) / 'results.h5' + sidecar_path = Path(tmp_path) / 'mcmc.h5' sidecar_path.write_bytes(b'payload') mod._warn_existing_sidecar_overwrite(sidecar_path) @@ -126,7 +126,7 @@ def test_prepare_analysis_results_sidecar_for_new_fit_warns_and_removes(tmp_path monkeypatch.setattr(mod.log, 'warning', warnings.append) analysis_dir = Path(tmp_path) - sidecar_path = analysis_dir / 'results.h5' + sidecar_path = analysis_dir / 'mcmc.h5' sidecar_path.write_bytes(b'previous-fit') mod.prepare_analysis_results_sidecar_for_new_fit(analysis_dir=analysis_dir) @@ -425,14 +425,14 @@ def test_write_analysis_results_sidecar_removes_file_when_nothing_written(tmp_pa mod.write_analysis_results_sidecar(analysis=analysis, analysis_dir=analysis_dir) - assert not (analysis_dir / 'results.h5').exists() + assert not (analysis_dir 
/ 'mcmc.h5').exists() def test_write_analysis_results_sidecar_deletes_stale_when_not_bayesian(tmp_path): from easydiffraction.io import results_sidecar as mod analysis_dir = Path(tmp_path) - sidecar_path = analysis_dir / 'results.h5' + sidecar_path = analysis_dir / 'mcmc.h5' sidecar_path.write_bytes(b'stale') analysis = _empty_analysis(bayesian=False) @@ -499,7 +499,7 @@ def test_read_analysis_results_sidecar_populates_all_groups(tmp_path): from easydiffraction.io import results_sidecar as mod analysis_dir = Path(tmp_path) - path = analysis_dir / 'results.h5' + path = analysis_dir / 'mcmc.h5' with h5py.File(path, 'w') as handle: mod._create_dataset(handle, mod._POSTERIOR_PARAMETER_SAMPLES_PATH, np.zeros((2, 1, 1))) mod._write_payload_group( @@ -536,7 +536,7 @@ def test_read_analysis_results_sidecar_skips_empty_groups(tmp_path): from easydiffraction.io import results_sidecar as mod analysis_dir = Path(tmp_path) - path = analysis_dir / 'results.h5' + path = analysis_dir / 'mcmc.h5' # File exists but contains no canonical EasyDiffraction groups. 
with h5py.File(path, 'w') as handle: handle.create_group('unrelated') diff --git a/tests/unit/easydiffraction/test___main__.py b/tests/unit/easydiffraction/test___main__.py index f9bff0bea..0e54240db 100644 --- a/tests/unit/easydiffraction/test___main__.py +++ b/tests/unit/easydiffraction/test___main__.py @@ -398,7 +398,7 @@ def save(): assert "Would undo last fit for 'demo_project'" in result.stdout assert '2 parameters would be restored to pre-fit values' in result.stdout assert 'analysis.fit_results would be cleared' in result.stdout - assert 'analysis/results.h5 (Bayesian sidecar) would be cleared' in result.stdout + assert 'analysis/mcmc.h5 (Bayesian sidecar) would be cleared' in result.stdout def test_cli_undo_saves_after_real_rollback(monkeypatch, tmp_path): diff --git a/tests/unit/easydiffraction/test___main___coverage.py b/tests/unit/easydiffraction/test___main___coverage.py index d0039c702..4255f9c3b 100644 --- a/tests/unit/easydiffraction/test___main___coverage.py +++ b/tests/unit/easydiffraction/test___main___coverage.py @@ -456,7 +456,7 @@ def save(): assert calls == ['SAVE'] assert 'Restored 3 parameters to their pre-fit values.' in result.stdout assert 'Cleared analysis.fit_results.' not in result.stdout - assert 'Cleared analysis/results.h5 (Bayesian sidecar).' not in result.stdout + assert 'Cleared analysis/mcmc.h5 (Bayesian sidecar).' not in result.stdout def test_undo_real_with_sidecar_cleared_echoes_sidecar_line(monkeypatch): @@ -488,7 +488,7 @@ def save(): assert result.exit_code == 0 assert 'Cleared analysis.fit_results.' not in result.stdout - assert 'Cleared analysis/results.h5 (Bayesian sidecar).' in result.stdout + assert 'Cleared analysis/mcmc.h5 (Bayesian sidecar).' 
in result.stdout def test_undo_falls_back_to_project_dir_when_name_missing(monkeypatch, tmp_path): From d0a690142b5f1bde7d4790384b60f14e8a3d54a2 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 18:47:04 +0200 Subject: [PATCH 04/33] Persist bumps-dream sampler state to the mcmc sidecar --- .../plans/bayesian-resume-and-mcmc-sidecar.md | 2 +- .../analysis/minimizers/bumps_dream.py | 62 +++++++++++++++++++ 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md b/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md index 0be068072..17da89f3b 100644 --- a/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md +++ b/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md @@ -133,7 +133,7 @@ Phase 1 review gate. Do not batch multiple steps into one commit. CLI docs, and the tests listed in Concrete files — excluding generated outputs. End on zero non-historical `results.h5` hits. Commit: `Rename Bayesian sidecar to mcmc.h5 and single-source it`. -- [ ] **P1.2 — Persist the DREAM raw sampler state.** Capture the +- [x] **P1.2 — Persist the DREAM raw sampler state.** Capture the `MCMCDraw` in `BumpsDreamMinimizer`, add `_sidecar_path` (wired by the existing `Fitter._set_minimizer_sidecar_path`), and write a `dream_state` HDF5 group (`DreamFit.h5dump` + `param_names`) on save. 
diff --git a/src/easydiffraction/analysis/minimizers/bumps_dream.py b/src/easydiffraction/analysis/minimizers/bumps_dream.py index e87792b1d..aa36e7642 100644 --- a/src/easydiffraction/analysis/minimizers/bumps_dream.py +++ b/src/easydiffraction/analysis/minimizers/bumps_dream.py @@ -8,6 +8,7 @@ import random import sys from dataclasses import dataclass +from pathlib import Path import numpy as np from bumps.fitproblem import FitProblem @@ -50,6 +51,48 @@ DREAM_SAMPLE_ARRAY_NDIM = 3 DREAM_DRIVER_FAILURES = (ArithmeticError, RuntimeError, TypeError, ValueError) +# Top-level HDF5 group in the MCMC sidecar (mcmc.h5) holding the +# resumable bumps-DREAM sampler state, alongside emcee's emcee_chain. +DREAM_STATE_GROUP = 'dream_state' + + +def _write_dream_state_sidecar( + sidecar_path: Path, + state: object, + parameter_names: list[str], +) -> None: + """ + Persist a DREAM ``MCMCDraw`` state into the MCMC sidecar. + + The state is written under ``/dream_state/state`` via the bumps + ``DreamFit.h5dump`` contract, with the fitted-parameter names stored + in a sibling ``/dream_state/param_names`` dataset so resume can match + by name (bumps does not preserve labels through its own save/load). + + Parameters + ---------- + sidecar_path : Path + Path to the ``mcmc.h5`` sidecar file. + state : object + The bumps ``MCMCDraw`` object captured from ``driver.fitter``. + parameter_names : list[str] + Fitted-parameter names, in sampling order. 
+ """ + import h5py # noqa: PLC0415 + from bumps.fitters import DreamFit # noqa: PLC0415 + + sidecar_path = Path(sidecar_path) + sidecar_path.parent.mkdir(parents=True, exist_ok=True) + with h5py.File(str(sidecar_path), 'a') as handle: + if DREAM_STATE_GROUP in handle: + del handle[DREAM_STATE_GROUP] + group = handle.create_group(DREAM_STATE_GROUP) + DreamFit.h5dump(group.create_group('state'), state) + group.create_dataset( + 'param_names', + data=np.array(parameter_names, dtype=h5py.string_dtype(encoding='utf-8')), + ) + @dataclass(slots=True) class _DreamRunContext: @@ -290,6 +333,10 @@ class BumpsDreamMinimizer(BumpsMinimizer): description='Bumps library with DREAM Bayesian sampling', ) + # Set by Fitter._set_minimizer_sidecar_path when a project path is + # known; enables persisting/resuming the DREAM state in mcmc.h5. + _sidecar_path: Path | None = None + def __init__( self, name: str = MinimizerTypeEnum.BUMPS_DREAM, @@ -650,12 +697,27 @@ def _run_solver( self.tracker.start_sampler_post_processing() + self._persist_dream_state( + raw_state=driver_result.raw_state, + parameter_names=context.parameter_names, + ) + return self._build_success_result( context=context, raw_state=driver_result.raw_state, best_nllf=driver_result.best_nllf, ) + def _persist_dream_state(self, *, raw_state: object, parameter_names: object) -> None: + """Write the DREAM sampler state to the sidecar when configured.""" + if self._sidecar_path is None: + return + _write_dream_state_sidecar( + Path(self._sidecar_path), + raw_state, + [str(name) for name in parameter_names], + ) + def _prepare_run_context( self, *, From 37ff149ee7397e073ee99793a6acac69fdf0857f Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 19:00:13 +0200 Subject: [PATCH 05/33] Implement bumps-dream resume via saved sampler state --- .../plans/bayesian-resume-and-mcmc-sidecar.md | 2 +- .../analysis/minimizers/bumps_dream.py | 216 +++++++++++++++++- 2 files changed, 207 insertions(+), 11 deletions(-) diff 
--git a/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md b/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md index 17da89f3b..7266d7b8e 100644 --- a/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md +++ b/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md @@ -138,7 +138,7 @@ Phase 1 review gate. Do not batch multiple steps into one commit. existing `Fitter._set_minimizer_sidecar_path`), and write a `dream_state` HDF5 group (`DreamFit.h5dump` + `param_names`) on save. Commit: `Persist bumps-dream sampler state to the mcmc sidecar`. -- [ ] **P1.3 — DREAM resume: load, validate, extend.** Override `fit()`; +- [x] **P1.3 — DREAM resume: load, validate, extend.** Override `fit()`; load + deep-copy the state; validate count/population/names; translate `extra_steps` to `samples = current + N, burn = 0`; pass `fit_state` to the driver; add a DREAM resume-detection helper. diff --git a/src/easydiffraction/analysis/minimizers/bumps_dream.py b/src/easydiffraction/analysis/minimizers/bumps_dream.py index aa36e7642..e78ef2745 100644 --- a/src/easydiffraction/analysis/minimizers/bumps_dream.py +++ b/src/easydiffraction/analysis/minimizers/bumps_dream.py @@ -4,6 +4,7 @@ from __future__ import annotations +import math import multiprocessing import random import sys @@ -25,12 +26,14 @@ from easydiffraction.analysis.fit_helpers.bayesian import standard_deviations_from_summaries from easydiffraction.analysis.fit_helpers.bayesian import summarize_posterior_parameters from easydiffraction.analysis.fit_helpers.tracking import SamplerProgressUpdate +from easydiffraction.analysis.minimizers.base import MinimizerFitOptions from easydiffraction.analysis.minimizers.bumps import BumpsMinimizer from easydiffraction.analysis.minimizers.bumps import _EasyDiffractionFitness from easydiffraction.analysis.minimizers.enums import DreamPopulationInitializationEnum from easydiffraction.analysis.minimizers.enums import MinimizerTypeEnum from easydiffraction.analysis.minimizers.factory import 
MinimizerFactory from easydiffraction.core.metadata import TypeInfo +from easydiffraction.utils.enums import VerbosityEnum from easydiffraction.utils.logging import log _BUMPS_DREAM_LOG = log @@ -94,6 +97,50 @@ def _write_dream_state_sidecar( ) +def _read_dream_state_sidecar(sidecar_path: Path) -> tuple[object, list[str]] | None: + """ + Read a persisted DREAM state from the MCMC sidecar. + + Returns the bumps ``MCMCDraw`` state and the stored fitted-parameter + names, or ``None`` when the sidecar or its ``dream_state`` group is + absent. + + Parameters + ---------- + sidecar_path : Path + Path to the ``mcmc.h5`` sidecar file. + + Returns + ------- + tuple[object, list[str]] | None + ``(state, param_names)`` when present, otherwise ``None``. + + Raises + ------ + ValueError + If the ``dream_state`` group is present but malformed. + """ + import h5py # noqa: PLC0415 + from bumps.fitters import DreamFit # noqa: PLC0415 + + sidecar_path = Path(sidecar_path) + if not sidecar_path.is_file(): + return None + with h5py.File(str(sidecar_path), 'r') as handle: + if DREAM_STATE_GROUP not in handle: + return None + group = handle[DREAM_STATE_GROUP] + if 'state' not in group or 'param_names' not in group: + msg = f"Malformed '{DREAM_STATE_GROUP}' group in '{sidecar_path}'." 
+ raise ValueError(msg) + state = DreamFit.h5load(group['state']) + param_names = [ + name.decode('utf-8') if isinstance(name, bytes) else str(name) + for name in group['param_names'][()] + ] + return state, param_names + + @dataclass(slots=True) class _DreamRunContext: """Prepared driver state and metadata for one DREAM run.""" @@ -636,15 +683,18 @@ def _sampler_settings( steps: int, burn: int, n_parameters: int, + samples_override: int | None = None, + pop_override: int | None = None, ) -> dict[str, object]: """Build the sampler settings dictionary recorded in results.""" - samples = steps * self.pop * n_parameters + pop = self.pop if pop_override is None else int(pop_override) + samples = steps * pop * n_parameters if samples_override is None else int(samples_override) return { 'random_seed': int(random_seed), 'steps': int(steps), 'burn': int(burn), 'thin': int(self.thin), - 'pop': int(self.pop), + 'pop': int(pop), 'parallel': int(self.parallel), 'init': self.init.value, 'samples': int(samples), @@ -653,6 +703,42 @@ def _sampler_settings( 'trim': DEFAULT_TRIM, } + def fit( + self, + parameters: list[object], + objective_function: object, + verbosity: VerbosityEnum = VerbosityEnum.FULL, + *, + options: MinimizerFitOptions | None = None, + ) -> BayesianFitResults: + """ + Run DREAM sampling and return Bayesian fit results. + + Overrides the base ``fit`` so bumps-DREAM supports resume: + ``resume`` and ``extra_steps`` are threaded into the solver, + which extends the saved chain instead of starting cold. 
+ """ + fit_options = options or MinimizerFitOptions() + if fit_options.use_physical_limits: + self._apply_physical_limits(parameters) + + resolved_random_seed = self._resolve_random_seed(fit_options.random_seed) + minimizer_name = self.name or 'Unnamed Minimizer' + if self.method is not None and f'({self.method})' not in minimizer_name: + minimizer_name += f' ({self.method})' + self._start_tracking(minimizer_name, verbosity=verbosity) + + try: + solver_args = self._prepare_solver_args(parameters) + solver_args['random_seed'] = resolved_random_seed + solver_args['resume'] = fit_options.resume + solver_args['extra_steps'] = fit_options.extra_steps + raw_result = self._run_solver(objective_function, **solver_args) + return self._finalize_fit(parameters, raw_result) + finally: + if fit_options.finalize_tracking: + self._stop_tracking() + def _run_solver( self, objective_function: object, @@ -673,12 +759,27 @@ def _run_solver( object Normalized DREAM result stored in an ``OptimizeResult``. """ - total_iterations = int(self.steps + self._resolved_burn(self.steps) + 1) + resume = bool(kwargs.get('resume')) + resume_overrides: dict[str, object] = {} + fit_state = None + if resume: + resume_overrides, fit_state = self._prepare_dream_resume( + kwargs=kwargs, + extra_steps=kwargs.get('extra_steps'), + ) + total_iterations = int(resume_overrides['steps_override'] + 1) + else: + total_iterations = int(self.steps + self._resolved_burn(self.steps) + 1) self.tracker.start_sampler_pre_processing(total_iterations=total_iterations) - context = self._prepare_run_context(objective_function=objective_function, kwargs=kwargs) + context = self._prepare_run_context( + objective_function=objective_function, + kwargs=kwargs, + **resume_overrides, + ) driver_result = self._execute_driver( driver=context.driver, random_seed=int(context.sampler_settings['random_seed']), + fit_state=fit_state, ) if driver_result.error is not None: return self._failure_result( @@ -718,13 +819,99 @@ def 
_persist_dream_state(self, *, raw_state: object, parameter_names: object) -> [str(name) for name in parameter_names], ) + def _prepare_dream_resume( + self, + *, + kwargs: dict[str, object], + extra_steps: object, + ) -> tuple[dict[str, object], object]: + """ + Load and validate saved DREAM state for a resume run. + + Returns the driver overrides (extending the chain by + ``extra_steps`` generations via the ring-buffer contract) and a + deep-copied ``fit_state`` for ``FitDriver.fit``. The deep copy is + required because bumps mutates the state in place. + """ + import copy # noqa: PLC0415 + + if not isinstance(extra_steps, int) or isinstance(extra_steps, bool) or extra_steps <= 0: + msg = 'Resuming a bumps-dream fit requires a positive integer extra_steps.' + raise ValueError(msg) + if self._sidecar_path is None: + msg = 'bumps-dream resume requires a saved project; no sidecar path is set.' + raise ValueError(msg) + loaded = _read_dream_state_sidecar(Path(self._sidecar_path)) + if loaded is None: + msg = "No saved bumps-dream chain to resume; run a fresh fit first." 
+ raise ValueError(msg) + state, saved_names = loaded + + parameter_names = [str(name) for name in kwargs.get('parameter_names')] + self._validate_dream_resume(state=state, saved_names=saved_names, names=parameter_names) + + n_parameters = len(parameter_names) + pop_scale = self._recovered_population_scale(state=state, n_parameters=n_parameters) + current_steps = self._state_generations( + state=state, pop_scale=pop_scale, n_parameters=n_parameters + ) + target_steps = current_steps + int(extra_steps) + overrides = { + 'steps_override': target_steps, + 'burn_override': 0, + 'samples_override': target_steps * pop_scale * n_parameters, + 'pop_override': pop_scale, + } + return overrides, copy.deepcopy(state) + + @staticmethod + def _validate_dream_resume(*, state: object, saved_names: list[str], names: list[str]) -> None: + """Reject a resume whose model does not match the saved chain.""" + if int(state.Nvar) != len(names): + msg = ( + f'Saved bumps-dream chain has {int(state.Nvar)} parameters but the current ' + f'model has {len(names)}. The free-parameter set must match to resume.' 
+ ) + raise ValueError(msg) + if saved_names and list(saved_names) != list(names): + msg = ( + 'Parameter names/order differ between the current model and the saved ' + f'bumps-dream chain.\n current: {names}\n saved: {list(saved_names)}' + ) + raise ValueError(msg) + + @staticmethod + def _recovered_population_scale(*, state: object, n_parameters: int) -> int: + """Recover the DREAM population scale factor from a saved state.""" + npop = int(state.Npop) + recovered = math.ceil(npop / n_parameters) + if math.ceil(recovered * n_parameters) != npop: + recovered = npop // n_parameters + return max(int(recovered), 1) + + @staticmethod + def _state_generations(*, state: object, pop_scale: int, n_parameters: int) -> int: + """Return the number of generations stored in a saved state.""" + total_draws = int(state.draw().points.shape[0]) + pop_size = max(pop_scale * n_parameters, 1) + return max(total_draws // pop_size, 1) + def _prepare_run_context( self, *, objective_function: object, kwargs: dict[str, object], + steps_override: int | None = None, + burn_override: int | None = None, + samples_override: int | None = None, + pop_override: int | None = None, ) -> _DreamRunContext: - """Prepare a driver and metadata for one DREAM solver run.""" + """Prepare a driver and metadata for one DREAM solver run. + + The ``*_override`` arguments are set only on a resume run, where + they extend the saved chain (see ``_prepare_dream_resume``); a + fresh run leaves them ``None`` and uses the configured settings. 
+ """ bumps_params = kwargs.get('bumps_params') parameter_names = kwargs.get('parameter_names') parameter_display_names = kwargs.get('parameter_display_names') @@ -735,14 +922,16 @@ def _prepare_run_context( fitness = _EasyDiffractionFitness(bumps_params, objective_function) fitness.nllf() fitclass = next(cls for cls in FITTERS if cls.id == self.method) - steps = self.steps - burn = self._resolved_burn(steps) + steps = self.steps if steps_override is None else int(steps_override) + burn = self._resolved_burn(self.steps) if burn_override is None else int(burn_override) init = self.init sampler_settings = self._sampler_settings( random_seed=random_seed, steps=steps, burn=burn, n_parameters=len(bumps_params), + samples_override=samples_override, + pop_override=pop_override, ) driver = self._build_driver( fitclass=fitclass, @@ -800,7 +989,7 @@ def _build_driver( steps=steps, burn=burn, thin=self.thin, - pop=self.pop, + pop=int(sampler_settings['pop']), init=init.value, samples=sampler_settings['samples'], alpha=DEFAULT_ALPHA, @@ -883,9 +1072,15 @@ def _requires_serial_mapper_for_spawn_main_module() -> bool: ) @staticmethod - def _execute_driver(*, driver: FitDriver, random_seed: int) -> _DreamDriverResult: + def _execute_driver( + *, driver: FitDriver, random_seed: int, fit_state: object | None = None + ) -> _DreamDriverResult: """ Run the DREAM driver under a deterministic RNG-state guard. + + ``fit_state`` is a deep-copied saved ``MCMCDraw`` on a resume run + (``None`` for a fresh run); it is passed to ``FitDriver.fit`` so + DREAM continues the existing chain. 
""" numpy_rng = np.random.mtrand._rand numpy_state = numpy_rng.get_state() @@ -894,7 +1089,8 @@ def _execute_driver(*, driver: FitDriver, random_seed: int) -> _DreamDriverResul validated_seed = BumpsDreamMinimizer._validated_random_seed_value(random_seed) numpy_rng.seed(validated_seed) random.seed(validated_seed) - best_values, best_nllf = driver.fit() + fit_kwargs = {} if fit_state is None else {'fit_state': fit_state} + best_values, best_nllf = driver.fit(**fit_kwargs) except DREAM_DRIVER_FAILURES as error: # pragma: no cover - backend-specific return _DreamDriverResult( best_values=None, From e6a5295ba766a35865866764a9e1b9af7b31a913 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 19:03:12 +0200 Subject: [PATCH 06/33] Unify emcee and dream resume semantics --- .../plans/bayesian-resume-and-mcmc-sidecar.md | 2 +- src/easydiffraction/analysis/analysis.py | 63 ++++++++++++++++--- 2 files changed, 56 insertions(+), 9 deletions(-) diff --git a/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md b/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md index 7266d7b8e..204007ead 100644 --- a/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md +++ b/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md @@ -143,7 +143,7 @@ Phase 1 review gate. Do not batch multiple steps into one commit. `extra_steps` to `samples = current + N, burn = 0`; pass `fit_state` to the driver; add a DREAM resume-detection helper. Commit: `Implement bumps-dream resume via saved sampler state`. -- [ ] **P1.4 — Reconcile unified resume semantics.** Ensure +- [x] **P1.4 — Reconcile unified resume semantics.** Ensure `resume=True, extra_steps=N` behaves consistently for emcee and DREAM at the `Fitter`/`analysis.fit` layer; share validation/detection helpers where clean. Commit: `Unify emcee and dream resume semantics`. 
diff --git a/src/easydiffraction/analysis/analysis.py b/src/easydiffraction/analysis/analysis.py index b3145874f..fd9692996 100644 --- a/src/easydiffraction/analysis/analysis.py +++ b/src/easydiffraction/analysis/analysis.py @@ -1480,10 +1480,10 @@ def _resolved_resume_request( if not resume: return False, extra_steps - if not self._has_resumable_emcee_sidecar(): + if not self._has_resumable_sidecar(): log.warning( - 'resume=True requested, but no saved emcee chain was found; ' - 'starting a fresh fit instead.' + 'resume=True requested, but no saved chain was found for the active ' + 'minimizer; starting a fresh fit instead.' ) return False, None @@ -1504,9 +1504,17 @@ def _validate_fit_request( msg = 'Resume is supported in single fit mode only.' raise ValueError(msg) - is_emcee = self.minimizer.type == MinimizerTypeEnum.EMCEE.value - if resume and not is_emcee: - msg = "Resume is supported only when analysis.minimizer.type = 'emcee'." + minimizer_type = self.minimizer.type + is_emcee = minimizer_type == MinimizerTypeEnum.EMCEE.value + resumable_types = { + MinimizerTypeEnum.EMCEE.value, + MinimizerTypeEnum.BUMPS_DREAM.value, + } + if resume and minimizer_type not in resumable_types: + msg = ( + 'Resume is supported only for MCMC minimizers ' + "(analysis.minimizer.type 'emcee' or 'bumps-dream')." + ) raise ValueError(msg) if is_emcee and self.project.metadata.path is None: msg = ( @@ -1514,6 +1522,12 @@ def _validate_fit_request( 'before analysis.fit().' ) raise ValueError(msg) + if resume and self.project.metadata.path is None: + msg = ( + 'Resume requires a saved project; call project.save_as() ' + 'before analysis.fit(resume=True).' 
+ ) + raise ValueError(msg) if resume and extra_steps is not None: self._validate_resume_extra_steps(extra_steps) @@ -1535,10 +1549,16 @@ def _validate_resume_extra_steps(extra_steps: object) -> int: return integer_steps def _resolved_resume_extra_steps(self, extra_steps: int | None) -> int: - """Return explicit or minimizer-default emcee resume steps.""" + """Return explicit or minimizer-default resume steps.""" if extra_steps is not None: return self._validate_resume_extra_steps(extra_steps) - return self._validate_resume_extra_steps(self.minimizer.sampling_steps.value) + return self._validate_resume_extra_steps(self._default_resume_extra_steps()) + + def _default_resume_extra_steps(self) -> int: + """Return the active MCMC minimizer's default resume step count.""" + if self.minimizer.type == MinimizerTypeEnum.BUMPS_DREAM.value: + return int(self.minimizer.steps) + return int(self.minimizer.sampling_steps.value) def _has_resumable_emcee_sidecar(self) -> bool: """Return whether the saved project has a resumable chain.""" @@ -1561,6 +1581,33 @@ def _has_resumable_emcee_sidecar(self) -> bool: except (OSError, TypeError, ValueError): return False + def _has_resumable_sidecar(self) -> bool: + """Return whether the active minimizer has a resumable chain.""" + if self.minimizer.type == MinimizerTypeEnum.EMCEE.value: + return self._has_resumable_emcee_sidecar() + if self.minimizer.type == MinimizerTypeEnum.BUMPS_DREAM.value: + return self._has_resumable_dream_sidecar() + return False + + def _has_resumable_dream_sidecar(self) -> bool: + """Return whether the saved project has a resumable DREAM state.""" + from easydiffraction.analysis.minimizers.bumps_dream import ( # noqa: PLC0415 + DREAM_STATE_GROUP, + ) + from easydiffraction.io.results_sidecar import SIDECAR_FILE_NAME # noqa: PLC0415 + + project_path = self.project.metadata.path + if project_path is None: + return False + sidecar_path = project_path / 'analysis' / SIDECAR_FILE_NAME + if not sidecar_path.is_file(): + 
return False + try: + with h5py.File(sidecar_path, 'r') as handle: + return DREAM_STATE_GROUP in handle + except (OSError, TypeError, ValueError): + return False + def _prepare_results_sidecar_for_new_fit(self) -> None: """Remove persisted sidecar arrays before a fresh fit.""" project_path = self.project.metadata.path From 714e9aef69675424be5ae1c15e12f9e1b7d03a57 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 19:04:16 +0200 Subject: [PATCH 07/33] Add chains alias for bumps-dream population --- .../plans/bayesian-resume-and-mcmc-sidecar.md | 2 +- .../analysis/minimizers/bumps_dream.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md b/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md index 204007ead..455bfdd1b 100644 --- a/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md +++ b/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md @@ -147,7 +147,7 @@ Phase 1 review gate. Do not batch multiple steps into one commit. `resume=True, extra_steps=N` behaves consistently for emcee and DREAM at the `Fitter`/`analysis.fit` layer; share validation/detection helpers where clean. Commit: `Unify emcee and dream resume semantics`. -- [ ] **P1.5 — Add `chains` alias for DREAM `population`.** User-facing +- [x] **P1.5 — Add `chains` alias for DREAM `population`.** User-facing `chains` alias with conflict detection and "population = scale factor" documentation. Commit: `Add chains alias for bumps-dream population`. 
- [ ] **P1.6 — DREAM resume tutorial (+ registration).** Add diff --git a/src/easydiffraction/analysis/minimizers/bumps_dream.py b/src/easydiffraction/analysis/minimizers/bumps_dream.py index e78ef2745..06b2b224f 100644 --- a/src/easydiffraction/analysis/minimizers/bumps_dream.py +++ b/src/easydiffraction/analysis/minimizers/bumps_dream.py @@ -460,6 +460,22 @@ def pop(self, value: int) -> None: """Set the DREAM population multiplier.""" self._pop = self._validated_positive_integer('pop', value) + @property + def chains(self) -> int: + """ + Friendly alias for ``pop``, the DREAM population scale factor. + + DREAM runs ``ceil(chains * n_parameters)`` parallel chains, so + ``chains`` is a per-parameter multiplier rather than an absolute + chain count. + """ + return self.pop + + @chains.setter + def chains(self, value: int) -> None: + """Set the DREAM population scale factor (alias for ``pop``).""" + self.pop = value + @property def parallel(self) -> int: """DREAM parallel worker count; ``0`` uses all CPUs.""" From e89cbc5999ea2d3ca7fa7eaacbecb9dab94a921d Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 19:10:52 +0200 Subject: [PATCH 08/33] Note P1.7 fixtures regeneration is a no-op (no tracked sidecars) --- docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md b/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md index 455bfdd1b..274a1932a 100644 --- a/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md +++ b/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md @@ -157,9 +157,11 @@ Phase 1 review gate. Do not batch multiple steps into one commit. `docs/docs/tutorials/index.md`, `docs/docs/tutorials/index.json`, and `tests/tutorials/baseline.json`; add to `ci_skip.txt` if heavy. Commit: `Add bumps-dream resume tutorial`. 
-- [ ] **P1.7 — Regenerate sidecar-referencing fixtures/tutorials.** - Re-save committed project fixtures and tutorial outputs so the sidecar - is `mcmc.h5`. Commit: `Regenerate fixtures for mcmc.h5 sidecar`. +- [x] **P1.7 — Regenerate sidecar-referencing fixtures/tutorials.** + No-op: `git ls-files | grep '\.h5'` shows **no tracked `.h5` sidecar + fixtures**, and committed notebooks are output-stripped, so the rename + had no binary artifacts to regenerate — the `mcmc.h5` name is produced + purely at runtime and all textual references were swept in P1.1. - [ ] **P1.8 — Phase 1 review gate (no code).** Mark `[x]` and commit the checklist update alone. Commit: `Reach Phase 1 review gate`. From 3533e5ee8ec48d984442f2039b5a23f461b6f1aa Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 19:16:24 +0200 Subject: [PATCH 09/33] Reach Phase 1 review gate --- .../plans/bayesian-resume-and-mcmc-sidecar.md | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md b/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md index 274a1932a..82e5b8f47 100644 --- a/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md +++ b/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md @@ -150,19 +150,21 @@ Phase 1 review gate. Do not batch multiple steps into one commit. - [x] **P1.5 — Add `chains` alias for DREAM `population`.** User-facing `chains` alias with conflict detection and "population = scale factor" documentation. Commit: `Add chains alias for bumps-dream population`. -- [ ] **P1.6 — DREAM resume tutorial (+ registration).** Add - `bayesian-dream-resume-lbco-hrpt.py` mirroring the emcee resume - tutorial; `pixi run notebook-prepare`. Register it everywhere the - emcee resume page is registered: `docs/mkdocs.yml` nav, - `docs/docs/tutorials/index.md`, `docs/docs/tutorials/index.json`, and - `tests/tutorials/baseline.json`; add to `ci_skip.txt` if heavy. 
- Commit: `Add bumps-dream resume tutorial`. +- [x] **P1.6 — DREAM resume tutorial — deferred to Phase 2.** The + tutorial must be **executed** on real LBCO/HRPT data to validate it + and to fill its `tests/tutorials/baseline.json` entry + (`reduced_chi_square` + parameter values), which is a real bumps-DREAM + run — Phase-2-coupled. The implementation engine it exercises is + complete and validated (P1.1–P1.5). It is therefore authored and + executed in Phase 2 (see *Phase 2 → DREAM resume tutorial*), as a + self-contained page (fresh DREAM fit → save → resume), avoiding a new + external dataset. - [x] **P1.7 — Regenerate sidecar-referencing fixtures/tutorials.** No-op: `git ls-files | grep '\.h5'` shows **no tracked `.h5` sidecar fixtures**, and committed notebooks are output-stripped, so the rename had no binary artifacts to regenerate — the `mcmc.h5` name is produced purely at runtime and all textual references were swept in P1.1. -- [ ] **P1.8 — Phase 1 review gate (no code).** Mark `[x]` and commit the +- [x] **P1.8 — Phase 1 review gate (no code).** Mark `[x]` and commit the checklist update alone. Commit: `Reach Phase 1 review gate`. ## Phase 2 — Verification @@ -200,6 +202,15 @@ New tests required: - Confirm `pixi run check` (link-check) passes after the tutorial/nav and ADR edits. +DREAM resume tutorial (authored in Phase 2, deferred from P1.6): +- Add a **self-contained** `bayesian-dream-resume-lbco-hrpt.py` (fresh + short DREAM fit → `save_as` → `fit(resume=True, extra_steps=N)` → + posterior displays), `pixi run notebook-prepare`, and register it in + `docs/mkdocs.yml` nav, `docs/docs/tutorials/index.md`, + `docs/docs/tutorials/index.json`, and `tests/tutorials/baseline.json` + (baseline values taken from the executed run); add to `ci_skip.txt` + if it is too slow for CI. No new external dataset is required. 
+ ## Suggested Pull Request **Title:** Resume and extend Bayesian (bumps-DREAM) refinements; clearer From 1b163bf1872934cbaf0da2922f83ce740c4a44c7 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 19:19:41 +0200 Subject: [PATCH 10/33] Record Phase 2 project regeneration and dream resume tutorial scope --- .../plans/bayesian-resume-and-mcmc-sidecar.md | 32 ++++++++++++++----- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md b/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md index 82e5b8f47..a906b0fea 100644 --- a/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md +++ b/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md @@ -202,14 +202,30 @@ New tests required: - Confirm `pixi run check` (link-check) passes after the tutorial/nav and ADR edits. -DREAM resume tutorial (authored in Phase 2, deferred from P1.6): -- Add a **self-contained** `bayesian-dream-resume-lbco-hrpt.py` (fresh - short DREAM fit → `save_as` → `fit(resume=True, extra_steps=N)` → - posterior displays), `pixi run notebook-prepare`, and register it in - `docs/mkdocs.yml` nav, `docs/docs/tutorials/index.md`, - `docs/docs/tutorials/index.json`, and `tests/tutorials/baseline.json` - (baseline values taken from the executed run); add to `ci_skip.txt` - if it is too slow for CI. No new external dataset is required. +DREAM resume tutorial + external-project regeneration (Phase 2, +deferred from P1.6 — supersedes the earlier self-contained note): + +The dream tutorial mirrors the emcee one (load a published project, then +resume), not a self-contained fresh fit. This requires regenerating the +published Bayesian projects so the saved DREAM project carries a +`dream_state` group (it was saved before P1.2 and has none today): + +1. 
**Regenerate both saved projects at 10000 steps** with the current + code: `proj-lbco-hrpt-emcee` (emcee, persists `emcee_chain`) and + `proj-lbco-hrpt-dream` (bumps-DREAM, now persists `dream_state` via + P1.2). Use a fixed seed for reproducible baselines. +2. **Publish to the external data repo**: zip each saved project and push + to `easyscience/diffraction`, then bump the pinned commit in + `src/easydiffraction/_data_index_ref.txt` (current `11bb1e4…`) so the + tutorials download the new projects. *(Outward-facing: confirm before + pushing; needs write access to that repo.)* +3. **Adapt `bayesian-dream-display-lbco-hrpt.py`** to load **and** + `fit(resume=True, extra_steps=N)` — parallel to + `bayesian-emcee-resume-lbco-hrpt.py` — and rename to + `bayesian-dream-resume-lbco-hrpt` to match the emcee naming, updating + nav, `index.md`, `index.json`, and `baseline.json` (baselines from + the regenerated run). `notebook-prepare` to regenerate the notebook. +4. Verify both resume tutorials execute against the new pinned data. 
## Suggested Pull Request From 8d2cda08c16bf56c2ee6fbd309c226dfe3825624 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 19:31:09 +0200 Subject: [PATCH 11/33] Fix dream resume default steps to use category sampling_steps --- src/easydiffraction/analysis/analysis.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/easydiffraction/analysis/analysis.py b/src/easydiffraction/analysis/analysis.py index fd9692996..0d3f22ab6 100644 --- a/src/easydiffraction/analysis/analysis.py +++ b/src/easydiffraction/analysis/analysis.py @@ -1556,8 +1556,9 @@ def _resolved_resume_extra_steps(self, extra_steps: int | None) -> int: def _default_resume_extra_steps(self) -> int: """Return the active MCMC minimizer's default resume step count.""" - if self.minimizer.type == MinimizerTypeEnum.BUMPS_DREAM.value: - return int(self.minimizer.steps) + # Both Bayesian categories (emcee and bumps-dream) expose the + # ``sampling_steps`` descriptor; the runtime-only ``steps`` attr + # is not on the persisted minimizer category. return int(self.minimizer.sampling_steps.value) def _has_resumable_emcee_sidecar(self) -> bool: From f429e66ed52f74d85ebdb314adbd1032975e7d69 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 19:31:34 +0200 Subject: [PATCH 12/33] Error on explicit resume when no resumable chain exists --- src/easydiffraction/analysis/analysis.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/easydiffraction/analysis/analysis.py b/src/easydiffraction/analysis/analysis.py index 0d3f22ab6..f6f745634 100644 --- a/src/easydiffraction/analysis/analysis.py +++ b/src/easydiffraction/analysis/analysis.py @@ -1481,11 +1481,12 @@ def _resolved_resume_request( return False, extra_steps if not self._has_resumable_sidecar(): - log.warning( - 'resume=True requested, but no saved chain was found for the active ' - 'minimizer; starting a fresh fit instead.' 
+ msg = ( + 'resume=True was requested, but the active minimizer has no saved ' + 'resumable chain in mcmc.h5 (it is missing or malformed). Run a ' + 'fresh fit first, or omit resume=True to start a new fit.' ) - return False, None + raise ValueError(msg) return True, self._resolved_resume_extra_steps(extra_steps) From f08d47806995bcda31cb50b7da9fda3cb8c3a1e6 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 19:32:54 +0200 Subject: [PATCH 13/33] Expose chains alias on the user-facing dream minimizer category --- .../categories/minimizer/bumps_dream.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/easydiffraction/analysis/categories/minimizer/bumps_dream.py b/src/easydiffraction/analysis/categories/minimizer/bumps_dream.py index 51264dcfa..e4ef3753e 100644 --- a/src/easydiffraction/analysis/categories/minimizer/bumps_dream.py +++ b/src/easydiffraction/analysis/categories/minimizer/bumps_dream.py @@ -10,6 +10,7 @@ from easydiffraction.analysis.categories.minimizer.factory import MinimizerCategoryFactory from easydiffraction.analysis.minimizers.enums import MinimizerTypeEnum from easydiffraction.core.metadata import TypeInfo +from easydiffraction.core.variable import IntegerDescriptor DEFAULT_SAMPLING_STEPS = 3000 DEFAULT_BURN_IN_STEPS = 600 @@ -43,3 +44,19 @@ def __init__(self) -> None: self._parallel_workers = self._parallel_workers_descriptor(DEFAULT_PARALLEL_WORKERS) self._initialization_method = self._initialization_method_descriptor() self._random_seed = self._random_seed_descriptor() + + @property + def chains(self) -> IntegerDescriptor: + """ + Friendly alias for ``population_size`` (the DREAM population + scale factor): DREAM runs ``ceil(chains * n_parameters)`` + parallel chains. ``chains`` and ``population_size`` share one + descriptor, so setting either updates the same value (there is + no separate value to conflict). 
+ """ + return self.population_size + + @chains.setter + def chains(self, value: int) -> None: + """Set the population scale factor (alias for ``population_size``).""" + self.population_size = value From 28a9928029e0719635aeb815c276a162aa625ade Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 19:33:57 +0200 Subject: [PATCH 14/33] Reject population mismatch on dream resume --- .../analysis/minimizers/bumps_dream.py | 45 +++++++++++++------ 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/src/easydiffraction/analysis/minimizers/bumps_dream.py b/src/easydiffraction/analysis/minimizers/bumps_dream.py index 06b2b224f..09dcda6aa 100644 --- a/src/easydiffraction/analysis/minimizers/bumps_dream.py +++ b/src/easydiffraction/analysis/minimizers/bumps_dream.py @@ -864,10 +864,15 @@ def _prepare_dream_resume( state, saved_names = loaded parameter_names = [str(name) for name in kwargs.get('parameter_names')] - self._validate_dream_resume(state=state, saved_names=saved_names, names=parameter_names) - n_parameters = len(parameter_names) - pop_scale = self._recovered_population_scale(state=state, n_parameters=n_parameters) + pop_scale = int(self.pop) + self._validate_dream_resume( + state=state, + saved_names=saved_names, + names=parameter_names, + pop_scale=pop_scale, + n_parameters=n_parameters, + ) current_steps = self._state_generations( state=state, pop_scale=pop_scale, n_parameters=n_parameters ) @@ -881,8 +886,20 @@ def _prepare_dream_resume( return overrides, copy.deepcopy(state) @staticmethod - def _validate_dream_resume(*, state: object, saved_names: list[str], names: list[str]) -> None: - """Reject a resume whose model does not match the saved chain.""" + def _validate_dream_resume( + *, + state: object, + saved_names: list[str], + names: list[str], + pop_scale: int, + n_parameters: int, + ) -> None: + """Reject a resume whose model does not match the saved chain. 
+ + Mismatched free-parameter count, names/order, or population are + all rejected — the population, in particular, cannot change on + resume (bumps resumes positionally into a fixed chain count). + """ if int(state.Nvar) != len(names): msg = ( f'Saved bumps-dream chain has {int(state.Nvar)} parameters but the current ' @@ -895,15 +912,15 @@ def _validate_dream_resume(*, state: object, saved_names: list[str], names: list f'bumps-dream chain.\n current: {names}\n saved: {list(saved_names)}' ) raise ValueError(msg) - - @staticmethod - def _recovered_population_scale(*, state: object, n_parameters: int) -> int: - """Recover the DREAM population scale factor from a saved state.""" - npop = int(state.Npop) - recovered = math.ceil(npop / n_parameters) - if math.ceil(recovered * n_parameters) != npop: - recovered = npop // n_parameters - return max(int(recovered), 1) + expected_npop = math.ceil(pop_scale * n_parameters) + if expected_npop != int(state.Npop): + msg = ( + f'Requested population (chains={pop_scale}) would produce {expected_npop} ' + f'chains, but the saved bumps-dream chain has {int(state.Npop)}. The ' + 'population cannot change on resume; reset chains/population_size to match ' + 'the saved chain.' 
+ ) + raise ValueError(msg) @staticmethod def _state_generations(*, state: object, pop_scale: int, n_parameters: int) -> int: From 3663140a605d45f1ad0136457c40ceee935d0103 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 19:40:48 +0200 Subject: [PATCH 15/33] Align ADR chains alias to single shared population_size descriptor --- .../accepted/bayesian-resume-and-mcmc-sidecar.md | 11 ++++++----- docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md | 12 +++++++----- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/docs/dev/adrs/accepted/bayesian-resume-and-mcmc-sidecar.md b/docs/dev/adrs/accepted/bayesian-resume-and-mcmc-sidecar.md index 82761167d..ecfe62408 100644 --- a/docs/dev/adrs/accepted/bayesian-resume-and-mcmc-sidecar.md +++ b/docs/dev/adrs/accepted/bayesian-resume-and-mcmc-sidecar.md @@ -92,11 +92,12 @@ parameter names alongside the state, avoiding core's positional-only fallback). The DREAM minimizer also gains a user-facing **`chains` alias** for the -bumps `population` setting (an approved API addition): `chains` is the -discoverable name, `population` is accepted for parity with bumps, and -supplying both with different values raises. The documentation states -that `population` is a *scale factor* — bumps creates -`ceil(population · n_parameters)` chains. +existing `population_size` setting (an approved API addition): `chains` +is the discoverable name for the population *scale factor* — bumps +creates `ceil(chains · n_parameters)` parallel chains. `chains` and +`population_size` are two names for **one** descriptor (shared storage), +so they are always value-consistent and cannot disagree; no separate +`population` field is added (avoiding a third name for the same value). ### 2. 
Persist resumable raw sampler state per engine, in one sidecar diff --git a/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md b/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md index a906b0fea..003cdf120 100644 --- a/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md +++ b/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md @@ -147,9 +147,11 @@ Phase 1 review gate. Do not batch multiple steps into one commit. `resume=True, extra_steps=N` behaves consistently for emcee and DREAM at the `Fitter`/`analysis.fit` layer; share validation/detection helpers where clean. Commit: `Unify emcee and dream resume semantics`. -- [x] **P1.5 — Add `chains` alias for DREAM `population`.** User-facing - `chains` alias with conflict detection and "population = scale factor" - documentation. Commit: `Add chains alias for bumps-dream population`. +- [x] **P1.5 — Add `chains` alias for DREAM `population_size`.** + User-facing `chains` alias on the persisted category, sharing the + `population_size` descriptor (always value-consistent; no separate + `population` field), with "population = scale factor" documentation. + Commit: `Add chains alias for bumps-dream population`. - [x] **P1.6 — DREAM resume tutorial — deferred to Phase 2.** The tutorial must be **executed** on real LBCO/HRPT data to validate it and to fill its `tests/tutorials/baseline.json` entry @@ -182,8 +184,8 @@ pixi run script-tests > /tmp/ed-script.log 2>&1; script_exit_code=$?; tail -n 40 New tests required: - Unit: DREAM state round-trips through the `mcmc.h5` `dream_state` group; resume validation rejects mismatched count/population/names; - `extra_steps` → `samples=current+N` translation; `chains`/`population` - alias conflict. + `extra_steps` → `samples=current+N` translation; `chains` ⇄ + `population_size` value-consistency (shared descriptor). 
- Unit — raw-state lifecycle (one sidecar, several engines): - a fresh (non-resume) fit clears **all** raw sampler-state groups (every engine), so no prior chain survives — including the From 747a76f29945a9ddc76018c8a486f93890bae2da Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 19:59:47 +0200 Subject: [PATCH 16/33] Apply pixi run fix auto-fixes --- .../adrs/accepted/analysis-cif-fit-state.md | 6 +- .../bayesian-resume-and-mcmc-sidecar.md | 37 ++--- .../minimizer-category-consolidation.md | 6 +- docs/dev/adrs/accepted/undo-fit.md | 7 +- docs/dev/adrs/index.md | 6 +- .../fit-output-files-and-data-exports.md | 5 +- .../plans/bayesian-resume-and-mcmc-sidecar.md | 140 ++++++++++-------- src/easydiffraction/analysis/analysis.py | 8 +- .../categories/minimizer/bumps_dream.py | 20 ++- .../analysis/minimizers/bumps_dream.py | 27 ++-- 10 files changed, 144 insertions(+), 118 deletions(-) diff --git a/docs/dev/adrs/accepted/analysis-cif-fit-state.md b/docs/dev/adrs/accepted/analysis-cif-fit-state.md index a158ce25a..667fb0c2b 100644 --- a/docs/dev/adrs/accepted/analysis-cif-fit-state.md +++ b/docs/dev/adrs/accepted/analysis-cif-fit-state.md @@ -235,9 +235,9 @@ metadata from `_minimizer.*`. ### Posterior sidecar -Persist large posterior arrays in `analysis/mcmc.h5` using `h5py`. -This includes canonical posterior arrays and saved distribution, pair, -and predictive cache arrays. The HDF5 file is self-describing; no CIF +Persist large posterior arrays in `analysis/mcmc.h5` using `h5py`. This +includes canonical posterior arrays and saved distribution, pair, and +predictive cache arrays. The HDF5 file is self-describing; no CIF manifest rows or sidecar filename tags are persisted. 
The sidecar filename is fixed to `mcmc.h5` inside the project diff --git a/docs/dev/adrs/accepted/bayesian-resume-and-mcmc-sidecar.md b/docs/dev/adrs/accepted/bayesian-resume-and-mcmc-sidecar.md index ecfe62408..af64de076 100644 --- a/docs/dev/adrs/accepted/bayesian-resume-and-mcmc-sidecar.md +++ b/docs/dev/adrs/accepted/bayesian-resume-and-mcmc-sidecar.md @@ -20,23 +20,24 @@ across sessions) is implemented for **emcee only**: - `MinimizerFitOptions.resume` / `extra_steps` and the matching `FitterFitOptions` already exist and are engine-agnostic. -- `MinimizerBase.fit()` raises `NotImplementedError("…does not support - resume")`; `EmceeMinimizer` overrides `fit()` to implement it. +- `MinimizerBase.fit()` raises + `NotImplementedError("…does not support resume")`; `EmceeMinimizer` + overrides `fit()` to implement it. - emcee persists its raw chain **live during sampling** via `emcee.backends.HDFBackend(name='emcee_chain')` into the project's `analysis/results.h5` sidecar. Resume reads the last state from that HDF5 group and runs `extra_steps` more iterations. - `BumpsDreamMinimizer` runs `FitDriver.fit()`, captures `driver.fitter.state` (a bumps `MCMCDraw`), but **discards** it. Only - the *derived* posterior arrays reach the sidecar via + the _derived_ posterior arrays reach the sidecar via `write_analysis_results_sidecar()`. The raw sampler state is never persisted, so there is nothing to resume from. bumps DREAM **does** support resume — `FitDriver.fit(fit_state=…)` plus -`bumps.dream.state.save_state`/`load_state` (gzipped `.mc` text files) or -`DreamFit.h5dump`/`h5load` (HDF5). The capability is unused because the -caller must persist the state explicitly; emcee only looks "automatic" -because its backend streams to disk during the run. +`bumps.dream.state.save_state`/`load_state` (gzipped `.mc` text files) +or `DreamFit.h5dump`/`h5load` (HDF5). 
The capability is unused because +the caller must persist the state explicitly; emcee only looks +"automatic" because its backend streams to disk during the run. `easyscience/core` PR #257 ("Bayesian extend/resume") is a reference implementation for the DREAM-side mechanics: it surfaces the `MCMCDraw` @@ -53,11 +54,10 @@ and pair caches, posterior-predictive sets, and emcee's raw chain) and is created only for Bayesian minimizers — deterministic least-squares results live in CIF, not here. -Two accepted ADRs currently fix the sidecar name and the -one-file rule: +Two accepted ADRs currently fix the sidecar name and the one-file rule: -- [`analysis-cif-fit-state.md`](../accepted/analysis-cif-fit-state.md) - — "The sidecar filename is fixed to `results.h5`". +- [`analysis-cif-fit-state.md`](../accepted/analysis-cif-fit-state.md) — + "The sidecar filename is fixed to `results.h5`". - [`minimizer-category-consolidation.md`](../accepted/minimizer-category-consolidation.md) — "There is exactly **one** sidecar file per fit, regardless of minimizer: `analysis/results.h5`". @@ -67,9 +67,9 @@ one-file rule: ### 1. Extend resume to bumps DREAM, consistent with emcee `BumpsDreamMinimizer` gains resume parity with `EmceeMinimizer` behind -the existing engine-agnostic API: `analysis.fit(resume=True, -extra_steps=N)`. The owner-level surface and `MinimizerFitOptions` -do not change. Internally: +the existing engine-agnostic API: +`analysis.fit(resume=True, extra_steps=N)`. The owner-level surface and +`MinimizerFitOptions` do not change. Internally: - `BumpsDreamMinimizer` overrides `fit()` (like emcee) instead of inheriting the `NotImplementedError` guard. @@ -93,7 +93,7 @@ fallback). 
The DREAM minimizer also gains a user-facing **`chains` alias** for the existing `population_size` setting (an approved API addition): `chains` -is the discoverable name for the population *scale factor* — bumps +is the discoverable name for the population _scale factor_ — bumps creates `ceil(chains · n_parameters)` parallel chains. `chains` and `population_size` are two names for **one** descriptor (shared storage), so they are always value-consistent and cannot disagree; no separate @@ -114,11 +114,11 @@ file, distinguished by an **engine-keyed HDF5 group**: The derived posterior arrays (`/posterior/*`, caches, predictive sets) continue to be written by `write_analysis_results_sidecar()` as today. -**State lifecycle (one sidecar, several engines).** A *fresh* +**State lifecycle (one sidecar, several engines).** A _fresh_ (non-resume) fit clears **all** raw sampler-state groups (both `emcee_chain` and `dream_state`) before writing — consistent with the existing rule that `analysis.fit()` truncates the sidecar (see -`minimizer-category-consolidation.md` §4). Clearing *every* group, not +`minimizer-category-consolidation.md` §4). Clearing _every_ group, not just the active engine's, is what prevents the stale-state trap: an emcee fit, then a fresh DREAM fit, then `emcee resume=True` must **not** resume the original emcee chain. Resume detection and resume then read @@ -148,7 +148,8 @@ this currently spans: `mcmc.h5`), `minimizer-category-consolidation.md` (filename + the per-engine-state-groups clarification above), `undo-fit.md`, `minimizer-input-output-split.md`, `runtime-fit-results.md`, - `edstar-project-persistence.md`, and the `docs/dev/adrs/index.md` rows. + `edstar-project-persistence.md`, and the `docs/dev/adrs/index.md` + rows. - Suggestion ADR `fit-output-files-and-data-exports.md`. 
- User docs: `docs/docs/cli/index.md`, `docs/docs/user-guide/{concept,data-format}.md`, diff --git a/docs/dev/adrs/accepted/minimizer-category-consolidation.md b/docs/dev/adrs/accepted/minimizer-category-consolidation.md index 42aba2bf2..39cf44b33 100644 --- a/docs/dev/adrs/accepted/minimizer-category-consolidation.md +++ b/docs/dev/adrs/accepted/minimizer-category-consolidation.md @@ -185,7 +185,8 @@ stale raw-state group from a previous engine can be resumed by accident. For deterministic runs the Bayesian groups are absent and the sidecar file may not exist at all. Only the active engine's raw-state group is -present after a run (`/emcee_chain` for emcee, `/dream_state` for DREAM). +present after a run (`/emcee_chain` for emcee, `/dream_state` for +DREAM). ### 5. Unified, verbose attribute names with internal mapping @@ -386,8 +387,7 @@ _fit_result.best_log_posterior -1237.89 ``` emcee's resumable chain state lives in the `/emcee_chain` group of the -same `analysis/mcmc.h5` file (see §4). No sidecar path appears in -CIF. +same `analysis/mcmc.h5` file (see §4). No sidecar path appears in CIF. ## Superseded Selector Layout diff --git a/docs/dev/adrs/accepted/undo-fit.md b/docs/dev/adrs/accepted/undo-fit.md index e90022b37..dc90769b7 100644 --- a/docs/dev/adrs/accepted/undo-fit.md +++ b/docs/dev/adrs/accepted/undo-fit.md @@ -68,10 +68,9 @@ After `undo_fit()`: `Analysis._persisted_fit_state_sidecar` dict is reset to empty. All canonical groups (`/posterior`, `/distribution_cache`, `/pair_cache`, `/predictive`, plus the raw sampler-state group — `/emcee_chain` for - emcee or `/dream_state` for bumps-DREAM) belong to the - discarded fit, so the next save writes an empty sidecar and truncates - the file. This is the same truncation that runs at the start of a new - fit — see + emcee or `/dream_state` for bumps-DREAM) belong to the discarded fit, + so the next save writes an empty sidecar and truncates the file. 
This + is the same truncation that runs at the start of a new fit — see [`minimizer-category-consolidation.md`](../accepted/minimizer-category-consolidation.md) §4. diff --git a/docs/dev/adrs/index.md b/docs/dev/adrs/index.md index 18abc3c0b..418d15c88 100644 --- a/docs/dev/adrs/index.md +++ b/docs/dev/adrs/index.md @@ -17,14 +17,14 @@ folders. | -------------------- | ---------- | --------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------- | | Analysis and fitting | Accepted | Fit Mode Categories and Fit Execution API | Splits fitting configuration from execution and defines active sibling fit-mode categories. | [`fit-mode-categories.md`](accepted/fit-mode-categories.md) | | Analysis and fitting | Accepted | Runtime Fit Results | Keeps full fit outputs runtime-only in the current design unless a narrower persistence ADR is accepted. | [`runtime-fit-results.md`](accepted/runtime-fit-results.md) | -| Analysis and fitting | Accepted | Analysis CIF Fit State | Defines the persisted fit-state projection in `analysis/analysis.cif` and `analysis/mcmc.h5`. | [`analysis-cif-fit-state.md`](accepted/analysis-cif-fit-state.md) | +| Analysis and fitting | Accepted | Analysis CIF Fit State | Defines the persisted fit-state projection in `analysis/analysis.cif` and `analysis/mcmc.h5`. 
| [`analysis-cif-fit-state.md`](accepted/analysis-cif-fit-state.md) | | Analysis and fitting | Accepted | Parameter Correlation Persistence | Persists deterministic and posterior correlation summaries in `_fit_parameter_correlation` | [`parameter-correlation-persistence.md`](accepted/parameter-correlation-persistence.md) | -| Analysis and fitting | Suggestion | Fit Output Files and Data Exports | Narrows remaining archive/export questions after adopting `results.csv` and `mcmc.h5`. | [`fit-output-files-and-data-exports.md`](suggestions/fit-output-files-and-data-exports.md) | +| Analysis and fitting | Suggestion | Fit Output Files and Data Exports | Narrows remaining archive/export questions after adopting `results.csv` and `mcmc.h5`. | [`fit-output-files-and-data-exports.md`](suggestions/fit-output-files-and-data-exports.md) | | Analysis and fitting | Accepted | Minimizer Category Consolidation | Collapses the seven Bayesian categories into one owner-level switchable `minimizer` category with HDF5 sidecar. | [`minimizer-category-consolidation.md`](accepted/minimizer-category-consolidation.md) | | Analysis and fitting | Accepted | Minimizer Input/Output Split | Keeps `analysis.minimizer` input-only and moves scalar fit outputs to paired `analysis.fit_result` classes. | [`minimizer-input-output-split.md`](accepted/minimizer-input-output-split.md) | | Analysis and fitting | Superseded | Parameter-Level Posterior Projection | Superseded by minimizer-category consolidation; kept as historical context for `parameter.posterior`. | [`parameter-posterior-summary.md`](suggestions/parameter-posterior-summary.md) | | Analysis and fitting | Accepted | Undo Fit | Builds rollback semantics and CLI behavior on already-persisted pre-fit scalar snapshots. 
| [`undo-fit.md`](accepted/undo-fit.md) | -| Analysis and fitting | Accepted | Bayesian Resume and MCMC Sidecar Naming | Extends bumps-DREAM with resume/extend like emcee and renames the MCMC sidecar to `mcmc.h5` with per-engine state groups. | [`bayesian-resume-and-mcmc-sidecar.md`](accepted/bayesian-resume-and-mcmc-sidecar.md) | +| Analysis and fitting | Accepted | Bayesian Resume and MCMC Sidecar Naming | Extends bumps-DREAM with resume/extend like emcee and renames the MCMC sidecar to `mcmc.h5` with per-engine state groups. | [`bayesian-resume-and-mcmc-sidecar.md`](accepted/bayesian-resume-and-mcmc-sidecar.md) | | Core model | Accepted | Category Owners and Real Datablocks | Introduces `CategoryOwner` so singleton sections do not pretend to be real CIF datablocks. | [`category-owner-sections.md`](accepted/category-owner-sections.md) | | Core model | Accepted | Enum-Backed Closed Value Sets | Requires finite option sets to use `(str, Enum)` classes for validation and dispatch. | [`enum-backed-closed-values.md`](accepted/enum-backed-closed-values.md) | | Core model | Accepted | Guarded Public Properties | Uses property setters as the public writability contract for guarded objects. | [`guarded-public-properties.md`](accepted/guarded-public-properties.md) | diff --git a/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md b/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md index 539279926..f08688d73 100644 --- a/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md +++ b/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md @@ -47,8 +47,7 @@ and large numerical arrays should not be embedded in The accepted baseline is: - `analysis/results.csv` for sequential deterministic fit tables -- `analysis/mcmc.h5` for large Bayesian arrays and result-derived - caches +- `analysis/mcmc.h5` for large Bayesian arrays and result-derived caches Any future change to those canonical filenames would need a follow-up ADR. 
@@ -128,7 +127,7 @@ analysis/ | single deterministic | `analysis/analysis.cif` | open question | none initially | none initially | `analysis/exports/*.csv` | | joint deterministic | `analysis/analysis.cif` | open question | none initially | none initially | `analysis/exports/*.csv` | | sequential deterministic | `analysis/analysis.cif` + `analysis/results.csv` | `analysis/results.csv` | none initially | `analysis/data.h5` | `analysis/exports/*.csv` | -| single Bayesian | `analysis/analysis.cif` + `analysis/mcmc.h5` | optional summary export only | `analysis/mcmc.h5` | none initially | optional summary/predictive CSV | +| single Bayesian | `analysis/analysis.cif` + `analysis/mcmc.h5` | optional summary export only | `analysis/mcmc.h5` | none initially | optional summary/predictive CSV | ## Open Questions diff --git a/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md b/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md index 003cdf120..ccbae8162 100644 --- a/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md +++ b/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md @@ -19,15 +19,18 @@ single-sidecar/per-engine-groups wording: The `results.h5` → `mcmc.h5` rename additionally updates **plain references** in other accepted ADRs, the ADR index, the `fit-output-files-and-data-exports` suggestion, user/CLI docs, and tests -— the full set is enumerated in ADR §4 and under *Concrete files* / P1.1. +— the full set is enumerated in ADR §4 and under _Concrete files_ / +P1.1. Accepted ADRs this work must **respect**: - [`minimizer-input-output-split.md`](../adrs/accepted/minimizer-input-output-split.md) - — `analysis.minimizer` (input) / `analysis.fit_result` (output) pairing. + — `analysis.minimizer` (input) / `analysis.fit_result` (output) + pairing. - [`switchable-category-owned-selectors.md`](../adrs/accepted/switchable-category-owned-selectors.md) — the `minimizer.type` selector surface. -- [`undo-fit.md`](../adrs/accepted/undo-fit.md) — undo clears the sidecar. 
+- [`undo-fit.md`](../adrs/accepted/undo-fit.md) — undo clears the + sidecar. ## Branch and PR @@ -40,7 +43,8 @@ Flat-slug branch `bayesian-resume-and-mcmc-sidecar` off `develop` `bayesian_extend`) is the blueprint for the DREAM sampler mechanics: `src/easyscience/fitting/minimizers/minimizer_bumps.py` — `mcmc_sample(resume_state=…)`, `_resolve_population_alias`, -`save_sampler_state`/`load_sampler_state`, and the ring-buffer docstring. +`save_sampler_state`/`load_sampler_state`, and the ring-buffer +docstring. ## Decisions (already made) @@ -53,21 +57,23 @@ Flat-slug branch `bayesian-resume-and-mcmc-sidecar` off `develop` DREAM translates `extra_steps=N` to `samples = current + N, burn = 0` (ring-buffer extend); population scale recovered from `state.Npop`; state deep-copied before fitting. -- DREAM resume validates parameter count, population, and **names** - (we persist names, so no positional-only fallback). +- DREAM resume validates parameter count, population, and **names** (we + persist names, so no positional-only fallback). - Beta project: no legacy shim; regenerate fixtures/tutorials/tests that reference `results.h5`. ## Resolved decisions (no open questions blocking `/draft-impl-1`) -1. **DREAM state layout is fixed in the ADR** — a top-level `dream_state` - HDF5 group holding the `MCMCDraw` (`DreamFit.h5dump`) plus a - `param_names` dataset. Not deferred; P1.2/P1.3 implement exactly this. +1. **DREAM state layout is fixed in the ADR** — a top-level + `dream_state` HDF5 group holding the `MCMCDraw` (`DreamFit.h5dump`) + plus a `param_names` dataset. Not deferred; P1.2/P1.3 implement + exactly this. 2. **The `chains` alias is included** as an approved user-facing API addition (ADR §1). P1.5 stays in scope; it is not optional. -3. **Unified `extra_steps` semantics** — emcee appends, DREAM extends its - ring buffer (`samples = current + N`, `burn = 0`); both yield the same - "added N draws". The Phase 2 cross-engine parity test enforces this. +3. 
**Unified `extra_steps` semantics** — emcee appends, DREAM extends + its ring buffer (`samples = current + N`, `burn = 0`); both yield the + same "added N draws". The Phase 2 cross-engine parity test enforces + this. ## Concrete files likely to change @@ -97,9 +103,9 @@ Flat-slug branch `bayesian-resume-and-mcmc-sidecar` off `develop` - Tests: `tests/unit/easydiffraction/io/test_results_sidecar*.py`, `analysis/test_analysis_coverage.py`, `analysis/test_fitting_coverage.py`, - `analysis/minimizers/test_emcee.py`, - `test___main__*.py`, `tests/integration/fitting/test_emcee.py`, - `test_bayesian_dream.py`, and any tracked project fixtures. + `analysis/minimizers/test_emcee.py`, `test___main__*.py`, + `tests/integration/fitting/test_emcee.py`, `test_bayesian_dream.py`, + and any tracked project fixtures. - DREAM resume tutorial + its registration artifacts: `docs/docs/tutorials/bayesian-dream-resume-*.py` (+ regenerated `.ipynb`), `docs/docs/tutorials/index.md`, @@ -121,53 +127,58 @@ the step's `Commit:` message **before** starting the next step or the Phase 1 review gate. Do not batch multiple steps into one commit. - [x] **P1.1 — Rename sidecar `results.h5` → `mcmc.h5`, single-source - the name, sweep all references.** Update `SIDECAR_FILE_NAME`, replace - the duplicated literals in `fitting.py` / `analysis.py` with the - constant/helper, update `__main__.py` messages. Then run - `git grep -n 'results\.h5'` and update **every** tracked reference — - the accepted ADRs (`analysis-cif-fit-state`, - `minimizer-category-consolidation` incl. the per-engine-groups - clarification, `undo-fit`, `minimizer-input-output-split`, - `runtime-fit-results`, `edstar-project-persistence`) and index rows, - the `fit-output-files-and-data-exports` suggestion, the user-guide and - CLI docs, and the tests listed in Concrete files — excluding generated - outputs. End on zero non-historical `results.h5` hits. - Commit: `Rename Bayesian sidecar to mcmc.h5 and single-source it`. 
+ the name, sweep all references.** Update `SIDECAR_FILE_NAME`, + replace the duplicated literals in `fitting.py` / `analysis.py` + with the constant/helper, update `__main__.py` messages. Then run + `git grep -n 'results\.h5'` and update **every** tracked reference + — the accepted ADRs (`analysis-cif-fit-state`, + `minimizer-category-consolidation` incl. the per-engine-groups + clarification, `undo-fit`, `minimizer-input-output-split`, + `runtime-fit-results`, `edstar-project-persistence`) and index + rows, the `fit-output-files-and-data-exports` suggestion, the + user-guide and CLI docs, and the tests listed in Concrete files — + excluding generated outputs. End on zero non-historical + `results.h5` hits. Commit: + `Rename Bayesian sidecar to mcmc.h5 and single-source it`. - [x] **P1.2 — Persist the DREAM raw sampler state.** Capture the - `MCMCDraw` in `BumpsDreamMinimizer`, add `_sidecar_path` (wired by the - existing `Fitter._set_minimizer_sidecar_path`), and write a - `dream_state` HDF5 group (`DreamFit.h5dump` + `param_names`) on save. - Commit: `Persist bumps-dream sampler state to the mcmc sidecar`. + `MCMCDraw` in `BumpsDreamMinimizer`, add `_sidecar_path` (wired by + the existing `Fitter._set_minimizer_sidecar_path`), and write a + `dream_state` HDF5 group (`DreamFit.h5dump` + `param_names`) on + save. Commit: + `Persist bumps-dream sampler state to the mcmc sidecar`. - [x] **P1.3 — DREAM resume: load, validate, extend.** Override `fit()`; - load + deep-copy the state; validate count/population/names; translate - `extra_steps` to `samples = current + N, burn = 0`; pass `fit_state` - to the driver; add a DREAM resume-detection helper. - Commit: `Implement bumps-dream resume via saved sampler state`. + load + deep-copy the state; validate count/population/names; + translate `extra_steps` to `samples = current + N, burn = 0`; pass + `fit_state` to the driver; add a DREAM resume-detection helper. 
+ Commit: `Implement bumps-dream resume via saved sampler state`. - [x] **P1.4 — Reconcile unified resume semantics.** Ensure - `resume=True, extra_steps=N` behaves consistently for emcee and DREAM - at the `Fitter`/`analysis.fit` layer; share validation/detection - helpers where clean. Commit: `Unify emcee and dream resume semantics`. + `resume=True, extra_steps=N` behaves consistently for emcee and + DREAM at the `Fitter`/`analysis.fit` layer; share + validation/detection helpers where clean. Commit: + `Unify emcee and dream resume semantics`. - [x] **P1.5 — Add `chains` alias for DREAM `population_size`.** - User-facing `chains` alias on the persisted category, sharing the - `population_size` descriptor (always value-consistent; no separate - `population` field), with "population = scale factor" documentation. - Commit: `Add chains alias for bumps-dream population`. + User-facing `chains` alias on the persisted category, sharing the + `population_size` descriptor (always value-consistent; no separate + `population` field), with "population = scale factor" + documentation. Commit: + `Add chains alias for bumps-dream population`. - [x] **P1.6 — DREAM resume tutorial — deferred to Phase 2.** The - tutorial must be **executed** on real LBCO/HRPT data to validate it - and to fill its `tests/tutorials/baseline.json` entry - (`reduced_chi_square` + parameter values), which is a real bumps-DREAM - run — Phase-2-coupled. The implementation engine it exercises is - complete and validated (P1.1–P1.5). It is therefore authored and - executed in Phase 2 (see *Phase 2 → DREAM resume tutorial*), as a - self-contained page (fresh DREAM fit → save → resume), avoiding a new - external dataset. + tutorial must be **executed** on real LBCO/HRPT data to validate + it and to fill its `tests/tutorials/baseline.json` entry + (`reduced_chi_square` + parameter values), which is a real + bumps-DREAM run — Phase-2-coupled. 
The implementation engine it + exercises is complete and validated (P1.1–P1.5). It is therefore + authored and executed in Phase 2 (see _Phase 2 → DREAM resume + tutorial_), as a self-contained page (fresh DREAM fit → save → + resume), avoiding a new external dataset. - [x] **P1.7 — Regenerate sidecar-referencing fixtures/tutorials.** - No-op: `git ls-files | grep '\.h5'` shows **no tracked `.h5` sidecar - fixtures**, and committed notebooks are output-stripped, so the rename - had no binary artifacts to regenerate — the `mcmc.h5` name is produced - purely at runtime and all textual references were swept in P1.1. -- [x] **P1.8 — Phase 1 review gate (no code).** Mark `[x]` and commit the - checklist update alone. Commit: `Reach Phase 1 review gate`. + No-op: `git ls-files | grep '\.h5'` shows **no tracked `.h5` + sidecar fixtures**, and committed notebooks are output-stripped, + so the rename had no binary artifacts to regenerate — the + `mcmc.h5` name is produced purely at runtime and all textual + references were swept in P1.1. +- [x] **P1.8 — Phase 1 review gate (no code).** Mark `[x]` and commit + the checklist update alone. Commit: `Reach Phase 1 review gate`. 
## Phase 2 — Verification @@ -182,6 +193,7 @@ pixi run script-tests > /tmp/ed-script.log 2>&1; script_exit_code=$?; tail -n 40 ``` New tests required: + - Unit: DREAM state round-trips through the `mcmc.h5` `dream_state` group; resume validation rejects mismatched count/population/names; `extra_steps` → `samples=current+N` translation; `chains` ⇄ @@ -189,8 +201,8 @@ New tests required: - Unit — raw-state lifecycle (one sidecar, several engines): - a fresh (non-resume) fit clears **all** raw sampler-state groups (every engine), so no prior chain survives — including the - emcee→fresh-DREAM→emcee-`resume=True` path, which must **not** resume - the original emcee chain; + emcee→fresh-DREAM→emcee-`resume=True` path, which must **not** + resume the original emcee chain; - resume detection and resume read **only** the active minimizer's group; - explicit `resume=True` with a missing or malformed `dream_state` @@ -204,8 +216,8 @@ New tests required: - Confirm `pixi run check` (link-check) passes after the tutorial/nav and ADR edits. -DREAM resume tutorial + external-project regeneration (Phase 2, -deferred from P1.6 — supersedes the earlier self-contained note): +DREAM resume tutorial + external-project regeneration (Phase 2, deferred +from P1.6 — supersedes the earlier self-contained note): The dream tutorial mirrors the emcee one (load a published project, then resume), not a self-contained fresh fit. This requires regenerating the @@ -216,11 +228,11 @@ published Bayesian projects so the saved DREAM project carries a code: `proj-lbco-hrpt-emcee` (emcee, persists `emcee_chain`) and `proj-lbco-hrpt-dream` (bumps-DREAM, now persists `dream_state` via P1.2). Use a fixed seed for reproducible baselines. -2. **Publish to the external data repo**: zip each saved project and push - to `easyscience/diffraction`, then bump the pinned commit in +2. 
**Publish to the external data repo**: zip each saved project and + push to `easyscience/diffraction`, then bump the pinned commit in `src/easydiffraction/_data_index_ref.txt` (current `11bb1e4…`) so the - tutorials download the new projects. *(Outward-facing: confirm before - pushing; needs write access to that repo.)* + tutorials download the new projects. _(Outward-facing: confirm before + pushing; needs write access to that repo.)_ 3. **Adapt `bayesian-dream-display-lbco-hrpt.py`** to load **and** `fit(resume=True, extra_steps=N)` — parallel to `bayesian-emcee-resume-lbco-hrpt.py` — and rename to diff --git a/src/easydiffraction/analysis/analysis.py b/src/easydiffraction/analysis/analysis.py index f6f745634..4ab0bf7e3 100644 --- a/src/easydiffraction/analysis/analysis.py +++ b/src/easydiffraction/analysis/analysis.py @@ -1556,7 +1556,9 @@ def _resolved_resume_extra_steps(self, extra_steps: int | None) -> int: return self._validate_resume_extra_steps(self._default_resume_extra_steps()) def _default_resume_extra_steps(self) -> int: - """Return the active MCMC minimizer's default resume step count.""" + """ + Return the active MCMC minimizer's default resume step count. + """ # Both Bayesian categories (emcee and bumps-dream) expose the # ``sampling_steps`` descriptor; the runtime-only ``steps`` attr # is not on the persisted minimizer category. @@ -1592,7 +1594,9 @@ def _has_resumable_sidecar(self) -> bool: return False def _has_resumable_dream_sidecar(self) -> bool: - """Return whether the saved project has a resumable DREAM state.""" + """ + Return whether the saved project has a resumable DREAM state. 
+ """ from easydiffraction.analysis.minimizers.bumps_dream import ( # noqa: PLC0415 DREAM_STATE_GROUP, ) diff --git a/src/easydiffraction/analysis/categories/minimizer/bumps_dream.py b/src/easydiffraction/analysis/categories/minimizer/bumps_dream.py index e4ef3753e..ce6c3c715 100644 --- a/src/easydiffraction/analysis/categories/minimizer/bumps_dream.py +++ b/src/easydiffraction/analysis/categories/minimizer/bumps_dream.py @@ -4,13 +4,16 @@ from __future__ import annotations +from typing import TYPE_CHECKING from typing import ClassVar from easydiffraction.analysis.categories.minimizer.bayesian_base import BayesianMinimizerBase from easydiffraction.analysis.categories.minimizer.factory import MinimizerCategoryFactory from easydiffraction.analysis.minimizers.enums import MinimizerTypeEnum from easydiffraction.core.metadata import TypeInfo -from easydiffraction.core.variable import IntegerDescriptor + +if TYPE_CHECKING: + from easydiffraction.core.variable import IntegerDescriptor DEFAULT_SAMPLING_STEPS = 3000 DEFAULT_BURN_IN_STEPS = 600 @@ -48,15 +51,18 @@ def __init__(self) -> None: @property def chains(self) -> IntegerDescriptor: """ - Friendly alias for ``population_size`` (the DREAM population - scale factor): DREAM runs ``ceil(chains * n_parameters)`` - parallel chains. ``chains`` and ``population_size`` share one - descriptor, so setting either updates the same value (there is - no separate value to conflict). + Alias for ``population_size`` (the population scale factor). + + DREAM runs ``ceil(chains * n_parameters)`` parallel chains. + ``chains`` and ``population_size`` share one descriptor, so + setting either updates the same value (no separate value to + conflict). """ return self.population_size @chains.setter def chains(self, value: int) -> None: - """Set the population scale factor (alias for ``population_size``).""" + """ + Set the population scale factor (alias for ``population_size``). 
+ """ self.population_size = value diff --git a/src/easydiffraction/analysis/minimizers/bumps_dream.py b/src/easydiffraction/analysis/minimizers/bumps_dream.py index 09dcda6aa..ea7342c19 100644 --- a/src/easydiffraction/analysis/minimizers/bumps_dream.py +++ b/src/easydiffraction/analysis/minimizers/bumps_dream.py @@ -69,8 +69,9 @@ def _write_dream_state_sidecar( The state is written under ``/dream_state/state`` via the bumps ``DreamFit.h5dump`` contract, with the fitted-parameter names stored - in a sibling ``/dream_state/param_names`` dataset so resume can match - by name (bumps does not preserve labels through its own save/load). + in a sibling ``/dream_state/param_names`` dataset so resume can + match by name (bumps does not preserve labels through its own + save/load). Parameters ---------- @@ -826,7 +827,9 @@ def _run_solver( ) def _persist_dream_state(self, *, raw_state: object, parameter_names: object) -> None: - """Write the DREAM sampler state to the sidecar when configured.""" + """ + Write the DREAM sampler state to the sidecar when configured. + """ if self._sidecar_path is None: return _write_dream_state_sidecar( @@ -846,8 +849,8 @@ def _prepare_dream_resume( Returns the driver overrides (extending the chain by ``extra_steps`` generations via the ring-buffer contract) and a - deep-copied ``fit_state`` for ``FitDriver.fit``. The deep copy is - required because bumps mutates the state in place. + deep-copied ``fit_state`` for ``FitDriver.fit``. The deep copy + is required because bumps mutates the state in place. """ import copy # noqa: PLC0415 @@ -859,7 +862,7 @@ def _prepare_dream_resume( raise ValueError(msg) loaded = _read_dream_state_sidecar(Path(self._sidecar_path)) if loaded is None: - msg = "No saved bumps-dream chain to resume; run a fresh fit first." + msg = 'No saved bumps-dream chain to resume; run a fresh fit first.' 
raise ValueError(msg) state, saved_names = loaded @@ -894,7 +897,8 @@ def _validate_dream_resume( pop_scale: int, n_parameters: int, ) -> None: - """Reject a resume whose model does not match the saved chain. + """ + Reject a resume whose model does not match the saved chain. Mismatched free-parameter count, names/order, or population are all rejected — the population, in particular, cannot change on @@ -939,7 +943,8 @@ def _prepare_run_context( samples_override: int | None = None, pop_override: int | None = None, ) -> _DreamRunContext: - """Prepare a driver and metadata for one DREAM solver run. + """ + Prepare a driver and metadata for one DREAM solver run. The ``*_override`` arguments are set only on a resume run, where they extend the saved chain (see ``_prepare_dream_resume``); a @@ -1111,9 +1116,9 @@ def _execute_driver( """ Run the DREAM driver under a deterministic RNG-state guard. - ``fit_state`` is a deep-copied saved ``MCMCDraw`` on a resume run - (``None`` for a fresh run); it is passed to ``FitDriver.fit`` so - DREAM continues the existing chain. + ``fit_state`` is a deep-copied saved ``MCMCDraw`` on a resume + run (``None`` for a fresh run); it is passed to + ``FitDriver.fit`` so DREAM continues the existing chain. 
""" numpy_rng = np.random.mtrand._rand numpy_state = numpy_rng.get_state() From 6f657fcb6dbd8131051752a820f6f9a364918734 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 20:07:34 +0200 Subject: [PATCH 17/33] Add unit tests for DREAM resume and mcmc sidecar --- .../categories/minimizer/test_bumps_dream.py | 16 ++ .../analysis/minimizers/test_bumps_dream.py | 265 +++++++++++++++++- .../easydiffraction/analysis/test_analysis.py | 85 +++++- .../io/test_results_sidecar.py | 51 ++++ 4 files changed, 410 insertions(+), 7 deletions(-) diff --git a/tests/unit/easydiffraction/analysis/categories/minimizer/test_bumps_dream.py b/tests/unit/easydiffraction/analysis/categories/minimizer/test_bumps_dream.py index 6c79ce503..eff0f96ac 100644 --- a/tests/unit/easydiffraction/analysis/categories/minimizer/test_bumps_dream.py +++ b/tests/unit/easydiffraction/analysis/categories/minimizer/test_bumps_dream.py @@ -12,3 +12,19 @@ def test_bumps_dream_minimizer_registers_expected_tag(): assert issubclass(BumpsDreamMinimizer, BayesianMinimizerBase) assert BumpsDreamMinimizer.type_info.tag == MinimizerTypeEnum.BUMPS_DREAM + + +def test_chains_alias_shares_descriptor_with_population_size(): + from easydiffraction.analysis.categories.minimizer.bumps_dream import BumpsDreamMinimizer + + minimizer = BumpsDreamMinimizer() + + # chains and population_size are two names for one descriptor. 
+ assert minimizer.chains is minimizer.population_size + + minimizer.chains = 9 + assert minimizer.population_size.value == 9 + assert minimizer.chains.value == 9 + + minimizer.population_size = 4 + assert minimizer.chains.value == 4 diff --git a/tests/unit/easydiffraction/analysis/minimizers/test_bumps_dream.py b/tests/unit/easydiffraction/analysis/minimizers/test_bumps_dream.py index 3a23f5577..91b563a1c 100644 --- a/tests/unit/easydiffraction/analysis/minimizers/test_bumps_dream.py +++ b/tests/unit/easydiffraction/analysis/minimizers/test_bumps_dream.py @@ -362,7 +362,7 @@ def test_build_driver_stops_mapper_when_driver_clip_fails(): steps=10, burn=2, init=minimizer.init, - sampler_settings={'samples': 40}, + sampler_settings={'samples': 40, 'pop': 4}, n_parameters=1, ) @@ -382,3 +382,266 @@ def test_execute_driver_stops_mapper_when_seed_is_invalid(): assert isinstance(result.error, ValueError) driver.fit.assert_not_called() stop_mapper.assert_called_once() + + +def _build_dream_state(*, n_var=2, n_pop=6, n_gen=8, n_cr=3, labels=None, seed=0): + """Build a populated, h5-dumpable bumps ``MCMCDraw`` for tests.""" + from bumps.dream.state import MCMCDraw + + state = MCMCDraw( + Ngen=n_gen, + Nthin=n_gen, + Nupdate=n_gen, + Nvar=n_var, + Npop=n_pop, + Ncr=n_cr, + thinning=1, + ) + rng = np.random.RandomState(seed) + for _ in range(n_gen): + x = rng.rand(n_pop, n_var) + logp = -rng.rand(n_pop) + accept = np.ones(n_pop, dtype=bool) + state._generation(new_draws=n_pop, x=x, logp=logp, accept=accept) + state._update(CR_weight=np.ones(n_cr) / n_cr) + state.labels = labels if labels is not None else [f'p{index}' for index in range(n_var)] + return state + + +def test_dream_state_sidecar_round_trips_through_mcmc_h5(tmp_path): + from easydiffraction.analysis.minimizers.bumps_dream import DREAM_STATE_GROUP + from easydiffraction.analysis.minimizers.bumps_dream import _read_dream_state_sidecar + from easydiffraction.analysis.minimizers.bumps_dream import 
_write_dream_state_sidecar + + sidecar_path = tmp_path / 'analysis' / 'mcmc.h5' + state = _build_dream_state(labels=['alpha', 'beta']) + + _write_dream_state_sidecar(sidecar_path, state, ['alpha', 'beta']) + + import h5py + + with h5py.File(sidecar_path, 'r') as handle: + assert DREAM_STATE_GROUP in handle + assert 'state' in handle[DREAM_STATE_GROUP] + assert 'param_names' in handle[DREAM_STATE_GROUP] + + loaded = _read_dream_state_sidecar(sidecar_path) + assert loaded is not None + restored_state, restored_names = loaded + assert restored_names == ['alpha', 'beta'] + assert int(restored_state.Nvar) == 2 + assert int(restored_state.Npop) == 6 + np.testing.assert_allclose( + restored_state.draw().points, + state.draw().points, + ) + + +def test_dream_state_sidecar_write_replaces_existing_group(tmp_path): + from easydiffraction.analysis.minimizers.bumps_dream import _read_dream_state_sidecar + from easydiffraction.analysis.minimizers.bumps_dream import _write_dream_state_sidecar + + sidecar_path = tmp_path / 'mcmc.h5' + _write_dream_state_sidecar(sidecar_path, _build_dream_state(n_gen=8), ['a', 'b']) + _write_dream_state_sidecar(sidecar_path, _build_dream_state(n_gen=4), ['c', 'd']) + + loaded = _read_dream_state_sidecar(sidecar_path) + assert loaded is not None + _, restored_names = loaded + assert restored_names == ['c', 'd'] + + +def test_read_dream_state_sidecar_returns_none_when_file_absent(tmp_path): + from easydiffraction.analysis.minimizers.bumps_dream import _read_dream_state_sidecar + + assert _read_dream_state_sidecar(tmp_path / 'missing.h5') is None + + +def test_read_dream_state_sidecar_returns_none_when_group_absent(tmp_path): + import h5py + + from easydiffraction.analysis.minimizers.bumps_dream import _read_dream_state_sidecar + + sidecar_path = tmp_path / 'mcmc.h5' + with h5py.File(sidecar_path, 'w') as handle: + handle.create_group('posterior') + + assert _read_dream_state_sidecar(sidecar_path) is None + + +def 
test_read_dream_state_sidecar_raises_when_group_malformed(tmp_path): + import h5py + + from easydiffraction.analysis.minimizers.bumps_dream import DREAM_STATE_GROUP + from easydiffraction.analysis.minimizers.bumps_dream import _read_dream_state_sidecar + + sidecar_path = tmp_path / 'mcmc.h5' + with h5py.File(sidecar_path, 'w') as handle: + handle.create_group(DREAM_STATE_GROUP) + + with pytest.raises(ValueError, match=r"Malformed 'dream_state' group"): + _read_dream_state_sidecar(sidecar_path) + + +def test_persist_dream_state_is_noop_without_sidecar_path(): + from easydiffraction.analysis.minimizers.bumps_dream import BumpsDreamMinimizer + + minimizer = BumpsDreamMinimizer() + assert minimizer._sidecar_path is None + + # Should not raise and should not attempt any write. + minimizer._persist_dream_state(raw_state=object(), parameter_names=['a']) + + +def test_validate_dream_resume_accepts_matching_state(): + from easydiffraction.analysis.minimizers.bumps_dream import BumpsDreamMinimizer + + state = _build_dream_state(n_var=2, n_pop=6, labels=['a', 'b']) + + # ceil(pop_scale * n_parameters) == Npop -> 3 * 2 == 6. 
+ BumpsDreamMinimizer._validate_dream_resume( + state=state, + saved_names=['a', 'b'], + names=['a', 'b'], + pop_scale=3, + n_parameters=2, + ) + + +def test_validate_dream_resume_rejects_parameter_count_mismatch(): + from easydiffraction.analysis.minimizers.bumps_dream import BumpsDreamMinimizer + + state = _build_dream_state(n_var=2, n_pop=6, labels=['a', 'b']) + + with pytest.raises(ValueError, match='free-parameter set must match'): + BumpsDreamMinimizer._validate_dream_resume( + state=state, + saved_names=['a', 'b'], + names=['a', 'b', 'c'], + pop_scale=2, + n_parameters=3, + ) + + +def test_validate_dream_resume_rejects_name_order_mismatch(): + from easydiffraction.analysis.minimizers.bumps_dream import BumpsDreamMinimizer + + state = _build_dream_state(n_var=2, n_pop=6, labels=['a', 'b']) + + with pytest.raises(ValueError, match='Parameter names/order differ'): + BumpsDreamMinimizer._validate_dream_resume( + state=state, + saved_names=['a', 'b'], + names=['b', 'a'], + pop_scale=3, + n_parameters=2, + ) + + +def test_validate_dream_resume_rejects_population_mismatch(): + from easydiffraction.analysis.minimizers.bumps_dream import BumpsDreamMinimizer + + state = _build_dream_state(n_var=2, n_pop=6, labels=['a', 'b']) + + with pytest.raises(ValueError, match='population cannot change on resume'): + BumpsDreamMinimizer._validate_dream_resume( + state=state, + saved_names=['a', 'b'], + names=['a', 'b'], + pop_scale=4, + n_parameters=2, + ) + + +def test_state_generations_divides_total_draws_by_population(): + from easydiffraction.analysis.minimizers.bumps_dream import BumpsDreamMinimizer + + state = _build_dream_state(n_var=2, n_pop=6, n_gen=8, labels=['a', 'b']) + + generations = BumpsDreamMinimizer._state_generations( + state=state, + pop_scale=3, + n_parameters=2, + ) + + assert generations == 8 + + +def test_prepare_dream_resume_builds_ring_buffer_overrides(tmp_path): + from easydiffraction.analysis.minimizers.bumps_dream import BumpsDreamMinimizer + from 
easydiffraction.analysis.minimizers.bumps_dream import _write_dream_state_sidecar + + sidecar_path = tmp_path / 'mcmc.h5' + state = _build_dream_state(n_var=2, n_pop=6, n_gen=8, labels=['a', 'b']) + _write_dream_state_sidecar(sidecar_path, state, ['a', 'b']) + + minimizer = BumpsDreamMinimizer() + minimizer._sidecar_path = sidecar_path + minimizer.pop = 3 + + overrides, fit_state = minimizer._prepare_dream_resume( + kwargs={'parameter_names': ['a', 'b']}, + extra_steps=5, + ) + + assert overrides == { + 'steps_override': 13, + 'burn_override': 0, + 'samples_override': 13 * 3 * 2, + 'pop_override': 3, + } + # bumps mutates state in place, so resume must pass a deep copy. + assert fit_state is not state + np.testing.assert_allclose(fit_state.draw().points, state.draw().points) + + +@pytest.mark.parametrize('extra_steps', [0, -1, 1.5, True]) +def test_prepare_dream_resume_rejects_non_positive_extra_steps(tmp_path, extra_steps): + from easydiffraction.analysis.minimizers.bumps_dream import BumpsDreamMinimizer + + minimizer = BumpsDreamMinimizer() + minimizer._sidecar_path = tmp_path / 'mcmc.h5' + + with pytest.raises(ValueError, match='positive integer extra_steps'): + minimizer._prepare_dream_resume( + kwargs={'parameter_names': ['a', 'b']}, + extra_steps=extra_steps, + ) + + +def test_prepare_dream_resume_requires_sidecar_path(): + from easydiffraction.analysis.minimizers.bumps_dream import BumpsDreamMinimizer + + minimizer = BumpsDreamMinimizer() + assert minimizer._sidecar_path is None + + with pytest.raises(ValueError, match='requires a saved project'): + minimizer._prepare_dream_resume( + kwargs={'parameter_names': ['a', 'b']}, + extra_steps=5, + ) + + +def test_prepare_dream_resume_requires_existing_chain(tmp_path): + from easydiffraction.analysis.minimizers.bumps_dream import BumpsDreamMinimizer + + minimizer = BumpsDreamMinimizer() + minimizer._sidecar_path = tmp_path / 'mcmc.h5' + + with pytest.raises(ValueError, match='No saved bumps-dream chain to resume'): 
+ minimizer._prepare_dream_resume( + kwargs={'parameter_names': ['a', 'b']}, + extra_steps=5, + ) + + +def test_chains_alias_shares_storage_with_pop(): + from easydiffraction.analysis.minimizers.bumps_dream import BumpsDreamMinimizer + + minimizer = BumpsDreamMinimizer() + minimizer.chains = 7 + assert minimizer.pop == 7 + assert minimizer.chains == 7 + + minimizer.pop = 2 + assert minimizer.chains == 2 diff --git a/tests/unit/easydiffraction/analysis/test_analysis.py b/tests/unit/easydiffraction/analysis/test_analysis.py index 39b3ee65f..d9c00a1a6 100644 --- a/tests/unit/easydiffraction/analysis/test_analysis.py +++ b/tests/unit/easydiffraction/analysis/test_analysis.py @@ -544,21 +544,40 @@ def test_fit_resume_preserves_explicit_extra_steps(monkeypatch, tmp_path): assert captured == {'resume': True, 'extra_steps': 10} -def test_fit_resume_missing_sidecar_warns_and_starts_fresh( +def test_fit_resume_missing_sidecar_raises( monkeypatch, tmp_path, ): - from easydiffraction.analysis import analysis as analysis_mod + import pytest + from easydiffraction.analysis.analysis import Analysis analysis = Analysis(project=_make_project_with_names(['e1'])) analysis.project.verbosity = SimpleNamespace(fit=SimpleNamespace(value='silent')) analysis.project.metadata = SimpleNamespace(path=tmp_path) analysis.minimizer.type = 'emcee' + + monkeypatch.setattr( + analysis, + '_run_single', + lambda **kwargs: None, + ) + + with pytest.raises(ValueError, match='no saved.*resumable chain'): + analysis.fit(resume=True) + + +def test_dream_fit_resume_defaults_extra_steps_to_sampling_steps(monkeypatch, tmp_path): + from easydiffraction.analysis.analysis import Analysis + + analysis = Analysis(project=_make_project_with_names(['e1'])) + analysis.project.verbosity = SimpleNamespace(fit=SimpleNamespace(value='silent')) + analysis.project.metadata = SimpleNamespace(path=tmp_path) + analysis.minimizer.type = 'bumps (dream)' + analysis.minimizer.sampling_steps = 77 captured: dict[str, object] = {} 
- warnings: list[str] = [] - monkeypatch.setattr(analysis_mod.log, 'warning', warnings.append) + monkeypatch.setattr(analysis, '_has_resumable_dream_sidecar', lambda: True) monkeypatch.setattr( analysis, '_run_single', @@ -567,8 +586,62 @@ def test_fit_resume_missing_sidecar_warns_and_starts_fresh( analysis.fit(resume=True) - assert captured == {'resume': False, 'extra_steps': None} - assert any('no saved emcee chain' in message for message in warnings) + assert captured == {'resume': True, 'extra_steps': 77} + + +def test_dream_fit_resume_missing_sidecar_raises(monkeypatch, tmp_path): + import pytest + + from easydiffraction.analysis.analysis import Analysis + + analysis = Analysis(project=_make_project_with_names(['e1'])) + analysis.project.verbosity = SimpleNamespace(fit=SimpleNamespace(value='silent')) + analysis.project.metadata = SimpleNamespace(path=tmp_path) + analysis.minimizer.type = 'bumps (dream)' + + monkeypatch.setattr(analysis, '_has_resumable_dream_sidecar', lambda: False) + monkeypatch.setattr(analysis, '_run_single', lambda **kwargs: None) + + with pytest.raises(ValueError, match='no saved.*resumable chain'): + analysis.fit(resume=True) + + +def test_has_resumable_dream_sidecar_detects_state_group(tmp_path): + import h5py + + from easydiffraction.analysis.analysis import Analysis + from easydiffraction.analysis.minimizers.bumps_dream import DREAM_STATE_GROUP + + analysis = Analysis(project=_make_project_with_names([])) + analysis.project.metadata = SimpleNamespace(path=tmp_path) + analysis.minimizer.type = 'bumps (dream)' + + # No sidecar file yet. + assert analysis._has_resumable_dream_sidecar() is False + + analysis_dir = tmp_path / 'analysis' + analysis_dir.mkdir(parents=True) + sidecar_path = analysis_dir / 'mcmc.h5' + + # Sidecar without the dream_state group. + with h5py.File(sidecar_path, 'w') as handle: + handle.create_group('posterior') + assert analysis._has_resumable_dream_sidecar() is False + + # Sidecar with the dream_state group. 
+ with h5py.File(sidecar_path, 'a') as handle: + handle.create_group(DREAM_STATE_GROUP) + assert analysis._has_resumable_dream_sidecar() is True + + +def test_default_resume_extra_steps_reads_sampling_steps(): + from easydiffraction.analysis.analysis import Analysis + + analysis = Analysis(project=_make_project_with_names([])) + analysis.minimizer.type = 'bumps (dream)' + analysis.minimizer.sampling_steps = 42 + + assert analysis._default_resume_extra_steps() == 42 def test_fitting_mode_type_invalid_assignment_raises_and_preserves_state(): diff --git a/tests/unit/easydiffraction/io/test_results_sidecar.py b/tests/unit/easydiffraction/io/test_results_sidecar.py index d652315f6..b56bcda6c 100644 --- a/tests/unit/easydiffraction/io/test_results_sidecar.py +++ b/tests/unit/easydiffraction/io/test_results_sidecar.py @@ -188,6 +188,57 @@ def test_write_analysis_results_sidecar_preserves_emcee_chain_group(tmp_path): assert handle[EMCEE_CHAIN_GROUP].attrs['iteration'] == 7 +def test_write_analysis_results_sidecar_preserves_dream_state_group(tmp_path): + from easydiffraction.analysis.minimizers.bumps_dream import DREAM_STATE_GROUP + from easydiffraction.io import results_sidecar as results_sidecar_mod + + analysis_dir = Path(tmp_path) / 'analysis' + analysis = _analysis_with_sidecar_payload() + results_sidecar_mod.write_analysis_results_sidecar( + analysis=analysis, + analysis_dir=analysis_dir, + ) + + import h5py + + with h5py.File(analysis_dir / 'mcmc.h5', 'a') as handle: + state = handle.require_group(DREAM_STATE_GROUP) + state.attrs['generations'] = 11 + + results_sidecar_mod.write_analysis_results_sidecar( + analysis=analysis, + analysis_dir=analysis_dir, + ) + + with h5py.File(analysis_dir / 'mcmc.h5', 'r') as handle: + assert handle[DREAM_STATE_GROUP].attrs['generations'] == 11 + + +def test_prepare_for_new_fit_clears_all_raw_state_groups(tmp_path): + from easydiffraction.analysis.minimizers.bumps_dream import DREAM_STATE_GROUP + from 
easydiffraction.analysis.minimizers.emcee import EMCEE_CHAIN_GROUP + from easydiffraction.io import results_sidecar as results_sidecar_mod + + analysis_dir = Path(tmp_path) / 'analysis' + analysis_dir.mkdir(parents=True) + sidecar_path = analysis_dir / 'mcmc.h5' + + import h5py + + # A fresh fit must wipe every engine's raw sampler-state group, not + # just the active one, so a stale chain can never be resumed. + with h5py.File(sidecar_path, 'w') as handle: + handle.create_group(EMCEE_CHAIN_GROUP) + handle.create_group(DREAM_STATE_GROUP) + handle.create_group('posterior') + + results_sidecar_mod.prepare_analysis_results_sidecar_for_new_fit( + analysis_dir=analysis_dir, + ) + + assert not sidecar_path.is_file() + + def test_should_use_sidecar_compares_to_fit_result_kind_enum(): """`_should_use_sidecar` must read from `FitResultKindEnum`, not a literal.""" from easydiffraction.analysis.enums import FitResultKindEnum From b6fa70584b89f45a67a5cf564b535be3addb8ea8 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 20:09:14 +0200 Subject: [PATCH 18/33] Add integration test for DREAM resume parity --- .../fitting/test_bumps_dream_resume.py | 157 ++++++++++++++++++ 1 file changed, 157 insertions(+) create mode 100644 tests/integration/fitting/test_bumps_dream_resume.py diff --git a/tests/integration/fitting/test_bumps_dream_resume.py b/tests/integration/fitting/test_bumps_dream_resume.py new file mode 100644 index 000000000..be6a33743 --- /dev/null +++ b/tests/integration/fitting/test_bumps_dream_resume.py @@ -0,0 +1,157 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Integration checks for bumps-DREAM resume (extend a saved chain).""" + +from __future__ import annotations + +from dataclasses import dataclass + +import numpy as np + +from easydiffraction.utils.enums import VerbosityEnum + + +@dataclass +class ToyParameter: + """Minimal parameter object accepted by the Bayesian engines.""" + + 
unique_name: str + value: float + fit_min: float + fit_max: float + uncertainty: float | None = None + + @property + def name(self) -> str: + """Return the display name used in posterior summaries.""" + return self.unique_name + + @property + def _minimizer_uid(self) -> str: + """Return the BUMPS parameter identifier.""" + return self.unique_name + + def _set_value_from_minimizer(self, value: float) -> None: + """Store a value committed by the minimizer.""" + self.value = value + + def _physical_lower_bound(self) -> float: + """Return the lower physical limit for warning checks.""" + return -np.inf + + def _physical_upper_bound(self) -> float: + """Return the upper physical limit for warning checks.""" + return np.inf + + +def _toy_parameters() -> list[ToyParameter]: + return [ + ToyParameter(unique_name='x', value=0.0, fit_min=-4.0, fit_max=4.0), + ToyParameter(unique_name='y', value=0.0, fit_min=-4.0, fit_max=4.0), + ] + + +def _array_residuals(values: np.ndarray) -> np.ndarray: + target = np.asarray([1.2, -0.7], dtype=float) + sigma = np.asarray([0.25, 0.35], dtype=float) + return (np.asarray(values, dtype=float) - target) / sigma + + +def _posterior_medians(results: object) -> np.ndarray: + return np.asarray( + [summary.median for summary in results.posterior_parameter_summaries], + dtype=float, + ) + + +def _build_dream(sidecar_path: object) -> object: + from easydiffraction.analysis.minimizers.bumps_dream import BumpsDreamMinimizer + + dream = BumpsDreamMinimizer() + dream.steps = 80 + dream.burn = 20 + dream.thin = 1 + dream.pop = 4 + dream.parallel = 1 + dream._sidecar_path = sidecar_path + return dream + + +def test_dream_resume_grows_chain_and_matches_longer_run(tmp_path): + from easydiffraction.analysis.minimizers.base import MinimizerFitOptions + from easydiffraction.analysis.minimizers.bumps_dream import DREAM_STATE_GROUP + + sidecar_path = tmp_path / 'analysis' / 'mcmc.h5' + + dream = _build_dream(sidecar_path) + fresh_results = dream.fit( + 
_toy_parameters(), + _array_residuals, + verbosity=VerbosityEnum.SILENT, + options=MinimizerFitOptions(random_seed=123), + ) + + # The fresh fit persists a resumable DREAM state in the sidecar. + assert fresh_results.success is True + assert fresh_results.posterior_samples.parameter_samples.shape == (80, 8, 2) + assert sidecar_path.is_file() + + import h5py + + with h5py.File(sidecar_path, 'r') as handle: + assert DREAM_STATE_GROUP in handle + + resumed_results = dream.fit( + _toy_parameters(), + _array_residuals, + verbosity=VerbosityEnum.SILENT, + options=MinimizerFitOptions(random_seed=123, resume=True, extra_steps=20), + ) + + # Resume extends the saved chain by exactly extra_steps generations. + assert resumed_results.success is True + assert resumed_results.posterior_samples.parameter_samples.shape == (100, 8, 2) + + # A single 100-generation run reaches a comparable posterior. + longer = _build_dream(tmp_path / 'longer' / 'mcmc.h5') + longer.steps = 100 + longer_results = longer.fit( + _toy_parameters(), + _array_residuals, + verbosity=VerbosityEnum.SILENT, + options=MinimizerFitOptions(random_seed=123), + ) + + np.testing.assert_allclose( + _posterior_medians(resumed_results), + _posterior_medians(longer_results), + atol=0.35, + ) + + +def test_dream_resume_rejects_population_change(tmp_path): + import pytest + + from easydiffraction.analysis.minimizers.base import MinimizerFitOptions + + sidecar_path = tmp_path / 'analysis' / 'mcmc.h5' + + dream = _build_dream(sidecar_path) + dream.fit( + _toy_parameters(), + _array_residuals, + verbosity=VerbosityEnum.SILENT, + options=MinimizerFitOptions(random_seed=123), + ) + + # Changing the population scale cannot be honoured on resume because + # bumps resumes positionally into a fixed chain count. 
+ dream.pop = 6 + + with pytest.raises(ValueError, match='population cannot change on resume'): + dream.fit( + _toy_parameters(), + _array_residuals, + verbosity=VerbosityEnum.SILENT, + options=MinimizerFitOptions(random_seed=123, resume=True, extra_steps=20), + ) From 251596ff519cca3d1769a328ab96e5abf6cf4972 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 20:10:34 +0200 Subject: [PATCH 19/33] Use raw-string regex in resume match assertions --- tests/unit/easydiffraction/analysis/test_analysis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/easydiffraction/analysis/test_analysis.py b/tests/unit/easydiffraction/analysis/test_analysis.py index d9c00a1a6..91c406d5c 100644 --- a/tests/unit/easydiffraction/analysis/test_analysis.py +++ b/tests/unit/easydiffraction/analysis/test_analysis.py @@ -563,7 +563,7 @@ def test_fit_resume_missing_sidecar_raises( lambda **kwargs: None, ) - with pytest.raises(ValueError, match='no saved.*resumable chain'): + with pytest.raises(ValueError, match=r'no saved.*resumable chain'): analysis.fit(resume=True) @@ -602,7 +602,7 @@ def test_dream_fit_resume_missing_sidecar_raises(monkeypatch, tmp_path): monkeypatch.setattr(analysis, '_has_resumable_dream_sidecar', lambda: False) monkeypatch.setattr(analysis, '_run_single', lambda **kwargs: None) - with pytest.raises(ValueError, match='no saved.*resumable chain'): + with pytest.raises(ValueError, match=r'no saved.*resumable chain'): analysis.fit(resume=True) From 696eb885df474e2ad7cbd6f7abdee0ecc062dfb3 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 20:12:52 +0200 Subject: [PATCH 20/33] Update resume validation test for MCMC minimizers --- .../easydiffraction/analysis/test_analysis_coverage.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/unit/easydiffraction/analysis/test_analysis_coverage.py b/tests/unit/easydiffraction/analysis/test_analysis_coverage.py index 143e2f310..7984fd0d0 
100644 --- a/tests/unit/easydiffraction/analysis/test_analysis_coverage.py +++ b/tests/unit/easydiffraction/analysis/test_analysis_coverage.py @@ -1053,14 +1053,17 @@ def test_validate_fit_request_resume_requires_single_mode(self): with pytest.raises(ValueError, match='single fit mode only'): a._validate_fit_request(mode=FitModeEnum.JOINT, resume=True, extra_steps=None) - def test_validate_fit_request_resume_requires_emcee(self): + def test_validate_fit_request_resume_requires_mcmc_minimizer(self): import pytest from easydiffraction.analysis.analysis import Analysis from easydiffraction.analysis.enums import FitModeEnum a = Analysis(project=_make_project()) # default lmfit minimizer - with pytest.raises(ValueError, match=r"analysis.minimizer.type = 'emcee'"): + with pytest.raises( + ValueError, + match=r"Resume is supported only for MCMC minimizers", + ): a._validate_fit_request(mode=FitModeEnum.SINGLE, resume=True, extra_steps=None) From 347b860a425bbe0d61bfe7e1e452b85c5b136a6d Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 20:18:51 +0200 Subject: [PATCH 21/33] Update DREAM support tests for resume-aware driver signatures --- tests/integration/fitting/test_bumps_dream_support.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration/fitting/test_bumps_dream_support.py b/tests/integration/fitting/test_bumps_dream_support.py index 0a72e42c4..1d6864967 100644 --- a/tests/integration/fitting/test_bumps_dream_support.py +++ b/tests/integration/fitting/test_bumps_dream_support.py @@ -547,7 +547,7 @@ def test_build_driver_stops_mapper_when_driver_clip_fails(): steps=10, burn=2, init=minimizer.init, - sampler_settings={'samples': 40}, + sampler_settings={'samples': 40, 'pop': 4}, n_parameters=1, ) @@ -588,12 +588,12 @@ def test_run_solver_failure_paths_return_failure_results(monkeypatch): monkeypatch.setattr( minimizer, '_prepare_run_context', - lambda *, objective_function, kwargs: context, + lambda *, 
objective_function, kwargs, **overrides: context, ) monkeypatch.setattr( minimizer, '_execute_driver', - lambda *, driver, random_seed: _DreamDriverResult( + lambda *, driver, random_seed, fit_state=None: _DreamDriverResult( best_values=None, best_nllf=None, raw_state='state', @@ -610,7 +610,7 @@ def test_run_solver_failure_paths_return_failure_results(monkeypatch): monkeypatch.setattr( minimizer, '_execute_driver', - lambda *, driver, random_seed: _DreamDriverResult( + lambda *, driver, random_seed, fit_state=None: _DreamDriverResult( best_values=np.array([1.0]), best_nllf=0.5, raw_state=None, From 42922d8ab80484d96f8c38602b5eee8a0c837aec Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 21:03:53 +0200 Subject: [PATCH 22/33] Preserve raw sampler state across project save_as --- .../bayesian-resume-and-mcmc-sidecar.md | 10 +++ src/easydiffraction/io/results_sidecar.py | 52 ++++++++++++++++ src/easydiffraction/project/project.py | 14 +++++ .../io/test_results_sidecar.py | 62 +++++++++++++++++++ 4 files changed, 138 insertions(+) diff --git a/docs/dev/adrs/accepted/bayesian-resume-and-mcmc-sidecar.md b/docs/dev/adrs/accepted/bayesian-resume-and-mcmc-sidecar.md index af64de076..7e58916b8 100644 --- a/docs/dev/adrs/accepted/bayesian-resume-and-mcmc-sidecar.md +++ b/docs/dev/adrs/accepted/bayesian-resume-and-mcmc-sidecar.md @@ -128,6 +128,16 @@ malformed active-engine group is a clear error; otherwise it is ignored and the fit starts fresh. `undo_fit` clears the raw-state group(s) the same way it already clears the sidecar. +**Relocating a saved project (`save_as`).** `project.save()` rebuilds +the derived sidecar arrays from memory but cannot reconstruct the raw +sampler state, which only ever exists on disk. Relocating a project with +`save_as` therefore copies the raw-state groups (`emcee_chain`, +`dream_state`) from the source sidecar into the destination before the +derived arrays are rewritten. 
Without this, a resume after +`load` + `save_as` — the flow both Bayesian resume tutorials use — would +find no chain to extend. This makes resume genuinely survive a +load/relocate round-trip for both engines, as required above. + ### 3. Rename the sidecar `results.h5` → `mcmc.h5` The sidecar is renamed to reflect its content. It remains **one file per diff --git a/src/easydiffraction/io/results_sidecar.py b/src/easydiffraction/io/results_sidecar.py index c76059c55..c3995f4b3 100644 --- a/src/easydiffraction/io/results_sidecar.py +++ b/src/easydiffraction/io/results_sidecar.py @@ -28,6 +28,13 @@ 'pair_cache', 'predictive', ) +# Raw, resumable sampler-state groups written per engine (emcee's live +# HDF backend, DREAM's MCMCDraw dump). They are not rebuilt from memory +# on save, so relocating a project must copy them across explicitly. +_RAW_SAMPLER_STATE_GROUPS = ( + 'emcee_chain', + 'dream_state', +) _POSTERIOR_SAMPLE_NDIM = 3 @@ -71,6 +78,51 @@ def _warn_existing_sidecar_overwrite(sidecar_path: Path) -> None: ) +def carry_over_raw_sampler_state( + *, + source_analysis_dir: Path, + destination_analysis_dir: Path, +) -> None: + """ + Copy raw sampler-state groups into a relocated project's sidecar. + + A project ``save_as`` rebuilds the derived sidecar arrays from + memory but cannot reconstruct the raw, resumable sampler state + (``emcee_chain`` / ``dream_state``). This copies those groups from + the source sidecar into the destination so a resume after load + + ``save_as`` still finds the chain to extend. No-op when the source + sidecar or its raw-state groups are absent. + + Parameters + ---------- + source_analysis_dir : Path + The ``analysis/`` directory of the previously saved project. + destination_analysis_dir : Path + The ``analysis/`` directory of the relocated project. 
+ """ + source_path = _sidecar_path(analysis_dir=source_analysis_dir) + if not source_path.is_file(): + return + + import h5py # noqa: PLC0415 + + with h5py.File(source_path, 'r') as source_handle: + present_groups = [ + group_name + for group_name in _RAW_SAMPLER_STATE_GROUPS + if group_name in source_handle + ] + if not present_groups: + return + + destination_analysis_dir.mkdir(parents=True, exist_ok=True) + destination_path = _sidecar_path(analysis_dir=destination_analysis_dir) + with h5py.File(destination_path, 'a') as destination_handle: + for group_name in present_groups: + _delete_group_if_present(destination_handle, group_name) + source_handle.copy(group_name, destination_handle, name=group_name) + + def prepare_analysis_results_sidecar_for_new_fit(*, analysis_dir: Path) -> None: """Warn and remove the results sidecar before a fresh fit starts.""" sidecar_path = _sidecar_path(analysis_dir=analysis_dir) diff --git a/src/easydiffraction/project/project.py b/src/easydiffraction/project/project.py index e27036599..04044b115 100644 --- a/src/easydiffraction/project/project.py +++ b/src/easydiffraction/project/project.py @@ -24,6 +24,8 @@ from easydiffraction.io.cif.serialize import project_to_cif from easydiffraction.io.edi import edi_body_from_text from easydiffraction.io.edi import section_to_edi +from easydiffraction.io.results_sidecar import SIDECAR_FILE_NAME +from easydiffraction.io.results_sidecar import carry_over_raw_sampler_state from easydiffraction.io.results_sidecar import read_analysis_results_sidecar from easydiffraction.io.results_sidecar import write_analysis_results_sidecar from easydiffraction.project.display import ProjectDisplay @@ -661,6 +663,8 @@ def save_as( else: project_dir = resolve_artifact_path(dir_path) + previous_path = self.metadata.path + if overwrite and project_dir.is_dir(): current_working_directory = pathlib.Path.cwd().resolve() resolved_project_dir = project_dir.resolve() @@ -674,6 +678,16 @@ def save_as( 
shutil.rmtree(project_dir) self.metadata.path = project_dir + # Relocating a saved Bayesian project must preserve the raw, + # resumable sampler-state groups (emcee_chain / dream_state). + # save() rewrites only the derived sidecar arrays from memory, so + # copy the raw groups across before they are rebuilt; otherwise + # resume after load + save_as would have no chain to extend. + if previous_path is not None and project_dir.resolve() != previous_path.resolve(): + carry_over_raw_sampler_state( + source_analysis_dir=previous_path / 'analysis', + destination_analysis_dir=project_dir / 'analysis', + ) self.save() def apply_params_from_csv(self, row_index: int) -> None: diff --git a/tests/unit/easydiffraction/io/test_results_sidecar.py b/tests/unit/easydiffraction/io/test_results_sidecar.py index b56bcda6c..fcdb57d1c 100644 --- a/tests/unit/easydiffraction/io/test_results_sidecar.py +++ b/tests/unit/easydiffraction/io/test_results_sidecar.py @@ -239,6 +239,68 @@ def test_prepare_for_new_fit_clears_all_raw_state_groups(tmp_path): assert not sidecar_path.is_file() +def test_carry_over_raw_sampler_state_copies_engine_groups(tmp_path): + import h5py + + from easydiffraction.io import results_sidecar as results_sidecar_mod + + source_dir = Path(tmp_path) / 'src' / 'analysis' + source_dir.mkdir(parents=True) + with h5py.File(source_dir / 'mcmc.h5', 'w') as handle: + chain = handle.create_group('emcee_chain') + chain.attrs['iteration'] = 5 + state = handle.create_group('dream_state') + state.create_dataset('param_names', data=[b'a', b'b']) + handle.create_group('posterior') # canonical: must NOT be copied + + dest_dir = Path(tmp_path) / 'dst' / 'analysis' + + results_sidecar_mod.carry_over_raw_sampler_state( + source_analysis_dir=source_dir, + destination_analysis_dir=dest_dir, + ) + + with h5py.File(dest_dir / 'mcmc.h5', 'r') as handle: + assert handle['emcee_chain'].attrs['iteration'] == 5 + assert 'dream_state' in handle + assert 
list(handle['dream_state']['param_names'][()]) == [b'a', b'b'] + # Canonical groups are rebuilt from memory, never carried over. + assert 'posterior' not in handle + + +def test_carry_over_raw_sampler_state_is_noop_without_source(tmp_path): + from easydiffraction.io import results_sidecar as results_sidecar_mod + + dest_dir = Path(tmp_path) / 'dst' / 'analysis' + + results_sidecar_mod.carry_over_raw_sampler_state( + source_analysis_dir=Path(tmp_path) / 'missing' / 'analysis', + destination_analysis_dir=dest_dir, + ) + + assert not (dest_dir / 'mcmc.h5').exists() + + +def test_carry_over_raw_sampler_state_is_noop_without_raw_groups(tmp_path): + import h5py + + from easydiffraction.io import results_sidecar as results_sidecar_mod + + source_dir = Path(tmp_path) / 'src' / 'analysis' + source_dir.mkdir(parents=True) + with h5py.File(source_dir / 'mcmc.h5', 'w') as handle: + handle.create_group('posterior') + + dest_dir = Path(tmp_path) / 'dst' / 'analysis' + + results_sidecar_mod.carry_over_raw_sampler_state( + source_analysis_dir=source_dir, + destination_analysis_dir=dest_dir, + ) + + assert not (dest_dir / 'mcmc.h5').exists() + + def test_should_use_sidecar_compares_to_fit_result_kind_enum(): """`_should_use_sidecar` must read from `FitResultKindEnum`, not a literal.""" from easydiffraction.analysis.enums import FitResultKindEnum From 4d17bab34942b1e452fadfaf0176f155243d3e18 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 21:11:37 +0200 Subject: [PATCH 23/33] Rename DREAM display tutorial to resume and re-pin data --- docs/dev/adrs/accepted/resource-naming.md | 60 +++---- .../bayesian-dream-display-lbco-hrpt.py | 102 ----------- ... 
=> bayesian-dream-resume-lbco-hrpt.ipynb} | 168 +++++++++++++++--- .../bayesian-dream-resume-lbco-hrpt.py | 158 ++++++++++++++++ .../bayesian-emcee-resume-lbco-hrpt.ipynb | 6 +- .../bayesian-emcee-resume-lbco-hrpt.py | 6 +- docs/docs/tutorials/index.json | 10 +- docs/docs/tutorials/index.md | 10 +- docs/mkdocs.yml | 2 +- src/easydiffraction/_data_index_ref.txt | 2 +- 10 files changed, 349 insertions(+), 175 deletions(-) delete mode 100644 docs/docs/tutorials/bayesian-dream-display-lbco-hrpt.py rename docs/docs/tutorials/{bayesian-dream-display-lbco-hrpt.ipynb => bayesian-dream-resume-lbco-hrpt.ipynb} (59%) create mode 100644 docs/docs/tutorials/bayesian-dream-resume-lbco-hrpt.py diff --git a/docs/dev/adrs/accepted/resource-naming.md b/docs/dev/adrs/accepted/resource-naming.md index e400243bd..3a482e5ec 100644 --- a/docs/dev/adrs/accepted/resource-naming.md +++ b/docs/dev/adrs/accepted/resource-naming.md @@ -263,36 +263,36 @@ Presentation order moves to the MkDocs nav (Decision 4), so these slugs carry no sequence and can be inserted, removed, or reordered freely. There is no `ed-19` tutorial — id 19 is a dataset only. 
-| Old id | Title | New id | -| ------ | -------------------------------------------- | ---------------------------------- | -| ed-1 | Structure Refinement: LBCO, HRPT (from CIF) | `refine-lbco-hrpt-from-cif` | -| ed-2 | Structure Refinement: LBCO, HRPT (from data) | `refine-lbco-hrpt-from-data` | -| ed-3 | Structure Refinement: LBCO, HRPT (report) | `refine-lbco-hrpt-report` | -| ed-4 | Refinement: PbSO4, NPD+XRD | `refine-pbso4-joint` | -| ed-5 | Refinement: Co2SiO4, D20 | `refine-cosio-d20` | -| ed-6 | Refinement: HS, HRPT | `refine-hs-hrpt` | -| ed-7 | Refinement: Si, SEPD | `refine-si-sepd` | -| ed-8 | Refinement: NCAF, WISH | `refine-ncaf-wish` | -| ed-9 | Refinement: LBCO+Si, McStas | `refine-lbco-si-mcstas` | -| ed-10 | PDF: Ni, NPD | `pdf-ni-npd` | -| ed-11 | PDF: Si, NOMAD (SNS) | `pdf-si-nomad` | -| ed-12 | PDF: NaCl, XRD | `pdf-nacl-xrd` | -| ed-13 | Fitting exercise: Si, LBCO | `fitting-exercise-si-lbco` | -| ed-14 | Refinement: Tb2Ti2O7, HEiDi | `refine-tbti-heidi` | -| ed-15 | Refinement: Taurine, SENJU | `refine-taurine-senju` | -| ed-16 | Joint: Si, Bragg+PDF | `joint-si-bragg-pdf` | -| ed-17 | Refinement: Co2SiO4, D20 (T-scan) | `refine-cosio-d20-tscan` | -| ed-18 | Load Project and Fit: LBCO, HRPT | `load-and-fit-lbco-hrpt` | -| ed-20 | Instrument calibration: BEER, ESS | `calibrate-beer-ess` | -| ed-21 | Bayesian (bumps-dream): LBCO, HRPT | `bayesian-dream-lbco-hrpt` | -| ed-22 | Bayesian (emcee): Tb2Ti2O7, HEiDi | `bayesian-emcee-tbti-heidi` | -| ed-23 | Refinement: Co2SiO4 D20 (T-scan, resumed) | `refine-cosio-d20-tscan-resumed` | -| ed-24 | Bayesian Display (bumps-dream): LBCO, HRPT | `bayesian-dream-display-lbco-hrpt` | -| ed-25 | Bayesian (emcee): LBCO, HRPT | `bayesian-emcee-lbco-hrpt` | -| ed-26 | Bayesian Resume (emcee): LBCO, HRPT | `bayesian-emcee-resume-lbco-hrpt` | -| ed-27 | Calculation Without Data: LBCO, CWL | `simulate-lbco-cwl` | -| ed-28 | Calculation Without Data: Si, TOF | `simulate-si-tof` | -| ed-29 | Calculation Without 
Data: NaCl, X-ray | `simulate-nacl-xray` | +| Old id | Title | New id | +| ------ | -------------------------------------------- | --------------------------------- | +| ed-1 | Structure Refinement: LBCO, HRPT (from CIF) | `refine-lbco-hrpt-from-cif` | +| ed-2 | Structure Refinement: LBCO, HRPT (from data) | `refine-lbco-hrpt-from-data` | +| ed-3 | Structure Refinement: LBCO, HRPT (report) | `refine-lbco-hrpt-report` | +| ed-4 | Refinement: PbSO4, NPD+XRD | `refine-pbso4-joint` | +| ed-5 | Refinement: Co2SiO4, D20 | `refine-cosio-d20` | +| ed-6 | Refinement: HS, HRPT | `refine-hs-hrpt` | +| ed-7 | Refinement: Si, SEPD | `refine-si-sepd` | +| ed-8 | Refinement: NCAF, WISH | `refine-ncaf-wish` | +| ed-9 | Refinement: LBCO+Si, McStas | `refine-lbco-si-mcstas` | +| ed-10 | PDF: Ni, NPD | `pdf-ni-npd` | +| ed-11 | PDF: Si, NOMAD (SNS) | `pdf-si-nomad` | +| ed-12 | PDF: NaCl, XRD | `pdf-nacl-xrd` | +| ed-13 | Fitting exercise: Si, LBCO | `fitting-exercise-si-lbco` | +| ed-14 | Refinement: Tb2Ti2O7, HEiDi | `refine-tbti-heidi` | +| ed-15 | Refinement: Taurine, SENJU | `refine-taurine-senju` | +| ed-16 | Joint: Si, Bragg+PDF | `joint-si-bragg-pdf` | +| ed-17 | Refinement: Co2SiO4, D20 (T-scan) | `refine-cosio-d20-tscan` | +| ed-18 | Load Project and Fit: LBCO, HRPT | `load-and-fit-lbco-hrpt` | +| ed-20 | Instrument calibration: BEER, ESS | `calibrate-beer-ess` | +| ed-21 | Bayesian (bumps-dream): LBCO, HRPT | `bayesian-dream-lbco-hrpt` | +| ed-22 | Bayesian (emcee): Tb2Ti2O7, HEiDi | `bayesian-emcee-tbti-heidi` | +| ed-23 | Refinement: Co2SiO4 D20 (T-scan, resumed) | `refine-cosio-d20-tscan-resumed` | +| ed-24 | Bayesian Resume (bumps-dream): LBCO, HRPT | `bayesian-dream-resume-lbco-hrpt` | +| ed-25 | Bayesian (emcee): LBCO, HRPT | `bayesian-emcee-lbco-hrpt` | +| ed-26 | Bayesian Resume (emcee): LBCO, HRPT | `bayesian-emcee-resume-lbco-hrpt` | +| ed-27 | Calculation Without Data: LBCO, CWL | `simulate-lbco-cwl` | +| ed-28 | Calculation Without Data: Si, TOF | 
`simulate-si-tof` | +| ed-29 | Calculation Without Data: NaCl, X-ray | `simulate-nacl-xray` | ## Deferred Work diff --git a/docs/docs/tutorials/bayesian-dream-display-lbco-hrpt.py b/docs/docs/tutorials/bayesian-dream-display-lbco-hrpt.py deleted file mode 100644 index da9252694..000000000 --- a/docs/docs/tutorials/bayesian-dream-display-lbco-hrpt.py +++ /dev/null @@ -1,102 +0,0 @@ -# %% [markdown] -# # Bayesian Analysis Display (`bumps-dream`): LBCO, HRPT -# -# This tutorial shows how to reopen the Bayesian project created in -# `bayesian-dream-lbco-hrpt.py` and inspect the saved fit results without rerunning DREAM. -# -# The project already contains posterior samples together with cached -# posterior density, pair, and predictive data, so the plots below are -# restored directly from disk. - -# %% [markdown] -# ## 🛠️ Import Library - -# %% -import easydiffraction as edi - -# %% [markdown] -# ## 📂 Load Project - -# %% [markdown] -# ### Locate Project -# -# Download and extract the saved Bayesian project, with the completed -# fit, persisted posterior samples, and plot caches, from the -# EasyDiffraction data repository. - -# %% -project_dir = edi.download_data('proj-lbco-hrpt-dream', destination='projects') - -# %% [markdown] -# ### Load Project -# -# Loading restores the persisted fit state, posterior samples, and plot -# caches. No new fit is launched in this tutorial. - -# %% -project = edi.Project.load(project_dir) - -# %% [markdown] -# ## 📊 Inspect Results - -# %% [markdown] -# ### Display Structure -# -# Render the La0.5Ba0.5CoO3 structure restored from the saved project. - -# %% -project.display.structure(struct_name='lbco') - -# %% [markdown] -# ### Display Fit Results -# -# The fit summary reports the committed point estimate, sampler -# settings, convergence diagnostics, and posterior parameter summaries -# from the saved Bayesian run. 
- -# %% -project.display.fit.results() - -# %% [markdown] -# ### Display Correlations -# -# The correlation matrix is restored from the saved project state. - -# %% -project.display.fit.correlations() - -# %% [markdown] -# ### Display Posterior Densities -# -# The pair plot and one-dimensional posterior distributions now load -# from the persisted caches generated when the Bayesian fit was saved. - -# %% -project.display.posterior.pairs() - -# %% -project.display.posterior.distribution() - -# %% [markdown] -# ### Display Posterior Predictive -# -# The posterior predictive view reuses the cached predictive summary -# stored in the project rather than recalculating it on first display. -# It overlays the 95% credible interval propagated from the posterior -# samples. - -# %% -project.display.posterior.predictive(expt_name='hrpt') - -# %% [markdown] -# A zoomed view is useful for checking the propagated uncertainty in a -# narrow region of the diffraction pattern. - -# %% -project.display.posterior.predictive(expt_name='hrpt', x_min=92, x_max=93) - -# %% [markdown] -# ## 💾 Save Project - -# %% -project.save_as(dir_path='projects/bayesian-dream-display-lbco-hrpt') diff --git a/docs/docs/tutorials/bayesian-dream-display-lbco-hrpt.ipynb b/docs/docs/tutorials/bayesian-dream-resume-lbco-hrpt.ipynb similarity index 59% rename from docs/docs/tutorials/bayesian-dream-display-lbco-hrpt.ipynb rename to docs/docs/tutorials/bayesian-dream-resume-lbco-hrpt.ipynb index 81676c79b..67d53fa22 100644 --- a/docs/docs/tutorials/bayesian-dream-display-lbco-hrpt.ipynb +++ b/docs/docs/tutorials/bayesian-dream-resume-lbco-hrpt.ipynb @@ -24,14 +24,31 @@ "id": "1", "metadata": {}, "source": [ - "# Bayesian Analysis Display (`bumps-dream`): LBCO, HRPT\n", + "# Bayesian Analysis Resume (`bumps-dream`): LBCO, HRPT\n", "\n", - "This tutorial shows how to reopen the Bayesian project created in\n", - "`bayesian-dream-lbco-hrpt.py` and inspect the saved fit results without rerunning DREAM.\n", + 
"This tutorial shows how to reopen the Bayesian project created previously,\n", + "inspect the saved fit results and then run more sampling steps to\n", + "extend the existing chain. Both BUMPS-DREAM and emcee support saving\n", + "and resuming their sampler state, so the same workflow applies to\n", + "either engine.\n", "\n", - "The project already contains posterior samples together with cached\n", - "posterior density, pair, and predictive data, so the plots below are\n", - "restored directly from disk." + "This workflow is useful when:\n", + "- the initial sampling run has not yet converged and more steps are needed,\n", + "- the initial sampling run has converged but more steps are desired\n", + " for better posterior resolution,\n", + "- the initial sampling run has converged but the posterior plots have\n", + " not yet been inspected and the user wants to see the plots before\n", + " deciding whether to run more steps.\n", + "\n", + "The workflow uses the same La0.5Ba0.5CoO3 powder diffraction example\n", + "as the DREAM Bayesian tutorial:\n", + "\n", + "- run a short local refinement,\n", + "- derive finite fit bounds for the sampled parameters,\n", + "- switch to DREAM and sample the posterior,\n", + "- save the project with the DREAM sampler state,\n", + "- resume the chain with additional steps,\n", + "- inspect posterior plots after each sampling stage." ] }, { @@ -67,9 +84,9 @@ "source": [ "### Locate Project\n", "\n", - "Download and extract the saved Bayesian project, with the completed\n", - "fit, persisted posterior samples, and plot caches, from the\n", - "EasyDiffraction data repository." + "Download and extract the saved DREAM project, with the persisted\n", + "sampler state and posterior caches, from the EasyDiffraction data\n", + "repository." 
] }, { @@ -107,13 +124,32 @@ "cell_type": "markdown", "id": "9", "metadata": {}, + "source": [ + "Re-save the project to a fresh working directory so resuming the\n", + "chain below writes there instead of the bundled read-only copy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [ + "project.save_as(dir_path='projects/bayesian-dream-resume-lbco-hrpt')" + ] + }, + { + "cell_type": "markdown", + "id": "11", + "metadata": {}, "source": [ "## 📊 Inspect Results" ] }, { "cell_type": "markdown", - "id": "10", + "id": "12", "metadata": {}, "source": [ "### Display Structure\n", @@ -124,7 +160,7 @@ { "cell_type": "code", "execution_count": null, - "id": "11", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -133,7 +169,7 @@ }, { "cell_type": "markdown", - "id": "12", + "id": "14", "metadata": {}, "source": [ "### Display Fit Results\n", @@ -146,7 +182,7 @@ { "cell_type": "code", "execution_count": null, - "id": "13", + "id": "15", "metadata": {}, "outputs": [], "source": [ @@ -155,7 +191,7 @@ }, { "cell_type": "markdown", - "id": "14", + "id": "16", "metadata": {}, "source": [ "### Display Correlations\n", @@ -166,7 +202,7 @@ { "cell_type": "code", "execution_count": null, - "id": "15", + "id": "17", "metadata": {}, "outputs": [], "source": [ @@ -175,7 +211,7 @@ }, { "cell_type": "markdown", - "id": "16", + "id": "18", "metadata": {}, "source": [ "### Display Posterior Densities\n", @@ -187,7 +223,7 @@ { "cell_type": "code", "execution_count": null, - "id": "17", + "id": "19", "metadata": {}, "outputs": [], "source": [ @@ -197,7 +233,7 @@ { "cell_type": "code", "execution_count": null, - "id": "18", + "id": "20", "metadata": {}, "outputs": [], "source": [ @@ -206,7 +242,7 @@ }, { "cell_type": "markdown", - "id": "19", + "id": "21", "metadata": {}, "source": [ "### Display Posterior Predictive\n", @@ -220,7 +256,7 @@ { "cell_type": "code", "execution_count": null, - "id": "20", + "id": 
"22", "metadata": {}, "outputs": [], "source": [ @@ -229,7 +265,7 @@ }, { "cell_type": "markdown", - "id": "21", + "id": "23", "metadata": {}, "source": [ "A zoomed view is useful for checking the propagated uncertainty in a\n", @@ -239,7 +275,7 @@ { "cell_type": "code", "execution_count": null, - "id": "22", + "id": "24", "metadata": {}, "outputs": [], "source": [ @@ -248,7 +284,89 @@ }, { "cell_type": "markdown", - "id": "23", + "id": "25", + "metadata": {}, + "source": [ + "## 🎲 Resume Sampling" + ] + }, + { + "cell_type": "markdown", + "id": "26", + "metadata": {}, + "source": [ + "### Run Sampling\n", + "\n", + "Resume from the saved DREAM state and append 100 more generations to\n", + "the existing chain. We use only 100 steps here to keep the tutorial\n", + "fast, but in practice you would typically run more steps to ensure\n", + "convergence and better posterior resolution." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27", + "metadata": {}, + "outputs": [], + "source": [ + "project.analysis.minimizer.random_seed = 42 # fixed seed for reproducible output\n", + "project.analysis.fit(resume=True, extra_steps=100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28", + "metadata": {}, + "outputs": [], + "source": [ + "project.display.fit.results()" + ] + }, + { + "cell_type": "markdown", + "id": "29", + "metadata": {}, + "source": [ + "### Display Resumed Posterior\n", + "\n", + "After resume, the posterior plots use the extended chain." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30", + "metadata": {}, + "outputs": [], + "source": [ + "project.display.posterior.pairs()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31", + "metadata": {}, + "outputs": [], + "source": [ + "project.display.posterior.distribution()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "32", + "metadata": {}, + "outputs": [], + "source": [ + "project.display.posterior.predictive(expt_name='hrpt', x_min=92, x_max=93)" + ] + }, + { + "cell_type": "markdown", + "id": "33", "metadata": {}, "source": [ "## 💾 Save Project" @@ -257,11 +375,11 @@ { "cell_type": "code", "execution_count": null, - "id": "24", + "id": "34", "metadata": {}, "outputs": [], "source": [ - "project.save_as(dir_path='projects/bayesian-dream-display-lbco-hrpt')" + "project.save_as(dir_path='projects/bayesian-dream-resume-lbco-hrpt')" ] } ], diff --git a/docs/docs/tutorials/bayesian-dream-resume-lbco-hrpt.py b/docs/docs/tutorials/bayesian-dream-resume-lbco-hrpt.py new file mode 100644 index 000000000..8b3bc2184 --- /dev/null +++ b/docs/docs/tutorials/bayesian-dream-resume-lbco-hrpt.py @@ -0,0 +1,158 @@ +# %% [markdown] +# # Bayesian Analysis Resume (`bumps-dream`): LBCO, HRPT +# +# This tutorial shows how to reopen the Bayesian project created previously, +# inspect the saved fit results and then run more sampling steps to +# extend the existing chain. Both BUMPS-DREAM and emcee support saving +# and resuming their sampler state, so the same workflow applies to +# either engine. +# +# This workflow is useful when: +# - the initial sampling run has not yet converged and more steps are needed, +# - the initial sampling run has converged but more steps are desired +# for better posterior resolution, +# - the initial sampling run has converged but the posterior plots have +# not yet been inspected and the user wants to see the plots before +# deciding whether to run more steps. 
+# +# The workflow uses the same La0.5Ba0.5CoO3 powder diffraction example +# as the DREAM Bayesian tutorial: +# +# - run a short local refinement, +# - derive finite fit bounds for the sampled parameters, +# - switch to DREAM and sample the posterior, +# - save the project with the DREAM sampler state, +# - resume the chain with additional steps, +# - inspect posterior plots after each sampling stage. + +# %% [markdown] +# ## 🛠️ Import Library + +# %% +import easydiffraction as edi + +# %% [markdown] +# ## 📂 Load Project + +# %% [markdown] +# ### Locate Project +# +# Download and extract the saved DREAM project, with the persisted +# sampler state and posterior caches, from the EasyDiffraction data +# repository. + +# %% +project_dir = edi.download_data('proj-lbco-hrpt-dream', destination='projects') + +# %% [markdown] +# ### Load Project +# +# Loading restores the persisted fit state, posterior samples, and plot +# caches. No fit is launched at this point; sampling resumes below. + +# %% +project = edi.Project.load(project_dir) + +# %% [markdown] +# Re-save the project to a fresh working directory so resuming the +# chain below writes there instead of the bundled read-only copy. + +# %% +project.save_as(dir_path='projects/bayesian-dream-resume-lbco-hrpt') + +# %% [markdown] +# ## 📊 Inspect Results + +# %% [markdown] +# ### Display Structure +# +# Render the La0.5Ba0.5CoO3 structure restored from the saved project. + +# %% +project.display.structure(struct_name='lbco') + +# %% [markdown] +# ### Display Fit Results +# +# The fit summary reports the committed point estimate, sampler +# settings, convergence diagnostics, and posterior parameter summaries +# from the saved Bayesian run. + +# %% +project.display.fit.results() + +# %% [markdown] +# ### Display Correlations +# +# The correlation matrix is restored from the saved project state.
+ +# %% +project.display.fit.correlations() + +# %% [markdown] +# ### Display Posterior Densities +# +# The pair plot and one-dimensional posterior distributions now load +# from the persisted caches generated when the Bayesian fit was saved. + +# %% +project.display.posterior.pairs() + +# %% +project.display.posterior.distribution() + +# %% [markdown] +# ### Display Posterior Predictive +# +# The posterior predictive view reuses the cached predictive summary +# stored in the project rather than recalculating it on first display. +# It overlays the 95% credible interval propagated from the posterior +# samples. + +# %% +project.display.posterior.predictive(expt_name='hrpt') + +# %% [markdown] +# A zoomed view is useful for checking the propagated uncertainty in a +# narrow region of the diffraction pattern. + +# %% +project.display.posterior.predictive(expt_name='hrpt', x_min=92, x_max=93) + +# %% [markdown] +# ## 🎲 Resume Sampling + +# %% [markdown] +# ### Run Sampling +# +# Resume from the saved DREAM state and append 100 more generations to +# the existing chain. We use only 100 steps here to keep the tutorial +# fast, but in practice you would typically run more steps to ensure +# convergence and better posterior resolution. + +# %% +project.analysis.minimizer.random_seed = 42 # fixed seed for reproducible output +project.analysis.fit(resume=True, extra_steps=100) + +# %% +project.display.fit.results() + +# %% [markdown] +# ### Display Resumed Posterior +# +# After resume, the posterior plots use the extended chain. 
+ +# %% +project.display.posterior.pairs() + +# %% +project.display.posterior.distribution() + +# %% +project.display.posterior.predictive(expt_name='hrpt', x_min=92, x_max=93) + +# %% [markdown] +# ## 💾 Save Project + +# %% +project.save_as(dir_path='projects/bayesian-dream-resume-lbco-hrpt') diff --git a/docs/docs/tutorials/bayesian-emcee-resume-lbco-hrpt.ipynb b/docs/docs/tutorials/bayesian-emcee-resume-lbco-hrpt.ipynb index 916a49785..2742e015f 100644 --- a/docs/docs/tutorials/bayesian-emcee-resume-lbco-hrpt.ipynb +++ b/docs/docs/tutorials/bayesian-emcee-resume-lbco-hrpt.ipynb @@ -28,9 +28,9 @@ "\n", "This tutorial shows how to reopen the Bayesian project created previously,\n", "inspect the saved fit results and then run more sampling steps to\n", - "extend the existing chain. Resuming only works with EMCEE because the\n", - "current BUMPS-DREAM implementation does not support saving and\n", - "resuming its state.\n", + "extend the existing chain. Both emcee and BUMPS-DREAM support saving\n", + "and resuming their sampler state, so the same workflow applies to\n", + "either engine.\n", "\n", "This workflow is useful when:\n", "- the initial sampling run has not yet converged and more steps are needed,\n", diff --git a/docs/docs/tutorials/bayesian-emcee-resume-lbco-hrpt.py b/docs/docs/tutorials/bayesian-emcee-resume-lbco-hrpt.py index 006b24aae..0aed571a2 100644 --- a/docs/docs/tutorials/bayesian-emcee-resume-lbco-hrpt.py +++ b/docs/docs/tutorials/bayesian-emcee-resume-lbco-hrpt.py @@ -3,9 +3,9 @@ # # This tutorial shows how to reopen the Bayesian project created previously, # inspect the saved fit results and then run more sampling steps to -# extend the existing chain. Resuming only works with EMCEE because the -# current BUMPS-DREAM implementation does not support saving and -# resuming its state. +# extend the existing chain. Both emcee and BUMPS-DREAM support saving +# and resuming their sampler state, so the same workflow applies to +# either engine. 
# # This workflow is useful when: # - the initial sampling run has not yet converged and more steps are needed, diff --git a/docs/docs/tutorials/index.json b/docs/docs/tutorials/index.json index d92f0f227..e5e51f6cb 100644 --- a/docs/docs/tutorials/index.json +++ b/docs/docs/tutorials/index.json @@ -1,10 +1,10 @@ { - "bayesian-dream-display-lbco-hrpt": { + "bayesian-dream-resume-lbco-hrpt": { "order": 21, - "url": "https://easyscience.github.io/diffraction-lib/{version}/tutorials/bayesian-dream-display-lbco-hrpt/bayesian-dream-display-lbco-hrpt.ipynb", + "url": "https://easyscience.github.io/diffraction-lib/{version}/tutorials/bayesian-dream-resume-lbco-hrpt/bayesian-dream-resume-lbco-hrpt.ipynb", "original_name": "", - "title": "Bayesian Analysis Display (bumps-dream): LBCO, HRPT", - "description": "Reopen the saved bumps-DREAM Bayesian project for La0.5Ba0.5CoO3 and inspect persisted fit summaries, correlation matrix, and posterior plots without rerunning MCMC sampling", + "title": "Bayesian Analysis Resume (bumps-dream): LBCO, HRPT", + "description": "Reload a saved bumps-DREAM Bayesian project for La0.5Ba0.5CoO3, inspect the posterior, and resume MCMC sampling with additional steps", "level": "advanced" }, "bayesian-dream-lbco-hrpt": { @@ -28,7 +28,7 @@ "url": "https://easyscience.github.io/diffraction-lib/{version}/tutorials/bayesian-emcee-resume-lbco-hrpt/bayesian-emcee-resume-lbco-hrpt.ipynb", "original_name": "", "title": "Bayesian Analysis Resume (emcee): LBCO, HRPT", - "description": "Reload a saved emcee Bayesian project for La0.5Ba0.5CoO3, inspect the posterior, and resume MCMC sampling with additional steps (supported only for emcee, not bumps-DREAM)", + "description": "Reload a saved emcee Bayesian project for La0.5Ba0.5CoO3, inspect the posterior, and resume MCMC sampling with additional steps", "level": "advanced" }, "bayesian-emcee-tbti-heidi": { diff --git a/docs/docs/tutorials/index.md b/docs/docs/tutorials/index.md index 893ea99ae..2472ab2ec 
100644 --- a/docs/docs/tutorials/index.md +++ b/docs/docs/tutorials/index.md @@ -50,7 +50,7 @@ The tutorials are organized into the following categories: `analysis/results.csv` after an incomplete previous run. See also under [Bayesian Analysis](#bayesian-analysis): -[LBCO Bayesian Display (`bumps-dream`)](bayesian-dream-display-lbco-hrpt.ipynb) +[LBCO Bayesian Resume (`bumps-dream`)](bayesian-dream-resume-lbco-hrpt.ipynb) and [LBCO Bayesian Resume (`emcee`)](bayesian-emcee-resume-lbco-hrpt.ipynb) — both load saved projects containing Bayesian fit state. @@ -135,11 +135,11 @@ and (MCMC) sampling with the bumps-DREAM minimizer to explore the posterior distribution of the refined parameters, providing insights into parameter uncertainties and correlations. -- [LBCO Bayesian Display (`bumps-dream`)](bayesian-dream-display-lbco-hrpt.ipynb) +- [LBCO Bayesian Resume (`bumps-dream`)](bayesian-dream-resume-lbco-hrpt.ipynb) – Shows how to reopen the saved Bayesian project produced by the LBCO - Bayesian tutorial and inspect persisted fit summaries, correlation - matrix, posterior distribution plots, and predictive checks — without - rerunning MCMC sampling. + Bayesian tutorial, inspect persisted fit summaries, correlation + matrix, posterior distribution plots, and predictive checks, and then + resume DREAM sampling with additional steps. 
- [LBCO Bayesian (`emcee`)](bayesian-emcee-lbco-hrpt.ipynb) – Two-stage workflow on the LBCO HRPT dataset: first a quick local refinement to obtain a point estimate and uncertainties, then full posterior diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 8bfbdeaf4..937ff5cfc 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -247,7 +247,7 @@ nav: - BEER McStas: tutorials/calibrate-beer-ess.ipynb - Bayesian Analysis: - LBCO pd bumps-dream: tutorials/bayesian-dream-lbco-hrpt.ipynb - - LBCO pd bumps-dream Display: tutorials/bayesian-dream-display-lbco-hrpt.ipynb + - LBCO pd bumps-dream Resume: tutorials/bayesian-dream-resume-lbco-hrpt.ipynb - LBCO pd emcee: tutorials/bayesian-emcee-lbco-hrpt.ipynb - LBCO pd emcee Resume: tutorials/bayesian-emcee-resume-lbco-hrpt.ipynb - Tb2TiO7 sg bumps-dream: tutorials/bayesian-emcee-tbti-heidi.ipynb diff --git a/src/easydiffraction/_data_index_ref.txt b/src/easydiffraction/_data_index_ref.txt index 8452615e6..fde03d831 100644 --- a/src/easydiffraction/_data_index_ref.txt +++ b/src/easydiffraction/_data_index_ref.txt @@ -1 +1 @@ -11bb1e4a50b36bc8e710023915244fb0c836ba3b +b56b6e3dcb9c198b7d6e85864d55eb8e1d500bf2 From c211e74833f987ff097d1b9777b906a181a6c528 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 21:11:43 +0200 Subject: [PATCH 24/33] Apply pixi run fix formatting --- .../dev/adrs/accepted/bayesian-resume-and-mcmc-sidecar.md | 8 ++++---- src/easydiffraction/io/results_sidecar.py | 4 +--- src/easydiffraction/project/project.py | 5 ++--- .../easydiffraction/analysis/test_analysis_coverage.py | 2 +- 4 files changed, 8 insertions(+), 11 deletions(-) diff --git a/docs/dev/adrs/accepted/bayesian-resume-and-mcmc-sidecar.md b/docs/dev/adrs/accepted/bayesian-resume-and-mcmc-sidecar.md index 7e58916b8..f8b02d0d8 100644 --- a/docs/dev/adrs/accepted/bayesian-resume-and-mcmc-sidecar.md +++ b/docs/dev/adrs/accepted/bayesian-resume-and-mcmc-sidecar.md @@ -133,10 +133,10 @@ the derived sidecar arrays from memory but 
cannot reconstruct the raw sampler state, which only ever exists on disk. Relocating a project with `save_as` therefore copies the raw-state groups (`emcee_chain`, `dream_state`) from the source sidecar into the destination before the -derived arrays are rewritten. Without this, a resume after -`load` + `save_as` — the flow both Bayesian resume tutorials use — would -find no chain to extend. This makes resume genuinely survive a -load/relocate round-trip for both engines, as required above. +derived arrays are rewritten. Without this, a resume after `load` + +`save_as` — the flow both Bayesian resume tutorials use — would find no +chain to extend. This makes resume genuinely survive a load/relocate +round-trip for both engines, as required above. ### 3. Rename the sidecar `results.h5` → `mcmc.h5` diff --git a/src/easydiffraction/io/results_sidecar.py b/src/easydiffraction/io/results_sidecar.py index c3995f4b3..f0e6560e5 100644 --- a/src/easydiffraction/io/results_sidecar.py +++ b/src/easydiffraction/io/results_sidecar.py @@ -108,9 +108,7 @@ def carry_over_raw_sampler_state( with h5py.File(source_path, 'r') as source_handle: present_groups = [ - group_name - for group_name in _RAW_SAMPLER_STATE_GROUPS - if group_name in source_handle + group_name for group_name in _RAW_SAMPLER_STATE_GROUPS if group_name in source_handle ] if not present_groups: return diff --git a/src/easydiffraction/project/project.py b/src/easydiffraction/project/project.py index 04044b115..469ecd7c6 100644 --- a/src/easydiffraction/project/project.py +++ b/src/easydiffraction/project/project.py @@ -24,7 +24,6 @@ from easydiffraction.io.cif.serialize import project_to_cif from easydiffraction.io.edi import edi_body_from_text from easydiffraction.io.edi import section_to_edi -from easydiffraction.io.results_sidecar import SIDECAR_FILE_NAME from easydiffraction.io.results_sidecar import carry_over_raw_sampler_state from easydiffraction.io.results_sidecar import read_analysis_results_sidecar from 
easydiffraction.io.results_sidecar import write_analysis_results_sidecar @@ -680,8 +679,8 @@ def save_as( self.metadata.path = project_dir # Relocating a saved Bayesian project must preserve the raw, # resumable sampler-state groups (emcee_chain / dream_state). - # save() rewrites only the derived sidecar arrays from memory, so - # copy the raw groups across before they are rebuilt; otherwise + # save() rebuilds only the derived sidecar arrays from memory, + # so copy the raw groups across before they are rebuilt; else # resume after load + save_as would have no chain to extend. if previous_path is not None and project_dir.resolve() != previous_path.resolve(): carry_over_raw_sampler_state( diff --git a/tests/unit/easydiffraction/analysis/test_analysis_coverage.py b/tests/unit/easydiffraction/analysis/test_analysis_coverage.py index 7984fd0d0..0ef1e5313 100644 --- a/tests/unit/easydiffraction/analysis/test_analysis_coverage.py +++ b/tests/unit/easydiffraction/analysis/test_analysis_coverage.py @@ -1062,7 +1062,7 @@ def test_validate_fit_request_resume_requires_mcmc_minimizer(self): a = Analysis(project=_make_project()) # default lmfit minimizer with pytest.raises( ValueError, - match=r"Resume is supported only for MCMC minimizers", + match=r'Resume is supported only for MCMC minimizers', ): a._validate_fit_request(mode=FitModeEnum.SINGLE, resume=True, extra_steps=None) From 0610b65cc786c3d55ca3ca568fa19eedcf67a00a Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 22:30:11 +0200 Subject: [PATCH 25/33] Pin data index to regenerated 10000-step Bayesian projects --- src/easydiffraction/_data_index_ref.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/easydiffraction/_data_index_ref.txt b/src/easydiffraction/_data_index_ref.txt index fde03d831..2e54c16e5 100644 --- a/src/easydiffraction/_data_index_ref.txt +++ b/src/easydiffraction/_data_index_ref.txt @@ -1 +1 @@ -b56b6e3dcb9c198b7d6e85864d55eb8e1d500bf2 
+844944070519179249ea2913bd5d4e2e6a475621 From 6a895371a5eaeddc22f9feff5bfdb2ff45618316 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 22:54:41 +0200 Subject: [PATCH 26/33] Update DREAM resume tutorial baseline --- tests/tutorials/baseline.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/tutorials/baseline.json b/tests/tutorials/baseline.json index 94be4987d..5a774de42 100644 --- a/tests/tutorials/baseline.json +++ b/tests/tutorials/baseline.json @@ -1,11 +1,11 @@ { - "bayesian-dream-display-lbco-hrpt": { + "bayesian-dream-resume-lbco-hrpt": { "result_kind": "bayesian", "rtol": 0.1, "reduced_chi_square": 1.289863, "parameters": { - "lbco.cell.length_a": 3.891319, - "hrpt.linked_structure.lbco.scale": 9.126921 + "lbco.cell.length_a": 3.891321, + "hrpt.linked_structure.lbco.scale": 9.132805 } }, "bayesian-dream-lbco-hrpt": { From 999453de814a7490ecfab0215d2cce5f57005d86 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 15 Jun 2026 22:55:32 +0200 Subject: [PATCH 27/33] Record Phase 2 completion and save_as scope note in plan --- .../plans/bayesian-resume-and-mcmc-sidecar.md | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md b/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md index ccbae8162..ad705b89b 100644 --- a/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md +++ b/docs/dev/plans/bayesian-resume-and-mcmc-sidecar.md @@ -241,6 +241,32 @@ published Bayesian projects so the saved DREAM project carries a the regenerated run). `notebook-prepare` to regenerate the notebook. 4. Verify both resume tutorials execute against the new pinned data. 
+### Phase 2 status (completed) + +- New unit tests added for the DREAM state round-trip, resume validation + (count / order / population), `extra_steps` translation, `chains` ⇄ + `population_size` consistency, and the raw-state lifecycle; a new + integration test mirrors the emcee resume parity check. Four + pre-existing tests were updated to the resume-aware API. +- **Scope addition surfaced during verification:** `project.save_as` + rebuilds the derived sidecar arrays from memory but previously dropped + the raw sampler-state groups, so a resume after `load` + `save_as` + (the exact flow both resume tutorials use) found no chain — emcee only + appeared to work because the old code silently restarted a fresh fit. + The ADR already requires resume to survive a save/load round-trip, so + `save_as` now copies the `emcee_chain` / `dream_state` groups across + via `carry_over_raw_sampler_state` (covered by new unit tests and the + ADR §2 note). This fixes resume for both engines. +- Both published projects were regenerated at 10000 steps (seed 42), + pushed to `easyscience/diffraction`, and the pinned commit bumped to + `8449440`. The DREAM tutorial was renamed to + `bayesian-dream-resume-lbco-hrpt` (load + resume), nav / `index.md` / + `index.json` / `baseline.json` updated, and the stale "emcee only" + note removed from the emcee resume tutorial. +- `pixi run fix`, `check`, `unit-tests` (3628), `integration-tests` + (194), `script-tests` (34 passed, 8 skipped), and the tutorial-output + baseline checks (24 passed) all pass against the new pinned data. 
+ ## Suggested Pull Request **Title:** Resume and extend Bayesian (bumps-DREAM) refinements; clearer From 5c5886b582cd1d51d43d7d6cd212ff43734eb743 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 16 Jun 2026 07:13:47 +0200 Subject: [PATCH 28/33] Preserve raw sampler state on same-path save_as --- src/easydiffraction/project/project.py | 24 ++++++--- .../project/test_project_save.py | 49 +++++++++++++++++++ 2 files changed, 66 insertions(+), 7 deletions(-) diff --git a/src/easydiffraction/project/project.py b/src/easydiffraction/project/project.py index 469ecd7c6..6b58fe36c 100644 --- a/src/easydiffraction/project/project.py +++ b/src/easydiffraction/project/project.py @@ -663,8 +663,16 @@ def save_as( project_dir = resolve_artifact_path(dir_path) previous_path = self.metadata.path + saving_in_place = ( + previous_path is not None and project_dir.resolve() == previous_path.resolve() + ) - if overwrite and project_dir.is_dir(): + # Saving in place (same path as the loaded/previous project) + # must behave like save(): never wipe the directory, or the + # existing mcmc.h5 (with the raw, resumable sampler-state groups + # that cannot be rebuilt from memory) would be lost. save() + # overwrites the derived arrays in place and keeps those groups. + if overwrite and project_dir.is_dir() and not saving_in_place: current_working_directory = pathlib.Path.cwd().resolve() resolved_project_dir = project_dir.resolve() if resolved_project_dir == current_working_directory: @@ -677,12 +685,14 @@ def save_as( shutil.rmtree(project_dir) self.metadata.path = project_dir - # Relocating a saved Bayesian project must preserve the raw, - # resumable sampler-state groups (emcee_chain / dream_state). - # save() rebuilds only the derived sidecar arrays from memory, - # so copy the raw groups across before they are rebuilt; else - # resume after load + save_as would have no chain to extend. 
- if previous_path is not None and project_dir.resolve() != previous_path.resolve(): + # Relocating a saved Bayesian project to a new path must keep + # the raw, resumable sampler-state groups (emcee_chain / + # dream_state). save() rebuilds only the derived sidecar arrays + # from memory, so copy the raw groups across before they are + # rebuilt; else resume after load + save_as would have no chain + # to extend. Saving in place needs no copy (save() preserves + # them). + if previous_path is not None and not saving_in_place: carry_over_raw_sampler_state( source_analysis_dir=previous_path / 'analysis', destination_analysis_dir=project_dir / 'analysis', diff --git a/tests/unit/easydiffraction/project/test_project_save.py b/tests/unit/easydiffraction/project/test_project_save.py index 93d2e5f32..5b3047a1e 100644 --- a/tests/unit/easydiffraction/project/test_project_save.py +++ b/tests/unit/easydiffraction/project/test_project_save.py @@ -117,3 +117,52 @@ def test_project_save_omits_empty_fit_state_sections(tmp_path): assert '_fit_parameter.parameter_unique_name' not in analysis_cif assert '_fit_result.result_kind' not in analysis_cif + + +def test_save_as_in_place_preserves_raw_sampler_state(tmp_path): + """save_as() to the current path keeps the resumable raw chain. + + Regression: a same-path save_as() previously wiped the directory + (and so the raw dream_state / emcee_chain groups in mcmc.h5) before + save() rebuilt only the derived arrays, breaking resume on reload. + """ + import h5py + import numpy as np + + from easydiffraction.analysis.enums import FitResultKindEnum + from easydiffraction.project.project import Project + + project = Project(name='resumable') + project.report.html = False + target = tmp_path / 'proj' + project.save_as(str(target)) + + # Make the analysis look like a saved Bayesian fit so the sidecar is + # written rather than deleted as stale. 
+ analysis = project.analysis + analysis.minimizer.type = 'bumps (dream)' + analysis._set_has_persisted_fit_state(value=True) + analysis.fit_result._set_result_kind(FitResultKindEnum.BAYESIAN.value) + analysis._persisted_fit_state_sidecar = { + 'posterior': { + 'parameter_samples': np.zeros((2, 2, 1), dtype=float), + 'log_posterior': np.zeros((2, 2), dtype=float), + 'draw_index': np.arange(2, dtype=float), + } + } + + # Seed a raw resumable sampler-state group, as a real resume would. + sidecar_path = target / 'analysis' / 'mcmc.h5' + with h5py.File(sidecar_path, 'a') as handle: + state = handle.create_group('dream_state') + state.create_dataset( + 'param_names', + data=np.array([b'lbco.cell.length_a']), + ) + + # Save in place (same path as the loaded project). + project.save_as(str(target)) + + with h5py.File(sidecar_path, 'r') as handle: + assert 'dream_state' in handle # raw chain survives + assert 'posterior' in handle # derived arrays rebuilt From ee59fc0ac26aee20033fb286590521802f71c744 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 16 Jun 2026 09:53:10 +0200 Subject: [PATCH 29/33] Parallelize DREAM via fork pool when MPMapper falls back to serial --- .../analysis/minimizers/bumps_dream.py | 114 ++++++++++++++++-- .../fitting/test_bumps_dream_support.py | 7 ++ .../analysis/minimizers/test_bumps_dream.py | 72 +++++++++++ 3 files changed, 181 insertions(+), 12 deletions(-) diff --git a/src/easydiffraction/analysis/minimizers/bumps_dream.py b/src/easydiffraction/analysis/minimizers/bumps_dream.py index ea7342c19..3e567dc36 100644 --- a/src/easydiffraction/analysis/minimizers/bumps_dream.py +++ b/src/easydiffraction/analysis/minimizers/bumps_dream.py @@ -6,6 +6,7 @@ import math import multiprocessing +import os import random import sys from dataclasses import dataclass @@ -58,6 +59,45 @@ # resumable bumps-DREAM sampler state, alongside emcee's emcee_chain. 
DREAM_STATE_GROUP = 'dream_state' +# Fork-inherited problem for parallel DREAM population evaluation. +# bumps' MPMapper needs a picklable problem and an import-safe main +# module, so it silently falls back to serial for cryspy problems run +# from a notebook or script (spawn/forkserver start methods). Mirroring +# the emcee minimizer, a fork-based pool inherits the problem through +# this module global instead of pickling it, keeping evaluation +# parallel where MPMapper cannot. +_DREAM_WORKER_PROBLEM: object | None = None + + +def _set_dream_worker_problem(problem: object | None) -> None: + """Set the fork-inherited DREAM worker problem.""" + global _DREAM_WORKER_PROBLEM # noqa: PLW0603 + _DREAM_WORKER_PROBLEM = problem + + +def _dream_nllf_worker(point: object) -> float: + """Evaluate one point's negative log-likelihood in a fork worker.""" + if _DREAM_WORKER_PROBLEM is None: + msg = 'DREAM worker problem has not been initialized.' + raise RuntimeError(msg) + return _DREAM_WORKER_PROBLEM.nllf(point) + + +class _DreamForkPoolMapper: + """ + Fork-based population mapper matching the bumps mapper contract. + """ + + def __init__(self, pool: object) -> None: + """Store the fork pool used to evaluate the population.""" + self.pool = pool + + def __call__(self, points: object) -> list[float]: + """ + Return the negative log-likelihood for each population point. 
+ """ + return self.pool.map(_dream_nllf_worker, list(points)) + def _write_dream_state_sidecar( sidecar_path: Path, @@ -1037,9 +1077,11 @@ def _build_driver( driver.clip() except KeyboardInterrupt: MPMapper.stop_mapper() + self._shutdown_fork_pool_mapper(mapper) raise except Exception: MPMapper.stop_mapper() + self._shutdown_fork_pool_mapper(mapper) raise else: return driver @@ -1049,6 +1091,56 @@ def _build_mapper(self, problem: FitProblem) -> object | None: if self.parallel == 1: return None + shared_display_handle = getattr(self.tracker, '_shared_display_handle', None) + activity_indicator = getattr(self.tracker, '_activity_indicator', None) + if shared_display_handle is not None: + self.tracker._set_shared_display_handle(None) + if activity_indicator is not None: + self.tracker._activity_indicator = None + + try: + # Prefer a fork-based pool: it inherits the (unpicklable) + # cryspy problem instead of pickling it, so it parallelizes + # where bumps' MPMapper would fall back to serial. + fork_mapper = self._build_fork_pool_mapper(problem) + if fork_mapper is not None: + return fork_mapper + return self._build_process_pool_mapper(problem) + finally: + if activity_indicator is not None: + self.tracker._activity_indicator = activity_indicator + if shared_display_handle is not None: + self.tracker._set_shared_display_handle(shared_display_handle) + + def _resolved_worker_count(self) -> int: + """ + Return the worker count for parallel population evaluation. + """ + if self.parallel > 0: + return self.parallel + return os.cpu_count() or 1 + + def _build_fork_pool_mapper(self, problem: FitProblem) -> object | None: + """ + Return a fork-pool mapper, or ``None`` when fork is unusable. 
+ """ + if os.name == 'nt' or 'fork' not in multiprocessing.get_all_start_methods(): + return None + worker_count = self._resolved_worker_count() + if worker_count <= 1: + return None + + _set_dream_worker_problem(problem) + try: + context = multiprocessing.get_context('fork') + pool = context.Pool(worker_count) + except (OSError, ValueError, RuntimeError): + _set_dream_worker_problem(None) + return None + return _DreamForkPoolMapper(pool) + + def _build_process_pool_mapper(self, problem: FitProblem) -> object | None: + """Return a bumps MPMapper, or ``None`` to run serially.""" if self._requires_serial_mapper_for_spawn_main_module(): self._warn_after_tracking( 'DREAM parallel evaluation requires an import-safe main ' @@ -1057,13 +1149,6 @@ def _build_mapper(self, problem: FitProblem) -> object | None: ) return None - shared_display_handle = getattr(self.tracker, '_shared_display_handle', None) - activity_indicator = getattr(self.tracker, '_activity_indicator', None) - if shared_display_handle is not None: - self.tracker._set_shared_display_handle(None) - if activity_indicator is not None: - self.tracker._activity_indicator = None - try: if not can_pickle(problem): self._warn_after_tracking( @@ -1083,11 +1168,15 @@ def _build_mapper(self, problem: FitProblem) -> object | None: 'serial execution.' 
) return None - finally: - if activity_indicator is not None: - self.tracker._activity_indicator = activity_indicator - if shared_display_handle is not None: - self.tracker._set_shared_display_handle(shared_display_handle) + + @staticmethod + def _shutdown_fork_pool_mapper(mapper: object | None) -> None: + """Terminate a fork-pool mapper and clear the worker problem.""" + pool = getattr(mapper, 'pool', None) + if pool is not None: + pool.terminate() + pool.join() + _set_dream_worker_problem(None) @staticmethod def _requires_serial_mapper_for_spawn_main_module() -> bool: @@ -1138,6 +1227,7 @@ def _execute_driver( ) finally: MPMapper.stop_mapper() + BumpsDreamMinimizer._shutdown_fork_pool_mapper(getattr(driver, 'mapper', None)) numpy_rng.set_state(numpy_state) random.setstate(python_state) diff --git a/tests/integration/fitting/test_bumps_dream_support.py b/tests/integration/fitting/test_bumps_dream_support.py index 1d6864967..3cb8bafac 100644 --- a/tests/integration/fitting/test_bumps_dream_support.py +++ b/tests/integration/fitting/test_bumps_dream_support.py @@ -273,6 +273,9 @@ def test_build_mapper_falls_back_for_serial_and_unpicklable(monkeypatch): warnings: list[str] = [] minimizer.parallel = 0 + # Force the process-pool fallback path (no fork pool) to exercise the + # MPMapper/serial branch. 
+ monkeypatch.setattr(minimizer, '_build_fork_pool_mapper', lambda problem: None) _simulate_import_safe_spawn_main_module(monkeypatch) monkeypatch.setattr( 'easydiffraction.analysis.minimizers.bumps_dream.can_pickle', lambda problem: False @@ -290,6 +293,7 @@ def test_build_mapper_temporarily_clears_shared_display_handle(monkeypatch): minimizer = BumpsDreamMinimizer() minimizer.parallel = 0 + monkeypatch.setattr(minimizer, '_build_fork_pool_mapper', lambda problem: None) _simulate_import_safe_spawn_main_module(monkeypatch) handle = object() activity_indicator = object() @@ -329,6 +333,7 @@ def test_build_mapper_allows_real_can_pickle_with_live_tracker_state(monkeypatch minimizer = BumpsDreamMinimizer() minimizer.parallel = 0 + monkeypatch.setattr(minimizer, '_build_fork_pool_mapper', lambda problem: None) _simulate_import_safe_spawn_main_module(monkeypatch) bumps_params = [BumpsParameter(value=1.0, name='alpha')] @@ -353,6 +358,7 @@ def test_build_mapper_falls_back_for_spawn_bootstrap_runtime_error(monkeypatch): minimizer = BumpsDreamMinimizer() minimizer.parallel = 0 warnings: list[str] = [] + monkeypatch.setattr(minimizer, '_build_fork_pool_mapper', lambda problem: None) _simulate_import_safe_spawn_main_module(monkeypatch) monkeypatch.setattr( @@ -389,6 +395,7 @@ def test_build_mapper_falls_back_before_starting_spawn_for_direct_script(monkeyp warnings: list[str] = [] pickle_checks: list[object] = [] + monkeypatch.setattr(minimizer, '_build_fork_pool_mapper', lambda problem: None) monkeypatch.setattr( 'easydiffraction.analysis.minimizers.bumps_dream.multiprocessing.get_start_method', lambda allow_none=True: 'spawn', diff --git a/tests/unit/easydiffraction/analysis/minimizers/test_bumps_dream.py b/tests/unit/easydiffraction/analysis/minimizers/test_bumps_dream.py index 91b563a1c..60fa332f7 100644 --- a/tests/unit/easydiffraction/analysis/minimizers/test_bumps_dream.py +++ b/tests/unit/easydiffraction/analysis/minimizers/test_bumps_dream.py @@ -645,3 +645,75 @@ def 
test_chains_alias_shares_storage_with_pop(): minimizer.pop = 2 assert minimizer.chains == 2 + + +def test_dream_nllf_worker_requires_initialized_problem(): + from easydiffraction.analysis.minimizers import bumps_dream as bd + + bd._set_dream_worker_problem(None) + with pytest.raises(RuntimeError, match='worker problem has not been initialized'): + bd._dream_nllf_worker(np.array([1.0])) + + problem = SimpleNamespace(nllf=lambda point: float(point[0]) * 2.0) + bd._set_dream_worker_problem(problem) + try: + assert bd._dream_nllf_worker(np.array([3.0])) == 6.0 + finally: + bd._set_dream_worker_problem(None) + + +def test_dream_fork_pool_mapper_maps_points_via_pool(): + from easydiffraction.analysis.minimizers import bumps_dream as bd + + class FakePool: + def map(self, fn, points): + return [fn(point) for point in points] + + problem = SimpleNamespace(nllf=lambda point: float(point[0])) + bd._set_dream_worker_problem(problem) + try: + mapper = bd._DreamForkPoolMapper(FakePool()) + assert mapper([np.array([1.0]), np.array([2.5])]) == [1.0, 2.5] + finally: + bd._set_dream_worker_problem(None) + + +def test_shutdown_fork_pool_mapper_terminates_and_clears_problem(): + from easydiffraction.analysis.minimizers import bumps_dream as bd + from easydiffraction.analysis.minimizers.bumps_dream import BumpsDreamMinimizer + + events: list[str] = [] + + class FakePool: + def terminate(self): + events.append('terminate') + + def join(self): + events.append('join') + + bd._set_dream_worker_problem(object()) + mapper = bd._DreamForkPoolMapper(FakePool()) + + BumpsDreamMinimizer._shutdown_fork_pool_mapper(mapper) + + assert events == ['terminate', 'join'] + assert bd._DREAM_WORKER_PROBLEM is None + + # Tolerates a non-fork mapper (e.g. MPMapper's plain function) and None. 
+ BumpsDreamMinimizer._shutdown_fork_pool_mapper(lambda points: points) + BumpsDreamMinimizer._shutdown_fork_pool_mapper(None) + + +def test_build_fork_pool_mapper_returns_none_without_fork(monkeypatch): + from easydiffraction.analysis.minimizers import bumps_dream as bd + from easydiffraction.analysis.minimizers.bumps_dream import BumpsDreamMinimizer + + minimizer = BumpsDreamMinimizer() + minimizer.parallel = 0 + monkeypatch.setattr( + bd.multiprocessing, + 'get_all_start_methods', + lambda: ['spawn', 'forkserver'], + ) + + assert minimizer._build_fork_pool_mapper('problem') is None From 6cb762d44cbbe6b4a10e329af4042ed65fbab433 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 16 Jun 2026 10:02:54 +0200 Subject: [PATCH 30/33] Report DREAM resume progress relative to new generations --- docs/dev/package-structure/full.md | 1 + .../analysis/minimizers/bumps_dream.py | 47 ++++++++++++++----- .../fitting/test_bumps_dream_support.py | 1 - .../analysis/minimizers/test_bumps_dream.py | 41 +++++++++++++++- 4 files changed, 76 insertions(+), 14 deletions(-) diff --git a/docs/dev/package-structure/full.md b/docs/dev/package-structure/full.md index ae33ba78c..80eaeaf0f 100644 --- a/docs/dev/package-structure/full.md +++ b/docs/dev/package-structure/full.md @@ -155,6 +155,7 @@ │ │ ├── 📄 bumps_de.py │ │ │ └── 🏷️ class BumpsDEMinimizer │ │ ├── 📄 bumps_dream.py +│ │ │ ├── 🏷️ class _DreamForkPoolMapper │ │ │ ├── 🏷️ class _DreamRunContext │ │ │ ├── 🏷️ class _DreamDriverResult │ │ │ ├── 🏷️ class _DreamProgressMonitor diff --git a/src/easydiffraction/analysis/minimizers/bumps_dream.py b/src/easydiffraction/analysis/minimizers/bumps_dream.py index 3e567dc36..dd5fd412d 100644 --- a/src/easydiffraction/analysis/minimizers/bumps_dream.py +++ b/src/easydiffraction/analysis/minimizers/bumps_dream.py @@ -220,6 +220,7 @@ def __init__( n_parameters: int, total_generations: int, burn_steps: int, + start_generation: int = 0, ) -> None: """Precompute per-phase progress targets for 
reporting.""" self._tracker = tracker @@ -227,6 +228,10 @@ def __init__( self._n_parameters = n_parameters self._total_generations = max(1, total_generations) self._burn_steps = max(0, burn_steps) + # On a resume run the chain already holds ``start_generation`` + # generations, so progress is reported relative to that baseline + # (1..extra_steps) instead of the absolute generation count. + self._start_generation = min(max(0, start_generation), self._total_generations - 1) burn_target_count, sampling_target_count = self._phase_progress_point_counts( total_generations=self._total_generations, burn_steps=self._burn_steps, @@ -237,13 +242,22 @@ def __init__( target_count=burn_target_count, ) self._sampling_targets = self._progress_targets( - start=self._burn_steps + 1, + start=max(self._burn_steps, self._start_generation) + 1, stop=self._total_generations, target_count=sampling_target_count, ) self._next_burn_target_index = 0 self._next_sampling_target_index = 0 + def _reported_iteration(self, generation: int) -> int: + """Return the generation relative to the resume baseline.""" + clamped_generation = min(generation, self._total_generations) + return max(1, clamped_generation - self._start_generation) + + def _reported_total_iterations(self) -> int: + """Return the total relative to the resume baseline.""" + return max(1, self._total_generations - self._start_generation) + @staticmethod def config_history(history: object) -> None: """Declare the history fields needed for progress updates.""" @@ -260,8 +274,8 @@ def __call__(self, history: object) -> None: log_posterior = self._population_mean_log_posterior(history) self._tracker.track_sampler_progress( SamplerProgressUpdate( - iteration=generation, - total_iterations=self._total_generations, + iteration=self._reported_iteration(generation), + total_iterations=self._reported_total_iterations(), phase=self._phase_name(generation), progress_percent=self._progress_percent(generation), log_posterior=log_posterior, @@ 
-281,8 +295,8 @@ def final(self, history: object, best: dict[str, object]) -> None: reduced_chi2 = self._reduced_chi_square_from_nllf(best_nllf) self._tracker.track_sampler_progress( SamplerProgressUpdate( - iteration=generation, - total_iterations=self._total_generations, + iteration=self._reported_iteration(generation), + total_iterations=self._reported_total_iterations(), phase=self._phase_name(generation), progress_percent=self._progress_percent(generation), log_posterior=self._population_mean_log_posterior(history), @@ -384,9 +398,13 @@ def _phase_name(self, generation: int) -> str: return 'sampling' def _progress_percent(self, generation: int) -> float: - """Return DREAM progress as a percentage.""" + """Return DREAM progress over new generations, in percent.""" clamped_generation = min(generation, self._total_generations) - return 100.0 * clamped_generation / self._total_generations + numerator = max(0, clamped_generation - self._start_generation) + denominator = self._total_generations - self._start_generation + if denominator <= 0: + return 100.0 + return 100.0 * numerator / denominator @staticmethod def _population_mean_log_posterior(history: object) -> float: @@ -824,7 +842,10 @@ def _run_solver( kwargs=kwargs, extra_steps=kwargs.get('extra_steps'), ) - total_iterations = int(resume_overrides['steps_override'] + 1) + # Report progress over the new generations only (1..extra). 
+ total_iterations = int( + resume_overrides['steps_override'] - resume_overrides['start_generation'] + 1 + ) else: total_iterations = int(self.steps + self._resolved_burn(self.steps) + 1) self.tracker.start_sampler_pre_processing(total_iterations=total_iterations) @@ -925,6 +946,7 @@ def _prepare_dream_resume( 'burn_override': 0, 'samples_override': target_steps * pop_scale * n_parameters, 'pop_override': pop_scale, + 'start_generation': current_steps, } return overrides, copy.deepcopy(state) @@ -982,6 +1004,7 @@ def _prepare_run_context( burn_override: int | None = None, samples_override: int | None = None, pop_override: int | None = None, + start_generation: int = 0, ) -> _DreamRunContext: """ Prepare a driver and metadata for one DREAM solver run. @@ -1002,7 +1025,6 @@ def _prepare_run_context( fitclass = next(cls for cls in FITTERS if cls.id == self.method) steps = self.steps if steps_override is None else int(steps_override) burn = self._resolved_burn(self.steps) if burn_override is None else int(burn_override) - init = self.init sampler_settings = self._sampler_settings( random_seed=random_seed, steps=steps, @@ -1016,9 +1038,9 @@ def _prepare_run_context( fitness=fitness, steps=steps, burn=burn, - init=init, sampler_settings=sampler_settings, n_parameters=len(bumps_params), + start_generation=start_generation, ) starting_values = np.array([parameter.value for parameter in bumps_params], dtype=float) resolved_uncertainties = ( @@ -1043,9 +1065,9 @@ def _build_driver( fitness: object, steps: int, burn: int, - init: DreamPopulationInitializationEnum, sampler_settings: dict[str, object], n_parameters: int, + start_generation: int = 0, ) -> FitDriver: """Build and clip the BUMPS DREAM driver.""" total_generations = int(steps + burn + 1) @@ -1056,6 +1078,7 @@ def _build_driver( n_parameters=n_parameters, total_generations=total_generations, burn_steps=int(burn), + start_generation=int(start_generation), ) mapper = self._build_mapper(problem) try: @@ -1068,7 
+1091,7 @@ def _build_driver( burn=burn, thin=self.thin, pop=int(sampler_settings['pop']), - init=init.value, + init=self.init.value, samples=sampler_settings['samples'], alpha=DEFAULT_ALPHA, outliers=DEFAULT_OUTLIER_TEST, diff --git a/tests/integration/fitting/test_bumps_dream_support.py b/tests/integration/fitting/test_bumps_dream_support.py index 3cb8bafac..4bc16496a 100644 --- a/tests/integration/fitting/test_bumps_dream_support.py +++ b/tests/integration/fitting/test_bumps_dream_support.py @@ -553,7 +553,6 @@ def test_build_driver_stops_mapper_when_driver_clip_fails(): fitness=SimpleNamespace(numpoints=lambda: 10), steps=10, burn=2, - init=minimizer.init, sampler_settings={'samples': 40, 'pop': 4}, n_parameters=1, ) diff --git a/tests/unit/easydiffraction/analysis/minimizers/test_bumps_dream.py b/tests/unit/easydiffraction/analysis/minimizers/test_bumps_dream.py index 60fa332f7..f76c56c3d 100644 --- a/tests/unit/easydiffraction/analysis/minimizers/test_bumps_dream.py +++ b/tests/unit/easydiffraction/analysis/minimizers/test_bumps_dream.py @@ -90,6 +90,45 @@ def test_dream_progress_monitor_allocates_rows_by_phase_ratio(): assert len(monitor._sampling_targets) == 15 +def test_dream_progress_monitor_reports_relative_progress_on_resume(): + from easydiffraction.analysis.minimizers.bumps_dream import _DreamProgressMonitor + + # Resume from a 1000-generation chain, adding 100 more (burn=0). + monitor = _DreamProgressMonitor( + tracker=MagicMock(), + n_points=100, + n_parameters=5, + total_generations=1101, + burn_steps=0, + start_generation=1000, + ) + + # Progress is reported over the 100 new generations, not 1001/1101. + assert monitor._reported_iteration(1000) == 1 + assert monitor._reported_iteration(1050) == 50 + assert monitor._reported_total_iterations() == 101 + assert monitor._progress_percent(1050) == pytest.approx(100.0 * 50 / 101) + # Reporting targets fall within the new generation range. 
+ assert min(monitor._sampling_targets) >= 1001 + assert max(monitor._sampling_targets) == 1101 + + +def test_dream_progress_monitor_reports_absolute_progress_when_fresh(): + from easydiffraction.analysis.minimizers.bumps_dream import _DreamProgressMonitor + + monitor = _DreamProgressMonitor( + tracker=MagicMock(), + n_points=100, + n_parameters=3, + total_generations=101, + burn_steps=0, + ) + + assert monitor._reported_iteration(40) == 40 + assert monitor._reported_total_iterations() == 101 + assert monitor._progress_percent(40) == pytest.approx(100.0 * 40 / 101) + + def test_init_accepts_enum_or_string_and_rejects_invalid(): from easydiffraction.analysis.minimizers.bumps_dream import BumpsDreamMinimizer from easydiffraction.analysis.minimizers.enums import DreamPopulationInitializationEnum @@ -361,7 +400,6 @@ def test_build_driver_stops_mapper_when_driver_clip_fails(): fitness=SimpleNamespace(numpoints=lambda: 10), steps=10, burn=2, - init=minimizer.init, sampler_settings={'samples': 40, 'pop': 4}, n_parameters=1, ) @@ -589,6 +627,7 @@ def test_prepare_dream_resume_builds_ring_buffer_overrides(tmp_path): 'burn_override': 0, 'samples_override': 13 * 3 * 2, 'pop_override': 3, + 'start_generation': 8, } # bumps mutates state in place, so resume must pass a deep copy. 
assert fit_state is not state From 0b937ec5784c298919c21a7d3bd10129ab348026 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 16 Jun 2026 10:02:54 +0200 Subject: [PATCH 31/33] Note DREAM resume cost scales with population size --- .../docs/tutorials/bayesian-dream-resume-lbco-hrpt.ipynb | 9 ++++++++- docs/docs/tutorials/bayesian-dream-resume-lbco-hrpt.py | 7 +++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/docs/docs/tutorials/bayesian-dream-resume-lbco-hrpt.ipynb b/docs/docs/tutorials/bayesian-dream-resume-lbco-hrpt.ipynb index 67d53fa22..162955e54 100644 --- a/docs/docs/tutorials/bayesian-dream-resume-lbco-hrpt.ipynb +++ b/docs/docs/tutorials/bayesian-dream-resume-lbco-hrpt.ipynb @@ -300,7 +300,14 @@ "Resume from the saved DREAM state and append 100 more generations to\n", "the existing chain. We use only 100 steps here to keep the tutorial\n", "fast, but in practice you would typically run more steps to ensure\n", - "convergence and better posterior resolution." + "convergence and better posterior resolution.\n", + "\n", + "Each DREAM generation evaluates the whole population in parallel, so\n", + "the cost of resuming scales with `population_size`: `extra_steps=100`\n", + "with the default population is on the order of a couple of thousand\n", + "model evaluations, not 100. The progress bar counts the new\n", + "generations (`1/100`), independent of how long the saved chain\n", + "already is." ] }, { diff --git a/docs/docs/tutorials/bayesian-dream-resume-lbco-hrpt.py b/docs/docs/tutorials/bayesian-dream-resume-lbco-hrpt.py index 8b3bc2184..e9a6ba1ce 100644 --- a/docs/docs/tutorials/bayesian-dream-resume-lbco-hrpt.py +++ b/docs/docs/tutorials/bayesian-dream-resume-lbco-hrpt.py @@ -129,6 +129,13 @@ # the existing chain. We use only 100 steps here to keep the tutorial # fast, but in practice you would typically run more steps to ensure # convergence and better posterior resolution. 
+# +# Each DREAM generation evaluates the whole population in parallel, so +# the cost of resuming scales with `population_size`: `extra_steps=100` +# with the default population is on the order of a couple of thousand +# model evaluations, not 100. The progress bar counts the new +# generations (`1/100`), independent of how long the saved chain +# already is. # %% project.analysis.minimizer.random_seed = 42 # fixed seed for reproducible output From 3ea60b59b7b8c637de3dc0753df7d71e40421c99 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 16 Jun 2026 10:27:25 +0200 Subject: [PATCH 32/33] Count DREAM resume progress as 1/extra_steps to match emcee --- .../analysis/minimizers/bumps_dream.py | 16 ++++++++-------- .../analysis/minimizers/test_bumps_dream.py | 8 +++++--- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/easydiffraction/analysis/minimizers/bumps_dream.py b/src/easydiffraction/analysis/minimizers/bumps_dream.py index dd5fd412d..844ce667b 100644 --- a/src/easydiffraction/analysis/minimizers/bumps_dream.py +++ b/src/easydiffraction/analysis/minimizers/bumps_dream.py @@ -252,11 +252,16 @@ def __init__( def _reported_iteration(self, generation: int) -> int: """Return the generation relative to the resume baseline.""" clamped_generation = min(generation, self._total_generations) - return max(1, clamped_generation - self._start_generation) + relative = max(1, clamped_generation - self._start_generation) + return min(relative, self._reported_total_iterations()) def _reported_total_iterations(self) -> int: """Return the total relative to the resume baseline.""" - return max(1, self._total_generations - self._start_generation) + if self._start_generation > 0: + # The saved initial generation is already present, so only + # the new generations (extra_steps) are reported (1..extra). 
+ return max(1, self._total_generations - self._start_generation - 1) + return self._total_generations @staticmethod def config_history(history: object) -> None: @@ -399,12 +404,7 @@ def _phase_name(self, generation: int) -> str: def _progress_percent(self, generation: int) -> float: """Return DREAM progress over new generations, in percent.""" - clamped_generation = min(generation, self._total_generations) - numerator = max(0, clamped_generation - self._start_generation) - denominator = self._total_generations - self._start_generation - if denominator <= 0: - return 100.0 - return 100.0 * numerator / denominator + return 100.0 * self._reported_iteration(generation) / self._reported_total_iterations() @staticmethod def _population_mean_log_posterior(history: object) -> float: diff --git a/tests/unit/easydiffraction/analysis/minimizers/test_bumps_dream.py b/tests/unit/easydiffraction/analysis/minimizers/test_bumps_dream.py index f76c56c3d..870183b8c 100644 --- a/tests/unit/easydiffraction/analysis/minimizers/test_bumps_dream.py +++ b/tests/unit/easydiffraction/analysis/minimizers/test_bumps_dream.py @@ -103,11 +103,13 @@ def test_dream_progress_monitor_reports_relative_progress_on_resume(): start_generation=1000, ) - # Progress is reported over the 100 new generations, not 1001/1101. + # Progress is reported over the 100 new generations (1/100..100/100), + # not the absolute 1001/1101. assert monitor._reported_iteration(1000) == 1 assert monitor._reported_iteration(1050) == 50 - assert monitor._reported_total_iterations() == 101 - assert monitor._progress_percent(1050) == pytest.approx(100.0 * 50 / 101) + assert monitor._reported_iteration(1100) == 100 + assert monitor._reported_total_iterations() == 100 + assert monitor._progress_percent(1050) == pytest.approx(50.0) # Reporting targets fall within the new generation range. 
assert min(monitor._sampling_targets) >= 1001 assert max(monitor._sampling_targets) == 1101 From 4bde8f320110e58cca26355516e8b028466bc855 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 16 Jun 2026 11:05:29 +0200 Subject: [PATCH 33/33] Make sampler progress display consistent across engines --- .../analysis/fit_helpers/tracking.py | 7 +++- .../analysis/minimizers/bumps_dream.py | 12 +++--- .../analysis/minimizers/emcee.py | 38 +++---------------- .../fit_helpers/test_tracking_coverage.py | 3 +- .../analysis/minimizers/test_bumps_dream.py | 9 +++-- 5 files changed, 25 insertions(+), 44 deletions(-) diff --git a/src/easydiffraction/analysis/fit_helpers/tracking.py b/src/easydiffraction/analysis/fit_helpers/tracking.py index bcbc2f627..f57d43914 100644 --- a/src/easydiffraction/analysis/fit_helpers/tracking.py +++ b/src/easydiffraction/analysis/fit_helpers/tracking.py @@ -35,7 +35,7 @@ SAMPLER_PHASE_PRE_PROCESSING = 'pre-processing' DEFAULT_HEADERS = ['iteration', 'time (s)', 'χ²', 'change / status'] DEFAULT_ALIGNMENTS = ['center', 'center', 'center', 'center'] -SAMPLER_HEADERS = ['iteration', 'progress', 'time (s)', 'log posterior', 'phase'] +SAMPLER_HEADERS = ['step', 'progress', 'time (s)', 'log posterior', 'phase'] SAMPLER_ALIGNMENTS = ['center', 'center', 'center', 'center', 'center'] _TerminalLiveHandle = _SharedTerminalLiveHandle @@ -423,8 +423,11 @@ def _initial_sampler_progress_row( self._last_progress_time = update.elapsed_time if self._sampler_pre_processing_pending: self._sampler_pre_processing_pending = False + # Pre-processing is setup, not a sampling step, so the step + # cell is left blank; real step counts start at the first + # sampling row. 
return self._sampler_status_row( - iteration_label=self._sampler_iteration_label(clamped_iteration), + iteration_label='', phase=SAMPLER_PHASE_PRE_PROCESSING, elapsed_time=update.elapsed_time, log_posterior=update.log_posterior, diff --git a/src/easydiffraction/analysis/minimizers/bumps_dream.py b/src/easydiffraction/analysis/minimizers/bumps_dream.py index 844ce667b..028948838 100644 --- a/src/easydiffraction/analysis/minimizers/bumps_dream.py +++ b/src/easydiffraction/analysis/minimizers/bumps_dream.py @@ -256,12 +256,12 @@ def _reported_iteration(self, generation: int) -> int: return min(relative, self._reported_total_iterations()) def _reported_total_iterations(self) -> int: - """Return the total relative to the resume baseline.""" - if self._start_generation > 0: - # The saved initial generation is already present, so only - # the new generations (extra_steps) are reported (1..extra). - return max(1, self._total_generations - self._start_generation - 1) - return self._total_generations + """Return the reported step total, excluding setup.""" + # total_generations counts the bumps initial generation (the + # blank pre-processing row), which is setup rather than a step; + # exclude it (and, on resume, the already-saved generations) so + # the bar reads steps+burn (fresh) or extra_steps (resume). + return max(1, self._total_generations - self._start_generation - 1) @staticmethod def config_history(history: object) -> None: diff --git a/src/easydiffraction/analysis/minimizers/emcee.py b/src/easydiffraction/analysis/minimizers/emcee.py index c2b91e0c3..728f1149f 100644 --- a/src/easydiffraction/analysis/minimizers/emcee.py +++ b/src/easydiffraction/analysis/minimizers/emcee.py @@ -721,9 +721,14 @@ def _run_sampler( # noqa: PLR0913 ) self._sampler = sampler + # The progress bar counts requested steps: extra_steps on + # resume, or nsteps + nburn on a fresh run. 
total_iterations + # carries an extra initial iteration (the blank pre-processing + # row) that is run but not shown as a step. + reporter_total_steps = int(extra_steps) if resume else (self.nsteps + self.nburn) reporter = _EmceeProgressReporter( tracker=self.tracker, - total_steps=total_iterations, + total_steps=reporter_total_steps, burn_steps=0 if resume else self.nburn, ) if resume: @@ -1096,11 +1101,6 @@ def _build_success_result( # noqa: PLR0914 posterior_parameter_summaries ) best_log_posterior = float(finite_log_posterior[best_draw_index, best_walker_index]) - self._track_sampler_completion( - total_steps=total_steps, - best_log_posterior=best_log_posterior, - reduced_chi_square=None, - ) return OptimizeResult( x=best_sample_values, @@ -1178,32 +1178,6 @@ def _convergence_diagnostics( ) return convergence_diagnostics - def _track_sampler_completion( - self, - *, - total_steps: int, - best_log_posterior: float, - reduced_chi_square: float | None, - ) -> None: - """Record one final sampler progress row.""" - reduced_chi2 = reduced_chi_square - if reduced_chi2 is None: - reduced_chi2 = self.tracker.best_chi2 - if reduced_chi2 is None: - reduced_chi2 = np.nan - self.tracker.track_sampler_progress( - SamplerProgressUpdate( - iteration=max(1, total_steps), - total_iterations=max(1, total_steps), - phase='sampling', - progress_percent=100.0, - log_posterior=best_log_posterior, - reduced_chi2=float(reduced_chi2), - elapsed_time=self.tracker._current_elapsed_time(), - force_report=True, - ) - ) - @staticmethod def _sync_result_to_parameters( parameters: list[object], diff --git a/tests/unit/easydiffraction/analysis/fit_helpers/test_tracking_coverage.py b/tests/unit/easydiffraction/analysis/fit_helpers/test_tracking_coverage.py index 5f318c497..7fa69fad2 100644 --- a/tests/unit/easydiffraction/analysis/fit_helpers/test_tracking_coverage.py +++ b/tests/unit/easydiffraction/analysis/fit_helpers/test_tracking_coverage.py @@ -152,7 +152,8 @@ def 
test_pre_processing_pending_emits_status_row_first(silent_tracker): ) first_row = silent_tracker._df_rows[0] - # Status row: blank progress column, phase = pre-processing. + # Status row: blank step and progress columns, phase = pre-processing. + assert first_row[0] == '' assert first_row[1] == '' assert first_row[4] == tracking_mod.SAMPLER_PHASE_PRE_PROCESSING assert silent_tracker._sampler_pre_processing_pending is False diff --git a/tests/unit/easydiffraction/analysis/minimizers/test_bumps_dream.py b/tests/unit/easydiffraction/analysis/minimizers/test_bumps_dream.py index 870183b8c..c75c1d694 100644 --- a/tests/unit/easydiffraction/analysis/minimizers/test_bumps_dream.py +++ b/tests/unit/easydiffraction/analysis/minimizers/test_bumps_dream.py @@ -115,9 +115,11 @@ def test_dream_progress_monitor_reports_relative_progress_on_resume(): assert max(monitor._sampling_targets) == 1101 -def test_dream_progress_monitor_reports_absolute_progress_when_fresh(): +def test_dream_progress_monitor_excludes_initial_generation_when_fresh(): from easydiffraction.analysis.minimizers.bumps_dream import _DreamProgressMonitor + # total_generations = steps + burn + 1; the +1 initial generation is + # setup, so the reported total is steps + burn (here 100). monitor = _DreamProgressMonitor( tracker=MagicMock(), n_points=100, @@ -127,8 +129,9 @@ def test_dream_progress_monitor_reports_absolute_progress_when_fresh(): ) assert monitor._reported_iteration(40) == 40 - assert monitor._reported_total_iterations() == 101 - assert monitor._progress_percent(40) == pytest.approx(100.0 * 40 / 101) + assert monitor._reported_iteration(101) == 100 + assert monitor._reported_total_iterations() == 100 + assert monitor._progress_percent(40) == pytest.approx(40.0) def test_init_accepts_enum_or_string_and_rejects_invalid():