diff --git a/docs/docs/tutorials/fitting-bayesian.ipynb b/docs/docs/tutorials/fitting-bayesian.ipynb new file mode 100644 index 00000000..77e8b884 --- /dev/null +++ b/docs/docs/tutorials/fitting-bayesian.ipynb @@ -0,0 +1,518 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "0", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget" + ] + }, + { + "cell_type": "markdown", + "id": "1", + "metadata": {}, + "source": [ + "# Bayesian analysis\n", + "\n", + "The previous tutorials demonstrate how to obtain *maximum-likelihood* estimates of the parameters of a model by minimising $\\chi^2$.\n", + "An alternative, and often more informative, view of the same problem is provided by a **Bayesian** analysis, in which the goal is to characterise the full *posterior distribution* over the parameters given the data.\n", + "\n", + "Recall Bayes' theorem,\n", + "\n", + "$$\n", + "p(\\theta \\mid d) \\propto p(d \\mid \\theta)\\, p(\\theta),\n", + "$$ (bayes)\n", + "\n", + "where $\\theta$ are the model parameters, $d$ is the observed data, $p(d \\mid \\theta)$ is the likelihood, and $p(\\theta)$ is the prior. In `easyscience`, the `min`/`max` bounds of a `Parameter` are interpreted as a **uniform prior**, and a Gaussian likelihood is constructed from the data and supplied weights.\n", + "\n", + "`easyscience` exposes a Bayesian Markov-chain Monte Carlo (MCMC) sampler through the `Fitter.mcmc_sample` method. Under the hood this uses BUMPS' DREAM sampler, so the underlying minimizer must be switched to BUMPS.\n", + "\n", + "```{note}\n", + "This tutorial focuses on Bayesian analysis with a simple QENS model for illustration. For dedicated QENS fitting with more sophisticated models, consider using [`EasyDynamics`](https://github.com/easyscience/easydynamics).\n", + "```\n", + "\n", + "### When should you use a Bayesian analysis?\n", + "\n", + "A maximum-likelihood estimate (MLE) gives you a single best-fit value with a symmetric uncertainty, which is fast and often sufficient. A Bayesian analysis becomes valuable when:\n", + "\n", + "- **Your uncertainties are asymmetric** — MLE error bars assume the parameter distribution is Gaussian, which is not always true. The posterior samples capture skew naturally.\n", + "- **You have prior knowledge** — if you know from physics that a parameter *must* lie in a certain range, encoding that as a prior ($p(\\theta)$) is more principled than simply clamping bounds after the fit.\n", + "- **You care about parameter correlations** — the joint posterior (shown in the corner plot below) reveals trade-offs between parameters that a single covariance matrix can miss.\n", + "- **You want to propagate uncertainty to predictions** — with the posterior in hand, you can compute credible bands on any function of the parameters (see the posterior-predictive band section) without linearised error propagation.\n", + "\n", + "The trade-off is computational cost: MCMC requires thousands of model evaluations, whereas an MLE fit may converge in dozens. For the simple 4-parameter model used here the difference is negligible, but for expensive models it is worth starting with MLE and only switching to MCMC when you need the richer output.\n", + "\n", + "In this tutorial we re-use the QENS dataset and the Lorentzian-with-resolution model from the [Fitting QENS](fitting-qens.ipynb) tutorial, but instead of returning a single best-fit value with a symmetric error bar we will draw thousands of samples from the posterior.\n" + ] + }, + { + "cell_type": "markdown", + "id": "2", + "metadata": {}, + "source": [ + "## Load the data\n", + "\n", + "We load the same simulated QENS dataset that was used in the previous tutorial." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], + "source": [ + "def fetch_data(name: str, known_hash: str) -> str:\n", + " \"\"\"\n", + " Fetch pre-prepared data from a remote source and return the path to the file.\n", + " \"\"\"\n", + " import pooch\n", + "\n", + " return pooch.retrieve(\n", + " url=f'https://public.esss.dk/groups/scipp/dmsc-summer-school/2025/{name}',\n", + " known_hash=known_hash,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "filename = fetch_data(\n", + " '4-reduction/energy_transfer_QENS_unknown_quasi_elastic_many_neutrons.dat',\n", + " known_hash='sha256:e49fa9a1d2ef5eeb714524903e8ffa9de6616e5a299a799cbb0b2f3ee8fd459a',\n", + ")\n", + "\n", + "\n", + "def load(filename: str):\n", + " \"\"\"Load three-column text data (x, y, error) and filter NaN values.\n", + "\n", + " The file is expected to be a space-separated three-column text file\n", + " (commonly ``.dat`` or ``.txt``) with columns: x, y, error (1σ).\n", + " \"\"\"\n", + " x, y, error = np.loadtxt(filename, unpack=True)\n", + " selection = np.isfinite(y)\n", + " return x[selection], y[selection], error[selection]\n", + "\n", + "\n", + "omega, intensity_obs, intensity_error = load(filename)\n", + "\n", + "# Restrict to the region of interest, as in the QENS tutorial.\n", + "selection = (omega > -0.06) & (omega < 0.06)\n", + "omega, intensity_obs, intensity_error = (\n", + " omega[selection],\n", + " intensity_obs[selection],\n", + " intensity_error[selection],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplots()\n", + "ax.errorbar(omega, intensity_obs, intensity_error, fmt='.')\n", + "ax.set(xlabel='$\\\\omega$/meV', ylabel='$I(\\\\omega)$')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "9", + "metadata": {}, + "source": [ + "## Defining parameters with priors\n", + "\n", + "Create four `Parameter` objects, for the area $A$, $\\gamma$, $\\omega_0$ and $\\sigma$. The `min` and `max` arguments define a **uniform prior** on each parameter — the sampler will only consider values inside this range and will treat every value inside the range as equally plausible *a priori*.\n", + "\n", + "| Parameter | Initial Value | Min | Max |\n", + "| --- | --- | --- | --- |\n", + "| $A$ (area) | 10 | 1 | 100 |\n", + "| $\\gamma$ | 8.0 × 10-3 | 1.0 × 10-4 | 1.0 × 10-2 |\n", + "| $\\omega_0$ | 1.0 × 10-3 | 0 | 2.0 × 10-3 |\n", + "| $\\sigma$ | 1.0 × 10-3 | 1.0 × 10-5 | 1.0 × 10-1 |\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [ + "from easyscience import Parameter\n", + "\n", + "area = Parameter(name='area', value=10, fixed=False, min=1, max=100)\n", + "gamma = Parameter(name='gamma', value=8e-3, fixed=False, min=1e-4, max=1e-2)\n", + "omega_0 = Parameter(name='omega_0', value=1e-3, fixed=False, min=0, max=2e-3)\n", + "sigma = Parameter(name='sigma', value=1e-3, fixed=False, min=1e-5, max=1e-1)" + ] + }, + { + "cell_type": "markdown", + "id": "77ce3f63", + "metadata": {}, + "source": [ + "## The model\n", + "\n", + "We re-use the convolution of a Lorentzian with a Gaussian resolution function from the QENS tutorial:\n", + "\n", + "$$\n", + "I(\\omega) = \\frac{A\\gamma}{\\pi\\big[(\\omega - \\omega_0)^2 + \\gamma^2\\big]} \\;\\ast\\; \\mathcal{N}(0, \\sigma),\n", + "$$ (model)\n", + "\n", + "where $A$ is a scale factor (area), $\\gamma$ is the Lorentzian half-width at half-maximum, $\\omega_0$ is the centre offset and $\\sigma$ is the width of the Gaussian resolution kernel.\n", + "\n", + "You might want to look at other QENS models, as implemented in the [EasyDynamics](https://github.com/easyscience/dynamics-lib) library.\n", + "\n", + "```{note}\n", + "The convolution below uses `np.convolve(..., 'same')` for simplicity. In `'same'` mode, `numpy` pads the signal edges with zeros, which can introduce small artefacts at the boundaries. Because our Gaussian kernel ($\\sigma \\approx 10^{-3}$) is much narrower than the data range ($\\pm 0.06$ meV), these edge effects are negligible here. For production work with broader kernels, consider `scipy.signal.convolve` with an explicit boundary mode such as `'reflect'` or `'nearest'`.\n", + "```\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36a0c4f4", + "metadata": {}, + "outputs": [], + "source": [ + "from scipy.stats import norm\n", + "\n", + "\n", + "def lorentzian(x: np.ndarray) -> np.ndarray:\n", + " return area.value / np.pi * gamma.value / ((x - omega_0.value) ** 2 + gamma.value**2)\n", + "\n", + "\n", + "def intensity_model(x: np.ndarray) -> np.ndarray:\n", + " gauss = norm(0, sigma.value).pdf(x)\n", + " gauss /= gauss.sum()\n", + " return np.convolve(lorentzian(x), gauss, 'same')" + ] + }, + { + "cell_type": "markdown", + "id": "11", + "metadata": {}, + "source": [ + "## Maximum-likelihood fit (for reference)\n", + "\n", + "Before running MCMC, perform a quick maximum-likelihood fit. The result will give us a good starting point and lets us compare the central tendency of the posterior with the classical best-fit values.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": {}, + "outputs": [], + "source": [ + "from easyscience import Fitter\n", + "from easyscience import ObjBase\n", + "\n", + "parameter_container = ObjBase(name='params', A=area, gamma=gamma, omega_0=omega_0, sigma=sigma)\n", + "\n", + "mle_fitter = Fitter(parameter_container, intensity_model)\n", + "mle_result = mle_fitter.fit(x=omega, y=intensity_obs, weights=1 / intensity_error)\n", + "\n", + "print(f'A = {area.value:.4g}')\n", + "print(f'gamma = {gamma.value:.4g}')\n", + "print(f'omega_0 = {omega_0.value:.4g}')\n", + "print(f'sigma = {sigma.value:.4g}')" + ] + }, + { + "cell_type": "markdown", + "id": "13", + "metadata": {}, + "source": [ + "## Drawing posterior samples with DREAM\n", + "\n", + "We now draw samples from the posterior distribution $p(\\theta \\mid d)$ using the BUMPS DREAM (DiffeRential Evolution Adaptive Metropolis) algorithm. DREAM is an ensemble MCMC method that runs multiple chains in parallel and automatically tunes the proposal distribution.\n", + "\n", + "DREAM only works with the BUMPS minimizer. We reuse the ``mle_fitter`` created above, switch it to BUMPS, and call `Fitter.mcmc_sample`, which returns a dictionary with the following keys:\n", + "\n", + "- `draws`: a `(n_samples, n_parameters)` array of posterior samples: each **row** is one complete draw from the joint posterior (one value for every parameter simultaneously), and each **column** holds all sampled values for a single parameter;\n", + "- `param_names`: the unique names of the parameters, in the same column order as `draws`;\n", + "- `internal_bumps_object`: the underlying BUMPS `MCMCDraw` object, useful for advanced diagnostics;\n", + "- `logp`: the log-posterior of each retained sample.\n", + "\n", + "The key sampling parameters are:\n", + "\n", + "- `samples` (4000): the total number of posterior draws to generate across all chains;\n", + "- `burn` (500): the number of initial *burn-in* iterations to discard — the sampler needs time to find the typical set of the posterior, and early samples are not representative;\n", + "- `thin` (2): the *thinning* interval — only every second sample is kept, which reduces autocorrelation between consecutive draws;\n", + "\n", + "First, we switch to the BUMPS minimizer:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14", + "metadata": {}, + "outputs": [], + "source": [ + "from easyscience import AvailableMinimizers\n", + "\n", + "mle_fitter.switch_minimizer(AvailableMinimizers.Bumps)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a219fcd", + "metadata": {}, + "outputs": [], + "source": [ + "result = mle_fitter.mcmc_sample(\n", + " x=omega,\n", + " y=intensity_obs,\n", + " weights=1 / intensity_error,\n", + " samples=10000,\n", + " burn=500,\n", + " thin=2,\n", + ")\n", + "\n", + "print(f'Drew {result[\"draws\"].shape[0]} samples for {result[\"draws\"].shape[1]} parameters.')\n", + "print('parameters:', result['param_names'])" + ] + }, + { + "cell_type": "markdown", + "id": "8766b170", + "metadata": {}, + "source": [ + "## Convergence diagnostics\n", + "\n", + "Before we trust the posterior samples, we should check that the MCMC chains have **converged** — that is, the sampler has found the typical set of the posterior and is no longer drifting. Two simple visual checks are:\n", + "\n", + "1. **Trace plot** — plot the sampled parameter values against the sample index. A well-converged chain looks like a \"hairy caterpillar\": it fluctuates around a stable mean with no long-term trends.\n", + "2. **Log-posterior plot** — the log-posterior $\\log p(\\theta \\mid d)$ should also stabilise after burn-in. If it is still climbing at the end of the run, the sampler has not yet converged.\n", + "\n", + "The `logp` array returned by `sample` contains the log-posterior for every *retained* sample (i.e. *after* burn-in and thinning). This means a flat `logp` trace is exactly what we want to see.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c49ab6f", + "metadata": {}, + "outputs": [], + "source": [ + "draws = result['draws']\n", + "logp = result['logp']\n", + "if callable(logp):\n", + " _, logp = logp()\n", + " logp = logp.flatten()\n", + "name_to_col = {name: idx for idx, name in enumerate(result['param_names'])}\n", + "\n", + "\n", + "def column_for(parameter):\n", + " return draws[:, name_to_col[parameter.unique_name]]\n", + "\n", + "\n", + "fig, axes = plt.subplots(5, 1, figsize=(10, 12), sharex=True)\n", + "\n", + "# Trace plots for each parameter\n", + "for ax, (label, par) in zip(\n", + " axes[:4],\n", + " (('area', area), ('gamma', gamma), ('omega_0', omega_0), ('sigma', sigma)),\n", + "):\n", + " ax.plot(column_for(par), lw=0.5)\n", + " ax.set_ylabel(label)\n", + " ax.set_xlim(0, len(draws) - 1)\n", + "\n", + "# Log-posterior trace\n", + "axes[4].plot(logp, lw=0.5, color='C4')\n", + "axes[4].set_ylabel('log-posterior')\n", + "axes[4].set_xlabel('sample index')\n", + "\n", + "fig.suptitle('MCMC trace plots — check for \"hairy caterpillar\" behaviour')\n", + "fig.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "15", + "metadata": {}, + "source": [ + "## Posterior summaries\n", + "\n", + "Summarise each marginal posterior by its **median** and the 16th/84th percentiles, which together give an asymmetric 68% credible interval. Compare these with the MLE values you obtained above.\n", + "\n", + "\n", + "**Why might the Bayesian median differ from the MLE?**\n", + "The MLE finds the single point that maximises the likelihood, while the Bayesian median is the central value of the *posterior* — which also accounts for the prior $p(\\theta)$. If a parameter's posterior is skewed (asymmetric), the median and the mode (which approximates the MLE) will not coincide. The table below shows both so you can spot any such differences.\n", + "\n", + "\n", + "Note that the columns of `result['draws']` are ordered by `result['param_names']` (which use the parameters' `unique_name`), so it is worth building a small helper to look up a column by friendly name.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ce3e38a8", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16", + "metadata": {}, + "outputs": [], + "source": [ + "summary_rows = []\n", + "for label, par in (\n", + " ('area', area),\n", + " ('gamma', gamma),\n", + " ('omega_0', omega_0),\n", + " ('sigma', sigma),\n", + "):\n", + " col = column_for(par)\n", + " lo, med, hi = np.percentile(col, [16, 50, 84])\n", + " summary_rows.append((label, med, med - lo, hi - med, par.value))\n", + "\n", + "print(f'{\"param\":<8s} {\"median\":>12s} {\"16th\":>12s} {\"84th\":>12s} {\"MLE\":>12s}')\n", + "for label, med, low, high, mle in summary_rows:\n", + " print(f'{label:<8s} {med:12.4g} {low:12.4g} {high:12.4g} {mle:12.4g}')" + ] + }, + { + "cell_type": "markdown", + "id": "17", + "metadata": {}, + "source": [ + "## Visualise the joint posterior\n", + "\n", + "Marginal summaries hide correlations between parameters. A *corner plot* (a triangular grid of pairwise scatter plots and 1-D histograms) is the standard way to display them. Below we build one with plain `matplotlib`. To produce a publication-quality version, use plotting packages such as [`corner`](https://corner.readthedocs.io/) which can generate corner plots with a single function call.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18", + "metadata": {}, + "outputs": [], + "source": [ + "labels = ['area', 'gamma', 'omega_0', 'sigma']\n", + "cols = np.column_stack([column_for(p) for p in (area, gamma, omega_0, sigma)])\n", + "n = len(labels)\n", + "\n", + "fig, axes = plt.subplots(n, n, figsize=(8, 8))\n", + "for i in range(n):\n", + " for j in range(n):\n", + " ax = axes[i, j]\n", + " if j > i:\n", + " ax.set_visible(False)\n", + " continue\n", + " if i == j:\n", + " ax.hist(cols[:, i], bins=40, color='C0', histtype='stepfilled', alpha=0.7)\n", + " ax.set_yticks([])\n", + " else:\n", + " ax.hexbin(cols[:, j], cols[:, i], gridsize=30, cmap='Blues', mincnt=1)\n", + " if i == n - 1:\n", + " ax.set_xlabel(labels[j])\n", + " else:\n", + " ax.set_xticklabels([])\n", + " if j == 0:\n", + " ax.set_ylabel(labels[i])\n", + " else:\n", + " ax.set_yticklabels([])\n", + "fig.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "19", + "metadata": {}, + "source": [ + "## Posterior-predictive band\n", + "\n", + "With the full posterior in hand we can propagate uncertainty through the model without assuming Gaussianity. Unlike error propagation from an MLE fit — which assumes parameters are normally distributed and independent — the posterior draws naturally capture any skew, heavy tails, and correlations between parameters. Below we pick a few hundred random draws, evaluate the model at each, and plot the resulting 95% credible band alongside the data.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20", + "metadata": {}, + "outputs": [], + "source": [ + "rng = np.random.default_rng(seed=0)\n", + "n_draws = 300\n", + "indices = rng.choice(draws.shape[0], size=n_draws, replace=False)\n", + "\n", + "predictions = np.empty((n_draws, omega.size))\n", + "saved = {p.unique_name: p.value for p in (area, gamma, omega_0, sigma)}\n", + "try:\n", + " for k, idx in enumerate(indices):\n", + " area.value = draws[idx, name_to_col[area.unique_name]]\n", + " gamma.value = draws[idx, name_to_col[gamma.unique_name]]\n", + " omega_0.value = draws[idx, name_to_col[omega_0.unique_name]]\n", + " sigma.value = draws[idx, name_to_col[sigma.unique_name]]\n", + " predictions[k] = intensity_model(omega)\n", + "finally:\n", + " for p in (area, gamma, omega_0, sigma):\n", + " p.value = saved[p.unique_name]\n", + "\n", + "lo = np.percentile(predictions, 2.5, axis=0)\n", + "hi = np.percentile(predictions, 97.5, axis=0)\n", + "mid = np.percentile(predictions, 50, axis=0)\n", + "\n", + "fig, ax = plt.subplots()\n", + "ax.errorbar(omega, intensity_obs, intensity_error, fmt='.', label='data')\n", + "ax.fill_between(omega, lo, hi, color='C1', alpha=0.3, label='95% posterior band')\n", + "ax.plot(omega, mid, '-', color='C1', label='posterior median')\n", + "ax.set(xlabel='$\\\\omega$/meV', ylabel='$I(\\\\omega)$')\n", + "ax.legend()\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "p312", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/tutorials/index.md b/docs/docs/tutorials/index.md index 4a73fb8c..588cfe81 100644 --- a/docs/docs/tutorials/index.md +++ b/docs/docs/tutorials/index.md @@ -23,6 +23,9 @@ The tutorials are organized into the following categories: - [Fitting SANS](fitting-sans.ipynb) – A tutorial demonstrating how to fit a small-angle neutron scattering (SANS) data using EasyScience framework. +- [Bayesian analysis](fitting-bayesian.ipynb) – A tutorial showing how + to run Bayesian MCMC sampling on a model with EasyScience to obtain + full posterior distributions for the parameters. - [Progress Callback](progress-callback.ipynb) – A tutorial showing how to monitor fitting progress across minimizer backends and update a notebook UI while a fit is running. diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 15f15dea..8620396a 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -176,10 +176,11 @@ nav: - User Guide: user-guide/index.md - Tutorials: - Tutorials: tutorials/index.md + - Bayesian analysis: tutorials/fitting-bayesian.ipynb + - Progress Callback: tutorials/progress-callback.ipynb - Workshops & Schools: - Fitting QENS: tutorials/fitting-qens.ipynb - Fitting SANS: tutorials/fitting-sans.ipynb - - Progress Callback: tutorials/progress-callback.ipynb - API Reference: - API Reference: api-reference/index.md - base_classes: api-reference/base_classes.md diff --git a/src/easyscience/fitting/fitter.py b/src/easyscience/fitting/fitter.py index 38ef7351..ef6db4ab 100644 --- a/src/easyscience/fitting/fitter.py +++ b/src/easyscience/fitting/fitter.py @@ -412,3 +412,105 @@ def _post_compute_reshaping( fit_result.y_calc = np.reshape(fit_result.y_calc, y.shape) fit_result.y_err = np.reshape(fit_result.y_err, y.shape) return fit_result + + def mcmc_sample( + self, + x: np.ndarray, + y: np.ndarray, + weights: np.ndarray, + samples: int = 10000, + burn: int = 2000, + thin: int = 10, + population: Optional[int] = None, + vectorized: bool = False, + sampler_kwargs: Optional[dict] = None, + progress_callback: Optional[Callable[[dict], Optional[bool]]] = None, + abort_test: Optional[Callable[[], bool]] = None, + ) -> dict: + """Run Bayesian MCMC sampling using the BUMPS DREAM sampler. + + Works with both a plain ``Fitter`` (single dataset) and a + ``MultiFitter`` (multiple datasets) via polymorphic dispatch: + ``_precompute_reshaping`` and ``_fit_function_wrapper`` are resolved + on the concrete subclass at call time, so multi-dataset flattening + is handled automatically when called on a ``MultiFitter`` instance. + + Parameters + ---------- + x : np.ndarray + Independent variable array (or list of arrays for ``MultiFitter``). + y : np.ndarray + Dependent variable array (or list of arrays for ``MultiFitter``). + weights : np.ndarray + Weight array (or list of arrays for ``MultiFitter``). + samples : int, default=10000 + Number of retained DREAM samples requested from BUMPS. + burn : int, default=2000 + Burn-in steps to discard before collecting samples. + thin : int, default=10 + Thinning interval — only every ``thin``-th sample is kept, + which reduces autocorrelation between consecutive draws. + population : Optional[int], default=None + BUMPS DREAM population count (number of parallel chains). + vectorized : bool, default=False + When ``True``, each x array may be multi-dimensional (e.g. an + ``(N, M, 2)`` grid for a 2D model) and is left as-is. When + ``False`` (default), each x array is expected to be 1-D. + sampler_kwargs : Optional[dict], default=None + Additional keyword arguments forwarded to the BUMPS DREAM sampler. + progress_callback : Optional[Callable[[dict], Optional[bool]]], default=None + Optional callback invoked at each DREAM generation. The payload + dict includes ``iteration`` and ``sampling: True``. + abort_test : Optional[Callable[[], bool]], default=None + Optional callable that returns ``True`` to abort sampling early. + + Returns + ------- + dict + Dictionary with keys ``'draws'``, ``'param_names'``, + ``'internal_bumps_object'``, and ``'logp'``. + + Raises + ------ + ValueError + If ``samples``, ``burn``, or ``thin`` are invalid. + RuntimeError + If the active minimizer is not a BUMPS instance. + """ + if not isinstance(samples, int) or samples <= 0: + raise ValueError('samples must be a positive integer.') + if not isinstance(burn, int) or burn < 0: + raise ValueError('burn must be a non-negative integer.') + if not isinstance(thin, int) or thin < 1: + raise ValueError('thin must be a positive integer.') + + x_fit, x_new, y_new, w_new, dims = self._precompute_reshaping(x, y, weights, vectorized) + self._dependent_dims = dims + + original_fit_func = self._fit_function + self.fit_function = self._fit_function_wrapper(x_new, flatten=True) + + try: + minimizer = self.minimizer + if not (hasattr(minimizer, 'package') and minimizer.package == 'bumps'): + raise RuntimeError( + 'Bayesian sampling requires a BUMPS minimizer. ' + 'Use ``fitter.switch_minimizer(AvailableMinimizers.Bumps)`` first.' + ) + + result = minimizer.mcmc_sample( + x=x_fit, + y=y_new, + weights=w_new, + samples=samples, + burn=burn, + thin=thin, + population=population, + sampler_kwargs=sampler_kwargs, + progress_callback=progress_callback, + abort_test=abort_test, + ) + finally: + self.fit_function = original_fit_func + + return result diff --git a/src/easyscience/fitting/minimizers/minimizer_bumps.py b/src/easyscience/fitting/minimizers/minimizer_bumps.py index 496cbfde..bd0a9b9a 100644 --- a/src/easyscience/fitting/minimizers/minimizer_bumps.py +++ b/src/easyscience/fitting/minimizers/minimizer_bumps.py @@ -1,11 +1,12 @@ # SPDX-FileCopyrightText: 2026 EasyScience contributors # SPDX-License-Identifier: BSD-3-Clause +from __future__ import annotations + import copy import warnings from typing import Any from typing import Callable -from typing import List import numpy as np from bumps.fitters import FIT_AVAILABLE_IDS @@ -64,11 +65,11 @@ def __init__( self._eval_counter: EvalCounter | None = None @staticmethod - def all_methods() -> List[str]: + def all_methods() -> list[str]: return FIT_AVAILABLE_IDS_FILTERED @staticmethod - def supported_methods() -> List[str]: + def supported_methods() -> list[str]: # only a small subset methods = ['amoeba', 'newton', 'lm'] return methods @@ -79,11 +80,12 @@ def fit( y: np.ndarray, weights: np.ndarray, model: Callable | None = None, - parameters: List[Parameter] | None = None, + parameters: list[Parameter] | None = None, method: str | None = None, tolerance: float | None = None, max_evaluations: int | None = None, progress_callback: Callable[[dict], bool | None] | None = None, + abort_test: Callable[[], bool] | None = None, minimizer_kwargs: dict | None = None, engine_kwargs: dict | None = None, **kwargs: Any, @@ -101,7 +103,7 @@ def fit( Weights for supplied measured points. model : Callable | None, default=None Optional Model which is being fitted to. By default, None. - parameters : List[Parameter] | None, default=None + parameters : list[Parameter] | None, default=None Optional parameters for the fit. By default, None. method : str | None, default=None Method for minimization. By default, None. @@ -116,6 +118,10 @@ def fit( Optional callback for progress updates. The payload field ``iteration`` carries the BUMPS optimizer step index. By default, None. + abort_test : Callable[[], bool] | None, default=None + Optional callback that returns ``True`` to signal that the fit + should be aborted. Called periodically during the + BUMPS optimizer iteration loop. minimizer_kwargs : dict | None, default=None Additional keyword arguments passed to the BUMPS minimizer. By default, None. @@ -203,6 +209,7 @@ def fit( fitclass=fitclass, problem=problem, monitors=monitors, + abort_test=abort_test if abort_test is not None else (lambda: False), **minimizer_kwargs, **kwargs, ) @@ -284,20 +291,20 @@ def _current_parameter_snapshot(self, problem, point: np.ndarray) -> dict: snapshot[dict_name] = float(value) return snapshot - def convert_to_pars_obj(self, par_list: List[Parameter] | None = None) -> List[BumpsParameter]: + def convert_to_pars_obj(self, par_list: list[Parameter] | None = None) -> list[BumpsParameter]: """ Create a container with the ``Parameters`` converted from the base object. Parameters ---------- - par_list : List[Parameter] | None, default=None + par_list : list[Parameter] | None, default=None If only a single/selection of parameter is required. Specify as a list. By default, None. Returns ------- - List[BumpsParameter] + list[BumpsParameter] Bumps Parameters list. """ if par_list is None: @@ -333,7 +340,7 @@ def convert_to_par_object(obj: Parameter) -> BumpsParameter: fixed=obj.fixed, ) - def _make_model(self, parameters: List[BumpsParameter] | None = None) -> Callable: + def _make_model(self, parameters: list[BumpsParameter] | None = None) -> Callable: """ Generate a bumps model from the supplied ``fit_function`` and parameters in the base object. Note that this makes a callable @@ -343,7 +350,7 @@ def _make_model(self, parameters: List[BumpsParameter] | None = None) -> Callabl Parameters ---------- - parameters : List[BumpsParameter] | None, default=None + parameters : list[BumpsParameter] | None, default=None Optional BUMPS parameters to bind into the model. Returns @@ -374,11 +381,186 @@ def _make_func(x, y, weights): return _outer(self) + def mcmc_sample( + self, + x: np.ndarray, + y: np.ndarray, + weights: np.ndarray, + samples: int = 10000, + burn: int = 2000, + thin: int = 10, + population: int | None = None, + sampler_kwargs: dict | None = None, + progress_callback: Callable[[dict], bool | None] | None = None, + abort_test: Callable[[], bool] | None = None, + ) -> dict: + """Run Bayesian MCMC sampling using the BUMPS DREAM sampler. + + Builds a BUMPS `FitProblem` from the current model and runs the DREAM + sampler. This is the public minimizer-level entry point for Bayesian + sampling; the higher-level `MultiFitter.mcmc_sample` delegates to this + method after flattening multi-dataset arrays. + + Parameters + ---------- + x : np.ndarray + Flattened independent variable array. + y : np.ndarray + Flattened dependent variable array. + weights : np.ndarray + Flattened weight array. + samples : int, default=10000 + Number of retained DREAM samples requested from BUMPS. + burn : int, default=2000 + Burn-in steps. + thin : int, default=10 + Thinning interval. + population : int | None, default=None + BUMPS DREAM population count (number of parallel chains). + sampler_kwargs : dict | None, default=None + Additional keyword arguments forwarded to `bumps.fitters.fit`. + progress_callback : Callable[[dict], bool | None] | None, default=None + Optional callback for progress updates during sampling. The + payload dict includes ``iteration`` (DREAM generation number) and + ``sampling: True``. + abort_test : Callable[[], bool] | None, default=None + Optional callback that returns ``True`` to signal that sampling + should be aborted. Called periodically during the DREAM sampling + loop. + + Returns + ------- + dict + Dictionary with keys ``'draws'``, ``'param_names'``, + ``'internal_bumps_object'``, and ``'logp'``. + + Raises + ------ + ValueError + If the input shapes or weights are invalid, or if + ``progress_callback`` is not callable. + FitError + If DREAM sampling was aborted by the user (via ``abort_test``). + Exception + Re-raised from DREAM fitting if any unexpected error occurs + (parameter values are restored beforehand). + """ + from bumps.fitters import DreamFit + from bumps.names import FitProblem + + x, y, weights = np.asarray(x), np.asarray(y), np.asarray(weights) + + if not isinstance(samples, int) or samples <= 0: + raise ValueError('samples must be a positive integer.') + if not isinstance(burn, int) or burn < 0: + raise ValueError('burn must be a non-negative integer.') + if not isinstance(thin, int) or thin < 1: + raise ValueError('thin must be a positive integer.') + + if y.shape != x.shape: + raise ValueError('x and y must have the same shape.') + + if not np.isfinite(x).all(): + raise ValueError('x cannot contain NaN or infinite values.') + if not np.isfinite(y).all(): + raise ValueError('y cannot contain NaN or infinite values.') + + if weights.shape != x.shape: + raise ValueError('Weights must have the same shape as x and y.') + + if not np.isfinite(weights).all(): + raise ValueError('Weights cannot be NaN or infinite.') + + if (weights <= 0).any(): + raise ValueError('Weights must be strictly positive and non-zero.') + + # Build the BUMPS Curve model using the minimizer's existing machinery + model_func = self._make_model() + curve = model_func(x, y, weights) + problem = FitProblem(curve) + + # Build DREAM kwargs + dream_kwargs: dict = {'samples': samples, 'burn': burn, 'thin': thin} + if population is not None: + dream_kwargs['pop'] = population + if sampler_kwargs: + dream_kwargs.update(sampler_kwargs) + + # Build monitors (same pattern as classical Bumps.fit()) + monitors = [] + if progress_callback is not None: + if not callable(progress_callback): + raise ValueError('progress_callback must be callable') + # Compute total DREAM steps for progress display (burn + sampling generations). + # BUMPS DREAM default population count is 10 when not specified by the user. + _dream_default_pop = 10 + pop_val = population if population is not None else _dream_default_pop + _total_steps = burn + (samples + pop_val - 1) // pop_val + monitors.append( + BumpsProgressMonitor( + problem, + progress_callback, + lambda problem, iteration, point, nllf: { + **self._build_sample_progress_payload(problem, iteration, point, nllf), + 'total_steps': _total_steps, + }, + ) + ) + + driver = FitDriver( + fitclass=DreamFit, + problem=problem, + monitors=monitors, + abort_test=abort_test if abort_test is not None else (lambda: False), + **dream_kwargs, + ) + driver.clip() + + from easyscience import global_object + + stack_status = global_object.stack.enabled + global_object.stack.enabled = False + + try: + x_opt, fx = driver.fit() + result_state = getattr(driver.fitter, 'state', None) + if result_state is None: + raise FitError('Sampling aborted by user') + except Exception: + self._restore_parameter_values() + raise + finally: + global_object.stack.enabled = stack_status + + draws = result_state.draw().points + param_names = [p.name[len(MINIMIZER_PARAMETER_PREFIX) :] for p in problem._parameters] + logp = getattr(result_state, 'logp', None) + + return { + 'draws': draws, + 'param_names': param_names, + 'internal_bumps_object': result_state, + 'logp': logp, + } + + def _build_sample_progress_payload( + self, problem, iteration: int, point: np.ndarray, nllf: float + ) -> dict: + """Build a progress payload for Bayesian DREAM sampling steps. + + Called by :class:`BumpsProgressMonitor` at each DREAM generation. + The payload includes ``sampling: True`` so downstream consumers can + distinguish sampling progress from classical fitting progress. + """ + payload = self._build_progress_payload(problem, iteration, point, nllf) + payload['sampling'] = True + return payload + def _set_parameter_fit_result( self, fit_result: Any, stack_status: bool, - par_list: List[BumpsParameter], + par_list: list[BumpsParameter], ) -> None: """ Update parameters to their final values and assign a std error @@ -390,7 +572,7 @@ def _set_parameter_fit_result( BUMPS OptimizeResult containing best-fit values and errors. stack_status : bool Whether the undo stack was enabled. - par_list : List[BumpsParameter] + par_list : list[BumpsParameter] List of BUMPS parameter objects. """ from easyscience import global_object diff --git a/src/easyscience/fitting/multi_fitter.py b/src/easyscience/fitting/multi_fitter.py index 50614552..92af9d9e 100644 --- a/src/easyscience/fitting/multi_fitter.py +++ b/src/easyscience/fitting/multi_fitter.py @@ -1,9 +1,9 @@ # SPDX-FileCopyrightText: 2026 EasyScience contributors # SPDX-License-Identifier: BSD-3-Clause +from __future__ import annotations + from typing import Callable -from typing import List -from typing import Optional import numpy as np @@ -26,8 +26,8 @@ class MultiFitter(Fitter): def __init__( self, - fit_objects: Optional[List] = None, - fit_functions: Optional[List[Callable]] = None, + fit_objects: list | None = None, + fit_functions: list[Callable] | None = None, ): # Create a dummy core object to hold all the fit objects. self._fit_objects = CollectionBase('multi', *fit_objects) @@ -37,7 +37,7 @@ def __init__( super().__init__(self._fit_objects, self._fit_functions[0]) def _fit_function_wrapper( - self, real_x: Optional[List[np.ndarray]] = None, flatten: bool = True + self, real_x: list[np.ndarray] | None = None, flatten: bool = True ) -> Callable: """ Simple fit function which injects the N real X (independent) @@ -47,7 +47,7 @@ def _fit_function_wrapper( Parameters ---------- - real_x : Optional[List[np.ndarray]], default=None + real_x : list[np.ndarray] | None, default=None List of independent x parameters to be injected. By default, None. flatten : bool, default=True @@ -80,31 +80,31 @@ def wrapped_fun(x, **kwargs): @staticmethod def _precompute_reshaping( - x: List[np.ndarray], - y: List[np.ndarray], - weights: Optional[List[np.ndarray]], + x: list[np.ndarray], + y: list[np.ndarray], + weights: list[np.ndarray] | None, vectorized: bool, - ) -> tuple[ - np.ndarray, List[np.ndarray], np.ndarray, Optional[np.ndarray], List[tuple[int, ...]] - ]: + ) -> tuple[np.ndarray, list[np.ndarray], np.ndarray, np.ndarray | None, list[tuple[int, ...]]]: """ Convert an array of X's and Y's to an acceptable shape for fitting. Parameters ---------- - x : List[np.ndarray] + x : list[np.ndarray] List of independent variables. - y : List[np.ndarray] + y : list[np.ndarray] List of dependent variables. - weights : Optional[List[np.ndarray]] + weights : list[np.ndarray] | None Optional weights for each dataset. vectorized : bool - Is the fn input vectorized or point based? + When ``True``, each x array may be multi-dimensional (e.g. an + ``(N, M, 2)`` grid for a 2D model) and is left as-is. When + ``False`` (default), each x array is expected to be 1-D. Returns ------- - tuple[np.ndarray, List[np.ndarray], np.ndarray, Optional[np.ndarray], List[tuple[int, ...]]] + tuple[np.ndarray, list[np.ndarray], np.ndarray, np.ndarray | None, list[tuple[int, ...]]] Reshaped x values, reshaped input data, flattened y values, flattened weights, and stored dependent dimensions. """ @@ -136,9 +136,9 @@ def _precompute_reshaping( def _post_compute_reshaping( self, fit_result_obj: FitResults, - x: List[np.ndarray], - y: List[np.ndarray], - ) -> List[FitResults]: + x: list[np.ndarray], + y: list[np.ndarray], + ) -> list[FitResults]: """ Split a multi-fit result object back into per-dataset results. @@ -146,14 +146,14 @@ def _post_compute_reshaping( ---------- fit_result_obj : FitResults Combined fit result returned by the minimizer. - x : List[np.ndarray] + x : list[np.ndarray] Original x coordinates for each dataset. - y : List[np.ndarray] + y : list[np.ndarray] Original y coordinates for each dataset. Returns ------- - List[FitResults] + list[FitResults] One fit result object per dataset. """ diff --git a/tests/integration/fitting/test_fitter.py b/tests/integration/fitting/test_fitter.py index 9f97b54a..0fb9b327 100644 --- a/tests/integration/fitting/test_fitter.py +++ b/tests/integration/fitting/test_fitter.py @@ -125,7 +125,7 @@ def test_basic_fit(fit_engine: AvailableMinimizers): try: f.switch_minimizer(fit_engine) except AttributeError: - pytest.skip(msg=f'{fit_engine} is not installed') + pytest.skip(reason=f'{fit_engine} is not installed') result = f.fit(x=x, y=y, weights=weights) @@ -173,7 +173,7 @@ def test_fit_result(fit_engine): try: f.switch_minimizer(fit_engine) except AttributeError: - pytest.skip(msg=f'{fit_engine} is not installed') + pytest.skip(reason=f'{fit_engine} is not installed') result = f.fit(x, y, weights=weights) check_fit_results(result, sp_sin, ref_sin, x, sp_ref1=sp_ref1, sp_ref2=sp_ref2) @@ -205,7 +205,7 @@ def test_basic_max_evaluations(fit_engine): try: f.switch_minimizer(fit_engine) except AttributeError: - pytest.skip(msg=f'{fit_engine} is not installed') + pytest.skip(reason=f'{fit_engine} is not installed') f.max_evaluations = 3 result = f.fit(x=x, y=y, weights=weights) # Result should not be the same as the reference @@ -240,7 +240,7 @@ def test_max_evaluations_populates_fit_result_fields(fit_engine): try: f.switch_minimizer(fit_engine) except AttributeError: - pytest.skip(msg=f'{fit_engine} is not installed') + pytest.skip(reason=f'{fit_engine} is not installed') f.max_evaluations = 3 result = f.fit(x=x, y=y, weights=weights) @@ -268,7 +268,7 @@ def test_bumps_max_evaluations_counts_objective_calls() -> None: try: f.switch_minimizer(AvailableMinimizers.Bumps) except AttributeError: - pytest.skip(msg=f'{AvailableMinimizers.Bumps} is not installed') + pytest.skip(reason=f'{AvailableMinimizers.Bumps} is not installed') f.max_evaluations = 3 result = f.fit(x=x, y=y, weights=weights) @@ -306,7 +306,7 @@ def test_basic_tolerance(fit_engine, tolerance): try: f.switch_minimizer(fit_engine) except AttributeError: - pytest.skip(msg=f'{fit_engine} is not installed') + pytest.skip(reason=f'{fit_engine} is not installed') f.tolerance = tolerance result = f.fit(x=x, y=y, weights=weights) # Result should not be the same as the reference @@ -377,7 +377,7 @@ def test_dependent_parameter(fit_engine): try: f.switch_minimizer(fit_engine) except AttributeError: - pytest.skip(msg=f'{fit_engine} is not installed') + pytest.skip(reason=f'{fit_engine} is not installed') result = f.fit(x, y, weights=weights) check_fit_results(result, sp_sin, ref_sin, x) @@ -405,12 +405,12 @@ def test_2D_vectorized(fit_engine): try: ff.switch_minimizer(fit_engine) except AttributeError: - pytest.skip(msg=f'{fit_engine} is not installed') + pytest.skip(reason=f'{fit_engine} is not installed') try: result = ff.fit(x=XY, y=mm(XY), weights=weights, vectorized=True) except FitError as e: if 'Unable to allocate' in str(e): - pytest.skip(msg='MemoryError - Matrix too large') + pytest.skip(reason='MemoryError - Matrix too large') else: raise e assert result.n_pars == len(m2.get_fit_parameters()) @@ -444,12 +444,12 @@ def test_2D_non_vectorized(fit_engine): try: ff.switch_minimizer(fit_engine) except AttributeError: - pytest.skip(msg=f'{fit_engine} is not installed') + pytest.skip(reason=f'{fit_engine} is not installed') try: result = ff.fit(x=XY, y=mm(XY.reshape(-1, 2)), weights=weights, vectorized=False) except FitError as e: if 'Unable to allocate' in str(e): - pytest.skip(msg='MemoryError - Matrix too large') + pytest.skip(reason='MemoryError - Matrix too large') else: raise e assert result.n_pars == len(m2.get_fit_parameters()) @@ -492,7 +492,7 @@ def test_fixed_parameter_does_not_change(fit_engine): try: f.switch_minimizer(fit_engine) except AttributeError: - pytest.skip(msg=f'{fit_engine} is not installed') + pytest.skip(reason=f'{fit_engine} is not installed') result = f.fit(x=x, y=y, weights=weights) @@ -567,7 +567,7 @@ def run_fit(weights): try: f.switch_minimizer(fit_engine) except AttributeError: - pytest.skip(msg=f'{fit_engine} is not installed') + pytest.skip(reason=f'{fit_engine} is not installed') f.fit(x=x, y=y, weights=weights) return model.offset.value, model.phase.value diff --git a/tests/integration/fitting/test_multi_fitter.py b/tests/integration/fitting/test_multi_fitter.py index 7e2d02d1..e4b87ea1 100644 --- a/tests/integration/fitting/test_multi_fitter.py +++ b/tests/integration/fitting/test_multi_fitter.py @@ -286,3 +286,215 @@ def test_multi_fit_1D_2D(fit_engine): assert result.success assert np.all(result.x == X[idx]) assert np.all(result.y_obs == Y[idx]) + + +# --------------------------------------------------------------------------- +# Tests for MultiFitter.mcmc_sample (Bayesian MCMC via BUMPS DREAM) +# --------------------------------------------------------------------------- + + +class TestMultiFitterMcmcSample: + """Integration tests for ``MultiFitter.mcmc_sample``.""" + + def test_raises_runtime_error_when_not_bumps(self): + """mcmc_sample() must raise RuntimeError if the minimizer is not a BUMPS instance.""" + sp = AbsSin(0.354, 3.05) + f = MultiFitter([sp], [sp]) + + x = np.linspace(0, 5, 50) + y = np.sin(x) + weights = np.ones_like(x) + + with pytest.raises(RuntimeError, match='Bayesian sampling requires a BUMPS minimizer'): + f.mcmc_sample(x=[x], y=[y], weights=[weights], samples=10, burn=5, thin=1) + + @pytest.mark.filterwarnings('ignore::UserWarning') + def test_returns_expected_keys_and_shapes(self): + """mcmc_sample() with BUMPS should return draws, param_names, state, logp.""" + ref_sin = AbsSin(0.2, np.pi) + sp = AbsSin(0.354, 3.05) + + x = np.linspace(0, 5, 50) + y = ref_sin(x) + weights = np.ones_like(x) + + sp.offset.fixed = False + sp.phase.fixed = False + + f = MultiFitter([sp], [sp]) + try: + f.switch_minimizer('Bumps') + except AttributeError: + pytest.skip('BUMPS is not installed') + + result = f.mcmc_sample(x=[x], y=[y], weights=[weights], samples=100, burn=20, thin=2) + + assert isinstance(result, dict) + assert 'draws' in result + assert 'param_names' in result + assert 'internal_bumps_object' in result + assert 'logp' in result + + # draws shape: (retained_samples, n_params) + assert result['draws'].ndim == 2 + assert result['draws'].shape[1] == len(result['param_names']) + + # param_names should match the model's fit parameters + expected_pars = {p.unique_name for p in sp.get_fit_parameters()} + assert set(result['param_names']) == expected_pars + + @pytest.mark.filterwarnings('ignore::UserWarning') + def test_multi_dataset_returns_consistent_param_names(self): + """mcmc_sample() with multiple datasets should have correct param_names across all.""" + ref_sin_1 = AbsSin(0.2, np.pi) + sp_sin_1 = AbsSin(0.354, 3.05) + sp_line = Line(0.43, 6.1) + + # Link a parameter across models + sp_line.m.make_dependent_on( + dependency_expression='sp_sin1', dependency_map={'sp_sin1': sp_sin_1.offset} + ) + + x1 = np.linspace(0, 5, 50) + y1 = ref_sin_1(x1) + x2 = np.copy(x1) + y2 = Line(1, 4.6)(x2) + weights = np.ones_like(x1) + + sp_sin_1.offset.fixed = False + sp_sin_1.phase.fixed = False + sp_line.c.fixed = False + + f = MultiFitter([sp_sin_1, sp_line], [sp_sin_1, sp_line]) + try: + f.switch_minimizer('Bumps') + except AttributeError: + pytest.skip('BUMPS is not installed') + + result = f.mcmc_sample( + x=[x1, x2], y=[y1, y2], weights=[weights, weights], samples=100, burn=20, thin=2 + ) + + # All parameters across both models should appear + all_params = {p.unique_name for p in sp_sin_1.get_fit_parameters()} + all_params |= {p.unique_name for p in sp_line.get_fit_parameters()} + assert set(result['param_names']) == all_params + + def test_population_param_controls_chain_count(self): + """Passing population should succeed and produce valid draws.""" + sp = AbsSin(0.354, 3.05) + sp.offset.fixed = False + sp.phase.fixed = False + f = MultiFitter([sp], [sp]) + try: + f.switch_minimizer('Bumps') + except AttributeError: + pytest.skip('BUMPS is not installed') + + x = np.linspace(0, 5, 50) + y = np.sin(x) + weights = np.ones_like(x) + + result = f.mcmc_sample( + x=[x], y=[y], weights=[weights], samples=100, burn=20, thin=2, population=5 + ) + assert 'draws' in result + + @pytest.mark.filterwarnings('ignore::UserWarning') + def test_vectorized_2d_input_produces_valid_draws(self): + """mcmc_sample() with vectorized=True and 2D input should produce valid draws.""" + sp = AbsSin2D(0.1, 1.75) + + x = np.linspace(0, 5, 50) + X, Y = np.meshgrid(x, x) + x2D = np.stack((X, Y), axis=2) + y2D = np.abs(np.sin(X)) * np.abs(np.sin(Y)) + weights = np.ones_like(y2D) + + sp.offset.fixed = False + sp.phase.fixed = False + + f = MultiFitter([sp], [sp]) + try: + f.switch_minimizer('Bumps') + except AttributeError: + pytest.skip('BUMPS is not installed') + + result = f.mcmc_sample( + x=[x2D], y=[y2D], weights=[weights], samples=100, burn=20, thin=2, vectorized=True + ) + + assert result['draws'].ndim == 2 + assert result['draws'].shape[0] > 0 + assert result['draws'].shape[1] == len(result['param_names']) + + def test_fit_function_restored_after_runtime_error(self): + """fit_function must be restored to its original value even when mcmc_sample() raises.""" + sp = AbsSin(0.354, 3.05) + f = MultiFitter([sp], [sp]) + + x = np.linspace(0, 5, 50) + y = np.sin(x) + weights = np.ones_like(x) + + original_func = f.fit_function + + with pytest.raises(RuntimeError): + f.mcmc_sample(x=[x], y=[y], weights=[weights], samples=10, burn=5, thin=1) + + assert f.fit_function is original_func + + @pytest.mark.filterwarnings('ignore::UserWarning') + def test_fit_function_restored_after_successful_sample(self): + """fit_function must be restored to its original value after a successful mcmc_sample().""" + ref_sin = AbsSin(0.2, np.pi) + sp = AbsSin(0.354, 3.05) + + x = np.linspace(0, 5, 50) + y = ref_sin(x) + weights = np.ones_like(x) + + sp.offset.fixed = False + sp.phase.fixed = False + + f = MultiFitter([sp], [sp]) + try: + f.switch_minimizer('Bumps') + except AttributeError: + pytest.skip('BUMPS is not installed') + + original_func = f.fit_function + f.mcmc_sample(x=[x], y=[y], weights=[weights], samples=100, burn=20, thin=2) + assert f.fit_function is original_func + + @pytest.mark.filterwarnings('ignore::UserWarning') + def test_sampler_kwargs_forwarded(self): + """sampler_kwargs dict is forwarded to the BUMPS DREAM sampler.""" + ref_sin = AbsSin(0.2, np.pi) + sp = AbsSin(0.354, 3.05) + + x = np.linspace(0, 5, 50) + y = ref_sin(x) + weights = np.ones_like(x) + + sp.offset.fixed = False + sp.phase.fixed = False + + f = MultiFitter([sp], [sp]) + try: + f.switch_minimizer('Bumps') + except AttributeError: + pytest.skip('BUMPS is not installed') + + result = f.mcmc_sample( + x=[x], + y=[y], + weights=[weights], + samples=100, + burn=20, + thin=2, + sampler_kwargs={'init': 'random'}, + ) + + assert result['draws'].ndim == 2 + assert result['draws'].shape[0] > 0 diff --git a/tests/unit/fitting/minimizers/test_minimizer_bumps.py b/tests/unit/fitting/minimizers/test_minimizer_bumps.py index 6b587dd7..7a223f2c 100644 --- a/tests/unit/fitting/minimizers/test_minimizer_bumps.py +++ b/tests/unit/fitting/minimizers/test_minimizer_bumps.py @@ -672,3 +672,390 @@ def test_gen_fit_results_uses_nit_for_budget_check(self, minimizer: Bumps, monke domain_fit_results.message == 'Fit stopped: reached maximum optimizer steps (3); objective evaluated 2 times' ) + + +# =================================================================== +# Bumps.mcmc_sample() — Bayesian DREAM sampling +# =================================================================== + + +class TestBumpsSample: + """Tests for the ``Bumps.mcmc_sample()`` method and its helpers.""" + + # Sentinel value to signal "set fitter.state = None" in _setup_driver_mock + ABORT = object() + + @pytest.fixture + def minimizer(self) -> Bumps: + return Bumps( + obj='obj', + fit_function='fit_function', + minimizer_enum=MagicMock(package='bumps', method='amoeba'), + ) + + @pytest.fixture(autouse=True) + def _mock_bumps_internals(self, monkeypatch): + """Prevent sample() from constructing real BUMPS objects. + + ``sample()`` imports ``DreamFit`` and ``FitProblem`` from the real + ``bumps`` package internally, which would try to build real model + objects. We redirect those to mocks and also mock ``FitDriver`` + (which *is* a module-level import) so the whole flow stays under + test control. + + Also mock ``_make_model`` on the class so that the ``minimizer`` + fixture (which uses ``obj='obj'``) doesn't fail inside ``sample()``. + """ + import bumps.fitters + import bumps.names + + monkeypatch.setattr(bumps.fitters, 'DreamFit', MagicMock()) + monkeypatch.setattr(bumps.names, 'FitProblem', MagicMock(return_value=MagicMock())) + monkeypatch.setattr( + Bumps, '_make_model', MagicMock(return_value=MagicMock(return_value=MagicMock())) + ) + + def _setup_driver_mock( + self, monkeypatch, fitter_state_value=None, fit_result=None, fit_side_effect=None + ): + """Helper to create a mocked FitDriver with configurable behavior. + + :param fitter_state_value: If ``None``, ``driver.fitter.state`` will be + a regular MagicMock (non-None). Pass ``ABORT`` to set it to ``None`` + and simulate user abort. + """ + from easyscience import global_object + + global_object.stack.enabled = False + + mock_driver = MagicMock() + mock_driver.clip = MagicMock() + + if fit_side_effect is not None: + mock_driver.fit.side_effect = fit_side_effect + else: + mock_driver.fit.return_value = fit_result or (np.array([1.0]), 0.0) + + mock_driver.stderr = MagicMock(return_value=np.array([0.1])) + + if fitter_state_value is TestBumpsSample.ABORT: + mock_driver.fitter.state = None + else: + mock_state = MagicMock() + mock_state.draw.return_value.points = np.array([[1.0]]) + mock_state.logp = None + mock_driver.fitter.state = mock_state + + mock_FitDriver = MagicMock(return_value=mock_driver) + monkeypatch.setattr( + easyscience.fitting.minimizers.minimizer_bumps, 'FitDriver', mock_FitDriver + ) + return mock_FitDriver, mock_driver + + def test_sample_basic(self, minimizer: Bumps, monkeypatch) -> None: + """Verify that mcmc_sample() returns a dict with expected keys.""" + mock_FitDriver, _ = self._setup_driver_mock(monkeypatch) + minimizer._make_model = MagicMock(return_value=MagicMock(return_value=MagicMock())) + + result = minimizer.mcmc_sample( + x=np.array([1.0, 2.0]), + y=np.array([0.1, 0.2]), + weights=np.array([1.0, 1.0]), + samples=100, + burn=20, + thin=2, + population=5, + ) + + assert isinstance(result, dict) + assert 'draws' in result + assert 'param_names' in result + assert 'internal_bumps_object' in result + assert 'logp' in result + mock_FitDriver.assert_called_once() + + def test_sample_with_progress_callback(self, minimizer: Bumps, monkeypatch) -> None: + """Verify progress callback is wired up as a monitor.""" + mock_FitDriver, _ = self._setup_driver_mock(monkeypatch) + minimizer._make_model = MagicMock(return_value=MagicMock(return_value=MagicMock())) + progress_callback = MagicMock() + + result = minimizer.mcmc_sample( + x=np.array([1.0]), + y=np.array([0.1]), + weights=np.array([1.0]), + samples=10, + burn=5, + thin=1, + progress_callback=progress_callback, + ) + + assert result is not None + call_kwargs = mock_FitDriver.call_args.kwargs + assert 'monitors' in call_kwargs + assert len(call_kwargs['monitors']) == 1 + assert isinstance(call_kwargs['monitors'][0], BumpsProgressMonitor) + + def test_sample_aborted_by_user_raises_fit_error(self, minimizer: Bumps, monkeypatch) -> None: + """Verify that sampling abortion raises FitError.""" + self._setup_driver_mock(monkeypatch, fitter_state_value=TestBumpsSample.ABORT) + minimizer._make_model = MagicMock(return_value=MagicMock(return_value=MagicMock())) + + with pytest.raises(FitError, match='Sampling aborted by user'): + minimizer.mcmc_sample(x=np.array([1.0]), y=np.array([0.1]), weights=np.array([1.0])) + + def test_sample_driver_exception_restores_parameters( + self, minimizer: Bumps, monkeypatch + ) -> None: + """Verify that a driver exception during sampling restores parameter values.""" + self._setup_driver_mock(monkeypatch, fit_side_effect=RuntimeError('driver failed')) + minimizer._make_model = MagicMock(return_value=MagicMock(return_value=MagicMock())) + minimizer._restore_parameter_values = MagicMock() + + with pytest.raises(RuntimeError, match='driver failed'): + minimizer.mcmc_sample(x=np.array([1.0]), y=np.array([0.1]), weights=np.array([1.0])) + + minimizer._restore_parameter_values.assert_called_once() + + def test_sample_population_param(self, minimizer: Bumps, monkeypatch) -> None: + """population kwarg is forwarded to DREAM as pop.""" + mock_FitDriver, _ = self._setup_driver_mock(monkeypatch) + minimizer._make_model = MagicMock(return_value=MagicMock(return_value=MagicMock())) + + minimizer.mcmc_sample( + x=np.array([1.0]), + y=np.array([0.1]), + weights=np.array([1.0]), + samples=10, + burn=0, + thin=1, + population=7, + ) + + call_kwargs = mock_FitDriver.call_args.kwargs + assert call_kwargs['pop'] == 7 + + def test_sample_rejects_non_callable_callback(self, minimizer: Bumps, monkeypatch) -> None: + import bumps.names + + monkeypatch.setattr(bumps.names, 'FitProblem', MagicMock(return_value=MagicMock())) + minimizer._make_model = MagicMock(return_value=MagicMock(return_value=MagicMock())) + + with pytest.raises(ValueError, match='progress_callback must be callable'): + minimizer.mcmc_sample( + x=np.array([1.0]), + y=np.array([0.1]), + weights=np.array([1.0]), + samples=10, + burn=5, + thin=1, + progress_callback='not-callable', + ) + + +# =================================================================== +# _build_sample_progress_payload +# =================================================================== + + +class TestBuildSampleProgressPayload: + @pytest.fixture + def minimizer(self) -> Bumps: + return Bumps( + obj='obj', + fit_function='fit_function', + minimizer_enum=MagicMock(package='bumps', method='amoeba'), + ) + + def test_payload_structure_and_sampling_flag(self, minimizer: Bumps) -> None: + b = minimizer + + mock_problem = MagicMock() + mock_problem.chisq.side_effect = [25.0, 12.5] + mock_problem.labels.return_value = ['palpha'] + mock_problem.getp.return_value = np.array([1.0]) + b._cached_pars = {'alpha': MagicMock(value=1.0)} + + payload = b._build_sample_progress_payload(mock_problem, 7, np.array([1.0]), 12.5) + + assert payload['iteration'] == 7 + assert payload['chi2'] == 25.0 + assert payload['reduced_chi2'] == 12.5 + assert payload['parameter_values'] == {'alpha': 1.0} + assert payload['sampling'] is True + assert payload['finished'] is False + assert payload['refresh_plots'] is False + + def test_payload_keys(self, minimizer: Bumps) -> None: + b = minimizer + mock_problem = MagicMock() + mock_problem.chisq.side_effect = [10.0, 5.0] + mock_problem.labels.return_value = ['pa'] + mock_problem.getp.return_value = np.array([5.0]) + b._cached_pars = {'a': MagicMock(value=5.0)} + + payload = b._build_sample_progress_payload(mock_problem, 1, np.array([5.0]), nllf=5.0) + + expected_keys = { + 'iteration', + 'chi2', + 'reduced_chi2', + 'parameter_values', + 'refresh_plots', + 'finished', + 'sampling', + } + assert set(payload.keys()) == expected_keys + + def test_delegates_to_build_progress_payload(self, minimizer: Bumps) -> None: + """_build_sample_progress_payload calls _build_progress_payload and adds sampling.""" + mock_problem = MagicMock() + + # Patch _build_progress_payload to track calls + base_payload = { + 'iteration': 3, + 'chi2': 42.0, + 'reduced_chi2': 21.0, + 'parameter_values': {'x': 7.0}, + 'refresh_plots': False, + 'finished': False, + } + with patch.object( + minimizer, '_build_progress_payload', return_value=base_payload + ) as mock_bpp: + result = minimizer._build_sample_progress_payload( + mock_problem, 3, np.array([7.0]), 21.0 + ) + + mock_bpp.assert_called_once_with(mock_problem, 3, np.array([7.0]), 21.0) + assert result == {**base_payload, 'sampling': True} + + +# =================================================================== +# _set_parameter_fit_result with stack_status=True +# =================================================================== + + +class TestSetParameterFitResultWithStack: + @pytest.fixture + def minimizer(self) -> Bumps: + return Bumps( + obj='obj', + fit_function='fit_function', + minimizer_enum=MagicMock(package='bumps', method='amoeba'), + ) + + def test_stack_status_true_calls_begin_end_macro(self, minimizer: Bumps) -> None: + from easyscience import global_object + + global_object.stack.enabled = False + + minimizer._cached_pars = {'a': MagicMock(), 'b': MagicMock()} + minimizer._cached_pars['a'].value = 'old_a' + minimizer._cached_pars['b'].value = 'old_b' + minimizer._restore_parameter_values = MagicMock() + + mock_fit_result = MagicMock() + mock_fit_result.x = np.array([1.0, 2.0]) + mock_fit_result.dx = np.array([0.1, 0.2]) + + mock_par_a = MagicMock() + mock_par_a.name = 'pa' + mock_par_b = MagicMock() + mock_par_b.name = 'pb' + par_list = [mock_par_a, mock_par_b] + + minimizer._set_parameter_fit_result(mock_fit_result, True, par_list) + + assert minimizer._cached_pars['a'].value == 1.0 + assert minimizer._cached_pars['a'].error == 0.1 + assert minimizer._cached_pars['b'].value == 2.0 + assert minimizer._cached_pars['b'].error == 0.2 + minimizer._restore_parameter_values.assert_called_once() + + +# =================================================================== +# convert_to_par_object +# =================================================================== + + +class TestConvertToParObject: + def test_convert_parameter_object(self) -> None: + from easyscience.variable import Parameter + + param = Parameter('thickness', 42.0, min=0.0, max=100.0) + param.fixed = False + + result = Bumps.convert_to_par_object(param) + + # convert_to_par_object uses obj.unique_name which is auto-assigned + assert result.name.startswith('p') + assert result.value == 42.0 + assert result.bounds == (0.0, 100.0) + assert result.fixed is False + + def test_convert_fixed_parameter(self) -> None: + from easyscience.variable import Parameter + + param = Parameter('roughness', 5.0, min=0.0, max=20.0) + param.fixed = True + + result = Bumps.convert_to_par_object(param) + + assert result.name.startswith('p') + assert result.fixed is True + + +# =================================================================== +# fit() with abort_test +# =================================================================== + + +class TestFitWithAbortTest: + @pytest.fixture + def minimizer(self) -> Bumps: + return Bumps( + obj='obj', + fit_function='fit_function', + minimizer_enum=MagicMock(package='bumps', method='amoeba'), + ) + + def test_abort_test_passed_to_fit_driver(self, minimizer: Bumps, monkeypatch) -> None: + from easyscience import global_object + + global_object.stack.enabled = False + + mock_driver = MagicMock() + mock_driver.clip = MagicMock() + mock_driver.fit = MagicMock(return_value=(np.array([42.0]), 0.0)) + mock_driver.stderr = MagicMock(return_value=np.array([0.1])) + mock_driver.monitor_runner.history.step = [0] + mock_FitDriver = MagicMock(return_value=mock_driver) + monkeypatch.setattr( + easyscience.fitting.minimizers.minimizer_bumps, 'FitDriver', mock_FitDriver + ) + + mock_problem = MagicMock() + mock_problem._parameters = [] + monkeypatch.setattr( + easyscience.fitting.minimizers.minimizer_bumps, + 'FitProblem', + MagicMock(return_value=mock_problem), + ) + + minimizer._make_model = MagicMock(return_value=MagicMock(return_value=MagicMock())) + minimizer._gen_fit_results = MagicMock(return_value='result') + minimizer._resolve_fitclass = MagicMock(return_value=MagicMock(id='amoeba')) + minimizer._set_parameter_fit_result = MagicMock() + minimizer._cached_pars = {'a': MagicMock(value=1.0)} + minimizer._cached_pars_vals = {'a': (1.0, 0.0)} + + abort_test = MagicMock(return_value=False) + + minimizer.fit( + x=np.array([1.0]), y=np.array([2.0]), weights=np.array([1.0]), abort_test=abort_test + ) + + call_kwargs = mock_FitDriver.call_args.kwargs + assert callable(call_kwargs['abort_test']) + assert call_kwargs['abort_test'] is not (lambda: False) diff --git a/tests/unit/fitting/test_fitter.py b/tests/unit/fitting/test_fitter.py index 702f5e59..634492c5 100644 --- a/tests/unit/fitting/test_fitter.py +++ b/tests/unit/fitting/test_fitter.py @@ -258,3 +258,156 @@ def test_post_compute_reshaping(self, fitter: Fitter): # TODO # def test_fit_function_wrapper() # def test_precompute_reshaping() + + +# =================================================================== +# Fitter.mcmc_sample() — Bayesian DREAM sampling +# =================================================================== + + +class TestFitterMcmcSample: + @pytest.fixture + def fitter(self, monkeypatch): + monkeypatch.setattr(Fitter, '_update_minimizer', MagicMock()) + return Fitter(MagicMock(), MagicMock()) + + def test_basic(self, fitter: Fitter): + """mcmc_sample() calls minimizer.mcmc_sample() and returns its result.""" + fitter._precompute_reshaping = MagicMock( + return_value=('x_fit', 'x_new', 'y_new', 'w_new', 'dims') + ) + fitter._fit_function_wrapper = MagicMock(return_value='wrapped') + fitter._minimizer = MagicMock() + fitter._minimizer.package = 'bumps' + expected = { + 'draws': np.array([[1.0]]), + 'param_names': ['a'], + 'internal_bumps_object': 'stub', + 'logp': None, + } + fitter._minimizer.mcmc_sample = MagicMock(return_value=expected) + + result = fitter.mcmc_sample( + x=np.array([1.0]), + y=np.array([0.1]), + weights=np.array([1.0]), + samples=100, + burn=20, + thin=2, + population=5, + ) + + assert result == expected + fitter._precompute_reshaping.assert_called_once_with( + np.array([1.0]), np.array([0.1]), np.array([1.0]), False + ) + fitter._fit_function_wrapper.assert_called_once_with('x_new', flatten=True) + fitter._minimizer.mcmc_sample.assert_called_once() + kw = fitter._minimizer.mcmc_sample.call_args.kwargs + assert kw['x'] == 'x_fit' + assert kw['y'] == 'y_new' + assert kw['weights'] == 'w_new' + assert kw['samples'] == 100 + assert kw['burn'] == 20 + assert kw['thin'] == 2 + assert kw['population'] == 5 + assert kw['progress_callback'] is None + assert fitter._dependent_dims == 'dims' + + def test_raises_if_not_bumps(self, fitter: Fitter): + """RuntimeError raised when the active minimizer is not BUMPS.""" + fitter._precompute_reshaping = MagicMock( + return_value=('x_fit', 'x_new', 'y_new', 'w_new', 'dims') + ) + fitter._fit_function_wrapper = MagicMock(return_value='wrapped') + fitter._minimizer = MagicMock() + fitter._minimizer.package = 'lmfit' + + with pytest.raises(RuntimeError, match='Bayesian sampling requires a BUMPS minimizer'): + fitter.mcmc_sample(x=np.array([1.0]), y=np.array([0.1]), weights=np.array([1.0])) + + def test_progress_callback_forwarded(self, fitter: Fitter): + """progress_callback is forwarded to minimizer.mcmc_sample().""" + fitter._precompute_reshaping = MagicMock( + return_value=('x_fit', 'x_new', 'y_new', 'w_new', 'dims') + ) + fitter._fit_function_wrapper = MagicMock(return_value='wrapped') + fitter._minimizer = MagicMock() + fitter._minimizer.package = 'bumps' + fitter._minimizer.mcmc_sample = MagicMock( + return_value={ + 'draws': [], + 'param_names': [], + 'internal_bumps_object': None, + 'logp': None, + } + ) + cb = MagicMock() + + fitter.mcmc_sample( + x=np.array([1.0]), + y=np.array([0.1]), + weights=np.array([1.0]), + progress_callback=cb, + ) + + assert fitter._minimizer.mcmc_sample.call_args.kwargs['progress_callback'] is cb + + def test_fit_function_restored_on_success(self, fitter: Fitter): + """Original fit function is restored after a successful call.""" + fitter._precompute_reshaping = MagicMock( + return_value=('x_fit', 'x_new', 'y_new', 'w_new', 'dims') + ) + fitter._fit_function_wrapper = MagicMock(return_value='wrapped') + fitter._minimizer = MagicMock() + fitter._minimizer.package = 'bumps' + fitter._minimizer.mcmc_sample = MagicMock( + return_value={ + 'draws': [], + 'param_names': [], + 'internal_bumps_object': None, + 'logp': None, + } + ) + original = fitter._fit_function + + fitter.mcmc_sample(x=np.array([1.0]), y=np.array([0.1]), weights=np.array([1.0])) + + assert fitter._fit_function is original + + def test_fit_function_restored_on_error(self, fitter: Fitter): + """Original fit function is restored even when minimizer raises.""" + fitter._precompute_reshaping = MagicMock( + return_value=('x_fit', 'x_new', 'y_new', 'w_new', 'dims') + ) + fitter._fit_function_wrapper = MagicMock(return_value='wrapped') + fitter._minimizer = MagicMock() + fitter._minimizer.package = 'bumps' + fitter._minimizer.mcmc_sample = MagicMock(side_effect=RuntimeError('boom')) + original = fitter._fit_function + + with pytest.raises(RuntimeError): + fitter.mcmc_sample(x=np.array([1.0]), y=np.array([0.1]), weights=np.array([1.0])) + + assert fitter._fit_function is original + + @pytest.mark.parametrize( + 'kwargs, match', + [ + ({'samples': 0}, 'samples must be a positive integer'), + ({'samples': -1}, 'samples must be a positive integer'), + ({'burn': -1}, 'burn must be a non-negative integer'), + ({'thin': 0}, 'thin must be a positive integer'), + ], + ) + def test_invalid_args_raise(self, fitter: Fitter, kwargs, match): + """Invalid samples/burn/thin values raise ValueError before any I/O.""" + with pytest.raises(ValueError, match=match): + fitter.mcmc_sample( + x=np.array([1.0]), + y=np.array([0.1]), + weights=np.array([1.0]), + samples=kwargs.get('samples', 10), + burn=kwargs.get('burn', 0), + thin=kwargs.get('thin', 1), + ) diff --git a/tests/unit/fitting/test_multi_fitter.py b/tests/unit/fitting/test_multi_fitter.py index a54a927b..ac897653 100644 --- a/tests/unit/fitting/test_multi_fitter.py +++ b/tests/unit/fitting/test_multi_fitter.py @@ -3,6 +3,7 @@ from unittest.mock import MagicMock +import numpy as np import pytest from easyscience import ObjBase @@ -61,3 +62,106 @@ def test_fit_progress_callback(self, multi_fitter: MultiFitter): max_evaluations=None, progress_callback=progress_callback, ) + + +# =================================================================== +# MultiFitter._post_compute_reshaping +# =================================================================== + + +class TestPostComputeReshaping: + def test_splits_single_result_into_two(self): + """Verify _post_compute_reshaping splits combined result by dataset.""" + import numpy as np + + from easyscience.fitting.minimizers import FitResults + + fit_objects = [Line(1.0, 0.5), Line(2.0, 1.5)] + mf = MultiFitter(fit_objects, fit_objects) + + # Simulate a combined result + combined = FitResults() + combined.success = True + combined.x = np.array([0, 1, 2]) + combined.y_obs = np.array([0.5, 1.0, 1.5]) + combined.y_calc = np.array([0.51, 0.99, 1.51]) + combined.y_err = np.array([0.01, 0.02, 0.03]) + combined.p = {'pm': 1.0, 'pc': 0.5} + combined.p0 = {'pm': 0.0, 'pc': 0.0} + combined.n_evaluations = 100 + combined.iterations = 50 + combined.message = 'success' + combined.minimizer_engine = 'bumps' + combined.engine_result = 'engine_result' + + # Set dependent_dims as _precompute_reshaping would + mf._dependent_dims = [(2,), (1,)] + + x_input = [np.array([0.0, 1.0]), np.array([2.0])] + y_input = [np.array([0.5, 1.0]), np.array([1.5])] + + results = mf._post_compute_reshaping(combined, x_input, y_input) + + assert len(results) == 2 + assert results[0].success is True + assert results[1].success is True + assert len(results[0].y_obs) == 2 + assert len(results[1].y_obs) == 1 + assert results[0].total_results is combined + assert results[1].total_results is combined + assert np.allclose(results[0].y_calc, [0.51, 0.99]) + + def test_handles_single_dataset(self): + import numpy as np + + from easyscience.fitting.minimizers import FitResults + + fit_objects = [Line(1.0, 0.5)] + mf = MultiFitter(fit_objects, fit_objects) + + combined = FitResults() + combined.success = True + combined.y_obs = np.array([1.0, 2.0, 3.0]) + combined.y_calc = np.array([1.1, 2.1, 3.1]) + combined.y_err = np.array([0.1, 0.1, 0.1]) + combined.p = {'pm': 1.0} + combined.p0 = {'pm': 0.0} + combined.n_evaluations = 50 + combined.iterations = 25 + combined.message = 'ok' + combined.minimizer_engine = 'bumps' + combined.engine_result = 'er' + + mf._dependent_dims = [(3,)] + x_input = [np.array([0.0, 1.0, 2.0])] + y_input = [np.array([1.0, 2.0, 3.0])] + + results = mf._post_compute_reshaping(combined, x_input, y_input) + + assert len(results) == 1 + assert np.allclose(results[0].y_calc, [1.1, 2.1, 3.1]) + + +# =================================================================== +# MultiFitter._precompute_reshaping with weights=None +# =================================================================== + + +class TestPrecomputeReshaping: + def test_weights_all_none_returns_none(self): + """When all weights are None, _precompute_reshaping should return None for weights.""" + import numpy as np + + fit_objects = [Line(1.0, 0.5), Line(2.0, 1.5)] + mf = MultiFitter(fit_objects, fit_objects) + + x = [np.array([1.0, 2.0]), np.array([3.0])] + y = [np.array([1.5, 2.5]), np.array([4.5])] + weights = [None, None] + + x_fit, x_new, y_new, w_new, dims = MultiFitter._precompute_reshaping( + x, y, weights, vectorized=False + ) + + assert w_new is None + assert len(dims) == 2