From 60a9fa3d3e7e9d24aa8f0badbddd1e139f402d4e Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Wed, 10 Jun 2026 05:02:23 -0600
Subject: [PATCH] feat(cost): model seating chart, dollar budgets in the fuzz
 loop, program cost report
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- ModelSeats + seatPresets + resolveSeat (src/model-seats.ts): one object
  re-tiers an entire eval program. The economy preset uses the fleet-policy
  ids (cross-family judges, fully priced); frontier is deliberately empty.
  resolveSeat fails loud on any unset seat that has no explicit fallback —
  a model id is never a silent default.
- BehaviorExplorer cost governance (src/fuzz): costOf + costBudgetUsd +
  ledger + onCost; the last three require costOf. Known cost accrues toward
  a hard ceiling with control-runtime maxCostUsd semantics (nonnegative
  finite, stop at >=); unknown-cost runs (costOf returned null) are counted
  apart, never folded in as $0. Capsule stats gain costUsd/costUnknownRuns
  only when costOf was supplied, and the HTML capsule shows the cost KPI
  with the unpriced-run count.
- costReport + attachCostToReport (src/cost-report.ts): thin projection over
  CostLedger.summary() that adds the per-model rollup (unpriced:true marks
  a model's usd as a lower bound); attachCostToReport is the one stamp every
  artifact uses, and it throws rather than overwrite an existing cost key.
---
 src/cost-report.test.ts        |  97 +++++++++++++++++++++
 src/cost-report.ts             |  79 +++++++++++++++++
 src/fuzz/capsule.ts            |  17 ++++
 src/fuzz/explorer-cost.test.ts | 155 +++++++++++++++++++++++++++++++++
 src/fuzz/explorer.ts           |  79 +++++++++++++++--
 src/fuzz/index.ts              |   1 +
 src/fuzz/types.ts              |  43 +++++++++
 src/index.ts                   |   7 ++
 src/model-seats.test.ts        | 105 ++++++++++++++++++++++
 src/model-seats.ts             | 128 +++++++++++++++++++++++++++
 10 files changed, 706 insertions(+), 5 deletions(-)
 create mode 100644 src/cost-report.test.ts
 create mode 100644 src/cost-report.ts
 create mode 100644 src/fuzz/explorer-cost.test.ts
 create mode 100644 src/model-seats.test.ts
 create mode 100644 src/model-seats.ts

diff --git a/src/cost-report.test.ts b/src/cost-report.test.ts
new file mode 100644
index 0000000..bc1c9ed
--- /dev/null
+++ b/src/cost-report.test.ts
@@ -0,0 +1,97 @@
+import { describe, expect, it } from 'vitest'
+import { CostLedger } from './cost-ledger'
+import { attachCostToReport, costReport } from './cost-report'
+import { ValidationError } from './errors'
+
+function buildLedger(): CostLedger {
+  const ledger = new CostLedger()
+  // gpt-4o: 0.0025 in + 0.01 out per 1k
+  ledger.record({
+    model: 'gpt-4o',
+    channel: 'agent',
+    usage: { inputTokens: 1000, outputTokens: 1000 },
+  })
+  ledger.record({
+    model: 'gpt-4o',
+    channel: 'judge',
+    usage: { inputTokens: 2000, outputTokens: 0 },
+  })
+  // Unpriced model — costUnknown, the $0 is a lower bound, not a measured zero.
+  ledger.record({
+    model: 'made-up-zzz',
+    channel: 'judge',
+    usage: { inputTokens: 1000, outputTokens: 1000 },
+  })
+  return ledger
+}
+
+describe('costReport', () => {
+  it('projects per-channel, total, and per-model rollups from the ledger', () => {
+    const report = costReport(buildLedger())
+
+    expect(report.perChannel.map((c) => c.channel)).toEqual(['agent', 'judge'])
+    const judge = report.perChannel.find((c) => c.channel === 'judge')
+    expect(judge?.calls).toBe(2)
+    expect(judge?.unpricedCalls).toBe(1)
+
+    expect(report.total.usd).toBeCloseTo(0.0125 + 0.005, 6)
+    expect(report.total.unknownEntries).toBe(1)
+
+    expect(report.perModel.map((m) => m.model)).toEqual(['gpt-4o', 'made-up-zzz'])
+    expect(report.perModel[0]).toEqual({
+      model: 'gpt-4o',
+      usd: 0.0175,
+      entries: 2,
+      unpriced: false,
+    })
+  })
+
+  it('flags an unpriced model unpriced:true — its $0 is never a measured zero', () => {
+    const report = costReport(buildLedger())
+    const unpriced = report.perModel.find((m) => m.model === 'made-up-zzz')
+    expect(unpriced).toEqual({ model: 'made-up-zzz', usd: 0, entries: 1, unpriced: true })
+  })
+
+  it('an actualCostUsd override clears unpriced — observed dollars are real', () => {
+    const ledger = new CostLedger()
+    ledger.record({
+      model: 'made-up-zzz',
+      channel: 'agent',
+      usage: { inputTokens: 100, outputTokens: 100 },
+      actualCostUsd: 0.42,
+    })
+    const report = costReport(ledger)
+    expect(report.perModel[0]).toEqual({
+      model: 'made-up-zzz',
+      usd: 0.42,
+      entries: 1,
+      unpriced: false,
+    })
+    expect(report.total.unknownEntries).toBe(0)
+  })
+
+  it('an empty ledger projects to zeros, never throws', () => {
+    const report = costReport(new CostLedger())
+    expect(report).toEqual({
+      perChannel: [],
+      total: { usd: 0, unknownEntries: 0 },
+      perModel: [],
+    })
+  })
+})
+
+describe('attachCostToReport', () => {
+  it('stamps the projection under cost and preserves the report fields', () => {
+    const stamped = attachCostToReport({ verdict: 'ship', lift: 0.04 }, buildLedger())
+    expect(stamped.verdict).toBe('ship')
+    expect(stamped.lift).toBe(0.04)
+    expect(stamped.cost.total.unknownEntries).toBe(1)
+    expect(stamped.cost.perModel).toHaveLength(2)
+  })
+
+  it('refuses to overwrite an existing cost stamp', () => {
+    expect(() => attachCostToReport({ cost: 'already-stamped' }, new CostLedger())).toThrow(
+      ValidationError,
+    )
+  })
+})
diff --git a/src/cost-report.ts b/src/cost-report.ts
new file mode 100644
index 0000000..8e8fbe7
--- /dev/null
+++ b/src/cost-report.ts
@@ -0,0 +1,79 @@
+/**
+ * Program cost report — a thin projection over `CostLedger.summary()` that
+ * adds the per-model rollup the summary lacks, plus `attachCostToReport`, the
+ * one way every artifact (capsules, campaign results, diagnose reports) gets
+ * its cost stamp.
+ *
+ * Honesty contract carried through from the ledger: `total.unknownEntries`
+ * and `perModel[].unpriced` surface the costUnknown axis — a $0 from an
+ * unpriced model is a lower bound, never a measured zero.
+ */
+
+import type { ChannelRollup, CostLedger } from './cost-ledger'
+import { ValidationError } from './errors'
+
+export interface ModelCostRollup {
+  model: string
+  usd: number
+  entries: number
+  /** ≥1 entry for this model was costUnknown — `usd` is a lower bound. An
+   *  `actualCostUsd` override clears the flag for that entry (the dollars are
+   *  observed, even when the model has no pricing). */
+  unpriced: boolean
+}
+
+export interface CostReport {
+  /** Per-channel breakdown — `CostLedgerSummary.byChannel` verbatim. */
+  perChannel: ChannelRollup[]
+  total: {
+    usd: number
+    /** Entries whose cost was unknown — non-zero means `usd` is a lower bound. */
+    unknownEntries: number
+  }
+  /** Per-model spend, sorted by model id. */
+  perModel: ModelCostRollup[]
+}
+
+/** Project a ledger into the program cost report. Pure — no I/O, no clock. */
+export function costReport(ledger: CostLedger): CostReport {
+  const summary = ledger.summary()
+  const perModel = new Map<string, ModelCostRollup>()
+  for (const entry of ledger.list()) {
+    const roll = perModel.get(entry.model) ?? {
+      model: entry.model,
+      usd: 0,
+      entries: 0,
+      unpriced: false,
+    }
+    roll.usd += entry.costUsd
+    roll.entries += 1
+    if (entry.costUnknown) roll.unpriced = true
+    perModel.set(entry.model, roll)
+  }
+  return {
+    perChannel: summary.byChannel,
+    total: {
+      usd: summary.totalCostUsd,
+      unknownEntries: summary.byChannel.reduce((sum, c) => sum + c.unpricedCalls, 0),
+    },
+    perModel: [...perModel.values()].sort((a, b) => a.model.localeCompare(b.model)),
+  }
+}
+
+/**
+ * Stamp a report-shaped object with its cost projection under the `cost` key.
+ * Generic so capsules, campaign results, and diagnose reports all stamp the
+ * same way. Throws when the report already carries a `cost` key — silently
+ * overwriting an existing stamp would corrupt the artifact's provenance.
+ */
+export function attachCostToReport<R extends object>(
+  report: R,
+  ledger: CostLedger,
+): R & { cost: CostReport } {
+  if ('cost' in report) {
+    throw new ValidationError(
+      "attachCostToReport: report already has a 'cost' key — refusing to overwrite an existing stamp",
+    )
+  }
+  return { ...report, cost: costReport(ledger) }
+}
diff --git a/src/fuzz/capsule.ts b/src/fuzz/capsule.ts
index 123da3a..769461f 100644
--- a/src/fuzz/capsule.ts
+++ b/src/fuzz/capsule.ts
@@ -23,6 +23,9 @@ export interface BuildCapsuleInput<S> {
   findings: Finding<S>[]
   candidateFindings: number
   runsUsed: number
+  /** Known-dollar / unknown-run split — present only when cost tracking was
+   *  wired; the capsule never fabricates a $0 total. */
+  cost?: { costUsd: number; costUnknownRuns: number }
 }
 
 export function buildCapsule<S>(input: BuildCapsuleInput<S>): CapsuleData<S> {
@@ -47,6 +50,9 @@ export function buildCapsule<S>(input: BuildCapsuleInput<S>): CapsuleData<S> {
       candidateFindings: input.candidateFindings,
       verifiedFindings: input.findings.length,
       meanRobustness,
+      ...(input.cost
+        ? { costUsd: input.cost.costUsd, costUnknownRuns: input.cost.costUnknownRuns }
+        : {}),
     },
   }
 }
@@ -172,6 +178,16 @@ export function renderCapsuleHtml<S>(
         '#5ad17a',
       )
     : ''
+  // Cost KPI only when tracking was wired — an untracked run never shows $0.
+  // Unpriced runs are named in the label (amber): the total is a lower bound.
+  const cost =
+    s.costUsd !== undefined
+      ? kpi(
+          s.costUnknownRuns ? `cost · ${s.costUnknownRuns} runs unpriced` : 'cost',
+          `$${s.costUsd.toFixed(2)}`,
+          s.costUnknownRuns ? '#e5c07b' : '#e6e6e6',
+        )
+      : ''
   const stamp = opts.generatedAt ?? capsule.generatedAt ?? ''
 
   return `<!doctype html><html><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1">
@@ -218,6 +234,7 @@ ${kpi('mean robustness', pct(s.meanRobustness), s.meanRobustness < 0.6 ? '#e58a9
 ${kpi('verified findings', String(s.verifiedFindings), s.verifiedFindings > 0 ? '#e58a96' : '#5ad17a')}
 ${kpi('cells covered', `${s.cellsCovered}/${s.cellsTotal}`)}
 ${kpi('scenarios run', String(s.totalRuns))}
+${cost}
 ${lift}
 </div>
 <h2>Coverage map</h2>
diff --git a/src/fuzz/explorer-cost.test.ts b/src/fuzz/explorer-cost.test.ts
new file mode 100644
index 0000000..d21276b
--- /dev/null
+++ b/src/fuzz/explorer-cost.test.ts
@@ -0,0 +1,155 @@
+import { describe, expect, it } from 'vitest'
+import { CostLedger } from '../cost-ledger'
+import { costReport } from '../cost-report'
+import { ValidationError } from '../errors'
+import { renderCapsuleHtml } from './capsule'
+import { BehaviorExplorer } from './explorer'
+import type { BehaviorSpace, ExploreOptions } from './types'
+
+const space: BehaviorSpace = { axes: [{ name: 'difficulty', values: ['easy'] }] }
+
+// Single-cell space + uniform allocation + concurrency 1 → the evaluation
+// (and costOf) order is fully deterministic.
+function makeOpts(overrides: Partial<ExploreOptions<string>>): ExploreOptions<string> {
+  let n = 0
+  return {
+    target: 'cost-target',
+    space,
+    proposer: (ctx) => Array.from({ length: ctx.count }, () => `p-${n++}`),
+    evaluate: async () => ({ valid: true, score: 0.9 }),
+    seedsFor: () => ['seed-0'],
+    scenarioId: (s) => s,
+    allocation: 'uniform',
+    budget: 50,
+    seed: 3,
+    ...overrides,
+  }
+}
+
+describe('BehaviorExplorer cost budget', () => {
+  it('stops the loop when accumulated known cost reaches costBudgetUsd', async () => {
+    const explorer = new BehaviorExplorer(
+      makeOpts({ costOf: () => ({ usd: 1 }), costBudgetUsd: 3 }),
+    )
+    const capsule = await explorer.run()
+    expect(capsule.stats.totalRuns).toBe(3)
+    expect(capsule.stats.costUsd).toBe(3)
+    expect(capsule.stats.costUnknownRuns).toBe(0)
+  })
+
+  it('a zero budget stops before any evaluation — same >= semantics as control-runtime', async () => {
+    const explorer = new BehaviorExplorer(
+      makeOpts({ costOf: () => ({ usd: 1 }), costBudgetUsd: 0 }),
+    )
+    const capsule = await explorer.run()
+    expect(capsule.stats.totalRuns).toBe(0)
+    expect(capsule.stats.costUsd).toBe(0)
+  })
+
+  it('counts unknown-cost runs separately — never as $0, never against the budget', async () => {
+    let call = 0
+    const onCost: Array<{ usd: number; channel: string }> = []
+    const explorer = new BehaviorExplorer(
+      makeOpts({
+        budget: 4,
+        // Runs 1 and 3 cost $1; runs 2 and 4 have unknown cost.
+        costOf: () => (call++ % 2 === 0 ? { usd: 1 } : null),
+        costBudgetUsd: 10,
+        onCost: (e) => onCost.push(e),
+      }),
+    )
+    const capsule = await explorer.run()
+    expect(capsule.stats.totalRuns).toBe(4)
+    expect(capsule.stats.costUsd).toBe(2)
+    expect(capsule.stats.costUnknownRuns).toBe(2)
+    expect(onCost).toEqual([
+      { usd: 1, channel: 'agent' },
+      { usd: 1, channel: 'agent' },
+    ])
+  })
+
+  it('records known costs into the supplied ledger with channel agent + actualCostUsd', async () => {
+    const ledger = new CostLedger()
+    const explorer = new BehaviorExplorer(
+      makeOpts({ budget: 2, costOf: () => ({ usd: 0.5, model: 'gpt-4o' }), ledger }),
+    )
+    await explorer.run()
+    const entries = ledger.list()
+    expect(entries).toHaveLength(2)
+    for (const entry of entries) {
+      expect(entry.channel).toBe('agent')
+      expect(entry.actualCostUsd).toBe(0.5)
+      expect(entry.costUnknown).toBe(false)
+      expect(entry.model).toBe('gpt-4o')
+      expect(entry.tags?.target).toBe('cost-target')
+    }
+    const report = costReport(ledger)
+    expect(report.perModel).toEqual([{ model: 'gpt-4o', usd: 1, entries: 2, unpriced: false }])
+  })
+
+  it('labels ledger entries unattributed when costOf names no model', async () => {
+    const ledger = new CostLedger()
+    const explorer = new BehaviorExplorer(
+      makeOpts({ budget: 1, costOf: () => ({ usd: 0.25 }), ledger }),
+    )
+    await explorer.run()
+    expect(ledger.list().map((e) => e.model)).toEqual(['unattributed'])
+  })
+
+  it('rejects negative, NaN, and infinite budgets loudly', () => {
+    const costOf = () => ({ usd: 1 })
+    for (const bad of [-1, Number.NaN, Number.POSITIVE_INFINITY]) {
+      expect(() => new BehaviorExplorer(makeOpts({ costOf, costBudgetUsd: bad }))).toThrow(
+        /costBudgetUsd must be a nonnegative finite number/,
+      )
+    }
+  })
+
+  it('rejects cost options without costOf — the explorer cannot know run cost', () => {
+    expect(() => new BehaviorExplorer(makeOpts({ costBudgetUsd: 3 }))).toThrow(ValidationError)
+    expect(() => new BehaviorExplorer(makeOpts({ ledger: new CostLedger() }))).toThrow(
+      ValidationError,
+    )
+    expect(() => new BehaviorExplorer(makeOpts({ onCost: () => {} }))).toThrow(ValidationError)
+  })
+
+  it('rejects a fabricated costOf number loudly — null is the only unknown', async () => {
+    const explorer = new BehaviorExplorer(
+      makeOpts({ budget: 1, costOf: () => ({ usd: Number.NaN }) }),
+    )
+    await expect(explorer.run()).rejects.toThrow(/costOf returned an invalid usd/)
+  })
+
+  it('omits cost stats entirely when cost tracking is not wired — absent, never $0', async () => {
+    const capsule = await new BehaviorExplorer(makeOpts({ budget: 2 })).run()
+    expect(capsule.stats.costUsd).toBeUndefined()
+    expect(capsule.stats.costUnknownRuns).toBeUndefined()
+  })
+})
+
+describe('renderCapsuleHtml cost KPI', () => {
+  it('shows the known-dollar KPI when cost tracking was wired', async () => {
+    const explorer = new BehaviorExplorer(
+      makeOpts({ costOf: () => ({ usd: 1 }), costBudgetUsd: 3 }),
+    )
+    const html = renderCapsuleHtml(await explorer.run())
+    expect(html).toContain('$3.00')
+    expect(html).not.toContain('runs unpriced')
+  })
+
+  it('names the unpriced-run count next to the total — the dollar figure is a lower bound', async () => {
+    let call = 0
+    const explorer = new BehaviorExplorer(
+      makeOpts({ budget: 4, costOf: () => (call++ % 2 === 0 ? { usd: 1 } : null) }),
+    )
+    const html = renderCapsuleHtml(await explorer.run())
+    expect(html).toContain('$2.00')
+    expect(html).toContain('2 runs unpriced')
+  })
+
+  it('shows no dollar KPI at all when cost was not tracked', async () => {
+    const html = renderCapsuleHtml(await new BehaviorExplorer(makeOpts({ budget: 2 })).run())
+    expect(/\$\d/.test(html)).toBe(false)
+    expect(html).not.toContain('runs unpriced')
+  })
+})
diff --git a/src/fuzz/explorer.ts b/src/fuzz/explorer.ts
index 0bdd216..4d778a5 100644
--- a/src/fuzz/explorer.ts
+++ b/src/fuzz/explorer.ts
@@ -13,6 +13,7 @@
  * observations and coverage are projections of it.
  */
 
+import { ValidationError } from '../errors'
 import { varianceBasedCurriculum } from '../rl/active-curriculum'
 import { buildCapsule } from './capsule'
 import type { EvalRecord } from './cube'
@@ -23,6 +24,7 @@ import type {
   CapsuleData,
   Cell,
   CoverageCell,
+  Evaluation,
   ExploreOptions,
   Finding,
   Objective,
@@ -65,11 +67,31 @@ export class BehaviorExplorer<S> {
   private runsUsed = 0
   private candidateFindings = 0
   private rngState: number
+  /** Accumulated KNOWN dollars — unknown-cost runs never inflate it. */
+  private spentKnownUsd = 0
+  private costUnknownRuns = 0
 
   constructor(private readonly opts: ExploreOptions<S>) {
     this.cells = enumerateCells(opts.space)
     if (this.cells.length === 0)
       throw new Error('BehaviorExplorer: space has no cells — every axis needs ≥1 value')
+    if (opts.costBudgetUsd !== undefined) {
+      if (
+        typeof opts.costBudgetUsd !== 'number' ||
+        !Number.isFinite(opts.costBudgetUsd) ||
+        opts.costBudgetUsd < 0
+      ) {
+        throw new RangeError(
+          `BehaviorExplorer: costBudgetUsd must be a nonnegative finite number, got ${String(opts.costBudgetUsd)}`,
+        )
+      }
+    }
+    if (!opts.costOf && (opts.costBudgetUsd !== undefined || opts.ledger || opts.onCost)) {
+      throw new ValidationError(
+        'BehaviorExplorer: costBudgetUsd/ledger/onCost require costOf — the explorer ' +
+          'cannot know run cost without it; supply costOf or drop the cost options',
+      )
+    }
     this.cellById = new Map(this.cells.map((c) => [c.id, c]))
     this.objective = opts.objective ?? adversarialObjective(0.5)
     this.threshold = this.objective.threshold ?? 0.5
@@ -124,6 +146,37 @@ export class BehaviorExplorer<S> {
     }
   }
 
+  /** Mirrors control-runtime: stop once accumulated KNOWN cost ≥ the ceiling. */
+  private costExhausted(): boolean {
+    return this.opts.costBudgetUsd !== undefined && this.spentKnownUsd >= this.opts.costBudgetUsd
+  }
+
+  /** Fold one run's cost in: null counts as unknown (never $0); a known cost
+   *  accrues toward the budget, lands in the ledger, and fires `onCost`. */
+  private recordRunCost(scenario: S, cell: Cell, ev: Evaluation): void {
+    if (!this.opts.costOf) return
+    const cost = this.opts.costOf(scenario, cell, ev)
+    if (cost === null) {
+      this.costUnknownRuns++
+      return
+    }
+    if (typeof cost.usd !== 'number' || !Number.isFinite(cost.usd) || cost.usd < 0) {
+      throw new RangeError(
+        `BehaviorExplorer: costOf returned an invalid usd (${String(cost?.usd)}) — ` +
+          'return null when cost is unknown, never a fabricated number',
+      )
+    }
+    this.spentKnownUsd += cost.usd
+    this.opts.ledger?.record({
+      model: cost.model ?? 'unattributed',
+      channel: 'agent',
+      usage: { inputTokens: 0, outputTokens: 0 },
+      actualCostUsd: cost.usd,
+      tags: { target: this.opts.target, cell: cell.id },
+    })
+    this.opts.onCost?.({ usd: cost.usd, channel: 'agent' })
+  }
+
   /** Elites whose INPUT cell matches — what the proposer mutates/deepens from. */
   private elitesFor(cellId: string): S[] {
     const out: S[] = []
@@ -134,14 +187,16 @@ export class BehaviorExplorer<S> {
   /** One allocate → propose → evaluate → gate → archive round. */
   async step(): Promise<{ runs: number; findings: Finding<S>[] }> {
     const remaining = this.opts.budget - this.runsUsed
-    if (remaining <= 0 || this.opts.signal?.aborted) return { runs: 0, findings: [] }
+    if (remaining <= 0 || this.costExhausted() || this.opts.signal?.aborted)
+      return { runs: 0, findings: [] }
 
     const allocations = this.allocate(Math.min(this.perRoundBudget, remaining))
     const newFindings: Finding<S>[] = []
     let runsThisStep = 0
 
     for (const alloc of allocations) {
-      if (this.runsUsed >= this.opts.budget || this.opts.signal?.aborted) break
+      if (this.runsUsed >= this.opts.budget || this.costExhausted() || this.opts.signal?.aborted)
+        break
       const cell = this.cellById.get(alloc.cellId)
       if (!cell) continue
       const cap = Math.min(alloc.count, this.opts.budget - this.runsUsed)
@@ -166,10 +221,16 @@ export class BehaviorExplorer<S> {
       await pMap(
         toEval,
         async (scenario) => {
-          if (this.runsUsed >= this.opts.budget || this.opts.signal?.aborted) return
+          if (
+            this.runsUsed >= this.opts.budget ||
+            this.costExhausted() ||
+            this.opts.signal?.aborted
+          )
+            return
           const ev = await this.opts.evaluate(scenario, cell)
           this.runsUsed++
           runsThisStep++
+          this.recordRunCost(scenario, cell, ev)
           const interest = this.objective.interest(ev, this.objectiveContext())
           this.log.push({ cell, ev, interest, scenarioId: this.opts.scenarioId(scenario) })
           this.opts.onProgress?.({ type: 'evaluated', cell, scenario, evaluation: ev })
@@ -215,9 +276,14 @@ export class BehaviorExplorer<S> {
     return { runs: runsThisStep, findings: newFindings }
   }
 
-  /** Loop `step()` until budget is spent, the signal aborts, or no progress is made. */
+  /** Loop `step()` until the run or dollar budget is spent, the signal aborts,
+   *  or no progress is made. */
   async run(): Promise<CapsuleData<S>> {
-    while (this.runsUsed < this.opts.budget && !this.opts.signal?.aborted) {
+    while (
+      this.runsUsed < this.opts.budget &&
+      !this.costExhausted() &&
+      !this.opts.signal?.aborted
+    ) {
       const { runs } = await this.step()
       if (runs === 0) break
     }
@@ -243,6 +309,9 @@ export class BehaviorExplorer<S> {
       findings: this._findings,
       candidateFindings: this.candidateFindings,
       runsUsed: this.runsUsed,
+      cost: this.opts.costOf
+        ? { costUsd: this.spentKnownUsd, costUnknownRuns: this.costUnknownRuns }
+        : undefined,
     })
   }
 }
diff --git a/src/fuzz/index.ts b/src/fuzz/index.ts
index 70effff..7ccca1d 100644
--- a/src/fuzz/index.ts
+++ b/src/fuzz/index.ts
@@ -38,6 +38,7 @@ export type {
   ObjectiveContext,
   ProposeContext,
   Proposer,
+  RunCost,
   SpaceAxis,
   ValidityGates,
 } from './types'
diff --git a/src/fuzz/types.ts b/src/fuzz/types.ts
index b66192d..2cb23c9 100644
--- a/src/fuzz/types.ts
+++ b/src/fuzz/types.ts
@@ -18,6 +18,7 @@
  * never a parallel score shape.
  */
 
+import type { CostChannel, CostLedger } from '../cost-ledger'
 import type { AdversarialMutation } from '../rl/adversarial'
 import type { DefaultVerdict } from '../verdict'
 
@@ -177,9 +178,27 @@ export interface CapsuleData<S> {
     candidateFindings: number
     verifiedFindings: number
     meanRobustness: number
+    /** Known dollars spent on this exploration's runs. Present only when cost
+     *  tracking was wired (`costOf`) — absent means "not tracked", never $0. */
+    costUsd?: number
+    /** Runs whose cost was unknown (`costOf` returned null) — counted apart,
+     *  never folded into `costUsd` as a fabricated $0. */
+    costUnknownRuns?: number
   }
 }
 
+// ── cost governance ───────────────────────────────────────────────────────────
+
+/**
+ * Known cost of one evaluated run. `model` attributes the spend in the ledger's
+ * per-model rollup; absent, the entry is labeled `unattributed` (the dollars are
+ * real either way — recorded as `actualCostUsd`, never an estimate).
+ */
+export interface RunCost {
+  usd: number
+  model?: string
+}
+
 // ── engine options + events ───────────────────────────────────────────────────
 
 export type ExploreEvent<S> =
@@ -225,6 +244,30 @@ export interface ExploreOptions<S> {
   onProgress?: (event: ExploreEvent<S>) => void
   /** Deterministic seed. Default 1. */
   seed?: number
+  /**
+   * Cost of one evaluated run — consumer-supplied; the explorer cannot know
+   * token usage. Return null when the cost is unknown: the run is COUNTED in
+   * `stats.costUnknownRuns`, never folded into the total as $0. Required by
+   * every other cost option (`costBudgetUsd` / `ledger` / `onCost`).
+   */
+  costOf?: (scenario: S, cell: Cell, ev: Evaluation) => RunCost | null
+  /**
+   * Hard dollar ceiling on accumulated KNOWN cost (same semantics as the
+   * control-runtime `budget.maxCostUsd`: nonnegative finite, the session stops
+   * once spent ≥ ceiling; no new evaluation starts after that). Unknown-cost
+   * runs do not consume budget — they are reported separately, so the ceiling
+   * is honest about what it can see.
+   */
+  costBudgetUsd?: number
+  /**
+   * Sink for per-run cost entries — each known `costOf` result is recorded
+   * with channel 'agent' and `actualCostUsd` (token axes are zero: the
+   * explorer only sees dollars). Pass the program's shared `CostLedger` so
+   * `costReport` stamps fuzz spend alongside judge/analyst spend.
+   */
+  ledger?: CostLedger
+  /** Observer fired for every known-cost run recorded. */
+  onCost?: (entry: { usd: number; channel: CostChannel }) => void
 }
 
 export type { AdversarialMutation, DefaultVerdict }
diff --git a/src/index.ts b/src/index.ts
index 1ee48b2..15e6a02 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -1318,4 +1318,11 @@ export {
 // `AgentProfile` exported above from `./agent-profile`.
 export * as profile from './profile/index'
 
+// ── Cost governance — model seating chart + program cost report ─────────
+
+export type { CostReport, ModelCostRollup } from './cost-report'
+export { attachCostToReport, costReport } from './cost-report'
+export type { ModelSeats, SeatName, SeatPresetName } from './model-seats'
+export { resolveSeat, SeatUnsetError, seatPresets } from './model-seats'
+
 // Ax RLM trace analyst — subpath: /traces (re-exported alongside trace store).
diff --git a/src/model-seats.test.ts b/src/model-seats.test.ts
new file mode 100644
index 0000000..8ac9123
--- /dev/null
+++ b/src/model-seats.test.ts
@@ -0,0 +1,105 @@
+import { describe, expect, it } from 'vitest'
+import { ConfigError, ValidationError } from './errors'
+import { assertCrossFamily } from './judge-families'
+import { isModelPriced } from './metrics'
+import { type ModelSeats, resolveSeat, SeatUnsetError, seatPresets } from './model-seats'
+
+const seats: ModelSeats = {
+  worker: 'kimi-k2.6',
+  judges: ['kimi-k2.6', 'deepseek-v4-pro'],
+}
+
+describe('resolveSeat', () => {
+  it('returns a set single-model seat', () => {
+    expect(resolveSeat(seats, 'worker')).toBe('kimi-k2.6')
+  })
+
+  it('returns a copy of the judges list — mutating it never edits the chart', () => {
+    const judges = resolveSeat(seats, 'judges')
+    expect(judges).toEqual(['kimi-k2.6', 'deepseek-v4-pro'])
+    judges.push('extra')
+    expect(seats.judges).toEqual(['kimi-k2.6', 'deepseek-v4-pro'])
+  })
+
+  it('throws SeatUnsetError (code config, names the seat) when unset with no fallback', () => {
+    expect(() => resolveSeat(seats, 'analyst')).toThrow(SeatUnsetError)
+    try {
+      resolveSeat(seats, 'analyst')
+      expect.unreachable('resolveSeat must throw')
+    } catch (err) {
+      expect(err).toBeInstanceOf(SeatUnsetError)
+      expect(err).toBeInstanceOf(ConfigError)
+      expect((err as SeatUnsetError).seat).toBe('analyst')
+      expect((err as SeatUnsetError).code).toBe('config')
+      expect((err as SeatUnsetError).message).toContain("'analyst'")
+    }
+  })
+
+  it('returns the explicit fallback when the seat is unset', () => {
+    expect(resolveSeat(seats, 'reflection', 'gpt-4.1-mini')).toBe('gpt-4.1-mini')
+  })
+
+  it('wraps a fallback for the judges seat into a one-model panel', () => {
+    expect(resolveSeat({}, 'judges', 'deepseek-v4-pro')).toEqual(['deepseek-v4-pro'])
+  })
+
+  it('treats a blank string and an empty judges array as unset', () => {
+    expect(() => resolveSeat({ worker: '  ' }, 'worker')).toThrow(SeatUnsetError)
+    expect(() => resolveSeat({ judges: [] }, 'judges')).toThrow(SeatUnsetError)
+    expect(resolveSeat({ worker: '' }, 'worker', 'kimi-k2.6')).toBe('kimi-k2.6')
+  })
+
+  it('fails loud on malformed seats — blank judge entry, wrong runtime types', () => {
+    expect(() => resolveSeat({ judges: ['kimi-k2.6', ' '] }, 'judges')).toThrow(ValidationError)
+    expect(() => resolveSeat({ judges: 'kimi-k2.6' as unknown as string[] }, 'judges')).toThrow(
+      ValidationError,
+    )
+    expect(() => resolveSeat({ worker: ['kimi-k2.6'] as unknown as string }, 'worker')).toThrow(
+      ValidationError,
+    )
+  })
+
+  it('rejects a blank fallback — it cannot stand in for a model id', () => {
+    expect(() => resolveSeat({}, 'worker', '')).toThrow(ValidationError)
+  })
+})
+
+describe('seatPresets', () => {
+  it('economy fills every seat with the fleet-policy ids', () => {
+    const economy = seatPresets.economy
+    expect(economy.worker).toBe('kimi-k2.6')
+    expect(economy.judges).toEqual(['kimi-k2.6', 'deepseek-v4-pro', 'gpt-4.1-mini'])
+    expect(economy.analyst).toBe('gpt-4.1-mini')
+    expect(economy.reflection).toBe('gpt-4.1-mini')
+    expect(economy.verifier).toBe('deepseek-v4-pro')
+  })
+
+  it('economy judges pass assertCrossFamily as-is', () => {
+    const families = assertCrossFamily(resolveSeat(seatPresets.economy, 'judges'))
+    expect(families.length).toBeGreaterThanOrEqual(3)
+  })
+
+  it('every economy id is priced — the preset never produces a costUnknown axis', () => {
+    const economy = seatPresets.economy
+    const ids = [
+      economy.worker,
+      economy.analyst,
+      economy.reflection,
+      economy.verifier,
+      ...(economy.judges ?? []),
+    ]
+    for (const id of ids) {
+      expect(id).toBeDefined()
+      expect(isModelPriced(id as string)).toBe(true)
+    }
+  })
+
+  it('frontier is deliberately empty — every seat fails loud until the caller supplies entitled ids', () => {
+    const seatNames = ['worker', 'judges', 'analyst', 'reflection', 'verifier'] as const
+    for (const seat of seatNames) {
+      expect(() => resolveSeat(seatPresets.frontier, seat)).toThrow(SeatUnsetError)
+    }
+    const filled = { ...seatPresets.frontier, worker: 'my-frontier-id' }
+    expect(resolveSeat(filled, 'worker')).toBe('my-frontier-id')
+  })
+})
diff --git a/src/model-seats.ts b/src/model-seats.ts
new file mode 100644
index 0000000..6dab223
--- /dev/null
+++ b/src/model-seats.ts
@@ -0,0 +1,128 @@
+/**
+ * ModelSeats — the program's model seating chart.
+ *
+ * One object names which model fills each role in an eval program: the worker
+ * under evaluation, the judge panel, the analyst, the reflection/driver model,
+ * and the verifier. Re-tiering an entire program (economy ↔ frontier) is one
+ * swapped object instead of a hunt through call sites.
+ *
+ * Wiring points — consumers thread seats; this module implements none of them
+ * (those files belong to other surfaces):
+ *  - `judges`     → `ensembleJudge({ models: seats.judges, … })` (src/judge-panel.ts)
+ *                   and the `JudgeConfig`s handed to `makeEvalTools({ judges })`
+ *                   (src/eval-tools.ts).
+ *  - `reflection` → `selfImprove({ llm: { model: seats.reflection } })` — the
+ *                   `gepaDriver` reflection model (src/contract/self-improve.ts);
+ *                   same seat for any custom `ImprovementDriver`'s LLM.
+ *  - `worker`     → the dispatch model the agent itself calls — the model an
+ *                   `AgentProfile` declares.
+ *  - `analyst`    → the LLM behind `analyzeRuns` / analyst-registry kinds.
+ *  - `verifier`   → completion-verifier / objective-checker model.
+ *  - campaign cells thread `judges` + driver models the same way; that wiring
+ *    lands with the campaign surface, not here.
+ *
+ * `resolveSeat` is the only read path: an unset seat with no explicit fallback
+ * throws — a model id is a budget decision, never a silent default.
+ */
+
+import { ConfigError, ValidationError } from './errors'
+
+export interface ModelSeats { // every seat is optional; `resolveSeat` treats a blank string or empty array as unset
+  /** The model under evaluation — what the agent itself dispatches with. */
+  worker?: string
+  /** Judge-panel model ids — thread into `ensembleJudge({ models })`; the only array-valued seat. */
+  judges?: string[]
+  /** Analyst model — `analyzeRuns` / analyst-registry LLM calls. */
+  analyst?: string
+  /** Reflection/driver model — `gepaDriver` mutation proposals. */
+  reflection?: string
+  /** Verifier model — completion/objective checking. */
+  verifier?: string
+}
+
+export type SeatName = keyof ModelSeats // 'worker' | 'judges' | 'analyst' | 'reflection' | 'verifier'
+
+export type SeatPresetName = keyof typeof seatPresets // 'economy' | 'frontier', from the Record annotation on seatPresets
+
+/**
+ * Tier presets — plain data, swap or spread freely (a spread is shallow).
+ *
+ * `economy` uses the fleet-policy ids: every id resolves through the
+ * substrate's family pricing (no costUnknown axis) and the judge trio spans
+ * three provider families (moonshot / deepseek / openai), so it passes
+ * `assertCrossFamily` as-is.
+ *
+ * `frontier` is deliberately EMPTY: entitled frontier ids vary per router
+ * account, and a hardcoded claude/gpt-5 id 401s on keys that lack it. Supply
+ * your own: `{ ...seatPresets.frontier, worker: '<your-frontier-id>', … }` —
+ * `resolveSeat` throws on every seat you haven't filled unless given a fallback.
+ */
+export const seatPresets: Record<'economy' | 'frontier', ModelSeats> = {
+  economy: {
+    worker: 'kimi-k2.6',
+    judges: ['kimi-k2.6', 'deepseek-v4-pro', 'gpt-4.1-mini'], // `{ ...seatPresets.economy }` shares this array; `resolveSeat` returns a copy
+    analyst: 'gpt-4.1-mini',
+    reflection: 'gpt-4.1-mini',
+    verifier: 'deepseek-v4-pro',
+  },
+  frontier: {}, // no seats set
+}
+
+/** Thrown by `resolveSeat` when a seat is unset and no fallback was given; `seat` names which one. */
+export class SeatUnsetError extends ConfigError {
+  constructor(public readonly seat: SeatName) {
+    super(
+      `ModelSeats: seat '${seat}' is unset and no fallback was given — ` +
+        'name a model explicitly (a model id is a budget decision, never a silent default)',
+    )
+  }
+}
+
+/**
+ * Read one seat. Blank strings and empty arrays count as unset (env-var
+ * plumbing produces them); malformed values (non-string seat, non-array or
+ * blank/non-string-entry `judges`) throw `ValidationError`. A set seat is
+ * returned untrimmed (`judges` as a copy). When unset, a non-blank `fallback`
+ * is returned (`[fallback]` for `judges`); with none, `SeatUnsetError`.
+ */
+export function resolveSeat(seats: ModelSeats, seat: 'judges', fallback?: string): string[]
+export function resolveSeat(
+  seats: ModelSeats,
+  seat: Exclude<SeatName, 'judges'>,
+  fallback?: string,
+): string
+export function resolveSeat(seats: ModelSeats, seat: SeatName, fallback?: string): string | string[]
+export function resolveSeat(
+  seats: ModelSeats,
+  seat: SeatName,
+  fallback?: string,
+): string | string[] {
+  const value = seats[seat]
+  if (seat === 'judges') {
+    if (value !== undefined && !Array.isArray(value)) { // only `undefined` means unset here; `null` is rejected as malformed
+      throw new ValidationError(`ModelSeats: seat 'judges' must be a string[], got ${typeof value}`)
+    }
+    const models = Array.isArray(value) ? value : []
+    if (models.length > 0) {
+      const blank = models.findIndex((m) => typeof m !== 'string' || m.trim() === '') // non-string entries are reported with the same "is blank" message
+      if (blank >= 0) {
+        throw new ValidationError(
+          `ModelSeats: judges[${blank}] is blank — every panel model must be a non-empty id`,
+        )
+      }
+      return [...models] // copy, so callers cannot mutate the array held by `seats`
+    }
+  } else {
+    if (value !== undefined && typeof value !== 'string') {
+      throw new ValidationError(`ModelSeats: seat '${seat}' must be a string, got ${typeof value}`)
+    }
+    if (typeof value === 'string' && value.trim() !== '') return value // returned as-is; trim() is used only for the blank check
+  }
+  if (fallback !== undefined) { // reached only when the seat itself was unset (undefined, blank string, or empty array)
+    if (fallback.trim() === '') {
+      throw new ValidationError(`ModelSeats: fallback for seat '${seat}' is blank`)
+    }
+    return seat === 'judges' ? [fallback] : fallback
+  }
+  throw new SeatUnsetError(seat)
+}