tangle-network · drewstone · Jun 10, 2026 · Jun 10, 2026
diff --git a/src/cost-report.test.ts b/src/cost-report.test.ts
@@ -0,0 +1,97 @@
+import { describe, expect, it } from 'vitest'
+import { CostLedger } from './cost-ledger'
+import { attachCostToReport, costReport } from './cost-report'
+import { ValidationError } from './errors'
+
+/**
+ * Shared fixture ledger: one gpt-4o call on each channel, plus one judge call
+ * on a model id the tests expect to be unpriced. The per-1k gpt-4o rates in
+ * the comment below are what the dollar expectations in this file assume.
+ */
+function buildLedger(): CostLedger {
+  const calls: Parameters<CostLedger['record']>[0][] = [
+    // gpt-4o: 0.0025 in + 0.01 out per 1k
+    { model: 'gpt-4o', channel: 'agent', usage: { inputTokens: 1000, outputTokens: 1000 } },
+    { model: 'gpt-4o', channel: 'judge', usage: { inputTokens: 2000, outputTokens: 0 } },
+    // Unpriced model — costUnknown, the $0 is a lower bound, not a measured zero.
+    {
+      model: 'made-up-zzz',
+      channel: 'judge',
+      usage: { inputTokens: 1000, outputTokens: 1000 },
+    },
+  ]
+  const fixture = new CostLedger()
+  for (const call of calls) fixture.record(call)
+  return fixture
+}
+
+describe('costReport', () => {
+  it('projects per-channel, total, and per-model rollups from the ledger', () => {
+    const report = costReport(buildLedger())
+
+    expect(report.perChannel.map((c) => c.channel)).toEqual(['agent', 'judge'])
+    const judge = report.perChannel.find((c) => c.channel === 'judge')
+    expect(judge?.calls).toBe(2)
+    expect(judge?.unpricedCalls).toBe(1)
+
+    expect(report.total.usd).toBeCloseTo(0.0125 + 0.005, 6)
+    expect(report.total.unknownEntries).toBe(1)
+
+    expect(report.perModel.map((m) => m.model)).toEqual(['gpt-4o', 'made-up-zzz'])
+    expect(report.perModel[0]).toEqual({
+      model: 'gpt-4o',
+      usd: expect.closeTo(0.0175, 6), // summed floats: same tolerance as total.usd above
+      entries: 2,
+      unpriced: false,
+    })
+  })
+
+  it('flags an unpriced model unpriced:true — its $0 is never a measured zero', () => {
+    const report = costReport(buildLedger())
+    const unpriced = report.perModel.find((m) => m.model === 'made-up-zzz')
+    expect(unpriced).toEqual({ model: 'made-up-zzz', usd: 0, entries: 1, unpriced: true })
+  })
+
+  it('an actualCostUsd override clears unpriced — observed dollars are real', () => {
+    const ledger = new CostLedger()
+    ledger.record({
+      model: 'made-up-zzz',
+      channel: 'agent',
+      usage: { inputTokens: 100, outputTokens: 100 },
+      actualCostUsd: 0.42,
+    })
+    const report = costReport(ledger)
+    expect(report.perModel[0]).toEqual({
+      model: 'made-up-zzz',
+      usd: 0.42,
+      entries: 1,
+      unpriced: false,
+    })
+    expect(report.total.unknownEntries).toBe(0)
+  })
+
+  it('an empty ledger projects to zeros, never throws', () => {
+    const report = costReport(new CostLedger())
+    expect(report).toEqual({
+      perChannel: [],
+      total: { usd: 0, unknownEntries: 0 },
+      perModel: [],
+    })
+  })
+})
+
+describe('attachCostToReport', () => {
+  it('stamps the projection under cost and preserves the report fields', () => {
+    const report = { verdict: 'ship', lift: 0.04 }
+    const { verdict, lift, cost } = attachCostToReport(report, buildLedger())
+    expect(verdict).toBe('ship')
+    expect(lift).toBe(0.04)
+    expect(cost.total.unknownEntries).toBe(1)
+    expect(cost.perModel).toHaveLength(2)
+  })
+
+  it('refuses to overwrite an existing cost stamp', () => {
+    const restamp = () => attachCostToReport({ cost: 'already-stamped' }, new CostLedger())
+    expect(restamp).toThrow(ValidationError)
+  })
+})
diff --git a/src/cost-report.ts b/src/cost-report.ts
@@ -0,0 +1,79 @@
+/**
+ * Program cost report — a thin projection over `CostLedger.summary()` that
+ * adds the per-model rollup the summary lacks, plus `attachCostToReport`, the
+ * one way every artifact (capsules, campaign results, diagnose reports) gets
+ * its cost stamp.
+ *
+ * Honesty contract carried through from the ledger: `total.unknownEntries`
+ * and `perModel[].unpriced` surface the costUnknown axis — a $0 from an
+ * unpriced model is a lower bound, never a measured zero.
+ */
+
+import type { ChannelRollup, CostLedger } from './cost-ledger'
+import { ValidationError } from './errors'
+
+export interface ModelCostRollup {
+  model: string // model id the ledger entries were recorded under
+  usd: number // sum of `costUsd` across this model's entries
+  entries: number // number of ledger entries rolled into this row
+  /** ≥1 entry for this model was costUnknown — `usd` is a lower bound. An
+   *  `actualCostUsd` override clears the flag for that entry (the dollars are
+   *  observed, even when the model has no pricing). */
+  unpriced: boolean
+}
+
+export interface CostReport {
+  /** Per-channel breakdown — the ledger summary's `byChannel` array, passed through as-is. */
+  perChannel: ChannelRollup[]
+  total: {
+    usd: number // the ledger summary's `totalCostUsd`
+    /** Sum of per-channel `unpricedCalls` — non-zero means `usd` is a lower bound. */
+    unknownEntries: number
+  }
+  /** Per-model spend, sorted by model id. */
+  perModel: ModelCostRollup[]
+}
+
+/** Project a ledger into the program cost report. Pure — no I/O, no clock. */
+export function costReport(ledger: CostLedger): CostReport {
+  const summary = ledger.summary()
+  const perModel = new Map<string, ModelCostRollup>()
+  for (const entry of ledger.list()) {
+    let roll = perModel.get(entry.model)
+    if (!roll) {
+      roll = { model: entry.model, usd: 0, entries: 0, unpriced: false }
+      perModel.set(entry.model, roll)
+    }
+    roll.usd += entry.costUsd
+    roll.entries += 1
+    if (entry.costUnknown) roll.unpriced = true // one unknown entry ⇒ `usd` is a lower bound
+  }
+  // Order by UTF-16 code unit rather than localeCompare: collation follows the
+  // host's default locale, so the same ledger could stamp differently per machine.
+  const byModelId = (a: ModelCostRollup, b: ModelCostRollup): number =>
+    a.model < b.model ? -1 : a.model > b.model ? 1 : 0
+  const unknownEntries = summary.byChannel.reduce((sum, c) => sum + c.unpricedCalls, 0)
+  return {
+    perChannel: summary.byChannel,
+    total: { usd: summary.totalCostUsd, unknownEntries },
+    perModel: [...perModel.values()].sort(byModelId),
+  }
+}
+
+/**
+ * Return a shallow copy of `report` carrying the ledger's cost projection
+ * under `cost`. One generic helper so capsules, campaign results, and diagnose
+ * reports are all stamped identically. A report that already has a `cost` key
+ * is rejected with a ValidationError — overwriting would corrupt provenance.
+ */
+export function attachCostToReport<R extends object>(
+  report: R,
+  ledger: CostLedger,
+): R & { cost: CostReport } {
+  const alreadyStamped = 'cost' in report
+  if (!alreadyStamped) return { ...report, cost: costReport(ledger) }
+  throw new ValidationError(
+    "attachCostToReport: report already has a 'cost' key — refusing to overwrite an existing stamp",
+  )
+}
diff --git a/src/fuzz/capsule.ts b/src/fuzz/capsule.ts
@@ -23,6 +23,9 @@ export interface BuildCapsuleInput<S> {
   findings: Finding<S>[]
   candidateFindings: number
   runsUsed: number
+  /** Known-dollar / unknown-run split — present only when cost tracking was
+   *  wired; the capsule never fabricates a $0 total. */
+  cost?: { costUsd: number; costUnknownRuns: number }
 }
 
 export function buildCapsule<S>(input: BuildCapsuleInput<S>): CapsuleData<S> {
@@ -47,6 +50,9 @@ export function buildCapsule<S>(input: BuildCapsuleInput<S>): CapsuleData<S> {
       candidateFindings: input.candidateFindings,
       verifiedFindings: input.findings.length,
       meanRobustness,
+      ...(input.cost
+        ? { costUsd: input.cost.costUsd, costUnknownRuns: input.cost.costUnknownRuns }
+        : {}),
     },
   }
 }
@@ -172,6 +178,16 @@ export function renderCapsuleHtml<S>(
         '#5ad17a',
       )
     : ''
+  // Cost KPI only when tracking was wired — an untracked run never shows $0.
+  // Unpriced runs are named in the label (amber): the total is a lower bound.
+  const cost =
+    s.costUsd !== undefined
+      ? kpi(
+          s.costUnknownRuns ? `cost · ${s.costUnknownRuns} runs unpriced` : 'cost',
+          `$${s.costUsd.toFixed(2)}`,
+          s.costUnknownRuns ? '#e5c07b' : '#e6e6e6',
+        )
+      : ''
   const stamp = opts.generatedAt ?? capsule.generatedAt ?? ''
 
   return `<!doctype html><html><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1">
@@ -218,6 +234,7 @@ ${kpi('mean robustness', pct(s.meanRobustness), s.meanRobustness < 0.6 ? '#e58a9
 ${kpi('verified findings', String(s.verifiedFindings), s.verifiedFindings > 0 ? '#e58a96' : '#5ad17a')}
 ${kpi('cells covered', `${s.cellsCovered}/${s.cellsTotal}`)}
 ${kpi('scenarios run', String(s.totalRuns))}
+${cost}
 ${lift}
 </div>
 <h2>Coverage map</h2>

diff --git a/src/fuzz/explorer-cost.test.ts b/src/fuzz/explorer-cost.test.ts
@@ -0,0 +1,155 @@
+import { describe, expect, it } from 'vitest'
+import { CostLedger } from '../cost-ledger'
+import { costReport } from '../cost-report'
+import { ValidationError } from '../errors'
+import { renderCapsuleHtml } from './capsule'
+import { BehaviorExplorer } from './explorer'
+import type { BehaviorSpace, ExploreOptions } from './types'
+
+const space: BehaviorSpace = { axes: [{ name: 'difficulty', values: ['easy'] }] }
+
+// Shared defaults (overrides win). Single-cell space + uniform allocation +
+// concurrency 1 keep the evaluation (and costOf) call order deterministic.
+function makeOpts(overrides: Partial<ExploreOptions<string>>): ExploreOptions<string> {
+  let nextId = 0
+  const defaults: ExploreOptions<string> = {
+    target: 'cost-target',
+    space,
+    proposer: (ctx) => Array.from({ length: ctx.count }, () => `p-${nextId++}`),
+    evaluate: async () => ({ valid: true, score: 0.9 }),
+    seedsFor: () => ['seed-0'],
+    scenarioId: (s) => s,
+    allocation: 'uniform',
+    budget: 50,
+    seed: 3,
+  }
+  return { ...defaults, ...overrides }
+}
+
+describe('BehaviorExplorer cost budget', () => {
+  it('stops the loop when accumulated known cost reaches costBudgetUsd', async () => {
+    const explorer = new BehaviorExplorer(
+      makeOpts({ costOf: () => ({ usd: 1 }), costBudgetUsd: 3 }),
+    )
+    const capsule = await explorer.run()
+    expect(capsule.stats.totalRuns).toBe(3) // $1 per run: the $3 cap ends the loop short of budget 50
+    expect(capsule.stats.costUsd).toBe(3)
+    expect(capsule.stats.costUnknownRuns).toBe(0)
+  })
+
+  it('a zero budget stops before any evaluation — same >= semantics as control-runtime', async () => {
+    const explorer = new BehaviorExplorer(
+      makeOpts({ costOf: () => ({ usd: 1 }), costBudgetUsd: 0 }),
+    )
+    const capsule = await explorer.run()
+    expect(capsule.stats.totalRuns).toBe(0) // 0 >= 0: the cap is already met before the first run
+    expect(capsule.stats.costUsd).toBe(0)
+  })
+
+  it('counts unknown-cost runs separately — never as $0, never against the budget', async () => {
+    let call = 0
+    const onCost: Array<{ usd: number; channel: string }> = []
+    const explorer = new BehaviorExplorer(
+      makeOpts({
+        budget: 4,
+        // Runs 1 and 3 cost $1; runs 2 and 4 have unknown cost.
+        costOf: () => (call++ % 2 === 0 ? { usd: 1 } : null),
+        costBudgetUsd: 10,
+        onCost: (e) => onCost.push(e),
+      }),
+    )
+    const capsule = await explorer.run()
+    expect(capsule.stats.totalRuns).toBe(4)
+    expect(capsule.stats.costUsd).toBe(2) // only the two priced runs add dollars
+    expect(capsule.stats.costUnknownRuns).toBe(2)
+    expect(onCost).toEqual([ // one event per known cost; the two null runs emit none
+      { usd: 1, channel: 'agent' },
+      { usd: 1, channel: 'agent' },
+    ])
+  })
+
+  it('records known costs into the supplied ledger with channel agent + actualCostUsd', async () => {
+    const ledger = new CostLedger()
+    const explorer = new BehaviorExplorer(
+      makeOpts({ budget: 2, costOf: () => ({ usd: 0.5, model: 'gpt-4o' }), ledger }),
+    )
+    await explorer.run()
+    const entries = ledger.list()
+    expect(entries).toHaveLength(2)
+    for (const entry of entries) {
+      expect(entry.channel).toBe('agent')
+      expect(entry.actualCostUsd).toBe(0.5)
+      expect(entry.costUnknown).toBe(false) // observed dollars, so not flagged unknown
+      expect(entry.model).toBe('gpt-4o')
+      expect(entry.tags?.target).toBe('cost-target')
+    }
+    const report = costReport(ledger)
+    expect(report.perModel).toEqual([{ model: 'gpt-4o', usd: 1, entries: 2, unpriced: false }])
+  })
+
+  it('labels ledger entries unattributed when costOf names no model', async () => {
+    const ledger = new CostLedger()
+    const explorer = new BehaviorExplorer(
+      makeOpts({ budget: 1, costOf: () => ({ usd: 0.25 }), ledger }),
+    )
+    await explorer.run()
+    expect(ledger.list().map((e) => e.model)).toEqual(['unattributed']) // placeholder model id
+  })
+
+  it('rejects negative, NaN, and infinite budgets loudly', () => {
+    const costOf = () => ({ usd: 1 })
+    for (const bad of [-1, Number.NaN, Number.POSITIVE_INFINITY]) {
+      // Each bad budget must fail in the constructor, before run() is ever called.
+      expect(() => new BehaviorExplorer(makeOpts({ costOf, costBudgetUsd: bad }))).toThrow(
+        /costBudgetUsd must be a nonnegative finite number/,
+      )
+    }
+  })
+
+  it('rejects cost options without costOf — the explorer cannot know run cost', () => {
+    expect(() => new BehaviorExplorer(makeOpts({ costBudgetUsd: 3 }))).toThrow(ValidationError)
+    expect(() => new BehaviorExplorer(makeOpts({ ledger: new CostLedger() }))).toThrow(
+      ValidationError,
+    )
+    expect(() => new BehaviorExplorer(makeOpts({ onCost: () => {} }))).toThrow(ValidationError)
+  })
+
+  it('rejects a fabricated costOf number loudly — null is the only unknown', async () => {
+    const explorer = new BehaviorExplorer(
+      makeOpts({ budget: 1, costOf: () => ({ usd: Number.NaN }) }),
+    )
+    // Surfaces as a rejected run(); construction itself succeeds.
+    await expect(explorer.run()).rejects.toThrow(/costOf returned an invalid usd/)
+  })
+
+  it('omits cost stats entirely when cost tracking is not wired — absent, never $0', async () => {
+    const capsule = await new BehaviorExplorer(makeOpts({ budget: 2 })).run()
+    expect(capsule.stats.costUsd).toBeUndefined() // key absent, not zero
+    expect(capsule.stats.costUnknownRuns).toBeUndefined()
+  })
+})
+
+describe('renderCapsuleHtml cost KPI', () => {
+  // Run an explorer built from `overrides` and render the capsule it produces.
+  const renderRun = async (overrides: Partial<ExploreOptions<string>>): Promise<string> =>
+    renderCapsuleHtml(await new BehaviorExplorer(makeOpts(overrides)).run())
+
+  it('shows the known-dollar KPI when cost tracking was wired', async () => {
+    const html = await renderRun({ costOf: () => ({ usd: 1 }), costBudgetUsd: 3 })
+    expect(html).toContain('$3.00')
+    expect(html).not.toContain('runs unpriced')
+  })
+
+  it('names the unpriced-run count next to the total — the dollar figure is a lower bound', async () => {
+    let runIndex = 0
+    // Even-indexed runs cost $1; odd-indexed runs report no cost.
+    const costOf = () => (runIndex++ % 2 === 0 ? { usd: 1 } : null)
+    const html = await renderRun({ budget: 4, costOf })
+    expect(html).toContain('$2.00')
+    expect(html).toContain('2 runs unpriced')
+  })
+
+  it('shows no dollar KPI at all when cost was not tracked', async () => {
+    const html = await renderRun({ budget: 2 })
+    expect(html).not.toMatch(/\$\d/)
+    expect(html).not.toContain('runs unpriced')
+  })
+})