From c4bb3bb89782a07db48ce1d4194518817ffc32c4 Mon Sep 17 00:00:00 2001 From: Drew Stone Date: Thu, 11 Jun 2026 08:34:30 -0600 Subject: [PATCH] =?UTF-8?q?fix(loops):=20normalize=20advisory=20deliverabl?= =?UTF-8?q?e=20fields=20=E2=80=94=20an=20authored=20omission=20cannot=20po?= =?UTF-8?q?ison=20the=20next=20generation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The deeper cost run crashed at gen2 authoring: an authored body returned a StrategyResult without progression (advisory, unvalidated since #217 made score/resolved harness-owned), the undefined rode through runBenchmark into the losses table, and compactLosses threw on .map — killing the run a generation AFTER the offending candidate ran. defineStrategy now normalizes progression/completions/shots on the deliverable (the source fix); compactLosses tolerates absence anyway (depth). Test: a body returning only {score, resolved} yields a well-formed cell. --- src/runtime/strategy-evolution.ts | 2 +- src/runtime/strategy.ts | 14 ++++++++++++-- tests/loops/strategy-suite.test.ts | 23 +++++++++++++++++++++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/runtime/strategy-evolution.ts b/src/runtime/strategy-evolution.ts index 5c74a62..bc1f045 100644 --- a/src/runtime/strategy-evolution.ts +++ b/src/runtime/strategy-evolution.ts @@ -329,7 +329,7 @@ const compactLosses = (report: BenchmarkReport, detail: 'exact' | 'binary'): str score: r2(c.score), resolved: c.resolved, usd: Math.round(c.usd * 10000) / 10000, - progression: c.progression.map(r2), + progression: (c.progression ?? []).map(r2), }, ]), ), diff --git a/src/runtime/strategy.ts b/src/runtime/strategy.ts index 026fa59..efdf27d 100644 --- a/src/runtime/strategy.ts +++ b/src/runtime/strategy.ts @@ -842,10 +842,20 @@ export function defineStrategy( } const r = await run(ctx) // Override the body's self-reported score/resolved with the harness-verified - // values. The body's progression/completions/shots are advisory (display only). + // values. The body's progression/completions/shots are advisory (display only) — + // but NORMALIZED: an authored body that omits them must not poison downstream + // consumers (losses tables, anytime curves) with undefined. return { kind: 'done', - deliverable: { mode: name, ...r, score: verifiedBest, resolved: verifiedResolved }, + deliverable: { + mode: name, + ...r, + progression: Array.isArray(r.progression) ? r.progression : [], + completions: typeof r.completions === 'number' ? r.completions : 0, + shots: typeof r.shots === 'number' ? r.shots : 0, + score: verifiedBest, + resolved: verifiedResolved, + }, } }, }), diff --git a/tests/loops/strategy-suite.test.ts b/tests/loops/strategy-suite.test.ts index df24060..110b184 100644 --- a/tests/loops/strategy-suite.test.ts +++ b/tests/loops/strategy-suite.test.ts @@ -660,3 +660,26 @@ describe('consult', () => { expect(reply).toBe('DONE') }) }) + +describe('advisory-field normalization', () => { + it('a body omitting progression/completions/shots yields a well-formed cell', async () => { + stubRouter() + const surface = fixtureSurface(() => ({ passes: 1, total: 2 })) + const sloppy = defineStrategy('sloppy', async ({ shot }) => { + await shot() + // An authored body returning only the verified-overridden fields. + return { score: 0, resolved: false } as never + }) + const report = await runBenchmark({ + environment: surface, + tasks: [task], + worker, + strategies: [sloppy], + budget: 1, + concurrency: 1, + }) + const cell = report.perTask[0]?.cells?.sloppy + expect(cell?.progression).toEqual([]) + expect(cell?.score).toBeCloseTo(0.5) + }) +})