From 25c00a43fd7135e93df909580c14850c189d7442 Mon Sep 17 00:00:00 2001 From: Drew Stone Date: Wed, 10 Jun 2026 18:20:38 -0600 Subject: [PATCH] =?UTF-8?q?fix(loops):=20funnel=20alignment=20=E2=80=94=20?= =?UTF-8?q?the=20search=20tie-band=20matches=20the=20gate=20under=20the=20?= =?UTF-8?q?cost=20objective?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cost run's HOLD was a funnel misalignment, not an author failure: the gate accepts score within −scoreTolerance (5pp) + significant savings, but search-side champion displacement required a 1pp tie — so the author's researchThenExecute (−3.7pp at 43% cheaper on the gen1 discriminating set, exactly the candidate the gate was built to judge) died in search and the gate saw identical-champion. The principle: the search filter must be no stricter than the promotion criterion. Under objective='cost', championEpsilon now defaults to scoreTolerance; CHAMPION_EPSILON env on the bench runner for explicit control. --- bench/src/flywheel-evolve.mts | 1 + src/runtime/strategy-evolution.ts | 7 ++++++- tests/loops/strategy-evolution.test.ts | 24 ++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/bench/src/flywheel-evolve.mts b/bench/src/flywheel-evolve.mts index 75c2c70..5e74dd4 100644 --- a/bench/src/flywheel-evolve.mts +++ b/bench/src/flywheel-evolve.mts @@ -90,6 +90,7 @@ async function main(): Promise { ...(process.env.OBJECTIVE === 'cost' ? { objective: 'cost' as const, scoreTolerance: Number(process.env.SCORE_TOLERANCE ?? 0.05) } : {}), + ...(process.env.CHAMPION_EPSILON ? { championEpsilon: Number(process.env.CHAMPION_EPSILON) } : {}), ...(process.env.LOSSES_DETAIL === 'binary' ? { lossesDetail: 'binary' as const } : {}), ...(process.env.REPRO ? { reproducerCheck: {} } : {}), // Endurance: phase ledger on disk (resume skips completed phases) + the gym recycled diff --git a/src/runtime/strategy-evolution.ts b/src/runtime/strategy-evolution.ts index fb7d917..5c74a62 100644 --- a/src/runtime/strategy-evolution.ts +++ b/src/runtime/strategy-evolution.ts @@ -368,7 +368,12 @@ export async function runStrategyEvolution(cfg: StrategyEvolutionConfig): Promis const populationSize = cfg.populationSize ?? 2 const baselines = cfg.baselines ?? [sample, refine, sampleThenRefine] const policy = cfg.champion ?? 'costAware' - const epsilon = cfg.championEpsilon ?? 0.01 + // FUNNEL ALIGNMENT: the search-side tie band must be no stricter than the promotion + // criterion, or the gate never sees the candidates it was designed to judge. Under the + // cost objective the gate accepts score within −scoreTolerance; a candidate that the + // gate would promote must therefore be able to DISPLACE in search at that same band. + const epsilon = + cfg.championEpsilon ?? (cfg.objective === 'cost' ? (cfg.scoreTolerance ?? 0.05) : 0.01) const byName = new Map(baselines.map((s) => [s.name, s])) const codeByName = new Map() diff --git a/tests/loops/strategy-evolution.test.ts b/tests/loops/strategy-evolution.test.ts index 4e1dc6f..806c4d3 100644 --- a/tests/loops/strategy-evolution.test.ts +++ b/tests/loops/strategy-evolution.test.ts @@ -628,3 +628,27 @@ describe('checkpoint and resume', () => { expect(phases).toEqual(['gen0', 'gen1', 'holdout']) }) }) + +describe('funnel alignment under the cost objective', () => { + it('the search tie-band defaults to the gate tolerance: a near-tie cheaper candidate displaces', () => { + const r = { + n: 1, + excluded: 0, + perStrategy: { + incumbent: { score: 0.733, resolved: 0, usd: 0.0249, ms: 0 }, + cheaper: { score: 0.696, resolved: 0, usd: 0.0141, ms: 0 }, + }, + perTask: [], + pareto: [], + } + // ε=1pp (the score-objective default): the incumbent holds. + expect(pickChampion(r.perStrategy, ['incumbent', 'cheaper'], 'costAware', 0.01).name).toBe( + 'incumbent', + ) + // ε=5pp (the cost-objective default = the gate's scoreTolerance): the cheaper + // within-band candidate displaces and REACHES the gate. + expect(pickChampion(r.perStrategy, ['incumbent', 'cheaper'], 'costAware', 0.05).name).toBe( + 'cheaper', + ) + }) +})