diff --git a/packages/optimization/src/ldai_optimizer/client.py b/packages/optimization/src/ldai_optimizer/client.py
index 7c34231d..7a901660 100644
--- a/packages/optimization/src/ldai_optimizer/client.py
+++ b/packages/optimization/src/ldai_optimizer/client.py
@@ -20,6 +20,7 @@
 import logging
 import os
 import random
+import re
 import time
 import uuid
 from typing import Any, Dict, List, Literal, Optional, Tuple, Union
@@ -49,8 +50,6 @@
     LDApiClient,
 )
 from ldai_optimizer.prompts import (
-    _acceptance_criteria_implies_cost_optimization,
-    _acceptance_criteria_implies_duration_optimization,
     build_message_history_text,
     build_new_variation_prompt,
     build_reasoning_history,
@@ -71,6 +70,15 @@
 logger.addFilter(RedactionFilter())
 
 
+def _interpolate(template: str, variables: Dict[str, Any]) -> str:
+    """Replace {{key}}, {{ key }} and dotted {{a.b}} tokens with values from variables; unresolved or None tokens become empty string."""
+    def _resolve(match: Any, value: Any = variables) -> str:  # value starts at the root mapping
+        for part in match.group(1).split("."):  # walk nested dicts, e.g. ldctx.key
+            value = value.get(part) if isinstance(value, dict) else None
+        return "" if value is None else str(value)
+    return re.sub(r"\{\{\s*([\w.]+)\s*\}\}", _resolve, template)
+
+
 def _find_model_config(
     model_name: str, configs: List[Dict[str, Any]]
 ) -> Optional[Dict[str, Any]]:
@@ -404,18 +412,65 @@ def _judge_config(
         variables: Dict[str, Any],
     ) -> AIJudgeConfig:
         """
-        Fetch a judge configuration from the LaunchDarkly client.
+        Fetch a judge configuration by evaluating the flag variation directly.
 
-        Thin wrapper around LDAIClient.judge_config so callers do not need a
-        direct reference to the client.
+        Bypasses LDAIClient.judge_config to avoid the reserved-variable warnings
+        for 'message_history' and 'response_to_evaluate'. Those variables are
+        interpolated here with their actual values instead of being neutralised
+        by the SDK. If the template contains only a system message, a user turn
+        is synthesised from the provided message_history and response_to_evaluate
+        so that _evaluate_config_judge always receives a complete conversation.
 
         :param judge_key: The key for the judge configuration in LaunchDarkly
         :param context: The evaluation context
-        :param default: Fallback config when the flag is disabled or unreachable
-        :param variables: Template variables for instruction interpolation
+        :param default: Unused; kept for signature compatibility
+        :param variables: Template variables including message_history and response_to_evaluate
         :return: The resolved AIJudgeConfig
         """
-        return self._ldClient.judge_config(judge_key, context, default, variables)
+        variation: Dict[str, Any] = self._ldClient._client.variation(judge_key, context, {})
+        enabled: bool = bool(variation.get("_ldMeta", {}).get("enabled", False))
+
+        all_variables: Dict[str, Any] = {"ldctx": context.to_dict(), **variables}
+
+        messages: List[LDMessage] = []
+        raw_messages = variation.get("messages")
+        if isinstance(raw_messages, list) and all(isinstance(m, dict) for m in raw_messages):
+            messages = [
+                LDMessage(
+                    role=m["role"],
+                    content=_interpolate(m.get("content", ""), all_variables),
+                )
+                for m in raw_messages
+            ]
+
+        # New-style templates only have a system message. Auto-generate a user
+        # turn so _evaluate_config_judge always has a complete conversation to split.
+        if not any(m.role == "user" for m in messages):
+            message_history = variables.get("message_history", "")
+            response_to_evaluate = variables.get("response_to_evaluate", "")
+            parts: List[str] = []
+            if message_history:
+                parts.append(str(message_history))
+            parts.append(f"Here is the response to evaluate: {response_to_evaluate}")
+            messages.append(LDMessage(role="user", content="\n\n".join(parts)))
+
+        model: Optional[ModelConfig] = None
+        raw_model = variation.get("model")
+        if isinstance(raw_model, dict):
+            model = ModelConfig(
+                name=raw_model.get("name", ""),
+                parameters=raw_model.get("parameters"),
+                custom=raw_model.get("custom"),
+            )
+
+        return AIJudgeConfig(
+            key=judge_key,
+            enabled=enabled,
+            create_tracker=lambda: None,
+            model=model,
+            messages=messages,
+            evaluation_metric_key=variation.get("evaluationMetricKey"),
+        )
 
     def _serialize_scores(
         self, judge_results: Dict[str, JudgeResult]
@@ -850,9 +905,7 @@ async def _evaluate_acceptance_judge(
 
         if (
             agent_duration_ms is not None
-            and _acceptance_criteria_implies_duration_optimization(
-                {judge_key: optimization_judge}
-            )
+            and bool(self._options.latency_optimization)
         ):
             baseline_ms = self._baseline_duration_ms
             instructions += (
@@ -875,7 +928,7 @@ async def _evaluate_acceptance_judge(
                 "These suggestions will be used directly to generate the next variation."
             )
 
-        if _acceptance_criteria_implies_cost_optimization({judge_key: optimization_judge}):
+        if bool(self._options.token_optimization):
             current_cost = estimate_cost(
                 agent_usage,
                 _find_model_config(self._current_model or "", self._model_configs),
@@ -975,7 +1028,12 @@ async def _evaluate_acceptance_judge(
         return dataclasses.replace(judge_result, duration_ms=judge_duration_ms, usage=judge_response.usage)
 
     async def _get_agent_config(
-        self, agent_key: str, context: Context
+        self,
+        agent_key: str,
+        context: Context,
+        variation_key: Optional[str] = None,
+        project_key: Optional[str] = None,
+        base_url: Optional[str] = None,
     ) -> AIAgentConfig:
         """
         Fetch the agent configuration, replacing the instructions with the raw variation
@@ -985,16 +1043,39 @@ async def _get_agent_config(
         (including the tracker). We then call variation() separately to retrieve the
         unrendered instruction template and swap it in, keeping everything else intact.
 
+        When ``variation_key`` is provided the specific variation is fetched via the
+        LaunchDarkly REST API instead of using the SDK's default flag evaluation.
+
         :param agent_key: The key for the agent to get the configuration for
         :param context: The evaluation context
+        :param variation_key: Optional specific variation key to use as the base
+        :param project_key: LaunchDarkly project key; required when variation_key is set
+        :param base_url: Optional API base URL override
         :return: AIAgentConfig with raw {{placeholder}} instruction templates intact
         """
         try:
             agent_config = self._ldClient.agent_config(agent_key, context)
 
-            # variation() returns the raw JSON before chevron.render(), so instructions
-            # still contain {{placeholder}} tokens rather than empty strings.
-            raw_variation = self._ldClient._client.variation(agent_key, context, {})
+            if variation_key:
+                assert self._api_key is not None
+                api_client = LDApiClient(
+                    self._api_key,
+                    **({"base_url": base_url} if base_url else {}),
+                )
+                ai_config = api_client.get_ai_config(project_key, agent_key)
+                match = next(
+                    (v for v in (ai_config or {}).get("variations", []) if v.get("key") == variation_key),
+                    None,
+                )
+                if match is None:
+                    raise ValueError(
+                        f"variation_key '{variation_key}' not found in agent config '{agent_key}'"
+                    )
+                raw_variation = match
+            else:
+                # variation() returns the raw JSON before chevron.render(), so instructions
+                # still contain {{placeholder}} tokens rather than empty strings.
+                raw_variation = self._ldClient._client.variation(agent_key, context, {})
             raw_instructions = raw_variation.get(
                 "instructions", agent_config.instructions
             )
@@ -1030,20 +1111,20 @@ def _fetch_model_configs(
         self,
         project_key: Optional[str],
         base_url: Optional[str],
-        judges: Optional[Dict[str, "OptimizationJudge"]],
+        token_optimization: Optional[bool],
     ) -> None:
         """Populate ``_model_configs`` from the LD API when credentials are available.
 
         When an API key and project key are both present, fetches the model pricing
         catalogue so that ``estimate_cost`` can produce USD figures and the cost gate
         can make meaningful comparisons.  If either is absent, ``_model_configs`` is
-        reset to an empty list and a warning is emitted when cost judges are in use —
-        cost optimization will silently pass rather than blocking the run.
+        reset to an empty list and a warning is emitted when token_optimization is
+        enabled — cost data will be unavailable and the cost gate will pass unconditionally.
 
         :param project_key: LaunchDarkly project key, or None if not provided.
         :param base_url: Optional API base URL override.
-        :param judges: Judge map from the caller's options, used only to decide
-            whether a cost-related warning is appropriate.
+        :param token_optimization: Whether token/cost optimization is enabled; used only to
+            decide whether a cost-related warning is appropriate.
         """
         self._model_configs = []
         if self._has_api_key and project_key:
@@ -1056,9 +1137,9 @@ def _fetch_model_configs(
                 self._model_configs = api_client.get_model_configs(project_key)
             except Exception as exc:
                 logger.debug("Could not pre-fetch model configs: %s", exc)
-        elif _acceptance_criteria_implies_cost_optimization(judges or {}):
+        elif token_optimization:
             logger.warning(
-                "Cost optimization requires LAUNCHDARKLY_API_KEY and project_key to be set; "
+                "Token optimization requires LAUNCHDARKLY_API_KEY and project_key to be set; "
                 "cost data will not be available and the cost gate will pass unconditionally"
             )
 
@@ -1080,10 +1161,24 @@ async def optimize_from_options(
                 raise ValueError(
                     "auto_commit requires project_key to be set on OptimizationOptions"
                 )
+        if options.variation_key:
+            if not self._has_api_key:
+                raise ValueError(
+                    "variation_key requires LAUNCHDARKLY_API_KEY to be set"
+                )
+            if not options.project_key:
+                raise ValueError(
+                    "variation_key requires project_key to be set on OptimizationOptions"
+                )
         self._agent_key = agent_key
-        self._fetch_model_configs(options.project_key, options.base_url, options.judges)
+        self._fetch_model_configs(options.project_key, options.base_url, options.token_optimization)
         context = random.choice(options.context_choices)
-        agent_config = await self._get_agent_config(agent_key, context)
+        agent_config = await self._get_agent_config(
+            agent_key, context,
+            variation_key=options.variation_key,
+            project_key=options.project_key,
+            base_url=options.base_url,
+        )
         result = await self._run_optimization(agent_config, options)
         if options.auto_commit and self._last_run_succeeded and self._last_succeeded_context:
             self._commit_variation(
@@ -1119,10 +1214,24 @@ async def optimize_from_ground_truth_options(
                 raise ValueError(
                     "auto_commit requires project_key to be set on GroundTruthOptimizationOptions"
                 )
+        if options.variation_key:
+            if not self._has_api_key:
+                raise ValueError(
+                    "variation_key requires LAUNCHDARKLY_API_KEY to be set"
+                )
+            if not options.project_key:
+                raise ValueError(
+                    "variation_key requires project_key to be set on GroundTruthOptimizationOptions"
+                )
         self._agent_key = agent_key
-        self._fetch_model_configs(options.project_key, options.base_url, options.judges)
+        self._fetch_model_configs(options.project_key, options.base_url, options.token_optimization)
         context = random.choice(options.context_choices)
-        agent_config = await self._get_agent_config(agent_key, context)
+        agent_config = await self._get_agent_config(
+            agent_key, context,
+            variation_key=options.variation_key,
+            project_key=options.project_key,
+            base_url=options.base_url,
+        )
         result = await self._run_ground_truth_optimization(agent_config, options)
         if options.auto_commit and self._last_run_succeeded and self._last_succeeded_context:
             self._commit_variation(
@@ -1162,6 +1271,8 @@ async def _run_ground_truth_optimization(
             on_failing_result=gt_options.on_failing_result,
             on_status_update=gt_options.on_status_update,
             token_limit=gt_options.token_limit,
+            latency_optimization=gt_options.latency_optimization,
+            token_optimization=gt_options.token_optimization,
         )
         self._options = bridge
         self._agent_config = agent_config
@@ -1579,12 +1690,8 @@ async def _generate_new_variation(
         )
         self._safe_status_update("generating variation", status_ctx, iteration)
 
-        optimize_for_duration = _acceptance_criteria_implies_duration_optimization(
-            self._options.judges
-        )
-        optimize_for_cost = _acceptance_criteria_implies_cost_optimization(
-            self._options.judges
-        )
+        optimize_for_duration = bool(self._options.latency_optimization)
+        optimize_for_cost = bool(self._options.token_optimization)
         quality_already_passing = self._all_judges_passing()
         instructions = build_new_variation_prompt(
             self._history,
@@ -1708,7 +1815,7 @@ async def optimize_from_config(
         else:
             result = await self._run_optimization(agent_config, optimization_options)
 
-        if options.auto_commit and self._last_run_succeeded and self._last_succeeded_context:
+        if optimization_options.auto_commit and options.auto_commit and self._last_run_succeeded and self._last_succeeded_context:
             created_key = self._commit_variation(
                 self._last_succeeded_context,
                 project_key=options.project_key,
@@ -1989,6 +2096,9 @@ def _persist_and_forward(
                 on_failing_result=options.on_failing_result,
                 on_status_update=_persist_and_forward,
                 token_limit=config.get("tokenLimit"),
+                latency_optimization=config.get("latencyOptimization"),
+                token_optimization=config.get("tokenOptimization"),
+                auto_commit=config.get("autoCommit", True),
             )
 
         variable_choices: List[Dict[str, Any]] = config["variableChoices"] or [{}]
@@ -2009,6 +2119,9 @@ def _persist_and_forward(
             on_failing_result=options.on_failing_result,
             on_status_update=_persist_and_forward,
             token_limit=config.get("tokenLimit"),
+            latency_optimization=config.get("latencyOptimization"),
+            token_optimization=config.get("tokenOptimization"),
+            auto_commit=config.get("autoCommit", True),
         )
 
     async def _execute_agent_turn(
@@ -2269,7 +2382,7 @@ def _apply_duration_gate(
         :param ctx: Current optimization context.
         :return: (passed, updated_ctx) where passed reflects gate outcome.
         """
-        if not _acceptance_criteria_implies_duration_optimization(self._options.judges):
+        if not bool(self._options.latency_optimization):
             return passed_so_far, ctx
         passed = self._evaluate_duration(ctx)
         if passed:
@@ -2323,7 +2436,7 @@ def _apply_cost_gate(
         :param ctx: Current optimization context.
         :return: (passed, updated_ctx) where passed reflects gate outcome.
         """
-        if not _acceptance_criteria_implies_cost_optimization(self._options.judges):
+        if not bool(self._options.token_optimization):
             return passed_so_far, ctx
         passed = self._evaluate_cost(ctx)
         if passed:
diff --git a/packages/optimization/src/ldai_optimizer/dataclasses.py b/packages/optimization/src/ldai_optimizer/dataclasses.py
index eb206d90..2d45b909 100644
--- a/packages/optimization/src/ldai_optimizer/dataclasses.py
+++ b/packages/optimization/src/ldai_optimizer/dataclasses.py
@@ -348,9 +348,14 @@ class OptimizationOptions:
     context_choices: List[Context] = field(
         default_factory=lambda: [Context.builder("anonymous").anonymous(True).build()]
     )
+    # Base variation - Optional
+    variation_key: Optional[str] = None  # use this specific variation as the base; defaults to the flag's default variation; requires API key + project_key
+    # Optimization controls - Optional; the corresponding gate/prompt is enabled only when True (None or False disables it)
+    latency_optimization: Optional[bool] = None
+    token_optimization: Optional[bool] = None
     # Auto-commit - Optional
     auto_commit: bool = False
-    project_key: Optional[str] = None  # required when auto_commit=True
+    project_key: Optional[str] = None  # required when auto_commit=True or variation_key is set
     output_key: Optional[str] = None   # variation key/name; auto-generated if omitted
     base_url: Optional[str] = None  # override to target a non-default LD instance
     on_passing_result: Optional[Callable[[OptimizationContext], None]] = None
@@ -440,9 +445,14 @@ class GroundTruthOptimizationOptions:
     context_choices: List[Context] = field(
         default_factory=lambda: [Context.builder("anonymous").anonymous(True).build()]
     )
+    # Base variation - Optional
+    variation_key: Optional[str] = None  # use this specific variation as the base; defaults to the flag's default variation; requires API key + project_key
+    # Optimization controls - Optional; the corresponding gate/prompt is enabled only when True (None or False disables it)
+    latency_optimization: Optional[bool] = None
+    token_optimization: Optional[bool] = None
     # Auto-commit - Optional
     auto_commit: bool = False
-    project_key: Optional[str] = None  # required when auto_commit=True
+    project_key: Optional[str] = None  # required when auto_commit=True or variation_key is set
     output_key: Optional[str] = None   # variation key/name; auto-generated if omitted
     base_url: Optional[str] = None  # override to target a non-default LD instance
     token_limit: Optional[int] = None  # stop the run when total token usage reaches this value
diff --git a/packages/optimization/src/ldai_optimizer/ld_api_client.py b/packages/optimization/src/ldai_optimizer/ld_api_client.py
index 37f6549e..14843d90 100644
--- a/packages/optimization/src/ldai_optimizer/ld_api_client.py
+++ b/packages/optimization/src/ldai_optimizer/ld_api_client.py
@@ -90,6 +90,9 @@ class AgentOptimizationConfig(_AgentOptimizationConfigRequired, total=False):
     groundTruthResponses: List[str]
     metricKey: str
     tokenLimit: int
+    latencyOptimization: bool
+    tokenOptimization: bool
+    autoCommit: bool
 
 
 # ---------------------------------------------------------------------------
diff --git a/packages/optimization/src/ldai_optimizer/prompts.py b/packages/optimization/src/ldai_optimizer/prompts.py
index 9ba37d94..9e42ca49 100644
--- a/packages/optimization/src/ldai_optimizer/prompts.py
+++ b/packages/optimization/src/ldai_optimizer/prompts.py
@@ -1,6 +1,5 @@
 """Prompt-building functions for LaunchDarkly AI optimization."""
 
-import re
 from typing import Any, Dict, List, Optional
 
 from ldai_optimizer.dataclasses import (
@@ -9,64 +8,6 @@
 )
 from ldai_optimizer.util import judge_passed
 
-_DURATION_KEYWORDS = re.compile(
-    r"\b(fast|faster|quickly|quick|latency|low-latency|duration|response\s+time|"
-    r"time\s+to\s+respond|milliseconds|performant|snappy|efficient|seconds)\b|"
-    r"(?<![a-zA-Z])ms\b",
-    re.IGNORECASE,
-)
-
-_COST_KEYWORDS = re.compile(
-    r"\b(cheap|cheaper|cheapest|costs?|costly|expensive|budget|affordable|"
-    r"spend|spending|economical|cost-effective|frugal|"
-    r"price|pricing|bill|billing)\b",
-    re.IGNORECASE,
-)
-
-
-def _acceptance_criteria_implies_duration_optimization(
-    judges: Optional[Dict[str, OptimizationJudge]],
-) -> bool:
-    """Return True if any judge acceptance statement implies a latency optimization goal.
-
-    Scans each judge's acceptance_statement for latency-related keywords. The
-    check is case-insensitive. Returns False when judges is None or no judge
-    carries an acceptance statement.
-
-    :param judges: Judge configuration dict from OptimizationOptions, or None.
-    :return: True if duration optimization should be applied.
-    """
-    if not judges:
-        return False
-    for judge in judges.values():
-        if judge.acceptance_statement and _DURATION_KEYWORDS.search(
-            judge.acceptance_statement
-        ):
-            return True
-    return False
-
-
-def _acceptance_criteria_implies_cost_optimization(
-    judges: Optional[Dict[str, OptimizationJudge]],
-) -> bool:
-    """Return True if any judge acceptance statement implies a cost reduction goal.
-
-    Scans each judge's acceptance_statement for cost-related keywords. The
-    check is case-insensitive. Returns False when judges is None or no judge
-    carries an acceptance statement.
-
-    :param judges: Judge configuration dict from OptimizationOptions, or None.
-    :return: True if cost optimization should be applied.
-    """
-    if not judges:
-        return False
-    for judge in judges.values():
-        if judge.acceptance_statement and _COST_KEYWORDS.search(
-            judge.acceptance_statement
-        ):
-            return True
-    return False
-
 
 def build_message_history_text(
     history: List[OptimizationContext],
diff --git a/packages/optimization/tests/test_client.py b/packages/optimization/tests/test_client.py
index df9c6f6f..5a1d9895 100644
--- a/packages/optimization/tests/test_client.py
+++ b/packages/optimization/tests/test_client.py
@@ -6,7 +6,7 @@
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
-from ldai import AIAgentConfig, AIJudgeConfig, LDAIClient
+from ldai import AIAgentConfig, LDAIClient
 from ldai.client import Evaluator
 from ldai.models import LDMessage, ModelConfig
 from ldai.tracker import TokenUsage
@@ -35,8 +35,6 @@
     ToolDefinition,
 )
 from ldai_optimizer.prompts import (
-    _acceptance_criteria_implies_cost_optimization,
-    _acceptance_criteria_implies_duration_optimization,
     build_new_variation_prompt,
     variation_prompt_acceptance_criteria,
     variation_prompt_cost_optimization,
@@ -561,13 +559,13 @@ async def test_variables_in_context(self):
         _, _, ctx, _ = call_args.args
         assert ctx.current_variables == variables
 
-    async def test_duration_context_added_to_instructions_when_latency_keyword_present(self):
-        """When acceptance statement has a latency keyword and agent_duration_ms is provided,
-        the instructions mention the duration."""
-        judge = OptimizationJudge(
-            threshold=0.8,
-            acceptance_statement="The response must be fast.",
+    async def test_duration_context_added_when_latency_optimization_true_and_duration_provided(self):
+        """When latency_optimization=True and agent_duration_ms is provided,
+        the judge instructions mention the duration."""
+        self.client._options = _make_options(
+            handle_judge_call=self.handle_judge_call, latency_optimization=True
         )
+        judge = OptimizationJudge(threshold=0.8, acceptance_statement="Be accurate.")
         await self.client._evaluate_acceptance_judge(
             judge_key="speed",
             optimization_judge=judge,
@@ -583,6 +581,9 @@ async def test_duration_context_added_to_instructions_when_latency_keyword_prese
 
     async def test_duration_context_includes_baseline_comparison_when_history_present(self):
         """When a baseline duration is captured, the judge instructions include a baseline comparison."""
+        self.client._options = _make_options(
+            handle_judge_call=self.handle_judge_call, latency_optimization=True
+        )
         self.client._history = [
             OptimizationContext(
                 scores={},
@@ -595,10 +596,7 @@ async def test_duration_context_includes_baseline_comparison_when_history_presen
             )
         ]
         self.client._baseline_duration_ms = 2000.0
-        judge = OptimizationJudge(
-            threshold=0.8,
-            acceptance_statement="Responses should have low latency.",
-        )
+        judge = OptimizationJudge(threshold=0.8, acceptance_statement="Be accurate.")
         await self.client._evaluate_acceptance_judge(
             judge_key="latency",
             optimization_judge=judge,
@@ -615,6 +613,9 @@ async def test_duration_context_includes_baseline_comparison_when_history_presen
 
     async def test_duration_context_says_slower_when_candidate_is_slower(self):
         """When the candidate is slower than baseline, the instructions say 'slower'."""
+        self.client._options = _make_options(
+            handle_judge_call=self.handle_judge_call, latency_optimization=True
+        )
         self.client._history = [
             OptimizationContext(
                 scores={},
@@ -627,10 +628,7 @@ async def test_duration_context_says_slower_when_candidate_is_slower(self):
             )
         ]
         self.client._baseline_duration_ms = 1000.0
-        judge = OptimizationJudge(
-            threshold=0.8,
-            acceptance_statement="The response must be fast.",
-        )
+        judge = OptimizationJudge(threshold=0.8, acceptance_statement="Be accurate.")
         await self.client._evaluate_acceptance_judge(
             judge_key="speed",
             optimization_judge=judge,
@@ -643,12 +641,9 @@ async def test_duration_context_says_slower_when_candidate_is_slower(self):
         _, config, _, _ = self.handle_judge_call.call_args.args
         assert "slower" in config.instructions
 
-    async def test_duration_context_not_added_when_no_latency_keyword(self):
-        """When acceptance statement has no latency keyword, duration is not injected."""
-        judge = OptimizationJudge(
-            threshold=0.8,
-            acceptance_statement="The response must be accurate.",
-        )
+    async def test_duration_context_not_added_when_latency_optimization_is_none(self):
+        """When latency_optimization is None (not set), duration is not injected."""
+        judge = OptimizationJudge(threshold=0.8, acceptance_statement="Be accurate.")
         await self.client._evaluate_acceptance_judge(
             judge_key="accuracy",
             optimization_judge=judge,
@@ -660,14 +655,13 @@ async def test_duration_context_not_added_when_no_latency_keyword(self):
         )
         _, config, _, _ = self.handle_judge_call.call_args.args
         assert "2000ms" not in config.instructions
-        assert "duration" not in config.instructions.lower() or "acceptance" in config.instructions.lower()
 
     async def test_duration_context_not_added_when_agent_duration_ms_is_none(self):
-        """When agent_duration_ms is None, no duration block is added even if keyword matches."""
-        judge = OptimizationJudge(
-            threshold=0.8,
-            acceptance_statement="The response must be fast.",
+        """When agent_duration_ms is None, no duration block is added even if latency_optimization=True."""
+        self.client._options = _make_options(
+            handle_judge_call=self.handle_judge_call, latency_optimization=True
         )
+        judge = OptimizationJudge(threshold=0.8, acceptance_statement="Be accurate.")
         await self.client._evaluate_acceptance_judge(
             judge_key="speed",
             optimization_judge=judge,
@@ -723,20 +717,19 @@ def setup_method(self):
         self.handle_judge_call = AsyncMock(return_value=OptimizationResponse(output=JUDGE_PASS_RESPONSE))
         self.client._options = _make_options(handle_judge_call=self.handle_judge_call)
 
-    def _make_judge_config(self, enabled: bool = True) -> AIJudgeConfig:
-        return AIJudgeConfig(
-            key="ld-judge-key",
-            enabled=enabled,
-            create_tracker=MagicMock,
-            model=ModelConfig(name="gpt-4o", parameters={}),
-            messages=[
-                LDMessage(role="system", content="You are an evaluator."),
-                LDMessage(role="user", content="Evaluate this response."),
+    def _make_raw_variation(self, enabled: bool = True) -> Dict[str, Any]:
+        """Raw variation dict as returned by _client.variation for a judge flag."""
+        return {
+            "_ldMeta": {"enabled": enabled},
+            "messages": [
+                {"role": "system", "content": "You are an evaluator."},
+                {"role": "user", "content": "Evaluate this response."},
             ],
-        )
+            "model": {"name": "gpt-4o", "parameters": {}},
+        }
 
     async def test_calls_handle_judge_call_with_correct_config_type(self):
-        self.mock_ldai.judge_config.return_value = self._make_judge_config()
+        self.mock_ldai._client.variation.return_value = self._make_raw_variation()
         judge = OptimizationJudge(threshold=0.8, judge_key="ld-judge-key")
         await self.client._evaluate_config_judge(
             judge_key="quality",
@@ -754,7 +747,7 @@ async def test_calls_handle_judge_call_with_correct_config_type(self):
         assert isinstance(ctx, OptimizationJudgeContext)
 
     async def test_messages_has_system_and_user_turns(self):
-        self.mock_ldai.judge_config.return_value = self._make_judge_config()
+        self.mock_ldai._client.variation.return_value = self._make_raw_variation()
         judge = OptimizationJudge(threshold=0.8, judge_key="ld-judge-key")
         await self.client._evaluate_config_judge(
             judge_key="quality",
@@ -769,7 +762,7 @@ async def test_messages_has_system_and_user_turns(self):
         assert roles == ["system", "user"]
 
     async def test_messages_system_content_matches_instructions(self):
-        self.mock_ldai.judge_config.return_value = self._make_judge_config()
+        self.mock_ldai._client.variation.return_value = self._make_raw_variation()
         judge = OptimizationJudge(threshold=0.8, judge_key="ld-judge-key")
         await self.client._evaluate_config_judge(
             judge_key="quality",
@@ -784,7 +777,7 @@ async def test_messages_system_content_matches_instructions(self):
         assert system_msg.content == config.instructions
 
     async def test_messages_user_content_matches_context_user_input(self):
-        self.mock_ldai.judge_config.return_value = self._make_judge_config()
+        self.mock_ldai._client.variation.return_value = self._make_raw_variation()
         judge = OptimizationJudge(threshold=0.8, judge_key="ld-judge-key")
         await self.client._evaluate_config_judge(
             judge_key="quality",
@@ -799,7 +792,7 @@ async def test_messages_user_content_matches_context_user_input(self):
         assert user_msg.content == ctx.user_input
 
     async def test_messages_user_content_contains_ld_user_message(self):
-        self.mock_ldai.judge_config.return_value = self._make_judge_config()
+        self.mock_ldai._client.variation.return_value = self._make_raw_variation()
         judge = OptimizationJudge(threshold=0.8, judge_key="ld-judge-key")
         await self.client._evaluate_config_judge(
             judge_key="quality",
@@ -814,7 +807,7 @@ async def test_messages_user_content_contains_ld_user_message(self):
         assert "Evaluate this response." in user_msg.content
 
     async def test_returns_zero_score_when_judge_disabled(self):
-        self.mock_ldai.judge_config.return_value = self._make_judge_config(enabled=False)
+        self.mock_ldai._client.variation.return_value = self._make_raw_variation(enabled=False)
         judge = OptimizationJudge(threshold=0.8, judge_key="ld-judge-key")
         result = await self.client._evaluate_config_judge(
             judge_key="quality",
@@ -827,31 +820,37 @@ async def test_returns_zero_score_when_judge_disabled(self):
         assert result.score == 0.0
         self.handle_judge_call.assert_not_called()
 
-    async def test_returns_zero_score_when_judge_has_no_messages(self):
-        judge_config = AIJudgeConfig(
-            key="ld-judge-key",
-            enabled=True,
-            create_tracker=MagicMock,
-            model=ModelConfig(name="gpt-4o", parameters={}),
-            messages=None,
-        )
-        self.mock_ldai.judge_config.return_value = judge_config
+    async def test_system_only_template_auto_generates_user_message(self):
+        """When the flag template has only a system message, a user turn is synthesised."""
+        self.mock_ldai._client.variation.return_value = {
+            "_ldMeta": {"enabled": True},
+            "messages": [{"role": "system", "content": "You are an evaluator."}],
+            "model": {"name": "gpt-4o", "parameters": {}},
+        }
         judge = OptimizationJudge(threshold=0.8, judge_key="ld-judge-key")
-        result = await self.client._evaluate_config_judge(
+        await self.client._evaluate_config_judge(
             judge_key="quality",
             optimization_judge=judge,
-            completion_response="Any.",
+            completion_response="The answer is 42.",
             iteration=1,
             reasoning_history="",
-            user_input="Anything?",
+            user_input="What is the answer?",
         )
-        assert result.score == 0.0
-        self.handle_judge_call.assert_not_called()
-
-    async def test_template_variables_merged_into_judge_config_call(self):
-        self.mock_ldai.judge_config.return_value = self._make_judge_config()
+        _, config, _, _ = self.handle_judge_call.call_args.args
+        user_msg = next(m for m in config.messages if m.role == "user")
+        assert "The answer is 42." in user_msg.content
+
+    async def test_template_variables_interpolated_into_messages(self):
+        """Custom agent variables are interpolated into judge template messages."""
+        self.mock_ldai._client.variation.return_value = {
+            "_ldMeta": {"enabled": True},
+            "messages": [
+                {"role": "system", "content": "Evaluate in {{language}}."},
+                {"role": "user", "content": "Evaluate this response."},
+            ],
+            "model": {"name": "gpt-4o", "parameters": {}},
+        }
         judge = OptimizationJudge(threshold=0.8, judge_key="ld-judge-key")
-        variables = {"language": "Spanish"}
         await self.client._evaluate_config_judge(
             judge_key="quality",
             optimization_judge=judge,
@@ -859,16 +858,38 @@ async def test_template_variables_merged_into_judge_config_call(self):
             iteration=1,
             reasoning_history="",
             user_input="Q?",
-            variables=variables,
+            variables={"language": "Spanish"},
+        )
+        _, config, _, _ = self.handle_judge_call.call_args.args
+        assert "Spanish" in config.instructions
+
+    async def test_reserved_variables_interpolated_into_template_messages(self):
+        """message_history and response_to_evaluate are interpolated when present in the template."""
+        self.mock_ldai._client.variation.return_value = {
+            "_ldMeta": {"enabled": True},
+            "messages": [
+                {"role": "system", "content": "History: {{message_history}}"},
+                {"role": "user", "content": "Response: {{response_to_evaluate}}"},
+            ],
+            "model": {"name": "gpt-4o", "parameters": {}},
+        }
+        judge = OptimizationJudge(threshold=0.8, judge_key="ld-judge-key")
+        await self.client._evaluate_config_judge(
+            judge_key="quality",
+            optimization_judge=judge,
+            completion_response="My answer.",
+            iteration=1,
+            reasoning_history="",
+            user_input="Q?",
         )
-        call_kwargs = self.mock_ldai.judge_config.call_args
-        passed_vars = call_kwargs.args[3] if call_kwargs.args else call_kwargs.kwargs.get("variables", {})
-        assert passed_vars.get("language") == "Spanish"
-        assert "message_history" in passed_vars
-        assert "response_to_evaluate" in passed_vars
+        _, config, _, _ = self.handle_judge_call.call_args.args
+        system_msg = next(m for m in config.messages if m.role == "system")
+        assert "History:" in system_msg.content and "{{message_history}}" not in system_msg.content
+        user_msg = next(m for m in config.messages if m.role == "user")
+        assert "My answer." in user_msg.content
 
     async def test_agent_tools_included_without_evaluation_tool(self):
-        self.mock_ldai.judge_config.return_value = self._make_judge_config()
+        self.mock_ldai._client.variation.return_value = self._make_raw_variation()
         agent_tool = ToolDefinition(name="search", description="Search", input_schema={})
         judge = OptimizationJudge(threshold=0.8, judge_key="ld-judge-key")
         await self.client._evaluate_config_judge(
@@ -3862,132 +3883,6 @@ async def test_optimize_from_config_dispatches_to_gt_run(self):
         assert len(result) == 2
 
 
-# ---------------------------------------------------------------------------
-# _acceptance_criteria_implies_duration_optimization
-# ---------------------------------------------------------------------------
-
-
-class TestAcceptanceCriteriaImpliesDurationOptimization:
-    def test_returns_false_when_judges_is_none(self):
-        assert _acceptance_criteria_implies_duration_optimization(None) is False
-
-    def test_returns_false_when_judges_is_empty(self):
-        assert _acceptance_criteria_implies_duration_optimization({}) is False
-
-    def test_returns_false_when_no_acceptance_statements(self):
-        judges = {"quality": OptimizationJudge(threshold=0.8, judge_key="judge-1")}
-        assert _acceptance_criteria_implies_duration_optimization(judges) is False
-
-    def test_returns_false_when_acceptance_statement_has_no_latency_keywords(self):
-        judges = {
-            "accuracy": OptimizationJudge(
-                threshold=0.8,
-                acceptance_statement="The response must be accurate and complete.",
-            )
-        }
-        assert _acceptance_criteria_implies_duration_optimization(judges) is False
-
-    def test_detects_fast_keyword(self):
-        judges = {
-            "speed": OptimizationJudge(
-                threshold=0.8,
-                acceptance_statement="The response must be fast.",
-            )
-        }
-        assert _acceptance_criteria_implies_duration_optimization(judges) is True
-
-    def test_detects_faster_keyword(self):
-        judges = {
-            "speed": OptimizationJudge(
-                threshold=0.8,
-                acceptance_statement="The agent should respond faster.",
-            )
-        }
-        assert _acceptance_criteria_implies_duration_optimization(judges) is True
-
-    def test_detects_latency_keyword(self):
-        judges = {
-            "perf": OptimizationJudge(
-                threshold=0.8,
-                acceptance_statement="The agent must have low latency.",
-            )
-        }
-        assert _acceptance_criteria_implies_duration_optimization(judges) is True
-
-    def test_detects_duration_keyword(self):
-        judges = {
-            "perf": OptimizationJudge(
-                threshold=0.8,
-                acceptance_statement="Minimize the duration of each response.",
-            )
-        }
-        assert _acceptance_criteria_implies_duration_optimization(judges) is True
-
-    def test_detects_ms_keyword(self):
-        judges = {
-            "perf": OptimizationJudge(
-                threshold=0.8,
-                acceptance_statement="Responses should complete in under 500ms.",
-            )
-        }
-        assert _acceptance_criteria_implies_duration_optimization(judges) is True
-
-    def test_detects_response_time_phrase(self):
-        judges = {
-            "perf": OptimizationJudge(
-                threshold=0.8,
-                acceptance_statement="The response time should be minimized.",
-            )
-        }
-        assert _acceptance_criteria_implies_duration_optimization(judges) is True
-
-    def test_detects_efficient_keyword(self):
-        judges = {
-            "perf": OptimizationJudge(
-                threshold=0.8,
-                acceptance_statement="The model must be efficient.",
-            )
-        }
-        assert _acceptance_criteria_implies_duration_optimization(judges) is True
-
-    def test_detects_snappy_keyword(self):
-        judges = {
-            "perf": OptimizationJudge(
-                threshold=0.8,
-                acceptance_statement="Responses should feel snappy.",
-            )
-        }
-        assert _acceptance_criteria_implies_duration_optimization(judges) is True
-
-    def test_case_insensitive_match(self):
-        judges = {
-            "perf": OptimizationJudge(
-                threshold=0.8,
-                acceptance_statement="The model must be EFFICIENT and FAST.",
-            )
-        }
-        assert _acceptance_criteria_implies_duration_optimization(judges) is True
-
-    def test_returns_true_when_any_judge_matches(self):
-        judges = {
-            "accuracy": OptimizationJudge(
-                threshold=0.8,
-                acceptance_statement="The response must be accurate.",
-            ),
-            "speed": OptimizationJudge(
-                threshold=0.8,
-                acceptance_statement="The response must be fast.",
-            ),
-        }
-        assert _acceptance_criteria_implies_duration_optimization(judges) is True
-
-    def test_returns_false_when_acceptance_statement_is_none(self):
-        judges = {
-            "quality": OptimizationJudge(threshold=0.8, acceptance_statement=None)
-        }
-        assert _acceptance_criteria_implies_duration_optimization(judges) is False
-
-
 # ---------------------------------------------------------------------------
 # _evaluate_duration
 # ---------------------------------------------------------------------------
@@ -4069,17 +3964,9 @@ class TestDurationOptimizationChaosMode:
     def setup_method(self):
         self.mock_ldai = _make_ldai_client()
 
-    def _duration_judges(self, statement="The response must be fast."):
-        return {
-            "speed": OptimizationJudge(
-                threshold=0.8,
-                acceptance_statement=statement,
-            )
-        }
-
     def _ctx_with(self, duration_ms, score=1.0, iteration=1):
         return OptimizationContext(
-            scores={"speed": JudgeResult(score=score)},
+            scores={"accuracy": JudgeResult(score=score)},
             completion_response="answer",
             current_instructions="Do X.",
             current_parameters={},
@@ -4105,7 +3992,7 @@ async def test_duration_gate_triggers_variation_when_not_fast_enough(self):
         handle_agent_call = AsyncMock(return_value=OptimizationResponse(output=VARIATION_RESPONSE))
         opts = _make_options(
             handle_agent_call=handle_agent_call,
-            judges=self._duration_judges(),
+            latency_optimization=True,
             max_attempts=5,
         )
 
@@ -4131,7 +4018,7 @@ async def test_duration_check_skipped_on_first_iteration_no_baseline(self):
 
         opts = _make_options(
             handle_agent_call=AsyncMock(return_value=OptimizationResponse(output="answer")),
-            judges=self._duration_judges(),
+            latency_optimization=True,
             max_attempts=3,
         )
 
@@ -4142,26 +4029,20 @@ async def test_duration_check_skipped_on_first_iteration_no_baseline(self):
         # Succeeds because history is empty and duration check is skipped
         assert result.duration_ms == 9999
 
-    async def test_no_duration_gate_when_acceptance_criteria_has_no_latency_keywords(self):
-        """Acceptance statement with no latency keywords → duration gate never applied."""
+    async def test_no_duration_gate_when_latency_optimization_is_none(self):
+        """latency_optimization=None → duration gate never applied."""
         client = _make_client(self.mock_ldai)
 
         # Judge passes on first try; duration would fail if gate were applied (same as baseline)
-        # but since acceptance criteria has no latency keywords, it should succeed anyway
+        # but since latency_optimization=None, the gate is not applied
         execute_side_effects = [
             self._ctx_with(duration_ms=2000, score=1.0, iteration=1),
             self._ctx_with(duration_ms=2000, score=1.0, iteration=2),   # validation
         ]
 
-        non_latency_judges = {
-            "accuracy": OptimizationJudge(
-                threshold=0.8,
-                acceptance_statement="The response must be accurate and complete.",
-            )
-        }
         opts = _make_options(
             handle_agent_call=AsyncMock(return_value=OptimizationResponse(output="answer")),
-            judges=non_latency_judges,
+            latency_optimization=None,
             max_attempts=3,
         )
 
@@ -4193,7 +4074,7 @@ async def test_evaluate_duration_called_in_validation_phase(self):
         handle_agent_call = AsyncMock(return_value=OptimizationResponse(output=VARIATION_RESPONSE))
         opts = _make_options(
             handle_agent_call=handle_agent_call,
-            judges=self._duration_judges(),
+            latency_optimization=True,
             max_attempts=5,
         )
 
@@ -4214,17 +4095,9 @@ class TestDurationOptimizationGroundTruthMode:
     def setup_method(self):
         self.mock_ldai = _make_ldai_client()
 
-    def _duration_judges(self):
-        return {
-            "speed": OptimizationJudge(
-                threshold=0.8,
-                acceptance_statement="The response must be fast.",
-            )
-        }
-
     def _gt_ctx(self, duration_ms, score=1.0, iteration=1, user_input="q"):
         return OptimizationContext(
-            scores={"speed": JudgeResult(score=score)},
+            scores={"acc": JudgeResult(score=score)},
             completion_response="answer",
             current_instructions="Do X.",
             current_parameters={},
@@ -4268,7 +4141,7 @@ async def test_duration_gate_applied_per_sample_in_ground_truth_mode(self):
         handle_agent_call = AsyncMock(return_value=OptimizationResponse(output=VARIATION_RESPONSE))
         opts = _make_gt_options(
             handle_agent_call=handle_agent_call,
-            judges=self._duration_judges(),
+            latency_optimization=True,
             max_attempts=5,
         )
 
@@ -4283,8 +4156,8 @@ async def test_duration_gate_applied_per_sample_in_ground_truth_mode(self):
         assert handle_agent_call.call_count == 2
         assert mock_execute.call_count == 6
 
-    async def test_no_duration_gate_in_gt_mode_when_no_latency_keywords(self):
-        """In GT mode, duration gate is not applied when acceptance criteria has no latency keywords."""
+    async def test_no_duration_gate_in_gt_mode_when_latency_optimization_not_set(self):
+        """In GT mode, duration gate is not applied when latency_optimization is None."""
         client = _make_client(self.mock_ldai)
 
         execute_side_effects = [
@@ -4292,15 +4165,9 @@ async def test_no_duration_gate_in_gt_mode_when_no_latency_keywords(self):
             self._gt_ctx(duration_ms=5000, score=1.0, iteration=2, user_input="q2"),
         ]
 
-        non_latency_judges = {
-            "accuracy": OptimizationJudge(
-                threshold=0.8,
-                acceptance_statement="The response must be accurate.",
-            )
-        }
         opts = _make_gt_options(
             handle_agent_call=AsyncMock(return_value=OptimizationResponse(output="answer")),
-            judges=non_latency_judges,
+            latency_optimization=None,
             max_attempts=3,
         )
 
@@ -4308,7 +4175,7 @@ async def test_no_duration_gate_in_gt_mode_when_no_latency_keywords(self):
             mock_execute.side_effect = execute_side_effects
             results = await client.optimize_from_ground_truth_options("test-agent", opts)
 
-        # Succeeds on first attempt even with slow duration (no latency keyword → no gate)
+        # Succeeds on first attempt even with slow duration (latency_optimization=None → no gate)
         assert isinstance(results, list)
         assert mock_execute.call_count == 2
 
@@ -4946,6 +4813,21 @@ async def test_commit_not_called_when_auto_commit_false(self):
 
         mock_commit.assert_not_called()
 
+    async def test_commit_not_called_when_api_config_auto_commit_false(self):
+        """autoCommit: false in the API config suppresses the commit even when
+        OptimizationFromConfigOptions.auto_commit is True (the default)."""
+        client = self._make_client_with_key()
+        mock_api = _make_mock_api_client()
+        api_config_no_commit = {**_API_CONFIG, "autoCommit": False}
+        mock_api.get_agent_optimization = MagicMock(return_value=api_config_no_commit)
+
+        with patch("ldai_optimizer.client.LDApiClient", return_value=mock_api):
+            with patch.object(client, "_commit_variation") as mock_commit:
+                # options.auto_commit is True (default); commit must still be skipped
+                await client.optimize_from_config("my-opt", _make_from_config_options())
+
+        mock_commit.assert_not_called()
+
     async def test_commit_receives_pre_built_api_client(self):
         """The api_client created for fetching config is reused for _commit_variation."""
         client = self._make_client_with_key()
@@ -5345,61 +5227,6 @@ def test_returns_partial_cost_when_only_output_count_is_none(self):
         assert estimate_cost(usage, model_config) == pytest.approx(60 * 0.001)
 
 
-# ---------------------------------------------------------------------------
-# _acceptance_criteria_implies_cost_optimization
-# ---------------------------------------------------------------------------
-
-
-class TestAcceptanceCriteriaImpliesCostOptimization:
-    def _judge(self, statement: str) -> Dict[str, OptimizationJudge]:
-        return {"j": OptimizationJudge(threshold=0.9, acceptance_statement=statement)}
-
-    def test_returns_false_when_judges_none(self):
-        assert _acceptance_criteria_implies_cost_optimization(None) is False
-
-    def test_returns_false_when_no_acceptance_statements(self):
-        judges = {"j": OptimizationJudge(threshold=0.9, judge_key="some-judge")}
-        assert _acceptance_criteria_implies_cost_optimization(judges) is False
-
-    def test_detects_cheap(self):
-        assert _acceptance_criteria_implies_cost_optimization(self._judge("Keep it cheap."))
-
-    def test_detects_cost(self):
-        assert _acceptance_criteria_implies_cost_optimization(self._judge("Reduce overall cost."))
-
-    def test_detects_costs_plural(self):
-        assert _acceptance_criteria_implies_cost_optimization(
-            self._judge("Keep the costs stable or lower them.")
-        )
-
-    def test_detects_budget(self):
-        assert _acceptance_criteria_implies_cost_optimization(self._judge("Stay within budget."))
-
-    def test_does_not_detect_token_to_avoid_false_positives(self):
-        assert not _acceptance_criteria_implies_cost_optimization(self._judge("Generate a valid authentication token."))
-
-    def test_detects_billing(self):
-        assert _acceptance_criteria_implies_cost_optimization(self._judge("Minimize billing."))
-
-    def test_detects_spend(self):
-        assert _acceptance_criteria_implies_cost_optimization(self._judge("Reduce spend on API calls."))
-
-    def test_case_insensitive(self):
-        assert _acceptance_criteria_implies_cost_optimization(self._judge("BUDGET FRIENDLY response"))
-
-    def test_no_match_on_unrelated_statement(self):
-        assert not _acceptance_criteria_implies_cost_optimization(
-            self._judge("Respond accurately and concisely.")
-        )
-
-    def test_multiple_judges_one_matches(self):
-        judges = {
-            "j1": OptimizationJudge(threshold=0.9, acceptance_statement="Be accurate."),
-            "j2": OptimizationJudge(threshold=0.9, acceptance_statement="Keep costs low."),
-        }
-        assert _acceptance_criteria_implies_cost_optimization(judges)
-
-
 # ---------------------------------------------------------------------------
 # _evaluate_cost
 # ---------------------------------------------------------------------------
@@ -5516,22 +5343,6 @@ def test_noop_when_all_values_none(self):
 class TestApplyDurationGate:
     """Unit tests for the _apply_duration_gate wrapper method."""
 
-    def _make_judges_with_latency(self):
-        return {
-            "latency": OptimizationJudge(
-                threshold=0.8,
-                acceptance_statement="The response must be faster and reduce latency.",
-            )
-        }
-
-    def _make_judges_no_latency(self):
-        return {
-            "accuracy": OptimizationJudge(
-                threshold=0.8,
-                acceptance_statement="The response must be accurate.",
-            )
-        }
-
     def _ctx(self, duration_ms=None, iteration=2):
         return OptimizationContext(
             scores={},
@@ -5545,12 +5356,12 @@ def _ctx(self, duration_ms=None, iteration=2):
 
     def setup_method(self):
         self.client = _make_client()
-        self.client._options = _make_options(judges=self._make_judges_with_latency())
+        self.client._options = _make_options(latency_optimization=True)
         self.client._initialize_class_members_from_config(_make_agent_config())
         self.client._baseline_duration_ms = 2000.0
 
     def test_no_entry_added_when_gate_not_active(self):
-        self.client._options = _make_options(judges=self._make_judges_no_latency())
+        self.client._options = _make_options(latency_optimization=None)
         ctx = self._ctx(1000)
         passed, updated = self.client._apply_duration_gate(True, ctx)
         assert passed is True
@@ -5621,22 +5432,6 @@ def test_no_threshold_field_on_judge_result(self):
 class TestApplyCostGate:
     """Unit tests for the _apply_cost_gate wrapper method."""
 
-    def _make_judges_with_cost(self):
-        return {
-            "cost": OptimizationJudge(
-                threshold=0.8,
-                acceptance_statement="The response must be cheaper and reduce cost.",
-            )
-        }
-
-    def _make_judges_no_cost(self):
-        return {
-            "accuracy": OptimizationJudge(
-                threshold=0.8,
-                acceptance_statement="The response must be accurate.",
-            )
-        }
-
     def _ctx(self, cost=None, iteration=2):
         return OptimizationContext(
             scores={},
@@ -5650,12 +5445,12 @@ def _ctx(self, cost=None, iteration=2):
 
     def setup_method(self):
         self.client = _make_client()
-        self.client._options = _make_options(judges=self._make_judges_with_cost())
+        self.client._options = _make_options(token_optimization=True)
         self.client._initialize_class_members_from_config(_make_agent_config())
         self.client._baseline_cost_usd = 0.010
 
     def test_no_entry_added_when_gate_not_active(self):
-        self.client._options = _make_options(judges=self._make_judges_no_cost())
+        self.client._options = _make_options(token_optimization=None)
         ctx = self._ctx(0.005)
         passed, updated = self.client._apply_cost_gate(True, ctx)
         assert passed is True
@@ -5711,12 +5506,8 @@ def test_existing_scores_are_preserved(self):
     def test_both_gates_active_compose_cleanly(self):
         """Duration + cost gate can both fire on the same context."""
         self.client._options = _make_options(
-            judges={
-                "perf": OptimizationJudge(
-                    threshold=0.8,
-                    acceptance_statement="The response must be faster, reduce latency, and cheaper cost.",
-                )
-            }
+            latency_optimization=True,
+            token_optimization=True,
         )
         self.client._baseline_duration_ms = 2000.0
         self.client._baseline_cost_usd = 0.010
@@ -6064,7 +5855,7 @@ def _set_pricing(self):
             {"id": "gpt-4o", "costPerInputToken": 0.000005, "costPerOutputToken": 0.000015}
         ]
 
-    async def test_cost_context_injected_into_instructions(self):
+    async def test_cost_context_injected_when_token_optimization_true(self):
         self._set_pricing()
         usage = TokenUsage(total=100, input=60, output=40)
         captured: list = []
@@ -6073,7 +5864,9 @@ async def _capture_judge_call(judge_key, judge_config, ctx, is_judge):
             captured.append(judge_config.instructions)
             return OptimizationResponse(output=JUDGE_PASS_RESPONSE)
 
-        self.client._options = _make_options(handle_judge_call=_capture_judge_call)
+        self.client._options = _make_options(
+            handle_judge_call=_capture_judge_call, token_optimization=True
+        )
         await self.client._evaluate_acceptance_judge(
             judge_key="cost-judge",
             optimization_judge=self._cost_judge(),
@@ -6088,7 +5881,8 @@ async def _capture_judge_call(judge_key, judge_config, ctx, is_judge):
         assert "60 input tokens" in instructions
         assert "40 output tokens" in instructions
 
-    async def test_cost_context_not_injected_for_non_cost_judge(self):
+    async def test_cost_context_not_injected_when_token_optimization_false(self):
+        self._set_pricing()
         usage = TokenUsage(total=100, input=60, output=40)
         captured: list = []
 
@@ -6096,14 +5890,12 @@ async def _capture_judge_call(judge_key, judge_config, ctx, is_judge):
             captured.append(judge_config.instructions)
             return OptimizationResponse(output=JUDGE_PASS_RESPONSE)
 
-        self.client._options = _make_options(handle_judge_call=_capture_judge_call)
-        non_cost_judge = OptimizationJudge(
-            threshold=0.9,
-            acceptance_statement="Be accurate and concise.",
+        self.client._options = _make_options(
+            handle_judge_call=_capture_judge_call, token_optimization=False
         )
         await self.client._evaluate_acceptance_judge(
-            judge_key="quality-judge",
-            optimization_judge=non_cost_judge,
+            judge_key="cost-judge",
+            optimization_judge=self._cost_judge(),
             completion_response="response",
             iteration=1,
             reasoning_history="",
@@ -6112,7 +5904,6 @@ async def _capture_judge_call(judge_key, judge_config, ctx, is_judge):
         )
         assert captured
         instructions = captured[0]
-        # The cost-specific augmentation phrase should not appear
         assert "cost/token-usage goal" not in instructions
 
     async def test_baseline_cost_shown_when_history_present(self):
@@ -6135,7 +5926,9 @@ async def _capture_judge_call(judge_key, judge_config, ctx, is_judge):
         )
         self.client._history = [baseline_ctx]
         self.client._baseline_cost_usd = 500.0
-        self.client._options = _make_options(handle_judge_call=_capture_judge_call)
+        self.client._options = _make_options(
+            handle_judge_call=_capture_judge_call, token_optimization=True
+        )
         await self.client._evaluate_acceptance_judge(
             judge_key="cost-judge",
             optimization_judge=self._cost_judge(),
@@ -6148,3 +5941,205 @@ async def _capture_judge_call(judge_key, judge_config, ctx, is_judge):
         assert captured
         instructions = captured[0]
         assert "baseline" in instructions.lower()
+
+
+# ---------------------------------------------------------------------------
+# variation_key in optimize_from_options
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+class TestVariationKeyInOptimizeFromOptions:  # variation_key paths of optimize_from_options
+    def _make_client_with_key(self) -> OptimizationClient:  # env var set while constructing
+        with patch.dict("os.environ", {"LAUNCHDARKLY_API_KEY": "test-api-key"}):
+            return OptimizationClient(_make_ldai_client())
+
+    def _make_client_without_key(self) -> OptimizationClient:
+        client = OptimizationClient(_make_ldai_client())
+        client._has_api_key = False  # force the no-key state after construction
+        client._api_key = None
+        return client
+
+    def _make_ai_config_with_variations(self, *keys: str) -> dict:
+        return {  # one "agent"-mode variation entry per key
+            "variations": [
+                {"key": k, "instructions": f"Instructions for {k}.", "mode": "agent"}
+                for k in keys
+            ]
+        }
+
+    async def test_raises_when_variation_key_set_and_no_api_key(self):
+        client = self._make_client_without_key()
+        options = _make_options(variation_key="my-variation", project_key="my-project")
+        # variation_key is set but the client has no API key: expect a ValueError.
+        with pytest.raises(ValueError, match="LAUNCHDARKLY_API_KEY"):
+            await client.optimize_from_options("test-agent", options)
+
+    async def test_raises_when_variation_key_set_and_no_project_key(self):
+        client = self._make_client_with_key()
+        options = _make_options(variation_key="my-variation", project_key=None)
+        # project_key is None while variation_key is set: expect a ValueError.
+        with pytest.raises(ValueError, match="project_key"):
+            await client.optimize_from_options("test-agent", options)
+
+    async def test_uses_variation_key_as_base_variation(self):
+        client = self._make_client_with_key()
+        ai_config = self._make_ai_config_with_variations("v1", "my-variation", "v3")
+        # Replace LDApiClient with a mock whose AI config contains "my-variation".
+        with patch("ldai_optimizer.client.LDApiClient") as mock_api_cls:
+            mock_api_instance = MagicMock()
+            mock_api_instance.get_ai_config.return_value = ai_config
+            mock_api_instance.get_model_configs.return_value = []
+            mock_api_cls.return_value = mock_api_instance
+
+            options = _make_options(
+                variation_key="my-variation",
+                project_key="my-project",
+            )
+            await client.optimize_from_options("test-agent", options)
+        # NOTE(review): only call routing is asserted; the selected variation is not inspected.
+        mock_api_instance.get_ai_config.assert_called_with("my-project", "test-agent")
+        # Verify that the SDK default variation() was NOT called
+        client._ldClient._client.variation.assert_not_called()
+
+    async def test_raises_when_variation_key_not_found_in_config(self):
+        client = self._make_client_with_key()
+        ai_config = self._make_ai_config_with_variations("v1", "v2")
+        # The mocked AI config holds only "v1" and "v2", so the requested key is absent.
+        with patch("ldai_optimizer.client.LDApiClient") as mock_api_cls:
+            mock_api_instance = MagicMock()
+            mock_api_instance.get_ai_config.return_value = ai_config
+            mock_api_instance.get_model_configs.return_value = []
+            mock_api_cls.return_value = mock_api_instance
+
+            options = _make_options(
+                variation_key="nonexistent-key",
+                project_key="my-project",
+            )
+            with pytest.raises(ValueError, match="nonexistent-key"):
+                await client.optimize_from_options("test-agent", options)
+
+    async def test_no_api_call_when_variation_key_not_set(self):
+        client = self._make_client_without_key()
+        options = _make_options()  # no variation_key
+        # NOTE(review): nothing below asserts that LDApiClient was left uninvoked.
+        # Should succeed and use the SDK default variation path
+        result = await client.optimize_from_options("test-agent", options)
+        client._ldClient._client.variation.assert_called()
+        assert result is not None
+
+
+# ---------------------------------------------------------------------------
+# variation_key in optimize_from_ground_truth_options
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+class TestVariationKeyInOptimizeFromGroundTruthOptions:  # variation_key validation paths
+    def _make_client_with_key(self) -> OptimizationClient:  # env var set while constructing
+        with patch.dict("os.environ", {"LAUNCHDARKLY_API_KEY": "test-api-key"}):
+            return OptimizationClient(_make_ldai_client())
+
+    def _make_client_without_key(self) -> OptimizationClient:
+        client = OptimizationClient(_make_ldai_client())
+        client._has_api_key = False  # force the no-key state after construction
+        client._api_key = None
+        return client
+
+    async def test_raises_when_variation_key_set_and_no_api_key(self):
+        client = self._make_client_without_key()
+        options = _make_gt_options(variation_key="my-variation", project_key="my-project")
+        # variation_key is set but the client has no API key: expect a ValueError.
+        with pytest.raises(ValueError, match="LAUNCHDARKLY_API_KEY"):
+            await client.optimize_from_ground_truth_options("test-agent", options)
+
+    async def test_raises_when_variation_key_set_and_no_project_key(self):
+        client = self._make_client_with_key()
+        options = _make_gt_options(variation_key="my-variation", project_key=None)
+        # project_key is None while variation_key is set: expect a ValueError.
+        with pytest.raises(ValueError, match="project_key"):
+            await client.optimize_from_ground_truth_options("test-agent", options)
+
+    async def test_raises_when_variation_key_not_found_in_config(self):
+        client = self._make_client_with_key()
+        ai_config = {"variations": [{"key": "v1"}, {"key": "v2"}]}
+        # Only "v1" and "v2" exist in the mocked AI config, so the requested key is absent.
+        with patch("ldai_optimizer.client.LDApiClient") as mock_api_cls:
+            mock_api_instance = MagicMock()
+            mock_api_instance.get_ai_config.return_value = ai_config
+            mock_api_instance.get_model_configs.return_value = []
+            mock_api_cls.return_value = mock_api_instance
+
+            options = _make_gt_options(
+                variation_key="nonexistent-key",
+                project_key="my-project",
+            )
+            with pytest.raises(ValueError, match="nonexistent-key"):
+                await client.optimize_from_ground_truth_options("test-agent", options)
+
+
+# ---------------------------------------------------------------------------
+# latency_optimization / token_optimization boolean controls
+# ---------------------------------------------------------------------------
+
+
+class TestLatencyCostOptimizationBooleans:  # NOTE(review): tests visible here cover gates only
+    """Verify that latency_optimization and token_optimization booleans directly
+    control gate and prompt behaviour, replacing the old regex approach."""
+
+    def setup_method(self):  # pytest runs this before each test method
+        self.client = _make_client()
+        self.client._initialize_class_members_from_config(_make_agent_config())
+        self.client._baseline_duration_ms = 1000.0  # baseline; presumably the gate's reference
+        self.client._baseline_cost_usd = 0.010  # baseline; presumably the gate's reference
+
+    def _ctx(self, duration_ms=500.0, cost=0.005, iteration=2):
+        return OptimizationContext(  # defaults are half of each baseline set in setup_method
+            scores={},  # starts empty, so a gate key found later was added during the gate call
+            completion_response="response",
+            current_instructions="Do X.",
+            current_parameters={},
+            current_variables={},
+            iteration=iteration,
+            duration_ms=duration_ms,
+            estimated_cost_usd=cost,
+        )
+
+    def test_latency_gate_active_when_true(self):
+        self.client._options = _make_options(latency_optimization=True)
+        _, updated = self.client._apply_duration_gate(True, self._ctx(duration_ms=500.0))
+        assert "_latency_gate" in updated.scores
+
+    def test_latency_gate_inactive_when_none(self):
+        self.client._options = _make_options(latency_optimization=None)
+        _, updated = self.client._apply_duration_gate(True, self._ctx(duration_ms=500.0))
+        assert "_latency_gate" not in updated.scores
+
+    def test_latency_gate_inactive_when_false(self):
+        self.client._options = _make_options(latency_optimization=False)
+        _, updated = self.client._apply_duration_gate(True, self._ctx(duration_ms=500.0))
+        assert "_latency_gate" not in updated.scores
+
+    def test_cost_gate_active_when_true(self):
+        self.client._options = _make_options(token_optimization=True)
+        _, updated = self.client._apply_cost_gate(True, self._ctx(cost=0.005))
+        assert "_cost_gate" in updated.scores
+
+    def test_cost_gate_inactive_when_none(self):
+        self.client._options = _make_options(token_optimization=None)
+        _, updated = self.client._apply_cost_gate(True, self._ctx(cost=0.005))
+        assert "_cost_gate" not in updated.scores
+
+    def test_cost_gate_inactive_when_false(self):
+        self.client._options = _make_options(token_optimization=False)
+        _, updated = self.client._apply_cost_gate(True, self._ctx(cost=0.005))
+        assert "_cost_gate" not in updated.scores
+
+    def test_both_gates_independent(self):
+        """latency_optimization=True, token_optimization=False → only latency gate fires."""
+        self.client._options = _make_options(latency_optimization=True, token_optimization=False)
+        ctx = self._ctx()  # the same context is passed through both gates in turn
+        _, ctx = self.client._apply_duration_gate(True, ctx)  # first tuple item is ignored here
+        _, ctx = self.client._apply_cost_gate(True, ctx)
+        assert "_latency_gate" in ctx.scores
+        assert "_cost_gate" not in ctx.scores