launchdarkly · andrewklatzke · Jun 1, 2026 · Jun 1, 2026 · Jun 2, 2026
@@ -20,6 +20,7 @@
 import logging
 import os
 import random
+import re
 import time
 import uuid
 from typing import Any, Dict, List, Literal, Optional, Tuple, Union
@@ -49,8 +50,6 @@
     LDApiClient,
 )
 from ldai_optimizer.prompts import (
-    _acceptance_criteria_implies_cost_optimization,
-    _acceptance_criteria_implies_duration_optimization,
     build_message_history_text,
     build_new_variation_prompt,
     build_reasoning_history,
@@ -71,6 +70,15 @@
 logger.addFilter(RedactionFilter())
 
 
+def _interpolate(template: str, variables: Dict[str, Any]) -> str:
+    """
+    Replace Mustache-style ``{{key}}`` tokens in a template with variable values.
+
+    Whitespace inside the braces is ignored (``{{ key }}``) and dotted names
+    (``{{ldctx.key}}``) are resolved by walking nested dicts one segment at a
+    time. A token whose name cannot be resolved, or whose value is ``None``,
+    is replaced with an empty string. Text that does not form such a token is
+    left untouched.
+
+    :param template: Text that may contain ``{{key}}`` tokens
+    :param variables: Values keyed by token name; nested dicts are reachable via dotted names
+    :return: The template with every recognised token substituted
+    """
+
+    def _resolve(match: "re.Match[str]") -> str:
+        # First segment is looked up exactly as a flat key; any remaining
+        # segments descend into nested dicts (anything else is unresolvable).
+        head, *rest = match.group(1).split(".")
+        value: Any = variables.get(head)
+        for segment in rest:
+            value = value.get(segment) if isinstance(value, dict) else None
+        # None covers both "missing" and an explicit null value: render nothing
+        # rather than leaking the literal text "None" into the prompt.
+        return "" if value is None else str(value)
+
+    return re.sub(r"\{\{\s*(\w+(?:\.\w+)*)\s*\}\}", _resolve, template)
+
+
 def _find_model_config(
     model_name: str, configs: List[Dict[str, Any]]
 ) -> Optional[Dict[str, Any]]:
@@ -404,18 +412,65 @@ def _judge_config(
         variables: Dict[str, Any],
     ) -> AIJudgeConfig:
         """
-        Fetch a judge configuration from the LaunchDarkly client.
+        Fetch a judge configuration by evaluating the flag variation directly.
 
-        Thin wrapper around LDAIClient.judge_config so callers do not need a
-        direct reference to the client.
+        Bypasses LDAIClient.judge_config to avoid the reserved-variable warnings
+        for 'message_history' and 'response_to_evaluate'. Those variables are
+        interpolated here with their actual values instead of being neutralised
+        by the SDK. If the template contains only a system message, a user turn
+        is synthesised from the provided message_history and response_to_evaluate
+        so that _evaluate_config_judge always receives a complete conversation.
 
         :param judge_key: The key for the judge configuration in LaunchDarkly
         :param context: The evaluation context
-        :param default: Fallback config when the flag is disabled or unreachable
-        :param variables: Template variables for instruction interpolation
+        :param default: Unused; kept for signature compatibility
+        :param variables: Template variables including message_history and response_to_evaluate
         :return: The resolved AIJudgeConfig
         """
-        return self._ldClient.judge_config(judge_key, context, default, variables)
+        variation: Dict[str, Any] = self._ldClient._client.variation(judge_key, context, {})
+        enabled: bool = bool(variation.get("_ldMeta", {}).get("enabled", False))
+
+        all_variables: Dict[str, Any] = {"ldctx": context.to_dict(), **variables}
+
+        messages: List[LDMessage] = []
+        raw_messages = variation.get("messages")
+        if isinstance(raw_messages, list) and all(isinstance(m, dict) for m in raw_messages):
+            messages = [
+                LDMessage(
+                    role=m["role"],
+                    content=_interpolate(m.get("content", ""), all_variables),
+                )
+                for m in raw_messages
+            ]
+
+        # New-style templates only have a system message. Auto-generate a user
+        # turn so _evaluate_config_judge always has a complete conversation to split.
+        if not any(m.role == "user" for m in messages):
+            message_history = variables.get("message_history", "")
+            response_to_evaluate = variables.get("response_to_evaluate", "")
+            parts: List[str] = []
+            if message_history:
+                parts.append(str(message_history))
+            parts.append(f"Here is the response to evaluate: {response_to_evaluate}")
+            messages.append(LDMessage(role="user", content="\n\n".join(parts)))
+
+        model: Optional[ModelConfig] = None
+        raw_model = variation.get("model")
+        if isinstance(raw_model, dict):
+            model = ModelConfig(
+                name=raw_model.get("name", ""),
+                parameters=raw_model.get("parameters"),
+                custom=raw_model.get("custom"),
+            )
+
+        return AIJudgeConfig(
+            key=judge_key,
+            enabled=enabled,
+            create_tracker=lambda: None,
+            model=model,
+            messages=messages,
+            evaluation_metric_key=variation.get("evaluationMetricKey"),
+        )
 
     def _serialize_scores(
         self, judge_results: Dict[str, JudgeResult]
@@ -850,9 +905,7 @@ async def _evaluate_acceptance_judge(
 
         if (
             agent_duration_ms is not None
-            and _acceptance_criteria_implies_duration_optimization(
-                {judge_key: optimization_judge}
-            )
+            and bool(self._options.latency_optimization)
         ):
             baseline_ms = self._baseline_duration_ms
             instructions += (
@@ -875,7 +928,7 @@ async def _evaluate_acceptance_judge(
                 "These suggestions will be used directly to generate the next variation."
             )
 
-        if _acceptance_criteria_implies_cost_optimization({judge_key: optimization_judge}):
+        if bool(self._options.token_optimization):
             current_cost = estimate_cost(
                 agent_usage,
                 _find_model_config(self._current_model or "", self._model_configs),
@@ -975,7 +1028,12 @@ async def _evaluate_acceptance_judge(
         return dataclasses.replace(judge_result, duration_ms=judge_duration_ms, usage=judge_response.usage)
 
     async def _get_agent_config(
-        self, agent_key: str, context: Context
+        self,
+        agent_key: str,
+        context: Context,
+        variation_key: Optional[str] = None,
+        project_key: Optional[str] = None,
+        base_url: Optional[str] = None,
     ) -> AIAgentConfig:
         """
         Fetch the agent configuration, replacing the instructions with the raw variation
@@ -985,16 +1043,39 @@ async def _get_agent_config(
         (including the tracker). We then call variation() separately to retrieve the
         unrendered instruction template and swap it in, keeping everything else intact.
 
+        When ``variation_key`` is provided the specific variation is fetched via the
+        LaunchDarkly REST API instead of using the SDK's default flag evaluation.
+
         :param agent_key: The key for the agent to get the configuration for
         :param context: The evaluation context
+        :param variation_key: Optional specific variation key to use as the base
+        :param project_key: LaunchDarkly project key; required when variation_key is set
+        :param base_url: Optional API base URL override
         :return: AIAgentConfig with raw {{placeholder}} instruction templates intact
         """
         try:
             agent_config = self._ldClient.agent_config(agent_key, context)
 
-            # variation() returns the raw JSON before chevron.render(), so instructions
-            # still contain {{placeholder}} tokens rather than empty strings.
-            raw_variation = self._ldClient._client.variation(agent_key, context, {})
+            if variation_key:
+                assert self._api_key is not None
+                api_client = LDApiClient(
+                    self._api_key,
+                    **({"base_url": base_url} if base_url else {}),
+                )
+                ai_config = api_client.get_ai_config(project_key, agent_key)
+                match = next(
+                    (v for v in (ai_config or {}).get("variations", []) if v.get("key") == variation_key),
+                    None,
+                )
+                if match is None:
+                    raise ValueError(
+                        f"variation_key '{variation_key}' not found in agent config '{agent_key}'"
+                    )
+                raw_variation = match
+            else:
+                # variation() returns the raw JSON before chevron.render(), so instructions
+                # still contain {{placeholder}} tokens rather than empty strings.
+                raw_variation = self._ldClient._client.variation(agent_key, context, {})
             raw_instructions = raw_variation.get(
                 "instructions", agent_config.instructions
             )
@@ -1030,20 +1111,20 @@ def _fetch_model_configs(
         self,
         project_key: Optional[str],
         base_url: Optional[str],
-        judges: Optional[Dict[str, "OptimizationJudge"]],
+        token_optimization: Optional[bool],
     ) -> None:
         """Populate ``_model_configs`` from the LD API when credentials are available.
 
         When an API key and project key are both present, fetches the model pricing
         catalogue so that ``estimate_cost`` can produce USD figures and the cost gate
         can make meaningful comparisons.  If either is absent, ``_model_configs`` is
-        reset to an empty list and a warning is emitted when cost judges are in use —
-        cost optimization will silently pass rather than blocking the run.
+        reset to an empty list and a warning is emitted when token_optimization is
+        enabled — cost data will be unavailable and the cost gate will pass unconditionally.
 
         :param project_key: LaunchDarkly project key, or None if not provided.
         :param base_url: Optional API base URL override.
-        :param judges: Judge map from the caller's options, used only to decide
-            whether a cost-related warning is appropriate.
+        :param token_optimization: Whether token/cost optimization is enabled; used only to
+            decide whether a cost-related warning is appropriate.
         """
         self._model_configs = []
         if self._has_api_key and project_key:
@@ -1056,9 +1137,9 @@ def _fetch_model_configs(
                 self._model_configs = api_client.get_model_configs(project_key)
             except Exception as exc:
                 logger.debug("Could not pre-fetch model configs: %s", exc)
-        elif _acceptance_criteria_implies_cost_optimization(judges or {}):
+        elif token_optimization:
             logger.warning(
-                "Cost optimization requires LAUNCHDARKLY_API_KEY and project_key to be set; "
+                "Token optimization requires LAUNCHDARKLY_API_KEY and project_key to be set; "
                 "cost data will not be available and the cost gate will pass unconditionally"
             )
 
@@ -1080,10 +1161,24 @@ async def optimize_from_options(
                 raise ValueError(
                     "auto_commit requires project_key to be set on OptimizationOptions"
                 )
+        if options.variation_key:
+            if not self._has_api_key:
+                raise ValueError(
+                    "variation_key requires LAUNCHDARKLY_API_KEY to be set"
+                )
+            if not options.project_key:
+                raise ValueError(
+                    "variation_key requires project_key to be set on OptimizationOptions"
+                )
         self._agent_key = agent_key
-        self._fetch_model_configs(options.project_key, options.base_url, options.judges)
+        self._fetch_model_configs(options.project_key, options.base_url, options.token_optimization)
         context = random.choice(options.context_choices)
-        agent_config = await self._get_agent_config(agent_key, context)
+        agent_config = await self._get_agent_config(
+            agent_key, context,
+            variation_key=options.variation_key,
+            project_key=options.project_key,
+            base_url=options.base_url,
+        )
         result = await self._run_optimization(agent_config, options)
         if options.auto_commit and self._last_run_succeeded and self._last_succeeded_context:
             self._commit_variation(
@@ -1119,10 +1214,24 @@ async def optimize_from_ground_truth_options(
                 raise ValueError(
                     "auto_commit requires project_key to be set on GroundTruthOptimizationOptions"
                 )
+        if options.variation_key:
+            if not self._has_api_key:
+                raise ValueError(
+                    "variation_key requires LAUNCHDARKLY_API_KEY to be set"
+                )
+            if not options.project_key:
+                raise ValueError(
+                    "variation_key requires project_key to be set on GroundTruthOptimizationOptions"
+                )
         self._agent_key = agent_key
-        self._fetch_model_configs(options.project_key, options.base_url, options.judges)
+        self._fetch_model_configs(options.project_key, options.base_url, options.token_optimization)
         context = random.choice(options.context_choices)
-        agent_config = await self._get_agent_config(agent_key, context)
+        agent_config = await self._get_agent_config(
+            agent_key, context,
+            variation_key=options.variation_key,
+            project_key=options.project_key,
+            base_url=options.base_url,
+        )
         result = await self._run_ground_truth_optimization(agent_config, options)
         if options.auto_commit and self._last_run_succeeded and self._last_succeeded_context:
             self._commit_variation(
@@ -1162,6 +1271,8 @@ async def _run_ground_truth_optimization(
             on_failing_result=gt_options.on_failing_result,
             on_status_update=gt_options.on_status_update,
             token_limit=gt_options.token_limit,
+            latency_optimization=gt_options.latency_optimization,
+            token_optimization=gt_options.token_optimization,
         )
         self._options = bridge
         self._agent_config = agent_config
@@ -1579,12 +1690,8 @@ async def _generate_new_variation(
         )
         self._safe_status_update("generating variation", status_ctx, iteration)
 
-        optimize_for_duration = _acceptance_criteria_implies_duration_optimization(
-            self._options.judges
-        )
-        optimize_for_cost = _acceptance_criteria_implies_cost_optimization(
-            self._options.judges
-        )
+        optimize_for_duration = bool(self._options.latency_optimization)
+        optimize_for_cost = bool(self._options.token_optimization)
         quality_already_passing = self._all_judges_passing()
         instructions = build_new_variation_prompt(
             self._history,
@@ -1708,7 +1815,7 @@ async def optimize_from_config(
         else:
             result = await self._run_optimization(agent_config, optimization_options)
 
-        if options.auto_commit and self._last_run_succeeded and self._last_succeeded_context:
+        if optimization_options.auto_commit and options.auto_commit and self._last_run_succeeded and self._last_succeeded_context:
             created_key = self._commit_variation(
                 self._last_succeeded_context,
                 project_key=options.project_key,
@@ -1989,6 +2096,9 @@ def _persist_and_forward(
                 on_failing_result=options.on_failing_result,
                 on_status_update=_persist_and_forward,
                 token_limit=config.get("tokenLimit"),
+                latency_optimization=config.get("latencyOptimization"),
+                token_optimization=config.get("tokenOptimization"),
+                auto_commit=config.get("autoCommit", True),
             )
 
         variable_choices: List[Dict[str, Any]] = config["variableChoices"] or [{}]
@@ -2009,6 +2119,9 @@ def _persist_and_forward(
             on_failing_result=options.on_failing_result,
             on_status_update=_persist_and_forward,
             token_limit=config.get("tokenLimit"),
+            latency_optimization=config.get("latencyOptimization"),
+            token_optimization=config.get("tokenOptimization"),
+            auto_commit=config.get("autoCommit", True),
         )
 
     async def _execute_agent_turn(
@@ -2269,7 +2382,7 @@ def _apply_duration_gate(
         :param ctx: Current optimization context.
         :return: (passed, updated_ctx) where passed reflects gate outcome.
         """
-        if not _acceptance_criteria_implies_duration_optimization(self._options.judges):
+        if not bool(self._options.latency_optimization):
             return passed_so_far, ctx
         passed = self._evaluate_duration(ctx)
         if passed:
@@ -2323,7 +2436,7 @@ def _apply_cost_gate(
         :param ctx: Current optimization context.
         :return: (passed, updated_ctx) where passed reflects gate outcome.
         """
-        if not _acceptance_criteria_implies_cost_optimization(self._options.judges):
+        if not bool(self._options.token_optimization):
             return passed_so_far, ctx
         passed = self._evaluate_cost(ctx)
         if passed:

@@ -348,9 +348,14 @@ class OptimizationOptions:
     context_choices: List[Context] = field(
         default_factory=lambda: [Context.builder("anonymous").anonymous(True).build()]
     )
+    # Base variation - Optional
+    variation_key: Optional[str] = None  # use this specific variation as the base; defaults to the flag's default variation; requires API key + project_key
+    # Optimization controls - Optional; when None the corresponding gate/prompt is disabled
+    latency_optimization: Optional[bool] = None
+    token_optimization: Optional[bool] = None
     # Auto-commit - Optional
     auto_commit: bool = False
-    project_key: Optional[str] = None  # required when auto_commit=True
+    project_key: Optional[str] = None  # required when auto_commit=True or variation_key is set
     output_key: Optional[str] = None   # variation key/name; auto-generated if omitted
     base_url: Optional[str] = None  # override to target a non-default LD instance
     on_passing_result: Optional[Callable[[OptimizationContext], None]] = None
@@ -440,9 +445,14 @@ class GroundTruthOptimizationOptions:
     context_choices: List[Context] = field(
         default_factory=lambda: [Context.builder("anonymous").anonymous(True).build()]
     )
+    # Base variation - Optional
+    variation_key: Optional[str] = None  # use this specific variation as the base; defaults to the flag's default variation; requires API key + project_key
+    # Optimization controls - Optional; when None the corresponding gate/prompt is disabled
+    latency_optimization: Optional[bool] = None
+    token_optimization: Optional[bool] = None
     # Auto-commit - Optional
     auto_commit: bool = False
-    project_key: Optional[str] = None  # required when auto_commit=True
+    project_key: Optional[str] = None  # required when auto_commit=True or variation_key is set
     output_key: Optional[str] = None   # variation key/name; auto-generated if omitted
     base_url: Optional[str] = None  # override to target a non-default LD instance
     token_limit: Optional[int] = None  # stop the run when total token usage reaches this value

@@ -90,6 +90,9 @@ class AgentOptimizationConfig(_AgentOptimizationConfigRequired, total=False):
     groundTruthResponses: List[str]
     metricKey: str
     tokenLimit: int
+    latencyOptimization: bool
+    tokenOptimization: bool
+    autoCommit: bool
 
 
 # ---------------------------------------------------------------------------