Skip to content

Commit eb6a50d

Browse files
committed
feat: Enhance tokenization and add logprobs support in Tinker
- Introduced `choice_offsets` and `extra_logprobs` to `TokenizedResult` for improved tokenization handling.
- Updated `tokenize_trajectory` function to process `Choice` instances and capture log probabilities.
- Added new `TinkerAsyncMessagesAndChoices` class to handle messages and choices with log probabilities in the Tinker server.
- Implemented a new API endpoint `/v1/messages_and_choices/with_logprobs` to retrieve messages and choices along with their log probabilities.
- Refactored server logic to integrate log probability handling for enhanced model interactions.
1 parent be1f6aa commit eb6a50d

3 files changed

Lines changed: 309 additions & 4 deletions

File tree

src/art/preprocessing/tokenize.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import random
77
from typing import Any, Generator, cast
88

9+
from openai.types.chat.chat_completion import Choice
910
from PIL import Image
1011
import torch
1112
from transformers.image_processing_utils import BaseImageProcessor
@@ -41,6 +42,8 @@ class TokenizedResult:
4142
pixel_values: torch.Tensor | None
4243
image_grid_thw: torch.Tensor | None
4344
trajectory: Trajectory
45+
choice_offsets: list[int]
46+
extra_logprobs: dict[str, list[float]]
4447
_tokenizer: "PreTrainedTokenizerBase" = field(repr=False, compare=False)
4548
weight: float = 0.0
4649
prompt_id: int = 0
@@ -63,6 +66,11 @@ def without_prompt(self) -> "TokenizedResult":
6366
pixel_values=None,
6467
image_grid_thw=None,
6568
trajectory=self.trajectory,
69+
choice_offsets=self.choice_offsets,
70+
extra_logprobs={
71+
key: values[self.prompt_length :]
72+
for key, values in self.extra_logprobs.items()
73+
},
6674
_tokenizer=self._tokenizer,
6775
weight=self.weight,
6876
prompt_id=self.prompt_id,
@@ -207,8 +215,8 @@ def tokenize_trajectory(
207215
and allow_training_without_logprobs
208216
):
209217
last_assistant_index = i
210-
elif not isinstance(message, dict) and (
211-
message.logprobs or allow_training_without_logprobs # ty:ignore[possibly-missing-attribute]
218+
elif isinstance(message, Choice) and (
219+
message.logprobs or allow_training_without_logprobs
212220
):
213221
last_assistant_index = i
214222
# If there are no trainable assistant messages, return None
@@ -265,6 +273,8 @@ def tokenize_trajectory(
265273
)
266274
assistant_mask: list[int] = [0] * len(token_ids)
267275
logprobs = [float("nan")] * len(token_ids)
276+
choice_offsets, choice_token_logprobs = [], []
277+
268278
for message in messages_and_choices:
269279
if isinstance(message, dict):
270280
if message["role"] != "assistant":
@@ -304,12 +314,14 @@ def tokenize_trajectory(
304314
if not choice.logprobs: # ty:ignore[possibly-missing-attribute]
305315
continue
306316
token_logprobs = choice.logprobs.content or choice.logprobs.refusal or [] # ty:ignore[possibly-missing-attribute]
307-
if (
317+
if token_logprobs and (
308318
bytes(token_logprobs[0].bytes or []).decode("utf-8")
309319
== "<think>"
310320
== tokenizer.decode(token_ids[start - 4])
311321
):
312322
start -= 4
323+
choice_offsets.append(start)
324+
choice_token_logprobs.append(token_logprobs)
313325
try:
314326
token_ids[start:end] = (
315327
int(token_logprob.token.split(":")[1])
@@ -336,6 +348,18 @@ def tokenize_trajectory(
336348
token_ids.pop(start + len(token_logprobs))
337349
logprobs.pop(start + len(token_logprobs))
338350
assistant_mask.pop(start + len(token_logprobs))
351+
extra_logprobs: dict[str, list[float]] = {}
352+
for start, token_logprobs in zip(choice_offsets, choice_token_logprobs):
353+
for i, token_logprob in enumerate(token_logprobs):
354+
token_extra_logprobs = (token_logprob.model_extra or {}).get(
355+
"extra_logprobs"
356+
)
357+
if not isinstance(token_extra_logprobs, dict):
358+
continue
359+
for key, value in token_extra_logprobs.items():
360+
extra_logprobs.setdefault(key, [float("nan")] * len(token_ids))[
361+
start + i
362+
] = float("nan") if value is None else float(value)
339363
if image_processor:
340364
images: list[Image.Image] = []
341365
for message in messages_and_choices:
@@ -369,6 +393,8 @@ def tokenize_trajectory(
369393
token_ids[start:end] = [image_token_id] * num_image_tokens
370394
logprobs[start:end] = [float("nan")] * num_image_tokens
371395
assistant_mask[start:end] = [0] * num_image_tokens
396+
for values in extra_logprobs.values():
397+
values[start:end] = [float("nan")] * num_image_tokens
372398
pixel_values = result["pixel_values"]
373399
image_grid_thw = result["image_grid_thw"]
374400
else:
@@ -387,6 +413,8 @@ def tokenize_trajectory(
387413
pixel_values=pixel_values,
388414
image_grid_thw=image_grid_thw,
389415
trajectory=trajectory,
416+
choice_offsets=choice_offsets,
417+
extra_logprobs=extra_logprobs,
390418
_tokenizer=tokenizer,
391419
)
392420

src/art/tinker/client.py

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
from __future__ import annotations
2+
3+
from functools import cached_property
4+
from typing import Any, Iterable, Mapping, cast
5+
6+
import httpx
7+
from openai import AsyncOpenAI, BaseModel, _legacy_response
8+
from openai._base_client import make_request_options
9+
from openai._resource import AsyncAPIResource
10+
from openai._response import async_to_streamed_response_wrapper
11+
from openai._types import Body, Headers, NotGiven, Query, not_given
12+
from openai.resources.models import AsyncModels
13+
from openai.types import Model
14+
from openai.types.chat.chat_completion import Choice
15+
from openai.types.completion_usage import CompletionUsage
16+
17+
from art.types import MessageOrChoice, MessagesAndChoices, Tools
18+
19+
ParsedMessageOrChoice = Choice | dict[str, Any]
20+
ParsedMessagesAndChoices = list[ParsedMessageOrChoice]
21+
22+
23+
def _message_or_choice_to_dict(message_or_choice: MessageOrChoice) -> dict[str, Any]:
24+
if isinstance(message_or_choice, dict):
25+
return cast(dict[str, Any], message_or_choice)
26+
return cast(dict[str, Any], message_or_choice.to_dict())
27+
28+
29+
class MessagesAndChoicesWithLogprobs(BaseModel):
    """Response payload for ``/messages_and_choices/with_logprobs``.

    ``messages_and_choices`` carries the conversation items (plain dicts
    or parsed ``Choice`` objects); ``usages`` carries the accompanying
    ``CompletionUsage`` records.
    # NOTE(review): assumes one usage entry per requested model — confirm
    # against the server implementation.
    """

    messages_and_choices: ParsedMessagesAndChoices
    usages: list[CompletionUsage]
32+
33+
34+
class TinkerAsyncModels(AsyncModels):
    """``AsyncModels`` resource extended with a ``PUT /models/{model}`` call."""

    @cached_property
    def with_raw_response(self) -> "TinkerAsyncModelsWithRawResponse":
        return TinkerAsyncModelsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> "TinkerAsyncModelsWithStreamingResponse":
        return TinkerAsyncModelsWithStreamingResponse(self)

    async def put(
        self,
        model: str,
        *,
        target: str,
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Model:
        """Point ``model`` at ``target`` via ``PUT /models/{model}``.

        Returns the updated ``Model``.

        Raises:
            ValueError: if ``model`` is empty.
        """
        if not model:
            raise ValueError(
                f"Expected a non-empty value for `model` but received {model!r}"
            )

        # Assemble per-request options separately from the request itself.
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return await self._put(
            f"/models/{model}",
            body={"target": target},
            options=request_options,
            cast_to=Model,
        )
69+
70+
71+
class TinkerAsyncModelsWithRawResponse:
    """Raw-response view over the ``TinkerAsyncModels`` endpoints."""

    def __init__(self, models: TinkerAsyncModels) -> None:
        self._models = models

        # Bind the wrapper once; each endpoint gets the same treatment.
        wrap = _legacy_response.async_to_raw_response_wrapper
        self.put = wrap(models.put)
        self.retrieve = wrap(models.retrieve)
        self.list = wrap(models.list)
        self.delete = wrap(models.delete)
79+
80+
81+
class TinkerAsyncModelsWithStreamingResponse:
    """Streamed-response view over the ``TinkerAsyncModels`` endpoints."""

    def __init__(self, models: TinkerAsyncModels) -> None:
        self._models = models

        # Bind the wrapper once; each endpoint gets the same treatment.
        wrap = async_to_streamed_response_wrapper
        self.put = wrap(models.put)
        self.retrieve = wrap(models.retrieve)
        self.list = wrap(models.list)
        self.delete = wrap(models.delete)
89+
90+
91+
class TinkerAsyncMessagesAndChoices(AsyncAPIResource):
    """Resource wrapping the ``/messages_and_choices`` Tinker endpoints."""

    @cached_property
    def with_raw_response(self) -> "TinkerAsyncMessagesAndChoicesWithRawResponse":
        return TinkerAsyncMessagesAndChoicesWithRawResponse(self)

    @cached_property
    def with_streaming_response(
        self,
    ) -> "TinkerAsyncMessagesAndChoicesWithStreamingResponse":
        return TinkerAsyncMessagesAndChoicesWithStreamingResponse(self)

    async def with_logprobs(
        self,
        messages_and_choices: MessagesAndChoices,
        *,
        models: Iterable[str],
        model_aliases: Mapping[str, str] | None = None,
        tools: Tools | None,
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> MessagesAndChoicesWithLogprobs:
        """POST the conversation to ``/messages_and_choices/with_logprobs``.

        Serializes each item to a plain dict, then asks the server to
        return the messages and choices annotated with log probabilities
        for the given ``models``.
        """
        # Normalize every item (dict or Choice) into JSON-ready dicts.
        serialized_items = [
            _message_or_choice_to_dict(item) for item in messages_and_choices
        ]
        request_body = {
            "messages_and_choices": serialized_items,
            "models": list(models),
            "model_aliases": dict(model_aliases or {}),
            "tools": tools,
        }
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return await self._post(
            "/messages_and_choices/with_logprobs",
            body=request_body,
            options=request_options,
            cast_to=MessagesAndChoicesWithLogprobs,
        )
132+
133+
134+
class TinkerAsyncMessagesAndChoicesWithRawResponse:
    """Raw-response view over ``TinkerAsyncMessagesAndChoices``."""

    def __init__(self, messages_and_choices: TinkerAsyncMessagesAndChoices) -> None:
        self._messages_and_choices = messages_and_choices
        wrap = _legacy_response.async_to_raw_response_wrapper
        self.with_logprobs = wrap(messages_and_choices.with_logprobs)
141+
142+
143+
class TinkerAsyncMessagesAndChoicesWithStreamingResponse:
    """Streamed-response view over ``TinkerAsyncMessagesAndChoices``."""

    def __init__(self, messages_and_choices: TinkerAsyncMessagesAndChoices) -> None:
        self._messages_and_choices = messages_and_choices
        wrap = async_to_streamed_response_wrapper
        self.with_logprobs = wrap(messages_and_choices.with_logprobs)
150+
151+
152+
class TinkerAsyncOpenAI(AsyncOpenAI):
    """``AsyncOpenAI`` client wired to the Tinker-specific resources.

    Overrides ``models`` with the PUT-capable resource and adds the
    ``messages_and_choices`` resource; both are created lazily and
    cached per client instance.
    """

    @cached_property
    def models(self) -> TinkerAsyncModels:
        return TinkerAsyncModels(self)

    @cached_property
    def messages_and_choices(self) -> TinkerAsyncMessagesAndChoices:
        return TinkerAsyncMessagesAndChoices(self)

0 commit comments

Comments
 (0)