3535from ..utils .get_model_step import get_step_from_dir
3636from ..utils .output_dirs import get_step_checkpoint_dir
3737from ..vllm import get_llm , get_worker , openai_server_task , run_on_workers
38- from .train import gc_and_empty_cuda_cache , train
38+ from .train import StopTrainingLoop , gc_and_empty_cuda_cache , train
3939
4040logger = logging .getLogger (__name__ )
4141
@@ -55,6 +55,15 @@ class SupportsLoadLora(Protocol):
5555 def load_lora (self , lora_path : str , load_tensors : bool = True ) -> LoRARequest : ...
5656
5757
58+ class _StopTrainInputs :
59+ """Dedicated sentinel for stopping the background trainer loop."""
60+
61+
62+ _STOP_TRAIN_INPUT = _StopTrainInputs ()
63+ _TRAIN_TASK_GRACEFUL_SHUTDOWN_TIMEOUT_S = 5.0
64+ _TRAIN_TASK_CANCEL_TIMEOUT_S = 1.0
65+
66+
5867def precalculate_new_logprobs (
5968 trainer : "GRPOTrainer" ,
6069 peft_model : "PeftModelForCausalLM" ,
@@ -91,7 +100,7 @@ async def process_train_batch(
91100 packed_tensors : PackedTensors ,
92101 config : types .TrainConfig ,
93102 _config : dev .TrainConfig ,
94- inputs_queue : asyncio .Queue [TrainInputs ],
103+ inputs_queue : asyncio .Queue [TrainInputs | _StopTrainInputs ],
95104 results_queue : asyncio .Queue [dict [str , float ]],
96105 train_task : asyncio .Task [None ],
97106 trainer : "GRPOTrainer" ,
@@ -215,7 +224,7 @@ class UnslothState:
215224 tokenizer : PreTrainedTokenizerBase
216225 peft_model : peft .peft_model .PeftModelForCausalLM
217226 trainer : GRPOTrainer
218- inputs_queue : asyncio .Queue [TrainInputs ]
227+ inputs_queue : asyncio .Queue [TrainInputs | _StopTrainInputs ]
219228 results_queue : asyncio .Queue [dict [str , float ]]
220229 _is_offloaded : bool = False
221230 _pinned_buffers : dict [str , torch .Tensor ] | None = None
@@ -316,6 +325,7 @@ class UnslothService:
316325 _vllm_log_file : Any = field (default = None , repr = False )
317326 _vllm_host : str = "127.0.0.1"
318327 _vllm_port : int = 0
328+ _train_task : asyncio .Task [None ] | None = field (default = None , init = False , repr = False )
319329
320330 @property
321331 def is_dedicated (self ) -> bool :
@@ -326,6 +336,46 @@ def _next_lora_id(self) -> int:
326336 self ._lora_id_counter += 1
327337 return self ._lora_id_counter
328338
339+ def _request_train_task_stop (self ) -> asyncio .Task [None ] | None :
340+ train_task = self ._train_task
341+ if train_task is None :
342+ return None
343+ if train_task .done ():
344+ return train_task
345+
346+ # `_state` is a cached_property. Read from __dict__ directly so shutdown
347+ # does not instantiate the full trainer state solely to stop a task.
348+ state = self .__dict__ .get ("_state" )
349+ if isinstance (state , UnslothState ):
350+ state .inputs_queue .put_nowait (_STOP_TRAIN_INPUT )
351+ return train_task
352+
async def _shutdown_train_task(self) -> None:
    """Stop the background trainer task, escalating graceful -> cancel.

    Always clears ``self._train_task`` on exit, even if the task never
    terminates within the timeouts.
    """
    train_task = self._request_train_task_stop()
    if train_task is None:
        return

    try:
        # Give the trainer loop time to consume the stop sentinel and exit
        # normally before falling back to cancellation.
        await asyncio.wait_for(
            train_task, timeout=_TRAIN_TASK_GRACEFUL_SHUTDOWN_TIMEOUT_S
        )
    except asyncio.TimeoutError:
        # Graceful window elapsed: cancel and briefly wait for the task to
        # unwind. Best-effort — if it still does not finish, shutdown
        # proceeds anyway.
        train_task.cancel()
        try:
            await asyncio.wait_for(train_task, timeout=_TRAIN_TASK_CANCEL_TIMEOUT_S)
        except (asyncio.CancelledError, asyncio.TimeoutError):
            pass
    except asyncio.CancelledError:
        # NOTE(review): this swallows CancelledError from the awaited
        # wait_for — presumably so shutdown always completes — but it also
        # suppresses cancellation of this coroutine itself; confirm that is
        # intentional.
        pass
    finally:
        # Drop the reference unconditionally so a later shutdown/close does
        # not try to stop the same task again.
        self._train_task = None
374+
async def aclose(self) -> None:
    """Async teardown: stop the trainer loop first, then run sync close()."""
    await self._shutdown_train_task()
    self.close()
378+
329379 # =========================================================================
330380 # Dedicated mode: vLLM subprocess lifecycle
331381 # =========================================================================
@@ -450,6 +500,7 @@ async def _reload_adapter(self, checkpoint_path: str, step: int) -> None:
450500
451501 def close (self ) -> None :
452502 """Terminate vLLM subprocess if running."""
503+ self ._request_train_task_stop ()
453504 if self ._vllm_process is None :
454505 return
455506 self ._vllm_process .terminate ()
@@ -981,17 +1032,19 @@ def _state(self) -> UnslothState:
9811032 trainer .create_optimizer ()
9821033
9831034 # Initialize queues
984- inputs_queue : asyncio .Queue [TrainInputs ] = asyncio .Queue ()
1035+ inputs_queue : asyncio .Queue [TrainInputs | _StopTrainInputs ] = asyncio .Queue ()
9851036 results_queue : asyncio .Queue [dict [str , float ]] = asyncio .Queue ()
9861037
9871038 # Patch trainer _prepare_inputs() to pull from queue
9881039 def _async_prepare_inputs (* _ : Any , ** __ : Any ) -> dict [str , torch .Tensor ]:
989- async def get_inputs () -> TrainInputs :
1040+ async def get_inputs () -> TrainInputs | _StopTrainInputs :
9901041 return await inputs_queue .get ()
9911042
9921043 # Force otherwise synchronous _prepare_inputs() to yield
9931044 # with nested asyncio.run() call
9941045 inputs = asyncio .run (get_inputs ())
1046+ if isinstance (inputs , _StopTrainInputs ):
1047+ raise StopTrainingLoop ()
9951048
9961049 return cast (dict [str , torch .Tensor ], inputs )
9971050
0 commit comments