diff --git a/.gitignore b/.gitignore index 42c6d12..a258f65 100644 --- a/.gitignore +++ b/.gitignore @@ -164,4 +164,7 @@ Thumbs.db env env/* -__pycache__/ \ No newline at end of file +__pycache__/ + +devlog/ +worktrees/ \ No newline at end of file diff --git a/deployment/on-device/android/ai-agent-usage-guide.mdx b/deployment/on-device/android/ai-agent-usage-guide.mdx index fa3a9ac..07ebffc 100644 --- a/deployment/on-device/android/ai-agent-usage-guide.mdx +++ b/deployment/on-device/android/ai-agent-usage-guide.mdx @@ -24,7 +24,7 @@ MessageResponse (streaming) ```toml # gradle/libs.versions.toml [versions] -leapSdk = "0.9.7" +leapSdk = "0.10.0" [libraries] leap-sdk = { module = "ai.liquid.leap:leap-sdk", version.ref = "leapSdk" } @@ -44,8 +44,8 @@ dependencies { ```kotlin // app/build.gradle.kts dependencies { - implementation("ai.liquid.leap:leap-sdk:0.9.7") - implementation("ai.liquid.leap:leap-model-downloader:0.9.7") + implementation("ai.liquid.leap:leap-sdk:0.10.0") + implementation("ai.liquid.leap:leap-model-downloader:0.10.0") } ``` diff --git a/deployment/on-device/android/android-quick-start-guide.mdx b/deployment/on-device/android/android-quick-start-guide.mdx index 558e80a..32403a4 100644 --- a/deployment/on-device/android/android-quick-start-guide.mdx +++ b/deployment/on-device/android/android-quick-start-guide.mdx @@ -3,7 +3,7 @@ title: "Quick Start Guide" description: "Get up and running with the LEAP Android SDK in minutes. Install the SDK, load models, and start generating content." --- -Latest version: `v0.9.7` +Latest version: `v0.10.0` The LEAP SDK is now a **Kotlin Multiplatform** library supporting Android, iOS, macOS, and JVM. While Android is well-tested and production-ready, other platforms are currently in testing. 
@@ -19,7 +19,7 @@ You should already have: plugins { id("com.android.application") version "8.13.2" apply false id("com.android.library") version "8.13.2" apply false - id("org.jetbrains.kotlin.android") version "2.3.10" apply false + id("org.jetbrains.kotlin.android") version "2.3.20" apply false } ``` * A working Android device that supports `arm64-v8a` ABI with [developer mode enabled](https://developer.android.com/studio/debug/dev-options). We recommend having 3GB+ of RAM to run the models. @@ -61,8 +61,16 @@ Add the following dependencies into `$PROJECT_ROOT/app/build.gradle.kts`: ```kotlin dependencies { - implementation("ai.liquid.leap:leap-sdk:0.9.7") - implementation("ai.liquid.leap:leap-model-downloader:0.9.7") // Android-specific model downloader + implementation("ai.liquid.leap:leap-sdk:0.10.0") + implementation("ai.liquid.leap:leap-model-downloader:0.10.0") // Android-specific model downloader + + // Optional: OpenAI-compatible cloud chat client + // See /deployment/on-device/android/openai-client + // implementation("ai.liquid.leap:leap-openai-client:0.10.0") + + // Optional: Voice assistant widget (Compose Multiplatform) + // See /deployment/on-device/android/voice-assistant + // implementation("ai.liquid.leap:leap-ui:0.10.0") } ``` @@ -72,11 +80,14 @@ In `gradle/libs.versions.toml`: ```toml [versions] -leapSdk = "0.9.7" +leapSdk = "0.10.0" [libraries] leap-sdk = { module = "ai.liquid.leap:leap-sdk", version.ref = "leapSdk" } leap-model-downloader = { module = "ai.liquid.leap:leap-model-downloader", version.ref = "leapSdk" } +# Optional modules: +leap-openai-client = { module = "ai.liquid.leap:leap-openai-client", version.ref = "leapSdk" } +leap-ui = { module = "ai.liquid.leap:leap-ui", version.ref = "leapSdk" } ``` Then in `app/build.gradle.kts`: @@ -85,6 +96,8 @@ Then in `app/build.gradle.kts`: dependencies { implementation(libs.leap.sdk) implementation(libs.leap.model.downloader) + // implementation(libs.leap.openai.client) // see 
openai-client.mdx + // implementation(libs.leap.ui) // see voice-assistant.mdx } ``` diff --git a/deployment/on-device/android/model-loading.mdx b/deployment/on-device/android/model-loading.mdx index db61957..9739930 100644 --- a/deployment/on-device/android/model-loading.mdx +++ b/deployment/on-device/android/model-loading.mdx @@ -149,6 +149,94 @@ Download a model from the LEAP Model Library and save it to the local cache, wit `Manifest`: The [`Manifest`](#manifest) instance that contains the metadata of the downloaded model. +
+## `loadSimpleModel` (sideloaded models) + +`LeapDownloader.loadSimpleModel` and `LeapModelDownloader.loadSimpleModel` (added in v0.10.0) load a model from explicit resource paths or URLs without going through the LEAP Model Library manifest. Use this when: + +- You ship the model as an app asset or download it via your own pipeline. +- You `adb push` a model into `/data/local/tmp/leap/` for development. +- You stage a multimodal model + companion files (`mmproj`, audio decoder, audio tokenizer) into a known directory. + +```kotlin +suspend fun loadSimpleModel( + model: ModelSource, + modelLoadingOptions: ModelLoadingOptions? = null, + generationTimeParameters: GenerationTimeParameters? = null, + progress: (ProgressData) -> Unit = {}, +): ModelRunner +``` + +`ModelSource` carries the four resource locations: + +```kotlin +data class ModelSource( + val modelName: String, + val quantizationId: String, + val modelPath: String, + val mmprojPath: String? = null, + val audioDecoderPath: String? = null, + val audioTokenizerPath: String? = null, +) +``` + +Each path can be: + +- An **absolute filesystem path** (e.g. `/data/local/tmp/leap/lfm2.gguf`). +- A `file://` URL — both `file:///path` and `file://localhost/path` resolve identically (RFC 8089 §3). Other authorities are rejected. +- An `http(s)://` URL — fetched and cached on first use. + +When a resource resolves to a local path that already exists on disk, the SDK skips the cache lookup and download entirely and uses the file verbatim. This is the recommended sideload entry point. 
+ +### Sideload an Android-bundled model + +```kotlin +import ai.liquid.leap.manifest.ModelSource +import ai.liquid.leap.model_downloader.LeapModelDownloader + +val modelDownloader = LeapModelDownloader(context) + +lifecycleScope.launch { + val modelRunner = modelDownloader.loadSimpleModel( + model = ModelSource( + modelName = "LFM2-1.2B", + quantizationId = "Q5_K_M", + modelPath = "/data/local/tmp/leap/lfm2-1.2b-q5_k_m.gguf", + ), + ) + // Use modelRunner... +} +``` + +### Sideload a multimodal model with companion files + +```kotlin +val modelRunner = modelDownloader.loadSimpleModel( + model = ModelSource( + modelName = "LFM2-VL-450M", + quantizationId = "Q4_K_M", + modelPath = "file:///data/local/tmp/leap/lfm2-vl.gguf", + mmprojPath = "file:///data/local/tmp/leap/lfm2-vl-mmproj.gguf", + ), +) +``` + +The same API exists on `LeapDownloader` for cross-platform code: + +```kotlin +import ai.liquid.leap.LeapDownloader +import ai.liquid.leap.LeapDownloaderConfig + +val downloader = LeapDownloader(LeapDownloaderConfig(saveDir = baseDir)) +val modelRunner = downloader.loadSimpleModel( + model = ModelSource( + modelName = "LFM2-1.2B", + quantizationId = "Q5_K_M", + modelPath = localPath, + ), +) +``` +
## `LeapDownloaderConfig` The `LeapDownloaderConfig` class contains all the configuration options for `LeapDownloader`. It is a data class with the following fields: diff --git a/deployment/on-device/android/openai-client.mdx b/deployment/on-device/android/openai-client.mdx new file mode 100644 index 0000000..a6c2136 --- /dev/null +++ b/deployment/on-device/android/openai-client.mdx @@ -0,0 +1,229 @@ +--- +title: "OpenAI-Compatible Client" +description: "Lightweight client for OpenAI-compatible chat completions APIs, ideal for hybrid on-device + cloud routing" +--- + +`leap-openai-client` (introduced in v0.10.0) is a small, dependency-light client for any OpenAI-compatible chat completions endpoint — OpenAI itself, OpenRouter, vLLM, llama-server, or your own proxy. It ships as a separate Maven artifact alongside `leap-sdk`, so you can route requests between an on-device LFM and a cloud model from the same app. + +## When to use it + +- **Hybrid on-device + cloud routing.** Run small / fast models on-device with `leap-sdk`, fall back to a larger cloud model for hard prompts. +- **Standardised cloud API.** Talk to any OpenAI-compatible backend without pulling in a heavier OpenAI SDK. +- **Streaming first.** SSE streaming is the only mode — non-streaming requests aren't exposed (`stream = true` is the default and not normally changed). + +## Add the dependency + +```kotlin +dependencies { + implementation("ai.liquid.leap:leap-sdk:0.10.0") // for the on-device side + implementation("ai.liquid.leap:leap-openai-client:0.10.0") // for the cloud side +} +``` + +The Android module bundles a Ktor `HttpClient` on the OkHttp engine — no extra HTTP setup needed. 
+ +## Basic usage + +```kotlin +import ai.liquid.leap.openai.ChatCompletionEvent +import ai.liquid.leap.openai.ChatCompletionRequest +import ai.liquid.leap.openai.ChatMessage +import ai.liquid.leap.openai.OpenAiClient +import ai.liquid.leap.openai.OpenAiClientConfig +import kotlinx.coroutines.flow.collect + +val client = OpenAiClient( + config = OpenAiClientConfig( + apiKey = "sk-…", + baseUrl = "https://api.openai.com/v1", + ) +) + +val request = ChatCompletionRequest( + model = "gpt-4o-mini", + messages = listOf( + ChatMessage.System("You are a helpful assistant."), + ChatMessage.User("What is the capital of Japan?"), + ), + temperature = 0.7, +) + +client.streamChatCompletion(request).collect { event -> + when (event) { + is ChatCompletionEvent.Delta -> print(event.content) + is ChatCompletionEvent.Done -> event.usage?.let { println("\nTokens: ${it.totalTokens}") } + is ChatCompletionEvent.Error -> println("\nError: ${event.message}") + } +} + +client.close() // closes the underlying HttpClient +``` + +## Configuration + +```kotlin +data class OpenAiClientConfig( + val apiKey: String, + val baseUrl: String = "https://api.openai.com/v1", + val chatCompletionsPath: String = "/chat/completions", + val extraHeaders: Map<String, String> = emptyMap(), +) +``` + +| Field | Default | Notes | +|---|---|---| +| `apiKey` | — | Sent as `Authorization: Bearer <apiKey>`. | +| `baseUrl` | `https://api.openai.com/v1` | Override for OpenRouter, a self-hosted backend, etc. | +| `chatCompletionsPath` | `/chat/completions` | Appended to `baseUrl`. | +| `extraHeaders` | `emptyMap()` | Merged into every request — e.g. OpenRouter's `HTTP-Referer`. 
| + +### Talking to OpenRouter + +```kotlin +val client = OpenAiClient( + OpenAiClientConfig( + apiKey = "sk-or-…", + baseUrl = "https://openrouter.ai/api/v1", + extraHeaders = mapOf( + "HTTP-Referer" to "https://yourapp.example.com", + "X-Title" to "Your App", + ), + ) +) +``` + +### Talking to a self-hosted vLLM / llama-server + +```kotlin +val client = OpenAiClient( + OpenAiClientConfig( + apiKey = "anything", // Required by config but typically unused + baseUrl = "http://10.0.0.42:8000/v1", + ) +) +``` + +## Request shape + +`ChatCompletionRequest` covers standard OpenAI fields plus a few OpenRouter-specific extensions. OpenRouter-only fields (`topK`, `minP`, `topA`, `repetitionPenalty`, `transforms`, `models`, `route`, `provider`) are silently ignored by stock OpenAI-compatible APIs, so you can leave them in cross-backend code. + +```kotlin +data class ChatCompletionRequest( + val model: String, + val messages: List<ChatMessage>, + val temperature: Double? = null, + val topP: Double? = null, + val maxCompletionTokens: Int? = null, // Preferred for newer OpenAI versions + val maxTokens: Int? = null, // Legacy alias — some custom backends still require it + val frequencyPenalty: Double? = null, + val presencePenalty: Double? = null, + val stop: List<String>? = null, + val stream: Boolean = true, + // OpenRouter extensions + val topK: Int? = null, + val repetitionPenalty: Double? = null, + val minP: Double? = null, + val topA: Double? = null, + val transforms: List<String>? = null, + val models: List<String>? = null, + val route: String? = null, + val provider: ProviderPreferences? = null, +) +``` + +`ChatMessage` is a sealed interface with three implementations: + +```kotlin +ChatMessage.System("Be concise.") +ChatMessage.User("Hello.") +ChatMessage.Assistant("Hi there!") +``` + +## Response shape + +`streamChatCompletion(request)` returns a `Flow<ChatCompletionEvent>`: + +| Variant | Meaning | +|---|---| +| `Delta(content: String)` | Text chunk from the model. May be empty for role-only deltas. 
| +| `Done(usage: Usage?)` | Stream finished. `usage` is non-`null` when the API includes token counts. | +| `Error(message: String)` | HTTP error or stream parsing failure. | + +```kotlin +data class Usage(val promptTokens: Int, val completionTokens: Int, val totalTokens: Int) +``` + +## Hybrid routing example + +A common pattern: route simple prompts to a small on-device LFM, escalate harder prompts to a cloud model. + +```kotlin +import ai.liquid.leap.Conversation +import ai.liquid.leap.MessageResponse +import ai.liquid.leap.openai.ChatCompletionEvent +import ai.liquid.leap.openai.ChatCompletionRequest +import ai.liquid.leap.openai.ChatMessage as CloudChatMessage +import ai.liquid.leap.openai.OpenAiClient +import ai.liquid.leap.message.ChatMessage +import ai.liquid.leap.message.ChatMessageContent +import androidx.lifecycle.ViewModel +import androidx.lifecycle.viewModelScope +import kotlinx.coroutines.flow.collect +import kotlinx.coroutines.launch + +class HybridChatViewModel( + private val onDevice: Conversation, + private val cloud: OpenAiClient, +) : ViewModel() { + + fun send(text: String, useCloud: Boolean) { + viewModelScope.launch { + if (useCloud) { + val request = ChatCompletionRequest( + model = "gpt-4o-mini", + messages = listOf(CloudChatMessage.User(text)), + ) + cloud.streamChatCompletion(request).collect { event -> + if (event is ChatCompletionEvent.Delta) appendChunk(event.content) + } + } else { + val message = ChatMessage( + role = ChatMessage.Role.USER, + content = listOf(ChatMessageContent.Text(text)), + ) + onDevice.generateResponse(message).collect { response -> + if (response is MessageResponse.Chunk) appendChunk(response.text) + } + } + } + } + + private fun appendChunk(text: String) { /* … */ } + + override fun onCleared() { + super.onCleared() + cloud.close() + } +} +``` + +See [Cloud AI Comparison](./cloud-ai-comparison) for a side-by-side feature breakdown of on-device vs cloud chat APIs. 
+ +## Lifecycle + +The `OpenAiClient(config)` factory creates an `HttpClient` internally and ties it to the returned client — call `close()` when you're done, typically in `ViewModel.onCleared()` or your DI scope's teardown: + +```kotlin +override fun onCleared() { + super.onCleared() + client.close() +} +``` + +If you need to share an `HttpClient` across multiple clients (e.g. you already manage one for other Ktor-based code), use the lower-level constructor that takes an `httpClient` you own: + +```kotlin +val shared = HttpClient(OkHttp) // your own instance +val client = OpenAiClient(config = config, httpClient = shared) +// Don't call client.close() — you own `shared` and decide when it dies +``` diff --git a/deployment/on-device/android/utilities.mdx b/deployment/on-device/android/utilities.mdx index 7ff1772..af99045 100644 --- a/deployment/on-device/android/utilities.mdx +++ b/deployment/on-device/android/utilities.mdx @@ -130,8 +130,8 @@ if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.TIRAMISU) { ```kotlin // In build.gradle.kts dependencies { - implementation("ai.liquid.leap:leap-sdk:0.9.7") - implementation("ai.liquid.leap:leap-model-downloader:0.9.7") + implementation("ai.liquid.leap:leap-sdk:0.10.0") + implementation("ai.liquid.leap:leap-model-downloader:0.10.0") } ``` diff --git a/deployment/on-device/android/voice-assistant.mdx b/deployment/on-device/android/voice-assistant.mdx new file mode 100644 index 0000000..4691d7c --- /dev/null +++ b/deployment/on-device/android/voice-assistant.mdx @@ -0,0 +1,233 @@ +--- +title: "Voice Assistant Widget" +description: "Drop-in voice UI for Android and JVM, powered by leap-ui's Compose Multiplatform widget" +--- + +The `leap-ui` module (introduced in v0.10.0) ships a ready-to-use voice assistant widget — an animated orb, mic button, and status label — backed by a state machine that handles recording, generation, and audio playback. You wire it to a model and it handles the rest. 
+ +`leap-ui` is a Compose Multiplatform module, so the same widget runs on: + +- **Android** (this page) — Compose for Android. Maven artifact `ai.liquid.leap:leap-ui:0.10.0`. +- **JVM / desktop** — Compose for Desktop. Same Maven artifact; the audio I/O implementations from `leap-ui-demo/android` need a desktop equivalent (the demo ships `AudioPipeline.kt` patterns you can adapt). +- **iOS / macOS** — see the [iOS voice-assistant page](/deployment/on-device/ios/voice-assistant) for the SwiftUI / UIKit / AppKit hosting story and the `VoiceAssistantStore.makeForApple()` factory. +- **Web (Wasm, experimental)** — present in the source tree (`leap-ui-demo/web`) but not part of the v0.10.0 stable release notes; treat as preview. + +## Add the dependency + +```kotlin +dependencies { + implementation("ai.liquid.leap:leap-sdk:0.10.0") + implementation("ai.liquid.leap:leap-ui:0.10.0") +} +``` + +`leap-ui` brings in Compose runtime, foundation, and material3 transitively. If your project doesn't already use Compose, add the standard Compose dependencies in addition. + +## Architecture + +``` +VoiceAssistantWidget (Compose UI) + ↓ intents +VoiceAssistantStore (state machine: IDLE → LISTENING → RESPONDING → IDLE) + ↓ uses +VoiceAudioRecorder + VoiceAudioPlayer + VoiceConversation +``` + +- **`VoiceAssistantStore`** owns the session lifecycle. Hold it in your `ViewModel`; close it from `onCleared()`. +- **`VoiceConversation`** is a thin interface you implement to bridge the store to a model. Wrap the SDK's `Conversation.generateResponse` flow and forward `AudioSample` chunks to `onAudioChunk`. +- **Audio I/O** is provided through `VoiceAudioRecorder` and `VoiceAudioPlayer` interfaces — the demo apps ship `AndroidAudioRecorder` / `AndroidAudioPlayer` implementations you can drop in or replace. 
## Wire it in a ViewModel + +```kotlin +import ai.liquid.leap.manifest.LeapDownloader +import ai.liquid.leap.manifest.LeapDownloaderConfig +import ai.liquid.leap.ui.VoiceAssistantIntent +import ai.liquid.leap.ui.VoiceAssistantStore +import ai.liquid.leap.ui.VoiceAssistantStoreState +import ai.liquid.leap.ui.VoiceAudioPlayer +import ai.liquid.leap.ui.VoiceAudioRecorder +import android.app.Application +import androidx.lifecycle.AndroidViewModel +import androidx.lifecycle.viewModelScope +import java.io.File +import kotlinx.coroutines.flow.StateFlow +import kotlinx.coroutines.launch + +class VoiceAssistantViewModel(application: Application) : AndroidViewModel(application) { + private val recorder: VoiceAudioRecorder = AndroidAudioRecorder() + private val player: VoiceAudioPlayer = AndroidAudioPlayer() + + // viewModelScope is on Dispatchers.Main.immediate by default — required by the store + val store = VoiceAssistantStore(recorder = recorder, player = player, scope = viewModelScope) + + val state: StateFlow<VoiceAssistantStoreState> = store.state + + private val modelDir = File(application.filesDir, "leap_models").apply { mkdirs() } + + init { + viewModelScope.launch { loadModel() } + } + + fun processIntent(intent: VoiceAssistantIntent) = store.processIntent(intent) + + private suspend fun loadModel() { + runCatching { + val downloader = LeapDownloader(LeapDownloaderConfig(saveDir = modelDir.absolutePath)) + store.setModelProgress(0f, "Resolving manifest…") + val runner = downloader.loadModel( + modelName = "LFM2.5-Audio-1.5B", + quantizationSlug = "Q4_0", + progress = { pd -> + val pct = if (pd.total > 0) " (${(pd.bytes * 100 / pd.total).toInt()}%)" else "" + store.setModelProgress( + fraction = if (pd.total > 0) pd.bytes.toFloat() / pd.total else 0f, + message = "Downloading$pct", + ) + }, + ) + store.setConversation( + LeapVoiceConversation( + conv = runner.createConversation(systemPrompt = "Respond with interleaved text and audio."), + ) + ) + }.onFailure { e -> store.setModelError("✗ 
${e.message}") } + } + + override fun onCleared() { + super.onCleared() + store.close() + } +} +``` + +## Host the widget + +```kotlin +import ai.liquid.leap.ui.StatusType +import ai.liquid.leap.ui.VoiceAssistantWidget +import androidx.activity.ComponentActivity +import androidx.activity.compose.setContent +import androidx.compose.foundation.background +import androidx.compose.foundation.layout.fillMaxSize +import androidx.compose.material3.MaterialTheme +import androidx.compose.material3.darkColorScheme +import androidx.compose.runtime.collectAsState +import androidx.compose.runtime.getValue +import androidx.compose.ui.Modifier +import androidx.compose.ui.graphics.Color +import androidx.lifecycle.viewmodel.compose.viewModel + +class MainActivity : ComponentActivity() { + override fun onCreate(savedInstanceState: Bundle?) { + super.onCreate(savedInstanceState) + setContent { + MaterialTheme(colorScheme = darkColorScheme(background = Color.Black)) { + val vm = viewModel() + val state by vm.state.collectAsState() + + VoiceAssistantWidget( + state = state.widgetState, + onIntent = vm::processIntent, + modifier = Modifier.fillMaxSize().background(Color.Black), + ) + } + } + } +} +``` + +## Implement `VoiceConversation` + +The store delegates generation to your `VoiceConversation`. A minimal adapter that wraps a normal `Conversation` looks like this: + +```kotlin +import ai.liquid.leap.Conversation +import ai.liquid.leap.MessageResponse +import ai.liquid.leap.message.ChatMessage +import ai.liquid.leap.message.ChatMessageContent +import ai.liquid.leap.message.GenerationStats +import ai.liquid.leap.message.encodePcm16Wav +import ai.liquid.leap.ui.VoiceConversation +import kotlinx.coroutines.flow.collect + +class LeapVoiceConversation(private val conv: Conversation) : VoiceConversation { + + override suspend fun generateResponse( + audioSamples: FloatArray, + sampleRate: Int, + onAudioChunk: (samples: FloatArray, sampleRate: Int) -> Unit, + ): GenerationStats? 
{ + val wavBytes = encodePcm16Wav(audioSamples, sampleRate) + val userMessage = ChatMessage( + role = ChatMessage.Role.USER, + content = listOf(ChatMessageContent.Audio(wavBytes)), + ) + + var stats: GenerationStats? = null + conv.generateResponse(userMessage).collect { response -> + when (response) { + is MessageResponse.AudioSample -> onAudioChunk(response.samples, response.sampleRate) + is MessageResponse.Complete -> stats = response.stats + else -> Unit + } + } + return stats + } + + override fun reset(): VoiceConversation = + LeapVoiceConversation(conv.modelRunner.createConversation()) +} +``` + +## Audio I/O implementations + +`AndroidAudioRecorder` and `AndroidAudioPlayer` aren't part of the `leap-ui` module — they're reference implementations shipped with the demo app at `leap-ui-demo/android/src/main/kotlin/ai/liquid/leap/uidemo/AudioPipeline.kt`. Copy the file into your project, or implement `VoiceAudioRecorder` and `VoiceAudioPlayer` against your audio stack of choice. + +The contracts are short: + +```kotlin +interface VoiceAudioRecorder { + val amplitude: Float // 0..1 RMS, used to drive orb animation + val nativeSampleRate: Int // Available after start() + fun start(): Boolean + suspend fun stop(): FloatArray + suspend fun cancel() +} + +interface VoiceAudioPlayer { + val amplitude: Float + fun enqueue(samples: FloatArray, sampleRate: Int) + suspend fun waitForPlayback() + fun stop() +} +``` + +## Required permissions + +```xml +<uses-permission android:name="android.permission.RECORD_AUDIO" /> +<uses-permission android:name="android.permission.INTERNET" /> +``` + +Request `RECORD_AUDIO` at runtime — the standard `ActivityResultContracts.RequestPermission()` pattern is shown in the [Quick Start Guide](./android-quick-start-guide). + +## `interruptToSpeak` + +`VoiceAssistantStore` (added in v0.10.0) exposes an `interruptToSpeak: Boolean = true` constructor parameter: + +- `true` (default) — pressing during a response cancels the in-flight generation **and** immediately starts a new recording. +- `false` — only cancels. The user must press again to start a new recording. 
+ +```kotlin +val store = VoiceAssistantStore( + recorder = recorder, + player = player, + scope = viewModelScope, + interruptToSpeak = false, +) +``` + +## Compatible models + +Voice mode requires a model that emits audio output. The demo uses `LFM2.5-Audio-1.5B` at the `Q4_0` quantization, with a system prompt of *"Respond with interleaved text and audio."* See the [LEAP Model Library](https://leap.liquid.ai/models) for other audio-capable models. diff --git a/deployment/on-device/ios/ai-agent-usage-guide.mdx b/deployment/on-device/ios/ai-agent-usage-guide.mdx index 442d17f..7ec7c12 100644 --- a/deployment/on-device/ios/ai-agent-usage-guide.mdx +++ b/deployment/on-device/ios/ai-agent-usage-guide.mdx @@ -17,34 +17,34 @@ MessageResponse (streaming) ## Installation -### Swift Package Manager (Recommended) +### Swift Package Manager ```swift +// swift-tools-version: 6.0 // In Xcode: File → Add Package Dependencies -// Repository: https://github.com/Liquid4All/leap-ios.git -// Version: 0.9.2 +// Repository: https://github.com/Liquid4All/leap-sdk.git +// Version: 0.10.0 +// Min platforms: iOS 17, macOS 15 dependencies: [ - .package(url: "https://github.com/Liquid4All/leap-ios.git", from: "0.9.2") + .package(url: "https://github.com/Liquid4All/leap-sdk.git", from: "0.10.0") ] targets: [ .target( name: "YourApp", dependencies: [ - .product(name: "LeapSDK", package: "leap-ios"), - .product(name: "LeapModelDownloader", package: "leap-ios") // Optional + .product(name: "LeapSDK", package: "leap-sdk"), + .product(name: "LeapModelDownloader", package: "leap-sdk"), // Optional + .product(name: "LeapOpenAIClient", package: "leap-sdk"), // Optional — see openai-client.mdx + .product(name: "LeapUI", package: "leap-sdk"), // Optional — see voice-assistant.mdx + .product(name: "LeapSDKMacros", package: "leap-sdk") // Optional — for @Generatable / @Guide ] ) ] ``` -### CocoaPods - -```ruby -pod 'Leap-SDK', '~> 0.9.2' -pod 'Leap-Model-Downloader', '~> 0.9.2' # Optional -``` 
+CocoaPods is no longer supported as of v0.10.0. ## Loading Models @@ -75,7 +75,7 @@ Separate download from loading for better control: ```swift import LeapModelDownloader -let downloader = ModelDownloader() +let downloader = LeapModelDownloader() // Download model to cache let manifest = try await downloader.downloadModel( @@ -637,7 +637,7 @@ Query download status and manage cached models: ```swift import LeapModelDownloader -let downloader = ModelDownloader() +let downloader = LeapModelDownloader() // Check download status let status = downloader.queryStatus("LFM2.5-1.2B-Instruct", quantization: "Q4_K_M") @@ -836,7 +836,7 @@ import AppKit // macOS ```swift // Check available disk space -let downloader = ModelDownloader() +let downloader = LeapModelDownloader() if let freeSpace = downloader.getAvailableDiskSpace() { print("Free space: \(freeSpace / 1_000_000_000) GB") } diff --git a/deployment/on-device/ios/ios-quick-start-guide.mdx b/deployment/on-device/ios/ios-quick-start-guide.mdx index a2d055b..9b5a738 100644 --- a/deployment/on-device/ios/ios-quick-start-guide.mdx +++ b/deployment/on-device/ios/ios-quick-start-guide.mdx @@ -3,59 +3,93 @@ title: "Quick Start Guide" description: "Get up and running with the LEAP iOS SDK in minutes. Install the SDK, load models, and start generating content." --- -Latest version: `v0.9.2` +Latest version: `v0.10.0` + + +**Migrating from 0.9.x?** v0.10.0 unifies the SDK into a single Kotlin Multiplatform distribution published from [`Liquid4All/leap-sdk`](https://github.com/Liquid4All/leap-sdk). The standalone `Liquid4All/leap-ios` repo is no longer the source-of-truth — point your Swift Package Manager dependency at the new URL. Existing call sites (`Leap.load(...)`, `Conversation.generateResponse(...)`, etc.) keep compiling thanks to a Swift compatibility layer. + ## Prerequisites[​](#prerequisites "Direct link to Prerequisites") Make sure you have: -* Xcode 15.0 or later with Swift 5.9. 
-* An iOS project targeting **iOS 15.0+** (macOS 12.0+ or Mac Catalyst 15.0+ are also supported). +* Xcode 16.0 or later with Swift 6.0. +* An iOS project targeting **iOS 17.0+** (macOS 15.0+ or Mac Catalyst 17.0+ are also supported). * A physical iPhone or iPad with at least 3 GB RAM for best performance. The simulator works for development but runs models much slower. ``` -iOS Deployment Target: 15.0 -macOS Deployment Target: 12.0 +iOS Deployment Target: 17.0 +macOS Deployment Target: 15.0 ``` + +v0.10.0 raises the minimum iOS deployment target from 15.0 to **17.0** and macOS from 12.0 to **15.0**. Apps targeting older OSes need to either pin to `0.9.x` or bump their deployment target before upgrading. + + Always test on a real device before shipping. Simulator performance is not representative of production behaviour. ## Install the SDK[​](#install-the-sdk "Direct link to Install the SDK") -Choose your preferred installation method: +The Leap SDK ships exclusively through Swift Package Manager in v0.10.0. CocoaPods support has been removed. - - **Recommended** - - 1. In Xcode choose **File -> Add Package Dependencies**. - 2. Enter `https://github.com/Liquid4All/leap-ios.git`. - 3. Select the `0.9.2` release (or newer). - 4. Add the **`LeapSDK`** product to your app target. - 5. (Optional) Add **`LeapModelDownloader`** if you plan to download model bundles at runtime. + + 1. In Xcode choose **File → Add Package Dependencies**. + 2. Enter `https://github.com/Liquid4All/leap-sdk.git`. + 3. Select the `0.10.0` release (or newer). + 4. Add the products you need to your app target. + + The package vends five products. 
Most apps only need one or two: + + | Product | What it provides | Transitively pulls in | + |---|---|---| + | `LeapSDK` | Core inference + conversation API | — | + | `LeapModelDownloader` | Hosted/manifest-based model fetch | `LeapSDK` | + | `LeapOpenAIClient` | OpenAI-compatible cloud chat client | — | + | `LeapUI` | Voice assistant widget (SwiftUI/Compose) | `LeapSDK` | + | `LeapSDKMacros` | `@Generatable` / `@Guide` macros | swift-syntax | + + Because `LeapModelDownloader` and `LeapUI` already depend on `LeapSDK`, a typical app only adds `LeapModelDownloader` (or `LeapUI`) plus `LeapSDKMacros` if it uses constrained generation. - - 1. Add the pod to your `Podfile`: + + For explicit pinning, declare each framework as a `.binaryTarget` in your `Package.swift`. The XCFramework assets live on the `Liquid4All/leap-sdk` v0.10.0 release page. + + + The constrained-generation macros (`@Generatable`, `@Guide`) are Swift macros, not XCFrameworks — they ship as the `LeapSDKMacros` source target inside the SPM package and **cannot be installed as a `.binaryTarget`**. If you need them, use the **Swift Package Manager** tab above instead (or add the `LeapSDKMacros` source target separately on top of your binary targets). 
+ + - ```ruby - pod 'Leap-SDK', '~> 0.9.2' - # Optional: pod 'Leap-Model-Downloader', '~> 0.9.2' + ```swift + .binaryTarget( + name: "LeapSDK", + url: "https://github.com/Liquid4All/leap-sdk/releases/download/v0.10.0/LeapSDK.xcframework.zip", + checksum: "8337c5056ed5285f6b6bee198b6c81757c608243ac2d6be5fa1084b6407016ae" + ), + .binaryTarget( + name: "LeapModelDownloader", + url: "https://github.com/Liquid4All/leap-sdk/releases/download/v0.10.0/LeapModelDownloader.xcframework.zip", + checksum: "0c7242e4c91433fc53822d06387d5e77f8891388113200028917e2eac45e36c3" + ), + .binaryTarget( + name: "LeapOpenAIClient", + url: "https://github.com/Liquid4All/leap-sdk/releases/download/v0.10.0/LeapOpenAIClient.xcframework.zip", + checksum: "4b8b641f5ce97818cbfa23b53bcfdd9361a44e3ae44146effcdabfc5ad6820a5" + ), + .binaryTarget( + name: "LeapUi", + url: "https://github.com/Liquid4All/leap-sdk/releases/download/v0.10.0/LeapUi.xcframework.zip", + checksum: "9f709bbdf04390f5135c16d191b337c74479491b72783dcd89bfab5ff5afbd59" + ), ``` - 2. Run `pod install` - 3. Reopen the `.xcworkspace`. - - - 1. Download `LeapSDK.xcframework.zip` (and optionally `LeapModelDownloader.xcframework.zip`) from the [GitHub releases](https://github.com/Liquid4All/leap-ios/releases). - 2. Unzip and drag the XCFramework(s) into Xcode. - 3. Set the Embed setting to **Embed & Sign** for each framework. + Note that the binary target name is `LeapUi` (lowercase `i`) — `import LeapUi` in Swift sources matches the binary-target module name, even though the SPM library product is `LeapUI`. -The constrained-generation macros (`@Generatable`, `@Guide`) ship inside the `LeapSDK` product. No additional package is required. +The constrained-generation macros (`@Generatable`, `@Guide`) ship in the `LeapSDKMacros` product. Add it to your target alongside `LeapSDK` if you use those macros. 
## Getting and Loading Models[​](#getting-and-loading-models "Direct link to Getting and Loading Models") @@ -163,7 +197,7 @@ final class ChatViewModel: ObservableObject { quantizationSlug: "lfm2-350m-enjp-mt-20250904-8da4w" ) if let model { - let downloader = ModelDownloader() + let downloader = LeapModelDownloader() downloader.requestDownloadModel(model) let status = await downloader.queryStatus(model) switch status { diff --git a/deployment/on-device/ios/model-loading.mdx b/deployment/on-device/ios/model-loading.mdx index 4c56242..8cc16ff 100644 --- a/deployment/on-device/ios/model-loading.mdx +++ b/deployment/on-device/ios/model-loading.mdx @@ -37,12 +37,17 @@ public struct Leap {
-### `ModelDownloader.downloadModel(model:quantization:downloadProgress:)` +### `LeapModelDownloader.downloadModel(model:quantization:downloadProgress:)` Download a model from the LEAP Model Library and save it to the local cache, without loading it into memory. ```swift -public class ModelDownloader { +public class LeapModelDownloader { + public init( + config: LeapDownloaderConfig = LeapDownloaderConfig(), + sessionConfiguration: NSURLSessionConfiguration? = nil + ) + public func downloadModel( _ model: String, quantization: String, @@ -51,6 +56,34 @@ public class ModelDownloader { } ``` + +**Renamed in v0.10.0.** This class was called `ModelDownloader` in the iOS-only 0.9.x SDK. Update call sites to `LeapModelDownloader` when upgrading. + + +The optional `sessionConfiguration:` parameter (added in v0.10.0) lets you opt into background downloads using `NSURLSession` — downloads continue when the app is suspended or killed: + +```swift +let backgroundConfig = NSURLSessionConfiguration.backgroundSessionConfiguration( + withIdentifier: "com.myapp.leap.downloads" +) +let downloader = LeapModelDownloader(sessionConfiguration: backgroundConfig) +downloader.requestDownloadModel(model: "LFM2-1.2B", quantization: "Q5_K_M") +``` + +Wire up the AppDelegate background-events hook so the OS can resume your app on completion: + +```swift +func application( + _ application: UIApplication, + handleEventsForBackgroundURLSession identifier: String, + completionHandler: @escaping () -> Void +) { + downloader.handleBackgroundEvents(completionHandler: completionHandler) +} +``` + +Pass `nil` (the default) for foreground-only downloads. + **Arguments** | Name | Type | Required | Default | Description | @@ -73,22 +106,25 @@ public struct DownloadedModelManifest { } ``` - - Loads a local model file (either a `.bundle` package or a `.gguf` checkpoint) and returns a `ModelRunner` instance. 
+
+  Loads a local model file (either a `.bundle` package or a `.gguf` checkpoint) and returns a `ModelRunner`. Use this for sideloaded models — anything you ship as an app asset, copy onto the device yourself (for example via Finder or Xcode's Devices and Simulators window), or download via your own pipeline.
 
   ```swift
   public struct Leap {
     public static func load(
       url: URL,
-      options: LiquidInferenceEngineOptions? = nil
+      options: LiquidInferenceEngineOptions? = nil,
+      autoDetectCompanionFiles: Bool = true
     ) async throws -> ModelRunner
   }
   ```
 
   - Throws `LeapError.modelLoadingFailure` if the file cannot be loaded.
-  - Automatically detects companion files placed alongside your model:
-    - `mmproj-*.gguf` enables multimodal vision tokens for both bundle and GGUF flows.
-    - Audio decoder artifacts whose filename contains "audio" and "decoder" with a `.gguf` or `.bin` extension unlock audio input/output for compatible checkpoints.
+  - `autoDetectCompanionFiles` (added in v0.10.0, defaults to `true`) picks up companion files sitting next to the model:
+    - `mmproj-*.gguf` enables multimodal vision tokens for both bundle and GGUF flows.
+    - Audio decoder artifacts whose filename contains "audio" and "decoder" with a `.gguf` or `.bin` extension unlock audio input/output for compatible checkpoints.
+
+    Set it to `false` if you want to control companion paths explicitly via `LiquidInferenceEngineOptions`.
   - Must be called from an async context (for example inside an `async` function or a `Task`). Keep the returned `ModelRunner` alive while you interact with the model.
 
   ```swift
@@ -99,9 +135,49 @@ public struct DownloadedModelManifest {
 
   // llama.cpp backend via .gguf
   let ggufURL = Bundle.main.url(forResource: "qwen3-0_6b", withExtension: "gguf")!
let ggufRunner = try await Leap.load(url: ggufURL) + + // Disable auto-detection if you'll wire companion files manually + let options = LiquidInferenceEngineOptions( + bundlePath: ggufURL.path, + mmProjPath: customMmprojURL.path + ) + let manualRunner = try await Leap.load(url: ggufURL, options: options, autoDetectCompanionFiles: false) ``` +## What's new in 0.10.0 + +v0.10.0 keeps existing call sites compiling thanks to a Swift compatibility layer, and adds a few ergonomic options: + +- **Builder-style options.** Chain `.with(...)` on `GenerationOptions`, `GenerationOptionsCompat`, or `LiquidInferenceEngineOptions` to set parameters one at a time. + + ```swift + let opts = GenerationOptions() + .with(temperature: 0.7) + .with(topP: 0.9) + .with(jsonSchema: mySchema) + ``` + +- **Exhaustive `onEnum(of:)` switching.** SKIE generates Swift enums for sealed Kotlin hierarchies (`MessageResponse`, `ChatMessageContent`, `LoadTimeParameters`, `FunctionArg`, `LeapNum`, `LeapFunctionParameterType`). Use `onEnum(of:)` to switch without a `default` case — the compiler errors when a new variant lands. + + ```swift + for try await response in conversation.generateResponse(message: message) { + switch onEnum(of: response) { + case .chunk(let c): print(c.text) + case .reasoningChunk(let r): print("[thinking] \(r.reasoning)") + case .functionCalls(let f): handleFunctionCalls(f.functionCalls) + case .audioSample(let a): playAudio(samples: a.samples, sampleRate: a.sampleRate) + case .complete(let c): print("Done: \(c.finishReason)") + } + } + ``` + +- **`ChatMessageContent` static factories.** Factory methods are now callable directly without `.companion`: + + ```swift + let content = ChatMessageContent.fromFloatSamples(samples, sampleRate: 16000) + ``` + ### `LiquidInferenceEngineOptions` Pass a `LiquidInferenceEngineOptions` value when you need to override the default runtime configuration. 
diff --git a/deployment/on-device/ios/openai-client.mdx b/deployment/on-device/ios/openai-client.mdx new file mode 100644 index 0000000..2bfc056 --- /dev/null +++ b/deployment/on-device/ios/openai-client.mdx @@ -0,0 +1,224 @@ +--- +title: "OpenAI-Compatible Client" +description: "Lightweight client for OpenAI-compatible chat completions APIs, ideal for hybrid on-device + cloud routing" +--- + +`LeapOpenAIClient` (introduced in v0.10.0) is a small, dependency-light client for any OpenAI-compatible chat completions endpoint — OpenAI itself, OpenRouter, vLLM, llama-server, or your own proxy. It ships in the same SPM package as `LeapSDK`, so you can route requests between an on-device LFM and a cloud model from a single app. + +## When to use it + +- **Hybrid on-device + cloud routing.** Run small / fast models on-device with `LeapSDK`, fall back to a larger cloud model for hard prompts. +- **Standardised cloud API.** Talk to any OpenAI-compatible backend without pulling in a heavier OpenAI SDK. +- **Streaming first.** SSE streaming is the only mode — non-streaming requests aren't exposed (set `stream = true`, which is the default). + +## Add the dependency + +Add the `LeapOpenAIClient` product to your target. See the [Quick Start Guide](./ios-quick-start-guide#install-the-sdk) for the full SPM setup. + +```swift +dependencies: [ + .package(url: "https://github.com/Liquid4All/leap-sdk.git", from: "0.10.0") +] + +targets: [ + .target( + name: "YourApp", + dependencies: [ + .product(name: "LeapOpenAIClient", package: "leap-sdk"), + ] + ) +] +``` + +In Swift sources, `import LeapOpenAIClient`. 
+
+## Basic usage
+
+```swift
+import LeapOpenAIClient
+
+let client = OpenAiClient(
+    config: OpenAiClientConfig(
+        apiKey: "sk-…",
+        baseUrl: "https://api.openai.com/v1"
+    )
+)
+
+let request = ChatCompletionRequest(
+    model: "gpt-4o-mini",
+    messages: [
+        ChatMessage.System(content: "You are a helpful assistant."),
+        ChatMessage.User(content: "What is the capital of Japan?")
+    ],
+    temperature: 0.7
+)
+
+for try await event in client.streamChatCompletion(request: request) {
+    switch onEnum(of: event) {
+    case .delta(let d):
+        print(d.content, terminator: "")
+    case .done(let d):
+        if let usage = d.usage {
+            print("\nTokens: \(usage.totalTokens)")
+        }
+    case .error(let e):
+        print("\nError: \(e.message)")
+    }
+}
+
+client.close() // closes the underlying URLSession-backed HttpClient
+```
+
+The platform factory `OpenAiClient(config:)` uses the Darwin (URLSession) engine on iOS and macOS — no extra setup needed.
+
+## Configuration
+
+`OpenAiClientConfig` is a Kotlin data class bridged to Swift through SKIE — call its initializer directly from Swift; you don't define a Swift struct yourself.
+
+```swift
+let config = OpenAiClientConfig(
+    apiKey: "sk-…",
+    baseUrl: "https://api.openai.com/v1",     // Default
+    chatCompletionsPath: "/chat/completions", // Default
+    extraHeaders: [:]                         // Default
+)
+```
+
+| Parameter | Default | Notes |
+|---|---|---|
+| `apiKey` | — (required) | Sent as `Authorization: Bearer <apiKey>`. |
+| `baseUrl` | `https://api.openai.com/v1` | Override for OpenRouter, a self-hosted backend, etc. |
+| `chatCompletionsPath` | `/chat/completions` | Appended to `baseUrl`. |
+| `extraHeaders` | `[:]` | Merged into every request — e.g. OpenRouter's `HTTP-Referer`. 
| + +### Talking to OpenRouter + +```swift +let client = OpenAiClient( + config: OpenAiClientConfig( + apiKey: "sk-or-…", + baseUrl: "https://openrouter.ai/api/v1", + extraHeaders: [ + "HTTP-Referer": "https://yourapp.example.com", + "X-Title": "Your App" + ] + ) +) +``` + +### Talking to a self-hosted vLLM / llama-server + +```swift +let client = OpenAiClient( + config: OpenAiClientConfig( + apiKey: "anything", // Required by config but typically unused + baseUrl: "http://10.0.0.42:8000/v1" + ) +) +``` + +## Request shape + +`ChatCompletionRequest` (also a Kotlin data class bridged via SKIE) covers standard OpenAI fields plus a few OpenRouter-specific extensions. OpenRouter-only fields are silently ignored by stock OpenAI-compatible APIs, so you can leave them in cross-backend code. + +```swift +let request = ChatCompletionRequest( + model: "gpt-4o-mini", + messages: [ChatMessage.User(content: "Hello.")], + temperature: 0.7, + maxCompletionTokens: 256 +) +``` + +| Parameter | Type | Notes | +|---|---|---| +| `model` | `String` | Required. | +| `messages` | `[ChatMessage]` | Required. | +| `temperature` | `Double?` | Optional. | +| `topP` | `Double?` | Optional. | +| `maxCompletionTokens` | `Int?` | Preferred for newer OpenAI versions. | +| `maxTokens` | `Int?` | Legacy alias — some custom backends still require it. | +| `frequencyPenalty` | `Double?` | Optional. | +| `presencePenalty` | `Double?` | Optional. | +| `stop` | `[String]?` | Optional. | +| `stream` | `Bool` | Defaults to `true`. SSE streaming is the only mode currently supported by `streamChatCompletion`. | +| `topK`, `minP`, `topA`, `repetitionPenalty` | `Int?` / `Double?` | OpenRouter sampling extensions. | +| `transforms`, `models`, `route` | `[String]?` / `String?` | OpenRouter routing extensions. | +| `provider` | `ProviderPreferences?` | OpenRouter provider preferences. | + +`ChatMessage` is a sealed Kotlin interface (bridged to Swift via SKIE). 
Use the three concrete cases:
+
+```swift
+ChatMessage.System(content: "Be concise.")
+ChatMessage.User(content: "Hello.")
+ChatMessage.Assistant(content: "Hi there!")
+```
+
+## Response shape
+
+`streamChatCompletion(request:)` returns an `AsyncSequence` of `ChatCompletionEvent`s:
+
+| Event | Meaning |
+|---|---|
+| `.delta(content: String)` | Text chunk from the model. May be empty for role-only deltas. |
+| `.done(usage: Usage?)` | Stream finished. `usage` is non-`nil` when the API includes token counts. |
+| `.error(message: String)` | HTTP error or stream parsing failure. |
+
+`Usage` carries three integer fields: `promptTokens`, `completionTokens`, and `totalTokens`.
+
+## Hybrid routing example
+
+A common pattern: route simple prompts to a small on-device LFM, escalate harder prompts to a cloud model. Note that `LeapSDK` and `LeapOpenAIClient` both declare a `ChatMessage` type, so qualify the name with its module in files that import both:
+
+```swift
+import LeapSDK
+import LeapOpenAIClient
+
+@MainActor
+final class HybridChatViewModel: ObservableObject {
+    private let onDevice: Conversation
+    private let cloud: OpenAiClient
+
+    init(onDevice: Conversation, cloud: OpenAiClient) {
+        self.onDevice = onDevice
+        self.cloud = cloud
+    }
+
+    func send(_ text: String, useCloud: Bool) async throws {
+        if useCloud {
+            let request = ChatCompletionRequest(
+                model: "gpt-4o-mini",
+                messages: [LeapOpenAIClient.ChatMessage.User(content: text)]
+            )
+            for try await event in cloud.streamChatCompletion(request: request) {
+                if case let .delta(d) = onEnum(of: event) {
+                    appendChunk(d.content)
+                }
+            }
+        } else {
+            let userMessage = LeapSDK.ChatMessage(role: .user, content: [.text(text)])
+            for try await response in onDevice.generateResponse(message: userMessage) {
+                if case let .chunk(c) = onEnum(of: response) {
+                    appendChunk(c.text)
+                }
+            }
+        }
+    }
+
+    private func appendChunk(_ text: String) { /* … */ }
+}
+```
+
+See [Cloud AI Comparison](./cloud-ai-comparison) for a side-by-side feature breakdown of on-device vs cloud chat APIs. 
+ +## Lifecycle + +The `OpenAiClient(config:)` factory creates an `HttpClient` internally (using the Darwin / `URLSession` Ktor engine) and ties it to the returned client — call `close()` when you're done, typically in `deinit` of the owning view model: + +```swift +deinit { + client.close() +} +``` + +The lower-level constructor that accepts an externally-managed `HttpClient` is part of the Kotlin/Ktor surface and isn't a useful entry point from Swift — the Ktor engine machinery isn't bridged into the public Swift API. Use the platform `OpenAiClient(config:)` factory and let the SDK manage the underlying session. If you need shared-client behaviour, share the `OpenAiClient` instance itself across consumers and call `close()` exactly once in the owning component's teardown. diff --git a/deployment/on-device/ios/voice-assistant.mdx b/deployment/on-device/ios/voice-assistant.mdx new file mode 100644 index 0000000..223f527 --- /dev/null +++ b/deployment/on-device/ios/voice-assistant.mdx @@ -0,0 +1,237 @@ +--- +title: "Voice Assistant Widget" +description: "Drop-in voice UI for iOS and macOS, powered by leap-ui's Compose Multiplatform widget" +--- + +The `LeapUI` SPM product (introduced in v0.10.0) ships a ready-to-use voice assistant widget — an animated orb, mic button, and status label — backed by a state machine that handles recording, generation, and audio playback. You wire it to a model and it handles the rest. + +`leap-ui` is a Compose Multiplatform module, so the same widget runs on: + +- **iOS** (this page) — bridged to UIKit via `VoiceAssistantViewController` and exposed to SwiftUI through a `UIViewControllerRepresentable`. +- **macOS** — bridged to AppKit via `VoiceAssistantNSViewController`. The Swift call sites on this page work unchanged; substitute `NSViewControllerRepresentable` and host with `NSHostingController` if needed. +- **Android** — see the [Android voice-assistant page](/deployment/on-device/android/voice-assistant). 
+- **JVM / desktop** — Compose for Desktop. Same `leap-ui` Maven artifact (`ai.liquid.leap:leap-ui:0.10.0`). +- **Web (Wasm, experimental)** — present in the source tree but not part of the v0.10.0 stable release notes; treat as preview. + +## Add the dependency + +Add the `LeapUI` product to your target alongside `LeapSDK`. See the [Quick Start Guide](./ios-quick-start-guide#install-the-sdk) for the full SPM setup. + +```swift +dependencies: [ + .package(url: "https://github.com/Liquid4All/leap-sdk.git", from: "0.10.0") +] + +targets: [ + .target( + name: "YourApp", + dependencies: [ + .product(name: "LeapSDK", package: "leap-sdk"), + .product(name: "LeapUI", package: "leap-sdk"), + ] + ) +] +``` + +In Swift sources, `import LeapUi` (lowercase `i` — the binary-target module name). + +## Architecture + +``` +VoiceAssistantWidget (Compose UI) + ↓ intents +VoiceAssistantStore (state machine: IDLE → LISTENING → RESPONDING → IDLE) + ↓ uses +VoiceAudioRecorder + VoiceAudioPlayer + VoiceConversation +``` + +- **`VoiceAssistantStore`** owns the session lifecycle. You instantiate it once when the screen appears and `close()` it when it goes away. +- **`VoiceConversation`** is a thin protocol you implement to bridge the store to your model. The shipped demo uses a small `AppleVoiceConversation` adapter that wraps a normal `Conversation` from LeapSDK. +- **Audio I/O** is provided by `AppleAudioRecorder` and `AppleAudioPlayer` (defaults), which you can swap out for custom implementations of `VoiceAudioRecorder` / `VoiceAudioPlayer`. + +## Quick wiring with `makeForApple()` + +The factory below hides Kotlin coroutine plumbing from Swift callers. It creates the store with a `MainScope()`, the default Apple audio recorder and player, and an EMA-smoothed amplitude. 
+ +```swift +import LeapSDK +import LeapUi + +@MainActor +final class VoiceAssistantViewModel: ObservableObject { + let store: VoiceAssistantStore + + init() { + // Defaults: AppleAudioRecorder, AppleAudioPlayer, MainScope, interruptToSpeak = true + store = VoiceAssistantStore.makeForApple() + } + + deinit { + store.close() + } + + func loadModel() async { + do { + let runner = try await Leap.load( + model: "LFM2.5-Audio-1.5B", + quantization: "Q4_0" + ) { fraction, _ in + // Drive the orb's status text from download progress + Task { @MainActor in + self.store.setModelProgress( + fraction: Float(fraction), + message: "Downloading (\(Int(fraction * 100))%)" + ) + } + } + let conversation = runner.createConversation( + systemPrompt: "Respond with interleaved text and audio." + ) + store.setConversation(conv: AppleVoiceConversation(conversation: conversation)) + } catch { + store.setModelError(message: "✗ \(error.localizedDescription)") + } + } +} +``` + +### Customising the factory + +`makeForApple` exposes the same constructor parameters as `VoiceAssistantStore` itself: + +```swift +let store = VoiceAssistantStore.makeForApple( + recorder: myCustomRecorder, + player: myCustomPlayer, + smoothingAlpha: 0.3, + playbackTimeoutMs: 10_000, + interruptToSpeak: false // Press during a response only cancels — doesn't immediately re-record +) +``` + +`interruptToSpeak` (new in v0.10.0) controls what happens when the user presses the orb while a response is being generated: + +- `true` (default) — cancels the in-flight generation **and** immediately starts a new recording. +- `false` — only cancels. The user must press again to start a new recording. + +## Hosting the widget in SwiftUI + +`LeapUi` ships a `VoiceAssistantViewController` (UIKit) and `VoiceAssistantNSViewController` (AppKit). Wrap one with the matching `Representable` to drop it into a SwiftUI view tree. 
+ +```swift +import LeapUi +import SwiftUI + +struct VoiceAssistantScreen: View { + @StateObject private var viewModel = VoiceAssistantViewModel() + + var body: some View { + VoiceWidgetRepresentable(store: viewModel.store) + .background(Color.black) + .ignoresSafeArea() + .task { await viewModel.loadModel() } + } +} + +private struct VoiceWidgetRepresentable: UIViewControllerRepresentable { + let store: VoiceAssistantStore + + func makeUIViewController(context: Context) -> UIViewController { + VoiceAssistantViewControllerKt.VoiceAssistantViewController( + state: store.widgetStateHolder, + onIntent: { intent in store.processIntent(intent: intent) }, + labels: VoiceWidgetLabels( + idle: "Tap and hold to speak", + listening: "Listening", + responding: "Generating", + micStartDescription: "Start recording", + micStopDescription: "Stop recording", + micCancelDescription: "Cancel recording" + ), + colors: VoiceWidgetColors.companion.Default, + showPoweredBy: true + ) + } + + func updateUIViewController(_ uiViewController: UIViewController, context: Context) {} +} +``` + +## Implementing `VoiceConversation` + +The store calls into a `VoiceConversation` you provide. A minimal adapter that forwards to a LeapSDK `Conversation` looks like this: + +```swift +import LeapSDK +import LeapUi + +final class AppleVoiceConversation: VoiceConversation { + private let conversation: Conversation + + init(conversation: Conversation) { + self.conversation = conversation + } + + func generateResponse( + audioSamples: [Float], + sampleRate: Int32, + onAudioChunk: @escaping (_ samples: [Float], _ sampleRate: Int32) -> Void + ) async throws -> GenerationStats? { + let userMessage = ChatMessage( + role: .user, + content: [ChatMessageContent.fromFloatSamples(audioSamples, sampleRate: Int(sampleRate))] + ) + + var stats: GenerationStats? 
+ for try await response in conversation.generateResponse(message: userMessage) { + switch onEnum(of: response) { + case .audioSample(let chunk): + onAudioChunk(chunk.samples, Int32(chunk.sampleRate)) + case .complete(let c): + stats = c.stats + case .chunk, .reasoningChunk, .functionCalls: + break + } + } + return stats + } + + func reset() -> VoiceConversation { + AppleVoiceConversation(conversation: conversation.modelRunner.createConversation()) + } +} +``` + +## Audio session + +iOS apps using the widget need to configure `AVAudioSession` for record + playback before the model starts streaming audio: + +```swift +import AVFoundation + +let session = AVAudioSession.sharedInstance() +try session.setCategory(.playAndRecord, mode: .default, options: [.defaultToSpeaker]) +try session.setActive(true) +session.requestRecordPermission { _ in } +``` + +Add `NSMicrophoneUsageDescription` to your `Info.plist` so the OS can show the permission prompt. + +## What's in the `LeapUi` module + +| Symbol | Purpose | +|---|---| +| `VoiceAssistantStore` | State machine + orchestrator. Instantiate via `makeForApple()`. | +| `VoiceAssistantStateHolder` | Compose-friendly state container surfaced to Swift. | +| `VoiceAssistantViewController` (UIKit) | Pre-built controller hosting the Compose widget. | +| `VoiceAssistantNSViewController` (AppKit) | macOS variant of the above. | +| `VoiceAssistantWidget` (Compose) | Underlying widget — useful if you have your own Compose layer. | +| `AppleAudioRecorder` / `AppleAudioPlayer` | Default audio I/O. Implement `VoiceAudioRecorder` / `VoiceAudioPlayer` to substitute. | +| `VoiceConversation` | Protocol you implement to bridge the store to a model. | +| `VoiceWidgetLabels`, `VoiceWidgetColors` | Theming. Uses `companion.Default` to access the canonical palette. | + +## Compatible models + +Voice mode requires a model that emits audio output. 
The shipped demo uses `LFM2.5-Audio-1.5B` at the `Q4_0` quantization, which streams interleaved text and audio when prompted with a system message like *"Respond with interleaved text and audio."* + +See the [LEAP Model Library](https://leap.liquid.ai/models) for other audio-capable models. diff --git a/docs.json b/docs.json index 1bb071b..a51294a 100644 --- a/docs.json +++ b/docs.json @@ -146,7 +146,9 @@ "deployment/on-device/ios/utilities", "deployment/on-device/ios/cloud-ai-comparison", "deployment/on-device/ios/constrained-generation", - "deployment/on-device/ios/function-calling" + "deployment/on-device/ios/function-calling", + "deployment/on-device/ios/voice-assistant", + "deployment/on-device/ios/openai-client" ] }, { @@ -162,7 +164,9 @@ "deployment/on-device/android/utilities", "deployment/on-device/android/cloud-ai-comparison", "deployment/on-device/android/constrained-generation", - "deployment/on-device/android/function-calling" + "deployment/on-device/android/function-calling", + "deployment/on-device/android/voice-assistant", + "deployment/on-device/android/openai-client" ] }, "deployment/on-device/llama-cpp", diff --git a/examples/android/leap-koog-agent.mdx b/examples/android/leap-koog-agent.mdx index 960b1cd..c9b4527 100644 --- a/examples/android/leap-koog-agent.mdx +++ b/examples/android/leap-koog-agent.mdx @@ -107,8 +107,8 @@ Before running this example, ensure you have the following: ```kotlin dependencies { - // LeapSDK for on-device AI (0.9.7+) - implementation("ai.liquid.leap:leap-sdk:0.9.7") + // LeapSDK for on-device AI (0.10.0+) + implementation("ai.liquid.leap:leap-sdk:0.10.0") // Koog framework for AI agents implementation("ai.anthropic:koog-core:0.1.0") diff --git a/examples/android/recipe-generator-constrained-output.mdx b/examples/android/recipe-generator-constrained-output.mdx index c3f1560..ccb64c9 100644 --- a/examples/android/recipe-generator-constrained-output.mdx +++ b/examples/android/recipe-generator-constrained-output.mdx @@ 
-66,12 +66,12 @@ Before running this example, ensure you have the following: - **Minimum SDK**: API 24 (Android 7.0) - **Target SDK**: API 34 or higher - **Kotlin**: 1.9.0 or higher - - **LeapSDK**: 0.9.4 or higher + - **LeapSDK**: 0.10.0 or higher - **Internet connectivity**: Required for first-time model download - This example uses **LeapSDK 0.9.4+** with automatic model downloading capabilities. + This example uses **LeapSDK 0.10.0+** with automatic model downloading capabilities. **Automatic Model Management** @@ -106,8 +106,8 @@ Before running this example, ensure you have the following: ```kotlin dependencies { - // LeapSDK for constrained generation (0.9.4+) - implementation("ai.liquid.leap:leap-sdk:0.9.7") + // LeapSDK for constrained generation (0.10.0+) + implementation("ai.liquid.leap:leap-sdk:0.10.0") // Kotlin serialization for type-safe parsing implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.6.0") @@ -148,7 +148,7 @@ Follow these steps to generate structured recipes: 3. **Gradle sync** - Wait for Gradle to sync all dependencies - - Ensure LeapSDK 0.9.7 is downloaded + - Ensure LeapSDK 0.10.0 is downloaded 4. 
**Run the app** - Connect your Android device or start an emulator @@ -212,7 +212,7 @@ class MainActivityViewModel : ViewModel() { fun initializeModel() { viewModelScope.launch { - // Download and load the model automatically (LeapSDK 0.9.4+) + // Download and load the model automatically (LeapSDK 0.10.0+) model = LeapDownloader.downloadAndLoadModel( modelName = "lfm2-700m", onProgress = { progress -> @@ -314,7 +314,7 @@ fun generateRecipe(userInput: String) { ### Alternative: Using @Generatable Annotation -LeapSDK 0.9.4+ provides the `@Generatable` annotation for simplified structured output: +LeapSDK 0.10.0+ provides the `@Generatable` annotation for simplified structured output: ```kotlin @Generatable diff --git a/examples/android/slogan-generator.mdx b/examples/android/slogan-generator.mdx index 9c53d99..a19ad40 100644 --- a/examples/android/slogan-generator.mdx +++ b/examples/android/slogan-generator.mdx @@ -47,7 +47,7 @@ Before running this example, ensure you have the following: ```kotlin dependencies { - implementation("ai.liquid.leap:leap-sdk:0.9.7") + implementation("ai.liquid.leap:leap-sdk:0.10.0") // Android UI components implementation("androidx.appcompat:appcompat:1.6.1") diff --git a/examples/android/vision-language-model-example.mdx b/examples/android/vision-language-model-example.mdx index bd72b92..baf0b2a 100644 --- a/examples/android/vision-language-model-example.mdx +++ b/examples/android/vision-language-model-example.mdx @@ -112,8 +112,8 @@ Before running this example, ensure you have the following: ```kotlin dependencies { - // LeapSDK for VLM processing (0.9.7+) - implementation("ai.liquid.leap:leap-sdk:0.9.7") + // LeapSDK for VLM processing (0.10.0+) + implementation("ai.liquid.leap:leap-sdk:0.10.0") // Coil for image loading implementation("io.coil-kt:coil-compose:2.5.0") diff --git a/examples/android/web-content-summarizer.mdx b/examples/android/web-content-summarizer.mdx index 2430ddb..2b628a5 100644 --- 
a/examples/android/web-content-summarizer.mdx +++ b/examples/android/web-content-summarizer.mdx @@ -60,8 +60,8 @@ Before running this example, ensure you have the following: ```kotlin dependencies { - // LeapSDK for AI processing (0.9.7+) - implementation("ai.liquid.leap:leap-sdk:0.9.7") + // LeapSDK for AI processing (0.10.0+) + implementation("ai.liquid.leap:leap-sdk:0.10.0") // Networking for web scraping implementation("com.squareup.okhttp3:okhttp:4.12.0")