From 67f24ce39702b91c607c55b5175b62e32c49089a Mon Sep 17 00:00:00 2001
From: Rajas Bansal
Date: Thu, 30 Apr 2026 20:23:58 -0700
Subject: [PATCH] add the pronunciation dict

Document the `extra_params.pronunciation_dict` option for speech
generation:

* mention `extra_params` in the `tts_session.updated` client event
  docs and extend the JSON example with a `pronunciation_dict` entry;
* add an `extra_params` object property (with a `pronunciation_dict`
  string array) to the request schema that precedes
  `AudioTranscriptionRequest`.

Each `pronunciation_dict` entry is a single string of the form
"<token>/<pronunciation>", e.g. "omg/oh my god".

---
 openapi.yaml | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/openapi.yaml b/openapi.yaml
index d446f7c..2cf5f37 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -3866,12 +3866,15 @@ paths:
         - Parameters: Sent as query parameters (model, voice, max_partial_length, language)
 
         **Client Events:**
-        - `tts_session.updated`: Update session parameters like voice
+        - `tts_session.updated`: Update session parameters like voice. The `session` object also accepts an `extra_params` field for additional model-specific parameters that fine-tune speech generation behavior, such as `pronunciation_dict` (a list of pronunciation rules for specific characters or symbols, where each entry uses the format `"<token>/<pronunciation>"` (e.g., `["omg/oh my god"]`) to override how the model pronounces matching tokens).
         ```json
         {
           "type": "tts_session.updated",
           "session": {
-            "voice": "tara"
+            "voice": "tara",
+            "extra_params": {
+              "pronunciation_dict": ["omg/oh my god"]
+            }
           }
         }
         ```
@@ -10128,6 +10131,16 @@ components:
           type: boolean
           default: false
           description: 'If true, output is streamed for several characters at a time instead of waiting for the full response. The stream terminates with `data: [DONE]`. If false, return the encoded audio as octet stream'
+        extra_params:
+          type: object
+          description: Additional model-specific parameters that fine-tune speech generation behavior.
+          properties:
+            pronunciation_dict:
+              type: array
+              items:
+                type: string
+              description: A list of pronunciation rules for specific characters or symbols. Each entry uses the format `"<token>/<pronunciation>"` (e.g., `["omg/oh my god"]`) to override how the model pronounces matching tokens.
+              example: ["omg/oh my god"]
 
     AudioTranscriptionRequest:
       type: object