From 22b5f36f127835e50acec7fe2f4f5eef00dfe407 Mon Sep 17 00:00:00 2001
From: Rishabh Bhargava
Date: Thu, 7 May 2026 14:05:29 -0700
Subject: [PATCH] docs(audio): broaden supported file formats to include ogg, opus, aac
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Verified end-to-end on api.together.ai for both /audio/transcriptions and
/audio/translations across nvidia/parakeet-tdt-0.6b-v3,
openai/whisper-large-v3, and mistralai/Voxtral-Mini-3B-2507: HTTP 200 +
correct LibriSpeech transcript on .ogg, .opus, and .aac.

The decoder is shared across STT models, so the broader format set was
already supported in practice — just not documented.

Surfaced via the STT feature-test framework (formats.unsupported test on
together-voice repo).
---
 openapi.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/openapi.yaml b/openapi.yaml
index 2cf5f37..121421c 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -10151,7 +10151,7 @@ components:
           oneOf:
             - $ref: '#/components/schemas/AudioFileBinary'
             - $ref: '#/components/schemas/AudioFileUrl'
-          description: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, .webm, .flac.
+          description: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, .webm, .flac, .ogg, .opus, .aac.
         model:
           type: string
           description: Model to use for transcription
@@ -10386,7 +10386,7 @@ components:
             - type: string
               format: uri
               description: Public HTTP/HTTPS URL to audio file
-          description: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, .webm, .flac.
+          description: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, .webm, .flac, .ogg, .opus, .aac.
         model:
           type: string
           description: Model to use for translation