@@ -159,7 +159,7 @@ jobs:
159159 repo : " google"
160160 name : " gemma-3-4b-it"
161161 quant : " quantized-int4-weight-only"
162- # Voxtral Realtime only supports int4-tile-packed on CUDA (offline mode)
162+ # Voxtral Realtime only supports int4-tile-packed on CUDA
163163 - model :
164164 repo : " mistralai"
165165 name : " Voxtral-Mini-4B-Realtime-2602"
@@ -197,12 +197,7 @@ jobs:
197197 echo "::endgroup::"
198198 fi
199199
200- # Voxtral Realtime uses offline mode for CUDA CI (not streaming)
201- VR_MODE=""
202- if [ "${{ matrix.model.name }}" = "Voxtral-Mini-4B-Realtime-2602" ]; then
203- VR_MODE="vr-offline"
204- fi
205- source .ci/scripts/export_model_artifact.sh cuda "${{ matrix.model.repo }}/${{ matrix.model.name }}" "${{ matrix.quant }}" "${RUNNER_ARTIFACT_DIR}" "$VR_MODE"
200+ source .ci/scripts/export_model_artifact.sh cuda "${{ matrix.model.repo }}/${{ matrix.model.name }}" "${{ matrix.quant }}" "${RUNNER_ARTIFACT_DIR}"
206201
207202 test-model-cuda-e2e :
208203 name : test-model-cuda-e2e
@@ -237,7 +232,7 @@ jobs:
237232 repo : " google"
238233 name : " gemma-3-4b-it"
239234 quant : " quantized-int4-weight-only"
240- # Voxtral Realtime only supports int4-tile-packed on CUDA (offline mode)
235+ # Voxtral Realtime only supports int4-tile-packed on CUDA
241236 - model :
242237 repo : " mistralai"
243238 name : " Voxtral-Mini-4B-Realtime-2602"
@@ -256,12 +251,7 @@ jobs:
256251 download-artifact : ${{ matrix.model.repo }}-${{ matrix.model.name }}-cuda-${{ matrix.quant }}
257252 ref : ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
258253 script : |
259- # Voxtral Realtime uses offline mode for CUDA CI (not streaming)
260- VR_MODE=""
261- if [ "${{ matrix.model.name }}" = "Voxtral-Mini-4B-Realtime-2602" ]; then
262- VR_MODE="vr-offline"
263- fi
264- source .ci/scripts/test_model_e2e.sh cuda "${{ matrix.model.repo }}/${{ matrix.model.name }}" "${{ matrix.quant }}" "${RUNNER_ARTIFACT_DIR}" "$VR_MODE"
254+ source .ci/scripts/test_model_e2e.sh cuda "${{ matrix.model.repo }}/${{ matrix.model.name }}" "${{ matrix.quant }}" "${RUNNER_ARTIFACT_DIR}"
265255
266256 test-cuda-pybind :
267257 name : test-cuda-pybind
0 commit comments