|
49 | 49 | from ...tools.tool_context import ToolContext |
50 | 50 | from ...utils.context_utils import Aclosing |
51 | 51 | from .audio_cache_manager import AudioCacheManager |
52 | | -from .transcription_manager import TranscriptionManager |
53 | 52 |
|
54 | 53 | if TYPE_CHECKING: |
55 | 54 | from ...agents.llm_agent import LlmAgent |
@@ -82,7 +81,6 @@ def __init__(self): |
82 | 81 |
|
83 | 82 | # Initialize configuration and managers |
84 | 83 | self.audio_cache_manager = AudioCacheManager() |
85 | | - self.transcription_manager = TranscriptionManager() |
86 | 84 |
|
87 | 85 | async def run_live( |
88 | 86 | self, |
@@ -246,16 +244,6 @@ async def _send_to_model( |
246 | 244 | elif live_request.activity_end: |
247 | 245 | await llm_connection.send_realtime(types.ActivityEnd()) |
248 | 246 | elif live_request.blob: |
249 | | - # Cache audio data here for transcription |
250 | | - if not invocation_context.transcription_cache: |
251 | | - invocation_context.transcription_cache = [] |
252 | | - if not invocation_context.run_config.input_audio_transcription: |
253 | | - # if the live model's input transcription is not enabled, then |
254 | | - # we use our own audio transcriber to achieve that. |
255 | | - invocation_context.transcription_cache.append( |
256 | | - TranscriptionEntry(role='user', data=live_request.blob) |
257 | | - ) |
258 | | - |
259 | 247 | # Cache input audio chunks before flushing |
260 | 248 | self.audio_cache_manager.cache_audio( |
261 | 249 | invocation_context, live_request.blob, cache_type='input' |
@@ -324,7 +312,7 @@ def get_author_for_event(llm_response): |
324 | 312 | # Cache output audio chunks from model responses |
325 | 313 | # TODO: support video data |
326 | 314 | if ( |
327 | | - invocation_context.run_config.save_live_audio |
| 315 | + invocation_context.run_config.save_live_blob |
328 | 316 | and event.content |
329 | 317 | and event.content.parts |
330 | 318 | and event.content.parts[0].inline_data |
@@ -603,14 +591,13 @@ async def _postprocess_live( |
603 | 591 | return |
604 | 592 |
|
605 | 593 | # Flush audio caches based on control events using configurable settings |
606 | | - if invocation_context.run_config.save_live_audio: |
607 | | - _handle_control_event_flush_event = ( |
608 | | - await self._handle_control_event_flush( |
609 | | - invocation_context, llm_response |
610 | | - ) |
| 594 | + if invocation_context.run_config.save_live_blob: |
| 595 | + flushed_events = await self._handle_control_event_flush( |
| 596 | + invocation_context, llm_response |
611 | 597 | ) |
612 | | - if _handle_control_event_flush_event: |
613 | | - yield _handle_control_event_flush_event |
| 598 | + for event in flushed_events: |
| 599 | + yield event |
| 600 | + if flushed_events: |
614 | 601 | return |
615 | 602 |
|
616 | 603 | # Builds the event. |
@@ -925,12 +912,15 @@ def _finalize_model_response_event( |
925 | 912 |
|
926 | 913 | async def _handle_control_event_flush( |
927 | 914 | self, invocation_context: InvocationContext, llm_response: LlmResponse |
928 | | - ) -> None: |
| 915 | + ) -> list[Event]: |
929 | 916 | """Handle audio cache flushing based on control events. |
930 | 917 |
|
931 | 918 | Args: |
932 | 919 | invocation_context: The invocation context containing audio caches. |
933 | 920 | llm_response: The LLM response containing control event information. |
| 921 | +
|
| 922 | + Returns: |
| 923 | + A list of Event objects created from the flushed caches. |
934 | 924 | """ |
935 | 925 |
|
936 | 926 | # Log cache statistics if enabled |
@@ -959,6 +949,7 @@ async def _handle_control_event_flush( |
959 | 949 | flush_user_audio=False, |
960 | 950 | flush_model_audio=True, |
961 | 951 | ) |
| 952 | + return [] |
962 | 953 |
|
963 | 954 | async def _run_and_handle_error( |
964 | 955 | self, |
|
0 commit comments