From e3dc8f6db2a1a0b4781451324736a77fcce58322 Mon Sep 17 00:00:00 2001 From: James Deck Date: Sun, 29 Mar 2026 11:27:05 +1000 Subject: [PATCH] fix(bidi): Gemini 3.1 Flash Live compatibility for BidiGeminiLiveModel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three fixes for Gemini 3.1 Flash Live (`gemini-3.1-flash-live-preview`): 1. Route text input through `send_realtime_input` instead of `send_client_content`. Gemini 3.1 only accepts `send_client_content` for seeding initial context history — mid-session text causes error 1007. `send_realtime_input(text=...)` is compatible with both 2.5 and 3.1 models. 2. Only include `session_resumption` in the live config when a handle is provided. Sending `{"handle": null}` causes Gemini 3.1 to reject the session with error 1007. 3. Change the default `api_version` from `v1alpha` to `v1beta`. `v1alpha` is not a documented Gemini Developer API version; `v1beta` is the correct endpoint for the Live API. Closes #1999 --- .../experimental/bidi/models/gemini_live.py | 18 +++++++++++------- .../bidi/models/test_gemini_live.py | 11 +++++------ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/strands/experimental/bidi/models/gemini_live.py b/src/strands/experimental/bidi/models/gemini_live.py index 88d7f5a0c..67150520c 100644 --- a/src/strands/experimental/bidi/models/gemini_live.py +++ b/src/strands/experimental/bidi/models/gemini_live.py @@ -102,7 +102,7 @@ def _resolve_client_config(self, config: dict[str, Any]) -> dict[str, Any]: # Set default http_options if not provided if "http_options" not in resolved: - resolved["http_options"] = {"api_version": "v1alpha"} + resolved["http_options"] = {"api_version": "v1beta"} return resolved @@ -429,12 +429,14 @@ async def _send_image_content(self, image_input: BidiImageInputEvent) -> None: await self._live_session.send(input=msg) async def _send_text_content(self, text: str) -> None: - """Internal: Send text content using Gemini Live API.""" 
- # Create content with text - content = genai_types.Content(role="user", parts=[genai_types.Part(text=text)]) + """Internal: Send text content using Gemini Live API. - # Send as client content - await self._live_session.send_client_content(turns=content) + Uses send_realtime_input for text delivery. Gemini 3.1+ models only + accept send_client_content for seeding initial context history — mid-session + text must go through send_realtime_input. This path is also compatible with + Gemini 2.5 models. + """ + await self._live_session.send_realtime_input(text=text) async def _send_tool_result(self, tool_result: ToolResult) -> None: """Internal: Send tool result using Gemini Live API.""" @@ -491,7 +493,9 @@ def _build_live_config( """ config_dict: dict[str, Any] = self.config["inference"].copy() - config_dict["session_resumption"] = {"handle": kwargs.get("live_session_handle")} + live_session_handle = kwargs.get("live_session_handle") + if live_session_handle is not None: + config_dict["session_resumption"] = {"handle": live_session_handle} # Add system instruction if provided if system_prompt: diff --git a/tests/strands/experimental/bidi/models/test_gemini_live.py b/tests/strands/experimental/bidi/models/test_gemini_live.py index 3a9d7e3dc..0b0cd160f 100644 --- a/tests/strands/experimental/bidi/models/test_gemini_live.py +++ b/tests/strands/experimental/bidi/models/test_gemini_live.py @@ -198,14 +198,13 @@ async def test_send_all_content_types(mock_genai_client, model): _, mock_live_session, _ = mock_genai_client await model.start() - # Test text input + # Test text input — routed through send_realtime_input (Gemini 3.1 compatible) text_input = BidiTextInputEvent(text="Hello", role="user") await model.send(text_input) - mock_live_session.send_client_content.assert_called_once() - call_args = mock_live_session.send_client_content.call_args - content = call_args.kwargs.get("turns") - assert content.role == "user" - assert content.parts[0].text == "Hello" + 
mock_live_session.send_realtime_input.assert_called_with(text="Hello") + + # Reset for next assertion + mock_live_session.send_realtime_input.reset_mock() # Test audio input (base64 encoded) audio_b64 = base64.b64encode(b"audio_bytes").decode("utf-8")