Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions genai/live/live_audio_with_txt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Test file: https://storage.googleapis.com/generativeai-downloads/data/16000.wav
# Install helpers for converting files: pip install librosa soundfile

import asyncio


async def generate_content() -> None:
    """Send a text prompt over the Gemini Live API and play the audio answer.

    Connects with an audio response modality and a prebuilt voice, sends a
    single user turn, collects the streamed PCM chunks, and renders them as
    a playable widget (IPython/notebook environments only).
    """
    # [START googlegenaisdk_live_audio_with_txt]
    import numpy as np
    from IPython.display import Audio, display
    from google import genai
    from google.genai.types import (
        Content,
        LiveConnectConfig,
        Modality,
        Part,
        SpeechConfig,
        VoiceConfig,
        PrebuiltVoiceConfig,
    )

    client = genai.Client()
    voice_name = "Aoede"
    model = "gemini-2.0-flash-live-preview-04-09"

    config = LiveConnectConfig(
        response_modalities=[Modality.AUDIO],
        speech_config=SpeechConfig(
            voice_config=VoiceConfig(
                prebuilt_voice_config=PrebuiltVoiceConfig(
                    voice_name=voice_name,
                )
            ),
        ),
    )

    async with client.aio.live.connect(
        model=model,
        config=config,
    ) as session:
        text_input = "Hello? Gemini are you there?"
        print("> ", text_input, "\n")

        await session.send_client_content(
            turns=Content(role="user", parts=[Part(text=text_input)])
        )

        audio_data = []
        async for message in session.receive():
            if (
                message.server_content.model_turn
                and message.server_content.model_turn.parts
            ):
                for part in message.server_content.model_turn.parts:
                    if part.inline_data:
                        # Audio chunks arrive as raw 16-bit PCM bytes.
                        audio_data.append(
                            np.frombuffer(part.inline_data.data, dtype=np.int16)
                        )

        if audio_data:
            print("Received audio answer: ")
            # Live API audio output is sampled at 24 kHz.
            display(Audio(np.concatenate(audio_data), rate=24000, autoplay=True))

    # Example output:
    # > Hello? Gemini are you there?
    # Received audio answer:
    # <IPython.lib.display.Audio object>
    # [END googlegenaisdk_live_audio_with_txt]
    return None


if __name__ == "__main__":
    # Entry point: run the async sample on a fresh event loop.
    asyncio.run(generate_content())
68 changes: 68 additions & 0 deletions genai/live/live_code_exec_with_txt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio


async def generate_content() -> list[str]:
    """Ask the Live API a question the model answers via code execution.

    Returns the accumulated text chunks of the model's reply.
    """
    # [START googlegenaisdk_live_code_exec_with_txt]
    from google import genai
    from google.genai.types import (
        LiveConnectConfig,
        Modality,
        Tool,
        ToolCodeExecution,
        Content,
        Part,
    )

    client = genai.Client()
    model_id = "gemini-2.0-flash-live-preview-04-09"
    config = LiveConnectConfig(
        response_modalities=[Modality.TEXT],
        tools=[Tool(code_execution=ToolCodeExecution())],
    )
    async with client.aio.live.connect(model=model_id, config=config) as session:
        text_input = "Compute the largest prime palindrome under 10"
        print("> ", text_input, "\n")
        await session.send_client_content(
            turns=Content(role="user", parts=[Part(text=text_input)])
        )

        response = []

        async for message in session.receive():
            server_content = message.server_content
            if not server_content:
                continue

            if message.text is not None:
                response.append(message.text)

            turn = server_content.model_turn
            if turn:
                for part in turn.parts:
                    # Echo any code the model wrote and what it produced.
                    if part.executable_code is not None:
                        print(part.executable_code.code)

                    if part.code_execution_result is not None:
                        print(part.code_execution_result.output)

    print("".join(response))
    # Example output:
    # > Compute the largest prime palindrome under 10
    # Final Answer: The final answer is $\boxed{7}$
    # [END googlegenaisdk_live_code_exec_with_txt]
    return response


if __name__ == "__main__":
    # Entry point: run the async sample on a fresh event loop.
    asyncio.run(generate_content())
78 changes: 78 additions & 0 deletions genai/live/live_func_call_with_txt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
from google.genai.types import FunctionResponse


async def generate_content() -> list[FunctionResponse]:
    """Demonstrate Live API function calling with hard-coded tool results.

    Returns the FunctionResponse objects sent back to the model.
    """
    # [START googlegenaisdk_live_func_call_with_txt]
    from google import genai
    from google.genai.types import (
        LiveConnectConfig,
        Modality,
        Tool,
        FunctionDeclaration,
        FunctionResponse,
        Content,
        Part,
    )

    client = genai.Client()
    model_id = "gemini-2.0-flash-live-preview-04-09"

    # Two no-argument tools the model may choose to call.
    turn_on_the_lights = FunctionDeclaration(name="turn_on_the_lights")
    turn_off_the_lights = FunctionDeclaration(name="turn_off_the_lights")

    config = LiveConnectConfig(
        response_modalities=[Modality.TEXT],
        tools=[Tool(function_declarations=[turn_on_the_lights, turn_off_the_lights])],
    )
    async with client.aio.live.connect(model=model_id, config=config) as session:
        text_input = "Turn on the lights please"
        print("> ", text_input, "\n")
        await session.send_client_content(
            turns=Content(role="user", parts=[Part(text=text_input)])
        )

        function_responses = []

        async for message in session.receive():
            if message.server_content:
                if message.text is not None:
                    print(message.text)

            elif message.tool_call:
                for call in message.tool_call.function_calls:
                    reply = FunctionResponse(
                        name=call.name,
                        response={
                            "result": "ok"
                        },  # simple, hard-coded function response
                    )
                    function_responses.append(reply)
                    print(reply.response["result"])

                # Answer the tool call inside the live turn so the model can continue.
                await session.send_tool_response(function_responses=function_responses)

    # Example output:
    # > Turn on the lights please
    # ok
    # [END googlegenaisdk_live_func_call_with_txt]
    return function_responses


if __name__ == "__main__":
    # Entry point: run the async sample on a fresh event loop.
    asyncio.run(generate_content())
69 changes: 69 additions & 0 deletions genai/live/live_ground_googsearch_with_txt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import asyncio


async def generate_content() -> list[str]:
    """Ground a Live API text answer with the Google Search tool.

    Returns the accumulated text chunks of the model's reply.
    """
    # [START googlegenaisdk_live_ground_googsearch_with_txt]
    from google import genai
    from google.genai.types import (
        LiveConnectConfig,
        Modality,
        Tool,
        GoogleSearch,
        Content,
        Part,
    )

    client = genai.Client()
    model_id = "gemini-2.0-flash-live-preview-04-09"
    config = LiveConnectConfig(
        response_modalities=[Modality.TEXT],
        tools=[Tool(google_search=GoogleSearch())],
    )
    async with client.aio.live.connect(model=model_id, config=config) as session:
        text_input = "When did the last Brazil vs. Argentina soccer match happen?"
        # Echo the prompt so the transcript matches the documented example output.
        print("> ", text_input, "\n")
        await session.send_client_content(
            turns=Content(role="user", parts=[Part(text=text_input)])
        )

        response = []

        async for chunk in session.receive():
            if chunk.server_content:
                if chunk.text is not None:
                    response.append(chunk.text)

                # The model might generate and execute Python code to use Search
                model_turn = chunk.server_content.model_turn
                if model_turn:
                    for part in model_turn.parts:
                        if part.executable_code is not None:
                            print(part.executable_code.code)

                        if part.code_execution_result is not None:
                            print(part.code_execution_result.output)

    print("".join(response))
    # Example output:
    # > When did the last Brazil vs. Argentina soccer match happen?
    # The last Brazil vs. Argentina soccer match was on March 25, 2025, a 2026 World Cup qualifier, where Argentina defeated Brazil 4-1.
    # [END googlegenaisdk_live_ground_googsearch_with_txt]
    return response


if __name__ == "__main__":
    # Entry point: run the async sample on a fresh event loop.
    asyncio.run(generate_content())
90 changes: 90 additions & 0 deletions genai/live/live_structured_ouput_with_txt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from http.client import responses


# Structured output via the OpenAI-compatible Chat Completions endpoint on Vertex AI.
# Install helpers: pip install openai google-auth pydantic

from pydantic import BaseModel


class CalendarEvent(BaseModel):
    """Schema the model's reply is parsed into (pydantic structured output)."""

    name: str  # event title, e.g. "science fair"
    date: str  # free-form date string as extracted, e.g. "Friday"
    participants: list[str]  # attendee names, e.g. ["Alice", "Bob"]


def generate_content() -> CalendarEvent:
    """Extract a CalendarEvent from free text via the OpenAI-compatible endpoint.

    Authenticates with application-default credentials, points the OpenAI
    client at the Vertex AI Chat Completions surface, and asks Gemini to
    parse the user message into the CalendarEvent schema.
    """
    # [START googlegenaisdk_live_structured_ouput_with_txt]
    import os
    import openai
    from google.auth import default
    import google.auth.transport.requests
    from openai.types.chat import (
        ChatCompletionSystemMessageParam,
        ChatCompletionUserMessageParam,
    )

    project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
    location = "us-central1"

    # Programmatically get an access token
    credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
    credentials.refresh(google.auth.transport.requests.Request())
    # Note: the credential lives for 1 hour by default (https://cloud.google.com/docs/authentication/token-types#at-lifetime); after expiration, it must be refreshed.

    ##############################
    # Choose one of the following:
    ##############################

    # If you are calling a Gemini model, set the ENDPOINT_ID variable to use openapi.
    ENDPOINT_ID = "openapi"

    # If you are calling a self-deployed model from Model Garden, set the
    # ENDPOINT_ID variable and set the client's base URL to use your endpoint.
    # ENDPOINT_ID = "YOUR_ENDPOINT_ID"

    # OpenAI Client
    client = openai.OpenAI(
        base_url=f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/endpoints/{ENDPOINT_ID}",
        api_key=credentials.token,
    )

    # `parse` validates the model reply against the CalendarEvent schema.
    completion = client.beta.chat.completions.parse(
        model="google/gemini-2.0-flash-001",
        messages=[
            ChatCompletionSystemMessageParam(
                role="system", content="Extract the event information."
            ),
            ChatCompletionUserMessageParam(
                role="user",
                content="Alice and Bob are going to a science fair on Friday.",
            ),
        ],
        response_format=CalendarEvent,
    )

    response = completion.choices[0].message.parsed
    print(response)

    # Example output:
    # System message: Extract the event information.
    # User message: Alice and Bob are going to a science fair on Friday.
    # Output message: name='science fair' date='Friday' participants=['Alice', 'Bob']
    # [END googlegenaisdk_live_structured_ouput_with_txt]
    return response


if __name__ == "__main__":
    # Entry point: synchronous sample, no event loop needed.
    generate_content()
Loading