Skip to content

Commit f19d350

Browse files
GuinersGuinersgericdong
authored
feat(genai): live samples part1 (#4190)
* adding samples, test, lints * adding samples, test, lints * adding samples, test, lints * adding samples, test, lints * adding samples, test, lints * adding samples, test, lints * adding samples, test, lints * fixing functions names * linter changes and gemini code review fixes * changing response.candidates loop * renaming sc to ServerContent * adding mock to live-conversation-audio-with-audio * adding mock to live-conversation-audio-with-audio * fixing prompt message * Example output * hardcoded values fix * hardcoded values fix --------- Co-authored-by: Guiners <[email protected]> Co-authored-by: Eric Dong <[email protected]>
1 parent f25b80c commit f19d350

14 files changed

+806
-7
lines changed
95.4 KB
Binary file not shown.
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
// [START googlegenaisdk_live_code_exec_with_txt]
16+
17+
'use strict';
18+
19+
const {GoogleGenAI, Modality} = require('@google/genai');
20+
21+
// Project and location used when the caller does not pass them explicitly.
// The location falls back to 'global' when the environment variable is unset
// or empty.
const GOOGLE_CLOUD_PROJECT = process.env.GOOGLE_CLOUD_PROJECT;
const GOOGLE_CLOUD_LOCATION = process.env.GOOGLE_CLOUD_LOCATION || 'global';
23+
24+
/**
 * Opens a Live session with the code execution tool enabled, sends one text
 * prompt, prints any text found on the returned messages and closes the
 * session.
 *
 * @param {string} [projectId] Google Cloud project ID. Defaults to the
 *     module-level GOOGLE_CLOUD_PROJECT constant.
 * @param {string} [location] Google Cloud location. Defaults to the
 *     module-level GOOGLE_CLOUD_LOCATION constant.
 * @returns {Promise<object[]>} The server messages received for the turn, in
 *     arrival order. The list may be partial if the session closed or no
 *     message arrived within the wait timeout.
 */
async function generateLiveCodeExec(
  projectId = GOOGLE_CLOUD_PROJECT,
  location = GOOGLE_CLOUD_LOCATION
) {
  const client = new GoogleGenAI({
    vertexai: true,
    project: projectId,
    location: location,
  });

  const modelId = 'gemini-2.0-flash-live-preview-04-09';
  const config = {
    responseModalities: [Modality.TEXT],
    tools: [
      {
        codeExecution: {},
      },
    ],
  };

  // Messages pushed by the `onmessage` callback, consumed by waitMessage().
  const responseQueue = [];
  // Set by the `onclose` callback so waiting stops once no more messages can
  // arrive.
  let sessionClosed = false;

  // Resolves with the next queued message, or null when the session has
  // closed or nothing arrived within `timeoutMs`.
  async function waitMessage(timeoutMs = 60 * 1000) {
    const startTime = Date.now();
    while (responseQueue.length === 0) {
      if (sessionClosed) {
        console.warn('Session closed before the turn completed.');
        return null;
      }
      if (Date.now() - startTime > timeoutMs) {
        console.warn('No messages received within timeout. Exiting...');
        return null;
      }
      await new Promise(resolve => setTimeout(resolve, 100));
    }
    return responseQueue.shift();
  }

  // Collects messages until the server marks the turn as complete or
  // waitMessage() gives up.
  async function handleTurn() {
    const turns = [];
    let done = false;
    while (!done) {
      const message = await waitMessage();
      if (!message) {
        break;
      }
      turns.push(message);
      if (message.serverContent && message.serverContent.turnComplete) {
        done = true;
      }
    }
    return turns;
  }

  const session = await client.live.connect({
    model: modelId,
    config: config,
    callbacks: {
      onmessage: msg => responseQueue.push(msg),
      onerror: e => console.error('Error:', e.message),
      onclose: () => {
        sessionClosed = true;
      },
    },
  });

  // try/finally guarantees the session is closed even if sending or
  // receiving throws.
  try {
    const textInput = 'Compute the largest prime palindrome under 10';
    console.log('> ', textInput, '\n');

    await session.sendClientContent({
      turns: [{role: 'user', parts: [{text: textInput}]}],
    });

    const turns = await handleTurn();
    for (const turn of turns) {
      if (turn.text) {
        console.log('Received text:', turn.text);
      }
    }

    // Example output:
    // >  Compute the largest prime palindrome under 10
    // Received text: The largest prime palindrome under 10 is 7.

    return turns;
  } finally {
    session.close();
  }
}
96+
97+
// [END googlegenaisdk_live_code_exec_with_txt]
98+
99+
module.exports = {
100+
generateLiveCodeExec,
101+
};
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
// [START googlegenaisdk_live_conversation_audio_with_audio]
16+
17+
'use strict';
18+
19+
const fs = require('fs');
20+
const path = require('path');
21+
const {GoogleGenAI, Modality} = require('@google/genai');
22+
23+
// Live model used by this sample.
const MODEL = 'gemini-2.0-flash-live-preview-04-09';
// Sample rate (Hz) declared in the MIME type of the audio sent to the model.
const INPUT_RATE = 16000;
// Sample rate (Hz) written into the header of the saved response file.
const OUTPUT_RATE = 24000;
const SAMPLE_WIDTH = 2; // 16-bit

// Project and location used when the caller does not pass them explicitly.
// The location falls back to 'global' when the environment variable is unset
// or empty.
const GOOGLE_CLOUD_PROJECT = process.env.GOOGLE_CLOUD_PROJECT;
const GOOGLE_CLOUD_LOCATION = process.env.GOOGLE_CLOUD_LOCATION || 'global';
30+
31+
/**
 * Reads a .wav file and returns its audio payload as base64 text together
 * with the PCM MIME type to send alongside it.
 *
 * @param {string} filepath Path of the .wav file to read.
 * @returns {{base64Data: string, mimeType: string}} Base64 of every byte
 *     after the first 44, and an `audio/pcm` MIME type carrying INPUT_RATE.
 */
function readWavefile(filepath) {
  // NOTE(review): assumes a fixed 44-byte header with the samples directly
  // after it; files carrying extra chunks would need real chunk parsing.
  const WAV_HEADER_BYTES = 44;

  const fileBytes = fs.readFileSync(filepath);
  const audioBytes = fileBytes.subarray(WAV_HEADER_BYTES);

  return {
    base64Data: audioBytes.toString('base64'),
    mimeType: `audio/pcm;rate=${INPUT_RATE}`,
  };
}
38+
39+
/**
 * Concatenates raw audio buffers, prefixes them with a 44-byte RIFF/WAVE
 * header (format tag 1, one channel) and writes the result to disk.
 *
 * @param {string} filepath Destination path of the .wav file.
 * @param {Buffer[]} audioFrames Raw audio chunks, concatenated in order.
 * @param {number} [rate=OUTPUT_RATE] Sample rate in Hz written to the header.
 */
function writeWavefile(filepath, audioFrames, rate = OUTPUT_RATE) {
  const HEADER_BYTES = 44;
  const CHANNELS = 1;

  const rawAudioBytes = Buffer.concat(audioFrames);
  const blockAlign = CHANNELS * SAMPLE_WIDTH;

  const header = Buffer.alloc(HEADER_BYTES);
  header.write('RIFF', 0);
  // RIFF chunk size: everything after this field (header rest + samples).
  header.writeUInt32LE(HEADER_BYTES - 8 + rawAudioBytes.length, 4);
  header.write('WAVE', 8);
  header.write('fmt ', 12);
  header.writeUInt32LE(16, 16); // size of the fmt chunk body
  header.writeUInt16LE(1, 20); // audio format tag
  header.writeUInt16LE(CHANNELS, 22);
  header.writeUInt32LE(rate, 24);
  header.writeUInt32LE(rate * blockAlign, 28); // bytes per second
  header.writeUInt16LE(blockAlign, 32);
  // Derived from SAMPLE_WIDTH so the header stays self-consistent.
  header.writeUInt16LE(SAMPLE_WIDTH * 8, 34);
  header.write('data', 36);
  header.writeUInt32LE(rawAudioBytes.length, 40);

  fs.writeFileSync(filepath, Buffer.concat([header, rawAudioBytes]));
  console.log(`Model response saved to ${filepath}`);
}
60+
61+
/**
 * Sends a recorded audio prompt to a Live session configured for audio
 * responses, logs input/output transcriptions, saves the returned audio as
 * `example_model_response.wav` next to this file and closes the session.
 *
 * @param {string} [projectId] Google Cloud project ID. Defaults to the
 *     module-level GOOGLE_CLOUD_PROJECT constant.
 * @param {string} [location] Google Cloud location. Defaults to the
 *     module-level GOOGLE_CLOUD_LOCATION constant.
 * @returns {Promise<Buffer[]>} Decoded audio chunks received for the turn, in
 *     arrival order. Empty (or partial) if the session closed or no message
 *     arrived within the wait timeout.
 */
async function generateLiveConversation(
  projectId = GOOGLE_CLOUD_PROJECT,
  location = GOOGLE_CLOUD_LOCATION
) {
  console.log('Starting audio conversation sample...');
  console.log(`Project: ${projectId}, Location: ${location}`);

  const client = new GoogleGenAI({
    vertexai: true,
    project: projectId,
    location: location,
  });

  // Messages pushed by the `onmessage` callback, consumed by waitMessage().
  const responseQueue = [];
  // Set by the `onclose` callback so waiting stops once no more messages can
  // arrive.
  let sessionClosed = false;

  // Resolves with the next queued message, or null when the session has
  // closed or nothing arrived within `timeoutMs`.
  async function waitMessage(timeoutMs = 60 * 1000) {
    const startTime = Date.now();

    while (responseQueue.length === 0) {
      if (sessionClosed) {
        return null;
      }
      if (Date.now() - startTime > timeoutMs) {
        console.warn('No messages received within timeout. Exiting...');
        return null; // timeout occurred
      }
      await new Promise(resolve => setTimeout(resolve, 100));
    }

    return responseQueue.shift();
  }

  // Gathers the audio chunks of one model turn, logging transcriptions as
  // they arrive.
  async function handleTurn() {
    const audioFrames = [];
    let done = false;

    while (!done) {
      const message = await waitMessage();
      if (!message) {
        // Timed out or the session closed: return what was collected instead
        // of dereferencing a null message.
        break;
      }

      const serverContent = message.serverContent;
      if (!serverContent) {
        continue;
      }

      if (serverContent.inputTranscription) {
        console.log('Input transcription', serverContent.inputTranscription);
      }
      if (serverContent.outputTranscription) {
        console.log('Output transcription', serverContent.outputTranscription);
      }
      if (serverContent.modelTurn && serverContent.modelTurn.parts) {
        for (const part of serverContent.modelTurn.parts) {
          if (part && part.inlineData && part.inlineData.data) {
            audioFrames.push(Buffer.from(part.inlineData.data, 'base64'));
          }
        }
      }
      if (serverContent.turnComplete) {
        done = true;
      }
    }

    return audioFrames;
  }

  const session = await client.live.connect({
    model: MODEL,
    config: {
      responseModalities: [Modality.AUDIO],
      inputAudioTranscription: {},
      outputAudioTranscription: {},
    },
    callbacks: {
      onmessage: msg => responseQueue.push(msg),
      onerror: e => console.error(e.message),
      onclose: () => {
        sessionClosed = true;
        console.log('Closed');
      },
    },
  });

  // try/finally guarantees the session is closed even if reading the input
  // file, sending, or receiving throws.
  try {
    const wavFilePath = path.join(__dirname, 'hello_gemini_are_you_there.wav');
    console.log('Reading file:', wavFilePath);

    // readWavefile already returns base64 text, so it is sent as-is.
    const {base64Data, mimeType} = readWavefile(wavFilePath);

    await session.sendRealtimeInput({
      media: {
        data: base64Data,
        mimeType: mimeType,
      },
    });

    console.log('Audio sent, waiting for response...');

    const audioFrames = await handleTurn();
    if (audioFrames.length > 0) {
      writeWavefile(
        path.join(__dirname, 'example_model_response.wav'),
        audioFrames,
        OUTPUT_RATE
      );
    }

    return audioFrames;
  } finally {
    await session.close();
  }
}
165+
166+
// [END googlegenaisdk_live_conversation_audio_with_audio]
167+
168+
module.exports = {
169+
generateLiveConversation,
170+
};
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
// [START googlegenaisdk_live_func_call_with_txt]
16+
17+
'use strict';
18+
19+
const {GoogleGenAI, Modality} = require('@google/genai');
20+
21+
// Project and location used when the caller does not pass them explicitly.
// The location falls back to 'global' when the environment variable is unset
// or empty.
const GOOGLE_CLOUD_PROJECT = process.env.GOOGLE_CLOUD_PROJECT;
const GOOGLE_CLOUD_LOCATION = process.env.GOOGLE_CLOUD_LOCATION || 'global';
23+
24+
/**
 * Opens a Live session that declares two functions, sends one text prompt,
 * answers every function call the model requests with `{result: 'ok'}`,
 * prints any text found on the returned messages and closes the session.
 *
 * @param {string} [projectId] Google Cloud project ID. Defaults to the
 *     module-level GOOGLE_CLOUD_PROJECT constant.
 * @param {string} [location] Google Cloud location. Defaults to the
 *     module-level GOOGLE_CLOUD_LOCATION constant.
 * @returns {Promise<object[]>} The server messages received for the turn, in
 *     arrival order. The list may be partial if the session closed or no
 *     message arrived within the wait timeout.
 */
async function generateLiveFunctionCall(
  projectId = GOOGLE_CLOUD_PROJECT,
  location = GOOGLE_CLOUD_LOCATION
) {
  const client = new GoogleGenAI({
    vertexai: true,
    project: projectId,
    location: location,
  });

  const modelId = 'gemini-2.0-flash-live-preview-04-09';

  const config = {
    responseModalities: [Modality.TEXT],
    tools: [
      {
        functionDeclarations: [
          {name: 'turn_on_the_lights'},
          {name: 'turn_off_the_lights'},
        ],
      },
    ],
  };

  // Messages pushed by the `onmessage` callback, consumed by waitMessage().
  const responseQueue = [];
  // Set by the `onclose` callback so waiting stops once no more messages can
  // arrive.
  let sessionClosed = false;

  // Resolves with the next queued message, or null when the session has
  // closed or nothing arrived within `timeoutMs`.
  async function waitMessage(timeoutMs = 60 * 1000) {
    const startTime = Date.now();
    while (responseQueue.length === 0) {
      if (sessionClosed) {
        console.warn('Session closed before the turn completed.');
        return null;
      }
      if (Date.now() - startTime > timeoutMs) {
        console.warn('No messages received within timeout. Exiting...');
        return null;
      }
      await new Promise(resolve => setTimeout(resolve, 100));
    }
    return responseQueue.shift();
  }

  // Collects messages until the turn completes, replying to each requested
  // function call along the way. `session` is assigned below, before this
  // function is first invoked.
  async function handleTurn() {
    const turns = [];
    let done = false;
    while (!done) {
      const message = await waitMessage();
      if (!message) {
        break;
      }
      turns.push(message);

      if (message.toolCall) {
        // A tool call message without functionCalls is treated as empty.
        for (const fc of message.toolCall.functionCalls || []) {
          console.log(`Model requested function call: ${fc.name}`);

          await session.sendToolResponse({
            functionResponses: [
              {
                id: fc.id,
                name: fc.name,
                response: {result: 'ok'},
              },
            ],
          });
          console.log(`Sent tool response for ${fc.name}:`, {result: 'ok'});
        }
      }

      if (message.serverContent && message.serverContent.turnComplete) {
        done = true;
      }
    }
    return turns;
  }

  const session = await client.live.connect({
    model: modelId,
    config: config,
    callbacks: {
      onmessage: msg => responseQueue.push(msg),
      onerror: e => console.error('Error:', e.message),
      onclose: () => {
        sessionClosed = true;
      },
    },
  });

  // try/finally guarantees the session is closed even if sending or
  // receiving throws.
  try {
    const textInput = 'Turn on the lights please';
    console.log('> ', textInput, '\n');

    await session.sendClientContent({
      turns: [{role: 'user', parts: [{text: textInput}]}],
    });

    const turns = await handleTurn();

    for (const turn of turns) {
      if (turn.text) {
        console.log('Received text:', turn.text);
      }
    }

    // Example output:
    // >  Turn on the lights please
    // Model requested function call: turn_on_the_lights
    // Sent tool response for turn_on_the_lights: { result: 'ok' }

    return turns;
  } finally {
    session.close();
  }
}
120+
121+
// [END googlegenaisdk_live_func_call_with_txt]
122+
123+
module.exports = {
124+
generateLiveFunctionCall,
125+
};

0 commit comments

Comments
 (0)