Skip to content

Commit 5fe1ec4

Browse files
GuinersGuinersgericdong
authored
feat(genai): live samples part2 (#4189)
* adding samples, test, lints * adding samples, test, lints * adding samples, test, lints * adding samples, test, lints * adding samples, test, lints * adding samples, test, lints * adding samples, test, lints * adding samples, test, lints * adding samples, test, lints * adding samples, test, lints * adding samples, test, lints * adding samples, test, lints * fixing functions names * linter changes and gemini code review fixes * renamed sc to serverContent * fixing package.json and adding delay to test * adding mock to live-txt-with-audio test * adding mock to live-ground-ragengine-with-txt --------- Co-authored-by: Guiners <[email protected]> Co-authored-by: Eric Dong <[email protected]>
1 parent 081678e commit 5fe1ec4

11 files changed

+809
-2
lines changed

genai/live/live-audio-with-txt.js

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
// [START googlegenaisdk_live_audio_with_txt]
16+
17+
'use strict';
18+
19+
const {GoogleGenAI, Modality} = require('@google/genai');
20+
const fs = require('fs');
21+
22+
// Sample defaults are taken from the environment.
const GOOGLE_CLOUD_PROJECT = process.env.GOOGLE_CLOUD_PROJECT;
// An unset or empty GOOGLE_CLOUD_LOCATION falls back to 'global'.
const GOOGLE_CLOUD_LOCATION = process.env.GOOGLE_CLOUD_LOCATION
  ? process.env.GOOGLE_CLOUD_LOCATION
  : 'global';
24+
25+
/**
 * Opens a Live API session on Vertex AI, sends one text prompt and collects
 * the audio the model streams back for that turn.
 *
 * When at least one audio chunk is received, the concatenated bytes are
 * written to `response.raw` in the current working directory.
 *
 * @param {string} [projectId] Google Cloud project ID. Defaults to the
 *     GOOGLE_CLOUD_PROJECT environment variable.
 * @param {string} [location] Vertex AI location. Defaults to the
 *     GOOGLE_CLOUD_LOCATION environment variable, or 'global'.
 * @returns {Promise<Buffer[]>} Decoded audio chunks in arrival order.
 */
async function generateLiveConversation(
  projectId = GOOGLE_CLOUD_PROJECT,
  location = GOOGLE_CLOUD_LOCATION
) {
  const client = new GoogleGenAI({
    vertexai: true,
    project: projectId,
    location: location,
  });

  const voiceName = 'Aoede';
  const modelId = 'gemini-2.0-flash-live-preview-04-09';
  const config = {
    responseModalities: [Modality.AUDIO],
    speechConfig: {
      voiceConfig: {
        prebuiltVoiceConfig: {
          voiceName: voiceName,
        },
      },
    },
  };

  // Filled by the `onmessage` callback and drained by waitMessage().
  const responseQueue = [];

  // Polls the queue every 100 ms until a server message is available.
  async function waitMessage() {
    while (responseQueue.length === 0) {
      await new Promise(resolve => setTimeout(resolve, 100));
    }
    return responseQueue.shift();
  }

  // Consumes messages until the server marks the turn as complete and
  // returns every audio payload seen along the way.
  async function handleTurn() {
    const audioChunks = [];
    let done = false;

    while (!done) {
      const message = await waitMessage();

      const serverContent = message.serverContent;
      if (
        serverContent &&
        serverContent.modelTurn &&
        serverContent.modelTurn.parts
      ) {
        for (const part of serverContent.modelTurn.parts) {
          if (part && part.inlineData && part.inlineData.data) {
            // inlineData.data is a base64 string; decode it so the file holds
            // the actual audio bytes rather than the base64 text.
            audioChunks.push(Buffer.from(part.inlineData.data, 'base64'));
          }
        }
      }

      if (serverContent && serverContent.turnComplete) {
        done = true;
      }
    }

    return audioChunks;
  }

  const session = await client.live.connect({
    model: modelId,
    config: config,
    callbacks: {
      onmessage: msg => responseQueue.push(msg),
      onerror: e => console.error('Error:', e.message),
    },
  });

  let audioChunks;
  try {
    const textInput = 'Hello? Gemini are you there?';
    console.log('> ', textInput, '\n');

    await session.sendClientContent({
      turns: [{role: 'user', parts: [{text: textInput}]}],
    });

    audioChunks = await handleTurn();
  } finally {
    // Always release the session, even when sending or receiving fails.
    session.close();
  }

  if (audioChunks.length > 0) {
    const audioBuffer = Buffer.concat(audioChunks);
    fs.writeFileSync('response.raw', audioBuffer);
    console.log('Received audio answer (saved to response.raw)');
  }

  // Example output:
  // >  Hello? Gemini are you there?
  // Received audio answer (saved to response.raw)

  return audioChunks;
}
117+
118+
// [END googlegenaisdk_live_audio_with_txt]
119+
120+
// Public surface of this sample module.
module.exports = {generateLiveConversation};
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
// [START googlegenaisdk_live_ground_ragengine_with_txt]
16+
17+
'use strict';
18+
19+
const {GoogleGenAI, Modality} = require('@google/genai');
20+
21+
// Sample defaults are taken from the environment.
const GOOGLE_CLOUD_PROJECT = process.env.GOOGLE_CLOUD_PROJECT;
// An unset or empty GOOGLE_CLOUD_LOCATION falls back to 'global'.
const GOOGLE_CLOUD_LOCATION = process.env.GOOGLE_CLOUD_LOCATION
  ? process.env.GOOGLE_CLOUD_LOCATION
  : 'global';

// (DEVELOPER) Replace this with the resource name of your own memory corpus.
const MEMORY_CORPUS =
  'projects/cloud-ai-devrel-softserve/locations/us-central1/ragCorpora/2305843009213693952';
27+
28+
/**
 * Opens a Live API session on Vertex AI with a Vertex RAG store retrieval
 * tool attached, sends one text prompt and returns the text of the reply.
 *
 * @param {string} [memoryCorpus] Resource name of the RAG corpus passed to
 *     the retrieval tool. Defaults to MEMORY_CORPUS.
 * @param {string} [projectId] Google Cloud project ID. Defaults to the
 *     GOOGLE_CLOUD_PROJECT environment variable.
 * @param {string} [location] Vertex AI location. Defaults to the
 *     GOOGLE_CLOUD_LOCATION environment variable, or 'global'.
 * @returns {Promise<string[]>} The non-empty `text` values of the received
 *     messages, in arrival order.
 */
async function generateLiveRagTextResponse(
  memoryCorpus = MEMORY_CORPUS,
  projectId = GOOGLE_CLOUD_PROJECT,
  location = GOOGLE_CLOUD_LOCATION
) {
  const client = new GoogleGenAI({
    vertexai: true,
    project: projectId,
    location: location,
  });

  const modelId = 'gemini-2.0-flash-live-preview-04-09';

  // RAG store config
  const ragStore = {
    ragResources: [
      {
        ragCorpus: memoryCorpus, // Use memory corpus if you want to store context
      },
    ],
    storeContext: true, // sink context into your memory corpus
  };

  const config = {
    responseModalities: [Modality.TEXT],
    tools: [
      {
        retrieval: {
          vertexRagStore: ragStore,
        },
      },
    ],
  };

  // Filled by the `onmessage` callback and drained by waitMessage().
  const responseQueue = [];

  // Polls the queue every 100 ms until a server message is available.
  async function waitMessage() {
    while (responseQueue.length === 0) {
      await new Promise(resolve => setTimeout(resolve, 100));
    }
    return responseQueue.shift();
  }

  // Collects every message up to and including the one that marks the turn
  // as complete.
  async function handleTurn() {
    const turns = [];
    let done = false;
    while (!done) {
      const message = await waitMessage();
      turns.push(message);
      if (message.serverContent && message.serverContent.turnComplete) {
        done = true;
      }
    }
    return turns;
  }

  const session = await client.live.connect({
    model: modelId,
    config: config,
    callbacks: {
      onmessage: msg => responseQueue.push(msg),
      onerror: e => console.error('Error:', e.message),
    },
  });

  let turns;
  try {
    const textInput = 'What are newest gemini models?';
    console.log('> ', textInput, '\n');

    await session.sendClientContent({
      turns: [{role: 'user', parts: [{text: textInput}]}],
    });

    turns = await handleTurn();
  } finally {
    // Always release the session, even when sending or receiving fails.
    session.close();
  }

  const response = [];
  for (const turn of turns) {
    if (turn.text) {
      response.push(turn.text);
    }
  }

  console.log(response.join(''));

  // Example output:
  // >  What are newest gemini models?
  // In December 2023, Google launched Gemini, their "most capable and general model". It's multimodal, meaning it understands and combines different types of information like text, code, audio, images, and video.

  return response;
}
119+
120+
// [END googlegenaisdk_live_ground_ragengine_with_txt]
121+
122+
// Public surface of this sample module.
module.exports = {generateLiveRagTextResponse};
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
// [START googlegenaisdk_live_structured_output_with_txt]
16+
17+
'use strict';
18+
const {OpenAI} = require('openai');
19+
const {GoogleAuth} = require('google-auth-library');
20+
21+
// Sample defaults are taken from the environment.
const GOOGLE_CLOUD_PROJECT = process.env.GOOGLE_CLOUD_PROJECT;
// An unset or empty GOOGLE_CLOUD_LOCATION falls back to 'us-central1'.
const GOOGLE_CLOUD_LOCATION = process.env.GOOGLE_CLOUD_LOCATION
  ? process.env.GOOGLE_CLOUD_LOCATION
  : 'us-central1';

// JSON schema describing the calendar event the model is asked to extract:
// an event name, a date and a list of participant names, all mandatory.
const CalendarEventSchema = {
  type: 'object',
  properties: {
    name: {
      type: 'string',
    },
    date: {
      type: 'string',
    },
    participants: {
      type: 'array',
      items: {
        type: 'string',
      },
    },
  },
  required: ['name', 'date', 'participants'],
};
37+
38+
/**
 * Requests a chat completion constrained to CalendarEventSchema, using the
 * OpenAI client pointed at the Vertex AI `openapi` endpoint and authenticated
 * with an access token obtained through GoogleAuth.
 *
 * @param {string} [projectId] Google Cloud project ID. Defaults to the
 *     GOOGLE_CLOUD_PROJECT environment variable.
 * @param {string} [location] Vertex AI location. Defaults to the
 *     GOOGLE_CLOUD_LOCATION environment variable, or 'us-central1'. The value
 *     'global' selects the unprefixed `aiplatform.googleapis.com` host.
 * @returns {Promise<string>} The message content of the first completion
 *     choice.
 */
async function generateStructuredTextResponse(
  projectId = GOOGLE_CLOUD_PROJECT,
  location = GOOGLE_CLOUD_LOCATION
) {
  const auth = new GoogleAuth({
    scopes: ['https://www.googleapis.com/auth/cloud-platform'],
  });
  const client = await auth.getClient();
  const tokenResponse = await client.getAccessToken();

  const token = tokenResponse.token;

  const ENDPOINT_ID = 'openapi';
  // Only regional locations use the `<location>-aiplatform.googleapis.com`
  // host; the global endpoint is served from the unprefixed host.
  const host =
    location === 'global'
      ? 'aiplatform.googleapis.com'
      : `${location}-aiplatform.googleapis.com`;
  const baseURL = `https://${host}/v1/projects/${projectId}/locations/${location}/endpoints/${ENDPOINT_ID}`;

  const openAI = new OpenAI({
    apiKey: token,
    baseURL: baseURL,
  });

  const completion = await openAI.chat.completions.create({
    model: 'google/gemini-2.0-flash-001',
    messages: [
      {role: 'system', content: 'Extract the event information.'},
      {
        role: 'user',
        content: 'Alice and Bob are going to a science fair on Friday.',
      },
    ],
    response_format: {
      type: 'json_schema',
      json_schema: {
        name: 'CalendarEvent',
        schema: CalendarEventSchema,
      },
    },
  });

  const response = completion.choices[0].message.content;
  console.log(response);

  // Example expected output (returned as a JSON string):
  // {
  //   "name": "science fair",
  //   "date": "Friday",
  //   "participants": ["Alice", "Bob"]
  // }

  return response;
}
88+
89+
// [END googlegenaisdk_live_structured_output_with_txt]
90+
91+
// Public surface of this sample module.
module.exports = {generateStructuredTextResponse};

0 commit comments

Comments
 (0)