Skip to content

Commit 9c54cf9

Browse files
authored
chore(go): add text-to-speech samples (#3253)
1 parent 280cbd5 commit 9c54cf9

File tree

2 files changed

+95
-0
lines changed

2 files changed

+95
-0
lines changed

go/samples/text-to-speech/genkit.wav

177 KB
Binary file not shown.

go/samples/text-to-speech/main.go

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package main
16+
17+
import (
18+
"context"
19+
"encoding/base64"
20+
"io"
21+
"log"
22+
"os"
23+
24+
"github.com/firebase/genkit/go/ai"
25+
"github.com/firebase/genkit/go/genkit"
26+
"github.com/firebase/genkit/go/plugins/googlegenai"
27+
"google.golang.org/genai"
28+
)
29+
30+
func main() {
31+
ctx := context.Background()
32+
33+
// Initialize Genkit with the Google AI plugin. When you pass nil for the
34+
// Config parameter, the Google AI plugin will get the API key from the
35+
// GEMINI_API_KEY or GOOGLE_API_KEY environment variable, which is the recommended
36+
// practice.
37+
g, err := genkit.Init(ctx,
38+
genkit.WithPlugins(&googlegenai.GoogleAI{}),
39+
genkit.WithDefaultModel("googleai/gemini-2.5-flash-preview-tts"),
40+
)
41+
if err != nil {
42+
log.Fatal(err)
43+
}
44+
45+
// Define a simple flow that generates an audio from a given text
46+
genkit.DefineFlow(g, "text-to-speech-flow", func(ctx context.Context, input any) (string, error) {
47+
resp, err := genkit.Generate(ctx, g,
48+
ai.WithConfig(&genai.GenerateContentConfig{
49+
Temperature: genai.Ptr[float32](1.0),
50+
ResponseModalities: []string{"AUDIO"},
51+
SpeechConfig: &genai.SpeechConfig{
52+
VoiceConfig: &genai.VoiceConfig{
53+
PrebuiltVoiceConfig: &genai.PrebuiltVoiceConfig{
54+
VoiceName: "Algenib",
55+
},
56+
},
57+
},
58+
}),
59+
ai.WithPrompt("Say: Genkit is the best Gen AI library!"))
60+
if err != nil {
61+
return "", err
62+
}
63+
64+
// base64 encoded audio
65+
text := resp.Text()
66+
return text, nil
67+
})
68+
69+
// Define a simple flow that generates audio transcripts from a given audio
70+
genkit.DefineFlow(g, "speech-to-text-flow", func(ctx context.Context, input any) (string, error) {
71+
audio, err := os.Open("./genkit.wav")
72+
if err != nil {
73+
return "", err
74+
}
75+
defer audio.Close()
76+
77+
audioBytes, err := io.ReadAll(audio)
78+
if err != nil {
79+
return "", err
80+
}
81+
resp, err := genkit.Generate(ctx, g,
82+
ai.WithModelName("googleai/gemini-2.5-flash"),
83+
ai.WithMessages(ai.NewUserMessage(
84+
ai.NewTextPart("Can you transcribe the next audio?"),
85+
ai.NewMediaPart("audio/wav", "data:audio/wav;base64,"+base64.StdEncoding.EncodeToString(audioBytes)))),
86+
)
87+
if err != nil {
88+
return "", err
89+
}
90+
91+
return resp.Text(), nil
92+
})
93+
94+
<-ctx.Done()
95+
}

0 commit comments

Comments
 (0)