diff --git a/.gitignore b/.gitignore index 9bc0fe46..838740a5 100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,6 @@ public/docs-bundle-experimental.json # testing folder for code samples and experiments testing/ + +# scratch folder for temporary files and planning +scratch/ diff --git a/astro.config.mjs b/astro.config.mjs index 8b5ab49a..da26ab45 100644 --- a/astro.config.mjs +++ b/astro.config.mjs @@ -1,14 +1,14 @@ // @ts-check -import { defineConfig } from "astro/config"; -import starlight from "@astrojs/starlight"; -import starlightLinksValidatorPlugin from "starlight-links-validator"; -import starlightLlmsTxt from "starlight-llms-txt"; -import sitemap from "@astrojs/sitemap"; -import { sidebar } from "./src/sidebar"; -import { GOOGLE_DARK_THEME, GOOGLE_LIGHT_THEME } from "./src/google-theme"; +import { defineConfig } from 'astro/config'; +import starlight from '@astrojs/starlight'; +import starlightLinksValidatorPlugin from 'starlight-links-validator'; +import starlightLlmsTxt from 'starlight-llms-txt'; +import sitemap from '@astrojs/sitemap'; +import { sidebar } from './src/sidebar'; +import { GOOGLE_DARK_THEME, GOOGLE_LIGHT_THEME } from './src/google-theme'; -const site = "https://genkit.dev"; -const ogUrl = new URL("ogimage.png?v=1", site).href; +const site = 'https://genkit.dev'; +const ogUrl = new URL('ogimage.png?v=1', site).href; // https://astro.build/config export default defineConfig({ @@ -16,167 +16,156 @@ export default defineConfig({ site, markdown: { shikiConfig: { - langAlias: { dotprompt: "handlebars" }, + langAlias: { dotprompt: 'handlebars' }, }, }, integrations: [ starlight({ - favicon: "favicon.ico", + favicon: 'favicon.ico', expressiveCode: { themes: [GOOGLE_DARK_THEME, GOOGLE_LIGHT_THEME], }, pagination: false, - title: "Genkit", + title: 'Genkit', components: { - Sidebar: "./src/components/sidebar.astro", - Header: "./src/content/custom/header.astro", - Hero: "./src/content/custom/hero.astro", + Sidebar: './src/components/sidebar.astro', + Header: './src/content/custom/header.astro', + Hero: './src/content/custom/hero.astro', Head: './src/content/custom/head.astro', }, head: [ { - tag: "meta", + tag: 'meta', attrs: { - property: "og:image", + property: 'og:image', content: ogUrl, - width: "1085", - height: "377", + width: '1085', + height: '377', }, }, { - tag: "link", + tag: 'link', attrs: { - href: "https://fonts.gstatic.com", - rel: "preconnect", + href: 'https://fonts.gstatic.com', + rel: 'preconnect', crossorigin: true, }, }, { - tag: "link", + tag: 'link', attrs: { - href: "https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500&display=swap", - rel: "stylesheet", + href: 'https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500&display=swap', + rel: 'stylesheet', }, }, { - tag: "link", + tag: 'link', attrs: { - href: "https://fonts.googleapis.com/css2?family=Google+Sans+Text:wght@400;500&display=swap", - rel: "stylesheet", + href: 'https://fonts.googleapis.com/css2?family=Google+Sans+Text:wght@400;500&display=swap', + rel: 'stylesheet', }, }, { - tag: "link", + tag: 'link', attrs: { - href: "https://fonts.googleapis.com/css2?family=Google+Sans+Mono:wght@400;500&display=swap", - rel: "stylesheet", + href: 'https://fonts.googleapis.com/css2?family=Google+Sans+Mono:wght@400;500&display=swap', + rel: 'stylesheet', }, }, { - tag: "link", + tag: 'link', attrs: { - href: "https://fonts.googleapis.com/css2?family=Google+Symbols&display=block", - rel: "stylesheet", + href: 
'https://fonts.googleapis.com/css2?family=Google+Symbols&display=block', + rel: 'stylesheet', }, }, ], plugins: [ starlightLinksValidatorPlugin(), starlightLlmsTxt({ - projectName: "Genkit", - description: "Open-source GenAI toolkit for JS, Go, and Python.", + projectName: 'Genkit', + description: 'Open-source GenAI toolkit for JS, Go, and Python.', minify: { whitespace: false }, customSets: [ { - label: "Building AI Workflows", - description: - "Guidance on how to generate content and interact with LLM and image models using Genkit.", + label: 'Building AI Workflows', + description: 'Guidance on how to generate content and interact with LLM and image models using Genkit.', paths: [ - "docs/models", - "docs/context", - "docs/flows", - "docs/dotprompt", - "docs/chat", - "docs/tool-calling", - "docs/interrupts", - "docs/rag", - "docs/multi-agent", - "docs/evaluation", - "docs/local-observability", - "docs/errors/types", + 'docs/models', + 'docs/context', + 'docs/flows', + 'docs/dotprompt', + 'docs/chat', + 'docs/tool-calling', + 'docs/interrupts', + 'docs/rag', + 'docs/multi-agent', + 'docs/evaluation', + 'docs/local-observability', + 'docs/errors/types', ], }, { - label: "Deploying AI Workflows", + label: 'Deploying AI Workflows', description: - "Guidance on how to deploy Genkit code to various environments including Firebase and Cloud Run or use within a Next.js app.", - paths: [ - "docs/firebase", - "docs/cloud-run", - "docs/deploy-node", - "docs/auth", - "docs/nextjs", - ], + 'Guidance on how to deploy Genkit code to various environments including Firebase and Cloud Run or use within a Next.js app.', + paths: ['docs/firebase', 'docs/cloud-run', 'docs/deploy-node', 'docs/auth', 'docs/nextjs'], }, { - label: "Observing AI Workflows", - description: - "Guidance about Genkit's various observability features and how to use them.", + label: 'Observing AI Workflows', + description: "Guidance about Genkit's various observability features and how to use them.", paths: [ - "docs/observability/getting-started", - "docs/observability/authentication", - "docs/observability/advanced-configuration", - "docs/observability/telemetry-collection", - "docs/observability/troubleshooting", + 'docs/observability/getting-started', + 'docs/observability/authentication', + 'docs/observability/advanced-configuration', + 'docs/observability/telemetry-collection', + 'docs/observability/troubleshooting', ], }, { - label: "Writing Plugins", - description: "Guidance about how to author plugins for Genkit.", - paths: [ - "docs/plugin-authoring", - "docs/plugin-authoring-evaluator", - ], + label: 'Writing Plugins', + description: 'Guidance about how to author plugins for Genkit.', + paths: ['docs/plugin-authoring', 'docs/plugin-authoring-evaluator'], }, { - label: "Plugin Documentation", + label: 'Plugin Documentation', description: - "Provider-specific documentation for the Google AI, Vertex AI, Firebase, Ollama, Chroma, and Pinecone plugins.", + 'Provider-specific documentation for the Google AI, Vertex AI, Firebase, Ollama, Chroma, and Pinecone plugins.', paths: [ - "docs/plugins/google-genai", - "docs/plugins/vertex-ai", - "docs/plugins/firebase", - "docs/plugins/ollama", - "docs/plugins/chroma", - "docs/plugins/pinecone", + 'docs/plugins/google-genai', + 'docs/plugins/vertex-ai', + 'docs/plugins/firebase', + 'docs/plugins/ollama', + 'docs/plugins/chroma', + 'docs/plugins/pinecone', ], }, ], }), ], logo: { - dark: "./src/assets/lockup_white_tight2.png", - light: "./src/assets/lockup_dark_tight.png", + dark: 
'./src/assets/lockup_white_tight2.png', + light: './src/assets/lockup_dark_tight.png', replacesTitle: true, }, social: [ { - icon: "github", - label: "GitHub", - href: "https://github.com/firebase/genkit", + icon: 'github', + label: 'GitHub', + href: 'https://github.com/firebase/genkit', }, { - icon: "discord", - label: "Discord", - href: "https://discord.gg/qXt5zzQKpc", + icon: 'discord', + label: 'Discord', + href: 'https://discord.gg/qXt5zzQKpc', }, ], sidebar, - customCss: ["./src/tailwind.css"], + customCss: ['./src/tailwind.css'], }), sitemap(), ], redirects: { - "/discord": 'https://discord.gg/qXt5zzQKpc', + '/discord': 'https://discord.gg/qXt5zzQKpc', }, }); diff --git a/scripts/refactor-tabs.mjs b/scripts/refactor-tabs.mjs new file mode 100644 index 00000000..5b96576c --- /dev/null +++ b/scripts/refactor-tabs.mjs @@ -0,0 +1,57 @@ +import fs from 'fs/promises'; +import path from 'path'; + +const directoryPath = 'src/content/docs/unified-docs'; + +async function processFile(filePath) { + try { + let content = await fs.readFile(filePath, 'utf-8'); + let changed = false; + + if (content.includes('')) { + changed = true; + + content = content.replace(//g, ''); + content = content.replace(/<\/Tabs>/g, ''); + + content = content.replace(/]*>/g, ''); + content = content.replace(/]*>/g, ''); + content = content.replace(/]*>/g, ''); + content = content.replace(/<\/TabItem>/g, ''); + + const importRegex = /import { Tabs, TabItem } from '@astrojs\/starlight\/components';/g; + const newImport = + "import LangTabs from '../../../components/LangTabs.astro';\nimport LangTabItem from '../../../components/LangTabItem.astro';"; + + if (importRegex.test(content)) { + content = content.replace(importRegex, newImport); + } + } + + if (changed) { + await fs.writeFile(filePath, content, 'utf-8'); + console.log(`Updated: ${filePath}`); + } + } catch (error) { + console.error(`Error processing file ${filePath}:`, error); + } +} + +async function walk(dir) { + const files = await fs.readdir(dir, { withFileTypes: true }); + for (const file of files) { + const res = path.resolve(dir, file.name); + if (file.isDirectory()) { + await walk(res); + } else { + if (res.endsWith('.mdx')) { + await processFile(res); + } + } + } +} + +console.log('Starting tab replacement script...'); +walk(directoryPath).then(() => { + console.log('Tab replacement script finished.'); +}); diff --git a/src/components/LangTabItem.astro b/src/components/LangTabItem.astro new file mode 100644 index 00000000..cb4949a5 --- /dev/null +++ b/src/components/LangTabItem.astro @@ -0,0 +1,11 @@ +--- +// src/components/LangTabItem.astro +interface Props { + lang: 'js' | 'go' | 'python'; +} + +const { lang } = Astro.props; +--- +
+ +
diff --git a/src/components/LangTabs.astro b/src/components/LangTabs.astro new file mode 100644 index 00000000..4db708c8 --- /dev/null +++ b/src/components/LangTabs.astro @@ -0,0 +1,38 @@ +--- +// src/components/LangTabs.astro +--- +
+ +
+ + diff --git a/src/components/LanguagePreferenceSelector.astro b/src/components/LanguagePreferenceSelector.astro new file mode 100644 index 00000000..3bd29203 --- /dev/null +++ b/src/components/LanguagePreferenceSelector.astro @@ -0,0 +1,93 @@ +--- +import { Icon } from '@astrojs/starlight/components'; +const {variant} = Astro.props +const isSmall = variant === 'small'; +--- +
+ {!isSmall &&
Language:
} +
+ + +
+
+ + diff --git a/src/content/custom/head.astro b/src/content/custom/head.astro index 4013aa93..e00eb0ba 100644 --- a/src/content/custom/head.astro +++ b/src/content/custom/head.astro @@ -18,3 +18,5 @@ if (id.startsWith('docs/')) { --- {finalTitle} {head.filter(({ tag }) => tag !== 'title').map(({ tag: Tag, attrs, content }) => )} + + + ``` +
+ + ```go + // App Check verification middleware + func appCheckMiddleware() func(http.Handler) http.Handler { + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + appCheckToken := r.Header.Get("X-Firebase-AppCheck") + if appCheckToken == "" { + http.Error(w, "Missing App Check token", http.StatusUnauthorized) + return + } + + // Verify App Check token with Firebase Admin SDK + // Implementation depends on your specific setup + + next.ServeHTTP(w, r) + }) + } + } + + // Apply to protected routes + mux.Handle("POST /secureFlow", + appCheckMiddleware()( + firebaseAuthMiddleware(authClient)( + genkit.Handler(secureFlow)))) + ``` + + + ```python + from firebase_admin import app_check + + def verify_app_check_token(request): + """Verify Firebase App Check token""" + app_check_token = request.headers.get('X-Firebase-AppCheck') + if not app_check_token: + return False + + try: + # Verify the App Check token + app_check.verify_token(app_check_token) + return True + except Exception: + return False + + async def secure_context_provider(request): + """Context provider with App Check verification""" + # Verify App Check first + if not verify_app_check_token(request): + raise GenkitError(status='PERMISSION_DENIED', message='App Check verification failed') + + # Then verify Firebase Auth + token = verify_firebase_token(request) + if not token: + raise GenkitError(status='UNAUTHENTICATED', message='Authentication required') + + return { + 'auth': { + 'uid': token['uid'], + 'email': token.get('email'), + 'email_verified': token.get('email_verified', False) + }, + 'app_check_verified': True + } + ``` + + + +## Secrets Management + + + + **Set up secrets**: + ```bash + # Store API key in Firebase Functions secrets + firebase functions:secrets:set GEMINI_API_KEY + ``` + + **Use in functions**: + ```ts + import { defineSecret } from 'firebase-functions/params'; + + const geminiApiKey = defineSecret('GEMINI_API_KEY'); + const openaiApiKey = defineSecret('OPENAI_API_KEY'); + + export const multiModelFlow = onCallGenkit( + { + secrets: [geminiApiKey, openaiApiKey], + authPolicy: signedIn(), + }, + myFlow + ); + ``` + + + **Use Google Secret Manager**: + ```go + import ( + secretmanager "cloud.google.com/go/secretmanager/apiv1" + "cloud.google.com/go/secretmanager/apiv1/secretmanagerpb" + ) + + func getSecret(ctx context.Context, projectID, secretID string) (string, error) { + client, err := secretmanager.NewClient(ctx) + if err != nil { + return "", err + } + defer client.Close() + + req := &secretmanagerpb.AccessSecretVersionRequest{ + Name: fmt.Sprintf("projects/%s/secrets/%s/versions/latest", projectID, secretID), + } + + result, err := client.AccessSecretVersion(ctx, req) + if err != nil { + return "", err + } + + return string(result.Payload.Data), nil + } + + // Use in your application + func main() { + ctx := context.Background() + + apiKey, err := getSecret(ctx, "your-project-id", "gemini-api-key") + if err != nil { + log.Fatal(err) + } + + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlegenai.GoogleAI{APIKey: apiKey}), + ) + } + ``` + + + **Use Google Secret Manager**: + ```python + from google.cloud import secretmanager + + def get_secret(project_id: str, secret_id: str) -> str: + """Retrieve secret from Google Secret Manager""" + client = secretmanager.SecretManagerServiceClient() + name = f"projects/{project_id}/secrets/{secret_id}/versions/latest" + + response = client.access_secret_version(request={"name": name}) + return 
response.payload.data.decode("UTF-8") + + # Use in your application + GEMINI_API_KEY = get_secret("your-project-id", "gemini-api-key") + + ai = Genkit( + plugins=[GoogleGenAI(api_key=GEMINI_API_KEY)], + model=google_genai_name('gemini-2.5-flash'), + ) + ``` + + + +## Deployment + + + + **Deploy to Firebase Functions**: + ```bash + # Deploy all functions + firebase deploy --only functions + + # Deploy specific function + firebase deploy --only functions:generatePoem + + # Deploy with environment + firebase use production + firebase deploy --only functions + ``` + + **Test deployed function**: + ```bash + # Get function URL + firebase functions:config:get + + # Test with curl + curl -X POST https://your-region-your-project.cloudfunctions.net/generatePoem \ + -H "Authorization: Bearer $(firebase auth:print-access-token)" \ + -H "Content-Type: application/json" \ + -d '{"data": {"subject": "mountains"}}' + ``` + + + **Deploy to Cloud Run with Firebase integration**: + ```bash + # Build and deploy + gcloud run deploy genkit-firebase-app \ + --source . \ + --platform managed \ + --region us-central1 \ + --allow-unauthenticated \ + --set-env-vars GOOGLE_CLOUD_PROJECT=your-project-id + + # Test deployed service + curl -X POST https://genkit-firebase-app-hash-uc.a.run.app/generatePoem \ + -H "Authorization: Bearer $(gcloud auth print-identity-token)" \ + -H "Content-Type: application/json" \ + -d '{"subject": "mountains"}' + ``` + + + **Deploy to Cloud Run with Firebase integration**: + ```bash + # Create requirements.txt + echo "genkit + genkit-plugin-google-genai + genkit-plugin-flask + firebase-admin + flask" > requirements.txt + + # Deploy + gcloud run deploy genkit-firebase-app \ + --source . \ + --platform managed \ + --region us-central1 \ + --allow-unauthenticated \ + --set-env-vars GOOGLE_CLOUD_PROJECT=your-project-id + + # Test deployed service + curl -X POST https://genkit-firebase-app-hash-uc.a.run.app/generatePoem \ + -H "Authorization: Bearer $(gcloud auth print-identity-token)" \ + -H "Content-Type: application/json" \ + -d '{"data": "mountains"}' + ``` + + + +## Client Integration + +### Web Client + + + + ```html + + + + Genkit Firebase App + + + + + + + + + ``` + + + ```javascript + // Client-side JavaScript for Go backend + import { initializeApp } from 'firebase/app'; + import { getAuth, signInWithPopup, GoogleAuthProvider } from 'firebase/auth'; + + const app = initializeApp(firebaseConfig); + const auth = getAuth(app); + + async function callGenkitFlow(subject) { + const user = auth.currentUser; + if (!user) { + throw new Error('User not authenticated'); + } + + const idToken = await user.getIdToken(); + + const response = await fetch('https://your-cloud-run-url/generatePoem', { + method: 'POST', + headers: { + 'Authorization': `Bearer ${idToken}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ subject }), + }); + + if (!response.ok) { + throw new Error('Request failed'); + } + + return response.json(); + } + ``` + + + ```javascript + // Client-side JavaScript for Python backend + import { initializeApp } from 'firebase/app'; + import { getAuth, signInWithPopup, GoogleAuthProvider } from 'firebase/auth'; + + const app = initializeApp(firebaseConfig); + const auth = getAuth(app); + + async function callGenkitFlow(subject) { + const user = auth.currentUser; + if (!user) { + throw new Error('User not authenticated'); + } + + const idToken = await user.getIdToken(); + + const response = await fetch('https://your-cloud-run-url/generatePoem', { + method: 'POST', 
+ headers: { + 'Authorization': `Bearer ${idToken}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ data: subject }), + }); + + if (!response.ok) { + throw new Error('Request failed'); + } + + return response.json(); + } + ``` + + + +## Local Development + + + + **Firebase Emulator Suite**: + ```bash + # Start emulators with Genkit + genkit start -- firebase emulators:start --inspect-functions + + # Or start separately + firebase emulators:start + genkit start -- npm run serve + ``` + + **Environment setup**: + ```bash + # Set up local environment + export GEMINI_API_KEY=your-api-key + export GOOGLE_CLOUD_PROJECT=your-project-id + ``` + + + **Local development**: + ```bash + # Set up environment + export GOOGLE_CLOUD_PROJECT=your-project-id + export GEMINI_API_KEY=your-api-key + gcloud auth application-default login + + # Start with Genkit dev UI + genkit start -- go run . + ``` + + + **Local development**: + ```bash + # Set up environment + export GOOGLE_CLOUD_PROJECT=your-project-id + export GEMINI_API_KEY=your-api-key + export GOOGLE_APPLICATION_CREDENTIALS=path/to/service-account.json + + # Start with Genkit dev UI + genkit start -- python main.py + ``` + + + +## Best Practices + +### Security +1. **Always use authentication** for production deployments +2. **Enable App Check** for client integrity verification +3. **Use Firebase Security Rules** for Firestore access control +4. **Store secrets securely** using Firebase Functions secrets or Secret Manager +5. **Validate input data** in your flows + +### Performance +1. **Optimize cold starts** by minimizing dependencies +2. **Use connection pooling** for database connections +3. **Implement caching** for frequently accessed data +4. **Monitor function performance** using Firebase Performance Monitoring + +### Cost Management +1. **Set up billing alerts** to monitor costs +2. **Use appropriate timeout values** for functions +3. **Implement rate limiting** to prevent abuse +4. **Monitor function invocations** and optimize accordingly + +## Troubleshooting + +### Common Issues + +1. **Authentication errors**: + - Verify Firebase project configuration + - Check that users have verified emails + - Ensure proper token handling + +2. **App Check failures**: + - Verify reCAPTCHA configuration + - Check App Check token generation + - Ensure proper client-side setup + +3. **Secret access issues**: + - Verify secret names and permissions + - Check service account roles + - Ensure secrets are properly configured + +## Next Steps + +- Learn about [Cloud Run deployment](/unified-docs/deployment/cloud-run) for more flexible hosting +- Explore [authorization patterns](/unified-docs/deployment/authorization) for advanced security +- Check out [monitoring and observability](/unified-docs/observability-monitoring) for production insights diff --git a/src/content/docs/unified-docs/developer-tools.mdx b/src/content/docs/unified-docs/developer-tools.mdx new file mode 100644 index 00000000..b4530d14 --- /dev/null +++ b/src/content/docs/unified-docs/developer-tools.mdx @@ -0,0 +1,310 @@ +--- +title: Developer Tools +description: Explore Genkit's developer tools, including the CLI for command-line operations and the local web-based Developer UI for interactive testing and development across JavaScript, Go, and Python. 
+--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +Genkit provides powerful developer tools that work consistently across JavaScript, Go, and Python: + +- A command-line interface (CLI) for project operations +- A local web-based Developer UI for interactive testing and development +- Monitoring and observability features + +## Command Line Interface (CLI) + +The Genkit CLI provides essential commands for working with your AI applications. The CLI is shared across all languages and works the same way regardless of your implementation language. + +### Installation + +Install the CLI globally using npm (works for all languages): + +```bash +npm install -g genkit-cli +``` + +### Starting the Developer UI + +The core pattern is the same across all languages - provide an entrypoint command to run your application: + + + + ```bash + # Start the developer UI with your application + genkit start -- + + # Common examples: + genkit start -- npm run dev + genkit start -- npx tsx --watch src/index.ts + genkit start -- node --watch src/index.js + + # Auto-open in browser + genkit start -o -- npm run dev + ``` + + + ```bash + # Start the developer UI with your Go application + genkit start -- go run . + genkit start -- go run main.go + + # Auto-open in browser + genkit start -o -- go run . + ``` + + + ```bash + # Start the developer UI with your Python application + genkit start -- python app.py + genkit start -- python main.py + genkit start -- python -m your_module + + # Auto-open in browser + genkit start -o -- python app.py + ``` + + + +After running the command, you'll see output like: + +```bash +Telemetry API running on http://localhost:4033 +Genkit Developer UI: http://localhost:4000 +``` + +Open `http://localhost:4000` in your browser to access the Developer UI. + +### Running Flows + +Execute flows directly from the command line (works the same across all languages): + +```bash +# Run a specified flow (your runtime must be running with GENKIT_ENV=dev) +genkit flow:run + +# Run with input data +genkit flow:run myFlow '{"input": "data"}' + +# Batch run flows +genkit flow:batchRun +``` + +### Evaluation Commands + + + + ```bash + # Evaluate a specific flow + genkit eval:flow + + # Evaluate with dataset + genkit eval:flow myFlow --input myDataset.json + + # Extract evaluation data + genkit eval:extractData --output results.json + + # Run evaluation on extracted data + genkit eval:run results.json + ``` + + + ```bash + # Evaluate a specific flow + genkit eval:flow + + # Evaluate with dataset + genkit eval:flow myFlow --input myDataset.json + + # Extract evaluation data + genkit eval:extractData --maxRows 100 --output results.json + + # Run evaluation on extracted data + genkit eval:run results.json + ``` + + + Evaluation commands are not yet available for Python. Use external evaluation tools or the Developer UI for testing. + + + +### Configuration Commands + +```bash +# View all available commands +genkit --help + +# Configure analytics opt-out +genkit config set analyticsOptOut true +genkit config get analyticsOptOut +``` + +## Genkit Developer UI + +The Genkit Developer UI is a local web application that provides an interactive interface for working with models, flows, prompts, and other components. **The UI works identically across all languages** - once your application is running, the interface and features are the same. 
+ +### Features + +The Developer UI provides action runners for various Genkit components: + +- **Flow Runner**: Test and debug your AI workflows +- **Model Runner**: Experiment with different models and parameters +- **Prompt Runner**: Test and iterate on prompts +- **Tool Runner**: Test function calling capabilities +- **Retriever Runner**: Test document retrieval systems +- **Indexer Runner**: Test document indexing operations +- **Embedder Runner**: Test text embedding generation +- **Evaluator Runner**: Run evaluation metrics on your outputs + +![Genkit Developer UI](../../../assets/dev_ui/genkit_dev_ui_home.png) + +### Interactive Testing + +The Developer UI allows you to: + +1. **Test flows interactively** - Run flows with different inputs and see results in real-time +2. **Experiment with models** - Try different model configurations and compare outputs +3. **Iterate on prompts** - Modify prompts and see immediate results +4. **Debug tool calling** - Test function calls and inspect their execution +5. **Analyze traces** - Inspect detailed execution traces for debugging +6. **Run evaluations** - Test your flows against evaluation datasets + +![Genkit Developer UI Overview](/genkit_developer_ui_overview.gif) + +### Trace Inspection + +The Developer UI provides detailed trace inspection for all flow runs, allowing you to: + +- View step-by-step execution details +- Inspect inputs and outputs at each stage +- Analyze performance metrics +- Debug errors and exceptions +- Compare different execution runs + +This trace inspection works consistently across all languages, giving you the same debugging capabilities whether you're using JavaScript, Go, or Python. + +## Monitoring and Observability + +### Development Environment + +When running in development mode (with `GENKIT_ENV=dev` or using `genkit start`), Genkit automatically: + +- Enables trace collection +- Starts the telemetry API on `http://localhost:4033` +- Provides detailed debugging information +- Stores traces locally for inspection + +This works the same way across all languages. + +### OpenTelemetry Integration + + + + Genkit is fully instrumented with [OpenTelemetry](https://opentelemetry.io/) and provides hooks to export telemetry data to various monitoring systems. + + Configure telemetry export in your application: + + ```ts + import { genkit } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + + const ai = genkit({ + plugins: [googleAI()], + telemetry: { + instrumentation: 'genkit', + logger: 'genkit', + }, + }); + ``` + + + Genkit is fully instrumented with [OpenTelemetry](https://opentelemetry.io/) and provides hooks to export telemetry data. + + The [Google Cloud plugin](/go/docs/plugins/google-cloud) exports telemetry to Cloud's operations suite. + + ```go + import ( + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlecloud" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlecloud.GoogleCloud{}), + ) + // ... + } + ``` + + + OpenTelemetry integration is available but may have limited features compared to JavaScript and Go implementations. + + + +## Analytics and Privacy + +The Genkit CLI and Developer UI use cookies and similar technologies from Google to deliver and enhance the quality of its services and to analyze usage. [Learn more](https://policies.google.com/technologies/cookies). 
+ +### Opting Out of Analytics + +To opt-out of analytics, run: + +```bash +genkit config set analyticsOptOut true +``` + +You can view the current setting by running: + +```bash +genkit config get analyticsOptOut +``` + +## Best Practices + +### Development Workflow + +1. **Start with the Developer UI** - Use the interactive interface to experiment with models and prompts +2. **Use watch mode** - Include `--watch` flags in your start commands to see changes immediately +3. **Test thoroughly** - Use the flow runner to test different input scenarios +4. **Monitor traces** - Inspect execution traces to understand performance and debug issues +5. **Evaluate regularly** - Use evaluation tools to measure and improve your AI application quality + +### Debugging Tips + + + + - Use `console.log()` statements in your flows - they'll appear in the Developer UI + - Check the trace inspector for detailed execution information + - Use the model runner to isolate model-specific issues + - Test prompts independently before integrating them into flows + + + - Use `log.Printf()` for debugging output + - Check traces in the Developer UI for execution details + - Test individual components before integrating them + - Use the monitoring features to track performance + + + - Use `print()` statements for debugging + - Test components individually in the Developer UI + - Monitor application logs for errors and performance issues + + + +### Common Patterns + +Regardless of your language, the development pattern is consistent: + +1. **Start your application** with `genkit start -- ` +2. **Open the Developer UI** at `http://localhost:4000` +3. **Test components** using the various runners +4. **Inspect traces** to understand execution flow +5. **Iterate and improve** based on results + +## Next Steps + +- Learn about [creating flows](/unified-docs/creating-flows) to build testable AI workflows +- Explore [evaluation](/unified-docs/evaluation) to measure and improve your application quality +- See [generating content](/unified-docs/generating-content) to understand model interactions +- Check out [tool calling](/unified-docs/tool-calling) for building interactive AI agents diff --git a/src/content/docs/unified-docs/dotprompt.mdx b/src/content/docs/unified-docs/dotprompt.mdx new file mode 100644 index 00000000..6191d2b9 --- /dev/null +++ b/src/content/docs/unified-docs/dotprompt.mdx @@ -0,0 +1,1150 @@ +--- +title: Managing prompts with Dotprompt +description: Learn how to use Dotprompt to manage prompts, models, and parameters for generative AI models across JavaScript and Go, with a streamlined approach to prompt engineering and iteration. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +Prompt engineering is the primary way that you, as an app developer, influence +the output of generative AI models. For example, when using LLMs, you can craft +prompts that influence the tone, format, length, and other characteristics of +the models' responses. + +The way you write these prompts will depend on the model you're using; a prompt +written for one model might not perform well when used with another model. +Similarly, the model parameters you set (temperature, top-k, and so on) will +also affect output differently depending on the model. 
+ +Getting all three of these factors—the model, the model parameters, and +the prompt—working together to produce the output you want is rarely a +trivial process and often involves substantial iteration and experimentation. +Genkit provides a library and file format called Dotprompt, that aims to make +this iteration faster and more convenient. + +[Dotprompt](https://github.com/google/dotprompt) is designed around the premise +that **prompts are code**. You define your prompts along with the models and +model parameters they're intended for separately from your application code. +Then, you (or, perhaps someone not even involved with writing application code) +can rapidly iterate on the prompts and model parameters using the Genkit +Developer UI. Once your prompts are working the way you want, you can import +them into your application and run them using Genkit. + +Your prompt definitions each go in a file with a `.prompt` extension. Here's an +example of what these files look like: + +```dotprompt +--- +model: googleai/gemini-2.5-flash +config: + temperature: 0.9 +input: + schema: + location: string + style?: string + name?: string + default: + location: a restaurant +--- + +You are the world's most welcoming AI assistant and are currently working at {{location}}. + +Greet a guest{{#if name}} named {{name}}{{/if}}{{#if style}} in the style of {{style}}{{/if}}. +``` + +The portion in the triple-dashes is YAML front matter, similar to the front +matter format used by GitHub Markdown and Jekyll; the rest of the file is the +prompt, which can optionally use Handlebars + templates. The following sections will go into more detail about each of +the parts that make a `.prompt` file and how to use them. + +## Before you begin + +Before reading this page, you should be familiar with the content covered on the +[Generating content with AI models](/unified-docs/generating-content) page. + +If you want to run the code examples on this page, first complete the steps in +the Getting started guide for your language: + + + + Complete the [Get started](/docs/get-started) guide. All examples assume you have already installed Genkit as a dependency in your project. + + + Complete the [Get started](/go/docs/get-started-go) guide. All examples assume you have already installed Genkit as a dependency in your project. + + + Dotprompt is not currently available for Python. You can define prompts directly in your Python code using the standard prompt patterns shown in the [Generating content](/unified-docs/generating-content) guide. + + + +## Creating prompt files + +Although Dotprompt provides several [different ways](#defining-prompts-in-code) to create +and load prompts, it's optimized for projects that organize their prompts as +`.prompt` files within a single directory (or subdirectories thereof). This +section shows you how to create and load prompts using this recommended setup. + +### Creating a prompt directory + +The Dotprompt library expects to find your prompts in a directory at your +project root and automatically loads any prompts it finds there. By default, +this directory is named `prompts`. 
For example, using the default directory +name, your project structure might look something like this: + + + + ``` + your-project/ + ├── lib/ + ├── node_modules/ + ├── prompts/ + │ └── hello.prompt + ├── src/ + ├── package-lock.json + ├── package.json + └── tsconfig.json + ``` + + + ``` + your-project/ + ├── prompts/ + │ └── hello.prompt + ├── main.go + ├── go.mod + └── go.sum + ``` + + + Dotprompt is not currently available for Python. Use standard prompt definition patterns in your Python code. + + + +If you want to use a different directory, you can specify it when you configure +Genkit: + + + + ```ts + const ai = genkit({ + promptDir: './llm_prompts', + // (Other settings...) + }); + ``` + + + ```go + g, err := genkit.Init(ctx.Background(), genkit.WithPromptDir("./llm_prompts")) + ``` + + + Not applicable - Dotprompt is not available for Python. + + + +### Creating a prompt file + +There are two ways to create a `.prompt` file: using a text editor, or with the +developer UI. + +#### Using a text editor + +If you want to create a prompt file using a text editor, create a text file with +the `.prompt` extension in your prompts directory: for example, +`prompts/hello.prompt`. + +Here is a minimal example of a prompt file: + +```dotprompt +--- +model: googleai/gemini-2.5-flash +--- +You are the world's most welcoming AI assistant. Greet the user and offer your assistance. +``` + +The portion in the dashes is YAML front matter, similar to the front matter +format used by GitHub markdown and Jekyll; the rest of the file is the prompt, +which can optionally use Handlebars templates. The front matter section is +optional, but most prompt files will at least contain metadata specifying a +model. The remainder of this page shows you how to go beyond this, and make use +of Dotprompt's features in your prompt files. + +#### Using the developer UI + +You can also create a prompt file using the model runner in the developer UI. +Start with application code that imports the Genkit library and configures it to +use the model plugin you're interested in: + + + + ```ts + import { genkit } from 'genkit'; + + // Import the model plugins you want to use. + import { googleAI } from '@genkit-ai/googleai'; + + const ai = genkit({ + // Initialize and configure the model plugins. + plugins: [ + googleAI({ + apiKey: 'your-api-key', // Or (preferred): export GEMINI_API_KEY=... + }), + ], + }); + ``` + + It's okay if the file contains other code, but the above is all that's required. + + Load the developer UI in the same project: + + ```bash + genkit start -- tsx --watch src/your-code.ts + ``` + + + ```go + package main + + import ( + "context" + "log" + + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlegenai" + ) + + func main() { + g, err := genkit.Init(context.Background(), genkit.WithPlugins(&googlegenai.GoogleAI{})) + if err != nil { + log.Fatal(err) + } + + // Blocks end of program execution to use the developer UI. + select {} + } + ``` + + Load the developer UI in the same project: + + ```bash + genkit start -- go run . + ``` + + + Dotprompt is not currently available for Python. Use the standard Genkit Python patterns for prompt definition. + + + +In the Models section, choose the model you want to use from the list of models +provided by the plugin. + +Then, experiment with the prompt and configuration until you get results you're +happy with. When you're ready, press the Export button and save the file to your +prompts directory. 
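+The exported file is an ordinary `.prompt` file, so you can keep refining it by hand afterwards. As a rough sketch (the model name, config values, and wording here are illustrative, not necessarily what the UI will emit for you), an exported prompt might look like:
+
+```dotprompt
+---
+model: googleai/gemini-2.5-flash
+config:
+  temperature: 0.7
+---
+You are the world's most welcoming AI assistant. Greet the user and offer your assistance.
+```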
+ +## Running prompts + +After you've created prompt files, you can run them from your application code, +or using the tooling provided by Genkit. Regardless of how you want to run your +prompts, first start with application code that imports the Genkit library and +the model plugins you're interested in. + +If you're storing your prompts in a directory other than the default, be sure to +specify it when you configure Genkit. + +### Run prompts from code + + + + To use a prompt, first load it using the `prompt('file_name')` method: + + ```ts + const helloPrompt = ai.prompt('hello'); + ``` + + Once loaded, you can call the prompt like a function: + + ```ts + const response = await helloPrompt(); + + // Alternatively, use destructuring assignments to get only the properties + // you're interested in: + const { text } = await helloPrompt(); + ``` + + Or you can also run the prompt in streaming mode: + + ```ts + const { response, stream } = helloPrompt.stream(); + + for await (const chunk of stream) { + console.log(chunk.text); + } + // optional final (aggregated) response + console.log((await response).text); + ``` + + A callable prompt takes two optional parameters: the input to the prompt (see + the section below on [specifying input schemas](#input-and-output-schemas)), and a configuration + object, similar to that of the `generate()` method. For example: + + ```ts + const response2 = await helloPrompt( + // Prompt input: + { name: 'Ted' }, + + // Generation options: + { + config: { + temperature: 0.4, + }, + }, + ); + ``` + + Similarly for streaming: + + ```ts + const { stream } = helloPrompt.stream(input, options); + ``` + + Any parameters you pass to the prompt call will override the same parameters + specified in the prompt file. + + See [Generate content with AI models](/unified-docs/generating-content) for descriptions of the available + options. + + + To use a prompt, first load it using the `genkit.LookupPrompt()` function: + + ```go + helloPrompt := genkit.LookupPrompt(g, "hello") + ``` + + An executable prompt has similar options to that of `genkit.Generate()` and many + of them are overridable at execution time, including things like input (see the + section about [specifying input schemas](#input-and-output-schemas)), configuration, and more: + + ```go + resp, err := helloPrompt.Execute(context.Background(), + ai.WithModelName("googleai/gemini-2.5-flash"), + ai.WithInput(map[string]any{"name": "John"}), + ai.WithConfig(&googlegenai.GeminiConfig{Temperature: 0.5}) + ) + ``` + + Any parameters you pass to the prompt call will override the same parameters + specified in the prompt file. + + See [Generate content with AI models](/unified-docs/generating-content) for descriptions of the available + options. + + + Dotprompt is not currently available for Python. Use the standard prompt patterns: + + ```python + # Define prompts directly in your code + response = await ai.generate( + prompt="You are a helpful assistant. Greet the user.", + model="googleai/gemini-2.5-flash" + ) + ``` + + + +### Using the developer UI + +As you're refining your app's prompts, you can run them in the Genkit developer +UI to quickly iterate on prompts and model configurations, independently from +your application code. + + + + Load the developer UI from your project directory: + + ```bash + genkit start -- tsx --watch src/your-code.ts + ``` + + + Load the developer UI from your project directory: + + ```bash + genkit start -- go run . 
+ ``` + + + Dotprompt is not available for Python, but you can still use the developer UI to test prompts defined in your code. + + + +Once you've loaded prompts into the developer UI, you can run them with +different input values, and experiment with how changes to the prompt wording or +the configuration parameters affect the model output. When you're happy with the +result, you can click the **Export prompt** button to save the modified prompt +back into your project directory. + +## Model configuration + +In the front matter block of your prompt files, you can optionally specify model +configuration values for your prompt: + +```dotprompt +--- +model: googleai/gemini-2.5-flash +config: + temperature: 1.4 + topK: 50 + topP: 0.4 + maxOutputTokens: 400 + stopSequences: + - "" + - "" +--- +``` + +These values map directly to the configuration parameters: + + + + ```ts + const response3 = await helloPrompt( + {}, + { + config: { + temperature: 1.4, + topK: 50, + topP: 0.4, + maxOutputTokens: 400, + stopSequences: ['', ''], + }, + }, + ); + ``` + + + ```go + resp, err := helloPrompt.Execute(context.Background(), + ai.WithConfig(&googlegenai.GeminiConfig{ + Temperature: 1.4, + TopK: 50, + TopP: 0.4, + MaxOutputTokens: 400, + StopSequences: []string{"", ""}, + })) + ``` + + + Not applicable - use standard configuration patterns in Python code. + + + +See [Generate content with AI models](/unified-docs/generating-content) for descriptions of the available +options. + +## Input and output schemas + +You can specify input and output schemas for your prompt by defining them in the +front matter section: + +```dotprompt +--- +model: googleai/gemini-2.5-flash +input: + schema: + theme?: string + default: + theme: "pirate" +output: + schema: + dishname: string + description: string + calories: integer + allergens(array): string +--- +Invent a menu item for a {{theme}} themed restaurant. +``` + +These schemas are used in much the same way as those passed to a `generate()` +request or a flow definition. For example, the prompt defined above produces +structured output: + + + + ```ts + const menuPrompt = ai.prompt('menu'); + const { output } = await menuPrompt({ theme: 'medieval' }); + + const dishName = output['dishname']; + const description = output['description']; + ``` + + + ```go + menuPrompt := genkit.LookupPrompt(g, "menu") + if menuPrompt == nil { + log.Fatal("no prompt named 'menu' found") + } + + resp, err := menuPrompt.Execute(ctx, + ai.WithInput(map[string]any{"theme": "medieval"}), + ) + if err != nil { + log.Fatal(err) + } + + var output map[string]any + if err := resp.Output(&output); err != nil { + log.Fatal(err) + } + + log.Println(output["dishname"]) + log.Println(output["description"]) + ``` + + + Not applicable - use standard schema patterns in Python code. + + + +You have several options for defining schemas in a `.prompt` file: Dotprompt's +own schema definition format, Picoschema; standard JSON Schema; or, as +references to schemas defined in your application code. The following sections +describe each of these options in more detail. + +### Picoschema + +The schemas in the example above are defined in a format called Picoschema. +Picoschema is a compact, YAML-optimized schema definition format that makes it +easy to define the most important attributes of a schema for LLM usage. 
Here's a +longer example of a schema, which specifies the information an app might store +about an article: + +```yaml +schema: + title: string # string, number, and boolean types are defined like this + subtitle?: string # optional fields are marked with a `?` + draft?: boolean, true when in draft state + status?(enum, approval status): [PENDING, APPROVED] + date: string, the date of publication e.g. '2024-04-09' # descriptions follow a comma + tags(array, relevant tags for article): string # arrays are denoted via parentheses + authors(array): + name: string + email?: string + metadata?(object): # objects are also denoted via parentheses + updatedAt?: string, ISO timestamp of last update + approvedBy?: integer, id of approver + extra?: any, arbitrary extra data + (*): string, wildcard field +``` + +The above schema is equivalent to the following type definitions: + + + + ```ts + interface Article { + title: string; + subtitle?: string | null; + /** true when in draft state */ + draft?: boolean | null; + /** approval status */ + status?: 'PENDING' | 'APPROVED' | null; + /** the date of publication e.g. '2024-04-09' */ + date: string; + /** relevant tags for article */ + tags: string[]; + authors: { + name: string; + email?: string | null; + }[]; + metadata?: { + /** ISO timestamp of last update */ + updatedAt?: string | null; + /** id of approver */ + approvedBy?: number | null; + } | null; + /** arbitrary extra data */ + extra?: any; + /** wildcard field */ + [key: string]: any; + } + ``` + + + ```go + type Article struct { + Title string `json:"title"` + Subtitle string `json:"subtitle,omitempty" jsonschema:"required=false"` + Draft bool `json:"draft,omitempty"` // True when in draft state + Status string `json:"status,omitempty" jsonschema:"enum=PENDING,enum=APPROVED"` // Approval status + Date string `json:"date"` // The date of publication e.g. '2025-04-07' + Tags []string `json:"tags"` // Relevant tags for article + Authors []struct { + Name string `json:"name"` + Email string `json:"email,omitempty"` + } `json:"authors"` + Metadata struct { + UpdatedAt string `json:"updatedAt,omitempty"` // ISO timestamp of last update + ApprovedBy int `json:"approvedBy,omitempty"` // ID of approver + } `json:"metadata,omitempty"` + Extra any `json:"extra"` // Arbitrary extra data + } + ``` + + + Not applicable - use standard schema patterns in Python code. + + + +Picoschema supports scalar types `string`, `integer`, `number`, `boolean`, and +`any`. Objects, arrays, and enums are denoted by a parenthetical after the field +name. + +Objects defined by Picoschema have all properties required unless denoted +optional by `?`, and do not allow additional properties. When a property is +marked as optional, it is also made nullable to provide more leniency for LLMs +to return null instead of omitting a field. + +In an object definition, the special key `(*)` can be used to declare a +"wildcard" field definition. This will match any additional properties not +supplied by an explicit key. + +### JSON Schema + +Picoschema does not support many of the capabilities of full JSON schema. If you +require more robust schemas, you may supply a JSON Schema instead: + +```yaml +output: + schema: + type: object + properties: + field1: + type: number + minimum: 20 +``` + +### Schema references defined in code + + + + In addition to directly defining schemas in the `.prompt` file, you can + reference a schema registered with `defineSchema()` by name. 
If you're using + TypeScript, this approach will let you take advantage of the language's static + type checking features when you work with prompts. + + To register a schema using Zod: + + ```ts + import { z } from 'genkit'; + + const MenuItemSchema = ai.defineSchema( + 'MenuItemSchema', + z.object({ + dishname: z.string(), + description: z.string(), + calories: z.coerce.number(), + allergens: z.array(z.string()), + }), + ); + ``` + + Within your prompt, provide the name of the registered schema: + + ```dotprompt + --- + model: googleai/gemini-2.5-flash-latest + output: + schema: MenuItemSchema + --- + ``` + + The Dotprompt library will automatically resolve the name to the underlying + registered schema. You can then utilize the schema to strongly type the + output of a Dotprompt: + + ```ts + const menuPrompt = ai.prompt< + z.ZodTypeAny, // Input schema + typeof MenuItemSchema, // Output schema + z.ZodTypeAny // Custom options schema + >('menu'); + const { output } = await menuPrompt({ theme: 'medieval' }); + + // Now data is strongly typed as MenuItemSchema: + const dishName = output?.dishname; + const description = output?.description; + ``` + + + Schema references in Go are not yet implemented. Use Picoschema or JSON Schema directly in your `.prompt` files. + + + Not applicable - use standard schema patterns in Python code. + + + +## Prompt templates + +The portion of a `.prompt` file that follows the front matter (if present) is +the prompt itself, which will be passed to the model. While this prompt could be +a simple text string, very often you will want to incorporate user input into +the prompt. To do so, you can specify your prompt using the +Handlebars templating language. +Prompt templates can include placeholders that refer to the values defined by +your prompt's input schema. + +You already saw this in action in the section on input and output schemas: + +```dotprompt +--- +model: googleai/gemini-2.5-flash +input: + schema: + theme?: string + default: + theme: "pirate" +output: + schema: + dishname: string + description: string + calories: integer + allergens(array): string +--- +Invent a menu item for a {{theme}} themed restaurant. +``` + +In this example, the Handlebars expression, `{{theme}}`, +resolves to the value of the input's `theme` property when you run the +prompt. To pass input to the prompt: + + + + ```ts + const menuPrompt = ai.prompt('menu'); + const { output } = await menuPrompt({ theme: 'medieval' }); + ``` + + + ```go + menuPrompt := genkit.LookupPrompt(g, "menu") + + resp, err := menuPrompt.Execute(context.Background(), + ai.WithInput(map[string]any{"theme": "medieval"}), + ) + ``` + + + Not applicable - use standard templating patterns in Python code. + + + +Note that because the input schema declared the `theme` property to be optional +and provided a default, you could have omitted the property, +and the prompt would have resolved using the default value. + +Handlebars templates also support some limited logical constructs. For example, +as an alternative to providing a default, you could define the prompt using +Handlebars's `#if` helper: + +```dotprompt +--- +model: googleai/gemini-2.5-flash +input: + schema: + theme?: string +--- +Invent a menu item for a {{#if theme}}{{theme}} themed{{/if}} restaurant. +``` + +In this example, the prompt renders as "Invent a menu item for a restaurant" +when the `theme` property is unspecified. + +See the Handlebars +documentation for information on all of the built-in logical helpers. 
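+For example, the built-in `#each` helper iterates over an array input, and `#unless` renders a block when a value is empty or missing. A brief sketch combining them (the `tags` field is a hypothetical input used only for this illustration):
+
+```dotprompt
+---
+model: googleai/gemini-2.5-flash
+input:
+  schema:
+    tags?(array): string
+---
+Write a short blog post.
+
+{{#unless tags}}Choose any topic you like.{{/unless}}
+{{#each tags}}
+- Make sure to cover: {{this}}
+{{/each}}
+```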
+ +In addition to properties defined by your input schema, your templates can also +refer to values automatically defined by Genkit. The next few sections describe +these automatically-defined values and how you can use them. + +### Multi-message prompts + +By default, Dotprompt constructs a single message with a "user" role. +However, some prompts are best expressed as a combination of multiple +messages, such as a system prompt. + +The `{{role}}` helper provides a simple way to +construct multi-message prompts: + +```dotprompt +--- +model: googleai/gemini-2.5-flash +input: + schema: + userQuestion: string +--- +{{role "system"}} +You are a helpful AI assistant that really loves to talk about food. Try to work +food items into all of your conversations. +{{role "user"}} +{{userQuestion}} +``` + +Note that your final prompt must contain at least one `user` role. + +### Multi-modal prompts + +For models that support multimodal input, such as images alongside text, you can +use the `{{media}}` helper: + +```dotprompt +--- +model: googleai/gemini-2.5-flash +input: + schema: + photoUrl: string +--- +Describe this image in a detailed paragraph: + +{{media url=photoUrl}} +``` + +The URL can be `https:` or base64-encoded `data:` URIs for "inline" image usage. +In code, this would be: + + + + ```ts + const multimodalPrompt = ai.prompt('multimodal'); + const { text } = await multimodalPrompt({ + photoUrl: 'https://example.com/photo.jpg', + }); + ``` + + + ```go + multimodalPrompt := genkit.LookupPrompt(g, "multimodal") + + resp, err := multimodalPrompt.Execute(context.Background(), + ai.WithInput(map[string]any{"photoUrl": "https://example.com/photo.jpg"}), + ) + ``` + + + Not applicable - use standard multimodal patterns in Python code. + + + +See also [Multimodal input](/unified-docs/generating-content#multimodal-input), on the Generating content +page, for an example of constructing a `data:` URL. + +### Partials + +Partials are reusable templates that can be included inside any prompt. Partials +can be especially helpful for related prompts that share common behavior. + +When loading a prompt directory, any file prefixed with an underscore (`_`) is +considered a partial. So a file `_personality.prompt` might contain: + +```dotprompt +You should speak like a {{#if style}}{{style}}{{else}}helpful assistant.{{/if}}. +``` + +This can then be included in other prompts: + +```dotprompt +--- +model: googleai/gemini-2.5-flash +input: + schema: + name: string + style?: string +--- + +{{role "system"}} +{{>personality style=style}} + +{{role "user"}} +Give the user a friendly greeting. + +User's Name: {{name}} +``` + +Partials are inserted using the +`{{>NAME_OF_PARTIAL args...}}` +syntax. If no arguments are provided to the partial, it executes with the same +context as the parent prompt. + +Partials accept both named arguments as above or a single positional argument +representing the context. This can be helpful for tasks such as rendering +members of a list. 
+ +**\_destination.prompt** + +```dotprompt +- {{name}} ({{country}}) +``` + +**chooseDestination.prompt** + +```dotprompt +--- +model: googleai/gemini-2.5-flash +input: + schema: + destinations(array): + name: string + country: string +--- +Help the user decide between these vacation destinations: + +{{#each destinations}} +{{>destination this}} +{{/each}} +``` + +#### Defining partials in code + +You can also define partials in code: + + + + ```ts + ai.definePartial('personality', 'Talk like a {{#if style}}{{style}}{{else}}helpful assistant{{/if}}.'); + ``` + + Code-defined partials are available in all prompts. + + + ```go + genkit.DefinePartial(g, "personality", "Talk like a {{#if style}}{{style}}{{else}}helpful assistant{{/if}}.") + ``` + + Code-defined partials are available in all prompts. + + + Not applicable - use standard templating patterns in Python code. + + + +### Defining Custom Helpers + +You can define custom helpers to process and manage data inside of a prompt. +Helpers are registered globally: + + + + ```ts + ai.defineHelper('shout', (text: string) => text.toUpperCase()); + ``` + + Once a helper is defined you can use it in any prompt: + + ```dotprompt + --- + model: googleai/gemini-2.5-flash + input: + schema: + name: string + --- + + HELLO, {{shout name}}!!! + ``` + + + ```go + genkit.DefineHelper(g, "shout", func(input string) string { + return strings.ToUpper(input) + }) + ``` + + Once a helper is defined you can use it in any prompt: + + ```dotprompt + --- + model: googleai/gemini-2.5-flash + input: + schema: + name: string + --- + + HELLO, {{shout name}}!!! + ``` + + + Not applicable - use standard templating patterns in Python code. + + + +## Prompt variants + +Because prompt files are just text, you can (and should!) commit them to your +version control system, allowing you to compare changes over time easily. Often, +tweaked versions of prompts can only be fully tested in a production environment +side-by-side with existing versions. Dotprompt supports this through its +variants feature. + +To create a variant, create a `[name].[variant].prompt` file. For instance, if +you were using Gemini 2.0 Flash in your prompt but wanted to see if Gemini 2.5 +Pro would perform better, you might create two files: + +- `my_prompt.prompt`: the "baseline" prompt +- `my_prompt.gemini25pro.prompt`: a variant named `gemini25pro` + +To use a prompt variant: + + + + Specify the variant option when loading: + + ```ts + const myPrompt = ai.prompt('my_prompt', { variant: 'gemini25pro' }); + ``` + + + Specify the variant in the prompt name when loading: + + ```go + myPrompt := genkit.LookupPrompt(g, "my_prompt.gemini25pro") + ``` + + + Not applicable - use standard prompt patterns in Python code. + + + +The name of the variant is included in the metadata of generation traces, so you +can compare and contrast actual performance between variants in the Genkit trace +inspector. + +## Defining prompts in code + +All of the examples discussed so far have assumed that your prompts are defined +in individual `.prompt` files in a single directory (or subdirectories thereof), +accessible to your app at runtime. Dotprompt is designed around this setup, and +its authors consider it to be the best developer experience overall. + +However, if you have use cases that are not well supported by this setup, +you can also define prompts in code: + + + + Use the `definePrompt()` function. 
The first parameter is analogous to the front matter block of a + `.prompt` file; the second parameter can either be a Handlebars template string, + as in a prompt file, or a function that returns a `GenerateRequest`: + + ```ts + const myPrompt = ai.definePrompt({ + name: 'myPrompt', + model: 'googleai/gemini-2.5-flash', + input: { + schema: z.object({ + name: z.string(), + }), + }, + prompt: 'Hello, {{name}}. How are you today?', + }); + ``` + + ```ts + const myPrompt = ai.definePrompt({ + name: 'myPrompt', + model: 'googleai/gemini-2.5-flash', + input: { + schema: z.object({ + name: z.string(), + }), + }, + messages: async (input) => { + return [ + { + role: 'user', + content: [{ text: `Hello, ${input.name}. How are you today?` }], + }, + ]; + }, + }); + ``` + + + Use the `genkit.DefinePrompt()` function: + + ```go + type GeoQuery struct { + CountryCount int `json:"countryCount"` + } + + type CountryList struct { + Countries []string `json:"countries"` + } + + geographyPrompt, err := genkit.DefinePrompt( + g, "GeographyPrompt", + ai.WithSystem("You are a geography teacher. Respond only when the user asks about geography."), + ai.WithPrompt("Give me the {{countryCount}} biggest countries in the world by inhabitants."), + ai.WithConfig(&googlegenai.GeminiConfig{Temperature: 0.5}), + ai.WithInputType(GeoQuery{CountryCount: 10}), // Defaults to 10. + ai.WithOutputType(CountryList{}), + ) + if err != nil { + log.Fatal(err) + } + + resp, err := geographyPrompt.Execute(context.Background(), ai.WithInput(GeoQuery{CountryCount: 15})) + if err != nil { + log.Fatal(err) + } + + var list CountryList + if err := resp.Output(&list); err != nil { + log.Fatal(err) + } + + log.Printf("Countries: %s", list.Countries) + ``` + + Prompts may also be rendered into a `GenerateActionOptions` which may then be + processed and passed into `genkit.GenerateWithRequest()`: + + ```go + actionOpts, err := geographyPrompt.Render(ctx, ai.WithInput(GeoQuery{CountryCount: 15})) + if err != nil { + log.Fatal(err) + } + + // Do something with the value... + actionOpts.Config = &googlegenai.GeminiConfig{Temperature: 0.8} + + resp, err := genkit.GenerateWithRequest(ctx, g, actionOpts, nil, nil) // No middleware or streaming + ``` + + Note that all prompt options carry over to `GenerateActionOptions` with the + exception of `WithMiddleware()`, which must be passed separately if using + `Prompt.Render()` instead of `Prompt.Execute()`. + + + Define prompts directly in your Python code using standard patterns: + + ```python + # Define prompts as functions or templates + def create_greeting_prompt(name: str) -> str: + return f"Hello, {name}. How are you today?" 
+ + # Use with generate + response = await ai.generate( + prompt=create_greeting_prompt("Alice"), + model="googleai/gemini-2.5-flash" + ) + ``` + + + +## Next steps + +- Learn about [tool calling](/unified-docs/tool-calling) to give your prompts access to external functions and APIs +- Explore [retrieval-augmented generation (RAG)](/unified-docs/rag) to incorporate external knowledge into your prompts +- See [creating flows](/unified-docs/creating-flows) to build complex AI workflows using your prompts +- Check out the [evaluation guide](/docs/evaluation) for testing and improving your prompt performance diff --git a/src/content/docs/unified-docs/error-handling.mdx b/src/content/docs/unified-docs/error-handling.mdx new file mode 100644 index 00000000..a9fde76d --- /dev/null +++ b/src/content/docs/unified-docs/error-handling.mdx @@ -0,0 +1,647 @@ +--- +title: Error handling +description: Learn about error handling in Genkit, including specialized error types, best practices, and debugging techniques across JavaScript, Go, and Python. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +Proper error handling is crucial for building robust AI applications. Genkit provides different error handling mechanisms and best practices across languages to help you build reliable and secure applications. + +## Availability and Approach + + + + JavaScript provides specialized error types and comprehensive error handling: + - `GenkitError` for internal framework errors + - `UserFacingError` for application-level errors + - Automatic error sanitization in web hosting plugins + - Built-in error tracing and debugging + - Security-focused error handling + + + Go uses standard Go error handling patterns: + - Standard `error` interface for all errors + - Custom error types for specific scenarios + - Error wrapping and unwrapping + - Context-aware error handling + - Structured error information + + + Python uses standard exception handling: + - Built-in exception types + - Custom exception classes + - Try-catch error handling + - Exception chaining and context + - Framework-specific error handling + + + +## Error types and classification + + + + Genkit knows about two specialized types: `GenkitError` and `UserFacingError`. The separation between these two error types helps you better understand where your error is coming from. + + ### GenkitError + + `GenkitError` is intended for use by Genkit itself or Genkit plugins. These represent internal framework errors: + + ```typescript + import { GenkitError } from 'genkit'; + + // Example of a GenkitError (typically thrown by the framework) + throw new GenkitError({ + status: 'INVALID_ARGUMENT', + message: 'Model configuration is invalid', + details: { modelName: 'invalid-model' } + }); + ``` + + ### UserFacingError + + `UserFacingError` is intended for [`ContextProviders`](/docs/deploy-node) and your application code. These represent application-level errors that can be safely shown to users: + + ```typescript + import { UserFacingError } from 'genkit'; + + const myFlow = ai.defineFlow({ + name: 'userFlow', + inputSchema: z.object({ userId: z.string() }), + outputSchema: z.string(), + }, async (input) => { + const user = await getUserById(input.userId); + + if (!user) { + throw new UserFacingError({ + status: 'NOT_FOUND', + message: 'User not found', + }); + } + + return `Hello, ${user.name}!`; + }); + ``` + + ### Error sanitization + + Genkit plugins for web hosting (e.g. 
[`@genkit-ai/express`](https://js.api.genkit.dev/modules/_genkit-ai_express.html) or [`@genkit-ai/next`](https://js.api.genkit.dev/modules/_genkit-ai_next.html)) SHOULD capture all other Error types and instead report them as an internal error in the response. This adds a layer of security to your application by ensuring that internal details of your application do not leak to attackers. + + ```typescript + // Internal errors are automatically sanitized + const myFlow = ai.defineFlow({ + name: 'secureFlow', + inputSchema: z.string(), + outputSchema: z.string(), + }, async (input) => { + try { + const result = await someInternalOperation(input); + return result; + } catch (error) { + // This will be caught by the web hosting plugin + // and converted to a generic "Internal Error" response + throw new Error('Database connection failed: ' + sensitiveInfo); + } + }); + ``` + + + Go uses the standard error interface for all error handling. You can create custom error types for specific scenarios: + + ```go + import ( + "errors" + "fmt" + ) + + // Custom error types + type ValidationError struct { + Field string + Message string + } + + func (e *ValidationError) Error() string { + return fmt.Sprintf("validation error in field %s: %s", e.Field, e.Message) + } + + type NotFoundError struct { + Resource string + ID string + } + + func (e *NotFoundError) Error() string { + return fmt.Sprintf("%s with ID %s not found", e.Resource, e.ID) + } + + // Usage in flows + func myFlow(ctx context.Context, input FlowInput) (string, error) { + if input.UserID == "" { + return "", &ValidationError{ + Field: "userID", + Message: "userID is required", + } + } + + user, err := getUserByID(ctx, input.UserID) + if err != nil { + return "", fmt.Errorf("failed to get user: %w", err) + } + + if user == nil { + return "", &NotFoundError{ + Resource: "User", + ID: input.UserID, + } + } + + return fmt.Sprintf("Hello, %s!", user.Name), nil + } + ``` + + ### Error wrapping + + Use error wrapping to preserve error context: + + ```go + func processData(ctx context.Context, data string) error { + if err := validateData(data); err != nil { + return fmt.Errorf("data validation failed: %w", err) + } + + if err := saveData(ctx, data); err != nil { + return fmt.Errorf("failed to save data: %w", err) + } + + return nil + } + + // Check for specific error types + func handleError(err error) { + var validationErr *ValidationError + if errors.As(err, &validationErr) { + log.Printf("Validation error: %s", validationErr.Message) + return + } + + var notFoundErr *NotFoundError + if errors.As(err, ¬FoundErr) { + log.Printf("Resource not found: %s", notFoundErr.Resource) + return + } + + log.Printf("Unknown error: %v", err) + } + ``` + + + Python uses standard exception handling with custom exception classes: + + ```python + from genkit.types import GenkitError + + # Custom exception classes + class ValidationError(Exception): + def __init__(self, field: str, message: str): + self.field = field + self.message = message + super().__init__(f"Validation error in field {field}: {message}") + + class NotFoundError(Exception): + def __init__(self, resource: str, resource_id: str): + self.resource = resource + self.resource_id = resource_id + super().__init__(f"{resource} with ID {resource_id} not found") + + # Usage in flows + @ai.flow() + async def my_flow(input_data: dict, ctx): + if not input_data.get('user_id'): + raise ValidationError('user_id', 'user_id is required') + + try: + user = await get_user_by_id(input_data['user_id']) + except 
Exception as e: + raise GenkitError( + status='INTERNAL', + message='Failed to retrieve user' + ) from e + + if not user: + raise NotFoundError('User', input_data['user_id']) + + return f"Hello, {user['name']}!" + + # Error handling + try: + result = await my_flow({'user_id': '123'}, ctx) + except ValidationError as e: + print(f"Validation error: {e.message}") + except NotFoundError as e: + print(f"Resource not found: {e.resource}") + except GenkitError as e: + print(f"Genkit error: {e.message}") + except Exception as e: + print(f"Unexpected error: {e}") + ``` + + + +## Error handling in flows + + + + Implement comprehensive error handling in your flows: + + ```typescript + const robustFlow = ai.defineFlow({ + name: 'robustFlow', + inputSchema: z.object({ + text: z.string(), + options: z.object({ + maxRetries: z.number().default(3), + }).optional(), + }), + outputSchema: z.string(), + }, async (input, { logger }) => { + const maxRetries = input.options?.maxRetries ?? 3; + let lastError: Error | null = null; + + for (let attempt = 1; attempt <= maxRetries; attempt++) { + try { + logger.info(`Attempt ${attempt} of ${maxRetries}`); + + const result = await ai.generate({ + prompt: `Process this text: ${input.text}`, + }); + + if (!result.text) { + throw new UserFacingError({ + status: 'FAILED_PRECONDITION', + message: 'Generated content is empty', + }); + } + + return result.text; + } catch (error) { + lastError = error as Error; + logger.warn(`Attempt ${attempt} failed:`, { error: error.message }); + + // Don't retry for user-facing errors + if (error instanceof UserFacingError) { + throw error; + } + + // Wait before retrying (exponential backoff) + if (attempt < maxRetries) { + await new Promise(resolve => setTimeout(resolve, Math.pow(2, attempt) * 1000)); + } + } + } + + // All retries failed + throw new UserFacingError({ + status: 'DEADLINE_EXCEEDED', + message: 'Operation failed after multiple attempts', + details: { attempts: maxRetries, lastError: lastError?.message }, + }); + }); + ``` + + + Implement robust error handling with retries and proper error classification: + + ```go + func robustFlow(ctx context.Context, input FlowInput) (string, error) { + maxRetries := 3 + if input.Options != nil && input.Options.MaxRetries > 0 { + maxRetries = input.Options.MaxRetries + } + + var lastErr error + + for attempt := 1; attempt <= maxRetries; attempt++ { + log.Printf("Attempt %d of %d", attempt, maxRetries) + + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("Process this text: " + input.Text), + ) + + if err != nil { + lastErr = err + log.Printf("Attempt %d failed: %v", attempt, err) + + // Check if it's a permanent error (don't retry) + var validationErr *ValidationError + if errors.As(err, &validationErr) { + return "", err // Don't retry validation errors + } + + // Wait before retrying (exponential backoff) + if attempt < maxRetries { + backoff := time.Duration(math.Pow(2, float64(attempt))) * time.Second + select { + case <-time.After(backoff): + continue + case <-ctx.Done(): + return "", ctx.Err() + } + } + continue + } + + if resp.Text() == "" { + return "", &ValidationError{ + Field: "output", + Message: "generated content is empty", + } + } + + return resp.Text(), nil + } + + return "", fmt.Errorf("operation failed after %d attempts, last error: %w", maxRetries, lastErr) + } + ``` + + + Implement comprehensive error handling with retries: + + ```python + import asyncio + from typing import Optional + + @ai.flow() + async def robust_flow(input_data: dict, ctx): + text = 
input_data.get('text', '') + max_retries = input_data.get('options', {}).get('max_retries', 3) + + last_error = None + + for attempt in range(1, max_retries + 1): + try: + logger.info(f"Attempt {attempt} of {max_retries}") + + result = await ai.generate( + prompt=f"Process this text: {text}", + ) + + if not result.text: + raise ValidationError('output', 'generated content is empty') + + return result.text + + except ValidationError: + # Don't retry validation errors + raise + except Exception as e: + last_error = e + logger.warning(f"Attempt {attempt} failed: {e}") + + # Wait before retrying (exponential backoff) + if attempt < max_retries: + backoff = 2 ** attempt + await asyncio.sleep(backoff) + + # All retries failed + raise GenkitError( + status='DEADLINE_EXCEEDED', + message=f'Operation failed after {max_retries} attempts', + details={'last_error': str(last_error)} + ) + ``` + + + +## Error handling in tools + + + + Tools should handle errors gracefully and provide meaningful feedback: + + ```typescript + const databaseTool = ai.defineTool({ + name: 'databaseQuery', + description: 'Query the database for information', + inputSchema: z.object({ + query: z.string(), + table: z.string(), + }), + outputSchema: z.array(z.record(z.any())), + }, async (input, { logger }) => { + try { + // Validate input + if (!input.query.trim()) { + throw new UserFacingError({ + status: 'INVALID_ARGUMENT', + message: 'Query cannot be empty', + }); + } + + // Execute query + const results = await database.query(input.query, input.table); + + logger.info('Database query executed successfully', { + table: input.table, + resultCount: results.length, + }); + + return results; + } catch (error) { + if (error instanceof UserFacingError) { + throw error; + } + + // Log internal errors but don't expose details + logger.error('Database query failed', { + error: error.message, + table: input.table + }); + + throw new UserFacingError({ + status: 'INTERNAL', + message: 'Database query failed', + }); + } + }); + ``` + + + Implement proper error handling in tools: + + ```go + func databaseQueryTool(ctx context.Context, input DatabaseQueryInput) ([]map[string]interface{}, error) { + // Validate input + if strings.TrimSpace(input.Query) == "" { + return nil, &ValidationError{ + Field: "query", + Message: "query cannot be empty", + } + } + + // Execute query + results, err := database.Query(ctx, input.Query, input.Table) + if err != nil { + log.Printf("Database query failed: %v", err) + + // Check for specific database errors + if isDatabaseConnectionError(err) { + return nil, fmt.Errorf("database connection failed: %w", err) + } + + if isDatabaseTimeoutError(err) { + return nil, fmt.Errorf("database query timeout: %w", err) + } + + // Generic database error + return nil, fmt.Errorf("database query failed: %w", err) + } + + log.Printf("Database query executed successfully, returned %d results", len(results)) + return results, nil + } + + // Helper functions to classify database errors + func isDatabaseConnectionError(err error) bool { + // Implementation depends on your database driver + return strings.Contains(err.Error(), "connection") + } + + func isDatabaseTimeoutError(err error) bool { + // Implementation depends on your database driver + return strings.Contains(err.Error(), "timeout") + } + ``` + + + Implement comprehensive error handling in tools: + + ```python + @ai.tool() + def database_query_tool(input_data: dict, ctx) -> list: + """Query the database for information""" + + query = input_data.get('query', 
'').strip() + table = input_data.get('table', '') + + # Validate input + if not query: + raise ValidationError('query', 'query cannot be empty') + + if not table: + raise ValidationError('table', 'table name is required') + + try: + # Execute query + results = database.query(query, table) + + logger.info(f"Database query executed successfully, returned {len(results)} results") + return results + + except DatabaseConnectionError as e: + logger.error(f"Database connection failed: {e}") + raise GenkitError( + status='UNAVAILABLE', + message='Database connection failed' + ) + except DatabaseTimeoutError as e: + logger.error(f"Database query timeout: {e}") + raise GenkitError( + status='DEADLINE_EXCEEDED', + message='Database query timeout' + ) + except Exception as e: + logger.error(f"Database query failed: {e}") + raise GenkitError( + status='INTERNAL', + message='Database query failed' + ) + ``` + + + +## Best practices + +### Error classification + + + + - Use `UserFacingError` for errors that can be safely shown to users + - Use `GenkitError` for internal framework errors + - Let other errors be automatically sanitized by web hosting plugins + - Provide meaningful error messages and status codes + - Include relevant context in error details + + + - Create custom error types for different error categories + - Use error wrapping to preserve error context + - Implement error classification functions + - Provide structured error information + - Use appropriate logging levels for different error types + + + - Create custom exception classes for different error types + - Use exception chaining to preserve error context + - Implement proper exception handling hierarchies + - Provide meaningful error messages + - Use structured logging for error information + + + +### Security considerations + + + + - Never expose internal system details in user-facing errors + - Use error sanitization in production environments + - Log detailed error information for debugging + - Implement rate limiting for error-prone operations + - Validate all inputs to prevent injection attacks + + + - Sanitize error messages before returning to clients + - Use structured logging to separate internal and external error details + - Implement proper input validation + - Use context timeouts to prevent resource exhaustion + - Implement circuit breakers for external dependencies + + + - Sanitize exception messages in production + - Use different exception types for internal vs external errors + - Implement proper input validation and sanitization + - Use async timeouts for long-running operations + - Implement retry logic with exponential backoff + + + +### Debugging and monitoring + + + + - Use the Developer UI to inspect error traces + - Implement comprehensive logging with error context + - Set up error monitoring and alerting + - Use error tracking services for production + - Include correlation IDs for distributed tracing + + + - Use structured logging for better error analysis + - Implement error metrics and monitoring + - Use distributed tracing for complex workflows + - Set up alerting for error rate thresholds + - Include request IDs for error correlation + + + - Use structured logging for error analysis + - Implement error tracking and monitoring + - Use APM tools for error insights + - Set up alerting for error patterns + - Include trace IDs for error correlation + + + +## Next steps + +- Learn about [observability and monitoring](/unified-docs/observability-monitoring) for tracking and debugging errors +- Explore 
[developer tools](/unified-docs/developer-tools) for debugging error scenarios +- See [creating flows](/unified-docs/creating-flows) for implementing robust error handling in workflows +- Check out [tool calling](/unified-docs/tool-calling) for error handling in tool implementations diff --git a/src/content/docs/unified-docs/evaluation.mdx b/src/content/docs/unified-docs/evaluation.mdx new file mode 100644 index 00000000..6962b760 --- /dev/null +++ b/src/content/docs/unified-docs/evaluation.mdx @@ -0,0 +1,910 @@ +--- +title: Evaluation +description: Learn about Genkit's evaluation capabilities across JavaScript and Go, including inference-based and raw evaluation, dataset creation, and how to use the Developer UI and CLI for testing and analysis. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; +import ThemeImage from '@/components/ThemeImage.astro'; + +Evaluation is a form of testing that helps you validate your LLM's responses and +ensure they meet your quality bar. + +Genkit supports third-party evaluation tools through plugins, paired +with powerful observability features that provide insight into the runtime state +of your LLM-powered applications. Genkit tooling helps you automatically extract +data including inputs, outputs, and information from intermediate steps to +evaluate the end-to-end quality of LLM responses as well as understand the +performance of your system's building blocks. + +## Types of evaluation + +Genkit supports two types of evaluation: + +- **Inference-based evaluation**: This type of evaluation runs against a + collection of pre-determined inputs, assessing the corresponding outputs for + quality. + + This is the most common evaluation type, suitable for most use cases. This approach tests a system's actual output for each evaluation run. + + You can perform the quality assessment manually, by visually inspecting the results. Alternatively, you can automate the assessment by using an evaluation metric. + +- **Raw evaluation**: This type of evaluation directly assesses the quality of + inputs without any inference. This approach typically is used with automated + evaluation using metrics. All required fields for evaluation (e.g., `input`, + `context`, `output` and `reference`) must be present in the input dataset. This + is useful when you have data coming from an external source (e.g., collected + from your production traces) and you want to have an objective measurement of + the quality of the collected data. + + For more information, see the [Advanced use](#advanced-use) section of this page. + +This section explains how to perform inference-based evaluation using Genkit. + +## Quick start + +### Setup + +1. Use an existing Genkit app or create a new one by following the Getting started guide for your language: + + + + Follow the [Get started](/docs/get-started) guide. + + + Follow the [Get started](/go/docs/get-started-go) guide. + + + Evaluation features are not yet available for Python. You can use external evaluation tools with Python Genkit applications. + + + +2. Add the following code to define a simple RAG application to evaluate. For this guide, we use a dummy retriever that always returns the same documents. 
+ + + + ```js + import { genkit, z, Document } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + + // Initialize Genkit + export const ai = genkit({ plugins: [googleAI()] }); + + // Dummy retriever that always returns the same docs + export const dummyRetriever = ai.defineRetriever( + { + name: 'dummyRetriever', + }, + async (i) => { + const facts = ["Dog is man's best friend", 'Dogs have evolved and were domesticated from wolves']; + // Just return facts as documents. + return { documents: facts.map((t) => Document.fromText(t)) }; + }, + ); + + // A simple question-answering flow + export const qaFlow = ai.defineFlow( + { + name: 'qaFlow', + inputSchema: z.object({ query: z.string() }), + outputSchema: z.object({ answer: z.string() }), + }, + async ({ query }) => { + const factDocs = await ai.retrieve({ + retriever: dummyRetriever, + query, + }); + + const { text } = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: `Answer this question with the given context ${query}`, + docs: factDocs, + }); + return { answer: text }; + }, + ); + ``` + + + ```go + package main + + import ( + "context" + "fmt" + "log" + + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlegenai" + ) + + func main() { + ctx := context.Background() + + // Initialize Genkit + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlegenai.GoogleAI{}), + genkit.WithDefaultModel("googleai/gemini-2.5-flash"), + ) + if err != nil { + log.Fatalf("Genkit initialization error: %v", err) + } + + // Dummy retriever that always returns the same facts + dummyRetrieverFunc := func(ctx context.Context, req *ai.RetrieverRequest) (*ai.RetrieverResponse, error) { + facts := []string{ + "Dog is man's best friend", + "Dogs have evolved and were domesticated from wolves", + } + // Just return facts as documents. + var docs []*ai.Document + for _, fact := range facts { + docs = append(docs, ai.DocumentFromText(fact, nil)) + } + return &ai.RetrieverResponse{Documents: docs}, nil + } + factsRetriever := genkit.DefineRetriever(g, "local", "dogFacts", dummyRetrieverFunc) + + m := googlegenai.GoogleAIModel(g, "gemini-2.5-flash") + if m == nil { + log.Fatal("failed to find model") + } + + // A simple question-answering flow + genkit.DefineFlow(g, "qaFlow", func(ctx context.Context, query string) (string, error) { + factDocs, err := ai.Retrieve(ctx, factsRetriever, ai.WithTextDocs(query)) + if err != nil { + return "", fmt.Errorf("retrieval failed: %w", err) + } + llmResponse, err := genkit.Generate(ctx, g, + ai.WithModelName("googleai/gemini-2.5-flash"), + ai.WithPrompt("Answer this question with the given context: %s", query), + ai.WithDocs(factDocs.Documents...) + ) + if err != nil { + return "", fmt.Errorf("generation failed: %w", err) + } + return llmResponse.Text(), nil + }) + } + ``` + + + Evaluation features are not yet available for Python. You can use external evaluation tools with Python Genkit applications. + + + +3. (Optional) Add evaluation metrics to your application to use while evaluating: + + + + This guide uses the `MALICIOUSNESS` metric from the `genkitEval` plugin. 
+ + ```js + import { genkitEval, GenkitMetric } from '@genkit-ai/evaluator'; + import { googleAI } from '@genkit-ai/googleai'; + + export const ai = genkit({ + plugins: [ + googleAI(), + // Add this plugin to your Genkit initialization block + genkitEval({ + judge: googleAI.model('gemini-2.5-flash'), + metrics: [GenkitMetric.MALICIOUSNESS], + }), + ], + }); + ``` + + **Note:** The configuration above requires installation of the [`@genkit-ai/evaluator`](https://www.npmjs.com/package/@genkit-ai/evaluator) package. + + ```bash + npm install @genkit-ai/evaluator + ``` + + + This guide uses the `EvaluatorRegex` metric from the `evaluators` package. + + ```go + import ( + "github.com/firebase/genkit/go/plugins/evaluators" + ) + + func main() { + // ... + + metrics := []evaluators.MetricConfig{ + { + MetricType: evaluators.EvaluatorRegex, + }, + } + + // Initialize Genkit + g, err := genkit.Init(ctx, + genkit.WithPlugins( + &googlegenai.GoogleAI{}, + &evaluators.GenkitEval{Metrics: metrics}, // Add this plugin + ), + genkit.WithDefaultModel("googleai/gemini-2.5-flash"), + ) + } + ``` + + **Note:** Ensure that the `evaluators` package is installed in your go project: + + ```bash + go get github.com/firebase/genkit/go/plugins/evaluators + ``` + + + Not applicable - evaluation features are not yet available for Python. + + + +4. Start your Genkit application: + + + + ```bash + genkit start -- + ``` + + + ```bash + genkit start -- go run main.go + ``` + + + Not applicable - evaluation features are not yet available for Python. + + + +### Create a dataset + +Create a dataset to define the examples we want to use for evaluating our flow. + +1. Go to the Dev UI at `http://localhost:4000` and click the **Datasets** button + to open the Datasets page. + +2. Click on the **Create Dataset** button to open the create dataset dialog. + + a. Provide a `datasetId` for your new dataset. This guide uses + `myFactsQaDataset`. + + b. Select `Flow` dataset type. + + c. Leave the validation target field empty and click **Save** + +3. Your new dataset page appears, showing an empty dataset. Add examples to it by following these steps: + + a. Click the **Add example** button to open the example editor panel. + + b. Only the `input` field is required. Add the input data: + + + + Enter `{"query": "Who is man's best friend?"}` in the `input` field, and click **Save** to add the example to your dataset. + + Repeat this process to add more examples: + + ``` + {"query": "Can I give milk to my cats?"} + {"query": "From which animals did dogs evolve?"} + ``` + + + Enter `"Who is man's best friend?"` in the `Input` field, and click **Save** to add the example to your dataset. + + If you have configured the `EvaluatorRegex` metric and would like to try it out, you need to specify a Reference string that contains the pattern to match the output against. For the preceding input, set the `Reference output` text to `"(?i)dog"`, which is a case-insensitive regular-expression pattern to match the word "dog" in the flow output. + + Repeat this process to add more examples: + + ```text + "Can I give milk to my cats?" + "From which animals did dogs evolve?" + ``` + + If you are using the regular-expression evaluator, use the corresponding reference strings: + + ```text + "(?i)don't know" + "(?i)wolf|wolves" + ``` + + + Not applicable - evaluation features are not yet available for Python. + + + +By the end of this step, your dataset should have 3 examples in it, with the +values mentioned above. 
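+
+If you prefer to keep these examples in version control, the same data can also
+be expressed as a JSON input file and passed to the CLI later (see the
+`eval:flow` command section below). A minimal sketch for the JS flow's
+`{ query: string }` input schema, using `testInputs.json` purely as a
+conventional file name:
+
+```json
+[
+  { "input": { "query": "Who is man's best friend?" } },
+  { "input": { "query": "Can I give milk to my cats?" } },
+  { "input": { "query": "From which animals did dogs evolve?" } }
+]
+```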
+ +### Run evaluation and view results + +To start evaluating the flow, click the **Run new evaluation** button on your +dataset page. You can also start a new evaluation from the _Evaluations_ tab. + +1. Select the `Flow` radio button to evaluate a flow. + +2. Select `qaFlow` as the target flow to evaluate. + +3. Select `myFactsQaDataset` as the target dataset to use for evaluation. + +4. (Optional) If you have installed an evaluator metric using Genkit plugins, + you can see these metrics in this page. Select the metrics that you want to use + with this evaluation run. This is entirely optional: Omitting this step will + still return the results in the evaluation run, but without any associated + metrics. + +5. Finally, click **Run evaluation** to start evaluation. Depending on the flow + you're testing, this may take a while. Once the evaluation is complete, a + success message appears with a link to view the results. Click on the link to go + to the _Evaluation details_ page. + +You can see the details of your evaluation on this page, including original +input, extracted context and metrics (if any). + +## Core concepts + +### Terminology + +- **Evaluation**: An evaluation is a process that assesses system performance. In Genkit, such a system is usually a Genkit primitive, such as a flow or a + model. An evaluation can be automated or manual (human evaluation). + +- **Bulk inference** Inference is the act of running an input on a flow or model to get the corresponding output. Bulk inference involves performing inference on multiple inputs simultaneously. + +- **Metric** An evaluation metric is a criterion on which an inference is scored. Examples include accuracy, faithfulness, maliciousness, whether the output is in English, etc. + +- **Dataset** A dataset is a collection of examples to use for inference-based + evaluation. A dataset typically consists of `input` and optional `reference` + fields. The `reference` field does not affect the inference step of evaluation + but it is passed verbatim to any evaluation metrics. In Genkit, you can create a + dataset through the Dev UI. There are two types of datasets in Genkit: _Flow_ + datasets and _Model_ datasets. + +### Schema validation + +Depending on the type, datasets have schema validation support in the Dev UI: + +- Flow datasets support validation of the `input` and `reference` fields of the dataset against a flow in the Genkit application. Schema validation is optional and is only enforced if a schema is specified on the target flow. + +- Model datasets have implicit schema, supporting both `string` and `GenerateRequest` input types. String validation provides a convenient way to evaluate simple text prompts, while `GenerateRequest` provides complete control for advanced use cases (e.g. providing model parameters, message history, tools, etc). + +Note: Schema validation is a helper tool for editing examples, but it is +possible to save an example with invalid schema. These examples may fail when +running an evaluation. 
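+
+For illustration, a single example in a model dataset that uses the richer
+`GenerateRequest` form might look roughly like the following. This is a sketch:
+the exact request shape can differ slightly between Genkit versions, and the
+`reference` value here assumes the regex evaluator described above.
+
+```json
+{
+  "input": {
+    "messages": [{ "role": "user", "content": [{ "text": "Who is man's best friend?" }] }],
+    "config": { "temperature": 0.2 }
+  },
+  "reference": "(?i)dog"
+}
+```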
+ +## Supported evaluators + +### Genkit evaluators + +Genkit includes built-in evaluators to help you get started: + + + + Genkit includes a small number of native evaluators, inspired by [RAGAS](https://docs.ragas.io/en/stable/): + + - **Faithfulness** -- Measures the factual consistency of the generated answer against the given context + - **Answer Relevancy** -- Assesses how pertinent the generated answer is to the given prompt + - **Maliciousness** -- Measures whether the generated output intends to deceive, harm, or exploit + + + Genkit includes a small number of built-in evaluators, ported from the [JS evaluators plugin](https://js.api.genkit.dev/enums/_genkit-ai_evaluator.GenkitMetric.html): + + - **EvaluatorDeepEqual** -- Checks if the generated output is deep-equal to the reference output provided. + - **EvaluatorRegex** -- Checks if the generated output matches the regular expression provided in the reference field. + - **EvaluatorJsonata** -- Checks if the generated output matches the [JSONATA](https://jsonata.org/) expression provided in the reference field. + + + Evaluation features are not yet available for Python. + + + +### Evaluator plugins + +Genkit supports additional evaluators through plugins: + + + + - [Vertex Rapid Evaluators](/docs/plugins/vertex-ai#evaluators) via the VertexAI Plugin + - Custom evaluators through the plugin system + + + - Custom evaluators through the plugin system + - Third-party evaluation tools through plugins + + + Not applicable - evaluation features are not yet available for Python. + + + +## Advanced use + +### Evaluation comparison + +The Developer UI offers visual tools for side-by-side comparison of multiple +evaluation runs. This feature allows you to analyze variations across different +executions within a unified interface, making it easier to assess changes in +output quality. Additionally, you can highlight outputs based on the performance +of specific metrics, indicating improvements or regressions. + +When comparing evaluations, one run is designated as the _Baseline_. All other +evaluations are compared against this baseline to determine whether their +performance has improved or regressed. + + + +#### Prerequisites + +To use the evaluation comparison feature, the following conditions must be met: + +- Evaluations must originate from a dataset source. Evaluations from file + sources are not comparable. +- All evaluations being compared must be from the same dataset. +- For metric highlighting, all evaluations must use at least one common + metric that produces a `number` or `boolean` score. + +#### Comparing evaluations + +1. Ensure you have at least two evaluation runs performed on the same dataset. + For instructions, refer to the + [Run evaluation section](#run-evaluation-and-view-results). + +2. In the Developer UI, navigate to the **Datasets** page. + +3. Select the relevant dataset and open its **Evaluations** tab. You should see + all evaluation runs associated with that dataset. + +4. Choose one evaluation to serve as the baseline for comparison. + +5. On the evaluation results page, click the **+ Comparison** button. If this + button is disabled, it means no other comparable evaluations are available + for this dataset. + +6. A new column will appear with a dropdown menu. Select another evaluation + from this menu to load its results alongside the baseline. + +You can now view the outputs side-by-side to visually inspect differences in +quality. This feature supports comparing up to three evaluations simultaneously. 
+ +##### Metric highlighting (Optional) + +If your evaluations include metrics, you can enable metric highlighting to +color-code the results. This feature helps you quickly identify changes in +performance: improvements are colored green, while regressions are red. + +Note that highlighting is only supported for numeric and boolean metrics, and +the selected metric must be present in all evaluations being compared. + +To enable metric highlighting: + +1. After initiating a comparison, a **Choose a metric to compare** menu will + become available. + +2. Select a metric from the dropdown. By default, lower scores (for numeric + metrics) and `false` values (for boolean metrics) are considered + improvements and highlighted in green. You can reverse this logic by + ticking the checkbox in the menu. + +The comparison columns will now be color-coded according to the selected metric +and configuration, providing an at-a-glance overview of performance changes. + +### Evaluation using the CLI + +Genkit CLI provides a rich API for performing evaluation. This is especially +useful in environments where the Dev UI is not available (e.g. in a CI/CD +workflow). + +Genkit CLI provides 3 main evaluation commands: `eval:flow`, `eval:extractData`, +and `eval:run`. + +#### `eval:flow` command + +The `eval:flow` command runs inference-based evaluation on an input dataset. +This dataset may be provided either as a JSON file or by referencing an existing +dataset in your Genkit runtime. + +```bash +# Referencing an existing dataset +genkit eval:flow qaFlow --input myFactsQaDataset + +# or, using a dataset from a file +genkit eval:flow qaFlow --input testInputs.json +``` + +Note: Make sure that you start your genkit app before running these CLI +commands. + + + + ```bash + genkit start -- + ``` + + + ```bash + genkit start -- go run main.go + ``` + + + Not applicable - evaluation features are not yet available for Python. + + + +Here, `testInputs.json` should be an array of objects containing an `input` +field and an optional `reference` field: + + + + ```json + [ + { + "input": { "query": "What is the French word for Cheese?" } + }, + { + "input": { "query": "What green vegetable looks like cauliflower?" }, + "reference": "Broccoli" + } + ] + ``` + + + ```json + [ + { + "input": "What is the French word for Cheese?" + }, + { + "input": "What green vegetable looks like cauliflower?", + "reference": "Broccoli" + } + ] + ``` + + + Not applicable - evaluation features are not yet available for Python. + + + +If your flow requires auth, you may specify it using the `--context` argument: + +```bash +genkit eval:flow qaFlow --input testInputs.json --context '{"auth": {"email_verified": true}}' +``` + +By default, the `eval:flow` and `eval:run` commands use all available metrics +for evaluation. To run on a subset of the configured evaluators, use the +`--evaluators` flag and provide a comma-separated list of evaluators by name: + + + + ```bash + genkit eval:flow qaFlow --input testInputs.json --evaluators=genkitEval/maliciousness,genkitEval/answer_relevancy + ``` + + + ```bash + genkit eval:flow qaFlow --input testInputs.json --evaluators=genkitEval/regex,genkitEval/jsonata + ``` + + + Not applicable - evaluation features are not yet available for Python. + + + +You can view the results of your evaluation run in the Dev UI at +`localhost:4000/evaluate`. 
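+
+Because these commands only need a running Genkit runtime, they also fit into a
+CI job. A rough sketch, assuming a Node.js app started via `npm run dev` and a
+fixed wait (substitute your own start command and a proper readiness check):
+
+```bash
+# Start the app under the Genkit runtime in the background
+genkit start -- npm run dev &
+sleep 10
+
+# Run inference-based evaluation against a checked-in dataset file
+genkit eval:flow qaFlow --input testInputs.json --evaluators=genkitEval/maliciousness
+```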
+ +#### `eval:extractData` and `eval:run` commands + +To support _raw evaluation_, Genkit provides tools to extract data from traces +and run evaluation metrics on extracted data. This is useful, for example, if +you are using a different framework for evaluation or if you are collecting +inferences from a different environment to test locally for output quality. + +You can batch run your Genkit flow and add a unique label to the run which then +can be used to extract an _evaluation dataset_. A raw evaluation dataset is a +collection of inputs for evaluation metrics, _without_ running any prior +inference. + +Run your flow over your test inputs: + + + + ```bash + genkit flow:batchRun qaFlow testInputs.json --label firstRunSimple + ``` + + + ```bash + genkit flow:batchRun qaFlow testInputs.json + ``` + + + Not applicable - evaluation features are not yet available for Python. + + + +Extract the evaluation data: + + + + ```bash + genkit eval:extractData qaFlow --label firstRunSimple --output factsEvalDataset.json + ``` + + + ```bash + genkit eval:extractData qaFlow --maxRows 2 --output factsEvalDataset.json + ``` + + + Not applicable - evaluation features are not yet available for Python. + + + +The exported data has a format different from the dataset format presented +earlier. This is because this data is intended to be used with evaluation +metrics directly, without any inference step. Here is the syntax of the +extracted data. + +```json +Array<{ + "testCaseId": string, + "input": any, + "output": any, + "context": any[], + "traceIds": string[], +}>; +``` + +The data extractor automatically locates retrievers and adds the produced docs +to the context array. You can run evaluation metrics on this extracted dataset +using the `eval:run` command. + +```bash +genkit eval:run factsEvalDataset.json +``` + +By default, `eval:run` runs against all configured evaluators, and as with +`eval:flow`, results for `eval:run` appear in the evaluation page of Developer +UI, located at `localhost:4000/evaluate`. + +### Batching evaluations + + + + :::note + This feature is only available in the Node.js SDK. + ::: + + You can speed up evaluations by processing the inputs in batches using the CLI and Dev UI. When batching is enabled, the input data is grouped into batches of size `batchSize`. The data points in a batch are all run in parallel to provide significant performance improvements, especially when dealing with large datasets and/or complex evaluators. By default (when the flag is omitted), batching is disabled. + + The `batchSize` option has been integrated into the `eval:flow` and `eval:run` CLI commands. When a `batchSize` greater than 1 is provided, the evaluator will process the dataset in chunks of the specified size. This feature only affects the evaluator logic and not inference (when using `eval:flow`). Here are some examples of enabling batching with the CLI: + + ```bash + genkit eval:flow myFlow --input yourDataset.json --evaluators=custom/myEval --batchSize 10 + ``` + + Or, with `eval:run` + + ```bash + genkit eval:run yourDataset.json --evaluators=custom/myEval --batchSize 10 + ``` + + Batching is also available in the Dev UI for Genkit (JS) applications. You can set batch size when running a new evaluation, to enable parallelization. + + + Batching features are not yet available for Go. Evaluations run sequentially. + + + Not applicable - evaluation features are not yet available for Python. 
+ + + +### Custom extractors + + + + Genkit provides reasonable default logic for extracting the necessary fields + (`input`, `output` and `context`) while doing an evaluation. However, you may + find that you need more control over the extraction logic for these fields. + Genkit supports customs extractors to achieve this. You can provide custom + extractors to be used in `eval:extractData` and `eval:flow` commands. + + First, as a preparatory step, introduce an auxiliary step in our `qaFlow` + example: + + ```js + export const qaFlow = ai.defineFlow( + { + name: 'qaFlow', + inputSchema: z.object({ query: z.string() }), + outputSchema: z.object({ answer: z.string() }), + }, + async ({ query }) => { + const factDocs = await ai.retrieve({ + retriever: dummyRetriever, + query, + }); + const factDocsModified = await ai.run('factModified', async () => { + // Let us use only facts that are considered silly. This is a + // hypothetical step for demo purposes, you may perform any + // arbitrary task inside a step and reference it in custom + // extractors. + // + // Assume you have a method that checks if a fact is silly + return factDocs.filter((d) => isSillyFact(d.text)); + }); + + const { text } = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: `Answer this question with the given context ${query}`, + docs: factDocsModified, + }); + return { answer: text }; + }, + ); + ``` + + Next, configure a custom extractor to use the output of the `factModified` step + when evaluating this flow. + + If you don't have one a tools-config file to configure custom extractors, add + one named `genkit-tools.conf.js` to your project root. + + ```bash + cd /path/to/your/genkit/app + + touch genkit-tools.conf.js + ``` + + In the tools config file, add the following code: + + ```js + module.exports = { + evaluators: [ + { + actionRef: '/flow/qaFlow', + extractors: { + context: { outputOf: 'factModified' }, + }, + }, + ], + }; + ``` + + This config overrides the default extractors of Genkit's tooling, specifically + changing what is considered as `context` when evaluating this flow. + + Running evaluation again reveals that context is now populated as the output of + the step `factModified`. + + ```bash + genkit eval:flow qaFlow --input testInputs.json + ``` + + Evaluation extractors are specified as follows: + + - `evaluators` field accepts an array of EvaluatorConfig objects, which are + scoped by `flowName` + - `extractors` is an object that specifies the extractor overrides. The + current supported keys in `extractors` are `[input, output, context]`. The + acceptable value types are: + - `string` - this should be a step name, specified as a string. The output + of this step is extracted for this key. + - `{ inputOf: string }` or `{ outputOf: string }` - These objects + represent specific channels (input or output) of a step. For example, `{ + inputOf: 'foo-step' }` would extract the input of step `foo-step` for + this key. + - `(trace) => string;` - For further flexibility, you can provide a + function that accepts a Genkit trace and returns an `any`-type value, + and specify the extraction logic inside this function. Refer to + `genkit/genkit-tools/common/src/types/trace.ts` for the exact TraceData + schema. + + **Note:** The extracted data for all these extractors is the type corresponding + to the extractor. For example, if you use context: `{ outputOf: 'foo-step' }`, + and `foo-step` returns an array of objects, the extracted context is also an + array of objects. 
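+
+  As a rough sketch of the function form, you can pass a function that receives
+  the full Genkit trace and returns whatever value should be used for that key.
+  The example below simply serializes the whole trace and is purely
+  illustrative; consult the `TraceData` schema referenced above for the real
+  structure.
+
+  ```js
+  module.exports = {
+    evaluators: [
+      {
+        actionRef: '/flow/qaFlow',
+        extractors: {
+          // Function extractor: receives the Genkit trace, may return any value.
+          // Serializing the whole trace is for illustration only.
+          context: (trace) => JSON.stringify(trace),
+        },
+      },
+    ],
+  };
+  ```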
+ + + Custom extractors are not yet available for Go. Use the default extraction logic provided by Genkit. + + + Not applicable - evaluation features are not yet available for Python. + + + +### Synthesizing test data using an LLM + + + + Here is an example flow that uses a PDF file to generate potential user + questions. + + ```ts + import { genkit, z } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + import { chunk } from 'llm-chunk'; // npm install llm-chunk + import path from 'path'; + import { readFile } from 'fs/promises'; + import pdf from 'pdf-parse'; // npm install pdf-parse + + const ai = genkit({ plugins: [googleAI()] }); + + const chunkingConfig = { + minLength: 1000, // number of minimum characters into chunk + maxLength: 2000, // number of maximum characters into chunk + splitter: 'sentence', // paragraph | sentence + overlap: 100, // number of overlap chracters + delimiters: '', // regex for base split method + } as any; + + async function extractText(filePath: string) { + const pdfFile = path.resolve(filePath); + const dataBuffer = await readFile(pdfFile); + const data = await pdf(dataBuffer); + return data.text; + } + + export const synthesizeQuestions = ai.defineFlow( + { + name: 'synthesizeQuestions', + inputSchema: z.object({ filePath: z.string().describe('PDF file path') }), + outputSchema: z.object({ + questions: z.array( + z.object({ + query: z.string(), + }), + ), + }), + }, + async ({ filePath }) => { + filePath = path.resolve(filePath); + // `extractText` loads the PDF and extracts its contents as text. + const pdfTxt = await ai.run('extract-text', () => extractText(filePath)); + + const chunks = await ai.run('chunk-it', async () => chunk(pdfTxt, chunkingConfig)); + + const questions = []; + for (var i = 0; i < chunks.length; i++) { + const { text } = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: { + text: `Generate one question about the following text: ${chunks[i]}`, + }, + }); + questions.push({ query: text }); + } + return { questions }; + }, + ); + ``` + + You can then use this command to export the data into a file and use for + evaluation. + + ```bash + genkit flow:run synthesizeQuestions '{"filePath": "my_input.pdf"}' --output synthesizedQuestions.json + ``` + + + Test data synthesis features are not yet available for Go. You can create test datasets manually or use external tools to generate evaluation data. + + + Not applicable - evaluation features are not yet available for Python. + + + +## Next steps + +- Learn about [creating flows](/unified-docs/creating-flows) to build AI workflows that can be evaluated +- Explore [retrieval-augmented generation (RAG)](/unified-docs/rag) for building knowledge-based systems that benefit from evaluation +- See [tool calling](/unified-docs/tool-calling) for creating AI agents that can be tested with evaluation metrics +- Check out the [developer tools documentation](/docs/devtools) for more information about the Genkit Developer UI diff --git a/src/content/docs/unified-docs/frameworks/express.mdx b/src/content/docs/unified-docs/frameworks/express.mdx new file mode 100644 index 00000000..e01b43bd --- /dev/null +++ b/src/content/docs/unified-docs/frameworks/express.mdx @@ -0,0 +1,969 @@ +--- +title: Express.js Integration +description: Learn how to integrate Genkit with Express.js applications across JavaScript, Go, and Python, including REST API endpoints, authentication, and deployment strategies. 
+--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; +import { Badge } from '@astrojs/starlight/components'; + + + +The Express.js integration allows you to expose Genkit flows and actions as REST API endpoints, making it easy to integrate AI capabilities into existing Express-based backends or deploy them to any platform that supports Express.js applications. + +:::note[Framework Availability] +Express.js integration is primarily available for JavaScript/Node.js. For other languages, see equivalent frameworks: +- **Go**: Gin or standard `net/http` +- **Python**: Flask or FastAPI +::: + +## Installation and Setup + + + + Install the Express plugin: + + ```bash + npm install @genkit-ai/express express + npm install -D @types/express # if using TypeScript + ``` + + Basic setup with Express integration: + + ```ts + import { genkit, z } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + import { expressHandler } from '@genkit-ai/express'; + import express from 'express'; + + const ai = genkit({ + plugins: [googleAI()], + model: googleAI.model('gemini-2.5-flash'), + }); + + // Define a Genkit flow + const simpleFlow = ai.defineFlow( + { + name: 'simpleFlow', + inputSchema: z.object({ input: z.string() }), + outputSchema: z.object({ output: z.string() }), + }, + async ({ input }, { sendChunk }) => { + const { text } = await ai.generate({ + prompt: input, + onChunk: (c) => sendChunk(c.text), + }); + return { output: text }; + }, + ); + + // Create Express app + const app = express(); + app.use(express.json()); + + // Expose Genkit flow as REST endpoint + app.post('/simpleFlow', expressHandler(simpleFlow)); + + app.listen(8080, () => { + console.log('Express server listening on port 8080'); + }); + ``` + + + For Go applications, use Gin or standard HTTP handlers. Here's an equivalent setup: + + ```go + package main + + import ( + "context" + "encoding/json" + "net/http" + "github.com/gin-gonic/gin" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/plugins/googlegenai" + ) + + type FlowInput struct { + Input string `json:"input"` + } + + type FlowOutput struct { + Output string `json:"output"` + } + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlegenai.GoogleAI{}), + ) + if err != nil { + log.Fatal(err) + } + + // Define flow + simpleFlow := genkit.DefineFlow(g, "simpleFlow", + func(ctx context.Context, input FlowInput) (FlowOutput, error) { + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt(input.Input), + ) + if err != nil { + return FlowOutput{}, err + } + return FlowOutput{Output: resp.Text()}, nil + }, + ) + + // Setup Gin router + r := gin.Default() + + r.POST("/simpleFlow", func(c *gin.Context) { + var input FlowInput + if err := c.ShouldBindJSON(&input); err != nil { + c.JSON(400, gin.H{"error": err.Error()}) + return + } + + result, err := simpleFlow.Run(ctx, input) + if err != nil { + c.JSON(500, gin.H{"error": err.Error()}) + return + } + + c.JSON(200, result) + }) + + r.Run(":8080") + } + ``` + + + For Python applications, use Flask or FastAPI. 
Here's a Flask equivalent: + + ```python + from flask import Flask, request, jsonify + from genkit.ai import Genkit + from genkit.plugins.google_genai import GoogleGenai + + ai = Genkit( + plugins=[GoogleGenai()], + ) + + # Define flow + @ai.define_flow( + input_schema={"input": str}, + output_schema={"output": str} + ) + async def simple_flow(input_data): + response = await ai.generate(input_data["input"]) + return {"output": response.text} + + # Create Flask app + app = Flask(__name__) + + @app.route('/simpleFlow', methods=['POST']) + async def handle_simple_flow(): + try: + input_data = request.get_json() + result = await simple_flow(input_data) + return jsonify(result) + except Exception as e: + return jsonify({"error": str(e)}), 500 + + if __name__ == '__main__': + app.run(host='0.0.0.0', port=8080) + ``` + + + +## Client Integration + + + + Access your Express-hosted flows from client applications: + + ```ts + import { runFlow, streamFlow } from 'genkit/beta/client'; + + // Basic flow execution + const result = await runFlow({ + url: 'http://localhost:8080/simpleFlow', + input: { input: 'Tell me a joke about programming' }, + }); + console.log(result); // { output: "Why do programmers prefer dark mode?..." } + + // Streaming flow execution + const streamResult = streamFlow({ + url: 'http://localhost:8080/simpleFlow', + input: { input: 'Write a story about AI' }, + }); + + for await (const chunk of streamResult.stream) { + console.log('Chunk:', chunk); + } + + const finalResult = await streamResult.output; + console.log('Final result:', finalResult); + ``` + + ### Frontend Integration + + ```ts + // React component example + import React, { useState } from 'react'; + import { runFlow } from 'genkit/beta/client'; + + function AIChat() { + const [input, setInput] = useState(''); + const [output, setOutput] = useState(''); + const [loading, setLoading] = useState(false); + + const handleSubmit = async (e: React.FormEvent) => { + e.preventDefault(); + setLoading(true); + + try { + const result = await runFlow({ + url: '/api/simpleFlow', + input: { input }, + }); + setOutput(result.output); + } catch (error) { + console.error('Error:', error); + } finally { + setLoading(false); + } + }; + + return ( +
+        <form onSubmit={handleSubmit}>
+          <input
+            value={input}
+            onChange={(e) => setInput(e.target.value)}
+            placeholder="Ask me anything..."
+          />
+          <button type="submit" disabled={loading}>
+            {loading ? 'Thinking...' : 'Ask'}
+          </button>
+          {output && (
+            <div>
+              <p>{output}</p>
+            </div>
+          )}
+        </form>
+ ); + } + ``` +
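+
+  Note that the component above posts to a relative `/api/simpleFlow` path,
+  while the Express server shown earlier listens on port 8080 at `/simpleFlow`.
+  In development you would typically bridge that gap with a dev-server proxy.
+  A sketch for Vite (adjust for your bundler of choice):
+
+  ```ts
+  // vite.config.ts: proxy /api/* to the Express server during development
+  import { defineConfig } from 'vite';
+
+  export default defineConfig({
+    server: {
+      proxy: {
+        '/api': {
+          target: 'http://localhost:8080',
+          // Strip the /api prefix so /api/simpleFlow reaches /simpleFlow
+          rewrite: (path) => path.replace(/^\/api/, ''),
+        },
+      },
+    },
+  });
+  ```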
+ + Access your Go HTTP endpoints from client applications: + + ```go + package main + + import ( + "bytes" + "encoding/json" + "fmt" + "net/http" + ) + + type FlowRequest struct { + Input string `json:"input"` + } + + type FlowResponse struct { + Output string `json:"output"` + } + + func callFlow(input string) (*FlowResponse, error) { + reqBody := FlowRequest{Input: input} + jsonData, err := json.Marshal(reqBody) + if err != nil { + return nil, err + } + + resp, err := http.Post( + "http://localhost:8080/simpleFlow", + "application/json", + bytes.NewBuffer(jsonData), + ) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + var result FlowResponse + err = json.NewDecoder(resp.Body).Decode(&result) + if err != nil { + return nil, err + } + + return &result, nil + } + + func main() { + result, err := callFlow("Tell me a joke about programming") + if err != nil { + fmt.Printf("Error: %v\n", err) + return + } + fmt.Printf("Response: %s\n", result.Output) + } + ``` + + + Access your Python Flask endpoints from client applications: + + ```python + import requests + import asyncio + import aiohttp + + # Synchronous client + def call_flow(input_text): + response = requests.post( + 'http://localhost:8080/simpleFlow', + json={'input': input_text} + ) + response.raise_for_status() + return response.json() + + # Asynchronous client + async def call_flow_async(input_text): + async with aiohttp.ClientSession() as session: + async with session.post( + 'http://localhost:8080/simpleFlow', + json={'input': input_text} + ) as response: + response.raise_for_status() + return await response.json() + + # Usage + result = call_flow("Tell me a joke about programming") + print(result['output']) + + # Async usage + async def main(): + result = await call_flow_async("Write a story about AI") + print(result['output']) + + asyncio.run(main()) + ``` + +
+ +## Authentication and Security + + + + Implement authentication for your Express endpoints: + + ### API Key Authentication + + ```ts + import { apiKey } from 'genkit/context'; + import { startFlowServer, withContextProvider } from '@genkit-ai/express'; + + const securedFlow = ai.defineFlow( + { + name: 'securedFlow', + inputSchema: z.object({ sensitiveData: z.string() }), + outputSchema: z.object({ output: z.string() }), + }, + async ({ sensitiveData }, { context }) => { + // Flow is automatically secured by API key check + return { output: 'This is protected content' }; + } + ); + + // Secure with API key + startFlowServer({ + flows: [withContextProvider(securedFlow, apiKey(process.env.MY_API_KEY))], + port: 8080, + }); + ``` + + ### Custom Authentication + + ```ts + import { ContextProvider, RequestData, UserFacingError } from 'genkit/context'; + + interface AuthContext { + auth?: { + user: string; + role: string; + }; + } + + const customAuth: ContextProvider = async (req: RequestData) => { + const token = req.headers['authorization']?.replace('Bearer ', ''); + + if (!token) { + throw new UserFacingError('UNAUTHENTICATED', 'Missing authorization token'); + } + + // Verify token (implement your own logic) + const user = await verifyJWT(token); + + return { + auth: { + user: user.id, + role: user.role, + }, + }; + }; + + const protectedFlow = ai.defineFlow( + { + name: 'protectedFlow', + inputSchema: z.object({ input: z.string() }), + outputSchema: z.object({ output: z.string() }), + }, + async ({ input }, { context }) => { + if (!context.auth || context.auth.role !== 'admin') { + throw new Error('Admin access required'); + } + + return { output: `Hello ${context.auth.user}, you said: ${input}` }; + } + ); + + startFlowServer({ + flows: [withContextProvider(protectedFlow, customAuth)], + }); + ``` + + ### Express Middleware Integration + + ```ts + import express from 'express'; + import jwt from 'jsonwebtoken'; + + const app = express(); + app.use(express.json()); + + // Custom auth middleware + const authMiddleware = (req: express.Request, res: express.Response, next: express.NextFunction) => { + const token = req.headers.authorization?.replace('Bearer ', ''); + + if (!token) { + return res.status(401).json({ error: 'Missing token' }); + } + + try { + const decoded = jwt.verify(token, process.env.JWT_SECRET!); + req.user = decoded; + next(); + } catch (error) { + return res.status(401).json({ error: 'Invalid token' }); + } + }; + + // Protected endpoint + app.post('/protectedFlow', authMiddleware, expressHandler(protectedFlow)); + ``` + + + Implement authentication in Go applications: + + ```go + import ( + "net/http" + "strings" + "github.com/gin-gonic/gin" + "github.com/golang-jwt/jwt/v4" + ) + + func authMiddleware() gin.HandlerFunc { + return func(c *gin.Context) { + authHeader := c.GetHeader("Authorization") + if authHeader == "" { + c.JSON(401, gin.H{"error": "Missing authorization header"}) + c.Abort() + return + } + + tokenString := strings.Replace(authHeader, "Bearer ", "", 1) + + token, err := jwt.Parse(tokenString, func(token *jwt.Token) (interface{}, error) { + return []byte(os.Getenv("JWT_SECRET")), nil + }) + + if err != nil || !token.Valid { + c.JSON(401, gin.H{"error": "Invalid token"}) + c.Abort() + return + } + + if claims, ok := token.Claims.(jwt.MapClaims); ok { + c.Set("user", claims) + } + + c.Next() + } + } + + func main() { + r := gin.Default() + + // Protected route + r.POST("/protectedFlow", authMiddleware(), func(c *gin.Context) { + user, _ := c.Get("user") 
+ // Handle protected flow logic + c.JSON(200, gin.H{"message": "Protected content", "user": user}) + }) + + r.Run(":8080") + } + ``` + + + Implement authentication in Flask applications: + + ```python + from flask import Flask, request, jsonify + from functools import wraps + import jwt + import os + + app = Flask(__name__) + + def auth_required(f): + @wraps(f) + def decorated_function(*args, **kwargs): + token = request.headers.get('Authorization') + if not token: + return jsonify({'error': 'Missing authorization header'}), 401 + + try: + token = token.replace('Bearer ', '') + payload = jwt.decode(token, os.getenv('JWT_SECRET'), algorithms=['HS256']) + request.user = payload + except jwt.InvalidTokenError: + return jsonify({'error': 'Invalid token'}), 401 + + return f(*args, **kwargs) + return decorated_function + + @app.route('/protectedFlow', methods=['POST']) + @auth_required + async def protected_flow(): + user = request.user + input_data = request.get_json() + + # Handle protected flow logic + result = await handle_protected_flow(input_data, user) + return jsonify(result) + ``` + + + +## Advanced Features + +### Multiple Flows and Server Configuration + + + + Use `startFlowServer` for multiple flows with advanced configuration: + + ```ts + import { startFlowServer } from '@genkit-ai/express'; + + const chatFlow = ai.defineFlow( + { + name: 'chatFlow', + inputSchema: z.object({ message: z.string() }), + outputSchema: z.object({ response: z.string() }), + }, + async ({ message }) => { + const { text } = await ai.generate({ + prompt: `Respond to this message: ${message}`, + }); + return { response: text }; + } + ); + + const summaryFlow = ai.defineFlow( + { + name: 'summaryFlow', + inputSchema: z.object({ text: z.string() }), + outputSchema: z.object({ summary: z.string() }), + }, + async ({ text }) => { + const { text: summary } = await ai.generate({ + prompt: `Summarize this text: ${text}`, + }); + return { summary }; + } + ); + + startFlowServer({ + flows: [chatFlow, summaryFlow], + port: 4567, + cors: { + origin: ['http://localhost:3000', 'https://myapp.com'], + credentials: true, + }, + pathPrefix: '/api/v1', + jsonParserOptions: { + limit: '10mb', + }, + }); + ``` + + + Configure multiple endpoints with Gin: + + ```go + func setupRoutes() *gin.Engine { + r := gin.Default() + + // CORS middleware + r.Use(func(c *gin.Context) { + c.Header("Access-Control-Allow-Origin", "*") + c.Header("Access-Control-Allow-Methods", "POST, GET, OPTIONS") + c.Header("Access-Control-Allow-Headers", "Content-Type, Authorization") + + if c.Request.Method == "OPTIONS" { + c.AbortWithStatus(204) + return + } + + c.Next() + }) + + // API group + api := r.Group("/api/v1") + { + api.POST("/chat", handleChatFlow) + api.POST("/summary", handleSummaryFlow) + } + + return r + } + + func main() { + r := setupRoutes() + r.Run(":4567") + } + ``` + + + Configure multiple endpoints with Flask: + + ```python + from flask import Flask + from flask_cors import CORS + + app = Flask(__name__) + CORS(app, origins=['http://localhost:3000', 'https://myapp.com']) + + @app.route('/api/v1/chat', methods=['POST']) + async def chat_flow(): + input_data = request.get_json() + result = await handle_chat_flow(input_data) + return jsonify(result) + + @app.route('/api/v1/summary', methods=['POST']) + async def summary_flow(): + input_data = request.get_json() + result = await handle_summary_flow(input_data) + return jsonify(result) + + if __name__ == '__main__': + app.run(host='0.0.0.0', port=4567) + ``` + + + +### Error Handling and 
Validation + + + + Implement comprehensive error handling: + + ```ts + import { UserFacingError } from 'genkit'; + + const robustFlow = ai.defineFlow( + { + name: 'robustFlow', + inputSchema: z.object({ + text: z.string().min(1).max(1000), + options: z.object({ + temperature: z.number().min(0).max(2).optional(), + }).optional(), + }), + outputSchema: z.object({ result: z.string() }), + }, + async ({ text, options }, { context }) => { + try { + const { text: result } = await ai.generate({ + prompt: text, + config: { + temperature: options?.temperature ?? 0.7, + }, + }); + + return { result }; + } catch (error) { + if (error.message.includes('rate limit')) { + throw new UserFacingError('RESOURCE_EXHAUSTED', 'Rate limit exceeded. Please try again later.'); + } + + throw new UserFacingError('INTERNAL', 'An unexpected error occurred.'); + } + } + ); + + // Custom error handling middleware + app.use((error: any, req: express.Request, res: express.Response, next: express.NextFunction) => { + if (error instanceof UserFacingError) { + return res.status(400).json({ + error: error.message, + code: error.code, + }); + } + + console.error('Unexpected error:', error); + res.status(500).json({ + error: 'Internal server error', + }); + }); + ``` + + + Implement error handling in Go: + + ```go + type ErrorResponse struct { + Error string `json:"error"` + Code string `json:"code,omitempty"` + } + + func handleFlowWithValidation(c *gin.Context) { + var input struct { + Text string `json:"text" binding:"required,min=1,max=1000"` + Options struct { + Temperature *float64 `json:"temperature,omitempty"` + } `json:"options,omitempty"` + } + + if err := c.ShouldBindJSON(&input); err != nil { + c.JSON(400, ErrorResponse{ + Error: "Invalid input: " + err.Error(), + Code: "INVALID_ARGUMENT", + }) + return + } + + result, err := processFlow(input.Text, input.Options.Temperature) + if err != nil { + if strings.Contains(err.Error(), "rate limit") { + c.JSON(429, ErrorResponse{ + Error: "Rate limit exceeded. Please try again later.", + Code: "RESOURCE_EXHAUSTED", + }) + return + } + + c.JSON(500, ErrorResponse{ + Error: "Internal server error", + Code: "INTERNAL", + }) + return + } + + c.JSON(200, gin.H{"result": result}) + } + ``` + + + Implement error handling in Flask: + + ```python + from flask import Flask, request, jsonify + from marshmallow import Schema, fields, ValidationError + + class FlowInputSchema(Schema): + text = fields.Str(required=True, validate=lambda x: 1 <= len(x) <= 1000) + options = fields.Dict(missing={}) + + schema = FlowInputSchema() + + @app.route('/robustFlow', methods=['POST']) + async def robust_flow(): + try: + # Validate input + input_data = schema.load(request.get_json()) + + # Process flow + result = await process_flow( + input_data['text'], + input_data['options'].get('temperature', 0.7) + ) + + return jsonify({'result': result}) + + except ValidationError as err: + return jsonify({ + 'error': 'Invalid input', + 'details': err.messages, + 'code': 'INVALID_ARGUMENT' + }), 400 + + except Exception as err: + if 'rate limit' in str(err): + return jsonify({ + 'error': 'Rate limit exceeded. 
Please try again later.', + 'code': 'RESOURCE_EXHAUSTED' + }), 429 + + return jsonify({ + 'error': 'Internal server error', + 'code': 'INTERNAL' + }), 500 + ``` + + + +## Deployment Considerations + +### Production Configuration + + + + Production-ready Express setup: + + ```ts + import helmet from 'helmet'; + import compression from 'compression'; + import rateLimit from 'express-rate-limit'; + + const app = express(); + + // Security middleware + app.use(helmet()); + app.use(compression()); + + // Rate limiting + const limiter = rateLimit({ + windowMs: 15 * 60 * 1000, // 15 minutes + max: 100, // limit each IP to 100 requests per windowMs + message: 'Too many requests from this IP', + }); + app.use('/api/', limiter); + + // Body parsing with limits + app.use(express.json({ limit: '10mb' })); + + // Health check endpoint + app.get('/health', (req, res) => { + res.json({ status: 'healthy', timestamp: new Date().toISOString() }); + }); + + // Start server + const port = process.env.PORT || 8080; + app.listen(port, () => { + console.log(`Server running on port ${port}`); + }); + ``` + + + Production-ready Go setup: + + ```go + import ( + "time" + "github.com/gin-contrib/cors" + "github.com/gin-contrib/gzip" + "golang.org/x/time/rate" + ) + + func setupProductionServer() *gin.Engine { + gin.SetMode(gin.ReleaseMode) + r := gin.New() + + // Middleware + r.Use(gin.Logger()) + r.Use(gin.Recovery()) + r.Use(gzip.Gzip(gzip.DefaultCompression)) + + // CORS + r.Use(cors.New(cors.Config{ + AllowOrigins: []string{"https://myapp.com"}, + AllowMethods: []string{"POST", "GET"}, + AllowHeaders: []string{"Content-Type", "Authorization"}, + ExposeHeaders: []string{"Content-Length"}, + AllowCredentials: true, + MaxAge: 12 * time.Hour, + })) + + // Rate limiting middleware + limiter := rate.NewLimiter(rate.Every(time.Minute), 60) + r.Use(func(c *gin.Context) { + if !limiter.Allow() { + c.JSON(429, gin.H{"error": "Rate limit exceeded"}) + c.Abort() + return + } + c.Next() + }) + + // Health check + r.GET("/health", func(c *gin.Context) { + c.JSON(200, gin.H{ + "status": "healthy", + "timestamp": time.Now().Format(time.RFC3339), + }) + }) + + return r + } + ``` + + + Production-ready Flask setup: + + ```python + from flask import Flask + from flask_cors import CORS + from flask_limiter import Limiter + from flask_limiter.util import get_remote_address + import os + + app = Flask(__name__) + + # CORS + CORS(app, origins=['https://myapp.com']) + + # Rate limiting + limiter = Limiter( + app, + key_func=get_remote_address, + default_limits=["100 per hour"] + ) + + @app.route('/health') + def health_check(): + return jsonify({ + 'status': 'healthy', + 'timestamp': datetime.utcnow().isoformat() + }) + + @app.errorhandler(429) + def ratelimit_handler(e): + return jsonify({'error': 'Rate limit exceeded'}), 429 + + if __name__ == '__main__': + port = int(os.environ.get('PORT', 8080)) + app.run(host='0.0.0.0', port=port, debug=False) + ``` + + + +## Next Steps + +- Learn about [creating flows](/unified-docs/creating-flows) to build more complex AI workflows +- Explore [authorization patterns](/unified-docs/deployment/authorization) for securing your applications +- See [deployment guides](/unified-docs/deployment) for production deployment strategies +- Check out other framework integrations: + - [Next.js](/unified-docs/frameworks/nextjs) for React applications diff --git a/src/content/docs/unified-docs/frameworks/nextjs.mdx b/src/content/docs/unified-docs/frameworks/nextjs.mdx new file mode 100644 index 
00000000..fbd21837 --- /dev/null +++ b/src/content/docs/unified-docs/frameworks/nextjs.mdx @@ -0,0 +1,1142 @@ +--- +title: Next.js Integration +description: Learn how to integrate Genkit with Next.js applications across JavaScript, Go, and Python, including API routes, client-side calls, streaming, and deployment strategies. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; +import { Badge } from '@astrojs/starlight/components'; + + + +The Next.js integration provides a seamless way to build full-stack AI applications with Genkit, offering both server-side API routes and client-side streaming capabilities for modern React applications. + +:::note[Framework Availability] +Next.js integration is primarily available for JavaScript/Node.js. For other languages, see equivalent frameworks: +- **Go**: Gin with React frontend +- **Python**: FastAPI with React frontend +::: + +## Installation and Setup + + + + ### Create a Next.js Project + + If you don't have an existing Next.js project: + + ```bash + npx create-next-app@latest my-genkit-app --src-dir --typescript + cd my-genkit-app + ``` + + ### Install Dependencies + + ```bash + # Core Genkit and Next.js plugin + npm install genkit @genkit-ai/next + + # Choose your AI provider + npm install @genkit-ai/googleai + # or npm install @genkit-ai/vertexai + # or npm install @genkit-ai/compat-oai + + # Development tools (optional) + npm install -g genkit-cli + npm install --save-dev tsx + ``` + + ### Project Structure + + ``` + my-genkit-app/ + ├── src/ + │ ├── app/ + │ │ ├── api/ + │ │ │ └── flows/ + │ │ │ └── route.ts + │ │ └── page.tsx + │ └── genkit/ + │ └── flows.ts + ├── package.json + └── next.config.js + ``` + + + For Go applications, create a separate backend API and React frontend: + + ### Backend Setup (Go) + + ```bash + mkdir my-genkit-app + cd my-genkit-app + mkdir backend frontend + ``` + + ```go + // backend/main.go + package main + + import ( + "context" + "github.com/gin-gonic/gin" + "github.com/gin-contrib/cors" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlegenai" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlegenai.GoogleAI{}), + ) + if err != nil { + log.Fatal(err) + } + + r := gin.Default() + + // CORS for React frontend + r.Use(cors.New(cors.Config{ + AllowOrigins: []string{"http://localhost:3000"}, + AllowMethods: []string{"POST", "GET", "OPTIONS"}, + AllowHeaders: []string{"Content-Type", "Authorization"}, + AllowCredentials: true, + })) + + // API routes + r.POST("/api/flows/menuSuggestion", handleMenuSuggestion) + + r.Run(":8080") + } + ``` + + ### Frontend Setup (React) + + ```bash + cd frontend + npx create-next-app@latest . 
--typescript + npm install + ``` + + + For Python applications, create a FastAPI backend with React frontend: + + ### Backend Setup (Python) + + ```bash + mkdir my-genkit-app + cd my-genkit-app + mkdir backend frontend + + cd backend + pip install fastapi uvicorn genkit-plugin-google-genai + ``` + + ```python + # backend/main.py + from fastapi import FastAPI + from fastapi.middleware.cors import CORSMiddleware + from genkit.ai import Genkit + from genkit.plugins.google_genai import GoogleGenai + + ai = Genkit(plugins=[GoogleGenai()]) + + app = FastAPI() + + # CORS for React frontend + app.add_middleware( + CORSMiddleware, + allow_origins=["http://localhost:3000"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], + ) + + @app.post("/api/flows/menuSuggestion") + async def menu_suggestion(request: dict): + # Handle flow logic + pass + + if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8080) + ``` + + ### Frontend Setup (React) + + ```bash + cd ../frontend + npx create-next-app@latest . --typescript + npm install + ``` + + + +## Define Genkit Flows + + + + Create your Genkit flows in `src/genkit/flows.ts`: + + ```ts + import { googleAI } from '@genkit-ai/googleai'; + import { genkit, z } from 'genkit'; + + const ai = genkit({ + plugins: [googleAI()], + }); + + export const menuSuggestionFlow = ai.defineFlow( + { + name: 'menuSuggestionFlow', + inputSchema: z.object({ theme: z.string() }), + outputSchema: z.object({ menuItem: z.string() }), + streamSchema: z.string(), + }, + async ({ theme }, { sendChunk }) => { + const { stream, response } = ai.generateStream({ + model: googleAI.model('gemini-2.5-flash'), + prompt: `Invent a menu item for a ${theme} themed restaurant.`, + }); + + for await (const chunk of stream) { + sendChunk(chunk.text); + } + + const { text } = await response; + return { menuItem: text }; + } + ); + + export const chatFlow = ai.defineFlow( + { + name: 'chatFlow', + inputSchema: z.object({ + message: z.string(), + history: z.array(z.object({ + role: z.enum(['user', 'assistant']), + content: z.string(), + })).optional(), + }), + outputSchema: z.object({ response: z.string() }), + streamSchema: z.string(), + }, + async ({ message, history = [] }, { sendChunk }) => { + const conversationContext = history + .map(msg => `${msg.role}: ${msg.content}`) + .join('\n'); + + const { stream, response } = ai.generateStream({ + model: googleAI.model('gemini-2.5-flash'), + prompt: `${conversationContext}\nuser: ${message}\nassistant:`, + }); + + for await (const chunk of stream) { + sendChunk(chunk.text); + } + + const { text } = await response; + return { response: text }; + } + ); + ``` + + + Define flows in your Go backend: + + ```go + // backend/flows.go + package main + + import ( + "context" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/ai" + ) + + type MenuSuggestionInput struct { + Theme string `json:"theme"` + } + + type MenuSuggestionOutput struct { + MenuItem string `json:"menuItem"` + } + + func handleMenuSuggestion(c *gin.Context) { + var input MenuSuggestionInput + if err := c.ShouldBindJSON(&input); err != nil { + c.JSON(400, gin.H{"error": err.Error()}) + return + } + + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt(fmt.Sprintf("Invent a menu item for a %s themed restaurant.", input.Theme)), + ) + if err != nil { + c.JSON(500, gin.H{"error": err.Error()}) + return + } + + c.JSON(200, MenuSuggestionOutput{ + MenuItem: resp.Text(), + }) + } + + type ChatInput struct { + Message string 
`json:"message"` + History []struct { + Role string `json:"role"` + Content string `json:"content"` + } `json:"history"` + } + + type ChatOutput struct { + Response string `json:"response"` + } + + func handleChat(c *gin.Context) { + var input ChatInput + if err := c.ShouldBindJSON(&input); err != nil { + c.JSON(400, gin.H{"error": err.Error()}) + return + } + + // Build conversation context + var context strings.Builder + for _, msg := range input.History { + context.WriteString(fmt.Sprintf("%s: %s\n", msg.Role, msg.Content)) + } + context.WriteString(fmt.Sprintf("user: %s\nassistant:", input.Message)) + + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt(context.String()), + ) + if err != nil { + c.JSON(500, gin.H{"error": err.Error()}) + return + } + + c.JSON(200, ChatOutput{ + Response: resp.Text(), + }) + } + ``` + + + Define flows in your Python backend: + + ```python + # backend/flows.py + from pydantic import BaseModel + from typing import List, Optional + + class MenuSuggestionInput(BaseModel): + theme: str + + class MenuSuggestionOutput(BaseModel): + menuItem: str + + class ChatMessage(BaseModel): + role: str + content: str + + class ChatInput(BaseModel): + message: str + history: Optional[List[ChatMessage]] = [] + + class ChatOutput(BaseModel): + response: str + + @ai.define_flow( + input_schema=MenuSuggestionInput, + output_schema=MenuSuggestionOutput + ) + async def menu_suggestion_flow(input_data: MenuSuggestionInput): + response = await ai.generate( + f"Invent a menu item for a {input_data.theme} themed restaurant." + ) + return MenuSuggestionOutput(menuItem=response.text) + + @ai.define_flow( + input_schema=ChatInput, + output_schema=ChatOutput + ) + async def chat_flow(input_data: ChatInput): + # Build conversation context + context = "\n".join([ + f"{msg.role}: {msg.content}" + for msg in input_data.history + ]) + context += f"\nuser: {input_data.message}\nassistant:" + + response = await ai.generate(context) + return ChatOutput(response=response.text) + ``` + + + +## Create API Routes + + + + Create API routes using the Genkit Next.js plugin: + + ### Individual Route Files + + Create `src/app/api/menuSuggestion/route.ts`: + + ```ts + import { menuSuggestionFlow } from '@/genkit/flows'; + import { appRoute } from '@genkit-ai/next'; + + export const POST = appRoute(menuSuggestionFlow); + ``` + + Create `src/app/api/chat/route.ts`: + + ```ts + import { chatFlow } from '@/genkit/flows'; + import { appRoute } from '@genkit-ai/next'; + + export const POST = appRoute(chatFlow); + ``` + + ### Unified Route Handler + + Alternatively, create `src/app/api/flows/[flowName]/route.ts`: + + ```ts + import { menuSuggestionFlow, chatFlow } from '@/genkit/flows'; + import { appRoute } from '@genkit-ai/next'; + + const flows = { + menuSuggestion: menuSuggestionFlow, + chat: chatFlow, + }; + + export async function POST( + request: Request, + { params }: { params: { flowName: string } } + ) { + const flow = flows[params.flowName as keyof typeof flows]; + + if (!flow) { + return new Response('Flow not found', { status: 404 }); + } + + return appRoute(flow)(request); + } + ``` + + + Set up API routes in your Gin router: + + ```go + func setupRoutes(g *genkit.Genkit) *gin.Engine { + r := gin.Default() + + // CORS middleware + r.Use(cors.New(cors.Config{ + AllowOrigins: []string{"http://localhost:3000"}, + AllowMethods: []string{"POST", "GET", "OPTIONS"}, + AllowHeaders: []string{"Content-Type", "Authorization"}, + AllowCredentials: true, + })) + + // API routes + api := r.Group("/api") + { 
+ api.POST("/menuSuggestion", handleMenuSuggestion) + api.POST("/chat", handleChat) + } + + // Health check + r.GET("/health", func(c *gin.Context) { + c.JSON(200, gin.H{"status": "healthy"}) + }) + + return r + } + ``` + + + Set up API routes in FastAPI: + + ```python + # backend/main.py + @app.post("/api/menuSuggestion") + async def menu_suggestion_endpoint(input_data: MenuSuggestionInput): + result = await menu_suggestion_flow(input_data) + return result + + @app.post("/api/chat") + async def chat_endpoint(input_data: ChatInput): + result = await chat_flow(input_data) + return result + + @app.get("/health") + async def health_check(): + return {"status": "healthy"} + ``` + + + +## Frontend Implementation + + + + Create your React components with Genkit integration: + + ### Basic Usage + + ```tsx + // src/app/page.tsx + 'use client'; + + import { useState } from 'react'; + import { runFlow, streamFlow } from '@genkit-ai/next/client'; + import { menuSuggestionFlow } from '@/genkit/flows'; + + export default function Home() { + const [menuItem, setMenuItem] = useState(''); + const [isLoading, setIsLoading] = useState(false); + const [streamedText, setStreamedText] = useState(''); + + async function getMenuItem(formData: FormData) { + const theme = formData.get('theme')?.toString() ?? ''; + setIsLoading(true); + + try { + const result = await runFlow({ + url: '/api/menuSuggestion', + input: { theme }, + }); + + setMenuItem(result.menuItem); + } catch (error) { + console.error('Error generating menu item:', error); + } finally { + setIsLoading(false); + } + } + + async function streamMenuItem(formData: FormData) { + const theme = formData.get('theme')?.toString() ?? ''; + setIsLoading(true); + setStreamedText(''); + + try { + const result = streamFlow({ + url: '/api/menuSuggestion', + input: { theme }, + }); + + for await (const chunk of result.stream) { + setStreamedText((prev) => prev + chunk); + } + + const finalOutput = await result.output; + setMenuItem(finalOutput.menuItem); + } catch (error) { + console.error('Error streaming menu item:', error); + } finally { + setIsLoading(false); + } + } + + return ( +
+       <main>
+         <h1>AI Menu Generator</h1>
+
+         <form action={getMenuItem}>
+           <input type="text" name="theme" placeholder="Enter a restaurant theme..." required />
+           <button type="submit" disabled={isLoading}>
+             {isLoading ? 'Generating...' : 'Generate'}
+           </button>
+           <button formAction={streamMenuItem} disabled={isLoading}>
+             {isLoading ? 'Streaming...' : 'Stream'}
+           </button>
+         </form>
+
+         {streamedText && (
+           <section>
+             <h2>Streaming Output:</h2>
+             <pre>{streamedText}</pre>
+           </section>
+         )}
+
+         {menuItem && (
+           <section>
+             <h2>Final Output:</h2>
+             <pre>{menuItem}</pre>
+           </section>
+         )}
+       </main>
+ ); + } + ``` + + ### Chat Interface + + ```tsx + // src/components/ChatInterface.tsx + 'use client'; + + import { useState } from 'react'; + import { streamFlow } from '@genkit-ai/next/client'; + import { chatFlow } from '@/genkit/flows'; + + interface Message { + role: 'user' | 'assistant'; + content: string; + } + + export default function ChatInterface() { + const [messages, setMessages] = useState([]); + const [input, setInput] = useState(''); + const [isLoading, setIsLoading] = useState(false); + + async function sendMessage() { + if (!input.trim()) return; + + const userMessage: Message = { role: 'user', content: input }; + setMessages(prev => [...prev, userMessage]); + setInput(''); + setIsLoading(true); + + try { + const result = streamFlow({ + url: '/api/chat', + input: { + message: input, + history: messages, + }, + }); + + let assistantMessage = ''; + setMessages(prev => [...prev, { role: 'assistant', content: '' }]); + + for await (const chunk of result.stream) { + assistantMessage += chunk; + setMessages(prev => [ + ...prev.slice(0, -1), + { role: 'assistant', content: assistantMessage } + ]); + } + + const finalOutput = await result.output; + setMessages(prev => [ + ...prev.slice(0, -1), + { role: 'assistant', content: finalOutput.response } + ]); + } catch (error) { + console.error('Error sending message:', error); + } finally { + setIsLoading(false); + } + } + + return ( +
+       <div>
+         <div>
+           {messages.map((message, index) => (
+             <div key={index}>
+               {message.content}
+             </div>
+           ))}
+         </div>
+
+         <div>
+           <input
+             value={input}
+             onChange={(e) => setInput(e.target.value)}
+             onKeyPress={(e) => e.key === 'Enter' && sendMessage()}
+             className="flex-1 border rounded-lg px-3 py-2"
+             placeholder="Type your message..."
+             disabled={isLoading}
+           />
+           <button onClick={sendMessage} disabled={isLoading}>
+             Send
+           </button>
+         </div>
+       </div>
+ ); + } + ``` +
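+
+ To use the chat component, render it from any page in your app. A minimal sketch (the route location is illustrative):
+
+ ```tsx
+ // src/app/chat/page.tsx (illustrative path)
+ import ChatInterface from '@/components/ChatInterface';
+
+ export default function ChatPage() {
+   return <ChatInterface />;
+ }
+ ```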
+ + Create React components that call your Go backend: + + ```tsx + // frontend/src/app/page.tsx + 'use client'; + + import { useState } from 'react'; + + interface MenuSuggestionResponse { + menuItem: string; + } + + export default function Home() { + const [menuItem, setMenuItem] = useState(''); + const [isLoading, setIsLoading] = useState(false); + + async function getMenuItem(formData: FormData) { + const theme = formData.get('theme')?.toString() ?? ''; + setIsLoading(true); + + try { + const response = await fetch('http://localhost:8080/api/menuSuggestion', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ theme }), + }); + + if (!response.ok) { + throw new Error('Failed to generate menu item'); + } + + const result: MenuSuggestionResponse = await response.json(); + setMenuItem(result.menuItem); + } catch (error) { + console.error('Error generating menu item:', error); + } finally { + setIsLoading(false); + } + } + + return ( +
+       <main>
+         <h1>AI Menu Generator</h1>
+
+         <form action={getMenuItem}>
+           <input type="text" name="theme" placeholder="Enter a restaurant theme..." required />
+           <button type="submit" disabled={isLoading}>
+             {isLoading ? 'Generating...' : 'Generate'}
+           </button>
+         </form>
+
+         {menuItem && (
+           <section>
+             <h2>Generated Menu Item:</h2>
+             <pre>{menuItem}</pre>
+           </section>
+         )}
+       </main>
+ ); + } + ``` +
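+
+ If several components need to call the backend, it can help to pull the request into a small typed helper. A minimal sketch, assuming the `/api/menuSuggestion` route and response shape shown above:
+
+ ```ts
+ // Illustrative helper around the Go backend call; adjust the base URL
+ // and error handling to match your deployment.
+ export async function generateMenuItem(theme: string): Promise<string> {
+   const res = await fetch('http://localhost:8080/api/menuSuggestion', {
+     method: 'POST',
+     headers: { 'Content-Type': 'application/json' },
+     body: JSON.stringify({ theme }),
+   });
+   if (!res.ok) {
+     throw new Error(`Request failed with status ${res.status}`);
+   }
+   const data: { menuItem: string } = await res.json();
+   return data.menuItem;
+ }
+ ```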
+ + Create React components that call your Python backend: + + ```tsx + // frontend/src/app/page.tsx + 'use client'; + + import { useState } from 'react'; + + interface MenuSuggestionResponse { + menuItem: string; + } + + export default function Home() { + const [menuItem, setMenuItem] = useState(''); + const [isLoading, setIsLoading] = useState(false); + + async function getMenuItem(formData: FormData) { + const theme = formData.get('theme')?.toString() ?? ''; + setIsLoading(true); + + try { + const response = await fetch('http://localhost:8080/api/menuSuggestion', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ theme }), + }); + + if (!response.ok) { + throw new Error('Failed to generate menu item'); + } + + const result: MenuSuggestionResponse = await response.json(); + setMenuItem(result.menuItem); + } catch (error) { + console.error('Error generating menu item:', error); + } finally { + setIsLoading(false); + } + } + + return ( +
+       <main>
+         <h1>AI Menu Generator</h1>
+
+         <form action={getMenuItem}>
+           <input type="text" name="theme" placeholder="Enter a restaurant theme..." required />
+           <button type="submit" disabled={isLoading}>
+             {isLoading ? 'Generating...' : 'Generate'}
+           </button>
+         </form>
+
+         {menuItem && (
+           <section>
+             <h2>Generated Menu Item:</h2>
+             <pre>{menuItem}</pre>
+           </section>
+         )}
+       </main>
+ ); + } + ``` +
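+
+ Rather than hard-coding the backend origin, you can read it from a public environment variable so the same component works in development and production. A minimal sketch; `NEXT_PUBLIC_API_URL` is an assumed variable name:
+
+ ```ts
+ // Next.js inlines environment variables prefixed with NEXT_PUBLIC_ into the
+ // client bundle. Set NEXT_PUBLIC_API_URL in .env.local and on your host.
+ const API_BASE = process.env.NEXT_PUBLIC_API_URL ?? 'http://localhost:8080';
+
+ const response = await fetch(`${API_BASE}/api/menuSuggestion`, {
+   method: 'POST',
+   headers: { 'Content-Type': 'application/json' },
+   body: JSON.stringify({ theme }),
+ });
+ ```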
+
+ +## Authentication and Security + + + + ### API Key Authentication + + ```tsx + // Client-side with headers + const result = await runFlow({ + url: '/api/menuSuggestion', + headers: { + Authorization: 'Bearer your-token-here', + }, + input: { theme }, + }); + ``` + + ### Next.js Middleware + + ```ts + // middleware.ts + import { NextRequest, NextResponse } from 'next/server'; + import { jwtVerify } from 'jose'; + + export async function middleware(request: NextRequest) { + if (request.nextUrl.pathname.startsWith('/api/')) { + const token = request.headers.get('authorization')?.replace('Bearer ', ''); + + if (!token) { + return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }); + } + + try { + await jwtVerify(token, new TextEncoder().encode(process.env.JWT_SECRET!)); + } catch (error) { + return NextResponse.json({ error: 'Invalid token' }, { status: 401 }); + } + } + + return NextResponse.next(); + } + + export const config = { + matcher: '/api/:path*', + }; + ``` + + ### Session-based Authentication + + ```ts + // src/app/api/auth/route.ts + import { NextRequest, NextResponse } from 'next/server'; + import { cookies } from 'next/headers'; + + export async function POST(request: NextRequest) { + const { username, password } = await request.json(); + + // Verify credentials + if (await verifyCredentials(username, password)) { + const sessionToken = generateSessionToken(); + + cookies().set('session', sessionToken, { + httpOnly: true, + secure: process.env.NODE_ENV === 'production', + sameSite: 'strict', + maxAge: 60 * 60 * 24 * 7, // 1 week + }); + + return NextResponse.json({ success: true }); + } + + return NextResponse.json({ error: 'Invalid credentials' }, { status: 401 }); + } + ``` + + + Implement JWT authentication in your Go backend: + + ```go + import ( + "github.com/golang-jwt/jwt/v4" + "github.com/gin-gonic/gin" + ) + + func authMiddleware() gin.HandlerFunc { + return func(c *gin.Context) { + authHeader := c.GetHeader("Authorization") + if authHeader == "" { + c.JSON(401, gin.H{"error": "Missing authorization header"}) + c.Abort() + return + } + + tokenString := strings.Replace(authHeader, "Bearer ", "", 1) + + token, err := jwt.Parse(tokenString, func(token *jwt.Token) (interface{}, error) { + return []byte(os.Getenv("JWT_SECRET")), nil + }) + + if err != nil || !token.Valid { + c.JSON(401, gin.H{"error": "Invalid token"}) + c.Abort() + return + } + + c.Next() + } + } + + // Apply to protected routes + api.POST("/menuSuggestion", authMiddleware(), handleMenuSuggestion) + ``` + + + Implement JWT authentication in your FastAPI backend: + + ```python + from fastapi import Depends, HTTPException, status + from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials + import jwt + + security = HTTPBearer() + + def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)): + try: + payload = jwt.decode( + credentials.credentials, + os.getenv("JWT_SECRET"), + algorithms=["HS256"] + ) + return payload + except jwt.InvalidTokenError: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid token" + ) + + @app.post("/api/menuSuggestion") + async def menu_suggestion_endpoint( + input_data: MenuSuggestionInput, + user=Depends(verify_token) + ): + result = await menu_suggestion_flow(input_data) + return result + ``` + + + +## Deployment Considerations + +### Environment Variables + + + + Configure environment variables for production: + + ```bash + # .env.local + GEMINI_API_KEY=your_gemini_api_key + 
JWT_SECRET=your_jwt_secret + NEXTAUTH_SECRET=your_nextauth_secret + NEXTAUTH_URL=https://your-domain.com + ``` + + ### Vercel Deployment + + ```bash + # Install Vercel CLI + npm install -g vercel + + # Deploy + vercel + + # Set environment variables + vercel env add GEMINI_API_KEY + vercel env add JWT_SECRET + ``` + + ### Docker Deployment + + ```dockerfile + # Dockerfile + FROM node:18-alpine + + WORKDIR /app + + COPY package*.json ./ + RUN npm ci --only=production + + COPY . . + RUN npm run build + + EXPOSE 3000 + + CMD ["npm", "start"] + ``` + + + Deploy your Go backend: + + ```dockerfile + # Dockerfile + FROM golang:1.21-alpine AS builder + + WORKDIR /app + COPY go.mod go.sum ./ + RUN go mod download + + COPY . . + RUN go build -o main . + + FROM alpine:latest + RUN apk --no-cache add ca-certificates + WORKDIR /root/ + + COPY --from=builder /app/main . + + EXPOSE 8080 + + CMD ["./main"] + ``` + + ### Cloud Run Deployment + + ```bash + # Build and deploy + gcloud builds submit --tag gcr.io/PROJECT_ID/genkit-backend + gcloud run deploy --image gcr.io/PROJECT_ID/genkit-backend --platform managed + ``` + + + Deploy your Python backend: + + ```dockerfile + # Dockerfile + FROM python:3.11-slim + + WORKDIR /app + + COPY requirements.txt . + RUN pip install --no-cache-dir -r requirements.txt + + COPY . . + + EXPOSE 8080 + + CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"] + ``` + + ### Cloud Run Deployment + + ```bash + # Build and deploy + gcloud builds submit --tag gcr.io/PROJECT_ID/genkit-backend + gcloud run deploy --image gcr.io/PROJECT_ID/genkit-backend --platform managed + ``` + + + +## Next Steps + +- Learn about [creating flows](/unified-docs/creating-flows) to build more complex AI workflows +- Explore [authorization patterns](/unified-docs/deployment/authorization) for securing your applications +- See [deployment guides](/unified-docs/deployment) for production deployment strategies +- Check out other framework integrations: + - [Express.js](/unified-docs/frameworks/express) for API-first applications diff --git a/src/content/docs/unified-docs/generating-content.mdx b/src/content/docs/unified-docs/generating-content.mdx new file mode 100644 index 00000000..e967a600 --- /dev/null +++ b/src/content/docs/unified-docs/generating-content.mdx @@ -0,0 +1,1085 @@ +--- +title: Generating content with AI models +description: Learn how to generate content with AI models using Genkit's unified interface across JavaScript, Go, and Python, covering basic usage, configuration, structured output, streaming, and multimodal input/output. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; +import LLMSummary from '@/components/llm-summary.astro'; +import ExampleLink from '@/components/ExampleLink.astro'; + + +Genkit provides a unified interface to interact with various generative AI models (LLMs, image generation) across JavaScript, Go, and Python. 
+ +**Core Function:** `ai.generate()` (JS), `genkit.Generate()` (Go), `ai.generate()` (Python) + +**Basic Usage:** + + + + ```typescript + import { googleAI } from '@genkit-ai/googleai'; + import { genkit } from 'genkit'; + + const ai = genkit({ + plugins: [googleAI()], + model: googleAI.model('gemini-2.5-flash'), // Default model + }); + + // Generate with default model + const response1 = await ai.generate('prompt text'); + console.log(response1.text); + + // Generate with specific model reference + const response2 = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: 'prompt text', + }); + console.log(response2.text); + + // Generate with model string ID + const response3 = await ai.generate({ + model: 'googleai/gemini-2.5-flash', + prompt: 'prompt text', + }); + console.log(response3.text); + ``` + + + ```go + import ( + "context" + "log" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlegenai" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlegenai.GoogleAI{}), + genkit.WithDefaultModel("googleai/gemini-2.5-flash"), + ) + if err != nil { + log.Fatalf("could not initialize Genkit: %v", err) + } + + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("prompt text"), + ) + if err != nil { + log.Fatalf("could not generate: %v", err) + } + log.Println(resp.Text()) + } + ``` + + + ```python + import asyncio + from genkit.ai import Genkit + from genkit.plugins.google_genai import GoogleGenai + + ai = Genkit( + plugins=[GoogleGenai()], + model='googleai/gemini-2.5-flash', + ) + + async def main() -> None: + result = await ai.generate( + prompt='prompt text', + ) + print(result.text) + + ai.run_main(main()) + ``` + + + +**Configuration:** + +- **System Prompt:** `system: "Instruction for the model"` +- **Model Parameters:** `config: { maxOutputTokens: 512, temperature: 1.0, topP: 0.95, topK: 40, stopSequences: ["\n"] }` + +**Key Concepts:** + +- **Flexibility:** Easily swap models (`model` parameter). +- **Schema validation:** For defining and validating structured output schemas. +- **Streaming:** For real-time output using `generateStream`. +- **Multimodality:** Handle text, image, video, audio inputs (model-dependent). +- **Media Generation:** Create images, etc. (model-dependent). + + + +At the heart of generative AI are AI _models_. Currently, the two most prominent +examples of generative models are large language models (LLMs) and image +generation models. These models take input, called a _prompt_ (most commonly +text, an image, or a combination of both), and from it produce as output text, +an image, or even audio or video. + +The output of these models can be surprisingly convincing: LLMs generate text +that appears as though it could have been written by a human being, and image +generation models can produce images that are very close to real photographs or +artwork created by humans. + +In addition, LLMs have proven capable of tasks beyond simple text generation: + +- Writing computer programs +- Planning subtasks that are required to complete a larger task +- Organizing unorganized data +- Understanding and extracting information data from a corpus of text +- Following and performing automated activities based on a text description of + the activity + +There are many models available to you, from several different providers. 
Each +model has its own strengths and weaknesses and one model might excel at one task +but perform less well at others. Apps making use of generative AI can often +benefit from using multiple different models depending on the task at hand. + +As an app developer, you typically don't interact with generative AI +models directly, but rather through services available as web APIs. +Although these services often have similar functionality, they all provide them +through different and incompatible APIs. If you want to make use of multiple +model services, you have to use each of their proprietary SDKs, potentially +incompatible with each other. And if you want to upgrade from one model to the +newest and most capable one, you might have to build that integration all over +again. + +Genkit addresses this challenge by providing a single interface that abstracts +away the details of accessing potentially any generative AI model service, with +several pre-built implementations already available. Building your AI-powered +app around Genkit simplifies the process of making your first generative AI call +and makes it equally easy to combine multiple models or swap one model for +another as new models emerge. + +### Before you begin + +If you want to run the code examples on this page, first complete the steps in +the Getting started guide for your language. All of the examples assume that you +have already installed Genkit as a dependency in your project. + + + + Complete the [Getting started](/docs/get-started) guide. + + + Complete the [Get started](/go/docs/get-started-go) guide. + + + Complete the [Get started](/python/docs/get-started) guide. + + + +### Models supported by Genkit + +Genkit is designed to be flexible enough to use potentially any generative AI +model service. Its core libraries define the common interface for working with +models, and model plugins define the implementation details for working with a +specific model and its API. 
+ +The Genkit team maintains plugins for working with models provided by Vertex AI, +Google Generative AI, and Ollama: + +- Gemini family of LLMs, through the + [Google Cloud Vertex AI plugin](/docs/plugins/vertex-ai) and [Google AI plugin](/docs/plugins/google-genai) +- Imagen2 and Imagen3 image generation models, through Google Cloud Vertex AI +- Anthropic's Claude 3 family of LLMs, through Google Cloud Vertex AI's model + garden +- Gemma 2, Llama 3, and many more open models, through the [Ollama + plugin](/docs/plugins/ollama) (you must host the Ollama server yourself) +- GPT, Dall-E and Whisper family of models, through the [OpenAI plugin](/docs/plugins/openai) +- Grok family of models, through the [xAI plugin](/docs/plugins/xai) +- DeepSeek Chat and DeepSeek Reasoner models, through the [DeepSeek plugin](/docs/plugins/deepseek) + +In addition, there are also several community-supported plugins that provide +interfaces to these models: + +- Claude 3 family of LLMs, through the [Anthropic plugin](https://thefireco.github.io/genkit-plugins/docs/plugins/genkitx-anthropic) +- GPT family of LLMs through the [Azure OpenAI plugin](https://thefireco.github.io/genkit-plugins/docs/plugins/genkitx-azure-openai) +- Command R family of LLMs through the [Cohere plugin](https://thefireco.github.io/genkit-plugins/docs/plugins/genkitx-cohere) +- Mistral family of LLMs through the [Mistral plugin](https://thefireco.github.io/genkit-plugins/docs/plugins/genkitx-mistral) +- Gemma 2, Llama 3, and many more open models hosted on Groq, through the + [Groq plugin](https://thefireco.github.io/genkit-plugins/docs/plugins/genkitx-groq) + +You can discover more by searching for packages tagged with `genkit-model` on your language's package registry. + +### Loading and configuring model plugins + +Before you can use Genkit to start generating content, you need to load and +configure a model plugin. If you're coming from the Getting Started guide, +you've already done this. Otherwise, see the Getting Started guide or the individual plugin's documentation and follow the steps there before +continuing. + +### The generate() method + +In Genkit, the primary interface through which you interact with generative AI +models is the `generate()` method. + +The simplest `generate()` call specifies the model you want to use and a text +prompt: + + + + ```ts + import { googleAI } from '@genkit-ai/googleai'; + import { genkit } from 'genkit'; + + const ai = genkit({ + plugins: [googleAI()], + // Optional. Specify a default model. 
+ model: googleAI.model('gemini-2.5-flash'), + }); + + async function run() { + const response = await ai.generate('Invent a menu item for a restaurant with a pirate theme.'); + console.log(response.text); + } + + run(); + ``` + + + ```go + package main + + import ( + "context" + "log" + + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlegenai" + ) + + func main() { + ctx := context.Background() + + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlegenai.GoogleAI{}), + genkit.WithDefaultModel("googleai/gemini-2.5-flash"), + ) + if err != nil { + log.Fatalf("could not initialize Genkit: %v", err) + } + + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("Invent a menu item for a pirate themed restaurant."), + ) + if err != nil { + log.Fatalf("could not generate model response: %v", err) + } + + log.Println(resp.Text()) + } + ``` + + + ```python + import asyncio + from genkit.ai import Genkit + from genkit.plugins.google_genai import GoogleGenai + + ai = Genkit( + plugins=[GoogleGenai()], + model='googleai/gemini-2.5-flash', + ) + + async def main() -> None: + result = await ai.generate( + prompt='Invent a menu item for a pirate themed restaurant.', + ) + print(result.text) + + ai.run_main(main()) + ``` + + + +When you run this brief example, it will print out some debugging information +followed by the output of the `generate()` call, which will usually be Markdown +text as in the following example: + +```md +## The Blackheart's Bounty + +**A hearty stew of slow-cooked beef, spiced with rum and molasses, served in a +hollowed-out cannonball with a side of crusty bread and a dollop of tangy +pineapple salsa.** + +**Description:** This dish is a tribute to the hearty meals enjoyed by pirates +on the high seas. The beef is tender and flavorful, infused with the warm spices +of rum and molasses. The pineapple salsa adds a touch of sweetness and acidity, +balancing the richness of the stew. The cannonball serving vessel adds a fun and +thematic touch, making this dish a perfect choice for any pirate-themed +adventure. +``` + +Run the script again and you'll get a different output. + +The preceding code sample sent the generation request to the default model, +which you specified when you configured the Genkit instance. + +You can also specify a model for a single `generate()` call: + + + + ```ts + import { googleAI } from '@genkit-ai/googleai'; + + const response = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: 'Invent a menu item for a restaurant with a pirate theme.', + }); + ``` + + This example uses a model reference function provided by the model plugin. You can also specify the model using a string identifier: + + ```ts + const response = await ai.generate({ + model: 'googleai/gemini-2.5-flash-001', + prompt: 'Invent a menu item for a restaurant with a pirate theme.', + }); + ``` + + + ```go + resp, err := genkit.Generate(ctx, g, + ai.WithModelName("googleai/gemini-2.5-pro"), + ai.WithPrompt("Invent a menu item for a pirate themed restaurant."), + ) + ``` + + + ```python + result = await ai.generate( + prompt='Invent a menu item for a pirate themed restaurant.', + model='googleai/gemini-2.0-pro', + ) + ``` + + + +A model string identifier looks like `providerid/modelid`, where the provider ID +(in this case, `googleai`) identifies the plugin, and the model ID is a +plugin-specific string identifier for a specific version of a model. 
+ +These examples also illustrate an important point: when you use +`generate()` to make generative AI model calls, changing the model you want to +use is simply a matter of passing a different value to the model parameter. By +using `generate()` instead of the native model SDKs, you give yourself the +flexibility to more easily use several different models in your app and change +models in the future. + +So far you have only seen examples of the simplest `generate()` calls. However, +`generate()` also provides an interface for more advanced interactions with +generative models, which you will see in the sections that follow. + +### System prompts + +Some models support providing a _system prompt_, which gives the model +instructions as to how you want it to respond to messages from the user. You can +use the system prompt to specify a persona you want the model to adopt, the tone +of its responses, the format of its responses, and so on. + +If the model you're using supports system prompts, you can provide one: + + + + ```ts + const response = await ai.generate({ + prompt: 'What is your quest?', + system: "You are a knight from Monty Python's Flying Circus.", + }); + ``` + + + ```go + resp, err := genkit.Generate(ctx, g, + ai.WithSystem("You are a food industry marketing consultant."), + ai.WithPrompt("Invent a menu item for a pirate themed restaurant."), + ) + ``` + + For models that don't support system prompts, `ai.WithSystem()` simulates it by + modifying the request to appear _like_ a system prompt. + + + ```python + result = await ai.generate( + system='You are a food industry marketing consultant.', + prompt='Invent a menu item for a pirate themed restaurant.', + ) + ``` + + + +### Multi-turn conversations with messages + +For multi-turn conversations, you can use the `messages` parameter instead of `prompt` to provide a conversation history. This is particularly useful when you need to maintain context across multiple interactions with the model. + + + + The `messages` parameter accepts an array of message objects, where each message has a `role` (one of `'system'`, `'user'`, `'model'`, or `'tool'`) and `content`: + + ```ts + const response = await ai.generate({ + messages: [ + { role: 'user', content: 'Hello, can you help me plan a trip?' }, + { role: 'model', content: 'Of course! I\'d be happy to help you plan a trip. Where are you thinking of going?' }, + { role: 'user', content: 'I want to visit Japan for two weeks in spring.' } + ], + }); + ``` + + You can also combine `messages` with other parameters like `system` prompts: + + ```ts + const response = await ai.generate({ + system: 'You are a helpful travel assistant.', + messages: [ + { role: 'user', content: 'What should I pack for Japan in spring?' } + ], + }); + ``` + + **When to use `messages` vs. Chat API:** + + - Use the `messages` parameter for simple multi-turn conversations where you manually manage the conversation history + - For persistent chat sessions with automatic history management, use the [Chat API](/docs/chat) instead + + + ```go + resp, err := genkit.Generate(ctx, g, + ai.WithModelName("googleai/gemini-2.5-flash"), + ai.WithMessages( + NewUserMessage( + NewTextPart("Hello, can you help me plan a trip?"), + ), + NewModelMessage( + NewTextPart("Of course! I'd be happy to help you plan a trip. 
Where are you thinking of going?"), + ), + NewUserMessage( + NewTextPart("I want to visit Japan for two weeks in spring."), + ), + ), + ) + ``` + + + ```python + # Multi-turn conversation support varies by Python implementation + # Check the specific plugin documentation for message handling + result = await ai.generate( + prompt='Continue our conversation about trip planning to Japan.', + ) + ``` + + + +### Model parameters + +The `generate()` function takes a `config` parameter, through which you can +specify optional settings that control how the model generates content: + + + + ```ts + const response = await ai.generate({ + prompt: 'Invent a menu item for a restaurant with a pirate theme.', + config: { + maxOutputTokens: 512, + stopSequences: ['\n'], + temperature: 1.0, + topP: 0.95, + topK: 40, + }, + }); + ``` + + + ```go + resp, err := genkit.Generate(ctx, g, + ai.WithModelName("googleai/gemini-2.5-flash"), + ai.WithPrompt("Invent a menu item for a pirate themed restaurant."), + ai.WithConfig(&googlegenai.GeminiConfig{ + MaxOutputTokens: 500, + StopSequences: ["", ""], + Temperature: 0.5, + TopP: 0.4, + TopK: 50, + }), + ) + ``` + + + ```python + result = await ai.generate( + prompt='Invent a menu item for a pirate themed restaurant.', + config={ + 'max_output_tokens': 400, + 'stop_sequences': ['', ''], + 'temperature': 1.2, + 'top_p': 0.4, + 'top_k': 50, + }, + ) + ``` + + + +The exact parameters that are supported depend on the individual model and model +API. However, the parameters in the previous example are common to almost every +model. The following is an explanation of these parameters: + +#### Parameters that control output length + +**maxOutputTokens** + +LLMs operate on units called _tokens_. A token usually, but does not +necessarily, map to a specific sequence of characters. When you pass a prompt to +a model, one of the first steps it takes is to _tokenize_ your prompt string +into a sequence of tokens. Then, the LLM generates a sequence of tokens from the +tokenized input. Finally, the sequence of tokens gets converted back into text, +which is your output. + +The maximum output tokens parameter simply sets a limit on how many tokens to +generate using the LLM. Every model potentially uses a different tokenizer, but +a good rule of thumb is to consider a single English word to be made of 2 to 4 +tokens. + +As stated earlier, some tokens might not map to character sequences. One such +example is that there is often a token that indicates the end of the sequence: +when an LLM generates this token, it stops generating more. Therefore, it's +possible and often the case that an LLM generates fewer tokens than the maximum +because it generated the "stop" token. + +**stopSequences** + +You can use this parameter to set the tokens or token sequences that, when +generated, indicate the end of LLM output. The correct values to use here +generally depend on how the model was trained, and are usually set by the model +plugin. However, if you have prompted the model to generate another stop +sequence, you might specify it here. + +Note that you are specifying character sequences, and not tokens per se. In most +cases, you will specify a character sequence that the model's tokenizer maps to +a single token. + +#### Parameters that control "creativity" + +The _temperature_, _top-p_, and _top-k_ parameters together control how +"creative" you want the model to be. 
Below are very brief explanations of what +these parameters mean, but the more important point to take away is this: these +parameters are used to adjust the character of an LLM's output. The optimal +values for them depend on your goals and preferences, and are likely to be found +only through experimentation. + +**temperature** + +LLMs are fundamentally token-predicting machines. For a given sequence of tokens +(such as the prompt) an LLM predicts, for each token in its vocabulary, the +likelihood that the token comes next in the sequence. The temperature is a +scaling factor by which these predictions are divided before being normalized to +a probability between 0 and 1. + +Low temperature values—between 0.0 and 1.0—amplify the difference in +likelihoods between tokens, with the result that the model will be even less +likely to produce a token it already evaluated to be unlikely. This is often +perceived as output that is less creative. Although 0.0 is technically not a +valid value, many models treat it as indicating that the model should behave +deterministically, and to only consider the single most likely token. + +High temperature values—those greater than 1.0—compress the +differences in likelihoods between tokens, with the result that the model +becomes more likely to produce tokens it had previously evaluated to be +unlikely. This is often perceived as output that is more creative. Some model +APIs impose a maximum temperature, often 2.0. + +**topP** + +_Top-p_ is a value between 0.0 and 1.0 that controls the number of possible +tokens you want the model to consider, by specifying the cumulative probability +of the tokens. For example, a value of 1.0 means to consider every possible +token (but still take into account the probability of each token). A value of +0.4 means to only consider the most likely tokens, whose probabilities add up to +0.4, and to exclude the remaining tokens from consideration. + +**topK** + +_Top-k_ is an integer value that also controls the number of possible tokens you +want the model to consider, but this time by explicitly specifying the maximum +number of tokens. Specifying a value of 1 means that the model should behave +deterministically. + +#### Experiment with model parameters + +You can experiment with the effect of these parameters on the output generated +by different model and prompt combinations by using the Developer UI. Start the +developer UI with the `genkit start` command and it will automatically load all +of the models defined by the plugins configured in your project. You can quickly +try different prompts and configuration values without having to repeatedly make +these changes in code. + +### Structured output + + + +When using generative AI as a component in your application, you often want +output in a format other than plain text. Even if you're just generating content +to display to the user, you can benefit from structured output simply for the +purpose of presenting it more attractively to the user. But for more advanced +applications of generative AI, such as programmatic use of the model's output, +or feeding the output of one model into another, structured output is a must. 
+ +In Genkit, you can request structured output from a model by specifying a schema +when you call `generate()`: + + + + ```ts + import { z } from 'genkit'; + + const MenuItemSchema = z.object({ + name: z.string().describe('The name of the menu item.'), + description: z.string().describe('A description of the menu item.'), + calories: z.number().describe('The estimated number of calories.'), + allergens: z.array(z.string()).describe('Any known allergens in the menu item.'), + }); + + const response = await ai.generate({ + prompt: 'Suggest a menu item for a pirate-themed restaurant.', + output: { schema: MenuItemSchema }, + }); + ``` + + Model output schemas are specified using the [Zod](https://zod.dev/) + library. In addition to a schema definition language, Zod also provides runtime + type checking, which bridges the gap between static TypeScript types and the + unpredictable output of generative AI models. + + + ```go + type MenuItem struct { + Name string `json:"name"` + Description string `json:"description"` + Calories int `json:"calories"` + Allergens []string `json:"allergens"` + } + + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("Invent a menu item for a pirate themed restaurant."), + ai.WithOutputType(MenuItem{}), + ) + if err != nil { + log.Fatal(err) // One possible error is that the response does not conform to the type. + } + ``` + + Model output types are specified as JSON schema using the + [`invopop/jsonschema`](https://github.com/invopop/jsonschema) package. This + provides runtime type checking, which bridges the gap between static Go types + and the unpredictable output of generative AI models. + + + ```python + from pydantic import BaseModel + + class MenuItemSchema(BaseModel): + name: str + description: str + calories: int + allergens: list[str] + + result = await ai.generate( + prompt='Invent a menu item for a pirate themed restaurant.', + output_schema=MenuItemSchema, + ) + ``` + + Model output schemas are specified using [Pydantic Models](https://docs.pydantic.dev/latest/concepts/models/). In addition to a schema definition language, Pydantic also provides runtime + type checking, which bridges the gap between static Python types and the + unpredictable output of generative AI models. + + + +When you specify a schema in `generate()`, Genkit does several things behind the +scenes: + +- Augments the prompt with additional guidance about the desired output format. + This also has the side effect of specifying to the model what content exactly + you want to generate (for example, not only suggest a menu item but also + generate a description, a list of allergens, and so on). +- Parses the model output into a structured object. +- Verifies that the output conforms with the schema. + +To get structured output from a successful generate call, use the response +object's `output` property: + + + + ```ts + const menuItem = response.output; // Typed as z.infer + console.log(menuItem?.name); + ``` + + Note that the `output` property can be `null`. This can + happen when the model fails to generate output that conforms to the schema. 
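+
+  For example, a minimal guard against a `null` result might look like this (you could also retry the call, as described under "Handling errors" below):
+
+  ```ts
+  const menuItem = response.output;
+  if (!menuItem) {
+    throw new Error('Model did not return output conforming to MenuItemSchema.');
+  }
+  // Safe to use the typed result from here on.
+  console.log(menuItem.name);
+  ```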
+ + + ```go + var item MenuItem + if err := resp.Output(&item); err != nil { + log.Fatalf(err) + } + + log.Printf("%s (%d calories, %d allergens): %s\n", + item.Name, item.Calories, len(item.Allergens), item.Description) + ``` + + Alternatively, you can use `genkit.GenerateData()` for a more succinct call: + + ```go + item, resp, err := genkit.GenerateData[MenuItem](ctx, g, + ai.WithPrompt("Invent a menu item for a pirate themed restaurant."), + ) + if err != nil { + log.Fatal(err) + } + ``` + + + ```python + output = response.output + ``` + + + +#### Handling errors + +The best strategy for dealing with schema validation errors will depend on your exact use +case, but here are some general hints: + +- **Try a different model**. For structured output to succeed, the model must be + capable of generating output in JSON. The most powerful LLMs, like Gemini and + Claude, are versatile enough to do this; however, smaller models, such as some + of the local models you would use with Ollama, might not be able to generate + structured output reliably unless they have been specifically trained to do + so. + +- **Make use of coercion abilities**: You can specify in your schemas that + the validation library should try to coerce non-conforming types into the type specified by the + schema. If your schema includes primitive types other than strings, using + coercion can reduce the number of `generate()` failures you experience. + +- **Retry the generate() call**. If the model you've chosen only rarely fails to + generate conformant output, you can treat the error as you would treat a + network error, and simply retry the request using some kind of incremental + back-off strategy. + +### Streaming + +When generating large amounts of text, you can improve the experience for your +users by presenting the output as it's generated—streaming the output. A +familiar example of streaming in action can be seen in most LLM chat apps: users +can read the model's response to their message as it's being generated, which +improves the perceived responsiveness of the application and enhances the +illusion of chatting with an intelligent counterpart. + +In Genkit, you can stream output using the streaming methods: + + + + ```ts + const { stream, response } = ai.generateStream({ + prompt: 'Tell a story.', + }); + + // Stream text chunks + for await (const chunk of stream) { + console.log(chunk.text); + } + + // Get final complete response + const finalResponse = await response; + console.log(finalResponse.text); + ``` + + Streaming also works with structured output: + + ```ts + const { stream, response } = ai.generateStream({ + prompt: 'Suggest three pirate-themed menu items.', + output: { schema: z.array(MenuItemSchema) }, + }); + + for await (const chunk of stream) { + console.log(chunk.output); // Accumulated output so far + } + + const finalResponse = await response; + console.log(finalResponse.output); + ``` + + + ```go + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("Suggest a complete menu for a pirate themed restaurant."), + ai.WithStreaming(func(ctx context.Context, chunk *ai.ModelResponseChunk) error { + // Do something with the chunk... 
+ log.Println(chunk.Text()) + return nil + }), + ) + if err != nil { + log.Fatal(err) + } + + log.Println(resp.Text()) + ``` + + + ```python + stream, response = ai.generate_stream( + prompt='Suggest a complete menu for a pirate themed restaurant.', + ) + + # Stream text chunks + async for chunk in stream: + print(chunk.text) + + # Get complete output + complete_text = (await response).text + ``` + + Streaming also works with structured output: + + ```python + stream, response = ai.generate_stream( + prompt='Suggest three pirate-themed menu items.', + output_schema=MenuSchema, + ) + + async for chunk in stream: + print(chunk.output) # Accumulated output so far + + print((await response).output) + ``` + + + +Streaming structured output works a little differently from streaming text: the +`output` property of a response chunk is an object constructed from the +accumulation of the chunks that have been produced so far, rather than an object +representing a single chunk (which might not be valid on its own). **Every chunk +of structured output in a sense supersedes the chunk that came before it**. + +### Multimodal input + + + +The examples you've seen so far have used text strings as model prompts. While +this remains the most common way to prompt generative AI models, many models can +also accept other media as prompts. Media prompts are most often used in +conjunction with text prompts that instruct the model to perform some operation +on the media, such as to caption an image or transcribe an audio recording. + +The ability to accept media input and the types of media you can use are +completely dependent on the model and its API. For example, the Gemini 1.5 +series of models can accept images, video, and audio as prompts. + +To provide a media prompt to a model that supports it, instead of passing a +simple text prompt to `generate`, pass an array consisting of a media part and a +text part: + + + + ```ts + const response = await ai.generate({ + prompt: [{ media: { url: 'https://.../image.jpg' } }, { text: 'What is in this image?' }], + }); + ``` + + You can also pass media data directly by encoding it as a data URL: + + ```ts + import { readFile } from 'node:fs/promises'; + + const data = await readFile('image.jpg'); + const response = await ai.generate({ + prompt: [{ media: { url: `data:image/jpeg;base64,${data.toString('base64')}` } }, { text: 'What is in this image?' 
}], + }); + ``` + + + ```go + resp, err := genkit.Generate(ctx, g, + ai.WithModelName("googleai/gemini-2.5-flash"), + ai.WithMessages( + NewUserMessage( + NewMediaPart("image/jpeg", "https://example.com/photo.jpg"), + NewTextPart("Compose a poem about this image."), + ), + ), + ) + ``` + + You can also pass media data directly by encoding it as a data URL: + + ```go + image, err := ioutil.ReadFile("photo.jpg") + if err != nil { + log.Fatal(err) + } + + resp, err := genkit.Generate(ctx, g, + ai.WithModelName("googleai/gemini-2.5-flash"), + ai.WithMessages( + NewUserMessage( + NewMediaPart("image/jpeg", "data:image/jpeg;base64," + base64.StdEncoding.EncodeToString(image)), + NewTextPart("Compose a poem about this image."), + ), + ), + ) + ``` + + + ```python + from genkit.ai import Part + + result = await ai.generate( + prompt=[ + Part(media={'url': 'https://example.com/photo.jpg'}), + Part(text='Compose a poem about this image.'), + ], + ) + ``` + + You can also pass media data directly by encoding it as a data URL: + + ```python + import base64 + from genkit.ai import Part + + # Read image bytes + with open('image.jpg', 'rb') as f: + image_bytes = f.read() + + base64_encoded_image = base64.b64encode(image_bytes).decode('utf-8') + + result = await ai.generate( + prompt=[ + Part(media={'url': f'data:image/jpeg;base64,{base64_encoded_image}'}), + Part(text='Compose a poem about this image.'), + ], + ) + ``` + + + +All models that support media input support both data URLs and HTTPS URLs. Some +model plugins add support for other media sources. For example, the Vertex AI +plugin also lets you use Cloud Storage (`gs://`) URLs. + +### Generating Media + +While most examples in this guide focus on generating text with LLMs, Genkit also supports generating other types of media, including **images** and **audio**. Thanks to its unified `generate()` interface, working with media models is just as straightforward as generating text. + +:::note +Genkit returns generated media as a **data URL**, a widely supported format for handling binary media in both browsers and Node.js environments. +::: + +#### Image Generation + +To generate an image, you can use models that support image generation. Here's an example using Google AI's image generation capabilities: + + + + ```ts + import { googleAI } from '@genkit-ai/googleai'; + import { parseDataUrl } from 'data-urls'; + import { writeFile } from 'node:fs/promises'; + + const response = await ai.generate({ + model: googleAI.model('gemini-2.5-flash-preview-tts'), // Example model with media capabilities + prompt: 'An illustration of a dog wearing a space suit, photorealistic', + output: { format: 'media' }, + }); + + const imagePart = response.output; + if (imagePart?.media?.url) { + const parsed = parseDataUrl(imagePart.media.url); + if (parsed) { + await writeFile('dog.png', parsed.body); + } + } + ``` + + + ```go + // Image generation support varies by Go implementation + // Check the specific plugin documentation for media generation + ``` + + + ```python + # Image generation support varies by Python implementation + # Check the specific plugin documentation for media generation + ``` + + + +### Next steps + +#### Learn more about Genkit + +- As an app developer, the primary way you influence the output of generative AI + models is through prompting. Read [Prompt management](/docs/dotprompt) to learn how + Genkit helps you develop effective prompts and manage them in your codebase. 
+- Although `generate()` is the nucleus of every generative AI powered + application, real-world applications usually require additional work before + and after invoking a generative AI model. To reflect this, Genkit introduces + the concept of _flows_, which are defined like functions but add additional + features such as observability and simplified deployment. To learn more, see + [Defining workflows](/docs/flows). + +#### Advanced LLM use + +- Many of your users will have interacted with large language models for the first time through chatbots. Although LLMs are capable of much more than simulating conversations, it remains a familiar and useful style of interaction. Even when your users will not be interacting directly with the model in this way, the conversational style of prompting is a powerful way to influence the output generated by an AI model. Read [Multi-turn chats](/docs/chat) to learn how to use Genkit as part of an LLM chat implementation. +- One way to enhance the capabilities of LLMs is to prompt them with a list of + ways they can request more information from you, or request you to perform + some action. This is known as _tool calling_ or _function calling_. Models + that are trained to support this capability can respond to a prompt with a + specially-formatted response, which indicates to the calling application that + it should perform some action and send the result back to the LLM along with + the original prompt. Genkit has library functions that automate both the + prompt generation and the call-response loop elements of a tool calling + implementation. See [Tool calling](/docs/tool-calling) to learn more. +- Retrieval-augmented generation (RAG) is a technique used to introduce + domain-specific information into a model's output. This is accomplished by + inserting relevant information into a prompt before passing it on to the + language model. A complete RAG implementation requires you to bring several + technologies together: text embedding generation models, vector databases, and + large language models. See [Retrieval-augmented generation (RAG)](/docs/rag) to + learn how Genkit simplifies the process of coordinating these various + elements. + +#### Testing model output + +As a software engineer, you're used to deterministic systems where the same +input always produces the same output. However, with AI models being +probabilistic, the output can vary based on subtle nuances in the input, the +model's training data, and even randomness deliberately introduced by parameters +like temperature. + +Genkit's evaluators are structured ways to assess the quality of your LLM's +responses, using a variety of strategies. Read more on the +[Evaluation](/docs/evaluation) page. diff --git a/src/content/docs/unified-docs/get-started.mdx b/src/content/docs/unified-docs/get-started.mdx new file mode 100644 index 00000000..7f4c6944 --- /dev/null +++ b/src/content/docs/unified-docs/get-started.mdx @@ -0,0 +1,538 @@ +--- +title: Get started with Genkit +description: Learn how to get started with Genkit across JavaScript, Go, and Python, including project setup, installing packages, configuring API keys, creating your first flow, and testing in the Developer UI. +--- + +import { LinkButton } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +This guide shows you how to get started with Genkit in your preferred language and test it in the Developer UI. 
+ +## Prerequisites + +Before you begin, make sure your environment meets these requirements: + + + + - Node.js v20 or later + - npm + + This guide assumes you're already familiar with building Node.js applications. + + + - Go 1.24 or later ([Download and install](https://go.dev/doc/install)) + + This guide assumes you're already familiar with building Go applications. + + + - Python 3.10 or later ([Download and install](https://www.python.org/downloads/)) + - Node.js 20 or later (for the Genkit CLI and UI) + + :::note[Alpha Release] + The Genkit libraries for Python are currently in **Alpha**. You might see API and functional changes as development progresses. We recommend using it only for prototyping and exploration. + ::: + + + +## Set up your project + + + + Create a new Node.js project and configure TypeScript: + + ```sh + mkdir my-genkit-app + cd my-genkit-app + npm init -y + + # Set up your source directory + mkdir src + touch src/index.ts + + # Install and configure TypeScript + npm install -D typescript tsx + npx tsc --init + ``` + + This sets up your project structure and a TypeScript entry point at `src/index.ts`. + + + Initialize a new Go project directory: + + ```bash + mkdir genkit-intro && cd genkit-intro + + go mod init example/genkit-intro + ``` + + Create a `main.go` file for your application entry point. + + + Create a new project directory and set up a virtual environment: + + ```bash + mkdir genkit-intro && cd genkit-intro + ``` + + (Recommended) Create a Python virtual environment: + + ```bash + python3 -m venv . + ``` + + Activate the virtual environment if necessary: + + ```bash + source bin/activate # for bash + ``` + + + +## Install Genkit packages + + + + First, install the Genkit CLI globally. This gives you access to local developer tools, including the Developer UI: + + ```bash + npm install -g genkit-cli + ``` + + Then, add the following packages to your project: + + ```bash + npm install genkit @genkit-ai/googleai + ``` + + - `genkit` provides Genkit core capabilities. + - `@genkit-ai/googleai` provides access to the Google AI Gemini models. + + + Install the Genkit package for Go: + + ```bash + go get github.com/firebase/genkit/go + ``` + + This provides Genkit core capabilities and access to Google AI Gemini models. + + + Install the required Python packages: + + ```bash + pip3 install genkit + pip3 install genkit-plugin-google-genai + ``` + + Or create a `requirements.txt` file: + + ```text title="requirements.txt" + genkit + genkit-plugin-google-genai + ``` + + and run: + + ```bash + pip3 install -r requirements.txt + ``` + + + +## Configure your model API key + +Genkit can work with multiple model providers. This guide uses the **Gemini API**, which offers a generous free tier and doesn't require a credit card to get started. + +To use it, you'll need an API key from Google AI Studio: + + + Get a Gemini API Key + + +Once you have a key, set the `GEMINI_API_KEY` environment variable: + +```sh +export GEMINI_API_KEY= +``` + +:::note +Genkit also supports models from Vertex AI, Anthropic, OpenAI, Cohere, Ollama, and more. See [generating content](/unified-docs/generating-content) for details. +::: + +## Create your first application + + + + A flow is a special Genkit function with built-in observability, type safety, and tooling integration. 
+ + Update `src/index.ts` with the following: + + ```ts + import { googleAI } from '@genkit-ai/googleai'; + import { genkit, z } from 'genkit'; + + // Initialize Genkit with the Google AI plugin + const ai = genkit({ + plugins: [googleAI()], + model: googleAI.model('gemini-2.5-flash', { + temperature: 0.8 + }), + }); + + // Define input schema + const RecipeInputSchema = z.object({ + ingredient: z.string().describe('Main ingredient or cuisine type'), + dietaryRestrictions: z.string().optional().describe('Any dietary restrictions'), + }); + + // Define output schema + const RecipeSchema = z.object({ + title: z.string(), + description: z.string(), + prepTime: z.string(), + cookTime: z.string(), + servings: z.number(), + ingredients: z.array(z.string()), + instructions: z.array(z.string()), + tips: z.array(z.string()).optional(), + }); + + // Define a recipe generator flow + export const recipeGeneratorFlow = ai.defineFlow( + { + name: 'recipeGeneratorFlow', + inputSchema: RecipeInputSchema, + outputSchema: RecipeSchema, + }, + async (input) => { + // Create a prompt based on the input + const prompt = `Create a recipe with the following requirements: + Main ingredient: ${input.ingredient} + Dietary restrictions: ${input.dietaryRestrictions || 'none'}`; + + // Generate structured recipe data using the same schema + const { output } = await ai.generate({ + prompt, + output: { schema: RecipeSchema }, + }); + + if (!output) throw new Error('Failed to generate recipe'); + + return output; + } + ); + + // Run the flow + async function main() { + const recipe = await recipeGeneratorFlow({ + ingredient: 'avocado', + dietaryRestrictions: 'vegetarian' + }); + + console.log(recipe); + } + + main().catch(console.error); + ``` + + This code sample: + + - Defines reusable input and output schemas with [Zod](https://zod.dev/) + - Configures the `gemini-2.5-flash` model with temperature settings + - Defines a Genkit flow to generate a structured recipe based on your input + - Runs the flow with a sample input and prints the result + + ##### Why use flows? + + - Type-safe inputs and outputs + - Integrates with the Developer UI + - Easy deployment as APIs + - Built-in tracing and observability + + + Create a `main.go` file with the following sample code: + + ```go + package main + + import ( + "context" + "log" + + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlegenai" + ) + + func main() { + ctx := context.Background() + + // Initialize Genkit with the Google AI plugin and Gemini 2.5 Flash. + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlegenai.GoogleAI{}), + genkit.WithDefaultModel("googleai/gemini-2.5-flash"), + ) + if err != nil { + log.Fatalf("could not initialize Genkit: %v", err) + } + + resp, err := genkit.Generate(ctx, g, ai.WithPrompt("What is the meaning of life?")) + if err != nil { + log.Fatalf("could not generate model response: %v", err) + } + + log.Println(resp.Text()) + } + ``` + + This code sample: + + - Initializes Genkit with the Google AI plugin + - Configures the `gemini-2.5-flash` model as the default + - Makes a simple generation request + - Prints the model's response + + For more advanced examples with flows and structured output, see [creating flows](/unified-docs/creating-flows). 
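+
+  If you'd like a quick preview of flows in Go, here is a minimal sketch that wraps the generate call from above. The flow name and input are illustrative only, and the sketch assumes the same `g` and imports as the sample:
+
+  ```go
+  // Define a flow so the call shows up as a named, traceable unit in the Developer UI.
+  recipeFlow := genkit.DefineFlow(g, "recipeGeneratorFlow",
+      func(ctx context.Context, ingredient string) (string, error) {
+          resp, err := genkit.Generate(ctx, g,
+              ai.WithPrompt("Invent a recipe whose main ingredient is "+ingredient))
+          if err != nil {
+              return "", err
+          }
+          return resp.Text(), nil
+      })
+
+  // Flows can be called like regular functions.
+  recipe, err := recipeFlow.Run(ctx, "avocado")
+  if err != nil {
+      log.Fatalf("flow failed: %v", err)
+  }
+  log.Println(recipe)
+  ```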
+ + + Create a `main.py` file: + + ```python title="main.py" + import json + from pydantic import BaseModel, Field + from genkit.ai import Genkit + from genkit.plugins.google_genai import GoogleAI + + ai = Genkit( + plugins=[GoogleAI()], + model='googleai/gemini-2.5-flash', + ) + + class RpgCharacter(BaseModel): + name: str = Field(description='name of the character') + back_story: str = Field(description='back story') + abilities: list[str] = Field(description='list of abilities (3-4)') + + @ai.flow() + async def generate_character(name: str): + result = await ai.generate( + prompt=f'generate an RPG character named {name}', + output_schema=RpgCharacter, + ) + return result.output + + async def main() -> None: + print(json.dumps(await generate_character('Goblorb'), indent=2)) + + ai.run_main(main()) + ``` + + This code sample: + + - Initializes Genkit with the Google AI plugin + - Defines a structured output schema using Pydantic + - Creates a flow to generate RPG characters + - Runs the flow and prints the structured result + + + +## Run your application + + + + Run your application to see it in action: + + ```bash + npx tsx src/index.ts + ``` + + You should see a structured recipe output in your console. + + + Run the app to see the model response: + + ```bash + go run . + # Example output (may vary): + # There is no single universally agreed-upon meaning of life; it's a deeply + # personal question. Many find meaning through connection, growth, + # contribution, happiness, or discovering their own purpose. + ``` + + + Run your app (Genkit apps are just regular Python applications): + + ```bash + python3 main.py + ``` + + You should see a structured RPG character output in JSON format. + + + +## Test in the Developer UI + +The **Developer UI** is a local tool for testing and inspecting Genkit components, like flows, with a visual interface. + +### Install the Genkit CLI (if needed) + + + + If you followed the installation steps above, you already have the Genkit CLI installed. + + + Install the Genkit CLI using npm: + + ```bash + npm install -g genkit-cli + ``` + + This requires Node.js to be installed on your system. + + + If you don't already have Node 20 or newer on your system, install it now. + + **Recommendation**: The [`nvm`](https://github.com/nvm-sh/nvm) and [`nvm-windows`](https://github.com/coreybutler/nvm-windows) tools are a convenient way to install specific versions of Node. + + To install `nvm`: + + **Linux, macOS, etc.:** + ```bash + curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash + ``` + + **Windows:** + Download and run the installer as described in the [nvm-windows docs](https://github.com/coreybutler/nvm-windows?tab=readme-ov-file#install-nvm-windows). + + Then, to install Node and npm: + ```bash + nvm install 20 + ``` + + Install the Genkit CLI: + ```bash + npm install -g genkit-cli + ``` + + + +### Start the Developer UI + + + + Run the following command from your project root: + + ```bash + genkit start -- npx tsx --watch src/index.ts + ``` + + This starts your app and launches the Developer UI at `http://localhost:4000` by default. + + :::note + The command after `--` should run the file that defines or imports your Genkit components. You can use `tsx`, `node`, or other commands based on your setup. Learn more in [developer tools](/unified-docs/developer-tools). 
+ ::: + + ##### Optional: Add an npm script + + To make starting the Developer UI easier, add the following to your `package.json` scripts: + + ```json + "scripts": { + "genkit:ui": "genkit start -- npx tsx --watch src/index.ts" + } + ``` + + Then run it with: + + ```sh + npm run genkit:ui + ``` + + + Run the following command from your project root: + + ```bash + genkit start -- go run . + ``` + + This starts your app and launches the Developer UI at `http://localhost:4000` by default. + + + To inspect your app with Genkit Dev UI, run: + + ```bash + genkit start -- python3 main.py + ``` + + The command will print the Dev UI URL: + + ``` + Genkit Developer UI: http://localhost:4000 + ``` + + + +### Run and inspect flows + + + + In the Developer UI: + + 1. Select the `recipeGeneratorFlow` from the list of flows + 2. Enter sample input: + ```json + { + "ingredient": "avocado", + "dietaryRestrictions": "vegetarian" + } + ``` + 3. Click **Run** + + You'll see the generated recipe as structured output, along with a visual trace of the AI generation process for debugging and optimization. + + + + + In the Developer UI, you can: + + - Test generation requests with different prompts + - View traces of your application's execution + - Inspect model responses and performance metrics + - Debug any issues with your Genkit integration + + For more advanced flows and structured output, see [creating flows](/unified-docs/creating-flows). + + + In the Developer UI: + + 1. Select the `generate_character` flow from the list of flows + 2. Enter a character name as input (e.g., `"Goblorb"`) + 3. Click **Run** + + You'll see the generated RPG character as structured output, along with execution traces for debugging. + + + +## Next steps + +Now that you've created and tested your first Genkit application, explore more features to build powerful AI-driven applications: + +- [Developer tools](/unified-docs/developer-tools): Set up your local workflow with the Genkit CLI and Dev UI. +- [Generating content](/unified-docs/generating-content): Use Genkit's unified generation API to work with multimodal and structured output across supported models. +- [Creating flows](/unified-docs/creating-flows): Learn about streaming flows, schema customization, deployment options, and more. +- [Tool calling](/unified-docs/tool-calling): Enable your AI models to interact with external systems and APIs. +- [Managing prompts with Dotprompt](/unified-docs/dotprompt): Define flexible prompt templates using `.prompt` files or code. diff --git a/src/content/docs/unified-docs/interrupts.mdx b/src/content/docs/unified-docs/interrupts.mdx new file mode 100644 index 00000000..0cffe8aa --- /dev/null +++ b/src/content/docs/unified-docs/interrupts.mdx @@ -0,0 +1,551 @@ +--- +title: Pause generation using interrupts +description: Learn how to use interrupts in Genkit to pause and resume LLM generation, enabling human-in-the-loop interactions, asynchronous processing, and controlled task completion across JavaScript and Python. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +:::caution[Beta] +This feature of Genkit is in **Beta,** which means it is not yet part of Genkit's stable API. APIs of beta features may change in minor version releases. +::: + +_Interrupts_ are a special kind of [tool](/unified-docs/tool-calling) that can pause the +LLM generation-and-tool-calling loop to return control back to you. 
When +you're ready, you can then _resume_ generation by sending _replies_ that the LLM +processes for further generation. + +The most common uses for interrupts fall into a few categories: + +- **Human-in-the-Loop:** Enabling the user of an interactive AI + to clarify needed information or confirm the LLM's action + before it is completed, providing a measure of safety and confidence. +- **Async Processing:** Starting an asynchronous task that can only be + completed out-of-band, such as sending an approval notification to + a human reviewer or kicking off a long-running background process. +- **Exit from an Autonomous Task:** Providing the model a way + to mark a task as complete, in a workflow that might iterate through + a long series of tool calls. + +## Availability + + + + Interrupts are fully supported in JavaScript with comprehensive APIs for defining, using, and responding to interrupts. + + + Interrupts are not currently available in Go. Use alternative patterns like conditional tool execution or external coordination mechanisms. + + + Interrupts are supported in Python with similar functionality to JavaScript, though with some API differences. + + + +## Before you begin + +All of the examples documented here assume that you have already set up a +project with Genkit dependencies installed. If you want to run the code +examples on this page, first complete the steps in the Getting started guide for your language. + +Before diving too deeply, you should also be familiar with the following +concepts: + +- [Generating content](/unified-docs/generating-content) with AI models +- Genkit's system for [defining input and output schemas](/unified-docs/creating-flows) +- General methods of [tool calling](/unified-docs/tool-calling) + +## Overview of interrupts + +At a high level, this is what an interrupt looks like when +interacting with an LLM: + +1. The calling application prompts the LLM with a request. The prompt includes + a list of tools, including at least one for an interrupt that the LLM + can use to generate a response. +2. The LLM generates either a complete response or a tool call request + in a specific format. To the LLM, an interrupt call looks like any + other tool call. +3. If the LLM calls an interrupt tool, + the Genkit library automatically pauses generation rather than immediately + passing responses back to the model for additional processing. +4. The developer checks whether an interrupt call is made, and performs whatever + task is needed to collect the information needed for the interrupt response. +5. The developer resumes generation by passing an interrupt response to the + model. This action triggers a return to Step 2. + +## Define manual-response interrupts + +The most common kind of interrupt allows the LLM to request clarification from +the user, for example by asking a multiple-choice question. 
+ + + + Use the Genkit instance's `defineInterrupt()` method: + + ```ts + import { genkit, z } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + + const ai = genkit({ + plugins: [googleAI()], + model: googleAI.model('gemini-2.5-flash'), + }); + + const askQuestion = ai.defineInterrupt({ + name: 'askQuestion', + description: 'use this to ask the user a clarifying question', + inputSchema: z.object({ + choices: z.array(z.string()).describe('the choices to display to the user'), + allowOther: z.boolean().optional().describe('when true, allow write-ins'), + }), + outputSchema: z.string(), + }); + ``` + + Note that the `outputSchema` of an interrupt corresponds to the response data + you will provide as opposed to something that will be automatically populated + by a tool function. + + + Interrupts are not currently available in Go. Consider using alternative patterns: + + ```go + // Alternative: Use conditional tool execution + func conditionalTool(ctx context.Context, input ToolInput) (ToolOutput, error) { + // Check conditions and return early if user confirmation needed + if needsConfirmation(input) { + return ToolOutput{ + Status: "NEEDS_CONFIRMATION", + Message: "Please confirm this action", + }, nil + } + + // Proceed with normal execution + return executeAction(input) + } + ``` + + + Use the Genkit instance's `tool()` decorator: + + ```python + from pydantic import BaseModel, Field + + class Questions(BaseModel): + choices: list[str] = Field(description='the choices to display to the user') + allow_other: bool = Field(description='when true, allow write-ins') + + @ai.tool() + def ask_question(input: Questions, ctx) -> str: + """Use this to ask the user a clarifying question""" + ctx.interrupt() + ``` + + Note that the return type annotation of an interrupt corresponds to the response data + you will provide as opposed to something that will be automatically populated + by a tool function. + + + +## Use interrupts + +Interrupts are passed into the `tools` array when generating content, just like +other types of tools. You can pass both normal tools and interrupts to the +same `generate` call: + + + + ### Generate + + ```ts + const response = await ai.generate({ + prompt: "Ask me a movie trivia question.", + tools: [askQuestion], + }); + ``` + + ### definePrompt + + ```ts + const triviaPrompt = ai.definePrompt({ + name: "triviaPrompt", + tools: [askQuestion], + input: { + schema: z.object({ subject: z.string() }), + }, + prompt: "Ask me a trivia question about {{subject}}.", + }); + + const response = await triviaPrompt({ subject: "computer history" }); + ``` + + ### Prompt file + + ```dotprompt + --- + tools: [askQuestion] + input: + schema: + partyType: string + --- + + {{role "system"}} + Use the askQuestion tool if you need to clarify something. + + {{role "user"}} + Help me plan a {{partyType}} party next week. + ``` + + Then you can execute the prompt in your code as follows: + + ```ts + // assuming prompt file is named partyPlanner.prompt + const partyPlanner = ai.prompt("partyPlanner"); + + const response = await partyPlanner({ partyType: "birthday" }); + ``` + + ### Chat + + ```ts + const chat = ai.chat({ + system: "Use the askQuestion tool if you need to clarify something.", + tools: [askQuestion], + }); + + const response = await chat.send("make a plan for my birthday party"); + ``` + + + Interrupts are not available in Go. Use alternative patterns like conditional tool execution or external coordination mechanisms. 
+ + + ```python + interrupted_response = await ai.generate( + prompt='Ask me a movie trivia question.', + tools=['ask_question'], + ) + ``` + + + +Genkit immediately returns a response on receipt of an interrupt tool call. + +## Respond to interrupts + +If you've passed one or more interrupts to your generate call, you +need to check the response for interrupts so that you can handle them: + + + + ```ts + // you can check the 'finishReason' of the response + response.finishReason === 'interrupted'; + // or you can check to see if any interrupt requests are on the response + response.interrupts.length > 0; + ``` + + Responding to an interrupt is done using the `resume` option on a subsequent + `generate` call, making sure to pass in the existing history. Each tool has + a `.respond()` method on it to help construct the response. + + Once resumed, the model re-enters the generation loop, including tool + execution, until either it completes or another interrupt is triggered: + + ```ts + let response = await ai.generate({ + tools: [askQuestion], + system: 'ask clarifying questions until you have a complete solution', + prompt: 'help me plan a backyard BBQ', + }); + + while (response.interrupts.length) { + const answers = []; + // multiple interrupts can be called at once, so we handle them all + for (const question of response.interrupts) { + answers.push( + // use the `respond` method on our tool to populate answers + askQuestion.respond( + question, + // send the tool request input to the user to respond + await askUser(question.toolRequest.input), + ), + ); + } + + response = await ai.generate({ + tools: [askQuestion], + messages: response.messages, + resume: { + respond: answers, + }, + }); + } + + // no more interrupts, we can see the final response + console.log(response.text); + ``` + + + Not applicable - interrupts are not available in Go. + + + ```python + # You can check the 'finish_reason' attribute of the response + if interrupted_response.finish_reason == 'interrupted': + print("Generation interrupted.") + + # Or you can check if any interrupt requests are on the response + if interrupted_response.interrupts and len(interrupted_response.interrupts) > 0: + print(f"Interrupts found: {len(interrupted_response.interrupts)}") + ``` + + Responding to an interrupt is done using the `tool_responses` option on a subsequent + `generate` call, making sure to pass in the existing history. There's a `tool_response` + helper function to help you construct the response. + + Once resumed, the model re-enters the generation loop, including tool + execution, until either it completes or another interrupt is triggered: + + ```python + from genkit.ai import tool_response + + response = await ai.generate( + messages=interrupted_response.messages, + tool_responses=[tool_response(interrupted_response.interrupts[0], 'b')], + tools=['ask_question'], + ) + ``` + + + +## Tools with restartable interrupts + +Another common pattern for interrupts is the need to _confirm_ an action that +the LLM suggests before actually performing it. For example, a payments app +might want the user to confirm certain kinds of transfers. + + + + For this use case, you can use the standard `defineTool` method to add custom + logic around when to trigger an interrupt, and what to do when an interrupt is + _restarted_ with additional metadata. 
+ + ### Define a restartable tool + + Every tool has access to two special helpers in the second argument of its + implementation definition: + + - `interrupt`: when called, this method throws a special kind of exception that + is caught to pause the generation loop. You can provide additional metadata + as an object. + - `resumed`: when a request from an interrupted generation is restarted using + the `{resume: {restart: ...}}` option (see below), this helper contains the + metadata provided when restarting. + + If you were building a payments app, for example, you might want to confirm with + the user before making a transfer exceeding a certain amount: + + ```ts + const transferMoney = ai.defineTool({ + name: 'transferMoney', + description: 'Transfers money between accounts.', + inputSchema: z.object({ + toAccountId: z.string().describe('the account id of the transfer destination'), + amount: z.number().describe('the amount in integer cents (100 = $1.00)'), + }), + outputSchema: z.object({ + status: z.string().describe('the outcome of the transfer'), + message: z.string().optional(), + }) + }, async (input, {context, interrupt, resumed}) => { + // if the user rejected the transaction + if (resumed?.status === "REJECTED") { + return {status: 'REJECTED', message: 'The user rejected the transaction.'}; + } + // trigger an interrupt to confirm if amount > $100 + if (resumed?.status !== "APPROVED" && input.amount > 10000) { + interrupt({ + message: "Please confirm sending an amount > $100.", + }); + } + // complete the transaction if not interrupted + return doTransfer(input); + }); + ``` + + In this example, on first execution (when `resumed` is undefined), the tool + checks to see if the amount exceeds $100, and triggers an interrupt if so. On + second execution, it looks for a status in the new metadata provided and + performs the transfer or returns a rejection response, depending on whether it + is approved or rejected. + + ### Restart tools after interruption + + Interrupt tools give you full control over: + + 1. When an initial tool request should trigger an interrupt. + 2. When and whether to resume the generation loop. + 3. What additional information to provide to the tool when resuming. + + In the example shown in the previous section, the application might ask the user + to confirm the interrupted request to make sure the transfer amount is okay: + + ```ts + let response = await ai.generate({ + tools: [transferMoney], + prompt: "Transfer $1000 to account ABC123", + }); + + while (response.interrupts.length) { + const confirmations = []; + // multiple interrupts can be called at once, so we handle them all + for (const interrupt of response.interrupts) { + confirmations.push( + // use the 'restart' method on our tool to provide `resumed` metadata + transferMoney.restart( + interrupt, + // send the tool request input to the user to respond. assume that this + // returns `{status: "APPROVED"}` or `{status: "REJECTED"}` + await requestConfirmation(interrupt.toolRequest.input) + ) + ); + } + + response = await ai.generate({ + tools: [transferMoney], + messages: response.messages, + resume: { + restart: confirmations, + } + }) + } + + // no more interrupts, we can see the final response + console.log(response.text); + ``` + + + Not applicable - interrupts are not available in Go. 
Consider implementing confirmation logic within your tools: + + ```go + func transferMoney(ctx context.Context, input TransferInput) (TransferOutput, error) { + // Implement confirmation logic within the tool + if input.Amount > 10000 && !input.Confirmed { + return TransferOutput{ + Status: "NEEDS_CONFIRMATION", + Message: "Please confirm transfer amount > $100", + RequiresConfirmation: true, + }, nil + } + + // Proceed with transfer if confirmed or amount is small + return executeTransfer(input) + } + ``` + + + Similar patterns are available in Python, though the specific APIs may differ. Consult the Python documentation for the most current implementation details. + + + +## Best practices + +### When to use interrupts + + + + - **User confirmation**: For actions that have significant consequences (payments, deletions, etc.) + - **Missing information**: When the LLM needs clarification to proceed + - **Async operations**: For long-running tasks that need to complete out-of-band + - **Safety checks**: To add human oversight to autonomous AI workflows + + + Since interrupts are not available, consider these alternatives: + - **Conditional tools**: Return status codes that indicate when confirmation is needed + - **Multi-step flows**: Break complex operations into smaller, confirmable steps + - **External coordination**: Use external systems to manage approval workflows + + + - **User confirmation**: For actions that have significant consequences + - **Missing information**: When the LLM needs clarification to proceed + - **Async operations**: For long-running tasks that need to complete out-of-band + - **Safety checks**: To add human oversight to autonomous AI workflows + + + +### Error handling + + + + Always handle the case where interrupts might not be responded to: + + ```ts + let response = await ai.generate({ + tools: [askQuestion], + prompt: 'help me plan a party', + }); + + let maxRetries = 3; + let retryCount = 0; + + while (response.interrupts.length && retryCount < maxRetries) { + try { + // Handle interrupts... 
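+      // Build answers for each pending interrupt before resuming. `askUser` is
+      // assumed to be your own helper that collects the user's answer, as in the
+      // earlier "Respond to interrupts" example.
+      const answers = [];
+      for (const question of response.interrupts) {
+        answers.push(askQuestion.respond(question, await askUser(question.toolRequest.input)));
+      }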
+ response = await ai.generate({ + tools: [askQuestion], + messages: response.messages, + resume: { respond: answers }, + }); + retryCount++; + } catch (error) { + console.error('Error handling interrupt:', error); + break; + } + } + ``` + + + Implement proper error handling in your conditional tools: + + ```go + func handleConditionalTool(ctx context.Context, input ToolInput) (ToolOutput, error) { + if needsConfirmation(input) { + return ToolOutput{ + Status: "NEEDS_CONFIRMATION", + Message: "Please confirm this action", + }, nil + } + + result, err := executeAction(input) + if err != nil { + return ToolOutput{}, fmt.Errorf("action failed: %w", err) + } + + return result, nil + } + ``` + + + Always handle the case where interrupts might not be responded to: + + ```python + try: + response = await ai.generate( + messages=interrupted_response.messages, + tool_responses=[tool_response(interrupted_response.interrupts[0], user_input)], + tools=['ask_question'], + ) + except Exception as e: + print(f"Error handling interrupt: {e}") + # Handle error appropriately + ``` + + + +## Next steps + +- Learn about [tool calling](/unified-docs/tool-calling) to understand the foundation that interrupts build upon +- Explore [creating flows](/unified-docs/creating-flows) to build complex AI workflows that incorporate interrupts +- See [developer tools](/unified-docs/developer-tools) for testing and debugging interrupt-enabled applications +- Check out [generating content](/unified-docs/generating-content) for understanding the generation loop that interrupts can pause diff --git a/src/content/docs/unified-docs/mcp-server.mdx b/src/content/docs/unified-docs/mcp-server.mdx new file mode 100644 index 00000000..7a4edaff --- /dev/null +++ b/src/content/docs/unified-docs/mcp-server.mdx @@ -0,0 +1,260 @@ +--- +title: "Genkit MCP Server" +description: "Integrate Genkit with MCP-aware IDEs and tools." +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +The Genkit MCP (Model Context Protocol) Server enables seamless integration of your Genkit projects with various development environments and AI tools. By exposing Genkit functionalities through the Model Context Protocol, it allows LLM agents and IDEs to discover, interact with, and monitor your Genkit flows and other components. + +## What is the MCP Server? + +The Genkit MCP Server acts as a bridge between your Genkit application and external tools that understand the Model Context Protocol. This allows these tools to: + +- **Discover Genkit flows:** Tools can list all available flows defined in your project, along with their input schemas, enabling them to understand how to call them. +- **Run Genkit flows:** External tools can execute your Genkit flows, providing inputs and receiving outputs. +- **Access trace details:** The server allows for retrieval and analysis of execution traces for your Genkit flows, providing insights into their performance and behavior. +- **Look up Genkit documentation:** Integrated tools can access Genkit documentation directly through the MCP server, aiding in development and debugging. + +## Getting Started + +To use the Genkit MCP Server, you first need to have the Genkit CLI installed. If you haven't already, install it globally: + +```bash +npm install -g genkit-cli +``` + +:::note +The examples in this guide assume you have installed the Genkit CLI globally using `npm install -g genkit-cli`. 
If you have installed Genkit CLI locally in your project instead, you'll need to prefix all `genkit` commands with `npx` (e.g., use `npx genkit mcp` instead of `genkit mcp`). +::: + +### Configuring the MCP Server + +The Genkit MCP Server is typically configured within an MCP-aware IDE or tool. The configuration details often include: + +- **`serverName`**: A unique name for the server (e.g., "genkit"). +- **`command`**: The command to execute the MCP server (e.g., `genkit`). +- **`args`**: Arguments to pass to the command (e.g., `["mcp"]` to run the Genkit MCP server). +- **`cwd`**: The current working directory where the command should be executed. +- **`timeout`**: The maximum time (in milliseconds) the server is allowed to start. +- **`trust`**: A boolean indicating whether to automatically trust the server. Setting this to `true` allows tools to execute commands from this server without requiring explicit user confirmation for each action. + +## Integration with AI Development Tools + + + + To integrate the Genkit MCP Server with the Gemini CLI, you can add a configuration entry to your `.gemini/settings.json` file. This file is typically located in your project root or your user's home directory. + + ```json + { + "mcpServers": { + "genkit": { + "command": "genkit", + "args": ["mcp"], + "cwd": "", + "timeout": 30000, + "trust": false + } + } + } + ``` + + After adding this configuration, restart your Gemini CLI session for the changes to take effect. You can then interact with your Genkit flows and tools directly from the Gemini CLI. + + ### Video Tutorial + + Watch this video tutorial to see how to set up and use the Genkit MCP Server with the Gemini CLI: + +
+
+ +
+
+
+ + + Cursor AI IDE provides a built-in MCP client that supports an arbitrary number of MCP servers. + + To add the Genkit MCP Server in Cursor: + + 1. Open Cursor Settings by navigating to **File > Preferences > Cursor Settings** or by using the command palette. + 2. Select the **MCP** option in the settings. + 3. Click on the **"+ Add New MCP Server"** button. + 4. Provide the configuration details. You can set the `Type` to `stdio` and the `Command` to `genkit mcp`. Remember to specify the correct `cwd` if your Genkit project is not in the default directory. + 5. Configuration can be stored globally (`~/.cursor/mcp.json`) or locally (project-specific, `.cursor/mcp.json`). + + Once configured, Cursor's AI assistant will automatically invoke the server's tools when needed. + + + + Claude Code functions as both an MCP server and client and can connect to external tools via MCP. + + To add the Genkit MCP Server to Claude Code: + + 1. You can configure MCP servers in Claude Code through: + - Project configuration (available when running Claude Code in that directory). + - Global configuration (available in all projects). + - A checked-in `.mcp.json` file (shared with everyone in the project). + + 2. From the command line, use the `claude mcp add` command: + + ```bash + claude mcp add --transport stdio genkit genkit mcp --cwd --scope + ``` + + - Replace `` with the actual path to your Genkit project. + - Choose a ``: `local` (default, only available to you in the current project), `project` (shared with everyone via `.mcp.json`), or `user` (available to you across all projects). + + Claude Code will then be able to leverage Genkit's functionalities. + + + + Windsurf, an AI-enhanced IDE built on VS Code, also supports MCP servers to extend its capabilities. + + To set up the Genkit MCP Server in Windsurf: + + 1. Open Windsurf Settings by clicking the **Windsurf - Settings** button (bottom right) or by hitting `Cmd+Shift+P` (Mac) / `Ctrl+Shift+P` (Windows/Linux) and searching for "Open Windsurf Settings". + 2. Navigate to the **Cascade** section in **Advanced Settings** and look for the **MCP** option to enable it. + 3. You can add a new MCP server directly through the settings UI or by manually editing the `~/.codeium/windsurf/mcp_config.json` file. + 4. Provide the `stdio` transport command: `genkit mcp`. Ensure the working directory (`cwd`) is correctly set to your Genkit project. + + After configuration, Windsurf's AI assistant (Cascade) can interact with your Genkit components. + + + + Cline, an AI assistant for your CLI and Editor, can also extend its capabilities through custom MCP tools. + + To configure the Genkit MCP Server in Cline: + + 1. Click the **"MCP Servers"** icon in the top navigation bar of the Cline pane. + 2. Select the **"Installed"** tab. + 3. Click the **"Configure MCP Servers"** button at the bottom of the pane. + 4. You can then add a new server configuration using JSON. An example configuration would be: + + ```json + { + "mcpServers": { + "genkit": { + "command": "genkit", + "args": ["mcp"], + "cwd": "", + "timeout": 30000, + "trust": false + } + } + } + ``` + + The settings for all installed MCP servers are located in the `cline_mcp_settings.json` file. + + 5. Alternatively, you can ask Cline directly to "add a tool" and it can guide you through creating and installing a new MCP server. + + Once configured, Cline will automatically detect and leverage the tools provided by the Genkit MCP Server. + +
+ +## Using the MCP Server + +Once configured, your MCP-aware tool can interact with the Genkit MCP Server. Here are some of the available operations: + +### Look up Genkit Documentation + +You can use the `lookup_genkit_docs` tool to retrieve documentation for the Genkit AI framework. You can specify a language and particular files to look up. + +**Example:** +To get a list of available documentation files for JavaScript: + +``` +@genkit:lookup_genkit_docs { "language": "js" } +``` + +### List Genkit Flows + +The `list_flows` tool allows you to discover all defined Genkit flows in your project and inspect their input schemas. + +**Example:** + +``` +@genkit:list_flows {} +``` + +This will return a list of flows with their descriptions and input schemas, similar to: + +``` +- Flow name: recipeGeneratorFlow + Input schema: {"type":"object","properties":{"ingredient":{"type":"string"},"dietaryRestrictions":{"type":"string"}},"required":["ingredient","dietaryRestrictions"]} +``` + +### Run Genkit Flows + +You can execute a specific Genkit flow using the `run_flow` tool. You'll need to provide the `flowName` and any required `input` as a JSON string conforming to the flow's input schema. + +**Example:** +To run a `recipeGeneratorFlow` with specific ingredients and dietary restrictions: + +``` +@genkit:run_flow { "flowName": "recipeGeneratorFlow", "input": "{\"ingredient\": \"avocado\", \"dietaryRestrictions\": \"vegetarian\"}" } +``` + +The output will be the result of the flow execution, for example: + +```json +{ + "cookTime": "5 minutes", + "description": "A quick and easy vegetarian recipe featuring creamy avocado.", + "ingredients": [ + "1 ripe avocado", + "1/4 cup chopped red onion", + "1/4 cup chopped cilantro", + "1 tablespoon lime juice", + "1/4 teaspoon salt", + "1/4 teaspoon black pepper" + ], + "instructions": [ + "Halve the avocado and remove the pit.", + "Scoop the avocado flesh into a bowl.", + "Add the red onion, cilantro, lime juice, salt, and pepper.", + "Mash everything together with a fork until it is mostly smooth but still has some chunks.", + "Stir in the red onion, cilantro, lime juice, salt, and pepper.", + "Serve immediately with tortilla chips or as a topping for tacos or salads." + ], + "prepTime": "5 minutes", + "servings": 1, + "title": "Simple Avocado Mash", + "tips": [ + "For a spicier dish, add a pinch of cayenne pepper.", + "If you don't have fresh cilantro, you can use parsley instead." + ] +} +``` + +### Get Trace Details + +After running a flow, you can retrieve its detailed execution trace using the `get_trace` tool and the `traceId` returned from the flow execution. + +**Example:** + +``` +@genkit:get_trace { "traceId": "ecf38e20f418b2964f7ab472b799" } +``` + +The output will provide a breakdown of the trace, including details about each span, such as input, output, and execution time. + +## Local Development and Documentation Bundle + +The Genkit MCP Server includes a pre-built documentation bundle. If you need to update this bundle or work with custom documentation, the server can download and serve an experimental bundle from `http://genkit.dev/docs-bundle-experimental.json`. + +The documentation bundle is stored locally in `~/.genkit/docs//bundle.json`. 
+ +## Next steps + +- Learn about [developer tools](/unified-docs/developer-tools) for more ways to work with Genkit locally +- Explore [creating flows](/unified-docs/creating-flows) to build flows that can be accessed through the MCP server +- See [observability and monitoring](/unified-docs/observability-monitoring) to understand how traces work in Genkit diff --git a/src/content/docs/unified-docs/model-context-protocol.mdx b/src/content/docs/unified-docs/model-context-protocol.mdx new file mode 100644 index 00000000..19cc6c45 --- /dev/null +++ b/src/content/docs/unified-docs/model-context-protocol.mdx @@ -0,0 +1,722 @@ +--- +title: Model Context Protocol (MCP) +description: Learn how to extend Genkit's capabilities using the Model Context Protocol to connect with external tools, resources, and data sources across JavaScript, Go, and Python. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +The Model Context Protocol (MCP) is an open standard that enables AI applications to securely connect with external tools, resources, and data sources. With Genkit's MCP integration, you can: + +- **Access external tools and data** from MCP servers as a client +- **Expose Genkit capabilities** as an MCP server for other applications +- **Build extensible AI workflows** that leverage external services and APIs +- **Create reusable tool ecosystems** that work across different AI applications + +MCP bridges the gap between your AI models and the external world, enabling more powerful and context-aware applications. + +## Core Concepts + +### MCP Servers and Clients + +- **MCP Server**: Provides tools, resources, and prompts that can be consumed by AI applications +- **MCP Client**: Consumes capabilities from MCP servers (your Genkit application acts as a client) +- **Tools**: Functions that the AI can call to perform actions or retrieve information +- **Resources**: Static or dynamic data sources that provide context to the AI +- **Prompts**: Reusable prompt templates with parameters + +### How MCP Enhances AI Workflows + +MCP enables your AI applications to: + +1. **Access real-time data** from external APIs and databases +2. **Perform actions** in external systems (file operations, API calls, etc.) +3. **Leverage specialized tools** without implementing them from scratch +4. **Share capabilities** between different AI applications +5. 
**Maintain security** through controlled access to external resources + +## Setting Up MCP with Genkit + + + + Install the MCP plugin: + + ```bash + npm install genkit @genkit-ai/mcp + ``` + + Basic setup with multiple MCP servers: + + ```ts + import { genkit } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + import { createMcpHost } from '@genkit-ai/mcp'; + + const ai = genkit({ + plugins: [googleAI()], + }); + + // Create MCP host to manage multiple servers + const mcpHost = createMcpHost({ + name: 'myMcpClients', + mcpServers: { + // Filesystem operations + fs: { + command: 'npx', + args: ['-y', '@modelcontextprotocol/server-filesystem', process.cwd()], + }, + // Memory/context management + memory: { + command: 'npx', + args: ['-y', '@modelcontextprotocol/server-memory'], + }, + // Remote HTTP server + weather: { + url: 'https://api.weather.com/mcp', + headers: { + 'Authorization': 'Bearer your-api-key', + }, + }, + }, + }); + ``` + + + Import the MCP package: + + ```bash + go get github.com/firebase/genkit/go/plugins/mcp + ``` + + Basic setup: + + ```go + package main + + import ( + "context" + "log" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/mcp" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx) + if err != nil { + log.Fatal(err) + } + + // Create MCP manager for multiple servers + manager, err := mcp.NewMCPManager(mcp.MCPManagerOptions{ + Name: "my-app", + MCPServers: []mcp.MCPServerConfig{ + { + Name: "filesystem", + Config: mcp.MCPClientOptions{ + Name: "fs-server", + Stdio: &mcp.StdioConfig{ + Command: "npx", + Args: []string{"-y", "@modelcontextprotocol/server-filesystem", "."}, + }, + }, + }, + { + Name: "time", + Config: mcp.MCPClientOptions{ + Name: "time-server", + Stdio: &mcp.StdioConfig{ + Command: "uvx", + Args: []string{"mcp-server-time"}, + }, + }, + }, + }, + }) + if err != nil { + log.Fatal(err) + } + } + ``` + + + Install the MCP plugin: + + ```bash + pip install genkit-mcp + ``` + + Basic setup: + + ```python + import asyncio + from genkit.ai import Genkit + from genkit.plugins.mcp import MCPPlugin + + async def main(): + ai = Genkit( + plugins=[ + MCPPlugin( + name="my-mcp-client", + servers={ + "filesystem": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-filesystem", "."], + }, + "memory": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-memory"], + }, + "weather": { + "url": "https://api.weather.com/mcp", + "headers": { + "Authorization": "Bearer your-api-key", + }, + }, + }, + ), + ], + ) + + if __name__ == "__main__": + asyncio.run(main()) + ``` + + + +## Using MCP in AI Workflows + +### Accessing External Tools + +MCP servers provide tools that your AI can use just like any other Genkit tool: + + + + ```ts + // Get all available tools from connected MCP servers + const mcpTools = await mcpHost.getActiveTools(ai); + + // Use MCP tools in generation + const response = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: 'Analyze the files in the current directory and remember key findings for later.', + tools: mcpTools, + }); + + // Get tools from specific servers only + const fsTools = await mcpHost.getActiveTools(ai, ['fs']); + const memoryTools = await mcpHost.getActiveTools(ai, ['memory']); + + const response2 = await ai.generate({ + prompt: 'Read the README file and store its contents in memory.', + tools: [...fsTools, ...memoryTools], + }); + ``` + + + ```go + // Get all tools from all connected servers + 
tools, err := manager.GetActiveTools(ctx, g) + if err != nil { + log.Fatal(err) + } + + // Use tools in generation + resp, err := genkit.Generate(ctx, g, + ai.WithModel(myModel), + ai.WithPrompt("What time is it and what files are in the current directory?"), + ai.WithTools(tools...), + ) + if err != nil { + log.Fatal(err) + } + + // Get tools from a specific server + timeTool, err := manager.GetTool(ctx, g, "time", "get_current_time") + if err != nil { + log.Fatal(err) + } + ``` + + + ```python + # Get all available tools + tools = await mcp_plugin.get_active_tools() + + # Use tools in generation + response = await ai.generate( + model="googleai/gemini-2.5-flash", + prompt="What time is it and what files are in the current directory?", + tools=tools, + ) + + # Get tools from specific servers + fs_tools = await mcp_plugin.get_tools_from_server("filesystem") + memory_tools = await mcp_plugin.get_tools_from_server("memory") + + response = await ai.generate( + prompt="Read files and remember important information", + tools=fs_tools + memory_tools, + ) + ``` + + + +### Working with Resources + +MCP resources provide contextual information that can enhance your AI's understanding: + + + + ```ts + // Get resources from MCP servers + const resources = await mcpHost.getActiveResources(ai); + + // Use resources to provide context + const response = await ai.generate({ + prompt: 'Based on the available system information, recommend optimizations.', + resources: resources, + }); + + // Access specific resources + const systemInfo = await mcpHost.getResource('system', 'system://info'); + ``` + + + ```go + // Get resources from MCP servers + resources, err := manager.GetActiveResources(ctx, g) + if err != nil { + log.Fatal(err) + } + + // Use resources in generation + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("Analyze the system information and provide recommendations"), + ai.WithResources(resources...), + ) + ``` + + + ```python + # Get resources from MCP servers + resources = await mcp_plugin.get_active_resources() + + # Use resources in generation + response = await ai.generate( + prompt="Analyze the system information and provide recommendations", + resources=resources, + ) + ``` + + + +### Using MCP Prompts + +MCP servers can provide reusable prompt templates: + + + + ```ts + // Get a prompt from an MCP server + const analysisPrompt = await mcpHost.getPrompt('memory', 'analyze_data'); + + // Use the prompt with parameters + const response = await analysisPrompt({ + data: 'user interaction logs', + focus: 'user behavior patterns' + }); + ``` + + + ```go + // Get prompt from specific server + prompt, err := manager.GetPrompt(ctx, g, "memory", "analyze_data", map[string]any{ + "data": "user interaction logs", + "focus": "user behavior patterns", + }) + if err != nil { + log.Fatal(err) + } + ``` + + + ```python + # Get prompt from specific server + prompt = await mcp_plugin.get_prompt("memory", "analyze_data") + + # Use the prompt with parameters + response = await prompt({ + "data": "user interaction logs", + "focus": "user behavior patterns" + }) + ``` + + + +## Building Flows with MCP + +### Example: Document Analysis Workflow + + + + ```ts + const documentAnalysisFlow = ai.defineFlow( + { + name: 'documentAnalysis', + inputSchema: z.object({ + directory: z.string(), + analysisType: z.string(), + }), + outputSchema: z.object({ + summary: z.string(), + insights: z.array(z.string()), + recommendations: z.array(z.string()), + }), + }, + async ({ directory, analysisType }) => { + // Get MCP 
tools for file operations and memory + const tools = await mcpHost.getActiveTools(ai); + + // Step 1: Analyze documents in the directory + const analysisResult = await ai.generate({ + prompt: `Analyze all documents in ${directory} for ${analysisType}. + Read each file and extract key information.`, + tools: tools, + }); + + // Step 2: Store findings in memory for later reference + await ai.generate({ + prompt: `Store the following analysis results in memory: ${analysisResult.text}`, + tools: tools, + }); + + // Step 3: Generate insights and recommendations + const insights = await ai.generate({ + prompt: `Based on the document analysis, provide key insights and actionable recommendations.`, + tools: tools, + }); + + return { + summary: analysisResult.text, + insights: insights.text.split('\n').filter(line => line.trim()), + recommendations: [], // Parse from insights + }; + } + ); + ``` + + + ```go + type DocumentAnalysisInput struct { + Directory string `json:"directory"` + AnalysisType string `json:"analysis_type"` + } + + type DocumentAnalysisOutput struct { + Summary string `json:"summary"` + Insights []string `json:"insights"` + Recommendations []string `json:"recommendations"` + } + + documentAnalysisFlow := genkit.DefineFlow(g, "documentAnalysis", + func(ctx context.Context, input DocumentAnalysisInput) (DocumentAnalysisOutput, error) { + // Get MCP tools + tools, err := manager.GetActiveTools(ctx, g) + if err != nil { + return DocumentAnalysisOutput{}, err + } + + // Analyze documents + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt(fmt.Sprintf("Analyze all documents in %s for %s", + input.Directory, input.AnalysisType)), + ai.WithTools(tools...), + ) + if err != nil { + return DocumentAnalysisOutput{}, err + } + + return DocumentAnalysisOutput{ + Summary: resp.Text(), + Insights: []string{}, // Parse from response + Recommendations: []string{}, // Parse from response + }, nil + }) + ``` + + + ```python + @ai.flow() + async def document_analysis_flow(directory: str, analysis_type: str): + # Get MCP tools + tools = await mcp_plugin.get_active_tools() + + # Step 1: Analyze documents + analysis_result = await ai.generate( + prompt=f"Analyze all documents in {directory} for {analysis_type}. 
" + f"Read each file and extract key information.", + tools=tools, + ) + + # Step 2: Store findings in memory + await ai.generate( + prompt=f"Store the following analysis results in memory: {analysis_result.text}", + tools=tools, + ) + + # Step 3: Generate insights + insights = await ai.generate( + prompt="Based on the document analysis, provide key insights and recommendations.", + tools=tools, + ) + + return { + "summary": analysis_result.text, + "insights": insights.text.split('\n'), + "recommendations": [] # Parse from insights + } + ``` + + + +## Advanced MCP Patterns + +### Dynamic Server Management + + + + ```ts + // Connect to servers dynamically based on user needs + const connectWeatherServer = async (apiKey: string) => { + await mcpHost.connect('weather', { + url: 'https://api.weather.com/mcp', + headers: { 'Authorization': `Bearer ${apiKey}` }, + }); + }; + + // Disconnect when no longer needed + const disconnectWeatherServer = async () => { + await mcpHost.disconnect('weather'); + }; + + // Check server status + const activeServers = await mcpHost.getActiveServers(); + console.log('Connected servers:', activeServers); + ``` + + + ```go + // Connect to server dynamically + err = manager.Connect("weather", mcp.MCPClientOptions{ + Name: "weather-server", + HTTP: &mcp.HTTPConfig{ + URL: "https://api.weather.com/mcp", + Headers: map[string]string{ + "Authorization": "Bearer " + apiKey, + }, + }, + }) + if err != nil { + log.Fatal(err) + } + + // Disconnect when done + err = manager.Disconnect("weather") + if err != nil { + log.Fatal(err) + } + ``` + + + ```python + # Connect to server dynamically + await mcp_plugin.connect_server("weather", { + "url": "https://api.weather.com/mcp", + "headers": { + "Authorization": f"Bearer {api_key}", + }, + }) + + # Disconnect server + await mcp_plugin.disconnect_server("weather") + + # Check active servers + active_servers = await mcp_plugin.get_active_servers() + print(f"Active servers: {active_servers}") + ``` + + + +### Error Handling and Resilience + + + + ```ts + const robustMcpFlow = ai.defineFlow( + { + name: 'robustMcpFlow', + inputSchema: z.object({ task: z.string() }), + outputSchema: z.string(), + }, + async ({ task }) => { + try { + // Wait for MCP connections to be ready + await mcpHost.ready(); + + // Get available tools with fallback + const tools = await mcpHost.getActiveTools(ai); + + if (tools.length === 0) { + return 'No MCP tools available, proceeding with basic capabilities.'; + } + + const response = await ai.generate({ + prompt: task, + tools: tools, + }); + + return response.text; + + } catch (error) { + console.error('MCP operation failed:', error); + + // Fallback to basic generation without MCP tools + const fallbackResponse = await ai.generate({ + prompt: `${task} (Note: External tools unavailable)`, + }); + + return fallbackResponse.text; + } finally { + // Clean up resources + await mcpHost.close(); + } + } + ); + ``` + + + ```go + robustMcpFlow := genkit.DefineFlow(g, "robustMcpFlow", + func(ctx context.Context, input struct{ Task string }) (string, error) { + // Graceful shutdown + defer func() { + if err := manager.Close(); err != nil { + log.Printf("Error closing MCP manager: %v", err) + } + }() + + // Try to get MCP tools + tools, err := manager.GetActiveTools(ctx, g) + if err != nil { + log.Printf("Failed to get MCP tools: %v", err) + // Fallback to basic generation + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt(input.Task + " (Note: External tools unavailable)"), + ) + if err != nil { + return "", err 
+ } + return resp.Text(), nil + } + + // Use MCP tools + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt(input.Task), + ai.WithTools(tools...), + ) + if err != nil { + return "", err + } + + return resp.Text(), nil + }) + ``` + + + ```python + @ai.flow() + async def robust_mcp_flow(task: str) -> str: + try: + # Wait for MCP connections + await mcp_plugin.ready() + + # Get available tools + tools = await mcp_plugin.get_active_tools() + + if not tools: + return "No MCP tools available, proceeding with basic capabilities." + + response = await ai.generate( + prompt=task, + tools=tools, + ) + + return response.text + + except Exception as error: + print(f"MCP operation failed: {error}") + + # Fallback to basic generation + fallback_response = await ai.generate( + prompt=f"{task} (Note: External tools unavailable)", + ) + + return fallback_response.text + + finally: + # Clean up connections + await mcp_plugin.close() + ``` + + + +## Best Practices + +### Security and Trust + +1. **Validate MCP server sources**: Only connect to trusted MCP servers +2. **Sanitize inputs**: Validate all data passed to MCP tools +3. **Limit permissions**: Run MCP servers with minimal required permissions +4. **Monitor resource usage**: Track memory and CPU usage of MCP processes +5. **Use secure transports**: Prefer HTTPS and authenticated connections + +### Performance Optimization + +1. **Connection pooling**: Reuse MCP connections when possible +2. **Lazy loading**: Connect to servers only when needed +3. **Timeout configuration**: Set appropriate timeouts for MCP operations +4. **Resource cleanup**: Always close connections and clean up resources +5. **Caching**: Cache frequently accessed MCP resources + +### Development and Testing + +1. **Use MCP Inspector**: Test your MCP servers with the official inspector tool +2. **Mock MCP servers**: Create mock servers for testing and development +3. **Error simulation**: Test error handling with unreliable connections +4. **Performance testing**: Measure the impact of MCP operations on your flows +5. 
**Documentation**: Document your MCP integrations and dependencies

## Common MCP Servers

### Official MCP Servers

- **@modelcontextprotocol/server-filesystem**: File system operations
- **@modelcontextprotocol/server-memory**: Context and memory management
- **@modelcontextprotocol/server-sqlite**: SQLite database operations
- **@modelcontextprotocol/server-git**: Git repository operations

### Community MCP Servers

- **Weather APIs**: Real-time weather data
- **Database connectors**: PostgreSQL, MySQL, MongoDB
- **Cloud services**: AWS, GCP, Azure integrations
- **Development tools**: GitHub, Jira, Slack integrations

## Next Steps

- Learn about [tool calling](/unified-docs/tool-calling) to understand how MCP tools integrate with Genkit's tool system
- Explore [creating flows](/unified-docs/creating-flows) to build workflows that leverage MCP capabilities
- See the [MCP Server guide](/unified-docs/mcp-server) for creating your own MCP servers
- Check out the [official MCP documentation](https://modelcontextprotocol.io) for more details on the protocol
- Browse the [MCP server registry](https://github.com/modelcontextprotocol/servers) for available servers

diff --git a/src/content/docs/unified-docs/multi-agent-systems.mdx b/src/content/docs/unified-docs/multi-agent-systems.mdx
new file mode 100644
index 00000000..c8a71e6c
--- /dev/null
+++ b/src/content/docs/unified-docs/multi-agent-systems.mdx
@@ -0,0 +1,699 @@
---
title: Building multi-agent systems
description: Learn how to build multi-agent systems in Genkit by delegating tasks to specialized agents, addressing challenges of complex agentic workflows across different languages.
---

import LangTabs from '@/components/LangTabs.astro';
import LangTabItem from '@/components/LangTabItem.astro';

:::caution[Beta]
This feature of Genkit is in **Beta**, which means it is not yet part of Genkit's stable API. APIs of beta features may change in minor version releases.
:::

A powerful application of large language models is the LLM-powered agent. An agent is a system that can carry out complex tasks by planning how to break them into smaller ones and, with the help of [tool calling](/unified-docs/tool-calling), executing tasks that interact with external resources such as databases or even physical devices.

Multi-agent systems take this concept further by using specialized agents that can delegate tasks to each other, creating more sophisticated and maintainable AI workflows.

## Availability and Approach

JavaScript provides built-in multi-agent system support through Genkit's prompt-as-tool architecture. You can define specialized agents as prompts and use them as tools in other agents, creating hierarchical delegation patterns.

Features include:
- Prompt-based agent definitions
- Automatic tool delegation between agents
- Specialized agent contexts and capabilities
- Integration with chat sessions and flows
- Built-in orchestration patterns

Go doesn't have built-in multi-agent system APIs. You need to implement agent coordination manually by:
- Creating separate functions or flows for each agent
- Implementing your own delegation logic
- Managing agent state and context manually
- Building coordination patterns using flows and tools

Python doesn't have built-in multi-agent system APIs. 
You need to implement agent coordination manually by: + - Creating separate functions or flows for each agent + - Implementing your own delegation logic + - Managing agent state and context manually + - Building coordination patterns using flows and tools + + + +## Why use multi-agent systems? + +As you build more complex AI applications, you start to run into some problems with single-agent architectures: + +- **Tool overload**: The more tools you add, the more you stretch the model's ability to consistently and correctly employ the right tool for the job. +- **Context switching**: Some tasks might best be served through a more focused back and forth between the user and the agent, rather than by a single tool call. +- **Specialized behavior**: Some tasks might benefit from a specialized prompt. For example, if your agent is responding to an unhappy customer, you might want its tone to be more business-like, whereas the agent that greets the customer initially can have a more friendly and lighthearted tone. + +Multi-agent systems address these issues by creating specialized agents that can delegate tasks to each other. + +## Single agent example + +Let's start with a simple customer service agent to understand the progression to multi-agent systems: + + + + Here are some excerpts from a very simple customer service agent built using a single prompt and several tools: + + ```typescript + const menuLookupTool = ai.defineTool( + { + name: 'menuLookupTool', + description: 'use this tool to look up the menu for a given date', + inputSchema: z.object({ + date: z.string().describe('the date to look up the menu for'), + }), + outputSchema: z.string().describe('the menu for a given date'), + }, + async (input) => { + // Retrieve the menu from a database, website, etc. + // ... + }, + ); + + const reservationTool = ai.defineTool( + { + name: 'reservationTool', + description: 'use this tool to try to book a reservation', + inputSchema: z.object({ + partySize: z.coerce.number().describe('the number of guests'), + date: z.string().describe('the date to book for'), + }), + outputSchema: z + .string() + .describe( + "true if the reservation was successfully booked and false if there's" + + ' no table available for the requested time', + ), + }, + async (input) => { + // Access your database to try to make the reservation. + // ... + }, + ); + + const chat = ai.chat({ + model: googleAI.model('gemini-2.5-flash'), + system: + "You are an AI customer service agent for Pavel's Cafe. Use the tools " + + 'available to you to help the customer. If you cannot help the ' + + 'customer with the available tools, politely explain so.', + tools: [menuLookupTool, reservationTool], + }); + ``` + + + In Go, you would implement a single agent using flows and tools: + + ```go + func menuLookupTool(ctx context.Context, input MenuLookupInput) (string, error) { + // Retrieve the menu from a database, website, etc. + return getMenuForDate(input.Date) + } + + func reservationTool(ctx context.Context, input ReservationInput) (string, error) { + // Access your database to try to make the reservation + success, err := makeReservation(input.PartySize, input.Date) + if err != nil { + return "", err + } + if success { + return "true", nil + } + return "false", nil + } + + func customerServiceAgent(ctx context.Context, userInput string) (string, error) { + // Build a prompt that includes available tools and user input + prompt := fmt.Sprintf(`You are an AI customer service agent for Pavel's Cafe. 
+ Available tools: menuLookup, makeReservation + User: %s + + How can I help you today?`, userInput) + + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt(prompt), + ai.WithModelName("googleai/gemini-2.5-flash"), + ai.WithTools(menuLookupTool, reservationTool), + ) + if err != nil { + return "", err + } + + return resp.Text(), nil + } + ``` + + + In Python, you would implement a single agent using flows and tools: + + ```python + @ai.tool() + def menu_lookup_tool(date: str, ctx) -> str: + """Use this tool to look up the menu for a given date""" + # Retrieve the menu from a database, website, etc. + return get_menu_for_date(date) + + @ai.tool() + def reservation_tool(party_size: int, date: str, ctx) -> str: + """Use this tool to try to book a reservation""" + # Access your database to try to make the reservation + success = make_reservation(party_size, date) + return "true" if success else "false" + + @ai.flow() + async def customer_service_agent(user_input: str, ctx): + """AI customer service agent for Pavel's Cafe""" + + response = await ai.generate( + prompt=f"""You are an AI customer service agent for Pavel's Cafe. + Use the tools available to you to help the customer. + + User: {user_input}""", + tools=['menu_lookup_tool', 'reservation_tool'], + model=google_genai_name('gemini-2.5-flash'), + ) + + return response.text + ``` + + + +## Multi-agent architecture + + + + One approach you can use to deal with the issues that arise when building + complex agents is to create many specialized agents and use a general purpose + agent to delegate tasks to them. Genkit supports this architecture by allowing + you to specify prompts as tools. Each prompt represents a single specialized + agent, with its own set of tools available to it, and those agents are in turn + available as tools to your single orchestration agent, which is the primary + interface with the user. + + Here's what an expanded version of the previous example might look like as a + multi-agent system: + + ```typescript + // Define a prompt that represents a specialist agent + const reservationAgent = ai.definePrompt({ + name: 'reservationAgent', + description: 'Reservation Agent can help manage guest reservations', + tools: [reservationTool, reservationCancelationTool, reservationListTool], + system: 'Help guests make and manage reservations', + }); + + // Or load agents from .prompt files + const menuInfoAgent = ai.prompt('menuInfoAgent'); + const complaintAgent = ai.prompt('complaintAgent'); + + // The triage agent is the agent that users interact with initially + const triageAgent = ai.definePrompt({ + name: 'triageAgent', + description: 'Triage Agent', + tools: [reservationAgent, menuInfoAgent, complaintAgent], + system: `You are an AI customer service agent for Pavel's Cafe. + Greet the user and ask them how you can help. If appropriate, transfer to an + agent that can better handle the request. If you cannot help the customer with + the available tools, politely explain so.`, + }); + + // Start a chat session, initially with the triage agent + const chat = ai.chat(triageAgent); + ``` + + + In Go, you can implement multi-agent patterns by creating separate flows for each agent and implementing delegation logic: + + ```go + // Define specialized agent functions + func reservationAgent(ctx context.Context, userInput string) (string, error) { + prompt := fmt.Sprintf(`You are a reservation specialist for Pavel's Cafe. + Help guests make and manage reservations. 
+ + User: %s`, userInput) + + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt(prompt), + ai.WithModelName("googleai/gemini-2.5-flash"), + ai.WithTools(reservationTool, reservationCancelationTool, reservationListTool), + ) + if err != nil { + return "", err + } + + return resp.Text(), nil + } + + func menuInfoAgent(ctx context.Context, userInput string) (string, error) { + prompt := fmt.Sprintf(`You are a menu information specialist for Pavel's Cafe. + Help guests with menu questions and dietary information. + + User: %s`, userInput) + + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt(prompt), + ai.WithModelName("googleai/gemini-2.5-flash"), + ai.WithTools(menuLookupTool, allergyInfoTool), + ) + if err != nil { + return "", err + } + + return resp.Text(), nil + } + + func complaintAgent(ctx context.Context, userInput string) (string, error) { + prompt := fmt.Sprintf(`You are a customer service specialist for Pavel's Cafe. + Handle customer complaints with empathy and professionalism. + + User: %s`, userInput) + + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt(prompt), + ai.WithModelName("googleai/gemini-2.5-flash"), + ai.WithTools(refundTool, managerContactTool), + ) + if err != nil { + return "", err + } + + return resp.Text(), nil + } + + // Triage agent that delegates to specialists + func triageAgent(ctx context.Context, userInput string) (string, error) { + // First, determine which agent should handle this request + classificationPrompt := fmt.Sprintf(`Classify this customer request: + User: %s + + Categories: + - reservation: booking, canceling, or modifying reservations + - menu: questions about food, ingredients, or menu items + - complaint: issues, problems, or complaints + - general: greeting or general questions + + Respond with just the category name.`, userInput) + + classResp, err := genkit.Generate(ctx, g, + ai.WithPrompt(classificationPrompt), + ai.WithModelName("googleai/gemini-2.5-flash"), + ) + if err != nil { + return "", err + } + + category := strings.ToLower(strings.TrimSpace(classResp.Text())) + + // Delegate to the appropriate specialist agent + switch category { + case "reservation": + return reservationAgent(ctx, userInput) + case "menu": + return menuInfoAgent(ctx, userInput) + case "complaint": + return complaintAgent(ctx, userInput) + default: + // Handle general queries directly + return generalGreeting(ctx, userInput) + } + } + ``` + + + In Python, you can implement multi-agent patterns by creating separate flows for each agent: + + ```python + @ai.flow() + async def reservation_agent(user_input: str, ctx): + """Reservation specialist for Pavel's Cafe""" + response = await ai.generate( + prompt=f"""You are a reservation specialist for Pavel's Cafe. + Help guests make and manage reservations. + + User: {user_input}""", + tools=['reservation_tool', 'reservation_cancelation_tool', 'reservation_list_tool'], + model=google_genai_name('gemini-2.5-flash'), + ) + return response.text + + @ai.flow() + async def menu_info_agent(user_input: str, ctx): + """Menu information specialist for Pavel's Cafe""" + response = await ai.generate( + prompt=f"""You are a menu information specialist for Pavel's Cafe. + Help guests with menu questions and dietary information. 
+ + User: {user_input}""", + tools=['menu_lookup_tool', 'allergy_info_tool'], + model=google_genai_name('gemini-2.5-flash'), + ) + return response.text + + @ai.flow() + async def complaint_agent(user_input: str, ctx): + """Customer service specialist for Pavel's Cafe""" + response = await ai.generate( + prompt=f"""You are a customer service specialist for Pavel's Cafe. + Handle customer complaints with empathy and professionalism. + + User: {user_input}""", + tools=['refund_tool', 'manager_contact_tool'], + model=google_genai_name('gemini-2.5-flash'), + ) + return response.text + + @ai.flow() + async def triage_agent(user_input: str, ctx): + """Triage agent that delegates to specialists""" + + # First, classify the request + classification_response = await ai.generate( + prompt=f"""Classify this customer request: + User: {user_input} + + Categories: + - reservation: booking, canceling, or modifying reservations + - menu: questions about food, ingredients, or menu items + - complaint: issues, problems, or complaints + - general: greeting or general questions + + Respond with just the category name.""", + model=google_genai_name('gemini-2.5-flash'), + ) + + category = classification_response.text.strip().lower() + + # Delegate to the appropriate specialist agent + if category == "reservation": + return await reservation_agent(user_input, ctx) + elif category == "menu": + return await menu_info_agent(user_input, ctx) + elif category == "complaint": + return await complaint_agent(user_input, ctx) + else: + # Handle general queries directly + return await general_greeting(user_input, ctx) + ``` + + + +## Agent coordination patterns + + + + ### Hierarchical delegation + + The most common pattern is hierarchical delegation, where a triage agent routes requests to specialized agents: + + ```typescript + const triageAgent = ai.definePrompt({ + name: 'triageAgent', + description: 'Routes customer requests to appropriate specialists', + tools: [reservationAgent, menuAgent, supportAgent], + system: `You are a customer service triage agent. + Analyze the customer's request and delegate to the most appropriate specialist. 
+ Always explain to the customer which specialist you're connecting them with.`, + }); + ``` + + ### Collaborative agents + + Agents can also work together on complex tasks: + + ```typescript + const researchAgent = ai.definePrompt({ + name: 'researchAgent', + description: 'Researches information for complex queries', + tools: [webSearchTool, databaseQueryTool], + system: 'Research and gather information to answer complex questions.', + }); + + const analysisAgent = ai.definePrompt({ + name: 'analysisAgent', + description: 'Analyzes research data and provides insights', + tools: [researchAgent, calculatorTool], + system: 'Analyze research data and provide clear, actionable insights.', + }); + ``` + + ### Sequential workflows + + You can create agents that work in sequence: + + ```typescript + const orderProcessingFlow = ai.defineFlow({ + name: 'orderProcessingFlow', + inputSchema: z.object({ order: z.string() }), + outputSchema: z.string(), + }, async (input) => { + // Step 1: Validate order + const validation = await validationAgent({ order: input.order }); + + if (!validation.isValid) { + return validation.errorMessage; + } + + // Step 2: Process payment + const payment = await paymentAgent({ + order: input.order, + validatedData: validation.data + }); + + // Step 3: Fulfill order + const fulfillment = await fulfillmentAgent({ + order: input.order, + paymentConfirmation: payment.confirmation + }); + + return fulfillment.trackingNumber; + }); + ``` + + + ### Hierarchical delegation + + Implement delegation through function routing: + + ```go + type AgentRouter struct { + agents map[string]func(context.Context, string) (string, error) + } + + func NewAgentRouter() *AgentRouter { + return &AgentRouter{ + agents: map[string]func(context.Context, string) (string, error){ + "reservation": reservationAgent, + "menu": menuInfoAgent, + "complaint": complaintAgent, + }, + } + } + + func (r *AgentRouter) Route(ctx context.Context, category, userInput string) (string, error) { + agent, exists := r.agents[category] + if !exists { + return "I'm sorry, I don't have a specialist for that type of request.", nil + } + + return agent(ctx, userInput) + } + ``` + + ### Sequential workflows + + Use flows to coordinate multiple agents: + + ```go + func orderProcessingFlow(ctx context.Context, order OrderInput) (string, error) { + // Step 1: Validate order + validation, err := validationAgent(ctx, order.Details) + if err != nil { + return "", err + } + + if !validation.IsValid { + return validation.ErrorMessage, nil + } + + // Step 2: Process payment + payment, err := paymentAgent(ctx, PaymentInput{ + Order: order.Details, + ValidatedData: validation.Data, + }) + if err != nil { + return "", err + } + + // Step 3: Fulfill order + fulfillment, err := fulfillmentAgent(ctx, FulfillmentInput{ + Order: order.Details, + PaymentConfirmation: payment.Confirmation, + }) + if err != nil { + return "", err + } + + return fulfillment.TrackingNumber, nil + } + ``` + + + ### Hierarchical delegation + + Implement delegation through flow routing: + + ```python + class AgentRouter: + def __init__(self): + self.agents = { + "reservation": reservation_agent, + "menu": menu_info_agent, + "complaint": complaint_agent, + } + + async def route(self, category: str, user_input: str, ctx): + agent = self.agents.get(category) + if not agent: + return "I'm sorry, I don't have a specialist for that type of request." 
+ + return await agent(user_input, ctx) + + router = AgentRouter() + + @ai.flow() + async def triage_with_routing(user_input: str, ctx): + # Classify the request + classification = await classify_request(user_input, ctx) + + # Route to appropriate agent + return await router.route(classification, user_input, ctx) + ``` + + ### Sequential workflows + + Use flows to coordinate multiple agents: + + ```python + @ai.flow() + async def order_processing_flow(order_details: str, ctx): + # Step 1: Validate order + validation = await validation_agent(order_details, ctx) + + if not validation.get('is_valid'): + return validation.get('error_message') + + # Step 2: Process payment + payment = await payment_agent({ + 'order': order_details, + 'validated_data': validation.get('data') + }, ctx) + + # Step 3: Fulfill order + fulfillment = await fulfillment_agent({ + 'order': order_details, + 'payment_confirmation': payment.get('confirmation') + }, ctx) + + return fulfillment.get('tracking_number') + ``` + + + +## Best practices + +### Agent design principles + + + + - **Single responsibility**: Each agent should have a clear, focused purpose + - **Clear interfaces**: Define clear input/output schemas for agent communication + - **Graceful delegation**: Always explain to users when transferring between agents + - **Error handling**: Implement fallback strategies when specialist agents fail + - **Context preservation**: Maintain conversation context across agent transfers + + + - **Modular design**: Keep agent functions focused and composable + - **Error propagation**: Handle errors gracefully and provide meaningful feedback + - **State management**: Carefully manage state between agent calls + - **Resource efficiency**: Avoid unnecessary agent calls through smart routing + - **Testing**: Test individual agents and coordination logic separately + + + - **Flow composition**: Use flows to create reusable agent patterns + - **Type safety**: Use proper type hints for agent inputs and outputs + - **Async patterns**: Leverage async/await for efficient agent coordination + - **Error handling**: Implement comprehensive error handling and recovery + - **Monitoring**: Add logging and metrics to track agent performance + + + +### Performance considerations + + + + - **Minimize agent hops**: Avoid unnecessary delegation chains + - **Cache agent responses**: Cache responses for repeated queries + - **Parallel execution**: Use Promise.all() for independent agent calls + - **Context size**: Keep agent contexts focused to reduce token usage + - **Tool selection**: Provide only relevant tools to each agent + + + - **Concurrent execution**: Use goroutines for parallel agent processing + - **Connection pooling**: Reuse connections for agent communications + - **Memory management**: Be mindful of memory usage in long-running agent systems + - **Timeout handling**: Implement timeouts for agent calls + - **Resource limits**: Set appropriate limits on agent execution + + + - **Async coordination**: Use asyncio for efficient agent orchestration + - **Resource pooling**: Pool expensive resources across agents + - **Memory optimization**: Monitor memory usage in complex agent workflows + - **Caching strategies**: Implement intelligent caching for agent responses + - **Load balancing**: Distribute agent workloads appropriately + + + +### Security and safety + + + + - **Agent isolation**: Ensure agents can only access their designated tools + - **Input validation**: Validate all inputs before passing between agents + - **Permission 
boundaries**: Define clear permission boundaries for each agent
- **Audit trails**: Log agent interactions for debugging and compliance
- **Rate limiting**: Implement rate limiting to prevent agent abuse

- **Access control**: Implement proper access controls for agent functions
- **Input sanitization**: Sanitize all inputs to prevent injection attacks
- **Resource limits**: Set limits on agent resource consumption
- **Logging**: Implement comprehensive logging for agent activities
- **Validation**: Validate agent outputs before using them

- **Flow security**: Secure flow execution and data passing
- **Input validation**: Validate all agent inputs and outputs
- **Access patterns**: Control access to sensitive operations
- **Monitoring**: Monitor agent behavior for anomalies
- **Sandboxing**: Consider sandboxing for untrusted agent code

## Next steps

- Learn about [tool calling](/unified-docs/tool-calling) to understand how agents interact with external systems
- Explore [chat sessions](/unified-docs/chat-sessions) to build conversational multi-agent experiences
- See [creating flows](/unified-docs/creating-flows) for building complex agent workflows
- Check out [context](/unified-docs/context) for managing information flow between agents
- Review [developer tools](/unified-docs/developer-tools) for testing and debugging multi-agent systems

diff --git a/src/content/docs/unified-docs/observability-monitoring.mdx b/src/content/docs/unified-docs/observability-monitoring.mdx
new file mode 100644
index 00000000..a9579a0c
--- /dev/null
+++ b/src/content/docs/unified-docs/observability-monitoring.mdx
@@ -0,0 +1,566 @@
---
title: Observability and Monitoring
description: Learn how to monitor and observe your Genkit AI workflows across JavaScript, Go, and Python, including local development tools, production monitoring, and OpenTelemetry integration.
---

import LangTabs from '@/components/LangTabs.astro';
import LangTabItem from '@/components/LangTabItem.astro';

Genkit provides comprehensive observability features to help you understand, debug, and optimize your AI workflows. Whether you're developing locally or running in production, Genkit offers the tools you need to monitor performance, trace execution, and troubleshoot issues.
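For example, in the JavaScript SDK, flows are traced automatically, and you can wrap individual pieces of work in named steps so they show up as their own spans. A minimal sketch, assuming the `ai.run()` helper for custom trace steps and a hypothetical `summarizeUrl` flow:

```ts
import { googleAI } from '@genkit-ai/googleai';
import { genkit, z } from 'genkit';

const ai = genkit({ plugins: [googleAI()] });

export const summarizeUrl = ai.defineFlow(
  { name: 'summarizeUrl', inputSchema: z.string(), outputSchema: z.string() },
  async (url) => {
    // ai.run wraps work in a named step so it appears as its own span in the trace.
    const page = await ai.run('fetch-page', async () => {
      const res = await fetch(url);
      return res.text();
    });

    const { text } = await ai.generate({
      model: googleAI.model('gemini-2.5-flash'),
      prompt: `Summarize the following page in two sentences:\n\n${page}`,
    });
    return text;
  },
);
```

Each run of this flow produces a trace with the `fetch-page` step and the model call as separate spans, which you can inspect in the Developer UI locally or in your monitoring dashboard in production.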
+ +## Overview + +Genkit's observability stack includes: + +- **Local Development Tools**: Built-in tracing and debugging via the Developer UI +- **Production Monitoring**: Firebase Genkit Monitoring dashboard for deployed applications +- **OpenTelemetry Integration**: Export telemetry data to any observability platform +- **Centralized Logging**: Structured logging with automatic export capabilities +- **Metrics Collection**: Performance metrics, error rates, and usage statistics + +## Local Development Observability + +### Developer UI Tracing + +During development, Genkit automatically collects traces and provides detailed debugging capabilities through the Developer UI: + + + + The Developer UI is automatically available when you run: + + ```bash + npx genkit start + ``` + + Features include: + - **Step-by-step trace inspection**: See every step of your flow execution + - **Input/output logging**: Examine data flowing through each step + - **Performance metrics**: View latency and execution statistics + - **Error debugging**: Detailed error information and stack traces + - **Flow testing**: Run and test flows directly from the UI + + + The trace store feature is enabled automatically in development environments: + + ```bash + genkit start + # or + genkit flow:run + ``` + + The Developer UI provides: + - **Flow execution traces**: Complete visibility into flow steps + - **Input/output inspection**: Debug data transformations + - **Performance analysis**: Identify bottlenecks and optimization opportunities + - **Interactive testing**: Test flows with different inputs + + + Development observability is built into the Genkit runtime: + + ```bash + # Development server with tracing + python -m genkit start + ``` + + Available features: + - **Automatic trace collection**: No configuration required + - **Real-time debugging**: Inspect flows as they execute + - **Data flow visualization**: See how data moves through your workflow + - **Error analysis**: Detailed error reporting and debugging + + + +### Local Logging + +Genkit provides a centralized logging system that integrates with the observability stack: + + + + ```ts + import { logger } from 'genkit/logging'; + + // Configure log level + logger.setLogLevel('debug'); + + // Use in your flows + export const myFlow = ai.defineFlow( + { name: 'myFlow' }, + async (input) => { + logger.info('Flow started', { input }); + + try { + const result = await processData(input); + logger.info('Flow completed successfully', { result }); + return result; + } catch (error) { + logger.error('Flow failed', { error: error.message }); + throw error; + } + } + ); + ``` + + + ```go + import ( + "context" + "log/slog" + "github.com/firebase/genkit/go/genkit" + ) + + func myFlow(ctx context.Context, input string) (string, error) { + slog.InfoContext(ctx, "Flow started", "input", input) + + result, err := processData(input) + if err != nil { + slog.ErrorContext(ctx, "Flow failed", "error", err) + return "", err + } + + slog.InfoContext(ctx, "Flow completed", "result", result) + return result, nil + } + ``` + + + ```python + import logging + from genkit.ai import Genkit + + # Configure logging + logging.basicConfig(level=logging.INFO) + logger = logging.getLogger(__name__) + + ai = Genkit() + + @ai.flow() + async def my_flow(input: str) -> str: + logger.info(f"Flow started with input: {input}") + + try: + result = await process_data(input) + logger.info(f"Flow completed successfully: {result}") + return result + except Exception as error: + logger.error(f"Flow 
failed: {error}") + raise + ``` + + + +## Production Monitoring + +### Firebase Genkit Monitoring + +For production deployments, Genkit integrates with Firebase to provide comprehensive monitoring through the Genkit Monitoring dashboard. + +#### Setup and Configuration + + + + **1. Install the Firebase plugin:** + + ```bash + npm install @genkit-ai/firebase + ``` + + **2. Environment-based configuration:** + + ```bash + export ENABLE_FIREBASE_MONITORING=true + ``` + + **3. Programmatic configuration:** + + ```ts + import { enableFirebaseTelemetry } from '@genkit-ai/firebase'; + + // Basic setup + enableFirebaseTelemetry(); + + // Advanced configuration + enableFirebaseTelemetry({ + forceDevExport: false, + metricExportIntervalMillis: 300_000, // 5 minutes + disableLoggingInputAndOutput: false, + disableMetrics: false, + disableTraces: false, + }); + ``` + + + **1. Install the Google Cloud plugin:** + + ```bash + go get github.com/firebase/genkit/go/plugins/googlecloud + ``` + + **2. Configure telemetry export:** + + ```go + import ( + "github.com/firebase/genkit/go/plugins/googlecloud" + ) + + func main() { + ctx := context.Background() + + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlecloud.GoogleCloud{}), + ) + if err != nil { + log.Fatal(err) + } + + // Telemetry is automatically configured + } + ``` + + + **1. Install monitoring dependencies:** + + ```bash + pip install genkit[monitoring] + ``` + + **2. Configure telemetry:** + + ```python + from genkit.ai import Genkit + from genkit.monitoring import enable_firebase_monitoring + + # Enable monitoring + enable_firebase_monitoring() + + ai = Genkit() + ``` + + + +#### Required Google Cloud APIs + +Enable these APIs in your Google Cloud project: + +- [Cloud Logging API](https://console.cloud.google.com/apis/library/logging.googleapis.com) +- [Cloud Trace API](https://console.cloud.google.com/apis/library/cloudtrace.googleapis.com) +- [Cloud Monitoring API](https://console.cloud.google.com/apis/library/monitoring.googleapis.com) + +#### IAM Permissions + +Grant these roles to your service account: + +- **Monitoring Metric Writer** (`roles/monitoring.metricWriter`) +- **Cloud Trace Agent** (`roles/cloudtrace.agent`) +- **Logs Writer** (`roles/logging.logWriter`) + +### Monitoring Dashboard Features + +The [Genkit Monitoring dashboard](https://console.firebase.google.com/project/_/genai_monitoring) provides: + +- **Performance Metrics**: Latency, throughput, and error rates +- **Usage Analytics**: Token consumption, model usage, and cost tracking +- **Trace Inspection**: Detailed execution traces with input/output logging +- **Error Analysis**: Error patterns, failure rates, and debugging information +- **Custom Metrics**: Application-specific metrics and KPIs + +## OpenTelemetry Integration + +Genkit is fully instrumented with [OpenTelemetry](https://opentelemetry.io/), allowing you to export telemetry data to any compatible observability platform. 
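For instance, if your backend accepts OTLP (most modern platforms do), you can register a standard OTLP exporter before initializing Genkit. This is a minimal Node.js sketch using the stock OpenTelemetry packages; the collector endpoint is a placeholder you would replace with your own:

```ts
import { NodeSDK } from '@opentelemetry/sdk-node';
import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http';

// Send traces to any OTLP-compatible collector (Grafana, Honeycomb, a Datadog Agent, etc.).
const sdk = new NodeSDK({
  traceExporter: new OTLPTraceExporter({
    url: 'http://localhost:4318/v1/traces', // placeholder collector endpoint
  }),
});

sdk.start();
```

The platform-specific examples below follow the same pattern with different exporters.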
+ +### Custom OpenTelemetry Configuration + + + + ```ts + import { NodeSDK } from '@opentelemetry/sdk-node'; + import { getNodeAutoInstrumentations } from '@opentelemetry/auto-instrumentations-node'; + import { JaegerExporter } from '@opentelemetry/exporter-jaeger'; + + // Custom OpenTelemetry setup + const sdk = new NodeSDK({ + traceExporter: new JaegerExporter({ + endpoint: 'http://localhost:14268/api/traces', + }), + instrumentations: [getNodeAutoInstrumentations()], + }); + + sdk.start(); + + // Your Genkit app + import { genkit } from 'genkit'; + const ai = genkit({ /* config */ }); + ``` + + + ```go + import ( + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/exporters/jaeger" + "go.opentelemetry.io/otel/sdk/trace" + ) + + func setupTelemetry() { + // Create Jaeger exporter + exp, err := jaeger.New(jaeger.WithCollectorEndpoint( + jaeger.WithEndpoint("http://localhost:14268/api/traces"), + )) + if err != nil { + log.Fatal(err) + } + + // Create trace provider + tp := trace.NewTracerProvider( + trace.WithBatcher(exp), + ) + + otel.SetTracerProvider(tp) + } + ``` + + + ```python + from opentelemetry import trace + from opentelemetry.exporter.jaeger.thrift import JaegerExporter + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.export import BatchSpanProcessor + + # Configure OpenTelemetry + trace.set_tracer_provider(TracerProvider()) + tracer = trace.get_tracer(__name__) + + jaeger_exporter = JaegerExporter( + agent_host_name="localhost", + agent_port=6831, + ) + + span_processor = BatchSpanProcessor(jaeger_exporter) + trace.get_tracer_provider().add_span_processor(span_processor) + ``` + + + +### Popular Observability Platforms + +Genkit's OpenTelemetry integration works with: + +- **Jaeger**: Distributed tracing +- **Zipkin**: Request tracing and timing data +- **Prometheus**: Metrics collection and alerting +- **Grafana**: Visualization and dashboards +- **Datadog**: Full-stack monitoring +- **New Relic**: Application performance monitoring +- **Honeycomb**: Observability for complex systems + +## Advanced Configuration + +### Sampling and Performance + +Control telemetry collection to balance observability with performance: + + + + ```ts + import { enableFirebaseTelemetry } from '@genkit-ai/firebase'; + import { TraceIdRatioBasedSampler } from '@opentelemetry/sdk-trace-base'; + + enableFirebaseTelemetry({ + // Sample 10% of traces + sampler: new TraceIdRatioBasedSampler(0.1), + + // Reduce export frequency for high-volume apps + metricExportIntervalMillis: 600_000, // 10 minutes + + // Disable input/output logging for sensitive data + disableLoggingInputAndOutput: true, + + // Custom auto-instrumentation + autoInstrumentationConfig: { + '@opentelemetry/instrumentation-fs': { enabled: false }, + '@opentelemetry/instrumentation-dns': { enabled: false }, + }, + }); + ``` + + + ```go + import ( + "go.opentelemetry.io/otel/sdk/trace" + ) + + func setupSampling() { + // Sample 10% of traces + sampler := trace.TraceIDRatioBased(0.1) + + tp := trace.NewTracerProvider( + trace.WithSampler(sampler), + // Other configuration... 
+ ) + + otel.SetTracerProvider(tp) + } + ``` + + + ```python + from opentelemetry.sdk.trace.sampling import TraceIdRatioBasedSampler + + # Configure sampling + sampler = TraceIdRatioBasedSampler(0.1) # 10% sampling + + trace_provider = TracerProvider(sampler=sampler) + trace.set_tracer_provider(trace_provider) + ``` + + + +### Custom Metrics + +Add application-specific metrics to your observability stack: + + + + ```ts + import { metrics } from '@opentelemetry/api'; + + const meter = metrics.getMeter('my-genkit-app'); + const requestCounter = meter.createCounter('genkit_requests_total'); + const responseTime = meter.createHistogram('genkit_response_time'); + + export const myFlow = ai.defineFlow( + { name: 'myFlow' }, + async (input) => { + const startTime = Date.now(); + requestCounter.add(1, { flow: 'myFlow' }); + + try { + const result = await processData(input); + responseTime.record(Date.now() - startTime, { + flow: 'myFlow', + status: 'success' + }); + return result; + } catch (error) { + responseTime.record(Date.now() - startTime, { + flow: 'myFlow', + status: 'error' + }); + throw error; + } + } + ); + ``` + + + ```go + import ( + "go.opentelemetry.io/otel/metric" + ) + + func setupMetrics() { + meter := otel.Meter("my-genkit-app") + + requestCounter, _ := meter.Int64Counter("genkit_requests_total") + responseTime, _ := meter.Float64Histogram("genkit_response_time") + + // Use in your flows + requestCounter.Add(ctx, 1, metric.WithAttributes( + attribute.String("flow", "myFlow"), + )) + } + ``` + + + ```python + from opentelemetry import metrics + + meter = metrics.get_meter(__name__) + request_counter = meter.create_counter("genkit_requests_total") + response_time = meter.create_histogram("genkit_response_time") + + @ai.flow() + async def my_flow(input: str) -> str: + start_time = time.time() + request_counter.add(1, {"flow": "my_flow"}) + + try: + result = await process_data(input) + response_time.record( + time.time() - start_time, + {"flow": "my_flow", "status": "success"} + ) + return result + except Exception as error: + response_time.record( + time.time() - start_time, + {"flow": "my_flow", "status": "error"} + ) + raise + ``` + + + +## Troubleshooting + +### Common Issues + +**Metrics not appearing in dashboard:** +- Verify API permissions and service account roles +- Check that required Google Cloud APIs are enabled +- Ensure `metricExportIntervalMillis` isn't too high +- Confirm network connectivity to Google Cloud services + +**High telemetry costs:** +- Implement sampling to reduce data volume +- Disable input/output logging for large payloads +- Increase export intervals for non-critical environments +- Use trace sampling for high-traffic applications + +**Missing traces in production:** +- Verify OpenTelemetry configuration +- Check service account permissions +- Ensure trace export is enabled +- Validate network connectivity + +### Performance Optimization + +**Reduce telemetry overhead:** +- Use appropriate sampling rates +- Disable unnecessary auto-instrumentations +- Batch exports efficiently +- Monitor telemetry export performance + +**Optimize for production:** +- Disable development-only features +- Use environment-specific configurations +- Implement circuit breakers for telemetry exports +- Monitor resource usage + +## Best Practices + +### Development + +1. **Use the Developer UI**: Take advantage of built-in tracing during development +2. **Test monitoring setup**: Verify telemetry collection before deploying +3. 
**Monitor resource usage**: Ensure observability doesn't impact performance
4. **Implement health checks**: Monitor the health of your monitoring system

### Production

1. **Implement alerting**: Set up alerts for critical metrics and errors
2. **Use dashboards**: Create custom dashboards for your specific use cases
3. **Monitor costs**: Track telemetry costs and optimize as needed
4. **Regular reviews**: Regularly review and optimize your observability setup

### Security

1. **Protect sensitive data**: Disable input/output logging for sensitive information
2. **Secure credentials**: Use proper IAM roles and service accounts
3. **Network security**: Ensure secure connections to observability platforms
4. **Data retention**: Configure appropriate data retention policies

## Next Steps

- Set up [Firebase Genkit Monitoring](https://console.firebase.google.com/project/_/genai_monitoring) for your production applications
- Explore [OpenTelemetry documentation](https://opentelemetry.io/docs/) for advanced configurations
- Learn about [evaluation](/unified-docs/evaluation) to complement your monitoring strategy
- Check out [deployment guides](/unified-docs/deployment) for production-ready configurations

diff --git a/src/content/docs/unified-docs/observability/advanced-configuration.mdx b/src/content/docs/unified-docs/observability/advanced-configuration.mdx
new file mode 100644
index 00000000..5409d359
--- /dev/null
+++ b/src/content/docs/unified-docs/observability/advanced-configuration.mdx
@@ -0,0 +1,1107 @@
---
title: Advanced Configuration
description: Learn advanced configuration options for Genkit observability, including sampling, performance tuning, custom metrics, and telemetry optimization across JavaScript, Go, and Python.
---

import LangTabs from '@/components/LangTabs.astro';
import LangTabItem from '@/components/LangTabItem.astro';

This guide covers advanced configuration options for fine-tuning Genkit's observability features to optimize performance, control costs, and customize telemetry collection.
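Most of the options discussed here end up in a single telemetry setup call. As a starting point, here is a small environment-aware sketch for the JavaScript Firebase plugin; it uses only options that appear in this guide, and the exact values are illustrative:

```ts
import { enableFirebaseTelemetry } from '@genkit-ai/firebase';

const isProd = process.env.NODE_ENV === 'production';

enableFirebaseTelemetry({
  // Export metrics less frequently in production to reduce cost and overhead.
  metricExportIntervalMillis: isProd ? 300_000 : 10_000,
  // Avoid persisting raw prompts and responses where payloads may be sensitive.
  disableLoggingInputAndOutput: isProd,
  // Allow exporting telemetry from a local machine while testing the setup.
  forceDevExport: !isProd,
});
```

The sections below cover these and other options in more detail.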
+ +## Configuration Overview + +Genkit's observability system can be configured at multiple levels: + +- **Global Settings**: Apply to all telemetry collection +- **Language-Specific Options**: Platform-specific configurations +- **Runtime Controls**: Dynamic configuration changes +- **Environment Variables**: External configuration management + +## Default Configuration + +Understanding the default settings helps you make informed configuration decisions: + + + + ```ts + // Default Firebase telemetry configuration + { + autoInstrumentation: true, + autoInstrumentationConfig: { + '@opentelemetry/instrumentation-dns': { enabled: false }, + }, + disableMetrics: false, + disableTraces: false, + disableLoggingInputAndOutput: false, + forceDevExport: false, + metricExportIntervalMillis: 300_000, // 5 minutes + metricExportTimeoutMillis: 300_000, // 5 minutes + sampler: AlwaysOnSampler(), // 100% sampling + } + ``` + + + ```go + // Default configuration is handled by the Google Cloud plugin + // Telemetry is automatically configured with sensible defaults + type TelemetryConfig struct { + ProjectID string + EnableTracing bool // true + EnableMetrics bool // true + EnableLogging bool // true + SamplingRate float64 // 1.0 (100%) + ExportInterval time.Duration // 5 minutes + } + ``` + + + ```python + # Default monitoring configuration + default_config = { + 'enable_tracing': True, + 'enable_metrics': True, + 'enable_logging': True, + 'sampling_rate': 1.0, # 100% sampling + 'export_interval': 300, # 5 minutes + 'disable_input_output_logging': False, + } + ``` + + + +## Sampling Configuration + +Sampling reduces telemetry volume and costs while maintaining observability: + +### Trace Sampling + + + + ```ts + import { enableFirebaseTelemetry } from '@genkit-ai/firebase'; + import { + TraceIdRatioBasedSampler, + ParentBasedSampler, + AlwaysOffSampler, + AlwaysOnSampler + } from '@opentelemetry/sdk-trace-base'; + + // Sample 10% of traces + enableFirebaseTelemetry({ + sampler: new TraceIdRatioBasedSampler(0.1), + }); + + // Parent-based sampling (inherit from parent span) + enableFirebaseTelemetry({ + sampler: new ParentBasedSampler({ + root: new TraceIdRatioBasedSampler(0.1), + }), + }); + + // Environment-based sampling + const samplingRate = process.env.NODE_ENV === 'production' ? 
0.1 : 1.0; + enableFirebaseTelemetry({ + sampler: new TraceIdRatioBasedSampler(samplingRate), + }); + ``` + + + ```go + import ( + "go.opentelemetry.io/otel/sdk/trace" + ) + + func configureSampling() { + // 10% sampling + sampler := trace.TraceIDRatioBased(0.1) + + // Parent-based sampling + parentSampler := trace.ParentBased( + trace.TraceIDRatioBased(0.1), + ) + + // Environment-based sampling + var samplingRate float64 = 1.0 + if os.Getenv("ENVIRONMENT") == "production" { + samplingRate = 0.1 + } + + tp := trace.NewTracerProvider( + trace.WithSampler(trace.TraceIDRatioBased(samplingRate)), + ) + + otel.SetTracerProvider(tp) + } + ``` + + + ```python + from opentelemetry.sdk.trace.sampling import ( + TraceIdRatioBasedSampler, + ParentBased, + ALWAYS_OFF, + ALWAYS_ON + ) + + # 10% sampling + sampler = TraceIdRatioBasedSampler(0.1) + + # Parent-based sampling + parent_sampler = ParentBased( + root=TraceIdRatioBasedSampler(0.1) + ) + + # Environment-based sampling + import os + sampling_rate = 0.1 if os.getenv('ENVIRONMENT') == 'production' else 1.0 + + enable_firebase_monitoring( + sampling_rate=sampling_rate + ) + ``` + + + +### Custom Sampling Strategies + +Implement custom sampling logic based on your application needs: + + + + ```ts + import { Sampler, SamplingResult, SamplingDecision } from '@opentelemetry/sdk-trace-base'; + + class CustomSampler implements Sampler { + shouldSample(context, traceId, spanName, spanKind, attributes, links) { + // Sample all error traces + if (attributes['error'] === true) { + return { decision: SamplingDecision.RECORD_AND_SAMPLE }; + } + + // Sample 50% of flow executions + if (spanName.includes('flow:')) { + return Math.random() < 0.5 + ? { decision: SamplingDecision.RECORD_AND_SAMPLE } + : { decision: SamplingDecision.NOT_RECORD }; + } + + // Sample 10% of everything else + return Math.random() < 0.1 + ? 
{ decision: SamplingDecision.RECORD_AND_SAMPLE } + : { decision: SamplingDecision.NOT_RECORD }; + } + + toString() { + return 'CustomSampler'; + } + } + + enableFirebaseTelemetry({ + sampler: new CustomSampler(), + }); + ``` + + + ```go + import ( + "go.opentelemetry.io/otel/sdk/trace" + "go.opentelemetry.io/otel/trace" + ) + + type CustomSampler struct{} + + func (s CustomSampler) ShouldSample(p trace.SamplingParameters) trace.SamplingResult { + // Sample all error spans + if p.Attributes != nil { + for _, attr := range p.Attributes { + if attr.Key == "error" && attr.Value.AsBool() { + return trace.SamplingResult{ + Decision: trace.RecordAndSample, + } + } + } + } + + // Sample 50% of flow executions + if strings.Contains(p.Name, "flow:") { + if rand.Float64() < 0.5 { + return trace.SamplingResult{Decision: trace.RecordAndSample} + } + return trace.SamplingResult{Decision: trace.Drop} + } + + // Sample 10% of everything else + if rand.Float64() < 0.1 { + return trace.SamplingResult{Decision: trace.RecordAndSample} + } + return trace.SamplingResult{Decision: trace.Drop} + } + + func (s CustomSampler) Description() string { + return "CustomSampler" + } + ``` + + + ```python + from opentelemetry.sdk.trace.sampling import Sampler, SamplingResult + from opentelemetry.trace import SpanKind + import random + + class CustomSampler(Sampler): + def should_sample(self, parent_context, trace_id, name, kind, attributes, links, trace_state): + # Sample all error traces + if attributes and attributes.get('error') == True: + return SamplingResult(decision=True) + + # Sample 50% of flow executions + if 'flow:' in name: + return SamplingResult(decision=random.random() < 0.5) + + # Sample 10% of everything else + return SamplingResult(decision=random.random() < 0.1) + + def get_description(self): + return "CustomSampler" + + enable_firebase_monitoring( + custom_sampler=CustomSampler() + ) + ``` + + + +## Performance Optimization + +### Export Intervals and Batching + +Optimize telemetry export for your application's performance requirements: + + + + ```ts + enableFirebaseTelemetry({ + // Reduce export frequency for high-volume applications + metricExportIntervalMillis: 600_000, // 10 minutes + metricExportTimeoutMillis: 30_000, // 30 seconds + + // For development/testing - faster exports + // metricExportIntervalMillis: 10_000, // 10 seconds + // metricExportTimeoutMillis: 5_000, // 5 seconds + }); + + // Custom batch configuration for traces + import { BatchSpanProcessor } from '@opentelemetry/sdk-trace-base'; + + const batchProcessor = new BatchSpanProcessor(exporter, { + maxExportBatchSize: 512, // Default: 512 + exportTimeoutMillis: 30_000, // Default: 30 seconds + scheduledDelayMillis: 5_000, // Default: 5 seconds + maxQueueSize: 2048, // Default: 2048 + }); + ``` + + + ```go + import ( + "go.opentelemetry.io/otel/sdk/trace" + "time" + ) + + func configurePerformance() { + // Configure batch span processor + batchProcessor := trace.NewBatchSpanProcessor( + exporter, + trace.WithBatchTimeout(30*time.Second), + trace.WithExportTimeout(10*time.Second), + trace.WithMaxExportBatchSize(512), + trace.WithMaxQueueSize(2048), + ) + + tp := trace.NewTracerProvider( + trace.WithSpanProcessor(batchProcessor), + ) + + otel.SetTracerProvider(tp) + } + ``` + + + ```python + from opentelemetry.sdk.trace.export import BatchSpanProcessor + + # Configure batch processing + batch_processor = BatchSpanProcessor( + exporter, + max_export_batch_size=512, + schedule_delay_millis=5000, + export_timeout_millis=30000, + 
max_queue_size=2048, + ) + + enable_firebase_monitoring( + export_interval=600, # 10 minutes + batch_processor=batch_processor + ) + ``` + + + +### Auto-Instrumentation Control + +Fine-tune automatic instrumentation to reduce overhead: + + + + ```ts + enableFirebaseTelemetry({ + autoInstrumentationConfig: { + // Disable file system instrumentation + '@opentelemetry/instrumentation-fs': { enabled: false }, + + // Disable DNS instrumentation + '@opentelemetry/instrumentation-dns': { enabled: false }, + + // Disable network instrumentation + '@opentelemetry/instrumentation-net': { enabled: false }, + + // Configure HTTP instrumentation + '@opentelemetry/instrumentation-http': { + enabled: true, + ignoreIncomingRequestHook: (req) => { + // Ignore health check requests + return req.url?.includes('/health') || false; + }, + ignoreOutgoingRequestHook: (options) => { + // Ignore internal requests + return options.hostname === 'localhost'; + }, + }, + + // Configure Express instrumentation + '@opentelemetry/instrumentation-express': { + enabled: true, + ignoreLayers: [ + // Ignore middleware layers + (layer) => layer.name === 'cors', + ], + }, + }, + }); + ``` + + + ```go + // Go auto-instrumentation is typically handled through + // manual instrumentation or specific library integrations + + import ( + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" + ) + + func configureInstrumentation() { + // Configure HTTP client instrumentation + client := &http.Client{ + Transport: otelhttp.NewTransport( + http.DefaultTransport, + otelhttp.WithFilter(func(req *http.Request) bool { + // Skip health check requests + return !strings.Contains(req.URL.Path, "/health") + }), + ), + } + + // Configure HTTP server instrumentation + handler := otelhttp.NewHandler( + myHandler, + "my-service", + otelhttp.WithFilter(func(req *http.Request) bool { + // Skip internal requests + return req.Header.Get("X-Internal") == "" + }), + ) + } + ``` + + + ```python + from opentelemetry.instrumentation.requests import RequestsInstrumentor + from opentelemetry.instrumentation.flask import FlaskInstrumentor + + # Configure requests instrumentation + RequestsInstrumentor().instrument( + excluded_urls="localhost,127.0.0.1" + ) + + # Configure Flask instrumentation + FlaskInstrumentor().instrument_app( + app, + excluded_urls="/health,/metrics" + ) + + enable_firebase_monitoring( + auto_instrumentation_config={ + 'requests': {'enabled': True}, + 'flask': {'enabled': True}, + 'sqlalchemy': {'enabled': False}, # Disable if not needed + } + ) + ``` + + + +## Data Privacy and Security + +### Input/Output Logging Control + +Protect sensitive data by controlling what gets logged: + + + + ```ts + enableFirebaseTelemetry({ + // Disable all input/output logging + disableLoggingInputAndOutput: true, + }); + + // Or use custom filtering + import { logger } from 'genkit/logging'; + + const sanitizeData = (data: any) => { + if (typeof data === 'object' && data !== null) { + const sanitized = { ...data }; + // Remove sensitive fields + delete sanitized.password; + delete sanitized.apiKey; + delete sanitized.token; + return sanitized; + } + return data; + }; + + export const secureFlow = ai.defineFlow( + { name: 'secureFlow' }, + async (input) => { + logger.info('Flow started', { input: sanitizeData(input) }); + // Process data... 
+ const result = await processData(input); + logger.info('Flow completed', { result: sanitizeData(result) }); + return result; + } + ); + ``` + + + ```go + import ( + "context" + "log/slog" + ) + + func sanitizeData(data interface{}) interface{} { + // Implement data sanitization logic + if m, ok := data.(map[string]interface{}); ok { + sanitized := make(map[string]interface{}) + for k, v := range m { + if k != "password" && k != "apiKey" && k != "token" { + sanitized[k] = v + } else { + sanitized[k] = "[REDACTED]" + } + } + return sanitized + } + return data + } + + func secureFlow(ctx context.Context, input map[string]interface{}) (string, error) { + slog.InfoContext(ctx, "Flow started", "input", sanitizeData(input)) + + result, err := processData(input) + if err != nil { + return "", err + } + + slog.InfoContext(ctx, "Flow completed", "result", sanitizeData(result)) + return result, nil + } + ``` + + + ```python + import logging + from typing import Any, Dict + + def sanitize_data(data: Any) -> Any: + """Remove sensitive information from data.""" + if isinstance(data, dict): + sanitized = {} + for key, value in data.items(): + if key.lower() in ['password', 'api_key', 'token', 'secret']: + sanitized[key] = '[REDACTED]' + else: + sanitized[key] = value + return sanitized + return data + + enable_firebase_monitoring( + disable_input_output_logging=True # Global disable + ) + + @ai.flow() + async def secure_flow(input: Dict[str, Any]) -> str: + logger.info(f"Flow started with input: {sanitize_data(input)}") + + result = await process_data(input) + logger.info(f"Flow completed: {sanitize_data(result)}") + + return result + ``` + + + +### Attribute Filtering + +Filter sensitive attributes from telemetry data: + + + + ```ts + import { SpanProcessor, Span } from '@opentelemetry/sdk-trace-base'; + + class AttributeFilterProcessor implements SpanProcessor { + private sensitiveKeys = ['password', 'apiKey', 'token', 'secret']; + + onStart(span: Span): void { + // Filter attributes on span start + const attributes = span.attributes; + for (const key of this.sensitiveKeys) { + if (key in attributes) { + span.setAttributes({ [key]: '[REDACTED]' }); + } + } + } + + onEnd(): void {} + shutdown(): Promise { return Promise.resolve(); } + forceFlush(): Promise { return Promise.resolve(); } + } + + // Add to tracer provider + const tp = new TracerProvider({ + spanProcessors: [new AttributeFilterProcessor()], + }); + ``` + + + ```go + import ( + "go.opentelemetry.io/otel/sdk/trace" + "go.opentelemetry.io/otel/attribute" + ) + + type AttributeFilterProcessor struct { + sensitiveKeys map[string]bool + } + + func NewAttributeFilterProcessor() *AttributeFilterProcessor { + return &AttributeFilterProcessor{ + sensitiveKeys: map[string]bool{ + "password": true, + "apiKey": true, + "token": true, + "secret": true, + }, + } + } + + func (p *AttributeFilterProcessor) OnStart(parent context.Context, s trace.ReadWriteSpan) { + // Filter sensitive attributes + attrs := s.Attributes() + for _, attr := range attrs { + if p.sensitiveKeys[string(attr.Key)] { + s.SetAttributes(attribute.String(string(attr.Key), "[REDACTED]")) + } + } + } + + func (p *AttributeFilterProcessor) OnEnd(s trace.ReadOnlySpan) {} + func (p *AttributeFilterProcessor) Shutdown(ctx context.Context) error { return nil } + func (p *AttributeFilterProcessor) ForceFlush(ctx context.Context) error { return nil } + ``` + + + ```python + from opentelemetry.sdk.trace import SpanProcessor + from opentelemetry.trace import Span + + class 
AttributeFilterProcessor(SpanProcessor): + def __init__(self): + self.sensitive_keys = {'password', 'api_key', 'token', 'secret'} + + def on_start(self, span: Span, parent_context=None): + # Filter sensitive attributes + if hasattr(span, '_attributes'): + for key in list(span._attributes.keys()): + if key.lower() in self.sensitive_keys: + span._attributes[key] = '[REDACTED]' + + def on_end(self, span: Span): + pass + + def shutdown(self): + pass + + def force_flush(self, timeout_millis=None): + pass + + # Add to trace provider + trace_provider.add_span_processor(AttributeFilterProcessor()) + ``` + + + +## Custom Metrics + +Add application-specific metrics to enhance observability: + + + + ```ts + import { metrics } from '@opentelemetry/api'; + + // Create custom meters and instruments + const meter = metrics.getMeter('genkit-app', '1.0.0'); + + // Counters for tracking events + const flowExecutions = meter.createCounter('genkit_flow_executions_total', { + description: 'Total number of flow executions', + }); + + const tokenUsage = meter.createCounter('genkit_tokens_consumed_total', { + description: 'Total tokens consumed by model calls', + }); + + // Histograms for measuring distributions + const flowDuration = meter.createHistogram('genkit_flow_duration_ms', { + description: 'Flow execution duration in milliseconds', + boundaries: [10, 50, 100, 500, 1000, 5000, 10000], + }); + + const modelLatency = meter.createHistogram('genkit_model_latency_ms', { + description: 'Model call latency in milliseconds', + }); + + // Gauges for current values + const activeFlows = meter.createUpDownCounter('genkit_active_flows', { + description: 'Number of currently active flows', + }); + + // Use in your flows + export const instrumentedFlow = ai.defineFlow( + { name: 'instrumentedFlow' }, + async (input) => { + const startTime = Date.now(); + + flowExecutions.add(1, { flow: 'instrumentedFlow', version: '1.0' }); + activeFlows.add(1); + + try { + const result = await ai.generate({ + model: 'gemini-1.5-flash', + prompt: `Process: ${input}`, + }); + + // Track token usage if available + if (result.usage) { + tokenUsage.add(result.usage.totalTokens, { + model: 'gemini-1.5-flash', + flow: 'instrumentedFlow', + }); + } + + const duration = Date.now() - startTime; + flowDuration.record(duration, { + flow: 'instrumentedFlow', + status: 'success', + }); + + return result.text; + } catch (error) { + const duration = Date.now() - startTime; + flowDuration.record(duration, { + flow: 'instrumentedFlow', + status: 'error', + }); + throw error; + } finally { + activeFlows.add(-1); + } + } + ); + ``` + + + ```go + import ( + "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/attribute" + "time" + ) + + func setupCustomMetrics() { + meter := otel.Meter("genkit-app") + + // Create instruments + flowExecutions, _ := meter.Int64Counter("genkit_flow_executions_total") + tokenUsage, _ := meter.Int64Counter("genkit_tokens_consumed_total") + flowDuration, _ := meter.Float64Histogram("genkit_flow_duration_ms") + activeFlows, _ := meter.Int64UpDownCounter("genkit_active_flows") + + // Use in flows + instrumentedFlow := genkit.DefineFlow(g, "instrumentedFlow", + func(ctx context.Context, input string) (string, error) { + startTime := time.Now() + + flowExecutions.Add(ctx, 1, metric.WithAttributes( + attribute.String("flow", "instrumentedFlow"), + attribute.String("version", "1.0"), + )) + activeFlows.Add(ctx, 1) + + defer func() { + duration := float64(time.Since(startTime).Milliseconds()) + flowDuration.Record(ctx, 
duration, metric.WithAttributes( + attribute.String("flow", "instrumentedFlow"), + )) + activeFlows.Add(ctx, -1) + }() + + // Process the flow + result, err := processData(input) + if err != nil { + return "", err + } + + return result, nil + }) + } + ``` + + + ```python + from opentelemetry import metrics + import time + + # Create custom meters and instruments + meter = metrics.get_meter("genkit-app", "1.0.0") + + flow_executions = meter.create_counter( + "genkit_flow_executions_total", + description="Total number of flow executions" + ) + + token_usage = meter.create_counter( + "genkit_tokens_consumed_total", + description="Total tokens consumed by model calls" + ) + + flow_duration = meter.create_histogram( + "genkit_flow_duration_ms", + description="Flow execution duration in milliseconds" + ) + + active_flows = meter.create_up_down_counter( + "genkit_active_flows", + description="Number of currently active flows" + ) + + @ai.flow() + async def instrumented_flow(input: str) -> str: + start_time = time.time() + + flow_executions.add(1, {"flow": "instrumented_flow", "version": "1.0"}) + active_flows.add(1) + + try: + result = await ai.generate( + model='gemini-1.5-flash', + prompt=f'Process: {input}', + ) + + # Track token usage if available + if hasattr(result, 'usage') and result.usage: + token_usage.add( + result.usage.total_tokens, + {"model": "gemini-1.5-flash", "flow": "instrumented_flow"} + ) + + duration = (time.time() - start_time) * 1000 # Convert to ms + flow_duration.record(duration, { + "flow": "instrumented_flow", + "status": "success" + }) + + return result.text + + except Exception as error: + duration = (time.time() - start_time) * 1000 + flow_duration.record(duration, { + "flow": "instrumented_flow", + "status": "error" + }) + raise + finally: + active_flows.add(-1) + ``` + + + +## Environment-Specific Configuration + +Configure observability differently for each environment: + + + + ```ts + const environment = process.env.NODE_ENV || 'development'; + + const getObservabilityConfig = () => { + switch (environment) { + case 'production': + return { + sampler: new TraceIdRatioBasedSampler(0.1), // 10% sampling + metricExportIntervalMillis: 300_000, // 5 minutes + disableLoggingInputAndOutput: true, + autoInstrumentationConfig: { + '@opentelemetry/instrumentation-fs': { enabled: false }, + '@opentelemetry/instrumentation-dns': { enabled: false }, + }, + }; + + case 'staging': + return { + sampler: new TraceIdRatioBasedSampler(0.5), // 50% sampling + metricExportIntervalMillis: 60_000, // 1 minute + disableLoggingInputAndOutput: false, + }; + + case 'development': + default: + return { + forceDevExport: true, + sampler: new AlwaysOnSampler(), // 100% sampling + metricExportIntervalMillis: 10_000, // 10 seconds + disableLoggingInputAndOutput: false, + }; + } + }; + + enableFirebaseTelemetry(getObservabilityConfig()); + ``` + + + ```go + func getObservabilityConfig() TelemetryConfig { + env := os.Getenv("ENVIRONMENT") + + switch env { + case "production": + return TelemetryConfig{ + SamplingRate: 0.1, // 10% sampling + ExportInterval: 5 * time.Minute, + EnableInputOutputLogging: false, + } + case "staging": + return TelemetryConfig{ + SamplingRate: 0.5, // 50% sampling + ExportInterval: 1 * time.Minute, + EnableInputOutputLogging: true, + } + default: // development + return TelemetryConfig{ + SamplingRate: 1.0, // 100% sampling + ExportInterval: 10 * time.Second, + EnableInputOutputLogging: true, + ForceDevExport: true, + } + } + } + ``` + + + ```python + import os + + def 
get_observability_config(): + environment = os.getenv('ENVIRONMENT', 'development') + + if environment == 'production': + return { + 'sampling_rate': 0.1, # 10% sampling + 'export_interval': 300, # 5 minutes + 'disable_input_output_logging': True, + } + elif environment == 'staging': + return { + 'sampling_rate': 0.5, # 50% sampling + 'export_interval': 60, # 1 minute + 'disable_input_output_logging': False, + } + else: # development + return { + 'sampling_rate': 1.0, # 100% sampling + 'export_interval': 10, # 10 seconds + 'disable_input_output_logging': False, + 'force_dev_export': True, + } + + enable_firebase_monitoring(**get_observability_config()) + ``` + + + +## Resource Management + +### Memory and CPU Optimization + +Monitor and optimize resource usage: + + + + ```ts + // Monitor memory usage + const memoryUsage = meter.createHistogram('genkit_memory_usage_mb', { + description: 'Memory usage in megabytes', + }); + + // Track memory periodically + setInterval(() => { + const usage = process.memoryUsage(); + memoryUsage.record(usage.heapUsed / 1024 / 1024, { + type: 'heap_used', + }); + memoryUsage.record(usage.rss / 1024 / 1024, { + type: 'rss', + }); + }, 30000); // Every 30 seconds + + // Optimize telemetry for memory-constrained environments + enableFirebaseTelemetry({ + autoInstrumentationConfig: { + // Disable memory-intensive instrumentations + '@opentelemetry/instrumentation-fs': { enabled: false }, + '@opentelemetry/instrumentation-dns': { enabled: false }, + }, + // Reduce batch sizes + spanProcessorConfig: { + maxExportBatchSize: 256, + maxQueueSize: 1024, + }, + }); + ``` + + + ```go + import ( + "runtime" + "time" + ) + + func monitorResources() { + meter := otel.Meter("genkit-app") + memoryGauge, _ := meter.Float64ObservableGauge("genkit_memory_usage_mb") + + // Register callback for memory monitoring + _, err := meter.RegisterCallback( + func(ctx context.Context, o metric.Observer) error { + var m runtime.MemStats + runtime.ReadMemStats(&m) + + o.ObserveFloat64(memoryGauge, float64(m.Alloc)/1024/1024, + metric.WithAttributes(attribute.String("type", "alloc"))) + o.ObserveFloat64(memoryGauge, float64(m.Sys)/1024/1024, + metric.WithAttributes(attribute.String("type", "sys"))) + + return nil + }, + memoryGauge, + ) + if err != nil { + log.Printf("Failed to register memory callback: %v", err) + } + } + ``` + + + ```python + import psutil + import threading + import time + + def monitor_resources(): + memory_gauge = meter.create_observable_gauge( + "genkit_memory_usage_mb", + description="Memory usage in megabytes" + ) + + def collect_metrics(): + while True: + process = psutil.Process() + memory_info = process.memory_info() + + memory_gauge.set(memory_info.rss / 1024 / 1024, {"type": "rss"}) + memory_gauge.set(memory_info.vms / 1024 / 1024, {"type": "vms"}) + + time.sleep(30) # Every 30 seconds + + # Start monitoring in background thread + monitor_thread = threading.Thread(target=collect_metrics, daemon=True) + monitor_thread.start() + + # Optimize for memory-constrained environments + enable_firebase_monitoring( + batch_size=256, + queue_size=1024, + export_interval=60, # Longer intervals + ) + ``` + + + +## Best Practices Summary + +### Development Environment + +1. **Enable verbose logging**: Use 100% sampling and short export intervals +2. **Test monitoring setup**: Verify telemetry collection before deploying +3. **Use the Developer UI**: Take advantage of built-in observability tools +4. 
**Monitor resource usage**: Ensure observability doesn't impact development + +### Staging Environment + +1. **Moderate sampling**: Use 50% sampling to balance cost and visibility +2. **Test production config**: Validate monitoring configuration +3. **Performance testing**: Measure observability overhead +4. **Security testing**: Verify data privacy controls + +### Production Environment + +1. **Optimize sampling**: Use 10% or lower sampling for high-volume applications +2. **Protect sensitive data**: Disable input/output logging for sensitive information +3. **Monitor costs**: Track telemetry expenses and optimize accordingly +4. **Set up alerting**: Configure alerts for critical metrics and errors + +### Security Considerations + +1. **Data privacy**: Implement proper data sanitization and filtering +2. **Access control**: Limit who can view observability data +3. **Credential security**: Use proper IAM roles and rotate keys regularly +4. **Network security**: Ensure secure connections to monitoring services + +## Next Steps + +- Learn about [troubleshooting common issues](/unified-docs/observability/troubleshooting) +- Explore the main [observability and monitoring guide](/unified-docs/observability-monitoring) +- Set up [authentication and permissions](/unified-docs/observability/authentication) +- Check out [deployment guides](/unified-docs/deployment) for production configurations diff --git a/src/content/docs/unified-docs/observability/authentication.mdx b/src/content/docs/unified-docs/observability/authentication.mdx new file mode 100644 index 00000000..5cb802c5 --- /dev/null +++ b/src/content/docs/unified-docs/observability/authentication.mdx @@ -0,0 +1,445 @@ +--- +# FLAG: This needs more review +title: Authentication and Setup +description: Learn how to set up authentication and permissions for Genkit observability and monitoring across JavaScript, Go, and Python environments. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +This guide covers the authentication and setup requirements for enabling Genkit observability and monitoring in production environments. + +## Prerequisites + +Before setting up observability, ensure you have: + +1. **Firebase Project**: A Firebase project with the Blaze pricing plan +2. **Google Cloud Project**: Access to the associated Google Cloud project +3. **Required APIs**: Enabled Google Cloud APIs for monitoring services +4. **Service Account**: Proper service account with necessary permissions + +## Required Google Cloud APIs + +Enable these APIs in your Google Cloud project: + +- [Cloud Logging API](https://console.cloud.google.com/apis/library/logging.googleapis.com) +- [Cloud Trace API](https://console.cloud.google.com/apis/library/cloudtrace.googleapis.com) +- [Cloud Monitoring API](https://console.cloud.google.com/apis/library/monitoring.googleapis.com) + +You can enable these APIs using the Google Cloud Console or the `gcloud` CLI: + +```bash +gcloud services enable logging.googleapis.com +gcloud services enable cloudtrace.googleapis.com +gcloud services enable monitoring.googleapis.com +``` + +## Service Account Setup + +### Required IAM Roles + +Grant the following roles to your service account: + +- **Monitoring Metric Writer** (`roles/monitoring.metricWriter`) +- **Cloud Trace Agent** (`roles/cloudtrace.agent`) +- **Logs Writer** (`roles/logging.logWriter`) + +### Creating a Service Account + + + + 1. 
Go to the [IAM & Admin > Service Accounts](https://console.cloud.google.com/iam-admin/serviceaccounts) page + 2. Click **Create Service Account** + 3. Enter a name and description for the service account + 4. Click **Create and Continue** + 5. Add the required roles: + - Monitoring Metric Writer + - Cloud Trace Agent + - Logs Writer + 6. Click **Continue** and then **Done** + 7. Download the service account key JSON file + + + ```bash + # Create service account + gcloud iam service-accounts create genkit-monitoring \ + --display-name="Genkit Monitoring Service Account" + + # Get your project ID + PROJECT_ID=$(gcloud config get-value project) + + # Grant required roles + gcloud projects add-iam-policy-binding $PROJECT_ID \ + --member="serviceAccount:genkit-monitoring@$PROJECT_ID.iam.gserviceaccount.com" \ + --role="roles/monitoring.metricWriter" + + gcloud projects add-iam-policy-binding $PROJECT_ID \ + --member="serviceAccount:genkit-monitoring@$PROJECT_ID.iam.gserviceaccount.com" \ + --role="roles/cloudtrace.agent" + + gcloud projects add-iam-policy-binding $PROJECT_ID \ + --member="serviceAccount:genkit-monitoring@$PROJECT_ID.iam.gserviceaccount.com" \ + --role="roles/logging.logWriter" + + # Create and download key + gcloud iam service-accounts keys create genkit-monitoring-key.json \ + --iam-account=genkit-monitoring@$PROJECT_ID.iam.gserviceaccount.com + ``` + + + +## Authentication Configuration + +### Environment Variables + +Set up authentication using environment variables: + +```bash +# Path to service account key file +export GOOGLE_APPLICATION_CREDENTIALS="/path/to/genkit-monitoring-key.json" + +# Or set the project ID directly +export GOOGLE_CLOUD_PROJECT="your-project-id" +``` + +### Application Default Credentials + +For production deployments, use Application Default Credentials (ADC): + + + + Cloud Functions automatically use the default service account. 
Ensure it has the required roles: + + ```bash + # Get the default service account + PROJECT_ID=$(gcloud config get-value project) + DEFAULT_SA="$PROJECT_ID@appspot.gserviceaccount.com" + + # Grant required roles + gcloud projects add-iam-policy-binding $PROJECT_ID \ + --member="serviceAccount:$DEFAULT_SA" \ + --role="roles/monitoring.metricWriter" + + gcloud projects add-iam-policy-binding $PROJECT_ID \ + --member="serviceAccount:$DEFAULT_SA" \ + --role="roles/cloudtrace.agent" + + gcloud projects add-iam-policy-binding $PROJECT_ID \ + --member="serviceAccount:$DEFAULT_SA" \ + --role="roles/logging.logWriter" + ``` + + + Specify a custom service account when deploying: + + ```bash + gcloud run deploy genkit-app \ + --image gcr.io/PROJECT_ID/genkit-app \ + --service-account genkit-monitoring@PROJECT_ID.iam.gserviceaccount.com \ + --region us-central1 + ``` + + + Attach the service account to your VM instance: + + ```bash + gcloud compute instances create genkit-vm \ + --service-account genkit-monitoring@PROJECT_ID.iam.gserviceaccount.com \ + --scopes cloud-platform + ``` + + + +## Language-Specific Setup + +### JavaScript/Node.js + + + + ```bash + # Set environment variables + export GOOGLE_APPLICATION_CREDENTIALS="/path/to/key.json" + export ENABLE_FIREBASE_MONITORING=true + ``` + + ```ts + import { enableFirebaseTelemetry } from '@genkit-ai/firebase'; + + // Automatically uses GOOGLE_APPLICATION_CREDENTIALS + enableFirebaseTelemetry(); + ``` + + + ```ts + import { enableFirebaseTelemetry } from '@genkit-ai/firebase'; + + enableFirebaseTelemetry({ + projectId: 'your-project-id', + // Optional: specify credentials path + keyFilename: '/path/to/key.json', + }); + ``` + + + +### Go + + + + ```bash + export GOOGLE_APPLICATION_CREDENTIALS="/path/to/key.json" + export GOOGLE_CLOUD_PROJECT="your-project-id" + ``` + + ```go + import ( + "github.com/firebase/genkit/go/plugins/googlecloud" + ) + + func main() { + ctx := context.Background() + + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlecloud.GoogleCloud{}), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + + ```go + import ( + "google.golang.org/api/option" + "github.com/firebase/genkit/go/plugins/googlecloud" + ) + + func main() { + ctx := context.Background() + + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlecloud.GoogleCloud{ + ProjectID: "your-project-id", + ClientOptions: []option.ClientOption{ + option.WithCredentialsFile("/path/to/key.json"), + }, + }), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + + +### Python + + + + ```bash + export GOOGLE_APPLICATION_CREDENTIALS="/path/to/key.json" + export GOOGLE_CLOUD_PROJECT="your-project-id" + ``` + + ```python + from genkit.ai import Genkit + from genkit.monitoring import enable_firebase_monitoring + + # Automatically uses environment variables + enable_firebase_monitoring() + + ai = Genkit() + ``` + + + ```python + from genkit.ai import Genkit + from genkit.monitoring import enable_firebase_monitoring + + enable_firebase_monitoring( + project_id='your-project-id', + credentials_path='/path/to/key.json' + ) + + ai = Genkit() + ``` + + + +## Local Development Authentication + +For local development and testing, you can use your personal Google Cloud credentials: + +### Using gcloud CLI + +```bash +# Authenticate with your Google account +gcloud auth application-default login + +# Set the project +gcloud config set project your-project-id +``` + +### Impersonating Service Accounts + +For testing with service account permissions locally: + +```bash +# Impersonate 
the service account +gcloud auth application-default login \ + --impersonate-service-account genkit-monitoring@your-project-id.iam.gserviceaccount.com +``` + +**Note**: You need the `roles/iam.serviceAccountTokenCreator` role to impersonate service accounts. + +## Testing Authentication + +Verify your authentication setup: + + + + ```ts + import { enableFirebaseTelemetry } from '@genkit-ai/firebase'; + + // Enable with local testing + enableFirebaseTelemetry({ + forceDevExport: true, + metricExportIntervalMillis: 10_000, // 10 seconds for testing + }); + + // Test with a simple flow + export const testFlow = ai.defineFlow( + { name: 'testFlow' }, + async () => { + console.log('Testing observability...'); + return 'Authentication successful!'; + } + ); + ``` + + + ```go + func testAuthentication() { + ctx := context.Background() + + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlecloud.GoogleCloud{}), + ) + if err != nil { + log.Fatalf("Authentication failed: %v", err) + } + + testFlow := genkit.DefineFlow(g, "testFlow", + func(ctx context.Context, input string) (string, error) { + log.Println("Testing observability...") + return "Authentication successful!", nil + }) + + // Run the test flow + result, err := testFlow.Run(ctx, "test") + if err != nil { + log.Fatalf("Test failed: %v", err) + } + + log.Printf("Test result: %s", result) + } + ``` + + + ```python + from genkit.ai import Genkit + from genkit.monitoring import enable_firebase_monitoring + + try: + enable_firebase_monitoring(force_dev_export=True) + ai = Genkit() + + @ai.flow() + async def test_flow() -> str: + print("Testing observability...") + return "Authentication successful!" + + # Test the flow + result = await test_flow() + print(f"Test result: {result}") + + except Exception as e: + print(f"Authentication failed: {e}") + ``` + + + +## Security Best Practices + +### Service Account Security + +1. **Principle of Least Privilege**: Only grant the minimum required roles +2. **Key Rotation**: Regularly rotate service account keys +3. **Key Storage**: Store keys securely and never commit them to version control +4. **Access Monitoring**: Monitor service account usage and access patterns + +### Environment Security + +1. **Environment Separation**: Use different service accounts for dev/staging/prod +2. **Secret Management**: Use secret management services for credentials +3. **Network Security**: Restrict network access to monitoring endpoints +4. 
**Audit Logging**: Enable audit logging for service account activities + +### Credential Management + +```bash +# Example: Using Google Secret Manager +gcloud secrets create genkit-monitoring-key --data-file=key.json + +# Grant access to the secret +gcloud secrets add-iam-policy-binding genkit-monitoring-key \ + --member="serviceAccount:your-app@your-project.iam.gserviceaccount.com" \ + --role="roles/secretmanager.secretAccessor" +``` + +## Troubleshooting Authentication + +### Common Issues + +**Permission Denied Errors**: +- Verify service account has required roles +- Check that APIs are enabled +- Ensure credentials are properly configured + +**Authentication Not Found**: +- Verify `GOOGLE_APPLICATION_CREDENTIALS` path +- Check service account key file exists and is readable +- Ensure project ID is correctly set + +**Quota Exceeded**: +- Check API quotas and limits +- Verify billing is enabled for the project +- Monitor usage in the Google Cloud Console + +### Debugging Commands + +```bash +# Check current authentication +gcloud auth list + +# Verify project configuration +gcloud config list + +# Test API access +gcloud logging logs list --limit=1 + +# Check service account permissions +gcloud projects get-iam-policy your-project-id \ + --flatten="bindings[].members" \ + --filter="bindings.members:serviceAccount:your-sa@your-project.iam.gserviceaccount.com" +``` + +## Next Steps + +- Configure [advanced monitoring options](/unified-docs/observability/advanced-configuration) +- Learn about [troubleshooting common issues](/unified-docs/observability/troubleshooting) +- Explore the main [observability and monitoring guide](/unified-docs/observability-monitoring) +- Set up [production deployment](/unified-docs/deployment) with proper monitoring diff --git a/src/content/docs/unified-docs/observability/overview.mdx b/src/content/docs/unified-docs/observability/overview.mdx new file mode 100644 index 00000000..8dc1c7e6 --- /dev/null +++ b/src/content/docs/unified-docs/observability/overview.mdx @@ -0,0 +1,243 @@ +--- +title: Observability Overview +description: Get started with Genkit's observability features, including local development tools, production monitoring, and telemetry configuration across JavaScript, Go, and Python. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +This guide provides an overview of Genkit's observability capabilities and helps you get started with monitoring your AI workflows in both development and production environments. + +## What is Observability? + +Observability in Genkit refers to the ability to understand the internal state of your AI workflows by examining their outputs. This includes: + +- **Tracing**: Following the execution path of your flows step-by-step +- **Metrics**: Collecting quantitative data about performance and usage +- **Logging**: Recording events and debugging information +- **Monitoring**: Real-time visibility into system health and performance + +## Observability Layers + +### 1. Local Development + +During development, Genkit provides built-in observability through the Developer UI: + +- **Automatic trace collection**: No configuration required +- **Interactive debugging**: Step through flow execution +- **Real-time inspection**: View inputs, outputs, and intermediate states +- **Performance analysis**: Identify bottlenecks and optimization opportunities + +### 2. 
Production Monitoring + +For deployed applications, Genkit offers production-grade monitoring: + +- **Firebase Genkit Monitoring**: Integrated dashboard for Firebase projects +- **OpenTelemetry export**: Send data to any observability platform +- **Custom metrics**: Application-specific monitoring +- **Alerting and dashboards**: Proactive monitoring and visualization + +## Key Features + +### Automatic Instrumentation + +Genkit automatically instruments your code to collect telemetry data: + + + + ```ts + import { genkit } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + + // Observability is automatically enabled + const ai = genkit({ + plugins: [googleAI()], + }); + + // All flows are automatically traced + export const myFlow = ai.defineFlow( + { name: 'myFlow' }, + async (input) => { + // This execution will be automatically traced + return await ai.generate({ + model: 'gemini-1.5-flash', + prompt: `Process this: ${input}`, + }); + } + ); + ``` + + + ```go + import ( + "context" + "github.com/firebase/genkit/go/genkit" + ) + + func main() { + ctx := context.Background() + + // Observability is built-in + g, err := genkit.Init(ctx) + if err != nil { + log.Fatal(err) + } + + // Flows are automatically instrumented + myFlow := genkit.DefineFlow(g, "myFlow", + func(ctx context.Context, input string) (string, error) { + // Execution is automatically traced + return processData(input) + }) + } + ``` + + + ```python + from genkit.ai import Genkit + + # Observability is enabled by default + ai = Genkit() + + @ai.flow() + async def my_flow(input: str) -> str: + # This flow execution is automatically traced + result = await ai.generate( + model='gemini-1.5-flash', + prompt=f'Process this: {input}', + ) + return result.text + ``` + + + +### Developer UI Integration + +The Developer UI provides comprehensive observability during development: + +1. **Flow Execution Traces**: See every step of your flow execution +2. **Input/Output Inspection**: Examine data at each stage +3. **Performance Metrics**: View timing and resource usage +4. **Error Analysis**: Debug failures with detailed stack traces +5. **Interactive Testing**: Run flows with different inputs + +### Production Telemetry + +For production environments, Genkit exports telemetry data to monitoring platforms: + +- **Metrics**: Performance indicators, error rates, usage statistics +- **Traces**: Detailed execution paths for debugging +- **Logs**: Structured logging with context +- **Custom Events**: Application-specific monitoring points + +## Getting Started + +### 1. Local Development + +Start the Developer UI to begin observing your flows: + +```bash +# JavaScript +npx genkit start + +# Go +genkit start + +# Python +python -m genkit start +``` + +### 2. Production Setup + +Enable production monitoring by configuring telemetry export: + + + + ```bash + # Install Firebase plugin + npm install @genkit-ai/firebase + + # Enable monitoring + export ENABLE_FIREBASE_MONITORING=true + ``` + + + ```bash + # Install Google Cloud plugin + go get github.com/firebase/genkit/go/plugins/googlecloud + ``` + + + ```bash + # Install monitoring dependencies + pip install genkit[monitoring] + ``` + + + +### 3. Configure Permissions + +Set up the required Google Cloud permissions: + +- **Monitoring Metric Writer** (`roles/monitoring.metricWriter`) +- **Cloud Trace Agent** (`roles/cloudtrace.agent`) +- **Logs Writer** (`roles/logging.logWriter`) + +## Observability Best Practices + +### Development + +1. 
**Use the Developer UI**: Take advantage of built-in tracing +2. **Test with realistic data**: Use production-like inputs for testing +3. **Monitor performance**: Identify bottlenecks early +4. **Debug systematically**: Use traces to understand flow behavior + +### Production + +1. **Implement monitoring**: Set up dashboards and alerts +2. **Control costs**: Use sampling and filtering appropriately +3. **Protect sensitive data**: Configure input/output logging carefully +4. **Regular reviews**: Analyze metrics and optimize performance + +### Security + +1. **Secure credentials**: Use proper IAM roles and service accounts +2. **Data privacy**: Disable logging of sensitive information +3. **Network security**: Ensure secure connections to monitoring services +4. **Access control**: Limit who can view observability data + +## Common Use Cases + +### Debugging Flow Issues + +Use observability to diagnose problems: + +1. **Trace analysis**: Follow execution path to find failures +2. **Input/output inspection**: Verify data transformations +3. **Performance profiling**: Identify slow operations +4. **Error correlation**: Connect errors to specific inputs or conditions + +### Performance Optimization + +Leverage metrics to improve performance: + +1. **Latency analysis**: Find and optimize slow operations +2. **Resource usage**: Monitor memory and CPU consumption +3. **Throughput measurement**: Track requests per second +4. **Cost optimization**: Identify expensive operations + +### Production Monitoring + +Maintain system health in production: + +1. **Health dashboards**: Monitor key metrics and trends +2. **Alerting**: Get notified of issues before users are affected +3. **Capacity planning**: Use metrics to plan for growth +4. **Incident response**: Use traces and logs for troubleshooting + +## Next Steps + +- Learn about [authentication and setup](/unified-docs/observability/authentication) for production monitoring +- Explore [advanced configuration](/unified-docs/observability/advanced-configuration) options +- Check out [troubleshooting](/unified-docs/observability/troubleshooting) common issues +- Read the main [observability and monitoring](/unified-docs/observability-monitoring) guide for detailed implementation diff --git a/src/content/docs/unified-docs/observability/troubleshooting.mdx b/src/content/docs/unified-docs/observability/troubleshooting.mdx new file mode 100644 index 00000000..94e496dd --- /dev/null +++ b/src/content/docs/unified-docs/observability/troubleshooting.mdx @@ -0,0 +1,765 @@ +--- +title: Troubleshooting +description: Common issues and solutions for Genkit observability and monitoring across JavaScript, Go, and Python environments. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +This guide helps you diagnose and resolve common issues with Genkit's observability and monitoring features. + +## Common Issues + +### Metrics Not Appearing in Dashboard + +**Symptoms:** +- No metrics visible in the Firebase Genkit Monitoring dashboard +- Empty charts or "No data available" messages +- Metrics appear locally but not in production + +**Possible Causes and Solutions:** + + + + **1. Check Firebase plugin configuration:** + ```ts + import { enableFirebaseTelemetry } from '@genkit-ai/firebase'; + + // Verify telemetry is enabled + enableFirebaseTelemetry({ + forceDevExport: true, // For testing + metricExportIntervalMillis: 10_000, // Faster for testing + }); + ``` + + **2. 
Verify environment variables:** + ```bash + echo $GOOGLE_APPLICATION_CREDENTIALS + echo $GOOGLE_CLOUD_PROJECT + echo $ENABLE_FIREBASE_MONITORING + ``` + + **3. Check API permissions:** + ```bash + # Test if APIs are accessible + gcloud logging logs list --limit=1 + gcloud monitoring metrics list --limit=1 + ``` + + + **1. Verify Google Cloud plugin setup:** + ```go + import ( + "github.com/firebase/genkit/go/plugins/googlecloud" + ) + + func main() { + ctx := context.Background() + + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlecloud.GoogleCloud{}), + ) + if err != nil { + log.Fatalf("Failed to initialize Genkit: %v", err) + } + + log.Println("Genkit initialized successfully") + } + ``` + + **2. Check environment setup:** + ```bash + echo $GOOGLE_APPLICATION_CREDENTIALS + echo $GOOGLE_CLOUD_PROJECT + ``` + + **3. Test authentication:** + ```go + // Add to your main function for testing + client, err := monitoring.NewMetricClient(ctx) + if err != nil { + log.Fatalf("Failed to create monitoring client: %v", err) + } + defer client.Close() + log.Println("Monitoring client created successfully") + ``` + + + **1. Verify monitoring setup:** + ```python + from genkit.ai import Genkit + from genkit.monitoring import enable_firebase_monitoring + + try: + enable_firebase_monitoring(force_dev_export=True) + print("Monitoring enabled successfully") + except Exception as e: + print(f"Failed to enable monitoring: {e}") + + ai = Genkit() + ``` + + **2. Check dependencies:** + ```bash + pip list | grep -E "(genkit|google|opentelemetry)" + ``` + + **3. Test authentication:** + ```python + from google.cloud import monitoring_v3 + + try: + client = monitoring_v3.MetricServiceClient() + project_name = f"projects/{your_project_id}" + metrics = client.list_metric_descriptors(name=project_name) + print("Authentication successful") + except Exception as e: + print(f"Authentication failed: {e}") + ``` + + + +### Authentication and Permission Issues + +**Symptoms:** +- "Permission denied" errors +- "Authentication failed" messages +- 403 or 401 HTTP status codes + +**Solutions:** + +**1. Verify IAM roles:** +```bash +# Check service account permissions +gcloud projects get-iam-policy YOUR_PROJECT_ID \ + --flatten="bindings[].members" \ + --filter="bindings.members:serviceAccount:YOUR_SERVICE_ACCOUNT" +``` + +**2. Required roles checklist:** +- ✅ `roles/monitoring.metricWriter` +- ✅ `roles/cloudtrace.agent` +- ✅ `roles/logging.logWriter` + +**3. Test service account:** +```bash +# Impersonate service account for testing +gcloud auth application-default login \ + --impersonate-service-account YOUR_SERVICE_ACCOUNT +``` + +**4. Verify API enablement:** +```bash +gcloud services list --enabled --filter="name:(logging|monitoring|cloudtrace)" +``` + +### High Telemetry Costs + +**Symptoms:** +- Unexpected Google Cloud billing charges +- High volume of telemetry data +- Performance impact from telemetry collection + +**Solutions:** + + + + **1. Implement sampling:** + ```ts + import { TraceIdRatioBasedSampler } from '@opentelemetry/sdk-trace-base'; + + enableFirebaseTelemetry({ + // Sample only 10% of traces + sampler: new TraceIdRatioBasedSampler(0.1), + + // Increase export intervals + metricExportIntervalMillis: 600_000, // 10 minutes + + // Disable input/output logging + disableLoggingInputAndOutput: true, + }); + ``` + + **2. 
Disable unnecessary instrumentation:** + ```ts + enableFirebaseTelemetry({ + autoInstrumentationConfig: { + '@opentelemetry/instrumentation-fs': { enabled: false }, + '@opentelemetry/instrumentation-dns': { enabled: false }, + '@opentelemetry/instrumentation-net': { enabled: false }, + }, + }); + ``` + + + **1. Configure sampling:** + ```go + import "go.opentelemetry.io/otel/sdk/trace" + + // 10% sampling + sampler := trace.TraceIDRatioBased(0.1) + + tp := trace.NewTracerProvider( + trace.WithSampler(sampler), + ) + + otel.SetTracerProvider(tp) + ``` + + **2. Optimize batch processing:** + ```go + batchProcessor := trace.NewBatchSpanProcessor( + exporter, + trace.WithBatchTimeout(60*time.Second), // Longer batches + trace.WithMaxExportBatchSize(1024), // Larger batches + ) + ``` + + + **1. Reduce sampling rate:** + ```python + enable_firebase_monitoring( + sampling_rate=0.1, # 10% sampling + export_interval=600, # 10 minutes + disable_input_output_logging=True, + ) + ``` + + **2. Optimize batch settings:** + ```python + from opentelemetry.sdk.trace.export import BatchSpanProcessor + + batch_processor = BatchSpanProcessor( + exporter, + max_export_batch_size=1024, + schedule_delay_millis=60000, # 1 minute + ) + ``` + + + +### Missing Traces in Production + +**Symptoms:** +- Traces visible in development but not production +- Incomplete trace data +- Gaps in trace timeline + +**Solutions:** + +**1. Check sampling configuration:** +- Ensure sampling rate isn't too low +- Verify parent-based sampling isn't dropping traces +- Test with higher sampling rates temporarily + +**2. Verify network connectivity:** +```bash +# Test connectivity to Google Cloud endpoints +curl -I https://cloudtrace.googleapis.com/ +curl -I https://monitoring.googleapis.com/ +``` + +**3. Check export configuration:** + + + + ```ts + // Add debugging to trace export + enableFirebaseTelemetry({ + // Enable debug logging + debug: true, + + // Reduce export timeout for testing + metricExportTimeoutMillis: 10_000, + + // Force export in all environments + forceDevExport: true, + }); + ``` + + + ```go + // Add logging to trace export + import "go.opentelemetry.io/otel/exporters/stdout/stdouttrace" + + // For debugging, add stdout exporter + stdoutExporter, err := stdouttrace.New( + stdouttrace.WithPrettyPrint(), + ) + if err != nil { + log.Fatal(err) + } + + tp := trace.NewTracerProvider( + trace.WithSpanProcessor(trace.NewSimpleSpanProcessor(stdoutExporter)), + ) + ``` + + + ```python + # Enable debug logging + import logging + logging.basicConfig(level=logging.DEBUG) + + # Add console exporter for debugging + from opentelemetry.exporter.console import ConsoleSpanExporter + from opentelemetry.sdk.trace.export import SimpleSpanProcessor + + console_exporter = ConsoleSpanExporter() + trace_provider.add_span_processor( + SimpleSpanProcessor(console_exporter) + ) + ``` + + + +### Performance Issues + +**Symptoms:** +- Increased application latency +- High CPU or memory usage +- Slow response times + +**Solutions:** + +**1. 
Optimize telemetry overhead:** + + + + ```ts + // Reduce telemetry overhead + enableFirebaseTelemetry({ + // Use batch processing + spanProcessorConfig: { + maxExportBatchSize: 512, + scheduledDelayMillis: 5000, + maxQueueSize: 2048, + }, + + // Disable expensive instrumentations + autoInstrumentationConfig: { + '@opentelemetry/instrumentation-fs': { enabled: false }, + '@opentelemetry/instrumentation-dns': { enabled: false }, + }, + }); + ``` + + + ```go + // Optimize span processing + batchProcessor := trace.NewBatchSpanProcessor( + exporter, + trace.WithBatchTimeout(5*time.Second), + trace.WithMaxExportBatchSize(512), + trace.WithMaxQueueSize(2048), + ) + + tp := trace.NewTracerProvider( + trace.WithSpanProcessor(batchProcessor), + trace.WithSampler(trace.TraceIDRatioBased(0.1)), + ) + ``` + + + ```python + # Optimize performance + enable_firebase_monitoring( + sampling_rate=0.1, + batch_size=512, + schedule_delay_millis=5000, + max_queue_size=2048, + ) + ``` + + + +**2. Monitor resource usage:** + + + + ```ts + // Monitor memory usage + setInterval(() => { + const usage = process.memoryUsage(); + console.log('Memory usage:', { + heapUsed: Math.round(usage.heapUsed / 1024 / 1024) + 'MB', + heapTotal: Math.round(usage.heapTotal / 1024 / 1024) + 'MB', + rss: Math.round(usage.rss / 1024 / 1024) + 'MB', + }); + }, 30000); + ``` + + + ```go + // Monitor memory usage + import "runtime" + + func logMemoryUsage() { + var m runtime.MemStats + runtime.ReadMemStats(&m) + + log.Printf("Memory usage: Alloc=%dMB, Sys=%dMB, NumGC=%d", + m.Alloc/1024/1024, m.Sys/1024/1024, m.NumGC) + } + + // Call periodically + go func() { + for { + logMemoryUsage() + time.Sleep(30 * time.Second) + } + }() + ``` + + + ```python + import psutil + import threading + import time + + def monitor_memory(): + while True: + process = psutil.Process() + memory_info = process.memory_info() + print(f"Memory usage: RSS={memory_info.rss // 1024 // 1024}MB, " + f"VMS={memory_info.vms // 1024 // 1024}MB") + time.sleep(30) + + # Start monitoring thread + monitor_thread = threading.Thread(target=monitor_memory, daemon=True) + monitor_thread.start() + ``` + + + +## Debugging Tools + +### Enable Debug Logging + + + + ```ts + // Enable OpenTelemetry debug logging + process.env.OTEL_LOG_LEVEL = 'debug'; + + // Enable Genkit debug logging + import { logger } from 'genkit/logging'; + logger.setLogLevel('debug'); + + // Log telemetry configuration + enableFirebaseTelemetry({ + debug: true, + // ... 
other config + }); + ``` + + + ```go + import ( + "log/slog" + "os" + ) + + // Enable debug logging + logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{ + Level: slog.LevelDebug, + })) + slog.SetDefault(logger) + + // Log telemetry events + slog.Debug("Telemetry configuration", "config", config) + ``` + + + ```python + import logging + import os + + # Enable debug logging + logging.basicConfig( + level=logging.DEBUG, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' + ) + + # Enable OpenTelemetry debug logging + os.environ['OTEL_LOG_LEVEL'] = 'debug' + + # Log monitoring setup + logger = logging.getLogger(__name__) + logger.debug("Enabling Firebase monitoring") + ``` + + + +### Test Telemetry Export + + + + ```ts + // Test telemetry export with console output + import { ConsoleSpanExporter } from '@opentelemetry/exporter-console'; + import { SimpleSpanProcessor } from '@opentelemetry/sdk-trace-base'; + + const consoleExporter = new ConsoleSpanExporter(); + const processor = new SimpleSpanProcessor(consoleExporter); + + // Add to tracer provider for debugging + const tp = new TracerProvider({ + spanProcessors: [processor], + }); + ``` + + + ```go + // Test with stdout exporter + import "go.opentelemetry.io/otel/exporters/stdout/stdouttrace" + + stdoutExporter, err := stdouttrace.New( + stdouttrace.WithPrettyPrint(), + ) + if err != nil { + log.Fatal(err) + } + + tp := trace.NewTracerProvider( + trace.WithSpanProcessor(trace.NewSimpleSpanProcessor(stdoutExporter)), + ) + ``` + + + ```python + # Test with console exporter + from opentelemetry.exporter.console import ConsoleSpanExporter + from opentelemetry.sdk.trace.export import SimpleSpanProcessor + + console_exporter = ConsoleSpanExporter() + trace_provider.add_span_processor( + SimpleSpanProcessor(console_exporter) + ) + ``` + + + +### Validate Configuration + + + + ```ts + // Configuration validation function + function validateTelemetryConfig() { + console.log('Environment variables:'); + console.log('GOOGLE_APPLICATION_CREDENTIALS:', process.env.GOOGLE_APPLICATION_CREDENTIALS); + console.log('GOOGLE_CLOUD_PROJECT:', process.env.GOOGLE_CLOUD_PROJECT); + console.log('ENABLE_FIREBASE_MONITORING:', process.env.ENABLE_FIREBASE_MONITORING); + + // Test API access + const { GoogleAuth } = require('google-auth-library'); + const auth = new GoogleAuth(); + + auth.getProjectId() + .then(projectId => console.log('Project ID:', projectId)) + .catch(err => console.error('Auth error:', err)); + } + + validateTelemetryConfig(); + ``` + + + ```go + func validateTelemetryConfig() { + log.Println("Environment variables:") + log.Println("GOOGLE_APPLICATION_CREDENTIALS:", os.Getenv("GOOGLE_APPLICATION_CREDENTIALS")) + log.Println("GOOGLE_CLOUD_PROJECT:", os.Getenv("GOOGLE_CLOUD_PROJECT")) + + // Test authentication + ctx := context.Background() + creds, err := google.FindDefaultCredentials(ctx) + if err != nil { + log.Printf("Credentials error: %v", err) + } else { + log.Printf("Project ID: %s", creds.ProjectID) + } + } + ``` + + + ```python + def validate_telemetry_config(): + import os + from google.auth import default + + print("Environment variables:") + print(f"GOOGLE_APPLICATION_CREDENTIALS: {os.getenv('GOOGLE_APPLICATION_CREDENTIALS')}") + print(f"GOOGLE_CLOUD_PROJECT: {os.getenv('GOOGLE_CLOUD_PROJECT')}") + + try: + credentials, project_id = default() + print(f"Project ID: {project_id}") + print("Authentication successful") + except Exception as e: + print(f"Authentication error: {e}") + + 
validate_telemetry_config() + ``` + + + +## Monitoring Health + +### Check Telemetry Pipeline + +```bash +# Check if telemetry data is being generated +gcloud logging read "resource.type=global" --limit=10 --format="table(timestamp,severity,textPayload)" + +# Check metrics +gcloud monitoring metrics list --filter="metric.type:genkit" --limit=10 + +# Check traces +gcloud trace list-traces --limit=10 +``` + +### Monitor Export Success + + + + ```ts + // Monitor export success/failure + import { metrics } from '@opentelemetry/api'; + + const meter = metrics.getMeter('telemetry-health'); + const exportSuccess = meter.createCounter('telemetry_export_success_total'); + const exportFailure = meter.createCounter('telemetry_export_failure_total'); + + // Custom exporter wrapper + class MonitoredExporter { + constructor(baseExporter) { + this.baseExporter = baseExporter; + } + + export(spans, resultCallback) { + this.baseExporter.export(spans, (result) => { + if (result.code === ExportResultCode.SUCCESS) { + exportSuccess.add(1); + } else { + exportFailure.add(1); + console.error('Export failed:', result.error); + } + resultCallback(result); + }); + } + } + ``` + + + ```go + // Monitor export health + type MonitoredExporter struct { + base trace.SpanExporter + successCounter metric.Int64Counter + failureCounter metric.Int64Counter + } + + func (e *MonitoredExporter) ExportSpans(ctx context.Context, spans []trace.ReadOnlySpan) error { + err := e.base.ExportSpans(ctx, spans) + if err != nil { + e.failureCounter.Add(ctx, 1) + log.Printf("Export failed: %v", err) + } else { + e.successCounter.Add(ctx, 1) + } + return err + } + ``` + + + ```python + # Monitor export health + from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult + + class MonitoredExporter(SpanExporter): + def __init__(self, base_exporter): + self.base_exporter = base_exporter + self.success_count = 0 + self.failure_count = 0 + + def export(self, spans): + try: + result = self.base_exporter.export(spans) + if result == SpanExportResult.SUCCESS: + self.success_count += 1 + else: + self.failure_count += 1 + print(f"Export failed: {result}") + return result + except Exception as e: + self.failure_count += 1 + print(f"Export error: {e}") + return SpanExportResult.FAILURE + ``` + + + +## Getting Help + +### Collect Diagnostic Information + +When reporting issues, include: + +1. **Environment details:** + - Operating system and version + - Language runtime version + - Genkit version + - Cloud environment (local, Cloud Run, etc.) + +2. **Configuration:** + - Telemetry configuration + - Environment variables + - IAM roles and permissions + +3. **Error messages:** + - Complete error logs + - Stack traces + - Console output + +4. 
**Reproduction steps:** + - Minimal code example + - Steps to reproduce the issue + - Expected vs actual behavior + +### Useful Commands + +```bash +# Check Genkit version +npm list genkit # JavaScript +go list -m github.com/firebase/genkit/go # Go +pip show genkit # Python + +# Check Google Cloud SDK +gcloud version + +# Test authentication +gcloud auth list +gcloud config list + +# Check API access +gcloud services list --enabled +gcloud projects get-iam-policy PROJECT_ID + +# View logs +gcloud logging read "resource.type=global" --limit=50 +``` + +### Community Resources + +- [Genkit GitHub Issues](https://github.com/firebase/genkit/issues) +- [Firebase Community](https://firebase.google.com/community) +- [Stack Overflow](https://stackoverflow.com/questions/tagged/genkit) +- [Google Cloud Support](https://cloud.google.com/support) + +## Next Steps + +- Review the main [observability and monitoring guide](/unified-docs/observability-monitoring) +- Check [authentication and setup](/unified-docs/observability/authentication) requirements +- Explore [advanced configuration](/unified-docs/observability/advanced-configuration) options +- Learn about [deployment best practices](/unified-docs/deployment) diff --git a/src/content/docs/unified-docs/plugin-authoring/models.mdx b/src/content/docs/unified-docs/plugin-authoring/models.mdx new file mode 100644 index 00000000..fd0c0468 --- /dev/null +++ b/src/content/docs/unified-docs/plugin-authoring/models.mdx @@ -0,0 +1,1121 @@ +--- +title: Writing Model Plugins +description: Learn how to create Genkit model plugins across JavaScript, Go, and Python to integrate new generative AI models with comprehensive examples and best practices. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +Model plugins add generative AI models to the Genkit registry. A model represents any generative model capable of receiving a prompt as input and generating text, media, or data as output. This guide covers creating model plugins across all supported languages. + +## Model Plugin Architecture + +A model plugin consists of three main components: + +1. **Metadata**: Declares the model's capabilities (multiturn, media, tools, etc.) +2. **Configuration Schema**: Defines model-specific parameters and options +3. 
**Generation Function**: Transforms requests/responses between Genkit and the model API + +## Basic Model Plugin + + + + ```ts + import { Genkit, z } from 'genkit'; + import { GenkitPlugin, genkitPlugin } from 'genkit/plugin'; + import { GenerationCommonConfigSchema } from 'genkit/model'; + + // Define model configuration schema + const MyModelConfigSchema = GenerationCommonConfigSchema.extend({ + customParam: z.string().optional(), + temperature: z.number().min(0).max(2).default(0.7), + maxTokens: z.number().positive().default(1000), + }); + + type MyModelConfig = z.infer; + + interface MyPluginOptions { + apiKey?: string; + baseURL?: string; + } + + export function myModelPlugin(options?: MyPluginOptions): GenkitPlugin { + return genkitPlugin('myModel', async (ai: Genkit) => { + const apiKey = options?.apiKey || process.env.MY_MODEL_API_KEY; + const baseURL = options?.baseURL || 'https://api.mymodel.com'; + + if (!apiKey) { + throw new Error('API key required'); + } + + // Create API client + const client = new MyModelAPIClient({ apiKey, baseURL }); + + // Define the model + ai.defineModel({ + name: 'myModel/text-generator', + label: 'My Custom Text Generator', + versions: ['v1', 'latest'], + supports: { + multiturn: true, + media: false, + tools: true, + systemRole: true, + output: ['text', 'json'], + }, + configSchema: MyModelConfigSchema, + }, async (request) => { + // Transform Genkit request to API format + const apiRequest = await transformRequest(request, client); + + // Call the model API + const apiResponse = await client.generate(apiRequest); + + // Transform API response to Genkit format + return transformResponse(apiResponse); + }); + }); + } + + // Request transformation + async function transformRequest(request: any, client: any) { + const config = request.config as MyModelConfig; + + return { + messages: request.messages.map((msg: any) => ({ + role: msg.role, + content: msg.content.map((part: any) => part.text).join(''), + })), + temperature: config.temperature, + max_tokens: config.maxTokens, + custom_param: config.customParam, + }; + } + + // Response transformation + function transformResponse(apiResponse: any) { + return { + candidates: [{ + message: { + role: 'model', + content: [{ text: apiResponse.text }], + }, + finishReason: apiResponse.finish_reason || 'stop', + }], + usage: { + inputTokens: apiResponse.usage?.prompt_tokens || 0, + outputTokens: apiResponse.usage?.completion_tokens || 0, + totalTokens: apiResponse.usage?.total_tokens || 0, + }, + }; + } + ``` + + + ```go + package mymodelplugin + + import ( + "context" + "encoding/json" + "fmt" + "os" + + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + ) + + const ProviderID = "myModel" + + // MyModelConfig defines configuration options + type MyModelConfig struct { + ai.GenerationCommonConfig + CustomParam string `json:"customParam,omitempty"` + Temperature float64 `json:"temperature,omitempty"` + MaxTokens int `json:"maxTokens,omitempty"` + } + + // MyModelPlugin implements the genkit.Plugin interface + type MyModelPlugin struct { + APIKey string + BaseURL string + } + + func (p *MyModelPlugin) Name() string { + return ProviderID + } + + func (p *MyModelPlugin) Init(ctx context.Context, g *genkit.Genkit) error { + // Set defaults from environment + if p.APIKey == "" { + p.APIKey = os.Getenv("MY_MODEL_API_KEY") + } + if p.BaseURL == "" { + p.BaseURL = "https://api.mymodel.com" + } + + if p.APIKey == "" { + return fmt.Errorf("API key required") + } + + // Create API client + client 
:= NewMyModelAPIClient(p.APIKey, p.BaseURL) + + // Define the model + err := g.DefineModel(ProviderID, "text-generator", + &ai.ModelInfo{ + Label: "My Custom Text Generator", + Supports: &ai.ModelSupports{ + Multiturn: true, + Media: false, + Tools: true, + SystemRole: true, + }, + Versions: []string{"v1", "latest"}, + }, + func(ctx context.Context, req *ai.ModelRequest, cb ai.ModelStreamCallback) (*ai.ModelResponse, error) { + // Parse configuration + var config MyModelConfig + if req.Config != nil { + if typedConfig, ok := req.Config.(*MyModelConfig); ok { + config = *typedConfig + } + } + + // Set defaults + if config.Temperature == 0 { + config.Temperature = 0.7 + } + if config.MaxTokens == 0 { + config.MaxTokens = 1000 + } + + // Transform request + apiRequest, err := transformRequest(req, config) + if err != nil { + return nil, fmt.Errorf("failed to transform request: %w", err) + } + + // Call API + apiResponse, err := client.Generate(ctx, apiRequest) + if err != nil { + return nil, fmt.Errorf("API call failed: %w", err) + } + + // Transform response + return transformResponse(apiResponse) + }, + ) + + return err + } + + // API client interface + type MyModelAPIClient struct { + APIKey string + BaseURL string + } + + func NewMyModelAPIClient(apiKey, baseURL string) *MyModelAPIClient { + return &MyModelAPIClient{ + APIKey: apiKey, + BaseURL: baseURL, + } + } + + func (c *MyModelAPIClient) Generate(ctx context.Context, req *APIRequest) (*APIResponse, error) { + // Implementation of API call + // This would make actual HTTP requests to your model API + return &APIResponse{ + Text: "Generated response", + FinishReason: "stop", + Usage: &Usage{ + PromptTokens: 10, + CompletionTokens: 20, + TotalTokens: 30, + }, + }, nil + } + + // API request/response types + type APIRequest struct { + Messages []Message `json:"messages"` + Temperature float64 `json:"temperature"` + MaxTokens int `json:"max_tokens"` + CustomParam string `json:"custom_param,omitempty"` + } + + type APIResponse struct { + Text string `json:"text"` + FinishReason string `json:"finish_reason"` + Usage *Usage `json:"usage,omitempty"` + } + + type Message struct { + Role string `json:"role"` + Content string `json:"content"` + } + + type Usage struct { + PromptTokens int `json:"prompt_tokens"` + CompletionTokens int `json:"completion_tokens"` + TotalTokens int `json:"total_tokens"` + } + + // Transform Genkit request to API format + func transformRequest(req *ai.ModelRequest, config MyModelConfig) (*APIRequest, error) { + var messages []Message + + for _, msg := range req.Messages { + content := "" + for _, part := range msg.Content { + if part.Text != "" { + content += part.Text + } + } + + messages = append(messages, Message{ + Role: string(msg.Role), + Content: content, + }) + } + + return &APIRequest{ + Messages: messages, + Temperature: config.Temperature, + MaxTokens: config.MaxTokens, + CustomParam: config.CustomParam, + }, nil + } + + // Transform API response to Genkit format + func transformResponse(apiResp *APIResponse) (*ai.ModelResponse, error) { + finishReason := ai.FinishReasonStop + if apiResp.FinishReason == "length" { + finishReason = ai.FinishReasonLength + } + + response := &ai.ModelResponse{ + Candidates: []*ai.Candidate{ + { + Message: &ai.Message{ + Content: []*ai.Part{ai.NewTextPart(apiResp.Text)}, + Role: ai.RoleModel, + }, + FinishReason: finishReason, + }, + }, + } + + if apiResp.Usage != nil { + response.Usage = &ai.Usage{ + InputTokens: apiResp.Usage.PromptTokens, + OutputTokens: 
apiResp.Usage.CompletionTokens, + TotalTokens: apiResp.Usage.TotalTokens, + } + } + + return response, nil + } + + // Helper functions for users + func Model(g *genkit.Genkit, name string) *ai.Model { + return genkit.LookupModel(g, ProviderID, name) + } + + func ModelRef(name string, config *MyModelConfig) *ai.ModelRef { + return ai.NewModelRef(fmt.Sprintf("%s/%s", ProviderID, name), config) + } + ``` + + + ```python + import os + import asyncio + from typing import Dict, Any, List, Optional, AsyncGenerator + from dataclasses import dataclass + from genkit.ai import Genkit + from genkit.plugins.base import Plugin + + @dataclass + class MyModelConfig: + """Configuration for MyModel""" + custom_param: Optional[str] = None + temperature: float = 0.7 + max_tokens: int = 1000 + top_p: float = 1.0 + frequency_penalty: float = 0.0 + + class MyModelPlugin(Plugin): + def __init__( + self, + api_key: Optional[str] = None, + base_url: Optional[str] = None, + **kwargs + ): + self.api_key = api_key or os.getenv("MY_MODEL_API_KEY") + self.base_url = base_url or "https://api.mymodel.com" + + if not self.api_key: + raise ValueError("API key required") + + super().__init__(provider_id="myModel", **kwargs) + + # Create API client + self.client = MyModelAPIClient(self.api_key, self.base_url) + + def initialize(self, ai: Genkit) -> None: + """Initialize the plugin and register models""" + + ai.define_model( + name=f"{self.provider_id}/text-generator", + config_schema={ + "type": "object", + "properties": { + "custom_param": {"type": "string"}, + "temperature": {"type": "number", "minimum": 0, "maximum": 2, "default": 0.7}, + "max_tokens": {"type": "integer", "minimum": 1, "default": 1000}, + "top_p": {"type": "number", "minimum": 0, "maximum": 1, "default": 1.0}, + "frequency_penalty": {"type": "number", "minimum": -2, "maximum": 2, "default": 0.0}, + }, + }, + supports={ + "multiturn": True, + "media": False, + "tools": True, + "system_role": True, + "output": ["text", "json"], + }, + generate_fn=self._generate_text, + stream_fn=self._stream_text, + ) + + async def _generate_text(self, request: Dict[str, Any]) -> Dict[str, Any]: + """Generate text using the model""" + + # Parse configuration + config = MyModelConfig(**request.get("config", {})) + + # Transform request + api_request = self._transform_request(request, config) + + # Call API + api_response = await self.client.generate(api_request) + + # Transform response + return self._transform_response(api_response) + + async def _stream_text(self, request: Dict[str, Any]) -> AsyncGenerator[Dict[str, Any], None]: + """Stream text generation""" + + config = MyModelConfig(**request.get("config", {})) + api_request = self._transform_request(request, config) + + async for chunk in self.client.stream_generate(api_request): + yield self._transform_stream_chunk(chunk) + + def _transform_request(self, request: Dict[str, Any], config: MyModelConfig) -> Dict[str, Any]: + """Transform Genkit request to API format""" + + messages = [] + for msg in request.get("messages", []): + content = "" + for part in msg.get("content", []): + if "text" in part: + content += part["text"] + + messages.append({ + "role": msg["role"], + "content": content, + }) + + return { + "messages": messages, + "temperature": config.temperature, + "max_tokens": config.max_tokens, + "top_p": config.top_p, + "frequency_penalty": config.frequency_penalty, + "custom_param": config.custom_param, + } + + def _transform_response(self, api_response: Dict[str, Any]) -> Dict[str, Any]: + """Transform API 
response to Genkit format""" + + return { + "candidates": [{ + "message": { + "role": "model", + "content": [{"text": api_response["text"]}], + }, + "finish_reason": api_response.get("finish_reason", "stop"), + }], + "usage": { + "input_tokens": api_response.get("usage", {}).get("prompt_tokens", 0), + "output_tokens": api_response.get("usage", {}).get("completion_tokens", 0), + "total_tokens": api_response.get("usage", {}).get("total_tokens", 0), + }, + } + + def _transform_stream_chunk(self, chunk: Dict[str, Any]) -> Dict[str, Any]: + """Transform streaming chunk to Genkit format""" + + return { + "candidates": [{ + "message": { + "role": "model", + "content": [{"text": chunk.get("delta", "")}], + }, + "finish_reason": chunk.get("finish_reason"), + }], + } + + class MyModelAPIClient: + """API client for MyModel service""" + + def __init__(self, api_key: str, base_url: str): + self.api_key = api_key + self.base_url = base_url + + async def generate(self, request: Dict[str, Any]) -> Dict[str, Any]: + """Generate text (non-streaming)""" + # Implementation would make actual HTTP request + # This is a mock response + return { + "text": "Generated response text", + "finish_reason": "stop", + "usage": { + "prompt_tokens": 10, + "completion_tokens": 20, + "total_tokens": 30, + }, + } + + async def stream_generate(self, request: Dict[str, Any]) -> AsyncGenerator[Dict[str, Any], None]: + """Generate text (streaming)""" + # Implementation would make actual streaming HTTP request + # This is a mock streaming response + words = ["Generated", " response", " text"] + for word in words: + yield { + "delta": word, + "finish_reason": None, + } + + yield { + "delta": "", + "finish_reason": "stop", + } + + # Helper functions for users + def create_model_reference(name: str, config: Optional[MyModelConfig] = None) -> str: + """Create a model reference for use in generate calls""" + return f"myModel/{name}" + ``` + + + +## Advanced Model Features + +### Supporting Tool Calling + + + + ```ts + // In your model definition + ai.defineModel({ + name: 'myModel/tool-capable', + supports: { + tools: true, // Enable tool calling support + // ... other capabilities + }, + configSchema: MyModelConfigSchema, + }, async (request) => { + const config = request.config as MyModelConfig; + + // Check if tools are provided + const tools = request.tools || []; + + const apiRequest = { + messages: transformMessages(request.messages), + tools: tools.map(tool => ({ + type: 'function', + function: { + name: tool.name, + description: tool.description, + parameters: tool.inputSchema, + }, + })), + tool_choice: request.toolChoice || 'auto', + temperature: config.temperature, + }; + + const apiResponse = await client.generate(apiRequest); + + return { + candidates: [{ + message: { + role: 'model', + content: apiResponse.content ? 
[{ text: apiResponse.content }] : [], + toolCalls: apiResponse.tool_calls?.map(call => ({ + id: call.id, + name: call.function.name, + args: JSON.parse(call.function.arguments), + })) || [], + }, + finishReason: apiResponse.finish_reason || 'stop', + }], + usage: apiResponse.usage, + }; + }); + ``` + + + ```go + // In your model generation function + func(ctx context.Context, req *ai.ModelRequest, cb ai.ModelStreamCallback) (*ai.ModelResponse, error) { + // Handle tools if provided + var apiTools []APITool + for _, tool := range req.Tools { + apiTools = append(apiTools, APITool{ + Type: "function", + Function: APIFunction{ + Name: tool.Name, + Description: tool.Description, + Parameters: tool.InputSchema, + }, + }) + } + + apiRequest := &APIRequest{ + Messages: transformMessages(req.Messages), + Tools: apiTools, + ToolChoice: req.ToolChoice, + Temperature: config.Temperature, + } + + apiResponse, err := client.Generate(ctx, apiRequest) + if err != nil { + return nil, err + } + + // Transform tool calls + var toolCalls []*ai.ToolCall + for _, call := range apiResponse.ToolCalls { + var args map[string]interface{} + if err := json.Unmarshal([]byte(call.Function.Arguments), &args); err != nil { + return nil, fmt.Errorf("failed to parse tool arguments: %w", err) + } + + toolCalls = append(toolCalls, &ai.ToolCall{ + ID: call.ID, + Name: call.Function.Name, + Args: args, + }) + } + + return &ai.ModelResponse{ + Candidates: []*ai.Candidate{ + { + Message: &ai.Message{ + Content: transformContent(apiResponse.Content), + Role: ai.RoleModel, + ToolCalls: toolCalls, + }, + FinishReason: transformFinishReason(apiResponse.FinishReason), + }, + }, + Usage: transformUsage(apiResponse.Usage), + }, nil + } + ``` + + + ```python + async def _generate_text(self, request: Dict[str, Any]) -> Dict[str, Any]: + """Generate text with tool calling support""" + + config = MyModelConfig(**request.get("config", {})) + + # Handle tools + tools = [] + for tool in request.get("tools", []): + tools.append({ + "type": "function", + "function": { + "name": tool["name"], + "description": tool["description"], + "parameters": tool["input_schema"], + }, + }) + + api_request = { + "messages": self._transform_messages(request.get("messages", [])), + "tools": tools, + "tool_choice": request.get("tool_choice", "auto"), + "temperature": config.temperature, + "max_tokens": config.max_tokens, + } + + api_response = await self.client.generate(api_request) + + # Transform tool calls + tool_calls = [] + for call in api_response.get("tool_calls", []): + tool_calls.append({ + "id": call["id"], + "name": call["function"]["name"], + "args": json.loads(call["function"]["arguments"]), + }) + + return { + "candidates": [{ + "message": { + "role": "model", + "content": [{"text": api_response.get("content", "")}] if api_response.get("content") else [], + "tool_calls": tool_calls, + }, + "finish_reason": api_response.get("finish_reason", "stop"), + }], + "usage": api_response.get("usage", {}), + } + ``` + + + +### Supporting Media Input + + + + ```ts + ai.defineModel({ + name: 'myModel/vision-model', + supports: { + media: true, // Enable media support + // ... 
other capabilities + }, + configSchema: MyModelConfigSchema, + }, async (request) => { + const messages = request.messages.map(msg => ({ + role: msg.role, + content: msg.content.map(part => { + if (part.text) { + return { type: 'text', text: part.text }; + } else if (part.media) { + return { + type: 'image_url', + image_url: { + url: part.media.url, + detail: 'auto', + }, + }; + } + return null; + }).filter(Boolean), + })); + + const apiResponse = await client.generate({ + messages, + temperature: request.config.temperature, + }); + + return transformResponse(apiResponse); + }); + ``` + + + ```go + // In your request transformation function + func transformMessages(messages []*ai.Message) []APIMessage { + var apiMessages []APIMessage + + for _, msg := range messages { + var content []APIContent + + for _, part := range msg.Content { + if part.Text != "" { + content = append(content, APIContent{ + Type: "text", + Text: part.Text, + }) + } else if part.Media != nil { + content = append(content, APIContent{ + Type: "image_url", + ImageURL: &APIImageURL{ + URL: part.Media.URL, + Detail: "auto", + }, + }) + } + } + + apiMessages = append(apiMessages, APIMessage{ + Role: string(msg.Role), + Content: content, + }) + } + + return apiMessages + } + + type APIContent struct { + Type string `json:"type"` + Text string `json:"text,omitempty"` + ImageURL *APIImageURL `json:"image_url,omitempty"` + } + + type APIImageURL struct { + URL string `json:"url"` + Detail string `json:"detail"` + } + ``` + + + ```python + def _transform_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Transform messages with media support""" + + api_messages = [] + for msg in messages: + content = [] + + for part in msg.get("content", []): + if "text" in part: + content.append({ + "type": "text", + "text": part["text"], + }) + elif "media" in part: + content.append({ + "type": "image_url", + "image_url": { + "url": part["media"]["url"], + "detail": "auto", + }, + }) + + api_messages.append({ + "role": msg["role"], + "content": content, + }) + + return api_messages + ``` + + + +## Best Practices + +### Error Handling + + + + ```ts + import { GenkitError } from 'genkit'; + + // In your generation function + try { + const apiResponse = await client.generate(apiRequest); + return transformResponse(apiResponse); + } catch (error) { + if (error.status === 429) { + throw new GenkitError({ + source: 'myModel', + status: 'RESOURCE_EXHAUSTED', + message: 'Rate limit exceeded', + }); + } else if (error.status === 401) { + throw new GenkitError({ + source: 'myModel', + status: 'UNAUTHENTICATED', + message: 'Invalid API key', + }); + } else { + throw new GenkitError({ + source: 'myModel', + status: 'INTERNAL', + message: `Model API error: ${error.message}`, + }); + } + } + ``` + + + ```go + import "google.golang.org/grpc/codes" + + // In your generation function + apiResponse, err := client.Generate(ctx, apiRequest) + if err != nil { + // Handle specific error types + if isRateLimitError(err) { + return nil, fmt.Errorf("rate limit exceeded: %w", err) + } else if isAuthError(err) { + return nil, fmt.Errorf("authentication failed: %w", err) + } else { + return nil, fmt.Errorf("model API error: %w", err) + } + } + + func isRateLimitError(err error) bool { + // Check if error indicates rate limiting + return strings.Contains(err.Error(), "rate limit") || + strings.Contains(err.Error(), "429") + } + + func isAuthError(err error) bool { + // Check if error indicates authentication failure + return 
strings.Contains(err.Error(), "unauthorized") || + strings.Contains(err.Error(), "401") + } + ``` + + + ```python + import httpx + from genkit.exceptions import GenkitError + + async def _generate_text(self, request: Dict[str, Any]) -> Dict[str, Any]: + try: + api_response = await self.client.generate(api_request) + return self._transform_response(api_response) + except httpx.HTTPStatusError as e: + if e.response.status_code == 429: + raise GenkitError( + source="myModel", + status="RESOURCE_EXHAUSTED", + message="Rate limit exceeded" + ) + elif e.response.status_code == 401: + raise GenkitError( + source="myModel", + status="UNAUTHENTICATED", + message="Invalid API key" + ) + else: + raise GenkitError( + source="myModel", + status="INTERNAL", + message=f"Model API error: {e.response.text}" + ) + except Exception as e: + raise GenkitError( + source="myModel", + status="INTERNAL", + message=f"Unexpected error: {str(e)}" + ) + ``` + + + +### Configuration Validation + + + + ```ts + const MyModelConfigSchema = GenerationCommonConfigSchema.extend({ + temperature: z.number().min(0).max(2).default(0.7), + maxTokens: z.number().positive().max(4096).default(1000), + topP: z.number().min(0).max(1).default(1.0), + customParam: z.string().optional(), + }); + + // In your generation function + const config = MyModelConfigSchema.parse(request.config || {}); + ``` + + + ```go + func validateConfig(config *MyModelConfig) error { + if config.Temperature < 0 || config.Temperature > 2 { + return fmt.Errorf("temperature must be between 0 and 2") + } + if config.MaxTokens <= 0 || config.MaxTokens > 4096 { + return fmt.Errorf("maxTokens must be between 1 and 4096") + } + return nil + } + + // In your generation function + if err := validateConfig(&config); err != nil { + return nil, fmt.Errorf("invalid configuration: %w", err) + } + ``` + + + ```python + from pydantic import BaseModel, Field, validator + + class MyModelConfig(BaseModel): + temperature: float = Field(default=0.7, ge=0, le=2) + max_tokens: int = Field(default=1000, gt=0, le=4096) + top_p: float = Field(default=1.0, ge=0, le=1) + + @validator('temperature') + def validate_temperature(cls, v): + if not 0 <= v <= 2: + raise ValueError('temperature must be between 0 and 2') + return v + ``` + + + +## Testing Your Model Plugin + +### Unit Testing + + + + ```ts + import { describe, it, expect, beforeEach } from 'vitest'; + import { genkit } from 'genkit'; + import { myModelPlugin } from './my-model-plugin'; + + describe('MyModel Plugin', () => { + let ai: any; + + beforeEach(async () => { + ai = genkit({ + plugins: [myModelPlugin({ apiKey: 'test-key' })], + }); + }); + + it('should generate text', async () => { + const response = await ai.generate({ + model: 'myModel/text-generator', + prompt: 'Hello, world!', + config: { temperature: 0.5 }, + }); + + expect(response.text).toBeDefined(); + expect(response.text.length).toBeGreaterThan(0); + }); + + it('should handle tool calls', async () => { + const response = await ai.generate({ + model: 'myModel/tool-capable', + prompt: 'What is the weather like?', + tools: [{ + name: 'get_weather', + description: 'Get current weather', + inputSchema: { + type: 'object', + properties: { + location: { type: 'string' }, + }, + }, + }], + }); + + expect(response.toolCalls).toBeDefined(); + }); + }); + ``` + + + ```go + package mymodelplugin_test + + import ( + "context" + "testing" + + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/stretchr/testify/assert" + 
"github.com/stretchr/testify/require" + ) + + func TestMyModelPlugin(t *testing.T) { + ctx := context.Background() + + // Initialize Genkit with the plugin + g, err := genkit.Init(ctx, + genkit.WithPlugins( + &MyModelPlugin{ + APIKey: "test-key", + }, + ), + ) + require.NoError(t, err) + + t.Run("should generate text", func(t *testing.T) { + model := genkit.LookupModel(g, "myModel", "text-generator") + require.NotNil(t, model) + + req := &ai.ModelRequest{ + Messages: []*ai.Message{ + { + Content: []*ai.Part{ai.NewTextPart("Hello, world!")}, + Role: ai.RoleUser, + }, + }, + Config: &MyModelConfig{ + Temperature: 0.5, + }, + } + + resp, err := model.Generate(ctx, req, nil) + require.NoError(t, err) + assert.NotEmpty(t, resp.Candidates) + assert.NotEmpty(t, resp.Candidates[0].Message.Content) + }) + } + ``` + + + ```python + import pytest + import asyncio + from genkit.ai import Genkit + from my_model_plugin import MyModelPlugin + + @pytest.fixture + async def ai(): + """Create a Genkit instance with the plugin for testing""" + return Genkit( + plugins=[ + MyModelPlugin(api_key="test-key"), + ], + ) + + @pytest.mark.asyncio + async def test_generate_text(ai): + """Test basic text generation""" + response = await ai.generate( + model="myModel/text-generator", + prompt="Hello, world!", + config={"temperature": 0.5}, + ) + + assert response["text"] + assert len(response["text"]) > 0 + + @pytest.mark.asyncio + async def test_tool_calling(ai): + """Test tool calling functionality""" + response = await ai.generate( + model="myModel/tool-capable", + prompt="What is the weather like?", + tools=[{ + "name": "get_weather", + "description": "Get current weather", + "input_schema": { + "type": "object", + "properties": { + "location": {"type": "string"}, + }, + }, + }], + ) + + assert "tool_calls" in response + ``` + + + +## Next Steps + +- Learn about writing embedder plugins for text embedding models +- Explore writing retriever plugins for custom data sources +- See telemetry plugins for monitoring and observability +- Check out the [plugin authoring overview](/unified-docs/plugin-authoring/overview) for general plugin concepts diff --git a/src/content/docs/unified-docs/plugin-authoring/overview.mdx b/src/content/docs/unified-docs/plugin-authoring/overview.mdx new file mode 100644 index 00000000..8eef22d7 --- /dev/null +++ b/src/content/docs/unified-docs/plugin-authoring/overview.mdx @@ -0,0 +1,468 @@ +--- +title: Writing Genkit Plugins +description: Learn how to extend Genkit's capabilities by writing custom plugins across JavaScript, Go, and Python, covering plugin creation, models, retrievers, and publishing. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +Genkit's capabilities are designed to be extended by plugins. Genkit plugins are configurable modules that can provide models, retrievers, indexers, trace stores, and more. You've already seen plugins in action just by using Genkit - every AI provider, vector database, and framework integration is implemented as a plugin. 
+ +## Plugin Architecture + +Plugins in Genkit follow a consistent architecture across all languages, providing a way to: + +- **Register new actions**: Models, embedders, retrievers, indexers, and tools +- **Configure services**: API keys, endpoints, and service-specific settings +- **Extend functionality**: Add new capabilities to the Genkit ecosystem +- **Maintain consistency**: Follow established patterns for reliability and discoverability + + + + In JavaScript, plugins are created using the `genkitPlugin` helper: + + ```ts + import { genkit } from 'genkit'; + import { vertexAI } from '@genkit-ai/vertexai'; + + const ai = genkit({ + plugins: [vertexAI({ projectId: 'my-project' })], + }); + ``` + + The Vertex AI plugin takes configuration and registers models, embedders, and more with the Genkit registry, which powers the local UI and serves as a lookup service for named actions at runtime. + + + In Go, plugins implement the `genkit.Plugin` interface: + + ```go + import ( + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlegenai" + ) + + g, err := genkit.Init(ctx, + genkit.WithPlugins( + &googlegenai.GoogleAI{APIKey: "..."}, + &googlegenai.VertexAI{ + ProjectID: "my-project", + Location: "us-central1", + }, + ), + ) + ``` + + Plugins register resources with unique identifiers to prevent naming conflicts with other plugins. + + + In Python, plugins are classes that extend the base plugin functionality: + + ```python + from genkit.ai import Genkit + from genkit.plugins.vertexai import VertexAI + + ai = Genkit( + plugins=[ + VertexAI(project_id="my-project", location="us-central1"), + ], + ) + ``` + + Python plugins follow similar patterns to JavaScript and Go, providing consistent APIs across languages. + + + +## Creating a Plugin + +### Project Setup + + + + Create a new NPM package for your plugin: + + ```bash + mkdir genkitx-my-plugin + cd genkitx-my-plugin + npm init -y + npm install genkit + npm install --save-dev typescript + npx tsc --init + ``` + + Define and export your plugin using the `genkitPlugin` helper: + + ```ts + import { Genkit, z } from 'genkit'; + import { GenkitPlugin, genkitPlugin } from 'genkit/plugin'; + + interface MyPluginOptions { + apiKey?: string; + // Add any plugin configuration here + } + + export function myPlugin(options?: MyPluginOptions): GenkitPlugin { + return genkitPlugin( + 'myPlugin', + // Initializer function (required) + async (ai: Genkit) => { + const apiKey = options?.apiKey || process.env.MY_PLUGIN_API_KEY; + if (!apiKey) { + throw new Error('API key required'); + } + + // Register actions that are always available + ai.defineModel({ + name: 'myPlugin/my-model', + // ... model definition + }); + + ai.defineEmbedder({ + name: 'myPlugin/my-embedder', + // ... embedder definition + }); + }, + // Dynamic Action Resolver (optional) + async (ai: Genkit, actionType, actionName) => { + // Define actions on-demand + if (actionType === 'model' && actionName === 'dynamic-model') { + ai.defineModel({ + name: `myPlugin/${actionName}`, + // ... dynamic model definition + }); + } + }, + // List Actions function (optional) + async () => { + // Return metadata for all potential actions + const availableModels = await fetchAvailableModels(); + return availableModels.map(model => ({ + type: 'model', + name: `myPlugin/${model.id}`, + // ... 
other metadata + })); + } + ); + } + ``` + + + Create a Go package that implements the `genkit.Plugin` interface: + + ```go + package myplugin + + import ( + "context" + "os" + "github.com/firebase/genkit/go/genkit" + ) + + const ProviderID = "myplugin" + + // Plugin configuration struct + type MyPlugin struct { + APIKey string + // Other configuration options + } + + // Name returns the provider ID + func (p *MyPlugin) Name() string { + return ProviderID + } + + // Init initializes the plugin + func (p *MyPlugin) Init(ctx context.Context, g *genkit.Genkit) error { + // Set default values and validate configuration + if p.APIKey == "" { + p.APIKey = os.Getenv("MY_PLUGIN_API_KEY") + } + if p.APIKey == "" { + return fmt.Errorf("API key required") + } + + // Register models, embedders, etc. + err := g.DefineModel(ProviderID, "my-model", &ModelConfig{ + // ... model configuration + }, func(ctx context.Context, req *ai.GenerateRequest) (*ai.GenerateResponse, error) { + // ... model implementation + }) + if err != nil { + return fmt.Errorf("failed to define model: %w", err) + } + + return nil + } + ``` + + Usage: + + ```go + g, err := genkit.Init(ctx, + genkit.WithPlugins( + &myplugin.MyPlugin{ + APIKey: "your-api-key", + }, + ), + ) + ``` + + + Create a Python package with a plugin class: + + ```python + import os + from typing import Optional, Dict, Any + from genkit.ai import Genkit + from genkit.plugins.base import Plugin + + class MyPlugin(Plugin): + def __init__(self, api_key: Optional[str] = None, **kwargs): + self.api_key = api_key or os.getenv("MY_PLUGIN_API_KEY") + if not self.api_key: + raise ValueError("API key required") + + super().__init__(provider_id="myplugin", **kwargs) + + def initialize(self, ai: Genkit) -> None: + """Initialize the plugin and register actions""" + + # Register a model + ai.define_model( + name=f"{self.provider_id}/my-model", + config_schema={ + "temperature": {"type": "number", "default": 0.7}, + "max_tokens": {"type": "integer", "default": 1000}, + }, + generate_fn=self._generate_text, + ) + + # Register an embedder + ai.define_embedder( + name=f"{self.provider_id}/my-embedder", + embed_fn=self._embed_text, + ) + + async def _generate_text(self, request: Dict[str, Any]) -> Dict[str, Any]: + """Generate text using the model""" + # Implementation here + pass + + async def _embed_text(self, text: str) -> List[float]: + """Generate embeddings for text""" + # Implementation here + pass + ``` + + Usage: + + ```python + from genkit.ai import Genkit + from my_plugin import MyPlugin + + ai = Genkit( + plugins=[ + MyPlugin(api_key="your-api-key"), + ], + ) + ``` + + + +## Plugin Configuration Best Practices + +### Secure Configuration + +For any plugin options that require secret values (API keys, tokens), provide both configuration options and environment variable defaults: + + + + ```ts + interface MyPluginOptions { + apiKey?: string; + endpoint?: string; + timeout?: number; + } + + export function myPlugin(options?: MyPluginOptions): GenkitPlugin { + return genkitPlugin('myPlugin', async (ai: Genkit) => { + // Prioritize explicit options, fall back to environment variables + const apiKey = options?.apiKey || process.env.MY_PLUGIN_API_KEY; + const endpoint = options?.endpoint || process.env.MY_PLUGIN_ENDPOINT || 'https://api.default.com'; + + if (!apiKey) { + throw new GenkitError({ + source: 'my-plugin', + status: 'INVALID_ARGUMENT', + message: 'Must supply either `options.apiKey` or set `MY_PLUGIN_API_KEY` environment variable.', + }); + } + + // Use 
configuration to set up the plugin + const client = new MyAPIClient({ apiKey, endpoint }); + + // Register actions using the configured client + ai.defineModel(/* ... */); + }); + } + ``` + + + ```go + type MyPlugin struct { + APIKey string + Endpoint string + Timeout time.Duration + } + + func (p *MyPlugin) Init(ctx context.Context, g *genkit.Genkit) error { + // Set defaults from environment variables + if p.APIKey == "" { + p.APIKey = os.Getenv("MY_PLUGIN_API_KEY") + } + if p.Endpoint == "" { + p.Endpoint = os.Getenv("MY_PLUGIN_ENDPOINT") + if p.Endpoint == "" { + p.Endpoint = "https://api.default.com" + } + } + if p.Timeout == 0 { + p.Timeout = 30 * time.Second + } + + if p.APIKey == "" { + return fmt.Errorf("API key required: set APIKey field or MY_PLUGIN_API_KEY environment variable") + } + + // Create client with configuration + client := NewMyAPIClient(p.APIKey, p.Endpoint, p.Timeout) + + // Register actions + return g.DefineModel(/* ... */) + } + ``` + + + ```python + class MyPlugin(Plugin): + def __init__( + self, + api_key: Optional[str] = None, + endpoint: Optional[str] = None, + timeout: int = 30, + **kwargs + ): + # Prioritize explicit options, fall back to environment variables + self.api_key = api_key or os.getenv("MY_PLUGIN_API_KEY") + self.endpoint = endpoint or os.getenv("MY_PLUGIN_ENDPOINT", "https://api.default.com") + self.timeout = timeout + + if not self.api_key: + raise ValueError( + "API key required: provide api_key parameter or set MY_PLUGIN_API_KEY environment variable" + ) + + super().__init__(provider_id="myplugin", **kwargs) + + # Create client with configuration + self.client = MyAPIClient( + api_key=self.api_key, + endpoint=self.endpoint, + timeout=self.timeout + ) + ``` + + + +## Plugin Types + +Genkit supports several types of plugins, each serving different purposes: + +### Model Plugins +Provide generative AI models that can receive prompts and generate text, media, or data. + +### Embedder Plugins +Provide text embedding models that convert text into vector representations. + +### Retriever Plugins +Provide document retrieval capabilities for RAG (Retrieval-Augmented Generation) systems. + +### Indexer Plugins +Provide document indexing capabilities for storing and organizing documents. + +### Tool Plugins +Provide function calling capabilities that models can use to interact with external systems. + +### Telemetry Plugins +Configure observability and monitoring for Genkit applications. 
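+
+Whichever type you implement, registration follows the same pattern shown above. As a rough TypeScript sketch (the backend search call and document shape are illustrative assumptions, not a real API), a plugin can expose a retriever like this:
+
+```ts
+import { Genkit, z } from 'genkit';
+import { GenkitPlugin, genkitPlugin } from 'genkit/plugin';
+import { Document } from 'genkit/retriever';
+
+// Placeholder for your service's search API (illustrative only).
+async function searchMyBackend(text: string, k: number): Promise<{ id: string; text: string }[]> {
+  return [];
+}
+
+export function myRetrieverPlugin(): GenkitPlugin {
+  return genkitPlugin('myPlugin', async (ai: Genkit) => {
+    ai.defineRetriever(
+      {
+        name: 'myPlugin/my-retriever',
+        configSchema: z.object({ k: z.number().default(3) }),
+      },
+      async (query, options) => {
+        // `query` is a Document; `options` follows the config schema above.
+        const hits = await searchMyBackend(query.text, options?.k ?? 3);
+        return {
+          documents: hits.map((hit) => Document.fromText(hit.text, { id: hit.id })),
+        };
+      },
+    );
+  });
+}
+```
+
+Embedders, indexers, and tools follow the same shape via `defineEmbedder`, `defineIndexer`, and `defineTool`.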
+ +## Publishing Plugins + +### Package Naming and Keywords + + + + Use the `genkitx-{name}` naming convention and include relevant keywords in your `package.json`: + + ```json + { + "name": "genkitx-my-plugin", + "keywords": [ + "genkit-plugin", + "genkit-model", + "genkit-embedder", + "genkit-retriever" + ], + "description": "My custom Genkit plugin", + "main": "dist/index.js", + "types": "dist/index.d.ts" + } + ``` + + Available keywords: + - `genkit-plugin`: Always include this + - `genkit-model`: If your plugin defines models + - `genkit-embedder`: If your plugin defines embedders + - `genkit-retriever`: If your plugin defines retrievers + - `genkit-indexer`: If your plugin defines indexers + - `genkit-telemetry`: If your plugin provides telemetry + - `genkit-deploy`: If your plugin includes deployment helpers + - `genkit-flow`: If your plugin enhances flows + + + Use descriptive package names that include "genkit" for discoverability: + + ``` + github.com/yourorg/genkit-plugins/servicename + github.com/yourorg/your-repo/genkit/servicename + ``` + + Include a comprehensive README.md with: + - Installation instructions + - Configuration options + - Usage examples + - API documentation + + + Use the `genkit-{name}` naming convention and include relevant classifiers in your `setup.py` or `pyproject.toml`: + + ```toml + [project] + name = "genkit-my-plugin" + description = "My custom Genkit plugin" + keywords = ["genkit", "plugin", "ai", "model"] + classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + ] + ``` + + + +## Next Steps + +- Learn about [writing model plugins](/unified-docs/plugin-authoring/models) to add new AI models +- Explore writing retriever plugins for custom data sources +- See writing embedder plugins for custom embedding models +- Check out telemetry plugins for monitoring and observability diff --git a/src/content/docs/unified-docs/plugins/anthropic.mdx b/src/content/docs/unified-docs/plugins/anthropic.mdx new file mode 100644 index 00000000..ad4f1401 --- /dev/null +++ b/src/content/docs/unified-docs/plugins/anthropic.mdx @@ -0,0 +1,818 @@ +--- +title: Anthropic (Claude) Plugin +description: Learn how to use Anthropic's Claude models with Genkit across JavaScript, Go, and Python for advanced reasoning, analysis, and conversational AI. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +Anthropic's Claude models are known for their advanced reasoning capabilities, safety features, and nuanced understanding of complex topics. Claude excels at analysis, writing, math, coding, and thoughtful conversation while maintaining helpful, harmless, and honest interactions. + +## Installation and Setup + + + + Claude models are available in JavaScript through Vertex AI Model Garden. You'll need access to Claude models in your Google Cloud project. + + Install the Vertex AI plugin: + + ```bash + npm install @genkit-ai/vertexai + ``` + + Configure the plugin with Claude models: + + ```ts + import { genkit } from 'genkit'; + import { vertexAI } from '@genkit-ai/vertexai'; + + const ai = genkit({ + plugins: [ + vertexAI({ + projectId: 'your-project-id', + location: 'us-central1', + models: ['claude-3-haiku', 'claude-3-sonnet', 'claude-3-opus'], + }), + ], + }); + ``` + + ### Prerequisites + + 1. 
**Google Cloud Project**: Set up a Google Cloud project with Vertex AI enabled + 2. **Claude Model Access**: Request access to Claude models in [Vertex AI Model Garden](https://console.cloud.google.com/vertex-ai/publishers/anthropic/model-garden) + 3. **Authentication**: Configure Google Cloud authentication + + ```bash + # Set up authentication + gcloud auth application-default login + export GOOGLE_CLOUD_PROJECT=your-project-id + ``` + + ### Available Models via Vertex AI + + - **claude-3-haiku**: Fast and efficient for simple tasks + - **claude-3-sonnet**: Balanced performance and capability + - **claude-3-opus**: Most capable for complex reasoning + + + Claude models are available in Go through the OpenAI-compatible Anthropic plugin. + + Install the required packages: + + ```bash + go get github.com/firebase/genkit/go/plugins/compat_oai/anthropic + go get github.com/openai/openai-go/option + ``` + + Configure the Anthropic plugin: + + ```go + package main + + import ( + "context" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/compat_oai/anthropic" + "github.com/openai/openai-go/option" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&anthropic.Anthropic{ + Opts: []option.RequestOption{ + option.WithAPIKey(os.Getenv("ANTHROPIC_API_KEY")), + }, + }), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + ### API Key Configuration + + ```bash + export ANTHROPIC_API_KEY=your_anthropic_api_key + ``` + + Get your API key from [Anthropic Console](https://console.anthropic.com/). + + ### Available Models + + - **claude-3-7-sonnet-20250219**: Latest Claude 3.7 Sonnet with advanced capabilities + - **claude-3-5-haiku-20241022**: Fast and efficient Claude 3.5 Haiku + - **claude-3-5-sonnet-20240620**: Balanced Claude 3.5 Sonnet + - **claude-3-opus-20240229**: Most capable Claude 3 model + - **claude-3-haiku-20240307**: Fastest Claude 3 model + + + Claude models are currently not directly supported in Python Genkit. However, you can access Claude through: + + 1. **Vertex AI Model Garden** (if available in your region) + 2. **Custom OpenAI-compatible wrapper** using the Anthropic API + + For Vertex AI access (if available): + + ```python + from genkit.ai import Genkit + from genkit.plugins.vertex_ai import VertexAI + + ai = Genkit( + plugins=[ + VertexAI( + project_id="your-project-id", + location="us-central1", + models=["claude-3-haiku", "claude-3-sonnet", "claude-3-opus"], + ), + ], + ) + ``` + + For direct Anthropic API access, you would need to create a custom plugin or use the Anthropic Python SDK directly alongside Genkit. 
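+
+    As a rough sketch of the second option (an assumption about your setup, not an official Genkit integration), you can call the official `anthropic` SDK from an async helper and invoke it from your own Genkit flows:
+
+    ```python
+    import os
+    import anthropic
+
+    # Illustrative only: direct Anthropic SDK usage alongside Genkit.
+    client = anthropic.AsyncAnthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
+
+    async def ask_claude(prompt: str) -> str:
+        """Send a single-turn prompt to Claude and return the text reply."""
+        message = await client.messages.create(
+            model="claude-3-5-sonnet-20240620",
+            max_tokens=1024,
+            messages=[{"role": "user", "content": prompt}],
+        )
+        return message.content[0].text
+    ```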
+ + ### Environment Configuration + + ```bash + export GOOGLE_CLOUD_PROJECT=your-project-id + # or + export ANTHROPIC_API_KEY=your_anthropic_api_key + ``` + + + +## Basic Usage + +### Text Generation + + + + Use Claude models for text generation through Vertex AI: + + ```ts + import { genkit, z } from 'genkit'; + import { vertexAI } from '@genkit-ai/vertexai'; + + const ai = genkit({ + plugins: [ + vertexAI({ + projectId: 'your-project-id', + location: 'us-central1', + models: ['claude-3-sonnet'], + }), + ], + }); + + // Basic text generation + const response = await ai.generate({ + model: 'claude-3-sonnet', + prompt: 'Explain the concept of quantum entanglement in simple terms.', + }); + + console.log(response.text); + + // Flow with Claude + export const claudeAnalysisFlow = ai.defineFlow( + { + name: 'claudeAnalysisFlow', + inputSchema: z.object({ + text: z.string(), + analysisType: z.enum(['sentiment', 'summary', 'critique']), + }), + outputSchema: z.object({ analysis: z.string() }), + }, + async ({ text, analysisType }) => { + const prompts = { + sentiment: `Analyze the sentiment of this text: "${text}"`, + summary: `Provide a concise summary of this text: "${text}"`, + critique: `Provide a thoughtful critique of this text: "${text}"`, + }; + + const response = await ai.generate({ + model: 'claude-3-sonnet', + prompt: prompts[analysisType], + config: { + temperature: 0.3, + maxTokens: 500, + }, + }); + + return { analysis: response.text }; + }, + ); + ``` + + + Use Claude models with the Anthropic plugin: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/compat_oai/anthropic" + ) + + func main() { + ctx := context.Background() + + // Initialize Anthropic plugin + claude := &anthropic.Anthropic{ + Opts: []option.RequestOption{ + option.WithAPIKey(os.Getenv("ANTHROPIC_API_KEY")), + }, + } + g, err := genkit.Init(ctx, genkit.WithPlugins(claude)) + if err != nil { + log.Fatal(err) + } + + // Basic text generation + model := claude.Model(g, "claude-3-7-sonnet-20250219") + resp, err := genkit.Generate(ctx, g, + ai.WithModel(model), + ai.WithPrompt("Explain the concept of quantum entanglement in simple terms."), + ) + if err != nil { + log.Fatal(err) + } + + fmt.Println(resp.Text()) + + // Advanced reasoning task + reasoningResp, err := genkit.Generate(ctx, g, + ai.WithModel(model), + ai.WithPrompt("Analyze the ethical implications of AI in healthcare decision-making."), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.3, + "max_tokens": 1000, + }), + ) + if err != nil { + log.Fatal(err) + } + + fmt.Println(reasoningResp.Text()) + } + ``` + + + Use Claude models through available integrations: + + ```python + # If using Vertex AI Model Garden + from genkit.ai import Genkit + from genkit.plugins.vertex_ai import VertexAI + + ai = Genkit( + plugins=[ + VertexAI( + project_id="your-project-id", + location="us-central1", + models=["claude-3-sonnet"], + ), + ], + ) + + # Basic text generation + response = await ai.generate( + model="claude-3-sonnet", + prompt="Explain the concept of quantum entanglement in simple terms." 
+ ) + print(response.text) + + # Analysis task + async def analyze_text(text: str, analysis_type: str) -> str: + prompts = { + "sentiment": f"Analyze the sentiment of this text: \"{text}\"", + "summary": f"Provide a concise summary of this text: \"{text}\"", + "critique": f"Provide a thoughtful critique of this text: \"{text}\"", + } + + response = await ai.generate( + model="claude-3-sonnet", + prompt=prompts[analysis_type], + config={ + "temperature": 0.3, + "max_tokens": 500, + } + ) + return response.text + ``` + + + +## Advanced Features + +### Complex Reasoning + + + + Leverage Claude's reasoning capabilities: + + ```ts + // Complex analysis flow + export const complexAnalysisFlow = ai.defineFlow( + { + name: 'complexAnalysisFlow', + inputSchema: z.object({ + problem: z.string(), + context: z.string().optional(), + }), + outputSchema: z.object({ + analysis: z.string(), + reasoning: z.string(), + recommendations: z.array(z.string()), + }), + }, + async ({ problem, context }) => { + const prompt = context + ? `Given this context: ${context}\n\nAnalyze this problem step by step: ${problem}` + : `Analyze this problem step by step: ${problem}`; + + const response = await ai.generate({ + model: 'claude-3-opus', // Use most capable model for complex reasoning + prompt: `${prompt} + +Please provide: +1. A thorough analysis +2. Your reasoning process +3. Specific recommendations + +Format your response clearly with sections.`, + config: { + temperature: 0.2, // Lower temperature for analytical tasks + maxTokens: 2000, + }, + }); + + // Parse the structured response + const sections = response.text.split('\n\n'); + const analysis = sections[0] || ''; + const reasoning = sections[1] || ''; + const recommendations = sections.slice(2) + .filter(section => section.includes('-')) + .flatMap(section => + section.split('\n') + .filter(line => line.trim().startsWith('-')) + .map(line => line.replace(/^-\s*/, '').trim()) + ); + + return { analysis, reasoning, recommendations }; + }, + ); + + // Ethical reasoning flow + export const ethicalAnalysisFlow = ai.defineFlow( + { + name: 'ethicalAnalysisFlow', + inputSchema: z.object({ scenario: z.string() }), + outputSchema: z.object({ + ethicalConsiderations: z.array(z.string()), + stakeholders: z.array(z.string()), + recommendations: z.string(), + }), + }, + async ({ scenario }) => { + const response = await ai.generate({ + model: 'claude-3-sonnet', + prompt: `Analyze the ethical implications of this scenario: ${scenario} + +Please identify: +1. Key ethical considerations +2. Affected stakeholders +3. 
Recommended approach + +Be thorough and consider multiple perspectives.`, + config: { + temperature: 0.3, + maxTokens: 1500, + }, + }); + + // Extract structured information from response + const lines = response.text.split('\n').filter(line => line.trim()); + const ethicalConsiderations = lines + .filter(line => line.includes('ethical') || line.includes('moral')) + .slice(0, 5); + const stakeholders = lines + .filter(line => line.includes('stakeholder') || line.includes('affected')) + .slice(0, 5); + const recommendations = lines + .filter(line => line.includes('recommend') || line.includes('suggest')) + .join(' '); + + return { ethicalConsiderations, stakeholders, recommendations }; + }, + ); + ``` + + + Leverage Claude's reasoning capabilities: + + ```go + // Complex analysis function + func performComplexAnalysis(ctx context.Context, problem, context string) (map[string]interface{}, error) { + prompt := problem + if context != "" { + prompt = fmt.Sprintf("Given this context: %s\n\nAnalyze this problem step by step: %s", context, problem) + } + + fullPrompt := fmt.Sprintf(`%s + +Please provide: +1. A thorough analysis +2. Your reasoning process +3. Specific recommendations + +Format your response clearly with sections.`, prompt) + + model := claude.Model(g, "claude-3-opus-20240229") // Most capable model + resp, err := genkit.Generate(ctx, g, + ai.WithModel(model), + ai.WithPrompt(fullPrompt), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.2, + "max_tokens": 2000, + }), + ) + if err != nil { + return nil, fmt.Errorf("analysis failed: %w", err) + } + + // Parse structured response + sections := strings.Split(resp.Text(), "\n\n") + analysis := "" + reasoning := "" + var recommendations []string + + if len(sections) > 0 { + analysis = sections[0] + } + if len(sections) > 1 { + reasoning = sections[1] + } + if len(sections) > 2 { + for _, section := range sections[2:] { + lines := strings.Split(section, "\n") + for _, line := range lines { + if strings.HasPrefix(strings.TrimSpace(line), "-") { + recommendations = append(recommendations, strings.TrimSpace(strings.TrimPrefix(line, "-"))) + } + } + } + } + + return map[string]interface{}{ + "analysis": analysis, + "reasoning": reasoning, + "recommendations": recommendations, + }, nil + } + + // Ethical analysis function + func performEthicalAnalysis(ctx context.Context, scenario string) (map[string]interface{}, error) { + prompt := fmt.Sprintf(`Analyze the ethical implications of this scenario: %s + +Please identify: +1. Key ethical considerations +2. Affected stakeholders +3. 
Recommended approach + +Be thorough and consider multiple perspectives.`, scenario) + + model := claude.Model(g, "claude-3-7-sonnet-20250219") + resp, err := genkit.Generate(ctx, g, + ai.WithModel(model), + ai.WithPrompt(prompt), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.3, + "max_tokens": 1500, + }), + ) + if err != nil { + return nil, fmt.Errorf("ethical analysis failed: %w", err) + } + + // Extract structured information + lines := strings.Split(resp.Text(), "\n") + var ethicalConsiderations, stakeholders []string + var recommendations string + + for _, line := range lines { + line = strings.TrimSpace(line) + if strings.Contains(strings.ToLower(line), "ethical") || strings.Contains(strings.ToLower(line), "moral") { + ethicalConsiderations = append(ethicalConsiderations, line) + } + if strings.Contains(strings.ToLower(line), "stakeholder") || strings.Contains(strings.ToLower(line), "affected") { + stakeholders = append(stakeholders, line) + } + if strings.Contains(strings.ToLower(line), "recommend") || strings.Contains(strings.ToLower(line), "suggest") { + recommendations += line + " " + } + } + + return map[string]interface{}{ + "ethicalConsiderations": ethicalConsiderations[:min(len(ethicalConsiderations), 5)], + "stakeholders": stakeholders[:min(len(stakeholders), 5)], + "recommendations": strings.TrimSpace(recommendations), + }, nil + } + ``` + + + Leverage Claude's reasoning capabilities: + + ```python + from typing import List, Dict, Any, Optional + + # Complex analysis function + async def perform_complex_analysis( + problem: str, + context: Optional[str] = None + ) -> Dict[str, Any]: + prompt = problem + if context: + prompt = f"Given this context: {context}\n\nAnalyze this problem step by step: {problem}" + + full_prompt = f"""{prompt} + +Please provide: +1. A thorough analysis +2. Your reasoning process +3. Specific recommendations + +Format your response clearly with sections.""" + + try: + response = await ai.generate( + model="claude-3-opus", # Most capable model + prompt=full_prompt, + config={ + "temperature": 0.2, + "max_tokens": 2000, + } + ) + + # Parse structured response + sections = response.text.split('\n\n') + analysis = sections[0] if sections else "" + reasoning = sections[1] if len(sections) > 1 else "" + + recommendations = [] + for section in sections[2:]: + lines = section.split('\n') + for line in lines: + if line.strip().startswith('-'): + recommendations.append(line.replace('-', '').strip()) + + return { + "analysis": analysis, + "reasoning": reasoning, + "recommendations": recommendations, + } + except Exception as error: + print(f"Analysis failed: {error}") + return {"analysis": "", "reasoning": "", "recommendations": []} + + # Ethical analysis function + async def perform_ethical_analysis(scenario: str) -> Dict[str, Any]: + prompt = f"""Analyze the ethical implications of this scenario: {scenario} + +Please identify: +1. Key ethical considerations +2. Affected stakeholders +3. 
Recommended approach + +Be thorough and consider multiple perspectives.""" + + try: + response = await ai.generate( + model="claude-3-sonnet", + prompt=prompt, + config={ + "temperature": 0.3, + "max_tokens": 1500, + } + ) + + # Extract structured information + lines = [line.strip() for line in response.text.split('\n') if line.strip()] + + ethical_considerations = [ + line for line in lines + if 'ethical' in line.lower() or 'moral' in line.lower() + ][:5] + + stakeholders = [ + line for line in lines + if 'stakeholder' in line.lower() or 'affected' in line.lower() + ][:5] + + recommendations = ' '.join([ + line for line in lines + if 'recommend' in line.lower() or 'suggest' in line.lower() + ]) + + return { + "ethical_considerations": ethical_considerations, + "stakeholders": stakeholders, + "recommendations": recommendations, + } + except Exception as error: + print(f"Ethical analysis failed: {error}") + return {"ethical_considerations": [], "stakeholders": [], "recommendations": ""} + ``` + + + +### Conversational AI + + + + Build sophisticated conversational applications: + + ```ts + // Advanced conversational flow + export const claudeConversationFlow = ai.defineFlow( + { + name: 'claudeConversationFlow', + inputSchema: z.object({ + message: z.string(), + history: z.array(z.object({ + role: z.enum(['user', 'assistant']), + content: z.string(), + })).optional(), + personality: z.enum(['analytical', 'creative', 'supportive', 'professional']).optional(), + }), + outputSchema: z.object({ response: z.string() }), + }, + async ({ message, history = [], personality = 'analytical' }) => { + const personalityPrompts = { + analytical: 'You are a thoughtful analyst who provides detailed, logical responses.', + creative: 'You are a creative thinker who offers imaginative and innovative perspectives.', + supportive: 'You are a supportive companion who provides encouragement and understanding.', + professional: 'You are a professional consultant who gives clear, actionable advice.', + }; + + const messages = [ + { role: 'system', content: personalityPrompts[personality] }, + ...history, + { role: 'user', content: message }, + ]; + + const response = await ai.generate({ + model: 'claude-3-sonnet', + messages, + config: { + temperature: personality === 'creative' ? 0.8 : 0.6, + maxTokens: 1000, + }, + }); + + return { response: response.text }; + }, + ); + ``` + + + Build sophisticated conversational applications: + + ```go + type ChatMessage struct { + Role string `json:"role"` + Content string `json:"content"` + } + + func handleClaudeConversation(ctx context.Context, message string, history []ChatMessage, personality string) (string, error) { + personalityPrompts := map[string]string{ + "analytical": "You are a thoughtful analyst who provides detailed, logical responses.", + "creative": "You are a creative thinker who offers imaginative and innovative perspectives.", + "supportive": "You are a supportive companion who provides encouragement and understanding.", + "professional": "You are a professional consultant who gives clear, actionable advice.", + } + + systemPrompt, exists := personalityPrompts[personality] + if !exists { + systemPrompt = personalityPrompts["analytical"] + } + + messages := []ChatMessage{ + {Role: "system", Content: systemPrompt}, + } + messages = append(messages, history...) 
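+      // Keep the order: system prompt first, prior turns next, then the new user message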
+ messages = append(messages, ChatMessage{Role: "user", Content: message}) + + temperature := 0.6 + if personality == "creative" { + temperature = 0.8 + } + + model := claude.Model(g, "claude-3-7-sonnet-20250219") + resp, err := genkit.Generate(ctx, g, + ai.WithModel(model), + ai.WithMessages(messages), + ai.WithConfig(map[string]interface{}{ + "temperature": temperature, + "max_tokens": 1000, + }), + ) + if err != nil { + return "", fmt.Errorf("conversation failed: %w", err) + } + + return resp.Text(), nil + } + ``` + + + Build sophisticated conversational applications: + + ```python + from typing import List, Dict, Optional + + async def handle_claude_conversation( + message: str, + history: List[Dict[str, str]] = None, + personality: str = "analytical" + ) -> str: + if history is None: + history = [] + + personality_prompts = { + "analytical": "You are a thoughtful analyst who provides detailed, logical responses.", + "creative": "You are a creative thinker who offers imaginative and innovative perspectives.", + "supportive": "You are a supportive companion who provides encouragement and understanding.", + "professional": "You are a professional consultant who gives clear, actionable advice.", + } + + system_prompt = personality_prompts.get(personality, personality_prompts["analytical"]) + + messages = [ + {"role": "system", "content": system_prompt}, + *history, + {"role": "user", "content": message}, + ] + + temperature = 0.8 if personality == "creative" else 0.6 + + try: + response = await ai.generate( + model="claude-3-sonnet", + messages=messages, + config={ + "temperature": temperature, + "max_tokens": 1000, + } + ) + return response.text + except Exception as error: + print(f"Conversation failed: {error}") + return "I'm sorry, I couldn't process your message at the moment." + ``` + + + +## Model Comparison + +### Available Models + +| Model | Capabilities | Best For | Context Window | +|-------|-------------|----------|----------------| +| **Claude 3 Haiku** | Fast, efficient | Simple tasks, quick responses | 200K tokens | +| **Claude 3 Sonnet** | Balanced performance | General-purpose tasks, analysis | 200K tokens | +| **Claude 3 Opus** | Most capable | Complex reasoning, research | 200K tokens | +| **Claude 3.5 Sonnet** | Enhanced reasoning | Advanced analysis, coding | 200K tokens | +| **Claude 3.7 Sonnet** | Latest capabilities | Cutting-edge reasoning tasks | 200K tokens | + +## Best Practices + +### Optimizing for Different Tasks + +1. **Analysis and reasoning**: Use Claude 3 Opus or 3.7 Sonnet with low temperature (0.2-0.3) +2. **Creative writing**: Use Claude 3.5 Sonnet with higher temperature (0.7-0.8) +3. **Quick responses**: Use Claude 3 Haiku for speed +4. **Ethical considerations**: Claude models excel at nuanced ethical reasoning + +### Prompt Engineering + +1. **Be specific**: Claude responds well to detailed, structured prompts +2. **Use examples**: Provide examples of desired output format +3. **Request reasoning**: Ask Claude to explain its thinking process +4. **Set context**: Provide relevant background information + +### Safety and Alignment + +1. **Built-in safety**: Claude has strong safety guardrails +2. **Helpful responses**: Models are trained to be helpful, harmless, and honest +3. **Nuanced understanding**: Excellent at understanding context and intent +4. 
**Ethical reasoning**: Strong capability for ethical analysis and decision-making + +## Next Steps + +- Learn about [generating content](/unified-docs/generating-content) to understand how to use these models effectively +- Explore [tool calling](/unified-docs/tool-calling) to add interactive capabilities (Note: Tool calling may have limitations with Claude models) +- See [creating flows](/unified-docs/creating-flows) to build structured AI workflows with advanced reasoning +- Check out [deployment guides](/unified-docs/deployment) for production deployment strategies diff --git a/src/content/docs/unified-docs/plugins/deepseek.mdx b/src/content/docs/unified-docs/plugins/deepseek.mdx new file mode 100644 index 00000000..09d1fba9 --- /dev/null +++ b/src/content/docs/unified-docs/plugins/deepseek.mdx @@ -0,0 +1,1051 @@ +--- +title: DeepSeek Plugin +description: Learn how to use DeepSeek's advanced AI models with Genkit across JavaScript, Go, and Python, including reasoning models, code generation, and cost-effective solutions. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +The DeepSeek plugin provides access to DeepSeek's powerful AI models, including their advanced reasoning models and cost-effective solutions. DeepSeek models are known for their strong performance in coding, mathematics, and reasoning tasks. + +## Installation and Setup + + + + Install the DeepSeek plugin: + + ```bash + npm install @genkit-ai/compat-oai + ``` + + Configure the plugin when initializing Genkit: + + ```ts + import { genkit } from 'genkit'; + import { deepSeek } from '@genkit-ai/compat-oai/deepseek'; + + const ai = genkit({ + plugins: [deepSeek()], + }); + ``` + + ### API Key Configuration + + Set your DeepSeek API key using one of these methods: + + ```bash + # Environment variable (recommended) + export DEEPSEEK_API_KEY=your_deepseek_api_key + ``` + + ```ts + // Or pass directly to plugin (not recommended for production) + const ai = genkit({ + plugins: [deepSeek({ apiKey: 'your_deepseek_api_key' })], + }); + ``` + + Get your API key from [DeepSeek Platform](https://platform.deepseek.com/). 
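+
+  Once the key is set, a quick smoke test can confirm that the plugin reaches the API. This is a minimal sketch that reuses the `ai` instance and `deepSeek` import configured above, with the `deepseek-chat` model covered later in this guide:
+
+  ```ts
+  // Minimal connectivity check; assumes DEEPSEEK_API_KEY is set in the environment
+  const check = await ai.generate({
+    model: deepSeek.model('deepseek-chat'),
+    prompt: 'Reply with one word if you can read this.',
+  });
+  console.log(check.text);
+  ```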
+ + + For Go applications, use the OpenAI-compatible client with DeepSeek endpoints: + + ```go + package main + + import ( + "context" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/openai" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&openai.OpenAI{ + APIKey: os.Getenv("DEEPSEEK_API_KEY"), + BaseURL: "https://api.deepseek.com/v1", + Models: []openai.ModelConfig{ + {Name: "deepseek-chat", Type: "chat"}, + {Name: "deepseek-coder", Type: "chat"}, + {Name: "deepseek-reasoner", Type: "chat"}, + }, + }), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + ### Environment Configuration + + ```bash + export DEEPSEEK_API_KEY=your_deepseek_api_key + ``` + + + For Python applications, use the OpenAI-compatible client: + + ```bash + pip install genkit-plugin-openai + ``` + + ```python + from genkit.ai import Genkit + from genkit.plugins.openai import OpenAI + + ai = Genkit( + plugins=[OpenAI( + api_key=os.getenv("DEEPSEEK_API_KEY"), + base_url="https://api.deepseek.com/v1", + models=[ + {"name": "deepseek-chat", "type": "chat"}, + {"name": "deepseek-coder", "type": "chat"}, + {"name": "deepseek-reasoner", "type": "chat"}, + ], + )], + ) + ``` + + ### Environment Configuration + + ```bash + export DEEPSEEK_API_KEY=your_deepseek_api_key + ``` + + + +## Basic Usage + +### Text Generation + + + + Use DeepSeek models for text generation: + + ```ts + import { genkit, z } from 'genkit'; + import { deepSeek } from '@genkit-ai/compat-oai/deepseek'; + + const ai = genkit({ + plugins: [deepSeek()], + }); + + // Basic text generation + const response = await ai.generate({ + model: deepSeek.model('deepseek-chat'), + prompt: 'Explain the concept of machine learning', + }); + + console.log(response.text); + + // Flow with DeepSeek + export const deepseekFlow = ai.defineFlow( + { + name: 'deepseekFlow', + inputSchema: z.object({ subject: z.string() }), + outputSchema: z.object({ information: z.string() }), + }, + async ({ subject }) => { + const llmResponse = await ai.generate({ + model: deepSeek.model('deepseek-chat'), + prompt: `Tell me something about ${subject}.`, + }); + return { information: llmResponse.text }; + }, + ); + + // Advanced reasoning tasks + const reasoningResponse = await ai.generate({ + model: deepSeek.model('deepseek-reasoner'), + prompt: 'Solve this step by step: If a train travels 120 km in 2 hours, and then 180 km in 3 hours, what is the average speed for the entire journey?', + config: { + temperature: 0.1, // Lower temperature for reasoning tasks + maxTokens: 1000, + }, + }); + ``` + + + Use DeepSeek models with the generation API: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + ) + + func main() { + ctx := context.Background() + + // Basic text generation + resp, err := genkit.Generate(ctx, g, + ai.WithModel("deepseek-chat"), + ai.WithPrompt("Explain the concept of machine learning"), + ) + if err != nil { + log.Fatal(err) + } + + fmt.Println(resp.Text()) + + // Advanced reasoning tasks + reasoningResp, err := genkit.Generate(ctx, g, + ai.WithModel("deepseek-reasoner"), + ai.WithPrompt("Solve this step by step: If a train travels 120 km in 2 hours, and then 180 km in 3 hours, what is the average speed for the entire journey?"), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.1, + "max_tokens": 1000, + }), + ) + if err != nil { + log.Fatal(err) + } + + fmt.Println(reasoningResp.Text()) + } + ``` + + + Use DeepSeek 
models with the generation API: + + ```python + from genkit.ai import Genkit + from genkit.plugins.openai import OpenAI, openai_name + + ai = Genkit( + plugins=[OpenAI( + api_key=os.getenv("DEEPSEEK_API_KEY"), + base_url="https://api.deepseek.com/v1", + models=[ + {"name": "deepseek-chat", "type": "chat"}, + {"name": "deepseek-reasoner", "type": "chat"}, + ], + )], + ) + + # Basic text generation + response = await ai.generate( + model=openai_name('deepseek-chat'), + prompt='Explain the concept of machine learning' + ) + print(response.text) + + # Advanced reasoning tasks + reasoning_response = await ai.generate( + model=openai_name('deepseek-reasoner'), + prompt='Solve this step by step: If a train travels 120 km in 2 hours, and then 180 km in 3 hours, what is the average speed for the entire journey?', + config={ + 'temperature': 0.1, + 'max_tokens': 1000, + } + ) + print(reasoning_response.text) + ``` + + + +### Code Generation + + + + Use DeepSeek for code generation and programming tasks: + + ```ts + // Code generation flow + export const codeGenerationFlow = ai.defineFlow( + { + name: 'codeGenerationFlow', + inputSchema: z.object({ + task: z.string(), + language: z.string(), + }), + outputSchema: z.object({ code: z.string() }), + }, + async ({ task, language }) => { + const response = await ai.generate({ + model: deepSeek.model('deepseek-coder'), + prompt: `Write ${language} code to ${task}. Include comments and error handling.`, + config: { + temperature: 0.2, // Lower temperature for code generation + maxTokens: 2000, + }, + }); + return { code: response.text }; + }, + ); + + // Code review and optimization + export const codeReviewFlow = ai.defineFlow( + { + name: 'codeReviewFlow', + inputSchema: z.object({ code: z.string() }), + outputSchema: z.object({ + review: z.string(), + suggestions: z.array(z.string()), + }), + }, + async ({ code }) => { + const response = await ai.generate({ + model: deepSeek.model('deepseek-coder'), + prompt: `Review this code and provide suggestions for improvement:\n\n${code}`, + config: { + temperature: 0.3, + maxTokens: 1500, + }, + }); + + // Parse the response to extract review and suggestions + const lines = response.text.split('\n'); + const review = lines.slice(0, 5).join('\n'); + const suggestions = lines.slice(5).filter(line => line.trim().startsWith('-')); + + return { review, suggestions }; + }, + ); + ``` + + + Use DeepSeek for code generation and programming tasks: + + ```go + // Code generation + func generateCode(ctx context.Context, task, language string) (string, error) { + prompt := fmt.Sprintf("Write %s code to %s. 
Include comments and error handling.", language, task) + + resp, err := genkit.Generate(ctx, g, + ai.WithModel("deepseek-coder"), + ai.WithPrompt(prompt), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.2, + "max_tokens": 2000, + }), + ) + if err != nil { + return "", err + } + + return resp.Text(), nil + } + + // Code review and optimization + func reviewCode(ctx context.Context, code string) (string, error) { + prompt := fmt.Sprintf("Review this code and provide suggestions for improvement:\n\n%s", code) + + resp, err := genkit.Generate(ctx, g, + ai.WithModel("deepseek-coder"), + ai.WithPrompt(prompt), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.3, + "max_tokens": 1500, + }), + ) + if err != nil { + return "", err + } + + return resp.Text(), nil + } + ``` + + + Use DeepSeek for code generation and programming tasks: + + ```python + # Code generation + async def generate_code(task: str, language: str) -> str: + prompt = f"Write {language} code to {task}. Include comments and error handling." + + response = await ai.generate( + model=openai_name('deepseek-coder'), + prompt=prompt, + config={ + 'temperature': 0.2, + 'max_tokens': 2000, + } + ) + return response.text + + # Code review and optimization + async def review_code(code: str) -> dict: + prompt = f"Review this code and provide suggestions for improvement:\n\n{code}" + + response = await ai.generate( + model=openai_name('deepseek-coder'), + prompt=prompt, + config={ + 'temperature': 0.3, + 'max_tokens': 1500, + } + ) + + # Parse the response to extract review and suggestions + lines = response.text.split('\n') + review = '\n'.join(lines[:5]) + suggestions = [line for line in lines[5:] if line.strip().startswith('-')] + + return {'review': review, 'suggestions': suggestions} + ``` + + + +## Advanced Features + +### Mathematical Reasoning + + + + Leverage DeepSeek's mathematical reasoning capabilities: + + ```ts + // Mathematical problem solving + export const mathSolverFlow = ai.defineFlow( + { + name: 'mathSolverFlow', + inputSchema: z.object({ problem: z.string() }), + outputSchema: z.object({ + solution: z.string(), + steps: z.array(z.string()), + }), + }, + async ({ problem }) => { + const response = await ai.generate({ + model: deepSeek.model('deepseek-reasoner'), + prompt: `Solve this mathematical problem step by step: ${problem}`, + config: { + temperature: 0.1, // Very low temperature for mathematical accuracy + maxTokens: 1500, + }, + }); + + // Parse the response to extract solution and steps + const lines = response.text.split('\n').filter(line => line.trim()); + const solution = lines[lines.length - 1]; + const steps = lines.slice(0, -1); + + return { solution, steps }; + }, + ); + + // Statistical analysis + export const statisticsFlow = ai.defineFlow( + { + name: 'statisticsFlow', + inputSchema: z.object({ + data: z.array(z.number()), + analysisType: z.enum(['descriptive', 'inferential', 'regression']), + }), + outputSchema: z.object({ analysis: z.string() }), + }, + async ({ data, analysisType }) => { + const dataStr = data.join(', '); + const prompt = `Perform ${analysisType} statistical analysis on this data: [${dataStr}]. 
Provide detailed calculations and interpretations.`; + + const response = await ai.generate({ + model: deepSeek.model('deepseek-reasoner'), + prompt, + config: { + temperature: 0.2, + maxTokens: 2000, + }, + }); + + return { analysis: response.text }; + }, + ); + ``` + + + Leverage DeepSeek's mathematical reasoning capabilities: + + ```go + // Mathematical problem solving + func solveMathProblem(ctx context.Context, problem string) (string, []string, error) { + prompt := fmt.Sprintf("Solve this mathematical problem step by step: %s", problem) + + resp, err := genkit.Generate(ctx, g, + ai.WithModel("deepseek-reasoner"), + ai.WithPrompt(prompt), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.1, + "max_tokens": 1500, + }), + ) + if err != nil { + return "", nil, err + } + + lines := strings.Split(resp.Text(), "\n") + var steps []string + var solution string + + for _, line := range lines { + if strings.TrimSpace(line) != "" { + steps = append(steps, line) + } + } + + if len(steps) > 0 { + solution = steps[len(steps)-1] + steps = steps[:len(steps)-1] + } + + return solution, steps, nil + } + + // Statistical analysis + func performStatisticalAnalysis(ctx context.Context, data []float64, analysisType string) (string, error) { + dataStr := make([]string, len(data)) + for i, v := range data { + dataStr[i] = fmt.Sprintf("%.2f", v) + } + + prompt := fmt.Sprintf("Perform %s statistical analysis on this data: [%s]. Provide detailed calculations and interpretations.", + analysisType, strings.Join(dataStr, ", ")) + + resp, err := genkit.Generate(ctx, g, + ai.WithModel("deepseek-reasoner"), + ai.WithPrompt(prompt), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.2, + "max_tokens": 2000, + }), + ) + if err != nil { + return "", err + } + + return resp.Text(), nil + } + ``` + + + Leverage DeepSeek's mathematical reasoning capabilities: + + ```python + from typing import List + + # Mathematical problem solving + async def solve_math_problem(problem: str) -> dict: + prompt = f"Solve this mathematical problem step by step: {problem}" + + response = await ai.generate( + model=openai_name('deepseek-reasoner'), + prompt=prompt, + config={ + 'temperature': 0.1, + 'max_tokens': 1500, + } + ) + + lines = [line for line in response.text.split('\n') if line.strip()] + solution = lines[-1] if lines else "" + steps = lines[:-1] if len(lines) > 1 else [] + + return {'solution': solution, 'steps': steps} + + # Statistical analysis + async def perform_statistical_analysis(data: List[float], analysis_type: str) -> str: + data_str = ', '.join([f"{x:.2f}" for x in data]) + prompt = f"Perform {analysis_type} statistical analysis on this data: [{data_str}]. Provide detailed calculations and interpretations." 
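+      # deepseek-reasoner with a low temperature keeps the calculations reproducible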
+ + response = await ai.generate( + model=openai_name('deepseek-reasoner'), + prompt=prompt, + config={ + 'temperature': 0.2, + 'max_tokens': 2000, + } + ) + + return response.text + ``` + + + +### Conversational AI + + + + Build conversational applications with DeepSeek: + + ```ts + // Conversational chat flow + export const chatFlow = ai.defineFlow( + { + name: 'chatFlow', + inputSchema: z.object({ + message: z.string(), + history: z.array(z.object({ + role: z.enum(['user', 'assistant']), + content: z.string(), + })).optional(), + mode: z.enum(['general', 'coding', 'reasoning']).optional(), + }), + outputSchema: z.object({ response: z.string() }), + }, + async ({ message, history = [], mode = 'general' }) => { + // Select model based on conversation mode + const modelMap = { + general: 'deepseek-chat', + coding: 'deepseek-coder', + reasoning: 'deepseek-reasoner', + }; + + // Build conversation context + const messages = [ + { role: 'system', content: `You are a helpful AI assistant specialized in ${mode} tasks.` }, + ...history, + { role: 'user', content: message }, + ]; + + const response = await ai.generate({ + model: deepSeek.model(modelMap[mode]), + messages, + config: { + temperature: mode === 'reasoning' ? 0.1 : 0.7, + maxTokens: 1500, + }, + }); + + return { response: response.text }; + }, + ); + + // Multi-turn reasoning conversation + export const reasoningChatFlow = ai.defineFlow( + { + name: 'reasoningChatFlow', + inputSchema: z.object({ + question: z.string(), + context: z.string().optional(), + }), + outputSchema: z.object({ + answer: z.string(), + reasoning: z.string(), + }), + }, + async ({ question, context }) => { + const prompt = context + ? `Given this context: ${context}\n\nAnswer this question with detailed reasoning: ${question}` + : `Answer this question with detailed reasoning: ${question}`; + + const response = await ai.generate({ + model: deepSeek.model('deepseek-reasoner'), + prompt, + config: { + temperature: 0.2, + maxTokens: 2000, + }, + }); + + // Split response into answer and reasoning + const parts = response.text.split('\n\n'); + const answer = parts[parts.length - 1]; + const reasoning = parts.slice(0, -1).join('\n\n'); + + return { answer, reasoning }; + }, + ); + ``` + + + Build conversational applications with DeepSeek: + + ```go + type ChatMessage struct { + Role string `json:"role"` + Content string `json:"content"` + } + + func handleChat(ctx context.Context, message string, history []ChatMessage, mode string) (string, error) { + // Select model based on conversation mode + modelMap := map[string]string{ + "general": "deepseek-chat", + "coding": "deepseek-coder", + "reasoning": "deepseek-reasoner", + } + + model, exists := modelMap[mode] + if !exists { + model = "deepseek-chat" + } + + // Build conversation context + messages := []ChatMessage{ + {Role: "system", Content: fmt.Sprintf("You are a helpful AI assistant specialized in %s tasks.", mode)}, + } + messages = append(messages, history...) 
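+      // The new user message goes last so the model responds to it directly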
+ messages = append(messages, ChatMessage{Role: "user", Content: message}) + + temperature := 0.7 + if mode == "reasoning" { + temperature = 0.1 + } + + resp, err := genkit.Generate(ctx, g, + ai.WithModel(model), + ai.WithMessages(messages), + ai.WithConfig(map[string]interface{}{ + "temperature": temperature, + "max_tokens": 1500, + }), + ) + if err != nil { + return "", err + } + + return resp.Text(), nil + } + ``` + + + Build conversational applications with DeepSeek: + + ```python + from typing import List, Dict, Optional + + async def handle_chat( + message: str, + history: List[Dict[str, str]] = None, + mode: str = 'general' + ) -> str: + if history is None: + history = [] + + # Select model based on conversation mode + model_map = { + 'general': 'deepseek-chat', + 'coding': 'deepseek-coder', + 'reasoning': 'deepseek-reasoner', + } + + model = model_map.get(mode, 'deepseek-chat') + + # Build conversation context + messages = [ + {"role": "system", "content": f"You are a helpful AI assistant specialized in {mode} tasks."}, + *history, + {"role": "user", "content": message}, + ] + + temperature = 0.1 if mode == 'reasoning' else 0.7 + + response = await ai.generate( + model=openai_name(model), + messages=messages, + config={ + 'temperature': temperature, + 'max_tokens': 1500, + } + ) + + return response.text + ``` + + + +## Model Comparison + +### Available Models + +| Model | Capabilities | Best For | Context Window | +|-------|-------------|----------|----------------| +| **deepseek-chat** | General conversation, reasoning | General-purpose tasks, Q&A | 32K tokens | +| **deepseek-coder** | Code generation, programming | Software development, code review | 32K tokens | +| **deepseek-reasoner** | Advanced reasoning, mathematics | Complex problem solving, analysis | 32K tokens | + +### Performance Characteristics + + + + ```ts + // Performance comparison example + const performanceTest = async () => { + const prompt = "Explain the time complexity of quicksort algorithm"; + + // General model + const startGeneral = Date.now(); + const generalResponse = await ai.generate({ + model: deepSeek.model('deepseek-chat'), + prompt, + }); + const generalTime = Date.now() - startGeneral; + + // Specialized coder model + const startCoder = Date.now(); + const coderResponse = await ai.generate({ + model: deepSeek.model('deepseek-coder'), + prompt, + }); + const coderTime = Date.now() - startCoder; + + console.log(`General: ${generalTime}ms, Coder: ${coderTime}ms`); + console.log(`General length: ${generalResponse.text.length}, Coder length: ${coderResponse.text.length}`); + }; + ``` + + + ```go + func performanceTest(ctx context.Context) { + prompt := "Explain the time complexity of quicksort algorithm" + + // General model + startGeneral := time.Now() + generalResp, _ := genkit.Generate(ctx, g, + ai.WithModel("deepseek-chat"), + ai.WithPrompt(prompt), + ) + generalTime := time.Since(startGeneral) + + // Specialized coder model + startCoder := time.Now() + coderResp, _ := genkit.Generate(ctx, g, + ai.WithModel("deepseek-coder"), + ai.WithPrompt(prompt), + ) + coderTime := time.Since(startCoder) + + fmt.Printf("General: %v, Coder: %v\n", generalTime, coderTime) + fmt.Printf("General length: %d, Coder length: %d\n", + len(generalResp.Text()), len(coderResp.Text())) + } + ``` + + + ```python + import time + + async def performance_test(): + prompt = "Explain the time complexity of quicksort algorithm" + + # General model + start_general = time.time() + general_response = await ai.generate( + 
model=openai_name('deepseek-chat'), + prompt=prompt + ) + general_time = time.time() - start_general + + # Specialized coder model + start_coder = time.time() + coder_response = await ai.generate( + model=openai_name('deepseek-coder'), + prompt=prompt + ) + coder_time = time.time() - start_coder + + print(f"General: {general_time:.2f}s, Coder: {coder_time:.2f}s") + print(f"General length: {len(general_response.text)}, Coder length: {len(coder_response.text)}") + ``` + + + +## Advanced Configuration + +### Custom Model Configuration + + + + ```ts + // Advanced configuration with passthrough options + const response = await ai.generate({ + model: deepSeek.model('deepseek-chat'), + prompt: 'Analyze the latest developments in AI', + config: { + temperature: 0.7, + maxTokens: 2000, + topP: 0.9, + frequencyPenalty: 0.1, + presencePenalty: 0.1, + // Passthrough configuration for new features + stream: true, + logprobs: true, + top_logprobs: 5, + }, + }); + + // Environment-specific configuration + const environmentConfig = { + development: { + model: deepSeek.model('deepseek-chat'), + temperature: 0.8, + maxTokens: 1000, + }, + production: { + model: deepSeek.model('deepseek-reasoner'), + temperature: 0.3, + maxTokens: 2000, + }, + }; + + const config = environmentConfig[process.env.NODE_ENV || 'development']; + ``` + + + ```go + // Advanced configuration + resp, err := genkit.Generate(ctx, g, + ai.WithModel("deepseek-chat"), + ai.WithPrompt("Analyze the latest developments in AI"), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.7, + "max_tokens": 2000, + "top_p": 0.9, + "frequency_penalty": 0.1, + "presence_penalty": 0.1, + "stream": true, + "logprobs": true, + "top_logprobs": 5, + }), + ) + ``` + + + ```python + # Advanced configuration + response = await ai.generate( + model=openai_name('deepseek-chat'), + prompt='Analyze the latest developments in AI', + config={ + 'temperature': 0.7, + 'max_tokens': 2000, + 'top_p': 0.9, + 'frequency_penalty': 0.1, + 'presence_penalty': 0.1, + 'stream': True, + 'logprobs': True, + 'top_logprobs': 5, + } + ) + ``` + + + +## Best Practices + +### Optimizing for Different Tasks + +1. **General conversation**: Use `deepseek-chat` with moderate temperature (0.7) +2. **Code generation**: Use `deepseek-coder` with low temperature (0.2) +3. **Mathematical reasoning**: Use `deepseek-reasoner` with very low temperature (0.1) +4. **Creative writing**: Use `deepseek-chat` with higher temperature (0.8-0.9) + +### Cost Optimization + +1. **Choose the right model**: Use specialized models for their intended tasks +2. **Optimize token usage**: Be specific in prompts and set appropriate `maxTokens` +3. **Cache responses**: Cache frequently requested computations +4. 
**Batch similar requests**: Group related queries when possible + +### Error Handling + + + + ```ts + const robustDeepSeekFlow = ai.defineFlow( + { + name: 'robustDeepSeekFlow', + inputSchema: z.object({ query: z.string() }), + outputSchema: z.object({ response: z.string() }), + }, + async ({ query }) => { + try { + const response = await ai.generate({ + model: deepSeek.model('deepseek-chat'), + prompt: query, + config: { + temperature: 0.7, + maxTokens: 1000, + }, + }); + return { response: response.text }; + } catch (error) { + if (error.message.includes('rate_limit')) { + // Fallback to reasoning model with lower token limit + const fallbackResponse = await ai.generate({ + model: deepSeek.model('deepseek-reasoner'), + prompt: query, + config: { + maxTokens: 500, + }, + }); + return { response: fallbackResponse.text }; + } + throw error; + } + }, + ); + ``` + + + ```go + func robustDeepSeekGenerate(ctx context.Context, query string) (string, error) { + resp, err := genkit.Generate(ctx, g, + ai.WithModel("deepseek-chat"), + ai.WithPrompt(query), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.7, + "max_tokens": 1000, + }), + ) + + if err != nil { + if strings.Contains(err.Error(), "rate_limit") { + // Fallback to reasoning model + fallbackResp, fallbackErr := genkit.Generate(ctx, g, + ai.WithModel("deepseek-reasoner"), + ai.WithPrompt(query), + ai.WithConfig(map[string]interface{}{ + "max_tokens": 500, + }), + ) + if fallbackErr != nil { + return "", fallbackErr + } + return fallbackResp.Text(), nil + } + return "", err + } + + return resp.Text(), nil + } + ``` + + + ```python + async def robust_deepseek_generate(query: str) -> str: + try: + response = await ai.generate( + model=openai_name('deepseek-chat'), + prompt=query, + config={ + 'temperature': 0.7, + 'max_tokens': 1000, + } + ) + return response.text + except Exception as error: + if 'rate_limit' in str(error): + # Fallback to reasoning model + fallback_response = await ai.generate( + model=openai_name('deepseek-reasoner'), + prompt=query, + config={ + 'max_tokens': 500, + } + ) + return fallback_response.text + raise error + ``` + + + +## Next Steps + +- Learn about [generating content](/unified-docs/generating-content) to understand how to use these models effectively +- Explore [tool calling](/unified-docs/tool-calling) to add interactive capabilities to your DeepSeek applications +- See [creating flows](/unified-docs/creating-flows) to build structured AI workflows with reasoning capabilities +- Check out [deployment guides](/unified-docs/deployment) for production deployment strategies diff --git a/src/content/docs/unified-docs/plugins/google-ai.mdx b/src/content/docs/unified-docs/plugins/google-ai.mdx new file mode 100644 index 00000000..3c27e258 --- /dev/null +++ b/src/content/docs/unified-docs/plugins/google-ai.mdx @@ -0,0 +1,590 @@ +--- +title: Google AI plugin +description: Learn how to use Google's Gemini models with Genkit across JavaScript, Go, and Python, including text generation, embeddings, TTS, video generation, and context caching. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +The Google AI plugin provides interfaces to Google's Gemini models through the [Gemini API](https://ai.google.dev/docs/gemini_api_overview), offering powerful text generation, embeddings, text-to-speech, video generation, and context caching capabilities. 
+ +## Installation and Setup + + + + Install the Google AI plugin: + + ```bash + npm install @genkit-ai/googleai + ``` + + Configure the plugin when initializing Genkit: + + ```ts + import { genkit } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + + const ai = genkit({ + plugins: [googleAI()], + }); + ``` + + + The Google AI plugin is included with the Genkit Go package: + + ```go + import ( + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlegenai" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlegenai.GoogleAI{}), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + + Install the Google AI plugin: + + ```bash + pip install genkit-plugin-google-genai + ``` + + Configure the plugin when initializing Genkit: + + ```python + from genkit.ai import Genkit + from genkit.plugins.google_genai import GoogleGenai + + ai = Genkit( + plugins=[GoogleGenai()], + ) + ``` + + + +## API Key Configuration + +The plugin requires an API key for the Gemini API, which you can get from [Google AI Studio](https://aistudio.google.com/app/apikey). + + + + Configure your API key by doing one of the following: + + - Set the `GEMINI_API_KEY` environment variable: + ```bash + export GEMINI_API_KEY=your_api_key_here + ``` + + - Specify the API key when initializing the plugin: + ```ts + googleAI({ apiKey: yourKey }); + ``` + + :::caution + Don't embed your API key directly in code! Use environment variables or a service like Cloud Secret Manager. + ::: + + + Set the `GEMINI_API_KEY` environment variable: + + ```bash + export GEMINI_API_KEY=your_api_key_here + ``` + + The plugin will automatically use this environment variable. + + + Set the `GEMINI_API_KEY` environment variable: + + ```bash + export GEMINI_API_KEY=your_api_key_here + ``` + + The plugin will automatically use this environment variable. 
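+
+  If you want your app to fail fast when the key is missing, a small guard before initializing Genkit can help. This is a minimal sketch using the `GEMINI_API_KEY` variable described above:
+
+  ```python
+  import os
+
+  # Fail early with a clear message if the Gemini API key is not configured
+  if not os.environ.get("GEMINI_API_KEY"):
+      raise RuntimeError("Set GEMINI_API_KEY before initializing Genkit.")
+  ```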
+ + + +## Basic Usage + + + + Use the helper functions to reference models and embedders: + + ```ts + import { googleAI } from '@genkit-ai/googleai'; + + // Referencing models + const model = googleAI.model('gemini-2.5-flash'); + const modelPro = googleAI.model('gemini-2.5-flash-lite'); + + // Referencing embedders + const embedder = googleAI.embedder('gemini-embedding-001'); + + // Set default model + const ai = genkit({ + plugins: [googleAI()], + model: googleAI.model('gemini-2.5-flash'), + }); + + // Generate content + const llmResponse = await ai.generate('Tell me a joke.'); + + // Generate embeddings + const embeddings = await ai.embed({ + embedder: googleAI.embedder('gemini-embedding-001'), + content: 'Hello world', + }); + ``` + + + Use the models directly with the generation API: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlegenai" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlegenai.GoogleAI{}), + genkit.WithDefaultModel("googleai/gemini-2.5-flash"), + ) + if err != nil { + log.Fatal(err) + } + + // Generate content + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("Tell me a joke."), + ) + if err != nil { + log.Fatal(err) + } + + fmt.Println(resp.Text()) + } + ``` + + + Use the models with the generation API: + + ```python + from genkit.ai import Genkit + from genkit.plugins.google_genai import GoogleGenai, google_genai_name + + ai = Genkit( + plugins=[GoogleGenai()], + model=google_genai_name('gemini-2.5-flash'), + ) + + # Generate content + response = await ai.generate('Tell me a joke.') + print(response.text) + + # Generate embeddings + embeddings = await ai.embed( + embedder=google_genai_name('gemini-embedding-001'), + content='Hello world', + ) + ``` + + + +## Working with Files + + + + You can use files uploaded to the Gemini Files API: + + ```ts + import { GoogleAIFileManager } from '@google/generative-ai/server'; + + const fileManager = new GoogleAIFileManager(process.env.GEMINI_API_KEY); + const uploadResult = await fileManager.uploadFile('path/to/file.jpg', { + mimeType: 'image/jpeg', + displayName: 'Your Image', + }); + + const response = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: [ + { text: 'Describe this image:' }, + { + media: { + contentType: uploadResult.file.mimeType, + url: uploadResult.file.uri, + }, + }, + ], + }); + ``` + + + File handling in Go requires using the Google AI SDK directly for file uploads, then referencing the files in Genkit: + + ```go + // Upload files using the Google AI SDK, then reference in Genkit + // File upload implementation depends on your specific use case + + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("Describe this image:"), + ai.WithMedia(&ai.Media{ + ContentType: "image/jpeg", + URL: "uploaded_file_uri", + }), + ) + ``` + + + File handling in Python requires using the Google AI SDK for uploads: + + ```python + # Upload files using the Google AI SDK, then reference in Genkit + # File upload implementation depends on your specific use case + + response = await ai.generate( + prompt=[ + {'text': 'Describe this image:'}, + { + 'media': { + 'contentType': 'image/jpeg', + 'url': 'uploaded_file_uri', + } + } + ], + model=google_genai_name('gemini-2.5-flash'), + ) + ``` + + + +## Fine-tuned Models + + + + You can use models fine-tuned with the Google Gemini API. 
Follow the instructions from the [Gemini API](https://ai.google.dev/gemini-api/docs/model-tuning/tutorial?lang=python) or fine-tune using [AI Studio](https://aistudio.corp.google.com/app/tune). + + When calling a tuned model, use the tuned model's ID directly: + + ```ts + const llmResponse = await ai.generate({ + prompt: 'Suggest an item for the menu of fish themed restaurant', + model: googleAI.model('tunedModels/my-example-model-apbm8oqbvuv2'), + }); + ``` + + + Use fine-tuned models by specifying the tuned model ID: + + ```go + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("Suggest an item for the menu of fish themed restaurant"), + ai.WithModelName("googleai/tunedModels/my-example-model-apbm8oqbvuv2"), + ) + ``` + + + Use fine-tuned models by specifying the tuned model ID: + + ```python + response = await ai.generate( + prompt='Suggest an item for the menu of fish themed restaurant', + model=google_genai_name('tunedModels/my-example-model-apbm8oqbvuv2'), + ) + ``` + + + +## Text-to-Speech (TTS) + + + + Generate audio using the Gemini TTS model: + + ```ts + import { writeFile } from 'node:fs/promises'; + + const { media } = await ai.generate({ + model: googleAI.model('gemini-2.5-flash-preview-tts'), + config: { + responseModalities: ['AUDIO'], + speechConfig: { + voiceConfig: { + prebuiltVoiceConfig: { voiceName: 'Algenib' }, + }, + }, + }, + prompt: 'Say that Genkit is an amazing Gen AI library', + }); + + if (media) { + const audioBuffer = Buffer.from( + media.url.substring(media.url.indexOf(',') + 1), + 'base64' + ); + await writeFile('output.wav', audioBuffer); + } + ``` + + ### Multi-speaker Audio + + Generate audio with multiple speakers: + + ```ts + const response = await ai.generate({ + model: googleAI.model('gemini-2.5-flash-preview-tts'), + config: { + responseModalities: ['AUDIO'], + speechConfig: { + multiSpeakerVoiceConfig: { + speakerVoiceConfigs: [ + { + speaker: 'Speaker1', + voiceConfig: { + prebuiltVoiceConfig: { voiceName: 'Algenib' }, + }, + }, + { + speaker: 'Speaker2', + voiceConfig: { + prebuiltVoiceConfig: { voiceName: 'Achernar' }, + }, + }, + ], + }, + }, + }, + prompt: `Here's the dialog: + Speaker1: "Genkit is an amazing Gen AI library!" + Speaker2: "I thought it was a framework."`, + }); + ``` + + + Text-to-speech functionality is currently available primarily in JavaScript. For Go applications, you would need to: + + 1. Use the Google AI SDK directly for TTS functionality + 2. Or call a JavaScript-based service that handles TTS + 3. Or use Google Cloud Text-to-Speech API separately + + ```go + // TTS is not directly supported in Go Genkit + // Consider using Google Cloud Text-to-Speech API or + // a JavaScript service for TTS functionality + ``` + + + Text-to-speech functionality is currently available primarily in JavaScript. For Python applications, you would need to: + + 1. Use the Google AI SDK directly for TTS functionality + 2. Or call a JavaScript-based service that handles TTS + 3. 
Or use Google Cloud Text-to-Speech API separately + + ```python + # TTS is not directly supported in Python Genkit + # Consider using Google Cloud Text-to-Speech API or + # a JavaScript service for TTS functionality + ``` + + + +## Video Generation (Veo) + + + + Generate videos using the Veo models: + + ```ts + const videoFlow = ai.defineFlow('text-to-video-veo', async () => { + let { operation } = await ai.generate({ + model: googleAI.model('veo-2.0-generate-001'), + prompt: 'A majestic dragon soaring over a mystical forest at dawn.', + config: { + durationSeconds: 5, + aspectRatio: '16:9', + }, + }); + + if (!operation) { + throw new Error('Expected the model to return an operation'); + } + + // Wait until the operation completes + while (!operation.done) { + operation = await ai.checkOperation(operation); + await new Promise((resolve) => setTimeout(resolve, 5000)); + } + + if (operation.error) { + throw new Error('Failed to generate video: ' + operation.error.message); + } + + const video = operation.output?.message?.content.find((p) => !!p.media); + if (!video) { + throw new Error('Failed to find the generated video'); + } + + return video; + }); + ``` + + ### Video from Photo Reference + + ```ts + const startingImage = fs.readFileSync('photo.jpg', { encoding: 'base64' }); + + let { operation } = await ai.generate({ + model: googleAI.model('veo-2.0-generate-001'), + prompt: [ + { text: 'make the subject in the photo move' }, + { + media: { + contentType: 'image/jpeg', + url: `data:image/jpeg;base64,${startingImage}`, + }, + }, + ], + config: { + durationSeconds: 5, + aspectRatio: '9:16', + personGeneration: 'allow_adult', + }, + }); + ``` + + + Video generation functionality is currently available primarily in JavaScript. For Go applications, you would need to: + + 1. Use the Google AI SDK directly for video generation + 2. Or call a JavaScript-based service that handles video generation + 3. Or implement video generation using the Gemini API directly + + ```go + // Video generation is not directly supported in Go Genkit + // Consider using the Google AI SDK directly or + // a JavaScript service for video generation functionality + ``` + + + Video generation functionality is currently available primarily in JavaScript. For Python applications, you would need to: + + 1. Use the Google AI SDK directly for video generation + 2. Or call a JavaScript-based service that handles video generation + 3. Or implement video generation using the Gemini API directly + + ```python + # Video generation is not directly supported in Python Genkit + # Consider using the Google AI SDK directly or + # a JavaScript service for video generation functionality + ``` + + + +## Context Caching + + + + Context caching allows models to reuse previously cached content to optimize performance: + + ```ts + const llmResponse = await ai.generate({ + messages: [ + { + role: 'user', + content: [{ text: 'Here is the relevant text from War and Peace.' 
}], + }, + { + role: 'model', + content: [ + { + text: 'Based on War and Peace, here is some analysis of Pierre Bezukhov\'s character.', + }, + ], + metadata: { + cache: { + ttlSeconds: 300, // Cache this message for 5 minutes + }, + }, + }, + ], + model: googleAI.model('gemini-2.5-flash-001'), + prompt: 'Describe Pierre\'s transformation throughout the novel', + }); + ``` + + ### Caching Large Documents + + ```ts + const textContent = await fs.readFile('path/to/war_and_peace.txt', 'utf-8'); + + const llmResponse = await ai.generate({ + messages: [ + { + role: 'user', + content: [{ text: textContent }], + }, + { + role: 'model', + content: [ + { + text: 'This analysis is based on the provided text from War and Peace.', + }, + ], + metadata: { + cache: { + ttlSeconds: 300, + }, + }, + }, + ], + model: googleAI.model('gemini-2.5-flash-001'), + prompt: 'Analyze the relationship between Pierre and Natasha.', + }); + ``` + + + Context caching functionality is currently available primarily in JavaScript. For Go applications, you would need to implement caching manually or use the Google AI SDK directly. + + ```go + // Context caching is not directly supported in Go Genkit + // Consider implementing your own caching layer or + // using the Google AI SDK directly for caching functionality + ``` + + + Context caching functionality is currently available primarily in JavaScript. For Python applications, you would need to implement caching manually or use the Google AI SDK directly. + + ```python + # Context caching is not directly supported in Python Genkit + # Consider implementing your own caching layer or + # using the Google AI SDK directly for caching functionality + ``` + + + +## Available Models + +The Google AI plugin supports various Gemini models: + +- **Text Generation**: `gemini-2.5-flash`, `gemini-2.5-flash-lite`, `gemini-1.5-pro` +- **Embeddings**: `gemini-embedding-001` +- **Text-to-Speech**: `gemini-2.5-flash-preview-tts` +- **Video Generation**: `veo-2.0-generate-001`, `veo-3.0-generate-preview` + +## Next Steps + +- Learn about [generating content](/unified-docs/generating-content) to understand how to use these models effectively +- Explore [tool calling](/unified-docs/tool-calling) to add interactive capabilities to your AI applications +- See [creating flows](/unified-docs/creating-flows) to build structured AI workflows +- Check out [context](/unified-docs/context) for managing information flow in your applications diff --git a/src/content/docs/unified-docs/plugins/mcp.mdx b/src/content/docs/unified-docs/plugins/mcp.mdx new file mode 100644 index 00000000..b45c9d49 --- /dev/null +++ b/src/content/docs/unified-docs/plugins/mcp.mdx @@ -0,0 +1,1049 @@ +--- +title: Model Context Protocol (MCP) Plugin +description: Learn how to integrate MCP servers with Genkit across JavaScript, Go, and Python for extensible tool and resource management. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +The Model Context Protocol (MCP) plugin enables integration between Genkit and the [Model Context Protocol](https://modelcontextprotocol.io), an open standard for connecting AI applications with external tools, resources, and prompts. 
MCP allows you to: + +- **Consume MCP tools and resources** from external servers as a client +- **Expose Genkit tools and prompts** as an MCP server for other applications +- **Manage multiple MCP connections** for complex workflows + +## Installation and Setup + + + + Install the MCP plugin: + + ```bash + npm install genkit @genkit-ai/mcp + ``` + + Basic setup: + + ```ts + import { genkit } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + import { createMcpHost } from '@genkit-ai/mcp'; + + const ai = genkit({ + plugins: [googleAI()], + }); + + // Create MCP host to manage multiple servers + const mcpHost = createMcpHost({ + name: 'myMcpClients', + mcpServers: { + fs: { + command: 'npx', + args: ['-y', '@modelcontextprotocol/server-filesystem', process.cwd()], + }, + memory: { + command: 'npx', + args: ['-y', '@modelcontextprotocol/server-memory'], + }, + }, + }); + ``` + + + Import the MCP package: + + ```bash + go get github.com/firebase/genkit/go/plugins/mcp + ``` + + Basic setup: + + ```go + package main + + import ( + "context" + "log" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/mcp" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx) + if err != nil { + log.Fatal(err) + } + + // Create MCP manager for multiple servers + manager, err := mcp.NewMCPManager(mcp.MCPManagerOptions{ + Name: "my-app", + MCPServers: []mcp.MCPServerConfig{ + { + Name: "time-server", + Config: mcp.MCPClientOptions{ + Name: "mcp-server-time", + Stdio: &mcp.StdioConfig{ + Command: "uvx", + Args: []string{"mcp-server-time"}, + }, + }, + }, + }, + }) + if err != nil { + log.Fatal(err) + } + } + ``` + + + Install the MCP plugin: + + ```bash + pip install genkit-mcp + ``` + + Basic setup: + + ```python + import asyncio + from genkit.ai import Genkit + from genkit.plugins.mcp import MCPPlugin + + async def main(): + ai = Genkit( + plugins=[ + MCPPlugin( + name="my-mcp-client", + servers={ + "filesystem": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-filesystem", "."], + }, + "memory": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-memory"], + }, + }, + ), + ], + ) + + if __name__ == "__main__": + asyncio.run(main()) + ``` + + + +## MCP Client Usage + +### Connecting to MCP Servers + + + + #### Multiple Servers with MCP Host + + ```ts + import { createMcpHost } from '@genkit-ai/mcp'; + + const mcpHost = createMcpHost({ + name: 'myMcpClients', + mcpServers: { + // Filesystem server + fs: { + command: 'npx', + args: ['-y', '@modelcontextprotocol/server-filesystem', process.cwd()], + }, + // Memory server + memory: { + command: 'npx', + args: ['-y', '@modelcontextprotocol/server-memory'], + }, + // Remote HTTP server + remote: { + url: 'https://api.example.com/mcp', + headers: { + 'Authorization': 'Bearer your-token', + }, + }, + }, + rawToolResponses: false, // Process responses for better compatibility + }); + + // Use tools from all connected servers + const { text } = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: `Analyze all files in ${process.cwd()} and remember key findings.`, + tools: await mcpHost.getActiveTools(ai), + resources: await mcpHost.getActiveResources(ai), + }); + + // Clean up when done + await mcpHost.close(); + ``` + + #### Single Server with MCP Client + + ```ts + import { createMcpClient } from '@genkit-ai/mcp'; + + const fsClient = createMcpClient({ + name: 'myFileSystemClient', + mcpServer: { + command: 'npx', + args: ['-y', 
'@modelcontextprotocol/server-filesystem', process.cwd()], + }, + }); + + await fsClient.ready(); + + // Get tools from this specific client + const fsTools = await fsClient.getActiveTools(ai); + + const { text } = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: 'List files in the current directory', + tools: fsTools, + }); + + await fsClient.disable(); + ``` + + + #### Multiple Servers with Manager + + ```go + manager, err := mcp.NewMCPManager(mcp.MCPManagerOptions{ + Name: "my-app", + MCPServers: []mcp.MCPServerConfig{ + { + Name: "filesystem", + Config: mcp.MCPClientOptions{ + Name: "filesystem-server", + Stdio: &mcp.StdioConfig{ + Command: "npx", + Args: []string{"-y", "@modelcontextprotocol/server-filesystem", "."}, + }, + }, + }, + { + Name: "time", + Config: mcp.MCPClientOptions{ + Name: "time-server", + Stdio: &mcp.StdioConfig{ + Command: "uvx", + Args: []string{"mcp-server-time"}, + }, + }, + }, + }, + }) + if err != nil { + log.Fatal(err) + } + + // Get all tools from all active servers + tools, err := manager.GetActiveTools(ctx, g) + if err != nil { + log.Fatal(err) + } + + // Use tools in generation + resp, err := genkit.Generate(ctx, g, + ai.WithModel(myModel), + ai.WithPrompt("What time is it and list files in current directory?"), + ai.WithTools(tools...), + ) + if err != nil { + log.Fatal(err) + } + ``` + + #### Single Server Client + + ```go + client, err := mcp.NewGenkitMCPClient(mcp.MCPClientOptions{ + Name: "time-client", + Stdio: &mcp.StdioConfig{ + Command: "uvx", + Args: []string{"mcp-server-time"}, + }, + }) + if err != nil { + log.Fatal(err) + } + + // Get a specific tool + timeTool, err := client.GetTool(ctx, g, "get_current_time") + if err != nil { + log.Fatal(err) + } + + // Use the tool + resp, err := genkit.Generate(ctx, g, + ai.WithModel(myModel), + ai.WithPrompt("What time is it?"), + ai.WithTools(timeTool), + ) + if err != nil { + log.Fatal(err) + } + ``` + + + #### Multiple Servers + + ```python + from genkit.plugins.mcp import MCPPlugin + + mcp_plugin = MCPPlugin( + name="my-mcp-client", + servers={ + "filesystem": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-filesystem", "."], + }, + "memory": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-memory"], + }, + "time": { + "command": "uvx", + "args": ["mcp-server-time"], + }, + }, + ) + + ai = Genkit(plugins=[mcp_plugin]) + + # Get all available tools + tools = await mcp_plugin.get_active_tools() + + # Use tools in generation + response = await ai.generate( + model="googleai/gemini-2.5-flash", + prompt="What time is it and what files are in the current directory?", + tools=tools, + ) + ``` + + #### Single Server + + ```python + from genkit.plugins.mcp import MCPClient + + client = MCPClient( + name="time-client", + server_config={ + "command": "uvx", + "args": ["mcp-server-time"], + }, + ) + + await client.connect() + + # Get specific tool + time_tool = await client.get_tool("get_current_time") + + # Use the tool + response = await ai.generate( + model="googleai/gemini-2.5-flash", + prompt="What time is it?", + tools=[time_tool], + ) + + await client.disconnect() + ``` + + + +### Using MCP Tools and Resources + + + + ```ts + // Get tools from specific servers + const fsTools = await mcpHost.getActiveTools(ai, ['fs']); + const memoryTools = await mcpHost.getActiveTools(ai, ['memory']); + + // Get all tools + const allTools = await mcpHost.getActiveTools(ai); + + // Get resources + const resources = await mcpHost.getActiveResources(ai); + 
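+  // Resources supply read-only context for the model, while tools expose callable actions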
+ // Get prompts from a specific server + const prompt = await mcpHost.getPrompt('memory', 'recall_information'); + + // Use in generation with specific tools + const { text } = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: 'Read the README file and remember its contents', + tools: [...fsTools, ...memoryTools], + resources: resources, + }); + + // Tool responses are automatically processed + // Raw responses can be enabled with rawToolResponses: true + ``` + + + ```go + // Get tools from specific server + timeTool, err := manager.GetTool(ctx, g, "time", "get_current_time") + if err != nil { + log.Fatal(err) + } + + // Get prompt from specific server + prompt, err := manager.GetPrompt(ctx, g, "time", "time_prompt", nil) + if err != nil { + log.Fatal(err) + } + + // Get all tools from all servers + allTools, err := manager.GetActiveTools(ctx, g) + if err != nil { + log.Fatal(err) + } + + // Use in generation + resp, err := genkit.Generate(ctx, g, + ai.WithModel(myModel), + ai.WithPrompt("Use available tools to help me"), + ai.WithTools(allTools...), + ) + if err != nil { + log.Fatal(err) + } + + // Dynamic server management + err = manager.Connect("weather", mcp.MCPClientOptions{ + Name: "weather-server", + Stdio: &mcp.StdioConfig{ + Command: "python", + Args: []string{"weather_server.py"}, + }, + }) + if err != nil { + log.Fatal(err) + } + + // Disconnect when done + err = manager.Disconnect("weather") + if err != nil { + log.Fatal(err) + } + ``` + + + ```python + # Get tools from specific servers + fs_tools = await mcp_plugin.get_tools_from_server("filesystem") + memory_tools = await mcp_plugin.get_tools_from_server("memory") + + # Get all available tools + all_tools = await mcp_plugin.get_active_tools() + + # Get resources + resources = await mcp_plugin.get_active_resources() + + # Get prompt from specific server + prompt = await mcp_plugin.get_prompt("memory", "recall_information") + + # Use in generation + response = await ai.generate( + model="googleai/gemini-2.5-flash", + prompt="Read files and remember important information", + tools=fs_tools + memory_tools, + resources=resources, + ) + + # Dynamic server management + await mcp_plugin.connect_server("weather", { + "command": "python", + "args": ["weather_server.py"], + }) + + # Disconnect server + await mcp_plugin.disconnect_server("weather") + ``` + + + +## MCP Server Usage + +### Exposing Genkit as MCP Server + + + + ```ts + import { createMcpServer } from '@genkit-ai/mcp'; + import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; + import { genkit, z } from 'genkit'; + + const ai = genkit({ + plugins: [googleAI()], + }); + + // Define tools to expose + ai.defineTool( + { + name: 'add', + description: 'Add two numbers together', + inputSchema: z.object({ + a: z.number(), + b: z.number() + }), + outputSchema: z.number(), + }, + async ({ a, b }) => a + b + ); + + // Define prompts to expose + ai.definePrompt( + { + name: 'greeting', + description: 'Generate a friendly greeting', + input: { + schema: z.object({ + name: z.string().default('friend').optional(), + }), + }, + }, + `Hello {{name}}! 
How can I help you today?` + ); + + // Define resources to expose + ai.defineResource( + { + name: 'system-info', + uri: 'system://info', + }, + async () => ({ + content: [{ + text: `System: ${process.platform}, Node: ${process.version}`, + }], + }) + ); + + // Create and start MCP server + const server = createMcpServer(ai, { + name: 'genkit-calculator', + version: '1.0.0', + }); + + server.setup().then(async () => { + await server.start(); + const transport = new StdioServerTransport(); + await server.server?.connect(transport); + }); + ``` + + + ```go + package main + + import ( + "context" + "log" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/mcp" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx) + if err != nil { + log.Fatal(err) + } + + // Define tools to expose + addTool := genkit.DefineTool(g, "add", "Add two numbers", + func(ctx context.Context, input struct{ A, B int }) (int, error) { + return input.A + input.B, nil + }) + + multiplyTool := genkit.DefineTool(g, "multiply", "Multiply two numbers", + func(ctx context.Context, input struct{ A, B int }) (int, error) { + return input.A * input.B, nil + }) + + // Create MCP server with all tools + server := mcp.NewMCPServer(g, mcp.MCPServerOptions{ + Name: "genkit-calculator", + Version: "1.0.0", + }) + + // Or create server with specific tools only + specificServer := mcp.NewMCPServer(g, mcp.MCPServerOptions{ + Name: "genkit-math", + Version: "1.0.0", + Tools: []ai.Tool{addTool, multiplyTool}, + }) + + // Start the MCP server + log.Println("Starting MCP server...") + if err := server.ServeStdio(ctx); err != nil { + log.Fatal(err) + } + } + ``` + + + ```python + import asyncio + from genkit.ai import Genkit + from genkit.plugins.mcp import MCPServer + + async def main(): + ai = Genkit() + + # Define tools to expose + @ai.define_tool( + name="add", + description="Add two numbers together", + input_schema={ + "type": "object", + "properties": { + "a": {"type": "number"}, + "b": {"type": "number"}, + }, + "required": ["a", "b"], + }, + ) + async def add_tool(a: float, b: float) -> float: + return a + b + + @ai.define_tool( + name="multiply", + description="Multiply two numbers", + input_schema={ + "type": "object", + "properties": { + "a": {"type": "number"}, + "b": {"type": "number"}, + }, + "required": ["a", "b"], + }, + ) + async def multiply_tool(a: float, b: float) -> float: + return a * b + + # Create MCP server + server = MCPServer( + ai=ai, + name="genkit-calculator", + version="1.0.0", + ) + + # Start the server + print("Starting MCP server...") + await server.serve_stdio() + + if __name__ == "__main__": + asyncio.run(main()) + ``` + + + +## Advanced Configuration + +### Transport Options + + + + ```ts + // Stdio transport (default) + const mcpHost = createMcpHost({ + mcpServers: { + local: { + command: 'node', + args: ['server.js'], + env: { DEBUG: '1' }, + cwd: '/path/to/server', + }, + }, + }); + + // HTTP transport + const httpHost = createMcpHost({ + mcpServers: { + remote: { + url: 'https://api.example.com/mcp', + headers: { + 'Authorization': 'Bearer token', + 'X-API-Key': 'key', + }, + requestInit: { + timeout: 30000, + }, + }, + }, + }); + + // Custom transport + const customHost = createMcpHost({ + mcpServers: { + custom: { + transport: myCustomTransport, + }, + }, + }); + ``` + + + ```go + // Stdio transport + client, err := mcp.NewGenkitMCPClient(mcp.MCPClientOptions{ + Name: "stdio-server", + Stdio: 
&mcp.StdioConfig{ + Command: "python", + Args: []string{"server.py"}, + Env: []string{"DEBUG=1", "API_KEY=secret"}, + }, + }) + + // SSE transport + sseClient, err := mcp.NewGenkitMCPClient(mcp.MCPClientOptions{ + Name: "sse-server", + SSE: &mcp.SSEConfig{ + BaseURL: "http://localhost:3000/sse", + }, + }) + + // Disabled client (can be enabled later) + disabledClient, err := mcp.NewGenkitMCPClient(mcp.MCPClientOptions{ + Name: "optional-server", + Disabled: true, + Stdio: &mcp.StdioConfig{ + Command: "optional-server", + }, + }) + ``` + + + ```python + # Stdio transport + mcp_plugin = MCPPlugin( + servers={ + "local": { + "command": "python", + "args": ["server.py"], + "env": {"DEBUG": "1", "API_KEY": "secret"}, + "cwd": "/path/to/server", + }, + }, + ) + + # HTTP transport + http_plugin = MCPPlugin( + servers={ + "remote": { + "url": "https://api.example.com/mcp", + "headers": { + "Authorization": "Bearer token", + "X-API-Key": "key", + }, + "timeout": 30, + }, + }, + ) + + # Disabled server + disabled_plugin = MCPPlugin( + servers={ + "optional": { + "command": "optional-server", + "disabled": True, + }, + }, + ) + ``` + + + +### Error Handling and Lifecycle Management + + + + ```ts + const mcpHost = createMcpHost({ + name: 'robust-mcp-client', + mcpServers: { + fs: { + command: 'npx', + args: ['-y', '@modelcontextprotocol/server-filesystem', process.cwd()], + }, + }, + }); + + try { + // Wait for connections to be established + await mcpHost.ready(); + + // Check server status + const activeServers = await mcpHost.getActiveServers(); + console.log('Active servers:', activeServers); + + // Use tools with error handling + const tools = await mcpHost.getActiveTools(ai); + + const { text } = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: 'List files safely', + tools: tools, + }); + + } catch (error) { + console.error('MCP operation failed:', error); + } finally { + // Always clean up + await mcpHost.close(); + } + ``` + + + ```go + manager, err := mcp.NewMCPManager(mcp.MCPManagerOptions{ + Name: "robust-app", + MCPServers: []mcp.MCPServerConfig{ + { + Name: "filesystem", + Config: mcp.MCPClientOptions{ + Name: "fs-server", + Stdio: &mcp.StdioConfig{ + Command: "npx", + Args: []string{"-y", "@modelcontextprotocol/server-filesystem", "."}, + }, + }, + }, + }, + }) + if err != nil { + log.Fatal(err) + } + + // Graceful shutdown + defer func() { + if err := manager.Close(); err != nil { + log.Printf("Error closing MCP manager: %v", err) + } + }() + + // Check server health + tools, err := manager.GetActiveTools(ctx, g) + if err != nil { + log.Printf("Failed to get tools: %v", err) + return + } + + // Use tools with error handling + resp, err := genkit.Generate(ctx, g, + ai.WithModel(myModel), + ai.WithPrompt("List files safely"), + ai.WithTools(tools...), + ) + if err != nil { + log.Printf("Generation failed: %v", err) + return + } + ``` + + + ```python + async def robust_mcp_usage(): + mcp_plugin = MCPPlugin( + name="robust-client", + servers={ + "filesystem": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-filesystem", "."], + }, + }, + ) + + try: + # Wait for connections + await mcp_plugin.ready() + + # Check server status + active_servers = await mcp_plugin.get_active_servers() + print(f"Active servers: {active_servers}") + + # Get tools with error handling + tools = await mcp_plugin.get_active_tools() + + response = await ai.generate( + model="googleai/gemini-2.5-flash", + prompt="List files safely", + tools=tools, + ) + + except Exception 
as error: + print(f"MCP operation failed: {error}") + finally: + # Clean up connections + await mcp_plugin.close() + ``` + + + +## Testing and Development + +### Testing Your MCP Server + + + + ```bash + # Test with MCP Inspector + npx @modelcontextprotocol/inspector node dist/server.js + + # Test with custom client + node test-client.js + ``` + + ```ts + // test-client.js + import { createMcpClient } from '@genkit-ai/mcp'; + + const client = createMcpClient({ + name: 'test-client', + mcpServer: { + command: 'node', + args: ['dist/server.js'], + }, + }); + + await client.ready(); + + // Test tools + const tools = await client.getActiveTools(); + console.log('Available tools:', tools.map(t => t.name)); + + // Test prompts + const prompts = await client.getActivePrompts(); + console.log('Available prompts:', prompts.map(p => p.name)); + + await client.disable(); + ``` + + + ```bash + # Build your server + go build -o server main.go + + # Test with MCP Inspector + npx @modelcontextprotocol/inspector ./server + + # Test with custom client + go run test-client.go + ``` + + ```go + // test-client.go + package main + + import ( + "context" + "log" + "github.com/firebase/genkit/go/plugins/mcp" + ) + + func main() { + ctx := context.Background() + + client, err := mcp.NewGenkitMCPClient(mcp.MCPClientOptions{ + Name: "test-client", + Stdio: &mcp.StdioConfig{ + Command: "./server", + }, + }) + if err != nil { + log.Fatal(err) + } + + // Test server capabilities + tools, err := client.GetActiveTools(ctx, nil) + if err != nil { + log.Fatal(err) + } + + log.Printf("Available tools: %d", len(tools)) + + // Clean up + client.Close() + } + ``` + + + ```bash + # Test with MCP Inspector + npx @modelcontextprotocol/inspector python server.py + + # Test with custom client + python test_client.py + ``` + + ```python + # test_client.py + import asyncio + from genkit.plugins.mcp import MCPClient + + async def test_server(): + client = MCPClient( + name="test-client", + server_config={ + "command": "python", + "args": ["server.py"], + }, + ) + + await client.connect() + + # Test server capabilities + tools = await client.get_active_tools() + print(f"Available tools: {len(tools)}") + + prompts = await client.get_active_prompts() + print(f"Available prompts: {len(prompts)}") + + await client.disconnect() + + if __name__ == "__main__": + asyncio.run(test_server()) + ``` + + + +## Best Practices + +### Security Considerations + +1. **Validate MCP server sources**: Only connect to trusted MCP servers +2. **Sanitize inputs**: Validate all data passed to MCP tools +3. **Limit permissions**: Run MCP servers with minimal required permissions +4. **Monitor resource usage**: Track memory and CPU usage of MCP processes + +### Performance Optimization + +1. **Connection pooling**: Reuse MCP connections when possible +2. **Lazy loading**: Connect to servers only when needed +3. **Timeout configuration**: Set appropriate timeouts for MCP operations +4. **Resource cleanup**: Always close connections and clean up resources + +### Error Handling + +1. **Graceful degradation**: Handle MCP server failures gracefully +2. **Retry logic**: Implement retry mechanisms for transient failures +3. **Logging**: Log MCP operations for debugging and monitoring +4. 
**Fallback strategies**: Provide alternatives when MCP tools are unavailable + +## Next Steps + +- Learn about [tool calling](/unified-docs/tool-calling) to understand how MCP tools integrate with Genkit +- Explore [creating flows](/unified-docs/creating-flows) to build workflows that leverage MCP capabilities +- See the [MCP Server guide](/unified-docs/mcp-server) for creating your own MCP servers +- Check out the [official MCP documentation](https://modelcontextprotocol.io) for more details on the protocol diff --git a/src/content/docs/unified-docs/plugins/ollama.mdx b/src/content/docs/unified-docs/plugins/ollama.mdx new file mode 100644 index 00000000..6d3335b1 --- /dev/null +++ b/src/content/docs/unified-docs/plugins/ollama.mdx @@ -0,0 +1,575 @@ +--- +title: Ollama plugin +description: Learn how to use Ollama for local AI models with Genkit across JavaScript, Go, and Python, including setup, configuration, and usage for both text generation and embeddings. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +The Ollama plugin provides interfaces to local LLMs supported by [Ollama](https://ollama.com/), enabling you to run powerful AI models locally without requiring cloud API keys or internet connectivity. + +## Prerequisites + +Before using the Ollama plugin, you need to install and run the Ollama server locally: + +1. **Download and install Ollama** from [ollama.com/download](https://ollama.com/download) +2. **Download models** using the Ollama CLI: + ```bash + ollama pull gemma + ollama pull llama2 + ollama pull nomic-embed-text # for embeddings + ``` +3. **Start the Ollama server** (usually starts automatically after installation) + +## Installation and Setup + + + + Install the Ollama plugin: + + ```bash + npm install genkitx-ollama + ``` + + Configure the plugin when initializing Genkit: + + ```ts + import { genkit } from 'genkit'; + import { ollama } from 'genkitx-ollama'; + + const ai = genkit({ + plugins: [ + ollama({ + models: [ + { + name: 'gemma', + type: 'generate', // 'chat' | 'generate' | undefined + }, + { + name: 'llama2', + type: 'chat', + }, + ], + serverAddress: 'http://127.0.0.1:11434', // default local address + }), + ], + }); + ``` + + + The Ollama plugin is available through the Ollama package: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/ollama" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&ollama.Ollama{ + ServerAddress: "http://127.0.0.1:11434", + Models: []ollama.ModelConfig{ + {Name: "gemma", Type: "generate"}, + {Name: "llama2", Type: "chat"}, + }, + }), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + + Install the Ollama plugin: + + ```bash + pip install genkit-plugin-ollama + ``` + + Configure the plugin when initializing Genkit: + + ```python + from genkit.ai import Genkit + from genkit.plugins.ollama import Ollama + + ai = Genkit( + plugins=[Ollama( + server_address="http://127.0.0.1:11434", + models=[ + {"name": "gemma", "type": "generate"}, + {"name": "llama2", "type": "chat"}, + ], + )], + ) + ``` + + + +## Basic Usage + + + + Use Ollama models for text generation: + + ```ts + // Basic text generation + const llmResponse = await ai.generate({ + model: 'ollama/gemma', + prompt: 'Tell me a joke about programming.', + }); + + console.log(llmResponse.text); + + // Chat-style interaction + const chatResponse = await ai.generate({ + model: 
'ollama/llama2', + prompt: 'What are the benefits of using local AI models?', + config: { + temperature: 0.7, + maxTokens: 500, + }, + }); + + // Using in a flow + export const localAIFlow = ai.defineFlow( + { + name: 'localAIFlow', + inputSchema: z.object({ question: z.string() }), + outputSchema: z.object({ answer: z.string() }), + }, + async ({ question }) => { + const response = await ai.generate({ + model: 'ollama/gemma', + prompt: `Answer this question: ${question}`, + }); + return { answer: response.text }; + }, + ); + ``` + + + Use Ollama models with the generation API: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/ollama" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&ollama.Ollama{ + ServerAddress: "http://127.0.0.1:11434", + Models: []ollama.ModelConfig{ + {Name: "gemma", Type: "generate"}, + }, + }), + genkit.WithDefaultModel("ollama/gemma"), + ) + if err != nil { + log.Fatal(err) + } + + // Generate content + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("Tell me a joke about programming."), + ) + if err != nil { + log.Fatal(err) + } + + fmt.Println(resp.Text()) + } + ``` + + + Use Ollama models with the generation API: + + ```python + from genkit.ai import Genkit + from genkit.plugins.ollama import Ollama, ollama_name + + ai = Genkit( + plugins=[Ollama( + server_address="http://127.0.0.1:11434", + models=[ + {"name": "gemma", "type": "generate"}, + {"name": "llama2", "type": "chat"}, + ], + )], + model=ollama_name('gemma'), + ) + + # Generate content + response = await ai.generate('Tell me a joke about programming.') + print(response.text) + + # With configuration + response = await ai.generate( + prompt='What are the benefits of using local AI models?', + model=ollama_name('llama2'), + config={ + 'temperature': 0.7, + 'max_tokens': 500, + } + ) + ``` + + + +## Embeddings + + + + Use Ollama for text embeddings: + + ```ts + const ai = genkit({ + plugins: [ + ollama({ + serverAddress: 'http://localhost:11434', + embedders: [ + { name: 'nomic-embed-text', dimensions: 768 }, + { name: 'all-minilm', dimensions: 384 }, + ], + }), + ], + }); + + // Generate embeddings + const embeddings = await ai.embed({ + embedder: 'ollama/nomic-embed-text', + content: 'Some text to embed!', + }); + + console.log('Embedding dimensions:', embeddings.length); + + // Use with vector databases + const ai = genkit({ + plugins: [ + ollama({ + embedders: [{ name: 'nomic-embed-text', dimensions: 768 }], + }), + chroma([ + { + embedder: 'ollama/nomic-embed-text', + collectionName: 'local-embeddings', + }, + ]), + ], + }); + + // Embedding flow + export const embedFlow = ai.defineFlow( + { + name: 'embedFlow', + inputSchema: z.object({ text: z.string() }), + outputSchema: z.object({ embedding: z.array(z.number()) }), + }, + async ({ text }) => { + const embedding = await ai.embed({ + embedder: 'ollama/nomic-embed-text', + content: text, + }); + return { embedding }; + }, + ); + ``` + + + Generate embeddings using Ollama models: + + ```go + g, err := genkit.Init(ctx, + genkit.WithPlugins(&ollama.Ollama{ + ServerAddress: "http://127.0.0.1:11434", + Embedders: []ollama.EmbedderConfig{ + {Name: "nomic-embed-text", Dimensions: 768}, + }, + }), + ) + + // Generate embeddings + embeddings, err := genkit.Embed(ctx, g, + ai.WithEmbedder("ollama/nomic-embed-text"), + ai.WithEmbedContent("Some text to embed!"), + ) + if err != nil { + log.Fatal(err) + 
} + + fmt.Printf("Generated %d-dimensional embedding\n", len(embeddings)) + ``` + + + Generate embeddings using Ollama models: + + ```python + ai = Genkit( + plugins=[Ollama( + server_address="http://127.0.0.1:11434", + embedders=[ + {"name": "nomic-embed-text", "dimensions": 768}, + {"name": "all-minilm", "dimensions": 384}, + ], + )], + ) + + # Generate embeddings + embeddings = await ai.embed( + embedder=ollama_name('nomic-embed-text'), + content='Some text to embed!', + ) + + print(f"Generated {len(embeddings)}-dimensional embedding") + ``` + + + +## Authentication and Remote Deployments + + + + For remote Ollama deployments that require authentication: + + ### Static Headers + + ```ts + const ai = genkit({ + plugins: [ + ollama({ + models: [{ name: 'gemma' }], + serverAddress: 'https://my-ollama-deployment.com', + requestHeaders: { + 'api-key': 'your-api-key-here', + 'authorization': 'Bearer your-token', + }, + }), + ], + }); + ``` + + ### Dynamic Headers + + ```ts + import { GoogleAuth } from 'google-auth-library'; + + const ai = genkit({ + plugins: [ + ollama({ + models: [{ name: 'gemma' }], + serverAddress: 'https://my-ollama-deployment.com', + requestHeaders: async (params) => { + const headers = await fetchWithAuthHeader(params.serverAddress); + return { Authorization: headers['Authorization'] }; + }, + }), + ], + }); + + // Function to fetch auth headers + async function fetchWithAuthHeader(url: string) { + const auth = new GoogleAuth(); + const client = await auth.getIdTokenClient(url); + const headers = await client.getRequestHeaders(url); + return headers; + } + ``` + + ### Environment-based Configuration + + ```ts + const ollamaConfig = process.env.NODE_ENV === 'production' + ? { + models: [{ name: 'gemma' }], + serverAddress: 'https://my-ollama-deployment.com', + requestHeaders: { 'api-key': process.env.OLLAMA_API_KEY }, + } + : { + models: [{ name: 'gemma' }], + serverAddress: 'http://127.0.0.1:11434', + }; + + const ai = genkit({ + plugins: [ollama(ollamaConfig)], + }); + ``` + + + For remote Ollama deployments with authentication: + + ```go + g, err := genkit.Init(ctx, + genkit.WithPlugins(&ollama.Ollama{ + ServerAddress: "https://my-ollama-deployment.com", + Models: []ollama.ModelConfig{ + {Name: "gemma", Type: "generate"}, + }, + RequestHeaders: map[string]string{ + "api-key": os.Getenv("OLLAMA_API_KEY"), + "authorization": "Bearer " + os.Getenv("OLLAMA_TOKEN"), + }, + }), + ) + ``` + + + For remote Ollama deployments with authentication: + + ```python + import os + + ai = Genkit( + plugins=[Ollama( + server_address="https://my-ollama-deployment.com", + models=[{"name": "gemma", "type": "generate"}], + request_headers={ + "api-key": os.getenv("OLLAMA_API_KEY"), + "authorization": f"Bearer {os.getenv('OLLAMA_TOKEN')}", + }, + )], + ) + ``` + + + +## Model Configuration + +### Model Types + + + + Configure different model types for different use cases: + + ```ts + const ai = genkit({ + plugins: [ + ollama({ + models: [ + // Chat models for conversational AI + { name: 'llama2', type: 'chat' }, + { name: 'mistral', type: 'chat' }, + + // Generate models for text completion + { name: 'gemma', type: 'generate' }, + { name: 'codellama', type: 'generate' }, + + // Auto-detect type (default) + { name: 'phi' }, // type will be auto-detected + ], + serverAddress: 'http://127.0.0.1:11434', + }), + ], + }); + ``` + + + Configure different model types: + + ```go + g, err := genkit.Init(ctx, + genkit.WithPlugins(&ollama.Ollama{ + ServerAddress: "http://127.0.0.1:11434", + Models: 
[]ollama.ModelConfig{ + {Name: "llama2", Type: "chat"}, + {Name: "mistral", Type: "chat"}, + {Name: "gemma", Type: "generate"}, + {Name: "codellama", Type: "generate"}, + {Name: "phi"}, // auto-detect type + }, + }), + ) + ``` + + + Configure different model types: + + ```python + ai = Genkit( + plugins=[Ollama( + server_address="http://127.0.0.1:11434", + models=[ + {"name": "llama2", "type": "chat"}, + {"name": "mistral", "type": "chat"}, + {"name": "gemma", "type": "generate"}, + {"name": "codellama", "type": "generate"}, + {"name": "phi"}, # auto-detect type + ], + )], + ) + ``` + + + +## Popular Models + +Here are some popular models you can use with Ollama: + +### Text Generation Models +- **Llama 2**: `llama2` (7B, 13B, 70B variants) +- **Gemma**: `gemma` (2B, 7B variants) +- **Mistral**: `mistral` (7B) +- **Code Llama**: `codellama` (7B, 13B, 34B variants) +- **Phi**: `phi` (3B) +- **Qwen**: `qwen` (various sizes) + +### Embedding Models +- **Nomic Embed Text**: `nomic-embed-text` (768 dimensions) +- **All-MiniLM**: `all-minilm` (384 dimensions) +- **BGE**: `bge-large` (1024 dimensions) + +### Specialized Models +- **Llava**: `llava` (multimodal - text and images) +- **Dolphin**: `dolphin-mistral` (uncensored variant) +- **Orca**: `orca-mini` (smaller, efficient model) + +## Configuration Options + +### Generation Parameters +- `temperature`: Randomness (0.0-2.0) +- `top_p`: Nucleus sampling (0.0-1.0) +- `top_k`: Top-k sampling +- `repeat_penalty`: Repetition penalty +- `seed`: Random seed for reproducible outputs +- `num_predict`: Maximum tokens to generate +- `stop`: Stop sequences + +### Performance Tuning +- `num_ctx`: Context window size +- `num_batch`: Batch size for processing +- `num_gpu`: Number of GPU layers to use +- `num_thread`: Number of CPU threads + +## Advantages of Local Models + +### Privacy and Security +- **Data stays local**: No data sent to external APIs +- **No API keys required**: No risk of key exposure +- **Offline capability**: Works without internet connection +- **Full control**: Complete control over model behavior + +### Cost and Performance +- **No usage fees**: No per-token or per-request charges +- **Predictable costs**: Only hardware and electricity costs +- **Low latency**: No network round-trips +- **Customizable**: Fine-tune models for specific use cases + +### Development Benefits +- **Rapid prototyping**: No API rate limits +- **Consistent availability**: No service outages +- **Version control**: Pin specific model versions +- **Experimentation**: Try different models easily + +## Next Steps + +- Learn about [generating content](/unified-docs/generating-content) to understand how to use these models effectively +- Explore [RAG](/unified-docs/rag) to implement retrieval-augmented generation with local embeddings +- See [creating flows](/unified-docs/creating-flows) to build structured AI workflows with local models +- Check out [tool calling](/unified-docs/tool-calling) to add interactive capabilities to your local AI applications diff --git a/src/content/docs/unified-docs/plugins/openai.mdx b/src/content/docs/unified-docs/plugins/openai.mdx new file mode 100644 index 00000000..31ea1d8b --- /dev/null +++ b/src/content/docs/unified-docs/plugins/openai.mdx @@ -0,0 +1,653 @@ +--- +title: OpenAI plugin +description: Learn how to use OpenAI models with Genkit across JavaScript, Go, and Python, including GPT models, DALL-E image generation, Whisper transcription, and text-to-speech capabilities. 
+--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +The OpenAI plugin provides access to OpenAI's powerful AI models, including GPT for text generation, DALL-E for image generation, Whisper for speech transcription, and text-to-speech models. + +## Installation and Setup + + + + Install the OpenAI plugin: + + ```bash + npm install @genkit-ai/compat-oai + ``` + + Configure the plugin when initializing Genkit: + + ```ts + import { genkit } from 'genkit'; + import { openAI } from '@genkit-ai/compat-oai/openai'; + + const ai = genkit({ + plugins: [openAI()], + }); + ``` + + :::note + The OpenAI plugin is built on top of the `openAICompatible` plugin and is pre-configured for OpenAI's API endpoints. + ::: + + + The OpenAI plugin is available through the OpenAI-compatible plugin: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/openai" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&openai.OpenAI{ + APIKey: "your-api-key", // or use environment variable + }), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + + Install the OpenAI plugin: + + ```bash + pip install genkit-plugin-openai + ``` + + Configure the plugin when initializing Genkit: + + ```python + from genkit.ai import Genkit + from genkit.plugins.openai import OpenAI + + ai = Genkit( + plugins=[OpenAI()], + ) + ``` + + + +## API Key Configuration + +The plugin requires an API key for the OpenAI API, which you can get from the [OpenAI Platform](https://platform.openai.com/api-keys). + + + + Configure your API key by doing one of the following: + + - Set the `OPENAI_API_KEY` environment variable: + ```bash + export OPENAI_API_KEY=your_api_key_here + ``` + + - Specify the API key when initializing the plugin: + ```ts + openAI({ apiKey: yourKey }); + ``` + + :::caution + Don't embed your API key directly in code! Use environment variables or a service like Google Cloud Secret Manager. + ::: + + + Set the `OPENAI_API_KEY` environment variable: + + ```bash + export OPENAI_API_KEY=your_api_key_here + ``` + + Or specify it in the plugin configuration: + + ```go + g, err := genkit.Init(ctx, + genkit.WithPlugins(&openai.OpenAI{ + APIKey: os.Getenv("OPENAI_API_KEY"), + }), + ) + ``` + + + Set the `OPENAI_API_KEY` environment variable: + + ```bash + export OPENAI_API_KEY=your_api_key_here + ``` + + The plugin will automatically use this environment variable. 
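+
+    If you need to load the key from somewhere other than the environment (for example, a secret manager), you can also pass it to the plugin explicitly. This is a minimal sketch, assuming the plugin accepts an `api_key` argument — the same parameter used in the xAI configuration later in this guide:
+
+    ```python
+    import os
+
+    from genkit.ai import Genkit
+    from genkit.plugins.openai import OpenAI
+
+    # Explicit key configuration (assumed `api_key` parameter);
+    # prefer environment variables or a secret manager in production.
+    ai = Genkit(
+        plugins=[OpenAI(api_key=os.environ["OPENAI_API_KEY"])],
+    )
+    ```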
+ + + +## Text Generation + + + + Use OpenAI's GPT models for text generation: + + ```ts + import { openAI } from '@genkit-ai/compat-oai/openai'; + + const ai = genkit({ + plugins: [openAI()], + }); + + // Basic text generation + const llmResponse = await ai.generate({ + prompt: 'Tell me a joke about programming', + model: openAI.model('gpt-4o'), + }); + + // With configuration + const configuredResponse = await ai.generate({ + prompt: 'Write a creative story about AI', + model: openAI.model('gpt-4o'), + config: { + temperature: 0.7, + maxTokens: 1000, + topP: 0.9, + }, + }); + + // Using in a flow + export const jokeFlow = ai.defineFlow( + { + name: 'jokeFlow', + inputSchema: z.object({ subject: z.string() }), + outputSchema: z.object({ joke: z.string() }), + }, + async ({ subject }) => { + const llmResponse = await ai.generate({ + prompt: `tell me a joke about ${subject}`, + model: openAI.model('gpt-4o'), + }); + return { joke: llmResponse.text }; + }, + ); + ``` + + + Use OpenAI models with the generation API: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/openai" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&openai.OpenAI{}), + genkit.WithDefaultModel("openai/gpt-4o"), + ) + if err != nil { + log.Fatal(err) + } + + // Generate content + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("Tell me a joke about programming"), + ) + if err != nil { + log.Fatal(err) + } + + fmt.Println(resp.Text()) + } + ``` + + + Use OpenAI models with the generation API: + + ```python + from genkit.ai import Genkit + from genkit.plugins.openai import OpenAI, openai_name + + ai = Genkit( + plugins=[OpenAI()], + model=openai_name('gpt-4o'), + ) + + # Generate content + response = await ai.generate('Tell me a joke about programming') + print(response.text) + + # With configuration + response = await ai.generate( + prompt='Write a creative story about AI', + model=openai_name('gpt-4o'), + config={ + 'temperature': 0.7, + 'max_tokens': 1000, + 'top_p': 0.9, + } + ) + ``` + + + +## Image Generation + + + + Generate images using DALL-E models: + + ```ts + // Basic image generation + const imageResponse = await ai.generate({ + model: openAI.model('dall-e-3'), + prompt: 'A photorealistic image of a cat programming a computer.', + config: { + size: '1024x1024', + style: 'vivid', + quality: 'hd', + }, + }); + + const imageUrl = imageResponse.media()?.url; + + // DALL-E 2 for faster generation + const quickImage = await ai.generate({ + model: openAI.model('dall-e-2'), + prompt: 'A simple cartoon of a robot', + config: { + size: '512x512', + n: 2, // Generate 2 variations + }, + }); + ``` + + + Image generation requires custom implementation using the OpenAI API: + + ```go + // Image generation requires custom implementation + // Use the OpenAI Go SDK directly for DALL-E functionality + ``` + + + Image generation requires custom implementation using the OpenAI API: + + ```python + # Image generation requires custom implementation + # Use the OpenAI Python SDK directly for DALL-E functionality + ``` + + + +## Text Embeddings + + + + Generate text embeddings for vector search and similarity: + + ```ts + // Generate embeddings + const embedding = await ai.embed({ + embedder: openAI.embedder('text-embedding-ada-002'), + content: 'This is a sample text for embedding', + }); + + // Using in a flow + export const embedFlow = ai.defineFlow( + { + name: 'embedFlow', 
+ inputSchema: z.object({ text: z.string() }), + outputSchema: z.object({ embedding: z.string() }), + }, + async ({ text }) => { + const embedding = await ai.embed({ + embedder: openAI.embedder('text-embedding-ada-002'), + content: text, + }); + + return { embedding: JSON.stringify(embedding) }; + }, + ); + + // Use with vector databases + const ai = genkit({ + plugins: [ + openAI(), + chroma([ + { + embedder: openAI.embedder('text-embedding-ada-002'), + collectionName: 'my-collection', + }, + ]), + ], + }); + ``` + + + Generate embeddings using OpenAI models: + + ```go + // Generate embeddings + embeddings, err := genkit.Embed(ctx, g, + ai.WithEmbedder("openai/text-embedding-ada-002"), + ai.WithEmbedContent("This is a sample text for embedding"), + ) + if err != nil { + log.Fatal(err) + } + + fmt.Printf("Generated %d-dimensional embedding\n", len(embeddings)) + ``` + + + Generate embeddings using OpenAI models: + + ```python + # Generate embeddings + embeddings = await ai.embed( + embedder=openai_name('text-embedding-ada-002'), + content='This is a sample text for embedding', + ) + + print(f"Generated {len(embeddings)}-dimensional embedding") + ``` + + + +## Audio Processing + +### Speech-to-Text (Whisper) + + + + Transcribe audio files using Whisper: + + ```ts + import * as fs from 'fs'; + + const whisper = openAI.model('whisper-1'); + const audioFile = fs.readFileSync('path/to/your/audio.mp3'); + + const transcription = await ai.generate({ + model: whisper, + prompt: [ + { + media: { + contentType: 'audio/mp3', + url: `data:audio/mp3;base64,${audioFile.toString('base64')}`, + }, + }, + ], + config: { + language: 'en', // Optional: specify language + temperature: 0, // For more deterministic results + }, + }); + + console.log('Transcription:', transcription.text()); + + // With additional context + const contextualTranscription = await ai.generate({ + model: whisper, + prompt: [ + { text: 'This is a recording of a technical meeting about AI.' }, + { + media: { + contentType: 'audio/wav', + url: `data:audio/wav;base64,${audioFile.toString('base64')}`, + }, + }, + ], + }); + ``` + + + Audio transcription requires custom implementation using the OpenAI API: + + ```go + // Audio transcription requires custom implementation + // Use the OpenAI Go SDK directly for Whisper functionality + ``` + + + Audio transcription requires custom implementation using the OpenAI API: + + ```python + # Audio transcription requires custom implementation + # Use the OpenAI Python SDK directly for Whisper functionality + ``` + + + +### Text-to-Speech + + + + Generate speech from text: + + ```ts + import * as fs from 'fs'; + + const tts = openAI.model('tts-1'); + const speechResponse = await ai.generate({ + model: tts, + prompt: 'Hello, world! 
This is a test of text-to-speech.', + config: { + voice: 'alloy', // Options: alloy, echo, fable, onyx, nova, shimmer + response_format: 'mp3', // Options: mp3, opus, aac, flac + speed: 1.0, // 0.25 to 4.0 + }, + }); + + const audioData = speechResponse.media(); + if (audioData) { + fs.writeFileSync('output.mp3', Buffer.from(audioData.url.split(',')[1], 'base64')); + } + + // High-quality TTS + const hqSpeech = await ai.generate({ + model: openAI.model('tts-1-hd'), + prompt: 'This is high-quality text-to-speech.', + config: { + voice: 'nova', + response_format: 'wav', + }, + }); + ``` + + + Text-to-speech requires custom implementation using the OpenAI API: + + ```go + // Text-to-speech requires custom implementation + // Use the OpenAI Go SDK directly for TTS functionality + ``` + + + Text-to-speech requires custom implementation using the OpenAI API: + + ```python + # Text-to-speech requires custom implementation + # Use the OpenAI Python SDK directly for TTS functionality + ``` + + + +## Advanced Features + +### Web Search Integration + + + + Some OpenAI models support web search capabilities: + + ```ts + const llmResponse = await ai.generate({ + prompt: 'What was a positive news story from today?', + model: openAI.model('gpt-4o-search-preview'), + config: { + web_search_options: { + max_results: 5, + }, + }, + }); + ``` + + + Web search integration requires custom implementation: + + ```go + // Web search requires custom implementation + // Use the OpenAI API directly for search-enabled models + ``` + + + Web search integration requires custom implementation: + + ```python + # Web search requires custom implementation + # Use the OpenAI API directly for search-enabled models + ``` + + + +### Function Calling + + + + OpenAI models support function calling for tool integration: + + ```ts + const weatherTool = ai.defineTool({ + name: 'getWeather', + description: 'Get the current weather for a location', + inputSchema: z.object({ + location: z.string().describe('The city and state'), + }), + outputSchema: z.object({ + temperature: z.number(), + condition: z.string(), + }), + }, async ({ location }) => { + // Implementation here + return { temperature: 72, condition: 'sunny' }; + }); + + const response = await ai.generate({ + prompt: 'What\'s the weather like in San Francisco?', + model: openAI.model('gpt-4o'), + tools: [weatherTool], + }); + ``` + + + Function calling is supported through Genkit's tool system: + + ```go + // Define tools and use with OpenAI models + // See tool calling documentation for implementation details + ``` + + + Function calling is supported through Genkit's tool system: + + ```python + # Define tools and use with OpenAI models + # See tool calling documentation for implementation details + ``` + + + +### Passthrough Configuration + + + + Access new models and features without updating Genkit: + + ```ts + const llmResponse = await ai.generate({ + prompt: 'Tell me a cool story', + model: openAI.model('gpt-4-new'), // hypothetical new model + config: { + seed: 123, + new_feature_parameter: 'value', // hypothetical config for new model + logprobs: true, + top_logprobs: 5, + }, + }); + ``` + + Genkit passes this config as-is to the OpenAI API, giving you access to new model features. 
+ + + Passthrough configuration allows access to new OpenAI features: + + ```go + // Custom configuration can be passed through to the OpenAI API + // See OpenAI Go SDK documentation for available options + ``` + + + Passthrough configuration allows access to new OpenAI features: + + ```python + # Custom configuration can be passed through to the OpenAI API + # See OpenAI Python SDK documentation for available options + ``` + + + +## Available Models + +### Text Generation +- **GPT-4 family**: `gpt-4o`, `gpt-4o-mini`, `gpt-4-turbo`, `gpt-4` +- **GPT-3.5**: `gpt-3.5-turbo` +- **Search-enabled**: `gpt-4o-search-preview` + +### Image Generation +- **DALL-E 3**: `dall-e-3` (high quality, 1024x1024, 1024x1792, 1792x1024) +- **DALL-E 2**: `dall-e-2` (faster, 256x256, 512x512, 1024x1024) + +### Embeddings +- **Text embeddings**: `text-embedding-ada-002`, `text-embedding-3-small`, `text-embedding-3-large` + +### Audio +- **Speech-to-text**: `whisper-1` +- **Text-to-speech**: `tts-1`, `tts-1-hd` + +## Configuration Options + +### Text Generation +- `temperature`: Randomness (0-2) +- `max_tokens`: Maximum response length +- `top_p`: Nucleus sampling (0-1) +- `frequency_penalty`: Reduce repetition (-2 to 2) +- `presence_penalty`: Encourage new topics (-2 to 2) +- `seed`: Deterministic outputs +- `logprobs`: Return log probabilities + +### Image Generation +- `size`: Image dimensions +- `style`: `vivid` or `natural` +- `quality`: `standard` or `hd` +- `n`: Number of images (1-10 for DALL-E 2) + +### Audio +- `voice`: TTS voice selection +- `response_format`: Audio format +- `speed`: Speech rate (0.25-4.0) +- `language`: Whisper language hint +- `temperature`: Whisper randomness + +## Next Steps + +- Learn about [generating content](/unified-docs/generating-content) to understand how to use these models effectively +- Explore [tool calling](/unified-docs/tool-calling) to integrate OpenAI's function calling capabilities +- See [creating flows](/unified-docs/creating-flows) to build structured AI workflows +- Check out [RAG](/unified-docs/rag) to implement retrieval-augmented generation with OpenAI embeddings diff --git a/src/content/docs/unified-docs/plugins/vertex-ai.mdx b/src/content/docs/unified-docs/plugins/vertex-ai.mdx new file mode 100644 index 00000000..29d13cc1 --- /dev/null +++ b/src/content/docs/unified-docs/plugins/vertex-ai.mdx @@ -0,0 +1,800 @@ +--- +title: Vertex AI plugin +description: Learn how to use Google Cloud Vertex AI with Genkit across JavaScript, Go, and Python, including Gemini models, Imagen image generation, evaluation metrics, Vector Search, and text-to-speech capabilities. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +The Vertex AI plugin provides interfaces to several Google Cloud AI services, offering enterprise-grade AI capabilities with advanced features like grounding, evaluation metrics, and vector search. 
+ +## Available Services + +The Vertex AI plugin provides access to: + +- **Google generative AI models**: Gemini text generation, Imagen image generation, text and multimodal embeddings +- **Evaluation metrics**: BLEU, ROUGE, Fluency, Safety, Groundedness, and Summarization metrics through Vertex AI Rapid Evaluation API +- **Vector Search**: Enterprise-grade vector database service +- **Text-to-Speech**: Advanced speech synthesis capabilities +- **Model Garden**: Access to third-party models like Claude 3, Llama 3.1, and Mistral + +## Installation and Setup + + + + Install the Vertex AI plugin: + + ```bash + npm install @genkit-ai/vertexai + ``` + + If you want to locally run flows that use this plugin, you also need the [Google Cloud CLI tool](https://cloud.google.com/sdk/docs/install) installed. + + Configure the plugin when initializing Genkit: + + ```ts + import { genkit } from 'genkit'; + import { vertexAI } from '@genkit-ai/vertexai'; + + const ai = genkit({ + plugins: [vertexAI({ location: 'us-central1' })], + }); + ``` + + + The Vertex AI plugin is included with the Genkit Go package: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlecloud" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlecloud.GoogleCloud{ + ProjectID: "your-project-id", + Location: "us-central1", + }), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + + Vertex AI support in Python is available through the Google Cloud plugin: + + ```bash + pip install genkit-plugin-google-cloud + ``` + + Configure the plugin when initializing Genkit: + + ```python + from genkit.ai import Genkit + from genkit.plugins.google_cloud import GoogleCloud + + ai = Genkit( + plugins=[GoogleCloud( + project_id="your-project-id", + location="us-central1", + )], + ) + ``` + + + +## Authentication and Configuration + + + + The plugin requires: + + 1. **Google Cloud project ID**: Set via `projectId` in configuration or `GCLOUD_PROJECT` environment variable + 2. **API location**: Set via `location` in configuration or `GCLOUD_LOCATION` environment variable + 3. 
**Authentication**: Set up Google Cloud Application Default Credentials + + **Local development:** + ```bash + gcloud auth application-default login --project YOUR_PROJECT_ID + ``` + + **Production environments:** + - Google Cloud environments (Cloud Functions, Cloud Run) are automatically authenticated + - Other environments: see [Application Default Credentials](https://cloud.google.com/docs/authentication/provide-credentials-adc) docs + + **Required IAM role:** Vertex AI User (`roles/aiplatform.user`) + + + Configure authentication and project settings: + + ```go + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlecloud.GoogleCloud{ + ProjectID: "your-project-id", + Location: "us-central1", + }), + ) + ``` + + **Authentication setup:** + ```bash + gcloud auth application-default login --project YOUR_PROJECT_ID + ``` + + **Required IAM role:** Vertex AI User (`roles/aiplatform.user`) + + + Configure the plugin with your project details: + + ```python + ai = Genkit( + plugins=[GoogleCloud( + project_id="your-project-id", + location="us-central1", + )], + ) + ``` + + **Authentication setup:** + ```bash + gcloud auth application-default login --project YOUR_PROJECT_ID + ``` + + **Required IAM role:** Vertex AI User (`roles/aiplatform.user`) + + + +## Basic Usage + + + + Use Vertex AI models for text generation: + + ```ts + import { vertexAI } from '@genkit-ai/vertexai'; + + const ai = genkit({ + plugins: [vertexAI({ location: 'us-central1' })], + }); + + const llmResponse = await ai.generate({ + model: vertexAI.model('gemini-2.5-flash'), + prompt: 'What should I do when I visit Melbourne?', + }); + + // Generate embeddings + const embeddings = await ai.embed({ + embedder: vertexAI.embedder('gemini-embedding-001'), + content: 'How many widgets do you have in stock?', + }); + ``` + + + Use Vertex AI models with the generation API: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlecloud" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlecloud.GoogleCloud{ + ProjectID: "your-project-id", + Location: "us-central1", + }), + genkit.WithDefaultModel("vertexai/gemini-2.5-flash"), + ) + if err != nil { + log.Fatal(err) + } + + // Generate content + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("What should I do when I visit Melbourne?"), + ) + if err != nil { + log.Fatal(err) + } + + fmt.Println(resp.Text()) + } + ``` + + + Use Vertex AI models with the generation API: + + ```python + from genkit.ai import Genkit + from genkit.plugins.google_cloud import GoogleCloud, vertex_ai_name + + ai = Genkit( + plugins=[GoogleCloud( + project_id="your-project-id", + location="us-central1", + )], + model=vertex_ai_name('gemini-2.5-flash'), + ) + + # Generate content + response = await ai.generate('What should I do when I visit Melbourne?') + print(response.text) + + # Generate embeddings + embeddings = await ai.embed( + embedder=vertex_ai_name('gemini-embedding-001'), + content='How many widgets do you have in stock?', + ) + ``` + + + +## Advanced Features + +### Grounding with Google Search and Private Data + + + + Ground Gemini responses using Google Search or your own data: + + ```ts + // Google Search grounding + await ai.generate({ + model: vertexAI.model('gemini-2.5-flash'), + prompt: 'What are the latest developments in AI?', + config: { + googleSearchRetrieval: { + disableAttribution: true, + } + } + }); + + // Private data 
grounding + await ai.generate({ + model: vertexAI.model('gemini-2.5-flash'), + prompt: 'What does our company policy say about remote work?', + config: { + vertexRetrieval: { + datastore: { + projectId: 'your-cloud-project', + location: 'us-central1', + collection: 'your-collection', + }, + disableAttribution: true, + } + } + }); + ``` + + :::caution[Pricing] + Vertex AI charges additional fees for grounding requests. See [Vertex AI pricing](https://cloud.google.com/vertex-ai/generative-ai/pricing) for details. + ::: + + + Grounding functionality is available through the Vertex AI API. Implement using the Google Cloud SDK directly or through custom configuration: + + ```go + // Grounding requires custom implementation using the Vertex AI API + // See Google Cloud documentation for grounding configuration + ``` + + + Grounding functionality is available through the Vertex AI API. Implement using the Google Cloud SDK directly: + + ```python + # Grounding requires custom implementation using the Vertex AI API + # See Google Cloud documentation for grounding configuration + ``` + + + +### Image Generation with Imagen + + + + Generate images from text prompts: + + ```ts + // Basic image generation + const response = await ai.generate({ + model: vertexAI.model('imagen-3.0-generate-002'), + output: { format: 'media' }, + prompt: 'a banana riding a bicycle', + }); + + // Advanced image editing + const baseImg = fs.readFileSync('base.png', { encoding: 'base64' }); + const maskImg = fs.readFileSync('mask.png', { encoding: 'base64' }); + + const editResponse = await ai.generate({ + model: vertexAI.model('imagen-3.0-generate-002'), + output: { format: 'media' }, + prompt: [ + { media: { url: `data:image/png;base64,${baseImg}` } }, + { + media: { url: `data:image/png;base64,${maskImg}` }, + metadata: { type: 'mask' }, + }, + { text: 'replace the background with a sunset' }, + ], + config: { + editConfig: { + editMode: 'outpainting', + }, + }, + }); + ``` + + + Image generation is available through the Vertex AI API. Implement using the Google Cloud SDK: + + ```go + // Image generation requires custom implementation using the Vertex AI API + // See Vertex AI Imagen documentation for implementation details + ``` + + + Image generation is available through the Vertex AI API. 
Implement using the Google Cloud SDK: + + ```python + # Image generation requires custom implementation using the Vertex AI API + # See Vertex AI Imagen documentation for implementation details + ``` + + + +### Multimodal Embeddings + + + + Generate embeddings from text, images, and video: + + ```ts + // Text embeddings + const textEmbeddings = await ai.embed({ + embedder: vertexAI.embedder('gemini-embedding-001'), + content: 'How many widgets do you have in stock?', + }); + + // Multimodal embeddings + const multimodalEmbeddings = await ai.embed({ + embedder: vertexAI.embedder('multimodal-embedding-001'), + content: { + content: [ + { + media: { + url: 'gs://cloud-samples-data/generative-ai/video/pixel8.mp4', + contentType: 'video/mp4', + }, + }, + ], + }, + }); + ``` + + + Generate embeddings using Vertex AI models: + + ```go + // Text embeddings + embeddings, err := genkit.Embed(ctx, g, + ai.WithEmbedder("vertexai/gemini-embedding-001"), + ai.WithEmbedContent("How many widgets do you have in stock?"), + ) + if err != nil { + log.Fatal(err) + } + ``` + + + Generate embeddings using Vertex AI models: + + ```python + # Text embeddings + embeddings = await ai.embed( + embedder=vertex_ai_name('gemini-embedding-001'), + content='How many widgets do you have in stock?', + ) + ``` + + + +## Model Garden Integration + + + + Access third-party models through Vertex AI Model Garden: + + ### Claude 3 Models + + ```ts + import { vertexAIModelGarden } from '@genkit-ai/vertexai/modelgarden'; + + const ai = genkit({ + plugins: [ + vertexAIModelGarden({ + location: 'us-central1', + models: ['claude-3-haiku', 'claude-3-sonnet', 'claude-3-opus'], + }), + ], + }); + + const response = await ai.generate({ + model: 'claude-3-sonnet', + prompt: 'What should I do when I visit Melbourne?', + }); + ``` + + ### Llama 3.1 405b + + ```ts + const ai = genkit({ + plugins: [ + vertexAIModelGarden({ + location: 'us-central1', + models: ['llama3-405b-instruct-maas'], + }), + ], + }); + + const response = await ai.generate({ + model: 'llama3-405b-instruct-maas', + prompt: 'Write a function that adds two numbers together', + }); + ``` + + ### Mistral Models + + ```ts + const ai = genkit({ + plugins: [ + vertexAIModelGarden({ + location: 'us-central1', + models: ['mistral-large', 'mistral-nemo', 'codestral'], + }), + ], + }); + + const response = await ai.generate({ + model: 'mistral-large', + prompt: 'Write a function that adds two numbers together', + config: { + version: 'mistral-large-2411', + temperature: 0.7, + maxOutputTokens: 1024, + topP: 0.9, + stopSequences: ['###'], + }, + }); + ``` + + + Model Garden integration requires custom implementation using the Vertex AI API: + + ```go + // Model Garden models require custom implementation + // See Vertex AI Model Garden documentation for setup + ``` + + + Model Garden integration requires custom implementation using the Vertex AI API: + + ```python + # Model Garden models require custom implementation + # See Vertex AI Model Garden documentation for setup + ``` + + + +## Evaluation Metrics + + + + Use Vertex AI Rapid Evaluation API for model evaluation: + + ```ts + import { vertexAIEvaluation, VertexAIEvaluationMetricType } from '@genkit-ai/vertexai/evaluation'; + + const ai = genkit({ + plugins: [ + vertexAIEvaluation({ + location: 'us-central1', + metrics: [ + VertexAIEvaluationMetricType.SAFETY, + { + type: VertexAIEvaluationMetricType.ROUGE, + metricSpec: { + rougeType: 'rougeLsum', + }, + }, + ], + }), + ], + }); + ``` + + Available metrics: + - **BLEU**: 
Translation quality + - **ROUGE**: Summarization quality + - **Fluency**: Text fluency + - **Safety**: Content safety + - **Groundedness**: Factual accuracy + - **Summarization Quality/Helpfulness/Verbosity**: Summary evaluation + + Run evaluations: + ```bash + genkit eval:run + genkit eval:flow -e vertexai/safety + ``` + + + Evaluation metrics are available through the Vertex AI API: + + ```go + // Evaluation requires custom implementation using the Vertex AI API + // See Vertex AI Rapid Evaluation documentation + ``` + + + Evaluation metrics are available through the Vertex AI API: + + ```python + # Evaluation requires custom implementation using the Vertex AI API + # See Vertex AI Rapid Evaluation documentation + ``` + + + +## Vector Search + + + + Use Vertex AI Vector Search for enterprise-grade vector operations: + + ### Setup + + 1. Create a Vector Search index in the [Google Cloud Console](https://console.cloud.google.com/vertex-ai/matching-engine/indexes) + 2. Configure dimensions based on your embedding model: + - `gemini-embedding-001`: 768 dimensions + - `text-multilingual-embedding-002`: 768 dimensions + - `multimodalEmbedding001`: 128, 256, 512, or 1408 dimensions + 3. Deploy the index to a standard endpoint + + ### Configuration + + ```ts + import { vertexAIVectorSearch } from '@genkit-ai/vertexai/vectorsearch'; + import { getFirestoreDocumentIndexer, getFirestoreDocumentRetriever } from '@genkit-ai/vertexai/vectorsearch'; + + const ai = genkit({ + plugins: [ + vertexAIVectorSearch({ + projectId: 'your-project-id', + location: 'us-central1', + vectorSearchOptions: [ + { + indexId: 'your-index-id', + indexEndpointId: 'your-endpoint-id', + deployedIndexId: 'your-deployed-index-id', + publicDomainName: 'your-domain-name', + documentRetriever: firestoreDocumentRetriever, + documentIndexer: firestoreDocumentIndexer, + embedder: vertexAI.embedder('gemini-embedding-001'), + }, + ], + }), + ], + }); + ``` + + ### Usage + + ```ts + import { vertexAiIndexerRef, vertexAiRetrieverRef } from '@genkit-ai/vertexai/vectorsearch'; + + // Index documents + await ai.index({ + indexer: vertexAiIndexerRef({ + indexId: 'your-index-id', + }), + documents, + }); + + // Retrieve similar documents + const results = await ai.retrieve({ + retriever: vertexAiRetrieverRef({ + indexId: 'your-index-id', + }), + query: queryDocument, + }); + ``` + + :::caution[Pricing] + Vector Search has both ingestion and hosting costs. See [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing#vectorsearch) for details. 
+ ::: + + + Vector Search integration requires custom implementation using the Vertex AI API: + + ```go + // Vector Search requires custom implementation + // See Vertex AI Vector Search documentation for setup + ``` + + + Vector Search integration requires custom implementation using the Vertex AI API: + + ```python + # Vector Search requires custom implementation + # See Vertex AI Vector Search documentation for setup + ``` + + + +## Text-to-Speech + + + + Generate high-quality speech from text: + + ```ts + import { writeFile } from 'node:fs/promises'; + + const response = await ai.generate({ + model: vertexAI.model('gemini-2.5-flash-preview-tts'), + config: { + responseModalities: ['AUDIO'], + speechConfig: { + voiceConfig: { + prebuiltVoiceConfig: { voiceName: 'Algenib' }, + }, + }, + }, + prompt: 'Say that Genkit is an amazing Gen AI library', + }); + + if (response.media?.url) { + const audioBuffer = Buffer.from( + response.media.url.substring(response.media.url.indexOf(',') + 1), + 'base64' + ); + await writeFile('output.wav', audioBuffer); + } + ``` + + ### Multi-speaker Audio + + ```ts + const response = await ai.generate({ + model: vertexAI.model('gemini-2.5-flash-preview-tts'), + config: { + responseModalities: ['AUDIO'], + speechConfig: { + multiSpeakerVoiceConfig: { + speakerVoiceConfigs: [ + { + speaker: 'Speaker1', + voiceConfig: { + prebuiltVoiceConfig: { voiceName: 'Algenib' }, + }, + }, + { + speaker: 'Speaker2', + voiceConfig: { + prebuiltVoiceConfig: { voiceName: 'Achernar' }, + }, + }, + ], + }, + }, + }, + prompt: `Here's the dialog: + Speaker1: "Genkit is an amazing Gen AI library!" + Speaker2: "I thought it was a framework."`, + }); + ``` + + + Text-to-speech functionality requires custom implementation using the Vertex AI API: + + ```go + // TTS requires custom implementation using the Vertex AI API + // See Vertex AI Speech Generation documentation + ``` + + + Text-to-speech functionality requires custom implementation using the Vertex AI API: + + ```python + # TTS requires custom implementation using the Vertex AI API + # See Vertex AI Speech Generation documentation + ``` + + + +## Context Caching + + + + Optimize performance with context caching for large inputs: + + ```ts + const llmResponse = await ai.generate({ + messages: [ + { + role: 'user', + content: [{ text: 'Here is the relevant text from War and Peace.' 
}], + }, + { + role: 'model', + content: [ + { + text: "Based on War and Peace, here is some analysis of Pierre Bezukhov's character.", + }, + ], + metadata: { + cache: { + ttlSeconds: 300, // Cache for 5 minutes + }, + }, + }, + ], + model: vertexAI.model('gemini-2.5-flash'), + prompt: "Describe Pierre's transformation throughout the novel.", + }); + ``` + + **Supported models**: `gemini-2.5-flash-001`, `gemini-2.0-pro-001` + + + Context caching requires custom implementation using the Vertex AI API: + + ```go + // Context caching requires custom implementation + // See Vertex AI Context Caching documentation + ``` + + + Context caching requires custom implementation using the Vertex AI API: + + ```python + # Context caching requires custom implementation + # See Vertex AI Context Caching documentation + ``` + + + +## Available Models + +### Text Generation +- `gemini-2.5-flash`, `gemini-2.5-flash-lite`, `gemini-1.5-pro` +- `claude-3-haiku`, `claude-3-sonnet`, `claude-3-opus` (Model Garden) +- `llama3-405b-instruct-maas` (Model Garden) +- `mistral-large`, `mistral-nemo`, `codestral` (Model Garden) + +### Embeddings +- `gemini-embedding-001` (768 dimensions) +- `text-multilingual-embedding-002` (768 dimensions) +- `multimodal-embedding-001` (128-1408 dimensions) + +### Image Generation +- `imagen-3.0-generate-002`, `imagen-2.0-generate-001` + +### Text-to-Speech +- `gemini-2.5-flash-preview-tts` + +## Next Steps + +- Learn about [generating content](/unified-docs/generating-content) to understand how to use these models effectively +- Explore [evaluation](/unified-docs/evaluation) to leverage Vertex AI's evaluation metrics +- See [RAG](/unified-docs/rag) to implement retrieval-augmented generation with Vector Search +- Check out [creating flows](/unified-docs/creating-flows) to build structured AI workflows diff --git a/src/content/docs/unified-docs/plugins/xai.mdx b/src/content/docs/unified-docs/plugins/xai.mdx new file mode 100644 index 00000000..87fee3bd --- /dev/null +++ b/src/content/docs/unified-docs/plugins/xai.mdx @@ -0,0 +1,896 @@ +--- +title: xAI (Grok) Plugin +description: Learn how to use xAI's Grok models with Genkit across JavaScript, Go, and Python, including text generation, image generation, and advanced configuration. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +The xAI plugin provides access to xAI's powerful Grok family of models, including advanced text generation and image generation capabilities. Grok models are known for their real-time information access and conversational abilities. + +## Installation and Setup + + + + Install the xAI plugin: + + ```bash + npm install @genkit-ai/compat-oai + ``` + + Configure the plugin when initializing Genkit: + + ```ts + import { genkit } from 'genkit'; + import { xAI } from '@genkit-ai/compat-oai/xai'; + + const ai = genkit({ + plugins: [xAI()], + }); + ``` + + ### API Key Configuration + + Set your xAI API key using one of these methods: + + ```bash + # Environment variable (recommended) + export XAI_API_KEY=your_xai_api_key + ``` + + ```ts + // Or pass directly to plugin (not recommended for production) + const ai = genkit({ + plugins: [xAI({ apiKey: 'your_xai_api_key' })], + }); + ``` + + Get your API key from [xAI Console](https://console.x.ai/). 
+ + + For Go applications, use the OpenAI-compatible client with xAI endpoints: + + ```go + package main + + import ( + "context" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/openai" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&openai.OpenAI{ + APIKey: os.Getenv("XAI_API_KEY"), + BaseURL: "https://api.x.ai/v1", + Models: []openai.ModelConfig{ + {Name: "grok-3-mini", Type: "chat"}, + {Name: "grok-3", Type: "chat"}, + {Name: "grok-image", Type: "generate"}, + }, + }), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + ### Environment Configuration + + ```bash + export XAI_API_KEY=your_xai_api_key + ``` + + + For Python applications, use the OpenAI-compatible client: + + ```bash + pip install genkit-plugin-openai + ``` + + ```python + from genkit.ai import Genkit + from genkit.plugins.openai import OpenAI + + ai = Genkit( + plugins=[OpenAI( + api_key=os.getenv("XAI_API_KEY"), + base_url="https://api.x.ai/v1", + models=[ + {"name": "grok-3-mini", "type": "chat"}, + {"name": "grok-3", "type": "chat"}, + {"name": "grok-image", "type": "generate"}, + ], + )], + ) + ``` + + ### Environment Configuration + + ```bash + export XAI_API_KEY=your_xai_api_key + ``` + + + +## Basic Usage + +### Text Generation + + + + Use Grok models for text generation: + + ```ts + import { genkit, z } from 'genkit'; + import { xAI } from '@genkit-ai/compat-oai/xai'; + + const ai = genkit({ + plugins: [xAI()], + }); + + // Basic text generation + const response = await ai.generate({ + model: xAI.model('grok-3-mini'), + prompt: 'Explain quantum computing in simple terms', + }); + + console.log(response.text); + + // Flow with Grok + export const grokFlow = ai.defineFlow( + { + name: 'grokFlow', + inputSchema: z.object({ subject: z.string() }), + outputSchema: z.object({ fact: z.string() }), + }, + async ({ subject }) => { + const llmResponse = await ai.generate({ + model: xAI.model('grok-3-mini'), + prompt: `Tell me a fun fact about ${subject}`, + }); + return { fact: llmResponse.text }; + }, + ); + + // Real-time information queries + const newsResponse = await ai.generate({ + model: xAI.model('grok-3'), + prompt: 'What are the latest developments in AI this week?', + config: { + temperature: 0.7, + maxTokens: 500, + }, + }); + ``` + + + Use Grok models with the generation API: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + ) + + func main() { + ctx := context.Background() + + // Basic text generation + resp, err := genkit.Generate(ctx, g, + ai.WithModel("grok-3-mini"), + ai.WithPrompt("Explain quantum computing in simple terms"), + ) + if err != nil { + log.Fatal(err) + } + + fmt.Println(resp.Text()) + + // Real-time information queries + newsResp, err := genkit.Generate(ctx, g, + ai.WithModel("grok-3"), + ai.WithPrompt("What are the latest developments in AI this week?"), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.7, + "max_tokens": 500, + }), + ) + if err != nil { + log.Fatal(err) + } + + fmt.Println(newsResp.Text()) + } + ``` + + + Use Grok models with the generation API: + + ```python + from genkit.ai import Genkit + from genkit.plugins.openai import OpenAI, openai_name + + ai = Genkit( + plugins=[OpenAI( + api_key=os.getenv("XAI_API_KEY"), + base_url="https://api.x.ai/v1", + models=[ + {"name": "grok-3-mini", "type": "chat"}, + {"name": "grok-3", "type": "chat"}, + ], + )], + ) + + # Basic text generation + response = await 
ai.generate( + model=openai_name('grok-3-mini'), + prompt='Explain quantum computing in simple terms' + ) + print(response.text) + + # Real-time information queries + news_response = await ai.generate( + model=openai_name('grok-3'), + prompt='What are the latest developments in AI this week?', + config={ + 'temperature': 0.7, + 'max_tokens': 500, + } + ) + print(news_response.text) + ``` + + + +### Image Generation + + + + Use Grok for image generation: + + ```ts + // Image generation with Grok + const imageResponse = await ai.generate({ + model: xAI.model('grok-image'), + prompt: 'A futuristic cityscape with flying cars and neon lights', + config: { + size: '1024x1024', + quality: 'hd', + style: 'vivid', + }, + }); + + // Image generation flow + export const imageFlow = ai.defineFlow( + { + name: 'imageFlow', + inputSchema: z.object({ + description: z.string(), + style: z.string().optional(), + }), + outputSchema: z.object({ imageUrl: z.string() }), + }, + async ({ description, style }) => { + const prompt = style + ? `${description} in ${style} style` + : description; + + const response = await ai.generate({ + model: xAI.model('grok-image'), + prompt, + config: { + size: '1024x1024', + quality: 'hd', + }, + }); + + return { imageUrl: response.media?.url || '' }; + }, + ); + ``` + + + Use Grok for image generation: + + ```go + // Image generation + imageResp, err := genkit.Generate(ctx, g, + ai.WithModel("grok-image"), + ai.WithPrompt("A futuristic cityscape with flying cars and neon lights"), + ai.WithConfig(map[string]interface{}{ + "size": "1024x1024", + "quality": "hd", + "style": "vivid", + }), + ) + if err != nil { + log.Fatal(err) + } + + // Access generated image + if imageResp.Media() != nil { + fmt.Printf("Generated image URL: %s\n", imageResp.Media().URL) + } + ``` + + + Use Grok for image generation: + + ```python + # Image generation + image_response = await ai.generate( + model=openai_name('grok-image'), + prompt='A futuristic cityscape with flying cars and neon lights', + config={ + 'size': '1024x1024', + 'quality': 'hd', + 'style': 'vivid', + } + ) + + # Access generated image + if image_response.media: + print(f"Generated image URL: {image_response.media.url}") + ``` + + + +## Advanced Features + +### Real-time Information Access + + + + Leverage Grok's real-time information capabilities: + + ```ts + // Current events and news + const newsFlow = ai.defineFlow( + { + name: 'newsFlow', + inputSchema: z.object({ topic: z.string() }), + outputSchema: z.object({ summary: z.string() }), + }, + async ({ topic }) => { + const response = await ai.generate({ + model: xAI.model('grok-3'), + prompt: `Provide a current summary of recent news about ${topic}. Include the latest developments and key information.`, + config: { + temperature: 0.3, // Lower temperature for factual content + maxTokens: 800, + }, + }); + return { summary: response.text }; + }, + ); + + // Market data and trends + const marketFlow = ai.defineFlow( + { + name: 'marketFlow', + inputSchema: z.object({ symbol: z.string() }), + outputSchema: z.object({ analysis: z.string() }), + }, + async ({ symbol }) => { + const response = await ai.generate({ + model: xAI.model('grok-3'), + prompt: `Analyze the current market situation for ${symbol}. 
Include recent price movements, news, and relevant factors.`, + config: { + temperature: 0.4, + maxTokens: 600, + }, + }); + return { analysis: response.text }; + }, + ); + ``` + + + Leverage Grok's real-time information capabilities: + + ```go + // Current events and news + func getNewsAnalysis(ctx context.Context, topic string) (string, error) { + resp, err := genkit.Generate(ctx, g, + ai.WithModel("grok-3"), + ai.WithPrompt(fmt.Sprintf( + "Provide a current summary of recent news about %s. Include the latest developments and key information.", + topic, + )), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.3, + "max_tokens": 800, + }), + ) + if err != nil { + return "", err + } + return resp.Text(), nil + } + + // Market data and trends + func getMarketAnalysis(ctx context.Context, symbol string) (string, error) { + resp, err := genkit.Generate(ctx, g, + ai.WithModel("grok-3"), + ai.WithPrompt(fmt.Sprintf( + "Analyze the current market situation for %s. Include recent price movements, news, and relevant factors.", + symbol, + )), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.4, + "max_tokens": 600, + }), + ) + if err != nil { + return "", err + } + return resp.Text(), nil + } + ``` + + + Leverage Grok's real-time information capabilities: + + ```python + # Current events and news + async def get_news_analysis(topic: str) -> str: + response = await ai.generate( + model=openai_name('grok-3'), + prompt=f"Provide a current summary of recent news about {topic}. Include the latest developments and key information.", + config={ + 'temperature': 0.3, + 'max_tokens': 800, + } + ) + return response.text + + # Market data and trends + async def get_market_analysis(symbol: str) -> str: + response = await ai.generate( + model=openai_name('grok-3'), + prompt=f"Analyze the current market situation for {symbol}. Include recent price movements, news, and relevant factors.", + config={ + 'temperature': 0.4, + 'max_tokens': 600, + } + ) + return response.text + ``` + + + +### Conversational AI + + + + Build conversational applications with Grok: + + ```ts + // Conversational chat flow + export const chatFlow = ai.defineFlow( + { + name: 'chatFlow', + inputSchema: z.object({ + message: z.string(), + history: z.array(z.object({ + role: z.enum(['user', 'assistant']), + content: z.string(), + })).optional(), + }), + outputSchema: z.object({ response: z.string() }), + }, + async ({ message, history = [] }) => { + // Build conversation context + const messages = [ + { role: 'system', content: 'You are Grok, a helpful and witty AI assistant with access to real-time information.' 
}, + ...history, + { role: 'user', content: message }, + ]; + + const response = await ai.generate({ + model: xAI.model('grok-3'), + messages, + config: { + temperature: 0.8, + maxTokens: 1000, + }, + }); + + return { response: response.text }; + }, + ); + + // Personality-driven responses + export const personalityFlow = ai.defineFlow( + { + name: 'personalityFlow', + inputSchema: z.object({ + query: z.string(), + personality: z.enum(['witty', 'professional', 'casual', 'technical']), + }), + outputSchema: z.object({ response: z.string() }), + }, + async ({ query, personality }) => { + const personalityPrompts = { + witty: 'Respond with humor and wit, making clever observations.', + professional: 'Respond in a professional, formal tone.', + casual: 'Respond in a casual, friendly manner.', + technical: 'Respond with technical depth and precision.', + }; + + const response = await ai.generate({ + model: xAI.model('grok-3-mini'), + prompt: `${personalityPrompts[personality]} Query: ${query}`, + config: { + temperature: personality === 'witty' ? 0.9 : 0.6, + maxTokens: 600, + }, + }); + + return { response: response.text }; + }, + ); + ``` + + + Build conversational applications with Grok: + + ```go + type ChatMessage struct { + Role string `json:"role"` + Content string `json:"content"` + } + + func handleChat(ctx context.Context, message string, history []ChatMessage) (string, error) { + // Build conversation context + messages := []ChatMessage{ + {Role: "system", Content: "You are Grok, a helpful and witty AI assistant with access to real-time information."}, + } + messages = append(messages, history...) + messages = append(messages, ChatMessage{Role: "user", Content: message}) + + resp, err := genkit.Generate(ctx, g, + ai.WithModel("grok-3"), + ai.WithMessages(messages), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.8, + "max_tokens": 1000, + }), + ) + if err != nil { + return "", err + } + + return resp.Text(), nil + } + ``` + + + Build conversational applications with Grok: + + ```python + from typing import List, Dict + + async def handle_chat(message: str, history: List[Dict[str, str]] = None) -> str: + if history is None: + history = [] + + # Build conversation context + messages = [ + {"role": "system", "content": "You are Grok, a helpful and witty AI assistant with access to real-time information."}, + *history, + {"role": "user", "content": message}, + ] + + response = await ai.generate( + model=openai_name('grok-3'), + messages=messages, + config={ + 'temperature': 0.8, + 'max_tokens': 1000, + } + ) + + return response.text + ``` + + + +## Model Comparison + +### Available Models + +| Model | Capabilities | Best For | Context Window | +|-------|-------------|----------|----------------| +| **grok-3-mini** | Fast text generation | Quick responses, simple tasks | 128K tokens | +| **grok-3** | Advanced reasoning, real-time data | Complex analysis, current events | 128K tokens | +| **grok-image** | Image generation | Creative visuals, concept art | N/A | + +### Performance Characteristics + + + + ```ts + // Performance comparison example + const performanceTest = async () => { + const prompt = "Explain the impact of AI on modern society"; + + // Fast response with grok-3-mini + const startMini = Date.now(); + const miniResponse = await ai.generate({ + model: xAI.model('grok-3-mini'), + prompt, + }); + const miniTime = Date.now() - startMini; + + // Detailed response with grok-3 + const startFull = Date.now(); + const fullResponse = await ai.generate({ + model: 
xAI.model('grok-3'), + prompt, + }); + const fullTime = Date.now() - startFull; + + console.log(`Mini: ${miniTime}ms, Full: ${fullTime}ms`); + console.log(`Mini length: ${miniResponse.text.length}, Full length: ${fullResponse.text.length}`); + }; + ``` + + + ```go + func performanceTest(ctx context.Context) { + prompt := "Explain the impact of AI on modern society" + + // Fast response with grok-3-mini + startMini := time.Now() + miniResp, _ := genkit.Generate(ctx, g, + ai.WithModel("grok-3-mini"), + ai.WithPrompt(prompt), + ) + miniTime := time.Since(startMini) + + // Detailed response with grok-3 + startFull := time.Now() + fullResp, _ := genkit.Generate(ctx, g, + ai.WithModel("grok-3"), + ai.WithPrompt(prompt), + ) + fullTime := time.Since(startFull) + + fmt.Printf("Mini: %v, Full: %v\n", miniTime, fullTime) + fmt.Printf("Mini length: %d, Full length: %d\n", + len(miniResp.Text()), len(fullResp.Text())) + } + ``` + + + ```python + import time + + async def performance_test(): + prompt = "Explain the impact of AI on modern society" + + # Fast response with grok-3-mini + start_mini = time.time() + mini_response = await ai.generate( + model=openai_name('grok-3-mini'), + prompt=prompt + ) + mini_time = time.time() - start_mini + + # Detailed response with grok-3 + start_full = time.time() + full_response = await ai.generate( + model=openai_name('grok-3'), + prompt=prompt + ) + full_time = time.time() - start_full + + print(f"Mini: {mini_time:.2f}s, Full: {full_time:.2f}s") + print(f"Mini length: {len(mini_response.text)}, Full length: {len(full_response.text)}") + ``` + + + +## Advanced Configuration + +### Custom Model Configuration + + + + ```ts + // Advanced configuration with passthrough options + const response = await ai.generate({ + model: xAI.model('grok-3'), + prompt: 'Analyze the latest tech trends', + config: { + temperature: 0.7, + maxTokens: 1000, + topP: 0.9, + frequencyPenalty: 0.1, + presencePenalty: 0.1, + // Passthrough configuration for new features + stream: true, + logprobs: true, + top_logprobs: 5, + }, + }); + + // Environment-specific configuration + const environmentConfig = { + development: { + model: xAI.model('grok-3-mini'), + temperature: 0.8, + maxTokens: 500, + }, + production: { + model: xAI.model('grok-3'), + temperature: 0.6, + maxTokens: 1000, + }, + }; + + const config = environmentConfig[process.env.NODE_ENV || 'development']; + ``` + + + ```go + // Advanced configuration + resp, err := genkit.Generate(ctx, g, + ai.WithModel("grok-3"), + ai.WithPrompt("Analyze the latest tech trends"), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.7, + "max_tokens": 1000, + "top_p": 0.9, + "frequency_penalty": 0.1, + "presence_penalty": 0.1, + "stream": true, + "logprobs": true, + "top_logprobs": 5, + }), + ) + ``` + + + ```python + # Advanced configuration + response = await ai.generate( + model=openai_name('grok-3'), + prompt='Analyze the latest tech trends', + config={ + 'temperature': 0.7, + 'max_tokens': 1000, + 'top_p': 0.9, + 'frequency_penalty': 0.1, + 'presence_penalty': 0.1, + 'stream': True, + 'logprobs': True, + 'top_logprobs': 5, + } + ) + ``` + + + +## Best Practices + +### Optimizing for Real-time Information + +1. **Use appropriate models**: Use `grok-3` for current events and real-time data +2. **Set proper temperature**: Lower temperature (0.3-0.5) for factual content +3. **Specify time context**: Include "current", "latest", or "recent" in prompts +4. 
**Verify information**: Cross-reference important facts when possible + +### Cost Optimization + +1. **Choose the right model**: Use `grok-3-mini` for simple tasks +2. **Optimize token usage**: Be concise in prompts and set appropriate `maxTokens` +3. **Cache responses**: Cache frequently requested information +4. **Batch requests**: Group similar requests when possible + +### Error Handling + + + + ```ts + const robustGrokFlow = ai.defineFlow( + { + name: 'robustGrokFlow', + inputSchema: z.object({ query: z.string() }), + outputSchema: z.object({ response: z.string() }), + }, + async ({ query }) => { + try { + const response = await ai.generate({ + model: xAI.model('grok-3'), + prompt: query, + config: { + temperature: 0.7, + maxTokens: 800, + }, + }); + return { response: response.text }; + } catch (error) { + if (error.message.includes('rate_limit')) { + // Fallback to mini model + const fallbackResponse = await ai.generate({ + model: xAI.model('grok-3-mini'), + prompt: query, + }); + return { response: fallbackResponse.text }; + } + throw error; + } + }, + ); + ``` + + + ```go + func robustGrokGenerate(ctx context.Context, query string) (string, error) { + resp, err := genkit.Generate(ctx, g, + ai.WithModel("grok-3"), + ai.WithPrompt(query), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.7, + "max_tokens": 800, + }), + ) + + if err != nil { + if strings.Contains(err.Error(), "rate_limit") { + // Fallback to mini model + fallbackResp, fallbackErr := genkit.Generate(ctx, g, + ai.WithModel("grok-3-mini"), + ai.WithPrompt(query), + ) + if fallbackErr != nil { + return "", fallbackErr + } + return fallbackResp.Text(), nil + } + return "", err + } + + return resp.Text(), nil + } + ``` + + + ```python + async def robust_grok_generate(query: str) -> str: + try: + response = await ai.generate( + model=openai_name('grok-3'), + prompt=query, + config={ + 'temperature': 0.7, + 'max_tokens': 800, + } + ) + return response.text + except Exception as error: + if 'rate_limit' in str(error): + # Fallback to mini model + fallback_response = await ai.generate( + model=openai_name('grok-3-mini'), + prompt=query + ) + return fallback_response.text + raise error + ``` + + + +## Next Steps + +- Learn about [generating content](/unified-docs/generating-content) to understand how to use these models effectively +- Explore [tool calling](/unified-docs/tool-calling) to add interactive capabilities to your Grok applications +- See [creating flows](/unified-docs/creating-flows) to build structured AI workflows with real-time information +- Check out [deployment guides](/unified-docs/deployment) for production deployment strategies diff --git a/src/content/docs/unified-docs/rag.mdx b/src/content/docs/unified-docs/rag.mdx new file mode 100644 index 00000000..e3dd4f11 --- /dev/null +++ b/src/content/docs/unified-docs/rag.mdx @@ -0,0 +1,940 @@ +--- +title: Retrieval-augmented generation (RAG) +description: Learn how Genkit simplifies retrieval-augmented generation (RAG) across JavaScript, Go, and Python by providing abstractions and plugins for indexers, embedders, and retrievers to incorporate external data into LLM responses. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +Genkit provides abstractions that help you build retrieval-augmented +generation (RAG) flows, as well as plugins that provide integrations with +related tools. + +## What is RAG? 
+ +Retrieval-augmented generation is a technique used to incorporate external +sources of information into an LLM's responses. It's important to be able to do +so because, while LLMs are typically trained on a broad body of material, +practical use of LLMs often requires specific domain knowledge (for example, you +might want to use an LLM to answer customers' questions about your company's +products). + +One solution is to fine-tune the model using more specific data. However, this +can be expensive both in terms of compute cost and in terms of the effort needed +to prepare adequate training data. + +In contrast, RAG works by incorporating external data sources into a prompt at +the time it's passed to the model. For example, you could imagine the prompt, +"What is Bart's relationship to Lisa?" might be expanded ("augmented") by +prepending some relevant information, resulting in the prompt, "Homer and +Marge's children are named Bart, Lisa, and Maggie. What is Bart's relationship +to Lisa?" + +This approach has several advantages: + +- It can be more cost-effective because you don't have to retrain the model. +- You can continuously update your data source and the LLM can immediately + make use of the updated information. +- You now have the potential to cite references in your LLM's responses. + +On the other hand, using RAG naturally means longer prompts, and some LLM API +services charge for each input token you send. Ultimately, you must evaluate the +cost tradeoffs for your applications. + +RAG is a very broad area and there are many different techniques used to achieve +the best quality RAG. The core Genkit framework offers main abstractions to +help you do RAG: + + + + - **Indexers**: add documents to an "index" + - **Embedders**: transforms documents into a vector representation + - **Retrievers**: retrieve documents from an "index", given a query + + + - **Embedders**: transforms documents into a vector representation + - **Retrievers**: retrieve documents from an "index", given a query + + + - **Embedders**: transforms documents into a vector representation + - **Retrievers**: retrieve documents from an "index", given a query + + + +These definitions are broad on purpose because Genkit is un-opinionated about +what an "index" is or how exactly documents are retrieved from it. Genkit only +provides a `Document` format and everything else is defined by the retriever or +indexer implementation provider. + +### Indexers + + + + The index is responsible for keeping track of your documents in such a way that + you can quickly retrieve relevant documents given a specific query. This is most + often accomplished using a vector database, which indexes your documents using + multidimensional vectors called embeddings. A text embedding (opaquely) + represents the concepts expressed by a passage of text; these are generated + using special-purpose ML models. By indexing text using its embedding, a vector + database is able to cluster conceptually related text and retrieve documents + related to a novel string of text (the query). + + Before you can retrieve documents for the purpose of generation, you need to + ingest them into your document index. A typical ingestion flow does the + following: + + 1. Split up large documents into smaller documents so that only relevant + portions are used to augment your prompts – "chunking". This is necessary + because many LLMs have a limited context window, making it impractical to + include entire documents with a prompt. 
+ + Genkit doesn't provide built-in chunking libraries; however, there are open + source libraries available that are compatible with Genkit. + + 2. Generate embeddings for each chunk. Depending on the database you're using, + you might explicitly do this with an embedding generation model, or you might + use the embedding generator provided by the database. + 3. Add the text chunk and its index to the database. + + You might run your ingestion flow infrequently or only once if you are working + with a stable source of data. On the other hand, if you are working with data + that frequently changes, you might continuously run the ingestion flow (for + example, in a Cloud Firestore trigger, whenever a document is updated). + + + In Go, indexing is typically handled by your chosen vector database or storage solution. + Genkit provides the abstractions for working with indexed documents, but the actual + indexing process is implementation-specific to your storage backend. + + Users are expected to add their own functionality to index documents using their + preferred vector database or storage solution. + + + In Python, indexing is outside the scope of Genkit and you should use the + SDKs/APIs provided by the vector store you are using. Genkit provides the + abstractions for working with indexed documents through retrievers. + + + +### Embedders + +An embedder is a function that takes content (text, images, audio, etc.) and +creates a numeric vector that encodes the semantic meaning of the original +content. As mentioned above, embedders are leveraged as part of the process of +indexing, however, they can also be used independently to create embeddings +without an index. + +### Retrievers + +A retriever is a concept that encapsulates logic related to any kind of document +retrieval. The most popular retrieval cases typically include retrieval from +vector stores, however, in Genkit a retriever can be any function that returns +data. + +To create a retriever, you can use one of the provided implementations or create +your own. + +## Supported indexers, retrievers, and embedders + +Genkit provides indexer and retriever support through its plugin system. 
The +following plugins are officially supported: + + + + **Vector Databases:** + - [Astra DB](/docs/plugins/astra-db) - DataStax Astra DB vector database + - [Chroma DB](/docs/plugins/chroma) vector database + - [Cloud Firestore vector store](/docs/plugins/firebase) + - [Cloud SQL for PostgreSQL](/docs/plugins/cloud-sql-pg) with pgvector extension + - [LanceDB](/docs/plugins/lancedb) open-source vector database + - [Neo4j](/docs/plugins/neo4j) graph database with vector search + - [Pinecone](/docs/plugins/pinecone) cloud vector database + - [Vertex AI Vector Search](/docs/plugins/vertex-ai) + + **Templates:** + - PostgreSQL with [`pgvector`](/docs/templates/pgvector) + + **Embedding Models:** + - Google AI and Vertex AI plugins provide text embedding models + + + **Vector Databases:** + - [Pinecone](/go/docs/plugins/pinecone) cloud vector database + - PostgreSQL with [`pgvector`](/go/docs/plugins/pgvector) + + **Embedding Models:** + - [Google Generative AI](/go/docs/plugins/google-genai) - Text embedding models + + + **Vector Databases:** + - Firestore Vector Store (via Firebase plugin) + - Dev Local Vector Store (for development/testing) + + **Embedding Models:** + - Google GenAI plugin provides text embedding models + + + +## Defining a RAG Flow + +The following examples show how you could ingest a collection of restaurant menu +PDF documents into a vector database and retrieve them for use in a flow that +determines what food items are available. + +### Install dependencies + + + + Install dependencies for processing PDFs: + + ```bash + npm install llm-chunk pdf-parse @genkit-ai/dev-local-vectorstore + + npm install --save-dev @types/pdf-parse + ``` + + + Install dependencies for text processing and PDF parsing: + + ```bash + go get github.com/tmc/langchaingo/textsplitter + go get github.com/ledongthuc/pdf + ``` + + + Install dependencies for your chosen vector store and PDF processing: + + ```bash + pip install genkit[google-genai,firebase] + # Add other dependencies as needed for PDF processing + ``` + + + +### Configure vector store + + + + Add a local vector store to your configuration: + + ```ts + import { devLocalIndexerRef, devLocalVectorstore } from '@genkit-ai/dev-local-vectorstore'; + import { googleAI } from '@genkit-ai/googleai'; + import { z, genkit } from 'genkit'; + + const ai = genkit({ + plugins: [ + // googleAI provides the gemini-embedding-001 embedder + googleAI(), + + // the local vector store requires an embedder to translate from text to vector + devLocalVectorstore([ + { + indexName: 'menuQA', + embedder: googleAI.embedder('gemini-embedding-001'), + }, + ]), + ], + }); + ``` + + + Configure your Genkit instance with embedding support: + + ```go + ctx := context.Background() + + g, err := genkit.Init(ctx, genkit.WithPlugins(&googlegenai.GoogleAI{})) + if err != nil { + log.Fatal(err) + } + + if err = localvec.Init(); err != nil { + log.Fatal(err) + } + ``` + + + Configure your Genkit instance with vector store support: + + ```python + from genkit.ai import Genkit, Document + from genkit.plugins.google_genai import GoogleGenai + from genkit.plugins.firebase.firestore import FirestoreVectorStore, DistanceMeasure + + ai = Genkit( + plugins=[ + GoogleGenai(), + FirestoreVectorStore( + name='my_firestore_retriever', + collection='mycollection', + vector_field='embedding', + content_field='text', + embedder='googleai/text-embedding-004', + distance_measure=DistanceMeasure.EUCLIDEAN, + firestore_client=firestore_client, + ), + ], + ) + ``` + + + +### Define an Indexer + 
+ + + The following example shows how to create an indexer to ingest a collection of + PDF documents and store them in a local vector database. + + #### Create the indexer + + ```ts + export const menuPdfIndexer = devLocalIndexerRef('menuQA'); + ``` + + #### Create chunking config + + This example uses the `llm-chunk` library which provides a simple text splitter + to break up documents into segments that can be vectorized. + + The following definition configures the chunking function to guarantee a + document segment of between 1000 and 2000 characters, broken at the end of a + sentence, with an overlap between chunks of 100 characters. + + ```ts + const chunkingConfig = { + minLength: 1000, + maxLength: 2000, + splitter: 'sentence', + overlap: 100, + delimiters: '', + } as any; + ``` + + More chunking options for this library can be found in the [llm-chunk + documentation](https://www.npmjs.com/package/llm-chunk). + + #### Define your indexer flow + + ```ts + import { Document } from 'genkit/retriever'; + import { chunk } from 'llm-chunk'; + import { readFile } from 'fs/promises'; + import path from 'path'; + import pdf from 'pdf-parse'; + + async function extractTextFromPdf(filePath: string) { + const pdfFile = path.resolve(filePath); + const dataBuffer = await readFile(pdfFile); + const data = await pdf(dataBuffer); + return data.text; + } + + export const indexMenu = ai.defineFlow( + { + name: 'indexMenu', + inputSchema: z.object({ filePath: z.string().describe('PDF file path') }), + outputSchema: z.object({ + success: z.boolean(), + documentsIndexed: z.number(), + error: z.string().optional(), + }), + }, + async ({ filePath }) => { + try { + filePath = path.resolve(filePath); + + // Read the pdf + const pdfTxt = await ai.run('extract-text', () => extractTextFromPdf(filePath)); + + // Divide the pdf text into segments + const chunks = await ai.run('chunk-it', async () => chunk(pdfTxt, chunkingConfig)); + + // Convert chunks of text into documents to store in the index. + const documents = chunks.map((text) => { + return Document.fromText(text, { filePath }); + }); + + // Add documents to the index + await ai.index({ + indexer: menuPdfIndexer, + documents, + }); + + return { + success: true, + documentsIndexed: documents.length, + }; + } catch (err) { + // For unexpected errors that throw exceptions + return { + success: false, + documentsIndexed: 0, + error: err instanceof Error ? err.message : String(err) + }; + } + }, + ); + ``` + + #### Run the indexer flow + + ```bash + genkit flow:run indexMenu '{"filePath": "menu.pdf"}' + ``` + + + #### Create chunking config + + This example uses the `textsplitter` library which provides a simple text + splitter to break up documents into segments that can be vectorized. + + The following definition configures the chunking function to return document + segments of 200 characters, with an overlap between chunks of 20 characters. + + ```go + splitter := textsplitter.NewRecursiveCharacter( + textsplitter.WithChunkSize(200), + textsplitter.WithChunkOverlap(20), + ) + ``` + + More chunking options for this library can be found in the + [`langchaingo` documentation](https://pkg.go.dev/github.com/tmc/langchaingo/textsplitter#Option). + + #### Define your indexer flow + + ```go + genkit.DefineFlow( + g, "indexMenu", + func(ctx context.Context, path string) (any, error) { + // Extract plain text from the PDF. Wrap the logic in Run so it + // appears as a step in your traces. 
+ pdfText, err := genkit.Run(ctx, "extract", func() (string, error) { + return readPDF(path) + }) + if err != nil { + return nil, err + } + + // Split the text into chunks. Wrap the logic in Run so it appears as a + // step in your traces. + docs, err := genkit.Run(ctx, "chunk", func() ([]*ai.Document, error) { + chunks, err := splitter.SplitText(pdfText) + if err != nil { + return nil, err + } + + var docs []*ai.Document + for _, chunk := range chunks { + docs = append(docs, ai.DocumentFromText(chunk, nil)) + } + return docs, nil + }) + if err != nil { + return nil, err + } + + // Add chunks to the index using custom index function + // Implementation depends on your chosen vector database + return map[string]interface{}{ + "success": true, + "documentsIndexed": len(docs), + }, nil + }, + ) + ``` + + ```go + // Helper function to extract plain text from a PDF. Excerpted from + // https://github.com/ledongthuc/pdf + func readPDF(path string) (string, error) { + f, r, err := pdf.Open(path) + if f != nil { + defer f.Close() + } + if err != nil { + return "", err + } + + reader, err := r.GetPlainText() + if err != nil { + return "", err + } + + bytes, err := io.ReadAll(reader) + if err != nil { + return "", err + } + + return string(bytes), nil + } + ``` + + #### Run the indexer flow + + ```bash + genkit flow:run indexMenu '"menu.pdf"' + ``` + + + In Python, indexing is typically handled by your vector store's SDK. Here's an example of how you might structure an indexing flow: + + ```python + @ai.flow() + async def index_menu(file_path: str): + # Extract text from PDF (implementation depends on your PDF library) + pdf_text = extract_text_from_pdf(file_path) + + # Chunk the text (implementation depends on your chunking library) + chunks = chunk_text(pdf_text) + + # Index using your vector store's SDK + # This is specific to your chosen vector database + # For Firestore, you would use the Firestore SDK directly + + return { + "success": True, + "documents_indexed": len(chunks) + } + ``` + + Note: Indexing is outside the scope of Genkit Python and should be done using your vector store's native SDK. + + + +After running the indexing flow, the vector database will be seeded with +documents and ready to be used in Genkit flows with retrieval steps. + +### Define a flow with retrieval + +The following example shows how you might use a retriever in a RAG flow: + + + + ```ts + import { devLocalRetrieverRef } from '@genkit-ai/dev-local-vectorstore'; + import { googleAI } from '@genkit-ai/googleai'; + + // Define the retriever reference + export const menuRetriever = devLocalRetrieverRef('menuQA'); + + export const menuQAFlow = ai.defineFlow( + { + name: 'menuQA', + inputSchema: z.object({ query: z.string() }), + outputSchema: z.object({ answer: z.string() }) + }, + async ({ query }) => { + // retrieve relevant documents + const docs = await ai.retrieve({ + retriever: menuRetriever, + query, + options: { k: 3 }, + }); + + // generate a response + const { text } = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: ` + You are acting as a helpful AI assistant that can answer + questions about the food available on the menu at Genkit Grub Pub. + + Use only the context provided to answer the question. + If you don't know, do not make up an answer. + Do not add or change items on the menu. 
+ + Question: ${query}`, + docs, + }); + + return { answer: text }; + }, + ); + ``` + + #### Run the retriever flow + + ```bash + genkit flow:run menuQA '{"query": "Recommend a dessert from the menu while avoiding dairy and nuts"}' + ``` + + + ```go + model := googlegenai.Model(g, "gemini-2.5-flash") + + _, menuPdfRetriever, err := localvec.DefineRetriever( + g, "menuQA", localvec.Config{Embedder: googlegenai.Embedder(g, "text-embedding-004")}, + ) + if err != nil { + log.Fatal(err) + } + + genkit.DefineFlow( + g, "menuQA", + func(ctx context.Context, question string) (string, error) { + // Retrieve text relevant to the user's question. + resp, err := ai.Retrieve(ctx, menuPdfRetriever, ai.WithTextDocs(question)) + if err != nil { + return "", err + } + + // Call Generate, including the menu information in your prompt. + return genkit.GenerateText(ctx, g, + ai.WithModelName("googleai/gemini-2.5-flash"), + ai.WithDocs(resp.Documents), + ai.WithSystem(` + You are acting as a helpful AI assistant that can answer questions about the + food available on the menu at Genkit Grub Pub. + Use only the context provided to answer the question. If you don't know, do not + make up an answer. Do not add or change items on the menu.`), + ai.WithPrompt(question)) + }) + ``` + + #### Run the retriever flow + + ```bash + genkit flow:run menuQA '"Recommend a dessert from the menu while avoiding dairy and nuts"' + ``` + + + ```python + @ai.flow() + async def qa_flow(query: str): + docs = await ai.retrieve( + query=Document.from_text(query), + retriever='firestore/my_firestore_retriever' + ) + + response = await ai.generate( + prompt=f""" + You are acting as a helpful AI assistant that can answer + questions about the food available on the menu at Genkit Grub Pub. + + Use only the context provided to answer the question. + If you don't know, do not make up an answer. + Do not add or change items on the menu. + + Question: {query}""", + docs=docs + ) + return response.text + ``` + + #### Run the retriever flow + + ```python + result = await qa_flow('Recommend a dessert from the menu while avoiding dairy and nuts') + print(result) + ``` + + + +The output for this command should contain a response from the model, grounded +in the indexed menu file. + +## Write your own retrievers + +It's also possible to create your own retriever. This is useful if your +documents are managed in a document store that is not supported in Genkit (eg: +MySQL, Google Drive, etc.). The Genkit SDK provides flexible methods that let +you provide custom code for fetching documents. You can also define custom +retrievers that build on top of existing retrievers in Genkit and apply advanced +RAG techniques (such as reranking or prompt extensions) on top. 
+ +### Simple Retrievers + + + + Simple retrievers let you easily convert existing code into retrievers: + + ```ts + import { z } from 'genkit'; + import { searchEmails } from './db'; + + ai.defineSimpleRetriever( + { + name: 'myDatabase', + configSchema: z + .object({ + limit: z.number().optional(), + }) + .optional(), + // we'll extract "message" from the returned email item + content: 'message', + // and several keys to use as metadata + metadata: ['from', 'to', 'subject'], + }, + async (query, config) => { + const result = await searchEmails(query.text, { limit: config.limit }); + return result.data.emails; + }, + ); + ``` + + + ```go + // Simple retriever example in Go + // Implementation depends on your specific use case and data source + ``` + + + ```python + from genkit.types import ( + RetrieverRequest, + RetrieverResponse, + Document, + ActionRunContext + ) + + async def my_retriever(request: RetrieverRequest, ctx: ActionRunContext): + """Example of a simple retriever. + + Args: + request: The request to the retriever. + ctx: The context of the retriever. + """ + # Your custom retrieval logic here + return RetrieverResponse(documents=[ + Document.from_text('Hello'), + Document.from_text('World') + ]) + + ai.define_retriever(name='my_retriever', fn=my_retriever) + ``` + + + +### Custom Retrievers + + + + ```ts + import { CommonRetrieverOptionsSchema } from 'genkit/retriever'; + import { z } from 'genkit'; + + export const menuRetriever = devLocalRetrieverRef('menuQA'); + + const advancedMenuRetrieverOptionsSchema = CommonRetrieverOptionsSchema.extend({ + preRerankK: z.number().max(1000), + }); + + const advancedMenuRetriever = ai.defineRetriever( + { + name: `custom/advancedMenuRetriever`, + configSchema: advancedMenuRetrieverOptionsSchema, + }, + async (input, options) => { + const extendedPrompt = await extendPrompt(input); + const docs = await ai.retrieve({ + retriever: menuRetriever, + query: extendedPrompt, + options: { k: options.preRerankK || 10 }, + }); + const rerankedDocs = await rerank(docs); + return rerankedDocs.slice(0, options.k || 3); + }, + ); + ``` + + (`extendPrompt` and `rerank` is something you would have to implement yourself, + not provided by the framework) + + And then you can just swap out your retriever: + + ```ts + const docs = await ai.retrieve({ + retriever: advancedRetriever, + query: input, + options: { preRerankK: 7, k: 3 }, + }); + ``` + + + For example, suppose you have a custom re-ranking function you want to use. The + following example defines a custom retriever that applies your function to the + menu retriever defined earlier: + + ```go + type CustomMenuRetrieverOptions struct { + K int + PreRerankK int + } + + advancedMenuRetriever := genkit.DefineRetriever( + g, "custom", "advancedMenuRetriever", + func(ctx context.Context, req *ai.RetrieverRequest) (*ai.RetrieverResponse, error) { + // Handle options passed using our custom type. + opts, _ := req.Options.(CustomMenuRetrieverOptions) + // Set fields to default values when either the field was undefined + // or when req.Options is not a CustomMenuRetrieverOptions. + if opts.K == 0 { + opts.K = 3 + } + if opts.PreRerankK == 0 { + opts.PreRerankK = 10 + } + + // Call the retriever as in the simple case. + resp, err := ai.Retrieve(ctx, menuPDFRetriever, + ai.WithDocs(req.Query), + ai.WithConfig(localvec.RetrieverOptions{K: opts.PreRerankK}), + ) + if err != nil { + return nil, err + } + + // Re-rank the returned documents using your custom function. 
+ rerankedDocs := rerank(resp.Documents) + resp.Documents = rerankedDocs[:opts.K] + + return resp, nil + }, + ) + ``` + + + ```python + async def advanced_retriever(request: RetrieverRequest, ctx: ActionRunContext): + """Example of an advanced retriever with custom logic.""" + + # First, get initial results from base retriever + initial_docs = await ai.retrieve( + query=request.query, + retriever='my_base_retriever' + ) + + # Apply custom reranking or filtering logic + reranked_docs = custom_rerank_function(initial_docs, request.query) + + # Return top K results + k = getattr(request.options, 'k', 3) + return RetrieverResponse(documents=reranked_docs[:k]) + + ai.define_retriever(name='advanced_retriever', fn=advanced_retriever) + ``` + + Then you can use your custom retriever: + + ```python + docs = await ai.retrieve( + query=Document.from_text(query), + retriever='advanced_retriever' + ) + ``` + + + +## Rerankers and Two-Stage Retrieval + + + + A reranking model — also known as a cross-encoder — is a type of model that, + given a query and document, will output a similarity score. We use this score to + reorder the documents by relevance to our query. Reranker APIs take a list of + documents (for example the output of a retriever) and reorders the documents + based on their relevance to the query. This step can be useful for fine-tuning + the results and ensuring that the most pertinent information is used in the + prompt provided to a generative model. + + #### Reranker Example + + A reranker in Genkit is defined in a similar syntax to retrievers and indexers. + Here is an example using a reranker in Genkit. This flow reranks a set of + documents based on their relevance to the provided query using a predefined + Vertex AI reranker. + + ```ts + const FAKE_DOCUMENT_CONTENT = [ + 'pythagorean theorem', + 'e=mc^2', + 'pi', + 'dinosaurs', + 'quantum mechanics', + 'pizza', + 'harry potter', + ]; + + export const rerankFlow = ai.defineFlow( + { + name: 'rerankFlow', + inputSchema: z.object({ query: z.string() }), + outputSchema: z.array( + z.object({ + text: z.string(), + score: z.number(), + }), + ), + }, + async ({ query }) => { + const documents = FAKE_DOCUMENT_CONTENT.map((text) => ({ content: text })); + + const rerankedDocuments = await ai.rerank({ + reranker: 'vertexai/semantic-ranker-512', + query: { content: query }, + documents, + }); + + return rerankedDocuments.map((doc) => ({ + text: doc.content, + score: doc.metadata.score, + })); + }, + ); + ``` + + This reranker uses the Vertex AI genkit plugin with `semantic-ranker-512` to + score and rank documents. The higher the score, the more relevant the document + is to the query. + + #### Custom Rerankers + + You can also define custom rerankers to suit your specific use case. This is + helpful when you need to rerank documents using your own custom logic or a + custom model. 
Here's a simple example of defining a custom reranker: + + ```ts + export const customReranker = ai.defineReranker( + { + name: 'custom/reranker', + configSchema: z.object({ + k: z.number().optional(), + }), + }, + async (query, documents, options) => { + // Your custom reranking logic here + const rerankedDocs = documents.map((doc) => { + const score = Math.random(); // Assign random scores for demonstration + return { + ...doc, + metadata: { ...doc.metadata, score }, + }; + }); + + return rerankedDocs.sort((a, b) => b.metadata.score - a.metadata.score).slice(0, options.k || 3); + }, + ); + ``` + + Once defined, this custom reranker can be used just like any other reranker in + your RAG flows, giving you flexibility to implement advanced reranking + strategies. + + + Reranking functionality in Go can be implemented as part of custom retrievers. + You can apply reranking logic within your custom retriever implementations. + + + Reranking functionality in Python can be implemented as part of custom retrievers. + You can apply reranking logic within your custom retriever implementations. + + + +## Next steps + +- Learn about [tool calling](/unified-docs/tool-calling) to give your RAG system access to external APIs and functions +- Explore [multi-agent systems](/docs/multi-agent) for coordinating multiple AI agents with RAG capabilities +- Check out the vector database plugins for production-ready RAG implementations +- See the [evaluation guide](/docs/evaluation) for testing and improving your RAG system's performance diff --git a/src/content/docs/unified-docs/tool-calling.mdx b/src/content/docs/unified-docs/tool-calling.mdx new file mode 100644 index 00000000..22d3cf19 --- /dev/null +++ b/src/content/docs/unified-docs/tool-calling.mdx @@ -0,0 +1,807 @@ +--- +title: Tool calling +description: Learn how to enable LLMs to interact with external applications and data using Genkit's tool calling feature across JavaScript, Go, and Python, covering tool definition, usage, and advanced scenarios. +--- + +import ExampleLink from '@/components/ExampleLink.astro'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +_Tool calling_, also known as _function calling_, is a structured way to give +LLMs the ability to make requests back to the application that called it. You +define the tools you want to make available to the model, and the model will +make tool requests to your app as necessary to fulfill the prompts you give it. + +The use cases of tool calling generally fall into a few themes: + +**Giving an LLM access to information it wasn't trained with** + +- Frequently changing information, such as a stock price or the current + weather. +- Information specific to your app domain, such as product information or user + profiles. + +Note the overlap with [retrieval augmented generation](/unified-docs/rag) (RAG), which is also +a way to let an LLM integrate factual information into its generations. RAG is a +heavier solution that is most suited when you have a large amount of information +or the information that's most relevant to a prompt is ambiguous. On the other +hand, if retrieving the information the LLM needs is a simple function call or +database lookup, tool calling is more appropriate. + +**Introducing a degree of determinism into an LLM workflow** + +- Performing calculations that the LLM cannot reliably complete itself. 
+- Forcing an LLM to generate verbatim text under certain circumstances, such + as when responding to a question about an app's terms of service. + +**Performing an action when initiated by an LLM** + +- Turning on and off lights in an LLM-powered home assistant +- Reserving table reservations in an LLM-powered restaurant agent + +## Before you begin + +If you want to run the code examples on this page, first complete the steps in +the Getting started guide for your language. All of the examples assume that you +have already set up a project with Genkit dependencies installed. + + + + Complete the [Getting started](/docs/get-started) guide. + + + Complete the [Get started](/go/docs/get-started-go) guide. + + + Complete the [Get started](/python/docs/get-started) guide. + + + +This page discusses one of the advanced features of Genkit model abstraction, so +before you dive too deeply, you should be familiar with the content on the +[Generating content with AI models](/unified-docs/generating-content) page. You should also be familiar +with Genkit's system for defining input and output schemas, which is discussed +on the [Defining AI workflows](/unified-docs/creating-flows) page. + +## Overview of tool calling + + + +At a high level, this is what a typical tool-calling interaction with an LLM +looks like: + +1. The calling application prompts the LLM with a request and also includes in + the prompt a list of tools the LLM can use to generate a response. +2. The LLM either generates a complete response or generates a tool call request + in a specific format. +3. If the caller receives a complete response, the request is fulfilled and the + interaction ends; but if the caller receives a tool call, it performs + whatever logic is appropriate and sends a new request to the LLM containing + the original prompt or some variation of it as well as the result of the tool + call. +4. The LLM handles the new prompt as in Step 2. + +For this to work, several requirements must be met: + +- The model must be trained to make tool requests when it's needed to complete + a prompt. Most of the larger models provided through web APIs, such as + Gemini and Claude, can do this, but smaller and more specialized models + often cannot. Genkit will throw an error if you try to provide tools to a + model that doesn't support it. +- The calling application must provide tool definitions to the model in the + format it expects. +- The calling application must prompt the model to generate tool calling + requests in the format the application expects. + +## Tool calling with Genkit + +Genkit provides a single interface for tool calling with models that support it. +Each model plugin ensures that the last two of the above criteria are met, and +the Genkit instance's `generate()` function automatically carries out the tool +calling loop described earlier. + +### Model support + +Tool calling support depends on the model, the model API, and the Genkit plugin. +Consult the relevant documentation to determine if tool calling is likely to be +supported. In addition: + +- Genkit will throw an error if you try to provide tools to a model that + doesn't support it. +- If the plugin exports model references, the model info will indicate if it supports tool calling. 
+ + + + Check the `info.supports.tools` property on model references: + + ```ts + import { googleAI } from '@genkit-ai/googleai'; + + const model = googleAI.model('gemini-2.5-flash'); + console.log(model.info.supports.tools); // true/false + ``` + + + Check the `ModelInfo.Supports.Tools` property: + + ```go + // Model support information is available through the plugin + // Check plugin documentation for specific model capabilities + ``` + + + Check the `info.supports.tools` property: + + ```python + # Model support information is available through the plugin + # Check plugin documentation for specific model capabilities + ``` + + + +### Defining tools + +Use the appropriate method for your language to define tools: + + + + Use the Genkit instance's `defineTool()` function: + + ```ts + import { genkit, z } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + + const ai = genkit({ + plugins: [googleAI()], + model: googleAI.model('gemini-2.5-flash'), + }); + + const getWeather = ai.defineTool( + { + name: 'getWeather', + description: 'Gets the current weather in a given location', + inputSchema: z.object({ + location: z.string().describe('The location to get the current weather for'), + }), + outputSchema: z.string(), + }, + async (input) => { + // Here, we would typically make an API call or database query. For this + // example, we just return a fixed value. + return `The current weather in ${input.location} is 63°F and sunny.`; + }, + ); + ``` + + The syntax here looks just like the `defineFlow()` syntax; however, `name`, + `description`, and `inputSchema` parameters are required. When writing a tool + definition, take special care with the wording and descriptiveness of these + parameters. They are vital for the LLM to make effective use of the + available tools. + + + Use the `genkit.DefineTool()` function: + + ```go + package main + + import ( + "context" + "fmt" + "log" + + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlegenai" + ) + + // Define the input structure for the tool + type WeatherInput struct { + Location string `json:"location" jsonschema_description:"Location to get weather for"` + } + + func main() { + ctx := context.Background() + + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlegenai.GoogleAI{}), + genkit.WithDefaultModel("googleai/gemini-2.5-flash"), + ) + if err != nil { + log.Fatalf("Genkit initialization failed: %v", err) + } + + getWeatherTool := genkit.DefineTool( + g, "getWeather", "Gets the current weather in a given location", + func(ctx context.Context, input WeatherInput) (string, error) { + // Here, we would typically make an API call or database query. For this + // example, we just return a fixed value. + log.Printf("Tool 'getWeather' called for location: %s", input.Location) + return fmt.Sprintf("The current weather in %s is 63°F and sunny.", input.Location), nil + }) + } + ``` + + The syntax here looks just like the `genkit.DefineFlow()` syntax; however, you + must write a description. Take special care with the wording and descriptiveness + of the description as it is vital for the LLM to decide to use it appropriately. 
+ + + Use the Genkit instance's `tool()` decorator: + + ```python + from pydantic import BaseModel, Field + from genkit.ai import Genkit + from genkit.plugins.google_genai import GoogleGenai + + ai = Genkit( + plugins=[GoogleGenai()], + model='googleai/gemini-2.5-flash', + ) + + class WeatherInput(BaseModel): + location: str = Field(description='The location to get the current weather for') + + @ai.tool() + def get_weather(input: WeatherInput) -> str: + """Gets the current weather in a given location""" + # Replace with actual weather fetching logic + return f'The current weather in {input.location} is 63°F and sunny.' + ``` + + The syntax here looks just like the `flow()` syntax; however `description` + parameter is required. When writing a tool definition, take special care + with the wording and descriptiveness of these parameters. They are vital + for the LLM to make effective use of the available tools. + + + +### Using tools + +Include defined tools in your prompts to generate content: + + + + **Using `generate()`:** + + ```ts + const response = await ai.generate({ + prompt: "What is the weather in Baltimore?", + tools: [getWeather], + }); + ``` + + **Using `definePrompt()`:** + + ```ts + const weatherPrompt = ai.definePrompt( + { + name: "weatherPrompt", + tools: [getWeather], + }, + "What is the weather in {{location}}?" + ); + + const response = await weatherPrompt({ location: "Baltimore" }); + ``` + + **Using Prompt files:** + + ```dotprompt + --- + tools: [getWeather] + input: + schema: + location: string + --- + + What is the weather in {{location}}? + ``` + + Then you can execute the prompt in your code as follows: + + ```ts + // assuming prompt file is named weatherPrompt.prompt + const weatherPrompt = ai.prompt("weatherPrompt"); + + const response = await weatherPrompt({ location: "Baltimore" }); + ``` + + **Using Chat:** + + ```ts + const chat = ai.chat({ + system: "Answer questions using the tools you have.", + tools: [getWeather], + }); + + const response = await chat.send("What is the weather in Baltimore?"); + + // Or, specify tools that are message-specific + const response = await chat.send({ + prompt: "What is the weather in Baltimore?", + tools: [getWeather], + }); + ``` + + + **Using `genkit.Generate()`:** + + ```go + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("What is the weather in San Francisco?"), + ai.WithTools(getWeatherTool), + ) + ``` + + **Using `genkit.DefinePrompt()`:** + + ```go + weatherPrompt, err := genkit.DefinePrompt(g, "weatherPrompt", + ai.WithPrompt("What is the weather in {{location}}?"), + ai.WithTools(getWeatherTool), + ) + if err != nil { + log.Fatal(err) + } + + resp, err := weatherPrompt.Execute(ctx, + ai.WithInput(map[string]any{"location": "San Francisco"}), + ) + ``` + + **Using a `.prompt` file:** + + Create a file named `prompts/weatherPrompt.prompt`: + + ```dotprompt + --- + system: "Answer questions using the tools you have." + tools: [getWeather] + input: + schema: + location: string + --- + + What is the weather in {{location}}? + ``` + + Then execute it in your Go code: + + ```go + // Assuming prompt file named weatherPrompt.prompt exists in ./prompts dir. 
+ weatherPrompt := genkit.LookupPrompt("weatherPrompt") + if weatherPrompt == nil { + log.Fatal("no prompt named 'weatherPrompt' found") + } + + resp, err := weatherPrompt.Execute(ctx, + ai.WithInput(map[string]any{"location": "San Francisco"}), + ) + ``` + + + **Using `generate()`:** + + ```python + result = await ai.generate( + prompt='What is the weather in Baltimore?', + tools=['get_weather'], + ) + ``` + + **Using flows with tools:** + + ```python + @ai.flow() + async def weather_flow(location: str): + result = await ai.generate( + prompt=f'What is the weather in {location}?', + tools=['get_weather'], + ) + return result.text + ``` + + + +Genkit will automatically handle the tool call if the LLM needs to use the tool to answer the prompt. + +### Streaming and Tool Calling + +When combining tool calling with streaming responses, you will receive `toolRequest` and `toolResponse` content parts in the chunks of the stream: + + + + ```ts + const { stream } = ai.generateStream({ + prompt: "What is the weather in Baltimore?", + tools: [getWeather], + }); + + for await (const chunk of stream) { + console.log(chunk); + } + ``` + + This might produce a sequence of chunks similar to: + + ```ts + {index: 0, role: "model", content: [{text: "Okay, I'll check the weather"}]} + {index: 0, role: "model", content: [{text: "for Baltimore."}]} + // toolRequests will be emitted as a single chunk by most models + {index: 0, role: "model", content: [{toolRequest: {name: "getWeather", input: {location: "Baltimore"}}}]} + // when streaming multiple messages, Genkit increments the index and indicates the new role + {index: 1, role: "tool", content: [{toolResponse: {name: "getWeather", output: "Temperature: 68 degrees\nStatus: Cloudy."}}]} + {index: 2, role: "model", content: [{text: "The weather in Baltimore is 68 degrees and cloudy."}]} + ``` + + You can use these chunks to dynamically construct the full generated message sequence. + + + ```go + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("What is the weather in San Francisco?"), + ai.WithTools(getWeatherTool), + ai.WithStreaming(func(ctx context.Context, chunk *ai.ModelResponseChunk) error { + // Handle streaming chunks here + log.Println("Chunk:", chunk.Text()) + return nil + }), + ) + ``` + + + ```python + stream, response = ai.generate_stream( + prompt='What is the weather in Baltimore?', + tools=['get_weather'], + ) + + async for chunk in stream: + print(chunk) + ``` + + + +### Limiting Tool Call Iterations with `maxTurns` + +When working with tools that might trigger multiple sequential calls, you can control resource usage and prevent runaway execution using the `maxTurns` parameter. This sets a hard limit on how many back-and-forth interactions the model can have with your tools in a single generation cycle. + +**Why use maxTurns?** +- **Cost Control**: Prevents unexpected API usage charges from excessive tool calls +- **Performance**: Ensures responses complete within reasonable timeframes +- **Safety**: Guards against infinite loops in complex tool interactions +- **Predictability**: Makes your application behavior more deterministic + +The default value is 5 turns, which works well for most scenarios. Each "turn" represents one complete cycle where the model can make tool calls and receive responses. 
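+
+For illustration, here is a minimal JavaScript sketch that caps the loop at two turns, reusing the `getWeather` tool defined earlier; the tabbed examples below show more realistic settings:
+
+```ts
+const response = await ai.generate({
+  prompt: 'What is the weather in Baltimore?',
+  tools: [getWeather],
+  // Turn 1: the model may request getWeather; turn 2: it must answer using the
+  // tool output it already has. Any further tool calls are cut off.
+  maxTurns: 2,
+});
+```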
+
+
+
+  **Example: Web Research Agent**
+
+  Consider a research agent that might need to search multiple times to find comprehensive information:
+
+  ```ts
+  const webSearch = ai.defineTool(
+    {
+      name: 'webSearch',
+      description: 'Search the web for current information',
+      inputSchema: z.object({
+        query: z.string().describe('Search query'),
+      }),
+      outputSchema: z.string(),
+    },
+    async (input) => {
+      // Simulate web search API call
+      return `Search results for "${input.query}": [relevant information here]`;
+    },
+  );
+
+  const response = await ai.generate({
+    prompt: 'Research the latest developments in quantum computing, including recent breakthroughs, key companies, and future applications.',
+    tools: [webSearch],
+    maxTurns: 8, // Allow up to 8 research iterations
+  });
+  ```
+
+  **Example: Financial Calculator**
+
+  ```ts
+  const calculator = ai.defineTool(
+    {
+      name: 'calculator',
+      description: 'Perform mathematical calculations',
+      inputSchema: z.object({
+        expression: z.string().describe('Mathematical expression to evaluate'),
+      }),
+      outputSchema: z.number(),
+    },
+    async (input) => {
+      // eval() is used here only for brevity; in production, use a safe math
+      // expression parser instead.
+      return eval(input.expression);
+    },
+  );
+
+  const response = await ai.generate({
+    prompt: 'Calculate the total value of my portfolio: 100 shares of AAPL, 50 shares of GOOGL, and 200 shares of MSFT. Also calculate what percentage each holding represents.',
+    tools: [calculator, stockAnalyzer], // stockAnalyzer is another tool, assumed to be defined elsewhere
+    maxTurns: 12, // Multiple stock lookups + calculations needed
+  });
+  ```
+
+
+  ```go
+  resp, err := genkit.Generate(ctx, g,
+    ai.WithPrompt("Research the latest developments in quantum computing"),
+    ai.WithTools(webSearchTool),
+    ai.WithMaxTurns(8), // Allow up to 8 research iterations
+  )
+  ```
+
+
+  ```python
+  result = await ai.generate(
+    prompt='Research the latest developments in quantum computing',
+    tools=['web_search'],
+    max_turns=8, # Allow up to 8 research iterations
+  )
+  ```
+
+
+
+**What happens when maxTurns is reached?**
+
+When the limit is hit, Genkit stops the tool-calling loop and returns the model's current response, even if it was in the middle of using tools. The model will typically provide a partial answer or explain that it couldn't complete all the requested operations.
+
+### Dynamically defining tools at runtime
+
+
+
+  Like most things in Genkit, tools are normally defined up front, during your
+  app's initialization. This is what lets you inspect and run them from the
+  Genkit Dev UI, and it is the recommended approach. However, there are
+  scenarios where a tool must be defined dynamically, per user request.
+
+  You can define tools dynamically with the `ai.dynamicTool()` function. It is
+  very similar to `ai.defineTool()`, but dynamic tools are not tracked by the
+  Genkit runtime, so they cannot be interacted with from the Dev UI and must be
+  passed to the `ai.generate()` call by reference (for regular tools you can
+  also use a string tool name).
+ + ```ts + import { genkit, z } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + + const ai = genkit({ + plugins: [googleAI()], + model: googleAI.model('gemini-2.5-flash'), + }); + + ai.defineFlow('weatherFlow', async () => { + const getWeather = ai.dynamicTool( + { + name: 'getWeather', + description: 'Gets the current weather in a given location', + inputSchema: z.object({ + location: z.string().describe('The location to get the current weather for'), + }), + outputSchema: z.string(), + }, + async (input) => { + return `The current weather in ${input.location} is 63°F and sunny.`; + }, + ); + + const { text } = await ai.generate({ + prompt: 'What is the weather in Baltimore?', + tools: [getWeather], + }); + + return text; + }); + ``` + + When defining dynamic tools, to specify input and output schemas you can either + use Zod as shown in the previous example, or you can pass in manually + constructed JSON Schema. + + ```ts + const getWeather = ai.dynamicTool( + { + name: 'getWeather', + description: 'Gets the current weather in a given location', + inputJsonSchema: myInputJsonSchema, + outputJsonSchema: myOutputJsonSchema, + }, + async (input) => { + /* ... */ + }, + ); + ``` + + Dynamic tools don't require the implementation function. If you don't pass in + the function the tool will behave like an [interrupt](/docs/interrupts) and you can + do manual tool call handling: + + ```ts + const getWeather = ai.dynamicTool({ + name: 'getWeather', + description: 'Gets the current weather in a given location', + inputJsonSchema: myInputJsonSchema, + outputJsonSchema: myOutputJsonSchema, + }); + ``` + + + ```go + // Dynamic tool definition in Go + // Check Go documentation for specific implementation details + ``` + + + ```python + # Dynamic tool definition in Python + # Check Python documentation for specific implementation details + ``` + + + +### Pause the tool loop by using interrupts + +By default, Genkit repeatedly calls the LLM until every tool call has been +resolved. You can conditionally pause execution in situations where you want +to, for example: + +- Ask the user a question or display UI. +- Confirm a potentially risky action with the user. +- Request out-of-band approval for an action. + +**Interrupts** are special tools that can halt the loop and return control +to your code so that you can handle more advanced scenarios. Visit the +interrupts guide to learn how to use them. + +### Explicitly handling tool calls + +If you want full control over this tool-calling loop, for example to +apply more complicated logic, you can handle tool calls explicitly: + + + + Set the `returnToolRequests` parameter to `true`. Now it's your responsibility to ensure all of the tool requests are fulfilled: + + ```ts + const getWeather = ai.defineTool( + { + // ... tool definition ... + }, + async ({ location }) => { + // ... tool implementation ... 
+ }, + ); + + const generateOptions: GenerateOptions = { + prompt: "What's the weather like in Baltimore?", + tools: [getWeather], + returnToolRequests: true, + }; + + let llmResponse; + while (true) { + llmResponse = await ai.generate(generateOptions); + const toolRequests = llmResponse.toolRequests; + if (toolRequests.length < 1) { + break; + } + const toolResponses: ToolResponsePart[] = await Promise.all( + toolRequests.map(async (part) => { + switch (part.toolRequest.name) { + case 'getWeather': + return { + toolResponse: { + name: part.toolRequest.name, + ref: part.toolRequest.ref, + output: await getWeather(part.toolRequest.input), + }, + }; + default: + throw Error('Tool not found'); + } + }), + ); + generateOptions.messages = llmResponse.messages; + generateOptions.prompt = toolResponses; + } + ``` + + + Set the `WithReturnToolRequests()` option to `true`. Now it's your responsibility to ensure all of the tool requests are fulfilled: + + ```go + getWeatherTool := genkit.DefineTool( + g, "getWeather", "Gets the current weather in a given location", + func(ctx context.Context, location WeatherInput) (string, error) { + // Tool implementation... + return "sunny", nil + }, + ) + + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("What is the weather in San Francisco?"), + ai.WithTools(getWeatherTool), + ai.WithReturnToolRequests(true), + ) + if err != nil { + log.Fatal(err) + } + + parts := []*ai.Part{} + for _, req := range resp.ToolRequests() { + tool := genkit.LookupTool(g, req.Name) + if tool == nil { + log.Fatalf("tool %q not found", req.Name) + } + + output, err := tool.RunRaw(ctx, req.Input) + if err != nil { + log.Fatalf("tool %q execution failed: %v", tool.Name(), err) + } + + parts = append(parts, + ai.NewToolResponsePart(&ai.ToolResponse{ + Name: req.Name, + Ref: req.Ref, + Output: output, + })) + } + + resp, err = genkit.Generate(ctx, g, + ai.WithMessages(append(resp.History(), ai.NewMessage(ai.RoleTool, nil, parts...))...), + ) + if err != nil { + log.Fatal(err) + } + ``` + + + Set the `return_tool_requests` parameter to `True`. Now it's your responsibility to ensure all of the tool requests are fulfilled: + + ```python + llm_response = await ai.generate( + prompt='What is the weather in Baltimore?', + tools=['get_weather'], + return_tool_requests=True, + ) + + tool_request_parts = llm_response.tool_requests + + if len(tool_request_parts) == 0: + print(llm_response.text) + else: + for part in tool_request_parts: + await handle_tool(part.name, part.input) + ``` + + + +## Extending Tool Capabilities with MCP + +The [Model Context Protocol (MCP)](/unified-docs/model-context-protocol) provides a powerful way to extend your tool-calling capabilities by connecting to external MCP servers. With MCP, you can: + +- **Access pre-built tools** from the MCP ecosystem without implementing them yourself +- **Connect to external services** like databases, APIs, and file systems +- **Share tools** between different AI applications +- **Build extensible workflows** that leverage community-maintained tools + +MCP tools work seamlessly with Genkit's tool-calling system, allowing you to mix custom tools with external MCP tools in the same generation request. 
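+
+As a rough sketch of what that mixing looks like in JavaScript (the `mcpTools` array below is hypothetical and stands in for tool references obtained from a connected MCP client; see the Model Context Protocol guide for how to set one up):
+
+```ts
+// Hypothetical: `mcpTools` holds tool references exposed by a connected MCP
+// server, obtained as described in the Model Context Protocol guide.
+const response = await ai.generate({
+  prompt: 'Summarize the open issues in my project tracker.',
+  tools: [getWeather, ...mcpTools], // custom and MCP-provided tools side by side
+});
+```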
+ +## Next steps + +- Learn about [Model Context Protocol (MCP)](/unified-docs/model-context-protocol) to extend your tool capabilities with external servers +- Explore [interrupts](/docs/interrupts) to pause tool execution for user interaction +- See [retrieval-augmented generation (RAG)](/unified-docs/rag) for handling large amounts of contextual information +- Check out [multi-agent systems](/docs/multi-agent) for coordinating multiple AI agents with tools +- Browse the [tool calling example](https://github.com/firebase/genkit/tree/main/js/testapps/tool-calling) for a complete implementation diff --git a/src/content/docs/unified-docs/vector-databases/astra-db.mdx b/src/content/docs/unified-docs/vector-databases/astra-db.mdx new file mode 100644 index 00000000..86c8a365 --- /dev/null +++ b/src/content/docs/unified-docs/vector-databases/astra-db.mdx @@ -0,0 +1,727 @@ +--- +title: Astra DB Vector Database +description: Learn how to use DataStax Astra DB with Genkit across JavaScript, Go, and Python for serverless vector storage, semantic search, and RAG applications. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +DataStax Astra DB is a serverless vector database built on Apache Cassandra. It provides scalable vector storage with built-in embedding generation capabilities through Astra DB Vectorize, making it ideal for production AI applications that need reliable, distributed vector search. + +## Installation and Setup + + + + Install the Astra DB plugin: + + ```bash + npm install genkitx-astra-db + ``` + + Configure the plugin when initializing Genkit: + + ```ts + import { genkit } from 'genkit'; + import { astraDB } from 'genkitx-astra-db'; + import { googleAI } from '@genkit-ai/googleai'; + + const ai = genkit({ + plugins: [ + googleAI(), + astraDB([ + { + clientParams: { + applicationToken: process.env.ASTRA_DB_APPLICATION_TOKEN, + apiEndpoint: process.env.ASTRA_DB_API_ENDPOINT, + keyspace: 'default_keyspace', + }, + collectionName: 'documents', + embedder: googleAI.embedder('gemini-embedding-001'), + }, + ]), + ], + }); + ``` + + ### Prerequisites + + 1. **DataStax Account**: [Sign up for a free DataStax account](https://astra.datastax.com/signup) + 2. **Astra DB Database**: Create a Serverless Vector database + 3. **Collection**: Create a collection with dimensions matching your embedding model + 4. 
**Credentials**: Get your Application Token and API Endpoint + + ### Environment Variables + + ```bash + export ASTRA_DB_APPLICATION_TOKEN=your_application_token + export ASTRA_DB_API_ENDPOINT=your_astra_db_endpoint + ``` + + ### Using Astra DB Vectorize + + You can use Astra DB's built-in embedding generation: + + ```ts + const ai = genkit({ + plugins: [ + astraDB([ + { + clientParams: { + applicationToken: process.env.ASTRA_DB_APPLICATION_TOKEN, + apiEndpoint: process.env.ASTRA_DB_API_ENDPOINT, + keyspace: 'default_keyspace', + }, + collectionName: 'documents', + // No embedder needed - Astra DB Vectorize handles embedding generation + }, + ]), + ], + }); + ``` + + + For Go applications, you can use Astra DB through the DataStax Go driver: + + ```bash + go get github.com/datastax/astra-db-go + ``` + + ```go + package main + + import ( + "context" + "os" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/astradb" + "github.com/firebase/genkit/go/plugins/googleai" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins( + &googleai.GoogleAI{}, + &astradb.AstraDB{ + ApplicationToken: os.Getenv("ASTRA_DB_APPLICATION_TOKEN"), + APIEndpoint: os.Getenv("ASTRA_DB_API_ENDPOINT"), + Keyspace: "default_keyspace", + Collections: []astradb.CollectionConfig{ + { + Name: "documents", + Embedder: "googleai/gemini-embedding-001", + }, + }, + }, + ), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + + For Python applications, install the Astra DB client: + + ```bash + pip install astrapy genkit-plugin-astradb + ``` + + ```python + import os + from genkit.ai import Genkit + from genkit.plugins.astradb import AstraDB + from genkit.plugins.google_genai import GoogleGenAI + + ai = Genkit( + plugins=[ + GoogleGenAI(), + AstraDB( + application_token=os.getenv("ASTRA_DB_APPLICATION_TOKEN"), + api_endpoint=os.getenv("ASTRA_DB_API_ENDPOINT"), + keyspace="default_keyspace", + collections=[ + { + "name": "documents", + "embedder": "googleai/gemini-embedding-001", + } + ], + ), + ], + ) + ``` + + + +## Basic Usage + +### Document Indexing + + + + Index documents for semantic search: + + ```ts + import { astraDBIndexerRef } from 'genkitx-astra-db'; + import { Document } from 'genkit'; + + // Create indexer reference + const documentsIndexer = astraDBIndexerRef({ + collectionName: 'documents', + }); + + // Prepare documents for indexing + const documents: Document[] = [ + { + content: 'Astra DB is a serverless vector database built on Apache Cassandra.', + metadata: { + title: 'Astra DB Overview', + category: 'database', + source: 'documentation', + score: 95, + }, + }, + { + content: 'Serverless databases provide automatic scaling and management.', + metadata: { + title: 'Serverless Architecture', + category: 'technology', + source: 'blog', + score: 88, + }, + }, + ]; + + // Index documents + await ai.index({ + indexer: documentsIndexer, + documents, + }); + + // Batch indexing for large datasets + const batchSize = 100; + for (let i = 0; i < largeDocumentSet.length; i += batchSize) { + const batch = largeDocumentSet.slice(i, i + batchSize); + await ai.index({ + indexer: documentsIndexer, + documents: batch, + }); + } + ``` + + + Index documents for semantic search: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + ) + + func indexDocuments(ctx context.Context) error { + documents := []ai.Document{ + { + Content: "Astra DB is a serverless vector database built on Apache 
Cassandra.", + Metadata: map[string]interface{}{ + "title": "Astra DB Overview", + "category": "database", + "source": "documentation", + "score": 95, + }, + }, + { + Content: "Serverless databases provide automatic scaling and management.", + Metadata: map[string]interface{}{ + "title": "Serverless Architecture", + "category": "technology", + "source": "blog", + "score": 88, + }, + }, + } + + // Index documents + err := genkit.Index(ctx, g, + ai.WithIndexer("astradb/documents"), + ai.WithDocuments(documents), + ) + if err != nil { + return fmt.Errorf("failed to index documents: %w", err) + } + + return nil + } + + // Batch indexing function + func batchIndexDocuments(ctx context.Context, documents []ai.Document, batchSize int) error { + for i := 0; i < len(documents); i += batchSize { + end := i + batchSize + if end > len(documents) { + end = len(documents) + } + + batch := documents[i:end] + err := genkit.Index(ctx, g, + ai.WithIndexer("astradb/documents"), + ai.WithDocuments(batch), + ) + if err != nil { + return fmt.Errorf("failed to index batch: %w", err) + } + } + return nil + } + ``` + + + Index documents for semantic search: + + ```python + from typing import List, Dict, Any + + # Prepare documents for indexing + documents = [ + { + "content": "Astra DB is a serverless vector database built on Apache Cassandra.", + "metadata": { + "title": "Astra DB Overview", + "category": "database", + "source": "documentation", + "score": 95, + }, + }, + { + "content": "Serverless databases provide automatic scaling and management.", + "metadata": { + "title": "Serverless Architecture", + "category": "technology", + "source": "blog", + "score": 88, + }, + }, + ] + + # Index documents + async def index_documents(docs: List[Dict[str, Any]], collection_name: str = "documents"): + try: + indexer = f"astradb/{collection_name}" + + await ai.index( + indexer=indexer, + documents=docs + ) + + return {"indexed": len(docs), "success": True} + except Exception as error: + print(f"Indexing failed: {error}") + return {"indexed": 0, "success": False} + + # Batch indexing for large datasets + async def batch_index_documents( + docs: List[Dict[str, Any]], + collection_name: str = "documents", + batch_size: int = 100 + ): + total_indexed = 0 + + for i in range(0, len(docs), batch_size): + batch = docs[i:i + batch_size] + + try: + await ai.index( + indexer=f"astradb/{collection_name}", + documents=batch + ) + total_indexed += len(batch) + except Exception as error: + print(f"Batch indexing failed: {error}") + break + + return {"indexed": total_indexed, "success": total_indexed == len(docs)} + ``` + + + +### Document Retrieval + + + + Retrieve relevant documents using semantic search: + + ```ts + import { astraDBRetrieverRef } from 'genkitx-astra-db'; + + // Create retriever reference + const documentsRetriever = astraDBRetrieverRef({ + collectionName: 'documents', + }); + + // Basic retrieval + const query = "What is a serverless database?"; + const docs = await ai.retrieve({ + retriever: documentsRetriever, + query, + options: { + k: 5, // Number of documents to retrieve + }, + }); + + console.log('Retrieved documents:', docs); + + // Advanced retrieval with filtering + type DocumentSchema = { + _id: string; + text: string; + score: number; + category: string; + }; + + const typedRetriever = astraDBRetrieverRef({ + collectionName: 'documents', + }); + + const filteredDocs = await ai.retrieve({ + retriever: typedRetriever, + query, + options: { + k: 3, + filter: { + score: { $gt: 90 }, // Only documents with 
score > 90 + category: 'database', // Only database-related documents + }, + }, + }); + ``` + + + Retrieve relevant documents using semantic search: + + ```go + // Basic retrieval + func retrieveDocuments(ctx context.Context, query string) ([]ai.Document, error) { + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("astradb/documents"), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": 5, + }), + ) + if err != nil { + return nil, fmt.Errorf("retrieval failed: %w", err) + } + + return docs, nil + } + + // Advanced retrieval with filtering + func advancedRetrieve(ctx context.Context, query string, limit int, filter map[string]interface{}) ([]ai.Document, error) { + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("astradb/documents"), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": limit, + "filter": filter, + }), + ) + if err != nil { + return nil, fmt.Errorf("advanced retrieval failed: %w", err) + } + + return docs, nil + } + + // Usage example with filtering + func searchHighQualityDocuments(ctx context.Context, query string) ([]ai.Document, error) { + filter := map[string]interface{}{ + "score": map[string]interface{}{ + "$gt": 90, + }, + "category": "database", + } + + return advancedRetrieve(ctx, query, 3, filter) + } + ``` + + + Retrieve relevant documents using semantic search: + + ```python + from typing import List, Dict, Any, Optional + + # Basic retrieval + async def retrieve_documents(query: str, collection_name: str = "documents", k: int = 5) -> List[Dict[str, Any]]: + try: + retriever = f"astradb/{collection_name}" + docs = await ai.retrieve( + retriever=retriever, + query=query, + options={"k": k} + ) + return docs + except Exception as error: + print(f"Retrieval failed: {error}") + return [] + + # Advanced retrieval with filtering + async def advanced_retrieve( + query: str, + collection_name: str = "documents", + k: int = 5, + filter_criteria: Optional[Dict[str, Any]] = None + ) -> List[Dict[str, Any]]: + try: + retriever = f"astradb/{collection_name}" + + options = {"k": k} + if filter_criteria: + options["filter"] = filter_criteria + + docs = await ai.retrieve( + retriever=retriever, + query=query, + options=options + ) + + return docs + except Exception as error: + print(f"Advanced retrieval failed: {error}") + return [] + + # Usage examples + async def search_high_quality_documents(query: str) -> List[Dict[str, Any]]: + # Search for high-quality database documents + filter_criteria = { + "score": {"$gt": 90}, + "category": "database" + } + + return await advanced_retrieve( + query=query, + k=3, + filter_criteria=filter_criteria + ) + ``` + + + +## Advanced Features + +### Hybrid Search with Filtering + + + + Combine vector similarity with metadata filtering: + + ```ts + // Complex filtering with multiple conditions + const complexRetriever = astraDBRetrieverRef({ + collectionName: 'documents', + }); + + const complexSearch = await ai.retrieve({ + retriever: complexRetriever, + query: "database performance optimization", + options: { + k: 10, + filter: { + $and: [ + { score: { $gte: 85 } }, + { category: { $in: ['database', 'performance'] } }, + { source: { $ne: 'deprecated' } }, + ], + }, + }, + }); + + // Range-based filtering + const recentDocuments = await ai.retrieve({ + retriever: complexRetriever, + query: "latest database features", + options: { + k: 5, + filter: { + score: { $gte: 80, $lte: 100 }, + category: 'database', + }, + }, + }); + + // Text-based filtering + const specificSource = await 
ai.retrieve({ + retriever: complexRetriever, + query: "vector search capabilities", + options: { + k: 3, + filter: { + source: { $regex: 'official.*docs' }, + category: 'database', + }, + }, + }); + ``` + + + Combine vector similarity with metadata filtering: + + ```go + // Complex filtering function + func performComplexSearch(ctx context.Context, query string) ([]ai.Document, error) { + // Multiple condition filtering + complexFilter := map[string]interface{}{ + "$and": []map[string]interface{}{ + {"score": map[string]interface{}{"$gte": 85}}, + {"category": map[string]interface{}{"$in": []string{"database", "performance"}}}, + {"source": map[string]interface{}{"$ne": "deprecated"}}, + }, + } + + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("astradb/documents"), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": 10, + "filter": complexFilter, + }), + ) + if err != nil { + return nil, fmt.Errorf("complex search failed: %w", err) + } + + return docs, nil + } + + // Range-based filtering + func searchByScoreRange(ctx context.Context, query string, minScore, maxScore int) ([]ai.Document, error) { + filter := map[string]interface{}{ + "score": map[string]interface{}{ + "$gte": minScore, + "$lte": maxScore, + }, + "category": "database", + } + + return advancedRetrieve(ctx, query, 5, filter) + } + + // Text pattern filtering + func searchBySourcePattern(ctx context.Context, query, pattern string) ([]ai.Document, error) { + filter := map[string]interface{}{ + "source": map[string]interface{}{ + "$regex": pattern, + }, + "category": "database", + } + + return advancedRetrieve(ctx, query, 3, filter) + } + ``` + + + Combine vector similarity with metadata filtering: + + ```python + # Complex filtering with multiple conditions + async def perform_complex_search(query: str) -> List[Dict[str, Any]]: + complex_filter = { + "$and": [ + {"score": {"$gte": 85}}, + {"category": {"$in": ["database", "performance"]}}, + {"source": {"$ne": "deprecated"}}, + ] + } + + return await advanced_retrieve( + query=query, + k=10, + filter_criteria=complex_filter + ) + + # Range-based filtering + async def search_by_score_range( + query: str, + min_score: int = 80, + max_score: int = 100 + ) -> List[Dict[str, Any]]: + filter_criteria = { + "score": {"$gte": min_score, "$lte": max_score}, + "category": "database" + } + + return await advanced_retrieve( + query=query, + k=5, + filter_criteria=filter_criteria + ) + + # Text pattern filtering + async def search_by_source_pattern(query: str, pattern: str) -> List[Dict[str, Any]]: + filter_criteria = { + "source": {"$regex": pattern}, + "category": "database" + } + + return await advanced_retrieve( + query=query, + k=3, + filter_criteria=filter_criteria + ) + + # Comprehensive search example + async def comprehensive_search_example(): + # Search for high-quality recent database documentation + results = await perform_complex_search("vector database optimization") + + # Search within score range + range_results = await search_by_score_range("serverless architecture", 85, 95) + + # Search official documentation + official_docs = await search_by_source_pattern("database features", "official.*docs") + + return { + "complex_search": results, + "score_range": range_results, + "official_docs": official_docs + } + ``` + + + +## Best Practices + +### Database Configuration + +1. **Collection Design**: Choose appropriate dimensions for your embedding model +2. **Keyspace Organization**: Use keyspaces to organize different data types +3. 
**Indexing Strategy**: Leverage Astra DB's automatic indexing capabilities +4. **Schema Design**: Structure metadata for effective filtering + +### Performance Optimization + +1. **Batch Operations**: Index documents in batches for better throughput +2. **Connection Pooling**: Reuse connections for multiple operations +3. **Filtering Strategy**: Use metadata filters to reduce search space +4. **Embedding Strategy**: Consider using Astra DB Vectorize for built-in embedding generation + +### Production Deployment + +1. **Security**: Use secure application tokens and rotate them regularly +2. **Monitoring**: Monitor query performance and database metrics +3. **Scaling**: Leverage Astra DB's automatic scaling capabilities +4. **Backup**: Implement backup strategies for critical data + +### Cost Optimization + +1. **Efficient Queries**: Use appropriate k values and filters +2. **Data Lifecycle**: Archive or delete old documents when appropriate +3. **Resource Monitoring**: Monitor usage to optimize costs +4. **Vectorize Usage**: Consider Astra DB Vectorize to reduce external embedding costs + +## Next Steps + +- Learn about [RAG implementation](/unified-docs/rag) to build complete retrieval-augmented generation systems +- Explore [creating flows](/unified-docs/creating-flows) to build structured AI workflows with vector search +- See [deployment guides](/unified-docs/deployment) for production deployment strategies +- Check out other vector database options for different use cases diff --git a/src/content/docs/unified-docs/vector-databases/chromadb.mdx b/src/content/docs/unified-docs/vector-databases/chromadb.mdx new file mode 100644 index 00000000..4028ffff --- /dev/null +++ b/src/content/docs/unified-docs/vector-databases/chromadb.mdx @@ -0,0 +1,572 @@ +--- +title: ChromaDB Vector Database +description: Learn how to use ChromaDB with Genkit across JavaScript, Go, and Python for vector storage, semantic search, and RAG applications. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +ChromaDB is an open-source vector database designed for AI applications. It provides efficient vector storage, similarity search, and metadata filtering capabilities. ChromaDB can run in-memory, as a standalone server, or in client/server mode, making it flexible for both development and production use. + +## Installation and Setup + + + + Install the ChromaDB plugin: + + ```bash + npm install genkitx-chromadb + ``` + + Configure the plugin when initializing Genkit: + + ```ts + import { genkit } from 'genkit'; + import { chroma } from 'genkitx-chromadb'; + import { googleAI } from '@genkit-ai/googleai'; + + const ai = genkit({ + plugins: [ + chroma([ + { + collectionName: 'my-documents', + embedder: googleAI.embedder('gemini-embedding-001'), + }, + ]), + ], + }); + ``` + + ### Configuration Options + + ```ts + // Advanced configuration + const ai = genkit({ + plugins: [ + chroma([ + { + collectionName: 'my-documents', + embedder: googleAI.embedder('gemini-embedding-001'), + clientParams: { + path: 'http://localhost:8000', // Custom Chroma server + // auth: { ... 
}, // Authentication if needed + }, + embedderOptions: { + taskType: 'RETRIEVAL_DOCUMENT', + }, + }, + ]), + ], + }); + ``` + + + For Go applications, you can use ChromaDB through the Go client: + + ```bash + go get github.com/chroma-core/chroma/go + ``` + + ```go + package main + + import ( + "context" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/chroma" + "github.com/firebase/genkit/go/plugins/googleai" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins( + &googleai.GoogleAI{}, + &chroma.ChromaDB{ + ServerURL: "http://localhost:8000", + Collections: []chroma.CollectionConfig{ + { + Name: "my-documents", + Embedder: "googleai/gemini-embedding-001", + }, + }, + }, + ), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + + For Python applications, install the ChromaDB client: + + ```bash + pip install chromadb genkit-plugin-chromadb + ``` + + ```python + from genkit.ai import Genkit + from genkit.plugins.chromadb import ChromaDB + from genkit.plugins.google_genai import GoogleGenAI + + ai = Genkit( + plugins=[ + GoogleGenAI(), + ChromaDB( + server_url="http://localhost:8000", + collections=[ + { + "name": "my-documents", + "embedder": "googleai/gemini-embedding-001", + } + ], + ), + ], + ) + ``` + + + +## Basic Usage + +### Document Indexing + + + + Index documents for semantic search: + + ```ts + import { chromaIndexerRef } from 'genkitx-chromadb'; + import { Document } from 'genkit'; + + // Prepare documents for indexing + const documents: Document[] = [ + { + content: 'ChromaDB is an open-source vector database for AI applications.', + metadata: { + title: 'ChromaDB Overview', + category: 'database', + source: 'documentation', + }, + }, + { + content: 'Vector databases enable semantic search and similarity matching.', + metadata: { + title: 'Vector Search', + category: 'technology', + source: 'blog', + }, + }, + ]; + + // Index documents using the default configured collection + await ai.index({ + indexer: chromaIndexerRef, + documents, + }); + + // Or specify a specific collection + const documentsIndexer = chromaIndexerRef({ + collectionName: 'my-documents', + }); + + await ai.index({ + indexer: documentsIndexer, + documents, + }); + ``` + + + Index documents for semantic search: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + ) + + func indexDocuments(ctx context.Context) error { + documents := []ai.Document{ + { + Content: "ChromaDB is an open-source vector database for AI applications.", + Metadata: map[string]interface{}{ + "title": "ChromaDB Overview", + "category": "database", + "source": "documentation", + }, + }, + { + Content: "Vector databases enable semantic search and similarity matching.", + Metadata: map[string]interface{}{ + "title": "Vector Search", + "category": "technology", + "source": "blog", + }, + }, + } + + // Index documents + err := genkit.Index(ctx, g, + ai.WithIndexer("chromadb/my-documents"), + ai.WithDocuments(documents), + ) + if err != nil { + return fmt.Errorf("failed to index documents: %w", err) + } + + return nil + } + ``` + + + Index documents for semantic search: + + ```python + from typing import List, Dict, Any + + # Prepare documents for indexing + documents = [ + { + "content": "ChromaDB is an open-source vector database for AI applications.", + "metadata": { + "title": "ChromaDB Overview", + "category": "database", + "source": "documentation", + }, + }, + { + "content": "Vector databases 
enable semantic search and similarity matching.", + "metadata": { + "title": "Vector Search", + "category": "technology", + "source": "blog", + }, + }, + ] + + # Index documents + async def index_documents(docs: List[Dict[str, Any]], collection_name: str = None): + try: + indexer = f"chromadb/{collection_name}" if collection_name else "chromadb/my-documents" + + await ai.index( + indexer=indexer, + documents=docs + ) + + return {"indexed": len(docs), "success": True} + except Exception as error: + print(f"Indexing failed: {error}") + return {"indexed": 0, "success": False} + ``` + + + +### Document Retrieval + + + + Retrieve relevant documents using semantic search: + + ```ts + import { chromaRetrieverRef } from 'genkitx-chromadb'; + + // Basic retrieval + const query = "What is a vector database?"; + const docs = await ai.retrieve({ + retriever: chromaRetrieverRef, + query, + }); + + console.log('Retrieved documents:', docs); + + // Advanced retrieval with specific collection and options + const documentsRetriever = chromaRetrieverRef({ + collectionName: 'my-documents', + }); + + const advancedDocs = await ai.retrieve({ + retriever: documentsRetriever, + query, + options: { + k: 5, // Number of documents to retrieve + where: { + category: 'database', // Metadata filtering + }, + }, + }); + ``` + + + Retrieve relevant documents using semantic search: + + ```go + // Basic retrieval + func retrieveDocuments(ctx context.Context, query string) ([]ai.Document, error) { + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("chromadb/my-documents"), + ai.WithQuery(query), + ) + if err != nil { + return nil, fmt.Errorf("retrieval failed: %w", err) + } + + return docs, nil + } + + // Advanced retrieval with options + func advancedRetrieve(ctx context.Context, query, collectionName string, limit int, filter map[string]interface{}) ([]ai.Document, error) { + retriever := fmt.Sprintf("chromadb/%s", collectionName) + + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever(retriever), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": limit, + "where": filter, + }), + ) + if err != nil { + return nil, fmt.Errorf("advanced retrieval failed: %w", err) + } + + return docs, nil + } + ``` + + + Retrieve relevant documents using semantic search: + + ```python + from typing import List, Dict, Any, Optional + + # Basic retrieval + async def retrieve_documents(query: str, collection_name: str = "my-documents") -> List[Dict[str, Any]]: + try: + retriever = f"chromadb/{collection_name}" + docs = await ai.retrieve( + retriever=retriever, + query=query + ) + return docs + except Exception as error: + print(f"Retrieval failed: {error}") + return [] + + # Advanced retrieval with options + async def advanced_retrieve( + query: str, + collection_name: str = "my-documents", + limit: int = 5, + filter_criteria: Optional[Dict[str, Any]] = None + ) -> List[Dict[str, Any]]: + try: + retriever = f"chromadb/{collection_name}" + + options = {"k": limit} + if filter_criteria: + options["where"] = filter_criteria + + docs = await ai.retrieve( + retriever=retriever, + query=query, + options=options + ) + + return docs + except Exception as error: + print(f"Advanced retrieval failed: {error}") + return [] + ``` + + + +## ChromaDB Server Setup + +### Running ChromaDB Server + + + + Start a ChromaDB server for production use: + + ```bash + # Install ChromaDB server + pip install chromadb + + # Run the server + chroma run --host 0.0.0.0 --port 8000 + ``` + + Connect to the server in your application: + + 
```ts + const ai = genkit({ + plugins: [ + chroma([ + { + collectionName: 'my-documents', + embedder: googleAI.embedder('gemini-embedding-001'), + clientParams: { + path: 'http://your-chroma-server:8000', + }, + }, + ]), + ], + }); + ``` + + + Connect to a ChromaDB server: + + ```go + g, err := genkit.Init(ctx, + genkit.WithPlugins( + &chroma.ChromaDB{ + ServerURL: "http://your-chroma-server:8000", + Collections: []chroma.CollectionConfig{ + { + Name: "my-documents", + Embedder: "googleai/gemini-embedding-001", + }, + }, + }, + ), + ) + ``` + + + Connect to a ChromaDB server: + + ```python + ai = Genkit( + plugins=[ + ChromaDB( + server_url="http://your-chroma-server:8000", + collections=[ + { + "name": "my-documents", + "embedder": "googleai/gemini-embedding-001", + } + ], + ), + ], + ) + ``` + + + +## Advanced Features + +### Metadata Filtering + + + + Use metadata filtering for precise retrieval: + + ```ts + // Category-based filtering + const techDocs = await ai.retrieve({ + retriever: chromaRetrieverRef, + query: "database concepts", + options: { + k: 5, + where: { + category: 'technology', + }, + }, + }); + + // Complex filtering with multiple conditions + const complexFilter = await ai.retrieve({ + retriever: chromaRetrieverRef, + query: "AI applications", + options: { + k: 10, + where: { + $and: [ + { category: { $in: ['technology', 'database'] } }, + { source: 'documentation' }, + ], + }, + }, + }); + ``` + + + Use metadata filtering for precise retrieval: + + ```go + // Category-based filtering + func retrieveByCategory(ctx context.Context, query, category string) ([]ai.Document, error) { + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("chromadb/my-documents"), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": 5, + "where": map[string]interface{}{ + "category": category, + }, + }), + ) + if err != nil { + return nil, fmt.Errorf("category filtering failed: %w", err) + } + return docs, nil + } + ``` + + + Use metadata filtering for precise retrieval: + + ```python + # Category-based filtering + async def retrieve_by_category(query: str, category: str, limit: int = 5) -> List[Dict[str, Any]]: + try: + docs = await ai.retrieve( + retriever="chromadb/my-documents", + query=query, + options={ + "k": limit, + "where": {"category": category} + } + ) + return docs + except Exception as error: + print(f"Category filtering failed: {error}") + return [] + ``` + + + +## Best Practices + +### Collection Management + +1. **Use descriptive collection names**: Choose names that reflect the content type +2. **Organize by domain**: Separate collections for different data types or domains +3. **Consider collection size**: Balance between too many small collections and few large ones +4. **Plan for scaling**: Design collection structure for future growth + +### Performance Optimization + +1. **Batch operations**: Index documents in batches for better performance +2. **Optimize embeddings**: Choose appropriate embedding models for your use case +3. **Use metadata filtering**: Combine semantic search with metadata filters +4. **Monitor memory usage**: ChromaDB loads collections into memory + +### Production Deployment + +1. **Use persistent storage**: Configure ChromaDB with persistent storage +2. **Set up monitoring**: Monitor collection sizes and query performance +3. **Backup collections**: Implement regular backup strategies +4. 
**Scale horizontally**: Consider distributed deployment for large datasets + +## Next Steps + +- Learn about [RAG implementation](/unified-docs/rag) to build complete retrieval-augmented generation systems +- Explore [creating flows](/unified-docs/creating-flows) to build structured AI workflows with vector search +- See [deployment guides](/unified-docs/deployment) for production deployment strategies +- Check out other vector database options for different use cases diff --git a/src/content/docs/unified-docs/vector-databases/cloud-firestore.mdx b/src/content/docs/unified-docs/vector-databases/cloud-firestore.mdx new file mode 100644 index 00000000..0ffb3537 --- /dev/null +++ b/src/content/docs/unified-docs/vector-databases/cloud-firestore.mdx @@ -0,0 +1,981 @@ +--- +title: Cloud Firestore Vector Search +description: Learn how to use Google Cloud Firestore as a vector database for RAG applications across JavaScript, Go, and Python with Genkit. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +Cloud Firestore provides native vector search capabilities, making it an excellent choice for RAG (Retrieval-Augmented Generation) applications. Firestore's vector search feature allows you to store and query high-dimensional vector embeddings alongside your document data, providing fast and scalable similarity search. + +## Key Features + +- **Native vector search**: Built-in support for high-dimensional vector operations +- **Scalable**: Automatically scales with your application needs +- **Real-time**: Supports real-time updates and queries +- **Integrated**: Part of the Firebase/Google Cloud ecosystem +- **Multi-modal**: Store vectors alongside structured document data + +## Installation and Setup + + + + Install the Firebase plugin: + + ```bash + npm install @genkit-ai/firebase firebase-admin + ``` + + Initialize Firebase Admin SDK: + + ```ts + import { initializeApp } from 'firebase-admin/app'; + import { getFirestore } from 'firebase-admin/firestore'; + + const app = initializeApp({ + projectId: 'your-firebase-project-id', + }); + + const firestore = getFirestore(app); + ``` + + + Install the Firebase plugin: + + ```bash + go get github.com/firebase/genkit/go/plugins/firebase + ``` + + Import and configure: + + ```go + import ( + "github.com/firebase/genkit/go/plugins/firebase" + firebasev4 "firebase.google.com/go/v4" + ) + + // Initialize Firebase + firebasePlugin := &firebase.Firebase{ + ProjectId: "your-firebase-project-id", + } + ``` + + + Install the Firebase plugin: + + ```bash + pip install genkit-plugin-firebase google-cloud-firestore + ``` + + Initialize Firestore client: + + ```python + from google.cloud import firestore + from genkit.plugins.firebase.firestore import FirestoreVectorStore + + # Initialize Firestore client + firestore_client = firestore.Client(project="your-firebase-project-id") + ``` + + + +## Prerequisites + +### Firebase Project Setup + +1. **Create a Firebase project** at [Firebase Console](https://console.firebase.google.com/) +2. **Enable Firestore** in your project: + - Go to Firestore Database in the Firebase console + - Click "Create database" + - Choose your security rules and location +3. 
**Upgrade to Blaze plan** (required for vector search features) + +### Authentication + + + + Set up authentication using one of these methods: + + **Option 1: Service Account Key** + ```bash + export GOOGLE_APPLICATION_CREDENTIALS="path/to/serviceAccountKey.json" + ``` + + **Option 2: Service Account Credentials (Environment Variable)** + ```bash + export GCLOUD_SERVICE_ACCOUNT_CREDS='{"type":"service_account",...}' + ``` + + **Option 3: Application Default Credentials** + ```bash + gcloud auth application-default login + ``` + + If using `GCLOUD_SERVICE_ACCOUNT_CREDS`, configure Firestore explicitly: + + ```ts + import { initializeApp } from 'firebase-admin/app'; + import { getFirestore } from 'firebase-admin/firestore'; + + const app = initializeApp(); + let firestore = getFirestore(app); + + if (process.env.GCLOUD_SERVICE_ACCOUNT_CREDS) { + const serviceAccountCreds = JSON.parse(process.env.GCLOUD_SERVICE_ACCOUNT_CREDS); + const authOptions = { credentials: serviceAccountCreds }; + firestore.settings(authOptions); + } + ``` + + + **Local Development:** + ```bash + firebase login + firebase use your-project-id + ``` + + **Production:** + ```bash + export GOOGLE_APPLICATION_CREDENTIALS="path/to/serviceAccountKey.json" + ``` + + **Custom Firebase App:** + ```go + import "google.golang.org/api/option" + + app, err := firebasev4.NewApp(ctx, &firebasev4.Config{ + ProjectID: "your-project-id", + }, option.WithCredentialsFile("path/to/serviceAccountKey.json")) + + firebasePlugin := &firebase.Firebase{ + App: app, + } + ``` + + + **Local Development:** + ```bash + gcloud auth application-default login + gcloud config set project your-project-id + ``` + + **Production:** + ```bash + export GOOGLE_APPLICATION_CREDENTIALS="path/to/serviceAccountKey.json" + ``` + + **Custom Configuration:** + ```python + from google.cloud import firestore + from google.oauth2 import service_account + + credentials = service_account.Credentials.from_service_account_file( + "path/to/serviceAccountKey.json" + ) + firestore_client = firestore.Client( + project="your-project-id", + credentials=credentials + ) + ``` + + + +## Basic Usage + +### Defining a Firestore Retriever + + + + ```ts + import { genkit } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + import { defineFirestoreRetriever } from '@genkit-ai/firebase'; + import { initializeApp } from 'firebase-admin/app'; + import { getFirestore } from 'firebase-admin/firestore'; + + const ai = genkit({ + plugins: [googleAI()], + }); + + const app = initializeApp(); + const firestore = getFirestore(app); + + const retriever = defineFirestoreRetriever(ai, { + name: 'documentRetriever', + firestore, + collection: 'documents', + contentField: 'text', + vectorField: 'embedding', + embedder: googleAI.embedder('text-embedding-004'), + distanceMeasure: 'COSINE', // Options: 'COSINE', 'EUCLIDEAN', 'DOT_PRODUCT' + }); + ``` + + + ```go + import ( + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/firebase" + "github.com/firebase/genkit/go/plugins/googlegenai" + ) + + func main() { + ctx := context.Background() + + // Initialize plugins + firebasePlugin := &firebase.Firebase{ + ProjectId: "your-firebase-project-id", + } + + googleAIPlugin := &googlegenai.GoogleAI{ + APIKey: "your-api-key", + } + + g, err := genkit.Init(ctx, genkit.WithPlugins(firebasePlugin, googleAIPlugin)) + if err != nil { + log.Fatal(err) + } + + // Define retriever + retriever, err := firebase.DefineRetriever(ctx, g, firebase.RetrieverOptions{ + Name: 
"documentRetriever", + Collection: "documents", + VectorField: "embedding", + EmbedderName: "googleai/text-embedding-004", + TopK: 10, + }) + if err != nil { + log.Fatal(err) + } + } + ``` + + + ```python + from genkit.ai import Genkit + from genkit.plugins.firebase.firestore import FirestoreVectorStore + from genkit.plugins.google_genai import GoogleGenAI + from google.cloud import firestore + + # Initialize Firestore client + firestore_client = firestore.Client() + + ai = Genkit( + plugins=[ + GoogleGenAI(), + FirestoreVectorStore( + name='documentRetriever', + collection='documents', + vector_field='embedding', + content_field='text', + embedder='googleai/text-embedding-004', + firestore_client=firestore_client, + ), + ] + ) + ``` + + + +### Retrieving Documents + + + + ```ts + // Basic retrieval + const docs = await ai.retrieve({ + retriever, + query: 'What is machine learning?', + options: { + limit: 5, + }, + }); + + // Retrieval with filters + const filteredDocs = await ai.retrieve({ + retriever, + query: 'artificial intelligence concepts', + options: { + limit: 10, + where: { + category: 'technology', + status: 'published' + }, + collection: 'alternativeCollection', // Override default collection + }, + }); + + console.log('Retrieved documents:', docs); + ``` + + + ```go + // Basic retrieval + results, err := ai.Retrieve(ctx, retriever, ai.WithDocs("What is machine learning?")) + if err != nil { + log.Fatal(err) + } + + // Use retrieved documents in generation + var contextDocs []string + for _, doc := range results.Documents { + contextDocs = append(contextDocs, doc.Content[0].Text) + } + + context := strings.Join(contextDocs, "\n\n") + resp, err := genkit.Generate(ctx, g, + ai.WithModel(googleAIPlugin.Model(g, "gemini-1.5-flash")), + ai.WithPrompt(fmt.Sprintf("Context: %s\n\nQuestion: %s", + context, "What is machine learning?")), + ) + if err != nil { + log.Fatal(err) + } + + fmt.Printf("Answer: %s\n", resp.Text()) + ``` + + + ```python + from genkit.ai import Document + + async def retrieve_documents(): + # Create query document + query_doc = Document.from_text("What is machine learning?") + + # Retrieve documents + results = await ai.retrieve( + query=query_doc, + retriever='documentRetriever', + ) + + return results + + # Use in RAG workflow + async def rag_query(question: str): + query_doc = Document.from_text(question) + + # Retrieve relevant documents + retrieved_docs = await ai.retrieve( + query=query_doc, + retriever='documentRetriever', + ) + + # Prepare context + context = "\n\n".join([doc.content[0].text for doc in retrieved_docs]) + + # Generate answer + response = await ai.generate( + model="googleai/gemini-1.5-flash", + prompt=f"Context: {context}\n\nQuestion: {question}\n\nAnswer:", + ) + + return response.text + + # Example usage + # import asyncio + # answer = asyncio.run(rag_query("What is machine learning?")) + # print(answer) + ``` + + + +## Data Indexing + +### Document Structure + +Your Firestore documents should follow this structure: + +```json +{ + "text": "Your document content here...", + "embedding": [0.1, -0.2, 0.3, ...], + "metadata": { + "title": "Document Title", + "category": "Technology", + "author": "Author Name", + "timestamp": "2024-01-15T10:30:00Z" + } +} +``` + +### Populating the Index + + + + ```ts + import { chunk } from 'llm-chunk'; + import { FieldValue } from 'firebase-admin/firestore'; + import pdf from 'pdf-parse'; + import { readFile } from 'fs/promises'; + + const indexConfig = { + collection: 'documents', + contentField: 'text', 
+ vectorField: 'embedding', + embedder: googleAI.embedder('text-embedding-004'), + }; + + export async function indexDocuments(filePath: string) { + // Extract text from PDF + const pdfFile = await readFile(filePath); + const data = await pdf(pdfFile); + const pdfText = data.text; + + // Chunk the text + const chunks = await chunk(pdfText); + + // Index each chunk + for (const text of chunks) { + const embedding = (await ai.embed({ + embedder: indexConfig.embedder, + content: text, + }))[0].embedding; + + await firestore.collection(indexConfig.collection).add({ + [indexConfig.vectorField]: FieldValue.vector(embedding), + [indexConfig.contentField]: text, + metadata: { + source: filePath, + timestamp: new Date().toISOString(), + }, + }); + } + } + + // Batch indexing for better performance + export async function batchIndexDocuments(documents: string[]) { + const batch = firestore.batch(); + + for (const text of documents) { + const embedding = (await ai.embed({ + embedder: indexConfig.embedder, + content: text, + }))[0].embedding; + + const docRef = firestore.collection(indexConfig.collection).doc(); + batch.set(docRef, { + [indexConfig.vectorField]: FieldValue.vector(embedding), + [indexConfig.contentField]: text, + metadata: { + timestamp: new Date().toISOString(), + }, + }); + } + + await batch.commit(); + } + ``` + + + ```go + import ( + "context" + "fmt" + "log" + "time" + "github.com/firebase/genkit/go/ai" + firebasev4 "firebase.google.com/go/v4" + ) + + func indexDocuments(ctx context.Context, g *genkit.Genkit, documents []string) error { + // Get Firestore client + app, err := firebasev4.NewApp(ctx, &firebasev4.Config{ + ProjectID: "your-project-id", + }) + if err != nil { + return err + } + + client, err := app.Firestore(ctx) + if err != nil { + return err + } + defer client.Close() + + // Get embedder + embedder := googleAIPlugin.Embedder(g, "text-embedding-004") + + for i, text := range documents { + // Generate embedding + embeddingResp, err := ai.Embed(ctx, embedder, ai.WithDocs(text)) + if err != nil { + return fmt.Errorf("failed to generate embedding: %w", err) + } + + // Store in Firestore + docData := map[string]interface{}{ + "text": text, + "embedding": embeddingResp.Embeddings[0].Embedding, + "metadata": map[string]interface{}{ + "index": i, + "timestamp": time.Now().Format(time.RFC3339), + }, + } + + _, err = client.Collection("documents").Doc(fmt.Sprintf("doc-%d", i)).Set(ctx, docData) + if err != nil { + return fmt.Errorf("failed to store document: %w", err) + } + } + + return nil + } + + // Batch indexing + func batchIndexDocuments(ctx context.Context, g *genkit.Genkit, documents []string) error { + app, err := firebasev4.NewApp(ctx, &firebasev4.Config{ + ProjectID: "your-project-id", + }) + if err != nil { + return err + } + + client, err := app.Firestore(ctx) + if err != nil { + return err + } + defer client.Close() + + batch := client.Batch() + embedder := googleAIPlugin.Embedder(g, "text-embedding-004") + + for i, text := range documents { + embeddingResp, err := ai.Embed(ctx, embedder, ai.WithDocs(text)) + if err != nil { + return err + } + + docRef := client.Collection("documents").Doc(fmt.Sprintf("doc-%d", i)) + batch.Set(docRef, map[string]interface{}{ + "text": text, + "embedding": embeddingResp.Embeddings[0].Embedding, + "metadata": map[string]interface{}{ + "index": i, + "timestamp": time.Now().Format(time.RFC3339), + }, + }) + } + + _, err = batch.Commit(ctx) + return err + } + ``` + + + ```python + from genkit.ai import Document + from genkit.types 
import TextPart + from google.cloud import firestore + import asyncio + + async def index_documents(ai: Genkit, documents: list[str], collection_name: str): + """Index documents in Firestore with embeddings.""" + + # Prepare documents for embedding + genkit_documents = [Document(content=[TextPart(text=doc)]) for doc in documents] + + # Generate embeddings + embed_response = await ai.embed( + embedder='googleai/text-embedding-004', + content=genkit_documents + ) + embeddings = [emb.embedding for emb in embed_response.embeddings] + + # Get Firestore client + firestore_client = firestore.Client() + + # Index each document + for i, document_text in enumerate(documents): + doc_id = f'doc-{i + 1}' + embedding = embeddings[i] + + doc_ref = firestore_client.collection(collection_name).document(doc_id) + doc_ref.set({ + 'text': document_text, + 'embedding': embedding, + 'metadata': { + 'index': i, + 'timestamp': firestore.SERVER_TIMESTAMP, + }, + }) + print(f"Indexed document {doc_id}") + + # Batch indexing for better performance + async def batch_index_documents(ai: Genkit, documents: list[str], collection_name: str): + """Batch index documents for better performance.""" + + genkit_documents = [Document(content=[TextPart(text=doc)]) for doc in documents] + embed_response = await ai.embed( + embedder='googleai/text-embedding-004', + content=genkit_documents + ) + embeddings = [emb.embedding for emb in embed_response.embeddings] + + firestore_client = firestore.Client() + batch = firestore_client.batch() + + for i, document_text in enumerate(documents): + doc_ref = firestore_client.collection(collection_name).document(f'doc-{i + 1}') + batch.set(doc_ref, { + 'text': document_text, + 'embedding': embeddings[i], + 'metadata': { + 'index': i, + 'timestamp': firestore.SERVER_TIMESTAMP, + }, + }) + + # Commit batch + batch.commit() + print(f"Batch indexed {len(documents)} documents") + + # Example usage + # documents = [ + # "Machine learning is a subset of artificial intelligence...", + # "Deep learning uses neural networks with multiple layers...", + # "Natural language processing enables computers to understand text...", + # ] + # asyncio.run(index_documents(ai, documents, 'documents')) + ``` + + + +## Creating Vector Indexes + +Firestore requires vector indexes for efficient similarity search. 
Create the index using the `gcloud` CLI: + + + + ```bash + # For text-embedding-004 (768 dimensions) + gcloud alpha firestore indexes composite create \ + --project=your-firebase-project-id \ + --collection-group=documents \ + --query-scope=COLLECTION \ + --field-config=vector-config='{"dimension":"768","flat": "{}"}',field-path=embedding + + # For other embedding models, adjust the dimension: + # text-embedding-3-small: 1536 dimensions + # text-embedding-3-large: 3072 dimensions + ``` + + You can also let Firestore suggest the command by making a query first: + + ```ts + try { + const docs = await ai.retrieve({ + retriever, + query: 'test query', + }); + } catch (error) { + // Firestore will throw an error with the exact command needed + console.error('Index required:', error.message); + } + ``` + + + ```bash + # Create vector index for your collection + gcloud alpha firestore indexes composite create \ + --project=your-firebase-project-id \ + --collection-group=documents \ + --query-scope=COLLECTION \ + --field-config=vector-config='{"dimension":"768","flat": "{}"}',field-path=embedding + ``` + + Check index creation status: + + ```bash + gcloud firestore indexes composite list --project=your-firebase-project-id + ``` + + + ```bash + # Create the vector index + gcloud firestore indexes composite create \ + --project=your-firebase-project-id \ + --collection-group=documents \ + --query-scope=COLLECTION \ + --field-config=vector-config='{"dimension":"768","flat": "{}"}',field-path=embedding + ``` + + Common embedding dimensions: + - **text-embedding-004**: 768 dimensions + - **text-embedding-3-small**: 1536 dimensions + - **text-embedding-3-large**: 3072 dimensions + + You can also trigger index creation by making a query: + + ```python + try: + query_doc = Document.from_text("test query") + results = await ai.retrieve( + query=query_doc, + retriever='documentRetriever', + ) + except Exception as error: + # Error message will contain the exact gcloud command needed + print(f"Index required: {error}") + ``` + + + +## Advanced Configuration + +### Retrieval Options + + + + ```ts + const retriever = defineFirestoreRetriever(ai, { + name: 'advancedRetriever', + firestore, + collection: 'documents', + contentField: 'text', + vectorField: 'embedding', + embedder: googleAI.embedder('text-embedding-004'), + distanceMeasure: 'COSINE', // 'COSINE', 'EUCLIDEAN', 'DOT_PRODUCT' + }); + + // Advanced retrieval with options + const docs = await ai.retrieve({ + retriever, + query: 'machine learning algorithms', + options: { + limit: 10, // Maximum number of results + where: { // Filter conditions + category: 'technology', + status: 'published', + 'metadata.author': 'John Doe' + }, + collection: 'tech_docs', // Override default collection + }, + }); + ``` + + + ```go + // Advanced retriever configuration + retriever, err := firebase.DefineRetriever(ctx, g, firebase.RetrieverOptions{ + Name: "advancedRetriever", + Collection: "documents", + VectorField: "embedding", + EmbedderName: "googleai/text-embedding-004", + TopK: 10, + // Additional configuration options + }) + + // Use with filtering (implementation depends on Firebase Go SDK capabilities) + results, err := ai.Retrieve(ctx, retriever, ai.WithDocs("machine learning")) + if err != nil { + log.Fatal(err) + } + + // Process results + for _, doc := range results.Documents { + fmt.Printf("Document: %s\n", doc.Content[0].Text) + } + ``` + + + ```python + # Advanced configuration + ai = Genkit( + plugins=[ + GoogleGenAI(), + FirestoreVectorStore( + 
name='advancedRetriever', + collection='documents', + vector_field='embedding', + content_field='text', + embedder='googleai/text-embedding-004', + firestore_client=firestore_client, + # Additional configuration options can be added here + ), + ] + ) + + # Advanced retrieval with custom logic + async def advanced_retrieve(query: str, filters: dict = None): + query_doc = Document.from_text(query) + + # Basic retrieval + results = await ai.retrieve( + query=query_doc, + retriever='advancedRetriever', + ) + + # Apply additional filtering if needed + if filters: + # Custom filtering logic here + pass + + return results + ``` + + + +### Multiple Collections + + + + ```ts + // Define multiple retrievers for different collections + const techRetriever = defineFirestoreRetriever(ai, { + name: 'techDocuments', + firestore, + collection: 'tech_docs', + contentField: 'content', + vectorField: 'embedding', + embedder: googleAI.embedder('text-embedding-004'), + }); + + const generalRetriever = defineFirestoreRetriever(ai, { + name: 'generalDocuments', + firestore, + collection: 'general_docs', + contentField: 'text', + vectorField: 'embedding', + embedder: googleAI.embedder('text-embedding-004'), + }); + + // Use different retrievers based on query type + async function smartRetrieve(query: string, domain: string) { + const retriever = domain === 'tech' ? techRetriever : generalRetriever; + + return await ai.retrieve({ + retriever, + query, + options: { limit: 5 }, + }); + } + ``` + + + ```go + // Define multiple retrievers + techRetriever, err := firebase.DefineRetriever(ctx, g, firebase.RetrieverOptions{ + Name: "techDocuments", + Collection: "tech_docs", + VectorField: "embedding", + EmbedderName: "googleai/text-embedding-004", + TopK: 5, + }) + + generalRetriever, err := firebase.DefineRetriever(ctx, g, firebase.RetrieverOptions{ + Name: "generalDocuments", + Collection: "general_docs", + VectorField: "embedding", + EmbedderName: "googleai/text-embedding-004", + TopK: 10, + }) + + // Smart retrieval function + func smartRetrieve(ctx context.Context, query, domain string) (*ai.RetrieveResponse, error) { + var retriever ai.Retriever + + switch domain { + case "tech": + retriever = techRetriever + default: + retriever = generalRetriever + } + + return ai.Retrieve(ctx, retriever, ai.WithDocs(query)) + } + ``` + + + ```python + # Define multiple retrievers + ai = Genkit( + plugins=[ + GoogleGenAI(), + FirestoreVectorStore( + name='techDocuments', + collection='tech_docs', + vector_field='embedding', + content_field='content', + embedder='googleai/text-embedding-004', + firestore_client=firestore_client, + ), + FirestoreVectorStore( + name='generalDocuments', + collection='general_docs', + vector_field='embedding', + content_field='text', + embedder='googleai/text-embedding-004', + firestore_client=firestore_client, + ), + ] + ) + + async def smart_retrieve(query: str, domain: str = 'general'): + """Retrieve from different collections based on domain.""" + retriever_name = 'techDocuments' if domain == 'tech' else 'generalDocuments' + + query_doc = Document.from_text(query) + return await ai.retrieve( + query=query_doc, + retriever=retriever_name, + ) + ``` + + + +## Best Practices + +### Performance Optimization + +1. **Batch Operations**: Use batch writes when indexing multiple documents +2. **Appropriate Chunking**: Split large documents into optimal chunk sizes (500-1000 tokens) +3. **Index Management**: Create indexes before querying to avoid errors +4. 
**Caching**: Implement caching for frequently accessed documents + +### Security + +1. **Firestore Rules**: Configure proper security rules for your collections: + ```javascript + // Example Firestore security rules + rules_version = '2'; + service cloud.firestore { + match /databases/{database}/documents { + match /documents/{document} { + allow read, write: if request.auth != null; + } + } + } + ``` + +2. **API Key Management**: Never expose API keys in client-side code +3. **Authentication**: Implement proper user authentication for sensitive data + +### Cost Management + +1. **Document Size**: Keep documents reasonably sized to minimize read costs +2. **Query Optimization**: Design efficient queries to reduce operation costs +3. **Storage Management**: Regularly clean up unused documents and embeddings +4. **Index Strategy**: Only create necessary indexes to minimize storage costs + +## Troubleshooting + +### Common Issues + +1. **Index Not Found Error**: + ```bash + # Create the required index + gcloud alpha firestore indexes composite create \ + --project=your-project-id \ + --collection-group=your-collection \ + --query-scope=COLLECTION \ + --field-config=vector-config='{"dimension":"768","flat": "{}"}',field-path=embedding + ``` + +2. **Authentication Errors**: + - Ensure `GOOGLE_APPLICATION_CREDENTIALS` is set correctly + - Verify Firebase project permissions + - Check that the service account has Firestore access + +3. **Dimension Mismatch**: + - Ensure diff --git a/src/content/docs/unified-docs/vector-databases/cloud-sql-postgresql.mdx b/src/content/docs/unified-docs/vector-databases/cloud-sql-postgresql.mdx new file mode 100644 index 00000000..c97eb0ee --- /dev/null +++ b/src/content/docs/unified-docs/vector-databases/cloud-sql-postgresql.mdx @@ -0,0 +1,914 @@ +--- +title: Cloud SQL for PostgreSQL Vector Database +description: Learn how to use Google Cloud SQL for PostgreSQL with pgvector extension and Genkit across JavaScript, Go, and Python for managed vector storage and semantic search. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +Google Cloud SQL for PostgreSQL with the pgvector extension provides a fully managed PostgreSQL database with vector search capabilities. It combines the reliability and scalability of Google Cloud with the power of PostgreSQL and pgvector, making it ideal for production AI applications that need managed vector storage with enterprise-grade features. 
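+
+The Genkit plugin manages table creation and querying for you, but it can help to see roughly what pgvector itself provides underneath. The following is a minimal SQL sketch, not what the plugin emits verbatim; the table name, column names, and 768-dimension size are illustrative and should match your embedding model:
+
+```sql
+-- Enable the pgvector extension (once per database)
+CREATE EXTENSION IF NOT EXISTS vector;
+
+-- A table with a vector column; the dimension must match your embedder's output
+CREATE TABLE documents (
+  id TEXT PRIMARY KEY,
+  content TEXT NOT NULL,
+  embedding vector(768)
+);
+
+-- Nearest-neighbor search by cosine distance (<=> is pgvector's cosine distance operator)
+SELECT id, content
+FROM documents
+ORDER BY embedding <=> $1::vector  -- $1 is the query embedding
+LIMIT 5;
+```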
+ +## Installation and Setup + + + + Install the Cloud SQL PostgreSQL plugin: + + ```bash + npm install genkitx-cloud-sql-pg + ``` + + Configure the plugin when initializing Genkit: + + ```ts + import { genkit } from 'genkit'; + import { postgres, PostgresEngine } from 'genkitx-cloud-sql-pg'; + import { vertexAI } from '@genkit-ai/vertexai'; + + // Create PostgresEngine instance + const engine = await PostgresEngine.fromInstance( + 'my-project', + 'us-central1', + 'my-instance', + 'my-database' + ); + + // Initialize vector store table + await engine.initVectorstoreTable('documents', 768, { + schemaName: 'public', + contentColumn: 'content', + embeddingColumn: 'embedding', + idColumn: 'id', + metadataColumns: [ + { name: 'title', dataType: 'TEXT' }, + { name: 'category', dataType: 'TEXT' }, + { name: 'source', dataType: 'TEXT' }, + ], + metadataJsonColumn: 'metadata', + storeMetadata: true, + overwriteExisting: false, + }); + + const ai = genkit({ + plugins: [ + vertexAI(), + postgres([ + { + tableName: 'documents', + engine: engine, + embedder: vertexAI.embedder('gemini-embedding-001'), + schemaName: 'public', + contentColumn: 'content', + embeddingColumn: 'embedding', + idColumn: 'id', + metadataColumns: ['title', 'category', 'source'], + metadataJsonColumn: 'metadata', + }, + ]), + ], + }); + ``` + + ### Prerequisites + + 1. **Google Cloud Project**: Set up a Google Cloud project + 2. **Cloud SQL Instance**: Create a PostgreSQL instance with pgvector extension + 3. **Authentication**: Configure Google Cloud authentication + 4. **Network Access**: Configure VPC or authorized networks + + ### Cloud SQL Instance Setup + + ```bash + # Create Cloud SQL PostgreSQL instance + gcloud sql instances create my-instance \ + --database-version=POSTGRES_15 \ + --tier=db-standard-2 \ + --region=us-central1 \ + --storage-type=SSD \ + --storage-size=100GB \ + --database-flags=shared_preload_libraries=vector + + # Create database + gcloud sql databases create my-database --instance=my-instance + + # Enable pgvector extension + gcloud sql connect my-instance --user=postgres --database=my-database + # Then run: CREATE EXTENSION IF NOT EXISTS vector; + ``` + + + For Go applications, you can use Cloud SQL through the Google Cloud SQL Go connector: + + ```bash + go get cloud.google.com/go/cloudsqlconn + go get github.com/lib/pq + go get github.com/pgvector/pgvector-go + ``` + + ```go + package main + + import ( + "context" + "database/sql" + "net" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/cloudsql" + "github.com/firebase/genkit/go/plugins/vertexai" + "cloud.google.com/go/cloudsqlconn" + ) + + func main() { + ctx := context.Background() + + // Create Cloud SQL connector + d, err := cloudsqlconn.NewDialer(ctx) + if err != nil { + log.Fatal(err) + } + defer d.Close() + + // Configure database connection + dsn := "user=postgres dbname=my-database sslmode=disable" + config, err := pq.ParseURL(dsn) + if err != nil { + log.Fatal(err) + } + + // Connect to Cloud SQL + db, err := sql.Open("postgres", config) + if err != nil { + log.Fatal(err) + } + defer db.Close() + + // Initialize Genkit + g, err := genkit.Init(ctx, + genkit.WithPlugins( + &vertexai.VertexAI{}, + &cloudsql.CloudSQL{ + Database: db, + Tables: []cloudsql.TableConfig{ + { + Name: "documents", + Embedder: "vertexai/gemini-embedding-001", + Schema: cloudsql.TableSchema{ + ContentColumn: "content", + EmbeddingColumn: "embedding", + IDColumn: "id", + MetadataColumns: []string{"title", "category", "source"}, + }, + }, 
+ }, + }, + ), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + + For Python applications, install the Cloud SQL connector: + + ```bash + pip install cloud-sql-python-connector psycopg2-binary pgvector + ``` + + ```python + import os + from google.cloud.sql.connector import Connector + import psycopg2 + from pgvector.psycopg2 import register_vector + from genkit.ai import Genkit + from genkit.plugins.cloudsql import CloudSQL + from genkit.plugins.vertexai import VertexAI + + # Initialize Cloud SQL connector + def create_connection(): + connector = Connector() + + def getconn(): + conn = connector.connect( + "my-project:us-central1:my-instance", + "pg8000", + user="postgres", + password=os.getenv("DB_PASSWORD"), + db="my-database" + ) + register_vector(conn) + return conn + + return getconn + + # Initialize Genkit + ai = Genkit( + plugins=[ + VertexAI(), + CloudSQL( + connection_factory=create_connection(), + tables=[ + { + "name": "documents", + "embedder": "vertexai/gemini-embedding-001", + "schema": { + "content_column": "content", + "embedding_column": "embedding", + "id_column": "id", + "metadata_columns": ["title", "category", "source"], + }, + } + ], + ), + ], + ) + ``` + + + +## Basic Usage + +### Document Indexing + + + + Index documents with custom metadata handling: + + ```ts + import { postgresIndexerRef } from 'genkitx-cloud-sql-pg'; + import { Document } from 'genkit'; + + // Create indexer reference + const documentsIndexer = postgresIndexerRef({ + tableName: 'documents', + idColumn: 'id', + metadataColumns: ['title', 'category', 'source'], + }); + + // Prepare documents for indexing + const documents: Document[] = [ + { + content: 'Cloud SQL for PostgreSQL provides managed database services with vector capabilities.', + metadata: { + id: 'doc-1', + title: 'Cloud SQL Overview', + category: 'database', + source: 'documentation', + tags: ['cloud', 'sql', 'postgresql'], + }, + }, + { + content: 'Managed databases reduce operational overhead and provide automatic scaling.', + metadata: { + id: 'doc-2', + title: 'Managed Database Benefits', + category: 'technology', + source: 'blog', + tags: ['managed', 'scaling', 'operations'], + }, + }, + ]; + + // Index documents + await ai.index({ + indexer: documentsIndexer, + documents, + options: { + batchSize: 100, // Process documents in batches + }, + }); + + // Batch indexing for large datasets + const batchSize = 50; + for (let i = 0; i < largeDocumentSet.length; i += batchSize) { + const batch = largeDocumentSet.slice(i, i + batchSize); + await ai.index({ + indexer: documentsIndexer, + documents: batch, + options: { batchSize }, + }); + } + ``` + + + Index documents with custom metadata handling: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + ) + + func indexDocuments(ctx context.Context) error { + documents := []ai.Document{ + { + Content: "Cloud SQL for PostgreSQL provides managed database services with vector capabilities.", + Metadata: map[string]interface{}{ + "id": "doc-1", + "title": "Cloud SQL Overview", + "category": "database", + "source": "documentation", + "tags": []string{"cloud", "sql", "postgresql"}, + }, + }, + { + Content: "Managed databases reduce operational overhead and provide automatic scaling.", + Metadata: map[string]interface{}{ + "id": "doc-2", + "title": "Managed Database Benefits", + "category": "technology", + "source": "blog", + "tags": []string{"managed", "scaling", "operations"}, + }, + }, + } + + // Index documents + err := 
genkit.Index(ctx, g, + ai.WithIndexer("cloudsql/documents"), + ai.WithDocuments(documents), + ai.WithOptions(map[string]interface{}{ + "batchSize": 100, + }), + ) + if err != nil { + return fmt.Errorf("failed to index documents: %w", err) + } + + return nil + } + + // Batch indexing function + func batchIndexDocuments(ctx context.Context, documents []ai.Document, batchSize int) error { + for i := 0; i < len(documents); i += batchSize { + end := i + batchSize + if end > len(documents) { + end = len(documents) + } + + batch := documents[i:end] + err := genkit.Index(ctx, g, + ai.WithIndexer("cloudsql/documents"), + ai.WithDocuments(batch), + ai.WithOptions(map[string]interface{}{ + "batchSize": batchSize, + }), + ) + if err != nil { + return fmt.Errorf("failed to index batch: %w", err) + } + } + return nil + } + ``` + + + Index documents with custom metadata handling: + + ```python + from typing import List, Dict, Any + + # Prepare documents for indexing + documents = [ + { + "content": "Cloud SQL for PostgreSQL provides managed database services with vector capabilities.", + "metadata": { + "id": "doc-1", + "title": "Cloud SQL Overview", + "category": "database", + "source": "documentation", + "tags": ["cloud", "sql", "postgresql"], + }, + }, + { + "content": "Managed databases reduce operational overhead and provide automatic scaling.", + "metadata": { + "id": "doc-2", + "title": "Managed Database Benefits", + "category": "technology", + "source": "blog", + "tags": ["managed", "scaling", "operations"], + }, + }, + ] + + # Index documents + async def index_documents(docs: List[Dict[str, Any]], table_name: str = "documents"): + try: + indexer = f"cloudsql/{table_name}" + + await ai.index( + indexer=indexer, + documents=docs, + options={"batch_size": 100} + ) + + return {"indexed": len(docs), "success": True} + except Exception as error: + print(f"Indexing failed: {error}") + return {"indexed": 0, "success": False} + + # Batch indexing for large datasets + async def batch_index_documents( + docs: List[Dict[str, Any]], + table_name: str = "documents", + batch_size: int = 50 + ): + total_indexed = 0 + + for i in range(0, len(docs), batch_size): + batch = docs[i:i + batch_size] + + try: + await ai.index( + indexer=f"cloudsql/{table_name}", + documents=batch, + options={"batch_size": batch_size} + ) + total_indexed += len(batch) + except Exception as error: + print(f"Batch indexing failed: {error}") + break + + return {"indexed": total_indexed, "success": total_indexed == len(docs)} + ``` + + + +### Document Retrieval + + + + Retrieve documents with advanced filtering and distance strategies: + + ```ts + import { postgresRetrieverRef, DistanceStrategy } from 'genkitx-cloud-sql-pg'; + + // Create retriever reference with distance strategy + const documentsRetriever = postgresRetrieverRef({ + tableName: 'documents', + idColumn: 'id', + metadataColumns: ['title', 'category', 'source'], + distanceStrategy: DistanceStrategy.COSINE_DISTANCE, + }); + + // Basic retrieval + const query = "What are managed database benefits?"; + const docs = await ai.retrieve({ + retriever: documentsRetriever, + query, + options: { + k: 5, // Number of documents to retrieve (max 1000) + }, + }); + + console.log('Retrieved documents:', docs); + + // Advanced retrieval with SQL filtering + const filteredDocs = await ai.retrieve({ + retriever: documentsRetriever, + query: "cloud database services", + options: { + k: 3, + filter: "category = 'database' AND source = 'documentation'", + }, + }); + + // Complex filtering with 
multiple conditions + const complexDocs = await ai.retrieve({ + retriever: documentsRetriever, + query: "database scaling solutions", + options: { + k: 10, + filter: "category IN ('database', 'technology') AND source != 'deprecated'", + }, + }); + + // Different distance strategies + const euclideanRetriever = postgresRetrieverRef({ + tableName: 'documents', + distanceStrategy: DistanceStrategy.EUCLIDEAN_DISTANCE, + }); + + const dotProductRetriever = postgresRetrieverRef({ + tableName: 'documents', + distanceStrategy: DistanceStrategy.DOT_PRODUCT, + }); + ``` + + + Retrieve documents with advanced filtering and distance strategies: + + ```go + // Basic retrieval + func retrieveDocuments(ctx context.Context, query string) ([]ai.Document, error) { + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("cloudsql/documents"), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": 5, + }), + ) + if err != nil { + return nil, fmt.Errorf("retrieval failed: %w", err) + } + + return docs, nil + } + + // Advanced retrieval with SQL filtering + func advancedRetrieve(ctx context.Context, query string, limit int, filter string) ([]ai.Document, error) { + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("cloudsql/documents"), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": limit, + "filter": filter, + }), + ) + if err != nil { + return nil, fmt.Errorf("advanced retrieval failed: %w", err) + } + + return docs, nil + } + + // Complex filtering examples + func searchDatabaseDocuments(ctx context.Context, query string) ([]ai.Document, error) { + filter := "category = 'database' AND source = 'documentation'" + return advancedRetrieve(ctx, query, 3, filter) + } + + func searchMultiCategoryDocuments(ctx context.Context, query string) ([]ai.Document, error) { + filter := "category IN ('database', 'technology') AND source != 'deprecated'" + return advancedRetrieve(ctx, query, 10, filter) + } + + // Usage example + func performSearches(ctx context.Context) error { + // Basic search + docs, err := retrieveDocuments(ctx, "What are managed database benefits?") + if err != nil { + return err + } + + // Filtered search + dbDocs, err := searchDatabaseDocuments(ctx, "cloud database services") + if err != nil { + return err + } + + // Complex search + multiDocs, err := searchMultiCategoryDocuments(ctx, "database scaling solutions") + if err != nil { + return err + } + + fmt.Printf("Found %d basic, %d database, %d multi-category documents\n", + len(docs), len(dbDocs), len(multiDocs)) + return nil + } + ``` + + + Retrieve documents with advanced filtering and distance strategies: + + ```python + from typing import List, Dict, Any, Optional + + # Basic retrieval + async def retrieve_documents(query: str, table_name: str = "documents", k: int = 5) -> List[Dict[str, Any]]: + try: + retriever = f"cloudsql/{table_name}" + docs = await ai.retrieve( + retriever=retriever, + query=query, + options={"k": k} + ) + return docs + except Exception as error: + print(f"Retrieval failed: {error}") + return [] + + # Advanced retrieval with SQL filtering + async def advanced_retrieve( + query: str, + table_name: str = "documents", + k: int = 5, + filter_clause: Optional[str] = None + ) -> List[Dict[str, Any]]: + try: + retriever = f"cloudsql/{table_name}" + + options = {"k": k} + if filter_clause: + options["filter"] = filter_clause + + docs = await ai.retrieve( + retriever=retriever, + query=query, + options=options + ) + + return docs + except Exception as error: + print(f"Advanced 
retrieval failed: {error}") + return [] + + # Specific search functions + async def search_database_documents(query: str) -> List[Dict[str, Any]]: + filter_clause = "category = 'database' AND source = 'documentation'" + return await advanced_retrieve(query, k=3, filter_clause=filter_clause) + + async def search_multi_category_documents(query: str) -> List[Dict[str, Any]]: + filter_clause = "category IN ('database', 'technology') AND source != 'deprecated'" + return await advanced_retrieve(query, k=10, filter_clause=filter_clause) + + # Comprehensive search example + async def perform_comprehensive_search(): + # Basic search + basic_docs = await retrieve_documents("What are managed database benefits?", k=5) + + # Database-specific search + db_docs = await search_database_documents("cloud database services") + + # Multi-category search + multi_docs = await search_multi_category_documents("database scaling solutions") + + return { + "basic_search": basic_docs, + "database_search": db_docs, + "multi_category_search": multi_docs, + "total_results": len(basic_docs) + len(db_docs) + len(multi_docs) + } + ``` + + + +## Advanced Features + +### Custom Table Configuration + + + + Configure custom table schemas for specific use cases: + + ```ts + // Advanced table configuration + await engine.initVectorstoreTable('custom_documents', 1536, { + schemaName: 'ai_data', + contentColumn: 'document_text', + embeddingColumn: 'text_embedding', + idColumn: 'document_id', + metadataColumns: [ + { name: 'title', dataType: 'TEXT' }, + { name: 'author', dataType: 'TEXT' }, + { name: 'created_date', dataType: 'TIMESTAMP' }, + { name: 'version', dataType: 'INTEGER' }, + { name: 'tags', dataType: 'TEXT[]' }, + { name: 'score', dataType: 'REAL' }, + ], + metadataJsonColumn: 'additional_metadata', + storeMetadata: true, + overwriteExisting: false, + }); + + // Configure retriever for custom table + const customRetriever = postgresRetrieverRef({ + tableName: 'custom_documents', + schemaName: 'ai_data', + contentColumn: 'document_text', + embeddingColumn: 'text_embedding', + idColumn: 'document_id', + metadataColumns: ['title', 'author', 'created_date', 'version', 'tags', 'score'], + metadataJsonColumn: 'additional_metadata', + distanceStrategy: DistanceStrategy.COSINE_DISTANCE, + }); + + // Advanced filtering with custom columns + const advancedSearch = await ai.retrieve({ + retriever: customRetriever, + query: "latest documentation updates", + options: { + k: 5, + filter: ` + created_date >= '2024-01-01' + AND version >= 2 + AND score > 0.8 + AND 'technical' = ANY(tags) + `, + }, + }); + ``` + + + Configure custom table schemas for specific use cases: + + ```go + // Custom table configuration + type CustomTableConfig struct { + SchemaName string + ContentColumn string + EmbeddingColumn string + IDColumn string + MetadataColumns []string + MetadataJSONColumn string + DistanceStrategy string + } + + func setupCustomTable(ctx context.Context, db *sql.DB) error { + // Create custom table with advanced schema + createTableSQL := ` + CREATE TABLE IF NOT EXISTS ai_data.custom_documents ( + document_id TEXT PRIMARY KEY, + document_text TEXT NOT NULL, + text_embedding vector(1536), + title TEXT, + author TEXT, + created_date TIMESTAMP, + version INTEGER, + tags TEXT[], + score REAL, + additional_metadata JSONB + ); + + CREATE INDEX IF NOT EXISTS custom_documents_embedding_idx + ON ai_data.custom_documents + USING ivfflat (text_embedding vector_cosine_ops) + WITH (lists = 100); + ` + + _, err := db.ExecContext(ctx, 
createTableSQL) + return err + } + + // Advanced retrieval with custom filtering + func advancedCustomRetrieve(ctx context.Context, query string) ([]ai.Document, error) { + filter := ` + created_date >= '2024-01-01' + AND version >= 2 + AND score > 0.8 + AND 'technical' = ANY(tags) + ` + + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("cloudsql/custom_documents"), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": 5, + "filter": filter, + "schema": "ai_data", + }), + ) + if err != nil { + return nil, fmt.Errorf("custom retrieval failed: %w", err) + } + + return docs, nil + } + ``` + + + Configure custom table schemas for specific use cases: + + ```python + # Custom table configuration + async def setup_custom_table(connection): + """Set up a custom table with advanced schema""" + cursor = connection.cursor() + + try: + # Create custom schema and table + cursor.execute(""" + CREATE SCHEMA IF NOT EXISTS ai_data; + + CREATE TABLE IF NOT EXISTS ai_data.custom_documents ( + document_id TEXT PRIMARY KEY, + document_text TEXT NOT NULL, + text_embedding vector(1536), + title TEXT, + author TEXT, + created_date TIMESTAMP, + version INTEGER, + tags TEXT[], + score REAL, + additional_metadata JSONB + ); + + CREATE INDEX IF NOT EXISTS custom_documents_embedding_idx + ON ai_data.custom_documents + USING ivfflat (text_embedding vector_cosine_ops) + WITH (lists = 100); + """) + + connection.commit() + return {"success": True} + except Exception as error: + connection.rollback() + print(f"Custom table setup failed: {error}") + return {"success": False, "error": str(error)} + finally: + cursor.close() + + # Advanced retrieval with custom filtering + async def advanced_custom_retrieve(query: str) -> List[Dict[str, Any]]: + filter_clause = """ + created_date >= '2024-01-01' + AND version >= 2 + AND score > 0.8 + AND 'technical' = ANY(tags) + """ + + try: + retriever = "cloudsql/custom_documents" + docs = await ai.retrieve( + retriever=retriever, + query=query, + options={ + "k": 5, + "filter": filter_clause, + "schema": "ai_data" + } + ) + return docs + except Exception as error: + print(f"Custom retrieval failed: {error}") + return [] + + # Complex metadata search + async def search_by_metadata_criteria( + query: str, + min_score: float = 0.8, + min_version: int = 2, + required_tags: List[str] = None, + date_range: tuple = None + ) -> List[Dict[str, Any]]: + """Search with complex metadata criteria""" + + filter_parts = [f"score >= {min_score}", f"version >= {min_version}"] + + if required_tags: + tag_conditions = " OR ".join([f"'{tag}' = ANY(tags)" for tag in required_tags]) + filter_parts.append(f"({tag_conditions})") + + if date_range: + start_date, end_date = date_range + filter_parts.append(f"created_date BETWEEN '{start_date}' AND '{end_date}'") + + filter_clause = " AND ".join(filter_parts) + + return await advanced_retrieve( + query=query, + table_name="custom_documents", + k=10, + filter_clause=filter_clause + ) + ``` + + + +## Best Practices + +### Database Configuration + +1. **Instance Sizing**: Choose appropriate machine types for your workload +2. **Storage Configuration**: Use SSD storage for better performance +3. **Connection Pooling**: Configure connection pooling for high-traffic applications +4. **Backup Strategy**: Set up automated backups and point-in-time recovery + +### Vector Optimization + +1. **Index Configuration**: Optimize pgvector index parameters for your data +2. **Embedding Dimensions**: Match vector dimensions to your embedding model +3. 
**Distance Strategy**: Choose the right distance function for your use case +4. **Batch Operations**: Use appropriate batch sizes for indexing + +### Performance Optimization + +1. **Query Optimization**: Use efficient SQL filters to reduce search space +2. **Index Management**: Monitor and maintain vector indexes +3. **Connection Management**: Use connection pooling and proper connection lifecycle +4. **Monitoring**: Set up Cloud Monitoring for database metrics + +### Production Deployment + +1. **High Availability**: Configure regional persistent disks and failover replicas +2. **Security**: Use private IP, SSL connections, and IAM authentication +3. **Scaling**: Configure read replicas for read-heavy workloads +4. **Maintenance**: Schedule maintenance windows and updates + +### Cost Optimization + +1. **Right-sizing**: Monitor resource usage and adjust instance sizes +2. **Storage Management**: Use appropriate storage types and sizes +3. **Connection Efficiency**: Minimize connection overhead +4. **Query Efficiency**: Optimize queries to reduce compute costs + +## Next Steps + +- Learn about [RAG implementation](/unified-docs/rag) to build complete retrieval-augmented generation systems +- Explore [creating flows](/unified-docs/creating-flows) to build structured AI workflows with managed vector search +- See [deployment guides](/unified-docs/deployment) for production deployment strategies +- Check out other vector database options for different use cases diff --git a/src/content/docs/unified-docs/vector-databases/dev-local-vectorstore.mdx b/src/content/docs/unified-docs/vector-databases/dev-local-vectorstore.mdx new file mode 100644 index 00000000..3c78202d --- /dev/null +++ b/src/content/docs/unified-docs/vector-databases/dev-local-vectorstore.mdx @@ -0,0 +1,729 @@ +--- +title: Dev Local Vector Store +description: Learn how to use the Dev Local Vector Store for local development and testing across JavaScript, Go, and Python with Genkit. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +The Dev Local Vector Store provides a simple, file-based vector database solution for local development and testing. It's designed to be lightweight and easy to set up, making it perfect for prototyping, testing, and development environments where you don't need the complexity of a full production vector database. + +## Key Features + +- **Zero setup**: No external dependencies or services required +- **File-based storage**: Stores vectors and metadata locally +- **Development focused**: Optimized for quick iteration and testing +- **Lightweight**: Minimal resource usage +- **Cross-platform**: Works on any system with file system access + +:::caution +The Dev Local Vector Store is intended for development and testing only. For production applications, use a dedicated vector database like Pinecone, ChromaDB, or Cloud Firestore. 
+::: + +## Installation and Setup + + + + JavaScript doesn't have a dedicated dev local vector store plugin, but you can create a simple in-memory or file-based solution for development: + + ```bash + npm install genkit + ``` + + Create a simple local vector store: + + ```ts + import { genkit } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + import fs from 'fs/promises'; + import path from 'path'; + + const ai = genkit({ + plugins: [googleAI()], + }); + + // Simple local vector store implementation + class DevLocalVectorStore { + private storePath: string; + private embedder: any; + + constructor(storePath: string, embedder: any) { + this.storePath = storePath; + this.embedder = embedder; + } + + async index(documents: string[]) { + const embeddings = await Promise.all( + documents.map(async (doc) => { + const result = await ai.embed({ + embedder: this.embedder, + content: doc, + }); + return { + text: doc, + embedding: result[0].embedding, + }; + }) + ); + + await fs.writeFile( + this.storePath, + JSON.stringify(embeddings, null, 2) + ); + } + + async search(query: string, limit: number = 5) { + const queryEmbedding = await ai.embed({ + embedder: this.embedder, + content: query, + }); + + const data = JSON.parse(await fs.readFile(this.storePath, 'utf-8')); + + // Simple cosine similarity + const results = data + .map((item: any) => ({ + ...item, + similarity: this.cosineSimilarity( + queryEmbedding[0].embedding, + item.embedding + ), + })) + .sort((a: any, b: any) => b.similarity - a.similarity) + .slice(0, limit); + + return results; + } + + private cosineSimilarity(a: number[], b: number[]): number { + const dotProduct = a.reduce((sum, val, i) => sum + val * b[i], 0); + const magnitudeA = Math.sqrt(a.reduce((sum, val) => sum + val * val, 0)); + const magnitudeB = Math.sqrt(b.reduce((sum, val) => sum + val * val, 0)); + return dotProduct / (magnitudeA * magnitudeB); + } + } + + // Usage + const localStore = new DevLocalVectorStore( + './dev-vector-store.json', + googleAI.embedder('text-embedding-004') + ); + ``` + + + Go doesn't have a dedicated dev local vector store plugin, but you can create a simple file-based solution: + + ```bash + go get github.com/firebase/genkit/go/genkit + ``` + + Create a simple local vector store: + + ```go + package main + + import ( + "context" + "encoding/json" + "fmt" + "math" + "os" + "sort" + + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + ) + + type VectorDocument struct { + Text string `json:"text"` + Embedding []float64 `json:"embedding"` + } + + type DevLocalVectorStore struct { + storePath string + embedder ai.Embedder + } + + func NewDevLocalVectorStore(storePath string, embedder ai.Embedder) *DevLocalVectorStore { + return &DevLocalVectorStore{ + storePath: storePath, + embedder: embedder, + } + } + + func (store *DevLocalVectorStore) Index(ctx context.Context, documents []string) error { + var vectorDocs []VectorDocument + + for _, doc := range documents { + resp, err := ai.Embed(ctx, store.embedder, ai.WithDocs(doc)) + if err != nil { + return err + } + + vectorDocs = append(vectorDocs, VectorDocument{ + Text: doc, + Embedding: resp.Embeddings[0].Embedding, + }) + } + + data, err := json.MarshalIndent(vectorDocs, "", " ") + if err != nil { + return err + } + + return os.WriteFile(store.storePath, data, 0644) + } + + func (store *DevLocalVectorStore) Search(ctx context.Context, query string, limit int) ([]VectorDocument, error) { + // Get query embedding + queryResp, err := ai.Embed(ctx, 
store.embedder, ai.WithDocs(query)) + if err != nil { + return nil, err + } + queryEmbedding := queryResp.Embeddings[0].Embedding + + // Load stored documents + data, err := os.ReadFile(store.storePath) + if err != nil { + return nil, err + } + + var docs []VectorDocument + if err := json.Unmarshal(data, &docs); err != nil { + return nil, err + } + + // Calculate similarities and sort + type result struct { + doc VectorDocument + similarity float64 + } + + var results []result + for _, doc := range docs { + similarity := cosineSimilarity(queryEmbedding, doc.Embedding) + results = append(results, result{doc: doc, similarity: similarity}) + } + + sort.Slice(results, func(i, j int) bool { + return results[i].similarity > results[j].similarity + }) + + // Return top results + if limit > len(results) { + limit = len(results) + } + + var topDocs []VectorDocument + for i := 0; i < limit; i++ { + topDocs = append(topDocs, results[i].doc) + } + + return topDocs, nil + } + + func cosineSimilarity(a, b []float64) float64 { + var dotProduct, magnitudeA, magnitudeB float64 + + for i := range a { + dotProduct += a[i] * b[i] + magnitudeA += a[i] * a[i] + magnitudeB += b[i] * b[i] + } + + magnitudeA = math.Sqrt(magnitudeA) + magnitudeB = math.Sqrt(magnitudeB) + + if magnitudeA == 0 || magnitudeB == 0 { + return 0 + } + + return dotProduct / (magnitudeA * magnitudeB) + } + ``` + + + Install the Dev Local Vector Store plugin: + + ```bash + pip install genkit-plugin-dev-local-vectorstore + ``` + + Configure the plugin: + + ```python + from genkit.ai import Genkit + from genkit.plugins.dev_local_vectorstore import DevLocalVectorStore + from genkit.plugins.google_genai import GoogleGenAI + + ai = Genkit( + plugins=[ + GoogleGenAI(), + DevLocalVectorStore( + name='my_vectorstore', + embedder='googleai/text-embedding-004', + ), + ], + ) + ``` + + + +## Basic Usage + +### Indexing Documents + + + + ```ts + // Index documents in the local store + const documents = [ + 'Machine learning is a subset of artificial intelligence.', + 'Deep learning uses neural networks with multiple layers.', + 'Natural language processing enables computers to understand text.', + 'Computer vision allows machines to interpret visual information.', + ]; + + await localStore.index(documents); + console.log('Documents indexed successfully'); + ``` + + + ```go + // Initialize the local store + embedder := googleAIPlugin.Embedder(g, "text-embedding-004") + localStore := NewDevLocalVectorStore("./dev-vector-store.json", embedder) + + // Index documents + documents := []string{ + "Machine learning is a subset of artificial intelligence.", + "Deep learning uses neural networks with multiple layers.", + "Natural language processing enables computers to understand text.", + "Computer vision allows machines to interpret visual information.", + } + + err := localStore.Index(ctx, documents) + if err != nil { + log.Fatal(err) + } + fmt.Println("Documents indexed successfully") + ``` + + + ```python + from genkit.types import Document + + # Prepare documents + data_list = [ + 'Machine learning is a subset of artificial intelligence.', + 'Deep learning uses neural networks with multiple layers.', + 'Natural language processing enables computers to understand text.', + 'Computer vision allows machines to interpret visual information.', + ] + + # Convert to Genkit documents + genkit_docs = [Document.from_text(text=item) for item in data_list] + + # Index documents + await DevLocalVectorStore.index('my_vectorstore', genkit_docs) + print("Documents 
indexed successfully") + ``` + + + +### Retrieving Documents + + + + ```ts + // Search for similar documents + const query = 'What is artificial intelligence?'; + const results = await localStore.search(query, 3); + + console.log('Search results:'); + results.forEach((result, index) => { + console.log(`${index + 1}. ${result.text} (similarity: ${result.similarity.toFixed(3)})`); + }); + + // Use in RAG workflow + async function ragQuery(question: string) { + const retrievedDocs = await localStore.search(question, 3); + const context = retrievedDocs.map(doc => doc.text).join('\n\n'); + + const response = await ai.generate({ + model: googleAI.model('gemini-1.5-flash'), + prompt: `Context: ${context}\n\nQuestion: ${question}\n\nAnswer:`, + }); + + return response.text; + } + + const answer = await ragQuery('What is machine learning?'); + console.log('Answer:', answer); + ``` + + + ```go + // Search for similar documents + query := "What is artificial intelligence?" + results, err := localStore.Search(ctx, query, 3) + if err != nil { + log.Fatal(err) + } + + fmt.Println("Search results:") + for i, result := range results { + fmt.Printf("%d. %s\n", i+1, result.Text) + } + + // Use in RAG workflow + func ragQuery(ctx context.Context, question string) (string, error) { + retrievedDocs, err := localStore.Search(ctx, question, 3) + if err != nil { + return "", err + } + + var contextParts []string + for _, doc := range retrievedDocs { + contextParts = append(contextParts, doc.Text) + } + context := strings.Join(contextParts, "\n\n") + + resp, err := genkit.Generate(ctx, g, + ai.WithModel(googleAIPlugin.Model(g, "gemini-1.5-flash")), + ai.WithPrompt(fmt.Sprintf("Context: %s\n\nQuestion: %s\n\nAnswer:", context, question)), + ) + if err != nil { + return "", err + } + + return resp.Text(), nil + } + + answer, err := ragQuery(ctx, "What is machine learning?") + if err != nil { + log.Fatal(err) + } + fmt.Printf("Answer: %s\n", answer) + ``` + + + ```python + from genkit.types import Document + + # Search for similar documents + async def search_documents(query: str): + query_doc = Document.from_text(query) + + results = await ai.retrieve( + query=query_doc, + retriever='my_vectorstore', + ) + + return results + + # Use in RAG workflow + async def rag_query(question: str): + query_doc = Document.from_text(question) + + # Retrieve relevant documents + retrieved_docs = await ai.retrieve( + query=query_doc, + retriever='my_vectorstore', + ) + + # Prepare context + context = "\n\n".join([doc.content[0].text for doc in retrieved_docs]) + + # Generate answer + response = await ai.generate( + model="googleai/gemini-1.5-flash", + prompt=f"Context: {context}\n\nQuestion: {question}\n\nAnswer:", + ) + + return response.text + + # Example usage + results = await search_documents("What is artificial intelligence?") + print("Search results:") + for i, doc in enumerate(results, 1): + print(f"{i}. 
{doc.content[0].text}") + + answer = await rag_query("What is machine learning?") + print(f"Answer: {answer}") + ``` + + + +## Advanced Configuration + +### Custom Storage Location + + + + ```ts + // Custom storage path + const customStore = new DevLocalVectorStore( + './data/custom-vector-store.json', + googleAI.embedder('text-embedding-004') + ); + + // Multiple stores for different domains + const techStore = new DevLocalVectorStore( + './data/tech-docs.json', + googleAI.embedder('text-embedding-004') + ); + + const generalStore = new DevLocalVectorStore( + './data/general-docs.json', + googleAI.embedder('text-embedding-004') + ); + ``` + + + ```go + // Custom storage paths + techStore := NewDevLocalVectorStore("./data/tech-docs.json", embedder) + generalStore := NewDevLocalVectorStore("./data/general-docs.json", embedder) + + // Index different types of content + techDocs := []string{ + "API documentation for REST endpoints", + "Database schema design principles", + "Microservices architecture patterns", + } + + generalDocs := []string{ + "Company policies and procedures", + "Meeting notes and summaries", + "Project planning documents", + } + + err := techStore.Index(ctx, techDocs) + if err != nil { + log.Fatal(err) + } + + err = generalStore.Index(ctx, generalDocs) + if err != nil { + log.Fatal(err) + } + ``` + + + ```python + # Multiple vector stores for different domains + ai = Genkit( + plugins=[ + GoogleGenAI(), + DevLocalVectorStore( + name='tech_docs', + embedder='googleai/text-embedding-004', + ), + DevLocalVectorStore( + name='general_docs', + embedder='googleai/text-embedding-004', + ), + ], + ) + + # Index different types of content + tech_docs = [ + Document.from_text("API documentation for REST endpoints"), + Document.from_text("Database schema design principles"), + Document.from_text("Microservices architecture patterns"), + ] + + general_docs = [ + Document.from_text("Company policies and procedures"), + Document.from_text("Meeting notes and summaries"), + Document.from_text("Project planning documents"), + ] + + await DevLocalVectorStore.index('tech_docs', tech_docs) + await DevLocalVectorStore.index('general_docs', general_docs) + ``` + + + +### Batch Operations + + + + ```ts + // Batch indexing for better performance + async function batchIndex(documents: string[], batchSize: number = 10) { + for (let i = 0; i < documents.length; i += batchSize) { + const batch = documents.slice(i, i + batchSize); + await localStore.index(batch); + console.log(`Indexed batch ${Math.floor(i / batchSize) + 1}`); + } + } + + // Large document set + const largeDocumentSet = [ + // ... hundreds of documents + ]; + + await batchIndex(largeDocumentSet, 20); + ``` + + + ```go + // Batch indexing function + func batchIndex(ctx context.Context, store *DevLocalVectorStore, documents []string, batchSize int) error { + for i := 0; i < len(documents); i += batchSize { + end := i + batchSize + if end > len(documents) { + end = len(documents) + } + + batch := documents[i:end] + err := store.Index(ctx, batch) + if err != nil { + return err + } + + fmt.Printf("Indexed batch %d\n", (i/batchSize)+1) + } + return nil + } + + // Usage + largeDocumentSet := []string{ + // ... 
hundreds of documents + } + + err := batchIndex(ctx, localStore, largeDocumentSet, 20) + if err != nil { + log.Fatal(err) + } + ``` + + + ```python + # Batch indexing for large document sets + async def batch_index(store_name: str, documents: list[str], batch_size: int = 20): + for i in range(0, len(documents), batch_size): + batch = documents[i:i + batch_size] + genkit_docs = [Document.from_text(text=doc) for doc in batch] + + await DevLocalVectorStore.index(store_name, genkit_docs) + print(f"Indexed batch {(i // batch_size) + 1}") + + # Large document set + large_document_set = [ + # ... hundreds of documents + ] + + await batch_index('my_vectorstore', large_document_set, 20) + ``` + + + +## Best Practices + +### Development Workflow + +1. **Start Simple**: Use the dev local vector store for initial prototyping +2. **Test Locally**: Validate your RAG pipeline before moving to production +3. **Iterate Quickly**: Make changes and test without external dependencies +4. **Document Structure**: Establish consistent document formats early + +### Performance Considerations + +1. **Document Size**: Keep documents reasonably sized (500-1000 tokens) +2. **Index Size**: Monitor file size for large document sets +3. **Search Limits**: Use appropriate limits for search results +4. **Batch Processing**: Use batch operations for large datasets + +### Migration to Production + + + + ```ts + // Environment-based vector store selection + const isProduction = process.env.NODE_ENV === 'production'; + + const vectorStore = isProduction + ? new PineconeVectorStore({ + apiKey: process.env.PINECONE_API_KEY, + indexName: 'production-index', + }) + : new DevLocalVectorStore( + './dev-vector-store.json', + googleAI.embedder('text-embedding-004') + ); + + // Same interface for both stores + await vectorStore.index(documents); + const results = await vectorStore.search(query, 5); + ``` + + + ```go + // Environment-based store selection + var store VectorStore + + if os.Getenv("ENVIRONMENT") == "production" { + store = NewPineconeVectorStore(os.Getenv("PINECONE_API_KEY"), "production-index") + } else { + store = NewDevLocalVectorStore("./dev-vector-store.json", embedder) + } + + // Same interface for both stores + err := store.Index(ctx, documents) + results, err := store.Search(ctx, query, 5) + ``` + + + ```python + import os + + # Environment-based configuration + if os.getenv("ENVIRONMENT") == "production": + ai = Genkit( + plugins=[ + GoogleGenAI(), + PineconeVectorStore( + name='production_store', + api_key=os.getenv("PINECONE_API_KEY"), + index_name='production-index', + embedder='googleai/text-embedding-004', + ), + ], + ) + store_name = 'production_store' + else: + ai = Genkit( + plugins=[ + GoogleGenAI(), + DevLocalVectorStore( + name='dev_store', + embedder='googleai/text-embedding-004', + ), + ], + ) + store_name = 'dev_store' + + # Same interface for both environments + query_doc = Document.from_text("search query") + results = await ai.retrieve(query=query_doc, retriever=store_name) + ``` + + + +## Limitations + +1. **Not for Production**: Designed for development and testing only +2. **No Persistence**: Data is stored in local files +3. **Single Machine**: Cannot be shared across multiple instances +4. **Limited Scalability**: Performance degrades with very large datasets +5. 
**No Advanced Features**: Lacks filtering, metadata search, and other advanced capabilities + +## Next Steps + +- Start with the Dev Local Vector Store for prototyping +- Learn about [RAG implementation](/unified-docs/rag) for building complete workflows +- Explore production vector databases like [Pinecone](/unified-docs/vector-databases/pinecone) or [ChromaDB](/unified-docs/vector-databases/chromadb) +- See [deployment guides](/unified-docs/deployment) for moving to production diff --git a/src/content/docs/unified-docs/vector-databases/lancedb.mdx b/src/content/docs/unified-docs/vector-databases/lancedb.mdx new file mode 100644 index 00000000..4ac30774 --- /dev/null +++ b/src/content/docs/unified-docs/vector-databases/lancedb.mdx @@ -0,0 +1,914 @@ +--- +title: LanceDB Vector Database +description: Learn how to use LanceDB with Genkit across JavaScript, Go, and Python for embedded vector storage, semantic search, and RAG applications. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +LanceDB is an open-source vector database designed for AI applications. It provides embedded vector storage with high performance, making it ideal for applications that need fast vector similarity search without the complexity of managing a separate database server. + +## Installation and Setup + + + + Install the LanceDB plugin: + + ```bash + npm install genkitx-lancedb + ``` + + Configure the plugin when initializing Genkit: + + ```ts + import { genkit } from 'genkit'; + import { lancedb } from 'genkitx-lancedb'; + import { googleAI } from '@genkit-ai/googleai'; + + const ai = genkit({ + plugins: [ + googleAI(), + lancedb([ + { + dbUri: '.db', // Database directory + tableName: 'documents', + embedder: googleAI.embedder('gemini-embedding-001'), + }, + ]), + ], + }); + ``` + + ### Configuration Options + + ```ts + // Advanced configuration + const ai = genkit({ + plugins: [ + googleAI(), + lancedb([ + { + dbUri: './vector-db', // Custom database directory + tableName: 'my-documents', + embedder: googleAI.embedder('gemini-embedding-001'), + // Additional LanceDB options can be specified here + }, + ]), + ], + }); + ``` + + + For Go applications, you can use LanceDB through the Go client: + + ```bash + go get github.com/lancedb/lancedb-go + ``` + + ```go + package main + + import ( + "context" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/lancedb" + "github.com/firebase/genkit/go/plugins/googleai" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins( + &googleai.GoogleAI{}, + &lancedb.LanceDB{ + DatabaseURI: "./vector-db", + Tables: []lancedb.TableConfig{ + { + Name: "documents", + Embedder: "googleai/gemini-embedding-001", + }, + }, + }, + ), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + + For Python applications, install the LanceDB client: + + ```bash + pip install lancedb genkit-plugin-lancedb + ``` + + ```python + from genkit.ai import Genkit + from genkit.plugins.lancedb import LanceDB + from genkit.plugins.google_genai import GoogleGenAI + + ai = Genkit( + plugins=[ + GoogleGenAI(), + LanceDB( + database_uri="./vector-db", + tables=[ + { + "name": "documents", + "embedder": "googleai/gemini-embedding-001", + } + ], + ), + ], + ) + ``` + + + +## Basic Usage + +### Document Indexing + + + + Index documents for semantic search: + + ```ts + import { lancedbIndexerRef, WriteMode } from 'genkitx-lancedb'; + import { Document } from 'genkit'; + 
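+  // Note: tableName below refers to the table configured in the lancedb() plugin setup above.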
+ // Create indexer reference + const documentsIndexer = lancedbIndexerRef({ + tableName: 'documents', + }); + + // Prepare documents for indexing + const documents: Document[] = [ + { + content: 'LanceDB is an open-source vector database for AI applications.', + metadata: { + title: 'LanceDB Overview', + category: 'database', + source: 'documentation', + }, + }, + { + content: 'Embedded vector databases provide fast local search capabilities.', + metadata: { + title: 'Embedded Databases', + category: 'technology', + source: 'blog', + }, + }, + ]; + + // Index documents + await ai.index({ + indexer: documentsIndexer, + documents, + options: { + writeMode: WriteMode.Overwrite, // or WriteMode.Append + }, + }); + + // Batch indexing for better performance + const batchSize = 100; + for (let i = 0; i < largeDocumentSet.length; i += batchSize) { + const batch = largeDocumentSet.slice(i, i + batchSize); + await ai.index({ + indexer: documentsIndexer, + documents: batch, + options: { + writeMode: i === 0 ? WriteMode.Overwrite : WriteMode.Append, + }, + }); + } + ``` + + + Index documents for semantic search: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + ) + + func indexDocuments(ctx context.Context) error { + documents := []ai.Document{ + { + Content: "LanceDB is an open-source vector database for AI applications.", + Metadata: map[string]interface{}{ + "title": "LanceDB Overview", + "category": "database", + "source": "documentation", + }, + }, + { + Content: "Embedded vector databases provide fast local search capabilities.", + Metadata: map[string]interface{}{ + "title": "Embedded Databases", + "category": "technology", + "source": "blog", + }, + }, + } + + // Index documents + err := genkit.Index(ctx, g, + ai.WithIndexer("lancedb/documents"), + ai.WithDocuments(documents), + ai.WithOptions(map[string]interface{}{ + "writeMode": "overwrite", // or "append" + }), + ) + if err != nil { + return fmt.Errorf("failed to index documents: %w", err) + } + + return nil + } + + // Batch indexing + func batchIndexDocuments(ctx context.Context, documents []ai.Document, batchSize int) error { + for i := 0; i < len(documents); i += batchSize { + end := i + batchSize + if end > len(documents) { + end = len(documents) + } + + batch := documents[i:end] + writeMode := "append" + if i == 0 { + writeMode = "overwrite" + } + + err := genkit.Index(ctx, g, + ai.WithIndexer("lancedb/documents"), + ai.WithDocuments(batch), + ai.WithOptions(map[string]interface{}{ + "writeMode": writeMode, + }), + ) + if err != nil { + return fmt.Errorf("failed to index batch: %w", err) + } + } + return nil + } + ``` + + + Index documents for semantic search: + + ```python + from typing import List, Dict, Any + + # Prepare documents for indexing + documents = [ + { + "content": "LanceDB is an open-source vector database for AI applications.", + "metadata": { + "title": "LanceDB Overview", + "category": "database", + "source": "documentation", + }, + }, + { + "content": "Embedded vector databases provide fast local search capabilities.", + "metadata": { + "title": "Embedded Databases", + "category": "technology", + "source": "blog", + }, + }, + ] + + # Index documents + async def index_documents(docs: List[Dict[str, Any]], table_name: str = "documents"): + try: + indexer = f"lancedb/{table_name}" + + await ai.index( + indexer=indexer, + documents=docs, + options={ + "write_mode": "overwrite" # or "append" + } + ) + + return {"indexed": len(docs), "success": True} + 
except Exception as error: + print(f"Indexing failed: {error}") + return {"indexed": 0, "success": False} + + # Batch indexing for better performance + async def batch_index_documents( + docs: List[Dict[str, Any]], + table_name: str = "documents", + batch_size: int = 100 + ): + total_indexed = 0 + + for i in range(0, len(docs), batch_size): + batch = docs[i:i + batch_size] + write_mode = "overwrite" if i == 0 else "append" + + try: + await ai.index( + indexer=f"lancedb/{table_name}", + documents=batch, + options={"write_mode": write_mode} + ) + total_indexed += len(batch) + except Exception as error: + print(f"Batch indexing failed: {error}") + break + + return {"indexed": total_indexed, "success": total_indexed == len(docs)} + ``` + + + +### Document Retrieval + + + + Retrieve relevant documents using semantic search: + + ```ts + import { lancedbRetrieverRef } from 'genkitx-lancedb'; + + // Create retriever reference + const documentsRetriever = lancedbRetrieverRef({ + tableName: 'documents', + displayName: 'Documents', + }); + + // Basic retrieval + const query = "What is an embedded vector database?"; + const docs = await ai.retrieve({ + retriever: documentsRetriever, + query, + options: { + k: 5, // Number of documents to retrieve + }, + }); + + console.log('Retrieved documents:', docs); + + // Advanced retrieval with filtering + const filteredDocs = await ai.retrieve({ + retriever: documentsRetriever, + query, + options: { + k: 3, + filter: { + category: 'database', // Metadata filtering + }, + }, + }); + ``` + + + Retrieve relevant documents using semantic search: + + ```go + // Basic retrieval + func retrieveDocuments(ctx context.Context, query string) ([]ai.Document, error) { + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("lancedb/documents"), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": 5, + }), + ) + if err != nil { + return nil, fmt.Errorf("retrieval failed: %w", err) + } + + return docs, nil + } + + // Advanced retrieval with filtering + func advancedRetrieve(ctx context.Context, query string, limit int, filter map[string]interface{}) ([]ai.Document, error) { + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("lancedb/documents"), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": limit, + "filter": filter, + }), + ) + if err != nil { + return nil, fmt.Errorf("advanced retrieval failed: %w", err) + } + + return docs, nil + } + + // Usage example + func searchDocuments(ctx context.Context) error { + // Basic search + docs, err := retrieveDocuments(ctx, "What is an embedded vector database?") + if err != nil { + return err + } + + // Filtered search + filteredDocs, err := advancedRetrieve(ctx, + "database concepts", + 3, + map[string]interface{}{ + "category": "database", + }, + ) + if err != nil { + return err + } + + fmt.Printf("Found %d documents, %d filtered\n", len(docs), len(filteredDocs)) + return nil + } + ``` + + + Retrieve relevant documents using semantic search: + + ```python + from typing import List, Dict, Any, Optional + + # Basic retrieval + async def retrieve_documents(query: str, table_name: str = "documents", k: int = 5) -> List[Dict[str, Any]]: + try: + retriever = f"lancedb/{table_name}" + docs = await ai.retrieve( + retriever=retriever, + query=query, + options={"k": k} + ) + return docs + except Exception as error: + print(f"Retrieval failed: {error}") + return [] + + # Advanced retrieval with filtering + async def advanced_retrieve( + query: str, + table_name: str = "documents", + k: int = 5, 
+ filter_criteria: Optional[Dict[str, Any]] = None + ) -> List[Dict[str, Any]]: + try: + retriever = f"lancedb/{table_name}" + + options = {"k": k} + if filter_criteria: + options["filter"] = filter_criteria + + docs = await ai.retrieve( + retriever=retriever, + query=query, + options=options + ) + + return docs + except Exception as error: + print(f"Advanced retrieval failed: {error}") + return [] + + # Usage examples + async def search_examples(): + # Basic search + docs = await retrieve_documents("What is an embedded vector database?", k=5) + + # Filtered search + filtered_docs = await advanced_retrieve( + query="database concepts", + k=3, + filter_criteria={"category": "database"} + ) + + print(f"Found {len(docs)} documents, {len(filtered_docs)} filtered") + return docs, filtered_docs + ``` + + + +## Advanced Features + +### Complete RAG Implementation + + + + Build a complete RAG system with document processing: + + ```ts + import { lancedbIndexerRef, lancedbRetrieverRef, WriteMode } from 'genkitx-lancedb'; + import { chunk } from 'llm-chunk'; + import { readFile } from 'fs/promises'; + import pdf from 'pdf-parse'; + + // Document processing configuration + const chunkingConfig = { + minLength: 1000, + maxLength: 2000, + splitter: 'sentence', + overlap: 100, + }; + + // PDF text extraction + async function extractTextFromPdf(filePath: string): Promise { + const dataBuffer = await readFile(filePath); + const data = await pdf(dataBuffer); + return data.text; + } + + // Document indexing flow + export const indexDocumentFlow = ai.defineFlow( + { + name: 'indexDocument', + inputSchema: z.object({ + filePath: z.string(), + tableName: z.string().optional().default('documents'), + }), + outputSchema: z.object({ + success: z.boolean(), + documentsIndexed: z.number(), + error: z.string().optional(), + }), + }, + async ({ filePath, tableName }) => { + try { + // Extract text from PDF + const pdfText = await ai.run('extract-text', () => + extractTextFromPdf(filePath) + ); + + // Chunk the text + const chunks = await ai.run('chunk-text', () => + chunk(pdfText, chunkingConfig) + ); + + // Convert to documents + const documents = chunks.map((text, index) => ({ + content: text, + metadata: { + filePath, + chunkIndex: index, + source: 'pdf', + }, + })); + + // Index documents + const indexer = lancedbIndexerRef({ tableName }); + await ai.index({ + indexer, + documents, + options: { writeMode: WriteMode.Overwrite }, + }); + + return { + success: true, + documentsIndexed: documents.length, + }; + } catch (error) { + return { + success: false, + documentsIndexed: 0, + error: error instanceof Error ? error.message : String(error), + }; + } + }, + ); + + // RAG query flow + export const ragQueryFlow = ai.defineFlow( + { + name: 'ragQuery', + inputSchema: z.object({ + query: z.string(), + tableName: z.string().optional().default('documents'), + }), + outputSchema: z.object({ + answer: z.string(), + sources: z.array(z.string()), + }), + }, + async ({ query, tableName }) => { + // Retrieve relevant documents + const retriever = lancedbRetrieverRef({ tableName }); + const docs = await ai.retrieve({ + retriever, + query, + options: { k: 3 }, + }); + + // Generate answer using retrieved context + const { text } = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: ` + Answer the following question using only the provided context. + If you cannot answer based on the context, say so. 
+ + Context: + ${docs.map(doc => doc.content).join('\n\n')} + + Question: ${query} + `, + }); + + return { + answer: text, + sources: docs.map(doc => doc.metadata?.filePath || 'unknown').filter(Boolean), + }; + }, + ); + ``` + + + Build a complete RAG system with document processing: + + ```go + import ( + "context" + "fmt" + "strings" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + ) + + // Document processing and indexing + func indexDocumentFromText(ctx context.Context, text, source string, tableName string) error { + // Simple text chunking (in production, use a proper chunking library) + chunks := chunkText(text, 1000, 100) + + var documents []ai.Document + for i, chunk := range chunks { + documents = append(documents, ai.Document{ + Content: chunk, + Metadata: map[string]interface{}{ + "source": source, + "chunkIndex": i, + }, + }) + } + + // Index documents + err := genkit.Index(ctx, g, + ai.WithIndexer(fmt.Sprintf("lancedb/%s", tableName)), + ai.WithDocuments(documents), + ai.WithOptions(map[string]interface{}{ + "writeMode": "overwrite", + }), + ) + if err != nil { + return fmt.Errorf("failed to index documents: %w", err) + } + + return nil + } + + // Simple text chunking function + func chunkText(text string, chunkSize, overlap int) []string { + words := strings.Fields(text) + var chunks []string + + for i := 0; i < len(words); i += chunkSize - overlap { + end := i + chunkSize + if end > len(words) { + end = len(words) + } + + chunk := strings.Join(words[i:end], " ") + chunks = append(chunks, chunk) + + if end == len(words) { + break + } + } + + return chunks + } + + // RAG query function + func performRAGQuery(ctx context.Context, query, tableName string) (string, []string, error) { + // Retrieve relevant documents + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever(fmt.Sprintf("lancedb/%s", tableName)), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": 3, + }), + ) + if err != nil { + return "", nil, fmt.Errorf("retrieval failed: %w", err) + } + + // Build context from retrieved documents + var contextParts []string + var sources []string + for _, doc := range docs { + contextParts = append(contextParts, doc.Content) + if source, ok := doc.Metadata["source"].(string); ok { + sources = append(sources, source) + } + } + context := strings.Join(contextParts, "\n\n") + + // Generate answer + prompt := fmt.Sprintf(` + Answer the following question using only the provided context. + If you cannot answer based on the context, say so. 
+ + Context: + %s + + Question: %s + `, context, query) + + resp, err := genkit.Generate(ctx, g, + ai.WithModel("googleai/gemini-2.5-flash"), + ai.WithPrompt(prompt), + ) + if err != nil { + return "", nil, fmt.Errorf("generation failed: %w", err) + } + + return resp.Text(), sources, nil + } + ``` + + + Build a complete RAG system with document processing: + + ```python + import re + from typing import List, Dict, Any, Tuple + + # Simple text chunking function + def chunk_text(text: str, chunk_size: int = 1000, overlap: int = 100) -> List[str]: + words = text.split() + chunks = [] + + for i in range(0, len(words), chunk_size - overlap): + end = min(i + chunk_size, len(words)) + chunk = ' '.join(words[i:end]) + chunks.append(chunk) + + if end == len(words): + break + + return chunks + + # Document indexing function + async def index_document_from_text( + text: str, + source: str, + table_name: str = "documents" + ) -> Dict[str, Any]: + try: + # Chunk the text + chunks = chunk_text(text, chunk_size=1000, overlap=100) + + # Convert to documents + documents = [ + { + "content": chunk, + "metadata": { + "source": source, + "chunk_index": i, + } + } + for i, chunk in enumerate(chunks) + ] + + # Index documents + await ai.index( + indexer=f"lancedb/{table_name}", + documents=documents, + options={"write_mode": "overwrite"} + ) + + return { + "success": True, + "documents_indexed": len(documents), + } + except Exception as error: + return { + "success": False, + "documents_indexed": 0, + "error": str(error), + } + + # RAG query function + async def perform_rag_query( + query: str, + table_name: str = "documents", + k: int = 3 + ) -> Tuple[str, List[str]]: + try: + # Retrieve relevant documents + docs = await ai.retrieve( + retriever=f"lancedb/{table_name}", + query=query, + options={"k": k} + ) + + # Build context and collect sources + context_parts = [doc["content"] for doc in docs] + context = "\n\n".join(context_parts) + + sources = [ + doc.get("metadata", {}).get("source", "unknown") + for doc in docs + ] + + # Generate answer + prompt = f""" + Answer the following question using only the provided context. + If you cannot answer based on the context, say so. + + Context: + {context} + + Question: {query} + """ + + response = await ai.generate( + model="googleai/gemini-2.5-flash", + prompt=prompt + ) + + return response.text, list(set(sources)) # Remove duplicates + + except Exception as error: + print(f"RAG query failed: {error}") + return "I'm sorry, I couldn't process your query.", [] + + # Complete RAG workflow example + async def rag_workflow_example(): + # Index a document + sample_text = """ + LanceDB is an open-source vector database that provides embedded + vector storage capabilities. It's designed for AI applications + that need fast similarity search without managing a separate + database server. + """ + + index_result = await index_document_from_text( + text=sample_text, + source="lancedb_overview.txt", + table_name="knowledge_base" + ) + + if index_result["success"]: + # Query the indexed documents + answer, sources = await perform_rag_query( + query="What is LanceDB?", + table_name="knowledge_base" + ) + + return { + "answer": answer, + "sources": sources, + "indexed_documents": index_result["documents_indexed"] + } + else: + return {"error": "Failed to index documents"} + ``` + + + +## Best Practices + +### Performance Optimization + +1. **Batch operations**: Index documents in batches for better performance +2. 
**Appropriate chunk sizes**: Balance between context and retrieval precision +3. **Embedding model selection**: Choose models that match your use case +4. **Database location**: Use local storage for development, consider cloud storage for production + +### Data Management + +1. **Write modes**: Use `Overwrite` for complete rebuilds, `Append` for incremental updates +2. **Metadata design**: Structure metadata for effective filtering +3. **Version control**: Track document versions and updates +4. **Backup strategies**: Regular backups of the database directory + +### Production Deployment + +1. **Database persistence**: Ensure database directory is persistent in containerized environments +2. **Resource allocation**: Allocate sufficient memory for large datasets +3. **Monitoring**: Track query performance and database size +4. **Scaling**: Consider partitioning large datasets across multiple tables + +## Next Steps + +- Learn about [RAG implementation](/unified-docs/rag) to build complete retrieval-augmented generation systems +- Explore [creating flows](/unified-docs/creating-flows) to build structured AI workflows with vector search +- See [deployment guides](/unified-docs/deployment) for production deployment strategies +- Check out other vector database options for different use cases diff --git a/src/content/docs/unified-docs/vector-databases/neo4j.mdx b/src/content/docs/unified-docs/vector-databases/neo4j.mdx new file mode 100644 index 00000000..39dca335 --- /dev/null +++ b/src/content/docs/unified-docs/vector-databases/neo4j.mdx @@ -0,0 +1,727 @@ +--- +title: Neo4j Graph Vector Database +description: Learn how to use Neo4j with Genkit across JavaScript, Go, and Python for graph-based vector storage, semantic search, and knowledge graph applications. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +Neo4j is a graph database that combines the power of graph relationships with vector search capabilities. It enables you to store documents as nodes with vector embeddings while maintaining rich relationships between entities, making it ideal for knowledge graphs, recommendation systems, and complex AI applications that need both semantic search and graph traversal. + +## Installation and Setup + + + + Install the Neo4j plugin: + + ```bash + npm install genkitx-neo4j + ``` + + Configure the plugin when initializing Genkit: + + ```ts + import { genkit } from 'genkit'; + import { neo4j } from 'genkitx-neo4j'; + import { googleAI } from '@genkit-ai/googleai'; + + const ai = genkit({ + plugins: [ + googleAI(), + neo4j([ + { + indexId: 'documents-index', + embedder: googleAI.embedder('gemini-embedding-001'), + clientParams: { + url: process.env.NEO4J_URI || 'bolt://localhost:7687', + username: process.env.NEO4J_USERNAME || 'neo4j', + password: process.env.NEO4J_PASSWORD, + database: process.env.NEO4J_DATABASE || 'neo4j', + }, + }, + ]), + ], + }); + ``` + + ### Environment Variables + + ```bash + export NEO4J_URI=bolt://localhost:7687 + export NEO4J_USERNAME=neo4j + export NEO4J_PASSWORD=your_password + export NEO4J_DATABASE=neo4j + ``` + + ### Prerequisites + + 1. **Neo4j Database**: Install Neo4j Desktop, Neo4j AuraDB, or run Neo4j in Docker + 2. **Vector Index**: Create a vector index in your Neo4j database + 3. 
**Credentials**: Configure authentication credentials + + ### Creating Vector Index in Neo4j + + ```cypher + // Create a vector index for document embeddings + CREATE VECTOR INDEX documents_vector_index + FOR (n:Document) + ON n.embedding + OPTIONS {indexConfig: { + `vector.dimensions`: 768, + `vector.similarity_function`: 'cosine' + }} + ``` + + + For Go applications, you can use Neo4j through the official Go driver: + + ```bash + go get github.com/neo4j/neo4j-go-driver/v5/neo4j + ``` + + ```go + package main + + import ( + "context" + "os" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/neo4j" + "github.com/firebase/genkit/go/plugins/googleai" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins( + &googleai.GoogleAI{}, + &neo4j.Neo4j{ + URI: os.Getenv("NEO4J_URI"), + Username: os.Getenv("NEO4J_USERNAME"), + Password: os.Getenv("NEO4J_PASSWORD"), + Database: os.Getenv("NEO4J_DATABASE"), + Indexes: []neo4j.IndexConfig{ + { + Name: "documents-index", + Embedder: "googleai/gemini-embedding-001", + }, + }, + }, + ), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + + For Python applications, install the Neo4j driver: + + ```bash + pip install neo4j genkit-plugin-neo4j + ``` + + ```python + import os + from genkit.ai import Genkit + from genkit.plugins.neo4j import Neo4j + from genkit.plugins.google_genai import GoogleGenAI + + ai = Genkit( + plugins=[ + GoogleGenAI(), + Neo4j( + uri=os.getenv("NEO4J_URI", "bolt://localhost:7687"), + username=os.getenv("NEO4J_USERNAME", "neo4j"), + password=os.getenv("NEO4J_PASSWORD"), + database=os.getenv("NEO4J_DATABASE", "neo4j"), + indexes=[ + { + "name": "documents-index", + "embedder": "googleai/gemini-embedding-001", + } + ], + ), + ], + ) + ``` + + + +## Basic Usage + +### Document Indexing + + + + Index documents as graph nodes with vector embeddings: + + ```ts + import { neo4jIndexerRef } from 'genkitx-neo4j'; + import { Document } from 'genkit'; + + // Create indexer reference + const documentsIndexer = neo4jIndexerRef({ + indexId: 'documents-index', + }); + + // Prepare documents for indexing + const documents: Document[] = [ + { + content: 'Neo4j combines graph database capabilities with vector search.', + metadata: { + title: 'Neo4j Overview', + category: 'database', + author: 'Neo4j Team', + tags: ['graph', 'vector', 'database'], + nodeId: 'doc-1', + }, + }, + { + content: 'Knowledge graphs represent entities and their relationships.', + metadata: { + title: 'Knowledge Graphs', + category: 'technology', + author: 'AI Researcher', + tags: ['knowledge', 'graph', 'ai'], + nodeId: 'doc-2', + }, + }, + ]; + + // Index documents + await ai.index({ + indexer: documentsIndexer, + documents, + }); + + // Create relationships between documents + const createRelationships = async () => { + // This would typically be done through direct Neo4j queries + // after indexing to establish relationships between nodes + console.log('Documents indexed as graph nodes with embeddings'); + }; + ``` + + + Index documents as graph nodes with vector embeddings: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + ) + + func indexDocuments(ctx context.Context) error { + documents := []ai.Document{ + { + Content: "Neo4j combines graph database capabilities with vector search.", + Metadata: map[string]interface{}{ + "title": "Neo4j Overview", + "category": "database", + "author": "Neo4j Team", + "tags": []string{"graph", 
"vector", "database"}, + "nodeId": "doc-1", + }, + }, + { + Content: "Knowledge graphs represent entities and their relationships.", + Metadata: map[string]interface{}{ + "title": "Knowledge Graphs", + "category": "technology", + "author": "AI Researcher", + "tags": []string{"knowledge", "graph", "ai"}, + "nodeId": "doc-2", + }, + }, + } + + // Index documents + err := genkit.Index(ctx, g, + ai.WithIndexer("neo4j/documents-index"), + ai.WithDocuments(documents), + ) + if err != nil { + return fmt.Errorf("failed to index documents: %w", err) + } + + return nil + } + + // Create relationships between indexed documents + func createDocumentRelationships(ctx context.Context) error { + // This would typically involve direct Neo4j queries + // to establish relationships between document nodes + fmt.Println("Documents indexed as graph nodes with embeddings") + return nil + } + ``` + + + Index documents as graph nodes with vector embeddings: + + ```python + from typing import List, Dict, Any + + # Prepare documents for indexing + documents = [ + { + "content": "Neo4j combines graph database capabilities with vector search.", + "metadata": { + "title": "Neo4j Overview", + "category": "database", + "author": "Neo4j Team", + "tags": ["graph", "vector", "database"], + "node_id": "doc-1", + }, + }, + { + "content": "Knowledge graphs represent entities and their relationships.", + "metadata": { + "title": "Knowledge Graphs", + "category": "technology", + "author": "AI Researcher", + "tags": ["knowledge", "graph", "ai"], + "node_id": "doc-2", + }, + }, + ] + + # Index documents + async def index_documents(docs: List[Dict[str, Any]], index_name: str = "documents-index"): + try: + indexer = f"neo4j/{index_name}" + + await ai.index( + indexer=indexer, + documents=docs + ) + + return {"indexed": len(docs), "success": True} + except Exception as error: + print(f"Indexing failed: {error}") + return {"indexed": 0, "success": False} + + # Create relationships between documents + async def create_document_relationships(): + # This would typically involve direct Neo4j queries + # to establish relationships between document nodes + print("Documents indexed as graph nodes with embeddings") + return {"relationships_created": True} + ``` + + + +### Document Retrieval + + + + Retrieve relevant documents using vector similarity: + + ```ts + import { neo4jRetrieverRef } from 'genkitx-neo4j'; + + // Create retriever reference + const documentsRetriever = neo4jRetrieverRef({ + indexId: 'documents-index', + displayName: 'Knowledge Base', + }); + + // Basic retrieval + const query = "What is a knowledge graph?"; + const docs = await ai.retrieve({ + retriever: documentsRetriever, + query, + options: { + k: 5, // Number of documents to retrieve (max 1000) + }, + }); + + console.log('Retrieved documents:', docs); + + // Advanced retrieval with custom scoring + const advancedDocs = await ai.retrieve({ + retriever: documentsRetriever, + query: "graph database relationships", + options: { + k: 3, + // Additional Neo4j-specific options can be passed here + }, + }); + + // Retrieve with specific author filter (would require custom Cypher) + const authorSpecificRetriever = neo4jRetrieverRef({ + indexId: 'documents-index', + displayName: 'Author-Specific Search', + }); + ``` + + + Retrieve relevant documents using vector similarity: + + ```go + // Basic retrieval + func retrieveDocuments(ctx context.Context, query string) ([]ai.Document, error) { + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("neo4j/documents-index"), + 
ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": 5, + }), + ) + if err != nil { + return nil, fmt.Errorf("retrieval failed: %w", err) + } + + return docs, nil + } + + // Advanced retrieval with custom options + func advancedRetrieve(ctx context.Context, query string, limit int) ([]ai.Document, error) { + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("neo4j/documents-index"), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": limit, + // Additional Neo4j-specific options + }), + ) + if err != nil { + return nil, fmt.Errorf("advanced retrieval failed: %w", err) + } + + return docs, nil + } + + // Usage example + func searchKnowledgeBase(ctx context.Context) error { + // Basic search + docs, err := retrieveDocuments(ctx, "What is a knowledge graph?") + if err != nil { + return err + } + + // Advanced search + advancedDocs, err := advancedRetrieve(ctx, "graph database relationships", 3) + if err != nil { + return err + } + + fmt.Printf("Found %d documents, %d advanced results\n", len(docs), len(advancedDocs)) + return nil + } + ``` + + + Retrieve relevant documents using vector similarity: + + ```python + from typing import List, Dict, Any, Optional + + # Basic retrieval + async def retrieve_documents(query: str, index_name: str = "documents-index", k: int = 5) -> List[Dict[str, Any]]: + try: + retriever = f"neo4j/{index_name}" + docs = await ai.retrieve( + retriever=retriever, + query=query, + options={"k": k} + ) + return docs + except Exception as error: + print(f"Retrieval failed: {error}") + return [] + + # Advanced retrieval with custom options + async def advanced_retrieve( + query: str, + index_name: str = "documents-index", + k: int = 5, + custom_options: Optional[Dict[str, Any]] = None + ) -> List[Dict[str, Any]]: + try: + retriever = f"neo4j/{index_name}" + + options = {"k": k} + if custom_options: + options.update(custom_options) + + docs = await ai.retrieve( + retriever=retriever, + query=query, + options=options + ) + + return docs + except Exception as error: + print(f"Advanced retrieval failed: {error}") + return [] + + # Usage examples + async def search_knowledge_base(): + # Basic search + docs = await retrieve_documents("What is a knowledge graph?", k=5) + + # Advanced search + advanced_docs = await advanced_retrieve( + query="graph database relationships", + k=3, + custom_options={"include_metadata": True} + ) + + print(f"Found {len(docs)} documents, {len(advanced_docs)} advanced results") + return docs, advanced_docs + ``` + + + +## Advanced Features + +### Graph-Enhanced Retrieval + + + + Combine vector search with graph traversal for enhanced results: + + ```ts + // Custom retrieval that combines vector search with graph relationships + const graphEnhancedRetrieval = async (query: string) => { + // First, perform vector search + const vectorResults = await ai.retrieve({ + retriever: documentsRetriever, + query, + options: { k: 3 }, + }); + + // Then, use Neo4j driver directly for graph traversal + // This would require additional Neo4j driver setup + const enhancedResults = vectorResults.map(doc => ({ + ...doc, + relatedNodes: [], // Would be populated by graph traversal + })); + + return enhancedResults; + }; + + // Knowledge graph construction + const buildKnowledgeGraph = async (documents: Document[]) => { + // Index documents first + await ai.index({ + indexer: documentsIndexer, + documents, + }); + + // Create relationships based on content similarity or metadata + // This would involve direct Cypher queries to Neo4j 
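+ // e.g. a hypothetical Cypher statement run via the Neo4j driver:
+ // MATCH (a:Document {nodeId: 'doc-1'}), (b:Document {nodeId: 'doc-2'}) CREATE (a)-[:RELATED_TO]->(b)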
+ console.log('Knowledge graph constructed with vector-enabled nodes'); + }; + + // Entity relationship extraction + const extractEntityRelationships = async (text: string) => { + // Use AI to extract entities and relationships + const { text: entities } = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: `Extract entities and relationships from: ${text} + + Format as JSON with entities and relationships arrays.`, + }); + + return JSON.parse(entities); + }; + ``` + + + Combine vector search with graph traversal for enhanced results: + + ```go + import ( + "encoding/json" + "fmt" + "github.com/neo4j/neo4j-go-driver/v5/neo4j" + ) + + // Graph-enhanced retrieval combining vector search with graph traversal + func graphEnhancedRetrieval(ctx context.Context, query string) ([]map[string]interface{}, error) { + // First, perform vector search + vectorResults, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("neo4j/documents-index"), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{"k": 3}), + ) + if err != nil { + return nil, fmt.Errorf("vector search failed: %w", err) + } + + // Enhance with graph traversal (requires Neo4j driver) + enhancedResults := make([]map[string]interface{}, len(vectorResults)) + for i, doc := range vectorResults { + enhancedResults[i] = map[string]interface{}{ + "document": doc, + "relatedNodes": []interface{}{}, // Would be populated by graph traversal + } + } + + return enhancedResults, nil + } + + // Knowledge graph construction + func buildKnowledgeGraph(ctx context.Context, documents []ai.Document) error { + // Index documents first + err := genkit.Index(ctx, g, + ai.WithIndexer("neo4j/documents-index"), + ai.WithDocuments(documents), + ) + if err != nil { + return fmt.Errorf("failed to index documents: %w", err) + } + + // Create relationships based on content similarity or metadata + // This would involve direct Cypher queries to Neo4j + fmt.Println("Knowledge graph constructed with vector-enabled nodes") + return nil + } + + // Entity relationship extraction + func extractEntityRelationships(ctx context.Context, text string) (map[string]interface{}, error) { + prompt := fmt.Sprintf(`Extract entities and relationships from: %s + + Format as JSON with entities and relationships arrays.`, text) + + resp, err := genkit.Generate(ctx, g, + ai.WithModel("googleai/gemini-2.5-flash"), + ai.WithPrompt(prompt), + ) + if err != nil { + return nil, fmt.Errorf("entity extraction failed: %w", err) + } + + var result map[string]interface{} + err = json.Unmarshal([]byte(resp.Text()), &result) + if err != nil { + return nil, fmt.Errorf("failed to parse entities: %w", err) + } + + return result, nil + } + ``` + + + Combine vector search with graph traversal for enhanced results: + + ```python + import json + from typing import List, Dict, Any + + # Graph-enhanced retrieval combining vector search with graph traversal + async def graph_enhanced_retrieval(query: str) -> List[Dict[str, Any]]: + try: + # First, perform vector search + vector_results = await retrieve_documents(query, k=3) + + # Enhance with graph traversal (would require Neo4j driver) + enhanced_results = [] + for doc in vector_results: + enhanced_doc = { + "document": doc, + "related_nodes": [], # Would be populated by graph traversal + } + enhanced_results.append(enhanced_doc) + + return enhanced_results + except Exception as error: + print(f"Graph-enhanced retrieval failed: {error}") + return [] + + # Knowledge graph construction + async def build_knowledge_graph(documents: 
List[Dict[str, Any]]) -> Dict[str, Any]: + try: + # Index documents first + index_result = await index_documents(documents) + + if index_result["success"]: + # Create relationships based on content similarity or metadata + # This would involve direct Cypher queries to Neo4j + print("Knowledge graph constructed with vector-enabled nodes") + return {"graph_built": True, "nodes": len(documents)} + else: + return {"graph_built": False, "error": "Failed to index documents"} + except Exception as error: + print(f"Knowledge graph construction failed: {error}") + return {"graph_built": False, "error": str(error)} + + # Entity relationship extraction + async def extract_entity_relationships(text: str) -> Dict[str, Any]: + try: + prompt = f"""Extract entities and relationships from: {text} + + Format as JSON with entities and relationships arrays.""" + + response = await ai.generate( + model="googleai/gemini-2.5-flash", + prompt=prompt + ) + + entities = json.loads(response.text) + return entities + except Exception as error: + print(f"Entity extraction failed: {error}") + return {"entities": [], "relationships": []} + + # Comprehensive knowledge graph workflow + async def knowledge_graph_workflow(documents: List[Dict[str, Any]], query: str): + # Build knowledge graph + graph_result = await build_knowledge_graph(documents) + + # Extract entities from query + query_entities = await extract_entity_relationships(query) + + # Perform graph-enhanced retrieval + enhanced_results = await graph_enhanced_retrieval(query) + + return { + "graph_construction": graph_result, + "query_entities": query_entities, + "enhanced_results": enhanced_results + } + ``` + + + +## Best Practices + +### Graph Design + +1. **Node Structure**: Design nodes with meaningful labels and properties +2. **Relationship Types**: Use descriptive relationship types for better traversal +3. **Index Strategy**: Create appropriate vector and property indexes +4. **Schema Design**: Plan your graph schema for optimal query performance + +### Vector Integration + +1. **Embedding Dimensions**: Match vector dimensions to your embedding model +2. **Similarity Functions**: Choose appropriate similarity functions (cosine, euclidean) +3. **Index Configuration**: Optimize vector index settings for your use case +4. **Hybrid Queries**: Combine vector search with graph traversal effectively + +### Performance Optimization + +1. **Query Optimization**: Use efficient Cypher queries for graph operations +2. **Index Management**: Maintain both vector and property indexes +3. **Connection Pooling**: Use connection pooling for better performance +4. **Memory Management**: Monitor memory usage for large graphs + +### Production Deployment + +1. **Clustering**: Use Neo4j clustering for high availability +2. **Backup Strategies**: Implement regular backup procedures +3. **Monitoring**: Monitor query performance and graph metrics +4. 
**Security**: Implement proper authentication and authorization + +## Next Steps + +- Learn about [RAG implementation](/unified-docs/rag) to build complete retrieval-augmented generation systems +- Explore [creating flows](/unified-docs/creating-flows) to build structured AI workflows with graph-enhanced search +- See [deployment guides](/unified-docs/deployment) for production deployment strategies +- Check out other vector database options for different use cases diff --git a/src/content/docs/unified-docs/vector-databases/pgvector.mdx b/src/content/docs/unified-docs/vector-databases/pgvector.mdx new file mode 100644 index 00000000..4165458a --- /dev/null +++ b/src/content/docs/unified-docs/vector-databases/pgvector.mdx @@ -0,0 +1,939 @@ +--- +title: pgvector (PostgreSQL Vector Extension) +description: Learn how to use pgvector with Genkit across JavaScript, Go, and Python for vector storage, semantic search, and RAG applications using PostgreSQL. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +pgvector is a PostgreSQL extension that adds vector similarity search capabilities to PostgreSQL databases. It provides efficient storage and querying of high-dimensional vectors, making it ideal for AI applications that need both relational and vector data in a single database. + +## Installation and Setup + +### PostgreSQL with pgvector Extension + + + + Install the required dependencies: + + ```bash + npm install postgres pgvector + ``` + + Set up your PostgreSQL database with pgvector: + + ```sql + -- Enable the pgvector extension + CREATE EXTENSION IF NOT EXISTS vector; + + -- Create a table for storing documents with embeddings + CREATE TABLE documents ( + id SERIAL PRIMARY KEY, + content TEXT NOT NULL, + embedding vector(768), -- Adjust dimension based on your embedding model + metadata JSONB, + created_at TIMESTAMP DEFAULT NOW() + ); + + -- Create an index for efficient vector similarity search + CREATE INDEX ON documents USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100); + ``` + + Configure the database connection: + + ```ts + import { genkit, z } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + import postgres from 'postgres'; + import { toSql } from 'pgvector'; + + const ai = genkit({ + plugins: [googleAI()], + }); + + const sql = postgres({ + host: 'localhost', + port: 5432, + database: 'your_database', + username: 'your_username', + password: 'your_password', + ssl: false, // Enable for production + }); + ``` + + + Install the required dependencies: + + ```bash + go get github.com/lib/pq + go get github.com/pgvector/pgvector-go + ``` + + Set up your PostgreSQL database with pgvector: + + ```sql + -- Enable the pgvector extension + CREATE EXTENSION IF NOT EXISTS vector; + + -- Create a table for storing documents with embeddings + CREATE TABLE documents ( + id SERIAL PRIMARY KEY, + content TEXT NOT NULL, + embedding vector(768), -- Adjust dimension based on your embedding model + metadata JSONB, + created_at TIMESTAMP DEFAULT NOW() + ); + + -- Create an index for efficient vector similarity search + CREATE INDEX ON documents USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100); + ``` + + Configure the database connection: + + ```go + package main + + import ( + "database/sql" + "context" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googleai" + "github.com/lib/pq" + "github.com/pgvector/pgvector-go" + ) + + func main() { + ctx := 
context.Background() + + // Initialize Genkit + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googleai.GoogleAI{}), + ) + if err != nil { + log.Fatal(err) + } + + // Connect to PostgreSQL + db, err := sql.Open("postgres", "postgres://username:password@localhost/dbname?sslmode=disable") + if err != nil { + log.Fatal(err) + } + defer db.Close() + } + ``` + + + Install the required dependencies: + + ```bash + pip install psycopg2-binary pgvector + ``` + + Set up your PostgreSQL database with pgvector: + + ```sql + -- Enable the pgvector extension + CREATE EXTENSION IF NOT EXISTS vector; + + -- Create a table for storing documents with embeddings + CREATE TABLE documents ( + id SERIAL PRIMARY KEY, + content TEXT NOT NULL, + embedding vector(768), -- Adjust dimension based on your embedding model + metadata JSONB, + created_at TIMESTAMP DEFAULT NOW() + ); + + -- Create an index for efficient vector similarity search + CREATE INDEX ON documents USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100); + ``` + + Configure the database connection: + + ```python + import psycopg2 + from pgvector.psycopg2 import register_vector + from genkit.ai import Genkit + from genkit.plugins.google_genai import GoogleGenAI + + # Initialize Genkit + ai = Genkit( + plugins=[GoogleGenAI()], + ) + + # Connect to PostgreSQL + conn = psycopg2.connect( + host="localhost", + database="your_database", + user="your_username", + password="your_password" + ) + + # Register pgvector types + register_vector(conn) + ``` + + + +## Basic Usage + +### Document Indexing + + + + Create a custom indexer for pgvector: + + ```ts + import { Document } from 'genkit'; + + const pgvectorIndexer = ai.defineIndexer( + { + name: 'pgvector-indexer', + configSchema: z.object({ + tableName: z.string().optional().default('documents'), + }), + }, + async (docs: Document[], options) => { + const tableName = options.tableName || 'documents'; + + for (const doc of docs) { + // Generate embedding for the document + const embedding = await ai.embed({ + embedder: googleAI.embedder('gemini-embedding-001'), + content: doc.content, + }); + + // Insert document with embedding into PostgreSQL + await sql` + INSERT INTO ${sql(tableName)} (content, embedding, metadata) + VALUES ( + ${doc.content}, + ${toSql(embedding[0].embedding)}, + ${JSON.stringify(doc.metadata || {})} + ) + `; + } + }, + ); + + // Usage + const documents: Document[] = [ + { + content: 'PostgreSQL is a powerful relational database with vector capabilities.', + metadata: { + title: 'PostgreSQL Overview', + category: 'database', + source: 'documentation', + }, + }, + { + content: 'pgvector extends PostgreSQL with efficient vector similarity search.', + metadata: { + title: 'pgvector Extension', + category: 'technology', + source: 'blog', + }, + }, + ]; + + await ai.index({ + indexer: pgvectorIndexer, + documents, + }); + ``` + + + Create a custom indexer for pgvector: + + ```go + import ( + "context" + "database/sql" + "encoding/json" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/pgvector/pgvector-go" + ) + + func createPgvectorIndexer(db *sql.DB) ai.Indexer { + return genkit.DefineIndexer(g, "pgvector-indexer", + func(ctx context.Context, docs []ai.Document) error { + for _, doc := range docs { + // Generate embedding + embedding, err := genkit.Embed(ctx, g, + ai.WithEmbedder("googleai/gemini-embedding-001"), + ai.WithContent(doc.Content), + ) + if err != nil { + return fmt.Errorf("failed to generate embedding: %w", err) + } + + // 
Convert metadata to JSON + metadataJSON, err := json.Marshal(doc.Metadata) + if err != nil { + return fmt.Errorf("failed to marshal metadata: %w", err) + } + + // Insert into PostgreSQL + _, err = db.ExecContext(ctx, ` + INSERT INTO documents (content, embedding, metadata) + VALUES ($1, $2, $3) + `, doc.Content, pgvector.NewVector(embedding[0].Embedding), metadataJSON) + + if err != nil { + return fmt.Errorf("failed to insert document: %w", err) + } + } + return nil + }, + ) + } + + // Usage + func indexDocuments(ctx context.Context, db *sql.DB) error { + indexer := createPgvectorIndexer(db) + + documents := []ai.Document{ + { + Content: "PostgreSQL is a powerful relational database with vector capabilities.", + Metadata: map[string]interface{}{ + "title": "PostgreSQL Overview", + "category": "database", + "source": "documentation", + }, + }, + { + Content: "pgvector extends PostgreSQL with efficient vector similarity search.", + Metadata: map[string]interface{}{ + "title": "pgvector Extension", + "category": "technology", + "source": "blog", + }, + }, + } + + return genkit.Index(ctx, g, + ai.WithIndexer(indexer), + ai.WithDocuments(documents), + ) + } + ``` + + + Create a custom indexer for pgvector: + + ```python + import json + from typing import List, Dict, Any + from pgvector.psycopg2 import register_vector + + async def create_pgvector_indexer(conn): + """Create a custom pgvector indexer""" + + async def index_documents(docs: List[Dict[str, Any]], table_name: str = "documents"): + cursor = conn.cursor() + + try: + for doc in docs: + # Generate embedding + embedding_response = await ai.embed( + embedder="googleai/gemini-embedding-001", + content=doc["content"] + ) + embedding = embedding_response[0]["embedding"] + + # Insert document with embedding + cursor.execute(""" + INSERT INTO %s (content, embedding, metadata) + VALUES (%%s, %%s, %%s) + """ % table_name, ( + doc["content"], + embedding, + json.dumps(doc.get("metadata", {})) + )) + + conn.commit() + return {"indexed": len(docs), "success": True} + + except Exception as error: + conn.rollback() + print(f"Indexing failed: {error}") + return {"indexed": 0, "success": False} + finally: + cursor.close() + + return index_documents + + # Usage + async def index_documents_example(): + indexer = await create_pgvector_indexer(conn) + + documents = [ + { + "content": "PostgreSQL is a powerful relational database with vector capabilities.", + "metadata": { + "title": "PostgreSQL Overview", + "category": "database", + "source": "documentation", + }, + }, + { + "content": "pgvector extends PostgreSQL with efficient vector similarity search.", + "metadata": { + "title": "pgvector Extension", + "category": "technology", + "source": "blog", + }, + }, + ] + + result = await indexer(documents) + return result + ``` + + + +### Document Retrieval + + + + Create a custom retriever for pgvector: + + ```ts + const pgvectorRetriever = ai.defineRetriever( + { + name: 'pgvector-retriever', + configSchema: z.object({ + tableName: z.string().optional().default('documents'), + k: z.number().optional().default(5), + threshold: z.number().optional(), + where: z.record(z.any()).optional(), + }), + }, + async (query: string, options) => { + const { tableName = 'documents', k = 5, threshold, where } = options; + + // Generate embedding for the query + const embedding = await ai.embed({ + embedder: googleAI.embedder('gemini-embedding-001'), + content: query, + }); + + // Build WHERE clause for metadata filtering + let whereClause = ''; + let whereParams: any[] = 
[]; + if (where) { + const conditions = Object.entries(where).map(([key, value], index) => { + whereParams.push(value); + return `metadata->>'${key}' = $${whereParams.length + 2}`; + }); + whereClause = conditions.length > 0 ? `AND ${conditions.join(' AND ')}` : ''; + } + + // Query similar documents + const queryText = ` + SELECT content, metadata, 1 - (embedding <=> $1) as similarity + FROM ${tableName} + WHERE 1=1 ${whereClause} + ${threshold ? `AND 1 - (embedding <=> $1) >= $${whereParams.length + 2}` : ''} + ORDER BY embedding <=> $1 + LIMIT $2 + `; + + const params = [toSql(embedding[0].embedding), k, ...whereParams]; + if (threshold) params.push(threshold); + + const results = await sql.unsafe(queryText, params); + + return { + documents: results.map((row: any) => ({ + content: row.content, + metadata: { + ...row.metadata, + similarity: row.similarity, + }, + })), + }; + }, + ); + + // Usage + const docs = await ai.retrieve({ + retriever: pgvectorRetriever, + query: "What is PostgreSQL?", + options: { + k: 3, + threshold: 0.7, + where: { category: 'database' }, + }, + }); + ``` + + + Create a custom retriever for pgvector: + + ```go + func createPgvectorRetriever(db *sql.DB) ai.Retriever { + return genkit.DefineRetriever(g, "pgvector-retriever", + func(ctx context.Context, query string, options map[string]interface{}) ([]ai.Document, error) { + // Generate embedding for query + embedding, err := genkit.Embed(ctx, g, + ai.WithEmbedder("googleai/gemini-embedding-001"), + ai.WithContent(query), + ) + if err != nil { + return nil, fmt.Errorf("failed to generate query embedding: %w", err) + } + + // Extract options + k := 5 + if kVal, ok := options["k"].(int); ok { + k = kVal + } + + tableName := "documents" + if tableVal, ok := options["tableName"].(string); ok { + tableName = tableVal + } + + // Query similar documents + queryText := fmt.Sprintf(` + SELECT content, metadata, 1 - (embedding <=> $1) as similarity + FROM %s + ORDER BY embedding <=> $1 + LIMIT $2 + `, tableName) + + rows, err := db.QueryContext(ctx, queryText, + pgvector.NewVector(embedding[0].Embedding), k) + if err != nil { + return nil, fmt.Errorf("failed to query documents: %w", err) + } + defer rows.Close() + + var documents []ai.Document + for rows.Next() { + var content string + var metadataJSON []byte + var similarity float64 + + err := rows.Scan(&content, &metadataJSON, &similarity) + if err != nil { + return nil, fmt.Errorf("failed to scan row: %w", err) + } + + var metadata map[string]interface{} + if err := json.Unmarshal(metadataJSON, &metadata); err != nil { + metadata = make(map[string]interface{}) + } + metadata["similarity"] = similarity + + documents = append(documents, ai.Document{ + Content: content, + Metadata: metadata, + }) + } + + return documents, nil + }, + ) + } + + // Usage + func retrieveDocuments(ctx context.Context, db *sql.DB, query string) ([]ai.Document, error) { + retriever := createPgvectorRetriever(db) + + return genkit.Retrieve(ctx, g, + ai.WithRetriever(retriever), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": 3, + "tableName": "documents", + }), + ) + } + ``` + + + Create a custom retriever for pgvector: + + ```python + async def create_pgvector_retriever(conn): + """Create a custom pgvector retriever""" + + async def retrieve_documents( + query: str, + table_name: str = "documents", + k: int = 5, + threshold: float = None, + where: Dict[str, Any] = None + ) -> List[Dict[str, Any]]: + cursor = conn.cursor() + + try: + # Generate embedding for query + 
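+ # Use the same embedder that produced the stored document embeddings so query and document vectors are comparable.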
embedding_response = await ai.embed( + embedder="googleai/gemini-embedding-001", + content=query + ) + embedding = embedding_response[0]["embedding"] + + # Build WHERE clause for metadata filtering + where_clause = "" + params = [embedding, k] + + if where: + conditions = [] + for key, value in where.items(): + conditions.append(f"metadata->>%s = %s") + params.extend([key, value]) + if conditions: + where_clause = f"AND {' AND '.join(conditions)}" + + if threshold: + where_clause += f" AND 1 - (embedding <=> %s) >= %s" + params.extend([embedding, threshold]) + + # Query similar documents + query_text = f""" + SELECT content, metadata, 1 - (embedding <=> %s) as similarity + FROM {table_name} + WHERE 1=1 {where_clause} + ORDER BY embedding <=> %s + LIMIT %s + """ + + cursor.execute(query_text, params) + results = cursor.fetchall() + + documents = [] + for row in results: + content, metadata, similarity = row + if isinstance(metadata, str): + metadata = json.loads(metadata) + metadata["similarity"] = similarity + + documents.append({ + "content": content, + "metadata": metadata, + }) + + return documents + + except Exception as error: + print(f"Retrieval failed: {error}") + return [] + finally: + cursor.close() + + return retrieve_documents + + # Usage + async def retrieve_documents_example(): + retriever = await create_pgvector_retriever(conn) + + docs = await retriever( + query="What is PostgreSQL?", + k=3, + threshold=0.7, + where={"category": "database"} + ) + + return docs + ``` + + + +## Advanced Features + +### Hybrid Search (Vector + Text) + + + + Combine vector similarity with traditional text search: + + ```ts + const hybridRetriever = ai.defineRetriever( + { + name: 'pgvector-hybrid-retriever', + configSchema: z.object({ + tableName: z.string().optional().default('documents'), + k: z.number().optional().default(5), + vectorWeight: z.number().optional().default(0.7), + textWeight: z.number().optional().default(0.3), + }), + }, + async (query: string, options) => { + const { tableName = 'documents', k = 5, vectorWeight = 0.7, textWeight = 0.3 } = options; + + // Generate embedding for vector search + const embedding = await ai.embed({ + embedder: googleAI.embedder('gemini-embedding-001'), + content: query, + }); + + // Hybrid search combining vector similarity and text search + const results = await sql` + WITH vector_search AS ( + SELECT + content, + metadata, + 1 - (embedding <=> ${toSql(embedding[0].embedding)}) as vector_score + FROM ${sql(tableName)} + ), + text_search AS ( + SELECT + content, + metadata, + ts_rank(to_tsvector('english', content), plainto_tsquery('english', ${query})) as text_score + FROM ${sql(tableName)} + WHERE to_tsvector('english', content) @@ plainto_tsquery('english', ${query}) + ) + SELECT + v.content, + v.metadata, + (${vectorWeight} * COALESCE(v.vector_score, 0) + ${textWeight} * COALESCE(t.text_score, 0)) as combined_score + FROM vector_search v + FULL OUTER JOIN text_search t ON v.content = t.content + ORDER BY combined_score DESC + LIMIT ${k} + `; + + return { + documents: results.map((row: any) => ({ + content: row.content, + metadata: { + ...row.metadata, + combined_score: row.combined_score, + }, + })), + }; + }, + ); + ``` + + + Combine vector similarity with traditional text search: + + ```go + func createHybridRetriever(db *sql.DB) ai.Retriever { + return genkit.DefineRetriever(g, "pgvector-hybrid-retriever", + func(ctx context.Context, query string, options map[string]interface{}) ([]ai.Document, error) { + // Generate embedding + embedding, 
err := genkit.Embed(ctx, g, + ai.WithEmbedder("googleai/gemini-embedding-001"), + ai.WithContent(query), + ) + if err != nil { + return nil, fmt.Errorf("failed to generate embedding: %w", err) + } + + // Extract options + k := 5 + vectorWeight := 0.7 + textWeight := 0.3 + tableName := "documents" + + if kVal, ok := options["k"].(int); ok { + k = kVal + } + if vwVal, ok := options["vectorWeight"].(float64); ok { + vectorWeight = vwVal + } + if twVal, ok := options["textWeight"].(float64); ok { + textWeight = twVal + } + if tnVal, ok := options["tableName"].(string); ok { + tableName = tnVal + } + + // Hybrid search query + queryText := fmt.Sprintf(` + WITH vector_search AS ( + SELECT + content, + metadata, + 1 - (embedding <=> $1) as vector_score + FROM %s + ), + text_search AS ( + SELECT + content, + metadata, + ts_rank(to_tsvector('english', content), plainto_tsquery('english', $2)) as text_score + FROM %s + WHERE to_tsvector('english', content) @@ plainto_tsquery('english', $2) + ) + SELECT + v.content, + v.metadata, + ($3 * COALESCE(v.vector_score, 0) + $4 * COALESCE(t.text_score, 0)) as combined_score + FROM vector_search v + FULL OUTER JOIN text_search t ON v.content = t.content + ORDER BY combined_score DESC + LIMIT $5 + `, tableName, tableName) + + rows, err := db.QueryContext(ctx, queryText, + pgvector.NewVector(embedding[0].Embedding), + query, + vectorWeight, + textWeight, + k, + ) + if err != nil { + return nil, fmt.Errorf("hybrid search failed: %w", err) + } + defer rows.Close() + + var documents []ai.Document + for rows.Next() { + var content string + var metadataJSON []byte + var combinedScore float64 + + err := rows.Scan(&content, &metadataJSON, &combinedScore) + if err != nil { + return nil, fmt.Errorf("failed to scan row: %w", err) + } + + var metadata map[string]interface{} + if err := json.Unmarshal(metadataJSON, &metadata); err != nil { + metadata = make(map[string]interface{}) + } + metadata["combined_score"] = combinedScore + + documents = append(documents, ai.Document{ + Content: content, + Metadata: metadata, + }) + } + + return documents, nil + }, + ) + } + ``` + + + Combine vector similarity with traditional text search: + + ```python + async def create_hybrid_retriever(conn): + """Create a hybrid retriever combining vector and text search""" + + async def hybrid_search( + query: str, + table_name: str = "documents", + k: int = 5, + vector_weight: float = 0.7, + text_weight: float = 0.3 + ) -> List[Dict[str, Any]]: + cursor = conn.cursor() + + try: + # Generate embedding + embedding_response = await ai.embed( + embedder="googleai/gemini-embedding-001", + content=query + ) + embedding = embedding_response[0]["embedding"] + + # Hybrid search query + query_text = f""" + WITH vector_search AS ( + SELECT + content, + metadata, + 1 - (embedding <=> %s) as vector_score + FROM {table_name} + ), + text_search AS ( + SELECT + content, + metadata, + ts_rank(to_tsvector('english', content), plainto_tsquery('english', %s)) as text_score + FROM {table_name} + WHERE to_tsvector('english', content) @@ plainto_tsquery('english', %s) + ) + SELECT + v.content, + v.metadata, + (%s * COALESCE(v.vector_score, 0) + %s * COALESCE(t.text_score, 0)) as combined_score + FROM vector_search v + FULL OUTER JOIN text_search t ON v.content = t.content + ORDER BY combined_score DESC + LIMIT %s + """ + + cursor.execute(query_text, [ + embedding, query, query, vector_weight, text_weight, k + ]) + results = cursor.fetchall() + + documents = [] + for row in results: + content, metadata, 
combined_score = row + if isinstance(metadata, str): + metadata = json.loads(metadata) + metadata["combined_score"] = combined_score + + documents.append({ + "content": content, + "metadata": metadata, + }) + + return documents + + except Exception as error: + print(f"Hybrid search failed: {error}") + return [] + finally: + cursor.close() + + return hybrid_search + ``` + + + +## Best Practices + +### Database Optimization + +1. **Choose the right index type**: + - `ivfflat`: Good for most use cases, faster builds + - `hnsw`: Better recall, slower builds + +2. **Optimize index parameters**: + ```sql + -- For ivfflat + CREATE INDEX ON documents USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100); + + -- For hnsw (PostgreSQL 14+) + CREATE INDEX ON documents USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64); + ``` + +3. **Use appropriate vector dimensions**: Match your embedding model's output dimensions + +4. **Consider partitioning**: For large datasets, partition tables by metadata + +### Performance Optimization + +1. **Batch operations**: Insert/update documents in batches +2. **Connection pooling**: Use connection pools for production applications +3. **Vacuum regularly**: Keep statistics updated for optimal query planning +4. **Monitor query performance**: Use `EXPLAIN ANALYZE` to optimize queries + +### Production Deployment + +1. **Use managed PostgreSQL**: Consider cloud providers with pgvector support +2. **Set up replication**: Configure read replicas for scaling reads +3. **Backup strategies**: Regular backups including vector data +4. **Monitoring**: Track query performance and index usage + +## Next Steps + +- Learn about [RAG implementation](/unified-docs/rag) to build complete retrieval-augmented generation systems +- Explore [creating flows](/unified-docs/creating-flows) to build structured AI workflows with vector search +- See [deployment guides](/unified-docs/deployment) for production deployment strategies +- Check out other vector database options for different use cases diff --git a/src/content/docs/unified-docs/vector-databases/pinecone.mdx b/src/content/docs/unified-docs/vector-databases/pinecone.mdx new file mode 100644 index 00000000..21a18cc7 --- /dev/null +++ b/src/content/docs/unified-docs/vector-databases/pinecone.mdx @@ -0,0 +1,613 @@ +--- +title: Pinecone Vector Database +description: Learn how to use Pinecone cloud vector database with Genkit across JavaScript, Go, and Python for RAG applications, semantic search, and vector storage. +--- + +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; + +Pinecone is a fully managed cloud vector database that provides high-performance vector search capabilities. The Pinecone integration with Genkit enables you to build powerful RAG (Retrieval-Augmented Generation) applications with semantic search, document indexing, and intelligent retrieval. 
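+
+At a glance, the workflow looks like the sketch below — a minimal preview (JavaScript shown; the Go and Python tabs that follow use the same pattern) of the indexer and retriever references this page configures step by step. It assumes the `my-knowledge-base` index and plugin setup described in the next section, plus an `ai` instance and a `documents` array like the ones shown later:
+
+```ts
+import { pineconeIndexerRef, pineconeRetrieverRef } from 'genkitx-pinecone';
+
+// Index documents into the configured Pinecone index...
+await ai.index({
+  indexer: pineconeIndexerRef({ indexId: 'my-knowledge-base' }),
+  documents,
+});
+
+// ...then retrieve the most relevant ones for a query.
+const docs = await ai.retrieve({
+  retriever: pineconeRetrieverRef({ indexId: 'my-knowledge-base' }),
+  query: 'How does machine learning work?',
+});
+```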
+ +## Installation and Setup + + + + Install the Pinecone plugin: + + ```bash + npm install genkitx-pinecone + ``` + + Configure the plugin when initializing Genkit: + + ```ts + import { genkit } from 'genkit'; + import { pinecone } from 'genkitx-pinecone'; + import { googleAI } from '@genkit-ai/googleai'; + + const ai = genkit({ + plugins: [ + pinecone([ + { + indexId: 'my-knowledge-base', + embedder: googleAI.embedder('gemini-embedding-001'), + }, + ]), + ], + }); + ``` + + ### API Key Configuration + + Set your Pinecone API key using one of these methods: + + ```bash + # Environment variable (recommended) + export PINECONE_API_KEY=your_pinecone_api_key + ``` + + ```ts + // Or pass directly to plugin configuration + pinecone([ + { + indexId: 'my-knowledge-base', + embedder: googleAI.embedder('gemini-embedding-001'), + clientParams: { + apiKey: 'your_pinecone_api_key', + }, + }, + ]) + ``` + + Get your API key from [Pinecone Console](https://app.pinecone.io/). + + + For Go applications, use the Pinecone Go client: + + ```bash + go get github.com/pinecone-io/go-pinecone/pinecone + ``` + + ```go + package main + + import ( + "context" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/pinecone" + "github.com/firebase/genkit/go/plugins/googleai" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins( + &googleai.GoogleAI{}, + &pinecone.Pinecone{ + APIKey: os.Getenv("PINECONE_API_KEY"), + Indexes: []pinecone.IndexConfig{ + { + IndexID: "my-knowledge-base", + Embedder: "googleai/gemini-embedding-001", + }, + }, + }, + ), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + ### Environment Configuration + + ```bash + export PINECONE_API_KEY=your_pinecone_api_key + ``` + + + For Python applications, install the Pinecone client: + + ```bash + pip install pinecone-client genkit-plugin-pinecone + ``` + + ```python + from genkit.ai import Genkit + from genkit.plugins.pinecone import Pinecone + from genkit.plugins.google_genai import GoogleGenAI + + ai = Genkit( + plugins=[ + GoogleGenAI(), + Pinecone( + api_key=os.getenv("PINECONE_API_KEY"), + indexes=[ + { + "index_id": "my-knowledge-base", + "embedder": "googleai/gemini-embedding-001", + } + ], + ), + ], + ) + ``` + + ### Environment Configuration + + ```bash + export PINECONE_API_KEY=your_pinecone_api_key + ``` + + + +## Basic Usage + +### Document Indexing + + + + Index documents for semantic search: + + ```ts + import { pineconeIndexerRef } from 'genkitx-pinecone'; + import { Document } from 'genkit'; + + // Prepare documents for indexing + const documents: Document[] = [ + { + content: 'Artificial Intelligence is transforming how we work and live.', + metadata: { + title: 'AI Overview', + category: 'technology', + source: 'blog', + }, + }, + { + content: 'Machine learning algorithms can identify patterns in large datasets.', + metadata: { + title: 'ML Patterns', + category: 'data-science', + source: 'research', + }, + }, + ]; + + // Index documents using the default configured index + await ai.index({ + indexer: pineconeIndexerRef, + documents, + }); + + // Or specify a specific index + const knowledgeBaseIndexer = pineconeIndexerRef({ + indexId: 'my-knowledge-base', + }); + + await ai.index({ + indexer: knowledgeBaseIndexer, + documents, + }); + ``` + + + Index documents for semantic search: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + ) + + func indexDocuments(ctx context.Context) error { + 
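+      // Note: this sketch assumes `g` is the initialized *genkit.Genkit instance
+      // from the setup section and that "fmt" is imported alongside the packages
+      // shown above. The documents below mirror the JavaScript and Python examples.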
documents := []ai.Document{ + { + Content: "Artificial Intelligence is transforming how we work and live.", + Metadata: map[string]interface{}{ + "title": "AI Overview", + "category": "technology", + "source": "blog", + }, + }, + { + Content: "Machine learning algorithms can identify patterns in large datasets.", + Metadata: map[string]interface{}{ + "title": "ML Patterns", + "category": "data-science", + "source": "research", + }, + }, + } + + // Index documents + err := genkit.Index(ctx, g, + ai.WithIndexer("pinecone/my-knowledge-base"), + ai.WithDocuments(documents), + ) + if err != nil { + return fmt.Errorf("failed to index documents: %w", err) + } + + return nil + } + ``` + + + Index documents for semantic search: + + ```python + from typing import List, Dict, Any + + # Prepare documents for indexing + documents = [ + { + "content": "Artificial Intelligence is transforming how we work and live.", + "metadata": { + "title": "AI Overview", + "category": "technology", + "source": "blog", + }, + }, + { + "content": "Machine learning algorithms can identify patterns in large datasets.", + "metadata": { + "title": "ML Patterns", + "category": "data-science", + "source": "research", + }, + }, + ] + + # Index documents + async def index_documents(docs: List[Dict[str, Any]], index_id: str = None): + try: + indexer = f"pinecone/{index_id}" if index_id else "pinecone/my-knowledge-base" + + await ai.index( + indexer=indexer, + documents=docs + ) + + return {"indexed": len(docs), "success": True} + except Exception as error: + print(f"Indexing failed: {error}") + return {"indexed": 0, "success": False} + ``` + + + +### Document Retrieval + + + + Retrieve relevant documents using semantic search: + + ```ts + import { pineconeRetrieverRef } from 'genkitx-pinecone'; + + // Basic retrieval + const query = "How does machine learning work?"; + const docs = await ai.retrieve({ + retriever: pineconeRetrieverRef, + query, + }); + + console.log('Retrieved documents:', docs); + + // Advanced retrieval with specific index and options + const knowledgeBaseRetriever = pineconeRetrieverRef({ + indexId: 'my-knowledge-base', + }); + + const advancedDocs = await ai.retrieve({ + retriever: knowledgeBaseRetriever, + query, + options: { + k: 5, // Number of documents to retrieve + filter: { + category: 'technology', // Metadata filtering + }, + }, + }); + ``` + + + Retrieve relevant documents using semantic search: + + ```go + // Basic retrieval + func retrieveDocuments(ctx context.Context, query string) ([]ai.Document, error) { + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("pinecone/my-knowledge-base"), + ai.WithQuery(query), + ) + if err != nil { + return nil, fmt.Errorf("retrieval failed: %w", err) + } + + return docs, nil + } + + // Advanced retrieval with options + func advancedRetrieve(ctx context.Context, query, indexID string, limit int, filter map[string]interface{}) ([]ai.Document, error) { + retriever := fmt.Sprintf("pinecone/%s", indexID) + + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever(retriever), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": limit, + "filter": filter, + }), + ) + if err != nil { + return nil, fmt.Errorf("advanced retrieval failed: %w", err) + } + + return docs, nil + } + ``` + + + Retrieve relevant documents using semantic search: + + ```python + from typing import List, Dict, Any, Optional + + # Basic retrieval + async def retrieve_documents(query: str, index_id: str = "my-knowledge-base") -> List[Dict[str, Any]]: + try: + retriever = 
f"pinecone/{index_id}" + docs = await ai.retrieve( + retriever=retriever, + query=query + ) + return docs + except Exception as error: + print(f"Retrieval failed: {error}") + return [] + + # Advanced retrieval with options + async def advanced_retrieve( + query: str, + index_id: str = "my-knowledge-base", + limit: int = 5, + filter_criteria: Optional[Dict[str, Any]] = None + ) -> List[Dict[str, Any]]: + try: + retriever = f"pinecone/{index_id}" + + options = {"k": limit} + if filter_criteria: + options["filter"] = filter_criteria + + docs = await ai.retrieve( + retriever=retriever, + query=query, + options=options + ) + + return docs + except Exception as error: + print(f"Advanced retrieval failed: {error}") + return [] + ``` + + + +## RAG Implementation + + + + Build a complete RAG system with Pinecone: + + ```ts + // RAG flow with Pinecone retrieval + export const ragFlow = ai.defineFlow( + { + name: 'ragFlow', + inputSchema: z.object({ + question: z.string(), + indexId: z.string().optional(), + maxResults: z.number().optional(), + }), + outputSchema: z.object({ + answer: z.string(), + sources: z.array(z.object({ + content: z.string(), + metadata: z.record(z.any()), + score: z.number(), + })), + }), + }, + async ({ question, indexId, maxResults = 3 }) => { + // Retrieve relevant documents + const retriever = indexId + ? pineconeRetrieverRef({ indexId }) + : pineconeRetrieverRef; + + const docs = await ai.retrieve({ + retriever, + query: question, + options: { k: maxResults }, + }); + + // Build context from retrieved documents + const context = docs + .map(doc => `Source: ${doc.metadata?.title || 'Unknown'}\n${doc.content}`) + .join('\n\n'); + + // Generate answer using context + const response = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: `Based on the following context, answer the question: "${question}" + +Context: +${context} + +Answer:`, + config: { + temperature: 0.3, + maxTokens: 500, + }, + }); + + return { + answer: response.text, + sources: docs.map(doc => ({ + content: doc.content, + metadata: doc.metadata || {}, + score: doc.score || 0, + })), + }; + }, + ); + ``` + + + Build a complete RAG system with Pinecone: + + ```go + // RAG implementation + func ragQuery(ctx context.Context, question, indexID string, maxResults int) (string, []ai.Document, error) { + // Retrieve relevant documents + retriever := fmt.Sprintf("pinecone/%s", indexID) + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever(retriever), + ai.WithQuery(question), + ai.WithOptions(map[string]interface{}{ + "k": maxResults, + }), + ) + if err != nil { + return "", nil, fmt.Errorf("retrieval failed: %w", err) + } + + // Build context from retrieved documents + var contextParts []string + for _, doc := range docs { + title := "Unknown" + if t, ok := doc.Metadata["title"].(string); ok { + title = t + } + contextParts = append(contextParts, fmt.Sprintf("Source: %s\n%s", title, doc.Content)) + } + context := strings.Join(contextParts, "\n\n") + + // Generate answer using context + prompt := fmt.Sprintf(`Based on the following context, answer the question: "%s" + +Context: +%s + +Answer:`, question, context) + + resp, err := genkit.Generate(ctx, g, + ai.WithModel("googleai/gemini-2.5-flash"), + ai.WithPrompt(prompt), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.3, + "maxTokens": 500, + }), + ) + if err != nil { + return "", nil, fmt.Errorf("generation failed: %w", err) + } + + return resp.Text(), docs, nil + } + ``` + + + Build a complete RAG system with Pinecone: + + 
```python + # RAG implementation + async def rag_query( + question: str, + index_id: str = "my-knowledge-base", + max_results: int = 3 + ) -> Dict[str, Any]: + try: + # Retrieve relevant documents + retriever = f"pinecone/{index_id}" + docs = await ai.retrieve( + retriever=retriever, + query=question, + options={"k": max_results} + ) + + # Build context from retrieved documents + context_parts = [] + for doc in docs: + title = doc.get("metadata", {}).get("title", "Unknown") + context_parts.append(f"Source: {title}\n{doc['content']}") + + context = "\n\n".join(context_parts) + + # Generate answer using context + prompt = f'''Based on the following context, answer the question: "{question}" + +Context: +{context} + +Answer:''' + + response = await ai.generate( + model="googleai/gemini-2.5-flash", + prompt=prompt, + config={ + "temperature": 0.3, + "max_tokens": 500, + } + ) + + return { + "answer": response.text, + "sources": [ + { + "content": doc["content"], + "metadata": doc.get("metadata", {}), + "score": doc.get("score", 0), + } + for doc in docs + ], + } + except Exception as error: + print(f"RAG query failed: {error}") + return {"answer": "I'm sorry, I couldn't find relevant information.", "sources": []} + ``` + + + +## Best Practices + +### Index Management + +1. **Choose appropriate dimensions**: Match your embedding model's output dimensions +2. **Use meaningful metadata**: Include searchable fields like category, date, source +3. **Optimize for your use case**: Consider pod type and replicas based on query volume +4. **Monitor performance**: Track query latency and accuracy metrics + +### Query Optimization + +1. **Use specific queries**: More specific queries yield better results +2. **Leverage metadata filtering**: Combine semantic search with metadata filters +3. **Tune similarity thresholds**: Adjust based on your quality requirements +4. **Implement query expansion**: Enhance queries with synonyms or context + +### Cost Management + +1. **Right-size your index**: Choose appropriate pod types and replica counts +2. **Use namespaces**: Organize data efficiently within indexes +3. **Monitor usage**: Track query volume and storage costs +4. 
**Implement caching**: Cache frequent queries to reduce API calls + +## Next Steps + +- Learn about [RAG implementation](/unified-docs/rag) to build complete retrieval-augmented generation systems +- Explore [creating flows](/unified-docs/creating-flows) to build structured AI workflows with vector search +- See [deployment guides](/unified-docs/deployment) for production deployment strategies +- Check out other vector database options for different use cases diff --git a/src/scripts/language-preference.ts b/src/scripts/language-preference.ts new file mode 100644 index 00000000..ca8e786d --- /dev/null +++ b/src/scripts/language-preference.ts @@ -0,0 +1,149 @@ +type Language = 'js' | 'go' | 'python'; + +const LANGUAGES: Record = { + js: 'JavaScript', + go: 'Go', + python: 'Python', +}; + +const LANGUAGE_CODES = Object.keys(LANGUAGES) as Language[]; +const DEFAULT_LANGUAGE: Language = 'js'; + +class LanguagePreferenceEnhancer { + private storageKey = 'genkit-preferred-language'; + + constructor() { + if (document.readyState === 'loading') { + document.addEventListener('DOMContentLoaded', () => this.init()); + } else { + this.init(); + } + } + + init() { + const urlParams = new URLSearchParams(window.location.search); + const langFromUrl = urlParams.get('lang') as Language | null; + const storedLanguage = localStorage.getItem(this.storageKey) as Language | null; + + let language: Language; + + if (langFromUrl && LANGUAGE_CODES.includes(langFromUrl)) { + language = langFromUrl; + localStorage.setItem(this.storageKey, language); + } else if (storedLanguage && LANGUAGE_CODES.includes(storedLanguage)) { + language = storedLanguage; + this.updateUrl(language); + } else { + language = DEFAULT_LANGUAGE; + localStorage.setItem(this.storageKey, language); + this.updateUrl(language); + } + + this.setupTabListeners(); + this.restoreLanguagePreference(language); + this.observeContentChanges(); + } + + setupTabListeners() { + document.addEventListener('click', (event) => { + const tabButton = (event.target as HTMLElement).closest('[role="tab"]'); + if (!tabButton) return; + + const tabText = tabButton.textContent?.trim(); + if (tabText && Object.values(LANGUAGES).includes(tabText)) { + const langCode = (Object.keys(LANGUAGES) as Language[]).find((key) => LANGUAGES[key] === tabText); + if (langCode) { + this.storeLanguagePreference(langCode); + } + } + }); + } + + storeLanguagePreference(language: Language) { + if (!LANGUAGE_CODES.includes(language)) { + console.warn(`Unknown language: ${language}`); + return; + } + localStorage.setItem(this.storageKey, language); + this.updateUrl(language); + window.dispatchEvent(new CustomEvent('language-preference-changed', { detail: { language } })); + } + + updateUrl(language: Language) { + const url = new URL(window.location.toString()); + url.searchParams.set('lang', language); + window.history.replaceState({}, '', url); + } + + restoreLanguagePreference(language: Language) { + if (!LANGUAGE_CODES.includes(language)) { + console.warn(`Unknown language: ${language}, using default`); + language = DEFAULT_LANGUAGE; + } + this.activateLanguageTabs(language); + } + + activateLanguageTabs(language: Language) { + const languageTabGroups = document.querySelectorAll('[role="tablist"]'); + languageTabGroups.forEach((tabList) => { + const tabs = tabList.querySelectorAll('[role="tab"]'); + let hasLanguageTabs = false; + let targetTab: HTMLElement | null = null; + + tabs.forEach((tab) => { + const tabText = tab.textContent?.trim(); + if (tabText && 
Object.values(LANGUAGES).includes(tabText)) { + hasLanguageTabs = true; + if (LANGUAGES[language] === tabText) { + targetTab = tab; + } + } + }); + + if (hasLanguageTabs && targetTab && (targetTab as HTMLElement).getAttribute('aria-selected') !== 'true') { + (targetTab as HTMLElement).click(); + } + }); + } + + observeContentChanges() { + const observer = new MutationObserver((mutations) => { + for (const mutation of mutations) { + if (mutation.type === 'childList') { + const currentLanguage = (localStorage.getItem(this.storageKey) as Language) || DEFAULT_LANGUAGE; + this.restoreLanguagePreference(currentLanguage); + break; + } + } + }); + + observer.observe(document.body, { + childList: true, + subtree: true, + }); + } + + getCurrentLanguage(): Language { + const lang = localStorage.getItem(this.storageKey) as Language | null; + return lang && LANGUAGE_CODES.includes(lang) ? lang : DEFAULT_LANGUAGE; + } + + getLanguageName(language: Language): string { + return LANGUAGES[language] || LANGUAGES[DEFAULT_LANGUAGE]; + } + + setLanguage(language: Language) { + this.storeLanguagePreference(language); + this.restoreLanguagePreference(language); + } +} + +export const languagePreferenceEnhancer = new LanguagePreferenceEnhancer(); + +declare global { + interface Window { + languagePreferenceEnhancer: LanguagePreferenceEnhancer; + } +} + +window.languagePreferenceEnhancer = languagePreferenceEnhancer; diff --git a/src/sidebar.ts b/src/sidebar.ts index afb57a26..ad78f80c 100644 --- a/src/sidebar.ts +++ b/src/sidebar.ts @@ -13,16 +13,6 @@ const JS_SIDEBAR = [ label: "Summarize YouTube videos", slug: "docs/tutorials/tutorial-summarize-youtube-videos", }, - { - label: "Agentic barista Angular app", - link: "https://developers.google.com/solutions/learn/agentic-barista", - attrs: { - "data-external": true, - target: "_blank", - class: "external-icon", - rel: "noopener", - }, - }, ], }, { @@ -138,7 +128,6 @@ const JS_SIDEBAR = [ { label: "Writing plugins", items: [ - // NOTE: Deployment links were incorrectly placed here before, removed them. 
{ label: "Overview", slug: "docs/plugin-authoring" }, { label: "Writing an Evaluator Plugin", @@ -146,16 +135,6 @@ const JS_SIDEBAR = [ }, ], }, - // { - // label: "Migration Guides", - // items: [ - // // Added 0.9->1.0 link to main Genkit section previously - // { - // label: "Migrate from 0.5 to 0.9", - // slug: "docs/migrating-from-0.5", - // }, - // ], - // }, { label: "Community", items: [{ label: "Connect with us", slug: "docs/feedback" }], @@ -171,7 +150,6 @@ const JS_SIDEBAR = [ target: "_blank", class: "external-icon", rel: "noopener", - // style: "font-weight: 600; font-size: var(--sl-text-base); color: var(--sl-color-white);", }, }, { label: "API stability channels", slug: "docs/api-stability" }, @@ -251,6 +229,7 @@ const GO_SIDEBAR = [ ], }, ]; + const PYTHON_SIDEBAR = [ { label: "Get started", slug: "python/docs/get-started" }, { label: "Deploy with Cloud Run", slug: "python/docs/cloud-run" }, @@ -301,12 +280,100 @@ const PYTHON_SIDEBAR = [ }, ]; +const UNIFIED_SIDEBAR = [ + { label: "Get started", slug: "unified-docs/get-started" }, + { label: "Developer tools", slug: "unified-docs/developer-tools" }, + { label: "MCP Server", slug: "unified-docs/mcp-server" }, + { + label: "Building AI workflows", + items: [ + { label: "Generating content", slug: "unified-docs/generating-content" }, + { label: "Passing information through context", slug: "unified-docs/context" }, + { label: "Creating flows", slug: "unified-docs/creating-flows" }, + { label: "Managing prompts with Dotprompt", slug: "unified-docs/dotprompt" }, + { label: "Creating persistent chat sessions", slug: "unified-docs/chat-sessions" }, + { label: "Tool calling", slug: "unified-docs/tool-calling" }, + { label: "Model Context Protocol (MCP)", slug: "unified-docs/model-context-protocol" }, + { label: "Pause generation using interrupts", slug: "unified-docs/interrupts" }, + { label: "Retrieval-augmented generation (RAG)", slug: "unified-docs/rag" }, + { label: "Building multi-agent systems", slug: "unified-docs/multi-agent-systems" }, + { label: "Error handling", slug: "unified-docs/error-handling" }, + { label: "Evaluation", slug: "unified-docs/evaluation" }, + ], + }, + { + label: "AI Providers", + items: [ + { label: "Google AI", slug: "unified-docs/plugins/google-ai" }, + { label: "Vertex AI", slug: "unified-docs/plugins/vertex-ai" }, + { label: "OpenAI", slug: "unified-docs/plugins/openai" }, + { label: "Anthropic (Claude)", slug: "unified-docs/plugins/anthropic" }, + { label: "xAI (Grok)", slug: "unified-docs/plugins/xai" }, + { label: "DeepSeek", slug: "unified-docs/plugins/deepseek" }, + { label: "Ollama", slug: "unified-docs/plugins/ollama" }, + ], + }, + { + label: "Vector Databases", + items: [ + { label: "Dev Local Vector Store", slug: "unified-docs/vector-databases/dev-local-vectorstore" }, + { label: "Pinecone", slug: "unified-docs/vector-databases/pinecone" }, + { label: "ChromaDB", slug: "unified-docs/vector-databases/chromadb" }, + { label: "pgvector", slug: "unified-docs/vector-databases/pgvector" }, + { label: "LanceDB", slug: "unified-docs/vector-databases/lancedb" }, + { label: "Astra DB", slug: "unified-docs/vector-databases/astra-db" }, + { label: "Neo4j", slug: "unified-docs/vector-databases/neo4j" }, + { label: "Cloud SQL PostgreSQL", slug: "unified-docs/vector-databases/cloud-sql-postgresql" }, + { label: "Cloud Firestore", slug: "unified-docs/vector-databases/cloud-firestore" }, + ], + }, + { + label: "Web Framework Integrations", + items: [ + { label: "Express.js", slug: 
"unified-docs/frameworks/express" }, + { label: "Next.js", slug: "unified-docs/frameworks/nextjs" }, + ], + }, + { + label: "Deployment", + items: [ + { label: "Overview", slug: "unified-docs/deployment" }, + { label: "Firebase", slug: "unified-docs/deployment/firebase" }, + { label: "Cloud Run", slug: "unified-docs/deployment/cloud-run" }, + { label: "Any Platform", slug: "unified-docs/deployment/any-platform" }, + { label: "Authorization & Security", slug: "unified-docs/deployment/authorization" }, + ], + }, + { + label: "Writing Plugins", + items: [ + { label: "Overview", slug: "unified-docs/plugin-authoring/overview" }, + { label: "Model Plugins", slug: "unified-docs/plugin-authoring/models" }, + ], + }, + { + label: "Observability and Monitoring", + items: [ + { label: "Overview", slug: "unified-docs/observability/overview" }, + { label: "Complete Guide", slug: "unified-docs/observability-monitoring" }, + { label: "Authentication & Setup", slug: "unified-docs/observability/authentication" }, + { label: "Advanced Configuration", slug: "unified-docs/observability/advanced-configuration" }, + { label: "Troubleshooting", slug: "unified-docs/observability/troubleshooting" }, + ], + }, +]; + export const sidebar = [ { label: "Introduction", slug: "" }, + { + label: "Unified Docs (Preview)", + items: UNIFIED_SIDEBAR, + collapsed: false, + }, { label: "Genkit JS", items: JS_SIDEBAR, - collapsed: false, + collapsed: true, }, { label: "Genkit Go",