diff --git a/apps/browser-extension/package.json b/apps/browser-extension/package.json index 62ed39f8f..6f4e9a6d1 100644 --- a/apps/browser-extension/package.json +++ b/apps/browser-extension/package.json @@ -38,7 +38,7 @@ "superjson": "^2.2.1", "tailwind-merge": "^2.2.1", "tailwindcss-animate": "^1.0.7", - "zod": "^3.24.2" + "zod": "^3.25.0" }, "devDependencies": { "@crxjs/vite-plugin": "2.2.0", diff --git a/apps/mcp/package.json b/apps/mcp/package.json index daa0938a4..26e142802 100644 --- a/apps/mcp/package.json +++ b/apps/mcp/package.json @@ -41,6 +41,6 @@ "@karakeep/sdk": "workspace:*", "@modelcontextprotocol/sdk": "^1.9.0", "turndown": "^7.2.0", - "zod": "^3.24.2" + "zod": "^3.25.0" } } diff --git a/apps/mobile/package.json b/apps/mobile/package.json index d1525a48b..79337ee7b 100644 --- a/apps/mobile/package.json +++ b/apps/mobile/package.json @@ -63,7 +63,7 @@ "react-native-webview": "^13.13.5", "sonner-native": "^0.22.2", "tailwind-merge": "^2.2.1", - "zod": "^3.24.2", + "zod": "^3.25.0", "zustand": "^5.0.5" }, "devDependencies": { diff --git a/apps/web/components/settings/AISettings.tsx b/apps/web/components/settings/AISettings.tsx index 78e3ef56a..16c12563c 100644 --- a/apps/web/components/settings/AISettings.tsx +++ b/apps/web/components/settings/AISettings.tsx @@ -44,7 +44,7 @@ import { api } from "@/lib/trpc"; import { useUserSettings } from "@/lib/userSettings"; import { cn } from "@/lib/utils"; import { zodResolver } from "@hookform/resolvers/zod"; -import { Info, Plus, Save, Trash2 } from "lucide-react"; +import { Bot, Cpu, Image, Info, Plus, Save, Trash2 } from "lucide-react"; import { Controller, useForm } from "react-hook-form"; import { z } from "zod"; @@ -82,6 +82,98 @@ function SettingsSection({ ); } +const providerDisplayNames: Record = { + openai: "OpenAI", + anthropic: "Anthropic Claude", + google: "Google Gemini", + ollama: "Ollama (Local)", +}; + +function ProviderInfoItem({ + icon, + label, + value, +}: { + icon: React.ReactNode; + label: string; + value: string; +}) { + return ( +
+ +
+ {label} + {value} +
+
+ ); +} + +export function ProviderIndicator() { + const { t } = useTranslation(); + const clientConfig = useClientConfig(); + + const { provider, textModel, imageModel, embeddingProvider, embeddingModel } = + clientConfig.inference; + + if (!provider) { + return ( + +
+ +

{t("settings.ai.no_provider_configured")}

+
+
+ ); + } + + const providerName = providerDisplayNames[provider] ?? provider; + const embeddingProviderName = embeddingProvider + ? (providerDisplayNames[embeddingProvider] ?? embeddingProvider) + : null; + + return ( + +
+ } + label={t("settings.ai.provider")} + value={providerName} + /> + {providerName && textModel && ( + } + label={t("settings.ai.text_model")} + value={textModel} + /> + )} + {providerName && imageModel && ( + } + label={t("settings.ai.image_model")} + value={imageModel} + /> + )} + {embeddingProviderName && embeddingModel && ( + } + label={t("settings.ai.embeddings")} + value={`${embeddingProviderName} / ${embeddingModel}`} + /> + )} +
+
+ ); +} + export function AIPreferences() { const { t } = useTranslation(); const clientConfig = useClientConfig(); @@ -677,6 +769,9 @@ export default function AISettings() { {t("settings.ai.ai_settings")} + {/* Provider Status */} + + {/* AI Preferences */} diff --git a/apps/web/lib/clientConfig.tsx b/apps/web/lib/clientConfig.tsx index ab367be08..06d3e214f 100644 --- a/apps/web/lib/clientConfig.tsx +++ b/apps/web/lib/clientConfig.tsx @@ -16,6 +16,11 @@ export const ClientConfigCtx = createContext({ inferredTagLang: "english", enableAutoTagging: false, enableAutoSummarization: false, + provider: null, + textModel: "", + imageModel: "", + embeddingProvider: null, + embeddingModel: "", }, serverVersion: undefined, disableNewReleaseCheck: true, diff --git a/apps/web/lib/i18n/locales/ar/translation.json b/apps/web/lib/i18n/locales/ar/translation.json index e2d9eb7e6..8b6aecde6 100644 --- a/apps/web/lib/i18n/locales/ar/translation.json +++ b/apps/web/lib/i18n/locales/ar/translation.json @@ -193,7 +193,14 @@ "tag_style_description": "اختر كيف ينبغي تنسيق علاماتك التي تم إنشاؤها تلقائيًا.", "auto_tagging_description": "إنشاء علامات تلقائيًا لعلاماتك المرجعية باستخدام الذكاء الاصطناعي.", "camelCase": "camelCase", - "auto_summarization": "التلخيص التلقائي" + "auto_summarization": "التلخيص التلقائي", + "provider_status": "حالة المزود", + "provider_status_description": "مزود الذكاء الاصطناعي والنماذج المكونة حاليًا لهذا المثيل.", + "no_provider_configured": "لم يتم تكوين مزود للذكاء الاصطناعي. اتصل بالمسؤول لإعداد استدلال الذكاء الاصطناعي.", + "provider": "المزود", + "text_model": "نموذج النص", + "image_model": "نموذج الصورة", + "embeddings": "التضمينات" }, "feeds": { "rss_subscriptions": "اشتراكات RSS", diff --git a/apps/web/lib/i18n/locales/cs/translation.json b/apps/web/lib/i18n/locales/cs/translation.json index f13b21002..edb9d0ed4 100644 --- a/apps/web/lib/i18n/locales/cs/translation.json +++ b/apps/web/lib/i18n/locales/cs/translation.json @@ -116,7 +116,14 @@ "tag_style_description": "Vyber si, jakým způsobem se mají automaticky generované štítky formátovat.", "auto_tagging_description": "Automaticky generovat štítky pro tvoje záložky pomocí umělý inteligence.", "camelCase": "camelCase", - "auto_summarization": "Automatický shrnutí" + "auto_summarization": "Automatický shrnutí", + "provider_status": "Stav poskytovatele", + "provider_status_description": "Poskytovatel AI a modely aktuálně nakonfigurované pro tuto instanci.", + "no_provider_configured": "Není nakonfigurován žádný poskytovatel AI. Kontaktujte administrátora pro nastavení AI inference.", + "provider": "Poskytovatel", + "text_model": "Textový model", + "image_model": "Obrazový model", + "embeddings": "Vložení" }, "webhooks": { "webhooks": "Webhooky", diff --git a/apps/web/lib/i18n/locales/da/translation.json b/apps/web/lib/i18n/locales/da/translation.json index be382f86d..a03ce2b82 100644 --- a/apps/web/lib/i18n/locales/da/translation.json +++ b/apps/web/lib/i18n/locales/da/translation.json @@ -159,7 +159,14 @@ "tag_style_description": "Vælg, hvordan dine automatisk genererede tags skal formateres.", "auto_tagging_description": "Generér automatisk tags til dine bogmærker ved hjælp af AI.", "camelCase": "camelCase", - "auto_summarization": "Automatisk opsummering" + "auto_summarization": "Automatisk opsummering", + "provider_status": "Udbyderstatus", + "provider_status_description": "AI-udbyderen og modellerne, der aktuelt er konfigureret for denne instans.", + "no_provider_configured": "Ingen AI-udbyder er konfigureret. 
Kontakt din administrator for at konfigurere AI-inferens.", + "provider": "Udbyder", + "text_model": "Tekstmodel", + "image_model": "Billedmodel", + "embeddings": "Indlejringer" }, "broken_links": { "crawling_status": "Gennemsøgningsstatus", diff --git a/apps/web/lib/i18n/locales/de/translation.json b/apps/web/lib/i18n/locales/de/translation.json index 7192b89e1..c3dd726ee 100644 --- a/apps/web/lib/i18n/locales/de/translation.json +++ b/apps/web/lib/i18n/locales/de/translation.json @@ -190,7 +190,14 @@ "tag_style_description": "Wähle, wie deine automatisch generierten Tags formatiert werden sollen.", "auto_tagging_description": "Automatische Tag-Generierung für deine Lesezeichen mithilfe von KI.", "camelCase": "camelCase", - "auto_summarization": "Automatische Zusammenfassung" + "auto_summarization": "Automatische Zusammenfassung", + "provider_status": "Anbieterstatus", + "provider_status_description": "Der KI-Anbieter und die Modelle, die derzeit für diese Instanz konfiguriert sind.", + "no_provider_configured": "Kein KI-Anbieter ist konfiguriert. Kontaktiere deinen Administrator, um KI-Inferenz einzurichten.", + "provider": "Anbieter", + "text_model": "Textmodell", + "image_model": "Bildmodell", + "embeddings": "Einbettungen" }, "feeds": { "rss_subscriptions": "RSS-Abonnements", diff --git a/apps/web/lib/i18n/locales/el/translation.json b/apps/web/lib/i18n/locales/el/translation.json index 6fea6c6ee..48023d42f 100644 --- a/apps/web/lib/i18n/locales/el/translation.json +++ b/apps/web/lib/i18n/locales/el/translation.json @@ -193,7 +193,14 @@ "tag_style_description": "Διάλεξε πώς να μορφοποιηθούν οι αυτόματα δημιουργημένες ετικέτες σου.", "auto_tagging_description": "Δημιουργήστε αυτόματα ετικέτες για τους σελιδοδείκτες σας χρησιμοποιώντας AI.", "camelCase": "camelCase", - "auto_summarization": "Αυτόματη δημιουργία περιλήψεων" + "auto_summarization": "Αυτόματη δημιουργία περιλήψεων", + "provider_status": "Κατάσταση παρόχου", + "provider_status_description": "Ο πάροχος AI και τα μοντέλα που έχουν ρυθμιστεί για αυτήν την εγκατάσταση.", + "no_provider_configured": "Δεν έχει ρυθμιστεί πάροχος AI. Επικοινωνήστε με τον διαχειριστή σας για να ρυθμίσετε την AI εξαγωγή συμπερασμάτων.", + "provider": "Πάροχος", + "text_model": "Μοντέλο κειμένου", + "image_model": "Μοντέλο εικόνας", + "embeddings": "Ενσωματώσεις" }, "feeds": { "rss_subscriptions": "Συνδρομές RSS", diff --git a/apps/web/lib/i18n/locales/en/translation.json b/apps/web/lib/i18n/locales/en/translation.json index 1817db81f..a35f21be2 100644 --- a/apps/web/lib/i18n/locales/en/translation.json +++ b/apps/web/lib/i18n/locales/en/translation.json @@ -266,7 +266,14 @@ "camelCase": "camelCase", "no_preference": "No preference", "inference_language": "Inference Language", - "inference_language_description": "Choose language for AI-generated tags and summaries." + "inference_language_description": "Choose language for AI-generated tags and summaries.", + "provider_status": "Provider Status", + "provider_status_description": "The AI provider and models currently configured for this instance.", + "no_provider_configured": "No AI provider is configured. 
Contact your administrator to set up AI inference.", + "provider": "Provider", + "text_model": "Text Model", + "image_model": "Image Model", + "embeddings": "Embeddings" }, "feeds": { "rss_subscriptions": "RSS Subscriptions", diff --git a/apps/web/lib/i18n/locales/en_US/translation.json b/apps/web/lib/i18n/locales/en_US/translation.json index 9e98b09ec..bbca0b0d4 100644 --- a/apps/web/lib/i18n/locales/en_US/translation.json +++ b/apps/web/lib/i18n/locales/en_US/translation.json @@ -286,7 +286,14 @@ "tag_style_description": "Choose how your auto-generated tags should be formatted.", "auto_tagging_description": "Automatically generate tags for your bookmarks using AI.", "camelCase": "camelCase", - "auto_summarization": "Auto-summarization" + "auto_summarization": "Auto-summarization", + "provider_status": "Provider Status", + "provider_status_description": "The AI provider and models currently configured for this instance.", + "no_provider_configured": "No AI provider is configured. Contact your administrator to set up AI inference.", + "provider": "Provider", + "text_model": "Text Model", + "image_model": "Image Model", + "embeddings": "Embeddings" }, "feeds": { "rss_subscriptions": "RSS Subscriptions", diff --git a/apps/web/lib/i18n/locales/es/translation.json b/apps/web/lib/i18n/locales/es/translation.json index 6dd2aa78c..afb3dd661 100644 --- a/apps/web/lib/i18n/locales/es/translation.json +++ b/apps/web/lib/i18n/locales/es/translation.json @@ -136,7 +136,14 @@ "tag_style_description": "Elige cómo quieres que se formateen las etiquetas que se generan automáticamente.", "auto_tagging_description": "Genera etiquetas automáticamente para tus marcadores usando IA.", "camelCase": "camelCase", - "auto_summarization": "Resumen automático" + "auto_summarization": "Resumen automático", + "provider_status": "Estado del proveedor", + "provider_status_description": "El proveedor de IA y los modelos configurados actualmente para esta instancia.", + "no_provider_configured": "No hay ningún proveedor de IA configurado. Contacta a tu administrador para configurar la inferencia de IA.", + "provider": "Proveedor", + "text_model": "Modelo de texto", + "image_model": "Modelo de imagen", + "embeddings": "Incrustaciones" }, "user_settings": "Ajustes de usuario", "feeds": { diff --git a/apps/web/lib/i18n/locales/fa/translation.json b/apps/web/lib/i18n/locales/fa/translation.json index 6bd977888..b76cf1533 100644 --- a/apps/web/lib/i18n/locales/fa/translation.json +++ b/apps/web/lib/i18n/locales/fa/translation.json @@ -244,7 +244,14 @@ "tag_style_description": "انتخاب کنید که برچسب‌های تولیدشده خودکار شما چگونه قالب‌بندی شوند.", "auto_tagging_description": "به‌طور خودکار با استفاده از هوش مصنوعی برای نشانک‌هایت برچسب تولید کن.", "camelCase": "camelCase", - "auto_summarization": "خلاصه‌سازی خودکار" + "auto_summarization": "خلاصه‌سازی خودکار", + "provider_status": "وضعیت ارائه‌دهنده", + "provider_status_description": "ارائه‌دهنده هوش مصنوعی و مدل‌های پیکربندی شده برای این نمونه.", + "no_provider_configured": "هیچ ارائه‌دهنده هوش مصنوعی پیکربندی نشده است. 
برای راه‌اندازی استنتاج هوش مصنوعی با مدیر خود تماس بگیرید.", + "provider": "ارائه‌دهنده", + "text_model": "مدل متن", + "image_model": "مدل تصویر", + "embeddings": "جاسازی‌ها" }, "feeds": { "feed_enabled": "خوراک RSS فعال شد", diff --git a/apps/web/lib/i18n/locales/fi/translation.json b/apps/web/lib/i18n/locales/fi/translation.json index 06660ccd2..ed2d28289 100644 --- a/apps/web/lib/i18n/locales/fi/translation.json +++ b/apps/web/lib/i18n/locales/fi/translation.json @@ -193,7 +193,14 @@ "tag_style_description": "Valitse, miten automaattisesti luotujen tunnisteiden muoto tulisi olla.", "auto_tagging_description": "Luo kirjanmerkeillesi automaattisesti tägejä tekoälyn avulla.", "camelCase": "camelCase", - "auto_summarization": "Automaattinen tiivistys" + "auto_summarization": "Automaattinen tiivistys", + "provider_status": "Palveluntarjoajan tila", + "provider_status_description": "Tällä hetkellä tälle instanssille määritetty tekoälypalveluntarjoaja ja mallit.", + "no_provider_configured": "Tekoälypalveluntarjoajaa ei ole määritetty. Ota yhteyttä ylläpitäjään tekoälypäättelyn käyttöönottamiseksi.", + "provider": "Palveluntarjoaja", + "text_model": "Tekstimalli", + "image_model": "Kuvamalli", + "embeddings": "Upotukset" }, "feeds": { "rss_subscriptions": "RSS-tilaukset", diff --git a/apps/web/lib/i18n/locales/fr/translation.json b/apps/web/lib/i18n/locales/fr/translation.json index 94cb7b03e..bde2ed310 100644 --- a/apps/web/lib/i18n/locales/fr/translation.json +++ b/apps/web/lib/i18n/locales/fr/translation.json @@ -190,7 +190,14 @@ "tag_style_description": "Choisissez le format de vos balises générées automatiquement.", "auto_tagging_description": "Générez automatiquement des balises pour vos favoris à l’aide de l’IA.", "camelCase": "camelCase", - "auto_summarization": "Résumés automatiques" + "auto_summarization": "Résumés automatiques", + "provider_status": "Statut du fournisseur", + "provider_status_description": "Le fournisseur d'IA et les modèles actuellement configurés pour cette instance.", + "no_provider_configured": "Aucun fournisseur d'IA n'est configuré. Contactez votre administrateur pour configurer l'inférence IA.", + "provider": "Fournisseur", + "text_model": "Modèle de texte", + "image_model": "Modèle d'image", + "embeddings": "Embeddings" }, "feeds": { "rss_subscriptions": "Abonnements RSS", diff --git a/apps/web/lib/i18n/locales/ga/translation.json b/apps/web/lib/i18n/locales/ga/translation.json index b132ca45c..56e374d82 100644 --- a/apps/web/lib/i18n/locales/ga/translation.json +++ b/apps/web/lib/i18n/locales/ga/translation.json @@ -116,7 +116,14 @@ "tag_style_description": "Roghnaigh conas ar cheart do chlibeanna uathghinte a bheith formáidithe.", "auto_tagging_description": "Clibeanna a ghiniúint go huathoibríoch do do leabharmharcanna ag úsáid AI.", "camelCase": "camelCase", - "auto_summarization": "Uathachoimriú" + "auto_summarization": "Uathachoimriú", + "provider_status": "Stádas an tSoláthraí", + "provider_status_description": "An soláthraí AI agus na samhlacha atá cumraithe faoi láthair don ásc seo.", + "no_provider_configured": "Níl aon soláthraí AI cumraithe. 
Déan teagmháil le do riarthóir chun tátal AI a shocrú.", + "provider": "Soláthraí", + "text_model": "Samhail Téacs", + "image_model": "Samhail Íomhá", + "embeddings": "Leabuithe" }, "webhooks": { "webhooks": "Crúcaí Gréasáin", diff --git a/apps/web/lib/i18n/locales/gl/translation.json b/apps/web/lib/i18n/locales/gl/translation.json index 9fe11f1a0..977a3dc5a 100644 --- a/apps/web/lib/i18n/locales/gl/translation.json +++ b/apps/web/lib/i18n/locales/gl/translation.json @@ -256,7 +256,14 @@ "tag_style_description": "Elixe como se deben formatar as etiquetas xeradas automaticamente.", "auto_tagging_description": "Xera automaticamente etiquetas para os teus marcadores usando a intelixencia artificial.", "camelCase": "camelCase (a primeira palabra en minúsculas e as seguintes en maiúsculas)", - "auto_summarization": "Resumo automático" + "auto_summarization": "Resumo automático", + "provider_status": "Estado do provedor", + "provider_status_description": "O provedor de IA e os modelos configurados actualmente para esta instancia.", + "no_provider_configured": "Non hai ningún provedor de IA configurado. Contacta co teu administrador para configurar a inferencia de IA.", + "provider": "Provedor", + "text_model": "Modelo de texto", + "image_model": "Modelo de imaxe", + "embeddings": "Incrustacións" }, "feeds": { "rss_subscriptions": "Subscricións RSS", diff --git a/apps/web/lib/i18n/locales/hr/translation.json b/apps/web/lib/i18n/locales/hr/translation.json index 7ef093d0b..6ff3791e5 100644 --- a/apps/web/lib/i18n/locales/hr/translation.json +++ b/apps/web/lib/i18n/locales/hr/translation.json @@ -205,7 +205,14 @@ "tag_style_description": "Odaberi kako će tvoje automatski generirane oznake biti formatirane.", "auto_tagging_description": "Automatski generiraj oznake za svoje knjižne oznake pomoću AI-ja.", "camelCase": "camelCase", - "auto_summarization": "Automatsko sažimanje" + "auto_summarization": "Automatsko sažimanje", + "provider_status": "Status pružatelja", + "provider_status_description": "AI pružatelj i modeli trenutno konfigurirani za ovu instancu.", + "no_provider_configured": "Nije konfiguriran nijedan AI pružatelj. Kontaktirajte svog administratora za postavljanje AI zaključivanja.", + "provider": "Pružatelj", + "text_model": "Tekstualni model", + "image_model": "Model slike", + "embeddings": "Ugradnje" }, "import": { "import_bookmarks_from_html_file": "Import knjižnih oznaka iz HTML datoteke", diff --git a/apps/web/lib/i18n/locales/hu/translation.json b/apps/web/lib/i18n/locales/hu/translation.json index 1399e4a81..1686b132e 100644 --- a/apps/web/lib/i18n/locales/hu/translation.json +++ b/apps/web/lib/i18n/locales/hu/translation.json @@ -163,7 +163,14 @@ "tag_style_description": "Válaszd ki, hogyan legyenek formázva az automatikusan létrehozott címkék.", "auto_tagging_description": "A MI használatával automatikusan címkéket generálhatsz a könyvjelzőidhez.", "camelCase": "camelCase", - "auto_summarization": "Automatikus összefoglalás" + "auto_summarization": "Automatikus összefoglalás", + "provider_status": "Szolgáltató állapota", + "provider_status_description": "A jelenleg ehhez a példányhoz konfigurált MI szolgáltató és modellek.", + "no_provider_configured": "Nincs MI szolgáltató konfigurálva. 
Lépjen kapcsolatba a rendszergazdával az MI következtetés beállításához.", + "provider": "Szolgáltató", + "text_model": "Szöveg modell", + "image_model": "Kép modell", + "embeddings": "Beágyazások" }, "api_keys": { "new_api_key": "Új API kulcs", diff --git a/apps/web/lib/i18n/locales/it/translation.json b/apps/web/lib/i18n/locales/it/translation.json index d7fa773db..743132899 100644 --- a/apps/web/lib/i18n/locales/it/translation.json +++ b/apps/web/lib/i18n/locales/it/translation.json @@ -190,7 +190,14 @@ "tag_style_description": "Scegli come formattare le etichette generate automaticamente.", "auto_tagging_description": "Genera automaticamente i tag per i tuoi segnalibri usando l'AI.", "camelCase": "camelCase", - "auto_summarization": "Riassunto automatico" + "auto_summarization": "Riassunto automatico", + "provider_status": "Stato del fornitore", + "provider_status_description": "Il fornitore AI e i modelli attualmente configurati per questa istanza.", + "no_provider_configured": "Nessun fornitore AI è configurato. Contatta il tuo amministratore per configurare l'inferenza AI.", + "provider": "Fornitore", + "text_model": "Modello di testo", + "image_model": "Modello di immagine", + "embeddings": "Embedding" }, "feeds": { "rss_subscriptions": "Iscrizione RSS", diff --git a/apps/web/lib/i18n/locales/ja/translation.json b/apps/web/lib/i18n/locales/ja/translation.json index 58315b3ed..3d81315cf 100644 --- a/apps/web/lib/i18n/locales/ja/translation.json +++ b/apps/web/lib/i18n/locales/ja/translation.json @@ -268,7 +268,14 @@ "tag_style_description": "自動生成されるタグの書式を選んでくれ。", "auto_tagging_description": "AIを使ってブックマークのタグを自動生成する。", "camelCase": "camelCase", - "auto_summarization": "自動要約" + "auto_summarization": "自動要約", + "provider_status": "プロバイダーのステータス", + "provider_status_description": "このインスタンスに現在設定されているAIプロバイダーとモデル。", + "no_provider_configured": "AIプロバイダーが設定されていません。AI推論を設定するには管理者に連絡してください。", + "provider": "プロバイダー", + "text_model": "テキストモデル", + "image_model": "画像モデル", + "embeddings": "埋め込み" }, "import": { "import_export_bookmarks": "ブックマークのインポート/エクスポート", diff --git a/apps/web/lib/i18n/locales/ko/translation.json b/apps/web/lib/i18n/locales/ko/translation.json index 52be79177..b9bbfb9df 100644 --- a/apps/web/lib/i18n/locales/ko/translation.json +++ b/apps/web/lib/i18n/locales/ko/translation.json @@ -392,7 +392,14 @@ "tag_style_description": "자동 생성 태그 형식을 선택하세요.", "auto_tagging_description": "AI를 사용하여 책갈피에 대한 태그를 자동으로 생성합니다.", "camelCase": "camelCase", - "auto_summarization": "자동 요약" + "auto_summarization": "자동 요약", + "provider_status": "제공자 상태", + "provider_status_description": "이 인스턴스에 현재 구성된 AI 제공자 및 모델입니다.", + "no_provider_configured": "AI 제공자가 구성되지 않았습니다. 
AI 추론을 설정하려면 관리자에게 문의하세요.", + "provider": "제공자", + "text_model": "텍스트 모델", + "image_model": "이미지 모델", + "embeddings": "임베딩" }, "feeds": { "add_a_subscription": "구독 추가", diff --git a/apps/web/lib/i18n/locales/nb_NO/translation.json b/apps/web/lib/i18n/locales/nb_NO/translation.json index 8f1fde214..3ffafcd9c 100644 --- a/apps/web/lib/i18n/locales/nb_NO/translation.json +++ b/apps/web/lib/i18n/locales/nb_NO/translation.json @@ -312,7 +312,14 @@ "tag_style_description": "Velg hvordan de automatisk genererte merkelappene dine skal formateres.", "auto_tagging_description": "Generer automatisk tagger for bokmerkene dine ved hjelp av AI.", "camelCase": "camelCase", - "auto_summarization": "Automatisk oppsummering" + "auto_summarization": "Automatisk oppsummering", + "provider_status": "Leverandørstatus", + "provider_status_description": "AI-leverandøren og modellene som er konfigurert for denne instansen.", + "no_provider_configured": "Ingen AI-leverandør er konfigurert. Kontakt administratoren din for å sette opp AI-inferens.", + "provider": "Leverandør", + "text_model": "Tekstmodell", + "image_model": "Bildemodell", + "embeddings": "Innbygginger" }, "import": { "import_bookmarks_from_omnivore_export": "Importer bokmerker fra Omnivore-eksport", diff --git a/apps/web/lib/i18n/locales/nl/translation.json b/apps/web/lib/i18n/locales/nl/translation.json index c4987872f..7174883db 100644 --- a/apps/web/lib/i18n/locales/nl/translation.json +++ b/apps/web/lib/i18n/locales/nl/translation.json @@ -122,7 +122,14 @@ "tag_style_description": "Kies hoe je automatisch gegenereerde tags moeten worden opgemaakt.", "auto_tagging_description": "Genereer automatisch tags voor je bladwijzers met behulp van AI.", "camelCase": "camelCase", - "auto_summarization": "Automatische samenvatting" + "auto_summarization": "Automatische samenvatting", + "provider_status": "Providerstatus", + "provider_status_description": "De AI-provider en modellen die momenteel zijn geconfigureerd voor deze instantie.", + "no_provider_configured": "Er is geen AI-provider geconfigureerd. Neem contact op met uw beheerder om AI-inferentie in te stellen.", + "provider": "Provider", + "text_model": "Tekstmodel", + "image_model": "Afbeeldingsmodel", + "embeddings": "Embeddings" }, "import": { "import_export": "Importeren / Exporteren", diff --git a/apps/web/lib/i18n/locales/pl/translation.json b/apps/web/lib/i18n/locales/pl/translation.json index 8cb621e74..f550c4807 100644 --- a/apps/web/lib/i18n/locales/pl/translation.json +++ b/apps/web/lib/i18n/locales/pl/translation.json @@ -198,7 +198,14 @@ "tag_style_description": "Wybierz, jak powinny być formatowane autogenerowane tagi.", "auto_tagging_description": "Automatycznie generuj tagi dla zakładek za pomocą AI.", "camelCase": "camelCase", - "auto_summarization": "Automatyczne podsumowywanie" + "auto_summarization": "Automatyczne podsumowywanie", + "provider_status": "Status dostawcy", + "provider_status_description": "Dostawca AI i modele aktualnie skonfigurowane dla tej instancji.", + "no_provider_configured": "Nie skonfigurowano dostawcy AI. 
Skontaktuj się z administratorem, aby skonfigurować wnioskowanie AI.", + "provider": "Dostawca", + "text_model": "Model tekstowy", + "image_model": "Model obrazu", + "embeddings": "Osadzenia" }, "feeds": { "rss_subscriptions": "Subskrypcje RSS", diff --git a/apps/web/lib/i18n/locales/pt/translation.json b/apps/web/lib/i18n/locales/pt/translation.json index 7bf1ccae3..a3c65e627 100644 --- a/apps/web/lib/i18n/locales/pt/translation.json +++ b/apps/web/lib/i18n/locales/pt/translation.json @@ -214,7 +214,14 @@ "tag_style_description": "Escolha como as suas etiquetas geradas automaticamente devem ser formatadas.", "auto_tagging_description": "Gerar automaticamente tags para seus favoritos usando IA.", "camelCase": "camelCase", - "auto_summarization": "Resumo automático" + "auto_summarization": "Resumo automático", + "provider_status": "Estado do fornecedor", + "provider_status_description": "O fornecedor de IA e os modelos atualmente configurados para esta instância.", + "no_provider_configured": "Nenhum fornecedor de IA está configurado. Contacte o seu administrador para configurar a inferência de IA.", + "provider": "Fornecedor", + "text_model": "Modelo de texto", + "image_model": "Modelo de imagem", + "embeddings": "Embeddings" }, "api_keys": { "new_api_key": "Nova chave da API", diff --git a/apps/web/lib/i18n/locales/pt_BR/translation.json b/apps/web/lib/i18n/locales/pt_BR/translation.json index 2d1a7f8a5..839d8a709 100644 --- a/apps/web/lib/i18n/locales/pt_BR/translation.json +++ b/apps/web/lib/i18n/locales/pt_BR/translation.json @@ -184,7 +184,14 @@ "tag_style_description": "Escolha como suas tags auto-geradas devem ser formatadas.", "auto_tagging_description": "Gere automaticamente tags para seus favoritos usando IA.", "camelCase": "camelCase", - "auto_summarization": "Resumo automático" + "auto_summarization": "Resumo automático", + "provider_status": "Status do provedor", + "provider_status_description": "O provedor de IA e os modelos configurados atualmente para esta instância.", + "no_provider_configured": "Nenhum provedor de IA está configurado. Entre em contato com seu administrador para configurar a inferência de IA.", + "provider": "Provedor", + "text_model": "Modelo de texto", + "image_model": "Modelo de imagem", + "embeddings": "Embeddings" }, "feeds": { "rss_subscriptions": "Assinaturas de RSS", diff --git a/apps/web/lib/i18n/locales/ru/translation.json b/apps/web/lib/i18n/locales/ru/translation.json index f3da61699..ba1df5612 100644 --- a/apps/web/lib/i18n/locales/ru/translation.json +++ b/apps/web/lib/i18n/locales/ru/translation.json @@ -248,7 +248,14 @@ "tag_style_description": "Выбери, как форматировать автосгенерированные теги.", "auto_tagging_description": "Автоматически генерируйте теги для ваших закладок с помощью ИИ.", "camelCase": "camelCase", - "auto_summarization": "Автоматическое создание сводок" + "auto_summarization": "Автоматическое создание сводок", + "provider_status": "Статус провайдера", + "provider_status_description": "Провайдер ИИ и модели, настроенные в данный момент для этого экземпляра.", + "no_provider_configured": "Провайдер ИИ не настроен. 
Свяжитесь с администратором для настройки ИИ-инференса.", + "provider": "Провайдер", + "text_model": "Текстовая модель", + "image_model": "Модель изображений", + "embeddings": "Эмбеддинги" }, "feeds": { "rss_subscriptions": "RSS подписки", diff --git a/apps/web/lib/i18n/locales/sk/translation.json b/apps/web/lib/i18n/locales/sk/translation.json index 00196d26b..63ee80840 100644 --- a/apps/web/lib/i18n/locales/sk/translation.json +++ b/apps/web/lib/i18n/locales/sk/translation.json @@ -284,7 +284,14 @@ "tag_style_description": "Vyber si, ako majú byť formátované automaticky generované tagy.", "auto_tagging_description": "Automaticky generujte štítky pre vaše záložky pomocou AI.", "camelCase": "camelCase", - "auto_summarization": "Automatické zhrnutie" + "auto_summarization": "Automatické zhrnutie", + "provider_status": "Stav poskytovateľa", + "provider_status_description": "Poskytovateľ AI a modely aktuálne nakonfigurované pre túto inštanciu.", + "no_provider_configured": "Nie je nakonfigurovaný žiadny poskytovateľ AI. Kontaktujte administrátora pre nastavenie AI inferencie.", + "provider": "Poskytovateľ", + "text_model": "Textový model", + "image_model": "Obrazový model", + "embeddings": "Vkladania" }, "webhooks": { "add_auth_token": "Pridať autorizačný token", diff --git a/apps/web/lib/i18n/locales/sl/translation.json b/apps/web/lib/i18n/locales/sl/translation.json index 8b99a153e..6578f76d2 100644 --- a/apps/web/lib/i18n/locales/sl/translation.json +++ b/apps/web/lib/i18n/locales/sl/translation.json @@ -117,7 +117,14 @@ "tag_style_description": "Izberi obliko samodejno ustvarjenih oznak.", "auto_tagging_description": "Samodejno ustvari oznake za tvoje zaznamke z uporabo UI.", "camelCase": "camelCase", - "auto_summarization": "Samodejno povzemanje" + "auto_summarization": "Samodejno povzemanje", + "provider_status": "Stanje ponudnika", + "provider_status_description": "Ponudnik AI in modeli, ki so trenutno konfigurirani za to instanco.", + "no_provider_configured": "Noben ponudnik AI ni konfiguriran. Obrnite se na skrbnika za nastavitev AI sklepanja.", + "provider": "Ponudnik", + "text_model": "Model besedila", + "image_model": "Model slike", + "embeddings": "Vložitve" }, "back_to_app": "Nazaj v aplikacijo", "webhooks": { diff --git a/apps/web/lib/i18n/locales/sv/translation.json b/apps/web/lib/i18n/locales/sv/translation.json index b03c3d2e5..fea322230 100644 --- a/apps/web/lib/i18n/locales/sv/translation.json +++ b/apps/web/lib/i18n/locales/sv/translation.json @@ -195,7 +195,14 @@ "tag_style_description": "Välj hur dina automatiskt genererade taggar ska formateras.", "auto_tagging_description": "Generera automatiskt taggar för dina bokmärken genom att använda AI.", "camelCase": "camelCase", - "auto_summarization": "Automatisk sammanfattning" + "auto_summarization": "Automatisk sammanfattning", + "provider_status": "Leverantörsstatus", + "provider_status_description": "AI-leverantören och modellerna som för närvarande är konfigurerade för denna instans.", + "no_provider_configured": "Ingen AI-leverantör är konfigurerad. 
Kontakta din administratör för att konfigurera AI-inferens.", + "provider": "Leverantör", + "text_model": "Textmodell", + "image_model": "Bildmodell", + "embeddings": "Inbäddningar" }, "import": { "import_export": "Importera / exportera", diff --git a/apps/web/lib/i18n/locales/tr/translation.json b/apps/web/lib/i18n/locales/tr/translation.json index 8cd31dc0c..b8d40ca0d 100644 --- a/apps/web/lib/i18n/locales/tr/translation.json +++ b/apps/web/lib/i18n/locales/tr/translation.json @@ -193,7 +193,14 @@ "tag_style_description": "Otomatik oluşturulan etiketlerinin nasıl biçimlendirileceğini seç.", "auto_tagging_description": "Yapay zeka kullanarak yer işaretlerin için otomatik olarak etiket oluştur.", "camelCase": "camelCase", - "auto_summarization": "Otomatik özetleme" + "auto_summarization": "Otomatik özetleme", + "provider_status": "Sağlayıcı Durumu", + "provider_status_description": "Bu örnek için şu anda yapılandırılmış yapay zeka sağlayıcısı ve modeller.", + "no_provider_configured": "Hiçbir yapay zeka sağlayıcısı yapılandırılmamış. Yapay zeka çıkarımını ayarlamak için yöneticinizle iletişime geçin.", + "provider": "Sağlayıcı", + "text_model": "Metin Modeli", + "image_model": "Görsel Modeli", + "embeddings": "Gömüler" }, "feeds": { "rss_subscriptions": "RSS Abonelikleri", diff --git a/apps/web/lib/i18n/locales/uk/translation.json b/apps/web/lib/i18n/locales/uk/translation.json index 1329db9c5..2dcc6a44e 100644 --- a/apps/web/lib/i18n/locales/uk/translation.json +++ b/apps/web/lib/i18n/locales/uk/translation.json @@ -202,7 +202,14 @@ "tag_style_description": "Обери, як форматуватимуться твої автоматично створені теги.", "auto_tagging_description": "Автоматично генеруйте теги для своїх закладок за допомогою штучного інтелекту.", "camelCase": "camelCase", - "auto_summarization": "Автоматичне підсумовування" + "auto_summarization": "Автоматичне підсумовування", + "provider_status": "Статус постачальника", + "provider_status_description": "Постачальник ШІ та моделі, які наразі налаштовані для цього екземпляра.", + "no_provider_configured": "Постачальник ШІ не налаштований. Зверніться до адміністратора для налаштування ШІ-інференсу.", + "provider": "Постачальник", + "text_model": "Текстова модель", + "image_model": "Модель зображень", + "embeddings": "Ембедінги" }, "feeds": { "rss_subscriptions": "RSS-підписки", diff --git a/apps/web/lib/i18n/locales/vi/translation.json b/apps/web/lib/i18n/locales/vi/translation.json index 069938029..cc3a5628b 100644 --- a/apps/web/lib/i18n/locales/vi/translation.json +++ b/apps/web/lib/i18n/locales/vi/translation.json @@ -128,7 +128,14 @@ "tag_style_description": "Chọn cách định dạng các thẻ tự động tạo của bạn.", "auto_tagging_description": "Tự động tạo thẻ cho dấu trang bằng AI.", "camelCase": "camelCase", - "auto_summarization": "Tự động tóm tắt" + "auto_summarization": "Tự động tóm tắt", + "provider_status": "Trạng thái nhà cung cấp", + "provider_status_description": "Nhà cung cấp AI và các mô hình hiện đang được cấu hình cho phiên bản này.", + "no_provider_configured": "Chưa có nhà cung cấp AI nào được cấu hình. 
Liên hệ với quản trị viên của bạn để thiết lập suy luận AI.", + "provider": "Nhà cung cấp", + "text_model": "Mô hình văn bản", + "image_model": "Mô hình hình ảnh", + "embeddings": "Nhúng" }, "info": { "basic_details": "Thông tin cơ bản", diff --git a/apps/web/lib/i18n/locales/zh/translation.json b/apps/web/lib/i18n/locales/zh/translation.json index 7f16a5f6d..ef9d5d179 100644 --- a/apps/web/lib/i18n/locales/zh/translation.json +++ b/apps/web/lib/i18n/locales/zh/translation.json @@ -190,7 +190,14 @@ "tag_style_description": "选择自动生成的标签应如何格式化。", "auto_tagging_description": "使用 AI 自动为你的书签生成标签。", "camelCase": "驼峰式命名", - "auto_summarization": "自动摘要" + "auto_summarization": "自动摘要", + "provider_status": "提供商状态", + "provider_status_description": "当前为此实例配置的AI提供商和模型。", + "no_provider_configured": "未配置AI提供商。请联系您的管理员设置AI推理。", + "provider": "提供商", + "text_model": "文本模型", + "image_model": "图像模型", + "embeddings": "嵌入" }, "feeds": { "rss_subscriptions": "RSS订阅", diff --git a/apps/web/lib/i18n/locales/zhtw/translation.json b/apps/web/lib/i18n/locales/zhtw/translation.json index cafa02d68..48e9fa558 100644 --- a/apps/web/lib/i18n/locales/zhtw/translation.json +++ b/apps/web/lib/i18n/locales/zhtw/translation.json @@ -190,7 +190,14 @@ "tag_style_description": "選擇自動產生的標籤應如何格式化。", "auto_tagging_description": "使用 AI 自動為你的書籤產生標籤。", "camelCase": "駝峰式大小寫", - "auto_summarization": "自動摘要" + "auto_summarization": "自動摘要", + "provider_status": "提供者狀態", + "provider_status_description": "目前為此實例配置的 AI 提供者和模型。", + "no_provider_configured": "未配置 AI 提供者。請聯繫您的管理員設定 AI 推論。", + "provider": "提供者", + "text_model": "文字模型", + "image_model": "圖片模型", + "embeddings": "嵌入" }, "feeds": { "rss_subscriptions": "RSS 訂閱", diff --git a/apps/web/package.json b/apps/web/package.json index 5bd265951..dc0897d6b 100644 --- a/apps/web/package.json +++ b/apps/web/package.json @@ -101,7 +101,7 @@ "sonner": "^2.0.7", "superjson": "^2.2.1", "tailwind-merge": "^2.2.1", - "zod": "^3.24.2", + "zod": "^3.25.0", "zustand": "^5.0.5" }, "devDependencies": { diff --git a/apps/workers/package.json b/apps/workers/package.json index fdec2ebf5..eb98a993b 100644 --- a/apps/workers/package.json +++ b/apps/workers/package.json @@ -56,7 +56,7 @@ "tesseract.js": "^7.0.0", "tsx": "^4.8.1", "typescript": "^5.9", - "zod": "^3.24.2" + "zod": "^3.25.0" }, "devDependencies": { "@karakeep/prettier-config": "workspace:^0.1.0", diff --git a/docs/docs/03-configuration/01-environment-variables.md b/docs/docs/03-configuration/01-environment-variables.md index 7b09c38a3..7ab43709a 100644 --- a/docs/docs/03-configuration/01-environment-variables.md +++ b/docs/docs/03-configuration/01-environment-variables.md @@ -92,25 +92,31 @@ Either `OPENAI_API_KEY` or `OLLAMA_BASE_URL` need to be set for automatic taggin | Name | Required | Default | Description | | ------------------------------------ | -------- | ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| OPENAI_API_KEY | No | Not set | The OpenAI key used for automatic tagging. More on that in [here](../integrations/openai). | -| OPENAI_BASE_URL | No | Not set | If you just want to use OpenAI you don't need to pass this variable. 
If, however, you want to use some other openai compatible API (e.g. azure openai service), set this to the url of the API. | +| INFERENCE_PROVIDER | No | Auto-detected | The AI provider to use for inference. Options: `openai`, `anthropic`, `google`, `ollama`. If not set, auto-detects based on which API key is configured. | +| OPENAI_API_KEY | No | Not set | The OpenAI API key for inference and embeddings. See the [OpenAI integration guide](../integrations/openai) for details. | +| OPENAI_BASE_URL | No | Not set | Custom base URL for OpenAI-compatible APIs (e.g., Azure OpenAI, OpenRouter). Not needed for standard OpenAI. | | OPENAI_PROXY_URL | No | Not set | HTTP proxy server URL for OpenAI API requests (e.g., `http://proxy.example.com:8080`). | | OPENAI_SERVICE_TIER | No | Not set | Set to `auto`, `default`, or `flex`. Flex processing provides lower costs in exchange for slower response times and occasional resource unavailability. See [OpenAI Flex Processing](https://platform.openai.com/docs/guides/flex-processing) and [Chat Service Tier](https://platform.openai.com/docs/api-reference/chat/object#chat-object-service_tier) for more details. | -| OLLAMA_BASE_URL | No | Not set | If you want to use ollama for local inference, set the address of ollama API here. | +| OPENAI_USE_RESPONSES_API | No | false | Enable OpenAI's Responses API for GPT-5+ models. Provides advanced features like reasoning effort control. When false, uses Chat Completions API for all models. | +| OPENAI_REASONING_EFFORT | No | low | Reasoning effort level for GPT-5 models. Options: `none`, `minimal`, `low`, `medium`, `high`, `xhigh`. Higher values produce more thorough reasoning but cost more tokens. Note: `gpt-5.1` defaults to `none`; `gpt-5-pro` only supports `high`. | +| ANTHROPIC_API_KEY | No | Not set | The Anthropic API key for Claude models. Note: Anthropic does not support embeddings, so you'll need OpenAI or Google configured for embedding features. | +| ANTHROPIC_BASE_URL | No | Not set | Custom base URL for Anthropic-compatible APIs. Not needed for standard Anthropic API. | +| GEMINI_API_KEY | No | Not set | The Google Gemini API key. Get one from [Google AI Studio](https://aistudio.google.com/). | +| GEMINI_BASE_URL | No | Not set | Custom base URL for Google Gemini API. Not needed for standard Gemini API. | +| OLLAMA_BASE_URL | No | Not set | If you want to use Ollama for local inference, set the address of the Ollama API here. | | OLLAMA_KEEP_ALIVE | No | Not set | Controls how long the model will stay loaded into memory following the request (example value: "5m"). | -| INFERENCE_TEXT_MODEL | No | gpt-4.1-mini | The model to use for text inference. You'll need to change this to some other model if you're using ollama. | -| INFERENCE_IMAGE_MODEL | No | gpt-4o-mini | The model to use for image inference. You'll need to change this to some other model if you're using ollama and that model needs to support vision APIs (e.g. llava). | +| INFERENCE_TEXT_MODEL | No | gpt-5-mini | The model to use for text inference. GPT-5 family is default. Change when using non-OpenAI providers (e.g., `claude-sonnet-4-5` for Anthropic, `gemini-2.5-flash` for Google). Legacy GPT-4 models still supported. | +| INFERENCE_IMAGE_MODEL | No | gpt-5-mini | The model to use for image inference. Must support vision APIs. Change when using non-OpenAI providers (e.g., `llava` for Ollama). GPT-5 models support multimodal input. | +| EMBEDDING_PROVIDER | No | Same as inference | The provider to use for embeddings. 
Options: `openai`, `google`, `ollama`. Defaults to inference provider if it supports embeddings. Required when using Anthropic for inference. | | EMBEDDING_TEXT_MODEL | No | text-embedding-3-small | The model to be used for generating embeddings for the text. | | INFERENCE_CONTEXT_LENGTH | No | 2048 | The max number of tokens that we'll pass to the inference model. If your content is larger than this size, it'll be truncated to fit. The larger this value, the more of the content will be used in tag inference, but the more expensive the inference will be (money-wise on openAI and resource-wise on ollama). Check the model you're using for its max supported content size. | | INFERENCE_MAX_OUTPUT_TOKENS | No | 2048 | The maximum number of tokens that the inference model is allowed to generate in its response. This controls the length of AI-generated content like tags and summaries. Increase this if you need longer responses, but be aware that higher values will increase costs (for OpenAI) and processing time. | -| INFERENCE_USE_MAX_COMPLETION_TOKENS | No | false | \[OpenAI Only\] Whether to use the newer `max_completion_tokens` parameter instead of the deprecated `max_tokens` parameter. Set to `true` if using GPT-5 or o-series models which require this. Will become the default in a future release. | | INFERENCE_LANG | No | english | The language in which the tags will be generated. | | INFERENCE_NUM_WORKERS | No | 1 | Number of concurrent workers for AI inference tasks (tagging and summarization). Increase this if you have multiple AI inference requests and want to process them in parallel. | | INFERENCE_ENABLE_AUTO_TAGGING | No | true | Whether automatic AI tagging is enabled or disabled. | | INFERENCE_ENABLE_AUTO_SUMMARIZATION | No | false | Whether automatic AI summarization is enabled or disabled. | | INFERENCE_JOB_TIMEOUT_SEC | No | 30 | How long to wait for the inference job to finish before timing out. If you're running ollama without powerful GPUs, you might want to increase the timeout a bit. | | INFERENCE_FETCH_TIMEOUT_SEC | No | 300 | \[Ollama Only\] The timeout of the fetch request to the ollama server. If your inference requests take longer than the default 5mins, you might want to increase this timeout. | -| INFERENCE_SUPPORTS_STRUCTURED_OUTPUT | No | Not set | \[DEPRECATED\] Whether the inference model supports structured output or not. Use INFERENCE_OUTPUT_SCHEMA instead. Setting this to true translates to INFERENCE_OUTPUT_SCHEMA=structured, and to false translates to INFERENCE_OUTPUT_SCHEMA=plain. | | INFERENCE_OUTPUT_SCHEMA | No | structured | Possible values are "structured", "json", "plain". Structured is the preferred option, but if your model doesn't support it, you can use "json" if your model supports JSON mode, otherwise "plain" which should be supported by all the models but the model might not output the data in the correct format. | :::info diff --git a/docs/docs/03-configuration/02-different-ai-providers.md b/docs/docs/03-configuration/02-different-ai-providers.md index 9a86e04fb..c3f58563c 100644 --- a/docs/docs/03-configuration/02-different-ai-providers.md +++ b/docs/docs/03-configuration/02-different-ai-providers.md @@ -1,88 +1,292 @@ # Configuring different AI Providers -Karakeep uses LLM providers for AI tagging and summarization. We support OpenAI-compatible providers and ollama. This guide will show you how to configure different providers. +Karakeep uses LLM providers for AI tagging and summarization. 
We support multiple providers including OpenAI, Anthropic, Google Gemini, and Ollama for local inference. -## OpenAI +## Provider Selection + +You can explicitly select a provider using the `INFERENCE_PROVIDER` environment variable: -If you want to use OpenAI itself, you just need to pass in the OPENAI_API_KEY environment variable. +``` +INFERENCE_PROVIDER=openai # Use OpenAI +INFERENCE_PROVIDER=anthropic # Use Anthropic Claude +INFERENCE_PROVIDER=google # Use Google Gemini +INFERENCE_PROVIDER=ollama # Use Ollama (local) +``` + +If not set, Karakeep will auto-detect based on which API key is configured (checked in the order above). + +## OpenAI ``` +INFERENCE_PROVIDER=openai # Optional, auto-detected if OPENAI_API_KEY is set OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx -# You can change the default models by uncommenting the following lines, and choosing your model. -# INFERENCE_TEXT_MODEL=gpt-4.1-mini -# INFERENCE_IMAGE_MODEL=gpt-4o-mini +# Defaults use GPT-5 mini (cost-effective with good quality) +# INFERENCE_TEXT_MODEL=gpt-5-mini +# INFERENCE_IMAGE_MODEL=gpt-5-mini ``` -## Ollama +### Supported Models -Ollama is a local LLM provider that you can use to run your own LLM server. You'll need to pass ollama's address to karakeep and you need to ensure that it's accessible from within the karakeep container (e.g. no localhost addresses). +Karakeep supports all OpenAI models. By default, it uses the `/v1/chat/completions` endpoint for maximum compatibility. -``` -# MAKE SURE YOU DON'T HAVE OPENAI_API_KEY set, otherwise it takes precedence. +#### GPT-5 Family -OLLAMA_BASE_URL=http://ollama.mylab.com:11434 +| Model | Input | Output | Best For | +|-------|-------|--------|----------| +| `gpt-5.2` | $1.75/1M | $14/1M | Latest (knowledge cutoff: Aug 31, 2025) | +| `gpt-5-pro` | $15/1M | $120/1M | Highest quality, complex reasoning | +| `gpt-5` | $1.25/1M | $10/1M | General purpose | +| `gpt-5-mini` (default) | $0.25/1M | $2/1M | Cost-effective balance | +| `gpt-5-nano` | $0.05/1M | $0.40/1M | Fastest, cheapest | -# Make sure to pull the models in ollama first. Example models: -INFERENCE_TEXT_MODEL=gemma3 -INFERENCE_IMAGE_MODEL=llava +:::tip +Newer models have more recent training data cutoffs. For summarizing current events, consider `gpt-5.2`. +::: + +#### o-series (Legacy Reasoning Models) + +GPT-5 models now include reasoning capabilities via `reasoning_effort`, making o-series largely unnecessary. Consider using `gpt-5` or `gpt-5-mini` with `OPENAI_REASONING_EFFORT=medium` instead. + +| Model | Notes | +|-------|-------| +| `o1-*`, `o3-*`, `o4-mini` | Still supported but GPT-5 recommended | + +#### GPT-4 Family (Legacy) + +Still fully supported via Chat Completions: +- `gpt-4.1`, `gpt-4o`, `gpt-4o-mini`, `gpt-4-turbo` -# If the model you're using doesn't support structured output, you also need: -# INFERENCE_OUTPUT_SCHEMA=plain ``` +# GPT-5 Pro for highest quality +INFERENCE_TEXT_MODEL=gpt-5-pro +INFERENCE_IMAGE_MODEL=gpt-5-pro -## Gemini +# GPT-5 Nano for lowest cost +INFERENCE_TEXT_MODEL=gpt-5-nano +INFERENCE_IMAGE_MODEL=gpt-5-nano +``` + +### Responses API (Optional) -Gemini has an OpenAI-compatible API. You need to get an api key from Google AI Studio. 
+For GPT-5 and o-series models, you can enable the newer Responses API (`/v1/responses`) for advanced features like reasoning effort control: ``` +OPENAI_USE_RESPONSES_API=true +OPENAI_REASONING_EFFORT=low # none, minimal, low, medium, high, xhigh +``` + +| Setting | Behavior | +|---------|----------| +| `OPENAI_USE_RESPONSES_API=false` (default) | Uses Chat Completions for all models | +| `OPENAI_USE_RESPONSES_API=true` | Uses Responses API for `gpt-5*`, `o1*`, `o3*`, `o4*`; Chat Completions for others | + +**Reasoning effort notes:** +- `gpt-5.1` defaults to `none` (no reasoning unless specified) +- `gpt-5-pro` only supports `high` +- `xhigh` available for `gpt-5.1-codex-max` and later + +### OpenAI-Compatible Providers -OPENAI_BASE_URL=https://generativelanguage.googleapis.com/v1beta +For providers with OpenAI-compatible APIs (Azure, OpenRouter, etc.), use the OpenAI provider with a custom base URL: + +``` +INFERENCE_PROVIDER=openai OPENAI_API_KEY=YOUR_API_KEY +OPENAI_BASE_URL=https://your-provider-api-endpoint +INFERENCE_TEXT_MODEL=your-model-name +INFERENCE_IMAGE_MODEL=your-model-name +``` + +#### Azure OpenAI -# Example models: -INFERENCE_TEXT_MODEL=gemini-2.0-flash -INFERENCE_IMAGE_MODEL=gemini-2.0-flash ``` +INFERENCE_PROVIDER=openai + +# Deployed via Azure AI Foundry: +OPENAI_BASE_URL=https://{your-azure-ai-foundry-resource-name}.cognitiveservices.azure.com/openai/v1/ -## OpenRouter +# Or deployed via Azure OpenAI Service: +# OPENAI_BASE_URL=https://{your-azure-openai-resource-name}.openai.azure.com/openai/v1/ +OPENAI_API_KEY=YOUR_API_KEY +INFERENCE_TEXT_MODEL=YOUR_DEPLOYMENT_NAME +INFERENCE_IMAGE_MODEL=YOUR_DEPLOYMENT_NAME ``` + +:::warning +The [model name is the deployment name](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/switching-endpoints#keyword-argument-for-model) you specified when deploying the model, which may differ from the base model name. +::: + +#### OpenRouter + +``` +INFERENCE_PROVIDER=openai OPENAI_BASE_URL=https://openrouter.ai/api/v1 OPENAI_API_KEY=YOUR_API_KEY - -# Example models: INFERENCE_TEXT_MODEL=meta-llama/llama-4-scout INFERENCE_IMAGE_MODEL=meta-llama/llama-4-scout ``` -## Perplexity +## Anthropic + +Native support for Anthropic's Claude models via the `/v1/messages` API. ``` -OPENAI_BASE_URL: https://api.perplexity.ai -OPENAI_API_KEY: Your Perplexity API Key -INFERENCE_TEXT_MODEL: sonar-pro -INFERENCE_IMAGE_MODEL: sonar-pro +INFERENCE_PROVIDER=anthropic # Optional, auto-detected if ANTHROPIC_API_KEY is set +ANTHROPIC_API_KEY=sk-ant-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + +# Recommended models (Claude Sonnet 4.5 is best balance of speed/quality): +INFERENCE_TEXT_MODEL=claude-sonnet-4-5-20250929 +INFERENCE_IMAGE_MODEL=claude-sonnet-4-5-20250929 + +# Alternative: use aliases (auto-updates to latest snapshot) +# INFERENCE_TEXT_MODEL=claude-sonnet-4-5 +# INFERENCE_IMAGE_MODEL=claude-sonnet-4-5 + +# Other options: +# - claude-haiku-4-5-20251001 (fastest, cheapest) +# - claude-opus-4-5-20251101 (most capable) ``` -## Azure +### Claude Model Family + +| Model | Input | Output | Best For | +|-------|-------|--------|----------| +| `claude-sonnet-4-5` | $3/1M | $15/1M | Recommended - best balance | +| `claude-haiku-4-5` | $1/1M | $5/1M | Fastest, most cost-effective | +| `claude-opus-4-5` | $5/1M | $25/1M | Most capable, complex reasoning | + +### Structured Outputs -Azure has an OpenAI-compatible API. +Karakeep uses Anthropic's structured outputs feature (beta) with JSON schema for reliable parsing. 
When a schema is provided, Claude will return validated JSON matching the expected structure. -You can get your API key from the Overview page of the Azure AI Foundry Portal or via "Keys + Endpoints" on the resource in the Azure Portal. +:::warning Model Requirements +Structured outputs require **Claude 4.5 generation** models: +- `claude-haiku-4-5-*` +- `claude-sonnet-4-5-*` (recommended) +- `claude-opus-4-5-*` + +Older models (Claude 3.5, Claude 4.0) do **not** support structured outputs. To use an older model, set `INFERENCE_OUTPUT_SCHEMA=plain`. +::: :::warning -The [model name is the deployment name](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/switching-endpoints#keyword-argument-for-model) you specified when deploying the model, which may differ from the base model name. +Anthropic does not provide an embeddings API. If you're using Anthropic for inference and need embeddings (for future semantic search features), you'll need to also configure OpenAI or Google for embeddings: + +``` +INFERENCE_PROVIDER=anthropic +ANTHROPIC_API_KEY=sk-ant-xxx + +# For embeddings, provide an additional API key: +OPENAI_API_KEY=sk-xxx # Will be used for embeddings +# Or: GEMINI_API_KEY=xxx +``` + +Karakeep will automatically use the available embedding provider. ::: +## Google Gemini + +Native support for Google's Gemini models with structured output via JSON schema. Get an API key from [Google AI Studio](https://aistudio.google.com/). + ``` -# Deployed via Azure AI Foundry: -OPENAI_BASE_URL=https://{your-azure-ai-foundry-resource-name}.cognitiveservices.azure.com/openai/v1/ +INFERENCE_PROVIDER=google # Optional, auto-detected if GEMINI_API_KEY is set +GEMINI_API_KEY=YOUR_API_KEY -# Deployed via Azure OpenAI Service: -OPENAI_BASE_URL=https://{your-azure-openai-resource-name}.openai.azure.com/openai/v1/ +# Recommended models: +INFERENCE_TEXT_MODEL=gemini-2.5-flash +INFERENCE_IMAGE_MODEL=gemini-2.5-flash +``` + +### Gemini 3 Family (Preview) + +The latest generation with state-of-the-art reasoning and agentic capabilities. + +| Model | Input | Output | Best For | +|-------|-------|--------|----------| +| `gemini-3-pro-preview` | $2/1M | $12/1M | Most intelligent, multimodal, agentic | +| `gemini-3-flash-preview` | $0.50/1M | $3/1M | Fast + intelligent, search/grounding | -OPENAI_API_KEY=YOUR_API_KEY -INFERENCE_TEXT_MODEL=YOUR_DEPLOYMENT_NAME -INFERENCE_IMAGE_MODEL=YOUR_DEPLOYMENT_NAME ``` +INFERENCE_TEXT_MODEL=gemini-3-flash-preview +INFERENCE_IMAGE_MODEL=gemini-3-flash-preview +``` + +### Gemini 2.5 Family (Stable) + +Production-ready models with excellent price-performance. + +| Model | Input | Output | Best For | +|-------|-------|--------|----------| +| `gemini-2.5-pro` | $1.25/1M | $10/1M | Advanced reasoning, complex STEM | +| `gemini-2.5-flash` (recommended) | $0.30/1M | $2.50/1M | Best price-performance | +| `gemini-2.5-flash-lite` | $0.10/1M | $0.40/1M | Fastest, most cost-efficient | + +``` +# Balanced default +INFERENCE_TEXT_MODEL=gemini-2.5-flash +INFERENCE_IMAGE_MODEL=gemini-2.5-flash + +# Budget option +INFERENCE_TEXT_MODEL=gemini-2.5-flash-lite +INFERENCE_IMAGE_MODEL=gemini-2.5-flash-lite + +# Maximum quality +INFERENCE_TEXT_MODEL=gemini-2.5-pro +INFERENCE_IMAGE_MODEL=gemini-2.5-pro +``` + +### Structured Outputs + +Karakeep uses Gemini's structured output feature with JSON schema for reliable parsing. When outputting JSON, the model will conform to the expected schema. This is enabled automatically - no additional configuration needed. 
+ +## Ollama + +Ollama is a local LLM provider that you can use to run your own LLM server. You'll need to pass Ollama's address to Karakeep and ensure it's accessible from within the Karakeep container (e.g., no localhost addresses). + +``` +INFERENCE_PROVIDER=ollama # Optional, auto-detected if OLLAMA_BASE_URL is set +OLLAMA_BASE_URL=http://ollama.mylab.com:11434 + +# Make sure to pull the models in ollama first. Example models: +INFERENCE_TEXT_MODEL=gemma3 +INFERENCE_IMAGE_MODEL=llava + +# If the model you're using doesn't support structured output, you also need: +# INFERENCE_OUTPUT_SCHEMA=plain +``` + +## Embeddings Configuration + +Embeddings are used for semantic search features. By default, Karakeep uses the same provider for embeddings as for inference (except for Anthropic, which doesn't support embeddings). + +You can explicitly set the embedding provider: + +``` +EMBEDDING_PROVIDER=openai # Use OpenAI for embeddings +EMBEDDING_PROVIDER=google # Use Google for embeddings +EMBEDDING_PROVIDER=ollama # Use Ollama for embeddings + +# Customize the embedding model +EMBEDDING_TEXT_MODEL=text-embedding-3-small # Default for OpenAI +``` + +## Model Defaults and Recommendations + +The defaults are for OpenAI. **You must set model names when using other providers.** + +| Provider | Text Model | Image Model | Notes | +|----------|------------|-------------|-------| +| OpenAI (default) | `gpt-5-mini` | `gpt-5-mini` | GPT-5 family; legacy GPT-4 still works | +| Anthropic | `claude-sonnet-4-5-20250929` | `claude-sonnet-4-5-20250929` | Or use alias `claude-sonnet-4-5` | +| Google | `gemini-2.5-flash` | `gemini-2.5-flash` | Stable; `gemini-3-*-preview` for latest | +| Ollama | `gemma3` / `llama3.1` | `llava` | Depends on what you've pulled | + +### Embedding Models + +| Provider | Model | Notes | +|----------|-------|-------| +| OpenAI | `text-embedding-3-small` (default) | Also: `text-embedding-3-large` | +| Google | `gemini-embedding-001` | Recommended (3072 dims) | +| Ollama | `nomic-embed-text` | Must pull first | +| Anthropic | N/A | Use OpenAI or Google for embeddings | diff --git a/package.json b/package.json index 6effc7a86..57aab166d 100644 --- a/package.json +++ b/package.json @@ -20,7 +20,8 @@ "lint:fix": "turbo --no-daemon lint:fix --continue", "typecheck": "turbo --no-daemon typecheck", "preflight": "turbo run --no-daemon typecheck lint format", - "preflight:fix": "turbo run --no-daemon typecheck lint:fix format:fix" + "preflight:fix": "turbo run --no-daemon typecheck lint:fix format:fix", + "inference:live-test": "pnpm --filter @karakeep/shared exec tsx inference/live-test.ts" }, "dependencies": { "husky": "^9.0.11" diff --git a/packages/api/package.json b/packages/api/package.json index e49204b95..bf90a1552 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -27,7 +27,7 @@ "hono": "^4.10.6", "prom-client": "^15.1.3", "rss": "^1.2.2", - "zod": "^3.24.2" + "zod": "^3.25.0" }, "devDependencies": { "@karakeep/prettier-config": "workspace:^0.1.0", diff --git a/packages/benchmarks/package.json b/packages/benchmarks/package.json index 52862862e..17b0cf367 100644 --- a/packages/benchmarks/package.json +++ b/packages/benchmarks/package.json @@ -19,7 +19,7 @@ "p-limit": "^7.2.0", "superjson": "^2.2.1", "tinybench": "^6.0.0", - "zod": "^3.24.2" + "zod": "^3.25.0" }, "devDependencies": { "@karakeep/prettier-config": "workspace:^0.1.0", diff --git a/packages/e2e_tests/package.json b/packages/e2e_tests/package.json index 45d512bbc..d9b049f98 100644 --- 
a/packages/e2e_tests/package.json +++ b/packages/e2e_tests/package.json @@ -21,7 +21,7 @@ "@karakeep/trpc": "workspace:^0.1.0", "@trpc/client": "^11.4.3", "superjson": "^2.2.1", - "zod": "^3.24.2" + "zod": "^3.25.0" }, "devDependencies": { "@karakeep/prettier-config": "workspace:^0.1.0", diff --git a/packages/open-api/package.json b/packages/open-api/package.json index f122b13e4..248cec569 100644 --- a/packages/open-api/package.json +++ b/packages/open-api/package.json @@ -7,7 +7,7 @@ "dependencies": { "@asteasolutions/zod-to-openapi": "^7.2.0", "@karakeep/shared": "workspace:^0.1.0", - "zod": "^3.24.2" + "zod": "^3.25.0" }, "devDependencies": { "@karakeep/prettier-config": "workspace:^0.1.0", diff --git a/packages/shared/config.ts b/packages/shared/config.ts index 7238e90c0..eaf52471a 100644 --- a/packages/shared/config.ts +++ b/packages/shared/config.ts @@ -56,26 +56,49 @@ const allEnv = z.object({ OAUTH_PROVIDER_NAME: z.string().default("Custom Provider"), TURNSTILE_SITE_KEY: z.string().optional(), TURNSTILE_SECRET_KEY: z.string().optional(), + // Inference provider selection + INFERENCE_PROVIDER: z + .enum(["openai", "anthropic", "google", "ollama"]) + .optional(), + + // OpenAI configuration OPENAI_API_KEY: z.string().optional(), OPENAI_BASE_URL: z.string().url().optional(), OPENAI_PROXY_URL: z.string().url().optional(), OPENAI_SERVICE_TIER: z.enum(["auto", "default", "flex"]).optional(), + + // Anthropic configuration + ANTHROPIC_API_KEY: z.string().optional(), + ANTHROPIC_BASE_URL: z.string().url().optional(), + + // Google Gemini configuration + GEMINI_API_KEY: z.string().optional(), + GEMINI_BASE_URL: z.string().url().optional(), + + // Ollama configuration OLLAMA_BASE_URL: z.string().url().optional(), OLLAMA_KEEP_ALIVE: z.string().optional(), + + // Embeddings provider (defaults to inference provider if it supports embeddings) + EMBEDDING_PROVIDER: z.enum(["openai", "google", "ollama"]).optional(), + INFERENCE_JOB_TIMEOUT_SEC: z.coerce.number().default(30), INFERENCE_FETCH_TIMEOUT_SEC: z.coerce.number().default(300), - INFERENCE_TEXT_MODEL: z.string().default("gpt-4.1-mini"), - INFERENCE_IMAGE_MODEL: z.string().default("gpt-4o-mini"), + INFERENCE_TEXT_MODEL: z.string().default("gpt-5-mini"), + INFERENCE_IMAGE_MODEL: z.string().default("gpt-5-mini"), EMBEDDING_TEXT_MODEL: z.string().default("text-embedding-3-small"), INFERENCE_CONTEXT_LENGTH: z.coerce.number().default(2048), INFERENCE_MAX_OUTPUT_TOKENS: z.coerce.number().default(2048), - INFERENCE_USE_MAX_COMPLETION_TOKENS: stringBool("false"), - INFERENCE_SUPPORTS_STRUCTURED_OUTPUT: optionalStringBool(), INFERENCE_OUTPUT_SCHEMA: z .enum(["structured", "json", "plain"]) .default("structured"), INFERENCE_ENABLE_AUTO_TAGGING: stringBool("true"), INFERENCE_ENABLE_AUTO_SUMMARIZATION: stringBool("false"), + // OpenAI-specific options + OPENAI_USE_RESPONSES_API: stringBool("false"), + OPENAI_REASONING_EFFORT: z + .enum(["none", "minimal", "low", "medium", "high", "xhigh"]) + .default("low"), OCR_CACHE_DIR: z.string().optional(), OCR_LANGS: z .string() @@ -270,33 +293,79 @@ const serverConfigSchema = allEnv.transform((val, ctx) => { } : undefined, }, - inference: { - isConfigured: !!val.OPENAI_API_KEY || !!val.OLLAMA_BASE_URL, - numWorkers: val.INFERENCE_NUM_WORKERS, - jobTimeoutSec: val.INFERENCE_JOB_TIMEOUT_SEC, - fetchTimeoutSec: val.INFERENCE_FETCH_TIMEOUT_SEC, - openAIApiKey: val.OPENAI_API_KEY, - openAIBaseUrl: val.OPENAI_BASE_URL, - openAIProxyUrl: val.OPENAI_PROXY_URL, - openAIServiceTier: val.OPENAI_SERVICE_TIER, - 
ollamaBaseUrl: val.OLLAMA_BASE_URL, - ollamaKeepAlive: val.OLLAMA_KEEP_ALIVE, - textModel: val.INFERENCE_TEXT_MODEL, - imageModel: val.INFERENCE_IMAGE_MODEL, - inferredTagLang: val.INFERENCE_LANG, - contextLength: val.INFERENCE_CONTEXT_LENGTH, - maxOutputTokens: val.INFERENCE_MAX_OUTPUT_TOKENS, - useMaxCompletionTokens: val.INFERENCE_USE_MAX_COMPLETION_TOKENS, - outputSchema: - val.INFERENCE_SUPPORTS_STRUCTURED_OUTPUT !== undefined - ? val.INFERENCE_SUPPORTS_STRUCTURED_OUTPUT - ? ("structured" as const) - : ("plain" as const) - : val.INFERENCE_OUTPUT_SCHEMA, - enableAutoTagging: val.INFERENCE_ENABLE_AUTO_TAGGING, - enableAutoSummarization: val.INFERENCE_ENABLE_AUTO_SUMMARIZATION, - }, + inference: (() => { + // Determine the provider based on explicit setting or available credentials + const determineProvider = (): + | "openai" + | "anthropic" + | "google" + | "ollama" + | null => { + if (val.INFERENCE_PROVIDER) { + return val.INFERENCE_PROVIDER; + } + // Legacy behavior: auto-detect based on available credentials + if (val.OPENAI_API_KEY) return "openai"; + if (val.ANTHROPIC_API_KEY) return "anthropic"; + if (val.GEMINI_API_KEY) return "google"; + if (val.OLLAMA_BASE_URL) return "ollama"; + return null; + }; + + const provider = determineProvider(); + + return { + provider, + isConfigured: provider !== null, + numWorkers: val.INFERENCE_NUM_WORKERS, + jobTimeoutSec: val.INFERENCE_JOB_TIMEOUT_SEC, + fetchTimeoutSec: val.INFERENCE_FETCH_TIMEOUT_SEC, + // Provider-specific configs + openAIApiKey: val.OPENAI_API_KEY, + openAIBaseUrl: val.OPENAI_BASE_URL, + openAIProxyUrl: val.OPENAI_PROXY_URL, + openAIServiceTier: val.OPENAI_SERVICE_TIER, + anthropicApiKey: val.ANTHROPIC_API_KEY, + anthropicBaseUrl: val.ANTHROPIC_BASE_URL, + geminiApiKey: val.GEMINI_API_KEY, + geminiBaseUrl: val.GEMINI_BASE_URL, + ollamaBaseUrl: val.OLLAMA_BASE_URL, + ollamaKeepAlive: val.OLLAMA_KEEP_ALIVE, + // Model settings + textModel: val.INFERENCE_TEXT_MODEL, + imageModel: val.INFERENCE_IMAGE_MODEL, + inferredTagLang: val.INFERENCE_LANG, + contextLength: val.INFERENCE_CONTEXT_LENGTH, + maxOutputTokens: val.INFERENCE_MAX_OUTPUT_TOKENS, + outputSchema: val.INFERENCE_OUTPUT_SCHEMA, + enableAutoTagging: val.INFERENCE_ENABLE_AUTO_TAGGING, + enableAutoSummarization: val.INFERENCE_ENABLE_AUTO_SUMMARIZATION, + // OpenAI-specific options + openaiUseResponsesApi: val.OPENAI_USE_RESPONSES_API, + openaiReasoningEffort: val.OPENAI_REASONING_EFFORT, + }; + })(), embedding: { + provider: (() => { + // Determine embedding provider + if (val.EMBEDDING_PROVIDER) { + return val.EMBEDDING_PROVIDER; + } + // Auto-detect based on inference provider + const inferenceProvider = val.INFERENCE_PROVIDER; + if ( + inferenceProvider === "openai" || + inferenceProvider === "google" || + inferenceProvider === "ollama" + ) { + return inferenceProvider; + } + // For anthropic or auto-detected providers + if (val.OPENAI_API_KEY) return "openai" as const; + if (val.GEMINI_API_KEY) return "google" as const; + if (val.OLLAMA_BASE_URL) return "ollama" as const; + return null; + })(), textModel: val.EMBEDDING_TEXT_MODEL, }, crawler: { @@ -471,6 +540,12 @@ export const clientConfig = { inferredTagLang: serverConfig.inference.inferredTagLang, enableAutoTagging: serverConfig.inference.enableAutoTagging, enableAutoSummarization: serverConfig.inference.enableAutoSummarization, + // Provider info for read-only display + provider: serverConfig.inference.provider, + textModel: serverConfig.inference.textModel, + imageModel: serverConfig.inference.imageModel, + 
embeddingProvider: serverConfig.embedding.provider, + embeddingModel: serverConfig.embedding.textModel, }, serverVersion: serverConfig.serverVersion, disableNewReleaseCheck: serverConfig.disableNewReleaseCheck, diff --git a/packages/shared/index.ts b/packages/shared/index.ts index e69de29bb..a21251e82 100644 --- a/packages/shared/index.ts +++ b/packages/shared/index.ts @@ -0,0 +1,3 @@ +// Shared package - modules are imported via their specific paths +// e.g., import { x } from "@karakeep/shared/config" +export {}; diff --git a/packages/shared/inference.ts b/packages/shared/inference.ts deleted file mode 100644 index 61a621acc..000000000 --- a/packages/shared/inference.ts +++ /dev/null @@ -1,385 +0,0 @@ -import { Ollama } from "ollama"; -import OpenAI from "openai"; -import { zodResponseFormat } from "openai/helpers/zod"; -import * as undici from "undici"; -import { z } from "zod"; -import { zodToJsonSchema } from "zod-to-json-schema"; - -import serverConfig from "./config"; -import { customFetch } from "./customFetch"; -import logger from "./logger"; - -export interface InferenceResponse { - response: string; - totalTokens: number | undefined; -} - -export interface EmbeddingResponse { - embeddings: number[][]; -} - -export interface InferenceOptions { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - schema: z.ZodSchema | null; - abortSignal?: AbortSignal; -} - -const defaultInferenceOptions: InferenceOptions = { - schema: null, -}; - -export interface InferenceClient { - inferFromText( - prompt: string, - opts: Partial, - ): Promise; - inferFromImage( - prompt: string, - contentType: string, - image: string, - opts: Partial, - ): Promise; - generateEmbeddingFromText(inputs: string[]): Promise; -} - -const mapInferenceOutputSchema = < - T, - S extends typeof serverConfig.inference.outputSchema, ->( - opts: Record, - type: S, -): T => { - return opts[type]; -}; - -export interface OpenAIInferenceConfig { - apiKey: string; - baseURL?: string; - proxyUrl?: string; - serviceTier?: typeof serverConfig.inference.openAIServiceTier; - textModel: string; - imageModel: string; - contextLength: number; - maxOutputTokens: number; - useMaxCompletionTokens: boolean; - outputSchema: "structured" | "json" | "plain"; -} - -export class InferenceClientFactory { - static build(): InferenceClient | null { - if (serverConfig.inference.openAIApiKey) { - return OpenAIInferenceClient.fromConfig(); - } - - if (serverConfig.inference.ollamaBaseUrl) { - return OllamaInferenceClient.fromConfig(); - } - return null; - } -} - -export class OpenAIInferenceClient implements InferenceClient { - openAI: OpenAI; - private config: OpenAIInferenceConfig; - - constructor(config: OpenAIInferenceConfig) { - this.config = config; - - const fetchOptions = config.proxyUrl - ? { - dispatcher: new undici.ProxyAgent(config.proxyUrl), - } - : undefined; - - this.openAI = new OpenAI({ - apiKey: config.apiKey, - baseURL: config.baseURL, - ...(fetchOptions ? 
{ fetchOptions } : {}), - defaultHeaders: { - "X-Title": "Karakeep", - "HTTP-Referer": "https://karakeep.app", - }, - }); - } - - static fromConfig(): OpenAIInferenceClient { - return new OpenAIInferenceClient({ - apiKey: serverConfig.inference.openAIApiKey!, - baseURL: serverConfig.inference.openAIBaseUrl, - proxyUrl: serverConfig.inference.openAIProxyUrl, - serviceTier: serverConfig.inference.openAIServiceTier, - textModel: serverConfig.inference.textModel, - imageModel: serverConfig.inference.imageModel, - contextLength: serverConfig.inference.contextLength, - maxOutputTokens: serverConfig.inference.maxOutputTokens, - useMaxCompletionTokens: serverConfig.inference.useMaxCompletionTokens, - outputSchema: serverConfig.inference.outputSchema, - }); - } - - async inferFromText( - prompt: string, - _opts: Partial, - ): Promise { - const optsWithDefaults: InferenceOptions = { - ...defaultInferenceOptions, - ..._opts, - }; - const chatCompletion = await this.openAI.chat.completions.create( - { - messages: [{ role: "user", content: prompt }], - model: this.config.textModel, - ...(this.config.serviceTier - ? { service_tier: this.config.serviceTier } - : {}), - ...(this.config.useMaxCompletionTokens - ? { max_completion_tokens: this.config.maxOutputTokens } - : { max_tokens: this.config.maxOutputTokens }), - response_format: mapInferenceOutputSchema( - { - structured: optsWithDefaults.schema - ? zodResponseFormat(optsWithDefaults.schema, "schema") - : undefined, - json: { type: "json_object" }, - plain: undefined, - }, - this.config.outputSchema, - ), - }, - { - signal: optsWithDefaults.abortSignal, - }, - ); - - const response = chatCompletion.choices[0].message.content; - if (!response) { - throw new Error(`Got no message content from OpenAI`); - } - return { response, totalTokens: chatCompletion.usage?.total_tokens }; - } - - async inferFromImage( - prompt: string, - contentType: string, - image: string, - _opts: Partial, - ): Promise { - const optsWithDefaults: InferenceOptions = { - ...defaultInferenceOptions, - ..._opts, - }; - const chatCompletion = await this.openAI.chat.completions.create( - { - model: this.config.imageModel, - ...(this.config.serviceTier - ? { service_tier: this.config.serviceTier } - : {}), - ...(this.config.useMaxCompletionTokens - ? { max_completion_tokens: this.config.maxOutputTokens } - : { max_tokens: this.config.maxOutputTokens }), - response_format: mapInferenceOutputSchema( - { - structured: optsWithDefaults.schema - ? 
zodResponseFormat(optsWithDefaults.schema, "schema") - : undefined, - json: { type: "json_object" }, - plain: undefined, - }, - this.config.outputSchema, - ), - messages: [ - { - role: "user", - content: [ - { type: "text", text: prompt }, - { - type: "image_url", - image_url: { - url: `data:${contentType};base64,${image}`, - detail: "low", - }, - }, - ], - }, - ], - }, - { - signal: optsWithDefaults.abortSignal, - }, - ); - - const response = chatCompletion.choices[0].message.content; - if (!response) { - throw new Error(`Got no message content from OpenAI`); - } - return { response, totalTokens: chatCompletion.usage?.total_tokens }; - } - - async generateEmbeddingFromText( - inputs: string[], - ): Promise { - const model = serverConfig.embedding.textModel; - const embedResponse = await this.openAI.embeddings.create({ - model: model, - input: inputs, - }); - const embedding2D: number[][] = embedResponse.data.map( - (embedding: OpenAI.Embedding) => embedding.embedding, - ); - return { embeddings: embedding2D }; - } -} - -export interface OllamaInferenceConfig { - baseUrl: string; - textModel: string; - imageModel: string; - contextLength: number; - maxOutputTokens: number; - keepAlive?: string; - outputSchema: "structured" | "json" | "plain"; -} - -class OllamaInferenceClient implements InferenceClient { - ollama: Ollama; - private config: OllamaInferenceConfig; - - constructor(config: OllamaInferenceConfig) { - this.config = config; - this.ollama = new Ollama({ - host: config.baseUrl, - fetch: customFetch, // Use the custom fetch with configurable timeout - }); - } - - static fromConfig(): OllamaInferenceClient { - return new OllamaInferenceClient({ - baseUrl: serverConfig.inference.ollamaBaseUrl!, - textModel: serverConfig.inference.textModel, - imageModel: serverConfig.inference.imageModel, - contextLength: serverConfig.inference.contextLength, - maxOutputTokens: serverConfig.inference.maxOutputTokens, - keepAlive: serverConfig.inference.ollamaKeepAlive, - outputSchema: serverConfig.inference.outputSchema, - }); - } - - async runModel( - model: string, - prompt: string, - _opts: InferenceOptions, - image?: string, - ) { - const optsWithDefaults: InferenceOptions = { - ...defaultInferenceOptions, - ..._opts, - }; - - let newAbortSignal = undefined; - if (optsWithDefaults.abortSignal) { - newAbortSignal = AbortSignal.any([optsWithDefaults.abortSignal]); - newAbortSignal.onabort = () => { - this.ollama.abort(); - }; - } - const chatCompletion = await this.ollama.generate({ - model: model, - format: mapInferenceOutputSchema( - { - structured: optsWithDefaults.schema - ? zodToJsonSchema(optsWithDefaults.schema) - : undefined, - json: "json", - plain: undefined, - }, - this.config.outputSchema, - ), - stream: true, - keep_alive: this.config.keepAlive, - options: { - num_ctx: this.config.contextLength, - num_predict: this.config.maxOutputTokens, - }, - prompt: prompt, - images: image ? [image] : undefined, - }); - - let totalTokens = 0; - let response = ""; - try { - for await (const part of chatCompletion) { - response += part.response; - if (!isNaN(part.eval_count)) { - totalTokens += part.eval_count; - } - if (!isNaN(part.prompt_eval_count)) { - totalTokens += part.prompt_eval_count; - } - } - } catch (e) { - if (e instanceof Error && e.name === "AbortError") { - throw e; - } - // There seem to be some bug in ollama where you can get some successful response, but still throw an error. - // Using stream + accumulating the response so far is a workaround. 
- // https://github.com/ollama/ollama-js/issues/72 - totalTokens = NaN; - logger.warn( - `Got an exception from ollama, will still attempt to deserialize the response we got so far: ${e}`, - ); - } finally { - if (newAbortSignal) { - newAbortSignal.onabort = null; - } - } - - return { response, totalTokens }; - } - - async inferFromText( - prompt: string, - _opts: Partial, - ): Promise { - const optsWithDefaults: InferenceOptions = { - ...defaultInferenceOptions, - ..._opts, - }; - return await this.runModel( - this.config.textModel, - prompt, - optsWithDefaults, - undefined, - ); - } - - async inferFromImage( - prompt: string, - _contentType: string, - image: string, - _opts: Partial, - ): Promise { - const optsWithDefaults: InferenceOptions = { - ...defaultInferenceOptions, - ..._opts, - }; - return await this.runModel( - this.config.imageModel, - prompt, - optsWithDefaults, - image, - ); - } - - async generateEmbeddingFromText( - inputs: string[], - ): Promise { - const embedding = await this.ollama.embed({ - model: serverConfig.embedding.textModel, - input: inputs, - // Truncate the input to fit into the model's max token limit, - // in the future we want to add a way to split the input into multiple parts. - truncate: true, - }); - return { embeddings: embedding.embeddings }; - } -} diff --git a/packages/shared/inference/anthropic.test.ts b/packages/shared/inference/anthropic.test.ts new file mode 100644 index 000000000..d6bc42f25 --- /dev/null +++ b/packages/shared/inference/anthropic.test.ts @@ -0,0 +1,343 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { z } from "zod"; + +import { + AnthropicInferenceClient, + supportsStructuredOutputs, +} from "./anthropic"; + +// Mock the Anthropic SDK +const mockBetaMessagesCreate = vi.fn(); + +vi.mock("@anthropic-ai/sdk", () => ({ + default: vi.fn().mockImplementation(() => ({ + beta: { + messages: { + create: mockBetaMessagesCreate, + }, + }, + })), +})); + +// Mock zod-to-json-schema (used for converting Zod schemas to JSON schema) +vi.mock("zod-to-json-schema", () => ({ + zodToJsonSchema: vi.fn((_schema) => ({ + type: "object", + properties: { tags: { type: "array" } }, + })), +})); + +// Mock serverConfig +vi.mock("../config", () => ({ + default: { + inference: { + provider: "anthropic", + anthropicApiKey: "test-anthropic-key", + textModel: "claude-sonnet-4-5-20250929", + imageModel: "claude-sonnet-4-5-20250929", + maxOutputTokens: 2048, + outputSchema: "structured", + }, + }, +})); + +describe("AnthropicInferenceClient", () => { + let client: AnthropicInferenceClient; + + beforeEach(() => { + vi.clearAllMocks(); + client = new AnthropicInferenceClient(); + }); + + describe("inferFromText", () => { + it("should call beta messages API with correct parameters", async () => { + mockBetaMessagesCreate.mockResolvedValueOnce({ + content: [{ type: "text", text: "test response" }], + usage: { input_tokens: 10, output_tokens: 20 }, + }); + + const result = await client.inferFromText("test prompt", {}); + + expect(mockBetaMessagesCreate).toHaveBeenCalledTimes(1); + expect(result.response).toBe("test response"); + expect(result.totalTokens).toBe(30); + }); + + it("should include structured outputs beta flag when schema is provided", async () => { + mockBetaMessagesCreate.mockResolvedValueOnce({ + content: [{ type: "text", text: '{"name": "test"}' }], + usage: { input_tokens: 5, output_tokens: 10 }, + }); + + const schema = z.object({ name: z.string() }); + await client.inferFromText("prompt", { schema }); + + 
expect(mockBetaMessagesCreate).toHaveBeenCalledWith( + expect.objectContaining({ + betas: ["structured-outputs-2025-11-13"], + }), + expect.any(Object), + ); + }); + + it("should not include betas header when no schema provided", async () => { + mockBetaMessagesCreate.mockResolvedValueOnce({ + content: [{ type: "text", text: "response" }], + usage: { input_tokens: 5, output_tokens: 10 }, + }); + + await client.inferFromText("prompt", {}); + + const callArgs = mockBetaMessagesCreate.mock.calls[0][0]; + expect(callArgs.betas).toBeUndefined(); + }); + + it("should pass prompt as user message", async () => { + mockBetaMessagesCreate.mockResolvedValueOnce({ + content: [{ type: "text", text: "response" }], + usage: { input_tokens: 5, output_tokens: 10 }, + }); + + await client.inferFromText("my test prompt", {}); + + expect(mockBetaMessagesCreate).toHaveBeenCalledWith( + expect.objectContaining({ + messages: [{ role: "user", content: "my test prompt" }], + }), + expect.any(Object), + ); + }); + + it("should include model and max_tokens", async () => { + mockBetaMessagesCreate.mockResolvedValueOnce({ + content: [{ type: "text", text: "response" }], + usage: { input_tokens: 5, output_tokens: 10 }, + }); + + await client.inferFromText("prompt", {}); + + expect(mockBetaMessagesCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "claude-sonnet-4-5-20250929", + max_tokens: 2048, + }), + expect.any(Object), + ); + }); + + it("should include output_format when schema is provided", async () => { + mockBetaMessagesCreate.mockResolvedValueOnce({ + content: [{ type: "text", text: '{"name": "test"}' }], + usage: { input_tokens: 5, output_tokens: 10 }, + }); + + const schema = z.object({ name: z.string() }); + await client.inferFromText("prompt", { schema }); + + expect(mockBetaMessagesCreate).toHaveBeenCalledWith( + expect.objectContaining({ + output_format: expect.objectContaining({ + type: "json_schema", + }), + }), + expect.any(Object), + ); + }); + + it("should throw error when no text content returned", async () => { + mockBetaMessagesCreate.mockResolvedValueOnce({ + content: [{ type: "tool_use", id: "123", name: "tool", input: {} }], + usage: { input_tokens: 5, output_tokens: 10 }, + }); + + await expect(client.inferFromText("prompt", {})).rejects.toThrow( + "Got no text content from Anthropic", + ); + }); + + it("should pass abort signal to API call", async () => { + mockBetaMessagesCreate.mockResolvedValueOnce({ + content: [{ type: "text", text: "response" }], + usage: { input_tokens: 5, output_tokens: 10 }, + }); + + const controller = new AbortController(); + await client.inferFromText("prompt", { + abortSignal: controller.signal, + }); + + expect(mockBetaMessagesCreate).toHaveBeenCalledWith( + expect.any(Object), + expect.objectContaining({ + signal: controller.signal, + }), + ); + }); + }); + + describe("inferFromImage", () => { + it("should include image in message content with base64 encoding", async () => { + mockBetaMessagesCreate.mockResolvedValueOnce({ + content: [{ type: "text", text: "image description" }], + usage: { input_tokens: 100, output_tokens: 50 }, + }); + + await client.inferFromImage( + "describe this image", + "image/png", + "base64encodedimage", + {}, + ); + + expect(mockBetaMessagesCreate).toHaveBeenCalledWith( + expect.objectContaining({ + messages: [ + { + role: "user", + content: [ + { + type: "image", + source: { + type: "base64", + media_type: "image/png", + data: "base64encodedimage", + }, + }, + { + type: "text", + text: "describe this image", + }, + ], 
+ }, + ], + }), + expect.any(Object), + ); + }); + + it("should return response and token count", async () => { + mockBetaMessagesCreate.mockResolvedValueOnce({ + content: [{ type: "text", text: "A beautiful sunset" }], + usage: { input_tokens: 150, output_tokens: 30 }, + }); + + const result = await client.inferFromImage( + "describe", + "image/jpeg", + "imagedata", + {}, + ); + + expect(result.response).toBe("A beautiful sunset"); + expect(result.totalTokens).toBe(180); + }); + + it("should support different image types", async () => { + mockBetaMessagesCreate.mockResolvedValueOnce({ + content: [{ type: "text", text: "response" }], + usage: { input_tokens: 100, output_tokens: 20 }, + }); + + await client.inferFromImage("describe", "image/webp", "webpdata", {}); + + expect(mockBetaMessagesCreate).toHaveBeenCalledWith( + expect.objectContaining({ + messages: [ + { + role: "user", + content: expect.arrayContaining([ + expect.objectContaining({ + source: expect.objectContaining({ + media_type: "image/webp", + }), + }), + ]), + }, + ], + }), + expect.any(Object), + ); + }); + }); +}); + +describe("AnthropicInferenceClient with plain output", () => { + let client: AnthropicInferenceClient; + + beforeEach(async () => { + vi.clearAllMocks(); + + // Set output schema to plain text + const { default: serverConfig } = await import("../config"); + serverConfig.inference.outputSchema = "plain"; + + client = new AnthropicInferenceClient(); + }); + + afterEach(async () => { + // Restore original config value to prevent test pollution + const { default: serverConfig } = await import("../config"); + serverConfig.inference.outputSchema = "structured"; + }); + + it("should not include output_format when outputSchema is plain", async () => { + mockBetaMessagesCreate.mockResolvedValueOnce({ + content: [{ type: "text", text: "plain response" }], + usage: { input_tokens: 5, output_tokens: 10 }, + }); + + const schema = z.object({ name: z.string() }); + await client.inferFromText("prompt", { schema }); + + const callArgs = mockBetaMessagesCreate.mock.calls[0][0]; + expect(callArgs.output_format).toBeUndefined(); + }); + + it("should not include betas header when outputSchema is plain", async () => { + mockBetaMessagesCreate.mockResolvedValueOnce({ + content: [{ type: "text", text: "plain response" }], + usage: { input_tokens: 5, output_tokens: 10 }, + }); + + await client.inferFromText("prompt", {}); + + const callArgs = mockBetaMessagesCreate.mock.calls[0][0]; + expect(callArgs.betas).toBeUndefined(); + }); +}); + +describe("supportsStructuredOutputs", () => { + it("should return true for Claude Sonnet 4.5 models", () => { + expect(supportsStructuredOutputs("claude-sonnet-4-5-20250929")).toBe(true); + expect(supportsStructuredOutputs("claude-sonnet-4-5")).toBe(true); + }); + + it("should return true for Claude Haiku 4.5 models", () => { + expect(supportsStructuredOutputs("claude-haiku-4-5-20251001")).toBe(true); + expect(supportsStructuredOutputs("claude-haiku-4-5")).toBe(true); + }); + + it("should return true for Claude Opus 4.5 models", () => { + expect(supportsStructuredOutputs("claude-opus-4-5-20251101")).toBe(true); + expect(supportsStructuredOutputs("claude-opus-4-5")).toBe(true); + }); + + it("should return true for Claude Opus 4.1 models", () => { + expect(supportsStructuredOutputs("claude-opus-4-1-20250415")).toBe(true); + expect(supportsStructuredOutputs("claude-opus-4-1")).toBe(true); + }); + + it("should return false for older Claude models", () => { + 
expect(supportsStructuredOutputs("claude-sonnet-4-20250514")).toBe(false); + expect(supportsStructuredOutputs("claude-3-5-sonnet-20241022")).toBe(false); + expect(supportsStructuredOutputs("claude-3-opus-20240229")).toBe(false); + expect(supportsStructuredOutputs("claude-3-haiku-20240307")).toBe(false); + expect(supportsStructuredOutputs("claude-2.1")).toBe(false); + }); + + it("should handle edge cases", () => { + expect(supportsStructuredOutputs("")).toBe(false); + expect(supportsStructuredOutputs("gpt-4")).toBe(false); + expect(supportsStructuredOutputs("claude")).toBe(false); + }); +}); diff --git a/packages/shared/inference/anthropic.ts b/packages/shared/inference/anthropic.ts new file mode 100644 index 000000000..dc40cd522 --- /dev/null +++ b/packages/shared/inference/anthropic.ts @@ -0,0 +1,244 @@ +import type { + BetaMessage, + MessageCreateParamsNonStreaming, +} from "@anthropic-ai/sdk/resources/beta/messages/messages"; +import Anthropic from "@anthropic-ai/sdk"; +import { zodToJsonSchema } from "zod-to-json-schema"; + +import type { + InferenceClient, + InferenceOptions, + InferenceResponse, +} from "./types"; +import serverConfig from "../config"; +import { defaultInferenceOptions } from "./types"; + +/** + * Beta feature identifier for structured outputs. + * Update this when Anthropic releases new beta versions. + */ +const STRUCTURED_OUTPUTS_BETA = "structured-outputs-2025-11-13"; + +/** + * Convert a Zod schema to Anthropic's output_format structure. + * Uses zod-to-json-schema since betaZodOutputFormat requires Zod 4. + */ +function zodToAnthropicFormat(schema: InferenceOptions["schema"]) { + if (!schema) return undefined; + + const rawSchema = zodToJsonSchema(schema, { $refStrategy: "none" }); + // Remove $schema field - Anthropic doesn't accept it + const { $schema, ...jsonSchema } = rawSchema as Record; + void $schema; + + return { + type: "json_schema" as const, + schema: jsonSchema, + }; +} + +/** + * Claude models that support structured outputs (beta). + * Per official docs: Sonnet 4.5, Opus 4.1, Opus 4.5, and Haiku 4.5. + * @see https://platform.claude.com/docs/en/build-with-claude/structured-outputs + */ +const STRUCTURED_OUTPUT_MODELS = [ + // Claude Sonnet 4.5 + "claude-sonnet-4-5-20250929", + "claude-sonnet-4-5", + // Claude Opus 4.1 + "claude-opus-4-1-20250415", + "claude-opus-4-1", + // Claude Opus 4.5 + "claude-opus-4-5-20251101", + "claude-opus-4-5", + // Claude Haiku 4.5 + "claude-haiku-4-5-20251001", + "claude-haiku-4-5", +]; + +/** + * Check if a Claude model supports structured outputs. + * Exported for testing. + */ +export function supportsStructuredOutputs(model: string): boolean { + return STRUCTURED_OUTPUT_MODELS.some( + (m) => model === m || model.startsWith(m), + ); +} + +/** + * Validate that the model supports required features. + * Throws if the model doesn't support structured outputs when needed. + */ +function validateModel(model: string, needsStructuredOutput: boolean): void { + if (needsStructuredOutput && !supportsStructuredOutputs(model)) { + throw new Error( + `Model "${model}" does not support structured outputs. ` + + `Use a Claude 4.5 model (e.g., claude-sonnet-4-5, claude-opus-4-5, claude-haiku-4-5) ` + + `or set INFERENCE_OUTPUT_SCHEMA=plain to disable structured outputs.`, + ); + } +} + +/** + * Supported image media types for Anthropic's API. 
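+ * The Messages API accepts JPEG, PNG, GIF, and WebP; `toAnthropicMediaType` below rejects anything else.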
+ */ +const SUPPORTED_MEDIA_TYPES = [ + "image/jpeg", + "image/png", + "image/gif", + "image/webp", +] as const; +type AnthropicMediaType = (typeof SUPPORTED_MEDIA_TYPES)[number]; + +/** + * Validate and convert a content type to Anthropic's expected media type. + * Throws if the content type is not supported. + */ +function toAnthropicMediaType(contentType: string): AnthropicMediaType { + if (!SUPPORTED_MEDIA_TYPES.includes(contentType as AnthropicMediaType)) { + throw new Error( + `Unsupported image type: "${contentType}". Anthropic supports: ${SUPPORTED_MEDIA_TYPES.join(", ")}`, + ); + } + return contentType as AnthropicMediaType; +} + +/** + * Extract text response and token count from an Anthropic message. + * Throws if no text content is found. + */ +function extractTextResponse(message: BetaMessage): InferenceResponse { + const textBlock = message.content.find((block) => block.type === "text"); + if (!textBlock || textBlock.type !== "text") { + throw new Error("Got no text content from Anthropic"); + } + + const totalTokens = + (message.usage.input_tokens ?? 0) + (message.usage.output_tokens ?? 0); + + return { response: textBlock.text, totalTokens }; +} + +/** + * Anthropic Inference Client + * + * Uses Claude's Messages API for text and vision inference. + * Supports structured outputs via output_format (beta feature). + * Only Claude 4.5+ models support structured outputs. + * Note: Anthropic does not provide an embeddings API. + */ +export class AnthropicInferenceClient implements InferenceClient { + private anthropic: Anthropic; + + constructor() { + this.anthropic = new Anthropic({ + apiKey: serverConfig.inference.anthropicApiKey, + ...(serverConfig.inference.anthropicBaseUrl && { + baseURL: serverConfig.inference.anthropicBaseUrl, + }), + }); + } + + async inferFromText( + prompt: string, + _opts: Partial, + ): Promise { + const optsWithDefaults: InferenceOptions = { + ...defaultInferenceOptions, + ..._opts, + }; + + const model = serverConfig.inference.textModel; + const useStructuredOutput = + !!optsWithDefaults.schema && + serverConfig.inference.outputSchema !== "plain"; + + // Validate model supports structured outputs if needed + validateModel(model, useStructuredOutput); + + // Build base request options + const baseOptions: MessageCreateParamsNonStreaming = { + model, + max_tokens: serverConfig.inference.maxOutputTokens, + messages: [{ role: "user", content: prompt }], + }; + + // Only add beta header and output_format when using structured outputs + if (useStructuredOutput) { + baseOptions.betas = [STRUCTURED_OUTPUTS_BETA]; + baseOptions.output_format = zodToAnthropicFormat( + optsWithDefaults.schema!, + ); + } + + const message = await this.anthropic.beta.messages.create(baseOptions, { + signal: optsWithDefaults.abortSignal ?? 
undefined, + }); + + return extractTextResponse(message); + } + + async inferFromImage( + prompt: string, + contentType: string, + image: string, + _opts: Partial, + ): Promise { + const optsWithDefaults: InferenceOptions = { + ...defaultInferenceOptions, + ..._opts, + }; + + const model = serverConfig.inference.imageModel; + const useStructuredOutput = + !!optsWithDefaults.schema && + serverConfig.inference.outputSchema !== "plain"; + + // Validate model supports structured outputs if needed + validateModel(model, useStructuredOutput); + + // Validate and convert content type to Anthropic's expected media type + const mediaType = toAnthropicMediaType(contentType); + + // Build base request options + const baseOptions: MessageCreateParamsNonStreaming = { + model, + max_tokens: serverConfig.inference.maxOutputTokens, + messages: [ + { + role: "user", + content: [ + { + type: "image", + source: { + type: "base64", + media_type: mediaType, + data: image, + }, + }, + { + type: "text", + text: prompt, + }, + ], + }, + ], + }; + + // Only add beta header and output_format when using structured outputs + if (useStructuredOutput) { + baseOptions.betas = [STRUCTURED_OUTPUTS_BETA]; + baseOptions.output_format = zodToAnthropicFormat( + optsWithDefaults.schema!, + ); + } + + const message = await this.anthropic.beta.messages.create(baseOptions, { + signal: optsWithDefaults.abortSignal ?? undefined, + }); + + return extractTextResponse(message); + } +} diff --git a/packages/shared/inference/factory.test.ts b/packages/shared/inference/factory.test.ts new file mode 100644 index 000000000..e9fb6ba2a --- /dev/null +++ b/packages/shared/inference/factory.test.ts @@ -0,0 +1,159 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; + +import { AnthropicInferenceClient } from "./anthropic"; +import { EmbeddingClientFactory, InferenceClientFactory } from "./factory"; +import { GoogleEmbeddingClient, GoogleGeminiInferenceClient } from "./google"; +import { OllamaEmbeddingClient, OllamaInferenceClient } from "./ollama"; +import { OpenAIEmbeddingClient, OpenAIInferenceClient } from "./openai"; + +// Mock all provider clients to avoid constructing real API clients +vi.mock("./openai", () => ({ + OpenAIInferenceClient: vi.fn(), + OpenAIEmbeddingClient: vi.fn(), +})); + +vi.mock("./anthropic", () => ({ + AnthropicInferenceClient: vi.fn(), +})); + +vi.mock("./google", () => ({ + GoogleGeminiInferenceClient: vi.fn(), + GoogleEmbeddingClient: vi.fn(), +})); + +vi.mock("./ollama", () => ({ + OllamaInferenceClient: vi.fn(), + OllamaEmbeddingClient: vi.fn(), +})); + +// Mock serverConfig with proper types +vi.mock("../config", () => ({ + default: { + inference: { + provider: "openai" as "openai" | "anthropic" | "google" | "ollama" | null, + }, + embedding: { + provider: "openai" as "openai" | "google" | "ollama" | null, + }, + }, +})); + +describe("InferenceClientFactory", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("should return OpenAIInferenceClient when provider is openai", async () => { + const { default: serverConfig } = await import("../config"); + serverConfig.inference.provider = "openai"; + + const client = InferenceClientFactory.build(); + + expect(client).toBeInstanceOf(OpenAIInferenceClient); + expect(OpenAIInferenceClient).toHaveBeenCalledTimes(1); + }); + + it("should return AnthropicInferenceClient when provider is anthropic", async () => { + const { default: serverConfig } = await import("../config"); + serverConfig.inference.provider = "anthropic"; + + const client = 
InferenceClientFactory.build(); + + expect(client).toBeInstanceOf(AnthropicInferenceClient); + expect(AnthropicInferenceClient).toHaveBeenCalledTimes(1); + }); + + it("should return GoogleGeminiInferenceClient when provider is google", async () => { + const { default: serverConfig } = await import("../config"); + serverConfig.inference.provider = "google"; + + const client = InferenceClientFactory.build(); + + expect(client).toBeInstanceOf(GoogleGeminiInferenceClient); + expect(GoogleGeminiInferenceClient).toHaveBeenCalledTimes(1); + }); + + it("should return OllamaInferenceClient when provider is ollama", async () => { + const { default: serverConfig } = await import("../config"); + serverConfig.inference.provider = "ollama"; + + const client = InferenceClientFactory.build(); + + expect(client).toBeInstanceOf(OllamaInferenceClient); + expect(OllamaInferenceClient).toHaveBeenCalledTimes(1); + }); + + it("should return null when provider is not configured", async () => { + const { default: serverConfig } = await import("../config"); + (serverConfig.inference as { provider: string | null }).provider = null; + + const client = InferenceClientFactory.build(); + + expect(client).toBeNull(); + }); + + it("should return null for unknown provider", async () => { + const { default: serverConfig } = await import("../config"); + (serverConfig.inference as { provider: string | null }).provider = + "unknown"; + + const client = InferenceClientFactory.build(); + + expect(client).toBeNull(); + }); +}); + +describe("EmbeddingClientFactory", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("should return OpenAIEmbeddingClient when provider is openai", async () => { + const { default: serverConfig } = await import("../config"); + serverConfig.embedding.provider = "openai"; + + const client = EmbeddingClientFactory.build(); + + expect(client).toBeInstanceOf(OpenAIEmbeddingClient); + expect(OpenAIEmbeddingClient).toHaveBeenCalledTimes(1); + }); + + it("should return GoogleEmbeddingClient when provider is google", async () => { + const { default: serverConfig } = await import("../config"); + serverConfig.embedding.provider = "google"; + + const client = EmbeddingClientFactory.build(); + + expect(client).toBeInstanceOf(GoogleEmbeddingClient); + expect(GoogleEmbeddingClient).toHaveBeenCalledTimes(1); + }); + + it("should return OllamaEmbeddingClient when provider is ollama", async () => { + const { default: serverConfig } = await import("../config"); + serverConfig.embedding.provider = "ollama"; + + const client = EmbeddingClientFactory.build(); + + expect(client).toBeInstanceOf(OllamaEmbeddingClient); + expect(OllamaEmbeddingClient).toHaveBeenCalledTimes(1); + }); + + it("should return null when provider is not configured", async () => { + const { default: serverConfig } = await import("../config"); + (serverConfig.embedding as { provider: string | null }).provider = null; + + const client = EmbeddingClientFactory.build(); + + expect(client).toBeNull(); + }); + + it("should return null for anthropic (no embedding support)", async () => { + const { default: serverConfig } = await import("../config"); + (serverConfig.embedding as { provider: string | null }).provider = + "anthropic"; + + const client = EmbeddingClientFactory.build(); + + expect(client).toBeNull(); + }); +}); diff --git a/packages/shared/inference/factory.ts b/packages/shared/inference/factory.ts new file mode 100644 index 000000000..c660f7784 --- /dev/null +++ b/packages/shared/inference/factory.ts @@ -0,0 +1,76 @@ +import type { 
EmbeddingClient, InferenceClient } from "./types"; +import serverConfig from "../config"; +import { AnthropicInferenceClient } from "./anthropic"; +import { GoogleEmbeddingClient, GoogleGeminiInferenceClient } from "./google"; +import { OllamaEmbeddingClient, OllamaInferenceClient } from "./ollama"; +import { OpenAIEmbeddingClient, OpenAIInferenceClient } from "./openai"; + +/** + * Factory for creating inference clients based on configuration. + * + * Supported providers: + * - openai: OpenAI GPT models (Chat Completions + Responses API) + * - anthropic: Anthropic Claude models + * - google: Google Gemini models + * - ollama: Self-hosted local models + */ +export class InferenceClientFactory { + static build(): InferenceClient | null { + const provider = serverConfig.inference.provider; + + switch (provider) { + case "openai": + return new OpenAIInferenceClient(); + case "anthropic": + return new AnthropicInferenceClient(); + case "google": + return new GoogleGeminiInferenceClient(); + case "ollama": + return new OllamaInferenceClient(); + case null: + return null; + default: { + // Compile-time exhaustiveness check - TypeScript will error if a valid case is missing + // At runtime, gracefully return null for any unexpected values + const _exhaustive: never = provider; + void _exhaustive; + return null; + } + } + } +} + +/** + * Factory for creating embedding clients based on configuration. + * + * Supported providers: + * - openai: OpenAI text-embedding models + * - google: Google Gemini embedding models + * - ollama: Self-hosted embedding models + * + * Note: Anthropic does not provide embeddings. When using Anthropic for inference, + * configure a separate embedding provider (openai, google, or ollama). + */ +export class EmbeddingClientFactory { + static build(): EmbeddingClient | null { + const provider = serverConfig.embedding.provider; + + switch (provider) { + case "openai": + return new OpenAIEmbeddingClient(); + case "google": + return new GoogleEmbeddingClient(); + case "ollama": + return new OllamaEmbeddingClient(); + case null: + return null; + default: { + // Compile-time exhaustiveness check - TypeScript will error if a valid case is missing + // At runtime, gracefully return null for any unexpected values (e.g., anthropic for embeddings) + const _exhaustive: never = provider; + void _exhaustive; + return null; + } + } + } +} diff --git a/packages/shared/inference/google.test.ts b/packages/shared/inference/google.test.ts new file mode 100644 index 000000000..0e6936eff --- /dev/null +++ b/packages/shared/inference/google.test.ts @@ -0,0 +1,316 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { z } from "zod"; + +import { GoogleEmbeddingClient, GoogleGeminiInferenceClient } from "./google"; + +// Mock the Google Gen AI SDK +const mockGenerateContent = vi.fn(); +const mockEmbedContent = vi.fn(); + +vi.mock("@google/genai", () => ({ + GoogleGenAI: vi.fn().mockImplementation(() => ({ + models: { + generateContent: mockGenerateContent, + embedContent: mockEmbedContent, + }, + })), + Type: { + STRING: "STRING", + NUMBER: "NUMBER", + OBJECT: "OBJECT", + ARRAY: "ARRAY", + BOOLEAN: "BOOLEAN", + }, +})); + +// Mock serverConfig +vi.mock("../config", () => ({ + default: { + inference: { + provider: "google", + geminiApiKey: "test-gemini-key", + textModel: "gemini-2.5-flash", + imageModel: "gemini-2.5-flash", + maxOutputTokens: 2048, + outputSchema: "structured", + }, + embedding: { + provider: "google", + textModel: "text-embedding-004", + }, + }, 
+  },
+})); + +describe("GoogleGeminiInferenceClient", () => { + let client: GoogleGeminiInferenceClient; + + beforeEach(() => { + vi.clearAllMocks(); + client = new GoogleGeminiInferenceClient(); + }); + + describe("inferFromText", () => { + it("should call generateContent with correct parameters", async () => { + mockGenerateContent.mockResolvedValueOnce({ + text: "test response", + usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 20 }, + }); + + const result = await client.inferFromText("test prompt", {}); + + expect(mockGenerateContent).toHaveBeenCalledTimes(1); + expect(result.response).toBe("test response"); + expect(result.totalTokens).toBe(30); + }); + + it("should pass prompt as contents", async () => { + mockGenerateContent.mockResolvedValueOnce({ + text: "response", + usageMetadata: { promptTokenCount: 5, candidatesTokenCount: 10 }, + }); + + await client.inferFromText("my test prompt", {}); + + expect(mockGenerateContent).toHaveBeenCalledWith( + expect.objectContaining({ + contents: "my test prompt", + }), + ); + }); + + it("should include model in request", async () => { + mockGenerateContent.mockResolvedValueOnce({ + text: "response", + usageMetadata: { promptTokenCount: 5, candidatesTokenCount: 10 }, + }); + + await client.inferFromText("prompt", {}); + + expect(mockGenerateContent).toHaveBeenCalledWith( + expect.objectContaining({ + model: "gemini-2.5-flash", + }), + ); + }); + + it("should set JSON response format with maxOutputTokens", async () => { + mockGenerateContent.mockResolvedValueOnce({ + text: '{"name": "test"}', + usageMetadata: { promptTokenCount: 5, candidatesTokenCount: 10 }, + }); + + await client.inferFromText("prompt", {}); + + expect(mockGenerateContent).toHaveBeenCalledWith( + expect.objectContaining({ + config: expect.objectContaining({ + maxOutputTokens: 2048, + responseMimeType: "application/json", + }), + }), + ); + }); + + it("should include JSON schema when schema is provided", async () => { + mockGenerateContent.mockResolvedValueOnce({ + text: '{"name": "test"}', + usageMetadata: { promptTokenCount: 5, candidatesTokenCount: 10 }, + }); + + const schema = z.object({ name: z.string() }); + await client.inferFromText("prompt", { schema }); + + expect(mockGenerateContent).toHaveBeenCalledWith( + expect.objectContaining({ + config: expect.objectContaining({ + responseJsonSchema: expect.objectContaining({ + type: "object", + properties: expect.objectContaining({ + name: expect.objectContaining({ type: "string" }), + }), + }), + }), + }), + ); + }); + + it("should throw error when no text content returned", async () => { + mockGenerateContent.mockResolvedValueOnce({ + text: null, + usageMetadata: { promptTokenCount: 5, candidatesTokenCount: 0 }, + }); + + await expect(client.inferFromText("prompt", {})).rejects.toThrow( + "Got no text content from Google Gemini", + ); + }); + + it("should pass abort signal in config", async () => { + mockGenerateContent.mockResolvedValueOnce({ + text: "response", + usageMetadata: { promptTokenCount: 5, candidatesTokenCount: 10 }, + }); + + const controller = new AbortController(); + await client.inferFromText("prompt", { + abortSignal: controller.signal, + }); + + expect(mockGenerateContent).toHaveBeenCalledWith( + expect.objectContaining({ + config: expect.objectContaining({ + abortSignal: controller.signal, + }), + }), + ); + }); + }); + + describe("inferFromImage", () => { + it("should include image as inline data", async () => { + mockGenerateContent.mockResolvedValueOnce({ + text: "image description", + 
usageMetadata: { promptTokenCount: 100, candidatesTokenCount: 50 }, + }); + + await client.inferFromImage( + "describe this image", + "image/png", + "base64encodedimage", + {}, + ); + + expect(mockGenerateContent).toHaveBeenCalledWith( + expect.objectContaining({ + contents: [ + { + inlineData: { + mimeType: "image/png", + data: "base64encodedimage", + }, + }, + "describe this image", + ], + }), + ); + }); + + it("should return response and token count", async () => { + mockGenerateContent.mockResolvedValueOnce({ + text: "A colorful parrot", + usageMetadata: { promptTokenCount: 150, candidatesTokenCount: 30 }, + }); + + const result = await client.inferFromImage( + "describe", + "image/jpeg", + "imagedata", + {}, + ); + + expect(result.response).toBe("A colorful parrot"); + expect(result.totalTokens).toBe(180); + }); + + it("should handle missing usage metadata gracefully", async () => { + mockGenerateContent.mockResolvedValueOnce({ + text: "response", + usageMetadata: undefined, + }); + + const result = await client.inferFromImage( + "describe", + "image/jpeg", + "data", + {}, + ); + + expect(result.totalTokens).toBe(0); + }); + }); +}); + +describe("GoogleGeminiInferenceClient with plain output", () => { + let client: GoogleGeminiInferenceClient; + + beforeEach(async () => { + vi.clearAllMocks(); + + // Set output schema to plain text + const { default: serverConfig } = await import("../config"); + serverConfig.inference.outputSchema = "plain"; + + client = new GoogleGeminiInferenceClient(); + }); + + afterEach(async () => { + // Restore original config value to prevent test pollution + const { default: serverConfig } = await import("../config"); + serverConfig.inference.outputSchema = "structured"; + }); + + it("should use text/plain mime type when outputSchema is plain", async () => { + mockGenerateContent.mockResolvedValueOnce({ + text: "plain text response", + usageMetadata: { promptTokenCount: 5, candidatesTokenCount: 10 }, + }); + + await client.inferFromText("prompt", {}); + + expect(mockGenerateContent).toHaveBeenCalledWith( + expect.objectContaining({ + config: expect.objectContaining({ + responseMimeType: "text/plain", + }), + }), + ); + }); +}); + +describe("GoogleEmbeddingClient", () => { + let client: GoogleEmbeddingClient; + + beforeEach(() => { + vi.clearAllMocks(); + client = new GoogleEmbeddingClient(); + }); + + it("should generate embeddings for text inputs in batch", async () => { + // Mock single batch call returning multiple embeddings + mockEmbedContent.mockResolvedValueOnce({ + embeddings: [{ values: [0.1, 0.2, 0.3] }, { values: [0.4, 0.5, 0.6] }], + }); + + const result = await client.generateEmbeddingFromText(["hello", "world"]); + + expect(mockEmbedContent).toHaveBeenCalledTimes(1); + expect(result.embeddings).toEqual([ + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + ]); + }); + + it("should pass all inputs in a single batch request", async () => { + mockEmbedContent.mockResolvedValueOnce({ + embeddings: [{ values: [0.1] }, { values: [0.2] }, { values: [0.3] }], + }); + + await client.generateEmbeddingFromText(["a", "b", "c"]); + + expect(mockEmbedContent).toHaveBeenCalledTimes(1); + expect(mockEmbedContent).toHaveBeenCalledWith({ + model: "text-embedding-004", + contents: ["a", "b", "c"], + }); + }); + + it("should handle empty embeddings array", async () => { + mockEmbedContent.mockResolvedValueOnce({ + embeddings: [], + }); + + const result = await client.generateEmbeddingFromText(["test"]); + + expect(result.embeddings).toEqual([]); + }); +}); diff --git 
a/packages/shared/inference/google.ts b/packages/shared/inference/google.ts new file mode 100644 index 000000000..fba2a4b20 --- /dev/null +++ b/packages/shared/inference/google.ts @@ -0,0 +1,202 @@ +import { GoogleGenAI, Type } from "@google/genai"; +import { zodToJsonSchema } from "zod-to-json-schema"; + +import type { + EmbeddingClient, + EmbeddingResponse, + InferenceClient, + InferenceOptions, + InferenceResponse, +} from "./types"; +import serverConfig from "../config"; +import { defaultInferenceOptions } from "./types"; + +/** + * Maximum number of texts per batch for Google's embedding API. + */ +const EMBEDDING_BATCH_SIZE = 100; + +/** + * Build generation config for Gemini API requests. + * Handles output format (plain text, JSON, or structured JSON schema). + */ +function buildGenerationConfig( + opts: InferenceOptions, +): Record { + const config: Record = { + maxOutputTokens: serverConfig.inference.maxOutputTokens, + }; + + // Configure response format based on outputSchema setting + if (serverConfig.inference.outputSchema === "plain") { + config.responseMimeType = "text/plain"; + } else { + config.responseMimeType = "application/json"; + + // If a Zod schema is provided, convert it to JSON schema for structured output + if (opts.schema) { + config.responseJsonSchema = zodToJsonSchema(opts.schema, { + $refStrategy: "none", + }); + } + } + + return config; +} + +/** + * Create a GoogleGenAI client instance. + * Validates API key and applies base URL if configured. + */ +function createGoogleClient(): GoogleGenAI { + const apiKey = serverConfig.inference.geminiApiKey; + if (!apiKey) { + throw new Error( + "Gemini API key is not configured. Set GEMINI_API_KEY environment variable.", + ); + } + + return new GoogleGenAI({ + apiKey, + ...(serverConfig.inference.geminiBaseUrl && { + httpOptions: { baseUrl: serverConfig.inference.geminiBaseUrl }, + }), + }); +} + +/** + * Google Gemini Inference Client + * + * Uses Google's unified Gen AI SDK for text and vision inference. + * Supports Gemini 2.5 and 3.x models with structured output via JSON schema. + */ +export class GoogleGeminiInferenceClient implements InferenceClient { + private ai: GoogleGenAI; + + constructor() { + this.ai = createGoogleClient(); + } + + async inferFromText( + prompt: string, + _opts: Partial, + ): Promise { + const optsWithDefaults: InferenceOptions = { + ...defaultInferenceOptions, + ..._opts, + }; + + const generationConfig = buildGenerationConfig(optsWithDefaults); + + const result = await this.ai.models.generateContent({ + model: serverConfig.inference.textModel, + contents: prompt, + config: { + ...generationConfig, + abortSignal: optsWithDefaults.abortSignal, + }, + }); + + const response = result.text; + if (!response) { + throw new Error("Got no text content from Google Gemini"); + } + + const totalTokens = + (result.usageMetadata?.promptTokenCount ?? 0) + + (result.usageMetadata?.candidatesTokenCount ?? 
0); + + return { response, totalTokens }; + } + + async inferFromImage( + prompt: string, + contentType: string, + image: string, + _opts: Partial, + ): Promise { + const optsWithDefaults: InferenceOptions = { + ...defaultInferenceOptions, + ..._opts, + }; + + const generationConfig = buildGenerationConfig(optsWithDefaults); + + const result = await this.ai.models.generateContent({ + model: serverConfig.inference.imageModel, + contents: [ + { + inlineData: { + mimeType: contentType, + data: image, + }, + }, + prompt, + ], + config: { + ...generationConfig, + abortSignal: optsWithDefaults.abortSignal, + }, + }); + + const response = result.text; + if (!response) { + throw new Error("Got no text content from Google Gemini"); + } + + const totalTokens = + (result.usageMetadata?.promptTokenCount ?? 0) + + (result.usageMetadata?.candidatesTokenCount ?? 0); + + return { response, totalTokens }; + } +} + +/** + * Google Gemini Embedding Client + * + * Uses Google's unified Gen AI SDK for text embeddings. + * Recommended model: gemini-embedding-001 (3072 dimensions, supports 128-3072). + * Handles batching automatically for inputs larger than 100 texts. + */ +export class GoogleEmbeddingClient implements EmbeddingClient { + private ai: GoogleGenAI; + + constructor() { + this.ai = createGoogleClient(); + } + + async generateEmbeddingFromText( + inputs: string[], + ): Promise { + // Google's embedding API has a limit of 100 texts per batch + // Process in chunks if necessary + if (inputs.length <= EMBEDDING_BATCH_SIZE) { + return this.embedBatch(inputs); + } + + // Process in batches and combine results + const allEmbeddings: number[][] = []; + for (let i = 0; i < inputs.length; i += EMBEDDING_BATCH_SIZE) { + const batch = inputs.slice(i, i + EMBEDDING_BATCH_SIZE); + const result = await this.embedBatch(batch); + allEmbeddings.push(...result.embeddings); + } + + return { embeddings: allEmbeddings }; + } + + private async embedBatch(inputs: string[]): Promise { + const result = await this.ai.models.embedContent({ + model: serverConfig.embedding.textModel, + contents: inputs, + }); + + const embeddings = (result.embeddings ?? []).map((e) => e.values ?? 
[]); + + return { embeddings }; + } +} + +// Re-export Type enum for use in schema definitions if needed +export { Type as GeminiSchemaType }; diff --git a/packages/shared/inference/index.ts b/packages/shared/inference/index.ts new file mode 100644 index 000000000..99142741a --- /dev/null +++ b/packages/shared/inference/index.ts @@ -0,0 +1,35 @@ +/** + * Inference Module + * + * Provides a unified interface for AI inference and embeddings across multiple providers: + * - OpenAI (GPT-4, GPT-5, with Chat Completions and Responses API support) + * - Anthropic (Claude) + * - Google (Gemini) + * - Ollama (self-hosted) + * + * Usage: + * import { InferenceClientFactory, EmbeddingClientFactory } from "@karakeep/shared/inference"; + * + * const inferenceClient = InferenceClientFactory.build(); + * const embeddingClient = EmbeddingClientFactory.build(); + */ + +// Types +export type { + InferenceClient, + EmbeddingClient, + InferenceResponse, + EmbeddingResponse, + InferenceOptions, +} from "./types"; + +export { defaultInferenceOptions } from "./types"; + +// Factories (main entry point for most consumers) +export { InferenceClientFactory, EmbeddingClientFactory } from "./factory"; + +// Individual clients (for advanced usage or testing) +export { OpenAIInferenceClient, OpenAIEmbeddingClient } from "./openai"; +export { AnthropicInferenceClient } from "./anthropic"; +export { GoogleGeminiInferenceClient, GoogleEmbeddingClient } from "./google"; +export { OllamaInferenceClient, OllamaEmbeddingClient } from "./ollama"; diff --git a/packages/shared/inference/live-test.ts b/packages/shared/inference/live-test.ts new file mode 100644 index 000000000..6f3573bd0 --- /dev/null +++ b/packages/shared/inference/live-test.ts @@ -0,0 +1,593 @@ +/** + * Live test script for inference providers. + * Run with: pnpm inference:live-test + * + * Set env vars before running: + * OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY + */ + +import { z } from "zod"; + +/** + * Actual schema used by the app for tagging (from tagging.ts) + */ +const openAIResponseSchema = z.object({ + tags: z.array(z.string()), +}); + +/** + * Realistic tagging prompt similar to what the app sends + */ +function buildRealisticTaggingPrompt(content: string): string { + return ` +You are an expert whose responsibility is to help with automatic tagging for a read-it-later/bookmarking app. +Please analyze the TEXT_CONTENT below and suggest relevant tags that describe its key themes, topics, and main ideas. The rules are: +- Aim for a variety of tags, including broad categories, specific keywords, and potential sub-genres. +- The tags must be in english. +- If the tag is not generic enough, don't include it. +- The content can include text for cookie consent and privacy policy, ignore those while tagging. +- Aim for 3-5 tags. +- If there are no good tags, leave the array empty. + + +${content} + +You must respond in JSON with the key "tags" and the value is an array of string tags.`; +} + +/** + * Realistic image prompt similar to what the app sends (from prompts.ts) + */ +function buildRealisticImagePrompt(): string { + return ` +You are an expert whose responsibility is to help with automatic text tagging for a read-it-later/bookmarking app. +Please analyze the attached image and suggest relevant tags that describe its key themes, topics, and main ideas. The rules are: +- Aim for a variety of tags, including broad categories, specific keywords, and potential sub-genres. +- The tags must be in english. 
+- If the tag is not generic enough, don't include it. +- Aim for 10-15 tags. +- If there are no good tags, don't emit any. +You must respond in valid JSON with the key "tags" and the value is list of tags. Don't wrap the response in a markdown code.`; +} + +/** + * A small test image (1x1 red pixel PNG) for image inference testing + */ +const TEST_IMAGE_BASE64 = + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg=="; + +/** + * Realistic bookmark content for testing + */ +const REALISTIC_BOOKMARK_CONTENT = ` +URL: https://example.com/typescript-best-practices +Title: 10 TypeScript Best Practices Every Developer Should Know +Description: Learn essential TypeScript patterns and practices to write cleaner, more maintainable code. +Content: TypeScript has become the go-to language for modern web development. In this article, we'll explore +10 best practices that will help you write better TypeScript code. From strict null checks to proper type +inference, these patterns will make your codebase more robust and easier to maintain. We'll cover topics +like avoiding 'any', using discriminated unions, leveraging utility types, and more. +`; + +interface TestResult { + provider: string; + test: string; + status: "pass" | "fail" | "skip"; + duration?: number; + response?: string; + error?: string; +} + +const results: TestResult[] = []; + +async function runTest( + provider: string, + testName: string, + fn: () => Promise, +): Promise { + const start = Date.now(); + try { + const result = await fn(); + const duration = Date.now() - start; + results.push({ + provider, + test: testName, + status: "pass", + duration, + response: + typeof result === "string" + ? result.slice(0, 100) + : JSON.stringify(result).slice(0, 100), + }); + console.log(`✅ ${provider}/${testName} (${duration}ms)`); + } catch (e) { + const duration = Date.now() - start; + const error = e instanceof Error ? e.message : String(e); + results.push({ + provider, + test: testName, + status: "fail", + duration, + error, + }); + console.log(`❌ ${provider}/${testName} (${duration}ms): ${error}`); + } +} + +async function testOpenAI() { + if (!process.env.OPENAI_API_KEY) { + results.push({ provider: "openai", test: "all", status: "skip" }); + console.log("⏭️ OpenAI: skipped (no API key)"); + return; + } + + const { default: OpenAI } = await import("openai"); + const client = new OpenAI(); + + // Test 1: Basic chat completion (Chat Completions API) + await runTest("openai", "chat-completion", async () => { + const response = await client.chat.completions.create({ + model: "gpt-5.2", + messages: [{ role: "user", content: "Say 'hello' and nothing else." 
}], + max_completion_tokens: 10, + }); + return response.choices[0]?.message?.content; + }); + + // Test 2: Structured output with realistic tagging prompt (Chat Completions API) + await runTest("openai", "structured-tagging", async () => { + const { zodResponseFormat } = await import("openai/helpers/zod"); + const prompt = buildRealisticTaggingPrompt(REALISTIC_BOOKMARK_CONTENT); + const response = await client.chat.completions.create({ + model: "gpt-5.2", + messages: [{ role: "user", content: prompt }], + response_format: zodResponseFormat(openAIResponseSchema, "tagging"), + }); + const result = JSON.parse(response.choices[0]?.message?.content || "{}"); + + // Validate response structure matches what app expects + if (!Array.isArray(result.tags)) { + throw new Error("Response missing 'tags' array"); + } + if (result.tags.length === 0) { + throw new Error("Expected at least one tag"); + } + return result; + }); + + // Test 3: Embeddings + await runTest("openai", "embeddings", async () => { + const response = await client.embeddings.create({ + model: "text-embedding-3-small", + input: ["hello world", "test embedding"], + }); + return `${response.data.length} embeddings, dim=${response.data[0]?.embedding.length}`; + }); + + // Test 4: Responses API (new API for GPT-5/o-series) + await runTest("openai", "responses-api", async () => { + const response = await client.responses.create({ + model: "gpt-5.2", // Works with any model + input: "Say 'responses api works' and nothing else.", + }); + // Get text from output + const textItem = response.output.find((item) => item.type === "message"); + if (textItem?.type === "message") { + const textContent = textItem.content.find( + (c) => c.type === "output_text", + ); + if (textContent?.type === "output_text") { + return textContent.text; + } + } + return response; + }); + + // Test 5: Verify Responses API model detection patterns exist in code + await runTest("openai", "responses-model-check", async () => { + const fs = await import("fs"); + const url = await import("url"); + const path = await import("path"); + + // Get the path to the OpenAI inference client + const __dirname = path.dirname(url.fileURLToPath(import.meta.url)); + const openaiPath = path.join(__dirname, "openai.ts"); + const content = fs.readFileSync(openaiPath, "utf-8"); + + // Check that we detect GPT-5, o1, o3, o4 models for Responses API + const hasGpt5 = content.includes('model.startsWith("gpt-5")'); + const hasO1 = content.includes('model.startsWith("o1")'); + const hasO3 = content.includes('model.startsWith("o3")'); + const hasO4 = content.includes('model.startsWith("o4")'); + + if (!hasGpt5 || !hasO1 || !hasO3 || !hasO4) { + throw new Error( + `Missing model prefixes: gpt-5=${hasGpt5}, o1=${hasO1}, o3=${hasO3}, o4=${hasO4}`, + ); + } + + return "Responses API model detection patterns verified"; + }); + + // Test 6: Image inference (like image bookmark tagging) + await runTest("openai", "image-tagging", async () => { + const { zodResponseFormat } = await import("openai/helpers/zod"); + const response = await client.chat.completions.create({ + model: "gpt-5.2", + messages: [ + { + role: "user", + content: [ + { + type: "image_url", + image_url: { + url: `data:image/png;base64,${TEST_IMAGE_BASE64}`, + detail: "low", + }, + }, + { type: "text", text: buildRealisticImagePrompt() }, + ], + }, + ], + response_format: zodResponseFormat(openAIResponseSchema, "tagging"), + }); + const result = JSON.parse(response.choices[0]?.message?.content || "{}"); + if (!Array.isArray(result.tags)) 
{ + throw new Error("Response missing 'tags' array"); + } + return result; + }); +} + +async function testAnthropic() { + if (!process.env.ANTHROPIC_API_KEY) { + results.push({ provider: "anthropic", test: "all", status: "skip" }); + console.log("⏭️ Anthropic: skipped (no API key)"); + return; + } + + const { default: Anthropic } = await import("@anthropic-ai/sdk"); + const client = new Anthropic(); + + // Test 1: Basic message with Claude 4.5 + await runTest("anthropic", "message-4.5", async () => { + const response = await client.messages.create({ + model: "claude-sonnet-4-5-20250929", + max_tokens: 50, + messages: [{ role: "user", content: "Say 'hello' and nothing else." }], + }); + const block = response.content[0]; + return block?.type === "text" ? block.text : ""; + }); + + // Test 2: Structured output with realistic tagging prompt (Claude 4.5 only) + await runTest("anthropic", "structured-tagging-4.5", async () => { + const { zodToJsonSchema } = await import("zod-to-json-schema"); + const rawSchema = zodToJsonSchema(openAIResponseSchema, { + $refStrategy: "none", + }); + // Remove $schema field - Anthropic doesn't accept it + const { $schema, ...jsonSchema } = rawSchema as Record; + void $schema; + + const prompt = buildRealisticTaggingPrompt(REALISTIC_BOOKMARK_CONTENT); + const response = await client.beta.messages.create({ + model: "claude-sonnet-4-5-20250929", + max_tokens: 200, + betas: ["structured-outputs-2025-11-13"], + messages: [{ role: "user", content: prompt }], + output_format: { + type: "json_schema", + schema: jsonSchema, + }, + }); + const block = response.content.find((b) => b.type === "text"); + const result = block?.type === "text" ? JSON.parse(block.text) : {}; + + // Validate response structure matches what app expects + if (!Array.isArray(result.tags)) { + throw new Error("Response missing 'tags' array"); + } + if (result.tags.length === 0) { + throw new Error("Expected at least one tag"); + } + return result; + }); + + // Test 3: Verify model validation in our client + await runTest("anthropic", "model-validation", async () => { + const { supportsStructuredOutputs } = await import("./anthropic.js"); + + // Should support 4.5 models + if (!supportsStructuredOutputs("claude-sonnet-4-5-20250929")) { + throw new Error("Should support claude-sonnet-4-5-20250929"); + } + if (!supportsStructuredOutputs("claude-haiku-4-5")) { + throw new Error("Should support claude-haiku-4-5"); + } + if (!supportsStructuredOutputs("claude-opus-4-5")) { + throw new Error("Should support claude-opus-4-5"); + } + + // Should NOT support older models + if (supportsStructuredOutputs("claude-sonnet-4-20250514")) { + throw new Error("Should NOT support claude-sonnet-4-20250514"); + } + if (supportsStructuredOutputs("claude-3-5-sonnet-20241022")) { + throw new Error("Should NOT support claude-3-5-sonnet-20241022"); + } + + return "Model validation working correctly"; + }); + + // Test 4: Using model alias (claude-sonnet-4-5 instead of dated version) + await runTest("anthropic", "model-alias", async () => { + const response = await client.messages.create({ + model: "claude-sonnet-4-5", // alias + max_tokens: 30, + messages: [{ role: "user", content: "Reply with just 'ok'" }], + }); + const block = response.content[0]; + return block?.type === "text" ? 
block.text : ""; + }); + + // Test 5: Haiku 4.5 for faster/cheaper option + await runTest("anthropic", "haiku-4.5", async () => { + const response = await client.messages.create({ + model: "claude-haiku-4-5", + max_tokens: 30, + messages: [{ role: "user", content: "Reply with just 'fast'" }], + }); + const block = response.content[0]; + return block?.type === "text" ? block.text : ""; + }); + + // Test 6: Image inference (like image bookmark tagging) + await runTest("anthropic", "image-tagging-4.5", async () => { + const { zodToJsonSchema } = await import("zod-to-json-schema"); + const rawSchema = zodToJsonSchema(openAIResponseSchema, { + $refStrategy: "none", + }); + const { $schema, ...jsonSchema } = rawSchema as Record; + void $schema; + + const response = await client.beta.messages.create({ + model: "claude-sonnet-4-5-20250929", + max_tokens: 200, + betas: ["structured-outputs-2025-11-13"], + messages: [ + { + role: "user", + content: [ + { + type: "image", + source: { + type: "base64", + media_type: "image/png", + data: TEST_IMAGE_BASE64, + }, + }, + { type: "text", text: buildRealisticImagePrompt() }, + ], + }, + ], + output_format: { + type: "json_schema", + schema: jsonSchema, + }, + }); + const block = response.content.find((b) => b.type === "text"); + const result = block?.type === "text" ? JSON.parse(block.text) : {}; + if (!Array.isArray(result.tags)) { + throw new Error("Response missing 'tags' array"); + } + return result; + }); +} + +async function testGoogle() { + if (!process.env.GEMINI_API_KEY) { + results.push({ provider: "google", test: "all", status: "skip" }); + console.log("⏭️ Google: skipped (no API key)"); + return; + } + + const { GoogleGenAI } = await import("@google/genai"); + const client = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY }); + + // Test 1: Basic generation + await runTest("google", "generate", async () => { + const response = await client.models.generateContent({ + model: "gemini-3-flash-preview", + contents: "Say 'hello' and nothing else.", + }); + return response.text; + }); + + // Test 2: Structured output with realistic tagging prompt + await runTest("google", "structured-tagging", async () => { + const { zodToJsonSchema } = await import("zod-to-json-schema"); + const prompt = buildRealisticTaggingPrompt(REALISTIC_BOOKMARK_CONTENT); + const response = await client.models.generateContent({ + model: "gemini-3-flash-preview", + contents: prompt, + config: { + responseMimeType: "application/json", + responseJsonSchema: zodToJsonSchema(openAIResponseSchema, { + $refStrategy: "none", + }), + }, + }); + const result = JSON.parse(response.text ?? 
"{}"); + + // Validate response structure matches what app expects + if (!Array.isArray(result.tags)) { + throw new Error("Response missing 'tags' array"); + } + if (result.tags.length === 0) { + throw new Error("Expected at least one tag"); + } + return result; + }); + + // Test 3: Embeddings + await runTest("google", "embeddings", async () => { + const response = await client.models.embedContent({ + model: "gemini-embedding-001", + contents: ["hello world", "test embedding"], + }); + return `${response.embeddings?.length} embeddings, dim=${response.embeddings?.[0]?.values?.length}`; + }); + + // Test 4: Image inference (like image bookmark tagging) + await runTest("google", "image-tagging", async () => { + const { zodToJsonSchema } = await import("zod-to-json-schema"); + const response = await client.models.generateContent({ + model: "gemini-3-flash-preview", + contents: [ + { + inlineData: { + mimeType: "image/png", + data: TEST_IMAGE_BASE64, + }, + }, + buildRealisticImagePrompt(), + ], + config: { + responseMimeType: "application/json", + responseJsonSchema: zodToJsonSchema(openAIResponseSchema, { + $refStrategy: "none", + }), + }, + }); + const result = JSON.parse(response.text ?? "{}"); + if (!Array.isArray(result.tags)) { + throw new Error("Response missing 'tags' array"); + } + return result; + }); +} + +/** + * Test our ACTUAL InferenceClient implementations - the same code path the app uses. + * This is the real integration test that verifies the factory and client classes work. + */ +async function testInferenceClientImplementations() { + // Import serverConfig and factory + const { default: serverConfig } = await import("../config.js"); + const { InferenceClientFactory } = await import("./index.js"); + + const provider = serverConfig.inference.provider; + const textModel = serverConfig.inference.textModel; + + if (!provider) { + console.log("⏭️ No INFERENCE_PROVIDER configured, skipping client tests"); + return; + } + + console.log(` Provider: ${provider}, Model: ${textModel}`); + + // Build the client via factory (exactly like the app does) + const client = InferenceClientFactory.build(); + + if (!client) { + console.log(`⏭️ Factory returned null for provider: ${provider}`); + return; + } + + // Test 1: Basic inferFromText (no schema - like summarization) + await runTest(`factory-${provider}`, "inferFromText-plain", async () => { + const result = await client.inferFromText( + "Summarize in one sentence: TypeScript is a typed superset of JavaScript.", + { schema: undefined }, + ); + + if (!result.response || result.response.length < 10) { + throw new Error(`Response too short: ${result.response}`); + } + + return { + response: result.response.slice(0, 100), + tokens: result.totalTokens, + }; + }); + + // Test 2: inferFromText with schema (like tagging) + await runTest(`factory-${provider}`, "inferFromText-tagging", async () => { + const prompt = buildRealisticTaggingPrompt(REALISTIC_BOOKMARK_CONTENT); + + const result = await client.inferFromText(prompt, { + schema: openAIResponseSchema, + }); + + // Parse exactly like the app does in tagging.ts + const parsed = openAIResponseSchema.parse(JSON.parse(result.response)); + + if (parsed.tags.length === 0) { + throw new Error("Expected at least one tag"); + } + + return { + tags: parsed.tags, + tokens: result.totalTokens, + }; + }); + + // Test 3: inferFromImage with schema (like image bookmark tagging) + await runTest(`factory-${provider}`, "inferFromImage-tagging", async () => { + const result = await client.inferFromImage( + 
buildRealisticImagePrompt(), + "image/png", + TEST_IMAGE_BASE64, + { schema: openAIResponseSchema }, + ); + + // Parse exactly like the app does in tagging.ts + const parsed = openAIResponseSchema.parse(JSON.parse(result.response)); + + return { + tags: parsed.tags, + tokens: result.totalTokens, + }; + }); +} + +async function main() { + console.log("🧪 Live Inference Provider Tests\n"); + console.log("================================\n"); + + console.log("--- Direct SDK Tests ---\n"); + + await testOpenAI(); + await testAnthropic(); + await testGoogle(); + + // Test our InferenceClient implementations with realistic tagging + console.log("\n--- InferenceClient Implementation Tests ---\n"); + await testInferenceClientImplementations(); + + console.log("\n================================"); + console.log("\n📊 Summary:\n"); + + const passed = results.filter((r) => r.status === "pass").length; + const failed = results.filter((r) => r.status === "fail").length; + const skipped = results.filter((r) => r.status === "skip").length; + + console.log(` ✅ Passed: ${passed}`); + console.log(` ❌ Failed: ${failed}`); + console.log(` ⏭️ Skipped: ${skipped}`); + + if (failed > 0) { + console.log("\n❌ Failed tests:"); + results + .filter((r) => r.status === "fail") + .forEach((r) => { + console.log(` - ${r.provider}/${r.test}: ${r.error}`); + }); + process.exit(1); + } + + console.log("\n✅ All tests passed!"); +} + +main().catch(console.error); diff --git a/packages/shared/inference/ollama.test.ts b/packages/shared/inference/ollama.test.ts new file mode 100644 index 000000000..75cf4c744 --- /dev/null +++ b/packages/shared/inference/ollama.test.ts @@ -0,0 +1,372 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { z } from "zod"; + +import { OllamaEmbeddingClient, OllamaInferenceClient } from "./ollama"; + +// Mock chat response generator for streaming +async function* mockChatStream( + parts: { + message: { content: string }; + eval_count?: number; + prompt_eval_count?: number; + }[], +) { + for (const part of parts) { + yield part; + } +} + +// Mock Ollama SDK +const mockChat = vi.fn(); +const mockEmbed = vi.fn(); +const mockAbort = vi.fn(); + +vi.mock("ollama", () => ({ + Ollama: vi.fn().mockImplementation(() => ({ + chat: mockChat, + embed: mockEmbed, + abort: mockAbort, + })), +})); + +// Mock customFetch +vi.mock("../customFetch", () => ({ + customFetch: vi.fn(), +})); + +// Mock logger +vi.mock("../logger", () => ({ + default: { + warn: vi.fn(), + info: vi.fn(), + error: vi.fn(), + }, +})); + +// Mock serverConfig +vi.mock("../config", () => ({ + default: { + inference: { + provider: "ollama", + ollamaBaseUrl: "http://localhost:11434", + ollamaKeepAlive: "5m", + textModel: "gemma3", + imageModel: "llava", + maxOutputTokens: 2048, + contextLength: 4096, + outputSchema: "structured", + }, + embedding: { + provider: "ollama", + textModel: "nomic-embed-text", + }, + }, +})); + +describe("OllamaInferenceClient", () => { + let client: OllamaInferenceClient; + + beforeEach(() => { + vi.clearAllMocks(); + client = new OllamaInferenceClient(); + }); + + describe("inferFromText", () => { + it("should call chat with correct parameters", async () => { + mockChat.mockReturnValueOnce( + mockChatStream([ + { + message: { content: "Hello " }, + eval_count: 5, + prompt_eval_count: 10, + }, + { + message: { content: "world!" 
}, + eval_count: 5, + prompt_eval_count: 0, + }, + ]), + ); + + const result = await client.inferFromText("test prompt", {}); + + expect(mockChat).toHaveBeenCalledTimes(1); + expect(result.response).toBe("Hello world!"); + expect(result.totalTokens).toBe(20); + }); + + it("should pass prompt as user message", async () => { + mockChat.mockReturnValueOnce( + mockChatStream([{ message: { content: "response" } }]), + ); + + await client.inferFromText("my test prompt", {}); + + expect(mockChat).toHaveBeenCalledWith( + expect.objectContaining({ + messages: [ + { role: "user", content: "my test prompt", images: undefined }, + ], + }), + ); + }); + + it("should use configured model", async () => { + mockChat.mockReturnValueOnce( + mockChatStream([{ message: { content: "response" } }]), + ); + + await client.inferFromText("prompt", {}); + + expect(mockChat).toHaveBeenCalledWith( + expect.objectContaining({ + model: "gemma3", + }), + ); + }); + + it("should set streaming to true", async () => { + mockChat.mockReturnValueOnce( + mockChatStream([{ message: { content: "response" } }]), + ); + + await client.inferFromText("prompt", {}); + + expect(mockChat).toHaveBeenCalledWith( + expect.objectContaining({ + stream: true, + }), + ); + }); + + it("should include context length and max tokens in options", async () => { + mockChat.mockReturnValueOnce( + mockChatStream([{ message: { content: "response" } }]), + ); + + await client.inferFromText("prompt", {}); + + expect(mockChat).toHaveBeenCalledWith( + expect.objectContaining({ + options: { + num_ctx: 4096, + num_predict: 2048, + }, + }), + ); + }); + + it("should include JSON schema format when schema is provided", async () => { + mockChat.mockReturnValueOnce( + mockChatStream([{ message: { content: '{"name": "test"}' } }]), + ); + + const schema = z.object({ name: z.string() }); + await client.inferFromText("prompt", { schema }); + + expect(mockChat).toHaveBeenCalledWith( + expect.objectContaining({ + format: expect.objectContaining({ + type: "object", + properties: expect.objectContaining({ + name: expect.objectContaining({ type: "string" }), + }), + }), + }), + ); + }); + + it("should include keep_alive setting", async () => { + mockChat.mockReturnValueOnce( + mockChatStream([{ message: { content: "response" } }]), + ); + + await client.inferFromText("prompt", {}); + + expect(mockChat).toHaveBeenCalledWith( + expect.objectContaining({ + keep_alive: "5m", + }), + ); + }); + + it("should accumulate tokens from stream", async () => { + mockChat.mockReturnValueOnce( + mockChatStream([ + { message: { content: "a" }, eval_count: 10, prompt_eval_count: 50 }, + { message: { content: "b" }, eval_count: 20, prompt_eval_count: 0 }, + { message: { content: "c" }, eval_count: 5, prompt_eval_count: 0 }, + ]), + ); + + const result = await client.inferFromText("prompt", {}); + + expect(result.response).toBe("abc"); + expect(result.totalTokens).toBe(85); + }); + }); + + describe("inferFromImage", () => { + it("should include image in message", async () => { + mockChat.mockReturnValueOnce( + mockChatStream([{ message: { content: "image description" } }]), + ); + + await client.inferFromImage( + "describe this image", + "image/png", + "base64encodedimage", + {}, + ); + + expect(mockChat).toHaveBeenCalledWith( + expect.objectContaining({ + messages: [ + { + role: "user", + content: "describe this image", + images: ["base64encodedimage"], + }, + ], + }), + ); + }); + + it("should use image model", async () => { + mockChat.mockReturnValueOnce( + mockChatStream([{ message: 
{ content: "response" } }]), + ); + + await client.inferFromImage("describe", "image/jpeg", "imagedata", {}); + + expect(mockChat).toHaveBeenCalledWith( + expect.objectContaining({ + model: "llava", + }), + ); + }); + }); +}); + +describe("OllamaInferenceClient with JSON output", () => { + let client: OllamaInferenceClient; + + beforeEach(async () => { + vi.clearAllMocks(); + + // Set output schema to json (not structured) + const { default: serverConfig } = await import("../config"); + serverConfig.inference.outputSchema = "json"; + + client = new OllamaInferenceClient(); + }); + + afterEach(async () => { + // Restore original config value to prevent test pollution + const { default: serverConfig } = await import("../config"); + serverConfig.inference.outputSchema = "structured"; + }); + + it("should use 'json' format string when outputSchema is json", async () => { + mockChat.mockReturnValueOnce( + mockChatStream([{ message: { content: '{"result": true}' } }]), + ); + + await client.inferFromText("prompt", {}); + + expect(mockChat).toHaveBeenCalledWith( + expect.objectContaining({ + format: "json", + }), + ); + }); +}); + +describe("OllamaInferenceClient with plain output", () => { + let client: OllamaInferenceClient; + + beforeEach(async () => { + vi.clearAllMocks(); + + // Set output schema to plain + const { default: serverConfig } = await import("../config"); + serverConfig.inference.outputSchema = "plain"; + + client = new OllamaInferenceClient(); + }); + + afterEach(async () => { + // Restore original config value to prevent test pollution + const { default: serverConfig } = await import("../config"); + serverConfig.inference.outputSchema = "structured"; + }); + + it("should not set format when outputSchema is plain", async () => { + mockChat.mockReturnValueOnce( + mockChatStream([{ message: { content: "plain text response" } }]), + ); + + const schema = z.object({ name: z.string() }); + await client.inferFromText("prompt", { schema }); + + expect(mockChat).toHaveBeenCalledWith( + expect.objectContaining({ + format: undefined, + }), + ); + }); +}); + +describe("OllamaEmbeddingClient", () => { + let client: OllamaEmbeddingClient; + + beforeEach(() => { + vi.clearAllMocks(); + client = new OllamaEmbeddingClient(); + }); + + it("should generate embeddings for text inputs", async () => { + mockEmbed.mockResolvedValueOnce({ + embeddings: [ + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + ], + }); + + const result = await client.generateEmbeddingFromText(["hello", "world"]); + + expect(result.embeddings).toEqual([ + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + ]); + }); + + it("should use configured embedding model", async () => { + mockEmbed.mockResolvedValueOnce({ + embeddings: [[0.1]], + }); + + await client.generateEmbeddingFromText(["test"]); + + expect(mockEmbed).toHaveBeenCalledWith({ + model: "nomic-embed-text", + input: ["test"], + truncate: true, + }); + }); + + it("should pass all inputs in a single request", async () => { + mockEmbed.mockResolvedValueOnce({ + embeddings: [[0.1], [0.2], [0.3]], + }); + + await client.generateEmbeddingFromText(["a", "b", "c"]); + + expect(mockEmbed).toHaveBeenCalledTimes(1); + expect(mockEmbed).toHaveBeenCalledWith({ + model: "nomic-embed-text", + input: ["a", "b", "c"], + truncate: true, + }); + }); +}); diff --git a/packages/shared/inference/ollama.ts b/packages/shared/inference/ollama.ts new file mode 100644 index 000000000..c46f18b2a --- /dev/null +++ b/packages/shared/inference/ollama.ts @@ -0,0 +1,162 @@ +import { Ollama } from "ollama"; +import { 
zodToJsonSchema } from "zod-to-json-schema";
+
+import type {
+  EmbeddingClient,
+  EmbeddingResponse,
+  InferenceClient,
+  InferenceOptions,
+  InferenceResponse,
+} from "./types";
+import serverConfig from "../config";
+import { customFetch } from "../customFetch";
+import logger from "../logger";
+import { defaultInferenceOptions } from "./types";
+
+/**
+ * Ollama Inference Client
+ *
+ * Uses Ollama's local API for self-hosted LLM inference.
+ */
+export class OllamaInferenceClient implements InferenceClient {
+  ollama: Ollama;
+
+  constructor() {
+    this.ollama = new Ollama({
+      host: serverConfig.inference.ollamaBaseUrl,
+      fetch: customFetch,
+    });
+  }
+
+  async runModel(
+    model: string,
+    prompt: string,
+    opts: InferenceOptions,
+    image?: string,
+  ): Promise<InferenceResponse> {
+    // Set up abort handling with addEventListener for automatic cleanup
+    // Using { once: true } ensures the handler is removed after firing
+    if (opts.abortSignal) {
+      opts.abortSignal.addEventListener(
+        "abort",
+        () => {
+          this.ollama.abort();
+        },
+        { once: true },
+      );
+    }
+
+    const outputSchema = serverConfig.inference.outputSchema;
+    let format: "json" | object | undefined;
+
+    if (outputSchema === "structured" && opts.schema) {
+      format = zodToJsonSchema(opts.schema);
+    } else if (outputSchema === "json") {
+      format = "json";
+    }
+
+    const chatCompletion = await this.ollama.chat({
+      model,
+      format,
+      stream: true,
+      keep_alive: serverConfig.inference.ollamaKeepAlive,
+      options: {
+        num_ctx: serverConfig.inference.contextLength,
+        num_predict: serverConfig.inference.maxOutputTokens,
+      },
+      messages: [
+        { role: "user", content: prompt, images: image ? [image] : undefined },
+      ],
+    });
+
+    let totalTokens: number | undefined = 0;
+    let response = "";
+    try {
+      for await (const part of chatCompletion) {
+        response += part.message.content;
+        if (part.eval_count !== undefined) {
+          totalTokens += part.eval_count;
+        }
+        if (part.prompt_eval_count !== undefined) {
+          totalTokens += part.prompt_eval_count;
+        }
+      }
+    } catch (e) {
+      if (e instanceof Error && e.name === "AbortError") {
+        throw e;
+      }
+      // There seems to be a bug in ollama where a request can produce a successful
+      // (partial) response and still throw an error at the end. Streaming and
+      // accumulating the response received so far works around it.
+      // https://github.com/ollama/ollama-js/issues/72
+      totalTokens = undefined;
+      logger.warn(
+        `Got an exception from ollama, will still attempt to deserialize the response we got so far: ${e}`,
+      );
+    }
+
+    return { response, totalTokens };
+  }
+
+  async inferFromText(
+    prompt: string,
+    _opts: Partial<InferenceOptions>,
+  ): Promise<InferenceResponse> {
+    const optsWithDefaults: InferenceOptions = {
+      ...defaultInferenceOptions,
+      ..._opts,
+    };
+    return await this.runModel(
+      serverConfig.inference.textModel,
+      prompt,
+      optsWithDefaults,
+      undefined,
+    );
+  }
+
+  async inferFromImage(
+    prompt: string,
+    _contentType: string,
+    image: string,
+    _opts: Partial<InferenceOptions>,
+  ): Promise<InferenceResponse> {
+    const optsWithDefaults: InferenceOptions = {
+      ...defaultInferenceOptions,
+      ..._opts,
+    };
+    return await this.runModel(
+      serverConfig.inference.imageModel,
+      prompt,
+      optsWithDefaults,
+      image,
+    );
+  }
+}
+
+/**
+ * Ollama Embedding Client
+ *
+ * Uses Ollama's embed API for local text embeddings.
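+ *
+ * A minimal usage sketch (illustrative only; it assumes serverConfig.embedding.textModel
+ * names an embedding model that the local Ollama instance has already pulled):
+ *
+ *   const embedder = new OllamaEmbeddingClient();
+ *   const { embeddings } = await embedder.generateEmbeddingFromText(["hello", "world"]);
+ *   // embeddings is a number[][] with one vector per input string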
+ */ +export class OllamaEmbeddingClient implements EmbeddingClient { + ollama: Ollama; + + constructor() { + this.ollama = new Ollama({ + host: serverConfig.inference.ollamaBaseUrl, + fetch: customFetch, + }); + } + + async generateEmbeddingFromText( + inputs: string[], + ): Promise { + const embedding = await this.ollama.embed({ + model: serverConfig.embedding.textModel, + input: inputs, + // Truncate the input to fit into the model's max token limit, + // in the future we want to add a way to split the input into multiple parts. + truncate: true, + }); + return { embeddings: embedding.embeddings }; + } +} diff --git a/packages/shared/inference/openai.test.ts b/packages/shared/inference/openai.test.ts new file mode 100644 index 000000000..76cd08054 --- /dev/null +++ b/packages/shared/inference/openai.test.ts @@ -0,0 +1,398 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { z } from "zod"; + +import { OpenAIEmbeddingClient, OpenAIInferenceClient } from "./openai"; + +// Mock the OpenAI SDK +const mockChatCompletionsCreate = vi.fn(); +const mockResponsesCreate = vi.fn(); +const mockEmbeddingsCreate = vi.fn(); + +vi.mock("openai", () => ({ + default: vi.fn().mockImplementation(() => ({ + chat: { + completions: { + create: mockChatCompletionsCreate, + }, + }, + responses: { + create: mockResponsesCreate, + }, + embeddings: { + create: mockEmbeddingsCreate, + }, + })), +})); + +// Mock serverConfig +vi.mock("../config", () => ({ + default: { + inference: { + provider: "openai", + openAIApiKey: "test-api-key", + openAIBaseUrl: undefined, + openAIProxyUrl: undefined, + openaiUseResponsesApi: false, + openaiReasoningEffort: "low", + textModel: "gpt-4o-mini", + imageModel: "gpt-4o-mini", + maxOutputTokens: 2048, + outputSchema: "structured", + }, + embedding: { + provider: "openai", + textModel: "text-embedding-3-small", + }, + }, +})); + +describe("OpenAIInferenceClient", () => { + let client: OpenAIInferenceClient; + + beforeEach(() => { + vi.clearAllMocks(); + client = new OpenAIInferenceClient(); + }); + + describe("inferFromText", () => { + it("should use Chat Completions API by default", async () => { + mockChatCompletionsCreate.mockResolvedValueOnce({ + choices: [{ message: { content: "test response" } }], + usage: { total_tokens: 100 }, + }); + + const result = await client.inferFromText("test prompt", {}); + + expect(mockChatCompletionsCreate).toHaveBeenCalledTimes(1); + expect(mockResponsesCreate).not.toHaveBeenCalled(); + expect(result.response).toBe("test response"); + expect(result.totalTokens).toBe(100); + }); + + it("should pass prompt as user message", async () => { + mockChatCompletionsCreate.mockResolvedValueOnce({ + choices: [{ message: { content: "response" } }], + usage: { total_tokens: 50 }, + }); + + await client.inferFromText("my prompt", {}); + + expect(mockChatCompletionsCreate).toHaveBeenCalledWith( + expect.objectContaining({ + messages: [{ role: "user", content: "my prompt" }], + }), + expect.any(Object), + ); + }); + + it("should include model and max_tokens in request", async () => { + mockChatCompletionsCreate.mockResolvedValueOnce({ + choices: [{ message: { content: "response" } }], + usage: { total_tokens: 50 }, + }); + + await client.inferFromText("prompt", {}); + + expect(mockChatCompletionsCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "gpt-4o-mini", + max_tokens: 2048, + }), + expect.any(Object), + ); + }); + + it("should use structured output format when schema is provided", async () => { + 
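+      // outputSchema is "structured" in the mocked config, so the client is expected to
+      // wrap the zod schema via zodResponseFormat into a json_schema response_format.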
mockChatCompletionsCreate.mockResolvedValueOnce({ + choices: [{ message: { content: '{"name": "test"}' } }], + usage: { total_tokens: 50 }, + }); + + const schema = z.object({ name: z.string() }); + await client.inferFromText("prompt", { schema }); + + expect(mockChatCompletionsCreate).toHaveBeenCalledWith( + expect.objectContaining({ + response_format: expect.objectContaining({ + type: "json_schema", + }), + }), + expect.any(Object), + ); + }); + + it("should throw error when no message content returned", async () => { + mockChatCompletionsCreate.mockResolvedValueOnce({ + choices: [{ message: { content: null } }], + usage: { total_tokens: 50 }, + }); + + await expect(client.inferFromText("prompt", {})).rejects.toThrow( + "Got no message content from OpenAI Chat Completions", + ); + }); + + it("should pass abort signal to API call", async () => { + mockChatCompletionsCreate.mockResolvedValueOnce({ + choices: [{ message: { content: "response" } }], + usage: { total_tokens: 50 }, + }); + + const controller = new AbortController(); + await client.inferFromText("prompt", { + abortSignal: controller.signal, + }); + + expect(mockChatCompletionsCreate).toHaveBeenCalledWith( + expect.any(Object), + expect.objectContaining({ + signal: controller.signal, + }), + ); + }); + }); + + describe("inferFromImage", () => { + it("should include image in message content", async () => { + mockChatCompletionsCreate.mockResolvedValueOnce({ + choices: [{ message: { content: "image description" } }], + usage: { total_tokens: 150 }, + }); + + await client.inferFromImage( + "describe this image", + "image/png", + "base64encodedimage", + {}, + ); + + expect(mockChatCompletionsCreate).toHaveBeenCalledWith( + expect.objectContaining({ + messages: [ + { + role: "user", + content: [ + { type: "text", text: "describe this image" }, + { + type: "image_url", + image_url: { + url: "data:image/png;base64,base64encodedimage", + detail: "low", + }, + }, + ], + }, + ], + }), + expect.any(Object), + ); + }); + + it("should return response and token count", async () => { + mockChatCompletionsCreate.mockResolvedValueOnce({ + choices: [{ message: { content: "A cat sitting on a table" } }], + usage: { total_tokens: 200 }, + }); + + const result = await client.inferFromImage( + "describe", + "image/jpeg", + "imagedata", + {}, + ); + + expect(result.response).toBe("A cat sitting on a table"); + expect(result.totalTokens).toBe(200); + }); + }); +}); + +describe("OpenAIInferenceClient with Responses API", () => { + let client: OpenAIInferenceClient; + + beforeEach(async () => { + vi.clearAllMocks(); + + // Enable Responses API for GPT-5 models + const { default: serverConfig } = await import("../config"); + serverConfig.inference.openaiUseResponsesApi = true; + serverConfig.inference.textModel = "gpt-5-mini"; + serverConfig.inference.imageModel = "gpt-5-mini"; + + client = new OpenAIInferenceClient(); + }); + + afterEach(async () => { + // Restore original config values to prevent test pollution + const { default: serverConfig } = await import("../config"); + serverConfig.inference.openaiUseResponsesApi = false; + serverConfig.inference.textModel = "gpt-4o-mini"; + serverConfig.inference.imageModel = "gpt-4o-mini"; + }); + + it("should use Responses API for GPT-5 models when enabled", async () => { + mockResponsesCreate.mockResolvedValueOnce({ + output_text: "response from responses api", + usage: { total_tokens: 100 }, + }); + + const result = await client.inferFromText("test prompt", {}); + + 
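+    // openaiUseResponsesApi is enabled and the text model is gpt-5-mini in this block's
+    // setup, so the request should go through responses.create, not chat.completions.create.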
expect(mockResponsesCreate).toHaveBeenCalledTimes(1); + expect(mockChatCompletionsCreate).not.toHaveBeenCalled(); + expect(result.response).toBe("response from responses api"); + }); + + it("should include reasoning effort for GPT-5 models", async () => { + mockResponsesCreate.mockResolvedValueOnce({ + output_text: "response", + usage: { total_tokens: 100 }, + }); + + await client.inferFromText("prompt", { reasoningEffort: "high" }); + + expect(mockResponsesCreate).toHaveBeenCalledWith( + expect.objectContaining({ + reasoning: { effort: "high" }, + }), + expect.any(Object), + ); + }); +}); + +describe("OpenAIInferenceClient with json output", () => { + let client: OpenAIInferenceClient; + + beforeEach(async () => { + vi.clearAllMocks(); + + // Set output schema to json + const { default: serverConfig } = await import("../config"); + serverConfig.inference.outputSchema = "json"; + serverConfig.inference.openaiUseResponsesApi = false; + serverConfig.inference.textModel = "gpt-4o-mini"; + + client = new OpenAIInferenceClient(); + }); + + afterEach(async () => { + // Restore original config values to prevent test pollution + const { default: serverConfig } = await import("../config"); + serverConfig.inference.outputSchema = "structured"; + }); + + it("should use json_object format when outputSchema is json", async () => { + mockChatCompletionsCreate.mockResolvedValueOnce({ + choices: [{ message: { content: '{"tags": ["test"]}' } }], + usage: { total_tokens: 50 }, + }); + + const schema = z.object({ tags: z.array(z.string()) }); + await client.inferFromText("prompt", { schema }); + + expect(mockChatCompletionsCreate).toHaveBeenCalledWith( + expect.objectContaining({ + response_format: { type: "json_object" }, + }), + expect.any(Object), + ); + }); + + it("should return JSON response as-is in json mode", async () => { + mockChatCompletionsCreate.mockResolvedValueOnce({ + choices: [{ message: { content: '{"tags": ["ai", "ml"]}' } }], + usage: { total_tokens: 50 }, + }); + + const schema = z.object({ tags: z.array(z.string()) }); + const result = await client.inferFromText("prompt", { schema }); + + // Chat Completions returns response as-is (no normalization) + expect(result.response).toBe('{"tags": ["ai", "ml"]}'); + }); +}); + +describe("OpenAIInferenceClient with plain output", () => { + let client: OpenAIInferenceClient; + + beforeEach(async () => { + vi.clearAllMocks(); + + // Set output schema to plain + const { default: serverConfig } = await import("../config"); + serverConfig.inference.outputSchema = "plain"; + serverConfig.inference.openaiUseResponsesApi = false; + serverConfig.inference.textModel = "gpt-4o-mini"; + + client = new OpenAIInferenceClient(); + }); + + afterEach(async () => { + // Restore original config values to prevent test pollution + const { default: serverConfig } = await import("../config"); + serverConfig.inference.outputSchema = "structured"; + }); + + it("should not set response_format when outputSchema is plain", async () => { + mockChatCompletionsCreate.mockResolvedValueOnce({ + choices: [{ message: { content: "plain text response" } }], + usage: { total_tokens: 50 }, + }); + + const schema = z.object({ tags: z.array(z.string()) }); + await client.inferFromText("prompt", { schema }); + + expect(mockChatCompletionsCreate).toHaveBeenCalledWith( + expect.objectContaining({ + response_format: undefined, + }), + expect.any(Object), + ); + }); + + it("should return raw text without JSON parsing in plain mode", async () => { + 
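+    // In "plain" mode no response_format is sent, so the model's text is expected to be
+    // returned to the caller exactly as received.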
mockChatCompletionsCreate.mockResolvedValueOnce({ + choices: [{ message: { content: "This is plain text, not JSON" } }], + usage: { total_tokens: 50 }, + }); + + const result = await client.inferFromText("prompt", {}); + + expect(result.response).toBe("This is plain text, not JSON"); + }); +}); + +describe("OpenAIEmbeddingClient", () => { + let client: OpenAIEmbeddingClient; + + beforeEach(() => { + vi.clearAllMocks(); + client = new OpenAIEmbeddingClient(); + }); + + it("should generate embeddings for text inputs", async () => { + mockEmbeddingsCreate.mockResolvedValueOnce({ + data: [{ embedding: [0.1, 0.2, 0.3] }, { embedding: [0.4, 0.5, 0.6] }], + }); + + const result = await client.generateEmbeddingFromText(["hello", "world"]); + + expect(result.embeddings).toEqual([ + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + ]); + }); + + it("should use configured embedding model", async () => { + mockEmbeddingsCreate.mockResolvedValueOnce({ + data: [{ embedding: [0.1] }], + }); + + await client.generateEmbeddingFromText(["test"]); + + expect(mockEmbeddingsCreate).toHaveBeenCalledWith({ + model: "text-embedding-3-small", + input: ["test"], + }); + }); +}); diff --git a/packages/shared/inference/openai.ts b/packages/shared/inference/openai.ts new file mode 100644 index 000000000..6c0f4edc4 --- /dev/null +++ b/packages/shared/inference/openai.ts @@ -0,0 +1,432 @@ +import type { Response as OpenAIResponse } from "openai/resources/responses/responses"; +import OpenAI from "openai"; +import { zodResponseFormat } from "openai/helpers/zod"; +import * as undici from "undici"; +import { zodToJsonSchema } from "zod-to-json-schema"; + +import type { + EmbeddingClient, + EmbeddingResponse, + InferenceClient, + InferenceOptions, + InferenceResponse, +} from "./types"; +import serverConfig from "../config"; +import { defaultInferenceOptions } from "./types"; + +/** + * Supported image media types for OpenAI's vision API. + */ +const SUPPORTED_IMAGE_TYPES = [ + "image/jpeg", + "image/png", + "image/gif", + "image/webp", +] as const; +type OpenAIImageType = (typeof SUPPORTED_IMAGE_TYPES)[number]; + +/** + * Validate and convert a content type to OpenAI's expected image type. + * Throws if the content type is not supported. + */ +function validateImageType(contentType: string): OpenAIImageType { + if (!SUPPORTED_IMAGE_TYPES.includes(contentType as OpenAIImageType)) { + throw new Error( + `Unsupported image type: "${contentType}". OpenAI supports: ${SUPPORTED_IMAGE_TYPES.join(", ")}`, + ); + } + return contentType as OpenAIImageType; +} + +/** + * Create an OpenAI client instance with shared configuration. + * Used by both inference and embedding clients. + */ +function createOpenAIClient(): OpenAI { + const fetchOptions = serverConfig.inference.openAIProxyUrl + ? { + dispatcher: new undici.ProxyAgent( + serverConfig.inference.openAIProxyUrl, + ), + } + : undefined; + + return new OpenAI({ + apiKey: serverConfig.inference.openAIApiKey, + baseURL: serverConfig.inference.openAIBaseUrl, + ...(fetchOptions ? { fetchOptions } : {}), + defaultHeaders: { + "X-Title": "Karakeep", + "HTTP-Referer": "https://karakeep.app", + }, + }); +} + +/** + * Check if model requires max_completion_tokens instead of max_tokens. + * GPT-5 and o-series models require this parameter. + */ +function requiresMaxCompletionTokens(model: string): boolean { + return ( + model.startsWith("gpt-5") || + model.startsWith("o1") || + model.startsWith("o3") || + model.startsWith("o4") + ); +} + +/** + * Check if model is an o-series reasoning model. 
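+ * For example, isOSeriesModel("o3-mini") and isOSeriesModel("o1-preview") return true,
+ * while isOSeriesModel("gpt-5-mini") returns false (GPT-5 also requires
+ * max_completion_tokens but is not an o-series reasoning model).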
+ */ +function isOSeriesModel(model: string): boolean { + return ( + model.startsWith("o1") || model.startsWith("o3") || model.startsWith("o4") + ); +} + +/** + * Get the appropriate token limit parameter for the model. + * GPT-5 and o-series require max_completion_tokens; others use max_tokens. + * Note: max_tokens is deprecated in OpenAI API, but kept for older models. + */ +function getTokenLimitParam(model: string): Record { + const tokens = serverConfig.inference.maxOutputTokens; + if (requiresMaxCompletionTokens(model)) { + return { max_completion_tokens: tokens }; + } + return { max_tokens: tokens }; +} + +/** + * Determines which OpenAI API to use based on model name. + * GPT-5+ models can use the Responses API for advanced features. + * GPT-4 and earlier use Chat Completions API. + * Exported for testing. + */ +export function shouldUseResponsesApi(model: string): boolean { + // Use Responses API for GPT-5+ models when explicitly enabled + if (!serverConfig.inference.openaiUseResponsesApi) { + return false; + } + return requiresMaxCompletionTokens(model); +} + +/** + * Extract the response content from a chat completion, with defensive checks. + * Throws if no content is available. + */ +function extractChatCompletionContent( + chatCompletion: OpenAI.Chat.Completions.ChatCompletion, +): string { + const firstChoice = chatCompletion.choices[0]; + if (!firstChoice?.message?.content) { + throw new Error( + "Got no message content from OpenAI Chat Completions (empty choices or null content)", + ); + } + return firstChoice.message.content; +} + +/** + * OpenAI Inference Client + * + * Supports both Chat Completions API (legacy, broad compatibility) and + * Responses API (newer, GPT-5+ features like reasoning effort). + */ +export class OpenAIInferenceClient implements InferenceClient { + openAI: OpenAI; + + constructor() { + this.openAI = createOpenAIClient(); + } + + async inferFromText( + prompt: string, + _opts: Partial, + ): Promise { + const optsWithDefaults: InferenceOptions = { + ...defaultInferenceOptions, + ..._opts, + }; + + const model = serverConfig.inference.textModel; + + if (shouldUseResponsesApi(model)) { + return this.inferFromTextResponses(prompt, model, optsWithDefaults); + } + return this.inferFromTextChatCompletions(prompt, model, optsWithDefaults); + } + + async inferFromImage( + prompt: string, + contentType: string, + image: string, + _opts: Partial, + ): Promise { + const optsWithDefaults: InferenceOptions = { + ...defaultInferenceOptions, + ..._opts, + }; + + // Validate image content type + const validatedType = validateImageType(contentType); + + const model = serverConfig.inference.imageModel; + + if (shouldUseResponsesApi(model)) { + return this.inferFromImageResponses( + prompt, + validatedType, + image, + model, + optsWithDefaults, + ); + } + return this.inferFromImageChatCompletions( + prompt, + validatedType, + image, + model, + optsWithDefaults, + ); + } + + // =========================================================================== + // Chat Completions API (Legacy - works with all models) + // =========================================================================== + + private async inferFromTextChatCompletions( + prompt: string, + model: string, + opts: InferenceOptions, + ): Promise { + const chatCompletion = await this.openAI.chat.completions.create( + { + messages: [{ role: "user", content: prompt }], + model, + ...getTokenLimitParam(model), + response_format: this.getChatCompletionsResponseFormat(opts), + }, + { + signal: 
opts.abortSignal, + }, + ); + + const response = extractChatCompletionContent(chatCompletion); + return { response, totalTokens: chatCompletion.usage?.total_tokens }; + } + + private async inferFromImageChatCompletions( + prompt: string, + contentType: string, + image: string, + model: string, + opts: InferenceOptions, + ): Promise { + const chatCompletion = await this.openAI.chat.completions.create( + { + model, + ...getTokenLimitParam(model), + response_format: this.getChatCompletionsResponseFormat(opts), + messages: [ + { + role: "user", + content: [ + { type: "text", text: prompt }, + { + type: "image_url", + image_url: { + url: `data:${contentType};base64,${image}`, + detail: "low", + }, + }, + ], + }, + ], + }, + { + signal: opts.abortSignal, + }, + ); + + const response = extractChatCompletionContent(chatCompletion); + return { response, totalTokens: chatCompletion.usage?.total_tokens }; + } + + private getChatCompletionsResponseFormat(opts: InferenceOptions) { + const outputSchema = serverConfig.inference.outputSchema; + + if (outputSchema === "structured" && opts.schema) { + return zodResponseFormat(opts.schema, "schema"); + } else if (outputSchema === "json") { + return { type: "json_object" as const }; + } + return undefined; + } + + // =========================================================================== + // Responses API (GPT-5+ features: reasoning, verbosity, conversation) + // =========================================================================== + + private async inferFromTextResponses( + prompt: string, + model: string, + opts: InferenceOptions, + ): Promise { + const requestObj = this.buildResponsesRequest(model, prompt, opts); + + const response = await this.openAI.responses.create(requestObj, { + signal: opts.abortSignal, + }); + + return this.extractResponsesApiResult(response, opts); + } + + private async inferFromImageResponses( + prompt: string, + contentType: string, + image: string, + model: string, + opts: InferenceOptions, + ): Promise { + // Responses API handles images as structured input + const input = [ + { + type: "message", + role: "user", + content: [ + { type: "input_text", text: prompt }, + { + type: "input_image", + image_url: `data:${contentType};base64,${image}`, + }, + ], + }, + ]; + + const requestObj = this.buildResponsesRequest(model, input, opts); + + const response = await this.openAI.responses.create(requestObj, { + signal: opts.abortSignal, + }); + + return this.extractResponsesApiResult(response, opts); + } + + private buildResponsesRequest( + model: string, + input: string | unknown[], + opts: InferenceOptions, + ) { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const requestObj: any = { + model, + input, + text: this.getResponsesTextFormat(opts), + store: opts.store ?? false, + temperature: 1, + top_p: 1, + }; + + // GPT-5 and o-series models support reasoning effort + // Note: o-series models default to "medium" and don't support "none" + if (requiresMaxCompletionTokens(model)) { + const effort = + opts.reasoningEffort || + serverConfig.inference.openaiReasoningEffort || + "low"; + + // Validate: o-series models don't support "none" reasoning effort + if (isOSeriesModel(model) && effort === "none") { + throw new Error( + `Model "${model}" does not support reasoning effort "none". 
` + + `O-series models require at least "low" reasoning effort.`, + ); + } + + requestObj.reasoning = { effort }; + } + + // o-series models (o1, o3, o4) need max_output_tokens to control output length + if (isOSeriesModel(model)) { + requestObj.max_output_tokens = serverConfig.inference.maxOutputTokens; + } + + if (opts.previousResponseId) { + requestObj.previous_response_id = opts.previousResponseId; + } + + return requestObj; + } + + private getResponsesTextFormat(opts: InferenceOptions) { + if (opts.schema) { + return { + format: { + type: "json_schema", + name: "response", + strict: true, + schema: zodToJsonSchema(opts.schema), + }, + }; + } else if (serverConfig.inference.outputSchema === "json") { + return { format: { type: "json" } }; + } + return { format: { type: "text" } }; + } + + private extractResponsesApiResult( + response: OpenAIResponse, + opts: InferenceOptions, + ): InferenceResponse { + // Use SDK's output_text convenience property (aggregates all text output) + const outputText = response.output_text; + + if (!outputText) { + throw new Error("Got no output text from OpenAI Responses API"); + } + + // Parse JSON if expecting structured output + let finalResponse = outputText; + if (opts.schema || serverConfig.inference.outputSchema === "json") { + try { + finalResponse = JSON.stringify(JSON.parse(outputText)); + } catch { + // If parsing fails, return as-is + finalResponse = outputText; + } + } + + return { + response: finalResponse, + totalTokens: response.usage?.total_tokens, + }; + } +} + +/** + * OpenAI Embedding Client + * + * Uses the /v1/embeddings endpoint for text embeddings. + */ +export class OpenAIEmbeddingClient implements EmbeddingClient { + openAI: OpenAI; + + constructor() { + this.openAI = createOpenAIClient(); + } + + async generateEmbeddingFromText( + inputs: string[], + ): Promise { + const model = serverConfig.embedding.textModel; + const embedResponse = await this.openAI.embeddings.create({ + model, + input: inputs, + }); + const embedding2D: number[][] = embedResponse.data.map( + (embedding: OpenAI.Embedding) => embedding.embedding, + ); + return { embeddings: embedding2D }; + } +} diff --git a/packages/shared/inference/types.ts b/packages/shared/inference/types.ts new file mode 100644 index 000000000..98746e567 --- /dev/null +++ b/packages/shared/inference/types.ts @@ -0,0 +1,48 @@ +import type { z } from "zod"; + +export interface InferenceResponse { + response: string; + totalTokens: number | undefined; +} + +export interface EmbeddingResponse { + embeddings: number[][]; +} + +export interface InferenceOptions { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + schema: z.ZodSchema | null; + abortSignal?: AbortSignal; + /** + * Reasoning effort for OpenAI Responses API. 
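+   * Only consulted when the OpenAI client uses the Responses API; the Chat Completions
+   * path does not send a reasoning parameter.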
+ * Values: "none" | "minimal" | "low" | "medium" | "high" | "xhigh" + * - Models before gpt-5.1 (o-series): default "medium", don't support "none" + * - gpt-5.1: default "none", supports "none" | "low" | "medium" | "high" + * - gpt-5-pro: only supports "high" + * - Models after gpt-5.1-codex-max: additionally support "xhigh" + */ + reasoningEffort?: "none" | "minimal" | "low" | "medium" | "high" | "xhigh"; + previousResponseId?: string; + store?: boolean; +} + +export const defaultInferenceOptions: InferenceOptions = { + schema: null, +}; + +export interface InferenceClient { + inferFromText( + prompt: string, + opts: Partial, + ): Promise; + inferFromImage( + prompt: string, + contentType: string, + image: string, + opts: Partial, + ): Promise; +} + +export interface EmbeddingClient { + generateEmbeddingFromText(inputs: string[]): Promise; +} diff --git a/packages/shared/package.json b/packages/shared/package.json index 937393547..43b7357db 100644 --- a/packages/shared/package.json +++ b/packages/shared/package.json @@ -5,7 +5,9 @@ "private": true, "type": "module", "dependencies": { + "@anthropic-ai/sdk": "^0.71.2", "@aws-sdk/client-s3": "^3.842.0", + "@google/genai": "^1.34.0", "glob": "^11.0.0", "html-to-text": "^9.0.5", "js-tiktoken": "^1.0.20", @@ -14,7 +16,7 @@ "openai": "^4.86.1", "typescript-parsec": "^0.3.4", "winston": "^3.11.0", - "zod": "^3.24.2", + "zod": "^3.25.0", "zod-to-json-schema": "^3.24.3" }, "devDependencies": { diff --git a/packages/trpc/package.json b/packages/trpc/package.json index d9fa12c07..7783b17c1 100644 --- a/packages/trpc/package.json +++ b/packages/trpc/package.json @@ -26,7 +26,7 @@ "stripe": "^18.3.0", "superjson": "^2.2.1", "tiny-invariant": "^1.3.3", - "zod": "^3.24.2" + "zod": "^3.25.0" }, "devDependencies": { "@karakeep/prettier-config": "workspace:^0.1.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7b4be6d2b..badf31f49 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -111,8 +111,8 @@ importers: specifier: ^1.0.7 version: 1.0.7(tailwindcss@3.4.1) zod: - specifier: ^3.24.2 - version: 3.24.2 + specifier: ^3.25.0 + version: 3.25.76 devDependencies: '@crxjs/vite-plugin': specifier: 2.2.0 @@ -287,8 +287,8 @@ importers: specifier: ^7.2.0 version: 7.2.0 zod: - specifier: ^3.24.2 - version: 3.24.2 + specifier: ^3.25.0 + version: 3.25.76 devDependencies: '@karakeep/prettier-config': specifier: workspace:^0.1.0 @@ -459,8 +459,8 @@ importers: specifier: ^2.2.1 version: 2.2.1 zod: - specifier: ^3.24.2 - version: 3.24.2 + specifier: ^3.25.0 + version: 3.25.76 zustand: specifier: ^5.0.5 version: 5.0.8(@types/react@19.1.8)(immer@9.0.21)(react@19.1.0)(use-sync-external-store@1.5.0(react@19.1.0)) @@ -748,8 +748,8 @@ importers: specifier: ^2.2.1 version: 2.2.1 zod: - specifier: ^3.24.2 - version: 3.24.2 + specifier: ^3.25.0 + version: 3.25.76 zustand: specifier: ^5.0.5 version: 5.0.8(@types/react@19.1.8)(immer@9.0.21)(react@19.1.0)(use-sync-external-store@1.5.0(react@19.1.0)) @@ -953,8 +953,8 @@ importers: specifier: ^5.9 version: 5.9.3 zod: - specifier: ^3.24.2 - version: 3.24.2 + specifier: ^3.25.0 + version: 3.25.76 devDependencies: '@karakeep/prettier-config': specifier: workspace:^0.1.0 @@ -1034,7 +1034,7 @@ importers: version: 0.4.0(@trpc/server@11.4.3(typescript@5.9.3))(hono@4.10.6) '@hono/zod-validator': specifier: ^0.5.0 - version: 0.5.0(hono@4.10.6)(zod@3.24.2) + version: 0.5.0(hono@4.10.6)(zod@3.25.76) '@karakeep/db': specifier: workspace:* version: link:../db @@ -1066,8 +1066,8 @@ importers: specifier: ^1.2.2 version: 1.2.2 zod: - specifier: 
^3.24.2 - version: 3.24.2 + specifier: ^3.25.0 + version: 3.25.76 devDependencies: '@karakeep/prettier-config': specifier: workspace:^0.1.0 @@ -1112,8 +1112,8 @@ importers: specifier: ^6.0.0 version: 6.0.0 zod: - specifier: ^3.24.2 - version: 3.24.2 + specifier: ^3.25.0 + version: 3.25.76 devDependencies: '@karakeep/prettier-config': specifier: workspace:^0.1.0 @@ -1195,8 +1195,8 @@ importers: specifier: ^2.2.1 version: 2.2.1 zod: - specifier: ^3.24.2 - version: 3.24.2 + specifier: ^3.25.0 + version: 3.25.76 devDependencies: '@karakeep/prettier-config': specifier: workspace:^0.1.0 @@ -1221,13 +1221,13 @@ importers: dependencies: '@asteasolutions/zod-to-openapi': specifier: ^7.2.0 - version: 7.3.3(zod@3.24.2) + version: 7.3.3(zod@3.25.76) '@karakeep/shared': specifier: workspace:^0.1.0 version: link:../shared zod: - specifier: ^3.24.2 - version: 3.24.2 + specifier: ^3.25.0 + version: 3.25.76 devDependencies: '@karakeep/prettier-config': specifier: workspace:^0.1.0 @@ -1300,9 +1300,15 @@ importers: packages/shared: dependencies: + '@anthropic-ai/sdk': + specifier: ^0.71.2 + version: 0.71.2(zod@3.25.76) '@aws-sdk/client-s3': specifier: ^3.842.0 version: 3.842.0 + '@google/genai': + specifier: ^1.34.0 + version: 1.34.0 glob: specifier: ^11.0.0 version: 11.0.2 @@ -1320,7 +1326,7 @@ importers: version: 0.5.17 openai: specifier: ^4.86.1 - version: 4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.24.2) + version: 4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76) typescript-parsec: specifier: ^0.3.4 version: 0.3.4 @@ -1328,11 +1334,11 @@ importers: specifier: ^3.11.0 version: 3.17.0 zod: - specifier: ^3.24.2 - version: 3.24.2 + specifier: ^3.25.0 + version: 3.25.76 zod-to-json-schema: specifier: ^3.24.3 - version: 3.24.5(zod@3.24.2) + version: 3.24.5(zod@3.25.76) devDependencies: '@karakeep/prettier-config': specifier: workspace:^0.1.0 @@ -1372,7 +1378,7 @@ importers: version: 19.1.0 react-native: specifier: 0.79.5 - version: 0.79.5(@babel/core@7.28.0)(@types/react@19.2.5)(react@19.1.0) + version: 0.79.5(@babel/core@7.26.0)(@types/react@19.2.5)(react@19.1.0) superjson: specifier: ^2.2.1 version: 2.2.1 @@ -1463,8 +1469,8 @@ importers: specifier: ^1.3.3 version: 1.3.3 zod: - specifier: ^3.24.2 - version: 3.24.2 + specifier: ^3.25.0 + version: 3.25.76 devDependencies: '@karakeep/prettier-config': specifier: workspace:^0.1.0 @@ -1560,8 +1566,8 @@ importers: specifier: ^5.3.0 version: 5.4.1 zod: - specifier: ^3.24.2 - version: 3.24.2 + specifier: ^3.25.0 + version: 3.25.76 devDependencies: '@types/node': specifier: ^24 @@ -1669,6 +1675,15 @@ packages: resolution: {integrity: sha512-30iZtAPgz+LTIYoeivqYo853f02jBYSd5uGnGpkFV0M3xOt9aN73erkgYAmZU43x4VfqcnLxW9Kpg3R5LC4YYw==} engines: {node: '>=6.0.0'} + '@anthropic-ai/sdk@0.71.2': + resolution: {integrity: sha512-TGNDEUuEstk/DKu0/TflXAEt+p+p/WhTlFzEnoosvbaDU2LTjm42igSdlL0VijrKpWejtOKxX0b8A7uc+XiSAQ==} + hasBin: true + peerDependencies: + zod: ^3.25.0 || ^4.0.0 + peerDependenciesMeta: + zod: + optional: true + '@apideck/better-ajv-errors@0.3.6': resolution: {integrity: sha512-P+ZygBLZtkp0qqOAJJVX4oX/sFo5JR3eBWwwuqHHhK0GIgQOKWrAfiAaWX0aArHkRWHMuggFEgAZNxVPwPZYaA==} engines: {node: '>=10'} @@ -3626,6 +3641,15 @@ packages: '@ghostery/url-parser@1.3.0': resolution: {integrity: sha512-FEzdSeiva0Mt3bR4xePFzthhjT4IzvA5QTvS1xXkNyLpMGeq40mb3V2fSs0ZItRaP9IybZthDfHUSbQ1HLdx4Q==} + '@google/genai@1.34.0': + resolution: {integrity: sha512-vu53UMPvjmb7PGzlYu6Tzxso8Dfhn+a7eQFaS2uNemVtDZKwzSpJ5+ikqBbXplF7RGB1STcVDqCkPvquiwb2sw==} + engines: {node: '>=20.0.0'} + peerDependencies: + 
'@modelcontextprotocol/sdk': ^1.24.0 + peerDependenciesMeta: + '@modelcontextprotocol/sdk': + optional: true + '@hapi/hoek@9.3.0': resolution: {integrity: sha512-/c6rf4UJlmHlC9b5BaNvzAcFv7HZ2QHaV0D4/HNlBdvFnvQq8RI4kYdhyPCl7Xj+oWvTWQ8ujhqS53LIgAe6KQ==} @@ -6971,6 +6995,9 @@ packages: big.js@5.2.2: resolution: {integrity: sha512-vyL2OymJxmarO8gxMr0mhChsO9QGwhynfuu4+MHTAW6czfq9humCB7rKpUjDd9YUiDPU4mzpyupFSvOClAwbmQ==} + bignumber.js@9.3.1: + resolution: {integrity: sha512-Ko0uX15oIUS7wJ3Rb30Fs6SkVbLmPBAKdlm7q9+ak9bbIeFf0MwuBsQV6z7+X768/cHsfg+WlysDWJcmthjsjQ==} + binary-extensions@2.3.0: resolution: {integrity: sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==} engines: {node: '>=8'} @@ -7048,6 +7075,9 @@ packages: resolution: {integrity: sha512-Db1SbgBS/fg/392AblrMJk97KggmvYhr4pB5ZIMTWtaivCPMWLkmb7m21cJvpvgK+J3nsU2CmmixNBZx4vFj/w==} engines: {node: '>=8.0.0'} + buffer-equal-constant-time@1.0.1: + resolution: {integrity: sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==} + buffer-from@1.1.2: resolution: {integrity: sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==} @@ -8258,6 +8288,9 @@ packages: eastasianwidth@0.2.0: resolution: {integrity: sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==} + ecdsa-sig-formatter@1.0.11: + resolution: {integrity: sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==} + ee-first@1.1.1: resolution: {integrity: sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==} @@ -9066,6 +9099,14 @@ packages: functions-have-names@1.2.3: resolution: {integrity: sha512-xckBUXyTIqT97tq2x2AMb+g163b5JFysYk0x4qxNFwbfQkmNZoiRHb6sPzI9/QV33WeuvVYBUIiD4NzNIyqaRQ==} + gaxios@7.1.3: + resolution: {integrity: sha512-YGGyuEdVIjqxkxVH1pUTMY/XtmmsApXrCVv5EU25iX6inEPbV+VakJfLealkBtJN69AQmh1eGOdCl9Sm1UP6XQ==} + engines: {node: '>=18'} + + gcp-metadata@8.1.2: + resolution: {integrity: sha512-zV/5HKTfCeKWnxG0Dmrw51hEWFGfcF2xiXqcA3+J90WDuP0SvoiSO5ORvcBsifmx/FoIjgQN3oNOGaQ5PhLFkg==} + engines: {node: '>=18'} + gel@2.1.0: resolution: {integrity: sha512-HCeRqInCt6BjbMmeghJ6BKeYwOj7WJT5Db6IWWAA3IMUUa7or7zJfTUEkUWCxiOtoXnwnm96sFK9Fr47Yh2hOA==} engines: {node: '>= 18.0.0'} @@ -9191,6 +9232,14 @@ packages: engines: {node: '>=14'} deprecated: The gm module has been sunset. Please migrate to an alternative. 
https://github.com/aheckmann/gm?tab=readme-ov-file#2025-02-24-this-project-is-not-maintained + google-auth-library@10.5.0: + resolution: {integrity: sha512-7ABviyMOlX5hIVD60YOfHw4/CxOfBhyduaYB+wbFWCWoni4N7SLcV46hrVRktuBbZjFC9ONyqamZITN7q3n32w==} + engines: {node: '>=18'} + + google-logging-utils@1.1.3: + resolution: {integrity: sha512-eAmLkjDjAFCVXg7A1unxHsLf961m6y17QFqXqAXGj/gVkKFrEICfStRfwUlGNfeCEjNRa32JEWOUTlYXPyyKvA==} + engines: {node: '>=14'} + gopd@1.2.0: resolution: {integrity: sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==} engines: {node: '>= 0.4'} @@ -9216,6 +9265,10 @@ packages: resolution: {integrity: sha512-5v6yZd4JK3eMI3FqqCouswVqwugaA9r4dNZB1wwcmrD02QkV5H0y7XBQW8QwQqEaZY1pM9aqORSORhJRdNK44Q==} engines: {node: '>=6.0'} + gtoken@8.0.0: + resolution: {integrity: sha512-+CqsMbHPiSTdtSO14O51eMNlrp9N79gmeqmXeouJOhfucAedHw9noVe/n5uJk3tbKE6a+6ZCQg3RPhVhHByAIw==} + engines: {node: '>=18'} + gzip-size@6.0.0: resolution: {integrity: sha512-ax7ZYomf6jqPTQ4+XCpUGyXKHk5WweS+e05MBO4/y3WJ5RkmPXNKvX+bx1behVILVwr6JSQvZAku021CHPXG3Q==} engines: {node: '>=10'} @@ -10084,6 +10137,9 @@ packages: engines: {node: '>=6'} hasBin: true + json-bigint@1.0.0: + resolution: {integrity: sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==} + json-buffer@3.0.1: resolution: {integrity: sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==} @@ -10107,6 +10163,10 @@ packages: resolution: {integrity: sha512-CTUKmIlPJbsWfzRRnOXz+0MjIqvnleIXwFTzz+t9T86HnYX/Rozria6ZVGLktAU9e+NygNljveP+yxqtQp/Q4w==} engines: {node: '>=12.0.0'} + json-schema-to-ts@3.1.1: + resolution: {integrity: sha512-+DWg8jCJG2TEnpy7kOm/7/AxaYoaRbjVB4LFZLySZlWn8exGs3A4OLJR966cVvU26N7X9TWxl+Jsw7dzAqKT6g==} + engines: {node: '>=16'} + json-schema-traverse@0.4.1: resolution: {integrity: sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==} @@ -10128,6 +10188,12 @@ packages: resolution: {integrity: sha512-p/nXbhSEcu3pZRdkW1OfJhpsVtW1gd4Wa1fnQc9YLiTfAjn0312eMKimbdIQzuZl9aa9xUGaRlP9T/CJE/ditQ==} engines: {node: '>=0.10.0'} + jwa@2.0.1: + resolution: {integrity: sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg==} + + jws@4.0.1: + resolution: {integrity: sha512-EKI/M/yqPncGUUh44xz0PxSidXFr/+r0pA70+gIYhjv+et7yxM+s29Y+VGDkovRofQem0fs7Uvf4+YmAdyRduA==} + keyv@4.5.4: resolution: {integrity: sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw==} @@ -13291,6 +13357,10 @@ packages: deprecated: Rimraf versions prior to v4 are no longer supported hasBin: true + rimraf@5.0.10: + resolution: {integrity: sha512-l0OE8wL34P4nJH/H2ffoaniAokM2qSmrtXHmlpvYr5AVVX8msAyW0l8NVJFDxlSK4u3Uh/f41cQheDVdnYijwQ==} + hasBin: true + rolldown-plugin-dts@0.13.14: resolution: {integrity: sha512-wjNhHZz9dlN6PTIXyizB6u/mAg1wEFMW9yw7imEVe3CxHSRnNHVyycIX0yDEOVJfDNISLPbkCIPEpFpizy5+PQ==} engines: {node: '>=20.18.0'} @@ -14293,6 +14363,9 @@ packages: trough@2.2.0: resolution: {integrity: sha512-tmMpK00BjZiUyVyvrBK7knerNgmgvcV/KLVyuma/SC+TQN167GrMRciANTz09+k3zW8L8t60jWO1GpfkZdjTaw==} + ts-algebra@2.0.0: + resolution: {integrity: sha512-FPAhNPFMrkwz76P7cdjdmiShwMynZYN6SgOujD1urY4oNm80Ou9oMdmbR45LotcKOXoy7wSmHkRFE6Mxbrhefw==} + ts-interface-checker@0.1.13: resolution: {integrity: sha512-Y/arvbn+rrz3JCKl9C4kVNfTfSm2/mEp5FSz5EsZSANGPSlQrpRI5M4PKF+mJnE52jOO90PnPSc3Ur3bTQw0gA==} @@ -15293,8 +15366,8 @@ packages: peerDependencies: zod: 
^3.24.1 - zod@3.24.2: - resolution: {integrity: sha512-lY7CDW43ECgW9u1TcT3IoXHflywfVqDYze4waEz812jR/bZ8FHDsl7pFQoSZTz5N+2NqRXs8GBwnAwo3ZNxqhQ==} + zod@3.25.76: + resolution: {integrity: sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==} zustand@5.0.8: resolution: {integrity: sha512-gyPKpIaxY9XcO2vSMrLbiER7QMAMGOQZVRdJ6Zi782jkbzZygq5GI9nG8g+sMgitRtndwaBSl7uiqC49o1SSiw==} @@ -15437,6 +15510,12 @@ snapshots: '@jridgewell/gen-mapping': 0.3.8 '@jridgewell/trace-mapping': 0.3.25 + '@anthropic-ai/sdk@0.71.2(zod@3.25.76)': + dependencies: + json-schema-to-ts: 3.1.1 + optionalDependencies: + zod: 3.25.76 + '@apideck/better-ajv-errors@0.3.6(ajv@8.17.1)': dependencies: ajv: 8.17.1 @@ -15484,10 +15563,10 @@ snapshots: '@asamuzakjp/nwsapi@2.3.9': {} - '@asteasolutions/zod-to-openapi@7.3.3(zod@3.24.2)': + '@asteasolutions/zod-to-openapi@7.3.3(zod@3.25.76)': dependencies: openapi3-ts: 4.4.0 - zod: 3.24.2 + zod: 3.25.76 '@auth/core@0.27.0': dependencies: @@ -18942,6 +19021,15 @@ snapshots: dependencies: tldts-experimental: 7.0.8 + '@google/genai@1.34.0': + dependencies: + google-auth-library: 10.5.0 + ws: 8.18.3 + transitivePeerDependencies: + - bufferutil + - supports-color + - utf-8-validate + '@hapi/hoek@9.3.0': {} '@hapi/topo@5.1.0': @@ -18968,10 +19056,10 @@ snapshots: '@trpc/server': 11.4.3(typescript@5.9.3) hono: 4.10.6 - '@hono/zod-validator@0.5.0(hono@4.10.6)(zod@3.24.2)': + '@hono/zod-validator@0.5.0(hono@4.10.6)(zod@3.25.76)': dependencies: hono: 4.10.6 - zod: 3.24.2 + zod: 3.25.76 '@hookform/error-message@2.0.1(react-dom@19.1.0(react@19.1.0))(react-hook-form@7.62.0(react@19.1.0))(react@19.1.0)': dependencies: @@ -19597,8 +19685,8 @@ snapshots: express-rate-limit: 7.5.0(express@5.1.0) pkce-challenge: 5.0.0 raw-body: 3.0.0 - zod: 3.24.2 - zod-to-json-schema: 3.24.5(zod@3.24.2) + zod: 3.25.76 + zod-to-json-schema: 3.24.5(zod@3.25.76) transitivePeerDependencies: - supports-color @@ -20656,6 +20744,15 @@ snapshots: optionalDependencies: '@types/react': 19.1.8 + '@react-native/virtualized-lists@0.79.5(@types/react@19.2.5)(react-native@0.79.5(@babel/core@7.26.0)(@types/react@19.2.5)(react@19.1.0))(react@19.1.0)': + dependencies: + invariant: 2.2.4 + nullthrows: 1.1.1 + react: 19.1.0 + react-native: 0.79.5(@babel/core@7.26.0)(@types/react@19.2.5)(react@19.1.0) + optionalDependencies: + '@types/react': 19.2.5 + '@react-native/virtualized-lists@0.79.5(@types/react@19.2.5)(react-native@0.79.5(@babel/core@7.28.0)(@types/react@19.2.5)(react@19.1.0))(react@19.1.0)': dependencies: invariant: 2.2.4 @@ -22727,6 +22824,8 @@ snapshots: big.js@5.2.2: {} + bignumber.js@9.3.1: {} + binary-extensions@2.3.0: {} bindings@1.5.0: @@ -22845,6 +22944,8 @@ snapshots: buffer-crc32@1.0.0: {} + buffer-equal-constant-time@1.0.1: {} + buffer-from@1.1.2: {} buffer@5.7.1: @@ -24020,6 +24121,10 @@ snapshots: eastasianwidth@0.2.0: {} + ecdsa-sig-formatter@1.0.11: + dependencies: + safe-buffer: 5.2.1 + ee-first@1.1.1: {} ejs@3.1.10: @@ -25024,6 +25129,23 @@ snapshots: functions-have-names@1.2.3: {} + gaxios@7.1.3: + dependencies: + extend: 3.0.2 + https-proxy-agent: 7.0.6(supports-color@10.0.0) + node-fetch: 3.3.2 + rimraf: 5.0.10 + transitivePeerDependencies: + - supports-color + + gcp-metadata@8.1.2: + dependencies: + gaxios: 7.1.3 + google-logging-utils: 1.1.3 + json-bigint: 1.0.0 + transitivePeerDependencies: + - supports-color + gel@2.1.0: dependencies: '@petamoriken/float16': 3.9.3 @@ -25181,6 +25303,20 @@ snapshots: transitivePeerDependencies: - supports-color + 
google-auth-library@10.5.0: + dependencies: + base64-js: 1.5.1 + ecdsa-sig-formatter: 1.0.11 + gaxios: 7.1.3 + gcp-metadata: 8.1.2 + google-logging-utils: 1.1.3 + gtoken: 8.0.0 + jws: 4.0.1 + transitivePeerDependencies: + - supports-color + + google-logging-utils@1.1.3: {} + gopd@1.2.0: {} got@11.8.6: @@ -25226,6 +25362,13 @@ snapshots: section-matter: 1.0.0 strip-bom-string: 1.0.0 + gtoken@8.0.0: + dependencies: + gaxios: 7.1.3 + jws: 4.0.1 + transitivePeerDependencies: + - supports-color + gzip-size@6.0.0: dependencies: duplexer: 0.1.2 @@ -26260,6 +26403,10 @@ snapshots: jsesc@3.1.0: {} + json-bigint@1.0.0: + dependencies: + bignumber.js: 9.3.1 + json-buffer@3.0.1: {} json-crawl@0.5.3: {} @@ -26282,6 +26429,11 @@ snapshots: json-schema-compare: 0.2.2 lodash: 4.17.21 + json-schema-to-ts@3.1.1: + dependencies: + '@babel/runtime': 7.28.4 + ts-algebra: 2.0.0 + json-schema-traverse@0.4.1: {} json-schema-traverse@1.0.0: {} @@ -26298,6 +26450,17 @@ snapshots: jsonpointer@5.0.1: {} + jwa@2.0.1: + dependencies: + buffer-equal-constant-time: 1.0.1 + ecdsa-sig-formatter: 1.0.11 + safe-buffer: 5.2.1 + + jws@4.0.1: + dependencies: + jwa: 2.0.1 + safe-buffer: 5.2.1 + keyv@4.5.4: dependencies: json-buffer: 3.0.1 @@ -26466,7 +26629,7 @@ snapshots: async-mutex: 0.4.1 better-sqlite3: 11.3.0 drizzle-orm: 0.33.0(@opentelemetry/api@1.9.0)(@types/better-sqlite3@7.6.13)(@types/react@19.2.5)(better-sqlite3@11.3.0)(kysely@0.28.5)(react@19.1.0) - zod: 3.24.2 + zod: 3.25.76 transitivePeerDependencies: - '@aws-sdk/client-rds-data' - '@cloudflare/workers-types' @@ -28343,7 +28506,7 @@ snapshots: is-docker: 2.2.1 is-wsl: 2.2.0 - openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.24.2): + openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76): dependencies: '@types/node': 18.19.111 '@types/node-fetch': 2.6.12 @@ -28354,7 +28517,7 @@ snapshots: node-fetch: 2.7.0(encoding@0.1.13) optionalDependencies: ws: 8.18.3 - zod: 3.24.2 + zod: 3.25.76 transitivePeerDependencies: - encoding @@ -29891,6 +30054,54 @@ snapshots: - supports-color - utf-8-validate + react-native@0.79.5(@babel/core@7.26.0)(@types/react@19.2.5)(react@19.1.0): + dependencies: + '@jest/create-cache-key-function': 29.7.0 + '@react-native/assets-registry': 0.79.5 + '@react-native/codegen': 0.79.5(@babel/core@7.26.0) + '@react-native/community-cli-plugin': 0.79.5 + '@react-native/gradle-plugin': 0.79.5 + '@react-native/js-polyfills': 0.79.5 + '@react-native/normalize-colors': 0.79.5 + '@react-native/virtualized-lists': 0.79.5(@types/react@19.2.5)(react-native@0.79.5(@babel/core@7.26.0)(@types/react@19.2.5)(react@19.1.0))(react@19.1.0) + abort-controller: 3.0.0 + anser: 1.4.10 + ansi-regex: 5.0.1 + babel-jest: 29.7.0(@babel/core@7.26.0) + babel-plugin-syntax-hermes-parser: 0.25.1 + base64-js: 1.5.1 + chalk: 4.1.2 + commander: 12.1.0 + event-target-shim: 5.0.1 + flow-enums-runtime: 0.0.6 + glob: 7.2.3 + invariant: 2.2.4 + jest-environment-node: 29.7.0 + memoize-one: 5.2.1 + metro-runtime: 0.82.5 + metro-source-map: 0.82.5 + nullthrows: 1.1.1 + pretty-format: 29.7.0 + promise: 8.3.0 + react: 19.1.0 + react-devtools-core: 6.1.5 + react-refresh: 0.14.2 + regenerator-runtime: 0.13.11 + scheduler: 0.25.0 + semver: 7.7.3 + stacktrace-parser: 0.1.11 + whatwg-fetch: 3.6.20 + ws: 6.2.3 + yargs: 17.7.2 + optionalDependencies: + '@types/react': 19.2.5 + transitivePeerDependencies: + - '@babel/core' + - '@react-native-community/cli' + - bufferutil + - supports-color + - utf-8-validate + react-native@0.79.5(@babel/core@7.28.0)(@types/react@19.2.5)(react@19.1.0): dependencies: 
'@jest/create-cache-key-function': 29.7.0 @@ -30418,6 +30629,10 @@ snapshots: dependencies: glob: 7.2.3 + rimraf@5.0.10: + dependencies: + glob: 10.4.5 + rolldown-plugin-dts@0.13.14(rolldown@1.0.0-beta.28)(typescript@5.9.3): dependencies: '@babel/generator': 7.28.0 @@ -31624,6 +31839,8 @@ snapshots: trough@2.2.0: {} + ts-algebra@2.0.0: {} + ts-interface-checker@0.1.13: {} tsconfck@3.1.6(typescript@5.9.3): @@ -32809,11 +33026,11 @@ snapshots: zlibjs@0.3.1: {} - zod-to-json-schema@3.24.5(zod@3.24.2): + zod-to-json-schema@3.24.5(zod@3.25.76): dependencies: - zod: 3.24.2 + zod: 3.25.76 - zod@3.24.2: {} + zod@3.25.76: {} zustand@5.0.8(@types/react@19.1.8)(immer@9.0.21)(react@19.1.0)(use-sync-external-store@1.5.0(react@19.1.0)): optionalDependencies: diff --git a/tools/compare-models/package.json b/tools/compare-models/package.json index 5a493bd2f..2281924f3 100644 --- a/tools/compare-models/package.json +++ b/tools/compare-models/package.json @@ -14,7 +14,7 @@ "@karakeep/sdk": "workspace:^", "@karakeep/shared": "workspace:^", "chalk": "^5.3.0", - "zod": "^3.24.2" + "zod": "^3.25.0" }, "devDependencies": { "@types/node": "^24", diff --git a/tools/compare-models/src/bookmarkProcessor.ts b/tools/compare-models/src/bookmarkProcessor.ts index 21280b974..bcca05c84 100644 --- a/tools/compare-models/src/bookmarkProcessor.ts +++ b/tools/compare-models/src/bookmarkProcessor.ts @@ -1,7 +1,6 @@ -import type { InferenceClient } from "@karakeep/shared/inference"; import { buildTextPrompt } from "@karakeep/shared/prompts"; -import { inferTags } from "./inferenceClient"; +import { type InferenceClient, inferTags } from "./inferenceClient"; import type { Bookmark } from "./types"; export async function extractBookmarkContent( diff --git a/tools/compare-models/src/config.ts b/tools/compare-models/src/config.ts index 3a2d0d41b..ca2fb960b 100644 --- a/tools/compare-models/src/config.ts +++ b/tools/compare-models/src/config.ts @@ -24,11 +24,6 @@ const envSchema = z.object({ .string() .optional() .transform((val) => (val ? parseInt(val, 10) : 2048)), - INFERENCE_USE_MAX_COMPLETION_TOKENS: z - .string() - .optional() - .transform((val) => val === "true") - .default("false"), }); export const config = envSchema.parse(process.env); diff --git a/tools/compare-models/src/inferenceClient.ts b/tools/compare-models/src/inferenceClient.ts index 0a5ed8b5f..c9235a5b2 100644 --- a/tools/compare-models/src/inferenceClient.ts +++ b/tools/compare-models/src/inferenceClient.ts @@ -1,46 +1,163 @@ -import type { InferenceClient } from "@karakeep/shared/inference"; -import { - OpenAIInferenceClient, - type OpenAIInferenceConfig, -} from "@karakeep/shared/inference"; +/** + * Standalone inference client for the compare-models benchmarking tool. + * This is intentionally separate from the main @karakeep/shared inference module + * to keep the tool self-contained with minimal dependencies. 
+ */
+import OpenAI from "openai";
+import { zodResponseFormat } from "openai/helpers/zod";
 import { z } from "zod";

+/** Simplified options for this standalone benchmarking tool */
+export interface CompareModelsInferenceOptions {
+  schema: z.ZodSchema | null;
+}
+
+export interface InferenceResponse {
+  response: string;
+  totalTokens: number | undefined;
+}
+
+export class InferenceClient {
+  private client: OpenAI;
+  private modelName: string;
+
+  constructor(apiKey: string, baseUrl?: string, modelName?: string) {
+    this.client = new OpenAI({
+      apiKey,
+      baseURL: baseUrl,
+      defaultHeaders: {
+        "X-Title": "Karakeep Model Comparison",
+      },
+    });
+    this.modelName = modelName || "gpt-4o-mini";
+  }
+
+  async inferTags(
+    content: string,
+    model?: string,
+    lang: string = "english",
+    customPrompts: string[] = [],
+  ): Promise<string[]> {
+    const useModel = model || this.modelName;
+    const tagsSchema = z.object({
+      tags: z.array(z.string()),
+    });
+
+    const response = await this.inferFromText(
+      this.buildPrompt(content, lang, customPrompts),
+      useModel,
+      { schema: tagsSchema },
+    );
+
+    const parsed = tagsSchema.safeParse(
+      this.parseJsonFromResponse(response.response),
+    );
+    if (!parsed.success) {
+      throw new Error(
+        `Failed to parse model response: ${parsed.error.message}`,
+      );
+    }
+
+    return parsed.data.tags;
+  }
+
+  private async inferFromText(
+    prompt: string,
+    model: string,
+    opts: CompareModelsInferenceOptions,
+  ): Promise<InferenceResponse> {
+    const chatCompletion = await this.client.chat.completions.create({
+      messages: [{ role: "user", content: prompt }],
+      model: model,
+      response_format: opts.schema
+        ? zodResponseFormat(opts.schema, "schema")
+        : { type: "json_object" as const },
+    });
+
+    if (!chatCompletion.choices.length) {
+      throw new Error("Got no choices from model");
+    }
+    const response = chatCompletion.choices[0].message.content;
+    if (!response) {
+      throw new Error("Got no message content from model");
+    }
+
+    return {
+      response,
+      totalTokens: chatCompletion.usage?.total_tokens,
+    };
+  }
+
+  private buildPrompt(
+    content: string,
+    lang: string,
+    customPrompts: string[],
+  ): string {
+    return `
+You are an expert whose responsibility is to help with automatic tagging for a read-it-later app.
+Please analyze the TEXT_CONTENT below and suggest relevant tags that describe its key themes, topics, and main ideas. The rules are:
+- Aim for a variety of tags, including broad categories, specific keywords, and potential sub-genres.
+- The tags must be in ${lang}.
+- If tag is not generic enough, don't include it.
+- The content can include text for cookie consent and privacy policy, ignore those while tagging.
+- Aim for 3-5 tags.
+- If there are no good tags, leave the array empty.
+${customPrompts.map((p) => `- ${p}`).join("\n")}
+
+<TEXT_CONTENT>
+${content}
+</TEXT_CONTENT>
+You must respond in JSON with key "tags" and the value is an array of string tags.`;
+  }
+
+  private parseJsonFromResponse(response: string): unknown {
+    const trimmedResponse = response.trim();
+
+    try {
+      return JSON.parse(trimmedResponse);
+    } catch {
+      const jsonBlockRegex = /```(?:json)?\s*(\{[\s\S]*?\})\s*```/i;
+      const match = trimmedResponse.match(jsonBlockRegex);
+
+      if (match) {
+        try {
+          return JSON.parse(match[1]);
+        } catch {}
+      }
+
+      const jsonObjectRegex = /\{[\s\S]*\}/;
+      const objectMatch = trimmedResponse.match(jsonObjectRegex);
+
+      if (objectMatch) {
+        try {
+          return JSON.parse(objectMatch[0]);
+        } catch {}
+      }
+
+      return JSON.parse(trimmedResponse);
+    }
+  }
+}
+
 import { config } from "./config";

+// Wrapper functions to match the API expected by the rest of the tool
 export function createInferenceClient(modelName: string): InferenceClient {
-  const inferenceConfig: OpenAIInferenceConfig = {
-    apiKey: config.OPENAI_API_KEY,
-    baseURL: config.OPENAI_BASE_URL,
-    serviceTier: config.OPENAI_SERVICE_TIER,
-    textModel: modelName,
-    imageModel: modelName, // Use same model for images if needed
-    contextLength: config.INFERENCE_CONTEXT_LENGTH,
-    maxOutputTokens: config.INFERENCE_MAX_OUTPUT_TOKENS,
-    useMaxCompletionTokens: config.INFERENCE_USE_MAX_COMPLETION_TOKENS,
-    outputSchema: "structured",
-  };
-
-  return new OpenAIInferenceClient(inferenceConfig);
+  return new InferenceClient(
+    config.OPENAI_API_KEY,
+    config.OPENAI_BASE_URL,
+    modelName,
+  );
 }

+/**
+ * Standalone wrapper around InferenceClient.inferTags for the benchmarking code.
+ */
 export async function inferTags(
-  inferenceClient: InferenceClient,
+  client: InferenceClient,
   prompt: string,
+  lang?: string,
+  customPrompts?: string[],
 ): Promise<string[]> {
-  const tagsSchema = z.object({
-    tags: z.array(z.string()),
-  });
-
-  const response = await inferenceClient.inferFromText(prompt, {
-    schema: tagsSchema,
-  });
-
-  const parsed = tagsSchema.safeParse(JSON.parse(response.response));
-  if (!parsed.success) {
-    throw new Error(
-      `Failed to parse model response: ${parsed.error.message}`,
-    );
-  }
-
-  return parsed.data.tags;
+  return client.inferTags(prompt, undefined, lang, customPrompts);
 }
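For reference, the new compare-models wiring can be exercised end to end with a short driver. This is a minimal sketch, assuming OPENAI_API_KEY (and optionally OPENAI_BASE_URL) are set the way config.ts expects; the model name, sample content, and custom prompt below are illustrative placeholders rather than values from this change:

// Minimal driver for the standalone compare-models client (illustrative).
import { createInferenceClient, inferTags } from "./inferenceClient";

async function main() {
  // One client per model under comparison; credentials are read from config.ts.
  const client = createInferenceClient("gpt-4o-mini");

  // inferTags() treats its second argument as raw bookmark content and
  // builds the tagging prompt internally via buildPrompt().
  const tags = await inferTags(
    client,
    "Self-hosting a read-it-later app: backups, full-text search, and tagging.",
    "english",
    ["Prefer lowercase, single-word tags."],
  );

  console.log(tags);
}

main().catch((err) => {
  console.error(err);
  process.exit(1);
});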