richardr1126 · richardr1126 · Apr 16, 2026 · Apr 16, 2026 · Apr 16, 2026 · Apr 16, 2026
diff --git a/README.md b/README.md
@@ -18,7 +18,7 @@ OpenReader is an open source, self-host-friendly text-to-speech document reader
 
 ## ✨ Highlights
 
-- 🎯 **Multi-provider TTS** with OpenAI-compatible endpoints (OpenAI, DeepInfra, Kokoro, KittenTTS-FastAPI, Orpheus, custom).
+- 🎯 **Multi-provider TTS** with OpenAI-compatible endpoints (Kokoro-FastAPI, KittenTTS-FastAPI, Orpheus-FastAPI) and cloud providers (OpenAI, Replicate, DeepInfra).
 - 📖 **Read-along playback** for PDF/EPUB with sentence-aware narration.
 - ⏱️ **Word-by-word highlighting** via optional `whisper.cpp` timestamps.
 - 🛜 **Sync + library import** to bring docs across devices and from server-mounted folders.

diff --git a/docs-site/docs/configure/tts-provider-guides/replicate.md b/docs-site/docs/configure/tts-provider-guides/replicate.md
@@ -0,0 +1,41 @@
+---
+title: Replicate
+---
+
+Use Replicate's hosted TTS models as your provider.
+
+## Setup
+
+**Environment variables (recommended for deployment):**
+
+```env
+API_KEY=r8_...
+NEXT_PUBLIC_DEFAULT_TTS_PROVIDER=replicate
+NEXT_PUBLIC_DEFAULT_TTS_MODEL=alphanumericuser/kokoro-82m:89b6fa84e4fa2dd6bd3a96be3e1f12827a3516c9fda8fddbac7a0be131c9a6f5
+```
+
+**Or in-app via Settings -> TTS Provider:**
+
+1. Set provider to `Replicate`.
+2. Enter your `API_KEY`.
+3. Choose a model and voice.
+
+Settings modal values override env vars. See [TTS Providers](../tts-providers) for how the two layers interact.
+
+## Notes
+
+- Built-in Replicate models:
+  - `alphanumericuser/kokoro-82m:89b6fa84e4fa2dd6bd3a96be3e1f12827a3516c9fda8fddbac7a0be131c9a6f5`
+  - `google/gemini-3.1-flash-tts`
+  - `minimax/speech-2.8-turbo`
+  - `qwen/qwen3-tts`
+  - `inworld/tts-1.5-mini`
+- You can also choose `Other` and enter any Replicate model ID (for example `owner/model-name` or `owner/model-name:version`).
+- Not every Replicate model supports native model speed; OpenReader hides or disables the native speed control for models that do not support it.
+- TTS requests are sent from the server, not the browser. The API key is never exposed to clients.
+
+## References
+
+- [Replicate](https://replicate.com/explore)
+- [TTS Providers](../tts-providers)
+- [TTS Environment Variables](../../reference/environment-variables#tts-provider-and-request-behavior)
diff --git a/docs-site/docs/configure/tts-providers.md b/docs-site/docs/configure/tts-providers.md
@@ -15,10 +15,17 @@ Set env vars as deployment-level defaults. Users (or you, in a single-user setup
 ## Providers
 
 - **OpenAI**: Cloud. Base URL pre-filled (`https://api.openai.com/v1`). API key required.
-- **Deepinfra**: Cloud. Base URL pre-filled (`https://api.deepinfra.com/v1/openai`). API key required.
+- **Replicate**: Cloud. Base URL managed internally by OpenReader. API key required.
+- **DeepInfra**: Cloud. Base URL pre-filled (`https://api.deepinfra.com/v1/openai`). API key required.
 - **Custom OpenAI-Like**: Self-hosted or any custom endpoint. `API_BASE` must be set manually (typically ending in `/v1`). API key optional.
 
-For `OpenAI` and `Deepinfra` you only need to supply an API key. For `Custom OpenAI-Like` you must also set `API_BASE`.
+For `OpenAI`, `DeepInfra`, and `Replicate` you only need to supply an API key. For `Custom OpenAI-Like` you must also set `API_BASE`.
+
+## Built-in model catalogs
+
+- **Replicate** models: `alphanumericuser/kokoro-82m:89b6fa84e4fa2dd6bd3a96be3e1f12827a3516c9fda8fddbac7a0be131c9a6f5`, `google/gemini-3.1-flash-tts`, `minimax/speech-2.8-turbo`, `qwen/qwen3-tts`, `inworld/tts-1.5-mini` (or choose `Other` and enter any Replicate model ID, such as `owner/model` or `owner/model:version`)
+- **OpenAI** models: `tts-1`, `tts-1-hd`, `gpt-4o-mini-tts`
+- **DeepInfra** models: includes `hexgrad/Kokoro-82M` and additional hosted models (depending on API key / feature flags)
 
 ## Custom provider requirements
 
@@ -36,6 +43,7 @@ TTS requests originate from the **Next.js server**, not the browser. `API_BASE`
 - [Kokoro-FastAPI](./tts-provider-guides/kokoro-fastapi)
 - [KittenTTS-FastAPI](./tts-provider-guides/kitten-tts-fastapi)
 - [Orpheus-FastAPI](./tts-provider-guides/orpheus-fastapi)
+- [Replicate](./tts-provider-guides/replicate)
 - [DeepInfra](./tts-provider-guides/deepinfra)
 - [OpenAI](./tts-provider-guides/openai)
 - [Other](./tts-provider-guides/other)

diff --git a/docs-site/docs/deploy/vercel-deployment.md b/docs-site/docs/deploy/vercel-deployment.md
@@ -18,8 +18,7 @@ This guide covers deploying OpenReader to Vercel with external Postgres and S3-c
 Recommended production setup (auth enabled):
 
 ```bash
-API_BASE=https://api.deepinfra.com/v1/openai
-API_KEY=your_deepinfra_key
+API_KEY=your_replicate_key
 POSTGRES_URL=postgres://...
 USE_EMBEDDED_WEED_MINI=false
 S3_ACCESS_KEY_ID=...
@@ -33,8 +32,8 @@ AUTH_SECRET=...
 NEXT_PUBLIC_ENABLE_DOCX_CONVERSION=false
 NEXT_PUBLIC_ENABLE_DESTRUCTIVE_DELETE_ACTIONS=false
 NEXT_PUBLIC_ENABLE_TTS_PROVIDERS_TAB=false
-NEXT_PUBLIC_DEFAULT_TTS_PROVIDER=deepinfra
-NEXT_PUBLIC_DEFAULT_TTS_MODEL=hexgrad/Kokoro-82M
+NEXT_PUBLIC_DEFAULT_TTS_PROVIDER=replicate
+NEXT_PUBLIC_DEFAULT_TTS_MODEL=alphanumericuser/kokoro-82m:89b6fa84e4fa2dd6bd3a96be3e1f12827a3516c9fda8fddbac7a0be131c9a6f5
 NEXT_PUBLIC_SHOW_ALL_DEEPINFRA_MODELS=false
 NEXT_PUBLIC_ENABLE_AUDIOBOOK_EXPORT=true
 NEXT_PUBLIC_ENABLE_WORD_HIGHLIGHT=false
@@ -49,8 +48,8 @@ We recommend setting these defaults for a production-like environment:
 - `NEXT_PUBLIC_ENABLE_DOCX_CONVERSION=false`: Disables DOCX upload (requires external tools anyway)
 - `NEXT_PUBLIC_ENABLE_DESTRUCTIVE_DELETE_ACTIONS=false`: Hides destructive "Delete All" actions
 - `NEXT_PUBLIC_ENABLE_TTS_PROVIDERS_TAB=false`: Hides the Settings -> TTS Provider section
-- `NEXT_PUBLIC_DEFAULT_TTS_PROVIDER=deepinfra`: Points default TTS to a scalable provider
-- `NEXT_PUBLIC_DEFAULT_TTS_MODEL=hexgrad/Kokoro-82M`: Uses a high-quality default model
+- `NEXT_PUBLIC_DEFAULT_TTS_PROVIDER=replicate`: Points default TTS to a scalable provider
+- `NEXT_PUBLIC_DEFAULT_TTS_MODEL=alphanumericuser/kokoro-82m:89b6fa84e4fa2dd6bd3a96be3e1f12827a3516c9fda8fddbac7a0be131c9a6f5`: Uses a low-cost default model
 - `NEXT_PUBLIC_SHOW_ALL_DEEPINFRA_MODELS=false`: Restricts usage to free models if no key is provided
 - `NEXT_PUBLIC_ENABLE_AUDIOBOOK_EXPORT=true`: (Optional) Controls audiobook export UI
 - `NEXT_PUBLIC_ENABLE_WORD_HIGHLIGHT=false`: (Optional) Controls word highlighting UI (requires timestamp backend)

diff --git a/docs-site/docs/docker-quick-start.md b/docs-site/docs/docker-quick-start.md
@@ -8,7 +8,7 @@ import TabItem from '@theme/TabItem';
 ## Prerequisites
 
 - A recent Docker version installed
-- A TTS API server that OpenReader can reach (Kokoro-FastAPI, KittenTTS-FastAPI, Orpheus-FastAPI, DeepInfra, OpenAI, or equivalent)
+- A TTS API server or cloud TTS provider that OpenReader can reach (Kokoro-FastAPI, KittenTTS-FastAPI, Orpheus-FastAPI, Replicate, DeepInfra, OpenAI, or equivalent)
 
 :::note
 If you have suitable hardware, you can run Kokoro locally with Docker. See [Kokoro-FastAPI](./configure/tts-provider-guides/kokoro-fastapi).

diff --git a/docs-site/docs/introduction.md b/docs-site/docs/introduction.md
@@ -8,7 +8,7 @@ OpenReader is an open source text-to-speech document reader built with Next.js.
 
 > Previously named **OpenReader-WebUI**.
 
-It supports multiple TTS providers including OpenAI, DeepInfra, and custom OpenAI-compatible endpoints such as [Kokoro-FastAPI](https://github.com/remsky/Kokoro-FastAPI), [KittenTTS-FastAPI](https://github.com/richardr1126/KittenTTS-FastAPI), and [Orpheus-FastAPI](https://github.com/Lex-au/Orpheus-FastAPI).
+It supports multiple TTS providers including OpenAI, Replicate, DeepInfra, and custom OpenAI-compatible endpoints such as [Kokoro-FastAPI](https://github.com/remsky/Kokoro-FastAPI), [KittenTTS-FastAPI](https://github.com/richardr1126/KittenTTS-FastAPI), and [Orpheus-FastAPI](https://github.com/Lex-au/Orpheus-FastAPI).
 
 ## ✨ Highlights
 
@@ -18,6 +18,7 @@ It supports multiple TTS providers including OpenAI, DeepInfra, and custom OpenA
   - [**Orpheus-FastAPI**](https://github.com/Lex-au/Orpheus-FastAPI)
   - **Custom OpenAI-compatible**: any TTS API with `/v1/audio/voices` and `/v1/audio/speech` endpoints
   - **Cloud TTS providers**:
+    - [**Replicate**](https://replicate.com/explore): includes a built-in catalog and supports any Replicate model ID via `Other`
     - [**DeepInfra**](https://deepinfra.com/models/text-to-speech): Kokoro-82M and other hosted models
     - [**OpenAI API**](https://platform.openai.com/docs/pricing#transcription-and-speech): `tts-1`, `tts-1-hd`, and `gpt-4o-mini-tts`
 - 🛜 **Server-side Document Storage**

diff --git a/docs-site/docs/reference/environment-variables.md b/docs-site/docs/reference/environment-variables.md
@@ -379,7 +379,7 @@ Controls whether the **TTS Provider** section appears in the Settings modal.
  Sets the default TTS provider for new users.
 
  - Default: `custom-openai`
- - Example values: `deepinfra`, `openai`, `custom-openai`
+ - Example values: `replicate`, `deepinfra`, `openai`, `custom-openai`
 
  ### NEXT_PUBLIC_DEFAULT_TTS_MODEL
 

diff --git a/docs-site/sidebars.ts b/docs-site/sidebars.ts
@@ -23,6 +23,7 @@ const sidebars: SidebarsConfig = {
             'configure/tts-provider-guides/kokoro-fastapi',
             'configure/tts-provider-guides/kitten-tts-fastapi',
             'configure/tts-provider-guides/orpheus-fastapi',
+            'configure/tts-provider-guides/replicate',
             'configure/tts-provider-guides/deepinfra',
             'configure/tts-provider-guides/openai',
             'configure/tts-provider-guides/other',

diff --git a/package.json b/package.json
@@ -59,6 +59,7 @@
     "react-pdf": "^9.2.1",
     "react-reader": "^2.0.15",
     "remark-gfm": "^4.0.1",
+    "replicate": "^1.4.0",
     "uuid": "^11.1.0"
   },
   "devDependencies": {

diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
diff --git a/src/app/api/audiobook/chapter/route.ts b/src/app/api/audiobook/chapter/route.ts
@@ -29,6 +29,7 @@ import { getOpenReaderTestNamespace, getUnclaimedUserIdForNamespace } from '@/li
 import { buildAllowedAudiobookUserIds, pickAudiobookOwner } from '@/lib/server/audiobooks/user-scope';
 import { getFFmpegPath } from '@/lib/server/audiobooks/ffmpeg-bin';
 import { generateTTSBuffer } from '@/lib/server/tts/generate';
+import { supportsNativeModelSpeed } from '@/lib/shared/tts-provider-catalog';
 import type { AudiobookGenerationSettings } from '@/types/client';
 import type { TTSAudiobookFormat } from '@/types/tts';
 
@@ -40,7 +41,7 @@ interface ConversionRequest {
   bookId?: string;
   format?: TTSAudiobookFormat;
   chapterIndex?: number;
-  settings?: AudiobookGenerationSettings;
+  settings?: unknown;
 }
 
 type ChapterObject = {
@@ -92,6 +93,35 @@ function s3NotConfiguredResponse(): NextResponse {
   );
 }
 
+function normalizeNativeSpeedForSettings(settings: AudiobookGenerationSettings): AudiobookGenerationSettings {
+  return supportsNativeModelSpeed(settings.ttsProvider, settings.ttsModel)
+    ? settings
+    : { ...settings, nativeSpeed: 1 };
+}
+
+function isFiniteNumber(value: unknown): value is number {
+  return typeof value === 'number' && Number.isFinite(value);
+}
+
+function isAudiobookFormat(value: unknown): value is TTSAudiobookFormat {
+  return value === 'mp3' || value === 'm4b';
+}
+
+function isAudiobookGenerationSettings(value: unknown): value is AudiobookGenerationSettings {
+  if (typeof value !== 'object' || value === null) {
+    return false;
+  }
+
+  const record = value as Record<string, unknown>;
+  return typeof record.ttsProvider === 'string'
+    && typeof record.ttsModel === 'string'
+    && typeof record.voice === 'string'
+    && isFiniteNumber(record.nativeSpeed)
+    && isFiniteNumber(record.postSpeed)
+    && isAudiobookFormat(record.format)
+    && (record.ttsInstructions === undefined || typeof record.ttsInstructions === 'string');
+}
+
 function chapterFileMimeType(format: TTSAudiobookFormat): string {
   return format === 'mp3' ? 'audio/mpeg' : 'audio/mp4';
 }
@@ -290,28 +320,53 @@ export async function POST(request: NextRequest) {
     const existingChapters = listChapterObjects(objectNames);
     const hasChapters = existingChapters.length > 0;
 
-    let existingSettings: AudiobookGenerationSettings | null = null;
+    let normalizedExistingSettings: AudiobookGenerationSettings | undefined;
     try {
-      existingSettings = JSON.parse(
+      const parsedSettings = JSON.parse(
         (await getAudiobookObjectBuffer(bookId, storageUserId, 'audiobook.meta.json', testNamespace)).toString('utf8'),
-      ) as AudiobookGenerationSettings;
+      ) as unknown;
+      if (!isAudiobookGenerationSettings(parsedSettings)) {
+        console.error('Invalid audiobook.meta.json settings payload', { bookId, storageUserId });
+        return NextResponse.json({ error: 'Invalid audiobook metadata settings' }, { status: 500 });
+      }
+      normalizedExistingSettings = normalizeNativeSpeedForSettings(parsedSettings);
     } catch (error) {
       if (!isMissingBlobError(error)) throw error;
-      existingSettings = null;
+      normalizedExistingSettings = undefined;
+    }
+
+    const incomingSettings = (() => {
+      if (data.settings === undefined) {
+        return undefined;
+      }
+      if (!isAudiobookGenerationSettings(data.settings)) {
+        return null;
+      }
+      return normalizeNativeSpeedForSettings(data.settings);
+    })();
+
+    if (incomingSettings === null) {
+      return NextResponse.json({ error: 'Invalid audiobook settings payload' }, { status: 400 });
     }
 
-    const incomingSettings = data.settings;
-    if (existingSettings && hasChapters && incomingSettings) {
+    const mergedSettings = normalizedExistingSettings && incomingSettings
+      ? normalizeNativeSpeedForSettings({
+          ...normalizedExistingSettings,
+          ...incomingSettings,
+        })
+      : normalizedExistingSettings ?? incomingSettings;
+
+    if (normalizedExistingSettings && hasChapters && incomingSettings) {
       const mismatch =
-        existingSettings.ttsProvider !== incomingSettings.ttsProvider ||
-        existingSettings.ttsModel !== incomingSettings.ttsModel ||
-        existingSettings.voice !== incomingSettings.voice ||
-        existingSettings.nativeSpeed !== incomingSettings.nativeSpeed ||
-        existingSettings.postSpeed !== incomingSettings.postSpeed ||
-        existingSettings.format !== incomingSettings.format ||
-        (existingSettings.ttsInstructions || '') !== (incomingSettings.ttsInstructions || '');
+        normalizedExistingSettings.ttsProvider !== incomingSettings.ttsProvider ||
+        normalizedExistingSettings.ttsModel !== incomingSettings.ttsModel ||
+        normalizedExistingSettings.voice !== incomingSettings.voice ||
+        normalizedExistingSettings.nativeSpeed !== incomingSettings.nativeSpeed ||
+        normalizedExistingSettings.postSpeed !== incomingSettings.postSpeed ||
+        normalizedExistingSettings.format !== incomingSettings.format ||
+        (normalizedExistingSettings.ttsInstructions || '') !== (incomingSettings.ttsInstructions || '');
       if (mismatch) {
-        return NextResponse.json({ error: 'Audiobook settings mismatch', settings: existingSettings }, { status: 409 });
+        return NextResponse.json({ error: 'Audiobook settings mismatch', settings: normalizedExistingSettings }, { status: 409 });
       }
     }
 
@@ -322,10 +377,9 @@ export async function POST(request: NextRequest) {
 
     const format: TTSAudiobookFormat =
       (existingFormats.values().next().value as TTSAudiobookFormat | undefined) ??
-      existingSettings?.format ??
-      incomingSettings?.format ??
+      mergedSettings?.format ??
       requestedFormat;
-    const rawPostSpeed = incomingSettings?.postSpeed ?? existingSettings?.postSpeed ?? 1;
+    const rawPostSpeed = mergedSettings?.postSpeed ?? 1;
     const postSpeed = Number.isFinite(Number(rawPostSpeed)) ? Number(rawPostSpeed) : 1;
 
     let chapterIndex: number;
@@ -349,22 +403,20 @@ export async function POST(request: NextRequest) {
     }
 
     const provider = request.headers.get('x-tts-provider')
-      || incomingSettings?.ttsProvider
-      || existingSettings?.ttsProvider
+      || mergedSettings?.ttsProvider
       || 'openai';
     const openApiKey = request.headers.get('x-openai-key') || process.env.API_KEY || 'none';
     const openApiBaseUrl = request.headers.get('x-openai-base-url') || process.env.API_BASE;
-    const model = incomingSettings?.ttsModel ?? existingSettings?.ttsModel;
-    const voice = incomingSettings?.voice
-      || existingSettings?.voice
+    const model = mergedSettings?.ttsModel;
+    const voice = mergedSettings?.voice
       || (provider === 'openai'
         ? 'alloy'
         : provider === 'deepinfra'
           ? 'af_bella'
           : 'af_sarah');
-    const rawNativeSpeed = incomingSettings?.nativeSpeed ?? existingSettings?.nativeSpeed ?? 1;
+    const rawNativeSpeed = mergedSettings?.nativeSpeed ?? 1;
     const nativeSpeed = Number.isFinite(Number(rawNativeSpeed)) ? Number(rawNativeSpeed) : 1;
-    const instructions = incomingSettings?.ttsInstructions ?? existingSettings?.ttsInstructions;
+    const instructions = mergedSettings?.ttsInstructions;
 
     if (authEnabled && userId && isTtsRateLimitEnabled()) {
       const isAnonymous = Boolean(user?.isAnonymous);
@@ -499,7 +551,7 @@ export async function POST(request: NextRequest) {
     await deleteAudiobookObject(bookId, storageUserId, 'complete.mp3.manifest.json', testNamespace).catch(() => {});
     await deleteAudiobookObject(bookId, storageUserId, 'complete.m4b.manifest.json', testNamespace).catch(() => {});
 
-    if (!existingSettings && incomingSettings) {
+    if (!normalizedExistingSettings && incomingSettings) {
       await putAudiobookObject(
         bookId,
         storageUserId,