Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,21 @@ GEMINI_API_KEY=your-api-key-here
# Ollama backend (local, no API key needed)
# EYEROLL_BACKEND=ollama
# OLLAMA_HOST=http://localhost:11434

# OpenAI backend
# OPENAI_API_KEY=sk-...

# OpenRouter backend (--backend openrouter)
# OPENROUTER_API_KEY=sk-or-...

# Groq backend (--backend groq)
# GROQ_API_KEY=gsk_...

# Grok / xAI backend (--backend grok)
# GROK_API_KEY=xai-...

# Cerebras backend (--backend cerebras)
# CEREBRAS_API_KEY=...

# Gemini service account backend (--backend gemini-sa)
# GOOGLE_APPLICATION_CREDENTIALS=/path/to/credentials.json
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ eyeroll is a Claude Code plugin that analyzes screen recordings, Loom videos, Yo

# Install the CLI
pip install eyeroll[gemini] # Gemini Flash API (recommended)
pip install eyeroll[openai] # OpenAI GPT-4o
pip install eyeroll[openai] # OpenAI GPT-4o + OpenRouter/Groq/Grok/Cerebras
pip install eyeroll # Ollama only (local, no API key)
pip install eyeroll[all] # everything
```
Expand Down Expand Up @@ -65,6 +65,9 @@ eyeroll watch https://loom.com/share/abc123
eyeroll watch ./bug.mp4 --context "checkout broken after PR #432"
eyeroll watch ./bug.mp4 -cc .eyeroll/context.md --parallel 4
eyeroll watch ./bug.mp4 --backend ollama -m qwen3-vl:2b
eyeroll watch ./bug.mp4 --backend groq
eyeroll watch ./bug.mp4 --backend openrouter -m anthropic/claude-3.5-sonnet
eyeroll watch ./bug.mp4 --backend openai-compat --base-url https://my-server/v1
eyeroll history
```

Expand Down Expand Up @@ -100,6 +103,11 @@ eyeroll history
| **gemini** | Direct upload | Yes | GEMINI_API_KEY | ~$0.15 | Best quality |
| **openai** | Frame-by-frame | Whisper | OPENAI_API_KEY | ~$0.20 | Existing OpenAI users |
| **ollama** | Frame-by-frame | No | None | Free | Privacy, offline |
| **openrouter** | Frame-by-frame | No | OPENROUTER_API_KEY | varies | Model variety |
| **groq** | Frame-by-frame | No | GROQ_API_KEY | cheap | Low latency |
| **grok** | Frame-by-frame | No | GROK_API_KEY | varies | xAI models |
| **cerebras** | Frame-by-frame | No | CEREBRAS_API_KEY | cheap | Fast inference |
| **openai-compat** | Frame-by-frame | No | any env var | varies | Custom/self-hosted endpoints |

Ollama auto-installs if not found (macOS/Linux).

Expand Down
73 changes: 60 additions & 13 deletions eyeroll/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@
- gemini: Google Gemini Flash API (requires GEMINI_API_KEY)
- openai: OpenAI GPT-4o API (requires OPENAI_API_KEY)
- ollama: Local Ollama with vision models like qwen3-vl (no API key needed)
- openrouter: OpenRouter API (requires OPENROUTER_API_KEY)
- groq: Groq API (requires GROQ_API_KEY)
- grok: xAI Grok API (requires GROK_API_KEY)
- cerebras: Cerebras API (requires CEREBRAS_API_KEY)
- openai-compat: Any OpenAI-compatible endpoint (requires base_url + API key)
"""

import base64
Expand Down Expand Up @@ -194,31 +199,51 @@ def supports_audio(self) -> bool:


# ---------------------------------------------------------------------------
# OpenAI Backend
# OpenAI Backend (also handles OpenAI-compatible providers)
# ---------------------------------------------------------------------------

# Maps provider name -> (base_url, api_key_env, default_model, has_whisper)
# has_whisper=False: the Whisper transcription endpoint only exists on OpenAI proper.
# Consumed by get_backend() to construct OpenAIBackend instances pointed at
# OpenAI-compatible third-party endpoints; the env var names here match .env.example.
# NOTE(review): the groq ("llama-3.3-70b-versatile") and cerebras ("llama3.1-70b")
# defaults appear to be text-only models, while this tool sends image content via
# analyze_image — presumably users must pass -m with a vision model for those
# providers; confirm, or switch the defaults to vision-capable models.
_OPENAI_COMPAT_PROVIDERS = {
"openrouter": ("https://openrouter.ai/api/v1", "OPENROUTER_API_KEY", "openai/gpt-4o", False),
"groq": ("https://api.groq.com/openai/v1", "GROQ_API_KEY", "llama-3.3-70b-versatile", False),
"grok": ("https://api.x.ai/v1", "GROK_API_KEY", "grok-2-vision-1212", False),
"cerebras": ("https://api.cerebras.ai/v1", "CEREBRAS_API_KEY", "llama3.1-70b", False),
}


class OpenAIBackend(Backend):
"""OpenAI GPT-4o API backend."""
"""OpenAI GPT-4o API backend.

Also handles OpenAI-compatible providers (Groq, Grok, OpenRouter, Cerebras) and
custom self-hosted endpoints. Use get_backend() to construct for named providers.
"""

def __init__(
    self,
    model: str = "gpt-4o",
    base_url: str | None = None,
    api_key_env: str = "OPENAI_API_KEY",
    has_whisper: bool = True,
):
    """Create a client for OpenAI or any OpenAI-compatible endpoint.

    Args:
        model: Chat/vision model name sent with every request.
        base_url: Endpoint base URL; None means OpenAI's default endpoint.
        api_key_env: Environment variable checked first for the API key.
        has_whisper: Whether this endpoint exposes the Whisper transcription
            API (True only for OpenAI proper).

    Raises:
        ImportError: The openai package is not installed.
        AnalysisError: No API key was found in the environment.
    """
    try:
        from openai import OpenAI
    except ImportError:
        raise ImportError(
            "OpenAI backend requires openai. Install with: pip install eyeroll[openai]"
        )
    # Try the provider-specific env var first, then fall back to OPENAI_API_KEY
    api_key = os.environ.get(api_key_env) or os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise AnalysisError(
            f"No API key found. Set {api_key_env} and try again.\n\n"
            "Or use a different backend:\n"
            " eyeroll watch <source> --backend gemini\n"
            " eyeroll watch <source> --backend ollama"
        )
    self._client = OpenAI(api_key=api_key, base_url=base_url)
    self._model = model
    self._has_whisper = has_whisper  # only OpenAI's endpoint has Whisper

def analyze_image(self, image_path: str, prompt: str, verbose: bool = False) -> str:
with open(image_path, "rb") as f:
Expand All @@ -240,11 +265,16 @@ def analyze_image(self, image_path: str, prompt: str, verbose: bool = False) ->

def analyze_video(self, video_path: str, prompt: str, verbose: bool = False) -> str:
    """Direct video upload is unsupported on OpenAI-compatible chat APIs.

    Always raises; callers should fall back to frame-by-frame analysis
    (analyze_image on extracted key frames).

    Raises:
        AnalysisError: Unconditionally.
    """
    raise AnalysisError(
        "OpenAI-compatible backends do not support direct video upload. "
        "Use frame-by-frame mode instead."
    )

def analyze_audio(self, audio_path: str, prompt: str, verbose: bool = False) -> str:
if not self._has_whisper:
raise AnalysisError(
f"Audio transcription is not supported for this provider. "
"Only the OpenAI backend has the Whisper endpoint."
)
with open(audio_path, "rb") as f:
transcript = self._client.audio.transcriptions.create(
model="whisper-1",
Expand All @@ -265,7 +295,7 @@ def supports_video(self) -> bool:

@property
def supports_audio(self) -> bool:
    # Audio transcription relies on the Whisper endpoint, which only OpenAI
    # proper provides; compat providers are constructed with has_whisper=False.
    return self._has_whisper


# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -450,8 +480,10 @@ def get_backend(name: str | None = None, **kwargs) -> Backend:
"""Get or create the active backend.

Args:
name: 'gemini', 'openai', or 'ollama'. Defaults to EYEROLL_BACKEND env var, then 'gemini'.
**kwargs: passed to backend constructor (e.g., model, host).
name: Backend name. One of: 'gemini', 'openai', 'ollama', 'openrouter', 'groq',
'grok', 'cerebras', 'openai-compat'.
Defaults to EYEROLL_BACKEND env var, then 'gemini'.
**kwargs: Passed to backend constructor (e.g., model, host, base_url).
"""
global _current_backend
if _current_backend is not None:
Expand All @@ -466,8 +498,23 @@ def get_backend(name: str | None = None, **kwargs) -> Backend:
_current_backend = OpenAIBackend(**kwargs)
elif name == "ollama":
_current_backend = OllamaBackend(**kwargs)
elif name in _OPENAI_COMPAT_PROVIDERS:
url, key_env, default_model, has_whisper = _OPENAI_COMPAT_PROVIDERS[name]
_current_backend = OpenAIBackend(
model=kwargs.get("model", default_model),
base_url=url,
api_key_env=key_env,
has_whisper=has_whisper,
)
elif name == "openai-compat":
# Requires base_url in kwargs; model is optional
_current_backend = OpenAIBackend(has_whisper=False, **kwargs)
else:
raise ValueError(f"Unknown backend: {name}. Use 'gemini', 'openai', or 'ollama'.")
raise ValueError(
f"Unknown backend: {name}. "
"Use 'gemini', 'openai', 'ollama', 'openrouter', 'groq', 'grok', "
"'cerebras', or 'openai-compat'."
)

return _current_backend

Expand Down
26 changes: 21 additions & 5 deletions eyeroll/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,12 @@ def _validate_openai(api_key: str) -> None:
help="Additional context (Slack message, issue description, what to do with the video, etc.)")
@click.option("--max-frames", default=20, show_default=True,
help="Maximum key frames to analyze from video.")
@click.option("--backend", "-b", type=click.Choice(["gemini", "openai", "ollama"]), default=None,
@click.option("--backend", "-b",
type=click.Choice(["gemini", "openai", "ollama", "openrouter", "groq", "grok", "cerebras", "openai-compat"]),
default=None,
help="Vision backend. Defaults to EYEROLL_BACKEND env var, then gemini.")
@click.option("--base-url", default=None,
help="Base URL for --backend openai-compat (e.g. https://my-server/v1).")
@click.option("--model", "-m", default=None,
help="Model override (e.g., qwen3-vl:8b for ollama, gemini-2.0-flash for gemini).")
@click.option("--codebase-context", "-cc", default=None,
Expand All @@ -154,23 +158,30 @@ def _validate_openai(api_key: str) -> None:
@click.option("--output", "-o", default=None,
help="Write output to file instead of stdout.")
@click.option("--verbose", "-v", is_flag=True, help="Show progress details.")
def watch(source, context, codebase_context, max_frames, backend, model, parallel, no_cache, output, verbose):
def watch(source, context, codebase_context, max_frames, backend, model, parallel, no_cache, output, verbose, base_url):
"""Analyze a video/screenshot and produce structured notes.

SOURCE can be a URL (YouTube, Loom, etc.) or a local file path.

\b
Backends:
gemini Google Gemini Flash API (default, requires GEMINI_API_KEY)
openai OpenAI GPT-4o (requires OPENAI_API_KEY)
ollama Local models via Ollama (e.g., qwen3-vl, no API key needed)
gemini Google Gemini Flash API (default, requires GEMINI_API_KEY)
openai OpenAI GPT-4o (requires OPENAI_API_KEY)
ollama Local models via Ollama (e.g., qwen3-vl, no API key needed)
openrouter OpenRouter API (requires OPENROUTER_API_KEY)
groq Groq API (requires GROQ_API_KEY)
grok xAI Grok API (requires GROK_API_KEY)
cerebras Cerebras API (requires CEREBRAS_API_KEY)
openai-compat Any OpenAI-compatible endpoint (requires --base-url)

\b
Examples:
eyeroll watch https://loom.com/share/abc123
eyeroll watch ./recording.mp4 --context "checkout broken after PR #432"
eyeroll watch demo.mp4 -c "create a skill from this" --backend ollama
eyeroll watch screenshot.png -b ollama -m qwen3-vl:2b
eyeroll watch video.mp4 --backend groq
eyeroll watch video.mp4 --backend openai-compat --base-url https://my-server/v1
"""
from .watch import watch as run_watch

Expand All @@ -181,6 +192,10 @@ def watch(source, context, codebase_context, max_frames, backend, model, paralle
elif not model.startswith("gemini"):
backend = "ollama"

# --base-url implies openai-compat if no backend specified
if base_url and not backend:
backend = "openai-compat"

# Default parallel workers: 3 for API backends (separate servers), 1 for ollama (single GPU)
if parallel is None:
effective_backend = backend or os.environ.get("EYEROLL_BACKEND", "gemini")
Expand All @@ -199,6 +214,7 @@ def watch(source, context, codebase_context, max_frames, backend, model, paralle
max_frames=max_frames,
backend_name=backend,
model=model,
base_url=base_url,
verbose=verbose,
no_cache=no_cache,
parallel=parallel,
Expand Down
7 changes: 6 additions & 1 deletion eyeroll/watch.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def watch(
max_frames: int = 20,
backend_name: str | None = None,
model: str | None = None,
base_url: str | None = None,
verbose: bool = False,
no_cache: bool = False,
parallel: int = 1,
Expand All @@ -47,8 +48,10 @@ def watch(
context: Optional text context (Slack message, issue body, etc.)
codebase_context: Optional codebase context (project structure, stack, key files).
max_frames: Maximum number of key frames to extract and analyze.
backend_name: 'gemini', 'openai', or 'ollama'. Defaults to EYEROLL_BACKEND env var, then 'gemini'.
backend_name: Backend name (e.g. 'gemini', 'openai', 'groq', 'openai-compat').
Defaults to EYEROLL_BACKEND env var, then 'gemini'.
model: Model override (e.g., 'qwen3-vl:8b' for ollama, 'gemini-2.0-flash' for gemini).
base_url: Base URL for openai-compat backend (e.g. https://my-server/v1).
verbose: Print progress to stderr.
no_cache: Skip cache lookup and force fresh analysis.
parallel: Number of concurrent workers for frame analysis (default: 1 = sequential).
Expand All @@ -60,6 +63,8 @@ def watch(
backend_kwargs = {}
if model:
backend_kwargs["model"] = model
if base_url:
backend_kwargs["base_url"] = base_url

# Initialize backend early to fail fast on config errors
backend = get_backend(backend_name, **backend_kwargs)
Expand Down
4 changes: 2 additions & 2 deletions tests/test_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ def test_openai_no_api_key():
env = {k: v for k, v in os.environ.items() if k != "OPENAI_API_KEY"}
with patch.dict(os.environ, env, clear=True), \
patch.dict("sys.modules", {"openai": mock_openai_mod}):
with pytest.raises(AnalysisError, match="No OpenAI API key found"):
with pytest.raises(AnalysisError, match="No API key found"):
OpenAIBackend()


Expand All @@ -264,7 +264,7 @@ def test_openai_supports_audio():

def test_openai_analyze_video_raises():
backend = _make_openai()
with pytest.raises(AnalysisError, match="does not support direct video"):
with pytest.raises(AnalysisError, match="do not support direct video"):
backend.analyze_video("/path/to/video.mp4", "describe")


Expand Down
6 changes: 5 additions & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@ def test_watch_local_file(runner, tmp_path):

# The CLI does: from .watch import watch as run_watch
# So we patch eyeroll.watch.watch
with patch("eyeroll.watch.watch", return_value=report) as mock_watch:
# Clear EYEROLL_BACKEND so the default (gemini) is used and parallel=3
env = {k: v for k, v in os.environ.items() if k != "EYEROLL_BACKEND"}
with patch("eyeroll.watch.watch", return_value=report) as mock_watch, \
patch.dict(os.environ, env, clear=True):
result = runner.invoke(cli, ["watch", "/fake/video.mp4"])

assert result.exit_code == 0
Expand All @@ -48,6 +51,7 @@ def test_watch_local_file(runner, tmp_path):
max_frames=20,
backend_name=None,
model=None,
base_url=None,
verbose=False,
no_cache=False,
parallel=3, # default for API backends (gemini)
Expand Down
Loading