Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,21 @@ GEMINI_API_KEY=your-api-key-here
# Ollama backend (local, no API key needed)
# EYEROLL_BACKEND=ollama
# OLLAMA_HOST=http://localhost:11434

# OpenAI backend
# OPENAI_API_KEY=sk-...

# OpenRouter backend (--backend openrouter)
# OPENROUTER_API_KEY=sk-or-...

# Groq backend (--backend groq)
# GROQ_API_KEY=gsk_...

# Grok / xAI backend (--backend grok)
# GROK_API_KEY=xai-...

# Cerebras backend (--backend cerebras)
# CEREBRAS_API_KEY=...

# Gemini service account backend (--backend gemini-sa)
# GOOGLE_APPLICATION_CREDENTIALS=/path/to/credentials.json
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ eyeroll is a Claude Code plugin that analyzes screen recordings, Loom videos, Yo

# Install the CLI
pip install eyeroll[gemini] # Gemini Flash API (recommended)
pip install eyeroll[openai] # OpenAI GPT-4o
pip install eyeroll[openai] # OpenAI GPT-4o + OpenRouter/Groq/Grok/Cerebras
pip install eyeroll # Ollama only (local, no API key)
pip install eyeroll[all] # everything
```
Expand Down Expand Up @@ -65,6 +65,9 @@ eyeroll watch https://loom.com/share/abc123
eyeroll watch ./bug.mp4 --context "checkout broken after PR #432"
eyeroll watch ./bug.mp4 -cc .eyeroll/context.md --parallel 4
eyeroll watch ./bug.mp4 --backend ollama -m qwen3-vl:2b
eyeroll watch ./bug.mp4 --backend groq
eyeroll watch ./bug.mp4 --backend openrouter -m anthropic/claude-3.5-sonnet
eyeroll watch ./bug.mp4 --backend openai-compat --base-url https://my-server/v1
eyeroll history
```

Expand Down Expand Up @@ -100,6 +103,11 @@ eyeroll history
| **gemini** | Direct upload | Yes | GEMINI_API_KEY | ~$0.15 | Best quality |
| **openai** | Frame-by-frame | Whisper | OPENAI_API_KEY | ~$0.20 | Existing OpenAI users |
| **ollama** | Frame-by-frame | No | None | Free | Privacy, offline |
| **openrouter** | Frame-by-frame | No | OPENROUTER_API_KEY | varies | Model variety |
| **groq** | Frame-by-frame | No | GROQ_API_KEY | cheap | Low latency |
| **grok** | Frame-by-frame | No | GROK_API_KEY | varies | xAI models |
| **cerebras** | Frame-by-frame | No | CEREBRAS_API_KEY | cheap | Fast inference |
| **openai-compat** | Frame-by-frame | No | any env var | varies | Custom/self-hosted endpoints |

Ollama auto-installs if not found (macOS/Linux).

Expand Down
73 changes: 60 additions & 13 deletions eyeroll/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@
- gemini: Google Gemini Flash API (requires GEMINI_API_KEY)
- openai: OpenAI GPT-4o API (requires OPENAI_API_KEY)
- ollama: Local Ollama with vision models like qwen3-vl (no API key needed)
- openrouter: OpenRouter API (requires OPENROUTER_API_KEY)
- groq: Groq API (requires GROQ_API_KEY)
- grok: xAI Grok API (requires GROK_API_KEY)
- cerebras: Cerebras API (requires CEREBRAS_API_KEY)
- openai-compat: Any OpenAI-compatible endpoint (requires base_url + API key)
"""

import base64
Expand Down Expand Up @@ -194,31 +199,51 @@ def supports_audio(self) -> bool:


# ---------------------------------------------------------------------------
# OpenAI Backend
# OpenAI Backend (also handles OpenAI-compatible providers)
# ---------------------------------------------------------------------------

# Maps provider name -> (base_url, api_key_env, default_model, has_whisper)
# has_whisper=False: the Whisper transcription endpoint only exists on OpenAI proper.
# Consumed by get_backend() to construct OpenAIBackend instances pointed at
# OpenAI-compatible third-party endpoints; the env var names here match .env.example.
# NOTE(review): the groq ("llama-3.3-70b-versatile") and cerebras ("llama3.1-70b")
# defaults appear to be text-only models, while this tool sends image content via
# analyze_image — presumably users must pass -m with a vision model for those
# providers; confirm, or switch the defaults to vision-capable models.
_OPENAI_COMPAT_PROVIDERS = {
"openrouter": ("https://openrouter.ai/api/v1", "OPENROUTER_API_KEY", "openai/gpt-4o", False),
"groq": ("https://api.groq.com/openai/v1", "GROQ_API_KEY", "llama-3.3-70b-versatile", False),
"grok": ("https://api.x.ai/v1", "GROK_API_KEY", "grok-2-vision-1212", False),
"cerebras": ("https://api.cerebras.ai/v1", "CEREBRAS_API_KEY", "llama3.1-70b", False),
}


class OpenAIBackend(Backend):
"""OpenAI GPT-4o API backend."""
"""OpenAI GPT-4o API backend.

Also handles OpenAI-compatible providers (Groq, Grok, OpenRouter, Cerebras) and
custom self-hosted endpoints. Use get_backend() to construct for named providers.
"""

def __init__(
    self,
    model: str = "gpt-4o",
    base_url: str | None = None,
    api_key_env: str = "OPENAI_API_KEY",
    has_whisper: bool = True,
):
    """Create a client for OpenAI or any OpenAI-compatible endpoint.

    Args:
        model: Chat/vision model name sent with every request.
        base_url: Endpoint base URL; None means OpenAI's default endpoint.
        api_key_env: Environment variable checked first for the API key.
        has_whisper: Whether this endpoint exposes the Whisper transcription
            API (True only for OpenAI proper).

    Raises:
        ImportError: The openai package is not installed.
        AnalysisError: No API key was found in the environment.
    """
    try:
        from openai import OpenAI
    except ImportError:
        raise ImportError(
            "OpenAI backend requires openai. Install with: pip install eyeroll[openai]"
        )
    # Try the provider-specific env var first, then fall back to OPENAI_API_KEY
    api_key = os.environ.get(api_key_env) or os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise AnalysisError(
            f"No API key found. Set {api_key_env} and try again.\n\n"
            "Or use a different backend:\n"
            " eyeroll watch <source> --backend gemini\n"
            " eyeroll watch <source> --backend ollama"
        )
    self._client = OpenAI(api_key=api_key, base_url=base_url)
    self._model = model
    self._has_whisper = has_whisper  # only OpenAI's endpoint has Whisper

def analyze_image(self, image_path: str, prompt: str, verbose: bool = False) -> str:
with open(image_path, "rb") as f:
Expand All @@ -240,11 +265,16 @@ def analyze_image(self, image_path: str, prompt: str, verbose: bool = False) ->

def analyze_video(self, video_path: str, prompt: str, verbose: bool = False) -> str:
    """Direct video upload is unsupported on OpenAI-compatible chat APIs.

    Always raises; callers should fall back to frame-by-frame analysis
    (analyze_image on extracted key frames).

    Raises:
        AnalysisError: Unconditionally.
    """
    raise AnalysisError(
        "OpenAI-compatible backends do not support direct video upload. "
        "Use frame-by-frame mode instead."
    )

def analyze_audio(self, audio_path: str, prompt: str, verbose: bool = False) -> str:
if not self._has_whisper:
raise AnalysisError(
f"Audio transcription is not supported for this provider. "
"Only the OpenAI backend has the Whisper endpoint."
)
with open(audio_path, "rb") as f:
transcript = self._client.audio.transcriptions.create(
model="whisper-1",
Expand All @@ -265,7 +295,7 @@ def supports_video(self) -> bool:

@property
def supports_audio(self) -> bool:
    # Audio transcription relies on the Whisper endpoint, which only OpenAI
    # proper provides; compat providers are constructed with has_whisper=False.
    return self._has_whisper


# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -450,8 +480,10 @@ def get_backend(name: str | None = None, **kwargs) -> Backend:
"""Get or create the active backend.

Args:
name: 'gemini', 'openai', or 'ollama'. Defaults to EYEROLL_BACKEND env var, then 'gemini'.
**kwargs: passed to backend constructor (e.g., model, host).
name: Backend name. One of: 'gemini', 'openai', 'ollama', 'openrouter', 'groq',
'grok', 'cerebras', 'openai-compat'.
Defaults to EYEROLL_BACKEND env var, then 'gemini'.
**kwargs: Passed to backend constructor (e.g., model, host, base_url).
"""
global _current_backend
if _current_backend is not None:
Expand All @@ -466,8 +498,23 @@ def get_backend(name: str | None = None, **kwargs) -> Backend:
_current_backend = OpenAIBackend(**kwargs)
elif name == "ollama":
_current_backend = OllamaBackend(**kwargs)
elif name in _OPENAI_COMPAT_PROVIDERS:
url, key_env, default_model, has_whisper = _OPENAI_COMPAT_PROVIDERS[name]
_current_backend = OpenAIBackend(
model=kwargs.get("model", default_model),
base_url=url,
api_key_env=key_env,
has_whisper=has_whisper,
)
elif name == "openai-compat":
# Requires base_url in kwargs; model is optional
_current_backend = OpenAIBackend(has_whisper=False, **kwargs)
else:
raise ValueError(f"Unknown backend: {name}. Use 'gemini', 'openai', or 'ollama'.")
raise ValueError(
f"Unknown backend: {name}. "
"Use 'gemini', 'openai', 'ollama', 'openrouter', 'groq', 'grok', "
"'cerebras', or 'openai-compat'."
)

return _current_backend

Expand Down
26 changes: 21 additions & 5 deletions eyeroll/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,12 @@ def _validate_openai(api_key: str) -> None:
help="Additional context (Slack message, issue description, what to do with the video, etc.)")
@click.option("--max-frames", default=20, show_default=True,
help="Maximum key frames to analyze from video.")
@click.option("--backend", "-b", type=click.Choice(["gemini", "openai", "ollama"]), default=None,
@click.option("--backend", "-b",
type=click.Choice(["gemini", "openai", "ollama", "openrouter", "groq", "grok", "cerebras", "openai-compat"]),
default=None,
help="Vision backend. Defaults to EYEROLL_BACKEND env var, then gemini.")
@click.option("--base-url", default=None,
help="Base URL for --backend openai-compat (e.g. https://my-server/v1).")
@click.option("--model", "-m", default=None,
help="Model override (e.g., qwen3-vl:8b for ollama, gemini-2.0-flash for gemini).")
@click.option("--codebase-context", "-cc", default=None,
Expand All @@ -154,23 +158,30 @@ def _validate_openai(api_key: str) -> None:
@click.option("--output", "-o", default=None,
help="Write output to file instead of stdout.")
@click.option("--verbose", "-v", is_flag=True, help="Show progress details.")
def watch(source, context, codebase_context, max_frames, backend, model, parallel, no_cache, output, verbose):
def watch(source, context, codebase_context, max_frames, backend, model, parallel, no_cache, output, verbose, base_url):
"""Analyze a video/screenshot and produce structured notes.

SOURCE can be a URL (YouTube, Loom, etc.) or a local file path.

\b
Backends:
gemini Google Gemini Flash API (default, requires GEMINI_API_KEY)
openai OpenAI GPT-4o (requires OPENAI_API_KEY)
ollama Local models via Ollama (e.g., qwen3-vl, no API key needed)
gemini Google Gemini Flash API (default, requires GEMINI_API_KEY)
openai OpenAI GPT-4o (requires OPENAI_API_KEY)
ollama Local models via Ollama (e.g., qwen3-vl, no API key needed)
openrouter OpenRouter API (requires OPENROUTER_API_KEY)
groq Groq API (requires GROQ_API_KEY)
grok xAI Grok API (requires GROK_API_KEY)
cerebras Cerebras API (requires CEREBRAS_API_KEY)
openai-compat Any OpenAI-compatible endpoint (requires --base-url)

\b
Examples:
eyeroll watch https://loom.com/share/abc123
eyeroll watch ./recording.mp4 --context "checkout broken after PR #432"
eyeroll watch demo.mp4 -c "create a skill from this" --backend ollama
eyeroll watch screenshot.png -b ollama -m qwen3-vl:2b
eyeroll watch video.mp4 --backend groq
eyeroll watch video.mp4 --backend openai-compat --base-url https://my-server/v1
"""
from .watch import watch as run_watch

Expand All @@ -181,6 +192,10 @@ def watch(source, context, codebase_context, max_frames, backend, model, paralle
elif not model.startswith("gemini"):
backend = "ollama"

# --base-url implies openai-compat if no backend specified
if base_url and not backend:
backend = "openai-compat"

# Default parallel workers: 3 for API backends (separate servers), 1 for ollama (single GPU)
if parallel is None:
effective_backend = backend or os.environ.get("EYEROLL_BACKEND", "gemini")
Expand All @@ -199,6 +214,7 @@ def watch(source, context, codebase_context, max_frames, backend, model, paralle
max_frames=max_frames,
backend_name=backend,
model=model,
base_url=base_url,
verbose=verbose,
no_cache=no_cache,
parallel=parallel,
Expand Down
7 changes: 6 additions & 1 deletion eyeroll/watch.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def watch(
max_frames: int = 20,
backend_name: str | None = None,
model: str | None = None,
base_url: str | None = None,
verbose: bool = False,
no_cache: bool = False,
parallel: int = 1,
Expand All @@ -47,8 +48,10 @@ def watch(
context: Optional text context (Slack message, issue body, etc.)
codebase_context: Optional codebase context (project structure, stack, key files).
max_frames: Maximum number of key frames to extract and analyze.
backend_name: 'gemini', 'openai', or 'ollama'. Defaults to EYEROLL_BACKEND env var, then 'gemini'.
backend_name: Backend name (e.g. 'gemini', 'openai', 'groq', 'openai-compat').
Defaults to EYEROLL_BACKEND env var, then 'gemini'.
model: Model override (e.g., 'qwen3-vl:8b' for ollama, 'gemini-2.0-flash' for gemini).
base_url: Base URL for openai-compat backend (e.g. https://my-server/v1).
verbose: Print progress to stderr.
no_cache: Skip cache lookup and force fresh analysis.
parallel: Number of concurrent workers for frame analysis (default: 1 = sequential).
Expand All @@ -60,6 +63,8 @@ def watch(
backend_kwargs = {}
if model:
backend_kwargs["model"] = model
if base_url:
backend_kwargs["base_url"] = base_url

# Initialize backend early to fail fast on config errors
backend = get_backend(backend_name, **backend_kwargs)
Expand Down
4 changes: 2 additions & 2 deletions tests/test_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ def test_openai_no_api_key():
env = {k: v for k, v in os.environ.items() if k != "OPENAI_API_KEY"}
with patch.dict(os.environ, env, clear=True), \
patch.dict("sys.modules", {"openai": mock_openai_mod}):
with pytest.raises(AnalysisError, match="No OpenAI API key found"):
with pytest.raises(AnalysisError, match="No API key found"):
OpenAIBackend()


Expand All @@ -264,7 +264,7 @@ def test_openai_supports_audio():

def test_openai_analyze_video_raises():
backend = _make_openai()
with pytest.raises(AnalysisError, match="does not support direct video"):
with pytest.raises(AnalysisError, match="do not support direct video"):
backend.analyze_video("/path/to/video.mp4", "describe")


Expand Down
6 changes: 5 additions & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@ def test_watch_local_file(runner, tmp_path):

# The CLI does: from .watch import watch as run_watch
# So we patch eyeroll.watch.watch
with patch("eyeroll.watch.watch", return_value=report) as mock_watch:
# Clear EYEROLL_BACKEND so the default (gemini) is used and parallel=3
env = {k: v for k, v in os.environ.items() if k != "EYEROLL_BACKEND"}
with patch("eyeroll.watch.watch", return_value=report) as mock_watch, \
patch.dict(os.environ, env, clear=True):
result = runner.invoke(cli, ["watch", "/fake/video.mp4"])

assert result.exit_code == 0
Expand All @@ -48,6 +51,7 @@ def test_watch_local_file(runner, tmp_path):
max_frames=20,
backend_name=None,
model=None,
base_url=None,
verbose=False,
no_cache=False,
parallel=3, # default for API backends (gemini)
Expand Down
Loading