1616
1717from evalview .core .loader import TestCaseLoader
1818from evalview .core .pricing import get_model_pricing_info
19+ from evalview .core .llm_provider import (
20+ detect_available_providers ,
21+ get_missing_provider_message ,
22+ get_provider_status ,
23+ get_or_select_provider ,
24+ save_provider_preference ,
25+ PROVIDER_CONFIGS ,
26+ LLMProvider ,
27+ )
1928from evalview .adapters .http_adapter import HTTPAdapter
2029from evalview .adapters .tapescope_adapter import TapeScopeAdapter
2130from evalview .adapters .langgraph_adapter import LangGraphAdapter
@@ -364,8 +373,9 @@ async def health():
364373 console .print ("\n [bold]1. Start the demo agent:[/bold]" )
365374 console .print (" [cyan]pip install fastapi uvicorn[/cyan]" )
366375 console .print (" [cyan]python demo-agent/agent.py[/cyan]" )
367- console .print ("\n [bold]2. In another terminal, run tests:[/bold]" )
368- console .print (" [cyan]export OPENAI_API_KEY='your-key-here'[/cyan]" )
376+ console .print ("\n [bold]2. In another terminal, set an API key (any one):[/bold]" )
377+ console .print (" [cyan]export ANTHROPIC_API_KEY='your-key'[/cyan] [dim]# or OPENAI_API_KEY, GEMINI_API_KEY, XAI_API_KEY[/dim]" )
378+ console .print ("\n [bold]3. Run tests:[/bold]" )
369379 console .print (" [cyan]evalview run[/cyan]" )
370380 console .print ("\n [dim]The demo agent runs on http://localhost:8000[/dim]" )
371381 console .print ("[dim]Edit tests/test-cases/example.yaml to add more tests[/dim]\n " )
@@ -494,11 +504,13 @@ def quickstart():
494504 else :
495505 console .print ("[bold]Step 3/4:[/bold] Config already exists\n " )
496506
497- # Check for OPENAI_API_KEY
498- if not os .getenv ("OPENAI_API_KEY" ):
499- console .print ("[yellow]⚠️ OPENAI_API_KEY not set[/yellow]" )
500- console .print ("\n To complete the quickstart, set your OpenAI API key:" )
501- console .print (" [cyan]export OPENAI_API_KEY='your-key-here'[/cyan]\n " )
507+ # Check for any LLM provider API key
508+ available_providers = detect_available_providers ()
509+ if not available_providers :
510+ console .print ("[yellow]⚠️ No LLM provider API key set[/yellow]" )
511+ console .print ("\n To complete the quickstart, set at least one API key:" )
512+ console .print (" [cyan]export ANTHROPIC_API_KEY='your-key'[/cyan] [dim]# recommended[/dim]" )
513+ console .print (" [dim]# or: OPENAI_API_KEY, GEMINI_API_KEY, XAI_API_KEY[/dim]\n " )
502514 console .print ("Then run this command again.\n " )
503515 return
504516
@@ -1173,18 +1185,21 @@ async def _run_async(
11731185 from evalview .core .retry import RetryConfig , with_retry
11741186 from evalview .core .config import ScoringWeights
11751187
1176- # Validate OPENAI_API_KEY upfront (required for LLM-as-judge evaluation)
1177- openai_api_key = os .getenv ("OPENAI_API_KEY" )
1178- if not openai_api_key :
1179- console .print ("\n [red bold]❌ Error: OPENAI_API_KEY is required[/red bold]\n " )
1180- console .print ("EvalView uses LLM-as-judge to evaluate output quality." )
1181- console .print ("Please set your OpenAI API key:\n " )
1182- console .print (" [cyan]export OPENAI_API_KEY='your-key-here'[/cyan]" )
1183- console .print ("\n Or add it to your .env file:" )
1184- console .print (" [cyan]echo 'OPENAI_API_KEY=your-key-here' >> .env[/cyan]\n " )
1185- console .print ("[dim]Get your API key at: https://platform.openai.com/api-keys[/dim]" )
1188+ # Interactive provider selection for LLM-as-judge
1189+ result = get_or_select_provider (console )
1190+ if result is None :
11861191 return
11871192
1193+ selected_provider , selected_api_key = result
1194+
1195+ # Save preference for future runs
1196+ save_provider_preference (selected_provider )
1197+
1198+ # Set environment variable for the evaluators to use
1199+ config_for_provider = PROVIDER_CONFIGS [selected_provider ]
1200+ os .environ ["EVAL_PROVIDER" ] = selected_provider .value
1201+ os .environ [config_for_provider .env_var ] = selected_api_key
1202+
11881203 if debug :
11891204 console .print ("[dim]🐛 Debug mode enabled - will show raw responses[/dim]\n " )
11901205 verbose = True # Debug implies verbose
@@ -1229,14 +1244,17 @@ async def _run_async(
12291244 with open (config_path ) as f :
12301245 config = yaml .safe_load (f )
12311246
1232- # Extract model config
1247+ # Extract model config (can be string or dict)
12331248 model_config = config .get ("model" , {})
12341249 if verbose and model_config :
1235- console .print (f"[dim]💰 Model: { model_config .get ('name' , 'gpt-5-mini' )} [/dim]" )
1236- if "pricing" in model_config :
1237- console .print (
1238- f"[dim]💵 Custom pricing: ${ model_config ['pricing' ]['input_per_1m' ]:.2f} in, ${ model_config ['pricing' ]['output_per_1m' ]:.2f} out[/dim]"
1239- )
1250+ if isinstance (model_config , str ):
1251+ console .print (f"[dim]💰 Model: { model_config } [/dim]" )
1252+ elif isinstance (model_config , dict ):
1253+ console .print (f"[dim]💰 Model: { model_config .get ('name' , 'gpt-5-mini' )} [/dim]" )
1254+ if "pricing" in model_config :
1255+ console .print (
1256+ f"[dim]💵 Custom pricing: ${ model_config ['pricing' ]['input_per_1m' ]:.2f} in, ${ model_config ['pricing' ]['output_per_1m' ]:.2f} out[/dim]"
1257+ )
12401258
12411259 # SSRF protection config - defaults to True for local development
12421260 # Set to False in production when using untrusted test cases
@@ -1288,6 +1306,17 @@ async def _run_async(
12881306 model_config = model_config ,
12891307 allow_private_urls = allow_private_urls ,
12901308 )
1309+ elif adapter_type == "anthropic" :
1310+ # Anthropic Claude adapter for direct API testing
1311+ from evalview .adapters .anthropic_adapter import AnthropicAdapter
1312+ adapter = AnthropicAdapter (
1313+ model = config .get ("model" , "claude-sonnet-4-5-20250929" ),
1314+ tools = config .get ("tools" , []),
1315+ system_prompt = config .get ("system_prompt" ),
1316+ max_tokens = config .get ("max_tokens" , 4096 ),
1317+ timeout = config .get ("timeout" , 120.0 ),
1318+ verbose = verbose ,
1319+ )
12911320 else :
12921321 # HTTP adapter for standard REST APIs
12931322 adapter = HTTPAdapter (
@@ -1298,19 +1327,8 @@ async def _run_async(
12981327 allow_private_urls = allow_private_urls ,
12991328 )
13001329
1301- # Validate OPENAI_API_KEY is set (required for LLM-as-judge evaluation)
1302- openai_api_key = os .getenv ("OPENAI_API_KEY" )
1303- if not openai_api_key :
1304- console .print ("\n [red bold]❌ Error: OPENAI_API_KEY is required for evaluation[/red bold]\n " )
1305- console .print ("EvalView uses LLM-as-judge to evaluate agent output quality." )
1306- console .print ("Please set your OpenAI API key:\n " )
1307- console .print (" [cyan]export OPENAI_API_KEY='your-key-here'[/cyan]" )
1308- console .print ("\n Or add it to your .env file:" )
1309- console .print (" [cyan]echo 'OPENAI_API_KEY=your-key-here' >> .env[/cyan]\n " )
1310- console .print ("[dim]Get your API key at: https://platform.openai.com/api-keys[/dim]" )
1311- return
1312-
13131330 # Initialize evaluator with configurable weights
1331+ # (LLM provider is auto-detected by the OutputEvaluator)
13141332 scoring_weights = None
13151333 if "scoring" in config and "weights" in config ["scoring" ]:
13161334 try :
@@ -1321,7 +1339,6 @@ async def _run_async(
13211339 console .print (f"[yellow]⚠️ Invalid scoring weights in config: { e } . Using defaults.[/yellow]" )
13221340
13231341 evaluator = Evaluator (
1324- openai_api_key = openai_api_key ,
13251342 default_weights = scoring_weights ,
13261343 )
13271344
0 commit comments