@@ -1064,6 +1064,7 @@ async def _init_wizard_async(dir: str):
10641064
10651065
10661066@main .command ()
1067+ @click .argument ("path" , required = False , default = None )
10671068@click .option (
10681069 "--pattern" ,
10691070 default = "*.yaml" ,
@@ -1139,6 +1140,7 @@ async def _init_wizard_async(dir: str):
11391140 help = "Generate HTML report to specified path" ,
11401141)
11411142def run (
1143+ path : Optional [str ],
11421144 pattern : str ,
11431145 test : tuple ,
11441146 filter : str ,
@@ -1154,14 +1156,19 @@ def run(
11541156 watch : bool ,
11551157 html_report : str ,
11561158):
1157- """Run test cases against the agent."""
1159+ """Run test cases against the agent.
1160+
1161+ PATH can be a directory containing test cases (e.g., examples/anthropic)
1162+ or a specific test file (e.g., examples/anthropic/test-case.yaml).
1163+ """
11581164 asyncio .run (_run_async (
1159- pattern , test , filter , output , verbose , track , compare_baseline , debug ,
1165+ path , pattern , test , filter , output , verbose , track , compare_baseline , debug ,
11601166 sequential , max_workers , max_retries , retry_delay , watch , html_report
11611167 ))
11621168
11631169
11641170async def _run_async (
1171+ path : Optional [str ],
11651172 pattern : str ,
11661173 test : tuple ,
11671174 filter : str ,
@@ -1185,6 +1192,13 @@ async def _run_async(
11851192 from evalview .core .retry import RetryConfig , with_retry
11861193 from evalview .core .config import ScoringWeights
11871194
1195+ # Load environment variables from path directory if provided
1196+ if path :
1197+ target_dir = Path (path ) if Path (path ).is_dir () else Path (path ).parent
1198+ path_env = target_dir / ".env.local"
1199+ if path_env .exists ():
1200+ load_dotenv (dotenv_path = str (path_env ), override = True )
1201+
11881202 # Interactive provider selection for LLM-as-judge
11891203 result = get_or_select_provider (console )
11901204 if result is None :
@@ -1235,10 +1249,25 @@ async def _run_async(
12351249
12361250 console .print ("[blue]Running test cases...[/blue]\n " )
12371251
1238- # Load config
1239- config_path = Path (".evalview/config.yaml" )
1252+ # Load config - check path directory first, then current directory
1253+ config_path = None
1254+ if path :
1255+ # Check for config in the provided path directory
1256+ target_dir = Path (path ) if Path (path ).is_dir () else Path (path ).parent
1257+ path_config = target_dir / ".evalview" / "config.yaml"
1258+ if path_config .exists ():
1259+ config_path = path_config
1260+ if verbose :
1261+ console .print (f"[dim]📂 Using config from: { path_config } [/dim]" )
1262+
1263+ # Fall back to current directory config
1264+ if config_path is None :
1265+ config_path = Path (".evalview/config.yaml" )
1266+
12401267 if not config_path .exists ():
12411268 console .print ("[red]❌ Config file not found. Run 'evalview init' first.[/red]" )
1269+ if path :
1270+ console .print (f"[dim]Looked in: { Path (path ) / '.evalview/config.yaml' } and .evalview/config.yaml[/dim]" )
12421271 return
12431272
12441273 with open (config_path ) as f :
@@ -1357,9 +1386,30 @@ async def _run_async(
13571386 tracker = RegressionTracker ()
13581387
13591388 # Load test cases
1389+ # Priority: 1. path argument, 2. pattern option, 3. default tests/test-cases/
1390+
1391+ # Check if path argument is provided (e.g., evalview run examples/anthropic)
1392+ if path :
1393+ target_path = Path (path )
1394+ if target_path .exists () and target_path .is_file ():
1395+ # Load single file directly
1396+ try :
1397+ test_cases = [TestCaseLoader .load_from_file (target_path )]
1398+ if verbose :
1399+ console .print (f"[dim]📄 Loading test case from: { path } [/dim]\n " )
1400+ except Exception as e :
1401+ console .print (f"[red]❌ Failed to load test case: { e } [/red]" )
1402+ return
1403+ elif target_path .exists () and target_path .is_dir ():
1404+ # Load all YAML files from specified directory
1405+ test_cases = TestCaseLoader .load_from_directory (target_path , "*.yaml" )
1406+ if verbose :
1407+ console .print (f"[dim]📁 Loading test cases from: { path } [/dim]\n " )
1408+ else :
1409+ console .print (f"[red]❌ Path not found: { path } [/red]" )
1410+ return
13601411 # Check if pattern is a direct file path
1361- pattern_path = Path (pattern )
1362- if pattern_path .exists () and pattern_path .is_file ():
1412+ elif (pattern_path := Path (pattern )).exists () and pattern_path .is_file ():
13631413 # Load single file directly
13641414 try :
13651415 test_cases = [TestCaseLoader .load_from_file (pattern_path )]
@@ -1378,8 +1428,9 @@ async def _run_async(
13781428 test_cases_dir = Path ("tests/test-cases" )
13791429 if not test_cases_dir .exists ():
13801430 console .print ("[red]❌ Test cases directory not found: tests/test-cases[/red]" )
1381- console .print ("[dim]Tip: You can also specify a file path directly:[/dim]" )
1382- console .print ("[dim] evalview run --pattern path/to/test-case.yaml[/dim]" )
1431+ console .print ("[dim]Tip: You can specify a path or file directly:[/dim]" )
1432+ console .print ("[dim] evalview run examples/anthropic[/dim]" )
1433+ console .print ("[dim] evalview run path/to/test-case.yaml[/dim]" )
13831434 return
13841435 test_cases = TestCaseLoader .load_from_directory (test_cases_dir , pattern )
13851436
@@ -1610,8 +1661,13 @@ async def execute_single_test(test_case):
16101661 """Execute a single test case with optional retry logic."""
16111662 test_adapter = get_adapter_for_test (test_case )
16121663
1664+ # Merge test case tools into context for adapters that support them
1665+ context = dict (test_case .input .context ) if test_case .input .context else {}
1666+ if hasattr (test_case , 'tools' ) and test_case .tools :
1667+ context ['tools' ] = test_case .tools
1668+
16131669 async def _execute ():
1614- return await test_adapter .execute (test_case .input .query , test_case . input . context )
1670+ return await test_adapter .execute (test_case .input .query , context )
16151671
16161672 # Execute with retry if configured
16171673 if retry_config .max_retries > 0 :
0 commit comments