@@ -344,6 +344,7 @@ class TestLlama3_1_8BInstruct(LlmapiAccuracyTestHarness):
344
344
MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
345
345
MODEL_PATH = f"{ llm_models_root ()} /llama-3.1-model/Llama-3.1-8B-Instruct"
346
346
347
+ @pytest .mark .skip_less_device (2 )
347
348
@pytest .mark .skip_less_device_memory (32000 )
348
349
@pytest .mark .parametrize ("disable_overlap_scheduler" , [False , True ])
349
350
def test_auto_dtype (self , disable_overlap_scheduler ):
@@ -374,6 +375,7 @@ def test_auto_dtype(self, disable_overlap_scheduler):
374
375
task = GSM8K (self .MODEL_NAME )
375
376
task .evaluate (llm )
376
377
378
+ @pytest .mark .skip_less_device (2 )
377
379
def test_ngram (self ):
378
380
speculative_decoding_config = {
379
381
"decoding_type" : "NGram" ,
@@ -421,6 +423,7 @@ def test_ngram(self):
421
423
task = GSM8K (self .MODEL_NAME )
422
424
task .evaluate (llm )
423
425
426
+ @pytest .mark .skip_less_device (2 )
424
427
@parametrize_with_ids ("overlap_scheduler" , [True , False ])
425
428
@parametrize_with_ids ("eagle3_one_model" , [True , False ])
426
429
def test_eagle3 (self , overlap_scheduler , eagle3_one_model ):
@@ -479,6 +482,7 @@ def test_eagle3(self, overlap_scheduler, eagle3_one_model):
479
482
task = GSM8K (self .MODEL_NAME )
480
483
task .evaluate (llm )
481
484
485
+ @pytest .mark .skip_less_device (2 )
482
486
@pytest .mark .skip_less_device_memory (32000 )
483
487
@pytest .mark .parametrize ("backend" , ["xgrammar" , "llguidance" ])
484
488
def test_guided_decoding (self , backend : str , mocker ):
0 commit comments