Skip to content

Commit c4d1b6c

Browse files
committed
move test
Signed-off-by: fishbell <[email protected]>
1 parent 3bcdfc3 commit c4d1b6c

File tree

4 files changed: +65 −109 lines changed

.github/workflows/linux.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -534,7 +534,7 @@ jobs:
534534
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).tokenizers.test }}
535535
timeout: 60
536536
- name: 'API tests'
537-
cmd: 'python -m pytest -v ./tests/python_tests/test_continuous_batching.py ./tests/python_tests/test_generation_config.py ./tests/python_tests/test_sampling.py ./tests/python_tests/test_text_streamer.py'
537+
cmd: 'python -m pytest -v ./tests/python_tests/test_continuous_batching.py -k "not eagle3" ./tests/python_tests/test_generation_config.py ./tests/python_tests/test_sampling.py ./tests/python_tests/test_text_streamer.py'
538538
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test || fromJSON(needs.smart_ci.outputs.affected_components).sampling.test || fromJSON(needs.smart_ci.outputs.affected_components).text_streamer.test }}
539539
timeout: 60
540540
- name: 'Rag tests'
@@ -554,7 +554,7 @@ jobs:
554554
- name: 'EAGLE3 speculative decoding tests'
555555
cmd: |
556556
python -m pip install git+https://github.com/xufang-lisa/optimum-intel.git@e67abb1a20fb190b39c1dc0216cddb65b300210f
557-
python -m pytest -v ./tests/python_tests/test_eagle3.py'
557+
python -m pytest -v ./tests/python_tests/test_continuous_batching.py -k "eagle3"
558558
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).eagle3_speculative_decoding.test }}
559559
timeout: 90
560560
defaults:

.github/workflows/windows.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -635,7 +635,7 @@ jobs:
635635
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).tokenizers.test }}
636636
timeout: 60
637637
- name: 'API tests'
638-
cmd: 'python -m pytest -s -v tests/python_tests/test_continuous_batching.py tests/python_tests/test_generation_config.py tests/python_tests/test_sampling.py tests/python_tests/test_text_streamer.py'
638+
cmd: 'python -m pytest -s -v tests/python_tests/test_continuous_batching.py -k "not eagle3" tests/python_tests/test_generation_config.py tests/python_tests/test_sampling.py tests/python_tests/test_text_streamer.py'
639639
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test || fromJSON(needs.smart_ci.outputs.affected_components).sampling.test || fromJSON(needs.smart_ci.outputs.affected_components).text_streamer.test }}
640640
timeout: 60
641641
- name: 'Rag tests'
@@ -655,7 +655,7 @@ jobs:
655655
- name: 'EAGLE3 speculative decoding tests'
656656
cmd: |
657657
python -m pip install git+https://github.com/xufang-lisa/optimum-intel.git@e67abb1a20fb190b39c1dc0216cddb65b300210f
658-
python -m pytest -v ./tests/python_tests/test_eagle3.py'
658+
python -m pytest -v ./tests/python_tests/test_continuous_batching.py -k "eagle3"
659659
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).eagle3_speculative_decoding.test }}
660660
timeout: 90
661661
defaults:

tests/python_tests/test_continuous_batching.py

Lines changed: 61 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,9 @@
1616
from utils.generation_config import get_greedy, get_beam_search, \
1717
get_multinomial_all_parameters, get_multinomial_temperature_and_num_return_sequence, \
1818
get_multinomial_temperature_and_top_k, get_multinomial_temperature, get_multinomial_temperature_and_top_p
19-
from utils.hugging_face import download_and_convert_model
20-
from utils.ov_genai_pipelines import create_ov_pipeline, create_ov_cb_pipeline, PipelineType, dict_to_scheduler_config, generate_and_compare, prepare_generation_config_by_pipe_type, GenerationChatInputsType
19+
from utils.hugging_face import download_and_convert_model, run_hugging_face
20+
from utils.ov_genai_pipelines import create_ov_pipeline, create_ov_cb_pipeline, PipelineType, dict_to_scheduler_config, generate_and_compare, prepare_generation_config_by_pipe_type, convert_decoded_results_to_generation_result, GenerationChatInputsType
21+
from utils.comparation import compare_generation_results
2122
from data.models import get_chat_models_list
2223
from data.test_dataset import get_test_dataset
2324

@@ -489,22 +490,45 @@ def get_data_by_pipeline_type(model_path: Path, pipeline_type: str, generation_c
489490
return pipe, prompt, generation_config
490491

491492

492-
def run_extended_perf_metrics_collection(model_id, generation_config: GenerationConfig, prompt: str, pipeline_type: PipelineType):
493+
def run_extended_perf_metrics_collection(model_id, generation_config: GenerationConfig, prompt: str, pipeline_type: PipelineType, draft_model_id):
493494
_, _, model_path = download_and_convert_model(model_id)
494-
ov_pipe = create_ov_pipeline(model_path, pipeline_type=pipeline_type)
495+
draft_model_path = None
496+
if draft_model_id is not None:
497+
_,_, draft_model_path = download_and_convert_model(draft_model_id)
498+
ov_pipe = create_ov_pipeline(model_path, pipeline_type=pipeline_type, draft_model_path = draft_model_path)
495499
return ov_pipe.generate([prompt], generation_config).extended_perf_metrics
496500

501+
eagle_models_and_input = [
502+
("Qwen/Qwen3-1.7B", "AngelSlim/Qwen3-1.7B_eagle3", """Code:
503+
def add(a, b):
504+
return a + b
497505
506+
Question: Can you please add 2 and 3
507+
A:""")]
508+
509+
speculative_cases = [
510+
("TinyLlama/TinyLlama-1.1B-Chat-v1.0", None, "Why is the Sun yellow?"),
511+
eagle_models_and_input[0],
512+
]
498513
@pytest.mark.parametrize("pipeline_type", [PipelineType.PAGED_ATTENTION, PipelineType.SPECULATIVE_DECODING])
514+
@pytest.mark.parametrize("main_model_id,draft_model_id, prompt", speculative_cases)
499515
@pytest.mark.precommit
500-
def test_speculative_decoding_extended_perf_metrics(pipeline_type):
516+
def test_speculative_decoding_extended_perf_metrics(pipeline_type, main_model_id, draft_model_id, prompt):
501517
import time
502518
start_time = time.perf_counter()
503-
model_id : str = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
504-
generation_config = GenerationConfig(do_sample=False, max_new_tokens=20, ignore_eos=True, num_assistant_tokens=5)
505-
extended_perf_metrics = run_extended_perf_metrics_collection(model_id, generation_config, "Why is the Sun yellow?", pipeline_type)
506-
total_time = (time.perf_counter() - start_time) * 1000
519+
extended_perf_metrics = None
520+
if draft_model_id is None:
521+
generation_config = GenerationConfig(do_sample=False, max_new_tokens=20, ignore_eos=True, num_assistant_tokens=5)
522+
extended_perf_metrics = run_extended_perf_metrics_collection(main_model_id, generation_config, prompt, pipeline_type, draft_model_id)
523+
total_time = (time.perf_counter() - start_time) * 1000
507524

525+
else:
526+
if (pipeline_type == PipelineType.SPECULATIVE_DECODING):
527+
generation_config = GenerationConfig(do_sample=False, max_new_tokens=20, ignore_eos=True, num_assistant_tokens=5)
528+
extended_perf_metrics = run_extended_perf_metrics_collection(main_model_id, generation_config, prompt, pipeline_type, draft_model_id)
529+
total_time = (time.perf_counter() - start_time) * 1000
530+
531+
508532
if (pipeline_type == PipelineType.SPECULATIVE_DECODING):
509533
assert not extended_perf_metrics is None
510534
assert not extended_perf_metrics.main_model_metrics is None
@@ -542,3 +566,31 @@ def test_speculative_decoding_extended_perf_metrics(pipeline_type):
542566
assert std_gen_duration == 0
543567
else:
544568
assert extended_perf_metrics is None
569+
570+
devices = [
571+
('CPU', 'CPU')
572+
]
573+
@pytest.mark.parametrize("main_model,draft_model,prompt", eagle_models_and_input)
574+
@pytest.mark.parametrize("main_device,draft_device", devices)
575+
@pytest.mark.precommit
576+
def test_eagle3_sd_string_inputs(main_model, main_device, draft_model, draft_device, prompt):
577+
# Download and convert model:
578+
main_opt_model, main_hf_tokenizer, main_model_path = download_and_convert_model(main_model)
579+
__, __, draft_model_path = download_and_convert_model(draft_model)
580+
581+
# Create OpenVINO GenAI pipeline:
582+
583+
ov_pipe = create_ov_pipeline(main_model_path, pipeline_type = PipelineType.SPECULATIVE_DECODING, draft_model_path = draft_model_path)
584+
585+
# Run reference HF model:
586+
ov_generation_config = GenerationConfig(max_new_tokens=20)
587+
ref_gen_results = run_hugging_face(main_opt_model, main_hf_tokenizer, [prompt], ov_generation_config)
588+
589+
# Run OpenVINO GenAI pipeline:
590+
ov_decoded_results = ov_pipe.generate([prompt], ov_generation_config)
591+
ov_gen_results = convert_decoded_results_to_generation_result(ov_decoded_results, 1, 1, False)
592+
593+
del ov_pipe
594+
595+
# Compare results:
596+
compare_generation_results([prompt], ref_gen_results, ov_gen_results, ov_generation_config)

tests/python_tests/test_eagle3.py

Lines changed: 0 additions & 96 deletions
This file was deleted.

0 commit comments

Comments (0)