1 change: 1 addition & 0 deletions .jenkins/lm-eval-harness/configs/models-ovis.txt
@@ -0,0 +1 @@
ovis2_5-9b.yaml

Please fix the pre-commit issues.
Is this still planned for 1.23?

12 changes: 12 additions & 0 deletions .jenkins/lm-eval-harness/configs/ovis2_5-9b.yaml
@@ -0,0 +1,12 @@
model_name: "/mnt/weka/data/llm/aidc-ai/ovis2.5-9b"
tasks:
- name: "gsm8k"
metrics:
- name: "exact_match,strict-match"
value: 0.700
- name: "exact_match,flexible-extract"
value: 0.700
limit: 256
num_fewshot: 8
dtype: "bfloat16"
trust_remote_code: True
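
The values here are accuracy baselines rather than hyperparameters: the harness runs GSM8K with 8-shot prompting over 256 samples and checks both `exact_match` variants against a 0.700 floor. A rough sketch of the equivalent check, assuming run-tests.sh ultimately wraps lm-eval's `simple_evaluate` with the vLLM backend and a plain floor comparison (the real script's plumbing and tolerance handling may differ):

```python
# Sketch only: run-tests.sh may wire this up differently.
import lm_eval
import yaml

with open("configs/ovis2_5-9b.yaml") as f:
    cfg = yaml.safe_load(f)

results = lm_eval.simple_evaluate(
    model="vllm",
    model_args=(
        f"pretrained={cfg['model_name']},"
        f"dtype={cfg['dtype']},"
        f"trust_remote_code={cfg['trust_remote_code']}"
    ),
    tasks=[t["name"] for t in cfg["tasks"]],
    num_fewshot=cfg["num_fewshot"],
    limit=cfg["limit"],
)

# Metric names in the config match lm-eval's result keys,
# e.g. "exact_match,strict-match".
for task in cfg["tasks"]:
    for metric in task["metrics"]:
        measured = results["results"][task["name"]][metric["name"]]
        assert measured >= metric["value"], (
            f"{task['name']} {metric['name']}: "
            f"{measured:.3f} < {metric['value']}"
        )
```
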
107 changes: 59 additions & 48 deletions .jenkins/test_config.yaml
@@ -5,54 +5,59 @@ stages:
- name: v0_gsm8k_small_g3_tp1_part1
flavor: g3
command: >-
export PT_HPU_LAZY_MODE=1 &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 1
- name: v0_gsm8k_small_g3_tp1_part2
flavor: g3
command: >-
export PT_HPU_LAZY_MODE=1 &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small-2.txt -t 1
- name: v0_gsm8k_small_g3_tp1_part3
flavor: g3
command: >-
export PT_HPU_LAZY_MODE=1 &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small-3.txt -t 1
- name: v0_gsm8k_small_g3_tp2
flavor: g3.s
command: >-
export PT_HPU_LAZY_MODE=1 &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 2
- name: v0_gsm8k_small_g2_tp1
flavor: g2
command: >-
export PT_HPU_LAZY_MODE=1 &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 1
- name: v0_gsm8k_small_g2_tp2
flavor: g2.s
command: >-
export PT_HPU_LAZY_MODE=1 &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 2
- name: v0_gsm8k_g2_deepseek-v2-lite_tp1
flavor: g3
command: >-
export PT_HPU_LAZY_MODE=1 &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-deepseek.txt -t 1
- name: v0_gsm8k_g3_gemma3_tp1
flavor: g3.s
command: >-
export PT_HPU_LAZY_MODE=1 &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-gemma.txt -t 1
- name: v0_gsm8k_g3_ovis2_5_tp1
flavor: g3.s
command: >-
export PT_HPU_LAZY_MODE=1 && export VLLM_SKIP_WARMUP=true &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-ovis.txt -t 1
- name: test_gsm8k_small_models_apc
steps:
- name: gsm8k_small_g3_tp1_apc
flavor: g3
command: >-
export VLLM_CONTIGUOUS_PA=false &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 1 -a
- name: gsm8k_small_g2_tp1_apc
flavor: g2
command: >-
export VLLM_CONTIGUOUS_PA=false &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 1 -a
- name: test_gsm8k_small_models_merged_prefill
steps:
@@ -66,139 +71,139 @@ stages:
- name: v0_gsm8k_large_g3_tp2_part1
flavor: g3.s
command: >-
export PT_HPU_LAZY_MODE=1 &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-large.txt -t 2
- name: v0_gsm8k_large_g3_tp2_part2
flavor: g3.s
command: >-
export PT_HPU_LAZY_MODE=1 &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-large-2.txt -t 2
- name: v0_gsm8k_large_g2_tp4
flavor: g2.m
command: >-
export PT_HPU_LAZY_MODE=1 &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-large.txt -t 4
- name: test_gsm8k_fp8
steps:
- name: gsm8k_small_g3_tp1_fp8
flavor: g3
command: >-
cd .jenkins/lm-eval-harness &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-fp8-g3-tp1.txt -t 1
# - name: gsm8k_small_g3_tp2_fp8
# flavor: g3.s
# command: >-
# cd .jenkins/lm-eval-harness &&
# PT_HPU_LAZY_MODE=1
# bash run-tests.sh -c configs/models-fp8.txt -t 2
- name: test_gsm8k_fp8_bypass_inc
steps:
- name: gsm8k_fp8_llama4_scout_g3_tp2_compressed_tensor
flavor: g3.s
command: >-
cd .jenkins/lm-eval-harness &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-fp8-compressedtensor.txt -t 2
- name: gsm8k_fp8_qwen3_30B_g3_tp1_block_scale_dynamic
flavor: g3
command: >-
cd .jenkins/lm-eval-harness &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-fp8-blockfp8.txt -t 1
- name: gsm8k_fp8_qwen3_30B_g3_tp1_block_scale_dequant
flavor: g3
command: >-
cd .jenkins/lm-eval-harness &&
PT_HPU_LAZY_MODE=1 VLLM_HPU_FORCE_CHANNEL_FP8=0
bash run-tests.sh -c configs/models-fp8-blockfp8.txt -t 1
- name: test_gsm8k_mss
steps:
- name: gsm8k_small_g3_tp1_mss
flavor: g3
command: >-
cd .jenkins/lm-eval-harness &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-mss.txt -t 1
- name: gsm8k_small_g2_tp1_mss
flavor: g2
command: >-
cd .jenkins/lm-eval-harness &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-mss.txt -t 1
- name: gsm8k_small_g3_tp2_mss
flavor: g3.s
command: >-
cd .jenkins/lm-eval-harness &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-mss.txt -t 2
- name: gsm8k_small_g2_tp2_mss
flavor: g2.s
command: >-
cd .jenkins/lm-eval-harness &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-mss.txt -t 2
- name: gsm8k_small_g2_tp1_spec_decode
flavor: g2
command: >-
cd .jenkins/lm-eval-harness &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-mss.txt -t 1
- name: test_gsm8k_spec_decode
steps:
# - name: gsm8k_small_g2_tp1_mlp_spec_decode
# flavor: g2
# command: >-
# PT_HPU_LAZY_MODE=1 VLLM_CONTIGUOUS_PA=false VLLM_SKIP_WARMUP=True
# pytest -v tests/spec_decode/e2e/test_mlp_correctness.py::test_mlp_e2e_greedy_correctness
- name: gsm8k_small_g2_tp1_medusa_spec_decode
flavor: g2
command: >-
PT_HPU_LAZY_MODE=1 VLLM_CONTIGUOUS_PA=false VLLM_SKIP_WARMUP=True
pytest -v tests/spec_decode/e2e/test_medusa_correctness.py::test_medusa_e2e_greedy_correctness
- name: gsm8k_small_g2_tp1_eagle_spec_decode
flavor: g2
command: >-
PT_HPU_LAZY_MODE=1 VLLM_CONTIGUOUS_PA=false VLLM_SKIP_WARMUP=True
pytest -v tests/spec_decode/e2e/test_eagle_correctness.py::test_eagle_e2e_greedy_correctness
# TODO(kwisniewski98): test temporarily disabled until a Gaudi2-specific model is uploaded to the test infrastructure
# - name: test_deepseek_mtp
# steps:
# - name: test_deepseek_mtp_correctness
# flavor: g3
# command: >-
# PT_HPU_LAZY_MODE=1 VLLM_CONTIGUOUS_PA=false VLLM_SKIP_WARMUP=True
# pytest -v tests/spec_decode/e2e/test_mtp_correctness.py::test_mtp_e2e_greedy_correctness
- name: tests_lora
steps:
- name: test_llama_lora
flavor: g2
command: >-
PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true
pytest -v tests/lora/test_llama_hpu.py::test_llama_lora_1x
- name: test_multilora
flavor: g2
command: >-
PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true
pytest -v tests/lora/test_multilora_hpu.py::test_llama_multilora_1x
# - name: test_long_context
# flavor: g2
# command: >-
# PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true
# pytest -v tests/lora/test_long_context_hpu.py::test_quality
- name: tests_multimodal
steps:
- name: multimodal_small_g3_tp1
flavor: g3
command: >-
cd .jenkins/vision &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-small.txt -t 1
- name: multimodal_small_g3_tp2
flavor: g3.s
command: >-
cd .jenkins/vision &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-small.txt -t 2
- name: multimodal_qwen_tp1
flavor: g3.s
@@ -210,13 +215,13 @@
flavor: g3
command: >-
cd .jenkins/vision &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-mss.txt -t 1
- name: multimodal_small_g3_tp2_mss
flavor: g3.s
command: >-
cd .jenkins/vision &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-mss.txt -t 2
- name: multimodal_llama4_scout_g3_tp2_ep
flavor: g3.s
@@ -230,26 +235,32 @@
cd .jenkins/vision &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-gemma.txt -t 1
- name: multimodal_ovis2_5_g3_tp1_ep
flavor: g3.s
command: >-
cd .jenkins/vision &&
PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true
bash run-tests.sh -c configs/models-ovis.txt -t 1
- name: tests_int4_quantization
steps:
- name: test_awq
flavor: g2
command: >-
PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true
pytest -v tests/quantization/test_awq.py::test_awq
- name: test_gptq
flavor: g2
command: >-
PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true
pytest -v tests/quantization/test_gptq.py::test_gptq
- name: tests_guided_decode
steps:
- name: test_lazy_outlines
flavor: g2
command: >-
pip install -e tests/vllm_test_utils &&
export VLLM_SKIP_WARMUP=true && PT_HPU_LAZY_MODE=1
pytest -v tests/entrypoints/llm/test_lazy_outlines.py -s -vvv --log-cli-level=INFO
# - name: test_guided_generate
# flavor: g2
# command: >-
1 change: 1 addition & 0 deletions .jenkins/vision/configs/models-ovis.txt
@@ -0,0 +1 @@
ovis2_5-9b.yaml
7 changes: 7 additions & 0 deletions .jenkins/vision/configs/ovis2_5-9b.yaml
@@ -0,0 +1,7 @@
model_name: "/mnt/weka/data/llm/aidc-ai/ovis2.5-9b"
dtype: "bfloat16"
max_model_len: 32768
max_num_seqs: 32
num_prompts: 4
limit_mm_per_prompt_image: 5
trust_remote_code: True
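
These keys map directly onto vLLM `LLM` constructor arguments in the launcher below, while `num_prompts` stays on the harness side. A condensed sketch of the resulting engine setup (not a standalone test):

```python
# Condensed from launch_enc_dec_model in test_enc_dec_model.py below.
from vllm import LLM

llm = LLM(
    model="/mnt/weka/data/llm/aidc-ai/ovis2.5-9b",
    dtype="bfloat16",
    max_model_len=32768,
    max_num_seqs=32,
    # At most 5 images per prompt, per limit_mm_per_prompt_image.
    limit_mm_per_prompt={"image": 5},
    # Ovis 2.5 ships custom modeling code, hence the new config key.
    trust_remote_code=True,
)
```
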
2 changes: 2 additions & 0 deletions .jenkins/vision/test_enc_dec_model.py
@@ -24,6 +24,7 @@ def fail_on_exit():
def launch_enc_dec_model(config, question, images):
model_name = config.get('model_name')
dtype = config.get('dtype', 'bfloat16')
trust_remote_code = config.get('trust_remote_code', False)
max_num_seqs = config.get('max_num_seqs', 128)
max_model_len = config.get('max_model_len', 4096)
enforce_eager = config.get('enforce_eager', False)
@@ -41,6 +42,7 @@ def launch_enc_dec_model(config, question, images):
enable_expert_parallel=enable_expert_parallel,
enforce_eager=enforce_eager,
limit_mm_per_prompt={"image": limit_mm_per_prompt_image},
trust_remote_code=trust_remote_code,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
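
Because the new `trust_remote_code` key defaults to `False`, existing vision configs are unaffected; only configs that set it explicitly, such as ovis2_5-9b.yaml above, opt in to running the model's custom code from the Hub.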
31 changes: 31 additions & 0 deletions examples/offline_inference/vision_language_multi_image.py
@@ -460,6 +460,36 @@ def load_ovis(question: str, image_urls: list[str]) -> ModelRequestData:
)


# ovis2_5
def load_ovis2_5(question: str, image_urls: list[str]) -> ModelRequestData:
model_name = "AIDC-AI/Ovis2.5-2B"

engine_args = EngineArgs(
model=model_name,
max_model_len=8192,
max_num_seqs=2,
trust_remote_code=True,
dtype="half",
limit_mm_per_prompt={"image": len(image_urls)},
)

placeholders = "\n".join(
f"Image-{i}: <image>\n" for i, _ in enumerate(image_urls, start=1)
)
messages = [{"role": "user", "content": f"{placeholders}\n{question}"}]

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
prompt = tokenizer.apply_chat_template(
messages, tokenize=False, add_generation_prompt=True
)

return ModelRequestData(
engine_args=engine_args,
prompt=prompt,
image_data=[fetch_image(url) for url in image_urls],
)


def load_pixtral_hf(question: str, image_urls: list[str]) -> ModelRequestData:
model_name = "mistral-community/pixtral-12b"

@@ -742,6 +772,7 @@ def load_tarsier(question: str, image_urls: list[str]) -> ModelRequestData:
"mllama": load_mllama,
"NVLM_D": load_nvlm_d,
"ovis": load_ovis,
"ovis2_5": load_ovis2_5,
"phi3_v": load_phi3v,
"phi4_mm": load_phi4mm,
"pixtral_hf": load_pixtral_hf,
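
With the `model_map` entry in place, the new loader is reachable through the script's CLI; it can also be driven directly. A minimal sketch, assuming it runs inside the example module (the image URLs are placeholders):

```python
# Hypothetical driver; the script's main() resolves model_map["ovis2_5"]
# and builds the engine the same way.
from dataclasses import asdict

from vllm import LLM, SamplingParams

req = load_ovis2_5(
    question="Describe what the two images have in common.",
    image_urls=[
        "https://example.com/a.jpg",  # placeholder URLs
        "https://example.com/b.jpg",
    ],
)

llm = LLM(**asdict(req.engine_args))
outputs = llm.generate(
    {"prompt": req.prompt, "multi_modal_data": {"image": req.image_data}},
    SamplingParams(temperature=0.0, max_tokens=128),
)
print(outputs[0].outputs[0].text)
```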