1 change: 1 addition & 0 deletions .jenkins/lm-eval-harness/configs/models-ovis.txt
@@ -0,0 +1 @@
ovis2_5-9b.yaml
12 changes: 12 additions & 0 deletions .jenkins/lm-eval-harness/configs/ovis2_5-9b.yaml
@@ -0,0 +1,12 @@
model_name: "/mnt/weka/data/llm/aidc-ai/ovis2.5-9b"
tasks:
- name: "gsm8k"
metrics:
- name: "exact_match,strict-match"
value: 0.700
- name: "exact_match,flexible-extract"
value: 0.700
limit: 256
num_fewshot: 8
dtype: "bfloat16"
trust_remote_code: True
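For context, a config like the one above is consumed by run-tests.sh, which drives lm-eval-harness and gates CI on the pinned metric values (typically within a small tolerance). A minimal Python sketch of that gating, assuming a `results` dict shaped like lm_eval output; the function below is illustrative, not the harness's actual code:

import yaml

def check_thresholds(config_path: str, results: dict) -> None:
    # `results` is assumed to follow lm_eval's layout:
    # {"results": {"gsm8k": {"exact_match,strict-match": 0.71, ...}}}
    with open(config_path) as f:
        cfg = yaml.safe_load(f)
    for task in cfg["tasks"]:
        scores = results["results"][task["name"]]
        for metric in task["metrics"]:
            measured = scores[metric["name"]]
            pinned = metric["value"]
            # CI passes only if the measured score stays at or above the pinned value
            assert measured >= pinned, (
                f"{task['name']}/{metric['name']}: {measured} < {pinned}")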
107 changes: 59 additions & 48 deletions .jenkins/test_config.yaml
@@ -5,54 +5,59 @@ stages:
- name: v0_gsm8k_small_g3_tp1_part1
flavor: g3
command: >-
export PT_HPU_LAZY_MODE=1 &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 1
- name: v0_gsm8k_small_g3_tp1_part2
flavor: g3
command: >-
export PT_HPU_LAZY_MODE=1 &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small-2.txt -t 1
- name: v0_gsm8k_small_g3_tp1_part3
flavor: g3
command: >-
export PT_HPU_LAZY_MODE=1 &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small-3.txt -t 1
- name: v0_gsm8k_small_g3_tp2
flavor: g3.s
command: >-
export PT_HPU_LAZY_MODE=1 &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 2
- name: v0_gsm8k_small_g2_tp1
flavor: g2
command: >-
export PT_HPU_LAZY_MODE=1 &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 1
- name: v0_gsm8k_small_g2_tp2
flavor: g2.s
command: >-
export PT_HPU_LAZY_MODE=1 &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 2
- name: v0_gsm8k_g2_deepseek-v2-lite_tp1
flavor: g3
command: >-
export PT_HPU_LAZY_MODE=1 &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-deepseek.txt -t 1
- name: v0_gsm8k_g3_gemma3_tp1
flavor: g3.s
command: >-
export PT_HPU_LAZY_MODE=1 &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-gemma.txt -t 1
- name: v0_gsm8k_g3_ovis2_5_tp1
flavor: g3.s
command: >-
export PT_HPU_LAZY_MODE=1 && export VLLM_SKIP_WARMUP=true &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-ovis.txt -t 1
- name: test_gsm8k_small_models_apc
steps:
- name: gsm8k_small_g3_tp1_apc
flavor: g3
command: >-
export VLLM_CONTIGUOUS_PA=false &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 1 -a
- name: gsm8k_small_g2_tp1_apc
flavor: g2
command: >-
export VLLM_CONTIGUOUS_PA=false &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 1 -a
- name: test_gsm8k_small_models_merged_prefill
steps:
@@ -66,139 +71,139 @@ stages:
- name: v0_gsm8k_large_g3_tp2_part1
flavor: g3.s
command: >-
export PT_HPU_LAZY_MODE=1 &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-large.txt -t 2
- name: v0_gsm8k_large_g3_tp2_part2
flavor: g3.s
command: >-
export PT_HPU_LAZY_MODE=1 &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-large-2.txt -t 2
- name: v0_gsm8k_large_g2_tp4
flavor: g2.m
command: >-
export PT_HPU_LAZY_MODE=1 &&
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-large.txt -t 4
- name: test_gsm8k_fp8
steps:
- name: gsm8k_small_g3_tp1_fp8
flavor: g3
command: >-
cd .jenkins/lm-eval-harness &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-fp8-g3-tp1.txt -t 1
# - name: gsm8k_small_g3_tp2_fp8
# flavor: g3.s
# command: >-
# cd .jenkins/lm-eval-harness &&
# PT_HPU_LAZY_MODE=1
# bash run-tests.sh -c configs/models-fp8.txt -t 2
- name: test_gsm8k_fp8_bypass_inc
steps:
- name: gsm8k_fp8_llama4_scout_g3_tp2_compressed_tensor
flavor: g3.s
command: >-
cd .jenkins/lm-eval-harness &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-fp8-compressedtensor.txt -t 2
- name: gsm8k_fp8_qwen3_30B_g3_tp1_block_scale_dynamic
flavor: g3
command: >-
cd .jenkins/lm-eval-harness &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-fp8-blockfp8.txt -t 1
- name: gsm8k_fp8_qwen3_30B_g3_tp1_block_scale_dequant
flavor: g3
command: >-
cd .jenkins/lm-eval-harness &&
PT_HPU_LAZY_MODE=1 VLLM_HPU_FORCE_CHANNEL_FP8=0
bash run-tests.sh -c configs/models-fp8-blockfp8.txt -t 1
- name: test_gsm8k_mss
steps:
- name: gsm8k_small_g3_tp1_mss
flavor: g3
command: >-
cd .jenkins/lm-eval-harness &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-mss.txt -t 1
- name: gsm8k_small_g2_tp1_mss
flavor: g2
command: >-
cd .jenkins/lm-eval-harness &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-mss.txt -t 1
- name: gsm8k_small_g3_tp2_mss
flavor: g3.s
command: >-
cd .jenkins/lm-eval-harness &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-mss.txt -t 2
- name: gsm8k_small_g2_tp2_mss
flavor: g2.s
command: >-
cd .jenkins/lm-eval-harness &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-mss.txt -t 2
- name: gsm8k_small_g2_tp1_spec_decode
flavor: g2
command: >-
cd .jenkins/lm-eval-harness &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-mss.txt -t 1
- name: test_gsm8k_spec_decode
steps:
# - name: gsm8k_small_g2_tp1_mlp_spec_decode
# flavor: g2
# command: >-
# PT_HPU_LAZY_MODE=1 VLLM_CONTIGUOUS_PA=false VLLM_SKIP_WARMUP=True
# pytest -v tests/spec_decode/e2e/test_mlp_correctness.py::test_mlp_e2e_greedy_correctness
- name: gsm8k_small_g2_tp1_medusa_spec_decode
flavor: g2
command: >-
PT_HPU_LAZY_MODE=1 VLLM_CONTIGUOUS_PA=false VLLM_SKIP_WARMUP=True
pytest -v tests/spec_decode/e2e/test_medusa_correctness.py::test_medusa_e2e_greedy_correctness
- name: gsm8k_small_g2_tp1_eagle_spec_decode
flavor: g2
command: >-
PT_HPU_LAZY_MODE=1 VLLM_CONTIGUOUS_PA=false VLLM_SKIP_WARMUP=True
pytest -v tests/spec_decode/e2e/test_eagle_correctness.py::test_eagle_e2e_greedy_correctness
# TODO(kwisniewski98): test temporarily disabled until a model specific to Gaudi2 is uploaded to the test infrastructure
# - name: test_deepseek_mtp
# steps:
# - name: test_deepseek_mtp_correctness
# flavor: g3
# command: >-
# PT_HPU_LAZY_MODE=1 VLLM_CONTIGUOUS_PA=false VLLM_SKIP_WARMUP=True
# pytest -v tests/spec_decode/e2e/test_mtp_correctness.py::test_mtp_e2e_greedy_correctness
- name: tests_lora
steps:
- name: test_llama_lora
flavor: g2
command: >-
PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true
pytest -v tests/lora/test_llama_hpu.py::test_llama_lora_1x
- name: test_multilora
flavor: g2
command: >-
PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true
pytest -v tests/lora/test_multilora_hpu.py::test_llama_multilora_1x
# - name: test_long_context
# flavor: g2
# command: >-
# PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true
# pytest -v tests/lora/test_long_context_hpu.py::test_quality
- name: tests_multimodal
steps:
- name: multimodal_small_g3_tp1
flavor: g3
command: >-
cd .jenkins/vision &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-small.txt -t 1
- name: multimodal_small_g3_tp2
flavor: g3.s
command: >-
cd .jenkins/vision &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-small.txt -t 2
- name: multimodal_qwen_tp1
flavor: g3.s
@@ -210,13 +215,13 @@
flavor: g3
command: >-
cd .jenkins/vision &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-mss.txt -t 1
- name: multimodal_small_g3_tp2_mss
flavor: g3.s
command: >-
cd .jenkins/vision &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-mss.txt -t 2
- name: multimodal_llama4_scout_g3_tp2_ep
flavor: g3.s
@@ -230,26 +235,32 @@
cd .jenkins/vision &&
PT_HPU_LAZY_MODE=1
bash run-tests.sh -c configs/models-gemma.txt -t 1
- name: multimodal_ovis2_5_g3_tp1_ep
flavor: g3.s
command: >-
cd .jenkins/vision &&
PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true
bash run-tests.sh -c configs/models-ovis.txt -t 1
- name: tests_int4_quantization
steps:
- name: test_awq
flavor: g2
command: >-
PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true
pytest -v tests/quantization/test_awq.py::test_awq
- name: test_gptq
flavor: g2
command: >-
PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true
pytest -v tests/quantization/test_gptq.py::test_gptq
- name: tests_guided_decode
steps:
- name: test_lazy_outlines
flavor: g2
command: >-
pip install -e tests/vllm_test_utils &&
export VLLM_SKIP_WARMUP=true && PT_HPU_LAZY_MODE=1
pytest -v tests/entrypoints/llm/test_lazy_outlines.py -s -vvv --log-cli-level=INFO
# - name: test_guided_generate
# flavor: g2
# command: >-
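For orientation, each stage in the file above is a list of steps, and each step's command is a folded YAML scalar (>-) that collapses to a single shell line; flavor appears to select the Gaudi node pool (g2/g3, with .s/.m variants for multi-card runs). A minimal sketch of a runner that executes one stage under those assumptions; this is illustrative, not the actual Jenkins pipeline code:

import subprocess
import yaml

def run_stage(config_path: str, stage_name: str) -> None:
    with open(config_path) as f:
        cfg = yaml.safe_load(f)
    stage = next(s for s in cfg["stages"] if s["name"] == stage_name)
    for step in stage["steps"]:
        # Each command arrives as one shell line, e.g.
        # "export PT_HPU_LAZY_MODE=1 && cd .jenkins/lm-eval-harness && bash run-tests.sh ..."
        print(f"[{step['name']}] running on flavor {step['flavor']}")
        subprocess.run(step["command"], shell=True, check=True)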
1 change: 1 addition & 0 deletions .jenkins/vision/configs/models-ovis.txt
@@ -0,0 +1 @@
ovis2_5-9b.yaml
7 changes: 7 additions & 0 deletions .jenkins/vision/configs/ovis2_5-9b.yaml
@@ -0,0 +1,7 @@
model_name: "/mnt/weka/data/llm/aidc-ai/ovis2.5-9b"
dtype: "bfloat16"
max_model_len: 32768
max_num_seqs: 32
num_prompts: 4
limit_mm_per_prompt_image: 5
trust_remote_code: True
2 changes: 2 additions & 0 deletions .jenkins/vision/test_enc_dec_model.py
@@ -24,6 +24,7 @@ def fail_on_exit():
def launch_enc_dec_model(config, question, images):
model_name = config.get('model_name')
dtype = config.get('dtype', 'bfloat16')
trust_remote_code = config.get('trust_remote_code', False)
max_num_seqs = config.get('max_num_seqs', 128)
max_model_len = config.get('max_model_len', 4096)
enforce_eager = config.get('enforce_eager', False)
@@ -41,6 +42,7 @@ def launch_enc_dec_model(config, question, images):
enable_expert_parallel=enable_expert_parallel,
enforce_eager=enforce_eager,
limit_mm_per_prompt={"image": limit_mm_per_prompt_image},
trust_remote_code=trust_remote_code,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
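A note on the change above: reading the flag with config.get('trust_remote_code', False) preserves the behavior of every existing vision config, so only configs that opt in explicitly, such as ovis2_5-9b.yaml, let vLLM execute code shipped in the model repository.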
31 changes: 31 additions & 0 deletions examples/offline_inference/vision_language_multi_image.py
@@ -460,6 +460,36 @@ def load_ovis(question: str, image_urls: list[str]) -> ModelRequestData:
)


# ovis2_5
def load_ovis2_5(question: str, image_urls: list[str]) -> ModelRequestData:
model_name = "AIDC-AI/Ovis2.5-2B"

engine_args = EngineArgs(
model=model_name,
max_model_len=8192,
max_num_seqs=2,
trust_remote_code=True,
dtype="half",
limit_mm_per_prompt={"image": len(image_urls)},
)

placeholders = "\n".join(
f"Image-{i}: <image>\n" for i, _ in enumerate(image_urls, start=1)
)
messages = [{"role": "user", "content": f"{placeholders}\n{question}"}]

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
prompt = tokenizer.apply_chat_template(
messages, tokenize=False, add_generation_prompt=True
)

return ModelRequestData(
engine_args=engine_args,
prompt=prompt,
image_data=[fetch_image(url) for url in image_urls],
)


def load_pixtral_hf(question: str, image_urls: list[str]) -> ModelRequestData:
model_name = "mistral-community/pixtral-12b"

@@ -742,6 +772,7 @@ def load_tarsier(question: str, image_urls: list[str]) -> ModelRequestData:
"mllama": load_mllama,
"NVLM_D": load_nvlm_d,
"ovis": load_ovis,
"ovis2_5": load_ovis2_5,
"phi3_v": load_phi3v,
"phi4_mm": load_phi4mm,
"pixtral_hf": load_pixtral_hf,
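To exercise the new loader outside the example's CLI, a short sketch that mirrors how the script turns a ModelRequestData into a generate() call elsewhere; the image URLs are placeholders and should be replaced with reachable images:

from dataclasses import asdict

from vllm import LLM, SamplingParams

urls = [
    "https://example.com/image1.jpg",  # placeholder URL
    "https://example.com/image2.jpg",  # placeholder URL
]
req = load_ovis2_5("What differs between these two images?", urls)
llm = LLM(**asdict(req.engine_args))
outputs = llm.generate(
    {"prompt": req.prompt, "multi_modal_data": {"image": req.image_data}},
    SamplingParams(temperature=0.0, max_tokens=128),
)
print(outputs[0].outputs[0].text)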