Skip to content

Commit 341574f

Browse files
Mirroring changes in test-pipeline.yaml into test-amd.yaml (vllm-project#27242)
Signed-off-by: Alexei V. Ivanov <[email protected]>
Signed-off-by: Alberto Perdomo <[email protected]>
1 parent 000ec03 commit 341574f

File tree

1 file changed

+65
-14
lines changed

1 file changed

+65
-14
lines changed

.buildkite/test-amd.yaml

Lines changed: 65 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -454,8 +454,8 @@ steps:
454454
- pytest -v -s compile/test_fusion_attn.py
455455
- pytest -v -s compile/test_functionalization.py
456456
- pytest -v -s compile/test_silu_mul_quant_fusion.py
457-
- pytest -v -s compile/test_sequence_parallelism.py
458-
- pytest -v -s compile/test_async_tp.py
457+
# - pytest -v -s compile/test_sequence_parallelism.py
458+
# - pytest -v -s compile/test_async_tp.py
459459
- pytest -v -s compile/test_fusion_all_reduce.py
460460
- pytest -v -s compile/test_decorator.py
461461
- pytest -v -s compile/test_noop_elimination.py
@@ -474,8 +474,8 @@ steps:
474474
- pytest -v -s compile/test_basic_correctness.py
475475
- pytest -v -s compile/piecewise/
476476

477-
- label: PyTorch Fullgraph Test # 20min
478-
timeout_in_minutes: 30
477+
- label: PyTorch Fullgraph Test # 22min
478+
timeout_in_minutes: 35
479479
mirror_hardwares: [amdexperimental, amdproduction]
480480
agent_pool: mi325_1
481481
# grade: Blocking
@@ -485,6 +485,7 @@ steps:
485485
- tests/compile
486486
commands:
487487
- pytest -v -s compile/test_full_graph.py
488+
- pytest -v -s compile/test_fusions_e2e.py
488489

489490
- label: Kernels Core Operation Test # 48min
490491
timeout_in_minutes: 75
@@ -494,6 +495,7 @@ steps:
494495
source_file_dependencies:
495496
- csrc/
496497
- tests/kernels/core
498+
- tests/kernels/test_top_k_per_row.py
497499
commands:
498500
- pytest -v -s kernels/core kernels/test_top_k_per_row.py
499501

@@ -606,7 +608,7 @@ steps:
606608
# we can only upgrade after this is resolved
607609
# TODO(jerryzh168): resolve the above comment
608610
- uv pip install --system torchao==0.13.0
609-
- VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization/
611+
- VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization/ --ignore quantization/test_blackwell_moe.py
610612

611613
- label: LM Eval Small Models # 53min
612614
timeout_in_minutes: 75
@@ -848,6 +850,18 @@ steps:
848850
- pytest -v -s models/multimodal -m core_model --ignore models/multimodal/generation/test_whisper.py --ignore models/multimodal/processing
849851
- cd .. && VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model # Otherwise, mp_method="spawn" doesn't work
850852

853+
- label: Multi-Modal Accuracy Eval (Small Models) # 50min
854+
mirror_hardwares: [amdexperimental]
855+
agent_pool: mi325_1
856+
timeout_in_minutes: 70
857+
working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
858+
source_file_dependencies:
859+
- vllm/multimodal/
860+
- vllm/inputs/
861+
- vllm/v1/core/
862+
commands:
863+
- pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-mm-small.txt --tp-size=1
864+
851865
- label: Multi-Modal Models Test (Extended) 1
852866
mirror_hardwares: [amdexperimental]
853867
agent_pool: mi325_1
@@ -923,8 +937,8 @@ steps:
923937
# Whisper needs spawn method to avoid deadlock
924938
- VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper
925939

926-
- label: Blackwell Test # 38 min
927-
timeout_in_minutes: 60
940+
- label: Blackwell Test # 21 min
941+
timeout_in_minutes: 30
928942
working_dir: "/vllm-workspace/"
929943
gpu: b200
930944
# optional: true
@@ -937,8 +951,6 @@ steps:
937951
- vllm/model_executor/layers/fused_moe/flashinfer_cutlass_prepare_finalize.py
938952
- vllm/model_executor/layers/quantization/utils/flashinfer_utils.py
939953
- vllm/v1/attention/backends/flashinfer.py
940-
- vllm/compilation/fusion.py
941-
- vllm/compilation/fusion_attn.py
942954
commands:
943955
- nvidia-smi
944956
- python3 examples/offline_inference/basic/chat.py
@@ -955,13 +967,32 @@ steps:
955967
- pytest -v -s tests/kernels/quantization/test_nvfp4_scaled_mm.py
956968
- pytest -v -s tests/kernels/quantization/test_flashinfer_scaled_mm.py
957969
- pytest -v -s tests/kernels/quantization/test_flashinfer_nvfp4_scaled_mm.py
970+
- pytest -v -s tests/kernels/quantization/test_nvfp4_qutlass.py
971+
- pytest -v -s tests/kernels/quantization/test_mxfp4_qutlass.py
958972
- pytest -v -s tests/kernels/moe/test_nvfp4_moe.py
959973
- pytest -v -s tests/kernels/moe/test_ocp_mx_moe.py
960-
# Fusion
961-
- pytest -v -s tests/compile/test_fusion_all_reduce.py
962-
- pytest -v -s tests/compile/test_fusion_attn.py::test_attention_quant_pattern
963974
- pytest -v -s tests/kernels/moe/test_flashinfer.py
975+
976+
- label: Blackwell Fusion Tests # 30 min
977+
timeout_in_minutes: 40
978+
working_dir: "/vllm-workspace/"
979+
gpu: b200
980+
source_file_dependencies:
981+
- csrc/quantization/fp4/
982+
- vllm/model_executor/layers/quantization/utils/flashinfer_utils.py
983+
- vllm/v1/attention/backends/flashinfer.py
984+
- vllm/compilation/
985+
# can affect pattern matching
986+
- vllm/model_executor/layers/layernorm.py
987+
- vllm/model_executor/layers/activation.py
988+
- vllm/model_executor/layers/quantization/input_quant_fp8.py
989+
commands:
990+
- nvidia-smi
991+
- pytest -v -s tests/compile/test_fusion_attn.py
964992
- pytest -v -s tests/compile/test_silu_mul_quant_fusion.py
993+
# this runner has 2 GPUs available even though num_gpus=2 is not set
994+
- pytest -v -s tests/compile/test_fusion_all_reduce.py
995+
- pytest -v -s tests/compile/test_fusions_e2e.py
965996

966997
- label: Blackwell GPT-OSS Eval
967998
timeout_in_minutes: 60
@@ -1129,6 +1160,11 @@ steps:
11291160
- pytest -v -s plugins_tests/test_io_processor_plugins.py
11301161
- pip uninstall prithvi_io_processor_plugin -y
11311162
# end io_processor plugins test
1163+
# begin stat_logger plugins test
1164+
- pip install -e ./plugins/vllm_add_dummy_stat_logger
1165+
- pytest -v -s plugins_tests/test_stats_logger_plugins.py
1166+
- pip uninstall dummy_stat_logger -y
1167+
# end stat_logger plugins test
11321168
# other tests continue here:
11331169
- pytest -v -s plugins_tests/test_scheduler_plugins.py
11341170
- pip install -e ./plugins/vllm_add_dummy_model
@@ -1173,7 +1209,6 @@ steps:
11731209
- pytest -v -s -x lora/test_llama_tp.py
11741210
- pytest -v -s -x lora/test_llm_with_multi_loras.py
11751211

1176-
11771212
- label: Weight Loading Multiple GPU Test # 33min
11781213
timeout_in_minutes: 45
11791214
mirror_hardwares: [amdexperimental]
@@ -1202,6 +1237,18 @@ steps:
12021237
commands:
12031238
- bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-large.txt
12041239

1240+
- label: NixlConnector PD accuracy tests (Distributed) # 30min
1241+
mirror_hardwares: [amdexperimental]
1242+
agent_pool: mi325_4
1243+
timeout_in_minutes: 30
1244+
working_dir: "/vllm-workspace/tests"
1245+
num_gpus: 4
1246+
source_file_dependencies:
1247+
- vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
1248+
- tests/v1/kv_connector/nixl_integration/
1249+
commands:
1250+
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
1251+
- bash v1/kv_connector/nixl_integration/tp_config_sweep_accuracy_test.sh
12051252

12061253
##### multi gpus test #####
12071254
##### A100 test #####
@@ -1233,12 +1280,16 @@ steps:
12331280
- pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4
12341281

12351282
##### H200 test #####
1236-
- label: Distrubted Tests (H200) # optional
1283+
- label: Distributed Tests (H200) # optional
12371284
gpu: h200
12381285
optional: true
12391286
working_dir: "/vllm-workspace/"
12401287
num_gpus: 2
12411288
commands:
1289+
- pytest -v -s tests/compile/test_async_tp.py
1290+
- pytest -v -s tests/compile/test_sequence_parallelism.py
1291+
- pytest -v -s tests/compile/test_fusion_all_reduce.py
1292+
- pytest -v -s tests/compile/test_fusions_e2e.py::test_tp2_attn_quant_allreduce_rmsnorm
12421293
- pytest -v -s tests/distributed/test_context_parallel.py
12431294
- CUDA_VISIBLE_DEVICES=1,2 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1 --dp-size=2 --max-model-len 2048
12441295

0 commit comments

Comments (0)