@@ -454,8 +454,8 @@ steps:
454454 - pytest -v -s compile/test_fusion_attn.py
455455 - pytest -v -s compile/test_functionalization.py
456456 - pytest -v -s compile/test_silu_mul_quant_fusion.py
457- - pytest -v -s compile/test_sequence_parallelism.py
458- - pytest -v -s compile/test_async_tp.py
457+ # - pytest -v -s compile/test_sequence_parallelism.py
458+ # - pytest -v -s compile/test_async_tp.py
459459 - pytest -v -s compile/test_fusion_all_reduce.py
460460 - pytest -v -s compile/test_decorator.py
461461 - pytest -v -s compile/test_noop_elimination.py
@@ -474,8 +474,8 @@ steps:
474474 - pytest -v -s compile/test_basic_correctness.py
475475 - pytest -v -s compile/piecewise/
476476
477- - label : PyTorch Fullgraph Test # 20min
478- timeout_in_minutes : 30
477+ - label : PyTorch Fullgraph Test # 22min
478+ timeout_in_minutes : 35
479479 mirror_hardwares : [amdexperimental, amdproduction]
480480 agent_pool : mi325_1
481481 # grade: Blocking
@@ -485,6 +485,7 @@ steps:
485485 - tests/compile
486486 commands :
487487 - pytest -v -s compile/test_full_graph.py
488+ - pytest -v -s compile/test_fusions_e2e.py
488489
489490- label : Kernels Core Operation Test # 48min
490491 timeout_in_minutes : 75
@@ -494,6 +495,7 @@ steps:
494495 source_file_dependencies :
495496 - csrc/
496497 - tests/kernels/core
498+ - tests/kernels/test_top_k_per_row.py
497499 commands :
498500 - pytest -v -s kernels/core kernels/test_top_k_per_row.py
499501
@@ -606,7 +608,7 @@ steps:
606608 # we can only upgrade after this is resolved
607609 # TODO(jerryzh168): resolve the above comment
608610 - uv pip install --system torchao==0.13.0
609- - VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization/
611+ - VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization/ --ignore quantization/test_blackwell_moe.py
610612
611613- label : LM Eval Small Models # 53min
612614 timeout_in_minutes : 75
@@ -848,6 +850,18 @@ steps:
848850 - pytest -v -s models/multimodal -m core_model --ignore models/multimodal/generation/test_whisper.py --ignore models/multimodal/processing
849851 - cd .. && VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model # Otherwise, mp_method="spawn" doesn't work
850852
853+ - label : Multi-Modal Accuracy Eval (Small Models) # 50min
854+ mirror_hardwares : [amdexperimental]
855+ agent_pool : mi325_1
856+ timeout_in_minutes : 70
857+ working_dir : "/vllm-workspace/.buildkite/lm-eval-harness"
858+ source_file_dependencies :
859+ - vllm/multimodal/
860+ - vllm/inputs/
861+ - vllm/v1/core/
862+ commands :
863+ - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-mm-small.txt --tp-size=1
864+
851865- label : Multi-Modal Models Test (Extended) 1
852866 mirror_hardwares : [amdexperimental]
853867 agent_pool : mi325_1
@@ -923,8 +937,8 @@ steps:
923937 # Whisper needs spawn method to avoid deadlock
924938 - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper
925939
926- - label : Blackwell Test # 38 min
927- timeout_in_minutes : 60
940+ - label : Blackwell Test # 21 min
941+ timeout_in_minutes : 30
928942 working_dir : "/vllm-workspace/"
929943 gpu : b200
930944 # optional: true
@@ -937,8 +951,6 @@ steps:
937951 - vllm/model_executor/layers/fused_moe/flashinfer_cutlass_prepare_finalize.py
938952 - vllm/model_executor/layers/quantization/utils/flashinfer_utils.py
939953 - vllm/v1/attention/backends/flashinfer.py
940- - vllm/compilation/fusion.py
941- - vllm/compilation/fusion_attn.py
942954 commands :
943955 - nvidia-smi
944956 - python3 examples/offline_inference/basic/chat.py
@@ -955,13 +967,32 @@ steps:
955967 - pytest -v -s tests/kernels/quantization/test_nvfp4_scaled_mm.py
956968 - pytest -v -s tests/kernels/quantization/test_flashinfer_scaled_mm.py
957969 - pytest -v -s tests/kernels/quantization/test_flashinfer_nvfp4_scaled_mm.py
970+ - pytest -v -s tests/kernels/quantization/test_nvfp4_qutlass.py
971+ - pytest -v -s tests/kernels/quantization/test_mxfp4_qutlass.py
958972 - pytest -v -s tests/kernels/moe/test_nvfp4_moe.py
959973 - pytest -v -s tests/kernels/moe/test_ocp_mx_moe.py
960- # Fusion
961- - pytest -v -s tests/compile/test_fusion_all_reduce.py
962- - pytest -v -s tests/compile/test_fusion_attn.py::test_attention_quant_pattern
963974 - pytest -v -s tests/kernels/moe/test_flashinfer.py
975+
976+ - label : Blackwell Fusion Tests # 30 min
977+ timeout_in_minutes : 40
978+ working_dir : "/vllm-workspace/"
979+ gpu : b200
980+ source_file_dependencies :
981+ - csrc/quantization/fp4/
982+ - vllm/model_executor/layers/quantization/utils/flashinfer_utils.py
983+ - vllm/v1/attention/backends/flashinfer.py
984+ - vllm/compilation/
985+ # can affect pattern matching
986+ - vllm/model_executor/layers/layernorm.py
987+ - vllm/model_executor/layers/activation.py
988+ - vllm/model_executor/layers/quantization/input_quant_fp8.py
989+ commands :
990+ - nvidia-smi
991+ - pytest -v -s tests/compile/test_fusion_attn.py
964992 - pytest -v -s tests/compile/test_silu_mul_quant_fusion.py
993+ # this runner has 2 GPUs available even though num_gpus=2 is not set
994+ - pytest -v -s tests/compile/test_fusion_all_reduce.py
995+ - pytest -v -s tests/compile/test_fusions_e2e.py
965996
966997- label : Blackwell GPT-OSS Eval
967998 timeout_in_minutes : 60
@@ -1129,6 +1160,11 @@ steps:
11291160 - pytest -v -s plugins_tests/test_io_processor_plugins.py
11301161 - pip uninstall prithvi_io_processor_plugin -y
11311162 # end io_processor plugins test
1163+ # begin stat_logger plugins test
1164+ - pip install -e ./plugins/vllm_add_dummy_stat_logger
1165+ - pytest -v -s plugins_tests/test_stats_logger_plugins.py
1166+ - pip uninstall dummy_stat_logger -y
1167+ # end stat_logger plugins test
11321168 # other tests continue here:
11331169 - pytest -v -s plugins_tests/test_scheduler_plugins.py
11341170 - pip install -e ./plugins/vllm_add_dummy_model
@@ -1173,7 +1209,6 @@ steps:
11731209 - pytest -v -s -x lora/test_llama_tp.py
11741210 - pytest -v -s -x lora/test_llm_with_multi_loras.py
11751211
1176-
11771212- label : Weight Loading Multiple GPU Test # 33min
11781213 timeout_in_minutes : 45
11791214 mirror_hardwares : [amdexperimental]
@@ -1202,6 +1237,18 @@ steps:
12021237 commands :
12031238 - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-large.txt
12041239
1240+ - label : NixlConnector PD accuracy tests (Distributed) # 30min
1241+ mirror_hardwares : [amdexperimental]
1242+ agent_pool : mi325_4
1243+ timeout_in_minutes : 30
1244+ working_dir : "/vllm-workspace/tests"
1245+ num_gpus : 4
1246+ source_file_dependencies :
1247+ - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
1248+ - tests/v1/kv_connector/nixl_integration/
1249+ commands :
1250+ - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
1251+ - bash v1/kv_connector/nixl_integration/tp_config_sweep_accuracy_test.sh
12051252
12061253# #### multi gpus test #####
12071254# #### A100 test #####
@@ -1233,12 +1280,16 @@ steps:
12331280 - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4
12341281
12351282# #### H200 test #####
1236- - label : Distrubted Tests (H200) # optional
1283+ - label : Distributed Tests (H200) # optional
12371284 gpu : h200
12381285 optional : true
12391286 working_dir : " /vllm-workspace/"
12401287 num_gpus : 2
12411288 commands :
1289+ - pytest -v -s tests/compile/test_async_tp.py
1290+ - pytest -v -s tests/compile/test_sequence_parallelism.py
1291+ - pytest -v -s tests/compile/test_fusion_all_reduce.py
1292+ - pytest -v -s tests/compile/test_fusions_e2e.py::test_tp2_attn_quant_allreduce_rmsnorm
12421293 - pytest -v -s tests/distributed/test_context_parallel.py
12431294 - CUDA_VISIBLE_DEVICES=1,2 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1 --dp-size=2 --max-model-len 2048
12441295
0 commit comments