Skip to content

Commit ce061da

Browse files
authored
enable mixed dtype benchmark (#472)
1 parent e9d477d commit ce061da

File tree

9 files changed

+778
-153
lines changed

9 files changed

+778
-153
lines changed
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#############################################################################
2+
### Benchmarks for required shapes for second SGLang release ###
3+
#############################################################################
4+
5+
# The data-type string in each benchmark name (e.g. "FP16U4FP16F16FP16S4") lists, in order, the types of: A, B, C, Mma, Scale, Zero
6+
7+
8+
# int4
9+
PvcMixedPrecisionGemmFP16U4FP16F16FP16S4_RCR_1 --bm_name=mixed_dtype_int4 --m=32 --k=4096 --n=14336 --l=1
10+
PvcMixedPrecisionGemmBF16U4BF16BF16BF16S4_RCR_1 --bm_name=mixed_dtype_int4 --m=32 --k=4096 --n=14336 --l=1
11+
PvcMixedPrecisionGemmFP16U4FP16S8FP16S4_RCR_1 --bm_name=mixed_dtype_int4 --m=32 --k=4096 --n=14336 --l=1
12+
PvcMixedPrecisionGemmFP16U4S8S8FP16S4_RCR_1 --bm_name=mixed_dtype_int4 --m=32 --k=4096 --n=14336 --l=1
13+
PvcMixedPrecisionGemmBF16U4BF16S8BF16S4_RCR_1 --bm_name=mixed_dtype_int4 --m=32 --k=4096 --n=14336 --l=1
14+
PvcMixedPrecisionGemmBF16U4S8S8BF16S4_RCR_1 --bm_name=mixed_dtype_int4 --m=32 --k=4096 --n=14336 --l=1
15+
16+
# int8
17+
PvcMixedPrecisionGemmBF16S8BF16S8BF16S8_RCR_1 --bm_name=mixed_dtype_int8 --m=32 --k=4096 --n=14336 --l=1
18+
PvcMixedPrecisionGemmFP16S8FP16S8FP16S8_RCR_1 --bm_name=mixed_dtype_int8 --m=32 --k=4096 --n=14336 --l=1

benchmarks/gemm/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ set(CONFIG_FILE_INTEL_SGLANG_SPLITK --config_file=${CMAKE_SOURCE_DIR}/benchmarks
3535

3636
set(CONFIG_FILE_CUDA --config_file=${CMAKE_SOURCE_DIR}/benchmarks/device/ampere/input_files/input_gemm.in)
3737

38+
set(CONFIG_FILE_INTEL_MIXED_DTYPE --config_file=${CMAKE_SOURCE_DIR}/benchmarks/device/bmg/input_files/input_sglang_gemm_mixed_dtype.in)
39+
3840
cutlass_benchmark_add_suite(cutlass_benchmarks_gemm)
3941

4042
if(CUTLASS_ENABLE_SYCL)
@@ -51,6 +53,7 @@ cutlass_benchmark_add_executable(
5153
CONFIG_FILE_INTEL_PYTORCH
5254
CONFIG_FILE_INTEL_SGLANG
5355
CONFIG_FILE_INTEL_SGLANG_SPLITK
56+
CONFIG_FILE_INTEL_MIXED_DTYPE
5457
)
5558

5659
else()

0 commit comments

Comments
 (0)