Skip to content
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
de9589c
Adding a benchmark for gemm with input/output bf16 with fp32 accum.
aman2930 Oct 29, 2025
26225de
Fixing the total_flops calculations formula and adding the rescale yamls
aman2930 Oct 30, 2025
79bd4c0
Removing a yaml.
aman2930 Oct 30, 2025
197c59f
Merge branch 'sanbao_trace' into amangu/dev
aman2930 Oct 30, 2025
0fef570
Making change to gemm_simple to take in_dtype as input. Also added ge…
aman2930 Oct 30, 2025
879e1b8
Making change to gemm_simple to take in_dtype as input. Also added ge…
aman2930 Oct 30, 2025
23634e1
Fixing typos
aman2930 Oct 30, 2025
665898b
Merge branch 'sanbao_trace' into amangu/dev
aman2930 Oct 30, 2025
bb92b9e
Adding yaml for gemm_grouped = gemm_batched.
aman2930 Oct 30, 2025
fe7e553
Merge branch 'sanbao_trace' into amangu/dev
aman2930 Oct 31, 2025
9600a50
Adding gemm_batched for grouped gemm scaled matrix multiplication ben…
aman2930 Oct 31, 2025
ea9e423
Merge branch 'sanbao_trace' into amangu/dev
aman2930 Nov 4, 2025
a2c8ab7
Adding gemm_grouped benchmark using ragged_dot kernel.
aman2930 Nov 4, 2025
5db4ba0
Replacing tokamax.ragged_dot() with jax.lax.ragged_dot().
aman2930 Nov 5, 2025
18cef9b
Adding tiling scope for ragged_dot API usage. Fixed the iteration met…
aman2930 Nov 10, 2025
91ee081
Fixing a missing import.
aman2930 Nov 10, 2025
b1f7614
Adding the tiling parameter into the calculation metric function thou…
aman2930 Nov 10, 2025
80007f5
Adding ragged_dot_tiling to the metric metadata.
aman2930 Nov 10, 2025
3284215
Adding gemm_grouped benchmark configs and fixing gemm_simple config
aman2930 Nov 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions configs/gemm_grouped_inference.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Sweep configuration for the "gemm_batched_simple" benchmark.
# Every setting below belongs to the single benchmark entry, so it is nested
# under that list item rather than sitting at the top level of the document.
benchmarks:
  - benchmark_name: "gemm_batched_simple"
    # Trace, CSV and XLML metric outputs all point at the same directory.
    trace_dir: "../microbenchmarks/gemm_batched_simple_inference"
    csv_path: "../microbenchmarks/gemm_batched_simple_inference"
    xlml_metrics_dir: "../microbenchmarks/gemm_batched_simple_inference"
    num_runs: 1000
    # One list item per sweep. Each of b/m/k/n is a {start, end, multiplier}
    # range; presumably expanded geometrically by the runner - TODO confirm.
    benchmark_sweep_params:
      # Sweep 1: bf16 inputs, bf16 outputs.
      - b: {start: 4, end: 256, multiplier: 2}
        m: {start: 256, end: 2048, multiplier: 2}
        k: {start: 256, end: 2048, multiplier: 2}
        n: {start: 256, end: 2048, multiplier: 2}
        in_dtype_str: "bf16"
        out_dtype_str: "bf16"
      # Sweep 2: identical ranges, fp8 inputs, bf16 outputs.
      - b: {start: 4, end: 256, multiplier: 2}
        m: {start: 256, end: 2048, multiplier: 2}
        k: {start: 256, end: 2048, multiplier: 2}
        n: {start: 256, end: 2048, multiplier: 2}
        in_dtype_str: "fp8"
        out_dtype_str: "bf16"
8 changes: 8 additions & 0 deletions configs/gemm_grouped_rescale_inference.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Sweep configuration for the "gemm_batched" benchmark.
# Every setting below belongs to the single benchmark entry, so it is nested
# under that list item rather than sitting at the top level of the document.
benchmarks:
  - benchmark_name: "gemm_batched"
    # Trace, CSV and XLML metric outputs all point at the same directory.
    trace_dir: "../microbenchmarks/gemm_batched_rescale_inference"
    csv_path: "../microbenchmarks/gemm_batched_rescale_inference"
    xlml_metrics_dir: "../microbenchmarks/gemm_batched_rescale_inference"
    num_runs: 1000
    # Each of b/m/k/n is a {start, end, multiplier} range; presumably expanded
    # geometrically by the runner - TODO confirm.
    benchmark_sweep_params:
      # Single sweep: fp8 inputs, bf16 outputs.
      - b: {start: 4, end: 256, multiplier: 2}
        # NOTE(review): m starts at 56 here, whereas k and n in this sweep
        # start at 256 - confirm that 56 is intended and not a typo for 256.
        m: {start: 56, end: 2048, multiplier: 2}
        k: {start: 256, end: 2048, multiplier: 2}
        n: {start: 256, end: 2048, multiplier: 2}
        in_dtype_str: "fp8"
        out_dtype_str: "bf16"
8 changes: 8 additions & 0 deletions configs/gemm_rescale_inference.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Sweep configuration for the "gemm" benchmark.
# Every setting below belongs to the single benchmark entry, so it is nested
# under that list item rather than sitting at the top level of the document.
benchmarks:
  - benchmark_name: "gemm"
    # Trace, CSV and XLML metric outputs all point at the same directory.
    trace_dir: "../microbenchmarks/gemm_rescale_inference"
    csv_path: "../microbenchmarks/gemm_rescale_inference"
    xlml_metrics_dir: "../microbenchmarks/gemm_rescale_inference"
    num_runs: 1000
    # Each of m/k/n is a {start, end, multiplier} range; presumably expanded
    # geometrically by the runner - TODO confirm.
    benchmark_sweep_params:
      # Single sweep. No in_dtype_str / out_dtype_str is set here; presumably
      # the benchmark's own defaults apply - TODO confirm.
      - m: {start: 512, end: 65536, multiplier: 2}
        k: {start: 512, end: 65536, multiplier: 2}
        n: {start: 512, end: 65536, multiplier: 2}
9 changes: 9 additions & 0 deletions configs/gemm_simple_inference.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Sweep configuration for the "gemm_simple" benchmark.
# Every setting below belongs to the single benchmark entry, so it is nested
# under that list item rather than sitting at the top level of the document.
benchmarks:
  - benchmark_name: "gemm_simple"
    # Trace, CSV and XLML metric outputs all point at the same directory.
    trace_dir: "../microbenchmarks/gemm_simple_inference"
    csv_path: "../microbenchmarks/gemm_simple_inference"
    xlml_metrics_dir: "../microbenchmarks/gemm_simple_inference"
    num_runs: 1000
    # One list item per sweep. Each of m/k/n is a {start, end, multiplier}
    # range; presumably expanded geometrically by the runner - TODO confirm.
    benchmark_sweep_params:
      # Sweep 1: bf16 inputs, bf16 outputs.
      - m: {start: 512, end: 65536, multiplier: 2}
        k: {start: 512, end: 65536, multiplier: 2}
        n: {start: 512, end: 65536, multiplier: 2}
        in_dtype_str: "bf16"
        out_dtype_str: "bf16"
      # Sweep 2: identical ranges, fp8 inputs, bf16 outputs.
      - m: {start: 512, end: 65536, multiplier: 2}
        k: {start: 512, end: 65536, multiplier: 2}
        n: {start: 512, end: 65536, multiplier: 2}
        in_dtype_str: "fp8"
        out_dtype_str: "bf16"
Loading