Skip to content

Commit e8526fb

Browse files
committed
Update collectives configurations for different topologies.
1 parent 1e6c308 commit e8526fb

18 files changed

+156
-16
lines changed
Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
1 1
benchmarks:
2 2
- benchmark_name: all_gather
3 3
benchmark_sweep_params:
4-
- {matrix_dim_range: {start: 64, end: 16384, multiplier: 4}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica
5-
- {matrix_dim_range: {start: 64, end: 16384, multiplier: 4}, dtype: "float32", mesh_shape: "1x8", ici_size_range: 8, sharding_strategy: "1x8", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
6-
7-
warmup_tries: 10
8-
trace_dir: "../microbenchmarks/all_gather"
9-
csv_path: "../microbenchmarks/all_gather"
10-
xlml_metrics_dir: "../microbenchmarks/all_gather"
11-
xla_dump_dir: "../microbenchmarks/all_gather/hlo_graphs"
4+
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica
5+
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
6+
trace_dir: "../microbenchmarks/all_gather_tpu7x_2x2x1"
7+
csv_path: "../microbenchmarks/all_gather_tpu7x_2x2x1"
8+
xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_2x2x1"
9+
xla_dump_dir: "../microbenchmarks/all_gather_tpu7x_2x2x1/hlo_graphs"
Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
1 1
benchmarks:
2 2
- benchmark_name: all_gather
3 3
benchmark_sweep_params:
4-
- {matrix_dim_range: {start: 64, end: 16384, multiplier: 4}, dtype: "float32", mesh_shape: "2x4x2", ici_size_range: 16, sharding_strategy: "1x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
5-
- {matrix_dim_range: {start: 64, end: 16384, multiplier: 4}, dtype: "float32", mesh_shape: "2x2x4", ici_size_range: 16, sharding_strategy: "1x2x4", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
6-
7-
warmup_tries: 10
8-
trace_dir: "../microbenchmarks/all_gather"
9-
csv_path: "../microbenchmarks/all_gather"
10-
xlml_metrics_dir: "../microbenchmarks/all_gather"
11-
xla_dump_dir: "../microbenchmarks/all_gather/hlo_graphs"
4+
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica
5+
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
6+
trace_dir: "../microbenchmarks/all_gather_tpu7x_2x2x2"
7+
csv_path: "../microbenchmarks/all_gather_tpu7x_2x2x2"
8+
xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_2x2x2"
9+
xla_dump_dir: "../microbenchmarks/all_gather_tpu7x_2x2x2/hlo_graphs"
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
benchmarks:
2+
- benchmark_name: all_gather
3+
benchmark_sweep_params:
4+
- {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
5+
- {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
6+
trace_dir: "../microbenchmarks/all_gather_tpu7x_2x2x4"
7+
csv_path: "../microbenchmarks/all_gather_tpu7x_2x2x4"
8+
xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_2x2x4"
9+
xla_dump_dir: "../microbenchmarks/all_gather_tpu7x_2x2x4/hlo_graphs"
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
benchmarks:
2+
- benchmark_name: all_gather
3+
benchmark_sweep_params:
4+
- {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
5+
- {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
6+
trace_dir: "../microbenchmarks/all_gather_tpu7x_2x4x4"
7+
csv_path: "../microbenchmarks/all_gather_tpu7x_2x4x4"
8+
xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_2x4x4"
9+
xla_dump_dir: "../microbenchmarks/all_gather_tpu7x_2x4x4/hlo_graphs"
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
benchmarks:
2+
- benchmark_name: all_gather
3+
benchmark_sweep_params:
4+
- {matrix_dim_range: {start: 64, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "16x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
5+
- {matrix_dim_range: {start: 64, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "16x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
6+
trace_dir: "../microbenchmarks/all_gather_tpu7x_4x4x4"
7+
csv_path: "../microbenchmarks/all_gather_tpu7x_4x4x4"
8+
xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_4x4x4"
9+
xla_dump_dir: "../microbenchmarks/all_gather_tpu7x_4x4x4/hlo_graphs"
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
benchmarks:
2+
- benchmark_name: all_gather
3+
benchmark_sweep_params:
4+
- {matrix_dim_range: {start: 128, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "32x4x2", ici_size_range: 256, sharding_strategy: "32x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
5+
- {matrix_dim_range: {start: 128, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "32x4x2", ici_size_range: 256, sharding_strategy: "32x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
6+
trace_dir: "../microbenchmarks/all_gather_tpu7x_4x4x8"
7+
csv_path: "../microbenchmarks/all_gather_tpu7x_4x4x8"
8+
xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_4x4x8"
9+
xla_dump_dir: "../microbenchmarks/all_gather_tpu7x_4x4x8/hlo_graphs"
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
benchmarks:
2+
- benchmark_name: psum
3+
benchmark_sweep_params:
4+
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica
5+
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
6+
trace_dir: "../microbenchmarks/psum_tpu7x_2x2x1"
7+
csv_path: "../microbenchmarks/psum_tpu7x_2x2x1"
8+
xlml_metrics_dir: "../microbenchmarks/psum_tpu7x_2x2x1"
9+
xla_dump_dir: "../microbenchmarks/psum_tpu7x_2x2x1/hlo_graphs"
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
benchmarks:
2+
- benchmark_name: psum
3+
benchmark_sweep_params:
4+
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica
5+
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
6+
trace_dir: "../microbenchmarks/psum_tpu7x_2x2x2"
7+
csv_path: "../microbenchmarks/psum_tpu7x_2x2x2"
8+
xlml_metrics_dir: "../microbenchmarks/psum_tpu7x_2x2x2"
9+
xla_dump_dir: "../microbenchmarks/psum_tpu7x_2x2x2/hlo_graphs"
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
benchmarks:
2+
- benchmark_name: psum
3+
benchmark_sweep_params:
4+
- {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
5+
- {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
6+
trace_dir: "../microbenchmarks/psum_tpu7x_2x2x4"
7+
csv_path: "../microbenchmarks/psum_tpu7x_2x2x4"
8+
xlml_metrics_dir: "../microbenchmarks/psum_tpu7x_2x2x4"
9+
xla_dump_dir: "../microbenchmarks/psum_tpu7x_2x2x4/hlo_graphs"
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
benchmarks:
2+
- benchmark_name: psum
3+
benchmark_sweep_params:
4+
- {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
5+
- {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
6+
trace_dir: "../microbenchmarks/psum_tpu7x_2x4x4"
7+
csv_path: "../microbenchmarks/psum_tpu7x_2x4x4"
8+
xlml_metrics_dir: "../microbenchmarks/psum_tpu7x_2x4x4"
9+
xla_dump_dir: "../microbenchmarks/psum_tpu7x_2x4x4/hlo_graphs"

0 commit comments

Comments
 (0)