Skip to content

Commit 90eb966

Browse files
TroyGardenmeta-codesync[bot]
authored and committed
add sparse_data_dist_base.yml config for pipeline benchmark (meta-pytorch#3429)
Summary: Pull Request resolved: meta-pytorch#3429. Context: YAML config support was added in the previous diff; this change adds the basic commands and usage to the README, and also adds typical benchmark use cases in the YAML config. Reviewed By: spmex. Differential Revision: D83849104. fbshipit-source-id: bf1328680bf0eefa7b0e05e0703324c75a5502fa
1 parent 988ef36 commit 90eb966

File tree

3 files changed

+38
-1
lines changed

3 files changed

+38
-1
lines changed
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# TorchRec Benchmark
2+
## usage
3+
- internal:
4+
```
5+
hash=$(hg whereami | cut -c 1-10)
6+
buck2 run @fbcode//mode/opt fbcode//torchrec/distributed/benchmark:benchmark_train_pipeline -- \
7+
--yaml_config=fbcode/torchrec/distributed/benchmark/yaml/sparse_data_dist_base.yml \
8+
--profile_name=sparse_data_dist_base_${hash:-$USER} # overrides the yaml config
9+
```
10+
- oss:
11+
```
12+
hash=`git rev-parse --short HEAD`
13+
python -m torchrec.distributed.benchmark.benchmark_train_pipeline \
14+
--yaml_config=torchrec/distributed/benchmark/yaml/sparse_data_dist_base.yml \
15+
--profile_name=sparse_data_dist_base_${hash:-$USER} # overrides the yaml config
16+
```

torchrec/distributed/benchmark/benchmark_train_pipeline.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ class RunOptions:
7878
Default is "kjt" (KeyedJaggedTensor).
7979
profile (str): Directory to save profiling results. If empty, profiling is disabled.
8080
Default is "" (disabled).
81+
profile_name (str): Name of the profiling file. Default is pipeline classname.
8182
planner_type (str): Type of sharding planner to use. Options are:
8283
- "embedding": EmbeddingShardingPlanner (default)
8384
- "hetero": HeteroEmbeddingShardingPlanner
@@ -100,6 +101,7 @@ class RunOptions:
100101
compute_kernel: EmbeddingComputeKernel = EmbeddingComputeKernel.FUSED
101102
input_type: str = "kjt"
102103
profile: str = ""
104+
profile_name: str = ""
103105
planner_type: str = "embedding"
104106
pooling_factors: Optional[List[float]] = None
105107
num_poolings: Optional[List[float]] = None
@@ -406,7 +408,11 @@ def _func_to_benchmark(
406408
pipeline.progress(iter(bench_inputs))
407409

408410
result = benchmark_func(
409-
name=type(pipeline).__name__,
411+
name=(
412+
type(pipeline).__name__
413+
if run_option.profile_name == ""
414+
else run_option.profile_name
415+
),
410416
bench_inputs=bench_inputs, # pyre-ignore
411417
prof_inputs=bench_inputs, # pyre-ignore
412418
num_benchmarks=5,
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# this is a very basic sparse data dist config
2+
# runs on 2 ranks, showing traces with reasonable workloads
3+
RunOptions:
4+
world_size: 2
5+
num_batches: 10
6+
sharding_type: table_wise
7+
profile: "."
8+
profile_name: "sparse_data_dist_base"
9+
# export_stacks: True # enable this to export stack traces
10+
PipelineConfig:
11+
pipeline: "sparse"
12+
EmbeddingTablesConfig:
13+
num_unweighted_features: 100
14+
num_weighted_features: 100
15+
embedding_feature_dim: 128

0 commit comments

Comments
 (0)