Add tensor and pipeline parallelism to run config; pass --llm_gen_config_path when MLC_CUSTOM_CONFIG is set

anandhu-eng · web-flow · commit 8a14e6093a03 · 2025-10-29T10:03:33.000+05:30
diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py
@@ -703,6 +703,8 @@ def preprocess(i):
             else:
                 run_config += f" --tensor_parallelism={tmp_tp_size}"
                 run_config += f" --pipeline_parallelism={tmp_pp_size}"
+            if is_true(env.get('MLC_CUSTOM_CONFIG', '')):
+                run_config += f" --llm_gen_config_path={env['MLC_MLPERF_INFERENCE_NVIDIA_CODE_PATH']}/code/llama2-70b/tensorrt/generation_config.json"
 
         enable_sort = env.get('MLC_MLPERF_NVIDIA_HARNESS_ENABLE_SORT')
         if enable_sort and not is_false(enable_sort):