llm-d · sallyom · Jun 6, 2025
diff --git a/quickstart/examples/tool-calling/llama3-2-tool-calling.yaml b/quickstart/examples/tool-calling/llama3-2-tool-calling.yaml
@@ -0,0 +1,53 @@
+# To run this example (from the quickstart/ directory):
+# ./llmd-installer.sh --values-file examples/tool-calling/llama3-2-tool-calling.yaml
+sampleApplication:
+  baseConfigMapRefName: 'basic-gpu-with-nixl-and-redis-lookup-preset'
+  model:  # both fields reference meta-llama/Llama-3.2-3B-Instruct
+    modelArtifactURI: 'hf://meta-llama/Llama-3.2-3B-Instruct'
+    modelName: 'meta-llama/Llama-3.2-3B-Instruct'
+modelservice:
+  resources:
+    limits:
+      nvidia.com/gpu: 1
+  prefill:  # receives the same tool-calling flags as decode below
+    replicas: 1
+    extraArgs:
+      - '--enable-auto-tool-choice'
+      - '--tool-call-parser'
+      - 'llama3_json'  # value consumed by --tool-call-parser
+      # Other chat templates: https://github.com/neuralmagic/vllm/tree/main/examples
+      - '--chat-template'
+      - '/workspace/vllm/examples/tool_chat_template_llama3.2_json.jinja'
+  decode:  # receives the same tool-calling flags as prefill above
+    replicas: 1
+    extraArgs:
+      - '--enable-auto-tool-choice'
+      - '--tool-call-parser'
+      - 'llama3_json'  # value consumed by --tool-call-parser
+      # Other chat templates: https://github.com/neuralmagic/vllm/tree/main/examples
+      - '--chat-template'
+      - '/workspace/vllm/examples/tool_chat_template_llama3.2_json.jinja'
+  epp:
+    defaultEnvVarsOverride:  # values are strings ('true', '10'), not YAML booleans/ints
+      - name: ENABLE_KVCACHE_AWARE_SCORER
+        value: 'true'
+      - name: ENABLE_PREFIX_AWARE_SCORER
+        value: 'true'
+      - name: ENABLE_LOAD_AWARE_SCORER
+        value: 'true'
+      - name: ENABLE_SESSION_AWARE_SCORER
+        value: 'true'
+      - name: PD_ENABLED  # NOTE(review): "PD" presumably = prefill/decode split — confirm
+        value: 'true'
+      - name: PD_PROMPT_LEN_THRESHOLD
+        value: '10'  # NOTE(review): unit not visible here — confirm
+      - name: PREFILL_ENABLE_KVCACHE_AWARE_SCORER  # PREFILL_* mirror the four scorer toggles above
+        value: 'true'
+      - name: PREFILL_ENABLE_LOAD_AWARE_SCORER
+        value: 'true'
+      - name: PREFILL_ENABLE_PREFIX_AWARE_SCORER
+        value: 'true'
+      - name: PREFILL_ENABLE_SESSION_AWARE_SCORER
+        value: 'true'
+redis:
+  enabled: true  # NOTE(review): presumably backs the "redis-lookup" preset named above — confirm