From 350550c8e15bab1d31cef9559fa6c94efab22de1 Mon Sep 17 00:00:00 2001
From: sallyom
Date: Fri, 6 Jun 2025 18:29:00 -0400
Subject: [PATCH] add tool-calling quickstart

Signed-off-by: sallyom
---
NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Key order and scalar values are preserved, but
nesting depth is inferred from key order. In particular, confirm against the
chart's values schema whether `resources`, `prefill` and `decode` belong under
`modelservice` (as laid out below) or under `sampleApplication`.

 .../tool-calling/llama3-2-tool-calling.yaml | 53 +++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100644 quickstart/examples/tool-calling/llama3-2-tool-calling.yaml

diff --git a/quickstart/examples/tool-calling/llama3-2-tool-calling.yaml b/quickstart/examples/tool-calling/llama3-2-tool-calling.yaml
new file mode 100644
index 0000000..03ddb22
--- /dev/null
+++ b/quickstart/examples/tool-calling/llama3-2-tool-calling.yaml
@@ -0,0 +1,53 @@
+# Usage -- pass this file to the installer as its values file:
+#   ./llmd-installer.sh --values-file examples/tool-calling/llama3-2-tool-calling.yaml
+sampleApplication:
+  baseConfigMapRefName: basic-gpu-with-nixl-and-redis-lookup-preset
+  model:
+    modelArtifactURI: hf://meta-llama/Llama-3.2-3B-Instruct
+    modelName: "meta-llama/Llama-3.2-3B-Instruct"
+modelservice:
+  resources:
+    limits:
+      nvidia.com/gpu: 1
+  prefill:
+    replicas: 1
+    extraArgs:
+      - --enable-auto-tool-choice
+      - --tool-call-parser
+      - llama3_json
+      # Other chat templates: https://github.com/neuralmagic/vllm/tree/main/examples
+      - --chat-template
+      - /workspace/vllm/examples/tool_chat_template_llama3.2_json.jinja
+  decode:
+    replicas: 1
+    extraArgs:
+      - --enable-auto-tool-choice
+      - --tool-call-parser
+      - llama3_json
+      # Other chat templates: https://github.com/neuralmagic/vllm/tree/main/examples
+      - --chat-template
+      - /workspace/vllm/examples/tool_chat_template_llama3.2_json.jinja
+  epp:
+    defaultEnvVarsOverride:
+      - name: ENABLE_KVCACHE_AWARE_SCORER
+        value: "true"
+      - name: ENABLE_PREFIX_AWARE_SCORER
+        value: "true"
+      - name: ENABLE_LOAD_AWARE_SCORER
+        value: "true"
+      - name: ENABLE_SESSION_AWARE_SCORER
+        value: "true"
+      - name: PD_ENABLED
+        value: "true"
+      - name: PD_PROMPT_LEN_THRESHOLD
+        value: "10"
+      - name: PREFILL_ENABLE_KVCACHE_AWARE_SCORER
+        value: "true"
+      - name: PREFILL_ENABLE_LOAD_AWARE_SCORER
+        value: "true"
+      - name: PREFILL_ENABLE_PREFIX_AWARE_SCORER
+        value: "true"
+      - name: PREFILL_ENABLE_SESSION_AWARE_SCORER
+        value: "true"
+redis:
+  enabled: true