From a4c66505fb22de9a2386cb07aac501766d619ca0 Mon Sep 17 00:00:00 2001
From: Derek Kozikowski <106621615+derekk-nm@users.noreply.github.com>
Date: Tue, 2 Sep 2025 14:48:22 -0400
Subject: [PATCH 1/4] add deepseek-ai path, fix Smol gsm8k value

---
NOTE(review): the nested YAML indentation in the hunks below was restored
using 2-space steps; confirm it matches the target files before applying.
NOTE(review): the new deepseek-ai tasks.yml is created with `value: 0`, the
same value this patch replaces for SmolLM3-3B; it looks like a placeholder,
so confirm the real gsm8k baseline for DeepSeek-R1-0528.

 HuggingFaceTB/SmolLM3-3B/accuracy/tasks.yml     | 2 +-
 deepseek-ai/DeepSeek-R1-0528/accuracy/tasks.yml | 5 +++++
 deepseek-ai/DeepSeek-R1-0528/storage.yml        | 3 +++
 3 files changed, 9 insertions(+), 1 deletion(-)
 create mode 100644 deepseek-ai/DeepSeek-R1-0528/accuracy/tasks.yml
 create mode 100644 deepseek-ai/DeepSeek-R1-0528/storage.yml

diff --git a/HuggingFaceTB/SmolLM3-3B/accuracy/tasks.yml b/HuggingFaceTB/SmolLM3-3B/accuracy/tasks.yml
index 0b4633c..72dcb0f 100644
--- a/HuggingFaceTB/SmolLM3-3B/accuracy/tasks.yml
+++ b/HuggingFaceTB/SmolLM3-3B/accuracy/tasks.yml
@@ -2,4 +2,4 @@ tasks:
   - name: gsm8k
     metrics:
       - name: exact_match,strict-match
-        value: 0
+        value: 0.4708
diff --git a/deepseek-ai/DeepSeek-R1-0528/accuracy/tasks.yml b/deepseek-ai/DeepSeek-R1-0528/accuracy/tasks.yml
new file mode 100644
index 0000000..0b4633c
--- /dev/null
+++ b/deepseek-ai/DeepSeek-R1-0528/accuracy/tasks.yml
@@ -0,0 +1,5 @@
+tasks:
+  - name: gsm8k
+    metrics:
+      - name: exact_match,strict-match
+        value: 0
diff --git a/deepseek-ai/DeepSeek-R1-0528/storage.yml b/deepseek-ai/DeepSeek-R1-0528/storage.yml
new file mode 100644
index 0000000..fb93360
--- /dev/null
+++ b/deepseek-ai/DeepSeek-R1-0528/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/deepseek-ai/DeepSeek-R1-0528
+model: hf
+data: hf

From befaaf260318b48fbc0cb8f7943bd398ead252a4 Mon Sep 17 00:00:00 2001
From: Derek Kozikowski <106621615+derekk-nm@users.noreply.github.com>
Date: Wed, 3 Sep 2025 10:25:20 -0400
Subject: [PATCH 2/4] Qwen3 model on quad, deepseek limit gpu memory

---
NOTE(review): `gpu_memory_utilization` is snake_case while the sibling keys
in the same file (`max-model-len`, `tensor-parallel-size`,
`trust-remote-code`) are kebab-case; confirm the consumer accepts both
spellings, or follow up with `gpu-memory-utilization` for consistency.

 Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8/accuracy/server.yml   | 3 +++
 RedHatAI/DeepSeek-R1-0528-quantized.w4a16/accuracy/server.yml | 1 +
 2 files changed, 4 insertions(+)
 create mode 100644 Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8/accuracy/server.yml

diff --git a/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8/accuracy/server.yml b/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8/accuracy/server.yml
new file mode 100644
index 0000000..22eafe9
--- /dev/null
+++ b/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8/accuracy/server.yml
@@ -0,0 +1,3 @@
+trust-remote-code: true
+tensor-parallel-size: 4
+max-model-len: 16384
diff --git a/RedHatAI/DeepSeek-R1-0528-quantized.w4a16/accuracy/server.yml b/RedHatAI/DeepSeek-R1-0528-quantized.w4a16/accuracy/server.yml
index fbae63f..81cff55 100644
--- a/RedHatAI/DeepSeek-R1-0528-quantized.w4a16/accuracy/server.yml
+++ b/RedHatAI/DeepSeek-R1-0528-quantized.w4a16/accuracy/server.yml
@@ -1,3 +1,4 @@
 max-model-len: 4096
 tensor-parallel-size: 8
 trust-remote-code: true
+gpu_memory_utilization: 0.8

From 055ec5c6fd12f3cbffadf044394ac74bbdd227ad Mon Sep 17 00:00:00 2001
From: Derek Kozikowski <106621615+derekk-nm@users.noreply.github.com>
Date: Wed, 3 Sep 2025 10:43:43 -0400
Subject: [PATCH 3/4] try Qwen3 gpu_memory_utilization

---
 Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8/accuracy/server.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8/accuracy/server.yml b/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8/accuracy/server.yml
index 22eafe9..5ed9d38 100644
--- a/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8/accuracy/server.yml
+++ b/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8/accuracy/server.yml
@@ -1,3 +1,4 @@
 trust-remote-code: true
 tensor-parallel-size: 4
 max-model-len: 16384
+gpu_memory_utilization: 0.6

From a633fb13b2595602976822fbd6862946f1523304 Mon Sep 17 00:00:00 2001
From: Derek Kozikowski <106621615+derekk-nm@users.noreply.github.com>
Date: Wed, 3 Sep 2025 10:51:03 -0400
Subject: [PATCH 4/4] try max-model-len

---
 Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8/accuracy/server.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8/accuracy/server.yml b/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8/accuracy/server.yml
index 5ed9d38..d11f711 100644
--- a/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8/accuracy/server.yml
+++ b/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8/accuracy/server.yml
@@ -1,4 +1,3 @@
 trust-remote-code: true
 tensor-parallel-size: 4
-max-model-len: 16384
-gpu_memory_utilization: 0.6
+max-model-len: 4096