Commit ded4cb5

[Gaudi] Enable Qwen3_moe model (#3244)
Signed-off-by: yuanwu <[email protected]>
1 parent: a220e57

File tree

3 files changed: 563 additions, 2 deletions

backends/gaudi/server/text_generation_server/models/__init__.py

Lines changed: 20 additions & 1 deletion
@@ -104,6 +104,9 @@
 from text_generation_server.models.custom_modeling.flash_qwen3_modeling import (
     Qwen3ForCausalLM,
 )
+from text_generation_server.models.custom_modeling.flash_qwen3_moe_modeling import (
+    Qwen3MoeForCausalLM,
+)
 from text_generation_server.models.custom_modeling.flash_mistral_modeling import (
     FlashMistralForCausalLM,
 )
@@ -292,7 +295,11 @@ class ModelType(enum.Enum):
         "name": "Qwen 3",
         "url": "https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f",
     }
-
+    QWEN3_MOE = {
+        "type": "qwen3_moe",
+        "name": "Qwen 3 Moe",
+        "url": "https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f",
+    }
     GALACTICA = {
         "type": "galactica",
         "name": "Galactica",
@@ -808,6 +815,18 @@ def get_model(
             trust_remote_code=trust_remote_code,
             lora_adapter_ids=lora_adapter_ids,
         )
+    elif model_type == QWEN3_MOE:
+        return FlashCausalLM(
+            model_id=model_id,
+            model_class=Qwen3MoeForCausalLM,
+            revision=revision,
+            quantize=quantize,
+            speculator=speculator,
+            dtype=dtype,
+            kv_cache_dtype=kv_cache_dtype,
+            trust_remote_code=trust_remote_code,
+            lora_adapter_ids=lora_adapter_ids,
+        )
     elif model_type == MLLAMA:
         return FlashMllamaCausalLM(
             model_id=model_id,
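A short usage sketch, not part of this commit: once a Gaudi text-generation-inference instance is serving a Qwen3 MoE checkpoint through the dispatch added above, it can be queried like any other TGI model. The endpoint URL, prompt, and token budget below are assumptions for illustration.

# Sketch only: query a running text-generation-inference endpoint.
from huggingface_hub import InferenceClient

client = InferenceClient("http://localhost:8080")  # assumed local TGI endpoint
output = client.text_generation(
    "Briefly explain mixture-of-experts routing.",
    max_new_tokens=64,
)
print(output)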
