3 files changed (+563, -2 lines)

backends/gaudi/server/text_generation_server/models
```diff
@@ -104,6 +104,9 @@
 from text_generation_server.models.custom_modeling.flash_qwen3_modeling import (
     Qwen3ForCausalLM,
 )
+from text_generation_server.models.custom_modeling.flash_qwen3_moe_modeling import (
+    Qwen3MoeForCausalLM,
+)
 from text_generation_server.models.custom_modeling.flash_mistral_modeling import (
     FlashMistralForCausalLM,
 )
```
```diff
@@ -292,7 +295,11 @@ class ModelType(enum.Enum):
         "name": "Qwen 3",
         "url": "https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f",
     }
-
+    QWEN3_MOE = {
+        "type": "qwen3_moe",
+        "name": "Qwen 3 Moe",
+        "url": "https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f",
+    }
     GALACTICA = {
         "type": "galactica",
         "name": "Galactica",
```
```diff
@@ -808,6 +815,18 @@ def get_model(
             trust_remote_code=trust_remote_code,
             lora_adapter_ids=lora_adapter_ids,
         )
+    elif model_type == QWEN3_MOE:
+        return FlashCausalLM(
+            model_id=model_id,
+            model_class=Qwen3MoeForCausalLM,
+            revision=revision,
+            quantize=quantize,
+            speculator=speculator,
+            dtype=dtype,
+            kv_cache_dtype=kv_cache_dtype,
+            trust_remote_code=trust_remote_code,
+            lora_adapter_ids=lora_adapter_ids,
+        )
     elif model_type == MLLAMA:
         return FlashMllamaCausalLM(
             model_id=model_id,
```
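The new branch fires when the checkpoint's `config.json` declares `"model_type": "qwen3_moe"`. A minimal sketch of that resolution, with the config inlined for illustration (a real config is read from the model repository):

```python
import json

# Stand-in for a Qwen3 MoE checkpoint's config.json; only the field that
# drives dispatch in get_model is shown.
config_dict = json.loads('{"model_type": "qwen3_moe"}')
model_type = config_dict.get("model_type", None)

QWEN3_MOE = "qwen3_moe"  # the "type" string registered in ModelType above

if model_type == QWEN3_MOE:
    # At this point get_model returns FlashCausalLM with
    # model_class=Qwen3MoeForCausalLM, as added in the diff above.
    print("Qwen3 MoE checkpoint -> FlashCausalLM + Qwen3MoeForCausalLM")
```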