models.json
[
{"model": "google/gemma-3-270m-it", "int4": true, "int8": true, "fp16": false, "apple": false, "pipeline_tag": "text-generation", "tags": ["completion"], "description": "Gemma 3 270M instruction-tuned model for on-device text completion."},
{"model": "google/functiongemma-270m-it", "int4": true, "int8": true, "fp16": false, "apple": false, "pipeline_tag": "text-generation", "tags": ["completion","tools"], "description": "Gemma 3 270M fine-tuned for structured tool and function calling."},
{"model": "LiquidAI/LFM2.5-350M", "int4": true, "int8": true, "fp16": false, "apple": false, "pipeline_tag": "text-generation", "tags": ["completion","tools","embed"], "description": "LFM2.5 350M compact hybrid language model from Liquid AI designed for edge deployment."},
{"model": "LiquidAI/LFM2-700M", "int4": true, "int8": true, "fp16": false, "apple": false, "pipeline_tag": "text-generation", "tags": ["completion","tools","embed"], "description": "LFM2 700M hybrid language model from Liquid AI for on-device chat and embeddings."},
{"model": "Qwen/Qwen3-0.6B", "int4": true, "int8": true, "fp16": false, "apple": false, "pipeline_tag": "text-generation", "tags": ["completion","tools","embed"], "description": "Qwen3 0.6B compact model supporting thinking and non-thinking modes for on-device chat."},
{"model": "Qwen/Qwen3.5-0.8B", "int4": true, "int8": true, "fp16": false, "apple": false, "pipeline_tag": "image-text-to-text", "tags": ["vision","completion","tools","embed"], "description": "Qwen3.5 0.8B hybrid vision-language model with DeltaNet for on-device multimodal inference."},
{"model": "google/gemma-3-1b-it", "int4": true, "int8": false, "fp16": false, "apple": false, "pipeline_tag": "text-generation", "tags": ["completion"], "description": "Gemma 3 1B instruction-tuned model for on-device text completion."},
{"model": "google/gemma-3n-E2B-it", "int4": true, "int8": false, "fp16": false, "apple": false, "pipeline_tag": "text-generation", "tags": ["completion","tools"], "description": "Gemma 3n E2B instruction-tuned model with 2B effective parameters for on-device completion and tool use."},
{"model": "google/gemma-3n-E4B-it", "int4": true, "int8": false, "fp16": false, "apple": false, "pipeline_tag": "text-generation", "tags": ["completion","tools"], "description": "Gemma 3n E4B instruction-tuned model with 4B effective parameters for on-device completion and tool use."},
{"model": "google/gemma-4-E2B-it", "int4": true, "int8": false, "fp16": false, "apple": true, "pipeline_tag": "image-text-to-text", "tags": ["vision","audio","completion","tools","apple-npu"], "description": "Gemma 4 E2B instruction-tuned multimodal model with vision and audio for on-device inference."},
{"model": "google/gemma-4-E4B-it", "int4": true, "int8": false, "fp16": false, "apple": true, "pipeline_tag": "image-text-to-text", "tags": ["vision","audio","completion","tools","apple-npu"], "description": "Gemma 4 E4B instruction-tuned multimodal model with vision and audio for on-device inference."},
{"model": "LiquidAI/LFM2.5-1.2B-Thinking", "int4": true, "int8": false, "fp16": false, "apple": false, "pipeline_tag": "text-generation", "tags": ["completion","tools","embed"], "description": "LFM2.5 1.2B reasoning model from Liquid AI with extended thinking for on-device deployment."},
{"model": "LiquidAI/LFM2.5-1.2B-Instruct", "int4": true, "int8": false, "fp16": false, "apple": false, "pipeline_tag": "text-generation", "tags": ["completion","tools","embed"], "description": "LFM2.5 1.2B instruction-tuned language model from Liquid AI designed for edge deployment."},
{"model": "Qwen/Qwen3-1.7B", "int4": true, "int8": false, "fp16": false, "apple": false, "pipeline_tag": "text-generation", "tags": ["completion","tools","embed"], "description": "Qwen3 1.7B model supporting thinking and non-thinking modes for on-device reasoning."},
{"model": "Qwen/Qwen3.5-2B", "int4": true, "int8": false, "fp16": false, "apple": false, "pipeline_tag": "image-text-to-text", "tags": ["vision","completion","tools","embed"], "description": "Qwen3.5 2B hybrid vision-language model with DeltaNet for on-device multimodal inference."},
{"model": "tencent/Youtu-LLM-2B", "int4": true, "int8": false, "fp16": false, "apple": false, "pipeline_tag": "text-generation", "tags": ["completion","tools", "embed"], "description": "Youtu-LLM 1.96B model from Tencent with Dense MLA attention, 128k context, and native agentic capabilities excelling at coding, STEM, and reasoning."},
{"model": "LiquidAI/LFM2-2.6B", "int4": true, "int8": false, "fp16": false, "apple": false, "pipeline_tag": "text-generation", "tags": ["completion","tools","embed"], "description": "LFM2 2.6B hybrid language model from Liquid AI for on-device chat and embeddings."},
{"model": "LiquidAI/LFM2-VL-450M", "int4": true, "int8": true, "fp16": false, "apple": true, "pipeline_tag": "image-text-to-text", "tags": ["vision","text-embed","image-embed","apple-npu"], "description": "LFM2-VL 450M compact vision-language model from Liquid AI for on-device image understanding."},
{"model": "LiquidAI/LFM2.5-VL-450M", "int4": true, "int8": true, "fp16": false, "apple": true, "pipeline_tag": "image-text-to-text", "tags": ["vision","text-embed","image-embed","apple-npu"], "description": "LFM2.5-VL 450M refreshed compact vision-language model from Liquid AI for image and text understanding."},
{"model": "LiquidAI/LFM2.5-VL-1.6B", "int4": true, "int8": false, "fp16": false, "apple": true, "pipeline_tag": "image-text-to-text", "tags": ["vision","text-embed","image-embed","apple-npu"], "description": "LFM2.5-VL 1.6B vision-language model from Liquid AI for image and text understanding."},
{"model": "UsefulSensors/moonshine-base", "int4": true, "int8": true, "fp16": false, "apple": true, "pipeline_tag": "automatic-speech-recognition", "tags": ["transcription","speech-embed"], "description": "Moonshine Base 61M parameter English speech recognition model optimized for live transcription."},
{"model": "openai/whisper-tiny", "int4": true, "int8": true, "fp16": false, "apple": true, "pipeline_tag": "automatic-speech-recognition", "tags": ["transcription","speech-embed","apple-npu"], "description": "Whisper Tiny 39M parameter multilingual speech recognition model by OpenAI."},
{"model": "openai/whisper-base", "int4": true, "int8": true, "fp16": false, "apple": true, "pipeline_tag": "automatic-speech-recognition", "tags": ["transcription","speech-embed","apple-npu"], "description": "Whisper Base 74M parameter multilingual speech recognition model by OpenAI."},
{"model": "openai/whisper-small", "int4": true, "int8": true, "fp16": false, "apple": true, "pipeline_tag": "automatic-speech-recognition", "tags": ["transcription","speech-embed","apple-npu"], "description": "Whisper Small 244M parameter multilingual speech recognition model by OpenAI."},
{"model": "openai/whisper-medium", "int4": true, "int8": true, "fp16": false, "apple": true, "pipeline_tag": "automatic-speech-recognition", "tags": ["transcription","speech-embed","apple-npu"], "description": "Whisper Medium 769M parameter multilingual speech recognition model by OpenAI."},
{"model": "openai/whisper-large-v3", "int4": true, "int8": true, "fp16": false, "apple": true, "pipeline_tag": "automatic-speech-recognition", "tags": ["transcription","speech-embed","apple-npu"], "description": "Whisper Large v3 1.55B parameter multilingual speech recognition model by OpenAI with improved accuracy and 128 mel bins."},
{"model": "snakers4/silero-vad", "int4": true, "int8": true, "fp16": false, "apple": false, "pipeline_tag": "voice-activity-detection", "tags": ["vad"], "description": "Silero VAD tiny voice activity detection model supporting over 100 languages."},
{"model": "nomic-ai/nomic-embed-text-v2-moe", "int4": true, "int8": true, "fp16": false, "apple": false, "pipeline_tag": "feature-extraction", "tags": ["embed"], "description": "Nomic Embed Text v2 MoE 305M multilingual text embedding model using mixture-of-experts."},
{"model": "Qwen/Qwen3-Embedding-0.6B", "int4": true, "int8": true, "fp16": false, "apple": false, "pipeline_tag": "feature-extraction", "tags": ["embed"], "description": "Qwen3 0.6B text embedding model supporting 100+ languages with 1024-dimensional vectors."},
{"model": "nvidia/parakeet-ctc-0.6b", "int4": true, "int8": true, "fp16": false, "apple": true, "pipeline_tag": "automatic-speech-recognition", "tags": ["transcription","speech-embed","apple-npu"], "description": "Parakeet CTC 0.6b is a speech recognition model optimized for on-device performance and live transcription."},
{"model": "nvidia/parakeet-ctc-1.1b", "int4": true, "int8": false, "fp16": false, "apple": true, "pipeline_tag": "automatic-speech-recognition", "tags": ["transcription","speech-embed","apple-npu"], "description": "Parakeet CTC 1.1b is a speech recognition model optimized for on-device performance and live transcription."},
{"model": "nvidia/parakeet-tdt-0.6b-v3", "int4": true, "int8": true, "fp16": false, "apple": true, "pipeline_tag": "automatic-speech-recognition", "tags": ["transcription","speech-embed","apple-npu"], "description": "Parakeet TDT 0.6b is a speech recognition model optimized for on-device performance and live transcription."},
{"model": "LiquidAI/LFM2-8B-A1B", "int4": true, "int8": false, "fp16": false, "apple": false, "pipeline_tag": "text-generation", "tags": ["completion","tools","embed"], "description": "LFM2 8B MoE model with 1.5B active parameters for high-quality on-device inference."},
{"model": "pyannote/segmentation-3.0", "int4": true, "int8": true, "fp16": false, "apple": false, "pipeline_tag": "voice-activity-detection", "tags": ["diarization"], "description": "PyAnnote segmentation-3.0 speaker diarization model using SincNet frontend and BiLSTM layers."},
{"model": "pyannote/wespeaker-voxceleb-resnet34-LM", "int4": true, "int8": true, "fp16": false, "apple": false, "pipeline_tag": "speaker-recognition", "tags": ["speaker-embed"], "description": "WeSpeaker ResNet34-LM speaker embedding model producing 256-dimensional speaker representations."}
]
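
Each entry in this manifest pairs a Hugging Face model ID with quantization availability flags (`int4`, `int8`, `fp16`), an `apple` flag indicating an Apple NPU build, a `pipeline_tag`, capability `tags`, and a short `description`. A minimal sketch of filtering the manifest, assuming it is read from a local `models.json` file; the consuming code below is illustrative, not part of this repo:

```python
import json

# Load the model manifest (the path is an assumption; adjust as needed).
with open("models.json") as f:
    models = json.load(f)

# Example: list models that ship an int8 quantization and support tool
# calling. Field names ("int8", "tags", "model") match the manifest above.
for entry in models:
    if entry["int8"] and "tools" in entry["tags"]:
        print(entry["model"])
```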