Supporting Velvet model #11716

Open. Wants to merge 23 commits into master.

Commits (23):
67b38f5 - Supporting Velvet model (fbuciuni90, Feb 6, 2025)
07e1d0a - Update convert_hf_to_gguf.py (fbuciuni90, Feb 6, 2025)
99be555 - Update convert_hf_to_gguf.py (fbuciuni90, Feb 6, 2025)
3df9d22 - Update include/llama.h (fbuciuni90, Feb 6, 2025)
52b0bb3 - Update src/llama-chat.cpp (fbuciuni90, Feb 6, 2025)
9d86a04 - removing whitespaces in src/llama-chat.cpp (fbuciuni90, Feb 7, 2025)
66e6d10 - fixing position of LLM_CHAT_TEMPLATE_VELVET in enum (fbuciuni90, Feb 7, 2025)
3979557 - updating velvet chat template (fbuciuni90, Feb 7, 2025)
0a8995a - adding test case for velvet chat template (fbuciuni90, Feb 7, 2025)
e8981aa - updated llama-vocab.cpp with velvet case (fbuciuni90, Feb 12, 2025)
c4c923a - attempt to fix pretokenization regex (fbuciuni90, Feb 20, 2025)
50bf79b - fixed pre-tokenization regex (fbuciuni90, Feb 21, 2025)
308ef21 - fixed chat template (fbuciuni90, Feb 21, 2025)
22257d8 - Merge branch 'master' into master (fbuciuni90, Mar 20, 2025)
8e6b29e - Merge branch 'master' into master (fbuciuni90, Mar 26, 2025)
8c57157 - Merge branch 'master' into master (fbuciuni90, Apr 11, 2025)
a7c4895 - Merge branch 'ggml-org:master' into master (fbuciuni90, Apr 11, 2025)
bc15f90 - fix typo in llama-vocab.cpp caused by merge (fbuciuni90, Apr 11, 2025)
bd0ffeb - fix indentation tab issue (fbuciuni90, Apr 11, 2025)
c516dbd - fix chat template test caused by sync (fbuciuni90, Apr 14, 2025)
a78b983 - small fix in chat template test (fbuciuni90, Apr 16, 2025)
ac6206c - Merge branch 'master' into master (fbuciuni90, May 15, 2025)
4e743d8 - Merge branch 'ggml-org:master' into master (fbuciuni90, May 16, 2025)
3 changes: 3 additions & 0 deletions convert_hf_to_gguf.py
@@ -777,6 +777,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
if chkhsh == "b3f499bb4255f8ca19fccd664443283318f2fd2414d5e0b040fbdd0cc195d6c5":
# ref: https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
res = "deepseek-r1-qwen"
if chkhsh == "a3df2b8943e01cfd7d68c9f8446b294f3d8706d1d6853df65df7fda5d4fcb19f":
# ref: https://huggingface.co/Almawave/Velvet-14B
res = "velvet"
if chkhsh == "ccc2ef013c104be7bae2965776d611e1d7a8a2a9c547dd93a682c9a9fc80352e":
# ref: https://huggingface.co/Xenova/gpt-4o
res = "gpt-4o"
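For context on the hunk above: get_vocab_base_pre() fingerprints a tokenizer by hashing its output on a fixed probe string, and the new branch maps Velvet's hash to the "velvet" pre-tokenizer name; the companion registry entry added to convert_hf_to_gguf_update.py below is what regenerates these if-blocks. A minimal sketch of the matching step, assuming the upstream convention of a SHA-256 over the stringified token IDs (the real multi-line probe text lives in convert_hf_to_gguf_update.py and is elided here):

```python
# Sketch only: mirrors how convert_hf_to_gguf.py derives chkhsh.
from hashlib import sha256
from transformers import AutoTokenizer

chktxt = "..."  # stand-in for the real probe string; left elided

tokenizer = AutoTokenizer.from_pretrained("Almawave/Velvet-14B")
chkhsh = sha256(str(tokenizer.encode(chktxt)).encode()).hexdigest()

if chkhsh == "a3df2b8943e01cfd7d68c9f8446b294f3d8706d1d6853df65df7fda5d4fcb19f":
    res = "velvet"  # recorded in the GGUF metadata as tokenizer.ggml.pre
```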
1 change: 1 addition & 0 deletions convert_hf_to_gguf_update.py
@@ -109,6 +109,7 @@ class TOKENIZER_TYPE(IntEnum):
{"name": "megrez", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Infinigence/Megrez-3B-Instruct"},
{"name": "deepseek-v3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-V3"},
{"name": "deepseek-r1-qwen", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"},
{"name": "velvet", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Almawave/Velvet-14B"},
{"name": "gpt-4o", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Xenova/gpt-4o", },
{"name": "superbpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/UW/OLMo2-8B-SuperBPE-t180k", },
{"name": "trillion", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/trillionlabs/Trillion-7B-preview", },
1 change: 1 addition & 0 deletions include/llama.h
@@ -114,6 +114,7 @@ extern "C" {
LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33,
LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34,
LLAMA_VOCAB_PRE_TYPE_SEED_CODER = 35,
LLAMA_VOCAB_PRE_TYPE_VELVET = 36,
};

enum llama_rope_type {
28 changes: 28 additions & 0 deletions src/llama-chat.cpp
@@ -60,6 +60,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
{ "granite", LLM_CHAT_TEMPLATE_GRANITE },
{ "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT },
{ "megrez", LLM_CHAT_TEMPLATE_MEGREZ },
{ "velvet", LLM_CHAT_TEMPLATE_VELVET },
{ "yandex", LLM_CHAT_TEMPLATE_YANDEX },
{ "bailing", LLM_CHAT_TEMPLATE_BAILING },
{ "llama4", LLM_CHAT_TEMPLATE_LLAMA4 },
@@ -177,6 +178,8 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
return LLM_CHAT_TEMPLATE_GIGACHAT;
} else if (tmpl_contains("<|role_start|>")) {
return LLM_CHAT_TEMPLATE_MEGREZ;
} else if (tmpl_contains("<instruction>")) {
return LLM_CHAT_TEMPLATE_VELVET;
} else if (tmpl_contains(" Ассистент:")) {
return LLM_CHAT_TEMPLATE_YANDEX;
} else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("'HUMAN'")) {
@@ -583,6 +586,31 @@ int32_t llm_chat_apply_template(
if (add_ass) {
ss << "<|role_start|>assistant<|role_end|>";
}
} else if (tmpl == LLM_CHAT_TEMPLATE_VELVET) {
// Velvet template
std::string leading_space = "";
std::string trailing_space = "";
bool trim_assistant_message = false;
bool is_inside_turn = false;
std::string system_message = "";
std::string last_message(chat.back()->content);
ss << "<s>";
for (auto message : chat) {
if (!is_inside_turn) {
ss << leading_space << "<instruction>" << trailing_space;
is_inside_turn = true;
}
std::string role(message->role);
std::string content(message->content);
if (role == "system" && content != " ") {
system_message = content + "\n\n";
} else if (role == "user") {
ss << (content==last_message ? system_message : "") << content << leading_space << "</instruction>";
} else {
ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>";
is_inside_turn = false;
}
}
} else if (tmpl == LLM_CHAT_TEMPLATE_YANDEX) {
// Yandex template ("\n\n" is defined as EOT token)

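To make the new LLM_CHAT_TEMPLATE_VELVET branch concrete, here is a small Python mirror of the C++ logic above (an illustrative sketch with a hypothetical velvet_prompt() helper; the authoritative implementation remains llm_chat_apply_template in src/llama-chat.cpp):

```python
# Illustrative re-implementation of the Velvet branch above.
def velvet_prompt(chat: list[dict]) -> str:
    out = "<s>"
    system_message = ""
    last_content = chat[-1]["content"]
    inside_turn = False
    for msg in chat:
        if not inside_turn:
            out += "<instruction>"
            inside_turn = True
        role, content = msg["role"], msg["content"]
        if role == "system" and content != " ":
            # Stash the system text; it is injected into the last user turn.
            system_message = content + "\n\n"
        elif role == "user":
            out += (system_message if content == last_content else "") + content + "</instruction>"
        else:  # assistant
            out += content + "</s>"
            inside_turn = False
    return out

chat = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "content": "Hi there"},
    {"role": "user", "content": "Another question"},
]
print(velvet_prompt(chat))
# <s><instruction>Hello</instruction>Hi there</s><instruction>You are a helpful assistant
#
# Another question</instruction>
```

Note the asymmetry this mirrors: the system message is not emitted where it occurs; it is prepended only to the final user message, matching the Jinja template in the test case further down.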
1 change: 1 addition & 0 deletions src/llama-chat.h
@@ -39,6 +39,7 @@ enum llm_chat_template {
LLM_CHAT_TEMPLATE_GRANITE,
LLM_CHAT_TEMPLATE_GIGACHAT,
LLM_CHAT_TEMPLATE_MEGREZ,
LLM_CHAT_TEMPLATE_VELVET,
LLM_CHAT_TEMPLATE_YANDEX,
LLM_CHAT_TEMPLATE_BAILING,
LLM_CHAT_TEMPLATE_LLAMA4,
10 changes: 10 additions & 0 deletions src/llama-vocab.cpp
@@ -396,6 +396,13 @@ struct llm_tokenizer_bpe : llm_tokenizer {
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
};
break;
case LLAMA_VOCAB_PRE_TYPE_VELVET:
// original regex from tokenizer.json
//"[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]*[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]+|[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]+[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]*|\\p{N}|[\\p{P}\\p{S}]{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
regex_exprs = {
"[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))*((?=[\\p{L}])([^A-Z]))+|[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))+((?=[\\p{L}])([^A-Z]))*|\\p{N}|[\\p{P}\\p{S}]{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
};
break;
case LLAMA_VOCAB_PRE_TYPE_GPT4O:
regex_exprs = {
// original regex from tokenizer.json
@@ -1629,6 +1636,9 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
} else if (
tokenizer_pre == "megrez") {
pre_type = LLAMA_VOCAB_PRE_TYPE_QWEN2;
} else if (
tokenizer_pre == "velvet") {
pre_type = LLAMA_VOCAB_PRE_TYPE_VELVET;
} else if (
tokenizer_pre == "gpt-4o" ||
tokenizer_pre == "llama4") {
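A note on the rewritten regex above: llama.cpp's built-in pre-tokenization regex support does not cover all of the Unicode general categories used by Velvet's original pattern (\p{Lu}, \p{Lt}, \p{Lm}, \p{Lo}, \p{M}), so the PR approximates "uppercase-like letter" as (?=[\p{L}])[^a-z] (a letter that is not ASCII lowercase) and "lowercase-like letter" as (?=[\p{L}])[^A-Z]. A quick way to see what the trick does and where it diverges, using Python's third-party regex module (an illustrative check, not part of the PR):

```python
# pip install regex  (the stdlib `re` module does not support \p{...})
import regex

# A letter (lookahead) that is not ASCII lowercase: approximates
# [\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}] from the original tokenizer.json regex.
upperish = regex.compile(r"(?=[\p{L}])[^a-z]")

for ch in ["A", "Ж", "a", "1", "ß"]:
    print(ch, bool(upperish.fullmatch(ch)))
# A  True   (Lu, as intended)
# Ж  True   (Lu, as intended)
# a  False  (ASCII lowercase, correctly excluded)
# 1  False  (not a letter, the lookahead fails)
# ß  True   (non-ASCII lowercase slips through: the approximation is lossy)
```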
8 changes: 8 additions & 0 deletions tests/test-chat-template.cpp
@@ -274,6 +274,14 @@ int main(void) {
/* .bos_token= */ "",
/* .eos_token= */ "",
},
{
/* .name= */ "velvet",
/* .template_str= */ "{{ bos_token }}{%- if messages[0][\"role\"] == \"system\" %}\n {%- set system_message = messages[0][\"content\"] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n{%- set user_messages = loop_messages | selectattr(\"role\", \"equalto\", \"user\") | list %}\n\n{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n{%- set ns = namespace() %}\n{%- set ns.index = 0 %}\n{%- for message in loop_messages %}\n {%- if not (message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n {%- if (message[\"role\"] == \"user\") != (ns.index % 2 == 0) %}\n {{- raise_exception(\"After the optional system message, conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif %}\n {%- set ns.index = ns.index + 1 %}\n {%- endif %}\n{%- endfor %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if message[\"role\"] == \"user\" %}\n {%- if message == user_messages[-1] and system_message is defined and tools is not none %}\n {{- \"<instruction>\" + system_message + \"\\n\\n\" + \"<list_tools>[\" }}\n {%- for tool in tools %}\n{{ tool }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" }}\n {%- endif %}\n {%- endfor %}\n {{- \"</list_tools>\" + \"\\n\\n\" + message[\"content\"] + \"</instruction>\" }}\n {%- elif message == user_messages[-1] and system_message is defined and tools is none %}\n {{- \"<instruction>\" + system_message + \"\\n\\n\" + message[\"content\"] + \"</instruction>\" }}\n {%- else %}\n {{- \"<instruction>\" + message[\"content\"] + \"</instruction>\" }}\n {%- endif %}\n {%- elif (message.tool_calls is defined and message.tool_calls is not none) %}\n {{- \"<tool_call>[\" }}\n {%- for tool_call in message.tool_calls %}\n{{ tool_call }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" }}\n {%- endif %}\n {%- endfor %} {{- \"</tool_call>\" + eos_token }}\n {%- elif message[\"role\"] == \"assistant\" %}\n {{- message[\"content\"] + eos_token}}\n {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n {%- if message.content is defined and message.content.content is defined %}\n {%- set content = message.content.content %}\n {%- else %}\n {%- set content = message.content %}\n {%- endif %}\n {{- '<tool_result>{\"content\": ' + content|string }}\n {{- '}</tool_result>' }}\n {%- else %}\n {{- raise_exception(\"Only user and assistant roles are supported, with the exception of an initial optional system message!\") }}\n {%- endif %}\n{%- endfor %}\n",
/* .expected_output= */ "<s><instruction>Hello</instruction>Hi there</s><instruction>Who are you</instruction> I am an assistant </s><instruction>You are a helpful assistant\n\nAnother question</instruction>",
/* .expected_output_jinja= */ "<s><instruction>Hello</instruction>Hi there</s><instruction>Who are you</instruction> I am an assistant </s><instruction>You are a helpful assistant\n\nAnother question</instruction>",
/* .bos_token= */ "<s>",
/* .eos_token= */ "</s>",
},
{
/* .name= */ "yandex/YandexGPT-5-Lite-8B-instruct",
/* .template_str= */ "<s>{%- set names = {'assistant': ' Ассистент:', 'user': ' Пользователь:'} %}\n{%- set tools_prefix = 'Тебе доступны следующие функции:' %}\n{%- macro __render_tool(tool) %}\n {%- set name = tool.function.name %}\n {%- set description = tool.function.description|default('') %}\n {%- set parameters = tool.function.parameters|tojson %}\n {{- '\\n' }}function {{ '{' }}'name':'{{ name }}',\n {%- if tool.function.description %}'description':'{{ description }}',{% endif %}\n'parameters':{{ parameters }}\n {{- '}' }}\n{%- endmacro %}\n{%- macro __render_tools(tools) %}\n {{- tools_prefix }}\n {%- for tool in tools %}\n {{- __render_tool(tool) }}\n {%- endfor %}\n {{- '\\n\\n' }}\n{%- endmacro %}\n{%- macro __render_tool_message(message) %}\n {{- '\\n\\nРезультат вызова' }} {{ message.name }}: {{ message.content }} {{ '\\n\\n' }}\n{%- endmacro %}\n{%- if tools -%}\n {{- __render_tools(tools) }}\n{%- endif -%}\n{%- macro __render_user_message(message) %}\n{{ names.user }} {{ message.content + '\\n\\n' }}\n{%- endmacro %}\n{%- macro __render_assistant_message(message) %}\n {{- names.assistant }}\n {%- set call = message['function_call'] %}\n {%- if call %}\n {{- '\\n[TOOL_CALL_START]' }}{{ call.name }}{{ '\\n' }}{{ call.arguments|tojson }}\n {%- else %}\n {{- ' ' + message.content + '\\n\\n' }}\n {%- endif %}\n{%- endmacro %}\n{%- if not add_generation_prompt is defined %}\n{%- set add_generation_prompt = false %}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'user' %}\n {{- __render_user_message(message) }}\n {%- endif %}\n {%- if message.role == 'assistant' and not loop.last %}\n {{- __render_assistant_message(message) }}\n {%- endif %}\n {%- if message.role == 'tool' %}\n {{- __render_tool_message(message) }}\n {%- endif %}\n {%- if loop.last %}\n {{- ' Ассистент:[SEP]' }}\n {%- endif %}\n{%- endfor %}\n",