diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index b5402cbead880..d72e73462fbe2 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -777,6 +777,9 @@ def get_vocab_base_pre(self, tokenizer) -> str: if chkhsh == "b3f499bb4255f8ca19fccd664443283318f2fd2414d5e0b040fbdd0cc195d6c5": # ref: https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B res = "deepseek-r1-qwen" + if chkhsh == "a3df2b8943e01cfd7d68c9f8446b294f3d8706d1d6853df65df7fda5d4fcb19f": + # ref: https://huggingface.co/Almawave/Velvet-14B + res = "velvet" if chkhsh == "ccc2ef013c104be7bae2965776d611e1d7a8a2a9c547dd93a682c9a9fc80352e": # ref: https://huggingface.co/Xenova/gpt-4o res = "gpt-4o" diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py index 5993a4c9836b5..6cb6a1e86a433 100755 --- a/convert_hf_to_gguf_update.py +++ b/convert_hf_to_gguf_update.py @@ -109,6 +109,7 @@ class TOKENIZER_TYPE(IntEnum): {"name": "megrez", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Infinigence/Megrez-3B-Instruct"}, {"name": "deepseek-v3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-V3"}, {"name": "deepseek-r1-qwen", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"}, + {"name": "velvet", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Almawave/Velvet-14B"}, {"name": "gpt-4o", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Xenova/gpt-4o", }, {"name": "superbpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/UW/OLMo2-8B-SuperBPE-t180k", }, {"name": "trillion", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/trillionlabs/Trillion-7B-preview", }, diff --git a/include/llama.h b/include/llama.h index 99e5fba244fcc..99934ef58f667 100644 --- a/include/llama.h +++ b/include/llama.h @@ -114,6 +114,7 @@ extern "C" { LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33, LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34, LLAMA_VOCAB_PRE_TYPE_SEED_CODER = 35, + LLAMA_VOCAB_PRE_TYPE_VELVET = 36, }; enum llama_rope_type { diff --git a/src/llama-chat.cpp b/src/llama-chat.cpp index d12743e6b9a0c..fb5ad555b44f5 100644 --- a/src/llama-chat.cpp +++ b/src/llama-chat.cpp @@ -60,6 +60,7 @@ static const std::map LLM_CHAT_TEMPLATES = { { "granite", LLM_CHAT_TEMPLATE_GRANITE }, { "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT }, { "megrez", LLM_CHAT_TEMPLATE_MEGREZ }, + { "velvet", LLM_CHAT_TEMPLATE_VELVET }, { "yandex", LLM_CHAT_TEMPLATE_YANDEX }, { "bailing", LLM_CHAT_TEMPLATE_BAILING }, { "llama4", LLM_CHAT_TEMPLATE_LLAMA4 }, @@ -177,6 +178,8 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) { return LLM_CHAT_TEMPLATE_GIGACHAT; } else if (tmpl_contains("<|role_start|>")) { return LLM_CHAT_TEMPLATE_MEGREZ; + } else if (tmpl_contains("")) { + return LLM_CHAT_TEMPLATE_VELVET; } else if (tmpl_contains(" Ассистент:")) { return LLM_CHAT_TEMPLATE_YANDEX; } else if (tmpl_contains("ASSISTANT") && tmpl_contains("'HUMAN'")) { @@ -583,6 +586,31 @@ int32_t llm_chat_apply_template( if (add_ass) { ss << "<|role_start|>assistant<|role_end|>"; } + } else if (tmpl == LLM_CHAT_TEMPLATE_VELVET) { + // Velvet template + std::string leading_space = ""; + std::string trailing_space = ""; + bool trim_assistant_message = false; + bool is_inside_turn = false; + std::string system_message = ""; + std::string last_message(chat.back()->content); + ss << ""; + for (auto message : chat) { + if (!is_inside_turn) { + ss << leading_space << "" << trailing_space; + is_inside_turn = true; + } + std::string role(message->role); + std::string content(message->content); + if (role == "system" && content != " ") { + system_message = content + "\n\n"; + } else if (role == "user") { + ss << (content==last_message ? system_message : "") << content << leading_space << ""; + } else { + ss << trailing_space << (trim_assistant_message ? trim(content) : content) << ""; + is_inside_turn = false; + } + } } else if (tmpl == LLM_CHAT_TEMPLATE_YANDEX) { // Yandex template ("\n\n" is defined as EOT token) diff --git a/src/llama-chat.h b/src/llama-chat.h index db24ade21e2ad..5ec123fb11dde 100644 --- a/src/llama-chat.h +++ b/src/llama-chat.h @@ -39,6 +39,7 @@ enum llm_chat_template { LLM_CHAT_TEMPLATE_GRANITE, LLM_CHAT_TEMPLATE_GIGACHAT, LLM_CHAT_TEMPLATE_MEGREZ, + LLM_CHAT_TEMPLATE_VELVET, LLM_CHAT_TEMPLATE_YANDEX, LLM_CHAT_TEMPLATE_BAILING, LLM_CHAT_TEMPLATE_LLAMA4, diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index 9389ca805a584..0974e12051da8 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -396,6 +396,13 @@ struct llm_tokenizer_bpe : llm_tokenizer { "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)", }; break; + case LLAMA_VOCAB_PRE_TYPE_VELVET: + // original regex from tokenizer.json + //"[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]*[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]+|[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]+[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]*|\\p{N}|[\\p{P}\\p{S}]{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + regex_exprs = { + "[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))*((?=[\\p{L}])([^A-Z]))+|[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))+((?=[\\p{L}])([^A-Z]))*|\\p{N}|[\\p{P}\\p{S}]{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }; + break; case LLAMA_VOCAB_PRE_TYPE_GPT4O: regex_exprs = { // original regex from tokenizer.json @@ -1629,6 +1636,9 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { } else if ( tokenizer_pre == "megrez") { pre_type = LLAMA_VOCAB_PRE_TYPE_QWEN2; + } else if ( + tokenizer_pre == "velvet") { + pre_type = LLAMA_VOCAB_PRE_TYPE_VELVET; } else if ( tokenizer_pre == "gpt-4o" || tokenizer_pre == "llama4") { diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp index a0a50f9881fe0..b7645efcbc661 100644 --- a/tests/test-chat-template.cpp +++ b/tests/test-chat-template.cpp @@ -274,6 +274,14 @@ int main(void) { /* .bos_token= */ "", /* .eos_token= */ "", }, + { + /* .name= */ "velvet", + /* .template_str= */ "{{ bos_token }}{%- if messages[0][\"role\"] == \"system\" %}\n {%- set system_message = messages[0][\"content\"] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n{%- set user_messages = loop_messages | selectattr(\"role\", \"equalto\", \"user\") | list %}\n\n{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n{%- set ns = namespace() %}\n{%- set ns.index = 0 %}\n{%- for message in loop_messages %}\n {%- if not (message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n {%- if (message[\"role\"] == \"user\") != (ns.index % 2 == 0) %}\n {{- raise_exception(\"After the optional system message, conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif %}\n {%- set ns.index = ns.index + 1 %}\n {%- endif %}\n{%- endfor %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if message[\"role\"] == \"user\" %}\n {%- if message == user_messages[-1] and system_message is defined and tools is not none %}\n {{- \"\" + system_message + \"\\n\\n\" + \"[\" }}\n {%- for tool in tools %}\n{{ tool }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" }}\n {%- endif %}\n {%- endfor %}\n {{- \"\" + \"\\n\\n\" + message[\"content\"] + \"\" }}\n {%- elif message == user_messages[-1] and system_message is defined and tools is none %}\n {{- \"\" + system_message + \"\\n\\n\" + message[\"content\"] + \"\" }}\n {%- else %}\n {{- \"\" + message[\"content\"] + \"\" }}\n {%- endif %}\n {%- elif (message.tool_calls is defined and message.tool_calls is not none) %}\n {{- \"[\" }}\n {%- for tool_call in message.tool_calls %}\n{{ tool_call }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" }}\n {%- endif %}\n {%- endfor %} {{- \"\" + eos_token }}\n {%- elif message[\"role\"] == \"assistant\" %}\n {{- message[\"content\"] + eos_token}}\n {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n {%- if message.content is defined and message.content.content is defined %}\n {%- set content = message.content.content %}\n {%- else %}\n {%- set content = message.content %}\n {%- endif %}\n {{- '{\"content\": ' + content|string }}\n {{- '}' }}\n {%- else %}\n {{- raise_exception(\"Only user and assistant roles are supported, with the exception of an initial optional system message!\") }}\n {%- endif %}\n{%- endfor %}\n", + /* .expected_output= */ "HelloHi thereWho are you I am an assistant You are a helpful assistant\n\nAnother question", + /* .expected_output_jinja= */ "HelloHi thereWho are you I am an assistant You are a helpful assistant\n\nAnother question", + /* .bos_token= */ "", + /* .eos_token= */ "", + }, { /* .name= */ "yandex/YandexGPT-5-Lite-8B-instruct", /* .template_str= */ "{%- set names = {'assistant': ' Ассистент:', 'user': ' Пользователь:'} %}\n{%- set tools_prefix = 'Тебе доступны следующие функции:' %}\n{%- macro __render_tool(tool) %}\n {%- set name = tool.function.name %}\n {%- set description = tool.function.description|default('') %}\n {%- set parameters = tool.function.parameters|tojson %}\n {{- '\\n' }}function {{ '{' }}'name':'{{ name }}',\n {%- if tool.function.description %}'description':'{{ description }}',{% endif %}\n'parameters':{{ parameters }}\n {{- '}' }}\n{%- endmacro %}\n{%- macro __render_tools(tools) %}\n {{- tools_prefix }}\n {%- for tool in tools %}\n {{- __render_tool(tool) }}\n {%- endfor %}\n {{- '\\n\\n' }}\n{%- endmacro %}\n{%- macro __render_tool_message(message) %}\n {{- '\\n\\nРезультат вызова' }} {{ message.name }}: {{ message.content }} {{ '\\n\\n' }}\n{%- endmacro %}\n{%- if tools -%}\n {{- __render_tools(tools) }}\n{%- endif -%}\n{%- macro __render_user_message(message) %}\n{{ names.user }} {{ message.content + '\\n\\n' }}\n{%- endmacro %}\n{%- macro __render_assistant_message(message) %}\n {{- names.assistant }}\n {%- set call = message['function_call'] %}\n {%- if call %}\n {{- '\\n[TOOL_CALL_START]' }}{{ call.name }}{{ '\\n' }}{{ call.arguments|tojson }}\n {%- else %}\n {{- ' ' + message.content + '\\n\\n' }}\n {%- endif %}\n{%- endmacro %}\n{%- if not add_generation_prompt is defined %}\n{%- set add_generation_prompt = false %}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'user' %}\n {{- __render_user_message(message) }}\n {%- endif %}\n {%- if message.role == 'assistant' and not loop.last %}\n {{- __render_assistant_message(message) }}\n {%- endif %}\n {%- if message.role == 'tool' %}\n {{- __render_tool_message(message) }}\n {%- endif %}\n {%- if loop.last %}\n {{- ' Ассистент:[SEP]' }}\n {%- endif %}\n{%- endfor %}\n",