@@ -126,6 +126,8 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
126126typedef minja::chat_template common_chat_template;
127127
128128struct common_chat_templates {
129+ bool add_bos;
130+ bool add_eos;
129131 bool has_explicit_template; // Model had builtin template or template override was specified.
130132 std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
131133 std::unique_ptr<common_chat_template> template_tool_use;
@@ -143,6 +145,8 @@ struct templates_params {
143145 bool enable_thinking = true ;
144146 std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
145147 json extra_context;
148+ bool add_bos;
149+ bool add_eos;
146150};
147151
148152common_chat_tool_choice common_chat_tool_choice_parse_oaicompat (const std::string & tool_choice) {
@@ -445,6 +449,8 @@ std::string common_chat_format_single(
445449
446450 common_chat_templates_inputs inputs;
447451 inputs.use_jinja = use_jinja;
452+ inputs.add_bos = tmpls->add_bos ;
453+ inputs.add_eos = tmpls->add_eos ;
448454
449455 std::string fmt_past_msg;
450456 if (!past_msg.empty ()) {
@@ -469,6 +475,8 @@ std::string common_chat_format_single(
469475std::string common_chat_format_example (const struct common_chat_templates * tmpls, bool use_jinja) {
470476 common_chat_templates_inputs inputs;
471477 inputs.use_jinja = use_jinja;
478+ inputs.add_bos = tmpls->add_bos ;
479+ inputs.add_eos = tmpls->add_eos ;
472480 auto add_simple_msg = [&](auto role, auto content) {
473481 common_chat_msg msg;
474482 msg.role = role;
@@ -546,6 +554,8 @@ common_chat_templates_ptr common_chat_templates_init(
546554 }
547555 std::string token_bos = bos_token_override;
548556 std::string token_eos = eos_token_override;
557+ bool add_bos = false ;
558+ bool add_eos = false ;
549559 if (model) {
550560 const auto * vocab = llama_model_get_vocab (model);
551561 const auto get_token = [&](llama_token token, const char * name, const char * jinja_variable_name) {
@@ -560,9 +570,13 @@ common_chat_templates_ptr common_chat_templates_init(
560570 };
561571 token_bos = get_token (llama_vocab_bos (vocab), " BOS" , " bos_token" );
562572 token_eos = get_token (llama_vocab_eos (vocab), " EOS" , " eos_token" );
573+ add_bos = llama_vocab_get_add_bos (vocab);
574+ add_eos = llama_vocab_get_add_eos (vocab);
563575 }
564576 common_chat_templates_ptr tmpls (new common_chat_templates ());
565577 tmpls->has_explicit_template = has_explicit_template;
578+ tmpls->add_bos = add_bos;
579+ tmpls->add_eos = add_eos;
566580 try {
567581 tmpls->template_default = std::make_unique<minja::chat_template>(default_template_src, token_bos, token_eos);
568582 } catch (const std::exception & e) {
@@ -592,6 +606,7 @@ const char * common_chat_format_name(common_chat_format format) {
592606 case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return " Functionary v3.1 Llama 3.1" ;
593607 case COMMON_CHAT_FORMAT_HERMES_2_PRO: return " Hermes 2 Pro" ;
594608 case COMMON_CHAT_FORMAT_COMMAND_R7B: return " Command R7B" ;
609+ case COMMON_CHAT_FORMAT_GPT_OSS: return " GPT-OSS" ;
595610 default :
596611 throw std::runtime_error (" Unknown chat format" );
597612 }
@@ -600,6 +615,7 @@ const char * common_chat_format_name(common_chat_format format) {
600615const char * common_reasoning_format_name (common_reasoning_format format) {
601616 switch (format) {
602617 case COMMON_REASONING_FORMAT_NONE: return " none" ;
618+ case COMMON_REASONING_FORMAT_AUTO: return " auto" ;
603619 case COMMON_REASONING_FORMAT_DEEPSEEK: return " deepseek" ;
604620 case COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY: return " deepseek-legacy" ;
605621 default :
@@ -748,10 +764,10 @@ static std::string apply(
748764 // instead of using `chat_template_options.use_bos_token = false`, since these tokens
749765 // may be needed inside the template / between messages too.
750766 auto result = tmpl.apply (tmpl_inputs, tmpl_opts);
751- if (string_starts_with (result, tmpl.bos_token ())) {
767+ if (inputs. add_bos && string_starts_with (result, tmpl.bos_token ())) {
752768 result = result.substr (tmpl.bos_token ().size ());
753769 }
754- if (string_ends_with (result, tmpl.eos_token ())) {
770+ if (inputs. add_eos && string_ends_with (result, tmpl.eos_token ())) {
755771 result = result.substr (0 , result.size () - tmpl.eos_token ().size ());
756772 }
757773 return result;
@@ -1289,6 +1305,26 @@ static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
12891305 tool_calls_end);
12901306}
12911307
1308+ static common_chat_params common_chat_params_init_gpt_oss (const common_chat_template & tmpl, const struct templates_params & inputs) {
1309+ common_chat_params data;
1310+ auto prompt = apply (tmpl, inputs);
1311+
1312+ data.prompt = prompt;
1313+ data.format = COMMON_CHAT_FORMAT_GPT_OSS;
1314+
1315+ // TODO: support tool calls in GPT-OSS?
1316+
1317+ return data;
1318+ }
1319+ static void common_chat_parse_gpt_oss (common_chat_msg_parser & builder) {
1320+ // TODO @ngxson : this won't work with --special enabled, we should fix that
1321+ builder.try_parse_reasoning (" <|channel|>analysis<|message|>" , " <|start|>assistant<|channel|>final<|message|>" );
1322+ if (!builder.syntax ().parse_tool_calls ) {
1323+ builder.add_content (builder.consume_rest ());
1324+ return ;
1325+ }
1326+ }
1327+
12921328static common_chat_params common_chat_params_init_firefunction_v2 (const common_chat_template & tmpl, const struct templates_params & inputs) {
12931329 LOG_DBG (" %s\n " , __func__);
12941330 common_chat_params data;
@@ -1731,6 +1767,8 @@ static common_chat_params common_chat_templates_apply_jinja(
17311767 params.enable_thinking = inputs.enable_thinking ;
17321768 params.grammar = inputs.grammar ;
17331769 params.now = inputs.now ;
1770+ params.add_bos = inputs.add_bos ;
1771+ params.add_eos = inputs.add_eos ;
17341772
17351773 params.extra_context = json::object ();
17361774 for (auto el : inputs.chat_template_kwargs ) {
@@ -1772,6 +1810,11 @@ static common_chat_params common_chat_templates_apply_jinja(
17721810 return common_chat_params_init_hermes_2_pro (tmpl, params);
17731811 }
17741812
1813+ // GPT-OSS
1814+ if (src.find (" <|channel|>" ) != std::string::npos && params.json_schema .is_null ()) {
1815+ return common_chat_params_init_gpt_oss (tmpl, params);
1816+ }
1817+
17751818 // Use generic handler when mixing tools + JSON schema.
17761819 // TODO: support that mix in handlers below.
17771820 if ((params.tools .is_array () && params.json_schema .is_object ())) {
@@ -1923,6 +1966,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
19231966 case COMMON_CHAT_FORMAT_COMMAND_R7B:
19241967 common_chat_parse_command_r7b (builder);
19251968 break ;
1969+ case COMMON_CHAT_FORMAT_GPT_OSS:
1970+ common_chat_parse_gpt_oss (builder);
1971+ break ;
19261972 default :
19271973 throw std::runtime_error (std::string (" Unsupported format: " ) + common_chat_format_name (builder.syntax ().format ));
19281974 }
0 commit comments