Skip to content

Commit 02010ec

Browse files
Merge pull request #271 from menloresearch/update-dev-from-master-2025-10-03-00-32
Sync master with upstream release b6673
2 parents 6928d0d + d64c810 commit 02010ec

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed: +1742 / -92 lines changed

.devops/intel.Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
1-
ARG ONEAPI_VERSION=2025.1.1-0-devel-ubuntu24.04
1+
ARG ONEAPI_VERSION=2025.2.2-0-devel-ubuntu24.04
22

33
## Build Image
44

5-
FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build
5+
FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS build
66

77
ARG GGML_SYCL_F16=OFF
88
RUN apt-get update && \
@@ -31,7 +31,7 @@ RUN mkdir -p /app/full \
3131
&& cp requirements.txt /app/full \
3232
&& cp .devops/tools.sh /app/full/tools.sh
3333

34-
FROM intel/oneapi-basekit:$ONEAPI_VERSION AS base
34+
FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS base
3535

3636
RUN apt-get update \
3737
&& apt-get install -y libgomp1 curl\

.devops/rocm.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
1616
# gfx803, gfx900, gfx906, gfx1032, gfx1101, gfx1102, not officially supported
1717
# check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.1/reference/system-requirements.html
1818

19-
ARG ROCM_DOCKER_ARCH='gfx803;gfx900;gfx906;gfx1010;gfx1030;gfx1032;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201;gfx1151'
19+
ARG ROCM_DOCKER_ARCH='gfx803;gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1010;gfx1030;gfx1032;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201;gfx1151'
2020
#ARG ROCM_DOCKER_ARCH='gfx1151'
2121

2222
# Set ROCm architectures

.github/workflows/build.yml

Lines changed: 33 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -362,11 +362,11 @@ jobs:
362362
id: checkout
363363
uses: actions/checkout@v4
364364

365-
- name: ccache
366-
uses: ggml-org/ccache-action@v1.2.16
367-
with:
368-
key: ubuntu-latest-cmake-rpc
369-
evict-old-files: 1d
365+
# - name: ccache
366+
# uses: ggml-org/ccache-action@v1.2.16
367+
# with:
368+
# key: ubuntu-latest-cmake-rpc
369+
# evict-old-files: 1d
370370

371371
- name: Dependencies
372372
id: depends
@@ -387,8 +387,8 @@ jobs:
387387
cd build
388388
ctest -L main --verbose
389389
390-
ubuntu-22-cmake-vulkan:
391-
runs-on: ubuntu-22.04
390+
ubuntu-24-cmake-vulkan:
391+
runs-on: ubuntu-24.04
392392

393393
steps:
394394
- name: Clone
@@ -398,20 +398,40 @@ jobs:
398398
- name: ccache
399399
uses: ggml-org/ccache-action@v1.2.16
400400
with:
401-
key: ubuntu-22-cmake-vulkan
401+
key: ubuntu-24-cmake-vulkan
402402
evict-old-files: 1d
403403

404404
- name: Dependencies
405405
id: depends
406406
run: |
407-
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
408-
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
407+
sudo add-apt-repository -y ppa:kisak/kisak-mesa
409408
sudo apt-get update -y
410-
sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk libcurl4-openssl-dev
409+
sudo apt-get install -y build-essential mesa-vulkan-drivers libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libcurl4-openssl-dev
410+
411+
- name: Get latest Vulkan SDK version
412+
id: vulkan_sdk_version
413+
run: |
414+
echo "VULKAN_SDK_VERSION=$(curl https://vulkan.lunarg.com/sdk/latest/linux.txt)" >> "$GITHUB_ENV"
415+
416+
- name: Cache Vulkan SDK
417+
id: cache_vulkan_sdk
418+
uses: actions/cache@v4
419+
with:
420+
path: ./vulkan_sdk
421+
key: vulkan-sdk-${{ env.VULKAN_SDK_VERSION }}-${{ runner.os }}
422+
423+
- name: Install Vulkan SDK
424+
if: steps.cache_vulkan_sdk.outputs.cache-hit != 'true'
425+
id: vulkan_sdk_install
426+
run: |
427+
mkdir -p vulkan_sdk
428+
cd vulkan_sdk
429+
curl --no-progress-meter https://sdk.lunarg.com/sdk/download/latest/linux/vulkan_sdk.tar.xz | tar -Jx --strip-components=1
411430
412431
- name: Build
413432
id: cmake_build
414433
run: |
434+
source ./vulkan_sdk/setup-env.sh
415435
cmake -B build \
416436
-DGGML_VULKAN=ON
417437
cmake --build build --config Release -j $(nproc)
@@ -421,6 +441,7 @@ jobs:
421441
run: |
422442
cd build
423443
export GGML_VK_VISIBLE_DEVICES=0
444+
export GGML_VK_DISABLE_F16=1
424445
# This is using llvmpipe and runs slower than other backends
425446
ctest -L main --verbose --timeout 4200
426447
@@ -1059,7 +1080,7 @@ jobs:
10591080
shell: bash
10601081

10611082
env:
1062-
WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/7cd9bba0-7aab-4e30-b3ae-2221006a4a05/intel-oneapi-base-toolkit-2025.1.1.34_offline.exe
1083+
WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/24751ead-ddc5-4479-b9e6-f9fe2ff8b9f2/intel-deep-learning-essentials-2025.2.1.25_offline.exe
10631084
WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
10641085
ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
10651086
steps:

.github/workflows/release.yml

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -462,7 +462,7 @@ jobs:
462462
shell: bash
463463

464464
env:
465-
WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/7cd9bba0-7aab-4e30-b3ae-2221006a4a05/intel-oneapi-base-toolkit-2025.1.1.34_offline.exe
465+
WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/24751ead-ddc5-4479-b9e6-f9fe2ff8b9f2/intel-deep-learning-essentials-2025.2.1.25_offline.exe
466466
WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
467467
ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
468468

@@ -505,6 +505,7 @@ jobs:
505505
cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin
506506
507507
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll" ./build/bin
508+
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero_v2.dll" ./build/bin
508509
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_opencl.dll" ./build/bin
509510
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll" ./build/bin
510511
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll" ./build/bin
@@ -513,10 +514,15 @@ jobs:
513514
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
514515
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
515516
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin
517+
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl-ls.exe" ./build/bin
516518
517519
cp "${{ env.ONEAPI_ROOT }}/dnnl/latest/bin/dnnl.dll" ./build/bin
518520
cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin
519521
522+
cp "${{ env.ONEAPI_ROOT }}/tcm/latest/bin/tcm.dll" ./build/bin
523+
cp "${{ env.ONEAPI_ROOT }}/tcm/latest/bin/libhwloc-15.dll" ./build/bin
524+
cp "${{ env.ONEAPI_ROOT }}/umf/latest/bin/umf.dll" ./build/bin
525+
520526
echo "cp oneAPI running time dll files to ./build/bin done"
521527
7z a llama-bin-win-sycl-x64.zip ./build/bin/*
522528

CODEOWNERS

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,9 @@
5959
/ggml/src/ggml-cuda/mmq.* @JohannesGaessler
6060
/ggml/src/ggml-cuda/mmvf.* @JohannesGaessler
6161
/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
62+
/ggml/src/ggml-cuda/fattn-wmma* @IMbackK
63+
/ggml/src/ggml-hip/ @IMbackK
64+
/ggml/src/ggml-cuda/vendors/hip.h @IMbackK
6265
/ggml/src/ggml-impl.h @ggerganov @slaren
6366
/ggml/src/ggml-metal/ @ggerganov
6467
/ggml/src/ggml-opencl/ @lhez @max-krasnyansky

ci/run.sh

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -34,9 +34,9 @@ mkdir -p "$2"
3434
OUT=$(realpath "$1")
3535
MNT=$(realpath "$2")
3636

37-
rm -f "$OUT/*.log"
38-
rm -f "$OUT/*.exit"
39-
rm -f "$OUT/*.md"
37+
rm -f $OUT/*.log
38+
rm -f $OUT/*.exit
39+
rm -f $OUT/*.md
4040

4141
sd=`dirname $0`
4242
cd $sd/../
@@ -607,6 +607,7 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
607607
fi
608608

609609
ret=0
610+
610611
test $ret -eq 0 && gg_run ctest_debug
611612
test $ret -eq 0 && gg_run ctest_release
612613

@@ -624,4 +625,6 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
624625
test $ret -eq 0 && gg_run ctest_with_model_release
625626
fi
626627

628+
cat $OUT/README.md
629+
627630
exit $ret

common/chat-parser.cpp

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,35 @@ bool common_chat_msg_parser::add_tool_calls(const json & arr) {
7575
}
7676
return true;
7777
}
78+
79+
bool common_chat_msg_parser::add_tool_call_short_form(const json & tool_call) {
80+
if (!tool_call.is_object() || tool_call.size() != 1) {
81+
return false;
82+
}
83+
84+
// Get the tool name (the single key in the object)
85+
auto it = tool_call.begin();
86+
std::string name = it.key();
87+
88+
if (name.empty()) {
89+
return false;
90+
}
91+
92+
// Get the arguments (the nested object)
93+
const json & args_json = it.value();
94+
std::string arguments = "";
95+
96+
if (args_json.is_object()) {
97+
arguments = args_json.dump();
98+
} else if (args_json.is_string()) {
99+
arguments = args_json;
100+
} else if (!args_json.is_null()) {
101+
// For other types, convert to string representation
102+
arguments = args_json.dump();
103+
}
104+
105+
return add_tool_call(name, "", arguments);
106+
}
78107
void common_chat_msg_parser::finish() {
79108
if (!is_partial_ && pos_ != input_.size()) {
80109
throw std::runtime_error("Unexpected content at end of input");// + input_.substr(pos_));

common/chat-parser.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,9 @@ class common_chat_msg_parser {
6464
// Adds an array of tool calls using their "name", "id" and "arguments" fields.
6565
bool add_tool_calls(const nlohmann::ordered_json & arr);
6666

67+
// Adds a tool call using the short form: { "tool_name": { "arg1": val, "arg2": val } }
68+
bool add_tool_call_short_form(const nlohmann::ordered_json & tool_call);
69+
6770
void finish();
6871

6972
bool consume_spaces();

common/chat.cpp

Lines changed: 110 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -638,6 +638,7 @@ const char * common_chat_format_name(common_chat_format format) {
638638
case COMMON_CHAT_FORMAT_GPT_OSS: return "GPT-OSS";
639639
case COMMON_CHAT_FORMAT_SEED_OSS: return "Seed-OSS";
640640
case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2";
641+
case COMMON_CHAT_FORMAT_APERTUS: return "Apertus";
641642
default:
642643
throw std::runtime_error("Unknown chat format");
643644
}
@@ -801,6 +802,7 @@ static std::string apply(
801802
}
802803
tmpl_inputs.add_generation_prompt = inputs.add_generation_prompt;
803804
tmpl_inputs.extra_context = inputs.extra_context;
805+
tmpl_inputs.extra_context["enable_thinking"] = inputs.enable_thinking;
804806
if (additional_context) {
805807
tmpl_inputs.extra_context.merge_patch(*additional_context);
806808
}
@@ -1264,6 +1266,75 @@ static common_chat_params common_chat_params_init_nemotron_v2(const common_chat_
12641266
}
12651267
return data;
12661268
}
1269+
1270+
static common_chat_params common_chat_params_init_apertus(const common_chat_template & tmpl, const struct templates_params & inputs) {
1271+
common_chat_params data;
1272+
1273+
// Generate the prompt using the apply() function with the template
1274+
data.prompt = apply(tmpl, inputs);
1275+
data.format = COMMON_CHAT_FORMAT_APERTUS;
1276+
1277+
// Handle thinking tags appropriately based on inputs.enable_thinking
1278+
if (string_ends_with(data.prompt, "<|inner_prefix|>")) {
1279+
if (!inputs.enable_thinking) {
1280+
data.prompt += "<|inner_suffix|>";
1281+
} else {
1282+
data.thinking_forced_open = true;
1283+
}
1284+
}
1285+
1286+
// When tools are present, build grammar for the <|tools_prefix|> format
1287+
if (!inputs.tools.is_null() && inputs.tools.is_array() && !inputs.tools.empty()) {
1288+
data.grammar_lazy = true;
1289+
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
1290+
auto schemas = json::array();
1291+
foreach_function(inputs.tools, [&](const json & tool) {
1292+
const auto & function = tool.at("function");
1293+
schemas.push_back({
1294+
{ "type", "object" },
1295+
{ "properties",
1296+
{
1297+
{ function.at("name"), function.at("parameters") }
1298+
} },
1299+
{ "required", json::array({ function.at("name") }) },
1300+
});
1301+
});
1302+
auto schema = json{
1303+
{ "type", "array" },
1304+
{ "items", schemas.size() == 1 ? schemas[0] : json{ { "anyOf", schemas } } },
1305+
{ "minItems", 1 },
1306+
};
1307+
if (!inputs.parallel_tool_calls) {
1308+
schema["maxItems"] = 1;
1309+
}
1310+
builder.add_rule("root",
1311+
std::string(data.thinking_forced_open ? "( \"<|inner_suffix|>\" space )? " : "") +
1312+
"\"<|tools_prefix|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tools_suffix|>\"");
1313+
});
1314+
data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1315+
// If thinking_forced_open, then we capture the <|inner_suffix|> tag in the grammar,
1316+
// (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
1317+
std::string(data.thinking_forced_open ?
1318+
"[\\s\\S]*?(<\\|inner_suffix\\|>\\s*)" :
1319+
"(?:<\\|inner_prefix\\|>[\\s\\S]*?<\\|inner_suffix\\|>\\s*)?") +
1320+
"(<\\|tools_prefix\\|>)[\\s\\S]*" });
1321+
data.preserved_tokens = {
1322+
"<|system_start|>",
1323+
"<|system_end|>",
1324+
"<|developer_start|>",
1325+
"<|developer_end|>",
1326+
"<|user_start|>",
1327+
"<|user_end|>",
1328+
"<|assistant_start|>",
1329+
"<|assistant_end|>",
1330+
"<|inner_prefix|>",
1331+
"<|inner_suffix|>",
1332+
"<|tools_prefix|>",
1333+
"<|tools_suffix|>",
1334+
};
1335+
}
1336+
return data;
1337+
}
12671338
static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) {
12681339
if (!builder.syntax().parse_tool_calls) {
12691340
builder.add_content(builder.consume_rest());
@@ -2323,6 +2394,37 @@ static void common_chat_parse_nemotron_v2(common_chat_msg_parser & builder) {
23232394
builder.add_content(builder.consume_rest());
23242395
}
23252396

2397+
static void common_chat_parse_apertus(common_chat_msg_parser & builder) {
2398+
// Parse thinking tags
2399+
builder.try_parse_reasoning("<|inner_prefix|>", "<|inner_suffix|>");
2400+
if (!builder.syntax().parse_tool_calls) {
2401+
builder.add_content(builder.consume_rest());
2402+
return;
2403+
}
2404+
2405+
// Look for tool calls
2406+
static const common_regex tool_call_regex(regex_escape("<|tools_prefix|>"));
2407+
if (auto res = builder.try_find_regex(tool_call_regex)) {
2408+
builder.move_to(res->groups[0].end);
2409+
2410+
auto tool_calls_data = builder.consume_json();
2411+
if (tool_calls_data.json.is_array()) {
2412+
builder.consume_spaces();
2413+
if (!builder.try_consume_literal("<|tools_suffix|>")) {
2414+
throw common_chat_msg_partial_exception("Incomplete tool call");
2415+
}
2416+
for (const auto & value : tool_calls_data.json) {
2417+
if (value.is_object()) {
2418+
builder.add_tool_call_short_form(value);
2419+
}
2420+
}
2421+
} else {
2422+
throw common_chat_msg_partial_exception("Incomplete tool call");
2423+
}
2424+
}
2425+
builder.add_content(builder.consume_rest());
2426+
}
2427+
23262428
static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
23272429
// Parse thinking tags first - this handles the main reasoning content
23282430
builder.try_parse_reasoning("<seed:think>", "</seed:think>");
@@ -2567,6 +2669,11 @@ static common_chat_params common_chat_templates_apply_jinja(
25672669
return common_chat_params_init_nemotron_v2(tmpl, params);
25682670
}
25692671

2672+
// Apertus format detection
2673+
if (src.find("<|system_start|>") != std::string::npos && src.find("<|tools_prefix|>") != std::string::npos) {
2674+
return common_chat_params_init_apertus(tmpl, params);
2675+
}
2676+
25702677
// Use generic handler when mixing tools + JSON schema.
25712678
// TODO: support that mix in handlers below.
25722679
if ((params.tools.is_array() && params.json_schema.is_object())) {
@@ -2734,6 +2841,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
27342841
case COMMON_CHAT_FORMAT_NEMOTRON_V2:
27352842
common_chat_parse_nemotron_v2(builder);
27362843
break;
2844+
case COMMON_CHAT_FORMAT_APERTUS:
2845+
common_chat_parse_apertus(builder);
2846+
break;
27372847
default:
27382848
throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
27392849
}

common/chat.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -114,6 +114,7 @@ enum common_chat_format {
114114
COMMON_CHAT_FORMAT_GPT_OSS,
115115
COMMON_CHAT_FORMAT_SEED_OSS,
116116
COMMON_CHAT_FORMAT_NEMOTRON_V2,
117+
COMMON_CHAT_FORMAT_APERTUS,
117118

118119
COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
119120
};

0 commit comments

Comments
 (0)