From 75faa5ad211683c3567b38e744f91cc8f4203167 Mon Sep 17 00:00:00 2001
From: hipudding
Date: Tue, 2 Sep 2025 17:12:37 +0800
Subject: [PATCH 1/4] Add TRUNC unary op with SYCL support

---
 Testing/Temporary/CTestCostData.txt            |  1 +
 docs/ops.md                                    |  1 +
 docs/ops/CPU.csv                               |  4 ++
 docs/ops/SYCL.csv                              |  4 ++
 ggml/include/ggml.h                            |  9 ++++
 ggml/src/ggml-cann/aclnn_ops.cpp               | 25 ++++++-----
 ggml/src/ggml-cpu/ggml-cpu.c                   |  1 +
 ggml/src/ggml-cpu/ops.cpp                      |  4 ++
 ggml/src/ggml-cpu/unary-ops.cpp                |  8 ++++
 ggml/src/ggml-cpu/unary-ops.h                  |  1 +
 ggml/src/ggml-sycl/element_wise.cpp            | 36 +++++++++++++--
 ggml/src/ggml-sycl/element_wise.hpp            |  2 +
 ggml/src/ggml-sycl/ggml-sycl.cpp               |  4 ++
 .../vulkan-shaders/vulkan-shaders-gen.cpp      |  8 +++-
 ggml/src/ggml.c                                | 17 ++++++-
 tests/test-backend-ops.cpp                     | 44 +++++++++++++++++++
 trunc_support.csv                              |  1 +
 17 files changed, 155 insertions(+), 15 deletions(-)
 create mode 100644 Testing/Temporary/CTestCostData.txt
 create mode 100644 trunc_support.csv

diff --git a/Testing/Temporary/CTestCostData.txt b/Testing/Temporary/CTestCostData.txt
new file mode 100644
index 0000000000000..ed97d539c095c
--- /dev/null
+++ b/Testing/Temporary/CTestCostData.txt
@@ -0,0 +1 @@
+---
diff --git a/docs/ops.md b/docs/ops.md
index 9a81ca0a9770c..4a4bbd7f9d617 100644
--- a/docs/ops.md
+++ b/docs/ops.md
@@ -100,4 +100,5 @@
 | SWIGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ |
 | TANH | ❌ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | 🟡 | ❌ |
 | TIMESTEP_EMBEDDING | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
+| TRUNC | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ |
 | UPSCALE | ❌ | 🟡 | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ❌ |
diff --git a/docs/ops/CPU.csv b/docs/ops/CPU.csv
index 21e0d1b3c9117..01a6ae1095d0b 100644
--- a/docs/ops/CPU.csv
+++ b/docs/ops/CPU.csv
@@ -1,6 +1,8 @@
 "backend_name","op_name","op_params","test_mode","supported","error_message","backend_reg_name"
 "CPU","ABS","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
 "CPU","ABS","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
+"CPU","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
+"CPU","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
 "CPU","SGN","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
 "CPU","SGN","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
 "CPU","NEG","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
@@ -61,6 +63,8 @@
 "CPU","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
 "CPU","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
 "CPU","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
+"CPU","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
+"CPU","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
 "CPU","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
 "CPU","SGN","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
 "CPU","NEG","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
diff --git a/docs/ops/SYCL.csv b/docs/ops/SYCL.csv
index 5d022ee91aa7a..10f2061639f7c 100644
--- a/docs/ops/SYCL.csv
+++ b/docs/ops/SYCL.csv
@@ -1,6 +1,8 @@
 "backend_name","op_name","op_params","test_mode","supported","error_message","backend_reg_name"
 "SYCL0","ABS","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
 "SYCL0","ABS","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
+"SYCL0","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
+"SYCL0","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
 "SYCL0","SGN","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
 "SYCL0","SGN","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
 "SYCL0","NEG","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
@@ -61,6 +63,8 @@
 "SYCL0","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","SYCL"
 "SYCL0","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
 "SYCL0","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
+"SYCL0","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
+"SYCL0","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
 "SYCL0","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
 "SYCL0","SGN","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
 "SYCL0","NEG","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index 7e9c3c8c7a096..9cae41274299c 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -559,6 +559,7 @@ extern "C" {

     enum ggml_unary_op {
         GGML_UNARY_OP_ABS,
+        GGML_UNARY_OP_TRUNC,
         GGML_UNARY_OP_SGN,
         GGML_UNARY_OP_NEG,
         GGML_UNARY_OP_STEP,
@@ -1027,6 +1028,14 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_abs_inplace(
             struct ggml_context * ctx,
             struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_trunc(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_trunc_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);

     GGML_API struct ggml_tensor * ggml_sgn(
             struct ggml_context * ctx,
diff --git a/ggml/src/ggml-cann/aclnn_ops.cpp b/ggml/src/ggml-cann/aclnn_ops.cpp
index 11fbd1bc6769f..9c312faab7a13 100755
--- a/ggml/src/ggml-cann/aclnn_ops.cpp
+++ b/ggml/src/ggml-cann/aclnn_ops.cpp
@@ -1425,21 +1425,25 @@ static void aclnn_pow_tensor_tensor(ggml_backend_cann_context& ctx,
  * @param start Starting exponent offset.
  * @param stop Stopping exponent offset (exclusive).
  * @param step Step size for the exponent increment.
+ * @param dtype Data type for slope tensor.
  */
 static void aclnn_get_slope_inner(ggml_backend_cann_context& ctx, void* slope_buffer,
-    float m, int64_t size, float start, float stop, float step){
+    float m, int64_t size, float start, float stop, float step, ggml_type dtype){
+    aclDataType acl_type = ggml_cann_type_mapping(dtype);
+    size_t type_size = ggml_type_size(dtype);
+
     int64_t ne[] = {size};
-    size_t nb[] = {sizeof(uint16_t)};
+    size_t nb[] = {type_size};

-    ggml_cann_pool_alloc arange_allocator(ctx.pool(), size * sizeof(uint16_t));
+    ggml_cann_pool_alloc arange_allocator(ctx.pool(), size * type_size);
     void* arange_buffer = arange_allocator.get();

     aclTensor* arange_tensor = ggml_cann_create_tensor(
-        arange_buffer, ACL_FLOAT16, sizeof(uint16_t), ne, nb, 1);
+        arange_buffer, acl_type, type_size, ne, nb, 1);
     aclnn_arange(ctx, arange_tensor, start, stop, step, size);

     aclTensor* slope_tensor = ggml_cann_create_tensor(
-        slope_buffer, ACL_FLOAT16, sizeof(uint16_t), ne, nb, 1);
+        slope_buffer, acl_type, type_size, ne, nb, 1);

     aclScalar* sc = aclCreateScalar(&m, aclDataType::ACL_FLOAT);

@@ -1470,10 +1474,11 @@ static void aclnn_get_slope_inner(ggml_backend_cann_context& ctx, void* slope_bu
  * @param n_head Total number of attention heads.
  * @param slope_buffer Pointer to the output buffer (float array) for storing slopes.
  * @param max_bias Maximum bias value for slope computation.
+ * @param dtype Data type for slope tensor.
  *
  */
 static void aclnn_get_slope(ggml_backend_cann_context & ctx, int64_t n_head,
-    void* slope_buffer, float max_bias) {
+    void* slope_buffer, float max_bias, ggml_type dtype) {
     const int n_head_log2 = 1u << (uint32_t) floor(log2(n_head));

     float m0 = powf(2.0f, -(max_bias) / n_head_log2);
@@ -1490,7 +1495,7 @@ static void aclnn_get_slope(ggml_backend_cann_context & ctx, int64_t n_head,
     float step = 1;
     float count = n_head_log2;
     // end needs to be +1 because aclnn uses a left-closed, right-open interval.
-    aclnn_get_slope_inner(ctx, slope_buffer, m0, count, start, end + 1, step);
+    aclnn_get_slope_inner(ctx, slope_buffer, m0, count, start, end + 1, step, dtype);
     if (n_head_log2 < n_head) {
         // arange2
         start = 2 * (n_head_log2 - n_head_log2) + 1;
@@ -1499,7 +1504,7 @@ static void aclnn_get_slope(ggml_backend_cann_context & ctx, int64_t n_head,
         count = n_head - n_head_log2;
         aclnn_get_slope_inner(
             ctx, (char *) slope_buffer + n_head_log2 * sizeof(float),
-            m1, count, start, end + 1, step);
+            m1, count, start, end + 1, step, dtype);
     }
 }

@@ -1536,7 +1541,7 @@ static void aclnn_add_alibi(ggml_backend_cann_context& ctx, ggml_tensor* mask,
         ggml_cann_pool_alloc bias_allocator(
             ctx.pool(), ggml_nelements(dst) * ggml_element_size(dst));
         bias_buffer = bias_allocator.get();
-        aclnn_get_slope(ctx, n_heads, slope_buffer, max_bias);
+        aclnn_get_slope(ctx, n_heads, slope_buffer, max_bias, GGML_TYPE_F32);
     }

     // broadcast for mask, slop and dst;
@@ -3269,7 +3274,7 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst){
         const int64_t n_heads = src0->ne[2];
         ggml_cann_pool_alloc slope_allocator(ctx.pool(), n_heads * sizeof(uint16_t));
         void* slope_buffer = slope_allocator.get();
-        aclnn_get_slope(ctx, n_heads, slope_buffer, maxBias);
+        aclnn_get_slope(ctx, n_heads, slope_buffer, maxBias, GGML_TYPE_F16);

         int64_t slope_ne[] = {1, 1, n_heads, 1};
         size_t slope_nb[GGML_MAX_DIMS];
diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c
index 0d5d3a3440aaf..6f643f7225901 100644
--- a/ggml/src/ggml-cpu/ggml-cpu.c
+++ b/ggml/src/ggml-cpu/ggml-cpu.c
@@ -2162,6 +2162,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
         case GGML_OP_UNARY:
             switch (ggml_get_unary_op(node)) {
                 case GGML_UNARY_OP_ABS:
+                case GGML_UNARY_OP_TRUNC:
                 case GGML_UNARY_OP_SGN:
                 case GGML_UNARY_OP_NEG:
                 case GGML_UNARY_OP_STEP:
diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp
index 8c1f7948855ac..b00e40f7c9147 100644
--- a/ggml/src/ggml-cpu/ops.cpp
+++ b/ggml/src/ggml-cpu/ops.cpp
@@ -9336,6 +9336,10 @@ void ggml_compute_forward_unary(
             {
                 ggml_compute_forward_abs(params, dst);
             } break;
+        case GGML_UNARY_OP_TRUNC:
+            {
+                ggml_compute_forward_trunc(params, dst);
+            } break;
         case GGML_UNARY_OP_SGN:
             {
                 ggml_compute_forward_sgn(params, dst);
diff --git a/ggml/src/ggml-cpu/unary-ops.cpp b/ggml/src/ggml-cpu/unary-ops.cpp
index 4fce569b3bfc8..fe7deedd1a783 100644
--- a/ggml/src/ggml-cpu/unary-ops.cpp
+++ b/ggml/src/ggml-cpu/unary-ops.cpp
@@ -4,6 +4,10 @@ static inline float op_abs(float x) {
     return fabsf(x);
 }

+static inline float op_trunc(float x) {
+    return truncf(x);
+}
+
 static inline float op_sgn(float x) {
     return (x > 0.f) ? 1.f : ((x < 0.f) ? -1.f : 0.f);
 }
@@ -125,6 +129,10 @@ void ggml_compute_forward_abs(const ggml_compute_params * params, ggml_tensor *
     unary_op<op_abs>(params, dst);
 }

+void ggml_compute_forward_trunc(const ggml_compute_params * params, ggml_tensor * dst) {
+    unary_op<op_trunc>(params, dst);
+}
+
 void ggml_compute_forward_sgn(const ggml_compute_params * params, ggml_tensor * dst) {
     unary_op<op_sgn>(params, dst);
 }
diff --git a/ggml/src/ggml-cpu/unary-ops.h b/ggml/src/ggml-cpu/unary-ops.h
index b1ade2c8e341f..5241f43e4458b 100644
--- a/ggml/src/ggml-cpu/unary-ops.h
+++ b/ggml/src/ggml-cpu/unary-ops.h
@@ -7,6 +7,7 @@ extern "C" {
 #endif

 void ggml_compute_forward_abs(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_trunc(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_sgn(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_neg(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_step(const struct ggml_compute_params * params, struct ggml_tensor * dst);
diff --git a/ggml/src/ggml-sycl/element_wise.cpp b/ggml/src/ggml-sycl/element_wise.cpp
index 0363b06a3ec9b..b509fcbefad04 100644
--- a/ggml/src/ggml-sycl/element_wise.cpp
+++ b/ggml/src/ggml-sycl/element_wise.cpp
@@ -39,6 +39,11 @@ static __dpct_inline__ T op_abs(T x) {
     return sycl::fabs(x);
 }

+template<typename T>
+static __dpct_inline__ T op_trunc(T x) {
+    return sycl::trunc(x);
+}
+
 template<typename T>
 static __dpct_inline__ T op_elu(T x) {
     return (x > static_cast<T>(0.f)) ? x : sycl::expm1(x);
@@ -164,6 +169,13 @@ static void unary_op_abs_kernel(const T * x, T * dst, const int k, const sycl::n
     }
 }

+template<typename T>
+static void unary_op_trunc_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
+    SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
+        dst[i] = op_trunc(x[i]);
+    }
+}
+
 template<typename T>
 static void unary_op_elu_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
     SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
@@ -425,8 +437,8 @@ static void upscale_sycl(const T *x, T *dst, const int nb00, const int nb01,
     int dst_size = ne10 * ne11 * ne12 * ne13;
     int num_blocks = ceil_div(dst_size, SYCL_UPSCALE_BLOCK_SIZE);
     sycl::range<1> gridDim(num_blocks * SYCL_UPSCALE_BLOCK_SIZE);
-    sycl_parallel_for<1>(
-        stream, sycl::nd_range<1>(gridDim, sycl::range<1>(SYCL_UPSCALE_BLOCK_SIZE)), [=](sycl::nd_item<1> item_ct1) {
+    sycl_parallel_for(stream,
+        sycl::nd_range<1>(gridDim, sycl::range<1>(SYCL_UPSCALE_BLOCK_SIZE)), [=](sycl::nd_item<1> item_ct1) {
             upscale(x, dst, nb00, nb01, nb02, nb03, ne10, ne11, ne12, ne13, sf0, sf1, sf2, sf3, item_ct1);
         });
 }
@@ -661,6 +673,19 @@ static inline void ggml_sycl_op_abs(ggml_backend_sycl_context & ctx, ggml_tensor
         });
 }

+static inline void ggml_sycl_op_trunc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
+        [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
+            const int num_blocks = ceil_div(k_elements, 256);
+            sycl_parallel_for(stream,
+                sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256),
+                                  sycl::range<1>(256)),
+                [=](sycl::nd_item<1> item_ct1) {
+                    unary_op_trunc_kernel(src, dst_ptr, k_elements, item_ct1);
+                });
+        });
+}
+
 static inline void ggml_sycl_op_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
         [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
@@ -935,7 +960,7 @@ static inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tens
     ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
         [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream, float min_arg, float max_arg) {
             const int num_blocks = ceil_div(k_elements, SYCL_CLAMP_BLOCK_SIZE);
-            sycl_parallel_for(stream,
+            sycl_parallel_for(main_stream,
                 sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_CLAMP_BLOCK_SIZE),
                                   sycl::range<1>(SYCL_CLAMP_BLOCK_SIZE)),
                 [=](sycl::nd_item<1> item_ct1) {
@@ -1139,6 +1164,11 @@ void ggml_sycl_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     ggml_sycl_op_abs(ctx, dst);
 }

+void ggml_sycl_trunc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
+    ggml_sycl_op_trunc(ctx, dst);
+}
+
 void ggml_sycl_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_elu(ctx, dst);
diff --git a/ggml/src/ggml-sycl/element_wise.hpp b/ggml/src/ggml-sycl/element_wise.hpp
index 50749e87d783e..eb517e020706a 100644
--- a/ggml/src/ggml-sycl/element_wise.hpp
+++ b/ggml/src/ggml-sycl/element_wise.hpp
@@ -75,6 +75,8 @@ void ggml_sycl_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

 void ggml_sycl_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

+void ggml_sycl_trunc(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+
 void ggml_sycl_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

 void ggml_sycl_geglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp
index 18ff4e0b0c7cf..7213c15e9e162 100644
--- a/ggml/src/ggml-sycl/ggml-sycl.cpp
+++ b/ggml/src/ggml-sycl/ggml-sycl.cpp
@@ -3626,6 +3626,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg
                 case GGML_UNARY_OP_ABS:
                     ggml_sycl_abs(ctx, dst);
                     break;
+                case GGML_UNARY_OP_TRUNC:
+                    ggml_sycl_trunc(ctx, dst);
+                    break;
                 case GGML_UNARY_OP_ELU:
                     ggml_sycl_elu(ctx, dst);
                     break;
@@ -4181,6 +4184,7 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
             case GGML_UNARY_OP_EXP:
             case GGML_UNARY_OP_SGN:
             case GGML_UNARY_OP_ABS:
+            case GGML_UNARY_OP_TRUNC:
             case GGML_UNARY_OP_ELU:
 #if defined (GGML_SYCL_F16)
                 return ggml_is_contiguous(op->src[0]) && (op->type == op->src[0]->type);
diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
index 6c64e1b513bea..1263a70e4f757 100644
--- a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
@@ -854,7 +854,13 @@ void write_output_files() {
         fputs(len.c_str(), src);
     }

-    for (const std::string& btype : {"f16", "f32", "q8_1"}) {
+    std::vector<std::string> btypes = {"f16", "f32"};
+
+#if defined(GGML_VULKAN_INTEGER_DOT_GLSLC_SUPPORT)
+    btypes.push_back("q8_1");
+#endif
+
+    for (const std::string& btype : btypes) {
         for (const auto& tname : type_names) {
             if (btype == "q8_1" && !is_legacy_quant(tname)) {
                 continue;
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index d76ea58f789e2..6ce25c699df67 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -1127,6 +1127,7 @@ static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");

 static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
     "ABS",
+    "ROUND",
     "SGN",
     "NEG",
     "STEP",
@@ -1143,7 +1144,7 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
     "GELU_ERF",
 };

-static_assert(GGML_UNARY_OP_COUNT == 15, "GGML_UNARY_OP_COUNT != 15");
+static_assert(GGML_UNARY_OP_COUNT == 16, "GGML_UNARY_OP_COUNT != 16");


 static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
@@ -2479,6 +2480,20 @@ struct ggml_tensor * ggml_abs_inplace(
     return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS);
 }

+// ggml_trunc
+
+struct ggml_tensor * ggml_trunc(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a) {
+    return ggml_unary(ctx, a, GGML_UNARY_OP_TRUNC);
+}
+
+struct ggml_tensor * ggml_trunc_inplace(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a) {
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TRUNC);
+}
+
 // ggml_sgn

 struct ggml_tensor * ggml_sgn(
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 3a58621094d17..95be5ed3b643a 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -3555,6 +3555,49 @@ struct test_sin : public test_case {
     }
 };

+// GGML_OP_TRUNC
+// Note: TRUNC is not differentiable, so gradient tests are skipped.
+struct test_trunc : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne;
+
+    std::string vars() override {
+        return VARS_TO_STR2(type, ne);
+    }
+
+    test_trunc(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {10, 2, 2, 2})
+        : type(type), ne(ne) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_param(a);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * out = ggml_trunc(ctx, a);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    void initialize_tensors(ggml_context * ctx) override {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            init_tensor_uniform(t, -6.5f, 6.5f); // Covers interval [-2*pi, 2*pi].
+        }
+    }
+
+    double max_maa_err() override {
+        if (type == GGML_TYPE_F16 || type == GGML_TYPE_BF16) {
+            return 1e-3;
+        }
+        return 1e-6;
+    }
+
+    bool grad_precise() override {
+        return false;
+    }
+};
+
 // GGML_OP_COS
 struct test_cos : public test_case {
     const ggml_type type;
@@ -6177,6 +6220,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
         test_cases.emplace_back(new test_sqrt(type));
         test_cases.emplace_back(new test_log(type));
         test_cases.emplace_back(new test_sin(type));
+        test_cases.emplace_back(new test_trunc(type));
         test_cases.emplace_back(new test_cos(type));
         test_cases.emplace_back(new test_clamp(type));
     }
diff --git a/trunc_support.csv b/trunc_support.csv
new file mode 100644
index 0000000000000..ac94446e88df5
--- /dev/null
+++ b/trunc_support.csv
@@ -0,0 +1 @@
+"backend_name","op_name","op_params","test_mode","supported","error_message","backend_reg_name"

From 29597b7f4b61e8661f789afd574a8bb7d682836a Mon Sep 17 00:00:00 2001
From: safranowith
Date: Wed, 17 Sep 2025 12:34:08 +0300
Subject: [PATCH 2/4] Moving the operator to the end of the list

---
 ggml/include/ggml.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index 9cae41274299c..544312101ca30 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -559,7 +559,6 @@ extern "C" {

     enum ggml_unary_op {
         GGML_UNARY_OP_ABS,
-        GGML_UNARY_OP_TRUNC,
         GGML_UNARY_OP_SGN,
         GGML_UNARY_OP_NEG,
         GGML_UNARY_OP_STEP,
@@ -574,6 +573,7 @@ extern "C" {
         GGML_UNARY_OP_HARDSIGMOID,
         GGML_UNARY_OP_EXP,
         GGML_UNARY_OP_GELU_ERF,
+        GGML_UNARY_OP_TRUNC,

         GGML_UNARY_OP_COUNT,
     };

From 08c0cec662da40fa253de7814a3a16e28412a317 Mon Sep 17 00:00:00 2001
From: safranowith
Date: Wed, 17 Sep 2025 12:43:38 +0300
Subject: [PATCH 3/4] Error correction

---
 ggml/src/ggml.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 6ce25c699df67..1dcf8f32f1632 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -1127,7 +1127,6 @@ static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");

 static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
     "ABS",
-    "ROUND",
     "SGN",
     "NEG",
     "STEP",
@@ -1142,6 +1141,7 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
     "HARDSIGMOID",
     "EXP",
     "GELU_ERF",
+    "TRUNC",
 };

 static_assert(GGML_UNARY_OP_COUNT == 16, "GGML_UNARY_OP_COUNT != 16");

From 1f19008f0f2b67aa2b7ec0ac1595542458e59a69 Mon Sep 17 00:00:00 2001
From: safranowith
Date: Wed, 17 Sep 2025 13:31:30 +0300
Subject: [PATCH 4/4] Fixing a change that is not related to my change

---
 ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
index 1263a70e4f757..82cb0a91bd1b8 100644
--- a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
@@ -853,7 +853,7 @@ void write_output_files() {
         fputs(data.c_str(), src);
         fputs(len.c_str(), src);
     }
-
+    for (const std::string& btype : {"f16", "f32", "q8_1"}) {
     std::vector<std::string> btypes = {"f16", "f32"};

 #if defined(GGML_VULKAN_INTEGER_DOT_GLSLC_SUPPORT)
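
For reference, here is a minimal sketch of exercising the new operator through the public ggml API. It is illustrative only and not part of the patch series above; it assumes a CPU-backend build of the tree where "ggml-cpu.h" and ggml_graph_compute_with_ctx() are available, and uses only stock ggml calls plus the ggml_trunc() added in PATCH 1/4.

#include <stdio.h>
#include "ggml.h"
#include "ggml-cpu.h"

int main(void) {
    // small context with real allocations so tensor data can be written directly
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a   = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    struct ggml_tensor * out = ggml_trunc(ctx, a); // the unary op added by this series

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, out);

    float * in = (float *) a->data;
    in[0] = 1.7f; in[1] = -2.3f; in[2] = 0.5f; in[3] = -0.9f;

    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/1);

    const float * res = (const float *) out->data;
    // truncation toward zero: expected 1.0 -2.0 0.0 -0.0
    printf("%.1f %.1f %.1f %.1f\n", res[0], res[1], res[2], res[3]);

    ggml_free(ctx);
    return 0;
}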