1 change: 1 addition & 0 deletions Testing/Temporary/CTestCostData.txt
Member:
Be careful not to add extra files that are not used in the project.

Author:
Thanks for the review!
This has been fixed.

@@ -0,0 +1 @@
---
1 change: 1 addition & 0 deletions docs/ops.md
@@ -100,4 +100,5 @@ Legend:
| SWIGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ |
| TANH | ❌ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | 🟡 | ❌ |
| TIMESTEP_EMBEDDING | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
| TRUNC | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ |
| UPSCALE | ❌ | 🟡 | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ❌ |
4 changes: 4 additions & 0 deletions docs/ops/CPU.csv
@@ -1,6 +1,8 @@
"backend_name","op_name","op_params","test_mode","supported","error_message","backend_reg_name"
"CPU","ABS","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
"CPU","ABS","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
"CPU","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
"CPU","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
"CPU","SGN","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
"CPU","SGN","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
"CPU","NEG","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
@@ -61,6 +63,8 @@
"CPU","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
"CPU","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
"CPU","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
"CPU","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
"CPU","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
"CPU","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
"CPU","SGN","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
"CPU","NEG","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
4 changes: 4 additions & 0 deletions docs/ops/SYCL.csv
@@ -1,6 +1,8 @@
"backend_name","op_name","op_params","test_mode","supported","error_message","backend_reg_name"
"SYCL0","ABS","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
"SYCL0","ABS","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
"SYCL0","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
"SYCL0","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
"SYCL0","SGN","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
"SYCL0","SGN","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
"SYCL0","NEG","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
@@ -61,6 +63,8 @@
"SYCL0","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","SYCL"
"SYCL0","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
"SYCL0","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
"SYCL0","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
"SYCL0","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
"SYCL0","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
"SYCL0","SGN","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
"SYCL0","NEG","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
9 changes: 9 additions & 0 deletions ggml/include/ggml.h
@@ -559,6 +559,7 @@ extern "C" {

enum ggml_unary_op {
GGML_UNARY_OP_ABS,
GGML_UNARY_OP_TRUNC,
Member:
Add new unary ops at the end of the enum.

Author:
Thanks for the review. This will be fixed soon.

Author:
This has been fixed.

GGML_UNARY_OP_SGN,
GGML_UNARY_OP_NEG,
GGML_UNARY_OP_STEP,
@@ -1027,6 +1028,14 @@ extern "C" {
GGML_API struct ggml_tensor * ggml_abs_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_trunc(
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_trunc_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_sgn(
struct ggml_context * ctx,
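
For readers skimming the API change, here is a minimal, hypothetical usage sketch (not part of the diff) of the new ggml_trunc() entry point. It assumes the usual ggml context/graph helpers; depending on the ggml version, ggml_graph_compute_with_ctx() is declared in ggml.h or ggml-cpu.h.

#include "ggml.h"
#include "ggml-cpu.h"   // assumption: recent ggml versions declare ggml_graph_compute_with_ctx() here
#include <stdio.h>

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // a = [ 1.7, -2.3, 0.5, -0.9 ]
    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    float * a_data = (float *) a->data;
    a_data[0] = 1.7f; a_data[1] = -2.3f; a_data[2] = 0.5f; a_data[3] = -0.9f;

    // b = trunc(a): element-wise truncation toward zero
    struct ggml_tensor * b = ggml_trunc(ctx, a);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, b);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/1);

    for (int i = 0; i < 4; ++i) {
        printf("trunc(% .1f) = % .1f\n", a_data[i], ((float *) b->data)[i]);  // expect 1, -2, 0, -0
    }

    ggml_free(ctx);
    return 0;
}
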
25 changes: 15 additions & 10 deletions ggml/src/ggml-cann/aclnn_ops.cpp
@@ -1425,21 +1425,25 @@ static void aclnn_pow_tensor_tensor(ggml_backend_cann_context& ctx,
* @param start Starting exponent offset.
* @param stop Stopping exponent offset (exclusive).
* @param step Step size for the exponent increment.
* @param dtype Data type for slope tensor.
*/
static void aclnn_get_slope_inner(ggml_backend_cann_context& ctx, void* slope_buffer,
float m, int64_t size, float start, float stop, float step){
float m, int64_t size, float start, float stop, float step, ggml_type dtype){
aclDataType acl_type = ggml_cann_type_mapping(dtype);
size_t type_size = ggml_type_size(dtype);

int64_t ne[] = {size};
size_t nb[] = {sizeof(uint16_t)};
size_t nb[] = {type_size};

ggml_cann_pool_alloc arange_allocator(ctx.pool(), size * sizeof(uint16_t));
ggml_cann_pool_alloc arange_allocator(ctx.pool(), size * type_size);
void* arange_buffer = arange_allocator.get();

aclTensor* arange_tensor = ggml_cann_create_tensor(
arange_buffer, ACL_FLOAT16, sizeof(uint16_t), ne, nb, 1);
arange_buffer, acl_type, type_size, ne, nb, 1);
aclnn_arange(ctx, arange_tensor, start, stop, step, size);

aclTensor* slope_tensor = ggml_cann_create_tensor(
slope_buffer, ACL_FLOAT16, sizeof(uint16_t), ne, nb, 1);
slope_buffer, acl_type, type_size, ne, nb, 1);

aclScalar* sc = aclCreateScalar(&m, aclDataType::ACL_FLOAT);

@@ -1470,10 +1474,11 @@ static void aclnn_get_slope_inner(ggml_backend_cann_context& ctx, void* slope_bu
* @param n_head Total number of attention heads.
* @param slope_buffer Pointer to the output buffer (float array) for storing slopes.
* @param max_bias Maximum bias value for slope computation.
* @param dtype Data type for slope tensor.
*
*/
static void aclnn_get_slope(ggml_backend_cann_context & ctx, int64_t n_head,
void* slope_buffer, float max_bias) {
void* slope_buffer, float max_bias, ggml_type dtype) {
const int n_head_log2 = 1u << (uint32_t) floor(log2(n_head));

float m0 = powf(2.0f, -(max_bias) / n_head_log2);
@@ -1490,7 +1495,7 @@ static void aclnn_get_slope(ggml_backend_cann_context & ctx, int64_t n_head,
float step = 1;
float count = n_head_log2;
// end needs to be +1 because aclnn uses a left-closed, right-open interval.
aclnn_get_slope_inner(ctx, slope_buffer, m0, count, start, end + 1, step);
aclnn_get_slope_inner(ctx, slope_buffer, m0, count, start, end + 1, step, dtype);
if (n_head_log2 < n_head) {
// arange2
start = 2 * (n_head_log2 - n_head_log2) + 1;
@@ -1499,7 +1504,7 @@ static void aclnn_get_slope(ggml_backend_cann_context & ctx, int64_t n_head,
count = n_head - n_head_log2;
aclnn_get_slope_inner(
ctx, (char *) slope_buffer + n_head_log2 * sizeof(float),
m1, count, start, end + 1, step);
m1, count, start, end + 1, step, dtype);
}
}

@@ -1536,7 +1541,7 @@ static void aclnn_add_alibi(ggml_backend_cann_context& ctx, ggml_tensor* mask,
ggml_cann_pool_alloc bias_allocator(
ctx.pool(), ggml_nelements(dst) * ggml_element_size(dst));
bias_buffer = bias_allocator.get();
aclnn_get_slope(ctx, n_heads, slope_buffer, max_bias);
aclnn_get_slope(ctx, n_heads, slope_buffer, max_bias, GGML_TYPE_F32);
}

// broadcast for mask, slop and dst;
@@ -3269,7 +3274,7 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst){
const int64_t n_heads = src0->ne[2];
ggml_cann_pool_alloc slope_allocator(ctx.pool(), n_heads * sizeof(uint16_t));
void* slope_buffer = slope_allocator.get();
aclnn_get_slope(ctx, n_heads, slope_buffer, maxBias);
aclnn_get_slope(ctx, n_heads, slope_buffer, maxBias, GGML_TYPE_F16);

int64_t slope_ne[] = {1, 1, n_heads, 1};
size_t slope_nb[GGML_MAX_DIMS];
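
For context on the aclnn_get_slope() change above (not part of the diff): the helper fills the slope buffer with the standard ggml ALiBi slopes; only the output dtype is now parameterized. A host-side sketch of the values it is expected to produce, assuming ggml's usual formula:

#include <math.h>

// ALiBi slope for attention head h (0-based), following ggml's usual formula:
// heads below n_head_log2 use base m0, the remaining heads use base m1 with odd exponents.
static float alibi_slope(int h, int n_head, float max_bias) {
    const int   n_head_log2 = 1 << (int) floorf(log2f((float) n_head));
    const float m0 = powf(2.0f, -(max_bias       ) / n_head_log2);
    const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2);
    return (h < n_head_log2) ? powf(m0, (float) (h + 1))
                             : powf(m1, (float) (2 * (h - n_head_log2) + 1));
}

The two aclnn_get_slope_inner() calls correspond to the two branches: an arange over [1, n_head_log2] raised to base m0, and an odd-numbered arange raised to base m1 for the remaining heads.
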
1 change: 1 addition & 0 deletions ggml/src/ggml-cpu/ggml-cpu.c
@@ -2162,6 +2162,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
case GGML_OP_UNARY:
switch (ggml_get_unary_op(node)) {
case GGML_UNARY_OP_ABS:
case GGML_UNARY_OP_TRUNC:
case GGML_UNARY_OP_SGN:
case GGML_UNARY_OP_NEG:
case GGML_UNARY_OP_STEP:
4 changes: 4 additions & 0 deletions ggml/src/ggml-cpu/ops.cpp
@@ -9336,6 +9336,10 @@ void ggml_compute_forward_unary(
{
ggml_compute_forward_abs(params, dst);
} break;
case GGML_UNARY_OP_TRUNC:
{
ggml_compute_forward_trunc(params, dst);
} break;
case GGML_UNARY_OP_SGN:
{
ggml_compute_forward_sgn(params, dst);
8 changes: 8 additions & 0 deletions ggml/src/ggml-cpu/unary-ops.cpp
@@ -4,6 +4,10 @@ static inline float op_abs(float x) {
return fabsf(x);
}

static inline float op_trunc(float x) {
return truncf(x);
}

static inline float op_sgn(float x) {
return (x > 0.f) ? 1.f : ((x < 0.f) ? -1.f : 0.f);
}
@@ -125,6 +129,10 @@ void ggml_compute_forward_abs(const ggml_compute_params * params, ggml_tensor *
unary_op<op_abs>(params, dst);
}

void ggml_compute_forward_trunc(const ggml_compute_params * params, ggml_tensor * dst) {
unary_op<op_trunc>(params, dst);
}

void ggml_compute_forward_sgn(const ggml_compute_params * params, ggml_tensor * dst) {
unary_op<op_sgn>(params, dst);
}
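
A quick semantic note on op_trunc() above (not part of the diff): truncf() rounds toward zero, which differs from floorf() for negative inputs. A small standalone check:

#include <math.h>
#include <stdio.h>

int main(void) {
    const float xs[] = { 2.7f, -2.7f, -0.3f };
    for (int i = 0; i < 3; ++i) {
        // trunc(-2.7) = -2.0 while floor(-2.7) = -3.0
        printf("x=% .1f  trunc=% .1f  floor=% .1f\n", xs[i], truncf(xs[i]), floorf(xs[i]));
    }
    return 0;
}
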
1 change: 1 addition & 0 deletions ggml/src/ggml-cpu/unary-ops.h
@@ -7,6 +7,7 @@ extern "C" {
#endif

void ggml_compute_forward_abs(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_trunc(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_sgn(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_neg(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_step(const struct ggml_compute_params * params, struct ggml_tensor * dst);
36 changes: 33 additions & 3 deletions ggml/src/ggml-sycl/element_wise.cpp
@@ -39,6 +39,11 @@ static __dpct_inline__ T op_abs(T x) {
return sycl::fabs(x);
}

template<typename T>
static __dpct_inline__ T op_trunc(T x) {
return sycl::trunc(x);
}

template<typename T>
static __dpct_inline__ T op_elu(T x) {
return (x > static_cast<T>(0.f)) ? x : sycl::expm1(x);
@@ -164,6 +169,13 @@ static void unary_op_abs_kernel(const T * x, T * dst, const int k, const sycl::n
}
}

template<typename T>
static void unary_op_trunc_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
dst[i] = op_trunc(x[i]);
}
}

template<typename T>
static void unary_op_elu_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
@@ -425,8 +437,8 @@ static void upscale_sycl(const T *x, T *dst, const int nb00, const int nb01,
int dst_size = ne10 * ne11 * ne12 * ne13;
int num_blocks = ceil_div(dst_size, SYCL_UPSCALE_BLOCK_SIZE);
sycl::range<1> gridDim(num_blocks * SYCL_UPSCALE_BLOCK_SIZE);
sycl_parallel_for<1>(
stream, sycl::nd_range<1>(gridDim, sycl::range<1>(SYCL_UPSCALE_BLOCK_SIZE)), [=](sycl::nd_item<1> item_ct1) {
sycl_parallel_for(stream,
sycl::nd_range<1>(gridDim, sycl::range<1>(SYCL_UPSCALE_BLOCK_SIZE)), [=](sycl::nd_item<1> item_ct1) {
upscale(x, dst, nb00, nb01, nb02, nb03, ne10, ne11, ne12, ne13, sf0, sf1, sf2, sf3, item_ct1);
});
}
@@ -661,6 +673,19 @@ static inline void ggml_sycl_op_abs(ggml_backend_sycl_context & ctx, ggml_tensor
});
}

static inline void ggml_sycl_op_trunc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
const int num_blocks = ceil_div(k_elements, 256);
sycl_parallel_for(stream,
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256),
sycl::range<1>(256)),
[=](sycl::nd_item<1> item_ct1) {
unary_op_trunc_kernel(src, dst_ptr, k_elements, item_ct1);
});
});
}

static inline void ggml_sycl_op_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
@@ -935,7 +960,7 @@ static inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tens
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream, float min_arg, float max_arg) {
const int num_blocks = ceil_div(k_elements, SYCL_CLAMP_BLOCK_SIZE);
sycl_parallel_for(stream,
sycl_parallel_for(main_stream,
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_CLAMP_BLOCK_SIZE),
sycl::range<1>(SYCL_CLAMP_BLOCK_SIZE)),
[=](sycl::nd_item<1> item_ct1) {
@@ -1139,6 +1164,11 @@ void ggml_sycl_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
ggml_sycl_op_abs(ctx, dst);
}

void ggml_sycl_trunc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_trunc(ctx, dst);
}

void ggml_sycl_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_elu(ctx, dst);
2 changes: 2 additions & 0 deletions ggml/src/ggml-sycl/element_wise.hpp
@@ -75,6 +75,8 @@ void ggml_sycl_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_trunc(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_geglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
4 changes: 4 additions & 0 deletions ggml/src/ggml-sycl/ggml-sycl.cpp
@@ -3626,6 +3626,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg
case GGML_UNARY_OP_ABS:
ggml_sycl_abs(ctx, dst);
break;
case GGML_UNARY_OP_TRUNC:
ggml_sycl_trunc(ctx, dst);
break;
case GGML_UNARY_OP_ELU:
ggml_sycl_elu(ctx, dst);
break;
@@ -4181,6 +4184,7 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
case GGML_UNARY_OP_EXP:
case GGML_UNARY_OP_SGN:
case GGML_UNARY_OP_ABS:
case GGML_UNARY_OP_TRUNC:
case GGML_UNARY_OP_ELU:
#if defined (GGML_SYCL_F16)
return ggml_is_contiguous(op->src[0]) && (op->type == op->src[0]->type);
@@ -854,7 +854,13 @@ void write_output_files() {
fputs(len.c_str(), src);
}

for (const std::string& btype : {"f16", "f32", "q8_1"}) {
std::vector<std::string> btypes = {"f16", "f32"};

#if defined(GGML_VULKAN_INTEGER_DOT_GLSLC_SUPPORT)
btypes.push_back("q8_1");
#endif

for (const std::string& btype : btypes) {
for (const auto& tname : type_names) {
Comment on lines 927 to 935
Member:
This change should be in a separate PR

Collaborator:
This is my code from #15740, no idea what it is doing here.

Author:
I don't understand what needs fixing. This is not a change I made. I would appreciate a clearer explanation, thanks.

Collaborator:
The change is in 75faa5a for whatever reason; it should not be a part of that commit.

Author:
I fixed it.

Collaborator:
[Screenshot from 2025-09-18 at 15:42:25]

Not fully, no. You can look this up yourself.

if (btype == "q8_1" && !is_legacy_quant(tname)) {
continue;
Expand Down
17 changes: 16 additions & 1 deletion ggml/src/ggml.c
@@ -1127,6 +1127,7 @@ static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");

static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
"ABS",
"ROUND",
Member:
Suggested change
"ROUND",
"TRUNC",

Author:
The error has been corrected.

"SGN",
"NEG",
"STEP",
@@ -1143,7 +1144,7 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
"GELU_ERF",
};

static_assert(GGML_UNARY_OP_COUNT == 15, "GGML_UNARY_OP_COUNT != 15");
static_assert(GGML_UNARY_OP_COUNT == 16, "GGML_UNARY_OP_COUNT != 16");


static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
@@ -2479,6 +2480,20 @@ struct ggml_tensor * ggml_abs_inplace(
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS);
}

// ggml_trunc

struct ggml_tensor * ggml_trunc(
struct ggml_context * ctx,
struct ggml_tensor * a) {
return ggml_unary(ctx, a, GGML_UNARY_OP_TRUNC);
}

struct ggml_tensor * ggml_trunc_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a) {
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TRUNC);
}

// ggml_sgn

struct ggml_tensor * ggml_sgn(
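
One closing note on the GGML_UNARY_OP_NAME table shown earlier in ggml.c (not part of the diff): the strings are looked up by enum value, so their order must mirror enum ggml_unary_op exactly, which is why the stray "ROUND" entry was flagged. A sketch of the lookup pattern, as ggml commonly implements it:

// the table is indexed directly by the enum value, so any ordering mismatch
// makes every later op report the wrong name
const char * ggml_unary_op_name(enum ggml_unary_op op) {
    return GGML_UNARY_OP_NAME[op];
}
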