Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions ggml/src/ggml-cpu/ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7519,8 +7519,8 @@ static void ggml_compute_forward_upscale_f32(
float pixel_offset = 0.5f;
if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) {
pixel_offset = 0.0f;
sf0 = (float)(ne0 - 1) / (src0->ne[0] - 1);
sf1 = (float)(ne1 - 1) / (src0->ne[1] - 1);
sf0 = ne0 > 1 && ne00 > 1 ? (float)(ne0 - 1) / (ne00 - 1) : sf0;
sf1 = ne1 > 1 && ne01 > 1 ? (float)(ne1 - 1) / (ne01 - 1) : sf1;
}

for (int64_t i3 = 0; i3 < ne3; i3++) {
Expand Down
4 changes: 2 additions & 2 deletions ggml/src/ggml-cuda/upscale.cu
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,8 @@ void ggml_cuda_op_upscale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
} else if (mode == GGML_SCALE_MODE_BILINEAR) {
float pixel_offset = 0.5f;
if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) {
sf0 = (float)(dst->ne[0] - 1) / (src0->ne[0] - 1);
sf1 = (float)(dst->ne[1] - 1) / (src0->ne[1] - 1);
sf0 = dst->ne[0] > 1 && src0->ne[0] > 1 ? (float)(dst->ne[0] - 1) / (src0->ne[0] - 1) : sf0;
sf1 = dst->ne[1] > 1 && src0->ne[1] > 1 ? (float)(dst->ne[1] - 1) / (src0->ne[1] - 1) : sf1;
pixel_offset = 0.0f;
}
upscale_f32_bilinear_cuda(src0_d, dst_d, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3],
Expand Down
4 changes: 2 additions & 2 deletions ggml/src/ggml-opencl/ggml-opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6156,8 +6156,8 @@ static void ggml_cl_upscale(ggml_backend_t backend, const ggml_tensor * src0, gg
CL_CHECK(clSetKernelArg(kernel, 15, sizeof(float), &sf3));
} else if (mode == GGML_SCALE_MODE_BILINEAR) {
if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) {
sf0 = (float)(ne0 - 1) / (ne00 - 1);
sf1 = (float)(ne1 - 1) / (ne01 - 1);
sf0 = ne0 > 1 && ne00 > 1 ? (float)(ne0 - 1) / (ne00 - 1) : sf0;
sf1 = ne1 > 1 && ne01 > 1 ? (float)(ne1 - 1) / (ne01 - 1) : sf1;
pixel_offset = 0.0f;
}

Expand Down
34 changes: 19 additions & 15 deletions ggml/src/ggml-vulkan/ggml-vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,7 @@ struct vk_device_struct {
vk_pipeline pipeline_add_id_f32;

vk_pipeline pipeline_concat_f32, pipeline_concat_f16, pipeline_concat_i32;
vk_pipeline pipeline_upscale_nearest_f32, pipeline_upscale_bilinear_f32, pipeline_upscale_bilinear_ac_f32;
vk_pipeline pipeline_upscale_nearest_f32, pipeline_upscale_bilinear_f32;
vk_pipeline pipeline_scale_f32;
vk_pipeline pipeline_sqr_f32;
vk_pipeline pipeline_sqrt_f32;
Expand Down Expand Up @@ -1240,6 +1240,7 @@ struct vk_op_upscale_push_constants {
uint32_t nb00; uint32_t nb01; uint32_t nb02; uint32_t nb03;
uint32_t ne10; uint32_t ne11; uint32_t ne12; uint32_t ne13;
float sf0; float sf1; float sf2; float sf3;
float pixel_offset;
};

struct vk_op_sum_rows_push_constants
Expand Down Expand Up @@ -3498,7 +3499,6 @@ static void ggml_vk_load_shaders(vk_device& device) {

ggml_vk_create_pipeline(device, device->pipeline_upscale_nearest_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_NEAREST}, 1);
ggml_vk_create_pipeline(device, device->pipeline_upscale_bilinear_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_BILINEAR}, 1);
ggml_vk_create_pipeline(device, device->pipeline_upscale_bilinear_ac_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS}, 1);

ggml_vk_create_pipeline(device, device->pipeline_scale_f32, "scale_f32", scale_f32_len, scale_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);

Expand Down Expand Up @@ -7855,14 +7855,14 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
return nullptr;
case GGML_OP_UPSCALE:
if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
int mode = ggml_get_op_params_i32(dst, 0);
ggml_scale_mode mode = (ggml_scale_mode)(ggml_get_op_params_i32(dst, 0) & 0xFF);
switch (mode) {
case GGML_SCALE_MODE_NEAREST:
return ctx->device->pipeline_upscale_nearest_f32;
case GGML_SCALE_MODE_BILINEAR:
return ctx->device->pipeline_upscale_bilinear_f32;
case GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS:
return ctx->device->pipeline_upscale_bilinear_ac_f32;
default:
return nullptr;
}
}
return nullptr;
Expand Down Expand Up @@ -9351,22 +9351,26 @@ static void ggml_vk_upscale(ggml_backend_vk_context * ctx, vk_context& subctx, c
const uint32_t src0_type_size = ggml_type_size(src0->type);
const uint32_t mode = (uint32_t)ggml_get_op_params_i32(dst, 0);

float sf0 = (float)dst->ne[0] / src0->ne[0];
float sf1 = (float)dst->ne[1] / src0->ne[1];
float sf2 = (float)dst->ne[2] / src0->ne[2];
float sf3 = (float)dst->ne[3] / src0->ne[3];
GGML_TENSOR_UNARY_OP_LOCALS

float sf0 = (float)ne0 / ne00;
float sf1 = (float)ne1 / ne01;
float sf2 = (float)ne2 / ne02;
float sf3 = (float)ne3 / ne03;
float pixel_offset = 0.5f;

if (mode & GGML_SCALE_FLAG_ALIGN_CORNERS) {
sf0 = (float)(dst->ne[0] - 1) / (src0->ne[0] - 1);
sf1 = (float)(dst->ne[1] - 1) / (src0->ne[1] - 1);
sf0 = ne0 > 1 && ne00 > 1 ? (float)(ne0 - 1) / (ne00 - 1) : sf0;
sf1 = ne1 > 1 && ne01 > 1 ? (float)(ne1 - 1) / (ne01 - 1) : sf1;
pixel_offset = 0.0f;
}

ggml_vk_op_f32<vk_op_upscale_push_constants>(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_UPSCALE, {
(uint32_t)ggml_nelements(dst), 0, 0,
(uint32_t)src0->ne[0], (uint32_t)src0->ne[1],
(uint32_t)src0->nb[0] / src0_type_size, (uint32_t)src0->nb[1] / src0_type_size, (uint32_t)src0->nb[2] / src0_type_size, (uint32_t)src0->nb[3] / src0_type_size,
(uint32_t)dst->ne[0], (uint32_t)dst->ne[1], (uint32_t)dst->ne[2],(uint32_t)dst->ne[3],
sf0, sf1, sf2, sf3,
(uint32_t)ne00, (uint32_t)ne01,
(uint32_t)nb00 / src0_type_size, (uint32_t)nb01 / src0_type_size, (uint32_t)nb02 / src0_type_size, (uint32_t)nb03 / src0_type_size,
(uint32_t)ne0, (uint32_t)ne1, (uint32_t)ne2, (uint32_t)ne3,
sf0, sf1, sf2, sf3, pixel_offset
}, dryrun);
}

Expand Down
17 changes: 2 additions & 15 deletions ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ layout (push_constant) uniform parameter
uint nb00; uint nb01; uint nb02; uint nb03;
uint ne10; uint ne11; uint ne12; uint ne13;
float sf0; float sf1; float sf2; float sf3;
float pixel_offset;
} p;

#include "types.glsl"
Expand All @@ -19,7 +20,6 @@ layout (binding = 1) writeonly buffer D {D_TYPE data_d[];};
// from ggml.h: enum ggml_scale_mode, enum ggml_scale_flag
#define NEAREST 0
#define BILINEAR 1
#define ALIGN_CORNERS (1 << 8)

layout (constant_id = 0) const uint scale_mode = 0;

Expand Down Expand Up @@ -52,7 +52,7 @@ float fetch_bilinear(ivec2 c0, ivec2 c1, vec2 d, uint i12, uint i13) {
float interpolate_bilinear(uint i10, uint i11, uint i12, uint i13) {
const ivec2 ne0 = ivec2(p.ne00, p.ne01);

const vec2 c = (vec2(i10, i11) + 0.5) / vec2(p.sf0, p.sf1) - 0.5;
const vec2 c = (vec2(i10, i11) + p.pixel_offset) / vec2(p.sf0, p.sf1) - p.pixel_offset;
const vec2 c0f = floor(c);
const vec2 d = c - c0f;
const ivec2 c0 = max(ivec2(c0f), 0);
Expand All @@ -61,16 +61,6 @@ float interpolate_bilinear(uint i10, uint i11, uint i12, uint i13) {
return fetch_bilinear(c0, c1, d, i12, i13);
}

// Bilinear interpolation for the output element at (i10, i11), align-corners variant.
// The source coordinate is obtained by dividing the output index by the scale
// factors (p.sf0, p.sf1) directly; no pixel offset is added or subtracted here.
// i12 and i13 are forwarded unchanged to fetch_bilinear.
// Returns whatever fetch_bilinear produces for the two neighbouring coordinates.
float interpolate_bilinear_align_corners(uint i10, uint i11, uint i12, uint i13) {
// Continuous source-space coordinate for this output element.
const vec2 c = vec2(i10, i11) / vec2(p.sf0, p.sf1);
// Integer part (lower neighbour) and fractional part (interpolation weight).
const vec2 c0f = floor(c);
const vec2 d = c - c0f;
const ivec2 c0 = ivec2(c0f);
// Upper neighbour; not clamped in this function.
// NOTE(review): presumably fetch_bilinear or the caller keeps c1 in range — confirm.
const ivec2 c1 = c0 + 1;

return fetch_bilinear(c0, c1, d, i12, i13);
}

void main() {
const uint idx = gl_GlobalInvocationID.z * 262144 + gl_GlobalInvocationID.y * 512 + gl_GlobalInvocationID.x;

Expand All @@ -91,9 +81,6 @@ void main() {
case BILINEAR:
result = interpolate_bilinear(i10, i11, i12, i13);
break;
case BILINEAR | ALIGN_CORNERS:
result = interpolate_bilinear_align_corners(i10, i11, i12, i13);
break;
}

data_d[p.d_offset + idx] = D_TYPE(result);
Expand Down
2 changes: 2 additions & 0 deletions tests/test-backend-ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6877,6 +6877,8 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {5, 7, 11, 13}, {2, 5, 7, 11}, mode));
}
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS));
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {1, 4, 3, 2}, {2, 8, 3, 2}, GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS));
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {4, 1, 3, 2}, {1, 1, 3, 2}, GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS));

test_cases.emplace_back(new test_sum());
test_cases.emplace_back(new test_sum_rows());
Expand Down
Loading