Skip to content

Commit 586cacd

Browse files
authored
[libclc] Optimize generic CLC fmin/fmax (#128506)
With this commit, the CLC fmin/fmax builtins use clang's __builtin_elementwise_(min|max)imumnum which helps us generate LLVM minimumnum/maximumnum intrinsics directly. These intrinsics uniformly select the non-NaN input over the (quiet or signalling) NaN input, which corresponds to what the OpenCL CTS tests. These intrinsics maintain the vector types, as opposed to scalarizing, which was previously happening. This commit therefore helps to optimize codegen for those targets. Note that there is ongoing discussion regarding how these builtins should handle signalling NaNs in the OpenCL specification and whether they should be able to return a quiet NaN as per the IEEE behaviour. If the specification and/or CTS is ever updated to allow or mandate returning a qNaN, these builtins could/should be updated to use __builtin_elementwise_(min|max)num instead which would lower to LLVM minnum/maxnum intrinsics. The SPIR-V targets maintain the old implementations, as the LLVM -> SPIR-V translator can't currently handle the LLVM intrinsics. The implementation has been simplified to consistently use clang builtins, as opposed to before where the half version was explicitly defined. [1] KhronosGroup/OpenCL-CTS#2285
1 parent 315e2e2 commit 586cacd

File tree

9 files changed

+9
-214
lines changed

9 files changed

+9
-214
lines changed

libclc/clc/lib/amdgcn/SOURCES

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
math/clc_fmax.cl
2-
math/clc_fmin.cl
31
math/clc_ldexp_override.cl
42
workitem/clc_get_global_offset.cl
53
workitem/clc_get_global_size.cl

libclc/clc/lib/generic/math/clc_fmax.cl

Lines changed: 2 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -6,53 +6,10 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
#include <clc/clcmacro.h>
109
#include <clc/internal/clc.h>
11-
#include <clc/relational/clc_isnan.h>
1210

13-
#define __FLOAT_ONLY
14-
#define __CLC_MIN_VECSIZE 1
1511
#define FUNCTION __clc_fmax
16-
#define __IMPL_FUNCTION __builtin_fmaxf
17-
#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
18-
#include <clc/math/gentype.inc>
19-
#undef __CLC_MIN_VECSIZE
20-
#undef FUNCTION
21-
#undef __IMPL_FUNCTION
22-
23-
#ifdef cl_khr_fp64
24-
25-
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
26-
27-
#define __DOUBLE_ONLY
28-
#define __CLC_MIN_VECSIZE 1
29-
#define FUNCTION __clc_fmax
30-
#define __IMPL_FUNCTION __builtin_fmax
31-
#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
32-
#include <clc/math/gentype.inc>
33-
#undef __CLC_MIN_VECSIZE
34-
#undef FUNCTION
35-
#undef __IMPL_FUNCTION
12+
#define __IMPL_FUNCTION(x) __builtin_elementwise_maximumnum
13+
#define __CLC_BODY <clc/shared/binary_def.inc>
3614

37-
#endif
38-
39-
#ifdef cl_khr_fp16
40-
41-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
42-
43-
_CLC_DEF _CLC_OVERLOAD half __clc_fmax(half x, half y) {
44-
if (__clc_isnan(x))
45-
return y;
46-
if (__clc_isnan(y))
47-
return x;
48-
return (x < y) ? y : x;
49-
}
50-
51-
#define __HALF_ONLY
52-
#define __CLC_SUPPORTED_VECSIZE_OR_1 2
53-
#define FUNCTION __clc_fmax
54-
#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
5515
#include <clc/math/gentype.inc>
56-
#undef FUNCTION
57-
58-
#endif

libclc/clc/lib/generic/math/clc_fmin.cl

Lines changed: 2 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -6,52 +6,10 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
#include <clc/clcmacro.h>
109
#include <clc/internal/clc.h>
11-
#include <clc/relational/clc_isnan.h>
1210

13-
#define __FLOAT_ONLY
14-
#define __CLC_MIN_VECSIZE 1
1511
#define FUNCTION __clc_fmin
16-
#define __IMPL_FUNCTION __builtin_fminf
17-
#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
18-
#include <clc/math/gentype.inc>
19-
#undef __CLC_MIN_VECSIZE
20-
#undef FUNCTION
21-
#undef __IMPL_FUNCTION
22-
23-
#ifdef cl_khr_fp64
24-
25-
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
26-
27-
#define __DOUBLE_ONLY
28-
#define __CLC_MIN_VECSIZE 1
29-
#define FUNCTION __clc_fmin
30-
#define __IMPL_FUNCTION __builtin_fmin
31-
#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
32-
#include <clc/math/gentype.inc>
33-
#undef __CLC_MIN_VECSIZE
34-
#undef FUNCTION
35-
#undef __IMPL_FUNCTION
12+
#define __IMPL_FUNCTION(x) __builtin_elementwise_minimumnum
13+
#define __CLC_BODY <clc/shared/binary_def.inc>
3614

37-
#endif
38-
39-
#ifdef cl_khr_fp16
40-
41-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
42-
43-
_CLC_DEF _CLC_OVERLOAD half __clc_fmin(half x, half y) {
44-
if (__clc_isnan(x))
45-
return y;
46-
if (__clc_isnan(y))
47-
return x;
48-
return (y < x) ? y : x;
49-
}
50-
51-
#define __HALF_ONLY
52-
#define __CLC_SUPPORTED_VECSIZE_OR_1 2
53-
#define FUNCTION __clc_fmin
54-
#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
5515
#include <clc/math/gentype.inc>
56-
57-
#endif

libclc/clc/lib/r600/SOURCES

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,2 @@
1-
math/clc_fmax.cl
2-
math/clc_fmin.cl
31
math/clc_native_rsqrt.cl
42
math/clc_rsqrt_override.cl

libclc/clc/lib/r600/math/clc_fmax.cl

Lines changed: 0 additions & 41 deletions
This file was deleted.

libclc/clc/lib/r600/math/clc_fmin.cl

Lines changed: 0 additions & 42 deletions
This file was deleted.

libclc/clc/lib/spirv/SOURCES

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
1+
math/clc_fmax.cl
2+
math/clc_fmin.cl
13
math/clc_runtime_has_hw_fma32.cl

libclc/clc/lib/amdgcn/math/clc_fmax.cl renamed to libclc/clc/lib/spirv/math/clc_fmax.cl

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,40 +8,23 @@
88

99
#include <clc/clcmacro.h>
1010
#include <clc/internal/clc.h>
11-
#include <clc/relational/clc_isnan.h>
1211

1312
_CLC_DEF _CLC_OVERLOAD float __clc_fmax(float x, float y) {
14-
// fcanonicalize removes sNaNs and flushes denormals if not enabled. Otherwise
15-
// fmax instruction flushes the values for comparison, but outputs original
16-
// denormal
17-
x = __builtin_canonicalizef(x);
18-
y = __builtin_canonicalizef(y);
1913
return __builtin_fmaxf(x, y);
2014
}
2115

2216
#ifdef cl_khr_fp64
23-
2417
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
25-
2618
_CLC_DEF _CLC_OVERLOAD double __clc_fmax(double x, double y) {
27-
x = __builtin_canonicalize(x);
28-
y = __builtin_canonicalize(y);
2919
return __builtin_fmax(x, y);
3020
}
31-
3221
#endif
33-
#ifdef cl_khr_fp16
3422

23+
#ifdef cl_khr_fp16
3524
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
36-
3725
_CLC_DEF _CLC_OVERLOAD half __clc_fmax(half x, half y) {
38-
if (__clc_isnan(x))
39-
return y;
40-
if (__clc_isnan(y))
41-
return x;
42-
return (y < x) ? x : y;
26+
return __builtin_fmaxf16(x, y);
4327
}
44-
4528
#endif
4629

4730
#define FUNCTION __clc_fmax

libclc/clc/lib/amdgcn/math/clc_fmin.cl renamed to libclc/clc/lib/spirv/math/clc_fmin.cl

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,41 +8,23 @@
88

99
#include <clc/clcmacro.h>
1010
#include <clc/internal/clc.h>
11-
#include <clc/relational/clc_isnan.h>
1211

1312
_CLC_DEF _CLC_OVERLOAD float __clc_fmin(float x, float y) {
14-
// fcanonicalize removes sNaNs and flushes denormals if not enabled. Otherwise
15-
// fmin instruction flushes the values for comparison, but outputs original
16-
// denormal
17-
x = __builtin_canonicalizef(x);
18-
y = __builtin_canonicalizef(y);
1913
return __builtin_fminf(x, y);
2014
}
2115

2216
#ifdef cl_khr_fp64
23-
2417
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
25-
2618
_CLC_DEF _CLC_OVERLOAD double __clc_fmin(double x, double y) {
27-
x = __builtin_canonicalize(x);
28-
y = __builtin_canonicalize(y);
2919
return __builtin_fmin(x, y);
3020
}
31-
3221
#endif
3322

3423
#ifdef cl_khr_fp16
35-
3624
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
37-
3825
_CLC_DEF _CLC_OVERLOAD half __clc_fmin(half x, half y) {
39-
if (__clc_isnan(x))
40-
return y;
41-
if (__clc_isnan(y))
42-
return x;
43-
return (y < x) ? y : x;
26+
return __builtin_fminf16(x, y);
4427
}
45-
4628
#endif
4729

4830
#define FUNCTION __clc_fmin

0 commit comments

Comments
 (0)