Skip to content

Commit bb727b8

Browse files
committed
[libc][math] Refactor acosf16 implementation to header-only in src/__support/math folder.
1 parent ef4e4a0 commit bb727b8

File tree

7 files changed

+229
-155
lines changed

7 files changed

+229
-155
lines changed

libc/shared/math.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
#include "math/acos.h"
1515
#include "math/acosf.h"
16+
#include "math/acosf16.h"
1617
#include "math/exp.h"
1718
#include "math/exp10.h"
1819
#include "math/exp10f.h"

libc/shared/math/acosf16.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
//===-- Shared acosf16 function ---------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SHARED_MATH_ACOSF16_H
10+
#define LLVM_LIBC_SHARED_MATH_ACOSF16_H
11+
12+
#include "include/llvm-libc-macros/float16-macros.h"
13+
14+
#ifdef LIBC_TYPES_HAS_FLOAT16
15+
16+
#include "shared/libc_common.h"
17+
#include "src/__support/math/acosf16.h"
18+
19+
namespace LIBC_NAMESPACE_DECL {
20+
namespace shared {
21+
22+
using math::acosf16;
23+
24+
} // namespace shared
25+
} // namespace LIBC_NAMESPACE_DECL
26+
27+
#endif // LIBC_TYPES_HAS_FLOAT16
28+
29+
#endif // LLVM_LIBC_SHARED_MATH_ACOSF16_H

libc/src/__support/math/CMakeLists.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,22 @@ add_header_library(
3131
libc.src.__support.macros.optimization
3232
)
3333

34+
add_header_library(
35+
acosf16
36+
HDRS
37+
acosf16.h
38+
DEPENDS
39+
libc.src.__support.FPUtil.cast
40+
libc.src.__support.FPUtil.except_value_utils
41+
libc.src.__support.FPUtil.fenv_impl
42+
libc.src.__support.FPUtil.fp_bits
43+
libc.src.__support.FPUtil.multiply_add
44+
libc.src.__support.FPUtil.polyeval
45+
libc.src.__support.FPUtil.sqrt
46+
libc.src.__support.macros.optimization
47+
libc.src.__support.macros.properties.types
48+
)
49+
3450
add_header_library(
3551
asin_utils
3652
HDRS

libc/src/__support/math/acosf16.h

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
//===-- Implementation header for acosf16 -----------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ACOSF16_H
10+
#define LLVM_LIBC_SRC___SUPPORT_MATH_ACOSF16_H
11+
12+
#include "include/llvm-libc-macros/float16-macros.h"
13+
14+
#ifdef LIBC_TYPES_HAS_FLOAT16
15+
16+
#include "src/__support/FPUtil/FEnvImpl.h"
17+
#include "src/__support/FPUtil/FPBits.h"
18+
#include "src/__support/FPUtil/PolyEval.h"
19+
#include "src/__support/FPUtil/cast.h"
20+
#include "src/__support/FPUtil/except_value_utils.h"
21+
#include "src/__support/FPUtil/multiply_add.h"
22+
#include "src/__support/FPUtil/sqrt.h"
23+
#include "src/__support/macros/optimization.h"
24+
25+
namespace LIBC_NAMESPACE_DECL {
26+
27+
namespace math {
28+
29+
// Generated by Sollya using the following command:
30+
// > round(pi/2, SG, RN);
31+
// > round(pi, SG, RN);
32+
static constexpr float PI_OVER_2 = 0x1.921fb6p0f;
33+
static constexpr float PI = 0x1.921fb6p1f;
34+
35+
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
36+
static constexpr size_t N_EXCEPTS = 2;
37+
38+
static constexpr fputil::ExceptValues<float16, N_EXCEPTS> ACOSF16_EXCEPTS{{
39+
// (input, RZ output, RU offset, RD offset, RN offset)
40+
{0xacaf, 0x3e93, 1, 0, 0},
41+
{0xb874, 0x4052, 1, 0, 1},
42+
}};
43+
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
44+
45+
static constexpr float16 acosf16(float16 x) {
46+
using FPBits = fputil::FPBits<float16>;
47+
FPBits xbits(x);
48+
49+
uint16_t x_u = xbits.uintval();
50+
uint16_t x_abs = x_u & 0x7fff;
51+
uint16_t x_sign = x_u >> 15;
52+
53+
// |x| > 0x1p0, |x| > 1, or x is NaN.
54+
if (LIBC_UNLIKELY(x_abs > 0x3c00)) {
55+
// acosf16(NaN) = NaN
56+
if (xbits.is_nan()) {
57+
if (xbits.is_signaling_nan()) {
58+
fputil::raise_except_if_required(FE_INVALID);
59+
return FPBits::quiet_nan().get_val();
60+
}
61+
62+
return x;
63+
}
64+
65+
// 1 < |x| <= +/-inf
66+
fputil::raise_except_if_required(FE_INVALID);
67+
fputil::set_errno_if_required(EDOM);
68+
69+
return FPBits::quiet_nan().get_val();
70+
}
71+
72+
float xf = x;
73+
74+
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
75+
// Handle exceptional values
76+
if (auto r = ACOSF16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
77+
return r.value();
78+
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
79+
80+
// |x| == 0x1p0, x is 1 or -1
81+
// if x is (-)1, return pi, else
82+
// if x is (+)1, return 0
83+
if (LIBC_UNLIKELY(x_abs == 0x3c00))
84+
return fputil::cast<float16>(x_sign ? PI : 0.0f);
85+
86+
float xsq = xf * xf;
87+
88+
// |x| <= 0x1p-1, |x| <= 0.5
89+
if (x_abs <= 0x3800) {
90+
// if x is 0, return pi/2
91+
if (LIBC_UNLIKELY(x_abs == 0))
92+
return fputil::cast<float16>(PI_OVER_2);
93+
94+
// Note that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x)
95+
// Degree-6 minimax polynomial of asin(x) generated by Sollya with:
96+
// > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]);
97+
float interm =
98+
fputil::polyeval(xsq, 0x1.000002p0f, 0x1.554c2ap-3f, 0x1.3541ccp-4f,
99+
0x1.43b2d6p-5f, 0x1.a0d73ep-5f);
100+
return fputil::cast<float16>(fputil::multiply_add(-xf, interm, PI_OVER_2));
101+
}
102+
103+
// When |x| > 0.5, assume that 0.5 < |x| <= 1
104+
//
105+
// Step-by-step range-reduction proof:
106+
// 1: Let y = asin(x), such that, x = sin(y)
107+
// 2: From complimentary angle identity:
108+
// x = sin(y) = cos(pi/2 - y)
109+
// 3: Let z = pi/2 - y, such that x = cos(z)
110+
// 4: From double angle formula; cos(2A) = 1 - 2 * sin^2(A):
111+
// z = 2A, z/2 = A
112+
// cos(z) = 1 - 2 * sin^2(z/2)
113+
// 5: Make sin(z/2) subject of the formula:
114+
// sin(z/2) = sqrt((1 - cos(z))/2)
115+
// 6: Recall [3]; x = cos(z). Therefore:
116+
// sin(z/2) = sqrt((1 - x)/2)
117+
// 7: Let u = (1 - x)/2
118+
// 8: Therefore:
119+
// asin(sqrt(u)) = z/2
120+
// 2 * asin(sqrt(u)) = z
121+
// 9: Recall [3]; z = pi/2 - y. Therefore:
122+
// y = pi/2 - z
123+
// y = pi/2 - 2 * asin(sqrt(u))
124+
// 10: Recall [1], y = asin(x). Therefore:
125+
// asin(x) = pi/2 - 2 * asin(sqrt(u))
126+
// 11: Recall that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x)
127+
// Therefore:
128+
// acos(x) = pi/2 - (pi/2 - 2 * asin(sqrt(u)))
129+
// acos(x) = 2 * asin(sqrt(u))
130+
//
131+
// THE RANGE REDUCTION, HOW?
132+
// 12: Recall [7], u = (1 - x)/2
133+
// 13: Since 0.5 < x <= 1, therefore:
134+
// 0 <= u <= 0.25 and 0 <= sqrt(u) <= 0.5
135+
//
136+
// Hence, we can reuse the same [0, 0.5] domain polynomial approximation for
137+
// Step [11] as `sqrt(u)` is in range.
138+
// When -1 < x <= -0.5, the identity:
139+
// acos(x) = pi - acos(-x)
140+
// allows us to compute for the negative x value (lhs)
141+
// with a positive x value instead (rhs).
142+
143+
float xf_abs = (xf < 0 ? -xf : xf);
144+
float u = fputil::multiply_add(-0.5f, xf_abs, 0.5f);
145+
float sqrt_u = fputil::sqrt<float>(u);
146+
147+
// Degree-6 minimax polynomial of asin(x) generated by Sollya with:
148+
// > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]);
149+
float asin_sqrt_u =
150+
sqrt_u * fputil::polyeval(u, 0x1.000002p0f, 0x1.554c2ap-3f,
151+
0x1.3541ccp-4f, 0x1.43b2d6p-5f, 0x1.a0d73ep-5f);
152+
153+
return fputil::cast<float16>(
154+
x_sign ? fputil::multiply_add(-2.0f, asin_sqrt_u, PI) : 2 * asin_sqrt_u);
155+
}
156+
157+
} // namespace math
158+
159+
} // namespace LIBC_NAMESPACE_DECL
160+
161+
#endif // LIBC_TYPES_HAS_FLOAT16
162+
163+
#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ACOS_H

libc/src/math/generic/CMakeLists.txt

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4040,17 +4040,8 @@ add_entrypoint_object(
40404040
HDRS
40414041
../acosf16.h
40424042
DEPENDS
4043-
libc.hdr.errno_macros
4044-
libc.hdr.fenv_macros
4045-
libc.src.__support.FPUtil.cast
4046-
libc.src.__support.FPUtil.except_value_utils
4047-
libc.src.__support.FPUtil.fenv_impl
4048-
libc.src.__support.FPUtil.fp_bits
4049-
libc.src.__support.FPUtil.multiply_add
4050-
libc.src.__support.FPUtil.polyeval
4051-
libc.src.__support.FPUtil.sqrt
4052-
libc.src.__support.macros.optimization
4053-
libc.src.__support.macros.properties.types
4043+
libc.src.__support.math.acosf16
4044+
libc.src.errno.errno
40544045
)
40554046

40564047
add_entrypoint_object(

libc/src/math/generic/acosf16.cpp

Lines changed: 2 additions & 136 deletions
Original file line numberDiff line numberDiff line change
@@ -8,144 +8,10 @@
88
//===----------------------------------------------------------------------===//
99

1010
#include "src/math/acosf16.h"
11-
#include "hdr/errno_macros.h"
12-
#include "hdr/fenv_macros.h"
13-
#include "src/__support/FPUtil/FEnvImpl.h"
14-
#include "src/__support/FPUtil/FPBits.h"
15-
#include "src/__support/FPUtil/PolyEval.h"
16-
#include "src/__support/FPUtil/cast.h"
17-
#include "src/__support/FPUtil/except_value_utils.h"
18-
#include "src/__support/FPUtil/multiply_add.h"
19-
#include "src/__support/FPUtil/sqrt.h"
20-
#include "src/__support/macros/optimization.h"
11+
#include "src/__support/math/acosf16.h"
2112

2213
namespace LIBC_NAMESPACE_DECL {
2314

24-
// Generated by Sollya using the following command:
25-
// > round(pi/2, SG, RN);
26-
// > round(pi, SG, RN);
27-
static constexpr float PI_OVER_2 = 0x1.921fb6p0f;
28-
static constexpr float PI = 0x1.921fb6p1f;
15+
LLVM_LIBC_FUNCTION(float16, acosf16, (float16 x)) { return math::acosf16(x); }
2916

30-
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
31-
static constexpr size_t N_EXCEPTS = 2;
32-
33-
static constexpr fputil::ExceptValues<float16, N_EXCEPTS> ACOSF16_EXCEPTS{{
34-
// (input, RZ output, RU offset, RD offset, RN offset)
35-
{0xacaf, 0x3e93, 1, 0, 0},
36-
{0xb874, 0x4052, 1, 0, 1},
37-
}};
38-
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
39-
40-
LLVM_LIBC_FUNCTION(float16, acosf16, (float16 x)) {
41-
using FPBits = fputil::FPBits<float16>;
42-
FPBits xbits(x);
43-
44-
uint16_t x_u = xbits.uintval();
45-
uint16_t x_abs = x_u & 0x7fff;
46-
uint16_t x_sign = x_u >> 15;
47-
48-
// |x| > 0x1p0, |x| > 1, or x is NaN.
49-
if (LIBC_UNLIKELY(x_abs > 0x3c00)) {
50-
// acosf16(NaN) = NaN
51-
if (xbits.is_nan()) {
52-
if (xbits.is_signaling_nan()) {
53-
fputil::raise_except_if_required(FE_INVALID);
54-
return FPBits::quiet_nan().get_val();
55-
}
56-
57-
return x;
58-
}
59-
60-
// 1 < |x| <= +/-inf
61-
fputil::raise_except_if_required(FE_INVALID);
62-
fputil::set_errno_if_required(EDOM);
63-
64-
return FPBits::quiet_nan().get_val();
65-
}
66-
67-
float xf = x;
68-
69-
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
70-
// Handle exceptional values
71-
if (auto r = ACOSF16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
72-
return r.value();
73-
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
74-
75-
// |x| == 0x1p0, x is 1 or -1
76-
// if x is (-)1, return pi, else
77-
// if x is (+)1, return 0
78-
if (LIBC_UNLIKELY(x_abs == 0x3c00))
79-
return fputil::cast<float16>(x_sign ? PI : 0.0f);
80-
81-
float xsq = xf * xf;
82-
83-
// |x| <= 0x1p-1, |x| <= 0.5
84-
if (x_abs <= 0x3800) {
85-
// if x is 0, return pi/2
86-
if (LIBC_UNLIKELY(x_abs == 0))
87-
return fputil::cast<float16>(PI_OVER_2);
88-
89-
// Note that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x)
90-
// Degree-6 minimax polynomial of asin(x) generated by Sollya with:
91-
// > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]);
92-
float interm =
93-
fputil::polyeval(xsq, 0x1.000002p0f, 0x1.554c2ap-3f, 0x1.3541ccp-4f,
94-
0x1.43b2d6p-5f, 0x1.a0d73ep-5f);
95-
return fputil::cast<float16>(fputil::multiply_add(-xf, interm, PI_OVER_2));
96-
}
97-
98-
// When |x| > 0.5, assume that 0.5 < |x| <= 1
99-
//
100-
// Step-by-step range-reduction proof:
101-
// 1: Let y = asin(x), such that, x = sin(y)
102-
// 2: From complimentary angle identity:
103-
// x = sin(y) = cos(pi/2 - y)
104-
// 3: Let z = pi/2 - y, such that x = cos(z)
105-
// 4: From double angle formula; cos(2A) = 1 - 2 * sin^2(A):
106-
// z = 2A, z/2 = A
107-
// cos(z) = 1 - 2 * sin^2(z/2)
108-
// 5: Make sin(z/2) subject of the formula:
109-
// sin(z/2) = sqrt((1 - cos(z))/2)
110-
// 6: Recall [3]; x = cos(z). Therefore:
111-
// sin(z/2) = sqrt((1 - x)/2)
112-
// 7: Let u = (1 - x)/2
113-
// 8: Therefore:
114-
// asin(sqrt(u)) = z/2
115-
// 2 * asin(sqrt(u)) = z
116-
// 9: Recall [3]; z = pi/2 - y. Therefore:
117-
// y = pi/2 - z
118-
// y = pi/2 - 2 * asin(sqrt(u))
119-
// 10: Recall [1], y = asin(x). Therefore:
120-
// asin(x) = pi/2 - 2 * asin(sqrt(u))
121-
// 11: Recall that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x)
122-
// Therefore:
123-
// acos(x) = pi/2 - (pi/2 - 2 * asin(sqrt(u)))
124-
// acos(x) = 2 * asin(sqrt(u))
125-
//
126-
// THE RANGE REDUCTION, HOW?
127-
// 12: Recall [7], u = (1 - x)/2
128-
// 13: Since 0.5 < x <= 1, therefore:
129-
// 0 <= u <= 0.25 and 0 <= sqrt(u) <= 0.5
130-
//
131-
// Hence, we can reuse the same [0, 0.5] domain polynomial approximation for
132-
// Step [11] as `sqrt(u)` is in range.
133-
// When -1 < x <= -0.5, the identity:
134-
// acos(x) = pi - acos(-x)
135-
// allows us to compute for the negative x value (lhs)
136-
// with a positive x value instead (rhs).
137-
138-
float xf_abs = (xf < 0 ? -xf : xf);
139-
float u = fputil::multiply_add(-0.5f, xf_abs, 0.5f);
140-
float sqrt_u = fputil::sqrt<float>(u);
141-
142-
// Degree-6 minimax polynomial of asin(x) generated by Sollya with:
143-
// > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]);
144-
float asin_sqrt_u =
145-
sqrt_u * fputil::polyeval(u, 0x1.000002p0f, 0x1.554c2ap-3f,
146-
0x1.3541ccp-4f, 0x1.43b2d6p-5f, 0x1.a0d73ep-5f);
147-
148-
return fputil::cast<float16>(
149-
x_sign ? fputil::multiply_add(-2.0f, asin_sqrt_u, PI) : 2 * asin_sqrt_u);
150-
}
15117
} // namespace LIBC_NAMESPACE_DECL

0 commit comments

Comments
 (0)