Skip to content

Commit 63666aa

Browse files
ZvRzyan18christophe-lunarg
authored andcommitted
Improve accuracy
1 parent a04132c commit 63666aa

File tree

1 file changed

+169
-75
lines changed

1 file changed

+169
-75
lines changed

glm/gtx/fast_trigonometry.inl

Lines changed: 169 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -26,36 +26,14 @@ namespace detail
2626
return detail::functor1<vec, L, T, T, Q>::call(cos_52s, x);
2727
}
2828

29-
30-
31-
//polynomials
32-
template<typename T>
33-
GLM_FUNC_QUALIFIER T tanPoly(T x, T sign)
34-
{
35-
return sign * ((((((T(-9.580235e-4) * x + T(1.025743e-2)) * x - T(1.964406e-3)) * x - T(1.656241e-1)) * x - T(2.652585e-4)) * x + T(1.000025e-0)) * x - T(3.973469e-7))
36-
/ ((((((T(-9.580235e-4) * x - T(1.228280e-3)) * x + T(4.313990e-2)) * x - T(8.634596e-4)) * x - T(4.997638e-1)) * x - T(2.400507e-5)) * x + T(1.000000e-0));
37-
}
38-
39-
template<typename T>
40-
GLM_FUNC_QUALIFIER T asinPoly(T x)
41-
{
42-
return ((((((T(1.550104e-1) * x - T(6.284173e-2)) * x + T(5.446719e-2)) * x + T(1.561094e-1)) * x + T(9.531123e-4)) * x + T(9.999680e-1)) * x + T(1.740546e-7));
43-
}
44-
45-
template<typename T>
46-
GLM_FUNC_QUALIFIER T acosPoly(T x)
47-
{
48-
return ((((((T(-1.550104e-1) * x + T(6.284173e-2)) * x - 5.446719e-2) * x - T(1.561094e-1)) * x - T(9.531123e-4)) * x - T(9.999680e-1)) * x + T(1.570796e-0));
49-
}
5029
}//namespace detail
5130

5231
// wrapAngle
5332
template<typename T>
5433
GLM_FUNC_QUALIFIER T wrapAngle(T angle)
5534
{
56-
//glm::trunc() is much faster than glm::mod but i got an error when i try to use it
57-
//return abs<T>(angle - trunc<T>(angle * T(0.15915494309189534561)) * two_pi<T>());
58-
return abs<T>(mod<T>(angle, two_pi<T>()));
35+
return abs<T>(angle - ::trunc(angle * T(0.15915494309189534561)) * two_pi<T>());
36+
//return abs<T>(mod<T>(angle, two_pi<T>()));
5937
}
6038

6139
template<length_t L, typename T, qualifier Q>
@@ -68,6 +46,7 @@ namespace detail
6846
template<typename T>
6947
GLM_FUNC_QUALIFIER T fastCos(T x)
7048
{
49+
/*
7150
T const angle(wrapAngle<T>(x));
7251
7352
if(angle < half_pi<T>())
@@ -78,6 +57,37 @@ namespace detail
7857
return -detail::cos_52s(angle - pi<T>());
7958
8059
return detail::cos_52s(two_pi<T>() - angle);
60+
*/
61+
/*
62+
approximated using "cos(sqrt(x))"
63+
interval [0, pi/2]
64+
*/
65+
T mx, x2, out, C0, C1, C2, C3, C4, C5, C6;
66+
bool sign, flip;
67+
int q;
68+
/* coefficients */
69+
C0 = T( 2.0254959785478004e-09);
70+
C1 = T(-2.7543954056392956e-07);
71+
C2 = T( 2.4801444520278038e-05);
72+
C3 = T(-1.3888888105171723e-03);
73+
C4 = T( 0.4166666664617346e-01);
74+
C5 = T(-4.9999999999799011e-01);
75+
C6 = T( 0.9999999999999678e-00);
76+
/* sign handle */
77+
mx = x < T(0.0) ? -x : x;
78+
/* remainder */
79+
mx = mx - ::trunc(mx * T(0.1591549)) * T(6.283185307);
80+
/* range reduction*/
81+
q = ((int)(mx * T(0.6366197))+1);
82+
flip = (q == 2 || q == 4);
83+
sign = (q == 2 || q == 3);
84+
mx = mx - (T(1.57079632) * T(q-1));
85+
mx = flip ? (mx - T(1.57079632)) : mx;
86+
/*range reduction*/
87+
x2 = mx * mx;
88+
out = ((((((C0 * x2 + C1) * x2 + C2) * x2 + C3) * x2 + C4) * x2 + C5) * x2 + C6);
89+
out = sign ? -out : out;
90+
return out;
8191
}
8292

8393
template<length_t L, typename T, qualifier Q>
@@ -90,7 +100,38 @@ namespace detail
90100
template<typename T>
91101
GLM_FUNC_QUALIFIER T fastSin(T x)
92102
{
93-
return fastCos<T>(half_pi<T>() - x);
103+
//return fastCos<T>(half_pi<T>() - x);
104+
/*
105+
this function is approximated using this formula : (sin(sqrt(x))-sqrt(x)) / (x * sqrt(x))
106+
interval [0, pi/2]
107+
sin(x) ≈ x * x3 * sin_poly(x2)
108+
where : sin_poly(x) = (((C0 * x + C1) * x + C2) * x + C3)
109+
for -x = -sin(abs(x))
110+
*/
111+
T mx, x2, out, C0, C1, C2, C3;
112+
bool sign, flip;
113+
int q;
114+
/* coefficients */
115+
C0 = T( 2.678156924342569e-06);
116+
C1 = T(-1.983369323468157e-04);
117+
C2 = T( 8.333309602749911e-03);
118+
C3 = T(-1.666666655047327e-01);
119+
mx = x;
120+
sign = mx < T(0.0);
121+
mx = sign ? -mx : mx;
122+
/* remainder */
123+
mx = mx - ::trunc(mx * T(0.1591549)) * T(6.283185307);
124+
/* range reduction */
125+
q = ((int)(mx * T(0.6366197))+1);
126+
flip = (q == 2 || q == 4);
127+
sign ^= q > 2;
128+
mx = mx - (T(1.57079632) * T(q-1));
129+
mx = flip ? (T(1.57079632) - mx) : mx;
130+
/* polynomial */
131+
x2 = mx * mx;
132+
out = mx + (x2 * mx) * (((C0 * x2 + C1) * x2 + C2) * x2 + C3);
133+
out = sign ? -out : out;
134+
return out;
94135
}
95136

96137
template<length_t L, typename T, qualifier Q>
@@ -103,28 +144,45 @@ namespace detail
103144
template<typename T>
104145
GLM_FUNC_QUALIFIER T fastTan(T x)
105146
{
106-
T ms = (x < T(0.0)) ? T(-1.0) : T(1.0);
107-
T mx = x * ms;
147+
/*
148+
calculated using "sin(x)/cos(x)"
149+
this is much more efficient because it only perform single
150+
range reduction and remainder operation, unlike separate sin and cos
151+
*/
152+
T mx, sx, cx, out, x2, S0, S1, S2, S3, C0, C1, C2, C3, C4, C5, C6;
153+
bool sign, flip;
154+
int q;
155+
156+
S0 = T( 2.6781569243425690e-06);
157+
S1 = T(-1.9833693234681570e-04);
158+
S2 = T( 8.3333096027499110e-03);
159+
S3 = T(-1.6666666550473270e-01);
160+
C0 = T( 2.0254959785478004e-09);
161+
C1 = T(-2.7543954056392956e-07);
162+
C2 = T( 2.4801444520278038e-05);
163+
C3 = T(-1.3888888105171723e-03);
164+
C4 = T( 0.4166666664617346e-01);
165+
C5 = T(-4.9999999999799011e-01);
166+
C6 = T( 0.9999999999999678e-00);
108167

109-
if(mx > two_pi<T>())
110-
mx = wrapAngle<T>(mx);
111-
112-
switch((int)(mx * T(0.6366197))) //(1.0 / half_pi<T>())
113-
{
114-
case 0:
115-
return detail::tanPoly<T>(mx, ms);
116-
break;
117-
case 1:
118-
return -detail::tanPoly<T>(pi<T>() - mx, ms);
119-
break;
120-
case 2:
121-
return detail::tanPoly<T>(mx - pi<T>(), ms);
122-
break;
123-
case 3:
124-
return -detail::tanPoly<T>(two_pi<T>() - mx, ms);
125-
break;
126-
}
127-
return T(NAN);
168+
mx = x;
169+
sign = mx < T(0.0);
170+
mx = sign ? -mx : mx;
171+
/* remainder */
172+
mx = mx - ::trunc(mx * T(0.1591549)) * T(6.283185307);
173+
/* range reduction */
174+
q = ((int)(mx * T(0.6366197))+1);
175+
flip = (q == 2 || q == 4);
176+
sign ^= flip;
177+
mx = mx - (T(1.57079632) * T(q-1));
178+
mx = flip ? (T(1.57079632) - mx) : mx;
179+
/* polynomial */
180+
x2 = mx * mx;
181+
sx = mx + (x2 * mx) * (((S0 * x2 + S1) * x2 + S2) * x2 + S3);
182+
cx = ((((((C0 * x2 + C1) * x2 + C2) * x2 + C3) * x2 + C4) * x2 + C5) * x2 + C6);
183+
out = sx / cx;
184+
out = sign ? -out : out;
185+
return out;
128186
}
129187

130188
template<length_t L, typename T, qualifier Q>
@@ -137,11 +195,30 @@ namespace detail
137195
template<typename T>
138196
GLM_FUNC_QUALIFIER T fastAsin(T x)
139197
{
140-
T ms = (x < T(0.0)) ? T(-1.0) : T(1.0);
141-
T mx = x * ms;
142-
if(mx > T(0.5))
143-
return ms * (half_pi<T>() - T(2.0) * detail::asinPoly<T>(fastSqrt<T>((T(1.0) - mx) * T(0.5))) );
144-
return ms * detail::asinPoly<T>(mx);
198+
/*
199+
approximated using this formula "(atan(sqrt(x))-sqrt(x)) / (x * sqrt(x))"
200+
for x <= 0.5 asin(x) = x + x^3 + asin_poly(x^2)
201+
for x > 0.5 asin(x) = asin(sqrt((1 - x) / 2))
202+
*/
203+
// TODO : fix glm::fastSqrt, it kills accuracy
204+
T mx, x2, out, C0, C1, C2, C3, C4;
205+
bool sign, more_x;
206+
/* coefficients interval [0, 0.5] */
207+
C0 = T(0.038328305e-00);
208+
C1 = T(0.026433362e-00);
209+
C2 = T(0.045020215e-00);
210+
C3 = T(0.074987613e-00);
211+
C4 = T(0.166666730e-00);
212+
mx = x;
213+
sign = mx < T(0.0);
214+
mx = sign ? -mx : mx;
215+
more_x = mx >= T(0.5);
216+
mx = more_x ? (fastSqrt<T>((T(1.0) - mx) * T(0.5))) : mx;
217+
x2 = mx * mx;
218+
out = mx + (x2 * mx) * ((((C0 * x2 + C1) * x2 + C2) * x2 + C3) * x2 + C4);
219+
out = more_x ? (T(1.57079632) - T(2.0) * out) : out;
220+
out = sign ? -out : out;
221+
return out;
145222
}
146223

147224
template<length_t L, typename T, qualifier Q>
@@ -154,20 +231,8 @@ namespace detail
154231
template<typename T>
155232
GLM_FUNC_QUALIFIER T fastAcos(T x)
156233
{
157-
bool is_negative = (x < T(0.0));
158-
T mx = is_negative ? -x : x;
159-
160-
if(is_negative)
161-
{
162-
if(mx > T(0.5))
163-
return T(2.0) * detail::acosPoly<T>(fastSqrt<T>((T(1.0) - mx) * T(0.5)));
164-
return pi<T>() - detail::acosPoly<T>(mx);
165-
}
166-
if(mx > T(0.5))
167-
return (pi<T>() - T(2.0) * detail::acosPoly<T>(fastSqrt<T>((T(1.0) - mx) * T(0.5))) );
168-
return detail::acosPoly<T>(mx);
234+
return T(1.57079632) - fastAsin<T>(x);
169235
}
170-
171236
template<length_t L, typename T, qualifier Q>
172237
GLM_FUNC_QUALIFIER vec<L, T, Q> fastAcos(vec<L, T, Q> const& x)
173238
{
@@ -178,11 +243,15 @@ namespace detail
178243
template<typename T>
179244
GLM_FUNC_QUALIFIER T fastAtan(T y, T x)
180245
{
181-
if (y < T(0.0) && x < T(0.0))
182-
return fastAtan<T>(y / x) - pi<T>();
183-
else if(y > T(0.0) && x < T(0.0))
184-
return fastAtan<T>(y / x) + pi<T>();
185-
return fastAtan<T>(y / x);
246+
T ratio, mx;
247+
bool both_negative, y_positive;
248+
both_negative = x < T(0.0) && y < T(0.0f);
249+
y_positive = y > T(0.0) && x < T(0.0);
250+
ratio = y / x;
251+
mx = fastAtan<T>(ratio);
252+
mx = both_negative ? (mx - T(3.14159265)) : mx;
253+
mx = y_positive ? (mx + T(3.14159265)) : mx;
254+
return mx;
186255
}
187256

188257
template<length_t L, typename T, qualifier Q>
@@ -194,17 +263,42 @@ namespace detail
194263
template<typename T>
195264
GLM_FUNC_QUALIFIER T fastAtan(T x)
196265
{
197-
T ms = (x < T(0.0)) ? T(-1.0) : T(1.0);
198-
T mx = x * ms;
199-
mx = (mx * fastInverseSqrt<T>(T(1.0) + mx * mx));
200-
if(mx > T(0.5))
201-
return ms * (half_pi<T>() - T(2.0) * detail::asinPoly<T>(fastSqrt<T>((T(1.0) - mx) * T(0.5))));
202-
return ms * detail::asinPoly<T>(mx);
266+
/*
267+
approximated using this formula "(atan(sqrt(x))-sqrt(x)) / (x * sqrt(x))"
268+
interval [0, 0.5]
269+
for x <= 0.5 atan(x) = x*C0 + x2*C1 + x3*C2...
270+
for x > 0.5 && x < 1.0 atan(x) = atan((x - 1) / (1 + x)) + pi/4
271+
272+
for x > 1.0 -> inf atan(x) = asin(x / sqrt(1 + x*x))
273+
better accuracy:
274+
for x > 1.0 -> inf atan(x) = pi_half - atan(1.0 / x)
275+
*/
276+
T mx, x2, out, C0, C1, C2, C3, C4;
277+
bool low, high, sign;
278+
C0 = T(-0.037163095549048668);
279+
C1 = T( 0.092448081545483562);
280+
C2 = T(-0.139917441027514330);
281+
C3 = T( 0.199831512979624430);
282+
C4 = T(-0.333331771776715250);
283+
mx = x;
284+
sign = mx < T(0.0);
285+
mx = sign ? -mx : mx;
286+
high = mx > T(1.0);
287+
mx = high ? (T(1.0) / mx) : mx;
288+
low = mx > 0.5f;
289+
mx = low ? ((mx - T(1.0)) / (T(1.0) + mx)) : mx;
290+
x2 = mx*mx;
291+
out = mx + (x2 * mx) * ((((C0 * x2 + C1) * x2 + C2) * x2 + C3) * x2 + C4);
292+
out = low ? (out + T(0.78539816)) : out;
293+
out = high ? (T(1.57079632) - out) : out;
294+
out = sign ? -out : out;
295+
return out;
203296
}
204297

205298
template<length_t L, typename T, qualifier Q>
206299
GLM_FUNC_QUALIFIER vec<L, T, Q> fastAtan(vec<L, T, Q> const& x)
207300
{
208301
return detail::functor1<vec, L, T, T, Q>::call(fastAtan, x);
209302
}
303+
210304
}//namespace glm

0 commit comments

Comments
 (0)