@@ -26,36 +26,14 @@ namespace detail
2626 return detail::functor1<vec, L, T, T, Q>::call (cos_52s, x);
2727 }
2828
29-
30-
31- // polynomials
32- template <typename T>
33- GLM_FUNC_QUALIFIER T tanPoly (T x, T sign)
34- {
35- return sign * ((((((T (-9.580235e-4 ) * x + T (1.025743e-2 )) * x - T (1.964406e-3 )) * x - T (1.656241e-1 )) * x - T (2.652585e-4 )) * x + T (1.000025e-0 )) * x - T (3.973469e-7 ))
36- / ((((((T (-9.580235e-4 ) * x - T (1.228280e-3 )) * x + T (4.313990e-2 )) * x - T (8.634596e-4 )) * x - T (4.997638e-1 )) * x - T (2.400507e-5 )) * x + T (1.000000e-0 ));
37- }
38-
39- template <typename T>
40- GLM_FUNC_QUALIFIER T asinPoly (T x)
41- {
42- return ((((((T (1.550104e-1 ) * x - T (6.284173e-2 )) * x + T (5.446719e-2 )) * x + T (1.561094e-1 )) * x + T (9.531123e-4 )) * x + T (9.999680e-1 )) * x + T (1.740546e-7 ));
43- }
44-
45- template <typename T>
46- GLM_FUNC_QUALIFIER T acosPoly (T x)
47- {
48- return ((((((T (-1.550104e-1 ) * x + T (6.284173e-2 )) * x - 5.446719e-2 ) * x - T (1.561094e-1 )) * x - T (9.531123e-4 )) * x - T (9.999680e-1 )) * x + T (1.570796e-0 ));
49- }
5029}// namespace detail
5130
5231 // wrapAngle
5332 template <typename T>
5433 GLM_FUNC_QUALIFIER T wrapAngle (T angle)
5534 {
56- // glm::trunc() is much faster than glm::mod but i got an error when i try to use it
57- // return abs<T>(angle - trunc<T>(angle * T(0.15915494309189534561)) * two_pi<T>());
58- return abs<T>(mod<T>(angle, two_pi<T>()));
35+ return abs<T>(angle - ::trunc (angle * T (0.15915494309189534561 )) * two_pi<T>());
36+ // return abs<T>(mod<T>(angle, two_pi<T>()));
5937 }
6038
6139 template <length_t L, typename T, qualifier Q>
@@ -68,6 +46,7 @@ namespace detail
6846 template <typename T>
6947 GLM_FUNC_QUALIFIER T fastCos (T x)
7048 {
49+ /*
7150 T const angle(wrapAngle<T>(x));
7251
7352 if(angle < half_pi<T>())
@@ -78,6 +57,37 @@ namespace detail
7857 return -detail::cos_52s(angle - pi<T>());
7958
8059 return detail::cos_52s(two_pi<T>() - angle);
60+ */
61+ /*
62+ approximated using "cos(sqrt(x))"
63+ interval [0, pi/2]
64+ */
65+ T mx, x2, out, C0, C1, C2, C3, C4, C5, C6;
66+ bool sign, flip;
67+ int q;
68+ /* coefficients */
69+ C0 = T ( 2.0254959785478004e-09 );
70+ C1 = T (-2.7543954056392956e-07 );
71+ C2 = T ( 2.4801444520278038e-05 );
72+ C3 = T (-1.3888888105171723e-03 );
73+ C4 = T ( 0.4166666664617346e-01 );
74+ C5 = T (-4.9999999999799011e-01 );
75+ C6 = T ( 0.9999999999999678e-00 );
76+ /* sign handle */
77+ mx = x < T (0.0 ) ? -x : x;
78+ /* remainder */
79+ mx = mx - ::trunc (mx * T (0.1591549 )) * T (6.283185307 );
80+ /* range reduction*/
81+ q = ((int )(mx * T (0.6366197 ))+1 );
82+ flip = (q == 2 || q == 4 );
83+ sign = (q == 2 || q == 3 );
84+ mx = mx - (T (1.57079632 ) * T (q-1 ));
85+ mx = flip ? (mx - T (1.57079632 )) : mx;
86+ /* range reduction*/
87+ x2 = mx * mx;
88+ out = ((((((C0 * x2 + C1) * x2 + C2) * x2 + C3) * x2 + C4) * x2 + C5) * x2 + C6);
89+ out = sign ? -out : out;
90+ return out;
8191 }
8292
8393 template <length_t L, typename T, qualifier Q>
@@ -90,7 +100,38 @@ namespace detail
90100 template <typename T>
91101 GLM_FUNC_QUALIFIER T fastSin (T x)
92102 {
93- return fastCos<T>(half_pi<T>() - x);
103+ // return fastCos<T>(half_pi<T>() - x);
104+ /*
105+ this function is approximated using this formula : (sin(sqrt(x))-sqrt(x)) / (x * sqrt(x))
106+ interval [0, pi/2]
107+ sin(x) ≈ x * x3 * sin_poly(x2)
108+ where : sin_poly(x) = (((C0 * x + C1) * x + C2) * x + C3)
109+ for -x = -sin(abs(x))
110+ */
111+ T mx, x2, out, C0, C1, C2, C3;
112+ bool sign, flip;
113+ int q;
114+ /* coefficients */
115+ C0 = T ( 2.678156924342569e-06 );
116+ C1 = T (-1.983369323468157e-04 );
117+ C2 = T ( 8.333309602749911e-03 );
118+ C3 = T (-1.666666655047327e-01 );
119+ mx = x;
120+ sign = mx < T (0.0 );
121+ mx = sign ? -mx : mx;
122+ /* remainder */
123+ mx = mx - ::trunc (mx * T (0.1591549 )) * T (6.283185307 );
124+ /* range reduction */
125+ q = ((int )(mx * T (0.6366197 ))+1 );
126+ flip = (q == 2 || q == 4 );
127+ sign ^= q > 2 ;
128+ mx = mx - (T (1.57079632 ) * T (q-1 ));
129+ mx = flip ? (T (1.57079632 ) - mx) : mx;
130+ /* polynomial */
131+ x2 = mx * mx;
132+ out = mx + (x2 * mx) * (((C0 * x2 + C1) * x2 + C2) * x2 + C3);
133+ out = sign ? -out : out;
134+ return out;
94135 }
95136
96137 template <length_t L, typename T, qualifier Q>
@@ -103,28 +144,45 @@ namespace detail
103144 template <typename T>
104145 GLM_FUNC_QUALIFIER T fastTan (T x)
105146 {
106- T ms = (x < T (0.0 )) ? T (-1.0 ) : T (1.0 );
107- T mx = x * ms;
147+ /*
148+ calculated using "sin(x)/cos(x)"
149+ this is much more efficient because it only perform single
150+ range reduction and remainder operation, unlike separate sin and cos
151+ */
152+ T mx, sx, cx, out, x2, S0, S1, S2, S3, C0, C1, C2, C3, C4, C5, C6;
153+ bool sign, flip;
154+ int q;
155+
156+ S0 = T ( 2.6781569243425690e-06 );
157+ S1 = T (-1.9833693234681570e-04 );
158+ S2 = T ( 8.3333096027499110e-03 );
159+ S3 = T (-1.6666666550473270e-01 );
160+ C0 = T ( 2.0254959785478004e-09 );
161+ C1 = T (-2.7543954056392956e-07 );
162+ C2 = T ( 2.4801444520278038e-05 );
163+ C3 = T (-1.3888888105171723e-03 );
164+ C4 = T ( 0.4166666664617346e-01 );
165+ C5 = T (-4.9999999999799011e-01 );
166+ C6 = T ( 0.9999999999999678e-00 );
108167
109- if (mx > two_pi<T>())
110- mx = wrapAngle<T>(mx);
111-
112- switch ((int )(mx * T (0.6366197 ))) // (1.0 / half_pi<T>())
113- {
114- case 0 :
115- return detail::tanPoly<T>(mx, ms);
116- break ;
117- case 1 :
118- return -detail::tanPoly<T>(pi<T>() - mx, ms);
119- break ;
120- case 2 :
121- return detail::tanPoly<T>(mx - pi<T>(), ms);
122- break ;
123- case 3 :
124- return -detail::tanPoly<T>(two_pi<T>() - mx, ms);
125- break ;
126- }
127- return T (NAN);
168+ mx = x;
169+ sign = mx < T (0.0 );
170+ mx = sign ? -mx : mx;
171+ /* remainder */
172+ mx = mx - ::trunc (mx * T (0.1591549 )) * T (6.283185307 );
173+ /* range reduction */
174+ q = ((int )(mx * T (0.6366197 ))+1 );
175+ flip = (q == 2 || q == 4 );
176+ sign ^= flip;
177+ mx = mx - (T (1.57079632 ) * T (q-1 ));
178+ mx = flip ? (T (1.57079632 ) - mx) : mx;
179+ /* polynomial */
180+ x2 = mx * mx;
181+ sx = mx + (x2 * mx) * (((S0 * x2 + S1) * x2 + S2) * x2 + S3);
182+ cx = ((((((C0 * x2 + C1) * x2 + C2) * x2 + C3) * x2 + C4) * x2 + C5) * x2 + C6);
183+ out = sx / cx;
184+ out = sign ? -out : out;
185+ return out;
128186 }
129187
130188 template <length_t L, typename T, qualifier Q>
@@ -137,11 +195,30 @@ namespace detail
137195 template <typename T>
138196 GLM_FUNC_QUALIFIER T fastAsin (T x)
139197 {
140- T ms = (x < T (0.0 )) ? T (-1.0 ) : T (1.0 );
141- T mx = x * ms;
142- if (mx > T (0.5 ))
143- return ms * (half_pi<T>() - T (2.0 ) * detail::asinPoly<T>(fastSqrt<T>((T (1.0 ) - mx) * T (0.5 ))) );
144- return ms * detail::asinPoly<T>(mx);
198+ /*
199+ approximated using this formula "(atan(sqrt(x))-sqrt(x)) / (x * sqrt(x))"
200+ for x <= 0.5 asin(x) = x + x^3 + asin_poly(x^2)
201+ for x > 0.5 asin(x) = asin(sqrt((1 - x) / 2))
202+ */
203+ // TODO : fix glm::fastSqrt, it kills accuracy
204+ T mx, x2, out, C0, C1, C2, C3, C4;
205+ bool sign, more_x;
206+ /* coefficients interval [0, 0.5] */
207+ C0 = T (0.038328305e-00 );
208+ C1 = T (0.026433362e-00 );
209+ C2 = T (0.045020215e-00 );
210+ C3 = T (0.074987613e-00 );
211+ C4 = T (0.166666730e-00 );
212+ mx = x;
213+ sign = mx < T (0.0 );
214+ mx = sign ? -mx : mx;
215+ more_x = mx >= T (0.5 );
216+ mx = more_x ? (fastSqrt<T>((T (1.0 ) - mx) * T (0.5 ))) : mx;
217+ x2 = mx * mx;
218+ out = mx + (x2 * mx) * ((((C0 * x2 + C1) * x2 + C2) * x2 + C3) * x2 + C4);
219+ out = more_x ? (T (1.57079632 ) - T (2.0 ) * out) : out;
220+ out = sign ? -out : out;
221+ return out;
145222 }
146223
147224 template <length_t L, typename T, qualifier Q>
@@ -154,20 +231,8 @@ namespace detail
154231 template <typename T>
155232 GLM_FUNC_QUALIFIER T fastAcos (T x)
156233 {
157- bool is_negative = (x < T (0.0 ));
158- T mx = is_negative ? -x : x;
159-
160- if (is_negative)
161- {
162- if (mx > T (0.5 ))
163- return T (2.0 ) * detail::acosPoly<T>(fastSqrt<T>((T (1.0 ) - mx) * T (0.5 )));
164- return pi<T>() - detail::acosPoly<T>(mx);
165- }
166- if (mx > T (0.5 ))
167- return (pi<T>() - T (2.0 ) * detail::acosPoly<T>(fastSqrt<T>((T (1.0 ) - mx) * T (0.5 ))) );
168- return detail::acosPoly<T>(mx);
234+ return T (1.57079632 ) - fastAsin<T>(x);
169235 }
170-
171236 template <length_t L, typename T, qualifier Q>
172237 GLM_FUNC_QUALIFIER vec<L, T, Q> fastAcos (vec<L, T, Q> const & x)
173238 {
@@ -178,11 +243,15 @@ namespace detail
178243 template <typename T>
179244 GLM_FUNC_QUALIFIER T fastAtan (T y, T x)
180245 {
181- if (y < T (0.0 ) && x < T (0.0 ))
182- return fastAtan<T>(y / x) - pi<T>();
183- else if (y > T (0.0 ) && x < T (0.0 ))
184- return fastAtan<T>(y / x) + pi<T>();
185- return fastAtan<T>(y / x);
246+ T ratio, mx;
247+ bool both_negative, y_positive;
248+ both_negative = x < T (0.0 ) && y < T (0 .0f );
249+ y_positive = y > T (0.0 ) && x < T (0.0 );
250+ ratio = y / x;
251+ mx = fastAtan<T>(ratio);
252+ mx = both_negative ? (mx - T (3.14159265 )) : mx;
253+ mx = y_positive ? (mx + T (3.14159265 )) : mx;
254+ return mx;
186255 }
187256
188257 template <length_t L, typename T, qualifier Q>
@@ -194,17 +263,42 @@ namespace detail
194263 template <typename T>
195264 GLM_FUNC_QUALIFIER T fastAtan (T x)
196265 {
197- T ms = (x < T (0.0 )) ? T (-1.0 ) : T (1.0 );
198- T mx = x * ms;
199- mx = (mx * fastInverseSqrt<T>(T (1.0 ) + mx * mx));
200- if (mx > T (0.5 ))
201- return ms * (half_pi<T>() - T (2.0 ) * detail::asinPoly<T>(fastSqrt<T>((T (1.0 ) - mx) * T (0.5 ))));
202- return ms * detail::asinPoly<T>(mx);
266+ /*
267+ approximated using this formula "(atan(sqrt(x))-sqrt(x)) / (x * sqrt(x))"
268+ interval [0, 0.5]
269+ for x <= 0.5 atan(x) = x*C0 + x2*C1 + x3*C2...
270+ for x > 0.5 && x < 1.0 atan(x) = atan((x - 1) / (1 + x)) + pi/4
271+
272+ for x > 1.0 -> inf atan(x) = asin(x / sqrt(1 + x*x))
273+ better accuracy:
274+ for x > 1.0 -> inf atan(x) = pi_half - atan(1.0 / x)
275+ */
276+ T mx, x2, out, C0, C1, C2, C3, C4;
277+ bool low, high, sign;
278+ C0 = T (-0.037163095549048668 );
279+ C1 = T ( 0.092448081545483562 );
280+ C2 = T (-0.139917441027514330 );
281+ C3 = T ( 0.199831512979624430 );
282+ C4 = T (-0.333331771776715250 );
283+ mx = x;
284+ sign = mx < T (0.0 );
285+ mx = sign ? -mx : mx;
286+ high = mx > T (1.0 );
287+ mx = high ? (T (1.0 ) / mx) : mx;
288+ low = mx > 0 .5f ;
289+ mx = low ? ((mx - T (1.0 )) / (T (1.0 ) + mx)) : mx;
290+ x2 = mx*mx;
291+ out = mx + (x2 * mx) * ((((C0 * x2 + C1) * x2 + C2) * x2 + C3) * x2 + C4);
292+ out = low ? (out + T (0.78539816 )) : out;
293+ out = high ? (T (1.57079632 ) - out) : out;
294+ out = sign ? -out : out;
295+ return out;
203296 }
204297
205298 template <length_t L, typename T, qualifier Q>
206299 GLM_FUNC_QUALIFIER vec<L, T, Q> fastAtan (vec<L, T, Q> const & x)
207300 {
208301 return detail::functor1<vec, L, T, T, Q>::call (fastAtan, x);
209302 }
303+
210304}// namespace glm
0 commit comments