1 // Distributed under the Boost Software License, Version 1.0. (See
2 // accompanying file LICENSE_1_0.txt or copy at
3 // http://www.boost.org/LICENSE_1_0.txt)
4 // (C) Copyright 2007 Anthony Williams
10 static fixed::value_t
const internal_pi
=0x3243f6a8;
11 static fixed::value_t
const internal_two_pi
=0x6487ed51;
12 static fixed::value_t
const internal_half_pi
=0x1921fb54;
13 static fixed::value_t
const internal_quarter_pi
=0xc90fdaa;
15 fixed
& fixed::operator%=(const fixed other
)
17 m_nVal
= m_nVal
%other
.m_nVal
;
21 fixed
& fixed::operator*=(const fixed val
)
23 bool const val_negative
=val
.m_nVal
<0;
24 bool const this_negative
=m_nVal
<0;
25 bool const negate
=val_negative
^ this_negative
;
26 uvalue_t
const other
=val_negative
?-val
.m_nVal
:val
.m_nVal
;
27 uvalue_t
const self
=this_negative
?-m_nVal
:m_nVal
;
29 if (uvalue_t
const self_upper
=(self
>>32))
30 m_nVal
= (self_upper
* other
) << (32 - resolution_shift
);
34 if (uvalue_t
const self_lower
= (self
&0xffffffff)) {
35 unsigned long const other_upper
=static_cast<unsigned long>(other
>>32);
36 unsigned long const other_lower
=static_cast<unsigned long>(other
&0xffffffff);
37 uvalue_t
const lower_self_upper_other_res
=self_lower
*other_upper
;
38 uvalue_t
const lower_self_lower_other_res
=self_lower
*other_lower
;
39 m_nVal
+=(lower_self_upper_other_res
<<(32-resolution_shift
))
40 + (lower_self_lower_other_res
>>resolution_shift
);
49 fixed
& fixed::operator/=(fixed
const divisor
)
51 /* This is an approximate fixed point division. Problem was: for
52 doing fixed point division, we have to shift the numerator left
53 by "resolution_shift" bits, and divide by then denominator then;
54 the first shift would however overflow. Solution: shift as many
55 bytes left as long as the highest-order bit doesn't get lost, and
56 apply the remaining bits as a "right shift" to the denominator.
57 The result is approximately the same, and for XCSoar, we can
61 /** number of bits in a value_f */
62 bits
= sizeof(value_t
) * 8,
65 unsigned shift
= resolution_shift
;
66 value_t numerator
= m_nVal
, denominator
= divisor
.m_nVal
;
68 /* shift the numerator left by as many multiple of 7 bits as possible */
70 /* check the most significant 8 bits; we can shift by at
71 least 7 bits if there is either 0xff or 0x00 */
72 (((numerator
>> (bits
- 8)) + 1) & 0xfe) == 0) {
77 bool small_denominator
= denominator
>= -resolution
* 1024 &&
78 denominator
<= resolution
* 1024;
80 /* apply the remaining bits to the denominator */
81 if (!small_denominator
)
82 denominator
>>= shift
;
84 /* now do the real division */
85 m_nVal
= gcc_likely(denominator
!= 0)
86 ? numerator
/ denominator
89 if (small_denominator
)
95 fixed
fixed::sqrt() const
97 #ifdef FAST_BUT_IMPRECISE_SQRT
98 /* TODO: this algorithm is too imprecise, with inaccuracy of about
99 2.1%; disabling it for now, until we have a better one */
101 unsigned const max_shift
=62;
102 uvalue_t a_squared
=1LL<<max_shift
;
103 unsigned b_shift
=(max_shift
+resolution_shift
)/2;
104 uvalue_t a
=1LL<<b_shift
;
108 while(b_shift
&& a_squared
>x
)
115 uvalue_t remainder
=x
-a_squared
;
118 while(remainder
&& b_shift
) {
119 uvalue_t b_squared
=1LL<<(2*b_shift
-resolution_shift
);
120 int const two_a_b_shift
=b_shift
+1-resolution_shift
;
121 uvalue_t two_a_b
=(two_a_b_shift
>0)?(a
<<two_a_b_shift
):(a
>>-two_a_b_shift
);
123 while (b_shift
&& remainder
<(b_squared
+two_a_b
)) {
129 uvalue_t
const delta
=b_squared
+two_a_b
;
130 if ((2 * remainder
) > delta
) {
138 return fixed(internal(), a
);
141 return fixed(::sqrt(as_double()));
147 int const max_power
=63-fixed::resolution_shift
;
148 fixed::value_t
const log_two_power_n_reversed
[]={
149 0x18429946ELL
,0x1791272EFLL
,0x16DFB516FLL
,0x162E42FF0LL
,0x157CD0E70LL
,0x14CB5ECF1LL
,0x1419ECB71LL
,0x13687A9F2LL
,
150 0x12B708872LL
,0x1205966F3LL
,0x115424573LL
,0x10A2B23F4LL
,0xFF140274LL
,0xF3FCE0F5LL
,0xE8E5BF75LL
,0xDDCE9DF6LL
,
151 0xD2B77C76LL
,0xC7A05AF7LL
,0xBC893977LL
,0xB17217F8LL
,0xA65AF679LL
,0x9B43D4F9LL
,0x902CB379LL
,0x851591FaLL
,
152 0x79FE707bLL
,0x6EE74EFbLL
,0x63D02D7BLL
,0x58B90BFcLL
,0x4DA1EA7CLL
,0x428AC8FdLL
,0x3773A77DLL
,0x2C5C85FeLL
,
153 0x2145647ELL
,0x162E42FfLL
,0xB17217FLL
156 fixed::value_t
const log_one_plus_two_power_minus_n
[]={
157 0x67CC8FBLL
,0x391FEF9LL
,0x1E27077LL
,0xF85186LL
,
158 0x7E0A6CLL
,0x3F8151LL
,0x1FE02ALL
,0xFF805LL
,0x7FE01LL
,0x3FF80LL
,0x1FFE0LL
,0xFFF8LL
,
159 0x7FFELL
,0x4000LL
,0x2000LL
,0x1000LL
,0x800LL
,0x400LL
,0x200LL
,0x100LL
,
160 0x80LL
,0x40LL
,0x20LL
,0x10LL
,0x8LL
,0x4LL
,0x2LL
,0x1LL
163 fixed::value_t
const log_one_over_one_minus_two_power_minus_n
[]={
164 0xB172180LL
,0x49A5884LL
,0x222F1D0LL
,0x108598BLL
,
165 0x820AECLL
,0x408159LL
,0x20202BLL
,0x100805LL
,0x80201LL
,0x40080LL
,0x20020LL
,0x10008LL
,
166 0x8002LL
,0x4001LL
,0x2000LL
,0x1000LL
,0x800LL
,0x400LL
,0x200LL
,0x100LL
,
167 0x80LL
,0x40LL
,0x20LL
,0x10LL
,0x8LL
,0x4LL
,0x2LL
,0x1LL
172 fixed
fixed::exp() const
174 if(m_nVal
>=log_two_power_n_reversed
[0])
177 if(m_nVal
<-log_two_power_n_reversed
[63-2*resolution_shift
])
178 return fixed(internal(),0);
181 return fixed(internal(),resolution
);
183 value_t res
=resolution
;
187 value_t
const* log_entry
=log_two_power_n_reversed
;
189 while (temp
&& power
>(-(int)resolution_shift
)) {
190 while (!power
|| (temp
<*log_entry
)) {
192 log_entry
=log_one_plus_two_power_minus_n
;
199 res
+=(res
>>(-power
));
204 int power
=resolution_shift
;
205 value_t
const* log_entry
=log_two_power_n_reversed
+(max_power
-power
);
208 while (temp
&& power
>(-(int)resolution_shift
)) {
209 while (!power
|| (temp
>(-*log_entry
))) {
211 log_entry
=log_one_over_one_minus_two_power_minus_n
;
219 res
-=(res
>>(-power
));
225 return fixed(internal(), res
);
228 fixed
fixed::log() const
233 if (m_nVal
== resolution
)
236 uvalue_t temp
=m_nVal
;
238 uvalue_t
const scale_position
=0x8000000000000000LL
;
239 while (temp
< scale_position
) {
244 value_t res
=(left_shift
<max_power
)?
245 log_two_power_n_reversed
[left_shift
]:
246 -log_two_power_n_reversed
[2*max_power
-left_shift
];
247 unsigned right_shift
=1;
248 uvalue_t shifted_temp
=temp
>>1;
249 while (temp
&& (right_shift
<resolution_shift
)) {
250 while (right_shift
< resolution_shift
&&
251 temp
< shifted_temp
+ scale_position
) {
257 shifted_temp
=temp
>>right_shift
;
258 res
+=log_one_over_one_minus_two_power_minus_n
[right_shift
-1];
261 return fixed(internal(), res
);
266 static constexpr int arctantab
[32] = {
267 297197971, 210828714, 124459457, 65760959, 33381290, 16755422, 8385879,
268 4193963, 2097109, 1048571, 524287, 262144, 131072, 65536, 32768, 16384,
269 8192, 4096, 2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0, 0,
272 static constexpr fixed::value_t cordic_scale_factor
= 0x22C2DD1C; /* 0.271572 * 2^31*/
274 constexpr int_least32_t scale_cordic_result(fixed::value_t a
)
276 return (int_least32_t)((a
* cordic_scale_factor
) >> 31);
279 constexpr fixed::value_t
scale_cordic_result_accurate(fixed::value_t a
)
281 return (fixed::value_t
)((a
* cordic_scale_factor
)
282 >> (31 - fixed::accurate_cordic_shift
));
285 constexpr int_least32_t right_shift(int_least32_t val
, int shift
)
287 return (shift
<0)?(val
<<-shift
):(val
>>shift
);
290 void perform_cordic_rotation_unscaled(int_least32_t &x
, int_least32_t &y
,
293 const int_least32_t *arctanptr
= arctantab
;
295 for (int i
= -1; i
<= (int)fixed::resolution_shift
; ++i
) {
296 const int_least32_t yshift
= right_shift(y
, i
);
297 const int_least32_t xshift
= right_shift(x
, i
);
302 theta
+= *arctanptr
++;
306 theta
-= *arctanptr
++;
311 void perform_cordic_rotation(int_least32_t &px
, int_least32_t &py
,
314 perform_cordic_rotation_unscaled(px
, py
, theta
);
315 px
= scale_cordic_result(px
);
316 py
= scale_cordic_result(py
);
319 fixed::value_t
perform_cordic_rotation_accurate_sin(int_least32_t theta
)
321 int_least32_t x_cos
= 1 << fixed::resolution_shift
;
322 int_least32_t x_sin
= 0;
323 perform_cordic_rotation_unscaled(x_cos
, x_sin
, theta
);
324 return scale_cordic_result_accurate(x_sin
);
327 void perform_cordic_polarization(int_least32_t &argx
, int_least32_t &argy
)
329 int_least32_t theta
=0;
330 int_least32_t x
= argx
, y
= argy
;
331 const int_least32_t *arctanptr
= arctantab
;
333 for (int i
= -1; i
<= (int)fixed::resolution_shift
; ++i
) {
334 const int_least32_t yshift
= right_shift(y
,i
);
335 const int_least32_t xshift
= right_shift(x
,i
);
339 theta
-= *arctanptr
++;
343 theta
+= *arctanptr
++;
347 argx
= scale_cordic_result(x
);
353 * Normalize the value to the range 0 to #internal_two_pi.
356 static inline int_least32_t
357 NormalizeInternalAngle(fixed::value_t a
)
359 a
%= internal_two_pi
;
361 a
+= internal_two_pi
;
366 fixed::accurate_half_sin() const
368 int_least32_t x
= NormalizeInternalAngle(m_nVal
>> 1);
370 bool negate_sin
=false;
372 if( x
> internal_pi
)
374 x
=internal_two_pi
-x
;
377 if(x
>internal_half_pi
)
382 const value_t x_sin
= perform_cordic_rotation_accurate_sin(x
);
383 return fixed(fixed::internal(),
384 (negate_sin
? -x_sin
:x_sin
) );
387 std::pair
<fixed
, fixed
>
388 fixed::sin_cos(fixed theta
)
390 int_least32_t x
= NormalizeInternalAngle(theta
.m_nVal
);
392 bool negate_cos
=false;
393 bool negate_sin
=false;
395 if (x
> internal_pi
) {
396 x
= internal_two_pi
- x
;
400 if (x
> internal_half_pi
) {
405 int_least32_t x_cos
= 1 << resolution_shift
, x_sin
=0;
406 perform_cordic_rotation(x_cos
, x_sin
, x
);
408 return std::make_pair(fixed(internal(), negate_sin
? -x_sin
: x_sin
),
409 fixed(internal(), negate_cos
? -x_cos
: x_cos
));
412 fixed
fixed::atan() const
415 to_polar(fixed(1), *this, &r
, &theta
);
419 fixed
fixed::atan2(const fixed y
, const fixed x
)
422 to_polar(x
,y
, &r
, &theta
);
426 void fixed::to_polar(const fixed x
, const fixed y
, fixed
*r
, fixed
*theta
)
428 bool const negative_x
= x
.m_nVal
< 0;
429 bool const negative_y
= y
.m_nVal
< 0;
431 uvalue_t a
= negative_x
? -x
.m_nVal
: x
.m_nVal
;
432 uvalue_t b
= negative_y
? -y
.m_nVal
: y
.m_nVal
;
434 unsigned right_shift
= 0;
435 const unsigned max_value
= 1U << resolution_shift
;
437 while (a
>= max_value
|| b
>= max_value
) {
443 int_least32_t xtemp
= (int_least32_t)a
;
444 int_least32_t ytemp
= (int_least32_t)b
;
445 perform_cordic_polarization(xtemp
,ytemp
);
446 r
->m_nVal
= value_t(xtemp
) << right_shift
;
449 if (negative_x
&& negative_y
)
450 theta
->m_nVal
-= internal_pi
;
452 theta
->m_nVal
= internal_pi
- theta
->m_nVal
;
454 theta
->m_nVal
= -theta
->m_nVal
;
458 fixed
fixed::sqr() const
460 uvalue_t
const self
=(m_nVal
<0)?-m_nVal
:m_nVal
;
463 uvalue_t
const self_upper
=(self
>>32);
465 res
.m_nVal
= (self_upper
* self
) << (32 - resolution_shift
);
469 if (uvalue_t
const self_lower
= (self
&0xffffffff)) {
470 uvalue_t
const lower_upper
=self_lower
*self_upper
;
471 uvalue_t
const lower_lower
=self_lower
*self_lower
;
473 (lower_upper
<<(32-resolution_shift
))
474 +(lower_lower
>>resolution_shift
);
481 fixed
rsqrt_guess(fixed x
) {
482 union { double f
; uint64_t u
; } y
= {x
.as_float()};
483 y
.u
= 0x5fe6ec85e7de30daLL
- (y
.u
>>1);
491 static constexpr fixed threehalfs
= fixed(1.5);
493 fixed
y(rsqrt_guess(*this));
494 if (y
.m_nVal
<2) return y
;
496 const fixed x2
= fixed(internal(), m_nVal
>>1);
497 #define tolerance (1<<10)
498 value_t v_last
= y
.m_nVal
;
500 y
*= threehalfs
-x2
*y
.sqr();
501 assert(y
>= fixed(0));
502 const value_t err
= y
.m_nVal
-v_last
;
503 if ((y
.m_nVal
<2) || ((err
>0? err
:-err
) < tolerance
))
509 #elif defined(__BIONIC__)
511 /* Android's Bionic doesn't have sincos(), but with LTO=y, gcc folds
512 sin() and cos() into sincos(); the following kludge works around
513 this problem by providing the symbol */
515 #pragma GCC optimize ("O0")
519 sincos(double x
, double *s
, double *c
);
522 sincos(double x
, double *s
, double *c
)