*: use more constexpr
[xcsoar.git] / src / Math / fixed.cpp
blob3cec429a116393520ff4405e9988bc5b0ec99e63
1 // Distributed under the Boost Software License, Version 1.0. (See
2 // accompanying file LICENSE_1_0.txt or copy at
3 // http://www.boost.org/LICENSE_1_0.txt)
4 // (C) Copyright 2007 Anthony Williams
5 #include "fixed.hpp"
6 #include "Compiler.h"
8 #ifdef FIXED_MATH
10 static fixed::value_t const internal_pi=0x3243f6a8;
11 static fixed::value_t const internal_two_pi=0x6487ed51;
12 static fixed::value_t const internal_half_pi=0x1921fb54;
13 static fixed::value_t const internal_quarter_pi=0xc90fdaa;
15 fixed& fixed::operator%=(const fixed other)
17 m_nVal = m_nVal%other.m_nVal;
18 return *this;
21 fixed& fixed::operator*=(const fixed val)
23 bool const val_negative=val.m_nVal<0;
24 bool const this_negative=m_nVal<0;
25 bool const negate=val_negative ^ this_negative;
26 uvalue_t const other=val_negative?-val.m_nVal:val.m_nVal;
27 uvalue_t const self=this_negative?-m_nVal:m_nVal;
29 if (uvalue_t const self_upper=(self>>32))
30 m_nVal = (self_upper * other) << (32 - resolution_shift);
31 else
32 m_nVal = 0;
34 if (uvalue_t const self_lower = (self&0xffffffff)) {
35 unsigned long const other_upper=static_cast<unsigned long>(other>>32);
36 unsigned long const other_lower=static_cast<unsigned long>(other&0xffffffff);
37 uvalue_t const lower_self_upper_other_res=self_lower*other_upper;
38 uvalue_t const lower_self_lower_other_res=self_lower*other_lower;
39 m_nVal+=(lower_self_upper_other_res<<(32-resolution_shift))
40 + (lower_self_lower_other_res>>resolution_shift);
43 if (negate)
44 m_nVal = -m_nVal;
46 return *this;
49 fixed& fixed::operator/=(fixed const divisor)
51 /* This is an approximate fixed point division. Problem was: for
52 doing fixed point division, we have to shift the numerator left
53 by "resolution_shift" bits, and divide by then denominator then;
54 the first shift would however overflow. Solution: shift as many
55 bytes left as long as the highest-order bit doesn't get lost, and
56 apply the remaining bits as a "right shift" to the denominator.
57 The result is approximately the same, and for XCSoar, we can
58 neglect the error. */
60 enum {
61 /** number of bits in a value_f */
62 bits = sizeof(value_t) * 8,
65 unsigned shift = resolution_shift;
66 value_t numerator = m_nVal, denominator = divisor.m_nVal;
68 /* shift the numerator left by as many multiple of 7 bits as possible */
69 while (shift >= 7 &&
70 /* check the most significant 8 bits; we can shift by at
71 least 7 bits if there is either 0xff or 0x00 */
72 (((numerator >> (bits - 8)) + 1) & 0xfe) == 0) {
73 shift -= 7;
74 numerator <<= 7;
77 bool small_denominator = denominator >= -resolution * 1024 &&
78 denominator <= resolution * 1024;
80 /* apply the remaining bits to the denominator */
81 if (!small_denominator)
82 denominator >>= shift;
84 /* now do the real division */
85 m_nVal = gcc_likely(denominator != 0)
86 ? numerator / denominator
87 : fixed_max.m_nVal;
89 if (small_denominator)
90 m_nVal <<= shift;
92 return *this;
95 fixed fixed::sqrt() const
97 #ifdef FAST_BUT_IMPRECISE_SQRT
98 /* TODO: this algorithm is too imprecise, with inaccuracy of about
99 2.1%; disabling it for now, until we have a better one */
101 unsigned const max_shift=62;
102 uvalue_t a_squared=1LL<<max_shift;
103 unsigned b_shift=(max_shift+resolution_shift)/2;
104 uvalue_t a=1LL<<b_shift;
106 uvalue_t x=m_nVal;
108 while(b_shift && a_squared>x)
110 a>>=1;
111 a_squared>>=2;
112 --b_shift;
115 uvalue_t remainder=x-a_squared;
116 --b_shift;
118 while(remainder && b_shift) {
119 uvalue_t b_squared=1LL<<(2*b_shift-resolution_shift);
120 int const two_a_b_shift=b_shift+1-resolution_shift;
121 uvalue_t two_a_b=(two_a_b_shift>0)?(a<<two_a_b_shift):(a>>-two_a_b_shift);
123 while (b_shift && remainder<(b_squared+two_a_b)) {
124 b_squared>>=2;
125 two_a_b>>=1;
126 --b_shift;
129 uvalue_t const delta=b_squared+two_a_b;
130 if ((2 * remainder) > delta) {
131 a += 1LL << b_shift;
132 remainder -= delta;
133 if (b_shift)
134 --b_shift;
138 return fixed(internal(), a);
139 #else
140 /* slow fallback */
141 return fixed(::sqrt(as_double()));
142 #endif
145 namespace
147 int const max_power=63-fixed::resolution_shift;
148 fixed::value_t const log_two_power_n_reversed[]={
149 0x18429946ELL,0x1791272EFLL,0x16DFB516FLL,0x162E42FF0LL,0x157CD0E70LL,0x14CB5ECF1LL,0x1419ECB71LL,0x13687A9F2LL,
150 0x12B708872LL,0x1205966F3LL,0x115424573LL,0x10A2B23F4LL,0xFF140274LL,0xF3FCE0F5LL,0xE8E5BF75LL,0xDDCE9DF6LL,
151 0xD2B77C76LL,0xC7A05AF7LL,0xBC893977LL,0xB17217F8LL,0xA65AF679LL,0x9B43D4F9LL,0x902CB379LL,0x851591FaLL,
152 0x79FE707bLL,0x6EE74EFbLL,0x63D02D7BLL,0x58B90BFcLL,0x4DA1EA7CLL,0x428AC8FdLL,0x3773A77DLL,0x2C5C85FeLL,
153 0x2145647ELL,0x162E42FfLL,0xB17217FLL
156 fixed::value_t const log_one_plus_two_power_minus_n[]={
157 0x67CC8FBLL,0x391FEF9LL,0x1E27077LL,0xF85186LL,
158 0x7E0A6CLL,0x3F8151LL,0x1FE02ALL,0xFF805LL,0x7FE01LL,0x3FF80LL,0x1FFE0LL,0xFFF8LL,
159 0x7FFELL,0x4000LL,0x2000LL,0x1000LL,0x800LL,0x400LL,0x200LL,0x100LL,
160 0x80LL,0x40LL,0x20LL,0x10LL,0x8LL,0x4LL,0x2LL,0x1LL
163 fixed::value_t const log_one_over_one_minus_two_power_minus_n[]={
164 0xB172180LL,0x49A5884LL,0x222F1D0LL,0x108598BLL,
165 0x820AECLL,0x408159LL,0x20202BLL,0x100805LL,0x80201LL,0x40080LL,0x20020LL,0x10008LL,
166 0x8002LL,0x4001LL,0x2000LL,0x1000LL,0x800LL,0x400LL,0x200LL,0x100LL,
167 0x80LL,0x40LL,0x20LL,0x10LL,0x8LL,0x4LL,0x2LL,0x1LL
172 fixed fixed::exp() const
174 if(m_nVal>=log_two_power_n_reversed[0])
175 return fixed_max;
177 if(m_nVal<-log_two_power_n_reversed[63-2*resolution_shift])
178 return fixed(internal(),0);
180 if(!m_nVal)
181 return fixed(internal(),resolution);
183 value_t res=resolution;
185 if(m_nVal>0) {
186 int power=max_power;
187 value_t const* log_entry=log_two_power_n_reversed;
188 value_t temp=m_nVal;
189 while (temp && power>(-(int)resolution_shift)) {
190 while (!power || (temp<*log_entry)) {
191 if(!power)
192 log_entry=log_one_plus_two_power_minus_n;
193 else
194 ++log_entry;
195 --power;
197 temp -= *log_entry;
198 if (power < 0)
199 res+=(res>>(-power));
200 else
201 res<<=power;
203 } else {
204 int power=resolution_shift;
205 value_t const* log_entry=log_two_power_n_reversed+(max_power-power);
206 value_t temp=m_nVal;
208 while (temp && power>(-(int)resolution_shift)) {
209 while (!power || (temp>(-*log_entry))) {
210 if (!power)
211 log_entry=log_one_over_one_minus_two_power_minus_n;
212 else
213 ++log_entry;
215 --power;
217 temp+=*log_entry;
218 if (power < 0)
219 res-=(res>>(-power));
220 else
221 res>>=power;
225 return fixed(internal(), res);
228 fixed fixed::log() const
230 if (m_nVal <= 0)
231 return -fixed_max;
233 if (m_nVal == resolution)
234 return fixed(0);
236 uvalue_t temp=m_nVal;
237 int left_shift=0;
238 uvalue_t const scale_position=0x8000000000000000LL;
239 while (temp < scale_position) {
240 ++left_shift;
241 temp<<=1;
244 value_t res=(left_shift<max_power)?
245 log_two_power_n_reversed[left_shift]:
246 -log_two_power_n_reversed[2*max_power-left_shift];
247 unsigned right_shift=1;
248 uvalue_t shifted_temp=temp>>1;
249 while (temp && (right_shift<resolution_shift)) {
250 while (right_shift < resolution_shift &&
251 temp < shifted_temp + scale_position) {
252 shifted_temp>>=1;
253 ++right_shift;
256 temp-=shifted_temp;
257 shifted_temp=temp>>right_shift;
258 res+=log_one_over_one_minus_two_power_minus_n[right_shift-1];
261 return fixed(internal(), res);
264 namespace
266 static constexpr int arctantab[32] = {
267 297197971, 210828714, 124459457, 65760959, 33381290, 16755422, 8385879,
268 4193963, 2097109, 1048571, 524287, 262144, 131072, 65536, 32768, 16384,
269 8192, 4096, 2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0, 0,
272 static constexpr fixed::value_t cordic_scale_factor = 0x22C2DD1C; /* 0.271572 * 2^31*/
274 constexpr int_least32_t scale_cordic_result(fixed::value_t a)
276 return (int_least32_t)((a * cordic_scale_factor) >> 31);
279 constexpr fixed::value_t scale_cordic_result_accurate(fixed::value_t a)
281 return (fixed::value_t)((a * cordic_scale_factor)
282 >> (31 - fixed::accurate_cordic_shift));
285 constexpr int_least32_t right_shift(int_least32_t val, int shift)
287 return (shift<0)?(val<<-shift):(val>>shift);
290 void perform_cordic_rotation_unscaled(int_least32_t &x, int_least32_t &y,
291 int_least32_t theta)
293 const int_least32_t *arctanptr = arctantab;
295 for (int i = -1; i <= (int)fixed::resolution_shift; ++i) {
296 const int_least32_t yshift = right_shift(y, i);
297 const int_least32_t xshift = right_shift(x, i);
299 if (theta < 0) {
300 x += yshift;
301 y -= xshift;
302 theta += *arctanptr++;
303 } else {
304 x -= yshift;
305 y += xshift;
306 theta -= *arctanptr++;
311 void perform_cordic_rotation(int_least32_t &px, int_least32_t &py,
312 int_least32_t theta)
314 perform_cordic_rotation_unscaled(px, py, theta);
315 px = scale_cordic_result(px);
316 py = scale_cordic_result(py);
319 fixed::value_t perform_cordic_rotation_accurate_sin(int_least32_t theta)
321 int_least32_t x_cos = 1 << fixed::resolution_shift;
322 int_least32_t x_sin = 0;
323 perform_cordic_rotation_unscaled(x_cos, x_sin, theta);
324 return scale_cordic_result_accurate(x_sin);
327 void perform_cordic_polarization(int_least32_t &argx, int_least32_t &argy)
329 int_least32_t theta=0;
330 int_least32_t x = argx, y = argy;
331 const int_least32_t *arctanptr = arctantab;
333 for (int i = -1; i <= (int)fixed::resolution_shift; ++i) {
334 const int_least32_t yshift = right_shift(y,i);
335 const int_least32_t xshift = right_shift(x,i);
336 if (y < 0) {
337 y += xshift;
338 x -= yshift;
339 theta -= *arctanptr++;
340 } else {
341 y -= xshift;
342 x += yshift;
343 theta += *arctanptr++;
347 argx = scale_cordic_result(x);
348 argy = theta;
353 * Normalize the value to the range 0 to #internal_two_pi.
355 gcc_const
356 static inline int_least32_t
357 NormalizeInternalAngle(fixed::value_t a)
359 a %= internal_two_pi;
360 if (a < 0)
361 a += internal_two_pi;
362 return a;
365 fixed
366 fixed::accurate_half_sin() const
368 int_least32_t x = NormalizeInternalAngle(m_nVal >> 1);
370 bool negate_sin=false;
372 if( x > internal_pi )
374 x =internal_two_pi-x;
375 negate_sin=true;
377 if(x>internal_half_pi)
379 x=internal_pi-x;
382 const value_t x_sin = perform_cordic_rotation_accurate_sin(x);
383 return fixed(fixed::internal(),
384 (negate_sin? -x_sin:x_sin) );
387 std::pair<fixed, fixed>
388 fixed::sin_cos(fixed theta)
390 int_least32_t x = NormalizeInternalAngle(theta.m_nVal);
392 bool negate_cos=false;
393 bool negate_sin=false;
395 if (x > internal_pi) {
396 x = internal_two_pi - x;
397 negate_sin = true;
400 if (x > internal_half_pi) {
401 x = internal_pi - x;
402 negate_cos = true;
405 int_least32_t x_cos = 1 << resolution_shift, x_sin=0;
406 perform_cordic_rotation(x_cos, x_sin, x);
408 return std::make_pair(fixed(internal(), negate_sin ? -x_sin : x_sin),
409 fixed(internal(), negate_cos ? -x_cos : x_cos));
412 fixed fixed::atan() const
414 fixed r,theta;
415 to_polar(fixed(1), *this, &r, &theta);
416 return theta;
419 fixed fixed::atan2(const fixed y, const fixed x)
421 fixed r,theta;
422 to_polar(x,y, &r, &theta);
423 return theta;
426 void fixed::to_polar(const fixed x, const fixed y, fixed *r, fixed *theta)
428 bool const negative_x = x.m_nVal < 0;
429 bool const negative_y = y.m_nVal < 0;
431 uvalue_t a = negative_x ? -x.m_nVal : x.m_nVal;
432 uvalue_t b = negative_y ? -y.m_nVal : y.m_nVal;
434 unsigned right_shift = 0;
435 const unsigned max_value = 1U << resolution_shift;
437 while (a >= max_value || b >= max_value) {
438 ++right_shift;
439 a >>= 1;
440 b >>= 1;
443 int_least32_t xtemp = (int_least32_t)a;
444 int_least32_t ytemp = (int_least32_t)b;
445 perform_cordic_polarization(xtemp,ytemp);
446 r->m_nVal = value_t(xtemp) << right_shift;
447 theta->m_nVal=ytemp;
449 if (negative_x && negative_y)
450 theta->m_nVal -= internal_pi;
451 else if (negative_x)
452 theta->m_nVal = internal_pi - theta->m_nVal;
453 else if (negative_y)
454 theta->m_nVal = -theta->m_nVal;
458 fixed fixed::sqr() const
460 uvalue_t const self=(m_nVal<0)?-m_nVal:m_nVal;
462 fixed res;
463 uvalue_t const self_upper=(self>>32);
464 if (self_upper)
465 res.m_nVal = (self_upper * self) << (32 - resolution_shift);
466 else
467 res.m_nVal = 0;
469 if (uvalue_t const self_lower = (self&0xffffffff)) {
470 uvalue_t const lower_upper=self_lower*self_upper;
471 uvalue_t const lower_lower=self_lower*self_lower;
472 res.m_nVal+=
473 (lower_upper<<(32-resolution_shift))
474 +(lower_lower>>resolution_shift);
476 return res;
480 static inline
481 fixed rsqrt_guess(fixed x) {
482 union { double f; uint64_t u; } y = {x.as_float()};
483 y.u = 0x5fe6ec85e7de30daLL - (y.u>>1);
484 return fixed(y.f);
488 fixed
489 fixed::rsqrt() const
491 static constexpr fixed threehalfs = fixed(1.5);
493 fixed y(rsqrt_guess(*this));
494 if (y.m_nVal<2) return y;
496 const fixed x2 = fixed(internal(), m_nVal>>1);
497 #define tolerance (1<<10)
498 value_t v_last= y.m_nVal;
499 while (1) {
500 y *= threehalfs-x2*y.sqr();
501 assert(y>= fixed(0));
502 const value_t err = y.m_nVal-v_last;
503 if ((y.m_nVal<2) || ((err>0? err:-err) < tolerance))
504 return y;
505 v_last = y.m_nVal;
509 #endif