src/Math/fixed.cpp

   1 // Distributed under the Boost Software License, Version 1.0. (See
   2 // accompanying file LICENSE_1_0.txt or copy at
   3 // http://www.boost.org/LICENSE_1_0.txt)
   4 // (C) Copyright 2007 Anthony Williams
   5 #include "fixed.hpp"
   6 #include "Compiler.h"
   7
   8 #ifdef FIXED_MATH
   9
  10 static fixed::value_t const internal_pi=0x3243f6a8;
  11 static fixed::value_t const internal_two_pi=0x6487ed51;
  12 static fixed::value_t const internal_half_pi=0x1921fb54;
  13 static fixed::value_t const internal_quarter_pi=0xc90fdaa;
  14
  15 fixed& fixed::operator%=(const fixed other)
  16 {
  17   m_nVal = m_nVal%other.m_nVal;
  18   return *this;
  19 }
  20
  21 fixed& fixed::operator*=(const fixed val)
  22 {
  23   bool const val_negative=val.m_nVal<0;
  24   bool const this_negative=m_nVal<0;
  25   bool const negate=val_negative ^ this_negative;
  26   uvalue_t const other=val_negative?-val.m_nVal:val.m_nVal;
  27   uvalue_t const self=this_negative?-m_nVal:m_nVal;
  28
  29   if (uvalue_t const self_upper=(self>>32))
  30     m_nVal = (self_upper * other) << (32 - resolution_shift);
  31   else
  32     m_nVal = 0;
  33
  34   if (uvalue_t const self_lower = (self&0xffffffff)) {
  35     unsigned long const other_upper=static_cast<unsigned long>(other>>32);
  36     unsigned long const other_lower=static_cast<unsigned long>(other&0xffffffff);
  37     uvalue_t const lower_self_upper_other_res=self_lower*other_upper;
  38     uvalue_t const lower_self_lower_other_res=self_lower*other_lower;
  39     m_nVal+=(lower_self_upper_other_res<<(32-resolution_shift))
  40       + (lower_self_lower_other_res>>resolution_shift);
  41   }
  42
  43   if (negate)
  44     m_nVal = -m_nVal;
  45
  46   return *this;
  47 }
  48
  49 fixed& fixed::operator/=(fixed const divisor)
  50 {
  51   /* This is an approximate fixed point division.  Problem was: for
  52      doing fixed point division, we have to shift the numerator left
  53      by "resolution_shift" bits, and divide by then denominator then;
  54      the first shift would however overflow.  Solution: shift as many
  55      bytes left as long as the highest-order bit doesn't get lost, and
  56      apply the remaining bits as a "right shift" to the denominator.
  57      The result is approximately the same, and for XCSoar, we can
  58      neglect the error. */
  59
  60   enum {
  61     /** number of bits in a value_f */
  62     bits = sizeof(value_t) * 8,
  63   };
  64
  65   unsigned shift = resolution_shift;
  66   value_t numerator = m_nVal, denominator = divisor.m_nVal;
  67
  68   /* shift the numerator left by as many multiple of 7 bits as possible */
  69   while (shift >= 7 &&
  70          /* check the most significant 8 bits; we can shift by at
  71             least 7 bits if there is either 0xff or 0x00 */
  72          (((numerator >> (bits - 8)) + 1) & 0xfe) == 0) {
  73     shift -= 7;
  74     numerator <<= 7;
  75   }
  76
  77   bool small_denominator = denominator >= -resolution * 1024 &&
  78     denominator <= resolution * 1024;
  79
  80   /* apply the remaining bits to the denominator */
  81   if (!small_denominator)
  82     denominator >>= shift;
  83
  84   /* now do the real division */
  85   m_nVal = gcc_likely(denominator != 0)
  86     ? numerator / denominator
  87     : fixed_max.m_nVal;
  88
  89   if (small_denominator)
  90     m_nVal <<= shift;
  91
  92   return *this;
  93 }
  94
  95 fixed fixed::sqrt() const
  96 {
  97 #ifdef FAST_BUT_IMPRECISE_SQRT
  98   /* TODO: this algorithm is too imprecise, with inaccuracy of about
  99      2.1%; disabling it for now, until we have a better one */
 100
 101   unsigned const max_shift=62;
 102   uvalue_t a_squared=1LL<<max_shift;
 103   unsigned b_shift=(max_shift+resolution_shift)/2;
 104   uvalue_t a=1LL<<b_shift;
 105
 106   uvalue_t x=m_nVal;
 107
 108   while(b_shift && a_squared>x)
 109     {
 110       a>>=1;
 111       a_squared>>=2;
 112       --b_shift;
 113     }
 114
 115   uvalue_t remainder=x-a_squared;
 116   --b_shift;
 117
 118   while(remainder && b_shift) {
 119     uvalue_t b_squared=1LL<<(2*b_shift-resolution_shift);
 120     int const two_a_b_shift=b_shift+1-resolution_shift;
 121     uvalue_t two_a_b=(two_a_b_shift>0)?(a<<two_a_b_shift):(a>>-two_a_b_shift);
 122
 123     while (b_shift && remainder<(b_squared+two_a_b)) {
 124       b_squared>>=2;
 125       two_a_b>>=1;
 126       --b_shift;
 127     }
 128
 129     uvalue_t const delta=b_squared+two_a_b;
 130     if ((2 * remainder) > delta) {
 131       a += 1LL << b_shift;
 132       remainder -= delta;
 133       if (b_shift)
 134         --b_shift;
 135     }
 136   }
 137
 138   return fixed(internal(), a);
 139 #else
 140   /* slow fallback */
 141   return fixed(::sqrt(as_double()));
 142 #endif
 143 }
 144
 145 namespace
 146 {
 147   int const max_power=63-fixed::resolution_shift;
 148   fixed::value_t const log_two_power_n_reversed[]={
 149     0x18429946ELL,0x1791272EFLL,0x16DFB516FLL,0x162E42FF0LL,0x157CD0E70LL,0x14CB5ECF1LL,0x1419ECB71LL,0x13687A9F2LL,
 150     0x12B708872LL,0x1205966F3LL,0x115424573LL,0x10A2B23F4LL,0xFF140274LL,0xF3FCE0F5LL,0xE8E5BF75LL,0xDDCE9DF6LL,
 151     0xD2B77C76LL,0xC7A05AF7LL,0xBC893977LL,0xB17217F8LL,0xA65AF679LL,0x9B43D4F9LL,0x902CB379LL,0x851591FaLL,
 152     0x79FE707bLL,0x6EE74EFbLL,0x63D02D7BLL,0x58B90BFcLL,0x4DA1EA7CLL,0x428AC8FdLL,0x3773A77DLL,0x2C5C85FeLL,
 153     0x2145647ELL,0x162E42FfLL,0xB17217FLL
 154   };
 155
 156   fixed::value_t const log_one_plus_two_power_minus_n[]={
 157     0x67CC8FBLL,0x391FEF9LL,0x1E27077LL,0xF85186LL,
 158     0x7E0A6CLL,0x3F8151LL,0x1FE02ALL,0xFF805LL,0x7FE01LL,0x3FF80LL,0x1FFE0LL,0xFFF8LL,
 159     0x7FFELL,0x4000LL,0x2000LL,0x1000LL,0x800LL,0x400LL,0x200LL,0x100LL,
 160     0x80LL,0x40LL,0x20LL,0x10LL,0x8LL,0x4LL,0x2LL,0x1LL
 161   };
 162
 163   fixed::value_t const log_one_over_one_minus_two_power_minus_n[]={
 164     0xB172180LL,0x49A5884LL,0x222F1D0LL,0x108598BLL,
 165     0x820AECLL,0x408159LL,0x20202BLL,0x100805LL,0x80201LL,0x40080LL,0x20020LL,0x10008LL,
 166     0x8002LL,0x4001LL,0x2000LL,0x1000LL,0x800LL,0x400LL,0x200LL,0x100LL,
 167     0x80LL,0x40LL,0x20LL,0x10LL,0x8LL,0x4LL,0x2LL,0x1LL
 168   };
 169 }
 170
 171
 172 fixed fixed::exp() const
 173 {
 174     if(m_nVal>=log_two_power_n_reversed[0])
 175         return fixed_max;
 176
 177     if(m_nVal<-log_two_power_n_reversed[63-2*resolution_shift])
 178         return fixed(internal(),0);
 179
 180     if(!m_nVal)
 181         return fixed(internal(),resolution);
 182
 183     value_t res=resolution;
 184
 185     if(m_nVal>0) {
 186       int power=max_power;
 187       value_t const* log_entry=log_two_power_n_reversed;
 188       value_t temp=m_nVal;
 189       while (temp && power>(-(int)resolution_shift)) {
 190         while (!power || (temp<*log_entry)) {
 191           if(!power)
 192             log_entry=log_one_plus_two_power_minus_n;
 193           else
 194             ++log_entry;
 195           --power;
 196         }
 197         temp -= *log_entry;
 198         if (power < 0)
 199           res+=(res>>(-power));
 200         else
 201           res<<=power;
 202       }
 203     } else {
 204       int power=resolution_shift;
 205       value_t const* log_entry=log_two_power_n_reversed+(max_power-power);
 206       value_t temp=m_nVal;
 207
 208       while (temp && power>(-(int)resolution_shift)) {
 209         while (!power || (temp>(-*log_entry))) {
 210           if (!power)
 211             log_entry=log_one_over_one_minus_two_power_minus_n;
 212           else
 213             ++log_entry;
 214
 215           --power;
 216         }
 217         temp+=*log_entry;
 218         if (power < 0)
 219           res-=(res>>(-power));
 220         else
 221           res>>=power;
 222       }
 223     }
 224
 225     return fixed(internal(), res);
 226 }
 227
 228 fixed fixed::log() const
 229 {
 230   if (m_nVal <= 0)
 231       return -fixed_max;
 232
 233   if (m_nVal == resolution)
 234       return fixed(0);
 235
 236   uvalue_t temp=m_nVal;
 237   int left_shift=0;
 238   uvalue_t const scale_position=0x8000000000000000LL;
 239   while (temp < scale_position) {
 240     ++left_shift;
 241     temp<<=1;
 242   }
 243
 244   value_t res=(left_shift<max_power)?
 245     log_two_power_n_reversed[left_shift]:
 246     -log_two_power_n_reversed[2*max_power-left_shift];
 247   unsigned right_shift=1;
 248   uvalue_t shifted_temp=temp>>1;
 249   while (temp && (right_shift<resolution_shift)) {
 250     while (right_shift < resolution_shift &&
 251            temp < shifted_temp + scale_position) {
 252       shifted_temp>>=1;
 253       ++right_shift;
 254     }
 255
 256     temp-=shifted_temp;
 257     shifted_temp=temp>>right_shift;
 258     res+=log_one_over_one_minus_two_power_minus_n[right_shift-1];
 259   }
 260
 261   return fixed(internal(), res);
 262 }
 263
 264 namespace
 265 {
 266   static constexpr int arctantab[32] = {
 267     297197971, 210828714, 124459457, 65760959, 33381290, 16755422, 8385879,
 268     4193963, 2097109, 1048571, 524287, 262144, 131072, 65536, 32768, 16384,
 269     8192, 4096, 2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0, 0,
 270   };
 271
 272   static constexpr fixed::value_t cordic_scale_factor = 0x22C2DD1C; /* 0.271572 * 2^31*/
 273
 274   constexpr int_least32_t scale_cordic_result(fixed::value_t a)
 275   {
 276     return (int_least32_t)((a * cordic_scale_factor) >> 31);
 277   }
 278
 279   constexpr fixed::value_t scale_cordic_result_accurate(fixed::value_t a)
 280   {
 281     return (fixed::value_t)((a * cordic_scale_factor)
 282                             >> (31 - fixed::accurate_cordic_shift));
 283   }
 284
 285   constexpr int_least32_t right_shift(int_least32_t val, int shift)
 286   {
 287     return (shift<0)?(val<<-shift):(val>>shift);
 288   }
 289
 290   void perform_cordic_rotation_unscaled(int_least32_t &x, int_least32_t &y,
 291                                         int_least32_t theta)
 292   {
 293     const int_least32_t *arctanptr = arctantab;
 294
 295     for (int i = -1; i <= (int)fixed::resolution_shift; ++i) {
 296       const int_least32_t yshift = right_shift(y, i);
 297       const int_least32_t xshift = right_shift(x, i);
 298
 299       if (theta < 0) {
 300         x += yshift;
 301         y -= xshift;
 302         theta += *arctanptr++;
 303       } else {
 304         x -= yshift;
 305         y += xshift;
 306         theta -= *arctanptr++;
 307       }
 308     }
 309   }
 310
 311   void perform_cordic_rotation(int_least32_t &px, int_least32_t &py,
 312                                int_least32_t theta)
 313   {
 314     perform_cordic_rotation_unscaled(px, py, theta);
 315     px = scale_cordic_result(px);
 316     py = scale_cordic_result(py);
 317   }
 318
 319   fixed::value_t perform_cordic_rotation_accurate_sin(int_least32_t theta)
 320   {
 321     int_least32_t x_cos = 1 << fixed::resolution_shift;
 322     int_least32_t x_sin = 0;
 323     perform_cordic_rotation_unscaled(x_cos, x_sin, theta);
 324     return scale_cordic_result_accurate(x_sin);
 325   }
 326
 327   void perform_cordic_polarization(int_least32_t &argx, int_least32_t &argy)
 328   {
 329     int_least32_t theta=0;
 330     int_least32_t x = argx, y = argy;
 331     const int_least32_t *arctanptr = arctantab;
 332
 333     for (int i = -1; i <= (int)fixed::resolution_shift; ++i) {
 334       const int_least32_t yshift = right_shift(y,i);
 335       const int_least32_t xshift = right_shift(x,i);
 336       if (y < 0) {
 337         y += xshift;
 338         x -= yshift;
 339         theta -= *arctanptr++;
 340       } else {
 341         y -= xshift;
 342         x += yshift;
 343         theta += *arctanptr++;
 344       }
 345     }
 346
 347     argx = scale_cordic_result(x);
 348     argy = theta;
 349   }
 350 }
 351
 352 /**
 353  * Normalize the value to the range 0 to #internal_two_pi.
 354  */
 355 gcc_const
 356 static inline int_least32_t
 357 NormalizeInternalAngle(fixed::value_t a)
 358 {
 359   a %= internal_two_pi;
 360   if (a < 0)
 361     a += internal_two_pi;
 362   return a;
 363 }
 364
 365 fixed
 366 fixed::accurate_half_sin() const
 367 {
 368   int_least32_t x = NormalizeInternalAngle(m_nVal >> 1);
 369
 370   bool negate_sin=false;
 371
 372   if( x > internal_pi )
 373   {
 374     x =internal_two_pi-x;
 375     negate_sin=true;
 376   }
 377   if(x>internal_half_pi)
 378   {
 379     x=internal_pi-x;
 380   }
 381
 382   const value_t x_sin = perform_cordic_rotation_accurate_sin(x);
 383   return fixed(fixed::internal(),
 384                (negate_sin? -x_sin:x_sin) );
 385 }
 386
 387 std::pair<fixed, fixed>
 388 fixed::sin_cos(fixed theta)
 389 {
 390   int_least32_t x = NormalizeInternalAngle(theta.m_nVal);
 391
 392   bool negate_cos=false;
 393   bool negate_sin=false;
 394
 395   if (x > internal_pi) {
 396     x = internal_two_pi - x;
 397     negate_sin = true;
 398   }
 399
 400   if (x > internal_half_pi) {
 401     x = internal_pi - x;
 402     negate_cos = true;
 403   }
 404
 405   int_least32_t x_cos = 1 << resolution_shift, x_sin=0;
 406   perform_cordic_rotation(x_cos, x_sin, x);
 407
 408   return std::make_pair(fixed(internal(), negate_sin ? -x_sin : x_sin),
 409                         fixed(internal(), negate_cos ? -x_cos : x_cos));
 410 }
 411
 412 fixed fixed::atan() const
 413 {
 414   fixed r,theta;
 415   to_polar(fixed(1), *this, &r, &theta);
 416   return theta;
 417 }
 418
 419 fixed fixed::atan2(const fixed y, const fixed x)
 420 {
 421   fixed r,theta;
 422   to_polar(x,y, &r, &theta);
 423   return theta;
 424 }
 425
 426 void fixed::to_polar(const fixed x, const fixed y, fixed *r, fixed *theta)
 427 {
 428   bool const negative_x = x.m_nVal < 0;
 429   bool const negative_y = y.m_nVal < 0;
 430
 431   uvalue_t a = negative_x ? -x.m_nVal : x.m_nVal;
 432   uvalue_t b = negative_y ? -y.m_nVal : y.m_nVal;
 433
 434   unsigned right_shift = 0;
 435   const unsigned max_value = 1U << resolution_shift;
 436
 437   while (a >= max_value || b >= max_value) {
 438     ++right_shift;
 439     a >>= 1;
 440     b >>= 1;
 441   }
 442
 443   int_least32_t xtemp = (int_least32_t)a;
 444   int_least32_t ytemp = (int_least32_t)b;
 445   perform_cordic_polarization(xtemp,ytemp);
 446   r->m_nVal = value_t(xtemp) << right_shift;
 447   theta->m_nVal=ytemp;
 448
 449   if (negative_x && negative_y)
 450     theta->m_nVal -= internal_pi;
 451   else if (negative_x)
 452     theta->m_nVal = internal_pi - theta->m_nVal;
 453   else if (negative_y)
 454     theta->m_nVal = -theta->m_nVal;
 455 }
 456
 457
 458 fixed fixed::sqr() const
 459 {
 460   uvalue_t const self=(m_nVal<0)?-m_nVal:m_nVal;
 461
 462   fixed res;
 463   uvalue_t const self_upper=(self>>32);
 464   if (self_upper)
 465     res.m_nVal = (self_upper * self) << (32 - resolution_shift);
 466   else
 467     res.m_nVal = 0;
 468
 469   if (uvalue_t const self_lower = (self&0xffffffff)) {
 470     uvalue_t const lower_upper=self_lower*self_upper;
 471     uvalue_t const lower_lower=self_lower*self_lower;
 472     res.m_nVal+=
 473       (lower_upper<<(32-resolution_shift))
 474       +(lower_lower>>resolution_shift);
 475   }
 476   return res;
 477 }
 478
 479
 480 static inline
 481 fixed rsqrt_guess(fixed x) {
 482   union { double f; uint64_t u; } y = {x.as_float()};
 483   y.u = 0x5fe6ec85e7de30daLL - (y.u>>1);
 484   return fixed(y.f);
 485 }
 486
 487
 488 fixed
 489 fixed::rsqrt() const
 490 {
 491   static constexpr fixed threehalfs = fixed(1.5);
 492
 493   fixed y(rsqrt_guess(*this));
 494   if (y.m_nVal<2) return y;
 495
 496   const fixed x2 = fixed(internal(), m_nVal>>1);
 497 #define tolerance (1<<10)
 498   value_t v_last= y.m_nVal;
 499   while (1) {
 500     y *= threehalfs-x2*y.sqr();
 501     assert(y>= fixed(0));
 502     const value_t err = y.m_nVal-v_last;
 503     if ((y.m_nVal<2) || ((err>0? err:-err) < tolerance))
 504       return y;
 505     v_last = y.m_nVal;
 506   }
 507 }
 508
 509 #endif