- fixed endianness problems at PCI DMA block transfers using getHostMemAddr()
[bochs-mirror.git] / fpu / softfloat-macros.h
blob34f87c698644cab26b9679fdb8a16f470a48ad30
1 /*============================================================================
2 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
3 Arithmetic Package, Release 2b.
5 Written by John R. Hauser. This work was made possible in part by the
6 International Computer Science Institute, located at Suite 600, 1947 Center
7 Street, Berkeley, California 94704. Funding was partially provided by the
8 National Science Foundation under grant MIP-9311980. The original version
9 of this code was written as part of a project to build a fixed-point vector
10 processor in collaboration with the University of California at Berkeley,
11 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
12 is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
13 arithmetic/SoftFloat.html'.
15 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
16 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
17 RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
18 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
19 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
20 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
21 INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
22 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
24 Derivative works are acceptable, even for commercial purposes, so long as
25 (1) the source code for the derivative work includes prominent notice that
26 the work is derivative, and (2) the source code includes prominent notice with
27 these four paragraphs for those parts of this code that are retained.
28 =============================================================================*/
30 /*============================================================================
31 * Adapted for Bochs (x86 architecture simulator) by
32 * Stanislav Shwartsman [sshwarts at sourceforge net]
33 * ==========================================================================*/
35 #ifndef _SOFTFLOAT_MACROS_H_
36 #define _SOFTFLOAT_MACROS_H_
38 /*----------------------------------------------------------------------------
39 | Shifts `a' right by the number of bits given in `count'. If any nonzero
40 | bits are shifted off, they are ``jammed'' into the least significant bit of
41 | the result by setting the least significant bit to 1. The value of `count'
42 | can be arbitrarily large; in particular, if `count' is greater than 32, the
43 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
44 | The result is stored in the location pointed to by `zPtr'.
45 *----------------------------------------------------------------------------*/
47 BX_CPP_INLINE Bit32u shift32RightJamming(Bit32u a, int count)
49 Bit32u z;
51 if (count == 0) {
52 z = a;
54 else if (count < 32) {
55 z = (a>>count) | ((a<<((-count) & 31)) != 0);
57 else {
58 z = (a != 0);
61 return z;
64 /*----------------------------------------------------------------------------
65 | Shifts `a' right by the number of bits given in `count'. If any nonzero
66 | bits are shifted off, they are ``jammed'' into the least significant bit of
67 | the result by setting the least significant bit to 1. The value of `count'
68 | can be arbitrarily large; in particular, if `count' is greater than 64, the
69 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
70 | The result is stored in the location pointed to by `zPtr'.
71 *----------------------------------------------------------------------------*/
73 BX_CPP_INLINE Bit64u shift64RightJamming(Bit64u a, int count)
75 Bit64u z;
77 if (count == 0) {
78 z = a;
80 else if (count < 64) {
81 z = (a>>count) | ((a << ((-count) & 63)) != 0);
83 else {
84 z = (a != 0);
87 return z;
90 /*----------------------------------------------------------------------------
91 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
92 | _plus_ the number of bits given in `count'. The shifted result is at most
93 | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
94 | bits shifted off form a second 64-bit result as follows: The _last_ bit
95 | shifted off is the most-significant bit of the extra result, and the other
96 | 63 bits of the extra result are all zero if and only if _all_but_the_last_
97 | bits shifted off were all zero. This extra result is stored in the location
98 | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
99 | (This routine makes more sense if `a0' and `a1' are considered to form
100 | a fixed-point value with binary point between `a0' and `a1'. This fixed-
101 | point value is shifted right by the number of bits given in `count', and
102 | the integer part of the result is returned at the location pointed to by
103 | `z0Ptr'. The fractional part of the result may be slightly corrupted as
104 | described above, and is returned at the location pointed to by `z1Ptr'.)
105 *----------------------------------------------------------------------------*/
107 BX_CPP_INLINE void
108 shift64ExtraRightJamming(
109 Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
111 Bit64u z0, z1;
112 int negCount = (-count) & 63;
114 if (count == 0) {
115 z1 = a1;
116 z0 = a0;
118 else if (count < 64) {
119 z1 = (a0<<negCount) | (a1 != 0);
120 z0 = a0>>count;
122 else {
123 if (count == 64) {
124 z1 = a0 | (a1 != 0);
126 else {
127 z1 = ((a0 | a1) != 0);
129 z0 = 0;
131 *z1Ptr = z1;
132 *z0Ptr = z0;
135 /*----------------------------------------------------------------------------
136 | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
137 | value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
138 | any carry out is lost. The result is broken into two 64-bit pieces which
139 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
140 *----------------------------------------------------------------------------*/
142 BX_CPP_INLINE void
143 add128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1, Bit64u *z0Ptr, Bit64u *z1Ptr)
145 Bit64u z1 = a1 + b1;
146 *z1Ptr = z1;
147 *z0Ptr = a0 + b0 + (z1 < a1);
150 /*----------------------------------------------------------------------------
151 | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
152 | 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
153 | 2^128, so any borrow out (carry out) is lost. The result is broken into two
154 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
155 | `z1Ptr'.
156 *----------------------------------------------------------------------------*/
158 BX_CPP_INLINE void
159 sub128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1, Bit64u *z0Ptr, Bit64u *z1Ptr)
161 *z1Ptr = a1 - b1;
162 *z0Ptr = a0 - b0 - (a1 < b1);
165 /*----------------------------------------------------------------------------
166 | Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
167 | into two 64-bit pieces which are stored at the locations pointed to by
168 | `z0Ptr' and `z1Ptr'.
169 *----------------------------------------------------------------------------*/
171 BX_CPP_INLINE void mul64To128(Bit64u a, Bit64u b, Bit64u *z0Ptr, Bit64u *z1Ptr)
173 Bit32u aHigh, aLow, bHigh, bLow;
174 Bit64u z0, zMiddleA, zMiddleB, z1;
176 aLow = (Bit32u) a;
177 aHigh = (Bit32u)(a>>32);
178 bLow = (Bit32u) b;
179 bHigh = (Bit32u)(b>>32);
180 z1 = ((Bit64u) aLow) * bLow;
181 zMiddleA = ((Bit64u) aLow) * bHigh;
182 zMiddleB = ((Bit64u) aHigh) * bLow;
183 z0 = ((Bit64u) aHigh) * bHigh;
184 zMiddleA += zMiddleB;
185 z0 += (((Bit64u) (zMiddleA < zMiddleB))<<32) + (zMiddleA>>32);
186 zMiddleA <<= 32;
187 z1 += zMiddleA;
188 z0 += (z1 < zMiddleA);
189 *z1Ptr = z1;
190 *z0Ptr = z0;
/*----------------------------------------------------------------------------
| Returns an approximation to the 64-bit integer quotient of the 128-bit value
| `a0':`a1' divided by `b'.  The divisor `b' must be at least 2^63.  If q is
| the exact quotient truncated toward zero, the value returned lies between
| q and q + 2 inclusive.  If `b' <= `a0' (the quotient does not fit in 64
| bits), the maximum 64-bit unsigned integer is returned.
*----------------------------------------------------------------------------*/

#ifdef USE_estimateDiv128To64
static Bit64u estimateDiv128To64(Bit64u a0, Bit64u a1, Bit64u b)
{
    Bit64u bHigh, bLowShifted;
    Bit64u remHi, remLo, prodHi, prodLo;
    Bit64u quotient;

    if (b <= a0) {
        return BX_CONST64(0xFFFFFFFFFFFFFFFF);
    }

    /* Estimate the upper 32 quotient bits using only the top half of b. */
    bHigh = b >> 32;
    if ((bHigh << 32) <= a0) {
        quotient = BX_CONST64(0xFFFFFFFF00000000);
    }
    else {
        quotient = (a0 / bHigh) << 32;
    }

    /* remainder = a - b * quotient; a negative remainder means the estimate
       was too large, so step it down while adding the divisor back. */
    mul64To128(b, quotient, &prodHi, &prodLo);
    sub128(a0, a1, prodHi, prodLo, &remHi, &remLo);
    bLowShifted = b << 32;
    while (((Bit64s) remHi) < 0) {
        quotient -= BX_CONST64(0x100000000);
        add128(remHi, remLo, bHigh, bLowShifted, &remHi, &remLo);
    }

    /* Estimate the lower 32 quotient bits from the realigned remainder. */
    remHi = (remHi << 32) | (remLo >> 32);
    if ((bHigh << 32) <= remHi) {
        quotient |= 0xFFFFFFFF;
    }
    else {
        quotient |= remHi / bHigh;
    }
    return quotient;
}
#endif
/*----------------------------------------------------------------------------
| Returns an approximation to the square root of the 32-bit significand `a'.
| Considered as an integer, `a' must be at least 2^31.  If the least
| significant bit of `aExp' is 1, the value returned approximates
| 2^31*sqrt(`a'/2^31); if it is 0, the value returned approximates
| 2^31*sqrt(`a'/2^30).  In either case the approximation lies strictly within
| +/-2 of the exact value.
*----------------------------------------------------------------------------*/

#ifdef USE_estimateSqrt32
static Bit32u estimateSqrt32(Bit16s aExp, Bit32u a)
{
    /* Correction tables, selected by bits 27..30 of `a'. */
    static const Bit16u oddAdjust[] = {
        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
    };
    static const Bit16u evenAdjust[] = {
        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
    };
    Bit32u estimate;
    Bit64u scaled;
    const int index = (a >> 27) & 15;

    if ((aExp & 1) != 0) {
        estimate = 0x4000 + (a >> 17) - oddAdjust[index];
        estimate = ((a / estimate) << 14) + (estimate << 15);
        a >>= 1;
    }
    else {
        estimate = 0x8000 + (a >> 17) - evenAdjust[index];
        estimate += a / estimate;
        if (0x20000 <= estimate) {
            estimate = 0xFFFF8000;
        }
        else {
            estimate <<= 15;
        }
        if (estimate <= a) {
            return (Bit32u) (((Bit32s) a) >> 1);
        }
    }

    /* Final refinement using a 64-bit dividend. */
    scaled = ((Bit64u) a) << 31;
    return ((Bit32u) (scaled / estimate)) + (estimate >> 1);
}
#endif
264 /*----------------------------------------------------------------------------
265 | Returns the number of leading 0 bits before the most-significant 1 bit of
266 | `a'. If `a' is zero, 32 is returned.
267 *----------------------------------------------------------------------------*/
269 static int countLeadingZeros32(Bit32u a)
271 static const int countLeadingZerosHigh[] = {
272 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
273 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
274 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
275 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
276 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
277 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
278 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
279 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
280 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
281 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
282 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
283 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
284 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
285 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
286 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
287 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
289 int shiftCount = 0;
290 if (a < 0x10000) {
291 shiftCount += 16;
292 a <<= 16;
294 if (a < 0x1000000) {
295 shiftCount += 8;
296 a <<= 8;
298 shiftCount += countLeadingZerosHigh[ a>>24 ];
299 return shiftCount;
302 /*----------------------------------------------------------------------------
303 | Returns the number of leading 0 bits before the most-significant 1 bit of
304 | `a'. If `a' is zero, 64 is returned.
305 *----------------------------------------------------------------------------*/
307 BX_CPP_INLINE int countLeadingZeros64(Bit64u a)
309 int shiftCount = 0;
310 if (a < ((Bit64u) 1)<<32) {
311 shiftCount += 32;
313 else {
314 a >>= 32;
316 shiftCount += countLeadingZeros32((int)(a));
317 return shiftCount;
320 #ifdef FLOATX80
322 /*----------------------------------------------------------------------------
323 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
324 | number of bits given in `count'. Any bits shifted off are lost. The value
325 | of `count' can be arbitrarily large; in particular, if `count' is greater
326 | than 128, the result will be 0. The result is broken into two 64-bit pieces
327 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
328 *----------------------------------------------------------------------------*/
330 BX_CPP_INLINE void
331 shift128Right(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
333 Bit64u z0, z1;
334 int negCount = (-count) & 63;
336 if (count == 0) {
337 z1 = a1;
338 z0 = a0;
340 else if (count < 64) {
341 z1 = (a0<<negCount) | (a1>>count);
342 z0 = a0>>count;
344 else {
345 z1 = (count < 64) ? (a0>>(count & 63)) : 0;
346 z0 = 0;
348 *z1Ptr = z1;
349 *z0Ptr = z0;
352 /*----------------------------------------------------------------------------
353 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
354 | number of bits given in `count'. If any nonzero bits are shifted off, they
355 | are ``jammed'' into the least significant bit of the result by setting the
356 | least significant bit to 1. The value of `count' can be arbitrarily large;
357 | in particular, if `count' is greater than 128, the result will be either
358 | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
359 | nonzero. The result is broken into two 64-bit pieces which are stored at
360 | the locations pointed to by `z0Ptr' and `z1Ptr'.
361 *----------------------------------------------------------------------------*/
363 BX_CPP_INLINE void
364 shift128RightJamming(
365 Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
367 Bit64u z0, z1;
368 int negCount = (-count) & 63;
370 if (count == 0) {
371 z1 = a1;
372 z0 = a0;
374 else if (count < 64) {
375 z1 = (a0<<negCount) | (a1>>count) | ((a1<<negCount) != 0);
376 z0 = a0>>count;
378 else {
379 if (count == 64) {
380 z1 = a0 | (a1 != 0);
382 else if (count < 128) {
383 z1 = (a0>>(count & 63)) | (((a0<<negCount) | a1) != 0);
385 else {
386 z1 = ((a0 | a1) != 0);
388 z0 = 0;
390 *z1Ptr = z1;
391 *z0Ptr = z0;
394 /*----------------------------------------------------------------------------
395 | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
396 | number of bits given in `count'. Any bits shifted off are lost. The value
397 | of `count' must be less than 64. The result is broken into two 64-bit
398 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
399 *----------------------------------------------------------------------------*/
401 BX_CPP_INLINE void
402 shortShift128Left(
403 Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
405 *z1Ptr = a1<<count;
406 *z0Ptr = (count == 0) ? a0 : (a0<<count) | (a1>>((-count) & 63));
409 /*----------------------------------------------------------------------------
410 | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
411 | 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
412 | modulo 2^192, so any carry out is lost. The result is broken into three
413 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
414 | `z1Ptr', and `z2Ptr'.
415 *----------------------------------------------------------------------------*/
417 BX_CPP_INLINE void add192(
418 Bit64u a0,
419 Bit64u a1,
420 Bit64u a2,
421 Bit64u b0,
422 Bit64u b1,
423 Bit64u b2,
424 Bit64u *z0Ptr,
425 Bit64u *z1Ptr,
426 Bit64u *z2Ptr
429 Bit64u z0, z1, z2;
430 unsigned carry0, carry1;
432 z2 = a2 + b2;
433 carry1 = (z2 < a2);
434 z1 = a1 + b1;
435 carry0 = (z1 < a1);
436 z0 = a0 + b0;
437 z1 += carry1;
438 z0 += (z1 < carry1);
439 z0 += carry0;
440 *z2Ptr = z2;
441 *z1Ptr = z1;
442 *z0Ptr = z0;
445 /*----------------------------------------------------------------------------
446 | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
447 | from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
448 | Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
449 | result is broken into three 64-bit pieces which are stored at the locations
450 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
451 *----------------------------------------------------------------------------*/
453 BX_CPP_INLINE void sub192(
454 Bit64u a0,
455 Bit64u a1,
456 Bit64u a2,
457 Bit64u b0,
458 Bit64u b1,
459 Bit64u b2,
460 Bit64u *z0Ptr,
461 Bit64u *z1Ptr,
462 Bit64u *z2Ptr
465 Bit64u z0, z1, z2;
466 unsigned borrow0, borrow1;
468 z2 = a2 - b2;
469 borrow1 = (a2 < b2);
470 z1 = a1 - b1;
471 borrow0 = (a1 < b1);
472 z0 = a0 - b0;
473 z0 -= (z1 < borrow1);
474 z1 -= borrow1;
475 z0 -= borrow0;
476 *z2Ptr = z2;
477 *z1Ptr = z1;
478 *z0Ptr = z0;
481 /*----------------------------------------------------------------------------
482 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
483 | is equal to the 128-bit value formed by concatenating `b0' and `b1'.
484 | Otherwise, returns 0.
485 *----------------------------------------------------------------------------*/
487 BX_CPP_INLINE int eq128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1)
489 return (a0 == b0) && (a1 == b1);
492 /*----------------------------------------------------------------------------
493 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
494 | than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
495 | Otherwise, returns 0.
496 *----------------------------------------------------------------------------*/
498 BX_CPP_INLINE int le128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1)
500 return (a0 < b0) || ((a0 == b0) && (a1 <= b1));
503 /*----------------------------------------------------------------------------
504 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
505 | than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
506 | returns 0.
507 *----------------------------------------------------------------------------*/
509 BX_CPP_INLINE int lt128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1)
511 return (a0 < b0) || ((a0 == b0) && (a1 < b1));
514 #endif /* FLOATX80 */
516 /*----------------------------------------------------------------------------
517 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
518 | `b' to obtain a 192-bit product. The product is broken into three 64-bit
519 | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
520 | `z2Ptr'.
521 *----------------------------------------------------------------------------*/
523 BX_CPP_INLINE void mul128By64To192(
524 Bit64u a0,
525 Bit64u a1,
526 Bit64u b,
527 Bit64u *z0Ptr,
528 Bit64u *z1Ptr,
529 Bit64u *z2Ptr
532 Bit64u z0, z1, z2, more1;
534 mul64To128(a1, b, &z1, &z2);
535 mul64To128(a0, b, &z0, &more1);
536 add128(z0, more1, 0, z1, &z0, &z1);
537 *z2Ptr = z2;
538 *z1Ptr = z1;
539 *z0Ptr = z0;
542 #ifdef FLOAT128
544 /*----------------------------------------------------------------------------
545 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
546 | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
547 | product. The product is broken into four 64-bit pieces which are stored at
548 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
549 *----------------------------------------------------------------------------*/
551 BX_CPP_INLINE void mul128To256(
552 Bit64u a0,
553 Bit64u a1,
554 Bit64u b0,
555 Bit64u b1,
556 Bit64u *z0Ptr,
557 Bit64u *z1Ptr,
558 Bit64u *z2Ptr,
559 Bit64u *z3Ptr
562 Bit64u z0, z1, z2, z3;
563 Bit64u more1, more2;
565 mul64To128(a1, b1, &z2, &z3);
566 mul64To128(a1, b0, &z1, &more2);
567 add128(z1, more2, 0, z2, &z1, &z2);
568 mul64To128(a0, b0, &z0, &more1);
569 add128(z0, more1, 0, z1, &z0, &z1);
570 mul64To128(a0, b1, &more1, &more2);
571 add128(more1, more2, 0, z2, &more1, &z2);
572 add128(z0, z1, 0, more1, &z0, &z1);
573 *z3Ptr = z3;
574 *z2Ptr = z2;
575 *z1Ptr = z1;
576 *z0Ptr = z0;
580 /*----------------------------------------------------------------------------
581 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
582 | by 64 _plus_ the number of bits given in `count'. The shifted result is
583 | at most 128 nonzero bits; these are broken into two 64-bit pieces which are
584 | stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
585 | off form a third 64-bit result as follows: The _last_ bit shifted off is
586 | the most-significant bit of the extra result, and the other 63 bits of the
587 | extra result are all zero if and only if _all_but_the_last_ bits shifted off
588 | were all zero. This extra result is stored in the location pointed to by
589 | `z2Ptr'. The value of `count' can be arbitrarily large.
590 | (This routine makes more sense if `a0', `a1', and `a2' are considered
591 | to form a fixed-point value with binary point between `a1' and `a2'. This
592 | fixed-point value is shifted right by the number of bits given in `count',
593 | and the integer part of the result is returned at the locations pointed to
594 | by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
595 | corrupted as described above, and is returned at the location pointed to by
596 | `z2Ptr'.)
597 *----------------------------------------------------------------------------*/
599 BX_CPP_INLINE void shift128ExtraRightJamming(
600 Bit64u a0,
601 Bit64u a1,
602 Bit64u a2,
603 int count,
604 Bit64u *z0Ptr,
605 Bit64u *z1Ptr,
606 Bit64u *z2Ptr
609 Bit64u z0, z1, z2;
610 int negCount = (-count) & 63;
612 if (count == 0) {
613 z2 = a2;
614 z1 = a1;
615 z0 = a0;
617 else {
618 if (count < 64) {
619 z2 = a1<<negCount;
620 z1 = (a0<<negCount) | (a1>>count);
621 z0 = a0>>count;
623 else {
624 if (count == 64) {
625 z2 = a1;
626 z1 = a0;
628 else {
629 a2 |= a1;
630 if (count < 128) {
631 z2 = a0<<negCount;
632 z1 = a0>>(count & 63);
634 else {
635 z2 = (count == 128) ? a0 : (a0 != 0);
636 z1 = 0;
639 z0 = 0;
641 z2 |= (a2 != 0);
643 *z2Ptr = z2;
644 *z1Ptr = z1;
645 *z0Ptr = z0;
648 #endif /* FLOAT128 */
650 #endif