compiler-rt/lib/builtins/hexagon/dfaddsub.S

   1 //===----------------------Hexagon builtin routine ------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 // Double Precision Add and Subtract
  10
  11 #define A r1:0  // first operand; the result is also written here
  12 #define AH r1  // high word of A (sign, exponent field, upper mantissa bits)
  13 #define AL r0  // low word of A
  14 #define B r3:2  // second operand
  15 #define BH r3  // high word of B (sign, exponent field, upper mantissa bits)
  16 #define BL r2  // low word of B
  17
  18 #define EXPA r4  // exponent field extracted from A
  19 #define EXPB r5  // exponent field extracted from B; later reused for the exponent adjustment
  20 #define EXPB_A r5:4  // EXPB:EXPA viewed as a pair so both can be swapped by one combine
  21
  22 #define ZTMP r7:6  // scratch pair for small constants (shift clamp, sticky bit, exponent limits)
  23 #define ZTMPH r7
  24 #define ZTMPL r6
  25
  26 #define ATMP r13:12  // working value derived from A; holds the sum after the add
  27 #define ATMPH r13
  28 #define ATMPL r12
  29
  30 #define BTMP r9:8  // working value derived from B
  31 #define BTMPH r9
  32 #define BTMPL r8
  33
  34 #define ATMP2 r11:10  // secondary scratch (negated ATMP, discarded bits of B, difference)
  35 #define ATMP2H r11
  36 #define ATMP2L r10
  37
  38 #define EXPDIFF r15  // EXPA - EXPB, the alignment shift applied to B
  39 #define EXTRACTOFF r14  // bit-offset half of the extractu control pair
  40 #define EXTRACTAMT r15:14  // EXPDIFF:EXTRACTOFF pair passed to the register form of extractu
  41
  42 #define TMP r28  // general scratch
  43
  44 #define MANTBITS 52  // width of the stored mantissa field
  45 #define HI_MANTBITS 20  // mantissa bits held in the high word; bit offset of the exponent field there
  46 #define EXPBITS 11  // width of the exponent field
  47 #define BIAS 1024  // used below as EXPA-BIAS-60 and as BIAS+BIAS-2
  48 #define MANTISSA_TO_INT_BIAS 52  // not referenced in the code visible in this file
  49 #define SR_BIT_INEXACT 5  // not referenced in the code visible in this file
  50
  51 #ifndef SR_ROUND_OFF
  52 #define SR_ROUND_OFF 22  // bit offset of the 2-bit rounding-mode field read from USR
  53 #endif
  54
  55 #define NORMAL p3  // set when both operands classify as normal numbers
  56 #define BIGB p2  // set when EXPB > EXPA (unsigned compare)
  57
  58 #define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG  // export __qdsp_<TAG> as an alias of __hexagon_<TAG>
  59 #define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG  // export __hexagon_fast_<TAG> as an alias
  60 #define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG  // export __hexagon_fast2_<TAG> as an alias
  61 #define END(TAG) .size TAG,.-TAG  // record TAG's size as the distance from TAG to the current location
  62
  63         .text
  64         .global __hexagon_adddf3  // exported entry point: A + B
  65         .global __hexagon_subdf3  // exported entry point: A - B
  66         .type __hexagon_adddf3, @function
  67         .type __hexagon_subdf3, @function
  68
  69 Q6_ALIAS(adddf3)  // __qdsp_adddf3
  70 FAST_ALIAS(adddf3)  // __hexagon_fast_adddf3
  71 FAST2_ALIAS(adddf3)  // __hexagon_fast2_adddf3
  72 Q6_ALIAS(subdf3)  // __qdsp_subdf3
  73 FAST_ALIAS(subdf3)  // __hexagon_fast_subdf3
  74 FAST2_ALIAS(subdf3)  // __hexagon_fast2_subdf3
  75
  76         .p2align 5
  77 __hexagon_adddf3:  // A = A + B in double precision; fast path for two normal operands, returns through r31
  78         {
  79                 EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)  // exponent field of A
  80                 EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)  // exponent field of B
  81                 ATMP = combine(##0x20000000,#0)  // bit 61 set: sits directly above the mantissa inserted below
  82         }
  83         {
  84                 NORMAL = dfclass(A,#2)  // the two writes to NORMAL in this packet combine:
  85                 NORMAL = dfclass(B,#2)  // NORMAL = A is normal AND B is normal
  86                 BTMP = ATMP  // same leading-bit template for B
  87                 BIGB = cmp.gtu(EXPB,EXPA)                       // Does B have the larger exponent field?
  88         }
  89         {
  90                 if (!NORMAL) jump .Ladd_abnormal                // If abnormal, go to special code
  91                 if (BIGB) A = B                         // if B has the larger exponent, swap A and B
  92                 if (BIGB) B = A                         // (both moves read the pre-packet values, so this is a true swap)
  93                 if (BIGB) EXPB_A = combine(EXPA,EXPB)   // swap exponents to match
  94         }
  95         {
  96                 ATMP = insert(A,#MANTBITS,#EXPBITS-2)   // Q1.62
  97                 BTMP = insert(B,#MANTBITS,#EXPBITS-2)   // Q1.62
  98                 EXPDIFF = sub(EXPA,EXPB)  // alignment shift for B; non-negative after the swap
  99                 ZTMP = combine(#62,#1)  // ZTMPH = 62 (shift clamp), ZTMPL = 1 (sticky bit)
 100         }
 101 #undef BIGB
 102 #undef NORMAL
 103 #define B_POS p3  // B's sign bit is clear
 104 #define A_POS p2  // A's sign bit is clear
 105 #define NO_STICKIES p1  // no nonzero bits were shifted out of B
 106 .Ladd_continue:  // also entered from .Ladd_abnormal with ATMP, BTMP, EXPA, EXPDIFF and ZTMP prepared
 107         {
 108                 EXPDIFF = min(EXPDIFF,ZTMPH)            // If exponent difference >= ~60,
 109                                                         // will collapse to sticky bit
 110                 ATMP2 = neg(ATMP)  // negated A, selected below when A is negative
 111                 A_POS = cmp.gt(AH,#-1)  // true when the sign bit of A is clear
 112                 EXTRACTOFF = #0  // extract starting at bit 0
 113         }
 114         {
 115                 if (!A_POS) ATMP = ATMP2  // ATMP becomes the signed (two's complement) value of A
 116                 ATMP2 = extractu(BTMP,EXTRACTAMT)  // low EXPDIFF bits of BTMP, i.e. the bits the shift below discards
 117                 BTMP = ASR(BTMP,EXPDIFF)  // align B to A's exponent
 118 #undef EXTRACTAMT
 119 #undef EXPDIFF
 120 #undef EXTRACTOFF
 121 #define ZERO r15:14
 122                 ZERO = #0  // r15:14 is reused as a zero constant; the reads above see its old contents
 123         }
 124         {
 125                 NO_STICKIES = cmp.eq(ATMP2,ZERO)  // were all discarded bits zero?
 126                 if (!NO_STICKIES.new) BTMPL = or(BTMPL,ZTMPL)  // if not, fold them into a sticky bit (ZTMPL = 1)
 127                 EXPB = add(EXPA,#-BIAS-60)  // exponent adjustment added to the converted result later
 128                 B_POS = cmp.gt(BH,#-1)  // true when the sign bit of B is clear
 129         }
 130         {
 131                 ATMP = add(ATMP,BTMP)                   // ADD!!!
 132                 ATMP2 = sub(ATMP,BTMP)                  // Negate and ADD --> SUB!!!
 133                 ZTMP = combine(#54,##2045)  // ZTMPH = 54, ZTMPL = 2045: exponent limits for the fast path
 134         }
 135         {
 136                 p0 = cmp.gtu(EXPA,ZTMPH)                // must be pretty high in case of large cancellation
 137                 p0 = !cmp.gtu(EXPA,ZTMPL)  // combined with the line above: 54 < EXPA <= 2045
 138                 if (!p0.new) jump:nt .Ladd_ovf_unf  // outside that range the result may overflow or be subnormal
 139                 if (!B_POS) ATMP = ATMP2                // if B neg, pick difference
 140         }
 141         {
 142                 A = convert_d2df(ATMP)                  // Convert to Double Precision, taking care of flags, etc.  So nice!
 143                 p0 = cmp.eq(ATMPH,#0)  // both halves zero (the two writes combine with AND)
 144                 p0 = cmp.eq(ATMPL,#0)  // means the sum cancelled completely
 145                 if (p0.new) jump:nt .Ladd_zero          // or maybe conversion handles zero case correctly?
 146         }
 147         {
 148                 AH += asl(EXPB,#HI_MANTBITS)  // add the adjustment EXPA-BIAS-60 into the exponent field
 149                 jumpr r31  // return A
 150         }
 151         .falign
 152 __hexagon_subdf3:  // A = A - B, computed as A + (-B)
 153         {
 154                 BH = togglebit(BH,#31)  // flip the sign bit of B
 155                 jump __qdsp_adddf3  // tail-jump into the add routine through its alias
 156         }
 157 END(__hexagon_subdf3)  // the symbol is declared @function, so give it a size as well
 158
 159         .falign
 160 .Ladd_zero:
 161         // True zero, full cancellation
 162         // +0 unless round towards negative infinity
 163         {
 164                 TMP = USR  // fetch the status register to read the rounding mode
 165                 A = #0  // default result: +0
 166                 BH = #1
 167         }
 168         {
 169                 TMP = extractu(TMP,#2,#SR_ROUND_OFF)  // 2-bit rounding mode; same named offset as the other USR reads in this file
 170                 BH = asl(BH,#31)  // BH = 0x80000000, the sign-bit mask
 171         }
 172         {
 173                 p0 = cmp.eq(TMP,#2)  // mode 2 is the round-towards-negative-infinity case named above
 174                 if (p0.new) AH = xor(AH,BH)  // in that mode return -0 instead
 175                 jumpr r31
 176         }
 177         .falign
 178 .Ladd_ovf_unf:  // reached from the fast path when EXPA is outside 54 < EXPA <= 2045
 179         // Overflow or Denormal is possible
 180         // Good news: Underflow flag is not possible!
 181
 182         // ATMP has 2's complement value
 183         //
 184         // EXPA has A's exponent, EXPB has EXPA-BIAS-60
 185         //
 186         // Convert, extract exponent, add adjustment.
 187         // If > 2046, overflow
 188         // If <= 0, denormal
 189         //
 190         // Note that we've not done our zero check yet, so do that too
 191
 192         {
 193                 A = convert_d2df(ATMP)  // convert the signed sum to double
 194                 p0 = cmp.eq(ATMPH,#0)  // both halves zero (the two writes combine with AND)
 195                 p0 = cmp.eq(ATMPL,#0)  // means the sum cancelled completely
 196                 if (p0.new) jump:nt .Ladd_zero
 197         }
 198         {
 199                 TMP = extractu(AH,#EXPBITS,#HI_MANTBITS)  // exponent field of the converted value (before the adjustment)
 200                 AH += asl(EXPB,#HI_MANTBITS)  // apply the adjustment; this is the final result if it stays in range
 201         }
 202         {
 203                 EXPB = add(EXPB,TMP)  // adjusted exponent as a signed integer, for the range checks
 204                 B = combine(##0x00100000,#0)  // bit 52 set: the leading one just above the mantissa field
 205         }
 206         {
 207                 p0 = cmp.gt(EXPB,##BIAS+BIAS-2)  // exponent above 2046?
 208                 if (p0.new) jump:nt .Ladd_ovf
 209         }
 210         {
 211                 p0 = cmp.gt(EXPB,#0)  // positive exponent: A is already a valid normal result
 212                 if (p0.new) jumpr:t r31
 213                 TMP = sub(#1,EXPB)  // otherwise: right-shift amount needed to denormalize
 214         }
 215         {
 216                 B = insert(A,#MANTBITS,#0)  // mantissa of A placed under the leading one in B
 217                 A = ATMP  // only the sign bit of this survives the insert below
 218         }
 219         {
 220                 B = lsr(B,TMP)  // shift the significand down into subnormal position
 221         }
 222         {
 223                 A = insert(B,#63,#0)  // low 63 bits from B, bit 63 (sign) kept from ATMP
 224                 jumpr r31
 225         }
 226         .falign
 227 .Ladd_ovf:
 228         // We get either max finite value or infinity.  Either way, overflow+inexact
 229         {
 230                 A = ATMP                                // 2's complement value
 231                 TMP = USR  // status register: source of the rounding mode and destination of the flags
 232                 ATMP = combine(##0x7fefffff,#-1)        // positive max finite
 233         }
 234         {
 235                 EXPB = extractu(TMP,#2,#SR_ROUND_OFF)   // rounding bits
 236                 TMP = or(TMP,#0x28)                     // inexact + overflow
 237                 BTMP = combine(##0x7ff00000,#0)         // positive infinity
 238         }
 239         {
 240                 USR = TMP  // write the flags back
 241                 EXPB ^= lsr(AH,#31)                     // Does sign match rounding?
 242                 TMP = EXPB                              // unmodified rounding mode (reads EXPB before the xor above lands)
 243         }
 244         {
 245                 p0 = !cmp.eq(TMP,#1)                    // If not round-to-zero and
 246                 p0 = !cmp.eq(EXPB,#2)                   // Not rounding the other way,
 247                 if (p0.new) ATMP = BTMP                 // we should get infinity
 248         }
 249         {
 250                 A = insert(ATMP,#63,#0)                 // insert inf/maxfinite, leave sign
 251         }
 252         {
 253                 p0 = dfcmp.eq(A,A)  // NOTE(review): p0 is not read afterwards here; presumably kept for a side effect -- confirm
 254                 jumpr r31
 255         }
 256
 257 .Ladd_abnormal:  // at least one operand is not a normal number (zero, subnormal, infinity or NaN)
 258         {
 259                 ATMP = extractu(A,#63,#0)               // strip off sign
 260                 BTMP = extractu(B,#63,#0)               // strip off sign
 261         }
 262         {
 263                 p3 = cmp.gtu(ATMP,BTMP)  // p3 = |A| > |B|, comparing the sign-stripped bit patterns
 264                 if (!p3.new) A = B                      // sort values
 265                 if (!p3.new) B = A                      // sort values: A now holds the larger magnitude
 266         }
 267         {
 268                 // Any NaN --> NaN, possibly raise invalid if sNaN
 269                 p0 = dfclass(A,#0x0f)           // A not NaN?
 270                 if (!p0.new) jump:nt .Linvalid_nan_add
 271                 if (!p3) ATMP = BTMP  // keep the sign-stripped copies in the same
 272                 if (!p3) BTMP = ATMP  // order as the sorted A and B
 273         }
 274         {
 275                 // Infinity + non-infinity number is infinity
 276                 // Infinity + infinity --> inf or nan
 277                 p1 = dfclass(A,#0x08)           // A is infinity
 278                 if (p1.new) jump:nt .Linf_add
 279         }
 280         {
 281                 p2 = dfclass(B,#0x01)           // B is zero
 282                 if (p2.new) jump:nt .LB_zero    // so return A or special 0+0
 283                 ATMP = #0  // .LB_zero compares A against this zero
 284         }
 285         // We are left with adding one or more subnormals
 286         {
 287                 p0 = dfclass(A,#4)  // A subnormal? Then B (nonzero, no larger in magnitude) is subnormal too
 288                 if (p0.new) jump:nt .Ladd_two_subnormal
 289                 ATMP = combine(##0x20000000,#0)  // otherwise A is normal: same bit-61 template as the fast path
 290         }
 291         {
 292                 EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)  // exponent field of the normal operand
 293                 EXPB = #1  // exponent value used for the subnormal operand
 294                 // BTMP already ABS(B)
 295                 BTMP = asl(BTMP,#EXPBITS-2)  // move B's mantissa to the Q1.62 position; no leading one is added
 296         }
 297 #undef ZERO
 298 #define EXTRACTOFF r14
 299 #define EXPDIFF r15
 300         {
 301                 ATMP = insert(A,#MANTBITS,#EXPBITS-2)  // A's mantissa under the leading one, as in the fast path
 302                 EXPDIFF = sub(EXPA,EXPB)  // alignment shift for B
 303                 ZTMP = combine(#62,#1)  // ZTMPH = 62 (shift clamp), ZTMPL = 1 (sticky bit)
 304                 jump .Ladd_continue  // rejoin the common add sequence
 305         }
 306
 307 .Ladd_two_subnormal:  // both operands subnormal: add them as signed 64-bit integers
 308         {
 309                 ATMP = extractu(A,#63,#0)  // |A| as a bit pattern
 310                 BTMP = extractu(B,#63,#0)  // |B| as a bit pattern
 311         }
 312         {
 313                 ATMP = neg(ATMP)  // -|A|
 314                 BTMP = neg(BTMP)  // -|B|
 315                 p0 = cmp.gt(AH,#-1)  // A's sign bit clear?
 316                 p1 = cmp.gt(BH,#-1)  // B's sign bit clear?
 317         }
 318         {
 319                 if (p0) ATMP = A  // positive operands are used as they are,
 320                 if (p1) BTMP = B  // negative ones keep the negated magnitude
 321         }
 322         {
 323                 ATMP = add(ATMP,BTMP)  // signed sum
 324         }
 325         {
 326                 BTMP = neg(ATMP)  // magnitude to use if the sum is negative
 327                 p0 = cmp.gt(ATMPH,#-1)  // sum non-negative?
 328                 B = #0
 329         }
 330         {
 331                 if (!p0) A = BTMP  // A = |sum|
 332                 if (p0) A = ATMP
 333                 BH = ##0x80000000  // B becomes the -0 pattern; BH doubles as the sign-bit mask
 334         }
 335         {
 336                 if (!p0) AH = or(AH,BH)  // negative sum: set the sign bit (uses p0 from before this packet)
 337                 p0 = dfcmp.eq(A,B)  // compares the magnitude against -0: true when the sum is zero
 338                 if (p0.new) jump:nt .Lzero_plus_zero  // zero result: let the rounding mode choose its sign
 339         }
 340         {
 341                 jumpr r31
 342         }
 343
 344 .Linvalid_nan_add:  // A is NaN (B may be too): run the conversions for their flag effect, return an all-ones NaN
 345         {
 346                 TMP = convert_df2sf(A)                  // will generate invalid if sNaN
 347                 p0 = dfclass(B,#0x0f)                   // if B is not NaN
 348                 if (p0.new) B = A                       // make it whatever A is
 349         }
 350         {
 351                 BL = convert_df2sf(B)                   // will generate invalid if sNaN
 352                 A = #-1  // result: every bit set
 353                 jumpr r31
 354         }
 355         .falign
 356 .LB_zero:  // B is zero; ATMP was set to 0 before the jump here
 357         {
 358                 p0 = dfcmp.eq(ATMP,A)                   // is A also zero?
 359                 if (!p0.new) jumpr:t r31                // If not, just return A
 360         }
 361         // 0 + 0 is special
 362         // if equal integral values, they have the same sign, which is fine for all rounding
 363         // modes.
 364         // If unequal in sign, we get +0 for all rounding modes except round down
 365 .Lzero_plus_zero:  // also entered from .Ladd_two_subnormal when the sum is zero
 366         {
 367                 p0 = cmp.eq(A,B)  // 64-bit integer compare: identical bit patterns, hence identical signs
 368                 if (p0.new) jumpr:t r31  // same sign: A is already the answer
 369         }
 370         {
 371                 TMP = USR  // fetch the status register to read the rounding mode
 372         }
 373         {
 374                 TMP = extractu(TMP,#2,#SR_ROUND_OFF)  // 2-bit rounding mode
 375                 A = #0  // default result: +0
 376         }
 377         {
 378                 p0 = cmp.eq(TMP,#2)  // mode 2: the round-down case from the comment above
 379                 if (p0.new) AH = ##0x80000000  // in that mode return -0
 380                 jumpr r31
 381         }
 382 .Linf_add:  // A is infinite (the larger-magnitude operand after sorting)
 383         // adding infinities is only OK if they are equal
 384         {
 385                 p0 = !cmp.eq(AH,BH)                     // Do they have different signs
 386                 p0 = dfclass(B,#8)                      // And is B also infinite?
 387                 if (!p0.new) jumpr:t r31                // If not, just a normal inf
 388         }
 389         {
 390                 BL = ##0x7f800001                       // sNAN
 391         }
 392         {
 393                 A = convert_sf2df(BL)                   // trigger invalid, set NaN
 394                 jumpr r31
 395         }
 396 END(__hexagon_adddf3)  // size spans from __hexagon_adddf3 to the end of this code