compiler-rt/lib/builtins/hexagon/dfdiv.S

   1 //===----------------------Hexagon builtin routine ------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 // Double Precision Divide
  10
  11 #define A r1:0                     // first operand (numerator) as a 64-bit register pair; also holds the result
  12 #define AH r1                      // high word of A: sign, exponent and upper mantissa bits
  13 #define AL r0
  14
  15 #define B r3:2                     // second operand (denominator) as a 64-bit register pair
  16 #define BH r3
  17 #define BL r2
  18
  19 #define Q r5:4                     // fixed-point quotient accumulator
  20 #define QH r5
  21 #define QL r4
  22
  23 #define PROD r7:6                  // 64-bit product / general scratch pair
  24 #define PRODHI r7
  25 #define PRODLO r6
  26
  27 #define SFONE r8                   // loaded with 0x3f800001 by the routine
  28 #define SFDEN r9                   // single-precision stand-in for the denominator mantissa
  29 #define SFERROR r10                // residual used while refining the reciprocal
  30 #define SFRECIP r11                // single-precision reciprocal estimate
  31
  32 #define EXPBA r13:12               // EXPB:EXPA viewed as one register pair
  33 #define EXPB r13
  34 #define EXPA r12
  35
  36 #define REMSUB2 r15:14             // second partial product subtracted from REM in DIV_ITER1B
  37
  38
  39
  40 #define SIGN r28                   // xor of the operands' high words; bit 31 is the result sign
  41
  42 #define Q_POSITIVE p3              // true when the result sign bit is clear
  43 #define NORMAL p2                  // true when both operands are normal numbers
  44 #define NO_OVF_UNF p1              // true when the exponent difference is safely in range
  45 #define P_TMP p0                   // scratch predicate
  46
  47 #define RECIPEST_SHIFT 3           // bit position of the SF mantissa inside RECIPEST
  48 #define QADJ 61                    // constant subtracted from the exponent difference on the fast path
  49
     // Class masks passed as the immediate operand of dfclass.
  50 #define DFCLASS_NORMAL 0x02
  51 #define DFCLASS_NUMBER 0x0F        // every class bit below 0x10 (0x10 is the mask the NaN handler tests)
  52 #define DFCLASS_INFINITE 0x08
  53 #define DFCLASS_ZERO 0x01
  54 #define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO)
  55 #define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE)
  56
  57 #define DF_MANTBITS 52             // explicit mantissa bits in a double
  58 #define DF_EXPBITS 11              // exponent bits in a double
  59 #define SF_MANTBITS 23             // explicit mantissa bits in a single
  60 #define SF_EXPBITS 8
  61 #define DF_BIAS 0x3ff              // double exponent bias
  62
  63 #define SR_ROUND_OFF 22            // bit offset of the 2-bit rounding-mode field read from USR
  64
     // Symbol helpers: each *_ALIAS(TAG) exports another global name that is
     // .set equal to __hexagon_TAG; END(TAG) emits the ELF .size for TAG.
  65 #define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
  66 #define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
  67 #define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
  68 #define END(TAG) .size TAG,.-TAG
  69
  70         .text
  71         .global __hexagon_divdf3
  72         .type __hexagon_divdf3,@function
  73         Q6_ALIAS(divdf3)
  74         FAST_ALIAS(divdf3)
  75         FAST2_ALIAS(divdf3)
  76         .p2align 5
     // __hexagon_divdf3: double-precision divide.
     //   In:      r1:0 = numerator, r3:2 = denominator (double bit patterns).
     //   Out:     r1:0 = quotient; every exit is a jumpr r31.
     //   Scratch: r4-r15, r28 and p0-p3 are written without being saved;
     //            some slow paths also rewrite USR.
     // Fast path (both operands normal): build a single-precision reciprocal
     // estimate of the denominator mantissa, refine it twice, then run four
     // multiply/subtract steps that accumulate a fixed-point quotient in Q and
     // a running remainder in REM. Q is converted to double and the exponent
     // difference is added back into the exponent field.
     // Anything other than normal/normal branches to .Ldiv_abnormal.
     // NOTE(review): the comments below assume Hexagon packet semantics
     // (instructions in one { } packet read pre-packet register values, and
     // several compares writing one predicate in a packet are ANDed) --
     // confirm against the Hexagon Programmer's Reference Manual.
  77 __hexagon_divdf3:
  78         {
  79                 NORMAL = dfclass(A,#DFCLASS_NORMAL)        // NORMAL ends up true only if A ...
  80                 NORMAL = dfclass(B,#DFCLASS_NORMAL)        // ... and B are both normal
  81                 EXPBA = combine(BH,AH)                     // EXPB = BH, EXPA = AH (raw high words for now)
  82                 SIGN = xor(AH,BH)                          // bit 31 = sign of the quotient
  83         }
     // From here on r1:0 is treated as the running remainder and r3:2 as the
     // denominator mantissa, so the aliases are renamed.
  84 #undef A
  85 #undef AH
  86 #undef AL
  87 #undef B
  88 #undef BH
  89 #undef BL
  90 #define REM r1:0
  91 #define REMHI r1
  92 #define REMLO r0
  93 #define DENOM r3:2
  94 #define DENOMHI r3
  95 #define DENOMLO r2
  96         {
  97                 if (!NORMAL) jump .Ldiv_abnormal
  98                 PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)       // top 23 mantissa bits of the denominator
  99                 SFONE = ##0x3f800001                       // 1.0f bit pattern with the mantissa LSB also set
 100         }
 101         {
 102                 SFDEN = or(SFONE,PRODLO)                   // single in [1,2) carrying the denominator's leading mantissa bits
 103                 EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)  // 11-bit biased exponent of the denominator
 104                 EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)  // 11-bit biased exponent of the numerator
 105                 Q_POSITIVE = cmp.gt(SIGN,#-1)              // true when bit 31 of SIGN is clear
 106         }
 107 #undef SIGN
 108 #define ONE r28
     // Re-entry point: the denormal path jumps back here once it has normalized
     // the mantissas and rebuilt EXPA, EXPB and SFDEN.
 109 .Ldenorm_continue:
 110         {
 111                 SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN)      // initial reciprocal estimate; P_TMP is overwritten before any later read
 112                 SFERROR = and(SFONE,#-2)                   // clear the LSB: 0x3f800000 = 1.0f
 113                 ONE = #1
 114                 EXPA = sub(EXPA,EXPB)                      // exponent difference (bias cancels)
 115         }
 116 #undef EXPB
 117 #define RECIPEST r13
 118         {
 119                 SFERROR -= sfmpy(SFRECIP,SFDEN):lib        // error = 1.0 - recip*den
 120                 REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)  // overwrite sign+exponent with 1: sets the hidden bit, clears bits 21-31
 121                 RECIPEST = ##0x00800000 << RECIPEST_SHIFT  // leading one of the fixed-point reciprocal
 122         }
 123         {
 124                 SFRECIP += sfmpy(SFRECIP,SFERROR):lib      // first refinement: recip += recip*error
 125                 DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)        // same hidden-bit fix-up for the denominator
 126                 SFERROR = and(SFONE,#-2)                   // reload 1.0f for the second round
 127         }
 128         {
 129                 SFERROR -= sfmpy(SFRECIP,SFDEN):lib
 130                 QH = #-DF_BIAS+1                           // lower bound for the exponent difference
 131                 QL = #DF_BIAS-1                            // upper bound for the exponent difference
 132         }
 133         {
 134                 SFRECIP += sfmpy(SFRECIP,SFERROR):lib      // second refinement
 135                 NO_OVF_UNF = cmp.gt(EXPA,QH)               // true only if -DF_BIAS+1 < EXPA ...
 136                 NO_OVF_UNF = !cmp.gt(EXPA,QL)              // ... and EXPA <= DF_BIAS-1
 137         }
 138         {
 139                 RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT)    // drop the refined mantissa in below the leading one
 140                 Q = #0
 141                 EXPA = add(EXPA,#-QADJ)
 142         }
 143 #undef SFERROR
 144 #undef SFRECIP
 145 #define TMP r10
 146 #define TMP1 r11
 147         {
 148                 RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT)) // lower the estimate by 3 mantissa LSBs; presumably so it never overshoots -- confirm
 149         }
 150 // DIV_ITER1B: one quotient step. PRODHI = high word of RECIPEST*REMHI is the next group of quotient bits; REM is shifted left by REMSHIFT and reduced by PRODHI*DENOM (low-word and high-word products); PROD with its low word zeroed is shifted by QSHIFT and added to Q. EXTRA is pasted into the last packet.
 151 #define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \
 152         { \
 153                 PROD = mpyu(RECIPEST,REMHI); \
 154                 REM = asl(REM,# ## ( REMSHIFT )); \
 155         }; \
 156         { \
 157                 PRODLO = # ## 0; \
 158                 REM -= mpyu(PRODHI,DENOMLO); \
 159                 REMSUB2 = mpyu(PRODHI,DENOMHI); \
 160         }; \
 161         { \
 162                 Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \
 163                 REM -= asl(REMSUB2, # ## 32); \
 164                 EXTRA \
 165         }
 166
 167 // Four steps; each places its digits 15 bits lower in Q (left 14, then right 1, 16, 31), matching the 15-bit remainder shift.
 168         DIV_ITER1B(ASL,14,15,)
 169         DIV_ITER1B(ASR,1,15,)
 170         DIV_ITER1B(ASR,16,15,)
 171         DIV_ITER1B(ASR,31,15,PROD=# ( 0 );)            // last step also clears PROD for the fix-up below
 172
 173 #undef REMSUB2
 174 #define TMPPAIR r15:14
 175 #define TMPPAIRHI r15
 176 #define TMPPAIRLO r14
 177 #undef RECIPEST
 178 #define EXPB r13
 179         {
 180                 // compare or sub with carry
 181                 TMPPAIR = sub(REM,DENOM)
 182                 P_TMP = cmp.gtu(DENOM,REM)                 // true when the remainder is already below the denominator
 183                 // set up amt to add to q
 184                 if (!P_TMP.new) PRODLO  = #2
 185         }
 186         {
 187                 Q = add(Q,PROD)                            // PROD is 0, or 2 when one more denominator fits
 188                 if (!P_TMP) REM = TMPPAIR                  // in that case keep the reduced remainder
 189                 TMPPAIR = #0
 190         }
 191         {
 192                 P_TMP = cmp.eq(REM,TMPPAIR)                // is the remainder exactly zero?
 193                 if (!P_TMP.new) QL = or(QL,ONE)            // if not, set bit 0 of Q as a sticky marker
 194         }
 195         {
 196                 PROD = neg(Q)
 197         }
 198         {
 199                 if (!Q_POSITIVE) Q = PROD                  // apply the result sign to the integer quotient
 200         }
     // Back to the operand view of r1:0 / r3:2 for building the return value.
 201 #undef REM
 202 #undef REMHI
 203 #undef REMLO
 204 #undef DENOM
 205 #undef DENOMLO
 206 #undef DENOMHI
 207 #define A r1:0
 208 #define AH r1
 209 #define AL r0
 210 #define B r3:2
 211 #define BH r3
 212 #define BL r2
 213         {
 214                 A = convert_d2df(Q)                        // signed 64-bit integer quotient -> double
 215                 if (!NO_OVF_UNF) jump .Ldiv_ovf_unf        // exponent may leave the normal range: take the careful path
 216         }
 217         {
 218                 AH += asl(EXPA,#DF_MANTBITS-32)            // add the adjusted exponent difference into the exponent field
 219                 jumpr r31
 220         }
 221
 222 .Ldiv_ovf_unf:
     // Reached from the fast path when the exponent difference was outside the
     // range checked by NO_OVF_UNF. On entry A holds convert_d2df(Q), Q the
     // signed integer quotient and EXPA the exponent adjustment still to apply.
     // Recomputes the final exponent, then dispatches to overflow, to the
     // "rounded up to min normal" check, or falls through to underflow.
     // NOTE(review): relies on instructions in a packet reading pre-packet
     // register values -- confirm against the Hexagon PRM.
 223         {
 224                 AH += asl(EXPA,#DF_MANTBITS-32)            // tentative result, as on the fast path
 225                 EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32)    // exponent field of the converted quotient
 226         }
 227         {
 228                 PROD = abs(Q)
 229                 EXPA = add(EXPA,EXPB)                      // exponent the result would have with unlimited range
 230         }
 231         {
 232                 P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS)          // overflow
 233                 if (P_TMP.new) jump:nt .Ldiv_ovf
 234         }
 235         {
 236                 P_TMP = cmp.gt(EXPA,#0)
 237                 if (P_TMP.new) jump:nt .Lpossible_unf           // round up to normal possible...
 238         }
 239         // Underflow
 240         // We know what the infinite range exponent should be (EXPA)
 241         // Q is 2's complement, PROD is abs(Q)
 242         // Normalize Q, shift right, add a high bit, convert, change exponent
 243
 244 #define FUDGE1 7        // how much to shift right
 245 #define FUDGE2 4        // how many guard/round to keep at lsbs
 246
 247         {
 248                 EXPB = add(clb(PROD),#-1)                       // doesn't need to be added in since
 249                 EXPA = sub(#FUDGE1,EXPA)                        // we extract post-converted exponent
 250                 TMP = USR
 251                 TMP1 = #63
 252         }
 253         {
 254                 EXPB = min(EXPA,TMP1)                      // right-shift amount, capped at 63
 255                 TMP1 = or(TMP,#0x030)                      // USR copy with the underflow+inexact bits set
 256                 PROD = asl(PROD,EXPB)                      // normalize abs(Q) using the clb-based count from the previous packet
 257                 EXPA = #0                                  // offset 0 for the pair-controlled extractu below
 258         }
 259         {
 260                 TMPPAIR = extractu(PROD,EXPBA)                          // bits that will get shifted out
 261                 PROD = lsr(PROD,EXPB)                                   // shift out bits
 262                 B = #1
 263         }
 264         {
 265                 P_TMP = cmp.gtu(B,TMPPAIR)                 // true when no nonzero bits were shifted out
 266                 if (!P_TMP.new) PRODLO = or(BL,PRODLO)     // otherwise fold them into a sticky LSB
 267                 PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2)     // the "high bit": bit 56 of PROD
 268         }
 269         {
 270                 Q = neg(PROD)
 271                 P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1)     // are all FUDGE2 guard bits zero (exact)?
 272                 if (!P_TMP.new) TMP = TMP1                 // if not, use the USR value with the flags set
 273         }
 274         {
 275                 USR = TMP
 276                 if (Q_POSITIVE) Q = PROD                   // pick the signed or unsigned magnitude
 277                 TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2)       // exponent correction applied after the conversion
 278         }
 279         {
 280                 A = convert_d2df(Q)
 281         }
 282         {
 283                 AH += asl(TMP,#DF_MANTBITS-32)             // rebias the exponent field of the converted value
 284                 jumpr r31
 285         }
 286
 287
 288 .Lpossible_unf:
 289         // If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal
 290         // The answer is correct, but we need to raise Underflow
     // On entry A already holds the value to return and PROD holds abs(Q).
 291         {
 292                 B = extractu(A,#63,#0)                     // abs(A): low 63 bits, sign dropped
 293                 TMPPAIR = combine(##0x00100000,#0)              // min normal
 294                 TMP = #0x7FFF
 295         }
 296         {
 297                 P_TMP = dfcmp.eq(TMPPAIR,B)             // Is everything zero in the rounded value...
 298                 P_TMP = bitsset(PRODHI,TMP)             // but a bunch of bits set in the unrounded abs(quotient)?
 299         }
 300
     // Both variants return early when P_TMP is false and otherwise load USR
     // into TMP. The arch-60 build issues the two instructions outside a
     // packet; the reason is not stated in this file.
 301 #if (__HEXAGON_ARCH__ == 60)
 302                 TMP = USR               // Read USR up front
 303                 if (!P_TMP) jumpr r31   // If not, just return; else we want to set Unf+Inexact
 304                                         // Note that inexact is already set...
 305 #else
 306         {
 307                 if (!P_TMP) jumpr r31                   // If not, just return
 308                 TMP = USR                               // Else, we want to set Unf+Inexact
 309         }                                               // Note that inexact is already set...
 310 #endif
 311         {
 312                 TMP = or(TMP,#0x30)
 313         }
 314         {
 315                 USR = TMP
 316         }
 317         {
 318                 p0 = dfcmp.eq(A,A)                         // result unused; presumably issued so the new USR flags are acted on -- confirm
 319                 jumpr r31
 320         }
 321
 322 .Ldiv_ovf:
 323
 324         // Raise Overflow, and choose the correct overflow value (saturated normal or infinity)
     // The choice depends on the rounding-mode field read from USR and on the
     // result sign (Q_POSITIVE).
     // NOTE(review): relies on packet instructions reading pre-packet values.
 325
 326         {
 327                 TMP = USR
 328                 B = combine(##0x7fefffff,#-1)              // largest finite double (magnitude)
 329                 AH = mux(Q_POSITIVE,#0,#-1)                // 0 for a positive result, all ones for a negative one
 330         }
 331         {
 332                 PROD = combine(##0x7ff00000,#0)            // infinity (magnitude)
 333                 QH = extractu(TMP,#2,#SR_ROUND_OFF)        // 2-bit rounding mode
 334                 TMP = or(TMP,#0x28)                        // presumably the overflow + inexact flag bits -- confirm against the USR layout
 335         }
 336         {
 337                 USR = TMP
 338                 QH ^= lsr(AH,#31)                          // flip bit 0 of the mode when the result is negative
 339                 QL = QH                                    // QL keeps the unmodified rounding mode
 340         }
 341         {
 342                 p0 = !cmp.eq(QL,#1)             // if not round-to-zero
 343                 p0 = !cmp.eq(QH,#2)             // and not rounding the other way
 344                 if (p0.new) B = PROD            // go to inf
 345                 p0 = dfcmp.eq(B,B)              // get exceptions
 346         }
 347         {
 348                 A = insert(B,#63,#0)                       // take the magnitude from B, keep the sign bit already in A
 349                 jumpr r31
 350         }
 351
 352 #undef ONE
 353 #define SIGN r28
 354 #undef NORMAL
 355 #undef NO_OVF_UNF
 356 #define P_INF p1
 357 #define P_ZERO p2
     // .Ldiv_abnormal: taken when at least one operand is not a normal number.
     // Sorts out NaN, inf/inf, 0/0, zero results and infinite results; what is
     // left has a denormal operand, which is normalized by hand before jumping
     // back into the fast path at .Ldenorm_continue.
     // NOTE(review): two compares writing one predicate in a packet are read
     // here as a logical AND -- confirm against the Hexagon PRM.
 358 .Ldiv_abnormal:
 359         {
 360                 P_TMP = dfclass(A,#DFCLASS_NUMBER)         // P_TMP: neither operand is a NaN
 361                 P_TMP = dfclass(B,#DFCLASS_NUMBER)
 362                 Q_POSITIVE = cmp.gt(SIGN,#-1)              // the entry sequence branched here before computing this
 363         }
 364         {
 365                 P_INF = dfclass(A,#DFCLASS_INFINITE)       // P_INF: inf / inf
 366                 P_INF = dfclass(B,#DFCLASS_INFINITE)
 367         }
 368         {
 369                 P_ZERO = dfclass(A,#DFCLASS_ZERO)          // P_ZERO: 0 / 0
 370                 P_ZERO = dfclass(B,#DFCLASS_ZERO)
 371         }
 372         {
 373                 if (!P_TMP) jump .Ldiv_nan
 374                 if (P_INF) jump .Ldiv_invalid
 375         }
 376         {
 377                 if (P_ZERO) jump .Ldiv_invalid
 378         }
 379         {
 380                 P_ZERO = dfclass(A,#DFCLASS_NONZERO)            // nonzero
 381                 P_ZERO = dfclass(B,#DFCLASS_NONINFINITE)        // non-infinite
 382         }
 383         {
 384                 P_INF = dfclass(A,#DFCLASS_NONINFINITE) // non-infinite
 385                 P_INF = dfclass(B,#DFCLASS_NONZERO)     // nonzero
 386         }
 387         {
 388                 if (!P_ZERO) jump .Ldiv_zero_result        // numerator is zero or denominator is infinite
 389                 if (!P_INF) jump .Ldiv_inf_result          // numerator is infinite or denominator is zero
 390         }
 391         // Now we've narrowed it down to (de)normal / (de)normal
 392         // Set up A/EXPA B/EXPB and go back
 393 #undef P_ZERO
 394 #undef P_INF
 395 #define P_TMP2 p1
 396         {
 397                 P_TMP = dfclass(A,#DFCLASS_NORMAL)         // P_TMP: A is normal
 398                 P_TMP2 = dfclass(B,#DFCLASS_NORMAL)        // P_TMP2: B is normal
 399                 TMP = ##0x00100000                         // hidden-bit position in a high word
 400         }
 401         {
 402                 EXPBA = combine(BH,AH)                     // save the raw high words: EXPB = BH, EXPA = AH
 403                 AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32)          // clear out hidden bit, sign bit
 404                 BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32)          // clear out hidden bit, sign bit
 405         }
 406         {
 407                 if (P_TMP) AH = or(AH,TMP)                              // if normal, add back in hidden bit
 408                 if (P_TMP2) BH = or(BH,TMP)                             // if normal, add back in hidden bit
 409         }
 410         {
 411                 QH = add(clb(A),#-DF_EXPBITS)              // left shift that puts A's leading one at the hidden-bit position
 412                 QL = add(clb(B),#-DF_EXPBITS)              // same for B
 413                 TMP = #1
 414         }
 415         {
 416                 EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)  // biased exponent fields from the saved high words
 417                 EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
 418         }
 419         {
 420                 A = asl(A,QH)                              // normalize the mantissas
 421                 B = asl(B,QL)
 422                 if (!P_TMP) EXPA = sub(TMP,QH)             // denormal: effective exponent = 1 - shift
 423                 if (!P_TMP2) EXPB = sub(TMP,QL)
 424         }       // recreate values needed by resume code
 425         {
 426                 PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)   // top 23 mantissa bits of the normalized denominator
 427         }
 428         {
 429                 SFDEN = or(SFONE,PRODLO)                   // SFONE is loaded in the packet that branches to .Ldiv_abnormal
 430                 jump .Ldenorm_continue
 431         }
 432
 433 .Ldiv_zero_result:
     // Returns a zero carrying the xor of the operand signs. Reached from
     // .Ldiv_abnormal when the numerator is zero or the denominator is infinite.
 434         {
 435                 AH = xor(AH,BH)                            // bit 31 = result sign
 436                 B = #0
 437         }
 438         {
 439                 A = insert(B,#63,#0)                       // clear the low 63 bits of A, keep the sign bit
 440                 jumpr r31
 441         }
 442 .Ldiv_inf_result:
     // Returns an infinity carrying the xor of the operand signs. Reached from
     // .Ldiv_abnormal when the numerator is infinite or the denominator is zero.
     // Only a non-infinite numerator over a zero denominator sets the DBZ bit.
     // NOTE(review): two compares writing p2 in one packet are read as a
     // logical AND -- confirm against the Hexagon PRM.
 443         {
 444                 p2 = dfclass(B,#DFCLASS_ZERO)              // p2: denominator is zero ...
 445                 p2 = dfclass(A,#DFCLASS_NONINFINITE)       // ... and numerator is not infinite
 446         }
 447         {
 448                 TMP = USR
 449                 if (!p2) jump 1f                           // not a divide-by-zero: skip the flag update
 450                 AH = xor(AH,BH)                            // bit 31 = result sign
 451         }
 452         {
 453                 TMP = or(TMP,#0x04)             // DBZ
 454         }
 455         {
 456                 USR = TMP
 457         }
 458 1:
 459         {
 460                 B = combine(##0x7ff00000,#0)               // infinity (magnitude)
 461                 p0 = dfcmp.uo(B,B)              // take possible exception
 462         }
 463         {
 464                 A = insert(B,#63,#0)                       // magnitude from B, sign bit kept from A
 465                 jumpr r31
 466         }
 467 .Ldiv_nan:
     // At least one operand is a NaN. Copy the NaN operand over a non-NaN one
     // so both registers hold NaNs, run both through convert_df2sf (results
     // discarded), and return the all-ones bit pattern.
 468         {
 469                 p0 = dfclass(A,#0x10)                      // p0: A matches class 0x10 (the NaN class, given the branch that leads here)
 470                 p1 = dfclass(B,#0x10)                      // p1: B matches class 0x10
 471                 if (!p0.new) A = B
 472                 if (!p1.new) B = A
 473         }
 474         {
 475                 QH = convert_df2sf(A)   // get possible invalid exceptions
 476                 QL = convert_df2sf(B)
 477         }
 478         {
 479                 A = #-1                                    // every bit set in r1:0
 480                 jumpr r31
 481         }
 482
 483 .Ldiv_invalid:
     // inf/inf and 0/0 end up here. The result is produced by converting the
     // single-precision pattern 0x7f800001 (all-ones exponent, nonzero
     // mantissa) to double.
 484         {
 485                 TMP = ##0x7f800001
 486         }
 487         {
 488                 A = convert_sf2df(TMP)          // get invalid, get DF qNaN
 489                 jumpr r31
 490         }
 491 END(__hexagon_divdf3)                      // emit the .size directive for the routine