Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / compiler-rt / lib / builtins / hexagon / dfdiv.S
blob202965ec4789fed01ddd1a9219cd921f2069d392
1 //===----------------------Hexagon builtin routine ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 // Double Precision Divide
11 #define A r1:0                       // numerator operand (64-bit pair); also receives the returned quotient
12 #define AH r1                        // high word of A (sign, exponent, top mantissa bits)
13 #define AL r0                        // low word of A
15 #define B r3:2                       // denominator operand (64-bit pair)
16 #define BH r3                        // high word of B
17 #define BL r2                        // low word of B
19 #define Q r5:4                       // integer quotient accumulator, later converted with convert_d2df
20 #define QH r5                        // high word of Q (also reused as a scratch register)
21 #define QL r4                        // low word of Q (also reused as a scratch register)
23 #define PROD r7:6                    // scratch pair: partial products / abs(Q)
24 #define PRODHI r7
25 #define PRODLO r6
27 #define SFONE r8                     // loaded with 0x3f800001 (single-precision 1.0 with the LSB set)
28 #define SFDEN r9                     // single-precision value built from the denominator's top mantissa bits
29 #define SFERROR r10                  // error term of the reciprocal refinement
30 #define SFRECIP r11                  // single-precision reciprocal estimate
32 #define EXPBA r13:12                 // EXPB:EXPA viewed as one register pair
33 #define EXPB r13                     // exponent of B (r13 is reused as RECIPEST in the fast path)
34 #define EXPA r12                     // exponent of A, then the exponent of the result
36 #define REMSUB2 r15:14               // second partial product subtracted from REM in DIV_ITER1B
40 #define SIGN r28                     // AH ^ BH: bit 31 is the sign of the quotient
42 #define Q_POSITIVE p3                // true when the sign bit of SIGN is clear
43 #define NORMAL p2                    // true when both operands classify as DFCLASS_NORMAL
44 #define NO_OVF_UNF p1                // true when the exponent difference passes the range check
45 #define P_TMP p0                     // scratch predicate
47 #define RECIPEST_SHIFT 3             // bit offset at which the SF mantissa is inserted into RECIPEST
48 #define QADJ 61                      // subtracted from EXPA before it is added to the converted quotient
50 #define DFCLASS_NORMAL 0x02          // dfclass mask bits
51 #define DFCLASS_NUMBER 0x0F          // any class that is not routed to .Ldiv_nan
52 #define DFCLASS_INFINITE 0x08
53 #define DFCLASS_ZERO 0x01
54 #define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO)              // every NUMBER class except zero
55 #define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE)      // every NUMBER class except infinity
57 #define DF_MANTBITS 52               // double-precision mantissa width
58 #define DF_EXPBITS 11                // double-precision exponent width
59 #define SF_MANTBITS 23               // single-precision mantissa width
60 #define SF_EXPBITS 8                 // single-precision exponent width
61 #define DF_BIAS 0x3ff                // double-precision exponent bias
63 #define SR_ROUND_OFF 22              // bit offset of the 2-bit rounding-mode field read from USR
65 #define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG      // export __qdsp_TAG as an alias
66 #define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG    // export __hexagon_fast_TAG alias
67 #define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG // export __hexagon_fast2_TAG alias
68 #define END(TAG) .size TAG,.-TAG     // record the symbol size (current location minus TAG)
70         .text
71         .global __hexagon_divdf3
72         .type __hexagon_divdf3,@function
73         Q6_ALIAS(divdf3)                // also exported as __qdsp_divdf3
74         FAST_ALIAS(divdf3)              // also exported as __hexagon_fast_divdf3
75         FAST2_ALIAS(divdf3)             // also exported as __hexagon_fast2_divdf3
76         .p2align 5
77 __hexagon_divdf3:                       // double-precision divide: A (r1:0) / B (r3:2), result returned in A
78         {
79                 NORMAL = dfclass(A,#DFCLASS_NORMAL)     // NORMAL is written twice in this packet:
80                 NORMAL = dfclass(B,#DFCLASS_NORMAL)     // it ends up true only if both A and B are normal
81                 EXPBA = combine(BH,AH)                  // EXPB:EXPA = raw high words of B and A
82                 SIGN = xor(AH,BH)                       // bit 31 = sign of the quotient
83         }
84 #undef A
85 #undef AH
86 #undef AL
87 #undef B
88 #undef BH
89 #undef BL
90 #define REM r1:0                        // from here on r1:0 holds the running remainder
91 #define REMHI r1
92 #define REMLO r0
93 #define DENOM r3:2                      // and r3:2 holds the denominator mantissa
94 #define DENOMHI r3
95 #define DENOMLO r2
96         {
97                 if (!NORMAL) jump .Ldiv_abnormal        // zero/subnormal/inf/NaN operands take the slow path
98                 PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)    // top 23 mantissa bits of the denominator
99                 SFONE = ##0x3f800001                    // single-precision 1.0 with the LSB set
100         }
101         {
102                 SFDEN = or(SFONE,PRODLO)                // SF value: exponent of 1.0, denominator mantissa bits
103                 EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)       // isolate B's 11-bit exponent field
104                 EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)       // isolate A's 11-bit exponent field
105                 Q_POSITIVE = cmp.gt(SIGN,#-1)           // true when the quotient sign bit is clear
106         }
107 #undef SIGN
108 #define ONE r28                         // r28 is reused as the constant 1
109 .Ldenorm_continue:                      // the (de)normal slow path rejoins here
110         {
111                 SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN)   // seed reciprocal estimate for SFDEN
112                 SFERROR = and(SFONE,#-2)                // SFONE with its LSB cleared (0x3f800000 in the fast path)
113                 ONE = #1
114                 EXPA = sub(EXPA,EXPB)                   // exponent of the result before adjustment
115         }
116 #undef EXPB
117 #define RECIPEST r13                    // r13 is reused for the integer reciprocal estimate
118         {
119                 SFERROR -= sfmpy(SFRECIP,SFDEN):lib     // error = 1 - recip*den
120                 REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)       // overwrite sign+exponent with 1: explicit hidden bit
121                 RECIPEST = ##0x00800000 << RECIPEST_SHIFT               // bit just above a 23-bit mantissa, pre-shifted
122         }
123         {
124                 SFRECIP += sfmpy(SFRECIP,SFERROR):lib   // refine: recip += recip*error
125                 DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)     // same hidden-bit treatment for the denominator
126                 SFERROR = and(SFONE,#-2)                // reload the cleared-LSB constant for the second refinement
127         }
128         {
129                 SFERROR -= sfmpy(SFRECIP,SFDEN):lib     // error = 1 - recip*den (second pass)
130                 QH = #-DF_BIAS+1                        // QH/QL temporarily hold the exponent
131                 QL = #DF_BIAS-1                         // bounds used by the range check below
132         }
133         {
134                 SFRECIP += sfmpy(SFRECIP,SFERROR):lib   // refine again
135                 NO_OVF_UNF = cmp.gt(EXPA,QH)            // both writes target NO_OVF_UNF: true only when
136                 NO_OVF_UNF = !cmp.gt(EXPA,QL)           // -DF_BIAS+1 < EXPA <= DF_BIAS-1
137         }
138         {
139                 RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT) // drop the refined SF mantissa into the estimate
140                 Q = #0                                  // clear the quotient accumulator
141                 EXPA = add(EXPA,#-QADJ)                 // pre-adjust the exponent by QADJ
142         }
143 #undef SFERROR
144 #undef SFRECIP
145 #define TMP r10                         // r10/r11 become general scratch registers
146 #define TMP1 r11
147         {
148                 RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT))      // lower the estimate by 3 units at RECIPEST_SHIFT
149         }                                                               // NOTE(review): presumably so it never overshoots - confirm
151 #define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \
152         { \
153                 PROD = mpyu(RECIPEST,REMHI); \
154                 REM = asl(REM,# ## ( REMSHIFT )); \
155         }; \
156         { \
157                 PRODLO = # ## 0; \
158                 REM -= mpyu(PRODHI,DENOMLO); \
159                 REMSUB2 = mpyu(PRODHI,DENOMHI); \
160         }; \
161         { \
162                 Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \
163                 REM -= asl(REMSUB2, # ## 32); \
164                 EXTRA \
165         }
168         DIV_ITER1B(ASL,14,15,)                  // four rounds: estimate quotient bits from REMHI*RECIPEST,
169         DIV_ITER1B(ASR,1,15,)                   // shift REM left by 15, subtract estimate*DENOM from REM,
170         DIV_ITER1B(ASR,16,15,)                  // and accumulate the shifted estimate into Q
171         DIV_ITER1B(ASR,31,15,PROD=# ( 0 );)     // last round also zeroes PROD for the fix-up below
173 #undef REMSUB2
174 #define TMPPAIR r15:14
175 #define TMPPAIRHI r15
176 #define TMPPAIRLO r14
177 #undef RECIPEST
178 #define EXPB r13                        // r13 goes back to being EXPB
179         {
180                 // compare or sub with carry
181                 TMPPAIR = sub(REM,DENOM)
182                 P_TMP = cmp.gtu(DENOM,REM)
183                 // set up amt to add to q
184                 if (!P_TMP.new) PRODLO  = #2
185         }
186         {
187                 Q = add(Q,PROD)                         // PROD is 0, or 2 when REM >= DENOM
188                 if (!P_TMP) REM = TMPPAIR               // keep the reduced remainder in that case
189                 TMPPAIR = #0
190         }
191         {
192                 P_TMP = cmp.eq(REM,TMPPAIR)             // is the final remainder exactly zero?
193                 if (!P_TMP.new) QL = or(QL,ONE)         // if not, set the low (sticky) bit of Q
194         }
195         {
196                 PROD = neg(Q)
197         }
198         {
199                 if (!Q_POSITIVE) Q = PROD               // apply the sign to the integer quotient
200         }
201 #undef REM
202 #undef REMHI
203 #undef REMLO
204 #undef DENOM
205 #undef DENOMLO
206 #undef DENOMHI
207 #define A r1:0                          // r1:0 / r3:2 revert to their A / B names
208 #define AH r1
209 #define AL r0
210 #define B r3:2
211 #define BH r3
212 #define BL r2
213         {
214                 A = convert_d2df(Q)                     // signed 64-bit integer -> double
215                 if (!NO_OVF_UNF) jump .Ldiv_ovf_unf     // exponent outside the safe range: slow path
216         }
217         {
218                 AH += asl(EXPA,#DF_MANTBITS-32)         // add the result exponent into the exponent field
219                 jumpr r31                               // return
220         }
222 .Ldiv_ovf_unf:                          // reached when the exponent range check failed
223         {
224                 AH += asl(EXPA,#DF_MANTBITS-32)         // tentatively apply the exponent, as on the normal path
225                 EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32) // exponent field taken from AH
226         }
227         {
228                 PROD = abs(Q)
229                 EXPA = add(EXPA,EXPB)                   // exponent the result would have with unbounded range
230         }
231         {
232                 P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS)          // overflow
233                 if (P_TMP.new) jump:nt .Ldiv_ovf
234         }
235         {
236                 P_TMP = cmp.gt(EXPA,#0)
237                 if (P_TMP.new) jump:nt .Lpossible_unf           // round up to normal possible...
238         }
239         // Underflow
240         // We know what the infinite range exponent should be (EXPA)
241         // Q is 2's complement, PROD is abs(Q)
242         // Normalize Q, shift right, add a high bit, convert, change exponent
244 #define FUDGE1 7        // how much to shift right
245 #define FUDGE2 4        // how many guard/round to keep at lsbs
247         {
248                 EXPB = add(clb(PROD),#-1)                       // doesn't need to be added in since
249                 EXPA = sub(#FUDGE1,EXPA)                        // we extract post-converted exponent
250                 TMP = USR                                       // snapshot of the user status register
251                 TMP1 = #63                                      // upper bound for the right-shift amount
252         }
253         {
254                 EXPB = min(EXPA,TMP1)                           // right-shift amount, clamped to 63
255                 TMP1 = or(TMP,#0x030)                           // USR image with the 0x30 (Unf+Inexact) bits set
256                 PROD = asl(PROD,EXPB)                           // normalize abs(Q)
257                 EXPA = #0                                       // EXPBA now encodes width=EXPB, offset=0
258         }
259         {
260                 TMPPAIR = extractu(PROD,EXPBA)                          // bits that will get shifted out
261                 PROD = lsr(PROD,EXPB)                                   // shift out bits
262                 B = #1
263         }
264         {
265                 P_TMP = cmp.gtu(B,TMPPAIR)                      // true only when the shifted-out bits are all zero
266                 if (!P_TMP.new) PRODLO = or(BL,PRODLO)          // otherwise fold them into a sticky low bit
267                 PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2)  // the "high bit" added before conversion
268         }
269         {
270                 Q = neg(PROD)
271                 P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1)          // are the FUDGE2 guard bits all clear?
272                 if (!P_TMP.new) TMP = TMP1                      // if not, use the USR image with 0x30 set
273         }
274         {
275                 USR = TMP                                       // write back the (possibly updated) status
276                 if (Q_POSITIVE) Q = PROD                        // pick the signed value to convert
277                 TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2)            // exponent correction applied after conversion
278         }
279         {
280                 A = convert_d2df(Q)
281         }
282         {
283                 AH += asl(TMP,#DF_MANTBITS-32)                  // add the correction into the exponent field
284                 jumpr r31                                       // return
285         }
288 .Lpossible_unf:
289         // If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal
290         // The answer is correct, but we need to raise Underflow
291         {
292                 B = extractu(A,#63,#0)                          // A with the sign bit dropped
293                 TMPPAIR = combine(##0x00100000,#0)              // min normal
294                 TMP = #0x7FFF                                   // mask of bits tested in PRODHI below
295         }
296         {
297                 P_TMP = dfcmp.eq(TMPPAIR,B)             // Is everything zero in the rounded value...
298                 P_TMP = bitsset(PRODHI,TMP)             // but a bunch of bits set in the unrounded abs(quotient)?
299         }
301 #if (__HEXAGON_ARCH__ == 60)
302                 TMP = USR               // If not, just return
303                 if (!P_TMP) jumpr r31   // Else, we want to set Unf+Inexact
304                                         // Note that inexact is already set...
305 #else
306         {
307                 if (!P_TMP) jumpr r31                   // If not, just return
308                 TMP = USR                               // Else, we want to set Unf+Inexact
309         }                                               // Note that inexact is already set...
310 #endif
311         {
312                 TMP = or(TMP,#0x30)                     // set the Unf+Inexact bits in the USR image
313         }
314         {
315                 USR = TMP                               // write the flags back
316         }
317         {
318                 p0 = dfcmp.eq(A,A)                      // NOTE(review): p0 is not read afterwards; presumably issued for its exception side effect - confirm
319                 jumpr r31                               // return (A already holds the result)
320         }
322 .Ldiv_ovf:
324         // Raise Overflow, and choose the correct overflow value (saturated normal or infinity)
326         {
327                 TMP = USR                               // snapshot of the user status register
328                 B = combine(##0x7fefffff,#-1)           // default result: largest finite magnitude
329                 AH = mux(Q_POSITIVE,#0,#-1)             // 0 for a positive result, all ones for a negative one
330         }
331         {
332                 PROD = combine(##0x7ff00000,#0)         // infinity bit pattern
333                 QH = extractu(TMP,#2,#SR_ROUND_OFF)     // 2-bit rounding-mode field of USR
334                 TMP = or(TMP,#0x28)                     // NOTE(review): presumably the Overflow+Inexact flag bits - confirm
335         }
336         {
337                 USR = TMP                               // write the flags back
338                 QH ^= lsr(AH,#31)                       // flip the low mode bit when the result is negative
339                 QL = QH                                 // NOTE(review): the tests below suggest QL gets the mode before the XOR - confirm packet read semantics
340         }
341         {
342                 p0 = !cmp.eq(QL,#1)             // if not round-to-zero
343                 p0 = !cmp.eq(QH,#2)             // and not rounding the other way
344                 if (p0.new) B = PROD            // go to inf
345                 p0 = dfcmp.eq(B,B)              // get exceptions
346         }
347         {
348                 A = insert(B,#63,#0)                    // low 63 bits from B; bit 63 of A (the sign set above) is kept
349                 jumpr r31                               // return
350         }
352 #undef ONE
353 #define SIGN r28                        // r28 still holds AH ^ BH on this path
354 #undef NORMAL
355 #undef NO_OVF_UNF
356 #define P_INF p1                        // p1/p2 are reused as classification predicates
357 #define P_ZERO p2
358 .Ldiv_abnormal:                         // at least one operand did not classify as normal
359         {
360                 P_TMP = dfclass(A,#DFCLASS_NUMBER)      // P_TMP: both operands are in a NUMBER class
361                 P_TMP = dfclass(B,#DFCLASS_NUMBER)
362                 Q_POSITIVE = cmp.gt(SIGN,#-1)           // recompute the sign predicate for this path
363         }
364         {
365                 P_INF = dfclass(A,#DFCLASS_INFINITE)    // P_INF: both operands are infinite
366                 P_INF = dfclass(B,#DFCLASS_INFINITE)
367         }
368         {
369                 P_ZERO = dfclass(A,#DFCLASS_ZERO)       // P_ZERO: both operands are zero
370                 P_ZERO = dfclass(B,#DFCLASS_ZERO)
371         }
372         {
373                 if (!P_TMP) jump .Ldiv_nan              // an operand is outside the NUMBER classes
374                 if (P_INF) jump .Ldiv_invalid           // inf / inf
375         }
376         {
377                 if (P_ZERO) jump .Ldiv_invalid          // 0 / 0
378         }
379         {
380                 P_ZERO = dfclass(A,#DFCLASS_NONZERO)            // nonzero
381                 P_ZERO = dfclass(B,#DFCLASS_NONINFINITE)        // non-infinite
382         }
383         {
384                 P_INF = dfclass(A,#DFCLASS_NONINFINITE) // non-infinite
385                 P_INF = dfclass(B,#DFCLASS_NONZERO)     // nonzero
386         }
387         {
388                 if (!P_ZERO) jump .Ldiv_zero_result     // A is zero or B is infinite
389                 if (!P_INF) jump .Ldiv_inf_result       // A is infinite or B is zero
390         }
391         // Now we've narrowed it down to (de)normal / (de)normal
392         // Set up A/EXPA B/EXPB and go back
393 #undef P_ZERO
394 #undef P_INF
395 #define P_TMP2 p1
396         {
397                 P_TMP = dfclass(A,#DFCLASS_NORMAL)      // P_TMP: A is normal
398                 P_TMP2 = dfclass(B,#DFCLASS_NORMAL)     // P_TMP2: B is normal
399                 TMP = ##0x00100000                      // hidden-bit position within the high word
400         }
401         {
402                 EXPBA = combine(BH,AH)                  // save the raw high words before they are modified
403                 AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32)          // clear out hidden bit, sign bit
404                 BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32)          // clear out hidden bit, sign bit
405         }
406         {
407                 if (P_TMP) AH = or(AH,TMP)                              // if normal, add back in hidden bit
408                 if (P_TMP2) BH = or(BH,TMP)                             // if normal, add back in hidden bit
409         }
410         {
411                 QH = add(clb(A),#-DF_EXPBITS)           // left-shift count that normalizes A's mantissa
412                 QL = add(clb(B),#-DF_EXPBITS)           // left-shift count that normalizes B's mantissa
413                 TMP = #1
414         }
415         {
416                 EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)       // isolate the 11-bit exponent fields
417                 EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
418         }
419         {
420                 A = asl(A,QH)                           // normalize both mantissas
421                 B = asl(B,QL)
422                 if (!P_TMP) EXPA = sub(TMP,QH)          // non-normal A: exponent becomes 1 - shift
423                 if (!P_TMP2) EXPB = sub(TMP,QL)         // non-normal B: exponent becomes 1 - shift
424         }       // recreate values needed by resume code
425         {
426                 PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)        // top 23 mantissa bits of the normalized B
427         }
428         {
429                 SFDEN = or(SFONE,PRODLO)                // rebuild the SF denominator as in the fast path
430                 jump .Ldenorm_continue                  // rejoin the main divide sequence
431         }
433 .Ldiv_zero_result:                      // reached when A is zero or B is infinite: return a signed zero
434         {
435                 AH = xor(AH,BH)                         // bit 31 = sign of the result
436                 B = #0
437         }
438         {
439                 A = insert(B,#63,#0)                    // clear the low 63 bits of A, keeping only the sign bit
440                 jumpr r31                               // return
441         }
442 .Ldiv_inf_result:                       // reached when A is infinite or B is zero: return a signed infinity
443         {
444                 p2 = dfclass(B,#DFCLASS_ZERO)           // p2: B is zero and A is not infinite,
445                 p2 = dfclass(A,#DFCLASS_NONINFINITE)    // i.e. a genuine divide-by-zero
446         }
447         {
448                 TMP = USR                               // snapshot of the user status register
449                 if (!p2) jump 1f                        // not a divide-by-zero: skip the DBZ flag update
450                 AH = xor(AH,BH)                         // bit 31 = sign of the result (same packet as the jump)
451         }
452         {
453                 TMP = or(TMP,#0x04)             // DBZ
454         }
455         {
456                 USR = TMP                               // write the flag back
457         }
458 1:                                      // target of "jump 1f" above; both cases build the infinity here
459         {
460                 B = combine(##0x7ff00000,#0)            // infinity bit pattern
461                 p0 = dfcmp.uo(B,B)              // take possible exception
462         }
463         {
464                 A = insert(B,#63,#0)                    // low 63 bits from B, sign bit of A kept
465                 jumpr r31                               // return
466         }
467 .Ldiv_nan:                              // reached when an operand is outside the DFCLASS_NUMBER classes
468         {
469                 p0 = dfclass(A,#0x10)                   // NOTE(review): 0x10 has no DFCLASS_ name here; presumably the NaN class bit - confirm
470                 p1 = dfclass(B,#0x10)
471                 if (!p0.new) A = B                      // an operand not in class 0x10 is replaced by the other one
472                 if (!p1.new) B = A
473         }
474         {
475                 QH = convert_df2sf(A)   // get possible invalid exceptions
476                 QL = convert_df2sf(B)   // (the converted values themselves are not used afterwards)
477         }
478         {
479                 A = #-1                                 // return the all-ones bit pattern
480                 jumpr r31                               // return
481         }
483 .Ldiv_invalid:                          // reached for inf/inf and 0/0
484         {
485                 TMP = ##0x7f800001                      // single-precision pattern: exponent all ones, nonzero mantissa
486         }
487         {
488                 A = convert_sf2df(TMP)          // get invalid, get DF qNaN
489                 jumpr r31                       // return
490         }
491 END(__hexagon_divdf3)                   // emits .size for __hexagon_divdf3