Sync usage with man page.
[netbsd-mini2440.git] / gnu / dist / gcc4 / gcc / config / sh / lib1funcs.asm
blob53334500865152f36c0408d7c55177f713790388
1 /* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
2 2004, 2005
3 Free Software Foundation, Inc.
5 This file is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 2, or (at your option) any
8 later version.
10 In addition to the permissions in the GNU General Public License, the
11 Free Software Foundation gives you unlimited permission to link the
12 compiled version of this file into combinations with other programs,
13 and to distribute those combinations without any restriction coming
14 from the use of this file. (The General Public License restrictions
15 do apply in other respects; for example, they cover modification of
16 the file, and distribution when not linked into a combine
17 executable.)
19 This file is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; see the file COPYING. If not, write to
26 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
27 Boston, MA 02110-1301, USA. */
29 !! libgcc routines for the Renesas / SuperH SH CPUs.
30 !! Contributed by Steve Chamberlain.
31 !! sac@cygnus.com
33 !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
34 !! recoded in assembly by Toshiyasu Morita
35 !! tm@netcom.com
37 /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
38 ELF local label prefixes by J"orn Rennecke
39 amylaar@cygnus.com */
/* Helper macros shared by every routine in this file.

   LOCAL(X)          file-local label (".L" prefixed on ELF so that it is
                     kept out of the symbol table).
   FUNC(X)           mark X as a function symbol (ELF only).
   HIDDEN_FUNC(X)    FUNC(X) plus hidden visibility (ELF only).
   HIDDEN_ALIAS(X,Y) make GLOBAL(X) an alias of GLOBAL(Y), hidden on ELF.
   ENDFUNC(X)        emit the .size information for X (ELF only).

   ENDFUNC goes through ENDFUNC0, and GLOBAL through GLOBAL0/CONCAT, so that
   a macro argument such as GLOBAL(foo) or __USER_LABEL_PREFIX__ is fully
   expanded before it meets the ## operator.  */
#ifdef __ELF__
#define LOCAL(X)	.L_##X
#define FUNC(X)		.type X,@function
#define HIDDEN_FUNC(X)	FUNC(X); .hidden X
#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y); .hidden GLOBAL(X)
#define ENDFUNC0(X)	.Lfe_##X: .size X,.Lfe_##X-X
#define ENDFUNC(X)	ENDFUNC0(X)
#else
#define LOCAL(X)	L_##X
#define FUNC(X)
#define HIDDEN_FUNC(X)
#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y)
#define ENDFUNC(X)
#endif

/* GLOBAL(X) yields the user label prefix followed by "__" and X.  */
#define	CONCAT(A,B)	A##B
#define	GLOBAL0(U,X)	CONCAT(U,__##X)
#define	GLOBAL(X)	GLOBAL0(__USER_LABEL_PREFIX__,X)

#define ALIAS(X,Y)	.global GLOBAL(X); .set GLOBAL(X),GLOBAL(Y)

/* On SH2A the 64-bit fmov.d load is always selected (see the users of
   FMOVD_WORKS further down).  */
#ifdef __SH2A__
#undef FMOVD_WORKS
#define FMOVD_WORKS
#endif
#if ! __SH5__
#ifdef L_ashiftrt
/* __ashiftrt_r4_N, N = 0..32: arithmetic right shift of r4 by the constant
   N, performed in place (argument and result are both r4).

   All entry points live in one fall-through chain.  Entering at label N
   executes single-bit shar instructions until the next "anchor" (24, 16 or
   0) is reached; the anchors use the multi-bit logical shifts followed by a
   sign extension.  Every rts is followed by its delay-slot instruction,
   which is still part of the shift.

   Counts 31 and 32 share one entry: the result is just the sign bit
   replicated through the whole register.

   The shift instructions write the T bit.  */
	.global	GLOBAL(ashiftrt_r4_0)
	.global	GLOBAL(ashiftrt_r4_1)
	.global	GLOBAL(ashiftrt_r4_2)
	.global	GLOBAL(ashiftrt_r4_3)
	.global	GLOBAL(ashiftrt_r4_4)
	.global	GLOBAL(ashiftrt_r4_5)
	.global	GLOBAL(ashiftrt_r4_6)
	.global	GLOBAL(ashiftrt_r4_7)
	.global	GLOBAL(ashiftrt_r4_8)
	.global	GLOBAL(ashiftrt_r4_9)
	.global	GLOBAL(ashiftrt_r4_10)
	.global	GLOBAL(ashiftrt_r4_11)
	.global	GLOBAL(ashiftrt_r4_12)
	.global	GLOBAL(ashiftrt_r4_13)
	.global	GLOBAL(ashiftrt_r4_14)
	.global	GLOBAL(ashiftrt_r4_15)
	.global	GLOBAL(ashiftrt_r4_16)
	.global	GLOBAL(ashiftrt_r4_17)
	.global	GLOBAL(ashiftrt_r4_18)
	.global	GLOBAL(ashiftrt_r4_19)
	.global	GLOBAL(ashiftrt_r4_20)
	.global	GLOBAL(ashiftrt_r4_21)
	.global	GLOBAL(ashiftrt_r4_22)
	.global	GLOBAL(ashiftrt_r4_23)
	.global	GLOBAL(ashiftrt_r4_24)
	.global	GLOBAL(ashiftrt_r4_25)
	.global	GLOBAL(ashiftrt_r4_26)
	.global	GLOBAL(ashiftrt_r4_27)
	.global	GLOBAL(ashiftrt_r4_28)
	.global	GLOBAL(ashiftrt_r4_29)
	.global	GLOBAL(ashiftrt_r4_30)
	.global	GLOBAL(ashiftrt_r4_31)
	.global	GLOBAL(ashiftrt_r4_32)

	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))

	.align	1
GLOBAL(ashiftrt_r4_32):
GLOBAL(ashiftrt_r4_31):
	rotcl	r4		! T = sign bit of r4
	rts
	subc	r4,r4		! delay slot: r4 = -T, i.e. 0 or -1

GLOBAL(ashiftrt_r4_30):
	shar	r4
GLOBAL(ashiftrt_r4_29):
	shar	r4
GLOBAL(ashiftrt_r4_28):
	shar	r4
GLOBAL(ashiftrt_r4_27):
	shar	r4
GLOBAL(ashiftrt_r4_26):
	shar	r4
GLOBAL(ashiftrt_r4_25):
	shar	r4
GLOBAL(ashiftrt_r4_24):
	shlr16	r4
	shlr8	r4		! top byte is now in bits 7..0
	rts
	exts.b	r4,r4		! delay slot: sign-extend that byte

GLOBAL(ashiftrt_r4_23):
	shar	r4
GLOBAL(ashiftrt_r4_22):
	shar	r4
GLOBAL(ashiftrt_r4_21):
	shar	r4
GLOBAL(ashiftrt_r4_20):
	shar	r4
GLOBAL(ashiftrt_r4_19):
	shar	r4
GLOBAL(ashiftrt_r4_18):
	shar	r4
GLOBAL(ashiftrt_r4_17):
	shar	r4
GLOBAL(ashiftrt_r4_16):
	shlr16	r4		! top half is now in bits 15..0
	rts
	exts.w	r4,r4		! delay slot: sign-extend that half

GLOBAL(ashiftrt_r4_15):
	shar	r4
GLOBAL(ashiftrt_r4_14):
	shar	r4
GLOBAL(ashiftrt_r4_13):
	shar	r4
GLOBAL(ashiftrt_r4_12):
	shar	r4
GLOBAL(ashiftrt_r4_11):
	shar	r4
GLOBAL(ashiftrt_r4_10):
	shar	r4
GLOBAL(ashiftrt_r4_9):
	shar	r4
GLOBAL(ashiftrt_r4_8):
	shar	r4
GLOBAL(ashiftrt_r4_7):
	shar	r4
GLOBAL(ashiftrt_r4_6):
	shar	r4
GLOBAL(ashiftrt_r4_5):
	shar	r4
GLOBAL(ashiftrt_r4_4):
	shar	r4
GLOBAL(ashiftrt_r4_3):
	shar	r4
GLOBAL(ashiftrt_r4_2):
	shar	r4
GLOBAL(ashiftrt_r4_1):
	rts
	shar	r4		! delay slot: the last single-bit shift

GLOBAL(ashiftrt_r4_0):
	rts
	nop

	ENDFUNC(GLOBAL(ashiftrt_r4_0))
	ENDFUNC(GLOBAL(ashiftrt_r4_1))
	ENDFUNC(GLOBAL(ashiftrt_r4_2))
	ENDFUNC(GLOBAL(ashiftrt_r4_3))
	ENDFUNC(GLOBAL(ashiftrt_r4_4))
	ENDFUNC(GLOBAL(ashiftrt_r4_5))
	ENDFUNC(GLOBAL(ashiftrt_r4_6))
	ENDFUNC(GLOBAL(ashiftrt_r4_7))
	ENDFUNC(GLOBAL(ashiftrt_r4_8))
	ENDFUNC(GLOBAL(ashiftrt_r4_9))
	ENDFUNC(GLOBAL(ashiftrt_r4_10))
	ENDFUNC(GLOBAL(ashiftrt_r4_11))
	ENDFUNC(GLOBAL(ashiftrt_r4_12))
	ENDFUNC(GLOBAL(ashiftrt_r4_13))
	ENDFUNC(GLOBAL(ashiftrt_r4_14))
	ENDFUNC(GLOBAL(ashiftrt_r4_15))
	ENDFUNC(GLOBAL(ashiftrt_r4_16))
	ENDFUNC(GLOBAL(ashiftrt_r4_17))
	ENDFUNC(GLOBAL(ashiftrt_r4_18))
	ENDFUNC(GLOBAL(ashiftrt_r4_19))
	ENDFUNC(GLOBAL(ashiftrt_r4_20))
	ENDFUNC(GLOBAL(ashiftrt_r4_21))
	ENDFUNC(GLOBAL(ashiftrt_r4_22))
	ENDFUNC(GLOBAL(ashiftrt_r4_23))
	ENDFUNC(GLOBAL(ashiftrt_r4_24))
	ENDFUNC(GLOBAL(ashiftrt_r4_25))
	ENDFUNC(GLOBAL(ashiftrt_r4_26))
	ENDFUNC(GLOBAL(ashiftrt_r4_27))
	ENDFUNC(GLOBAL(ashiftrt_r4_28))
	ENDFUNC(GLOBAL(ashiftrt_r4_29))
	ENDFUNC(GLOBAL(ashiftrt_r4_30))
	ENDFUNC(GLOBAL(ashiftrt_r4_31))
	ENDFUNC(GLOBAL(ashiftrt_r4_32))
#endif
#ifdef L_ashiftrt_n

!
! GLOBAL(ashrsi3)
!
! Arithmetic right shift by a run-time count.
!
! Entry:
!
! r4: Value to shift
! r5: Shifts (only the low five bits are used)
!
! Exit:
!
! r0: Result
!
! Destroys:
!
! r5 (masked, then reused as the dispatch offset)
! T bit (written by the shift instructions)
!

/* Dispatch: the byte table holds, for every count, the distance from the
   table to the matching entry of the shift chain.  mov.b sign-extends, so
   every distance has to stay below 128.  In the braf variant the branch
   base is the address just past the delay slot, which is exactly where the
   table starts; keep the instruction count between the function label and
   the table unchanged.  */
	.global	GLOBAL(ashrsi3)
	HIDDEN_FUNC(GLOBAL(ashrsi3))
	.align	2
GLOBAL(ashrsi3):
	mov	#31,r0
	and	r0,r5			! r5 = count mod 32
	mova	LOCAL(ashrsi3_table),r0
	mov.b	@(r0,r5),r5		! r5 = offset of the entry for this count
#ifdef __sh1__
	add	r5,r0
	jmp	@r0
#else
	braf	r5
#endif
	mov	r4,r0			! delay slot: the shift is done in r0

	.align	2
LOCAL(ashrsi3_table):
	.byte		LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)

LOCAL(ashrsi3_31):
	rotcl	r0		! T = sign bit
	rts
	subc	r0,r0		! delay slot: r0 = -T, i.e. 0 or -1

LOCAL(ashrsi3_30):
	shar	r0
LOCAL(ashrsi3_29):
	shar	r0
LOCAL(ashrsi3_28):
	shar	r0
LOCAL(ashrsi3_27):
	shar	r0
LOCAL(ashrsi3_26):
	shar	r0
LOCAL(ashrsi3_25):
	shar	r0
LOCAL(ashrsi3_24):
	shlr16	r0
	shlr8	r0
	rts
	exts.b	r0,r0		! delay slot: sign-extend the remaining byte

LOCAL(ashrsi3_23):
	shar	r0
LOCAL(ashrsi3_22):
	shar	r0
LOCAL(ashrsi3_21):
	shar	r0
LOCAL(ashrsi3_20):
	shar	r0
LOCAL(ashrsi3_19):
	shar	r0
LOCAL(ashrsi3_18):
	shar	r0
LOCAL(ashrsi3_17):
	shar	r0
LOCAL(ashrsi3_16):
	shlr16	r0
	rts
	exts.w	r0,r0		! delay slot: sign-extend the remaining half

LOCAL(ashrsi3_15):
	shar	r0
LOCAL(ashrsi3_14):
	shar	r0
LOCAL(ashrsi3_13):
	shar	r0
LOCAL(ashrsi3_12):
	shar	r0
LOCAL(ashrsi3_11):
	shar	r0
LOCAL(ashrsi3_10):
	shar	r0
LOCAL(ashrsi3_9):
	shar	r0
LOCAL(ashrsi3_8):
	shar	r0
LOCAL(ashrsi3_7):
	shar	r0
LOCAL(ashrsi3_6):
	shar	r0
LOCAL(ashrsi3_5):
	shar	r0
LOCAL(ashrsi3_4):
	shar	r0
LOCAL(ashrsi3_3):
	shar	r0
LOCAL(ashrsi3_2):
	shar	r0
LOCAL(ashrsi3_1):
	rts
	shar	r0		! delay slot: the last single-bit shift

LOCAL(ashrsi3_0):
	rts
	nop

	ENDFUNC(GLOBAL(ashrsi3))
#endif
#ifdef L_ashiftlt

!
! GLOBAL(ashlsi3)
!
! Left shift by a run-time count.
!
! Entry:
!
! r4: Value to shift
! r5: Shifts (only the low five bits are used)
!
! Exit:
!
! r0: Result
!
! Destroys:
!
! r5 (masked, then reused as the dispatch offset)
! T bit (written by shll on the odd counts)
!
/* Dispatch works through a table of signed byte offsets; in the braf
   variant the table has to start directly behind the delay slot, so the
   instruction count in front of it must not change.

   The shift chains are grouped by the final instruction(s): each group
   steps down in units of two (shll2) and ends in shll, shll8, shll16 or a
   combination of them, with the last one placed in the rts delay slot.  */
	.global	GLOBAL(ashlsi3)
	HIDDEN_FUNC(GLOBAL(ashlsi3))
	.align	2
GLOBAL(ashlsi3):
	mov	#31,r0
	and	r0,r5			! r5 = count mod 32
	mova	LOCAL(ashlsi3_table),r0
	mov.b	@(r0,r5),r5		! r5 = offset of the entry for this count
#ifdef __sh1__
	add	r5,r0
	jmp	@r0
#else
	braf	r5
#endif
	mov	r4,r0			! delay slot: the shift is done in r0

	.align	2
LOCAL(ashlsi3_table):
	.byte		LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)

LOCAL(ashlsi3_6):
	shll2	r0
LOCAL(ashlsi3_4):
	shll2	r0
LOCAL(ashlsi3_2):
	rts
	shll2	r0

LOCAL(ashlsi3_7):
	shll2	r0
LOCAL(ashlsi3_5):
	shll2	r0
LOCAL(ashlsi3_3):
	shll2	r0
LOCAL(ashlsi3_1):
	rts
	shll	r0

LOCAL(ashlsi3_14):
	shll2	r0
LOCAL(ashlsi3_12):
	shll2	r0
LOCAL(ashlsi3_10):
	shll2	r0
LOCAL(ashlsi3_8):
	rts
	shll8	r0

LOCAL(ashlsi3_15):
	shll2	r0
LOCAL(ashlsi3_13):
	shll2	r0
LOCAL(ashlsi3_11):
	shll2	r0
LOCAL(ashlsi3_9):
	shll8	r0
	rts
	shll	r0

LOCAL(ashlsi3_22):
	shll2	r0
LOCAL(ashlsi3_20):
	shll2	r0
LOCAL(ashlsi3_18):
	shll2	r0
LOCAL(ashlsi3_16):
	rts
	shll16	r0

LOCAL(ashlsi3_23):
	shll2	r0
LOCAL(ashlsi3_21):
	shll2	r0
LOCAL(ashlsi3_19):
	shll2	r0
LOCAL(ashlsi3_17):
	shll16	r0
	rts
	shll	r0

LOCAL(ashlsi3_30):
	shll2	r0
LOCAL(ashlsi3_28):
	shll2	r0
LOCAL(ashlsi3_26):
	shll2	r0
LOCAL(ashlsi3_24):
	shll16	r0
	rts
	shll8	r0

LOCAL(ashlsi3_31):
	shll2	r0
LOCAL(ashlsi3_29):
	shll2	r0
LOCAL(ashlsi3_27):
	shll2	r0
LOCAL(ashlsi3_25):
	shll16	r0
	shll8	r0
	rts
	shll	r0

LOCAL(ashlsi3_0):
	rts
	nop

	ENDFUNC(GLOBAL(ashlsi3))
#endif
#ifdef L_lshiftrt

!
! GLOBAL(lshrsi3)
!
! Logical right shift by a run-time count.
!
! Entry:
!
! r4: Value to shift
! r5: Shifts (only the low five bits are used)
!
! Exit:
!
! r0: Result
!
! Destroys:
!
! r5 (masked, then reused as the dispatch offset)
! T bit (written by shlr on the odd counts)
!
/* Dispatch works through a table of signed byte offsets; in the braf
   variant the table has to start directly behind the delay slot, so the
   instruction count in front of it must not change.

   The shift chains are grouped by the final instruction(s): each group
   steps down in units of two (shlr2) and ends in shlr, shlr8, shlr16 or a
   combination of them, with the last one placed in the rts delay slot.  */
	.global	GLOBAL(lshrsi3)
	HIDDEN_FUNC(GLOBAL(lshrsi3))
	.align	2
GLOBAL(lshrsi3):
	mov	#31,r0
	and	r0,r5			! r5 = count mod 32
	mova	LOCAL(lshrsi3_table),r0
	mov.b	@(r0,r5),r5		! r5 = offset of the entry for this count
#ifdef __sh1__
	add	r5,r0
	jmp	@r0
#else
	braf	r5
#endif
	mov	r4,r0			! delay slot: the shift is done in r0

	.align	2
LOCAL(lshrsi3_table):
	.byte		LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)

LOCAL(lshrsi3_6):
	shlr2	r0
LOCAL(lshrsi3_4):
	shlr2	r0
LOCAL(lshrsi3_2):
	rts
	shlr2	r0

LOCAL(lshrsi3_7):
	shlr2	r0
LOCAL(lshrsi3_5):
	shlr2	r0
LOCAL(lshrsi3_3):
	shlr2	r0
LOCAL(lshrsi3_1):
	rts
	shlr	r0

LOCAL(lshrsi3_14):
	shlr2	r0
LOCAL(lshrsi3_12):
	shlr2	r0
LOCAL(lshrsi3_10):
	shlr2	r0
LOCAL(lshrsi3_8):
	rts
	shlr8	r0

LOCAL(lshrsi3_15):
	shlr2	r0
LOCAL(lshrsi3_13):
	shlr2	r0
LOCAL(lshrsi3_11):
	shlr2	r0
LOCAL(lshrsi3_9):
	shlr8	r0
	rts
	shlr	r0

LOCAL(lshrsi3_22):
	shlr2	r0
LOCAL(lshrsi3_20):
	shlr2	r0
LOCAL(lshrsi3_18):
	shlr2	r0
LOCAL(lshrsi3_16):
	rts
	shlr16	r0

LOCAL(lshrsi3_23):
	shlr2	r0
LOCAL(lshrsi3_21):
	shlr2	r0
LOCAL(lshrsi3_19):
	shlr2	r0
LOCAL(lshrsi3_17):
	shlr16	r0
	rts
	shlr	r0

LOCAL(lshrsi3_30):
	shlr2	r0
LOCAL(lshrsi3_28):
	shlr2	r0
LOCAL(lshrsi3_26):
	shlr2	r0
LOCAL(lshrsi3_24):
	shlr16	r0
	rts
	shlr8	r0

LOCAL(lshrsi3_31):
	shlr2	r0
LOCAL(lshrsi3_29):
	shlr2	r0
LOCAL(lshrsi3_27):
	shlr2	r0
LOCAL(lshrsi3_25):
	shlr16	r0
	shlr8	r0
	rts
	shlr	r0

LOCAL(lshrsi3_0):
	rts
	nop

	ENDFUNC(GLOBAL(lshrsi3))
#endif
#ifdef L_movmem
/* Block copy helpers.

   __movmemSIn (n = 4, 8, ... 64) copies n bytes, one longword at a time,
   from the address in r5 to the address in r4.  The entry points form one
   fall-through chain: __movmemSIn copies the longword at offset n-4 and
   drops into __movmemSI(n-4).  r0 is used as the transfer register.

   __movmem handles larger blocks by running the 64-byte chain repeatedly.
   r6 carries the remaining amount; it is scaled by four on entry
   (presumably a longword-based count supplied by the compiler -- confirm
   against the movmem expander).  pr is saved on the stack because the
   chain is entered with bsr and left with the rts at its end.  */
	.text
	.balign	4
	.global	GLOBAL(movmem)
	HIDDEN_FUNC(GLOBAL(movmem))
	HIDDEN_ALIAS(movstr,movmem)
	/* This would be a lot simpler if r6 contained the byte count
	   minus 64, and we wouldn't be called here for a byte count of 64.  */
GLOBAL(movmem):
	sts.l	pr,@-r15
	shll2	r6
	/* Enter the chain one instruction late: the first load of
	   movmemSI52 is executed here in the bsr delay slot.  */
	bsr	GLOBAL(movmemSI52+2)
	mov.l	@(48,r5),r0
	.balign	4
LOCAL(movmem_loop):	/* Reached with rts */
	mov.l	@(60,r5),r0
	add	#-64,r6
	mov.l	r0,@(60,r4)
	tst	r6,r6
	mov.l	@(56,r5),r0
	bt	LOCAL(movmem_done)
	mov.l	r0,@(56,r4)
	cmp/pl	r6
	mov.l	@(52,r5),r0
	add	#64,r5
	mov.l	r0,@(52,r4)
	add	#64,r4
	bt	GLOBAL(movmemSI52)
! done all the large groups, do the remainder
! jump to movmem+
	mova	GLOBAL(movmemSI4)+4,r0
	add	r6,r0
	jmp	@r0
LOCAL(movmem_done): ! share slot insn, works out aligned.
	lds.l	@r15+,pr
	mov.l	r0,@(56,r4)
	mov.l	@(52,r5),r0
	rts
	mov.l	r0,@(52,r4)
	.balign	4
! ??? We need aliases movstr* for movmem* for the older libraries.  These
! aliases will be removed at some point in the future.
	.global	GLOBAL(movmemSI64)
	HIDDEN_FUNC(GLOBAL(movmemSI64))
	HIDDEN_ALIAS(movstrSI64,movmemSI64)
GLOBAL(movmemSI64):
	mov.l	@(60,r5),r0
	mov.l	r0,@(60,r4)
	.global	GLOBAL(movmemSI60)
	HIDDEN_FUNC(GLOBAL(movmemSI60))
	HIDDEN_ALIAS(movstrSI60,movmemSI60)
GLOBAL(movmemSI60):
	mov.l	@(56,r5),r0
	mov.l	r0,@(56,r4)
	.global	GLOBAL(movmemSI56)
	HIDDEN_FUNC(GLOBAL(movmemSI56))
	HIDDEN_ALIAS(movstrSI56,movmemSI56)
GLOBAL(movmemSI56):
	mov.l	@(52,r5),r0
	mov.l	r0,@(52,r4)
	.global	GLOBAL(movmemSI52)
	HIDDEN_FUNC(GLOBAL(movmemSI52))
	HIDDEN_ALIAS(movstrSI52,movmemSI52)
GLOBAL(movmemSI52):
	mov.l	@(48,r5),r0
	mov.l	r0,@(48,r4)
	.global	GLOBAL(movmemSI48)
	HIDDEN_FUNC(GLOBAL(movmemSI48))
	HIDDEN_ALIAS(movstrSI48,movmemSI48)
GLOBAL(movmemSI48):
	mov.l	@(44,r5),r0
	mov.l	r0,@(44,r4)
	.global	GLOBAL(movmemSI44)
	HIDDEN_FUNC(GLOBAL(movmemSI44))
	HIDDEN_ALIAS(movstrSI44,movmemSI44)
GLOBAL(movmemSI44):
	mov.l	@(40,r5),r0
	mov.l	r0,@(40,r4)
	.global	GLOBAL(movmemSI40)
	HIDDEN_FUNC(GLOBAL(movmemSI40))
	HIDDEN_ALIAS(movstrSI40,movmemSI40)
GLOBAL(movmemSI40):
	mov.l	@(36,r5),r0
	mov.l	r0,@(36,r4)
	.global	GLOBAL(movmemSI36)
	HIDDEN_FUNC(GLOBAL(movmemSI36))
	HIDDEN_ALIAS(movstrSI36,movmemSI36)
GLOBAL(movmemSI36):
	mov.l	@(32,r5),r0
	mov.l	r0,@(32,r4)
	.global	GLOBAL(movmemSI32)
	HIDDEN_FUNC(GLOBAL(movmemSI32))
	HIDDEN_ALIAS(movstrSI32,movmemSI32)
GLOBAL(movmemSI32):
	mov.l	@(28,r5),r0
	mov.l	r0,@(28,r4)
	.global	GLOBAL(movmemSI28)
	HIDDEN_FUNC(GLOBAL(movmemSI28))
	HIDDEN_ALIAS(movstrSI28,movmemSI28)
GLOBAL(movmemSI28):
	mov.l	@(24,r5),r0
	mov.l	r0,@(24,r4)
	.global	GLOBAL(movmemSI24)
	HIDDEN_FUNC(GLOBAL(movmemSI24))
	HIDDEN_ALIAS(movstrSI24,movmemSI24)
GLOBAL(movmemSI24):
	mov.l	@(20,r5),r0
	mov.l	r0,@(20,r4)
	.global	GLOBAL(movmemSI20)
	HIDDEN_FUNC(GLOBAL(movmemSI20))
	HIDDEN_ALIAS(movstrSI20,movmemSI20)
GLOBAL(movmemSI20):
	mov.l	@(16,r5),r0
	mov.l	r0,@(16,r4)
	.global	GLOBAL(movmemSI16)
	HIDDEN_FUNC(GLOBAL(movmemSI16))
	HIDDEN_ALIAS(movstrSI16,movmemSI16)
GLOBAL(movmemSI16):
	mov.l	@(12,r5),r0
	mov.l	r0,@(12,r4)
	.global	GLOBAL(movmemSI12)
	HIDDEN_FUNC(GLOBAL(movmemSI12))
	HIDDEN_ALIAS(movstrSI12,movmemSI12)
GLOBAL(movmemSI12):
	mov.l	@(8,r5),r0
	mov.l	r0,@(8,r4)
	.global	GLOBAL(movmemSI8)
	HIDDEN_FUNC(GLOBAL(movmemSI8))
	HIDDEN_ALIAS(movstrSI8,movmemSI8)
GLOBAL(movmemSI8):
	mov.l	@(4,r5),r0
	mov.l	r0,@(4,r4)
	.global	GLOBAL(movmemSI4)
	HIDDEN_FUNC(GLOBAL(movmemSI4))
	HIDDEN_ALIAS(movstrSI4,movmemSI4)
GLOBAL(movmemSI4):
	mov.l	@(0,r5),r0
	rts
	mov.l	r0,@(0,r4)

	ENDFUNC(GLOBAL(movmemSI64))
	ENDFUNC(GLOBAL(movmemSI60))
	ENDFUNC(GLOBAL(movmemSI56))
	ENDFUNC(GLOBAL(movmemSI52))
	ENDFUNC(GLOBAL(movmemSI48))
	ENDFUNC(GLOBAL(movmemSI44))
	ENDFUNC(GLOBAL(movmemSI40))
	ENDFUNC(GLOBAL(movmemSI36))
	ENDFUNC(GLOBAL(movmemSI32))
	ENDFUNC(GLOBAL(movmemSI28))
	ENDFUNC(GLOBAL(movmemSI24))
	ENDFUNC(GLOBAL(movmemSI20))
	ENDFUNC(GLOBAL(movmemSI16))
	ENDFUNC(GLOBAL(movmemSI12))
	ENDFUNC(GLOBAL(movmemSI8))
	ENDFUNC(GLOBAL(movmemSI4))
	ENDFUNC(GLOBAL(movmem))
#endif
#ifdef L_movmem_i4
/* Block copy helpers that keep several longwords in flight at a time.

   All three routines read through r5 and write through r4; r0-r3 are used
   as transfer registers.  movmem_i4_even and movmem_i4_odd share one
   software-pipelined loop that is counted down with "dt r6" (the exact
   meaning of the count in r6 is fixed by the compiler side -- confirm
   against the movmem expander before changing it).  movmemSI12_i4 copies
   exactly three longwords and leaves r4/r5 unchanged.  */
	.text
	.global	GLOBAL(movmem_i4_even)
	.global	GLOBAL(movmem_i4_odd)
	.global	GLOBAL(movmemSI12_i4)

	HIDDEN_FUNC(GLOBAL(movmem_i4_even))
	HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
	HIDDEN_FUNC(GLOBAL(movmemSI12_i4))

	HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
	HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
	HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)

	.p2align	5
	/* Loop exit taken from the middle of an iteration: r0/r1 still
	   have to be stored.  */
L_movmem_2mod4_end:
	mov.l	r0,@(16,r4)
	rts
	mov.l	r1,@(20,r4)

	.p2align	2

GLOBAL(movmem_i4_even):
	mov.l	@r5+,r0
	bra	L_movmem_start_even
	mov.l	@r5+,r1

GLOBAL(movmem_i4_odd):
	mov.l	@r5+,r1
	add	#-4,r4		! bias r4 so that the loop offsets line up
	mov.l	@r5+,r2
	mov.l	@r5+,r3
	mov.l	r1,@(4,r4)
	mov.l	r2,@(8,r4)

L_movmem_loop:
	mov.l	r3,@(12,r4)
	dt	r6
	mov.l	@r5+,r0
	bt/s	L_movmem_2mod4_end
	mov.l	@r5+,r1
	add	#16,r4
L_movmem_start_even:
	mov.l	@r5+,r2
	mov.l	@r5+,r3
	mov.l	r0,@r4
	dt	r6
	mov.l	r1,@(4,r4)
	bf/s	L_movmem_loop
	mov.l	r2,@(8,r4)
	rts
	mov.l	r3,@(12,r4)

	ENDFUNC(GLOBAL(movmem_i4_even))
	ENDFUNC(GLOBAL(movmem_i4_odd))

	.p2align	4
GLOBAL(movmemSI12_i4):
	mov.l	@r5,r0
	mov.l	@(4,r5),r1
	mov.l	@(8,r5),r2
	mov.l	r0,@r4
	mov.l	r1,@(4,r4)
	rts
	mov.l	r2,@(8,r4)

	ENDFUNC(GLOBAL(movmemSI12_i4))
#endif
#ifdef L_mulsi3

/* __mulsi3: 32 x 32 -> 32 bit multiply built from 16 x 16 mulu.w steps.
   Arguments in r4 and r5, result in r0.
   Writes r1, r2, r3, macl and the T bit.  */

	.global	GLOBAL(mulsi3)
	HIDDEN_FUNC(GLOBAL(mulsi3))

! r4 =       aabb
! r5 =       ccdd
! r0 = aabb*ccdd  via partial products
!
! if aa == 0 and cc = 0
! r0 = bb*dd
!
! else
! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
!

GLOBAL(mulsi3):
	mulu.w	r4,r5		! multiply the lsws  macl=bb*dd
	mov	r5,r3		! r3 = ccdd
	swap.w	r4,r2		! r2 = bbaa
	xtrct	r2,r3		! r3 = aacc
	tst	r3,r3		! msws zero ?
	bf	hiset
	rts			! yes - then we have the answer
	sts	macl,r0

hiset:	sts	macl,r0		! r0 = bb*dd
	mulu.w	r2,r5		! brewing macl = aa*dd
	sts	macl,r1
	mulu.w	r3,r4		! brewing macl = cc*bb
	sts	macl,r2
	add	r1,r2
	shll16	r2		! (aa*dd + cc*bb) * 65536
	rts
	add	r2,r0		! delay slot: add the cross terms to bb*dd

	ENDFUNC(GLOBAL(mulsi3))
#endif
#endif /* ! __SH5__ */
#ifdef L_sdivsi3_i4
	.title "SH DIVIDE"
!! 4 byte integer Divide code for the Renesas SH
/* __sdivsi3_i4: signed 32-bit division carried out in the FPU.  Both
   operands are converted to double, divided, and the quotient is
   truncated back to an integer.  The result is returned in fpul, not in
   r0.  */
#ifdef __SH4__
!! args in r4 and r5, result in fpul, clobber dr0, dr2
/* This variant does not touch fpscr, so it relies on the caller already
   running with double precision selected (presumably the default mode of
   this multilib -- confirm).  */

	.global	GLOBAL(sdivsi3_i4)
	HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
GLOBAL(sdivsi3_i4):
	lds	r4,fpul
	float	fpul,dr0	! dr0 = (double) dividend
	lds	r5,fpul
	float	fpul,dr2	! dr2 = (double) divisor
	fdiv	dr2,dr0
	rts
	ftrc	dr0,fpul	! delay slot: truncate the quotient

	ENDFUNC(GLOBAL(sdivsi3_i4))
#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
/* Same computation, but fpscr is saved, switched for the duration of the
   divide and restored afterwards.  */

#if ! __SH5__ || __SH5__ == 32
#if __SH5__
	.mode	SHcompact
#endif
	.global	GLOBAL(sdivsi3_i4)
	HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
GLOBAL(sdivsi3_i4):
	sts.l	fpscr,@-r15
	mov	#8,r2
	swap.w	r2,r2		! r2 = 0x00080000 (fpscr PR bit: double precision)
	lds	r2,fpscr
	lds	r4,fpul
	float	fpul,dr0
	lds	r5,fpul
	float	fpul,dr2
	fdiv	dr2,dr0
	ftrc	dr0,fpul
	rts
	lds.l	@r15+,fpscr	! delay slot: restore the caller's fpscr

	ENDFUNC(GLOBAL(sdivsi3_i4))
#endif /* ! __SH5__ || __SH5__ == 32 */
#endif /* ! __SH4__ */
#endif
#ifdef L_sdivsi3
/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
   sh2e/sh3e code.  */
#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
!!
!! Steve Chamberlain
!! sac@cygnus.com
!!
!!

!! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit

/* __sdivsi3: signed 32-bit division, r4 / r5 -> r0.  Several
   implementations follow; the preprocessor picks exactly one:
     - __SHMEDIA__ non-zero:  table-driven SHmedia code (two older SHmedia
       versions are kept under "#if 0" / "#elif 0" for reference);
     - __SHMEDIA__ defined but zero:  SHmedia shift-and-subtract loop in
       the .text..SHmedia32 section (m5compact-nofpu);
     - otherwise:  SHcompact div0s/div1 sequence.  */
	.global	GLOBAL(sdivsi3)
#if __SHMEDIA__
#if __SH5__ == 32
	.section	.text..SHmedia32,"ax"
#else
	.text
#endif
	.align	2
#if 0
/* The assembly code that follows is a hand-optimized version of the C
   code that follows.  Note that the registers that are modified are
   exactly those listed as clobbered in the patterns divsi3_i1 and
   divsi3_i1_media.

int __sdivsi3 (i, j)
     int i, j;
{
  register unsigned long long r18 asm ("r18");
  register unsigned long long r19 asm ("r19");
  register unsigned long long r0 asm ("r0") = 0;
  register unsigned long long r1 asm ("r1") = 1;
  register int r2 asm ("r2") = i >> 31;
  register int r3 asm ("r3") = j >> 31;

  r2 = r2 ? r2 : r1;
  r3 = r3 ? r3 : r1;
  r18 = i * r2;
  r19 = j * r3;
  r2 *= r3;

  r19 <<= 31;
  r1 <<= 31;
  do
    if (r18 >= r19)
      r0 |= r1, r18 -= r19;
  while (r19 >>= 1, r1 >>= 1);

  return r2 * (int)r0;
}
*/
GLOBAL(sdivsi3):
	pt/l	LOCAL(sdivsi3_dontadd), tr2
	pt/l	LOCAL(sdivsi3_loop), tr1
	ptabs/l	r18, tr0
	movi	0, r0
	movi	1, r1
	shari.l	r4, 31, r2
	shari.l	r5, 31, r3
	cmveq	r2, r1, r2
	cmveq	r3, r1, r3
	muls.l	r4, r2, r18
	muls.l	r5, r3, r19
	muls.l	r2, r3, r2
	shlli	r19, 31, r19
	shlli	r1, 31, r1
LOCAL(sdivsi3_loop):
	bgtu	r19, r18, tr2
	or	r0, r1, r0
	sub	r18, r19, r18
LOCAL(sdivsi3_dontadd):
	shlri	r1, 1, r1
	shlri	r19, 1, r19
	bnei	r1, 0, tr1
	muls.l	r0, r2, r0
	add.l	r0, r63, r0
	blink	tr0, r63
#elif 0 /* ! 0 */
 // inputs: r4,r5
 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
 // result in r0
GLOBAL(sdivsi3):
 // can create absolute value without extra latency,
 // but dependent on proper sign extension of inputs:
 // shari.l r5,31,r2
 // xor r5,r2,r20
 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
 shari.l r5,31,r2
 ori r2,1,r2
 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
 movi 0xffffffffffffbb0c,r19 // shift count equiv 76
 shari.l r4,31,r3
 nsb r20,r0
 shlld r20,r0,r25
 shlri r25,48,r25
 sub r19,r25,r1
 mmulfx.w r1,r1,r2
 mshflo.w r1,r63,r1
 // If r4 was to be used in-place instead of r21, could use this sequence
 // to compute absolute:
 // sub r63,r4,r19 // compute absolute value of r4
 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
 ori r3,1,r3
 mmulfx.w r25,r2,r2
 sub r19,r0,r0
 muls.l r4,r3,r21
 msub.w r1,r2,r2
 addi r2,-2,r1
 mulu.l r21,r1,r19
 mmulfx.w r2,r2,r2
 shlli r1,15,r1
 shlrd r19,r0,r19
 mulu.l r19,r20,r3
 mmacnfx.wl r25,r2,r1
 ptabs r18,tr0
 sub r21,r3,r25

 mulu.l r25,r1,r2
 addi r0,14,r0
 xor r4,r5,r18
 shlrd r2,r0,r2
 mulu.l r2,r20,r3
 add r19,r2,r19
 shari.l r18,31,r18
 sub r25,r3,r25

 mulu.l r25,r1,r2
 sub r25,r20,r25
 add r19,r18,r19
 shlrd r2,r0,r2
 mulu.l r2,r20,r3
 addi r25,1,r25
 add r19,r2,r19

 cmpgt r25,r3,r25
 add.l r19,r25,r0
 xor r0,r18,r0
 blink tr0,r63
#else /* ! 0 && ! 0 */

 // inputs: r4,r5
 // clobbered: r1,r18,r19,r20,r21,r25,tr0
 // result in r0
 HIDDEN_FUNC(GLOBAL(sdivsi3_2))
#ifndef __pic__
 FUNC(GLOBAL(sdivsi3))
GLOBAL(sdivsi3): /* this is the shcompact entry point */
 // The special SHmedia entry point sdivsi3_1 prevents accidental linking
 // with the SHcompact implementation, which clobbers tr1 / tr2.
 .global GLOBAL(sdivsi3_1)
GLOBAL(sdivsi3_1):
 .global GLOBAL(div_table_internal)
 movi (GLOBAL(div_table_internal) >> 16) & 65535, r20
 shori GLOBAL(div_table_internal) & 65535, r20
#endif
 .global GLOBAL(sdivsi3_2)
 // div_table in r20
 // clobbered: r1,r18,r19,r21,r25,tr0
GLOBAL(sdivsi3_2):
 nsb r5, r1
 shlld r5, r1, r25    // normalize; [-2 ..1, 1..2) in s2.62
 shari r25, 58, r21   // extract 5(6) bit index (s2.4 with hole -1..1)
 ldx.ub r20, r21, r19 // u0.8
 shari r25, 32, r25   // normalize to s2.30
 shlli r21, 1, r21
 muls.l r25, r19, r19 // s2.38
 ldx.w r20, r21, r21  // s2.14
 ptabs r18, tr0
 shari r19, 24, r19   // truncate to s2.14
 sub r21, r19, r19    // some 11 bit inverse in s1.14
 muls.l r19, r19, r21 // u0.28
 sub r63, r1, r1
 addi r1, 92, r1
 muls.l r25, r21, r18 // s2.58
 shlli r19, 45, r19   // multiply by two and convert to s2.58
 /* bubble */
 sub r19, r18, r18
 shari r18, 28, r18   // some 22 bit inverse in s1.30
 muls.l r18, r25, r0  // s2.60
 muls.l r18, r4, r25 // s32.30
 /* bubble */
 shari r0, 16, r19   // s-16.44
 muls.l r19, r18, r19 // s-16.74
 shari r25, 63, r0
 shari r4, 14, r18   // s19.-14
 shari r19, 30, r19   // s-16.44
 muls.l r19, r18, r19 // s15.30
 xor r21, r0, r21    // You could also use the constant 1 << 27.
 add r21, r25, r21
 sub r21, r19, r21
 shard r21, r1, r21
 sub r21, r0, r0
 blink tr0, r63
#ifndef __pic__
 ENDFUNC(GLOBAL(sdivsi3))
#endif
 ENDFUNC(GLOBAL(sdivsi3_2))
#endif
#elif defined __SHMEDIA__
/* m5compact-nofpu */
 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
 // Works on the absolute values of both operands (r20, r21) and applies
 // the combined sign (r19) to the quotient at the end.
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	FUNC(GLOBAL(sdivsi3))
GLOBAL(sdivsi3):
	pt/l LOCAL(sdivsi3_dontsub), tr0
	pt/l LOCAL(sdivsi3_loop), tr1
	ptabs/l r18,tr2
	shari.l r4,31,r18
	shari.l r5,31,r19
	xor r4,r18,r20
	xor r5,r19,r21
	sub.l r20,r18,r20
	sub.l r21,r19,r21
	xor r18,r19,r19
	shlli r21,32,r25
	addi r25,-1,r21
	addz.l r20,r63,r20
LOCAL(sdivsi3_loop):
	shlli r20,1,r20
	bgeu/u r21,r20,tr0
	sub r20,r21,r20
LOCAL(sdivsi3_dontsub):
	addi.l r25,-1,r25
	bnei r25,-32,tr1
	xor r20,r19,r20
	sub.l r20,r19,r0
	blink tr2,r63
	ENDFUNC(GLOBAL(sdivsi3))
#else /* ! __SHMEDIA__ */
/* SHcompact version: 32 div1 steps, one quotient bit per step.
   r1 = dividend / quotient accumulator, r0 = divisor,
   r3 = high word (sign extension of the dividend), r2 = constant 0.
   A zero divisor returns 0.  */
	FUNC(GLOBAL(sdivsi3))
GLOBAL(sdivsi3):
	mov	r4,r1
	mov	r5,r0

	tst	r0,r0
	bt	div0
	mov	#0,r2
	div0s	r2,r1		! T = sign of the dividend
	subc	r3,r3		! r3 = 0 or -1 (dividend sign extension)
	subc	r2,r1		! ones-complement adjust for a negative dividend
	div0s	r0,r3		! set up Q/M/T for the signed step division
	! quotient bits 31..24
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	! quotient bits 23..16
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	! quotient bits 15..8
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	! quotient bits 7..0
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1		! shift in the last quotient bit
	addc	r2,r1		! final correction: add the carry left in T
	rts
	mov	r1,r0		! delay slot: quotient to r0


div0:	rts
	mov	#0,r0

	ENDFUNC(GLOBAL(sdivsi3))
#endif /* ! __SHMEDIA__ */
#endif /* ! __SH4__ */
#endif
1357 #ifdef L_udivsi3_i4
1359 .title "SH DIVIDE"
1360 !! 4 byte integer Divide code for the Renesas SH
1361 #ifdef __SH4__
/* Unsigned 32-bit division through the double-precision FPU.
   Both operands have their top bit flipped (xor with 0x80000000) so the
   signed int-to-double conversion can be used; adding the constant
   2147483648.0 stored at L1 restores the unsigned value.
   A divisor of 0 or 1 takes the "trivial" exit, which returns the
   dividend unchanged.
   Both exits end in rts with the final fpul write in the delay slot,
   and L1 labels the constant that mova addresses.  */
1362 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
1363 !! and t bit
1365 .global GLOBAL(udivsi3_i4)
1366 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1367 GLOBAL(udivsi3_i4):
1368 mov #1,r1
1369 cmp/hi r1,r5 /* T = (r5 > 1), unsigned */
1370 bf trivial /* divisor is 0 or 1 */
1371 rotr r1 /* r1 = 0x80000000 */
1372 xor r1,r4 /* bias the dividend */
1373 lds r4,fpul
1374 mova L1,r0 /* r0 = address of the 2^31 constant */
1375 #ifdef FMOVD_WORKS
1376 fmov.d @r0+,dr4
1377 #else
/* Load the double as two singles; the register order depends on endianness.  */
1378 #ifdef __LITTLE_ENDIAN__
1379 fmov.s @r0+,fr5
1380 fmov.s @r0,fr4
1381 #else
1382 fmov.s @r0+,fr4
1383 fmov.s @r0,fr5
1384 #endif
1385 #endif
1386 float fpul,dr0 /* dr0 = (double) biased dividend */
1387 xor r1,r5 /* bias the divisor */
1388 lds r5,fpul
1389 float fpul,dr2 /* dr2 = (double) biased divisor */
1390 fadd dr4,dr0 /* undo the bias: dr0 = dividend */
1391 fadd dr4,dr2 /* undo the bias: dr2 = divisor */
1392 fdiv dr2,dr0
1393 rts
1394 ftrc dr0,fpul /* delay slot: truncate the quotient into fpul */
1396 trivial:
1397 rts
1398 lds r4,fpul /* delay slot: result = dividend */
1400 .align 2
1401 #ifdef FMOVD_WORKS
1402 .align 3 ! make double below 8 byte aligned.
1403 #endif
1404 L1:
1405 .double 2147483648
1407 ENDFUNC(GLOBAL(udivsi3_i4))
1408 #elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
1409 #if ! __SH5__ || __SH5__ == 32
/* Unsigned 32-bit division for SHmedia with an FPU: both operands are
   zero-extended to 64 bits, converted to double, divided, and the
   quotient is truncated back to an integer.
   NOTE(review): the code below also writes dr32 (fr32/fr33), which the
   clobber list on the next line does not mention in full - confirm.  */
1410 !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
1411 .mode SHmedia
1412 .global GLOBAL(udivsi3_i4)
1413 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1414 GLOBAL(udivsi3_i4):
1415 addz.l r4,r63,r20 /* r20 = zero-extended dividend */
1416 addz.l r5,r63,r21 /* r21 = zero-extended divisor */
1417 fmov.qd r20,dr0
1418 fmov.qd r21,dr32
1419 ptabs r18,tr0 /* tr0 = return address */
1420 float.qd dr0,dr0 /* 64-bit integer -> double */
1421 float.qd dr32,dr32
1422 fdiv.d dr0,dr32,dr0 /* dr0 = dividend / divisor */
1423 ftrc.dq dr0,dr32 /* truncate to a 64-bit integer held in dr32 */
1424 fmov.s fr33,fr32 /* copy fr33 into fr32; presumably fr32 is where the caller reads fpul - confirm */
1425 blink tr0,r63
1427 ENDFUNC(GLOBAL(udivsi3_i4))
1428 #endif /* ! __SH5__ || __SH5__ == 32 */
1429 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
/* Unsigned 32-bit division through the FPU for the single-precision
   default modes.  The caller's fpscr is saved on the stack, replaced by
   the word stored at L1 for the duration of the double-precision
   computation, and restored in the delay slot of the return.
   The arithmetic is the same bias trick as elsewhere in this file: flip
   bit 31 of each operand, convert as signed, then add 2147483648.0.
   A divisor of 0 or 1 takes the "trivial" exit and returns the dividend.
   Both exits end in rts, and L1 labels the fpscr word followed by the
   double constant.  */
1430 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1432 .global GLOBAL(udivsi3_i4)
1433 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1434 GLOBAL(udivsi3_i4):
1435 mov #1,r1
1436 cmp/hi r1,r5 /* T = (r5 > 1), unsigned */
1437 bf trivial /* divisor is 0 or 1 */
1438 sts.l fpscr,@-r15 /* save the caller's fpscr */
1439 mova L1,r0
1440 lds.l @r0+,fpscr /* load the fpscr word at L1; r0 now points at the double */
1441 rotr r1 /* r1 = 0x80000000 */
1442 xor r1,r4 /* bias the dividend */
1443 lds r4,fpul
1444 #ifdef FMOVD_WORKS
1445 fmov.d @r0+,dr4
1446 #else
/* Load the double as two singles; the register order depends on endianness.  */
1447 #ifdef __LITTLE_ENDIAN__
1448 fmov.s @r0+,fr5
1449 fmov.s @r0,fr4
1450 #else
1451 fmov.s @r0+,fr4
1452 fmov.s @r0,fr5
1453 #endif
1454 #endif
1455 float fpul,dr0 /* dr0 = (double) biased dividend */
1456 xor r1,r5 /* bias the divisor */
1457 lds r5,fpul
1458 float fpul,dr2 /* dr2 = (double) biased divisor */
1459 fadd dr4,dr0 /* undo the bias */
1460 fadd dr4,dr2
1461 fdiv dr2,dr0
1462 ftrc dr0,fpul /* truncate the quotient into fpul */
1463 rts
1464 lds.l @r15+,fpscr /* delay slot: restore the caller's fpscr */
1466 #ifdef FMOVD_WORKS
1467 .align 3 ! make double below 8 byte aligned.
1468 #endif
1469 trivial:
1470 rts
1471 lds r4,fpul /* delay slot: result = dividend */
1473 .align 2
1474 L1:
/* fpscr value used during the division: bit 19 set, plus bit 20 when
   FMOVD_WORKS (PR and SZ in the SH-4 FPSCR layout).  */
1475 #ifndef FMOVD_WORKS
1476 .long 0x80000
1477 #else
1478 .long 0x180000
1479 #endif
1480 .double 2147483648
1482 ENDFUNC(GLOBAL(udivsi3_i4))
1483 #endif /* ! __SH4__ */
1484 #endif
1486 #ifdef L_udivsi3
1487 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1488 sh2e/sh3e code. */
1489 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1491 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1492 .global GLOBAL(udivsi3)
1493 HIDDEN_FUNC(GLOBAL(udivsi3))
1495 #if __SHMEDIA__
1496 #if __SH5__ == 32
1497 .section .text..SHmedia32,"ax"
1498 #else
1499 .text
1500 #endif
1501 .align 2
1502 #if 0
1503 /* The assembly code that follows is a hand-optimized version of the C
1504 code that follows. Note that the registers that are modified are
1505 exactly those listed as clobbered in the patterns udivsi3_i1 and
1506 udivsi3_i1_media.
1508 unsigned
1509 __udivsi3 (i, j)
1510 unsigned i, j;
1511 {
1512 register unsigned long long r0 asm ("r0") = 0;
1513 register unsigned long long r18 asm ("r18") = 1;
1514 register unsigned long long r4 asm ("r4") = i;
1515 register unsigned long long r19 asm ("r19") = j;
1517 r19 <<= 31;
1518 r18 <<= 31;
1519 do
1520 if (r4 >= r19)
1521 r0 |= r18, r4 -= r19;
1522 while (r19 >>= 1, r18 >>= 1);
1524 return r0;
1525 }
1526 */
// Disabled reference version: a plain 32-step shift-and-subtract loop,
// kept only as documentation of the C code above.
1527 GLOBAL(udivsi3):
1528 pt/l LOCAL(udivsi3_dontadd), tr2
1529 pt/l LOCAL(udivsi3_loop), tr1
1530 ptabs/l r18, tr0
1531 movi 0, r0
1532 movi 1, r18
1533 addz.l r5, r63, r19
1534 addz.l r4, r63, r4
1535 shlli r19, 31, r19
1536 shlli r18, 31, r18
1537 LOCAL(udivsi3_loop):
1538 bgtu r19, r4, tr2
1539 or r0, r18, r0
1540 sub r4, r19, r4
1541 LOCAL(udivsi3_dontadd):
1542 shlri r18, 1, r18
1543 shlri r19, 1, r19
1544 bnei r18, 0, tr1
1545 blink tr0, r63
1546 #else
// Live version: loop-free unsigned 32-bit division.  The divisor is
// normalized (nsb / shlld), a 16-bit fixed-point reciprocal estimate is
// refined with the mmulfx.w / msub.w multimedia instructions, and the
// quotient is accumulated in r18 from three multiply-by-reciprocal
// steps, each followed by a multiply-back and subtract to update the
// remainder in r25.  The final cmpgt supplies a 0/1 adjustment.
// The instruction order is hand-scheduled; the "bubble" markers note
// pipeline stalls the author could not fill.
1547 GLOBAL(udivsi3):
1548 // inputs: r4,r5
1549 // clobbered: r18,r19,r20,r21,r22,r25,tr0
1550 // result in r0.
1551 addz.l r5,r63,r22 // r22 = zero-extended divisor
1552 nsb r22,r0 // r0 = normalizing shift count
1553 shlld r22,r0,r25
1554 shlri r25,48,r25 // r25 = top 16 bits of the normalized divisor
1555 movi 0xffffffffffffbb0c,r20 // shift count equiv 76
1556 sub r20,r25,r21
1557 mmulfx.w r21,r21,r19
1558 mshflo.w r21,r63,r21
1559 ptabs r18,tr0 // tr0 = return address; r18 is reused below
1560 mmulfx.w r25,r19,r19
1561 sub r20,r0,r0
1562 /* bubble */
1563 msub.w r21,r19,r19
1564 addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1565 before the msub.w, but we need a different value for
1566 r19 to keep errors under control. */
1567 mulu.l r4,r21,r18 // first quotient estimate: dividend * reciprocal
1568 mmulfx.w r19,r19,r19
1569 shlli r21,15,r21
1570 shlrd r18,r0,r18
1571 mulu.l r18,r22,r20 // multiply back by the divisor
1572 mmacnfx.wl r25,r19,r21
1573 /* bubble */
1574 sub r4,r20,r25 // r25 = remainder after the first step
1576 mulu.l r25,r21,r19 // second estimate from the remainder
1577 addi r0,14,r0
1578 /* bubble */
1579 shlrd r19,r0,r19
1580 mulu.l r19,r22,r20
1581 add r18,r19,r18 // accumulate the quotient
1582 /* bubble */
1583 sub.l r25,r20,r25
1585 mulu.l r25,r21,r19 // third estimate
1586 addz.l r25,r63,r25
1587 sub r25,r22,r25
1588 shlrd r19,r0,r19
1589 mulu.l r19,r22,r20
1590 addi r25,1,r25
1591 add r18,r19,r18
1593 cmpgt r25,r20,r25 // r25 = 1 when one more divisor still fits
1594 add.l r18,r25,r0 // r0 = quotient
1595 blink tr0,r63
1596 #endif
1597 #elif defined (__SHMEDIA__)
1598 /* m5compact-nofpu - more emphasis on code size than on speed, but don't
1599 ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1600 So use a short shmedia loop. */
/* Unsigned 32-bit division using a 32-iteration shift-and-subtract loop.
   In:  r4 = dividend, r5 = divisor.   Out: r0 = quotient.
   r20 holds the remainder in its upper word and collects quotient bits
   in its lower word.  Returns through the address in r18 on entry.  */
1601 // clobbered: r20,r21,r25,tr0,tr1,tr2
1602 .mode SHmedia
1603 .section .text..SHmedia32,"ax"
1604 .align 2
1605 GLOBAL(udivsi3):
1606 pt/l LOCAL(udivsi3_dontsub), tr0
1607 pt/l LOCAL(udivsi3_loop), tr1
1608 ptabs/l r18,tr2 /* tr2 = return address */
1609 shlli r5,32,r25 /* r25 = divisor << 32; its zero low word also serves as loop counter */
1610 addi r25,-1,r21 /* r21 = (divisor << 32) - 1 */
1611 addz.l r4,r63,r20 /* r20 = zero-extended dividend */
1612 LOCAL(udivsi3_loop):
1613 shlli r20,1,r20 /* shift the remainder:quotient pair left by one */
1614 bgeu/u r21,r20,tr0 /* skip the subtraction while r21 >= r20 (unsigned) */
1615 sub r20,r21,r20 /* subtracts divisor << 32 and, through the -1, sets quotient bit 0 */
1616 LOCAL(udivsi3_dontsub):
1617 addi.l r25,-1,r25 /* 32-bit decrement: r25 counts -1, -2, ... */
1618 bnei r25,-32,tr1 /* repeat until r25 reaches -32, i.e. 32 iterations */
1619 add.l r20,r63,r0 /* r0 = low word of r20 = quotient */
1620 blink tr2,r63
1621 #else /* ! defined (__SHMEDIA__) */
/* Unsigned 32-bit division built from div1 steps.
   In:  r4 = dividend, r5 = divisor.   Out: r0 = quotient.
   pr is saved on the stack because the step helpers are entered with bsr.

   Helpers (each returns with one more div1 in the rts delay slot):
     div8  - 8 div1 steps (falls into div7)
     div7  - 7 div1 steps
     divx4 - 4 div1 steps interleaved with 3 rotcl r0

   A divisor that fits in 16 bits is handled as two 16-step divisions of
   the upper and lower dividend halves; any other divisor takes the
   large_divisor path, which runs 16 div1/rotcl steps in total.
   Each path ends in its own rts after restoring pr.  */
1622 LOCAL(div8):
1623 div1 r5,r4
1624 LOCAL(div7):
1625 div1 r5,r4; div1 r5,r4; div1 r5,r4
1626 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1628 LOCAL(divx4):
1629 div1 r5,r4; rotcl r0
1630 div1 r5,r4; rotcl r0
1631 div1 r5,r4; rotcl r0
1632 rts; div1 r5,r4
1634 GLOBAL(udivsi3):
1635 sts.l pr,@-r15 /* save the return address */
1636 extu.w r5,r0
1637 cmp/eq r5,r0 /* T = divisor fits in 16 bits */
/* sh1 has no bf/s, so there div0u is repeated at large_divisor instead
   of running in the branch delay slot.  */
1638 #ifdef __sh1__
1639 bf LOCAL(large_divisor)
1640 #else
1641 bf/s LOCAL(large_divisor)
1642 #endif
1643 div0u
1644 swap.w r4,r0
1645 shlr16 r4 /* r4 = upper half of the dividend */
1646 bsr LOCAL(div8)
1647 shll16 r5 /* delay slot: align the divisor with the upper word */
1648 bsr LOCAL(div7)
1649 div1 r5,r4 /* delay slot: with div8 and div7 this makes 16 steps */
1650 xtrct r4,r0
1651 xtrct r0,r4
1652 bsr LOCAL(div8)
1653 swap.w r4,r4
1654 bsr LOCAL(div7)
1655 div1 r5,r4
1656 lds.l @r15+,pr /* restore the return address */
1657 xtrct r4,r0
1658 swap.w r0,r0
1659 rotcl r0 /* shift in the last quotient bit */
1660 rts
1661 shlr16 r5 /* delay slot: undo the shll16 on the divisor */
1663 LOCAL(large_divisor):
1664 #ifdef __sh1__
1665 div0u
1666 #endif
1667 mov #0,r0
1668 xtrct r4,r0
1669 xtrct r0,r4
1670 bsr LOCAL(divx4)
1671 rotcl r0
1672 bsr LOCAL(divx4)
1673 rotcl r0
1674 bsr LOCAL(divx4)
1675 rotcl r0
1676 bsr LOCAL(divx4)
1677 rotcl r0
1678 lds.l @r15+,pr /* restore the return address */
1679 rts
1680 rotcl r0 /* delay slot: shift in the last quotient bit */
1682 ENDFUNC(GLOBAL(udivsi3))
1683 #endif /* ! __SHMEDIA__ */
1684 #endif /* __SH4__ */
1685 #endif /* L_udivsi3 */
1687 #ifdef L_udivdi3
1688 #ifdef __SHMEDIA__
1689 .mode SHmedia
1690 .section .text..SHmedia32,"ax"
1691 .align 2
1692 .global GLOBAL(udivdi3)
1693 FUNC(GLOBAL(udivdi3))
/* Unsigned 64-bit division.
   In:  r2 = dividend, r3 = divisor.   Out: r2 = quotient.
   Returns through r18.  Also reachable under the hidden alias
   udivdi3_internal.
   Registers written: r0-r2, r4-r9, r19-r22, r25 and tr0.
   Outline: the divisor is normalized by r22 = nsb (r3 >> 1), a
   fixed-point reciprocal is refined into r1, and the quotient is built
   in r8 from 32-bit multiply / subtract steps.  The large_divisor path
   is taken when r9 = 32 - r22 is positive (presumably a divisor wider
   than 32 bits - confirm against the nsb definition).
   The instruction order is hand-scheduled; do not reorder casually.  */
1694 GLOBAL(udivdi3):
1695 HIDDEN_ALIAS(udivdi3_internal,udivdi3)
1696 shlri r3,1,r4
1697 nsb r4,r22 /* r22 = normalizing shift count */
1698 shlld r3,r22,r6 /* r6 = normalized divisor */
1699 shlri r6,49,r5
1700 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1701 sub r21,r5,r1
1702 mmulfx.w r1,r1,r4
1703 mshflo.w r1,r63,r1
1704 sub r63,r22,r20 // r63 == 64 % 64
1705 mmulfx.w r5,r4,r4
1706 pta LOCAL(large_divisor),tr0
1707 addi r20,32,r9 /* r9 = 32 - r22 */
1708 msub.w r1,r4,r1
1709 madd.w r1,r1,r1
1710 mmulfx.w r1,r1,r4
1711 shlri r6,32,r7 /* r7 = upper word of the normalized divisor */
1712 bgt/u r9,r63,tr0 // large_divisor
1713 mmulfx.w r5,r4,r4
1714 shlri r2,32+14,r19
1715 addi r22,-31,r0
1716 msub.w r1,r4,r1
1718 mulu.l r1,r7,r4
1719 addi r1,-3,r5
1720 mulu.l r5,r19,r5
1721 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1722 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1723 the case may be, %0000000000000000 000.11111111111, still */
1724 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1725 mulu.l r5,r3,r8
1726 mshalds.l r1,r21,r1
1727 shari r4,26,r4
1728 shlld r8,r0,r8
1729 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1730 sub r2,r8,r2
1731 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1733 shlri r2,22,r21
1734 mulu.l r21,r1,r21
1735 shlld r5,r0,r8 /* r8 = first partial quotient, shifted into place */
1736 addi r20,30-22,r0
1737 shlrd r21,r0,r21
1738 mulu.l r21,r3,r5
1739 add r8,r21,r8 /* accumulate the second partial quotient */
1740 mcmpgt.l r21,r63,r21 // See Note 1
1741 addi r20,30,r0
1742 mshfhi.l r63,r21,r21
1743 sub r2,r5,r2
1744 andc r2,r21,r2
1746 /* small divisor: need a third divide step */
1747 mulu.l r2,r1,r7
1748 ptabs r18,tr0 /* tr0 = return address */
1749 addi r2,1,r2
1750 shlrd r7,r0,r7
1751 mulu.l r7,r3,r5
1752 add r8,r7,r8 /* accumulate the third partial quotient */
1753 sub r2,r3,r2
1754 cmpgt r2,r5,r5 /* r5 = 0 or 1: final adjustment */
1755 add r8,r5,r2 /* r2 = quotient */
1756 /* could test r3 here to check for divide by zero. */
1757 blink tr0,r63
1759 LOCAL(large_divisor):
1760 mmulfx.w r5,r4,r4
1761 shlrd r2,r9,r25
1762 shlri r25,32,r8
1763 msub.w r1,r4,r1
1765 mulu.l r1,r7,r4
1766 addi r1,-3,r5
1767 mulu.l r5,r8,r5
1768 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1769 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1770 the case may be, %0000000000000000 000.11111111111, still */
1771 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1772 shlri r5,14-1,r8
1773 mulu.l r8,r7,r5
1774 mshalds.l r1,r21,r1
1775 shari r4,26,r4
1776 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1777 sub r25,r5,r25
1778 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1780 shlri r25,22,r21
1781 mulu.l r21,r1,r21
1782 pta LOCAL(no_lo_adj),tr0
1783 addi r22,32,r0
1784 shlri r21,40,r21
1785 mulu.l r21,r7,r5
1786 add r8,r21,r8 /* accumulate the partial quotient */
1787 shlld r2,r0,r2
1788 sub r25,r5,r25
1789 bgtu/u r7,r25,tr0 // no_lo_adj
1790 addi r8,1,r8 /* one more divisor fits: bump the quotient ... */
1791 sub r25,r7,r25 /* ... and reduce the remainder */
1792 LOCAL(no_lo_adj):
1793 mextr4 r2,r25,r2
1795 /* large_divisor: only needs a few adjustments. */
1796 mulu.l r8,r6,r5
1797 ptabs r18,tr0 /* tr0 = return address */
1798 /* bubble */
1799 cmpgtu r5,r2,r5 /* r5 = 1 when the estimate overshot */
1800 sub r8,r5,r2 /* r2 = quotient */
1801 blink tr0,r63
1802 ENDFUNC(GLOBAL(udivdi3))
1803 /* Note 1: To shift the result of the second divide stage so that the result
1804 always fits into 32 bits, yet we still reduce the rest sufficiently
1805 would require a lot of instructions to do the shifts just right. Using
1806 the full 64 bit shift result to multiply with the divisor would require
1807 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1808 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1809 know that the rest after taking this partial result into account will
1810 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1811 upper 32 bits of the partial result are nonzero. */
1812 #endif /* __SHMEDIA__ */
1813 #endif /* L_udivdi3 */
1815 #ifdef L_divdi3
1816 #ifdef __SHMEDIA__
1817 .mode SHmedia
1818 .section .text..SHmedia32,"ax"
1819 .align 2
1820 .global GLOBAL(divdi3)
1821 FUNC(GLOBAL(divdi3))
/* Signed 64-bit division implemented on top of udivdi3_internal.
   In:  r2 = dividend, r3 = divisor.   Out: r2 = quotient.
   Both operands are replaced by their absolute values.  When their signs
   agree the unsigned routine is tail-called; otherwise it is called and
   its result is negated before returning.  */
1822 GLOBAL(divdi3):
1823 pta GLOBAL(udivdi3_internal),tr0
1824 shari r2,63,r22 /* r22 = 0 or -1: sign mask of the dividend */
1825 shari r3,63,r23 /* r23 = 0 or -1: sign mask of the divisor */
1826 xor r2,r22,r2
1827 xor r3,r23,r3
1828 sub r2,r22,r2 /* r2 = |dividend| */
1829 sub r3,r23,r3 /* r3 = |divisor| */
1830 beq/u r22,r23,tr0 /* same sign: tail call, r18 still holds our return address */
1831 ptabs r18,tr1 /* keep our return address in tr1 across the call */
1832 blink tr0,r18 /* call; r18 receives the address of the next instruction */
1833 sub r63,r2,r2 /* negate the unsigned quotient */
1834 blink tr1,r63
1835 ENDFUNC(GLOBAL(divdi3))
1836 #endif /* __SHMEDIA__ */
1837 #endif /* L_divdi3 */
1839 #ifdef L_umoddi3
1840 #ifdef __SHMEDIA__
1841 .mode SHmedia
1842 .section .text..SHmedia32,"ax"
1843 .align 2
1844 .global GLOBAL(umoddi3)
1845 FUNC(GLOBAL(umoddi3))
/* Unsigned 64-bit remainder.
   In:  r2 = dividend, r3 = divisor.   Out: r2 = remainder.
   Returns through r18.  Also reachable under the hidden alias
   umoddi3_internal.
   Registers written: r0-r2, r4-r9, r19-r22, r25 and tr0.
   The reciprocal set-up is the same sequence used by udivdi3; here only
   the running remainder is kept, and no quotient is accumulated.
   The large_divisor path is taken when r9 = 32 - r22 is positive
   (presumably a divisor wider than 32 bits - confirm against the nsb
   definition).  The instruction order is hand-scheduled.  */
1846 GLOBAL(umoddi3):
1847 HIDDEN_ALIAS(umoddi3_internal,umoddi3)
1848 shlri r3,1,r4
1849 nsb r4,r22 /* r22 = normalizing shift count */
1850 shlld r3,r22,r6 /* r6 = normalized divisor */
1851 shlri r6,49,r5
1852 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1853 sub r21,r5,r1
1854 mmulfx.w r1,r1,r4
1855 mshflo.w r1,r63,r1
1856 sub r63,r22,r20 // r63 == 64 % 64
1857 mmulfx.w r5,r4,r4
1858 pta LOCAL(large_divisor),tr0
1859 addi r20,32,r9 /* r9 = 32 - r22 */
1860 msub.w r1,r4,r1
1861 madd.w r1,r1,r1
1862 mmulfx.w r1,r1,r4
1863 shlri r6,32,r7 /* r7 = upper word of the normalized divisor */
1864 bgt/u r9,r63,tr0 // large_divisor
1865 mmulfx.w r5,r4,r4
1866 shlri r2,32+14,r19
1867 addi r22,-31,r0
1868 msub.w r1,r4,r1
1870 mulu.l r1,r7,r4
1871 addi r1,-3,r5
1872 mulu.l r5,r19,r5
1873 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1874 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1875 the case may be, %0000000000000000 000.11111111111, still */
1876 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1877 mulu.l r5,r3,r5
1878 mshalds.l r1,r21,r1
1879 shari r4,26,r4
1880 shlld r5,r0,r5
1881 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1882 sub r2,r5,r2
1883 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1885 shlri r2,22,r21
1886 mulu.l r21,r1,r21
1887 addi r20,30-22,r0
1888 /* bubble */ /* could test r3 here to check for divide by zero. */
1889 shlrd r21,r0,r21
1890 mulu.l r21,r3,r5
1891 mcmpgt.l r21,r63,r21 // See Note 1
1892 addi r20,30,r0
1893 mshfhi.l r63,r21,r21
1894 sub r2,r5,r2
1895 andc r2,r21,r2
1897 /* small divisor: need a third divide step */
1898 mulu.l r2,r1,r7
1899 ptabs r18,tr0 /* tr0 = return address */
1900 sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1901 shlrd r7,r0,r7
1902 mulu.l r7,r3,r5
1903 /* bubble */
1904 addi r8,1,r7
1905 cmpgt r7,r5,r7 /* r7 = 1 when one more divisor still fits */
1906 cmvne r7,r8,r2 /* if so, continue from rest - r3 */
1907 sub r2,r5,r2 /* r2 = remainder */
1908 blink tr0,r63
1910 LOCAL(large_divisor):
1911 mmulfx.w r5,r4,r4
1912 shlrd r2,r9,r25
1913 shlri r25,32,r8
1914 msub.w r1,r4,r1
1916 mulu.l r1,r7,r4
1917 addi r1,-3,r5
1918 mulu.l r5,r8,r5
1919 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1920 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1921 the case may be, %0000000000000000 000.11111111111, still */
1922 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1923 shlri r5,14-1,r8
1924 mulu.l r8,r7,r5
1925 mshalds.l r1,r21,r1
1926 shari r4,26,r4
1927 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1928 sub r25,r5,r25
1929 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1931 shlri r25,22,r21
1932 mulu.l r21,r1,r21
1933 pta LOCAL(no_lo_adj),tr0
1934 addi r22,32,r0
1935 shlri r21,40,r21
1936 mulu.l r21,r7,r5
1937 add r8,r21,r8
1938 shlld r2,r0,r2
1939 sub r25,r5,r25
1940 bgtu/u r7,r25,tr0 // no_lo_adj
1941 addi r8,1,r8
1942 sub r25,r7,r25
1943 LOCAL(no_lo_adj):
1944 mextr4 r2,r25,r2
1946 /* large_divisor: only needs a few adjustments. */
1947 mulu.l r8,r6,r5
1948 ptabs r18,tr0 /* tr0 = return address */
1949 add r2,r6,r7 /* r7 = rest + normalized divisor */
1950 cmpgtu r5,r2,r8 /* r8 = 1 when the estimate overshot */
1951 cmvne r8,r7,r2 /* if so, add the divisor back before subtracting */
1952 sub r2,r5,r2
1953 shlrd r2,r22,r2 /* undo the normalizing shift: r2 = remainder */
1954 blink tr0,r63
1955 ENDFUNC(GLOBAL(umoddi3))
1956 /* Note 1: To shift the result of the second divide stage so that the result
1957 always fits into 32 bits, yet we still reduce the rest sufficiently
1958 would require a lot of instructions to do the shifts just right. Using
1959 the full 64 bit shift result to multiply with the divisor would require
1960 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1961 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1962 know that the rest after taking this partial result into account will
1963 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1964 upper 32 bits of the partial result are nonzero. */
1965 #endif /* __SHMEDIA__ */
1966 #endif /* L_umoddi3 */
1968 #ifdef L_moddi3
1969 #ifdef __SHMEDIA__
1970 .mode SHmedia
1971 .section .text..SHmedia32,"ax"
1972 .align 2
1973 .global GLOBAL(moddi3)
1974 FUNC(GLOBAL(moddi3))
/* Signed 64-bit remainder implemented on top of umoddi3_internal.
   In:  r2 = dividend, r3 = divisor.   Out: r2 = remainder.
   Both operands are replaced by their absolute values.  The sign of the
   result follows the dividend only: a non-negative dividend tail-calls
   the unsigned routine, a negative one calls it and negates the result.  */
1975 GLOBAL(moddi3):
1976 pta GLOBAL(umoddi3_internal),tr0
1977 shari r2,63,r22 /* r22 = 0 or -1: sign mask of the dividend */
1978 shari r3,63,r23 /* r23 = 0 or -1: sign mask of the divisor */
1979 xor r2,r22,r2
1980 xor r3,r23,r3
1981 sub r2,r22,r2 /* r2 = |dividend| */
1982 sub r3,r23,r3 /* r3 = |divisor| */
1983 beq/u r22,r63,tr0 /* dividend >= 0: tail call, r18 still holds our return address */
1984 ptabs r18,tr1 /* keep our return address in tr1 across the call */
1985 blink tr0,r18 /* call; r18 receives the address of the next instruction */
1986 sub r63,r2,r2 /* negate the unsigned remainder */
1987 blink tr1,r63
1988 ENDFUNC(GLOBAL(moddi3))
1989 #endif /* __SHMEDIA__ */
1990 #endif /* L_moddi3 */
1992 #ifdef L_set_fpscr
1993 #if !defined (__SH2A_NOFPU__)
1994 #if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
1995 #ifdef __SH5__
1996 .mode SHcompact
1997 #endif
/* set_fpscr: load fpscr from r4 and refresh the two-word fpscr_values
   table with variants of that value.
   In:  r4 = new fpscr value.
   Writes r0, r1, r2, r3 (r12 is saved and restored in the PIC case).

   swap.w brings fpscr bits 16-31 into the low half so that 8-bit
   immediates can reach bits 19 and 20 (immediates 8 and 16; PR and SZ
   in the SH-4 FPSCR layout).  One variant with those bits forced on is
   stored first, then a variant with them toggled; which of the two
   table slots receives which variant depends on the default FP mode
   selected at build time.
   Each final #if arm ends in rts with the last store in the delay slot,
   so execution never reaches the literal pool that follows.  */
1998 .global GLOBAL(set_fpscr)
1999 HIDDEN_FUNC(GLOBAL(set_fpscr))
2000 GLOBAL(set_fpscr):
2001 lds r4,fpscr
2002 #ifdef __PIC__
/* r1 = address of fpscr_values, fetched through the GOT.  */
2003 mov.l r12,@-r15
2004 mova LOCAL(set_fpscr_L0),r0
2005 mov.l LOCAL(set_fpscr_L0),r12
2006 add r0,r12
2007 mov.l LOCAL(set_fpscr_L1),r0
2008 mov.l @(r0,r12),r1
2009 mov.l @r15+,r12
2010 #else
2011 mov.l LOCAL(set_fpscr_L1),r1
2012 #endif
2013 swap.w r4,r0 /* bits 16-31 of the new value now sit in bits 0-15 */
2014 or #24,r0 /* force original bits 19 and 20 on */
2015 #ifndef FMOVD_WORKS
2016 xor #16,r0 /* ... but leave bit 20 clear when fmov.d is not usable */
2017 #endif
2018 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2019 swap.w r0,r3
2020 mov.l r3,@(4,r1) /* fpscr_values[1] */
2021 #else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2022 swap.w r0,r2
2023 mov.l r2,@r1 /* fpscr_values[0] */
2024 #endif
2025 #ifndef FMOVD_WORKS
2026 xor #8,r0 /* clear original bit 19 */
2027 #else
2028 xor #24,r0 /* clear original bits 19 and 20 */
2029 #endif
2030 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2031 swap.w r0,r2
2032 rts
2033 mov.l r2,@r1 /* delay slot: fpscr_values[0] */
2034 #else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2035 swap.w r0,r3
2036 rts
2037 mov.l r3,@(4,r1) /* delay slot: fpscr_values[1] */
2038 #endif
2039 .align 2
2040 #ifdef __PIC__
2041 LOCAL(set_fpscr_L0):
2042 .long _GLOBAL_OFFSET_TABLE_
2043 LOCAL(set_fpscr_L1):
2044 .long GLOBAL(fpscr_values@GOT)
2045 #else
2046 LOCAL(set_fpscr_L1):
2047 .long GLOBAL(fpscr_values)
2048 #endif
2050 ENDFUNC(GLOBAL(set_fpscr))
2051 #ifndef NO_FPSCR_VALUES
2052 #ifdef __ELF__
2053 .comm GLOBAL(fpscr_values),8,4
2054 #else
2055 .comm GLOBAL(fpscr_values),8
2056 #endif /* ELF */
2057 #endif /* NO_FPSCR_VALUES */
2058 #endif /* SH2E / SH3E / SH4 */
2059 #endif /* __SH2A_NOFPU__ */
2060 #endif /* L_set_fpscr */
2061 #ifdef L_ic_invalidate
2062 #if __SH5__ == 32
2063 .mode SHmedia
2064 .section .text..SHmedia32,"ax"
2065 .align 2
2066 .global GLOBAL(init_trampoline)
2067 HIDDEN_FUNC(GLOBAL(init_trampoline))
/* init_trampoline: fill in a 16-byte trampoline at the address in r0.
   Layout written: bytes 0-7 = a fixed 64-bit code template,
   bytes 8-11 = r2, bytes 12-15 = r3.
   The template is built from the 16-bit values 0xd002, 0xd101, 0x402b
   and 0x0009, assembled in an endian-dependent order; these look like
   SHcompact opcodes (two pc-relative loads, a jmp and a nop) - confirm
   against the trampoline description in sh.h.
   There is no return here: execution falls through into ic_invalidate
   below, which flushes the line at r0 and returns.  */
2068 GLOBAL(init_trampoline):
2069 st.l r0,8,r2
2070 #ifdef __LITTLE_ENDIAN__
2071 movi 9,r20
2072 shori 0x402b,r20
2073 shori 0xd101,r20
2074 shori 0xd002,r20
2075 #else
2076 movi 0xffffffffffffd002,r20
2077 shori 0xd101,r20
2078 shori 0x402b,r20
2079 shori 9,r20
2080 #endif
2081 st.q r0,0,r20
2082 st.l r0,12,r3
2083 ENDFUNC(GLOBAL(init_trampoline))
2084 .global GLOBAL(ic_invalidate)
2085 HIDDEN_FUNC(GLOBAL(ic_invalidate))
/* ic_invalidate: make code just written at the address in r0 visible to
   instruction fetch.  Returns through r18.  */
2086 GLOBAL(ic_invalidate):
2087 ocbwb r0,0 /* write the data cache block back to memory */
2088 synco
2089 icbi r0, 0 /* invalidate the matching instruction cache block */
2090 ptabs r18, tr0
2091 synci
2092 blink tr0, r63
2093 ENDFUNC(GLOBAL(ic_invalidate))
2094 #elif defined(__SH4A__)
/* ic_invalidate for SH4A: write back the data cache block containing
   the address in r4, then invalidate the corresponding instruction
   cache block.  icbi executes in the delay slot of the return.  */
2095 .global GLOBAL(ic_invalidate)
2096 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2097 GLOBAL(ic_invalidate):
2098 ocbwb @r4
2099 synco
2100 rts
2101 icbi @r4
2102 ENDFUNC(GLOBAL(ic_invalidate))
2103 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2104 /* For system code, we use ic_invalidate_line_i, but user code
2105 needs a different mechanism. A kernel call is generally not
2106 available, and it would also be slow. Different SH4 variants use
2107 different sizes and associativities of the Icache. We use a small
2108 bit of dispatch code that can be put hidden in every shared object,
2109 which calls the actual processor-specific invalidation code in a
2110 separate module.
2111 Or if you have operating system support, the OS could mmap the
2112 processor-specific code from a single page, since it is highly
2113 repetitive. */
/* Dispatch stub.
   In:  r4 = address whose instruction cache line must be invalidated.
   Writes r0, r1, r4 (and r2 when __pic__).
   Steps: load the address of ic_invalidate_array into r1 (through the
   GOT when __pic__), write back the data cache block at r4, then jump
   to   array + ((r4 - array) & mask)   where mask is the word stored at
   offset 8 of the array.  The word at offset 4 of the array is loaded
   into r0 in the jmp delay slot for use by the array code.
   ENDFUNC sits after the __pic__ conditional so that the symbol size is
   recorded in both configurations.  */
2114 .global GLOBAL(ic_invalidate)
2115 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2116 GLOBAL(ic_invalidate):
2117 mov.l 0f,r1
2118 #ifdef __pic__
2119 mova 0f,r0
2120 mov.l 1f,r2
2121 add r1,r0 /* r0 = address of the GOT */
2122 mov.l @(r0,r2),r1 /* r1 = address of ic_invalidate_array */
2123 #endif
2124 ocbwb @r4
2125 mov.l @(8,r1),r0 /* r0 = mask stored in the array */
2126 sub r1,r4 /* r4 = offset of the target address from the array */
2127 and r4,r0
2128 add r1,r0 /* r0 = array entry matching the target address */
2129 jmp @r0
2130 mov.l @(4,r1),r0 /* delay slot: r0 = word at offset 4 of the array */
2131 #ifndef __pic__
2132 0: .long GLOBAL(ic_invalidate_array)
2133 #else /* __pic__ */
2134 .global GLOBAL(ic_invalidate_array)
2135 /* ??? Why won't the assembler allow to add these two constants? */
2136 0: .long _GLOBAL_OFFSET_TABLE_
2137 1: .long GLOBAL(ic_invalidate_array)@GOT
2138 #endif /* __pic__ */
2139 ENDFUNC(GLOBAL(ic_invalidate))
2140 #endif /* SH4 */
2141 #endif /* L_ic_invalidate */
2143 #ifdef L_ic_invalidate_array
2144 #if defined(__SH4A__)
2145 /* This is needed when an SH4 dso with trampolines is used on SH4A. */
/* Entered from the SH4 ic_invalidate dispatch stub with r1 = address of
   this array and r4 = target address - r1.  The stub reads its mask
   from offset 8 of the array; the four 2-byte instructions below occupy
   offsets 0-7, which places the .long 0 exactly at offset 8, so the
   stub always jumps to the first instruction.  */
2146 .global GLOBAL(ic_invalidate_array)
2147 FUNC(GLOBAL(ic_invalidate_array))
2148 GLOBAL(ic_invalidate_array):
2149 add r1,r4 /* undo the stub's subtraction: r4 = target address */
2150 synco
2151 rts
2152 icbi @r4 /* delay slot: invalidate the instruction cache block */
2153 .long 0
2154 ENDFUNC(GLOBAL(ic_invalidate_array))
2155 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
/* Cache-sized block of tiny routines used by the SH4 ic_invalidate
   stub.  Every entry is exactly 32 bytes (the outer .rept counts are
   WAY_SIZE / 32), so executing the entry that aliases a given address
   displaces that address from the instruction cache.
   The stub reads the word at offset 8 of the array as its address mask
   and passes the word at offset 4 in r0.

   Entry layouts (byte sizes in parentheses):
     WAYS == 1:  rts, nop (4)  +  7 longs (28)
     WAYS <= 6:  braf, add (4) + 2 longs (8) + (WAYS-2) x braf, nop
                 + (7-WAYS) x rts, nop           = 32 for any WAYS
                 Each braf r0 lands on the matching entry one WAY_SIZE
                 further on, so every way is visited before the rts.
     WAYS >  6:  braf, nop (4) + 7 longs (28) for the first WAYS-1 ways,
                 then rts + 15 nop (32) for the last way.  */
2156 .global GLOBAL(ic_invalidate_array)
2157 .p2align 5
2158 FUNC(GLOBAL(ic_invalidate_array))
2159 /* This must be aligned to the beginning of a cache line. */
2160 GLOBAL(ic_invalidate_array):
2161 #ifndef WAYS
2162 #define WAYS 4
2163 #define WAY_SIZE 0x4000
2164 #endif
2165 #if WAYS == 1
2166 .rept WAY_SIZE * WAYS / 32
2167 rts
2168 nop
2169 .rept 7
2170 .long WAY_SIZE - 32
2171 .endr
2172 .endr
2173 #elif WAYS <= 6
2174 .rept WAY_SIZE * WAYS / 32
2175 braf r0
2176 add #-8,r0 /* delay slot: r0 = WAY_SIZE for the following braf hops */
2177 .long WAY_SIZE + 8
2178 .long WAY_SIZE - 32
2179 .rept WAYS-2
2180 braf r0
2181 nop
2182 .endr
2183 .rept 7 - WAYS
2184 rts
2185 nop
2186 .endr
2187 .endr
2188 #else /* WAYS > 6 */
2189 /* This variant needs two different pages for mmap-ing. */
2190 .rept WAYS-1
2191 .rept WAY_SIZE / 32
2192 braf r0
2193 nop
2194 .long WAY_SIZE
2195 .rept 6
2196 .long WAY_SIZE - 32
2197 .endr
2198 .endr
2199 .endr
2200 .rept WAY_SIZE / 32
2201 rts
2202 .rept 15
2203 nop
2204 .endr
2205 .endr
2206 #endif /* WAYS */
2207 ENDFUNC(GLOBAL(ic_invalidate_array))
2208 #endif /* SH4 */
2209 #endif /* L_ic_invalidate_array */
2211 #if defined (__SH5__) && __SH5__ == 32
2212 #ifdef L_shcompact_call_trampoline
2213 .section .rodata
2214 .align 1
2215 LOCAL(ct_main_table):
2216 .word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
2217 .word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
2218 .word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
2219 .word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
2220 .word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
2221 .word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
2222 .word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
2223 .word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
2224 .word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
2225 .word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
2226 .word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
2227 .word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
2228 .word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
2229 .word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
2230 .word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
2231 .word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
2232 .word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
2233 .word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
2234 .word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
2235 .word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
2236 .word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
2237 .word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
2238 .word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
2239 .word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
2240 .word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
2241 .word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
2242 .word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
2243 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2244 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2245 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2246 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2247 .word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
2248 .word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
2249 .mode SHmedia
2250 .section .text..SHmedia32, "ax"
2251 .align 2
2253 /* This function loads 64-bit general-purpose registers from the
2254 stack, from a memory address contained in them or from an FP
2255 register, according to a cookie passed in r1. Its execution
2256 time is linear on the number of registers that actually have
2257 to be copied. See sh.h for details on the actual bit pattern.
2259 The function to be called is passed in r0. If a 32-bit return
2260 value is expected, the actual function will be tail-called,
2261 otherwise the return address will be stored in r10 (that the
2262 caller should expect to be clobbered) and the return value
2263 will be expanded into r2/r3 upon return. */
2265 .global GLOBAL(GCC_shcompact_call_trampoline)
2266 FUNC(GLOBAL(GCC_shcompact_call_trampoline))
2267 GLOBAL(GCC_shcompact_call_trampoline):
2268 ptabs/l r0, tr0 /* Prepare to call the actual function. */
2269 movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
2270 pt/l LOCAL(ct_loop), tr1
2271 addz.l r1, r63, r1
2272 shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
2273 LOCAL(ct_loop):
2274 nsb r1, r28
2275 shlli r28, 1, r29
2276 ldx.w r0, r29, r30
2277 LOCAL(ct_main_label):
2278 ptrel/l r30, tr2
2279 blink tr2, r63
2280 LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
2281 /* It must be dr0, so just do it. */
2282 fmov.dq dr0, r2
2283 movi 7, r30
2284 shlli r30, 29, r31
2285 andc r1, r31, r1
2286 blink tr1, r63
2287 LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
2288 /* It is either dr0 or dr2. */
2289 movi 7, r30
2290 shlri r1, 26, r32
2291 shlli r30, 26, r31
2292 andc r1, r31, r1
2293 fmov.dq dr0, r3
2294 beqi/l r32, 4, tr1
2295 fmov.dq dr2, r3
2296 blink tr1, r63
2297 LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
2298 shlri r1, 23 - 3, r34
2299 andi r34, 3 << 3, r33
2300 addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
2301 LOCAL(ct_r4_fp_base):
2302 ptrel/l r32, tr2
2303 movi 7, r30
2304 shlli r30, 23, r31
2305 andc r1, r31, r1
2306 blink tr2, r63
2307 LOCAL(ct_r4_fp_copy):
2308 fmov.dq dr0, r4
2309 blink tr1, r63
2310 fmov.dq dr2, r4
2311 blink tr1, r63
2312 fmov.dq dr4, r4
2313 blink tr1, r63
2314 LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
2315 shlri r1, 20 - 3, r34
2316 andi r34, 3 << 3, r33
2317 addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
2318 LOCAL(ct_r5_fp_base):
2319 ptrel/l r32, tr2
2320 movi 7, r30
2321 shlli r30, 20, r31
2322 andc r1, r31, r1
2323 blink tr2, r63
2324 LOCAL(ct_r5_fp_copy):
2325 fmov.dq dr0, r5
2326 blink tr1, r63
2327 fmov.dq dr2, r5
2328 blink tr1, r63
2329 fmov.dq dr4, r5
2330 blink tr1, r63
2331 fmov.dq dr6, r5
2332 blink tr1, r63
2333 LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
2334 /* It must be dr8. */
2335 fmov.dq dr8, r6
2336 movi 15, r30
2337 shlli r30, 16, r31
2338 andc r1, r31, r1
2339 blink tr1, r63
2340 LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
2341 shlri r1, 16 - 3, r34
2342 andi r34, 3 << 3, r33
2343 addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2344 LOCAL(ct_r6_fp_base):
2345 ptrel/l r32, tr2
2346 movi 7, r30
2347 shlli r30, 16, r31
2348 andc r1, r31, r1
2349 blink tr2, r63
2350 LOCAL(ct_r6_fp_copy):
2351 fmov.dq dr0, r6
2352 blink tr1, r63
2353 fmov.dq dr2, r6
2354 blink tr1, r63
2355 fmov.dq dr4, r6
2356 blink tr1, r63
2357 fmov.dq dr6, r6
2358 blink tr1, r63
2359 LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
2360 /* It is either dr8 or dr10. */
2361 movi 15 << 12, r31
2362 shlri r1, 12, r32
2363 andc r1, r31, r1
2364 fmov.dq dr8, r7
2365 beqi/l r32, 8, tr1
2366 fmov.dq dr10, r7
2367 blink tr1, r63
2368 LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
2369 shlri r1, 12 - 3, r34
2370 andi r34, 3 << 3, r33
2371 addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2372 LOCAL(ct_r7_fp_base):
2373 ptrel/l r32, tr2
2374 movi 7 << 12, r31
2375 andc r1, r31, r1
2376 blink tr2, r63
2377 LOCAL(ct_r7_fp_copy):
2378 fmov.dq dr0, r7
2379 blink tr1, r63
2380 fmov.dq dr2, r7
2381 blink tr1, r63
2382 fmov.dq dr4, r7
2383 blink tr1, r63
2384 fmov.dq dr6, r7
2385 blink tr1, r63
2386 LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
2387 /* It is either dr8 or dr10. */
2388 movi 15 << 8, r31
2389 andi r1, 1 << 8, r32
2390 andc r1, r31, r1
2391 fmov.dq dr8, r8
2392 beq/l r32, r63, tr1
2393 fmov.dq dr10, r8
2394 blink tr1, r63
2395 LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
2396 shlri r1, 8 - 3, r34
2397 andi r34, 3 << 3, r33
2398 addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2399 LOCAL(ct_r8_fp_base):
2400 ptrel/l r32, tr2
2401 movi 7 << 8, r31
2402 andc r1, r31, r1
2403 blink tr2, r63
2404 LOCAL(ct_r8_fp_copy):
2405 fmov.dq dr0, r8
2406 blink tr1, r63
2407 fmov.dq dr2, r8
2408 blink tr1, r63
2409 fmov.dq dr4, r8
2410 blink tr1, r63
2411 fmov.dq dr6, r8
2412 blink tr1, r63
2413 LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
2414 /* It is either dr8 or dr10. */
2415 movi 15 << 4, r31
2416 andi r1, 1 << 4, r32
2417 andc r1, r31, r1
2418 fmov.dq dr8, r9
2419 beq/l r32, r63, tr1
2420 fmov.dq dr10, r9
2421 blink tr1, r63
2422 LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
2423 shlri r1, 4 - 3, r34
2424 andi r34, 3 << 3, r33
2425 addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2426 LOCAL(ct_r9_fp_base):
2427 ptrel/l r32, tr2
2428 movi 7 << 4, r31
2429 andc r1, r31, r1
2430 blink tr2, r63
2431 LOCAL(ct_r9_fp_copy):
2432 fmov.dq dr0, r9
2433 blink tr1, r63
2434 fmov.dq dr2, r9
2435 blink tr1, r63
2436 fmov.dq dr4, r9
2437 blink tr1, r63
2438 fmov.dq dr6, r9
2439 blink tr1, r63
2440 LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
2441 pt/l LOCAL(ct_r2_load), tr2
2442 movi 3, r30
2443 shlli r30, 29, r31
2444 and r1, r31, r32
2445 andc r1, r31, r1
2446 beq/l r31, r32, tr2
2447 addi.l r2, 8, r3
2448 ldx.q r2, r63, r2
2449 /* Fall through. */
2450 LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
2451 pt/l LOCAL(ct_r3_load), tr2
2452 movi 3, r30
2453 shlli r30, 26, r31
2454 and r1, r31, r32
2455 andc r1, r31, r1
2456 beq/l r31, r32, tr2
2457 addi.l r3, 8, r4
2458 ldx.q r3, r63, r3
2459 LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
2460 pt/l LOCAL(ct_r4_load), tr2
2461 movi 3, r30
2462 shlli r30, 23, r31
2463 and r1, r31, r32
2464 andc r1, r31, r1
2465 beq/l r31, r32, tr2
2466 addi.l r4, 8, r5
2467 ldx.q r4, r63, r4
2468 LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
2469 pt/l LOCAL(ct_r5_load), tr2
2470 movi 3, r30
2471 shlli r30, 20, r31
2472 and r1, r31, r32
2473 andc r1, r31, r1
2474 beq/l r31, r32, tr2
2475 addi.l r5, 8, r6
2476 ldx.q r5, r63, r5
2477 LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
2478 pt/l LOCAL(ct_r6_load), tr2
2479 movi 3 << 16, r31
2480 and r1, r31, r32
2481 andc r1, r31, r1
2482 beq/l r31, r32, tr2
2483 addi.l r6, 8, r7
2484 ldx.q r6, r63, r6
2485 LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
2486 pt/l LOCAL(ct_r7_load), tr2
2487 movi 3 << 12, r31
2488 and r1, r31, r32
2489 andc r1, r31, r1
2490 beq/l r31, r32, tr2
2491 addi.l r7, 8, r8
2492 ldx.q r7, r63, r7
2493 LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
2494 pt/l LOCAL(ct_r8_load), tr2
2495 movi 3 << 8, r31
2496 and r1, r31, r32
2497 andc r1, r31, r1
2498 beq/l r31, r32, tr2
2499 addi.l r8, 8, r9
2500 ldx.q r8, r63, r8
2501 LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
2502 pt/l LOCAL(ct_check_tramp), tr2
2503 ldx.q r9, r63, r9
2504 blink tr2, r63
2505 LOCAL(ct_r2_load):
2506 ldx.q r2, r63, r2
2507 blink tr1, r63
2508 LOCAL(ct_r3_load):
2509 ldx.q r3, r63, r3
2510 blink tr1, r63
2511 LOCAL(ct_r4_load):
2512 ldx.q r4, r63, r4
2513 blink tr1, r63
2514 LOCAL(ct_r5_load):
2515 ldx.q r5, r63, r5
2516 blink tr1, r63
2517 LOCAL(ct_r6_load):
2518 ldx.q r6, r63, r6
2519 blink tr1, r63
2520 LOCAL(ct_r7_load):
2521 ldx.q r7, r63, r7
2522 blink tr1, r63
2523 LOCAL(ct_r8_load):
2524 ldx.q r8, r63, r8
2525 blink tr1, r63
2526 LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
2527 movi 1, r30
2528 ldx.q r15, r63, r2
2529 shlli r30, 29, r31
2530 addi.l r15, 8, r15
2531 andc r1, r31, r1
2532 blink tr1, r63
2533 LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
2534 movi 1, r30
2535 ldx.q r15, r63, r3
2536 shlli r30, 26, r31
2537 addi.l r15, 8, r15
2538 andc r1, r31, r1
2539 blink tr1, r63
2540 LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
2541 movi 1, r30
2542 ldx.q r15, r63, r4
2543 shlli r30, 23, r31
2544 addi.l r15, 8, r15
2545 andc r1, r31, r1
2546 blink tr1, r63
2547 LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
2548 movi 1, r30
2549 ldx.q r15, r63, r5
2550 shlli r30, 20, r31
2551 addi.l r15, 8, r15
2552 andc r1, r31, r1
2553 blink tr1, r63
2554 LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
2555 movi 1, r30
2556 ldx.q r15, r63, r6
2557 shlli r30, 16, r31
2558 addi.l r15, 8, r15
2559 andc r1, r31, r1
2560 blink tr1, r63
2561 LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
2562 ldx.q r15, r63, r7
2563 movi 1 << 12, r31
2564 addi.l r15, 8, r15
2565 andc r1, r31, r1
2566 blink tr1, r63
2567 LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
2568 ldx.q r15, r63, r8
2569 movi 1 << 8, r31
2570 addi.l r15, 8, r15
2571 andc r1, r31, r1
2572 blink tr1, r63
2573 LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
2574 andi r1, 7 << 1, r30
2575 movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2576 shlli r30, 2, r31
2577 shori LOCAL(ct_end_of_pop_seq) & 65535, r32
2578 sub.l r32, r31, r33
2579 ptabs/l r33, tr2
2580 blink tr2, r63
2581 LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
2582 ldx.q r15, r63, r3
2583 addi.l r15, 8, r15
2584 ldx.q r15, r63, r4
2585 addi.l r15, 8, r15
2586 ldx.q r15, r63, r5
2587 addi.l r15, 8, r15
2588 ldx.q r15, r63, r6
2589 addi.l r15, 8, r15
2590 ldx.q r15, r63, r7
2591 addi.l r15, 8, r15
2592 ldx.q r15, r63, r8
2593 addi.l r15, 8, r15
2594 LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
2595 ldx.q r15, r63, r9
2596 addi.l r15, 8, r15
2597 LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
2598 LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
2599 pt/u LOCAL(ct_ret_wide), tr2
2600 andi r1, 1, r1
2601 bne/u r1, r63, tr2
2602 LOCAL(ct_call_func): /* Just branch to the function. */
2603 blink tr0, r63
2604 LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
2605 64-bit return value. */
2606 add.l r18, r63, r10
2607 blink tr0, r18
2608 ptabs r10, tr0
2609 #if __LITTLE_ENDIAN__
2610 shari r2, 32, r3
2611 add.l r2, r63, r2
2612 #else
2613 add.l r2, r63, r3
2614 shari r2, 32, r2
2615 #endif
2616 blink tr0, r63
2618 ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
2619 #endif /* L_shcompact_call_trampoline */
2621 #ifdef L_shcompact_return_trampoline
2622 /* This function does the converse of the code in `ret_wide'
2623 above. It is tail-called by SHcompact functions returning
2624 64-bit non-floating-point values, to pack the 32-bit values in
2625 r2 and r3 into r2. */
/* Register use, as read from the code below:
     in:   r2, r3 - the two 32-bit halves of the value; which of them
                    is the high half depends on __LITTLE_ENDIAN__
           r18    - address to return to
     out:  r2     - the packed 64-bit value
     clobbered: r3, tr0.  */
2627 .mode SHmedia
2628 .section .text..SHmedia32, "ax"
2629 .align 2
2630 .global GLOBAL(GCC_shcompact_return_trampoline)
2631 HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
2632 GLOBAL(GCC_shcompact_return_trampoline):
2633 ptabs/l r18, tr0 /* Prepare to return through r18. */
2634 #if __LITTLE_ENDIAN__
/* Little endian: r2 supplies bits 0..31, r3 supplies bits 32..63. */
2635 addz.l r2, r63, r2 /* Keep the low 32 bits of r2, zero-extended. */
2636 shlli r3, 32, r3 /* Move r3 into the upper 32 bits. */
2637 #else
/* Big endian: r3 supplies bits 0..31, r2 supplies bits 32..63. */
2638 addz.l r3, r63, r3 /* Keep the low 32 bits of r3, zero-extended. */
2639 shlli r2, 32, r2 /* Move r2 into the upper 32 bits. */
2640 #endif
2641 or r3, r2, r2 /* Merge both halves into r2. */
2642 blink tr0, r63 /* Return; r63 as link register discards the link. */
2644 ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
2645 #endif /* L_shcompact_return_trampoline */
2647 #ifdef L_shcompact_incoming_args
/* Dispatch table used by GCC_shcompact_incoming_args.  Each entry is a
   16-bit offset from ia_main_label to the handler to branch to.  The
   function indexes it with twice the `nsb' result of the remaining
   cookie bits, using a base address biased by -31 entries, so entry 0
   corresponds to an nsb result of 31.
   Entries holding the constant 1 are marked invalid.
   NOTE(review): 1 is presumably offset 0 plus the SHmedia mode bit, so
   the branch would land back on ia_main_label and spin -- confirm.
   The order and count of the entries (33) are load-bearing: do not
   reorder.  */
2648 .section .rodata
2649 .align 1
2650 LOCAL(ia_main_table):
/* r2: three entries. */
2651 .word 1 /* Invalid, just loop */
2652 .word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
2653 .word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
/* r3: three entries. */
2654 .word 1 /* Invalid, just loop */
2655 .word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
2656 .word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
/* r4: three entries. */
2657 .word 1 /* Invalid, just loop */
2658 .word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
2659 .word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
/* r5: three entries. */
2660 .word 1 /* Invalid, just loop */
2661 .word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
2662 .word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
/* r6: four entries (two invalid ones first). */
2663 .word 1 /* Invalid, just loop */
2664 .word 1 /* Invalid, just loop */
2665 .word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
2666 .word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
/* r7: four entries. */
2667 .word 1 /* Invalid, just loop */
2668 .word 1 /* Invalid, just loop */
2669 .word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
2670 .word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
/* r8: four entries. */
2671 .word 1 /* Invalid, just loop */
2672 .word 1 /* Invalid, just loop */
2673 .word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
2674 .word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
/* r9: four entries. */
2675 .word 1 /* Invalid, just loop */
2676 .word 1 /* Invalid, just loop */
2677 .word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
2678 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
/* Trailing entries: push sequence, single r9 push, plain return. */
2679 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2680 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2681 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2682 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2683 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2684 .mode SHmedia
2685 .section .text..SHmedia32, "ax"
2686 .align 2
2688 /* This function stores 64-bit general-purpose registers back in
2689 the stack, and loads the address in which each register
2690 was stored into itself. The lower 32 bits of r17 hold the address
2691 to begin storing, and the upper 32 bits of r17 hold the cookie.
2692 Its execution time is linear on the
2693 number of registers that actually have to be copied, and it is
2694 optimized for structures larger than 64 bits, as opposed to
2695 individual `long long' arguments. See sh.h for details on the
2696 actual bit pattern. */
/* Register use, as read from the code below:
     in:   r17    - low 32 bits: store address; high 32 bits: cookie
           r18    - address to return to
           r2..r9 - values to be stored
     out:  r2..r9 - a register handled by an ia_rN_ld entry holds the
                    address it was stored at
     scratch: r0 (remaining cookie bits), r36-r41, r43, tr0-tr2;
              r17 is advanced by 8 past every slot written, except
              after the final r9 store.  */
2698 .global GLOBAL(GCC_shcompact_incoming_args)
2699 FUNC(GLOBAL(GCC_shcompact_incoming_args))
2700 GLOBAL(GCC_shcompact_incoming_args):
2701 ptabs/l r18, tr0 /* Prepare to return. */
2702 shlri r17, 32, r0 /* Load the cookie. */
/* r43 = address of ia_main_table biased by -31 entries of 2 bytes,
   built from its high and low 16-bit halves (movi here, shori below). */
2703 movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
2704 pt/l LOCAL(ia_loop), tr1 /* tr1 = top of the dispatch loop. */
2705 add.l r17, r63, r17 /* Keep only the (sign-extended) low 32 bits: the store address. */
2706 shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
/* Dispatch loop: pick the handler from the position of the highest
   remaining cookie bit. */
2707 LOCAL(ia_loop):
2708 nsb r0, r36 /* r36 = sign-bit count of the remaining cookie. */
2709 shlli r36, 1, r37 /* Scale to a byte offset; entries are 2 bytes. */
2710 ldx.w r43, r37, r38 /* r38 = handler offset from ia_main_label. */
2711 LOCAL(ia_main_label):
2712 ptrel/l r38, tr2
2713 blink tr2, r63
/* ia_rN_ld handlers: clear the two-bit field of rN in the cookie, store
   rN at r17, replace rN by that address and advance r17.  When both
   bits of the field were set, go back to ia_loop; otherwise fall
   through into the handler of the next register. */
2714 LOCAL(ia_r2_ld): /* Store r2 and load its address. */
2715 movi 3, r38
2716 shlli r38, 29, r39 /* r39 = 3 << 29, the field mask for r2. */
2717 and r0, r39, r40 /* r40 = field value before clearing. */
2718 andc r0, r39, r0 /* Clear the field in the cookie. */
2719 stx.q r17, r63, r2
2720 add.l r17, r63, r2
2721 addi.l r17, 8, r17
2722 beq/u r39, r40, tr1 /* Both bits were set: back to ia_loop. */
2723 LOCAL(ia_r3_ld): /* Store r3 and load its address. */
2724 movi 3, r38
2725 shlli r38, 26, r39 /* Field mask 3 << 26. */
2726 and r0, r39, r40
2727 andc r0, r39, r0
2728 stx.q r17, r63, r3
2729 add.l r17, r63, r3
2730 addi.l r17, 8, r17
2731 beq/u r39, r40, tr1
2732 LOCAL(ia_r4_ld): /* Store r4 and load its address. */
2733 movi 3, r38
2734 shlli r38, 23, r39 /* Field mask 3 << 23. */
2735 and r0, r39, r40
2736 andc r0, r39, r0
2737 stx.q r17, r63, r4
2738 add.l r17, r63, r4
2739 addi.l r17, 8, r17
2740 beq/u r39, r40, tr1
2741 LOCAL(ia_r5_ld): /* Store r5 and load its address. */
2742 movi 3, r38
2743 shlli r38, 20, r39 /* Field mask 3 << 20. */
2744 and r0, r39, r40
2745 andc r0, r39, r0
2746 stx.q r17, r63, r5
2747 add.l r17, r63, r5
2748 addi.l r17, 8, r17
2749 beq/u r39, r40, tr1
2750 LOCAL(ia_r6_ld): /* Store r6 and load its address. */
2751 movi 3, r38
2752 shlli r38, 16, r39 /* Field mask 3 << 16. */
2753 and r0, r39, r40
2754 andc r0, r39, r0
2755 stx.q r17, r63, r6
2756 add.l r17, r63, r6
2757 addi.l r17, 8, r17
2758 beq/u r39, r40, tr1
2759 LOCAL(ia_r7_ld): /* Store r7 and load its address. */
2760 movi 3 << 12, r39 /* Field mask 3 << 12, loaded directly. */
2761 and r0, r39, r40
2762 andc r0, r39, r0
2763 stx.q r17, r63, r7
2764 add.l r17, r63, r7
2765 addi.l r17, 8, r17
2766 beq/u r39, r40, tr1
2767 LOCAL(ia_r8_ld): /* Store r8 and load its address. */
2768 movi 3 << 8, r39 /* Field mask 3 << 8, loaded directly. */
2769 and r0, r39, r40
2770 andc r0, r39, r0
2771 stx.q r17, r63, r8
2772 add.l r17, r63, r8
2773 addi.l r17, 8, r17
2774 beq/u r39, r40, tr1
/* Last register: no cookie update and no r17 advance, just return. */
2775 LOCAL(ia_r9_ld): /* Store r9 and load its address. */
2776 stx.q r17, r63, r9
2777 add.l r17, r63, r9
2778 blink tr0, r63
/* ia_rN_push handlers: clear a single cookie bit, store rN at r17,
   advance r17 and go back to ia_loop.  rN itself is left unchanged. */
2779 LOCAL(ia_r2_push): /* Push r2 onto the stack. */
2780 movi 1, r38
2781 shlli r38, 29, r39 /* Bit mask 1 << 29. */
2782 andc r0, r39, r0
2783 stx.q r17, r63, r2
2784 addi.l r17, 8, r17
2785 blink tr1, r63
2786 LOCAL(ia_r3_push): /* Push r3 onto the stack. */
2787 movi 1, r38
2788 shlli r38, 26, r39 /* Bit mask 1 << 26. */
2789 andc r0, r39, r0
2790 stx.q r17, r63, r3
2791 addi.l r17, 8, r17
2792 blink tr1, r63
2793 LOCAL(ia_r4_push): /* Push r4 onto the stack. */
2794 movi 1, r38
2795 shlli r38, 23, r39 /* Bit mask 1 << 23. */
2796 andc r0, r39, r0
2797 stx.q r17, r63, r4
2798 addi.l r17, 8, r17
2799 blink tr1, r63
2800 LOCAL(ia_r5_push): /* Push r5 onto the stack. */
2801 movi 1, r38
2802 shlli r38, 20, r39 /* Bit mask 1 << 20. */
2803 andc r0, r39, r0
2804 stx.q r17, r63, r5
2805 addi.l r17, 8, r17
2806 blink tr1, r63
2807 LOCAL(ia_r6_push): /* Push r6 onto the stack. */
2808 movi 1, r38
2809 shlli r38, 16, r39 /* Bit mask 1 << 16. */
2810 andc r0, r39, r0
2811 stx.q r17, r63, r6
2812 addi.l r17, 8, r17
2813 blink tr1, r63
2814 LOCAL(ia_r7_push): /* Push r7 onto the stack. */
2815 movi 1 << 12, r39
2816 andc r0, r39, r0
2817 stx.q r17, r63, r7
2818 addi.l r17, 8, r17
2819 blink tr1, r63
2820 LOCAL(ia_r8_push): /* Push r8 onto the stack. */
2821 movi 1 << 8, r39
2822 andc r0, r39, r0
2823 stx.q r17, r63, r8
2824 addi.l r17, 8, r17
2825 blink tr1, r63
/* Computed entry into the straight-line store sequence below.
   The branch target is ia_end_of_push_seq - ((r0 & (7 << 1)) << 2),
   i.e. 8 bytes back per count held in cookie bits 1..3.
   NOTE(review): this relies on every stx.q/addi.l pair, and on the
   final stx.q/blink pair, occupying exactly 8 bytes -- do not insert
   or remove instructions between the labels below. */
2826 LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
2827 andi r0, 7 << 1, r38 /* r38 = 2 * count. */
2828 movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
2829 shlli r38, 2, r39 /* r39 = 8 * count. */
2830 shori LOCAL(ia_end_of_push_seq) & 65535, r40 /* r40 = address of the end label. */
2831 sub.l r40, r39, r41 /* r41 = address of the first store to execute. */
2832 ptabs/l r41, tr2
2833 blink tr2, r63
/* NOTE(review): this label is not referenced anywhere in the visible
   code. */
2834 LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */
2835 stx.q r17, r63, r3
2836 addi.l r17, 8, r17
2837 stx.q r17, r63, r4
2838 addi.l r17, 8, r17
2839 stx.q r17, r63, r5
2840 addi.l r17, 8, r17
2841 stx.q r17, r63, r6
2842 addi.l r17, 8, r17
2843 stx.q r17, r63, r7
2844 addi.l r17, 8, r17
2845 stx.q r17, r63, r8
2846 addi.l r17, 8, r17
2847 LOCAL(ia_r9_push): /* Push r9 onto the stack. */
2848 stx.q r17, r63, r9
2849 LOCAL(ia_return): /* Return. */
2850 blink tr0, r63
2851 LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
2852 ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
2853 #endif /* L_shcompact_incoming_args */
2854 #endif
2855 #if __SH5__
2856 #ifdef L_nested_trampoline
/* Template code for a nested-function trampoline.  It obtains its own
   address, loads two values stored at fixed offsets from it, branches
   to the first one and leaves the second one in r1.
   NOTE(review): the six instructions below presumably occupy 24 bytes,
   which would place the two data words right behind the code; r1 is
   presumably the static chain for the target -- confirm against the
   trampoline initialisation code in the compiler.
   Clobbers r0, r1, tr0, tr1.  */
2857 #if __SH5__ == 32
2858 .section .text..SHmedia32,"ax"
2859 #else
2860 .text
2861 #endif
2862 .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
2863 .global GLOBAL(GCC_nested_trampoline)
2864 HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
2865 GLOBAL(GCC_nested_trampoline):
2866 .mode SHmedia
2867 ptrel/u r63, tr0 /* tr0 = PC-relative target with a zero offset. */
2868 gettr tr0, r0 /* r0 = that address, used as base for the loads. */
2869 #if __SH5__ == 64
2870 ld.q r0, 24, r1 /* 64-bit ABI: first datum is a quadword at +24. */
2871 #else
2872 ld.l r0, 24, r1 /* 32-bit ABI: first datum is a longword at +24. */
2873 #endif
2874 ptabs/l r1, tr1 /* The first datum is the branch target. */
2875 #if __SH5__ == 64
2876 ld.q r0, 32, r1 /* Second datum at +32 stays in r1. */
2877 #else
2878 ld.l r0, 28, r1 /* Second datum at +28 stays in r1. */
2879 #endif
2880 blink tr1, r63 /* Jump to the target without linking. */
2882 ENDFUNC(GLOBAL(GCC_nested_trampoline))
2883 #endif /* L_nested_trampoline */
2884 #endif /* __SH5__ */
2885 #if __SH5__ == 32
2886 #ifdef L_push_pop_shmedia_regs
/* Save a fixed set of registers below the stack pointer r15.
   Without __SH4_NOFPU__ the entry point is GCC_push_shmedia_regs: it
   first allocates 14 slots and stores dr36..dr62, then continues into
   the common code.  With __SH4_NOFPU__ the entry point is
   GCC_push_shmedia_regs_nofpu and only the common code is assembled.
   The common code allocates 27 further slots of 8 bytes:
     slots 0..7    r28..r35
     slots 8..23   r44..r59
     slots 24..26  tr5, tr6, tr7
   r60..r62 are used as temporaries for the target registers and are
   overwritten before anything is stored, so their incoming values are
   not saved.  Returns through r18; also clobbers tr0.  */
2887 .section .text..SHmedia32,"ax"
2888 .mode SHmedia
2889 .align 2
2890 #ifndef __SH4_NOFPU__
2891 .global GLOBAL(GCC_push_shmedia_regs)
2892 FUNC(GLOBAL(GCC_push_shmedia_regs))
2893 GLOBAL(GCC_push_shmedia_regs):
2894 addi.l r15, -14*8, r15 /* Allocate 14 slots for the FP registers. */
2895 fst.d r15, 13*8, dr62
2896 fst.d r15, 12*8, dr60
2897 fst.d r15, 11*8, dr58
2898 fst.d r15, 10*8, dr56
2899 fst.d r15, 9*8, dr54
2900 fst.d r15, 8*8, dr52
2901 fst.d r15, 7*8, dr50
2902 fst.d r15, 6*8, dr48
2903 fst.d r15, 5*8, dr46
2904 fst.d r15, 4*8, dr44
2905 fst.d r15, 3*8, dr42
2906 fst.d r15, 2*8, dr40
2907 fst.d r15, 1*8, dr38
2908 fst.d r15, 0*8, dr36
/* Falls through into the common integer/target-register code. */
2909 #else /* __SH4_NOFPU__ */
2910 .global GLOBAL(GCC_push_shmedia_regs_nofpu)
2911 FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2912 GLOBAL(GCC_push_shmedia_regs_nofpu):
2913 #endif /* ! __SH4_NOFPU__ */
2914 ptabs/l r18, tr0 /* Prepare to return. */
2915 addi.l r15, -27*8, r15 /* Allocate 27 slots for the common part. */
2916 gettr tr7, r62 /* Copy tr7..tr5 into r62..r60 so they can be stored. */
2917 gettr tr6, r61
2918 gettr tr5, r60
2919 st.q r15, 26*8, r62 /* Slot 26: tr7. */
2920 st.q r15, 25*8, r61 /* Slot 25: tr6. */
2921 st.q r15, 24*8, r60 /* Slot 24: tr5. */
2922 st.q r15, 23*8, r59
2923 st.q r15, 22*8, r58
2924 st.q r15, 21*8, r57
2925 st.q r15, 20*8, r56
2926 st.q r15, 19*8, r55
2927 st.q r15, 18*8, r54
2928 st.q r15, 17*8, r53
2929 st.q r15, 16*8, r52
2930 st.q r15, 15*8, r51
2931 st.q r15, 14*8, r50
2932 st.q r15, 13*8, r49
2933 st.q r15, 12*8, r48
2934 st.q r15, 11*8, r47
2935 st.q r15, 10*8, r46
2936 st.q r15, 9*8, r45
2937 st.q r15, 8*8, r44
/* The register numbering jumps from r44 to r35 here. */
2938 st.q r15, 7*8, r35
2939 st.q r15, 6*8, r34
2940 st.q r15, 5*8, r33
2941 st.q r15, 4*8, r32
2942 st.q r15, 3*8, r31
2943 st.q r15, 2*8, r30
2944 st.q r15, 1*8, r29
2945 st.q r15, 0*8, r28
2946 blink tr0, r63 /* Return. */
2947 #ifndef __SH4_NOFPU__
2948 ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
2949 #else
2950 ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2951 #endif
/* Reload the registers from a frame at r15 and release it.
   Frame layout in 8-byte slots, as read by the loads below:
     slots 0..7    r28..r35
     slots 8..23   r44..r59
     slots 24..26  values for tr5, tr6, tr7 (loaded through r60..r62)
     slots 27..40  dr36..dr62 (only read by GCC_pop_shmedia_regs)
   r0 carries the frame size to release: 41*8 for the FPU entry point,
   27*8 for the _nofpu one.  Returns through r18.
   Clobbers r0, r60..r62, tr0 and, for the FPU entry point, tr1.  */
2952 #ifndef __SH4_NOFPU__
2953 .global GLOBAL(GCC_pop_shmedia_regs)
2954 FUNC(GLOBAL(GCC_pop_shmedia_regs))
2955 GLOBAL(GCC_pop_shmedia_regs):
2956 pt .L0, tr1 /* tr1 = start of the common restore code. */
2957 movi 41*8, r0 /* Frame size including the FP slots. */
2958 fld.d r15, 40*8, dr62
2959 fld.d r15, 39*8, dr60
2960 fld.d r15, 38*8, dr58
2961 fld.d r15, 37*8, dr56
2962 fld.d r15, 36*8, dr54
2963 fld.d r15, 35*8, dr52
2964 fld.d r15, 34*8, dr50
2965 fld.d r15, 33*8, dr48
2966 fld.d r15, 32*8, dr46
2967 fld.d r15, 31*8, dr44
2968 fld.d r15, 30*8, dr42
2969 fld.d r15, 29*8, dr40
2970 fld.d r15, 28*8, dr38
2971 fld.d r15, 27*8, dr36
2972 blink tr1, r63 /* Jump to .L0, skipping the movi below so r0 stays 41*8. */
2973 #else /* __SH4_NOFPU__ */
2974 .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
2975 FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2976 GLOBAL(GCC_pop_shmedia_regs_nofpu):
2977 #endif /* ! __SH4_NOFPU__ */
/* In the FPU build this movi is never executed: the only entry point
   branches over it.  In the _nofpu build it is the first instruction
   of the function. */
2978 movi 27*8, r0
2979 .L0:
2980 ptabs r18, tr0 /* Prepare to return. */
2981 ld.q r15, 26*8, r62
2982 ld.q r15, 25*8, r61
2983 ld.q r15, 24*8, r60
2984 ptabs r62, tr7 /* Restore tr7..tr5 from the values just loaded. */
2985 ptabs r61, tr6
2986 ptabs r60, tr5
2987 ld.q r15, 23*8, r59
2988 ld.q r15, 22*8, r58
2989 ld.q r15, 21*8, r57
2990 ld.q r15, 20*8, r56
2991 ld.q r15, 19*8, r55
2992 ld.q r15, 18*8, r54
2993 ld.q r15, 17*8, r53
2994 ld.q r15, 16*8, r52
2995 ld.q r15, 15*8, r51
2996 ld.q r15, 14*8, r50
2997 ld.q r15, 13*8, r49
2998 ld.q r15, 12*8, r48
2999 ld.q r15, 11*8, r47
3000 ld.q r15, 10*8, r46
3001 ld.q r15, 9*8, r45
3002 ld.q r15, 8*8, r44
3003 ld.q r15, 7*8, r35
3004 ld.q r15, 6*8, r34
3005 ld.q r15, 5*8, r33
3006 ld.q r15, 4*8, r32
3007 ld.q r15, 3*8, r31
3008 ld.q r15, 2*8, r30
3009 ld.q r15, 1*8, r29
3010 ld.q r15, 0*8, r28
3011 add.l r15, r0, r15 /* Release the frame: 27 or 41 slots. */
3012 blink tr0, r63 /* Return. */
3014 #ifndef __SH4_NOFPU__
3015 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
3016 #else
3017 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
3018 #endif
3019 #endif /* L_push_pop_shmedia_regs */
3020 #endif /* __SH5__ == 32 */
3022 #if __SH5__
3023 #ifdef L_div_table
/* For PIC SHmedia builds the division routine sdivsi3 is assembled
   here, directly in front of the table it reads; in all other builds
   only the table is emitted, into .rodata. */
3024 #if defined(__pic__) && defined(__SHMEDIA__)
3025 .global GLOBAL(sdivsi3)
3026 FUNC(GLOBAL(sdivsi3))
3027 #if __SH5__ == 32
3028 .section .text..SHmedia32,"ax"
3029 #else
3030 .text
3031 #endif
/* TEXT_DATA_BUG is only defined inside this disabled `#if 0', so the
   Local_div_table variant further down is currently never selected. */
3032 #if 0
3033 /* ??? FIXME: Presumably due to a linker bug, exporting data symbols
3034 in a text section does not work (at least for shared libraries):
3035 the linker sets the LSB of the address as if this was SHmedia code. */
3036 #define TEXT_DATA_BUG
3037 #endif
3038 .align 2
/* NOTE(review): presumably computes the signed 32-bit quotient r4 / r5
   using a table-seeded reciprocal that is refined in two steps -- the
   fixed-point formats are given in the per-line comments; confirm the
   derivation against divtab.c.
   r18 (the return address) is copied to tr0 before r18 is reused as a
   scratch register. */
3039 // inputs: r4,r5
3040 // clobbered: r1,r18,r19,r20,r21,r25,tr0
3041 // result in r0
/* NOTE(review): sdivsi3 was already declared .global above; this second
   directive looks redundant. */
3042 .global GLOBAL(sdivsi3)
3043 GLOBAL(sdivsi3):
3044 #ifdef TEXT_DATA_BUG
3045 ptb datalabel Local_div_table,tr0
3046 #else
3047 ptb GLOBAL(div_table_internal),tr0
3048 #endif
3049 nsb r5, r1
3050 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
3051 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
3052 /* bubble */
/* r20 = table address taken from tr0; r21 is a signed index relative
   to the table label, used first as a byte index and then, doubled, as
   a halfword index. */
3053 gettr tr0,r20
3054 ldx.ub r20, r21, r19 // u0.8
3055 shari r25, 32, r25 // normalize to s2.30
3056 shlli r21, 1, r21
3057 muls.l r25, r19, r19 // s2.38
3058 ldx.w r20, r21, r21 // s2.14
/* The table address in tr0 is no longer needed; load the return
   address. */
3059 ptabs r18, tr0
3060 shari r19, 24, r19 // truncate to s2.14
3061 sub r21, r19, r19 // some 11 bit inverse in s1.14
3062 muls.l r19, r19, r21 // u0.28
/* r1 = 92 - nsb(r5): the amount of the final right shift. */
3063 sub r63, r1, r1
3064 addi r1, 92, r1
3065 muls.l r25, r21, r18 // s2.58
3066 shlli r19, 45, r19 // multiply by two and convert to s2.58
3067 /* bubble */
3068 sub r19, r18, r18
3069 shari r18, 28, r18 // some 22 bit inverse in s1.30
3070 muls.l r18, r25, r0 // s2.60
3071 muls.l r18, r4, r25 // s32.30
3072 /* bubble */
3073 shari r0, 16, r19 // s-16.44
3074 muls.l r19, r18, r19 // s-16.74
/* r0 = sign mask of the product in r25 (all ones if negative, else 0). */
3075 shari r25, 63, r0
3076 shari r4, 14, r18 // s19.-14
3077 shari r19, 30, r19 // s-16.44
3078 muls.l r19, r18, r19 // s15.30
3079 xor r21, r0, r21 // You could also use the constant 1 << 27.
3080 add r21, r25, r21
3081 sub r21, r19, r21
3082 shard r21, r1, r21
/* Subtracting the sign mask adds 1 when the product was negative. */
3083 sub r21, r0, r0
3084 blink tr0, r63
3085 ENDFUNC(GLOBAL(sdivsi3))
3086 /* This table has been generated by divtab.c .
3087 Defects for bias -330:
3088 Max defect: 6.081536e-07 at -1.000000e+00
3089 Min defect: 2.849516e-08 at 1.030651e+00
3090 Max 2nd step defect: 9.606539e-12 at -1.000000e+00
3091 Min 2nd step defect: 0.000000e+00 at 0.000000e+00
3092 Defect at 1: 1.238659e-07
3093 Defect at -2: 1.061708e-07 */
3094 #else /* ! __pic__ || ! __SHMEDIA__ */
3095 .section .rodata
3096 #endif /* __pic__ && __SHMEDIA__ */
/* File-local copy of the division table, assembled only when
   TEXT_DATA_BUG is defined in a PIC SHmedia build.
   Layout, 128 bytes in total, with the label in the middle:
     -64 .. -33   16 halfwords: negative division constants
     -32 .. -17   16 bytes:     negative division factors
     -16 ..  -1   16 bytes of padding
       0 ..  15   16 bytes of padding
      16 ..  31   16 bytes:     positive division factors
      32 ..  63   16 halfwords: positive division constants
   The data is indexed relative to the label, so the order, the element
   sizes and both .skip directives must stay exactly as they are. */
3097 #if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__)
3098 .balign 2
3099 .type Local_div_table,@object
3100 .size Local_div_table,128
3101 /* negative division constants */
3102 .word -16638
3103 .word -17135
3104 .word -17737
3105 .word -18433
3106 .word -19103
3107 .word -19751
3108 .word -20583
3109 .word -21383
3110 .word -22343
3111 .word -23353
3112 .word -24407
3113 .word -25582
3114 .word -26863
3115 .word -28382
3116 .word -29965
3117 .word -31800
3118 /* negative division factors */
3119 .byte 66
3120 .byte 70
3121 .byte 75
3122 .byte 81
3123 .byte 87
3124 .byte 93
3125 .byte 101
3126 .byte 109
3127 .byte 119
3128 .byte 130
3129 .byte 142
3130 .byte 156
3131 .byte 172
3132 .byte 192
3133 .byte 214
3134 .byte 241
/* Unused gap on either side of the label. */
3135 .skip 16
3136 Local_div_table:
3137 .skip 16
3138 /* positive division factors */
3139 .byte 241
3140 .byte 214
3141 .byte 192
3142 .byte 172
3143 .byte 156
3144 .byte 142
3145 .byte 130
3146 .byte 119
3147 .byte 109
3148 .byte 101
3149 .byte 93
3150 .byte 87
3151 .byte 81
3152 .byte 75
3153 .byte 70
3154 .byte 66
3155 /* positive division constants */
3156 .word 31801
3157 .word 29966
3158 .word 28383
3159 .word 26864
3160 .word 25583
3161 .word 24408
3162 .word 23354
3163 .word 22344
3164 .word 21384
3165 .word 20584
3166 .word 19752
3167 .word 19104
3168 .word 18434
3169 .word 17738
3170 .word 17136
3171 .word 16639
/* The exported table that follows goes into .rodata. */
3172 .section .rodata
3173 #endif /* TEXT_DATA_BUG */
/* Exported division table.  The symbol div_table, together with its
   hidden alias div_table_internal, points at the middle of a 128-byte
   object:
     -64 .. -33   16 halfwords: negative division constants
     -32 .. -17   16 bytes:     negative division factors
     -16 ..  -1   16 bytes of padding
       0 ..  15   16 bytes of padding
      16 ..  31   16 bytes:     positive division factors
      32 ..  63   16 halfwords: positive division constants
   Users index the data relative to the label, so the order, the
   element sizes and both .skip directives must stay exactly as they
   are. */
3174 .balign 2
3175 .type GLOBAL(div_table),@object
3176 .size GLOBAL(div_table),128
3177 /* negative division constants */
3178 .word -16638
3179 .word -17135
3180 .word -17737
3181 .word -18433
3182 .word -19103
3183 .word -19751
3184 .word -20583
3185 .word -21383
3186 .word -22343
3187 .word -23353
3188 .word -24407
3189 .word -25582
3190 .word -26863
3191 .word -28382
3192 .word -29965
3193 .word -31800
3194 /* negative division factors */
3195 .byte 66
3196 .byte 70
3197 .byte 75
3198 .byte 81
3199 .byte 87
3200 .byte 93
3201 .byte 101
3202 .byte 109
3203 .byte 119
3204 .byte 130
3205 .byte 142
3206 .byte 156
3207 .byte 172
3208 .byte 192
3209 .byte 214
3210 .byte 241
/* Unused gap on either side of the label. */
3211 .skip 16
3212 .global GLOBAL(div_table)
3213 GLOBAL(div_table):
/* Hidden alias, so code in this library can refer to the table without
   going through the exported symbol. */
3214 HIDDEN_ALIAS(div_table_internal,div_table)
3215 .skip 16
3216 /* positive division factors */
3217 .byte 241
3218 .byte 214
3219 .byte 192
3220 .byte 172
3221 .byte 156
3222 .byte 142
3223 .byte 130
3224 .byte 119
3225 .byte 109
3226 .byte 101
3227 .byte 93
3228 .byte 87
3229 .byte 81
3230 .byte 75
3231 .byte 70
3232 .byte 66
3233 /* positive division constants */
3234 .word 31801
3235 .word 29966
3236 .word 28383
3237 .word 26864
3238 .word 25583
3239 .word 24408
3240 .word 23354
3241 .word 22344
3242 .word 21384
3243 .word 20584
3244 .word 19752
3245 .word 19104
3246 .word 18434
3247 .word 17738
3248 .word 17136
3249 .word 16639
3250 #endif /* L_div_table */
3251 #endif /* __SH5__ */