Sync usage with man page.
[netbsd-mini2440.git] / gnu / dist / gcc4 / gcc / config / xtensa / lib1funcs.asm
blob718fcbe39d80532c4508f85fd10069e9f0432748
1 /* Assembly functions for the Xtensa version of libgcc1.
2 Copyright (C) 2001,2002,2003, 2005 Free Software Foundation, Inc.
3 Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 In addition to the permissions in the GNU General Public License, the
13 Free Software Foundation gives you unlimited permission to link the
14 compiled version of this file into combinations with other programs,
15 and to distribute those combinations without any restriction coming
16 from the use of this file. (The General Public License restrictions
17 do apply in other respects; for example, they cover modification of
18 the file, and distribution when not linked into a combine
19 executable.)
21 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
22 WARRANTY; without even the implied warranty of MERCHANTABILITY or
23 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
24 for more details.
26 You should have received a copy of the GNU General Public License
27 along with GCC; see the file COPYING. If not, write to the Free
28 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
29 02110-1301, USA. */
31 #include "xtensa-config.h"
33 # Note: These functions use a minimum stack frame size of 32. This is
34 # necessary for Xtensa configurations that only support a fixed register
35 # window size of 8, where even leaf functions (such as these) need to
36 # allocate space for a 4-word "extra save area".
# Wrappers for the ABS and ADDX* instructions, which are optional in an
# Xtensa processor configuration and need a fallback when absent.

# do_abs dst, src, tmp
#   Sets dst to the absolute value of src.  On cores without the ABS
#   instruction tmp is overwritten as scratch.  src is read before dst
#   is written, so dst and src may be the same register.
	.macro	do_abs dst, src, tmp
#if !XCHAL_HAVE_ABS
	neg	\tmp, \src		# tmp = -src
	movgez	\tmp, \src, \src	# src >= 0: take src itself
	mov	\dst, \tmp
#else
	abs	\dst, \src
#endif
	.endm
# do_addx2 dst, as, at, tmp
#   dst = (as << 1) + at.  Without the ADDX option the shifted value is
#   staged in tmp; callers in this file pass the same register for tmp
#   and dst.
	.macro	do_addx2 dst, as, at, tmp
#if !XCHAL_HAVE_ADDX
	slli	\tmp, \as, 1
	add	\dst, \tmp, \at
#else
	addx2	\dst, \as, \at
#endif
	.endm
# do_addx4 dst, as, at, tmp
#   dst = (as << 2) + at.  Without the ADDX option the shifted value is
#   staged in tmp; callers in this file pass the same register for tmp
#   and dst.
	.macro	do_addx4 dst, as, at, tmp
#if !XCHAL_HAVE_ADDX
	slli	\tmp, \as, 2
	add	\dst, \tmp, \at
#else
	addx4	\dst, \as, \at
#endif
	.endm
# do_addx8 dst, as, at, tmp
#   dst = (as << 3) + at.  Without the ADDX option the shifted value is
#   staged in tmp; callers in this file pass the same register for tmp
#   and dst.
	.macro	do_addx8 dst, as, at, tmp
#if !XCHAL_HAVE_ADDX
	slli	\tmp, \as, 3
	add	\dst, \tmp, \at
#else
	addx8	\dst, \as, \at
#endif
	.endm
# Function entry and return macros that work under both the standard
# register-windowed ABI and the non-windowed call0 ABI.  They reserve
# no extra stack space, so they are only suitable for leaf functions
# that never spill anything to the stack.

# abi_entry reg, size
#   Emits an ENTRY instruction with the given operands for the windowed
#   ABI, and nothing at all otherwise.
	.macro	abi_entry reg, size
#if !XCHAL_HAVE_WINDOWED || __XTENSA_CALL0_ABI__
	/* non-windowed build: nothing to emit */
#else
	entry	\reg, \size
#endif
	.endm
# abi_return
#   Emits the return instruction matching the ABI in use: RETW for the
#   windowed ABI, plain RET otherwise.
	.macro	abi_return
#if !XCHAL_HAVE_WINDOWED || __XTENSA_CALL0_ABI__
	ret
#else
	retw
#endif
	.endm
#ifdef L_mulsi3
# __mulsi3: 32-bit integer multiplication.
#   In:   a2, a3 = the two factors
#   Out:  a2 = low 32 bits of the product
#   May overwrite a3-a7 (and the MAC16 accumulator when that variant is
#   built).  One of three variants is selected at build time: MUL16,
#   MAC16, or a shift-and-add loop when neither option is configured.
	.align	4
	.global	__mulsi3
	.type	__mulsi3,@function
__mulsi3:
	abi_entry sp, 32

#if XCHAL_HAVE_MUL16
	# Fast path: no bits set above bit 15 in either factor, so a
	# single 16x16 multiply gives the whole result.
	or	a4, a2, a3
	srai	a4, a4, 16
	bnez	a4, .Lmul_wide
	mul16u	a2, a2, a3
	abi_return
.Lmul_wide:
	# Combine three 16x16 partial products.  The hi*hi term would
	# only contribute above bit 31, so it is never computed.
	srai	a4, a2, 16		# a4 = upper half of first factor
	srai	a5, a3, 16		# a5 = upper half of second factor
	mul16u	a7, a4, a3		# hi(a2) * lo(a3)
	mul16u	a6, a5, a2		# hi(a3) * lo(a2)
	mul16u	a4, a2, a3		# lo(a2) * lo(a3)
	add	a7, a7, a6
	slli	a7, a7, 16
	add	a2, a7, a4

#elif XCHAL_HAVE_MAC16
	# Accumulate both cross products, then add them (shifted up by
	# 16) to the unsigned product of the two low halves.
	mul.aa.hl a2, a3
	mula.aa.lh a2, a3
	rsr	a5, ACCLO
	umul.aa.ll a2, a3
	rsr	a4, ACCLO
	slli	a5, a5, 16
	add	a2, a4, a5

#else /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */

	# Shift-and-add multiply handling four multiplier bits per pass,
	# which lets the addx forms do the scaling.  The first pass is
	# peeled off to save a cycle of setup.

	# Work on magnitudes; a5 remembers the sign of the result.
	xor	a5, a2, a3		# top bit set iff exactly one input is negative
	do_abs	a3, a3, a6
	do_abs	a2, a2, a6

	# Keep the larger magnitude in a4 and the smaller in a3, so the
	# loop iterates over the smaller value.
	sub	a7, a2, a3
	mov	a4, a3
	movgez	a4, a2, a7		# a4 = max(a2, a3)
	movltz	a3, a2, a7		# a3 = min(a2, a3)

	# Peeled first pass: multiplier bits 0..3.
	movi	a2, 0
	extui	a6, a3, 0, 1
	movnez	a2, a4, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmul_next_nibble
	neg	a3, a2
	movltz	a2, a3, a5		# negate when the input signs differed
	abi_return

	.align	4
.Lmul_next_nibble:
	# Advance to the next four multiplier bits.
	srli	a3, a3, 4
	slli	a4, a4, 4

	add	a7, a4, a2
	extui	a6, a3, 0, 1
	movnez	a2, a7, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmul_next_nibble

	neg	a3, a2
	movltz	a2, a3, a5		# negate when the input signs differed

#endif /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */

	abi_return
	.size	__mulsi3,.-__mulsi3

#endif /* L_mulsi3 */
# do_nsau cnt, val, tmp, a
#   Wrapper for the NSAU (unsigned normalize shift amount) instruction:
#   cnt = number of leading zero bits in val.  On configurations that
#   lack NSAU the count is computed in software with the __nsau_data
#   byte table; that path only reads val and overwrites tmp and a.
#
#   The software path narrows the value to its most significant
#   nonzero byte in two steps (16 bits, then 8 bits) and finishes with
#   a table lookup on that byte.  Numeric local labels are used so the
#   macro can be expanded more than once in the same file.
	.macro	do_nsau cnt, val, tmp, a
#if XCHAL_HAVE_NSA
	nsau	\cnt, \val
#else
	mov	\a, \val
	movi	\cnt, 0
	extui	\tmp, \a, 16, 16	# tmp = upper halfword
	bnez	\tmp, 0f		# upper half nonzero: skip the 16-bit step
	movi	\cnt, 16
	slli	\a, \a, 16
0:
	extui	\tmp, \a, 24, 8		# tmp = top byte
	bnez	\tmp, 1f		# top byte nonzero: skip the 8-bit step
	addi	\cnt, \cnt, 8
	slli	\a, \a, 8
1:
	movi	\tmp, __nsau_data
	extui	\a, \a, 24, 8		# table index = top byte
	add	\tmp, \tmp, \a
	l8ui	\tmp, \tmp, 0		# leading zeros within that byte
	add	\cnt, \cnt, \tmp
#endif /* !XCHAL_HAVE_NSA */
	.endm
#ifdef L_nsau
# __nsau_data: 256-entry byte table for the software NSAU fallback.
#   Entry i holds the number of leading zero bits in the 8-bit value i
#   (entry 0 holds 8).  The symbol is still defined, with size zero,
#   when the processor has the NSA option.
	.section .rodata
	.align	4
	.global	__nsau_data
	.type	__nsau_data,@object
__nsau_data:
#if !XCHAL_HAVE_NSA
	.byte	8, 7, 6, 6, 5, 5, 5, 5	# indices 0..7
	.fill	8, 1, 4			# indices 8..15
	.fill	16, 1, 3		# indices 16..31
	.fill	32, 1, 2		# indices 32..63
	.fill	64, 1, 1		# indices 64..127
	.fill	128, 1, 0		# indices 128..255
#endif /* !XCHAL_HAVE_NSA */
	.size	__nsau_data,.-__nsau_data
	.hidden	__nsau_data
#endif /* L_nsau */
#ifdef L_udivsi3
# __udivsi3: unsigned 32-bit division.
#   In:   a2 = dividend, a3 = divisor
#   Out:  a2 = dividend / divisor; a zero divisor yields 0
#   May overwrite a3-a7.
	.align	4
	.global	__udivsi3
	.type	__udivsi3,@function
__udivsi3:
	abi_entry sp, 32
	bltui	a3, 2, .Ludiv_tiny	# divisor is 0 or 1

	mov	a6, a2			# a6 = dividend (becomes the running remainder)
	do_nsau	a5, a6, a2, a7		# a5 = leading zeros of the dividend
	do_nsau	a4, a3, a2, a7		# a4 = leading zeros of the divisor
	bgeu	a5, a4, .Ludiv_short	# quotient can only be 0 or 1

	sub	a4, a4, a5		# a4 = iteration count
	ssl	a4
	sll	a3, a3			# line the divisor up with the dividend
	movi	a2, 0			# quotient accumulator

	# Test, subtract and shift: one quotient bit per iteration.
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Ludiv_loop_end
#endif /* XCHAL_HAVE_LOOPS */
.Ludiv_loop:
	bltu	a6, a3, .Ludiv_bit_clear
	sub	a6, a6, a3
	addi	a2, a2, 1
.Ludiv_bit_clear:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Ludiv_loop
#endif /* !XCHAL_HAVE_LOOPS */
.Ludiv_loop_end:

	bltu	a6, a3, .Ludiv_done
	addi	a2, a2, 1		# last bit: remainder is still >= divisor
.Ludiv_done:
	abi_return

.Ludiv_tiny:
	beqz	a3, .Ludiv_by_zero
	# Divisor is 1: a2 already holds the dividend, which is the result.
	abi_return

.Ludiv_short:
	bltu	a6, a3, .Ludiv_zero	# dividend < divisor gives 0
	movi	a2, 1
	abi_return

.Ludiv_by_zero:
	# Division by zero just returns 0; an exception could be raised here.

.Ludiv_zero:
	movi	a2, 0
	abi_return
	.size	__udivsi3,.-__udivsi3

#endif /* L_udivsi3 */
#ifdef L_divsi3
# __divsi3: signed 32-bit division.
#   In:   a2 = dividend, a3 = divisor
#   Out:  a2 = quotient of the magnitudes, negated when the operand
#         signs differ; a zero divisor yields 0
#   May overwrite a3-a8.
	.align	4
	.global	__divsi3
	.type	__divsi3,@function
__divsi3:
	abi_entry sp, 32
	xor	a7, a2, a3		# a7 < 0 iff the operand signs differ
	do_abs	a6, a2, a4		# a6 = |dividend|
	do_abs	a3, a3, a4		# a3 = |divisor|
	bltui	a3, 2, .Lsdiv_tiny	# |divisor| is 0 or 1
	do_nsau	a5, a6, a2, a8		# a5 = leading zeros of |dividend|
	do_nsau	a4, a3, a2, a8		# a4 = leading zeros of |divisor|
	bgeu	a5, a4, .Lsdiv_short	# magnitude of quotient is 0 or 1

	sub	a4, a4, a5		# a4 = iteration count
	ssl	a4
	sll	a3, a3			# line |divisor| up with |dividend|
	movi	a2, 0			# quotient accumulator

	# Test, subtract and shift: one quotient bit per iteration.
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lsdiv_loop_end
#endif /* XCHAL_HAVE_LOOPS */
.Lsdiv_loop:
	bltu	a6, a3, .Lsdiv_bit_clear
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lsdiv_bit_clear:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lsdiv_loop
#endif /* !XCHAL_HAVE_LOOPS */
.Lsdiv_loop_end:

	bltu	a6, a3, .Lsdiv_apply_sign
	addi	a2, a2, 1		# last bit: remainder is still >= |divisor|
.Lsdiv_apply_sign:
	neg	a5, a2
	movltz	a2, a5, a7		# signs differed: return -quotient
	abi_return

.Lsdiv_tiny:
	beqz	a3, .Lsdiv_by_zero
	# |divisor| is 1: the result is |dividend| with the sign applied.
	neg	a2, a6
	movgez	a2, a6, a7		# signs agree: keep |dividend|
	abi_return

.Lsdiv_short:
	bltu	a6, a3, .Lsdiv_zero	# |dividend| < |divisor| gives 0
	movi	a2, 1
	movi	a4, -1
	movltz	a2, a4, a7		# otherwise 1, or -1 when the signs differ
	abi_return

.Lsdiv_by_zero:
	# Division by zero just returns 0; an exception could be raised here.

.Lsdiv_zero:
	movi	a2, 0
	abi_return
	.size	__divsi3,.-__divsi3

#endif /* L_divsi3 */
#ifdef L_umodsi3
# __umodsi3: unsigned 32-bit remainder.
#   In:   a2 = dividend, a3 = divisor
#   Out:  a2 = dividend % divisor; a divisor of 0 or 1 yields 0
#   May overwrite a3-a7.
	.align	4
	.global	__umodsi3
	.type	__umodsi3,@function
__umodsi3:
	abi_entry sp, 32
	bltui	a3, 2, .Lumod_tiny	# divisor is 0 or 1

	do_nsau	a5, a2, a6, a7		# a5 = leading zeros of the dividend
	do_nsau	a4, a3, a6, a7		# a4 = leading zeros of the divisor
	bgeu	a5, a4, .Lumod_last	# at most one subtraction is needed

	sub	a4, a4, a5		# a4 = iteration count
	ssl	a4
	sll	a3, a3			# line the divisor up with the dividend

	# Test, subtract and shift; a2 is reduced in place.
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lumod_loop_end
#endif /* XCHAL_HAVE_LOOPS */
.Lumod_loop:
	bltu	a2, a3, .Lumod_no_sub
	sub	a2, a2, a3
.Lumod_no_sub:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lumod_loop
#endif /* !XCHAL_HAVE_LOOPS */
.Lumod_loop_end:

.Lumod_last:
	bltu	a2, a3, .Lumod_done
	sub	a2, a2, a3		# one final subtraction when a2 >= divisor
.Lumod_done:
	abi_return

.Lumod_tiny:
	# The divisor is 0 or 1, so the result is 0 either way.  A zero
	# divisor could raise an exception here instead.
	movi	a2, 0
	abi_return
	.size	__umodsi3,.-__umodsi3

#endif /* L_umodsi3 */
#ifdef L_modsi3
# __modsi3: signed 32-bit remainder.
#   In:   a2 = dividend, a3 = divisor
#   Out:  a2 = remainder of the magnitudes, negated when the dividend
#         is negative; a divisor magnitude of 0 or 1 yields 0
#   May overwrite a3-a8.
	.align	4
	.global	__modsi3
	.type	__modsi3,@function
__modsi3:
	abi_entry sp, 32
	mov	a7, a2			# keep the signed dividend for the final sign
	do_abs	a2, a2, a4		# a2 = |dividend|
	do_abs	a3, a3, a4		# a3 = |divisor|
	bltui	a3, 2, .Lsmod_tiny	# |divisor| is 0 or 1
	do_nsau	a5, a2, a6, a8		# a5 = leading zeros of |dividend|
	do_nsau	a4, a3, a6, a8		# a4 = leading zeros of |divisor|
	bgeu	a5, a4, .Lsmod_last	# at most one subtraction is needed

	sub	a4, a4, a5		# a4 = iteration count
	ssl	a4
	sll	a3, a3			# line |divisor| up with |dividend|

	# Test, subtract and shift; a2 is reduced in place.
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lsmod_loop_end
#endif /* XCHAL_HAVE_LOOPS */
.Lsmod_loop:
	bltu	a2, a3, .Lsmod_no_sub
	sub	a2, a2, a3
.Lsmod_no_sub:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lsmod_loop
#endif /* !XCHAL_HAVE_LOOPS */
.Lsmod_loop_end:

.Lsmod_last:
	bltu	a2, a3, .Lsmod_apply_sign
	sub	a2, a2, a3		# one final subtraction when a2 >= |divisor|
.Lsmod_apply_sign:
	bgez	a7, .Lsmod_done
	neg	a2, a2			# negative dividend: negate the remainder
.Lsmod_done:
	abi_return

.Lsmod_tiny:
	# |divisor| is 0 or 1, so the result is 0 either way.  A zero
	# divisor could raise an exception here instead.
	movi	a2, 0
	abi_return
	.size	__modsi3,.-__modsi3

#endif /* L_modsi3 */