1 /* -*- Mode: Asm -*- */
2 /* Copyright (C) 1998-2024 Free Software Foundation, Inc.
3 Contributed by Denis Chertykov <chertykov@gmail.com>
5 This file is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any
10 This file is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
24 #if defined (__AVR_TINY__)
25 #define __zero_reg__ r17
26 #define __tmp_reg__ r16
28 #define __zero_reg__ r1
29 #define __tmp_reg__ r0
32 #if defined (__AVR_HAVE_SPH__)
36 #define __RAMPZ__ 0x3B
39 /* Most of the functions here are called directly from avr.md
40 patterns, instead of using the standard libcall mechanisms.
41 This can make better code because GCC knows exactly which
42 of the call-used registers (not all of them) are clobbered. */
44 /* FIXME: At present, there is no SORT directive in the linker
45 script so that we must not assume that different modules
46 in the same input section like .libgcc.text.mul will be
47 located close together. Therefore, we cannot use
48 RCALL/RJMP to call a function like __udivmodhi4 from
49 __divmodhi4 and have to use lengthy XCALL/XJMP even
50 though they are in the same input section and all same
51 input sections together are small enough to reach every
52 location with a RCALL/RJMP instruction. */
54 #if defined (__AVR_HAVE_EIJMP_EICALL__) && !defined (__AVR_HAVE_ELPMX__)
55 #error device not supported
;; mov_l / mov_h: Move the low resp. high byte of a 16-bit register pair.
;; On devices with MOVW the whole word is presumably moved by a single
;; MOVW in one of the two macros (the other expanding to nothing);
;; without MOVW each macro emits a plain MOV -- TODO confirm against the
;; elided #else branches.
;; NOTE(review): the macro bodies' #else branches and .endm lines are
;; elided in this excerpt.
58 .macro mov_l r_dest, r_src
59 #if defined (__AVR_HAVE_MOVW__)
66 .macro mov_h r_dest, r_src
67 #if defined (__AVR_HAVE_MOVW__)
;; wmov: Move a 16-bit value held in two consecutive registers; uses
;; MOVW where available, otherwise two byte MOVs (only the high-byte
;; MOV is visible here).
74 .macro wmov r_dest, r_src
75 #if defined (__AVR_HAVE_MOVW__)
79 mov \r_dest+1, \r_src+1
;; mov4: Move a 32-bit value held in four consecutive registers as two
;; word moves via wmov (only the high-word wmov is visible here).
83 .macro mov4 r_dest, r_src
85 wmov \r_dest+2, \r_src+2
88 #if defined (__AVR_HAVE_JMP_CALL__)
96 #if defined (__AVR_HAVE_EIJMP_EICALL__)
;; do_prologue_saves: Function prologue helper.
;; Loads the frame size \n_frame into X (R27:R26) and the gs() word
;; address of the continuation label into Z (R31:R30), then jumps into
;; __prologue_saves__ at an offset that skips the saves for registers
;; that need not be pushed (2 bytes of code per register skipped;
;; up to 18 registers can be saved).
;; NOTE(review): __prologue_saves__ presumably returns via (E)IJMP
;; through Z to .L_prologue_saves.\@ -- confirm in its definition.
;; The .endm line is elided in this excerpt.
106 .macro do_prologue_saves n_pushed n_frame=0
107 ldi r26, lo8(\n_frame)
108 ldi r27, hi8(\n_frame)
109 ldi r30, lo8(gs(.L_prologue_saves.\@))
110 ldi r31, hi8(gs(.L_prologue_saves.\@))
111 XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
112 .L_prologue_saves.\@:
;; do_epilogue_restores: Function epilogue helper.
;; Deallocates the frame by adding \n_frame to the frame pointer Y
;; (SUBI/SBCI with the negated immediate -- AVR has no 16-bit ADDI),
;; then jumps into __epilogue_restores__ at an offset that skips the
;; restores for registers that were not saved (2 bytes of code per
;; register skipped).  Devices without SPH only adjust the low byte R28.
;; NOTE(review): several interior lines (including .endm and parts of
;; the SPH/no-SPH alternatives) are elided in this excerpt.
117 .macro do_epilogue_restores n_pushed n_frame=0
119 #ifdef __AVR_HAVE_SPH__
122 subi r28, lo8(-\n_frame)
123 sbci r29, hi8(-\n_frame)
130 subi r28, lo8(-\n_frame)
132 #endif /* HAVE SPH */
134 XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
137 ;; Support function entry and exit for convenience
;; wsubi: Subtract the 16-bit immediate \i_arg2 from the register pair
;; starting at \r_arg1.  AVR_TINY lacks SBIW, so SUBI/SBCI on the low
;; and high byte is used there; other cores use a single SBIW (which
;; restricts \r_arg1 to R24/R26/R28/R30 and \i_arg2 to 0..63).
;; NOTE(review): the #else/#endif/.endm lines of this conditional are
;; elided in this excerpt.
139 .macro wsubi r_arg1, i_arg2
140 #if defined (__AVR_TINY__)
141 subi \r_arg1, lo8(\i_arg2)
142 sbci \r_arg1+1, hi8(\i_arg2)
144 sbiw \r_arg1, \i_arg2
;; waddi: Add the 16-bit immediate \i_arg2 to the register pair starting
;; at \r_arg1.  AVR has no add-immediate, hence SUBI/SBCI with the
;; negated immediate on AVR_TINY; other cores use a single ADIW (which
;; restricts \r_arg1 to R24/R26/R28/R30 and \i_arg2 to 0..63).
;; NOTE(review): the #else/#endif/.endm lines of this conditional are
;; elided in this excerpt.
148 .macro waddi r_arg1, i_arg2
149 #if defined (__AVR_TINY__)
150 subi \r_arg1, lo8(-\i_arg2)
151 sbci \r_arg1+1, hi8(-\i_arg2)
153 adiw \r_arg1, \i_arg2
176 ;; Skip next instruction, typically a jump target
177 #define skip cpse 16,16
179 ;; Negate a 2-byte value held in consecutive registers
186 ;; Negate a 4-byte value held in consecutive registers
187 ;; Sets the V flag for signed overflow tests if REG >= 16
199 adc \reg, __zero_reg__
200 adc \reg+1, __zero_reg__
201 adc \reg+2, __zero_reg__
202 adc \reg+3, __zero_reg__
206 #define exp_lo(N) hlo8 ((N) << 23)
207 #define exp_hi(N) hhi8 ((N) << 23)
210 .section .text.libgcc.mul, "ax", @progbits
212 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
213 /* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
214 #if !defined (__AVR_HAVE_MUL__)
215 /*******************************************************
216 Multiplication 8 x 8 without MUL
217 *******************************************************/
;; __mulqi3: 8 x 8 -> 8 bit multiplication for devices without the MUL
;; instruction, by classic shift-and-add: the multiplicand is shifted
;; left each iteration and conditionally accumulated into the result
;; while the multiplier is shifted out bit by bit.
;; In:  R24 = multiplier, R22 = multiplicand.  Out: R24 = product.
;; NOTE(review): DEFUN/ENDF, the loop labels, the multiplier shift and
;; the conditional add are elided in this excerpt -- the visible lines
;; are only a skeleton of the routine.
218 #if defined (L_mulqi3)
220 #define r_arg2 r22 /* multiplicand */
221 #define r_arg1 r24 /* multiplier */
222 #define r_res __tmp_reg__ /* result */
225 clr r_res ; clear result
229 add r_arg2,r_arg2 ; shift multiplicand
230 breq __mulqi3_exit ; while multiplicand != 0
232 brne __mulqi3_loop ; exit if multiplier = 0
234 mov r_arg1,r_res ; result to return register
242 #endif /* defined (L_mulqi3) */
245 /*******************************************************
246 Widening Multiplication 16 = 8 x 8 without MUL
247 Multiplication 16 x 16 without MUL
248 *******************************************************/
255 ;; Output overlaps input, thus expand result in CC0/1
258 #define CC0 __tmp_reg__
261 #if defined (L_umulqihi3)
262 ;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
263 ;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
264 ;;; Clobbers: __tmp_reg__, R21..R23
270 #endif /* L_umulqihi3 */
272 #if defined (L_mulqihi3)
273 ;;; R25:R24 = (signed int) R22 * (signed int) R24
274 ;;; (C1:C0) = (signed int) A0 * (signed int) B0
275 ;;; Clobbers: __tmp_reg__, R20..R23
281 ;; The multiplication runs twice as fast if A1 is zero, thus:
284 #ifdef __AVR_HAVE_JMP_CALL__
285 ;; Store B0 * sign of A
290 #else /* have no CALL */
291 ;; Skip sign-extension of A if A >= 0
292 ;; Same size as with the first alternative but avoids errata skip
293 ;; and is faster if A >= 0
299 #endif /* HAVE_JMP_CALL */
300 ;; 1-extend A after the multiplication
304 #endif /* L_mulqihi3 */
306 #if defined (L_mulhi3)
307 ;;; R25:R24 = R23:R22 * R25:R24
308 ;;; (C1:C0) = (A1:A0) * (B1:B0)
309 ;;; Clobbers: __tmp_reg__, R21..R23
317 ;; Bit n of A is 1 --> C += B << n
324 ;; If B == 0 we are ready
328 ;; Carry = n-th bit of A
331 ;; If bit n of A is set, then go add B * 2^n to C
334 ;; Carry = 0 --> The ROR above acts like CP A0, 0
335 ;; Thus, it is sufficient to CPC the high part to test A against 0
337 ;; Only proceed if A != 0
340 ;; Move Result into place
345 #endif /* L_mulhi3 */
378 /*******************************************************
379 Widening Multiplication 32 = 16 x 16 without MUL
380 *******************************************************/
382 #if defined (L_umulhisi3)
392 #endif /* L_umulhisi3 */
394 #if defined (L_mulhisi3)
401 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
408 #else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
409 ;; Zero-extend A and __mulsi3 will run at least twice as fast
410 ;; compared to a sign-extended A.
415 ;; If A < 0 then perform the B * 0xffff.... before the
416 ;; very multiplication by initializing the high part of the
417 ;; result CC with -B.
422 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
424 #endif /* L_mulhisi3 */
427 /*******************************************************
428 Multiplication 32 x 32 without MUL
429 *******************************************************/
431 #if defined (L_mulsi3)
433 #if defined (__AVR_TINY__)
434 in r26, __SP_L__ ; safe to use X, as it is CC0/CC1
436 subi r26, lo8(-3) ; Add 3 to point past return address
438 push B0 ; save callee saved regs
440 ld B0, X+ ; load from caller stack
451 DEFUN __mulsi3_helper
456 1: ;; If bit n of A is set, then add B * 2^n to the result in CC
458 add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3
461 lsl B0 $ rol B1 $ rol B2 $ rol B3
463 3: ;; A >>= 1: Carry = n-th bit of A
464 lsr A3 $ ror A2 $ ror A1 $ ror A0
467 ;; Only continue if A != 0
473 ;; All bits of A are consumed: Copy result to return register C
476 #if defined (__AVR_TINY__)
477 pop B1 ; restore callee saved regs
479 #endif /* defined (__AVR_TINY__) */
483 #endif /* L_mulsi3 */
502 #endif /* !defined (__AVR_HAVE_MUL__) */
503 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
505 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
506 #if defined (__AVR_HAVE_MUL__)
521 /*******************************************************
522 Widening Multiplication 32 = 16 x 16 with MUL
523 *******************************************************/
525 #if defined (L_mulhisi3)
526 ;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
527 ;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
528 ;;; Clobbers: __tmp_reg__
537 XJMP __usmulhisi3_tail
539 #endif /* L_mulhisi3 */
541 #if defined (L_usmulhisi3)
542 ;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
543 ;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
544 ;;; Clobbers: __tmp_reg__
550 DEFUN __usmulhisi3_tail
557 ENDF __usmulhisi3_tail
558 #endif /* L_usmulhisi3 */
560 #if defined (L_umulhisi3)
561 ;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
562 ;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
563 ;;; Clobbers: __tmp_reg__
570 #ifdef __AVR_HAVE_JMP_CALL__
571 ;; This function is used by many other routines, often multiple times.
572 ;; Therefore, if the flash size is not too limited, avoid the RCALL
573 ;; and invest 6 Bytes to speed things up.
588 #endif /* L_umulhisi3 */
590 /*******************************************************
591 Widening Multiplication 32 = 16 x 32 with MUL
592 *******************************************************/
594 #if defined (L_mulshisi3)
595 ;;; R25:R22 = (signed long) R27:R26 * R21:R18
596 ;;; (C3:C0) = (signed long) A1:A0 * B3:B0
597 ;;; Clobbers: __tmp_reg__
599 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
600 ;; Some cores have problem skipping 2-word instruction
605 #endif /* __AVR_HAVE_JMP_CALL__ */
610 ;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
611 ;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
612 ;;; Clobbers: __tmp_reg__
615 ;; One-extend R27:R26 (A1:A0)
620 #endif /* L_mulshisi3 */
622 #if defined (L_muluhisi3)
623 ;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
624 ;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
625 ;;; Clobbers: __tmp_reg__
638 #endif /* L_muluhisi3 */
640 /*******************************************************
641 Multiplication 32 x 32 with MUL
642 *******************************************************/
644 #if defined (L_mulsi3)
645 ;;; R25:R22 = R25:R22 * R21:R18
646 ;;; (C3:C0) = C3:C0 * B3:B0
647 ;;; Clobbers: R26, R27, __tmp_reg__
655 ;; A1:A0 now contains the high word of A
666 #endif /* L_mulsi3 */
681 #endif /* __AVR_HAVE_MUL__ */
683 /*******************************************************
684 Multiplication 24 x 24 with MUL
685 *******************************************************/
687 #if defined (L_mulpsi3)
689 ;; A[0..2]: In: Multiplicand; Out: Product
694 ;; B[0..2]: In: Multiplier
699 #if defined (__AVR_HAVE_MUL__)
701 ;; C[0..2]: Expand Result
706 ;; R24:R22 *= R20:R18
707 ;; Clobbers: r21, r25, r26, r27, __tmp_reg__
716 mul AA2, B0 $ add C2, r0
717 mul AA0, B2 $ add C2, r0
729 #else /* !HAVE_MUL */
730 ;; C[0..2]: Expand Result
731 #if defined (__AVR_TINY__)
735 #endif /* defined (__AVR_TINY__) */
739 ;; R24:R22 *= R20:R18
740 ;; Clobbers: __tmp_reg__, R18, R19, R20, R21
743 #if defined (__AVR_TINY__)
746 subi r26, lo8(-3) ; Add 3 to point past return address
748 push B0 ; save callee saved regs
750 ld B0,X+ ; load from caller stack
753 #endif /* defined (__AVR_TINY__) */
759 0: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop
760 LSR B2 $ ror B1 $ ror B0
762 ;; If the N-th Bit of B[] was set...
765 ;; ...then add A[] * 2^N to the Result C[]
766 ADD C0,A0 $ adc C1,A1 $ adc C2,A2
768 1: ;; Multiply A[] by 2
769 LSL A0 $ rol A1 $ rol A2
771 ;; Loop until B[] is 0
772 subi B0,0 $ sbci B1,0 $ sbci B2,0
775 ;; Copy C[] to the return Register A[]
780 #if defined (__AVR_TINY__)
783 #endif /* (__AVR_TINY__) */
791 #endif /* HAVE_MUL */
801 #endif /* L_mulpsi3 */
803 #if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
805 ;; A[0..2]: In: Multiplicand
810 ;; BB: In: Multiplier
818 ;; C[] = A[] * sign_extend (BB)
846 #endif /* L_mulsqipsi3 && HAVE_MUL */
848 /*******************************************************
849 Multiplication 64 x 64
850 *******************************************************/
854 ;; A[0..7]: In: Multiplicand
865 ;; B[0..7]: In: Multiplier
876 #if defined (__AVR_HAVE_MUL__)
877 ;; Define C[] for convenience
878 ;; Notice that parts of C[] overlap A[] respective B[]
888 #if defined (L_muldi3)
891 ;; R25:R18 *= R17:R10
892 ;; Ordinary ABI-Function
900 ;; Counting in Words, we have to perform a 4 * 4 Multiplication
903 mul A7,B0 $ $ mov C7,r0
904 mul A0,B7 $ $ add C7,r0
905 mul A6,B1 $ $ add C7,r0
906 mul A6,B0 $ mov C6,r0 $ add C7,r1
907 mul B6,A1 $ $ add C7,r0
908 mul B6,A0 $ add C6,r0 $ adc C7,r1
911 mul A2,B4 $ add C6,r0 $ adc C7,r1
912 mul A3,B4 $ $ add C7,r0
913 mul A2,B5 $ $ add C7,r0
930 XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25
940 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
950 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
954 XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23
968 #endif /* L_muldi3 */
970 #if defined (L_muldi3_6)
971 ;; A helper for some 64-bit multiplications with MUL available
983 #endif /* L_muldi3_6 */
994 #else /* !HAVE_MUL */
996 #if defined (L_muldi3)
1010 ;; R25:R18 *= R17:R10
1011 ;; Ordinary ABI-Function
1027 0: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
1028 ;; where N = 64 - Loop.
1029 ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
1030 ;; B[] will have its initial Value again.
1031 LSR B7 $ ror B6 $ ror B5 $ ror B4
1032 ror B3 $ ror B2 $ ror B1 $ ror B0
1034 ;; If the N-th Bit of B[] was set then...
1036 ;; ...finish Rotation...
1039 ;; ...and add A[] * 2^N to the Result C[]
1040 ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3
1041 adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7
1043 1: ;; Multiply A[] by 2
1044 LSL A0 $ rol A1 $ rol A2 $ rol A3
1045 rol A4 $ rol A5 $ rol A6 $ rol A7
1050 ;; We expanded the Result in C[]
1051 ;; Copy Result to the Return Register A[]
1075 #endif /* L_muldi3 */
1076 #endif /* HAVE_MUL */
1077 #endif /* if not __AVR_TINY__ */
1097 /*******************************************************
1098 Widening Multiplication 64 = 32 x 32 with MUL
1099 *******************************************************/
1101 #if defined (__AVR_HAVE_MUL__)
1121 #if defined (L_umulsidi3)
1123 ;; Unsigned widening 64 = 32 * 32 Multiplication with MUL
1125 ;; R18[8] = R22[4] * R18[4]
1127 ;; Ordinary ABI Function, but additionally sets
1128 ;; X = R20[2] = B2[2]
1129 ;; Z = R22[2] = A0[2]
1135 DEFUN __umulsidi3_helper
1136 push 29 $ push 28 ; Y
1138 ;; Counting in Words, we have to perform 4 Multiplications
1142 push 23 $ push 22 ; C0
1146 push 27 $ push 26 ; A0
1147 push 19 $ push 18 ; B2
1149 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1150 ;; B2 C2 -- -- -- B0 A2
1154 ;; Sign-extend A. T holds the sign of A
1156 ;; Subtract B from the high part of the result
1161 0: wmov 18, 28 ;; B0
1165 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1166 ;; B0 C2 -- -- A2 C4 C6
1171 pop 26 $ pop 27 ;; B2
1172 pop 18 $ pop 19 ;; A0
1175 ;; Move result C into place and save A0 in Z
1182 pop 28 $ pop 29 ;; Y
1184 ENDF __umulsidi3_helper
1185 #endif /* L_umulsidi3 */
1188 #if defined (L_mulsidi3)
1190 ;; Signed widening 64 = 32 * 32 Multiplication
1192 ;; R18[8] = R22[4] * R18[4]
1193 ;; Ordinary ABI Function
1196 sbrs B3, 7 ; Enhanced core has no skip bug
1197 XJMP __umulsidi3_helper
1199 ;; B needs sign-extension
1202 XCALL __umulsidi3_helper
1212 #endif /* L_mulsidi3 */
1230 #endif /* HAVE_MUL */
1232 /**********************************************************
1233 Widening Multiplication 64 = 32 x 32 without MUL
1234 **********************************************************/
1235 #ifndef __AVR_TINY__ /* if not __AVR_TINY__ */
1236 #if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
1267 ;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
1269 ;; R18[8] = R22[4] * R18[4]
1270 ;; Ordinary ABI Function
1279 ;; Save 10 Registers: R10..R17, R28, R29
1280 do_prologue_saves 10
1283 ;; Move B into place...
1292 ;; Move A into place...
1302 do_epilogue_restores 10
1330 #endif /* L_mulsidi3 && !HAVE_MUL */
1331 #endif /* if not __AVR_TINY__ */
1332 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1335 .section .text.libgcc.div, "ax", @progbits
1337 /*******************************************************
1338 Division 8 / 8 => (result + remainder)
1339 *******************************************************/
;; Register allocation shared by the 8-bit division routines below.
1340 #define r_rem r25 /* remainder */
1341 #define r_arg1 r24 /* dividend, quotient */
1342 #define r_arg2 r22 /* divisor */
1343 #define r_cnt r23 /* loop count */
;; __udivmodqi4: Unsigned 8 / 8 division by restoring shift-subtract
;; over 8 bits.  Quotient bits are collected inverted (via the carry
;; left by the compare/subtract) and corrected with the final COM.
;; In:  R24 = dividend, R22 = divisor.
;; Out: R24 = quotient, R25 = remainder.
;; NOTE(review): DEFUN/ENDF, the __udivmodqi4_loop/_ep labels and the
;; ret are elided in this excerpt.
1345 #if defined (L_udivmodqi4)
1347 clr r_rem ; clear remainder
1348 ldi r_cnt,8 ; init loop counter
1349 lsl r_arg1 ; shift dividend
1351 rol r_rem ; shift dividend into remainder
1352 cp r_rem,r_arg2 ; compare remainder & divisor
1353 brcs __udivmodqi4_ep ; remainder <= divisor
1354 sub r_rem,r_arg2 ; restore remainder
1356 rol r_arg1 ; shift dividend (with CARRY)
1357 dec r_cnt ; decrement loop counter
1358 brne __udivmodqi4_loop
1359 com r_arg1 ; complement result
1360 ; because C flag was complemented in loop
1363 #endif /* defined (L_udivmodqi4) */
;; __divmodqi4: Signed 8 / 8 division built on top of __udivmodqi4.
;; The T flag records the dividend's sign (= sign of the remainder) and
;; __tmp_reg__ bit 7 records the quotient's sign (dividend ^ divisor);
;; operands are negated as needed before the unsigned division and the
;; results fixed up afterwards.
;; NOTE(review): the conditional-skip instructions guarding each NEG,
;; plus DEFUN/ENDF and the ret, are elided in this excerpt.
1365 #if defined (L_divmodqi4)
1367 bst r_arg1,7 ; store sign of dividend
1368 mov __tmp_reg__,r_arg1
1369 eor __tmp_reg__,r_arg2; r0.7 is sign of result
1371 neg r_arg1 ; dividend negative : negate
1373 neg r_arg2 ; divisor negative : negate
1374 XCALL __udivmodqi4 ; do the unsigned div/mod
1376 neg r_rem ; correct remainder sign
1379 neg r_arg1 ; correct result sign
1383 #endif /* defined (L_divmodqi4) */
1391 /*******************************************************
1392 Division 16 / 16 => (result + remainder)
1393 *******************************************************/
;; Register allocation shared by the 16-bit division routines below.
1394 #define r_remL r26 /* remainder Low */
1395 #define r_remH r27 /* remainder High */
1397 /* return: remainder */
1398 #define r_arg1L r24 /* dividend Low */
1399 #define r_arg1H r25 /* dividend High */
1401 /* return: quotient */
1402 #define r_arg2L r22 /* divisor Low */
1403 #define r_arg2H r23 /* divisor High */
1405 #define r_cnt r21 /* loop count */
;; __udivmodhi4: Unsigned 16 / 16 division by restoring shift-subtract.
;; The counter is 17 (not 16) because the code first jumps into the
;; loop's entry point, so one count is consumed before the first real
;; iteration.  Quotient bits are collected inverted and presumably
;; corrected by COM instructions near the exit -- the fixup lines are
;; elided here.
;; In:  R25:R24 = dividend, R23:R22 = divisor.
;; Out: R23:R22 = quotient, R25:R24 = remainder (div()-style layout).
;; NOTE(review): the high-byte halves of the shift/compare/subtract
;; sequences, the loop/entry labels and DEFUN/ENDF/ret are elided in
;; this excerpt.
1407 #if defined (L_udivmodhi4)
1410 sub r_remH,r_remH ; clear remainder and carry
1411 ldi r_cnt,17 ; init loop counter
1412 rjmp __udivmodhi4_ep ; jump to entry point
1414 rol r_remL ; shift dividend into remainder
1416 cp r_remL,r_arg2L ; compare remainder & divisor
1418 brcs __udivmodhi4_ep ; remainder < divisor
1419 sub r_remL,r_arg2L ; restore remainder
1422 rol r_arg1L ; shift dividend (with CARRY)
1424 dec r_cnt ; decrement loop counter
1425 brne __udivmodhi4_loop
1428 ; div/mod results to return registers, as for the div() function
1429 mov_l r_arg2L, r_arg1L ; quotient
1430 mov_h r_arg2H, r_arg1H
1431 mov_l r_arg1L, r_remL ; remainder
1432 mov_h r_arg1H, r_remH
1435 #endif /* defined (L_udivmodhi4) */
;; __divmodhi4: Signed 16 / 16 division built on top of __udivmodhi4.
;; The T flag records the dividend's sign (= sign of the remainder) and
;; __tmp_reg__ bit 7 records the quotient's sign; local helpers
;; __divmodhi4_neg1/_neg2 negate dividend/remainder resp.
;; divisor/quotient in place.
;; NOTE(review): the conditional-skip instructions guarding each rcall,
;; the helper bodies, DEFUN/ENDF and the exit label are elided in this
;; excerpt.
1437 #if defined (L_divmodhi4)
1441 bst r_arg1H,7 ; store sign of dividend
1442 mov __tmp_reg__,r_arg2H
1444 com __tmp_reg__ ; r0.7 is sign of result
1445 rcall __divmodhi4_neg1 ; dividend negative: negate
1448 rcall __divmodhi4_neg2 ; divisor negative: negate
1449 XCALL __udivmodhi4 ; do the unsigned div/mod
1451 rcall __divmodhi4_neg2 ; correct remainder sign
1452 brtc __divmodhi4_exit
1454 ;; correct dividend/remainder sign
1460 ;; correct divisor/result sign
1467 #endif /* defined (L_divmodhi4) */
1480 /*******************************************************
1481 Division 24 / 24 => (result + remainder)
1482 *******************************************************/
1484 ;; A[0..2]: In: Dividend; Out: Quotient
1489 ;; B[0..2]: In: Divisor; Out: Remainder
1494 ;; C[0..2]: Expand remainder
1495 #define C0 __zero_reg__
1502 #if defined (L_udivmodpsi4)
1503 ;; R24:R22 = R24:R24 udiv R20:R18
1504 ;; R20:R18 = R24:R22 umod R20:R18
1505 ;; Clobbers: R21, R25, R26
1510 ; Clear remainder and carry. C0 is already 0
1513 ; jump to entry point
1514 rjmp __udivmodpsi4_start
1516 ; shift dividend into remainder
1520 ; compare remainder & divisor
1524 brcs __udivmodpsi4_start ; remainder <= divisor
1525 sub C0, B0 ; restore remainder
1528 __udivmodpsi4_start:
1529 ; shift dividend (with CARRY)
1533 ; decrement loop counter
1535 brne __udivmodpsi4_loop
1539 ; div/mod results to return registers
1544 clr __zero_reg__ ; C0
1547 #endif /* defined (L_udivmodpsi4) */
1549 #if defined (L_divmodpsi4)
1550 ;; R24:R22 = R24:R22 div R20:R18
1551 ;; R20:R18 = R24:R22 mod R20:R18
1552 ;; Clobbers: T, __tmp_reg__, R21, R25, R26
1555 ; R0.7 will contain the sign of the result:
1556 ; R0.7 = A.sign ^ B.sign
1558 ; T-flag = sign of dividend
1562 ; Adjust dividend's sign
1563 rcall __divmodpsi4_negA
1565 ; Adjust divisor's sign
1567 rcall __divmodpsi4_negB
1569 ; Do the unsigned div/mod
1572 ; Adjust quotient's sign
1574 rcall __divmodpsi4_negA
1576 ; Adjust remainder's sign
1577 brtc __divmodpsi4_end
1580 ; Correct divisor/remainder sign
1588 ; Correct dividend/quotient sign
1599 #endif /* defined (L_divmodpsi4) */
1615 /*******************************************************
1616 Division 32 / 32 => (result + remainder)
1617 *******************************************************/
1618 #define r_remHH r31 /* remainder High */
1621 #define r_remL r26 /* remainder Low */
1623 /* return: remainder */
1624 #define r_arg1HH r25 /* dividend High */
1625 #define r_arg1HL r24
1627 #define r_arg1L r22 /* dividend Low */
1629 /* return: quotient */
1630 #define r_arg2HH r21 /* divisor High */
1631 #define r_arg2HL r20
1633 #define r_arg2L r18 /* divisor Low */
1635 #define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
;; __udivmodsi4: Unsigned 32 / 32 division by restoring shift-subtract.
;; Same scheme as __udivmodhi4 extended to 4 bytes: counter is 33 with
;; a jump into the loop entry point; quotient bits are collected
;; inverted and fixed up near the exit (fixup lines elided here).
;; r_cnt is __zero_reg__, which is legal because it is 0 again exactly
;; when the loop terminates.
;; In:  R25:R22 = dividend, R21:R18 = divisor.
;; Out: R21:R18 = quotient, R25:R22 = remainder (ldiv()-style layout).
;; NOTE(review): the middle-byte halves of the shift/compare/subtract
;; sequences, the loop/entry labels and DEFUN/ENDF/ret are elided in
;; this excerpt; the counter presumably moves from r_remL into r_cnt
;; on an elided line -- confirm against the full source.
1637 #if defined (L_udivmodsi4)
1639 ldi r_remL, 33 ; init loop counter
1642 sub r_remH,r_remH ; clear remainder and carry
1643 mov_l r_remHL, r_remL
1644 mov_h r_remHH, r_remH
1645 rjmp __udivmodsi4_ep ; jump to entry point
1647 rol r_remL ; shift dividend into remainder
1651 cp r_remL,r_arg2L ; compare remainder & divisor
1653 cpc r_remHL,r_arg2HL
1654 cpc r_remHH,r_arg2HH
1655 brcs __udivmodsi4_ep ; remainder <= divisor
1656 sub r_remL,r_arg2L ; restore remainder
1658 sbc r_remHL,r_arg2HL
1659 sbc r_remHH,r_arg2HH
1661 rol r_arg1L ; shift dividend (with CARRY)
1665 dec r_cnt ; decrement loop counter
1666 brne __udivmodsi4_loop
1667 ; __zero_reg__ now restored (r_cnt == 0)
1672 ; div/mod results to return registers, as for the ldiv() function
1673 mov_l r_arg2L, r_arg1L ; quotient
1674 mov_h r_arg2H, r_arg1H
1675 mov_l r_arg2HL, r_arg1HL
1676 mov_h r_arg2HH, r_arg1HH
1677 mov_l r_arg1L, r_remL ; remainder
1678 mov_h r_arg1H, r_remH
1679 mov_l r_arg1HL, r_remHL
1680 mov_h r_arg1HH, r_remHH
1683 #endif /* defined (L_udivmodsi4) */
;; __divmodsi4: Signed 32 / 32 division built on top of __udivmodsi4.
;; The T flag records the dividend's sign (= sign of the remainder) and
;; __tmp_reg__ bit 7 records the quotient's sign.  The dividend is
;; negated via __negsi2; divisor/quotient are negated by the local
;; helper __divmodsi4_neg2.
;; NOTE(review): the conditional-skip instructions guarding the negate
;; calls, the helper body, DEFUN/ENDF and the exit label are elided in
;; this excerpt.
1685 #if defined (L_divmodsi4)
1687 mov __tmp_reg__,r_arg2HH
1688 bst r_arg1HH,7 ; store sign of dividend
1690 com __tmp_reg__ ; r0.7 is sign of result
1691 XCALL __negsi2 ; dividend negative: negate
1694 rcall __divmodsi4_neg2 ; divisor negative: negate
1695 XCALL __udivmodsi4 ; do the unsigned div/mod
1696 sbrc __tmp_reg__, 7 ; correct quotient sign
1697 rcall __divmodsi4_neg2
1698 brtc __divmodsi4_exit ; correct remainder sign
1701 ;; correct divisor/quotient sign
1712 #endif /* defined (L_divmodsi4) */
1714 #if defined (L_negsi2)
1716 ;; (neg:SI (reg:SI 22)))
1717 ;; Sets the V flag for signed overflow tests
1722 #endif /* L_negsi2 */
1738 /* *di routines use registers below R19 and won't work with tiny arch
1741 #if !defined (__AVR_TINY__)
1742 /*******************************************************
1745 *******************************************************/
1747 ;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
1748 ;; at least 16k of Program Memory. For smaller Devices, depend
1749 ;; on MOVW and SP Size. There is a Connexion between SP Size and
1750 ;; Flash Size so that SP Size can be used to test for Flash Size.
1752 #if defined (__AVR_HAVE_JMP_CALL__)
1753 # define SPEED_DIV 8
1754 #elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
1755 # define SPEED_DIV 16
1757 # define SPEED_DIV 0
1760 ;; A[0..7]: In: Dividend;
1761 ;; Out: Quotient (T = 0)
1762 ;; Out: Remainder (T = 1)
1772 ;; B[0..7]: In: Divisor; Out: Clobber
1782 ;; C[0..7]: Expand remainder; Out: Remainder (unused)
1792 ;; Holds Signs during Division Routine
1793 #define SS __tmp_reg__
1795 ;; Bit-Counter in Division Routine
1796 #define R_cnt __zero_reg__
1798 ;; Scratch Register for Negation
1801 #if defined (L_udivdi3)
1803 ;; R25:R18 = R24:R18 umod R17:R10
1804 ;; Ordinary ABI-Function
1808 rjmp __udivdi3_umoddi3
1811 ;; R25:R18 = R24:R18 udiv R17:R10
1812 ;; Ordinary ABI-Function
1818 DEFUN __udivdi3_umoddi3
1829 ENDF __udivdi3_umoddi3
1830 #endif /* L_udivdi3 */
1832 #if defined (L_udivmod64)
1834 ;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
1835 ;; No Registers saved/restored; the Callers will take Care.
1836 ;; Preserves B[] and T-flag
1837 ;; T = 0: Compute Quotient in A[]
1838 ;; T = 1: Compute Remainder in A[] and shift SS one Bit left
1842 ;; Clear Remainder (C6, C7 will follow)
1849 #if SPEED_DIV == 0 || SPEED_DIV == 16
1850 ;; Initialize Loop-Counter
1853 #endif /* SPEED_DIV */
1860 1: ;; Compare shifted Dividend against Divisor
1861 ;; If -- even after Shifting -- it is smaller...
1862 CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
1863 cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
1866 ;; ...then we can subtract it. Thus, it is legal to shift left
1867 $ mov C6,C5 $ mov C5,C4 $ mov C4,C3
1868 mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
1869 mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
1870 mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0
1876 ;; Shifted 64 Bits: A7 has traveled to C7
1878 ;; Divisor is greater than Dividend. We have:
1881 ;; Thus, we can return immediately
1884 2: ;; Initialize Bit-Counter with Number of Bits still to be performed
1887 ;; Push of A7 is not needed because C7 is still 0
1891 #elif SPEED_DIV == 16
1893 ;; Compare shifted Dividend against Divisor
1901 ;; Divisor is greater than shifted Dividend: We can shift the Dividend
1902 ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
1903 wmov C2,A6 $ wmov C0,A4
1904 wmov A6,A2 $ wmov A4,A0
1905 wmov A2,C6 $ wmov A0,C4
1907 ;; Set Bit Counter to 32
1911 #error SPEED_DIV = ?
1912 #endif /* SPEED_DIV */
1914 ;; The very Division + Remainder Routine
1916 3: ;; Left-shift Dividend...
1917 lsl A0 $ rol A1 $ rol A2 $ rol A3
1918 rol A4 $ rol A5 $ rol A6 $ rol A7
1920 ;; ...into Remainder
1921 rol C0 $ rol C1 $ rol C2 $ rol C3
1922 rol C4 $ rol C5 $ rol C6 $ rol C7
1924 ;; Compare Remainder and Divisor
1925 CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
1926 cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7
1930 ;; Divisor fits into Remainder: Subtract it from Remainder...
1931 SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
1932 sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7
1934 ;; ...and set according Bit in the upcoming Quotient
1935 ;; The Bit will travel to its final Position
1938 4: ;; This Bit is done
1941 ;; __zero_reg__ is 0 again
1943 ;; T = 0: We are fine with the Quotient in A[]
1944 ;; T = 1: Copy Remainder to A[]
1950 ;; Move the Sign of the Result to SS.7
1956 #endif /* L_udivmod64 */
1959 #if defined (L_divdi3)
1961 ;; R25:R18 = R24:R18 mod R17:R10
1962 ;; Ordinary ABI-Function
1966 rjmp __divdi3_moddi3
1969 ;; R25:R18 = R24:R18 div R17:R10
1970 ;; Ordinary ABI-Function
1976 DEFUN __divdi3_moddi3
1981 ;; Both Signs are 0: the following Complexity is not needed
1982 XJMP __udivdi3_umoddi3
1983 #endif /* SPEED_DIV */
1986 ;; Save 12 Registers: Y, 17...8
1988 do_prologue_saves 12
1990 ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
1991 ;; SS.6 will contain the Sign of the Remainder (A.sign)
1994 ;; Adjust Dividend's Sign as needed
1996 ;; Compiling for Speed we know that at least one Sign must be < 0
1997 ;; Thus, if A[] >= 0 then we know B[] < 0
2001 #endif /* SPEED_DIV */
2005 ;; Adjust Divisor's Sign and SS.7 as needed
2012 com B4 $ com B5 $ com B6 $ com B7
2013 $ com B1 $ com B2 $ com B3
2015 $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
2016 sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN
2018 3: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
2021 ;; Adjust Result's Sign
2022 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2027 #endif /* __AVR_HAVE_JMP_CALL__ */
2030 4: ;; Epilogue: Restore 12 Registers and return
2031 do_epilogue_restores 12
2033 ENDF __divdi3_moddi3
2035 #endif /* L_divdi3 */
2041 .section .text.libgcc, "ax", @progbits
2043 #define TT __tmp_reg__
2045 #if defined (L_adddi3)
2047 ;; (plus:DI (reg:DI 18)
2049 ;; Sets the V flag for signed overflow tests
2050 ;; Sets the C flag for unsigned overflow tests
2052 ADD A0,B0 $ adc A1,B1 $ adc A2,B2 $ adc A3,B3
2053 adc A4,B4 $ adc A5,B5 $ adc A6,B6 $ adc A7,B7
2056 #endif /* L_adddi3 */
2058 #if defined (L_adddi3_s8)
2060 ;; (plus:DI (reg:DI 18)
2061 ;; (sign_extend:SI (reg:QI 26))))
2062 ;; Sets the V flag for signed overflow tests
2063 ;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
2068 ADD A0,r26 $ adc A1,TT $ adc A2,TT $ adc A3,TT
2069 adc A4,TT $ adc A5,TT $ adc A6,TT $ adc A7,TT
2072 #endif /* L_adddi3_s8 */
2074 #if defined (L_subdi3)
2076 ;; (minus:DI (reg:DI 18)
2078 ;; Sets the V flag for signed overflow tests
2079 ;; Sets the C flag for unsigned overflow tests
2081 SUB A0,B0 $ sbc A1,B1 $ sbc A2,B2 $ sbc A3,B3
2082 sbc A4,B4 $ sbc A5,B5 $ sbc A6,B6 $ sbc A7,B7
2085 #endif /* L_subdi3 */
2087 #if defined (L_cmpdi2)
2089 ;; (compare (reg:DI 18)
2092 CP A0,B0 $ cpc A1,B1 $ cpc A2,B2 $ cpc A3,B3
2093 cpc A4,B4 $ cpc A5,B5 $ cpc A6,B6 $ cpc A7,B7
2096 #endif /* L_cmpdi2 */
2098 #if defined (L_cmpdi2_s8)
2100 ;; (compare (reg:DI 18)
2101 ;; (sign_extend:SI (reg:QI 26))))
2106 CP A0,r26 $ cpc A1,TT $ cpc A2,TT $ cpc A3,TT
2107 cpc A4,TT $ cpc A5,TT $ cpc A6,TT $ cpc A7,TT
2110 #endif /* L_cmpdi2_s8 */
2112 #if defined (L_negdi2)
2114 ;; (neg:DI (reg:DI 18)))
2115 ;; Sets the V flag for signed overflow tests
2118 com A4 $ com A5 $ com A6 $ com A7
2119 $ com A1 $ com A2 $ com A3
2121 $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1
2122 sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1
2126 #endif /* L_negdi2 */
2157 #endif /* !defined (__AVR_TINY__) */
2160 .section .text.libgcc.prologue, "ax", @progbits
2162 /**********************************
2163 * This is a prologue subroutine
2164 **********************************/
2165 #if !defined (__AVR_TINY__)
2166 #if defined (L_prologue)
2168 ;; This function does not clobber T-flag; 64-bit division relies on it
2169 DEFUN __prologue_saves__
2188 #if !defined (__AVR_HAVE_SPH__)
2193 #elif defined (__AVR_XMEGA__)
2205 in __tmp_reg__,__SREG__
2208 out __SREG__,__tmp_reg__
2210 #endif /* #SP = 8/16 */
2214 ENDF __prologue_saves__
2215 #endif /* defined (L_prologue) */
2218 * This is an epilogue subroutine
2220 #if defined (L_epilogue)
2222 DEFUN __epilogue_restores__
2240 #if !defined (__AVR_HAVE_SPH__)
2245 #elif defined (__AVR_XMEGA__)
2248 adc r29,__zero_reg__
2255 adc r29,__zero_reg__
2256 in __tmp_reg__,__SREG__
2259 out __SREG__,__tmp_reg__
2263 #endif /* #SP = 8/16 */
2265 ENDF __epilogue_restores__
2266 #endif /* defined (L_epilogue) */
2267 #endif /* !defined (__AVR_TINY__) */
2270 .section .fini9,"ax",@progbits
2276 /* Code from .fini8 ... .fini1 sections inserted by ld script. */
2278 .section .fini0,"ax",@progbits
2282 #endif /* defined (L_exit) */
2290 #endif /* defined (L_cleanup) */
2293 .section .text.libgcc, "ax", @progbits
;; __tablejump2__: Indirect jump through a jumptable located in program
;; memory.  The WORD address of the table entry arrives in Z (plus
;; R24 / RAMPZ on devices with more than 128 KiB flash); the entry is
;; read with ELPM / LPM / LD depending on the device, loaded into Z and
;; presumably dispatched with (E)IJMP -- the final jump instruction is
;; elided in this excerpt.
;; On AVR_TINY flash is memory-mapped, so plain LD with the PM base
;; offset added to Z replaces LPM.
;; NOTE(review): many interior lines (word->byte address shifts, the
;; second byte loads, the ijmp/eijmp and ENDF) are elided here.
2296 DEFUN __tablejump2__
2299 #if defined (__AVR_HAVE_EIJMP_EICALL__)
2300 ;; Word address of gs() jumptable entry in R24:Z
2303 #elif defined (__AVR_HAVE_ELPM__)
2304 ;; Word address of jumptable entry in Z
2307 out __RAMPZ__, __tmp_reg__
2310 ;; Read word address from jumptable and jump
2312 #if defined (__AVR_HAVE_ELPMX__)
2313 elpm __tmp_reg__, Z+
2315 mov r30, __tmp_reg__
2316 #ifdef __AVR_HAVE_RAMPD__
2317 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2318 out __RAMPZ__, __zero_reg__
2321 #elif defined (__AVR_HAVE_ELPM__)
2328 #elif defined (__AVR_HAVE_LPMX__)
2331 mov r30, __tmp_reg__
2333 #elif defined (__AVR_TINY__)
2334 wsubi 30, -(__AVR_TINY_PM_BASE_ADDRESS__) ; Add PM offset to Z
2336 ld r31, Z ; Use ld instead of lpm to load Z
2337 mov r30, __tmp_reg__
2350 #if defined(__AVR_TINY__)
;; __do_copy_data, AVR_TINY variant: startup code (.init4) that copies
;; the initialised .data image from program memory -- which AVR_TINY
;; maps into the data space at __AVR_TINY_PM_BASE_ADDRESS__, so plain
;; LD works -- to its run-time location in RAM.
;; X = r27:r26 destination, Z = r31:r30 source, r18 = hi8(__data_end)
;; for the loop-end comparison.
2352 .section .init4,"ax",@progbits
2353 .global __do_copy_data
2355 ldi r18, hi8(__data_end)
2356 ldi r26, lo8(__data_start)
2357 ldi r27, hi8(__data_start)
2358 ldi r30, lo8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
2359 ldi r31, hi8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
;; Enter at the loop test so an empty .data section copies nothing.
2360 rjmp .L__do_copy_data_start
2361 .L__do_copy_data_loop:
2364 .L__do_copy_data_start:
;; Low-byte end test; the high byte is presumably compared against
;; r18 via cpc (NOTE(review): confirm against full source).
2365 cpi r26, lo8(__data_end)
2367 brne .L__do_copy_data_loop
2371 .section .init4,"ax",@progbits
;; __do_copy_data: startup code (.init4) that copies the initialised
;; .data section from flash (__data_load_start) to RAM (__data_start).
;; X = r27:r26 destination, Z = r31:r30 source, r17 = hi8(__data_end)
;; loop-end sentinel; r16 holds the hh8 address byte on ELPM devices.
;; Three variants, by which flash-read instruction is available.
2372 DEFUN __do_copy_data
2373 #if defined(__AVR_HAVE_ELPMX__)
;; ELPM Z+ available: RAMPZ:Z auto-increments across 64 KiB boundaries.
2374 ldi r17, hi8(__data_end)
2375 ldi r26, lo8(__data_start)
2376 ldi r27, hi8(__data_start)
2377 ldi r30, lo8(__data_load_start)
2378 ldi r31, hi8(__data_load_start)
2379 ldi r16, hh8(__data_load_start)
;; Enter at the loop test so an empty .data section copies nothing.
2381 rjmp .L__do_copy_data_start
2382 .L__do_copy_data_loop:
2385 .L__do_copy_data_start:
2386 cpi r26, lo8(__data_end)
2388 brne .L__do_copy_data_loop
2389 #elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
;; Plain ELPM only: Z does not carry into RAMPZ, so a wrap of Z past
;; 0xFFFF is detected via carry and RAMPZ is bumped by hand.
2390 ldi r17, hi8(__data_end)
2391 ldi r26, lo8(__data_start)
2392 ldi r27, hi8(__data_start)
2393 ldi r30, lo8(__data_load_start)
2394 ldi r31, hi8(__data_load_start)
;; -0x10000 compensates for the first pass through the carry label,
;; which presumably increments RAMPZ (NOTE(review): confirm).
2395 ldi r16, hh8(__data_load_start - 0x10000)
2396 .L__do_copy_data_carry:
2399 rjmp .L__do_copy_data_start
2400 .L__do_copy_data_loop:
;; Carry set => Z wrapped past the 64 KiB boundary: go bump RAMPZ.
2404 brcs .L__do_copy_data_carry
2405 .L__do_copy_data_start:
2406 cpi r26, lo8(__data_end)
2408 brne .L__do_copy_data_loop
2409 #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
;; <= 64 KiB flash: LPM (or LPM Z+ when available) suffices.
2410 ldi r17, hi8(__data_end)
2411 ldi r26, lo8(__data_start)
2412 ldi r27, hi8(__data_start)
2413 ldi r30, lo8(__data_load_start)
2414 ldi r31, hi8(__data_load_start)
2415 rjmp .L__do_copy_data_start
2416 .L__do_copy_data_loop:
2417 #if defined (__AVR_HAVE_LPMX__)
2424 .L__do_copy_data_start:
2425 cpi r26, lo8(__data_end)
2427 brne .L__do_copy_data_loop
2428 #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
2429 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2430 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2431 out __RAMPZ__, __zero_reg__
2432 #endif /* ELPM && RAMPD */
2434 #endif /* L_copy_data */
2435 #endif /* !defined (__AVR_TINY__) */
2437 /* __do_clear_bss is only necessary if there is anything in .bss section. */
2440 .section .init4,"ax",@progbits
;; Zero the .bss section at startup: X = r27:r26 walks from
;; __bss_start to __bss_end; r18 holds hi8(__bss_end) for the
;; end-of-loop comparison.
2441 DEFUN __do_clear_bss
2442 ldi r18, hi8(__bss_end)
2443 ldi r26, lo8(__bss_start)
2444 ldi r27, hi8(__bss_start)
;; Enter at the test so an empty .bss writes nothing.
2445 rjmp .do_clear_bss_start
2448 .do_clear_bss_start:
;; Low-byte end test; the high byte is presumably compared against
;; r18 via cpc (NOTE(review): confirm against full source).
2449 cpi r26, lo8(__bss_end)
2451 brne .do_clear_bss_loop
2455 /* __do_global_ctors and __do_global_dtors are only necessary
2456 if there are any constructors/destructors. */
2458 #if defined(__AVR_TINY__)
;; On AVR_TINY r17/r16 serve as __zero_reg__/__tmp_reg__ (see top of
;; file), so a different scratch register must be picked there.
2459 #define cdtors_tst_reg r18
2461 #define cdtors_tst_reg r17
2465 .section .init6,"ax",@progbits
;; Run global constructors: walk the table from __ctors_end down to
;; __ctors_start, dispatching each entry through __tablejump2__.
;; Y = r29:r28 is the current table word address; cdtors_tst_reg
;; caches pm_hi8(__ctors_start) for the end test; on EIJMP devices
;; r16 tracks the hh8 part of the pointer.
2466 DEFUN __do_global_ctors
2467 ldi cdtors_tst_reg, pm_hi8(__ctors_start)
2468 ldi r28, pm_lo8(__ctors_end)
2469 ldi r29, pm_hi8(__ctors_end)
2470 #ifdef __AVR_HAVE_EIJMP_EICALL__
2471 ldi r16, pm_hh8(__ctors_end)
2472 #endif /* HAVE_EIJMP */
;; Enter at the test so an empty constructor table calls nothing.
2473 rjmp .L__do_global_ctors_start
2474 .L__do_global_ctors_loop:
2476 #ifdef __AVR_HAVE_EIJMP_EICALL__
;; Propagate the borrow of the pointer decrement into the hh8 byte.
2477 sbc r16, __zero_reg__
2479 #endif /* HAVE_EIJMP */
2482 XCALL __tablejump2__
2483 .L__do_global_ctors_start:
2484 cpi r28, pm_lo8(__ctors_start)
2485 cpc r29, cdtors_tst_reg
2486 #ifdef __AVR_HAVE_EIJMP_EICALL__
;; Include the hh8 byte in the end comparison (r24 holds the
;; constant; the matching cpc is presumably at the elided line --
;; NOTE(review): confirm).
2487 ldi r24, pm_hh8(__ctors_start)
2489 #endif /* HAVE_EIJMP */
2490 brne .L__do_global_ctors_loop
2491 ENDF __do_global_ctors
2492 #endif /* L_ctors */
2495 .section .fini6,"ax",@progbits
;; Run global destructors: mirror image of __do_global_ctors -- walk
;; the table upwards from __dtors_start to __dtors_end, dispatching
;; each entry through __tablejump2__.
;; Y = r29:r28 is the current table word address; cdtors_tst_reg
;; caches pm_hi8(__dtors_end) for the end test; on EIJMP devices r16
;; tracks the hh8 part of the pointer.
2496 DEFUN __do_global_dtors
2497 ldi cdtors_tst_reg, pm_hi8(__dtors_end)
2498 ldi r28, pm_lo8(__dtors_start)
2499 ldi r29, pm_hi8(__dtors_start)
2500 #ifdef __AVR_HAVE_EIJMP_EICALL__
2501 ldi r16, pm_hh8(__dtors_start)
2502 #endif /* HAVE_EIJMP */
;; Enter at the test so an empty destructor table calls nothing.
2503 rjmp .L__do_global_dtors_start
2504 .L__do_global_dtors_loop:
2505 #ifdef __AVR_HAVE_EIJMP_EICALL__
2507 #endif /* HAVE_EIJMP */
2510 XCALL __tablejump2__
2512 #ifdef __AVR_HAVE_EIJMP_EICALL__
;; Propagate the carry of the pointer increment into the hh8 byte.
2513 adc r16, __zero_reg__
2514 #endif /* HAVE_EIJMP */
2515 .L__do_global_dtors_start:
2516 cpi r28, pm_lo8(__dtors_end)
2517 cpc r29, cdtors_tst_reg
2518 #ifdef __AVR_HAVE_EIJMP_EICALL__
;; Include the hh8 byte in the end comparison (r24 holds the
;; constant; the matching cpc is presumably at the elided line --
;; NOTE(review): confirm).
2519 ldi r24, pm_hh8(__dtors_end)
2521 #endif /* HAVE_EIJMP */
2522 brne .L__do_global_dtors_loop
2523 ENDF __do_global_dtors
2524 #endif /* L_dtors */
2526 #undef cdtors_tst_reg
2528 .section .text.libgcc, "ax", @progbits
2530 #if !defined (__AVR_TINY__)
2531 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2532 ;; Loading n bytes from Flash; n = 3,4
2533 ;; R22... = Flash[Z]
2534 ;; Clobbers: __tmp_reg__
2536 #if (defined (L_load_3) \
2537 || defined (L_load_4)) \
2538 && !defined (__AVR_HAVE_LPMX__)
2546 .macro .load dest, n
2549 .if \dest != D0+\n-1
2556 #if defined (L_load_3)
2563 #endif /* L_load_3 */
2565 #if defined (L_load_4)
2573 #endif /* L_load_4 */
2575 #endif /* L_load_3 || L_load_4 */
2576 #endif /* !defined (__AVR_TINY__) */
2578 #if !defined (__AVR_TINY__)
2579 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2580 ;; Loading n bytes from Flash or RAM; n = 1,2,3,4
2581 ;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
2582 ;; Clobbers: __tmp_reg__, R21, R30, R31
2584 #if (defined (L_xload_1) \
2585 || defined (L_xload_2) \
2586 || defined (L_xload_3) \
2587 || defined (L_xload_4))
2595 ;; Register containing bits 16+ of the address
2599 .macro .xload dest, n
2600 #if defined (__AVR_HAVE_ELPMX__)
2602 #elif defined (__AVR_HAVE_ELPM__)
2605 .if \dest != D0+\n-1
2607 adc HHI8, __zero_reg__
2610 #elif defined (__AVR_HAVE_LPMX__)
2615 .if \dest != D0+\n-1
2619 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2620 .if \dest == D0+\n-1
2621 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2622 out __RAMPZ__, __zero_reg__
2627 #if defined (L_xload_1)
2629 #if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
2638 #if defined (__AVR_HAVE_ELPM__)
2640 #endif /* __AVR_HAVE_ELPM__ */
2645 #endif /* LPMx && ! ELPM */
2647 #endif /* L_xload_1 */
2649 #if defined (L_xload_2)
2653 #if defined (__AVR_HAVE_ELPM__)
2655 #endif /* __AVR_HAVE_ELPM__ */
2663 #endif /* L_xload_2 */
2665 #if defined (L_xload_3)
2669 #if defined (__AVR_HAVE_ELPM__)
2671 #endif /* __AVR_HAVE_ELPM__ */
2681 #endif /* L_xload_3 */
2683 #if defined (L_xload_4)
2687 #if defined (__AVR_HAVE_ELPM__)
2689 #endif /* __AVR_HAVE_ELPM__ */
2701 #endif /* L_xload_4 */
2703 #endif /* L_xload_{1|2|3|4} */
2704 #endif /* if !defined (__AVR_TINY__) */
2706 #if !defined (__AVR_TINY__)
2707 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2708 ;; memcopy from Address Space __pgmx to RAM
2709 ;; R23:Z = Source Address
2710 ;; X = Destination Address
2711 ;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
2713 #if defined (L_movmemx)
2719 ;; #Bytes to copy fits in 8 Bits (1..255)
2720 ;; Zero-extend Loop Counter
2733 #if defined (__AVR_HAVE_ELPM__)
2737 0: ;; Load 1 Byte from Flash...
2739 #if defined (__AVR_HAVE_ELPMX__)
2741 #elif defined (__AVR_HAVE_ELPM__)
2744 adc HHI8, __zero_reg__
2746 #elif defined (__AVR_HAVE_LPMX__)
2753 ;; ...and store that Byte to RAM Destination
2757 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2758 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2759 out __RAMPZ__, __zero_reg__
2760 #endif /* ELPM && RAMPD */
2765 1: ;; Read 1 Byte from RAM...
2767 ;; and store that Byte to RAM Destination
2777 #endif /* L_movmemx */
2778 #endif /* !defined (__AVR_TINY__) */
2781 .section .text.libgcc.builtins, "ax", @progbits
2783 /**********************************
2784 * Find first set Bit (ffs)
2785 **********************************/
2787 #if defined (L_ffssi2)
2788 ;; find first set bit
2789 ;; r25:r24 = ffs32 (r25:r22)
2790 ;; clobbers: r22, r26
2808 #endif /* defined (L_ffssi2) */
2810 #if defined (L_ffshi2)
2811 ;; find first set bit
2812 ;; r25:r24 = ffs16 (r25:r24)
2816 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2817 ;; Some cores have problem skipping 2-word instruction
2821 cpse r24, __zero_reg__
2822 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2823 1: XJMP __loop_ffsqi2
2829 #endif /* defined (L_ffshi2) */
2831 #if defined (L_loop_ffsqi2)
2832 ;; Helper for ffshi2, ffssi2
2833 ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
2844 #endif /* defined (L_loop_ffsqi2) */
2847 /**********************************
2848 * Count trailing Zeros (ctz)
2849 **********************************/
2851 #if defined (L_ctzsi2)
2852 ;; count trailing zeros
2853 ;; r25:r24 = ctz32 (r25:r22)
2854 ;; clobbers: r26, r22
2856 ;; Note that ctz(0) is undefined for GCC
2862 #endif /* defined (L_ctzsi2) */
2864 #if defined (L_ctzhi2)
2865 ;; count trailing zeros
2866 ;; r25:r24 = ctz16 (r25:r24)
2869 ;; Note that ctz(0) is undefined for GCC
2875 #endif /* defined (L_ctzhi2) */
2878 /**********************************
2879 * Count leading Zeros (clz)
2880 **********************************/
2882 #if defined (L_clzdi2)
2883 ;; count leading zeros
2884 ;; r25:r24 = clz64 (r25:r18)
2885 ;; clobbers: r22, r23, r26
2898 #endif /* defined (L_clzdi2) */
2900 #if defined (L_clzsi2)
2901 ;; count leading zeros
2902 ;; r25:r24 = clz32 (r25:r22)
2914 #endif /* defined (L_clzsi2) */
2916 #if defined (L_clzhi2)
2917 ;; count leading zeros
2918 ;; r25:r24 = clz16 (r25:r24)
2940 #endif /* defined (L_clzhi2) */
2943 /**********************************
2945 **********************************/
2947 #if defined (L_paritydi2)
2948 ;; r25:r24 = parity64 (r25:r18)
2949 ;; clobbers: __tmp_reg__
2957 #endif /* defined (L_paritydi2) */
2959 #if defined (L_paritysi2)
2960 ;; r25:r24 = parity32 (r25:r22)
2961 ;; clobbers: __tmp_reg__
2967 #endif /* defined (L_paritysi2) */
2969 #if defined (L_parityhi2)
2970 ;; r25:r24 = parity16 (r25:r24)
2971 ;; clobbers: __tmp_reg__
2977 ;; r25:r24 = parity8 (r24)
2978 ;; clobbers: __tmp_reg__
2980 ;; parity is in r24[0..7]
2981 mov __tmp_reg__, r24
2983 eor r24, __tmp_reg__
2984 ;; parity is in r24[0..3]
2988 ;; parity is in r24[0,3]
2991 ;; parity is in r24[0]
2996 #endif /* defined (L_parityhi2) */
2999 /**********************************
3001 **********************************/
3003 #if defined (L_popcounthi2)
3005 ;; r25:r24 = popcount16 (r25:r24)
3006 ;; clobbers: __tmp_reg__
3016 DEFUN __popcounthi2_tail
3018 add r24, __tmp_reg__
3020 ENDF __popcounthi2_tail
3021 #endif /* defined (L_popcounthi2) */
3023 #if defined (L_popcountsi2)
3025 ;; r25:r24 = popcount32 (r25:r22)
3026 ;; clobbers: __tmp_reg__
3033 XJMP __popcounthi2_tail
3035 #endif /* defined (L_popcountsi2) */
3037 #if defined (L_popcountdi2)
3039 ;; r25:r24 = popcount64 (r25:r18)
3040 ;; clobbers: r22, r23, __tmp_reg__
3049 XJMP __popcounthi2_tail
3051 #endif /* defined (L_popcountdi2) */
3053 #if defined (L_popcountqi2)
3055 ;; r24 = popcount8 (r24)
3056 ;; clobbers: __tmp_reg__
3058 mov __tmp_reg__, r24 ; oeoeoeoe
3059 andi r24, 0xAA ; o0o0o0o0
3061 ;; Four values 0, 1 or 2: # bits set o+e
3062 sub __tmp_reg__, r24 ; 44332211
3063 mov r24, __tmp_reg__ ; 44332211
3064 andi r24, 0x33 ; 00330011
3065 eor __tmp_reg__, r24 ; 44002200
3066 lsr __tmp_reg__ ; 04400220
3067 lsr __tmp_reg__ ; 00440022
3068 add r24, __tmp_reg__ ; 04210421
3069 mov __tmp_reg__, r24 ; h421l421
3070 swap __tmp_reg__ ; l421h421
3071 add r24, __tmp_reg__ ; 84218421
3072 andi r24, 0xf ; 00008421 /17
3075 #endif /* defined (L_popcountqi2) */
3078 /**********************************
3080 **********************************/
3082 ;; swap two registers with different register number
3089 #if defined (L_bswapsi2)
3091 ;; r25:r22 = bswap32 (r25:r22)
3097 #endif /* defined (L_bswapsi2) */
3099 #if defined (L_bswapdi2)
3101 ;; r25:r18 = bswap64 (r25:r18)
3109 #endif /* defined (L_bswapdi2) */
3112 /**********************************
3114 **********************************/
3116 #if defined (L_ashrdi3)
3118 #define SS __zero_reg__
3120 ;; Arithmetic shift right
3121 ;; r25:r18 = ashr64 (r25:r18, r17:r16)
3128 ;; Logic shift right
3129 ;; r25:r18 = lshr64 (r25:r18, r17:r16)
3131 ;; Signs are in SS (zero_reg)
3132 mov __tmp_reg__, r16
3157 mov r16, __tmp_reg__
3163 #endif /* defined (L_ashrdi3) */
3165 #if defined (L_ashldi3)
3167 ;; r25:r18 = ashl64 (r25:r18, r17:r16)
3168 ;; This function does not clobber T.
3170 mov __tmp_reg__, r16
3193 mov r16, __tmp_reg__
3196 #endif /* defined (L_ashldi3) */
3198 #if defined (L_rotldi3)
3200 ;; r25:r18 = rotl64 (r25:r18, r17:r16)
3206 mov __tmp_reg__, r25
3214 mov r18, __tmp_reg__
3224 adc r18, __zero_reg__
3230 #endif /* defined (L_rotldi3) */
3233 .section .text.libgcc.fmul, "ax", @progbits
3235 /***********************************************************/
3236 ;;; Softmul versions of FMUL, FMULS and FMULSU to implement
3237 ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
3238 /***********************************************************/
3244 #define A0 __tmp_reg__
3247 ;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
3248 ;;; Clobbers: r24, r25, __tmp_reg__
3250 ;; A0.7 = negate result?
3258 #endif /* L_fmuls */
3261 ;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
3262 ;;; Clobbers: r24, r25, __tmp_reg__
3264 ;; A0.7 = negate result?
3269 ;; Helper for __fmuls and __fmulsu
3274 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
3275 ;; Some cores have problem skipping 2-word instruction
3280 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
3283 ;; C = -C iff A0.7 = 1
3287 #endif /* L_fmulsu */
3291 ;;; r22:r23 = fmul (r24, r25) like in FMUL instruction
3292 ;;; Clobbers: r24, r25, __tmp_reg__
3299 ;; 1.0 = 0x80, so test for bit 7 of B to see if A must to be added to C.
3322 /**********************************
3324 **********************************/
3326 #if defined (L_powif)
3327 #ifndef __AVR_TINY__
3329 ;; float output and arg #1
3341 ;; float X: input and iterated squares
3347 ;; float Y: expand result
3354 ;; .0 == 0 => Y = 1.0f implicitly.
3358 ;;; Integer exponent input.
3362 #define ONE 0x3f800000
3365 ;; Save 11 Registers: R9...R17, R28, R29
3366 do_prologue_saves 11
3368 ;; Fill local vars with input parameters.
3371 ;; Save sign of exponent for later.
3378 ;; Y := (I % 2) ? X : 1.0f
3379 ;; (When we come from below, this is like SET, i.e. Flags.Y_set := 1).
3381 ;; Flags.Y_set = false means that we have to assume Y = 1.0f below.
3383 2: ;; We have A == X when we come from above.
3395 #ifdef __WITH_AVRLIBC__
3400 #endif /* Have AVR-LibC? */
3403 ;; if (I % 2 == 1) Y := Y * X
3407 ;; When Y is not set => Y := Y * X = 1.0f * X (= A)
3408 ;; Plus, we have to set Y_set = 1 (= I0.0)
3410 ;; Y is already set: Y := X * Y (= A * Y)
3424 ;; When Y is still not set, the result is 1.0f (= A).
3428 ;; if (I was < 0) Y = 1.0f / Y
3434 1: ;; A := 1 / Y = A / Y
3439 do_epilogue_restores 11
3466 #endif /* __AVR_TINY__ */
3467 #endif /* L_powif */
3469 #include "lib1funcs-fixed.S"