1 /* SPDX-License-Identifier: GPL-2.0 */
2 /* Cloned and hacked for uClibc by Paul Mundt, December 2003 */
3 /* Modified by SuperH, Inc. September 2003 */
7 ! by Toshiyasu Morita (tm@netcom.com)
8 ! hacked by J"orn Rennecke (joern.rennecke@superh.com) ("o for o-umlaut)
9 ! SH5 code Copyright 2002 SuperH Ltd.
11 ! Entry: ARG0: destination pointer
12 ! ARG1: source pointer
15 ! Exit: RESULT: destination pointer
16 ! any other registers in the range r0-r7: trashed
18 ! Notes: Usually one wants to do small reads and write a longword, but
19 ! unfortunately it is difficult in some cases to concatenate bytes
20 ! into a longword on the SH, so this does a longword read and small
23 ! This implementation makes two assumptions about how it is called:
25 ! 1.: If the byte count is nonzero, the address of the last byte to be
26 ! copied is unsigned greater than the address of the first byte to
27 ! be copied. This could be easily swapped for a signed comparison,
28 ! but the algorithm used needs some comparison.
30 ! 2.: When there are two or three bytes in the last word of an 11-or-more
31 ! bytes memory chunk to be copied, the rest of the word can be read
32 ! without side effects.
33 ! This could be easily changed by increasing the minimum size of
34 ! a fast memcpy and the amount subtracted from r7 before L_2l_loop by 2,
35 ! however, this would cost a few extra cycles on average.
36 ! For SHmedia, the assumption is that any quadword can be read in its
37 ! entirety if at least one byte is included in the copy.
40 .section .text..SHmedia32,"ax"
42 .type memcpy, @function
/*
 * Two-instruction access helpers, each pairing a "lo" and a "hi" instruction.
 *
 *   P  - base address register
 *   O  - byte offset of the first byte accessed
 *   D0 - register used by the "lo" instruction (offset O)
 *   D1 - register used by the "hi" instruction (offset O+7 for the .q
 *        forms, O+3 for the .l forms, i.e. the last byte of an 8-byte or
 *        4-byte span starting at O)
 *
 * LDUAQ / LDUAL expand to ldlo + ldhi, STUAQ / STUAL to stlo + sthi.
 * The load macros leave their results in two separate registers (D0 and
 * D1); nothing in the macro merges them.
 * NOTE(review): the names suggest "load/store unaligned quadword/longword";
 * presumably callers combine D0 and D1 themselves -- confirm against the
 * call sites in the full file.
 */
47 #define LDUAQ(P,O,D0,D1) ldlo.q P,O,D0; ldhi.q P,O+7,D1
48 #define STUAQ(P,O,D0,D1) stlo.q P,O,D0; sthi.q P,O+7,D1
49 #define LDUAL(P,O,D0,D1) ldlo.l P,O,D0; ldhi.l P,O+3,D1
50 #define STUAL(P,O,D0,D1) stlo.l P,O,D0; sthi.l P,O+3,D1
/* r1 = low 16 bits of (L1 - L0 + 63*32 + 1).  L1 is the 0-byte entry
   defined below; L0 is defined outside the lines visible here.
   NOTE(review): looks like the base for a computed branch into the
   size-specific entries that follow -- confirm against the full file. */
58 movi (L1-L0+63*32 + 1) & 0xffff,r1
66 /* Rearranged to make cut2 safe */
/* Continuation labels for the small-size cases; the first part of each
   case is under the matching "N byte memcpy" comment further down. */
68 L4_7: /* 4..7 byte memcpy cntd. */
76 L1: /* 0 byte memcpy */
84 L2_3: /* 2 or 3 byte memcpy cntd. */
93 L8_15: /* 8..15 byte memcpy cntd. */
100 /* 2 or 3 byte memcpy */
110 /* 4 .. 7 byte memcpy */
/* Expands to: ldlo.l r3,0,r0 ; ldhi.l r3,0+3,r1 */
111 LDUAL (r3, 0, r0, r1)
119 /* 8 .. 15 byte memcpy */
/* Expands to: ldlo.q r3,0,r0 ; ldhi.q r3,0+7,r1 */
120 LDUAQ (r3, 0, r0, r1)
128 /* 16 .. 24 byte memcpy */
/* Two lo/hi load pairs off r3: offsets 0..7 into r0/r1 and offsets 8..15
   into r8/r9. */
129 LDUAQ (r3, 0, r0, r1)
130 LDUAQ (r3, 8, r8, r9)
/* r27 = 72 (64+8).  NOTE(review): how r27 is used is not visible here. */
153 movi 64+8, r27 // could subtract r7 from that.
/* Set the size of symbol memcpy to (current location - memcpy). */
202 .size memcpy,.-memcpy