1 ! SPDX-License-Identifier: GPL-2.0
5 ! by Toshiyasu Morita (tm@netcom.com)
6 ! hacked by J"orn Rennecke (joern.rennecke@superh.com) ("o for o-umlaut)
7 ! SH5 code Copyright 2002 SuperH Ltd.
9 ! Entry: ARG0: destination pointer
10 ! ARG1: source pointer
13 ! Exit: RESULT: destination pointer
14 ! any other registers in the range r0-r7: trashed
16 ! Notes: Usually one wants to do small reads and write a longword, but
17 ! unfortunately it is difficult in some cases to concatenate bytes
18 ! into a longword on the SH, so this does a longword read and small
21 ! This implementation makes two assumptions about how it is called:
23 ! 1.: If the byte count is nonzero, the address of the last byte to be
24 ! copied is unsigned greater than the address of the first byte to
25 ! be copied. This could be easily swapped for a signed comparison,
26 ! but the algorithm used needs some comparison.
28 ! 2.: When there are two or three bytes in the last word of an 11-or-more
29 ! bytes memory chunk to be copied, the rest of the word can be read
30 ! without side effects.
31 ! This could be easily changed by increasing the minimum size of
32 ! a fast memcpy and the amount subtracted from r7 before L_2l_loop by 2,
33 ! however, this would cost a few extra cycles on average.
34 ! For SHmedia, the assumption is that any quadword can be read in its
35 ! entirety if at least one byte is included in the copy.
37 /* Imported into Linux kernel by Richard Curnow. This is used to implement the
38 __copy_user function in the general case, so it has to be a distinct
39 function from intra-kernel memcpy to allow for exception fix-ups in the
40 event that the user pointer is bad somewhere in the copy (e.g. due to
41 running off the end of the vma).
43 Note, this algorithm will be slightly wasteful in the case where the source
44 and destination pointers are equally aligned, because the stlo/sthi pairs
45 could then be merged back into single stores. If there are a lot of cache
46 misses, this is probably offset by the stall lengths on the preloads.
50 /* NOTE : Prefetches removed and allocos guarded by synco to avoid TAKum03020
51 * erratum. The first two prefetches are nop-ed out to avoid upsetting the
52 * instruction counts used in the jump address calculation.
/*
 * Place the routine in the .text..SHmedia32 section ("ax" flags) and export
 * both copy_user_memcpy and copy_user_memcpy_end.  The _end label sits after
 * the last instruction of the copy code.
 * NOTE(review): presumably the two symbols let the exception fix-up code
 * recognise faults raised inside this routine -- confirm against the caller.
 */
55 .section .text..SHmedia32,"ax"
58 .global copy_user_memcpy
59 .global copy_user_memcpy_end
/*
 * Lo/hi access pairs on base register P.  Each macro expands to exactly two
 * instructions: the "lo" form at offset O and the "hi" form at the offset of
 * the last byte of the same item (O+7 for the .q variants, O+3 for the .l
 * variants).
 *
 *   LDUAQ / LDUAL : ldlo into D0, ldhi into D1 (two separate destinations).
 *   STUAQ / STUAL : stlo from D0, sthi from D1.
 *
 * These are the stlo/sthi (and matching ldlo/ldhi) pairs referred to in the
 * header comment: they are used so that source and destination need not be
 * equally aligned.  Because each use expands to two instructions, changing a
 * macro changes the instruction counts that the jump address calculation
 * depends on.
 */
62 #define LDUAQ(P,O,D0,D1) ldlo.q P,O,D0; ldhi.q P,O+7,D1
63 #define STUAQ(P,O,D0,D1) stlo.q P,O,D0; sthi.q P,O+7,D1
64 #define LDUAL(P,O,D0,D1) ldlo.l P,O,D0; ldhi.l P,O+3,D1
65 #define STUAL(P,O,D0,D1) stlo.l P,O,D0; sthi.l P,O+3,D1
/*
 * First of the two removed prefetches (was: ld.b r3,0,r63).  It is replaced
 * by a nop rather than deleted so that the instruction counts used in the
 * jump address calculation are not disturbed (TAKum03020 erratum workaround).
 */
67 nop ! ld.b r3,0,r63 ! TAKum03020
/*
 * r1 = low 16 bits of (L1 - L0 + 63*32 + 1).
 * NOTE(review): L0 is not among the lines visible here; presumably this is
 * the base of the size-indexed jump into the small-copy handlers below --
 * confirm against the full routine.
 */
73 movi (L1-L0+63*32 + 1) & 0xffff,r1
81 /* Rearranged to make cut2 safe */
/* Continuation labels for the small-copy handlers. */
83 L4_7: /* 4..7 byte memcpy cntd. */
91 L1: /* 0 byte memcpy */
99 L2_3: /* 2 or 3 byte memcpy cntd. */
108 L8_15: /* 8..15 byte memcpy cntd. */
115 /* 2 or 3 byte memcpy */
/* Second removed prefetch (was: ld.b r2,0,r63), kept as a nop placeholder. */
117 nop ! ld.b r2,0,r63 ! TAKum03020
125 /* 4 .. 7 byte memcpy */
/* ldlo.l/ldhi.l pair at r3+0 and r3+3: lo part into r0, hi part into r1. */
126 LDUAL (r3, 0, r0, r1)
134 /* 8 .. 15 byte memcpy */
/* ldlo.q/ldhi.q pair at r3+0 and r3+7: lo part into r0, hi part into r1. */
135 LDUAQ (r3, 0, r0, r1)
143 /* 16 .. 24 byte memcpy */
/* Two ldlo.q/ldhi.q pairs: r3+0 into r0/r1, then r3+8 into r8/r9. */
144 LDUAQ (r3, 0, r0, r1)
145 LDUAQ (r3, 8, r8, r9)
/* Prefetch left commented out for the TAKum03020 erratum. */
160 ! ld.b r2, 0, r63 ! TAKum03020
/* r27 = 72 (64+8). */
168 movi 64+8, r27 ! could subtract r7 from that.
/* Prefetch left commented out for the TAKum03020 erratum. */
182 ! ldx.q r22, r36, r63 ! TAKum03020
/* End-of-routine marker; declared .global near the top of the file. */
217 copy_user_memcpy_end: