1 ! SPDX-License-Identifier: GPL-2.0
2 ! Copyright (C) 2008-2012 Imagination Technologies Ltd.
! _memcpy - copy D1Ar3 bytes from the source (D0Ar2) to the destination
! (D1Ar1). The original destination is kept in A0.3 for the return value.
!
! Strategy, as described by the sections below:
!   - fewer than 16 bytes: plain byte copy loop;
!   - otherwise copy single bytes until the destination is 8 byte aligned;
!   - source also 8 byte aligned: copy 32 byte blocks with GETL/SETL pairs;
!   - source not 8 byte aligned: read from the rounded-down source address
!     and realign the data, with separate paths for a mis-alignment of less
!     than 4 bytes, exactly 4 bytes, and more than 4 bytes;
!   - any bytes left over are handled by the byte copy loop.
13 MOV A1.2, D0Ar2 ! A1.2 = source pointer (used for all loads)
14 MOV A0.2, D1Ar1 ! A0.2 = destination pointer (used for all stores)
15 MOV A0.3, D1Ar1 ! A0.3 = copy of the original destination, for the return value
16 ! If there are fewer than 16 bytes to copy, use the byte copy loop
20 ! Simply copy a byte at a time
29 ! Finally set return value and return
34 ANDS D1Ar5, D1Ar1, #7 ! test destination alignment (D1Ar5 = dest & 7)
37 ! The destination address is not 8 byte aligned. We will copy bytes from
38 ! the source to the destination until the remaining data has an 8 byte
39 ! destination address alignment (i.e. we should never copy more than 7
43 ADD D1Ar5, D1Ar5, #1 ! dest is aligned when D1Ar5 reaches #8
44 SUB D1Ar3, D1Ar3, #1 ! decrement count of remaining bytes
49 ! We have at least (16 - 7) = 9 bytes to copy - calculate the number of 8 byte
50 ! blocks, then jump to the unaligned copy loop or fall through to the aligned
51 ! copy loop as appropriate.
54 LSR D1Ar5, D1Ar3, #3 ! D1Ar5 = number of 8 byte blocks
55 ANDS D0Ar4, D0Ar4, #7 ! test source alignment
56 BNZ $Lunaligned_copy ! if unaligned, use unaligned copy loop
58 ! Both source and destination are 8 byte aligned - the easy case.
60 LSRS D1Ar5, D1Ar3, #5 ! D1Ar5 = number of 32 byte blocks
! One 32 byte block: two groups of (two 8 byte loads, two 8 byte stores).
65 GETL D0Re0, D1Re0, [A1.2++] ! load 8 bytes, post-incrementing the source pointer
66 GETL D0Ar6, D1Ar5, [A1.2++] ! load the following 8 bytes
67 SETL [A0.2++], D0Re0, D1Re0 ! store the first 8 bytes, post-incrementing the destination
68 SETL [A0.2++], D0Ar6, D1Ar5 ! store the following 8 bytes
69 GETL D0Re0, D1Re0, [A1.2++] ! second half of the block: load 8 bytes
70 GETL D0Ar6, D1Ar5, [A1.2++] ! load the last 8 bytes of the block
71 SETL [A0.2++], D0Re0, D1Re0 ! store 8 bytes
72 SETL [A0.2++], D0Ar6, D1Ar5 ! store the last 8 bytes of the block
75 ! If there are any remaining bytes, use the byte copy loop; otherwise we are done
76 ANDS D1Ar3, D1Ar3, #0x1f ! D1Ar3 = count & 31, the bytes left after the 32 byte blocks
80 ! The destination is 8 byte aligned but the source is not, and there are 8
81 ! or more bytes to be copied.
83 ! Adjust the source pointer (A1.2) to the 8 byte boundary before its
87 ANDMB D0Ar4, D0Ar4, #0xfff8 ! clear the low 3 bits (round down to an 8 byte boundary)
89 ! Save the number of bytes of mis-alignment in D0Ar4 for use later
90 SUBS D0Ar6, D0Ar6, D0Ar4 ! D0Ar6 = D0Ar6 - D0Ar4 (the rounded-down value), setting flags
92 ! if there is no mis-alignment after all, use the aligned copy loop
96 GETL D0Re0, D1Re0, [A1.2] ! load 8 bytes from A1.2 without advancing it
100 ! There are 3 mis-alignment cases to be considered. Less than 4 bytes, exactly
101 ! 4 bytes, and more than 4 bytes.
103 BLT $Lunaligned_1_2_3 ! use 1-3 byte mis-alignment loop
104 BZ $Lunaligned_4 ! use 4 byte mis-alignment loop
106 ! The mis-alignment is more than 4 bytes
109 ! Calculate the bit offsets required for the shift operations necessary
111 ! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
112 MULW D0Ar6, D0Ar6, #8 ! convert a byte offset into a bit offset (x8)
114 SUB D1Ar5, D1Ar5, D0Ar6 ! D1Ar5 = D1Ar5 - bit offset
115 ! Move data 4 bytes before we enter the main loop
119 GETL D0Ar2, D1Ar1, [++A1.2] ! pre-increment the source pointer and load the next 8 bytes
120 ! form 64-bit data in D0Re0, D1Re0
121 LSR D0Re0, D0Re0, D0Ar6 ! shift right by the bit offset
123 LSL D1Re0, D1Re0, D1Ar5 ! shift left by (32 - bit offset)
124 ADD D0Re0, D0Re0, D1Re0 ! merge the two shifted parts into D0Re0
126 LSR D0Ar2, D0Ar2, D0Ar6 ! shift right by the bit offset
127 LSL D1Re0, D1Ar1, D1Ar5 ! D1Re0 = D1Ar1 shifted left by (32 - bit offset)
128 ADD D1Re0, D1Re0, D0Ar2 ! merge the two shifted parts into D1Re0
130 SETL [A0.2++], D0Re0, D1Re0 ! store the realigned 8 bytes, post-incrementing the destination
137 ! Calculate the bit offsets required for the shift operations necessary
139 ! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
140 MULW D0Ar6, D0Ar6, #8 ! convert a byte offset into a bit offset (x8)
142 SUB D1Ar5, D1Ar5, D0Ar6 ! D1Ar5 = D1Ar5 - bit offset
145 ! form 64-bit data in D0Re0,D1Re0
146 LSR D0Re0, D0Re0, D0Ar6 ! shift right by the bit offset
147 LSL D1Ar1, D1Re0, D1Ar5 ! D1Ar1 = D1Re0 shifted left by (32 - bit offset)
148 ADD D0Re0, D0Re0, D1Ar1 ! merge the two shifted parts into D0Re0
150 LSR D0FrT, D0Ar2, D0Ar6 ! D0FrT = D0Ar2 shifted right by the bit offset
151 GETL D0Ar2, D1Ar1, [++A1.2] ! pre-increment the source pointer and load the next 8 bytes
154 LSL D1Re0, D1Re0, D1Ar5 ! shift left by (32 - bit offset)
155 ADD D1Re0, D1Re0, D0FrT ! merge the two shifted parts into D1Re0
157 SETL [A0.2++], D0Re0, D1Re0 ! store the realigned 8 bytes, post-incrementing the destination
164 ! The 4 byte mis-alignment case - this does not require any shifting, just a
165 ! shuffling of registers.
169 GETL D0Ar2, D1Ar1, [++A1.2] ! pre-increment the source pointer and load the next 8 bytes
171 SETL [A0.2++], D0Re0, D1Re0 ! store 8 bytes, post-incrementing the destination
176 ! If there are no remaining bytes to copy, we are done.
177 ANDS D1Ar3, D1Ar3, #7 ! D1Ar3 = count & 7, the bytes left after the 8 byte blocks
179 ! Re-adjust the source pointer (A1.2) back to the actual (unaligned) byte
180 ! address of the remaining bytes, and fall through to the byte copy loop.
182 ADD D1Ar5, D0Ar4, D0Ar6 ! D1Ar5 = D0Ar4 (the mis-alignment saved earlier) + D0Ar6
186 .size _memcpy,.-_memcpy