1 /* $NetBSD: memcpy_arm.S,v 1.4 2013/08/11 04:56:32 matt Exp $ */
4 * Copyright (c) 1997 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Neil A. Carson and Mark Brinicombe
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
32 #include <machine/asm.h>
35 #if defined(__ARM_EABI__)
/* EABI builds: presumably also exports memcpy under the name __aeabi_memcpy (STRONG_ALIAS is defined outside this file - confirm) */
36 STRONG_ALIAS(__aeabi_memcpy, memcpy)
38 #endif /* !defined(__minix) */
41 * This is one fun bit of code ...
42 * Some easy listening music is suggested while trying to understand this
43 * code e.g. Iron Maiden
45 * For anyone attempting to understand it :
47 * The core code is implemented here with simple stubs for memcpy().
49 * All local labels are prefixed with Lmemcpy_
50 * Following the prefix a label starting f is used in the forward copy code
51 * while a label using b is used in the backwards copy code
52 * The source and destination addresses determine whether a forward or
53 * backward copy is performed.
54 * Separate bits of code are used to deal with the following situations
55 * for both the forward and backwards copy.
56 * unaligned source address
57 * unaligned destination address
58 * Separate copy routines are used to produce an optimised result for each
60 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
61 * a time where possible.
63 * Note: r12 (aka ip) can be trashed during the function along with
64 * r0-r3 although r0-r2 have defined uses i.e. dest, src, len throughout.
65 * Additional registers are preserved prior to use i.e. r4, r5 & lr
67 * Apologies for the state of the comments ;-)
70 /* For MINIX, we always spill r0, r4, r5, and lr, so we can easily
71 * clean up the stack after a phys_copy fault. NetBSD, in contrast,
72 * spills the minimum number of registers for each path.
75 /* LINTSTUB: Func: void *phys_copy(void *src, void *dst, size_t len) */
77 /* phys_copy takes (src, dst) while memcpy takes (dst, src), so switch the source and destination registers */
82 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
85 /* push lr once here so the copy paths below are free to use it as a scratch register */
/*
 * Two alternative entry spills follow; the preprocessor lines that select
 * between them are not visible in this excerpt.  The wider one matches the
 * "always spill r0, r4, r5, and lr" MINIX policy described earlier in the file.
 */
87 push {r0, r4, r5, lr} /* memcpy() returns dest addr, so keep r0; r4/r5 are saved up front too */
89 push {r0, lr} /* memcpy() returns dest addr, so keep r0 */
93 blt .Lmemcpy_l4 /* less than 4 bytes */
95 bne .Lmemcpy_destul /* destination address is not aligned */
97 bne .Lmemcpy_srcul /* source address is not aligned */
100 /* We have aligned source and destination */
102 blt .Lmemcpy_l12 /* less than 12 bytes (4 from above) */
104 blt .Lmemcpy_l32 /* less than 32 bytes (12 from above) */
105 #if !defined(__minix)
106 push {r4} /* borrow r4 as a fourth data register (non-MINIX builds only) */
109 /* blat 32 bytes at a time: two 16-byte load/store pairs */
110 /* XXX for really big copies perhaps we should use more registers */
112 ldmia r1!, {r3, r4, r12, lr} /* first 16 bytes: load four words from [r1], r1 advances */
113 stmia r0!, {r3, r4, r12, lr} /* ... and store them to [r0], r0 advances */
114 ldmia r1!, {r3, r4, r12, lr} /* second 16 bytes */
115 stmia r0!, {r3, r4, r12, lr}
120 ldmiage r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
121 stmiage r0!, {r3, r4, r12, lr} /* (same GE condition as the load) */
123 #if !defined(__minix)
124 pop {r4} /* return r4 */
130 /* blat 12 bytes at a time */
132 ldmiage r1!, {r3, r12, lr} /* three words = 12 bytes, only if GE */
133 stmiage r0!, {r3, r12, lr}
144 ldmiage r1!, {r3, r12} /* two words = 8 bytes, only if GE */
145 stmiage r0!, {r3, r12}
149 /* less than 4 bytes to go */
157 ldmiaeq sp!, {r0, pc}^ /* done (alternate return form; the selecting #if is not visible here) */
159 popeq {r0, pc} /* done: taken on EQ; pops the saved r0 (dest) and returns through pc */
162 /* copy the crud byte at a time */
178 /* erg - unaligned destination */
183 /* align destination with byte copies */
191 blt .Lmemcpy_l4 /* less than 4 bytes */
194 beq .Lmemcpy_t8 /* we have an aligned source; otherwise continue with the unaligned-source code */
196 /* erg - unaligned source */
197 /* This is where it gets nasty ... */
/*
 * Unaligned source, variant using 24-bit shifts (the other two variants in
 * this file use 16 and 8).  Words are loaded from r1, pieces of neighbouring
 * words are OR-ed together, and the merged words are stored to r0.
 * NOTE(review): every orr appears once with lsr and once with lsl; these look
 * like the two byte-order alternatives, but the #ifdef that selects between
 * them and the companion shift instructions are not visible in this excerpt.
 */
205 blt .Lmemcpy_srcul1loop4 /* skip the 16-byte loop */
207 #if !defined(__minix)
211 .Lmemcpy_srcul1loop16:
217 ldmia r1!, {r4, r5, r12, lr} /* fetch the next four source words, r1 advances */
219 orr r3, r3, r4, lsr #24 /* r3 |= r4 >> 24 */
221 orr r4, r4, r5, lsr #24
223 orr r5, r5, r12, lsr #24
225 orr r12, r12, lr, lsr #24
227 orr r3, r3, r4, lsl #24 /* r3 |= r4 << 24 */
229 orr r4, r4, r5, lsl #24
231 orr r5, r5, r12, lsl #24
233 orr r12, r12, lr, lsl #24
235 stmia r0!, {r3-r5, r12} /* store the four merged words (16 bytes), r0 advances */
237 bge .Lmemcpy_srcul1loop16 /* repeat while GE */
238 #if !defined(__minix)
242 blt .Lmemcpy_srcul1l4 /* skip the word-at-a-time loop */
244 .Lmemcpy_srcul1loop4:
252 orr r12, r12, lr, lsr #24 /* r12 |= lr >> 24 */
254 orr r12, r12, lr, lsl #24 /* r12 |= lr << 24 */
258 bge .Lmemcpy_srcul1loop4 /* repeat while GE */
/*
 * Unaligned source, variant using 16-bit shifts.  Words are loaded from r1,
 * pieces of neighbouring words are OR-ed together, and the merged words are
 * stored to r0.
 * NOTE(review): every orr appears once with lsr and once with lsl; these look
 * like the two byte-order alternatives, but the #ifdef that selects between
 * them and most of the companion shift instructions are not visible in this
 * excerpt.
 */
266 blt .Lmemcpy_srcul2loop4 /* skip the 16-byte loop */
268 #if !defined(__minix)
272 .Lmemcpy_srcul2loop16:
278 ldmia r1!, {r4, r5, r12, lr} /* fetch the next four source words, r1 advances */
280 orr r3, r3, r4, lsr #16 /* r3 |= r4 >> 16 */
282 orr r4, r4, r5, lsr #16
284 orr r5, r5, r12, lsr #16
285 mov r12, r12, lsl #16 /* keep the low half of r12, moved to the top */
286 orr r12, r12, lr, lsr #16
288 orr r3, r3, r4, lsl #16 /* r3 |= r4 << 16 */
290 orr r4, r4, r5, lsl #16
292 orr r5, r5, r12, lsl #16
293 mov r12, r12, lsr #16 /* keep the high half of r12, moved to the bottom */
294 orr r12, r12, lr, lsl #16
296 stmia r0!, {r3-r5, r12} /* store the four merged words (16 bytes), r0 advances */
298 bge .Lmemcpy_srcul2loop16 /* repeat while GE */
299 #if !defined(__minix)
303 blt .Lmemcpy_srcul2l4 /* skip the word-at-a-time loop */
305 .Lmemcpy_srcul2loop4:
313 orr r12, r12, lr, lsr #16 /* r12 |= lr >> 16 */
315 orr r12, r12, lr, lsl #16 /* r12 |= lr << 16 */
319 bge .Lmemcpy_srcul2loop4 /* repeat while GE */
/*
 * Unaligned source, variant using 8-bit merges paired with 24-bit moves.
 * Words are loaded from r1, pieces of neighbouring words are OR-ed
 * together, and the merged words are stored to r0.
 * NOTE(review): every orr appears once with lsr and once with lsl; these look
 * like the two byte-order alternatives, but the #ifdef that selects between
 * them and most of the companion shift instructions are not visible in this
 * excerpt.
 */
327 blt .Lmemcpy_srcul3loop4 /* skip the 16-byte loop */
329 #if !defined(__minix)
333 .Lmemcpy_srcul3loop16:
339 ldmia r1!, {r4, r5, r12, lr} /* fetch the next four source words, r1 advances */
341 orr r3, r3, r4, lsr #8 /* r3 |= r4 >> 8 */
343 orr r4, r4, r5, lsr #8
345 orr r5, r5, r12, lsr #8
346 mov r12, r12, lsl #24 /* keep the low byte of r12, moved to the top */
347 orr r12, r12, lr, lsr #8
349 orr r3, r3, r4, lsl #8 /* r3 |= r4 << 8 */
351 orr r4, r4, r5, lsl #8
353 orr r5, r5, r12, lsl #8
354 mov r12, r12, lsr #24 /* keep the high byte of r12, moved to the bottom */
355 orr r12, r12, lr, lsl #8
357 stmia r0!, {r3-r5, r12} /* store the four merged words (16 bytes), r0 advances */
359 bge .Lmemcpy_srcul3loop16 /* repeat while GE */
360 #if !defined(__minix)
364 blt .Lmemcpy_srcul3l4 /* skip the word-at-a-time loop */
366 .Lmemcpy_srcul3loop4:
374 orr r12, r12, lr, lsr #8 /* r12 |= lr >> 8 */
376 orr r12, r12, lr, lsl #8 /* r12 |= lr << 8 */
380 bge .Lmemcpy_srcul3loop4 /* repeat while GE */
/*
 * Landing pads for a faulting phys_copy: per the comments below, the kernel
 * can transfer control to either label.  The rest of the handling is not
 * visible in this excerpt.
 */
387 LABEL(phys_copy_fault) /* kernel can send us here */
391 LABEL(phys_copy_fault_in_kernel) /* kernel can send us here */
393 mrc p15, 0, r0, c6, c0, 0 /* Read DFAR (CP15 c6, Data Fault Address Register) into r0 */