mkfs: move directory entry manipulation
[minix.git] / common / lib / libc / arch / arm / string / memcpy_xscale.S
blob3e157d075d663b6bc66d947b0dc1fa419ede60f6
1 /*      $NetBSD: memcpy_xscale.S,v 1.2 2007/06/21 21:37:04 scw Exp $    */
3 /*
4  * Copyright 2003 Wasabi Systems, Inc.
5  * All rights reserved.
6  *
7  * Written by Steve C. Woodford for Wasabi Systems, Inc.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed for the NetBSD Project by
20  *      Wasabi Systems, Inc.
21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22  *    or promote products derived from this software without specific prior
23  *    written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
38 #include <machine/asm.h>
40 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
/*
 * Entry point.  The LINTSTUB line above gives the C prototype; the code
 * below treats r0 as dst, r1 as src and r2 as len.  r0 is never written
 * (a working copy of dst lives in r3) and ip is used as scratch.
 */
41 ENTRY(memcpy)
42         pld     [r1]                    /* Preload hint for the start of src */
43         cmp     r2, #0x0c
           /*
            * NOTE(review): ble is a signed test, so a len with bit 31 set is
            * also sent to .Lmemcpy_short -- confirm callers never pass one.
            */
44         ble     .Lmemcpy_short          /* <= 12 bytes */
45         mov     r3, r0                  /* We must not clobber r0 */
47         /* Word-align the destination buffer */
48         ands    ip, r3, #0x03           /* Already word aligned? */
49         beq     .Lmemcpy_wordaligned    /* Yup */
           /*
            * ip = dst & 3 is 1, 2 or 3 here.  The cmp below sets the flags once;
            * the ldrb/sub/strb forms that follow do not update them.  One byte is
            * always copied, a second when ip <= 2 (le) and a third when ip < 2
            * (lt), i.e. 4 - ip bytes in total, after which r3 is word aligned.
            * len > 12 on this path, so r2 cannot run out here.
            */
50         cmp     ip, #0x02
51         ldrb    ip, [r1], #0x01         /* Byte 1: unconditional */
52         sub     r2, r2, #0x01
53         strb    ip, [r3], #0x01
54         ldrleb  ip, [r1], #0x01         /* Byte 2: only if (dst & 3) <= 2 */
55         suble   r2, r2, #0x01
56         strleb  ip, [r3], #0x01
57         ldrltb  ip, [r1], #0x01         /* Byte 3: only if (dst & 3) == 1 */
58         sublt   r2, r2, #0x01
59         strltb  ip, [r3], #0x01
61         /* Destination buffer is now word aligned */
/*
 * r3 (dst) is word aligned here.  If src is not, hand over to
 * .Lmemcpy_bad_align with ip = src & 3 still live (that code compares ip
 * against 2 to pick its shift variant).  Otherwise save r4-r9, bring dst
 * up to 8-byte alignment with one word copy if needed, and bias r2 by
 * -0x80 for the 128-byte loop.
 */
62 .Lmemcpy_wordaligned:
63         ands    ip, r1, #0x03           /* Is src also word-aligned? */
64         bne     .Lmemcpy_bad_align      /* Nope. Things just got bad */
66         /* Quad-align the destination buffer */
67         tst     r3, #0x07               /* Already quad aligned? */
68         ldrne   ip, [r1], #0x04
           /* The stmfd sits between the conditional ops; it leaves the tst flags alone */
69         stmfd   sp!, {r4-r9}            /* Free up some registers */
70         subne   r2, r2, #0x04
71         strne   ip, [r3], #0x04
73         /* Destination buffer quad aligned, source is at least word aligned */
74         subs    r2, r2, #0x80           /* r2 = remaining - 0x80 from here on */
75         blt     .Lmemcpy_w_lessthan128  /* Fewer than 128 bytes left */
77         /* Copy 128 bytes at a time */
77         /* Copy 128 bytes at a time */
/*
 * Each pass moves 0x80 bytes from [r1] to [r3].  r2 holds the remaining
 * count minus 0x80, so r2 >= 0 means another full block is available.
 * Words are loaded one at a time into r4-r9; every strd writes 8 bytes
 * from a register pair (r4/r5, r6/r7 or r8/r9).  Loads run ahead of the
 * stores that consume them, rotating through the three pairs.  The
 * LD:/ST: tags are byte offsets within the current block; each pld is
 * issued when r1 is 8 bytes into a 0x20-byte group, so [r1, #0x18] is the
 * start of the next group (the offset named in its comment).
 */
78 .Lmemcpy_w_loop128:
79         ldr     r4, [r1], #0x04         /* LD:00-03 */
80         ldr     r5, [r1], #0x04         /* LD:04-07 */
81         pld     [r1, #0x18]             /* Prefetch 0x20 */
82         ldr     r6, [r1], #0x04         /* LD:08-0b */
83         ldr     r7, [r1], #0x04         /* LD:0c-0f */
84         ldr     r8, [r1], #0x04         /* LD:10-13 */
85         ldr     r9, [r1], #0x04         /* LD:14-17 */
86         strd    r4, [r3], #0x08         /* ST:00-07 */
87         ldr     r4, [r1], #0x04         /* LD:18-1b */
88         ldr     r5, [r1], #0x04         /* LD:1c-1f */
89         strd    r6, [r3], #0x08         /* ST:08-0f */
90         ldr     r6, [r1], #0x04         /* LD:20-23 */
91         ldr     r7, [r1], #0x04         /* LD:24-27 */
92         pld     [r1, #0x18]             /* Prefetch 0x40 */
93         strd    r8, [r3], #0x08         /* ST:10-17 */
94         ldr     r8, [r1], #0x04         /* LD:28-2b */
95         ldr     r9, [r1], #0x04         /* LD:2c-2f */
96         strd    r4, [r3], #0x08         /* ST:18-1f */
97         ldr     r4, [r1], #0x04         /* LD:30-33 */
98         ldr     r5, [r1], #0x04         /* LD:34-37 */
99         strd    r6, [r3], #0x08         /* ST:20-27 */
100         ldr     r6, [r1], #0x04         /* LD:38-3b */
101         ldr     r7, [r1], #0x04         /* LD:3c-3f */
102         strd    r8, [r3], #0x08         /* ST:28-2f */
103         ldr     r8, [r1], #0x04         /* LD:40-43 */
104         ldr     r9, [r1], #0x04         /* LD:44-47 */
105         pld     [r1, #0x18]             /* Prefetch 0x60 */
106         strd    r4, [r3], #0x08         /* ST:30-37 */
107         ldr     r4, [r1], #0x04         /* LD:48-4b */
108         ldr     r5, [r1], #0x04         /* LD:4c-4f */
109         strd    r6, [r3], #0x08         /* ST:38-3f */
110         ldr     r6, [r1], #0x04         /* LD:50-53 */
111         ldr     r7, [r1], #0x04         /* LD:54-57 */
112         strd    r8, [r3], #0x08         /* ST:40-47 */
113         ldr     r8, [r1], #0x04         /* LD:58-5b */
114         ldr     r9, [r1], #0x04         /* LD:5c-5f */
115         strd    r4, [r3], #0x08         /* ST:48-4f */
116         ldr     r4, [r1], #0x04         /* LD:60-63 */
117         ldr     r5, [r1], #0x04         /* LD:64-67 */
118         pld     [r1, #0x18]             /* Prefetch 0x80 */
119         strd    r6, [r3], #0x08         /* ST:50-57 */
120         ldr     r6, [r1], #0x04         /* LD:68-6b */
121         ldr     r7, [r1], #0x04         /* LD:6c-6f */
122         strd    r8, [r3], #0x08         /* ST:58-5f */
123         ldr     r8, [r1], #0x04         /* LD:70-73 */
124         ldr     r9, [r1], #0x04         /* LD:74-77 */
125         strd    r4, [r3], #0x08         /* ST:60-67 */
126         ldr     r4, [r1], #0x04         /* LD:78-7b */
127         ldr     r5, [r1], #0x04         /* LD:7c-7f */
128         strd    r6, [r3], #0x08         /* ST:68-6f */
129         strd    r8, [r3], #0x08         /* ST:70-77 */
            /* Flags for the bge come from this subs; the strd after it does not change them */
130         subs    r2, r2, #0x80
131         strd    r4, [r3], #0x08         /* ST:78-7f */
132         bge     .Lmemcpy_w_loop128
/*
 * Reached with r2 = remaining - 0x80 (negative), either straight from the
 * alignment code or when the 128-byte loop runs out.  Undo the bias; if
 * nothing is left, restore r4-r9 and return, otherwise bias r2 by -0x20
 * for the 32-byte loop.
 */
134 .Lmemcpy_w_lessthan128:
135         adds    r2, r2, #0x80           /* Adjust for extra sub */
136         ldmeqfd sp!, {r4-r9}
137         bxeq    lr                      /* Return now if done */
138         subs    r2, r2, #0x20           /* r2 = remaining - 0x20 */
139         blt     .Lmemcpy_w_lessthan32   /* Fewer than 32 bytes left */
141         /* Copy 32 bytes at a time */
141         /* Copy 32 bytes at a time */
/*
 * Same load/strd scheme as the 128-byte loop, one 0x20-byte group per
 * pass.  r2 holds remaining - 0x20; the loop repeats while that stays
 * >= 0.
 */
142 .Lmemcpy_w_loop32:
143         ldr     r4, [r1], #0x04
144         ldr     r5, [r1], #0x04
145         pld     [r1, #0x18]             /* r1 is 8 bytes in: hint at the next 0x20-byte group */
146         ldr     r6, [r1], #0x04
147         ldr     r7, [r1], #0x04
148         ldr     r8, [r1], #0x04
149         ldr     r9, [r1], #0x04
150         strd    r4, [r3], #0x08         /* Bytes 00-07 */
151         ldr     r4, [r1], #0x04
152         ldr     r5, [r1], #0x04
153         strd    r6, [r3], #0x08         /* Bytes 08-0f */
154         strd    r8, [r3], #0x08         /* Bytes 10-17 */
155         subs    r2, r2, #0x20           /* Sets the flags tested by the bge below */
156         strd    r4, [r3], #0x08         /* Bytes 18-1f */
157         bge     .Lmemcpy_w_loop32
/*
 * Tail of the aligned copy: fewer than 32 bytes remain once the bias is
 * undone.  Up to three 8-byte groups are copied by jumping into a
 * straight-line run, then an optional word, then up to three bytes.
 * r4-r9 are restored before every return.
 */
159 .Lmemcpy_w_lessthan32:
160         adds    r2, r2, #0x20           /* Adjust for extra sub */
161         ldmeqfd sp!, {r4-r9}
162         bxeq    lr                      /* Return now if done */
            /*
             * r4 = 0x18 - (r2 & 0x18) is the number of bytes of the 24-byte run
             * that must NOT be copied.  Each 8-byte group below is four
             * instructions (16 bytes of code), hence the "lsl #1".  In ARM
             * state pc reads as the address of the addne plus 8, i.e. the first
             * ldr after the nop, so the add lands on the group matching the
             * count.  When r4 == 0 the addne is skipped and execution falls
             * through the nop into the full run.
             */
164         and     r4, r2, #0x18
165         rsbs    r4, r4, #0x18
166         addne   pc, pc, r4, lsl #1
167         nop
169         /* At least 24 bytes remaining */
170         ldr     r4, [r1], #0x04
171         ldr     r5, [r1], #0x04
172         sub     r2, r2, #0x08
173         strd    r4, [r3], #0x08
175         /* At least 16 bytes remaining */
176         ldr     r4, [r1], #0x04
177         ldr     r5, [r1], #0x04
178         sub     r2, r2, #0x08
179         strd    r4, [r3], #0x08
181         /* At least 8 bytes remaining */
182         ldr     r4, [r1], #0x04
183         ldr     r5, [r1], #0x04
184         subs    r2, r2, #0x08           /* eq here means the copy is complete */
185         strd    r4, [r3], #0x08
187         /* Less than 8 bytes remaining */
            /*
             * Arrived by fall-through (flags from the subs above) or by the
             * computed jump with no 8-byte group to copy (flags from the rsbs,
             * which are ne, and r2 is known to be non-zero).
             */
188         ldmfd   sp!, {r4-r9}
189         bxeq    lr                      /* Return now if done */
190         subs    r2, r2, #0x04
191         ldrge   ip, [r1], #0x04         /* At least 4 left: copy one word */
192         strge   ip, [r3], #0x04
193         bxeq    lr                      /* Return now if done */
194         addlt   r2, r2, #0x04           /* No word copied: restore the 1..3 count */
            /*
             * 1 to 3 bytes remain.  After the cmp the count lives in the flags,
             * so r2 is reused as a data register for the second byte.
             */
195         ldrb    ip, [r1], #0x01
196         cmp     r2, #0x02
197         ldrgeb  r2, [r1], #0x01         /* Second byte if count >= 2 */
198         strb    ip, [r3], #0x01
199         ldrgtb  ip, [r1]                /* Third byte if count == 3 */
200         strgeb  r2, [r3], #0x01
201         strgtb  ip, [r3]
202         bx      lr
206  * At this point, it has not been possible to word align both buffers.
207  * The destination buffer is word aligned, but the source buffer is not.
208  */
/*
 * Entered with ip = src & 3 (1, 2 or 3) and dst word aligned.  r1 is
 * rounded down to a word boundary and the word containing the first
 * wanted source byte is loaded into ip.  The .Lmemcpy_bad1/2/3 code then
 * builds each destination word from the tail of ip and the head of the
 * next aligned word, using shift pairs 8/24, 16/16 and 24/8 respectively.
 */
209 .Lmemcpy_bad_align:
210         stmfd   sp!, {r4-r7}            /* Work registers for the shift loops */
211         bic     r1, r1, #0x03           /* Word-align the source pointer */
212         cmp     ip, #2                  /* Flags survive the ldr below */
213         ldr     ip, [r1], #0x04         /* First aligned word; ip now holds data */
214         bgt     .Lmemcpy_bad3           /* src & 3 == 3 */
215         beq     .Lmemcpy_bad2           /* src & 3 == 2 */
216         b       .Lmemcpy_bad1           /* src & 3 == 1 */
218 .Lmemcpy_bad1_loop16:
219 #ifdef __ARMEB__
220         mov     r4, ip, lsl #8
221 #else
222         mov     r4, ip, lsr #8
223 #endif
224         ldr     r5, [r1], #0x04
225         pld     [r1, #0x018]
226         ldr     r6, [r1], #0x04
227         ldr     r7, [r1], #0x04
228         ldr     ip, [r1], #0x04
229 #ifdef __ARMEB__
230         orr     r4, r4, r5, lsr #24
231         mov     r5, r5, lsl #8
232         orr     r5, r5, r6, lsr #24
233         mov     r6, r6, lsl #8
234         orr     r6, r6, r7, lsr #24
235         mov     r7, r7, lsl #8
236         orr     r7, r7, ip, lsr #24
237 #else
238         orr     r4, r4, r5, lsl #24
239         mov     r5, r5, lsr #8
240         orr     r5, r5, r6, lsl #24
241         mov     r6, r6, lsr #8
242         orr     r6, r6, r7, lsl #24
243         mov     r7, r7, lsr #8
244         orr     r7, r7, ip, lsl #24
245 #endif
246         str     r4, [r3], #0x04
247         str     r5, [r3], #0x04
248         str     r6, [r3], #0x04
249         str     r7, [r3], #0x04
250         sub     r2, r2, #0x10
252 .Lmemcpy_bad1:
253         cmp     r2, #0x20
254         bge     .Lmemcpy_bad1_loop16
255         cmp     r2, #0x10
256         blt     .Lmemcpy_bad1_loop16_short
258         /* copy last 16 bytes (without preload) */
259 #ifdef __ARMEB__
260         mov     r4, ip, lsl #8
261 #else
262         mov     r4, ip, lsr #8
263 #endif
264         ldr     r5, [r1], #0x04
265         ldr     r6, [r1], #0x04
266         ldr     r7, [r1], #0x04
267         ldr     ip, [r1], #0x04
268 #ifdef __ARMEB__
269         orr     r4, r4, r5, lsr #24
270         mov     r5, r5, lsl #8
271         orr     r5, r5, r6, lsr #24
272         mov     r6, r6, lsl #8
273         orr     r6, r6, r7, lsr #24
274         mov     r7, r7, lsl #8
275         orr     r7, r7, ip, lsr #24
276 #else
277         orr     r4, r4, r5, lsl #24
278         mov     r5, r5, lsr #8
279         orr     r5, r5, r6, lsl #24
280         mov     r6, r6, lsr #8
281         orr     r6, r6, r7, lsl #24
282         mov     r7, r7, lsr #8
283         orr     r7, r7, ip, lsl #24
284 #endif
285         str     r4, [r3], #0x04
286         str     r5, [r3], #0x04
287         str     r6, [r3], #0x04
288         str     r7, [r3], #0x04
289         subs    r2, r2, #0x10
290         ldmeqfd sp!, {r4-r7}
291         bxeq    lr                      /* Return now if done */
293 .Lmemcpy_bad1_loop16_short:
294         subs    r2, r2, #0x04
295         sublt   r1, r1, #0x03
296         blt     .Lmemcpy_bad_done
298 .Lmemcpy_bad1_loop4:
299 #ifdef __ARMEB__
300         mov     r4, ip, lsl #8
301 #else
302         mov     r4, ip, lsr #8
303 #endif
304         ldr     ip, [r1], #0x04
305         subs    r2, r2, #0x04
306 #ifdef __ARMEB__
307         orr     r4, r4, ip, lsr #24
308 #else
309         orr     r4, r4, ip, lsl #24
310 #endif
311         str     r4, [r3], #0x04
312         bge     .Lmemcpy_bad1_loop4
313         sub     r1, r1, #0x03
314         b       .Lmemcpy_bad_done
316 .Lmemcpy_bad2_loop16:
317 #ifdef __ARMEB__
318         mov     r4, ip, lsl #16
319 #else
320         mov     r4, ip, lsr #16
321 #endif
322         ldr     r5, [r1], #0x04
323         pld     [r1, #0x018]
324         ldr     r6, [r1], #0x04
325         ldr     r7, [r1], #0x04
326         ldr     ip, [r1], #0x04
327 #ifdef __ARMEB__
328         orr     r4, r4, r5, lsr #16
329         mov     r5, r5, lsl #16
330         orr     r5, r5, r6, lsr #16
331         mov     r6, r6, lsl #16
332         orr     r6, r6, r7, lsr #16
333         mov     r7, r7, lsl #16
334         orr     r7, r7, ip, lsr #16
335 #else
336         orr     r4, r4, r5, lsl #16
337         mov     r5, r5, lsr #16
338         orr     r5, r5, r6, lsl #16
339         mov     r6, r6, lsr #16
340         orr     r6, r6, r7, lsl #16
341         mov     r7, r7, lsr #16
342         orr     r7, r7, ip, lsl #16
343 #endif
344         str     r4, [r3], #0x04
345         str     r5, [r3], #0x04
346         str     r6, [r3], #0x04
347         str     r7, [r3], #0x04
348         sub     r2, r2, #0x10
350 .Lmemcpy_bad2:
351         cmp     r2, #0x20
352         bge     .Lmemcpy_bad2_loop16
353         cmp     r2, #0x10
354         blt     .Lmemcpy_bad2_loop16_short
356         /* copy last 16 bytes (without preload) */
357 #ifdef __ARMEB__
358         mov     r4, ip, lsl #16
359 #else
360         mov     r4, ip, lsr #16
361 #endif
362         ldr     r5, [r1], #0x04
363         ldr     r6, [r1], #0x04
364         ldr     r7, [r1], #0x04
365         ldr     ip, [r1], #0x04
366 #ifdef __ARMEB__
367         orr     r4, r4, r5, lsr #16
368         mov     r5, r5, lsl #16
369         orr     r5, r5, r6, lsr #16
370         mov     r6, r6, lsl #16
371         orr     r6, r6, r7, lsr #16
372         mov     r7, r7, lsl #16
373         orr     r7, r7, ip, lsr #16
374 #else
375         orr     r4, r4, r5, lsl #16
376         mov     r5, r5, lsr #16
377         orr     r5, r5, r6, lsl #16
378         mov     r6, r6, lsr #16
379         orr     r6, r6, r7, lsl #16
380         mov     r7, r7, lsr #16
381         orr     r7, r7, ip, lsl #16
382 #endif
383         str     r4, [r3], #0x04
384         str     r5, [r3], #0x04
385         str     r6, [r3], #0x04
386         str     r7, [r3], #0x04
387         subs    r2, r2, #0x10
388         ldmeqfd sp!, {r4-r7}
389         bxeq    lr                      /* Return now if done */
391 .Lmemcpy_bad2_loop16_short:
392         subs    r2, r2, #0x04
393         sublt   r1, r1, #0x02
394         blt     .Lmemcpy_bad_done
396 .Lmemcpy_bad2_loop4:
397 #ifdef __ARMEB__
398         mov     r4, ip, lsl #16
399 #else
400         mov     r4, ip, lsr #16
401 #endif
402         ldr     ip, [r1], #0x04
403         subs    r2, r2, #0x04
404 #ifdef __ARMEB__
405         orr     r4, r4, ip, lsr #16
406 #else
407         orr     r4, r4, ip, lsl #16
408 #endif
409         str     r4, [r3], #0x04
410         bge     .Lmemcpy_bad2_loop4
411         sub     r1, r1, #0x02
412         b       .Lmemcpy_bad_done
414 .Lmemcpy_bad3_loop16:
415 #ifdef __ARMEB__
416         mov     r4, ip, lsl #24
417 #else
418         mov     r4, ip, lsr #24
419 #endif
420         ldr     r5, [r1], #0x04
421         pld     [r1, #0x018]
422         ldr     r6, [r1], #0x04
423         ldr     r7, [r1], #0x04
424         ldr     ip, [r1], #0x04
425 #ifdef __ARMEB__
426         orr     r4, r4, r5, lsr #8
427         mov     r5, r5, lsl #24
428         orr     r5, r5, r6, lsr #8
429         mov     r6, r6, lsl #24
430         orr     r6, r6, r7, lsr #8
431         mov     r7, r7, lsl #24
432         orr     r7, r7, ip, lsr #8
433 #else
434         orr     r4, r4, r5, lsl #8
435         mov     r5, r5, lsr #24
436         orr     r5, r5, r6, lsl #8
437         mov     r6, r6, lsr #24
438         orr     r6, r6, r7, lsl #8
439         mov     r7, r7, lsr #24
440         orr     r7, r7, ip, lsl #8
441 #endif
442         str     r4, [r3], #0x04
443         str     r5, [r3], #0x04
444         str     r6, [r3], #0x04
445         str     r7, [r3], #0x04
446         sub     r2, r2, #0x10
448 .Lmemcpy_bad3:
449         cmp     r2, #0x20
450         bge     .Lmemcpy_bad3_loop16
451         cmp     r2, #0x10
452         blt     .Lmemcpy_bad3_loop16_short
454         /* copy last 16 bytes (without preload) */
455 #ifdef __ARMEB__
456         mov     r4, ip, lsl #24
457 #else
458         mov     r4, ip, lsr #24
459 #endif
460         ldr     r5, [r1], #0x04
461         ldr     r6, [r1], #0x04
462         ldr     r7, [r1], #0x04
463         ldr     ip, [r1], #0x04
464 #ifdef __ARMEB__
465         orr     r4, r4, r5, lsr #8
466         mov     r5, r5, lsl #24
467         orr     r5, r5, r6, lsr #8
468         mov     r6, r6, lsl #24
469         orr     r6, r6, r7, lsr #8
470         mov     r7, r7, lsl #24
471         orr     r7, r7, ip, lsr #8
472 #else
473         orr     r4, r4, r5, lsl #8
474         mov     r5, r5, lsr #24
475         orr     r5, r5, r6, lsl #8
476         mov     r6, r6, lsr #24
477         orr     r6, r6, r7, lsl #8
478         mov     r7, r7, lsr #24
479         orr     r7, r7, ip, lsl #8
480 #endif
481         str     r4, [r3], #0x04
482         str     r5, [r3], #0x04
483         str     r6, [r3], #0x04
484         str     r7, [r3], #0x04
485         subs    r2, r2, #0x10
486         ldmeqfd sp!, {r4-r7}
487         bxeq    lr                      /* Return now if done */
489 .Lmemcpy_bad3_loop16_short:
490         subs    r2, r2, #0x04
491         sublt   r1, r1, #0x01
492         blt     .Lmemcpy_bad_done
494 .Lmemcpy_bad3_loop4:
495 #ifdef __ARMEB__
496         mov     r4, ip, lsl #24
497 #else
498         mov     r4, ip, lsr #24
499 #endif
500         ldr     ip, [r1], #0x04
501         subs    r2, r2, #0x04
502 #ifdef __ARMEB__
503         orr     r4, r4, ip, lsr #8
504 #else
505         orr     r4, r4, ip, lsl #8
506 #endif
507         str     r4, [r3], #0x04
508         bge     .Lmemcpy_bad3_loop4
509         sub     r1, r1, #0x01
/*
 * Common exit for the misaligned-source paths.  Each path subtracts 3, 2
 * or 1 from r1 before arriving, so r1 is back on the next unread source
 * byte, and r2 holds the remaining count minus 4.  Restore r4-r7, undo
 * the bias and copy the final 0 to 3 bytes.
 */
511 .Lmemcpy_bad_done:
512         ldmfd   sp!, {r4-r7}
513         adds    r2, r2, #0x04           /* r2 = bytes still to copy */
514         bxeq    lr                      /* Nothing left */
            /* After the cmp the count lives in the flags; r2 is reused for data */
515         ldrb    ip, [r1], #0x01
516         cmp     r2, #0x02
517         ldrgeb  r2, [r1], #0x01         /* Second byte if count >= 2 */
518         strb    ip, [r3], #0x01
519         ldrgtb  ip, [r1]                /* Third byte if count == 3 */
520         strgeb  r2, [r3], #0x01
521         strgtb  ip, [r3]
522         bx      lr
526  * Handle short copies (12 bytes or fewer), possibly misaligned.
527  * Some of these are *very* common, thanks to the network stack,
528  * and so are handled specially.
529  */
/*
 * Short-copy dispatch, reached from the entry check with r2 = len <= 12.
 *
 * Normal build: "add pc, pc, r2, lsl #2" indexes the branch table that
 * follows.  pc reads as the address of the add plus 8, which is the
 * "bx lr" entry after the nop, and every entry is one 4-byte instruction,
 * so entry N handles len == N.  Lengths 4, 6, 8 and 12 have dedicated
 * routines; everything else is copied a byte at a time.
 *
 * _STANDALONE build: the table and the dedicated routines are compiled
 * out and every short copy uses .Lmemcpy_bytewise.  That loop stores a
 * byte before it tests the count, so it must not be entered with
 * r2 == 0: the subs would wrap to 0xffffffff and the loop would keep
 * copying.  A zero length therefore returns here.
 */
530 .Lmemcpy_short:
531 #ifndef _STANDALONE
532         add     pc, pc, r2, lsl #2
533         nop
534         bx      lr                      /* 0x00 */
535         b       .Lmemcpy_bytewise       /* 0x01 */
536         b       .Lmemcpy_bytewise       /* 0x02 */
537         b       .Lmemcpy_bytewise       /* 0x03 */
538         b       .Lmemcpy_4              /* 0x04 */
539         b       .Lmemcpy_bytewise       /* 0x05 */
540         b       .Lmemcpy_6              /* 0x06 */
541         b       .Lmemcpy_bytewise       /* 0x07 */
542         b       .Lmemcpy_8              /* 0x08 */
543         b       .Lmemcpy_bytewise       /* 0x09 */
544         b       .Lmemcpy_bytewise       /* 0x0a */
545         b       .Lmemcpy_bytewise       /* 0x0b */
546         b       .Lmemcpy_c              /* 0x0c */
    #else
            cmp     r2, #0x00               /* No table: catch len == 0 here */
            bxeq    lr                      /* Nothing to copy */
547 #endif
/*
 * Byte-at-a-time copy of r2 bytes (r2 must be >= 1 on entry).  The next
 * byte is loaded ahead of the store; the final conditional load is
 * skipped once the count reaches zero, so src is never read past len.
 */
548 .Lmemcpy_bytewise:
549         mov     r3, r0                  /* We must not clobber r0 */
550         ldrb    ip, [r1], #0x01
551 1:      subs    r2, r2, #0x01
552         strb    ip, [r3], #0x01
553         ldrneb  ip, [r1], #0x01
554         bne     1b
555         bx      lr
557 #ifndef _STANDALONE
558 /******************************************************************************
559  * Special case for 4 byte copies
560  */
561 #define LMEMCPY_4_LOG2  6       /* 64 bytes */
562 #define LMEMCPY_4_PAD   .align LMEMCPY_4_LOG2
563         LMEMCPY_4_PAD
/*
 * 4-byte copy: dispatch on the alignment of both pointers.
 * r2 = ((dst & 3) << 2) | (src & 3) selects one of 16 cases.  The label
 * and every case are padded to 1 << LMEMCPY_4_LOG2 bytes with
 * LMEMCPY_4_PAD, so case N starts at .Lmemcpy_4 + (N << LMEMCPY_4_LOG2).
 * Case 0 (both word aligned) is the code that follows the addne and is
 * reached by falling through.
 */
564 .Lmemcpy_4:
565         and     r2, r1, #0x03           /* src & 3 */
566         orr     r2, r2, r0, lsl #2      /* OR in dst << 2 */
567         ands    r2, r2, #0x0f           /* Keep the 4 index bits; eq => case 0 */
            /*
             * This sub is at .Lmemcpy_4 + 0x0c and pc reads as that plus 8, so
             * pc - 0x14 is the address of .Lmemcpy_4 itself.
             */
568         sub     r3, pc, #0x14
569         addne   pc, r3, r2, lsl #LMEMCPY_4_LOG2
572  * 0000: dst is 32-bit aligned, src is 32-bit aligned
573  */
574         ldr     r2, [r1]
575         str     r2, [r0]
576         bx      lr
577         LMEMCPY_4_PAD
580  * 0001: dst is 32-bit aligned, src is 8-bit aligned
581  */
582         ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
583         ldr     r2, [r1, #3]            /* BE:r2 = 3xxx  LE:r2 = xxx3 */
584 #ifdef __ARMEB__
585         mov     r3, r3, lsl #8          /* r3 = 012. */
586         orr     r3, r3, r2, lsr #24     /* r3 = 0123 */
587 #else
588         mov     r3, r3, lsr #8          /* r3 = .210 */
589         orr     r3, r3, r2, lsl #24     /* r3 = 3210 */
590 #endif
591         str     r3, [r0]
592         bx      lr
593         LMEMCPY_4_PAD
596  * 0010: dst is 32-bit aligned, src is 16-bit aligned
597  */
598 #ifdef __ARMEB__
599         ldrh    r3, [r1]
600         ldrh    r2, [r1, #0x02]
601 #else
602         ldrh    r3, [r1, #0x02]
603         ldrh    r2, [r1]
604 #endif
605         orr     r3, r2, r3, lsl #16
606         str     r3, [r0]
607         bx      lr
608         LMEMCPY_4_PAD
611  * 0011: dst is 32-bit aligned, src is 8-bit aligned
612  */
613         ldr     r3, [r1, #-3]           /* BE:r3 = xxx0  LE:r3 = 0xxx */
614         ldr     r2, [r1, #1]            /* BE:r2 = 123x  LE:r2 = x321 */
615 #ifdef __ARMEB__
616         mov     r3, r3, lsl #24         /* r3 = 0... */
617         orr     r3, r3, r2, lsr #8      /* r3 = 0123 */
618 #else
619         mov     r3, r3, lsr #24         /* r3 = ...0 */
620         orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
621 #endif
622         str     r3, [r0]
623         bx      lr
624         LMEMCPY_4_PAD
627  * 0100: dst is 8-bit aligned, src is 32-bit aligned
628  */
629         ldr     r2, [r1]
630 #ifdef __ARMEB__
631         strb    r2, [r0, #0x03]
632         mov     r3, r2, lsr #8
633         mov     r1, r2, lsr #24
634         strb    r1, [r0]
635 #else
636         strb    r2, [r0]
637         mov     r3, r2, lsr #8
638         mov     r1, r2, lsr #24
639         strb    r1, [r0, #0x03]
640 #endif
641         strh    r3, [r0, #0x01]
642         bx      lr
643         LMEMCPY_4_PAD
646  * 0101: dst is 8-bit aligned, src is 8-bit aligned
647  */
648         ldrb    r2, [r1]
649         ldrh    r3, [r1, #0x01]
650         ldrb    r1, [r1, #0x03]
651         strb    r2, [r0]
652         strh    r3, [r0, #0x01]
653         strb    r1, [r0, #0x03]
654         bx      lr
655         LMEMCPY_4_PAD
658  * 0110: dst is 8-bit aligned, src is 16-bit aligned
659  */
           /*
            * Two halfword loads from src, written out as byte / halfword / byte
            * at dst offsets 0, 1 and 3.  Digits in the comments are source byte
            * indices; '.' marks a zero byte.
            */
660         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
661         ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
662 #ifdef __ARMEB__
663         mov     r1, r2, lsr #8          /* r1 = ...0 */
664         strb    r1, [r0]
665         mov     r2, r2, lsl #8          /* r2 = .01. */
666         orr     r2, r2, r3, lsr #8      /* r2 = .012 */
667 #else
668         strb    r2, [r0]
669         mov     r2, r2, lsr #8          /* r2 = ...1 */
670         orr     r2, r2, r3, lsl #8      /* r2 = .321 */
671         mov     r3, r3, lsr #8          /* r3 = ...3 */
672 #endif
673         strh    r2, [r0, #0x01]         /* Low halfword of r2: source bytes 1 and 2 */
674         strb    r3, [r0, #0x03]         /* Low byte of r3: source byte 3 */
675         bx      lr
676         LMEMCPY_4_PAD
679  * 0111: dst is 8-bit aligned, src is 8-bit aligned
680  */
681         ldrb    r2, [r1]
682         ldrh    r3, [r1, #0x01]
683         ldrb    r1, [r1, #0x03]
684         strb    r2, [r0]
685         strh    r3, [r0, #0x01]
686         strb    r1, [r0, #0x03]
687         bx      lr
688         LMEMCPY_4_PAD
691  * 1000: dst is 16-bit aligned, src is 32-bit aligned
692  */
693         ldr     r2, [r1]
694 #ifdef __ARMEB__
695         strh    r2, [r0, #0x02]
696         mov     r3, r2, lsr #16
697         strh    r3, [r0]
698 #else
699         strh    r2, [r0]
700         mov     r3, r2, lsr #16
701         strh    r3, [r0, #0x02]
702 #endif
703         bx      lr
704         LMEMCPY_4_PAD
707  * 1001: dst is 16-bit aligned, src is 8-bit aligned
708  */
709         ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
710         ldr     r3, [r1, #3]            /* BE:r3 = 3xxx  LE:r3 = xxx3 */
711         mov     r1, r2, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
712         strh    r1, [r0]
713 #ifdef __ARMEB__
714         mov     r2, r2, lsl #8          /* r2 = 012. */
715         orr     r2, r2, r3, lsr #24     /* r2 = 0123 */
716 #else
717         mov     r2, r2, lsr #24         /* r2 = ...2 */
718         orr     r2, r2, r3, lsl #8      /* r2 = xx32 */
719 #endif
720         strh    r2, [r0, #0x02]
721         bx      lr
722         LMEMCPY_4_PAD
725  * 1010: dst is 16-bit aligned, src is 16-bit aligned
726  */
727         ldrh    r2, [r1]
728         ldrh    r3, [r1, #0x02]
729         strh    r2, [r0]
730         strh    r3, [r0, #0x02]
731         bx      lr
732         LMEMCPY_4_PAD
735  * 1011: dst is 16-bit aligned, src is 8-bit aligned
736  */
737         ldr     r3, [r1, #1]            /* BE:r3 = 123x  LE:r3 = x321 */
738         ldr     r2, [r1, #-3]           /* BE:r2 = xxx0  LE:r2 = 0xxx */
739         mov     r1, r3, lsr #8          /* BE:r1 = .123  LE:r1 = .x32 */
740         strh    r1, [r0, #0x02]
741 #ifdef __ARMEB__
742         mov     r3, r3, lsr #24         /* r3 = ...1 */
743         orr     r3, r3, r2, lsl #8      /* r3 = xx01 */
744 #else
745         mov     r3, r3, lsl #8          /* r3 = 321. */
746         orr     r3, r3, r2, lsr #24     /* r3 = 3210 */
747 #endif
748         strh    r3, [r0]
749         bx      lr
750         LMEMCPY_4_PAD
753  * 1100: dst is 8-bit aligned, src is 32-bit aligned
754  */
755         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
756 #ifdef __ARMEB__
757         strb    r2, [r0, #0x03]
758         mov     r3, r2, lsr #8
759         mov     r1, r2, lsr #24
760         strh    r3, [r0, #0x01]
761         strb    r1, [r0]
762 #else
763         strb    r2, [r0]
764         mov     r3, r2, lsr #8
765         mov     r1, r2, lsr #24
766         strh    r3, [r0, #0x01]
767         strb    r1, [r0, #0x03]
768 #endif
769         bx      lr
770         LMEMCPY_4_PAD
773  * 1101: dst is 8-bit aligned, src is 8-bit aligned
774  */
775         ldrb    r2, [r1]
776         ldrh    r3, [r1, #0x01]
777         ldrb    r1, [r1, #0x03]
778         strb    r2, [r0]
779         strh    r3, [r0, #0x01]
780         strb    r1, [r0, #0x03]
781         bx      lr
782         LMEMCPY_4_PAD
785  * 1110: dst is 8-bit aligned, src is 16-bit aligned
786  */
787 #ifdef __ARMEB__
788         ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
789         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
790         strb    r3, [r0, #0x03]
791         mov     r3, r3, lsr #8          /* r3 = ...2 */
792         orr     r3, r3, r2, lsl #8      /* r3 = ..12 */
793         strh    r3, [r0, #0x01]
794         mov     r2, r2, lsr #8          /* r2 = ...0 */
795         strb    r2, [r0]
796 #else
797         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
798         ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
799         strb    r2, [r0]
800         mov     r2, r2, lsr #8          /* r2 = ...1 */
801         orr     r2, r2, r3, lsl #8      /* r2 = .321 */
802         strh    r2, [r0, #0x01]
803         mov     r3, r3, lsr #8          /* r3 = ...3 */
804         strb    r3, [r0, #0x03]
805 #endif
806         bx      lr
807         LMEMCPY_4_PAD
810  * 1111: dst is 8-bit aligned, src is 8-bit aligned
811  */
812         ldrb    r2, [r1]
813         ldrh    r3, [r1, #0x01]
814         ldrb    r1, [r1, #0x03]
815         strb    r2, [r0]
816         strh    r3, [r0, #0x01]
817         strb    r1, [r0, #0x03]
818         bx      lr
819         LMEMCPY_4_PAD
822 /******************************************************************************
823  * Special case for 6 byte copies
824  */
825 #define LMEMCPY_6_LOG2  6       /* 64 bytes */
826 #define LMEMCPY_6_PAD   .align LMEMCPY_6_LOG2
827         LMEMCPY_6_PAD
828 .Lmemcpy_6:
829         and     r2, r1, #0x03
830         orr     r2, r2, r0, lsl #2
831         ands    r2, r2, #0x0f
832         sub     r3, pc, #0x14
833         addne   pc, r3, r2, lsl #LMEMCPY_6_LOG2
836  * 0000: dst is 32-bit aligned, src is 32-bit aligned
837  */
838         ldr     r2, [r1]
839         ldrh    r3, [r1, #0x04]
840         str     r2, [r0]
841         strh    r3, [r0, #0x04]
842         bx      lr
843         LMEMCPY_6_PAD
846  * 0001: dst is 32-bit aligned, src is 8-bit aligned
847  */
848         ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
849         ldr     r3, [r1, #0x03]         /* BE:r3 = 345x  LE:r3 = x543 */
850 #ifdef __ARMEB__
851         mov     r2, r2, lsl #8          /* r2 = 012. */
852         orr     r2, r2, r3, lsr #24     /* r2 = 0123 */
853 #else
854         mov     r2, r2, lsr #8          /* r2 = .210 */
855         orr     r2, r2, r3, lsl #24     /* r2 = 3210 */
856 #endif
857         mov     r3, r3, lsr #8          /* BE:r3 = .345  LE:r3 = .x54 */
858         str     r2, [r0]
859         strh    r3, [r0, #0x04]
860         bx      lr
861         LMEMCPY_6_PAD
864  * 0010: dst is 32-bit aligned, src is 16-bit aligned
865  */
866         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
867         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
868 #ifdef __ARMEB__
869         mov     r1, r3, lsr #16         /* r1 = ..23 */
870         orr     r1, r1, r2, lsl #16     /* r1 = 0123 */
871         str     r1, [r0]
872         strh    r3, [r0, #0x04]
873 #else
874         mov     r1, r3, lsr #16         /* r1 = ..54 */
875         orr     r2, r2, r3, lsl #16     /* r2 = 3210 */
876         str     r2, [r0]
877         strh    r1, [r0, #0x04]
878 #endif
879         bx      lr
880         LMEMCPY_6_PAD
883  * 0011: dst is 32-bit aligned, src is 8-bit aligned
884  */
           /*
            * src is 3 bytes past a word boundary: read the three aligned words
            * that cover source bytes 0-5 and shift them into one word (bytes
            * 0-3) plus one halfword (bytes 4-5).  Digits in the comments are
            * source byte indices; 'x' is an unrelated neighbouring byte.
            */
885         ldr     r2, [r1, #-3]           /* BE:r2 = xxx0  LE:r2 = 0xxx */
886         ldr     r3, [r1, #1]            /* BE:r3 = 1234  LE:r3 = 4321 */
887         ldr     r1, [r1, #5]            /* BE:r1 = 5xxx  LE:r1 = xxx5 */
888 #ifdef __ARMEB__
889         mov     r2, r2, lsl #24         /* r2 = 0... */
890         orr     r2, r2, r3, lsr #8      /* r2 = 0123 */
891         mov     r3, r3, lsl #8          /* r3 = 234. */
892         orr     r1, r3, r1, lsr #24     /* r1 = 2345 */
893 #else
894         mov     r2, r2, lsr #24         /* r2 = ...0 */
895         orr     r2, r2, r3, lsl #8      /* r2 = 3210 */
896         mov     r1, r1, lsl #8          /* r1 = xx5. */
897         orr     r1, r1, r3, lsr #24     /* r1 = xx54 */
898 #endif
899         str     r2, [r0]
900         strh    r1, [r0, #0x04]         /* Only the low halfword of r1 is stored */
901         bx      lr
902         LMEMCPY_6_PAD
905  * 0100: dst is 8-bit aligned, src is 32-bit aligned
906  */
/* 6-byte copy slot: word load at src+0 and halfword load at src+4. */
/* Stores are issued as halfword at dst+1, byte at dst+0, halfword at dst+3, byte at dst+5. */
/* r1 is overwritten with shifted data after the loads, r0 is only used as the store base. */
907         ldr     r3, [r1]                /* BE:r3 = 0123  LE:r3 = 3210 */
908         ldrh    r2, [r1, #0x04]         /* BE:r2 = ..45  LE:r2 = ..54 */
909         mov     r1, r3, lsr #8          /* BE:r1 = .012  LE:r1 = .321 */
910         strh    r1, [r0, #0x01]
911 #ifdef __ARMEB__
912         mov     r1, r3, lsr #24         /* r1 = ...0 */
913         strb    r1, [r0]
914         mov     r3, r3, lsl #8          /* r3 = 123. */
915         orr     r3, r3, r2, lsr #8      /* r3 = 1234 */
916 #else
917         strb    r3, [r0]
918         mov     r3, r3, lsr #24         /* r3 = ...3 */
919         orr     r3, r3, r2, lsl #8      /* r3 = .543 */
920         mov     r2, r2, lsr #8          /* r2 = ...5 */
921 #endif
922         strh    r3, [r0, #0x03]
923         strb    r2, [r0, #0x05]
924         bx      lr
925         LMEMCPY_6_PAD
928  * 0101: dst is 8-bit aligned, src is 8-bit aligned
929  */
/* 6-byte copy slot with no shifting: byte, halfword, halfword, byte are loaded from */
/* src offsets 0, 1, 3, 5 and stored at the same offsets from dst. Same code for either endianness. */
/* Uses r2, r3, ip and r1 as the four temporaries (r1 is loaded last because it is the base). */
930         ldrb    r2, [r1]
931         ldrh    r3, [r1, #0x01]
932         ldrh    ip, [r1, #0x03]
933         ldrb    r1, [r1, #0x05]
934         strb    r2, [r0]
935         strh    r3, [r0, #0x01]
936         strh    ip, [r0, #0x03]
937         strb    r1, [r0, #0x05]
938         bx      lr
939         LMEMCPY_6_PAD
942  * 0110: dst is 8-bit aligned, src is 16-bit aligned
943  */
/* 6-byte copy slot: halfword load at src+0 and word load at src+2 (r1 is overwritten by the word). */
/* Stores are bytes at dst+0 and dst+5 and halfwords at dst+3 and dst+1, each built in r3. */
944         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
945         ldr     r1, [r1, #0x02]         /* BE:r1 = 2345  LE:r1 = 5432 */
946 #ifdef __ARMEB__
947         mov     r3, r2, lsr #8          /* r3 = ...0 */
948         strb    r3, [r0]
949         strb    r1, [r0, #0x05]
950         mov     r3, r1, lsr #8          /* r3 = .234 */
951         strh    r3, [r0, #0x03]
952         mov     r3, r2, lsl #8          /* r3 = .01. */
953         orr     r3, r3, r1, lsr #24     /* r3 = .012 */
954         strh    r3, [r0, #0x01]
955 #else
956         strb    r2, [r0]
957         mov     r3, r1, lsr #24
958         strb    r3, [r0, #0x05]
959         mov     r3, r1, lsr #8          /* r3 = .543 */
960         strh    r3, [r0, #0x03]
961         mov     r3, r2, lsr #8          /* r3 = ...1 */
962         orr     r3, r3, r1, lsl #8      /* r3 = 4321 */
963         strh    r3, [r0, #0x01]
964 #endif
965         bx      lr
966         LMEMCPY_6_PAD
969  * 0111: dst is 8-bit aligned, src is 8-bit aligned
970  */
/* 6-byte copy slot with no shifting: byte, halfword, halfword, byte at offsets 0, 1, 3, 5 */
/* are loaded from src and stored to dst. The instruction sequence matches the 0101 slot. */
971         ldrb    r2, [r1]
972         ldrh    r3, [r1, #0x01]
973         ldrh    ip, [r1, #0x03]
974         ldrb    r1, [r1, #0x05]
975         strb    r2, [r0]
976         strh    r3, [r0, #0x01]
977         strh    ip, [r0, #0x03]
978         strb    r1, [r0, #0x05]
979         bx      lr
980         LMEMCPY_6_PAD
983  * 1000: dst is 16-bit aligned, src is 32-bit aligned
984  */
/* 6-byte copy slot: word load at src+0 and halfword load at src+4 (issued in opposite order */
/* in the two endian branches), stored as a halfword at dst+0 and a word at dst+2. */
985 #ifdef __ARMEB__
986         ldr     r2, [r1]                /* r2 = 0123 */
987         ldrh    r3, [r1, #0x04]         /* r3 = ..45 */
988         mov     r1, r2, lsr #16         /* r1 = ..01 */
989         orr     r3, r3, r2, lsl#16      /* r3 = 2345 */
990         strh    r1, [r0]
991         str     r3, [r0, #0x02]
992 #else
993         ldrh    r2, [r1, #0x04]         /* r2 = ..54 */
994         ldr     r3, [r1]                /* r3 = 3210 */
995         mov     r2, r2, lsl #16         /* r2 = 54.. */
996         orr     r2, r2, r3, lsr #16     /* r2 = 5432 */
/* strh writes only the low halfword of r3, i.e. source bytes 0 and 1. */
997         strh    r3, [r0]
998         str     r2, [r0, #0x02]
999 #endif
1000         bx      lr
1001         LMEMCPY_6_PAD
1004  * 1001: dst is 16-bit aligned, src is 8-bit aligned
1005  */
/* 6-byte copy slot: word loads at src-1 and src+3, stored as a halfword at dst+0 and a word at dst+2. */
/* One byte before src and one byte after source byte 5 are loaded (the x bytes) but not stored. */
/* NOTE(review): the loads are word aligned only if src has byte offset 1, as the label implies - confirm. */
1006         ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
1007         ldr     r2, [r1, #3]            /* BE:r2 = 345x  LE:r2 = x543 */
1008         mov     r1, r3, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
1009 #ifdef __ARMEB__
1010         mov     r2, r2, lsr #8          /* r2 = .345 */
1011         orr     r2, r2, r3, lsl #24     /* r2 = 2345 */
1012 #else
1013         mov     r2, r2, lsl #8          /* r2 = 543. */
1014         orr     r2, r2, r3, lsr #24     /* r2 = 5432 */
1015 #endif
1016         strh    r1, [r0]
1017         str     r2, [r0, #0x02]
1018         bx      lr
1019         LMEMCPY_6_PAD
1022  * 1010: dst is 16-bit aligned, src is 16-bit aligned
1023  */
/* 6-byte copy slot with no shifting: a halfword at offset 0 and a word at offset 2 */
/* are loaded from src and stored to dst. Only r2 and r3 are written. */
1024         ldrh    r2, [r1]
1025         ldr     r3, [r1, #0x02]
1026         strh    r2, [r0]
1027         str     r3, [r0, #0x02]
1028         bx      lr
1029         LMEMCPY_6_PAD
1032  * 1011: dst is 16-bit aligned, src is 8-bit aligned
1033  */
/* 6-byte copy slot: byte load at src+0, word load at src+1, byte load at src+5, */
/* merged with 8 and 24 bit shifts into a halfword store at dst+0 and a word store at dst+2. */
1034         ldrb    r3, [r1]                /* r3 = ...0 */
1035         ldr     r2, [r1, #0x01]         /* BE:r2 = 1234  LE:r2 = 4321 */
1036         ldrb    r1, [r1, #0x05]         /* r1 = ...5 */
1037 #ifdef __ARMEB__
1038         mov     r3, r3, lsl #8          /* r3 = ..0. */
1039         orr     r3, r3, r2, lsr #24     /* r3 = ..01 */
1040         orr     r1, r1, r2, lsl #8      /* r1 = 2345 */
1041 #else
1042         orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
1043         mov     r1, r1, lsl #24         /* r1 = 5... */
1044         orr     r1, r1, r2, lsr #8      /* r1 = 5432 */
1045 #endif
/* strh writes only the low halfword of r3, so the upper bytes left in r3 on LE do not matter. */
1046         strh    r3, [r0]
1047         str     r1, [r0, #0x02]
1048         bx      lr
1049         LMEMCPY_6_PAD
1052  * 1100: dst is 8-bit aligned, src is 32-bit aligned
1053  */
/* 6-byte copy slot: word load at src+0 and halfword load at src+4 (into r1, replacing the pointer). */
/* Stores are a byte at dst+0, a word at dst+1 and a byte at dst+5. */
1054         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1055         ldrh    r1, [r1, #0x04]         /* BE:r1 = ..45  LE:r1 = ..54 */
1056 #ifdef __ARMEB__
1057         mov     r3, r2, lsr #24         /* r3 = ...0 */
1058         strb    r3, [r0]
1059         mov     r2, r2, lsl #8          /* r2 = 123. */
1060         orr     r2, r2, r1, lsr #8      /* r2 = 1234 */
1061 #else
1062         strb    r2, [r0]
1063         mov     r2, r2, lsr #8          /* r2 = .321 */
1064         orr     r2, r2, r1, lsl #24     /* r2 = 4321 */
1065         mov     r1, r1, lsr #8          /* r1 = ...5 */
1066 #endif
1067         str     r2, [r0, #0x01]
/* On BE r1 is stored unshifted because strb writes only its low byte, which is source byte 5. */
1068         strb    r1, [r0, #0x05]
1069         bx      lr
1070         LMEMCPY_6_PAD
1073  * 1101: dst is 8-bit aligned, src is 8-bit aligned
1074  */
/* 6-byte copy slot with no shifting: byte, halfword, halfword, byte at offsets 0, 1, 3, 5 */
/* are loaded from src and stored to dst. The instruction sequence matches the 0101 slot. */
1075         ldrb    r2, [r1]
1076         ldrh    r3, [r1, #0x01]
1077         ldrh    ip, [r1, #0x03]
1078         ldrb    r1, [r1, #0x05]
1079         strb    r2, [r0]
1080         strh    r3, [r0, #0x01]
1081         strh    ip, [r0, #0x03]
1082         strb    r1, [r0, #0x05]
1083         bx      lr
1084         LMEMCPY_6_PAD
1087  * 1110: dst is 8-bit aligned, src is 16-bit aligned
1088  */
/* 6-byte copy slot: halfword load at src+0 and word load at src+2 (into r1, replacing the pointer). */
/* Stores are a byte at dst+0, a word at dst+1 and a byte at dst+5. */
1089         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1090         ldr     r1, [r1, #0x02]         /* BE:r1 = 2345  LE:r1 = 5432 */
1091 #ifdef __ARMEB__
1092         mov     r3, r2, lsr #8          /* r3 = ...0 */
1093         strb    r3, [r0]
1094         mov     r2, r2, lsl #24         /* r2 = 1... */
1095         orr     r2, r2, r1, lsr #8      /* r2 = 1234 */
1096 #else
1097         strb    r2, [r0]
1098         mov     r2, r2, lsr #8          /* r2 = ...1 */
1099         orr     r2, r2, r1, lsl #8      /* r2 = 4321 */
1100         mov     r1, r1, lsr #24         /* r1 = ...5 */
1101 #endif
1102         str     r2, [r0, #0x01]
/* On BE r1 is stored unshifted because strb writes only its low byte, which is source byte 5. */
1103         strb    r1, [r0, #0x05]
1104         bx      lr
1105         LMEMCPY_6_PAD
1108  * 1111: dst is 8-bit aligned, src is 8-bit aligned
1109  */
/* 6-byte copy slot with no shifting: a byte at offset 0, a word at offset 1 and a byte at */
/* offset 5 are loaded from src and stored to dst. r1 is loaded last because it is the base. */
1110         ldrb    r2, [r1]
1111         ldr     r3, [r1, #0x01]
1112         ldrb    r1, [r1, #0x05]
1113         strb    r2, [r0]
1114         str     r3, [r0, #0x01]
1115         strb    r1, [r0, #0x05]
1116         bx      lr
1117         LMEMCPY_6_PAD
1120 /******************************************************************************
1121  * Special case for 8 byte copies
1122  */
/* Entry: r1 is read as the source pointer and r0 as the destination (every load in this table */
/* uses r1 as base, every store uses r0). r0 is never written. Each slot ends with bx lr. */
/* The slot index is ((r0 and 3) shifted left by 2) or (r1 and 3), giving 16 slots of 64 bytes each. */
/* In the register diagrams digits name source byte offsets. NOTE(review): x appears to mark bytes */
/* outside the copied range and . bits zeroed by a shift - inferred from usage. */
1123 #define LMEMCPY_8_LOG2  6       /* 64 bytes */
1124 #define LMEMCPY_8_PAD   .align LMEMCPY_8_LOG2
1125         LMEMCPY_8_PAD
1126 .Lmemcpy_8:
1127         and     r2, r1, #0x03
1128         orr     r2, r2, r0, lsl #2
1129         ands    r2, r2, #0x0f
/* In ARM state pc reads as this instruction plus 8, which is .Lmemcpy_8 + 0x14, so r3 = .Lmemcpy_8. */
1130         sub     r3, pc, #0x14
/* A zero index leaves Z set from the ands, so addne is skipped and execution falls through */
/* into the 0000 case, which shares slot 0 with this dispatcher. */
1131         addne   pc, r3, r2, lsl #LMEMCPY_8_LOG2
1134  * 0000: dst is 32-bit aligned, src is 32-bit aligned
1135  */
/* Two word loads followed by two word stores, no shifting. */
1136         ldr     r2, [r1]
1137         ldr     r3, [r1, #0x04]
1138         str     r2, [r0]
1139         str     r3, [r0, #0x04]
1140         bx      lr
1141         LMEMCPY_8_PAD
1144  * 0001: dst is 32-bit aligned, src is 8-bit aligned
1145  */
/* 8-byte copy slot, index 0001 (dst offset 0, src offset 1 under the .Lmemcpy_8 dispatch). */
/* Word loads at src-1 and src+3 plus a byte load at src+7, merged into two word stores. */
/* The byte before src is loaded (x in the diagram) but shifted out before the store. */
1146         ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
1147         ldr     r2, [r1, #0x03]         /* BE:r2 = 3456  LE:r2 = 6543 */
1148         ldrb    r1, [r1, #0x07]         /* r1 = ...7 */
1149 #ifdef __ARMEB__
1150         mov     r3, r3, lsl #8          /* r3 = 012. */
1151         orr     r3, r3, r2, lsr #24     /* r3 = 0123 */
1152         orr     r2, r1, r2, lsl #8      /* r2 = 4567 */
1153 #else
1154         mov     r3, r3, lsr #8          /* r3 = .210 */
1155         orr     r3, r3, r2, lsl #24     /* r3 = 3210 */
1156         mov     r1, r1, lsl #24         /* r1 = 7... */
1157         orr     r2, r1, r2, lsr #8      /* r2 = 7654 */
1158 #endif
1159         str     r3, [r0]
1160         str     r2, [r0, #0x04]
1161         bx      lr
1162         LMEMCPY_8_PAD
1165  * 0010: dst is 32-bit aligned, src is 16-bit aligned
1166  */
/* 8-byte copy slot, index 0010 (dst offset 0, src offset 2 under the .Lmemcpy_8 dispatch). */
/* Halfword, word, halfword loads at src offsets 0, 2, 6 are merged with 16-bit shifts */
/* into two word stores at dst+0 and dst+4. */
1167         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1168         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1169         ldrh    r1, [r1, #0x06]         /* BE:r1 = ..67  LE:r1 = ..76 */
1170 #ifdef __ARMEB__
1171         mov     r2, r2, lsl #16         /* r2 = 01.. */
1172         orr     r2, r2, r3, lsr #16     /* r2 = 0123 */
1173         orr     r3, r1, r3, lsl #16     /* r3 = 4567 */
1174 #else
1175         orr     r2, r2, r3, lsl #16     /* r2 = 3210 */
1176         mov     r3, r3, lsr #16         /* r3 = ..54 */
1177         orr     r3, r3, r1, lsl #16     /* r3 = 7654 */
1178 #endif
1179         str     r2, [r0]
1180         str     r3, [r0, #0x04]
1181         bx      lr
1182         LMEMCPY_8_PAD
1185  * 0011: dst is 32-bit aligned, src is 8-bit aligned
1186  */
/* 8-byte copy slot, index 0011 (dst offset 0, src offset 3 under the .Lmemcpy_8 dispatch). */
/* Byte load at src+0 and word loads at src+1 and src+5, merged into two word stores. */
/* The byte after source byte 7 is loaded (x in the diagram) but shifted out before the store. */
1187         ldrb    r3, [r1]                /* r3 = ...0 */
1188         ldr     r2, [r1, #0x01]         /* BE:r2 = 1234  LE:r2 = 4321 */
1189         ldr     r1, [r1, #0x05]         /* BE:r1 = 567x  LE:r1 = x765 */
1190 #ifdef __ARMEB__
1191         mov     r3, r3, lsl #24         /* r3 = 0... */
1192         orr     r3, r3, r2, lsr #8      /* r3 = 0123 */
1193         mov     r2, r2, lsl #24         /* r2 = 4... */
1194         orr     r2, r2, r1, lsr #8      /* r2 = 4567 */
1195 #else
1196         orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
1197         mov     r2, r2, lsr #24         /* r2 = ...4 */
1198         orr     r2, r2, r1, lsl #8      /* r2 = 7654 */
1199 #endif
1200         str     r3, [r0]
1201         str     r2, [r0, #0x04]
1202         bx      lr
1203         LMEMCPY_8_PAD
1206  * 0100: dst is 8-bit aligned, src is 32-bit aligned
1207  */
/* 8-byte copy slot, index 0100 (dst offset 1, src offset 0 under the .Lmemcpy_8 dispatch). */
/* Two word loads. Stores are bytes at dst+0 and dst+7, a halfword at dst+1 and a word at dst+3. */
/* r1 is overwritten with shifted data once both loads have issued. */
1208         ldr     r3, [r1]                /* BE:r3 = 0123  LE:r3 = 3210 */
1209         ldr     r2, [r1, #0x04]         /* BE:r2 = 4567  LE:r2 = 7654 */
1210 #ifdef __ARMEB__
1211         mov     r1, r3, lsr #24         /* r1 = ...0 */
1212         strb    r1, [r0]
1213         mov     r1, r3, lsr #8          /* r1 = .012 */
1214         strb    r2, [r0, #0x07]
1215         mov     r3, r3, lsl #24         /* r3 = 3... */
1216         orr     r3, r3, r2, lsr #8      /* r3 = 3456 */
1217 #else
1218         strb    r3, [r0]
1219         mov     r1, r2, lsr #24         /* r1 = ...7 */
1220         strb    r1, [r0, #0x07]
1221         mov     r1, r3, lsr #8          /* r1 = .321 */
1222         mov     r3, r3, lsr #24         /* r3 = ...3 */
1223         orr     r3, r3, r2, lsl #8      /* r3 = 6543 */
1224 #endif
/* strh writes only the low halfword of r1, i.e. source bytes 1 and 2. */
1225         strh    r1, [r0, #0x01]
1226         str     r3, [r0, #0x03]
1227         bx      lr
1228         LMEMCPY_8_PAD
1231  * 0101: dst is 8-bit aligned, src is 8-bit aligned
1232  */
/* 8-byte copy slot, index 0101 (dst offset 1, src offset 1 under the .Lmemcpy_8 dispatch). */
/* No shifting: byte, halfword, word, byte at offsets 0, 1, 3, 7 are loaded from src */
/* and stored to dst at the same offsets. Uses r2, r3, ip and r1 as temporaries. */
1233         ldrb    r2, [r1]
1234         ldrh    r3, [r1, #0x01]
1235         ldr     ip, [r1, #0x03]
1236         ldrb    r1, [r1, #0x07]
1237         strb    r2, [r0]
1238         strh    r3, [r0, #0x01]
1239         str     ip, [r0, #0x03]
1240         strb    r1, [r0, #0x07]
1241         bx      lr
1242         LMEMCPY_8_PAD
1245  * 0110: dst is 8-bit aligned, src is 16-bit aligned
1246  */
/* 8-byte copy slot, index 0110 (dst offset 1, src offset 2 under the .Lmemcpy_8 dispatch). */
/* Halfword, word, halfword loads at src offsets 0, 2, 6. Stores are bytes at dst+0 and dst+7, */
/* a halfword at dst+1 (built in ip) and a word at dst+3 (built in r3). */
1247         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1248         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1249         ldrh    r1, [r1, #0x06]         /* BE:r1 = ..67  LE:r1 = ..76 */
1250 #ifdef __ARMEB__
1251         mov     ip, r2, lsr #8          /* ip = ...0 */
1252         strb    ip, [r0]
1253         mov     ip, r2, lsl #8          /* ip = .01. */
1254         orr     ip, ip, r3, lsr #24     /* ip = .012 */
1255         strb    r1, [r0, #0x07]
1256         mov     r3, r3, lsl #8          /* r3 = 345. */
1257         orr     r3, r3, r1, lsr #8      /* r3 = 3456 */
1258 #else
1259         strb    r2, [r0]                /* 0 */
1260         mov     ip, r1, lsr #8          /* ip = ...7 */
1261         strb    ip, [r0, #0x07]         /* 7 */
1262         mov     ip, r2, lsr #8          /* ip = ...1 */
1263         orr     ip, ip, r3, lsl #8      /* ip = 4321 */
1264         mov     r3, r3, lsr #8          /* r3 = .543 */
1265         orr     r3, r3, r1, lsl #24     /* r3 = 6543 */
1266 #endif
/* strh writes only the low halfword of ip, i.e. source bytes 1 and 2. */
1267         strh    ip, [r0, #0x01]
1268         str     r3, [r0, #0x03]
1269         bx      lr
1270         LMEMCPY_8_PAD
1273  * 0111: dst is 8-bit aligned, src is 8-bit aligned
1274  */
/* 8-byte copy slot, index 0111 (dst offset 1, src offset 3 under the .Lmemcpy_8 dispatch). */
/* Loads are byte, word, halfword, byte at src offsets 0, 1, 5, 7. */
/* Stores are byte, halfword, word, byte at dst offsets 0, 1, 3, 7, so the middle six bytes */
/* are regrouped with 16-bit shifts. */
1275         ldrb    r3, [r1]                /* r3 = ...0 */
1276         ldr     ip, [r1, #0x01]         /* BE:ip = 1234  LE:ip = 4321 */
1277         ldrh    r2, [r1, #0x05]         /* BE:r2 = ..56  LE:r2 = ..65 */
1278         ldrb    r1, [r1, #0x07]         /* r1 = ...7 */
1279         strb    r3, [r0]
1280         mov     r3, ip, lsr #16         /* BE:r3 = ..12  LE:r3 = ..43 */
1281 #ifdef __ARMEB__
1282         strh    r3, [r0, #0x01]
1283         orr     r2, r2, ip, lsl #16     /* r2 = 3456 */
1284 #else
1285         strh    ip, [r0, #0x01]
1286         orr     r2, r3, r2, lsl #16     /* r2 = 6543 */
1287 #endif
1288         str     r2, [r0, #0x03]
1289         strb    r1, [r0, #0x07]
1290         bx      lr
1291         LMEMCPY_8_PAD
1294  * 1000: dst is 16-bit aligned, src is 32-bit aligned
1295  */
/* 8-byte copy slot, index 1000 (dst offset 2, src offset 0 under the .Lmemcpy_8 dispatch). */
/* Two word loads. Stores are a halfword at dst+0, a word at dst+2 and a halfword at dst+6. */
1296         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1297         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
1298         mov     r1, r2, lsr #16         /* BE:r1 = ..01  LE:r1 = ..32 */
1299 #ifdef __ARMEB__
1300         strh    r1, [r0]
1301         mov     r1, r3, lsr #16         /* r1 = ..45 */
1302         orr     r2, r1 ,r2, lsl #16     /* r2 = 2345 */
1303 #else
1304         strh    r2, [r0]
1305         orr     r2, r1, r3, lsl #16     /* r2 = 5432 */
1306         mov     r3, r3, lsr #16         /* r3 = ..76 */
1307 #endif
1308         str     r2, [r0, #0x02]
/* On BE r3 is stored unshifted because strh writes only its low halfword, source bytes 6 and 7. */
1309         strh    r3, [r0, #0x06]
1310         bx      lr
1311         LMEMCPY_8_PAD
1314  * 1001: dst is 16-bit aligned, src is 8-bit aligned
1315  */
/* 8-byte copy slot, index 1001 (dst offset 2, src offset 1 under the .Lmemcpy_8 dispatch). */
/* Word loads at src-1 and src+3 plus a byte load at src+7 (into ip). */
/* Stores are a halfword at dst+0, a word at dst+2 and a halfword at dst+6. */
/* The byte before src is loaded (x in the diagram) but never reaches a stored halfword. */
1316         ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
1317         ldr     r3, [r1, #0x03]         /* BE:r3 = 3456  LE:r3 = 6543 */
1318         ldrb    ip, [r1, #0x07]         /* ip = ...7 */
1319         mov     r1, r2, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
1320         strh    r1, [r0]
1321 #ifdef __ARMEB__
1322         mov     r1, r2, lsl #24         /* r1 = 2... */
1323         orr     r1, r1, r3, lsr #8      /* r1 = 2345 */
1324         orr     r3, ip, r3, lsl #8      /* r3 = 4567 */
1325 #else
1326         mov     r1, r2, lsr #24         /* r1 = ...2 */
1327         orr     r1, r1, r3, lsl #8      /* r1 = 5432 */
1328         mov     r3, r3, lsr #24         /* r3 = ...6 */
1329         orr     r3, r3, ip, lsl #8      /* r3 = ..76 */
1330 #endif
1331         str     r1, [r0, #0x02]
1332         strh    r3, [r0, #0x06]
1333         bx      lr
1334         LMEMCPY_8_PAD
1337  * 1010: dst is 16-bit aligned, src is 16-bit aligned
1338  */
/* 8-byte copy slot, index 1010 (dst offset 2, src offset 2 under the .Lmemcpy_8 dispatch). */
/* No shifting: halfword, word, halfword at offsets 0, 2, 6 are loaded from src and stored to dst. */
1339         ldrh    r2, [r1]
1340         ldr     ip, [r1, #0x02]
1341         ldrh    r3, [r1, #0x06]
1342         strh    r2, [r0]
1343         str     ip, [r0, #0x02]
1344         strh    r3, [r0, #0x06]
1345         bx      lr
1346         LMEMCPY_8_PAD
1349  * 1011: dst is 16-bit aligned, src is 8-bit aligned
1350  */
/* 8-byte copy slot, index 1011 (dst offset 2, src offset 3 under the .Lmemcpy_8 dispatch). */
/* Works from the high end down: word loads at src+5 and src+1, then a byte load at src+0. */
/* Stores are a halfword at dst+6, a word at dst+2 and a halfword at dst+0. */
/* The byte after source byte 7 is loaded (x in the diagram) but never reaches a stored halfword. */
1351         ldr     r3, [r1, #0x05]         /* BE:r3 = 567x  LE:r3 = x765 */
1352         ldr     r2, [r1, #0x01]         /* BE:r2 = 1234  LE:r2 = 4321 */
1353         ldrb    ip, [r1]                /* ip = ...0 */
1354         mov     r1, r3, lsr #8          /* BE:r1 = .567  LE:r1 = .x76 */
1355         strh    r1, [r0, #0x06]
1356 #ifdef __ARMEB__
1357         mov     r3, r3, lsr #24         /* r3 = ...5 */
1358         orr     r3, r3, r2, lsl #8      /* r3 = 2345 */
1359         mov     r2, r2, lsr #24         /* r2 = ...1 */
1360         orr     r2, r2, ip, lsl #8      /* r2 = ..01 */
1361 #else
1362         mov     r3, r3, lsl #24         /* r3 = 5... */
1363         orr     r3, r3, r2, lsr #8      /* r3 = 5432 */
1364         orr     r2, ip, r2, lsl #8      /* r2 = 3210 */
1365 #endif
1366         str     r3, [r0, #0x02]
1367         strh    r2, [r0]
1368         bx      lr
1369         LMEMCPY_8_PAD
1372  * 1100: dst is 8-bit aligned, src is 32-bit aligned
1373  */
/* 8-byte copy slot, index 1100 (dst offset 3, src offset 0 under the .Lmemcpy_8 dispatch). */
/* Two word loads (high word first). Stores are a halfword at dst+5, bytes at dst+7 and dst+0, */
/* and a word at dst+1. The word store is inside each endian branch rather than shared. */
1374         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
1375         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1376         mov     r1, r3, lsr #8          /* BE:r1 = .456  LE:r1 = .765 */
1377         strh    r1, [r0, #0x05]
1378 #ifdef __ARMEB__
1379         strb    r3, [r0, #0x07]
1380         mov     r1, r2, lsr #24         /* r1 = ...0 */
1381         strb    r1, [r0]
1382         mov     r2, r2, lsl #8          /* r2 = 123. */
1383         orr     r2, r2, r3, lsr #24     /* r2 = 1234 */
1384         str     r2, [r0, #0x01]
1385 #else
1386         strb    r2, [r0]
1387         mov     r1, r3, lsr #24         /* r1 = ...7 */
1388         strb    r1, [r0, #0x07]
1389         mov     r2, r2, lsr #8          /* r2 = .321 */
1390         orr     r2, r2, r3, lsl #24     /* r2 = 4321 */
1391         str     r2, [r0, #0x01]
1392 #endif
1393         bx      lr
1394         LMEMCPY_8_PAD
1397  * 1101: dst is 8-bit aligned, src is 8-bit aligned
1398  */
/* 8-byte copy slot, index 1101 (dst offset 3, src offset 1 under the .Lmemcpy_8 dispatch). */
/* Loads are byte, halfword, word, byte at src offsets 0, 1, 3, 7. */
/* Stores are byte, word, halfword, byte at dst offsets 0, 1, 5, 7, so the middle six bytes */
/* are regrouped with 16-bit shifts. */
1399         ldrb    r3, [r1]                /* r3 = ...0 */
1400         ldrh    r2, [r1, #0x01]         /* BE:r2 = ..12  LE:r2 = ..21 */
1401         ldr     ip, [r1, #0x03]         /* BE:ip = 3456  LE:ip = 6543 */
1402         ldrb    r1, [r1, #0x07]         /* r1 = ...7 */
1403         strb    r3, [r0]
1404         mov     r3, ip, lsr #16         /* BE:r3 = ..34  LE:r3 = ..65 */
1405 #ifdef __ARMEB__
1406         strh    ip, [r0, #0x05]
1407         orr     r2, r3, r2, lsl #16     /* r2 = 1234 */
1408 #else
1409         strh    r3, [r0, #0x05]
1410         orr     r2, r2, ip, lsl #16     /* r2 = 4321 */
1411 #endif
1412         str     r2, [r0, #0x01]
1413         strb    r1, [r0, #0x07]
1414         bx      lr
1415         LMEMCPY_8_PAD
1418  * 1110: dst is 8-bit aligned, src is 16-bit aligned
1419  */
/* 8-byte copy slot, index 1110 (dst offset 3, src offset 2 under the .Lmemcpy_8 dispatch). */
/* Halfword, word, halfword loads at src offsets 0, 2, 6. Stores are bytes at dst+0 and dst+7, */
/* a word at dst+1 (built in ip) and a halfword at dst+5 (built in r1). */
1420         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1421         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1422         ldrh    r1, [r1, #0x06]         /* BE:r1 = ..67  LE:r1 = ..76 */
1423 #ifdef __ARMEB__
1424         mov     ip, r2, lsr #8          /* ip = ...0 */
1425         strb    ip, [r0]
1426         mov     ip, r2, lsl #24         /* ip = 1... */
1427         orr     ip, ip, r3, lsr #8      /* ip = 1234 */
1428         strb    r1, [r0, #0x07]
1429         mov     r1, r1, lsr #8          /* r1 = ...6 */
1430         orr     r1, r1, r3, lsl #8      /* r1 = 3456 */
1431 #else
1432         strb    r2, [r0]
1433         mov     ip, r2, lsr #8          /* ip = ...1 */
1434         orr     ip, ip, r3, lsl #8      /* ip = 4321 */
1435         mov     r2, r1, lsr #8          /* r2 = ...7 */
1436         strb    r2, [r0, #0x07]
1437         mov     r1, r1, lsl #8          /* r1 = .76. */
1438         orr     r1, r1, r3, lsr #24     /* r1 = .765 */
1439 #endif
1440         str     ip, [r0, #0x01]
/* strh writes only the low halfword of r1, i.e. source bytes 5 and 6. */
1441         strh    r1, [r0, #0x05]
1442         bx      lr
1443         LMEMCPY_8_PAD
1446  * 1111: dst is 8-bit aligned, src is 8-bit aligned
1447  */
/* 8-byte copy slot, index 1111 (dst offset 3, src offset 3 under the .Lmemcpy_8 dispatch). */
/* No shifting: byte, word, halfword, byte at offsets 0, 1, 5, 7 are loaded from src */
/* and stored to dst at the same offsets. This is the last slot of the 8-byte table. */
1448         ldrb    r2, [r1]
1449         ldr     ip, [r1, #0x01]
1450         ldrh    r3, [r1, #0x05]
1451         ldrb    r1, [r1, #0x07]
1452         strb    r2, [r0]
1453         str     ip, [r0, #0x01]
1454         strh    r3, [r0, #0x05]
1455         strb    r1, [r0, #0x07]
1456         bx      lr
1457         LMEMCPY_8_PAD
1459 /******************************************************************************
1460  * Special case for 12 byte copies
1461  */
/* Entry: r1 is read as the source pointer and r0 as the destination (every load in this table */
/* uses r1 as base, every store uses r0). r0 is never written. Each slot ends with bx lr. */
/* The slot index is ((r0 and 3) shifted left by 2) or (r1 and 3), one 128-byte slot per index. */
/* In the register diagrams digits and the letters A and B name source byte offsets 0 to 11. */
1462 #define LMEMCPY_C_LOG2  7       /* 128 bytes */
1463 #define LMEMCPY_C_PAD   .align LMEMCPY_C_LOG2
1464         LMEMCPY_C_PAD
1465 .Lmemcpy_c:
1466         and     r2, r1, #0x03
1467         orr     r2, r2, r0, lsl #2
1468         ands    r2, r2, #0x0f
/* In ARM state pc reads as this instruction plus 8, which is .Lmemcpy_c + 0x14, so r3 = .Lmemcpy_c. */
1469         sub     r3, pc, #0x14
/* A zero index leaves Z set from the ands, so addne is skipped and execution falls through */
/* into the 0000 case, which shares slot 0 with this dispatcher. */
1470         addne   pc, r3, r2, lsl #LMEMCPY_C_LOG2
1473  * 0000: dst is 32-bit aligned, src is 32-bit aligned
1474  */
/* Three word loads followed by three word stores, no shifting. */
1475         ldr     r2, [r1]
1476         ldr     r3, [r1, #0x04]
1477         ldr     r1, [r1, #0x08]
1478         str     r2, [r0]
1479         str     r3, [r0, #0x04]
1480         str     r1, [r0, #0x08]
1481         bx      lr
1482         LMEMCPY_C_PAD
1485  * 0001: dst is 32-bit aligned, src is 8-bit aligned
1486  */
/* 12-byte copy slot, index 0001 (dst offset 0, src offset 1 under the .Lmemcpy_c dispatch). */
/* Works from the high end down: byte load at src+0xb, then word loads at src+7, src+3, src-1. */
/* The three destination words are stored in the order dst+8, dst+4, dst+0. */
/* The byte before src is loaded (x in the diagram) but shifted out before the store. */
1487         ldrb    r2, [r1, #0xb]          /* r2 = ...B */
1488         ldr     ip, [r1, #0x07]         /* BE:ip = 789A  LE:ip = A987 */
1489         ldr     r3, [r1, #0x03]         /* BE:r3 = 3456  LE:r3 = 6543 */
1490         ldr     r1, [r1, #-1]           /* BE:r1 = x012  LE:r1 = 210x */
1491 #ifdef __ARMEB__
1492         orr     r2, r2, ip, lsl #8      /* r2 = 89AB */
1493         str     r2, [r0, #0x08]
1494         mov     r2, ip, lsr #24         /* r2 = ...7 */
1495         orr     r2, r2, r3, lsl #8      /* r2 = 4567 */
1496         mov     r1, r1, lsl #8          /* r1 = 012. */
1497         orr     r1, r1, r3, lsr #24     /* r1 = 0123 */
1498 #else
1499         mov     r2, r2, lsl #24         /* r2 = B... */
1500         orr     r2, r2, ip, lsr #8      /* r2 = BA98 */
1501         str     r2, [r0, #0x08]
1502         mov     r2, ip, lsl #24         /* r2 = 7... */
1503         orr     r2, r2, r3, lsr #8      /* r2 = 7654 */
1504         mov     r1, r1, lsr #8          /* r1 = .210 */
1505         orr     r1, r1, r3, lsl #24     /* r1 = 3210 */
1506 #endif
1507         str     r2, [r0, #0x04]
1508         str     r1, [r0]
1509         bx      lr
1510         LMEMCPY_C_PAD
1513  * 0010: dst is 32-bit aligned, src is 16-bit aligned
1514  */
/* 12-byte copy slot, index 0010 (dst offset 0, src offset 2 under the .Lmemcpy_c dispatch). */
/* Halfword, word, word, halfword loads at src offsets 0, 2, 6, 0xa are merged with 16-bit */
/* shifts into three word stores. The first word is stored inside each endian branch. */
1515         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1516         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1517         ldr     ip, [r1, #0x06]         /* BE:ip = 6789  LE:ip = 9876 */
1518         ldrh    r1, [r1, #0x0a]         /* BE:r1 = ..AB  LE:r1 = ..BA */
1519 #ifdef __ARMEB__
1520         mov     r2, r2, lsl #16         /* r2 = 01.. */
1521         orr     r2, r2, r3, lsr #16     /* r2 = 0123 */
1522         str     r2, [r0]
1523         mov     r3, r3, lsl #16         /* r3 = 45.. */
1524         orr     r3, r3, ip, lsr #16     /* r3 = 4567 */
1525         orr     r1, r1, ip, lsl #16     /* r1 = 89AB */
1526 #else
1527         orr     r2, r2, r3, lsl #16     /* r2 = 3210 */
1528         str     r2, [r0]
1529         mov     r3, r3, lsr #16         /* r3 = ..54 */
1530         orr     r3, r3, ip, lsl #16     /* r3 = 7654 */
1531         mov     r1, r1, lsl #16         /* r1 = BA.. */
1532         orr     r1, r1, ip, lsr #16     /* r1 = BA98 */
1533 #endif
1534         str     r3, [r0, #0x04]
1535         str     r1, [r0, #0x08]
1536         bx      lr
1537         LMEMCPY_C_PAD
1540  * 0011: dst is 32-bit aligned, src is 8-bit aligned
1541  */
/* 12-byte copy slot, index 0011 (dst offset 0, src offset 3 under the .Lmemcpy_c dispatch). */
/* Byte load at src+0 and word loads at src+1, src+5, src+9, merged into three word stores. */
/* The byte after source byte B is loaded (x in the diagram) but shifted out before the store. */
1542         ldrb    r2, [r1]                /* r2 = ...0 */
1543         ldr     r3, [r1, #0x01]         /* BE:r3 = 1234  LE:r3 = 4321 */
1544         ldr     ip, [r1, #0x05]         /* BE:ip = 5678  LE:ip = 8765 */
1545         ldr     r1, [r1, #0x09]         /* BE:r1 = 9ABx  LE:r1 = xBA9 */
1546 #ifdef __ARMEB__
1547         mov     r2, r2, lsl #24         /* r2 = 0... */
1548         orr     r2, r2, r3, lsr #8      /* r2 = 0123 */
1549         str     r2, [r0]
1550         mov     r3, r3, lsl #24         /* r3 = 4... */
1551         orr     r3, r3, ip, lsr #8      /* r3 = 4567 */
1552         mov     r1, r1, lsr #8          /* r1 = .9AB */
1553         orr     r1, r1, ip, lsl #24     /* r1 = 89AB */
1554 #else
1555         orr     r2, r2, r3, lsl #8      /* r2 = 3210 */
1556         str     r2, [r0]
1557         mov     r3, r3, lsr #24         /* r3 = ...4 */
1558         orr     r3, r3, ip, lsl #8      /* r3 = 7654 */
1559         mov     r1, r1, lsl #8          /* r1 = BA9. */
1560         orr     r1, r1, ip, lsr #24     /* r1 = BA98 */
1561 #endif
1562         str     r3, [r0, #0x04]
1563         str     r1, [r0, #0x08]
1564         bx      lr
1565         LMEMCPY_C_PAD
1568  * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
1569  */
/* 12-byte copy slot, index 0100 (dst offset 1, src offset 0 under the .Lmemcpy_c dispatch). */
/* Three word loads. Stores are a halfword at dst+1, a byte at dst+0, words at dst+3 and dst+7, */
/* and a byte at dst+0xb. r1 is used as the shift temporary once the loads have issued. */
1570         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1571         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
1572         ldr     ip, [r1, #0x08]         /* BE:ip = 89AB  LE:ip = BA98 */
1573         mov     r1, r2, lsr #8          /* BE:r1 = .012  LE:r1 = .321 */
1574         strh    r1, [r0, #0x01]
1575 #ifdef __ARMEB__
1576         mov     r1, r2, lsr #24         /* r1 = ...0 */
1577         strb    r1, [r0]
1578         mov     r1, r2, lsl #24         /* r1 = 3... */
1579         orr     r2, r1, r3, lsr #8      /* r2 = 3456 */
1580         mov     r1, r3, lsl #24         /* r1 = 7... */
1581         orr     r1, r1, ip, lsr #8      /* r1 = 789A */
1582 #else
1583         strb    r2, [r0]
1584         mov     r1, r2, lsr #24         /* r1 = ...3 */
1585         orr     r2, r1, r3, lsl #8      /* r2 = 6543 */
1586         mov     r1, r3, lsr #24         /* r1 = ...7 */
1587         orr     r1, r1, ip, lsl #8      /* r1 = A987 */
1588         mov     ip, ip, lsr #24         /* ip = ...B */
1589 #endif
1590         str     r2, [r0, #0x03]
1591         str     r1, [r0, #0x07]
/* On BE ip is stored unshifted because strb writes only its low byte, which is source byte B. */
1592         strb    ip, [r0, #0x0b]
1593         bx      lr
1594         LMEMCPY_C_PAD
1597  * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
1598  */
/* 12-byte copy slot, index 0101 (dst offset 1, src offset 1 under the .Lmemcpy_c dispatch). */
/* No shifting: byte, halfword, word, word, byte at offsets 0, 1, 3, 7, 0xb are copied as is. */
/* The first byte is stored before r2 is reloaded with the word at src+7, so the five pieces */
/* fit in r2, r3, ip and r1. */
1599         ldrb    r2, [r1]
1600         ldrh    r3, [r1, #0x01]
1601         ldr     ip, [r1, #0x03]
1602         strb    r2, [r0]
1603         ldr     r2, [r1, #0x07]
1604         ldrb    r1, [r1, #0x0b]
1605         strh    r3, [r0, #0x01]
1606         str     ip, [r0, #0x03]
1607         str     r2, [r0, #0x07]
1608         strb    r1, [r0, #0x0b]
1609         bx      lr
1610         LMEMCPY_C_PAD
1613  * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
1614  */
/* 12-byte copy slot, index 0110 (dst offset 1, src offset 2 under the .Lmemcpy_c dispatch). */
/* Halfword, word, word, halfword loads at src offsets 0, 2, 6, 0xa. Stores are a byte at dst+0, */
/* a halfword at dst+1, words at dst+3 and dst+7, and a byte at dst+0xb. */
1615         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1616         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1617         ldr     ip, [r1, #0x06]         /* BE:ip = 6789  LE:ip = 9876 */
1618         ldrh    r1, [r1, #0x0a]         /* BE:r1 = ..AB  LE:r1 = ..BA */
1619 #ifdef __ARMEB__
/* The rotate puts byte 0 in the low byte for the strb and keeps byte 1 in the top byte for reuse. */
1620         mov     r2, r2, ror #8          /* r2 = 1..0 */
1621         strb    r2, [r0]
1622         mov     r2, r2, lsr #16         /* r2 = ..1. */
1623         orr     r2, r2, r3, lsr #24     /* r2 = ..12 */
1624         strh    r2, [r0, #0x01]
1625         mov     r2, r3, lsl #8          /* r2 = 345. */
1626         orr     r3, r2, ip, lsr #24     /* r3 = 3456 */
1627         mov     r2, ip, lsl #8          /* r2 = 789. */
1628         orr     r2, r2, r1, lsr #8      /* r2 = 789A */
1629 #else
1630         strb    r2, [r0]
1631         mov     r2, r2, lsr #8          /* r2 = ...1 */
1632         orr     r2, r2, r3, lsl #8      /* r2 = 4321 */
1633         strh    r2, [r0, #0x01]
1634         mov     r2, r3, lsr #8          /* r2 = .543 */
1635         orr     r3, r2, ip, lsl #24     /* r3 = 6543 */
1636         mov     r2, ip, lsr #8          /* r2 = .987 */
1637         orr     r2, r2, r1, lsl #24     /* r2 = A987 */
1638         mov     r1, r1, lsr #8          /* r1 = ...B */
1639 #endif
1640         str     r3, [r0, #0x03]
1641         str     r2, [r0, #0x07]
/* On BE r1 is stored unshifted because strb writes only its low byte, which is source byte B. */
1642         strb    r1, [r0, #0x0b]
1643         bx      lr
1644         LMEMCPY_C_PAD
1647  * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
1648  */
/* 12-byte copy slot, index 0111 (dst offset 1, src offset 3 under the .Lmemcpy_c dispatch). */
/* Byte load at src+0 and word loads at src+1, src+5, src+9. Stores are a byte at dst+0, */
/* a halfword at dst+1, words at dst+3 and dst+7, and a byte at dst+0xb. */
/* The byte after source byte B is loaded (x in the diagram) but never reaches the stored byte. */
1649         ldrb    r2, [r1]
1650         ldr     r3, [r1, #0x01]         /* BE:r3 = 1234  LE:r3 = 4321 */
1651         ldr     ip, [r1, #0x05]         /* BE:ip = 5678  LE:ip = 8765 */
1652         ldr     r1, [r1, #0x09]         /* BE:r1 = 9ABx  LE:r1 = xBA9 */
1653         strb    r2, [r0]
1654 #ifdef __ARMEB__
1655         mov     r2, r3, lsr #16         /* r2 = ..12 */
1656         strh    r2, [r0, #0x01]
1657         mov     r3, r3, lsl #16         /* r3 = 34.. */
1658         orr     r3, r3, ip, lsr #16     /* r3 = 3456 */
1659         mov     ip, ip, lsl #16         /* ip = 78.. */
1660         orr     ip, ip, r1, lsr #16     /* ip = 789A */
1661         mov     r1, r1, lsr #8          /* r1 = .9AB */
1662 #else
1663         strh    r3, [r0, #0x01]
1664         mov     r3, r3, lsr #16         /* r3 = ..43 */
1665         orr     r3, r3, ip, lsl #16     /* r3 = 6543 */
1666         mov     ip, ip, lsr #16         /* ip = ..87 */
1667         orr     ip, ip, r1, lsl #16     /* ip = A987 */
1668         mov     r1, r1, lsr #16         /* r1 = ..xB */
1669 #endif
1670         str     r3, [r0, #0x03]
1671         str     ip, [r0, #0x07]
1672         strb    r1, [r0, #0x0b]
1673         bx      lr
1674         LMEMCPY_C_PAD
1677  * 1000: dst is 16-bit aligned, src is 32-bit aligned
1678  */
/* 12-byte copy slot, index 1000 (dst offset 2, src offset 0 under the .Lmemcpy_c dispatch). */
/* Three word loads. Stores are a halfword at dst+0, words at dst+2 and dst+6, */
/* and a halfword at dst+0xa, built with 16-bit shifts. */
1679         ldr     ip, [r1]                /* BE:ip = 0123  LE:ip = 3210 */
1680         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
1681         ldr     r2, [r1, #0x08]         /* BE:r2 = 89AB  LE:r2 = BA98 */
1682         mov     r1, ip, lsr #16         /* BE:r1 = ..01  LE:r1 = ..32 */
1683 #ifdef __ARMEB__
1684         strh    r1, [r0]
1685         mov     r1, ip, lsl #16         /* r1 = 23.. */
1686         orr     r1, r1, r3, lsr #16     /* r1 = 2345 */
1687         mov     r3, r3, lsl #16         /* r3 = 67.. */
1688         orr     r3, r3, r2, lsr #16     /* r3 = 6789 */
1689 #else
1690         strh    ip, [r0]
1691         orr     r1, r1, r3, lsl #16     /* r1 = 5432 */
1692         mov     r3, r3, lsr #16         /* r3 = ..76 */
1693         orr     r3, r3, r2, lsl #16     /* r3 = 9876 */
1694         mov     r2, r2, lsr #16         /* r2 = ..BA */
1695 #endif
1696         str     r1, [r0, #0x02]
1697         str     r3, [r0, #0x06]
/* On BE r2 is stored unshifted because strh writes only its low halfword, source bytes A and B. */
1698         strh    r2, [r0, #0x0a]
1699         bx      lr
1700         LMEMCPY_C_PAD
1703  * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
1704  */
/* 12-byte copy slot, index 1001 (dst offset 2, src offset 1 under the .Lmemcpy_c dispatch). */
/* Word loads at src-1, src+3, src+7 and a byte load at src+0xb. Stores are a halfword at dst+0, */
/* words at dst+2 and dst+6, and a halfword at dst+0xa. */
/* ip first carries the leading halfword, which is stored before ip is reloaded from src+7. */
1705         ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
1706         ldr     r3, [r1, #0x03]         /* BE:r3 = 3456  LE:r3 = 6543 */
1707         mov     ip, r2, lsr #8          /* BE:ip = .x01  LE:ip = .210 */
1708         strh    ip, [r0]
1709         ldr     ip, [r1, #0x07]         /* BE:ip = 789A  LE:ip = A987 */
1710         ldrb    r1, [r1, #0x0b]         /* r1 = ...B */
1711 #ifdef __ARMEB__
1712         mov     r2, r2, lsl #24         /* r2 = 2... */
1713         orr     r2, r2, r3, lsr #8      /* r2 = 2345 */
1714         mov     r3, r3, lsl #24         /* r3 = 6... */
1715         orr     r3, r3, ip, lsr #8      /* r3 = 6789 */
1716         orr     r1, r1, ip, lsl #8      /* r1 = 89AB */
1717 #else
1718         mov     r2, r2, lsr #24         /* r2 = ...2 */
1719         orr     r2, r2, r3, lsl #8      /* r2 = 5432 */
1720         mov     r3, r3, lsr #24         /* r3 = ...6 */
1721         orr     r3, r3, ip, lsl #8      /* r3 = 9876 */
1722         mov     r1, r1, lsl #8          /* r1 = ..B. */
1723         orr     r1, r1, ip, lsr #24     /* r1 = ..BA */
1724 #endif
1725         str     r2, [r0, #0x02]
1726         str     r3, [r0, #0x06]
/* strh writes only the low halfword of r1, i.e. source bytes A and B. */
1727         strh    r1, [r0, #0x0a]
1728         bx      lr
1729         LMEMCPY_C_PAD
1731 /*
1732  * 1010: dst is 16-bit aligned, src is 16-bit aligned
1733  */
1734         ldrh    r2, [r1]                /* r2 = bytes 0-1 */
1735         ldr     r3, [r1, #0x02]         /* r3 = bytes 2-5 */
1736         ldr     ip, [r1, #0x06]         /* ip = bytes 6-9 */
1737         ldrh    r1, [r1, #0x0a]         /* r1 = bytes A-B (src pointer overwritten last) */
1738         strh    r2, [r0]                /* store bytes 0-1 */
1739         str     r3, [r0, #0x02]         /* store bytes 2-5 */
1740         str     ip, [r0, #0x06]         /* store bytes 6-9 */
1741         strh    r1, [r0, #0x0a]         /* store bytes A-B */
1742         bx      lr
1743         LMEMCPY_C_PAD
1745 /*
1746  * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
1747  */
1748         ldr     r2, [r1, #0x09]         /* BE:r2 = 9ABx  LE:r2 = xBA9 */
1749         ldr     r3, [r1, #0x05]         /* BE:r3 = 5678  LE:r3 = 8765 */
1750         mov     ip, r2, lsr #8          /* BE:ip = .9AB  LE:ip = .xBA */
1751         strh    ip, [r0, #0x0a]         /* store bytes A-B; x (byte at src+12) is not stored */
1752         ldr     ip, [r1, #0x01]         /* BE:ip = 1234  LE:ip = 4321 */
1753         ldrb    r1, [r1]                /* r1 = ...0 */
1754 #ifdef __ARMEB__                        /* big-endian */
1755         mov     r2, r2, lsr #24         /* r2 = ...9 */
1756         orr     r2, r2, r3, lsl #8      /* r2 = 6789 */
1757         mov     r3, r3, lsr #24         /* r3 = ...5 */
1758         orr     r3, r3, ip, lsl #8      /* r3 = 2345 */
1759         mov     r1, r1, lsl #8          /* r1 = ..0. */
1760         orr     r1, r1, ip, lsr #24     /* r1 = ..01 */
1761 #else                                   /* little-endian */
1762         mov     r2, r2, lsl #24         /* r2 = 9... */
1763         orr     r2, r2, r3, lsr #8      /* r2 = 9876 */
1764         mov     r3, r3, lsl #24         /* r3 = 5... */
1765         orr     r3, r3, ip, lsr #8      /* r3 = 5432 */
1766         orr     r1, r1, ip, lsl #8      /* r1 = 3210 */
1767 #endif                                  /* __ARMEB__ */
1768         str     r2, [r0, #0x06]         /* store bytes 6-9 */
1769         str     r3, [r0, #0x02]         /* store bytes 2-5 */
1770         strh    r1, [r0]                /* store bytes 0-1 (low half of r1) */
1771         bx      lr
1772         LMEMCPY_C_PAD
1774 /*
1775  * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
1776  */
1777         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1778         ldr     ip, [r1, #0x04]         /* BE:ip = 4567  LE:ip = 7654 */
1779         ldr     r1, [r1, #0x08]         /* BE:r1 = 89AB  LE:r1 = BA98 */
1780 #ifdef __ARMEB__                        /* big-endian */
1781         mov     r3, r2, lsr #24         /* r3 = ...0 */
1782         strb    r3, [r0]                /* store byte 0 */
1783         mov     r2, r2, lsl #8          /* r2 = 123. */
1784         orr     r2, r2, ip, lsr #24     /* r2 = 1234 */
1785         str     r2, [r0, #0x01]         /* store bytes 1-4 */
1786         mov     r2, ip, lsl #8          /* r2 = 567. */
1787         orr     r2, r2, r1, lsr #24     /* r2 = 5678 */
1788         str     r2, [r0, #0x05]         /* store bytes 5-8 */
1789         mov     r2, r1, lsr #8          /* r2 = .89A */
1790         strh    r2, [r0, #0x09]         /* store bytes 9-A (low half of r2) */
1791         strb    r1, [r0, #0x0b]         /* store byte B (low byte of r1) */
1792 #else                                   /* little-endian */
1793         strb    r2, [r0]                /* store byte 0 (low byte of r2) */
1794         mov     r3, r2, lsr #8          /* r3 = .321 */
1795         orr     r3, r3, ip, lsl #24     /* r3 = 4321 */
1796         str     r3, [r0, #0x01]         /* store bytes 1-4 */
1797         mov     r3, ip, lsr #8          /* r3 = .765 */
1798         orr     r3, r3, r1, lsl #24     /* r3 = 8765 */
1799         str     r3, [r0, #0x05]         /* store bytes 5-8 */
1800         mov     r1, r1, lsr #8          /* r1 = .BA9 */
1801         strh    r1, [r0, #0x09]         /* store bytes 9-A (low half of r1) */
1802         mov     r1, r1, lsr #16         /* r1 = ...B */
1803         strb    r1, [r0, #0x0b]         /* store byte B */
1804 #endif                                  /* __ARMEB__ */
1805         bx      lr
1806         LMEMCPY_C_PAD
1808 /*
1809  * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
1810  */
1811         ldrb    r2, [r1, #0x0b]         /* r2 = ...B */
1812         ldr     r3, [r1, #0x07]         /* BE:r3 = 789A  LE:r3 = A987 */
1813         ldr     ip, [r1, #0x03]         /* BE:ip = 3456  LE:ip = 6543 */
1814         ldr     r1, [r1, #-1]           /* BE:r1 = x012  LE:r1 = 210x */
1815         strb    r2, [r0, #0x0b]         /* store byte B */
1816 #ifdef __ARMEB__                        /* big-endian */
1817         strh    r3, [r0, #0x09]         /* store bytes 9-A (low half of r3) */
1818         mov     r3, r3, lsr #16         /* r3 = ..78 */
1819         orr     r3, r3, ip, lsl #16     /* r3 = 5678 */
1820         mov     ip, ip, lsr #16         /* ip = ..34 */
1821         orr     ip, ip, r1, lsl #16     /* ip = 1234 */
1822         mov     r1, r1, lsr #16         /* r1 = ..x0 */
1823 #else                                   /* little-endian */
1824         mov     r2, r3, lsr #16         /* r2 = ..A9 */
1825         strh    r2, [r0, #0x09]         /* store bytes 9-A */
1826         mov     r3, r3, lsl #16         /* r3 = 87.. */
1827         orr     r3, r3, ip, lsr #16     /* r3 = 8765 */
1828         mov     ip, ip, lsl #16         /* ip = 43.. */
1829         orr     ip, ip, r1, lsr #16     /* ip = 4321 */
1830         mov     r1, r1, lsr #8          /* r1 = .210 */
1831 #endif                                  /* __ARMEB__ */
1832         str     r3, [r0, #0x05]         /* store bytes 5-8 */
1833         str     ip, [r0, #0x01]         /* store bytes 1-4 */
1834         strb    r1, [r0]                /* store byte 0 (low byte of r1; x is not stored) */
1835         bx      lr
1836         LMEMCPY_C_PAD
1838 /*
1839  * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
1840  */
1841 #ifdef __ARMEB__                        /* big-endian */
1842         ldrh    r2, [r1, #0x0a]         /* r2 = ..AB */
1843         ldr     ip, [r1, #0x06]         /* ip = 6789 */
1844         ldr     r3, [r1, #0x02]         /* r3 = 2345 */
1845         ldrh    r1, [r1]                /* r1 = ..01 */
1846         strb    r2, [r0, #0x0b]         /* store byte B (low byte of r2) */
1847         mov     r2, r2, lsr #8          /* r2 = ...A */
1848         orr     r2, r2, ip, lsl #8      /* r2 = 789A */
1849         mov     ip, ip, lsr #8          /* ip = .678 */
1850         orr     ip, ip, r3, lsl #24     /* ip = 5678 */
1851         mov     r3, r3, lsr #8          /* r3 = .234 */
1852         orr     r3, r3, r1, lsl #24     /* r3 = 1234 */
1853         mov     r1, r1, lsr #8          /* r1 = ...0 */
1854         strb    r1, [r0]                /* store byte 0 */
1855         str     r3, [r0, #0x01]         /* store bytes 1-4 */
1856         str     ip, [r0, #0x05]         /* store bytes 5-8 */
1857         strh    r2, [r0, #0x09]         /* store bytes 9-A (low half of r2) */
1858 #else                                   /* little-endian */
1859         ldrh    r2, [r1]                /* r2 = ..10 */
1860         ldr     r3, [r1, #0x02]         /* r3 = 5432 */
1861         ldr     ip, [r1, #0x06]         /* ip = 9876 */
1862         ldrh    r1, [r1, #0x0a]         /* r1 = ..BA */
1863         strb    r2, [r0]                /* store byte 0 (low byte of r2) */
1864         mov     r2, r2, lsr #8          /* r2 = ...1 */
1865         orr     r2, r2, r3, lsl #8      /* r2 = 4321 */
1866         mov     r3, r3, lsr #24         /* r3 = ...5 */
1867         orr     r3, r3, ip, lsl #8      /* r3 = 8765 */
1868         mov     ip, ip, lsr #24         /* ip = ...9 */
1869         orr     ip, ip, r1, lsl #8      /* ip = .BA9 */
1870         mov     r1, r1, lsr #8          /* r1 = ...B */
1871         str     r2, [r0, #0x01]         /* store bytes 1-4 */
1872         str     r3, [r0, #0x05]         /* store bytes 5-8 */
1873         strh    ip, [r0, #0x09]         /* store bytes 9-A (low half of ip) */
1874         strb    r1, [r0, #0x0b]         /* store byte B */
1875 #endif                                  /* __ARMEB__ */
1876         bx      lr
1877         LMEMCPY_C_PAD
1879 /*
1880  * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
1881  */
1882         ldrb    r2, [r1]                /* r2 = byte 0 */
1883         ldr     r3, [r1, #0x01]         /* r3 = bytes 1-4 */
1884         ldr     ip, [r1, #0x05]         /* ip = bytes 5-8 */
1885         strb    r2, [r0]                /* store byte 0, freeing r2 for reuse */
1886         ldrh    r2, [r1, #0x09]         /* r2 = bytes 9-A */
1887         ldrb    r1, [r1, #0x0b]         /* r1 = byte B (src pointer overwritten last) */
1888         str     r3, [r0, #0x01]         /* store bytes 1-4 */
1889         str     ip, [r0, #0x05]         /* store bytes 5-8 */
1890         strh    r2, [r0, #0x09]         /* store bytes 9-A */
1891         strb    r1, [r0, #0x0b]         /* store byte B */
1892         bx      lr
1892         bx      lr
1893 #endif  /* !_STANDALONE */