/*
 * On Tue, Nov 06, 2007 at 02:33:53AM -0800, akpm@linux-foundation.org wrote:
 * [mmotm.git] / arch / blackfin / lib / memcpy.S
 * blob c31bf22aab190adcb35e187ec258b3833e4a6943
 */
/*
 * internal version of memcpy(), issued by the compiler to copy blocks of
 * data around. This is really memmove() - it has to be able to deal with
 * possible overlaps, because that ambiguity is when the compiler gives up
 * and calls a function. We have our own, internal version so that we get
 * something we trust, even if the user has redefined the normal symbol.
 *
 * Copyright 2004-2009 Analog Devices Inc.
 *
 * Licensed under the ADI BSD license or the GPL-2 (or later)
 */
#include <linux/linkage.h>

/*
 * void *memcpy(void *dest, const void *src, size_t n);
 *
 * In:
 *   R0 = destination address (dest)
 *   R1 = source address (src)
 *   R2 = byte count (n)
 * Out:
 *   R0 = dest (R0 is never written, so it is still the result on return)
 * Modified:
 *   R1, R3, P0, P1, P2, CC, hardware loop 0 (LSETUP ... LC0), and I1 on
 *   the word-copy path.  R2 is only read.
 *
 * Copy strategy, in the order the checks are made:
 *   1. n <= 0 (signed compare, so a count with the top bit set is also
 *      rejected): return without copying anything.
 *   2. src < dst < src + n: the regions overlap such that a forward copy
 *      would overwrite source bytes before they are read, so copy
 *      backwards, one byte at a time.
 *   3. Either address has one of its low two bits set: forward byte copy.
 *   4. Both addresses word aligned but n >> 2 <= 2 (i.e. n <= 11):
 *      forward byte copy.
 *   5. Otherwise (aligned, n >= 12): copy n >> 2 words, then the
 *      remaining n & 3 bytes.
 *
 * Note: Favours word alignment
 */

#ifdef CONFIG_MEMCPY_L1
.section .l1.text
#else
.text
#endif

.align 2

ENTRY(_memcpy)
        CC = R2 <= 0;                   /* length not positive? */
        IF CC JUMP .Ldone;              /* nothing to do */

        P0 = R0;                        /* P0 = dst cursor */
        P1 = R1;                        /* P1 = src cursor */
        P2 = R2;                        /* P2 = byte count */

        /*
         * Overlap check.  Addresses are unsigned quantities, so both
         * compares use the (IU) form; a signed compare mis-orders a
         * region that lies across the 0x7fffffff/0x80000000 boundary
         * and would let a forward copy destroy unread source bytes.
         */
        CC = R1 < R0 (IU);              /* src < dst */
        IF !CC JUMP .Lno_overlap;
        R3 = R1 + R2;                   /* R3 = src + n */
        CC = R0 < R3 (IU);              /* and dst < src + n */
        IF CC JUMP .Lhas_overlap;

.Lno_overlap:
        /* Word copies are only used when both addresses are aligned. */
        R3 = R1 | R0;
        R1 = 0x3;                       /* R1 is free: src now lives in P1 */
        R3 = R3 & R1;
        CC = R3;                        /* low bits set on either address? */
        IF CC JUMP .Lnot_aligned;

        /*
         * Both addresses are word aligned.  The word loop is only
         * entered when there are more than two whole words to move.
         */
        P2 = P2 >> 2;                   /* P2 = number of whole words */
        CC = P2 <= 2;
        IF !CC JUMP .Lword_copy;

        /* At most two whole words (n <= 11): byte loop over all n bytes. */
        P2 = R2;                        /* restore the byte count */
        LSETUP(.Lshort_start, .Lshort_end) LC0=P2;
.Lshort_start:
        R3 = B[P1++] (X);
.Lshort_end:
        B[P0++] = R3;

        RTS;

.Lword_copy:
        /*
         * At least three whole words (n >= 12).  The first load and the
         * last store sit outside the loop, so the loop itself runs
         * (words - 1) times.
         */
        P2 += -1;
        LSETUP(.Lword_loops, .Lword_loope) LC0=P2;
        I1 = P1;                        /* loads go through I1 */
        R3 = [I1++];
        /*
         * With ANOMALY_05000202 set, the store and the load are issued
         * as two separate instructions instead of one parallel-issue
         * instruction.
         * NOTE(review): presumably a silicon erratum workaround -
         * confirm against the processor anomaly list.
         */
#if ANOMALY_05000202
.Lword_loops:
        [P0++] = R3;
.Lword_loope:
        R3 = [I1++];
#else
.Lword_loops:
.Lword_loope:
        MNOP || [P0++] = R3 || R3 = [I1++];
#endif
        [P0++] = R3;                    /* store the last word loaded */

        /* Any remaining bytes to copy? */
        R3 = 0x3;
        R3 = R2 & R3;                   /* R3 = n & 3 */
        CC = R3 == 0;
        P1 = I1;                        /* byte loop reads through P1 */
        IF !CC JUMP .Lbytes_left;
        RTS;

.Lbytes_left:
        P2 = R3;                        /* 1..3 trailing bytes */
.Lnot_aligned:
        /*
         * Forward byte-by-byte copy of P2 bytes.  P2 is the full count
         * when entered at .Lnot_aligned, or n & 3 when reached through
         * .Lbytes_left.
         */
        LSETUP (.Lbyte_start, .Lbyte_end) LC0=P2;
.Lbyte_start:
        R1 = B[P1++] (X);
.Lbyte_end:
        B[P0++] = R1;

.Ldone:
        RTS;

.Lhas_overlap:
        /*
         * Need to reverse the copying, because the dst would clobber
         * the src.  Don't bother to work out alignment for the reverse
         * case: start both cursors on the last byte of their regions
         * and walk downwards one byte at a time.
         */
        P0 = P0 + P2;
        P0 += -1;                       /* P0 = dst + n - 1 */
        P1 = P1 + P2;
        P1 += -1;                       /* P1 = src + n - 1 */
        LSETUP(.Lover_start, .Lover_end) LC0=P2;
.Lover_start:
        R1 = B[P1--] (X);
.Lover_end:
        B[P0--] = R1;

        RTS;

ENDPROC(_memcpy)