Automatic merge of rsync://rsync.kernel.org/pub/scm/linux/kernel/git/gregkh/driver...
[linux-2.6/verdex.git] / arch / sh64 / lib / memcpy.c
blobc785d0aa194dbc731f5189541cc156d4cb071106
1 /*
2 * Copyright (C) 2002 Mark Debbage (Mark.Debbage@superh.com)
4 * May be copied or modified under the terms of the GNU General Public
5 * License. See linux/COPYING for more information.
7 */
9 #include <linux/config.h>
10 #include <linux/types.h>
11 #include <asm/string.h>
13 // This is a simplistic optimization of memcpy to increase the
14 // granularity of access beyond one byte using aligned
15 // loads and stores. This is not an optimal implementation
16 // for SH-5 (especially with regard to prefetching and the cache),
17 // and a better version should be provided later ...
/*
 * memcpy - copy count bytes from src to dest.
 * @dest:  start of the destination buffer
 * @src:   start of the source buffer
 * @count: number of bytes to copy
 *
 * Copies in ascending address order; overlapping regions get no special
 * treatment.  For copies of 32 bytes or more the destination is first
 * brought up to an 8-byte boundary, after which the widest access that
 * both pointers are aligned for (8, 4 or 2 bytes) is used.  Whatever is
 * left over is copied one byte at a time.
 *
 * Returns dest (the original destination pointer).
 */
void *memcpy(void *dest, const void *src, size_t count)
{
	char *d = dest;
	const char *s = src;

	if (count >= 32) {
		/* Bytes needed to reach the next 8-byte boundary of d (0..7). */
		size_t head = (8 - ((unsigned long) d & 0x7)) & 0x7;

		/* count >= 32 here, so subtracting at most 7 cannot wrap. */
		count -= head;
		while (head--)
			*d++ = *s++;

		/* d is 8-byte aligned now; go wide only if s is as well. */
		if ((((unsigned long) d | (unsigned long) s) & 0x7) == 0) {
			while (count >= 32) {
				unsigned long long t1, t2, t3, t4;

				/* Issue all four loads before the stores. */
				t1 = *(const unsigned long long *) (s);
				t2 = *(const unsigned long long *) (s + 8);
				t3 = *(const unsigned long long *) (s + 16);
				t4 = *(const unsigned long long *) (s + 24);
				*(unsigned long long *) (d) = t1;
				*(unsigned long long *) (d + 8) = t2;
				*(unsigned long long *) (d + 16) = t3;
				*(unsigned long long *) (d + 24) = t4;
				d += 32;
				s += 32;
				count -= 32;
			}
			while (count >= 8) {
				*(unsigned long long *) d =
				    *(const unsigned long long *) s;
				d += 8;
				s += 8;
				count -= 8;
			}
		}

		if ((((unsigned long) d | (unsigned long) s) & 0x3) == 0) {
			/*
			 * Use a 4-byte type here.  "unsigned long" is 8 bytes
			 * on LP64 targets, which would move 8 bytes per
			 * 4-byte step and run past the end of both buffers.
			 * Assumes unsigned int is 32 bits wide.
			 */
			while (count >= 4) {
				*(unsigned int *) d = *(const unsigned int *) s;
				d += 4;
				s += 4;
				count -= 4;
			}
		}

		if ((((unsigned long) d | (unsigned long) s) & 0x1) == 0) {
			while (count >= 2) {
				*(unsigned short *) d =
				    *(const unsigned short *) s;
				d += 2;
				s += 2;
				count -= 2;
			}
		}
	}

	/* Short copies and any unaligned or left-over tail. */
	while (count--)
		*d++ = *s++;

	/* The contract is to return the start of dest, not the cursor. */
	return dest;
}