1 /* Copyright (C) 2011-2012 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library. If not, see
17 <http://www.gnu.org/licenses/>. */
22 #include <arch/chip.h>
24 /* Unit used by the word-at-a-time copy loops.  Must be 8 bytes in size:
   COPY_WORD below subtracts a literal 8 from the byte count for every
   word it copies. */
25 #define word_t uint64_t
27 /* How many cache lines ahead should we prefetch?  Used as the iteration
   count of the initial prefetch loop in __memcpy. */
28 #define PREFETCH_LINES_AHEAD 3
/* Copy N bytes from SRCV to DSTV.

   Stages visible in this listing:
   1. Issue PREFETCH_LINES_AHEAD prefetches over the source, never
      stepping past its last byte (SRC1_END).
   2. Advance until the destination pointer is word-aligned.
   3. If the source is then misaligned, build each destination word from
      two aligned source words with __insn_dblalign.  Otherwise copy word
      by word, moving a whole L2 cache line per iteration while at least
      one full line remains.
   4. Store the trailing 1-7 bytes held in FINAL using 4-, 2- and 1-byte
      stores, with a separate variant depending on __BIG_ENDIAN__.

   NOTE(review): this listing appears to have lost lines (the return
   type, braces, several loop bodies, the returns and some preprocessor
   lines are not visible).  The comments here describe only the
   statements that are present.  */
31 __memcpy (void *__restrict dstv
, const void *__restrict srcv
, size_t n
)
33 char *__restrict dst1
= (char *) dstv
;
34 const char *__restrict src1
= (const char *) srcv
;
35 const char *__restrict src1_end
;
36 const char *__restrict prefetch
;
37 word_t
*__restrict dst8
; /* 8-byte pointer to destination memory. */
38 word_t final
; /* Final bytes to write to trailing word, if any */
48 /* Locate the end of source memory we will copy. Don't prefetch
past this. */
/* src1_end is the address of the last source byte, not one past it. */
50 src1_end
= src1
+ n
- 1;
52 /* Prefetch ahead a few cache lines, but not past the end. */
54 for (i
= 0; i
< PREFETCH_LINES_AHEAD
; i
++)
56 __insn_prefetch (prefetch
);
57 prefetch
+= CHIP_L2_LINE_SIZE ();
/* Keep the advanced pointer only while it is still short of the last
   source byte; otherwise fall back to the start of the source so that
   we never prefetch past the end. */
58 prefetch
= (prefetch
< src1_end
) ? prefetch
: src1
;
61 /* Copy bytes until dst is word-aligned. */
62 for (; (uintptr_t) dst1
& (sizeof (word_t
) - 1); n
--)
65 /* 8-byte pointer to destination memory. */
66 dst8
= (word_t
*) dst1
;
/* The destination is word-aligned from here on; pick the copy strategy
   from the alignment of the source.  Misalignment is marked as the
   unlikely case. */
68 if (__builtin_expect ((uintptr_t) src1
& (sizeof (word_t
) - 1), 0))
70 /* Misaligned copy. Copy 8 bytes at a time, but don't bother with other optimizations.
72 TODO: Consider prefetching and using wh64 as well. */
74 /* Create an aligned src8. */
/* Masking with -sizeof (word_t) clears the low address bits, rounding
   src1 down to a word boundary. */
75 const word_t
*__restrict src8
=
76 (const word_t
*) ((uintptr_t) src1
& -sizeof (word_t
));
80 for (; n
>= sizeof (word_t
); n
-= sizeof (word_t
))
/* Combine the two aligned source words A and B into one destination
   word, using the original (misaligned) src1 as the alignment operand. */
83 a
= __insn_dblalign (a
, b
, src1
);
/* Only load one more source word if it starts at or before the last
   source byte; otherwise substitute zero rather than read beyond the
   source. */
91 b
= ((const char *) src8
<= src1_end
) ? *src8
: 0;
93 /* Final source bytes to write to trailing partial word, if any. */
94 final
= __insn_dblalign (a
, b
, src1
);
/* Aligned case: the source can be read directly as words. */
100 const word_t
*__restrict src8
= (const word_t
*) src1
;
102 /* src8 and dst8 are both word-aligned. */
103 if (n
>= CHIP_L2_LINE_SIZE ())
105 /* Copy until 'dst' is cache-line-aligned. */
106 for (; (uintptr_t) dst8
& (CHIP_L2_LINE_SIZE () - 1);
107 n
-= sizeof (word_t
))
110 /* If copying to self, return. The test is cheap enough
111 that we do it despite the fact that the memcpy() contract
112 doesn't require us to support overlapping dst and src.
113 This is the most common case of overlap, and any close
114 overlap will cause corruption due to the wh64 below.
115 This case is particularly important since the compiler
116 will emit memcpy() calls for aggregate copies even if it
117 can't prove that src != dst. */
118 if (__builtin_expect (dst8
== src8
, 0))
/* Main loop: runs while at least one full cache line remains. */
121 for (; n
>= CHIP_L2_LINE_SIZE ();)
125 /* Prefetch and advance to next line to prefetch, but
126 don't go past the end. */
127 __insn_prefetch (prefetch
);
128 prefetch
+= CHIP_L2_LINE_SIZE ();
/* Same clamp as in the initial prefetch loop: keep the advanced pointer
   only while it is still short of the last source byte.
   NOTE(review): the fallback operand after the ':' is not visible in
   this listing. */
129 prefetch
= (prefetch
< src1_end
) ? prefetch
:
132 /* Copy an entire cache line. Manually unrolled to
133 avoid idiosyncracies of compiler unrolling. */
/* COPY_WORD copies the word at index OFFSET and deducts its 8 bytes
   from the remaining count. */
134 #define COPY_WORD(offset) ({ dst8[offset] = src8[offset]; n -= 8; })
/* Compile-time guard: the unrolled copy is written for 64-byte lines. */
143 #if CHIP_L2_LINE_SIZE() != 64
144 # error "Fix code that assumes particular L2 cache line size."
/* Step both word pointers past the cache line just handled. */
147 dst8
+= CHIP_L2_LINE_SIZE () / sizeof (word_t
);
148 src8
+= CHIP_L2_LINE_SIZE () / sizeof (word_t
);
/* Copy the remaining whole words one at a time. */
152 for (; n
>= sizeof (word_t
); n
-= sizeof (word_t
))
/* Nothing left over is marked as the likely case. */
155 if (__builtin_expect (n
== 0, 1))
161 /* n != 0 if we get here. Write out any trailing bytes. */
162 dst1
= (char *) dst8
;
/* When __BIG_ENDIAN__ is not defined, the stores below write the
   low-order end of FINAL in 4-, 2- and 1-byte pieces.
   NOTE(review): the conditions that select which of these stores run,
   and the pointer/value updates between them, are not visible in this
   listing. */
163 #ifndef __BIG_ENDIAN__
166 *(uint32_t *) dst1
= final
;
173 *(uint16_t *) dst1
= final
;
179 *(uint8_t *) dst1
= final
;
/* The stores below take the bytes from the high-order end of FINAL by
   shifting it down first; presumably this is the big-endian counterpart
   (the #else is not visible here -- confirm). */
183 *(uint32_t *) dst1
= final
>> 32;
192 *(uint16_t *) dst1
= final
>> 16;
200 *(uint8_t *) dst1
= final
>> 8;
/* Make __memcpy available under the public name memcpy through the
   weak_alias macro, then apply libc_hidden_builtin_def to memcpy.  Both
   macros are defined outside this file; presumably the second sets up the
   library-internal hidden definition -- confirm against the libc
   symbol-handling headers. */
205 weak_alias (__memcpy
, memcpy
)
206 libc_hidden_builtin_def (memcpy
)