2 Copyright (c) 2015-2024, Synopsys, Inc. All rights reserved.
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
7 1) Redistributions of source code must retain the above copyright notice,
8 this list of conditions and the following disclaimer.
10 2) Redistributions in binary form must reproduce the above copyright notice,
11 this list of conditions and the following disclaimer in the documentation
12 and/or other materials provided with the distribution.
14 3) Neither the name of the Synopsys, Inc., nor the names of its contributors
15 may be used to endorse or promote products derived from this software
16 without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 POSSIBILITY OF SUCH DAMAGE.
31 /* This implementation is optimized for performance. For code size, a generic
32 implementation of this function from newlib/libc/string/memcpy.c will be used. */
; Build guard: everything below is assembled only when none of
; __OPTIMIZE_SIZE__, PREFER_SIZE_OVER_SPEED and __ARC_RF16__ is defined.
34 #if !defined (__OPTIMIZE_SIZE__) && !defined (PREFER_SIZE_OVER_SPEED) \
35 && !defined (__ARC_RF16__)
39 #if defined (__ARCHS__)
; Endian-dependent helper macros.
; Little-endian set: SHIFT_1 and MERGE_1 shift left (asl), SHIFT_2 shifts
; right (lsr), MERGE_2 expands to nothing, EXTRACT_1 keeps the low 16 bits
; (its IMM argument is ignored) and EXTRACT_2 shifts right by IMM.
41 #ifdef __LITTLE_ENDIAN__
42 # define SHIFT_1(RX,RY,IMM) asl RX, RY, IMM ; <<
43 # define SHIFT_2(RX,RY,IMM) lsr RX, RY, IMM ; >>
44 # define MERGE_1(RX,RY,IMM) asl RX, RY, IMM
45 # define MERGE_2(RX,RY,IMM)
46 # define EXTRACT_1(RX,RY,IMM) and RX, RY, 0xFFFF
47 # define EXTRACT_2(RX,RY,IMM) lsr RX, RY, IMM
; Second set: the SHIFT directions are swapped, both MERGE macros shift left,
; EXTRACT_1 shifts right by IMM and EXTRACT_2 shifts right by a fixed 8 bits
; (its IMM argument is ignored).
; NOTE(review): presumably the big-endian alternative; the directive that
; separates the two sets is not visible in this excerpt - confirm.
49 # define SHIFT_1(RX,RY,IMM) lsr RX, RY, IMM ; >>
50 # define SHIFT_2(RX,RY,IMM) asl RX, RY, IMM ; <<
51 # define MERGE_1(RX,RY,IMM) asl RX, RY, IMM ; <<
52 # define MERGE_2(RX,RY,IMM) asl RX, RY, IMM ; <<
53 # define EXTRACT_1(RX,RY,IMM) lsr RX, RY, IMM
54 # define EXTRACT_2(RX,RY,IMM) lsr RX, RY, 0x08
; Load/store building blocks, 8-byte flavour: ldd.ab / std.ab with an
; address write-back of 8, prefetch offsets 56 (read) and 64 (write).
; NOTE(review): the condition choosing between this set and the 4-byte set
; below is not visible in this excerpt - confirm.
58 # define PREFETCH_READ(RX) prefetch [RX, 56]
59 # define PREFETCH_WRITE(RX) prefetchw [RX, 64]
60 # define LOADX(DST,RX) ldd.ab DST, [RX, 8]
61 # define STOREX(SRC,RX) std.ab SRC, [RX, 8]
; Load/store building blocks, 4-byte flavour: ld.ab / st.ab with an
; address write-back of 4, prefetch offsets 28 (read) and 32 (write).
65 # define PREFETCH_READ(RX) prefetch [RX, 28]
66 # define PREFETCH_WRITE(RX) prefetchw [RX, 32]
67 # define LOADX(DST,RX) ld.ab DST, [RX, 4]
68 # define STOREX(SRC,RX) st.ab SRC, [RX, 4]
74 ;;; MEMCPY copy memory regions
;;; Input arguments:
76 ;;; r0 - output memory region
77 ;;; r1 - input memory region
78 ;;; r2 - size in bytes
;;; Returns:
80 ;;; r0 - pointer to the first byte of the output region
;;; Clobbers:
82 ;;; r1, r2, r3, r4, r5, r6, r8r9, r10r11, lp_count
; The routine below is assembled only when __ARC_UNALIGNED__ is not defined.
84 #if !defined (__ARC_UNALIGNED__)
86 ;;; MEMCPY routine for the case when the CPU only accepts ALIGNED
87 ;;; accesses to memory.
89 prefetch [r1] ; Prefetch the read location
90 prefetchw [r0] ; Prefetch the write location
; r3 takes over as the working destination pointer; r0 is left untouched
; because it is the value returned to the caller.
94 mov r3, r0 ; don't clobber ret val
; Zero-overhead loop ending at .Laligndestination; lpnz skips it when the
; Z flag is set.
; NOTE(review): the instructions that set lp_count and the loop body are
; not visible in this excerpt.
103 lpnz .Laligndestination
110 ; Check the alignment of the source
; Branch taken when Z is clear; the .d form also executes the delay-slot
; instruction.
; NOTE(review): the test that sets the flags is not visible in this excerpt.
112 bnz.d .Lsourceunaligned
114 ; CASE 0: Both source and destination are 32bit aligned
115 ; Convert len to Dwords, unfold x4
; lp_count = r2 >> ZOLSHFT; the .f suffix updates the flags so that the
; following lpnz skips the loop when the count is zero.
; ZOLSHFT and ZOLAND are defined outside this excerpt.
116 lsr.f lp_count, r2, ZOLSHFT
117 lpnz .Lcopy32_64bytes
; Tail: lp_count = r2 & ZOLAND, presumably the bytes the unrolled loop did
; not cover; the loop is skipped when that count is zero.
131 and.f lp_count, r2, ZOLAND ;Last remaining 31 bytes
133 lpnz .Lcopyremainingbytes
137 .Lcopyremainingbytes:
; Dispatch on the source misalignment: equal goes to the off-by-2 path,
; unsigned-higher goes to the off-by-3 path, anything else falls through
; to CASE 1.
; NOTE(review): the compare that sets the flags is not visible in this excerpt.
144 beq.d .LunalignedOffby2
147 bhi.d .LunalignedOffby3
150 ; CASE 1: The source is unaligned, off by 1
151 ; Hence I need to read 1 byte for a 16bit alignment
152 ; and 2bytes to reach 32bit alignment
155 ; Convert to words, unfold x2
; lp_count = r2 / 8, i.e. two 32-bit words per loop iteration.
156 lsr.f lp_count, r2, 3
161 ; Both src and dst are aligned
165 prefetch [r1, 28] ;Prefetch the next read location
167 prefetchw [r3, 32] ;Prefetch the next write location
181 ; Write back the remaining 16bits
; EXTRACT_1 is endian-dependent: the little-endian definition masks r5
; with 0xFFFF into r6, the other definition shifts r5 right by 16 into r6.
182 EXTRACT_1 (r6, r5, 16)
184 ; Write back the remaining 8bits
; EXTRACT_2 is endian-dependent: the little-endian definition shifts r5
; right by 16, the other definition shifts it right by a fixed 8 bits.
185 EXTRACT_2 (r5, r5, 16)
; r2 & 0x07 is at most 7, so the byte-wise tail copies between 0 and 7 bytes.
188 and.f lp_count, r2, 0x07 ;Last 7 bytes
189 lpnz .Lcopybytewise_1
197 ; CASE 2: The source is unaligned, off by 2
201 ; Both src and dst are aligned
202 ; Convert to words, unfold x2
; lp_count = r2 / 8, i.e. two 32-bit words per loop iteration.
; NOTE(review): the bodies and closing directives of the two __BIG_ENDIAN__
; conditionals in this section are not visible in this excerpt.
203 lsr.f lp_count, r2, 3
204 #ifdef __BIG_ENDIAN__
210 prefetch [r1, 28] ;Prefetch the next read location
212 prefetchw [r3, 32] ;Prefetch the next write location
226 #ifdef __BIG_ENDIAN__
231 and.f lp_count, r2, 0x07 ;Last 7 bytes (mask 0x07 yields 0..7)
232 lpnz .Lcopybytewise_2
240 ; CASE 3: The source is unaligned, off by 3
241 ; Hence, I need to read 1 byte to achieve the 32bit alignment
243 ; Both src and dst are aligned
244 ; Convert to words, unfold x2
; lp_count = r2 / 8, i.e. two 32-bit words per loop iteration.
; NOTE(review): the bodies and closing directives of the two __BIG_ENDIAN__
; conditionals in this section are not visible in this excerpt.
245 lsr.f lp_count, r2, 3
246 #ifdef __BIG_ENDIAN__
252 prefetch [r1, 28] ;Prefetch the next read location
254 prefetchw [r3, 32] ;Prefetch the next write location
268 #ifdef __BIG_ENDIAN__
273 and.f lp_count, r2, 0x07 ;Last 7 bytes (mask 0x07 yields 0..7)
274 lpnz .Lcopybytewise_3
285 ;;; MEMCPY routine which is used by systems that support unaligned memory
286 ;;; accesses. This is the case for most of the ARCHS CPU family.
; NOTE(review): the directive separating this routine from the aligned-only
; one (presumably the alternative branch of the __ARC_UNALIGNED__ test) is
; not visible in this excerpt - confirm.
288 prefetch [r1] ; Prefetch the read location
289 prefetchw [r0] ; Prefetch the write location
; r0 is preserved as the return value; r3 receives the copy to work with.
293 mov r3, r0 ; don't clobber ret val
300 ;;; Convert len to Dwords, unfold x4
; lp_count = r2 >> ZOLSHFT; ZOLSHFT and ZOLAND are defined outside this excerpt.
301 lsr.f lp_count, r2, ZOLSHFT
; Reduce r2 to r2 & ZOLAND, then count that remainder in 8-byte units.
317 and r2, r2, ZOLAND ;Remaining 31 bytes
318 lsr.f lp_count, r2, 3 ;Convert to 64-bit words.
; NOTE(review): the two and.f lines below both load lp_count; they look like
; alternative preprocessor branches whose directives are not visible here.
325 and.f lp_count, r2, 0x07 ; Last 7 bytes
327 and.f lp_count, r2, ZOLAND
; Byte-wise tail loop, skipped when the remaining count is zero.
331 lpnz .Lcopyremainingbytes
335 .Lcopyremainingbytes:
342 #endif /* __ARCHS__ */
344 #endif /* !__OPTIMIZE_SIZE__ && !PREFER_SIZE_OVER_SPEED && !__ARC_RF16__ */