2 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
9 #include <linux/linkage.h>
; Endian-dependent helper macros. Each one expands to a single shift or
; mask instruction, except MERGE_2 in the first group, which expands to
; nothing.
; NOTE(review): the embedded numbering jumps from 17 to 19, so the
; directive separating the two groups (presumably an else branch) and the
; closing endif are not visible in this listing - confirm in the full file.
11 #ifdef __LITTLE_ENDIAN__
; First group: SHIFT_1 shifts left, SHIFT_2 shifts right.
12 # define SHIFT_1(RX,RY,IMM) asl RX, RY, IMM ; <<
13 # define SHIFT_2(RX,RY,IMM) lsr RX, RY, IMM ; >>
14 # define MERGE_1(RX,RY,IMM) asl RX, RY, IMM
15 # define MERGE_2(RX,RY,IMM)
; EXTRACT_1 ignores IMM here and keeps the low 16 bits of RY.
16 # define EXTRACT_1(RX,RY,IMM) and RX, RY, 0xFFFF
17 # define EXTRACT_2(RX,RY,IMM) lsr RX, RY, IMM
; Second group (presumably the big-endian variant - TODO confirm):
; the directions of SHIFT_1 and SHIFT_2 are swapped and MERGE_2 becomes
; a real left shift.
19 # define SHIFT_1(RX,RY,IMM) lsr RX, RY, IMM ; >>
20 # define SHIFT_2(RX,RY,IMM) asl RX, RY, IMM ; <<
21 # define MERGE_1(RX,RY,IMM) asl RX, RY, IMM ; <<
22 # define MERGE_2(RX,RY,IMM) asl RX, RY, IMM ; <<
23 # define EXTRACT_1(RX,RY,IMM) lsr RX, RY, IMM
; EXTRACT_2 ignores IMM here and always shifts right by 8.
24 # define EXTRACT_2(RX,RY,IMM) lsr RX, RY, 0x08
; Width-dependent access macros used by the copy loops.
; NOTE(review): the embedded numbering jumps from 31 to 35, so the lines
; in between are not visible in this listing. They presumably hold the
; ZOLSHFT and ZOLAND definitions used further down plus the directive
; that separates the two groups - confirm in the full file.
27 #ifdef CONFIG_ARC_HAS_LL64
; First group: ldd/std with an 8-byte step, prefetch offsets 56 and 64.
28 # define PREFETCH_READ(RX) prefetch [RX, 56]
29 # define PREFETCH_WRITE(RX) prefetchw [RX, 64]
30 # define LOADX(DST,RX) ldd.ab DST, [RX, 8]
31 # define STOREX(SRC,RX) std.ab SRC, [RX, 8]
; Second group: ld/st with a 4-byte step, prefetch offsets 28 and 32
; (presumably the build without 64-bit load/store - TODO confirm).
35 # define PREFETCH_READ(RX) prefetch [RX, 28]
36 # define PREFETCH_WRITE(RX) prefetchw [RX, 32]
37 # define LOADX(DST,RX) ld.ab DST, [RX, 4]
38 # define STOREX(SRC,RX) st.ab SRC, [RX, 4]
; Start of the copy routine (its entry label is not visible in this
; listing). Per the prefetch comments, r1 is the read pointer and r0 is
; the write pointer.
44 prefetch [r1] ; Prefetch the read location
45 prefetchw [r0] ; Prefetch the write location
49 mov r3, r0 ; r3 = working write pointer; do not clobber the return value in r0
; Hardware loop whose body ends at .Laligndestination.
; NOTE(review): the instructions that set lp_count and the loop body are
; not visible here - presumably a bytewise copy until the destination is
; 32-bit aligned; confirm in the full file.
58 lpnz @.Laligndestination
65 ;;; Check the alignment of the source
; NOTE(review): the flag-setting test that feeds this branch and the
; instruction in its delay slot are not visible in this listing.
67 bnz.d @.Lsourceunaligned
69 ;;; CASE 0: Both source and destination are 32-bit aligned
70 ;;; Convert len to Dwords, unfold x4
71 lsr.f lp_count, r2, ZOLSHFT ; lp_count = len >> ZOLSHFT
72 lpnz @.Lcopy32_64bytes
86 and.f lp_count, r2, ZOLAND ; Leftover bytes: len masked with ZOLAND
88 lpnz @.Lcopyremainingbytes
; Dispatch on the source misalignment: equal goes to the off-by-2 path,
; higher goes to the off-by-3 path; presumably off-by-1 falls through.
; NOTE(review): the compare feeding these branches is not visible here.
99 beq.d @.LunalignedOffby2
102 bhi.d @.LunalignedOffby3
105 ;;; CASE 1: The source is unaligned, off by 1
; NOTE(review): the loads, stores and loop bodies of this case are not
; visible in this listing; only the lines below are shown.
106 ;; Hence read 1 byte to reach 16-bit alignment
107 ;; and then 2 bytes to reach 32-bit alignment
110 ;; Convert to words, unfold x2
111 lsr.f lp_count, r2, 3 ; lp_count = len / 8, i.e. two 32-bit words per pass
116 ;; Both src and dst are aligned
120 prefetch [r1, 28] ;Prefetch the next read location
122 prefetchw [r3, 32] ;Prefetch the next write location
136 ;; Write back the remaining 16 bits
137 EXTRACT_1 (r6, r5, 16)
139 ;; Write back the remaining 8 bits
140 EXTRACT_2 (r5, r5, 16)
143 and.f lp_count, r2, 0x07 ; Tail: len & 7, so at most 7 bytes are left
144 lpnz @.Lcopybytewise_1
152 ;;; CASE 2: The source is unaligned, off by 2
; NOTE(review): the loads, stores, loop bodies and the contents of the
; two big-endian conditional sections are not visible in this listing.
156 ;; Both src and dst are aligned
157 ;; Convert to words, unfold x2
158 lsr.f lp_count, r2, 3 ; lp_count = len / 8, i.e. two 32-bit words per pass
159 #ifdef __BIG_ENDIAN__
165 prefetch [r1, 28] ;Prefetch the next read location
167 prefetchw [r3, 32] ;Prefetch the next write location
181 #ifdef __BIG_ENDIAN__
186 and.f lp_count, r2, 0x07 ; Tail: len & 7, so at most 7 bytes are left
187 lpnz @.Lcopybytewise_2
195 ;;; CASE 3: The source is unaligned, off by 3
196 ;;; Hence, read 1 byte to achieve 32-bit alignment
; NOTE(review): the loads, stores, loop bodies and the contents of the
; two big-endian conditional sections are not visible in this listing.
198 ;; Both src and dst are aligned
199 ;; Convert to words, unfold x2
200 lsr.f lp_count, r2, 3 ; lp_count = len / 8, i.e. two 32-bit words per pass
201 #ifdef __BIG_ENDIAN__
207 prefetch [r1, 28] ;Prefetch the next read location
209 prefetchw [r3, 32] ;Prefetch the next write location
223 #ifdef __BIG_ENDIAN__
228 and.f lp_count, r2, 0x07 ; Tail: len & 7, so at most 7 bytes are left
229 lpnz @.Lcopybytewise_3