/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 *  Copyright (C) 1995-2001 Russell King
 */
#include <asm/assembler.h>
/*
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum)
 *  r0 = src, r1 = dst, r2 = len, r3 = sum
 *  Returns : r0 = checksum
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 */
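
/*
 * The loops below rely on that: flag-setting control instructions are
 * interleaved with adcs chains without losing the end-around carry.
 * An illustrative pattern (not a literal excerpt from this file):
 *
 *	adcs	sum, sum, r4		@ sum += r4 + C, produces a new C
 *	teq	ip, #0			@ loop test: teq leaves C untouched
 *	bne	1b			@ the next adcs still sees the carry
 */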

		/*
		 * Align an unaligned destination pointer.  We know that
		 * we have >= 8 bytes here, so we don't need to check
		 * the length.  Note that the source pointer hasn't been
		 * realigned.
		 */
		adcs	sum, sum, ip, put_byte_1	@ update checksum
		reteq	lr			@ dst is now 32bit aligned
.Ldst_16bit:	load2b	r8, ip
		adcs	sum, sum, r8, put_byte_0
		adcs	sum, sum, ip, put_byte_1
		ret	lr			@ dst is now 32bit aligned
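
		/*
		 * put_byte_0/put_byte_1 come from <asm/assembler.h> and
		 * shift the loaded byte into the lane matching its position
		 * in the output stream, keeping the 32-bit ones'-complement
		 * accumulation byte-exact.  Roughly, on little-endian:
		 * put_byte_0 -> lsl #0, put_byte_1 -> lsl #8.
		 */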

		/*
		 * Handle 0 to 7 bytes, with any alignment of source and
		 * destination pointers.  Note that when we get here, C = 0
		 */
.Lless8:	teq	len, #0			@ check for zero count
		/* we must have at least one byte. */
		tst	dst, #1			@ dst 16-bit aligned
		adcs	sum, sum, ip, put_byte_1	@ update checksum
		adcs	sum, sum, r8, put_byte_0
		adcs	sum, sum, ip, put_byte_1
		adcs	sum, sum, r8, put_byte_0	@ update checksum
		cmp	len, #8			@ Ensure that we have at least
		blo	.Lless8			@ 8 bytes to copy.

		adds	sum, sum, #0		@ C = 0
		tst	dst, #3			@ Test destination alignment
		blne	.Ldst_unaligned		@ align destination, return here

		/*
		 * Ok, the dst pointer is now 32bit aligned, and we know
		 * that we must have more than 4 bytes to copy.  Note
		 * that C contains the carry from the dst alignment above.
		 */
		tst	src, #3			@ Test source alignment
		bne	.Lsrc_not_aligned

		/* Routine for src & dst aligned */

1:		load4l	r4, r5, r6, r7
		stmia	dst!, {r4, r5, r6, r7}
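
		/*
		 * Main aligned loop: four words (16 bytes) per iteration are
		 * loaded, stored, and folded into sum with an adcs chain, so
		 * the copy and the checksum share a single pass over the
		 * data.
		 */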
		mov	r5, r4, get_byte_0
		adcs	sum, sum, r4, lspush #16
		mov	r5, r4, get_byte_1
		mov	r5, r4, get_byte_2
		adcsne	sum, sum, r5, put_byte_0
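
		/*
		 * Tail handling: fewer than 4 bytes remain, so only the low
		 * part of r4 is real data.  "r4, lspush #16" (lsl #16 on
		 * little-endian, per <asm/assembler.h>) shifts the unwanted
		 * bytes out before the add, while get_byte_N extracts each
		 * byte for the single strb stores.
		 */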

		/*
		 * If the dst pointer was not 16-bit aligned, we
		 * need to rotate the checksum here to get around
		 * the inefficient byte manipulations in the
		 * architecture independent code.
		 */
.Ldone:		adc	r0, sum, #0
		ldr	sum, [sp, #0]		@ dst
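
		/*
		 * Rotating a ones'-complement sum by 8 bits is equivalent to
		 * checksumming the same data shifted by one byte (cf. RFC
		 * 1071), which is what undoes the phase shift from an odd
		 * dst here.
		 */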
		adc	sum, sum, #0		@ include C from dst alignment
		mov	r4, r5, lspull #8	@ C = 0
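
		/*
		 * src is 1 byte past a word boundary: the bytes already
		 * fetched sit in the low end of r4 (lspull #8 = lsr #8 on
		 * little-endian), and each subsequent aligned load donates
		 * its low byte via lspush #24 (lsl #24), rebuilding aligned
		 * words: each step computes word = (prev >> 8) | (next << 24).
		 */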
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lspush #24
		mov	r5, r5, lspull #8
		orr	r5, r5, r6, lspush #24
		mov	r6, r6, lspull #8
		orr	r6, r6, r7, lspush #24
		mov	r7, r7, lspull #8
		orr	r7, r7, r8, lspush #24
		stmia	dst!, {r4, r5, r6, r7}
		mov	r4, r8, lspull #8
		orr	r4, r4, r5, lspush #24
		mov	r5, r5, lspull #8
		orr	r5, r5, r6, lspush #24
		mov	r4, r6, lspull #8
		orr	r4, r4, r5, lspush #24
		mov	r4, r5, lspull #8
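
		/*
		 * Invariant across these remainder blocks: r4 always holds
		 * the loaded-but-not-yet-stored bytes of the last source
		 * word, already shifted down, ready to merge with the next
		 * load or to be consumed by the byte tail below.
		 */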
		mov	r5, r4, get_byte_0
		adcs	sum, sum, r4, lspush #16
		mov	r5, r4, get_byte_1
		mov	r5, r4, get_byte_2
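
		/*
		 * Same scheme for src offset 2, with 16-bit shifts:
		 * word = (prev >> 16) | (next << 16).
		 */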
.Lsrc2_aligned:	mov	r4, r5, lspull #16
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lspush #16
		mov	r5, r5, lspull #16
		orr	r5, r5, r6, lspush #16
		mov	r6, r6, lspull #16
		orr	r6, r6, r7, lspush #16
		mov	r7, r7, lspull #16
		orr	r7, r7, r8, lspush #16
		stmia	dst!, {r4, r5, r6, r7}
		mov	r4, r8, lspull #16
		orr	r4, r4, r5, lspush #16
		mov	r5, r5, lspull #16
		orr	r5, r5, r6, lspush #16
		mov	r4, r6, lspull #16
		orr	r4, r4, r5, lspush #16
		mov	r4, r5, lspull #16
		mov	r5, r4, get_byte_0
		mov	r5, r4, get_byte_1
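
		/*
		 * And for src offset 3 a single byte is carried between
		 * words: word = (prev >> 24) | (next << 8).
		 */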
.Lsrc3_aligned:	mov	r4, r5, lspull #24
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lspush #8
		mov	r5, r5, lspull #24
		orr	r5, r5, r6, lspush #8
		mov	r6, r6, lspull #24
		orr	r6, r6, r7, lspush #8
		mov	r7, r7, lspull #24
		orr	r7, r7, r8, lspush #8
		stmia	dst!, {r4, r5, r6, r7}
		mov	r4, r8, lspull #24
		orr	r4, r4, r5, lspush #8
		mov	r5, r5, lspull #24
		orr	r5, r5, r6, lspush #8
		mov	r4, r6, lspull #24
		orr	r4, r4, r5, lspush #8
		mov	r4, r5, lspull #24
		mov	r5, r4, get_byte_0
		mov	r5, r4, get_byte_0
		adcs	sum, sum, r4, lspush #24
		mov	r5, r4, get_byte_1
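
		/*
		 * As in the aligned tail above, the lspush shift (lsl #24
		 * here) keeps only the bytes still owed to the checksum and
		 * drops the rest before the adcs; get_byte_0/get_byte_1 feed
		 * the single-byte stores.
		 */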