2 * strcpy/stpcpy - copy a string returning pointer to start/end.
4 * Copyright (c) 2020-2023, Arm Limited.
5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
7 #if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED))
8 /* See strchr-stub.c */
13 * ARMv8-a, AArch64, Advanced SIMD.
/* NOTE(review): the conditional that selects between the two pairs of
   definitions below is not visible in this excerpt.  */
/* stpcpy variant: IFSTPCPY (X, ...) expands to its own arguments, re-joined
   with a comma, so the wrapped instruction is emitted.  */
42 # define STRCPY stpcpy
43 # define IFSTPCPY(X,...) X,__VA_ARGS__
/* strcpy variant: IFSTPCPY (...) expands to nothing, so the wrapped
   instruction is dropped.  */
45 # define STRCPY strcpy
46 # define IFSTPCPY(X,...)
51 For each 16-byte chunk we calculate a 64-bit nibble mask value with four bits
52 per byte. We take 4 bits of every comparison byte using a shift-right-and-narrow
53 by 4 (SHRN) instruction. Since the bits in the nibble mask reflect the order in
54 which the bytes occur in the original string, counting leading zeros identifies
55 exactly which byte matched. */
/* NOTE(review): this excerpt is not contiguous.  Labels, most branches and
   the instructions that write synd, len, tmp and dst are not visible, and the
   register aliases (vdata, dataq, data2, ...) are defined elsewhere.  The
   comments below describe only the instructions shown.  */
/* Load 16 bytes from [src] and test them for NUL: cmeq sets a byte lane to
   all-ones where the data byte equals zero; shrn shifts each 16-bit lane
   right by 4 and narrows it to 8 bits, leaving a 64-bit mask with 4 bits per
   input byte.  */
61 ld1 {vdata.16b}, [src]
62 cmeq vhas_nul.16b, vdata.16b, 0
64 shrn vend.8b, vhas_nul.8h, 4
/* Same NUL test and narrowing applied to vdata again.  */
70 cmeq vhas_nul.16b, vdata.16b, 0
71 shrn vend.8b, vhas_nul.8h, 4
/* Branch to L(start_loop) when synd is zero.  NOTE(review): presumably synd
   holds the mask taken from vend, so zero means no NUL seen yet - the
   instruction that writes synd is outside this excerpt; confirm.  */
73 cbz synd, L(start_loop)
/* len = tmp + (len >> 2).  NOTE(review): the shift by 2 presumably converts
   a bit index in the 4-bits-per-byte mask into a byte index - confirm.  */
80 add len, tmp, len, lsr 2
/* Copy one dataq2-sized value from srcin + tmp to dstin + tmp.  When
   IFSTPCPY is non-empty (the stpcpy definitions), also set
   result = dstin + len.  */
84 ldr dataq2, [srcin, tmp]
86 str dataq2, [dstin, tmp]
87 IFSTPCPY (add result, dstin, len)
/* Same pattern using data2.  */
98 ldr data2, [srcin, tmp]
100 str data2, [dstin, tmp]
101 IFSTPCPY (add result, dstin, len)
/* Same pattern using dataw2.  */
109 ldr dataw2, [srcin, tmp]
111 str dataw2, [dstin, tmp]
112 IFSTPCPY (add result, dstin, len)
/* Store a single zero byte at dstin + len; for stpcpy, result = dstin + len.  */
120 strb wzr, [dstin, len]
121 IFSTPCPY (add result, dstin, len)
/* tmp = srcin - dstin.  */
126 sub tmp, srcin, dstin
/* NUL test on vdata.  umaxp takes the maximum of each adjacent pair of byte
   lanes, so with both sources equal the low 64 bits of vend are non-zero
   exactly when some byte lane of vhas_nul is set.  */
133 cmeq vhas_nul.16b, vdata.16b, 0
134 umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
/* Go to L(loopend) when synd is non-zero.  */
136 cbnz synd, L(loopend)
/* Store dataq at dst - 16, then pre-increment src by 32 (writeback) and load
   the next dataq from the updated address.  */
137 str dataq, [dst, -16]
138 ldr dataq, [src, 32]!
/* NUL test and pairwise-max reduction on the newly loaded data.  */
139 cmeq vhas_nul.16b, vdata.16b, 0
140 umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
145 shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
/* The following section is assembled only when __AARCH64EB__ (the big-endian
   target macro) is not defined; its body is not visible in this excerpt.  */
148 #ifndef __AARCH64EB__
/* Load dataq from dst + tmp; for stpcpy, result = dst + 15.  */
154 ldr dataq, [dst, tmp]
156 IFSTPCPY (add result, dst, 15)