 * memrchr - find last character in a memory zone.
 * Copyright (c) 2020-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 * ARMv8-a, AArch64, Advanced SIMD.
14 #if defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED)
15 /* See memrchr-stub.c */
   For each 16-byte chunk we calculate a 64-bit nibble mask value with four bits
   per byte. We take 4 bits of every comparison byte with a shift-right-and-narrow-
   by-4 instruction. Since the bits in the nibble mask reflect the order in
   which things occur in the original string, counting leading zeros identifies
   exactly which byte matched. */
53 ld1 {vdata.16b}, [src]
54 dup vrepchr.16b, chrin
55 cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
57 shrn vend.8b, vhas_chr.8h, 4 /* 128->64 */
60 cbz synd, L(start_loop)
63 sub result, endm1, synd, lsr 2
64 cmp cntin, synd, lsr 2
65 csel result, result, xzr, hi
70 subs cntrem, src, srcin
73 /* Make sure that it won't overread by a 16-byte chunk */
75 tbz cntrem, 4, L(loop32_2)
80 ldr qdata, [src, -32]!
81 cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
82 umaxp vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */
88 subs cntrem, cntrem, 32
89 cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
91 umaxp vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */
97 shrn vend.8b, vhas_chr.8h, 4 /* 128->64 */
105 sub tmp, tmp, synd, lsr 2
107 csel result, tmp, xzr, hs