2 * memchr - find a character in a memory zone
4 * Copyright (c) 2014-2022, Arm Limited.
5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
8 #if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED))
9 /* See memchr-stub.c */
19 /* Arguments and results. */
44 * For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits
45 * per byte. In each two-bit pair, bit 0 is set if the corresponding byte
46 * matched the requested character and bit 1 is unused (this is faster than
47 * using a 32-bit syndrome). Since the bits in the syndrome reflect exactly the
48 * order in which the bytes occur in the source buffer, counting trailing zeros
49 * allows us to identify exactly which byte has matched.
55 /* Do not dereference srcin if no bytes to compare. */
56 cbz cntin, L(zero_length)
58 * Magic constant 0x40100401 allows us to identify which lane matches
62 movk wtmp2, #0x4010, lsl #16
63 dup vrepchr.16b, chrin
64 /* Work with aligned 32-byte chunks */
66 dup vrepmask.4s, wtmp2
68 and cntrem, cntin, #31
72 * Input string is not 32-byte aligned. We calculate the syndrome
73 * value for the aligned 32-byte block containing the first bytes
74 * and mask off the irrelevant part.
77 ld1 {vdata1.16b, vdata2.16b}, [src], #32
79 adds cntin, cntin, tmp
80 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
81 cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
82 and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
83 and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
84 addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
85 addp vend.16b, vend.16b, vend.16b /* 128->64 */
87 /* Clear the soff*2 lower bits */
91 /* The first block can also be the last */
93 /* Have we found something already? */
97 ld1 {vdata1.16b, vdata2.16b}, [src], #32
98 subs cntin, cntin, #32
99 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
100 cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
101 /* If we're out of data we finish regardless of the result */
103 /* Use a fast check for the termination condition */
104 orr vend.16b, vhas_chr1.16b, vhas_chr2.16b
105 addp vend.2d, vend.2d, vend.2d
107 /* We're not out of data, loop if we haven't found the character */
111 /* Termination condition found, let's calculate the syndrome value */
112 and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
113 and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
114 addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
115 addp vend.16b, vend.16b, vend.16b /* 128->64 */
117 /* Only do the clear for the last possible block */
121 /* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits */
122 add tmp, cntrem, soff
130 /* Count the trailing zeros using bit reversing */
132 /* Compensate for the last post-increment */
134 /* Check that we have found a character */
136 /* And count the leading zeros */
138 /* Compute the potential result */
139 add result, src, synd, lsr #1
140 /* Select result or NULL */
141 csel result, xzr, result, eq