2 * strchrnul - find a character or nul in a string
4 * Copyright (c) 2014-2022, Arm Limited.
5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
7 #if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED))
8 /* See strchrnul-stub.c */
19 /* Arguments and results. */
42 /* For each 32-byte hunk we calculate a 64-bit syndrome value, with
43 two bits per byte (LSB is always in bits 0 and 1, for both big
44 and little-endian systems). For each tuple, bit 0 is set iff
45 the relevant byte matched the requested character or nul. Since the
46 bits in the syndrome reflect exactly the order in which things occur
47 in the original string, a count_trailing_zeros() operation will
48 identify exactly which byte is causing the termination. */
50 /* Locals and temporaries. */
/* Setup and first hunk: build the per-lane constants, round the source
   pointer down to a 32-byte boundary, and compute the syndrome for the
   first (possibly partly out-of-string) 32-byte hunk.
   NOTE(review): the register aliases used here (src, srcin, chrin,
   tmp1, tmp3, wtmp2, v...) are defined outside this excerpt. */
54 /* Magic constant 0x40100401 to allow us to identify which lane
55 matches the termination condition. */
/* movk only writes bits 16..31 and keeps the rest of wtmp2, so the low
   half of the constant (0x0401) is presumably loaded by an instruction
   not visible in this excerpt -- TODO confirm. */
57 movk wtmp2, #0x4010, lsl #16
/* Broadcast the low byte of chrin to all 16 byte lanes. */
58 dup vrepchr.16b, chrin
59 bic src, srcin, #31 /* Work with aligned 32-byte hunks. */
/* Broadcast the 32-bit constant to all four word lanes; with the value
   0x40100401 every group of four byte lanes holds 0x01, 0x04, 0x10,
   0x40, i.e. one distinct even-numbered bit per lane. */
60 dup vrepmask.4s, wtmp2
64 /* Input string is not 32-byte aligned. Rather than forcing
65 the padding bytes to a safe value, we calculate the syndrome
66 for all the bytes, but then mask off those bits of the
67 syndrome that are related to the padding. */
/* Load 32 bytes and post-increment src by 32. */
68 ld1 {vdata1.16b, vdata2.16b}, [src], #32
/* Lanes become 0xff where the data byte equals the requested char. */
70 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
71 cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
/* Unsigned (has_chr >= data) per lane: true when the lane matched
   (0xff >= anything) or when the data byte is zero (anything >= 0),
   so one compare covers both "char" and "nul". */
72 cmhs vhas_nul1.16b, vhas_chr1.16b, vdata1.16b
73 cmhs vhas_nul2.16b, vhas_chr2.16b, vdata2.16b
/* Keep only the lane-identifying bit of each terminating lane. */
74 and vhas_chr1.16b, vhas_nul1.16b, vrepmask.16b
75 and vhas_chr2.16b, vhas_nul2.16b, vrepmask.16b
/* Two pairwise adds fold four input lanes into each result byte; the
   per-lane bits are disjoint, so the adds cannot carry. */
77 addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
79 addp vend1.16b, vend1.16b, vend1.16b // 128->64
/* tmp1 = tmp3 & ~tmp1.
   NOTE(review): tmp3 (presumably the 64-bit syndrome) and tmp1
   (presumably the padding mask) are set by instructions not visible in
   this excerpt -- verify against the full file. */
83 bic tmp1, tmp3, tmp1 // Mask padding bits.
/* Main loop body: test one aligned 32-byte hunk for the requested
   character or nul. Only a cheap any-lane-set reduction is done here;
   the exact position is not computed in this body.
   NOTE(review): the loop label and the branch that consumes vend1 are
   not visible in this excerpt. */
/* Load the next 32 bytes and post-increment src by 32. */
88 ld1 {vdata1.16b, vdata2.16b}, [src], #32
/* Lanes become 0xff where the data byte equals the requested char. */
89 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
90 cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
/* Unsigned (has_chr >= data): true on a char match or a zero byte. */
91 cmhs vhas_nul1.16b, vhas_chr1.16b, vdata1.16b
92 cmhs vhas_nul2.16b, vhas_chr2.16b, vdata2.16b
/* Merge both halves, then a pairwise unsigned max narrows 128 bits to
   64: the low 64 bits are non-zero iff some lane in the hunk hit. */
93 orr vend1.16b, vhas_nul1.16b, vhas_nul2.16b
94 umaxp vend1.16b, vend1.16b, vend1.16b
98 /* Termination condition found. Now need to establish exactly why it terminated. */
/* Tail: turn the per-lane hit masks into the 64-bit syndrome (two bits
   per input byte) and convert the first set bit into a byte address.
   NOTE(review): the move of the syndrome into tmp1, the bit reverse,
   the src re-bias and the return are not visible in this excerpt. */
/* Keep only the lane-identifying bit of each terminating lane. */
100 and vhas_chr1.16b, vhas_nul1.16b, vrepmask.16b
101 and vhas_chr2.16b, vhas_nul2.16b, vrepmask.16b
/* Fold 32 lanes down to 8 bytes; the per-lane bits are disjoint, so
   the pairwise adds cannot carry. */
102 addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
103 addp vend1.16b, vend1.16b, vend1.16b // 128->64
107 /* Count the trailing zeros, by bit reversing... */
/* The preceding ld1 post-incremented src by 32, so src has to be moved
   back before it is used as the hunk base below. */
109 /* Re-bias source. */
111 clz tmp1, tmp1 /* ... and counting the leading zeros. */
112 /* tmp1 is twice the offset into the fragment. */
/* result = src + (tmp1 >> 1): halve the bit index to get a byte offset. */
113 add result, src, tmp1, lsr #1