2 * strchrnul - find a character or nul in a string
4 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 * See https://llvm.org/LICENSE.txt for license information.
6 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
15 #include "../asmdefs.h"
17 /* Arguments and results. */
40 /* For each 32-byte hunk we calculate a 64-bit syndrome value, with
41 two bits per byte (LSB is always in bits 0 and 1, for both big
42 and little-endian systems). For each tuple, bit 0 is set iff
43 the relevant byte matched the requested character or nul. Since the
44 bits in the syndrome reflect exactly the order in which things occur
45 in the original string, a count_trailing_zeros() operation will
46 identify exactly which byte is causing the termination. */
48 /* Locals and temporaries. */
/* __strchrnul_aarch64

   Scans a string in aligned 32-byte hunks for the first byte that is
   either the requested character (chrin) or nul, and computes the
   address of that byte into `result`.

   Method: each hunk is compared byte-wise against nul and against the
   replicated character; the per-byte match flags are reduced to a
   64-bit syndrome with two bits per input byte, and the position of
   the lowest set bit, halved, is the byte offset of the match.

   NOTE(review): the register aliases used here (srcin, chrin, result,
   src, tmp1, tmp3, wtmp2 and the v... vector names) are not defined
   in the visible part of the file; presumably they are #defined
   earlier.  Confirm.

   NOTE(review): several instructions this routine relies on are not
   present in this view: the write of the low half of the magic
   constant, the alignment test, the values of tmp1/tmp3 consumed by
   the bic, the vector-to-scalar moves, the loop label and branches,
   the bit-reverse, the src re-bias and the return.  They have been
   deliberately left alone rather than guessed; restore them from the
   upstream file before relying on this routine.  */
50 ENTRY (__strchrnul_aarch64)
51 /* Magic constant 0x40100401 to allow us to identify which lane
52 matches the termination condition. */
54 movk wtmp2, #0x4010, lsl #16 // Upper 16 bits of the constant; the low half (0x0401) must already be in wtmp2.
55 dup vrepchr.16b, chrin // Replicate the search character into all 16 byte lanes.
56 bic src, srcin, #31 /* Work with aligned 32-byte hunks. */
57 dup vrepmask.4s, wtmp2 // Replicate the lane mask into all four 32-bit lanes.
61 /* Input string is not 32-byte aligned. Rather than forcing
62 the padding bytes to a safe value, we calculate the syndrome
63 for all the bytes, but then mask off those bits of the
64 syndrome that are related to the padding. */
65 ld1 {vdata1.16b, vdata2.16b}, [src], #32 // Load the first aligned 32-byte hunk; src is post-incremented by 32.
67 cmeq vhas_nul1.16b, vdata1.16b, #0 // 0xff in every lane of the first half that holds nul.
68 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b // 0xff in every lane of the first half that holds the character.
69 cmeq vhas_nul2.16b, vdata2.16b, #0 // Same nul test for the second half.
70 cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b // Same character test for the second half.
71 orr vhas_chr1.16b, vhas_chr1.16b, vhas_nul1.16b // A lane terminates the search if it is the character or nul.
72 orr vhas_chr2.16b, vhas_chr2.16b, vhas_nul2.16b
/* With the mask 0x40100401 in each 32-bit lane, the four bytes of a
   lane keep bits 0x01, 0x04, 0x10 and 0x40 respectively, so the
   pairwise adds below cannot carry between bytes. */
73 and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
74 and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
76 addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
78 addp vend1.16b, vend1.16b, vend1.16b // 128->64
/* NOTE(review): tmp3 is presumably the 64-bit syndrome taken from
   vend1 and tmp1 the mask covering the padding bytes; neither
   assignment is visible in this view. */
82 bic tmp1, tmp3, tmp1 // Mask padding bits.
/* Subsequent hunks are fully aligned, so no padding mask is needed.
   NOTE(review): the loop label and its back-branch are not visible. */
86 ld1 {vdata1.16b, vdata2.16b}, [src], #32 // Load the next 32-byte hunk; src is post-incremented by 32.
87 cmeq vhas_nul1.16b, vdata1.16b, #0
88 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
89 cmeq vhas_nul2.16b, vdata2.16b, #0
90 cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
91 /* Use a fast check for the termination condition. */
92 orr vhas_chr1.16b, vhas_nul1.16b, vhas_chr1.16b
93 orr vhas_chr2.16b, vhas_nul2.16b, vhas_chr2.16b
94 orr vend1.16b, vhas_chr1.16b, vhas_chr2.16b // Any match in either half of the hunk.
95 addp vend1.2d, vend1.2d, vend1.2d // Fold the two 64-bit halves into one value.
99 /* Termination condition found. Now need to establish exactly why we terminated. */
101 and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
102 and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
103 addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
104 addp vend1.16b, vend1.16b, vend1.16b // 128->64
/* NOTE(review): the move of the syndrome into tmp1 and the
   bit-reverse that the next comment refers to are not visible. */
108 /* Count the trailing zeros, by bit reversing... */
110 /* Re-bias source. */
112 clz tmp1, tmp1 /* ... and counting the leading zeros. */
113 /* tmp1 is twice the offset into the fragment. */
114 add result, src, tmp1, lsr #1 // result = src + tmp1 / 2.
117 END (__strchrnul_aarch64)