2 * strlen - calculate the length of a string
4 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 * See https://llvm.org/LICENSE.txt for license information.
6 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 #include "../asmdefs.h"
16 /* Arguments and results. */
20 /* Locals and temporaries. */
33 /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
34 (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
35 can be done in parallel across the entire word. A faster check
36 (X - 1) & 0x80 is zero for non-NUL ASCII characters, but gives
37 false hits for characters 129..255. */
/* The byte 0x01 replicated into each of the eight bytes of a 64-bit word. */
39 #define REP8_01 0x0101010101010101
/* The byte 0x7f replicated into each of the eight bytes of a 64-bit word. */
40 #define REP8_7f 0x7f7f7f7f7f7f7f7f
42 /* This implementation is compatible with Memory Tagging. All loads
43 are 16 bytes in size and 16 bytes aligned. This also avoids the
44 need for page boundary checks. This implementation is correct
45 even without Memory Tagging, but other implementations could be
46 more beneficial if Memory Tagging is not enabled.
48 First load is aligned down and can contain bytes that are located
49 before the string. This is handled by modifying the "zeroones"
50 mask. The bytes that need to be ignored are set to zero.
51 If the string is aligned in such a way that 8 or more bytes from
52 the first load should be ignored, there is a special case
53 (skip_first_8_bytes) which only compares the second 8 bytes.
55 If there is a NUL byte in the first load, we calculate the length
56 from the two 8-byte words using conditional select to reduce branch mispredictions.
59 If the string is longer than 16 bytes, we check 32 bytes per
60 iteration using the fast NUL check (main_loop). If we encounter
61 non-ASCII characters, we fall back to a second loop
62 (nonascii_loop) using the full NUL check. */
/* __strlen_aarch64_mte: compute the length of the NUL-terminated string
   whose address is in srcin; the result is written to len.  Every load
   is an ldp of two 8-byte words (data1, data2).

   NOTE(review): in this view several things the code relies on are not
   visible: the register #defines, any write to zeroones or offset, the
   loop labels, the conditional branches and the returns.  Confirm the
   control flow against the complete file before relying on the
   per-section notes below.  */
64 ENTRY(__strlen_aarch64_mte)
65 bic src, srcin, 15 /* Align down to 16 bytes. */
67 /* (offset & 63) holds number of bits to ignore in a register.*/
/* Post-indexed load: reads the aligned 16 bytes at src, then leaves
   src 16 bytes below that block, so that a later pre-indexed
   [src, 32]! load addresses the 16 bytes following this block.  */
69 ldp data1, data2, [src], -16
/* tmp1 = zeroones shifted by offset: the "zeroones" mask with the
   bytes that must be ignored set to zero (see the file header).  */
70 lsl tmp1, zeroones, offset /* Shift (offset & 63). */
72 /* For big-endian, carry propagation (if the final byte in the
73 string is 0x01) means we cannot use has_nul1/2 directly.
74 e.g. 0x0100 - 0x0101 = 0xffff, so 0x01 will be mistaken for NUL.
75 Since we expect strings to be small and early-exit,
76 byte-swap the data now so has_nul1/2 will be correct. */
/* Bit 3 of srcin set means 8 or more bytes of the first load precede
   the string, so only data2 has to be examined.  */
80 tbnz srcin, 3, L(skip_first_8_bytes)
/* Full NUL check on both words: syndrome = (X - mask) & ~(X | 0x7f).
   bics also sets the flags, so Z is set iff has_nul1 == 0.
   NOTE(review): no instruction subtracting from data1 into tmp1 is
   visible before the bics below - confirm against the full file.  */
82 orr tmp2, data1, REP8_7f
83 sub tmp3, data2, zeroones
84 orr tmp4, data2, REP8_7f
85 bics has_nul1, tmp1, tmp2
86 bic has_nul2, tmp3, tmp4
87 /* If comparison happens, C flag is always set. */
/* If has_nul1 == 0 (eq) compare has_nul2 with 0, otherwise force
   NZCV to 0.  Afterwards Z is set only when both syndromes are zero,
   and C is clear only when has_nul1 is non-zero.  */
88 ccmp has_nul2, 0, 0, eq
91 /* Enter with C = has_nul1 == 0. */
/* Keep has_nul1 if the NUL was found in the first word (cc),
   otherwise use the syndrome of the second word.  */
92 csel has_nul1, has_nul1, has_nul2, cc
93 and tmp2, srcin, 7 /* Bytes to ignore. */
/* Byte-reverse the syndrome before clz counts its leading zero bits.  */
94 rev has_nul1, has_nul1
96 clz tmp1, has_nul1 /* Count bits before NUL. */
97 /* Add 8 if NUL byte is not in first register. */
/* NOTE(review): the instruction that gives tmp3 the "+ 8" value
   described above is not visible in this view.  */
99 csel len, tmp2, tmp3, cc
/* tmp1 is a bit count; lsr 3 converts it to bytes.  */
100 add len, len, tmp1, lsr 3
103 L(skip_first_8_bytes):
/* Only the second word can contain string bytes.  tmp1 still holds
   the shifted mask, so bytes before the string cannot raise a hit.
   bics sets Z iff no NUL was found in data2.  */
104 sub tmp1, data2, tmp1
105 orr tmp2, data2, REP8_7f
106 bics has_nul1, tmp1, tmp2
109 rev has_nul1, has_nul1
110 lsl tmp1, has_nul1, offset /* Ignore bytes before string. */
111 clz tmp1, tmp1 /* Count bits before NUL. */
115 /* The inner loop processes 32 bytes per iteration and uses the fast
116 NUL check. If we encounter non-ASCII characters, use a second
117 loop with the accurate NUL check. */
/* First 16 bytes of the iteration: pre-indexed load, src += 32 with
   write-back.  */
120 ldp data1, data2, [src, 32]!
121 sub tmp1, data1, zeroones
122 sub tmp3, data2, zeroones
/* "zeroones, lsl 7" is the per-byte 0x80 mask of the fast check,
   assuming zeroones holds REP8_01 (its initialisation is not visible
   here).  NOTE(review): no instruction writing tmp2 from tmp1/tmp3 is
   visible before this tst.  */
124 tst tmp2, zeroones, lsl 7
/* Second 16 bytes of the iteration: plain offset, src is not
   updated by this load.  */
126 ldp data1, data2, [src, 16]
127 sub tmp1, data1, zeroones
128 sub tmp3, data2, zeroones
130 tst tmp2, zeroones, lsl 7
134 /* The fast check failed, so do the slower, accurate NUL check. */
/* Reuses tmp1/tmp3 (data - zeroones) computed for the fast check.  */
135 orr tmp2, data1, REP8_7f
136 orr tmp4, data2, REP8_7f
137 bics has_nul1, tmp1, tmp2
138 bic has_nul2, tmp3, tmp4
139 ccmp has_nul2, 0, 0, eq
142 /* Enter with C = has_nul1 == 0. */
145 /* For big-endian, carry propagation (if the final byte in the
146 string is 0x01) means we cannot use has_nul1/2 directly. The
147 easiest way to get the correct byte is to byte-swap the data
148 and calculate the syndrome a second time. */
/* Pick the data word that contained the NUL (data1 when has_nul1 is
   non-zero, i.e. cc) and recompute its syndrome.
   NOTE(review): this recomputation and the csel on has_nul1/has_nul2
   that follows it look like alternatives for the two endiannesses;
   any conditional-assembly directives separating them are not visible
   in this view.  */
149 csel data1, data1, data2, cc
151 sub tmp1, data1, zeroones
152 orr tmp2, data1, REP8_7f
153 bic has_nul1, tmp1, tmp2
155 csel has_nul1, has_nul1, has_nul2, cc
158 rev has_nul1, has_nul1
/* NOTE(review): the instructions that set up len, tmp2 and tmp1 for
   the final length computation below are not visible in this view.  */
161 csel len, len, tmp2, cc
/* tmp1 is a bit count; lsr 3 converts it to bytes.  */
162 add len, len, tmp1, lsr 3
/* Two unrolled copies of the accurate check, each advancing src by
   16 with write-back - presumably the nonascii_loop described in the
   file header (its label and branches are not visible here).  After
   each ccmp, Z is set only when neither word contains a NUL.  */
166 ldp data1, data2, [src, 16]!
167 sub tmp1, data1, zeroones
168 orr tmp2, data1, REP8_7f
169 sub tmp3, data2, zeroones
170 orr tmp4, data2, REP8_7f
171 bics has_nul1, tmp1, tmp2
172 bic has_nul2, tmp3, tmp4
173 ccmp has_nul2, 0, 0, eq
175 ldp data1, data2, [src, 16]!
176 sub tmp1, data1, zeroones
177 orr tmp2, data1, REP8_7f
178 sub tmp3, data2, zeroones
179 orr tmp4, data2, REP8_7f
180 bics has_nul1, tmp1, tmp2
181 bic has_nul2, tmp3, tmp4
182 ccmp has_nul2, 0, 0, eq
186 END(__strlen_aarch64_mte)