2 * strncmp - compare two strings
4 * Copyright (c) 2013-2022, Arm Limited.
5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
7 #if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED))
8 /* See strcmp-stub.c */
19 #define REP8_01 0x0101010101010101
20 #define REP8_7f 0x7f7f7f7f7f7f7f7f
22 /* Parameters and result. */
28 /* Internal variables. */
45 #define neg_offset x15
47 /* Define endian dependent shift operations.
48 On big-endian early bytes are at MSB and on little-endian LSB.
49 LS_FW means shifting towards early bytes.
50 LS_BK means shifting towards later bytes.
66 mov zeroones, #REP8_01
70 cbnz count, L(mutual_align)
72 /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
73 (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
74 can be done in parallel across the entire word. */
81 sub tmp1, data1, zeroones
82 orr tmp2, data1, #REP8_7f
83 eor diff, data1, data2 /* Non-zero if differences found. */
84 csinv endloop, diff, xzr, hi /* Last Dword or differences. */
85 bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
86 ccmp endloop, #0, #0, eq
88 /* End of main loop */
92 orr syndrome, diff, has_nul
93 add limit, limit, 8 /* Rewind limit to before last subs. */
95 /* Limit was reached. Check if the NUL byte or the difference
96 is before the limit. */
97 rev syndrome, syndrome
101 lsl data1, data1, pos
102 cmp limit, pos, lsr #3
103 lsl data2, data2, pos
104 /* But we need to zero-extend (char is unsigned) the value and then
105 perform a signed 32-bit subtraction. */
106 lsr data1, data1, #56
107 sub result, data1, data2, lsr #56
108 csel result, result, xzr, hi
111 /* Not reached the limit, must have found the end or a diff. */
112 tbz limit, #63, L(not_limit)
114 cbz limit, L(not_limit)
116 lsl limit, tmp1, #3 /* Bits -> bytes. */
118 lsr mask, mask, limit
119 bic data1, data1, mask
120 bic data2, data2, mask
122 /* Make sure that the NUL byte is marked in the syndrome. */
123 orr has_nul, has_nul, mask
126 /* For big-endian we cannot use the trick with the syndrome value
127 as carry-propagation can corrupt the upper bits if the trailing
128 bytes in the string contain 0x01. */
129 /* However, if there is no NUL byte in the dword, we can generate
130 the result directly. We can't just subtract the bytes as the
131 MSB might be significant. */
135 cneg result, result, lo
138 /* Re-compute the NUL-byte detection, using a byte-reversed value. */
140 sub tmp1, tmp3, zeroones
141 orr tmp2, tmp3, #REP8_7f
142 bic has_nul, tmp1, tmp2
144 orr syndrome, diff, has_nul
146 /* The most-significant-non-zero bit of the syndrome marks either the
147 first bit that is different, or the top bit of the first zero byte.
148 Shifting left now will bring the critical information into the
151 lsl data1, data1, pos
152 lsl data2, data2, pos
153 /* But we need to zero-extend (char is unsigned) the value and then
154 perform a signed 32-bit subtraction. */
155 lsr data1, data1, #56
156 sub result, data1, data2, lsr #56
161 /* Sources are mutually aligned, but are not currently at an
162 alignment boundary. Round down the addresses and then mask off
163 the bytes that precede the start point.
164 We also need to adjust the limit calculations, but without
165 overflowing if the limit is near ULONG_MAX. */
168 ldr data1, [src1], #8
169 neg tmp3, count, lsl #3 /* 64 - bits(bytes beyond align). */
170 ldr data2, [src2], #8
172 LS_FW tmp2, tmp2, tmp3 /* Shift (count & 63). */
173 /* Adjust the limit and ensure it doesn't overflow. */
174 adds limit, limit, count
175 csinv limit, limit, xzr, lo
176 orr data1, data1, tmp2
177 orr data2, data2, tmp2
181 /* Don't bother with dwords for up to 16 bytes. */
184 b.hs L(try_misaligned_words)
187 /* Perhaps we can do better than this. */
188 ldrb data1w, [src1], #1
189 ldrb data2w, [src2], #1
190 subs limit, limit, #1
191 ccmp data1w, #1, #0, hi /* NZCV = 0b0000. */
192 ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
195 sub result, data1, data2
197 /* Align the SRC1 to a dword by doing a bytewise compare and then do
199 L(try_misaligned_words):
200 cbz count, L(src1_aligned)
204 sub limit, limit, count
207 ldrb data1w, [src1], #1
208 ldrb data2w, [src2], #1
210 ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
212 subs count, count, #1
213 b.hi L(page_end_loop)
215 /* The following diagram explains the comparison of misaligned strings.
216 The bytes are shown in natural order. For little-endian, it is
217 reversed in the registers. The "x" bytes are before the string.
218 The "|" separates data that is loaded at one time.
219 src1 | a a a a a a a a | b b b c c c c c | . . .
220 src2 | x x x x x a a a a a a a a b b b | c c c c c . . .
222 After shifting in each step, the data looks like this:
224 data1 a a a a a a a a b b b c c c c c b b b c c c c c
225 data2 a a a a a a a a b b b 0 0 0 0 0 0 0 0 c c c c c
227 The bytes with "0" are eliminated from the syndrome via mask.
229 Align SRC2 down to 16 bytes. This way we can read 16 bytes at a
230 time from SRC2. The comparison happens in 3 steps. After each step
231 the loop can exit, or read from SRC1 or SRC2. */
233 /* Calculate offset from 8 byte alignment to string start in bits. No
234 need to mask offset since shifts are ignoring upper bits. */
238 neg neg_offset, offset
239 ldr data1, [src1], #8
240 ldp tmp1, tmp2, [src2], #16
241 LS_BK mask, mask, neg_offset
242 and neg_offset, neg_offset, #63 /* Need actual value for cmp later. */
243 /* Skip the first compare if data in tmp1 is irrelevant. */
244 tbnz offset, 6, L(misaligned_mid_loop)
247 /* STEP_A: Compare full 8 bytes when there is enough data from SRC2.*/
248 LS_FW data2, tmp1, offset
249 LS_BK tmp1, tmp2, neg_offset
250 subs limit, limit, #8
251 orr data2, data2, tmp1 /* 8 bytes from SRC2 combined from two regs.*/
252 sub has_nul, data1, zeroones
253 eor diff, data1, data2 /* Non-zero if differences found. */
254 orr tmp3, data1, #REP8_7f
255 csinv endloop, diff, xzr, hi /* If limit, set to all ones. */
256 bic has_nul, has_nul, tmp3 /* Non-zero if NUL byte found in SRC1. */
257 orr tmp3, endloop, has_nul
258 cbnz tmp3, L(full_check)
260 ldr data1, [src1], #8
261 L(misaligned_mid_loop):
262 /* STEP_B: Compare first part of data1 to second part of tmp2. */
263 LS_FW data2, tmp2, offset
265 /* For big-endian we do a byte reverse to avoid carry-propagation
266 problem described above. This way we can reuse the has_nul in the
267 next step and also use syndrome value trick at the end. */
269 #define data1_fixed tmp3
271 #define data1_fixed data1
273 sub has_nul, data1_fixed, zeroones
274 orr tmp3, data1_fixed, #REP8_7f
275 eor diff, data2, data1 /* Non-zero if differences found. */
276 bic has_nul, has_nul, tmp3 /* Non-zero if NUL terminator. */
280 cmp limit, neg_offset, lsr #3
281 orr syndrome, diff, has_nul
282 bic syndrome, syndrome, mask /* Ignore later bytes. */
283 csinv tmp3, syndrome, xzr, hi /* If limit, set to all ones. */
284 cbnz tmp3, L(syndrome_check)
286 /* STEP_C: Compare second part of data1 to first part of tmp1. */
287 ldp tmp1, tmp2, [src2], #16
289 LS_BK data2, tmp1, neg_offset
290 eor diff, data2, data1 /* Non-zero if differences found. */
291 orr syndrome, diff, has_nul
292 and syndrome, syndrome, mask /* Ignore earlier bytes. */
293 csinv tmp3, syndrome, xzr, hi /* If limit, set to all ones. */
294 cbnz tmp3, L(syndrome_check)
296 ldr data1, [src1], #8
303 cmp pos, limit, lsl #3