/*
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */

#if __ARM_ARCH >= 7 && __ARM_ARCH_ISA_ARM >= 1

/* Implementation of strcmp for ARMv7 when DSP instructions are
   available.  Use ldrd to support wider loads, provided the data
   is sufficiently aligned.  Use saturating arithmetic to optimize
   the compares.  */

#include "../asmdefs.h"
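
/* Illustrative only: a rough C model of the saturating-arithmetic
   "syndrome" idiom (uadd8 + sel against a register of all-ones) that the
   aligned loops below rely on.  Not part of the build; the helper name is
   invented for illustration.

        #include <stdint.h>

        // Non-zero iff w1 and w2 differ in some byte or w1 has a NUL byte.
        // Each byte lane of the result is w1^w2 when the w1 byte is
        // non-zero (uadd8 with 0xff sets that lane's GE flag), or 0xff
        // when the w1 byte is NUL (sel then picks the all-ones byte).
        static uint32_t strcmp_syndrome(uint32_t w1, uint32_t w2)
        {
            uint32_t synd = 0;
            for (int i = 0; i < 4; i++) {
                uint32_t b1 = (w1 >> (8 * i)) & 0xff;
                uint32_t b2 = (w2 >> (8 * i)) & 0xff;
                uint32_t lane = (b1 == 0) ? 0xff : (b1 ^ b2);
                synd |= lane << (8 * i);
            }
            return synd;   // zero means: bytes equal and no NUL yet
        }
 */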
/* Build Options:
   STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first
   byte in the string.  If comparing completely random strings
   the pre-check will save time, since there is a very high
   probability of a mismatch in the first character: we save
   significant overhead if this is the common case.  However,
   if strings are likely to be identical (e.g. because we're
   verifying a hit in a hash table), then this check is largely
   redundant.  */

#define STRCMP_NO_PRECHECK 0
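
/* Illustrative only: what the optional pre-check amounts to in C
   (invented helper, not part of this file).  Resolve the compare from the
   first byte alone when it is a mismatch or a NUL; only otherwise fall
   through to the word-at-a-time code.

        // Returns 1 and writes *result if byte 0 already decides the
        // comparison; returns 0 if the full compare is still needed.
        static int strcmp_precheck(unsigned char c1, unsigned char c2,
                                   int *result)
        {
            if (c1 == 0 || c1 != c2) {
                *result = c1 - c2;
                return 1;
            }
            return 0;
        }
 */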
        /* This version uses Thumb-2 code.  */

#ifdef __ARM_BIG_ENDIAN
#define MSB 0x000000ff
#define LSB 0xff000000
#define BYTE0_OFFSET 24
#define BYTE1_OFFSET 16
#define BYTE2_OFFSET 8
#define BYTE3_OFFSET 0
#else /* not __ARM_BIG_ENDIAN */
#define BYTE0_OFFSET 0
#define BYTE1_OFFSET 8
#define BYTE2_OFFSET 16
#define BYTE3_OFFSET 24
#define MSB 0xff000000
#define LSB 0x000000ff
#endif /* not __ARM_BIG_ENDIAN */
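
/* Illustrative only: how the BYTEn_OFFSET values are used.  String byte n
   of a loaded word is reached by shifting the word by that offset,
   whatever the endianness; this mirrors the later
   "uxtb tmp1, data1, ror #BYTEn_OFFSET" sequences.  Rough C model, not
   part of the build.

        #include <stdint.h>

        static unsigned char string_byte(uint32_t word, unsigned n)
        {
            static const unsigned offset[4] = {
                BYTE0_OFFSET, BYTE1_OFFSET, BYTE2_OFFSET, BYTE3_OFFSET
            };
            return (unsigned char)(word >> offset[n & 3]);
        }
 */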
/* Parameters and result.  */
#define result r0 /* Overlaps src1.  */

/* Internal variables.  */

/* Additional internal variables for 64-bit aligned data.  */
#define syndrome_a tmp1
#define syndrome_b tmp2

/* Additional internal variables for 32-bit aligned data.  */

        /* Macro to compute and return the result value for word-aligned
           cases.  */
        .macro strcmp_epilogue_aligned synd d1 d2 restore_r6
#ifdef __ARM_BIG_ENDIAN
        /* If data1 contains a zero byte, then syndrome will contain a 1 in
           bit 7 of that byte.  Otherwise, the highest set bit in the
           syndrome will highlight the first different bit.  It is therefore
           sufficient to extract the eight bits starting with the syndrome
           bit.  */
        ldrd r4, r5, [sp], #16
        sub result, result, r1, lsr #24
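
/* Illustrative only: a rough C model of the big-endian epilogue above
   (invented helper, not part of the build).  The leading-zero count of the
   non-zero syndrome finds the first mismatching or NUL bit, and the eight
   bits of each data word starting at that position are enough to order the
   strings.

        #include <stdint.h>

        static int epilogue_result_be(uint32_t d1, uint32_t d2, uint32_t synd)
        {
            unsigned shift = (unsigned)__builtin_clz(synd);  // synd != 0
            return (int)((d1 << shift) >> 24) - (int)((d2 << shift) >> 24);
        }
 */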
        /* To use the big-endian trick we'd have to reverse all three words.
           That's slower than this approach.  */
        ldrd r6, r7, [sp, #8]
        and result, \d1, #255
        ldrd r4, r5, [sp], #16
        sub result, result, r1
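
/* Illustrative only: the little-endian variant in rough C (invented
   helper, not part of the build).  Only the syndrome is byte-reversed, so
   the leading-zero count must be rounded down to a whole byte before that
   byte is pulled out of each data word.

        #include <stdint.h>

        static int epilogue_result_le(uint32_t d1, uint32_t d2, uint32_t synd)
        {
            unsigned shift =
                (unsigned)__builtin_clz(__builtin_bswap32(synd)) & ~7u;
            return (int)((d1 >> shift) & 0xff) - (int)((d2 >> shift) & 0xff);
        }
 */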
L(strcmp_start_addr):
#if STRCMP_NO_PRECHECK == 0

ENTRY_ALIGN (__strcmp_arm, 0)
#if STRCMP_NO_PRECHECK == 0

        strd r4, r5, [sp, #-16]!
        .cfi_def_cfa_offset 16
        strd r6, r7, [sp, #8]
        cbz r2, L(loop_aligned8)

        /* Deal with mutual misalignment by aligning downwards and then
           masking off the unwanted loaded data to prevent a difference.  */
        lsl tmp2, tmp2, #3 /* Bytes -> bits.  */
        ldrd data1a, data1b, [src1], #16
        ldrd data2a, data2b, [src2], #16
        /* In thumb code we can't use MVN with a register shift, but
           we do have ORN.  */
        S2HI tmp1, const_m1, tmp2
        orn data1a, data1a, tmp1
        orn data2a, data2a, tmp1
        beq L(start_realigned8)
        orn data1b, data1b, tmp1
        orn data2b, data2b, tmp1
        b L(start_realigned8)
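
/* Illustrative only: the effect of the masking above in rough C, assuming
   S2HI is the endian-specific "shift toward the most significant string
   byte" macro (lsl on little-endian).  Invented helper, not part of the
   build.  The bytes loaded from before the real start of the strings are
   forced to 0xff in both words, so they can neither produce a difference
   nor look like a NUL terminator.

        #include <stdint.h>

        // skip_bytes is the misalignment being masked away (1..3 for a
        // 32-bit word).
        static uint32_t mask_leading_bytes_le(uint32_t word, unsigned skip_bytes)
        {
            uint32_t wanted = 0xffffffffu << (8 * skip_bytes);
            return word | ~wanted;   // same effect as: orn word, word, wanted
        }
 */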
        /* Unwind the inner loop by a factor of 2, giving 16 bytes per
           pass.  */
        .p2align 5,,12 /* Don't start in the tail bytes of a cache line.  */
        .p2align 2 /* Always word aligned.  */
        ldrd data1a, data1b, [src1], #16
        ldrd data2a, data2b, [src2], #16
        uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits.  */
        eor syndrome_a, data1a, data2a
        sel syndrome_a, syndrome_a, const_m1
        cbnz syndrome_a, L(diff_in_a)
        uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits.  */
        eor syndrome_b, data1b, data2b
        sel syndrome_b, syndrome_b, const_m1
        cbnz syndrome_b, L(diff_in_b)

        ldrd data1a, data1b, [src1, #-8]
        ldrd data2a, data2b, [src2, #-8]
        uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits.  */
        eor syndrome_a, data1a, data2a
        sel syndrome_a, syndrome_a, const_m1
        uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits.  */
        eor syndrome_b, data1b, data2b
        sel syndrome_b, syndrome_b, const_m1
        /* Can't use CBZ for backwards branch.  */
        orrs syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */

        cbnz syndrome_a, L(diff_in_a)
        strcmp_epilogue_aligned syndrome_b, data1b, data2b 1

        strcmp_epilogue_aligned syndrome_a, data1a, data2a 1

        /* Unrolled by a factor of 2, to reduce the number of post-increment
           operations.  */
        ldr data1, [src1], #8
        ldr data2, [src2], #8
        uadd8 syndrome, data1, const_m1 /* Only need GE bits.  */
        eor syndrome, data1, data2
        sel syndrome, syndrome, const_m1
        cbnz syndrome, L(aligned4_done)
        ldr data1, [src1, #-4]
        ldr data2, [src2, #-4]
        uadd8 syndrome, data1, const_m1
        eor syndrome, data1, data2
        sel syndrome, syndrome, const_m1

        strcmp_epilogue_aligned syndrome, data1, data2, 0
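
/* Illustrative only: the shape of the unrolled word-at-a-time loops in
   rough C (invented helpers, not part of the build).  Two words are
   processed per pass so each string pointer is post-incremented only once
   per iteration; a classic bit trick stands in here for the uadd8/sel
   syndrome test used by the real code.

        #include <stdint.h>
        #include <string.h>

        static int differs_or_has_nul(uint32_t w1, uint32_t w2)
        {
            uint32_t has_nul = (w1 - 0x01010101u) & ~w1 & 0x80808080u;
            return w1 != w2 || has_nul != 0;
        }

        static size_t aligned_loop_sketch(const unsigned char *p1,
                                          const unsigned char *p2)
        {
            const unsigned char *start = p1;
            for (;;) {
                uint32_t a1, a2, b1, b2;
                memcpy(&a1, p1, 4); memcpy(&b1, p1 + 4, 4); p1 += 8;
                memcpy(&a2, p2, 4); memcpy(&b2, p2 + 4, 4); p2 += 8;
                if (differs_or_has_nul(a1, a2))
                    break;   // the epilogue turns (a1, a2) into the result
                if (differs_or_has_nul(b1, b2))
                    break;   // likewise for (b1, b2)
            }
            return (size_t)(p1 - start);   // bytes consumed before stopping
        }
 */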
        /* Deal with mutual misalignment by aligning downwards and then
           masking off the unwanted loaded data to prevent a difference.  */
        lsl tmp1, tmp1, #3 /* Bytes -> bits.  */
        ldr data1, [src1], #8
        ldr data2, [src2], #8
        /* In thumb code we can't use MVN with a register shift, but
           we do have ORN.  */
        S2HI tmp1, const_m1, tmp1
        orn data1, data1, tmp1
        orn data2, data2, tmp1
        b L(start_realigned4)

        ldr data1, [src1], #4
#if STRCMP_NO_PRECHECK == 1
        ldrb data2, [src2, #1]
        uxtb tmp1, data1, ror #BYTE1_OFFSET
        subs tmp1, tmp1, data2
        bne L(misaligned_exit)
        cbz data2, L(misaligned_exit)

        ldrb data2, [src2, #2]
        uxtb tmp1, data1, ror #BYTE2_OFFSET
        subs tmp1, tmp1, data2
        bne L(misaligned_exit)
        cbz data2, L(misaligned_exit)

        ldrb data2, [src2, #3]
        uxtb tmp1, data1, ror #BYTE3_OFFSET
        subs tmp1, tmp1, data2
        bne L(misaligned_exit)

        cbnz data2, L(src1_aligned)
#else /* STRCMP_NO_PRECHECK */
        /* If we've done the pre-check, then we don't need to check the
           first byte again here.  */
        ldrb data2, [src2, #2]
        uxtb tmp1, data1, ror #BYTE2_OFFSET
        subs tmp1, tmp1, data2
        bne L(misaligned_exit)
        cbz data2, L(misaligned_exit)

        ldrb data2, [src2, #3]
        uxtb tmp1, data1, ror #BYTE3_OFFSET
        subs tmp1, tmp1, data2
        bne L(misaligned_exit)
        cbnz data2, L(aligned_m1)

#if STRCMP_NO_PRECHECK == 0
        /* src1 is word aligned, but src2 has no common alignment
           with it.  */
        ldr data1, [src1], #4
        lsls tmp1, src2, #31 /* C=src2[1], Z set iff src2[0]=0.  */

        ldr data2, [src2], #4
        bhi L(overlap1) /* C=1, Z=0 => src2[1:0] = 0b11.  */
        bcs L(overlap2) /* C=1, Z=1 => src2[1:0] = 0b10.  */

        /* (overlap3) C=0, Z=0 => src2[1:0] = 0b01.  */
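
/* Illustrative only: the dispatch above in rough C (invented helper, not
   part of the build).  "lsls tmp1, src2, #31" moves src2 bit 1 into the
   carry flag and sets Z when src2 bit 0 is clear, so the two conditional
   branches plus the fall-through pick one handler per low-bit pattern; as
   the labels suggest, overlapN handles the case where N useful string
   bytes remain in the first word loaded from src2.

        #include <stdint.h>

        static unsigned src2_low_bits_case(uintptr_t src2)
        {
            switch (src2 & 3) {
            case 3:  return 1;   // overlap1
            case 2:  return 2;   // overlap2
            case 1:  return 3;   // overlap3
            default: return 0;   // word aligned: handled elsewhere
            }
        }
 */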
        bic tmp1, data1, #MSB
        uadd8 syndrome, data1, const_m1
        eors syndrome, tmp1, data2, S2LO #8
        sel syndrome, syndrome, const_m1
        ldr data2, [src2], #4
        eor tmp1, tmp1, data1
        cmp tmp1, data2, S2HI #24
        ldr data1, [src1], #4
        S2LO data2, data2, #8
        bics syndrome, syndrome, #MSB
        bne L(strcmp_done_equal)

        /* We can only get here if the MSB of data1 contains 0, so
           fast-path the exit.  */
        ldrd r4, r5, [sp], #16
        /* R6/7 not used in this sequence.  */
        S2LO data1, data1, #24
        and data2, data2, #LSB

        .p2align 5,,12 /* Ensure at least 3 instructions in cache line.  */
        and tmp1, data1, const_m1, S2LO #16
        uadd8 syndrome, data1, const_m1
        eors syndrome, tmp1, data2, S2LO #16
        sel syndrome, syndrome, const_m1
        ldr data2, [src2], #4
        eor tmp1, tmp1, data1
        cmp tmp1, data2, S2HI #16
        ldr data1, [src1], #4
        S2LO data2, data2, #16
        ands syndrome, syndrome, const_m1, S2LO #16
        bne L(strcmp_done_equal)

        S2LO data1, data1, #16
#ifdef __ARM_BIG_ENDIAN
        lsl data2, data2, #16

        S2LO data1, data1, #16
        and data2, data2, const_m1, S2LO #16

        .p2align 5,,12 /* Ensure at least 3 instructions in cache line.  */
        and tmp1, data1, #LSB
        uadd8 syndrome, data1, const_m1
        eors syndrome, tmp1, data2, S2LO #24
        sel syndrome, syndrome, const_m1
        ldr data2, [src2], #4
        eor tmp1, tmp1, data1
        cmp tmp1, data2, S2HI #8
        ldr data1, [src1], #4
        S2LO data2, data2, #24
        bne L(strcmp_done_equal)

        S2LO data1, data1, #8
        bic data2, data2, #MSB

L(strcmp_done_equal):
        ldrd r4, r5, [sp], #16
        /* R6/7 not used in this sequence.  */

#ifndef __ARM_BIG_ENDIAN
        /* Now everything looks big-endian...  */
        uadd8 tmp1, data1, const_m1
        eor tmp1, data1, data2
        sel syndrome, tmp1, const_m1
        lsl data1, data1, tmp1
        lsl data2, data2, tmp1
        lsr result, data1, #24
        ldrd r4, r5, [sp], #16
        /* R6/7 not used in this sequence.  */
        sub result, result, data2, lsr #24

#endif /* __ARM_ARCH >= 7 && __ARM_ARCH_ISA_ARM >= 1 */