2 * Copyright (c) 2012-2014 ARM Ltd
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. The name of the company may not be used to endorse or promote
14 * products derived from this software without specific prior written
17 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 /* Implementation of strcmp for ARMv6. Use ldrd to support wider
30 loads, provided the data is sufficiently aligned. Use
31 saturating arithmetic to optimize the compares. */
34 STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first
35 byte in the string. If comparing completely random strings
36 the pre-check will save time, since there is a very high
37 probability of a mismatch in the first character: we save
38 significant overhead if this is the common case. However,
39 if strings are likely to be identical (eg because we're
40 verifying a hit in a hash table), then this check is largely redundant.  */
45 /* Parameters and result. */
48 #define result r0 /* Overlaps src1. */
50 /* Internal variables. */
/* NOTE(review): the #defines for src1/src2, tmp1/tmp2 and the data/const
   registers referenced below are on lines omitted from this excerpt —
   confirm the register assignments against the full file.  */
55 /* Additional internal variables for 64-bit aligned data. */
60 #define syndrome_a tmp1
61 #define syndrome_b tmp2
63 /* Additional internal variables for 32-bit aligned data. */
69 /* Macro to compute and return the result value for word-aligned cases.
   Arguments: \synd = syndrome word, \d1/\d2 = the data words that produced
   it, \restore_r6 = whether r6/r7 must also be reloaded from the stack.
   NOTE(review): the .if guard selecting the r6/r7 reload and the .endm are
   on lines not visible in this excerpt — confirm against the full file.  */
71 .macro strcmp_epilogue_aligned synd d1 d2 restore_r6
72 #ifdef __ARM_BIG_ENDIAN
73 /* If data1 contains a zero byte, then syndrome will contain a 1 in
74 bit 7 of that byte.  Otherwise, the highest set bit in the
75 syndrome will highlight the first different bit.  It is therefore
76 sufficient to extract the eight bits starting with the syndrome bit.  */
88 ldrd r4, r5, [sp], #16 /* Restore callee-saved r4/r5 and pop the 16-byte frame.  */
91 sub result, result, r1, lsr #24
94 /* To use the big-endian trick we'd have to reverse all three words,
95 but that's slower than this approach.  */
102 ldrd r6, r7, [sp, #8]
107 and result, \d1, #255 /* Extract the lowest byte of \d1.  */
109 ldrd r4, r5, [sp], #16
112 sub result, result, r1
/* strcmp body.  Zero/difference detection idiom used throughout:
   `uadd8 x, data, const_m1` sets each APSR.GE flag iff the corresponding
   byte of `data` is non-zero (0xff + byte carries out only when the byte
   is != 0); `sel` then substitutes 0xff for the bytes of any NUL, so the
   resulting syndrome is non-zero iff the two words differ or data1
   contains a NUL byte.  (const_m1 is presumed to hold 0xffffffff — its
   #define is not visible in this excerpt.)  */
121 #ifndef STRCMP_NO_PRECHECK
127 #ifndef STRCMP_NO_PRECHECK
134 .cfi_sections .debug_frame
136 strd r4, r5, [sp, #-16]! /* Save callee-saved r4/r5; allocate 16-byte frame.  */
137 .cfi_def_cfa_offset 16
141 strd r6, r7, [sp, #8] /* Save callee-saved r6/r7 in the same frame.  */
153 /* Deal with mutual misalignment by aligning downwards and then
154 masking off the unwanted loaded data to prevent a difference. */
159 lsl tmp2, tmp2, #3 /* Bytes -> bits. */
160 ldrd data1a, data1b, [src1], #16
162 ldrd data2a, data2b, [src2], #16
163 /* In ARM code we can't use ORN, but we do have MVN with a
   register-shifted operand.  */
165 mvn tmp1, const_m1, S2HI tmp2
166 orr data1a, data1a, tmp1 /* Force the unwanted leading bytes to 0xff in both words.  */
167 orr data2a, data2a, tmp1
168 beq .Lstart_realigned8
169 orr data1b, data1b, tmp1
171 orr data2b, data2b, tmp1
175 /* Unwind the inner loop by a factor of 2, giving 16 bytes per iteration.  */
177 .p2align 5,,12 /* Don't start in the tail bytes of a cache line. */
178 .p2align 2 /* Always word aligned. */
180 ldrd data1a, data1b, [src1], #16
181 ldrd data2a, data2b, [src2], #16
183 uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits.  */
184 eor syndrome_a, data1a, data2a
185 sel syndrome_a, syndrome_a, const_m1
186 uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits. */
187 eor syndrome_b, data1b, data2b
188 sel syndrome_b, syndrome_b, const_m1
189 orrs syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */
192 ldrd data1a, data1b, [src1, #-8]
193 ldrd data2a, data2b, [src2, #-8]
194 uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits.  */
195 eor syndrome_a, data1a, data2a
196 sel syndrome_a, syndrome_a, const_m1
197 uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits. */
198 eor syndrome_b, data1b, data2b
199 sel syndrome_b, syndrome_b, const_m1
200 orrs syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */
208 strcmp_epilogue_aligned syndrome_b, data1b, data2b 1
212 strcmp_epilogue_aligned syndrome_a, data1a, data2a 1
221 /* Unrolled by a factor of 2, to reduce the number of post-increment
   operations.  */
224 ldr data1, [src1], #8
225 ldr data2, [src2], #8
227 uadd8 syndrome, data1, const_m1 /* Only need GE bits. */
228 eor syndrome, data1, data2
229 sel syndrome, syndrome, const_m1
233 ldr data1, [src1, #-4]
234 ldr data2, [src2, #-4]
235 uadd8 syndrome, data1, const_m1
236 eor syndrome, data1, data2
237 sel syndrome, syndrome, const_m1
242 strcmp_epilogue_aligned syndrome, data1, data2, 0
246 /* Deal with mutual misalignment by aligning downwards and then
247 masking off the unwanted loaded data to prevent a difference. */
248 lsl tmp1, tmp1, #3 /* Bytes -> bits. */
250 ldr data1, [src1], #8
252 ldr data2, [src2], #8
254 /* In ARM code we can't use ORN, but we do have MVN with a
   register-shifted operand.  */
256 mvn tmp1, const_m1, S2HI tmp1
257 orr data1, data1, tmp1
258 orr data2, data2, tmp1
267 ldr data1, [src1], #4
271 #ifdef STRCMP_NO_PRECHECK
272 ldrb data2, [src2, #1]
273 uxtb tmp1, data1, ror #BYTE1_OFFSET
276 bne .Lmisaligned_exit
279 ldrb data2, [src2, #2]
280 uxtb tmp1, data1, ror #BYTE2_OFFSET
283 bne .Lmisaligned_exit
286 ldrb data2, [src2, #3]
287 uxtb tmp1, data1, ror #BYTE3_OFFSET
292 #else /* STRCMP_NO_PRECHECK */
293 /* If we've done the pre-check, then we don't need to check the
294 first byte again here. */
295 ldrb data2, [src2, #2]
296 uxtb tmp1, data1, ror #BYTE2_OFFSET
299 bne .Lmisaligned_exit
302 ldrb data2, [src2, #3]
303 uxtb tmp1, data1, ror #BYTE3_OFFSET
311 sub result, tmp1, data2
316 #ifndef STRCMP_NO_PRECHECK
322 /* src1 is word aligned, but src2 has no common alignment with it.  */
324 ldr data1, [src1], #4
325 lsls tmp1, src2, #31 /* C=src2[1], Z=src2[0]. */
328 ldr data2, [src2], #4
329 bhi .Loverlap1 /* C=1, Z=0 => src2[1:0] = 0b11. */
330 bcs .Loverlap2 /* C=1, Z=1 => src2[1:0] = 0b10. */
332 /* (overlap3) C=0, Z=0 => src2[1:0] = 0b01. */
334 bic tmp1, data1, #MSB
335 uadd8 syndrome, data1, const_m1
336 eors syndrome, tmp1, data2, S2LO #8
337 sel syndrome, syndrome, const_m1
340 ldreq data2, [src2], #4
343 eor tmp1, tmp1, data1
344 cmp tmp1, data2, S2HI #24
346 ldr data1, [src1], #4
349 S2LO data2, data2, #8
353 bics syndrome, syndrome, #MSB
354 bne .Lstrcmp_done_equal
356 /* We can only get here if the MSB of data1 contains 0, so
357 fast-path the exit. */
360 ldrd r4, r5, [sp], #16 /* Restore callee-saved r4/r5 and pop the frame.  */
363 /* R6/7 Not used in this sequence. */
371 S2LO data1, data1, #24
372 and data2, data2, #LSB
375 .p2align 5,,12 /* Ensure at least 3 instructions in cache line. */
377 and tmp1, data1, const_m1, S2LO #16
378 uadd8 syndrome, data1, const_m1
379 eors syndrome, tmp1, data2, S2LO #16
380 sel syndrome, syndrome, const_m1
383 ldreq data2, [src2], #4
385 eor tmp1, tmp1, data1
386 cmp tmp1, data2, S2HI #16
388 ldr data1, [src1], #4
391 S2LO data2, data2, #16
394 ands syndrome, syndrome, const_m1, S2LO #16
395 bne .Lstrcmp_done_equal
398 S2LO data1, data1, #16
399 #ifdef __ARM_BIG_ENDIAN
400 lsl data2, data2, #16
405 S2LO data1, data1, #16
406 and data2, data2, const_m1, S2LO #16
409 .p2align 5,,12 /* Ensure at least 3 instructions in cache line. */
411 and tmp1, data1, #LSB
412 uadd8 syndrome, data1, const_m1
413 eors syndrome, tmp1, data2, S2LO #24
414 sel syndrome, syndrome, const_m1
417 ldreq data2, [src2], #4
419 eor tmp1, tmp1, data1
420 cmp tmp1, data2, S2HI #8
422 ldr data1, [src1], #4
425 S2LO data2, data2, #24
429 bne .Lstrcmp_done_equal
432 S2LO data1, data1, #8
433 bic data2, data2, #MSB
439 ldrd r4, r5, [sp], #16 /* Restore callee-saved r4/r5 and pop the frame.  */
442 /* R6/7 not used in this sequence. */
449 #ifndef __ARM_BIG_ENDIAN
452 /* Now everything looks big-endian... */
454 uadd8 tmp1, data1, const_m1
455 eor tmp1, data1, data2
456 sel syndrome, tmp1, const_m1
458 lsl data1, data1, tmp1 /* Shift the first mismatching/NUL byte to the top.  */
459 lsl data2, data2, tmp1
460 lsr result, data1, #24
461 ldrd r4, r5, [sp], #16 /* Restore callee-saved r4/r5 and pop the frame.  */
464 /* R6/7 not used in this sequence. */
467 sub result, result, data2, lsr #24 /* result = byte(data1) - byte(data2).  */
470 .size strcmp, . - .Lstrcmp_start_addr