2 * Copyright (c) 2012-2014 ARM Ltd
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. The name of the company may not be used to endorse or promote
14 * products derived from this software without specific prior written
17 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 /* Very similar to the generic code, but uses Thumb2 as implemented
34 /* Parameters and result. */
/* The names below are preprocessor aliases for core registers.  Only two
   of them are defined in this excerpt; the other symbolic names the code
   uses (src1, src2, data1, data2, tmp1, tmp2) are defined elsewhere.  */
/* result lives in r0, the same register as src1, so writing result
   destroys the src1 pointer.  */
37 #define result r0 /* Overlaps src1. */
39 /* Internal variables. */
/* syndrome shares r12 with tmp1, so the two values cannot be live at the
   same time.  */
44 #define syndrome r12 /* Overlaps tmp1 */
/* Function entry.  .cfi_sections selects .debug_frame as the section that
   receives the call-frame information produced by the .cfi_* directives.  */
50 .cfi_sections .debug_frame
/* 'prologue' is a macro defined outside this excerpt; the 'epilogue'
   invocations further down are passed the same push_ip argument.  */
52 prologue push_ip=HAVE_PAC_LEAF
55 /* Strings not at same byte offset from a word boundary. */
/* NOTE(review): the instructions that set the flags for this branch are
   not visible in this excerpt.  NE diverts to .Lstrcmp_unaligned.  */
56 bne .Lstrcmp_unaligned
/* EQ only: post-indexed load of one word through src2, after which src2
   has advanced by 4.  The flag-setting instruction is again not visible
   here.  */
62 ldreq data2, [src2], #4
64 /* Although s1 and s2 have identical initial alignment, they are
65 not currently word aligned. Rather than comparing bytes,
66 make sure that any bytes fetched from before the addressed
67 bytes are forced to 0xff. Then they will always compare
72 S2LO tmp1, data2, tmp1
74 orr data1, data1, tmp1
75 orr data2, data2, tmp1
79 sub syndrome, data1, #0x01010101
81 /* check for any zero bytes in first word */
/* The EQ-conditional instructions below finish the test started by the
   'sub' above: syndrome = (data1 - 0x01010101) & ~data1, and the TST
   against 0x80808080 leaves Z set only when no byte of that value has its
   top bit set.  NOTE(review): the instruction that sets the flags these
   EQ conditions start from is not visible in this excerpt.  */
83 biceq syndrome, syndrome, data1
84 tsteq syndrome, #0x80808080
/* Still EQ: post-indexed loads fetch the next word through src1 and src2
   and advance each pointer by 4.  */
85 ldreq data1, [src1], #4
86 ldreq data2, [src2], #4
90 /* There's a zero or a different byte in the word */
/* S2HI and S2LOEQ are macros defined outside this excerpt.
   NOTE(review): they look like endian-dependent shift mnemonics --
   confirm against their definitions.  result receives data1 shifted by
   24 bits.  */
91 S2HI result, data1, #24
/* Runs only when carry is set (by a compare that is not visible here):
   compares result with data2 shifted by 24 bits.  */
95 cmpcs result, data2, S2HI #24
/* EQ only: shift data2 by 8 bits.  NOTE(review): presumably this steps
   data2 on to its next byte -- confirm.  */
97 S2LOEQ data2, data2, #8
99 /* On a big-endian machine, RESULT contains the desired byte in bits
100 0-7; on a little-endian machine they are in bits 24-31. In
101 both cases the other bits in RESULT are all zero. For DATA2 the
102 interesting byte is at the other end of the word, but the
103 other bits are not necessarily zero. We need a signed result
104 representing the difference in the unsigned bytes, so for the
105 little-endian case we can't just shift the interesting bits
/* Big-endian build: subtract the top byte of data2 (data2 >> 24) from
   result.  */
107 #ifdef __ARM_BIG_ENDIAN
108 sub result, result, data2, lsr #24
/* NOTE(review): the preprocessor directives that separate the two
   variants (original lines 109 and 113) are missing from this excerpt.
   The three instructions below appear to be the little-endian variant:
   keep the low byte of data2, move the top byte of result down to bits
   0-7, then subtract.  */
110 and data2, data2, #255
111 lsrs result, result, #24
112 subs result, result, data2
/* 'epilogue' is a macro defined outside this excerpt.  NOTE(review):
   presumably it undoes the prologue and returns -- confirm.  */
114 epilogue push_ip=HAVE_PAC_LEAF
118 /* The assembly code below is based on the following algorithm. */
119 #ifdef __ARM_BIG_ENDIAN
127 #define body(shift) \
128 mask = 0xffffffffU RSHIFT shift; \
133 tmp2 = data1 & mask; \
134 if (__builtin_expect(tmp2 != data2 RSHIFT shift, 0)) \
136 data2 RSHIFT= shift; \
139 if (__builtin_expect(((data1 - b1) & ~data1) & (b1 << 7), 0)) \
141 /* See comment in assembler below re syndrome on big-endian */\
142 if ((((data1 - b1) & ~data1) & (b1 << 7)) & mask) \
143 data2 RSHIFT= shift; \
147 tmp2 = data1 RSHIFT (32 - shift); \
148 data2 = (data2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \
154 if (__builtin_expect(tmp2 != data2 LSHIFT (32 - shift), 0)) \
156 tmp2 = data1 >> (32 - shift); \
157 data2 = (data2 << (32 - shift)) RSHIFT (32 - shift); \
163 const unsigned* src1;
164 const unsigned* src2;
165 unsigned data1, data2;
168 unsigned b1 = 0x01010101;
172 while (((unsigned) s1) & 3)
176 if (c1 == 0 || c1 != c2)
179 src1 = (unsigned*) (((unsigned)s1) & ~3);
180 src2 = (unsigned*) (((unsigned)s2) & ~3);
181 tmp2 = ((unsigned) s2) & 3;
197 #ifdef __ARM_BIG_ENDIAN
/* Take the most significant byte of each word.  The shift has to be
   parenthesised: a cast binds tighter than >>, so "(char) x >> 24" would
   first truncate x to its low byte and only then shift that byte.  */
198 c1 = (char) (tmp2 >> 24);
199 c2 = (char) (data2 >> 24);
200 #else /* not __ARM_BIG_ENDIAN */
203 #endif /* not __ARM_BIG_ENDIAN */
206 } while (c1 != 0 && c1 == c2);
211 /* First of all, compare bytes until src1(sp1) is word-aligned. */
/* Load one byte through each pointer; both pointers advance by 1.  */
217 ldrb data1, [src1], #1
218 ldrb data2, [src2], #1
/* NOTE(review): the compare that sets the flags for this branch is not
   visible in this excerpt.  EQ branches to .Lstrcmp_unaligned; otherwise
   the difference of the two bytes becomes the result.  */
222 beq .Lstrcmp_unaligned
223 sub result, data1, data2
224 epilogue push_ip=HAVE_PAC_LEAF
/* Unwind bookkeeping: the CFA offset grows by 4.  NOTE(review):
   presumably this pairs with a 4-byte push that is not visible here --
   confirm.  */
229 .cfi_adjust_cfa_offset 4
/* Post-indexed word loads through src1 and src2; each pointer advances
   by 4.  */
232 ldr data1, [src1], #4
235 ldr data2, [src2], #4
240 /* Critical inner Loop: Block with 3 bytes initial overlap */
/* MSB, LSB, S2LO and S2HI are macros defined outside this excerpt.
   NOTE(review): they appear to be endian-dependent byte masks and shift
   mnemonics -- confirm against their definitions.  */
/* tmp2 = data1 with the bits selected by MSB cleared; compare it with
   data2 shifted by 8 bits.  */
243 bic tmp2, data1, #MSB
244 cmp tmp2, data2, S2LO #8
/* syndrome = (data1 - 0x01010101) & ~data1.  Neither instruction carries
   the S suffix, so (in unified syntax) the flags from the compare above
   are left untouched.  */
245 sub syndrome, data1, #0x01010101
246 bic syndrome, syndrome, data1
/* Keep only the top bit of each byte of syndrome.  Z is set when none
   remain, and in that case the next word is loaded through src2.  */
248 ands syndrome, syndrome, #0x80808080
250 ldreq data2, [src2], #4
/* XOR with data1 leaves exactly the bits the BIC above removed
   (data1 & MSB), assuming none of the lines missing from this excerpt
   rewrites tmp2.  Those bits are compared with the new data2 shifted by
   24 bits.  */
252 eor tmp2, tmp2, data1
253 cmp tmp2, data2, S2HI #24
/* Next word through src1 (post-indexed, src1 advances by 4).  */
255 ldr data1, [src1], #4
/* NOTE(review): the labels and branches that separate the loop body from
   the exit fragments below are missing from this excerpt, so the control
   flow between them cannot be read off here.  */
258 S2LO data2, data2, #8
262 #ifdef __ARM_BIG_ENDIAN
263 /* The syndrome value may contain false ones if the string ends
264 with the bytes 0x01 0x00. */
/* Z ends up set as soon as one of the three tested byte fields of data1
   (bits 31-24, 23-16, 15-8) is zero; each TSTNE only runs while the
   previously tested field was non-zero.  */
265 tst data1, #0xff000000
267 tstne data1, #0x00ff0000
268 tstne data1, #0x0000ff00
269 beq .Lstrcmp_done_equal
/* NOTE(review): the directive that presumably introduces this
   alternative (original line 270) is not visible.  Clears the top byte
   of syndrome and branches if any bits are left.  */
271 bics syndrome, syndrome, #0xff000000
272 bne .Lstrcmp_done_equal
/* tmp2 = data1 shifted by 24 bits.  */
275 S2LO tmp2, data1, #24
276 #ifdef __ARM_BIG_ENDIAN
277 lsl data2, data2, #24
/* tmp2 = data1 shifted by 24 bits; data2 is reduced to the bits selected
   by LSB.  */
282 S2LO tmp2, data1, #24
283 and data2, data2, #LSB
286 /* Critical inner Loop: Block with 2 bytes initial overlap. */
/* S2HI and S2LO are macros defined outside this excerpt.
   NOTE(review): they appear to be endian-dependent shift mnemonics --
   confirm against their definitions.  */
/* tmp2 = data1 shifted by 16 bits.  NOTE(review): original line 291,
   between the SUB and the BIC, is missing from this excerpt and may
   modify tmp2 further before the compare.  */
289 S2HI tmp2, data1, #16
/* syndrome = (data1 - 0x01010101) & ~data1.  */
290 sub syndrome, data1, #0x01010101
292 bic syndrome, syndrome, data1
/* Compare tmp2 with data2 shifted by 16 bits.  */
293 cmp tmp2, data2, S2LO #16
/* Keep only the top bit of each byte of syndrome.  Z is set when none
   remain, and in that case the next word is loaded through src2.  */
295 ands syndrome, syndrome, #0x80808080
297 ldreq data2, [src2], #4
/* tmp2 ^= data1, then compare with the new data2 shifted by 16 bits.  */
299 eor tmp2, tmp2, data1
300 cmp tmp2, data2, S2HI #16
/* Next word through src1 (post-indexed, src1 advances by 4).  */
302 ldr data1, [src1], #4
/* NOTE(review): the labels and branches that separate the loop body from
   the exit fragments below are missing from this excerpt.  */
306 #ifdef __ARM_BIG_ENDIAN
307 /* The syndrome value may contain false ones if the string ends
308 with the bytes 0x01 0x00 */
/* Z ends up set if either of the two tested byte fields of data1
   (bits 31-24, 23-16) is zero; the TSTNE only runs when the first field
   was non-zero.  */
309 tst data1, #0xff000000
311 tstne data1, #0x00ff0000
312 beq .Lstrcmp_done_equal
/* NOTE(review): the directive that presumably introduces this
   alternative (original line 313) is not visible.  Shifting left by 16
   discards the upper half of syndrome; branch if the lower half held any
   set bits.  */
314 lsls syndrome, syndrome, #16
315 bne .Lstrcmp_done_equal
/* tmp2 = data1 shifted by 16 bits.  */
318 S2LO tmp2, data1, #16
319 #ifdef __ARM_BIG_ENDIAN
320 lsl data2, data2, #16
/* The remaining fragments shift data2 and data1 by 16 bits with the S2HI
   and S2LO macros; the branches between them are not visible here.  */
325 S2HI data2, data2, #16
326 S2LO tmp2, data1, #16
328 S2LO data2, data2, #16
331 /* Critical inner Loop: Block with 1 byte initial overlap. */
/* LSB, MSB, S2LO and S2HI are macros defined outside this excerpt.
   NOTE(review): they appear to be endian-dependent byte masks and shift
   mnemonics -- confirm against their definitions.  */
/* tmp2 = the bits of data1 selected by LSB; compare them with data2
   shifted by 24 bits.  */
334 and tmp2, data1, #LSB
335 cmp tmp2, data2, S2LO #24
/* syndrome = (data1 - 0x01010101) & ~data1.  Neither instruction carries
   the S suffix, so (in unified syntax) the flags from the compare above
   are left untouched.  */
336 sub syndrome, data1, #0x01010101
337 bic syndrome, syndrome, data1
/* Keep only the top bit of each byte of syndrome.  Z is set when none
   remain, and in that case the next word is loaded through src2.  */
339 ands syndrome, syndrome, #0x80808080
341 ldreq data2, [src2], #4
/* XOR with data1 leaves the bits the AND above did not keep
   (data1 & ~LSB), assuming none of the lines missing from this excerpt
   rewrites tmp2.  Those bits are compared with the new data2 shifted by
   8 bits.  */
343 eor tmp2, tmp2, data1
344 cmp tmp2, data2, S2HI #8
/* Next word through src1 (post-indexed, src1 advances by 4).  */
346 ldr data1, [src1], #4
/* NOTE(review): the labels and branches that separate the loop body from
   the exit fragments below are missing from this excerpt.  */
349 S2LO data2, data2, #24
352 /* The syndrome value may contain false ones if the string ends
353 with the bytes 0x01 0x00. */
/* NOTE(review): the test that sets the flags for this branch (original
   line 354) is not visible.  */
355 beq .Lstrcmp_done_equal
/* Load another word through src2 (src2 advances by 4), then clear the
   bits of data2 selected by MSB.  */
356 ldr data2, [src2], #4
359 bic data2, data2, #MSB
/* Unwind bookkeeping: the CFA offset shrinks by 4, mirroring the
   '.cfi_adjust_cfa_offset 4' earlier in the function.  NOTE(review): the
   instruction that actually releases the stack slot is not visible in
   this excerpt.  */
366 .cfi_adjust_cfa_offset -4
/* 'epilogue' is a macro defined outside this excerpt.  NOTE(review):
   presumably it undoes the prologue and returns -- confirm.  */
367 epilogue push_ip=HAVE_PAC_LEAF
/* result = the bits of data2 selected by LSB (a macro defined outside
   this excerpt).  */
372 and result, data2, #LSB
/* EQ only: shift tmp2 and data2 by 8 bits each.  S2LOEQ is a macro
   defined outside this excerpt, and the compare that sets the flags is
   not visible.  NOTE(review): presumably this steps both words on to
   their next byte -- confirm.  */
377 S2LOEQ tmp2, tmp2, #8
378 S2LOEQ data2, data2, #8
/* result = r2 - result.  r2 is written as a raw register name here
   rather than through one of the symbolic aliases.  */
380 sub result, r2, result
/* Second return path: same CFA adjustment and epilogue as above.  */
383 .cfi_adjust_cfa_offset -4
384 epilogue push_ip=HAVE_PAC_LEAF
/* Record the size of the strcmp symbol: the current location minus the
   address of the strcmp label.  */
388 .size strcmp, . - strcmp