2 * Copyright (c) 2012-2014 ARM Ltd
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. The name of the company may not be used to endorse or promote
14 * products derived from this software without specific prior written
17 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 /* Basic ARM implementation. This should run on anything except
30 for ARMv6-M, but there are better implementations for later
31 revisions of the architecture. This version can support ARMv4T
32 ARM/Thumb interworking. */
33 /* Parameters and result. */
36 #define result r0 /* Overlaps src1. */
38 /* Internal variables. */
44 #define syndrome r12 /* Overlaps tmp1 */
46 /* For armv4t and newer, toolchains will transparently convert
47 'bx lr' to 'mov pc, lr' if needed. GCC has deprecated support
48 for anything older than armv4t, but this should handle that
49 corner case in case anyone needs it anyway */
51 #if __ARM_ARCH <= 4 && __ARM_ARCH_ISA_THUMB == 0
/* Word-at-a-time path of strcmp: this code is reached when the branch to
   .Lstrcmp_unaligned below is not taken, i.e. when both strings sit at
   the same byte offset from a word boundary.  */
60 .cfi_sections .debug_frame
64 /* Strings not at same byte offset from a word boundary. */
65 bne .Lstrcmp_unaligned
/* Load a word of the second string and advance src2 by 4, only when the
   flags read EQ.  */
70 ldreq data2, [src2], #4
72 /* Although s1 and s2 have identical initial alignment, they are
73 not currently word aligned. Rather than comparing bytes,
74 make sure that any bytes fetched from before the addressed
75 bytes are forced to 0xff. Then they will always compare
80 S2LO tmp1, data2, tmp1
82 orr data1, data1, tmp1
83 orr data2, data2, tmp1
85 /* Load the 'magic' constant 0x01010101. */
/* magic1 |= magic1 << 8, then magic1 |= magic1 << 16: this turns 0x01
   into 0x01010101.  */
90 orr magic1, magic1, magic1, lsl #8
91 orr magic1, magic1, magic1, lsl #16
/* syndrome = data1 - magic1.  Together with the bic and tst below this
   forms (data1 - 0x01010101) & ~data1 & 0x80808080, which is non-zero
   when data1 holds a zero byte.  */
94 sub syndrome, data1, magic1
96 /* check for any zero bytes in first word */
97 biceq syndrome, syndrome, data1
98 tsteq syndrome, magic1, lsl #7
/* Still EQ after the test: advance to the next word of each string.  */
99 ldreq data1, [src1], #4
100 ldreq data2, [src2], #4
103 /* There's a zero or a different byte in the word */
/* S2HI, S2LO and S2LOEQ are shift mnemonics supplied by macros defined
   elsewhere; presumably they map to lsl/lsr according to endianness --
   TODO confirm.  One byte of data1 is moved into result and data1 is
   stepped on by 8 bits.  */
104 S2HI result, data1, #24
105 S2LO data1, data1, #8
107 cmpcs result, data2, S2HI #24
108 S2LOEQ data2, data2, #8
110 /* On a big-endian machine, RESULT contains the desired byte in bits
111 0-7; on a little-endian machine they are in bits 24-31. In
112 both cases the other bits in RESULT are all zero. For DATA2 the
113 interesting byte is at the other end of the word, but the
114 other bits are not necessarily zero. We need a signed result
115 representing the difference in the unsigned bytes, so for the
116 little-endian case we can't just shift the interesting bits
118 #ifdef __ARM_BIG_ENDIAN
119 sub result, result, data2, lsr #24
121 and data2, data2, #255
122 rsb result, data2, result, lsr #24
126 .cfi_def_cfa_offset 0
131 /* The assembly code below is based on the following algorithm. */
132 #ifdef __ARM_BIG_ENDIAN
/* body(shift): reference form of one unaligned compare loop.  It builds
   mask from 0xffffffff and shift, compares data1 & mask with data2
   shifted by `shift`, tests data1 for a zero byte with
   ((data1 - b1) & ~data1) & (b1 << 7), and compares tmp2 with data2
   shifted the other way by (32 - shift).  RSHIFT and LSHIFT are operator
   macros defined elsewhere; presumably they are swapped on big-endian
   targets -- TODO confirm.  */
140 #define body(shift) \
141 mask = 0xffffffffU RSHIFT shift; \
146 tmp2 = data1 & mask; \
147 if (__builtin_expect(tmp2 != data2 RSHIFT shift, 0)) \
149 data2 RSHIFT= shift; \
152 if (__builtin_expect(((data1 - b1) & ~data1) & (b1 << 7), 0)) \
154 /* See comment in assembler below re syndrome on big-endian */\
155 if ((((data1 - b1) & ~data1) & (b1 << 7)) & mask) \
156 data2 RSHIFT= shift; \
160 tmp2 = data1 RSHIFT (32 - shift); \
161 data2 = (data2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \
167 if (__builtin_expect(tmp2 != data2 LSHIFT (32 - shift), 0)) \
169 tmp2 = data1 >> (32 - shift); \
170 data2 = (data2 << (32 - shift)) RSHIFT (32 - shift); \
/* Locals of the reference C routine: word pointers into each string, the
   two words currently being compared, and the 0x01010101 constant used
   by the zero-byte test.  */
176 const unsigned* src1;
177 const unsigned* src2;
178 unsigned data1, data2;
181 unsigned b1 = 0x01010101;
/* Byte-wise loop: runs for as long as s1 is not 4-byte aligned.  */
185 while (((unsigned) s1) & 3)
189 if (c1 == 0 || c1 != c2)
/* Round both pointers down to a word boundary; tmp2 keeps the offset of
   s2 within its word (0-3).  */
192 src1 = (unsigned*) (((unsigned)s1) & ~3);
193 src2 = (unsigned*) (((unsigned)s2) & ~3);
194 tmp2 = ((unsigned) s2) & 3;
210 #ifdef __ARM_BIG_ENDIAN
/* NOTE(review): a cast binds tighter than >>, so these two lines narrow
   to char before shifting and never select the top byte;
   (char) (tmp2 >> 24) and (char) (data2 >> 24) look intended -- confirm.  */
211 c1 = (char) tmp2 >> 24;
212 c2 = (char) data2 >> 24;
213 #else /* not __ARM_BIG_ENDIAN */
216 #endif /* not __ARM_BIG_ENDIAN */
/* Keep comparing bytes until a NUL or a mismatch is seen.  */
219 } while (c1 != 0 && c1 == c2);
224 /* First of all, compare bytes until src1(sp1) is word-aligned. */
/* One byte from each string; both pointers post-increment by 1.  */
228 ldrb data1, [src1], #1
229 ldrb data2, [src2], #1
/* Repeat the byte step while the flags read EQ.  */
232 beq .Lstrcmp_unaligned
/* Loop left: result = data1 - data2.  */
233 sub result, data1, data2
/* Unwind info: the CFA offset is now 8.  */
238 .cfi_def_cfa_offset 8
/* magic1 |= magic1 << 8, then magic1 |= magic1 << 16: this turns 0x01
   into 0x01010101.  */
242 orr magic1, magic1, magic1, lsl #8
243 orr magic1, magic1, magic1, lsl #16
/* Prime the word loops: one word from each string, each pointer
   post-incremented by 4.  */
245 ldr data1, [src1], #4
248 ldr data2, [src2], #4
253 /* Critical inner Loop: Block with 3 bytes initial overlap */
/* tmp2 = data1 with the bits named by MSB cleared (MSB is a constant
   defined elsewhere); it is compared with data2 shifted by 8 bits.  */
256 bic tmp2, data1, #MSB
257 cmp tmp2, data2, S2LO #8
/* Zero-byte syndrome: (data1 - magic1) & ~data1, masked below with
   magic1 << 7.  The ands also sets the flags for the conditional load.  */
258 sub syndrome, data1, magic1
259 bic syndrome, syndrome, data1
261 ands syndrome, syndrome, magic1, lsl #7
/* No zero byte flagged: fetch the next word of the second string.  */
262 ldreq data2, [src2], #4
/* tmp2 ^ data1 leaves exactly the bits the bic above removed; they are
   compared with the new data2 shifted by 24 bits.  */
264 eor tmp2, tmp2, data1
265 cmp tmp2, data2, S2HI #24
267 ldr data1, [src1], #4
270 S2LO data2, data2, #8
274 #ifdef __ARM_BIG_ENDIAN
275 /* The syndrome value may contain false ones if the string ends
276 with the bytes 0x01 0x00. */
/* Test the three upper bytes of data1 one at a time; Z is left set as
   soon as one of them is zero.  */
277 tst data1, #0xff000000
278 tstne data1, #0x00ff0000
279 tstne data1, #0x0000ff00
280 beq .Lstrcmp_done_equal
/* Drop the syndrome bits of the top byte; a non-zero remainder means a
   zero byte was flagged in one of the lower three bytes.  */
282 bics syndrome, syndrome, #0xff000000
283 bne .Lstrcmp_done_equal
/* tmp2 = data1 shifted by 24 bits.  */
286 S2LO tmp2, data1, #24
287 #ifdef __ARM_BIG_ENDIAN
288 lsl data2, data2, #24
/* tmp2 = data1 shifted by 24 bits; data2 is masked with LSB (a constant
   defined elsewhere).  */
293 S2LO tmp2, data1, #24
294 and data2, data2, #LSB
297 /* Critical inner Loop: Block with 2 bytes initial overlap. */
/* tmp2 starts as data1 shifted by 16 bits.  The zero-byte syndrome
   (data1 - magic1) & ~data1 is computed alongside, and tmp2 is compared
   with data2 shifted by 16 bits.  */
300 S2HI tmp2, data1, #16
301 sub syndrome, data1, magic1
303 bic syndrome, syndrome, data1
304 cmp tmp2, data2, S2LO #16
/* Mask the syndrome with magic1 << 7; the flags from this ands gate the
   conditional load of the next word of the second string.  */
306 ands syndrome, syndrome, magic1, lsl #7
307 ldreq data2, [src2], #4
/* tmp2 ^= data1, then compare with the new data2 shifted by 16 bits.  */
309 eor tmp2, tmp2, data1
310 cmp tmp2, data2, S2HI #16
312 ldr data1, [src1], #4
316 #ifdef __ARM_BIG_ENDIAN
317 /* The syndrome value may contain false ones if the string ends
318 with the bytes 0x01 0x00 */
/* Test the two upper bytes of data1 one at a time; Z is left set as soon
   as one of them is zero.  */
319 tst data1, #0xff000000
320 tstne data1, #0x00ff0000
321 beq .Lstrcmp_done_equal
/* Shift the upper 16 syndrome bits out; a non-zero remainder means a
   zero byte was flagged in the lower two bytes.  */
323 lsls syndrome, syndrome, #16
324 bne .Lstrcmp_done_equal
/* tmp2 = data1 shifted by 16 bits.  */
327 S2LO tmp2, data1, #16
328 #ifdef __ARM_BIG_ENDIAN
329 lsl data2, data2, #16
/* Shift data2 by 16 bits with S2HI and again with S2LO; tmp2 = data1
   shifted by 16 bits.  */
334 S2HI data2, data2, #16
335 S2LO tmp2, data1, #16
337 S2LO data2, data2, #16
340 /* Critical inner Loop: Block with 1 byte initial overlap. */
/* tmp2 = data1 masked with LSB (a constant defined elsewhere); it is
   compared with data2 shifted by 24 bits.  */
343 and tmp2, data1, #LSB
344 cmp tmp2, data2, S2LO #24
/* Zero-byte syndrome: (data1 - magic1) & ~data1, masked below with
   magic1 << 7.  The ands also sets the flags for the conditional load.  */
345 sub syndrome, data1, magic1
346 bic syndrome, syndrome, data1
348 ands syndrome, syndrome, magic1, lsl #7
/* No zero byte flagged: fetch the next word of the second string.  */
349 ldreq data2, [src2], #4
/* tmp2 ^ data1 leaves the bits of data1 outside the LSB mask; they are
   compared with the new data2 shifted by 8 bits.  */
351 eor tmp2, tmp2, data1
352 cmp tmp2, data2, S2HI #8
354 ldr data1, [src1], #4
357 S2LO data2, data2, #24
360 /* The syndrome value may contain false ones if the string ends
361 with the bytes 0x01 0x00. */
363 beq .Lstrcmp_done_equal
/* Fetch one more word of the second string, then clear the bits named
   by MSB (a constant defined elsewhere) in it.  */
364 ldr data2, [src2], #4
367 bic data2, data2, #MSB
/* Unwind info: the CFA offset is back to 0.  */
375 .cfi_def_cfa_offset 0
/* result = data2 masked with LSB (a constant defined elsewhere).  */
381 and result, data2, #LSB
/* While the flags read EQ, step both words on by 8 bits to expose the
   next byte.  */
384 S2LOEQ tmp2, tmp2, #8
385 S2LOEQ data2, data2, #8
/* result = r2 - result; r2 is named by its raw register number here
   rather than through an alias.  */
387 sub result, r2, result
391 .cfi_def_cfa_offset 0
/* Record the size of the strcmp symbol as the distance from its label to
   this point.  */
394 .size strcmp, . - strcmp