Cygwin: access: Fix X_OK behaviour for backup operators and admins
[newlib-cygwin.git] / newlib / libc / machine / arm / strcmp-armv7m.S
blob825b6e77fd94216be142e91022a1b0d71166c6e9
1 /*
2  * Copyright (c) 2012-2014 ARM Ltd
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. The name of the company may not be used to endorse or promote
14  *    products derived from this software without specific prior written
15  *    permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
18  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
29 /* Very similar to the generic code, but uses Thumb2 as implemented
30    in ARMv7-M.  */
32 #include "arm_asm.h"
/* Parameters and result.  src1 and src2 hold the two string pointers;
   the return value is produced in the register that carried src1.  */
#define src1            r0
#define src2            r1
#define result          r0      /* Overlaps src1.  */

/* Internal variables.  */
#define data1           r2      /* Current word (or byte) read through src1.  */
#define data2           r3      /* Current word (or byte) read through src2.  */
#define tmp2            r5      /* Pushed and popped around the unaligned word loops.  */
#define tmp1            r12     /* Scratch on the entry path.  */
#define syndrome        r12     /* Overlaps tmp1 */
/* strcmp: compare the NUL-terminated strings addressed by src1 (r0) and
   src2 (r1).  The value left in result (r0) is the signed difference of
   the first pair of bytes that differ, taken as unsigned bytes; it is
   zero when both strings end together.

   This part handles strings that share the same byte offset within a
   word and compares them one word at a time.  Strings with different
   offsets branch to .Lstrcmp_unaligned.

   def_fn, prologue, epilogue, S2LO, S2HI, S2LOEQ and MSB are not defined
   in this file.  NOTE(review): the comments below assume S2LO/S2HI are
   the endian-dependent logical shifts that step through a word in
   string order - confirm against their definitions.  */
        .thumb
        .syntax unified
def_fn strcmp
        .fnstart
        .cfi_sections .debug_frame
        .cfi_startproc
        prologue push_ip=HAVE_PAC_LEAF
        eor     tmp1, src1, src2
        tst     tmp1, #3
        /* Strings not at same byte offset from a word boundary.  */
        bne     .Lstrcmp_unaligned
        /* tmp1 = byte offset of src1 within its word; Z is set when the
           pointers are already word aligned.  The bic and ldr that
           follow do not alter the flags, so the IT block and the beq
           still test this result.  */
        ands    tmp1, src1, #3
        bic     src1, src1, #3
        bic     src2, src2, #3
        ldr     data1, [src1], #4
        it      eq
        ldreq   data2, [src2], #4
        beq     4f
        /* Although s1 and s2 have identical initial alignment, they are
           not currently word aligned.  Rather than comparing bytes,
           make sure that any bytes fetched from before the addressed
           bytes are forced to 0xff.  Then they will always compare
           equal.  */
        eor     tmp1, tmp1, #3
        mvn     data2, #MSB
        lsl     tmp1, tmp1, #3          /* tmp1 = 8 * (3 - offset), a shift count in bits.  */
        S2LO    tmp1, data2, tmp1       /* tmp1 = mask applied to both words below.  */
        ldr     data2, [src2], #4
        orr     data1, data1, tmp1
        orr     data2, data2, tmp1
        .p2align        2
        /* Critical loop.  */
4:
        sub     syndrome, data1, #0x01010101
        cmp     data1, data2
        /* check for any zero bytes in first word */
        /* Every instruction in the IT block is conditional on EQ.  The
           tsteq rewrites the flags, so the two loads and the beq only
           run when the words are equal and no zero byte was flagged.  */
        itttt   eq
        biceq   syndrome, syndrome, data1
        tsteq   syndrome, #0x80808080
        ldreq   data1, [src1], #4
        ldreq   data2, [src2], #4
        beq     4b
2:
        .cfi_remember_state
        /* There's a zero or a different byte in the word */
        /* Step through the word one byte at a time: isolate the next
           byte of data1 in result, stop if it is zero (cmp with 1 leaves
           carry clear and Z clear) or if it differs from the matching
           byte of data2.  */
        S2HI    result, data1, #24
        S2LO    data1, data1, #8
        cmp     result, #1
        it      cs
        cmpcs   result, data2, S2HI #24
        it      eq
        S2LOEQ  data2, data2, #8
        beq     2b
        /* On a big-endian machine, RESULT contains the desired byte in bits
           0-7; on a little-endian machine they are in bits 24-31.  In
           both cases the other bits in RESULT are all zero.  For DATA2 the
           interesting byte is at the other end of the word, but the
           other bits are not necessarily zero.  We need a signed result
           representing the difference in the unsigned bytes, so for the
           little-endian case we can't just shift the interesting bits
           up.  */
#ifdef __ARM_BIG_ENDIAN
        sub     result, result, data2, lsr #24
#else
        and     data2, data2, #255
        lsrs    result, result, #24
        subs    result, result, data2
#endif
        epilogue push_ip=HAVE_PAC_LEAF
#if 0
        /* The assembly code below is based on the following algorithm.
           This sketch is excluded from the build by the enclosing
           conditional and is kept only as a reference for the unaligned
           path.  */
#ifdef __ARM_BIG_ENDIAN
#define RSHIFT <<
#define LSHIFT >>
#else
#define RSHIFT >>
#define LSHIFT <<
#endif

/* body(shift): word loop for a src2 that is `shift' bits past a word
   boundary.  On exit tmp2 and data2 hold the remaining bytes to be
   compared one at a time.  */
#define body(shift)                                                     \
  mask = 0xffffffffU RSHIFT shift;                                      \
  data1 = *src1++;                                                      \
  data2 = *src2++;                                                      \
  do                                                                    \
    {                                                                   \
      tmp2 = data1 & mask;                                              \
      if (__builtin_expect(tmp2 != data2 RSHIFT shift, 0))              \
        {                                                               \
          data2 RSHIFT= shift;                                          \
          break;                                                        \
        }                                                               \
      if (__builtin_expect(((data1 - b1) & ~data1) & (b1 << 7), 0))     \
        {                                                               \
          /* See comment in assembler below re syndrome on big-endian */\
          if ((((data1 - b1) & ~data1) & (b1 << 7)) & mask)             \
            data2 RSHIFT= shift;                                        \
          else                                                          \
            {                                                           \
              data2 = *src2;                                            \
              tmp2 = data1 RSHIFT (32 - shift);                         \
              data2 = (data2 LSHIFT (32 - shift)) RSHIFT (32 - shift);  \
            }                                                           \
          break;                                                        \
        }                                                               \
      data2 = *src2++;                                                  \
      tmp2 ^= data1;                                                    \
      if (__builtin_expect(tmp2 != data2 LSHIFT (32 - shift), 0))       \
        {                                                               \
          tmp2 = data1 RSHIFT (32 - shift);                             \
          data2 = (data2 LSHIFT (32 - shift)) RSHIFT (32 - shift);      \
          break;                                                        \
        }                                                               \
      data1 = *src1++;                                                  \
    } while (1)

  const unsigned* src1;
  const unsigned* src2;
  unsigned data1, data2;
  unsigned mask;
  unsigned shift;
  unsigned b1 = 0x01010101;
  char c1, c2;
  unsigned tmp2;

  /* Compare byte by byte until s1 reaches a word boundary.  */
  while (((unsigned) s1) & 3)
    {
      c1 = *s1++;
      c2 = *s2++;
      if (c1 == 0 || c1 != c2)
        return c1 - (int)c2;
    }
  src1 = (unsigned*) (((unsigned)s1) & ~3);
  src2 = (unsigned*) (((unsigned)s2) & ~3);
  /* Pick the word loop that matches the byte offset of s2.  */
  tmp2 = ((unsigned) s2) & 3;
  if (tmp2 == 1)
    {
      body(8);
    }
  else if (tmp2 == 2)
    {
      body(16);
    }
  else
    {
      body (24);
    }

  /* Compare the bytes left in tmp2 and data2 one at a time.  */
  do
    {
#ifdef __ARM_BIG_ENDIAN
      /* Shift first, then narrow: the cast binds tighter than >>.  */
      c1 = (char) (tmp2 >> 24);
      c2 = (char) (data2 >> 24);
#else /* not  __ARM_BIG_ENDIAN */
      c1 = (char) tmp2;
      c2 = (char) data2;
#endif /* not  __ARM_BIG_ENDIAN */
      tmp2 RSHIFT= 8;
      data2 RSHIFT= 8;
    } while (c1 != 0 && c1 == c2);
  return c1 - c2;
#endif /* 0 */
/* Unaligned path: reached from the entry code when src1 and src2 have
   different byte offsets within a word.  */
        /* First of all, compare bytes until src1(sp1) is word-aligned. */
.Lstrcmp_unaligned:
        .cfi_restore_state
        tst     src1, #3
        beq     2f
        .cfi_remember_state
        ldrb    data1, [src1], #1
        ldrb    data2, [src2], #1
        /* Keep looping only while the src1 byte is non-zero and equal to
           the src2 byte.  A zero byte makes the cmp with 1 clear both
           carry and Z, so the cmpcs is skipped and the beq falls
           through.  */
        cmp     data1, #1
        it      cs
        cmpcs   data1, data2
        beq     .Lstrcmp_unaligned
        sub     result, data1, data2
        epilogue push_ip=HAVE_PAC_LEAF

2:
        .cfi_restore_state
        /* src1 is now word aligned.  tmp2 is r5, so save it before the
           word loops use it.  */
        stmfd   sp!, {r5}
        .cfi_adjust_cfa_offset 4
        .cfi_rel_offset 5, 0

        ldr     data1, [src1], #4
        and     tmp2, src2, #3          /* Byte offset of src2 within its word.  */
        bic     src2, src2, #3
        ldr     data2, [src2], #4
        /* Offset 2 and offset 3 branch away; execution otherwise falls
           through into the loop that follows (.Loverlap3).  Offset 0
           cannot occur here because the offsets of src1 and src2
           differ and src1 is aligned.  */
        cmp     tmp2, #2
        beq     .Loverlap2
        bhi     .Loverlap1
/* Word loop for src2 one byte past a word boundary.  Each data1 word is
   compared against three bytes of the current data2 word and one byte
   of the next.  NOTE(review): byte-position remarks below assume the
   S2LO/S2HI/MSB/LSB definitions found outside this file - confirm.  */
        /* Critical inner Loop: Block with 3 bytes initial overlap */
        .p2align        2
.Loverlap3:
        bic     tmp2, data1, #MSB
        cmp     tmp2, data2, S2LO #8
        /* sub and bic do not set flags, so the bne still tests the cmp.  */
        sub     syndrome, data1, #0x01010101
        bic     syndrome, syndrome, data1
        bne     4f
        ands    syndrome, syndrome, #0x80808080
        it      eq
        ldreq   data2, [src2], #4
        bne     5f
        eor     tmp2, tmp2, data1       /* Leaves only the byte of data1 selected by MSB.  */
        cmp     tmp2, data2, S2HI #24
        bne     6f
        ldr     data1, [src1], #4
        b       .Loverlap3
4:
        /* Mismatch within the three overlapping bytes.  */
        S2LO    data2, data2, #8
        b       .Lstrcmp_tail

5:
        /* The three overlapping bytes matched and data1 was flagged as
           containing a zero byte.  */
#ifdef __ARM_BIG_ENDIAN
        /* The syndrome value may contain false ones if the string ends
        with the bytes 0x01 0x00.  */
        tst     data1, #0xff000000
        itt     ne
        tstne   data1, #0x00ff0000
        tstne   data1, #0x0000ff00
        beq     .Lstrcmp_done_equal
#else
        /* Ignore the flag for the top byte; any flag left lies within
           the bytes that already matched.  */
        bics    syndrome, syndrome, #0xff000000
        bne     .Lstrcmp_done_equal
#endif
        /* Only the remaining byte of data1 is left to compare; fetch a
           single byte from src2 for it.  */
        ldrb    data2, [src2]
        S2LO    tmp2, data1, #24
#ifdef __ARM_BIG_ENDIAN
        lsl     data2, data2, #24
#endif
        b       .Lstrcmp_tail

6:
        /* Mismatch between the last byte of data1 and the first byte of
           the newly loaded data2.  */
        S2LO    tmp2, data1, #24
        and     data2, data2, #LSB
        b       .Lstrcmp_tail
/* Word loop for src2 two bytes past a word boundary.  Each data1 word
   is compared against two bytes of the current data2 word and two bytes
   of the next.  NOTE(review): byte-position remarks below assume the
   S2LO/S2HI definitions found outside this file - confirm.  */
        /* Critical inner Loop: Block with 2 bytes initial overlap.  */
        .p2align        2
.Loverlap2:
        /* The shift pair clears half of data1, leaving the two bytes
           that are compared against the current data2.  */
        S2HI    tmp2, data1, #16
        sub     syndrome, data1, #0x01010101
        S2LO    tmp2, tmp2, #16
        bic     syndrome, syndrome, data1
        cmp     tmp2, data2, S2LO #16
        bne     4f
        ands    syndrome, syndrome, #0x80808080
        it      eq
        ldreq   data2, [src2], #4
        bne     5f
        eor     tmp2, tmp2, data1       /* Leaves the other two bytes of data1.  */
        cmp     tmp2, data2, S2HI #16
        bne     6f
        ldr     data1, [src1], #4
        b       .Loverlap2

5:
        /* The two overlapping bytes matched and data1 was flagged as
           containing a zero byte.  */
#ifdef __ARM_BIG_ENDIAN
        /* The syndrome value may contain false ones if the string ends
        with the bytes 0x01 0x00 */
        tst     data1, #0xff000000
        it      ne
        tstne   data1, #0x00ff0000
        beq     .Lstrcmp_done_equal
#else
        /* Keep only the flags for the low two bytes; if one is set the
           zero lies within the bytes that already matched.  */
        lsls    syndrome, syndrome, #16
        bne     .Lstrcmp_done_equal
#endif
        /* Two bytes of data1 remain; fetch just a halfword from src2.  */
        ldrh    data2, [src2]
        S2LO    tmp2, data1, #16
#ifdef __ARM_BIG_ENDIAN
        lsl     data2, data2, #16
#endif
        b       .Lstrcmp_tail

6:
        /* Mismatch between the second half of data1 and the first half
           of the newly loaded data2.  The S2HI here and the S2LO at 4
           together clear the other half of data2.  */
        S2HI    data2, data2, #16
        S2LO    tmp2, data1, #16
4:
        /* Also the target for a mismatch in the overlapping half, where
           tmp2 already holds the data1 bytes.  */
        S2LO    data2, data2, #16
        b       .Lstrcmp_tail
/* Word loop for src2 three bytes past a word boundary.  Each data1 word
   is compared against one byte of the current data2 word and three
   bytes of the next.  NOTE(review): byte-position remarks below assume
   the S2LO/S2HI/MSB/LSB definitions found outside this file - confirm.  */
        /* Critical inner Loop: Block with 1 byte initial overlap.  */
        .p2align        2
.Loverlap1:
        and     tmp2, data1, #LSB
        cmp     tmp2, data2, S2LO #24
        /* sub and bic do not set flags, so the bne still tests the cmp.  */
        sub     syndrome, data1, #0x01010101
        bic     syndrome, syndrome, data1
        bne     4f
        ands    syndrome, syndrome, #0x80808080
        it      eq
        ldreq   data2, [src2], #4
        bne     5f
        eor     tmp2, tmp2, data1       /* Leaves the three bytes of data1 not selected by LSB.  */
        cmp     tmp2, data2, S2HI #8
        bne     6f
        ldr     data1, [src1], #4
        b       .Loverlap1
4:
        /* Mismatch in the single overlapping byte.  */
        S2LO    data2, data2, #24
        b       .Lstrcmp_tail
5:
        /* The syndrome value may contain false ones if the string ends
           with the bytes 0x01 0x00.  */
        /* A zero in the byte that already matched means the strings are
           equal; otherwise load the next data2 word and compare the
           remaining three bytes.  */
        tst     data1, #LSB
        beq     .Lstrcmp_done_equal
        ldr     data2, [src2], #4
6:
        S2LO    tmp2, data1, #8
        bic     data2, data2, #MSB
        b       .Lstrcmp_tail
/* Exit for the unaligned word loops when the strings compare equal:
   return zero and undo the push of r5.  */
.Lstrcmp_done_equal:
        mov     result, #0
        .cfi_remember_state
        ldmfd   sp!, {r5}
        .cfi_restore 5
        .cfi_adjust_cfa_offset -4
        epilogue push_ip=HAVE_PAC_LEAF
/* Byte-by-byte finish for the unaligned word loops.  On entry tmp2 holds
   the remaining bytes from src1 and data2 the remaining bytes from src2,
   with the next byte to compare in the position selected by LSB.  r2 is
   the data1 register, reused here as scratch.  */
.Lstrcmp_tail:
        .cfi_restore_state
        and     r2, tmp2, #LSB
        and     result, data2, #LSB
        /* Continue only while the src2 byte is non-zero and equal to the
           src1 byte; a zero byte makes the cmp with 1 clear both carry
           and Z, so the loop exits.  */
        cmp     result, #1
        it      cs
        cmpcs   result, r2
        itt     eq
        S2LOEQ  tmp2, tmp2, #8
        S2LOEQ  data2, data2, #8
        beq     .Lstrcmp_tail
        sub     result, r2, result      /* src1 byte minus src2 byte.  */
        ldmfd   sp!, {r5}
        .cfi_restore 5
        .cfi_adjust_cfa_offset -4
        epilogue push_ip=HAVE_PAC_LEAF
        .cfi_endproc
        .cantunwind
        .fnend
        .size strcmp, . - strcmp