1 /* ANSI C standard library function strcmp.
3 Copyright (c) 2001-2012 Tensilica Inc.
5 Permission is hereby granted, free of charge, to any person obtaining
6 a copy of this software and associated documentation files (the
7 "Software"), to deal in the Software without restriction, including
8 without limitation the rights to use, copy, modify, merge, publish,
9 distribute, sublicense, and/or sell copies of the Software, and to
10 permit persons to whom the Software is furnished to do so, subject to
11 the following conditions:
13 The above copyright notice and this permission notice shall be included
14 in all copies or substantial portions of the Software.
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
24 #include "xtensa-asm.h"
26 #define MASK4 0x40404040
30 .literal .Lmask0, MASK0
31 .literal .Lmask1, MASK1
32 .literal .Lmask2, MASK2
33 .literal .Lmask3, MASK3
34 .literal .Lmask4, MASK4
35 #endif /* XCHAL_HAVE_L32R */
41 .type strcmp, @function
45 /* a2 = s1, a3 = s2 */
47 l8ui a8, a2, 0 // byte 0 from s1
48 l8ui a9, a3, 0 // byte 0 from s2
53 bnone a11, a10, .Laligned
55 xor a11, a2, a3 // compare low two bits of s1 and s2
56 bany a11, a10, .Lunaligned // if they have different alignment
58 /* s1/s2 are not word-aligned. */
59 addi a2, a2, 1 // advance s1
60 beqz a8, .Leq // bytes equal, if zero, strings are equal
61 addi a3, a3, 1 // advance s2
62 bnone a2, a10, .Laligned // if s1/s2 now aligned
63 l8ui a8, a2, 0 // byte 1 from s1
64 l8ui a9, a3, 0 // byte 1 from s2
65 addi a2, a2, 1 // advance s1
66 bne a8, a9, .Lretdiff // if different, return difference
67 beqz a8, .Leq // bytes equal, if zero, strings are equal
68 addi a3, a3, 1 // advance s2
69 bnone a2, a10, .Laligned // if s1/s2 now aligned
70 l8ui a8, a2, 0 // byte 2 from s1
71 l8ui a9, a3, 0 // byte 2 from s2
72 addi a2, a2, 1 // advance s1
73 bne a8, a9, .Lretdiff // if different, return difference
74 beqz a8, .Leq // bytes equal, if zero, strings are equal
75 addi a3, a3, 1 // advance s2
78 /* s1 and s2 have different alignment.
80 If the zero-overhead loop option is available, use an (almost)
81 infinite zero-overhead loop with conditional exits so we only pay
82 for taken branches when exiting the loop.
84 Note: It is important for this unaligned case to come before the
85 code for aligned strings, because otherwise some of the branches
86 above cannot reach and have to be transformed to branches around
87 jumps. The unaligned code is smaller and the branches can reach
92 #if XCHAL_HAVE_DENSITY
93 /* (2 mod 4) alignment for loop instruction */
95 /* (1 mod 4) alignment for loop instruction */
102 #if XCHAL_HAVE_DENSITY
103 _movi.n a8, 0 // set up for the maximum loop count
105 _movi a8, 0 // set up for the maximum loop count
107 loop a8, .Lretdiff // loop forever (almost anyway)
113 bne a8, a9, .Lretdiff
124 /* s1 is word-aligned; s2 is word-aligned.
126 If the zero-overhead loop option is available, use an (almost)
127 infinite zero-overhead loop with conditional exits so we only pay
128 for taken branches when exiting the loop. */
130 /* New algorithm, relying on the fact that all normal ASCII is between
133 Rather than check all bytes for zero:
134 Take one word (4 bytes). Call it w1.
135 Shift w1 left by one into w1'.
136 Or w1 and w1'. For all normal ASCII bit 6 will be 1; for zero it won't.
137 Check that all 4 bit 6's (one for each byte) are one:
138 If they are, we are definitely not done.
139 If they are not, we are probably done, but need to check for zero. */
144 /* (2 mod 4) alignment for loop instruction */
150 l32r a4, .Lmask0 // mask for byte 0
160 loop a0, .Laligned_done
162 /* First unrolled loop body. */
163 l32i a8, a2, 0 // get word from s1
164 l32i a9, a3, 0 // get word from s2
168 bnall a9, a7, .Lprobeq
170 /* Second unrolled loop body. */
171 l32i a8, a2, 4 // get word from s1+4
172 l32i a9, a3, 4 // get word from s2+4
176 bnall a9, a7, .Lprobeq2
178 addi a2, a2, 8 // advance s1 pointer
179 addi a3, a3, 8 // advance s2 pointer
184 /* Adjust pointers to account for the loop unrolling. */
188 #else /* !XCHAL_HAVE_LOOPS */
191 movi a4, MASK0 // mask for byte 0
195 addi a2, a2, 4 // advance s1 pointer
196 addi a3, a3, 4 // advance s2 pointer
198 l32i a8, a2, 0 // get word from s1
199 l32i a9, a3, 0 // get word from s2
203 ball a9, a7, .Lnextword
204 #endif /* !XCHAL_HAVE_LOOPS */
206 /* align (0 mod 4) */
208 /* Words are probably equal, but check for sure.
209 If not, loop over the rest of string using normal algorithm. */
211 bnone a8, a4, .Leq // if byte 0 is zero
213 l32r a5, .Lmask1 // mask for byte 1
214 l32r a6, .Lmask2 // mask for byte 2
215 bnone a8, a5, .Leq // if byte 1 is zero
216 l32r a7, .Lmask3 // mask for byte 3
217 bnone a8, a6, .Leq // if byte 2 is zero
218 bnone a8, a7, .Leq // if byte 3 is zero
219 /* align (1 mod 4) */
221 const16 a5, MASK1@h // mask for byte 1
223 bnone a8, a5, .Leq // if byte 1 is zero
224 const16 a6, MASK2@h // mask for byte 2
226 bnone a8, a6, .Leq // if byte 2 is zero
227 const16 a7, MASK3@h // mask for byte 3
229 bnone a8, a7, .Leq // if byte 3 is zero
230 /* align (2 mod 4) */
231 #endif /* XCHAL_HAVE_L32R */
232 #if XCHAL_HAVE_DENSITY
233 addi.n a2, a2, 4 // advance s1 pointer
234 addi.n a3, a3, 4 // advance s2 pointer
235 /* align (1 mod 4) or (2 mod 4) */
237 addi a2, a2, 4 // advance s1 pointer
238 addi a3, a3, 4 // advance s2 pointer
243 /* align (2 mod 4) */
244 #endif /* XCHAL_HAVE_DENSITY */
247 loop a0, .Leq // loop forever (a4 is bigger than max iters)
248 l32i a8, a2, 0 // get word from s1
249 l32i a9, a3, 0 // get word from s2
250 addi a2, a2, 4 // advance s1 pointer
252 bnone a8, a4, .Leq // if byte 0 is zero
253 bnone a8, a5, .Leq // if byte 1 is zero
254 bnone a8, a6, .Leq // if byte 2 is zero
255 bnone a8, a7, .Leq // if byte 3 is zero
256 addi a3, a3, 4 // advance s2 pointer
258 #else /* !XCHAL_HAVE_LOOPS */
262 addi a3, a3, 4 // advance s2 pointer
264 l32i a8, a2, 0 // get word from s1
265 l32i a9, a3, 0 // get word from s2
266 addi a2, a2, 4 // advance s1 pointer
268 bnone a8, a4, .Leq // if byte 0 is zero
269 bnone a8, a5, .Leq // if byte 1 is zero
270 bnone a8, a6, .Leq // if byte 2 is zero
271 bany a8, a7, .Lnextword2 // if byte 3 is zero
272 #endif /* !XCHAL_HAVE_LOOPS */
274 /* Words are equal; some byte is zero. */
275 .Leq: movi a2, 0 // return equal
278 .Lwne2: /* Words are not equal. On big-endian processors, if none of the
279 bytes are zero, the return value can be determined by a simple
283 bnall a10, a7, .Lsomezero
284 bgeu a8, a9, .Lposreturn
290 .Lsomezero: // There is probably some zero byte.
291 #endif /* __XTENSA_EB__ */
292 .Lwne: /* Words are not equal. */
293 xor a2, a8, a9 // get word with nonzero in byte that differs
294 bany a2, a4, .Ldiff0 // if byte 0 differs
295 movi a5, MASK1 // mask for byte 1
296 bnone a8, a4, .Leq // if byte 0 is zero
297 bany a2, a5, .Ldiff1 // if byte 1 differs
298 movi a6, MASK2 // mask for byte 2
299 bnone a8, a5, .Leq // if byte 1 is zero
300 bany a2, a6, .Ldiff2 // if byte 2 differs
301 bnone a8, a6, .Leq // if byte 2 is zero
306 /* Byte 0 is equal (at least) and there is a difference before a zero
307 byte. Just subtract words to get the return value.
308 The high order equal bytes cancel, leaving room for the sign. */
313 /* Need to make room for the sign, so can't subtract whole words. */
319 #else /* !__XTENSA_EB__ */
320 /* Little-endian is a little more difficult because can't subtract
323 /* Bytes 0-2 are equal; byte 3 is different.
324 For little-endian need to have a sign bit for the difference. */
331 /* Byte 0 is different. */
338 /* Byte 0 is equal; byte 1 is different. */
345 /* Bytes 0-1 are equal; byte 2 is different. */
351 #endif /* !__XTENSA_EB */
353 .size strcmp, . - strcmp