/*
 * clarify the purpose of this project
 * [nyanglibc.git] / string / strcmp.s
 * blob ee48132be3c30e057cbc9c83c7a9d5defdc5263f
 */
/*-----------------------------------------------------------------------
 * strcmp -- entry point, fast first-16-byte check and alignment dispatch.
 * Syntax: GNU as, AT&T operand order (source, destination).
 *
 * In:    %rdi, %rsi = the two NUL-terminated strings to compare
 *        (first / second integer argument registers of SysV AMD64).
 * Out:   this part only sets up state and jumps; the result is produced
 *        by the exit code reached through .Lless16bytes / .Lashr_N.
 * Writes here: %rax %rcx %rdx %r8 %r9 %r10 %rsi %rdi %xmm0-%xmm2, flags.
 *
 * State handed to the .Lashr_N cases:
 *   %rsi, %rdi  pointers rounded down to a 16-byte boundary
 *   %ecx, %eax  offsets (0..15) of the %rsi / %rdi string inside its
 *               16-byte chunk, with %ecx >= %eax
 *   %edx        0xffff (one bit per byte lane, all lanes "equal")
 *   %r8d        0 if operands are in caller order, non-zero if swapped
 *-----------------------------------------------------------------------*/
        .text
        .globl  strcmp
        .type   strcmp, @function
        .p2align 4
strcmp:
        /* Offsets of both pointers inside their 64-byte block. */
        movl    %esi, %ecx
        movl    %edi, %eax
        andq    $0x3f, %rcx
        andq    $0x3f, %rax

        /* An offset above 48 means an unaligned 16-byte read would run
           past the end of that 64-byte block; take the aligned path. */
        cmpl    $0x30, %ecx
        ja      .Lcrosscache
        cmpl    $0x30, %eax
        ja      .Lcrosscache

        /* Unaligned 16-byte loads, assembled from two 8-byte halves. */
        movlpd  (%rdi), %xmm1
        movlpd  (%rsi), %xmm2
        movhpd  8(%rdi), %xmm1
        movhpd  8(%rsi), %xmm2

        pxor    %xmm0, %xmm0
        pcmpeqb %xmm1, %xmm0            /* xmm0 lane = 0xff where the %rdi byte is NUL */
        pcmpeqb %xmm2, %xmm1            /* xmm1 lane = 0xff where the two bytes match  */
        psubb   %xmm0, %xmm1            /* lane sign bit stays set only for "match and not NUL" */
        pmovmskb %xmm1, %edx
        subl    $0xffff, %edx           /* zero iff all 16 lanes matched without a NUL */
        jnz     .Lless16bytes           /* lowest set bit of %edx = first differing/NUL lane */

        /* First 16 bytes are equal: step both strings forward.  %ecx and
           %eax keep the old offsets, which are unchanged modulo 16. */
        addq    $16, %rsi
        addq    $16, %rdi

        .p2align 4
.Lcrosscache:
        andq    $-16, %rsi              /* round both pointers down to 16 bytes */
        andq    $-16, %rdi
        movl    $0xffff, %edx
        xorl    %r8d, %r8d              /* r8d = 0: operands not swapped */
        andl    $0xf, %ecx              /* offset of %rsi string in its chunk */
        andl    $0xf, %eax              /* offset of %rdi string in its chunk */
        cmpl    %eax, %ecx
        je      .Lashr_0                /* same offset: plain aligned compare */
        ja      .Lbigger                /* %ecx already the larger offset */

        /* Make %rsi/%ecx the side with the larger offset and remember
           the swap so the exit code can restore the operand order. */
        movl    %edx, %r8d              /* any non-zero value serves as the flag */
        xchgl   %ecx, %eax
        xchgq   %rsi, %rdi

.Lbigger:
        /* r9 = 15 - (rcx - rax), in 0..14; table slot r9 is .Lashr_(r9+1). */
        leaq    15(%rax), %r9
        subq    %rcx, %r9
        leaq    .Lunaligned_table(%rip), %r10
        movslq  (%r10, %r9, 4), %r9     /* 32-bit offset relative to the table */
        leaq    (%r10, %r9), %r10
        jmp     *%r10
/*
 * .Lashr_0 -- both strings start at the same offset inside their
 * 16-byte chunk, so aligned chunks can be compared lane for lane.
 *
 * In:  %rsi, %rdi 16-byte aligned; %ecx = %eax = start offset (0..15);
 *      %edx = 0xffff.
 * Out: jumps to .Lless32bytes (difference/NUL in the first chunk) or
 *      .Lexit (difference/NUL in a later chunk) with %edx non-zero.
 */
        .p2align 4
.Lashr_0:
        pxor    %xmm0, %xmm0
        movdqa  (%rsi), %xmm1
        pcmpeqb %xmm1, %xmm0            /* NUL lanes of the %rsi chunk */
        pcmpeqb (%rdi), %xmm1           /* matching lanes */
        psubb   %xmm0, %xmm1            /* sign bit set = match and not NUL */
        pmovmskb %xmm1, %r9d
        shrl    %cl, %edx               /* drop the lanes in front of the string start ... */
        shrl    %cl, %r9d               /* ... from both the expected and the actual mask */
        subl    %r9d, %edx
        jnz     .Lless32bytes           /* masks differ: mismatch or NUL in first chunk */

        movq    $16, %rcx               /* byte index of the next chunk */
        movq    $16, %r9                /* .Lexit computes rax = r9 + rcx - 16 = rcx */
        pxor    %xmm0, %xmm0            /* lanes before the start offset may have set it */

        /* Main loop, unrolled twice.  %xmm0 is all-zero on entry to every
           step: a step only continues when no lane held a NUL, so the
           NUL mask it wrote into %xmm0 is zero again. */
        .p2align 4
.Lloop_ashr_0:
        .rept   2
        movdqa  (%rsi, %rcx), %xmm1
        movdqa  (%rdi, %rcx), %xmm2
        pcmpeqb %xmm1, %xmm0
        pcmpeqb %xmm2, %xmm1
        psubb   %xmm0, %xmm1
        pmovmskb %xmm1, %edx
        subl    $0xffff, %edx           /* zero iff all 16 lanes match without NUL */
        jnz     .Lexit
        addq    $16, %rcx
        .endr
        jmp     .Lloop_ashr_0
/*
 * .Lashr_1 ... .Lashr_15 -- the two strings start at different offsets
 * inside their 16-byte chunks.  All fifteen cases share one template
 * that differs only in the byte shift N, so they are generated from a
 * macro instead of being written out by hand.
 *
 * Case N is selected when (offset of %rsi) - (offset of %rdi) = 16 - N.
 *
 * In:   %rsi, %rdi 16-byte aligned; %ecx / %eax = start offsets with
 *       %ecx > %eax; %edx = 0xffff.
 * Out:  jumps to .Lless32bytes, .Lexit or .Laftertail with the first
 *       differing/NUL position encoded in %edx.
 * Regs: %rcx  byte index of the current chunk (16, 32, ...)
 *       %r9d  N, used by .Lexit to rebuild the %rdi-side address
 *       %r10  page tracker, see below
 *       %xmm3 previous %rdi chunk, %xmm4 copy of the current one
 *       %xmm0 all-zero at the top of every compare step
 *
 * Page tracker: %r10 starts at ((%rdi + N) & 0xfff) - 0x1000 and grows
 * by 16 per chunk, so it turns positive exactly when the next load from
 * %rdi would be the first chunk of a new 4096-byte page.  Before making
 * that load, .Lnibble_ashr_N checks whether the not-yet-compared bytes
 * still held in %xmm3 (lanes N..15) contain a NUL; if so the compare is
 * finished from registers and the new page is never touched.
 */

        /* One 16-byte compare step for shift N. */
        .macro  STRCMP_ASHR_STEP shift
        movdqa  (%rsi, %rcx), %xmm1
        movdqa  (%rdi, %rcx), %xmm2
        movdqa  %xmm2, %xmm4            /* keep the unshifted chunk for the next step */
        psrldq  $\shift, %xmm3          /* tail of previous chunk -> low lanes   */
        pslldq  $(16 - \shift), %xmm2   /* head of current chunk  -> high lanes  */
        por     %xmm3, %xmm2            /* 16 %rdi bytes lined up with %xmm1     */
        pcmpeqb %xmm1, %xmm0            /* NUL lanes of the %rsi chunk */
        pcmpeqb %xmm2, %xmm1            /* matching lanes */
        psubb   %xmm0, %xmm1            /* sign bit set = match and not NUL */
        pmovmskb %xmm1, %edx
        sub     $0xffff, %edx           /* zero iff all 16 lanes match without NUL */
        jnz     .Lexit
        add     $16, %rcx
        movdqa  %xmm4, %xmm3
        .endm

        /* Complete case for shift N (1..15). */
        .macro  STRCMP_ASHR_CASE shift
        .p2align 4
.Lashr_\shift\():
        /* First chunk: move the %rdi bytes up so that its start offset
           lands on the %rsi start offset, then ignore the lanes in
           front of the string start. */
        pxor    %xmm0, %xmm0
        movdqa  (%rdi), %xmm2
        movdqa  (%rsi), %xmm1
        pcmpeqb %xmm1, %xmm0
        pslldq  $(16 - \shift), %xmm2
        pcmpeqb %xmm1, %xmm2
        psubb   %xmm0, %xmm2
        pmovmskb %xmm2, %r9d
        shr     %cl, %edx
        shr     %cl, %r9d
        sub     %r9d, %edx
        jnz     .Lless32bytes           /* mismatch or NUL in the first chunk */

        movdqa  (%rdi), %xmm3           /* becomes the "previous chunk" */
        pxor    %xmm0, %xmm0            /* lanes before the start may have set it */
        mov     $16, %rcx
        mov     $\shift, %r9d
        lea     \shift(%rdi), %r10
        and     $0xfff, %r10
        sub     $0x1000, %r10

        .p2align 4
.Lloop_ashr_\shift\():
        add     $16, %r10
        jg      .Lnibble_ashr_\shift
.Lgobble_ashr_\shift\():
        STRCMP_ASHR_STEP \shift
        add     $16, %r10
        jg      .Lnibble_ashr_\shift
        STRCMP_ASHR_STEP \shift
        jmp     .Lloop_ashr_\shift

        .p2align 4
.Lnibble_ashr_\shift\():
        pcmpeqb %xmm3, %xmm0            /* NUL lanes of the previous %rdi chunk */
        pmovmskb %xmm0, %edx
        test    $((0xffff << \shift) & 0xffff), %edx    /* only lanes N..15 are pending */
        jnz     .Lashr_\shift\()_exittail
        pxor    %xmm0, %xmm0            /* restore the all-zero invariant */
        sub     $0x1000, %r10           /* re-arm the tracker for the next page */
        jmp     .Lgobble_ashr_\shift

        .p2align 4
.Lashr_\shift\()_exittail:
        movdqa  (%rsi, %rcx), %xmm1
        psrldq  $\shift, %xmm0          /* line the NUL mask and the pending bytes */
        psrldq  $\shift, %xmm3          /* up with the %rsi chunk */
        /* Case 15 is emitted last and falls through into .Laftertail,
           which must directly follow this block. */
        .ifne   \shift - 15
        jmp     .Laftertail
        .endif
        .endm

        .irp    shift, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
        STRCMP_ASHR_CASE \shift
        .endr

        .purgem STRCMP_ASHR_CASE
        .purgem STRCMP_ASHR_STEP
/*
 * Common exit code for strcmp.
 *
 * .Laftertail   in: %xmm1 = %rsi chunk, %xmm3 = pending %rdi bytes,
 *                   %xmm0 = NUL mask for those bytes.
 * .Lexit        in: %edx non-zero, %rcx = chunk index, %r9 = shift N.
 * .Lless32bytes in: %edx non-zero, %rax / %rcx = byte offsets to add to
 *                   %rdi / %rsi.
 * .Lless16bytes in: %edx non-zero, %rdi / %rsi already point at the
 *                   compared bytes.
 * In every case the lowest set bit of %edx is the index of the first
 * byte that differs or is the terminating NUL.
 *
 * Out: %eax = (byte from first operand) - (byte from second operand),
 *      both zero-extended, i.e. negative / zero / positive.
 */
        .p2align 4
.Laftertail:
        pcmpeqb %xmm3, %xmm1            /* matching lanes */
        psubb   %xmm0, %xmm1            /* sign bit set = match and not NUL */
        pmovmskb %xmm1, %edx
        not     %edx                    /* set bits now mark mismatch/NUL lanes */

        .p2align 4
.Lexit:
        lea     -16(%r9, %rcx), %rax    /* %rdi-side offset of the compared 16 bytes */
.Lless32bytes:
        lea     (%rdi, %rax), %rdi
        lea     (%rsi, %rcx), %rsi
        test    %r8d, %r8d              /* were the operands swapped on entry? */
        jz      .Lret
        xchg    %rsi, %rdi              /* restore caller order so the sign is right */

        .p2align 4
.Lret:
.Lless16bytes:
        bsf     %rdx, %rdx              /* index of first differing/NUL byte */
        movzbl  (%rsi, %rdx), %ecx
        movzbl  (%rdi, %rdx), %eax
        sub     %ecx, %eax
        ret                             /* return here; do not fall into the code below */

/* Returns 0.  Not referenced from the code visible in this file. */
.Lstrcmp_exitz:
        xor     %eax, %eax
        ret

/* Compares the single bytes at %rdi and %rsi.  Not referenced from the
   code visible in this file. */
        .p2align 4
.LByte0:
        movzbl  (%rsi), %ecx            /* explicit byte->long form, same as .Lret */
        movzbl  (%rdi), %eax
        sub     %ecx, %eax
        ret

        .size   strcmp, .-strcmp
/*
 * Dispatch table for the unequal-alignment cases.
 * Each entry is a signed 32-bit offset from the start of the table to
 * the case's code; the dispatcher sign-extends the entry and adds the
 * table address.  Slot i (0..14) holds .Lashr_(i+1); the final slot
 * holds .Lashr_0.
 */
        .section .rodata, "a", @progbits
        .p2align 3
.Lunaligned_table:
        .irp    idx, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0
        .int    .Lashr_\idx - .Lunaligned_table
        .endr