4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2009, Intel Corporation
24 * All rights reserved.
28 * str[n]cmp - compare the characters of two strings
/* NOTE(review): presumably declares USE_BSF, which the exit code tests -- confirm. */
32 #include "proc64_id.h"
/* LABEL(s) token-pastes s onto the ".strcmp" prefix; every internal label in this file is written through it. */
34 #define LABEL(s) .strcmp##s
38 * Since the counter, %r11, is unsigned, we branch to strcmp_exitz
39 * if the new counter > the old one or is 0.
/* Two definitions follow: one with a body that updates the counter and one that expands to nothing. */
41 #define UPDATE_STRNCMP_COUNTER \
42 /* calculate left number to compare */ \
43 lea
-16(%rcx
, %r11), %r9; \
45 jb LABEL
(strcmp_exitz
); \
47 je LABEL
(strcmp_exitz
); \
50 #define UPDATE_STRNCMP_COUNTER
54 * This implementation uses SSE to compare up to 16 bytes at a time.
59 je LABEL
(strcmp_exitz
)
/* Entry point.  The two masks below keep the low six bits of %rcx and %rax, i.e. the offsets inside a 64-byte cache line. */
62 ENTRY
(strcmp
) /* (const char *, const char *) */
66 and $
0x3f, %rcx
/* rsi alignment in cache line */
67 and $
0x3f, %rax
/* rdi alignment in cache line */
69 ja LABEL
(crosscache
) /* rsi: 16-byte load will cross cache line */
71 ja LABEL
(crosscache
) /* rdi: 16-byte load will cross cache line */
/* Fast path: a single 16-byte pass looks for both a NUL byte and a mismatch. */
76 pxor
%xmm0
, %xmm0
/* clear %xmm0 for null char checks */
77 pcmpeqb
%xmm1
, %xmm0
/* Any null chars? */
78 pcmpeqb
%xmm2
, %xmm1
/* compare first 16 bytes for equality */
79 psubb
%xmm0
, %xmm1
/* packed sub of comparison results*/
81 sub $
0xffff, %edx
/* if first 16 bytes are same, edx == 0xffff */
82 jnz LABEL
(less16bytes
) /* If not, found mismatch or null char */
85 jbe LABEL
(strcmp_exitz
) /* finish comparison */
87 add $
16, %rsi
/* prepare to search next 16 bytes */
88 add $
16, %rdi
/* prepare to search next 16 bytes */
91 * Determine rdi and rsi string offsets from 16-byte alignment.
92 * Use relative offset difference between the two to determine which case
/* Round both pointers down to a 16-byte boundary; %ecx and %eax keep the low four bits as the per-string offsets. */
97 and $
0xfffffffffffffff0, %rsi
/* force %rsi to be 16 byte aligned */
98 and $
0xfffffffffffffff0, %rdi
/* force %rdi to be 16 byte aligned */
99 mov $
0xffff, %edx
/* for equivalent offset */
101 and $
0xf, %ecx
/* offset of rsi */
102 and $
0xf, %eax
/* offset of rdi */
104 je LABEL
(ashr_0
) /* both strings have the same alignment */
106 mov
%edx
, %r8d
/* r8d is offset flag for exit tail */
/* Dispatch: load the signed 32-bit table entry at index %r9, add it to the table base and jump there. */
112 lea LABEL
(unaligned_table
)(%rip
), %r10
113 movslq
(%r10, %r9, 4), %r9
114 lea
(%r10, %r9), %r10
115 jmp
*%r10 /* jump to corresponding case */
118 * ashr_0 handles the following cases:
119 * str1 offset = str2 offset
/* Same-offset case: reached by the "je" in the entry code and also listed as entry 0 of unaligned_table. */
124 pxor
%xmm0
, %xmm0
/* clear %xmm0 for null char check */
125 pcmpeqb
%xmm1
, %xmm0
/* Any null chars? */
126 pcmpeqb
(%rdi
), %xmm1
/* compare 16 bytes for equality */
127 psubb
%xmm0
, %xmm1
/* packed sub of comparison results*/
129 shr
%cl
, %edx
/* adjust 0xffff for offset */
130 shr
%cl
, %r9d
/* adjust for 16-byte offset */
133 * edx must be the same with r9d if in left byte (16-rcx) is equal to
134 * the start from (16-rax) and no null char was seen.
136 jne LABEL
(less32bytes
) /* mismatch or null char */
137 UPDATE_STRNCMP_COUNTER
140 pxor
%xmm0
, %xmm0
/* clear xmm0, may have changed above */
143 * Now both strings are aligned at 16-byte boundary. Loop over strings
144 * checking 32-bytes per iteration.
/* First of the two 16-byte pairs handled per iteration; both loads are aligned (movdqa) at index %rcx. */
148 movdqa
(%rsi
, %rcx
), %xmm1
149 movdqa
(%rdi
, %rcx
), %xmm2
156 jnz LABEL
(exit
) /* mismatch or null char seen */
/* strncmp build only: branch to strcmp_exitz, the exit used when the length counter runs out. */
158 #ifdef USE_AS_STRNCMP
160 jbe LABEL
(strcmp_exitz
)
/* Second 16-byte pair of the iteration. */
163 movdqa
(%rsi
, %rcx
), %xmm1
164 movdqa
(%rdi
, %rcx
), %xmm2
172 #ifdef USE_AS_STRNCMP
174 jbe LABEL
(strcmp_exitz
)
177 jmp LABEL
(loop_ashr_0
)
180 * ashr_1 handles the following cases:
181 * abs(str1 offset - str2 offset) = 15
/* One of the unaligned cases dispatched through unaligned_table; %r9d is set to 1 below for the exit code. */
188 pcmpeqb
%xmm1
, %xmm0
/* Any null chars? */
189 pslldq $
15, %xmm2
/* shift first string to align with second */
190 pcmpeqb
%xmm1
, %xmm2
/* compare 16 bytes for equality */
191 psubb
%xmm0
, %xmm2
/* packed sub of comparison results*/
193 shr
%cl
, %edx
/* adjust 0xffff for offset */
194 shr
%cl
, %r9d
/* adjust for 16-byte offset */
196 jnz LABEL
(less32bytes
) /* mismatch or null char seen */
198 UPDATE_STRNCMP_COUNTER
201 mov $
16, %rcx
/* index for loads */
202 mov $
1, %r9d
/* rdi bytes already examined. Used in exit code */
204 * Setup %r10 value allows us to detect crossing a page boundary.
205 * When %r10 goes positive we are crossing a page boundary and
206 * need to do a nibble.
209 and $
0xfff, %r10 /* offset into 4K page */
210 sub $
0x1000, %r10 /* subtract 4K pagesize */
216 jg LABEL
(nibble_ashr_1
) /* cross page boundary */
/* gobble: aligned 16-byte loads from both strings at index %rcx. */
218 LABEL
(gobble_ashr_1
):
219 movdqa
(%rsi
, %rcx
), %xmm1
220 movdqa
(%rdi
, %rcx
), %xmm2
221 movdqa
%xmm2
, %xmm4
/* store for next cycle */
225 por
%xmm3
, %xmm2
/* merge into one 16byte value */
/* strncmp build only: branch to strcmp_exitz, the exit used when the length counter runs out. */
234 #ifdef USE_AS_STRNCMP
236 jbe LABEL
(strcmp_exitz
)
242 jg LABEL
(nibble_ashr_1
) /* cross page boundary */
/* Second 16-byte step of the iteration; same load/merge sequence as above. */
244 movdqa
(%rsi
, %rcx
), %xmm1
245 movdqa
(%rdi
, %rcx
), %xmm2
246 movdqa
%xmm2
, %xmm4
/* store for next cycle */
250 por
%xmm3
, %xmm2
/* merge into one 16byte value */
259 #ifdef USE_AS_STRNCMP
261 jbe LABEL
(strcmp_exitz
)
265 jmp LABEL
(loop_ashr_1
)
268 * Nibble avoids loads across page boundary. This is to avoid a potential
269 * access into unmapped memory.
/* nibble: target of the jg checks above; %r10 is moved back by 0x1000 before control returns to gobble. */
272 LABEL
(nibble_ashr_1
):
274 movdqa
(%rsi
, %rcx
), %xmm1
281 #ifdef USE_AS_STRNCMP
283 jbe LABEL
(strcmp_exitz
)
286 sub $
0x1000, %r10 /* subtract 4K from %r10 */
287 jmp LABEL
(gobble_ashr_1
)
290 * ashr_2 handles the following cases:
291 * abs(str1 offset - str2 offset) = 14
/* One of the unaligned cases dispatched through unaligned_table; %r9d is set to 2 below for the exit code. */
306 jnz LABEL
(less32bytes
)
308 UPDATE_STRNCMP_COUNTER
311 mov $
16, %rcx
/* index for loads */
312 mov $
2, %r9d
/* rdi bytes already examined. Used in exit code */
314 * Setup %r10 value allows us to detect crossing a page boundary.
315 * When %r10 goes positive we are crossing a page boundary and
316 * need to do a nibble.
319 and $
0xfff, %r10 /* offset into 4K page */
320 sub $
0x1000, %r10 /* subtract 4K pagesize */
326 jg LABEL
(nibble_ashr_2
)
/* gobble: aligned 16-byte loads from both strings at index %rcx. */
328 LABEL
(gobble_ashr_2
):
329 movdqa
(%rsi
, %rcx
), %xmm1
330 movdqa
(%rdi
, %rcx
), %xmm2
/* strncmp build only: branch to strcmp_exitz, the exit used when the length counter runs out. */
344 #ifdef USE_AS_STRNCMP
346 jbe LABEL
(strcmp_exitz
)
353 jg LABEL
(nibble_ashr_2
) /* cross page boundary */
/* Second 16-byte step of the iteration. */
355 movdqa
(%rsi
, %rcx
), %xmm1
356 movdqa
(%rdi
, %rcx
), %xmm2
370 #ifdef USE_AS_STRNCMP
372 jbe LABEL
(strcmp_exitz
)
377 jmp LABEL
(loop_ashr_2
)
/* nibble: target of the jg checks above; %r10 is moved back by 0x1000 before control returns to gobble. */
380 LABEL
(nibble_ashr_2
):
382 movdqa
(%rsi
, %rcx
), %xmm1
389 #ifdef USE_AS_STRNCMP
391 jbe LABEL
(strcmp_exitz
)
394 sub $
0x1000, %r10 /* subtract 4K from %r10 */
395 jmp LABEL
(gobble_ashr_2
)
398 * ashr_3 handles the following cases:
399 * abs(str1 offset - str2 offset) = 13
/* One of the unaligned cases dispatched through unaligned_table; %r9d is set to 3 below for the exit code. */
414 jnz LABEL
(less32bytes
)
417 UPDATE_STRNCMP_COUNTER
420 mov $
16, %rcx
/* index for loads */
421 mov $
3, %r9d
/* rdi bytes already examined. Used in exit code */
423 * Setup %r10 value allows us to detect crossing a page boundary.
424 * When %r10 goes positive we are crossing a page boundary and
425 * need to do a nibble.
428 and $
0xfff, %r10 /* offset into 4K page */
429 sub $
0x1000, %r10 /* subtract 4K pagesize */
435 jg LABEL
(nibble_ashr_3
)
/* gobble: aligned 16-byte loads from both strings at index %rcx. */
437 LABEL
(gobble_ashr_3
):
438 movdqa
(%rsi
, %rcx
), %xmm1
439 movdqa
(%rdi
, %rcx
), %xmm2
/* strncmp build only: branch to strcmp_exitz, the exit used when the length counter runs out. */
453 #ifdef USE_AS_STRNCMP
455 jbe LABEL
(strcmp_exitz
)
462 jg LABEL
(nibble_ashr_3
) /* cross page boundary */
/* Second 16-byte step of the iteration. */
464 movdqa
(%rsi
, %rcx
), %xmm1
465 movdqa
(%rdi
, %rcx
), %xmm2
479 #ifdef USE_AS_STRNCMP
481 jbe LABEL
(strcmp_exitz
)
486 jmp LABEL
(loop_ashr_3
)
/* nibble: target of the jg checks above; %r10 is moved back by 0x1000 before control returns to gobble. */
489 LABEL
(nibble_ashr_3
):
491 movdqa
(%rsi
, %rcx
), %xmm1
498 #ifdef USE_AS_STRNCMP
500 jbe LABEL
(strcmp_exitz
)
503 sub $
0x1000, %r10 /* subtract 4K from %r10 */
504 jmp LABEL
(gobble_ashr_3
)
507 * ashr_4 handles the following cases:
508 * abs(str1 offset - str2 offset) = 12
/* One of the unaligned cases dispatched through unaligned_table; %r9d is set to 4 below for the exit code. */
523 jnz LABEL
(less32bytes
)
526 UPDATE_STRNCMP_COUNTER
529 mov $
16, %rcx
/* index for loads */
530 mov $
4, %r9d
/* rdi bytes already examined. Used in exit code */
532 * Setup %r10 value allows us to detect crossing a page boundary.
533 * When %r10 goes positive we are crossing a page boundary and
534 * need to do a nibble.
537 and $
0xfff, %r10 /* offset into 4K page */
538 sub $
0x1000, %r10 /* subtract 4K pagesize */
544 jg LABEL
(nibble_ashr_4
)
/* gobble: aligned 16-byte loads from both strings at index %rcx. */
546 LABEL
(gobble_ashr_4
):
547 movdqa
(%rsi
, %rcx
), %xmm1
548 movdqa
(%rdi
, %rcx
), %xmm2
/* strncmp build only: branch to strcmp_exitz, the exit used when the length counter runs out. */
562 #ifdef USE_AS_STRNCMP
564 jbe LABEL
(strcmp_exitz
)
571 jg LABEL
(nibble_ashr_4
) /* cross page boundary */
/* Second 16-byte step of the iteration. */
573 movdqa
(%rsi
, %rcx
), %xmm1
574 movdqa
(%rdi
, %rcx
), %xmm2
588 #ifdef USE_AS_STRNCMP
590 jbe LABEL
(strcmp_exitz
)
595 jmp LABEL
(loop_ashr_4
)
/* nibble: target of the jg checks above; %r10 is moved back by 0x1000 before control returns to gobble. */
598 LABEL
(nibble_ashr_4
):
600 movdqa
(%rsi
, %rcx
), %xmm1
607 #ifdef USE_AS_STRNCMP
609 jbe LABEL
(strcmp_exitz
)
612 sub $
0x1000, %r10 /* subtract 4K from %r10 */
613 jmp LABEL
(gobble_ashr_4
)
616 * ashr_5 handles the following cases:
617 * abs(str1 offset - str2 offset) = 11
/* One of the unaligned cases dispatched through unaligned_table; %r9d is set to 5 below for the exit code. */
632 jnz LABEL
(less32bytes
)
635 UPDATE_STRNCMP_COUNTER
638 mov $
16, %rcx
/* index for loads */
639 mov $
5, %r9d
/* rdi bytes already examined. Used in exit code */
641 * Setup %r10 value allows us to detect crossing a page boundary.
642 * When %r10 goes positive we are crossing a page boundary and
643 * need to do a nibble.
646 and $
0xfff, %r10 /* offset into 4K page */
647 sub $
0x1000, %r10 /* subtract 4K pagesize */
653 jg LABEL
(nibble_ashr_5
)
/* gobble: aligned 16-byte loads from both strings at index %rcx. */
655 LABEL
(gobble_ashr_5
):
656 movdqa
(%rsi
, %rcx
), %xmm1
657 movdqa
(%rdi
, %rcx
), %xmm2
/* strncmp build only: branch to strcmp_exitz, the exit used when the length counter runs out. */
671 #ifdef USE_AS_STRNCMP
673 jbe LABEL
(strcmp_exitz
)
680 jg LABEL
(nibble_ashr_5
) /* cross page boundary */
/* Second 16-byte step of the iteration. */
682 movdqa
(%rsi
, %rcx
), %xmm1
683 movdqa
(%rdi
, %rcx
), %xmm2
697 #ifdef USE_AS_STRNCMP
699 jbe LABEL
(strcmp_exitz
)
704 jmp LABEL
(loop_ashr_5
)
/* nibble: target of the jg checks above; %r10 is moved back by 0x1000 before control returns to gobble. */
707 LABEL
(nibble_ashr_5
):
709 movdqa
(%rsi
, %rcx
), %xmm1
716 #ifdef USE_AS_STRNCMP
718 jbe LABEL
(strcmp_exitz
)
721 sub $
0x1000, %r10 /* subtract 4K from %r10 */
722 jmp LABEL
(gobble_ashr_5
)
725 * ashr_6 handles the following cases:
726 * abs(str1 offset - str2 offset) = 10
/* One of the unaligned cases dispatched through unaligned_table; %r9d is set to 6 below for the exit code. */
741 jnz LABEL
(less32bytes
)
744 UPDATE_STRNCMP_COUNTER
747 mov $
16, %rcx
/* index for loads */
748 mov $
6, %r9d
/* rdi bytes already examined. Used in exit code */
750 * Setup %r10 value allows us to detect crossing a page boundary.
751 * When %r10 goes positive we are crossing a page boundary and
752 * need to do a nibble.
755 and $
0xfff, %r10 /* offset into 4K page */
756 sub $
0x1000, %r10 /* subtract 4K pagesize */
762 jg LABEL
(nibble_ashr_6
)
/* gobble: aligned 16-byte loads from both strings at index %rcx. */
764 LABEL
(gobble_ashr_6
):
765 movdqa
(%rsi
, %rcx
), %xmm1
766 movdqa
(%rdi
, %rcx
), %xmm2
/* strncmp build only: branch to strcmp_exitz, the exit used when the length counter runs out. */
780 #ifdef USE_AS_STRNCMP
782 jbe LABEL
(strcmp_exitz
)
789 jg LABEL
(nibble_ashr_6
) /* cross page boundary */
/* Second 16-byte step of the iteration. */
791 movdqa
(%rsi
, %rcx
), %xmm1
792 movdqa
(%rdi
, %rcx
), %xmm2
806 #ifdef USE_AS_STRNCMP
808 jbe LABEL
(strcmp_exitz
)
813 jmp LABEL
(loop_ashr_6
)
/* nibble: target of the jg checks above; %r10 is moved back by 0x1000 before control returns to gobble. */
816 LABEL
(nibble_ashr_6
):
818 movdqa
(%rsi
, %rcx
), %xmm1
825 #ifdef USE_AS_STRNCMP
827 jbe LABEL
(strcmp_exitz
)
830 sub $
0x1000, %r10 /* subtract 4K from %r10 */
831 jmp LABEL
(gobble_ashr_6
)
834 * ashr_7 handles the following cases:
835 * abs(str1 offset - str2 offset) = 9
/* One of the unaligned cases dispatched through unaligned_table; %r9d is set to 7 below for the exit code. */
850 jnz LABEL
(less32bytes
)
853 UPDATE_STRNCMP_COUNTER
856 mov $
16, %rcx
/* index for loads */
857 mov $
7, %r9d
/* rdi bytes already examined. Used in exit code */
859 * Setup %r10 value allows us to detect crossing a page boundary.
860 * When %r10 goes positive we are crossing a page boundary and
861 * need to do a nibble.
864 and $
0xfff, %r10 /* offset into 4K page */
865 sub $
0x1000, %r10 /* subtract 4K pagesize */
871 jg LABEL
(nibble_ashr_7
)
/* gobble: aligned 16-byte loads from both strings at index %rcx. */
873 LABEL
(gobble_ashr_7
):
874 movdqa
(%rsi
, %rcx
), %xmm1
875 movdqa
(%rdi
, %rcx
), %xmm2
/* strncmp build only: branch to strcmp_exitz, the exit used when the length counter runs out. */
889 #ifdef USE_AS_STRNCMP
891 jbe LABEL
(strcmp_exitz
)
898 jg LABEL
(nibble_ashr_7
) /* cross page boundary */
/* Second 16-byte step of the iteration. */
900 movdqa
(%rsi
, %rcx
), %xmm1
901 movdqa
(%rdi
, %rcx
), %xmm2
915 #ifdef USE_AS_STRNCMP
917 jbe LABEL
(strcmp_exitz
)
922 jmp LABEL
(loop_ashr_7
)
/* nibble: target of the jg checks above; %r10 is moved back by 0x1000 before control returns to gobble. */
925 LABEL
(nibble_ashr_7
):
927 movdqa
(%rsi
, %rcx
), %xmm1
934 #ifdef USE_AS_STRNCMP
936 jbe LABEL
(strcmp_exitz
)
939 sub $
0x1000, %r10 /* subtract 4K from %r10 */
940 jmp LABEL
(gobble_ashr_7
)
943 * ashr_8 handles the following cases:
944 * abs(str1 offset - str2 offset) = 8
/* One of the unaligned cases dispatched through unaligned_table; %r9d is set to 8 below for the exit code. */
959 jnz LABEL
(less32bytes
)
962 UPDATE_STRNCMP_COUNTER
965 mov $
16, %rcx
/* index for loads */
966 mov $
8, %r9d
/* rdi bytes already examined. Used in exit code */
968 * Setup %r10 value allows us to detect crossing a page boundary.
969 * When %r10 goes positive we are crossing a page boundary and
970 * need to do a nibble.
973 and $
0xfff, %r10 /* offset into 4K page */
974 sub $
0x1000, %r10 /* subtract 4K pagesize */
980 jg LABEL
(nibble_ashr_8
)
/* gobble: aligned 16-byte loads from both strings at index %rcx. */
982 LABEL
(gobble_ashr_8
):
983 movdqa
(%rsi
, %rcx
), %xmm1
984 movdqa
(%rdi
, %rcx
), %xmm2
/* strncmp build only: branch to strcmp_exitz, the exit used when the length counter runs out. */
998 #ifdef USE_AS_STRNCMP
1000 jbe LABEL
(strcmp_exitz
)
1007 jg LABEL
(nibble_ashr_8
) /* cross page boundary */
/* Second 16-byte step of the iteration. */
1009 movdqa
(%rsi
, %rcx
), %xmm1
1010 movdqa
(%rdi
, %rcx
), %xmm2
/* NUL test into %xmm0, equality test into %xmm1, then the byte mask of %xmm1 goes to %edx. */
1017 pcmpeqb
%xmm1
, %xmm0
1018 pcmpeqb
%xmm2
, %xmm1
1020 pmovmskb
%xmm1
, %edx
1024 #ifdef USE_AS_STRNCMP
1026 jbe LABEL
(strcmp_exitz
)
1031 jmp LABEL
(loop_ashr_8
)
/* nibble: target of the jg checks above; compares against the saved %xmm4, then moves %r10 back by 0x1000 and rejoins gobble. */
1034 LABEL
(nibble_ashr_8
):
1036 movdqa
(%rsi
, %rcx
), %xmm1
1037 pcmpeqb
%xmm1
, %xmm0
1038 pcmpeqb
%xmm4
, %xmm1
1040 pmovmskb
%xmm1
, %edx
1043 #ifdef USE_AS_STRNCMP
1045 jbe LABEL
(strcmp_exitz
)
1048 sub $
0x1000, %r10 /* subtract 4K from %r10 */
1049 jmp LABEL
(gobble_ashr_8
)
1052 * ashr_9 handles the following cases:
1053 * abs(str1 offset - str2 offset) = 7
/* One of the unaligned cases dispatched through unaligned_table; %r9d is set to 9 below for the exit code. */
1058 movdqa
(%rdi
), %xmm2
1059 movdqa
(%rsi
), %xmm1
1060 pcmpeqb
%xmm1
, %xmm0
1062 pcmpeqb
%xmm1
, %xmm2
1064 pmovmskb
%xmm2
, %r9d
1068 jnz LABEL
(less32bytes
)
/* Keep the first rdi block in %xmm3. */
1069 movdqa
(%rdi
), %xmm3
1071 UPDATE_STRNCMP_COUNTER
1074 mov $
16, %rcx
/* index for loads */
1075 mov $
9, %r9d
/* rdi bytes already examined. Used in exit code */
1077 * Setup %r10 value allows us to detect crossing a page boundary.
1078 * When %r10 goes positive we are crossing a page boundary and
1079 * need to do a nibble.
1082 and $
0xfff, %r10 /* offset into 4K page */
1083 sub $
0x1000, %r10 /* subtract 4K pagesize */
1089 jg LABEL
(nibble_ashr_9
)
/* gobble: aligned 16-byte loads from both strings at index %rcx. */
1091 LABEL
(gobble_ashr_9
):
1092 movdqa
(%rsi
, %rcx
), %xmm1
1093 movdqa
(%rdi
, %rcx
), %xmm2
/* NUL test into %xmm0, equality test into %xmm1, then the byte mask of %xmm1 goes to %edx. */
1100 pcmpeqb
%xmm1
, %xmm0
1101 pcmpeqb
%xmm2
, %xmm1
1103 pmovmskb
%xmm1
, %edx
/* strncmp build only: branch to strcmp_exitz, the exit used when the length counter runs out. */
1107 #ifdef USE_AS_STRNCMP
1109 jbe LABEL
(strcmp_exitz
)
1116 jg LABEL
(nibble_ashr_9
) /* cross page boundary */
/* Second 16-byte step of the iteration. */
1118 movdqa
(%rsi
, %rcx
), %xmm1
1119 movdqa
(%rdi
, %rcx
), %xmm2
1126 pcmpeqb
%xmm1
, %xmm0
1127 pcmpeqb
%xmm2
, %xmm1
1129 pmovmskb
%xmm1
, %edx
1133 #ifdef USE_AS_STRNCMP
1135 jbe LABEL
(strcmp_exitz
)
1139 movdqa
%xmm4
, %xmm3
/* store for next cycle */
1140 jmp LABEL
(loop_ashr_9
)
/* nibble: target of the jg checks above; compares against the saved %xmm4, then moves %r10 back by 0x1000 and rejoins gobble. */
1143 LABEL
(nibble_ashr_9
):
1145 movdqa
(%rsi
, %rcx
), %xmm1
1146 pcmpeqb
%xmm1
, %xmm0
1147 pcmpeqb
%xmm4
, %xmm1
1149 pmovmskb
%xmm1
, %edx
1152 #ifdef USE_AS_STRNCMP
1154 jbe LABEL
(strcmp_exitz
)
1157 sub $
0x1000, %r10 /* subtract 4K from %r10 */
1158 jmp LABEL
(gobble_ashr_9
)
1161 * ashr_10 handles the following cases:
1162 * abs(str1 offset - str2 offset) = 6
/* One of the unaligned cases dispatched through unaligned_table; %r9d is set to 10 below for the exit code. */
1167 movdqa
(%rdi
), %xmm2
1168 movdqa
(%rsi
), %xmm1
1169 pcmpeqb
%xmm1
, %xmm0
1171 pcmpeqb
%xmm1
, %xmm2
1173 pmovmskb
%xmm2
, %r9d
1177 jnz LABEL
(less32bytes
)
/* Keep the first rdi block in %xmm3. */
1178 movdqa
(%rdi
), %xmm3
1180 UPDATE_STRNCMP_COUNTER
1183 mov $
16, %rcx
/* index for loads */
1184 mov $
10, %r9d
/* rdi bytes already examined. Used in exit code */
1186 * Setup %r10 value allows us to detect crossing a page boundary.
1187 * When %r10 goes positive we are crossing a page boundary and
1188 * need to do a nibble.
1191 and $
0xfff, %r10 /* offset into 4K page */
1192 sub $
0x1000, %r10 /* subtract 4K pagesize */
/* Loop head: the jg below diverts to the nibble path. */
1196 LABEL
(loop_ashr_10
):
1198 jg LABEL
(nibble_ashr_10
)
/* gobble: aligned 16-byte loads from both strings at index %rcx. */
1200 LABEL
(gobble_ashr_10
):
1201 movdqa
(%rsi
, %rcx
), %xmm1
1202 movdqa
(%rdi
, %rcx
), %xmm2
/* NUL test into %xmm0, equality test into %xmm1, then the byte mask of %xmm1 goes to %edx. */
1209 pcmpeqb
%xmm1
, %xmm0
1210 pcmpeqb
%xmm2
, %xmm1
1212 pmovmskb
%xmm1
, %edx
/* strncmp build only: branch to strcmp_exitz, the exit used when the length counter runs out. */
1216 #ifdef USE_AS_STRNCMP
1218 jbe LABEL
(strcmp_exitz
)
1225 jg LABEL
(nibble_ashr_10
) /* cross page boundary */
/* Second 16-byte step of the iteration. */
1227 movdqa
(%rsi
, %rcx
), %xmm1
1228 movdqa
(%rdi
, %rcx
), %xmm2
1235 pcmpeqb
%xmm1
, %xmm0
1236 pcmpeqb
%xmm2
, %xmm1
1238 pmovmskb
%xmm1
, %edx
1242 #ifdef USE_AS_STRNCMP
1244 jbe LABEL
(strcmp_exitz
)
1249 jmp LABEL
(loop_ashr_10
)
/* nibble: target of the jg checks above; compares against the saved %xmm4, then moves %r10 back by 0x1000 and rejoins gobble. */
1252 LABEL
(nibble_ashr_10
):
1254 movdqa
(%rsi
, %rcx
), %xmm1
1255 pcmpeqb
%xmm1
, %xmm0
1256 pcmpeqb
%xmm4
, %xmm1
1258 pmovmskb
%xmm1
, %edx
1261 #ifdef USE_AS_STRNCMP
1263 jbe LABEL
(strcmp_exitz
)
1266 sub $
0x1000, %r10 /* subtract 4K from %r10 */
1267 jmp LABEL
(gobble_ashr_10
)
1270 * ashr_11 handles the following cases:
1271 * abs(str1 offset - str2 offset) = 5
/* One of the unaligned cases dispatched through unaligned_table; %r9d is set to 11 below for the exit code. */
1276 movdqa
(%rdi
), %xmm2
1277 movdqa
(%rsi
), %xmm1
1278 pcmpeqb
%xmm1
, %xmm0
1280 pcmpeqb
%xmm1
, %xmm2
1282 pmovmskb
%xmm2
, %r9d
1286 jnz LABEL
(less32bytes
)
/* Keep the first rdi block in %xmm3. */
1287 movdqa
(%rdi
), %xmm3
1289 UPDATE_STRNCMP_COUNTER
1292 mov $
16, %rcx
/* index for loads */
1293 mov $
11, %r9d
/* rdi bytes already examined. Used in exit code */
1295 * Setup %r10 value allows us to detect crossing a page boundary.
1296 * When %r10 goes positive we are crossing a page boundary and
1297 * need to do a nibble.
1300 and $
0xfff, %r10 /* offset into 4K page */
1301 sub $
0x1000, %r10 /* subtract 4K pagesize */
/* Loop head: the jg below diverts to the nibble path. */
1305 LABEL
(loop_ashr_11
):
1307 jg LABEL
(nibble_ashr_11
)
/* gobble: aligned 16-byte loads from both strings at index %rcx. */
1309 LABEL
(gobble_ashr_11
):
1310 movdqa
(%rsi
, %rcx
), %xmm1
1311 movdqa
(%rdi
, %rcx
), %xmm2
/* NUL test into %xmm0, equality test into %xmm1, then the byte mask of %xmm1 goes to %edx. */
1318 pcmpeqb
%xmm1
, %xmm0
1319 pcmpeqb
%xmm2
, %xmm1
1321 pmovmskb
%xmm1
, %edx
/* strncmp build only: branch to strcmp_exitz, the exit used when the length counter runs out. */
1325 #ifdef USE_AS_STRNCMP
1327 jbe LABEL
(strcmp_exitz
)
1334 jg LABEL
(nibble_ashr_11
) /* cross page boundary */
/* Second 16-byte step of the iteration. */
1336 movdqa
(%rsi
, %rcx
), %xmm1
1337 movdqa
(%rdi
, %rcx
), %xmm2
1344 pcmpeqb
%xmm1
, %xmm0
1345 pcmpeqb
%xmm2
, %xmm1
1347 pmovmskb
%xmm1
, %edx
1351 #ifdef USE_AS_STRNCMP
1353 jbe LABEL
(strcmp_exitz
)
1358 jmp LABEL
(loop_ashr_11
)
/* nibble: target of the jg checks above; compares against the saved %xmm4, then moves %r10 back by 0x1000 and rejoins gobble. */
1361 LABEL
(nibble_ashr_11
):
1363 movdqa
(%rsi
, %rcx
), %xmm1
1364 pcmpeqb
%xmm1
, %xmm0
1365 pcmpeqb
%xmm4
, %xmm1
1367 pmovmskb
%xmm1
, %edx
1370 #ifdef USE_AS_STRNCMP
1372 jbe LABEL
(strcmp_exitz
)
1375 sub $
0x1000, %r10 /* subtract 4K from %r10 */
1376 jmp LABEL
(gobble_ashr_11
)
1379 * ashr_12 handles the following cases:
1380 * abs(str1 offset - str2 offset) = 4
/* One of the unaligned cases dispatched through unaligned_table; %r9d is set to 12 below for the exit code. */
1385 movdqa
(%rdi
), %xmm2
1386 movdqa
(%rsi
), %xmm1
1387 pcmpeqb
%xmm1
, %xmm0
1389 pcmpeqb
%xmm1
, %xmm2
1391 pmovmskb
%xmm2
, %r9d
1395 jnz LABEL
(less32bytes
)
/* Keep the first rdi block in %xmm3. */
1396 movdqa
(%rdi
), %xmm3
1398 UPDATE_STRNCMP_COUNTER
1401 mov $
16, %rcx
/* index for loads */
1402 mov $
12, %r9d
/* rdi bytes already examined. Used in exit code */
1404 * Setup %r10 value allows us to detect crossing a page boundary.
1405 * When %r10 goes positive we are crossing a page boundary and
1406 * need to do a nibble.
1409 and $
0xfff, %r10 /* offset into 4K page */
1410 sub $
0x1000, %r10 /* subtract 4K pagesize */
/* Loop head: the jg below diverts to the nibble path. */
1414 LABEL
(loop_ashr_12
):
1416 jg LABEL
(nibble_ashr_12
)
/* gobble: aligned 16-byte loads from both strings at index %rcx. */
1418 LABEL
(gobble_ashr_12
):
1419 movdqa
(%rsi
, %rcx
), %xmm1
1420 movdqa
(%rdi
, %rcx
), %xmm2
/* NUL test into %xmm0, equality test into %xmm1, then the byte mask of %xmm1 goes to %edx. */
1427 pcmpeqb
%xmm1
, %xmm0
1428 pcmpeqb
%xmm2
, %xmm1
1430 pmovmskb
%xmm1
, %edx
/* strncmp build only: branch to strcmp_exitz, the exit used when the length counter runs out. */
1434 #ifdef USE_AS_STRNCMP
1436 jbe LABEL
(strcmp_exitz
)
1443 jg LABEL
(nibble_ashr_12
) /* cross page boundary */
/* Second 16-byte step of the iteration. */
1445 movdqa
(%rsi
, %rcx
), %xmm1
1446 movdqa
(%rdi
, %rcx
), %xmm2
1453 pcmpeqb
%xmm1
, %xmm0
1454 pcmpeqb
%xmm2
, %xmm1
1456 pmovmskb
%xmm1
, %edx
1460 #ifdef USE_AS_STRNCMP
1462 jbe LABEL
(strcmp_exitz
)
1467 jmp LABEL
(loop_ashr_12
)
/* nibble: target of the jg checks above; compares against the saved %xmm4, then moves %r10 back by 0x1000 and rejoins gobble. */
1470 LABEL
(nibble_ashr_12
):
1472 movdqa
(%rsi
, %rcx
), %xmm1
1473 pcmpeqb
%xmm1
, %xmm0
1474 pcmpeqb
%xmm4
, %xmm1
1476 pmovmskb
%xmm1
, %edx
1479 #ifdef USE_AS_STRNCMP
1481 jbe LABEL
(strcmp_exitz
)
1484 sub $
0x1000, %r10 /* subtract 4K from %r10 */
1485 jmp LABEL
(gobble_ashr_12
)
1488 * ashr_13 handles the following cases:
1489 * abs(str1 offset - str2 offset) = 3
/* One of the unaligned cases dispatched through unaligned_table; %r9d is set to 13 below for the exit code. */
1494 movdqa
(%rdi
), %xmm2
1495 movdqa
(%rsi
), %xmm1
1496 pcmpeqb
%xmm1
, %xmm0
1498 pcmpeqb
%xmm1
, %xmm2
1500 pmovmskb
%xmm2
, %r9d
1504 jnz LABEL
(less32bytes
)
/* Keep the first rdi block in %xmm3. */
1505 movdqa
(%rdi
), %xmm3
1507 UPDATE_STRNCMP_COUNTER
1510 mov $
16, %rcx
/* index for loads */
1511 mov $
13, %r9d
/* rdi bytes already examined. Used in exit code */
1513 * Setup %r10 value allows us to detect crossing a page boundary.
1514 * When %r10 goes positive we are crossing a page boundary and
1515 * need to do a nibble.
1518 and $
0xfff, %r10 /* offset into 4K page */
1519 sub $
0x1000, %r10 /* subtract 4K pagesize */
/* Loop head: the jg below diverts to the nibble path. */
1523 LABEL
(loop_ashr_13
):
1525 jg LABEL
(nibble_ashr_13
)
/* gobble: aligned 16-byte loads from both strings at index %rcx. */
1527 LABEL
(gobble_ashr_13
):
1528 movdqa
(%rsi
, %rcx
), %xmm1
1529 movdqa
(%rdi
, %rcx
), %xmm2
/* NUL test into %xmm0, equality test into %xmm1, then the byte mask of %xmm1 goes to %edx. */
1536 pcmpeqb
%xmm1
, %xmm0
1537 pcmpeqb
%xmm2
, %xmm1
1539 pmovmskb
%xmm1
, %edx
/* strncmp build only: branch to strcmp_exitz, the exit used when the length counter runs out. */
1543 #ifdef USE_AS_STRNCMP
1545 jbe LABEL
(strcmp_exitz
)
1552 jg LABEL
(nibble_ashr_13
) /* cross page boundary */
/* Second 16-byte step of the iteration. */
1554 movdqa
(%rsi
, %rcx
), %xmm1
1555 movdqa
(%rdi
, %rcx
), %xmm2
1562 pcmpeqb
%xmm1
, %xmm0
1563 pcmpeqb
%xmm2
, %xmm1
1565 pmovmskb
%xmm1
, %edx
1569 #ifdef USE_AS_STRNCMP
1571 jbe LABEL
(strcmp_exitz
)
1576 jmp LABEL
(loop_ashr_13
)
/* nibble: target of the jg checks above; compares against the saved %xmm4, then moves %r10 back by 0x1000 and rejoins gobble. */
1579 LABEL
(nibble_ashr_13
):
1581 movdqa
(%rsi
, %rcx
), %xmm1
1582 pcmpeqb
%xmm1
, %xmm0
1583 pcmpeqb
%xmm4
, %xmm1
1585 pmovmskb
%xmm1
, %edx
1588 #ifdef USE_AS_STRNCMP
1590 jbe LABEL
(strcmp_exitz
)
1593 sub $
0x1000, %r10 /* subtract 4K from %r10 */
1594 jmp LABEL
(gobble_ashr_13
)
1597 * ashr_14 handles the following cases:
1598 * abs(str1 offset - str2 offset) = 2
/* One of the unaligned cases dispatched through unaligned_table; %r9d is set to 14 below for the exit code. */
1603 movdqa
(%rdi
), %xmm2
1604 movdqa
(%rsi
), %xmm1
1605 pcmpeqb
%xmm1
, %xmm0
1607 pcmpeqb
%xmm1
, %xmm2
1609 pmovmskb
%xmm2
, %r9d
1613 jnz LABEL
(less32bytes
)
/* Keep the first rdi block in %xmm3. */
1614 movdqa
(%rdi
), %xmm3
1616 UPDATE_STRNCMP_COUNTER
1619 mov $
16, %rcx
/* index for loads */
1620 mov $
14, %r9d
/* rdi bytes already examined. Used in exit code */
1622 * Setup %r10 value allows us to detect crossing a page boundary.
1623 * When %r10 goes positive we are crossing a page boundary and
1624 * need to do a nibble.
1627 and $
0xfff, %r10 /* offset into 4K page */
1628 sub $
0x1000, %r10 /* subtract 4K pagesize */
/* Loop head: the jg below diverts to the nibble path. */
1632 LABEL
(loop_ashr_14
):
1634 jg LABEL
(nibble_ashr_14
)
/* gobble: aligned 16-byte loads from both strings at index %rcx. */
1636 LABEL
(gobble_ashr_14
):
1637 movdqa
(%rsi
, %rcx
), %xmm1
1638 movdqa
(%rdi
, %rcx
), %xmm2
/* NUL test into %xmm0, equality test into %xmm1, then the byte mask of %xmm1 goes to %edx. */
1645 pcmpeqb
%xmm1
, %xmm0
1646 pcmpeqb
%xmm2
, %xmm1
1648 pmovmskb
%xmm1
, %edx
/* strncmp build only: branch to strcmp_exitz, the exit used when the length counter runs out. */
1652 #ifdef USE_AS_STRNCMP
1654 jbe LABEL
(strcmp_exitz
)
1661 jg LABEL
(nibble_ashr_14
) /* cross page boundary */
/* Second 16-byte step of the iteration. */
1663 movdqa
(%rsi
, %rcx
), %xmm1
1664 movdqa
(%rdi
, %rcx
), %xmm2
1671 pcmpeqb
%xmm1
, %xmm0
1672 pcmpeqb
%xmm2
, %xmm1
1674 pmovmskb
%xmm1
, %edx
1678 #ifdef USE_AS_STRNCMP
1680 jbe LABEL
(strcmp_exitz
)
1685 jmp LABEL
(loop_ashr_14
)
/* nibble: target of the jg checks above; compares against the saved %xmm4, then moves %r10 back by 0x1000 and rejoins gobble. */
1688 LABEL
(nibble_ashr_14
):
1690 movdqa
(%rsi
, %rcx
), %xmm1
1691 pcmpeqb
%xmm1
, %xmm0
1692 pcmpeqb
%xmm4
, %xmm1
1694 pmovmskb
%xmm1
, %edx
1697 #ifdef USE_AS_STRNCMP
1699 jbe LABEL
(strcmp_exitz
)
1702 sub $
0x1000, %r10 /* subtract 4K from %r10 */
1703 jmp LABEL
(gobble_ashr_14
)
1706 * ashr_15 handles the following cases:
1707 * abs(str1 offset - str2 offset) = 1
/* One of the unaligned cases dispatched through unaligned_table; %r9d is set to 15 below for the exit code. */
1712 movdqa
(%rdi
), %xmm2
1713 movdqa
(%rsi
), %xmm1
1714 pcmpeqb
%xmm1
, %xmm0
1716 pcmpeqb
%xmm1
, %xmm2
1718 pmovmskb
%xmm2
, %r9d
1722 jnz LABEL
(less32bytes
)
/* Keep the first rdi block in %xmm3. */
1724 movdqa
(%rdi
), %xmm3
1726 UPDATE_STRNCMP_COUNTER
1729 mov $
16, %rcx
/* index for loads */
1730 mov $
15, %r9d
/* rdi bytes already examined. Used in exit code */
1732 * Setup %r10 value allows us to detect crossing a page boundary.
1733 * When %r10 goes positive we are crossing a page boundary and
1734 * need to do a nibble.
1737 and $
0xfff, %r10 /* offset into 4K page */
1738 sub $
0x1000, %r10 /* subtract 4K pagesize */
/* Loop head: the jg below diverts to the nibble path. */
1742 LABEL
(loop_ashr_15
):
1744 jg LABEL
(nibble_ashr_15
)
/* gobble: aligned 16-byte loads from both strings at index %rcx. */
1746 LABEL
(gobble_ashr_15
):
1747 movdqa
(%rsi
, %rcx
), %xmm1
1748 movdqa
(%rdi
, %rcx
), %xmm2
/* NUL test into %xmm0, equality test into %xmm1, then the byte mask of %xmm1 goes to %edx. */
1755 pcmpeqb
%xmm1
, %xmm0
1756 pcmpeqb
%xmm2
, %xmm1
1758 pmovmskb
%xmm1
, %edx
/* strncmp build only: branch to strcmp_exitz, the exit used when the length counter runs out. */
1762 #ifdef USE_AS_STRNCMP
1764 jbe LABEL
(strcmp_exitz
)
1771 jg LABEL
(nibble_ashr_15
) /* cross page boundary */
/* Second 16-byte step of the iteration. */
1773 movdqa
(%rsi
, %rcx
), %xmm1
1774 movdqa
(%rdi
, %rcx
), %xmm2
1781 pcmpeqb
%xmm1
, %xmm0
1782 pcmpeqb
%xmm2
, %xmm1
1784 pmovmskb
%xmm1
, %edx
1788 #ifdef USE_AS_STRNCMP
1790 jbe LABEL
(strcmp_exitz
)
1795 jmp LABEL
(loop_ashr_15
)
/* nibble: target of the jg checks above; compares against the saved %xmm4, then moves %r10 back by 0x1000 and rejoins gobble. */
1798 LABEL
(nibble_ashr_15
):
1800 movdqa
(%rsi
, %rcx
), %xmm1
1801 pcmpeqb
%xmm1
, %xmm0
1802 pcmpeqb
%xmm4
, %xmm1
1804 pmovmskb
%xmm1
, %edx
1807 #ifdef USE_AS_STRNCMP
1809 jbe LABEL
(strcmp_exitz
)
1812 sub $
0x1000, %r10 /* subtract 4K from %r10 */
1813 jmp LABEL
(gobble_ashr_15
)
/* Exit code: turn the loop index %rcx and the per-case value in %r9 back into byte addresses in %rdi and %rsi. */
1817 lea
-16(%r9, %rcx
), %rax
/* locate the exact offset for rdi */
1819 lea
(%rdi
, %rax
), %rdi
/* locate the exact address for first operand(rdi) */
1820 lea
(%rsi
, %rcx
), %rsi
/* locate the exact address for second operand(rsi) */
1823 xchg
%rsi
, %rdi
/* recover original order according to flag(%r8d) */
1829 * Check to see if BSF is fast on this processor. If not, use a different
/* The USE_BSF bit of .memops_method is tested here; the bsf sequence follows. */
1832 testl $USE_BSF
,.memops_method(%rip)
1834 bsf
%rdx
, %rdx
/* find and store bit index in %rdx */
1836 #ifdef USE_AS_STRNCMP
1838 jbe LABEL
(strcmp_exitz
)
1840 xor %ecx
, %ecx
/* clear %ecx */
1841 xor %eax
, %eax
/* clear %eax */
/* Load the byte at index %rdx from each string into the low byte of the cleared registers. */
1843 movb
(%rsi
, %rdx
), %cl
1844 movb
(%rdi
, %rdx
), %al
/* strcmp_exitz is defined only in the strncmp build. */
1849 #ifdef USE_AS_STRNCMP
1850 LABEL
(strcmp_exitz
):
1856 * This exit tail does not use the bsf instruction.
1861 jz LABEL
(next_8_bytes
)
1884 #ifdef USE_AS_STRNCMP
1886 jbe LABEL
(strcmp_exitz
)
1897 * never need to handle byte 0 for strncmp
/* Non-bsf exit tail: a run of strncmp-only checks, each branching to strcmp_exitz. */
1898 #ifdef USE_AS_STRNCMP
1900 jbe LABEL(strcmp_exitz)
1912 #ifdef USE_AS_STRNCMP
1914 jbe LABEL
(strcmp_exitz
)
1925 #ifdef USE_AS_STRNCMP
1927 jbe LABEL
(strcmp_exitz
)
1938 #ifdef USE_AS_STRNCMP
1940 jbe LABEL
(strcmp_exitz
)
1951 #ifdef USE_AS_STRNCMP
1953 jbe LABEL
(strcmp_exitz
)
1964 #ifdef USE_AS_STRNCMP
1966 jbe LABEL
(strcmp_exitz
)
1977 #ifdef USE_AS_STRNCMP
1979 jbe LABEL
(strcmp_exitz
)
/* next_8_bytes: target of the "jz" at the start of the non-bsf exit tail. */
1988 LABEL
(next_8_bytes
):
1991 #ifdef USE_AS_STRNCMP
1993 jbe LABEL
(strcmp_exitz
)
2016 #ifdef USE_AS_STRNCMP
2018 jbe LABEL
(strcmp_exitz
)
/* Dispatch table, placed in .rodata.  Each entry is a 32-bit offset of a handler relative to the table itself. */
2026 .pushsection .rodata
/* Entry 0 is ashr_0; entries 1 through 15 run from ashr_15 down to ashr_1. */
2028 LABEL
(unaligned_table
):
2029 .int LABEL(ashr_0) - LABEL(unaligned_table)
2030 .int LABEL(ashr_15) - LABEL(unaligned_table)
2031 .int LABEL(ashr_14) - LABEL(unaligned_table)
2032 .int LABEL(ashr_13) - LABEL(unaligned_table)
2033 .int LABEL(ashr_12) - LABEL(unaligned_table)
2034 .int LABEL(ashr_11) - LABEL(unaligned_table)
2035 .int LABEL(ashr_10) - LABEL(unaligned_table)
2036 .int LABEL(ashr_9) - LABEL(unaligned_table)
2037 .int LABEL(ashr_8) - LABEL(unaligned_table)
2038 .int LABEL(ashr_7) - LABEL(unaligned_table)
2039 .int LABEL(ashr_6) - LABEL(unaligned_table)
2040 .int LABEL(ashr_5) - LABEL(unaligned_table)
2041 .int LABEL(ashr_4) - LABEL(unaligned_table)
2042 .int LABEL(ashr_3) - LABEL(unaligned_table)
2043 .int LABEL(ashr_2) - LABEL(unaligned_table)
2044 .int LABEL(ashr_1) - LABEL(unaligned_table)
/* The closing size directive depends on whether this is the strncmp build. */
2046 #ifdef USE_AS_STRNCMP
2049 SET_SIZE
(strcmp
) /* (const char *, const char *) */