; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefix=X86 --check-prefix=X86-NOSSE
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse | FileCheck %s --check-prefix=X86 --check-prefix=SSE --check-prefix=X86-SSE1
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=SSE --check-prefix=X86-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2

; This tests codegen-time inlining/optimization of memcmp

@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1

declare i32 @memcmp(i8*, i8*, i64)

define i32 @length0(i8* %X, i8* %Y) nounwind {
; X86-NEXT: xorl %eax, %eax
; X64-NEXT: xorl %eax, %eax
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 0) nounwind
  ret i32 %m
}

define i1 @length0_eq(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length0_eq:
; X86-NEXT: movb $1, %al
; X64-LABEL: length0_eq:
; X64-NEXT: movb $1, %al
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 0) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

define i32 @length2(i8* %X, i8* %Y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: rolw $8, %cx
; X86-NEXT: rolw $8, %dx
; X86-NEXT: movzwl %cx, %eax
; X86-NEXT: movzwl %dx, %ecx
; X86-NEXT: subl %ecx, %eax
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: subl %ecx, %eax
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
  ret i32 %m
}

define i1 @length2_eq(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length2_eq:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: cmpw (%eax), %cx
; X64-LABEL: length2_eq:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: cmpw (%rsi), %ax
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

define i1 @length2_eq_const(i8* %X) nounwind {
; X86-LABEL: length2_eq_const:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: cmpl $12849, %eax # imm = 0x3231
; X86-NEXT: setne %al
; X64-LABEL: length2_eq_const:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: cmpl $12849, %eax # imm = 0x3231
; X64-NEXT: setne %al
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 1), i64 2) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i1 @length2_eq_nobuiltin_attr(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length2_eq_nobuiltin_attr:
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X64-LABEL: length2_eq_nobuiltin_attr:
; X64-NEXT: pushq %rax
; X64-NEXT: movl $2, %edx
; X64-NEXT: callq memcmp
; X64-NEXT: testl %eax, %eax
; X64-NEXT: popq %rcx
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind nobuiltin
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

define i32 @length3(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length3:
; X86: # BB#0: # %loadbb
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: movzwl (%ecx), %esi
; X86-NEXT: rolw $8, %dx
; X86-NEXT: rolw $8, %si
; X86-NEXT: cmpw %si, %dx
; X86-NEXT: jne .LBB6_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movzbl 2(%eax), %eax
; X86-NEXT: movzbl 2(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .LBB6_1: # %res_block
; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: popl %esi
; X64-LABEL: length3:
; X64: # BB#0: # %loadbb
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: cmpw %cx, %ax
; X64-NEXT: jne .LBB6_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movzbl 2(%rdi), %eax
; X64-NEXT: movzbl 2(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: .LBB6_1: # %res_block
; X64-NEXT: setae %al
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: leal -1(%rax,%rax), %eax
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
  ret i32 %m
}

define i1 @length3_eq(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length3_eq:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %edx
; X86-NEXT: cmpw (%eax), %dx
; X86-NEXT: jne .LBB7_2
; X86-NEXT: # BB#1: # %loadbb1
; X86-NEXT: movb 2(%ecx), %dl
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpb 2(%eax), %dl
; X86-NEXT: je .LBB7_3
; X86-NEXT: .LBB7_2: # %res_block
; X86-NEXT: movl $1, %ecx
; X86-NEXT: .LBB7_3: # %endblock
; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: setne %al
; X64-LABEL: length3_eq:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: cmpw (%rsi), %ax
; X64-NEXT: jne .LBB7_2
; X64-NEXT: # BB#1: # %loadbb1
; X64-NEXT: movb 2(%rdi), %cl
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpb 2(%rsi), %cl
; X64-NEXT: je .LBB7_3
; X64-NEXT: .LBB7_2: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB7_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setne %al
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i32 @length4(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length4:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: sbbl $0, %eax
; X64-LABEL: length4:
; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %ecx
; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %edx, %ecx
; X64-NEXT: sbbl $0, %eax
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
  ret i32 %m
}

define i1 @length4_eq(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length4_eq:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: cmpl (%eax), %ecx
; X86-NEXT: setne %al
; X64-LABEL: length4_eq:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: cmpl (%rsi), %eax
; X64-NEXT: setne %al
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i1 @length4_eq_const(i8* %X) nounwind {
; X86-LABEL: length4_eq_const:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl $875770417, (%eax) # imm = 0x34333231
; X64-LABEL: length4_eq_const:
; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 1), i64 4) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

define i32 @length5(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length5:
; X86: # BB#0: # %loadbb
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %edx
; X86-NEXT: bswapl %esi
; X86-NEXT: cmpl %esi, %edx
; X86-NEXT: jne .LBB11_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movzbl 4(%eax), %eax
; X86-NEXT: movzbl 4(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .LBB11_1: # %res_block
; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: popl %esi
; X64-LABEL: length5:
; X64: # BB#0: # %loadbb
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax
; X64-NEXT: bswapl %ecx
; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB11_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax
; X64-NEXT: movzbl 4(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: .LBB11_1: # %res_block
; X64-NEXT: setae %al
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: leal -1(%rax,%rax), %eax
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
  ret i32 %m
}

define i1 @length5_eq(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length5_eq:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %edx
; X86-NEXT: cmpl (%eax), %edx
; X86-NEXT: jne .LBB12_2
; X86-NEXT: # BB#1: # %loadbb1
; X86-NEXT: movb 4(%ecx), %dl
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpb 4(%eax), %dl
; X86-NEXT: je .LBB12_3
; X86-NEXT: .LBB12_2: # %res_block
; X86-NEXT: movl $1, %ecx
; X86-NEXT: .LBB12_3: # %endblock
; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: setne %al
; X64-LABEL: length5_eq:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: cmpl (%rsi), %eax
; X64-NEXT: jne .LBB12_2
; X64-NEXT: # BB#1: # %loadbb1
; X64-NEXT: movb 4(%rdi), %cl
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpb 4(%rsi), %cl
; X64-NEXT: je .LBB12_3
; X64-NEXT: .LBB12_2: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB12_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setne %al
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i32 @length8(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length8:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl (%esi), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: jne .LBB13_2
; X86-NEXT: # BB#1: # %loadbb1
; X86-NEXT: movl 4(%esi), %ecx
; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: je .LBB13_3
; X86-NEXT: .LBB13_2: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: setae %al
; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB13_3: # %endblock
; X86-NEXT: popl %esi
; X64-LABEL: length8:
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: sbbl $0, %eax
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
  ret i32 %m
}

define i1 @length8_eq(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length8_eq:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %edx
; X86-NEXT: cmpl (%eax), %edx
; X86-NEXT: jne .LBB14_2
; X86-NEXT: # BB#1: # %loadbb1
; X86-NEXT: movl 4(%ecx), %edx
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpl 4(%eax), %edx
; X86-NEXT: je .LBB14_3
; X86-NEXT: .LBB14_2: # %res_block
; X86-NEXT: movl $1, %ecx
; X86-NEXT: .LBB14_3: # %endblock
; X86-NEXT: testl %ecx, %ecx
; X64-LABEL: length8_eq:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: cmpq (%rsi), %rax
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

define i1 @length8_eq_const(i8* %X) nounwind {
; X86-LABEL: length8_eq_const:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl $858927408, (%ecx) # imm = 0x33323130
; X86-NEXT: jne .LBB15_2
; X86-NEXT: # BB#1: # %loadbb1
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl $926299444, 4(%ecx) # imm = 0x37363534
; X86-NEXT: je .LBB15_3
; X86-NEXT: .LBB15_2: # %res_block
; X86-NEXT: movl $1, %eax
; X86-NEXT: .LBB15_3: # %endblock
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X64-LABEL: length8_eq_const:
; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130
; X64-NEXT: cmpq %rax, (%rdi)
; X64-NEXT: setne %al
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 8) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i1 @length12_eq(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length12_eq:
; X86-NEXT: pushl $12
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X64-LABEL: length12_eq:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: cmpq (%rsi), %rax
; X64-NEXT: jne .LBB16_2
; X64-NEXT: # BB#1: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl 8(%rsi), %ecx
; X64-NEXT: je .LBB16_3
; X64-NEXT: .LBB16_2: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB16_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setne %al
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i32 @length12(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length12:
; X86-NEXT: pushl $12
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X64-LABEL: length12:
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB17_2
; X64-NEXT: # BB#1: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: movl 8(%rsi), %edx
; X64-NEXT: bswapl %ecx
; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB17_3
; X64-NEXT: .LBB17_2: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al
; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: .LBB17_3: # %endblock
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
  ret i32 %m
}

; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329

define i32 @length16(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length16:
; X86-NEXT: pushl $16
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X64-LABEL: length16:
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB18_2
; X64-NEXT: # BB#1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB18_3
; X64-NEXT: .LBB18_2: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al
; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: .LBB18_3: # %endblock
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind
  ret i32 %m
}

define i1 @length16_eq(i8* %x, i8* %y) nounwind {
; X86-NOSSE-LABEL: length16_eq:
; X86-NOSSE-NEXT: pushl $0
; X86-NOSSE-NEXT: pushl $16
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: calll memcmp
; X86-NOSSE-NEXT: addl $16, %esp
; X86-NOSSE-NEXT: testl %eax, %eax
; X86-NOSSE-NEXT: setne %al
; X86-NOSSE-NEXT: retl
; X86-SSE1-LABEL: length16_eq:
; X86-SSE1-NEXT: pushl $0
; X86-SSE1-NEXT: pushl $16
; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: calll memcmp
; X86-SSE1-NEXT: addl $16, %esp
; X86-SSE1-NEXT: testl %eax, %eax
; X86-SSE1-NEXT: setne %al
; X86-SSE1-NEXT: retl
; X86-SSE2-LABEL: length16_eq:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
; X86-SSE2-NEXT: movdqu (%eax), %xmm1
; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X86-SSE2-NEXT: pmovmskb %xmm1, %eax
; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: setne %al
; X86-SSE2-NEXT: retl
; X64-SSE2-LABEL: length16_eq:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
; X64-AVX-LABEL: length16_eq:
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
; X64-AVX-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-AVX-NEXT: setne %al
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind
  %cmp = icmp ne i32 %call, 0
  ret i1 %cmp
}

define i1 @length16_eq_const(i8* %X) nounwind {
; X86-NOSSE-LABEL: length16_eq_const:
; X86-NOSSE-NEXT: pushl $0
; X86-NOSSE-NEXT: pushl $16
; X86-NOSSE-NEXT: pushl $.L.str
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: calll memcmp
; X86-NOSSE-NEXT: addl $16, %esp
; X86-NOSSE-NEXT: testl %eax, %eax
; X86-NOSSE-NEXT: sete %al
; X86-NOSSE-NEXT: retl
; X86-SSE1-LABEL: length16_eq_const:
; X86-SSE1-NEXT: pushl $0
; X86-SSE1-NEXT: pushl $16
; X86-SSE1-NEXT: pushl $.L.str
; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: calll memcmp
; X86-SSE1-NEXT: addl $16, %esp
; X86-SSE1-NEXT: testl %eax, %eax
; X86-SSE1-NEXT: sete %al
; X86-SSE1-NEXT: retl
; X86-SSE2-LABEL: length16_eq_const:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movdqu (%eax), %xmm0
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: retl
; X64-SSE2-LABEL: length16_eq_const:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
; X64-AVX-LABEL: length16_eq_const:
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-AVX-NEXT: sete %al
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 16) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914

define i32 @length24(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length24:
; X86-NEXT: pushl $24
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X64-LABEL: length24:
; X64-NEXT: movl $24, %edx
; X64-NEXT: jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 24) nounwind
  ret i32 %m
}

define i1 @length24_eq(i8* %x, i8* %y) nounwind {
; X86-LABEL: length24_eq:
; X86-NEXT: pushl $24
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X64-SSE2-LABEL: length24_eq:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: jne .LBB22_2
; X64-SSE2-NEXT: # BB#1: # %loadbb1
; X64-SSE2-NEXT: movq 16(%rdi), %rcx
; X64-SSE2-NEXT: xorl %eax, %eax
; X64-SSE2-NEXT: cmpq 16(%rsi), %rcx
; X64-SSE2-NEXT: je .LBB22_3
; X64-SSE2-NEXT: .LBB22_2: # %res_block
; X64-SSE2-NEXT: movl $1, %eax
; X64-SSE2-NEXT: .LBB22_3: # %endblock
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
; X64-AVX-LABEL: length24_eq:
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
; X64-AVX-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-AVX-NEXT: jne .LBB22_2
; X64-AVX-NEXT: # BB#1: # %loadbb1
; X64-AVX-NEXT: movq 16(%rdi), %rcx
; X64-AVX-NEXT: xorl %eax, %eax
; X64-AVX-NEXT: cmpq 16(%rsi), %rcx
; X64-AVX-NEXT: je .LBB22_3
; X64-AVX-NEXT: .LBB22_2: # %res_block
; X64-AVX-NEXT: movl $1, %eax
; X64-AVX-NEXT: .LBB22_3: # %endblock
; X64-AVX-NEXT: testl %eax, %eax
; X64-AVX-NEXT: sete %al
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
  %cmp = icmp eq i32 %call, 0
  ret i1 %cmp
}

define i1 @length24_eq_const(i8* %X) nounwind {
; X86-LABEL: length24_eq_const:
; X86-NEXT: pushl $24
; X86-NEXT: pushl $.L.str
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X64-SSE2-LABEL: length24_eq_const:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: jne .LBB23_2
; X64-SSE2-NEXT: # BB#1: # %loadbb1
; X64-SSE2-NEXT: xorl %eax, %eax
; X64-SSE2-NEXT: movabsq $3689065127958034230, %rcx # imm = 0x3332313039383736
; X64-SSE2-NEXT: cmpq %rcx, 16(%rdi)
; X64-SSE2-NEXT: je .LBB23_3
; X64-SSE2-NEXT: .LBB23_2: # %res_block
; X64-SSE2-NEXT: movl $1, %eax
; X64-SSE2-NEXT: .LBB23_3: # %endblock
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
; X64-AVX-LABEL: length24_eq_const:
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-AVX-NEXT: jne .LBB23_2
; X64-AVX-NEXT: # BB#1: # %loadbb1
; X64-AVX-NEXT: xorl %eax, %eax
; X64-AVX-NEXT: movabsq $3689065127958034230, %rcx # imm = 0x3332313039383736
; X64-AVX-NEXT: cmpq %rcx, 16(%rdi)
; X64-AVX-NEXT: je .LBB23_3
; X64-AVX-NEXT: .LBB23_2: # %res_block
; X64-AVX-NEXT: movl $1, %eax
; X64-AVX-NEXT: .LBB23_3: # %endblock
; X64-AVX-NEXT: testl %eax, %eax
; X64-AVX-NEXT: setne %al
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i32 @length32(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length32:
; X86-NEXT: pushl $32
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X64-LABEL: length32:
; X64-NEXT: movl $32, %edx
; X64-NEXT: jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 32) nounwind
  ret i32 %m
}

; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325

define i1 @length32_eq(i8* %x, i8* %y) nounwind {
; X86-NOSSE-LABEL: length32_eq:
; X86-NOSSE-NEXT: pushl $0
; X86-NOSSE-NEXT: pushl $32
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: calll memcmp
; X86-NOSSE-NEXT: addl $16, %esp
; X86-NOSSE-NEXT: testl %eax, %eax
; X86-NOSSE-NEXT: sete %al
; X86-NOSSE-NEXT: retl
; X86-SSE1-LABEL: length32_eq:
; X86-SSE1-NEXT: pushl $0
; X86-SSE1-NEXT: pushl $32
; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: calll memcmp
; X86-SSE1-NEXT: addl $16, %esp
; X86-SSE1-NEXT: testl %eax, %eax
; X86-SSE1-NEXT: sete %al
; X86-SSE1-NEXT: retl
; X86-SSE2-LABEL: length32_eq:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
; X86-SSE2-NEXT: movdqu (%eax), %xmm1
; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X86-SSE2-NEXT: pmovmskb %xmm1, %edx
; X86-SSE2-NEXT: cmpl $65535, %edx # imm = 0xFFFF
; X86-SSE2-NEXT: jne .LBB25_2
; X86-SSE2-NEXT: # BB#1: # %loadbb1
; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm0
; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1
; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X86-SSE2-NEXT: pmovmskb %xmm1, %ecx
; X86-SSE2-NEXT: xorl %eax, %eax
; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X86-SSE2-NEXT: je .LBB25_3
; X86-SSE2-NEXT: .LBB25_2: # %res_block
; X86-SSE2-NEXT: movl $1, %eax
; X86-SSE2-NEXT: .LBB25_3: # %endblock
; X86-SSE2-NEXT: testl %eax, %eax
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: retl
; X64-SSE2-LABEL: length32_eq:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: jne .LBB25_2
; X64-SSE2-NEXT: # BB#1: # %loadbb1
; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm0
; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X64-SSE2-NEXT: pmovmskb %xmm1, %ecx
; X64-SSE2-NEXT: xorl %eax, %eax
; X64-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X64-SSE2-NEXT: je .LBB25_3
; X64-SSE2-NEXT: .LBB25_2: # %res_block
; X64-SSE2-NEXT: movl $1, %eax
; X64-SSE2-NEXT: .LBB25_3: # %endblock
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
; X64-AVX1-LABEL: length32_eq:
; X64-AVX1-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX1-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
; X64-AVX1-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-AVX1-NEXT: jne .LBB25_2
; X64-AVX1-NEXT: # BB#1: # %loadbb1
; X64-AVX1-NEXT: vmovdqu 16(%rdi), %xmm0
; X64-AVX1-NEXT: vpcmpeqb 16(%rsi), %xmm0, %xmm0
; X64-AVX1-NEXT: vpmovmskb %xmm0, %ecx
; X64-AVX1-NEXT: xorl %eax, %eax
; X64-AVX1-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X64-AVX1-NEXT: je .LBB25_3
; X64-AVX1-NEXT: .LBB25_2: # %res_block
; X64-AVX1-NEXT: movl $1, %eax
; X64-AVX1-NEXT: .LBB25_3: # %endblock
; X64-AVX1-NEXT: testl %eax, %eax
; X64-AVX1-NEXT: sete %al
; X64-AVX1-NEXT: retq
; X64-AVX2-LABEL: length32_eq:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT: cmpl $-1, %eax
; X64-AVX2-NEXT: sete %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind
  %cmp = icmp eq i32 %call, 0
  ret i1 %cmp
}

define i1 @length32_eq_const(i8* %X) nounwind {
; X86-NOSSE-LABEL: length32_eq_const:
; X86-NOSSE-NEXT: pushl $0
; X86-NOSSE-NEXT: pushl $32
; X86-NOSSE-NEXT: pushl $.L.str
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: calll memcmp
; X86-NOSSE-NEXT: addl $16, %esp
; X86-NOSSE-NEXT: testl %eax, %eax
; X86-NOSSE-NEXT: setne %al
; X86-NOSSE-NEXT: retl
; X86-SSE1-LABEL: length32_eq_const:
; X86-SSE1-NEXT: pushl $0
; X86-SSE1-NEXT: pushl $32
; X86-SSE1-NEXT: pushl $.L.str
; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: calll memcmp
; X86-SSE1-NEXT: addl $16, %esp
; X86-SSE1-NEXT: testl %eax, %eax
; X86-SSE1-NEXT: setne %al
; X86-SSE1-NEXT: retl
; X86-SSE2-LABEL: length32_eq_const:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movdqu (%eax), %xmm0
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %ecx
; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X86-SSE2-NEXT: jne .LBB26_2
; X86-SSE2-NEXT: # BB#1: # %loadbb1
; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %ecx
; X86-SSE2-NEXT: xorl %eax, %eax
; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X86-SSE2-NEXT: je .LBB26_3
; X86-SSE2-NEXT: .LBB26_2: # %res_block
; X86-SSE2-NEXT: movl $1, %eax
; X86-SSE2-NEXT: .LBB26_3: # %endblock
; X86-SSE2-NEXT: testl %eax, %eax
; X86-SSE2-NEXT: setne %al
; X86-SSE2-NEXT: retl
; X64-SSE2-LABEL: length32_eq_const:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: jne .LBB26_2
; X64-SSE2-NEXT: # BB#1: # %loadbb1
; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm0
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %ecx
; X64-SSE2-NEXT: xorl %eax, %eax
; X64-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X64-SSE2-NEXT: je .LBB26_3
; X64-SSE2-NEXT: .LBB26_2: # %res_block
; X64-SSE2-NEXT: movl $1, %eax
; X64-SSE2-NEXT: .LBB26_3: # %endblock
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
; X64-AVX1-LABEL: length32_eq_const:
; X64-AVX1-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX1-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-AVX1-NEXT: jne .LBB26_2
; X64-AVX1-NEXT: # BB#1: # %loadbb1
; X64-AVX1-NEXT: vmovdqu 16(%rdi), %xmm0
; X64-AVX1-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpmovmskb %xmm0, %ecx
; X64-AVX1-NEXT: xorl %eax, %eax
; X64-AVX1-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X64-AVX1-NEXT: je .LBB26_3
; X64-AVX1-NEXT: .LBB26_2: # %res_block
; X64-AVX1-NEXT: movl $1, %eax
; X64-AVX1-NEXT: .LBB26_3: # %endblock
; X64-AVX1-NEXT: testl %eax, %eax
; X64-AVX1-NEXT: setne %al
; X64-AVX1-NEXT: retq
; X64-AVX2-LABEL: length32_eq_const:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT: cmpl $-1, %eax
; X64-AVX2-NEXT: setne %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 32) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i32 @length64(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length64:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $64
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X64-LABEL: length64:
; X64-NEXT: movl $64, %edx
; X64-NEXT: jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 64) nounwind
  ret i32 %m
}

define i1 @length64_eq(i8* %x, i8* %y) nounwind {
; X86-LABEL: length64_eq:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $64
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X64-SSE2-LABEL: length64_eq:
; X64-SSE2-NEXT: pushq %rax
; X64-SSE2-NEXT: movl $64, %edx
; X64-SSE2-NEXT: callq memcmp
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: popq %rcx
; X64-SSE2-NEXT: retq
; X64-AVX1-LABEL: length64_eq:
; X64-AVX1-NEXT: pushq %rax
; X64-AVX1-NEXT: movl $64, %edx
; X64-AVX1-NEXT: callq memcmp
; X64-AVX1-NEXT: testl %eax, %eax
; X64-AVX1-NEXT: setne %al
; X64-AVX1-NEXT: popq %rcx
; X64-AVX1-NEXT: retq
; X64-AVX2-LABEL: length64_eq:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT: cmpl $-1, %eax
; X64-AVX2-NEXT: jne .LBB28_2
; X64-AVX2-NEXT: # BB#1: # %loadbb1
; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb 32(%rsi), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %ecx
; X64-AVX2-NEXT: xorl %eax, %eax
; X64-AVX2-NEXT: cmpl $-1, %ecx
; X64-AVX2-NEXT: je .LBB28_3
; X64-AVX2-NEXT: .LBB28_2: # %res_block
; X64-AVX2-NEXT: movl $1, %eax
; X64-AVX2-NEXT: .LBB28_3: # %endblock
; X64-AVX2-NEXT: testl %eax, %eax
; X64-AVX2-NEXT: setne %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind
  %cmp = icmp ne i32 %call, 0
  ret i1 %cmp
}

define i1 @length64_eq_const(i8* %X) nounwind {
; X86-LABEL: length64_eq_const:
; X86-NEXT: pushl $0
; X86-NEXT: pushl $64
; X86-NEXT: pushl $.L.str
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: sete %al
; X64-SSE2-LABEL: length64_eq_const:
; X64-SSE2-NEXT: pushq %rax
; X64-SSE2-NEXT: movl $.L.str, %esi
; X64-SSE2-NEXT: movl $64, %edx
; X64-SSE2-NEXT: callq memcmp
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: popq %rcx
; X64-SSE2-NEXT: retq
; X64-AVX1-LABEL: length64_eq_const:
; X64-AVX1-NEXT: pushq %rax
; X64-AVX1-NEXT: movl $.L.str, %esi
; X64-AVX1-NEXT: movl $64, %edx
; X64-AVX1-NEXT: callq memcmp
; X64-AVX1-NEXT: testl %eax, %eax
; X64-AVX1-NEXT: sete %al
; X64-AVX1-NEXT: popq %rcx
; X64-AVX1-NEXT: retq
; X64-AVX2-LABEL: length64_eq_const:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT: cmpl $-1, %eax
; X64-AVX2-NEXT: jne .LBB29_2
; X64-AVX2-NEXT: # BB#1: # %loadbb1
; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %ecx
; X64-AVX2-NEXT: xorl %eax, %eax
; X64-AVX2-NEXT: cmpl $-1, %ecx
; X64-AVX2-NEXT: je .LBB29_3
; X64-AVX2-NEXT: .LBB29_2: # %res_block
; X64-AVX2-NEXT: movl $1, %eax
; X64-AVX2-NEXT: .LBB29_3: # %endblock
; X64-AVX2-NEXT: testl %eax, %eax
; X64-AVX2-NEXT: sete %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

; This checks that we do not do stupid things with huge sizes.
define i32 @huge_length(i8* %X, i8* %Y) nounwind {
; X86-LABEL: huge_length:
; X86-NEXT: pushl $2147483647 # imm = 0x7FFFFFFF
; X86-NEXT: pushl $-1
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X64-LABEL: huge_length:
; X64-NEXT: movabsq $9223372036854775807, %rdx # imm = 0x7FFFFFFFFFFFFFFF
; X64-NEXT: jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 9223372036854775807) nounwind
  ret i32 %m
}