1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI2,X86-NOBMI2,FALLBACK0,X86-FALLBACK0
3 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI2,X64-NOBMI2,FALLBACK0,X64-FALLBACK0
5 ; https://bugs.llvm.org/show_bug.cgi?id=38149
7 ; We are truncating from a wider width, and then sign-extending
8 ; back to the original width. Then we compare the original and the
9 ; sign-extended values for equality. If they don't match, the truncation was lossy (signed truncation occurred).
11 ; This can be expressed in several ways in IR:
12 ; trunc + sext + icmp eq <- not canonical
13 ; shl + ashr + icmp eq
16 ; However only the simplest form (with two shifts) gets lowered best.
18 ; ---------------------------------------------------------------------------- ;
19 ; shl + ashr + icmp eq
20 ; ---------------------------------------------------------------------------- ;
22 define i1 @shifts_eqcmp_i16_i8(i16 %x) nounwind {
23 ; X86-LABEL: shifts_eqcmp_i16_i8:
25 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
26 ; X86-NEXT: movsbl %al, %ecx
27 ; X86-NEXT: cmpw %ax, %cx
31 ; X64-LABEL: shifts_eqcmp_i16_i8:
33 ; X64-NEXT: movsbl %dil, %eax
34 ; X64-NEXT: cmpw %di, %ax
37 %tmp0 = shl i16 %x, 8 ; 16-8
38 %tmp1 = ashr exact i16 %tmp0, 8 ; 16-8
39 %tmp2 = icmp eq i16 %tmp1, %x
43 define i1 @shifts_eqcmp_i32_i16(i32 %x) nounwind {
44 ; X86-LABEL: shifts_eqcmp_i32_i16:
46 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
47 ; X86-NEXT: movswl %ax, %ecx
48 ; X86-NEXT: cmpl %eax, %ecx
52 ; X64-LABEL: shifts_eqcmp_i32_i16:
54 ; X64-NEXT: movswl %di, %eax
55 ; X64-NEXT: cmpl %edi, %eax
58 %tmp0 = shl i32 %x, 16 ; 32-16
59 %tmp1 = ashr exact i32 %tmp0, 16 ; 32-16
60 %tmp2 = icmp eq i32 %tmp1, %x
64 define i1 @shifts_eqcmp_i32_i8(i32 %x) nounwind {
65 ; X86-LABEL: shifts_eqcmp_i32_i8:
67 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
68 ; X86-NEXT: movsbl %al, %ecx
69 ; X86-NEXT: cmpl %eax, %ecx
73 ; X64-LABEL: shifts_eqcmp_i32_i8:
75 ; X64-NEXT: movsbl %dil, %eax
76 ; X64-NEXT: cmpl %edi, %eax
79 %tmp0 = shl i32 %x, 24 ; 32-8
80 %tmp1 = ashr exact i32 %tmp0, 24 ; 32-8
81 %tmp2 = icmp eq i32 %tmp1, %x
85 define i1 @shifts_eqcmp_i64_i32(i64 %x) nounwind {
86 ; X86-LABEL: shifts_eqcmp_i64_i32:
88 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
89 ; X86-NEXT: sarl $31, %eax
90 ; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax
94 ; X64-LABEL: shifts_eqcmp_i64_i32:
96 ; X64-NEXT: movslq %edi, %rax
97 ; X64-NEXT: cmpq %rdi, %rax
100 %tmp0 = shl i64 %x, 32 ; 64-32
101 %tmp1 = ashr exact i64 %tmp0, 32 ; 64-32
102 %tmp2 = icmp eq i64 %tmp1, %x
106 define i1 @shifts_eqcmp_i64_i16(i64 %x) nounwind {
107 ; X86-LABEL: shifts_eqcmp_i64_i16:
109 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
110 ; X86-NEXT: movswl %ax, %ecx
111 ; X86-NEXT: movl %ecx, %edx
112 ; X86-NEXT: sarl $31, %edx
113 ; X86-NEXT: xorl %eax, %ecx
114 ; X86-NEXT: xorl {{[0-9]+}}(%esp), %edx
115 ; X86-NEXT: orl %ecx, %edx
119 ; X64-LABEL: shifts_eqcmp_i64_i16:
121 ; X64-NEXT: movswq %di, %rax
122 ; X64-NEXT: cmpq %rdi, %rax
125 %tmp0 = shl i64 %x, 48 ; 64-16
126 %tmp1 = ashr exact i64 %tmp0, 48 ; 64-16
127 %tmp2 = icmp eq i64 %tmp1, %x
131 define i1 @shifts_eqcmp_i64_i8(i64 %x) nounwind {
132 ; X86-LABEL: shifts_eqcmp_i64_i8:
134 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
135 ; X86-NEXT: movsbl %al, %ecx
136 ; X86-NEXT: movl %ecx, %edx
137 ; X86-NEXT: sarl $31, %edx
138 ; X86-NEXT: xorl %eax, %ecx
139 ; X86-NEXT: xorl {{[0-9]+}}(%esp), %edx
140 ; X86-NEXT: orl %ecx, %edx
144 ; X64-LABEL: shifts_eqcmp_i64_i8:
146 ; X64-NEXT: movsbq %dil, %rax
147 ; X64-NEXT: cmpq %rdi, %rax
150 %tmp0 = shl i64 %x, 56 ; 64-8
151 %tmp1 = ashr exact i64 %tmp0, 56 ; 64-8
152 %tmp2 = icmp eq i64 %tmp1, %x
156 ; ---------------------------------------------------------------------------- ;
158 ; ---------------------------------------------------------------------------- ;
160 define i1 @add_ugecmp_i16_i8(i16 %x) nounwind {
161 ; X86-LABEL: add_ugecmp_i16_i8:
163 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
164 ; X86-NEXT: movsbl %al, %ecx
165 ; X86-NEXT: cmpw %ax, %cx
169 ; X64-LABEL: add_ugecmp_i16_i8:
171 ; X64-NEXT: movsbl %dil, %eax
172 ; X64-NEXT: cmpw %di, %ax
175 %tmp0 = add i16 %x, -128 ; ~0U << (8-1)
176 %tmp1 = icmp uge i16 %tmp0, -256 ; ~0U << 8
180 define i1 @add_ugecmp_i32_i16(i32 %x) nounwind {
181 ; X86-LABEL: add_ugecmp_i32_i16:
183 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
184 ; X86-NEXT: movswl %ax, %ecx
185 ; X86-NEXT: cmpl %eax, %ecx
189 ; X64-LABEL: add_ugecmp_i32_i16:
191 ; X64-NEXT: movswl %di, %eax
192 ; X64-NEXT: cmpl %edi, %eax
195 %tmp0 = add i32 %x, -32768 ; ~0U << (16-1)
196 %tmp1 = icmp uge i32 %tmp0, -65536 ; ~0U << 16
200 define i1 @add_ugecmp_i32_i8(i32 %x) nounwind {
201 ; X86-LABEL: add_ugecmp_i32_i8:
203 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
204 ; X86-NEXT: movsbl %al, %ecx
205 ; X86-NEXT: cmpl %eax, %ecx
209 ; X64-LABEL: add_ugecmp_i32_i8:
211 ; X64-NEXT: movsbl %dil, %eax
212 ; X64-NEXT: cmpl %edi, %eax
215 %tmp0 = add i32 %x, -128 ; ~0U << (8-1)
216 %tmp1 = icmp uge i32 %tmp0, -256 ; ~0U << 8
220 define i1 @add_ugecmp_i64_i32(i64 %x) nounwind {
221 ; X86-LABEL: add_ugecmp_i64_i32:
223 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
224 ; X86-NEXT: sarl $31, %eax
225 ; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax
229 ; X64-LABEL: add_ugecmp_i64_i32:
231 ; X64-NEXT: movslq %edi, %rax
232 ; X64-NEXT: cmpq %rdi, %rax
235 %tmp0 = add i64 %x, -2147483648 ; ~0U << (32-1)
236 %tmp1 = icmp uge i64 %tmp0, -4294967296 ; ~0U << 32
240 define i1 @add_ugecmp_i64_i16(i64 %x) nounwind {
241 ; X86-LABEL: add_ugecmp_i64_i16:
243 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
244 ; X86-NEXT: movswl %ax, %ecx
245 ; X86-NEXT: xorl %ecx, %eax
246 ; X86-NEXT: sarl $31, %ecx
247 ; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
248 ; X86-NEXT: orl %eax, %ecx
252 ; X64-LABEL: add_ugecmp_i64_i16:
254 ; X64-NEXT: movswq %di, %rax
255 ; X64-NEXT: cmpq %rdi, %rax
258 %tmp0 = add i64 %x, -32768 ; ~0U << (16-1)
259 %tmp1 = icmp uge i64 %tmp0, -65536 ; ~0U << 16
263 define i1 @add_ugecmp_i64_i8(i64 %x) nounwind {
264 ; X86-LABEL: add_ugecmp_i64_i8:
266 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
267 ; X86-NEXT: movsbl %al, %ecx
268 ; X86-NEXT: xorl %ecx, %eax
269 ; X86-NEXT: sarl $31, %ecx
270 ; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
271 ; X86-NEXT: orl %eax, %ecx
275 ; X64-LABEL: add_ugecmp_i64_i8:
277 ; X64-NEXT: movsbq %dil, %rax
278 ; X64-NEXT: cmpq %rdi, %rax
281 %tmp0 = add i64 %x, -128 ; ~0U << (8-1)
282 %tmp1 = icmp uge i64 %tmp0, -256 ; ~0U << 8
286 ; Slightly more canonical variant
287 define i1 @add_ugtcmp_i16_i8(i16 %x) nounwind {
288 ; X86-LABEL: add_ugtcmp_i16_i8:
290 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
291 ; X86-NEXT: movsbl %al, %ecx
292 ; X86-NEXT: cmpw %ax, %cx
296 ; X64-LABEL: add_ugtcmp_i16_i8:
298 ; X64-NEXT: movsbl %dil, %eax
299 ; X64-NEXT: cmpw %di, %ax
302 %tmp0 = add i16 %x, -128 ; ~0U << (8-1)
303 %tmp1 = icmp ugt i16 %tmp0, -257 ; ~0U << 8 - 1
307 ; ---------------------------------------------------------------------------- ;
309 ; ---------------------------------------------------------------------------- ;
311 define i1 @add_ultcmp_i16_i8(i16 %x) nounwind {
312 ; X86-LABEL: add_ultcmp_i16_i8:
314 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
315 ; X86-NEXT: movsbl %al, %ecx
316 ; X86-NEXT: cmpw %ax, %cx
320 ; X64-LABEL: add_ultcmp_i16_i8:
322 ; X64-NEXT: movsbl %dil, %eax
323 ; X64-NEXT: cmpw %di, %ax
326 %tmp0 = add i16 %x, 128 ; 1U << (8-1)
327 %tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
331 define i1 @add_ultcmp_i32_i16(i32 %x) nounwind {
332 ; X86-LABEL: add_ultcmp_i32_i16:
334 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
335 ; X86-NEXT: movswl %ax, %ecx
336 ; X86-NEXT: cmpl %eax, %ecx
340 ; X64-LABEL: add_ultcmp_i32_i16:
342 ; X64-NEXT: movswl %di, %eax
343 ; X64-NEXT: cmpl %edi, %eax
346 %tmp0 = add i32 %x, 32768 ; 1U << (16-1)
347 %tmp1 = icmp ult i32 %tmp0, 65536 ; 1U << 16
351 define i1 @add_ultcmp_i32_i8(i32 %x) nounwind {
352 ; X86-LABEL: add_ultcmp_i32_i8:
354 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
355 ; X86-NEXT: movsbl %al, %ecx
356 ; X86-NEXT: cmpl %eax, %ecx
360 ; X64-LABEL: add_ultcmp_i32_i8:
362 ; X64-NEXT: movsbl %dil, %eax
363 ; X64-NEXT: cmpl %edi, %eax
366 %tmp0 = add i32 %x, 128 ; 1U << (8-1)
367 %tmp1 = icmp ult i32 %tmp0, 256 ; 1U << 8
371 define i1 @add_ultcmp_i64_i32(i64 %x) nounwind {
372 ; X86-LABEL: add_ultcmp_i64_i32:
374 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
375 ; X86-NEXT: sarl $31, %eax
376 ; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax
380 ; X64-LABEL: add_ultcmp_i64_i32:
382 ; X64-NEXT: movslq %edi, %rax
383 ; X64-NEXT: cmpq %rdi, %rax
386 %tmp0 = add i64 %x, 2147483648 ; 1U << (32-1)
387 %tmp1 = icmp ult i64 %tmp0, 4294967296 ; 1U << 32
391 define i1 @add_ultcmp_i64_i16(i64 %x) nounwind {
392 ; X86-LABEL: add_ultcmp_i64_i16:
394 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
395 ; X86-NEXT: movswl %ax, %ecx
396 ; X86-NEXT: xorl %ecx, %eax
397 ; X86-NEXT: sarl $31, %ecx
398 ; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
399 ; X86-NEXT: orl %eax, %ecx
403 ; X64-LABEL: add_ultcmp_i64_i16:
405 ; X64-NEXT: movswq %di, %rax
406 ; X64-NEXT: cmpq %rdi, %rax
409 %tmp0 = add i64 %x, 32768 ; 1U << (16-1)
410 %tmp1 = icmp ult i64 %tmp0, 65536 ; 1U << 16
414 define i1 @add_ultcmp_i64_i8(i64 %x) nounwind {
415 ; X86-LABEL: add_ultcmp_i64_i8:
417 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
418 ; X86-NEXT: movsbl %al, %ecx
419 ; X86-NEXT: xorl %ecx, %eax
420 ; X86-NEXT: sarl $31, %ecx
421 ; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
422 ; X86-NEXT: orl %eax, %ecx
426 ; X64-LABEL: add_ultcmp_i64_i8:
428 ; X64-NEXT: movsbq %dil, %rax
429 ; X64-NEXT: cmpq %rdi, %rax
432 %tmp0 = add i64 %x, 128 ; 1U << (8-1)
433 %tmp1 = icmp ult i64 %tmp0, 256 ; 1U << 8
437 ; Slightly more canonical variant
438 define i1 @add_ulecmp_i16_i8(i16 %x) nounwind {
439 ; X86-LABEL: add_ulecmp_i16_i8:
441 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
442 ; X86-NEXT: movsbl %al, %ecx
443 ; X86-NEXT: cmpw %ax, %cx
447 ; X64-LABEL: add_ulecmp_i16_i8:
449 ; X64-NEXT: movsbl %dil, %eax
450 ; X64-NEXT: cmpw %di, %ax
453 %tmp0 = add i16 %x, 128 ; 1U << (8-1)
454 %tmp1 = icmp ule i16 %tmp0, 255 ; (1U << 8) - 1
459 ; ---------------------------------------------------------------------------- ;
461 ; Adding not a constant
462 define i1 @add_ultcmp_bad_i16_i8_add(i16 %x, i16 %y) nounwind {
463 ; X86-LABEL: add_ultcmp_bad_i16_i8_add:
465 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
466 ; X86-NEXT: addw {{[0-9]+}}(%esp), %ax
467 ; X86-NEXT: movzwl %ax, %eax
468 ; X86-NEXT: cmpl $256, %eax # imm = 0x100
472 ; X64-LABEL: add_ultcmp_bad_i16_i8_add:
474 ; X64-NEXT: addl %esi, %edi
475 ; X64-NEXT: movzwl %di, %eax
476 ; X64-NEXT: cmpl $256, %eax # imm = 0x100
479 %tmp0 = add i16 %x, %y
480 %tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
484 ; Comparing not with a constant
485 define i1 @add_ultcmp_bad_i16_i8_cmp(i16 %x, i16 %y) nounwind {
486 ; X86-LABEL: add_ultcmp_bad_i16_i8_cmp:
488 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
489 ; X86-NEXT: subl $-128, %eax
490 ; X86-NEXT: cmpw {{[0-9]+}}(%esp), %ax
494 ; X64-LABEL: add_ultcmp_bad_i16_i8_cmp:
496 ; X64-NEXT: subl $-128, %edi
497 ; X64-NEXT: cmpw %si, %di
500 %tmp0 = add i16 %x, 128 ; 1U << (8-1)
501 %tmp1 = icmp ult i16 %tmp0, %y
505 ; Second constant is not larger than the first one
506 define i1 @add_ultcmp_bad_i8_i16(i16 %x) nounwind {
507 ; X86-LABEL: add_ultcmp_bad_i8_i16:
509 ; X86-NEXT: movw $128, %ax
510 ; X86-NEXT: addw {{[0-9]+}}(%esp), %ax
514 ; X64-LABEL: add_ultcmp_bad_i8_i16:
516 ; X64-NEXT: addw $128, %di
519 %tmp0 = add i16 %x, 128 ; 1U << (8-1)
520 %tmp1 = icmp ult i16 %tmp0, 128 ; 1U << (8-1)
524 ; First constant is not power of two
525 define i1 @add_ultcmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind {
526 ; X86-LABEL: add_ultcmp_bad_i16_i8_c0notpoweroftwo:
528 ; X86-NEXT: movl $192, %eax
529 ; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
530 ; X86-NEXT: movzwl %ax, %eax
531 ; X86-NEXT: cmpl $256, %eax # imm = 0x100
535 ; X64-LABEL: add_ultcmp_bad_i16_i8_c0notpoweroftwo:
537 ; X64-NEXT: addl $192, %edi
538 ; X64-NEXT: movzwl %di, %eax
539 ; X64-NEXT: cmpl $256, %eax # imm = 0x100
542 %tmp0 = add i16 %x, 192 ; (1U << (8-1)) + (1U << (8-1-1))
543 %tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
547 ; Second constant is not power of two
548 define i1 @add_ultcmp_bad_i16_i8_c1notpoweroftwo(i16 %x) nounwind {
549 ; X86-LABEL: add_ultcmp_bad_i16_i8_c1notpoweroftwo:
551 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
552 ; X86-NEXT: subl $-128, %eax
553 ; X86-NEXT: movzwl %ax, %eax
554 ; X86-NEXT: cmpl $768, %eax # imm = 0x300
558 ; X64-LABEL: add_ultcmp_bad_i16_i8_c1notpoweroftwo:
560 ; X64-NEXT: subl $-128, %edi
561 ; X64-NEXT: movzwl %di, %eax
562 ; X64-NEXT: cmpl $768, %eax # imm = 0x300
565 %tmp0 = add i16 %x, 128 ; 1U << (8-1)
566 %tmp1 = icmp ult i16 %tmp0, 768 ; (1U << 8)) + (1U << (8+1))
570 ; Magic check fails, 64 << 1 != 256
571 define i1 @add_ultcmp_bad_i16_i8_magic(i16 %x) nounwind {
572 ; X86-LABEL: add_ultcmp_bad_i16_i8_magic:
574 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
575 ; X86-NEXT: addl $64, %eax
576 ; X86-NEXT: movzwl %ax, %eax
577 ; X86-NEXT: cmpl $256, %eax # imm = 0x100
581 ; X64-LABEL: add_ultcmp_bad_i16_i8_magic:
583 ; X64-NEXT: addl $64, %edi
584 ; X64-NEXT: movzwl %di, %eax
585 ; X64-NEXT: cmpl $256, %eax # imm = 0x100
588 %tmp0 = add i16 %x, 64 ; 1U << (8-1-1)
589 %tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
593 ; Bad 'destination type'
594 define i1 @add_ultcmp_bad_i16_i4(i16 %x) nounwind {
595 ; X86-LABEL: add_ultcmp_bad_i16_i4:
597 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
598 ; X86-NEXT: addl $8, %eax
599 ; X86-NEXT: cmpw $16, %ax
603 ; X64-LABEL: add_ultcmp_bad_i16_i4:
605 ; X64-NEXT: addl $8, %edi
606 ; X64-NEXT: cmpw $16, %di
609 %tmp0 = add i16 %x, 8 ; 1U << (4-1)
610 %tmp1 = icmp ult i16 %tmp0, 16 ; 1U << 4
615 define i1 @add_ultcmp_bad_i24_i8(i24 %x) nounwind {
616 ; X86-LABEL: add_ultcmp_bad_i24_i8:
618 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
619 ; X86-NEXT: subl $-128, %eax
620 ; X86-NEXT: andl $16777215, %eax # imm = 0xFFFFFF
621 ; X86-NEXT: cmpl $256, %eax # imm = 0x100
625 ; X64-LABEL: add_ultcmp_bad_i24_i8:
627 ; X64-NEXT: subl $-128, %edi
628 ; X64-NEXT: andl $16777215, %edi # imm = 0xFFFFFF
629 ; X64-NEXT: cmpl $256, %edi # imm = 0x100
632 %tmp0 = add i24 %x, 128 ; 1U << (8-1)
633 %tmp1 = icmp ult i24 %tmp0, 256 ; 1U << 8
637 define i1 @add_ulecmp_bad_i16_i8(i16 %x) nounwind {
638 ; CHECK-LABEL: add_ulecmp_bad_i16_i8:
640 ; CHECK-NEXT: movb $1, %al
641 ; CHECK-NEXT: ret{{[l|q]}}
642 %tmp0 = add i16 %x, 128 ; 1U << (8-1)
643 %tmp1 = icmp ule i16 %tmp0, -1 ; when we +1 it, it will wrap to 0