1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
5 declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
6 declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
7 declare { i8, i1 } @llvm.ssub.with.overflow.i8(i8, i8) nounwind readnone
8 declare { i8, i1 } @llvm.usub.with.overflow.i8(i8, i8) nounwind readnone
11 declare {<4 x i32>, <4 x i1>} @llvm.ssub.with.overflow.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
12 declare {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
13 declare { <4 x i8>, <4 x i1> } @llvm.ssub.with.overflow.v4i8(<4 x i8>, <4 x i8>) nounwind readnone
14 declare { <4 x i8>, <4 x i1> } @llvm.usub.with.overflow.v4i8(<4 x i8> , <4 x i8>) nounwind readnone
16 ; fold (ssub x, 0) -> x
; Scalar signed case: subtracts a constant zero from %a0 through
; llvm.ssub.with.overflow.i32, then selects %a1 when the overflow bit is set
; and the difference otherwise. The expected assembly is only a register copy
; (movl %edi, %eax), i.e. no subtraction and no overflow test remain.
17 define i32 @combine_ssub_zero(i32 %a0, i32 %a1) {
18 ; CHECK-LABEL: combine_ssub_zero:
20 ; CHECK-NEXT: movl %edi, %eax
22 %1 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a0, i32 zeroinitializer)
23 %2 = extractvalue {i32, i1} %1, 0
24 %3 = extractvalue {i32, i1} %1, 1
25 %4 = select i1 %3, i32 %a1, i32 %2
; <4 x i32> signed case: subtracts an all-zero vector from %a0 through
; llvm.ssub.with.overflow.v4i32 and selects per lane between %a1 (overflow
; bit set) and the difference (overflow bit clear).
; NOTE(review): no instruction expectations are visible for this function in
; this view, only the label line - confirm the expected body is empty apart
; from the return.
29 define <4 x i32> @combine_vec_ssub_zero(<4 x i32> %a0, <4 x i32> %a1) {
30 ; CHECK-LABEL: combine_vec_ssub_zero:
33 %1 = call {<4 x i32>, <4 x i1>} @llvm.ssub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> zeroinitializer)
34 %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
35 %3 = extractvalue {<4 x i32>, <4 x i1>} %1, 1
36 %4 = select <4 x i1> %3, <4 x i32> %a1, <4 x i32> %2
40 ; fold (usub x, 0) -> x
; Scalar unsigned case: subtracts a constant zero from %a0 through
; llvm.usub.with.overflow.i32, then selects %a1 when the overflow bit is set
; and the difference otherwise. The expected assembly is only a register copy
; (movl %edi, %eax).
41 define i32 @combine_usub_zero(i32 %a0, i32 %a1) {
42 ; CHECK-LABEL: combine_usub_zero:
44 ; CHECK-NEXT: movl %edi, %eax
46 %1 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a0, i32 zeroinitializer)
47 %2 = extractvalue {i32, i1} %1, 0
48 %3 = extractvalue {i32, i1} %1, 1
49 %4 = select i1 %3, i32 %a1, i32 %2
; <4 x i32> unsigned case: subtracts an all-zero vector from %a0 through
; llvm.usub.with.overflow.v4i32 and selects per lane between %a1 (overflow
; bit set) and the difference (overflow bit clear).
; NOTE(review): no instruction expectations are visible for this function in
; this view, only the label line - confirm the expected body is empty apart
; from the return.
53 define <4 x i32> @combine_vec_usub_zero(<4 x i32> %a0, <4 x i32> %a1) {
54 ; CHECK-LABEL: combine_vec_usub_zero:
57 %1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> zeroinitializer)
58 %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
59 %3 = extractvalue {<4 x i32>, <4 x i1>} %1, 1
60 %4 = select <4 x i1> %3, <4 x i32> %a1, <4 x i32> %2
64 ; fold (ssub x, x) -> 0
; Scalar signed case: subtracts %a0 from itself through
; llvm.ssub.with.overflow.i32, then selects %a1 when the overflow bit is set
; and the difference otherwise. The expected assembly just zeroes %eax with
; xorl, so neither the subtraction nor the select on %a1 survives.
65 define i32 @combine_ssub_self(i32 %a0, i32 %a1) {
66 ; CHECK-LABEL: combine_ssub_self:
68 ; CHECK-NEXT: xorl %eax, %eax
70 %1 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a0, i32 %a0)
71 %2 = extractvalue {i32, i1} %1, 0
72 %3 = extractvalue {i32, i1} %1, 1
73 %4 = select i1 %3, i32 %a1, i32 %2
; <4 x i32> signed case: subtracts %a0 from itself through
; llvm.ssub.with.overflow.v4i32 and selects per lane between %a1 (overflow
; bit set) and the difference. Both run configurations expect %xmm0 to be
; zeroed by XOR-ing it with itself (xorps for SSE4.1, vxorps for AVX2).
77 define <4 x i32> @combine_vec_ssub_self(<4 x i32> %a0, <4 x i32> %a1) {
78 ; SSE-LABEL: combine_vec_ssub_self:
80 ; SSE-NEXT: xorps %xmm0, %xmm0
83 ; AVX-LABEL: combine_vec_ssub_self:
85 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
87 %1 = call {<4 x i32>, <4 x i1>} @llvm.ssub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> %a0)
88 %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
89 %3 = extractvalue {<4 x i32>, <4 x i1>} %1, 1
90 %4 = select <4 x i1> %3, <4 x i32> %a1, <4 x i32> %2
94 ; fold (usub x, x) -> 0
; Scalar unsigned case: subtracts %a0 from itself through
; llvm.usub.with.overflow.i32, then selects %a1 when the overflow bit is set
; and the difference otherwise. The expected assembly just zeroes %eax with
; xorl, i.e. the result is the constant 0.
95 define i32 @combine_usub_self(i32 %a0, i32 %a1) {
96 ; CHECK-LABEL: combine_usub_self:
98 ; CHECK-NEXT: xorl %eax, %eax
100 %1 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a0, i32 %a0)
101 %2 = extractvalue {i32, i1} %1, 0
102 %3 = extractvalue {i32, i1} %1, 1
103 %4 = select i1 %3, i32 %a1, i32 %2
; <4 x i32> unsigned case: subtracts %a0 from itself through
; llvm.usub.with.overflow.v4i32 and selects per lane between %a1 (overflow
; bit set) and the difference. Both run configurations expect %xmm0 to be
; zeroed by XOR-ing it with itself (xorps for SSE4.1, vxorps for AVX2).
107 define <4 x i32> @combine_vec_usub_self(<4 x i32> %a0, <4 x i32> %a1) {
108 ; SSE-LABEL: combine_vec_usub_self:
110 ; SSE-NEXT: xorps %xmm0, %xmm0
113 ; AVX-LABEL: combine_vec_usub_self:
115 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
117 %1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> %a0)
118 %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
119 %3 = extractvalue {<4 x i32>, <4 x i1>} %1, 1
120 %4 = select <4 x i1> %3, <4 x i32> %a1, <4 x i32> %2
124 ; fold (usub -1, x) -> (xor x, -1) + no borrow
; Scalar case: computes (-1) - %a0 through llvm.usub.with.overflow.i32 and
; selects %a1 when the overflow bit is set, the difference otherwise. As an
; unsigned value -1 is the i32 maximum, so the subtraction can never borrow
; and the difference equals the bitwise complement of %a0. The expected
; assembly is accordingly a register copy followed by notl.
125 define i32 @combine_usub_negone(i32 %a0, i32 %a1) {
126 ; CHECK-LABEL: combine_usub_negone:
128 ; CHECK-NEXT: movl %edi, %eax
129 ; CHECK-NEXT: notl %eax
131 %1 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 -1, i32 %a0)
132 %2 = extractvalue {i32, i1} %1, 0
133 %3 = extractvalue {i32, i1} %1, 1
134 %4 = select i1 %3, i32 %a1, i32 %2
; <4 x i32> case: computes splat(-1) - %a0 through
; llvm.usub.with.overflow.v4i32 and selects per lane between %a1 (overflow
; bit set) and the difference. The expected assembly builds a register by
; comparing %xmm1 with itself for equality (pcmpeqd / vpcmpeqd) and XORs it
; into %xmm0, i.e. a vector bitwise NOT of %a0 with no select on %a1.
138 define <4 x i32> @combine_vec_usub_negone(<4 x i32> %a0, <4 x i32> %a1) {
139 ; SSE-LABEL: combine_vec_usub_negone:
141 ; SSE-NEXT: pcmpeqd %xmm1, %xmm1
142 ; SSE-NEXT: pxor %xmm1, %xmm0
145 ; AVX-LABEL: combine_vec_usub_negone:
147 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
148 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
150 %1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a0)
151 %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
152 %3 = extractvalue {<4 x i32>, <4 x i1>} %1, 1
153 %4 = select <4 x i1> %3, <4 x i32> %a1, <4 x i32> %2
; Unsigned subtraction whose operands make a borrow impossible: %aa is %a
; with the top bit forced on (>= 0x80000000) and %bb is %b with the top bit
; cleared (<= 0x7FFFFFFF), so %aa >= %bb as unsigned values. The expected
; assembly still performs the or/and/sub for the difference, while %edx is
; simply zeroed with xorl instead of being derived from the carry flag.
157 define { i32, i1 } @combine_usub_nuw(i32 %a, i32 %b) {
158 ; CHECK-LABEL: combine_usub_nuw:
160 ; CHECK-NEXT: movl %edi, %eax
161 ; CHECK-NEXT: orl $-2147483648, %eax # imm = 0x80000000
162 ; CHECK-NEXT: andl $2147483647, %esi # imm = 0x7FFFFFFF
163 ; CHECK-NEXT: subl %esi, %eax
164 ; CHECK-NEXT: xorl %edx, %edx
166 %aa = or i32 %a, 2147483648
167 %bb = and i32 %b, 2147483647
168 %x = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %aa, i32 %bb)
; Computes 63 - %y through llvm.usub.with.overflow.i8. The expected assembly
; ORs 64 into the incoming byte, subtracts it from the constant 63 and reads
; the carry flag with setb, so the overflow bit is computed at run time
; rather than folded to a constant.
; NOTE(review): the definition of %y is not visible in this view. The
; expected `orb $64, %dil` suggests %y is %x with bit 6 set, which would make
; %y >= 64 > 63 and the subtraction always borrow - confirm against the full
; file.
172 define { i8, i1 } @usub_always_overflow(i8 %x) nounwind {
173 ; CHECK-LABEL: usub_always_overflow:
175 ; CHECK-NEXT: orb $64, %dil
176 ; CHECK-NEXT: movb $63, %al
177 ; CHECK-NEXT: subb %dil, %al
178 ; CHECK-NEXT: setb %dl
181 %a = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 63, i8 %y)
; Computes -100 - %y through llvm.ssub.with.overflow.i8, where %y is the
; signed maximum of %x and 29 (icmp sgt + select). With %y >= 29 the exact
; difference is at most -129, below the i8 minimum of -128, so the signed
; subtraction overflows for every input. The expected assembly nevertheless
; performs the clamp (cmpb/cmovgel), the subtraction, and reads the overflow
; flag with seto instead of using a constant.
185 define { i8, i1 } @ssub_always_overflow(i8 %x) nounwind {
186 ; CHECK-LABEL: ssub_always_overflow:
188 ; CHECK-NEXT: cmpb $30, %dil
189 ; CHECK-NEXT: movl $29, %ecx
190 ; CHECK-NEXT: cmovgel %edi, %ecx
191 ; CHECK-NEXT: movb $-100, %al
192 ; CHECK-NEXT: subb %cl, %al
193 ; CHECK-NEXT: seto %dl
195 %c = icmp sgt i8 %x, 29
196 %y = select i1 %c, i8 %x, i8 29
197 %a = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 -100, i8 %y)
; Constant-only <4 x i8> unsigned subtraction: every lane computes 0 - 1,
; which wraps to 255 and borrows. Both result registers are expected to be
; produced by comparing a register with itself for equality (pcmpeqd /
; vpcmpeqd), with no subtraction instruction emitted.
201 define { <4 x i8>, <4 x i1> } @always_usub_const_vector() nounwind {
202 ; SSE-LABEL: always_usub_const_vector:
204 ; SSE-NEXT: pcmpeqd %xmm0, %xmm0
205 ; SSE-NEXT: pcmpeqd %xmm1, %xmm1
208 ; AVX-LABEL: always_usub_const_vector:
210 ; AVX-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
211 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
213 %x = call { <4 x i8>, <4 x i1> } @llvm.usub.with.overflow.v4i8(<4 x i8> <i8 0, i8 0, i8 0, i8 0>, <4 x i8> <i8 1, i8 1, i8 1, i8 1>)
214 ret { <4 x i8>, <4 x i1> } %x
; Constant-only <4 x i8> unsigned subtraction: each lane computes 255 minus
; one of 128, 0, 255, 1, giving 127, 255, 0, 254 with no borrow in any lane.
; The expected assembly loads exactly those four bytes as a constant into
; %xmm0 (movaps for SSE4.1, vbroadcastss for AVX2) and zeroes %xmm1 with an
; XOR, with no subtraction instruction emitted.
217 define { <4 x i8>, <4 x i1> } @never_usub_const_vector() nounwind {
218 ; SSE-LABEL: never_usub_const_vector:
220 ; SSE-NEXT: movaps {{.*#+}} xmm0 = <127,255,0,254,u,u,u,u,u,u,u,u,u,u,u,u>
221 ; SSE-NEXT: xorps %xmm1, %xmm1
224 ; AVX-LABEL: never_usub_const_vector:
226 ; AVX-NEXT: vbroadcastss {{.*#+}} xmm0 = [127,255,0,254,127,255,0,254,127,255,0,254,127,255,0,254]
227 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
229 %x = call { <4 x i8>, <4 x i1> } @llvm.usub.with.overflow.v4i8(<4 x i8> <i8 255, i8 255, i8 255, i8 255>, <4 x i8> <i8 128, i8 0, i8 255, i8 1>)
230 ret { <4 x i8>, <4 x i1> } %x