1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefixes=CHECK,SSE2
3 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=CHECK,SSE41
; test_eq_1: lane 1 of (sext(A <s B) == 0), sign-extended to i32.
; Each lane of %sext is 0 or -1, so the equality with zero holds exactly when
; the original compare lane is false; the expected assembly therefore extracts
; lane 1 of the pcmpgtd result and inverts it with notl.
5 define i32 @test_eq_1(<4 x i32> %A, <4 x i32> %B) {
6 ; SSE2-LABEL: test_eq_1:
8 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
9 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
10 ; SSE2-NEXT: movd %xmm0, %eax
11 ; SSE2-NEXT: notl %eax
14 ; SSE41-LABEL: test_eq_1:
16 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
17 ; SSE41-NEXT: pextrd $1, %xmm1, %eax
18 ; SSE41-NEXT: notl %eax
; Per-lane signed A < B.
20 %cmp = icmp slt <4 x i32> %A, %B
; Widen each i1 lane to i32 (true -> -1, false -> 0).
21 %sext = sext <4 x i1> %cmp to <4 x i32>
; Predicate under test: sext == 0.
22 %cmp1 = icmp eq <4 x i32> %sext, zeroinitializer
23 %t0 = extractelement <4 x i1> %cmp1, i32 1
24 %t1 = sext i1 %t0 to i32
; test_ne_1: lane 1 of (sext(A <s B) != 0), sign-extended to i32.
; Since each lane of %sext is 0 or -1, "!= 0" is the same as the original
; compare lane; the expected assembly is just pcmpgtd plus a lane-1 extract,
; with no inversion.
28 define i32 @test_ne_1(<4 x i32> %A, <4 x i32> %B) {
29 ; SSE2-LABEL: test_ne_1:
31 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
32 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
33 ; SSE2-NEXT: movd %xmm0, %eax
36 ; SSE41-LABEL: test_ne_1:
38 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
39 ; SSE41-NEXT: pextrd $1, %xmm1, %eax
; Per-lane signed A < B.
41 %cmp = icmp slt <4 x i32> %A, %B
; Widen each i1 lane to i32 (true -> -1, false -> 0).
42 %sext = sext <4 x i1> %cmp to <4 x i32>
; Predicate under test: sext != 0.
43 %cmp1 = icmp ne <4 x i32> %sext, zeroinitializer
44 %t0 = extractelement <4 x i1> %cmp1, i32 1
45 %t1 = sext i1 %t0 to i32
; test_le_1: lane 1 of (sext(A <s B) <=s 0), sign-extended to i32.
; Both possible lane values of %sext (0 and -1) are <= 0, so the predicate is
; always true; both RUN configurations are expected to return the constant -1.
49 define i32 @test_le_1(<4 x i32> %A, <4 x i32> %B) {
50 ; CHECK-LABEL: test_le_1:
52 ; CHECK-NEXT: movl $-1, %eax
; Per-lane signed A < B.
54 %cmp = icmp slt <4 x i32> %A, %B
; Widen each i1 lane to i32 (true -> -1, false -> 0).
55 %sext = sext <4 x i1> %cmp to <4 x i32>
; Predicate under test: sext <=s 0.
56 %cmp1 = icmp sle <4 x i32> %sext, zeroinitializer
57 %t0 = extractelement <4 x i1> %cmp1, i32 1
58 %t1 = sext i1 %t0 to i32
; test_ge_1: lane 1 of (sext(A <s B) >=s 0), sign-extended to i32.
; A lane of %sext is >= 0 only when it is 0, i.e. when the original compare
; lane is false; the expected assembly extracts lane 1 of the pcmpgtd result
; and inverts it with notl.
62 define i32 @test_ge_1(<4 x i32> %A, <4 x i32> %B) {
63 ; SSE2-LABEL: test_ge_1:
65 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
66 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
67 ; SSE2-NEXT: movd %xmm0, %eax
68 ; SSE2-NEXT: notl %eax
71 ; SSE41-LABEL: test_ge_1:
73 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
74 ; SSE41-NEXT: pextrd $1, %xmm1, %eax
75 ; SSE41-NEXT: notl %eax
; Per-lane signed A < B.
77 %cmp = icmp slt <4 x i32> %A, %B
; Widen each i1 lane to i32 (true -> -1, false -> 0).
78 %sext = sext <4 x i1> %cmp to <4 x i32>
; Predicate under test: sext >=s 0.
79 %cmp1 = icmp sge <4 x i32> %sext, zeroinitializer
80 %t0 = extractelement <4 x i1> %cmp1, i32 1
81 %t1 = sext i1 %t0 to i32
; test_lt_1: nominally lane 1 of (sext(A <s B) <s 0), sign-extended to i32.
; A lane of %sext is negative exactly when the original compare lane is true,
; so the predicate is equivalent to %cmp; the expected assembly is pcmpgtd plus
; a lane-1 extract with no inversion.
; NOTE(review): the extractelement below reads %cmp rather than %cmp1, leaving
; %cmp1 unused in the lines visible here. The two values are equivalent in this
; function, but the sibling tests extract from %cmp1 - confirm whether %cmp is
; intentional.
85 define i32 @test_lt_1(<4 x i32> %A, <4 x i32> %B) {
86 ; SSE2-LABEL: test_lt_1:
88 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
89 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
90 ; SSE2-NEXT: movd %xmm0, %eax
93 ; SSE41-LABEL: test_lt_1:
95 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
96 ; SSE41-NEXT: pextrd $1, %xmm1, %eax
; Per-lane signed A < B.
98 %cmp = icmp slt <4 x i32> %A, %B
; Widen each i1 lane to i32 (true -> -1, false -> 0).
99 %sext = sext <4 x i1> %cmp to <4 x i32>
; Predicate the test is named after: sext <s 0.
100 %cmp1 = icmp slt <4 x i32> %sext, zeroinitializer
; Extracts from the original compare (%cmp), not from %cmp1.
101 %t0 = extractelement <4 x i1> %cmp, i32 1
102 %t1 = sext i1 %t0 to i32
; test_gt_1: lane 1 of (sext(A <s B) >s 0), sign-extended to i32.
; Neither possible lane value of %sext (0 or -1) is > 0, so the predicate is
; always false; both RUN configurations are expected to return zero via
; xorl %eax, %eax.
106 define i32 @test_gt_1(<4 x i32> %A, <4 x i32> %B) {
107 ; CHECK-LABEL: test_gt_1:
109 ; CHECK-NEXT: xorl %eax, %eax
; Per-lane signed A < B.
111 %cmp = icmp slt <4 x i32> %A, %B
; Widen each i1 lane to i32 (true -> -1, false -> 0).
112 %sext = sext <4 x i1> %cmp to <4 x i32>
; Predicate under test: sext >s 0.
113 %cmp1 = icmp sgt <4 x i32> %sext, zeroinitializer
114 %t0 = extractelement <4 x i1> %cmp1, i32 1
115 %t1 = sext i1 %t0 to i32
; test_eq_2: same shape as test_eq_1 but the initial compare is B <s A.
; Lane 1 of (sext(B <s A) == 0) is true exactly when the compare lane is false,
; so the expected assembly is pcmpgtd (operands swapped relative to test_eq_1),
; a lane-1 extract, and notl.
119 define i32 @test_eq_2(<4 x i32> %A, <4 x i32> %B) {
120 ; SSE2-LABEL: test_eq_2:
122 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
123 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
124 ; SSE2-NEXT: movd %xmm0, %eax
125 ; SSE2-NEXT: notl %eax
128 ; SSE41-LABEL: test_eq_2:
130 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
131 ; SSE41-NEXT: pextrd $1, %xmm0, %eax
132 ; SSE41-NEXT: notl %eax
; Per-lane signed B < A.
134 %cmp = icmp slt <4 x i32> %B, %A
; Widen each i1 lane to i32 (true -> -1, false -> 0).
135 %sext = sext <4 x i1> %cmp to <4 x i32>
; Predicate under test: sext == 0.
136 %cmp1 = icmp eq <4 x i32> %sext, zeroinitializer
137 %t0 = extractelement <4 x i1> %cmp1, i32 1
138 %t1 = sext i1 %t0 to i32
; test_ne_2: same shape as test_ne_1 but the initial compare is B <s A.
; Lane 1 of (sext(B <s A) != 0) equals the compare lane itself, so the expected
; assembly is pcmpgtd plus a lane-1 extract, with no inversion.
142 define i32 @test_ne_2(<4 x i32> %A, <4 x i32> %B) {
143 ; SSE2-LABEL: test_ne_2:
145 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
146 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
147 ; SSE2-NEXT: movd %xmm0, %eax
150 ; SSE41-LABEL: test_ne_2:
152 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
153 ; SSE41-NEXT: pextrd $1, %xmm0, %eax
; Per-lane signed B < A.
155 %cmp = icmp slt <4 x i32> %B, %A
; Widen each i1 lane to i32 (true -> -1, false -> 0).
156 %sext = sext <4 x i1> %cmp to <4 x i32>
; Predicate under test: sext != 0.
157 %cmp1 = icmp ne <4 x i32> %sext, zeroinitializer
158 %t0 = extractelement <4 x i1> %cmp1, i32 1
159 %t1 = sext i1 %t0 to i32
; test_le_2: lane 1 of (0 <=s sext(B <s A)), sign-extended to i32.
; The zero constant is on the left-hand side here. A lane of %sext (0 or -1)
; satisfies 0 <= lane only when it is 0, i.e. when the compare lane is false;
; the expected assembly extracts lane 1 of the pcmpgtd result and applies notl.
163 define i32 @test_le_2(<4 x i32> %A, <4 x i32> %B) {
164 ; SSE2-LABEL: test_le_2:
166 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
167 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
168 ; SSE2-NEXT: movd %xmm0, %eax
169 ; SSE2-NEXT: notl %eax
172 ; SSE41-LABEL: test_le_2:
174 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
175 ; SSE41-NEXT: pextrd $1, %xmm0, %eax
176 ; SSE41-NEXT: notl %eax
; Per-lane signed B < A.
178 %cmp = icmp slt <4 x i32> %B, %A
; Widen each i1 lane to i32 (true -> -1, false -> 0).
179 %sext = sext <4 x i1> %cmp to <4 x i32>
; Predicate under test, constant on the LHS: 0 <=s sext.
180 %cmp1 = icmp sle <4 x i32> zeroinitializer, %sext
181 %t0 = extractelement <4 x i1> %cmp1, i32 1
182 %t1 = sext i1 %t0 to i32
; test_ge_2: lane 1 of (0 >=s sext(B <s A)), sign-extended to i32.
; The zero constant is on the left-hand side here. Both possible lane values of
; %sext (0 and -1) satisfy 0 >= lane, so the predicate is always true; both RUN
; configurations are expected to return the constant -1.
186 define i32 @test_ge_2(<4 x i32> %A, <4 x i32> %B) {
187 ; CHECK-LABEL: test_ge_2:
189 ; CHECK-NEXT: movl $-1, %eax
; Per-lane signed B < A.
191 %cmp = icmp slt <4 x i32> %B, %A
; Widen each i1 lane to i32 (true -> -1, false -> 0).
192 %sext = sext <4 x i1> %cmp to <4 x i32>
; Predicate under test, constant on the LHS: 0 >=s sext.
193 %cmp1 = icmp sge <4 x i32> zeroinitializer, %sext
194 %t0 = extractelement <4 x i1> %cmp1, i32 1
195 %t1 = sext i1 %t0 to i32
; test_lt_2: nominally lane 1 of (0 <s sext(B <s A)), sign-extended to i32.
; NOTE(review): the extractelement below reads %cmp rather than %cmp1, leaving
; %cmp1 unused in the lines visible here. Unlike test_lt_1 this is not an
; equivalent substitution: a lane of %sext is 0 or -1, so "0 <s lane" is never
; true, whereas the expected assembly (pcmpgtd plus a lane-1 extract) returns
; the plain B <s A lane. As written, the function checks the compare itself,
; not the "lt with constant on the LHS" predicate - confirm intent before
; changing, since the CHECK lines would need to be regenerated.
199 define i32 @test_lt_2(<4 x i32> %A, <4 x i32> %B) {
200 ; SSE2-LABEL: test_lt_2:
202 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
203 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
204 ; SSE2-NEXT: movd %xmm0, %eax
207 ; SSE41-LABEL: test_lt_2:
209 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
210 ; SSE41-NEXT: pextrd $1, %xmm0, %eax
; Per-lane signed B < A.
212 %cmp = icmp slt <4 x i32> %B, %A
; Widen each i1 lane to i32 (true -> -1, false -> 0).
213 %sext = sext <4 x i1> %cmp to <4 x i32>
; Predicate the test is named after, constant on the LHS: 0 <s sext.
214 %cmp1 = icmp slt <4 x i32> zeroinitializer, %sext
; Extracts from the original compare (%cmp), not from %cmp1.
215 %t0 = extractelement <4 x i1> %cmp, i32 1
216 %t1 = sext i1 %t0 to i32
; test_gt_2: lane 1 of (0 >s sext(B <s A)), sign-extended to i32.
; The zero constant is on the left-hand side here. "0 > lane" holds exactly
; when the lane of %sext is -1, i.e. when the compare lane is true; the
; expected assembly is pcmpgtd plus a lane-1 extract, with no inversion.
220 define i32 @test_gt_2(<4 x i32> %A, <4 x i32> %B) {
221 ; SSE2-LABEL: test_gt_2:
223 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
224 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
225 ; SSE2-NEXT: movd %xmm0, %eax
228 ; SSE41-LABEL: test_gt_2:
230 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
231 ; SSE41-NEXT: pextrd $1, %xmm0, %eax
; Per-lane signed B < A.
233 %cmp = icmp slt <4 x i32> %B, %A
; Widen each i1 lane to i32 (true -> -1, false -> 0).
234 %sext = sext <4 x i1> %cmp to <4 x i32>
; Predicate under test, constant on the LHS: 0 >s sext.
235 %cmp1 = icmp sgt <4 x i32> zeroinitializer, %sext
236 %t0 = extractelement <4 x i1> %cmp1, i32 1
237 %t1 = sext i1 %t0 to i32
241 ; (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
242 ; Don't combine with i1 - out of range constant
; test_i1_uge: loads an i1 from %A2, forms two comparisons of it against the
; i1 constants true and false, compares those results with an unsigned
; greater-than, uses that i1 as a getelementptr index off %A2, and stores the
; first comparison result through the computed pointer.
243 define void @test_i1_uge(i1 *%A2) {
244 ; CHECK-LABEL: test_i1_uge:
246 ; CHECK-NEXT: movb (%rdi), %al
247 ; CHECK-NEXT: movl %eax, %ecx
248 ; CHECK-NEXT: xorb $1, %cl
249 ; CHECK-NEXT: andb %cl, %al
250 ; CHECK-NEXT: movzbl %al, %eax
251 ; CHECK-NEXT: andl $1, %eax
252 ; CHECK-NEXT: negq %rax
253 ; CHECK-NEXT: andb $1, %cl
254 ; CHECK-NEXT: movb %cl, (%rdi,%rax)
256 %L5 = load i1, i1* %A2
; Two setcc nodes on the same i1 value against the constants true and false.
257 %C3 = icmp ne i1 %L5, true
258 %C8 = icmp eq i1 %L5, false
259 %C9 = icmp ugt i1 %C3, %C8
; The index operand of the GEP is itself an i1.
260 %G3 = getelementptr i1, i1* %A2, i1 %C9
261 store i1 %C3, i1* %G3
265 ; This should not get folded to 0.
; PR40657: a chain of i1 arithmetic (trunc, sub with wrap flags, udiv) whose
; result is zero-extended to i64. The expected assembly still computes the
; value from both incoming arguments (%sil and %dil) and masks it to one bit,
; rather than materializing a constant.
267 define i64 @PR40657(i8 %var2, i8 %var9) {
268 ; CHECK-LABEL: PR40657:
270 ; CHECK-NEXT: notb %sil
271 ; CHECK-NEXT: addb %dil, %sil
272 ; CHECK-NEXT: movzbl %sil, %eax
273 ; CHECK-NEXT: andl $1, %eax
275 %var6 = trunc i8 %var9 to i1
; Constant operand: trunc keeps the low bit of 175, which is 1.
276 %var7 = trunc i8 175 to i1
277 %var3 = sub nsw i1 %var6, %var7
; Compares a constant with itself, so the udiv divisor below is a constant true.
278 %var4 = icmp eq i64 1114591064, 1114591064
279 %var1 = udiv i1 %var3, %var4
280 %var0 = trunc i8 %var2 to i1
281 %res = sub nsw nuw i1 %var0, %var1
282 %res.cast = zext i1 %res to i64
286 ; This should not get folded to 0.
; PR40657_commute: variant of PR40657 built only from i1 subtractions (with
; assorted nsw/nuw flags) and an icmp ne, with the result zero-extended to i64.
; The expected assembly keeps the full sub/xor chain on the three argument
; registers and masks the result to one bit.
288 define i64 @PR40657_commute(i8 %var7, i8 %var8, i8 %var9) {
289 ; CHECK-LABEL: PR40657_commute:
291 ; CHECK-NEXT: subb %dil, %sil
292 ; CHECK-NEXT: subb %sil, %dl
293 ; CHECK-NEXT: subb %dl, %sil
294 ; CHECK-NEXT: xorb %dl, %sil
295 ; CHECK-NEXT: subb %sil, %dl
296 ; CHECK-NEXT: movzbl %dl, %eax
297 ; CHECK-NEXT: andl $1, %eax
; Narrow each i8 argument to its low bit.
299 %var4 = trunc i8 %var9 to i1
300 %var5 = trunc i8 %var8 to i1
301 %var6 = trunc i8 %var7 to i1
302 %var3 = sub nsw nuw i1 %var5, %var6
303 %var0 = sub nuw i1 %var4, %var3
304 %var2 = sub i1 %var3, %var0
; The i1 compare result is used directly as the subtrahend of the final sub.
305 %var1 = icmp ne i1 %var0, %var2
306 %res = sub nsw nuw i1 %var0, %var1
307 %res.cast = zext i1 %res to i64
; sub_to_shift_to_add: selects %s1 when (x - y) == y, otherwise %s2.
; The expected assembly does not emit a subtract: it doubles y (addl %esi, %esi)
; and compares the result against x, then uses cmovne to pick %s2 over %s1.
311 define i64 @sub_to_shift_to_add(i32 %x, i32 %y, i64 %s1, i64 %s2) {
312 ; CHECK-LABEL: sub_to_shift_to_add:
314 ; CHECK-NEXT: movq %rdx, %rax
315 ; CHECK-NEXT: addl %esi, %esi
316 ; CHECK-NEXT: cmpl %esi, %edi
317 ; CHECK-NEXT: cmovneq %rcx, %rax
319 %sub = sub i32 %x, %y
; The subtrahend %y also appears as the other side of the equality.
320 %cmp = icmp eq i32 %sub, %y
321 %r = select i1 %cmp, i64 %s1, i64 %s2
; sub_to_shift_to_add_vec: vector form of sub_to_shift_to_add. Per lane,
; selects from %s1 when (x - y) == y, otherwise from %s2.
; In both configurations the expected assembly doubles y with paddd and
; compares it against x with pcmpeqd instead of subtracting. The select is
; done with pand/pandn/por under SSE2 and with blendvps under SSE4.1.
325 define <4 x float> @sub_to_shift_to_add_vec(<4 x i32> %x, <4 x i32> %y, <4 x float> %s1, <4 x float> %s2) {
326 ; SSE2-LABEL: sub_to_shift_to_add_vec:
328 ; SSE2-NEXT: paddd %xmm1, %xmm1
329 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
330 ; SSE2-NEXT: pand %xmm1, %xmm2
331 ; SSE2-NEXT: pandn %xmm3, %xmm1
332 ; SSE2-NEXT: por %xmm2, %xmm1
333 ; SSE2-NEXT: movdqa %xmm1, %xmm0
336 ; SSE41-LABEL: sub_to_shift_to_add_vec:
338 ; SSE41-NEXT: paddd %xmm1, %xmm1
339 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
340 ; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm3
341 ; SSE41-NEXT: movaps %xmm3, %xmm0
343 %sub = sub <4 x i32> %x, %y
; The subtrahend %y also appears as the other side of the equality.
344 %cmp = icmp eq <4 x i32> %sub, %y
345 %r = select <4 x i1> %cmp, <4 x float> %s1, <4 x float> %s2
; sub_constant_to_shift_to_add: selects %s1 when (42 - x) == x, otherwise %s2.
; The expected assembly does not emit a subtract: it doubles x (addl %edi, %edi)
; and compares the result against the immediate 42, then uses cmovne to pick
; %s2 over %s1.
349 define i64 @sub_constant_to_shift_to_add(i32 %x, i64 %s1, i64 %s2) {
350 ; CHECK-LABEL: sub_constant_to_shift_to_add:
352 ; CHECK-NEXT: movq %rsi, %rax
353 ; CHECK-NEXT: addl %edi, %edi
354 ; CHECK-NEXT: cmpl $42, %edi
355 ; CHECK-NEXT: cmovneq %rdx, %rax
; Constant minuend; the subtrahend %x is also the other side of the equality.
357 %sub = sub i32 42, %x
358 %cmp = icmp eq i32 %sub, %x
359 %r = select i1 %cmp, i64 %s1, i64 %s2