1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2 < %s | FileCheck %s --check-prefixes=X86,X86-SSE2,X86-BMI1
3 ; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi < %s | FileCheck %s --check-prefixes=X86,X86-SSE2,X86-BMI1
4 ; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-SSE2,X86-BMI2
5 ; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2,+avx2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2,AVX2
6 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2 < %s | FileCheck %s --check-prefixes=X64,X64-SSE2,X64-BMI1
7 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi < %s | FileCheck %s --check-prefixes=X64,X64-SSE2,X64-BMI1
8 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-SSE2,X64-BMI2
9 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2,+avx2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2,AVX2
11 ; We are looking for the following pattern here:
12 ; (X & (C << Y)) ==/!= 0
13 ; It may be optimal to hoist the constant:
14 ; ((X l>> Y) & C) ==/!= 0
16 ;------------------------------------------------------------------------------;
18 ;------------------------------------------------------------------------------;
; i8 case whose mask is the sign bit (the checks test against $-128).
; Expected codegen shifts x right by y (shrb) and tests the unshifted
; constant, i.e. the hoisted form ((x l>> y) & C) == 0.
; NOTE(review): the shl/and that produce %t1 are not visible in this excerpt.
22 define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
23 ; X86-LABEL: scalar_i8_signbit_eq:
25 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
26 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
27 ; X86-NEXT: shrb %cl, %al
28 ; X86-NEXT: testb $-128, %al
32 ; X64-LABEL: scalar_i8_signbit_eq:
34 ; X64-NEXT: movl %esi, %ecx
35 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
36 ; X64-NEXT: shrb %cl, %dil
37 ; X64-NEXT: testb $-128, %dil
42 %res = icmp eq i8 %t1, 0
; i8 case that, judging by the name, uses a mask with only the lowest bit set.
; The checks expect a single bit-test instruction (btl) instead of a
; shift followed by an and.
; NOTE(review): the shl/and that produce %t1 are not visible in this excerpt.
46 define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
47 ; X86-LABEL: scalar_i8_lowestbit_eq:
49 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
50 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
51 ; X86-NEXT: btl %eax, %ecx
55 ; X64-LABEL: scalar_i8_lowestbit_eq:
57 ; X64-NEXT: btl %esi, %edi
62 %res = icmp eq i8 %t1, 0
; i8 case with a mask in the middle of the byte (the checks test against $24,
; i.e. 0x18). Expected codegen shifts x right by y (shrb) and tests the
; unshifted constant.
; NOTE(review): the shl/and that produce %t1 are not visible in this excerpt.
66 define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
67 ; X86-LABEL: scalar_i8_bitsinmiddle_eq:
69 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
70 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
71 ; X86-NEXT: shrb %cl, %al
72 ; X86-NEXT: testb $24, %al
76 ; X64-LABEL: scalar_i8_bitsinmiddle_eq:
78 ; X64-NEXT: movl %esi, %ecx
79 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
80 ; X64-NEXT: shrb %cl, %dil
81 ; X64-NEXT: testb $24, %dil
86 %res = icmp eq i8 %t1, 0
; (x & (0x8000 << y)) == 0 on i16.
; Expected codegen zero-extends x, shifts it right by y (shrl without BMI2,
; shrxl with BMI2) and tests the unshifted 0x8000 mask.
92 define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
93 ; X86-BMI1-LABEL: scalar_i16_signbit_eq:
95 ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
96 ; X86-BMI1-NEXT: movzwl {{[0-9]+}}(%esp), %eax
97 ; X86-BMI1-NEXT: shrl %cl, %eax
98 ; X86-BMI1-NEXT: testl $32768, %eax # imm = 0x8000
99 ; X86-BMI1-NEXT: sete %al
100 ; X86-BMI1-NEXT: retl
102 ; X86-BMI2-LABEL: scalar_i16_signbit_eq:
104 ; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
105 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
106 ; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
107 ; X86-BMI2-NEXT: testl $32768, %eax # imm = 0x8000
108 ; X86-BMI2-NEXT: sete %al
109 ; X86-BMI2-NEXT: retl
111 ; X64-BMI1-LABEL: scalar_i16_signbit_eq:
113 ; X64-BMI1-NEXT: movl %esi, %ecx
114 ; X64-BMI1-NEXT: movzwl %di, %eax
115 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
116 ; X64-BMI1-NEXT: shrl %cl, %eax
117 ; X64-BMI1-NEXT: testl $32768, %eax # imm = 0x8000
118 ; X64-BMI1-NEXT: sete %al
119 ; X64-BMI1-NEXT: retq
121 ; X64-BMI2-LABEL: scalar_i16_signbit_eq:
123 ; X64-BMI2-NEXT: movzwl %di, %eax
124 ; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
125 ; X64-BMI2-NEXT: testl $32768, %eax # imm = 0x8000
126 ; X64-BMI2-NEXT: sete %al
127 ; X64-BMI2-NEXT: retq
128 %t0 = shl i16 32768, %y
129 %t1 = and i16 %t0, %x
130 %res = icmp eq i16 %t1, 0
; i16 case that, judging by the name, uses a mask with only the lowest bit
; set. The checks expect a bit-test (btl) followed by setae.
; NOTE(review): the shl that produces %t0 is not visible in this excerpt.
134 define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
135 ; X86-LABEL: scalar_i16_lowestbit_eq:
137 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
138 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
139 ; X86-NEXT: btl %eax, %ecx
140 ; X86-NEXT: setae %al
143 ; X64-LABEL: scalar_i16_lowestbit_eq:
145 ; X64-NEXT: btl %esi, %edi
146 ; X64-NEXT: setae %al
149 %t1 = and i16 %t0, %x
150 %res = icmp eq i16 %t1, 0
; (x & (0x0FF0 << y)) == 0 on i16.
; Expected codegen zero-extends x, shifts it right by y (shrl without BMI2,
; shrxl with BMI2) and tests the unshifted 0xFF0 mask.
154 define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
155 ; X86-BMI1-LABEL: scalar_i16_bitsinmiddle_eq:
157 ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
158 ; X86-BMI1-NEXT: movzwl {{[0-9]+}}(%esp), %eax
159 ; X86-BMI1-NEXT: shrl %cl, %eax
160 ; X86-BMI1-NEXT: testl $4080, %eax # imm = 0xFF0
161 ; X86-BMI1-NEXT: sete %al
162 ; X86-BMI1-NEXT: retl
164 ; X86-BMI2-LABEL: scalar_i16_bitsinmiddle_eq:
166 ; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
167 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
168 ; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
169 ; X86-BMI2-NEXT: testl $4080, %eax # imm = 0xFF0
170 ; X86-BMI2-NEXT: sete %al
171 ; X86-BMI2-NEXT: retl
173 ; X64-BMI1-LABEL: scalar_i16_bitsinmiddle_eq:
175 ; X64-BMI1-NEXT: movl %esi, %ecx
176 ; X64-BMI1-NEXT: movzwl %di, %eax
177 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
178 ; X64-BMI1-NEXT: shrl %cl, %eax
179 ; X64-BMI1-NEXT: testl $4080, %eax # imm = 0xFF0
180 ; X64-BMI1-NEXT: sete %al
181 ; X64-BMI1-NEXT: retq
183 ; X64-BMI2-LABEL: scalar_i16_bitsinmiddle_eq:
185 ; X64-BMI2-NEXT: movzwl %di, %eax
186 ; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
187 ; X64-BMI2-NEXT: testl $4080, %eax # imm = 0xFF0
188 ; X64-BMI2-NEXT: sete %al
189 ; X64-BMI2-NEXT: retq
190 %t0 = shl i16 4080, %y
191 %t1 = and i16 %t0, %x
192 %res = icmp eq i16 %t1, 0
; (x & (0x80000000 << y)) == 0 on i32.
; Expected codegen shifts x right by y (shrl without BMI2, shrxl with BMI2)
; and tests the unshifted 0x80000000 mask.
198 define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
199 ; X86-BMI1-LABEL: scalar_i32_signbit_eq:
201 ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
202 ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
203 ; X86-BMI1-NEXT: shrl %cl, %eax
204 ; X86-BMI1-NEXT: testl $-2147483648, %eax # imm = 0x80000000
205 ; X86-BMI1-NEXT: sete %al
206 ; X86-BMI1-NEXT: retl
208 ; X86-BMI2-LABEL: scalar_i32_signbit_eq:
210 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
211 ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax
212 ; X86-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000
213 ; X86-BMI2-NEXT: sete %al
214 ; X86-BMI2-NEXT: retl
216 ; X64-BMI1-LABEL: scalar_i32_signbit_eq:
218 ; X64-BMI1-NEXT: movl %esi, %ecx
219 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
220 ; X64-BMI1-NEXT: shrl %cl, %edi
221 ; X64-BMI1-NEXT: testl $-2147483648, %edi # imm = 0x80000000
222 ; X64-BMI1-NEXT: sete %al
223 ; X64-BMI1-NEXT: retq
225 ; X64-BMI2-LABEL: scalar_i32_signbit_eq:
227 ; X64-BMI2-NEXT: shrxl %esi, %edi, %eax
228 ; X64-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000
229 ; X64-BMI2-NEXT: sete %al
230 ; X64-BMI2-NEXT: retq
231 %t0 = shl i32 2147483648, %y
232 %t1 = and i32 %t0, %x
233 %res = icmp eq i32 %t1, 0
; i32 case that, judging by the name, uses a mask with only the lowest bit
; set. The checks expect a bit-test (btl) followed by setae.
; NOTE(review): the shl that produces %t0 is not visible in this excerpt.
237 define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
238 ; X86-LABEL: scalar_i32_lowestbit_eq:
240 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
241 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
242 ; X86-NEXT: btl %ecx, %eax
243 ; X86-NEXT: setae %al
246 ; X64-LABEL: scalar_i32_lowestbit_eq:
248 ; X64-NEXT: btl %esi, %edi
249 ; X64-NEXT: setae %al
252 %t1 = and i32 %t0, %x
253 %res = icmp eq i32 %t1, 0
; (x & (0x00FFFF00 << y)) == 0 on i32.
; Expected codegen shifts x right by y (shrl without BMI2, shrxl with BMI2)
; and tests the unshifted 0xFFFF00 mask.
257 define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
258 ; X86-BMI1-LABEL: scalar_i32_bitsinmiddle_eq:
260 ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
261 ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
262 ; X86-BMI1-NEXT: shrl %cl, %eax
263 ; X86-BMI1-NEXT: testl $16776960, %eax # imm = 0xFFFF00
264 ; X86-BMI1-NEXT: sete %al
265 ; X86-BMI1-NEXT: retl
267 ; X86-BMI2-LABEL: scalar_i32_bitsinmiddle_eq:
269 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
270 ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax
271 ; X86-BMI2-NEXT: testl $16776960, %eax # imm = 0xFFFF00
272 ; X86-BMI2-NEXT: sete %al
273 ; X86-BMI2-NEXT: retl
275 ; X64-BMI1-LABEL: scalar_i32_bitsinmiddle_eq:
277 ; X64-BMI1-NEXT: movl %esi, %ecx
278 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
279 ; X64-BMI1-NEXT: shrl %cl, %edi
280 ; X64-BMI1-NEXT: testl $16776960, %edi # imm = 0xFFFF00
281 ; X64-BMI1-NEXT: sete %al
282 ; X64-BMI1-NEXT: retq
284 ; X64-BMI2-LABEL: scalar_i32_bitsinmiddle_eq:
286 ; X64-BMI2-NEXT: shrxl %esi, %edi, %eax
287 ; X64-BMI2-NEXT: testl $16776960, %eax # imm = 0xFFFF00
288 ; X64-BMI2-NEXT: sete %al
289 ; X64-BMI2-NEXT: retq
290 %t0 = shl i32 16776960, %y
291 %t1 = and i32 %t0, %x
292 %res = icmp eq i32 %t1, 0
; (x & (0x8000000000000000 << y)) == 0 on i64.
; On the 64-bit target the checks expect x shifted right by y and then a test
; of bit 63 (btq $63). On the 32-bit target the shift is done on a 32-bit
; register, a cmov keyed on bit 5 of the shift amount (testb $32) selects
; between the shifted value and zero, and the result is tested against
; 0x80000000.
298 define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
299 ; X86-BMI1-LABEL: scalar_i64_signbit_eq:
301 ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
302 ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
303 ; X86-BMI1-NEXT: shrl %cl, %eax
304 ; X86-BMI1-NEXT: xorl %edx, %edx
305 ; X86-BMI1-NEXT: testb $32, %cl
306 ; X86-BMI1-NEXT: cmovel %eax, %edx
307 ; X86-BMI1-NEXT: testl $-2147483648, %edx # imm = 0x80000000
308 ; X86-BMI1-NEXT: sete %al
309 ; X86-BMI1-NEXT: retl
311 ; X86-BMI2-LABEL: scalar_i64_signbit_eq:
313 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
314 ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx
315 ; X86-BMI2-NEXT: xorl %edx, %edx
316 ; X86-BMI2-NEXT: testb $32, %al
317 ; X86-BMI2-NEXT: cmovel %ecx, %edx
318 ; X86-BMI2-NEXT: testl $-2147483648, %edx # imm = 0x80000000
319 ; X86-BMI2-NEXT: sete %al
320 ; X86-BMI2-NEXT: retl
322 ; X64-BMI1-LABEL: scalar_i64_signbit_eq:
324 ; X64-BMI1-NEXT: movq %rsi, %rcx
325 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx
326 ; X64-BMI1-NEXT: shrq %cl, %rdi
327 ; X64-BMI1-NEXT: btq $63, %rdi
328 ; X64-BMI1-NEXT: setae %al
329 ; X64-BMI1-NEXT: retq
331 ; X64-BMI2-LABEL: scalar_i64_signbit_eq:
333 ; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax
334 ; X64-BMI2-NEXT: btq $63, %rax
335 ; X64-BMI2-NEXT: setae %al
336 ; X64-BMI2-NEXT: retq
337 %t0 = shl i64 9223372036854775808, %y
338 %t1 = and i64 %t0, %x
339 %res = icmp eq i64 %t1, 0
; i64 case that, judging by the name, uses a mask with only the lowest bit
; set. On the 64-bit target the checks expect a single btq of x at index y.
; On the 32-bit target the checks still materialize the constant 1, shift it
; left across a register pair (shldl plus shll/shlxl, fixed up by cmov on
; bit 5 of the count), AND it with two stack operands and OR the halves.
; NOTE(review): the shl that produces %t0 is not visible in this excerpt; the
; two stack operands are presumably the halves of x.
343 define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
344 ; X86-BMI1-LABEL: scalar_i64_lowestbit_eq:
346 ; X86-BMI1-NEXT: pushl %esi
347 ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
348 ; X86-BMI1-NEXT: movl $1, %eax
349 ; X86-BMI1-NEXT: xorl %esi, %esi
350 ; X86-BMI1-NEXT: xorl %edx, %edx
351 ; X86-BMI1-NEXT: shldl %cl, %eax, %edx
352 ; X86-BMI1-NEXT: shll %cl, %eax
353 ; X86-BMI1-NEXT: testb $32, %cl
354 ; X86-BMI1-NEXT: cmovnel %eax, %edx
355 ; X86-BMI1-NEXT: cmovnel %esi, %eax
356 ; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx
357 ; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax
358 ; X86-BMI1-NEXT: orl %edx, %eax
359 ; X86-BMI1-NEXT: sete %al
360 ; X86-BMI1-NEXT: popl %esi
361 ; X86-BMI1-NEXT: retl
363 ; X86-BMI2-LABEL: scalar_i64_lowestbit_eq:
365 ; X86-BMI2-NEXT: pushl %esi
366 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
367 ; X86-BMI2-NEXT: movl $1, %edx
368 ; X86-BMI2-NEXT: xorl %esi, %esi
369 ; X86-BMI2-NEXT: xorl %eax, %eax
370 ; X86-BMI2-NEXT: shldl %cl, %edx, %eax
371 ; X86-BMI2-NEXT: shlxl %ecx, %edx, %edx
372 ; X86-BMI2-NEXT: testb $32, %cl
373 ; X86-BMI2-NEXT: cmovnel %edx, %eax
374 ; X86-BMI2-NEXT: cmovnel %esi, %edx
375 ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
376 ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
377 ; X86-BMI2-NEXT: orl %eax, %edx
378 ; X86-BMI2-NEXT: sete %al
379 ; X86-BMI2-NEXT: popl %esi
380 ; X86-BMI2-NEXT: retl
382 ; X64-LABEL: scalar_i64_lowestbit_eq:
384 ; X64-NEXT: btq %rsi, %rdi
385 ; X64-NEXT: setae %al
388 %t1 = and i64 %t0, %x
389 %res = icmp eq i64 %t1, 0
; (x & (0x0000FFFFFFFF0000 << y)) == 0 on i64.
; On the 64-bit target the checks expect x shifted right by y, then shifted
; right by 16, and the low 32 bits of the result tested for zero. On the
; 32-bit target the 64-bit right shift is expanded into a shrdl/shrl (or
; shrxl) pair with cmov fix-ups, after which the low word is masked with
; 0xFFFF0000, the high word is truncated to 16 bits and the two are OR-ed.
393 define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
394 ; X86-BMI1-LABEL: scalar_i64_bitsinmiddle_eq:
396 ; X86-BMI1-NEXT: pushl %esi
397 ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
398 ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
399 ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
400 ; X86-BMI1-NEXT: movl %edx, %esi
401 ; X86-BMI1-NEXT: shrl %cl, %esi
402 ; X86-BMI1-NEXT: shrdl %cl, %edx, %eax
403 ; X86-BMI1-NEXT: xorl %edx, %edx
404 ; X86-BMI1-NEXT: testb $32, %cl
405 ; X86-BMI1-NEXT: cmovnel %esi, %eax
406 ; X86-BMI1-NEXT: cmovel %esi, %edx
407 ; X86-BMI1-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
408 ; X86-BMI1-NEXT: movzwl %dx, %ecx
409 ; X86-BMI1-NEXT: orl %eax, %ecx
410 ; X86-BMI1-NEXT: sete %al
411 ; X86-BMI1-NEXT: popl %esi
412 ; X86-BMI1-NEXT: retl
414 ; X86-BMI2-LABEL: scalar_i64_bitsinmiddle_eq:
416 ; X86-BMI2-NEXT: pushl %esi
417 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
418 ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
419 ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
420 ; X86-BMI2-NEXT: shrdl %cl, %edx, %eax
421 ; X86-BMI2-NEXT: shrxl %ecx, %edx, %edx
422 ; X86-BMI2-NEXT: xorl %esi, %esi
423 ; X86-BMI2-NEXT: testb $32, %cl
424 ; X86-BMI2-NEXT: cmovnel %edx, %eax
425 ; X86-BMI2-NEXT: cmovel %edx, %esi
426 ; X86-BMI2-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
427 ; X86-BMI2-NEXT: movzwl %si, %ecx
428 ; X86-BMI2-NEXT: orl %eax, %ecx
429 ; X86-BMI2-NEXT: sete %al
430 ; X86-BMI2-NEXT: popl %esi
431 ; X86-BMI2-NEXT: retl
433 ; X64-BMI1-LABEL: scalar_i64_bitsinmiddle_eq:
435 ; X64-BMI1-NEXT: movq %rsi, %rcx
436 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx
437 ; X64-BMI1-NEXT: shrq %cl, %rdi
438 ; X64-BMI1-NEXT: shrq $16, %rdi
439 ; X64-BMI1-NEXT: testl %edi, %edi
440 ; X64-BMI1-NEXT: sete %al
441 ; X64-BMI1-NEXT: retq
443 ; X64-BMI2-LABEL: scalar_i64_bitsinmiddle_eq:
445 ; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax
446 ; X64-BMI2-NEXT: shrq $16, %rax
447 ; X64-BMI2-NEXT: testl %eax, %eax
448 ; X64-BMI2-NEXT: sete %al
449 ; X64-BMI2-NEXT: retq
450 %t0 = shl i64 281474976645120, %y
451 %t1 = and i64 %t0, %x
452 %res = icmp eq i64 %t1, 0
456 ;------------------------------------------------------------------------------;
457 ; A few trivial vector tests
458 ;------------------------------------------------------------------------------;
; Vector form with a splat constant: (x & (splat(1) << y)) == 0 per lane.
; With AVX2 the checks show the hoisted form: x is shifted right per lane
; (vpsrlvd) and then masked with the splat of 1. With plain SSE2 the checks
; instead derive a per-lane value from y (pslld/paddd/cvttps2dq) and pand it
; with x before comparing against zero.
460 define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
461 ; X86-SSE2-LABEL: vec_4xi32_splat_eq:
463 ; X86-SSE2-NEXT: pslld $23, %xmm1
464 ; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
465 ; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
466 ; X86-SSE2-NEXT: pand %xmm1, %xmm0
467 ; X86-SSE2-NEXT: pxor %xmm1, %xmm1
468 ; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
469 ; X86-SSE2-NEXT: retl
471 ; AVX2-LABEL: vec_4xi32_splat_eq:
473 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
474 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
475 ; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
476 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
477 ; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
478 ; AVX2-NEXT: ret{{[l|q]}}
480 ; X64-SSE2-LABEL: vec_4xi32_splat_eq:
482 ; X64-SSE2-NEXT: pslld $23, %xmm1
483 ; X64-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
484 ; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
485 ; X64-SSE2-NEXT: pand %xmm1, %xmm0
486 ; X64-SSE2-NEXT: pxor %xmm1, %xmm1
487 ; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
488 ; X64-SSE2-NEXT: retq
489 %t0 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
490 %t1 = and <4 x i32> %t0, %x
491 %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
; Vector form with a non-splat constant <0, 1, 0xFFFF00, 0x80000000>.
; The checks show the constant is NOT hoisted here: with AVX2 the constant
; vector itself is shifted left by y (vpsllvd) and then AND-ed with x; with
; plain SSE2 the left shift is expanded through pmuludq/pshufd/punpckldq.
495 define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
496 ; X86-SSE2-LABEL: vec_4xi32_nonsplat_eq:
498 ; X86-SSE2-NEXT: pslld $23, %xmm1
499 ; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
500 ; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
501 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
502 ; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
503 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
504 ; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
505 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
506 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
507 ; X86-SSE2-NEXT: pand %xmm1, %xmm0
508 ; X86-SSE2-NEXT: pxor %xmm1, %xmm1
509 ; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
510 ; X86-SSE2-NEXT: retl
512 ; AVX2-LABEL: vec_4xi32_nonsplat_eq:
514 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,16776960,2147483648]
515 ; AVX2-NEXT: vpsllvd %xmm1, %xmm2, %xmm1
516 ; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
517 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
518 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
519 ; AVX2-NEXT: ret{{[l|q]}}
521 ; X64-SSE2-LABEL: vec_4xi32_nonsplat_eq:
523 ; X64-SSE2-NEXT: pslld $23, %xmm1
524 ; X64-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
525 ; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
526 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
527 ; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
528 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
529 ; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
530 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
531 ; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
532 ; X64-SSE2-NEXT: pand %xmm1, %xmm0
533 ; X64-SSE2-NEXT: pxor %xmm1, %xmm1
534 ; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
535 ; X64-SSE2-NEXT: retq
536 %t0 = shl <4 x i32> <i32 0, i32 1, i32 16776960, i32 2147483648>, %y
537 %t1 = and <4 x i32> %t0, %x
538 %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
; Splat-of-1 constant with an undef lane in the shifted vector
; (<1, 1, undef, 1>), compared against an all-zero vector.
; With AVX2 the checks show the hoisted form (vpsrlvd on x, then vpand with
; the splat of 1). With plain SSE2 the checks materialize 1 via movl/movd and
; build the mask through pmuludq/pshufd/punpckldq before the pand with x.
542 define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
543 ; X86-SSE2-LABEL: vec_4xi32_nonsplat_undef0_eq:
545 ; X86-SSE2-NEXT: movl $1, %eax
546 ; X86-SSE2-NEXT: movd %eax, %xmm2
547 ; X86-SSE2-NEXT: pslld $23, %xmm1
548 ; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
549 ; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
550 ; X86-SSE2-NEXT: pmuludq %xmm1, %xmm2
551 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
552 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
553 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
554 ; X86-SSE2-NEXT: pand %xmm2, %xmm0
555 ; X86-SSE2-NEXT: pxor %xmm1, %xmm1
556 ; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
557 ; X86-SSE2-NEXT: retl
559 ; AVX2-LABEL: vec_4xi32_nonsplat_undef0_eq:
561 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
562 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
563 ; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
564 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
565 ; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
566 ; AVX2-NEXT: ret{{[l|q]}}
568 ; X64-SSE2-LABEL: vec_4xi32_nonsplat_undef0_eq:
570 ; X64-SSE2-NEXT: movl $1, %eax
571 ; X64-SSE2-NEXT: movd %eax, %xmm2
572 ; X64-SSE2-NEXT: pslld $23, %xmm1
573 ; X64-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
574 ; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
575 ; X64-SSE2-NEXT: pmuludq %xmm1, %xmm2
576 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
577 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
578 ; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
579 ; X64-SSE2-NEXT: pand %xmm2, %xmm0
580 ; X64-SSE2-NEXT: pxor %xmm1, %xmm1
581 ; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
582 ; X64-SSE2-NEXT: retq
583 %t0 = shl <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
584 %t1 = and <4 x i32> %t0, %x
585 %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
; Splat-of-1 shifted constant, but the comparison vector has an undef lane
; (<0, 0, undef, 0>).
; The checks show the constant is NOT hoisted in this case: with AVX2 the
; splat of 1 is shifted left by y (vpsllvd) and then AND-ed with x.
588 define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
589 ; X86-SSE2-LABEL: vec_4xi32_nonsplat_undef1_eq:
591 ; X86-SSE2-NEXT: pslld $23, %xmm1
592 ; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
593 ; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
594 ; X86-SSE2-NEXT: pand %xmm1, %xmm0
595 ; X86-SSE2-NEXT: pxor %xmm1, %xmm1
596 ; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
597 ; X86-SSE2-NEXT: retl
599 ; AVX2-LABEL: vec_4xi32_nonsplat_undef1_eq:
601 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
602 ; AVX2-NEXT: vpsllvd %xmm1, %xmm2, %xmm1
603 ; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
604 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
605 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
606 ; AVX2-NEXT: ret{{[l|q]}}
608 ; X64-SSE2-LABEL: vec_4xi32_nonsplat_undef1_eq:
610 ; X64-SSE2-NEXT: pslld $23, %xmm1
611 ; X64-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
612 ; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
613 ; X64-SSE2-NEXT: pand %xmm1, %xmm0
614 ; X64-SSE2-NEXT: pxor %xmm1, %xmm1
615 ; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
616 ; X64-SSE2-NEXT: retq
617 %t0 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
618 %t1 = and <4 x i32> %t0, %x
619 %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0>
; Undef lanes in both the shifted constant (<1, 1, undef, 1>) and the
; comparison vector (<0, 0, undef, 0>).
; The checks show the constant is NOT hoisted in this case: with AVX2 the
; splat of 1 is shifted left by y (vpsllvd) and then AND-ed with x; with
; plain SSE2 the mask is built through movd/pmuludq/pshufd/punpckldq.
622 define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
623 ; X86-SSE2-LABEL: vec_4xi32_nonsplat_undef2_eq:
625 ; X86-SSE2-NEXT: movl $1, %eax
626 ; X86-SSE2-NEXT: movd %eax, %xmm2
627 ; X86-SSE2-NEXT: pslld $23, %xmm1
628 ; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
629 ; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
630 ; X86-SSE2-NEXT: pmuludq %xmm1, %xmm2
631 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
632 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
633 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
634 ; X86-SSE2-NEXT: pand %xmm2, %xmm0
635 ; X86-SSE2-NEXT: pxor %xmm1, %xmm1
636 ; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
637 ; X86-SSE2-NEXT: retl
639 ; AVX2-LABEL: vec_4xi32_nonsplat_undef2_eq:
641 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
642 ; AVX2-NEXT: vpsllvd %xmm1, %xmm2, %xmm1
643 ; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
644 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
645 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
646 ; AVX2-NEXT: ret{{[l|q]}}
648 ; X64-SSE2-LABEL: vec_4xi32_nonsplat_undef2_eq:
650 ; X64-SSE2-NEXT: movl $1, %eax
651 ; X64-SSE2-NEXT: movd %eax, %xmm2
652 ; X64-SSE2-NEXT: pslld $23, %xmm1
653 ; X64-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
654 ; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
655 ; X64-SSE2-NEXT: pmuludq %xmm1, %xmm2
656 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
657 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
658 ; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
659 ; X64-SSE2-NEXT: pand %xmm2, %xmm0
660 ; X64-SSE2-NEXT: pxor %xmm1, %xmm1
661 ; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
662 ; X64-SSE2-NEXT: retq
663 %t0 = shl <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
664 %t1 = and <4 x i32> %t0, %x
665 %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0>
669 ;------------------------------------------------------------------------------;
671 ;------------------------------------------------------------------------------;
; Same shape as the i8 sign-bit test, but with the 'ne' predicate.
; The checks expect x shifted right by y (shrb %cl) and then the top bit
; extracted with shrb $7, so the i1 result is produced without a setcc.
; NOTE(review): the shl/and that produce %t1 are not visible in this excerpt.
673 define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
674 ; X86-LABEL: scalar_i8_signbit_ne:
676 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
677 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
678 ; X86-NEXT: shrb %cl, %al
679 ; X86-NEXT: shrb $7, %al
682 ; X64-LABEL: scalar_i8_signbit_ne:
684 ; X64-NEXT: movl %esi, %ecx
685 ; X64-NEXT: movl %edi, %eax
686 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
687 ; X64-NEXT: shrb %cl, %al
688 ; X64-NEXT: shrb $7, %al
689 ; X64-NEXT: # kill: def $al killed $al killed $eax
693 %res = icmp ne i8 %t1, 0 ; we are perfectly happy with 'ne' predicate
697 ;------------------------------------------------------------------------------;
698 ; What if X is a constant too?
699 ;------------------------------------------------------------------------------;
; Both operands of the pattern are constants; only the shift amount y varies.
; The shifted constant is 0xAA55AA55. The checks expect it to be shifted left
; by y (shll without BMI2, shlxl with BMI2) and the low bit tested (testb $1).
; NOTE(review): the and that produces %t1 is not visible in this excerpt;
; the testb $1 suggests its mask is 1 — confirm against the full file.
701 define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind {
702 ; X86-BMI1-LABEL: scalar_i32_x_is_const_eq:
704 ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
705 ; X86-BMI1-NEXT: movl $-1437226411, %eax # imm = 0xAA55AA55
706 ; X86-BMI1-NEXT: shll %cl, %eax
707 ; X86-BMI1-NEXT: testb $1, %al
708 ; X86-BMI1-NEXT: sete %al
709 ; X86-BMI1-NEXT: retl
711 ; X86-BMI2-LABEL: scalar_i32_x_is_const_eq:
713 ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
714 ; X86-BMI2-NEXT: movl $-1437226411, %ecx # imm = 0xAA55AA55
715 ; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
716 ; X86-BMI2-NEXT: testb $1, %al
717 ; X86-BMI2-NEXT: sete %al
718 ; X86-BMI2-NEXT: retl
720 ; X64-BMI1-LABEL: scalar_i32_x_is_const_eq:
722 ; X64-BMI1-NEXT: movl %edi, %ecx
723 ; X64-BMI1-NEXT: movl $-1437226411, %eax # imm = 0xAA55AA55
724 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
725 ; X64-BMI1-NEXT: shll %cl, %eax
726 ; X64-BMI1-NEXT: testb $1, %al
727 ; X64-BMI1-NEXT: sete %al
728 ; X64-BMI1-NEXT: retq
730 ; X64-BMI2-LABEL: scalar_i32_x_is_const_eq:
732 ; X64-BMI2-NEXT: movl $-1437226411, %eax # imm = 0xAA55AA55
733 ; X64-BMI2-NEXT: shlxl %edi, %eax, %eax
734 ; X64-BMI2-NEXT: testb $1, %al
735 ; X64-BMI2-NEXT: sete %al
736 ; X64-BMI2-NEXT: retq
737 %t0 = shl i32 2857740885, %y
739 %res = icmp eq i32 %t1, 0
; Variant where the constant 0xAA55AA55 is the and-mask rather than the
; shifted value. The checks expect the constant to be loaded into a register
; and bit y of it tested with btl, followed by setae.
; NOTE(review): the shl that produces %t0 is not visible in this excerpt.
742 define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind {
743 ; X86-LABEL: scalar_i32_x_is_const2_eq:
745 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
746 ; X86-NEXT: movl $-1437226411, %ecx # imm = 0xAA55AA55
747 ; X86-NEXT: btl %eax, %ecx
748 ; X86-NEXT: setae %al
751 ; X64-LABEL: scalar_i32_x_is_const2_eq:
753 ; X64-NEXT: movl $-1437226411, %eax # imm = 0xAA55AA55
754 ; X64-NEXT: btl %edi, %eax
755 ; X64-NEXT: setae %al
758 %t1 = and i32 %t0, 2857740885
759 %res = icmp eq i32 %t1, 0
763 ;------------------------------------------------------------------------------;
764 ; A few negative tests
765 ;------------------------------------------------------------------------------;
; Negative test: the comparison is 'slt' against 0 rather than eq/ne.
; The checks show the constant is NOT hoisted: the mask $24 is shifted left
; by y (shlb), AND-ed with x, and the sign bit extracted with shrb $7.
; NOTE(review): the shl/and that produce %t1 are not visible in this excerpt.
767 define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
768 ; X86-LABEL: negative_scalar_i8_bitsinmiddle_slt:
770 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
771 ; X86-NEXT: movb $24, %al
772 ; X86-NEXT: shlb %cl, %al
773 ; X86-NEXT: andb {{[0-9]+}}(%esp), %al
774 ; X86-NEXT: shrb $7, %al
777 ; X64-LABEL: negative_scalar_i8_bitsinmiddle_slt:
779 ; X64-NEXT: movl %esi, %ecx
780 ; X64-NEXT: movb $24, %al
781 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
782 ; X64-NEXT: shlb %cl, %al
783 ; X64-NEXT: andb %dil, %al
784 ; X64-NEXT: shrb $7, %al
788 %res = icmp slt i8 %t1, 0
; Negative test: the equality comparison is against 1 instead of 0.
; The checks show the constant is NOT hoisted: the mask $-128 is shifted left
; by y (shlb), AND-ed with x, and the result compared with cmpb $1.
; NOTE(review): the shl/and that produce %t1 are not visible in this excerpt.
792 define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
793 ; X86-LABEL: scalar_i8_signbit_eq_with_nonzero:
795 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
796 ; X86-NEXT: movb $-128, %al
797 ; X86-NEXT: shlb %cl, %al
798 ; X86-NEXT: andb {{[0-9]+}}(%esp), %al
799 ; X86-NEXT: cmpb $1, %al
803 ; X64-LABEL: scalar_i8_signbit_eq_with_nonzero:
805 ; X64-NEXT: movl %esi, %ecx
806 ; X64-NEXT: movb $-128, %al
807 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
808 ; X64-NEXT: shlb %cl, %al
809 ; X64-NEXT: andb %dil, %al
810 ; X64-NEXT: cmpb $1, %al
815 %res = icmp eq i8 %t1, 1 ; should be comparing with 0