; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+prefer-movmsk-over-vtest | FileCheck %s --check-prefixes=ADL

declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>)
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>)
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>)

; Use widest possible vector for movmsk comparisons (PR37087)
; None-of test: movmskps of a bitcast <2 x i64> mask narrows to movmskpd / vtestpd.
define i1 @movmskps_noneof_bitcast_v2f64(<2 x double> %a0) {
; SSE-LABEL: movmskps_noneof_bitcast_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    cmpeqpd %xmm0, %xmm1
; SSE-NEXT:    movmskpd %xmm1, %eax
; SSE-NEXT:    testl %eax, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: movmskps_noneof_bitcast_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmpeqpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vtestpd %xmm0, %xmm0
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
;
; ADL-LABEL: movmskps_noneof_bitcast_v2f64:
; ADL:       # %bb.0:
; ADL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ADL-NEXT:    vcmpeqpd %xmm0, %xmm1, %xmm0
; ADL-NEXT:    vmovmskpd %xmm0, %eax
; ADL-NEXT:    testl %eax, %eax
; ADL-NEXT:    sete %al
; ADL-NEXT:    retq
  %1 = fcmp oeq <2 x double> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = icmp eq i32 %4, 0
  ret i1 %5
}
; All-of test: compare of movmskps result against 15 narrows to movmskpd cmpl $3 / vtestpd+setb.
define i1 @movmskps_allof_bitcast_v2f64(<2 x double> %a0) {
; SSE-LABEL: movmskps_allof_bitcast_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    cmpeqpd %xmm0, %xmm1
; SSE-NEXT:    movmskpd %xmm1, %eax
; SSE-NEXT:    cmpl $3, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: movmskps_allof_bitcast_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmpeqpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vtestpd %xmm1, %xmm0
; AVX-NEXT:    setb %al
; AVX-NEXT:    retq
;
; ADL-LABEL: movmskps_allof_bitcast_v2f64:
; ADL:       # %bb.0:
; ADL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ADL-NEXT:    vcmpeqpd %xmm0, %xmm1, %xmm0
; ADL-NEXT:    vmovmskpd %xmm0, %eax
; ADL-NEXT:    cmpl $3, %eax
; ADL-NEXT:    sete %al
; ADL-NEXT:    retq
  %1 = fcmp oeq <2 x double> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = icmp eq i32 %4, 15
  ret i1 %5
}
; None-of test: pmovmskb of a sign-extended i64 compare folds to movmskpd (SSE4.2+) or
; movmskps of the shuffled sign words (SSE2).
define i1 @pmovmskb_noneof_bitcast_v2i64(<2 x i64> %a0) {
; SSE2-LABEL: pmovmskb_noneof_bitcast_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    movmskps %xmm0, %eax
; SSE2-NEXT:    testl %eax, %eax
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; SSE42-LABEL: pmovmskb_noneof_bitcast_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movmskpd %xmm0, %eax
; SSE42-NEXT:    testl %eax, %eax
; SSE42-NEXT:    sete %al
; SSE42-NEXT:    retq
;
; AVX-LABEL: pmovmskb_noneof_bitcast_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vtestpd %xmm0, %xmm0
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
;
; ADL-LABEL: pmovmskb_noneof_bitcast_v2i64:
; ADL:       # %bb.0:
; ADL-NEXT:    vmovmskpd %xmm0, %eax
; ADL-NEXT:    testl %eax, %eax
; ADL-NEXT:    sete %al
; ADL-NEXT:    retq
  %1 = icmp sgt <2 x i64> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  %5 = icmp eq i32 %4, 0
  ret i1 %5
}
; All-of test: pmovmskb==65535 narrows to movmskpd cmpl $3 / vtestpd+setb.
define i1 @pmovmskb_allof_bitcast_v2i64(<2 x i64> %a0) {
; SSE2-LABEL: pmovmskb_allof_bitcast_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    movmskps %xmm0, %eax
; SSE2-NEXT:    cmpl $15, %eax
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; SSE42-LABEL: pmovmskb_allof_bitcast_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movmskpd %xmm0, %eax
; SSE42-NEXT:    cmpl $3, %eax
; SSE42-NEXT:    sete %al
; SSE42-NEXT:    retq
;
; AVX-LABEL: pmovmskb_allof_bitcast_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vtestpd %xmm1, %xmm0
; AVX-NEXT:    setb %al
; AVX-NEXT:    retq
;
; ADL-LABEL: pmovmskb_allof_bitcast_v2i64:
; ADL:       # %bb.0:
; ADL-NEXT:    vmovmskpd %xmm0, %eax
; ADL-NEXT:    cmpl $3, %eax
; ADL-NEXT:    sete %al
; ADL-NEXT:    retq
  %1 = icmp sgt <2 x i64> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  %5 = icmp eq i32 %4, 65535
  ret i1 %5
}
; None-of test: pmovmskb of a bitcast <4 x i32> float-compare mask narrows to movmskps / vtestps.
define i1 @pmovmskb_noneof_bitcast_v4f32(<4 x float> %a0) {
; SSE-LABEL: pmovmskb_noneof_bitcast_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    cmpeqps %xmm0, %xmm1
; SSE-NEXT:    movmskps %xmm1, %eax
; SSE-NEXT:    testl %eax, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: pmovmskb_noneof_bitcast_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vtestps %xmm0, %xmm0
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
;
; ADL-LABEL: pmovmskb_noneof_bitcast_v4f32:
; ADL:       # %bb.0:
; ADL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ADL-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
; ADL-NEXT:    vmovmskps %xmm0, %eax
; ADL-NEXT:    testl %eax, %eax
; ADL-NEXT:    sete %al
; ADL-NEXT:    retq
  %1 = fcmp oeq <4 x float> %a0, zeroinitializer
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = bitcast <4 x i32> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  %5 = icmp eq i32 %4, 0
  ret i1 %5
}
; All-of test: pmovmskb==65535 narrows to movmskps cmpl $15 / vtestps+setb.
define i1 @pmovmskb_allof_bitcast_v4f32(<4 x float> %a0) {
; SSE-LABEL: pmovmskb_allof_bitcast_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    cmpeqps %xmm0, %xmm1
; SSE-NEXT:    movmskps %xmm1, %eax
; SSE-NEXT:    cmpl $15, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: pmovmskb_allof_bitcast_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vtestps %xmm1, %xmm0
; AVX-NEXT:    setb %al
; AVX-NEXT:    retq
;
; ADL-LABEL: pmovmskb_allof_bitcast_v4f32:
; ADL:       # %bb.0:
; ADL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ADL-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
; ADL-NEXT:    vmovmskps %xmm0, %eax
; ADL-NEXT:    cmpl $15, %eax
; ADL-NEXT:    sete %al
; ADL-NEXT:    retq
  %1 = fcmp oeq <4 x float> %a0, zeroinitializer
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = bitcast <4 x i32> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  %5 = icmp eq i32 %4, 65535
  ret i1 %5
}
; MOVMSK(ICMP_SGT(X,-1)) -> NOT(MOVMSK(X))
define i1 @movmskps_allof_v4i32_positive(<4 x i32> %a0) {
; SSE-LABEL: movmskps_allof_v4i32_positive:
; SSE:       # %bb.0:
; SSE-NEXT:    movmskps %xmm0, %eax
; SSE-NEXT:    xorl $15, %eax
; SSE-NEXT:    cmpl $15, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: movmskps_allof_v4i32_positive:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    xorl $15, %eax
; AVX-NEXT:    cmpl $15, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
;
; ADL-LABEL: movmskps_allof_v4i32_positive:
; ADL:       # %bb.0:
; ADL-NEXT:    vmovmskps %xmm0, %eax
; ADL-NEXT:    xorl $15, %eax
; ADL-NEXT:    cmpl $15, %eax
; ADL-NEXT:    sete %al
; ADL-NEXT:    retq
  %1 = icmp sgt <4 x i32> %a0, <i32 -1, i32 -1, i32 -1, i32 -1>
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = bitcast <4 x i32> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = icmp eq i32 %4, 15
  ret i1 %5
}
define i1 @pmovmskb_noneof_v16i8_positive(<16 x i8> %a0) {
; SSE-LABEL: pmovmskb_noneof_v16i8_positive:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: pmovmskb_noneof_v16i8_positive:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
;
; ADL-LABEL: pmovmskb_noneof_v16i8_positive:
; ADL:       # %bb.0:
; ADL-NEXT:    vpmovmskb %xmm0, %eax
; ADL-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; ADL-NEXT:    sete %al
; ADL-NEXT:    retq
  %1 = icmp sgt <16 x i8> %a0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %2 = sext <16 x i1> %1 to <16 x i8>
  %3 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %2)
  %4 = icmp eq i32 %3, 0
  ret i1 %4
}
; MOVMSK(CMPEQ(AND(X,C1),0)) -> MOVMSK(NOT(SHL(X,C2)))
define i32 @movmskpd_pow2_mask(<2 x i64> %a0) {
; SSE2-LABEL: movmskpd_pow2_mask:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movmskpd %xmm0, %eax
; SSE2-NEXT:    retq
;
; SSE42-LABEL: movmskpd_pow2_mask:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movmskpd %xmm0, %eax
; SSE42-NEXT:    xorl $3, %eax
; SSE42-NEXT:    retq
;
; AVX-LABEL: movmskpd_pow2_mask:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    xorl $3, %eax
; AVX-NEXT:    retq
;
; ADL-LABEL: movmskpd_pow2_mask:
; ADL:       # %bb.0:
; ADL-NEXT:    vmovmskpd %xmm0, %eax
; ADL-NEXT:    xorl $3, %eax
; ADL-NEXT:    retq
  %1 = and <2 x i64> %a0, <i64 -9223372036854775808, i64 -9223372036854775808>
  %2 = icmp eq <2 x i64> %1, zeroinitializer
  %3 = sext <2 x i1> %2 to <2 x i64>
  %4 = bitcast <2 x i64> %3 to <2 x double>
  %5 = tail call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %4)
  ret i32 %5
}
define i32 @movmskps_pow2_mask(<4 x i32> %a0) {
; SSE-LABEL: movmskps_pow2_mask:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $29, %xmm0
; SSE-NEXT:    movmskps %xmm0, %eax
; SSE-NEXT:    xorl $15, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: movmskps_pow2_mask:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $29, %xmm0, %xmm0
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    xorl $15, %eax
; AVX-NEXT:    retq
;
; ADL-LABEL: movmskps_pow2_mask:
; ADL:       # %bb.0:
; ADL-NEXT:    vpslld $29, %xmm0, %xmm0
; ADL-NEXT:    vmovmskps %xmm0, %eax
; ADL-NEXT:    xorl $15, %eax
; ADL-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 4, i32 4, i32 4, i32 4>
  %2 = icmp eq <4 x i32> %1, zeroinitializer
  %3 = sext <4 x i1> %2 to <4 x i32>
  %4 = bitcast <4 x i32> %3 to <4 x float>
  %5 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %4)
  ret i32 %5
}
define i32 @pmovmskb_pow2_mask(<16 x i8> %a0) {
; SSE-LABEL: pmovmskb_pow2_mask:
; SSE:       # %bb.0:
; SSE-NEXT:    psllw $7, %xmm0
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; SSE-NEXT:    retq
;
; AVX-LABEL: pmovmskb_pow2_mask:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsllw $7, %xmm0, %xmm0
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; AVX-NEXT:    retq
;
; ADL-LABEL: pmovmskb_pow2_mask:
; ADL:       # %bb.0:
; ADL-NEXT:    vpsllw $7, %xmm0, %xmm0
; ADL-NEXT:    vpmovmskb %xmm0, %eax
; ADL-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; ADL-NEXT:    retq
  %1 = and <16 x i8> %a0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %2 = icmp eq <16 x i8> %1, zeroinitializer
  %3 = sext <16 x i1> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  ret i32 %4
}
; AND(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(AND(X,Y))
; XOR(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(XOR(X,Y))
; OR(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(OR(X,Y))
; if the elements are the same width.
define i32 @and_movmskpd_movmskpd(<2 x double> %a0, <2 x i64> %a1) {
; SSE-LABEL: and_movmskpd_movmskpd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm2, %xmm2
; SSE-NEXT:    cmpeqpd %xmm0, %xmm2
; SSE-NEXT:    andpd %xmm1, %xmm2
; SSE-NEXT:    movmskpd %xmm2, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: and_movmskpd_movmskpd:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vcmpeqpd %xmm0, %xmm2, %xmm0
; AVX-NEXT:    vandpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    retq
;
; ADL-LABEL: and_movmskpd_movmskpd:
; ADL:       # %bb.0:
; ADL-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; ADL-NEXT:    vcmpeqpd %xmm0, %xmm2, %xmm0
; ADL-NEXT:    vandpd %xmm1, %xmm0, %xmm0
; ADL-NEXT:    vmovmskpd %xmm0, %eax
; ADL-NEXT:    retq
  %1 = fcmp oeq <2 x double> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <2 x double>
  %4 = tail call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %3)
  %5 = icmp sgt <2 x i64> zeroinitializer, %a1
  %6 = bitcast <2 x i1> %5 to i2
  %7 = zext i2 %6 to i32
  %8 = and i32 %4, %7
  ret i32 %8
}
define i32 @xor_movmskps_movmskps(<4 x float> %a0, <4 x i32> %a1) {
; SSE-LABEL: xor_movmskps_movmskps:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm2, %xmm2
; SSE-NEXT:    cmpeqps %xmm0, %xmm2
; SSE-NEXT:    xorps %xmm1, %xmm2
; SSE-NEXT:    movmskps %xmm2, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: xor_movmskps_movmskps:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vcmpeqps %xmm0, %xmm2, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    retq
;
; ADL-LABEL: xor_movmskps_movmskps:
; ADL:       # %bb.0:
; ADL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; ADL-NEXT:    vcmpeqps %xmm0, %xmm2, %xmm0
; ADL-NEXT:    vxorps %xmm1, %xmm0, %xmm0
; ADL-NEXT:    vmovmskps %xmm0, %eax
; ADL-NEXT:    retq
  %1 = fcmp oeq <4 x float> zeroinitializer, %a0
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = bitcast <4 x i32> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = ashr <4 x i32> %a1, <i32 31, i32 31, i32 31, i32 31>
  %6 = bitcast <4 x i32> %5 to <4 x float>
  %7 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %6)
  %8 = xor i32 %4, %7
  ret i32 %8
}
define i32 @or_pmovmskb_pmovmskb(<16 x i8> %a0, <8 x i16> %a1) {
; SSE-LABEL: or_pmovmskb_pmovmskb:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm2, %xmm2
; SSE-NEXT:    pcmpeqb %xmm0, %xmm2
; SSE-NEXT:    psraw $15, %xmm1
; SSE-NEXT:    por %xmm2, %xmm1
; SSE-NEXT:    pmovmskb %xmm1, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: or_pmovmskb_pmovmskb:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpsraw $15, %xmm1, %xmm1
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    retq
;
; ADL-LABEL: or_pmovmskb_pmovmskb:
; ADL:       # %bb.0:
; ADL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; ADL-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; ADL-NEXT:    vpsraw $15, %xmm1, %xmm1
; ADL-NEXT:    vpor %xmm1, %xmm0, %xmm0
; ADL-NEXT:    vpmovmskb %xmm0, %eax
; ADL-NEXT:    retq
  %1 = icmp eq <16 x i8> zeroinitializer, %a0
  %2 = sext <16 x i1> %1 to <16 x i8>
  %3 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %2)
  %4 = ashr <8 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %5 = bitcast <8 x i16> %4 to <16 x i8>
  %6 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %5)
  %7 = or i32 %3, %6
  ret i32 %7
}
; We can't fold to ptest if we're not checking every pcmpeq result
define i32 @movmskps_ptest_numelts_mismatch(<16 x i8> %a0) {
; SSE-LABEL: movmskps_ptest_numelts_mismatch:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE-NEXT:    movmskps %xmm1, %ecx
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    cmpl $15, %ecx
; SSE-NEXT:    sete %al
; SSE-NEXT:    negl %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: movmskps_ptest_numelts_mismatch:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    vtestps %xmm1, %xmm0
; AVX-NEXT:    sbbl %eax, %eax
; AVX-NEXT:    retq
;
; ADL-LABEL: movmskps_ptest_numelts_mismatch:
; ADL:       # %bb.0:
; ADL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; ADL-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; ADL-NEXT:    vmovmskps %xmm0, %ecx
; ADL-NEXT:    xorl %eax, %eax
; ADL-NEXT:    cmpl $15, %ecx
; ADL-NEXT:    sete %al
; ADL-NEXT:    negl %eax
; ADL-NEXT:    retq
  %1 = icmp eq <16 x i8> %a0, zeroinitializer
  %2 = sext <16 x i1> %1 to <16 x i8>
  %3 = bitcast <16 x i8> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = icmp eq i32 %4, 15
  %6 = sext i1 %5 to i32
  ret i32 %6
}