1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
3 ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
4 ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512BW
5 ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512DQ
6 ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=X86
; Inverts a 16-bit mask: the i16 argument is bitcast to <16 x i1>, XORed with
; an all-ones vector, and bitcast back to i16.
; The x86-64 assertions expect plain scalar code (movl + notl) for this.
9 define i16 @mask16(i16 %x) {
10 ; CHECK-LABEL: mask16:
12 ; CHECK-NEXT: movl %edi, %eax
13 ; CHECK-NEXT: notl %eax
14 ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
19 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
21 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
23 %m0 = bitcast i16 %x to <16 x i1>
24 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
25 %ret = bitcast <16 x i1> %m1 to i16
; Same 16-bit mask inversion as a <16 x i1> XOR with all-ones, but the i16
; result is then zero-extended to i32.
; x86-64 assertions expect notl followed by movzwl; the i686 assertions expect
; a movzwl load followed by xorl with 0xFFFF.
29 define i32 @mask16_zext(i16 %x) {
30 ; CHECK-LABEL: mask16_zext:
32 ; CHECK-NEXT: notl %edi
33 ; CHECK-NEXT: movzwl %di, %eax
36 ; X86-LABEL: mask16_zext:
38 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
39 ; X86-NEXT: xorl $65535, %eax ## imm = 0xFFFF
41 %m0 = bitcast i16 %x to <16 x i1>
42 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
43 %m2 = bitcast <16 x i1> %m1 to i16
44 %ret = zext i16 %m2 to i32
; Inverts an 8-bit mask: the i8 argument is bitcast to <8 x i1>, XORed with an
; all-ones vector, and bitcast back to i8.
; The x86-64 assertions expect a scalar notb on %al.
48 define i8 @mask8(i8 %x) {
51 ; CHECK-NEXT: movl %edi, %eax
52 ; CHECK-NEXT: notb %al
53 ; CHECK-NEXT: ## kill: def $al killed $al killed $eax
58 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
61 %m0 = bitcast i8 %x to <8 x i1>
62 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
63 %ret = bitcast <8 x i1> %m1 to i8
; Same 8-bit mask inversion as a <8 x i1> XOR with all-ones, but the i8 result
; is then zero-extended to i32.
; x86-64 assertions expect notb followed by movzbl.
67 define i32 @mask8_zext(i8 %x) {
68 ; CHECK-LABEL: mask8_zext:
70 ; CHECK-NEXT: notb %dil
71 ; CHECK-NEXT: movzbl %dil, %eax
74 ; X86-LABEL: mask8_zext:
76 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
78 ; X86-NEXT: movzbl %al, %eax
80 %m0 = bitcast i8 %x to <8 x i1>
81 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
82 %m2 = bitcast <8 x i1> %m1 to i8
83 %ret = zext i8 %m2 to i32
; In-memory 16-bit mask inversion: loads an i16 from %ptr, inverts it as a
; <16 x i1> (XOR with all-ones), and stores the result back to the same pointer.
; Both the x86-64 and i686 assertions expect the value to go through a mask
; register (kmovw load, knotw, kmovw store).
87 define void @mask16_mem(i16* %ptr) {
88 ; CHECK-LABEL: mask16_mem:
90 ; CHECK-NEXT: kmovw (%rdi), %k0
91 ; CHECK-NEXT: knotw %k0, %k0
92 ; CHECK-NEXT: kmovw %k0, (%rdi)
95 ; X86-LABEL: mask16_mem:
97 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
98 ; X86-NEXT: kmovw (%eax), %k0
99 ; X86-NEXT: knotw %k0, %k0
100 ; X86-NEXT: kmovw %k0, (%eax)
102 %x = load i16, i16* %ptr, align 4
103 %m0 = bitcast i16 %x to <16 x i1>
104 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
105 %ret = bitcast <16 x i1> %m1 to i16
106 store i16 %ret, i16* %ptr, align 4
; In-memory 8-bit mask inversion: loads an i8 from %ptr, inverts it as a
; <8 x i1> (XOR with all-ones), and stores the result back to the same pointer.
; Expected codegen differs per prefix: the KNL and AVX512BW assertions expect a
; single notb on memory, while the prefixes whose RUN lines enable +avx512dq
; (SKX, AVX512DQ and the i686 run) expect kmovb / knotb / kmovb.
110 define void @mask8_mem(i8* %ptr) {
111 ; KNL-LABEL: mask8_mem:
113 ; KNL-NEXT: notb (%rdi)
116 ; SKX-LABEL: mask8_mem:
118 ; SKX-NEXT: kmovb (%rdi), %k0
119 ; SKX-NEXT: knotb %k0, %k0
120 ; SKX-NEXT: kmovb %k0, (%rdi)
123 ; AVX512BW-LABEL: mask8_mem:
124 ; AVX512BW: ## %bb.0:
125 ; AVX512BW-NEXT: notb (%rdi)
126 ; AVX512BW-NEXT: retq
128 ; AVX512DQ-LABEL: mask8_mem:
129 ; AVX512DQ: ## %bb.0:
130 ; AVX512DQ-NEXT: kmovb (%rdi), %k0
131 ; AVX512DQ-NEXT: knotb %k0, %k0
132 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
133 ; AVX512DQ-NEXT: retq
135 ; X86-LABEL: mask8_mem:
137 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
138 ; X86-NEXT: kmovb (%eax), %k0
139 ; X86-NEXT: knotb %k0, %k0
140 ; X86-NEXT: kmovb %k0, (%eax)
142 %x = load i8, i8* %ptr, align 4
143 %m0 = bitcast i8 %x to <8 x i1>
144 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
145 %ret = bitcast <8 x i1> %m1 to i8
146 store i8 %ret, i8* %ptr, align 4
; Combines two 16-bit masks passed as scalars: computes (x AND y) OR (x XOR y)
; on <16 x i1> vectors and bitcasts the result back to i16.
; The assertions expect this to stay in general-purpose registers
; (andl / xorl / orl) rather than using mask-register instructions.
150 define i16 @mand16(i16 %x, i16 %y) {
151 ; CHECK-LABEL: mand16:
153 ; CHECK-NEXT: movl %edi, %eax
154 ; CHECK-NEXT: movl %edi, %ecx
155 ; CHECK-NEXT: andl %esi, %ecx
156 ; CHECK-NEXT: xorl %esi, %eax
157 ; CHECK-NEXT: orl %ecx, %eax
158 ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
163 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
164 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
165 ; X86-NEXT: movl %eax, %edx
166 ; X86-NEXT: andl %ecx, %edx
167 ; X86-NEXT: xorl %ecx, %eax
168 ; X86-NEXT: orl %edx, %eax
169 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
171 %ma = bitcast i16 %x to <16 x i1>
172 %mb = bitcast i16 %y to <16 x i1>
173 %mc = and <16 x i1> %ma, %mb
174 %md = xor <16 x i1> %ma, %mb
175 %me = or <16 x i1> %mc, %md
176 %ret = bitcast <16 x i1> %me to i16
; Memory-operand variant of mand16: loads two <16 x i1> masks through the
; pointer arguments, computes (a AND b) OR (a XOR b), and bitcasts to i16.
; All prefixes expect mask-register code (kmovw loads, kandw / kxorw / korw).
; The only visible difference is the move of the result into %eax: kmovw for
; the KNL and AVX512DQ prefixes, kmovd for SKX, AVX512BW and the i686 run.
180 define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) {
181 ; KNL-LABEL: mand16_mem:
183 ; KNL-NEXT: kmovw (%rdi), %k0
184 ; KNL-NEXT: kmovw (%rsi), %k1
185 ; KNL-NEXT: kandw %k1, %k0, %k2
186 ; KNL-NEXT: kxorw %k1, %k0, %k0
187 ; KNL-NEXT: korw %k0, %k2, %k0
188 ; KNL-NEXT: kmovw %k0, %eax
189 ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
192 ; SKX-LABEL: mand16_mem:
194 ; SKX-NEXT: kmovw (%rdi), %k0
195 ; SKX-NEXT: kmovw (%rsi), %k1
196 ; SKX-NEXT: kandw %k1, %k0, %k2
197 ; SKX-NEXT: kxorw %k1, %k0, %k0
198 ; SKX-NEXT: korw %k0, %k2, %k0
199 ; SKX-NEXT: kmovd %k0, %eax
200 ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
203 ; AVX512BW-LABEL: mand16_mem:
204 ; AVX512BW: ## %bb.0:
205 ; AVX512BW-NEXT: kmovw (%rdi), %k0
206 ; AVX512BW-NEXT: kmovw (%rsi), %k1
207 ; AVX512BW-NEXT: kandw %k1, %k0, %k2
208 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
209 ; AVX512BW-NEXT: korw %k0, %k2, %k0
210 ; AVX512BW-NEXT: kmovd %k0, %eax
211 ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
212 ; AVX512BW-NEXT: retq
214 ; AVX512DQ-LABEL: mand16_mem:
215 ; AVX512DQ: ## %bb.0:
216 ; AVX512DQ-NEXT: kmovw (%rdi), %k0
217 ; AVX512DQ-NEXT: kmovw (%rsi), %k1
218 ; AVX512DQ-NEXT: kandw %k1, %k0, %k2
219 ; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
220 ; AVX512DQ-NEXT: korw %k0, %k2, %k0
221 ; AVX512DQ-NEXT: kmovw %k0, %eax
222 ; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
223 ; AVX512DQ-NEXT: retq
225 ; X86-LABEL: mand16_mem:
227 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
228 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
229 ; X86-NEXT: kmovw (%ecx), %k0
230 ; X86-NEXT: kmovw (%eax), %k1
231 ; X86-NEXT: kandw %k1, %k0, %k2
232 ; X86-NEXT: kxorw %k1, %k0, %k0
233 ; X86-NEXT: korw %k0, %k2, %k0
234 ; X86-NEXT: kmovd %k0, %eax
235 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
237 %ma = load <16 x i1>, <16 x i1>* %x
238 %mb = load <16 x i1>, <16 x i1>* %y
239 %mc = and <16 x i1> %ma, %mb
240 %md = xor <16 x i1> %ma, %mb
241 %me = or <16 x i1> %mc, %md
242 %ret = bitcast <16 x i1> %me to i16
; Extracts the upper half of a 16-bit mask: the i16 argument is bitcast to
; <16 x i1>, elements 8..15 are selected with a shufflevector, and the
; resulting <8 x i1> is bitcast to i8.
; The x86-64 assertions expect a kshiftrw by 8 on a mask register; the i686
; assertions visible here show a single byte load from the stack.
246 define i8 @shuf_test1(i16 %v) nounwind {
247 ; KNL-LABEL: shuf_test1:
249 ; KNL-NEXT: kmovw %edi, %k0
250 ; KNL-NEXT: kshiftrw $8, %k0, %k0
251 ; KNL-NEXT: kmovw %k0, %eax
252 ; KNL-NEXT: ## kill: def $al killed $al killed $eax
255 ; SKX-LABEL: shuf_test1:
257 ; SKX-NEXT: kmovd %edi, %k0
258 ; SKX-NEXT: kshiftrw $8, %k0, %k0
259 ; SKX-NEXT: kmovd %k0, %eax
260 ; SKX-NEXT: ## kill: def $al killed $al killed $eax
263 ; AVX512BW-LABEL: shuf_test1:
264 ; AVX512BW: ## %bb.0:
265 ; AVX512BW-NEXT: kmovd %edi, %k0
266 ; AVX512BW-NEXT: kshiftrw $8, %k0, %k0
267 ; AVX512BW-NEXT: kmovd %k0, %eax
268 ; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
269 ; AVX512BW-NEXT: retq
271 ; AVX512DQ-LABEL: shuf_test1:
272 ; AVX512DQ: ## %bb.0:
273 ; AVX512DQ-NEXT: kmovw %edi, %k0
274 ; AVX512DQ-NEXT: kshiftrw $8, %k0, %k0
275 ; AVX512DQ-NEXT: kmovw %k0, %eax
276 ; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
277 ; AVX512DQ-NEXT: retq
279 ; X86-LABEL: shuf_test1:
281 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
283 %v1 = bitcast i16 %v to <16 x i1>
284 %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
285 %mask1 = bitcast <8 x i1> %mask to i8
; Zero-extends a single compare bit to i32: performs an unsigned greater-than
; compare of two <16 x i32> vectors, extracts element 5 of the <16 x i1>
; result, and zero-extends it.
; Every prefix expects vpcmpnleud into a mask register, a kshiftrw by 5 to
; bring the wanted bit down, a move to %eax, and an andl with 1.
289 define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
290 ; KNL-LABEL: zext_test1:
292 ; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
293 ; KNL-NEXT: kshiftrw $5, %k0, %k0
294 ; KNL-NEXT: kmovw %k0, %eax
295 ; KNL-NEXT: andl $1, %eax
296 ; KNL-NEXT: vzeroupper
299 ; SKX-LABEL: zext_test1:
301 ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
302 ; SKX-NEXT: kshiftrw $5, %k0, %k0
303 ; SKX-NEXT: kmovd %k0, %eax
304 ; SKX-NEXT: andl $1, %eax
305 ; SKX-NEXT: vzeroupper
308 ; AVX512BW-LABEL: zext_test1:
309 ; AVX512BW: ## %bb.0:
310 ; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
311 ; AVX512BW-NEXT: kshiftrw $5, %k0, %k0
312 ; AVX512BW-NEXT: kmovd %k0, %eax
313 ; AVX512BW-NEXT: andl $1, %eax
314 ; AVX512BW-NEXT: vzeroupper
315 ; AVX512BW-NEXT: retq
317 ; AVX512DQ-LABEL: zext_test1:
318 ; AVX512DQ: ## %bb.0:
319 ; AVX512DQ-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
320 ; AVX512DQ-NEXT: kshiftrw $5, %k0, %k0
321 ; AVX512DQ-NEXT: kmovw %k0, %eax
322 ; AVX512DQ-NEXT: andl $1, %eax
323 ; AVX512DQ-NEXT: vzeroupper
324 ; AVX512DQ-NEXT: retq
326 ; X86-LABEL: zext_test1:
328 ; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
329 ; X86-NEXT: kshiftrw $5, %k0, %k0
330 ; X86-NEXT: kmovd %k0, %eax
331 ; X86-NEXT: andl $1, %eax
332 ; X86-NEXT: vzeroupper
334 %cmp_res = icmp ugt <16 x i32> %a, %b
335 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
336 %res = zext i1 %cmp_res.i1 to i32
; i16 variant of zext_test1: unsigned greater-than compare of two <16 x i32>
; vectors, extract element 5 of the <16 x i1> result, zero-extend to i16.
; The expected code matches the i32 variant (vpcmpnleud, kshiftrw by 5,
; andl with 1) plus a kill annotation narrowing %eax to %ax.
340 define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
341 ; KNL-LABEL: zext_test2:
343 ; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
344 ; KNL-NEXT: kshiftrw $5, %k0, %k0
345 ; KNL-NEXT: kmovw %k0, %eax
346 ; KNL-NEXT: andl $1, %eax
347 ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
348 ; KNL-NEXT: vzeroupper
351 ; SKX-LABEL: zext_test2:
353 ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
354 ; SKX-NEXT: kshiftrw $5, %k0, %k0
355 ; SKX-NEXT: kmovd %k0, %eax
356 ; SKX-NEXT: andl $1, %eax
357 ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
358 ; SKX-NEXT: vzeroupper
361 ; AVX512BW-LABEL: zext_test2:
362 ; AVX512BW: ## %bb.0:
363 ; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
364 ; AVX512BW-NEXT: kshiftrw $5, %k0, %k0
365 ; AVX512BW-NEXT: kmovd %k0, %eax
366 ; AVX512BW-NEXT: andl $1, %eax
367 ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
368 ; AVX512BW-NEXT: vzeroupper
369 ; AVX512BW-NEXT: retq
371 ; AVX512DQ-LABEL: zext_test2:
372 ; AVX512DQ: ## %bb.0:
373 ; AVX512DQ-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
374 ; AVX512DQ-NEXT: kshiftrw $5, %k0, %k0
375 ; AVX512DQ-NEXT: kmovw %k0, %eax
376 ; AVX512DQ-NEXT: andl $1, %eax
377 ; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
378 ; AVX512DQ-NEXT: vzeroupper
379 ; AVX512DQ-NEXT: retq
381 ; X86-LABEL: zext_test2:
383 ; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
384 ; X86-NEXT: kshiftrw $5, %k0, %k0
385 ; X86-NEXT: kmovd %k0, %eax
386 ; X86-NEXT: andl $1, %eax
387 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
388 ; X86-NEXT: vzeroupper
390 %cmp_res = icmp ugt <16 x i32> %a, %b
391 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
392 %res = zext i1 %cmp_res.i1 to i16
; i8 variant of zext_test1: unsigned greater-than compare of two <16 x i32>
; vectors, extract element 5 of the <16 x i1> result, zero-extend to i8.
; Compared with the wider variants the assertions expect the final masking to
; be done as a byte operation (andb with 1 on %al).
396 define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
397 ; KNL-LABEL: zext_test3:
399 ; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
400 ; KNL-NEXT: kshiftrw $5, %k0, %k0
401 ; KNL-NEXT: kmovw %k0, %eax
402 ; KNL-NEXT: andb $1, %al
403 ; KNL-NEXT: ## kill: def $al killed $al killed $eax
404 ; KNL-NEXT: vzeroupper
407 ; SKX-LABEL: zext_test3:
409 ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
410 ; SKX-NEXT: kshiftrw $5, %k0, %k0
411 ; SKX-NEXT: kmovd %k0, %eax
412 ; SKX-NEXT: andb $1, %al
413 ; SKX-NEXT: ## kill: def $al killed $al killed $eax
414 ; SKX-NEXT: vzeroupper
417 ; AVX512BW-LABEL: zext_test3:
418 ; AVX512BW: ## %bb.0:
419 ; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
420 ; AVX512BW-NEXT: kshiftrw $5, %k0, %k0
421 ; AVX512BW-NEXT: kmovd %k0, %eax
422 ; AVX512BW-NEXT: andb $1, %al
423 ; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
424 ; AVX512BW-NEXT: vzeroupper
425 ; AVX512BW-NEXT: retq
427 ; AVX512DQ-LABEL: zext_test3:
428 ; AVX512DQ: ## %bb.0:
429 ; AVX512DQ-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
430 ; AVX512DQ-NEXT: kshiftrw $5, %k0, %k0
431 ; AVX512DQ-NEXT: kmovw %k0, %eax
432 ; AVX512DQ-NEXT: andb $1, %al
433 ; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
434 ; AVX512DQ-NEXT: vzeroupper
435 ; AVX512DQ-NEXT: retq
437 ; X86-LABEL: zext_test3:
439 ; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
440 ; X86-NEXT: kshiftrw $5, %k0, %k0
441 ; X86-NEXT: kmovd %k0, %eax
442 ; X86-NEXT: andb $1, %al
443 ; X86-NEXT: ## kill: def $al killed $al killed $eax
444 ; X86-NEXT: vzeroupper
446 %cmp_res = icmp ugt <16 x i32> %a, %b
447 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
448 %res = zext i1 %cmp_res.i1 to i8
; Constant <8 x i1> stores and a round trip through a stack slot:
;   - stores an all-true <8 x i1> through %R,
;   - stores <0,1,1,1,1,1,1,1> into a local alloca, reloads it, and bitcasts
;     the reloaded mask to i8.
; The assertions expect everything to be folded to byte immediates: -1 for the
; store through %R, and -2 (all bits set except bit 0) both for the stack
; store and for the value placed in %al.
452 define i8 @conv1(<8 x i1>* %R) {
453 ; CHECK-LABEL: conv1:
454 ; CHECK: ## %bb.0: ## %entry
455 ; CHECK-NEXT: movb $-1, (%rdi)
456 ; CHECK-NEXT: movb $-2, -{{[0-9]+}}(%rsp)
457 ; CHECK-NEXT: movb $-2, %al
461 ; X86: ## %bb.0: ## %entry
462 ; X86-NEXT: subl $12, %esp
463 ; X86-NEXT: .cfi_def_cfa_offset 16
464 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
465 ; X86-NEXT: movb $-1, (%eax)
466 ; X86-NEXT: movb $-2, (%esp)
467 ; X86-NEXT: movb $-2, %al
468 ; X86-NEXT: addl $12, %esp
471 store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R
473 %maskPtr = alloca <8 x i1>
474 store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr
475 %mask = load <8 x i1>, <8 x i1>* %maskPtr
476 %mask_convert = bitcast <8 x i1> %mask to i8
; Compare-of-compares on <4 x i64>: two signed greater-than compares produce
; <4 x i1> masks, which are themselves compared with a signed greater-than on
; i1 elements; the result is sign-extended to <4 x i32>.
; The assertions show this lowered as a vpcmpleq on the first pair whose mask
; then predicates a vpcmpgtq on the second pair. Prefixes without 256-bit mask
; compares visible here (KNL, AVX512BW, AVX512DQ) widen the inputs to zmm.
480 define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
483 ; KNL-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
484 ; KNL-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
485 ; KNL-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
486 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
487 ; KNL-NEXT: vpcmpleq %zmm1, %zmm0, %k1
488 ; KNL-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 {%k1}
489 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
490 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
491 ; KNL-NEXT: vzeroupper
496 ; SKX-NEXT: vpcmpleq %ymm1, %ymm0, %k1
497 ; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
498 ; SKX-NEXT: vpmovm2d %k0, %xmm0
499 ; SKX-NEXT: vzeroupper
502 ; AVX512BW-LABEL: test4:
503 ; AVX512BW: ## %bb.0:
504 ; AVX512BW-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
505 ; AVX512BW-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
506 ; AVX512BW-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
507 ; AVX512BW-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
508 ; AVX512BW-NEXT: vpcmpleq %zmm1, %zmm0, %k1
509 ; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 {%k1}
510 ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
511 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
512 ; AVX512BW-NEXT: vzeroupper
513 ; AVX512BW-NEXT: retq
515 ; AVX512DQ-LABEL: test4:
516 ; AVX512DQ: ## %bb.0:
517 ; AVX512DQ-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
518 ; AVX512DQ-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
519 ; AVX512DQ-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
520 ; AVX512DQ-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
521 ; AVX512DQ-NEXT: vpcmpleq %zmm1, %zmm0, %k1
522 ; AVX512DQ-NEXT: vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
523 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
524 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
525 ; AVX512DQ-NEXT: vzeroupper
526 ; AVX512DQ-NEXT: retq
530 ; X86-NEXT: vpcmpleq %ymm1, %ymm0, %k1
531 ; X86-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
532 ; X86-NEXT: vpmovm2d %k0, %xmm0
533 ; X86-NEXT: vzeroupper
535 %x_gt_y = icmp sgt <4 x i64> %x, %y
536 %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
537 %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1
538 %resse = sext <4 x i1>%res to <4 x i32>
; test5 is a compare-of-compares on <2 x i64>: a signed less-than compare of
; (%x, %y) and a signed greater-than compare of (%x1, %y1) produce <2 x i1>
; masks, which are compared with a signed less-than on i1 elements; the result
; is sign-extended to <2 x i64>.
; The assertions show a vpcmpleq on the (%x1, %y1) registers whose mask then
; predicates a vpcmpgtq on the (%x, %y) registers.
; NOTE(review): the names %x_gt_y / %x1_gt_y1 do not match the slt predicate
; used for the first compare; they look copied from test4.
542 define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
545 ; KNL-NEXT: ## kill: def $xmm3 killed $xmm3 def $zmm3
546 ; KNL-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
547 ; KNL-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
548 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
549 ; KNL-NEXT: vpcmpleq %zmm3, %zmm2, %k1
550 ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 {%k1}
551 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
552 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
553 ; KNL-NEXT: vzeroupper
558 ; SKX-NEXT: vpcmpleq %xmm3, %xmm2, %k1
559 ; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1}
560 ; SKX-NEXT: vpmovm2q %k0, %xmm0
563 ; AVX512BW-LABEL: test5:
564 ; AVX512BW: ## %bb.0:
565 ; AVX512BW-NEXT: ## kill: def $xmm3 killed $xmm3 def $zmm3
566 ; AVX512BW-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
567 ; AVX512BW-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
568 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
569 ; AVX512BW-NEXT: vpcmpleq %zmm3, %zmm2, %k1
570 ; AVX512BW-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 {%k1}
571 ; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
572 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
573 ; AVX512BW-NEXT: vzeroupper
574 ; AVX512BW-NEXT: retq
576 ; AVX512DQ-LABEL: test5:
577 ; AVX512DQ: ## %bb.0:
578 ; AVX512DQ-NEXT: ## kill: def $xmm3 killed $xmm3 def $zmm3
579 ; AVX512DQ-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
580 ; AVX512DQ-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
581 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
582 ; AVX512DQ-NEXT: vpcmpleq %zmm3, %zmm2, %k1
583 ; AVX512DQ-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 {%k1}
584 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
585 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
586 ; AVX512DQ-NEXT: vzeroupper
587 ; AVX512DQ-NEXT: retq
591 ; X86-NEXT: vpcmpleq %xmm3, %xmm2, %k1
592 ; X86-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1}
593 ; X86-NEXT: vpmovm2q %k0, %xmm0
595 %x_gt_y = icmp slt <2 x i64> %x, %y
596 %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
597 %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1
598 %resse = sext <2 x i1>%res to <2 x i64>
; The next line carries both the closing brace of test5 and the start of test6.
; test6 ANDs a <16 x i1> argument with an alternating true/false constant,
; bitcasts the result to i16, compares it with zero, and branches to %true or
; %false on the outcome. No assertions for test6 are visible in this excerpt.
600 }define void @test6(<16 x i1> %mask) {
602 %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
603 %b = bitcast <16 x i1> %a to i16
604 %c = icmp eq i16 %b, 0
605 br i1 %c, label %true, label %false
; ORs a <8 x i1> argument with an alternating true/false constant (true in the
; even positions), bitcasts the result to i8, compares it with zero, and
; branches to %true or %false on the outcome.
; The assertions expect the vector argument to be converted to a mask, moved
; into %eax, and ORed with the immediate 85 (0x55, the alternating constant).
; How the argument becomes a mask differs per prefix (vptestmq, vpmovw2m or
; vpmovq2m).
613 define void @test7(<8 x i1> %mask) {
615 ; KNL: ## %bb.0: ## %allocas
616 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
617 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
618 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
619 ; KNL-NEXT: kmovw %k0, %eax
620 ; KNL-NEXT: orb $85, %al
621 ; KNL-NEXT: vzeroupper
625 ; SKX: ## %bb.0: ## %allocas
626 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
627 ; SKX-NEXT: vpmovw2m %xmm0, %k0
628 ; SKX-NEXT: kmovd %k0, %eax
629 ; SKX-NEXT: orb $85, %al
632 ; AVX512BW-LABEL: test7:
633 ; AVX512BW: ## %bb.0: ## %allocas
634 ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
635 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
636 ; AVX512BW-NEXT: kmovd %k0, %eax
637 ; AVX512BW-NEXT: orb $85, %al
638 ; AVX512BW-NEXT: vzeroupper
639 ; AVX512BW-NEXT: retq
641 ; AVX512DQ-LABEL: test7:
642 ; AVX512DQ: ## %bb.0: ## %allocas
643 ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
644 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
645 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
646 ; AVX512DQ-NEXT: kmovw %k0, %eax
647 ; AVX512DQ-NEXT: orb $85, %al
648 ; AVX512DQ-NEXT: vzeroupper
649 ; AVX512DQ-NEXT: retq
652 ; X86: ## %bb.0: ## %allocas
653 ; X86-NEXT: vpsllw $15, %xmm0, %xmm0
654 ; X86-NEXT: vpmovw2m %xmm0, %k0
655 ; X86-NEXT: kmovd %k0, %eax
656 ; X86-NEXT: orb $85, %al
659 %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
660 %b = bitcast <8 x i1> %a to i8
661 %c = icmp eq i8 %b, 0
662 br i1 %c, label %true, label %false
; Scalar-controlled select between two <16 x i1> compare results, then
; sign-extension to <16 x i8>:
;   %cond = %a1 > %b1 (signed, scalar)
;   %cmp1 = %a > 0 (signed, per element)
;   %cmp2 = %b < 0 (unsigned, per element)
; An unsigned less-than-zero compare can never be true, and the assertions
; reflect that: the not-taken arm simply zeroes a mask register with kxorw.
; The select itself is expected to become a compare-and-branch (cmpl + jg).
670 define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
673 ; KNL-NEXT: cmpl %esi, %edi
674 ; KNL-NEXT: jg LBB17_1
675 ; KNL-NEXT: ## %bb.2:
676 ; KNL-NEXT: kxorw %k0, %k0, %k1
677 ; KNL-NEXT: jmp LBB17_3
679 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
680 ; KNL-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
682 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
683 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
684 ; KNL-NEXT: vzeroupper
689 ; SKX-NEXT: cmpl %esi, %edi
690 ; SKX-NEXT: jg LBB17_1
691 ; SKX-NEXT: ## %bb.2:
692 ; SKX-NEXT: kxorw %k0, %k0, %k0
693 ; SKX-NEXT: vpmovm2b %k0, %xmm0
694 ; SKX-NEXT: vzeroupper
697 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
698 ; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
699 ; SKX-NEXT: vpmovm2b %k0, %xmm0
700 ; SKX-NEXT: vzeroupper
703 ; AVX512BW-LABEL: test8:
704 ; AVX512BW: ## %bb.0:
705 ; AVX512BW-NEXT: cmpl %esi, %edi
706 ; AVX512BW-NEXT: jg LBB17_1
707 ; AVX512BW-NEXT: ## %bb.2:
708 ; AVX512BW-NEXT: kxorw %k0, %k0, %k0
709 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
710 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
711 ; AVX512BW-NEXT: vzeroupper
712 ; AVX512BW-NEXT: retq
713 ; AVX512BW-NEXT: LBB17_1:
714 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
715 ; AVX512BW-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
716 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
717 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
718 ; AVX512BW-NEXT: vzeroupper
719 ; AVX512BW-NEXT: retq
721 ; AVX512DQ-LABEL: test8:
722 ; AVX512DQ: ## %bb.0:
723 ; AVX512DQ-NEXT: cmpl %esi, %edi
724 ; AVX512DQ-NEXT: jg LBB17_1
725 ; AVX512DQ-NEXT: ## %bb.2:
726 ; AVX512DQ-NEXT: kxorw %k0, %k0, %k0
727 ; AVX512DQ-NEXT: jmp LBB17_3
728 ; AVX512DQ-NEXT: LBB17_1:
729 ; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
730 ; AVX512DQ-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
731 ; AVX512DQ-NEXT: LBB17_3:
732 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
733 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
734 ; AVX512DQ-NEXT: vzeroupper
735 ; AVX512DQ-NEXT: retq
739 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
740 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
741 ; X86-NEXT: jg LBB17_1
742 ; X86-NEXT: ## %bb.2:
743 ; X86-NEXT: kxorw %k0, %k0, %k0
744 ; X86-NEXT: vpmovm2b %k0, %xmm0
745 ; X86-NEXT: vzeroupper
748 ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
749 ; X86-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
750 ; X86-NEXT: vpmovm2b %k0, %xmm0
751 ; X86-NEXT: vzeroupper
753 %cond = icmp sgt i32 %a1, %b1
754 %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer
755 %cmp2 = icmp ult <16 x i32> %b, zeroinitializer
756 %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2
757 %res = sext <16 x i1> %mix to <16 x i8>
; test9 selects between two <16 x i1> arguments using a scalar condition
; (%a1 > %b1, signed).
; The assertions expect a compare-and-branch (cmpl + jg) that picks which
; vector register to use, followed by a conversion through a mask register:
; vpmovb2m / vpmovm2b where the prefix has byte mask moves, or a widening to
; dwords (vpmovsxbd, vpslld, then vptestmd or vpmovd2m) otherwise.
760 define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
763 ; KNL-NEXT: cmpl %esi, %edi
764 ; KNL-NEXT: jg LBB18_1
765 ; KNL-NEXT: ## %bb.2:
766 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
767 ; KNL-NEXT: jmp LBB18_3
769 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
771 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
772 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
773 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
774 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
775 ; KNL-NEXT: vzeroupper
780 ; SKX-NEXT: cmpl %esi, %edi
781 ; SKX-NEXT: jg LBB18_1
782 ; SKX-NEXT: ## %bb.2:
783 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm0
784 ; SKX-NEXT: jmp LBB18_3
786 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
788 ; SKX-NEXT: vpmovb2m %xmm0, %k0
789 ; SKX-NEXT: vpmovm2b %k0, %xmm0
792 ; AVX512BW-LABEL: test9:
793 ; AVX512BW: ## %bb.0:
794 ; AVX512BW-NEXT: cmpl %esi, %edi
795 ; AVX512BW-NEXT: jg LBB18_1
796 ; AVX512BW-NEXT: ## %bb.2:
797 ; AVX512BW-NEXT: vpsllw $7, %xmm1, %xmm0
798 ; AVX512BW-NEXT: jmp LBB18_3
799 ; AVX512BW-NEXT: LBB18_1:
800 ; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
801 ; AVX512BW-NEXT: LBB18_3:
802 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
803 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
804 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
805 ; AVX512BW-NEXT: vzeroupper
806 ; AVX512BW-NEXT: retq
808 ; AVX512DQ-LABEL: test9:
809 ; AVX512DQ: ## %bb.0:
810 ; AVX512DQ-NEXT: cmpl %esi, %edi
811 ; AVX512DQ-NEXT: jg LBB18_1
812 ; AVX512DQ-NEXT: ## %bb.2:
813 ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0
814 ; AVX512DQ-NEXT: jmp LBB18_3
815 ; AVX512DQ-NEXT: LBB18_1:
816 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
817 ; AVX512DQ-NEXT: LBB18_3:
818 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
819 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
820 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
821 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
822 ; AVX512DQ-NEXT: vzeroupper
823 ; AVX512DQ-NEXT: retq
827 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
828 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
829 ; X86-NEXT: jg LBB18_1
830 ; X86-NEXT: ## %bb.2:
831 ; X86-NEXT: vpsllw $7, %xmm1, %xmm0
832 ; X86-NEXT: jmp LBB18_3
834 ; X86-NEXT: vpsllw $7, %xmm0, %xmm0
836 ; X86-NEXT: vpmovb2m %xmm0, %k0
837 ; X86-NEXT: vpmovm2b %k0, %xmm0
839 %mask = icmp sgt i32 %a1, %b1
840 %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b
; The next line carries both the closing brace of test9 and the start of
; test10. test10 is the <8 x i1> variant of the same scalar-controlled select
; (%a1 > %b1, signed). No assertions for test10 are visible in this excerpt.
842 }define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
843 %mask = icmp sgt i32 %a1, %b1
844 %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b
; <4 x i1> variant of the scalar-controlled select: picks %a or %b depending
; on whether %a1 > %b1 (signed).
; The assertions expect a compare-and-branch (cmpl + jg) choosing the source
; register, a vpslld by 31 to move each element's low bit into the sign bit,
; and then a conversion through a mask register (vptestmd or vpmovd2m).
848 define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
851 ; KNL-NEXT: cmpl %esi, %edi
852 ; KNL-NEXT: jg LBB20_1
853 ; KNL-NEXT: ## %bb.2:
854 ; KNL-NEXT: vpslld $31, %xmm1, %xmm0
855 ; KNL-NEXT: jmp LBB20_3
857 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
859 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
860 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
861 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
862 ; KNL-NEXT: vzeroupper
867 ; SKX-NEXT: cmpl %esi, %edi
868 ; SKX-NEXT: jg LBB20_1
869 ; SKX-NEXT: ## %bb.2:
870 ; SKX-NEXT: vpslld $31, %xmm1, %xmm0
871 ; SKX-NEXT: jmp LBB20_3
873 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0
875 ; SKX-NEXT: vpmovd2m %xmm0, %k0
876 ; SKX-NEXT: vpmovm2d %k0, %xmm0
879 ; AVX512BW-LABEL: test11:
880 ; AVX512BW: ## %bb.0:
881 ; AVX512BW-NEXT: cmpl %esi, %edi
882 ; AVX512BW-NEXT: jg LBB20_1
883 ; AVX512BW-NEXT: ## %bb.2:
884 ; AVX512BW-NEXT: vpslld $31, %xmm1, %xmm0
885 ; AVX512BW-NEXT: jmp LBB20_3
886 ; AVX512BW-NEXT: LBB20_1:
887 ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
888 ; AVX512BW-NEXT: LBB20_3:
889 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k1
890 ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
891 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
892 ; AVX512BW-NEXT: vzeroupper
893 ; AVX512BW-NEXT: retq
895 ; AVX512DQ-LABEL: test11:
896 ; AVX512DQ: ## %bb.0:
897 ; AVX512DQ-NEXT: cmpl %esi, %edi
898 ; AVX512DQ-NEXT: jg LBB20_1
899 ; AVX512DQ-NEXT: ## %bb.2:
900 ; AVX512DQ-NEXT: vpslld $31, %xmm1, %xmm0
901 ; AVX512DQ-NEXT: jmp LBB20_3
902 ; AVX512DQ-NEXT: LBB20_1:
903 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
904 ; AVX512DQ-NEXT: LBB20_3:
905 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
906 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
907 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
908 ; AVX512DQ-NEXT: vzeroupper
909 ; AVX512DQ-NEXT: retq
913 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
914 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
915 ; X86-NEXT: jg LBB20_1
916 ; X86-NEXT: ## %bb.2:
917 ; X86-NEXT: vpslld $31, %xmm1, %xmm0
918 ; X86-NEXT: jmp LBB20_3
920 ; X86-NEXT: vpslld $31, %xmm0, %xmm0
922 ; X86-NEXT: vpmovd2m %xmm0, %k0
923 ; X86-NEXT: vpmovm2d %k0, %xmm0
925 %mask = icmp sgt i32 %a1, %b1
926 %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b
; Constant-folding check: the constant 21845 (0x5555) is bitcast to <16 x i1>,
; element 0 is extracted and used to select between %x and %y.
; Bit 0 of 0x5555 is set, so the x86-64 assertions expect the first argument
; (%edi) to be moved straight into %eax with no mask code.
930 define i32 @test12(i32 %x, i32 %y) {
931 ; CHECK-LABEL: test12:
933 ; CHECK-NEXT: movl %edi, %eax
938 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
940 %a = bitcast i16 21845 to <16 x i1>
941 %b = extractelement <16 x i1> %a, i32 0
942 %c = select i1 %b, i32 %x, i32 %y
; Same constant-folding check as test12 but extracting element 3 of the
; <16 x i1> view of 21845 (0x5555).
; Bit 3 of 0x5555 is clear, so the x86-64 assertions expect the second
; argument (%esi) to be moved into %eax.
946 define i32 @test13(i32 %x, i32 %y) {
947 ; CHECK-LABEL: test13:
949 ; CHECK-NEXT: movl %esi, %eax
954 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
956 %a = bitcast i16 21845 to <16 x i1>
957 %b = extractelement <16 x i1> %a, i32 3
958 %c = select i1 %b, i32 %x, i32 %y
962 ; Make sure we don't crash on a large vector.
; Bitcasts an i128 constant to <128 x i1>, extracts element 3, and uses it to
; select between %x and %y.
; The x86-64 assertions expect the select to fold to the first argument (%edi).
963 define i32 @test13_crash(i32 %x, i32 %y) {
964 ; CHECK-LABEL: test13_crash:
966 ; CHECK-NEXT: movl %edi, %eax
969 ; X86-LABEL: test13_crash:
971 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
973 %a = bitcast i128 2184568686868686868686868686 to <128 x i1>
974 %b = extractelement <128 x i1> %a, i32 3
975 %c = select i1 %b, i32 %x, i32 %y
; Constant-folding check with no arguments: element 2 of the <16 x i1> view of
; 21845 (0x5555) is inserted at index 1 of the constant
; <true, false, false, true>.
; Bit 2 of 0x5555 is set, so the assertions expect the whole function to fold
; to a load of the constant vector [1,1,0,1].
979 define <4 x i1> @test14() {
980 ; CHECK-LABEL: test14:
982 ; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,0,1]
987 ; X86-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,0,1]
989 %a = bitcast i16 21845 to <16 x i1>
990 %b = extractelement <16 x i1> %a, i32 2
991 %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
; Selects between two constant <16 x i1> masks (the bit patterns 21845 and 1)
; using the scalar condition %x > %y (signed).
; The assertions expect the choice to be made on the integer immediates with
; a cmovgl, and only the chosen value to be moved into a mask register
; (kmovw or kmovd, depending on the prefix) before being expanded to a vector.
995 define <16 x i1> @test15(i32 %x, i32 %y) {
998 ; KNL-NEXT: cmpl %esi, %edi
999 ; KNL-NEXT: movl $21845, %eax ## imm = 0x5555
1000 ; KNL-NEXT: movl $1, %ecx
1001 ; KNL-NEXT: cmovgl %eax, %ecx
1002 ; KNL-NEXT: kmovw %ecx, %k1
1003 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1004 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
1005 ; KNL-NEXT: vzeroupper
1008 ; SKX-LABEL: test15:
1010 ; SKX-NEXT: cmpl %esi, %edi
1011 ; SKX-NEXT: movl $21845, %eax ## imm = 0x5555
1012 ; SKX-NEXT: movl $1, %ecx
1013 ; SKX-NEXT: cmovgl %eax, %ecx
1014 ; SKX-NEXT: kmovd %ecx, %k0
1015 ; SKX-NEXT: vpmovm2b %k0, %xmm0
1018 ; AVX512BW-LABEL: test15:
1019 ; AVX512BW: ## %bb.0:
1020 ; AVX512BW-NEXT: cmpl %esi, %edi
1021 ; AVX512BW-NEXT: movl $21845, %eax ## imm = 0x5555
1022 ; AVX512BW-NEXT: movl $1, %ecx
1023 ; AVX512BW-NEXT: cmovgl %eax, %ecx
1024 ; AVX512BW-NEXT: kmovd %ecx, %k0
1025 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
1026 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
1027 ; AVX512BW-NEXT: vzeroupper
1028 ; AVX512BW-NEXT: retq
1030 ; AVX512DQ-LABEL: test15:
1031 ; AVX512DQ: ## %bb.0:
1032 ; AVX512DQ-NEXT: cmpl %esi, %edi
1033 ; AVX512DQ-NEXT: movl $21845, %eax ## imm = 0x5555
1034 ; AVX512DQ-NEXT: movl $1, %ecx
1035 ; AVX512DQ-NEXT: cmovgl %eax, %ecx
1036 ; AVX512DQ-NEXT: kmovw %ecx, %k0
1037 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
1038 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1039 ; AVX512DQ-NEXT: vzeroupper
1040 ; AVX512DQ-NEXT: retq
1042 ; X86-LABEL: test15:
1044 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1045 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
1046 ; X86-NEXT: movl $21845, %eax ## imm = 0x5555
1047 ; X86-NEXT: movl $1, %ecx
1048 ; X86-NEXT: cmovgl %eax, %ecx
1049 ; X86-NEXT: kmovd %ecx, %k0
1050 ; X86-NEXT: vpmovm2b %k0, %xmm0
1052 %a = bitcast i16 21845 to <16 x i1>
1053 %b = bitcast i16 1 to <16 x i1>
1054 %mask = icmp sgt i32 %x, %y
1055 %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
; Sets one bit of a 64-bit mask: the i64 argument is bitcast to <64 x i1>,
; element 5 is overwritten with true, and the result is sign-extended to
; <64 x i8>.
; Two expected lowerings are visible. The KNL and AVX512DQ prefixes split the
; value into four 16-bit pieces with scalar shifts and use 16-bit mask
; operations (kshiftrw / kxorw / kshiftlw) on the low piece. The SKX,
; AVX512BW and i686 prefixes operate on the whole mask with 64-bit
; instructions (kmovq / kshiftrq / kxorq / kshiftlq) and expand it with a
; single vpmovm2b.
1059 define <64 x i8> @test16(i64 %x) {
1061 ; KNL-LABEL: test16:
1063 ; KNL-NEXT: movq %rdi, %rax
1064 ; KNL-NEXT: movl %edi, %ecx
1065 ; KNL-NEXT: kmovw %edi, %k0
1066 ; KNL-NEXT: shrq $32, %rdi
1067 ; KNL-NEXT: shrq $48, %rax
1068 ; KNL-NEXT: shrl $16, %ecx
1069 ; KNL-NEXT: kmovw %ecx, %k1
1070 ; KNL-NEXT: kmovw %eax, %k2
1071 ; KNL-NEXT: kmovw %edi, %k3
1072 ; KNL-NEXT: kshiftrw $5, %k0, %k4
1073 ; KNL-NEXT: kxnorw %k0, %k0, %k5
1074 ; KNL-NEXT: kxorw %k5, %k4, %k4
1075 ; KNL-NEXT: kshiftlw $15, %k4, %k4
1076 ; KNL-NEXT: kshiftrw $10, %k4, %k4
1077 ; KNL-NEXT: kxorw %k4, %k0, %k4
1078 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
1079 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
1080 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
1081 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
1082 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1083 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z}
1084 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
1085 ; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
1086 ; KNL-NEXT: vpmovdb %zmm2, %xmm2
1087 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
1088 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
1091 ; SKX-LABEL: test16:
1093 ; SKX-NEXT: kmovq %rdi, %k0
1094 ; SKX-NEXT: kxnorw %k0, %k0, %k1
1095 ; SKX-NEXT: kshiftrq $5, %k0, %k2
1096 ; SKX-NEXT: kxorq %k1, %k2, %k1
1097 ; SKX-NEXT: kshiftlq $63, %k1, %k1
1098 ; SKX-NEXT: kshiftrq $58, %k1, %k1
1099 ; SKX-NEXT: kxorq %k1, %k0, %k0
1100 ; SKX-NEXT: vpmovm2b %k0, %zmm0
1103 ; AVX512BW-LABEL: test16:
1104 ; AVX512BW: ## %bb.0:
1105 ; AVX512BW-NEXT: kmovq %rdi, %k0
1106 ; AVX512BW-NEXT: kxnorw %k0, %k0, %k1
1107 ; AVX512BW-NEXT: kshiftrq $5, %k0, %k2
1108 ; AVX512BW-NEXT: kxorq %k1, %k2, %k1
1109 ; AVX512BW-NEXT: kshiftlq $63, %k1, %k1
1110 ; AVX512BW-NEXT: kshiftrq $58, %k1, %k1
1111 ; AVX512BW-NEXT: kxorq %k1, %k0, %k0
1112 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
1113 ; AVX512BW-NEXT: retq
1115 ; AVX512DQ-LABEL: test16:
1116 ; AVX512DQ: ## %bb.0:
1117 ; AVX512DQ-NEXT: movq %rdi, %rax
1118 ; AVX512DQ-NEXT: movl %edi, %ecx
1119 ; AVX512DQ-NEXT: kmovw %edi, %k0
1120 ; AVX512DQ-NEXT: shrq $32, %rdi
1121 ; AVX512DQ-NEXT: shrq $48, %rax
1122 ; AVX512DQ-NEXT: shrl $16, %ecx
1123 ; AVX512DQ-NEXT: kmovw %ecx, %k1
1124 ; AVX512DQ-NEXT: kmovw %eax, %k2
1125 ; AVX512DQ-NEXT: kmovw %edi, %k3
1126 ; AVX512DQ-NEXT: kshiftrw $5, %k0, %k4
1127 ; AVX512DQ-NEXT: kxnorw %k0, %k0, %k5
1128 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
1129 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
1130 ; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4
1131 ; AVX512DQ-NEXT: kxorw %k4, %k0, %k0
1132 ; AVX512DQ-NEXT: vpmovm2d %k3, %zmm0
1133 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1134 ; AVX512DQ-NEXT: vpmovm2d %k2, %zmm1
1135 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
1136 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1137 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
1138 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
1139 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm2
1140 ; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
1141 ; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
1142 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
1143 ; AVX512DQ-NEXT: retq
1145 ; X86-LABEL: test16:
1147 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0
1148 ; X86-NEXT: kshiftrq $5, %k0, %k1
1149 ; X86-NEXT: kxnorw %k0, %k0, %k2
1150 ; X86-NEXT: kxorq %k2, %k1, %k1
1151 ; X86-NEXT: kshiftlq $63, %k1, %k1
1152 ; X86-NEXT: kshiftrq $58, %k1, %k1
1153 ; X86-NEXT: kxorq %k1, %k0, %k0
1154 ; X86-NEXT: vpmovm2b %k0, %zmm0
1156 %a = bitcast i64 %x to <64 x i1>
1157 %b = insertelement <64 x i1>%a, i1 true, i32 5
1158 %c = sext <64 x i1>%b to <64 x i8>
; test17 -- same shape as a constant lane insert, but the bit written into
; lane 5 of the 64-lane mask is computed at run time as (%y > %z, signed).
; The result is the mask sign-extended lane-by-lane to i8.
; In the expectations the comparison result reaches a mask register through
; cmpl/setg followed by a kmov from %eax.
1162 define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
1164 ; KNL-LABEL: test17:
1166 ; KNL-NEXT: movq %rdi, %rax
1167 ; KNL-NEXT: movl %edi, %ecx
1168 ; KNL-NEXT: kmovw %edi, %k0
1169 ; KNL-NEXT: shrq $32, %rdi
1170 ; KNL-NEXT: shrq $48, %rax
1171 ; KNL-NEXT: shrl $16, %ecx
1172 ; KNL-NEXT: kmovw %ecx, %k1
1173 ; KNL-NEXT: kmovw %eax, %k2
1174 ; KNL-NEXT: kmovw %edi, %k3
1175 ; KNL-NEXT: cmpl %edx, %esi
1176 ; KNL-NEXT: setg %al
1177 ; KNL-NEXT: kshiftrw $5, %k0, %k4
1178 ; KNL-NEXT: kmovw %eax, %k5
1179 ; KNL-NEXT: kxorw %k5, %k4, %k4
1180 ; KNL-NEXT: kshiftlw $15, %k4, %k4
1181 ; KNL-NEXT: kshiftrw $10, %k4, %k4
1182 ; KNL-NEXT: kxorw %k4, %k0, %k4
1183 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
1184 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
1185 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
1186 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
1187 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1188 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z}
1189 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
1190 ; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
1191 ; KNL-NEXT: vpmovdb %zmm2, %xmm2
1192 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
1193 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
1196 ; SKX-LABEL: test17:
1198 ; SKX-NEXT: kmovq %rdi, %k0
1199 ; SKX-NEXT: cmpl %edx, %esi
1200 ; SKX-NEXT: setg %al
1201 ; SKX-NEXT: kmovd %eax, %k1
1202 ; SKX-NEXT: kshiftrq $5, %k0, %k2
1203 ; SKX-NEXT: kxorq %k1, %k2, %k1
1204 ; SKX-NEXT: kshiftlq $63, %k1, %k1
1205 ; SKX-NEXT: kshiftrq $58, %k1, %k1
1206 ; SKX-NEXT: kxorq %k1, %k0, %k0
1207 ; SKX-NEXT: vpmovm2b %k0, %zmm0
1210 ; AVX512BW-LABEL: test17:
1211 ; AVX512BW: ## %bb.0:
1212 ; AVX512BW-NEXT: kmovq %rdi, %k0
1213 ; AVX512BW-NEXT: cmpl %edx, %esi
1214 ; AVX512BW-NEXT: setg %al
1215 ; AVX512BW-NEXT: kmovd %eax, %k1
1216 ; AVX512BW-NEXT: kshiftrq $5, %k0, %k2
1217 ; AVX512BW-NEXT: kxorq %k1, %k2, %k1
1218 ; AVX512BW-NEXT: kshiftlq $63, %k1, %k1
1219 ; AVX512BW-NEXT: kshiftrq $58, %k1, %k1
1220 ; AVX512BW-NEXT: kxorq %k1, %k0, %k0
1221 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
1222 ; AVX512BW-NEXT: retq
1224 ; AVX512DQ-LABEL: test17:
1225 ; AVX512DQ: ## %bb.0:
1226 ; AVX512DQ-NEXT: movq %rdi, %rax
1227 ; AVX512DQ-NEXT: movl %edi, %ecx
1228 ; AVX512DQ-NEXT: kmovw %edi, %k0
1229 ; AVX512DQ-NEXT: shrq $32, %rdi
1230 ; AVX512DQ-NEXT: shrq $48, %rax
1231 ; AVX512DQ-NEXT: shrl $16, %ecx
1232 ; AVX512DQ-NEXT: kmovw %ecx, %k1
1233 ; AVX512DQ-NEXT: kmovw %eax, %k2
1234 ; AVX512DQ-NEXT: kmovw %edi, %k3
1235 ; AVX512DQ-NEXT: cmpl %edx, %esi
1236 ; AVX512DQ-NEXT: setg %al
1237 ; AVX512DQ-NEXT: kshiftrw $5, %k0, %k4
1238 ; AVX512DQ-NEXT: kmovw %eax, %k5
1239 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
1240 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
1241 ; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4
1242 ; AVX512DQ-NEXT: kxorw %k4, %k0, %k0
1243 ; AVX512DQ-NEXT: vpmovm2d %k3, %zmm0
1244 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1245 ; AVX512DQ-NEXT: vpmovm2d %k2, %zmm1
1246 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
1247 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1248 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
1249 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
1250 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm2
1251 ; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
1252 ; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
1253 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
1254 ; AVX512DQ-NEXT: retq
1256 ; X86-LABEL: test17:
1258 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1259 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0
1260 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
1261 ; X86-NEXT: setg %al
1262 ; X86-NEXT: kmovd %eax, %k1
1263 ; X86-NEXT: kshiftrq $5, %k0, %k2
1264 ; X86-NEXT: kxorq %k1, %k2, %k1
1265 ; X86-NEXT: kshiftlq $63, %k1, %k1
1266 ; X86-NEXT: kshiftrq $58, %k1, %k1
1267 ; X86-NEXT: kxorq %k1, %k0, %k0
1268 ; X86-NEXT: vpmovm2b %k0, %zmm0
; IR under test -- the inserted bit %b is a scalar signed compare result.
1270 %a = bitcast i64 %x to <64 x i1>
1271 %b = icmp sgt i32 %y, %z
1272 %c = insertelement <64 x i1>%a, i1 %b, i32 5
1273 %d = sext <64 x i1>%c to <64 x i8>
; test18 -- move individual mask bits between two masks of different widths.
; %a is viewed as an 8-lane mask and %y as a 16-lane mask; lane 8 of the wide
; mask is written into lane 7 of the narrow one and lane 9 into lane 6.
1277 define <8 x i1> @test18(i8 %a, i16 %y) {
1278 ; KNL-LABEL: test18:
1280 ; KNL-NEXT: kmovw %edi, %k0
1281 ; KNL-NEXT: kmovw %esi, %k1
1282 ; KNL-NEXT: kshiftrw $8, %k1, %k2
1283 ; KNL-NEXT: kshiftrw $9, %k1, %k1
1284 ; KNL-NEXT: kshiftrw $6, %k0, %k3
1285 ; KNL-NEXT: kxorw %k1, %k3, %k1
1286 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1287 ; KNL-NEXT: kshiftrw $9, %k1, %k1
1288 ; KNL-NEXT: kxorw %k1, %k0, %k0
1289 ; KNL-NEXT: kshiftlw $9, %k0, %k0
1290 ; KNL-NEXT: kshiftrw $9, %k0, %k0
1291 ; KNL-NEXT: kshiftlw $7, %k2, %k1
1292 ; KNL-NEXT: korw %k1, %k0, %k1
1293 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1294 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
1295 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
1296 ; KNL-NEXT: vzeroupper
1299 ; SKX-LABEL: test18:
1301 ; SKX-NEXT: kmovd %edi, %k0
1302 ; SKX-NEXT: kmovd %esi, %k1
1303 ; SKX-NEXT: kshiftrw $8, %k1, %k2
1304 ; SKX-NEXT: kshiftrw $9, %k1, %k1
1305 ; SKX-NEXT: kshiftrb $6, %k0, %k3
1306 ; SKX-NEXT: kxorb %k1, %k3, %k1
1307 ; SKX-NEXT: kshiftlb $7, %k1, %k1
1308 ; SKX-NEXT: kshiftrb $1, %k1, %k1
1309 ; SKX-NEXT: kxorb %k1, %k0, %k0
1310 ; SKX-NEXT: kshiftlb $1, %k0, %k0
1311 ; SKX-NEXT: kshiftrb $1, %k0, %k0
1312 ; SKX-NEXT: kshiftlb $7, %k2, %k1
1313 ; SKX-NEXT: korb %k1, %k0, %k0
1314 ; SKX-NEXT: vpmovm2w %k0, %xmm0
1317 ; AVX512BW-LABEL: test18:
1318 ; AVX512BW: ## %bb.0:
1319 ; AVX512BW-NEXT: kmovd %edi, %k0
1320 ; AVX512BW-NEXT: kmovd %esi, %k1
1321 ; AVX512BW-NEXT: kshiftrw $8, %k1, %k2
1322 ; AVX512BW-NEXT: kshiftrw $9, %k1, %k1
1323 ; AVX512BW-NEXT: kshiftrw $6, %k0, %k3
1324 ; AVX512BW-NEXT: kxorw %k1, %k3, %k1
1325 ; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
1326 ; AVX512BW-NEXT: kshiftrw $9, %k1, %k1
1327 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
1328 ; AVX512BW-NEXT: kshiftlw $9, %k0, %k0
1329 ; AVX512BW-NEXT: kshiftrw $9, %k0, %k0
1330 ; AVX512BW-NEXT: kshiftlw $7, %k2, %k1
1331 ; AVX512BW-NEXT: korw %k1, %k0, %k0
1332 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
1333 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
1334 ; AVX512BW-NEXT: vzeroupper
1335 ; AVX512BW-NEXT: retq
1337 ; AVX512DQ-LABEL: test18:
1338 ; AVX512DQ: ## %bb.0:
1339 ; AVX512DQ-NEXT: kmovw %edi, %k0
1340 ; AVX512DQ-NEXT: kmovw %esi, %k1
1341 ; AVX512DQ-NEXT: kshiftrw $8, %k1, %k2
1342 ; AVX512DQ-NEXT: kshiftrw $9, %k1, %k1
1343 ; AVX512DQ-NEXT: kshiftrb $6, %k0, %k3
1344 ; AVX512DQ-NEXT: kxorb %k1, %k3, %k1
1345 ; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1
1346 ; AVX512DQ-NEXT: kshiftrb $1, %k1, %k1
1347 ; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
1348 ; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0
1349 ; AVX512DQ-NEXT: kshiftrb $1, %k0, %k0
1350 ; AVX512DQ-NEXT: kshiftlb $7, %k2, %k1
1351 ; AVX512DQ-NEXT: korb %k1, %k0, %k0
1352 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
1353 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
1354 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
1355 ; AVX512DQ-NEXT: vzeroupper
1356 ; AVX512DQ-NEXT: retq
1358 ; X86-LABEL: test18:
1360 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
1361 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1362 ; X86-NEXT: kshiftrw $8, %k1, %k2
1363 ; X86-NEXT: kshiftrw $9, %k1, %k1
1364 ; X86-NEXT: kshiftrb $6, %k0, %k3
1365 ; X86-NEXT: kxorb %k1, %k3, %k1
1366 ; X86-NEXT: kshiftlb $7, %k1, %k1
1367 ; X86-NEXT: kshiftrb $1, %k1, %k1
1368 ; X86-NEXT: kxorb %k1, %k0, %k0
1369 ; X86-NEXT: kshiftlb $1, %k0, %k0
1370 ; X86-NEXT: kshiftrb $1, %k0, %k0
1371 ; X86-NEXT: kshiftlb $7, %k2, %k1
1372 ; X86-NEXT: korb %k1, %k0, %k0
1373 ; X86-NEXT: vpmovm2w %k0, %xmm0
; IR under test -- two extracts from the 16-lane mask feed two inserts into
; the 8-lane mask (source lane 8 -> lane 7, source lane 9 -> lane 6).
1375 %b = bitcast i8 %a to <8 x i1>
1376 %b1 = bitcast i16 %y to <16 x i1>
1377 %el1 = extractelement <16 x i1>%b1, i32 8
1378 %el2 = extractelement <16 x i1>%b1, i32 9
1379 %c = insertelement <8 x i1>%b, i1 %el1, i32 7
1380 %d = insertelement <8 x i1>%c, i1 %el2, i32 6
; test21 -- zero-masking select on 32 x i16. Lanes of %x are kept where %mask
; is true and replaced by zero elsewhere.
; The expectations show a k-register zero-masked move where 32-lane word masks
; are usable, and a shift-left/arithmetic-shift-right/and sequence on two
; 256-bit halves otherwise.
1383 define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
1384 ; KNL-LABEL: test21:
1386 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
1387 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
1388 ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1
1389 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
1390 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
1391 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
1392 ; KNL-NEXT: vpand %ymm3, %ymm1, %ymm1
1393 ; KNL-NEXT: vpsllw $15, %ymm2, %ymm2
1394 ; KNL-NEXT: vpsraw $15, %ymm2, %ymm2
1395 ; KNL-NEXT: vpand %ymm0, %ymm2, %ymm0
1396 ; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1399 ; SKX-LABEL: test21:
1401 ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1
1402 ; SKX-NEXT: vpmovb2m %ymm1, %k1
1403 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1406 ; AVX512BW-LABEL: test21:
1407 ; AVX512BW: ## %bb.0:
1408 ; AVX512BW-NEXT: vpsllw $7, %ymm1, %ymm1
1409 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
1410 ; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1411 ; AVX512BW-NEXT: retq
1413 ; AVX512DQ-LABEL: test21:
1414 ; AVX512DQ: ## %bb.0:
1415 ; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
1416 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm3
1417 ; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm1
1418 ; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
1419 ; AVX512DQ-NEXT: vpsllw $15, %ymm1, %ymm1
1420 ; AVX512DQ-NEXT: vpsraw $15, %ymm1, %ymm1
1421 ; AVX512DQ-NEXT: vpand %ymm3, %ymm1, %ymm1
1422 ; AVX512DQ-NEXT: vpsllw $15, %ymm2, %ymm2
1423 ; AVX512DQ-NEXT: vpsraw $15, %ymm2, %ymm2
1424 ; AVX512DQ-NEXT: vpand %ymm0, %ymm2, %ymm0
1425 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1426 ; AVX512DQ-NEXT: retq
1428 ; X86-LABEL: test21:
1430 ; X86-NEXT: vpsllw $7, %ymm1, %ymm1
1431 ; X86-NEXT: vpmovb2m %ymm1, %k1
1432 ; X86-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1434 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; test22 -- store a <4 x i1> argument to memory with no other computation.
; Every expectation ends in a single byte-sized write through the pointer,
; either a kmovb to memory or a kmov to a GPR followed by movb.
1438 define void @test22(<4 x i1> %a, <4 x i1>* %addr) {
1439 ; KNL-LABEL: test22:
1441 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1442 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1443 ; KNL-NEXT: kmovw %k0, %eax
1444 ; KNL-NEXT: movb %al, (%rdi)
1445 ; KNL-NEXT: vzeroupper
1448 ; SKX-LABEL: test22:
1450 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0
1451 ; SKX-NEXT: vpmovd2m %xmm0, %k0
1452 ; SKX-NEXT: kmovb %k0, (%rdi)
1455 ; AVX512BW-LABEL: test22:
1456 ; AVX512BW: ## %bb.0:
1457 ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
1458 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
1459 ; AVX512BW-NEXT: kmovd %k0, %eax
1460 ; AVX512BW-NEXT: movb %al, (%rdi)
1461 ; AVX512BW-NEXT: vzeroupper
1462 ; AVX512BW-NEXT: retq
1464 ; AVX512DQ-LABEL: test22:
1465 ; AVX512DQ: ## %bb.0:
1466 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
1467 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
1468 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
1469 ; AVX512DQ-NEXT: vzeroupper
1470 ; AVX512DQ-NEXT: retq
1472 ; X86-LABEL: test22:
1474 ; X86-NEXT: vpslld $31, %xmm0, %xmm0
1475 ; X86-NEXT: vpmovd2m %xmm0, %k0
1476 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1477 ; X86-NEXT: kmovb %k0, (%eax)
1479 store <4 x i1> %a, <4 x i1>* %addr
; test23 -- store a <2 x i1> argument to memory with no other computation.
; Every expectation ends in a single byte-sized write through the pointer,
; after the lanes are converted to a mask via a 63-bit left shift of each
; 64-bit element.
1483 define void @test23(<2 x i1> %a, <2 x i1>* %addr) {
1484 ; KNL-LABEL: test23:
1486 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1487 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
1488 ; KNL-NEXT: kmovw %k0, %eax
1489 ; KNL-NEXT: movb %al, (%rdi)
1490 ; KNL-NEXT: vzeroupper
1493 ; SKX-LABEL: test23:
1495 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
1496 ; SKX-NEXT: vpmovq2m %xmm0, %k0
1497 ; SKX-NEXT: kmovb %k0, (%rdi)
1500 ; AVX512BW-LABEL: test23:
1501 ; AVX512BW: ## %bb.0:
1502 ; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0
1503 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
1504 ; AVX512BW-NEXT: kmovd %k0, %eax
1505 ; AVX512BW-NEXT: movb %al, (%rdi)
1506 ; AVX512BW-NEXT: vzeroupper
1507 ; AVX512BW-NEXT: retq
1509 ; AVX512DQ-LABEL: test23:
1510 ; AVX512DQ: ## %bb.0:
1511 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
1512 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
1513 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
1514 ; AVX512DQ-NEXT: vzeroupper
1515 ; AVX512DQ-NEXT: retq
1517 ; X86-LABEL: test23:
1519 ; X86-NEXT: vpsllq $63, %xmm0, %xmm0
1520 ; X86-NEXT: vpmovq2m %xmm0, %k0
1521 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1522 ; X86-NEXT: kmovb %k0, (%eax)
1524 store <2 x i1> %a, <2 x i1>* %addr
; store_v1i1 -- invert a single-lane i1 vector and store it (align 4).
; The expectations show the <1 x i1> argument arriving in a general-purpose
; register (or on the stack for the 32-bit run) and the inversion being done
; as an xor against an all-ones mask produced by kxnorw.
1528 define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
1529 ; KNL-LABEL: store_v1i1:
1531 ; KNL-NEXT: kmovw %edi, %k0
1532 ; KNL-NEXT: kxnorw %k0, %k0, %k1
1533 ; KNL-NEXT: kxorw %k1, %k0, %k0
1534 ; KNL-NEXT: kmovw %k0, %eax
1535 ; KNL-NEXT: movb %al, (%rsi)
1538 ; SKX-LABEL: store_v1i1:
1540 ; SKX-NEXT: kmovd %edi, %k0
1541 ; SKX-NEXT: kxnorw %k0, %k0, %k1
1542 ; SKX-NEXT: kxorw %k1, %k0, %k0
1543 ; SKX-NEXT: kmovb %k0, (%rsi)
1546 ; AVX512BW-LABEL: store_v1i1:
1547 ; AVX512BW: ## %bb.0:
1548 ; AVX512BW-NEXT: kmovd %edi, %k0
1549 ; AVX512BW-NEXT: kxnorw %k0, %k0, %k1
1550 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
1551 ; AVX512BW-NEXT: kmovd %k0, %eax
1552 ; AVX512BW-NEXT: movb %al, (%rsi)
1553 ; AVX512BW-NEXT: retq
1555 ; AVX512DQ-LABEL: store_v1i1:
1556 ; AVX512DQ: ## %bb.0:
1557 ; AVX512DQ-NEXT: kmovw %edi, %k0
1558 ; AVX512DQ-NEXT: kxnorw %k0, %k0, %k1
1559 ; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
1560 ; AVX512DQ-NEXT: kmovb %k0, (%rsi)
1561 ; AVX512DQ-NEXT: retq
1563 ; X86-LABEL: store_v1i1:
1565 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1566 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1567 ; X86-NEXT: kxnorw %k0, %k0, %k1
1568 ; X86-NEXT: kxorw %k1, %k0, %k0
1569 ; X86-NEXT: kmovb %k0, (%eax)
; IR under test -- xor with an all-true constant is the lane-wise NOT.
1571 %x = xor <1 x i1> %c, <i1 1>
1572 store <1 x i1> %x, <1 x i1>* %ptr, align 4
; store_v2i1 -- invert a two-lane i1 vector and store it (align 4).
; In the expectations the NOT is either folded into the vector test
; (vptestnmq instead of vptestmq) or emitted as a separate knotw on the mask.
1576 define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
1577 ; KNL-LABEL: store_v2i1:
1579 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1580 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
1581 ; KNL-NEXT: kmovw %k0, %eax
1582 ; KNL-NEXT: movb %al, (%rdi)
1583 ; KNL-NEXT: vzeroupper
1586 ; SKX-LABEL: store_v2i1:
1588 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
1589 ; SKX-NEXT: vpmovq2m %xmm0, %k0
1590 ; SKX-NEXT: knotw %k0, %k0
1591 ; SKX-NEXT: kmovb %k0, (%rdi)
1594 ; AVX512BW-LABEL: store_v2i1:
1595 ; AVX512BW: ## %bb.0:
1596 ; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0
1597 ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
1598 ; AVX512BW-NEXT: kmovd %k0, %eax
1599 ; AVX512BW-NEXT: movb %al, (%rdi)
1600 ; AVX512BW-NEXT: vzeroupper
1601 ; AVX512BW-NEXT: retq
1603 ; AVX512DQ-LABEL: store_v2i1:
1604 ; AVX512DQ: ## %bb.0:
1605 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
1606 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
1607 ; AVX512DQ-NEXT: knotw %k0, %k0
1608 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
1609 ; AVX512DQ-NEXT: vzeroupper
1610 ; AVX512DQ-NEXT: retq
1612 ; X86-LABEL: store_v2i1:
1614 ; X86-NEXT: vpsllq $63, %xmm0, %xmm0
1615 ; X86-NEXT: vpmovq2m %xmm0, %k0
1616 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1617 ; X86-NEXT: knotw %k0, %k0
1618 ; X86-NEXT: kmovb %k0, (%eax)
; IR under test -- xor with an all-true constant is the lane-wise NOT.
1620 %x = xor <2 x i1> %c, <i1 1, i1 1>
1621 store <2 x i1> %x, <2 x i1>* %ptr, align 4
; store_v4i1 -- invert a four-lane i1 vector and store it (align 4).
; In the expectations the NOT is either folded into the vector test
; (vptestnmd instead of vptestmd) or emitted as a separate knotw on the mask.
1625 define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
1626 ; KNL-LABEL: store_v4i1:
1628 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1629 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
1630 ; KNL-NEXT: kmovw %k0, %eax
1631 ; KNL-NEXT: movb %al, (%rdi)
1632 ; KNL-NEXT: vzeroupper
1635 ; SKX-LABEL: store_v4i1:
1637 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0
1638 ; SKX-NEXT: vpmovd2m %xmm0, %k0
1639 ; SKX-NEXT: knotw %k0, %k0
1640 ; SKX-NEXT: kmovb %k0, (%rdi)
1643 ; AVX512BW-LABEL: store_v4i1:
1644 ; AVX512BW: ## %bb.0:
1645 ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
1646 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
1647 ; AVX512BW-NEXT: kmovd %k0, %eax
1648 ; AVX512BW-NEXT: movb %al, (%rdi)
1649 ; AVX512BW-NEXT: vzeroupper
1650 ; AVX512BW-NEXT: retq
1652 ; AVX512DQ-LABEL: store_v4i1:
1653 ; AVX512DQ: ## %bb.0:
1654 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
1655 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
1656 ; AVX512DQ-NEXT: knotw %k0, %k0
1657 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
1658 ; AVX512DQ-NEXT: vzeroupper
1659 ; AVX512DQ-NEXT: retq
1661 ; X86-LABEL: store_v4i1:
1663 ; X86-NEXT: vpslld $31, %xmm0, %xmm0
1664 ; X86-NEXT: vpmovd2m %xmm0, %k0
1665 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1666 ; X86-NEXT: knotw %k0, %k0
1667 ; X86-NEXT: kmovb %k0, (%eax)
; IR under test -- xor with an all-true constant is the lane-wise NOT.
1669 %x = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1>
1670 store <4 x i1> %x, <4 x i1>* %ptr, align 4
; store_v8i1 -- invert an eight-lane i1 vector and store it (align 4).
; Eight lanes fill a whole byte, so every expectation ends in a one-byte
; write; the inversion shows up as vptestnmq, knotb or knotw depending on the
; run.
1674 define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) {
1675 ; KNL-LABEL: store_v8i1:
1677 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1678 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1679 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
1680 ; KNL-NEXT: kmovw %k0, %eax
1681 ; KNL-NEXT: movb %al, (%rdi)
1682 ; KNL-NEXT: vzeroupper
1685 ; SKX-LABEL: store_v8i1:
1687 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1688 ; SKX-NEXT: vpmovw2m %xmm0, %k0
1689 ; SKX-NEXT: knotb %k0, %k0
1690 ; SKX-NEXT: kmovb %k0, (%rdi)
1693 ; AVX512BW-LABEL: store_v8i1:
1694 ; AVX512BW: ## %bb.0:
1695 ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
1696 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
1697 ; AVX512BW-NEXT: knotw %k0, %k0
1698 ; AVX512BW-NEXT: kmovd %k0, %eax
1699 ; AVX512BW-NEXT: movb %al, (%rdi)
1700 ; AVX512BW-NEXT: vzeroupper
1701 ; AVX512BW-NEXT: retq
1703 ; AVX512DQ-LABEL: store_v8i1:
1704 ; AVX512DQ: ## %bb.0:
1705 ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
1706 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
1707 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
1708 ; AVX512DQ-NEXT: knotb %k0, %k0
1709 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
1710 ; AVX512DQ-NEXT: vzeroupper
1711 ; AVX512DQ-NEXT: retq
1713 ; X86-LABEL: store_v8i1:
1715 ; X86-NEXT: vpsllw $15, %xmm0, %xmm0
1716 ; X86-NEXT: vpmovw2m %xmm0, %k0
1717 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1718 ; X86-NEXT: knotb %k0, %k0
1719 ; X86-NEXT: kmovb %k0, (%eax)
; IR under test -- xor with an all-true constant is the lane-wise NOT.
1721 %x = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
1722 store <8 x i1> %x, <8 x i1>* %ptr, align 4
; store_v16i1 -- invert a sixteen-lane i1 vector and store it (align 4).
; Unlike the narrower variants, every expectation here writes the mask to
; memory directly with a 16-bit kmovw; none goes through a GPR.
1726 define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) {
1727 ; KNL-LABEL: store_v16i1:
1729 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
1730 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1731 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
1732 ; KNL-NEXT: kmovw %k0, (%rdi)
1733 ; KNL-NEXT: vzeroupper
1736 ; SKX-LABEL: store_v16i1:
1738 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
1739 ; SKX-NEXT: vpmovb2m %xmm0, %k0
1740 ; SKX-NEXT: knotw %k0, %k0
1741 ; SKX-NEXT: kmovw %k0, (%rdi)
1744 ; AVX512BW-LABEL: store_v16i1:
1745 ; AVX512BW: ## %bb.0:
1746 ; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
1747 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
1748 ; AVX512BW-NEXT: knotw %k0, %k0
1749 ; AVX512BW-NEXT: kmovw %k0, (%rdi)
1750 ; AVX512BW-NEXT: vzeroupper
1751 ; AVX512BW-NEXT: retq
1753 ; AVX512DQ-LABEL: store_v16i1:
1754 ; AVX512DQ: ## %bb.0:
1755 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
1756 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
1757 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
1758 ; AVX512DQ-NEXT: knotw %k0, %k0
1759 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
1760 ; AVX512DQ-NEXT: vzeroupper
1761 ; AVX512DQ-NEXT: retq
1763 ; X86-LABEL: store_v16i1:
1765 ; X86-NEXT: vpsllw $7, %xmm0, %xmm0
1766 ; X86-NEXT: vpmovb2m %xmm0, %k0
1767 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1768 ; X86-NEXT: knotw %k0, %k0
1769 ; X86-NEXT: kmovw %k0, (%eax)
; IR under test -- xor with an all-true constant is the lane-wise NOT.
1771 %x = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
1772 store <16 x i1> %x, <16 x i1>* %ptr, align 4
; Internal i1 flag, initially false, that @f1 loads, inverts and stores back.
1787 @f1.v = internal unnamed_addr global i1 false, align 4
; f1 -- toggle the scalar i1 global @f1.v and pass the new value, zero-extended
; to i32, to @f2 in a tail call. The %c argument is not used by the visible IR.
; The 64-bit expectation turns the call into a jump (TAILCALL); the 32-bit
; expectation keeps a regular calll with the argument passed on the stack.
1789 define void @f1(i32 %c) {
1791 ; CHECK: ## %bb.0: ## %entry
1792 ; CHECK-NEXT: movzbl {{.*}}(%rip), %edi
1793 ; CHECK-NEXT: xorl $1, %edi
1794 ; CHECK-NEXT: movb %dil, {{.*}}(%rip)
1795 ; CHECK-NEXT: jmp _f2 ## TAILCALL
1798 ; X86: ## %bb.0: ## %entry
1799 ; X86-NEXT: subl $12, %esp
1800 ; X86-NEXT: .cfi_def_cfa_offset 16
1801 ; X86-NEXT: movzbl _f1.v, %eax
1802 ; X86-NEXT: xorl $1, %eax
1803 ; X86-NEXT: movb %al, _f1.v
1804 ; X86-NEXT: movl %eax, (%esp)
1805 ; X86-NEXT: calll _f2
1806 ; X86-NEXT: addl $12, %esp
; IR under test -- load the flag, flip it, write it back, then forward it.
1809 %.b1 = load i1, i1* @f1.v, align 4
1810 %not..b1 = xor i1 %.b1, true
1811 store i1 %not..b1, i1* @f1.v, align 4
1812 %0 = zext i1 %not..b1 to i32
1813 tail call void @f2(i32 %0) #2
; External callee with no body in this file; @f1 tail-calls it with an i32.
1817 declare void @f2(i32) #1
; store_i16_i1 -- truncate an i16 argument to i1.
; The expectations mask the value down to bit 0 (andl $1) and write one byte
; through the pointer argument.
; NOTE(review): the IR store of %c through %y is not visible in this excerpt;
; it is inferred from the expected movb -- confirm against the full file.
1819 define void @store_i16_i1(i16 %x, i1 *%y) {
1820 ; CHECK-LABEL: store_i16_i1:
1822 ; CHECK-NEXT: andl $1, %edi
1823 ; CHECK-NEXT: movb %dil, (%rsi)
1826 ; X86-LABEL: store_i16_i1:
1828 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1829 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
1830 ; X86-NEXT: andl $1, %ecx
1831 ; X86-NEXT: movb %cl, (%eax)
1833 %c = trunc i16 %x to i1
; store_i8_i1 -- truncate an i8 argument to i1.
; The expectations mask the value down to bit 0 (andl $1 in the 64-bit runs,
; andb $1 in the 32-bit run) and write one byte through the pointer argument.
; NOTE(review): the IR store of %c through %y is not visible in this excerpt;
; it is inferred from the expected movb -- confirm against the full file.
1838 define void @store_i8_i1(i8 %x, i1 *%y) {
1839 ; CHECK-LABEL: store_i8_i1:
1841 ; CHECK-NEXT: andl $1, %edi
1842 ; CHECK-NEXT: movb %dil, (%rsi)
1845 ; X86-LABEL: store_i8_i1:
1847 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1848 ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
1849 ; X86-NEXT: andb $1, %cl
1850 ; X86-NEXT: movb %cl, (%eax)
1852 %c = trunc i8 %x to i1
; test_build_vec_v32i1 -- select between %x and zero using a compile-time
; constant <32 x i1> mask.
; Without optsize every expectation implements the select as a bitwise AND
; with a constant loaded from memory (one 512-bit AND or two 256-bit ANDs)
; and does not materialize the mask in a k-register.
1857 define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) {
1858 ; KNL-LABEL: test_build_vec_v32i1:
1860 ; KNL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
1861 ; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
1862 ; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
1863 ; KNL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1866 ; SKX-LABEL: test_build_vec_v32i1:
1868 ; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
1871 ; AVX512BW-LABEL: test_build_vec_v32i1:
1872 ; AVX512BW: ## %bb.0:
1873 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
1874 ; AVX512BW-NEXT: retq
1876 ; AVX512DQ-LABEL: test_build_vec_v32i1:
1877 ; AVX512DQ: ## %bb.0:
1878 ; AVX512DQ-NEXT: vextractf64x4 $1, %zmm0, %ymm1
1879 ; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
1880 ; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
1881 ; AVX512DQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1882 ; AVX512DQ-NEXT: retq
1884 ; X86-LABEL: test_build_vec_v32i1:
1886 ; X86-NEXT: vandps LCPI40_0, %zmm0, %zmm0
1888 %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
; test_build_vec_v32i1_optsize -- the constant-mask select on 32 x i16 again,
; this time with the optsize function attribute.
; Where 32-lane word masking is available, the expectations load the mask as
; the 32-bit immediate 0x59455495 into a k-register and use a zero-masked
; vmovdqu16; the remaining runs still use two 256-bit ANDs with constants.
1892 define <32 x i16> @test_build_vec_v32i1_optsize(<32 x i16> %x) optsize {
1893 ; KNL-LABEL: test_build_vec_v32i1_optsize:
1895 ; KNL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
1896 ; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
1897 ; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
1898 ; KNL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1901 ; SKX-LABEL: test_build_vec_v32i1_optsize:
1903 ; SKX-NEXT: movl $1497715861, %eax ## imm = 0x59455495
1904 ; SKX-NEXT: kmovd %eax, %k1
1905 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1908 ; AVX512BW-LABEL: test_build_vec_v32i1_optsize:
1909 ; AVX512BW: ## %bb.0:
1910 ; AVX512BW-NEXT: movl $1497715861, %eax ## imm = 0x59455495
1911 ; AVX512BW-NEXT: kmovd %eax, %k1
1912 ; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1913 ; AVX512BW-NEXT: retq
1915 ; AVX512DQ-LABEL: test_build_vec_v32i1_optsize:
1916 ; AVX512DQ: ## %bb.0:
1917 ; AVX512DQ-NEXT: vextractf64x4 $1, %zmm0, %ymm1
1918 ; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
1919 ; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
1920 ; AVX512DQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1921 ; AVX512DQ-NEXT: retq
1923 ; X86-LABEL: test_build_vec_v32i1_optsize:
1925 ; X86-NEXT: movl $1497715861, %eax ## imm = 0x59455495
1926 ; X86-NEXT: kmovd %eax, %k1
1927 ; X86-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1929 %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
; test_build_vec_v64i1 -- select between %x and zero using a compile-time
; constant <64 x i1> mask over 64 x i8.
; The expectations show either a single vpshufb that zeroes the deselected
; bytes, or two 256-bit ANDs with constants loaded from memory.
1933 define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
1934 ; KNL-LABEL: test_build_vec_v64i1:
1936 ; KNL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
1937 ; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
1938 ; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
1939 ; KNL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1942 ; SKX-LABEL: test_build_vec_v64i1:
1944 ; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
1947 ; AVX512BW-LABEL: test_build_vec_v64i1:
1948 ; AVX512BW: ## %bb.0:
1949 ; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
1950 ; AVX512BW-NEXT: retq
1952 ; AVX512DQ-LABEL: test_build_vec_v64i1:
1953 ; AVX512DQ: ## %bb.0:
1954 ; AVX512DQ-NEXT: vextractf64x4 $1, %zmm0, %ymm1
1955 ; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
1956 ; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
1957 ; AVX512DQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1958 ; AVX512DQ-NEXT: retq
1960 ; X86-LABEL: test_build_vec_v64i1:
1962 ; X86-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
1964 %ret = select <64 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <64 x i8> %x, <64 x i8> zeroinitializer
; ktest_1 -- branch on whether an 8-lane compare mask is entirely zero.
; Two <8 x double> vectors are loaded from %base and from one double past it
; (so they overlap), both with align 1. The mask under test is
; (%in > val1) AND (%in < select(%in > val1, val2, 0)); it is bitcast to i8
; and compared with zero. In the expectations the %L1 path stores %in at the
; first address and the %L2 path stores it at the second.
; The zero test is a kortestb in some runs and a kmov to a GPR followed by
; testb in the others.
1968 define void @ktest_1(<8 x double> %in, double * %base) {
1969 ; KNL-LABEL: ktest_1:
1971 ; KNL-NEXT: vmovupd (%rdi), %zmm1
1972 ; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k1
1973 ; KNL-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
1974 ; KNL-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
1975 ; KNL-NEXT: kmovw %k0, %eax
1976 ; KNL-NEXT: testb %al, %al
1977 ; KNL-NEXT: je LBB43_2
1978 ; KNL-NEXT: ## %bb.1: ## %L1
1979 ; KNL-NEXT: vmovapd %zmm0, (%rdi)
1980 ; KNL-NEXT: vzeroupper
1982 ; KNL-NEXT: LBB43_2: ## %L2
1983 ; KNL-NEXT: vmovapd %zmm0, 8(%rdi)
1984 ; KNL-NEXT: vzeroupper
1987 ; SKX-LABEL: ktest_1:
1989 ; SKX-NEXT: vmovupd (%rdi), %zmm1
1990 ; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k1
1991 ; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
1992 ; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
1993 ; SKX-NEXT: kortestb %k0, %k0
1994 ; SKX-NEXT: je LBB43_2
1995 ; SKX-NEXT: ## %bb.1: ## %L1
1996 ; SKX-NEXT: vmovapd %zmm0, (%rdi)
1997 ; SKX-NEXT: vzeroupper
1999 ; SKX-NEXT: LBB43_2: ## %L2
2000 ; SKX-NEXT: vmovapd %zmm0, 8(%rdi)
2001 ; SKX-NEXT: vzeroupper
2004 ; AVX512BW-LABEL: ktest_1:
2005 ; AVX512BW: ## %bb.0:
2006 ; AVX512BW-NEXT: vmovupd (%rdi), %zmm1
2007 ; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm1, %k1
2008 ; AVX512BW-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
2009 ; AVX512BW-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
2010 ; AVX512BW-NEXT: kmovd %k0, %eax
2011 ; AVX512BW-NEXT: testb %al, %al
2012 ; AVX512BW-NEXT: je LBB43_2
2013 ; AVX512BW-NEXT: ## %bb.1: ## %L1
2014 ; AVX512BW-NEXT: vmovapd %zmm0, (%rdi)
2015 ; AVX512BW-NEXT: vzeroupper
2016 ; AVX512BW-NEXT: retq
2017 ; AVX512BW-NEXT: LBB43_2: ## %L2
2018 ; AVX512BW-NEXT: vmovapd %zmm0, 8(%rdi)
2019 ; AVX512BW-NEXT: vzeroupper
2020 ; AVX512BW-NEXT: retq
2022 ; AVX512DQ-LABEL: ktest_1:
2023 ; AVX512DQ: ## %bb.0:
2024 ; AVX512DQ-NEXT: vmovupd (%rdi), %zmm1
2025 ; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
2026 ; AVX512DQ-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
2027 ; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
2028 ; AVX512DQ-NEXT: kortestb %k0, %k0
2029 ; AVX512DQ-NEXT: je LBB43_2
2030 ; AVX512DQ-NEXT: ## %bb.1: ## %L1
2031 ; AVX512DQ-NEXT: vmovapd %zmm0, (%rdi)
2032 ; AVX512DQ-NEXT: vzeroupper
2033 ; AVX512DQ-NEXT: retq
2034 ; AVX512DQ-NEXT: LBB43_2: ## %L2
2035 ; AVX512DQ-NEXT: vmovapd %zmm0, 8(%rdi)
2036 ; AVX512DQ-NEXT: vzeroupper
2037 ; AVX512DQ-NEXT: retq
2039 ; X86-LABEL: ktest_1:
2041 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2042 ; X86-NEXT: vmovupd (%eax), %zmm1
2043 ; X86-NEXT: vcmpltpd %zmm0, %zmm1, %k1
2044 ; X86-NEXT: vmovupd 8(%eax), %zmm1 {%k1} {z}
2045 ; X86-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
2046 ; X86-NEXT: kortestb %k0, %k0
2047 ; X86-NEXT: je LBB43_2
2048 ; X86-NEXT: ## %bb.1: ## %L1
2049 ; X86-NEXT: vmovapd %zmm0, (%eax)
2050 ; X86-NEXT: vzeroupper
2052 ; X86-NEXT: LBB43_2: ## %L2
2053 ; X86-NEXT: vmovapd %zmm0, 8(%eax)
2054 ; X86-NEXT: vzeroupper
; Two vector views of the buffer, offset by one double from each other.
2056 %addr1 = getelementptr double, double * %base, i64 0
2057 %addr2 = getelementptr double, double * %base, i64 1
2059 %vaddr1 = bitcast double* %addr1 to <8 x double>*
2060 %vaddr2 = bitcast double* %addr2 to <8 x double>*
2062 %val1 = load <8 x double>, <8 x double> *%vaddr1, align 1
2063 %val2 = load <8 x double>, <8 x double> *%vaddr2, align 1
; First compare gates the second operand; the final mask is the AND of both.
2065 %sel1 = fcmp ogt <8 x double>%in, %val1
2066 %val3 = select <8 x i1> %sel1, <8 x double> %val2, <8 x double> zeroinitializer
2067 %sel2 = fcmp olt <8 x double> %in, %val3
2068 %sel3 = and <8 x i1> %sel1, %sel2
; Collapse the mask to an i8; an all-zero mask goes to %L2, otherwise %L1.
2070 %int_sel3 = bitcast <8 x i1> %sel3 to i8
2071 %res = icmp eq i8 %int_sel3, zeroinitializer
2072 br i1 %res, label %L2, label %L1
2074 store <8 x double> %in, <8 x double>* %vaddr1
2077 store <8 x double> %in, <8 x double>* %vaddr2
; ktest_2: compares %in against two overlapping, align-1 <32 x float> loads
; taken at float offsets 0 and 1 from %base, ORs the two <32 x i1> compare
; results, bitcasts the combined mask to i32 and branches on whether that
; integer is zero. The expected output below uses a single kortestd when the
; 32-bit mask can be formed with kunpckwd, and two 16-bit halves merged in
; general-purpose registers otherwise.
2083 define void @ktest_2(<32 x float> %in, float * %base) {
2085 ; KNL-LABEL: ktest_2:
2087 ; KNL-NEXT: vmovups (%rdi), %zmm2
2088 ; KNL-NEXT: vmovups 64(%rdi), %zmm3
2089 ; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k1
2090 ; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k2
2091 ; KNL-NEXT: vmovups 4(%rdi), %zmm2 {%k2} {z}
2092 ; KNL-NEXT: vmovups 68(%rdi), %zmm3 {%k1} {z}
2093 ; KNL-NEXT: vcmpltps %zmm3, %zmm1, %k0
2094 ; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k3
2095 ; KNL-NEXT: korw %k3, %k2, %k2
2096 ; KNL-NEXT: kmovw %k2, %eax
2097 ; KNL-NEXT: korw %k0, %k1, %k0
2098 ; KNL-NEXT: kmovw %k0, %ecx
2099 ; KNL-NEXT: shll $16, %ecx
2100 ; KNL-NEXT: orl %eax, %ecx
2101 ; KNL-NEXT: je LBB44_2
2102 ; KNL-NEXT: ## %bb.1: ## %L1
2103 ; KNL-NEXT: vmovaps %zmm0, (%rdi)
2104 ; KNL-NEXT: vmovaps %zmm1, 64(%rdi)
2105 ; KNL-NEXT: vzeroupper
2107 ; KNL-NEXT: LBB44_2: ## %L2
2108 ; KNL-NEXT: vmovaps %zmm0, 4(%rdi)
2109 ; KNL-NEXT: vmovaps %zmm1, 68(%rdi)
2110 ; KNL-NEXT: vzeroupper
2113 ; SKX-LABEL: ktest_2:
2115 ; SKX-NEXT: vmovups (%rdi), %zmm2
2116 ; SKX-NEXT: vmovups 64(%rdi), %zmm3
2117 ; SKX-NEXT: vcmpltps %zmm0, %zmm2, %k1
2118 ; SKX-NEXT: vcmpltps %zmm1, %zmm3, %k2
2119 ; SKX-NEXT: kunpckwd %k1, %k2, %k0
2120 ; SKX-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z}
2121 ; SKX-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z}
2122 ; SKX-NEXT: vcmpltps %zmm3, %zmm0, %k1
2123 ; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2
2124 ; SKX-NEXT: kunpckwd %k1, %k2, %k1
2125 ; SKX-NEXT: kortestd %k1, %k0
2126 ; SKX-NEXT: je LBB44_2
2127 ; SKX-NEXT: ## %bb.1: ## %L1
2128 ; SKX-NEXT: vmovaps %zmm0, (%rdi)
2129 ; SKX-NEXT: vmovaps %zmm1, 64(%rdi)
2130 ; SKX-NEXT: vzeroupper
2132 ; SKX-NEXT: LBB44_2: ## %L2
2133 ; SKX-NEXT: vmovaps %zmm0, 4(%rdi)
2134 ; SKX-NEXT: vmovaps %zmm1, 68(%rdi)
2135 ; SKX-NEXT: vzeroupper
2138 ; AVX512BW-LABEL: ktest_2:
2139 ; AVX512BW: ## %bb.0:
2140 ; AVX512BW-NEXT: vmovups (%rdi), %zmm2
2141 ; AVX512BW-NEXT: vmovups 64(%rdi), %zmm3
2142 ; AVX512BW-NEXT: vcmpltps %zmm0, %zmm2, %k1
2143 ; AVX512BW-NEXT: vcmpltps %zmm1, %zmm3, %k2
2144 ; AVX512BW-NEXT: kunpckwd %k1, %k2, %k0
2145 ; AVX512BW-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z}
2146 ; AVX512BW-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z}
2147 ; AVX512BW-NEXT: vcmpltps %zmm3, %zmm0, %k1
2148 ; AVX512BW-NEXT: vcmpltps %zmm2, %zmm1, %k2
2149 ; AVX512BW-NEXT: kunpckwd %k1, %k2, %k1
2150 ; AVX512BW-NEXT: kortestd %k1, %k0
2151 ; AVX512BW-NEXT: je LBB44_2
2152 ; AVX512BW-NEXT: ## %bb.1: ## %L1
2153 ; AVX512BW-NEXT: vmovaps %zmm0, (%rdi)
2154 ; AVX512BW-NEXT: vmovaps %zmm1, 64(%rdi)
2155 ; AVX512BW-NEXT: vzeroupper
2156 ; AVX512BW-NEXT: retq
2157 ; AVX512BW-NEXT: LBB44_2: ## %L2
2158 ; AVX512BW-NEXT: vmovaps %zmm0, 4(%rdi)
2159 ; AVX512BW-NEXT: vmovaps %zmm1, 68(%rdi)
2160 ; AVX512BW-NEXT: vzeroupper
2161 ; AVX512BW-NEXT: retq
2163 ; AVX512DQ-LABEL: ktest_2:
2164 ; AVX512DQ: ## %bb.0:
2165 ; AVX512DQ-NEXT: vmovups (%rdi), %zmm2
2166 ; AVX512DQ-NEXT: vmovups 64(%rdi), %zmm3
2167 ; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm3, %k1
2168 ; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k2
2169 ; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm2 {%k2} {z}
2170 ; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm3 {%k1} {z}
2171 ; AVX512DQ-NEXT: vcmpltps %zmm3, %zmm1, %k0
2172 ; AVX512DQ-NEXT: vcmpltps %zmm2, %zmm0, %k3
2173 ; AVX512DQ-NEXT: korw %k3, %k2, %k2
2174 ; AVX512DQ-NEXT: kmovw %k2, %eax
2175 ; AVX512DQ-NEXT: korw %k0, %k1, %k0
2176 ; AVX512DQ-NEXT: kmovw %k0, %ecx
2177 ; AVX512DQ-NEXT: shll $16, %ecx
2178 ; AVX512DQ-NEXT: orl %eax, %ecx
2179 ; AVX512DQ-NEXT: je LBB44_2
2180 ; AVX512DQ-NEXT: ## %bb.1: ## %L1
2181 ; AVX512DQ-NEXT: vmovaps %zmm0, (%rdi)
2182 ; AVX512DQ-NEXT: vmovaps %zmm1, 64(%rdi)
2183 ; AVX512DQ-NEXT: vzeroupper
2184 ; AVX512DQ-NEXT: retq
2185 ; AVX512DQ-NEXT: LBB44_2: ## %L2
2186 ; AVX512DQ-NEXT: vmovaps %zmm0, 4(%rdi)
2187 ; AVX512DQ-NEXT: vmovaps %zmm1, 68(%rdi)
2188 ; AVX512DQ-NEXT: vzeroupper
2189 ; AVX512DQ-NEXT: retq
2191 ; X86-LABEL: ktest_2:
2193 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2194 ; X86-NEXT: vmovups (%eax), %zmm2
2195 ; X86-NEXT: vmovups 64(%eax), %zmm3
2196 ; X86-NEXT: vcmpltps %zmm0, %zmm2, %k1
2197 ; X86-NEXT: vcmpltps %zmm1, %zmm3, %k2
2198 ; X86-NEXT: kunpckwd %k1, %k2, %k0
2199 ; X86-NEXT: vmovups 68(%eax), %zmm2 {%k2} {z}
2200 ; X86-NEXT: vmovups 4(%eax), %zmm3 {%k1} {z}
2201 ; X86-NEXT: vcmpltps %zmm3, %zmm0, %k1
2202 ; X86-NEXT: vcmpltps %zmm2, %zmm1, %k2
2203 ; X86-NEXT: kunpckwd %k1, %k2, %k1
2204 ; X86-NEXT: kortestd %k1, %k0
2205 ; X86-NEXT: je LBB44_2
2206 ; X86-NEXT: ## %bb.1: ## %L1
2207 ; X86-NEXT: vmovaps %zmm0, (%eax)
2208 ; X86-NEXT: vmovaps %zmm1, 64(%eax)
2209 ; X86-NEXT: vzeroupper
2211 ; X86-NEXT: LBB44_2: ## %L2
2212 ; X86-NEXT: vmovaps %zmm0, 4(%eax)
2213 ; X86-NEXT: vmovaps %zmm1, 68(%eax)
2214 ; X86-NEXT: vzeroupper
; Two vector pointers one float apart, so the two 32-float windows overlap.
2216 %addr1 = getelementptr float, float * %base, i64 0
2217 %addr2 = getelementptr float, float * %base, i64 1
2219 %vaddr1 = bitcast float* %addr1 to <32 x float>*
2220 %vaddr2 = bitcast float* %addr2 to <32 x float>*
2222 %val1 = load <32 x float>, <32 x float> *%vaddr1, align 1
2223 %val2 = load <32 x float>, <32 x float> *%vaddr2, align 1
; %val3 keeps %val2 only in lanes where %sel1 is true and is zero elsewhere.
2225 %sel1 = fcmp ogt <32 x float>%in, %val1
2226 %val3 = select <32 x i1> %sel1, <32 x float> %val2, <32 x float> zeroinitializer
2227 %sel2 = fcmp olt <32 x float> %in, %val3
2228 %sel3 = or <32 x i1> %sel1, %sel2
; Branch to %L2 when no lane of the combined mask is set, otherwise to %L1.
2230 %int_sel3 = bitcast <32 x i1> %sel3 to i32
2231 %res = icmp eq i32 %int_sel3, zeroinitializer
2232 br i1 %res, label %L2, label %L1
2234 store <32 x float> %in, <32 x float>* %vaddr1
2237 store <32 x float> %in, <32 x float>* %vaddr2
; load_8i1: loads an <8 x i1> mask from memory and sign-extends every lane
; to i64, producing an <8 x i64> result.
2243 define <8 x i64> @load_8i1(<8 x i1>* %a) {
2244 ; KNL-LABEL: load_8i1:
2246 ; KNL-NEXT: kmovw (%rdi), %k1
2247 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2250 ; SKX-LABEL: load_8i1:
2252 ; SKX-NEXT: kmovb (%rdi), %k0
2253 ; SKX-NEXT: vpmovm2q %k0, %zmm0
2256 ; AVX512BW-LABEL: load_8i1:
2257 ; AVX512BW: ## %bb.0:
2258 ; AVX512BW-NEXT: kmovw (%rdi), %k1
2259 ; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2260 ; AVX512BW-NEXT: retq
2262 ; AVX512DQ-LABEL: load_8i1:
2263 ; AVX512DQ: ## %bb.0:
2264 ; AVX512DQ-NEXT: kmovb (%rdi), %k0
2265 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
2266 ; AVX512DQ-NEXT: retq
2268 ; X86-LABEL: load_8i1:
2270 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2271 ; X86-NEXT: kmovb (%eax), %k0
2272 ; X86-NEXT: vpmovm2q %k0, %zmm0
2274 %b = load <8 x i1>, <8 x i1>* %a
2275 %c = sext <8 x i1> %b to <8 x i64>
; load_16i1: loads a <16 x i1> mask from memory and sign-extends every lane
; to i32, producing a <16 x i32> result.
2279 define <16 x i32> @load_16i1(<16 x i1>* %a) {
2280 ; KNL-LABEL: load_16i1:
2282 ; KNL-NEXT: kmovw (%rdi), %k1
2283 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2286 ; SKX-LABEL: load_16i1:
2288 ; SKX-NEXT: kmovw (%rdi), %k0
2289 ; SKX-NEXT: vpmovm2d %k0, %zmm0
2292 ; AVX512BW-LABEL: load_16i1:
2293 ; AVX512BW: ## %bb.0:
2294 ; AVX512BW-NEXT: kmovw (%rdi), %k1
2295 ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2296 ; AVX512BW-NEXT: retq
2298 ; AVX512DQ-LABEL: load_16i1:
2299 ; AVX512DQ: ## %bb.0:
2300 ; AVX512DQ-NEXT: kmovw (%rdi), %k0
2301 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
2302 ; AVX512DQ-NEXT: retq
2304 ; X86-LABEL: load_16i1:
2306 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2307 ; X86-NEXT: kmovw (%eax), %k0
2308 ; X86-NEXT: vpmovm2d %k0, %zmm0
2310 %b = load <16 x i1>, <16 x i1>* %a
2311 %c = sext <16 x i1> %b to <16 x i32>
; load_2i1: loads a <2 x i1> mask (narrower than any mask register width)
; from memory and sign-extends every lane to i16.
2315 define <2 x i16> @load_2i1(<2 x i1>* %a) {
2316 ; KNL-LABEL: load_2i1:
2318 ; KNL-NEXT: kmovw (%rdi), %k1
2319 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2320 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
2321 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
2322 ; KNL-NEXT: vzeroupper
2325 ; SKX-LABEL: load_2i1:
2327 ; SKX-NEXT: kmovb (%rdi), %k0
2328 ; SKX-NEXT: vpmovm2w %k0, %xmm0
2331 ; AVX512BW-LABEL: load_2i1:
2332 ; AVX512BW: ## %bb.0:
2333 ; AVX512BW-NEXT: kmovw (%rdi), %k0
2334 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
2335 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
2336 ; AVX512BW-NEXT: vzeroupper
2337 ; AVX512BW-NEXT: retq
2339 ; AVX512DQ-LABEL: load_2i1:
2340 ; AVX512DQ: ## %bb.0:
2341 ; AVX512DQ-NEXT: kmovb (%rdi), %k0
2342 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
2343 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
2344 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
2345 ; AVX512DQ-NEXT: vzeroupper
2346 ; AVX512DQ-NEXT: retq
2348 ; X86-LABEL: load_2i1:
2350 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2351 ; X86-NEXT: kmovb (%eax), %k0
2352 ; X86-NEXT: vpmovm2w %k0, %xmm0
2354 %b = load <2 x i1>, <2 x i1>* %a
2355 %c = sext <2 x i1> %b to <2 x i16>
; load_4i1: loads a <4 x i1> mask (narrower than any mask register width)
; from memory and sign-extends every lane to i16.
2359 define <4 x i16> @load_4i1(<4 x i1>* %a) {
2360 ; KNL-LABEL: load_4i1:
2362 ; KNL-NEXT: kmovw (%rdi), %k1
2363 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2364 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
2365 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
2366 ; KNL-NEXT: vzeroupper
2369 ; SKX-LABEL: load_4i1:
2371 ; SKX-NEXT: kmovb (%rdi), %k0
2372 ; SKX-NEXT: vpmovm2w %k0, %xmm0
2375 ; AVX512BW-LABEL: load_4i1:
2376 ; AVX512BW: ## %bb.0:
2377 ; AVX512BW-NEXT: kmovw (%rdi), %k0
2378 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
2379 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
2380 ; AVX512BW-NEXT: vzeroupper
2381 ; AVX512BW-NEXT: retq
2383 ; AVX512DQ-LABEL: load_4i1:
2384 ; AVX512DQ: ## %bb.0:
2385 ; AVX512DQ-NEXT: kmovb (%rdi), %k0
2386 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
2387 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
2388 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
2389 ; AVX512DQ-NEXT: vzeroupper
2390 ; AVX512DQ-NEXT: retq
2392 ; X86-LABEL: load_4i1:
2394 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2395 ; X86-NEXT: kmovb (%eax), %k0
2396 ; X86-NEXT: vpmovm2w %k0, %xmm0
2398 %b = load <4 x i1>, <4 x i1>* %a
2399 %c = sext <4 x i1> %b to <4 x i16>
; load_32i1: loads a <32 x i1> mask from memory and sign-extends every lane
; to i16. The expected output uses one 32-bit mask load where kmovd is
; available and two 16-bit mask loads (offsets 0 and 2) otherwise.
2403 define <32 x i16> @load_32i1(<32 x i1>* %a) {
2404 ; KNL-LABEL: load_32i1:
2406 ; KNL-NEXT: kmovw (%rdi), %k1
2407 ; KNL-NEXT: kmovw 2(%rdi), %k2
2408 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
2409 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
2410 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
2411 ; KNL-NEXT: vpmovdw %zmm1, %ymm1
2412 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
2415 ; SKX-LABEL: load_32i1:
2417 ; SKX-NEXT: kmovd (%rdi), %k0
2418 ; SKX-NEXT: vpmovm2w %k0, %zmm0
2421 ; AVX512BW-LABEL: load_32i1:
2422 ; AVX512BW: ## %bb.0:
2423 ; AVX512BW-NEXT: kmovd (%rdi), %k0
2424 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
2425 ; AVX512BW-NEXT: retq
2427 ; AVX512DQ-LABEL: load_32i1:
2428 ; AVX512DQ: ## %bb.0:
2429 ; AVX512DQ-NEXT: kmovw (%rdi), %k0
2430 ; AVX512DQ-NEXT: kmovw 2(%rdi), %k1
2431 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0
2432 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
2433 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
2434 ; AVX512DQ-NEXT: vpmovdw %zmm1, %ymm1
2435 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
2436 ; AVX512DQ-NEXT: retq
2438 ; X86-LABEL: load_32i1:
2440 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2441 ; X86-NEXT: kmovd (%eax), %k0
2442 ; X86-NEXT: vpmovm2w %k0, %zmm0
2444 %b = load <32 x i1>, <32 x i1>* %a
2445 %c = sext <32 x i1> %b to <32 x i16>
; load_64i1: loads a <64 x i1> mask from memory and sign-extends every lane
; to i8. The expected output uses one 64-bit mask load where kmovq is
; available and four 16-bit mask loads (offsets 0, 2, 4, 6) otherwise.
2449 define <64 x i8> @load_64i1(<64 x i1>* %a) {
2450 ; KNL-LABEL: load_64i1:
2452 ; KNL-NEXT: kmovw (%rdi), %k1
2453 ; KNL-NEXT: kmovw 2(%rdi), %k2
2454 ; KNL-NEXT: kmovw 4(%rdi), %k3
2455 ; KNL-NEXT: kmovw 6(%rdi), %k4
2456 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
2457 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
2458 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z}
2459 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
2460 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2461 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
2462 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
2463 ; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
2464 ; KNL-NEXT: vpmovdb %zmm2, %xmm2
2465 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2466 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
2469 ; SKX-LABEL: load_64i1:
2471 ; SKX-NEXT: kmovq (%rdi), %k0
2472 ; SKX-NEXT: vpmovm2b %k0, %zmm0
2475 ; AVX512BW-LABEL: load_64i1:
2476 ; AVX512BW: ## %bb.0:
2477 ; AVX512BW-NEXT: kmovq (%rdi), %k0
2478 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
2479 ; AVX512BW-NEXT: retq
2481 ; AVX512DQ-LABEL: load_64i1:
2482 ; AVX512DQ: ## %bb.0:
2483 ; AVX512DQ-NEXT: kmovw (%rdi), %k0
2484 ; AVX512DQ-NEXT: kmovw 2(%rdi), %k1
2485 ; AVX512DQ-NEXT: kmovw 4(%rdi), %k2
2486 ; AVX512DQ-NEXT: kmovw 6(%rdi), %k3
2487 ; AVX512DQ-NEXT: vpmovm2d %k2, %zmm0
2488 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
2489 ; AVX512DQ-NEXT: vpmovm2d %k3, %zmm1
2490 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
2491 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2492 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
2493 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
2494 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm2
2495 ; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
2496 ; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2497 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
2498 ; AVX512DQ-NEXT: retq
2500 ; X86-LABEL: load_64i1:
2502 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2503 ; X86-NEXT: kmovq (%eax), %k0
2504 ; X86-NEXT: vpmovm2b %k0, %zmm0
2506 %b = load <64 x i1>, <64 x i1>* %a
2507 %c = sext <64 x i1> %b to <64 x i8>
; store_8i1: stores an <8 x i1> argument to memory. The expected output
; first converts the vector argument into a mask register, then writes one
; byte either directly with kmovb or through a general-purpose register.
2511 define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) {
2512 ; KNL-LABEL: store_8i1:
2514 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
2515 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
2516 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
2517 ; KNL-NEXT: kmovw %k0, %eax
2518 ; KNL-NEXT: movb %al, (%rdi)
2519 ; KNL-NEXT: vzeroupper
2522 ; SKX-LABEL: store_8i1:
2524 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
2525 ; SKX-NEXT: vpmovw2m %xmm0, %k0
2526 ; SKX-NEXT: kmovb %k0, (%rdi)
2529 ; AVX512BW-LABEL: store_8i1:
2530 ; AVX512BW: ## %bb.0:
2531 ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
2532 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
2533 ; AVX512BW-NEXT: kmovd %k0, %eax
2534 ; AVX512BW-NEXT: movb %al, (%rdi)
2535 ; AVX512BW-NEXT: vzeroupper
2536 ; AVX512BW-NEXT: retq
2538 ; AVX512DQ-LABEL: store_8i1:
2539 ; AVX512DQ: ## %bb.0:
2540 ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
2541 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
2542 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
2543 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
2544 ; AVX512DQ-NEXT: vzeroupper
2545 ; AVX512DQ-NEXT: retq
2547 ; X86-LABEL: store_8i1:
2549 ; X86-NEXT: vpsllw $15, %xmm0, %xmm0
2550 ; X86-NEXT: vpmovw2m %xmm0, %k0
2551 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2552 ; X86-NEXT: kmovb %k0, (%eax)
2554 store <8 x i1> %v, <8 x i1>* %a
; store_8i1_1: truncates an <8 x i16> argument to <8 x i1> (keeping the low
; bit of each lane) and stores the resulting mask to memory.
2558 define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) {
2559 ; KNL-LABEL: store_8i1_1:
2561 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
2562 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
2563 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
2564 ; KNL-NEXT: kmovw %k0, %eax
2565 ; KNL-NEXT: movb %al, (%rdi)
2566 ; KNL-NEXT: vzeroupper
2569 ; SKX-LABEL: store_8i1_1:
2571 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
2572 ; SKX-NEXT: vpmovw2m %xmm0, %k0
2573 ; SKX-NEXT: kmovb %k0, (%rdi)
2576 ; AVX512BW-LABEL: store_8i1_1:
2577 ; AVX512BW: ## %bb.0:
2578 ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
2579 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
2580 ; AVX512BW-NEXT: kmovd %k0, %eax
2581 ; AVX512BW-NEXT: movb %al, (%rdi)
2582 ; AVX512BW-NEXT: vzeroupper
2583 ; AVX512BW-NEXT: retq
2585 ; AVX512DQ-LABEL: store_8i1_1:
2586 ; AVX512DQ: ## %bb.0:
2587 ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
2588 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
2589 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
2590 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
2591 ; AVX512DQ-NEXT: vzeroupper
2592 ; AVX512DQ-NEXT: retq
2594 ; X86-LABEL: store_8i1_1:
2596 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2597 ; X86-NEXT: vpsllw $15, %xmm0, %xmm0
2598 ; X86-NEXT: vpmovw2m %xmm0, %k0
2599 ; X86-NEXT: kmovb %k0, (%eax)
2601 %v1 = trunc <8 x i16> %v to <8 x i1>
2602 store <8 x i1> %v1, <8 x i1>* %a
; store_16i1: stores a <16 x i1> argument to memory; every configuration is
; expected to finish with a single 16-bit kmovw store of the mask register.
2606 define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) {
2607 ; KNL-LABEL: store_16i1:
2609 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
2610 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2611 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
2612 ; KNL-NEXT: kmovw %k0, (%rdi)
2613 ; KNL-NEXT: vzeroupper
2616 ; SKX-LABEL: store_16i1:
2618 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
2619 ; SKX-NEXT: vpmovb2m %xmm0, %k0
2620 ; SKX-NEXT: kmovw %k0, (%rdi)
2623 ; AVX512BW-LABEL: store_16i1:
2624 ; AVX512BW: ## %bb.0:
2625 ; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
2626 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
2627 ; AVX512BW-NEXT: kmovw %k0, (%rdi)
2628 ; AVX512BW-NEXT: vzeroupper
2629 ; AVX512BW-NEXT: retq
2631 ; AVX512DQ-LABEL: store_16i1:
2632 ; AVX512DQ: ## %bb.0:
2633 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
2634 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2635 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
2636 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
2637 ; AVX512DQ-NEXT: vzeroupper
2638 ; AVX512DQ-NEXT: retq
2640 ; X86-LABEL: store_16i1:
2642 ; X86-NEXT: vpsllw $7, %xmm0, %xmm0
2643 ; X86-NEXT: vpmovb2m %xmm0, %k0
2644 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2645 ; X86-NEXT: kmovw %k0, (%eax)
2647 store <16 x i1> %v, <16 x i1>* %a
; store_32i1: stores a <32 x i1> argument to memory. The expected output is
; one 32-bit kmovd store where available, and two 16-bit kmovw stores
; (offsets 0 and 2) built from the two 128-bit halves otherwise.
2651 define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) {
2652 ; KNL-LABEL: store_32i1:
2654 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
2655 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
2656 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
2657 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
2658 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
2659 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2660 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
2661 ; KNL-NEXT: kmovw %k1, 2(%rdi)
2662 ; KNL-NEXT: kmovw %k0, (%rdi)
2663 ; KNL-NEXT: vzeroupper
2666 ; SKX-LABEL: store_32i1:
2668 ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
2669 ; SKX-NEXT: vpmovb2m %ymm0, %k0
2670 ; SKX-NEXT: kmovd %k0, (%rdi)
2671 ; SKX-NEXT: vzeroupper
2674 ; AVX512BW-LABEL: store_32i1:
2675 ; AVX512BW: ## %bb.0:
2676 ; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0
2677 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
2678 ; AVX512BW-NEXT: kmovd %k0, (%rdi)
2679 ; AVX512BW-NEXT: vzeroupper
2680 ; AVX512BW-NEXT: retq
2682 ; AVX512DQ-LABEL: store_32i1:
2683 ; AVX512DQ: ## %bb.0:
2684 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1
2685 ; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1
2686 ; AVX512DQ-NEXT: vpmovd2m %zmm1, %k0
2687 ; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0
2688 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
2689 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2690 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
2691 ; AVX512DQ-NEXT: kmovw %k1, 2(%rdi)
2692 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
2693 ; AVX512DQ-NEXT: vzeroupper
2694 ; AVX512DQ-NEXT: retq
2696 ; X86-LABEL: store_32i1:
2698 ; X86-NEXT: vpsllw $7, %ymm0, %ymm0
2699 ; X86-NEXT: vpmovb2m %ymm0, %k0
2700 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2701 ; X86-NEXT: kmovd %k0, (%eax)
2702 ; X86-NEXT: vzeroupper
2704 store <32 x i1> %v, <32 x i1>* %a
; store_32i1_1: truncates a <32 x i16> argument to <32 x i1> (keeping the
; low bit of each lane) and stores the resulting mask to memory.
2708 define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) {
2709 ; KNL-LABEL: store_32i1_1:
2711 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
2712 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
2713 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2714 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
2715 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm0
2716 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2717 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
2718 ; KNL-NEXT: kmovw %k1, 2(%rdi)
2719 ; KNL-NEXT: kmovw %k0, (%rdi)
2720 ; KNL-NEXT: vzeroupper
2723 ; SKX-LABEL: store_32i1_1:
2725 ; SKX-NEXT: vpsllw $15, %zmm0, %zmm0
2726 ; SKX-NEXT: vpmovw2m %zmm0, %k0
2727 ; SKX-NEXT: kmovd %k0, (%rdi)
2728 ; SKX-NEXT: vzeroupper
2731 ; AVX512BW-LABEL: store_32i1_1:
2732 ; AVX512BW: ## %bb.0:
2733 ; AVX512BW-NEXT: vpsllw $15, %zmm0, %zmm0
2734 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
2735 ; AVX512BW-NEXT: kmovd %k0, (%rdi)
2736 ; AVX512BW-NEXT: vzeroupper
2737 ; AVX512BW-NEXT: retq
2739 ; AVX512DQ-LABEL: store_32i1_1:
2740 ; AVX512DQ: ## %bb.0:
2741 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
2742 ; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
2743 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2744 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
2745 ; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm0
2746 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2747 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
2748 ; AVX512DQ-NEXT: kmovw %k1, 2(%rdi)
2749 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
2750 ; AVX512DQ-NEXT: vzeroupper
2751 ; AVX512DQ-NEXT: retq
2753 ; X86-LABEL: store_32i1_1:
2755 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2756 ; X86-NEXT: vpsllw $15, %zmm0, %zmm0
2757 ; X86-NEXT: vpmovw2m %zmm0, %k0
2758 ; X86-NEXT: kmovd %k0, (%eax)
2759 ; X86-NEXT: vzeroupper
2761 %v1 = trunc <32 x i16> %v to <32 x i1>
2762 store <32 x i1> %v1, <32 x i1>* %a
; store_64i1: stores a <64 x i1> argument to memory. The expected output is
; one 64-bit kmovq store where available, and four 16-bit kmovw stores
; (offsets 0, 2, 4, 6) built from four separate vector registers otherwise.
2767 define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
2769 ; KNL-LABEL: store_64i1:
2771 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
2772 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2773 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
2774 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
2775 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2776 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
2777 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm0
2778 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2779 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k2
2780 ; KNL-NEXT: vpmovsxbd %xmm3, %zmm0
2781 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2782 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k3
2783 ; KNL-NEXT: kmovw %k3, 6(%rdi)
2784 ; KNL-NEXT: kmovw %k2, 4(%rdi)
2785 ; KNL-NEXT: kmovw %k1, 2(%rdi)
2786 ; KNL-NEXT: kmovw %k0, (%rdi)
2787 ; KNL-NEXT: vzeroupper
2790 ; SKX-LABEL: store_64i1:
2792 ; SKX-NEXT: vpsllw $7, %zmm0, %zmm0
2793 ; SKX-NEXT: vpmovb2m %zmm0, %k0
2794 ; SKX-NEXT: kmovq %k0, (%rdi)
2795 ; SKX-NEXT: vzeroupper
2798 ; AVX512BW-LABEL: store_64i1:
2799 ; AVX512BW: ## %bb.0:
2800 ; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0
2801 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
2802 ; AVX512BW-NEXT: kmovq %k0, (%rdi)
2803 ; AVX512BW-NEXT: vzeroupper
2804 ; AVX512BW-NEXT: retq
2806 ; AVX512DQ-LABEL: store_64i1:
2807 ; AVX512DQ: ## %bb.0:
2808 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
2809 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2810 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
2811 ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0
2812 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2813 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
2814 ; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm0
2815 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2816 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k2
2817 ; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm0
2818 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2819 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k3
2820 ; AVX512DQ-NEXT: kmovw %k3, 6(%rdi)
2821 ; AVX512DQ-NEXT: kmovw %k2, 4(%rdi)
2822 ; AVX512DQ-NEXT: kmovw %k1, 2(%rdi)
2823 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
2824 ; AVX512DQ-NEXT: vzeroupper
2825 ; AVX512DQ-NEXT: retq
2827 ; X86-LABEL: store_64i1:
2829 ; X86-NEXT: vpsllw $7, %zmm0, %zmm0
2830 ; X86-NEXT: vpmovb2m %zmm0, %k0
2831 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2832 ; X86-NEXT: kmovq %k0, (%eax)
2833 ; X86-NEXT: vzeroupper
2835 store <64 x i1> %v, <64 x i1>* %a
; test_bitcast_v8i1_zext: compares a <16 x i32> against zero, keeps the low
; eight lanes of the compare mask, bitcasts them to i8, zero-extends to i32
; and doubles the value. The expected output needs an explicit movzbl only
; where an 8-bit kmovb into a general-purpose register is not used.
2839 define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
2840 ; KNL-LABEL: test_bitcast_v8i1_zext:
2842 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
2843 ; KNL-NEXT: kmovw %k0, %eax
2844 ; KNL-NEXT: movzbl %al, %eax
2845 ; KNL-NEXT: addl %eax, %eax
2846 ; KNL-NEXT: vzeroupper
2849 ; SKX-LABEL: test_bitcast_v8i1_zext:
2851 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
2852 ; SKX-NEXT: kmovb %k0, %eax
2853 ; SKX-NEXT: addl %eax, %eax
2854 ; SKX-NEXT: vzeroupper
2857 ; AVX512BW-LABEL: test_bitcast_v8i1_zext:
2858 ; AVX512BW: ## %bb.0:
2859 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
2860 ; AVX512BW-NEXT: kmovd %k0, %eax
2861 ; AVX512BW-NEXT: movzbl %al, %eax
2862 ; AVX512BW-NEXT: addl %eax, %eax
2863 ; AVX512BW-NEXT: vzeroupper
2864 ; AVX512BW-NEXT: retq
2866 ; AVX512DQ-LABEL: test_bitcast_v8i1_zext:
2867 ; AVX512DQ: ## %bb.0:
2868 ; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
2869 ; AVX512DQ-NEXT: kmovb %k0, %eax
2870 ; AVX512DQ-NEXT: addl %eax, %eax
2871 ; AVX512DQ-NEXT: vzeroupper
2872 ; AVX512DQ-NEXT: retq
2874 ; X86-LABEL: test_bitcast_v8i1_zext:
2876 ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
2877 ; X86-NEXT: kmovb %k0, %eax
2878 ; X86-NEXT: addl %eax, %eax
2879 ; X86-NEXT: vzeroupper
2881 %v1 = icmp eq <16 x i32> %a, zeroinitializer
; The shuffle selects lanes 0..7, i.e. the low half of the 16-lane mask.
2882 %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2883 %mask1 = bitcast <8 x i1> %mask to i8
2884 %val = zext i8 %mask1 to i32
2885 %val1 = add i32 %val, %val
; test_bitcast_v16i1_zext: compares a <16 x i32> against zero, bitcasts the
; full <16 x i1> compare mask to i16, zero-extends to i32 and doubles the
; value. All 64-bit configurations share one expected sequence here.
2889 define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) {
2890 ; CHECK-LABEL: test_bitcast_v16i1_zext:
2892 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0
2893 ; CHECK-NEXT: kmovw %k0, %eax
2894 ; CHECK-NEXT: addl %eax, %eax
2895 ; CHECK-NEXT: vzeroupper
2898 ; X86-LABEL: test_bitcast_v16i1_zext:
2900 ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
2901 ; X86-NEXT: kmovw %k0, %eax
2902 ; X86-NEXT: addl %eax, %eax
2903 ; X86-NEXT: vzeroupper
2905 %v1 = icmp eq <16 x i32> %a, zeroinitializer
2906 %mask1 = bitcast <16 x i1> %v1 to i16
2907 %val = zext i16 %mask1 to i32
2908 %val1 = add i32 %val, %val
; test_v16i1_add: bitcasts two i16 values to <16 x i1>, adds them lane-wise
; and bitcasts the sum back to i16. Addition on 1-bit lanes is expected to
; be emitted as a mask XOR (kxorw) in every configuration.
2912 define i16 @test_v16i1_add(i16 %x, i16 %y) {
2913 ; KNL-LABEL: test_v16i1_add:
2915 ; KNL-NEXT: kmovw %edi, %k0
2916 ; KNL-NEXT: kmovw %esi, %k1
2917 ; KNL-NEXT: kxorw %k1, %k0, %k0
2918 ; KNL-NEXT: kmovw %k0, %eax
2919 ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
2922 ; SKX-LABEL: test_v16i1_add:
2924 ; SKX-NEXT: kmovd %edi, %k0
2925 ; SKX-NEXT: kmovd %esi, %k1
2926 ; SKX-NEXT: kxorw %k1, %k0, %k0
2927 ; SKX-NEXT: kmovd %k0, %eax
2928 ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
2931 ; AVX512BW-LABEL: test_v16i1_add:
2932 ; AVX512BW: ## %bb.0:
2933 ; AVX512BW-NEXT: kmovd %edi, %k0
2934 ; AVX512BW-NEXT: kmovd %esi, %k1
2935 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
2936 ; AVX512BW-NEXT: kmovd %k0, %eax
2937 ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
2938 ; AVX512BW-NEXT: retq
2940 ; AVX512DQ-LABEL: test_v16i1_add:
2941 ; AVX512DQ: ## %bb.0:
2942 ; AVX512DQ-NEXT: kmovw %edi, %k0
2943 ; AVX512DQ-NEXT: kmovw %esi, %k1
2944 ; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
2945 ; AVX512DQ-NEXT: kmovw %k0, %eax
2946 ; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
2947 ; AVX512DQ-NEXT: retq
2949 ; X86-LABEL: test_v16i1_add:
2951 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0
2952 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
2953 ; X86-NEXT: kxorw %k1, %k0, %k0
2954 ; X86-NEXT: kmovd %k0, %eax
2955 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
2957 %m0 = bitcast i16 %x to <16 x i1>
2958 %m1 = bitcast i16 %y to <16 x i1>
2959 %m2 = add <16 x i1> %m0, %m1
2960 %ret = bitcast <16 x i1> %m2 to i16
; test_v16i1_sub: bitcasts two i16 values to <16 x i1>, subtracts them
; lane-wise and bitcasts the difference back to i16. Subtraction on 1-bit
; lanes is expected to be emitted as a mask XOR (kxorw), same as addition.
2964 define i16 @test_v16i1_sub(i16 %x, i16 %y) {
2965 ; KNL-LABEL: test_v16i1_sub:
2967 ; KNL-NEXT: kmovw %edi, %k0
2968 ; KNL-NEXT: kmovw %esi, %k1
2969 ; KNL-NEXT: kxorw %k1, %k0, %k0
2970 ; KNL-NEXT: kmovw %k0, %eax
2971 ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
2974 ; SKX-LABEL: test_v16i1_sub:
2976 ; SKX-NEXT: kmovd %edi, %k0
2977 ; SKX-NEXT: kmovd %esi, %k1
2978 ; SKX-NEXT: kxorw %k1, %k0, %k0
2979 ; SKX-NEXT: kmovd %k0, %eax
2980 ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
2983 ; AVX512BW-LABEL: test_v16i1_sub:
2984 ; AVX512BW: ## %bb.0:
2985 ; AVX512BW-NEXT: kmovd %edi, %k0
2986 ; AVX512BW-NEXT: kmovd %esi, %k1
2987 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
2988 ; AVX512BW-NEXT: kmovd %k0, %eax
2989 ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
2990 ; AVX512BW-NEXT: retq
2992 ; AVX512DQ-LABEL: test_v16i1_sub:
2993 ; AVX512DQ: ## %bb.0:
2994 ; AVX512DQ-NEXT: kmovw %edi, %k0
2995 ; AVX512DQ-NEXT: kmovw %esi, %k1
2996 ; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
2997 ; AVX512DQ-NEXT: kmovw %k0, %eax
2998 ; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
2999 ; AVX512DQ-NEXT: retq
3001 ; X86-LABEL: test_v16i1_sub:
3003 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0
3004 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
3005 ; X86-NEXT: kxorw %k1, %k0, %k0
3006 ; X86-NEXT: kmovd %k0, %eax
3007 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
3009 %m0 = bitcast i16 %x to <16 x i1>
3010 %m1 = bitcast i16 %y to <16 x i1>
3011 %m2 = sub <16 x i1> %m0, %m1
3012 %ret = bitcast <16 x i1> %m2 to i16
; test_v16i1_mul: bitcasts two i16 values to <16 x i1>, multiplies them
; lane-wise and bitcasts the product back to i16. Multiplication on 1-bit
; lanes is expected to be emitted as a mask AND (kandw).
3016 define i16 @test_v16i1_mul(i16 %x, i16 %y) {
3017 ; KNL-LABEL: test_v16i1_mul:
3019 ; KNL-NEXT: kmovw %edi, %k0
3020 ; KNL-NEXT: kmovw %esi, %k1
3021 ; KNL-NEXT: kandw %k1, %k0, %k0
3022 ; KNL-NEXT: kmovw %k0, %eax
3023 ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
3026 ; SKX-LABEL: test_v16i1_mul:
3028 ; SKX-NEXT: kmovd %edi, %k0
3029 ; SKX-NEXT: kmovd %esi, %k1
3030 ; SKX-NEXT: kandw %k1, %k0, %k0
3031 ; SKX-NEXT: kmovd %k0, %eax
3032 ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
3035 ; AVX512BW-LABEL: test_v16i1_mul:
3036 ; AVX512BW: ## %bb.0:
3037 ; AVX512BW-NEXT: kmovd %edi, %k0
3038 ; AVX512BW-NEXT: kmovd %esi, %k1
3039 ; AVX512BW-NEXT: kandw %k1, %k0, %k0
3040 ; AVX512BW-NEXT: kmovd %k0, %eax
3041 ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
3042 ; AVX512BW-NEXT: retq
3044 ; AVX512DQ-LABEL: test_v16i1_mul:
3045 ; AVX512DQ: ## %bb.0:
3046 ; AVX512DQ-NEXT: kmovw %edi, %k0
3047 ; AVX512DQ-NEXT: kmovw %esi, %k1
3048 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0
3049 ; AVX512DQ-NEXT: kmovw %k0, %eax
3050 ; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
3051 ; AVX512DQ-NEXT: retq
3053 ; X86-LABEL: test_v16i1_mul:
3055 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0
3056 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
3057 ; X86-NEXT: kandw %k1, %k0, %k0
3058 ; X86-NEXT: kmovd %k0, %eax
3059 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
3061 %m0 = bitcast i16 %x to <16 x i1>
3062 %m1 = bitcast i16 %y to <16 x i1>
3063 %m2 = mul <16 x i1> %m0, %m1
3064 %ret = bitcast <16 x i1> %m2 to i16
; test_v8i1_add: bitcasts two i8 values to <8 x i1>, adds them lane-wise and
; bitcasts the sum back to i8. The expected output is a mask XOR: the 8-bit
; kxorb form in some configurations and the 16-bit kxorw form in the others.
3068 define i8 @test_v8i1_add(i8 %x, i8 %y) {
3069 ; KNL-LABEL: test_v8i1_add:
3071 ; KNL-NEXT: kmovw %edi, %k0
3072 ; KNL-NEXT: kmovw %esi, %k1
3073 ; KNL-NEXT: kxorw %k1, %k0, %k0
3074 ; KNL-NEXT: kmovw %k0, %eax
3075 ; KNL-NEXT: ## kill: def $al killed $al killed $eax
3078 ; SKX-LABEL: test_v8i1_add:
3080 ; SKX-NEXT: kmovd %edi, %k0
3081 ; SKX-NEXT: kmovd %esi, %k1
3082 ; SKX-NEXT: kxorb %k1, %k0, %k0
3083 ; SKX-NEXT: kmovd %k0, %eax
3084 ; SKX-NEXT: ## kill: def $al killed $al killed $eax
3087 ; AVX512BW-LABEL: test_v8i1_add:
3088 ; AVX512BW: ## %bb.0:
3089 ; AVX512BW-NEXT: kmovd %edi, %k0
3090 ; AVX512BW-NEXT: kmovd %esi, %k1
3091 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
3092 ; AVX512BW-NEXT: kmovd %k0, %eax
3093 ; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
3094 ; AVX512BW-NEXT: retq
3096 ; AVX512DQ-LABEL: test_v8i1_add:
3097 ; AVX512DQ: ## %bb.0:
3098 ; AVX512DQ-NEXT: kmovw %edi, %k0
3099 ; AVX512DQ-NEXT: kmovw %esi, %k1
3100 ; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
3101 ; AVX512DQ-NEXT: kmovw %k0, %eax
3102 ; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
3103 ; AVX512DQ-NEXT: retq
3105 ; X86-LABEL: test_v8i1_add:
3107 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
3108 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
3109 ; X86-NEXT: kxorb %k1, %k0, %k0
3110 ; X86-NEXT: kmovd %k0, %eax
3111 ; X86-NEXT: ## kill: def $al killed $al killed $eax
3113 %m0 = bitcast i8 %x to <8 x i1>
3114 %m1 = bitcast i8 %y to <8 x i1>
3115 %m2 = add <8 x i1> %m0, %m1
3116 %ret = bitcast <8 x i1> %m2 to i8
; test_v8i1_sub: bitcasts two i8 values to <8 x i1>, subtracts them
; lane-wise and bitcasts the difference back to i8. The expected output is
; a mask XOR: the 8-bit kxorb form in some configurations and the 16-bit
; kxorw form in the others.
3120 define i8 @test_v8i1_sub(i8 %x, i8 %y) {
3121 ; KNL-LABEL: test_v8i1_sub:
3123 ; KNL-NEXT: kmovw %edi, %k0
3124 ; KNL-NEXT: kmovw %esi, %k1
3125 ; KNL-NEXT: kxorw %k1, %k0, %k0
3126 ; KNL-NEXT: kmovw %k0, %eax
3127 ; KNL-NEXT: ## kill: def $al killed $al killed $eax
3130 ; SKX-LABEL: test_v8i1_sub:
3132 ; SKX-NEXT: kmovd %edi, %k0
3133 ; SKX-NEXT: kmovd %esi, %k1
3134 ; SKX-NEXT: kxorb %k1, %k0, %k0
3135 ; SKX-NEXT: kmovd %k0, %eax
3136 ; SKX-NEXT: ## kill: def $al killed $al killed $eax
3139 ; AVX512BW-LABEL: test_v8i1_sub:
3140 ; AVX512BW: ## %bb.0:
3141 ; AVX512BW-NEXT: kmovd %edi, %k0
3142 ; AVX512BW-NEXT: kmovd %esi, %k1
3143 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
3144 ; AVX512BW-NEXT: kmovd %k0, %eax
3145 ; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
3146 ; AVX512BW-NEXT: retq
3148 ; AVX512DQ-LABEL: test_v8i1_sub:
3149 ; AVX512DQ: ## %bb.0:
3150 ; AVX512DQ-NEXT: kmovw %edi, %k0
3151 ; AVX512DQ-NEXT: kmovw %esi, %k1
3152 ; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
3153 ; AVX512DQ-NEXT: kmovw %k0, %eax
3154 ; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
3155 ; AVX512DQ-NEXT: retq
3157 ; X86-LABEL: test_v8i1_sub:
3159 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
3160 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
3161 ; X86-NEXT: kxorb %k1, %k0, %k0
3162 ; X86-NEXT: kmovd %k0, %eax
3163 ; X86-NEXT: ## kill: def $al killed $al killed $eax
3165 %m0 = bitcast i8 %x to <8 x i1>
3166 %m1 = bitcast i8 %y to <8 x i1>
3167 %m2 = sub <8 x i1> %m0, %m1
3168 %ret = bitcast <8 x i1> %m2 to i8
; Multiplication of <8 x i1> vectors is per-lane arithmetic modulo 2, so the
; expected lowering is a mask AND: kandw on the runs without AVX512DQ and
; kandb on the runs that enable it.
3172 define i8 @test_v8i1_mul(i8 %x, i8 %y) {
3173 ; KNL-LABEL: test_v8i1_mul:
3175 ; KNL-NEXT: kmovw %edi, %k0
3176 ; KNL-NEXT: kmovw %esi, %k1
3177 ; KNL-NEXT: kandw %k1, %k0, %k0
3178 ; KNL-NEXT: kmovw %k0, %eax
3179 ; KNL-NEXT: ## kill: def $al killed $al killed $eax
3182 ; SKX-LABEL: test_v8i1_mul:
3184 ; SKX-NEXT: kmovd %edi, %k0
3185 ; SKX-NEXT: kmovd %esi, %k1
3186 ; SKX-NEXT: kandb %k1, %k0, %k0
3187 ; SKX-NEXT: kmovd %k0, %eax
3188 ; SKX-NEXT: ## kill: def $al killed $al killed $eax
3191 ; AVX512BW-LABEL: test_v8i1_mul:
3192 ; AVX512BW: ## %bb.0:
3193 ; AVX512BW-NEXT: kmovd %edi, %k0
3194 ; AVX512BW-NEXT: kmovd %esi, %k1
3195 ; AVX512BW-NEXT: kandw %k1, %k0, %k0
3196 ; AVX512BW-NEXT: kmovd %k0, %eax
3197 ; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
3198 ; AVX512BW-NEXT: retq
3200 ; AVX512DQ-LABEL: test_v8i1_mul:
3201 ; AVX512DQ: ## %bb.0:
3202 ; AVX512DQ-NEXT: kmovw %edi, %k0
3203 ; AVX512DQ-NEXT: kmovw %esi, %k1
3204 ; AVX512DQ-NEXT: kandb %k1, %k0, %k0
3205 ; AVX512DQ-NEXT: kmovw %k0, %eax
3206 ; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
3207 ; AVX512DQ-NEXT: retq
3209 ; X86-LABEL: test_v8i1_mul:
3211 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
3212 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
3213 ; X86-NEXT: kandb %k1, %k0, %k0
3214 ; X86-NEXT: kmovd %k0, %eax
3215 ; X86-NEXT: ## kill: def $al killed $al killed $eax
; Reinterpret both scalar arguments as 8-lane masks, multiply, and return the
; result as a scalar again.
3217 %m0 = bitcast i8 %x to <8 x i1>
3218 %m1 = bitcast i8 %y to <8 x i1>
3219 %m2 = mul <8 x i1> %m0, %m1
3220 %ret = bitcast <8 x i1> %m2 to i8
3224 ; Make sure we don't emit a ktest for signed comparisons.
; The two compares against zero are expected to fold into one vptestnmd of
; (x | y). The resulting 16-bit mask is then compared as a signed integer, and
; every run is expected to do that with testw + jle on a GPR copy of the mask.
3225 define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
3226 ; KNL-LABEL: ktest_signed:
3228 ; KNL-NEXT: pushq %rax
3229 ; KNL-NEXT: .cfi_def_cfa_offset 16
3230 ; KNL-NEXT: vpord %zmm1, %zmm0, %zmm0
3231 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
3232 ; KNL-NEXT: kmovw %k0, %eax
3233 ; KNL-NEXT: testw %ax, %ax
3234 ; KNL-NEXT: jle LBB65_1
3235 ; KNL-NEXT: ## %bb.2: ## %bb.2
3236 ; KNL-NEXT: popq %rax
3237 ; KNL-NEXT: vzeroupper
3239 ; KNL-NEXT: LBB65_1: ## %bb.1
3240 ; KNL-NEXT: vzeroupper
3241 ; KNL-NEXT: callq _foo
3242 ; KNL-NEXT: popq %rax
3245 ; SKX-LABEL: ktest_signed:
3247 ; SKX-NEXT: pushq %rax
3248 ; SKX-NEXT: .cfi_def_cfa_offset 16
3249 ; SKX-NEXT: vpord %zmm1, %zmm0, %zmm0
3250 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
3251 ; SKX-NEXT: kmovd %k0, %eax
3252 ; SKX-NEXT: testw %ax, %ax
3253 ; SKX-NEXT: jle LBB65_1
3254 ; SKX-NEXT: ## %bb.2: ## %bb.2
3255 ; SKX-NEXT: popq %rax
3256 ; SKX-NEXT: vzeroupper
3258 ; SKX-NEXT: LBB65_1: ## %bb.1
3259 ; SKX-NEXT: vzeroupper
3260 ; SKX-NEXT: callq _foo
3261 ; SKX-NEXT: popq %rax
3264 ; AVX512BW-LABEL: ktest_signed:
3265 ; AVX512BW: ## %bb.0:
3266 ; AVX512BW-NEXT: pushq %rax
3267 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
3268 ; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
3269 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
3270 ; AVX512BW-NEXT: kmovd %k0, %eax
3271 ; AVX512BW-NEXT: testw %ax, %ax
3272 ; AVX512BW-NEXT: jle LBB65_1
3273 ; AVX512BW-NEXT: ## %bb.2: ## %bb.2
3274 ; AVX512BW-NEXT: popq %rax
3275 ; AVX512BW-NEXT: vzeroupper
3276 ; AVX512BW-NEXT: retq
3277 ; AVX512BW-NEXT: LBB65_1: ## %bb.1
3278 ; AVX512BW-NEXT: vzeroupper
3279 ; AVX512BW-NEXT: callq _foo
3280 ; AVX512BW-NEXT: popq %rax
3281 ; AVX512BW-NEXT: retq
3283 ; AVX512DQ-LABEL: ktest_signed:
3284 ; AVX512DQ: ## %bb.0:
3285 ; AVX512DQ-NEXT: pushq %rax
3286 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
3287 ; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
3288 ; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
3289 ; AVX512DQ-NEXT: kmovw %k0, %eax
3290 ; AVX512DQ-NEXT: testw %ax, %ax
3291 ; AVX512DQ-NEXT: jle LBB65_1
3292 ; AVX512DQ-NEXT: ## %bb.2: ## %bb.2
3293 ; AVX512DQ-NEXT: popq %rax
3294 ; AVX512DQ-NEXT: vzeroupper
3295 ; AVX512DQ-NEXT: retq
3296 ; AVX512DQ-NEXT: LBB65_1: ## %bb.1
3297 ; AVX512DQ-NEXT: vzeroupper
3298 ; AVX512DQ-NEXT: callq _foo
3299 ; AVX512DQ-NEXT: popq %rax
3300 ; AVX512DQ-NEXT: retq
3302 ; X86-LABEL: ktest_signed:
3304 ; X86-NEXT: subl $12, %esp
3305 ; X86-NEXT: .cfi_def_cfa_offset 16
3306 ; X86-NEXT: vpord %zmm1, %zmm0, %zmm0
3307 ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
3308 ; X86-NEXT: kmovd %k0, %eax
3309 ; X86-NEXT: testw %ax, %ax
3310 ; X86-NEXT: jle LBB65_1
3311 ; X86-NEXT: ## %bb.2: ## %bb.2
3312 ; X86-NEXT: addl $12, %esp
3313 ; X86-NEXT: vzeroupper
3315 ; X86-NEXT: LBB65_1: ## %bb.1
3316 ; X86-NEXT: vzeroupper
3317 ; X86-NEXT: calll _foo
3318 ; X86-NEXT: addl $12, %esp
; Build a 16-lane mask of the lanes where both %x and %y are zero.
3320 %a = icmp eq <16 x i32> %x, zeroinitializer
3321 %b = icmp eq <16 x i32> %y, zeroinitializer
3322 %c = and <16 x i1> %a, %b
3323 %d = bitcast <16 x i1> %c to i16
; Signed greater-than-zero test on the mask value; this is the comparison the
; test is about.
3324 %e = icmp sgt i16 %d, 0
3325 br i1 %e, label %bb.2, label %bb.1
3334 ; Make sure we can use the C flag from kortest to check for all ones.
; The 16-lane mask is compared against -1 (all lanes set). The expected code
; tests the mask with kortestw and branches with jb, i.e. on the carry flag,
; without first moving the mask to a general-purpose register.
3335 define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
3336 ; CHECK-LABEL: ktest_allones:
3338 ; CHECK-NEXT: pushq %rax
3339 ; CHECK-NEXT: .cfi_def_cfa_offset 16
3340 ; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0
3341 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0
3342 ; CHECK-NEXT: kortestw %k0, %k0
3343 ; CHECK-NEXT: jb LBB66_2
3344 ; CHECK-NEXT: ## %bb.1: ## %bb.1
3345 ; CHECK-NEXT: vzeroupper
3346 ; CHECK-NEXT: callq _foo
3347 ; CHECK-NEXT: LBB66_2: ## %bb.2
3348 ; CHECK-NEXT: popq %rax
3349 ; CHECK-NEXT: vzeroupper
3352 ; X86-LABEL: ktest_allones:
3354 ; X86-NEXT: subl $12, %esp
3355 ; X86-NEXT: .cfi_def_cfa_offset 16
3356 ; X86-NEXT: vpord %zmm1, %zmm0, %zmm0
3357 ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
3358 ; X86-NEXT: kortestw %k0, %k0
3359 ; X86-NEXT: jb LBB66_2
3360 ; X86-NEXT: ## %bb.1: ## %bb.1
3361 ; X86-NEXT: vzeroupper
3362 ; X86-NEXT: calll _foo
3363 ; X86-NEXT: LBB66_2: ## %bb.2
3364 ; X86-NEXT: addl $12, %esp
3365 ; X86-NEXT: vzeroupper
; Build a 16-lane mask of the lanes where both %x and %y are zero.
3367 %a = icmp eq <16 x i32> %x, zeroinitializer
3368 %b = icmp eq <16 x i32> %y, zeroinitializer
3369 %c = and <16 x i1> %a, %b
3370 %d = bitcast <16 x i1> %c to i16
; Equality with -1 means every one of the 16 mask bits is set.
3371 %e = icmp eq i16 %d, -1
3372 br i1 %e, label %bb.2, label %bb.1
3380 ; This is derived from an intrinsic test where a v4i1 mask was created by _mm_cmp_epi32_mask and then passed to _mm512_mask_blend_epi32, which uses a v16i1 mask.
3381 ; The widening happens in the scalar domain between the intrinsics. The middle end optimized it to this.
; A 4-lane compare result is widened to 16 lanes with zero-filled upper lanes
; and used as a select mask. The runs with AVX512VL are expected to use the
; 128-bit compare result directly as the blend mask. The other runs compare in
; zmm registers and clear the upper 12 mask bits with a kshiftlw/kshiftrw pair.
3382 define <8 x i64> @mask_widening(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d, <8 x i64> %e, <8 x i64> %f) {
3383 ; KNL-LABEL: mask_widening:
3384 ; KNL: ## %bb.0: ## %entry
3385 ; KNL-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
3386 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
3387 ; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
3388 ; KNL-NEXT: kshiftlw $12, %k0, %k0
3389 ; KNL-NEXT: kshiftrw $12, %k0, %k1
3390 ; KNL-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
3393 ; SKX-LABEL: mask_widening:
3394 ; SKX: ## %bb.0: ## %entry
3395 ; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
3396 ; SKX-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
3399 ; AVX512BW-LABEL: mask_widening:
3400 ; AVX512BW: ## %bb.0: ## %entry
3401 ; AVX512BW-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
3402 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
3403 ; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
3404 ; AVX512BW-NEXT: kshiftlw $12, %k0, %k0
3405 ; AVX512BW-NEXT: kshiftrw $12, %k0, %k1
3406 ; AVX512BW-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
3407 ; AVX512BW-NEXT: retq
3409 ; AVX512DQ-LABEL: mask_widening:
3410 ; AVX512DQ: ## %bb.0: ## %entry
3411 ; AVX512DQ-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
3412 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
3413 ; AVX512DQ-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
3414 ; AVX512DQ-NEXT: kshiftlw $12, %k0, %k0
3415 ; AVX512DQ-NEXT: kshiftrw $12, %k0, %k1
3416 ; AVX512DQ-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
3417 ; AVX512DQ-NEXT: retq
3419 ; X86-LABEL: mask_widening:
3420 ; X86: ## %bb.0: ## %entry
3421 ; X86-NEXT: pushl %ebp
3422 ; X86-NEXT: .cfi_def_cfa_offset 8
3423 ; X86-NEXT: .cfi_offset %ebp, -8
3424 ; X86-NEXT: movl %esp, %ebp
3425 ; X86-NEXT: .cfi_def_cfa_register %ebp
3426 ; X86-NEXT: andl $-64, %esp
3427 ; X86-NEXT: subl $64, %esp
3428 ; X86-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
3429 ; X86-NEXT: vmovdqa64 8(%ebp), %zmm0
3430 ; X86-NEXT: vmovdqa32 72(%ebp), %zmm0 {%k1}
3431 ; X86-NEXT: movl %ebp, %esp
3432 ; X86-NEXT: popl %ebp
3435 %0 = bitcast <2 x i64> %a to <4 x i32>
3436 %1 = bitcast <2 x i64> %b to <4 x i32>
3437 %2 = icmp eq <4 x i32> %0, %1
; Widen 4 -> 8 lanes: indices 4-7 select from the zeroinitializer operand.
3438 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3439 %4 = bitcast <8 x i64> %f to <16 x i32>
3440 %5 = bitcast <8 x i64> %e to <16 x i32>
; Widen 8 -> 16 lanes: index 8 is element 0 of the second operand (i1 false),
; so lanes 8-15 of the mask are all false.
3441 %6 = shufflevector <8 x i1> %3, <8 x i1> <i1 false, i1 undef, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
3442 %7 = select <16 x i1> %6, <16 x i32> %4, <16 x i32> %5
3443 %8 = bitcast <16 x i32> %7 to <8 x i64>
; Storing a constant <64 x i1> is expected to become plain integer immediate
; stores: one 64-bit store on the x86-64 runs and two 32-bit stores on the
; i686 run.
3447 define void @store_v64i1_constant(<64 x i1>* %R) {
3448 ; CHECK-LABEL: store_v64i1_constant:
3449 ; CHECK: ## %bb.0: ## %entry
3450 ; CHECK-NEXT: movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
3451 ; CHECK-NEXT: movq %rax, (%rdi)
3454 ; X86-LABEL: store_v64i1_constant:
3455 ; X86: ## %bb.0: ## %entry
3456 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
3457 ; X86-NEXT: movl $-536871045, 4(%eax) ## imm = 0xDFFFFF7B
3458 ; X86-NEXT: movl $-4099, (%eax) ## imm = 0xEFFD
3461 store <64 x i1> <i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1>, <64 x i1>* %R
; Storing the constant <2 x i1> <1, 0> is expected to become a single byte
; store of the immediate 1 (lane 0 maps to bit 0).
3465 define void @store_v2i1_constant(<2 x i1>* %R) {
3466 ; CHECK-LABEL: store_v2i1_constant:
3467 ; CHECK: ## %bb.0: ## %entry
3468 ; CHECK-NEXT: movb $1, (%rdi)
3471 ; X86-LABEL: store_v2i1_constant:
3472 ; X86: ## %bb.0: ## %entry
3473 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
3474 ; X86-NEXT: movb $1, (%eax)
3477 store <2 x i1> <i1 1, i1 0>, <2 x i1>* %R
; Storing the constant <4 x i1> <1, 0, 1, 0> is expected to become a single
; byte store of the immediate 5 (lanes 0 and 2 map to bits 0 and 2).
3481 define void @store_v4i1_constant(<4 x i1>* %R) {
3482 ; CHECK-LABEL: store_v4i1_constant:
3483 ; CHECK: ## %bb.0: ## %entry
3484 ; CHECK-NEXT: movb $5, (%rdi)
3487 ; X86-LABEL: store_v4i1_constant:
3488 ; X86: ## %bb.0: ## %entry
3489 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
3490 ; X86-NEXT: movb $5, (%eax)
3493 store <4 x i1> <i1 1, i1 0, i1 1, i1 0>, <4 x i1>* %R
3497 ; Make sure we bring the -1 constant into the mask domain.
; The IR inverts a compare mask with a scalar `xor i16 ..., -1` and ANDs it
; with a second mask in the scalar domain. The expected code keeps everything
; in mask registers: the inversion shows up as the inverted compare predicate
; (vpcmpnleud for the `ule` compare), and the AND as the {%k1} write-mask on
; vptestmd.
3498 define void @mask_not_cast(i8*, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>) {
3499 ; CHECK-LABEL: mask_not_cast:
3501 ; CHECK-NEXT: vpcmpnleud %zmm3, %zmm2, %k1
3502 ; CHECK-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1}
3503 ; CHECK-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
3504 ; CHECK-NEXT: vzeroupper
3507 ; X86-LABEL: mask_not_cast:
3509 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
3510 ; X86-NEXT: vpcmpnleud %zmm3, %zmm2, %k1
3511 ; X86-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1}
3512 ; X86-NEXT: vmovdqu32 %zmm0, (%eax) {%k1}
3513 ; X86-NEXT: vzeroupper
; First mask (%9): lanes where (%2 & %1), viewed as <16 x i32>, is non-zero.
3515 %6 = and <8 x i64> %2, %1
3516 %7 = bitcast <8 x i64> %6 to <16 x i32>
3517 %8 = icmp ne <16 x i32> %7, zeroinitializer
3518 %9 = bitcast <16 x i1> %8 to i16
; Second mask (%13): unsigned %3 <= %4 per lane, then inverted as a scalar.
3519 %10 = bitcast <8 x i64> %3 to <16 x i32>
3520 %11 = bitcast <8 x i64> %4 to <16 x i32>
3521 %12 = icmp ule <16 x i32> %10, %11
3522 %13 = bitcast <16 x i1> %12 to i16
3523 %14 = xor i16 %13, -1
3524 %15 = and i16 %14, %9
; The combined scalar is cast back to <16 x i1> and used as the store mask.
3525 %16 = bitcast <8 x i64> %1 to <16 x i32>
3526 %17 = bitcast i8* %0 to <16 x i32>*
3527 %18 = bitcast i16 %15 to <16 x i1>
3528 tail call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> %16, <16 x i32>* %17, i32 1, <16 x i1> %18) #2
; Masked-store intrinsic called by @mask_not_cast; the operands are the value,
; the pointer, the alignment and the <16 x i1> mask.
3531 declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
; Branches on whether (a | b) & (c | d) is all-zero, where a..d are 8-lane
; masks from comparing <8 x i32> inputs against zero. The runs with AVX512DQ
; are expected to fold the final AND and the zero test into one ktestb. The
; other runs use kandw, a move to a general-purpose register and testb.
3533 define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
3534 ; KNL-LABEL: ktest_3:
3536 ; KNL-NEXT: pushq %rax
3537 ; KNL-NEXT: .cfi_def_cfa_offset 16
3538 ; KNL-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
3539 ; KNL-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
3540 ; KNL-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
3541 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
3542 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
3543 ; KNL-NEXT: vptestnmd %zmm1, %zmm1, %k1
3544 ; KNL-NEXT: vptestnmd %zmm2, %zmm2, %k2
3545 ; KNL-NEXT: vptestnmd %zmm3, %zmm3, %k3
3546 ; KNL-NEXT: korw %k1, %k0, %k0
3547 ; KNL-NEXT: korw %k3, %k2, %k1
3548 ; KNL-NEXT: kandw %k1, %k0, %k0
3549 ; KNL-NEXT: kmovw %k0, %eax
3550 ; KNL-NEXT: testb %al, %al
3551 ; KNL-NEXT: je LBB72_1
3552 ; KNL-NEXT: ## %bb.2: ## %exit
3553 ; KNL-NEXT: popq %rax
3554 ; KNL-NEXT: vzeroupper
3556 ; KNL-NEXT: LBB72_1: ## %bar
3557 ; KNL-NEXT: vzeroupper
3558 ; KNL-NEXT: callq _foo
3559 ; KNL-NEXT: popq %rax
3562 ; SKX-LABEL: ktest_3:
3564 ; SKX-NEXT: pushq %rax
3565 ; SKX-NEXT: .cfi_def_cfa_offset 16
3566 ; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k0
3567 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1
3568 ; SKX-NEXT: korb %k1, %k0, %k0
3569 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1
3570 ; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k2
3571 ; SKX-NEXT: korb %k2, %k1, %k1
3572 ; SKX-NEXT: ktestb %k1, %k0
3573 ; SKX-NEXT: je LBB72_1
3574 ; SKX-NEXT: ## %bb.2: ## %exit
3575 ; SKX-NEXT: popq %rax
3576 ; SKX-NEXT: vzeroupper
3578 ; SKX-NEXT: LBB72_1: ## %bar
3579 ; SKX-NEXT: vzeroupper
3580 ; SKX-NEXT: callq _foo
3581 ; SKX-NEXT: popq %rax
3584 ; AVX512BW-LABEL: ktest_3:
3585 ; AVX512BW: ## %bb.0:
3586 ; AVX512BW-NEXT: pushq %rax
3587 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
3588 ; AVX512BW-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
3589 ; AVX512BW-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
3590 ; AVX512BW-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
3591 ; AVX512BW-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
3592 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
3593 ; AVX512BW-NEXT: vptestnmd %zmm1, %zmm1, %k1
3594 ; AVX512BW-NEXT: vptestnmd %zmm2, %zmm2, %k2
3595 ; AVX512BW-NEXT: vptestnmd %zmm3, %zmm3, %k3
3596 ; AVX512BW-NEXT: korw %k1, %k0, %k0
3597 ; AVX512BW-NEXT: korw %k3, %k2, %k1
3598 ; AVX512BW-NEXT: kandw %k1, %k0, %k0
3599 ; AVX512BW-NEXT: kmovd %k0, %eax
3600 ; AVX512BW-NEXT: testb %al, %al
3601 ; AVX512BW-NEXT: je LBB72_1
3602 ; AVX512BW-NEXT: ## %bb.2: ## %exit
3603 ; AVX512BW-NEXT: popq %rax
3604 ; AVX512BW-NEXT: vzeroupper
3605 ; AVX512BW-NEXT: retq
3606 ; AVX512BW-NEXT: LBB72_1: ## %bar
3607 ; AVX512BW-NEXT: vzeroupper
3608 ; AVX512BW-NEXT: callq _foo
3609 ; AVX512BW-NEXT: popq %rax
3610 ; AVX512BW-NEXT: retq
3612 ; AVX512DQ-LABEL: ktest_3:
3613 ; AVX512DQ: ## %bb.0:
3614 ; AVX512DQ-NEXT: pushq %rax
3615 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
3616 ; AVX512DQ-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
3617 ; AVX512DQ-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
3618 ; AVX512DQ-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
3619 ; AVX512DQ-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
3620 ; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
3621 ; AVX512DQ-NEXT: vptestnmd %zmm1, %zmm1, %k1
3622 ; AVX512DQ-NEXT: vptestnmd %zmm2, %zmm2, %k2
3623 ; AVX512DQ-NEXT: vptestnmd %zmm3, %zmm3, %k3
3624 ; AVX512DQ-NEXT: korb %k1, %k0, %k0
3625 ; AVX512DQ-NEXT: korb %k3, %k2, %k1
3626 ; AVX512DQ-NEXT: ktestb %k1, %k0
3627 ; AVX512DQ-NEXT: je LBB72_1
3628 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
3629 ; AVX512DQ-NEXT: popq %rax
3630 ; AVX512DQ-NEXT: vzeroupper
3631 ; AVX512DQ-NEXT: retq
3632 ; AVX512DQ-NEXT: LBB72_1: ## %bar
3633 ; AVX512DQ-NEXT: vzeroupper
3634 ; AVX512DQ-NEXT: callq _foo
3635 ; AVX512DQ-NEXT: popq %rax
3636 ; AVX512DQ-NEXT: retq
3638 ; X86-LABEL: ktest_3:
3640 ; X86-NEXT: subl $12, %esp
3641 ; X86-NEXT: .cfi_def_cfa_offset 16
3642 ; X86-NEXT: vptestnmd %ymm0, %ymm0, %k0
3643 ; X86-NEXT: vptestnmd %ymm1, %ymm1, %k1
3644 ; X86-NEXT: korb %k1, %k0, %k0
3645 ; X86-NEXT: vptestnmd %ymm2, %ymm2, %k1
3646 ; X86-NEXT: vptestnmd %ymm3, %ymm3, %k2
3647 ; X86-NEXT: korb %k2, %k1, %k1
3648 ; X86-NEXT: ktestb %k1, %k0
3649 ; X86-NEXT: je LBB72_1
3650 ; X86-NEXT: ## %bb.2: ## %exit
3651 ; X86-NEXT: addl $12, %esp
3652 ; X86-NEXT: vzeroupper
3654 ; X86-NEXT: LBB72_1: ## %bar
3655 ; X86-NEXT: vzeroupper
3656 ; X86-NEXT: calll _foo
3657 ; X86-NEXT: addl $12, %esp
; Four "equals zero" masks, combined as (a | b) & (c | d).
3659 %a = icmp eq <8 x i32> %w, zeroinitializer
3660 %b = icmp eq <8 x i32> %x, zeroinitializer
3661 %c = icmp eq <8 x i32> %y, zeroinitializer
3662 %d = icmp eq <8 x i32> %z, zeroinitializer
3663 %e = or <8 x i1> %a, %b
3664 %f = or <8 x i1> %c, %d
3665 %g = and <8 x i1> %e, %f
; Branch to %bar when no lane of the combined mask is set.
3666 %h = bitcast <8 x i1> %g to i8
3667 %i = icmp eq i8 %h, 0
3668 br i1 %i, label %bar, label %exit
; Same shape as an 8-lane (a | b) & (c | d) == 0 branch, but with the masks
; produced from <8 x i64> compares against zero (vptestnmq). The runs with
; AVX512DQ are expected to use korb + ktestb. The other runs use korw, kandw,
; a move to a general-purpose register and testb.
3678 define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
3679 ; KNL-LABEL: ktest_4:
3681 ; KNL-NEXT: pushq %rax
3682 ; KNL-NEXT: .cfi_def_cfa_offset 16
3683 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
3684 ; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1
3685 ; KNL-NEXT: vptestnmq %zmm2, %zmm2, %k2
3686 ; KNL-NEXT: vptestnmq %zmm3, %zmm3, %k3
3687 ; KNL-NEXT: korw %k1, %k0, %k0
3688 ; KNL-NEXT: korw %k3, %k2, %k1
3689 ; KNL-NEXT: kandw %k1, %k0, %k0
3690 ; KNL-NEXT: kmovw %k0, %eax
3691 ; KNL-NEXT: testb %al, %al
3692 ; KNL-NEXT: je LBB73_1
3693 ; KNL-NEXT: ## %bb.2: ## %exit
3694 ; KNL-NEXT: popq %rax
3695 ; KNL-NEXT: vzeroupper
3697 ; KNL-NEXT: LBB73_1: ## %bar
3698 ; KNL-NEXT: vzeroupper
3699 ; KNL-NEXT: callq _foo
3700 ; KNL-NEXT: popq %rax
3703 ; SKX-LABEL: ktest_4:
3705 ; SKX-NEXT: pushq %rax
3706 ; SKX-NEXT: .cfi_def_cfa_offset 16
3707 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k0
3708 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1
3709 ; SKX-NEXT: korb %k1, %k0, %k0
3710 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1
3711 ; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k2
3712 ; SKX-NEXT: korb %k2, %k1, %k1
3713 ; SKX-NEXT: ktestb %k1, %k0
3714 ; SKX-NEXT: je LBB73_1
3715 ; SKX-NEXT: ## %bb.2: ## %exit
3716 ; SKX-NEXT: popq %rax
3717 ; SKX-NEXT: vzeroupper
3719 ; SKX-NEXT: LBB73_1: ## %bar
3720 ; SKX-NEXT: vzeroupper
3721 ; SKX-NEXT: callq _foo
3722 ; SKX-NEXT: popq %rax
3725 ; AVX512BW-LABEL: ktest_4:
3726 ; AVX512BW: ## %bb.0:
3727 ; AVX512BW-NEXT: pushq %rax
3728 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
3729 ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
3730 ; AVX512BW-NEXT: vptestnmq %zmm1, %zmm1, %k1
3731 ; AVX512BW-NEXT: vptestnmq %zmm2, %zmm2, %k2
3732 ; AVX512BW-NEXT: vptestnmq %zmm3, %zmm3, %k3
3733 ; AVX512BW-NEXT: korw %k1, %k0, %k0
3734 ; AVX512BW-NEXT: korw %k3, %k2, %k1
3735 ; AVX512BW-NEXT: kandw %k1, %k0, %k0
3736 ; AVX512BW-NEXT: kmovd %k0, %eax
3737 ; AVX512BW-NEXT: testb %al, %al
3738 ; AVX512BW-NEXT: je LBB73_1
3739 ; AVX512BW-NEXT: ## %bb.2: ## %exit
3740 ; AVX512BW-NEXT: popq %rax
3741 ; AVX512BW-NEXT: vzeroupper
3742 ; AVX512BW-NEXT: retq
3743 ; AVX512BW-NEXT: LBB73_1: ## %bar
3744 ; AVX512BW-NEXT: vzeroupper
3745 ; AVX512BW-NEXT: callq _foo
3746 ; AVX512BW-NEXT: popq %rax
3747 ; AVX512BW-NEXT: retq
3749 ; AVX512DQ-LABEL: ktest_4:
3750 ; AVX512DQ: ## %bb.0:
3751 ; AVX512DQ-NEXT: pushq %rax
3752 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
3753 ; AVX512DQ-NEXT: vptestnmq %zmm0, %zmm0, %k0
3754 ; AVX512DQ-NEXT: vptestnmq %zmm1, %zmm1, %k1
3755 ; AVX512DQ-NEXT: korb %k1, %k0, %k0
3756 ; AVX512DQ-NEXT: vptestnmq %zmm2, %zmm2, %k1
3757 ; AVX512DQ-NEXT: vptestnmq %zmm3, %zmm3, %k2
3758 ; AVX512DQ-NEXT: korb %k2, %k1, %k1
3759 ; AVX512DQ-NEXT: ktestb %k1, %k0
3760 ; AVX512DQ-NEXT: je LBB73_1
3761 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
3762 ; AVX512DQ-NEXT: popq %rax
3763 ; AVX512DQ-NEXT: vzeroupper
3764 ; AVX512DQ-NEXT: retq
3765 ; AVX512DQ-NEXT: LBB73_1: ## %bar
3766 ; AVX512DQ-NEXT: vzeroupper
3767 ; AVX512DQ-NEXT: callq _foo
3768 ; AVX512DQ-NEXT: popq %rax
3769 ; AVX512DQ-NEXT: retq
3771 ; X86-LABEL: ktest_4:
3773 ; X86-NEXT: subl $12, %esp
3774 ; X86-NEXT: .cfi_def_cfa_offset 16
3775 ; X86-NEXT: vptestnmq %zmm0, %zmm0, %k0
3776 ; X86-NEXT: vptestnmq %zmm1, %zmm1, %k1
3777 ; X86-NEXT: korb %k1, %k0, %k0
3778 ; X86-NEXT: vptestnmq %zmm2, %zmm2, %k1
3779 ; X86-NEXT: vptestnmq %zmm3, %zmm3, %k2
3780 ; X86-NEXT: korb %k2, %k1, %k1
3781 ; X86-NEXT: ktestb %k1, %k0
3782 ; X86-NEXT: je LBB73_1
3783 ; X86-NEXT: ## %bb.2: ## %exit
3784 ; X86-NEXT: addl $12, %esp
3785 ; X86-NEXT: vzeroupper
3787 ; X86-NEXT: LBB73_1: ## %bar
3788 ; X86-NEXT: vzeroupper
3789 ; X86-NEXT: calll _foo
3790 ; X86-NEXT: addl $12, %esp
; Four "equals zero" masks, combined as (a | b) & (c | d).
3792 %a = icmp eq <8 x i64> %w, zeroinitializer
3793 %b = icmp eq <8 x i64> %x, zeroinitializer
3794 %c = icmp eq <8 x i64> %y, zeroinitializer
3795 %d = icmp eq <8 x i64> %z, zeroinitializer
3796 %e = or <8 x i1> %a, %b
3797 %f = or <8 x i1> %c, %d
3798 %g = and <8 x i1> %e, %f
; Branch to %bar when no lane of the combined mask is set.
3799 %h = bitcast <8 x i1> %g to i8
3800 %i = icmp eq i8 %h, 0
3801 br i1 %i, label %bar, label %exit
; 16-lane variant: branches on whether (a | b) & (c | d) is all-zero for masks
; from <16 x i32> compares against zero. The runs with AVX512DQ are expected
; to end in ktestw. The other runs use kandw followed by kortestw.
3811 define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z) {
3812 ; KNL-LABEL: ktest_5:
3814 ; KNL-NEXT: pushq %rax
3815 ; KNL-NEXT: .cfi_def_cfa_offset 16
3816 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
3817 ; KNL-NEXT: vptestnmd %zmm1, %zmm1, %k1
3818 ; KNL-NEXT: korw %k1, %k0, %k0
3819 ; KNL-NEXT: vptestnmd %zmm2, %zmm2, %k1
3820 ; KNL-NEXT: vptestnmd %zmm3, %zmm3, %k2
3821 ; KNL-NEXT: korw %k2, %k1, %k1
3822 ; KNL-NEXT: kandw %k1, %k0, %k0
3823 ; KNL-NEXT: kortestw %k0, %k0
3824 ; KNL-NEXT: je LBB74_1
3825 ; KNL-NEXT: ## %bb.2: ## %exit
3826 ; KNL-NEXT: popq %rax
3827 ; KNL-NEXT: vzeroupper
3829 ; KNL-NEXT: LBB74_1: ## %bar
3830 ; KNL-NEXT: vzeroupper
3831 ; KNL-NEXT: callq _foo
3832 ; KNL-NEXT: popq %rax
3835 ; SKX-LABEL: ktest_5:
3837 ; SKX-NEXT: pushq %rax
3838 ; SKX-NEXT: .cfi_def_cfa_offset 16
3839 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
3840 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1
3841 ; SKX-NEXT: korw %k1, %k0, %k0
3842 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1
3843 ; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k2
3844 ; SKX-NEXT: korw %k2, %k1, %k1
3845 ; SKX-NEXT: ktestw %k1, %k0
3846 ; SKX-NEXT: je LBB74_1
3847 ; SKX-NEXT: ## %bb.2: ## %exit
3848 ; SKX-NEXT: popq %rax
3849 ; SKX-NEXT: vzeroupper
3851 ; SKX-NEXT: LBB74_1: ## %bar
3852 ; SKX-NEXT: vzeroupper
3853 ; SKX-NEXT: callq _foo
3854 ; SKX-NEXT: popq %rax
3857 ; AVX512BW-LABEL: ktest_5:
3858 ; AVX512BW: ## %bb.0:
3859 ; AVX512BW-NEXT: pushq %rax
3860 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
3861 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
3862 ; AVX512BW-NEXT: vptestnmd %zmm1, %zmm1, %k1
3863 ; AVX512BW-NEXT: korw %k1, %k0, %k0
3864 ; AVX512BW-NEXT: vptestnmd %zmm2, %zmm2, %k1
3865 ; AVX512BW-NEXT: vptestnmd %zmm3, %zmm3, %k2
3866 ; AVX512BW-NEXT: korw %k2, %k1, %k1
3867 ; AVX512BW-NEXT: kandw %k1, %k0, %k0
3868 ; AVX512BW-NEXT: kortestw %k0, %k0
3869 ; AVX512BW-NEXT: je LBB74_1
3870 ; AVX512BW-NEXT: ## %bb.2: ## %exit
3871 ; AVX512BW-NEXT: popq %rax
3872 ; AVX512BW-NEXT: vzeroupper
3873 ; AVX512BW-NEXT: retq
3874 ; AVX512BW-NEXT: LBB74_1: ## %bar
3875 ; AVX512BW-NEXT: vzeroupper
3876 ; AVX512BW-NEXT: callq _foo
3877 ; AVX512BW-NEXT: popq %rax
3878 ; AVX512BW-NEXT: retq
3880 ; AVX512DQ-LABEL: ktest_5:
3881 ; AVX512DQ: ## %bb.0:
3882 ; AVX512DQ-NEXT: pushq %rax
3883 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
3884 ; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
3885 ; AVX512DQ-NEXT: vptestnmd %zmm1, %zmm1, %k1
3886 ; AVX512DQ-NEXT: korw %k1, %k0, %k0
3887 ; AVX512DQ-NEXT: vptestnmd %zmm2, %zmm2, %k1
3888 ; AVX512DQ-NEXT: vptestnmd %zmm3, %zmm3, %k2
3889 ; AVX512DQ-NEXT: korw %k2, %k1, %k1
3890 ; AVX512DQ-NEXT: ktestw %k1, %k0
3891 ; AVX512DQ-NEXT: je LBB74_1
3892 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
3893 ; AVX512DQ-NEXT: popq %rax
3894 ; AVX512DQ-NEXT: vzeroupper
3895 ; AVX512DQ-NEXT: retq
3896 ; AVX512DQ-NEXT: LBB74_1: ## %bar
3897 ; AVX512DQ-NEXT: vzeroupper
3898 ; AVX512DQ-NEXT: callq _foo
3899 ; AVX512DQ-NEXT: popq %rax
3900 ; AVX512DQ-NEXT: retq
3902 ; X86-LABEL: ktest_5:
3904 ; X86-NEXT: subl $12, %esp
3905 ; X86-NEXT: .cfi_def_cfa_offset 16
3906 ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
3907 ; X86-NEXT: vptestnmd %zmm1, %zmm1, %k1
3908 ; X86-NEXT: korw %k1, %k0, %k0
3909 ; X86-NEXT: vptestnmd %zmm2, %zmm2, %k1
3910 ; X86-NEXT: vptestnmd %zmm3, %zmm3, %k2
3911 ; X86-NEXT: korw %k2, %k1, %k1
3912 ; X86-NEXT: ktestw %k1, %k0
3913 ; X86-NEXT: je LBB74_1
3914 ; X86-NEXT: ## %bb.2: ## %exit
3915 ; X86-NEXT: addl $12, %esp
3916 ; X86-NEXT: vzeroupper
3918 ; X86-NEXT: LBB74_1: ## %bar
3919 ; X86-NEXT: vzeroupper
3920 ; X86-NEXT: calll _foo
3921 ; X86-NEXT: addl $12, %esp
; Four "equals zero" masks, combined as (a | b) & (c | d).
3923 %a = icmp eq <16 x i32> %w, zeroinitializer
3924 %b = icmp eq <16 x i32> %x, zeroinitializer
3925 %c = icmp eq <16 x i32> %y, zeroinitializer
3926 %d = icmp eq <16 x i32> %z, zeroinitializer
3927 %e = or <16 x i1> %a, %b
3928 %f = or <16 x i1> %c, %d
3929 %g = and <16 x i1> %e, %f
; Branch to %bar when no lane of the combined mask is set.
3930 %h = bitcast <16 x i1> %g to i16
3931 %i = icmp eq i16 %h, 0
3932 br i1 %i, label %bar, label %exit
; 32-lane variant: branches on whether (a | b) & (c | d) is all-zero for masks
; from <32 x i16> compares against zero. The runs with AVX512BW are expected
; to use vptestnmw, kord and ktestd. The runs without it do the compares on
; 256-bit halves with vpcmpeqw/vpor/vpand, convert each half to a 16-bit
; mask, and combine the two halves in a general-purpose register (shll $16,
; orl) before the je.
3942 define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z) {
3943 ; KNL-LABEL: ktest_6:
3945 ; KNL-NEXT: pushq %rax
3946 ; KNL-NEXT: .cfi_def_cfa_offset 16
3947 ; KNL-NEXT: vextracti64x4 $1, %zmm3, %ymm4
3948 ; KNL-NEXT: vextracti64x4 $1, %zmm2, %ymm5
3949 ; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm6
3950 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm7
3951 ; KNL-NEXT: vpxor %xmm8, %xmm8, %xmm8
3952 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm0, %ymm0
3953 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm7, %ymm7
3954 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm1, %ymm1
3955 ; KNL-NEXT: vpor %ymm1, %ymm0, %ymm0
3956 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm6, %ymm1
3957 ; KNL-NEXT: vpor %ymm1, %ymm7, %ymm1
3958 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm2, %ymm2
3959 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm5, %ymm5
3960 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm3, %ymm3
3961 ; KNL-NEXT: vpor %ymm3, %ymm2, %ymm2
3962 ; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0
3963 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm4, %ymm2
3964 ; KNL-NEXT: vpor %ymm2, %ymm5, %ymm2
3965 ; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
3966 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
3967 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
3968 ; KNL-NEXT: kmovw %k0, %eax
3969 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm0
3970 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
3971 ; KNL-NEXT: kmovw %k0, %ecx
3972 ; KNL-NEXT: shll $16, %ecx
3973 ; KNL-NEXT: orl %eax, %ecx
3974 ; KNL-NEXT: je LBB75_1
3975 ; KNL-NEXT: ## %bb.2: ## %exit
3976 ; KNL-NEXT: popq %rax
3977 ; KNL-NEXT: vzeroupper
3979 ; KNL-NEXT: LBB75_1: ## %bar
3980 ; KNL-NEXT: vzeroupper
3981 ; KNL-NEXT: callq _foo
3982 ; KNL-NEXT: popq %rax
3985 ; SKX-LABEL: ktest_6:
3987 ; SKX-NEXT: pushq %rax
3988 ; SKX-NEXT: .cfi_def_cfa_offset 16
3989 ; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k0
3990 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1
3991 ; SKX-NEXT: kord %k1, %k0, %k0
3992 ; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1
3993 ; SKX-NEXT: vptestnmw %zmm3, %zmm3, %k2
3994 ; SKX-NEXT: kord %k2, %k1, %k1
3995 ; SKX-NEXT: ktestd %k1, %k0
3996 ; SKX-NEXT: je LBB75_1
3997 ; SKX-NEXT: ## %bb.2: ## %exit
3998 ; SKX-NEXT: popq %rax
3999 ; SKX-NEXT: vzeroupper
4001 ; SKX-NEXT: LBB75_1: ## %bar
4002 ; SKX-NEXT: vzeroupper
4003 ; SKX-NEXT: callq _foo
4004 ; SKX-NEXT: popq %rax
4007 ; AVX512BW-LABEL: ktest_6:
4008 ; AVX512BW: ## %bb.0:
4009 ; AVX512BW-NEXT: pushq %rax
4010 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
4011 ; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0
4012 ; AVX512BW-NEXT: vptestnmw %zmm1, %zmm1, %k1
4013 ; AVX512BW-NEXT: kord %k1, %k0, %k0
4014 ; AVX512BW-NEXT: vptestnmw %zmm2, %zmm2, %k1
4015 ; AVX512BW-NEXT: vptestnmw %zmm3, %zmm3, %k2
4016 ; AVX512BW-NEXT: kord %k2, %k1, %k1
4017 ; AVX512BW-NEXT: ktestd %k1, %k0
4018 ; AVX512BW-NEXT: je LBB75_1
4019 ; AVX512BW-NEXT: ## %bb.2: ## %exit
4020 ; AVX512BW-NEXT: popq %rax
4021 ; AVX512BW-NEXT: vzeroupper
4022 ; AVX512BW-NEXT: retq
4023 ; AVX512BW-NEXT: LBB75_1: ## %bar
4024 ; AVX512BW-NEXT: vzeroupper
4025 ; AVX512BW-NEXT: callq _foo
4026 ; AVX512BW-NEXT: popq %rax
4027 ; AVX512BW-NEXT: retq
4029 ; AVX512DQ-LABEL: ktest_6:
4030 ; AVX512DQ: ## %bb.0:
4031 ; AVX512DQ-NEXT: pushq %rax
4032 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
4033 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm3, %ymm4
4034 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm5
4035 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm6
4036 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm7
4037 ; AVX512DQ-NEXT: vpxor %xmm8, %xmm8, %xmm8
4038 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm0, %ymm0
4039 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm7, %ymm7
4040 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm1, %ymm1
4041 ; AVX512DQ-NEXT: vpor %ymm1, %ymm0, %ymm0
4042 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm6, %ymm1
4043 ; AVX512DQ-NEXT: vpor %ymm1, %ymm7, %ymm1
4044 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm2, %ymm2
4045 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm5, %ymm5
4046 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm3, %ymm3
4047 ; AVX512DQ-NEXT: vpor %ymm3, %ymm2, %ymm2
4048 ; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
4049 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm4, %ymm2
4050 ; AVX512DQ-NEXT: vpor %ymm2, %ymm5, %ymm2
4051 ; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
4052 ; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
4053 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
4054 ; AVX512DQ-NEXT: kmovw %k0, %eax
4055 ; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm0
4056 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
4057 ; AVX512DQ-NEXT: kmovw %k0, %ecx
4058 ; AVX512DQ-NEXT: shll $16, %ecx
4059 ; AVX512DQ-NEXT: orl %eax, %ecx
4060 ; AVX512DQ-NEXT: je LBB75_1
4061 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
4062 ; AVX512DQ-NEXT: popq %rax
4063 ; AVX512DQ-NEXT: vzeroupper
4064 ; AVX512DQ-NEXT: retq
4065 ; AVX512DQ-NEXT: LBB75_1: ## %bar
4066 ; AVX512DQ-NEXT: vzeroupper
4067 ; AVX512DQ-NEXT: callq _foo
4068 ; AVX512DQ-NEXT: popq %rax
4069 ; AVX512DQ-NEXT: retq
4071 ; X86-LABEL: ktest_6:
4073 ; X86-NEXT: subl $12, %esp
4074 ; X86-NEXT: .cfi_def_cfa_offset 16
4075 ; X86-NEXT: vptestnmw %zmm0, %zmm0, %k0
4076 ; X86-NEXT: vptestnmw %zmm1, %zmm1, %k1
4077 ; X86-NEXT: kord %k1, %k0, %k0
4078 ; X86-NEXT: vptestnmw %zmm2, %zmm2, %k1
4079 ; X86-NEXT: vptestnmw %zmm3, %zmm3, %k2
4080 ; X86-NEXT: kord %k2, %k1, %k1
4081 ; X86-NEXT: ktestd %k1, %k0
4082 ; X86-NEXT: je LBB75_1
4083 ; X86-NEXT: ## %bb.2: ## %exit
4084 ; X86-NEXT: addl $12, %esp
4085 ; X86-NEXT: vzeroupper
4087 ; X86-NEXT: LBB75_1: ## %bar
4088 ; X86-NEXT: vzeroupper
4089 ; X86-NEXT: calll _foo
4090 ; X86-NEXT: addl $12, %esp
; Four "equals zero" masks, combined as (a | b) & (c | d).
4092 %a = icmp eq <32 x i16> %w, zeroinitializer
4093 %b = icmp eq <32 x i16> %x, zeroinitializer
4094 %c = icmp eq <32 x i16> %y, zeroinitializer
4095 %d = icmp eq <32 x i16> %z, zeroinitializer
4096 %e = or <32 x i1> %a, %b
4097 %f = or <32 x i1> %c, %d
4098 %g = and <32 x i1> %e, %f
; Branch to %bar when no lane of the combined mask is set.
4099 %h = bitcast <32 x i1> %g to i32
4100 %i = icmp eq i32 %h, 0
4101 br i1 %i, label %bar, label %exit
4111 define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
4112 ; KNL-LABEL: ktest_7:
4114 ; KNL-NEXT: pushq %rax
4115 ; KNL-NEXT: .cfi_def_cfa_offset 16
4116 ; KNL-NEXT: vextracti64x4 $1, %zmm3, %ymm9
4117 ; KNL-NEXT: vextracti64x4 $1, %zmm2, %ymm10
4118 ; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm11
4119 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm7
4120 ; KNL-NEXT: vpxor %xmm8, %xmm8, %xmm8
4121 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm0, %ymm13
4122 ; KNL-NEXT: vextracti128 $1, %ymm13, %xmm4
4123 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm7, %ymm7
4124 ; KNL-NEXT: vextracti128 $1, %ymm7, %xmm5
4125 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm1, %ymm1
4126 ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm6
4127 ; KNL-NEXT: vpor %xmm6, %xmm4, %xmm12
4128 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm11, %ymm6
4129 ; KNL-NEXT: vextracti128 $1, %ymm6, %xmm4
4130 ; KNL-NEXT: vpor %xmm4, %xmm5, %xmm11
4131 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm2, %ymm2
4132 ; KNL-NEXT: vextracti128 $1, %ymm2, %xmm5
4133 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm10, %ymm10
4134 ; KNL-NEXT: vextracti128 $1, %ymm10, %xmm4
4135 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm3, %ymm3
4136 ; KNL-NEXT: vextracti128 $1, %ymm3, %xmm0
4137 ; KNL-NEXT: vpor %xmm0, %xmm5, %xmm0
4138 ; KNL-NEXT: vpand %xmm0, %xmm12, %xmm12
4139 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm9, %ymm5
4140 ; KNL-NEXT: vextracti128 $1, %ymm5, %xmm0
4141 ; KNL-NEXT: vpor %xmm0, %xmm4, %xmm0
4142 ; KNL-NEXT: vpand %xmm0, %xmm11, %xmm0
4143 ; KNL-NEXT: vpor %xmm6, %xmm7, %xmm4
4144 ; KNL-NEXT: vpor %xmm1, %xmm13, %xmm1
4145 ; KNL-NEXT: vpor %xmm5, %xmm10, %xmm5
4146 ; KNL-NEXT: vpand %xmm5, %xmm4, %xmm4
4147 ; KNL-NEXT: vpor %xmm3, %xmm2, %xmm2
4148 ; KNL-NEXT: vpand %xmm2, %xmm1, %xmm1
4149 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
4150 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
4151 ; KNL-NEXT: kmovw %k0, %eax
4152 ; KNL-NEXT: vpmovsxbd %xmm12, %zmm1
4153 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
4154 ; KNL-NEXT: kmovw %k0, %ecx
4155 ; KNL-NEXT: shll $16, %ecx
4156 ; KNL-NEXT: orl %eax, %ecx
4157 ; KNL-NEXT: vpmovsxbd %xmm4, %zmm1
4158 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
4159 ; KNL-NEXT: kmovw %k0, %eax
4160 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
4161 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
4162 ; KNL-NEXT: kmovw %k0, %edx
4163 ; KNL-NEXT: shll $16, %edx
4164 ; KNL-NEXT: orl %eax, %edx
4165 ; KNL-NEXT: shlq $32, %rdx
4166 ; KNL-NEXT: orq %rcx, %rdx
4167 ; KNL-NEXT: je LBB76_1
4168 ; KNL-NEXT: ## %bb.2: ## %exit
4169 ; KNL-NEXT: popq %rax
4170 ; KNL-NEXT: vzeroupper
4172 ; KNL-NEXT: LBB76_1: ## %bar
4173 ; KNL-NEXT: vzeroupper
4174 ; KNL-NEXT: callq _foo
4175 ; KNL-NEXT: popq %rax
4178 ; SKX-LABEL: ktest_7:
4180 ; SKX-NEXT: pushq %rax
4181 ; SKX-NEXT: .cfi_def_cfa_offset 16
4182 ; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k0
4183 ; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1
4184 ; SKX-NEXT: korq %k1, %k0, %k0
4185 ; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1
4186 ; SKX-NEXT: vptestnmb %zmm3, %zmm3, %k2
4187 ; SKX-NEXT: korq %k2, %k1, %k1
4188 ; SKX-NEXT: ktestq %k1, %k0
4189 ; SKX-NEXT: je LBB76_1
4190 ; SKX-NEXT: ## %bb.2: ## %exit
4191 ; SKX-NEXT: popq %rax
4192 ; SKX-NEXT: vzeroupper
4194 ; SKX-NEXT: LBB76_1: ## %bar
4195 ; SKX-NEXT: vzeroupper
4196 ; SKX-NEXT: callq _foo
4197 ; SKX-NEXT: popq %rax
4200 ; AVX512BW-LABEL: ktest_7:
4201 ; AVX512BW: ## %bb.0:
4202 ; AVX512BW-NEXT: pushq %rax
4203 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
4204 ; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
4205 ; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k1
4206 ; AVX512BW-NEXT: korq %k1, %k0, %k0
4207 ; AVX512BW-NEXT: vptestnmb %zmm2, %zmm2, %k1
4208 ; AVX512BW-NEXT: vptestnmb %zmm3, %zmm3, %k2
4209 ; AVX512BW-NEXT: korq %k2, %k1, %k1
4210 ; AVX512BW-NEXT: ktestq %k1, %k0
4211 ; AVX512BW-NEXT: je LBB76_1
4212 ; AVX512BW-NEXT: ## %bb.2: ## %exit
4213 ; AVX512BW-NEXT: popq %rax
4214 ; AVX512BW-NEXT: vzeroupper
4215 ; AVX512BW-NEXT: retq
4216 ; AVX512BW-NEXT: LBB76_1: ## %bar
4217 ; AVX512BW-NEXT: vzeroupper
4218 ; AVX512BW-NEXT: callq _foo
4219 ; AVX512BW-NEXT: popq %rax
4220 ; AVX512BW-NEXT: retq
4222 ; AVX512DQ-LABEL: ktest_7:
4223 ; AVX512DQ: ## %bb.0:
4224 ; AVX512DQ-NEXT: pushq %rax
4225 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
4226 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm3, %ymm9
4227 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm10
4228 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm11
4229 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm7
4230 ; AVX512DQ-NEXT: vpxor %xmm8, %xmm8, %xmm8
4231 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm0, %ymm13
4232 ; AVX512DQ-NEXT: vextracti128 $1, %ymm13, %xmm4
4233 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm7, %ymm7
4234 ; AVX512DQ-NEXT: vextracti128 $1, %ymm7, %xmm5
4235 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm1, %ymm1
4236 ; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm6
4237 ; AVX512DQ-NEXT: vpor %xmm6, %xmm4, %xmm12
4238 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm11, %ymm6
4239 ; AVX512DQ-NEXT: vextracti128 $1, %ymm6, %xmm4
4240 ; AVX512DQ-NEXT: vpor %xmm4, %xmm5, %xmm11
4241 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm2, %ymm2
4242 ; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm5
4243 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm10, %ymm10
4244 ; AVX512DQ-NEXT: vextracti128 $1, %ymm10, %xmm4
4245 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm3, %ymm3
4246 ; AVX512DQ-NEXT: vextracti128 $1, %ymm3, %xmm0
4247 ; AVX512DQ-NEXT: vpor %xmm0, %xmm5, %xmm0
4248 ; AVX512DQ-NEXT: vpand %xmm0, %xmm12, %xmm12
4249 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm9, %ymm5
4250 ; AVX512DQ-NEXT: vextracti128 $1, %ymm5, %xmm0
4251 ; AVX512DQ-NEXT: vpor %xmm0, %xmm4, %xmm0
4252 ; AVX512DQ-NEXT: vpand %xmm0, %xmm11, %xmm0
4253 ; AVX512DQ-NEXT: vpor %xmm6, %xmm7, %xmm4
4254 ; AVX512DQ-NEXT: vpor %xmm1, %xmm13, %xmm1
4255 ; AVX512DQ-NEXT: vpor %xmm5, %xmm10, %xmm5
4256 ; AVX512DQ-NEXT: vpand %xmm5, %xmm4, %xmm4
4257 ; AVX512DQ-NEXT: vpor %xmm3, %xmm2, %xmm2
4258 ; AVX512DQ-NEXT: vpand %xmm2, %xmm1, %xmm1
4259 ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1
4260 ; AVX512DQ-NEXT: vpmovd2m %zmm1, %k0
4261 ; AVX512DQ-NEXT: kmovw %k0, %eax
4262 ; AVX512DQ-NEXT: vpmovsxbd %xmm12, %zmm1
4263 ; AVX512DQ-NEXT: vpmovd2m %zmm1, %k0
4264 ; AVX512DQ-NEXT: kmovw %k0, %ecx
4265 ; AVX512DQ-NEXT: shll $16, %ecx
4266 ; AVX512DQ-NEXT: orl %eax, %ecx
4267 ; AVX512DQ-NEXT: vpmovsxbd %xmm4, %zmm1
4268 ; AVX512DQ-NEXT: vpmovd2m %zmm1, %k0
4269 ; AVX512DQ-NEXT: kmovw %k0, %eax
4270 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
4271 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
4272 ; AVX512DQ-NEXT: kmovw %k0, %edx
4273 ; AVX512DQ-NEXT: shll $16, %edx
4274 ; AVX512DQ-NEXT: orl %eax, %edx
4275 ; AVX512DQ-NEXT: shlq $32, %rdx
4276 ; AVX512DQ-NEXT: orq %rcx, %rdx
4277 ; AVX512DQ-NEXT: je LBB76_1
4278 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
4279 ; AVX512DQ-NEXT: popq %rax
4280 ; AVX512DQ-NEXT: vzeroupper
4281 ; AVX512DQ-NEXT: retq
4282 ; AVX512DQ-NEXT: LBB76_1: ## %bar
4283 ; AVX512DQ-NEXT: vzeroupper
4284 ; AVX512DQ-NEXT: callq _foo
4285 ; AVX512DQ-NEXT: popq %rax
4286 ; AVX512DQ-NEXT: retq
4288 ; X86-LABEL: ktest_7:
4290 ; X86-NEXT: subl $12, %esp
4291 ; X86-NEXT: .cfi_def_cfa_offset 16
4292 ; X86-NEXT: vptestnmb %zmm0, %zmm0, %k0
4293 ; X86-NEXT: vptestnmb %zmm1, %zmm1, %k1
4294 ; X86-NEXT: korq %k1, %k0, %k0
4295 ; X86-NEXT: vptestnmb %zmm2, %zmm2, %k1
4296 ; X86-NEXT: vptestnmb %zmm3, %zmm3, %k2
4297 ; X86-NEXT: korq %k2, %k1, %k1
4298 ; X86-NEXT: kandq %k1, %k0, %k0
4299 ; X86-NEXT: kshiftrq $32, %k0, %k1
4300 ; X86-NEXT: kortestd %k1, %k0
4301 ; X86-NEXT: je LBB76_1
4302 ; X86-NEXT: ## %bb.2: ## %exit
4303 ; X86-NEXT: addl $12, %esp
4304 ; X86-NEXT: vzeroupper
4306 ; X86-NEXT: LBB76_1: ## %bar
4307 ; X86-NEXT: vzeroupper
4308 ; X86-NEXT: calll _foo
4309 ; X86-NEXT: addl $12, %esp
4311 %a = icmp eq <64 x i8> %w, zeroinitializer
4312 %b = icmp eq <64 x i8> %x, zeroinitializer
4313 %c = icmp eq <64 x i8> %y, zeroinitializer
4314 %d = icmp eq <64 x i8> %z, zeroinitializer
4315 %e = or <64 x i1> %a, %b
4316 %f = or <64 x i1> %c, %d
4317 %g = and <64 x i1> %e, %f
4318 %h = bitcast <64 x i1> %g to i64
4319 %i = icmp eq i64 %h, 0
4320 br i1 %i, label %bar, label %exit