1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
3 ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
4 ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512BW
5 ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512DQ
6 ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=X86
; Vector NOT of a 16-bit mask: %x is bitcast to <16 x i1>, xor'ed with an
; all-ones vector and bitcast back to i16. The x86-64 runs expect the operation
; to stay in a general-purpose register (movl + notl).
9 define i16 @mask16(i16 %x) {
10 ; CHECK-LABEL: mask16:
12 ; CHECK-NEXT: movl %edi, %eax
13 ; CHECK-NEXT: notl %eax
14 ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
19 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
21 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
23 %m0 = bitcast i16 %x to <16 x i1>
24 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
25 %ret = bitcast <16 x i1> %m1 to i16
; Same 16-bit mask NOT as mask16, but the i16 result is zero-extended to i32.
; The x86-64 runs expect notl followed by movzwl; the i686 run expects a
; zero-extending load followed by xorl with 0xFFFF.
29 define i32 @mask16_zext(i16 %x) {
30 ; CHECK-LABEL: mask16_zext:
32 ; CHECK-NEXT: notl %edi
33 ; CHECK-NEXT: movzwl %di, %eax
36 ; X86-LABEL: mask16_zext:
38 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
39 ; X86-NEXT: xorl $65535, %eax ## imm = 0xFFFF
41 %m0 = bitcast i16 %x to <16 x i1>
42 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
43 %m2 = bitcast <16 x i1> %m1 to i16
44 %ret = zext i16 %m2 to i32
; Vector NOT of an 8-bit mask: %x is bitcast to <8 x i1>, xor'ed with an
; all-ones vector and bitcast back to i8. The x86-64 runs expect a scalar
; notb on %al.
48 define i8 @mask8(i8 %x) {
51 ; CHECK-NEXT: movl %edi, %eax
52 ; CHECK-NEXT: notb %al
53 ; CHECK-NEXT: ## kill: def $al killed $al killed $eax
58 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
61 %m0 = bitcast i8 %x to <8 x i1>
62 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
63 %ret = bitcast <8 x i1> %m1 to i8
; Same 8-bit mask NOT as mask8, but the i8 result is zero-extended to i32.
; The x86-64 runs expect notb followed by movzbl.
67 define i32 @mask8_zext(i8 %x) {
68 ; CHECK-LABEL: mask8_zext:
70 ; CHECK-NEXT: notb %dil
71 ; CHECK-NEXT: movzbl %dil, %eax
74 ; X86-LABEL: mask8_zext:
76 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
78 ; X86-NEXT: movzbl %al, %eax
80 %m0 = bitcast i8 %x to <8 x i1>
81 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
82 %m2 = bitcast <8 x i1> %m1 to i8
83 %ret = zext i8 %m2 to i32
; Read-modify-write NOT of a 16-bit mask in memory: loads i16 from %ptr,
; inverts it as <16 x i1> and stores the result back to %ptr. Every run is
; expected to go through a mask register (kmovw load, knotw, kmovw store).
87 define void @mask16_mem(i16* %ptr) {
88 ; CHECK-LABEL: mask16_mem:
90 ; CHECK-NEXT: kmovw (%rdi), %k0
91 ; CHECK-NEXT: knotw %k0, %k0
92 ; CHECK-NEXT: kmovw %k0, (%rdi)
95 ; X86-LABEL: mask16_mem:
97 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
98 ; X86-NEXT: kmovw (%eax), %k0
99 ; X86-NEXT: knotw %k0, %k0
100 ; X86-NEXT: kmovw %k0, (%eax)
102 %x = load i16, i16* %ptr, align 4
103 %m0 = bitcast i16 %x to <16 x i1>
104 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
105 %ret = bitcast <16 x i1> %m1 to i16
106 store i16 %ret, i16* %ptr, align 4
; Read-modify-write NOT of an 8-bit mask in memory: loads i8 from %ptr,
; inverts it as <8 x i1> and stores the result back to %ptr.
; Expected code differs per run: the KNL and AVX512BW runs expect a single
; notb on memory, while the SKX, AVX512DQ and i686 runs expect a
; kmovb / knotb / kmovb sequence through a mask register.
110 define void @mask8_mem(i8* %ptr) {
111 ; KNL-LABEL: mask8_mem:
113 ; KNL-NEXT: notb (%rdi)
116 ; SKX-LABEL: mask8_mem:
118 ; SKX-NEXT: kmovb (%rdi), %k0
119 ; SKX-NEXT: knotb %k0, %k0
120 ; SKX-NEXT: kmovb %k0, (%rdi)
123 ; AVX512BW-LABEL: mask8_mem:
124 ; AVX512BW: ## %bb.0:
125 ; AVX512BW-NEXT: notb (%rdi)
126 ; AVX512BW-NEXT: retq
128 ; AVX512DQ-LABEL: mask8_mem:
129 ; AVX512DQ: ## %bb.0:
130 ; AVX512DQ-NEXT: kmovb (%rdi), %k0
131 ; AVX512DQ-NEXT: knotb %k0, %k0
132 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
133 ; AVX512DQ-NEXT: retq
135 ; X86-LABEL: mask8_mem:
137 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
138 ; X86-NEXT: kmovb (%eax), %k0
139 ; X86-NEXT: knotb %k0, %k0
140 ; X86-NEXT: kmovb %k0, (%eax)
142 %x = load i8, i8* %ptr, align 4
143 %m0 = bitcast i8 %x to <8 x i1>
144 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
145 %ret = bitcast <8 x i1> %m1 to i8
146 store i8 %ret, i8* %ptr, align 4
; Combines two 16-bit masks passed as scalars: computes (x & y) | (x ^ y) on
; their <16 x i1> views and bitcasts the result to i16. All runs are expected
; to use general-purpose andl / xorl / orl rather than mask-register ops.
150 define i16 @mand16(i16 %x, i16 %y) {
151 ; CHECK-LABEL: mand16:
153 ; CHECK-NEXT: movl %edi, %eax
154 ; CHECK-NEXT: movl %edi, %ecx
155 ; CHECK-NEXT: andl %esi, %ecx
156 ; CHECK-NEXT: xorl %esi, %eax
157 ; CHECK-NEXT: orl %ecx, %eax
158 ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
163 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
164 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
165 ; X86-NEXT: movl %eax, %edx
166 ; X86-NEXT: andl %ecx, %edx
167 ; X86-NEXT: xorl %ecx, %eax
168 ; X86-NEXT: orl %edx, %eax
169 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
171 %ma = bitcast i16 %x to <16 x i1>
172 %mb = bitcast i16 %y to <16 x i1>
173 %mc = and <16 x i1> %ma, %mb
174 %md = xor <16 x i1> %ma, %mb
175 %me = or <16 x i1> %mc, %md
176 %ret = bitcast <16 x i1> %me to i16
; Memory-operand variant of mand16: loads two <16 x i1> masks from %x and %y,
; computes (a & b) | (a ^ b) and bitcasts the result to i16.
; Every run is expected to load into mask registers and use kandw / kxorw /
; korw; only the final mask-to-GPR move differs (kmovw vs. kmovd).
180 define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) {
181 ; KNL-LABEL: mand16_mem:
183 ; KNL-NEXT: kmovw (%rdi), %k0
184 ; KNL-NEXT: kmovw (%rsi), %k1
185 ; KNL-NEXT: kandw %k1, %k0, %k2
186 ; KNL-NEXT: kxorw %k1, %k0, %k0
187 ; KNL-NEXT: korw %k0, %k2, %k0
188 ; KNL-NEXT: kmovw %k0, %eax
189 ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
192 ; SKX-LABEL: mand16_mem:
194 ; SKX-NEXT: kmovw (%rdi), %k0
195 ; SKX-NEXT: kmovw (%rsi), %k1
196 ; SKX-NEXT: kandw %k1, %k0, %k2
197 ; SKX-NEXT: kxorw %k1, %k0, %k0
198 ; SKX-NEXT: korw %k0, %k2, %k0
199 ; SKX-NEXT: kmovd %k0, %eax
200 ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
203 ; AVX512BW-LABEL: mand16_mem:
204 ; AVX512BW: ## %bb.0:
205 ; AVX512BW-NEXT: kmovw (%rdi), %k0
206 ; AVX512BW-NEXT: kmovw (%rsi), %k1
207 ; AVX512BW-NEXT: kandw %k1, %k0, %k2
208 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
209 ; AVX512BW-NEXT: korw %k0, %k2, %k0
210 ; AVX512BW-NEXT: kmovd %k0, %eax
211 ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
212 ; AVX512BW-NEXT: retq
214 ; AVX512DQ-LABEL: mand16_mem:
215 ; AVX512DQ: ## %bb.0:
216 ; AVX512DQ-NEXT: kmovw (%rdi), %k0
217 ; AVX512DQ-NEXT: kmovw (%rsi), %k1
218 ; AVX512DQ-NEXT: kandw %k1, %k0, %k2
219 ; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
220 ; AVX512DQ-NEXT: korw %k0, %k2, %k0
221 ; AVX512DQ-NEXT: kmovw %k0, %eax
222 ; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
223 ; AVX512DQ-NEXT: retq
225 ; X86-LABEL: mand16_mem:
227 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
228 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
229 ; X86-NEXT: kmovw (%ecx), %k0
230 ; X86-NEXT: kmovw (%eax), %k1
231 ; X86-NEXT: kandw %k1, %k0, %k2
232 ; X86-NEXT: kxorw %k1, %k0, %k0
233 ; X86-NEXT: korw %k0, %k2, %k0
234 ; X86-NEXT: kmovd %k0, %eax
235 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
237 %ma = load <16 x i1>, <16 x i1>* %x
238 %mb = load <16 x i1>, <16 x i1>* %y
239 %mc = and <16 x i1> %ma, %mb
240 %md = xor <16 x i1> %ma, %mb
241 %me = or <16 x i1> %mc, %md
242 %ret = bitcast <16 x i1> %me to i16
; Extracts the upper half of a 16-bit mask: %v is viewed as <16 x i1>, a
; shufflevector picks elements 8..15, and the <8 x i1> result is bitcast to i8.
; The x86-64 runs expect a mask-register right shift by 8 (kshiftrw $8); the
; i686 run expects a single byte load from the stack.
246 define i8 @shuf_test1(i16 %v) nounwind {
247 ; KNL-LABEL: shuf_test1:
249 ; KNL-NEXT: kmovw %edi, %k0
250 ; KNL-NEXT: kshiftrw $8, %k0, %k0
251 ; KNL-NEXT: kmovw %k0, %eax
252 ; KNL-NEXT: ## kill: def $al killed $al killed $eax
255 ; SKX-LABEL: shuf_test1:
257 ; SKX-NEXT: kmovd %edi, %k0
258 ; SKX-NEXT: kshiftrw $8, %k0, %k0
259 ; SKX-NEXT: kmovd %k0, %eax
260 ; SKX-NEXT: ## kill: def $al killed $al killed $eax
263 ; AVX512BW-LABEL: shuf_test1:
264 ; AVX512BW: ## %bb.0:
265 ; AVX512BW-NEXT: kmovd %edi, %k0
266 ; AVX512BW-NEXT: kshiftrw $8, %k0, %k0
267 ; AVX512BW-NEXT: kmovd %k0, %eax
268 ; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
269 ; AVX512BW-NEXT: retq
271 ; AVX512DQ-LABEL: shuf_test1:
272 ; AVX512DQ: ## %bb.0:
273 ; AVX512DQ-NEXT: kmovw %edi, %k0
274 ; AVX512DQ-NEXT: kshiftrw $8, %k0, %k0
275 ; AVX512DQ-NEXT: kmovw %k0, %eax
276 ; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
277 ; AVX512DQ-NEXT: retq
279 ; X86-LABEL: shuf_test1:
281 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
283 %v1 = bitcast i16 %v to <16 x i1>
284 %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
285 %mask1 = bitcast <8 x i1> %mask to i8
; Zero-extends a single compare bit to i32: unsigned-greater-than compare of
; two <16 x i32> vectors, extract element 5 of the <16 x i1> result, zext.
; Every run expects vpcmpnleud into a mask register, kshiftrw $5 to bring the
; element to bit 0, a move to %eax and andl $1 to isolate the bit.
289 define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
290 ; KNL-LABEL: zext_test1:
292 ; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
293 ; KNL-NEXT: kshiftrw $5, %k0, %k0
294 ; KNL-NEXT: kmovw %k0, %eax
295 ; KNL-NEXT: andl $1, %eax
296 ; KNL-NEXT: vzeroupper
299 ; SKX-LABEL: zext_test1:
301 ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
302 ; SKX-NEXT: kshiftrw $5, %k0, %k0
303 ; SKX-NEXT: kmovd %k0, %eax
304 ; SKX-NEXT: andl $1, %eax
305 ; SKX-NEXT: vzeroupper
308 ; AVX512BW-LABEL: zext_test1:
309 ; AVX512BW: ## %bb.0:
310 ; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
311 ; AVX512BW-NEXT: kshiftrw $5, %k0, %k0
312 ; AVX512BW-NEXT: kmovd %k0, %eax
313 ; AVX512BW-NEXT: andl $1, %eax
314 ; AVX512BW-NEXT: vzeroupper
315 ; AVX512BW-NEXT: retq
317 ; AVX512DQ-LABEL: zext_test1:
318 ; AVX512DQ: ## %bb.0:
319 ; AVX512DQ-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
320 ; AVX512DQ-NEXT: kshiftrw $5, %k0, %k0
321 ; AVX512DQ-NEXT: kmovw %k0, %eax
322 ; AVX512DQ-NEXT: andl $1, %eax
323 ; AVX512DQ-NEXT: vzeroupper
324 ; AVX512DQ-NEXT: retq
326 ; X86-LABEL: zext_test1:
328 ; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
329 ; X86-NEXT: kshiftrw $5, %k0, %k0
330 ; X86-NEXT: kmovd %k0, %eax
331 ; X86-NEXT: andl $1, %eax
332 ; X86-NEXT: vzeroupper
334 %cmp_res = icmp ugt <16 x i32> %a, %b
335 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
336 %res = zext i1 %cmp_res.i1 to i32
; i16 variant of zext_test1: unsigned-greater-than compare of two <16 x i32>
; vectors, extract element 5 of the <16 x i1> result, zero-extend to i16.
; The expected sequence is the same as for the i32 variant (vpcmpnleud,
; kshiftrw $5, move to %eax, andl $1) plus a sub-register kill annotation
; for $ax.
340 define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
341 ; KNL-LABEL: zext_test2:
343 ; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
344 ; KNL-NEXT: kshiftrw $5, %k0, %k0
345 ; KNL-NEXT: kmovw %k0, %eax
346 ; KNL-NEXT: andl $1, %eax
347 ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
348 ; KNL-NEXT: vzeroupper
351 ; SKX-LABEL: zext_test2:
353 ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
354 ; SKX-NEXT: kshiftrw $5, %k0, %k0
355 ; SKX-NEXT: kmovd %k0, %eax
356 ; SKX-NEXT: andl $1, %eax
357 ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
358 ; SKX-NEXT: vzeroupper
361 ; AVX512BW-LABEL: zext_test2:
362 ; AVX512BW: ## %bb.0:
363 ; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
364 ; AVX512BW-NEXT: kshiftrw $5, %k0, %k0
365 ; AVX512BW-NEXT: kmovd %k0, %eax
366 ; AVX512BW-NEXT: andl $1, %eax
367 ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
368 ; AVX512BW-NEXT: vzeroupper
369 ; AVX512BW-NEXT: retq
371 ; AVX512DQ-LABEL: zext_test2:
372 ; AVX512DQ: ## %bb.0:
373 ; AVX512DQ-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
374 ; AVX512DQ-NEXT: kshiftrw $5, %k0, %k0
375 ; AVX512DQ-NEXT: kmovw %k0, %eax
376 ; AVX512DQ-NEXT: andl $1, %eax
377 ; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
378 ; AVX512DQ-NEXT: vzeroupper
379 ; AVX512DQ-NEXT: retq
381 ; X86-LABEL: zext_test2:
383 ; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
384 ; X86-NEXT: kshiftrw $5, %k0, %k0
385 ; X86-NEXT: kmovd %k0, %eax
386 ; X86-NEXT: andl $1, %eax
387 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
388 ; X86-NEXT: vzeroupper
390 %cmp_res = icmp ugt <16 x i32> %a, %b
391 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
392 %res = zext i1 %cmp_res.i1 to i16
; i8 variant of zext_test1: unsigned-greater-than compare of two <16 x i32>
; vectors, extract element 5 of the <16 x i1> result, zero-extend to i8.
; Unlike the i32/i16 variants, the bit is isolated with a byte-sized
; andb $1, %al in every run.
396 define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
397 ; KNL-LABEL: zext_test3:
399 ; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
400 ; KNL-NEXT: kshiftrw $5, %k0, %k0
401 ; KNL-NEXT: kmovw %k0, %eax
402 ; KNL-NEXT: andb $1, %al
403 ; KNL-NEXT: ## kill: def $al killed $al killed $eax
404 ; KNL-NEXT: vzeroupper
407 ; SKX-LABEL: zext_test3:
409 ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
410 ; SKX-NEXT: kshiftrw $5, %k0, %k0
411 ; SKX-NEXT: kmovd %k0, %eax
412 ; SKX-NEXT: andb $1, %al
413 ; SKX-NEXT: ## kill: def $al killed $al killed $eax
414 ; SKX-NEXT: vzeroupper
417 ; AVX512BW-LABEL: zext_test3:
418 ; AVX512BW: ## %bb.0:
419 ; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
420 ; AVX512BW-NEXT: kshiftrw $5, %k0, %k0
421 ; AVX512BW-NEXT: kmovd %k0, %eax
422 ; AVX512BW-NEXT: andb $1, %al
423 ; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
424 ; AVX512BW-NEXT: vzeroupper
425 ; AVX512BW-NEXT: retq
427 ; AVX512DQ-LABEL: zext_test3:
428 ; AVX512DQ: ## %bb.0:
429 ; AVX512DQ-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
430 ; AVX512DQ-NEXT: kshiftrw $5, %k0, %k0
431 ; AVX512DQ-NEXT: kmovw %k0, %eax
432 ; AVX512DQ-NEXT: andb $1, %al
433 ; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
434 ; AVX512DQ-NEXT: vzeroupper
435 ; AVX512DQ-NEXT: retq
437 ; X86-LABEL: zext_test3:
439 ; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
440 ; X86-NEXT: kshiftrw $5, %k0, %k0
441 ; X86-NEXT: kmovd %k0, %eax
442 ; X86-NEXT: andb $1, %al
443 ; X86-NEXT: ## kill: def $al killed $al killed $eax
444 ; X86-NEXT: vzeroupper
446 %cmp_res = icmp ugt <16 x i32> %a, %b
447 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
448 %res = zext i1 %cmp_res.i1 to i8
; Constant <8 x i1> stores and a store/load round trip through a stack slot:
; an all-true mask is stored to %R, then <0,1,1,1,1,1,1,1> is stored to a
; local alloca, reloaded and bitcast to i8.
; Both stores are expected to become byte immediates (movb $-1 and movb $-2),
; and the reloaded value is expected to be folded to the constant -2 in %al.
452 define i8 @conv1(<8 x i1>* %R) {
453 ; CHECK-LABEL: conv1:
454 ; CHECK: ## %bb.0: ## %entry
455 ; CHECK-NEXT: movb $-1, (%rdi)
456 ; CHECK-NEXT: movb $-2, -{{[0-9]+}}(%rsp)
457 ; CHECK-NEXT: movb $-2, %al
461 ; X86: ## %bb.0: ## %entry
462 ; X86-NEXT: subl $12, %esp
463 ; X86-NEXT: .cfi_def_cfa_offset 16
464 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
465 ; X86-NEXT: movb $-1, (%eax)
466 ; X86-NEXT: movb $-2, (%esp)
467 ; X86-NEXT: movb $-2, %al
468 ; X86-NEXT: addl $12, %esp
471 store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R
473 %maskPtr = alloca <8 x i1>
474 store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr
475 %mask = load <8 x i1>, <8 x i1>* %maskPtr
476 %mask_convert = bitcast <8 x i1> %mask to i8
; Compare-of-compares on <4 x i1>: two signed-greater-than compares of
; <4 x i64> vectors produce two masks, which are then themselves compared with
; icmp sgt; the resulting <4 x i1> is sign-extended to <4 x i32>.
; Every run expects the mask compare to fold into a vpcmpleq whose result
; predicates a vpcmpgtq. Runs whose expected code uses zmm registers for this
; (KNL, AVX512BW, AVX512DQ) first widen the ymm inputs, as shown by the kill
; annotations.
480 define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
483 ; KNL-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
484 ; KNL-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
485 ; KNL-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
486 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
487 ; KNL-NEXT: vpcmpleq %zmm1, %zmm0, %k1
488 ; KNL-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 {%k1}
489 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
490 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
491 ; KNL-NEXT: vzeroupper
496 ; SKX-NEXT: vpcmpleq %ymm1, %ymm0, %k1
497 ; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
498 ; SKX-NEXT: vpmovm2d %k0, %xmm0
499 ; SKX-NEXT: vzeroupper
502 ; AVX512BW-LABEL: test4:
503 ; AVX512BW: ## %bb.0:
504 ; AVX512BW-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
505 ; AVX512BW-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
506 ; AVX512BW-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
507 ; AVX512BW-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
508 ; AVX512BW-NEXT: vpcmpleq %zmm1, %zmm0, %k1
509 ; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 {%k1}
510 ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
511 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
512 ; AVX512BW-NEXT: vzeroupper
513 ; AVX512BW-NEXT: retq
515 ; AVX512DQ-LABEL: test4:
516 ; AVX512DQ: ## %bb.0:
517 ; AVX512DQ-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
518 ; AVX512DQ-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
519 ; AVX512DQ-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
520 ; AVX512DQ-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
521 ; AVX512DQ-NEXT: vpcmpleq %zmm1, %zmm0, %k1
522 ; AVX512DQ-NEXT: vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
523 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
524 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
525 ; AVX512DQ-NEXT: vzeroupper
526 ; AVX512DQ-NEXT: retq
530 ; X86-NEXT: vpcmpleq %ymm1, %ymm0, %k1
531 ; X86-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
532 ; X86-NEXT: vpmovm2d %k0, %xmm0
533 ; X86-NEXT: vzeroupper
535 %x_gt_y = icmp sgt <4 x i64> %x, %y
536 %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
537 %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1
538 %resse = sext <4 x i1>%res to <4 x i32>
; Compare-of-compares on <2 x i1>: a signed-less-than compare of %x/%y and a
; signed-greater-than compare of %x1/%y1 produce two masks, which are compared
; with icmp slt; the resulting <2 x i1> is sign-extended to <2 x i64>.
; Note that %x_gt_y is computed with slt despite its name.
; Every run expects a vpcmpleq on the %x1/%y1 registers whose result
; predicates a vpcmpgtq on the %x/%y registers.
542 define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
545 ; KNL-NEXT: ## kill: def $xmm3 killed $xmm3 def $zmm3
546 ; KNL-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
547 ; KNL-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
548 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
549 ; KNL-NEXT: vpcmpleq %zmm3, %zmm2, %k1
550 ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 {%k1}
551 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
552 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
553 ; KNL-NEXT: vzeroupper
558 ; SKX-NEXT: vpcmpleq %xmm3, %xmm2, %k1
559 ; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1}
560 ; SKX-NEXT: vpmovm2q %k0, %xmm0
563 ; AVX512BW-LABEL: test5:
564 ; AVX512BW: ## %bb.0:
565 ; AVX512BW-NEXT: ## kill: def $xmm3 killed $xmm3 def $zmm3
566 ; AVX512BW-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
567 ; AVX512BW-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
568 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
569 ; AVX512BW-NEXT: vpcmpleq %zmm3, %zmm2, %k1
570 ; AVX512BW-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 {%k1}
571 ; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
572 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
573 ; AVX512BW-NEXT: vzeroupper
574 ; AVX512BW-NEXT: retq
576 ; AVX512DQ-LABEL: test5:
577 ; AVX512DQ: ## %bb.0:
578 ; AVX512DQ-NEXT: ## kill: def $xmm3 killed $xmm3 def $zmm3
579 ; AVX512DQ-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
580 ; AVX512DQ-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
581 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
582 ; AVX512DQ-NEXT: vpcmpleq %zmm3, %zmm2, %k1
583 ; AVX512DQ-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 {%k1}
584 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
585 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
586 ; AVX512DQ-NEXT: vzeroupper
587 ; AVX512DQ-NEXT: retq
591 ; X86-NEXT: vpcmpleq %xmm3, %xmm2, %k1
592 ; X86-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1}
593 ; X86-NEXT: vpmovm2q %k0, %xmm0
595 %x_gt_y = icmp slt <2 x i64> %x, %y
596 %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
597 %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1
598 %resse = sext <2 x i1>%res to <2 x i64>
600 }define void @test6(<16 x i1> %mask) {
; Branches on whether a <16 x i1> mask has any of its even-indexed elements
; set: %mask is and'ed with an alternating true/false constant, bitcast to i16
; and compared against zero.
; NOTE(review): no assertions are visible for this function. The `define`
; shares a line with the closing brace of the previous function, which
; presumably keeps the assertion-update script from recognising it - confirm,
; then split the line and regenerate the assertions.
602 %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
603 %b = bitcast <16 x i1> %a to i16
604 %c = icmp eq i16 %b, 0
605 br i1 %c, label %true, label %false
; Branches on an <8 x i1> mask or'ed with an alternating true/false constant:
; the result is bitcast to i8 and compared against zero.
; Every run expects the vector argument to be converted to a mask register,
; moved to %eax, and the constant applied as a scalar orb $85 (0x55). How the
; mask register is produced differs per run (vptestmq, vpmovw2m or vpmovq2m).
613 define void @test7(<8 x i1> %mask) {
615 ; KNL: ## %bb.0: ## %allocas
616 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
617 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
618 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
619 ; KNL-NEXT: kmovw %k0, %eax
620 ; KNL-NEXT: orb $85, %al
621 ; KNL-NEXT: vzeroupper
625 ; SKX: ## %bb.0: ## %allocas
626 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
627 ; SKX-NEXT: vpmovw2m %xmm0, %k0
628 ; SKX-NEXT: kmovd %k0, %eax
629 ; SKX-NEXT: orb $85, %al
632 ; AVX512BW-LABEL: test7:
633 ; AVX512BW: ## %bb.0: ## %allocas
634 ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
635 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
636 ; AVX512BW-NEXT: kmovd %k0, %eax
637 ; AVX512BW-NEXT: orb $85, %al
638 ; AVX512BW-NEXT: vzeroupper
639 ; AVX512BW-NEXT: retq
641 ; AVX512DQ-LABEL: test7:
642 ; AVX512DQ: ## %bb.0: ## %allocas
643 ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
644 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
645 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
646 ; AVX512DQ-NEXT: kmovw %k0, %eax
647 ; AVX512DQ-NEXT: orb $85, %al
648 ; AVX512DQ-NEXT: vzeroupper
649 ; AVX512DQ-NEXT: retq
652 ; X86: ## %bb.0: ## %allocas
653 ; X86-NEXT: vpsllw $15, %xmm0, %xmm0
654 ; X86-NEXT: vpmovw2m %xmm0, %k0
655 ; X86-NEXT: kmovd %k0, %eax
656 ; X86-NEXT: orb $85, %al
659 %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
660 %b = bitcast <8 x i1> %a to i8
661 %c = icmp eq i8 %b, 0
662 br i1 %c, label %true, label %false
; Scalar-controlled select between two <16 x i1> compare results, sign-extended
; to <16 x i8>. The condition is %a1 > %b1 (signed). The first arm is
; `%a > 0` (signed); the second arm is `%b <u 0`, an unsigned less-than-zero
; compare, which the expected code materialises as a zeroed mask register
; (kxorw) instead of a vector compare.
; The select is expected to be lowered as a branch (cmpl + jg) in every run.
670 define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
673 ; KNL-NEXT: cmpl %esi, %edi
674 ; KNL-NEXT: jg LBB17_1
675 ; KNL-NEXT: ## %bb.2:
676 ; KNL-NEXT: kxorw %k0, %k0, %k1
677 ; KNL-NEXT: jmp LBB17_3
679 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
680 ; KNL-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
682 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
683 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
684 ; KNL-NEXT: vzeroupper
689 ; SKX-NEXT: cmpl %esi, %edi
690 ; SKX-NEXT: jg LBB17_1
691 ; SKX-NEXT: ## %bb.2:
692 ; SKX-NEXT: kxorw %k0, %k0, %k0
693 ; SKX-NEXT: vpmovm2b %k0, %xmm0
694 ; SKX-NEXT: vzeroupper
697 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
698 ; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
699 ; SKX-NEXT: vpmovm2b %k0, %xmm0
700 ; SKX-NEXT: vzeroupper
703 ; AVX512BW-LABEL: test8:
704 ; AVX512BW: ## %bb.0:
705 ; AVX512BW-NEXT: cmpl %esi, %edi
706 ; AVX512BW-NEXT: jg LBB17_1
707 ; AVX512BW-NEXT: ## %bb.2:
708 ; AVX512BW-NEXT: kxorw %k0, %k0, %k0
709 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
710 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
711 ; AVX512BW-NEXT: vzeroupper
712 ; AVX512BW-NEXT: retq
713 ; AVX512BW-NEXT: LBB17_1:
714 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
715 ; AVX512BW-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
716 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
717 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
718 ; AVX512BW-NEXT: vzeroupper
719 ; AVX512BW-NEXT: retq
721 ; AVX512DQ-LABEL: test8:
722 ; AVX512DQ: ## %bb.0:
723 ; AVX512DQ-NEXT: cmpl %esi, %edi
724 ; AVX512DQ-NEXT: jg LBB17_1
725 ; AVX512DQ-NEXT: ## %bb.2:
726 ; AVX512DQ-NEXT: kxorw %k0, %k0, %k0
727 ; AVX512DQ-NEXT: jmp LBB17_3
728 ; AVX512DQ-NEXT: LBB17_1:
729 ; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
730 ; AVX512DQ-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
731 ; AVX512DQ-NEXT: LBB17_3:
732 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
733 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
734 ; AVX512DQ-NEXT: vzeroupper
735 ; AVX512DQ-NEXT: retq
739 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
740 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
741 ; X86-NEXT: jg LBB17_1
742 ; X86-NEXT: ## %bb.2:
743 ; X86-NEXT: kxorw %k0, %k0, %k0
744 ; X86-NEXT: vpmovm2b %k0, %xmm0
745 ; X86-NEXT: vzeroupper
748 ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
749 ; X86-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
750 ; X86-NEXT: vpmovm2b %k0, %xmm0
751 ; X86-NEXT: vzeroupper
753 %cond = icmp sgt i32 %a1, %b1
754 %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer
755 %cmp2 = icmp ult <16 x i32> %b, zeroinitializer
756 %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2
757 %res = sext <16 x i1> %mix to <16 x i8>
; Scalar-controlled select between two <16 x i1> arguments: picks %a when
; %a1 > %b1 (signed), otherwise %b.
; Every run expects a branch (cmpl + jg) that chooses which xmm argument to
; normalise, followed by a round trip through a mask register. The KNL and
; AVX512DQ runs widen bytes to dwords first (vpmovsxbd); the others shift the
; bytes in place (vpsllw $7) and use vpmovb2m.
760 define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
763 ; KNL-NEXT: cmpl %esi, %edi
764 ; KNL-NEXT: jg LBB18_1
765 ; KNL-NEXT: ## %bb.2:
766 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
767 ; KNL-NEXT: jmp LBB18_3
769 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
771 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
772 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
773 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
774 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
775 ; KNL-NEXT: vzeroupper
780 ; SKX-NEXT: cmpl %esi, %edi
781 ; SKX-NEXT: jg LBB18_1
782 ; SKX-NEXT: ## %bb.2:
783 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm0
784 ; SKX-NEXT: jmp LBB18_3
786 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
788 ; SKX-NEXT: vpmovb2m %xmm0, %k0
789 ; SKX-NEXT: vpmovm2b %k0, %xmm0
792 ; AVX512BW-LABEL: test9:
793 ; AVX512BW: ## %bb.0:
794 ; AVX512BW-NEXT: cmpl %esi, %edi
795 ; AVX512BW-NEXT: jg LBB18_1
796 ; AVX512BW-NEXT: ## %bb.2:
797 ; AVX512BW-NEXT: vpsllw $7, %xmm1, %xmm0
798 ; AVX512BW-NEXT: jmp LBB18_3
799 ; AVX512BW-NEXT: LBB18_1:
800 ; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
801 ; AVX512BW-NEXT: LBB18_3:
802 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
803 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
804 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
805 ; AVX512BW-NEXT: vzeroupper
806 ; AVX512BW-NEXT: retq
808 ; AVX512DQ-LABEL: test9:
809 ; AVX512DQ: ## %bb.0:
810 ; AVX512DQ-NEXT: cmpl %esi, %edi
811 ; AVX512DQ-NEXT: jg LBB18_1
812 ; AVX512DQ-NEXT: ## %bb.2:
813 ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0
814 ; AVX512DQ-NEXT: jmp LBB18_3
815 ; AVX512DQ-NEXT: LBB18_1:
816 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
817 ; AVX512DQ-NEXT: LBB18_3:
818 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
819 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
820 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
821 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
822 ; AVX512DQ-NEXT: vzeroupper
823 ; AVX512DQ-NEXT: retq
827 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
828 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
829 ; X86-NEXT: jg LBB18_1
830 ; X86-NEXT: ## %bb.2:
831 ; X86-NEXT: vpsllw $7, %xmm1, %xmm0
832 ; X86-NEXT: jmp LBB18_3
834 ; X86-NEXT: vpsllw $7, %xmm0, %xmm0
836 ; X86-NEXT: vpmovb2m %xmm0, %k0
837 ; X86-NEXT: vpmovm2b %k0, %xmm0
839 %mask = icmp sgt i32 %a1, %b1
840 %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b
842 }define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
; Scalar-controlled select between two <8 x i1> arguments: picks %a when
; %a1 > %b1 (signed), otherwise %b.
; NOTE(review): no assertions are visible for this function. The `define`
; shares a line with the closing brace of the previous function, which
; presumably keeps the assertion-update script from recognising it - confirm,
; then split the line and regenerate the assertions.
843 %mask = icmp sgt i32 %a1, %b1
844 %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b
; Scalar-controlled select between two <4 x i1> arguments: picks %a when
; %a1 > %b1 (signed), otherwise %b.
; Every run expects a branch (cmpl + jg) that chooses which xmm argument to
; shift (vpslld $31), followed by a round trip through a mask register. The
; KNL and AVX512BW runs use vptestmd + vpternlogd; the others use vpmovd2m +
; vpmovm2d.
848 define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
851 ; KNL-NEXT: cmpl %esi, %edi
852 ; KNL-NEXT: jg LBB20_1
853 ; KNL-NEXT: ## %bb.2:
854 ; KNL-NEXT: vpslld $31, %xmm1, %xmm0
855 ; KNL-NEXT: jmp LBB20_3
857 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
859 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
860 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
861 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
862 ; KNL-NEXT: vzeroupper
867 ; SKX-NEXT: cmpl %esi, %edi
868 ; SKX-NEXT: jg LBB20_1
869 ; SKX-NEXT: ## %bb.2:
870 ; SKX-NEXT: vpslld $31, %xmm1, %xmm0
871 ; SKX-NEXT: jmp LBB20_3
873 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0
875 ; SKX-NEXT: vpmovd2m %xmm0, %k0
876 ; SKX-NEXT: vpmovm2d %k0, %xmm0
879 ; AVX512BW-LABEL: test11:
880 ; AVX512BW: ## %bb.0:
881 ; AVX512BW-NEXT: cmpl %esi, %edi
882 ; AVX512BW-NEXT: jg LBB20_1
883 ; AVX512BW-NEXT: ## %bb.2:
884 ; AVX512BW-NEXT: vpslld $31, %xmm1, %xmm0
885 ; AVX512BW-NEXT: jmp LBB20_3
886 ; AVX512BW-NEXT: LBB20_1:
887 ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
888 ; AVX512BW-NEXT: LBB20_3:
889 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k1
890 ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
891 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
892 ; AVX512BW-NEXT: vzeroupper
893 ; AVX512BW-NEXT: retq
895 ; AVX512DQ-LABEL: test11:
896 ; AVX512DQ: ## %bb.0:
897 ; AVX512DQ-NEXT: cmpl %esi, %edi
898 ; AVX512DQ-NEXT: jg LBB20_1
899 ; AVX512DQ-NEXT: ## %bb.2:
900 ; AVX512DQ-NEXT: vpslld $31, %xmm1, %xmm0
901 ; AVX512DQ-NEXT: jmp LBB20_3
902 ; AVX512DQ-NEXT: LBB20_1:
903 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
904 ; AVX512DQ-NEXT: LBB20_3:
905 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
906 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
907 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
908 ; AVX512DQ-NEXT: vzeroupper
909 ; AVX512DQ-NEXT: retq
913 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
914 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
915 ; X86-NEXT: jg LBB20_1
916 ; X86-NEXT: ## %bb.2:
917 ; X86-NEXT: vpslld $31, %xmm1, %xmm0
918 ; X86-NEXT: jmp LBB20_3
920 ; X86-NEXT: vpslld $31, %xmm0, %xmm0
922 ; X86-NEXT: vpmovd2m %xmm0, %k0
923 ; X86-NEXT: vpmovm2d %k0, %xmm0
925 %mask = icmp sgt i32 %a1, %b1
926 %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b
; Constant folding through a mask bitcast: the constant 21845 (0x5555) is
; viewed as <16 x i1>, element 0 is extracted and used to select between %x
; and %y. The x86-64 runs expect the whole thing to fold to a plain copy of
; %edi into %eax, i.e. no mask instructions at all.
930 define i32 @test12(i32 %x, i32 %y) {
931 ; CHECK-LABEL: test12:
933 ; CHECK-NEXT: movl %edi, %eax
938 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
940 %a = bitcast i16 21845 to <16 x i1>
941 %b = extractelement <16 x i1> %a, i32 0
942 %c = select i1 %b, i32 %x, i32 %y
; Same constant fold as test12, but extracting element 3 of the 0x5555 mask.
; Here the x86-64 runs expect a plain copy of %esi into %eax, i.e. the other
; select operand is chosen.
946 define i32 @test13(i32 %x, i32 %y) {
947 ; CHECK-LABEL: test13:
949 ; CHECK-NEXT: movl %esi, %eax
954 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
956 %a = bitcast i16 21845 to <16 x i1>
957 %b = extractelement <16 x i1> %a, i32 3
958 %c = select i1 %b, i32 %x, i32 %y
962 ; Make sure we don't crash on a large vector.
; The constant here is an i128 viewed as <128 x i1>; element 3 is extracted
; and used to select between %x and %y. The x86-64 runs expect the result to
; fold to a plain copy of %edi into %eax.
963 define i32 @test13_crash(i32 %x, i32 %y) {
964 ; CHECK-LABEL: test13_crash:
966 ; CHECK-NEXT: movl %edi, %eax
969 ; X86-LABEL: test13_crash:
971 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
973 %a = bitcast i128 2184568686868686868686868686 to <128 x i1>
974 %b = extractelement <128 x i1> %a, i32 3
975 %c = select i1 %b, i32 %x, i32 %y
; Constant folding of extract + insert on mask vectors: element 2 of the
; 0x5555 mask is extracted and inserted at index 1 of the constant
; <true, false, false, true>. Every run expects the result to be materialised
; directly as the constant vector [1,1,0,1].
979 define <4 x i1> @test14() {
980 ; CHECK-LABEL: test14:
982 ; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,0,1]
987 ; X86-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,0,1]
989 %a = bitcast i16 21845 to <16 x i1>
990 %b = extractelement <16 x i1> %a, i32 2
991 %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
; Scalar-controlled select between two constant <16 x i1> masks: 21845
; (0x5555) when %x > %y (signed), otherwise 1.
; Every run expects the select to be done on the scalar constants with a
; conditional move (cmovgl), followed by a single move into a mask register
; and expansion of that mask into a byte vector.
995 define <16 x i1> @test15(i32 %x, i32 %y) {
998 ; KNL-NEXT: cmpl %esi, %edi
999 ; KNL-NEXT: movl $21845, %eax ## imm = 0x5555
1000 ; KNL-NEXT: movl $1, %ecx
1001 ; KNL-NEXT: cmovgl %eax, %ecx
1002 ; KNL-NEXT: kmovw %ecx, %k1
1003 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1004 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
1005 ; KNL-NEXT: vzeroupper
1008 ; SKX-LABEL: test15:
1010 ; SKX-NEXT: cmpl %esi, %edi
1011 ; SKX-NEXT: movl $21845, %eax ## imm = 0x5555
1012 ; SKX-NEXT: movl $1, %ecx
1013 ; SKX-NEXT: cmovgl %eax, %ecx
1014 ; SKX-NEXT: kmovd %ecx, %k0
1015 ; SKX-NEXT: vpmovm2b %k0, %xmm0
1018 ; AVX512BW-LABEL: test15:
1019 ; AVX512BW: ## %bb.0:
1020 ; AVX512BW-NEXT: cmpl %esi, %edi
1021 ; AVX512BW-NEXT: movl $21845, %eax ## imm = 0x5555
1022 ; AVX512BW-NEXT: movl $1, %ecx
1023 ; AVX512BW-NEXT: cmovgl %eax, %ecx
1024 ; AVX512BW-NEXT: kmovd %ecx, %k0
1025 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
1026 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
1027 ; AVX512BW-NEXT: vzeroupper
1028 ; AVX512BW-NEXT: retq
1030 ; AVX512DQ-LABEL: test15:
1031 ; AVX512DQ: ## %bb.0:
1032 ; AVX512DQ-NEXT: cmpl %esi, %edi
1033 ; AVX512DQ-NEXT: movl $21845, %eax ## imm = 0x5555
1034 ; AVX512DQ-NEXT: movl $1, %ecx
1035 ; AVX512DQ-NEXT: cmovgl %eax, %ecx
1036 ; AVX512DQ-NEXT: kmovw %ecx, %k0
1037 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
1038 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1039 ; AVX512DQ-NEXT: vzeroupper
1040 ; AVX512DQ-NEXT: retq
1042 ; X86-LABEL: test15:
1044 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1045 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
1046 ; X86-NEXT: movl $21845, %eax ## imm = 0x5555
1047 ; X86-NEXT: movl $1, %ecx
1048 ; X86-NEXT: cmovgl %eax, %ecx
1049 ; X86-NEXT: kmovd %ecx, %k0
1050 ; X86-NEXT: vpmovm2b %k0, %xmm0
1052 %a = bitcast i16 21845 to <16 x i1>
1053 %b = bitcast i16 1 to <16 x i1>
1054 %mask = icmp sgt i32 %x, %y
1055 %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
; Inserts a constant `true` into a 64-bit mask: %x is viewed as <64 x i1>,
; element 5 is set to true, and the mask is sign-extended to <64 x i8>.
; The SKX, AVX512BW and i686 runs expect the whole mask in one k-register,
; with the insert done by a shift/xor sequence (kshiftrq $5, kxorq,
; kshiftlq $63, kshiftrq $58, kxorq) and a single vpmovm2b.
; The KNL and AVX512DQ runs expect %x to be split into four 16-bit pieces
; (shifts by 16, 32 and 48 plus four kmovw), the insert applied with the
; 16-bit form of the same sequence, and the result reassembled with
; vpmovdb / vinserti128.
1059 define <64 x i8> @test16(i64 %x) {
1061 ; KNL-LABEL: test16:
1063 ; KNL-NEXT: movq %rdi, %rax
1064 ; KNL-NEXT: movl %edi, %ecx
1065 ; KNL-NEXT: kmovw %edi, %k0
1066 ; KNL-NEXT: shrq $32, %rdi
1067 ; KNL-NEXT: shrq $48, %rax
1068 ; KNL-NEXT: shrl $16, %ecx
1069 ; KNL-NEXT: kmovw %ecx, %k1
1070 ; KNL-NEXT: kmovw %eax, %k2
1071 ; KNL-NEXT: kmovw %edi, %k3
1072 ; KNL-NEXT: kshiftrw $5, %k0, %k4
1073 ; KNL-NEXT: kxnorw %k0, %k0, %k5
1074 ; KNL-NEXT: kxorw %k5, %k4, %k4
1075 ; KNL-NEXT: kshiftlw $15, %k4, %k4
1076 ; KNL-NEXT: kshiftrw $10, %k4, %k4
1077 ; KNL-NEXT: kxorw %k4, %k0, %k4
1078 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
1079 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
1080 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
1081 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
1082 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
1083 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z}
1084 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
1085 ; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
1086 ; KNL-NEXT: vpmovdb %zmm2, %xmm2
1087 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1090 ; SKX-LABEL: test16:
1092 ; SKX-NEXT: kmovq %rdi, %k0
1093 ; SKX-NEXT: kxnorw %k0, %k0, %k1
1094 ; SKX-NEXT: kshiftrq $5, %k0, %k2
1095 ; SKX-NEXT: kxorq %k1, %k2, %k1
1096 ; SKX-NEXT: kshiftlq $63, %k1, %k1
1097 ; SKX-NEXT: kshiftrq $58, %k1, %k1
1098 ; SKX-NEXT: kxorq %k1, %k0, %k0
1099 ; SKX-NEXT: vpmovm2b %k0, %zmm0
1102 ; AVX512BW-LABEL: test16:
1103 ; AVX512BW: ## %bb.0:
1104 ; AVX512BW-NEXT: kmovq %rdi, %k0
1105 ; AVX512BW-NEXT: kxnorw %k0, %k0, %k1
1106 ; AVX512BW-NEXT: kshiftrq $5, %k0, %k2
1107 ; AVX512BW-NEXT: kxorq %k1, %k2, %k1
1108 ; AVX512BW-NEXT: kshiftlq $63, %k1, %k1
1109 ; AVX512BW-NEXT: kshiftrq $58, %k1, %k1
1110 ; AVX512BW-NEXT: kxorq %k1, %k0, %k0
1111 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
1112 ; AVX512BW-NEXT: retq
1114 ; AVX512DQ-LABEL: test16:
1115 ; AVX512DQ: ## %bb.0:
1116 ; AVX512DQ-NEXT: movq %rdi, %rax
1117 ; AVX512DQ-NEXT: movl %edi, %ecx
1118 ; AVX512DQ-NEXT: kmovw %edi, %k0
1119 ; AVX512DQ-NEXT: shrq $32, %rdi
1120 ; AVX512DQ-NEXT: shrq $48, %rax
1121 ; AVX512DQ-NEXT: shrl $16, %ecx
1122 ; AVX512DQ-NEXT: kmovw %ecx, %k1
1123 ; AVX512DQ-NEXT: kmovw %eax, %k2
1124 ; AVX512DQ-NEXT: kmovw %edi, %k3
1125 ; AVX512DQ-NEXT: kshiftrw $5, %k0, %k4
1126 ; AVX512DQ-NEXT: kxnorw %k0, %k0, %k5
1127 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
1128 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
1129 ; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4
1130 ; AVX512DQ-NEXT: kxorw %k4, %k0, %k0
1131 ; AVX512DQ-NEXT: vpmovm2d %k3, %zmm0
1132 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1133 ; AVX512DQ-NEXT: vpmovm2d %k2, %zmm1
1134 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
1135 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
1136 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
1137 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1138 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm2
1139 ; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
1140 ; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1141 ; AVX512DQ-NEXT: retq
1143 ; X86-LABEL: test16:
1145 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0
1146 ; X86-NEXT: kshiftrq $5, %k0, %k1
1147 ; X86-NEXT: kxnorw %k0, %k0, %k2
1148 ; X86-NEXT: kxorq %k2, %k1, %k1
1149 ; X86-NEXT: kshiftlq $63, %k1, %k1
1150 ; X86-NEXT: kshiftrq $58, %k1, %k1
1151 ; X86-NEXT: kxorq %k1, %k0, %k0
1152 ; X86-NEXT: vpmovm2b %k0, %zmm0
1154 %a = bitcast i64 %x to <64 x i1>
1155 %b = insertelement <64 x i1>%a, i1 true, i32 5
1156 %c = sext <64 x i1>%b to <64 x i8>
; test17 - overwrite lane 5 of a 64-bit mask with a run-time i1.
; Same shape as the constant-insert case, but the inserted bit is the result
; of a signed greater-than compare of %y and %z. The expected code produces
; that bit with cmpl and setg, moves it into a mask register, and then uses
; the shift and xor idiom to place it at bit 5. Runs with avx512bw are
; expected to use 64-bit mask operations. Runs without it are expected to
; split the mask into four 16-bit pieces.
1160 define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
1162 ; KNL-LABEL: test17:
1164 ; KNL-NEXT: movq %rdi, %rax
1165 ; KNL-NEXT: movl %edi, %ecx
1166 ; KNL-NEXT: kmovw %edi, %k0
1167 ; KNL-NEXT: shrq $32, %rdi
1168 ; KNL-NEXT: shrq $48, %rax
1169 ; KNL-NEXT: shrl $16, %ecx
1170 ; KNL-NEXT: kmovw %ecx, %k1
1171 ; KNL-NEXT: kmovw %eax, %k2
1172 ; KNL-NEXT: kmovw %edi, %k3
1173 ; KNL-NEXT: cmpl %edx, %esi
1174 ; KNL-NEXT: setg %al
1175 ; KNL-NEXT: kshiftrw $5, %k0, %k4
1176 ; KNL-NEXT: kmovw %eax, %k5
1177 ; KNL-NEXT: kxorw %k5, %k4, %k4
1178 ; KNL-NEXT: kshiftlw $15, %k4, %k4
1179 ; KNL-NEXT: kshiftrw $10, %k4, %k4
1180 ; KNL-NEXT: kxorw %k4, %k0, %k4
1181 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
1182 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
1183 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
1184 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
1185 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
1186 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z}
1187 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
1188 ; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
1189 ; KNL-NEXT: vpmovdb %zmm2, %xmm2
1190 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1193 ; SKX-LABEL: test17:
1195 ; SKX-NEXT: kmovq %rdi, %k0
1196 ; SKX-NEXT: cmpl %edx, %esi
1197 ; SKX-NEXT: setg %al
1198 ; SKX-NEXT: kmovd %eax, %k1
1199 ; SKX-NEXT: kshiftrq $5, %k0, %k2
1200 ; SKX-NEXT: kxorq %k1, %k2, %k1
1201 ; SKX-NEXT: kshiftlq $63, %k1, %k1
1202 ; SKX-NEXT: kshiftrq $58, %k1, %k1
1203 ; SKX-NEXT: kxorq %k1, %k0, %k0
1204 ; SKX-NEXT: vpmovm2b %k0, %zmm0
1207 ; AVX512BW-LABEL: test17:
1208 ; AVX512BW: ## %bb.0:
1209 ; AVX512BW-NEXT: kmovq %rdi, %k0
1210 ; AVX512BW-NEXT: cmpl %edx, %esi
1211 ; AVX512BW-NEXT: setg %al
1212 ; AVX512BW-NEXT: kmovd %eax, %k1
1213 ; AVX512BW-NEXT: kshiftrq $5, %k0, %k2
1214 ; AVX512BW-NEXT: kxorq %k1, %k2, %k1
1215 ; AVX512BW-NEXT: kshiftlq $63, %k1, %k1
1216 ; AVX512BW-NEXT: kshiftrq $58, %k1, %k1
1217 ; AVX512BW-NEXT: kxorq %k1, %k0, %k0
1218 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
1219 ; AVX512BW-NEXT: retq
1221 ; AVX512DQ-LABEL: test17:
1222 ; AVX512DQ: ## %bb.0:
1223 ; AVX512DQ-NEXT: movq %rdi, %rax
1224 ; AVX512DQ-NEXT: movl %edi, %ecx
1225 ; AVX512DQ-NEXT: kmovw %edi, %k0
1226 ; AVX512DQ-NEXT: shrq $32, %rdi
1227 ; AVX512DQ-NEXT: shrq $48, %rax
1228 ; AVX512DQ-NEXT: shrl $16, %ecx
1229 ; AVX512DQ-NEXT: kmovw %ecx, %k1
1230 ; AVX512DQ-NEXT: kmovw %eax, %k2
1231 ; AVX512DQ-NEXT: kmovw %edi, %k3
1232 ; AVX512DQ-NEXT: cmpl %edx, %esi
1233 ; AVX512DQ-NEXT: setg %al
1234 ; AVX512DQ-NEXT: kshiftrw $5, %k0, %k4
1235 ; AVX512DQ-NEXT: kmovw %eax, %k5
1236 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
1237 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
1238 ; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4
1239 ; AVX512DQ-NEXT: kxorw %k4, %k0, %k0
1240 ; AVX512DQ-NEXT: vpmovm2d %k3, %zmm0
1241 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1242 ; AVX512DQ-NEXT: vpmovm2d %k2, %zmm1
1243 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
1244 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
1245 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
1246 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1247 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm2
1248 ; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
1249 ; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1250 ; AVX512DQ-NEXT: retq
1252 ; X86-LABEL: test17:
1254 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1255 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0
1256 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
1257 ; X86-NEXT: setg %al
1258 ; X86-NEXT: kmovd %eax, %k1
1259 ; X86-NEXT: kshiftrq $5, %k0, %k2
1260 ; X86-NEXT: kxorq %k1, %k2, %k1
1261 ; X86-NEXT: kshiftlq $63, %k1, %k1
1262 ; X86-NEXT: kshiftrq $58, %k1, %k1
1263 ; X86-NEXT: kxorq %k1, %k0, %k0
1264 ; X86-NEXT: vpmovm2b %k0, %zmm0
; IR under test - bitcast, compare, insert of the compare result at index 5,
; sign extension.
1266 %a = bitcast i64 %x to <64 x i1>
1267 %b = icmp sgt i32 %y, %z
1268 %c = insertelement <64 x i1>%a, i1 %b, i32 5
1269 %d = sext <64 x i1>%c to <64 x i8>
; test18 - move two bits from a 16-bit mask into an 8-bit mask.
; %a is viewed as <8 x i1> and %y as <16 x i1>. Element 8 of the wide mask is
; inserted at lane 7 and element 9 at lane 6 of the narrow mask. The expected
; code extracts the two bits with kshiftrw by 8 and by 9. Runs with avx512dq
; are expected to do the inserts with byte-sized mask operations (kshiftrb,
; kxorb, korb). The other runs are expected to use the 16-bit forms.
1273 define <8 x i1> @test18(i8 %a, i16 %y) {
1274 ; KNL-LABEL: test18:
1276 ; KNL-NEXT: kmovw %edi, %k0
1277 ; KNL-NEXT: kmovw %esi, %k1
1278 ; KNL-NEXT: kshiftrw $8, %k1, %k2
1279 ; KNL-NEXT: kshiftrw $9, %k1, %k1
1280 ; KNL-NEXT: kshiftrw $6, %k0, %k3
1281 ; KNL-NEXT: kxorw %k1, %k3, %k1
1282 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1283 ; KNL-NEXT: kshiftrw $9, %k1, %k1
1284 ; KNL-NEXT: kxorw %k1, %k0, %k0
1285 ; KNL-NEXT: kshiftlw $9, %k0, %k0
1286 ; KNL-NEXT: kshiftrw $9, %k0, %k0
1287 ; KNL-NEXT: kshiftlw $7, %k2, %k1
1288 ; KNL-NEXT: korw %k1, %k0, %k1
1289 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1290 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
1291 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
1292 ; KNL-NEXT: vzeroupper
1295 ; SKX-LABEL: test18:
1297 ; SKX-NEXT: kmovd %edi, %k0
1298 ; SKX-NEXT: kmovd %esi, %k1
1299 ; SKX-NEXT: kshiftrw $8, %k1, %k2
1300 ; SKX-NEXT: kshiftrw $9, %k1, %k1
1301 ; SKX-NEXT: kshiftrb $6, %k0, %k3
1302 ; SKX-NEXT: kxorb %k1, %k3, %k1
1303 ; SKX-NEXT: kshiftlb $7, %k1, %k1
1304 ; SKX-NEXT: kshiftrb $1, %k1, %k1
1305 ; SKX-NEXT: kxorb %k1, %k0, %k0
1306 ; SKX-NEXT: kshiftlb $1, %k0, %k0
1307 ; SKX-NEXT: kshiftrb $1, %k0, %k0
1308 ; SKX-NEXT: kshiftlb $7, %k2, %k1
1309 ; SKX-NEXT: korb %k1, %k0, %k0
1310 ; SKX-NEXT: vpmovm2w %k0, %xmm0
1313 ; AVX512BW-LABEL: test18:
1314 ; AVX512BW: ## %bb.0:
1315 ; AVX512BW-NEXT: kmovd %edi, %k0
1316 ; AVX512BW-NEXT: kmovd %esi, %k1
1317 ; AVX512BW-NEXT: kshiftrw $8, %k1, %k2
1318 ; AVX512BW-NEXT: kshiftrw $9, %k1, %k1
1319 ; AVX512BW-NEXT: kshiftrw $6, %k0, %k3
1320 ; AVX512BW-NEXT: kxorw %k1, %k3, %k1
1321 ; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
1322 ; AVX512BW-NEXT: kshiftrw $9, %k1, %k1
1323 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
1324 ; AVX512BW-NEXT: kshiftlw $9, %k0, %k0
1325 ; AVX512BW-NEXT: kshiftrw $9, %k0, %k0
1326 ; AVX512BW-NEXT: kshiftlw $7, %k2, %k1
1327 ; AVX512BW-NEXT: korw %k1, %k0, %k0
1328 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
1329 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
1330 ; AVX512BW-NEXT: vzeroupper
1331 ; AVX512BW-NEXT: retq
1333 ; AVX512DQ-LABEL: test18:
1334 ; AVX512DQ: ## %bb.0:
1335 ; AVX512DQ-NEXT: kmovw %edi, %k0
1336 ; AVX512DQ-NEXT: kmovw %esi, %k1
1337 ; AVX512DQ-NEXT: kshiftrw $8, %k1, %k2
1338 ; AVX512DQ-NEXT: kshiftrw $9, %k1, %k1
1339 ; AVX512DQ-NEXT: kshiftrb $6, %k0, %k3
1340 ; AVX512DQ-NEXT: kxorb %k1, %k3, %k1
1341 ; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1
1342 ; AVX512DQ-NEXT: kshiftrb $1, %k1, %k1
1343 ; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
1344 ; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0
1345 ; AVX512DQ-NEXT: kshiftrb $1, %k0, %k0
1346 ; AVX512DQ-NEXT: kshiftlb $7, %k2, %k1
1347 ; AVX512DQ-NEXT: korb %k1, %k0, %k0
1348 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
1349 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
1350 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
1351 ; AVX512DQ-NEXT: vzeroupper
1352 ; AVX512DQ-NEXT: retq
1354 ; X86-LABEL: test18:
1356 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
1357 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1358 ; X86-NEXT: kshiftrw $8, %k1, %k2
1359 ; X86-NEXT: kshiftrw $9, %k1, %k1
1360 ; X86-NEXT: kshiftrb $6, %k0, %k3
1361 ; X86-NEXT: kxorb %k1, %k3, %k1
1362 ; X86-NEXT: kshiftlb $7, %k1, %k1
1363 ; X86-NEXT: kshiftrb $1, %k1, %k1
1364 ; X86-NEXT: kxorb %k1, %k0, %k0
1365 ; X86-NEXT: kshiftlb $1, %k0, %k0
1366 ; X86-NEXT: kshiftrb $1, %k0, %k0
1367 ; X86-NEXT: kshiftlb $7, %k2, %k1
1368 ; X86-NEXT: korb %k1, %k0, %k0
1369 ; X86-NEXT: vpmovm2w %k0, %xmm0
; IR under test - element 8 goes to lane 7, element 9 goes to lane 6.
1371 %b = bitcast i8 %a to <8 x i1>
1372 %b1 = bitcast i16 %y to <16 x i1>
1373 %el1 = extractelement <16 x i1>%b1, i32 8
1374 %el2 = extractelement <16 x i1>%b1, i32 9
1375 %c = insertelement <8 x i1>%b, i1 %el1, i32 7
1376 %d = insertelement <8 x i1>%c, i1 %el2, i32 6
; test21 - zero-masking select on <32 x i16> driven by a <32 x i1> argument.
; Lanes of %x whose mask bit is clear are replaced by zero. Runs with avx512bw
; are expected to turn the byte mask into a mask register (vpsllw $7 followed
; by vpmovb2m) and use a zeroing vmovdqu16. Runs without avx512bw are expected
; to widen the mask to words, splat the low bit with vpsllw and vpsraw by 15,
; and apply vpand to each 256-bit half.
1379 define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
1380 ; KNL-LABEL: test21:
1382 ; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3
1383 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
1384 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
1385 ; KNL-NEXT: vpsllw $15, %ymm2, %ymm2
1386 ; KNL-NEXT: vpsraw $15, %ymm2, %ymm2
1387 ; KNL-NEXT: vpand %ymm0, %ymm2, %ymm0
1388 ; KNL-NEXT: vpsllw $15, %ymm3, %ymm2
1389 ; KNL-NEXT: vpsraw $15, %ymm2, %ymm2
1390 ; KNL-NEXT: vpand %ymm1, %ymm2, %ymm1
1393 ; SKX-LABEL: test21:
1395 ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1
1396 ; SKX-NEXT: vpmovb2m %ymm1, %k1
1397 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1400 ; AVX512BW-LABEL: test21:
1401 ; AVX512BW: ## %bb.0:
1402 ; AVX512BW-NEXT: vpsllw $7, %ymm1, %ymm1
1403 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
1404 ; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1405 ; AVX512BW-NEXT: retq
1407 ; AVX512DQ-LABEL: test21:
1408 ; AVX512DQ: ## %bb.0:
1409 ; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3
1410 ; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
1411 ; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
1412 ; AVX512DQ-NEXT: vpsllw $15, %ymm2, %ymm2
1413 ; AVX512DQ-NEXT: vpsraw $15, %ymm2, %ymm2
1414 ; AVX512DQ-NEXT: vpand %ymm0, %ymm2, %ymm0
1415 ; AVX512DQ-NEXT: vpsllw $15, %ymm3, %ymm2
1416 ; AVX512DQ-NEXT: vpsraw $15, %ymm2, %ymm2
1417 ; AVX512DQ-NEXT: vpand %ymm1, %ymm2, %ymm1
1418 ; AVX512DQ-NEXT: retq
1420 ; X86-LABEL: test21:
1422 ; X86-NEXT: vpsllw $7, %ymm1, %ymm1
1423 ; X86-NEXT: vpmovb2m %ymm1, %k1
1424 ; X86-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1426 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; test22 - store a <4 x i1> argument to memory.
; The expected code shifts each lane's low bit into the sign position
; (vpslld $31), converts the vector to a mask register and writes a single
; byte. Runs with avx512dq are expected to store straight from the mask
; register with kmovb. The other runs are expected to go through a general
; purpose register and movb.
1430 define void @test22(<4 x i1> %a, <4 x i1>* %addr) {
1431 ; KNL-LABEL: test22:
1433 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1434 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1435 ; KNL-NEXT: kmovw %k0, %eax
1436 ; KNL-NEXT: movb %al, (%rdi)
1437 ; KNL-NEXT: vzeroupper
1440 ; SKX-LABEL: test22:
1442 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0
1443 ; SKX-NEXT: vpmovd2m %xmm0, %k0
1444 ; SKX-NEXT: kmovb %k0, (%rdi)
1447 ; AVX512BW-LABEL: test22:
1448 ; AVX512BW: ## %bb.0:
1449 ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
1450 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
1451 ; AVX512BW-NEXT: kmovd %k0, %eax
1452 ; AVX512BW-NEXT: movb %al, (%rdi)
1453 ; AVX512BW-NEXT: vzeroupper
1454 ; AVX512BW-NEXT: retq
1456 ; AVX512DQ-LABEL: test22:
1457 ; AVX512DQ: ## %bb.0:
1458 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
1459 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
1460 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
1461 ; AVX512DQ-NEXT: vzeroupper
1462 ; AVX512DQ-NEXT: retq
1464 ; X86-LABEL: test22:
1466 ; X86-NEXT: vpslld $31, %xmm0, %xmm0
1467 ; X86-NEXT: vpmovd2m %xmm0, %k0
1468 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1469 ; X86-NEXT: kmovb %k0, (%eax)
1471 store <4 x i1> %a, <4 x i1>* %addr
; test23 - store a <2 x i1> argument to memory.
; Two-lane counterpart of the <4 x i1> store test. The lanes are 64 bits wide
; here, so the expected code uses vpsllq $63 and the quadword mask conversions
; (vptestmq or vpmovq2m) before writing one byte.
1475 define void @test23(<2 x i1> %a, <2 x i1>* %addr) {
1476 ; KNL-LABEL: test23:
1478 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1479 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
1480 ; KNL-NEXT: kmovw %k0, %eax
1481 ; KNL-NEXT: movb %al, (%rdi)
1482 ; KNL-NEXT: vzeroupper
1485 ; SKX-LABEL: test23:
1487 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
1488 ; SKX-NEXT: vpmovq2m %xmm0, %k0
1489 ; SKX-NEXT: kmovb %k0, (%rdi)
1492 ; AVX512BW-LABEL: test23:
1493 ; AVX512BW: ## %bb.0:
1494 ; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0
1495 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
1496 ; AVX512BW-NEXT: kmovd %k0, %eax
1497 ; AVX512BW-NEXT: movb %al, (%rdi)
1498 ; AVX512BW-NEXT: vzeroupper
1499 ; AVX512BW-NEXT: retq
1501 ; AVX512DQ-LABEL: test23:
1502 ; AVX512DQ: ## %bb.0:
1503 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
1504 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
1505 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
1506 ; AVX512DQ-NEXT: vzeroupper
1507 ; AVX512DQ-NEXT: retq
1509 ; X86-LABEL: test23:
1511 ; X86-NEXT: vpsllq $63, %xmm0, %xmm0
1512 ; X86-NEXT: vpmovq2m %xmm0, %k0
1513 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1514 ; X86-NEXT: kmovb %k0, (%eax)
1516 store <2 x i1> %a, <2 x i1>* %addr
; store_v1i1 - invert a <1 x i1> argument and store it.
; The single-lane mask arrives in a general purpose register (or on the stack
; in the i686 run). The expected code moves it into a mask register, xors it
; with an all-ones mask built by kxnorw, and writes one byte.
1520 define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
1521 ; KNL-LABEL: store_v1i1:
1523 ; KNL-NEXT: kmovw %edi, %k0
1524 ; KNL-NEXT: kxnorw %k0, %k0, %k1
1525 ; KNL-NEXT: kxorw %k1, %k0, %k0
1526 ; KNL-NEXT: kmovw %k0, %eax
1527 ; KNL-NEXT: movb %al, (%rsi)
1530 ; SKX-LABEL: store_v1i1:
1532 ; SKX-NEXT: kmovd %edi, %k0
1533 ; SKX-NEXT: kxnorw %k0, %k0, %k1
1534 ; SKX-NEXT: kxorw %k1, %k0, %k0
1535 ; SKX-NEXT: kmovb %k0, (%rsi)
1538 ; AVX512BW-LABEL: store_v1i1:
1539 ; AVX512BW: ## %bb.0:
1540 ; AVX512BW-NEXT: kmovd %edi, %k0
1541 ; AVX512BW-NEXT: kxnorw %k0, %k0, %k1
1542 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
1543 ; AVX512BW-NEXT: kmovd %k0, %eax
1544 ; AVX512BW-NEXT: movb %al, (%rsi)
1545 ; AVX512BW-NEXT: retq
1547 ; AVX512DQ-LABEL: store_v1i1:
1548 ; AVX512DQ: ## %bb.0:
1549 ; AVX512DQ-NEXT: kmovw %edi, %k0
1550 ; AVX512DQ-NEXT: kxnorw %k0, %k0, %k1
1551 ; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
1552 ; AVX512DQ-NEXT: kmovb %k0, (%rsi)
1553 ; AVX512DQ-NEXT: retq
1555 ; X86-LABEL: store_v1i1:
1557 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1558 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1559 ; X86-NEXT: kxnorw %k0, %k0, %k1
1560 ; X86-NEXT: kxorw %k1, %k0, %k0
1561 ; X86-NEXT: kmovb %k0, (%eax)
; IR under test - xor with true inverts the lane before the store.
1563 %x = xor <1 x i1> %c, <i1 1>
1564 store <1 x i1> %x, <1 x i1>* %ptr, align 4
; store_v2i1 - invert a <2 x i1> argument and store it.
; Runs without avx512dq are expected to fold the inversion into the mask
; conversion by using vptestnmq. Runs with avx512dq are expected to convert
; with vpmovq2m and then invert with knotw. Every variant writes one byte.
1568 define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
1569 ; KNL-LABEL: store_v2i1:
1571 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1572 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
1573 ; KNL-NEXT: kmovw %k0, %eax
1574 ; KNL-NEXT: movb %al, (%rdi)
1575 ; KNL-NEXT: vzeroupper
1578 ; SKX-LABEL: store_v2i1:
1580 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
1581 ; SKX-NEXT: vpmovq2m %xmm0, %k0
1582 ; SKX-NEXT: knotw %k0, %k0
1583 ; SKX-NEXT: kmovb %k0, (%rdi)
1586 ; AVX512BW-LABEL: store_v2i1:
1587 ; AVX512BW: ## %bb.0:
1588 ; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0
1589 ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
1590 ; AVX512BW-NEXT: kmovd %k0, %eax
1591 ; AVX512BW-NEXT: movb %al, (%rdi)
1592 ; AVX512BW-NEXT: vzeroupper
1593 ; AVX512BW-NEXT: retq
1595 ; AVX512DQ-LABEL: store_v2i1:
1596 ; AVX512DQ: ## %bb.0:
1597 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
1598 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
1599 ; AVX512DQ-NEXT: knotw %k0, %k0
1600 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
1601 ; AVX512DQ-NEXT: vzeroupper
1602 ; AVX512DQ-NEXT: retq
1604 ; X86-LABEL: store_v2i1:
1606 ; X86-NEXT: vpsllq $63, %xmm0, %xmm0
1607 ; X86-NEXT: vpmovq2m %xmm0, %k0
1608 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1609 ; X86-NEXT: knotw %k0, %k0
1610 ; X86-NEXT: kmovb %k0, (%eax)
; IR under test - xor with all-true inverts both lanes before the store.
1612 %x = xor <2 x i1> %c, <i1 1, i1 1>
1613 store <2 x i1> %x, <2 x i1>* %ptr, align 4
; store_v4i1 - invert a <4 x i1> argument and store it.
; Doubleword-lane counterpart of the <2 x i1> case. Runs without avx512dq are
; expected to use vptestnmd so that the inversion is part of the mask
; conversion. Runs with avx512dq are expected to use vpmovd2m followed by
; knotw. Every variant writes one byte.
1617 define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
1618 ; KNL-LABEL: store_v4i1:
1620 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1621 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
1622 ; KNL-NEXT: kmovw %k0, %eax
1623 ; KNL-NEXT: movb %al, (%rdi)
1624 ; KNL-NEXT: vzeroupper
1627 ; SKX-LABEL: store_v4i1:
1629 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0
1630 ; SKX-NEXT: vpmovd2m %xmm0, %k0
1631 ; SKX-NEXT: knotw %k0, %k0
1632 ; SKX-NEXT: kmovb %k0, (%rdi)
1635 ; AVX512BW-LABEL: store_v4i1:
1636 ; AVX512BW: ## %bb.0:
1637 ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
1638 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
1639 ; AVX512BW-NEXT: kmovd %k0, %eax
1640 ; AVX512BW-NEXT: movb %al, (%rdi)
1641 ; AVX512BW-NEXT: vzeroupper
1642 ; AVX512BW-NEXT: retq
1644 ; AVX512DQ-LABEL: store_v4i1:
1645 ; AVX512DQ: ## %bb.0:
1646 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
1647 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
1648 ; AVX512DQ-NEXT: knotw %k0, %k0
1649 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
1650 ; AVX512DQ-NEXT: vzeroupper
1651 ; AVX512DQ-NEXT: retq
1653 ; X86-LABEL: store_v4i1:
1655 ; X86-NEXT: vpslld $31, %xmm0, %xmm0
1656 ; X86-NEXT: vpmovd2m %xmm0, %k0
1657 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1658 ; X86-NEXT: knotw %k0, %k0
1659 ; X86-NEXT: kmovb %k0, (%eax)
; IR under test - xor with all-true inverts every lane before the store.
1661 %x = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1>
1662 store <4 x i1> %x, <4 x i1>* %ptr, align 4
; store_v8i1 - invert an <8 x i1> argument and store it.
; The argument arrives as eight word lanes. Runs with avx512bw are expected to
; convert it directly with vpsllw $15 and vpmovw2m. Runs without avx512bw are
; expected to sign-extend the words to quadwords first (vpmovsxwq) and then
; use the quadword mask conversions. The inversion shows up as vptestnmq,
; knotb or knotw depending on the run, and every variant writes one byte.
1666 define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) {
1667 ; KNL-LABEL: store_v8i1:
1669 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1670 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1671 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
1672 ; KNL-NEXT: kmovw %k0, %eax
1673 ; KNL-NEXT: movb %al, (%rdi)
1674 ; KNL-NEXT: vzeroupper
1677 ; SKX-LABEL: store_v8i1:
1679 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1680 ; SKX-NEXT: vpmovw2m %xmm0, %k0
1681 ; SKX-NEXT: knotb %k0, %k0
1682 ; SKX-NEXT: kmovb %k0, (%rdi)
1685 ; AVX512BW-LABEL: store_v8i1:
1686 ; AVX512BW: ## %bb.0:
1687 ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
1688 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
1689 ; AVX512BW-NEXT: knotw %k0, %k0
1690 ; AVX512BW-NEXT: kmovd %k0, %eax
1691 ; AVX512BW-NEXT: movb %al, (%rdi)
1692 ; AVX512BW-NEXT: vzeroupper
1693 ; AVX512BW-NEXT: retq
1695 ; AVX512DQ-LABEL: store_v8i1:
1696 ; AVX512DQ: ## %bb.0:
1697 ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
1698 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
1699 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
1700 ; AVX512DQ-NEXT: knotb %k0, %k0
1701 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
1702 ; AVX512DQ-NEXT: vzeroupper
1703 ; AVX512DQ-NEXT: retq
1705 ; X86-LABEL: store_v8i1:
1707 ; X86-NEXT: vpsllw $15, %xmm0, %xmm0
1708 ; X86-NEXT: vpmovw2m %xmm0, %k0
1709 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1710 ; X86-NEXT: knotb %k0, %k0
1711 ; X86-NEXT: kmovb %k0, (%eax)
; IR under test - xor with all-true inverts every lane before the store.
1713 %x = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
1714 store <8 x i1> %x, <8 x i1>* %ptr, align 4
; store_v16i1 - invert a <16 x i1> argument and store it.
; The argument arrives as sixteen byte lanes. Runs with avx512bw are expected
; to convert it with vpsllw $7 and vpmovb2m. Runs without avx512bw are
; expected to sign-extend the bytes to doublewords first (vpmovsxbd). Unlike
; the narrower store tests, the expected store here is a 16-bit kmovw straight
; from the mask register in every run.
1718 define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) {
1719 ; KNL-LABEL: store_v16i1:
1721 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
1722 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1723 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
1724 ; KNL-NEXT: kmovw %k0, (%rdi)
1725 ; KNL-NEXT: vzeroupper
1728 ; SKX-LABEL: store_v16i1:
1730 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
1731 ; SKX-NEXT: vpmovb2m %xmm0, %k0
1732 ; SKX-NEXT: knotw %k0, %k0
1733 ; SKX-NEXT: kmovw %k0, (%rdi)
1736 ; AVX512BW-LABEL: store_v16i1:
1737 ; AVX512BW: ## %bb.0:
1738 ; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
1739 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
1740 ; AVX512BW-NEXT: knotw %k0, %k0
1741 ; AVX512BW-NEXT: kmovw %k0, (%rdi)
1742 ; AVX512BW-NEXT: vzeroupper
1743 ; AVX512BW-NEXT: retq
1745 ; AVX512DQ-LABEL: store_v16i1:
1746 ; AVX512DQ: ## %bb.0:
1747 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
1748 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
1749 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
1750 ; AVX512DQ-NEXT: knotw %k0, %k0
1751 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
1752 ; AVX512DQ-NEXT: vzeroupper
1753 ; AVX512DQ-NEXT: retq
1755 ; X86-LABEL: store_v16i1:
1757 ; X86-NEXT: vpsllw $7, %xmm0, %xmm0
1758 ; X86-NEXT: vpmovb2m %xmm0, %k0
1759 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1760 ; X86-NEXT: knotw %k0, %k0
1761 ; X86-NEXT: kmovw %k0, (%eax)
; IR under test - xor with all-true inverts every lane before the store.
1763 %x = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
1764 store <16 x i1> %x, <16 x i1>* %ptr, align 4
; Scalar i1 global that @f1 toggles on every call.
1779 @f1.v = internal unnamed_addr global i1 false, align 4
; f1 - toggle a scalar i1 global and pass the new value to @f2.
; The value is loaded, xored with true, stored back, zero-extended to i32 and
; handed to @f2 in a tail call. The expected code stays in general purpose
; registers (movzbl, xorl $1, movb). The x86_64 runs are expected to end in a
; jmp to _f2 and the i686 run in a regular calll with the argument on the
; stack. The %c argument is not used by the instructions visible here.
1781 define void @f1(i32 %c) {
1783 ; CHECK: ## %bb.0: ## %entry
1784 ; CHECK-NEXT: movzbl {{.*}}(%rip), %edi
1785 ; CHECK-NEXT: xorl $1, %edi
1786 ; CHECK-NEXT: movb %dil, {{.*}}(%rip)
1787 ; CHECK-NEXT: jmp _f2 ## TAILCALL
1790 ; X86: ## %bb.0: ## %entry
1791 ; X86-NEXT: subl $12, %esp
1792 ; X86-NEXT: .cfi_def_cfa_offset 16
1793 ; X86-NEXT: movzbl _f1.v, %eax
1794 ; X86-NEXT: xorl $1, %eax
1795 ; X86-NEXT: movb %al, _f1.v
1796 ; X86-NEXT: movl %eax, (%esp)
1797 ; X86-NEXT: calll _f2
1798 ; X86-NEXT: addl $12, %esp
1801 %.b1 = load i1, i1* @f1.v, align 4
1802 %not..b1 = xor i1 %.b1, true
1803 store i1 %not..b1, i1* @f1.v, align 4
1804 %0 = zext i1 %not..b1 to i32
1805 tail call void @f2(i32 %0) #2
; External callee of the tail call in @f1. Only the declaration is needed.
1809 declare void @f2(i32) #1
; store_i16_i1 - truncate an i16 argument to i1 for a store through %y.
; The expected code keeps only bit 0 with andl $1 and writes one byte through
; the pointer argument. No mask register is expected to be involved.
1811 define void @store_i16_i1(i16 %x, i1 *%y) {
1812 ; CHECK-LABEL: store_i16_i1:
1814 ; CHECK-NEXT: andl $1, %edi
1815 ; CHECK-NEXT: movb %dil, (%rsi)
1818 ; X86-LABEL: store_i16_i1:
1820 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1821 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
1822 ; X86-NEXT: andl $1, %ecx
1823 ; X86-NEXT: movb %cl, (%eax)
1825 %c = trunc i16 %x to i1
; store_i8_i1 - truncate an i8 argument to i1 for a store through %y.
; Byte-sized counterpart of the i16 case. The x86_64 runs are expected to mask
; with andl $1 on the argument register. The i686 run is expected to load the
; byte from the stack and mask it with andb $1. Both write one byte through
; the pointer argument.
1830 define void @store_i8_i1(i8 %x, i1 *%y) {
1831 ; CHECK-LABEL: store_i8_i1:
1833 ; CHECK-NEXT: andl $1, %edi
1834 ; CHECK-NEXT: movb %dil, (%rsi)
1837 ; X86-LABEL: store_i8_i1:
1839 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1840 ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
1841 ; X86-NEXT: andb $1, %cl
1842 ; X86-NEXT: movb %cl, (%eax)
1844 %c = trunc i8 %x to i1
; test_build_vec_v32i1 - select on <32 x i16> with a constant <32 x i1> mask.
; Lanes whose constant mask bit is false are replaced by zero. Because the
; mask is a compile-time constant, the expected code is a bitwise and with a
; constant-pool vector. Runs that operate on 512-bit vectors here are expected
; to use one and, and the remaining runs one and per 256-bit half.
; The i686 run addresses the constant pool entry by absolute label. That label
; is matched with a pattern instead of a literal name because its number
; depends on how many functions precede this one in the file.
1849 define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) {
1850 ; KNL-LABEL: test_build_vec_v32i1:
1852 ; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
1853 ; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
1856 ; SKX-LABEL: test_build_vec_v32i1:
1858 ; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
1861 ; AVX512BW-LABEL: test_build_vec_v32i1:
1862 ; AVX512BW: ## %bb.0:
1863 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
1864 ; AVX512BW-NEXT: retq
1866 ; AVX512DQ-LABEL: test_build_vec_v32i1:
1867 ; AVX512DQ: ## %bb.0:
1868 ; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
1869 ; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
1870 ; AVX512DQ-NEXT: retq
1872 ; X86-LABEL: test_build_vec_v32i1:
1874 ; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0
1876 %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
; test_build_vec_v32i1_optsize - the constant-mask <32 x i16> select again,
; this time on a function marked optsize.
; Runs with avx512bw are expected to avoid the constant-pool vector. They load
; the mask as the 32-bit immediate 0x59455495 (bit i holds mask element i),
; move it into a mask register and use a zeroing vmovdqu16. Runs without
; avx512bw are still expected to use two vandps against constant-pool vectors.
1880 define <32 x i16> @test_build_vec_v32i1_optsize(<32 x i16> %x) optsize {
1881 ; KNL-LABEL: test_build_vec_v32i1_optsize:
1883 ; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
1884 ; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
1887 ; SKX-LABEL: test_build_vec_v32i1_optsize:
1889 ; SKX-NEXT: movl $1497715861, %eax ## imm = 0x59455495
1890 ; SKX-NEXT: kmovd %eax, %k1
1891 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1894 ; AVX512BW-LABEL: test_build_vec_v32i1_optsize:
1895 ; AVX512BW: ## %bb.0:
1896 ; AVX512BW-NEXT: movl $1497715861, %eax ## imm = 0x59455495
1897 ; AVX512BW-NEXT: kmovd %eax, %k1
1898 ; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1899 ; AVX512BW-NEXT: retq
1901 ; AVX512DQ-LABEL: test_build_vec_v32i1_optsize:
1902 ; AVX512DQ: ## %bb.0:
1903 ; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
1904 ; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
1905 ; AVX512DQ-NEXT: retq
1907 ; X86-LABEL: test_build_vec_v32i1_optsize:
1909 ; X86-NEXT: movl $1497715861, %eax ## imm = 0x59455495
1910 ; X86-NEXT: kmovd %eax, %k1
1911 ; X86-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1913 %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
; test_build_vec_v64i1 - select on <64 x i8> with a constant <64 x i1> mask.
; Lanes whose constant mask bit is false are replaced by zero. Runs with
; avx512bw are expected to use a single 512-bit vpshufb that keeps the
; selected bytes in place and zeroes the others. Runs without avx512bw are
; expected to use one vandps against a constant-pool vector per 256-bit half.
1917 define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
1918 ; KNL-LABEL: test_build_vec_v64i1:
1920 ; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
1921 ; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
1924 ; SKX-LABEL: test_build_vec_v64i1:
1926 ; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
1929 ; AVX512BW-LABEL: test_build_vec_v64i1:
1930 ; AVX512BW: ## %bb.0:
1931 ; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
1932 ; AVX512BW-NEXT: retq
1934 ; AVX512DQ-LABEL: test_build_vec_v64i1:
1935 ; AVX512DQ: ## %bb.0:
1936 ; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
1937 ; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
1938 ; AVX512DQ-NEXT: retq
1940 ; X86-LABEL: test_build_vec_v64i1:
1942 ; X86-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
1944 %ret = select <64 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <64 x i8> %x, <64 x i8> zeroinitializer
; ktest_1 - branch on whether an 8-bit compare mask is all zero.
; Two overlapping <8 x double> vectors are loaded from %base, one at element 0
; and one at element 1, both with align 1. The first compare of %in against
; the first vector gives a mask. That mask zero-selects the second vector, and
; a second compare is anded with the first. The result is bitcast to i8 and
; compared with zero to choose between the two stores.
; Runs with avx512dq are expected to test the mask directly with kortestb. The
; other runs are expected to copy it to a general purpose register and use
; testb. In every run the select is expected to fold into a zero-masked load
; and the and into a masked compare.
1948 define void @ktest_1(<8 x double> %in, double * %base) {
1949 ; KNL-LABEL: ktest_1:
1951 ; KNL-NEXT: vmovupd (%rdi), %zmm1
1952 ; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k1
1953 ; KNL-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
1954 ; KNL-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
1955 ; KNL-NEXT: kmovw %k0, %eax
1956 ; KNL-NEXT: testb %al, %al
1957 ; KNL-NEXT: je LBB43_2
1958 ; KNL-NEXT: ## %bb.1: ## %L1
1959 ; KNL-NEXT: vmovapd %zmm0, (%rdi)
1960 ; KNL-NEXT: vzeroupper
1962 ; KNL-NEXT: LBB43_2: ## %L2
1963 ; KNL-NEXT: vmovapd %zmm0, 8(%rdi)
1964 ; KNL-NEXT: vzeroupper
1967 ; SKX-LABEL: ktest_1:
1969 ; SKX-NEXT: vmovupd (%rdi), %zmm1
1970 ; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k1
1971 ; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
1972 ; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
1973 ; SKX-NEXT: kortestb %k0, %k0
1974 ; SKX-NEXT: je LBB43_2
1975 ; SKX-NEXT: ## %bb.1: ## %L1
1976 ; SKX-NEXT: vmovapd %zmm0, (%rdi)
1977 ; SKX-NEXT: vzeroupper
1979 ; SKX-NEXT: LBB43_2: ## %L2
1980 ; SKX-NEXT: vmovapd %zmm0, 8(%rdi)
1981 ; SKX-NEXT: vzeroupper
1984 ; AVX512BW-LABEL: ktest_1:
1985 ; AVX512BW: ## %bb.0:
1986 ; AVX512BW-NEXT: vmovupd (%rdi), %zmm1
1987 ; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm1, %k1
1988 ; AVX512BW-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
1989 ; AVX512BW-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
1990 ; AVX512BW-NEXT: kmovd %k0, %eax
1991 ; AVX512BW-NEXT: testb %al, %al
1992 ; AVX512BW-NEXT: je LBB43_2
1993 ; AVX512BW-NEXT: ## %bb.1: ## %L1
1994 ; AVX512BW-NEXT: vmovapd %zmm0, (%rdi)
1995 ; AVX512BW-NEXT: vzeroupper
1996 ; AVX512BW-NEXT: retq
1997 ; AVX512BW-NEXT: LBB43_2: ## %L2
1998 ; AVX512BW-NEXT: vmovapd %zmm0, 8(%rdi)
1999 ; AVX512BW-NEXT: vzeroupper
2000 ; AVX512BW-NEXT: retq
2002 ; AVX512DQ-LABEL: ktest_1:
2003 ; AVX512DQ: ## %bb.0:
2004 ; AVX512DQ-NEXT: vmovupd (%rdi), %zmm1
2005 ; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
2006 ; AVX512DQ-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
2007 ; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
2008 ; AVX512DQ-NEXT: kortestb %k0, %k0
2009 ; AVX512DQ-NEXT: je LBB43_2
2010 ; AVX512DQ-NEXT: ## %bb.1: ## %L1
2011 ; AVX512DQ-NEXT: vmovapd %zmm0, (%rdi)
2012 ; AVX512DQ-NEXT: vzeroupper
2013 ; AVX512DQ-NEXT: retq
2014 ; AVX512DQ-NEXT: LBB43_2: ## %L2
2015 ; AVX512DQ-NEXT: vmovapd %zmm0, 8(%rdi)
2016 ; AVX512DQ-NEXT: vzeroupper
2017 ; AVX512DQ-NEXT: retq
2019 ; X86-LABEL: ktest_1:
2021 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2022 ; X86-NEXT: vmovupd (%eax), %zmm1
2023 ; X86-NEXT: vcmpltpd %zmm0, %zmm1, %k1
2024 ; X86-NEXT: vmovupd 8(%eax), %zmm1 {%k1} {z}
2025 ; X86-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
2026 ; X86-NEXT: kortestb %k0, %k0
2027 ; X86-NEXT: je LBB43_2
2028 ; X86-NEXT: ## %bb.1: ## %L1
2029 ; X86-NEXT: vmovapd %zmm0, (%eax)
2030 ; X86-NEXT: vzeroupper
2032 ; X86-NEXT: LBB43_2: ## %L2
2033 ; X86-NEXT: vmovapd %zmm0, 8(%eax)
2034 ; X86-NEXT: vzeroupper
; Addresses of the two overlapping vectors, one double apart.
2036 %addr1 = getelementptr double, double * %base, i64 0
2037 %addr2 = getelementptr double, double * %base, i64 1
2039 %vaddr1 = bitcast double* %addr1 to <8 x double>*
2040 %vaddr2 = bitcast double* %addr2 to <8 x double>*
2042 %val1 = load <8 x double>, <8 x double> *%vaddr1, align 1
2043 %val2 = load <8 x double>, <8 x double> *%vaddr2, align 1
; First compare, zero-masked select, second compare, and of both masks.
2045 %sel1 = fcmp ogt <8 x double>%in, %val1
2046 %val3 = select <8 x i1> %sel1, <8 x double> %val2, <8 x double> zeroinitializer
2047 %sel2 = fcmp olt <8 x double> %in, %val3
2048 %sel3 = and <8 x i1> %sel1, %sel2
; An all-zero mask branches to L2, anything else to L1.
2050 %int_sel3 = bitcast <8 x i1> %sel3 to i8
2051 %res = icmp eq i8 %int_sel3, zeroinitializer
2052 br i1 %res, label %L2, label %L1
2054 store <8 x double> %in, <8 x double>* %vaddr1
2057 store <8 x double> %in, <8 x double>* %vaddr2
; ktest_2: mask test-and-branch on a <32 x float> input.
; The IR loads two unaligned (align 1) vectors from %base and %base + 1 float,
; computes %sel1 = (%in > %val1), zero-selects %val2 under %sel1, computes
; %sel2 = (%in < %val3), ORs both <32 x i1> masks, bitcasts the result to i32
; and branches to %L2 when it is zero, otherwise to %L1.
; Expected codegen, as recorded below: runs with avx512bw (SKX, AVX512BW and
; the i686 run) join the two 16-bit halves with kunpckwd and test them with a
; single kortestd; runs without it (KNL, AVX512DQ) OR each half with korw and
; merge the halves in general-purpose registers (shll $16 / orl).
2063 define void @ktest_2(<32 x float> %in, float * %base) {
2065 ; KNL-LABEL: ktest_2:
2067 ; KNL-NEXT: vmovups (%rdi), %zmm2
2068 ; KNL-NEXT: vmovups 64(%rdi), %zmm3
2069 ; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k1
2070 ; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k2
2071 ; KNL-NEXT: vmovups 4(%rdi), %zmm2 {%k2} {z}
2072 ; KNL-NEXT: vmovups 68(%rdi), %zmm3 {%k1} {z}
2073 ; KNL-NEXT: vcmpltps %zmm3, %zmm1, %k0
2074 ; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k3
2075 ; KNL-NEXT: korw %k3, %k2, %k2
2076 ; KNL-NEXT: kmovw %k2, %eax
2077 ; KNL-NEXT: korw %k0, %k1, %k0
2078 ; KNL-NEXT: kmovw %k0, %ecx
2079 ; KNL-NEXT: shll $16, %ecx
2080 ; KNL-NEXT: orl %eax, %ecx
2081 ; KNL-NEXT: je LBB44_2
2082 ; KNL-NEXT: ## %bb.1: ## %L1
2083 ; KNL-NEXT: vmovaps %zmm0, (%rdi)
2084 ; KNL-NEXT: vmovaps %zmm1, 64(%rdi)
2085 ; KNL-NEXT: vzeroupper
2087 ; KNL-NEXT: LBB44_2: ## %L2
2088 ; KNL-NEXT: vmovaps %zmm0, 4(%rdi)
2089 ; KNL-NEXT: vmovaps %zmm1, 68(%rdi)
2090 ; KNL-NEXT: vzeroupper
2093 ; SKX-LABEL: ktest_2:
2095 ; SKX-NEXT: vmovups (%rdi), %zmm2
2096 ; SKX-NEXT: vmovups 64(%rdi), %zmm3
2097 ; SKX-NEXT: vcmpltps %zmm0, %zmm2, %k1
2098 ; SKX-NEXT: vcmpltps %zmm1, %zmm3, %k2
2099 ; SKX-NEXT: kunpckwd %k1, %k2, %k0
2100 ; SKX-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z}
2101 ; SKX-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z}
2102 ; SKX-NEXT: vcmpltps %zmm3, %zmm0, %k1
2103 ; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2
2104 ; SKX-NEXT: kunpckwd %k1, %k2, %k1
2105 ; SKX-NEXT: kortestd %k1, %k0
2106 ; SKX-NEXT: je LBB44_2
2107 ; SKX-NEXT: ## %bb.1: ## %L1
2108 ; SKX-NEXT: vmovaps %zmm0, (%rdi)
2109 ; SKX-NEXT: vmovaps %zmm1, 64(%rdi)
2110 ; SKX-NEXT: vzeroupper
2112 ; SKX-NEXT: LBB44_2: ## %L2
2113 ; SKX-NEXT: vmovaps %zmm0, 4(%rdi)
2114 ; SKX-NEXT: vmovaps %zmm1, 68(%rdi)
2115 ; SKX-NEXT: vzeroupper
2118 ; AVX512BW-LABEL: ktest_2:
2119 ; AVX512BW: ## %bb.0:
2120 ; AVX512BW-NEXT: vmovups (%rdi), %zmm2
2121 ; AVX512BW-NEXT: vmovups 64(%rdi), %zmm3
2122 ; AVX512BW-NEXT: vcmpltps %zmm0, %zmm2, %k1
2123 ; AVX512BW-NEXT: vcmpltps %zmm1, %zmm3, %k2
2124 ; AVX512BW-NEXT: kunpckwd %k1, %k2, %k0
2125 ; AVX512BW-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z}
2126 ; AVX512BW-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z}
2127 ; AVX512BW-NEXT: vcmpltps %zmm3, %zmm0, %k1
2128 ; AVX512BW-NEXT: vcmpltps %zmm2, %zmm1, %k2
2129 ; AVX512BW-NEXT: kunpckwd %k1, %k2, %k1
2130 ; AVX512BW-NEXT: kortestd %k1, %k0
2131 ; AVX512BW-NEXT: je LBB44_2
2132 ; AVX512BW-NEXT: ## %bb.1: ## %L1
2133 ; AVX512BW-NEXT: vmovaps %zmm0, (%rdi)
2134 ; AVX512BW-NEXT: vmovaps %zmm1, 64(%rdi)
2135 ; AVX512BW-NEXT: vzeroupper
2136 ; AVX512BW-NEXT: retq
2137 ; AVX512BW-NEXT: LBB44_2: ## %L2
2138 ; AVX512BW-NEXT: vmovaps %zmm0, 4(%rdi)
2139 ; AVX512BW-NEXT: vmovaps %zmm1, 68(%rdi)
2140 ; AVX512BW-NEXT: vzeroupper
2141 ; AVX512BW-NEXT: retq
2143 ; AVX512DQ-LABEL: ktest_2:
2144 ; AVX512DQ: ## %bb.0:
2145 ; AVX512DQ-NEXT: vmovups (%rdi), %zmm2
2146 ; AVX512DQ-NEXT: vmovups 64(%rdi), %zmm3
2147 ; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm3, %k1
2148 ; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k2
2149 ; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm2 {%k2} {z}
2150 ; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm3 {%k1} {z}
2151 ; AVX512DQ-NEXT: vcmpltps %zmm3, %zmm1, %k0
2152 ; AVX512DQ-NEXT: vcmpltps %zmm2, %zmm0, %k3
2153 ; AVX512DQ-NEXT: korw %k3, %k2, %k2
2154 ; AVX512DQ-NEXT: kmovw %k2, %eax
2155 ; AVX512DQ-NEXT: korw %k0, %k1, %k0
2156 ; AVX512DQ-NEXT: kmovw %k0, %ecx
2157 ; AVX512DQ-NEXT: shll $16, %ecx
2158 ; AVX512DQ-NEXT: orl %eax, %ecx
2159 ; AVX512DQ-NEXT: je LBB44_2
2160 ; AVX512DQ-NEXT: ## %bb.1: ## %L1
2161 ; AVX512DQ-NEXT: vmovaps %zmm0, (%rdi)
2162 ; AVX512DQ-NEXT: vmovaps %zmm1, 64(%rdi)
2163 ; AVX512DQ-NEXT: vzeroupper
2164 ; AVX512DQ-NEXT: retq
2165 ; AVX512DQ-NEXT: LBB44_2: ## %L2
2166 ; AVX512DQ-NEXT: vmovaps %zmm0, 4(%rdi)
2167 ; AVX512DQ-NEXT: vmovaps %zmm1, 68(%rdi)
2168 ; AVX512DQ-NEXT: vzeroupper
2169 ; AVX512DQ-NEXT: retq
2171 ; X86-LABEL: ktest_2:
2173 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2174 ; X86-NEXT: vmovups (%eax), %zmm2
2175 ; X86-NEXT: vmovups 64(%eax), %zmm3
2176 ; X86-NEXT: vcmpltps %zmm0, %zmm2, %k1
2177 ; X86-NEXT: vcmpltps %zmm1, %zmm3, %k2
2178 ; X86-NEXT: kunpckwd %k1, %k2, %k0
2179 ; X86-NEXT: vmovups 68(%eax), %zmm2 {%k2} {z}
2180 ; X86-NEXT: vmovups 4(%eax), %zmm3 {%k1} {z}
2181 ; X86-NEXT: vcmpltps %zmm3, %zmm0, %k1
2182 ; X86-NEXT: vcmpltps %zmm2, %zmm1, %k2
2183 ; X86-NEXT: kunpckwd %k1, %k2, %k1
2184 ; X86-NEXT: kortestd %k1, %k0
2185 ; X86-NEXT: je LBB44_2
2186 ; X86-NEXT: ## %bb.1: ## %L1
2187 ; X86-NEXT: vmovaps %zmm0, (%eax)
2188 ; X86-NEXT: vmovaps %zmm1, 64(%eax)
2189 ; X86-NEXT: vzeroupper
2191 ; X86-NEXT: LBB44_2: ## %L2
2192 ; X86-NEXT: vmovaps %zmm0, 4(%eax)
2193 ; X86-NEXT: vmovaps %zmm1, 68(%eax)
2194 ; X86-NEXT: vzeroupper
; IR under test. The assertions above are autogenerated (see the NOTE on the
; first line of the file); regenerate them instead of editing them by hand.
; %vaddr1 / %vaddr2 alias %base and %base + 1 float reinterpreted as vectors.
2196 %addr1 = getelementptr float, float * %base, i64 0
2197 %addr2 = getelementptr float, float * %base, i64 1
2199 %vaddr1 = bitcast float* %addr1 to <32 x float>*
2200 %vaddr2 = bitcast float* %addr2 to <32 x float>*
2202 %val1 = load <32 x float>, <32 x float> *%vaddr1, align 1
2203 %val2 = load <32 x float>, <32 x float> *%vaddr2, align 1
2205 %sel1 = fcmp ogt <32 x float>%in, %val1
2206 %val3 = select <32 x i1> %sel1, <32 x float> %val2, <32 x float> zeroinitializer
2207 %sel2 = fcmp olt <32 x float> %in, %val3
2208 %sel3 = or <32 x i1> %sel1, %sel2
; Collapse the 32-lane mask to a scalar so the branch tests "any lane set".
2210 %int_sel3 = bitcast <32 x i1> %sel3 to i32
2211 %res = icmp eq i32 %int_sel3, zeroinitializer
2212 br i1 %res, label %L2, label %L1
2214 store <32 x float> %in, <32 x float>* %vaddr1
2217 store <32 x float> %in, <32 x float>* %vaddr2
; load_8i1: load an <8 x i1> mask from memory and sign-extend it to <8 x i64>.
; Recorded codegen: runs with avx512dq (SKX, AVX512DQ and the i686 run) use a
; byte mask load (kmovb) plus vpmovm2q; runs without it (KNL, AVX512BW) use
; kmovw and materialize all-ones lanes with a zero-masked vpternlogq $255.
2223 define <8 x i64> @load_8i1(<8 x i1>* %a) {
2224 ; KNL-LABEL: load_8i1:
2226 ; KNL-NEXT: kmovw (%rdi), %k1
2227 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2230 ; SKX-LABEL: load_8i1:
2232 ; SKX-NEXT: kmovb (%rdi), %k0
2233 ; SKX-NEXT: vpmovm2q %k0, %zmm0
2236 ; AVX512BW-LABEL: load_8i1:
2237 ; AVX512BW: ## %bb.0:
2238 ; AVX512BW-NEXT: kmovw (%rdi), %k1
2239 ; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2240 ; AVX512BW-NEXT: retq
2242 ; AVX512DQ-LABEL: load_8i1:
2243 ; AVX512DQ: ## %bb.0:
2244 ; AVX512DQ-NEXT: kmovb (%rdi), %k0
2245 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
2246 ; AVX512DQ-NEXT: retq
2248 ; X86-LABEL: load_8i1:
2250 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2251 ; X86-NEXT: kmovb (%eax), %k0
2252 ; X86-NEXT: vpmovm2q %k0, %zmm0
; Each i1 becomes an all-ones or all-zeros 64-bit lane.
2254 %b = load <8 x i1>, <8 x i1>* %a
2255 %c = sext <8 x i1> %b to <8 x i64>
; load_16i1: load a <16 x i1> mask from memory and sign-extend it to
; <16 x i32>. Every run loads the mask with kmovw; runs with avx512dq expand it
; with vpmovm2d, the others use a zero-masked vpternlogd $255.
2259 define <16 x i32> @load_16i1(<16 x i1>* %a) {
2260 ; KNL-LABEL: load_16i1:
2262 ; KNL-NEXT: kmovw (%rdi), %k1
2263 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2266 ; SKX-LABEL: load_16i1:
2268 ; SKX-NEXT: kmovw (%rdi), %k0
2269 ; SKX-NEXT: vpmovm2d %k0, %zmm0
2272 ; AVX512BW-LABEL: load_16i1:
2273 ; AVX512BW: ## %bb.0:
2274 ; AVX512BW-NEXT: kmovw (%rdi), %k1
2275 ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2276 ; AVX512BW-NEXT: retq
2278 ; AVX512DQ-LABEL: load_16i1:
2279 ; AVX512DQ: ## %bb.0:
2280 ; AVX512DQ-NEXT: kmovw (%rdi), %k0
2281 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
2282 ; AVX512DQ-NEXT: retq
2284 ; X86-LABEL: load_16i1:
2286 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2287 ; X86-NEXT: kmovw (%eax), %k0
2288 ; X86-NEXT: vpmovm2d %k0, %zmm0
; Each i1 becomes an all-ones or all-zeros 32-bit lane.
2290 %b = load <16 x i1>, <16 x i1>* %a
2291 %c = sext <16 x i1> %b to <16 x i32>
; load_2i1: load a <2 x i1> mask from memory and sign-extend it to <2 x i16>.
; Recorded codegen expands the mask into 64-bit lanes: SKX and the i686 run
; (which have avx512vl) write an xmm register directly with vpmovm2q, while the
; other runs operate on a zmm register and therefore end with vzeroupper.
2295 define <2 x i16> @load_2i1(<2 x i1>* %a) {
2296 ; KNL-LABEL: load_2i1:
2298 ; KNL-NEXT: kmovw (%rdi), %k1
2299 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2300 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
2301 ; KNL-NEXT: vzeroupper
2304 ; SKX-LABEL: load_2i1:
2306 ; SKX-NEXT: kmovb (%rdi), %k0
2307 ; SKX-NEXT: vpmovm2q %k0, %xmm0
2310 ; AVX512BW-LABEL: load_2i1:
2311 ; AVX512BW: ## %bb.0:
2312 ; AVX512BW-NEXT: kmovw (%rdi), %k1
2313 ; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2314 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
2315 ; AVX512BW-NEXT: vzeroupper
2316 ; AVX512BW-NEXT: retq
2318 ; AVX512DQ-LABEL: load_2i1:
2319 ; AVX512DQ: ## %bb.0:
2320 ; AVX512DQ-NEXT: kmovb (%rdi), %k0
2321 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
2322 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
2323 ; AVX512DQ-NEXT: vzeroupper
2324 ; AVX512DQ-NEXT: retq
2326 ; X86-LABEL: load_2i1:
2328 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2329 ; X86-NEXT: kmovb (%eax), %k0
2330 ; X86-NEXT: vpmovm2q %k0, %xmm0
; Sub-byte mask load followed by a sign extension of each bit.
2332 %b = load <2 x i1>, <2 x i1>* %a
2333 %c = sext <2 x i1> %b to <2 x i16>
; load_4i1: load a <4 x i1> mask from memory and sign-extend it to <4 x i16>.
; Recorded codegen expands the mask into 32-bit lanes: SKX and the i686 run
; (which have avx512vl) write an xmm register directly with vpmovm2d, while the
; other runs operate on a zmm register and therefore end with vzeroupper.
2337 define <4 x i16> @load_4i1(<4 x i1>* %a) {
2338 ; KNL-LABEL: load_4i1:
2340 ; KNL-NEXT: kmovw (%rdi), %k1
2341 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2342 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
2343 ; KNL-NEXT: vzeroupper
2346 ; SKX-LABEL: load_4i1:
2348 ; SKX-NEXT: kmovb (%rdi), %k0
2349 ; SKX-NEXT: vpmovm2d %k0, %xmm0
2352 ; AVX512BW-LABEL: load_4i1:
2353 ; AVX512BW: ## %bb.0:
2354 ; AVX512BW-NEXT: kmovw (%rdi), %k1
2355 ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2356 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
2357 ; AVX512BW-NEXT: vzeroupper
2358 ; AVX512BW-NEXT: retq
2360 ; AVX512DQ-LABEL: load_4i1:
2361 ; AVX512DQ: ## %bb.0:
2362 ; AVX512DQ-NEXT: kmovb (%rdi), %k0
2363 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
2364 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
2365 ; AVX512DQ-NEXT: vzeroupper
2366 ; AVX512DQ-NEXT: retq
2368 ; X86-LABEL: load_4i1:
2370 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2371 ; X86-NEXT: kmovb (%eax), %k0
2372 ; X86-NEXT: vpmovm2d %k0, %xmm0
; Sub-byte mask load followed by a sign extension of each bit.
2374 %b = load <4 x i1>, <4 x i1>* %a
2375 %c = sext <4 x i1> %b to <4 x i16>
; load_32i1: load a <32 x i1> mask from memory and sign-extend it to
; <32 x i16>. Runs with avx512bw load all 32 bits with one kmovd and expand
; them with vpmovm2w. Runs without it (KNL, AVX512DQ) load two 16-bit halves
; at offsets 0 and 2, expand each to dwords and narrow to words with vpmovdw,
; producing the result in two ymm registers.
2379 define <32 x i16> @load_32i1(<32 x i1>* %a) {
2380 ; KNL-LABEL: load_32i1:
2382 ; KNL-NEXT: kmovw (%rdi), %k1
2383 ; KNL-NEXT: kmovw 2(%rdi), %k2
2384 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2385 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
2386 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
2387 ; KNL-NEXT: vpmovdw %zmm1, %ymm1
2390 ; SKX-LABEL: load_32i1:
2392 ; SKX-NEXT: kmovd (%rdi), %k0
2393 ; SKX-NEXT: vpmovm2w %k0, %zmm0
2396 ; AVX512BW-LABEL: load_32i1:
2397 ; AVX512BW: ## %bb.0:
2398 ; AVX512BW-NEXT: kmovd (%rdi), %k0
2399 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
2400 ; AVX512BW-NEXT: retq
2402 ; AVX512DQ-LABEL: load_32i1:
2403 ; AVX512DQ: ## %bb.0:
2404 ; AVX512DQ-NEXT: kmovw (%rdi), %k0
2405 ; AVX512DQ-NEXT: kmovw 2(%rdi), %k1
2406 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
2407 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
2408 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm1
2409 ; AVX512DQ-NEXT: vpmovdw %zmm1, %ymm1
2410 ; AVX512DQ-NEXT: retq
2412 ; X86-LABEL: load_32i1:
2414 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2415 ; X86-NEXT: kmovd (%eax), %k0
2416 ; X86-NEXT: vpmovm2w %k0, %zmm0
; Each i1 becomes an all-ones or all-zeros 16-bit lane.
2418 %b = load <32 x i1>, <32 x i1>* %a
2419 %c = sext <32 x i1> %b to <32 x i16>
; load_64i1: load a <64 x i1> mask from memory and sign-extend it to
; <64 x i8>. Runs with avx512bw load all 64 bits with one kmovq and expand
; them with vpmovm2b. Runs without it (KNL, AVX512DQ) load four 16-bit pieces
; at offsets 0, 2, 4 and 6, expand each to dwords, narrow to bytes with vpmovdb
; and pair the 128-bit results with vinserti128 into two ymm registers.
2423 define <64 x i8> @load_64i1(<64 x i1>* %a) {
2424 ; KNL-LABEL: load_64i1:
2426 ; KNL-NEXT: kmovw (%rdi), %k1
2427 ; KNL-NEXT: kmovw 2(%rdi), %k2
2428 ; KNL-NEXT: kmovw 4(%rdi), %k3
2429 ; KNL-NEXT: kmovw 6(%rdi), %k4
2430 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2431 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
2432 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
2433 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
2434 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2435 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k3} {z}
2436 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
2437 ; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k4} {z}
2438 ; KNL-NEXT: vpmovdb %zmm2, %xmm2
2439 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2442 ; SKX-LABEL: load_64i1:
2444 ; SKX-NEXT: kmovq (%rdi), %k0
2445 ; SKX-NEXT: vpmovm2b %k0, %zmm0
2448 ; AVX512BW-LABEL: load_64i1:
2449 ; AVX512BW: ## %bb.0:
2450 ; AVX512BW-NEXT: kmovq (%rdi), %k0
2451 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
2452 ; AVX512BW-NEXT: retq
2454 ; AVX512DQ-LABEL: load_64i1:
2455 ; AVX512DQ: ## %bb.0:
2456 ; AVX512DQ-NEXT: kmovw (%rdi), %k0
2457 ; AVX512DQ-NEXT: kmovw 2(%rdi), %k1
2458 ; AVX512DQ-NEXT: kmovw 4(%rdi), %k2
2459 ; AVX512DQ-NEXT: kmovw 6(%rdi), %k3
2460 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
2461 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
2462 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm1
2463 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
2464 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2465 ; AVX512DQ-NEXT: vpmovm2d %k2, %zmm1
2466 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
2467 ; AVX512DQ-NEXT: vpmovm2d %k3, %zmm2
2468 ; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
2469 ; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2470 ; AVX512DQ-NEXT: retq
2472 ; X86-LABEL: load_64i1:
2474 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2475 ; X86-NEXT: kmovq (%eax), %k0
2476 ; X86-NEXT: vpmovm2b %k0, %zmm0
; Each i1 becomes an all-ones or all-zeros byte lane.
2478 %b = load <64 x i1>, <64 x i1>* %a
2479 %c = sext <64 x i1> %b to <64 x i8>
; store_8i1: store an <8 x i1> argument to memory.
; Recorded codegen first moves the significant bit of each incoming lane into
; the sign position (vpsllw $15 or vpsllq $63) and converts it to a k-register.
; Runs with avx512dq (SKX, AVX512DQ and the i686 run) then store the mask
; directly with kmovb; KNL and AVX512BW copy it to a GPR and store with movb.
2483 define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) {
2484 ; KNL-LABEL: store_8i1:
2486 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
2487 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
2488 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
2489 ; KNL-NEXT: kmovw %k0, %eax
2490 ; KNL-NEXT: movb %al, (%rdi)
2491 ; KNL-NEXT: vzeroupper
2494 ; SKX-LABEL: store_8i1:
2496 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
2497 ; SKX-NEXT: vpmovw2m %xmm0, %k0
2498 ; SKX-NEXT: kmovb %k0, (%rdi)
2501 ; AVX512BW-LABEL: store_8i1:
2502 ; AVX512BW: ## %bb.0:
2503 ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
2504 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
2505 ; AVX512BW-NEXT: kmovd %k0, %eax
2506 ; AVX512BW-NEXT: movb %al, (%rdi)
2507 ; AVX512BW-NEXT: vzeroupper
2508 ; AVX512BW-NEXT: retq
2510 ; AVX512DQ-LABEL: store_8i1:
2511 ; AVX512DQ: ## %bb.0:
2512 ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
2513 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
2514 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
2515 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
2516 ; AVX512DQ-NEXT: vzeroupper
2517 ; AVX512DQ-NEXT: retq
2519 ; X86-LABEL: store_8i1:
2521 ; X86-NEXT: vpsllw $15, %xmm0, %xmm0
2522 ; X86-NEXT: vpmovw2m %xmm0, %k0
2523 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2524 ; X86-NEXT: kmovb %k0, (%eax)
; The mask argument is stored as-is; no truncation appears in the IR.
2526 store <8 x i1> %v, <8 x i1>* %a
; store_8i1_1: truncate an <8 x i16> argument to <8 x i1> and store the mask.
; The recorded instruction sequences match those of store_8i1 apart from the
; i686 run, where the pointer load from the stack is scheduled first.
2530 define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) {
2531 ; KNL-LABEL: store_8i1_1:
2533 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
2534 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
2535 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
2536 ; KNL-NEXT: kmovw %k0, %eax
2537 ; KNL-NEXT: movb %al, (%rdi)
2538 ; KNL-NEXT: vzeroupper
2541 ; SKX-LABEL: store_8i1_1:
2543 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
2544 ; SKX-NEXT: vpmovw2m %xmm0, %k0
2545 ; SKX-NEXT: kmovb %k0, (%rdi)
2548 ; AVX512BW-LABEL: store_8i1_1:
2549 ; AVX512BW: ## %bb.0:
2550 ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
2551 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
2552 ; AVX512BW-NEXT: kmovd %k0, %eax
2553 ; AVX512BW-NEXT: movb %al, (%rdi)
2554 ; AVX512BW-NEXT: vzeroupper
2555 ; AVX512BW-NEXT: retq
2557 ; AVX512DQ-LABEL: store_8i1_1:
2558 ; AVX512DQ: ## %bb.0:
2559 ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
2560 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
2561 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
2562 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
2563 ; AVX512DQ-NEXT: vzeroupper
2564 ; AVX512DQ-NEXT: retq
2566 ; X86-LABEL: store_8i1_1:
2568 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2569 ; X86-NEXT: vpsllw $15, %xmm0, %xmm0
2570 ; X86-NEXT: vpmovw2m %xmm0, %k0
2571 ; X86-NEXT: kmovb %k0, (%eax)
; Keep only the low bit of every 16-bit lane, then store the 8-bit mask.
2573 %v1 = trunc <8 x i16> %v to <8 x i1>
2574 store <8 x i1> %v1, <8 x i1>* %a
; store_16i1: store a <16 x i1> argument to memory.
; Every run stores the mask with kmovw straight from a k-register. They differ
; in how the mask is produced: vptestmd on KNL, vpmovb2m on the runs with
; avx512bw, and vpmovd2m on the AVX512DQ run.
2578 define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) {
2579 ; KNL-LABEL: store_16i1:
2581 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
2582 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2583 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
2584 ; KNL-NEXT: kmovw %k0, (%rdi)
2585 ; KNL-NEXT: vzeroupper
2588 ; SKX-LABEL: store_16i1:
2590 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
2591 ; SKX-NEXT: vpmovb2m %xmm0, %k0
2592 ; SKX-NEXT: kmovw %k0, (%rdi)
2595 ; AVX512BW-LABEL: store_16i1:
2596 ; AVX512BW: ## %bb.0:
2597 ; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
2598 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
2599 ; AVX512BW-NEXT: kmovw %k0, (%rdi)
2600 ; AVX512BW-NEXT: vzeroupper
2601 ; AVX512BW-NEXT: retq
2603 ; AVX512DQ-LABEL: store_16i1:
2604 ; AVX512DQ: ## %bb.0:
2605 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
2606 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2607 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
2608 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
2609 ; AVX512DQ-NEXT: vzeroupper
2610 ; AVX512DQ-NEXT: retq
2612 ; X86-LABEL: store_16i1:
2614 ; X86-NEXT: vpsllw $7, %xmm0, %xmm0
2615 ; X86-NEXT: vpmovb2m %xmm0, %k0
2616 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2617 ; X86-NEXT: kmovw %k0, (%eax)
; The mask argument is stored as-is; no truncation appears in the IR.
2619 store <16 x i1> %v, <16 x i1>* %a
; store_32i1: store a <32 x i1> argument to memory.
; Runs with avx512bw build one 32-bit mask with vpmovb2m and store it with a
; single kmovd. Runs without it (KNL, AVX512DQ) split the input into its two
; 128-bit halves, build a 16-bit mask from each and store them with kmovw at
; offsets 0 and 2.
2623 define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) {
2624 ; KNL-LABEL: store_32i1:
2626 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
2627 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
2628 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
2629 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
2630 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
2631 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2632 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
2633 ; KNL-NEXT: kmovw %k1, 2(%rdi)
2634 ; KNL-NEXT: kmovw %k0, (%rdi)
2635 ; KNL-NEXT: vzeroupper
2638 ; SKX-LABEL: store_32i1:
2640 ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
2641 ; SKX-NEXT: vpmovb2m %ymm0, %k0
2642 ; SKX-NEXT: kmovd %k0, (%rdi)
2643 ; SKX-NEXT: vzeroupper
2646 ; AVX512BW-LABEL: store_32i1:
2647 ; AVX512BW: ## %bb.0:
2648 ; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0
2649 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
2650 ; AVX512BW-NEXT: kmovd %k0, (%rdi)
2651 ; AVX512BW-NEXT: vzeroupper
2652 ; AVX512BW-NEXT: retq
2654 ; AVX512DQ-LABEL: store_32i1:
2655 ; AVX512DQ: ## %bb.0:
2656 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1
2657 ; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1
2658 ; AVX512DQ-NEXT: vpmovd2m %zmm1, %k0
2659 ; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0
2660 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
2661 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2662 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
2663 ; AVX512DQ-NEXT: kmovw %k1, 2(%rdi)
2664 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
2665 ; AVX512DQ-NEXT: vzeroupper
2666 ; AVX512DQ-NEXT: retq
2668 ; X86-LABEL: store_32i1:
2670 ; X86-NEXT: vpsllw $7, %ymm0, %ymm0
2671 ; X86-NEXT: vpmovb2m %ymm0, %k0
2672 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2673 ; X86-NEXT: kmovd %k0, (%eax)
2674 ; X86-NEXT: vzeroupper
; The mask argument is stored as-is; no truncation appears in the IR.
2676 store <32 x i1> %v, <32 x i1>* %a
; store_32i1_1: truncate a <32 x i16> argument to <32 x i1> and store the mask.
; Runs with avx512bw shift the low bit of each word into the sign position
; (vpsllw $15), convert with vpmovw2m and store with a single kmovd. Runs
; without it (KNL, AVX512DQ) process the two ymm halves separately and store
; two 16-bit masks with kmovw at offsets 0 and 2.
2680 define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) {
2681 ; KNL-LABEL: store_32i1_1:
2683 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
2684 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2685 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
2686 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm0
2687 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2688 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
2689 ; KNL-NEXT: kmovw %k1, 2(%rdi)
2690 ; KNL-NEXT: kmovw %k0, (%rdi)
2691 ; KNL-NEXT: vzeroupper
2694 ; SKX-LABEL: store_32i1_1:
2696 ; SKX-NEXT: vpsllw $15, %zmm0, %zmm0
2697 ; SKX-NEXT: vpmovw2m %zmm0, %k0
2698 ; SKX-NEXT: kmovd %k0, (%rdi)
2699 ; SKX-NEXT: vzeroupper
2702 ; AVX512BW-LABEL: store_32i1_1:
2703 ; AVX512BW: ## %bb.0:
2704 ; AVX512BW-NEXT: vpsllw $15, %zmm0, %zmm0
2705 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
2706 ; AVX512BW-NEXT: kmovd %k0, (%rdi)
2707 ; AVX512BW-NEXT: vzeroupper
2708 ; AVX512BW-NEXT: retq
2710 ; AVX512DQ-LABEL: store_32i1_1:
2711 ; AVX512DQ: ## %bb.0:
2712 ; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
2713 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2714 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
2715 ; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm0
2716 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2717 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
2718 ; AVX512DQ-NEXT: kmovw %k1, 2(%rdi)
2719 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
2720 ; AVX512DQ-NEXT: vzeroupper
2721 ; AVX512DQ-NEXT: retq
2723 ; X86-LABEL: store_32i1_1:
2725 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2726 ; X86-NEXT: vpsllw $15, %zmm0, %zmm0
2727 ; X86-NEXT: vpmovw2m %zmm0, %k0
2728 ; X86-NEXT: kmovd %k0, (%eax)
2729 ; X86-NEXT: vzeroupper
; Keep only the low bit of every 16-bit lane, then store the 32-bit mask.
2731 %v1 = trunc <32 x i16> %v to <32 x i1>
2732 store <32 x i1> %v1, <32 x i1>* %a
; store_64i1: store a <64 x i1> argument to memory.
; Runs with avx512bw build one 64-bit mask with vpmovb2m and store it with a
; single kmovq. Runs without it (KNL, AVX512DQ) read the input from xmm0-xmm3,
; build four 16-bit masks and store them with kmovw at offsets 0, 2, 4 and 6.
2737 define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
2739 ; KNL-LABEL: store_64i1:
2741 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
2742 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2743 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
2744 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
2745 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2746 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
2747 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm0
2748 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2749 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k2
2750 ; KNL-NEXT: vpmovsxbd %xmm3, %zmm0
2751 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2752 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k3
2753 ; KNL-NEXT: kmovw %k3, 6(%rdi)
2754 ; KNL-NEXT: kmovw %k2, 4(%rdi)
2755 ; KNL-NEXT: kmovw %k1, 2(%rdi)
2756 ; KNL-NEXT: kmovw %k0, (%rdi)
2757 ; KNL-NEXT: vzeroupper
2760 ; SKX-LABEL: store_64i1:
2762 ; SKX-NEXT: vpsllw $7, %zmm0, %zmm0
2763 ; SKX-NEXT: vpmovb2m %zmm0, %k0
2764 ; SKX-NEXT: kmovq %k0, (%rdi)
2765 ; SKX-NEXT: vzeroupper
2768 ; AVX512BW-LABEL: store_64i1:
2769 ; AVX512BW: ## %bb.0:
2770 ; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0
2771 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
2772 ; AVX512BW-NEXT: kmovq %k0, (%rdi)
2773 ; AVX512BW-NEXT: vzeroupper
2774 ; AVX512BW-NEXT: retq
2776 ; AVX512DQ-LABEL: store_64i1:
2777 ; AVX512DQ: ## %bb.0:
2778 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
2779 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2780 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
2781 ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0
2782 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2783 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
2784 ; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm0
2785 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2786 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k2
2787 ; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm0
2788 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2789 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k3
2790 ; AVX512DQ-NEXT: kmovw %k3, 6(%rdi)
2791 ; AVX512DQ-NEXT: kmovw %k2, 4(%rdi)
2792 ; AVX512DQ-NEXT: kmovw %k1, 2(%rdi)
2793 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
2794 ; AVX512DQ-NEXT: vzeroupper
2795 ; AVX512DQ-NEXT: retq
2797 ; X86-LABEL: store_64i1:
2799 ; X86-NEXT: vpsllw $7, %zmm0, %zmm0
2800 ; X86-NEXT: vpmovb2m %zmm0, %k0
2801 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2802 ; X86-NEXT: kmovq %k0, (%eax)
2803 ; X86-NEXT: vzeroupper
; The mask argument is stored as-is; no truncation appears in the IR.
2805 store <64 x i1> %v, <64 x i1>* %a
; test_bitcast_v8i1_zext: compare a <16 x i32> against zero, keep the low 8
; mask bits, bitcast them to i8, zero-extend to i32 and add the value to
; itself.
; Recorded codegen: runs with avx512dq (SKX, AVX512DQ and the i686 run) read
; the mask with kmovb and need no further extension; KNL and AVX512BW read it
; with kmovw / kmovd and add an explicit movzbl to clear the upper bits.
2809 define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
2810 ; KNL-LABEL: test_bitcast_v8i1_zext:
2812 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
2813 ; KNL-NEXT: kmovw %k0, %eax
2814 ; KNL-NEXT: movzbl %al, %eax
2815 ; KNL-NEXT: addl %eax, %eax
2816 ; KNL-NEXT: vzeroupper
2819 ; SKX-LABEL: test_bitcast_v8i1_zext:
2821 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
2822 ; SKX-NEXT: kmovb %k0, %eax
2823 ; SKX-NEXT: addl %eax, %eax
2824 ; SKX-NEXT: vzeroupper
2827 ; AVX512BW-LABEL: test_bitcast_v8i1_zext:
2828 ; AVX512BW: ## %bb.0:
2829 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
2830 ; AVX512BW-NEXT: kmovd %k0, %eax
2831 ; AVX512BW-NEXT: movzbl %al, %eax
2832 ; AVX512BW-NEXT: addl %eax, %eax
2833 ; AVX512BW-NEXT: vzeroupper
2834 ; AVX512BW-NEXT: retq
2836 ; AVX512DQ-LABEL: test_bitcast_v8i1_zext:
2837 ; AVX512DQ: ## %bb.0:
2838 ; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
2839 ; AVX512DQ-NEXT: kmovb %k0, %eax
2840 ; AVX512DQ-NEXT: addl %eax, %eax
2841 ; AVX512DQ-NEXT: vzeroupper
2842 ; AVX512DQ-NEXT: retq
2844 ; X86-LABEL: test_bitcast_v8i1_zext:
2846 ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
2847 ; X86-NEXT: kmovb %k0, %eax
2848 ; X86-NEXT: addl %eax, %eax
2849 ; X86-NEXT: vzeroupper
; The shufflevector selects lanes 0-7 of the 16-lane compare result.
2851 %v1 = icmp eq <16 x i32> %a, zeroinitializer
2852 %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2853 %mask1 = bitcast <8 x i1> %mask to i8
2854 %val = zext i8 %mask1 to i32
2855 %val1 = add i32 %val, %val
; test_bitcast_v16i1_zext: compare a <16 x i32> against zero, bitcast the full
; 16-bit mask to i16, zero-extend to i32 and add the value to itself.
; All 64-bit runs share one set of assertions here: kmovw moves the mask into
; %eax and no separate zero-extension instruction is expected.
2859 define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) {
2860 ; CHECK-LABEL: test_bitcast_v16i1_zext:
2862 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0
2863 ; CHECK-NEXT: kmovw %k0, %eax
2864 ; CHECK-NEXT: addl %eax, %eax
2865 ; CHECK-NEXT: vzeroupper
2868 ; X86-LABEL: test_bitcast_v16i1_zext:
2870 ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
2871 ; X86-NEXT: kmovw %k0, %eax
2872 ; X86-NEXT: addl %eax, %eax
2873 ; X86-NEXT: vzeroupper
; No shuffle here: all 16 compare bits feed the bitcast.
2875 %v1 = icmp eq <16 x i32> %a, zeroinitializer
2876 %mask1 = bitcast <16 x i1> %v1 to i16
2877 %val = zext i16 %mask1 to i32
2878 %val1 = add i32 %val, %val
; test_v16i1_add: add two <16 x i1> vectors obtained by bitcasting the i16
; arguments, then bitcast the sum back to i16.
; Every run is expected to lower the i1 addition to kxorw.
2882 define i16 @test_v16i1_add(i16 %x, i16 %y) {
2883 ; KNL-LABEL: test_v16i1_add:
2885 ; KNL-NEXT: kmovw %edi, %k0
2886 ; KNL-NEXT: kmovw %esi, %k1
2887 ; KNL-NEXT: kxorw %k1, %k0, %k0
2888 ; KNL-NEXT: kmovw %k0, %eax
2889 ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
2892 ; SKX-LABEL: test_v16i1_add:
2894 ; SKX-NEXT: kmovd %edi, %k0
2895 ; SKX-NEXT: kmovd %esi, %k1
2896 ; SKX-NEXT: kxorw %k1, %k0, %k0
2897 ; SKX-NEXT: kmovd %k0, %eax
2898 ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
2901 ; AVX512BW-LABEL: test_v16i1_add:
2902 ; AVX512BW: ## %bb.0:
2903 ; AVX512BW-NEXT: kmovd %edi, %k0
2904 ; AVX512BW-NEXT: kmovd %esi, %k1
2905 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
2906 ; AVX512BW-NEXT: kmovd %k0, %eax
2907 ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
2908 ; AVX512BW-NEXT: retq
2910 ; AVX512DQ-LABEL: test_v16i1_add:
2911 ; AVX512DQ: ## %bb.0:
2912 ; AVX512DQ-NEXT: kmovw %edi, %k0
2913 ; AVX512DQ-NEXT: kmovw %esi, %k1
2914 ; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
2915 ; AVX512DQ-NEXT: kmovw %k0, %eax
2916 ; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
2917 ; AVX512DQ-NEXT: retq
2919 ; X86-LABEL: test_v16i1_add:
2921 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0
2922 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
2923 ; X86-NEXT: kxorw %k1, %k0, %k0
2924 ; X86-NEXT: kmovd %k0, %eax
2925 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
; Addition on 1-bit lanes wraps modulo 2, i.e. it is a bitwise XOR.
2927 %m0 = bitcast i16 %x to <16 x i1>
2928 %m1 = bitcast i16 %y to <16 x i1>
2929 %m2 = add <16 x i1> %m0, %m1
2930 %ret = bitcast <16 x i1> %m2 to i16
; test_v16i1_sub: subtract two <16 x i1> vectors obtained by bitcasting the i16
; arguments, then bitcast the difference back to i16.
; Every run is expected to lower the i1 subtraction to kxorw, the same
; instruction recorded for test_v16i1_add.
2934 define i16 @test_v16i1_sub(i16 %x, i16 %y) {
2935 ; KNL-LABEL: test_v16i1_sub:
2937 ; KNL-NEXT: kmovw %edi, %k0
2938 ; KNL-NEXT: kmovw %esi, %k1
2939 ; KNL-NEXT: kxorw %k1, %k0, %k0
2940 ; KNL-NEXT: kmovw %k0, %eax
2941 ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
2944 ; SKX-LABEL: test_v16i1_sub:
2946 ; SKX-NEXT: kmovd %edi, %k0
2947 ; SKX-NEXT: kmovd %esi, %k1
2948 ; SKX-NEXT: kxorw %k1, %k0, %k0
2949 ; SKX-NEXT: kmovd %k0, %eax
2950 ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
2953 ; AVX512BW-LABEL: test_v16i1_sub:
2954 ; AVX512BW: ## %bb.0:
2955 ; AVX512BW-NEXT: kmovd %edi, %k0
2956 ; AVX512BW-NEXT: kmovd %esi, %k1
2957 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
2958 ; AVX512BW-NEXT: kmovd %k0, %eax
2959 ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
2960 ; AVX512BW-NEXT: retq
2962 ; AVX512DQ-LABEL: test_v16i1_sub:
2963 ; AVX512DQ: ## %bb.0:
2964 ; AVX512DQ-NEXT: kmovw %edi, %k0
2965 ; AVX512DQ-NEXT: kmovw %esi, %k1
2966 ; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
2967 ; AVX512DQ-NEXT: kmovw %k0, %eax
2968 ; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
2969 ; AVX512DQ-NEXT: retq
2971 ; X86-LABEL: test_v16i1_sub:
2973 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0
2974 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
2975 ; X86-NEXT: kxorw %k1, %k0, %k0
2976 ; X86-NEXT: kmovd %k0, %eax
2977 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
; Subtraction on 1-bit lanes wraps modulo 2, i.e. it is a bitwise XOR.
2979 %m0 = bitcast i16 %x to <16 x i1>
2980 %m1 = bitcast i16 %y to <16 x i1>
2981 %m2 = sub <16 x i1> %m0, %m1
2982 %ret = bitcast <16 x i1> %m2 to i16
; test_v16i1_mul: multiply two <16 x i1> vectors obtained by bitcasting the i16
; arguments, then bitcast the product back to i16.
; Every run is expected to lower the i1 multiplication to kandw.
2986 define i16 @test_v16i1_mul(i16 %x, i16 %y) {
2987 ; KNL-LABEL: test_v16i1_mul:
2989 ; KNL-NEXT: kmovw %edi, %k0
2990 ; KNL-NEXT: kmovw %esi, %k1
2991 ; KNL-NEXT: kandw %k1, %k0, %k0
2992 ; KNL-NEXT: kmovw %k0, %eax
2993 ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
2996 ; SKX-LABEL: test_v16i1_mul:
2998 ; SKX-NEXT: kmovd %edi, %k0
2999 ; SKX-NEXT: kmovd %esi, %k1
3000 ; SKX-NEXT: kandw %k1, %k0, %k0
3001 ; SKX-NEXT: kmovd %k0, %eax
3002 ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
3005 ; AVX512BW-LABEL: test_v16i1_mul:
3006 ; AVX512BW: ## %bb.0:
3007 ; AVX512BW-NEXT: kmovd %edi, %k0
3008 ; AVX512BW-NEXT: kmovd %esi, %k1
3009 ; AVX512BW-NEXT: kandw %k1, %k0, %k0
3010 ; AVX512BW-NEXT: kmovd %k0, %eax
3011 ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
3012 ; AVX512BW-NEXT: retq
3014 ; AVX512DQ-LABEL: test_v16i1_mul:
3015 ; AVX512DQ: ## %bb.0:
3016 ; AVX512DQ-NEXT: kmovw %edi, %k0
3017 ; AVX512DQ-NEXT: kmovw %esi, %k1
3018 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0
3019 ; AVX512DQ-NEXT: kmovw %k0, %eax
3020 ; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
3021 ; AVX512DQ-NEXT: retq
3023 ; X86-LABEL: test_v16i1_mul:
3025 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0
3026 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
3027 ; X86-NEXT: kandw %k1, %k0, %k0
3028 ; X86-NEXT: kmovd %k0, %eax
3029 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
; A product of 1-bit lanes is 1 only when both inputs are 1, i.e. bitwise AND.
3031 %m0 = bitcast i16 %x to <16 x i1>
3032 %m1 = bitcast i16 %y to <16 x i1>
3033 %m2 = mul <16 x i1> %m0, %m1
3034 %ret = bitcast <16 x i1> %m2 to i16
; test_v8i1_add: add two <8 x i1> vectors obtained by bitcasting the i8
; arguments, then bitcast the sum back to i8.
; Recorded codegen: runs with avx512dq (SKX, AVX512DQ and the i686 run) use the
; byte-wide kxorb; KNL and AVX512BW use the 16-bit kxorw instead.
3038 define i8 @test_v8i1_add(i8 %x, i8 %y) {
3039 ; KNL-LABEL: test_v8i1_add:
3041 ; KNL-NEXT: kmovw %edi, %k0
3042 ; KNL-NEXT: kmovw %esi, %k1
3043 ; KNL-NEXT: kxorw %k1, %k0, %k0
3044 ; KNL-NEXT: kmovw %k0, %eax
3045 ; KNL-NEXT: ## kill: def $al killed $al killed $eax
3048 ; SKX-LABEL: test_v8i1_add:
3050 ; SKX-NEXT: kmovd %edi, %k0
3051 ; SKX-NEXT: kmovd %esi, %k1
3052 ; SKX-NEXT: kxorb %k1, %k0, %k0
3053 ; SKX-NEXT: kmovd %k0, %eax
3054 ; SKX-NEXT: ## kill: def $al killed $al killed $eax
3057 ; AVX512BW-LABEL: test_v8i1_add:
3058 ; AVX512BW: ## %bb.0:
3059 ; AVX512BW-NEXT: kmovd %edi, %k0
3060 ; AVX512BW-NEXT: kmovd %esi, %k1
3061 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
3062 ; AVX512BW-NEXT: kmovd %k0, %eax
3063 ; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
3064 ; AVX512BW-NEXT: retq
3066 ; AVX512DQ-LABEL: test_v8i1_add:
3067 ; AVX512DQ: ## %bb.0:
3068 ; AVX512DQ-NEXT: kmovw %edi, %k0
3069 ; AVX512DQ-NEXT: kmovw %esi, %k1
3070 ; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
3071 ; AVX512DQ-NEXT: kmovw %k0, %eax
3072 ; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
3073 ; AVX512DQ-NEXT: retq
3075 ; X86-LABEL: test_v8i1_add:
3077 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
3078 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
3079 ; X86-NEXT: kxorb %k1, %k0, %k0
3080 ; X86-NEXT: kmovd %k0, %eax
3081 ; X86-NEXT: ## kill: def $al killed $al killed $eax
; Addition on 1-bit lanes wraps modulo 2, i.e. it is a bitwise XOR.
3083 %m0 = bitcast i8 %x to <8 x i1>
3084 %m1 = bitcast i8 %y to <8 x i1>
3085 %m2 = add <8 x i1> %m0, %m1
3086 %ret = bitcast <8 x i1> %m2 to i8
; test_v8i1_sub: subtract two <8 x i1> vectors obtained by bitcasting the i8
; arguments, then bitcast the difference back to i8.
; Recorded codegen: runs with avx512dq (SKX, AVX512DQ and the i686 run) use the
; byte-wide kxorb; KNL and AVX512BW use the 16-bit kxorw instead.
3090 define i8 @test_v8i1_sub(i8 %x, i8 %y) {
3091 ; KNL-LABEL: test_v8i1_sub:
3093 ; KNL-NEXT: kmovw %edi, %k0
3094 ; KNL-NEXT: kmovw %esi, %k1
3095 ; KNL-NEXT: kxorw %k1, %k0, %k0
3096 ; KNL-NEXT: kmovw %k0, %eax
3097 ; KNL-NEXT: ## kill: def $al killed $al killed $eax
3100 ; SKX-LABEL: test_v8i1_sub:
3102 ; SKX-NEXT: kmovd %edi, %k0
3103 ; SKX-NEXT: kmovd %esi, %k1
3104 ; SKX-NEXT: kxorb %k1, %k0, %k0
3105 ; SKX-NEXT: kmovd %k0, %eax
3106 ; SKX-NEXT: ## kill: def $al killed $al killed $eax
3109 ; AVX512BW-LABEL: test_v8i1_sub:
3110 ; AVX512BW: ## %bb.0:
3111 ; AVX512BW-NEXT: kmovd %edi, %k0
3112 ; AVX512BW-NEXT: kmovd %esi, %k1
3113 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
3114 ; AVX512BW-NEXT: kmovd %k0, %eax
3115 ; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
3116 ; AVX512BW-NEXT: retq
3118 ; AVX512DQ-LABEL: test_v8i1_sub:
3119 ; AVX512DQ: ## %bb.0:
3120 ; AVX512DQ-NEXT: kmovw %edi, %k0
3121 ; AVX512DQ-NEXT: kmovw %esi, %k1
3122 ; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
3123 ; AVX512DQ-NEXT: kmovw %k0, %eax
3124 ; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
3125 ; AVX512DQ-NEXT: retq
3127 ; X86-LABEL: test_v8i1_sub:
3129 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
3130 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
3131 ; X86-NEXT: kxorb %k1, %k0, %k0
3132 ; X86-NEXT: kmovd %k0, %eax
3133 ; X86-NEXT: ## kill: def $al killed $al killed $eax
; Subtraction on 1-bit lanes wraps modulo 2, i.e. it is a bitwise XOR.
3135 %m0 = bitcast i8 %x to <8 x i1>
3136 %m1 = bitcast i8 %y to <8 x i1>
3137 %m2 = sub <8 x i1> %m0, %m1
3138 %ret = bitcast <8 x i1> %m2 to i8
; test_v8i1_mul: multiply two <8 x i1> vectors obtained by bitcasting the i8
; arguments, then bitcast the product back to i8.
; Recorded codegen: runs with avx512dq (SKX, AVX512DQ and the i686 run) use the
; byte-wide kandb; KNL and AVX512BW use the 16-bit kandw instead.
3142 define i8 @test_v8i1_mul(i8 %x, i8 %y) {
3143 ; KNL-LABEL: test_v8i1_mul:
3145 ; KNL-NEXT: kmovw %edi, %k0
3146 ; KNL-NEXT: kmovw %esi, %k1
3147 ; KNL-NEXT: kandw %k1, %k0, %k0
3148 ; KNL-NEXT: kmovw %k0, %eax
3149 ; KNL-NEXT: ## kill: def $al killed $al killed $eax
3152 ; SKX-LABEL: test_v8i1_mul:
3154 ; SKX-NEXT: kmovd %edi, %k0
3155 ; SKX-NEXT: kmovd %esi, %k1
3156 ; SKX-NEXT: kandb %k1, %k0, %k0
3157 ; SKX-NEXT: kmovd %k0, %eax
3158 ; SKX-NEXT: ## kill: def $al killed $al killed $eax
3161 ; AVX512BW-LABEL: test_v8i1_mul:
3162 ; AVX512BW: ## %bb.0:
3163 ; AVX512BW-NEXT: kmovd %edi, %k0
3164 ; AVX512BW-NEXT: kmovd %esi, %k1
3165 ; AVX512BW-NEXT: kandw %k1, %k0, %k0
3166 ; AVX512BW-NEXT: kmovd %k0, %eax
3167 ; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
3168 ; AVX512BW-NEXT: retq
3170 ; AVX512DQ-LABEL: test_v8i1_mul:
3171 ; AVX512DQ: ## %bb.0:
3172 ; AVX512DQ-NEXT: kmovw %edi, %k0
3173 ; AVX512DQ-NEXT: kmovw %esi, %k1
3174 ; AVX512DQ-NEXT: kandb %k1, %k0, %k0
3175 ; AVX512DQ-NEXT: kmovw %k0, %eax
3176 ; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
3177 ; AVX512DQ-NEXT: retq
3179 ; X86-LABEL: test_v8i1_mul:
3181 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
3182 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
3183 ; X86-NEXT: kandb %k1, %k0, %k0
3184 ; X86-NEXT: kmovd %k0, %eax
3185 ; X86-NEXT: ## kill: def $al killed $al killed $eax
; A product of 1-bit lanes is 1 only when both inputs are 1, i.e. bitwise AND.
3187 %m0 = bitcast i8 %x to <8 x i1>
3188 %m1 = bitcast i8 %y to <8 x i1>
3189 %m2 = mul <8 x i1> %m0, %m1
3190 %ret = bitcast <8 x i1> %m2 to i8
3194 ; Make sure we don't emit a ktest for signed comparisons.
; The branch condition is a signed greater-than-zero test (icmp sgt) of the
; i16 mask, so every run is expected to move the mask into a GPR with kmov
; and branch with testw + jle instead of using a mask-register test.
3195 define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
3196 ; KNL-LABEL: ktest_signed:
3198 ; KNL-NEXT: pushq %rax
3199 ; KNL-NEXT: .cfi_def_cfa_offset 16
3200 ; KNL-NEXT: vpord %zmm1, %zmm0, %zmm0
3201 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
3202 ; KNL-NEXT: kmovw %k0, %eax
3203 ; KNL-NEXT: testw %ax, %ax
3204 ; KNL-NEXT: jle LBB65_1
3205 ; KNL-NEXT: ## %bb.2: ## %bb.2
3206 ; KNL-NEXT: popq %rax
3207 ; KNL-NEXT: vzeroupper
3209 ; KNL-NEXT: LBB65_1: ## %bb.1
3210 ; KNL-NEXT: vzeroupper
3211 ; KNL-NEXT: callq _foo
3212 ; KNL-NEXT: popq %rax
3215 ; SKX-LABEL: ktest_signed:
3217 ; SKX-NEXT: pushq %rax
3218 ; SKX-NEXT: .cfi_def_cfa_offset 16
3219 ; SKX-NEXT: vpord %zmm1, %zmm0, %zmm0
3220 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
3221 ; SKX-NEXT: kmovd %k0, %eax
3222 ; SKX-NEXT: testw %ax, %ax
3223 ; SKX-NEXT: jle LBB65_1
3224 ; SKX-NEXT: ## %bb.2: ## %bb.2
3225 ; SKX-NEXT: popq %rax
3226 ; SKX-NEXT: vzeroupper
3228 ; SKX-NEXT: LBB65_1: ## %bb.1
3229 ; SKX-NEXT: vzeroupper
3230 ; SKX-NEXT: callq _foo
3231 ; SKX-NEXT: popq %rax
3234 ; AVX512BW-LABEL: ktest_signed:
3235 ; AVX512BW: ## %bb.0:
3236 ; AVX512BW-NEXT: pushq %rax
3237 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
3238 ; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
3239 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
3240 ; AVX512BW-NEXT: kmovd %k0, %eax
3241 ; AVX512BW-NEXT: testw %ax, %ax
3242 ; AVX512BW-NEXT: jle LBB65_1
3243 ; AVX512BW-NEXT: ## %bb.2: ## %bb.2
3244 ; AVX512BW-NEXT: popq %rax
3245 ; AVX512BW-NEXT: vzeroupper
3246 ; AVX512BW-NEXT: retq
3247 ; AVX512BW-NEXT: LBB65_1: ## %bb.1
3248 ; AVX512BW-NEXT: vzeroupper
3249 ; AVX512BW-NEXT: callq _foo
3250 ; AVX512BW-NEXT: popq %rax
3251 ; AVX512BW-NEXT: retq
3253 ; AVX512DQ-LABEL: ktest_signed:
3254 ; AVX512DQ: ## %bb.0:
3255 ; AVX512DQ-NEXT: pushq %rax
3256 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
3257 ; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
3258 ; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
3259 ; AVX512DQ-NEXT: kmovw %k0, %eax
3260 ; AVX512DQ-NEXT: testw %ax, %ax
3261 ; AVX512DQ-NEXT: jle LBB65_1
3262 ; AVX512DQ-NEXT: ## %bb.2: ## %bb.2
3263 ; AVX512DQ-NEXT: popq %rax
3264 ; AVX512DQ-NEXT: vzeroupper
3265 ; AVX512DQ-NEXT: retq
3266 ; AVX512DQ-NEXT: LBB65_1: ## %bb.1
3267 ; AVX512DQ-NEXT: vzeroupper
3268 ; AVX512DQ-NEXT: callq _foo
3269 ; AVX512DQ-NEXT: popq %rax
3270 ; AVX512DQ-NEXT: retq
3272 ; X86-LABEL: ktest_signed:
3274 ; X86-NEXT: subl $12, %esp
3275 ; X86-NEXT: .cfi_def_cfa_offset 16
3276 ; X86-NEXT: vpord %zmm1, %zmm0, %zmm0
3277 ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
3278 ; X86-NEXT: kmovd %k0, %eax
3279 ; X86-NEXT: testw %ax, %ax
3280 ; X86-NEXT: jle LBB65_1
3281 ; X86-NEXT: ## %bb.2: ## %bb.2
3282 ; X86-NEXT: addl $12, %esp
3283 ; X86-NEXT: vzeroupper
3285 ; X86-NEXT: LBB65_1: ## %bb.1
3286 ; X86-NEXT: vzeroupper
3287 ; X86-NEXT: calll _foo
3288 ; X86-NEXT: addl $12, %esp
3290 %a = icmp eq <16 x i32> %x, zeroinitializer
3291 %b = icmp eq <16 x i32> %y, zeroinitializer
3292 %c = and <16 x i1> %a, %b
3293 %d = bitcast <16 x i1> %c to i16
3294 %e = icmp sgt i16 %d, 0
3295 br i1 %e, label %bb.2, label %bb.1
3304 ; Make sure we can use the C flag from kortest to check for all ones.
; The branch compares the i16 mask against -1 (icmp eq), and every run is
; expected to produce kortestw %k0, %k0 followed by jb.
3305 define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
3306 ; CHECK-LABEL: ktest_allones:
3308 ; CHECK-NEXT: pushq %rax
3309 ; CHECK-NEXT: .cfi_def_cfa_offset 16
3310 ; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0
3311 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0
3312 ; CHECK-NEXT: kortestw %k0, %k0
3313 ; CHECK-NEXT: jb LBB66_2
3314 ; CHECK-NEXT: ## %bb.1: ## %bb.1
3315 ; CHECK-NEXT: vzeroupper
3316 ; CHECK-NEXT: callq _foo
3317 ; CHECK-NEXT: LBB66_2: ## %bb.2
3318 ; CHECK-NEXT: popq %rax
3319 ; CHECK-NEXT: vzeroupper
3322 ; X86-LABEL: ktest_allones:
3324 ; X86-NEXT: subl $12, %esp
3325 ; X86-NEXT: .cfi_def_cfa_offset 16
3326 ; X86-NEXT: vpord %zmm1, %zmm0, %zmm0
3327 ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
3328 ; X86-NEXT: kortestw %k0, %k0
3329 ; X86-NEXT: jb LBB66_2
3330 ; X86-NEXT: ## %bb.1: ## %bb.1
3331 ; X86-NEXT: vzeroupper
3332 ; X86-NEXT: calll _foo
3333 ; X86-NEXT: LBB66_2: ## %bb.2
3334 ; X86-NEXT: addl $12, %esp
3335 ; X86-NEXT: vzeroupper
3337 %a = icmp eq <16 x i32> %x, zeroinitializer
3338 %b = icmp eq <16 x i32> %y, zeroinitializer
3339 %c = and <16 x i1> %a, %b
3340 %d = bitcast <16 x i1> %c to i16
3341 %e = icmp eq i16 %d, -1
3342 br i1 %e, label %bb.2, label %bb.1
3350 ; This is derived from an intrinsic test where v4i1 mask was created by _mm_cmp_epi32_mask, then it was passed to _mm512_mask_blend_epi32 which uses a v16i1 mask.
3351 ; The widening happens in the scalar domain between the intrinsics. The middle end optimized it to this.
; Runs with AVX512VL enabled (SKX and the i686 run) are expected to compare
; the xmm registers directly into %k1. The other runs widen the compare to zmm
; and clear the upper mask bits with a kshiftlw/kshiftrw $12 pair before the
; masked blend.
3352 define <8 x i64> @mask_widening(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d, <8 x i64> %e, <8 x i64> %f) {
3353 ; KNL-LABEL: mask_widening:
3354 ; KNL: ## %bb.0: ## %entry
3355 ; KNL-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
3356 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
3357 ; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
3358 ; KNL-NEXT: kshiftlw $12, %k0, %k0
3359 ; KNL-NEXT: kshiftrw $12, %k0, %k1
3360 ; KNL-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
3363 ; SKX-LABEL: mask_widening:
3364 ; SKX: ## %bb.0: ## %entry
3365 ; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
3366 ; SKX-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
3369 ; AVX512BW-LABEL: mask_widening:
3370 ; AVX512BW: ## %bb.0: ## %entry
3371 ; AVX512BW-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
3372 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
3373 ; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
3374 ; AVX512BW-NEXT: kshiftlw $12, %k0, %k0
3375 ; AVX512BW-NEXT: kshiftrw $12, %k0, %k1
3376 ; AVX512BW-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
3377 ; AVX512BW-NEXT: retq
3379 ; AVX512DQ-LABEL: mask_widening:
3380 ; AVX512DQ: ## %bb.0: ## %entry
3381 ; AVX512DQ-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
3382 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
3383 ; AVX512DQ-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
3384 ; AVX512DQ-NEXT: kshiftlw $12, %k0, %k0
3385 ; AVX512DQ-NEXT: kshiftrw $12, %k0, %k1
3386 ; AVX512DQ-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
3387 ; AVX512DQ-NEXT: retq
3389 ; X86-LABEL: mask_widening:
3390 ; X86: ## %bb.0: ## %entry
3391 ; X86-NEXT: pushl %ebp
3392 ; X86-NEXT: .cfi_def_cfa_offset 8
3393 ; X86-NEXT: .cfi_offset %ebp, -8
3394 ; X86-NEXT: movl %esp, %ebp
3395 ; X86-NEXT: .cfi_def_cfa_register %ebp
3396 ; X86-NEXT: andl $-64, %esp
3397 ; X86-NEXT: subl $64, %esp
3398 ; X86-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
3399 ; X86-NEXT: vmovdqa64 8(%ebp), %zmm0
3400 ; X86-NEXT: vmovdqa32 72(%ebp), %zmm0 {%k1}
3401 ; X86-NEXT: movl %ebp, %esp
3402 ; X86-NEXT: popl %ebp
3405 %0 = bitcast <2 x i64> %a to <4 x i32>
3406 %1 = bitcast <2 x i64> %b to <4 x i32>
3407 %2 = icmp eq <4 x i32> %0, %1
3408 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3409 %4 = bitcast <8 x i64> %f to <16 x i32>
3410 %5 = bitcast <8 x i64> %e to <16 x i32>
3411 %6 = shufflevector <8 x i1> %3, <8 x i1> <i1 false, i1 undef, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
3412 %7 = select <16 x i1> %6, <16 x i32> %4, <16 x i32> %5
3413 %8 = bitcast <16 x i32> %7 to <8 x i64>
; Storing a constant <64 x i1> vector is expected to become plain integer
; stores of the packed bit pattern, with no mask registers involved. The
; 64-bit runs check for one movabsq + movq; the i686 run checks for two
; 32-bit movl stores.
3417 define void @store_v64i1_constant(<64 x i1>* %R) {
3418 ; CHECK-LABEL: store_v64i1_constant:
3419 ; CHECK: ## %bb.0: ## %entry
3420 ; CHECK-NEXT: movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
3421 ; CHECK-NEXT: movq %rax, (%rdi)
3424 ; X86-LABEL: store_v64i1_constant:
3425 ; X86: ## %bb.0: ## %entry
3426 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
3427 ; X86-NEXT: movl $-536871045, 4(%eax) ## imm = 0xDFFFFF7B
3428 ; X86-NEXT: movl $-4099, (%eax) ## imm = 0xEFFD
3431 store <64 x i1> <i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1>, <64 x i1>* %R
; Storing the constant <2 x i1> <1, 0> is expected to become a single byte
; store of the immediate 1 (only element 0 is set).
3435 define void @store_v2i1_constant(<2 x i1>* %R) {
3436 ; CHECK-LABEL: store_v2i1_constant:
3437 ; CHECK: ## %bb.0: ## %entry
3438 ; CHECK-NEXT: movb $1, (%rdi)
3441 ; X86-LABEL: store_v2i1_constant:
3442 ; X86: ## %bb.0: ## %entry
3443 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
3444 ; X86-NEXT: movb $1, (%eax)
3447 store <2 x i1> <i1 1, i1 0>, <2 x i1>* %R
; Storing the constant <4 x i1> <1, 0, 1, 0> is expected to become a single
; byte store of the immediate 5 (elements 0 and 2 are set).
3451 define void @store_v4i1_constant(<4 x i1>* %R) {
3452 ; CHECK-LABEL: store_v4i1_constant:
3453 ; CHECK: ## %bb.0: ## %entry
3454 ; CHECK-NEXT: movb $5, (%rdi)
3457 ; X86-LABEL: store_v4i1_constant:
3458 ; X86: ## %bb.0: ## %entry
3459 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
3460 ; X86-NEXT: movb $5, (%eax)
3463 store <4 x i1> <i1 1, i1 0, i1 1, i1 0>, <4 x i1>* %R
3467 ; Make sure we bring the -1 constant into the mask domain.
; The IR inverts the ule compare result with a scalar `xor i16 ..., -1` and
; ANDs it with a second mask. The checks expect this to become an inverted
; compare (vpcmpnleud) whose result is the write mask of vptestmd and then of
; the masked store, with no kmov to a general-purpose register.
; The arguments are unnamed, so they are %0 through %4 in the body.
3468 define void @mask_not_cast(i8*, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>) {
3469 ; CHECK-LABEL: mask_not_cast:
3471 ; CHECK-NEXT: vpcmpnleud %zmm3, %zmm2, %k1
3472 ; CHECK-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1}
3473 ; CHECK-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
3474 ; CHECK-NEXT: vzeroupper
3477 ; X86-LABEL: mask_not_cast:
3479 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
3480 ; X86-NEXT: vpcmpnleud %zmm3, %zmm2, %k1
3481 ; X86-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1}
3482 ; X86-NEXT: vmovdqu32 %zmm0, (%eax) {%k1}
3483 ; X86-NEXT: vzeroupper
3485 %6 = and <8 x i64> %2, %1
3486 %7 = bitcast <8 x i64> %6 to <16 x i32>
3487 %8 = icmp ne <16 x i32> %7, zeroinitializer
3488 %9 = bitcast <16 x i1> %8 to i16
3489 %10 = bitcast <8 x i64> %3 to <16 x i32>
3490 %11 = bitcast <8 x i64> %4 to <16 x i32>
3491 %12 = icmp ule <16 x i32> %10, %11
3492 %13 = bitcast <16 x i1> %12 to i16
3493 %14 = xor i16 %13, -1
3494 %15 = and i16 %14, %9
3495 %16 = bitcast <8 x i64> %1 to <16 x i32>
3496 %17 = bitcast i8* %0 to <16 x i32>*
3497 %18 = bitcast i16 %15 to <16 x i1>
3498 tail call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> %16, <16 x i32>* %17, i32 1, <16 x i1> %18) #2
; Masked vector store intrinsic called by @mask_not_cast. Its operands are the
; value to store, the destination pointer, the alignment and the <16 x i1> mask.
3501 declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
; Branch on whether (a | b) & (c | d) is all zero, where a-d are <8 x i1>
; results of comparing 256-bit vectors against zero.
; Runs with AVX512DQ enabled (SKX, AVX512DQ and the i686 run) are expected to
; use korb + ktestb. The KNL and AVX512BW runs are expected to use
; korw + kandw, move the mask to a GPR and test it with testb.
; Runs without AVX512VL widen the ymm inputs to zmm (the kill comments).
3503 define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
3504 ; KNL-LABEL: ktest_3:
3506 ; KNL-NEXT: pushq %rax
3507 ; KNL-NEXT: .cfi_def_cfa_offset 16
3508 ; KNL-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
3509 ; KNL-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
3510 ; KNL-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
3511 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
3512 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
3513 ; KNL-NEXT: vptestnmd %zmm1, %zmm1, %k1
3514 ; KNL-NEXT: vptestnmd %zmm2, %zmm2, %k2
3515 ; KNL-NEXT: vptestnmd %zmm3, %zmm3, %k3
3516 ; KNL-NEXT: korw %k1, %k0, %k0
3517 ; KNL-NEXT: korw %k3, %k2, %k1
3518 ; KNL-NEXT: kandw %k1, %k0, %k0
3519 ; KNL-NEXT: kmovw %k0, %eax
3520 ; KNL-NEXT: testb %al, %al
3521 ; KNL-NEXT: je LBB72_1
3522 ; KNL-NEXT: ## %bb.2: ## %exit
3523 ; KNL-NEXT: popq %rax
3524 ; KNL-NEXT: vzeroupper
3526 ; KNL-NEXT: LBB72_1: ## %bar
3527 ; KNL-NEXT: vzeroupper
3528 ; KNL-NEXT: callq _foo
3529 ; KNL-NEXT: popq %rax
3532 ; SKX-LABEL: ktest_3:
3534 ; SKX-NEXT: pushq %rax
3535 ; SKX-NEXT: .cfi_def_cfa_offset 16
3536 ; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k0
3537 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1
3538 ; SKX-NEXT: korb %k1, %k0, %k0
3539 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1
3540 ; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k2
3541 ; SKX-NEXT: korb %k2, %k1, %k1
3542 ; SKX-NEXT: ktestb %k1, %k0
3543 ; SKX-NEXT: je LBB72_1
3544 ; SKX-NEXT: ## %bb.2: ## %exit
3545 ; SKX-NEXT: popq %rax
3546 ; SKX-NEXT: vzeroupper
3548 ; SKX-NEXT: LBB72_1: ## %bar
3549 ; SKX-NEXT: vzeroupper
3550 ; SKX-NEXT: callq _foo
3551 ; SKX-NEXT: popq %rax
3554 ; AVX512BW-LABEL: ktest_3:
3555 ; AVX512BW: ## %bb.0:
3556 ; AVX512BW-NEXT: pushq %rax
3557 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
3558 ; AVX512BW-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
3559 ; AVX512BW-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
3560 ; AVX512BW-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
3561 ; AVX512BW-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
3562 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
3563 ; AVX512BW-NEXT: vptestnmd %zmm1, %zmm1, %k1
3564 ; AVX512BW-NEXT: vptestnmd %zmm2, %zmm2, %k2
3565 ; AVX512BW-NEXT: vptestnmd %zmm3, %zmm3, %k3
3566 ; AVX512BW-NEXT: korw %k1, %k0, %k0
3567 ; AVX512BW-NEXT: korw %k3, %k2, %k1
3568 ; AVX512BW-NEXT: kandw %k1, %k0, %k0
3569 ; AVX512BW-NEXT: kmovd %k0, %eax
3570 ; AVX512BW-NEXT: testb %al, %al
3571 ; AVX512BW-NEXT: je LBB72_1
3572 ; AVX512BW-NEXT: ## %bb.2: ## %exit
3573 ; AVX512BW-NEXT: popq %rax
3574 ; AVX512BW-NEXT: vzeroupper
3575 ; AVX512BW-NEXT: retq
3576 ; AVX512BW-NEXT: LBB72_1: ## %bar
3577 ; AVX512BW-NEXT: vzeroupper
3578 ; AVX512BW-NEXT: callq _foo
3579 ; AVX512BW-NEXT: popq %rax
3580 ; AVX512BW-NEXT: retq
3582 ; AVX512DQ-LABEL: ktest_3:
3583 ; AVX512DQ: ## %bb.0:
3584 ; AVX512DQ-NEXT: pushq %rax
3585 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
3586 ; AVX512DQ-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
3587 ; AVX512DQ-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
3588 ; AVX512DQ-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
3589 ; AVX512DQ-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
3590 ; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
3591 ; AVX512DQ-NEXT: vptestnmd %zmm1, %zmm1, %k1
3592 ; AVX512DQ-NEXT: vptestnmd %zmm2, %zmm2, %k2
3593 ; AVX512DQ-NEXT: vptestnmd %zmm3, %zmm3, %k3
3594 ; AVX512DQ-NEXT: korb %k1, %k0, %k0
3595 ; AVX512DQ-NEXT: korb %k3, %k2, %k1
3596 ; AVX512DQ-NEXT: ktestb %k1, %k0
3597 ; AVX512DQ-NEXT: je LBB72_1
3598 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
3599 ; AVX512DQ-NEXT: popq %rax
3600 ; AVX512DQ-NEXT: vzeroupper
3601 ; AVX512DQ-NEXT: retq
3602 ; AVX512DQ-NEXT: LBB72_1: ## %bar
3603 ; AVX512DQ-NEXT: vzeroupper
3604 ; AVX512DQ-NEXT: callq _foo
3605 ; AVX512DQ-NEXT: popq %rax
3606 ; AVX512DQ-NEXT: retq
3608 ; X86-LABEL: ktest_3:
3610 ; X86-NEXT: subl $12, %esp
3611 ; X86-NEXT: .cfi_def_cfa_offset 16
3612 ; X86-NEXT: vptestnmd %ymm0, %ymm0, %k0
3613 ; X86-NEXT: vptestnmd %ymm1, %ymm1, %k1
3614 ; X86-NEXT: korb %k1, %k0, %k0
3615 ; X86-NEXT: vptestnmd %ymm2, %ymm2, %k1
3616 ; X86-NEXT: vptestnmd %ymm3, %ymm3, %k2
3617 ; X86-NEXT: korb %k2, %k1, %k1
3618 ; X86-NEXT: ktestb %k1, %k0
3619 ; X86-NEXT: je LBB72_1
3620 ; X86-NEXT: ## %bb.2: ## %exit
3621 ; X86-NEXT: addl $12, %esp
3622 ; X86-NEXT: vzeroupper
3624 ; X86-NEXT: LBB72_1: ## %bar
3625 ; X86-NEXT: vzeroupper
3626 ; X86-NEXT: calll _foo
3627 ; X86-NEXT: addl $12, %esp
3629 %a = icmp eq <8 x i32> %w, zeroinitializer
3630 %b = icmp eq <8 x i32> %x, zeroinitializer
3631 %c = icmp eq <8 x i32> %y, zeroinitializer
3632 %d = icmp eq <8 x i32> %z, zeroinitializer
3633 %e = or <8 x i1> %a, %b
3634 %f = or <8 x i1> %c, %d
3635 %g = and <8 x i1> %e, %f
3636 %h = bitcast <8 x i1> %g to i8
3637 %i = icmp eq i8 %h, 0
3638 br i1 %i, label %bar, label %exit
; Branch on whether (a | b) & (c | d) is all zero, where a-d are <8 x i1>
; results of comparing <8 x i64> vectors against zero (vptestnmq on zmm in
; every run).
; Runs with AVX512DQ enabled (SKX, AVX512DQ and the i686 run) are expected to
; use korb + ktestb. The KNL and AVX512BW runs are expected to use
; korw + kandw, move the mask to a GPR and test it with testb.
3648 define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
3649 ; KNL-LABEL: ktest_4:
3651 ; KNL-NEXT: pushq %rax
3652 ; KNL-NEXT: .cfi_def_cfa_offset 16
3653 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
3654 ; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1
3655 ; KNL-NEXT: vptestnmq %zmm2, %zmm2, %k2
3656 ; KNL-NEXT: vptestnmq %zmm3, %zmm3, %k3
3657 ; KNL-NEXT: korw %k1, %k0, %k0
3658 ; KNL-NEXT: korw %k3, %k2, %k1
3659 ; KNL-NEXT: kandw %k1, %k0, %k0
3660 ; KNL-NEXT: kmovw %k0, %eax
3661 ; KNL-NEXT: testb %al, %al
3662 ; KNL-NEXT: je LBB73_1
3663 ; KNL-NEXT: ## %bb.2: ## %exit
3664 ; KNL-NEXT: popq %rax
3665 ; KNL-NEXT: vzeroupper
3667 ; KNL-NEXT: LBB73_1: ## %bar
3668 ; KNL-NEXT: vzeroupper
3669 ; KNL-NEXT: callq _foo
3670 ; KNL-NEXT: popq %rax
3673 ; SKX-LABEL: ktest_4:
3675 ; SKX-NEXT: pushq %rax
3676 ; SKX-NEXT: .cfi_def_cfa_offset 16
3677 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k0
3678 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1
3679 ; SKX-NEXT: korb %k1, %k0, %k0
3680 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1
3681 ; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k2
3682 ; SKX-NEXT: korb %k2, %k1, %k1
3683 ; SKX-NEXT: ktestb %k1, %k0
3684 ; SKX-NEXT: je LBB73_1
3685 ; SKX-NEXT: ## %bb.2: ## %exit
3686 ; SKX-NEXT: popq %rax
3687 ; SKX-NEXT: vzeroupper
3689 ; SKX-NEXT: LBB73_1: ## %bar
3690 ; SKX-NEXT: vzeroupper
3691 ; SKX-NEXT: callq _foo
3692 ; SKX-NEXT: popq %rax
3695 ; AVX512BW-LABEL: ktest_4:
3696 ; AVX512BW: ## %bb.0:
3697 ; AVX512BW-NEXT: pushq %rax
3698 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
3699 ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
3700 ; AVX512BW-NEXT: vptestnmq %zmm1, %zmm1, %k1
3701 ; AVX512BW-NEXT: vptestnmq %zmm2, %zmm2, %k2
3702 ; AVX512BW-NEXT: vptestnmq %zmm3, %zmm3, %k3
3703 ; AVX512BW-NEXT: korw %k1, %k0, %k0
3704 ; AVX512BW-NEXT: korw %k3, %k2, %k1
3705 ; AVX512BW-NEXT: kandw %k1, %k0, %k0
3706 ; AVX512BW-NEXT: kmovd %k0, %eax
3707 ; AVX512BW-NEXT: testb %al, %al
3708 ; AVX512BW-NEXT: je LBB73_1
3709 ; AVX512BW-NEXT: ## %bb.2: ## %exit
3710 ; AVX512BW-NEXT: popq %rax
3711 ; AVX512BW-NEXT: vzeroupper
3712 ; AVX512BW-NEXT: retq
3713 ; AVX512BW-NEXT: LBB73_1: ## %bar
3714 ; AVX512BW-NEXT: vzeroupper
3715 ; AVX512BW-NEXT: callq _foo
3716 ; AVX512BW-NEXT: popq %rax
3717 ; AVX512BW-NEXT: retq
3719 ; AVX512DQ-LABEL: ktest_4:
3720 ; AVX512DQ: ## %bb.0:
3721 ; AVX512DQ-NEXT: pushq %rax
3722 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
3723 ; AVX512DQ-NEXT: vptestnmq %zmm0, %zmm0, %k0
3724 ; AVX512DQ-NEXT: vptestnmq %zmm1, %zmm1, %k1
3725 ; AVX512DQ-NEXT: korb %k1, %k0, %k0
3726 ; AVX512DQ-NEXT: vptestnmq %zmm2, %zmm2, %k1
3727 ; AVX512DQ-NEXT: vptestnmq %zmm3, %zmm3, %k2
3728 ; AVX512DQ-NEXT: korb %k2, %k1, %k1
3729 ; AVX512DQ-NEXT: ktestb %k1, %k0
3730 ; AVX512DQ-NEXT: je LBB73_1
3731 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
3732 ; AVX512DQ-NEXT: popq %rax
3733 ; AVX512DQ-NEXT: vzeroupper
3734 ; AVX512DQ-NEXT: retq
3735 ; AVX512DQ-NEXT: LBB73_1: ## %bar
3736 ; AVX512DQ-NEXT: vzeroupper
3737 ; AVX512DQ-NEXT: callq _foo
3738 ; AVX512DQ-NEXT: popq %rax
3739 ; AVX512DQ-NEXT: retq
3741 ; X86-LABEL: ktest_4:
3743 ; X86-NEXT: subl $12, %esp
3744 ; X86-NEXT: .cfi_def_cfa_offset 16
3745 ; X86-NEXT: vptestnmq %zmm0, %zmm0, %k0
3746 ; X86-NEXT: vptestnmq %zmm1, %zmm1, %k1
3747 ; X86-NEXT: korb %k1, %k0, %k0
3748 ; X86-NEXT: vptestnmq %zmm2, %zmm2, %k1
3749 ; X86-NEXT: vptestnmq %zmm3, %zmm3, %k2
3750 ; X86-NEXT: korb %k2, %k1, %k1
3751 ; X86-NEXT: ktestb %k1, %k0
3752 ; X86-NEXT: je LBB73_1
3753 ; X86-NEXT: ## %bb.2: ## %exit
3754 ; X86-NEXT: addl $12, %esp
3755 ; X86-NEXT: vzeroupper
3757 ; X86-NEXT: LBB73_1: ## %bar
3758 ; X86-NEXT: vzeroupper
3759 ; X86-NEXT: calll _foo
3760 ; X86-NEXT: addl $12, %esp
3762 %a = icmp eq <8 x i64> %w, zeroinitializer
3763 %b = icmp eq <8 x i64> %x, zeroinitializer
3764 %c = icmp eq <8 x i64> %y, zeroinitializer
3765 %d = icmp eq <8 x i64> %z, zeroinitializer
3766 %e = or <8 x i1> %a, %b
3767 %f = or <8 x i1> %c, %d
3768 %g = and <8 x i1> %e, %f
3769 %h = bitcast <8 x i1> %g to i8
3770 %i = icmp eq i8 %h, 0
3771 br i1 %i, label %bar, label %exit
; Branch on whether (a | b) & (c | d) is all zero, where a-d are <16 x i1>
; results of comparing <16 x i32> vectors against zero.
; Runs with AVX512DQ enabled (SKX, AVX512DQ and the i686 run) are expected to
; fold the final AND into ktestw. The KNL and AVX512BW runs are expected to
; use an explicit kandw followed by kortestw.
3781 define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z) {
3782 ; KNL-LABEL: ktest_5:
3784 ; KNL-NEXT: pushq %rax
3785 ; KNL-NEXT: .cfi_def_cfa_offset 16
3786 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
3787 ; KNL-NEXT: vptestnmd %zmm1, %zmm1, %k1
3788 ; KNL-NEXT: korw %k1, %k0, %k0
3789 ; KNL-NEXT: vptestnmd %zmm2, %zmm2, %k1
3790 ; KNL-NEXT: vptestnmd %zmm3, %zmm3, %k2
3791 ; KNL-NEXT: korw %k2, %k1, %k1
3792 ; KNL-NEXT: kandw %k1, %k0, %k0
3793 ; KNL-NEXT: kortestw %k0, %k0
3794 ; KNL-NEXT: je LBB74_1
3795 ; KNL-NEXT: ## %bb.2: ## %exit
3796 ; KNL-NEXT: popq %rax
3797 ; KNL-NEXT: vzeroupper
3799 ; KNL-NEXT: LBB74_1: ## %bar
3800 ; KNL-NEXT: vzeroupper
3801 ; KNL-NEXT: callq _foo
3802 ; KNL-NEXT: popq %rax
3805 ; SKX-LABEL: ktest_5:
3807 ; SKX-NEXT: pushq %rax
3808 ; SKX-NEXT: .cfi_def_cfa_offset 16
3809 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
3810 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1
3811 ; SKX-NEXT: korw %k1, %k0, %k0
3812 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1
3813 ; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k2
3814 ; SKX-NEXT: korw %k2, %k1, %k1
3815 ; SKX-NEXT: ktestw %k1, %k0
3816 ; SKX-NEXT: je LBB74_1
3817 ; SKX-NEXT: ## %bb.2: ## %exit
3818 ; SKX-NEXT: popq %rax
3819 ; SKX-NEXT: vzeroupper
3821 ; SKX-NEXT: LBB74_1: ## %bar
3822 ; SKX-NEXT: vzeroupper
3823 ; SKX-NEXT: callq _foo
3824 ; SKX-NEXT: popq %rax
3827 ; AVX512BW-LABEL: ktest_5:
3828 ; AVX512BW: ## %bb.0:
3829 ; AVX512BW-NEXT: pushq %rax
3830 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
3831 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
3832 ; AVX512BW-NEXT: vptestnmd %zmm1, %zmm1, %k1
3833 ; AVX512BW-NEXT: korw %k1, %k0, %k0
3834 ; AVX512BW-NEXT: vptestnmd %zmm2, %zmm2, %k1
3835 ; AVX512BW-NEXT: vptestnmd %zmm3, %zmm3, %k2
3836 ; AVX512BW-NEXT: korw %k2, %k1, %k1
3837 ; AVX512BW-NEXT: kandw %k1, %k0, %k0
3838 ; AVX512BW-NEXT: kortestw %k0, %k0
3839 ; AVX512BW-NEXT: je LBB74_1
3840 ; AVX512BW-NEXT: ## %bb.2: ## %exit
3841 ; AVX512BW-NEXT: popq %rax
3842 ; AVX512BW-NEXT: vzeroupper
3843 ; AVX512BW-NEXT: retq
3844 ; AVX512BW-NEXT: LBB74_1: ## %bar
3845 ; AVX512BW-NEXT: vzeroupper
3846 ; AVX512BW-NEXT: callq _foo
3847 ; AVX512BW-NEXT: popq %rax
3848 ; AVX512BW-NEXT: retq
3850 ; AVX512DQ-LABEL: ktest_5:
3851 ; AVX512DQ: ## %bb.0:
3852 ; AVX512DQ-NEXT: pushq %rax
3853 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
3854 ; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
3855 ; AVX512DQ-NEXT: vptestnmd %zmm1, %zmm1, %k1
3856 ; AVX512DQ-NEXT: korw %k1, %k0, %k0
3857 ; AVX512DQ-NEXT: vptestnmd %zmm2, %zmm2, %k1
3858 ; AVX512DQ-NEXT: vptestnmd %zmm3, %zmm3, %k2
3859 ; AVX512DQ-NEXT: korw %k2, %k1, %k1
3860 ; AVX512DQ-NEXT: ktestw %k1, %k0
3861 ; AVX512DQ-NEXT: je LBB74_1
3862 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
3863 ; AVX512DQ-NEXT: popq %rax
3864 ; AVX512DQ-NEXT: vzeroupper
3865 ; AVX512DQ-NEXT: retq
3866 ; AVX512DQ-NEXT: LBB74_1: ## %bar
3867 ; AVX512DQ-NEXT: vzeroupper
3868 ; AVX512DQ-NEXT: callq _foo
3869 ; AVX512DQ-NEXT: popq %rax
3870 ; AVX512DQ-NEXT: retq
3872 ; X86-LABEL: ktest_5:
3874 ; X86-NEXT: subl $12, %esp
3875 ; X86-NEXT: .cfi_def_cfa_offset 16
3876 ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
3877 ; X86-NEXT: vptestnmd %zmm1, %zmm1, %k1
3878 ; X86-NEXT: korw %k1, %k0, %k0
3879 ; X86-NEXT: vptestnmd %zmm2, %zmm2, %k1
3880 ; X86-NEXT: vptestnmd %zmm3, %zmm3, %k2
3881 ; X86-NEXT: korw %k2, %k1, %k1
3882 ; X86-NEXT: ktestw %k1, %k0
3883 ; X86-NEXT: je LBB74_1
3884 ; X86-NEXT: ## %bb.2: ## %exit
3885 ; X86-NEXT: addl $12, %esp
3886 ; X86-NEXT: vzeroupper
3888 ; X86-NEXT: LBB74_1: ## %bar
3889 ; X86-NEXT: vzeroupper
3890 ; X86-NEXT: calll _foo
3891 ; X86-NEXT: addl $12, %esp
3893 %a = icmp eq <16 x i32> %w, zeroinitializer
3894 %b = icmp eq <16 x i32> %x, zeroinitializer
3895 %c = icmp eq <16 x i32> %y, zeroinitializer
3896 %d = icmp eq <16 x i32> %z, zeroinitializer
3897 %e = or <16 x i1> %a, %b
3898 %f = or <16 x i1> %c, %d
3899 %g = and <16 x i1> %e, %f
3900 %h = bitcast <16 x i1> %g to i16
3901 %i = icmp eq i16 %h, 0
3902 br i1 %i, label %bar, label %exit
; Branch on whether (a | b) & (c | d) is all zero, where a-d are <32 x i1>
; results of comparing <32 x i16> vectors against zero.
; Runs with AVX512BW enabled (SKX, AVX512BW and the i686 run) are expected to
; use vptestnmw + kord + ktestd on 32-bit masks.
; The KNL and AVX512DQ runs are expected to do the compares and the or/and
; logic on ymm halves (vpcmpeqw, vpor, vpand), convert each half to a 16-bit
; mask, and merge the two halves in GPRs with shll $16 + orl before the je.
3912 define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z) {
3913 ; KNL-LABEL: ktest_6:
3915 ; KNL-NEXT: pushq %rax
3916 ; KNL-NEXT: .cfi_def_cfa_offset 16
3917 ; KNL-NEXT: vpxor %xmm8, %xmm8, %xmm8
3918 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm0, %ymm0
3919 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm1, %ymm1
3920 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm2, %ymm2
3921 ; KNL-NEXT: vpor %ymm2, %ymm0, %ymm0
3922 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm3, %ymm2
3923 ; KNL-NEXT: vpor %ymm2, %ymm1, %ymm1
3924 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm4, %ymm2
3925 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm5, %ymm3
3926 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm6, %ymm4
3927 ; KNL-NEXT: vpor %ymm4, %ymm2, %ymm2
3928 ; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0
3929 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm7, %ymm2
3930 ; KNL-NEXT: vpor %ymm2, %ymm3, %ymm2
3931 ; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
3932 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
3933 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
3934 ; KNL-NEXT: kmovw %k0, %eax
3935 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm0
3936 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
3937 ; KNL-NEXT: kmovw %k0, %ecx
3938 ; KNL-NEXT: shll $16, %ecx
3939 ; KNL-NEXT: orl %eax, %ecx
3940 ; KNL-NEXT: je LBB75_1
3941 ; KNL-NEXT: ## %bb.2: ## %exit
3942 ; KNL-NEXT: popq %rax
3943 ; KNL-NEXT: vzeroupper
3945 ; KNL-NEXT: LBB75_1: ## %bar
3946 ; KNL-NEXT: vzeroupper
3947 ; KNL-NEXT: callq _foo
3948 ; KNL-NEXT: popq %rax
3951 ; SKX-LABEL: ktest_6:
3953 ; SKX-NEXT: pushq %rax
3954 ; SKX-NEXT: .cfi_def_cfa_offset 16
3955 ; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k0
3956 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1
3957 ; SKX-NEXT: kord %k1, %k0, %k0
3958 ; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1
3959 ; SKX-NEXT: vptestnmw %zmm3, %zmm3, %k2
3960 ; SKX-NEXT: kord %k2, %k1, %k1
3961 ; SKX-NEXT: ktestd %k1, %k0
3962 ; SKX-NEXT: je LBB75_1
3963 ; SKX-NEXT: ## %bb.2: ## %exit
3964 ; SKX-NEXT: popq %rax
3965 ; SKX-NEXT: vzeroupper
3967 ; SKX-NEXT: LBB75_1: ## %bar
3968 ; SKX-NEXT: vzeroupper
3969 ; SKX-NEXT: callq _foo
3970 ; SKX-NEXT: popq %rax
3973 ; AVX512BW-LABEL: ktest_6:
3974 ; AVX512BW: ## %bb.0:
3975 ; AVX512BW-NEXT: pushq %rax
3976 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
3977 ; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0
3978 ; AVX512BW-NEXT: vptestnmw %zmm1, %zmm1, %k1
3979 ; AVX512BW-NEXT: kord %k1, %k0, %k0
3980 ; AVX512BW-NEXT: vptestnmw %zmm2, %zmm2, %k1
3981 ; AVX512BW-NEXT: vptestnmw %zmm3, %zmm3, %k2
3982 ; AVX512BW-NEXT: kord %k2, %k1, %k1
3983 ; AVX512BW-NEXT: ktestd %k1, %k0
3984 ; AVX512BW-NEXT: je LBB75_1
3985 ; AVX512BW-NEXT: ## %bb.2: ## %exit
3986 ; AVX512BW-NEXT: popq %rax
3987 ; AVX512BW-NEXT: vzeroupper
3988 ; AVX512BW-NEXT: retq
3989 ; AVX512BW-NEXT: LBB75_1: ## %bar
3990 ; AVX512BW-NEXT: vzeroupper
3991 ; AVX512BW-NEXT: callq _foo
3992 ; AVX512BW-NEXT: popq %rax
3993 ; AVX512BW-NEXT: retq
3995 ; AVX512DQ-LABEL: ktest_6:
3996 ; AVX512DQ: ## %bb.0:
3997 ; AVX512DQ-NEXT: pushq %rax
3998 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
3999 ; AVX512DQ-NEXT: vpxor %xmm8, %xmm8, %xmm8
4000 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm0, %ymm0
4001 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm1, %ymm1
4002 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm2, %ymm2
4003 ; AVX512DQ-NEXT: vpor %ymm2, %ymm0, %ymm0
4004 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm3, %ymm2
4005 ; AVX512DQ-NEXT: vpor %ymm2, %ymm1, %ymm1
4006 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm4, %ymm2
4007 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm5, %ymm3
4008 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm6, %ymm4
4009 ; AVX512DQ-NEXT: vpor %ymm4, %ymm2, %ymm2
4010 ; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
4011 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm7, %ymm2
4012 ; AVX512DQ-NEXT: vpor %ymm2, %ymm3, %ymm2
4013 ; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
4014 ; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
4015 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
4016 ; AVX512DQ-NEXT: kmovw %k0, %eax
4017 ; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm0
4018 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
4019 ; AVX512DQ-NEXT: kmovw %k0, %ecx
4020 ; AVX512DQ-NEXT: shll $16, %ecx
4021 ; AVX512DQ-NEXT: orl %eax, %ecx
4022 ; AVX512DQ-NEXT: je LBB75_1
4023 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
4024 ; AVX512DQ-NEXT: popq %rax
4025 ; AVX512DQ-NEXT: vzeroupper
4026 ; AVX512DQ-NEXT: retq
4027 ; AVX512DQ-NEXT: LBB75_1: ## %bar
4028 ; AVX512DQ-NEXT: vzeroupper
4029 ; AVX512DQ-NEXT: callq _foo
4030 ; AVX512DQ-NEXT: popq %rax
4031 ; AVX512DQ-NEXT: retq
4033 ; X86-LABEL: ktest_6:
4035 ; X86-NEXT: subl $12, %esp
4036 ; X86-NEXT: .cfi_def_cfa_offset 16
4037 ; X86-NEXT: vptestnmw %zmm0, %zmm0, %k0
4038 ; X86-NEXT: vptestnmw %zmm1, %zmm1, %k1
4039 ; X86-NEXT: kord %k1, %k0, %k0
4040 ; X86-NEXT: vptestnmw %zmm2, %zmm2, %k1
4041 ; X86-NEXT: vptestnmw %zmm3, %zmm3, %k2
4042 ; X86-NEXT: kord %k2, %k1, %k1
4043 ; X86-NEXT: ktestd %k1, %k0
4044 ; X86-NEXT: je LBB75_1
4045 ; X86-NEXT: ## %bb.2: ## %exit
4046 ; X86-NEXT: addl $12, %esp
4047 ; X86-NEXT: vzeroupper
4049 ; X86-NEXT: LBB75_1: ## %bar
4050 ; X86-NEXT: vzeroupper
4051 ; X86-NEXT: calll _foo
4052 ; X86-NEXT: addl $12, %esp
4054 %a = icmp eq <32 x i16> %w, zeroinitializer
4055 %b = icmp eq <32 x i16> %x, zeroinitializer
4056 %c = icmp eq <32 x i16> %y, zeroinitializer
4057 %d = icmp eq <32 x i16> %z, zeroinitializer
4058 %e = or <32 x i1> %a, %b
4059 %f = or <32 x i1> %c, %d
4060 %g = and <32 x i1> %e, %f
4061 %h = bitcast <32 x i1> %g to i32
4062 %i = icmp eq i32 %h, 0
4063 br i1 %i, label %bar, label %exit
4073 define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
4074 ; KNL-LABEL: ktest_7:
4076 ; KNL-NEXT: pushq %rax
4077 ; KNL-NEXT: .cfi_def_cfa_offset 16
4078 ; KNL-NEXT: vpxor %xmm8, %xmm8, %xmm8
4079 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm0, %ymm9
4080 ; KNL-NEXT: vextracti128 $1, %ymm9, %xmm0
4081 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm1, %ymm10
4082 ; KNL-NEXT: vextracti128 $1, %ymm10, %xmm1
4083 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm2, %ymm11
4084 ; KNL-NEXT: vextracti128 $1, %ymm11, %xmm2
4085 ; KNL-NEXT: vpor %xmm2, %xmm0, %xmm13
4086 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm3, %ymm2
4087 ; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3
4088 ; KNL-NEXT: vpor %xmm3, %xmm1, %xmm12
4089 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm4, %ymm3
4090 ; KNL-NEXT: vextracti128 $1, %ymm3, %xmm4
4091 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm5, %ymm5
4092 ; KNL-NEXT: vextracti128 $1, %ymm5, %xmm1
4093 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm6, %ymm6
4094 ; KNL-NEXT: vextracti128 $1, %ymm6, %xmm0
4095 ; KNL-NEXT: vpor %xmm0, %xmm4, %xmm0
4096 ; KNL-NEXT: vpand %xmm0, %xmm13, %xmm0
4097 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm7, %ymm4
4098 ; KNL-NEXT: vextracti128 $1, %ymm4, %xmm7
4099 ; KNL-NEXT: vpor %xmm7, %xmm1, %xmm1
4100 ; KNL-NEXT: vpand %xmm1, %xmm12, %xmm1
4101 ; KNL-NEXT: vpor %xmm2, %xmm10, %xmm2
4102 ; KNL-NEXT: vpor %xmm11, %xmm9, %xmm7
4103 ; KNL-NEXT: vpor %xmm4, %xmm5, %xmm4
4104 ; KNL-NEXT: vpand %xmm4, %xmm2, %xmm2
4105 ; KNL-NEXT: vpor %xmm6, %xmm3, %xmm3
4106 ; KNL-NEXT: vpand %xmm3, %xmm7, %xmm3
4107 ; KNL-NEXT: vpmovsxbd %xmm3, %zmm3
4108 ; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
4109 ; KNL-NEXT: kmovw %k0, %eax
4110 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
4111 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
4112 ; KNL-NEXT: kmovw %k0, %ecx
4113 ; KNL-NEXT: shll $16, %ecx
4114 ; KNL-NEXT: orl %eax, %ecx
4115 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm0
4116 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
4117 ; KNL-NEXT: kmovw %k0, %eax
4118 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
4119 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
4120 ; KNL-NEXT: kmovw %k0, %edx
4121 ; KNL-NEXT: shll $16, %edx
4122 ; KNL-NEXT: orl %eax, %edx
4123 ; KNL-NEXT: shlq $32, %rdx
4124 ; KNL-NEXT: orq %rcx, %rdx
4125 ; KNL-NEXT: je LBB76_1
4126 ; KNL-NEXT: ## %bb.2: ## %exit
4127 ; KNL-NEXT: popq %rax
4128 ; KNL-NEXT: vzeroupper
4130 ; KNL-NEXT: LBB76_1: ## %bar
4131 ; KNL-NEXT: vzeroupper
4132 ; KNL-NEXT: callq _foo
4133 ; KNL-NEXT: popq %rax
4136 ; SKX-LABEL: ktest_7:
4138 ; SKX-NEXT: pushq %rax
4139 ; SKX-NEXT: .cfi_def_cfa_offset 16
4140 ; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k0
4141 ; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1
4142 ; SKX-NEXT: korq %k1, %k0, %k0
4143 ; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1
4144 ; SKX-NEXT: vptestnmb %zmm3, %zmm3, %k2
4145 ; SKX-NEXT: korq %k2, %k1, %k1
4146 ; SKX-NEXT: ktestq %k1, %k0
4147 ; SKX-NEXT: je LBB76_1
4148 ; SKX-NEXT: ## %bb.2: ## %exit
4149 ; SKX-NEXT: popq %rax
4150 ; SKX-NEXT: vzeroupper
4152 ; SKX-NEXT: LBB76_1: ## %bar
4153 ; SKX-NEXT: vzeroupper
4154 ; SKX-NEXT: callq _foo
4155 ; SKX-NEXT: popq %rax
4158 ; AVX512BW-LABEL: ktest_7:
4159 ; AVX512BW: ## %bb.0:
4160 ; AVX512BW-NEXT: pushq %rax
4161 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
4162 ; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
4163 ; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k1
4164 ; AVX512BW-NEXT: korq %k1, %k0, %k0
4165 ; AVX512BW-NEXT: vptestnmb %zmm2, %zmm2, %k1
4166 ; AVX512BW-NEXT: vptestnmb %zmm3, %zmm3, %k2
4167 ; AVX512BW-NEXT: korq %k2, %k1, %k1
4168 ; AVX512BW-NEXT: ktestq %k1, %k0
4169 ; AVX512BW-NEXT: je LBB76_1
4170 ; AVX512BW-NEXT: ## %bb.2: ## %exit
4171 ; AVX512BW-NEXT: popq %rax
4172 ; AVX512BW-NEXT: vzeroupper
4173 ; AVX512BW-NEXT: retq
4174 ; AVX512BW-NEXT: LBB76_1: ## %bar
4175 ; AVX512BW-NEXT: vzeroupper
4176 ; AVX512BW-NEXT: callq _foo
4177 ; AVX512BW-NEXT: popq %rax
4178 ; AVX512BW-NEXT: retq
4180 ; AVX512DQ-LABEL: ktest_7:
4181 ; AVX512DQ: ## %bb.0:
4182 ; AVX512DQ-NEXT: pushq %rax
4183 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
4184 ; AVX512DQ-NEXT: vpxor %xmm8, %xmm8, %xmm8
4185 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm0, %ymm9
4186 ; AVX512DQ-NEXT: vextracti128 $1, %ymm9, %xmm0
4187 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm1, %ymm10
4188 ; AVX512DQ-NEXT: vextracti128 $1, %ymm10, %xmm1
4189 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm2, %ymm11
4190 ; AVX512DQ-NEXT: vextracti128 $1, %ymm11, %xmm2
4191 ; AVX512DQ-NEXT: vpor %xmm2, %xmm0, %xmm13
4192 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm3, %ymm2
4193 ; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3
4194 ; AVX512DQ-NEXT: vpor %xmm3, %xmm1, %xmm12
4195 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm4, %ymm3
4196 ; AVX512DQ-NEXT: vextracti128 $1, %ymm3, %xmm4
4197 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm5, %ymm5
4198 ; AVX512DQ-NEXT: vextracti128 $1, %ymm5, %xmm1
4199 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm6, %ymm6
4200 ; AVX512DQ-NEXT: vextracti128 $1, %ymm6, %xmm0
4201 ; AVX512DQ-NEXT: vpor %xmm0, %xmm4, %xmm0
4202 ; AVX512DQ-NEXT: vpand %xmm0, %xmm13, %xmm0
4203 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm7, %ymm4
4204 ; AVX512DQ-NEXT: vextracti128 $1, %ymm4, %xmm7
4205 ; AVX512DQ-NEXT: vpor %xmm7, %xmm1, %xmm1
4206 ; AVX512DQ-NEXT: vpand %xmm1, %xmm12, %xmm1
4207 ; AVX512DQ-NEXT: vpor %xmm2, %xmm10, %xmm2
4208 ; AVX512DQ-NEXT: vpor %xmm11, %xmm9, %xmm7
4209 ; AVX512DQ-NEXT: vpor %xmm4, %xmm5, %xmm4
4210 ; AVX512DQ-NEXT: vpand %xmm4, %xmm2, %xmm2
4211 ; AVX512DQ-NEXT: vpor %xmm6, %xmm3, %xmm3
4212 ; AVX512DQ-NEXT: vpand %xmm3, %xmm7, %xmm3
4213 ; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3
4214 ; AVX512DQ-NEXT: vpmovd2m %zmm3, %k0
4215 ; AVX512DQ-NEXT: kmovw %k0, %eax
4216 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
4217 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
4218 ; AVX512DQ-NEXT: kmovw %k0, %ecx
4219 ; AVX512DQ-NEXT: shll $16, %ecx
4220 ; AVX512DQ-NEXT: orl %eax, %ecx
4221 ; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm0
4222 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
4223 ; AVX512DQ-NEXT: kmovw %k0, %eax
4224 ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0
4225 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
4226 ; AVX512DQ-NEXT: kmovw %k0, %edx
4227 ; AVX512DQ-NEXT: shll $16, %edx
4228 ; AVX512DQ-NEXT: orl %eax, %edx
4229 ; AVX512DQ-NEXT: shlq $32, %rdx
4230 ; AVX512DQ-NEXT: orq %rcx, %rdx
4231 ; AVX512DQ-NEXT: je LBB76_1
4232 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
4233 ; AVX512DQ-NEXT: popq %rax
4234 ; AVX512DQ-NEXT: vzeroupper
4235 ; AVX512DQ-NEXT: retq
4236 ; AVX512DQ-NEXT: LBB76_1: ## %bar
4237 ; AVX512DQ-NEXT: vzeroupper
4238 ; AVX512DQ-NEXT: callq _foo
4239 ; AVX512DQ-NEXT: popq %rax
4240 ; AVX512DQ-NEXT: retq
4242 ; X86-LABEL: ktest_7:
4244 ; X86-NEXT: subl $12, %esp
4245 ; X86-NEXT: .cfi_def_cfa_offset 16
4246 ; X86-NEXT: vptestnmb %zmm0, %zmm0, %k0
4247 ; X86-NEXT: vptestnmb %zmm1, %zmm1, %k1
4248 ; X86-NEXT: korq %k1, %k0, %k0
4249 ; X86-NEXT: vptestnmb %zmm2, %zmm2, %k1
4250 ; X86-NEXT: vptestnmb %zmm3, %zmm3, %k2
4251 ; X86-NEXT: korq %k2, %k1, %k1
4252 ; X86-NEXT: kandq %k1, %k0, %k0
4253 ; X86-NEXT: kshiftrq $32, %k0, %k1
4254 ; X86-NEXT: kortestd %k1, %k0
4255 ; X86-NEXT: je LBB76_1
4256 ; X86-NEXT: ## %bb.2: ## %exit
4257 ; X86-NEXT: addl $12, %esp
4258 ; X86-NEXT: vzeroupper
4260 ; X86-NEXT: LBB76_1: ## %bar
4261 ; X86-NEXT: vzeroupper
4262 ; X86-NEXT: calll _foo
4263 ; X86-NEXT: addl $12, %esp
4265 %a = icmp eq <64 x i8> %w, zeroinitializer
4266 %b = icmp eq <64 x i8> %x, zeroinitializer
4267 %c = icmp eq <64 x i8> %y, zeroinitializer
4268 %d = icmp eq <64 x i8> %z, zeroinitializer
4269 %e = or <64 x i1> %a, %b
4270 %f = or <64 x i1> %c, %d
4271 %g = and <64 x i1> %e, %f
4272 %h = bitcast <64 x i1> %g to i64
4273 %i = icmp eq i64 %h, 0
4274 br i1 %i, label %bar, label %exit