1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
3 ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
4 ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512BW
5 ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512DQ
6 ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=X86
; mask16: bitcasts the i16 argument to <16 x i1>, XORs it with an all-ones
; vector (a bitwise NOT of the mask) and bitcasts the result back to i16.
; The visible 64-bit CHECK lines expect a scalar movl/notl sequence.
9 define i16 @mask16(i16 %x) {
10 ; CHECK-LABEL: mask16:
12 ; CHECK-NEXT: movl %edi, %eax
13 ; CHECK-NEXT: notl %eax
14 ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
19 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
21 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
23 %m0 = bitcast i16 %x to <16 x i1>
24 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
25 %ret = bitcast <16 x i1> %m1 to i16
; mask16_zext: same 16-bit mask inversion as mask16, but the inverted i16 is
; then zero-extended to i32. The 64-bit CHECK lines expect notl followed by
; movzwl; the X86 lines expect a movzwl load followed by xorl $65535.
29 define i32 @mask16_zext(i16 %x) {
30 ; CHECK-LABEL: mask16_zext:
32 ; CHECK-NEXT: notl %edi
33 ; CHECK-NEXT: movzwl %di, %eax
36 ; X86-LABEL: mask16_zext:
38 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
39 ; X86-NEXT: xorl $65535, %eax ## imm = 0xFFFF
41 %m0 = bitcast i16 %x to <16 x i1>
42 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
43 %m2 = bitcast <16 x i1> %m1 to i16
44 %ret = zext i16 %m2 to i32
; mask8: 8-bit variant of mask16. Bitcasts the i8 argument to <8 x i1>, XORs
; it with all-ones and bitcasts back to i8. The visible 64-bit CHECK lines
; expect a scalar movl/notb sequence.
48 define i8 @mask8(i8 %x) {
51 ; CHECK-NEXT: movl %edi, %eax
52 ; CHECK-NEXT: notb %al
53 ; CHECK-NEXT: ## kill: def $al killed $al killed $eax
58 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
61 %m0 = bitcast i8 %x to <8 x i1>
62 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
63 %ret = bitcast <8 x i1> %m1 to i8
; mask8_zext: same 8-bit mask inversion as mask8, but the inverted i8 is then
; zero-extended to i32. The 64-bit CHECK lines expect notb followed by movzbl.
67 define i32 @mask8_zext(i8 %x) {
68 ; CHECK-LABEL: mask8_zext:
70 ; CHECK-NEXT: notb %dil
71 ; CHECK-NEXT: movzbl %dil, %eax
74 ; X86-LABEL: mask8_zext:
76 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
78 ; X86-NEXT: movzbl %al, %eax
80 %m0 = bitcast i8 %x to <8 x i1>
81 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
82 %m2 = bitcast <8 x i1> %m1 to i8
83 %ret = zext i8 %m2 to i32
; mask16_mem: loads an i16 from %ptr, inverts it as a <16 x i1> mask and
; stores the result back to the same address. All visible configurations
; expect the load, NOT and store to go through a mask register
; (kmovw / knotw / kmovw).
87 define void @mask16_mem(i16* %ptr) {
88 ; CHECK-LABEL: mask16_mem:
90 ; CHECK-NEXT: kmovw (%rdi), %k0
91 ; CHECK-NEXT: knotw %k0, %k0
92 ; CHECK-NEXT: kmovw %k0, (%rdi)
95 ; X86-LABEL: mask16_mem:
97 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
98 ; X86-NEXT: kmovw (%eax), %k0
99 ; X86-NEXT: knotw %k0, %k0
100 ; X86-NEXT: kmovw %k0, (%eax)
102 %x = load i16, i16* %ptr, align 4
103 %m0 = bitcast i16 %x to <16 x i1>
104 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
105 %ret = bitcast <16 x i1> %m1 to i16
106 store i16 %ret, i16* %ptr, align 4
; mask8_mem: 8-bit variant of mask16_mem. Loads an i8 from %ptr, inverts it
; as an <8 x i1> mask and stores it back. Expected code differs by prefix:
; KNL and AVX512BW expect a single in-memory notb, while SKX, AVX512DQ and
; X86 expect kmovb / knotb / kmovb.
; NOTE(review): the split lines up with which RUN lines enable +avx512dq;
; presumably the byte-sized mask instructions need that feature - confirm.
110 define void @mask8_mem(i8* %ptr) {
111 ; KNL-LABEL: mask8_mem:
113 ; KNL-NEXT: notb (%rdi)
116 ; SKX-LABEL: mask8_mem:
118 ; SKX-NEXT: kmovb (%rdi), %k0
119 ; SKX-NEXT: knotb %k0, %k0
120 ; SKX-NEXT: kmovb %k0, (%rdi)
123 ; AVX512BW-LABEL: mask8_mem:
124 ; AVX512BW: ## %bb.0:
125 ; AVX512BW-NEXT: notb (%rdi)
126 ; AVX512BW-NEXT: retq
128 ; AVX512DQ-LABEL: mask8_mem:
129 ; AVX512DQ: ## %bb.0:
130 ; AVX512DQ-NEXT: kmovb (%rdi), %k0
131 ; AVX512DQ-NEXT: knotb %k0, %k0
132 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
133 ; AVX512DQ-NEXT: retq
135 ; X86-LABEL: mask8_mem:
137 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
138 ; X86-NEXT: kmovb (%eax), %k0
139 ; X86-NEXT: knotb %k0, %k0
140 ; X86-NEXT: kmovb %k0, (%eax)
142 %x = load i8, i8* %ptr, align 4
143 %m0 = bitcast i8 %x to <8 x i1>
144 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
145 %ret = bitcast <8 x i1> %m1 to i8
146 store i8 %ret, i8* %ptr, align 4
; mand16: bitcasts both i16 arguments to <16 x i1> and computes
; (x & y) | (x ^ y) on the mask vectors before bitcasting back to i16.
; The visible CHECK lines expect scalar andl / xorl / orl instructions,
; with no mask-register instructions among them.
150 define i16 @mand16(i16 %x, i16 %y) {
151 ; CHECK-LABEL: mand16:
153 ; CHECK-NEXT: movl %edi, %eax
154 ; CHECK-NEXT: movl %edi, %ecx
155 ; CHECK-NEXT: andl %esi, %ecx
156 ; CHECK-NEXT: xorl %esi, %eax
157 ; CHECK-NEXT: orl %ecx, %eax
158 ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
163 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
164 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
165 ; X86-NEXT: movl %eax, %edx
166 ; X86-NEXT: andl %ecx, %edx
167 ; X86-NEXT: xorl %ecx, %eax
168 ; X86-NEXT: orl %edx, %eax
169 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
171 %ma = bitcast i16 %x to <16 x i1>
172 %mb = bitcast i16 %y to <16 x i1>
173 %mc = and <16 x i1> %ma, %mb
174 %md = xor <16 x i1> %ma, %mb
175 %me = or <16 x i1> %mc, %md
176 %ret = bitcast <16 x i1> %me to i16
; mand16_mem: same (a & b) | (a ^ b) computation as mand16, but the two
; <16 x i1> operands are loaded from memory. Every configuration expects the
; loads and logic to stay in mask registers (kmovw loads, kandw / kxorw /
; korw). The final move to %eax is kmovw for KNL and AVX512DQ and kmovd for
; SKX, AVX512BW and X86.
180 define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) {
181 ; KNL-LABEL: mand16_mem:
183 ; KNL-NEXT: kmovw (%rdi), %k0
184 ; KNL-NEXT: kmovw (%rsi), %k1
185 ; KNL-NEXT: kandw %k1, %k0, %k2
186 ; KNL-NEXT: kxorw %k1, %k0, %k0
187 ; KNL-NEXT: korw %k0, %k2, %k0
188 ; KNL-NEXT: kmovw %k0, %eax
189 ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
192 ; SKX-LABEL: mand16_mem:
194 ; SKX-NEXT: kmovw (%rdi), %k0
195 ; SKX-NEXT: kmovw (%rsi), %k1
196 ; SKX-NEXT: kandw %k1, %k0, %k2
197 ; SKX-NEXT: kxorw %k1, %k0, %k0
198 ; SKX-NEXT: korw %k0, %k2, %k0
199 ; SKX-NEXT: kmovd %k0, %eax
200 ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
203 ; AVX512BW-LABEL: mand16_mem:
204 ; AVX512BW: ## %bb.0:
205 ; AVX512BW-NEXT: kmovw (%rdi), %k0
206 ; AVX512BW-NEXT: kmovw (%rsi), %k1
207 ; AVX512BW-NEXT: kandw %k1, %k0, %k2
208 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
209 ; AVX512BW-NEXT: korw %k0, %k2, %k0
210 ; AVX512BW-NEXT: kmovd %k0, %eax
211 ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
212 ; AVX512BW-NEXT: retq
214 ; AVX512DQ-LABEL: mand16_mem:
215 ; AVX512DQ: ## %bb.0:
216 ; AVX512DQ-NEXT: kmovw (%rdi), %k0
217 ; AVX512DQ-NEXT: kmovw (%rsi), %k1
218 ; AVX512DQ-NEXT: kandw %k1, %k0, %k2
219 ; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
220 ; AVX512DQ-NEXT: korw %k0, %k2, %k0
221 ; AVX512DQ-NEXT: kmovw %k0, %eax
222 ; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
223 ; AVX512DQ-NEXT: retq
225 ; X86-LABEL: mand16_mem:
227 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
228 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
229 ; X86-NEXT: kmovw (%ecx), %k0
230 ; X86-NEXT: kmovw (%eax), %k1
231 ; X86-NEXT: kandw %k1, %k0, %k2
232 ; X86-NEXT: kxorw %k1, %k0, %k0
233 ; X86-NEXT: korw %k0, %k2, %k0
234 ; X86-NEXT: kmovd %k0, %eax
235 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
237 %ma = load <16 x i1>, <16 x i1>* %x
238 %mb = load <16 x i1>, <16 x i1>* %y
239 %mc = and <16 x i1> %ma, %mb
240 %md = xor <16 x i1> %ma, %mb
241 %me = or <16 x i1> %mc, %md
242 %ret = bitcast <16 x i1> %me to i16
; shuf_test1: bitcasts the i16 argument to <16 x i1>, uses shufflevector to
; pick elements 8..15 into an <8 x i1>, and bitcasts that to i8. The 64-bit
; configurations expect a mask-register shift (kshiftrw $8); the X86
; configuration expects a single byte load from the stack.
246 define i8 @shuf_test1(i16 %v) nounwind {
247 ; KNL-LABEL: shuf_test1:
249 ; KNL-NEXT: kmovw %edi, %k0
250 ; KNL-NEXT: kshiftrw $8, %k0, %k0
251 ; KNL-NEXT: kmovw %k0, %eax
252 ; KNL-NEXT: ## kill: def $al killed $al killed $eax
255 ; SKX-LABEL: shuf_test1:
257 ; SKX-NEXT: kmovd %edi, %k0
258 ; SKX-NEXT: kshiftrw $8, %k0, %k0
259 ; SKX-NEXT: kmovd %k0, %eax
260 ; SKX-NEXT: ## kill: def $al killed $al killed $eax
263 ; AVX512BW-LABEL: shuf_test1:
264 ; AVX512BW: ## %bb.0:
265 ; AVX512BW-NEXT: kmovd %edi, %k0
266 ; AVX512BW-NEXT: kshiftrw $8, %k0, %k0
267 ; AVX512BW-NEXT: kmovd %k0, %eax
268 ; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
269 ; AVX512BW-NEXT: retq
271 ; AVX512DQ-LABEL: shuf_test1:
272 ; AVX512DQ: ## %bb.0:
273 ; AVX512DQ-NEXT: kmovw %edi, %k0
274 ; AVX512DQ-NEXT: kshiftrw $8, %k0, %k0
275 ; AVX512DQ-NEXT: kmovw %k0, %eax
276 ; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
277 ; AVX512DQ-NEXT: retq
279 ; X86-LABEL: shuf_test1:
281 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
283 %v1 = bitcast i16 %v to <16 x i1>
284 %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
285 %mask1 = bitcast <8 x i1> %mask to i8
; zext_test1: compares two <16 x i32> vectors with unsigned greater-than,
; extracts element 5 of the <16 x i1> result and zero-extends it to i32.
; Every configuration expects vpcmpnleud, a kshiftrw $5 to bring element 5
; to bit 0, a move to %eax and an andl $1.
289 define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
290 ; KNL-LABEL: zext_test1:
292 ; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
293 ; KNL-NEXT: kshiftrw $5, %k0, %k0
294 ; KNL-NEXT: kmovw %k0, %eax
295 ; KNL-NEXT: andl $1, %eax
296 ; KNL-NEXT: vzeroupper
299 ; SKX-LABEL: zext_test1:
301 ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
302 ; SKX-NEXT: kshiftrw $5, %k0, %k0
303 ; SKX-NEXT: kmovd %k0, %eax
304 ; SKX-NEXT: andl $1, %eax
305 ; SKX-NEXT: vzeroupper
308 ; AVX512BW-LABEL: zext_test1:
309 ; AVX512BW: ## %bb.0:
310 ; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
311 ; AVX512BW-NEXT: kshiftrw $5, %k0, %k0
312 ; AVX512BW-NEXT: kmovd %k0, %eax
313 ; AVX512BW-NEXT: andl $1, %eax
314 ; AVX512BW-NEXT: vzeroupper
315 ; AVX512BW-NEXT: retq
317 ; AVX512DQ-LABEL: zext_test1:
318 ; AVX512DQ: ## %bb.0:
319 ; AVX512DQ-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
320 ; AVX512DQ-NEXT: kshiftrw $5, %k0, %k0
321 ; AVX512DQ-NEXT: kmovw %k0, %eax
322 ; AVX512DQ-NEXT: andl $1, %eax
323 ; AVX512DQ-NEXT: vzeroupper
324 ; AVX512DQ-NEXT: retq
326 ; X86-LABEL: zext_test1:
328 ; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
329 ; X86-NEXT: kshiftrw $5, %k0, %k0
330 ; X86-NEXT: kmovd %k0, %eax
331 ; X86-NEXT: andl $1, %eax
332 ; X86-NEXT: vzeroupper
334 %cmp_res = icmp ugt <16 x i32> %a, %b
335 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
336 %res = zext i1 %cmp_res.i1 to i32
; zext_test2: same as zext_test1 (unsigned-greater-than compare of two
; <16 x i32> vectors, extract element 5) but the extracted i1 is
; zero-extended to i16. The expected code still masks with andl $1 on the
; 32-bit register and returns the low 16 bits.
340 define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
341 ; KNL-LABEL: zext_test2:
343 ; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
344 ; KNL-NEXT: kshiftrw $5, %k0, %k0
345 ; KNL-NEXT: kmovw %k0, %eax
346 ; KNL-NEXT: andl $1, %eax
347 ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
348 ; KNL-NEXT: vzeroupper
351 ; SKX-LABEL: zext_test2:
353 ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
354 ; SKX-NEXT: kshiftrw $5, %k0, %k0
355 ; SKX-NEXT: kmovd %k0, %eax
356 ; SKX-NEXT: andl $1, %eax
357 ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
358 ; SKX-NEXT: vzeroupper
361 ; AVX512BW-LABEL: zext_test2:
362 ; AVX512BW: ## %bb.0:
363 ; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
364 ; AVX512BW-NEXT: kshiftrw $5, %k0, %k0
365 ; AVX512BW-NEXT: kmovd %k0, %eax
366 ; AVX512BW-NEXT: andl $1, %eax
367 ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
368 ; AVX512BW-NEXT: vzeroupper
369 ; AVX512BW-NEXT: retq
371 ; AVX512DQ-LABEL: zext_test2:
372 ; AVX512DQ: ## %bb.0:
373 ; AVX512DQ-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
374 ; AVX512DQ-NEXT: kshiftrw $5, %k0, %k0
375 ; AVX512DQ-NEXT: kmovw %k0, %eax
376 ; AVX512DQ-NEXT: andl $1, %eax
377 ; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
378 ; AVX512DQ-NEXT: vzeroupper
379 ; AVX512DQ-NEXT: retq
381 ; X86-LABEL: zext_test2:
383 ; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
384 ; X86-NEXT: kshiftrw $5, %k0, %k0
385 ; X86-NEXT: kmovd %k0, %eax
386 ; X86-NEXT: andl $1, %eax
387 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
388 ; X86-NEXT: vzeroupper
390 %cmp_res = icmp ugt <16 x i32> %a, %b
391 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
392 %res = zext i1 %cmp_res.i1 to i16
; zext_test3: same as zext_test1 (unsigned-greater-than compare of two
; <16 x i32> vectors, extract element 5) but the extracted i1 is
; zero-extended to i8. The expected masking instruction is the byte-sized
; andb $1, %al.
396 define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
397 ; KNL-LABEL: zext_test3:
399 ; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
400 ; KNL-NEXT: kshiftrw $5, %k0, %k0
401 ; KNL-NEXT: kmovw %k0, %eax
402 ; KNL-NEXT: andb $1, %al
403 ; KNL-NEXT: ## kill: def $al killed $al killed $eax
404 ; KNL-NEXT: vzeroupper
407 ; SKX-LABEL: zext_test3:
409 ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
410 ; SKX-NEXT: kshiftrw $5, %k0, %k0
411 ; SKX-NEXT: kmovd %k0, %eax
412 ; SKX-NEXT: andb $1, %al
413 ; SKX-NEXT: ## kill: def $al killed $al killed $eax
414 ; SKX-NEXT: vzeroupper
417 ; AVX512BW-LABEL: zext_test3:
418 ; AVX512BW: ## %bb.0:
419 ; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
420 ; AVX512BW-NEXT: kshiftrw $5, %k0, %k0
421 ; AVX512BW-NEXT: kmovd %k0, %eax
422 ; AVX512BW-NEXT: andb $1, %al
423 ; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
424 ; AVX512BW-NEXT: vzeroupper
425 ; AVX512BW-NEXT: retq
427 ; AVX512DQ-LABEL: zext_test3:
428 ; AVX512DQ: ## %bb.0:
429 ; AVX512DQ-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
430 ; AVX512DQ-NEXT: kshiftrw $5, %k0, %k0
431 ; AVX512DQ-NEXT: kmovw %k0, %eax
432 ; AVX512DQ-NEXT: andb $1, %al
433 ; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
434 ; AVX512DQ-NEXT: vzeroupper
435 ; AVX512DQ-NEXT: retq
437 ; X86-LABEL: zext_test3:
439 ; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
440 ; X86-NEXT: kshiftrw $5, %k0, %k0
441 ; X86-NEXT: kmovd %k0, %eax
442 ; X86-NEXT: andb $1, %al
443 ; X86-NEXT: ## kill: def $al killed $al killed $eax
444 ; X86-NEXT: vzeroupper
446 %cmp_res = icmp ugt <16 x i32> %a, %b
447 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
448 %res = zext i1 %cmp_res.i1 to i8
; conv1: stores an all-true <8 x i1> constant through %R, then stores the
; constant <0,1,1,1,1,1,1,1> to a stack slot, reloads it and bitcasts it to
; i8. The expected code uses byte immediates throughout: $-1 for the store
; through %R, and $-2 both for the stack store and for the value placed
; in %al.
452 define i8 @conv1(<8 x i1>* %R) {
453 ; CHECK-LABEL: conv1:
454 ; CHECK: ## %bb.0: ## %entry
455 ; CHECK-NEXT: movb $-1, (%rdi)
456 ; CHECK-NEXT: movb $-2, -{{[0-9]+}}(%rsp)
457 ; CHECK-NEXT: movb $-2, %al
461 ; X86: ## %bb.0: ## %entry
462 ; X86-NEXT: subl $12, %esp
463 ; X86-NEXT: .cfi_def_cfa_offset 16
464 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
465 ; X86-NEXT: movb $-1, (%eax)
466 ; X86-NEXT: movb $-2, (%esp)
467 ; X86-NEXT: movb $-2, %al
468 ; X86-NEXT: addl $12, %esp
471 store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R
473 %maskPtr = alloca <8 x i1>
474 store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr
475 %mask = load <8 x i1>, <8 x i1>* %maskPtr
476 %mask_convert = bitcast <8 x i1> %mask to i8
; test4: performs two signed-greater-than compares on <4 x i64> operands,
; then compares the two resulting <4 x i1> masks with icmp sgt and
; sign-extends the result to <4 x i32>. The expected code is a vpcmpleq
; whose result masks a following vpcmpgtq. SKX and X86 operate on ymm
; registers; KNL, AVX512BW and AVX512DQ operate on the containing zmm
; registers.
480 define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
483 ; KNL-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
484 ; KNL-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
485 ; KNL-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
486 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
487 ; KNL-NEXT: vpcmpleq %zmm1, %zmm0, %k1
488 ; KNL-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 {%k1}
489 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
490 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
491 ; KNL-NEXT: vzeroupper
496 ; SKX-NEXT: vpcmpleq %ymm1, %ymm0, %k1
497 ; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
498 ; SKX-NEXT: vpmovm2d %k0, %xmm0
499 ; SKX-NEXT: vzeroupper
502 ; AVX512BW-LABEL: test4:
503 ; AVX512BW: ## %bb.0:
504 ; AVX512BW-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
505 ; AVX512BW-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
506 ; AVX512BW-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
507 ; AVX512BW-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
508 ; AVX512BW-NEXT: vpcmpleq %zmm1, %zmm0, %k1
509 ; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 {%k1}
510 ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
511 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
512 ; AVX512BW-NEXT: vzeroupper
513 ; AVX512BW-NEXT: retq
515 ; AVX512DQ-LABEL: test4:
516 ; AVX512DQ: ## %bb.0:
517 ; AVX512DQ-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
518 ; AVX512DQ-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
519 ; AVX512DQ-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
520 ; AVX512DQ-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
521 ; AVX512DQ-NEXT: vpcmpleq %zmm1, %zmm0, %k1
522 ; AVX512DQ-NEXT: vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
523 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
524 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
525 ; AVX512DQ-NEXT: vzeroupper
526 ; AVX512DQ-NEXT: retq
530 ; X86-NEXT: vpcmpleq %ymm1, %ymm0, %k1
531 ; X86-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
532 ; X86-NEXT: vpmovm2d %k0, %xmm0
533 ; X86-NEXT: vzeroupper
535 %x_gt_y = icmp sgt <4 x i64> %x, %y
536 %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
537 %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1
538 %resse = sext <4 x i1>%res to <4 x i32>
; test5: <2 x i64> counterpart of test4 with different predicates. The first
; compare is signed less-than (note that its result is nevertheless named
; %x_gt_y), the second is signed greater-than, and the two <2 x i1> masks are
; compared with icmp slt before being sign-extended to <2 x i64>. The
; expected code is a vpcmpleq on the %x1/%y1 registers (vector registers 2
; and 3) whose result masks a vpcmpgtq on the %x/%y registers (vector
; registers 0 and 1).
542 define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
545 ; KNL-NEXT: ## kill: def $xmm3 killed $xmm3 def $zmm3
546 ; KNL-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
547 ; KNL-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
548 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
549 ; KNL-NEXT: vpcmpleq %zmm3, %zmm2, %k1
550 ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 {%k1}
551 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
552 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
553 ; KNL-NEXT: vzeroupper
558 ; SKX-NEXT: vpcmpleq %xmm3, %xmm2, %k1
559 ; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1}
560 ; SKX-NEXT: vpmovm2q %k0, %xmm0
563 ; AVX512BW-LABEL: test5:
564 ; AVX512BW: ## %bb.0:
565 ; AVX512BW-NEXT: ## kill: def $xmm3 killed $xmm3 def $zmm3
566 ; AVX512BW-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
567 ; AVX512BW-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
568 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
569 ; AVX512BW-NEXT: vpcmpleq %zmm3, %zmm2, %k1
570 ; AVX512BW-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 {%k1}
571 ; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
572 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
573 ; AVX512BW-NEXT: vzeroupper
574 ; AVX512BW-NEXT: retq
576 ; AVX512DQ-LABEL: test5:
577 ; AVX512DQ: ## %bb.0:
578 ; AVX512DQ-NEXT: ## kill: def $xmm3 killed $xmm3 def $zmm3
579 ; AVX512DQ-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
580 ; AVX512DQ-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
581 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
582 ; AVX512DQ-NEXT: vpcmpleq %zmm3, %zmm2, %k1
583 ; AVX512DQ-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 {%k1}
584 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
585 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
586 ; AVX512DQ-NEXT: vzeroupper
587 ; AVX512DQ-NEXT: retq
591 ; X86-NEXT: vpcmpleq %xmm3, %xmm2, %k1
592 ; X86-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1}
593 ; X86-NEXT: vpmovm2q %k0, %xmm0
595 %x_gt_y = icmp slt <2 x i64> %x, %y
596 %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
597 %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1
598 %resse = sext <2 x i1>%res to <2 x i64>
600 }define void @test6(<16 x i1> %mask) {
; test6: ANDs the <16 x i1> argument with a constant that is true in the
; even-numbered elements and false in the odd-numbered ones, bitcasts the
; result to i16 and branches to %true or %false on whether it equals zero.
602 %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
603 %b = bitcast <16 x i1> %a to i16
604 %c = icmp eq i16 %b, 0
605 br i1 %c, label %true, label %false
; test7: ORs the <8 x i1> argument with a constant that is true in the
; even-numbered elements, bitcasts the result to i8 and branches on whether
; it equals zero. Every configuration expects the constant to be applied as
; orb $85 (0x55, i.e. bits 0, 2, 4 and 6) after the mask has been moved
; into %eax.
613 define void @test7(<8 x i1> %mask) {
615 ; KNL: ## %bb.0: ## %allocas
616 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
617 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
618 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
619 ; KNL-NEXT: kmovw %k0, %eax
620 ; KNL-NEXT: orb $85, %al
621 ; KNL-NEXT: vzeroupper
625 ; SKX: ## %bb.0: ## %allocas
626 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
627 ; SKX-NEXT: vpmovw2m %xmm0, %k0
628 ; SKX-NEXT: kmovd %k0, %eax
629 ; SKX-NEXT: orb $85, %al
632 ; AVX512BW-LABEL: test7:
633 ; AVX512BW: ## %bb.0: ## %allocas
634 ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
635 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
636 ; AVX512BW-NEXT: kmovd %k0, %eax
637 ; AVX512BW-NEXT: orb $85, %al
638 ; AVX512BW-NEXT: vzeroupper
639 ; AVX512BW-NEXT: retq
641 ; AVX512DQ-LABEL: test7:
642 ; AVX512DQ: ## %bb.0: ## %allocas
643 ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
644 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
645 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
646 ; AVX512DQ-NEXT: kmovw %k0, %eax
647 ; AVX512DQ-NEXT: orb $85, %al
648 ; AVX512DQ-NEXT: vzeroupper
649 ; AVX512DQ-NEXT: retq
652 ; X86: ## %bb.0: ## %allocas
653 ; X86-NEXT: vpsllw $15, %xmm0, %xmm0
654 ; X86-NEXT: vpmovw2m %xmm0, %k0
655 ; X86-NEXT: kmovd %k0, %eax
656 ; X86-NEXT: orb $85, %al
659 %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
660 %b = bitcast <8 x i1> %a to i8
661 %c = icmp eq i8 %b, 0
662 br i1 %c, label %true, label %false
; test8: selects, on the scalar condition %a1 > %b1 (signed), between two
; <16 x i1> compare results and sign-extends the chosen mask to <16 x i8>.
; The true arm is "icmp sgt %a, 0"; the false arm is "icmp ult %b, 0", which
; can never be true, and the expected code materialises it as a zeroed mask
; with kxorw. The select itself is expected as a cmpl/jg branch.
670 define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
673 ; KNL-NEXT: cmpl %esi, %edi
674 ; KNL-NEXT: jg LBB17_1
675 ; KNL-NEXT: ## %bb.2:
676 ; KNL-NEXT: kxorw %k0, %k0, %k1
677 ; KNL-NEXT: jmp LBB17_3
679 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
680 ; KNL-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
682 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
683 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
684 ; KNL-NEXT: vzeroupper
689 ; SKX-NEXT: cmpl %esi, %edi
690 ; SKX-NEXT: jg LBB17_1
691 ; SKX-NEXT: ## %bb.2:
692 ; SKX-NEXT: kxorw %k0, %k0, %k0
693 ; SKX-NEXT: vpmovm2b %k0, %xmm0
694 ; SKX-NEXT: vzeroupper
697 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
698 ; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
699 ; SKX-NEXT: vpmovm2b %k0, %xmm0
700 ; SKX-NEXT: vzeroupper
703 ; AVX512BW-LABEL: test8:
704 ; AVX512BW: ## %bb.0:
705 ; AVX512BW-NEXT: cmpl %esi, %edi
706 ; AVX512BW-NEXT: jg LBB17_1
707 ; AVX512BW-NEXT: ## %bb.2:
708 ; AVX512BW-NEXT: kxorw %k0, %k0, %k0
709 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
710 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
711 ; AVX512BW-NEXT: vzeroupper
712 ; AVX512BW-NEXT: retq
713 ; AVX512BW-NEXT: LBB17_1:
714 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
715 ; AVX512BW-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
716 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
717 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
718 ; AVX512BW-NEXT: vzeroupper
719 ; AVX512BW-NEXT: retq
721 ; AVX512DQ-LABEL: test8:
722 ; AVX512DQ: ## %bb.0:
723 ; AVX512DQ-NEXT: cmpl %esi, %edi
724 ; AVX512DQ-NEXT: jg LBB17_1
725 ; AVX512DQ-NEXT: ## %bb.2:
726 ; AVX512DQ-NEXT: kxorw %k0, %k0, %k0
727 ; AVX512DQ-NEXT: jmp LBB17_3
728 ; AVX512DQ-NEXT: LBB17_1:
729 ; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
730 ; AVX512DQ-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
731 ; AVX512DQ-NEXT: LBB17_3:
732 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
733 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
734 ; AVX512DQ-NEXT: vzeroupper
735 ; AVX512DQ-NEXT: retq
739 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
740 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
741 ; X86-NEXT: jg LBB17_1
742 ; X86-NEXT: ## %bb.2:
743 ; X86-NEXT: kxorw %k0, %k0, %k0
744 ; X86-NEXT: vpmovm2b %k0, %xmm0
745 ; X86-NEXT: vzeroupper
748 ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
749 ; X86-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
750 ; X86-NEXT: vpmovm2b %k0, %xmm0
751 ; X86-NEXT: vzeroupper
753 %cond = icmp sgt i32 %a1, %b1
754 %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer
755 %cmp2 = icmp ult <16 x i32> %b, zeroinitializer
756 %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2
757 %res = sext <16 x i1> %mix to <16 x i8>
; test9: selects between the two <16 x i1> arguments on the scalar condition
; %a1 > %b1 (signed). The expected code is a cmpl/jg branch that picks which
; input register to normalise; KNL and AVX512DQ widen the bytes with
; vpmovsxbd and shift with vpslld $31, while SKX, AVX512BW and X86 shift the
; bytes directly with vpsllw $7.
760 define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
763 ; KNL-NEXT: cmpl %esi, %edi
764 ; KNL-NEXT: jg LBB18_1
765 ; KNL-NEXT: ## %bb.2:
766 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
767 ; KNL-NEXT: jmp LBB18_3
769 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
771 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
772 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
773 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
774 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
775 ; KNL-NEXT: vzeroupper
780 ; SKX-NEXT: cmpl %esi, %edi
781 ; SKX-NEXT: jg LBB18_1
782 ; SKX-NEXT: ## %bb.2:
783 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm0
784 ; SKX-NEXT: jmp LBB18_3
786 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
788 ; SKX-NEXT: vpmovb2m %xmm0, %k0
789 ; SKX-NEXT: vpmovm2b %k0, %xmm0
792 ; AVX512BW-LABEL: test9:
793 ; AVX512BW: ## %bb.0:
794 ; AVX512BW-NEXT: cmpl %esi, %edi
795 ; AVX512BW-NEXT: jg LBB18_1
796 ; AVX512BW-NEXT: ## %bb.2:
797 ; AVX512BW-NEXT: vpsllw $7, %xmm1, %xmm0
798 ; AVX512BW-NEXT: jmp LBB18_3
799 ; AVX512BW-NEXT: LBB18_1:
800 ; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
801 ; AVX512BW-NEXT: LBB18_3:
802 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
803 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
804 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
805 ; AVX512BW-NEXT: vzeroupper
806 ; AVX512BW-NEXT: retq
808 ; AVX512DQ-LABEL: test9:
809 ; AVX512DQ: ## %bb.0:
810 ; AVX512DQ-NEXT: cmpl %esi, %edi
811 ; AVX512DQ-NEXT: jg LBB18_1
812 ; AVX512DQ-NEXT: ## %bb.2:
813 ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0
814 ; AVX512DQ-NEXT: jmp LBB18_3
815 ; AVX512DQ-NEXT: LBB18_1:
816 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
817 ; AVX512DQ-NEXT: LBB18_3:
818 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
819 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
820 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
821 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
822 ; AVX512DQ-NEXT: vzeroupper
823 ; AVX512DQ-NEXT: retq
827 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
828 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
829 ; X86-NEXT: jg LBB18_1
830 ; X86-NEXT: ## %bb.2:
831 ; X86-NEXT: vpsllw $7, %xmm1, %xmm0
832 ; X86-NEXT: jmp LBB18_3
834 ; X86-NEXT: vpsllw $7, %xmm0, %xmm0
836 ; X86-NEXT: vpmovb2m %xmm0, %k0
837 ; X86-NEXT: vpmovm2b %k0, %xmm0
839 %mask = icmp sgt i32 %a1, %b1
840 %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b
842 }define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
; test10: selects between the two <8 x i1> arguments on the scalar condition
; %a1 > %b1 (signed).
843 %mask = icmp sgt i32 %a1, %b1
844 %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b
; test11: selects between the two <4 x i1> arguments on the scalar condition
; %a1 > %b1 (signed). Every configuration expects a cmpl/jg branch that picks
; which input register gets the vpslld $31 shift. The mask is then formed
; with vptestmd (KNL, AVX512BW) or vpmovd2m (SKX, AVX512DQ, X86).
848 define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
851 ; KNL-NEXT: cmpl %esi, %edi
852 ; KNL-NEXT: jg LBB20_1
853 ; KNL-NEXT: ## %bb.2:
854 ; KNL-NEXT: vpslld $31, %xmm1, %xmm0
855 ; KNL-NEXT: jmp LBB20_3
857 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
859 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
860 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
861 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
862 ; KNL-NEXT: vzeroupper
867 ; SKX-NEXT: cmpl %esi, %edi
868 ; SKX-NEXT: jg LBB20_1
869 ; SKX-NEXT: ## %bb.2:
870 ; SKX-NEXT: vpslld $31, %xmm1, %xmm0
871 ; SKX-NEXT: jmp LBB20_3
873 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0
875 ; SKX-NEXT: vpmovd2m %xmm0, %k0
876 ; SKX-NEXT: vpmovm2d %k0, %xmm0
879 ; AVX512BW-LABEL: test11:
880 ; AVX512BW: ## %bb.0:
881 ; AVX512BW-NEXT: cmpl %esi, %edi
882 ; AVX512BW-NEXT: jg LBB20_1
883 ; AVX512BW-NEXT: ## %bb.2:
884 ; AVX512BW-NEXT: vpslld $31, %xmm1, %xmm0
885 ; AVX512BW-NEXT: jmp LBB20_3
886 ; AVX512BW-NEXT: LBB20_1:
887 ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
888 ; AVX512BW-NEXT: LBB20_3:
889 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k1
890 ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
891 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
892 ; AVX512BW-NEXT: vzeroupper
893 ; AVX512BW-NEXT: retq
895 ; AVX512DQ-LABEL: test11:
896 ; AVX512DQ: ## %bb.0:
897 ; AVX512DQ-NEXT: cmpl %esi, %edi
898 ; AVX512DQ-NEXT: jg LBB20_1
899 ; AVX512DQ-NEXT: ## %bb.2:
900 ; AVX512DQ-NEXT: vpslld $31, %xmm1, %xmm0
901 ; AVX512DQ-NEXT: jmp LBB20_3
902 ; AVX512DQ-NEXT: LBB20_1:
903 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
904 ; AVX512DQ-NEXT: LBB20_3:
905 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
906 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
907 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
908 ; AVX512DQ-NEXT: vzeroupper
909 ; AVX512DQ-NEXT: retq
913 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
914 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
915 ; X86-NEXT: jg LBB20_1
916 ; X86-NEXT: ## %bb.2:
917 ; X86-NEXT: vpslld $31, %xmm1, %xmm0
918 ; X86-NEXT: jmp LBB20_3
920 ; X86-NEXT: vpslld $31, %xmm0, %xmm0
922 ; X86-NEXT: vpmovd2m %xmm0, %k0
923 ; X86-NEXT: vpmovm2d %k0, %xmm0
925 %mask = icmp sgt i32 %a1, %b1
926 %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b
; test12: bitcasts the constant 21845 (0x5555) to <16 x i1>, extracts
; element 0 and uses it as the select condition between %x and %y. The
; 64-bit CHECK line expects a plain copy of %edi (the first argument), i.e.
; the select on a constant condition is folded away.
930 define i32 @test12(i32 %x, i32 %y) {
931 ; CHECK-LABEL: test12:
933 ; CHECK-NEXT: movl %edi, %eax
938 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
940 %a = bitcast i16 21845 to <16 x i1>
941 %b = extractelement <16 x i1> %a, i32 0
942 %c = select i1 %b, i32 %x, i32 %y
; test13: same as test12 but extracts element 3 of the 0x5555 constant mask.
; The 64-bit CHECK line expects a plain copy of %esi (the second argument),
; i.e. the constant condition folds to the %y arm.
946 define i32 @test13(i32 %x, i32 %y) {
947 ; CHECK-LABEL: test13:
949 ; CHECK-NEXT: movl %esi, %eax
954 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
956 %a = bitcast i16 21845 to <16 x i1>
957 %b = extractelement <16 x i1> %a, i32 3
958 %c = select i1 %b, i32 %x, i32 %y
962 ; Make sure we don't crash on a large vector.
; The i128 constant is bitcast to <128 x i1>; element 3 is extracted and used
; as the select condition. The 64-bit CHECK line expects a plain copy of
; %edi (the first argument).
963 define i32 @test13_crash(i32 %x, i32 %y) {
964 ; CHECK-LABEL: test13_crash:
966 ; CHECK-NEXT: movl %edi, %eax
969 ; X86-LABEL: test13_crash:
971 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
973 %a = bitcast i128 2184568686868686868686868686 to <128 x i1>
974 %b = extractelement <128 x i1> %a, i32 3
975 %c = select i1 %b, i32 %x, i32 %y
; test14: extracts element 2 of the 0x5555 constant mask and inserts it at
; index 1 of the constant <true, false, false, true>. Both visible
; configurations expect the whole computation to fold to a constant-vector
; load of [1,1,0,1].
979 define <4 x i1> @test14() {
980 ; CHECK-LABEL: test14:
982 ; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,0,1]
987 ; X86-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,0,1]
989 %a = bitcast i16 21845 to <16 x i1>
990 %b = extractelement <16 x i1> %a, i32 2
991 %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
; test15: selects, on %x > %y (signed), between two constant <16 x i1> masks
; built by bitcasting the i16 values 21845 (0x5555) and 1. Every
; configuration expects the choice to be made on scalar immediates
; (movl $21845 / movl $1 / cmovgl) and the winner then moved into a mask
; register with kmovw or kmovd.
995 define <16 x i1> @test15(i32 %x, i32 %y) {
998 ; KNL-NEXT: cmpl %esi, %edi
999 ; KNL-NEXT: movl $21845, %eax ## imm = 0x5555
1000 ; KNL-NEXT: movl $1, %ecx
1001 ; KNL-NEXT: cmovgl %eax, %ecx
1002 ; KNL-NEXT: kmovw %ecx, %k1
1003 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1004 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
1005 ; KNL-NEXT: vzeroupper
1008 ; SKX-LABEL: test15:
1010 ; SKX-NEXT: cmpl %esi, %edi
1011 ; SKX-NEXT: movl $21845, %eax ## imm = 0x5555
1012 ; SKX-NEXT: movl $1, %ecx
1013 ; SKX-NEXT: cmovgl %eax, %ecx
1014 ; SKX-NEXT: kmovd %ecx, %k0
1015 ; SKX-NEXT: vpmovm2b %k0, %xmm0
1018 ; AVX512BW-LABEL: test15:
1019 ; AVX512BW: ## %bb.0:
1020 ; AVX512BW-NEXT: cmpl %esi, %edi
1021 ; AVX512BW-NEXT: movl $21845, %eax ## imm = 0x5555
1022 ; AVX512BW-NEXT: movl $1, %ecx
1023 ; AVX512BW-NEXT: cmovgl %eax, %ecx
1024 ; AVX512BW-NEXT: kmovd %ecx, %k0
1025 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
1026 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
1027 ; AVX512BW-NEXT: vzeroupper
1028 ; AVX512BW-NEXT: retq
1030 ; AVX512DQ-LABEL: test15:
1031 ; AVX512DQ: ## %bb.0:
1032 ; AVX512DQ-NEXT: cmpl %esi, %edi
1033 ; AVX512DQ-NEXT: movl $21845, %eax ## imm = 0x5555
1034 ; AVX512DQ-NEXT: movl $1, %ecx
1035 ; AVX512DQ-NEXT: cmovgl %eax, %ecx
1036 ; AVX512DQ-NEXT: kmovw %ecx, %k0
1037 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
1038 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1039 ; AVX512DQ-NEXT: vzeroupper
1040 ; AVX512DQ-NEXT: retq
1042 ; X86-LABEL: test15:
1044 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1045 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
1046 ; X86-NEXT: movl $21845, %eax ## imm = 0x5555
1047 ; X86-NEXT: movl $1, %ecx
1048 ; X86-NEXT: cmovgl %eax, %ecx
1049 ; X86-NEXT: kmovd %ecx, %k0
1050 ; X86-NEXT: vpmovm2b %k0, %xmm0
1052 %a = bitcast i16 21845 to <16 x i1>
1053 %b = bitcast i16 1 to <16 x i1>
1054 %mask = icmp sgt i32 %x, %y
1055 %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
; test16: bitcasts the i64 argument to <64 x i1>, inserts the constant true
; at index 5 and sign-extends the result to <64 x i8>. SKX, AVX512BW and X86
; expect a single 64-bit mask updated with kshiftrq $5 / kxorq /
; kshiftlq $63 / kshiftrq $58 / kxorq and then expanded with vpmovm2b. KNL
; and AVX512DQ expect the value to be split into four 16-bit masks (kmovw),
; the element patched with the 16-bit equivalents, and the four expanded
; pieces reassembled with vinserti128 / vinserti64x4.
1059 define <64 x i8> @test16(i64 %x) {
1061 ; KNL-LABEL: test16:
1063 ; KNL-NEXT: movq %rdi, %rax
1064 ; KNL-NEXT: movl %edi, %ecx
1065 ; KNL-NEXT: kmovw %edi, %k0
1066 ; KNL-NEXT: shrq $32, %rdi
1067 ; KNL-NEXT: shrq $48, %rax
1068 ; KNL-NEXT: shrl $16, %ecx
1069 ; KNL-NEXT: kmovw %ecx, %k1
1070 ; KNL-NEXT: kmovw %eax, %k2
1071 ; KNL-NEXT: kmovw %edi, %k3
1072 ; KNL-NEXT: kshiftrw $5, %k0, %k4
1073 ; KNL-NEXT: kxnorw %k0, %k0, %k5
1074 ; KNL-NEXT: kxorw %k5, %k4, %k4
1075 ; KNL-NEXT: kshiftlw $15, %k4, %k4
1076 ; KNL-NEXT: kshiftrw $10, %k4, %k4
1077 ; KNL-NEXT: kxorw %k4, %k0, %k4
1078 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
1079 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
1080 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
1081 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
1082 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1083 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z}
1084 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
1085 ; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
1086 ; KNL-NEXT: vpmovdb %zmm2, %xmm2
1087 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
1088 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
1091 ; SKX-LABEL: test16:
1093 ; SKX-NEXT: kmovq %rdi, %k0
1094 ; SKX-NEXT: kxnorw %k0, %k0, %k1
1095 ; SKX-NEXT: kshiftrq $5, %k0, %k2
1096 ; SKX-NEXT: kxorq %k1, %k2, %k1
1097 ; SKX-NEXT: kshiftlq $63, %k1, %k1
1098 ; SKX-NEXT: kshiftrq $58, %k1, %k1
1099 ; SKX-NEXT: kxorq %k1, %k0, %k0
1100 ; SKX-NEXT: vpmovm2b %k0, %zmm0
1103 ; AVX512BW-LABEL: test16:
1104 ; AVX512BW: ## %bb.0:
1105 ; AVX512BW-NEXT: kmovq %rdi, %k0
1106 ; AVX512BW-NEXT: kxnorw %k0, %k0, %k1
1107 ; AVX512BW-NEXT: kshiftrq $5, %k0, %k2
1108 ; AVX512BW-NEXT: kxorq %k1, %k2, %k1
1109 ; AVX512BW-NEXT: kshiftlq $63, %k1, %k1
1110 ; AVX512BW-NEXT: kshiftrq $58, %k1, %k1
1111 ; AVX512BW-NEXT: kxorq %k1, %k0, %k0
1112 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
1113 ; AVX512BW-NEXT: retq
1115 ; AVX512DQ-LABEL: test16:
1116 ; AVX512DQ: ## %bb.0:
1117 ; AVX512DQ-NEXT: movq %rdi, %rax
1118 ; AVX512DQ-NEXT: movl %edi, %ecx
1119 ; AVX512DQ-NEXT: kmovw %edi, %k0
1120 ; AVX512DQ-NEXT: shrq $32, %rdi
1121 ; AVX512DQ-NEXT: shrq $48, %rax
1122 ; AVX512DQ-NEXT: shrl $16, %ecx
1123 ; AVX512DQ-NEXT: kmovw %ecx, %k1
1124 ; AVX512DQ-NEXT: kmovw %eax, %k2
1125 ; AVX512DQ-NEXT: kmovw %edi, %k3
1126 ; AVX512DQ-NEXT: kshiftrw $5, %k0, %k4
1127 ; AVX512DQ-NEXT: kxnorw %k0, %k0, %k5
1128 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
1129 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
1130 ; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4
1131 ; AVX512DQ-NEXT: kxorw %k4, %k0, %k0
1132 ; AVX512DQ-NEXT: vpmovm2d %k3, %zmm0
1133 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1134 ; AVX512DQ-NEXT: vpmovm2d %k2, %zmm1
1135 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
1136 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1137 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
1138 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
1139 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm2
1140 ; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
1141 ; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
1142 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
1143 ; AVX512DQ-NEXT: retq
1145 ; X86-LABEL: test16:
1147 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0
1148 ; X86-NEXT: kshiftrq $5, %k0, %k1
1149 ; X86-NEXT: kxnorw %k0, %k0, %k2
1150 ; X86-NEXT: kxorq %k2, %k1, %k1
1151 ; X86-NEXT: kshiftlq $63, %k1, %k1
1152 ; X86-NEXT: kshiftrq $58, %k1, %k1
1153 ; X86-NEXT: kxorq %k1, %k0, %k0
1154 ; X86-NEXT: vpmovm2b %k0, %zmm0
1156 %a = bitcast i64 %x to <64 x i1>
1157 %b = insertelement <64 x i1>%a, i1 true, i32 5
1158 %c = sext <64 x i1>%b to <64 x i8>
; test17 - insert a runtime-computed bit into a 64-bit mask.
; Same shape as a constant insert at element 5, except the inserted bit is the
; result of the signed compare %y > %z. The expected output materialises the
; compare with cmpl/setg, moves it into a k-register, and merges it into the
; mask with k-register shift/xor sequences.
1162 define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
1164 ; KNL-LABEL: test17:
1166 ; KNL-NEXT: movq %rdi, %rax
1167 ; KNL-NEXT: movl %edi, %ecx
1168 ; KNL-NEXT: kmovw %edi, %k0
1169 ; KNL-NEXT: shrq $32, %rdi
1170 ; KNL-NEXT: shrq $48, %rax
1171 ; KNL-NEXT: shrl $16, %ecx
1172 ; KNL-NEXT: kmovw %ecx, %k1
1173 ; KNL-NEXT: kmovw %eax, %k2
1174 ; KNL-NEXT: kmovw %edi, %k3
1175 ; KNL-NEXT: cmpl %edx, %esi
1176 ; KNL-NEXT: setg %al
1177 ; KNL-NEXT: kshiftrw $5, %k0, %k4
1178 ; KNL-NEXT: kmovw %eax, %k5
1179 ; KNL-NEXT: kxorw %k5, %k4, %k4
1180 ; KNL-NEXT: kshiftlw $15, %k4, %k4
1181 ; KNL-NEXT: kshiftrw $10, %k4, %k4
1182 ; KNL-NEXT: kxorw %k4, %k0, %k4
1183 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
1184 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
1185 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
1186 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
1187 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1188 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z}
1189 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
1190 ; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
1191 ; KNL-NEXT: vpmovdb %zmm2, %xmm2
1192 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
1193 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
1196 ; SKX-LABEL: test17:
1198 ; SKX-NEXT: kmovq %rdi, %k0
1199 ; SKX-NEXT: cmpl %edx, %esi
1200 ; SKX-NEXT: setg %al
1201 ; SKX-NEXT: kmovd %eax, %k1
1202 ; SKX-NEXT: kshiftrq $5, %k0, %k2
1203 ; SKX-NEXT: kxorq %k1, %k2, %k1
1204 ; SKX-NEXT: kshiftlq $63, %k1, %k1
1205 ; SKX-NEXT: kshiftrq $58, %k1, %k1
1206 ; SKX-NEXT: kxorq %k1, %k0, %k0
1207 ; SKX-NEXT: vpmovm2b %k0, %zmm0
1210 ; AVX512BW-LABEL: test17:
1211 ; AVX512BW: ## %bb.0:
1212 ; AVX512BW-NEXT: kmovq %rdi, %k0
1213 ; AVX512BW-NEXT: cmpl %edx, %esi
1214 ; AVX512BW-NEXT: setg %al
1215 ; AVX512BW-NEXT: kmovd %eax, %k1
1216 ; AVX512BW-NEXT: kshiftrq $5, %k0, %k2
1217 ; AVX512BW-NEXT: kxorq %k1, %k2, %k1
1218 ; AVX512BW-NEXT: kshiftlq $63, %k1, %k1
1219 ; AVX512BW-NEXT: kshiftrq $58, %k1, %k1
1220 ; AVX512BW-NEXT: kxorq %k1, %k0, %k0
1221 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
1222 ; AVX512BW-NEXT: retq
1224 ; AVX512DQ-LABEL: test17:
1225 ; AVX512DQ: ## %bb.0:
1226 ; AVX512DQ-NEXT: movq %rdi, %rax
1227 ; AVX512DQ-NEXT: movl %edi, %ecx
1228 ; AVX512DQ-NEXT: kmovw %edi, %k0
1229 ; AVX512DQ-NEXT: shrq $32, %rdi
1230 ; AVX512DQ-NEXT: shrq $48, %rax
1231 ; AVX512DQ-NEXT: shrl $16, %ecx
1232 ; AVX512DQ-NEXT: kmovw %ecx, %k1
1233 ; AVX512DQ-NEXT: kmovw %eax, %k2
1234 ; AVX512DQ-NEXT: kmovw %edi, %k3
1235 ; AVX512DQ-NEXT: cmpl %edx, %esi
1236 ; AVX512DQ-NEXT: setg %al
1237 ; AVX512DQ-NEXT: kshiftrw $5, %k0, %k4
1238 ; AVX512DQ-NEXT: kmovw %eax, %k5
1239 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
1240 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
1241 ; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4
1242 ; AVX512DQ-NEXT: kxorw %k4, %k0, %k0
1243 ; AVX512DQ-NEXT: vpmovm2d %k3, %zmm0
1244 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1245 ; AVX512DQ-NEXT: vpmovm2d %k2, %zmm1
1246 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
1247 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1248 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
1249 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
1250 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm2
1251 ; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
1252 ; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
1253 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
1254 ; AVX512DQ-NEXT: retq
1256 ; X86-LABEL: test17:
1258 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1259 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0
1260 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
1261 ; X86-NEXT: setg %al
1262 ; X86-NEXT: kmovd %eax, %k1
1263 ; X86-NEXT: kshiftrq $5, %k0, %k2
1264 ; X86-NEXT: kxorq %k1, %k2, %k1
1265 ; X86-NEXT: kshiftlq $63, %k1, %k1
1266 ; X86-NEXT: kshiftrq $58, %k1, %k1
1267 ; X86-NEXT: kxorq %k1, %k0, %k0
1268 ; X86-NEXT: vpmovm2b %k0, %zmm0
1270 %a = bitcast i64 %x to <64 x i1>
1271 %b = icmp sgt i32 %y, %z
1272 %c = insertelement <64 x i1>%a, i1 %b, i32 5
1273 %d = sext <64 x i1>%c to <64 x i8>
; test18 - move two bits of a 16-bit mask into an 8-bit mask.
; %a is reinterpreted as <8 x i1> and %y as <16 x i1>. Element 8 of the wide
; mask is written to element 7 of the narrow mask, and element 9 to element 6.
; The expected output performs both insertions with k-register
; shift/xor/or sequences.
1277 define <8 x i1> @test18(i8 %a, i16 %y) {
1278 ; KNL-LABEL: test18:
1280 ; KNL-NEXT: kmovw %edi, %k0
1281 ; KNL-NEXT: kmovw %esi, %k1
1282 ; KNL-NEXT: kshiftrw $8, %k1, %k2
1283 ; KNL-NEXT: kshiftrw $9, %k1, %k1
1284 ; KNL-NEXT: kshiftrw $6, %k0, %k3
1285 ; KNL-NEXT: kxorw %k1, %k3, %k1
1286 ; KNL-NEXT: kshiftlw $6, %k1, %k1
1287 ; KNL-NEXT: kxorw %k1, %k0, %k0
1288 ; KNL-NEXT: kshiftlw $9, %k0, %k0
1289 ; KNL-NEXT: kshiftrw $9, %k0, %k0
1290 ; KNL-NEXT: kshiftlw $7, %k2, %k1
1291 ; KNL-NEXT: korw %k1, %k0, %k1
1292 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1293 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
1294 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
1295 ; KNL-NEXT: vzeroupper
1298 ; SKX-LABEL: test18:
1300 ; SKX-NEXT: kmovd %edi, %k0
1301 ; SKX-NEXT: kmovd %esi, %k1
1302 ; SKX-NEXT: kshiftrw $8, %k1, %k2
1303 ; SKX-NEXT: kshiftrw $9, %k1, %k1
1304 ; SKX-NEXT: kshiftrb $6, %k0, %k3
1305 ; SKX-NEXT: kxorb %k1, %k3, %k1
1306 ; SKX-NEXT: kshiftlb $6, %k1, %k1
1307 ; SKX-NEXT: kxorb %k1, %k0, %k0
1308 ; SKX-NEXT: kshiftlb $1, %k0, %k0
1309 ; SKX-NEXT: kshiftrb $1, %k0, %k0
1310 ; SKX-NEXT: kshiftlb $7, %k2, %k1
1311 ; SKX-NEXT: korb %k1, %k0, %k0
1312 ; SKX-NEXT: vpmovm2w %k0, %xmm0
1315 ; AVX512BW-LABEL: test18:
1316 ; AVX512BW: ## %bb.0:
1317 ; AVX512BW-NEXT: kmovd %edi, %k0
1318 ; AVX512BW-NEXT: kmovd %esi, %k1
1319 ; AVX512BW-NEXT: kshiftrw $8, %k1, %k2
1320 ; AVX512BW-NEXT: kshiftrw $9, %k1, %k1
1321 ; AVX512BW-NEXT: kshiftrw $6, %k0, %k3
1322 ; AVX512BW-NEXT: kxorw %k1, %k3, %k1
1323 ; AVX512BW-NEXT: kshiftlw $6, %k1, %k1
1324 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
1325 ; AVX512BW-NEXT: kshiftlw $9, %k0, %k0
1326 ; AVX512BW-NEXT: kshiftrw $9, %k0, %k0
1327 ; AVX512BW-NEXT: kshiftlw $7, %k2, %k1
1328 ; AVX512BW-NEXT: korw %k1, %k0, %k0
1329 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
1330 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
1331 ; AVX512BW-NEXT: vzeroupper
1332 ; AVX512BW-NEXT: retq
1334 ; AVX512DQ-LABEL: test18:
1335 ; AVX512DQ: ## %bb.0:
1336 ; AVX512DQ-NEXT: kmovw %edi, %k0
1337 ; AVX512DQ-NEXT: kmovw %esi, %k1
1338 ; AVX512DQ-NEXT: kshiftrw $8, %k1, %k2
1339 ; AVX512DQ-NEXT: kshiftrw $9, %k1, %k1
1340 ; AVX512DQ-NEXT: kshiftrb $6, %k0, %k3
1341 ; AVX512DQ-NEXT: kxorb %k1, %k3, %k1
1342 ; AVX512DQ-NEXT: kshiftlb $6, %k1, %k1
1343 ; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
1344 ; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0
1345 ; AVX512DQ-NEXT: kshiftrb $1, %k0, %k0
1346 ; AVX512DQ-NEXT: kshiftlb $7, %k2, %k1
1347 ; AVX512DQ-NEXT: korb %k1, %k0, %k0
1348 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
1349 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
1350 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
1351 ; AVX512DQ-NEXT: vzeroupper
1352 ; AVX512DQ-NEXT: retq
1354 ; X86-LABEL: test18:
1356 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
1357 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1358 ; X86-NEXT: kshiftrw $8, %k1, %k2
1359 ; X86-NEXT: kshiftrw $9, %k1, %k1
1360 ; X86-NEXT: kshiftrb $6, %k0, %k3
1361 ; X86-NEXT: kxorb %k1, %k3, %k1
1362 ; X86-NEXT: kshiftlb $6, %k1, %k1
1363 ; X86-NEXT: kxorb %k1, %k0, %k0
1364 ; X86-NEXT: kshiftlb $1, %k0, %k0
1365 ; X86-NEXT: kshiftrb $1, %k0, %k0
1366 ; X86-NEXT: kshiftlb $7, %k2, %k1
1367 ; X86-NEXT: korb %k1, %k0, %k0
1368 ; X86-NEXT: vpmovm2w %k0, %xmm0
1370 %b = bitcast i8 %a to <8 x i1>
1371 %b1 = bitcast i16 %y to <16 x i1>
1372 %el1 = extractelement <16 x i1>%b1, i32 8
1373 %el2 = extractelement <16 x i1>%b1, i32 9
1374 %c = insertelement <8 x i1>%b, i1 %el1, i32 7
1375 %d = insertelement <8 x i1>%c, i1 %el2, i32 6
; test21 - zero-masking select on <32 x i16>.
; Lanes of %x whose %mask bit is clear are replaced by zero. The runs with
; avx512bw are expected to turn the byte-vector mask into a k-register
; (vpmovb2m) and use a zero-masked vmovdqu16. The KNL and AVX512DQ runs
; instead widen the mask to words and AND it with each 256-bit half of %x.
1378 define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
1379 ; KNL-LABEL: test21:
1381 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
1382 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
1383 ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1
1384 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
1385 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
1386 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
1387 ; KNL-NEXT: vpand %ymm3, %ymm1, %ymm1
1388 ; KNL-NEXT: vpsllw $15, %ymm2, %ymm2
1389 ; KNL-NEXT: vpsraw $15, %ymm2, %ymm2
1390 ; KNL-NEXT: vpand %ymm0, %ymm2, %ymm0
1391 ; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1394 ; SKX-LABEL: test21:
1396 ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1
1397 ; SKX-NEXT: vpmovb2m %ymm1, %k1
1398 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1401 ; AVX512BW-LABEL: test21:
1402 ; AVX512BW: ## %bb.0:
1403 ; AVX512BW-NEXT: vpsllw $7, %ymm1, %ymm1
1404 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
1405 ; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1406 ; AVX512BW-NEXT: retq
1408 ; AVX512DQ-LABEL: test21:
1409 ; AVX512DQ: ## %bb.0:
1410 ; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
1411 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm3
1412 ; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm1
1413 ; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
1414 ; AVX512DQ-NEXT: vpsllw $15, %ymm1, %ymm1
1415 ; AVX512DQ-NEXT: vpsraw $15, %ymm1, %ymm1
1416 ; AVX512DQ-NEXT: vpand %ymm3, %ymm1, %ymm1
1417 ; AVX512DQ-NEXT: vpsllw $15, %ymm2, %ymm2
1418 ; AVX512DQ-NEXT: vpsraw $15, %ymm2, %ymm2
1419 ; AVX512DQ-NEXT: vpand %ymm0, %ymm2, %ymm0
1420 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1421 ; AVX512DQ-NEXT: retq
1423 ; X86-LABEL: test21:
1425 ; X86-NEXT: vpsllw $7, %ymm1, %ymm1
1426 ; X86-NEXT: vpmovb2m %ymm1, %k1
1427 ; X86-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1429 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; test22 - store a <4 x i1> mask to memory.
; The expected output shifts each dword lane's low bit into the sign position
; (vpslld $31), converts the vector to a k-register, and writes a single byte
; to the destination.
1433 define void @test22(<4 x i1> %a, <4 x i1>* %addr) {
1434 ; KNL-LABEL: test22:
1436 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1437 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1438 ; KNL-NEXT: kmovw %k0, %eax
1439 ; KNL-NEXT: movb %al, (%rdi)
1440 ; KNL-NEXT: vzeroupper
1443 ; SKX-LABEL: test22:
1445 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0
1446 ; SKX-NEXT: vpmovd2m %xmm0, %k0
1447 ; SKX-NEXT: kmovb %k0, (%rdi)
1450 ; AVX512BW-LABEL: test22:
1451 ; AVX512BW: ## %bb.0:
1452 ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
1453 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
1454 ; AVX512BW-NEXT: kmovd %k0, %eax
1455 ; AVX512BW-NEXT: movb %al, (%rdi)
1456 ; AVX512BW-NEXT: vzeroupper
1457 ; AVX512BW-NEXT: retq
1459 ; AVX512DQ-LABEL: test22:
1460 ; AVX512DQ: ## %bb.0:
1461 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
1462 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
1463 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
1464 ; AVX512DQ-NEXT: vzeroupper
1465 ; AVX512DQ-NEXT: retq
1467 ; X86-LABEL: test22:
1469 ; X86-NEXT: vpslld $31, %xmm0, %xmm0
1470 ; X86-NEXT: vpmovd2m %xmm0, %k0
1471 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1472 ; X86-NEXT: kmovb %k0, (%eax)
1474 store <4 x i1> %a, <4 x i1>* %addr
; test23 - store a <2 x i1> mask to memory.
; The expected output shifts each qword lane's low bit into the sign position
; (vpsllq $63), converts the vector to a k-register, and writes a single byte
; to the destination.
1478 define void @test23(<2 x i1> %a, <2 x i1>* %addr) {
1479 ; KNL-LABEL: test23:
1481 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1482 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
1483 ; KNL-NEXT: kmovw %k0, %eax
1484 ; KNL-NEXT: movb %al, (%rdi)
1485 ; KNL-NEXT: vzeroupper
1488 ; SKX-LABEL: test23:
1490 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
1491 ; SKX-NEXT: vpmovq2m %xmm0, %k0
1492 ; SKX-NEXT: kmovb %k0, (%rdi)
1495 ; AVX512BW-LABEL: test23:
1496 ; AVX512BW: ## %bb.0:
1497 ; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0
1498 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
1499 ; AVX512BW-NEXT: kmovd %k0, %eax
1500 ; AVX512BW-NEXT: movb %al, (%rdi)
1501 ; AVX512BW-NEXT: vzeroupper
1502 ; AVX512BW-NEXT: retq
1504 ; AVX512DQ-LABEL: test23:
1505 ; AVX512DQ: ## %bb.0:
1506 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
1507 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
1508 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
1509 ; AVX512DQ-NEXT: vzeroupper
1510 ; AVX512DQ-NEXT: retq
1512 ; X86-LABEL: test23:
1514 ; X86-NEXT: vpsllq $63, %xmm0, %xmm0
1515 ; X86-NEXT: vpmovq2m %xmm0, %k0
1516 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1517 ; X86-NEXT: kmovb %k0, (%eax)
1519 store <2 x i1> %a, <2 x i1>* %addr
; store_v1i1 - invert a <1 x i1> value and store it.
; The xor with an all-ones constant is the inversion. The expected output
; implements it in k-registers as kxnorw (producing all ones) followed by
; kxorw, then writes one byte to the destination.
1523 define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
1524 ; KNL-LABEL: store_v1i1:
1526 ; KNL-NEXT: kmovw %edi, %k0
1527 ; KNL-NEXT: kxnorw %k0, %k0, %k1
1528 ; KNL-NEXT: kxorw %k1, %k0, %k0
1529 ; KNL-NEXT: kmovw %k0, %eax
1530 ; KNL-NEXT: movb %al, (%rsi)
1533 ; SKX-LABEL: store_v1i1:
1535 ; SKX-NEXT: kmovd %edi, %k0
1536 ; SKX-NEXT: kxnorw %k0, %k0, %k1
1537 ; SKX-NEXT: kxorw %k1, %k0, %k0
1538 ; SKX-NEXT: kmovb %k0, (%rsi)
1541 ; AVX512BW-LABEL: store_v1i1:
1542 ; AVX512BW: ## %bb.0:
1543 ; AVX512BW-NEXT: kmovd %edi, %k0
1544 ; AVX512BW-NEXT: kxnorw %k0, %k0, %k1
1545 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
1546 ; AVX512BW-NEXT: kmovd %k0, %eax
1547 ; AVX512BW-NEXT: movb %al, (%rsi)
1548 ; AVX512BW-NEXT: retq
1550 ; AVX512DQ-LABEL: store_v1i1:
1551 ; AVX512DQ: ## %bb.0:
1552 ; AVX512DQ-NEXT: kmovw %edi, %k0
1553 ; AVX512DQ-NEXT: kxnorw %k0, %k0, %k1
1554 ; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
1555 ; AVX512DQ-NEXT: kmovb %k0, (%rsi)
1556 ; AVX512DQ-NEXT: retq
1558 ; X86-LABEL: store_v1i1:
1560 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1561 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1562 ; X86-NEXT: kxnorw %k0, %k0, %k1
1563 ; X86-NEXT: kxorw %k1, %k0, %k0
1564 ; X86-NEXT: kmovb %k0, (%eax)
1566 %x = xor <1 x i1> %c, <i1 1>
1567 store <1 x i1> %x, <1 x i1>* %ptr, align 4
; store_v2i1 - invert a <2 x i1> mask and store it.
; In the expected output the KNL and AVX512BW runs fold the inversion into the
; vector-to-mask conversion (vptestnmq), while the runs with avx512dq convert
; with vpmovq2m and then invert with knotw. One byte is written in every case.
1571 define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
1572 ; KNL-LABEL: store_v2i1:
1574 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1575 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
1576 ; KNL-NEXT: kmovw %k0, %eax
1577 ; KNL-NEXT: movb %al, (%rdi)
1578 ; KNL-NEXT: vzeroupper
1581 ; SKX-LABEL: store_v2i1:
1583 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
1584 ; SKX-NEXT: vpmovq2m %xmm0, %k0
1585 ; SKX-NEXT: knotw %k0, %k0
1586 ; SKX-NEXT: kmovb %k0, (%rdi)
1589 ; AVX512BW-LABEL: store_v2i1:
1590 ; AVX512BW: ## %bb.0:
1591 ; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0
1592 ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
1593 ; AVX512BW-NEXT: kmovd %k0, %eax
1594 ; AVX512BW-NEXT: movb %al, (%rdi)
1595 ; AVX512BW-NEXT: vzeroupper
1596 ; AVX512BW-NEXT: retq
1598 ; AVX512DQ-LABEL: store_v2i1:
1599 ; AVX512DQ: ## %bb.0:
1600 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
1601 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
1602 ; AVX512DQ-NEXT: knotw %k0, %k0
1603 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
1604 ; AVX512DQ-NEXT: vzeroupper
1605 ; AVX512DQ-NEXT: retq
1607 ; X86-LABEL: store_v2i1:
1609 ; X86-NEXT: vpsllq $63, %xmm0, %xmm0
1610 ; X86-NEXT: vpmovq2m %xmm0, %k0
1611 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1612 ; X86-NEXT: knotw %k0, %k0
1613 ; X86-NEXT: kmovb %k0, (%eax)
1615 %x = xor <2 x i1> %c, <i1 1, i1 1>
1616 store <2 x i1> %x, <2 x i1>* %ptr, align 4
; store_v4i1 - invert a <4 x i1> mask and store it.
; In the expected output the KNL and AVX512BW runs fold the inversion into the
; vector-to-mask conversion (vptestnmd), while the runs with avx512dq convert
; with vpmovd2m and then invert with knotw. One byte is written in every case.
1620 define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
1621 ; KNL-LABEL: store_v4i1:
1623 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1624 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
1625 ; KNL-NEXT: kmovw %k0, %eax
1626 ; KNL-NEXT: movb %al, (%rdi)
1627 ; KNL-NEXT: vzeroupper
1630 ; SKX-LABEL: store_v4i1:
1632 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0
1633 ; SKX-NEXT: vpmovd2m %xmm0, %k0
1634 ; SKX-NEXT: knotw %k0, %k0
1635 ; SKX-NEXT: kmovb %k0, (%rdi)
1638 ; AVX512BW-LABEL: store_v4i1:
1639 ; AVX512BW: ## %bb.0:
1640 ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
1641 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
1642 ; AVX512BW-NEXT: kmovd %k0, %eax
1643 ; AVX512BW-NEXT: movb %al, (%rdi)
1644 ; AVX512BW-NEXT: vzeroupper
1645 ; AVX512BW-NEXT: retq
1647 ; AVX512DQ-LABEL: store_v4i1:
1648 ; AVX512DQ: ## %bb.0:
1649 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
1650 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
1651 ; AVX512DQ-NEXT: knotw %k0, %k0
1652 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
1653 ; AVX512DQ-NEXT: vzeroupper
1654 ; AVX512DQ-NEXT: retq
1656 ; X86-LABEL: store_v4i1:
1658 ; X86-NEXT: vpslld $31, %xmm0, %xmm0
1659 ; X86-NEXT: vpmovd2m %xmm0, %k0
1660 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1661 ; X86-NEXT: knotw %k0, %k0
1662 ; X86-NEXT: kmovb %k0, (%eax)
1664 %x = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1>
1665 store <4 x i1> %x, <4 x i1>* %ptr, align 4
; store_v8i1 - invert an <8 x i1> mask and store it.
; The expected output differs per run in how the word vector becomes a
; k-register (sign-extend to qwords on KNL and AVX512DQ, vpmovw2m where
; avx512bw is available) and in how the inversion is done (vptestnmq on KNL,
; knotb or knotw elsewhere). One byte is written in every case.
1669 define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) {
1670 ; KNL-LABEL: store_v8i1:
1672 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1673 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1674 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
1675 ; KNL-NEXT: kmovw %k0, %eax
1676 ; KNL-NEXT: movb %al, (%rdi)
1677 ; KNL-NEXT: vzeroupper
1680 ; SKX-LABEL: store_v8i1:
1682 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1683 ; SKX-NEXT: vpmovw2m %xmm0, %k0
1684 ; SKX-NEXT: knotb %k0, %k0
1685 ; SKX-NEXT: kmovb %k0, (%rdi)
1688 ; AVX512BW-LABEL: store_v8i1:
1689 ; AVX512BW: ## %bb.0:
1690 ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
1691 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
1692 ; AVX512BW-NEXT: knotw %k0, %k0
1693 ; AVX512BW-NEXT: kmovd %k0, %eax
1694 ; AVX512BW-NEXT: movb %al, (%rdi)
1695 ; AVX512BW-NEXT: vzeroupper
1696 ; AVX512BW-NEXT: retq
1698 ; AVX512DQ-LABEL: store_v8i1:
1699 ; AVX512DQ: ## %bb.0:
1700 ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
1701 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
1702 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
1703 ; AVX512DQ-NEXT: knotb %k0, %k0
1704 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
1705 ; AVX512DQ-NEXT: vzeroupper
1706 ; AVX512DQ-NEXT: retq
1708 ; X86-LABEL: store_v8i1:
1710 ; X86-NEXT: vpsllw $15, %xmm0, %xmm0
1711 ; X86-NEXT: vpmovw2m %xmm0, %k0
1712 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1713 ; X86-NEXT: knotb %k0, %k0
1714 ; X86-NEXT: kmovb %k0, (%eax)
1716 %x = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
1717 store <8 x i1> %x, <8 x i1>* %ptr, align 4
; store_v16i1 - invert a <16 x i1> mask and store it.
; Every run is expected to write the 16-bit mask straight from a k-register
; with kmovw. KNL folds the inversion into vptestnmd; the other runs convert
; the vector to a mask first and then invert it with knotw.
1721 define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) {
1722 ; KNL-LABEL: store_v16i1:
1724 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
1725 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1726 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
1727 ; KNL-NEXT: kmovw %k0, (%rdi)
1728 ; KNL-NEXT: vzeroupper
1731 ; SKX-LABEL: store_v16i1:
1733 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
1734 ; SKX-NEXT: vpmovb2m %xmm0, %k0
1735 ; SKX-NEXT: knotw %k0, %k0
1736 ; SKX-NEXT: kmovw %k0, (%rdi)
1739 ; AVX512BW-LABEL: store_v16i1:
1740 ; AVX512BW: ## %bb.0:
1741 ; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
1742 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
1743 ; AVX512BW-NEXT: knotw %k0, %k0
1744 ; AVX512BW-NEXT: kmovw %k0, (%rdi)
1745 ; AVX512BW-NEXT: vzeroupper
1746 ; AVX512BW-NEXT: retq
1748 ; AVX512DQ-LABEL: store_v16i1:
1749 ; AVX512DQ: ## %bb.0:
1750 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
1751 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
1752 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
1753 ; AVX512DQ-NEXT: knotw %k0, %k0
1754 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
1755 ; AVX512DQ-NEXT: vzeroupper
1756 ; AVX512DQ-NEXT: retq
1758 ; X86-LABEL: store_v16i1:
1760 ; X86-NEXT: vpsllw $7, %xmm0, %xmm0
1761 ; X86-NEXT: vpmovb2m %xmm0, %k0
1762 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1763 ; X86-NEXT: knotw %k0, %k0
1764 ; X86-NEXT: kmovw %k0, (%eax)
1766 %x = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
1767 store <16 x i1> %x, <16 x i1>* %ptr, align 4
; f1 - toggle a scalar i1 global and pass the new value to @f2.
; @f1.v is an internal i1 flag initialised to false. f1 loads it, inverts it,
; stores it back, zero-extends the new value to i32, and tail-calls @f2 with
; it. The expected output does this with ordinary byte loads/stores and an
; integer xor. On x86-64 the call becomes a tail jump to _f2; the 32-bit run
; passes the argument on the stack and uses a regular call. The %c parameter
; is not referenced by the visible body.
1782 @f1.v = internal unnamed_addr global i1 false, align 4
1784 define void @f1(i32 %c) {
1786 ; CHECK: ## %bb.0: ## %entry
1787 ; CHECK-NEXT: movzbl {{.*}}(%rip), %edi
1788 ; CHECK-NEXT: xorl $1, %edi
1789 ; CHECK-NEXT: movb %dil, {{.*}}(%rip)
1790 ; CHECK-NEXT: jmp _f2 ## TAILCALL
1793 ; X86: ## %bb.0: ## %entry
1794 ; X86-NEXT: subl $12, %esp
1795 ; X86-NEXT: .cfi_def_cfa_offset 16
1796 ; X86-NEXT: movzbl _f1.v, %eax
1797 ; X86-NEXT: xorl $1, %eax
1798 ; X86-NEXT: movb %al, _f1.v
1799 ; X86-NEXT: movl %eax, (%esp)
1800 ; X86-NEXT: calll _f2
1801 ; X86-NEXT: addl $12, %esp
1804 %.b1 = load i1, i1* @f1.v, align 4
1805 %not..b1 = xor i1 %.b1, true
1806 store i1 %not..b1, i1* @f1.v, align 4
1807 %0 = zext i1 %not..b1 to i32
1808 tail call void @f2(i32 %0) #2
1812 declare void @f2(i32) #1
; store_i16_i1 - truncate an i16 to i1 on the way to memory.
; The expected output masks the value with 1 and writes a single byte through
; %y; no k-register is involved.
1814 define void @store_i16_i1(i16 %x, i1 *%y) {
1815 ; CHECK-LABEL: store_i16_i1:
1817 ; CHECK-NEXT: andl $1, %edi
1818 ; CHECK-NEXT: movb %dil, (%rsi)
1821 ; X86-LABEL: store_i16_i1:
1823 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1824 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
1825 ; X86-NEXT: andl $1, %ecx
1826 ; X86-NEXT: movb %cl, (%eax)
1828 %c = trunc i16 %x to i1
; store_i8_i1 - truncate an i8 to i1 on the way to memory.
; The expected output masks the value with 1 (andl on x86-64, andb in the
; 32-bit run) and writes a single byte through %y.
1833 define void @store_i8_i1(i8 %x, i1 *%y) {
1834 ; CHECK-LABEL: store_i8_i1:
1836 ; CHECK-NEXT: andl $1, %edi
1837 ; CHECK-NEXT: movb %dil, (%rsi)
1840 ; X86-LABEL: store_i8_i1:
1842 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1843 ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
1844 ; X86-NEXT: andb $1, %cl
1845 ; X86-NEXT: movb %cl, (%eax)
1847 %c = trunc i8 %x to i1
; test_build_vec_v32i1 - select with a constant <32 x i1> mask.
; Lanes of %x whose constant mask bit is false become zero. Every run is
; expected to fold the constant mask into a bitwise AND with a memory
; constant: one 512-bit AND where avx512bw is available, two 256-bit ANDs
; on KNL and AVX512DQ.
1852 define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) {
1853 ; KNL-LABEL: test_build_vec_v32i1:
1855 ; KNL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
1856 ; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
1857 ; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
1858 ; KNL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1861 ; SKX-LABEL: test_build_vec_v32i1:
1863 ; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
1866 ; AVX512BW-LABEL: test_build_vec_v32i1:
1867 ; AVX512BW: ## %bb.0:
1868 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
1869 ; AVX512BW-NEXT: retq
1871 ; AVX512DQ-LABEL: test_build_vec_v32i1:
1872 ; AVX512DQ: ## %bb.0:
1873 ; AVX512DQ-NEXT: vextractf64x4 $1, %zmm0, %ymm1
1874 ; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
1875 ; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
1876 ; AVX512DQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1877 ; AVX512DQ-NEXT: retq
1879 ; X86-LABEL: test_build_vec_v32i1:
1881 ; X86-NEXT: vandps LCPI40_0, %zmm0, %zmm0
1883 %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
; test_build_vec_v32i1_optsize - constant-mask select under optsize.
; Same select as the non-optsize variant, but with the optsize attribute. The
; runs with avx512bw are expected to materialise the mask as the 32-bit
; immediate 0x59455495, move it into a k-register, and use a zero-masked
; vmovdqu16. The KNL and AVX512DQ runs still AND with memory constants.
1887 define <32 x i16> @test_build_vec_v32i1_optsize(<32 x i16> %x) optsize {
1888 ; KNL-LABEL: test_build_vec_v32i1_optsize:
1890 ; KNL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
1891 ; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
1892 ; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
1893 ; KNL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1896 ; SKX-LABEL: test_build_vec_v32i1_optsize:
1898 ; SKX-NEXT: movl $1497715861, %eax ## imm = 0x59455495
1899 ; SKX-NEXT: kmovd %eax, %k1
1900 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1903 ; AVX512BW-LABEL: test_build_vec_v32i1_optsize:
1904 ; AVX512BW: ## %bb.0:
1905 ; AVX512BW-NEXT: movl $1497715861, %eax ## imm = 0x59455495
1906 ; AVX512BW-NEXT: kmovd %eax, %k1
1907 ; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1908 ; AVX512BW-NEXT: retq
1910 ; AVX512DQ-LABEL: test_build_vec_v32i1_optsize:
1911 ; AVX512DQ: ## %bb.0:
1912 ; AVX512DQ-NEXT: vextractf64x4 $1, %zmm0, %ymm1
1913 ; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
1914 ; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
1915 ; AVX512DQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1916 ; AVX512DQ-NEXT: retq
1918 ; X86-LABEL: test_build_vec_v32i1_optsize:
1920 ; X86-NEXT: movl $1497715861, %eax ## imm = 0x59455495
1921 ; X86-NEXT: kmovd %eax, %k1
1922 ; X86-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1924 %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
; test_build_vec_v64i1 - select with a constant <64 x i1> mask on bytes.
; Lanes of %x whose constant mask bit is false become zero. The runs with
; avx512bw are expected to express this as a single vpshufb that keeps the
; selected bytes in place and zeroes the rest; the KNL and AVX512DQ runs AND
; each 256-bit half with a memory constant.
1928 define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
1929 ; KNL-LABEL: test_build_vec_v64i1:
1931 ; KNL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
1932 ; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
1933 ; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
1934 ; KNL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1937 ; SKX-LABEL: test_build_vec_v64i1:
1939 ; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
1942 ; AVX512BW-LABEL: test_build_vec_v64i1:
1943 ; AVX512BW: ## %bb.0:
1944 ; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
1945 ; AVX512BW-NEXT: retq
1947 ; AVX512DQ-LABEL: test_build_vec_v64i1:
1948 ; AVX512DQ: ## %bb.0:
1949 ; AVX512DQ-NEXT: vextractf64x4 $1, %zmm0, %ymm1
1950 ; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
1951 ; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
1952 ; AVX512DQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1953 ; AVX512DQ-NEXT: retq
1955 ; X86-LABEL: test_build_vec_v64i1:
1957 ; X86-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
1959 %ret = select <64 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <64 x i8> %x, <64 x i8> zeroinitializer
; ktest_1 - branch on whether an 8-bit compare mask is all zeros.
; Two unaligned <8 x double> loads are taken from %base and %base + 1 element.
; %sel1 = (%in > first load) zero-masks the second load; %sel2 = (%in < masked
; second load); the branch tests whether (%sel1 & %sel2), viewed as an i8, is
; zero. The runs with avx512dq are expected to test the k-register directly
; with kortestb, while KNL and AVX512BW move the mask to a general-purpose
; register and use testb. Each successor stores %in to one of the two
; addresses.
1963 define void @ktest_1(<8 x double> %in, double * %base) {
1964 ; KNL-LABEL: ktest_1:
1966 ; KNL-NEXT: vcmpgtpd (%rdi), %zmm0, %k1
1967 ; KNL-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
1968 ; KNL-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
1969 ; KNL-NEXT: kmovw %k0, %eax
1970 ; KNL-NEXT: testb %al, %al
1971 ; KNL-NEXT: je LBB43_2
1972 ; KNL-NEXT: ## %bb.1: ## %L1
1973 ; KNL-NEXT: vmovapd %zmm0, (%rdi)
1974 ; KNL-NEXT: vzeroupper
1976 ; KNL-NEXT: LBB43_2: ## %L2
1977 ; KNL-NEXT: vmovapd %zmm0, 8(%rdi)
1978 ; KNL-NEXT: vzeroupper
1981 ; SKX-LABEL: ktest_1:
1983 ; SKX-NEXT: vcmpgtpd (%rdi), %zmm0, %k1
1984 ; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
1985 ; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
1986 ; SKX-NEXT: kortestb %k0, %k0
1987 ; SKX-NEXT: je LBB43_2
1988 ; SKX-NEXT: ## %bb.1: ## %L1
1989 ; SKX-NEXT: vmovapd %zmm0, (%rdi)
1990 ; SKX-NEXT: vzeroupper
1992 ; SKX-NEXT: LBB43_2: ## %L2
1993 ; SKX-NEXT: vmovapd %zmm0, 8(%rdi)
1994 ; SKX-NEXT: vzeroupper
1997 ; AVX512BW-LABEL: ktest_1:
1998 ; AVX512BW: ## %bb.0:
1999 ; AVX512BW-NEXT: vcmpgtpd (%rdi), %zmm0, %k1
2000 ; AVX512BW-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
2001 ; AVX512BW-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
2002 ; AVX512BW-NEXT: kmovd %k0, %eax
2003 ; AVX512BW-NEXT: testb %al, %al
2004 ; AVX512BW-NEXT: je LBB43_2
2005 ; AVX512BW-NEXT: ## %bb.1: ## %L1
2006 ; AVX512BW-NEXT: vmovapd %zmm0, (%rdi)
2007 ; AVX512BW-NEXT: vzeroupper
2008 ; AVX512BW-NEXT: retq
2009 ; AVX512BW-NEXT: LBB43_2: ## %L2
2010 ; AVX512BW-NEXT: vmovapd %zmm0, 8(%rdi)
2011 ; AVX512BW-NEXT: vzeroupper
2012 ; AVX512BW-NEXT: retq
2014 ; AVX512DQ-LABEL: ktest_1:
2015 ; AVX512DQ: ## %bb.0:
2016 ; AVX512DQ-NEXT: vcmpgtpd (%rdi), %zmm0, %k1
2017 ; AVX512DQ-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
2018 ; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
2019 ; AVX512DQ-NEXT: kortestb %k0, %k0
2020 ; AVX512DQ-NEXT: je LBB43_2
2021 ; AVX512DQ-NEXT: ## %bb.1: ## %L1
2022 ; AVX512DQ-NEXT: vmovapd %zmm0, (%rdi)
2023 ; AVX512DQ-NEXT: vzeroupper
2024 ; AVX512DQ-NEXT: retq
2025 ; AVX512DQ-NEXT: LBB43_2: ## %L2
2026 ; AVX512DQ-NEXT: vmovapd %zmm0, 8(%rdi)
2027 ; AVX512DQ-NEXT: vzeroupper
2028 ; AVX512DQ-NEXT: retq
2030 ; X86-LABEL: ktest_1:
2032 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2033 ; X86-NEXT: vcmpgtpd (%eax), %zmm0, %k1
2034 ; X86-NEXT: vmovupd 8(%eax), %zmm1 {%k1} {z}
2035 ; X86-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
2036 ; X86-NEXT: kortestb %k0, %k0
2037 ; X86-NEXT: je LBB43_2
2038 ; X86-NEXT: ## %bb.1: ## %L1
2039 ; X86-NEXT: vmovapd %zmm0, (%eax)
2040 ; X86-NEXT: vzeroupper
2042 ; X86-NEXT: LBB43_2: ## %L2
2043 ; X86-NEXT: vmovapd %zmm0, 8(%eax)
2044 ; X86-NEXT: vzeroupper
2046 %addr1 = getelementptr double, double * %base, i64 0
2047 %addr2 = getelementptr double, double * %base, i64 1
2049 %vaddr1 = bitcast double* %addr1 to <8 x double>*
2050 %vaddr2 = bitcast double* %addr2 to <8 x double>*
2052 %val1 = load <8 x double>, <8 x double> *%vaddr1, align 1
2053 %val2 = load <8 x double>, <8 x double> *%vaddr2, align 1
2055 %sel1 = fcmp ogt <8 x double>%in, %val1
2056 %val3 = select <8 x i1> %sel1, <8 x double> %val2, <8 x double> zeroinitializer
2057 %sel2 = fcmp olt <8 x double> %in, %val3
2058 %sel3 = and <8 x i1> %sel1, %sel2
2060 %int_sel3 = bitcast <8 x i1> %sel3 to i8
2061 %res = icmp eq i8 %int_sel3, zeroinitializer
2062 br i1 %res, label %L2, label %L1
2064 store <8 x double> %in, <8 x double>* %vaddr1
2067 store <8 x double> %in, <8 x double>* %vaddr2
; ktest_2: compares the <32 x float> argument against two unaligned vectors
; loaded from %base and from %base + 1 float, ORs the two <32 x i1> compare
; results, bitcasts the mask to i32 and branches on whether it is zero.
; Expected codegen in the assertions below: the runs with BW enabled join the
; two 16-lane mask halves with kunpckwd and test them with kortestd, while the
; runs without BW (KNL and AVX512DQ) OR each half with korw, move both halves
; to GPRs and merge them with shll/orl before the branch.
2073 define void @ktest_2(<32 x float> %in, float * %base) {
2075 ; KNL-LABEL: ktest_2:
2077 ; KNL-NEXT: vcmpgtps 64(%rdi), %zmm1, %k1
2078 ; KNL-NEXT: vcmpgtps (%rdi), %zmm0, %k2
2079 ; KNL-NEXT: vmovups 4(%rdi), %zmm2 {%k2} {z}
2080 ; KNL-NEXT: vmovups 68(%rdi), %zmm3 {%k1} {z}
2081 ; KNL-NEXT: vcmpltps %zmm3, %zmm1, %k0
2082 ; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k3
2083 ; KNL-NEXT: korw %k3, %k2, %k2
2084 ; KNL-NEXT: kmovw %k2, %eax
2085 ; KNL-NEXT: korw %k0, %k1, %k0
2086 ; KNL-NEXT: kmovw %k0, %ecx
2087 ; KNL-NEXT: shll $16, %ecx
2088 ; KNL-NEXT: orl %eax, %ecx
2089 ; KNL-NEXT: je LBB44_2
2090 ; KNL-NEXT: ## %bb.1: ## %L1
2091 ; KNL-NEXT: vmovaps %zmm0, (%rdi)
2092 ; KNL-NEXT: vmovaps %zmm1, 64(%rdi)
2093 ; KNL-NEXT: vzeroupper
2095 ; KNL-NEXT: LBB44_2: ## %L2
2096 ; KNL-NEXT: vmovaps %zmm0, 4(%rdi)
2097 ; KNL-NEXT: vmovaps %zmm1, 68(%rdi)
2098 ; KNL-NEXT: vzeroupper
2101 ; SKX-LABEL: ktest_2:
2103 ; SKX-NEXT: vcmpgtps (%rdi), %zmm0, %k1
2104 ; SKX-NEXT: vcmpgtps 64(%rdi), %zmm1, %k2
2105 ; SKX-NEXT: kunpckwd %k1, %k2, %k0
2106 ; SKX-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z}
2107 ; SKX-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z}
2108 ; SKX-NEXT: vcmpltps %zmm3, %zmm0, %k1
2109 ; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2
2110 ; SKX-NEXT: kunpckwd %k1, %k2, %k1
2111 ; SKX-NEXT: kortestd %k1, %k0
2112 ; SKX-NEXT: je LBB44_2
2113 ; SKX-NEXT: ## %bb.1: ## %L1
2114 ; SKX-NEXT: vmovaps %zmm0, (%rdi)
2115 ; SKX-NEXT: vmovaps %zmm1, 64(%rdi)
2116 ; SKX-NEXT: vzeroupper
2118 ; SKX-NEXT: LBB44_2: ## %L2
2119 ; SKX-NEXT: vmovaps %zmm0, 4(%rdi)
2120 ; SKX-NEXT: vmovaps %zmm1, 68(%rdi)
2121 ; SKX-NEXT: vzeroupper
2124 ; AVX512BW-LABEL: ktest_2:
2125 ; AVX512BW: ## %bb.0:
2126 ; AVX512BW-NEXT: vcmpgtps (%rdi), %zmm0, %k1
2127 ; AVX512BW-NEXT: vcmpgtps 64(%rdi), %zmm1, %k2
2128 ; AVX512BW-NEXT: kunpckwd %k1, %k2, %k0
2129 ; AVX512BW-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z}
2130 ; AVX512BW-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z}
2131 ; AVX512BW-NEXT: vcmpltps %zmm3, %zmm0, %k1
2132 ; AVX512BW-NEXT: vcmpltps %zmm2, %zmm1, %k2
2133 ; AVX512BW-NEXT: kunpckwd %k1, %k2, %k1
2134 ; AVX512BW-NEXT: kortestd %k1, %k0
2135 ; AVX512BW-NEXT: je LBB44_2
2136 ; AVX512BW-NEXT: ## %bb.1: ## %L1
2137 ; AVX512BW-NEXT: vmovaps %zmm0, (%rdi)
2138 ; AVX512BW-NEXT: vmovaps %zmm1, 64(%rdi)
2139 ; AVX512BW-NEXT: vzeroupper
2140 ; AVX512BW-NEXT: retq
2141 ; AVX512BW-NEXT: LBB44_2: ## %L2
2142 ; AVX512BW-NEXT: vmovaps %zmm0, 4(%rdi)
2143 ; AVX512BW-NEXT: vmovaps %zmm1, 68(%rdi)
2144 ; AVX512BW-NEXT: vzeroupper
2145 ; AVX512BW-NEXT: retq
2147 ; AVX512DQ-LABEL: ktest_2:
2148 ; AVX512DQ: ## %bb.0:
2149 ; AVX512DQ-NEXT: vcmpgtps 64(%rdi), %zmm1, %k1
2150 ; AVX512DQ-NEXT: vcmpgtps (%rdi), %zmm0, %k2
2151 ; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm2 {%k2} {z}
2152 ; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm3 {%k1} {z}
2153 ; AVX512DQ-NEXT: vcmpltps %zmm3, %zmm1, %k0
2154 ; AVX512DQ-NEXT: vcmpltps %zmm2, %zmm0, %k3
2155 ; AVX512DQ-NEXT: korw %k3, %k2, %k2
2156 ; AVX512DQ-NEXT: kmovw %k2, %eax
2157 ; AVX512DQ-NEXT: korw %k0, %k1, %k0
2158 ; AVX512DQ-NEXT: kmovw %k0, %ecx
2159 ; AVX512DQ-NEXT: shll $16, %ecx
2160 ; AVX512DQ-NEXT: orl %eax, %ecx
2161 ; AVX512DQ-NEXT: je LBB44_2
2162 ; AVX512DQ-NEXT: ## %bb.1: ## %L1
2163 ; AVX512DQ-NEXT: vmovaps %zmm0, (%rdi)
2164 ; AVX512DQ-NEXT: vmovaps %zmm1, 64(%rdi)
2165 ; AVX512DQ-NEXT: vzeroupper
2166 ; AVX512DQ-NEXT: retq
2167 ; AVX512DQ-NEXT: LBB44_2: ## %L2
2168 ; AVX512DQ-NEXT: vmovaps %zmm0, 4(%rdi)
2169 ; AVX512DQ-NEXT: vmovaps %zmm1, 68(%rdi)
2170 ; AVX512DQ-NEXT: vzeroupper
2171 ; AVX512DQ-NEXT: retq
2173 ; X86-LABEL: ktest_2:
2175 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2176 ; X86-NEXT: vcmpgtps (%eax), %zmm0, %k1
2177 ; X86-NEXT: vcmpgtps 64(%eax), %zmm1, %k2
2178 ; X86-NEXT: kunpckwd %k1, %k2, %k0
2179 ; X86-NEXT: vmovups 68(%eax), %zmm2 {%k2} {z}
2180 ; X86-NEXT: vmovups 4(%eax), %zmm3 {%k1} {z}
2181 ; X86-NEXT: vcmpltps %zmm3, %zmm0, %k1
2182 ; X86-NEXT: vcmpltps %zmm2, %zmm1, %k2
2183 ; X86-NEXT: kunpckwd %k1, %k2, %k1
2184 ; X86-NEXT: kortestd %k1, %k0
2185 ; X86-NEXT: je LBB44_2
2186 ; X86-NEXT: ## %bb.1: ## %L1
2187 ; X86-NEXT: vmovaps %zmm0, (%eax)
2188 ; X86-NEXT: vmovaps %zmm1, 64(%eax)
2189 ; X86-NEXT: vzeroupper
2191 ; X86-NEXT: LBB44_2: ## %L2
2192 ; X86-NEXT: vmovaps %zmm0, 4(%eax)
2193 ; X86-NEXT: vmovaps %zmm1, 68(%eax)
2194 ; X86-NEXT: vzeroupper
; Two overlapping vector views of %base, at element offsets 0 and 1; both are
; loaded with align 1.
2196 %addr1 = getelementptr float, float * %base, i64 0
2197 %addr2 = getelementptr float, float * %base, i64 1
2199 %vaddr1 = bitcast float* %addr1 to <32 x float>*
2200 %vaddr2 = bitcast float* %addr2 to <32 x float>*
2202 %val1 = load <32 x float>, <32 x float> *%vaddr1, align 1
2203 %val2 = load <32 x float>, <32 x float> *%vaddr2, align 1
; %val2 is only kept in lanes where the first compare was true; the other
; lanes are compared against zero.
2205 %sel1 = fcmp ogt <32 x float>%in, %val1
2206 %val3 = select <32 x i1> %sel1, <32 x float> %val2, <32 x float> zeroinitializer
2207 %sel2 = fcmp olt <32 x float> %in, %val3
2208 %sel3 = or <32 x i1> %sel1, %sel2
; Reduce the lane mask to a scalar: an all-zero mask branches to %L2, anything
; else to %L1.
2210 %int_sel3 = bitcast <32 x i1> %sel3 to i32
2211 %res = icmp eq i32 %int_sel3, zeroinitializer
2212 br i1 %res, label %L2, label %L1
2214 store <32 x float> %in, <32 x float>* %vaddr1
2217 store <32 x float> %in, <32 x float>* %vaddr2
; load_8i1: loads an <8 x i1> mask from memory and sign-extends each lane to
; i64. The assertions expect kmovb + vpmovm2q in the runs that enable DQ, and
; kmovw + a zero-masked vpternlogq $255 (all-ones in the selected lanes) in
; the runs that do not.
2223 define <8 x i64> @load_8i1(<8 x i1>* %a) {
2224 ; KNL-LABEL: load_8i1:
2226 ; KNL-NEXT: kmovw (%rdi), %k1
2227 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2230 ; SKX-LABEL: load_8i1:
2232 ; SKX-NEXT: kmovb (%rdi), %k0
2233 ; SKX-NEXT: vpmovm2q %k0, %zmm0
2236 ; AVX512BW-LABEL: load_8i1:
2237 ; AVX512BW: ## %bb.0:
2238 ; AVX512BW-NEXT: kmovw (%rdi), %k1
2239 ; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2240 ; AVX512BW-NEXT: retq
2242 ; AVX512DQ-LABEL: load_8i1:
2243 ; AVX512DQ: ## %bb.0:
2244 ; AVX512DQ-NEXT: kmovb (%rdi), %k0
2245 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
2246 ; AVX512DQ-NEXT: retq
2248 ; X86-LABEL: load_8i1:
2250 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2251 ; X86-NEXT: kmovb (%eax), %k0
2252 ; X86-NEXT: vpmovm2q %k0, %zmm0
2254 %b = load <8 x i1>, <8 x i1>* %a
2255 %c = sext <8 x i1> %b to <8 x i64>
; load_16i1: loads a <16 x i1> mask from memory and sign-extends each lane to
; i32. Every run is expected to load the mask with kmovw; the runs that enable
; DQ then expand it with vpmovm2d, the others with a zero-masked
; vpternlogd $255.
2259 define <16 x i32> @load_16i1(<16 x i1>* %a) {
2260 ; KNL-LABEL: load_16i1:
2262 ; KNL-NEXT: kmovw (%rdi), %k1
2263 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2266 ; SKX-LABEL: load_16i1:
2268 ; SKX-NEXT: kmovw (%rdi), %k0
2269 ; SKX-NEXT: vpmovm2d %k0, %zmm0
2272 ; AVX512BW-LABEL: load_16i1:
2273 ; AVX512BW: ## %bb.0:
2274 ; AVX512BW-NEXT: kmovw (%rdi), %k1
2275 ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2276 ; AVX512BW-NEXT: retq
2278 ; AVX512DQ-LABEL: load_16i1:
2279 ; AVX512DQ: ## %bb.0:
2280 ; AVX512DQ-NEXT: kmovw (%rdi), %k0
2281 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
2282 ; AVX512DQ-NEXT: retq
2284 ; X86-LABEL: load_16i1:
2286 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2287 ; X86-NEXT: kmovw (%eax), %k0
2288 ; X86-NEXT: vpmovm2d %k0, %zmm0
2290 %b = load <16 x i1>, <16 x i1>* %a
2291 %c = sext <16 x i1> %b to <16 x i32>
; load_2i1: loads a <2 x i1> mask from memory and sign-extends each lane to
; i16. The expected sequences differ per feature set: a direct xmm vpmovm2w in
; the SKX and X86 runs, a zmm vpmovm2w in the BW-only run, and a widen to
; 32-bit lanes followed by a vpmovdw truncation in the runs without BW.
2295 define <2 x i16> @load_2i1(<2 x i1>* %a) {
2296 ; KNL-LABEL: load_2i1:
2298 ; KNL-NEXT: kmovw (%rdi), %k1
2299 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2300 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
2301 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
2302 ; KNL-NEXT: vzeroupper
2305 ; SKX-LABEL: load_2i1:
2307 ; SKX-NEXT: kmovb (%rdi), %k0
2308 ; SKX-NEXT: vpmovm2w %k0, %xmm0
2311 ; AVX512BW-LABEL: load_2i1:
2312 ; AVX512BW: ## %bb.0:
2313 ; AVX512BW-NEXT: kmovw (%rdi), %k0
2314 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
2315 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
2316 ; AVX512BW-NEXT: vzeroupper
2317 ; AVX512BW-NEXT: retq
2319 ; AVX512DQ-LABEL: load_2i1:
2320 ; AVX512DQ: ## %bb.0:
2321 ; AVX512DQ-NEXT: kmovb (%rdi), %k0
2322 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
2323 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
2324 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
2325 ; AVX512DQ-NEXT: vzeroupper
2326 ; AVX512DQ-NEXT: retq
2328 ; X86-LABEL: load_2i1:
2330 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2331 ; X86-NEXT: kmovb (%eax), %k0
2332 ; X86-NEXT: vpmovm2w %k0, %xmm0
2334 %b = load <2 x i1>, <2 x i1>* %a
2335 %c = sext <2 x i1> %b to <2 x i16>
; load_4i1: loads a <4 x i1> mask from memory and sign-extends each lane to
; i16. The expected sequences differ per feature set: a direct xmm vpmovm2w in
; the SKX and X86 runs, a zmm vpmovm2w in the BW-only run, and a widen to
; 32-bit lanes followed by a vpmovdw truncation in the runs without BW.
2339 define <4 x i16> @load_4i1(<4 x i1>* %a) {
2340 ; KNL-LABEL: load_4i1:
2342 ; KNL-NEXT: kmovw (%rdi), %k1
2343 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2344 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
2345 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
2346 ; KNL-NEXT: vzeroupper
2349 ; SKX-LABEL: load_4i1:
2351 ; SKX-NEXT: kmovb (%rdi), %k0
2352 ; SKX-NEXT: vpmovm2w %k0, %xmm0
2355 ; AVX512BW-LABEL: load_4i1:
2356 ; AVX512BW: ## %bb.0:
2357 ; AVX512BW-NEXT: kmovw (%rdi), %k0
2358 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
2359 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
2360 ; AVX512BW-NEXT: vzeroupper
2361 ; AVX512BW-NEXT: retq
2363 ; AVX512DQ-LABEL: load_4i1:
2364 ; AVX512DQ: ## %bb.0:
2365 ; AVX512DQ-NEXT: kmovb (%rdi), %k0
2366 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
2367 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
2368 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
2369 ; AVX512DQ-NEXT: vzeroupper
2370 ; AVX512DQ-NEXT: retq
2372 ; X86-LABEL: load_4i1:
2374 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2375 ; X86-NEXT: kmovb (%eax), %k0
2376 ; X86-NEXT: vpmovm2w %k0, %xmm0
2378 %b = load <4 x i1>, <4 x i1>* %a
2379 %c = sext <4 x i1> %b to <4 x i16>
; load_32i1: loads a <32 x i1> mask from memory and sign-extends each lane to
; i16. The runs that enable BW are expected to use a single kmovd + vpmovm2w.
; The runs without BW load two 16-bit masks from byte offsets 0 and 2, expand
; each to 32-bit lanes, truncate with vpmovdw and join the two halves with
; vinserti64x4.
2383 define <32 x i16> @load_32i1(<32 x i1>* %a) {
2384 ; KNL-LABEL: load_32i1:
2386 ; KNL-NEXT: kmovw (%rdi), %k1
2387 ; KNL-NEXT: kmovw 2(%rdi), %k2
2388 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
2389 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
2390 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
2391 ; KNL-NEXT: vpmovdw %zmm1, %ymm1
2392 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
2395 ; SKX-LABEL: load_32i1:
2397 ; SKX-NEXT: kmovd (%rdi), %k0
2398 ; SKX-NEXT: vpmovm2w %k0, %zmm0
2401 ; AVX512BW-LABEL: load_32i1:
2402 ; AVX512BW: ## %bb.0:
2403 ; AVX512BW-NEXT: kmovd (%rdi), %k0
2404 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
2405 ; AVX512BW-NEXT: retq
2407 ; AVX512DQ-LABEL: load_32i1:
2408 ; AVX512DQ: ## %bb.0:
2409 ; AVX512DQ-NEXT: kmovw (%rdi), %k0
2410 ; AVX512DQ-NEXT: kmovw 2(%rdi), %k1
2411 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0
2412 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
2413 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
2414 ; AVX512DQ-NEXT: vpmovdw %zmm1, %ymm1
2415 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
2416 ; AVX512DQ-NEXT: retq
2418 ; X86-LABEL: load_32i1:
2420 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2421 ; X86-NEXT: kmovd (%eax), %k0
2422 ; X86-NEXT: vpmovm2w %k0, %zmm0
2424 %b = load <32 x i1>, <32 x i1>* %a
2425 %c = sext <32 x i1> %b to <32 x i16>
; load_64i1: loads a <64 x i1> mask from memory and sign-extends each lane to
; i8. The runs that enable BW are expected to use a single kmovq + vpmovm2b
; (including the 32-bit X86 run). The runs without BW load four 16-bit masks
; from byte offsets 0, 2, 4 and 6, expand each to 32-bit lanes, truncate with
; vpmovdb and reassemble the result with vinserti128 and vinserti64x4.
2429 define <64 x i8> @load_64i1(<64 x i1>* %a) {
2430 ; KNL-LABEL: load_64i1:
2432 ; KNL-NEXT: kmovw (%rdi), %k1
2433 ; KNL-NEXT: kmovw 2(%rdi), %k2
2434 ; KNL-NEXT: kmovw 4(%rdi), %k3
2435 ; KNL-NEXT: kmovw 6(%rdi), %k4
2436 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
2437 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
2438 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z}
2439 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
2440 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2441 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
2442 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
2443 ; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
2444 ; KNL-NEXT: vpmovdb %zmm2, %xmm2
2445 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2446 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
2449 ; SKX-LABEL: load_64i1:
2451 ; SKX-NEXT: kmovq (%rdi), %k0
2452 ; SKX-NEXT: vpmovm2b %k0, %zmm0
2455 ; AVX512BW-LABEL: load_64i1:
2456 ; AVX512BW: ## %bb.0:
2457 ; AVX512BW-NEXT: kmovq (%rdi), %k0
2458 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
2459 ; AVX512BW-NEXT: retq
2461 ; AVX512DQ-LABEL: load_64i1:
2462 ; AVX512DQ: ## %bb.0:
2463 ; AVX512DQ-NEXT: kmovw (%rdi), %k0
2464 ; AVX512DQ-NEXT: kmovw 2(%rdi), %k1
2465 ; AVX512DQ-NEXT: kmovw 4(%rdi), %k2
2466 ; AVX512DQ-NEXT: kmovw 6(%rdi), %k3
2467 ; AVX512DQ-NEXT: vpmovm2d %k2, %zmm0
2468 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
2469 ; AVX512DQ-NEXT: vpmovm2d %k3, %zmm1
2470 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
2471 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2472 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
2473 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
2474 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm2
2475 ; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
2476 ; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2477 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
2478 ; AVX512DQ-NEXT: retq
2480 ; X86-LABEL: load_64i1:
2482 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2483 ; X86-NEXT: kmovq (%eax), %k0
2484 ; X86-NEXT: vpmovm2b %k0, %zmm0
2486 %b = load <64 x i1>, <64 x i1>* %a
2487 %c = sext <64 x i1> %b to <64 x i8>
; store_8i1: stores an <8 x i1> argument to memory. In every expected sequence
; the low bit of each lane is shifted into the sign position before the vector
; is converted to a mask register. The runs that enable DQ store the mask
; directly with kmovb; the other runs copy it to a GPR and store one byte with
; movb.
2491 define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) {
2492 ; KNL-LABEL: store_8i1:
2494 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
2495 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
2496 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
2497 ; KNL-NEXT: kmovw %k0, %eax
2498 ; KNL-NEXT: movb %al, (%rdi)
2499 ; KNL-NEXT: vzeroupper
2502 ; SKX-LABEL: store_8i1:
2504 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
2505 ; SKX-NEXT: vpmovw2m %xmm0, %k0
2506 ; SKX-NEXT: kmovb %k0, (%rdi)
2509 ; AVX512BW-LABEL: store_8i1:
2510 ; AVX512BW: ## %bb.0:
2511 ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
2512 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
2513 ; AVX512BW-NEXT: kmovd %k0, %eax
2514 ; AVX512BW-NEXT: movb %al, (%rdi)
2515 ; AVX512BW-NEXT: vzeroupper
2516 ; AVX512BW-NEXT: retq
2518 ; AVX512DQ-LABEL: store_8i1:
2519 ; AVX512DQ: ## %bb.0:
2520 ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
2521 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
2522 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
2523 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
2524 ; AVX512DQ-NEXT: vzeroupper
2525 ; AVX512DQ-NEXT: retq
2527 ; X86-LABEL: store_8i1:
2529 ; X86-NEXT: vpsllw $15, %xmm0, %xmm0
2530 ; X86-NEXT: vpmovw2m %xmm0, %k0
2531 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2532 ; X86-NEXT: kmovb %k0, (%eax)
2534 store <8 x i1> %v, <8 x i1>* %a
; store_8i1_1: truncates an <8 x i16> argument to <8 x i1> and stores the
; resulting mask. In every expected sequence the low bit of each lane is
; shifted into the sign position before the vector is converted to a mask
; register. The runs that enable DQ store the mask directly with kmovb; the
; other runs copy it to a GPR and store one byte with movb.
2538 define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) {
2539 ; KNL-LABEL: store_8i1_1:
2541 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
2542 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
2543 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
2544 ; KNL-NEXT: kmovw %k0, %eax
2545 ; KNL-NEXT: movb %al, (%rdi)
2546 ; KNL-NEXT: vzeroupper
2549 ; SKX-LABEL: store_8i1_1:
2551 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
2552 ; SKX-NEXT: vpmovw2m %xmm0, %k0
2553 ; SKX-NEXT: kmovb %k0, (%rdi)
2556 ; AVX512BW-LABEL: store_8i1_1:
2557 ; AVX512BW: ## %bb.0:
2558 ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
2559 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
2560 ; AVX512BW-NEXT: kmovd %k0, %eax
2561 ; AVX512BW-NEXT: movb %al, (%rdi)
2562 ; AVX512BW-NEXT: vzeroupper
2563 ; AVX512BW-NEXT: retq
2565 ; AVX512DQ-LABEL: store_8i1_1:
2566 ; AVX512DQ: ## %bb.0:
2567 ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
2568 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
2569 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
2570 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
2571 ; AVX512DQ-NEXT: vzeroupper
2572 ; AVX512DQ-NEXT: retq
2574 ; X86-LABEL: store_8i1_1:
2576 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2577 ; X86-NEXT: vpsllw $15, %xmm0, %xmm0
2578 ; X86-NEXT: vpmovw2m %xmm0, %k0
2579 ; X86-NEXT: kmovb %k0, (%eax)
2581 %v1 = trunc <8 x i16> %v to <8 x i1>
2582 store <8 x i1> %v1, <8 x i1>* %a
; store_16i1: stores a <16 x i1> argument to memory. Every run is expected to
; finish with a direct kmovw store of the mask register. The runs that enable
; BW build the mask from byte lanes (vpsllw $7 + vpmovb2m); the runs without
; BW first sign-extend the bytes to 32-bit lanes with vpmovsxbd.
2586 define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) {
2587 ; KNL-LABEL: store_16i1:
2589 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
2590 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2591 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
2592 ; KNL-NEXT: kmovw %k0, (%rdi)
2593 ; KNL-NEXT: vzeroupper
2596 ; SKX-LABEL: store_16i1:
2598 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
2599 ; SKX-NEXT: vpmovb2m %xmm0, %k0
2600 ; SKX-NEXT: kmovw %k0, (%rdi)
2603 ; AVX512BW-LABEL: store_16i1:
2604 ; AVX512BW: ## %bb.0:
2605 ; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
2606 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
2607 ; AVX512BW-NEXT: kmovw %k0, (%rdi)
2608 ; AVX512BW-NEXT: vzeroupper
2609 ; AVX512BW-NEXT: retq
2611 ; AVX512DQ-LABEL: store_16i1:
2612 ; AVX512DQ: ## %bb.0:
2613 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
2614 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2615 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
2616 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
2617 ; AVX512DQ-NEXT: vzeroupper
2618 ; AVX512DQ-NEXT: retq
2620 ; X86-LABEL: store_16i1:
2622 ; X86-NEXT: vpsllw $7, %xmm0, %xmm0
2623 ; X86-NEXT: vpmovb2m %xmm0, %k0
2624 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2625 ; X86-NEXT: kmovw %k0, (%eax)
2627 store <16 x i1> %v, <16 x i1>* %a
; store_32i1: stores a <32 x i1> argument to memory. The runs that enable BW
; are expected to build one 32-bit mask (vpsllw $7 + vpmovb2m) and store it
; with kmovd. The runs without BW split the input into two 128-bit halves,
; convert each to a 16-bit mask and store them with kmovw at byte offsets 0
; and 2.
2631 define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) {
2632 ; KNL-LABEL: store_32i1:
2634 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
2635 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
2636 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
2637 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
2638 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
2639 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2640 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
2641 ; KNL-NEXT: kmovw %k1, 2(%rdi)
2642 ; KNL-NEXT: kmovw %k0, (%rdi)
2643 ; KNL-NEXT: vzeroupper
2646 ; SKX-LABEL: store_32i1:
2648 ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
2649 ; SKX-NEXT: vpmovb2m %ymm0, %k0
2650 ; SKX-NEXT: kmovd %k0, (%rdi)
2651 ; SKX-NEXT: vzeroupper
2654 ; AVX512BW-LABEL: store_32i1:
2655 ; AVX512BW: ## %bb.0:
2656 ; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0
2657 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
2658 ; AVX512BW-NEXT: kmovd %k0, (%rdi)
2659 ; AVX512BW-NEXT: vzeroupper
2660 ; AVX512BW-NEXT: retq
2662 ; AVX512DQ-LABEL: store_32i1:
2663 ; AVX512DQ: ## %bb.0:
2664 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1
2665 ; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1
2666 ; AVX512DQ-NEXT: vpmovd2m %zmm1, %k0
2667 ; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0
2668 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
2669 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2670 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
2671 ; AVX512DQ-NEXT: kmovw %k1, 2(%rdi)
2672 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
2673 ; AVX512DQ-NEXT: vzeroupper
2674 ; AVX512DQ-NEXT: retq
2676 ; X86-LABEL: store_32i1:
2678 ; X86-NEXT: vpsllw $7, %ymm0, %ymm0
2679 ; X86-NEXT: vpmovb2m %ymm0, %k0
2680 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2681 ; X86-NEXT: kmovd %k0, (%eax)
2682 ; X86-NEXT: vzeroupper
2684 store <32 x i1> %v, <32 x i1>* %a
; store_32i1_1: truncates a <32 x i16> argument to <32 x i1> and stores the
; resulting mask. The runs that enable BW are expected to build one 32-bit
; mask (vpsllw $15 + vpmovw2m) and store it with kmovd. The runs without BW
; split the input into two 256-bit halves with vextracti64x4, convert each to
; a 16-bit mask and store them with kmovw at byte offsets 0 and 2.
2688 define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) {
2689 ; KNL-LABEL: store_32i1_1:
2691 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
2692 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
2693 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2694 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
2695 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm0
2696 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2697 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
2698 ; KNL-NEXT: kmovw %k1, 2(%rdi)
2699 ; KNL-NEXT: kmovw %k0, (%rdi)
2700 ; KNL-NEXT: vzeroupper
2703 ; SKX-LABEL: store_32i1_1:
2705 ; SKX-NEXT: vpsllw $15, %zmm0, %zmm0
2706 ; SKX-NEXT: vpmovw2m %zmm0, %k0
2707 ; SKX-NEXT: kmovd %k0, (%rdi)
2708 ; SKX-NEXT: vzeroupper
2711 ; AVX512BW-LABEL: store_32i1_1:
2712 ; AVX512BW: ## %bb.0:
2713 ; AVX512BW-NEXT: vpsllw $15, %zmm0, %zmm0
2714 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
2715 ; AVX512BW-NEXT: kmovd %k0, (%rdi)
2716 ; AVX512BW-NEXT: vzeroupper
2717 ; AVX512BW-NEXT: retq
2719 ; AVX512DQ-LABEL: store_32i1_1:
2720 ; AVX512DQ: ## %bb.0:
2721 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
2722 ; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
2723 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2724 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
2725 ; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm0
2726 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2727 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
2728 ; AVX512DQ-NEXT: kmovw %k1, 2(%rdi)
2729 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
2730 ; AVX512DQ-NEXT: vzeroupper
2731 ; AVX512DQ-NEXT: retq
2733 ; X86-LABEL: store_32i1_1:
2735 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2736 ; X86-NEXT: vpsllw $15, %zmm0, %zmm0
2737 ; X86-NEXT: vpmovw2m %zmm0, %k0
2738 ; X86-NEXT: kmovd %k0, (%eax)
2739 ; X86-NEXT: vzeroupper
2741 %v1 = trunc <32 x i16> %v to <32 x i1>
2742 store <32 x i1> %v1, <32 x i1>* %a
2747 define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
2749 ; KNL-LABEL: store_64i1:
2751 ; KNL-NEXT: kmovw %ecx, %k0
2752 ; KNL-NEXT: kmovw %esi, %k2
2753 ; KNL-NEXT: kshiftlw $15, %k0, %k1
2754 ; KNL-NEXT: kshiftrw $14, %k1, %k1
2755 ; KNL-NEXT: kxorw %k1, %k2, %k2
2756 ; KNL-NEXT: kshiftrw $2, %k2, %k3
2757 ; KNL-NEXT: kxorw %k0, %k3, %k0
2758 ; KNL-NEXT: kshiftlw $15, %k0, %k0
2759 ; KNL-NEXT: kshiftrw $13, %k0, %k0
2760 ; KNL-NEXT: kxorw %k0, %k2, %k0
2761 ; KNL-NEXT: kshiftrw $3, %k0, %k2
2762 ; KNL-NEXT: kmovw %r8d, %k3
2763 ; KNL-NEXT: kxorw %k3, %k2, %k2
2764 ; KNL-NEXT: kshiftlw $15, %k2, %k2
2765 ; KNL-NEXT: kshiftrw $12, %k2, %k2
2766 ; KNL-NEXT: kxorw %k2, %k0, %k0
2767 ; KNL-NEXT: kshiftrw $4, %k0, %k2
2768 ; KNL-NEXT: kmovw %r9d, %k3
2769 ; KNL-NEXT: kxorw %k3, %k2, %k2
2770 ; KNL-NEXT: kshiftlw $15, %k2, %k2
2771 ; KNL-NEXT: kshiftrw $11, %k2, %k2
2772 ; KNL-NEXT: kxorw %k2, %k0, %k0
2773 ; KNL-NEXT: kshiftrw $5, %k0, %k2
2774 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2775 ; KNL-NEXT: kmovw %eax, %k3
2776 ; KNL-NEXT: kxorw %k3, %k2, %k2
2777 ; KNL-NEXT: kshiftlw $15, %k2, %k2
2778 ; KNL-NEXT: kshiftrw $10, %k2, %k2
2779 ; KNL-NEXT: kxorw %k2, %k0, %k0
2780 ; KNL-NEXT: kshiftrw $6, %k0, %k2
2781 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2782 ; KNL-NEXT: kmovw %eax, %k3
2783 ; KNL-NEXT: kxorw %k3, %k2, %k2
2784 ; KNL-NEXT: kshiftlw $15, %k2, %k2
2785 ; KNL-NEXT: kshiftrw $9, %k2, %k2
2786 ; KNL-NEXT: kxorw %k2, %k0, %k0
2787 ; KNL-NEXT: kshiftrw $7, %k0, %k2
2788 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2789 ; KNL-NEXT: kmovw %eax, %k3
2790 ; KNL-NEXT: kxorw %k3, %k2, %k2
2791 ; KNL-NEXT: kshiftlw $15, %k2, %k2
2792 ; KNL-NEXT: kshiftrw $8, %k2, %k2
2793 ; KNL-NEXT: kxorw %k2, %k0, %k0
2794 ; KNL-NEXT: kshiftrw $8, %k0, %k2
2795 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2796 ; KNL-NEXT: kmovw %eax, %k3
2797 ; KNL-NEXT: kxorw %k3, %k2, %k2
2798 ; KNL-NEXT: kshiftlw $15, %k2, %k2
2799 ; KNL-NEXT: kshiftrw $7, %k2, %k2
2800 ; KNL-NEXT: kxorw %k2, %k0, %k0
2801 ; KNL-NEXT: kshiftrw $9, %k0, %k2
2802 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2803 ; KNL-NEXT: kmovw %eax, %k3
2804 ; KNL-NEXT: kxorw %k3, %k2, %k2
2805 ; KNL-NEXT: kshiftlw $15, %k2, %k2
2806 ; KNL-NEXT: kshiftrw $6, %k2, %k2
2807 ; KNL-NEXT: kxorw %k2, %k0, %k0
2808 ; KNL-NEXT: kshiftrw $10, %k0, %k2
2809 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2810 ; KNL-NEXT: kmovw %eax, %k3
2811 ; KNL-NEXT: kxorw %k3, %k2, %k2
2812 ; KNL-NEXT: kshiftlw $15, %k2, %k2
2813 ; KNL-NEXT: kshiftrw $5, %k2, %k2
2814 ; KNL-NEXT: kxorw %k2, %k0, %k0
2815 ; KNL-NEXT: kshiftrw $11, %k0, %k2
2816 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2817 ; KNL-NEXT: kmovw %eax, %k3
2818 ; KNL-NEXT: kxorw %k3, %k2, %k2
2819 ; KNL-NEXT: kshiftlw $15, %k2, %k2
2820 ; KNL-NEXT: kshiftrw $4, %k2, %k2
2821 ; KNL-NEXT: kxorw %k2, %k0, %k0
2822 ; KNL-NEXT: kshiftrw $12, %k0, %k2
2823 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2824 ; KNL-NEXT: kmovw %eax, %k3
2825 ; KNL-NEXT: kxorw %k3, %k2, %k2
2826 ; KNL-NEXT: kshiftlw $15, %k2, %k2
2827 ; KNL-NEXT: kshiftrw $3, %k2, %k2
2828 ; KNL-NEXT: kxorw %k2, %k0, %k0
2829 ; KNL-NEXT: kshiftrw $13, %k0, %k2
2830 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2831 ; KNL-NEXT: kmovw %eax, %k3
2832 ; KNL-NEXT: kxorw %k3, %k2, %k2
2833 ; KNL-NEXT: kshiftlw $15, %k2, %k2
2834 ; KNL-NEXT: kshiftrw $2, %k2, %k2
2835 ; KNL-NEXT: kxorw %k2, %k0, %k0
2836 ; KNL-NEXT: kshiftrw $14, %k0, %k2
2837 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2838 ; KNL-NEXT: kmovw %eax, %k3
2839 ; KNL-NEXT: kxorw %k3, %k2, %k2
2840 ; KNL-NEXT: kshiftlw $14, %k2, %k2
2841 ; KNL-NEXT: kxorw %k2, %k0, %k0
2842 ; KNL-NEXT: kshiftlw $1, %k0, %k0
2843 ; KNL-NEXT: kshiftrw $1, %k0, %k0
2844 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2845 ; KNL-NEXT: kmovw %eax, %k2
2846 ; KNL-NEXT: kshiftlw $15, %k2, %k2
2847 ; KNL-NEXT: korw %k2, %k0, %k0
2848 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2849 ; KNL-NEXT: kmovw %eax, %k2
2850 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2851 ; KNL-NEXT: kmovw %eax, %k3
2852 ; KNL-NEXT: kxorw %k1, %k3, %k3
2853 ; KNL-NEXT: kshiftrw $2, %k3, %k4
2854 ; KNL-NEXT: kxorw %k2, %k4, %k2
2855 ; KNL-NEXT: kshiftlw $15, %k2, %k2
2856 ; KNL-NEXT: kshiftrw $13, %k2, %k2
2857 ; KNL-NEXT: kxorw %k2, %k3, %k2
2858 ; KNL-NEXT: kshiftrw $3, %k2, %k3
2859 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2860 ; KNL-NEXT: kmovw %eax, %k4
2861 ; KNL-NEXT: kxorw %k4, %k3, %k3
2862 ; KNL-NEXT: kshiftlw $15, %k3, %k3
2863 ; KNL-NEXT: kshiftrw $12, %k3, %k3
2864 ; KNL-NEXT: kxorw %k3, %k2, %k2
2865 ; KNL-NEXT: kshiftrw $4, %k2, %k3
2866 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2867 ; KNL-NEXT: kmovw %eax, %k4
2868 ; KNL-NEXT: kxorw %k4, %k3, %k3
2869 ; KNL-NEXT: kshiftlw $15, %k3, %k3
2870 ; KNL-NEXT: kshiftrw $11, %k3, %k3
2871 ; KNL-NEXT: kxorw %k3, %k2, %k2
2872 ; KNL-NEXT: kshiftrw $5, %k2, %k3
2873 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2874 ; KNL-NEXT: kmovw %eax, %k4
2875 ; KNL-NEXT: kxorw %k4, %k3, %k3
2876 ; KNL-NEXT: kshiftlw $15, %k3, %k3
2877 ; KNL-NEXT: kshiftrw $10, %k3, %k3
2878 ; KNL-NEXT: kxorw %k3, %k2, %k2
2879 ; KNL-NEXT: kshiftrw $6, %k2, %k3
2880 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2881 ; KNL-NEXT: kmovw %eax, %k4
2882 ; KNL-NEXT: kxorw %k4, %k3, %k3
2883 ; KNL-NEXT: kshiftlw $15, %k3, %k3
2884 ; KNL-NEXT: kshiftrw $9, %k3, %k3
2885 ; KNL-NEXT: kxorw %k3, %k2, %k2
2886 ; KNL-NEXT: kshiftrw $7, %k2, %k3
2887 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2888 ; KNL-NEXT: kmovw %eax, %k4
2889 ; KNL-NEXT: kxorw %k4, %k3, %k3
2890 ; KNL-NEXT: kshiftlw $15, %k3, %k3
2891 ; KNL-NEXT: kshiftrw $8, %k3, %k3
2892 ; KNL-NEXT: kxorw %k3, %k2, %k2
2893 ; KNL-NEXT: kshiftrw $8, %k2, %k3
2894 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2895 ; KNL-NEXT: kmovw %eax, %k4
2896 ; KNL-NEXT: kxorw %k4, %k3, %k3
2897 ; KNL-NEXT: kshiftlw $15, %k3, %k3
2898 ; KNL-NEXT: kshiftrw $7, %k3, %k3
2899 ; KNL-NEXT: kxorw %k3, %k2, %k2
2900 ; KNL-NEXT: kshiftrw $9, %k2, %k3
2901 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2902 ; KNL-NEXT: kmovw %eax, %k4
2903 ; KNL-NEXT: kxorw %k4, %k3, %k3
2904 ; KNL-NEXT: kshiftlw $15, %k3, %k3
2905 ; KNL-NEXT: kshiftrw $6, %k3, %k3
2906 ; KNL-NEXT: kxorw %k3, %k2, %k2
2907 ; KNL-NEXT: kshiftrw $10, %k2, %k3
2908 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2909 ; KNL-NEXT: kmovw %eax, %k4
2910 ; KNL-NEXT: kxorw %k4, %k3, %k3
2911 ; KNL-NEXT: kshiftlw $15, %k3, %k3
2912 ; KNL-NEXT: kshiftrw $5, %k3, %k3
2913 ; KNL-NEXT: kxorw %k3, %k2, %k2
2914 ; KNL-NEXT: kshiftrw $11, %k2, %k3
2915 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2916 ; KNL-NEXT: kmovw %eax, %k4
2917 ; KNL-NEXT: kxorw %k4, %k3, %k3
2918 ; KNL-NEXT: kshiftlw $15, %k3, %k3
2919 ; KNL-NEXT: kshiftrw $4, %k3, %k3
2920 ; KNL-NEXT: kxorw %k3, %k2, %k2
2921 ; KNL-NEXT: kshiftrw $12, %k2, %k3
2922 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2923 ; KNL-NEXT: kmovw %eax, %k4
2924 ; KNL-NEXT: kxorw %k4, %k3, %k3
2925 ; KNL-NEXT: kshiftlw $15, %k3, %k3
2926 ; KNL-NEXT: kshiftrw $3, %k3, %k3
2927 ; KNL-NEXT: kxorw %k3, %k2, %k2
2928 ; KNL-NEXT: kshiftrw $13, %k2, %k3
2929 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2930 ; KNL-NEXT: kmovw %eax, %k4
2931 ; KNL-NEXT: kxorw %k4, %k3, %k3
2932 ; KNL-NEXT: kshiftlw $15, %k3, %k3
2933 ; KNL-NEXT: kshiftrw $2, %k3, %k3
2934 ; KNL-NEXT: kxorw %k3, %k2, %k2
2935 ; KNL-NEXT: kshiftrw $14, %k2, %k3
2936 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2937 ; KNL-NEXT: kmovw %eax, %k4
2938 ; KNL-NEXT: kxorw %k4, %k3, %k3
2939 ; KNL-NEXT: kshiftlw $14, %k3, %k3
2940 ; KNL-NEXT: kxorw %k3, %k2, %k2
2941 ; KNL-NEXT: kshiftlw $1, %k2, %k2
2942 ; KNL-NEXT: kshiftrw $1, %k2, %k2
2943 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2944 ; KNL-NEXT: kmovw %eax, %k3
2945 ; KNL-NEXT: kshiftlw $15, %k3, %k3
2946 ; KNL-NEXT: korw %k3, %k2, %k2
2947 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2948 ; KNL-NEXT: kmovw %eax, %k3
2949 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2950 ; KNL-NEXT: kmovw %eax, %k4
2951 ; KNL-NEXT: kxorw %k1, %k4, %k4
2952 ; KNL-NEXT: kshiftrw $2, %k4, %k5
2953 ; KNL-NEXT: kxorw %k3, %k5, %k3
2954 ; KNL-NEXT: kshiftlw $15, %k3, %k3
2955 ; KNL-NEXT: kshiftrw $13, %k3, %k3
2956 ; KNL-NEXT: kxorw %k3, %k4, %k3
2957 ; KNL-NEXT: kshiftrw $3, %k3, %k4
2958 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2959 ; KNL-NEXT: kmovw %eax, %k5
2960 ; KNL-NEXT: kxorw %k5, %k4, %k4
2961 ; KNL-NEXT: kshiftlw $15, %k4, %k4
2962 ; KNL-NEXT: kshiftrw $12, %k4, %k4
2963 ; KNL-NEXT: kxorw %k4, %k3, %k3
2964 ; KNL-NEXT: kshiftrw $4, %k3, %k4
2965 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2966 ; KNL-NEXT: kmovw %eax, %k5
2967 ; KNL-NEXT: kxorw %k5, %k4, %k4
2968 ; KNL-NEXT: kshiftlw $15, %k4, %k4
2969 ; KNL-NEXT: kshiftrw $11, %k4, %k4
2970 ; KNL-NEXT: kxorw %k4, %k3, %k3
2971 ; KNL-NEXT: kshiftrw $5, %k3, %k4
2972 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2973 ; KNL-NEXT: kmovw %eax, %k5
2974 ; KNL-NEXT: kxorw %k5, %k4, %k4
2975 ; KNL-NEXT: kshiftlw $15, %k4, %k4
2976 ; KNL-NEXT: kshiftrw $10, %k4, %k4
2977 ; KNL-NEXT: kxorw %k4, %k3, %k3
2978 ; KNL-NEXT: kshiftrw $6, %k3, %k4
2979 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2980 ; KNL-NEXT: kmovw %eax, %k5
2981 ; KNL-NEXT: kxorw %k5, %k4, %k4
2982 ; KNL-NEXT: kshiftlw $15, %k4, %k4
2983 ; KNL-NEXT: kshiftrw $9, %k4, %k4
2984 ; KNL-NEXT: kxorw %k4, %k3, %k3
2985 ; KNL-NEXT: kshiftrw $7, %k3, %k4
2986 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2987 ; KNL-NEXT: kmovw %eax, %k5
2988 ; KNL-NEXT: kxorw %k5, %k4, %k4
2989 ; KNL-NEXT: kshiftlw $15, %k4, %k4
2990 ; KNL-NEXT: kshiftrw $8, %k4, %k4
2991 ; KNL-NEXT: kxorw %k4, %k3, %k3
2992 ; KNL-NEXT: kshiftrw $8, %k3, %k4
2993 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2994 ; KNL-NEXT: kmovw %eax, %k5
2995 ; KNL-NEXT: kxorw %k5, %k4, %k4
2996 ; KNL-NEXT: kshiftlw $15, %k4, %k4
2997 ; KNL-NEXT: kshiftrw $7, %k4, %k4
2998 ; KNL-NEXT: kxorw %k4, %k3, %k3
2999 ; KNL-NEXT: kshiftrw $9, %k3, %k4
3000 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
3001 ; KNL-NEXT: kmovw %eax, %k5
3002 ; KNL-NEXT: kxorw %k5, %k4, %k4
3003 ; KNL-NEXT: kshiftlw $15, %k4, %k4
3004 ; KNL-NEXT: kshiftrw $6, %k4, %k4
3005 ; KNL-NEXT: kxorw %k4, %k3, %k3
3006 ; KNL-NEXT: kshiftrw $10, %k3, %k4
3007 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
3008 ; KNL-NEXT: kmovw %eax, %k5
3009 ; KNL-NEXT: kxorw %k5, %k4, %k4
3010 ; KNL-NEXT: kshiftlw $15, %k4, %k4
3011 ; KNL-NEXT: kshiftrw $5, %k4, %k4
3012 ; KNL-NEXT: kxorw %k4, %k3, %k3
3013 ; KNL-NEXT: kshiftrw $11, %k3, %k4
3014 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
3015 ; KNL-NEXT: kmovw %eax, %k5
3016 ; KNL-NEXT: kxorw %k5, %k4, %k4
3017 ; KNL-NEXT: kshiftlw $15, %k4, %k4
3018 ; KNL-NEXT: kshiftrw $4, %k4, %k4
3019 ; KNL-NEXT: kxorw %k4, %k3, %k3
3020 ; KNL-NEXT: kshiftrw $12, %k3, %k4
3021 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
3022 ; KNL-NEXT: kmovw %eax, %k5
3023 ; KNL-NEXT: kxorw %k5, %k4, %k4
3024 ; KNL-NEXT: kshiftlw $15, %k4, %k4
3025 ; KNL-NEXT: kshiftrw $3, %k4, %k4
3026 ; KNL-NEXT: kxorw %k4, %k3, %k3
3027 ; KNL-NEXT: kshiftrw $13, %k3, %k4
3028 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
3029 ; KNL-NEXT: kmovw %eax, %k5
3030 ; KNL-NEXT: kxorw %k5, %k4, %k4
3031 ; KNL-NEXT: kshiftlw $15, %k4, %k4
3032 ; KNL-NEXT: kshiftrw $2, %k4, %k4
3033 ; KNL-NEXT: kxorw %k4, %k3, %k3
3034 ; KNL-NEXT: kshiftrw $14, %k3, %k4
3035 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
3036 ; KNL-NEXT: kmovw %eax, %k5
3037 ; KNL-NEXT: kxorw %k5, %k4, %k4
3038 ; KNL-NEXT: kshiftlw $14, %k4, %k4
3039 ; KNL-NEXT: kxorw %k4, %k3, %k3
3040 ; KNL-NEXT: kshiftlw $1, %k3, %k3
3041 ; KNL-NEXT: kshiftrw $1, %k3, %k3
3042 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
3043 ; KNL-NEXT: kmovw %eax, %k4
3044 ; KNL-NEXT: kshiftlw $15, %k4, %k4
3045 ; KNL-NEXT: korw %k4, %k3, %k3
3046 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
3047 ; KNL-NEXT: kmovw %eax, %k4
3048 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
3049 ; KNL-NEXT: kmovw %eax, %k5
3050 ; KNL-NEXT: kxorw %k1, %k5, %k1
3051 ; KNL-NEXT: kshiftrw $2, %k1, %k5
3052 ; KNL-NEXT: kxorw %k4, %k5, %k4
3053 ; KNL-NEXT: kshiftlw $15, %k4, %k4
3054 ; KNL-NEXT: kshiftrw $13, %k4, %k4
3055 ; KNL-NEXT: kxorw %k4, %k1, %k1
3056 ; KNL-NEXT: kshiftrw $3, %k1, %k4
3057 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
3058 ; KNL-NEXT: kmovw %eax, %k5
3059 ; KNL-NEXT: kxorw %k5, %k4, %k4
3060 ; KNL-NEXT: kshiftlw $15, %k4, %k4
3061 ; KNL-NEXT: kshiftrw $12, %k4, %k4
3062 ; KNL-NEXT: kxorw %k4, %k1, %k1
3063 ; KNL-NEXT: kshiftrw $4, %k1, %k4
3064 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
3065 ; KNL-NEXT: kmovw %eax, %k5
3066 ; KNL-NEXT: kxorw %k5, %k4, %k4
3067 ; KNL-NEXT: kshiftlw $15, %k4, %k4
3068 ; KNL-NEXT: kshiftrw $11, %k4, %k4
3069 ; KNL-NEXT: kxorw %k4, %k1, %k1
3070 ; KNL-NEXT: kshiftrw $5, %k1, %k4
3071 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
3072 ; KNL-NEXT: kmovw %eax, %k5
3073 ; KNL-NEXT: kxorw %k5, %k4, %k4
3074 ; KNL-NEXT: kshiftlw $15, %k4, %k4
3075 ; KNL-NEXT: kshiftrw $10, %k4, %k4
3076 ; KNL-NEXT: kxorw %k4, %k1, %k1
3077 ; KNL-NEXT: kshiftrw $6, %k1, %k4
3078 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
3079 ; KNL-NEXT: kmovw %eax, %k5
3080 ; KNL-NEXT: kxorw %k5, %k4, %k4
3081 ; KNL-NEXT: kshiftlw $15, %k4, %k4
3082 ; KNL-NEXT: kshiftrw $9, %k4, %k4
3083 ; KNL-NEXT: kxorw %k4, %k1, %k1
3084 ; KNL-NEXT: kshiftrw $7, %k1, %k4
3085 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
3086 ; KNL-NEXT: kmovw %eax, %k5
3087 ; KNL-NEXT: kxorw %k5, %k4, %k4
3088 ; KNL-NEXT: kshiftlw $15, %k4, %k4
3089 ; KNL-NEXT: kshiftrw $8, %k4, %k4
3090 ; KNL-NEXT: kxorw %k4, %k1, %k1
3091 ; KNL-NEXT: kshiftrw $8, %k1, %k4
3092 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
3093 ; KNL-NEXT: kmovw %eax, %k5
3094 ; KNL-NEXT: kxorw %k5, %k4, %k4
3095 ; KNL-NEXT: kshiftlw $15, %k4, %k4
3096 ; KNL-NEXT: kshiftrw $7, %k4, %k4
3097 ; KNL-NEXT: kxorw %k4, %k1, %k1
3098 ; KNL-NEXT: kshiftrw $9, %k1, %k4
3099 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
3100 ; KNL-NEXT: kmovw %eax, %k5
3101 ; KNL-NEXT: kxorw %k5, %k4, %k4
3102 ; KNL-NEXT: kshiftlw $15, %k4, %k4
3103 ; KNL-NEXT: kshiftrw $6, %k4, %k4
3104 ; KNL-NEXT: kxorw %k4, %k1, %k1
3105 ; KNL-NEXT: kshiftrw $10, %k1, %k4
3106 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
3107 ; KNL-NEXT: kmovw %eax, %k5
3108 ; KNL-NEXT: kxorw %k5, %k4, %k4
3109 ; KNL-NEXT: kshiftlw $15, %k4, %k4
3110 ; KNL-NEXT: kshiftrw $5, %k4, %k4
3111 ; KNL-NEXT: kxorw %k4, %k1, %k1
3112 ; KNL-NEXT: kshiftrw $11, %k1, %k4
3113 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
3114 ; KNL-NEXT: kmovw %eax, %k5
3115 ; KNL-NEXT: kxorw %k5, %k4, %k4
3116 ; KNL-NEXT: kshiftlw $15, %k4, %k4
3117 ; KNL-NEXT: kshiftrw $4, %k4, %k4
3118 ; KNL-NEXT: kxorw %k4, %k1, %k1
3119 ; KNL-NEXT: kshiftrw $12, %k1, %k4
3120 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
3121 ; KNL-NEXT: kmovw %eax, %k5
3122 ; KNL-NEXT: kxorw %k5, %k4, %k4
3123 ; KNL-NEXT: kshiftlw $15, %k4, %k4
3124 ; KNL-NEXT: kshiftrw $3, %k4, %k4
3125 ; KNL-NEXT: kxorw %k4, %k1, %k1
3126 ; KNL-NEXT: kshiftrw $13, %k1, %k4
3127 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
3128 ; KNL-NEXT: kmovw %eax, %k5
3129 ; KNL-NEXT: kxorw %k5, %k4, %k4
3130 ; KNL-NEXT: kshiftlw $15, %k4, %k4
3131 ; KNL-NEXT: kshiftrw $2, %k4, %k4
3132 ; KNL-NEXT: kxorw %k4, %k1, %k1
3133 ; KNL-NEXT: kshiftrw $14, %k1, %k4
3134 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
3135 ; KNL-NEXT: kmovw %eax, %k5
3136 ; KNL-NEXT: kxorw %k5, %k4, %k4
3137 ; KNL-NEXT: kshiftlw $14, %k4, %k4
3138 ; KNL-NEXT: kxorw %k4, %k1, %k1
3139 ; KNL-NEXT: kshiftlw $1, %k1, %k1
3140 ; KNL-NEXT: kshiftrw $1, %k1, %k1
3141 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
3142 ; KNL-NEXT: kmovw %eax, %k4
3143 ; KNL-NEXT: kshiftlw $15, %k4, %k4
3144 ; KNL-NEXT: korw %k4, %k1, %k1
3145 ; KNL-NEXT: kmovw %k1, 6(%rdi)
3146 ; KNL-NEXT: kmovw %k3, 4(%rdi)
3147 ; KNL-NEXT: kmovw %k2, 2(%rdi)
3148 ; KNL-NEXT: kmovw %k0, (%rdi)
3151 ; SKX-LABEL: store_64i1:
3153 ; SKX-NEXT: vpsllw $7, %zmm0, %zmm0
3154 ; SKX-NEXT: vpmovb2m %zmm0, %k0
3155 ; SKX-NEXT: kmovq %k0, (%rdi)
3156 ; SKX-NEXT: vzeroupper
3159 ; AVX512BW-LABEL: store_64i1:
3160 ; AVX512BW: ## %bb.0:
3161 ; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0
3162 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
3163 ; AVX512BW-NEXT: kmovq %k0, (%rdi)
3164 ; AVX512BW-NEXT: vzeroupper
3165 ; AVX512BW-NEXT: retq
3167 ; AVX512DQ-LABEL: store_64i1:
3168 ; AVX512DQ: ## %bb.0:
3169 ; AVX512DQ-NEXT: kmovw %ecx, %k0
3170 ; AVX512DQ-NEXT: kmovw %esi, %k2
3171 ; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
3172 ; AVX512DQ-NEXT: kshiftrw $14, %k1, %k1
3173 ; AVX512DQ-NEXT: kxorw %k1, %k2, %k2
3174 ; AVX512DQ-NEXT: kshiftrw $2, %k2, %k3
3175 ; AVX512DQ-NEXT: kxorw %k0, %k3, %k0
3176 ; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0
3177 ; AVX512DQ-NEXT: kshiftrw $13, %k0, %k0
3178 ; AVX512DQ-NEXT: kxorw %k0, %k2, %k0
3179 ; AVX512DQ-NEXT: kshiftrw $3, %k0, %k2
3180 ; AVX512DQ-NEXT: kmovw %r8d, %k3
3181 ; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
3182 ; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
3183 ; AVX512DQ-NEXT: kshiftrw $12, %k2, %k2
3184 ; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
3185 ; AVX512DQ-NEXT: kshiftrw $4, %k0, %k2
3186 ; AVX512DQ-NEXT: kmovw %r9d, %k3
3187 ; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
3188 ; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
3189 ; AVX512DQ-NEXT: kshiftrw $11, %k2, %k2
3190 ; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
3191 ; AVX512DQ-NEXT: kshiftrw $5, %k0, %k2
3192 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3193 ; AVX512DQ-NEXT: kmovw %eax, %k3
3194 ; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
3195 ; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
3196 ; AVX512DQ-NEXT: kshiftrw $10, %k2, %k2
3197 ; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
3198 ; AVX512DQ-NEXT: kshiftrw $6, %k0, %k2
3199 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3200 ; AVX512DQ-NEXT: kmovw %eax, %k3
3201 ; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
3202 ; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
3203 ; AVX512DQ-NEXT: kshiftrw $9, %k2, %k2
3204 ; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
3205 ; AVX512DQ-NEXT: kshiftrw $7, %k0, %k2
3206 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3207 ; AVX512DQ-NEXT: kmovw %eax, %k3
3208 ; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
3209 ; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
3210 ; AVX512DQ-NEXT: kshiftrw $8, %k2, %k2
3211 ; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
3212 ; AVX512DQ-NEXT: kshiftrw $8, %k0, %k2
3213 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3214 ; AVX512DQ-NEXT: kmovw %eax, %k3
3215 ; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
3216 ; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
3217 ; AVX512DQ-NEXT: kshiftrw $7, %k2, %k2
3218 ; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
3219 ; AVX512DQ-NEXT: kshiftrw $9, %k0, %k2
3220 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3221 ; AVX512DQ-NEXT: kmovw %eax, %k3
3222 ; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
3223 ; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
3224 ; AVX512DQ-NEXT: kshiftrw $6, %k2, %k2
3225 ; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
3226 ; AVX512DQ-NEXT: kshiftrw $10, %k0, %k2
3227 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3228 ; AVX512DQ-NEXT: kmovw %eax, %k3
3229 ; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
3230 ; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
3231 ; AVX512DQ-NEXT: kshiftrw $5, %k2, %k2
3232 ; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
3233 ; AVX512DQ-NEXT: kshiftrw $11, %k0, %k2
3234 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3235 ; AVX512DQ-NEXT: kmovw %eax, %k3
3236 ; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
3237 ; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
3238 ; AVX512DQ-NEXT: kshiftrw $4, %k2, %k2
3239 ; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
3240 ; AVX512DQ-NEXT: kshiftrw $12, %k0, %k2
3241 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3242 ; AVX512DQ-NEXT: kmovw %eax, %k3
3243 ; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
3244 ; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
3245 ; AVX512DQ-NEXT: kshiftrw $3, %k2, %k2
3246 ; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
3247 ; AVX512DQ-NEXT: kshiftrw $13, %k0, %k2
3248 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3249 ; AVX512DQ-NEXT: kmovw %eax, %k3
3250 ; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
3251 ; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
3252 ; AVX512DQ-NEXT: kshiftrw $2, %k2, %k2
3253 ; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
3254 ; AVX512DQ-NEXT: kshiftrw $14, %k0, %k2
3255 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3256 ; AVX512DQ-NEXT: kmovw %eax, %k3
3257 ; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
3258 ; AVX512DQ-NEXT: kshiftlw $14, %k2, %k2
3259 ; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
3260 ; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0
3261 ; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0
3262 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3263 ; AVX512DQ-NEXT: kmovw %eax, %k2
3264 ; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
3265 ; AVX512DQ-NEXT: korw %k2, %k0, %k0
3266 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3267 ; AVX512DQ-NEXT: kmovw %eax, %k2
3268 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3269 ; AVX512DQ-NEXT: kmovw %eax, %k3
3270 ; AVX512DQ-NEXT: kxorw %k1, %k3, %k3
3271 ; AVX512DQ-NEXT: kshiftrw $2, %k3, %k4
3272 ; AVX512DQ-NEXT: kxorw %k2, %k4, %k2
3273 ; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
3274 ; AVX512DQ-NEXT: kshiftrw $13, %k2, %k2
3275 ; AVX512DQ-NEXT: kxorw %k2, %k3, %k2
3276 ; AVX512DQ-NEXT: kshiftrw $3, %k2, %k3
3277 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3278 ; AVX512DQ-NEXT: kmovw %eax, %k4
3279 ; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
3280 ; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3
3281 ; AVX512DQ-NEXT: kshiftrw $12, %k3, %k3
3282 ; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
3283 ; AVX512DQ-NEXT: kshiftrw $4, %k2, %k3
3284 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3285 ; AVX512DQ-NEXT: kmovw %eax, %k4
3286 ; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
3287 ; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3
3288 ; AVX512DQ-NEXT: kshiftrw $11, %k3, %k3
3289 ; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
3290 ; AVX512DQ-NEXT: kshiftrw $5, %k2, %k3
3291 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3292 ; AVX512DQ-NEXT: kmovw %eax, %k4
3293 ; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
3294 ; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3
3295 ; AVX512DQ-NEXT: kshiftrw $10, %k3, %k3
3296 ; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
3297 ; AVX512DQ-NEXT: kshiftrw $6, %k2, %k3
3298 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3299 ; AVX512DQ-NEXT: kmovw %eax, %k4
3300 ; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
3301 ; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3
3302 ; AVX512DQ-NEXT: kshiftrw $9, %k3, %k3
3303 ; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
3304 ; AVX512DQ-NEXT: kshiftrw $7, %k2, %k3
3305 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3306 ; AVX512DQ-NEXT: kmovw %eax, %k4
3307 ; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
3308 ; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3
3309 ; AVX512DQ-NEXT: kshiftrw $8, %k3, %k3
3310 ; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
3311 ; AVX512DQ-NEXT: kshiftrw $8, %k2, %k3
3312 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3313 ; AVX512DQ-NEXT: kmovw %eax, %k4
3314 ; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
3315 ; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3
3316 ; AVX512DQ-NEXT: kshiftrw $7, %k3, %k3
3317 ; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
3318 ; AVX512DQ-NEXT: kshiftrw $9, %k2, %k3
3319 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3320 ; AVX512DQ-NEXT: kmovw %eax, %k4
3321 ; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
3322 ; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3
3323 ; AVX512DQ-NEXT: kshiftrw $6, %k3, %k3
3324 ; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
3325 ; AVX512DQ-NEXT: kshiftrw $10, %k2, %k3
3326 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3327 ; AVX512DQ-NEXT: kmovw %eax, %k4
3328 ; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
3329 ; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3
3330 ; AVX512DQ-NEXT: kshiftrw $5, %k3, %k3
3331 ; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
3332 ; AVX512DQ-NEXT: kshiftrw $11, %k2, %k3
3333 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3334 ; AVX512DQ-NEXT: kmovw %eax, %k4
3335 ; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
3336 ; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3
3337 ; AVX512DQ-NEXT: kshiftrw $4, %k3, %k3
3338 ; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
3339 ; AVX512DQ-NEXT: kshiftrw $12, %k2, %k3
3340 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3341 ; AVX512DQ-NEXT: kmovw %eax, %k4
3342 ; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
3343 ; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3
3344 ; AVX512DQ-NEXT: kshiftrw $3, %k3, %k3
3345 ; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
3346 ; AVX512DQ-NEXT: kshiftrw $13, %k2, %k3
3347 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3348 ; AVX512DQ-NEXT: kmovw %eax, %k4
3349 ; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
3350 ; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3
3351 ; AVX512DQ-NEXT: kshiftrw $2, %k3, %k3
3352 ; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
3353 ; AVX512DQ-NEXT: kshiftrw $14, %k2, %k3
3354 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3355 ; AVX512DQ-NEXT: kmovw %eax, %k4
3356 ; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
3357 ; AVX512DQ-NEXT: kshiftlw $14, %k3, %k3
3358 ; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
3359 ; AVX512DQ-NEXT: kshiftlw $1, %k2, %k2
3360 ; AVX512DQ-NEXT: kshiftrw $1, %k2, %k2
3361 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3362 ; AVX512DQ-NEXT: kmovw %eax, %k3
3363 ; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3
3364 ; AVX512DQ-NEXT: korw %k3, %k2, %k2
3365 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3366 ; AVX512DQ-NEXT: kmovw %eax, %k3
3367 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3368 ; AVX512DQ-NEXT: kmovw %eax, %k4
3369 ; AVX512DQ-NEXT: kxorw %k1, %k4, %k4
3370 ; AVX512DQ-NEXT: kshiftrw $2, %k4, %k5
3371 ; AVX512DQ-NEXT: kxorw %k3, %k5, %k3
3372 ; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3
3373 ; AVX512DQ-NEXT: kshiftrw $13, %k3, %k3
3374 ; AVX512DQ-NEXT: kxorw %k3, %k4, %k3
3375 ; AVX512DQ-NEXT: kshiftrw $3, %k3, %k4
3376 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3377 ; AVX512DQ-NEXT: kmovw %eax, %k5
3378 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
3379 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3380 ; AVX512DQ-NEXT: kshiftrw $12, %k4, %k4
3381 ; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
3382 ; AVX512DQ-NEXT: kshiftrw $4, %k3, %k4
3383 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3384 ; AVX512DQ-NEXT: kmovw %eax, %k5
3385 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
3386 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3387 ; AVX512DQ-NEXT: kshiftrw $11, %k4, %k4
3388 ; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
3389 ; AVX512DQ-NEXT: kshiftrw $5, %k3, %k4
3390 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3391 ; AVX512DQ-NEXT: kmovw %eax, %k5
3392 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
3393 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3394 ; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4
3395 ; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
3396 ; AVX512DQ-NEXT: kshiftrw $6, %k3, %k4
3397 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3398 ; AVX512DQ-NEXT: kmovw %eax, %k5
3399 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
3400 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3401 ; AVX512DQ-NEXT: kshiftrw $9, %k4, %k4
3402 ; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
3403 ; AVX512DQ-NEXT: kshiftrw $7, %k3, %k4
3404 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3405 ; AVX512DQ-NEXT: kmovw %eax, %k5
3406 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
3407 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3408 ; AVX512DQ-NEXT: kshiftrw $8, %k4, %k4
3409 ; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
3410 ; AVX512DQ-NEXT: kshiftrw $8, %k3, %k4
3411 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3412 ; AVX512DQ-NEXT: kmovw %eax, %k5
3413 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
3414 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3415 ; AVX512DQ-NEXT: kshiftrw $7, %k4, %k4
3416 ; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
3417 ; AVX512DQ-NEXT: kshiftrw $9, %k3, %k4
3418 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3419 ; AVX512DQ-NEXT: kmovw %eax, %k5
3420 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
3421 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3422 ; AVX512DQ-NEXT: kshiftrw $6, %k4, %k4
3423 ; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
3424 ; AVX512DQ-NEXT: kshiftrw $10, %k3, %k4
3425 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3426 ; AVX512DQ-NEXT: kmovw %eax, %k5
3427 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
3428 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3429 ; AVX512DQ-NEXT: kshiftrw $5, %k4, %k4
3430 ; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
3431 ; AVX512DQ-NEXT: kshiftrw $11, %k3, %k4
3432 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3433 ; AVX512DQ-NEXT: kmovw %eax, %k5
3434 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
3435 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3436 ; AVX512DQ-NEXT: kshiftrw $4, %k4, %k4
3437 ; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
3438 ; AVX512DQ-NEXT: kshiftrw $12, %k3, %k4
3439 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3440 ; AVX512DQ-NEXT: kmovw %eax, %k5
3441 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
3442 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3443 ; AVX512DQ-NEXT: kshiftrw $3, %k4, %k4
3444 ; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
3445 ; AVX512DQ-NEXT: kshiftrw $13, %k3, %k4
3446 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3447 ; AVX512DQ-NEXT: kmovw %eax, %k5
3448 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
3449 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3450 ; AVX512DQ-NEXT: kshiftrw $2, %k4, %k4
3451 ; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
3452 ; AVX512DQ-NEXT: kshiftrw $14, %k3, %k4
3453 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3454 ; AVX512DQ-NEXT: kmovw %eax, %k5
3455 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
3456 ; AVX512DQ-NEXT: kshiftlw $14, %k4, %k4
3457 ; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
3458 ; AVX512DQ-NEXT: kshiftlw $1, %k3, %k3
3459 ; AVX512DQ-NEXT: kshiftrw $1, %k3, %k3
3460 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3461 ; AVX512DQ-NEXT: kmovw %eax, %k4
3462 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3463 ; AVX512DQ-NEXT: korw %k4, %k3, %k3
3464 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3465 ; AVX512DQ-NEXT: kmovw %eax, %k4
3466 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3467 ; AVX512DQ-NEXT: kmovw %eax, %k5
3468 ; AVX512DQ-NEXT: kxorw %k1, %k5, %k1
3469 ; AVX512DQ-NEXT: kshiftrw $2, %k1, %k5
3470 ; AVX512DQ-NEXT: kxorw %k4, %k5, %k4
3471 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3472 ; AVX512DQ-NEXT: kshiftrw $13, %k4, %k4
3473 ; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
3474 ; AVX512DQ-NEXT: kshiftrw $3, %k1, %k4
3475 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3476 ; AVX512DQ-NEXT: kmovw %eax, %k5
3477 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
3478 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3479 ; AVX512DQ-NEXT: kshiftrw $12, %k4, %k4
3480 ; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
3481 ; AVX512DQ-NEXT: kshiftrw $4, %k1, %k4
3482 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3483 ; AVX512DQ-NEXT: kmovw %eax, %k5
3484 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
3485 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3486 ; AVX512DQ-NEXT: kshiftrw $11, %k4, %k4
3487 ; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
3488 ; AVX512DQ-NEXT: kshiftrw $5, %k1, %k4
3489 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3490 ; AVX512DQ-NEXT: kmovw %eax, %k5
3491 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
3492 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3493 ; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4
3494 ; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
3495 ; AVX512DQ-NEXT: kshiftrw $6, %k1, %k4
3496 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3497 ; AVX512DQ-NEXT: kmovw %eax, %k5
3498 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
3499 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3500 ; AVX512DQ-NEXT: kshiftrw $9, %k4, %k4
3501 ; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
3502 ; AVX512DQ-NEXT: kshiftrw $7, %k1, %k4
3503 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3504 ; AVX512DQ-NEXT: kmovw %eax, %k5
3505 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
3506 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3507 ; AVX512DQ-NEXT: kshiftrw $8, %k4, %k4
3508 ; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
3509 ; AVX512DQ-NEXT: kshiftrw $8, %k1, %k4
3510 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3511 ; AVX512DQ-NEXT: kmovw %eax, %k5
3512 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
3513 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3514 ; AVX512DQ-NEXT: kshiftrw $7, %k4, %k4
3515 ; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
3516 ; AVX512DQ-NEXT: kshiftrw $9, %k1, %k4
3517 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3518 ; AVX512DQ-NEXT: kmovw %eax, %k5
3519 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
3520 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3521 ; AVX512DQ-NEXT: kshiftrw $6, %k4, %k4
3522 ; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
3523 ; AVX512DQ-NEXT: kshiftrw $10, %k1, %k4
3524 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3525 ; AVX512DQ-NEXT: kmovw %eax, %k5
3526 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
3527 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3528 ; AVX512DQ-NEXT: kshiftrw $5, %k4, %k4
3529 ; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
3530 ; AVX512DQ-NEXT: kshiftrw $11, %k1, %k4
3531 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3532 ; AVX512DQ-NEXT: kmovw %eax, %k5
3533 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
3534 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3535 ; AVX512DQ-NEXT: kshiftrw $4, %k4, %k4
3536 ; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
3537 ; AVX512DQ-NEXT: kshiftrw $12, %k1, %k4
3538 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3539 ; AVX512DQ-NEXT: kmovw %eax, %k5
3540 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
3541 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3542 ; AVX512DQ-NEXT: kshiftrw $3, %k4, %k4
3543 ; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
3544 ; AVX512DQ-NEXT: kshiftrw $13, %k1, %k4
3545 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3546 ; AVX512DQ-NEXT: kmovw %eax, %k5
3547 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
3548 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3549 ; AVX512DQ-NEXT: kshiftrw $2, %k4, %k4
3550 ; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
3551 ; AVX512DQ-NEXT: kshiftrw $14, %k1, %k4
3552 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3553 ; AVX512DQ-NEXT: kmovw %eax, %k5
3554 ; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
3555 ; AVX512DQ-NEXT: kshiftlw $14, %k4, %k4
3556 ; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
3557 ; AVX512DQ-NEXT: kshiftlw $1, %k1, %k1
3558 ; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1
3559 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
3560 ; AVX512DQ-NEXT: kmovw %eax, %k4
3561 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3562 ; AVX512DQ-NEXT: korw %k4, %k1, %k1
3563 ; AVX512DQ-NEXT: kmovw %k1, 6(%rdi)
3564 ; AVX512DQ-NEXT: kmovw %k3, 4(%rdi)
3565 ; AVX512DQ-NEXT: kmovw %k2, 2(%rdi)
3566 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
3567 ; AVX512DQ-NEXT: retq
3569 ; X86-LABEL: store_64i1:
3571 ; X86-NEXT: vpsllw $7, %zmm0, %zmm0
3572 ; X86-NEXT: vpmovb2m %zmm0, %k0
3573 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
3574 ; X86-NEXT: kmovq %k0, (%eax)
3575 ; X86-NEXT: vzeroupper
3577 store <64 x i1> %v, <64 x i1>* %a
; Compares a <16 x i32> against zero, keeps the low 8 lanes of the resulting
; mask, bitcasts them to i8, zero-extends to i32 and adds the value to itself.
; The expected output for the KNL and AVX512BW configurations moves the mask to
; a GPR and then zero-extends with movzbl; the SKX, AVX512DQ and X86
; configurations expect a kmovb with no separate movzbl.
3581 define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
3582 ; KNL-LABEL: test_bitcast_v8i1_zext:
3584 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
3585 ; KNL-NEXT: kmovw %k0, %eax
3586 ; KNL-NEXT: movzbl %al, %eax
3587 ; KNL-NEXT: addl %eax, %eax
3588 ; KNL-NEXT: vzeroupper
3591 ; SKX-LABEL: test_bitcast_v8i1_zext:
3593 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
3594 ; SKX-NEXT: kmovb %k0, %eax
3595 ; SKX-NEXT: addl %eax, %eax
3596 ; SKX-NEXT: vzeroupper
3599 ; AVX512BW-LABEL: test_bitcast_v8i1_zext:
3600 ; AVX512BW: ## %bb.0:
3601 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
3602 ; AVX512BW-NEXT: kmovd %k0, %eax
3603 ; AVX512BW-NEXT: movzbl %al, %eax
3604 ; AVX512BW-NEXT: addl %eax, %eax
3605 ; AVX512BW-NEXT: vzeroupper
3606 ; AVX512BW-NEXT: retq
3608 ; AVX512DQ-LABEL: test_bitcast_v8i1_zext:
3609 ; AVX512DQ: ## %bb.0:
3610 ; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
3611 ; AVX512DQ-NEXT: kmovb %k0, %eax
3612 ; AVX512DQ-NEXT: addl %eax, %eax
3613 ; AVX512DQ-NEXT: vzeroupper
3614 ; AVX512DQ-NEXT: retq
3616 ; X86-LABEL: test_bitcast_v8i1_zext:
3618 ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
3619 ; X86-NEXT: kmovb %k0, %eax
3620 ; X86-NEXT: addl %eax, %eax
3621 ; X86-NEXT: vzeroupper
3623 %v1 = icmp eq <16 x i32> %a, zeroinitializer
3624 %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3625 %mask1 = bitcast <8 x i1> %mask to i8
3626 %val = zext i8 %mask1 to i32
3627 %val1 = add i32 %val, %val
; Compares a <16 x i32> against zero, bitcasts the full 16-lane mask to i16,
; zero-extends to i32 and adds the value to itself. In every configuration the
; expected output is vptestnmd, a kmovw into eax and the add, with no separate
; zero-extension instruction.
3631 define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) {
3632 ; CHECK-LABEL: test_bitcast_v16i1_zext:
3634 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0
3635 ; CHECK-NEXT: kmovw %k0, %eax
3636 ; CHECK-NEXT: addl %eax, %eax
3637 ; CHECK-NEXT: vzeroupper
3640 ; X86-LABEL: test_bitcast_v16i1_zext:
3642 ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
3643 ; X86-NEXT: kmovw %k0, %eax
3644 ; X86-NEXT: addl %eax, %eax
3645 ; X86-NEXT: vzeroupper
3647 %v1 = icmp eq <16 x i32> %a, zeroinitializer
3648 %mask1 = bitcast <16 x i1> %v1 to i16
3649 %val = zext i16 %mask1 to i32
3650 %val1 = add i32 %val, %val
; add on <16 x i1>: both i16 arguments are bitcast to 16-lane masks, added
; lane-wise and bitcast back to i16. Every configuration's expected output
; performs the addition with a single kxorw (1-bit addition wraps modulo 2,
; which is xor).
3654 define i16 @test_v16i1_add(i16 %x, i16 %y) {
3655 ; KNL-LABEL: test_v16i1_add:
3657 ; KNL-NEXT: kmovw %edi, %k0
3658 ; KNL-NEXT: kmovw %esi, %k1
3659 ; KNL-NEXT: kxorw %k1, %k0, %k0
3660 ; KNL-NEXT: kmovw %k0, %eax
3661 ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
3664 ; SKX-LABEL: test_v16i1_add:
3666 ; SKX-NEXT: kmovd %edi, %k0
3667 ; SKX-NEXT: kmovd %esi, %k1
3668 ; SKX-NEXT: kxorw %k1, %k0, %k0
3669 ; SKX-NEXT: kmovd %k0, %eax
3670 ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
3673 ; AVX512BW-LABEL: test_v16i1_add:
3674 ; AVX512BW: ## %bb.0:
3675 ; AVX512BW-NEXT: kmovd %edi, %k0
3676 ; AVX512BW-NEXT: kmovd %esi, %k1
3677 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
3678 ; AVX512BW-NEXT: kmovd %k0, %eax
3679 ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
3680 ; AVX512BW-NEXT: retq
3682 ; AVX512DQ-LABEL: test_v16i1_add:
3683 ; AVX512DQ: ## %bb.0:
3684 ; AVX512DQ-NEXT: kmovw %edi, %k0
3685 ; AVX512DQ-NEXT: kmovw %esi, %k1
3686 ; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
3687 ; AVX512DQ-NEXT: kmovw %k0, %eax
3688 ; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
3689 ; AVX512DQ-NEXT: retq
3691 ; X86-LABEL: test_v16i1_add:
3693 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0
3694 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
3695 ; X86-NEXT: kxorw %k1, %k0, %k0
3696 ; X86-NEXT: kmovd %k0, %eax
3697 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
3699 %m0 = bitcast i16 %x to <16 x i1>
3700 %m1 = bitcast i16 %y to <16 x i1>
3701 %m2 = add <16 x i1> %m0, %m1
3702 %ret = bitcast <16 x i1> %m2 to i16
; sub on <16 x i1>: both i16 arguments are bitcast to 16-lane masks, subtracted
; lane-wise and bitcast back to i16. Every configuration's expected output
; uses a single kxorw, the same instruction expected for the i1 add case
; (1-bit subtraction modulo 2 is also xor).
3706 define i16 @test_v16i1_sub(i16 %x, i16 %y) {
3707 ; KNL-LABEL: test_v16i1_sub:
3709 ; KNL-NEXT: kmovw %edi, %k0
3710 ; KNL-NEXT: kmovw %esi, %k1
3711 ; KNL-NEXT: kxorw %k1, %k0, %k0
3712 ; KNL-NEXT: kmovw %k0, %eax
3713 ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
3716 ; SKX-LABEL: test_v16i1_sub:
3718 ; SKX-NEXT: kmovd %edi, %k0
3719 ; SKX-NEXT: kmovd %esi, %k1
3720 ; SKX-NEXT: kxorw %k1, %k0, %k0
3721 ; SKX-NEXT: kmovd %k0, %eax
3722 ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
3725 ; AVX512BW-LABEL: test_v16i1_sub:
3726 ; AVX512BW: ## %bb.0:
3727 ; AVX512BW-NEXT: kmovd %edi, %k0
3728 ; AVX512BW-NEXT: kmovd %esi, %k1
3729 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
3730 ; AVX512BW-NEXT: kmovd %k0, %eax
3731 ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
3732 ; AVX512BW-NEXT: retq
3734 ; AVX512DQ-LABEL: test_v16i1_sub:
3735 ; AVX512DQ: ## %bb.0:
3736 ; AVX512DQ-NEXT: kmovw %edi, %k0
3737 ; AVX512DQ-NEXT: kmovw %esi, %k1
3738 ; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
3739 ; AVX512DQ-NEXT: kmovw %k0, %eax
3740 ; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
3741 ; AVX512DQ-NEXT: retq
3743 ; X86-LABEL: test_v16i1_sub:
3745 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0
3746 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
3747 ; X86-NEXT: kxorw %k1, %k0, %k0
3748 ; X86-NEXT: kmovd %k0, %eax
3749 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
3751 %m0 = bitcast i16 %x to <16 x i1>
3752 %m1 = bitcast i16 %y to <16 x i1>
3753 %m2 = sub <16 x i1> %m0, %m1
3754 %ret = bitcast <16 x i1> %m2 to i16
; mul on <16 x i1>: both i16 arguments are bitcast to 16-lane masks, multiplied
; lane-wise and bitcast back to i16. Every configuration's expected output
; performs the multiply with a single kandw (the product of two 1-bit values
; is their logical and).
3758 define i16 @test_v16i1_mul(i16 %x, i16 %y) {
3759 ; KNL-LABEL: test_v16i1_mul:
3761 ; KNL-NEXT: kmovw %edi, %k0
3762 ; KNL-NEXT: kmovw %esi, %k1
3763 ; KNL-NEXT: kandw %k1, %k0, %k0
3764 ; KNL-NEXT: kmovw %k0, %eax
3765 ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
3768 ; SKX-LABEL: test_v16i1_mul:
3770 ; SKX-NEXT: kmovd %edi, %k0
3771 ; SKX-NEXT: kmovd %esi, %k1
3772 ; SKX-NEXT: kandw %k1, %k0, %k0
3773 ; SKX-NEXT: kmovd %k0, %eax
3774 ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
3777 ; AVX512BW-LABEL: test_v16i1_mul:
3778 ; AVX512BW: ## %bb.0:
3779 ; AVX512BW-NEXT: kmovd %edi, %k0
3780 ; AVX512BW-NEXT: kmovd %esi, %k1
3781 ; AVX512BW-NEXT: kandw %k1, %k0, %k0
3782 ; AVX512BW-NEXT: kmovd %k0, %eax
3783 ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
3784 ; AVX512BW-NEXT: retq
3786 ; AVX512DQ-LABEL: test_v16i1_mul:
3787 ; AVX512DQ: ## %bb.0:
3788 ; AVX512DQ-NEXT: kmovw %edi, %k0
3789 ; AVX512DQ-NEXT: kmovw %esi, %k1
3790 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0
3791 ; AVX512DQ-NEXT: kmovw %k0, %eax
3792 ; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
3793 ; AVX512DQ-NEXT: retq
3795 ; X86-LABEL: test_v16i1_mul:
3797 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0
3798 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
3799 ; X86-NEXT: kandw %k1, %k0, %k0
3800 ; X86-NEXT: kmovd %k0, %eax
3801 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
3803 %m0 = bitcast i16 %x to <16 x i1>
3804 %m1 = bitcast i16 %y to <16 x i1>
3805 %m2 = mul <16 x i1> %m0, %m1
3806 %ret = bitcast <16 x i1> %m2 to i16
; add on <8 x i1>: both i8 arguments are bitcast to 8-lane masks, added
; lane-wise and bitcast back to i8. The expected output uses the byte-sized
; kxorb for the run lines that enable +avx512dq (SKX, AVX512DQ and X86
; configurations) and the word-sized kxorw for the ones that do not (KNL and
; AVX512BW configurations).
3810 define i8 @test_v8i1_add(i8 %x, i8 %y) {
3811 ; KNL-LABEL: test_v8i1_add:
3813 ; KNL-NEXT: kmovw %edi, %k0
3814 ; KNL-NEXT: kmovw %esi, %k1
3815 ; KNL-NEXT: kxorw %k1, %k0, %k0
3816 ; KNL-NEXT: kmovw %k0, %eax
3817 ; KNL-NEXT: ## kill: def $al killed $al killed $eax
3820 ; SKX-LABEL: test_v8i1_add:
3822 ; SKX-NEXT: kmovd %edi, %k0
3823 ; SKX-NEXT: kmovd %esi, %k1
3824 ; SKX-NEXT: kxorb %k1, %k0, %k0
3825 ; SKX-NEXT: kmovd %k0, %eax
3826 ; SKX-NEXT: ## kill: def $al killed $al killed $eax
3829 ; AVX512BW-LABEL: test_v8i1_add:
3830 ; AVX512BW: ## %bb.0:
3831 ; AVX512BW-NEXT: kmovd %edi, %k0
3832 ; AVX512BW-NEXT: kmovd %esi, %k1
3833 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
3834 ; AVX512BW-NEXT: kmovd %k0, %eax
3835 ; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
3836 ; AVX512BW-NEXT: retq
3838 ; AVX512DQ-LABEL: test_v8i1_add:
3839 ; AVX512DQ: ## %bb.0:
3840 ; AVX512DQ-NEXT: kmovw %edi, %k0
3841 ; AVX512DQ-NEXT: kmovw %esi, %k1
3842 ; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
3843 ; AVX512DQ-NEXT: kmovw %k0, %eax
3844 ; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
3845 ; AVX512DQ-NEXT: retq
3847 ; X86-LABEL: test_v8i1_add:
3849 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
3850 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
3851 ; X86-NEXT: kxorb %k1, %k0, %k0
3852 ; X86-NEXT: kmovd %k0, %eax
3853 ; X86-NEXT: ## kill: def $al killed $al killed $eax
3855 %m0 = bitcast i8 %x to <8 x i1>
3856 %m1 = bitcast i8 %y to <8 x i1>
3857 %m2 = add <8 x i1> %m0, %m1
3858 %ret = bitcast <8 x i1> %m2 to i8
; sub on <8 x i1>: both i8 arguments are bitcast to 8-lane masks, subtracted
; lane-wise and bitcast back to i8. As with the 8-lane add case, the expected
; output is an xor of the two masks: kxorb for the run lines that enable
; +avx512dq (SKX, AVX512DQ and X86 configurations), kxorw for the others (KNL
; and AVX512BW configurations).
3862 define i8 @test_v8i1_sub(i8 %x, i8 %y) {
3863 ; KNL-LABEL: test_v8i1_sub:
3865 ; KNL-NEXT: kmovw %edi, %k0
3866 ; KNL-NEXT: kmovw %esi, %k1
3867 ; KNL-NEXT: kxorw %k1, %k0, %k0
3868 ; KNL-NEXT: kmovw %k0, %eax
3869 ; KNL-NEXT: ## kill: def $al killed $al killed $eax
3872 ; SKX-LABEL: test_v8i1_sub:
3874 ; SKX-NEXT: kmovd %edi, %k0
3875 ; SKX-NEXT: kmovd %esi, %k1
3876 ; SKX-NEXT: kxorb %k1, %k0, %k0
3877 ; SKX-NEXT: kmovd %k0, %eax
3878 ; SKX-NEXT: ## kill: def $al killed $al killed $eax
3881 ; AVX512BW-LABEL: test_v8i1_sub:
3882 ; AVX512BW: ## %bb.0:
3883 ; AVX512BW-NEXT: kmovd %edi, %k0
3884 ; AVX512BW-NEXT: kmovd %esi, %k1
3885 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
3886 ; AVX512BW-NEXT: kmovd %k0, %eax
3887 ; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
3888 ; AVX512BW-NEXT: retq
3890 ; AVX512DQ-LABEL: test_v8i1_sub:
3891 ; AVX512DQ: ## %bb.0:
3892 ; AVX512DQ-NEXT: kmovw %edi, %k0
3893 ; AVX512DQ-NEXT: kmovw %esi, %k1
3894 ; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
3895 ; AVX512DQ-NEXT: kmovw %k0, %eax
3896 ; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
3897 ; AVX512DQ-NEXT: retq
3899 ; X86-LABEL: test_v8i1_sub:
3901 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
3902 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
3903 ; X86-NEXT: kxorb %k1, %k0, %k0
3904 ; X86-NEXT: kmovd %k0, %eax
3905 ; X86-NEXT: ## kill: def $al killed $al killed $eax
3907 %m0 = bitcast i8 %x to <8 x i1>
3908 %m1 = bitcast i8 %y to <8 x i1>
3909 %m2 = sub <8 x i1> %m0, %m1
3910 %ret = bitcast <8 x i1> %m2 to i8
; mul on <8 x i1>: both i8 arguments are bitcast to 8-lane masks, multiplied
; lane-wise and bitcast back to i8. The expected output is an and of the two
; masks: the byte-sized kandb for the run lines that enable +avx512dq (SKX,
; AVX512DQ and X86 configurations), the word-sized kandw for the others (KNL
; and AVX512BW configurations).
3914 define i8 @test_v8i1_mul(i8 %x, i8 %y) {
3915 ; KNL-LABEL: test_v8i1_mul:
3917 ; KNL-NEXT: kmovw %edi, %k0
3918 ; KNL-NEXT: kmovw %esi, %k1
3919 ; KNL-NEXT: kandw %k1, %k0, %k0
3920 ; KNL-NEXT: kmovw %k0, %eax
3921 ; KNL-NEXT: ## kill: def $al killed $al killed $eax
3924 ; SKX-LABEL: test_v8i1_mul:
3926 ; SKX-NEXT: kmovd %edi, %k0
3927 ; SKX-NEXT: kmovd %esi, %k1
3928 ; SKX-NEXT: kandb %k1, %k0, %k0
3929 ; SKX-NEXT: kmovd %k0, %eax
3930 ; SKX-NEXT: ## kill: def $al killed $al killed $eax
3933 ; AVX512BW-LABEL: test_v8i1_mul:
3934 ; AVX512BW: ## %bb.0:
3935 ; AVX512BW-NEXT: kmovd %edi, %k0
3936 ; AVX512BW-NEXT: kmovd %esi, %k1
3937 ; AVX512BW-NEXT: kandw %k1, %k0, %k0
3938 ; AVX512BW-NEXT: kmovd %k0, %eax
3939 ; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
3940 ; AVX512BW-NEXT: retq
3942 ; AVX512DQ-LABEL: test_v8i1_mul:
3943 ; AVX512DQ: ## %bb.0:
3944 ; AVX512DQ-NEXT: kmovw %edi, %k0
3945 ; AVX512DQ-NEXT: kmovw %esi, %k1
3946 ; AVX512DQ-NEXT: kandb %k1, %k0, %k0
3947 ; AVX512DQ-NEXT: kmovw %k0, %eax
3948 ; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
3949 ; AVX512DQ-NEXT: retq
3951 ; X86-LABEL: test_v8i1_mul:
3953 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
3954 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
3955 ; X86-NEXT: kandb %k1, %k0, %k0
3956 ; X86-NEXT: kmovd %k0, %eax
3957 ; X86-NEXT: ## kill: def $al killed $al killed $eax
3959 %m0 = bitcast i8 %x to <8 x i1>
3960 %m1 = bitcast i8 %y to <8 x i1>
3961 %m2 = mul <8 x i1> %m0, %m1
3962 %ret = bitcast <8 x i1> %m2 to i8
3966 ; Make sure we don't emit a ktest for signed comparisons.
; The i16 mask is tested with `icmp sgt ..., 0`. Every prefix below expects the
; mask to be moved to a GPR (kmovw/kmovd) and then checked with testw + jle.
3967 define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
3968 ; KNL-LABEL: ktest_signed:
3970 ; KNL-NEXT: pushq %rax
3971 ; KNL-NEXT: .cfi_def_cfa_offset 16
3972 ; KNL-NEXT: vpord %zmm1, %zmm0, %zmm0
3973 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
3974 ; KNL-NEXT: kmovw %k0, %eax
3975 ; KNL-NEXT: testw %ax, %ax
3976 ; KNL-NEXT: jle LBB65_1
3977 ; KNL-NEXT: ## %bb.2: ## %bb.2
3978 ; KNL-NEXT: popq %rax
3979 ; KNL-NEXT: vzeroupper
3981 ; KNL-NEXT: LBB65_1: ## %bb.1
3982 ; KNL-NEXT: vzeroupper
3983 ; KNL-NEXT: callq _foo
3984 ; KNL-NEXT: popq %rax
3987 ; SKX-LABEL: ktest_signed:
3989 ; SKX-NEXT: pushq %rax
3990 ; SKX-NEXT: .cfi_def_cfa_offset 16
3991 ; SKX-NEXT: vpord %zmm1, %zmm0, %zmm0
3992 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
3993 ; SKX-NEXT: kmovd %k0, %eax
3994 ; SKX-NEXT: testw %ax, %ax
3995 ; SKX-NEXT: jle LBB65_1
3996 ; SKX-NEXT: ## %bb.2: ## %bb.2
3997 ; SKX-NEXT: popq %rax
3998 ; SKX-NEXT: vzeroupper
4000 ; SKX-NEXT: LBB65_1: ## %bb.1
4001 ; SKX-NEXT: vzeroupper
4002 ; SKX-NEXT: callq _foo
4003 ; SKX-NEXT: popq %rax
4006 ; AVX512BW-LABEL: ktest_signed:
4007 ; AVX512BW: ## %bb.0:
4008 ; AVX512BW-NEXT: pushq %rax
4009 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
4010 ; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
4011 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
4012 ; AVX512BW-NEXT: kmovd %k0, %eax
4013 ; AVX512BW-NEXT: testw %ax, %ax
4014 ; AVX512BW-NEXT: jle LBB65_1
4015 ; AVX512BW-NEXT: ## %bb.2: ## %bb.2
4016 ; AVX512BW-NEXT: popq %rax
4017 ; AVX512BW-NEXT: vzeroupper
4018 ; AVX512BW-NEXT: retq
4019 ; AVX512BW-NEXT: LBB65_1: ## %bb.1
4020 ; AVX512BW-NEXT: vzeroupper
4021 ; AVX512BW-NEXT: callq _foo
4022 ; AVX512BW-NEXT: popq %rax
4023 ; AVX512BW-NEXT: retq
4025 ; AVX512DQ-LABEL: ktest_signed:
4026 ; AVX512DQ: ## %bb.0:
4027 ; AVX512DQ-NEXT: pushq %rax
4028 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
4029 ; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
4030 ; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
4031 ; AVX512DQ-NEXT: kmovw %k0, %eax
4032 ; AVX512DQ-NEXT: testw %ax, %ax
4033 ; AVX512DQ-NEXT: jle LBB65_1
4034 ; AVX512DQ-NEXT: ## %bb.2: ## %bb.2
4035 ; AVX512DQ-NEXT: popq %rax
4036 ; AVX512DQ-NEXT: vzeroupper
4037 ; AVX512DQ-NEXT: retq
4038 ; AVX512DQ-NEXT: LBB65_1: ## %bb.1
4039 ; AVX512DQ-NEXT: vzeroupper
4040 ; AVX512DQ-NEXT: callq _foo
4041 ; AVX512DQ-NEXT: popq %rax
4042 ; AVX512DQ-NEXT: retq
4044 ; X86-LABEL: ktest_signed:
4046 ; X86-NEXT: subl $12, %esp
4047 ; X86-NEXT: .cfi_def_cfa_offset 16
4048 ; X86-NEXT: vpord %zmm1, %zmm0, %zmm0
4049 ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
4050 ; X86-NEXT: kmovd %k0, %eax
4051 ; X86-NEXT: testw %ax, %ax
4052 ; X86-NEXT: jle LBB65_1
4053 ; X86-NEXT: ## %bb.2: ## %bb.2
4054 ; X86-NEXT: addl $12, %esp
4055 ; X86-NEXT: vzeroupper
4057 ; X86-NEXT: LBB65_1: ## %bb.1
4058 ; X86-NEXT: vzeroupper
4059 ; X86-NEXT: calll _foo
4060 ; X86-NEXT: addl $12, %esp
4062 %a = icmp eq <16 x i32> %x, zeroinitializer
4063 %b = icmp eq <16 x i32> %y, zeroinitializer
4064 %c = and <16 x i1> %a, %b
4065 %d = bitcast <16 x i1> %c to i16
4066 %e = icmp sgt i16 %d, 0
4067 br i1 %e, label %bb.2, label %bb.1
4076 ; Make sure we can use the C flag from kortest to check for all ones.
; The i16 mask is compared for equality with -1. Both the CHECK and X86 prefixes
; expect `kortestw %k0, %k0` followed by `jb`.
4077 define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
4078 ; CHECK-LABEL: ktest_allones:
4080 ; CHECK-NEXT: pushq %rax
4081 ; CHECK-NEXT: .cfi_def_cfa_offset 16
4082 ; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0
4083 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0
4084 ; CHECK-NEXT: kortestw %k0, %k0
4085 ; CHECK-NEXT: jb LBB66_2
4086 ; CHECK-NEXT: ## %bb.1: ## %bb.1
4087 ; CHECK-NEXT: vzeroupper
4088 ; CHECK-NEXT: callq _foo
4089 ; CHECK-NEXT: LBB66_2: ## %bb.2
4090 ; CHECK-NEXT: popq %rax
4091 ; CHECK-NEXT: vzeroupper
4094 ; X86-LABEL: ktest_allones:
4096 ; X86-NEXT: subl $12, %esp
4097 ; X86-NEXT: .cfi_def_cfa_offset 16
4098 ; X86-NEXT: vpord %zmm1, %zmm0, %zmm0
4099 ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
4100 ; X86-NEXT: kortestw %k0, %k0
4101 ; X86-NEXT: jb LBB66_2
4102 ; X86-NEXT: ## %bb.1: ## %bb.1
4103 ; X86-NEXT: vzeroupper
4104 ; X86-NEXT: calll _foo
4105 ; X86-NEXT: LBB66_2: ## %bb.2
4106 ; X86-NEXT: addl $12, %esp
4107 ; X86-NEXT: vzeroupper
4109 %a = icmp eq <16 x i32> %x, zeroinitializer
4110 %b = icmp eq <16 x i32> %y, zeroinitializer
4111 %c = and <16 x i1> %a, %b
4112 %d = bitcast <16 x i1> %c to i16
4113 %e = icmp eq i16 %d, -1
4114 br i1 %e, label %bb.2, label %bb.1
4122 ; This is derived from an intrinsic test where v4i1 mask was created by _mm_cmp_epi32_mask, then it was passed to _mm512_mask_blend_epi32 which uses a v16i1 mask.
4123 ; The widening happens in the scalar domain between the intrinsics. The middle end optimized it to this.
; Expected lowering: SKX and X86 compare the xmm inputs straight into %k1. KNL,
; AVX512BW and AVX512DQ compare at zmm width and then apply a
; kshiftlw $12 / kshiftrw $12 pair to the mask before the blend.
4124 define <8 x i64> @mask_widening(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d, <8 x i64> %e, <8 x i64> %f) {
4125 ; KNL-LABEL: mask_widening:
4126 ; KNL: ## %bb.0: ## %entry
4127 ; KNL-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
4128 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
4129 ; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
4130 ; KNL-NEXT: kshiftlw $12, %k0, %k0
4131 ; KNL-NEXT: kshiftrw $12, %k0, %k1
4132 ; KNL-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
4135 ; SKX-LABEL: mask_widening:
4136 ; SKX: ## %bb.0: ## %entry
4137 ; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
4138 ; SKX-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
4141 ; AVX512BW-LABEL: mask_widening:
4142 ; AVX512BW: ## %bb.0: ## %entry
4143 ; AVX512BW-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
4144 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
4145 ; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
4146 ; AVX512BW-NEXT: kshiftlw $12, %k0, %k0
4147 ; AVX512BW-NEXT: kshiftrw $12, %k0, %k1
4148 ; AVX512BW-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
4149 ; AVX512BW-NEXT: retq
4151 ; AVX512DQ-LABEL: mask_widening:
4152 ; AVX512DQ: ## %bb.0: ## %entry
4153 ; AVX512DQ-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
4154 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
4155 ; AVX512DQ-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
4156 ; AVX512DQ-NEXT: kshiftlw $12, %k0, %k0
4157 ; AVX512DQ-NEXT: kshiftrw $12, %k0, %k1
4158 ; AVX512DQ-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
4159 ; AVX512DQ-NEXT: retq
4161 ; X86-LABEL: mask_widening:
4162 ; X86: ## %bb.0: ## %entry
4163 ; X86-NEXT: pushl %ebp
4164 ; X86-NEXT: .cfi_def_cfa_offset 8
4165 ; X86-NEXT: .cfi_offset %ebp, -8
4166 ; X86-NEXT: movl %esp, %ebp
4167 ; X86-NEXT: .cfi_def_cfa_register %ebp
4168 ; X86-NEXT: andl $-64, %esp
4169 ; X86-NEXT: subl $64, %esp
4170 ; X86-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
4171 ; X86-NEXT: vmovdqa64 8(%ebp), %zmm0
4172 ; X86-NEXT: vmovdqa32 72(%ebp), %zmm0 {%k1}
4173 ; X86-NEXT: movl %ebp, %esp
4174 ; X86-NEXT: popl %ebp
4177 %0 = bitcast <2 x i64> %a to <4 x i32>
4178 %1 = bitcast <2 x i64> %b to <4 x i32>
4179 %2 = icmp eq <4 x i32> %0, %1
4180 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4181 %4 = bitcast <8 x i64> %f to <16 x i32>
4182 %5 = bitcast <8 x i64> %e to <16 x i32>
4183 %6 = shufflevector <8 x i1> %3, <8 x i1> <i1 false, i1 undef, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
4184 %7 = select <16 x i1> %6, <16 x i32> %4, <16 x i32> %5
4185 %8 = bitcast <16 x i32> %7 to <8 x i64>
; Store of a constant <64 x i1>. CHECK expects a single 64-bit immediate store
; (movabsq + movq); X86 expects two 32-bit immediate stores.
4189 define void @store_v64i1_constant(<64 x i1>* %R) {
4190 ; CHECK-LABEL: store_v64i1_constant:
4191 ; CHECK: ## %bb.0: ## %entry
4192 ; CHECK-NEXT: movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
4193 ; CHECK-NEXT: movq %rax, (%rdi)
4196 ; X86-LABEL: store_v64i1_constant:
4197 ; X86: ## %bb.0: ## %entry
4198 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
4199 ; X86-NEXT: movl $-536871045, 4(%eax) ## imm = 0xDFFFFF7B
4200 ; X86-NEXT: movl $-4099, (%eax) ## imm = 0xEFFD
4203 store <64 x i1> <i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1>, <64 x i1>* %R
; Store of the constant <2 x i1> <1, 0>. Both prefixes expect a single
; `movb $1` to the destination.
4207 define void @store_v2i1_constant(<2 x i1>* %R) {
4208 ; CHECK-LABEL: store_v2i1_constant:
4209 ; CHECK: ## %bb.0: ## %entry
4210 ; CHECK-NEXT: movb $1, (%rdi)
4213 ; X86-LABEL: store_v2i1_constant:
4214 ; X86: ## %bb.0: ## %entry
4215 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
4216 ; X86-NEXT: movb $1, (%eax)
4219 store <2 x i1> <i1 1, i1 0>, <2 x i1>* %R
; Store of the constant <4 x i1> <1, 0, 1, 0>. Both prefixes expect a single
; `movb $5` to the destination.
4223 define void @store_v4i1_constant(<4 x i1>* %R) {
4224 ; CHECK-LABEL: store_v4i1_constant:
4225 ; CHECK: ## %bb.0: ## %entry
4226 ; CHECK-NEXT: movb $5, (%rdi)
4229 ; X86-LABEL: store_v4i1_constant:
4230 ; X86: ## %bb.0: ## %entry
4231 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
4232 ; X86-NEXT: movb $5, (%eax)
4235 store <4 x i1> <i1 1, i1 0, i1 1, i1 0>, <4 x i1>* %R
4239 ; Make sure we bring the -1 constant into the mask domain.
; The IR inverts the `icmp ule` mask with `xor i16 ..., -1`. The expected output
; instead uses the inverted compare vpcmpnleud, whose result write-masks the
; vptestmd; no separate mask-not instruction appears in the checks.
4240 define void @mask_not_cast(i8*, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>) {
4241 ; CHECK-LABEL: mask_not_cast:
4243 ; CHECK-NEXT: vpcmpnleud %zmm3, %zmm2, %k1
4244 ; CHECK-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1}
4245 ; CHECK-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
4246 ; CHECK-NEXT: vzeroupper
4249 ; X86-LABEL: mask_not_cast:
4251 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
4252 ; X86-NEXT: vpcmpnleud %zmm3, %zmm2, %k1
4253 ; X86-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1}
4254 ; X86-NEXT: vmovdqu32 %zmm0, (%eax) {%k1}
4255 ; X86-NEXT: vzeroupper
4257 %6 = and <8 x i64> %2, %1
4258 %7 = bitcast <8 x i64> %6 to <16 x i32>
4259 %8 = icmp ne <16 x i32> %7, zeroinitializer
4260 %9 = bitcast <16 x i1> %8 to i16
4261 %10 = bitcast <8 x i64> %3 to <16 x i32>
4262 %11 = bitcast <8 x i64> %4 to <16 x i32>
4263 %12 = icmp ule <16 x i32> %10, %11
4264 %13 = bitcast <16 x i1> %12 to i16
4265 %14 = xor i16 %13, -1
4266 %15 = and i16 %14, %9
4267 %16 = bitcast <8 x i64> %1 to <16 x i32>
4268 %17 = bitcast i8* %0 to <16 x i32>*
4269 %18 = bitcast i16 %15 to <16 x i1>
4270 tail call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> %16, <16 x i32>* %17, i32 1, <16 x i1> %18) #2
; Masked-store intrinsic called by mask_not_cast.
4273 declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
; (a | b) & (c | d) over <8 x i1> compare results of <8 x i32> inputs, then a
; branch on the i8 mask being zero. SKX, AVX512DQ and X86 expect ktestb; KNL and
; AVX512BW expect kandw, a kmov to a GPR, and testb.
4275 define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
4276 ; KNL-LABEL: ktest_3:
4278 ; KNL-NEXT: pushq %rax
4279 ; KNL-NEXT: .cfi_def_cfa_offset 16
4280 ; KNL-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
4281 ; KNL-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
4282 ; KNL-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
4283 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
4284 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
4285 ; KNL-NEXT: vptestnmd %zmm1, %zmm1, %k1
4286 ; KNL-NEXT: vptestnmd %zmm2, %zmm2, %k2
4287 ; KNL-NEXT: vptestnmd %zmm3, %zmm3, %k3
4288 ; KNL-NEXT: korw %k1, %k0, %k0
4289 ; KNL-NEXT: korw %k3, %k2, %k1
4290 ; KNL-NEXT: kandw %k1, %k0, %k0
4291 ; KNL-NEXT: kmovw %k0, %eax
4292 ; KNL-NEXT: testb %al, %al
4293 ; KNL-NEXT: je LBB72_1
4294 ; KNL-NEXT: ## %bb.2: ## %exit
4295 ; KNL-NEXT: popq %rax
4296 ; KNL-NEXT: vzeroupper
4298 ; KNL-NEXT: LBB72_1: ## %bar
4299 ; KNL-NEXT: vzeroupper
4300 ; KNL-NEXT: callq _foo
4301 ; KNL-NEXT: popq %rax
4304 ; SKX-LABEL: ktest_3:
4306 ; SKX-NEXT: pushq %rax
4307 ; SKX-NEXT: .cfi_def_cfa_offset 16
4308 ; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k0
4309 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1
4310 ; SKX-NEXT: korb %k1, %k0, %k0
4311 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1
4312 ; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k2
4313 ; SKX-NEXT: korb %k2, %k1, %k1
4314 ; SKX-NEXT: ktestb %k1, %k0
4315 ; SKX-NEXT: je LBB72_1
4316 ; SKX-NEXT: ## %bb.2: ## %exit
4317 ; SKX-NEXT: popq %rax
4318 ; SKX-NEXT: vzeroupper
4320 ; SKX-NEXT: LBB72_1: ## %bar
4321 ; SKX-NEXT: vzeroupper
4322 ; SKX-NEXT: callq _foo
4323 ; SKX-NEXT: popq %rax
4326 ; AVX512BW-LABEL: ktest_3:
4327 ; AVX512BW: ## %bb.0:
4328 ; AVX512BW-NEXT: pushq %rax
4329 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
4330 ; AVX512BW-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
4331 ; AVX512BW-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
4332 ; AVX512BW-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
4333 ; AVX512BW-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
4334 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
4335 ; AVX512BW-NEXT: vptestnmd %zmm1, %zmm1, %k1
4336 ; AVX512BW-NEXT: vptestnmd %zmm2, %zmm2, %k2
4337 ; AVX512BW-NEXT: vptestnmd %zmm3, %zmm3, %k3
4338 ; AVX512BW-NEXT: korw %k1, %k0, %k0
4339 ; AVX512BW-NEXT: korw %k3, %k2, %k1
4340 ; AVX512BW-NEXT: kandw %k1, %k0, %k0
4341 ; AVX512BW-NEXT: kmovd %k0, %eax
4342 ; AVX512BW-NEXT: testb %al, %al
4343 ; AVX512BW-NEXT: je LBB72_1
4344 ; AVX512BW-NEXT: ## %bb.2: ## %exit
4345 ; AVX512BW-NEXT: popq %rax
4346 ; AVX512BW-NEXT: vzeroupper
4347 ; AVX512BW-NEXT: retq
4348 ; AVX512BW-NEXT: LBB72_1: ## %bar
4349 ; AVX512BW-NEXT: vzeroupper
4350 ; AVX512BW-NEXT: callq _foo
4351 ; AVX512BW-NEXT: popq %rax
4352 ; AVX512BW-NEXT: retq
4354 ; AVX512DQ-LABEL: ktest_3:
4355 ; AVX512DQ: ## %bb.0:
4356 ; AVX512DQ-NEXT: pushq %rax
4357 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
4358 ; AVX512DQ-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
4359 ; AVX512DQ-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
4360 ; AVX512DQ-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
4361 ; AVX512DQ-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
4362 ; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
4363 ; AVX512DQ-NEXT: vptestnmd %zmm1, %zmm1, %k1
4364 ; AVX512DQ-NEXT: vptestnmd %zmm2, %zmm2, %k2
4365 ; AVX512DQ-NEXT: vptestnmd %zmm3, %zmm3, %k3
4366 ; AVX512DQ-NEXT: korb %k1, %k0, %k0
4367 ; AVX512DQ-NEXT: korb %k3, %k2, %k1
4368 ; AVX512DQ-NEXT: ktestb %k1, %k0
4369 ; AVX512DQ-NEXT: je LBB72_1
4370 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
4371 ; AVX512DQ-NEXT: popq %rax
4372 ; AVX512DQ-NEXT: vzeroupper
4373 ; AVX512DQ-NEXT: retq
4374 ; AVX512DQ-NEXT: LBB72_1: ## %bar
4375 ; AVX512DQ-NEXT: vzeroupper
4376 ; AVX512DQ-NEXT: callq _foo
4377 ; AVX512DQ-NEXT: popq %rax
4378 ; AVX512DQ-NEXT: retq
4380 ; X86-LABEL: ktest_3:
4382 ; X86-NEXT: subl $12, %esp
4383 ; X86-NEXT: .cfi_def_cfa_offset 16
4384 ; X86-NEXT: vptestnmd %ymm0, %ymm0, %k0
4385 ; X86-NEXT: vptestnmd %ymm1, %ymm1, %k1
4386 ; X86-NEXT: korb %k1, %k0, %k0
4387 ; X86-NEXT: vptestnmd %ymm2, %ymm2, %k1
4388 ; X86-NEXT: vptestnmd %ymm3, %ymm3, %k2
4389 ; X86-NEXT: korb %k2, %k1, %k1
4390 ; X86-NEXT: ktestb %k1, %k0
4391 ; X86-NEXT: je LBB72_1
4392 ; X86-NEXT: ## %bb.2: ## %exit
4393 ; X86-NEXT: addl $12, %esp
4394 ; X86-NEXT: vzeroupper
4396 ; X86-NEXT: LBB72_1: ## %bar
4397 ; X86-NEXT: vzeroupper
4398 ; X86-NEXT: calll _foo
4399 ; X86-NEXT: addl $12, %esp
4401 %a = icmp eq <8 x i32> %w, zeroinitializer
4402 %b = icmp eq <8 x i32> %x, zeroinitializer
4403 %c = icmp eq <8 x i32> %y, zeroinitializer
4404 %d = icmp eq <8 x i32> %z, zeroinitializer
4405 %e = or <8 x i1> %a, %b
4406 %f = or <8 x i1> %c, %d
4407 %g = and <8 x i1> %e, %f
4408 %h = bitcast <8 x i1> %g to i8
4409 %i = icmp eq i8 %h, 0
4410 br i1 %i, label %bar, label %exit
; Same (a | b) & (c | d) == 0 pattern as an <8 x i1> mask, here from <8 x i64>
; inputs. SKX, AVX512DQ and X86 expect ktestb; KNL and AVX512BW expect kandw,
; a kmov to a GPR, and testb.
4420 define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
4421 ; KNL-LABEL: ktest_4:
4423 ; KNL-NEXT: pushq %rax
4424 ; KNL-NEXT: .cfi_def_cfa_offset 16
4425 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
4426 ; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1
4427 ; KNL-NEXT: vptestnmq %zmm2, %zmm2, %k2
4428 ; KNL-NEXT: vptestnmq %zmm3, %zmm3, %k3
4429 ; KNL-NEXT: korw %k1, %k0, %k0
4430 ; KNL-NEXT: korw %k3, %k2, %k1
4431 ; KNL-NEXT: kandw %k1, %k0, %k0
4432 ; KNL-NEXT: kmovw %k0, %eax
4433 ; KNL-NEXT: testb %al, %al
4434 ; KNL-NEXT: je LBB73_1
4435 ; KNL-NEXT: ## %bb.2: ## %exit
4436 ; KNL-NEXT: popq %rax
4437 ; KNL-NEXT: vzeroupper
4439 ; KNL-NEXT: LBB73_1: ## %bar
4440 ; KNL-NEXT: vzeroupper
4441 ; KNL-NEXT: callq _foo
4442 ; KNL-NEXT: popq %rax
4445 ; SKX-LABEL: ktest_4:
4447 ; SKX-NEXT: pushq %rax
4448 ; SKX-NEXT: .cfi_def_cfa_offset 16
4449 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k0
4450 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1
4451 ; SKX-NEXT: korb %k1, %k0, %k0
4452 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1
4453 ; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k2
4454 ; SKX-NEXT: korb %k2, %k1, %k1
4455 ; SKX-NEXT: ktestb %k1, %k0
4456 ; SKX-NEXT: je LBB73_1
4457 ; SKX-NEXT: ## %bb.2: ## %exit
4458 ; SKX-NEXT: popq %rax
4459 ; SKX-NEXT: vzeroupper
4461 ; SKX-NEXT: LBB73_1: ## %bar
4462 ; SKX-NEXT: vzeroupper
4463 ; SKX-NEXT: callq _foo
4464 ; SKX-NEXT: popq %rax
4467 ; AVX512BW-LABEL: ktest_4:
4468 ; AVX512BW: ## %bb.0:
4469 ; AVX512BW-NEXT: pushq %rax
4470 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
4471 ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
4472 ; AVX512BW-NEXT: vptestnmq %zmm1, %zmm1, %k1
4473 ; AVX512BW-NEXT: vptestnmq %zmm2, %zmm2, %k2
4474 ; AVX512BW-NEXT: vptestnmq %zmm3, %zmm3, %k3
4475 ; AVX512BW-NEXT: korw %k1, %k0, %k0
4476 ; AVX512BW-NEXT: korw %k3, %k2, %k1
4477 ; AVX512BW-NEXT: kandw %k1, %k0, %k0
4478 ; AVX512BW-NEXT: kmovd %k0, %eax
4479 ; AVX512BW-NEXT: testb %al, %al
4480 ; AVX512BW-NEXT: je LBB73_1
4481 ; AVX512BW-NEXT: ## %bb.2: ## %exit
4482 ; AVX512BW-NEXT: popq %rax
4483 ; AVX512BW-NEXT: vzeroupper
4484 ; AVX512BW-NEXT: retq
4485 ; AVX512BW-NEXT: LBB73_1: ## %bar
4486 ; AVX512BW-NEXT: vzeroupper
4487 ; AVX512BW-NEXT: callq _foo
4488 ; AVX512BW-NEXT: popq %rax
4489 ; AVX512BW-NEXT: retq
4491 ; AVX512DQ-LABEL: ktest_4:
4492 ; AVX512DQ: ## %bb.0:
4493 ; AVX512DQ-NEXT: pushq %rax
4494 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
4495 ; AVX512DQ-NEXT: vptestnmq %zmm0, %zmm0, %k0
4496 ; AVX512DQ-NEXT: vptestnmq %zmm1, %zmm1, %k1
4497 ; AVX512DQ-NEXT: korb %k1, %k0, %k0
4498 ; AVX512DQ-NEXT: vptestnmq %zmm2, %zmm2, %k1
4499 ; AVX512DQ-NEXT: vptestnmq %zmm3, %zmm3, %k2
4500 ; AVX512DQ-NEXT: korb %k2, %k1, %k1
4501 ; AVX512DQ-NEXT: ktestb %k1, %k0
4502 ; AVX512DQ-NEXT: je LBB73_1
4503 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
4504 ; AVX512DQ-NEXT: popq %rax
4505 ; AVX512DQ-NEXT: vzeroupper
4506 ; AVX512DQ-NEXT: retq
4507 ; AVX512DQ-NEXT: LBB73_1: ## %bar
4508 ; AVX512DQ-NEXT: vzeroupper
4509 ; AVX512DQ-NEXT: callq _foo
4510 ; AVX512DQ-NEXT: popq %rax
4511 ; AVX512DQ-NEXT: retq
4513 ; X86-LABEL: ktest_4:
4515 ; X86-NEXT: subl $12, %esp
4516 ; X86-NEXT: .cfi_def_cfa_offset 16
4517 ; X86-NEXT: vptestnmq %zmm0, %zmm0, %k0
4518 ; X86-NEXT: vptestnmq %zmm1, %zmm1, %k1
4519 ; X86-NEXT: korb %k1, %k0, %k0
4520 ; X86-NEXT: vptestnmq %zmm2, %zmm2, %k1
4521 ; X86-NEXT: vptestnmq %zmm3, %zmm3, %k2
4522 ; X86-NEXT: korb %k2, %k1, %k1
4523 ; X86-NEXT: ktestb %k1, %k0
4524 ; X86-NEXT: je LBB73_1
4525 ; X86-NEXT: ## %bb.2: ## %exit
4526 ; X86-NEXT: addl $12, %esp
4527 ; X86-NEXT: vzeroupper
4529 ; X86-NEXT: LBB73_1: ## %bar
4530 ; X86-NEXT: vzeroupper
4531 ; X86-NEXT: calll _foo
4532 ; X86-NEXT: addl $12, %esp
4534 %a = icmp eq <8 x i64> %w, zeroinitializer
4535 %b = icmp eq <8 x i64> %x, zeroinitializer
4536 %c = icmp eq <8 x i64> %y, zeroinitializer
4537 %d = icmp eq <8 x i64> %z, zeroinitializer
4538 %e = or <8 x i1> %a, %b
4539 %f = or <8 x i1> %c, %d
4540 %g = and <8 x i1> %e, %f
4541 %h = bitcast <8 x i1> %g to i8
4542 %i = icmp eq i8 %h, 0
4543 br i1 %i, label %bar, label %exit
; Same (a | b) & (c | d) == 0 pattern with a <16 x i1> mask from <16 x i32>
; inputs. SKX, AVX512DQ and X86 expect ktestw; KNL and AVX512BW expect
; kandw followed by kortestw.
4553 define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z) {
4554 ; KNL-LABEL: ktest_5:
4556 ; KNL-NEXT: pushq %rax
4557 ; KNL-NEXT: .cfi_def_cfa_offset 16
4558 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
4559 ; KNL-NEXT: vptestnmd %zmm1, %zmm1, %k1
4560 ; KNL-NEXT: korw %k1, %k0, %k0
4561 ; KNL-NEXT: vptestnmd %zmm2, %zmm2, %k1
4562 ; KNL-NEXT: vptestnmd %zmm3, %zmm3, %k2
4563 ; KNL-NEXT: korw %k2, %k1, %k1
4564 ; KNL-NEXT: kandw %k1, %k0, %k0
4565 ; KNL-NEXT: kortestw %k0, %k0
4566 ; KNL-NEXT: je LBB74_1
4567 ; KNL-NEXT: ## %bb.2: ## %exit
4568 ; KNL-NEXT: popq %rax
4569 ; KNL-NEXT: vzeroupper
4571 ; KNL-NEXT: LBB74_1: ## %bar
4572 ; KNL-NEXT: vzeroupper
4573 ; KNL-NEXT: callq _foo
4574 ; KNL-NEXT: popq %rax
4577 ; SKX-LABEL: ktest_5:
4579 ; SKX-NEXT: pushq %rax
4580 ; SKX-NEXT: .cfi_def_cfa_offset 16
4581 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
4582 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1
4583 ; SKX-NEXT: korw %k1, %k0, %k0
4584 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1
4585 ; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k2
4586 ; SKX-NEXT: korw %k2, %k1, %k1
4587 ; SKX-NEXT: ktestw %k1, %k0
4588 ; SKX-NEXT: je LBB74_1
4589 ; SKX-NEXT: ## %bb.2: ## %exit
4590 ; SKX-NEXT: popq %rax
4591 ; SKX-NEXT: vzeroupper
4593 ; SKX-NEXT: LBB74_1: ## %bar
4594 ; SKX-NEXT: vzeroupper
4595 ; SKX-NEXT: callq _foo
4596 ; SKX-NEXT: popq %rax
4599 ; AVX512BW-LABEL: ktest_5:
4600 ; AVX512BW: ## %bb.0:
4601 ; AVX512BW-NEXT: pushq %rax
4602 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
4603 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
4604 ; AVX512BW-NEXT: vptestnmd %zmm1, %zmm1, %k1
4605 ; AVX512BW-NEXT: korw %k1, %k0, %k0
4606 ; AVX512BW-NEXT: vptestnmd %zmm2, %zmm2, %k1
4607 ; AVX512BW-NEXT: vptestnmd %zmm3, %zmm3, %k2
4608 ; AVX512BW-NEXT: korw %k2, %k1, %k1
4609 ; AVX512BW-NEXT: kandw %k1, %k0, %k0
4610 ; AVX512BW-NEXT: kortestw %k0, %k0
4611 ; AVX512BW-NEXT: je LBB74_1
4612 ; AVX512BW-NEXT: ## %bb.2: ## %exit
4613 ; AVX512BW-NEXT: popq %rax
4614 ; AVX512BW-NEXT: vzeroupper
4615 ; AVX512BW-NEXT: retq
4616 ; AVX512BW-NEXT: LBB74_1: ## %bar
4617 ; AVX512BW-NEXT: vzeroupper
4618 ; AVX512BW-NEXT: callq _foo
4619 ; AVX512BW-NEXT: popq %rax
4620 ; AVX512BW-NEXT: retq
4622 ; AVX512DQ-LABEL: ktest_5:
4623 ; AVX512DQ: ## %bb.0:
4624 ; AVX512DQ-NEXT: pushq %rax
4625 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
4626 ; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
4627 ; AVX512DQ-NEXT: vptestnmd %zmm1, %zmm1, %k1
4628 ; AVX512DQ-NEXT: korw %k1, %k0, %k0
4629 ; AVX512DQ-NEXT: vptestnmd %zmm2, %zmm2, %k1
4630 ; AVX512DQ-NEXT: vptestnmd %zmm3, %zmm3, %k2
4631 ; AVX512DQ-NEXT: korw %k2, %k1, %k1
4632 ; AVX512DQ-NEXT: ktestw %k1, %k0
4633 ; AVX512DQ-NEXT: je LBB74_1
4634 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
4635 ; AVX512DQ-NEXT: popq %rax
4636 ; AVX512DQ-NEXT: vzeroupper
4637 ; AVX512DQ-NEXT: retq
4638 ; AVX512DQ-NEXT: LBB74_1: ## %bar
4639 ; AVX512DQ-NEXT: vzeroupper
4640 ; AVX512DQ-NEXT: callq _foo
4641 ; AVX512DQ-NEXT: popq %rax
4642 ; AVX512DQ-NEXT: retq
4644 ; X86-LABEL: ktest_5:
4646 ; X86-NEXT: subl $12, %esp
4647 ; X86-NEXT: .cfi_def_cfa_offset 16
4648 ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
4649 ; X86-NEXT: vptestnmd %zmm1, %zmm1, %k1
4650 ; X86-NEXT: korw %k1, %k0, %k0
4651 ; X86-NEXT: vptestnmd %zmm2, %zmm2, %k1
4652 ; X86-NEXT: vptestnmd %zmm3, %zmm3, %k2
4653 ; X86-NEXT: korw %k2, %k1, %k1
4654 ; X86-NEXT: ktestw %k1, %k0
4655 ; X86-NEXT: je LBB74_1
4656 ; X86-NEXT: ## %bb.2: ## %exit
4657 ; X86-NEXT: addl $12, %esp
4658 ; X86-NEXT: vzeroupper
4660 ; X86-NEXT: LBB74_1: ## %bar
4661 ; X86-NEXT: vzeroupper
4662 ; X86-NEXT: calll _foo
4663 ; X86-NEXT: addl $12, %esp
4665 %a = icmp eq <16 x i32> %w, zeroinitializer
4666 %b = icmp eq <16 x i32> %x, zeroinitializer
4667 %c = icmp eq <16 x i32> %y, zeroinitializer
4668 %d = icmp eq <16 x i32> %z, zeroinitializer
4669 %e = or <16 x i1> %a, %b
4670 %f = or <16 x i1> %c, %d
4671 %g = and <16 x i1> %e, %f
4672 %h = bitcast <16 x i1> %g to i16
4673 %i = icmp eq i16 %h, 0
4674 br i1 %i, label %bar, label %exit
; Same (a | b) & (c | d) == 0 pattern with a <32 x i1> mask from <32 x i16>
; inputs. SKX, AVX512BW and X86 expect ktestd. KNL and AVX512DQ instead expect
; ymm vpcmpeqw/vpor/vpand, then two 16-bit masks merged in GPRs with
; shll $16 + orl before the je.
4684 define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z) {
4685 ; KNL-LABEL: ktest_6:
4687 ; KNL-NEXT: pushq %rax
4688 ; KNL-NEXT: .cfi_def_cfa_offset 16
4689 ; KNL-NEXT: vextracti64x4 $1, %zmm3, %ymm4
4690 ; KNL-NEXT: vextracti64x4 $1, %zmm2, %ymm5
4691 ; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm6
4692 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm7
4693 ; KNL-NEXT: vpxor %xmm8, %xmm8, %xmm8
4694 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm0, %ymm0
4695 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm7, %ymm7
4696 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm1, %ymm1
4697 ; KNL-NEXT: vpor %ymm1, %ymm0, %ymm0
4698 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm6, %ymm1
4699 ; KNL-NEXT: vpor %ymm1, %ymm7, %ymm1
4700 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm2, %ymm2
4701 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm5, %ymm5
4702 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm3, %ymm3
4703 ; KNL-NEXT: vpor %ymm3, %ymm2, %ymm2
4704 ; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0
4705 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm4, %ymm2
4706 ; KNL-NEXT: vpor %ymm2, %ymm5, %ymm2
4707 ; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
4708 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
4709 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
4710 ; KNL-NEXT: kmovw %k0, %eax
4711 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm0
4712 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
4713 ; KNL-NEXT: kmovw %k0, %ecx
4714 ; KNL-NEXT: shll $16, %ecx
4715 ; KNL-NEXT: orl %eax, %ecx
4716 ; KNL-NEXT: je LBB75_1
4717 ; KNL-NEXT: ## %bb.2: ## %exit
4718 ; KNL-NEXT: popq %rax
4719 ; KNL-NEXT: vzeroupper
4721 ; KNL-NEXT: LBB75_1: ## %bar
4722 ; KNL-NEXT: vzeroupper
4723 ; KNL-NEXT: callq _foo
4724 ; KNL-NEXT: popq %rax
4727 ; SKX-LABEL: ktest_6:
4729 ; SKX-NEXT: pushq %rax
4730 ; SKX-NEXT: .cfi_def_cfa_offset 16
4731 ; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k0
4732 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1
4733 ; SKX-NEXT: kord %k1, %k0, %k0
4734 ; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1
4735 ; SKX-NEXT: vptestnmw %zmm3, %zmm3, %k2
4736 ; SKX-NEXT: kord %k2, %k1, %k1
4737 ; SKX-NEXT: ktestd %k1, %k0
4738 ; SKX-NEXT: je LBB75_1
4739 ; SKX-NEXT: ## %bb.2: ## %exit
4740 ; SKX-NEXT: popq %rax
4741 ; SKX-NEXT: vzeroupper
4743 ; SKX-NEXT: LBB75_1: ## %bar
4744 ; SKX-NEXT: vzeroupper
4745 ; SKX-NEXT: callq _foo
4746 ; SKX-NEXT: popq %rax
4749 ; AVX512BW-LABEL: ktest_6:
4750 ; AVX512BW: ## %bb.0:
4751 ; AVX512BW-NEXT: pushq %rax
4752 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
4753 ; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0
4754 ; AVX512BW-NEXT: vptestnmw %zmm1, %zmm1, %k1
4755 ; AVX512BW-NEXT: kord %k1, %k0, %k0
4756 ; AVX512BW-NEXT: vptestnmw %zmm2, %zmm2, %k1
4757 ; AVX512BW-NEXT: vptestnmw %zmm3, %zmm3, %k2
4758 ; AVX512BW-NEXT: kord %k2, %k1, %k1
4759 ; AVX512BW-NEXT: ktestd %k1, %k0
4760 ; AVX512BW-NEXT: je LBB75_1
4761 ; AVX512BW-NEXT: ## %bb.2: ## %exit
4762 ; AVX512BW-NEXT: popq %rax
4763 ; AVX512BW-NEXT: vzeroupper
4764 ; AVX512BW-NEXT: retq
4765 ; AVX512BW-NEXT: LBB75_1: ## %bar
4766 ; AVX512BW-NEXT: vzeroupper
4767 ; AVX512BW-NEXT: callq _foo
4768 ; AVX512BW-NEXT: popq %rax
4769 ; AVX512BW-NEXT: retq
4771 ; AVX512DQ-LABEL: ktest_6:
4772 ; AVX512DQ: ## %bb.0:
4773 ; AVX512DQ-NEXT: pushq %rax
4774 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
4775 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm3, %ymm4
4776 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm5
4777 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm6
4778 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm7
4779 ; AVX512DQ-NEXT: vpxor %xmm8, %xmm8, %xmm8
4780 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm0, %ymm0
4781 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm7, %ymm7
4782 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm1, %ymm1
4783 ; AVX512DQ-NEXT: vpor %ymm1, %ymm0, %ymm0
4784 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm6, %ymm1
4785 ; AVX512DQ-NEXT: vpor %ymm1, %ymm7, %ymm1
4786 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm2, %ymm2
4787 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm5, %ymm5
4788 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm3, %ymm3
4789 ; AVX512DQ-NEXT: vpor %ymm3, %ymm2, %ymm2
4790 ; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
4791 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm4, %ymm2
4792 ; AVX512DQ-NEXT: vpor %ymm2, %ymm5, %ymm2
4793 ; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
4794 ; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
4795 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
4796 ; AVX512DQ-NEXT: kmovw %k0, %eax
4797 ; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm0
4798 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
4799 ; AVX512DQ-NEXT: kmovw %k0, %ecx
4800 ; AVX512DQ-NEXT: shll $16, %ecx
4801 ; AVX512DQ-NEXT: orl %eax, %ecx
4802 ; AVX512DQ-NEXT: je LBB75_1
4803 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
4804 ; AVX512DQ-NEXT: popq %rax
4805 ; AVX512DQ-NEXT: vzeroupper
4806 ; AVX512DQ-NEXT: retq
4807 ; AVX512DQ-NEXT: LBB75_1: ## %bar
4808 ; AVX512DQ-NEXT: vzeroupper
4809 ; AVX512DQ-NEXT: callq _foo
4810 ; AVX512DQ-NEXT: popq %rax
4811 ; AVX512DQ-NEXT: retq
4813 ; X86-LABEL: ktest_6:
4815 ; X86-NEXT: subl $12, %esp
4816 ; X86-NEXT: .cfi_def_cfa_offset 16
4817 ; X86-NEXT: vptestnmw %zmm0, %zmm0, %k0
4818 ; X86-NEXT: vptestnmw %zmm1, %zmm1, %k1
4819 ; X86-NEXT: kord %k1, %k0, %k0
4820 ; X86-NEXT: vptestnmw %zmm2, %zmm2, %k1
4821 ; X86-NEXT: vptestnmw %zmm3, %zmm3, %k2
4822 ; X86-NEXT: kord %k2, %k1, %k1
4823 ; X86-NEXT: ktestd %k1, %k0
4824 ; X86-NEXT: je LBB75_1
4825 ; X86-NEXT: ## %bb.2: ## %exit
4826 ; X86-NEXT: addl $12, %esp
4827 ; X86-NEXT: vzeroupper
4829 ; X86-NEXT: LBB75_1: ## %bar
4830 ; X86-NEXT: vzeroupper
4831 ; X86-NEXT: calll _foo
4832 ; X86-NEXT: addl $12, %esp
4834 %a = icmp eq <32 x i16> %w, zeroinitializer
4835 %b = icmp eq <32 x i16> %x, zeroinitializer
4836 %c = icmp eq <32 x i16> %y, zeroinitializer
4837 %d = icmp eq <32 x i16> %z, zeroinitializer
4838 %e = or <32 x i1> %a, %b
4839 %f = or <32 x i1> %c, %d
4840 %g = and <32 x i1> %e, %f
4841 %h = bitcast <32 x i1> %g to i32
4842 %i = icmp eq i32 %h, 0
4843 br i1 %i, label %bar, label %exit
4853 define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
4854 ; KNL-LABEL: ktest_7:
4856 ; KNL-NEXT: pushq %rax
4857 ; KNL-NEXT: .cfi_def_cfa_offset 16
4858 ; KNL-NEXT: vextracti64x4 $1, %zmm3, %ymm9
4859 ; KNL-NEXT: vextracti64x4 $1, %zmm2, %ymm10
4860 ; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm11
4861 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm7
4862 ; KNL-NEXT: vpxor %xmm8, %xmm8, %xmm8
4863 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm0, %ymm13
4864 ; KNL-NEXT: vextracti128 $1, %ymm13, %xmm4
4865 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm7, %ymm7
4866 ; KNL-NEXT: vextracti128 $1, %ymm7, %xmm5
4867 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm1, %ymm1
4868 ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm6
4869 ; KNL-NEXT: vpor %xmm6, %xmm4, %xmm12
4870 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm11, %ymm6
4871 ; KNL-NEXT: vextracti128 $1, %ymm6, %xmm4
4872 ; KNL-NEXT: vpor %xmm4, %xmm5, %xmm11
4873 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm2, %ymm2
4874 ; KNL-NEXT: vextracti128 $1, %ymm2, %xmm5
4875 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm10, %ymm10
4876 ; KNL-NEXT: vextracti128 $1, %ymm10, %xmm4
4877 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm3, %ymm3
4878 ; KNL-NEXT: vextracti128 $1, %ymm3, %xmm0
4879 ; KNL-NEXT: vpor %xmm0, %xmm5, %xmm0
4880 ; KNL-NEXT: vpand %xmm0, %xmm12, %xmm12
4881 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm9, %ymm5
4882 ; KNL-NEXT: vextracti128 $1, %ymm5, %xmm0
4883 ; KNL-NEXT: vpor %xmm0, %xmm4, %xmm0
4884 ; KNL-NEXT: vpand %xmm0, %xmm11, %xmm0
4885 ; KNL-NEXT: vpor %xmm6, %xmm7, %xmm4
4886 ; KNL-NEXT: vpor %xmm1, %xmm13, %xmm1
4887 ; KNL-NEXT: vpor %xmm5, %xmm10, %xmm5
4888 ; KNL-NEXT: vpand %xmm5, %xmm4, %xmm4
4889 ; KNL-NEXT: vpor %xmm3, %xmm2, %xmm2
4890 ; KNL-NEXT: vpand %xmm2, %xmm1, %xmm1
4891 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
4892 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
4893 ; KNL-NEXT: kmovw %k0, %eax
4894 ; KNL-NEXT: vpmovsxbd %xmm12, %zmm1
4895 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
4896 ; KNL-NEXT: kmovw %k0, %ecx
4897 ; KNL-NEXT: shll $16, %ecx
4898 ; KNL-NEXT: orl %eax, %ecx
4899 ; KNL-NEXT: vpmovsxbd %xmm4, %zmm1
4900 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
4901 ; KNL-NEXT: kmovw %k0, %eax
4902 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
4903 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
4904 ; KNL-NEXT: kmovw %k0, %edx
4905 ; KNL-NEXT: shll $16, %edx
4906 ; KNL-NEXT: orl %eax, %edx
4907 ; KNL-NEXT: shlq $32, %rdx
4908 ; KNL-NEXT: orq %rcx, %rdx
4909 ; KNL-NEXT: je LBB76_1
4910 ; KNL-NEXT: ## %bb.2: ## %exit
4911 ; KNL-NEXT: popq %rax
4912 ; KNL-NEXT: vzeroupper
4914 ; KNL-NEXT: LBB76_1: ## %bar
4915 ; KNL-NEXT: vzeroupper
4916 ; KNL-NEXT: callq _foo
4917 ; KNL-NEXT: popq %rax
4920 ; SKX-LABEL: ktest_7:
4922 ; SKX-NEXT: pushq %rax
4923 ; SKX-NEXT: .cfi_def_cfa_offset 16
4924 ; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k0
4925 ; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1
4926 ; SKX-NEXT: korq %k1, %k0, %k0
4927 ; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1
4928 ; SKX-NEXT: vptestnmb %zmm3, %zmm3, %k2
4929 ; SKX-NEXT: korq %k2, %k1, %k1
4930 ; SKX-NEXT: ktestq %k1, %k0
4931 ; SKX-NEXT: je LBB76_1
4932 ; SKX-NEXT: ## %bb.2: ## %exit
4933 ; SKX-NEXT: popq %rax
4934 ; SKX-NEXT: vzeroupper
4936 ; SKX-NEXT: LBB76_1: ## %bar
4937 ; SKX-NEXT: vzeroupper
4938 ; SKX-NEXT: callq _foo
4939 ; SKX-NEXT: popq %rax
4942 ; AVX512BW-LABEL: ktest_7:
4943 ; AVX512BW: ## %bb.0:
4944 ; AVX512BW-NEXT: pushq %rax
4945 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
4946 ; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
4947 ; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k1
4948 ; AVX512BW-NEXT: korq %k1, %k0, %k0
4949 ; AVX512BW-NEXT: vptestnmb %zmm2, %zmm2, %k1
4950 ; AVX512BW-NEXT: vptestnmb %zmm3, %zmm3, %k2
4951 ; AVX512BW-NEXT: korq %k2, %k1, %k1
4952 ; AVX512BW-NEXT: ktestq %k1, %k0
4953 ; AVX512BW-NEXT: je LBB76_1
4954 ; AVX512BW-NEXT: ## %bb.2: ## %exit
4955 ; AVX512BW-NEXT: popq %rax
4956 ; AVX512BW-NEXT: vzeroupper
4957 ; AVX512BW-NEXT: retq
4958 ; AVX512BW-NEXT: LBB76_1: ## %bar
4959 ; AVX512BW-NEXT: vzeroupper
4960 ; AVX512BW-NEXT: callq _foo
4961 ; AVX512BW-NEXT: popq %rax
4962 ; AVX512BW-NEXT: retq
4964 ; AVX512DQ-LABEL: ktest_7:
4965 ; AVX512DQ: ## %bb.0:
4966 ; AVX512DQ-NEXT: pushq %rax
4967 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
4968 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm3, %ymm9
4969 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm10
4970 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm11
4971 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm7
4972 ; AVX512DQ-NEXT: vpxor %xmm8, %xmm8, %xmm8
4973 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm0, %ymm13
4974 ; AVX512DQ-NEXT: vextracti128 $1, %ymm13, %xmm4
4975 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm7, %ymm7
4976 ; AVX512DQ-NEXT: vextracti128 $1, %ymm7, %xmm5
4977 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm1, %ymm1
4978 ; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm6
4979 ; AVX512DQ-NEXT: vpor %xmm6, %xmm4, %xmm12
4980 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm11, %ymm6
4981 ; AVX512DQ-NEXT: vextracti128 $1, %ymm6, %xmm4
4982 ; AVX512DQ-NEXT: vpor %xmm4, %xmm5, %xmm11
4983 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm2, %ymm2
4984 ; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm5
4985 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm10, %ymm10
4986 ; AVX512DQ-NEXT: vextracti128 $1, %ymm10, %xmm4
4987 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm3, %ymm3
4988 ; AVX512DQ-NEXT: vextracti128 $1, %ymm3, %xmm0
4989 ; AVX512DQ-NEXT: vpor %xmm0, %xmm5, %xmm0
4990 ; AVX512DQ-NEXT: vpand %xmm0, %xmm12, %xmm12
4991 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm9, %ymm5
4992 ; AVX512DQ-NEXT: vextracti128 $1, %ymm5, %xmm0
4993 ; AVX512DQ-NEXT: vpor %xmm0, %xmm4, %xmm0
4994 ; AVX512DQ-NEXT: vpand %xmm0, %xmm11, %xmm0
4995 ; AVX512DQ-NEXT: vpor %xmm6, %xmm7, %xmm4
4996 ; AVX512DQ-NEXT: vpor %xmm1, %xmm13, %xmm1
4997 ; AVX512DQ-NEXT: vpor %xmm5, %xmm10, %xmm5
4998 ; AVX512DQ-NEXT: vpand %xmm5, %xmm4, %xmm4
4999 ; AVX512DQ-NEXT: vpor %xmm3, %xmm2, %xmm2
5000 ; AVX512DQ-NEXT: vpand %xmm2, %xmm1, %xmm1
5001 ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1
5002 ; AVX512DQ-NEXT: vpmovd2m %zmm1, %k0
5003 ; AVX512DQ-NEXT: kmovw %k0, %eax
5004 ; AVX512DQ-NEXT: vpmovsxbd %xmm12, %zmm1
5005 ; AVX512DQ-NEXT: vpmovd2m %zmm1, %k0
5006 ; AVX512DQ-NEXT: kmovw %k0, %ecx
5007 ; AVX512DQ-NEXT: shll $16, %ecx
5008 ; AVX512DQ-NEXT: orl %eax, %ecx
5009 ; AVX512DQ-NEXT: vpmovsxbd %xmm4, %zmm1
5010 ; AVX512DQ-NEXT: vpmovd2m %zmm1, %k0
5011 ; AVX512DQ-NEXT: kmovw %k0, %eax
5012 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
5013 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
5014 ; AVX512DQ-NEXT: kmovw %k0, %edx
5015 ; AVX512DQ-NEXT: shll $16, %edx
5016 ; AVX512DQ-NEXT: orl %eax, %edx
5017 ; AVX512DQ-NEXT: shlq $32, %rdx
5018 ; AVX512DQ-NEXT: orq %rcx, %rdx
5019 ; AVX512DQ-NEXT: je LBB76_1
5020 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
5021 ; AVX512DQ-NEXT: popq %rax
5022 ; AVX512DQ-NEXT: vzeroupper
5023 ; AVX512DQ-NEXT: retq
5024 ; AVX512DQ-NEXT: LBB76_1: ## %bar
5025 ; AVX512DQ-NEXT: vzeroupper
5026 ; AVX512DQ-NEXT: callq _foo
5027 ; AVX512DQ-NEXT: popq %rax
5028 ; AVX512DQ-NEXT: retq
5030 ; X86-LABEL: ktest_7:
5032 ; X86-NEXT: subl $12, %esp
5033 ; X86-NEXT: .cfi_def_cfa_offset 16
5034 ; X86-NEXT: vptestnmb %zmm0, %zmm0, %k0
5035 ; X86-NEXT: vptestnmb %zmm1, %zmm1, %k1
5036 ; X86-NEXT: korq %k1, %k0, %k0
5037 ; X86-NEXT: vptestnmb %zmm2, %zmm2, %k1
5038 ; X86-NEXT: vptestnmb %zmm3, %zmm3, %k2
5039 ; X86-NEXT: korq %k2, %k1, %k1
5040 ; X86-NEXT: kandq %k1, %k0, %k0
5041 ; X86-NEXT: kshiftrq $32, %k0, %k1
5042 ; X86-NEXT: kortestd %k1, %k0
5043 ; X86-NEXT: je LBB76_1
5044 ; X86-NEXT: ## %bb.2: ## %exit
5045 ; X86-NEXT: addl $12, %esp
5046 ; X86-NEXT: vzeroupper
5048 ; X86-NEXT: LBB76_1: ## %bar
5049 ; X86-NEXT: vzeroupper
5050 ; X86-NEXT: calll _foo
5051 ; X86-NEXT: addl $12, %esp
5053 %a = icmp eq <64 x i8> %w, zeroinitializer
5054 %b = icmp eq <64 x i8> %x, zeroinitializer
5055 %c = icmp eq <64 x i8> %y, zeroinitializer
5056 %d = icmp eq <64 x i8> %z, zeroinitializer
5057 %e = or <64 x i1> %a, %b
5058 %f = or <64 x i1> %c, %d
5059 %g = and <64 x i1> %e, %f
5060 %h = bitcast <64 x i1> %g to i64
5061 %i = icmp eq i64 %h, 0
5062 br i1 %i, label %bar, label %exit
; Test inserting a run-time bit into element 0 of a constant <64 x i1> mask.
; The low bit of %a replaces element 0; every other element keeps its constant
; value (false at indices 1, 17, 33 and 49, true everywhere else), which is
; where the 0xFFFD / 0xFFFC immediates in the expected output come from.
; The runs without AVX512BW (KNL, AVX512DQ) are expected to write the result
; in 16-bit pieces through the pointer in %rdi, while the runs with AVX512BW
; are expected to build the mask in a k-register and expand it into %zmm0.
5072 define <64 x i1> @mask64_insert(i32 %a) {
5073 ; KNL-LABEL: mask64_insert:
5075 ; KNL-NEXT: movq %rdi, %rax
5076 ; KNL-NEXT: movw $-4, %cx
5077 ; KNL-NEXT: kmovw %ecx, %k0
5078 ; KNL-NEXT: kshiftrw $1, %k0, %k0
5079 ; KNL-NEXT: kshiftlw $1, %k0, %k0
5080 ; KNL-NEXT: andl $1, %esi
5081 ; KNL-NEXT: kmovw %esi, %k1
5082 ; KNL-NEXT: korw %k1, %k0, %k0
5083 ; KNL-NEXT: kmovw %k0, (%rdi)
5084 ; KNL-NEXT: movw $-3, 6(%rdi)
5085 ; KNL-NEXT: movl $-131075, 2(%rdi) ## imm = 0xFFFDFFFD
5088 ; SKX-LABEL: mask64_insert:
5090 ; SKX-NEXT: kmovd %edi, %k0
5091 ; SKX-NEXT: kshiftlq $63, %k0, %k0
5092 ; SKX-NEXT: kshiftrq $63, %k0, %k0
5093 ; SKX-NEXT: movabsq $-562958543486980, %rax ## imm = 0xFFFDFFFDFFFDFFFC
5094 ; SKX-NEXT: kmovq %rax, %k1
5095 ; SKX-NEXT: kshiftrq $1, %k1, %k1
5096 ; SKX-NEXT: kshiftlq $1, %k1, %k1
5097 ; SKX-NEXT: korq %k0, %k1, %k0
5098 ; SKX-NEXT: vpmovm2b %k0, %zmm0
5101 ; AVX512BW-LABEL: mask64_insert:
5102 ; AVX512BW: ## %bb.0:
5103 ; AVX512BW-NEXT: kmovd %edi, %k0
5104 ; AVX512BW-NEXT: kshiftlq $63, %k0, %k0
5105 ; AVX512BW-NEXT: kshiftrq $63, %k0, %k0
5106 ; AVX512BW-NEXT: movabsq $-562958543486980, %rax ## imm = 0xFFFDFFFDFFFDFFFC
5107 ; AVX512BW-NEXT: kmovq %rax, %k1
5108 ; AVX512BW-NEXT: kshiftrq $1, %k1, %k1
5109 ; AVX512BW-NEXT: kshiftlq $1, %k1, %k1
5110 ; AVX512BW-NEXT: korq %k0, %k1, %k0
5111 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
5112 ; AVX512BW-NEXT: retq
5114 ; AVX512DQ-LABEL: mask64_insert:
5115 ; AVX512DQ: ## %bb.0:
5116 ; AVX512DQ-NEXT: movq %rdi, %rax
5117 ; AVX512DQ-NEXT: movw $-4, %cx
5118 ; AVX512DQ-NEXT: kmovw %ecx, %k0
5119 ; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0
5120 ; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0
5121 ; AVX512DQ-NEXT: andl $1, %esi
5122 ; AVX512DQ-NEXT: kmovw %esi, %k1
5123 ; AVX512DQ-NEXT: korw %k1, %k0, %k0
5124 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
5125 ; AVX512DQ-NEXT: movw $-3, 6(%rdi)
5126 ; AVX512DQ-NEXT: movl $-131075, 2(%rdi) ## imm = 0xFFFDFFFD
5127 ; AVX512DQ-NEXT: retq
5129 ; X86-LABEL: mask64_insert:
5131 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
5132 ; X86-NEXT: movl $-131076, %eax ## imm = 0xFFFDFFFC
5133 ; X86-NEXT: kmovd %eax, %k1
5134 ; X86-NEXT: movl $-131075, %eax ## imm = 0xFFFDFFFD
5135 ; X86-NEXT: kmovd %eax, %k2
5136 ; X86-NEXT: kunpckdq %k1, %k2, %k1
5137 ; X86-NEXT: kshiftrq $1, %k1, %k1
5138 ; X86-NEXT: kshiftlq $1, %k1, %k1
5139 ; X86-NEXT: kshiftlq $63, %k0, %k0
5140 ; X86-NEXT: kshiftrq $63, %k0, %k0
5141 ; X86-NEXT: korq %k0, %k1, %k0
5142 ; X86-NEXT: vpmovm2b %k0, %zmm0
; Only the least significant bit of %a is used as the inserted mask element.
5144 %a_i = trunc i32 %a to i1
; Overwrite element 0 of the constant mask with %a_i; the constant's own
; element 0 (true) is therefore never observable in the result.
5145 %maskv = insertelement <64 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
5146 ret <64 x i1> %maskv