1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,KNL
3 ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefixes=CHECK,SKX
4 ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW
5 ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512DQ
6 ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=X86
; Inverts a 16-bit mask: the i16 argument is bitcast to <16 x i1>, xor'ed with
; an all-ones vector and bitcast back to i16. The x86-64 runs expect the
; inversion to be done with a scalar notl on a general-purpose register.
9 define i16 @mask16(i16 %x) {
10 ; CHECK-LABEL: mask16:
12 ; CHECK-NEXT: movl %edi, %eax
13 ; CHECK-NEXT: notl %eax
14 ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
19 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
21 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
23 %m0 = bitcast i16 %x to <16 x i1>
24 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
25 %ret = bitcast <16 x i1> %m1 to i16
; Same 16-bit mask inversion as a bitcast/xor/bitcast sequence, followed by a
; zext of the i16 result to i32. The x86-64 runs expect notl + movzwl; the
; i686 run expects a movzwl load followed by xorl with 0xFFFF.
29 define i32 @mask16_zext(i16 %x) {
30 ; CHECK-LABEL: mask16_zext:
32 ; CHECK-NEXT: notl %edi
33 ; CHECK-NEXT: movzwl %di, %eax
36 ; X86-LABEL: mask16_zext:
38 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
39 ; X86-NEXT: xorl $65535, %eax ## imm = 0xFFFF
41 %m0 = bitcast i16 %x to <16 x i1>
42 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
43 %m2 = bitcast <16 x i1> %m1 to i16
44 %ret = zext i16 %m2 to i32
; Inverts an 8-bit mask: the i8 argument is bitcast to <8 x i1>, xor'ed with an
; all-ones vector and bitcast back to i8. The x86-64 runs expect a scalar
; notb on %al.
48 define i8 @mask8(i8 %x) {
51 ; CHECK-NEXT: movl %edi, %eax
52 ; CHECK-NEXT: notb %al
53 ; CHECK-NEXT: ## kill: def $al killed $al killed $eax
58 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
61 %m0 = bitcast i8 %x to <8 x i1>
62 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
63 %ret = bitcast <8 x i1> %m1 to i8
; Same 8-bit mask inversion, followed by a zext of the i8 result to i32.
; The x86-64 runs expect notb + movzbl; the i686 run expects a movzbl load
; followed by xorl with 255.
67 define i32 @mask8_zext(i8 %x) {
68 ; CHECK-LABEL: mask8_zext:
70 ; CHECK-NEXT: notb %dil
71 ; CHECK-NEXT: movzbl %dil, %eax
74 ; X86-LABEL: mask8_zext:
76 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
77 ; X86-NEXT: xorl $255, %eax
79 %m0 = bitcast i8 %x to <8 x i1>
80 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
81 %m2 = bitcast <8 x i1> %m1 to i8
82 %ret = zext i8 %m2 to i32
; In-memory 16-bit mask inversion: loads an i16 from %ptr, inverts it as a
; <16 x i1> vector and stores the result back to the same address. All runs
; expect the value to go through mask register k0 (kmovw / knotw / kmovw).
86 define void @mask16_mem(ptr %ptr) {
87 ; CHECK-LABEL: mask16_mem:
89 ; CHECK-NEXT: kmovw (%rdi), %k0
90 ; CHECK-NEXT: knotw %k0, %k0
91 ; CHECK-NEXT: kmovw %k0, (%rdi)
94 ; X86-LABEL: mask16_mem:
96 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
97 ; X86-NEXT: kmovw (%eax), %k0
98 ; X86-NEXT: knotw %k0, %k0
99 ; X86-NEXT: kmovw %k0, (%eax)
101 %x = load i16, ptr %ptr, align 4
102 %m0 = bitcast i16 %x to <16 x i1>
103 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
104 %ret = bitcast <16 x i1> %m1 to i16
105 store i16 %ret, ptr %ptr, align 4
; In-memory 8-bit mask inversion: loads an i8 from %ptr, inverts it as a
; <8 x i1> vector and stores the result back to the same address.
; The expected code depends on the feature set of the run: the runs without
; avx512dq (KNL and AVX512BW prefixes) expect a single notb on memory, while
; the runs with avx512dq (SKX, AVX512DQ and the i686 run) expect
; kmovb / knotb / kmovb through k0.
109 define void @mask8_mem(ptr %ptr) {
110 ; KNL-LABEL: mask8_mem:
112 ; KNL-NEXT: notb (%rdi)
115 ; SKX-LABEL: mask8_mem:
117 ; SKX-NEXT: kmovb (%rdi), %k0
118 ; SKX-NEXT: knotb %k0, %k0
119 ; SKX-NEXT: kmovb %k0, (%rdi)
122 ; AVX512BW-LABEL: mask8_mem:
123 ; AVX512BW: ## %bb.0:
124 ; AVX512BW-NEXT: notb (%rdi)
125 ; AVX512BW-NEXT: retq
127 ; AVX512DQ-LABEL: mask8_mem:
128 ; AVX512DQ: ## %bb.0:
129 ; AVX512DQ-NEXT: kmovb (%rdi), %k0
130 ; AVX512DQ-NEXT: knotb %k0, %k0
131 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
132 ; AVX512DQ-NEXT: retq
134 ; X86-LABEL: mask8_mem:
136 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
137 ; X86-NEXT: kmovb (%eax), %k0
138 ; X86-NEXT: knotb %k0, %k0
139 ; X86-NEXT: kmovb %k0, (%eax)
141 %x = load i8, ptr %ptr, align 4
142 %m0 = bitcast i8 %x to <8 x i1>
143 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
144 %ret = bitcast <8 x i1> %m1 to i8
145 store i8 %ret, ptr %ptr, align 4
; Computes (x & y) | (x ^ y) on two i16 values viewed as <16 x i1> masks.
; That expression is equivalent to x | y, and the checks expect it to be
; folded to a single scalar OR (orl on x86-64, orw on i686).
149 define i16 @mand16(i16 %x, i16 %y) {
150 ; CHECK-LABEL: mand16:
152 ; CHECK-NEXT: movl %edi, %eax
153 ; CHECK-NEXT: orl %esi, %eax
154 ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
159 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
160 ; X86-NEXT: orw {{[0-9]+}}(%esp), %ax
162 %ma = bitcast i16 %x to <16 x i1>
163 %mb = bitcast i16 %y to <16 x i1>
164 %mc = and <16 x i1> %ma, %mb
165 %md = xor <16 x i1> %ma, %mb
166 %me = or <16 x i1> %mc, %md
167 %ret = bitcast <16 x i1> %me to i16
; Same (a & b) | (a ^ b) computation as mand16, but the two <16 x i1> operands
; are loaded directly from memory. All runs expect the operands to be loaded
; into k0/k1 with kmovw and combined with a single korw; the result is moved
; to %eax with kmovw (KNL, AVX512DQ prefixes) or kmovd (SKX, AVX512BW and the
; i686 run).
171 define i16 @mand16_mem(ptr %x, ptr %y) {
172 ; KNL-LABEL: mand16_mem:
174 ; KNL-NEXT: kmovw (%rdi), %k0
175 ; KNL-NEXT: kmovw (%rsi), %k1
176 ; KNL-NEXT: korw %k1, %k0, %k0
177 ; KNL-NEXT: kmovw %k0, %eax
178 ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
181 ; SKX-LABEL: mand16_mem:
183 ; SKX-NEXT: kmovw (%rdi), %k0
184 ; SKX-NEXT: kmovw (%rsi), %k1
185 ; SKX-NEXT: korw %k1, %k0, %k0
186 ; SKX-NEXT: kmovd %k0, %eax
187 ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
190 ; AVX512BW-LABEL: mand16_mem:
191 ; AVX512BW: ## %bb.0:
192 ; AVX512BW-NEXT: kmovw (%rdi), %k0
193 ; AVX512BW-NEXT: kmovw (%rsi), %k1
194 ; AVX512BW-NEXT: korw %k1, %k0, %k0
195 ; AVX512BW-NEXT: kmovd %k0, %eax
196 ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
197 ; AVX512BW-NEXT: retq
199 ; AVX512DQ-LABEL: mand16_mem:
200 ; AVX512DQ: ## %bb.0:
201 ; AVX512DQ-NEXT: kmovw (%rdi), %k0
202 ; AVX512DQ-NEXT: kmovw (%rsi), %k1
203 ; AVX512DQ-NEXT: korw %k1, %k0, %k0
204 ; AVX512DQ-NEXT: kmovw %k0, %eax
205 ; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
206 ; AVX512DQ-NEXT: retq
208 ; X86-LABEL: mand16_mem:
210 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
211 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
212 ; X86-NEXT: kmovw (%ecx), %k0
213 ; X86-NEXT: kmovw (%eax), %k1
214 ; X86-NEXT: korw %k1, %k0, %k0
215 ; X86-NEXT: kmovd %k0, %eax
216 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
218 %ma = load <16 x i1>, ptr %x
219 %mb = load <16 x i1>, ptr %y
220 %mc = and <16 x i1> %ma, %mb
221 %md = xor <16 x i1> %ma, %mb
222 %me = or <16 x i1> %mc, %md
223 %ret = bitcast <16 x i1> %me to i16
; Extracts the upper half of a 16-bit mask: the i16 argument is bitcast to
; <16 x i1>, a shufflevector selects elements 8..15 into an <8 x i1>, and that
; is bitcast to i8. The x86-64 runs expect a kshiftrw by 8 on k0; the visible
; i686 check is a single movzbl load from the stack.
227 define i8 @shuf_test1(i16 %v) nounwind {
228 ; KNL-LABEL: shuf_test1:
230 ; KNL-NEXT: kmovw %edi, %k0
231 ; KNL-NEXT: kshiftrw $8, %k0, %k0
232 ; KNL-NEXT: kmovw %k0, %eax
233 ; KNL-NEXT: ## kill: def $al killed $al killed $eax
236 ; SKX-LABEL: shuf_test1:
238 ; SKX-NEXT: kmovd %edi, %k0
239 ; SKX-NEXT: kshiftrw $8, %k0, %k0
240 ; SKX-NEXT: kmovd %k0, %eax
241 ; SKX-NEXT: ## kill: def $al killed $al killed $eax
244 ; AVX512BW-LABEL: shuf_test1:
245 ; AVX512BW: ## %bb.0:
246 ; AVX512BW-NEXT: kmovd %edi, %k0
247 ; AVX512BW-NEXT: kshiftrw $8, %k0, %k0
248 ; AVX512BW-NEXT: kmovd %k0, %eax
249 ; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
250 ; AVX512BW-NEXT: retq
252 ; AVX512DQ-LABEL: shuf_test1:
253 ; AVX512DQ: ## %bb.0:
254 ; AVX512DQ-NEXT: kmovw %edi, %k0
255 ; AVX512DQ-NEXT: kshiftrw $8, %k0, %k0
256 ; AVX512DQ-NEXT: kmovw %k0, %eax
257 ; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
258 ; AVX512DQ-NEXT: retq
260 ; X86-LABEL: shuf_test1:
262 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
264 %v1 = bitcast i16 %v to <16 x i1>
265 %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
266 %mask1 = bitcast <8 x i1> %mask to i8
; Zero-extends one bit of a vector compare to i32: an unsigned greater-than
; compare of two <16 x i32> vectors, extraction of element 5, then zext to i32.
; All runs expect vpcmpnleud into k0, a kshiftrw by 5 to bring the wanted bit
; to position 0, a move to %eax and an andl with 1.
270 define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
271 ; KNL-LABEL: zext_test1:
273 ; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
274 ; KNL-NEXT: kshiftrw $5, %k0, %k0
275 ; KNL-NEXT: kmovw %k0, %eax
276 ; KNL-NEXT: andl $1, %eax
277 ; KNL-NEXT: vzeroupper
280 ; SKX-LABEL: zext_test1:
282 ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
283 ; SKX-NEXT: kshiftrw $5, %k0, %k0
284 ; SKX-NEXT: kmovd %k0, %eax
285 ; SKX-NEXT: andl $1, %eax
286 ; SKX-NEXT: vzeroupper
289 ; AVX512BW-LABEL: zext_test1:
290 ; AVX512BW: ## %bb.0:
291 ; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
292 ; AVX512BW-NEXT: kshiftrw $5, %k0, %k0
293 ; AVX512BW-NEXT: kmovd %k0, %eax
294 ; AVX512BW-NEXT: andl $1, %eax
295 ; AVX512BW-NEXT: vzeroupper
296 ; AVX512BW-NEXT: retq
298 ; AVX512DQ-LABEL: zext_test1:
299 ; AVX512DQ: ## %bb.0:
300 ; AVX512DQ-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
301 ; AVX512DQ-NEXT: kshiftrw $5, %k0, %k0
302 ; AVX512DQ-NEXT: kmovw %k0, %eax
303 ; AVX512DQ-NEXT: andl $1, %eax
304 ; AVX512DQ-NEXT: vzeroupper
305 ; AVX512DQ-NEXT: retq
307 ; X86-LABEL: zext_test1:
309 ; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
310 ; X86-NEXT: kshiftrw $5, %k0, %k0
311 ; X86-NEXT: kmovd %k0, %eax
312 ; X86-NEXT: andl $1, %eax
313 ; X86-NEXT: vzeroupper
315 %cmp_res = icmp ugt <16 x i32> %a, %b
316 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
317 %res = zext i1 %cmp_res.i1 to i32
; Same pattern as zext_test1 (unsigned greater-than compare of <16 x i32>,
; extract element 5) but the extracted bit is zero-extended to i16. The
; expected sequence is still vpcmpnleud / kshiftrw $5 / move to %eax /
; andl $1, with the result taken from %ax.
321 define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
322 ; KNL-LABEL: zext_test2:
324 ; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
325 ; KNL-NEXT: kshiftrw $5, %k0, %k0
326 ; KNL-NEXT: kmovw %k0, %eax
327 ; KNL-NEXT: andl $1, %eax
328 ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
329 ; KNL-NEXT: vzeroupper
332 ; SKX-LABEL: zext_test2:
334 ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
335 ; SKX-NEXT: kshiftrw $5, %k0, %k0
336 ; SKX-NEXT: kmovd %k0, %eax
337 ; SKX-NEXT: andl $1, %eax
338 ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
339 ; SKX-NEXT: vzeroupper
342 ; AVX512BW-LABEL: zext_test2:
343 ; AVX512BW: ## %bb.0:
344 ; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
345 ; AVX512BW-NEXT: kshiftrw $5, %k0, %k0
346 ; AVX512BW-NEXT: kmovd %k0, %eax
347 ; AVX512BW-NEXT: andl $1, %eax
348 ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
349 ; AVX512BW-NEXT: vzeroupper
350 ; AVX512BW-NEXT: retq
352 ; AVX512DQ-LABEL: zext_test2:
353 ; AVX512DQ: ## %bb.0:
354 ; AVX512DQ-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
355 ; AVX512DQ-NEXT: kshiftrw $5, %k0, %k0
356 ; AVX512DQ-NEXT: kmovw %k0, %eax
357 ; AVX512DQ-NEXT: andl $1, %eax
358 ; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
359 ; AVX512DQ-NEXT: vzeroupper
360 ; AVX512DQ-NEXT: retq
362 ; X86-LABEL: zext_test2:
364 ; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
365 ; X86-NEXT: kshiftrw $5, %k0, %k0
366 ; X86-NEXT: kmovd %k0, %eax
367 ; X86-NEXT: andl $1, %eax
368 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
369 ; X86-NEXT: vzeroupper
371 %cmp_res = icmp ugt <16 x i32> %a, %b
372 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
373 %res = zext i1 %cmp_res.i1 to i16
; Same pattern as zext_test1 (unsigned greater-than compare of <16 x i32>,
; extract element 5) but the extracted bit is zero-extended to i8. The
; expected sequence is vpcmpnleud / kshiftrw $5 / move to %eax, and the bit
; is isolated with an 8-bit andb $1 on %al.
377 define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
378 ; KNL-LABEL: zext_test3:
380 ; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
381 ; KNL-NEXT: kshiftrw $5, %k0, %k0
382 ; KNL-NEXT: kmovw %k0, %eax
383 ; KNL-NEXT: andb $1, %al
384 ; KNL-NEXT: ## kill: def $al killed $al killed $eax
385 ; KNL-NEXT: vzeroupper
388 ; SKX-LABEL: zext_test3:
390 ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
391 ; SKX-NEXT: kshiftrw $5, %k0, %k0
392 ; SKX-NEXT: kmovd %k0, %eax
393 ; SKX-NEXT: andb $1, %al
394 ; SKX-NEXT: ## kill: def $al killed $al killed $eax
395 ; SKX-NEXT: vzeroupper
398 ; AVX512BW-LABEL: zext_test3:
399 ; AVX512BW: ## %bb.0:
400 ; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
401 ; AVX512BW-NEXT: kshiftrw $5, %k0, %k0
402 ; AVX512BW-NEXT: kmovd %k0, %eax
403 ; AVX512BW-NEXT: andb $1, %al
404 ; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
405 ; AVX512BW-NEXT: vzeroupper
406 ; AVX512BW-NEXT: retq
408 ; AVX512DQ-LABEL: zext_test3:
409 ; AVX512DQ: ## %bb.0:
410 ; AVX512DQ-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
411 ; AVX512DQ-NEXT: kshiftrw $5, %k0, %k0
412 ; AVX512DQ-NEXT: kmovw %k0, %eax
413 ; AVX512DQ-NEXT: andb $1, %al
414 ; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
415 ; AVX512DQ-NEXT: vzeroupper
416 ; AVX512DQ-NEXT: retq
418 ; X86-LABEL: zext_test3:
420 ; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
421 ; X86-NEXT: kshiftrw $5, %k0, %k0
422 ; X86-NEXT: kmovd %k0, %eax
423 ; X86-NEXT: andb $1, %al
424 ; X86-NEXT: ## kill: def $al killed $al killed $eax
425 ; X86-NEXT: vzeroupper
427 %cmp_res = icmp ugt <16 x i32> %a, %b
428 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
429 %res = zext i1 %cmp_res.i1 to i8
; Constant <8 x i1> stores and a reload: stores an all-true <8 x i1> to %R,
; stores <0,1,1,1,1,1,1,1> to a stack slot created by alloca, reloads that
; slot and bitcasts it to i8. The checks expect both stores to become byte
; immediates (movb $-1 and movb $-2) and the reloaded value to be
; materialized directly as the constant -2 in %al.
433 define i8 @conv1(ptr %R) {
434 ; CHECK-LABEL: conv1:
435 ; CHECK: ## %bb.0: ## %entry
436 ; CHECK-NEXT: movb $-1, (%rdi)
437 ; CHECK-NEXT: movb $-2, -{{[0-9]+}}(%rsp)
438 ; CHECK-NEXT: movb $-2, %al
442 ; X86: ## %bb.0: ## %entry
443 ; X86-NEXT: pushl %eax
444 ; X86-NEXT: .cfi_def_cfa_offset 8
445 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
446 ; X86-NEXT: movb $-1, (%eax)
447 ; X86-NEXT: movb $-2, {{[0-9]+}}(%esp)
448 ; X86-NEXT: movb $-2, %al
449 ; X86-NEXT: popl %ecx
452 store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, ptr %R
454 %maskPtr = alloca <8 x i1>
455 store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, ptr %maskPtr
456 %mask = load <8 x i1>, ptr %maskPtr
457 %mask_convert = bitcast <8 x i1> %mask to i8
; Signed compare of two <4 x i1> compare results: two sgt compares on
; <4 x i64> inputs are themselves compared with icmp sgt, and the <4 x i1>
; result is sign-extended to <4 x i32>. All runs expect a vpcmpgtq on the
; second input pair whose mask then predicates a vpcmpleq on the first pair.
; Runs without avx512vl widen the ymm operands to zmm (see the kill comments).
461 define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
464 ; KNL-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
465 ; KNL-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
466 ; KNL-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
467 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
468 ; KNL-NEXT: vpcmpgtq %zmm3, %zmm2, %k1
469 ; KNL-NEXT: vpcmpleq %zmm1, %zmm0, %k1 {%k1}
470 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
471 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
472 ; KNL-NEXT: vzeroupper
477 ; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k1
478 ; SKX-NEXT: vpcmpleq %ymm1, %ymm0, %k0 {%k1}
479 ; SKX-NEXT: vpmovm2d %k0, %xmm0
480 ; SKX-NEXT: vzeroupper
483 ; AVX512BW-LABEL: test4:
484 ; AVX512BW: ## %bb.0:
485 ; AVX512BW-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
486 ; AVX512BW-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
487 ; AVX512BW-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
488 ; AVX512BW-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
489 ; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k1
490 ; AVX512BW-NEXT: vpcmpleq %zmm1, %zmm0, %k1 {%k1}
491 ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
492 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
493 ; AVX512BW-NEXT: vzeroupper
494 ; AVX512BW-NEXT: retq
496 ; AVX512DQ-LABEL: test4:
497 ; AVX512DQ: ## %bb.0:
498 ; AVX512DQ-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
499 ; AVX512DQ-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
500 ; AVX512DQ-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
501 ; AVX512DQ-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
502 ; AVX512DQ-NEXT: vpcmpgtq %zmm3, %zmm2, %k1
503 ; AVX512DQ-NEXT: vpcmpleq %zmm1, %zmm0, %k0 {%k1}
504 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
505 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
506 ; AVX512DQ-NEXT: vzeroupper
507 ; AVX512DQ-NEXT: retq
511 ; X86-NEXT: vpcmpgtq %ymm3, %ymm2, %k1
512 ; X86-NEXT: vpcmpleq %ymm1, %ymm0, %k0 {%k1}
513 ; X86-NEXT: vpmovm2d %k0, %xmm0
514 ; X86-NEXT: vzeroupper
516 %x_gt_y = icmp sgt <4 x i64> %x, %y
517 %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
518 %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1
519 %resse = sext <4 x i1>%res to <4 x i32>
; Signed compare of two <2 x i1> compare results: an slt compare and an sgt
; compare on <2 x i64> inputs are compared with icmp slt, and the <2 x i1>
; result is sign-extended to <2 x i64>. All runs expect a vpcmpgtq on the
; first input pair (operands swapped) whose mask then predicates a vpcmpleq
; on the second pair. Runs without avx512vl widen the xmm operands to zmm.
; NOTE(review): despite its name, %x_gt_y holds the result of an slt compare.
523 define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
526 ; KNL-NEXT: ## kill: def $xmm3 killed $xmm3 def $zmm3
527 ; KNL-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
528 ; KNL-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
529 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
530 ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k1
531 ; KNL-NEXT: vpcmpleq %zmm3, %zmm2, %k1 {%k1}
532 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
533 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
534 ; KNL-NEXT: vzeroupper
539 ; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k1
540 ; SKX-NEXT: vpcmpleq %xmm3, %xmm2, %k0 {%k1}
541 ; SKX-NEXT: vpmovm2q %k0, %xmm0
544 ; AVX512BW-LABEL: test5:
545 ; AVX512BW: ## %bb.0:
546 ; AVX512BW-NEXT: ## kill: def $xmm3 killed $xmm3 def $zmm3
547 ; AVX512BW-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
548 ; AVX512BW-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
549 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
550 ; AVX512BW-NEXT: vpcmpgtq %zmm0, %zmm1, %k1
551 ; AVX512BW-NEXT: vpcmpleq %zmm3, %zmm2, %k1 {%k1}
552 ; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
553 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
554 ; AVX512BW-NEXT: vzeroupper
555 ; AVX512BW-NEXT: retq
557 ; AVX512DQ-LABEL: test5:
558 ; AVX512DQ: ## %bb.0:
559 ; AVX512DQ-NEXT: ## kill: def $xmm3 killed $xmm3 def $zmm3
560 ; AVX512DQ-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
561 ; AVX512DQ-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
562 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
563 ; AVX512DQ-NEXT: vpcmpgtq %zmm0, %zmm1, %k1
564 ; AVX512DQ-NEXT: vpcmpleq %zmm3, %zmm2, %k0 {%k1}
565 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
566 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
567 ; AVX512DQ-NEXT: vzeroupper
568 ; AVX512DQ-NEXT: retq
572 ; X86-NEXT: vpcmpgtq %xmm0, %xmm1, %k1
573 ; X86-NEXT: vpcmpleq %xmm3, %xmm2, %k0 {%k1}
574 ; X86-NEXT: vpmovm2q %k0, %xmm0
576 %x_gt_y = icmp slt <2 x i64> %x, %y
577 %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
578 %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1
579 %resse = sext <2 x i1>%res to <2 x i64>
; Branch on a masked <16 x i1>: the argument is and'ed with an alternating
; true/false constant, bitcast to i16, compared against zero, and the result
; drives a conditional branch to %true / %false. The checks expect no
; k-register use here: vpsllw $7 + vpmovmskb collect the bits into %eax and a
; single testl with 0x5555 applies the constant mask.
583 define void @test6(<16 x i1> %mask) {
584 ; CHECK-LABEL: test6:
585 ; CHECK: ## %bb.0: ## %allocas
586 ; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0
587 ; CHECK-NEXT: vpmovmskb %xmm0, %eax
588 ; CHECK-NEXT: testl $21845, %eax ## imm = 0x5555
592 ; X86: ## %bb.0: ## %allocas
593 ; X86-NEXT: vpsllw $7, %xmm0, %xmm0
594 ; X86-NEXT: vpmovmskb %xmm0, %eax
595 ; X86-NEXT: testl $21845, %eax ## imm = 0x5555
598 %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
599 %b = bitcast <16 x i1> %a to i16
600 %c = icmp eq i16 %b, 0
601 br i1 %c, label %true, label %false
; Branch on an or'ed <8 x i1>: the argument is or'ed with an alternating
; true/false constant, bitcast to i8, compared against zero, and the result
; drives a conditional branch to %true / %false. Every run converts the
; vector argument to a mask register (the instruction sequence differs per
; feature set), moves it to %eax and applies the constant with orb $85.
610 define void @test7(<8 x i1> %mask) {
612 ; KNL: ## %bb.0: ## %allocas
613 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
614 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
615 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
616 ; KNL-NEXT: kmovw %k0, %eax
617 ; KNL-NEXT: orb $85, %al
618 ; KNL-NEXT: vzeroupper
622 ; SKX: ## %bb.0: ## %allocas
623 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
624 ; SKX-NEXT: vpmovw2m %xmm0, %k0
625 ; SKX-NEXT: kmovd %k0, %eax
626 ; SKX-NEXT: orb $85, %al
629 ; AVX512BW-LABEL: test7:
630 ; AVX512BW: ## %bb.0: ## %allocas
631 ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
632 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
633 ; AVX512BW-NEXT: kmovd %k0, %eax
634 ; AVX512BW-NEXT: orb $85, %al
635 ; AVX512BW-NEXT: vzeroupper
636 ; AVX512BW-NEXT: retq
638 ; AVX512DQ-LABEL: test7:
639 ; AVX512DQ: ## %bb.0: ## %allocas
640 ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
641 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
642 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
643 ; AVX512DQ-NEXT: kmovw %k0, %eax
644 ; AVX512DQ-NEXT: orb $85, %al
645 ; AVX512DQ-NEXT: vzeroupper
646 ; AVX512DQ-NEXT: retq
649 ; X86: ## %bb.0: ## %allocas
650 ; X86-NEXT: vpsllw $15, %xmm0, %xmm0
651 ; X86-NEXT: vpmovw2m %xmm0, %k0
652 ; X86-NEXT: kmovd %k0, %eax
653 ; X86-NEXT: orb $85, %al
656 %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
657 %b = bitcast <8 x i1> %a to i8
658 %c = icmp eq i8 %b, 0
659 br i1 %c, label %true, label %false
; Scalar-controlled select between two <16 x i1> compare results, sign-extended
; to <16 x i8>. The condition is %a1 > %b1 (signed); the selected values are
; (%a > 0, signed) and (%b < 0, unsigned). All runs expect the select to be
; lowered as a branch (jg): the taken path performs vpcmpgtd against a zeroed
; register, and the fall-through path just zeroes the mask with kxorw, i.e.
; no compare is emitted for the unsigned less-than-zero operand.
668 define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
671 ; KNL-NEXT: cmpl %esi, %edi
672 ; KNL-NEXT: jg LBB17_1
673 ; KNL-NEXT: ## %bb.2:
674 ; KNL-NEXT: kxorw %k0, %k0, %k1
675 ; KNL-NEXT: jmp LBB17_3
677 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
678 ; KNL-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
680 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
681 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
682 ; KNL-NEXT: vzeroupper
687 ; SKX-NEXT: cmpl %esi, %edi
688 ; SKX-NEXT: jg LBB17_1
689 ; SKX-NEXT: ## %bb.2:
690 ; SKX-NEXT: kxorw %k0, %k0, %k0
691 ; SKX-NEXT: vpmovm2b %k0, %xmm0
692 ; SKX-NEXT: vzeroupper
695 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
696 ; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
697 ; SKX-NEXT: vpmovm2b %k0, %xmm0
698 ; SKX-NEXT: vzeroupper
701 ; AVX512BW-LABEL: test8:
702 ; AVX512BW: ## %bb.0:
703 ; AVX512BW-NEXT: cmpl %esi, %edi
704 ; AVX512BW-NEXT: jg LBB17_1
705 ; AVX512BW-NEXT: ## %bb.2:
706 ; AVX512BW-NEXT: kxorw %k0, %k0, %k0
707 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
708 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
709 ; AVX512BW-NEXT: vzeroupper
710 ; AVX512BW-NEXT: retq
711 ; AVX512BW-NEXT: LBB17_1:
712 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
713 ; AVX512BW-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
714 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
715 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
716 ; AVX512BW-NEXT: vzeroupper
717 ; AVX512BW-NEXT: retq
719 ; AVX512DQ-LABEL: test8:
720 ; AVX512DQ: ## %bb.0:
721 ; AVX512DQ-NEXT: cmpl %esi, %edi
722 ; AVX512DQ-NEXT: jg LBB17_1
723 ; AVX512DQ-NEXT: ## %bb.2:
724 ; AVX512DQ-NEXT: kxorw %k0, %k0, %k0
725 ; AVX512DQ-NEXT: jmp LBB17_3
726 ; AVX512DQ-NEXT: LBB17_1:
727 ; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
728 ; AVX512DQ-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
729 ; AVX512DQ-NEXT: LBB17_3:
730 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
731 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
732 ; AVX512DQ-NEXT: vzeroupper
733 ; AVX512DQ-NEXT: retq
737 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
738 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
739 ; X86-NEXT: jg LBB17_1
740 ; X86-NEXT: ## %bb.2:
741 ; X86-NEXT: kxorw %k0, %k0, %k0
742 ; X86-NEXT: vpmovm2b %k0, %xmm0
743 ; X86-NEXT: vzeroupper
746 ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
747 ; X86-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
748 ; X86-NEXT: vpmovm2b %k0, %xmm0
749 ; X86-NEXT: vzeroupper
751 %cond = icmp sgt i32 %a1, %b1
752 %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer
753 %cmp2 = icmp ult <16 x i32> %b, zeroinitializer
754 %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2
755 %res = sext <16 x i1> %mix to <16 x i8>
; Scalar-controlled select between two <16 x i1> arguments: picks %a when
; %a1 > %b1 (signed) and %b otherwise. All runs expect a branch (jg) that
; chooses which xmm argument to prepare, followed by a conversion of the
; chosen vector to a mask register and back to a byte vector.
759 define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
762 ; KNL-NEXT: cmpl %esi, %edi
763 ; KNL-NEXT: jg LBB18_1
764 ; KNL-NEXT: ## %bb.2:
765 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
766 ; KNL-NEXT: jmp LBB18_3
768 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
770 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
771 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
772 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
773 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
774 ; KNL-NEXT: vzeroupper
779 ; SKX-NEXT: cmpl %esi, %edi
780 ; SKX-NEXT: jg LBB18_1
781 ; SKX-NEXT: ## %bb.2:
782 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm0
783 ; SKX-NEXT: jmp LBB18_3
785 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
787 ; SKX-NEXT: vpmovb2m %xmm0, %k0
788 ; SKX-NEXT: vpmovm2b %k0, %xmm0
791 ; AVX512BW-LABEL: test9:
792 ; AVX512BW: ## %bb.0:
793 ; AVX512BW-NEXT: cmpl %esi, %edi
794 ; AVX512BW-NEXT: jg LBB18_1
795 ; AVX512BW-NEXT: ## %bb.2:
796 ; AVX512BW-NEXT: vpsllw $7, %xmm1, %xmm0
797 ; AVX512BW-NEXT: jmp LBB18_3
798 ; AVX512BW-NEXT: LBB18_1:
799 ; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
800 ; AVX512BW-NEXT: LBB18_3:
801 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
802 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
803 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
804 ; AVX512BW-NEXT: vzeroupper
805 ; AVX512BW-NEXT: retq
807 ; AVX512DQ-LABEL: test9:
808 ; AVX512DQ: ## %bb.0:
809 ; AVX512DQ-NEXT: cmpl %esi, %edi
810 ; AVX512DQ-NEXT: jg LBB18_1
811 ; AVX512DQ-NEXT: ## %bb.2:
812 ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0
813 ; AVX512DQ-NEXT: jmp LBB18_3
814 ; AVX512DQ-NEXT: LBB18_1:
815 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
816 ; AVX512DQ-NEXT: LBB18_3:
817 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
818 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
819 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
820 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
821 ; AVX512DQ-NEXT: vzeroupper
822 ; AVX512DQ-NEXT: retq
826 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
827 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
828 ; X86-NEXT: jg LBB18_1
829 ; X86-NEXT: ## %bb.2:
830 ; X86-NEXT: vpsllw $7, %xmm1, %xmm0
831 ; X86-NEXT: jmp LBB18_3
833 ; X86-NEXT: vpsllw $7, %xmm0, %xmm0
835 ; X86-NEXT: vpmovb2m %xmm0, %k0
836 ; X86-NEXT: vpmovm2b %k0, %xmm0
838 %mask = icmp sgt i32 %a1, %b1
839 %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b
; Scalar-controlled select between two <8 x i1> arguments: picks %a when
; %a1 > %b1 (signed) and %b otherwise. All runs expect a branch (jg) that
; chooses which xmm argument to prepare, followed by a conversion of the
; chosen vector to a mask register and back to a word vector.
843 define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
846 ; KNL-NEXT: cmpl %esi, %edi
847 ; KNL-NEXT: jg LBB19_1
848 ; KNL-NEXT: ## %bb.2:
849 ; KNL-NEXT: vpmovsxwq %xmm1, %zmm0
850 ; KNL-NEXT: jmp LBB19_3
852 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
854 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
855 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
856 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
857 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
858 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
859 ; KNL-NEXT: vzeroupper
864 ; SKX-NEXT: cmpl %esi, %edi
865 ; SKX-NEXT: jg LBB19_1
866 ; SKX-NEXT: ## %bb.2:
867 ; SKX-NEXT: vpsllw $15, %xmm1, %xmm0
868 ; SKX-NEXT: jmp LBB19_3
870 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
872 ; SKX-NEXT: vpmovw2m %xmm0, %k0
873 ; SKX-NEXT: vpmovm2w %k0, %xmm0
876 ; AVX512BW-LABEL: test10:
877 ; AVX512BW: ## %bb.0:
878 ; AVX512BW-NEXT: cmpl %esi, %edi
879 ; AVX512BW-NEXT: jg LBB19_1
880 ; AVX512BW-NEXT: ## %bb.2:
881 ; AVX512BW-NEXT: vpsllw $15, %xmm1, %xmm0
882 ; AVX512BW-NEXT: jmp LBB19_3
883 ; AVX512BW-NEXT: LBB19_1:
884 ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
885 ; AVX512BW-NEXT: LBB19_3:
886 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
887 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
888 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
889 ; AVX512BW-NEXT: vzeroupper
890 ; AVX512BW-NEXT: retq
892 ; AVX512DQ-LABEL: test10:
893 ; AVX512DQ: ## %bb.0:
894 ; AVX512DQ-NEXT: cmpl %esi, %edi
895 ; AVX512DQ-NEXT: jg LBB19_1
896 ; AVX512DQ-NEXT: ## %bb.2:
897 ; AVX512DQ-NEXT: vpmovsxwq %xmm1, %zmm0
898 ; AVX512DQ-NEXT: jmp LBB19_3
899 ; AVX512DQ-NEXT: LBB19_1:
900 ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
901 ; AVX512DQ-NEXT: LBB19_3:
902 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
903 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
904 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
905 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
906 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
907 ; AVX512DQ-NEXT: vzeroupper
908 ; AVX512DQ-NEXT: retq
912 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
913 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
914 ; X86-NEXT: jg LBB19_1
915 ; X86-NEXT: ## %bb.2:
916 ; X86-NEXT: vpsllw $15, %xmm1, %xmm0
917 ; X86-NEXT: jmp LBB19_3
919 ; X86-NEXT: vpsllw $15, %xmm0, %xmm0
921 ; X86-NEXT: vpmovw2m %xmm0, %k0
922 ; X86-NEXT: vpmovm2w %k0, %xmm0
924 %mask = icmp sgt i32 %a1, %b1
925 %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b
; Scalar-controlled select between two <4 x i1> arguments: picks %a when
; %a1 > %b1 (signed) and %b otherwise. All runs expect a branch (jg) that
; applies vpslld $31 to the chosen xmm argument, followed by a conversion to
; a mask register and back to a dword vector.
929 define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
932 ; KNL-NEXT: cmpl %esi, %edi
933 ; KNL-NEXT: jg LBB20_1
934 ; KNL-NEXT: ## %bb.2:
935 ; KNL-NEXT: vpslld $31, %xmm1, %xmm0
936 ; KNL-NEXT: jmp LBB20_3
938 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
940 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
941 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
942 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
943 ; KNL-NEXT: vzeroupper
948 ; SKX-NEXT: cmpl %esi, %edi
949 ; SKX-NEXT: jg LBB20_1
950 ; SKX-NEXT: ## %bb.2:
951 ; SKX-NEXT: vpslld $31, %xmm1, %xmm0
952 ; SKX-NEXT: jmp LBB20_3
954 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0
956 ; SKX-NEXT: vpmovd2m %xmm0, %k0
957 ; SKX-NEXT: vpmovm2d %k0, %xmm0
960 ; AVX512BW-LABEL: test11:
961 ; AVX512BW: ## %bb.0:
962 ; AVX512BW-NEXT: cmpl %esi, %edi
963 ; AVX512BW-NEXT: jg LBB20_1
964 ; AVX512BW-NEXT: ## %bb.2:
965 ; AVX512BW-NEXT: vpslld $31, %xmm1, %xmm0
966 ; AVX512BW-NEXT: jmp LBB20_3
967 ; AVX512BW-NEXT: LBB20_1:
968 ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
969 ; AVX512BW-NEXT: LBB20_3:
970 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k1
971 ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
972 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
973 ; AVX512BW-NEXT: vzeroupper
974 ; AVX512BW-NEXT: retq
976 ; AVX512DQ-LABEL: test11:
977 ; AVX512DQ: ## %bb.0:
978 ; AVX512DQ-NEXT: cmpl %esi, %edi
979 ; AVX512DQ-NEXT: jg LBB20_1
980 ; AVX512DQ-NEXT: ## %bb.2:
981 ; AVX512DQ-NEXT: vpslld $31, %xmm1, %xmm0
982 ; AVX512DQ-NEXT: jmp LBB20_3
983 ; AVX512DQ-NEXT: LBB20_1:
984 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
985 ; AVX512DQ-NEXT: LBB20_3:
986 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
987 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
988 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
989 ; AVX512DQ-NEXT: vzeroupper
990 ; AVX512DQ-NEXT: retq
994 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
995 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
996 ; X86-NEXT: jg LBB20_1
997 ; X86-NEXT: ## %bb.2:
998 ; X86-NEXT: vpslld $31, %xmm1, %xmm0
999 ; X86-NEXT: jmp LBB20_3
1000 ; X86-NEXT: LBB20_1:
1001 ; X86-NEXT: vpslld $31, %xmm0, %xmm0
1002 ; X86-NEXT: LBB20_3:
1003 ; X86-NEXT: vpmovd2m %xmm0, %k0
1004 ; X86-NEXT: vpmovm2d %k0, %xmm0
1006 %mask = icmp sgt i32 %a1, %b1
1007 %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b
; Constant-folding test: the i16 constant 21845 (0x5555) is bitcast to
; <16 x i1>, element 0 is extracted and used to select between %x and %y.
; The x86-64 runs expect the whole sequence to fold to a plain move of the
; first argument register (%edi).
1011 define i32 @test12(i32 %x, i32 %y) {
1012 ; CHECK-LABEL: test12:
1014 ; CHECK-NEXT: movl %edi, %eax
1017 ; X86-LABEL: test12:
1019 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1021 %a = bitcast i16 21845 to <16 x i1>
1022 %b = extractelement <16 x i1> %a, i32 0
1023 %c = select i1 %b, i32 %x, i32 %y
; Constant-folding test: the i16 constant 21845 (0x5555) is bitcast to
; <16 x i1>, element 3 is extracted and used to select between %x and %y.
; The x86-64 runs expect the whole sequence to fold to a plain move of the
; second argument register (%esi).
1027 define i32 @test13(i32 %x, i32 %y) {
1028 ; CHECK-LABEL: test13:
1030 ; CHECK-NEXT: movl %esi, %eax
1033 ; X86-LABEL: test13:
1035 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1037 %a = bitcast i16 21845 to <16 x i1>
1038 %b = extractelement <16 x i1> %a, i32 3
1039 %c = select i1 %b, i32 %x, i32 %y
1043 ; Make sure we don't crash on a large vector.
; An i128 constant is bitcast to <128 x i1>, element 3 is extracted and used
; to select between %x and %y. The x86-64 runs expect the sequence to fold to
; a plain move of the first argument register (%edi).
1044 define i32 @test13_crash(i32 %x, i32 %y) {
1045 ; CHECK-LABEL: test13_crash:
1047 ; CHECK-NEXT: movl %edi, %eax
1050 ; X86-LABEL: test13_crash:
1052 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1054 %a = bitcast i128 2184568686868686868686868686 to <128 x i1>
1055 %b = extractelement <128 x i1> %a, i32 3
1056 %c = select i1 %b, i32 %x, i32 %y
; Constant-folding test: element 2 of the constant 21845 (0x5555) viewed as
; <16 x i1> is inserted at index 1 of the constant vector
; <true, false, false, true>. All runs expect the result to be emitted as a
; single constant vector load of [1,1,0,1].
1060 define <4 x i1> @test14() {
1061 ; CHECK-LABEL: test14:
1063 ; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,0,1]
1066 ; X86-LABEL: test14:
1068 ; X86-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,0,1]
1070 %a = bitcast i16 21845 to <16 x i1>
1071 %b = extractelement <16 x i1> %a, i32 2
1072 %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
; Scalar-controlled select between two constant <16 x i1> masks (the i16
; constants 21845 and 1 bitcast to vectors), chosen by %x > %y (signed).
; All runs expect the select to be done on scalar immediates with cmovgl
; ($21845 vs $1) before the chosen value is moved into a mask register and
; expanded to a byte vector.
1076 define <16 x i1> @test15(i32 %x, i32 %y) {
1077 ; KNL-LABEL: test15:
1079 ; KNL-NEXT: cmpl %esi, %edi
1080 ; KNL-NEXT: movl $21845, %eax ## imm = 0x5555
1081 ; KNL-NEXT: movl $1, %ecx
1082 ; KNL-NEXT: cmovgl %eax, %ecx
1083 ; KNL-NEXT: kmovw %ecx, %k1
1084 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1085 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
1086 ; KNL-NEXT: vzeroupper
1089 ; SKX-LABEL: test15:
1091 ; SKX-NEXT: cmpl %esi, %edi
1092 ; SKX-NEXT: movl $21845, %eax ## imm = 0x5555
1093 ; SKX-NEXT: movl $1, %ecx
1094 ; SKX-NEXT: cmovgl %eax, %ecx
1095 ; SKX-NEXT: kmovd %ecx, %k0
1096 ; SKX-NEXT: vpmovm2b %k0, %xmm0
1099 ; AVX512BW-LABEL: test15:
1100 ; AVX512BW: ## %bb.0:
1101 ; AVX512BW-NEXT: cmpl %esi, %edi
1102 ; AVX512BW-NEXT: movl $21845, %eax ## imm = 0x5555
1103 ; AVX512BW-NEXT: movl $1, %ecx
1104 ; AVX512BW-NEXT: cmovgl %eax, %ecx
1105 ; AVX512BW-NEXT: kmovd %ecx, %k0
1106 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
1107 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
1108 ; AVX512BW-NEXT: vzeroupper
1109 ; AVX512BW-NEXT: retq
1111 ; AVX512DQ-LABEL: test15:
1112 ; AVX512DQ: ## %bb.0:
1113 ; AVX512DQ-NEXT: cmpl %esi, %edi
1114 ; AVX512DQ-NEXT: movl $21845, %eax ## imm = 0x5555
1115 ; AVX512DQ-NEXT: movl $1, %ecx
1116 ; AVX512DQ-NEXT: cmovgl %eax, %ecx
1117 ; AVX512DQ-NEXT: kmovw %ecx, %k0
1118 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
1119 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1120 ; AVX512DQ-NEXT: vzeroupper
1121 ; AVX512DQ-NEXT: retq
1123 ; X86-LABEL: test15:
1125 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1126 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
1127 ; X86-NEXT: movl $21845, %eax ## imm = 0x5555
1128 ; X86-NEXT: movl $1, %ecx
1129 ; X86-NEXT: cmovgl %eax, %ecx
1130 ; X86-NEXT: kmovd %ecx, %k0
1131 ; X86-NEXT: vpmovm2b %k0, %xmm0
1133 %a = bitcast i16 21845 to <16 x i1>
1134 %b = bitcast i16 1 to <16 x i1>
1135 %mask = icmp sgt i32 %x, %y
1136 %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
; test16: inserts a constant 'true' into lane 5 of a 64-lane i1 mask.
; The i64 argument is reinterpreted as <64 x i1>, lane 5 is overwritten with
; true, and the mask is sign-extended to <64 x i8>.
; In the expected assembly the immediate -33 is ~(1 << 5), i.e. an 'and' mask
; that clears lane 5, and the shift-left/shift-right pairs ($63 then $58, or
; $15 then $10 for 16-bit mask pieces) park a single bit at position 5 before
; it is or'ed back in. Configurations without 64-bit mask instructions split
; the i64 into four 16-bit pieces with scalar shifts.
1140 define <64 x i8> @test16(i64 %x) {
1142 ; KNL-LABEL: test16:
1144 ; KNL-NEXT: movq %rdi, %rax
1145 ; KNL-NEXT: movl %edi, %ecx
1146 ; KNL-NEXT: kmovw %edi, %k0
1147 ; KNL-NEXT: shrq $32, %rdi
1148 ; KNL-NEXT: shrq $48, %rax
1149 ; KNL-NEXT: shrl $16, %ecx
1150 ; KNL-NEXT: kmovw %ecx, %k1
1151 ; KNL-NEXT: kmovw %eax, %k2
1152 ; KNL-NEXT: kmovw %edi, %k3
1153 ; KNL-NEXT: movw $-33, %ax
1154 ; KNL-NEXT: kmovw %eax, %k4
1155 ; KNL-NEXT: kandw %k4, %k0, %k0
1156 ; KNL-NEXT: movb $1, %al
1157 ; KNL-NEXT: kmovw %eax, %k4
1158 ; KNL-NEXT: kshiftlw $15, %k4, %k4
1159 ; KNL-NEXT: kshiftrw $10, %k4, %k4
1160 ; KNL-NEXT: korw %k4, %k0, %k4
1161 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
1162 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
1163 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
1164 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
1165 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1166 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z}
1167 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
1168 ; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
1169 ; KNL-NEXT: vpmovdb %zmm2, %xmm2
1170 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
1171 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
1174 ; SKX-LABEL: test16:
1176 ; SKX-NEXT: kmovq %rdi, %k0
1177 ; SKX-NEXT: movq $-33, %rax
1178 ; SKX-NEXT: kmovq %rax, %k1
1179 ; SKX-NEXT: kandq %k1, %k0, %k0
1180 ; SKX-NEXT: movb $1, %al
1181 ; SKX-NEXT: kmovd %eax, %k1
1182 ; SKX-NEXT: kshiftlq $63, %k1, %k1
1183 ; SKX-NEXT: kshiftrq $58, %k1, %k1
1184 ; SKX-NEXT: korq %k1, %k0, %k0
1185 ; SKX-NEXT: vpmovm2b %k0, %zmm0
1188 ; AVX512BW-LABEL: test16:
1189 ; AVX512BW: ## %bb.0:
1190 ; AVX512BW-NEXT: kmovq %rdi, %k0
1191 ; AVX512BW-NEXT: movq $-33, %rax
1192 ; AVX512BW-NEXT: kmovq %rax, %k1
1193 ; AVX512BW-NEXT: kandq %k1, %k0, %k0
1194 ; AVX512BW-NEXT: movb $1, %al
1195 ; AVX512BW-NEXT: kmovd %eax, %k1
1196 ; AVX512BW-NEXT: kshiftlq $63, %k1, %k1
1197 ; AVX512BW-NEXT: kshiftrq $58, %k1, %k1
1198 ; AVX512BW-NEXT: korq %k1, %k0, %k0
1199 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
1200 ; AVX512BW-NEXT: retq
1202 ; AVX512DQ-LABEL: test16:
1203 ; AVX512DQ: ## %bb.0:
1204 ; AVX512DQ-NEXT: movq %rdi, %rax
1205 ; AVX512DQ-NEXT: movl %edi, %ecx
1206 ; AVX512DQ-NEXT: kmovw %edi, %k1
1207 ; AVX512DQ-NEXT: shrq $32, %rdi
1208 ; AVX512DQ-NEXT: shrq $48, %rax
1209 ; AVX512DQ-NEXT: shrl $16, %ecx
1210 ; AVX512DQ-NEXT: kmovw %ecx, %k0
1211 ; AVX512DQ-NEXT: kmovw %eax, %k2
1212 ; AVX512DQ-NEXT: kmovw %edi, %k3
1213 ; AVX512DQ-NEXT: movw $-33, %ax
1214 ; AVX512DQ-NEXT: kmovw %eax, %k4
1215 ; AVX512DQ-NEXT: kandw %k4, %k1, %k1
1216 ; AVX512DQ-NEXT: movb $1, %al
1217 ; AVX512DQ-NEXT: kmovw %eax, %k4
1218 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
1219 ; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4
1220 ; AVX512DQ-NEXT: korw %k4, %k1, %k1
1221 ; AVX512DQ-NEXT: vpmovm2d %k3, %zmm0
1222 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1223 ; AVX512DQ-NEXT: vpmovm2d %k2, %zmm1
1224 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
1225 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1226 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm1
1227 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
1228 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm2
1229 ; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
1230 ; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
1231 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
1232 ; AVX512DQ-NEXT: retq
1234 ; X86-LABEL: test16:
1236 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0
1237 ; X86-NEXT: kshiftrq $6, %k0, %k1
1238 ; X86-NEXT: kshiftlq $6, %k1, %k1
1239 ; X86-NEXT: kshiftlq $59, %k0, %k0
1240 ; X86-NEXT: kshiftrq $59, %k0, %k0
1241 ; X86-NEXT: korq %k1, %k0, %k0
1242 ; X86-NEXT: movb $1, %al
1243 ; X86-NEXT: kmovd %eax, %k1
1244 ; X86-NEXT: kshiftlq $63, %k1, %k1
1245 ; X86-NEXT: kshiftrq $58, %k1, %k1
1246 ; X86-NEXT: korq %k0, %k1, %k0
1247 ; X86-NEXT: vpmovm2b %k0, %zmm0
1249 %a = bitcast i64 %x to <64 x i1>
1250 %b = insertelement <64 x i1>%a, i1 true, i32 5
1251 %c = sext <64 x i1>%b to <64 x i8>
; test17: inserts a run-time i1 into lane 5 of a 64-lane i1 mask.
; Same shape as an insert of a constant bit, except the inserted value is the
; result of a signed greater-than compare of %y and %z. The i64 argument is
; reinterpreted as <64 x i1>, lane 5 is replaced, and the mask is sign-extended
; to <64 x i8>.
; In the expected assembly the compare result is materialised with setg and
; moved into a mask register; -33 is ~(1 << 5) and clears the old lane 5.
1255 define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
1257 ; KNL-LABEL: test17:
1259 ; KNL-NEXT: movq %rdi, %rax
1260 ; KNL-NEXT: movl %edi, %ecx
1261 ; KNL-NEXT: kmovw %edi, %k0
1262 ; KNL-NEXT: shrq $32, %rdi
1263 ; KNL-NEXT: shrq $48, %rax
1264 ; KNL-NEXT: shrl $16, %ecx
1265 ; KNL-NEXT: kmovw %ecx, %k1
1266 ; KNL-NEXT: kmovw %eax, %k2
1267 ; KNL-NEXT: kmovw %edi, %k3
1268 ; KNL-NEXT: cmpl %edx, %esi
1269 ; KNL-NEXT: setg %al
1270 ; KNL-NEXT: movw $-33, %cx
1271 ; KNL-NEXT: kmovw %ecx, %k4
1272 ; KNL-NEXT: kandw %k4, %k0, %k0
1273 ; KNL-NEXT: kmovw %eax, %k4
1274 ; KNL-NEXT: kshiftlw $15, %k4, %k4
1275 ; KNL-NEXT: kshiftrw $10, %k4, %k4
1276 ; KNL-NEXT: korw %k4, %k0, %k4
1277 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
1278 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
1279 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
1280 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
1281 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1282 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z}
1283 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
1284 ; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
1285 ; KNL-NEXT: vpmovdb %zmm2, %xmm2
1286 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
1287 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
1290 ; SKX-LABEL: test17:
1292 ; SKX-NEXT: kmovq %rdi, %k0
1293 ; SKX-NEXT: cmpl %edx, %esi
1294 ; SKX-NEXT: setg %al
1295 ; SKX-NEXT: movq $-33, %rcx
1296 ; SKX-NEXT: kmovq %rcx, %k1
1297 ; SKX-NEXT: kandq %k1, %k0, %k0
1298 ; SKX-NEXT: kmovd %eax, %k1
1299 ; SKX-NEXT: kshiftlq $63, %k1, %k1
1300 ; SKX-NEXT: kshiftrq $58, %k1, %k1
1301 ; SKX-NEXT: korq %k1, %k0, %k0
1302 ; SKX-NEXT: vpmovm2b %k0, %zmm0
1305 ; AVX512BW-LABEL: test17:
1306 ; AVX512BW: ## %bb.0:
1307 ; AVX512BW-NEXT: kmovq %rdi, %k0
1308 ; AVX512BW-NEXT: cmpl %edx, %esi
1309 ; AVX512BW-NEXT: setg %al
1310 ; AVX512BW-NEXT: movq $-33, %rcx
1311 ; AVX512BW-NEXT: kmovq %rcx, %k1
1312 ; AVX512BW-NEXT: kandq %k1, %k0, %k0
1313 ; AVX512BW-NEXT: kmovd %eax, %k1
1314 ; AVX512BW-NEXT: kshiftlq $63, %k1, %k1
1315 ; AVX512BW-NEXT: kshiftrq $58, %k1, %k1
1316 ; AVX512BW-NEXT: korq %k1, %k0, %k0
1317 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
1318 ; AVX512BW-NEXT: retq
1320 ; AVX512DQ-LABEL: test17:
1321 ; AVX512DQ: ## %bb.0:
1322 ; AVX512DQ-NEXT: movq %rdi, %rax
1323 ; AVX512DQ-NEXT: movl %edi, %ecx
1324 ; AVX512DQ-NEXT: kmovw %edi, %k1
1325 ; AVX512DQ-NEXT: shrq $32, %rdi
1326 ; AVX512DQ-NEXT: shrq $48, %rax
1327 ; AVX512DQ-NEXT: shrl $16, %ecx
1328 ; AVX512DQ-NEXT: kmovw %ecx, %k0
1329 ; AVX512DQ-NEXT: kmovw %eax, %k2
1330 ; AVX512DQ-NEXT: kmovw %edi, %k3
1331 ; AVX512DQ-NEXT: cmpl %edx, %esi
1332 ; AVX512DQ-NEXT: setg %al
1333 ; AVX512DQ-NEXT: movw $-33, %cx
1334 ; AVX512DQ-NEXT: kmovw %ecx, %k4
1335 ; AVX512DQ-NEXT: kandw %k4, %k1, %k1
1336 ; AVX512DQ-NEXT: kmovw %eax, %k4
1337 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
1338 ; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4
1339 ; AVX512DQ-NEXT: korw %k4, %k1, %k1
1340 ; AVX512DQ-NEXT: vpmovm2d %k3, %zmm0
1341 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1342 ; AVX512DQ-NEXT: vpmovm2d %k2, %zmm1
1343 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
1344 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1345 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm1
1346 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
1347 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm2
1348 ; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
1349 ; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
1350 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
1351 ; AVX512DQ-NEXT: retq
1353 ; X86-LABEL: test17:
1355 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0
1356 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1357 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
1358 ; X86-NEXT: setg %al
1359 ; X86-NEXT: kshiftrq $6, %k0, %k1
1360 ; X86-NEXT: kshiftlq $6, %k1, %k1
1361 ; X86-NEXT: kshiftlq $59, %k0, %k0
1362 ; X86-NEXT: kshiftrq $59, %k0, %k0
1363 ; X86-NEXT: korq %k1, %k0, %k0
1364 ; X86-NEXT: kmovd %eax, %k1
1365 ; X86-NEXT: kshiftlq $63, %k1, %k1
1366 ; X86-NEXT: kshiftrq $58, %k1, %k1
1367 ; X86-NEXT: korq %k0, %k1, %k0
1368 ; X86-NEXT: vpmovm2b %k0, %zmm0
1370 %a = bitcast i64 %x to <64 x i1>
1371 %b = icmp sgt i32 %y, %z
1372 %c = insertelement <64 x i1>%a, i1 %b, i32 5
1373 %d = sext <64 x i1>%c to <64 x i8>
; test18: moves two lanes from a 16-lane mask into an 8-lane mask.
; %a is reinterpreted as <8 x i1> and %y as <16 x i1>. Lane 8 of the 16-lane
; mask is written to lane 7 of the 8-lane mask, then lane 9 is written to
; lane 6.
; In the expected assembly -65 is ~(1 << 6), used to clear lane 6; the
; shift-left/shift-right pair that follows drops everything above lane 6 so
; that lane 7 can be or'ed in from the second extracted bit.
1377 define <8 x i1> @test18(i8 %a, i16 %y) {
1378 ; KNL-LABEL: test18:
1380 ; KNL-NEXT: kmovw %edi, %k0
1381 ; KNL-NEXT: kmovw %esi, %k1
1382 ; KNL-NEXT: kshiftrw $8, %k1, %k2
1383 ; KNL-NEXT: kshiftrw $9, %k1, %k1
1384 ; KNL-NEXT: movw $-65, %ax
1385 ; KNL-NEXT: kmovw %eax, %k3
1386 ; KNL-NEXT: kandw %k3, %k0, %k0
1387 ; KNL-NEXT: kshiftlw $6, %k1, %k1
1388 ; KNL-NEXT: korw %k1, %k0, %k0
1389 ; KNL-NEXT: kshiftlw $9, %k0, %k0
1390 ; KNL-NEXT: kshiftrw $9, %k0, %k0
1391 ; KNL-NEXT: kshiftlw $7, %k2, %k1
1392 ; KNL-NEXT: korw %k1, %k0, %k1
1393 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1394 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
1395 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
1396 ; KNL-NEXT: vzeroupper
1399 ; SKX-LABEL: test18:
1401 ; SKX-NEXT: kmovd %edi, %k0
1402 ; SKX-NEXT: kmovd %esi, %k1
1403 ; SKX-NEXT: kshiftrw $8, %k1, %k2
1404 ; SKX-NEXT: kshiftrw $9, %k1, %k1
1405 ; SKX-NEXT: movb $-65, %al
1406 ; SKX-NEXT: kmovd %eax, %k3
1407 ; SKX-NEXT: kandb %k3, %k0, %k0
1408 ; SKX-NEXT: kshiftlb $6, %k1, %k1
1409 ; SKX-NEXT: korb %k1, %k0, %k0
1410 ; SKX-NEXT: kshiftlb $1, %k0, %k0
1411 ; SKX-NEXT: kshiftrb $1, %k0, %k0
1412 ; SKX-NEXT: kshiftlb $7, %k2, %k1
1413 ; SKX-NEXT: korb %k1, %k0, %k0
1414 ; SKX-NEXT: vpmovm2w %k0, %xmm0
1417 ; AVX512BW-LABEL: test18:
1418 ; AVX512BW: ## %bb.0:
1419 ; AVX512BW-NEXT: kmovd %edi, %k0
1420 ; AVX512BW-NEXT: kmovd %esi, %k1
1421 ; AVX512BW-NEXT: kshiftrw $8, %k1, %k2
1422 ; AVX512BW-NEXT: kshiftrw $9, %k1, %k1
1423 ; AVX512BW-NEXT: movw $-65, %ax
1424 ; AVX512BW-NEXT: kmovd %eax, %k3
1425 ; AVX512BW-NEXT: kandw %k3, %k0, %k0
1426 ; AVX512BW-NEXT: kshiftlw $6, %k1, %k1
1427 ; AVX512BW-NEXT: korw %k1, %k0, %k0
1428 ; AVX512BW-NEXT: kshiftlw $9, %k0, %k0
1429 ; AVX512BW-NEXT: kshiftrw $9, %k0, %k0
1430 ; AVX512BW-NEXT: kshiftlw $7, %k2, %k1
1431 ; AVX512BW-NEXT: korw %k1, %k0, %k0
1432 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
1433 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
1434 ; AVX512BW-NEXT: vzeroupper
1435 ; AVX512BW-NEXT: retq
1437 ; AVX512DQ-LABEL: test18:
1438 ; AVX512DQ: ## %bb.0:
1439 ; AVX512DQ-NEXT: kmovw %edi, %k0
1440 ; AVX512DQ-NEXT: kmovw %esi, %k1
1441 ; AVX512DQ-NEXT: kshiftrw $8, %k1, %k2
1442 ; AVX512DQ-NEXT: kshiftrw $9, %k1, %k1
1443 ; AVX512DQ-NEXT: movb $-65, %al
1444 ; AVX512DQ-NEXT: kmovw %eax, %k3
1445 ; AVX512DQ-NEXT: kandb %k3, %k0, %k0
1446 ; AVX512DQ-NEXT: kshiftlb $6, %k1, %k1
1447 ; AVX512DQ-NEXT: korb %k1, %k0, %k0
1448 ; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0
1449 ; AVX512DQ-NEXT: kshiftrb $1, %k0, %k0
1450 ; AVX512DQ-NEXT: kshiftlb $7, %k2, %k1
1451 ; AVX512DQ-NEXT: korb %k1, %k0, %k0
1452 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
1453 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
1454 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
1455 ; AVX512DQ-NEXT: vzeroupper
1456 ; AVX512DQ-NEXT: retq
1458 ; X86-LABEL: test18:
1460 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
1461 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1462 ; X86-NEXT: kshiftrw $8, %k1, %k2
1463 ; X86-NEXT: kshiftrw $9, %k1, %k1
1464 ; X86-NEXT: movb $-65, %al
1465 ; X86-NEXT: kmovd %eax, %k3
1466 ; X86-NEXT: kandb %k3, %k0, %k0
1467 ; X86-NEXT: kshiftlb $6, %k1, %k1
1468 ; X86-NEXT: korb %k1, %k0, %k0
1469 ; X86-NEXT: kshiftlb $1, %k0, %k0
1470 ; X86-NEXT: kshiftrb $1, %k0, %k0
1471 ; X86-NEXT: kshiftlb $7, %k2, %k1
1472 ; X86-NEXT: korb %k1, %k0, %k0
1473 ; X86-NEXT: vpmovm2w %k0, %xmm0
1475 %b = bitcast i8 %a to <8 x i1>
1476 %b1 = bitcast i16 %y to <16 x i1>
1477 %el1 = extractelement <16 x i1>%b1, i32 8
1478 %el2 = extractelement <16 x i1>%b1, i32 9
1479 %c = insertelement <8 x i1>%b, i1 %el1, i32 7
1480 %d = insertelement <8 x i1>%c, i1 %el2, i32 6
; test21: zero-masks a <32 x i16> vector with a <32 x i1> argument mask.
; Lanes of %x whose mask bit is set are kept; the others become zero.
; The expected assembly either converts the mask argument to a mask register
; and uses a zeroing masked move, or (where 32-lane word masks are not
; available) sign-extends the mask bytes to words and uses a vector 'and'.
1483 define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
1484 ; KNL-LABEL: test21:
1486 ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm2
1487 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
1488 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
1489 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
1490 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
1491 ; KNL-NEXT: vpsllw $15, %ymm2, %ymm2
1492 ; KNL-NEXT: vpsraw $15, %ymm2, %ymm2
1493 ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
1494 ; KNL-NEXT: vpandq %zmm0, %zmm1, %zmm0
1497 ; SKX-LABEL: test21:
1499 ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1
1500 ; SKX-NEXT: vpmovb2m %ymm1, %k1
1501 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1504 ; AVX512BW-LABEL: test21:
1505 ; AVX512BW: ## %bb.0:
1506 ; AVX512BW-NEXT: vpsllw $7, %ymm1, %ymm1
1507 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
1508 ; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1509 ; AVX512BW-NEXT: retq
1511 ; AVX512DQ-LABEL: test21:
1512 ; AVX512DQ: ## %bb.0:
1513 ; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm2
1514 ; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
1515 ; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
1516 ; AVX512DQ-NEXT: vpsllw $15, %ymm1, %ymm1
1517 ; AVX512DQ-NEXT: vpsraw $15, %ymm1, %ymm1
1518 ; AVX512DQ-NEXT: vpsllw $15, %ymm2, %ymm2
1519 ; AVX512DQ-NEXT: vpsraw $15, %ymm2, %ymm2
1520 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
1521 ; AVX512DQ-NEXT: vpandq %zmm0, %zmm1, %zmm0
1522 ; AVX512DQ-NEXT: retq
1524 ; X86-LABEL: test21:
1526 ; X86-NEXT: vpsllw $7, %ymm1, %ymm1
1527 ; X86-NEXT: vpmovb2m %ymm1, %k1
1528 ; X86-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1530 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; test22: stores a <4 x i1> argument to memory through %addr.
; The expected assembly converts the vector argument to a mask register and
; writes a single byte. Where the conversion is done on a 512-bit register, a
; shift-left/shift-right pair (by 12 on 16-bit masks, by 4 on 8-bit masks)
; clears every bit above the low four before the byte is written.
1534 define void @test22(<4 x i1> %a, ptr %addr) {
1535 ; KNL-LABEL: test22:
1537 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1538 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1539 ; KNL-NEXT: kshiftlw $12, %k0, %k0
1540 ; KNL-NEXT: kshiftrw $12, %k0, %k0
1541 ; KNL-NEXT: kmovw %k0, %eax
1542 ; KNL-NEXT: movb %al, (%rdi)
1543 ; KNL-NEXT: vzeroupper
1546 ; SKX-LABEL: test22:
1548 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0
1549 ; SKX-NEXT: vpmovd2m %xmm0, %k0
1550 ; SKX-NEXT: kmovb %k0, (%rdi)
1553 ; AVX512BW-LABEL: test22:
1554 ; AVX512BW: ## %bb.0:
1555 ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
1556 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
1557 ; AVX512BW-NEXT: kshiftlw $12, %k0, %k0
1558 ; AVX512BW-NEXT: kshiftrw $12, %k0, %k0
1559 ; AVX512BW-NEXT: kmovd %k0, %eax
1560 ; AVX512BW-NEXT: movb %al, (%rdi)
1561 ; AVX512BW-NEXT: vzeroupper
1562 ; AVX512BW-NEXT: retq
1564 ; AVX512DQ-LABEL: test22:
1565 ; AVX512DQ: ## %bb.0:
1566 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
1567 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
1568 ; AVX512DQ-NEXT: kshiftlb $4, %k0, %k0
1569 ; AVX512DQ-NEXT: kshiftrb $4, %k0, %k0
1570 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
1571 ; AVX512DQ-NEXT: vzeroupper
1572 ; AVX512DQ-NEXT: retq
1574 ; X86-LABEL: test22:
1576 ; X86-NEXT: vpslld $31, %xmm0, %xmm0
1577 ; X86-NEXT: vpmovd2m %xmm0, %k0
1578 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1579 ; X86-NEXT: kmovb %k0, (%eax)
1581 store <4 x i1> %a, ptr %addr
; test23: stores a <2 x i1> argument to memory through %addr.
; Two-lane counterpart of the <4 x i1> store test: the expected assembly
; converts the argument to a mask register and writes one byte, and where the
; conversion is done on a 512-bit register a shift pair (by 14 on 16-bit
; masks, by 6 on 8-bit masks) keeps only the low two bits.
1585 define void @test23(<2 x i1> %a, ptr %addr) {
1586 ; KNL-LABEL: test23:
1588 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1589 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
1590 ; KNL-NEXT: kshiftlw $14, %k0, %k0
1591 ; KNL-NEXT: kshiftrw $14, %k0, %k0
1592 ; KNL-NEXT: kmovw %k0, %eax
1593 ; KNL-NEXT: movb %al, (%rdi)
1594 ; KNL-NEXT: vzeroupper
1597 ; SKX-LABEL: test23:
1599 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
1600 ; SKX-NEXT: vpmovq2m %xmm0, %k0
1601 ; SKX-NEXT: kmovb %k0, (%rdi)
1604 ; AVX512BW-LABEL: test23:
1605 ; AVX512BW: ## %bb.0:
1606 ; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0
1607 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
1608 ; AVX512BW-NEXT: kshiftlw $14, %k0, %k0
1609 ; AVX512BW-NEXT: kshiftrw $14, %k0, %k0
1610 ; AVX512BW-NEXT: kmovd %k0, %eax
1611 ; AVX512BW-NEXT: movb %al, (%rdi)
1612 ; AVX512BW-NEXT: vzeroupper
1613 ; AVX512BW-NEXT: retq
1615 ; AVX512DQ-LABEL: test23:
1616 ; AVX512DQ: ## %bb.0:
1617 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
1618 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
1619 ; AVX512DQ-NEXT: kshiftlb $6, %k0, %k0
1620 ; AVX512DQ-NEXT: kshiftrb $6, %k0, %k0
1621 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
1622 ; AVX512DQ-NEXT: vzeroupper
1623 ; AVX512DQ-NEXT: retq
1625 ; X86-LABEL: test23:
1627 ; X86-NEXT: vpsllq $63, %xmm0, %xmm0
1628 ; X86-NEXT: vpmovq2m %xmm0, %k0
1629 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1630 ; X86-NEXT: kmovb %k0, (%eax)
1632 store <2 x i1> %a, ptr %addr
; store_v1i1: inverts a <1 x i1> argument and stores it through %ptr.
; The xor with an all-ones constant is a logical 'not'. The expected assembly
; moves the scalar argument into a mask register, inverts it, keeps only the
; lowest bit with a shift-left/shift-right pair, and writes one byte.
1636 define void @store_v1i1(<1 x i1> %c , ptr %ptr) {
1637 ; KNL-LABEL: store_v1i1:
1639 ; KNL-NEXT: kmovw %edi, %k0
1640 ; KNL-NEXT: knotw %k0, %k0
1641 ; KNL-NEXT: kshiftlw $15, %k0, %k0
1642 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1643 ; KNL-NEXT: kmovw %k0, %eax
1644 ; KNL-NEXT: movb %al, (%rsi)
1647 ; SKX-LABEL: store_v1i1:
1649 ; SKX-NEXT: kmovd %edi, %k0
1650 ; SKX-NEXT: knotw %k0, %k0
1651 ; SKX-NEXT: kshiftlb $7, %k0, %k0
1652 ; SKX-NEXT: kshiftrb $7, %k0, %k0
1653 ; SKX-NEXT: kmovb %k0, (%rsi)
1656 ; AVX512BW-LABEL: store_v1i1:
1657 ; AVX512BW: ## %bb.0:
1658 ; AVX512BW-NEXT: kmovd %edi, %k0
1659 ; AVX512BW-NEXT: knotw %k0, %k0
1660 ; AVX512BW-NEXT: kshiftlw $15, %k0, %k0
1661 ; AVX512BW-NEXT: kshiftrw $15, %k0, %k0
1662 ; AVX512BW-NEXT: kmovd %k0, %eax
1663 ; AVX512BW-NEXT: movb %al, (%rsi)
1664 ; AVX512BW-NEXT: retq
1666 ; AVX512DQ-LABEL: store_v1i1:
1667 ; AVX512DQ: ## %bb.0:
1668 ; AVX512DQ-NEXT: kmovw %edi, %k0
1669 ; AVX512DQ-NEXT: knotw %k0, %k0
1670 ; AVX512DQ-NEXT: kshiftlb $7, %k0, %k0
1671 ; AVX512DQ-NEXT: kshiftrb $7, %k0, %k0
1672 ; AVX512DQ-NEXT: kmovb %k0, (%rsi)
1673 ; AVX512DQ-NEXT: retq
1675 ; X86-LABEL: store_v1i1:
1677 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1678 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1679 ; X86-NEXT: knotw %k0, %k0
1680 ; X86-NEXT: kshiftlb $7, %k0, %k0
1681 ; X86-NEXT: kshiftrb $7, %k0, %k0
1682 ; X86-NEXT: kmovb %k0, (%eax)
1684 %x = xor <1 x i1> %c, <i1 1>
1685 store <1 x i1> %x, ptr %ptr, align 4
; store_v2i1: inverts a <2 x i1> argument and stores it through %ptr.
; The xor with an all-ones constant is a logical 'not'. In the expected
; assembly the inversion is either folded into the vector test (the 'n'
; variant of the test instruction) or done with an explicit mask 'not'; a
; shift pair then keeps only the low two bits before one byte is written.
1689 define void @store_v2i1(<2 x i1> %c , ptr %ptr) {
1690 ; KNL-LABEL: store_v2i1:
1692 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1693 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
1694 ; KNL-NEXT: kshiftlw $14, %k0, %k0
1695 ; KNL-NEXT: kshiftrw $14, %k0, %k0
1696 ; KNL-NEXT: kmovw %k0, %eax
1697 ; KNL-NEXT: movb %al, (%rdi)
1698 ; KNL-NEXT: vzeroupper
1701 ; SKX-LABEL: store_v2i1:
1703 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
1704 ; SKX-NEXT: vpmovq2m %xmm0, %k0
1705 ; SKX-NEXT: knotw %k0, %k0
1706 ; SKX-NEXT: kshiftlb $6, %k0, %k0
1707 ; SKX-NEXT: kshiftrb $6, %k0, %k0
1708 ; SKX-NEXT: kmovb %k0, (%rdi)
1711 ; AVX512BW-LABEL: store_v2i1:
1712 ; AVX512BW: ## %bb.0:
1713 ; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0
1714 ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
1715 ; AVX512BW-NEXT: kshiftlw $14, %k0, %k0
1716 ; AVX512BW-NEXT: kshiftrw $14, %k0, %k0
1717 ; AVX512BW-NEXT: kmovd %k0, %eax
1718 ; AVX512BW-NEXT: movb %al, (%rdi)
1719 ; AVX512BW-NEXT: vzeroupper
1720 ; AVX512BW-NEXT: retq
1722 ; AVX512DQ-LABEL: store_v2i1:
1723 ; AVX512DQ: ## %bb.0:
1724 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
1725 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
1726 ; AVX512DQ-NEXT: knotw %k0, %k0
1727 ; AVX512DQ-NEXT: kshiftlb $6, %k0, %k0
1728 ; AVX512DQ-NEXT: kshiftrb $6, %k0, %k0
1729 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
1730 ; AVX512DQ-NEXT: vzeroupper
1731 ; AVX512DQ-NEXT: retq
1733 ; X86-LABEL: store_v2i1:
1735 ; X86-NEXT: vpsllq $63, %xmm0, %xmm0
1736 ; X86-NEXT: vpmovq2m %xmm0, %k0
1737 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1738 ; X86-NEXT: knotw %k0, %k0
1739 ; X86-NEXT: kshiftlb $6, %k0, %k0
1740 ; X86-NEXT: kshiftrb $6, %k0, %k0
1741 ; X86-NEXT: kmovb %k0, (%eax)
1743 %x = xor <2 x i1> %c, <i1 1, i1 1>
1744 store <2 x i1> %x, ptr %ptr, align 4
; store_v4i1: inverts a <4 x i1> argument and stores it through %ptr.
; The xor with an all-ones constant is a logical 'not'. In the expected
; assembly the inversion is either folded into the vector test (the 'n'
; variant of the test instruction) or done with an explicit mask 'not'; a
; shift pair then keeps only the low four bits before one byte is written.
1748 define void @store_v4i1(<4 x i1> %c , ptr %ptr) {
1749 ; KNL-LABEL: store_v4i1:
1751 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1752 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
1753 ; KNL-NEXT: kshiftlw $12, %k0, %k0
1754 ; KNL-NEXT: kshiftrw $12, %k0, %k0
1755 ; KNL-NEXT: kmovw %k0, %eax
1756 ; KNL-NEXT: movb %al, (%rdi)
1757 ; KNL-NEXT: vzeroupper
1760 ; SKX-LABEL: store_v4i1:
1762 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0
1763 ; SKX-NEXT: vpmovd2m %xmm0, %k0
1764 ; SKX-NEXT: knotw %k0, %k0
1765 ; SKX-NEXT: kshiftlb $4, %k0, %k0
1766 ; SKX-NEXT: kshiftrb $4, %k0, %k0
1767 ; SKX-NEXT: kmovb %k0, (%rdi)
1770 ; AVX512BW-LABEL: store_v4i1:
1771 ; AVX512BW: ## %bb.0:
1772 ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
1773 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
1774 ; AVX512BW-NEXT: kshiftlw $12, %k0, %k0
1775 ; AVX512BW-NEXT: kshiftrw $12, %k0, %k0
1776 ; AVX512BW-NEXT: kmovd %k0, %eax
1777 ; AVX512BW-NEXT: movb %al, (%rdi)
1778 ; AVX512BW-NEXT: vzeroupper
1779 ; AVX512BW-NEXT: retq
1781 ; AVX512DQ-LABEL: store_v4i1:
1782 ; AVX512DQ: ## %bb.0:
1783 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
1784 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
1785 ; AVX512DQ-NEXT: knotw %k0, %k0
1786 ; AVX512DQ-NEXT: kshiftlb $4, %k0, %k0
1787 ; AVX512DQ-NEXT: kshiftrb $4, %k0, %k0
1788 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
1789 ; AVX512DQ-NEXT: vzeroupper
1790 ; AVX512DQ-NEXT: retq
1792 ; X86-LABEL: store_v4i1:
1794 ; X86-NEXT: vpslld $31, %xmm0, %xmm0
1795 ; X86-NEXT: vpmovd2m %xmm0, %k0
1796 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1797 ; X86-NEXT: knotw %k0, %k0
1798 ; X86-NEXT: kshiftlb $4, %k0, %k0
1799 ; X86-NEXT: kshiftrb $4, %k0, %k0
1800 ; X86-NEXT: kmovb %k0, (%eax)
1802 %x = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1>
1803 store <4 x i1> %x, ptr %ptr, align 4
; store_v8i1: inverts a <8 x i1> argument and stores it through %ptr.
; The xor with an all-ones constant is a logical 'not'. With eight lanes the
; mask fills the stored byte exactly, so the expected assembly contains no
; shift pair to clear upper bits, unlike the narrower store tests.
1807 define void @store_v8i1(<8 x i1> %c , ptr %ptr) {
1808 ; KNL-LABEL: store_v8i1:
1810 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1811 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1812 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
1813 ; KNL-NEXT: kmovw %k0, %eax
1814 ; KNL-NEXT: movb %al, (%rdi)
1815 ; KNL-NEXT: vzeroupper
1818 ; SKX-LABEL: store_v8i1:
1820 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1821 ; SKX-NEXT: vpmovw2m %xmm0, %k0
1822 ; SKX-NEXT: knotb %k0, %k0
1823 ; SKX-NEXT: kmovb %k0, (%rdi)
1826 ; AVX512BW-LABEL: store_v8i1:
1827 ; AVX512BW: ## %bb.0:
1828 ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
1829 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
1830 ; AVX512BW-NEXT: knotw %k0, %k0
1831 ; AVX512BW-NEXT: kmovd %k0, %eax
1832 ; AVX512BW-NEXT: movb %al, (%rdi)
1833 ; AVX512BW-NEXT: vzeroupper
1834 ; AVX512BW-NEXT: retq
1836 ; AVX512DQ-LABEL: store_v8i1:
1837 ; AVX512DQ: ## %bb.0:
1838 ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
1839 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
1840 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
1841 ; AVX512DQ-NEXT: knotb %k0, %k0
1842 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
1843 ; AVX512DQ-NEXT: vzeroupper
1844 ; AVX512DQ-NEXT: retq
1846 ; X86-LABEL: store_v8i1:
1848 ; X86-NEXT: vpsllw $15, %xmm0, %xmm0
1849 ; X86-NEXT: vpmovw2m %xmm0, %k0
1850 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1851 ; X86-NEXT: knotb %k0, %k0
1852 ; X86-NEXT: kmovb %k0, (%eax)
1854 %x = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
1855 store <8 x i1> %x, ptr %ptr, align 4
; store_v16i1: inverts a <16 x i1> argument and stores it through %ptr.
; The xor with an all-ones constant is a logical 'not'. The sixteen lanes are
; written with a 16-bit mask store directly from the mask register in every
; expected-assembly variant.
1859 define void @store_v16i1(<16 x i1> %c , ptr %ptr) {
1860 ; KNL-LABEL: store_v16i1:
1862 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
1863 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1864 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
1865 ; KNL-NEXT: kmovw %k0, (%rdi)
1866 ; KNL-NEXT: vzeroupper
1869 ; SKX-LABEL: store_v16i1:
1871 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
1872 ; SKX-NEXT: vpmovb2m %xmm0, %k0
1873 ; SKX-NEXT: knotw %k0, %k0
1874 ; SKX-NEXT: kmovw %k0, (%rdi)
1877 ; AVX512BW-LABEL: store_v16i1:
1878 ; AVX512BW: ## %bb.0:
1879 ; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
1880 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
1881 ; AVX512BW-NEXT: knotw %k0, %k0
1882 ; AVX512BW-NEXT: kmovw %k0, (%rdi)
1883 ; AVX512BW-NEXT: vzeroupper
1884 ; AVX512BW-NEXT: retq
1886 ; AVX512DQ-LABEL: store_v16i1:
1887 ; AVX512DQ: ## %bb.0:
1888 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
1889 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
1890 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
1891 ; AVX512DQ-NEXT: knotw %k0, %k0
1892 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
1893 ; AVX512DQ-NEXT: vzeroupper
1894 ; AVX512DQ-NEXT: retq
1896 ; X86-LABEL: store_v16i1:
1898 ; X86-NEXT: vpsllw $7, %xmm0, %xmm0
1899 ; X86-NEXT: vpmovb2m %xmm0, %k0
1900 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1901 ; X86-NEXT: knotw %k0, %k0
1902 ; X86-NEXT: kmovw %k0, (%eax)
1904 %x = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
1905 store <16 x i1> %x, ptr %ptr, align 4
; f1: toggles a module-internal i1 global and passes the new value to f2.
; The global @f1.v is loaded, inverted with xor, stored back, zero-extended to
; i32 and handed to @f2 in a tail call. The %c argument is not used by any of
; the visible instructions.
; In the expected assembly the flag is handled as a byte in a general-purpose
; register (movzbl / xorl $1 / movb); on the 64-bit target the call becomes a
; jump, on the 32-bit target it stays a call with the argument on the stack.
1920 @f1.v = internal unnamed_addr global i1 false, align 4
1922 define void @f1(i32 %c) {
1924 ; CHECK: ## %bb.0: ## %entry
1925 ; CHECK-NEXT: movzbl _f1.v(%rip), %edi
1926 ; CHECK-NEXT: xorl $1, %edi
1927 ; CHECK-NEXT: movb %dil, _f1.v(%rip)
1928 ; CHECK-NEXT: jmp _f2 ## TAILCALL
1931 ; X86: ## %bb.0: ## %entry
1932 ; X86-NEXT: subl $12, %esp
1933 ; X86-NEXT: .cfi_def_cfa_offset 16
1934 ; X86-NEXT: movzbl _f1.v, %eax
1935 ; X86-NEXT: xorl $1, %eax
1936 ; X86-NEXT: movb %al, _f1.v
1937 ; X86-NEXT: movl %eax, (%esp)
1938 ; X86-NEXT: calll _f2
1939 ; X86-NEXT: addl $12, %esp
1942 %.b1 = load i1, ptr @f1.v, align 4
1943 %not..b1 = xor i1 %.b1, true
1944 store i1 %not..b1, ptr @f1.v, align 4
1945 %0 = zext i1 %not..b1 to i32
1946 tail call void @f2(i32 %0) #2
; External callee of the tail call above; only its signature is declared here.
1950 declare void @f2(i32) #1
; store_i16_i1: truncates an i16 argument to i1.
; NOTE(review): the instruction that consumes %c is not visible in this
; excerpt; judging by the expected assembly (mask with 1, then a one-byte
; store through the pointer argument) it is stored to %y -- confirm.
1952 define void @store_i16_i1(i16 %x, ptr%y) {
1953 ; CHECK-LABEL: store_i16_i1:
1955 ; CHECK-NEXT: andl $1, %edi
1956 ; CHECK-NEXT: movb %dil, (%rsi)
1959 ; X86-LABEL: store_i16_i1:
1961 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1962 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
1963 ; X86-NEXT: andl $1, %ecx
1964 ; X86-NEXT: movb %cl, (%eax)
1966 %c = trunc i16 %x to i1
; store_i8_i1: truncates an i8 argument to i1.
; NOTE(review): the instruction that consumes %c is not visible in this
; excerpt; judging by the expected assembly (mask with 1, then a one-byte
; store through the pointer argument) it is stored to %y -- confirm.
1971 define void @store_i8_i1(i8 %x, ptr%y) {
1972 ; CHECK-LABEL: store_i8_i1:
1974 ; CHECK-NEXT: andl $1, %edi
1975 ; CHECK-NEXT: movb %dil, (%rsi)
1978 ; X86-LABEL: store_i8_i1:
1980 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1981 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1982 ; X86-NEXT: andb $1, %cl
1983 ; X86-NEXT: movb %cl, (%eax)
1985 %c = trunc i8 %x to i1
; test_build_vec_v32i1: zero-masks a <32 x i16> vector with a constant mask.
; The select condition is a literal <32 x i1>, so no mask has to be computed
; at run time. Without size-optimisation attributes, every expected-assembly
; variant lowers this to a single vector 'and' with a constant-pool operand.
1990 define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) {
1991 ; KNL-LABEL: test_build_vec_v32i1:
1993 ; KNL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
1996 ; SKX-LABEL: test_build_vec_v32i1:
1998 ; SKX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
2001 ; AVX512BW-LABEL: test_build_vec_v32i1:
2002 ; AVX512BW: ## %bb.0:
2003 ; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
2004 ; AVX512BW-NEXT: retq
2006 ; AVX512DQ-LABEL: test_build_vec_v32i1:
2007 ; AVX512DQ: ## %bb.0:
2008 ; AVX512DQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
2009 ; AVX512DQ-NEXT: retq
2011 ; X86-LABEL: test_build_vec_v32i1:
2013 ; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0
2015 %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
; test_build_vec_v32i1_optsize: constant-mask select under the optsize
; attribute.
; Same select as the plain constant-mask test. With optsize, the expected
; assembly for configurations that have 32-lane word masking loads the mask
; as a 32-bit immediate (0x59455495, the literal <32 x i1> read with lane 0 as
; the lowest bit) and uses a zeroing masked move instead of a constant-pool
; 'and'. The other configurations keep the constant-pool 'and'.
2019 define <32 x i16> @test_build_vec_v32i1_optsize(<32 x i16> %x) optsize {
2020 ; KNL-LABEL: test_build_vec_v32i1_optsize:
2022 ; KNL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
2025 ; SKX-LABEL: test_build_vec_v32i1_optsize:
2027 ; SKX-NEXT: movl $1497715861, %eax ## imm = 0x59455495
2028 ; SKX-NEXT: kmovd %eax, %k1
2029 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
2032 ; AVX512BW-LABEL: test_build_vec_v32i1_optsize:
2033 ; AVX512BW: ## %bb.0:
2034 ; AVX512BW-NEXT: movl $1497715861, %eax ## imm = 0x59455495
2035 ; AVX512BW-NEXT: kmovd %eax, %k1
2036 ; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
2037 ; AVX512BW-NEXT: retq
2039 ; AVX512DQ-LABEL: test_build_vec_v32i1_optsize:
2040 ; AVX512DQ: ## %bb.0:
2041 ; AVX512DQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
2042 ; AVX512DQ-NEXT: retq
2044 ; X86-LABEL: test_build_vec_v32i1_optsize:
2046 ; X86-NEXT: movl $1497715861, %eax ## imm = 0x59455495
2047 ; X86-NEXT: kmovd %eax, %k1
2048 ; X86-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
2050 %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
; test_build_vec_v32i1_pgso: constant-mask select with profile metadata.
; Same select as the plain constant-mask test, but the function carries
; '!prof !14' instead of an attribute. The expected assembly is the same as
; for the optsize variant of this test (immediate mask plus zeroing masked
; move where 32-lane word masking is available).
; NOTE(review): !14 is defined outside this excerpt; presumably it marks the
; function as cold so profile-guided size optimisation applies -- confirm.
2054 define <32 x i16> @test_build_vec_v32i1_pgso(<32 x i16> %x) !prof !14 {
2055 ; KNL-LABEL: test_build_vec_v32i1_pgso:
2057 ; KNL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
2060 ; SKX-LABEL: test_build_vec_v32i1_pgso:
2062 ; SKX-NEXT: movl $1497715861, %eax ## imm = 0x59455495
2063 ; SKX-NEXT: kmovd %eax, %k1
2064 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
2067 ; AVX512BW-LABEL: test_build_vec_v32i1_pgso:
2068 ; AVX512BW: ## %bb.0:
2069 ; AVX512BW-NEXT: movl $1497715861, %eax ## imm = 0x59455495
2070 ; AVX512BW-NEXT: kmovd %eax, %k1
2071 ; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
2072 ; AVX512BW-NEXT: retq
2074 ; AVX512DQ-LABEL: test_build_vec_v32i1_pgso:
2075 ; AVX512DQ: ## %bb.0:
2076 ; AVX512DQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
2077 ; AVX512DQ-NEXT: retq
2079 ; X86-LABEL: test_build_vec_v32i1_pgso:
2081 ; X86-NEXT: movl $1497715861, %eax ## imm = 0x59455495
2082 ; X86-NEXT: kmovd %eax, %k1
2083 ; X86-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
2085 %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
; test_build_vec_v64i1: zero-masks a <64 x i8> vector with a constant mask.
; Byte-element counterpart of the 32-lane constant-mask test: the select
; condition is a literal <64 x i1>, and every expected-assembly variant lowers
; it to a single vector 'and' with a constant-pool operand.
2089 define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
2090 ; KNL-LABEL: test_build_vec_v64i1:
2092 ; KNL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
2095 ; SKX-LABEL: test_build_vec_v64i1:
2097 ; SKX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
2100 ; AVX512BW-LABEL: test_build_vec_v64i1:
2101 ; AVX512BW: ## %bb.0:
2102 ; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
2103 ; AVX512BW-NEXT: retq
2105 ; AVX512DQ-LABEL: test_build_vec_v64i1:
2106 ; AVX512DQ: ## %bb.0:
2107 ; AVX512DQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
2108 ; AVX512DQ-NEXT: retq
2110 ; X86-LABEL: test_build_vec_v64i1:
2112 ; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0
2114 %ret = select <64 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <64 x i8> %x, <64 x i8> zeroinitializer
; Compares %in against two align-1 <8 x double> loads (at %base and one double
; past it), ANDs the two compare masks, and branches on whether the resulting
; 8-bit mask is zero. Run lines that enable avx512dq expect ktestb on the two
; masks; the remaining ones expect a masked compare followed by kmov + testb.
2118 define void @ktest_1(<8 x double> %in, ptr %base) {
2119 ; KNL-LABEL: ktest_1:
2121 ; KNL-NEXT: vcmpgtpd (%rdi), %zmm0, %k1
2122 ; KNL-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
2123 ; KNL-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
2124 ; KNL-NEXT: kmovw %k0, %eax
2125 ; KNL-NEXT: testb %al, %al
2126 ; KNL-NEXT: je LBB44_2
2127 ; KNL-NEXT: ## %bb.1: ## %L1
2128 ; KNL-NEXT: vmovapd %zmm0, (%rdi)
2129 ; KNL-NEXT: vzeroupper
2131 ; KNL-NEXT: LBB44_2: ## %L2
2132 ; KNL-NEXT: vmovapd %zmm0, 8(%rdi)
2133 ; KNL-NEXT: vzeroupper
2136 ; SKX-LABEL: ktest_1:
2138 ; SKX-NEXT: vcmpgtpd (%rdi), %zmm0, %k1
2139 ; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
2140 ; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0
2141 ; SKX-NEXT: ktestb %k0, %k1
2142 ; SKX-NEXT: je LBB44_2
2143 ; SKX-NEXT: ## %bb.1: ## %L1
2144 ; SKX-NEXT: vmovapd %zmm0, (%rdi)
2145 ; SKX-NEXT: vzeroupper
2147 ; SKX-NEXT: LBB44_2: ## %L2
2148 ; SKX-NEXT: vmovapd %zmm0, 8(%rdi)
2149 ; SKX-NEXT: vzeroupper
2152 ; AVX512BW-LABEL: ktest_1:
2153 ; AVX512BW: ## %bb.0:
2154 ; AVX512BW-NEXT: vcmpgtpd (%rdi), %zmm0, %k1
2155 ; AVX512BW-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
2156 ; AVX512BW-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
2157 ; AVX512BW-NEXT: kmovd %k0, %eax
2158 ; AVX512BW-NEXT: testb %al, %al
2159 ; AVX512BW-NEXT: je LBB44_2
2160 ; AVX512BW-NEXT: ## %bb.1: ## %L1
2161 ; AVX512BW-NEXT: vmovapd %zmm0, (%rdi)
2162 ; AVX512BW-NEXT: vzeroupper
2163 ; AVX512BW-NEXT: retq
2164 ; AVX512BW-NEXT: LBB44_2: ## %L2
2165 ; AVX512BW-NEXT: vmovapd %zmm0, 8(%rdi)
2166 ; AVX512BW-NEXT: vzeroupper
2167 ; AVX512BW-NEXT: retq
2169 ; AVX512DQ-LABEL: ktest_1:
2170 ; AVX512DQ: ## %bb.0:
2171 ; AVX512DQ-NEXT: vcmpgtpd (%rdi), %zmm0, %k1
2172 ; AVX512DQ-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
2173 ; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm0, %k0
2174 ; AVX512DQ-NEXT: ktestb %k0, %k1
2175 ; AVX512DQ-NEXT: je LBB44_2
2176 ; AVX512DQ-NEXT: ## %bb.1: ## %L1
2177 ; AVX512DQ-NEXT: vmovapd %zmm0, (%rdi)
2178 ; AVX512DQ-NEXT: vzeroupper
2179 ; AVX512DQ-NEXT: retq
2180 ; AVX512DQ-NEXT: LBB44_2: ## %L2
2181 ; AVX512DQ-NEXT: vmovapd %zmm0, 8(%rdi)
2182 ; AVX512DQ-NEXT: vzeroupper
2183 ; AVX512DQ-NEXT: retq
2185 ; X86-LABEL: ktest_1:
2187 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2188 ; X86-NEXT: vcmpgtpd (%eax), %zmm0, %k1
2189 ; X86-NEXT: vmovupd 8(%eax), %zmm1 {%k1} {z}
2190 ; X86-NEXT: vcmpltpd %zmm1, %zmm0, %k0
2191 ; X86-NEXT: ktestb %k0, %k1
2192 ; X86-NEXT: je LBB44_2
2193 ; X86-NEXT: ## %bb.1: ## %L1
2194 ; X86-NEXT: vmovapd %zmm0, (%eax)
2195 ; X86-NEXT: vzeroupper
2197 ; X86-NEXT: LBB44_2: ## %L2
2198 ; X86-NEXT: vmovapd %zmm0, 8(%eax)
2199 ; X86-NEXT: vzeroupper
; %addr2 is one double (8 bytes) past %base, so the two loads overlap.
2201 %addr2 = getelementptr double, ptr %base, i64 1
2204 %val1 = load <8 x double>, ptr%base, align 1
2205 %val2 = load <8 x double>, ptr%addr2, align 1
; The select against zeroinitializer is what lets the second load be emitted
; as a zero-masked load under the first compare's mask.
2207 %sel1 = fcmp ogt <8 x double>%in, %val1
2208 %val3 = select <8 x i1> %sel1, <8 x double> %val2, <8 x double> zeroinitializer
2209 %sel2 = fcmp olt <8 x double> %in, %val3
2210 %sel3 = and <8 x i1> %sel1, %sel2
; Branch to %L2 when no lane is set in the combined mask, to %L1 otherwise.
2212 %int_sel3 = bitcast <8 x i1> %sel3 to i8
2213 %res = icmp eq i8 %int_sel3, zeroinitializer
2214 br i1 %res, label %L2, label %L1
2216 store <8 x double> %in, ptr %base
2219 store <8 x double> %in, ptr %addr2
; <32 x float> counterpart of the 8-lane test: the two compare masks are ORed
; and the 32-bit result is tested for zero. Run lines that enable avx512bw
; expect the 16-bit halves to be joined with kunpckwd and tested with
; kortestd; the remaining ones expect korw on each half followed by kortestw.
2225 define void @ktest_2(<32 x float> %in, ptr %base) {
2227 ; KNL-LABEL: ktest_2:
2229 ; KNL-NEXT: vcmpgtps (%rdi), %zmm0, %k1
2230 ; KNL-NEXT: vcmpgtps 64(%rdi), %zmm1, %k2
2231 ; KNL-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z}
2232 ; KNL-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z}
2233 ; KNL-NEXT: vcmpltps %zmm3, %zmm0, %k0
2234 ; KNL-NEXT: vcmpltps %zmm2, %zmm1, %k3
2235 ; KNL-NEXT: korw %k3, %k2, %k2
2236 ; KNL-NEXT: korw %k0, %k1, %k0
2237 ; KNL-NEXT: kortestw %k2, %k0
2238 ; KNL-NEXT: je LBB45_2
2239 ; KNL-NEXT: ## %bb.1: ## %L1
2240 ; KNL-NEXT: vmovaps %zmm0, (%rdi)
2241 ; KNL-NEXT: vmovaps %zmm1, 64(%rdi)
2242 ; KNL-NEXT: vzeroupper
2244 ; KNL-NEXT: LBB45_2: ## %L2
2245 ; KNL-NEXT: vmovaps %zmm0, 4(%rdi)
2246 ; KNL-NEXT: vmovaps %zmm1, 68(%rdi)
2247 ; KNL-NEXT: vzeroupper
2250 ; SKX-LABEL: ktest_2:
2252 ; SKX-NEXT: vcmpgtps (%rdi), %zmm0, %k1
2253 ; SKX-NEXT: vcmpgtps 64(%rdi), %zmm1, %k2
2254 ; SKX-NEXT: kunpckwd %k1, %k2, %k0
2255 ; SKX-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z}
2256 ; SKX-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z}
2257 ; SKX-NEXT: vcmpltps %zmm3, %zmm0, %k1
2258 ; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2
2259 ; SKX-NEXT: kunpckwd %k1, %k2, %k1
2260 ; SKX-NEXT: kortestd %k1, %k0
2261 ; SKX-NEXT: je LBB45_2
2262 ; SKX-NEXT: ## %bb.1: ## %L1
2263 ; SKX-NEXT: vmovaps %zmm0, (%rdi)
2264 ; SKX-NEXT: vmovaps %zmm1, 64(%rdi)
2265 ; SKX-NEXT: vzeroupper
2267 ; SKX-NEXT: LBB45_2: ## %L2
2268 ; SKX-NEXT: vmovaps %zmm0, 4(%rdi)
2269 ; SKX-NEXT: vmovaps %zmm1, 68(%rdi)
2270 ; SKX-NEXT: vzeroupper
2273 ; AVX512BW-LABEL: ktest_2:
2274 ; AVX512BW: ## %bb.0:
2275 ; AVX512BW-NEXT: vcmpgtps (%rdi), %zmm0, %k1
2276 ; AVX512BW-NEXT: vcmpgtps 64(%rdi), %zmm1, %k2
2277 ; AVX512BW-NEXT: kunpckwd %k1, %k2, %k0
2278 ; AVX512BW-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z}
2279 ; AVX512BW-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z}
2280 ; AVX512BW-NEXT: vcmpltps %zmm3, %zmm0, %k1
2281 ; AVX512BW-NEXT: vcmpltps %zmm2, %zmm1, %k2
2282 ; AVX512BW-NEXT: kunpckwd %k1, %k2, %k1
2283 ; AVX512BW-NEXT: kortestd %k1, %k0
2284 ; AVX512BW-NEXT: je LBB45_2
2285 ; AVX512BW-NEXT: ## %bb.1: ## %L1
2286 ; AVX512BW-NEXT: vmovaps %zmm0, (%rdi)
2287 ; AVX512BW-NEXT: vmovaps %zmm1, 64(%rdi)
2288 ; AVX512BW-NEXT: vzeroupper
2289 ; AVX512BW-NEXT: retq
2290 ; AVX512BW-NEXT: LBB45_2: ## %L2
2291 ; AVX512BW-NEXT: vmovaps %zmm0, 4(%rdi)
2292 ; AVX512BW-NEXT: vmovaps %zmm1, 68(%rdi)
2293 ; AVX512BW-NEXT: vzeroupper
2294 ; AVX512BW-NEXT: retq
2296 ; AVX512DQ-LABEL: ktest_2:
2297 ; AVX512DQ: ## %bb.0:
2298 ; AVX512DQ-NEXT: vcmpgtps (%rdi), %zmm0, %k1
2299 ; AVX512DQ-NEXT: vcmpgtps 64(%rdi), %zmm1, %k2
2300 ; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z}
2301 ; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z}
2302 ; AVX512DQ-NEXT: vcmpltps %zmm3, %zmm0, %k0
2303 ; AVX512DQ-NEXT: vcmpltps %zmm2, %zmm1, %k3
2304 ; AVX512DQ-NEXT: korw %k3, %k2, %k2
2305 ; AVX512DQ-NEXT: korw %k0, %k1, %k0
2306 ; AVX512DQ-NEXT: kortestw %k2, %k0
2307 ; AVX512DQ-NEXT: je LBB45_2
2308 ; AVX512DQ-NEXT: ## %bb.1: ## %L1
2309 ; AVX512DQ-NEXT: vmovaps %zmm0, (%rdi)
2310 ; AVX512DQ-NEXT: vmovaps %zmm1, 64(%rdi)
2311 ; AVX512DQ-NEXT: vzeroupper
2312 ; AVX512DQ-NEXT: retq
2313 ; AVX512DQ-NEXT: LBB45_2: ## %L2
2314 ; AVX512DQ-NEXT: vmovaps %zmm0, 4(%rdi)
2315 ; AVX512DQ-NEXT: vmovaps %zmm1, 68(%rdi)
2316 ; AVX512DQ-NEXT: vzeroupper
2317 ; AVX512DQ-NEXT: retq
2319 ; X86-LABEL: ktest_2:
2321 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2322 ; X86-NEXT: vcmpgtps (%eax), %zmm0, %k1
2323 ; X86-NEXT: vcmpgtps 64(%eax), %zmm1, %k2
2324 ; X86-NEXT: kunpckwd %k1, %k2, %k0
2325 ; X86-NEXT: vmovups 68(%eax), %zmm2 {%k2} {z}
2326 ; X86-NEXT: vmovups 4(%eax), %zmm3 {%k1} {z}
2327 ; X86-NEXT: vcmpltps %zmm3, %zmm0, %k1
2328 ; X86-NEXT: vcmpltps %zmm2, %zmm1, %k2
2329 ; X86-NEXT: kunpckwd %k1, %k2, %k1
2330 ; X86-NEXT: kortestd %k1, %k0
2331 ; X86-NEXT: je LBB45_2
2332 ; X86-NEXT: ## %bb.1: ## %L1
2333 ; X86-NEXT: vmovaps %zmm0, (%eax)
2334 ; X86-NEXT: vmovaps %zmm1, 64(%eax)
2335 ; X86-NEXT: vzeroupper
2337 ; X86-NEXT: LBB45_2: ## %L2
2338 ; X86-NEXT: vmovaps %zmm0, 4(%eax)
2339 ; X86-NEXT: vmovaps %zmm1, 68(%eax)
2340 ; X86-NEXT: vzeroupper
; %addr2 is one float (4 bytes) past %base, so the two loads overlap.
2342 %addr2 = getelementptr float, ptr %base, i64 1
2345 %val1 = load <32 x float>, ptr%base, align 1
2346 %val2 = load <32 x float>, ptr%addr2, align 1
2348 %sel1 = fcmp ogt <32 x float>%in, %val1
2349 %val3 = select <32 x i1> %sel1, <32 x float> %val2, <32 x float> zeroinitializer
2350 %sel2 = fcmp olt <32 x float> %in, %val3
; OR (not AND) of the two masks here, unlike the 8-lane variant.
2351 %sel3 = or <32 x i1> %sel1, %sel2
; Branch to %L2 when no lane is set in the combined mask, to %L1 otherwise.
2353 %int_sel3 = bitcast <32 x i1> %sel3 to i32
2354 %res = icmp eq i32 %int_sel3, zeroinitializer
2355 br i1 %res, label %L2, label %L1
2357 store <32 x float> %in, ptr %base
2360 store <32 x float> %in, ptr %addr2
; Loads an <8 x i1> mask from %a and sign-extends it to <8 x i64>. Run lines
; that enable avx512dq expect kmovb from memory + vpmovm2q; the remaining ones
; expect a byte load through a GPR and a zero-masked vpternlogq.
2366 define <8 x i64> @load_8i1(ptr %a) {
2367 ; KNL-LABEL: load_8i1:
2369 ; KNL-NEXT: movzbl (%rdi), %eax
2370 ; KNL-NEXT: kmovw %eax, %k1
2371 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2374 ; SKX-LABEL: load_8i1:
2376 ; SKX-NEXT: kmovb (%rdi), %k0
2377 ; SKX-NEXT: vpmovm2q %k0, %zmm0
2380 ; AVX512BW-LABEL: load_8i1:
2381 ; AVX512BW: ## %bb.0:
2382 ; AVX512BW-NEXT: movzbl (%rdi), %eax
2383 ; AVX512BW-NEXT: kmovd %eax, %k1
2384 ; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2385 ; AVX512BW-NEXT: retq
2387 ; AVX512DQ-LABEL: load_8i1:
2388 ; AVX512DQ: ## %bb.0:
2389 ; AVX512DQ-NEXT: kmovb (%rdi), %k0
2390 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
2391 ; AVX512DQ-NEXT: retq
2393 ; X86-LABEL: load_8i1:
2395 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2396 ; X86-NEXT: kmovb (%eax), %k0
2397 ; X86-NEXT: vpmovm2q %k0, %zmm0
2399 %b = load <8 x i1>, ptr %a
2400 %c = sext <8 x i1> %b to <8 x i64>
; Loads a <16 x i1> mask from %a and sign-extends it to <16 x i32>. Every run
; line expects kmovw straight from memory; the widening is vpmovm2d where
; avx512dq is enabled and a zero-masked vpternlogd elsewhere.
2404 define <16 x i32> @load_16i1(ptr %a) {
2405 ; KNL-LABEL: load_16i1:
2407 ; KNL-NEXT: kmovw (%rdi), %k1
2408 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2411 ; SKX-LABEL: load_16i1:
2413 ; SKX-NEXT: kmovw (%rdi), %k0
2414 ; SKX-NEXT: vpmovm2d %k0, %zmm0
2417 ; AVX512BW-LABEL: load_16i1:
2418 ; AVX512BW: ## %bb.0:
2419 ; AVX512BW-NEXT: kmovw (%rdi), %k1
2420 ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2421 ; AVX512BW-NEXT: retq
2423 ; AVX512DQ-LABEL: load_16i1:
2424 ; AVX512DQ: ## %bb.0:
2425 ; AVX512DQ-NEXT: kmovw (%rdi), %k0
2426 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
2427 ; AVX512DQ-NEXT: retq
2429 ; X86-LABEL: load_16i1:
2431 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2432 ; X86-NEXT: kmovw (%eax), %k0
2433 ; X86-NEXT: vpmovm2d %k0, %zmm0
2435 %b = load <16 x i1>, ptr %a
2436 %c = sext <16 x i1> %b to <16 x i32>
; Loads a <2 x i1> mask from %a and sign-extends it to <2 x i16>. The expected
; code reads a whole byte for the mask in every configuration; run lines
; without avx512vl widen in a 512-bit register and then use only xmm0.
2440 define <2 x i16> @load_2i1(ptr %a) {
2441 ; KNL-LABEL: load_2i1:
2443 ; KNL-NEXT: movzbl (%rdi), %eax
2444 ; KNL-NEXT: kmovw %eax, %k1
2445 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2446 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
2447 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
2448 ; KNL-NEXT: vzeroupper
2451 ; SKX-LABEL: load_2i1:
2453 ; SKX-NEXT: kmovb (%rdi), %k0
2454 ; SKX-NEXT: vpmovm2w %k0, %xmm0
2457 ; AVX512BW-LABEL: load_2i1:
2458 ; AVX512BW: ## %bb.0:
2459 ; AVX512BW-NEXT: movzbl (%rdi), %eax
2460 ; AVX512BW-NEXT: kmovd %eax, %k0
2461 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
2462 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
2463 ; AVX512BW-NEXT: vzeroupper
2464 ; AVX512BW-NEXT: retq
2466 ; AVX512DQ-LABEL: load_2i1:
2467 ; AVX512DQ: ## %bb.0:
2468 ; AVX512DQ-NEXT: kmovb (%rdi), %k0
2469 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
2470 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
2471 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
2472 ; AVX512DQ-NEXT: vzeroupper
2473 ; AVX512DQ-NEXT: retq
2475 ; X86-LABEL: load_2i1:
2477 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2478 ; X86-NEXT: kmovb (%eax), %k0
2479 ; X86-NEXT: vpmovm2w %k0, %xmm0
2481 %b = load <2 x i1>, ptr %a
2482 %c = sext <2 x i1> %b to <2 x i16>
; Loads a <4 x i1> mask from %a and sign-extends it to <4 x i16>. The expected
; instruction sequences are the same as for the <2 x i1> load test.
2486 define <4 x i16> @load_4i1(ptr %a) {
2487 ; KNL-LABEL: load_4i1:
2489 ; KNL-NEXT: movzbl (%rdi), %eax
2490 ; KNL-NEXT: kmovw %eax, %k1
2491 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2492 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
2493 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
2494 ; KNL-NEXT: vzeroupper
2497 ; SKX-LABEL: load_4i1:
2499 ; SKX-NEXT: kmovb (%rdi), %k0
2500 ; SKX-NEXT: vpmovm2w %k0, %xmm0
2503 ; AVX512BW-LABEL: load_4i1:
2504 ; AVX512BW: ## %bb.0:
2505 ; AVX512BW-NEXT: movzbl (%rdi), %eax
2506 ; AVX512BW-NEXT: kmovd %eax, %k0
2507 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
2508 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
2509 ; AVX512BW-NEXT: vzeroupper
2510 ; AVX512BW-NEXT: retq
2512 ; AVX512DQ-LABEL: load_4i1:
2513 ; AVX512DQ: ## %bb.0:
2514 ; AVX512DQ-NEXT: kmovb (%rdi), %k0
2515 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
2516 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
2517 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
2518 ; AVX512DQ-NEXT: vzeroupper
2519 ; AVX512DQ-NEXT: retq
2521 ; X86-LABEL: load_4i1:
2523 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2524 ; X86-NEXT: kmovb (%eax), %k0
2525 ; X86-NEXT: vpmovm2w %k0, %xmm0
2527 %b = load <4 x i1>, ptr %a
2528 %c = sext <4 x i1> %b to <4 x i16>
; Loads a <32 x i1> mask from %a and sign-extends it to <32 x i16>. Run lines
; that enable avx512bw expect one kmovd + vpmovm2w; the remaining ones expect
; two 16-bit mask loads (offsets 0 and 2) widened separately and joined with
; vinserti64x4.
2532 define <32 x i16> @load_32i1(ptr %a) {
2533 ; KNL-LABEL: load_32i1:
2535 ; KNL-NEXT: kmovw (%rdi), %k1
2536 ; KNL-NEXT: kmovw 2(%rdi), %k2
2537 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2538 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
2539 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
2540 ; KNL-NEXT: vpmovdw %zmm1, %ymm1
2541 ; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
2544 ; SKX-LABEL: load_32i1:
2546 ; SKX-NEXT: kmovd (%rdi), %k0
2547 ; SKX-NEXT: vpmovm2w %k0, %zmm0
2550 ; AVX512BW-LABEL: load_32i1:
2551 ; AVX512BW: ## %bb.0:
2552 ; AVX512BW-NEXT: kmovd (%rdi), %k0
2553 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
2554 ; AVX512BW-NEXT: retq
2556 ; AVX512DQ-LABEL: load_32i1:
2557 ; AVX512DQ: ## %bb.0:
2558 ; AVX512DQ-NEXT: kmovw (%rdi), %k0
2559 ; AVX512DQ-NEXT: kmovw 2(%rdi), %k1
2560 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
2561 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
2562 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm1
2563 ; AVX512DQ-NEXT: vpmovdw %zmm1, %ymm1
2564 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
2565 ; AVX512DQ-NEXT: retq
2567 ; X86-LABEL: load_32i1:
2569 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2570 ; X86-NEXT: kmovd (%eax), %k0
2571 ; X86-NEXT: vpmovm2w %k0, %zmm0
2573 %b = load <32 x i1>, ptr %a
2574 %c = sext <32 x i1> %b to <32 x i16>
; Loads a <64 x i1> mask from %a and sign-extends it to <64 x i8>. Run lines
; that enable avx512bw expect one kmovq + vpmovm2b; the remaining ones expect
; four 16-bit mask loads (offsets 0, 2, 4, 6), each widened to bytes and then
; assembled with vinserti128 / vinserti64x4.
2578 define <64 x i8> @load_64i1(ptr %a) {
2579 ; KNL-LABEL: load_64i1:
2581 ; KNL-NEXT: kmovw (%rdi), %k1
2582 ; KNL-NEXT: kmovw 2(%rdi), %k2
2583 ; KNL-NEXT: kmovw 4(%rdi), %k3
2584 ; KNL-NEXT: kmovw 6(%rdi), %k4
2585 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
2586 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
2587 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z}
2588 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
2589 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2590 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
2591 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
2592 ; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
2593 ; KNL-NEXT: vpmovdb %zmm2, %xmm2
2594 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2595 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
2598 ; SKX-LABEL: load_64i1:
2600 ; SKX-NEXT: kmovq (%rdi), %k0
2601 ; SKX-NEXT: vpmovm2b %k0, %zmm0
2604 ; AVX512BW-LABEL: load_64i1:
2605 ; AVX512BW: ## %bb.0:
2606 ; AVX512BW-NEXT: kmovq (%rdi), %k0
2607 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
2608 ; AVX512BW-NEXT: retq
2610 ; AVX512DQ-LABEL: load_64i1:
2611 ; AVX512DQ: ## %bb.0:
2612 ; AVX512DQ-NEXT: kmovw (%rdi), %k0
2613 ; AVX512DQ-NEXT: kmovw 2(%rdi), %k1
2614 ; AVX512DQ-NEXT: kmovw 4(%rdi), %k2
2615 ; AVX512DQ-NEXT: kmovw 6(%rdi), %k3
2616 ; AVX512DQ-NEXT: vpmovm2d %k2, %zmm0
2617 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
2618 ; AVX512DQ-NEXT: vpmovm2d %k3, %zmm1
2619 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
2620 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2621 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
2622 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
2623 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm2
2624 ; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
2625 ; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2626 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
2627 ; AVX512DQ-NEXT: retq
2629 ; X86-LABEL: load_64i1:
2631 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2632 ; X86-NEXT: kmovq (%eax), %k0
2633 ; X86-NEXT: vpmovm2b %k0, %zmm0
2635 %b = load <64 x i1>, ptr %a
2636 %c = sext <64 x i1> %b to <64 x i8>
; Stores an <8 x i1> argument to %a. In the expected code the argument arrives
; in xmm0 with one 16-bit element per lane; the low bit of each lane is
; shifted to the sign position and then turned into a mask register. Run
; lines that enable avx512dq store with kmovb; the remaining ones go through
; a GPR and store with movb.
2640 define void @store_8i1(ptr %a, <8 x i1> %v) {
2641 ; KNL-LABEL: store_8i1:
2643 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
2644 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
2645 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
2646 ; KNL-NEXT: kmovw %k0, %eax
2647 ; KNL-NEXT: movb %al, (%rdi)
2648 ; KNL-NEXT: vzeroupper
2651 ; SKX-LABEL: store_8i1:
2653 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
2654 ; SKX-NEXT: vpmovw2m %xmm0, %k0
2655 ; SKX-NEXT: kmovb %k0, (%rdi)
2658 ; AVX512BW-LABEL: store_8i1:
2659 ; AVX512BW: ## %bb.0:
2660 ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
2661 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
2662 ; AVX512BW-NEXT: kmovd %k0, %eax
2663 ; AVX512BW-NEXT: movb %al, (%rdi)
2664 ; AVX512BW-NEXT: vzeroupper
2665 ; AVX512BW-NEXT: retq
2667 ; AVX512DQ-LABEL: store_8i1:
2668 ; AVX512DQ: ## %bb.0:
2669 ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
2670 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
2671 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
2672 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
2673 ; AVX512DQ-NEXT: vzeroupper
2674 ; AVX512DQ-NEXT: retq
2676 ; X86-LABEL: store_8i1:
2678 ; X86-NEXT: vpsllw $15, %xmm0, %xmm0
2679 ; X86-NEXT: vpmovw2m %xmm0, %k0
2680 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2681 ; X86-NEXT: kmovb %k0, (%eax)
2683 store <8 x i1> %v, ptr %a
; Truncates an <8 x i16> argument to <8 x i1> and stores the mask to %a. The
; expected instruction sequences match those of the plain <8 x i1> store test.
2687 define void @store_8i1_1(ptr %a, <8 x i16> %v) {
2688 ; KNL-LABEL: store_8i1_1:
2690 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
2691 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
2692 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
2693 ; KNL-NEXT: kmovw %k0, %eax
2694 ; KNL-NEXT: movb %al, (%rdi)
2695 ; KNL-NEXT: vzeroupper
2698 ; SKX-LABEL: store_8i1_1:
2700 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
2701 ; SKX-NEXT: vpmovw2m %xmm0, %k0
2702 ; SKX-NEXT: kmovb %k0, (%rdi)
2705 ; AVX512BW-LABEL: store_8i1_1:
2706 ; AVX512BW: ## %bb.0:
2707 ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
2708 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
2709 ; AVX512BW-NEXT: kmovd %k0, %eax
2710 ; AVX512BW-NEXT: movb %al, (%rdi)
2711 ; AVX512BW-NEXT: vzeroupper
2712 ; AVX512BW-NEXT: retq
2714 ; AVX512DQ-LABEL: store_8i1_1:
2715 ; AVX512DQ: ## %bb.0:
2716 ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
2717 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
2718 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
2719 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
2720 ; AVX512DQ-NEXT: vzeroupper
2721 ; AVX512DQ-NEXT: retq
2723 ; X86-LABEL: store_8i1_1:
2725 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2726 ; X86-NEXT: vpsllw $15, %xmm0, %xmm0
2727 ; X86-NEXT: vpmovw2m %xmm0, %k0
2728 ; X86-NEXT: kmovb %k0, (%eax)
2730 %v1 = trunc <8 x i16> %v to <8 x i1>
2731 store <8 x i1> %v1, ptr %a
; Stores a <16 x i1> argument to %a. Every run line expects the final store to
; be kmovw to memory. Run lines that enable avx512bw build the mask with
; vpsllw $7 + vpmovb2m; the remaining ones first sign-extend the bytes to
; dwords.
2735 define void @store_16i1(ptr %a, <16 x i1> %v) {
2736 ; KNL-LABEL: store_16i1:
2738 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
2739 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2740 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
2741 ; KNL-NEXT: kmovw %k0, (%rdi)
2742 ; KNL-NEXT: vzeroupper
2745 ; SKX-LABEL: store_16i1:
2747 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
2748 ; SKX-NEXT: vpmovb2m %xmm0, %k0
2749 ; SKX-NEXT: kmovw %k0, (%rdi)
2752 ; AVX512BW-LABEL: store_16i1:
2753 ; AVX512BW: ## %bb.0:
2754 ; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
2755 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
2756 ; AVX512BW-NEXT: kmovw %k0, (%rdi)
2757 ; AVX512BW-NEXT: vzeroupper
2758 ; AVX512BW-NEXT: retq
2760 ; AVX512DQ-LABEL: store_16i1:
2761 ; AVX512DQ: ## %bb.0:
2762 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
2763 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2764 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
2765 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
2766 ; AVX512DQ-NEXT: vzeroupper
2767 ; AVX512DQ-NEXT: retq
2769 ; X86-LABEL: store_16i1:
2771 ; X86-NEXT: vpsllw $7, %xmm0, %xmm0
2772 ; X86-NEXT: vpmovb2m %xmm0, %k0
2773 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2774 ; X86-NEXT: kmovw %k0, (%eax)
2776 store <16 x i1> %v, ptr %a
; Stores a <32 x i1> argument to %a. Run lines that enable avx512bw expect a
; single vpmovb2m + kmovd store; the remaining ones split the vector into two
; 16-lane halves and store each half with kmovw (offsets 0 and 2).
2780 define void @store_32i1(ptr %a, <32 x i1> %v) {
2781 ; KNL-LABEL: store_32i1:
2783 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
2784 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
2785 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
2786 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
2787 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
2788 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2789 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
2790 ; KNL-NEXT: kmovw %k1, 2(%rdi)
2791 ; KNL-NEXT: kmovw %k0, (%rdi)
2792 ; KNL-NEXT: vzeroupper
2795 ; SKX-LABEL: store_32i1:
2797 ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
2798 ; SKX-NEXT: vpmovb2m %ymm0, %k0
2799 ; SKX-NEXT: kmovd %k0, (%rdi)
2800 ; SKX-NEXT: vzeroupper
2803 ; AVX512BW-LABEL: store_32i1:
2804 ; AVX512BW: ## %bb.0:
2805 ; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0
2806 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
2807 ; AVX512BW-NEXT: kmovd %k0, (%rdi)
2808 ; AVX512BW-NEXT: vzeroupper
2809 ; AVX512BW-NEXT: retq
2811 ; AVX512DQ-LABEL: store_32i1:
2812 ; AVX512DQ: ## %bb.0:
2813 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1
2814 ; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1
2815 ; AVX512DQ-NEXT: vpmovd2m %zmm1, %k0
2816 ; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0
2817 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
2818 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2819 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
2820 ; AVX512DQ-NEXT: kmovw %k1, 2(%rdi)
2821 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
2822 ; AVX512DQ-NEXT: vzeroupper
2823 ; AVX512DQ-NEXT: retq
2825 ; X86-LABEL: store_32i1:
2827 ; X86-NEXT: vpsllw $7, %ymm0, %ymm0
2828 ; X86-NEXT: vpmovb2m %ymm0, %k0
2829 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2830 ; X86-NEXT: kmovd %k0, (%eax)
2831 ; X86-NEXT: vzeroupper
2833 store <32 x i1> %v, ptr %a
; Truncates a <32 x i16> argument to <32 x i1> and stores the mask to %a. Run
; lines that enable avx512bw expect vpsllw $15 + vpmovw2m + one kmovd store;
; the remaining ones widen each 16-lane half to dwords and store two 16-bit
; masks (offsets 0 and 2).
2837 define void @store_32i1_1(ptr %a, <32 x i16> %v) {
2838 ; KNL-LABEL: store_32i1_1:
2840 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm1
2841 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
2842 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
2843 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
2844 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
2845 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2846 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
2847 ; KNL-NEXT: kmovw %k1, 2(%rdi)
2848 ; KNL-NEXT: kmovw %k0, (%rdi)
2849 ; KNL-NEXT: vzeroupper
2852 ; SKX-LABEL: store_32i1_1:
2854 ; SKX-NEXT: vpsllw $15, %zmm0, %zmm0
2855 ; SKX-NEXT: vpmovw2m %zmm0, %k0
2856 ; SKX-NEXT: kmovd %k0, (%rdi)
2857 ; SKX-NEXT: vzeroupper
2860 ; AVX512BW-LABEL: store_32i1_1:
2861 ; AVX512BW: ## %bb.0:
2862 ; AVX512BW-NEXT: vpsllw $15, %zmm0, %zmm0
2863 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
2864 ; AVX512BW-NEXT: kmovd %k0, (%rdi)
2865 ; AVX512BW-NEXT: vzeroupper
2866 ; AVX512BW-NEXT: retq
2868 ; AVX512DQ-LABEL: store_32i1_1:
2869 ; AVX512DQ: ## %bb.0:
2870 ; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm1
2871 ; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1
2872 ; AVX512DQ-NEXT: vpmovd2m %zmm1, %k0
2873 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
2874 ; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
2875 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2876 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
2877 ; AVX512DQ-NEXT: kmovw %k1, 2(%rdi)
2878 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
2879 ; AVX512DQ-NEXT: vzeroupper
2880 ; AVX512DQ-NEXT: retq
2882 ; X86-LABEL: store_32i1_1:
2884 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2885 ; X86-NEXT: vpsllw $15, %zmm0, %zmm0
2886 ; X86-NEXT: vpmovw2m %zmm0, %k0
2887 ; X86-NEXT: kmovd %k0, (%eax)
2888 ; X86-NEXT: vzeroupper
2890 %v1 = trunc <32 x i16> %v to <32 x i1>
2891 store <32 x i1> %v1, ptr %a
2896 define void @store_64i1(ptr %a, <64 x i1> %v) {
2898 ; KNL-LABEL: store_64i1:
2900 ; KNL-NEXT: andl $1, %esi
2901 ; KNL-NEXT: kmovw %esi, %k0
2902 ; KNL-NEXT: kmovw %edx, %k1
2903 ; KNL-NEXT: kshiftlw $15, %k1, %k1
2904 ; KNL-NEXT: kshiftrw $14, %k1, %k1
2905 ; KNL-NEXT: korw %k1, %k0, %k0
2906 ; KNL-NEXT: movw $-5, %ax
2907 ; KNL-NEXT: kmovw %eax, %k1
2908 ; KNL-NEXT: kandw %k1, %k0, %k0
2909 ; KNL-NEXT: kmovw %k1, %k7
2910 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
2911 ; KNL-NEXT: kmovw %ecx, %k1
2912 ; KNL-NEXT: kshiftlw $15, %k1, %k1
2913 ; KNL-NEXT: kshiftrw $13, %k1, %k1
2914 ; KNL-NEXT: korw %k1, %k0, %k0
2915 ; KNL-NEXT: movw $-9, %ax
2916 ; KNL-NEXT: kmovw %eax, %k1
2917 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
2918 ; KNL-NEXT: kandw %k1, %k0, %k0
2919 ; KNL-NEXT: kmovw %r8d, %k1
2920 ; KNL-NEXT: kshiftlw $15, %k1, %k1
2921 ; KNL-NEXT: kshiftrw $12, %k1, %k1
2922 ; KNL-NEXT: korw %k1, %k0, %k0
2923 ; KNL-NEXT: movw $-17, %ax
2924 ; KNL-NEXT: kmovw %eax, %k1
2925 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
2926 ; KNL-NEXT: kandw %k1, %k0, %k0
2927 ; KNL-NEXT: kmovw %r9d, %k1
2928 ; KNL-NEXT: kshiftlw $15, %k1, %k1
2929 ; KNL-NEXT: kshiftrw $11, %k1, %k1
2930 ; KNL-NEXT: korw %k1, %k0, %k0
2931 ; KNL-NEXT: movw $-33, %ax
2932 ; KNL-NEXT: kmovw %eax, %k1
2933 ; KNL-NEXT: kandw %k1, %k0, %k0
2934 ; KNL-NEXT: kmovw %k1, %k2
2935 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
2936 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2937 ; KNL-NEXT: kmovw %eax, %k1
2938 ; KNL-NEXT: kshiftlw $15, %k1, %k1
2939 ; KNL-NEXT: kshiftrw $10, %k1, %k1
2940 ; KNL-NEXT: korw %k1, %k0, %k0
2941 ; KNL-NEXT: movw $-65, %ax
2942 ; KNL-NEXT: kmovw %eax, %k1
2943 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
2944 ; KNL-NEXT: kandw %k1, %k0, %k0
2945 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2946 ; KNL-NEXT: kmovw %eax, %k1
2947 ; KNL-NEXT: kshiftlw $15, %k1, %k1
2948 ; KNL-NEXT: kshiftrw $9, %k1, %k1
2949 ; KNL-NEXT: korw %k1, %k0, %k0
2950 ; KNL-NEXT: movw $-129, %ax
2951 ; KNL-NEXT: kmovw %eax, %k1
2952 ; KNL-NEXT: kandw %k1, %k0, %k0
2953 ; KNL-NEXT: kmovw %k1, %k3
2954 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
2955 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2956 ; KNL-NEXT: kmovw %eax, %k1
2957 ; KNL-NEXT: kshiftlw $15, %k1, %k1
2958 ; KNL-NEXT: kshiftrw $8, %k1, %k1
2959 ; KNL-NEXT: korw %k1, %k0, %k0
2960 ; KNL-NEXT: movw $-257, %ax ## imm = 0xFEFF
2961 ; KNL-NEXT: kmovw %eax, %k1
2962 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
2963 ; KNL-NEXT: kandw %k1, %k0, %k0
2964 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2965 ; KNL-NEXT: kmovw %eax, %k1
2966 ; KNL-NEXT: kshiftlw $15, %k1, %k1
2967 ; KNL-NEXT: kshiftrw $7, %k1, %k1
2968 ; KNL-NEXT: korw %k1, %k0, %k0
2969 ; KNL-NEXT: movw $-513, %ax ## imm = 0xFDFF
2970 ; KNL-NEXT: kmovw %eax, %k1
2971 ; KNL-NEXT: kandw %k1, %k0, %k0
2972 ; KNL-NEXT: kmovw %k1, %k4
2973 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
2974 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2975 ; KNL-NEXT: kmovw %eax, %k1
2976 ; KNL-NEXT: kshiftlw $15, %k1, %k1
2977 ; KNL-NEXT: kshiftrw $6, %k1, %k1
2978 ; KNL-NEXT: korw %k1, %k0, %k0
2979 ; KNL-NEXT: movw $-1025, %ax ## imm = 0xFBFF
2980 ; KNL-NEXT: kmovw %eax, %k1
2981 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
2982 ; KNL-NEXT: kandw %k1, %k0, %k0
2983 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2984 ; KNL-NEXT: kmovw %eax, %k1
2985 ; KNL-NEXT: kshiftlw $15, %k1, %k1
2986 ; KNL-NEXT: kshiftrw $5, %k1, %k1
2987 ; KNL-NEXT: korw %k1, %k0, %k0
2988 ; KNL-NEXT: movw $-2049, %ax ## imm = 0xF7FF
2989 ; KNL-NEXT: kmovw %eax, %k5
2990 ; KNL-NEXT: kandw %k5, %k0, %k0
2991 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2992 ; KNL-NEXT: kmovw %eax, %k1
2993 ; KNL-NEXT: kshiftlw $15, %k1, %k1
2994 ; KNL-NEXT: kshiftrw $4, %k1, %k1
2995 ; KNL-NEXT: korw %k1, %k0, %k0
2996 ; KNL-NEXT: movw $-4097, %ax ## imm = 0xEFFF
2997 ; KNL-NEXT: kmovw %eax, %k1
2998 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
2999 ; KNL-NEXT: kandw %k1, %k0, %k0
3000 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3001 ; KNL-NEXT: kmovw %eax, %k1
3002 ; KNL-NEXT: kshiftlw $15, %k1, %k1
3003 ; KNL-NEXT: kshiftrw $3, %k1, %k1
3004 ; KNL-NEXT: korw %k1, %k0, %k0
3005 ; KNL-NEXT: movw $-8193, %ax ## imm = 0xDFFF
3006 ; KNL-NEXT: kmovw %eax, %k1
3007 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
3008 ; KNL-NEXT: kandw %k1, %k0, %k0
3009 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3010 ; KNL-NEXT: kmovw %eax, %k1
3011 ; KNL-NEXT: kshiftlw $15, %k1, %k1
3012 ; KNL-NEXT: kshiftrw $2, %k1, %k1
3013 ; KNL-NEXT: korw %k1, %k0, %k0
3014 ; KNL-NEXT: movw $-16385, %ax ## imm = 0xBFFF
3015 ; KNL-NEXT: kmovw %eax, %k1
3016 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
3017 ; KNL-NEXT: kandw %k1, %k0, %k0
3018 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3019 ; KNL-NEXT: kmovw %eax, %k6
3020 ; KNL-NEXT: kshiftlw $14, %k6, %k6
3021 ; KNL-NEXT: korw %k6, %k0, %k0
3022 ; KNL-NEXT: kshiftlw $1, %k0, %k0
3023 ; KNL-NEXT: kshiftrw $1, %k0, %k0
3024 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3025 ; KNL-NEXT: kmovw %eax, %k6
3026 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3027 ; KNL-NEXT: korw %k6, %k0, %k0
3028 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
3029 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3030 ; KNL-NEXT: andl $1, %eax
3031 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
3032 ; KNL-NEXT: kmovw %ecx, %k0
3033 ; KNL-NEXT: kshiftlw $15, %k0, %k0
3034 ; KNL-NEXT: kshiftrw $14, %k0, %k0
3035 ; KNL-NEXT: kmovw %eax, %k6
3036 ; KNL-NEXT: korw %k0, %k6, %k0
3037 ; KNL-NEXT: kandw %k7, %k0, %k0
3038 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3039 ; KNL-NEXT: kmovw %eax, %k6
3040 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3041 ; KNL-NEXT: kshiftrw $13, %k6, %k6
3042 ; KNL-NEXT: korw %k6, %k0, %k0
3043 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
3044 ; KNL-NEXT: kandw %k7, %k0, %k0
3045 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3046 ; KNL-NEXT: kmovw %eax, %k6
3047 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3048 ; KNL-NEXT: kshiftrw $12, %k6, %k6
3049 ; KNL-NEXT: korw %k6, %k0, %k0
3050 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
3051 ; KNL-NEXT: kandw %k1, %k0, %k0
3052 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3053 ; KNL-NEXT: kmovw %eax, %k6
3054 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3055 ; KNL-NEXT: kshiftrw $11, %k6, %k6
3056 ; KNL-NEXT: korw %k6, %k0, %k0
3057 ; KNL-NEXT: kandw %k2, %k0, %k0
3058 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3059 ; KNL-NEXT: kmovw %eax, %k6
3060 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3061 ; KNL-NEXT: kshiftrw $10, %k6, %k6
3062 ; KNL-NEXT: korw %k6, %k0, %k0
3063 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
3064 ; KNL-NEXT: kandw %k2, %k0, %k0
3065 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3066 ; KNL-NEXT: kmovw %eax, %k6
3067 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3068 ; KNL-NEXT: kshiftrw $9, %k6, %k6
3069 ; KNL-NEXT: korw %k6, %k0, %k0
3070 ; KNL-NEXT: kandw %k3, %k0, %k0
3071 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3072 ; KNL-NEXT: kmovw %eax, %k6
3073 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3074 ; KNL-NEXT: kshiftrw $8, %k6, %k6
3075 ; KNL-NEXT: korw %k6, %k0, %k0
3076 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
3077 ; KNL-NEXT: kandw %k3, %k0, %k0
3078 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3079 ; KNL-NEXT: kmovw %eax, %k6
3080 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3081 ; KNL-NEXT: kshiftrw $7, %k6, %k6
3082 ; KNL-NEXT: korw %k6, %k0, %k0
3083 ; KNL-NEXT: kandw %k4, %k0, %k0
3084 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3085 ; KNL-NEXT: kmovw %eax, %k6
3086 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3087 ; KNL-NEXT: kshiftrw $6, %k6, %k6
3088 ; KNL-NEXT: korw %k6, %k0, %k0
3089 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload
3090 ; KNL-NEXT: kandw %k4, %k0, %k0
3091 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3092 ; KNL-NEXT: kmovw %eax, %k6
3093 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3094 ; KNL-NEXT: kshiftrw $5, %k6, %k6
3095 ; KNL-NEXT: korw %k6, %k0, %k0
3096 ; KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
3097 ; KNL-NEXT: kandw %k5, %k0, %k0
3098 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3099 ; KNL-NEXT: kmovw %eax, %k6
3100 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3101 ; KNL-NEXT: kshiftrw $4, %k6, %k6
3102 ; KNL-NEXT: korw %k6, %k0, %k0
3103 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
3104 ; KNL-NEXT: kandw %k6, %k0, %k0
3105 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3106 ; KNL-NEXT: kmovw %eax, %k6
3107 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3108 ; KNL-NEXT: kshiftrw $3, %k6, %k6
3109 ; KNL-NEXT: korw %k6, %k0, %k0
3110 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
3111 ; KNL-NEXT: kandw %k6, %k0, %k0
3112 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3113 ; KNL-NEXT: kmovw %eax, %k6
3114 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3115 ; KNL-NEXT: kshiftrw $2, %k6, %k6
3116 ; KNL-NEXT: korw %k6, %k0, %k0
3117 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
3118 ; KNL-NEXT: kandw %k6, %k0, %k0
3119 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3120 ; KNL-NEXT: kmovw %eax, %k6
3121 ; KNL-NEXT: kshiftlw $14, %k6, %k6
3122 ; KNL-NEXT: korw %k6, %k0, %k0
3123 ; KNL-NEXT: kshiftlw $1, %k0, %k0
3124 ; KNL-NEXT: kshiftrw $1, %k0, %k0
3125 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3126 ; KNL-NEXT: kmovw %eax, %k6
3127 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3128 ; KNL-NEXT: korw %k6, %k0, %k0
3129 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
3130 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3131 ; KNL-NEXT: andl $1, %eax
3132 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
3133 ; KNL-NEXT: kmovw %ecx, %k0
3134 ; KNL-NEXT: kshiftlw $15, %k0, %k0
3135 ; KNL-NEXT: kshiftrw $14, %k0, %k0
3136 ; KNL-NEXT: kmovw %eax, %k6
3137 ; KNL-NEXT: korw %k0, %k6, %k0
3138 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
3139 ; KNL-NEXT: kandw %k6, %k0, %k0
3140 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3141 ; KNL-NEXT: kmovw %eax, %k6
3142 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3143 ; KNL-NEXT: kshiftrw $13, %k6, %k6
3144 ; KNL-NEXT: korw %k6, %k0, %k0
3145 ; KNL-NEXT: kandw %k7, %k0, %k0
3146 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3147 ; KNL-NEXT: kmovw %eax, %k6
3148 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3149 ; KNL-NEXT: kshiftrw $12, %k6, %k6
3150 ; KNL-NEXT: korw %k6, %k0, %k0
3151 ; KNL-NEXT: kandw %k1, %k0, %k0
3152 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3153 ; KNL-NEXT: kmovw %eax, %k6
3154 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3155 ; KNL-NEXT: kshiftrw $11, %k6, %k6
3156 ; KNL-NEXT: korw %k6, %k0, %k0
3157 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
3158 ; KNL-NEXT: kandw %k1, %k0, %k0
3159 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3160 ; KNL-NEXT: kmovw %eax, %k6
3161 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3162 ; KNL-NEXT: kshiftrw $10, %k6, %k6
3163 ; KNL-NEXT: korw %k6, %k0, %k0
3164 ; KNL-NEXT: kandw %k2, %k0, %k0
3165 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3166 ; KNL-NEXT: kmovw %eax, %k6
3167 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3168 ; KNL-NEXT: kshiftrw $9, %k6, %k6
3169 ; KNL-NEXT: korw %k6, %k0, %k0
3170 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
3171 ; KNL-NEXT: kandw %k1, %k0, %k0
3172 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3173 ; KNL-NEXT: kmovw %eax, %k6
3174 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3175 ; KNL-NEXT: kshiftrw $8, %k6, %k6
3176 ; KNL-NEXT: korw %k6, %k0, %k0
3177 ; KNL-NEXT: kandw %k3, %k0, %k0
3178 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3179 ; KNL-NEXT: kmovw %eax, %k6
3180 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3181 ; KNL-NEXT: kshiftrw $7, %k6, %k6
3182 ; KNL-NEXT: korw %k6, %k0, %k0
3183 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
3184 ; KNL-NEXT: kandw %k3, %k0, %k0
3185 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3186 ; KNL-NEXT: kmovw %eax, %k6
3187 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3188 ; KNL-NEXT: kshiftrw $6, %k6, %k6
3189 ; KNL-NEXT: korw %k6, %k0, %k0
3190 ; KNL-NEXT: kandw %k4, %k0, %k0
3191 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3192 ; KNL-NEXT: kmovw %eax, %k6
3193 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3194 ; KNL-NEXT: kshiftrw $5, %k6, %k6
3195 ; KNL-NEXT: korw %k6, %k0, %k0
3196 ; KNL-NEXT: kandw %k5, %k0, %k0
3197 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3198 ; KNL-NEXT: kmovw %eax, %k6
3199 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3200 ; KNL-NEXT: kshiftrw $4, %k6, %k6
3201 ; KNL-NEXT: korw %k6, %k0, %k0
3202 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
3203 ; KNL-NEXT: kandw %k1, %k0, %k0
3204 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3205 ; KNL-NEXT: kmovw %eax, %k6
3206 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3207 ; KNL-NEXT: kshiftrw $3, %k6, %k6
3208 ; KNL-NEXT: korw %k6, %k0, %k0
3209 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
3210 ; KNL-NEXT: kandw %k2, %k0, %k0
3211 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3212 ; KNL-NEXT: kmovw %eax, %k6
3213 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3214 ; KNL-NEXT: kshiftrw $2, %k6, %k6
3215 ; KNL-NEXT: korw %k6, %k0, %k0
3216 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
3217 ; KNL-NEXT: kandw %k5, %k0, %k0
3218 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3219 ; KNL-NEXT: kmovw %eax, %k6
3220 ; KNL-NEXT: kshiftlw $14, %k6, %k6
3221 ; KNL-NEXT: korw %k6, %k0, %k0
3222 ; KNL-NEXT: kshiftlw $1, %k0, %k0
3223 ; KNL-NEXT: kshiftrw $1, %k0, %k0
3224 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3225 ; KNL-NEXT: kmovw %eax, %k6
3226 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3227 ; KNL-NEXT: korw %k6, %k0, %k0
3228 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3229 ; KNL-NEXT: andl $1, %eax
3230 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
3231 ; KNL-NEXT: kmovw %ecx, %k6
3232 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3233 ; KNL-NEXT: kshiftrw $14, %k6, %k6
3234 ; KNL-NEXT: kmovw %eax, %k7
3235 ; KNL-NEXT: korw %k6, %k7, %k6
3236 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
3237 ; KNL-NEXT: kandw %k5, %k6, %k6
3238 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3239 ; KNL-NEXT: kmovw %eax, %k7
3240 ; KNL-NEXT: kshiftlw $15, %k7, %k7
3241 ; KNL-NEXT: kshiftrw $13, %k7, %k7
3242 ; KNL-NEXT: korw %k7, %k6, %k6
3243 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
3244 ; KNL-NEXT: kandw %k5, %k6, %k6
3245 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3246 ; KNL-NEXT: kmovw %eax, %k7
3247 ; KNL-NEXT: kshiftlw $15, %k7, %k7
3248 ; KNL-NEXT: kshiftrw $12, %k7, %k7
3249 ; KNL-NEXT: korw %k7, %k6, %k6
3250 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
3251 ; KNL-NEXT: kandw %k5, %k6, %k6
3252 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3253 ; KNL-NEXT: kmovw %eax, %k7
3254 ; KNL-NEXT: kshiftlw $15, %k7, %k7
3255 ; KNL-NEXT: kshiftrw $11, %k7, %k7
3256 ; KNL-NEXT: korw %k7, %k6, %k6
3257 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
3258 ; KNL-NEXT: kandw %k5, %k6, %k6
3259 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3260 ; KNL-NEXT: kmovw %eax, %k7
3261 ; KNL-NEXT: kshiftlw $15, %k7, %k7
3262 ; KNL-NEXT: kshiftrw $10, %k7, %k7
3263 ; KNL-NEXT: korw %k7, %k6, %k6
3264 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
3265 ; KNL-NEXT: kandw %k5, %k6, %k6
3266 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3267 ; KNL-NEXT: kmovw %eax, %k7
3268 ; KNL-NEXT: kshiftlw $15, %k7, %k7
3269 ; KNL-NEXT: kshiftrw $9, %k7, %k7
3270 ; KNL-NEXT: korw %k7, %k6, %k6
3271 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
3272 ; KNL-NEXT: kandw %k5, %k6, %k6
3273 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3274 ; KNL-NEXT: kmovw %eax, %k7
3275 ; KNL-NEXT: kshiftlw $15, %k7, %k7
3276 ; KNL-NEXT: kshiftrw $8, %k7, %k7
3277 ; KNL-NEXT: korw %k7, %k6, %k6
3278 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
3279 ; KNL-NEXT: kandw %k5, %k6, %k6
3280 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3281 ; KNL-NEXT: kmovw %eax, %k7
3282 ; KNL-NEXT: kshiftlw $15, %k7, %k7
3283 ; KNL-NEXT: kshiftrw $7, %k7, %k7
3284 ; KNL-NEXT: korw %k7, %k6, %k6
3285 ; KNL-NEXT: kandw %k3, %k6, %k6
3286 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3287 ; KNL-NEXT: kmovw %eax, %k7
3288 ; KNL-NEXT: kshiftlw $15, %k7, %k7
3289 ; KNL-NEXT: kshiftrw $6, %k7, %k7
3290 ; KNL-NEXT: korw %k7, %k6, %k6
3291 ; KNL-NEXT: kandw %k4, %k6, %k5
3292 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3293 ; KNL-NEXT: kmovw %eax, %k6
3294 ; KNL-NEXT: kshiftlw $15, %k6, %k6
3295 ; KNL-NEXT: kshiftrw $5, %k6, %k6
3296 ; KNL-NEXT: korw %k6, %k5, %k5
3297 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
3298 ; KNL-NEXT: kandw %k3, %k5, %k4
3299 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3300 ; KNL-NEXT: kmovw %eax, %k5
3301 ; KNL-NEXT: kshiftlw $15, %k5, %k5
3302 ; KNL-NEXT: kshiftrw $4, %k5, %k5
3303 ; KNL-NEXT: korw %k5, %k4, %k4
3304 ; KNL-NEXT: kandw %k1, %k4, %k3
3305 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3306 ; KNL-NEXT: kmovw %eax, %k4
3307 ; KNL-NEXT: kshiftlw $15, %k4, %k4
3308 ; KNL-NEXT: kshiftrw $3, %k4, %k4
3309 ; KNL-NEXT: korw %k4, %k3, %k3
3310 ; KNL-NEXT: kandw %k2, %k3, %k2
3311 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3312 ; KNL-NEXT: kmovw %eax, %k3
3313 ; KNL-NEXT: kshiftlw $15, %k3, %k3
3314 ; KNL-NEXT: kshiftrw $2, %k3, %k3
3315 ; KNL-NEXT: korw %k3, %k2, %k2
3316 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
3317 ; KNL-NEXT: kandw %k1, %k2, %k1
3318 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3319 ; KNL-NEXT: kmovw %eax, %k2
3320 ; KNL-NEXT: kshiftlw $14, %k2, %k2
3321 ; KNL-NEXT: korw %k2, %k1, %k1
3322 ; KNL-NEXT: kshiftlw $1, %k1, %k1
3323 ; KNL-NEXT: kshiftrw $1, %k1, %k1
3324 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3325 ; KNL-NEXT: kmovw %eax, %k2
3326 ; KNL-NEXT: kshiftlw $15, %k2, %k2
3327 ; KNL-NEXT: korw %k2, %k1, %k1
3328 ; KNL-NEXT: kmovw %k1, 6(%rdi)
3329 ; KNL-NEXT: kmovw %k0, 4(%rdi)
3330 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload
3331 ; KNL-NEXT: kmovw %k0, 2(%rdi)
3332 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload
3333 ; KNL-NEXT: kmovw %k0, (%rdi)
3336 ; SKX-LABEL: store_64i1:
3338 ; SKX-NEXT: vpsllw $7, %zmm0, %zmm0
3339 ; SKX-NEXT: vpmovb2m %zmm0, %k0
3340 ; SKX-NEXT: kmovq %k0, (%rdi)
3341 ; SKX-NEXT: vzeroupper
3344 ; AVX512BW-LABEL: store_64i1:
3345 ; AVX512BW: ## %bb.0:
3346 ; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0
3347 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
3348 ; AVX512BW-NEXT: kmovq %k0, (%rdi)
3349 ; AVX512BW-NEXT: vzeroupper
3350 ; AVX512BW-NEXT: retq
3352 ; AVX512DQ-LABEL: store_64i1:
3353 ; AVX512DQ: ## %bb.0:
3354 ; AVX512DQ-NEXT: andl $1, %esi
3355 ; AVX512DQ-NEXT: kmovw %esi, %k0
3356 ; AVX512DQ-NEXT: kmovw %edx, %k1
3357 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
3358 ; AVX512DQ-NEXT: kshiftrw $14, %k1, %k1
3359 ; AVX512DQ-NEXT: korw %k1, %k0, %k0
3360 ; AVX512DQ-NEXT: movw $-5, %ax
3361 ; AVX512DQ-NEXT: kmovw %eax, %k1
3362 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0
3363 ; AVX512DQ-NEXT: kmovw %k1, %k7
3364 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
3365 ; AVX512DQ-NEXT: kmovw %ecx, %k1
3366 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
3367 ; AVX512DQ-NEXT: kshiftrw $13, %k1, %k1
3368 ; AVX512DQ-NEXT: korw %k1, %k0, %k0
3369 ; AVX512DQ-NEXT: movw $-9, %ax
3370 ; AVX512DQ-NEXT: kmovw %eax, %k1
3371 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
3372 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0
3373 ; AVX512DQ-NEXT: kmovw %r8d, %k1
3374 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
3375 ; AVX512DQ-NEXT: kshiftrw $12, %k1, %k1
3376 ; AVX512DQ-NEXT: korw %k1, %k0, %k0
3377 ; AVX512DQ-NEXT: movw $-17, %ax
3378 ; AVX512DQ-NEXT: kmovw %eax, %k1
3379 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
3380 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0
3381 ; AVX512DQ-NEXT: kmovw %r9d, %k1
3382 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
3383 ; AVX512DQ-NEXT: kshiftrw $11, %k1, %k1
3384 ; AVX512DQ-NEXT: korw %k1, %k0, %k0
3385 ; AVX512DQ-NEXT: movw $-33, %ax
3386 ; AVX512DQ-NEXT: kmovw %eax, %k1
3387 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0
3388 ; AVX512DQ-NEXT: kmovw %k1, %k2
3389 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
3390 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3391 ; AVX512DQ-NEXT: kmovw %eax, %k1
3392 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
3393 ; AVX512DQ-NEXT: kshiftrw $10, %k1, %k1
3394 ; AVX512DQ-NEXT: korw %k1, %k0, %k0
3395 ; AVX512DQ-NEXT: movw $-65, %ax
3396 ; AVX512DQ-NEXT: kmovw %eax, %k1
3397 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
3398 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0
3399 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3400 ; AVX512DQ-NEXT: kmovw %eax, %k1
3401 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
3402 ; AVX512DQ-NEXT: kshiftrw $9, %k1, %k1
3403 ; AVX512DQ-NEXT: korw %k1, %k0, %k0
3404 ; AVX512DQ-NEXT: movw $-129, %ax
3405 ; AVX512DQ-NEXT: kmovw %eax, %k1
3406 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0
3407 ; AVX512DQ-NEXT: kmovw %k1, %k3
3408 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
3409 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3410 ; AVX512DQ-NEXT: kmovw %eax, %k1
3411 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
3412 ; AVX512DQ-NEXT: kshiftrw $8, %k1, %k1
3413 ; AVX512DQ-NEXT: korw %k1, %k0, %k0
3414 ; AVX512DQ-NEXT: movw $-257, %ax ## imm = 0xFEFF
3415 ; AVX512DQ-NEXT: kmovw %eax, %k1
3416 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
3417 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0
3418 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3419 ; AVX512DQ-NEXT: kmovw %eax, %k1
3420 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
3421 ; AVX512DQ-NEXT: kshiftrw $7, %k1, %k1
3422 ; AVX512DQ-NEXT: korw %k1, %k0, %k0
3423 ; AVX512DQ-NEXT: movw $-513, %ax ## imm = 0xFDFF
3424 ; AVX512DQ-NEXT: kmovw %eax, %k1
3425 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0
3426 ; AVX512DQ-NEXT: kmovw %k1, %k4
3427 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
3428 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3429 ; AVX512DQ-NEXT: kmovw %eax, %k1
3430 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
3431 ; AVX512DQ-NEXT: kshiftrw $6, %k1, %k1
3432 ; AVX512DQ-NEXT: korw %k1, %k0, %k0
3433 ; AVX512DQ-NEXT: movw $-1025, %ax ## imm = 0xFBFF
3434 ; AVX512DQ-NEXT: kmovw %eax, %k1
3435 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
3436 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0
3437 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3438 ; AVX512DQ-NEXT: kmovw %eax, %k1
3439 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
3440 ; AVX512DQ-NEXT: kshiftrw $5, %k1, %k1
3441 ; AVX512DQ-NEXT: korw %k1, %k0, %k0
3442 ; AVX512DQ-NEXT: movw $-2049, %ax ## imm = 0xF7FF
3443 ; AVX512DQ-NEXT: kmovw %eax, %k5
3444 ; AVX512DQ-NEXT: kandw %k5, %k0, %k0
3445 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3446 ; AVX512DQ-NEXT: kmovw %eax, %k1
3447 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
3448 ; AVX512DQ-NEXT: kshiftrw $4, %k1, %k1
3449 ; AVX512DQ-NEXT: korw %k1, %k0, %k0
3450 ; AVX512DQ-NEXT: movw $-4097, %ax ## imm = 0xEFFF
3451 ; AVX512DQ-NEXT: kmovw %eax, %k1
3452 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
3453 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0
3454 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3455 ; AVX512DQ-NEXT: kmovw %eax, %k1
3456 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
3457 ; AVX512DQ-NEXT: kshiftrw $3, %k1, %k1
3458 ; AVX512DQ-NEXT: korw %k1, %k0, %k0
3459 ; AVX512DQ-NEXT: movw $-8193, %ax ## imm = 0xDFFF
3460 ; AVX512DQ-NEXT: kmovw %eax, %k1
3461 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
3462 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0
3463 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3464 ; AVX512DQ-NEXT: kmovw %eax, %k1
3465 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
3466 ; AVX512DQ-NEXT: kshiftrw $2, %k1, %k1
3467 ; AVX512DQ-NEXT: korw %k1, %k0, %k0
3468 ; AVX512DQ-NEXT: movw $-16385, %ax ## imm = 0xBFFF
3469 ; AVX512DQ-NEXT: kmovw %eax, %k1
3470 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
3471 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0
3472 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3473 ; AVX512DQ-NEXT: kmovw %eax, %k6
3474 ; AVX512DQ-NEXT: kshiftlw $14, %k6, %k6
3475 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3476 ; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0
3477 ; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0
3478 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3479 ; AVX512DQ-NEXT: kmovw %eax, %k6
3480 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3481 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3482 ; AVX512DQ-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
3483 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3484 ; AVX512DQ-NEXT: andl $1, %eax
3485 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
3486 ; AVX512DQ-NEXT: kmovw %ecx, %k0
3487 ; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0
3488 ; AVX512DQ-NEXT: kshiftrw $14, %k0, %k0
3489 ; AVX512DQ-NEXT: kmovw %eax, %k6
3490 ; AVX512DQ-NEXT: korw %k0, %k6, %k0
3491 ; AVX512DQ-NEXT: kandw %k7, %k0, %k0
3492 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3493 ; AVX512DQ-NEXT: kmovw %eax, %k6
3494 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3495 ; AVX512DQ-NEXT: kshiftrw $13, %k6, %k6
3496 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3497 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
3498 ; AVX512DQ-NEXT: kandw %k7, %k0, %k0
3499 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3500 ; AVX512DQ-NEXT: kmovw %eax, %k6
3501 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3502 ; AVX512DQ-NEXT: kshiftrw $12, %k6, %k6
3503 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3504 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
3505 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0
3506 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3507 ; AVX512DQ-NEXT: kmovw %eax, %k6
3508 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3509 ; AVX512DQ-NEXT: kshiftrw $11, %k6, %k6
3510 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3511 ; AVX512DQ-NEXT: kandw %k2, %k0, %k0
3512 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3513 ; AVX512DQ-NEXT: kmovw %eax, %k6
3514 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3515 ; AVX512DQ-NEXT: kshiftrw $10, %k6, %k6
3516 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3517 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
3518 ; AVX512DQ-NEXT: kandw %k2, %k0, %k0
3519 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3520 ; AVX512DQ-NEXT: kmovw %eax, %k6
3521 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3522 ; AVX512DQ-NEXT: kshiftrw $9, %k6, %k6
3523 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3524 ; AVX512DQ-NEXT: kandw %k3, %k0, %k0
3525 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3526 ; AVX512DQ-NEXT: kmovw %eax, %k6
3527 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3528 ; AVX512DQ-NEXT: kshiftrw $8, %k6, %k6
3529 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3530 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
3531 ; AVX512DQ-NEXT: kandw %k3, %k0, %k0
3532 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3533 ; AVX512DQ-NEXT: kmovw %eax, %k6
3534 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3535 ; AVX512DQ-NEXT: kshiftrw $7, %k6, %k6
3536 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3537 ; AVX512DQ-NEXT: kandw %k4, %k0, %k0
3538 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3539 ; AVX512DQ-NEXT: kmovw %eax, %k6
3540 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3541 ; AVX512DQ-NEXT: kshiftrw $6, %k6, %k6
3542 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3543 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload
3544 ; AVX512DQ-NEXT: kandw %k4, %k0, %k0
3545 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3546 ; AVX512DQ-NEXT: kmovw %eax, %k6
3547 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3548 ; AVX512DQ-NEXT: kshiftrw $5, %k6, %k6
3549 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3550 ; AVX512DQ-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
3551 ; AVX512DQ-NEXT: kandw %k5, %k0, %k0
3552 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3553 ; AVX512DQ-NEXT: kmovw %eax, %k6
3554 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3555 ; AVX512DQ-NEXT: kshiftrw $4, %k6, %k6
3556 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3557 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
3558 ; AVX512DQ-NEXT: kandw %k6, %k0, %k0
3559 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3560 ; AVX512DQ-NEXT: kmovw %eax, %k6
3561 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3562 ; AVX512DQ-NEXT: kshiftrw $3, %k6, %k6
3563 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3564 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
3565 ; AVX512DQ-NEXT: kandw %k6, %k0, %k0
3566 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3567 ; AVX512DQ-NEXT: kmovw %eax, %k6
3568 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3569 ; AVX512DQ-NEXT: kshiftrw $2, %k6, %k6
3570 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3571 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
3572 ; AVX512DQ-NEXT: kandw %k6, %k0, %k0
3573 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3574 ; AVX512DQ-NEXT: kmovw %eax, %k6
3575 ; AVX512DQ-NEXT: kshiftlw $14, %k6, %k6
3576 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3577 ; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0
3578 ; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0
3579 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3580 ; AVX512DQ-NEXT: kmovw %eax, %k6
3581 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3582 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3583 ; AVX512DQ-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
3584 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3585 ; AVX512DQ-NEXT: andl $1, %eax
3586 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
3587 ; AVX512DQ-NEXT: kmovw %ecx, %k0
3588 ; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0
3589 ; AVX512DQ-NEXT: kshiftrw $14, %k0, %k0
3590 ; AVX512DQ-NEXT: kmovw %eax, %k6
3591 ; AVX512DQ-NEXT: korw %k0, %k6, %k0
3592 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
3593 ; AVX512DQ-NEXT: kandw %k6, %k0, %k0
3594 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3595 ; AVX512DQ-NEXT: kmovw %eax, %k6
3596 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3597 ; AVX512DQ-NEXT: kshiftrw $13, %k6, %k6
3598 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3599 ; AVX512DQ-NEXT: kandw %k7, %k0, %k0
3600 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3601 ; AVX512DQ-NEXT: kmovw %eax, %k6
3602 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3603 ; AVX512DQ-NEXT: kshiftrw $12, %k6, %k6
3604 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3605 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0
3606 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3607 ; AVX512DQ-NEXT: kmovw %eax, %k6
3608 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3609 ; AVX512DQ-NEXT: kshiftrw $11, %k6, %k6
3610 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3611 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
3612 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0
3613 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3614 ; AVX512DQ-NEXT: kmovw %eax, %k6
3615 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3616 ; AVX512DQ-NEXT: kshiftrw $10, %k6, %k6
3617 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3618 ; AVX512DQ-NEXT: kandw %k2, %k0, %k0
3619 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3620 ; AVX512DQ-NEXT: kmovw %eax, %k6
3621 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3622 ; AVX512DQ-NEXT: kshiftrw $9, %k6, %k6
3623 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3624 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
3625 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0
3626 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3627 ; AVX512DQ-NEXT: kmovw %eax, %k6
3628 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3629 ; AVX512DQ-NEXT: kshiftrw $8, %k6, %k6
3630 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3631 ; AVX512DQ-NEXT: kandw %k3, %k0, %k0
3632 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3633 ; AVX512DQ-NEXT: kmovw %eax, %k6
3634 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3635 ; AVX512DQ-NEXT: kshiftrw $7, %k6, %k6
3636 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3637 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
3638 ; AVX512DQ-NEXT: kandw %k3, %k0, %k0
3639 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3640 ; AVX512DQ-NEXT: kmovw %eax, %k6
3641 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3642 ; AVX512DQ-NEXT: kshiftrw $6, %k6, %k6
3643 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3644 ; AVX512DQ-NEXT: kandw %k4, %k0, %k0
3645 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3646 ; AVX512DQ-NEXT: kmovw %eax, %k6
3647 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3648 ; AVX512DQ-NEXT: kshiftrw $5, %k6, %k6
3649 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3650 ; AVX512DQ-NEXT: kandw %k5, %k0, %k0
3651 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3652 ; AVX512DQ-NEXT: kmovw %eax, %k6
3653 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3654 ; AVX512DQ-NEXT: kshiftrw $4, %k6, %k6
3655 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3656 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
3657 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0
3658 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3659 ; AVX512DQ-NEXT: kmovw %eax, %k6
3660 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3661 ; AVX512DQ-NEXT: kshiftrw $3, %k6, %k6
3662 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3663 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
3664 ; AVX512DQ-NEXT: kandw %k2, %k0, %k0
3665 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3666 ; AVX512DQ-NEXT: kmovw %eax, %k6
3667 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3668 ; AVX512DQ-NEXT: kshiftrw $2, %k6, %k6
3669 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3670 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
3671 ; AVX512DQ-NEXT: kandw %k5, %k0, %k0
3672 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3673 ; AVX512DQ-NEXT: kmovw %eax, %k6
3674 ; AVX512DQ-NEXT: kshiftlw $14, %k6, %k6
3675 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3676 ; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0
3677 ; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0
3678 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3679 ; AVX512DQ-NEXT: kmovw %eax, %k6
3680 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3681 ; AVX512DQ-NEXT: korw %k6, %k0, %k0
3682 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3683 ; AVX512DQ-NEXT: andl $1, %eax
3684 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
3685 ; AVX512DQ-NEXT: kmovw %ecx, %k6
3686 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3687 ; AVX512DQ-NEXT: kshiftrw $14, %k6, %k6
3688 ; AVX512DQ-NEXT: kmovw %eax, %k7
3689 ; AVX512DQ-NEXT: korw %k6, %k7, %k6
3690 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
3691 ; AVX512DQ-NEXT: kandw %k5, %k6, %k6
3692 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3693 ; AVX512DQ-NEXT: kmovw %eax, %k7
3694 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7
3695 ; AVX512DQ-NEXT: kshiftrw $13, %k7, %k7
3696 ; AVX512DQ-NEXT: korw %k7, %k6, %k6
3697 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
3698 ; AVX512DQ-NEXT: kandw %k5, %k6, %k6
3699 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3700 ; AVX512DQ-NEXT: kmovw %eax, %k7
3701 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7
3702 ; AVX512DQ-NEXT: kshiftrw $12, %k7, %k7
3703 ; AVX512DQ-NEXT: korw %k7, %k6, %k6
3704 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
3705 ; AVX512DQ-NEXT: kandw %k5, %k6, %k6
3706 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3707 ; AVX512DQ-NEXT: kmovw %eax, %k7
3708 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7
3709 ; AVX512DQ-NEXT: kshiftrw $11, %k7, %k7
3710 ; AVX512DQ-NEXT: korw %k7, %k6, %k6
3711 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
3712 ; AVX512DQ-NEXT: kandw %k5, %k6, %k6
3713 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3714 ; AVX512DQ-NEXT: kmovw %eax, %k7
3715 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7
3716 ; AVX512DQ-NEXT: kshiftrw $10, %k7, %k7
3717 ; AVX512DQ-NEXT: korw %k7, %k6, %k6
3718 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
3719 ; AVX512DQ-NEXT: kandw %k5, %k6, %k6
3720 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3721 ; AVX512DQ-NEXT: kmovw %eax, %k7
3722 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7
3723 ; AVX512DQ-NEXT: kshiftrw $9, %k7, %k7
3724 ; AVX512DQ-NEXT: korw %k7, %k6, %k6
3725 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
3726 ; AVX512DQ-NEXT: kandw %k5, %k6, %k6
3727 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3728 ; AVX512DQ-NEXT: kmovw %eax, %k7
3729 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7
3730 ; AVX512DQ-NEXT: kshiftrw $8, %k7, %k7
3731 ; AVX512DQ-NEXT: korw %k7, %k6, %k6
3732 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
3733 ; AVX512DQ-NEXT: kandw %k5, %k6, %k6
3734 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3735 ; AVX512DQ-NEXT: kmovw %eax, %k7
3736 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7
3737 ; AVX512DQ-NEXT: kshiftrw $7, %k7, %k7
3738 ; AVX512DQ-NEXT: korw %k7, %k6, %k6
3739 ; AVX512DQ-NEXT: kandw %k3, %k6, %k6
3740 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3741 ; AVX512DQ-NEXT: kmovw %eax, %k7
3742 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7
3743 ; AVX512DQ-NEXT: kshiftrw $6, %k7, %k7
3744 ; AVX512DQ-NEXT: korw %k7, %k6, %k6
3745 ; AVX512DQ-NEXT: kandw %k4, %k6, %k5
3746 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3747 ; AVX512DQ-NEXT: kmovw %eax, %k6
3748 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
3749 ; AVX512DQ-NEXT: kshiftrw $5, %k6, %k6
3750 ; AVX512DQ-NEXT: korw %k6, %k5, %k5
3751 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
3752 ; AVX512DQ-NEXT: kandw %k3, %k5, %k4
3753 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3754 ; AVX512DQ-NEXT: kmovw %eax, %k5
3755 ; AVX512DQ-NEXT: kshiftlw $15, %k5, %k5
3756 ; AVX512DQ-NEXT: kshiftrw $4, %k5, %k5
3757 ; AVX512DQ-NEXT: korw %k5, %k4, %k4
3758 ; AVX512DQ-NEXT: kandw %k1, %k4, %k3
3759 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3760 ; AVX512DQ-NEXT: kmovw %eax, %k4
3761 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
3762 ; AVX512DQ-NEXT: kshiftrw $3, %k4, %k4
3763 ; AVX512DQ-NEXT: korw %k4, %k3, %k3
3764 ; AVX512DQ-NEXT: kandw %k2, %k3, %k2
3765 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3766 ; AVX512DQ-NEXT: kmovw %eax, %k3
3767 ; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3
3768 ; AVX512DQ-NEXT: kshiftrw $2, %k3, %k3
3769 ; AVX512DQ-NEXT: korw %k3, %k2, %k2
3770 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
3771 ; AVX512DQ-NEXT: kandw %k1, %k2, %k1
3772 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3773 ; AVX512DQ-NEXT: kmovw %eax, %k2
3774 ; AVX512DQ-NEXT: kshiftlw $14, %k2, %k2
3775 ; AVX512DQ-NEXT: korw %k2, %k1, %k1
3776 ; AVX512DQ-NEXT: kshiftlw $1, %k1, %k1
3777 ; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1
3778 ; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
3779 ; AVX512DQ-NEXT: kmovw %eax, %k2
3780 ; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
3781 ; AVX512DQ-NEXT: korw %k2, %k1, %k1
3782 ; AVX512DQ-NEXT: kmovw %k1, 6(%rdi)
3783 ; AVX512DQ-NEXT: kmovw %k0, 4(%rdi)
3784 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload
3785 ; AVX512DQ-NEXT: kmovw %k0, 2(%rdi)
3786 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload
3787 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
3788 ; AVX512DQ-NEXT: retq
3790 ; X86-LABEL: store_64i1:
3792 ; X86-NEXT: vpsllw $7, %zmm0, %zmm0
3793 ; X86-NEXT: vpmovb2m %zmm0, %k0
3794 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
3795 ; X86-NEXT: kmovq %k0, (%eax)
3796 ; X86-NEXT: vzeroupper
3798 store <64 x i1> %v, ptr %a
; Zero-extension of an 8-lane mask carved out of a 16-lane compare.
; The IR compares all 16 lanes of %a against zero, keeps lanes 0-7, bitcasts
; them to i8, zero-extends to i32 and adds the value to itself.
; Expected code differs by feature set: the runs built with +avx512dq (SKX,
; AVX512DQ and the i686 run) read the mask with kmovb and emit no separate
; extension, while the KNL and AVX512BW runs read it with kmovw/kmovd and then
; clear the upper bits with movzbl.
3802 define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
3803 ; KNL-LABEL: test_bitcast_v8i1_zext:
3805 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
3806 ; KNL-NEXT: kmovw %k0, %eax
3807 ; KNL-NEXT: movzbl %al, %eax
3808 ; KNL-NEXT: addl %eax, %eax
3809 ; KNL-NEXT: vzeroupper
3812 ; SKX-LABEL: test_bitcast_v8i1_zext:
3814 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
3815 ; SKX-NEXT: kmovb %k0, %eax
3816 ; SKX-NEXT: addl %eax, %eax
3817 ; SKX-NEXT: vzeroupper
3820 ; AVX512BW-LABEL: test_bitcast_v8i1_zext:
3821 ; AVX512BW: ## %bb.0:
3822 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
3823 ; AVX512BW-NEXT: kmovd %k0, %eax
3824 ; AVX512BW-NEXT: movzbl %al, %eax
3825 ; AVX512BW-NEXT: addl %eax, %eax
3826 ; AVX512BW-NEXT: vzeroupper
3827 ; AVX512BW-NEXT: retq
3829 ; AVX512DQ-LABEL: test_bitcast_v8i1_zext:
3830 ; AVX512DQ: ## %bb.0:
3831 ; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
3832 ; AVX512DQ-NEXT: kmovb %k0, %eax
3833 ; AVX512DQ-NEXT: addl %eax, %eax
3834 ; AVX512DQ-NEXT: vzeroupper
3835 ; AVX512DQ-NEXT: retq
3837 ; X86-LABEL: test_bitcast_v8i1_zext:
3839 ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
3840 ; X86-NEXT: kmovb %k0, %eax
3841 ; X86-NEXT: addl %eax, %eax
3842 ; X86-NEXT: vzeroupper
; The shuffle below selects indices 0-7 only, i.e. the low half of the
; 16-lane compare result; the upper eight lanes are discarded.
3844 %v1 = icmp eq <16 x i32> %a, zeroinitializer
3845 %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3846 %mask1 = bitcast <8 x i1> %mask to i8
3847 %val = zext i8 %mask1 to i32
3848 %val1 = add i32 %val, %val
; Zero-extension of a full 16-lane compare mask.
; The <16 x i1> compare result is bitcast to i16, zero-extended to i32 and
; added to itself.
; Every run expects kmovw followed directly by addl, with no separate
; zero-extension instruction between them.
3852 define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) {
3853 ; CHECK-LABEL: test_bitcast_v16i1_zext:
3855 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0
3856 ; CHECK-NEXT: kmovw %k0, %eax
3857 ; CHECK-NEXT: addl %eax, %eax
3858 ; CHECK-NEXT: vzeroupper
3861 ; X86-LABEL: test_bitcast_v16i1_zext:
3863 ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
3864 ; X86-NEXT: kmovw %k0, %eax
3865 ; X86-NEXT: addl %eax, %eax
3866 ; X86-NEXT: vzeroupper
3868 %v1 = icmp eq <16 x i32> %a, zeroinitializer
3869 %mask1 = bitcast <16 x i1> %v1 to i16
3870 %val = zext i16 %mask1 to i32
3871 %val1 = add i32 %val, %val
; Lane-wise add on <16 x i1> masks built from two i16 scalars.
; Addition of 1-bit lanes wraps modulo 2, so the assertions expect a single
; XOR of the two scalar inputs (xorl on x86-64, xorw with a memory operand on
; i686) and list no mask-register instructions.
3875 define i16 @test_v16i1_add(i16 %x, i16 %y) {
3876 ; CHECK-LABEL: test_v16i1_add:
3878 ; CHECK-NEXT: movl %edi, %eax
3879 ; CHECK-NEXT: xorl %esi, %eax
3880 ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
3883 ; X86-LABEL: test_v16i1_add:
3885 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
3886 ; X86-NEXT: xorw {{[0-9]+}}(%esp), %ax
3888 %m0 = bitcast i16 %x to <16 x i1>
3889 %m1 = bitcast i16 %y to <16 x i1>
3890 %m2 = add <16 x i1> %m0, %m1
3891 %ret = bitcast <16 x i1> %m2 to i16
; Subtracts two <16 x i1> vectors obtained by bitcasting the i16 arguments and
; returns the result as i16. The expected assembly performs the element-wise
; sub as a scalar xor of the two arguments.
3895 define i16 @test_v16i1_sub(i16 %x, i16 %y) {
3896 ; CHECK-LABEL: test_v16i1_sub:
3898 ; CHECK-NEXT: movl %edi, %eax
3899 ; CHECK-NEXT: xorl %esi, %eax
3900 ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
3903 ; X86-LABEL: test_v16i1_sub:
3905 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
3906 ; X86-NEXT: xorw {{[0-9]+}}(%esp), %ax
3908 %m0 = bitcast i16 %x to <16 x i1>
3909 %m1 = bitcast i16 %y to <16 x i1>
3910 %m2 = sub <16 x i1> %m0, %m1
3911 %ret = bitcast <16 x i1> %m2 to i16
; Multiplies two <16 x i1> vectors obtained by bitcasting the i16 arguments
; and returns the result as i16. The expected assembly performs the
; element-wise mul as a scalar and of the two arguments.
3915 define i16 @test_v16i1_mul(i16 %x, i16 %y) {
3916 ; CHECK-LABEL: test_v16i1_mul:
3918 ; CHECK-NEXT: movl %edi, %eax
3919 ; CHECK-NEXT: andl %esi, %eax
3920 ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
3923 ; X86-LABEL: test_v16i1_mul:
3925 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
3926 ; X86-NEXT: andw {{[0-9]+}}(%esp), %ax
3928 %m0 = bitcast i16 %x to <16 x i1>
3929 %m1 = bitcast i16 %y to <16 x i1>
3930 %m2 = mul <16 x i1> %m0, %m1
3931 %ret = bitcast <16 x i1> %m2 to i16
; Adds two <8 x i1> vectors obtained by bitcasting the i8 arguments and
; returns the result as i8. The expected assembly performs the element-wise
; add as a scalar xor of the two arguments.
3935 define i8 @test_v8i1_add(i8 %x, i8 %y) {
3936 ; CHECK-LABEL: test_v8i1_add:
3938 ; CHECK-NEXT: movl %edi, %eax
3939 ; CHECK-NEXT: xorl %esi, %eax
3940 ; CHECK-NEXT: ## kill: def $al killed $al killed $eax
3943 ; X86-LABEL: test_v8i1_add:
3945 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
3946 ; X86-NEXT: xorb {{[0-9]+}}(%esp), %al
3948 %m0 = bitcast i8 %x to <8 x i1>
3949 %m1 = bitcast i8 %y to <8 x i1>
3950 %m2 = add <8 x i1> %m0, %m1
3951 %ret = bitcast <8 x i1> %m2 to i8
; Subtracts two <8 x i1> vectors obtained by bitcasting the i8 arguments and
; returns the result as i8. The expected assembly performs the element-wise
; sub as a scalar xor of the two arguments.
3955 define i8 @test_v8i1_sub(i8 %x, i8 %y) {
3956 ; CHECK-LABEL: test_v8i1_sub:
3958 ; CHECK-NEXT: movl %edi, %eax
3959 ; CHECK-NEXT: xorl %esi, %eax
3960 ; CHECK-NEXT: ## kill: def $al killed $al killed $eax
3963 ; X86-LABEL: test_v8i1_sub:
3965 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
3966 ; X86-NEXT: xorb {{[0-9]+}}(%esp), %al
3968 %m0 = bitcast i8 %x to <8 x i1>
3969 %m1 = bitcast i8 %y to <8 x i1>
3970 %m2 = sub <8 x i1> %m0, %m1
3971 %ret = bitcast <8 x i1> %m2 to i8
; Multiplies two <8 x i1> vectors obtained by bitcasting the i8 arguments and
; returns the result as i8. The expected assembly performs the element-wise
; mul as a scalar and of the two arguments.
3975 define i8 @test_v8i1_mul(i8 %x, i8 %y) {
3976 ; CHECK-LABEL: test_v8i1_mul:
3978 ; CHECK-NEXT: movl %edi, %eax
3979 ; CHECK-NEXT: andl %esi, %eax
3980 ; CHECK-NEXT: ## kill: def $al killed $al killed $eax
3983 ; X86-LABEL: test_v8i1_mul:
3985 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
3986 ; X86-NEXT: andb {{[0-9]+}}(%esp), %al
3988 %m0 = bitcast i8 %x to <8 x i1>
3989 %m1 = bitcast i8 %y to <8 x i1>
3990 %m2 = mul <8 x i1> %m0, %m1
3991 %ret = bitcast <8 x i1> %m2 to i8
3995 ; Make sure we don't emit a ktest for signed comparisons.
; The and of two compare-with-zero masks is bitcast to i16 and tested with a
; signed greater-than-zero compare. Every run expects the inputs to be ORed
; first, the mask to be moved to a GPR (kmovw or kmovd) and the branch to be
; taken on testw + jle.
3996 define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
3997 ; KNL-LABEL: ktest_signed:
3999 ; KNL-NEXT: vpord %zmm1, %zmm0, %zmm0
4000 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
4001 ; KNL-NEXT: kmovw %k0, %eax
4002 ; KNL-NEXT: testw %ax, %ax
4003 ; KNL-NEXT: jle LBB66_1
4004 ; KNL-NEXT: ## %bb.2: ## %bb.2
4005 ; KNL-NEXT: vzeroupper
4007 ; KNL-NEXT: LBB66_1: ## %bb.1
4008 ; KNL-NEXT: pushq %rax
4009 ; KNL-NEXT: .cfi_def_cfa_offset 16
4010 ; KNL-NEXT: vzeroupper
4011 ; KNL-NEXT: callq _foo
4012 ; KNL-NEXT: addq $8, %rsp
4015 ; SKX-LABEL: ktest_signed:
4017 ; SKX-NEXT: vpord %zmm1, %zmm0, %zmm0
4018 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
4019 ; SKX-NEXT: kmovd %k0, %eax
4020 ; SKX-NEXT: testw %ax, %ax
4021 ; SKX-NEXT: jle LBB66_1
4022 ; SKX-NEXT: ## %bb.2: ## %bb.2
4023 ; SKX-NEXT: vzeroupper
4025 ; SKX-NEXT: LBB66_1: ## %bb.1
4026 ; SKX-NEXT: pushq %rax
4027 ; SKX-NEXT: .cfi_def_cfa_offset 16
4028 ; SKX-NEXT: vzeroupper
4029 ; SKX-NEXT: callq _foo
4030 ; SKX-NEXT: addq $8, %rsp
4033 ; AVX512BW-LABEL: ktest_signed:
4034 ; AVX512BW: ## %bb.0:
4035 ; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
4036 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
4037 ; AVX512BW-NEXT: kmovd %k0, %eax
4038 ; AVX512BW-NEXT: testw %ax, %ax
4039 ; AVX512BW-NEXT: jle LBB66_1
4040 ; AVX512BW-NEXT: ## %bb.2: ## %bb.2
4041 ; AVX512BW-NEXT: vzeroupper
4042 ; AVX512BW-NEXT: retq
4043 ; AVX512BW-NEXT: LBB66_1: ## %bb.1
4044 ; AVX512BW-NEXT: pushq %rax
4045 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
4046 ; AVX512BW-NEXT: vzeroupper
4047 ; AVX512BW-NEXT: callq _foo
4048 ; AVX512BW-NEXT: addq $8, %rsp
4049 ; AVX512BW-NEXT: retq
4051 ; AVX512DQ-LABEL: ktest_signed:
4052 ; AVX512DQ: ## %bb.0:
4053 ; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
4054 ; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
4055 ; AVX512DQ-NEXT: kmovw %k0, %eax
4056 ; AVX512DQ-NEXT: testw %ax, %ax
4057 ; AVX512DQ-NEXT: jle LBB66_1
4058 ; AVX512DQ-NEXT: ## %bb.2: ## %bb.2
4059 ; AVX512DQ-NEXT: vzeroupper
4060 ; AVX512DQ-NEXT: retq
4061 ; AVX512DQ-NEXT: LBB66_1: ## %bb.1
4062 ; AVX512DQ-NEXT: pushq %rax
4063 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
4064 ; AVX512DQ-NEXT: vzeroupper
4065 ; AVX512DQ-NEXT: callq _foo
4066 ; AVX512DQ-NEXT: addq $8, %rsp
4067 ; AVX512DQ-NEXT: retq
4069 ; X86-LABEL: ktest_signed:
4071 ; X86-NEXT: vpord %zmm1, %zmm0, %zmm0
4072 ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
4073 ; X86-NEXT: kmovd %k0, %eax
4074 ; X86-NEXT: testw %ax, %ax
4075 ; X86-NEXT: jle LBB66_1
4076 ; X86-NEXT: ## %bb.2: ## %bb.2
4077 ; X86-NEXT: vzeroupper
4079 ; X86-NEXT: LBB66_1: ## %bb.1
4080 ; X86-NEXT: subl $12, %esp
4081 ; X86-NEXT: .cfi_def_cfa_offset 16
4082 ; X86-NEXT: vzeroupper
4083 ; X86-NEXT: calll _foo
4084 ; X86-NEXT: addl $12, %esp
4086 %a = icmp eq <16 x i32> %x, zeroinitializer
4087 %b = icmp eq <16 x i32> %y, zeroinitializer
4088 %c = and <16 x i1> %a, %b
4089 %d = bitcast <16 x i1> %c to i16
4090 %e = icmp sgt i16 %d, 0
4091 br i1 %e, label %bb.2, label %bb.1
4100 ; Make sure we can use the ZF/CF flags from kortest to check for all ones.
; The and of two compare-with-zero masks is bitcast to i16 and compared with
; -1. The expected assembly ORs the two inputs, builds the mask with vptestmd
; and branches on kortestw + je, without moving the mask to a GPR.
4101 define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
4102 ; CHECK-LABEL: ktest_allones:
4104 ; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0
4105 ; CHECK-NEXT: vptestmd %zmm0, %zmm0, %k0
4106 ; CHECK-NEXT: kortestw %k0, %k0
4107 ; CHECK-NEXT: je LBB67_2
4108 ; CHECK-NEXT: ## %bb.1: ## %bb.1
4109 ; CHECK-NEXT: pushq %rax
4110 ; CHECK-NEXT: .cfi_def_cfa_offset 16
4111 ; CHECK-NEXT: vzeroupper
4112 ; CHECK-NEXT: callq _foo
4113 ; CHECK-NEXT: addq $8, %rsp
4114 ; CHECK-NEXT: LBB67_2: ## %bb.2
4115 ; CHECK-NEXT: vzeroupper
4118 ; X86-LABEL: ktest_allones:
4120 ; X86-NEXT: vpord %zmm1, %zmm0, %zmm0
4121 ; X86-NEXT: vptestmd %zmm0, %zmm0, %k0
4122 ; X86-NEXT: kortestw %k0, %k0
4123 ; X86-NEXT: je LBB67_2
4124 ; X86-NEXT: ## %bb.1: ## %bb.1
4125 ; X86-NEXT: subl $12, %esp
4126 ; X86-NEXT: .cfi_def_cfa_offset 16
4127 ; X86-NEXT: vzeroupper
4128 ; X86-NEXT: calll _foo
4129 ; X86-NEXT: addl $12, %esp
4130 ; X86-NEXT: LBB67_2: ## %bb.2
4131 ; X86-NEXT: vzeroupper
4133 %a = icmp eq <16 x i32> %x, zeroinitializer
4134 %b = icmp eq <16 x i32> %y, zeroinitializer
4135 %c = and <16 x i1> %a, %b
4136 %d = bitcast <16 x i1> %c to i16
4137 %e = icmp eq i16 %d, -1
4138 br i1 %e, label %bb.2, label %bb.1
4146 ; This is derived from an intrinsic test where v4i1 mask was created by _mm_cmp_epi32_mask, then it was passed to _mm512_mask_blend_epi32 which uses a v16i1 mask.
4147 ; The widening happens in the scalar domain between the intrinsics. The middle end optimized it to this.
; A <4 x i1> compare result is widened to <16 x i1> through two shufflevectors
; and then selects between %f and %e. Runs with avx512vl expect the compare on
; xmm registers with the resulting mask used directly. The other runs expect a
; zmm compare followed by a kshiftlw/kshiftrw pair by 12 before the blend.
4148 define <8 x i64> @mask_widening(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d, <8 x i64> %e, <8 x i64> %f) {
4149 ; KNL-LABEL: mask_widening:
4150 ; KNL: ## %bb.0: ## %entry
4151 ; KNL-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
4152 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
4153 ; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
4154 ; KNL-NEXT: kshiftlw $12, %k0, %k0
4155 ; KNL-NEXT: kshiftrw $12, %k0, %k1
4156 ; KNL-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
4159 ; SKX-LABEL: mask_widening:
4160 ; SKX: ## %bb.0: ## %entry
4161 ; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
4162 ; SKX-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
4165 ; AVX512BW-LABEL: mask_widening:
4166 ; AVX512BW: ## %bb.0: ## %entry
4167 ; AVX512BW-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
4168 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
4169 ; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
4170 ; AVX512BW-NEXT: kshiftlw $12, %k0, %k0
4171 ; AVX512BW-NEXT: kshiftrw $12, %k0, %k1
4172 ; AVX512BW-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
4173 ; AVX512BW-NEXT: retq
4175 ; AVX512DQ-LABEL: mask_widening:
4176 ; AVX512DQ: ## %bb.0: ## %entry
4177 ; AVX512DQ-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
4178 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
4179 ; AVX512DQ-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
4180 ; AVX512DQ-NEXT: kshiftlw $12, %k0, %k0
4181 ; AVX512DQ-NEXT: kshiftrw $12, %k0, %k1
4182 ; AVX512DQ-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
4183 ; AVX512DQ-NEXT: retq
4185 ; X86-LABEL: mask_widening:
4186 ; X86: ## %bb.0: ## %entry
4187 ; X86-NEXT: pushl %ebp
4188 ; X86-NEXT: .cfi_def_cfa_offset 8
4189 ; X86-NEXT: .cfi_offset %ebp, -8
4190 ; X86-NEXT: movl %esp, %ebp
4191 ; X86-NEXT: .cfi_def_cfa_register %ebp
4192 ; X86-NEXT: andl $-64, %esp
4193 ; X86-NEXT: subl $64, %esp
4194 ; X86-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
4195 ; X86-NEXT: vmovdqa64 8(%ebp), %zmm0
4196 ; X86-NEXT: vmovdqa32 72(%ebp), %zmm0 {%k1}
4197 ; X86-NEXT: movl %ebp, %esp
4198 ; X86-NEXT: popl %ebp
4201 %0 = bitcast <2 x i64> %a to <4 x i32>
4202 %1 = bitcast <2 x i64> %b to <4 x i32>
4203 %2 = icmp eq <4 x i32> %0, %1
4204 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4205 %4 = bitcast <8 x i64> %f to <16 x i32>
4206 %5 = bitcast <8 x i64> %e to <16 x i32>
4207 %6 = shufflevector <8 x i1> %3, <8 x i1> <i1 false, i1 undef, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
4208 %7 = select <16 x i1> %6, <16 x i32> %4, <16 x i32> %5
4209 %8 = bitcast <16 x i32> %7 to <8 x i64>
; Stores a constant <128 x i1> vector to %R. Depending on the run, the
; expected code is either one 16-byte vector store of a constant (vmovaps) or
; two 64-bit immediate stores (movabsq + movq).
4213 define void @store_v128i1_constant(ptr %R) {
4214 ; KNL-LABEL: store_v128i1_constant:
4215 ; KNL: ## %bb.0: ## %entry
4216 ; KNL-NEXT: vmovaps {{.*#+}} xmm0 = [61437,65535,65403,57343,57341,65535,65467,49151]
4217 ; KNL-NEXT: vmovaps %xmm0, (%rdi)
4220 ; SKX-LABEL: store_v128i1_constant:
4221 ; SKX: ## %bb.0: ## %entry
4222 ; SKX-NEXT: movabsq $-4611686310485172227, %rax ## imm = 0xBFFFFFBBFFFFDFFD
4223 ; SKX-NEXT: movq %rax, 8(%rdi)
4224 ; SKX-NEXT: movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
4225 ; SKX-NEXT: movq %rax, (%rdi)
4228 ; AVX512BW-LABEL: store_v128i1_constant:
4229 ; AVX512BW: ## %bb.0: ## %entry
4230 ; AVX512BW-NEXT: movabsq $-4611686310485172227, %rax ## imm = 0xBFFFFFBBFFFFDFFD
4231 ; AVX512BW-NEXT: movq %rax, 8(%rdi)
4232 ; AVX512BW-NEXT: movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
4233 ; AVX512BW-NEXT: movq %rax, (%rdi)
4234 ; AVX512BW-NEXT: retq
4236 ; AVX512DQ-LABEL: store_v128i1_constant:
4237 ; AVX512DQ: ## %bb.0: ## %entry
4238 ; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm0 = [61437,65535,65403,57343,57341,65535,65467,49151]
4239 ; AVX512DQ-NEXT: vmovaps %xmm0, (%rdi)
4240 ; AVX512DQ-NEXT: retq
4242 ; X86-LABEL: store_v128i1_constant:
4243 ; X86: ## %bb.0: ## %entry
4244 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
4245 ; X86-NEXT: vmovaps {{.*#+}} xmm0 = [4294963197,3758096251,4294959101,3221225403]
4246 ; X86-NEXT: vmovaps %xmm0, (%eax)
4249 store <128 x i1> <i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1>, ptr %R
; Stores a constant <64 x i1> vector to %R. The 64-bit runs expect a single
; movabsq + movq pair; the 32-bit run expects two 32-bit immediate stores.
4253 define void @store_v64i1_constant(ptr %R) {
4254 ; CHECK-LABEL: store_v64i1_constant:
4255 ; CHECK: ## %bb.0: ## %entry
4256 ; CHECK-NEXT: movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
4257 ; CHECK-NEXT: movq %rax, (%rdi)
4260 ; X86-LABEL: store_v64i1_constant:
4261 ; X86: ## %bb.0: ## %entry
4262 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
4263 ; X86-NEXT: movl $-536871045, 4(%eax) ## imm = 0xDFFFFF7B
4264 ; X86-NEXT: movl $-4099, (%eax) ## imm = 0xEFFD
4267 store <64 x i1> <i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1>, ptr %R
; Stores the constant <2 x i1> <1, 0> to %R. The expected assembly is a single
; byte store of the immediate 1.
4271 define void @store_v2i1_constant(ptr %R) {
4272 ; CHECK-LABEL: store_v2i1_constant:
4273 ; CHECK: ## %bb.0: ## %entry
4274 ; CHECK-NEXT: movb $1, (%rdi)
4277 ; X86-LABEL: store_v2i1_constant:
4278 ; X86: ## %bb.0: ## %entry
4279 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
4280 ; X86-NEXT: movb $1, (%eax)
4283 store <2 x i1> <i1 1, i1 0>, ptr %R
; Stores the constant <4 x i1> <1, 0, 1, 0> to %R. The expected assembly is a
; single byte store of the immediate 5.
4287 define void @store_v4i1_constant(ptr %R) {
4288 ; CHECK-LABEL: store_v4i1_constant:
4289 ; CHECK: ## %bb.0: ## %entry
4290 ; CHECK-NEXT: movb $5, (%rdi)
4293 ; X86-LABEL: store_v4i1_constant:
4294 ; X86: ## %bb.0: ## %entry
4295 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
4296 ; X86-NEXT: movb $5, (%eax)
4299 store <4 x i1> <i1 1, i1 0, i1 1, i1 0>, ptr %R
4303 ; Make sure we bring the -1 constant into the mask domain.
; The mask of an unsigned less-or-equal compare is bitcast to i16, inverted by
; a scalar xor with -1, ANDed with a second compare mask and used as the mask
; of a masked store. The expected assembly has no scalar not/and: the
; inversion appears as vpcmpnleud, whose result is the write mask of vptestmd.
4304 define void @mask_not_cast(ptr, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>) {
4305 ; CHECK-LABEL: mask_not_cast:
4307 ; CHECK-NEXT: vpcmpnleud %zmm3, %zmm2, %k1
4308 ; CHECK-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1}
4309 ; CHECK-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
4310 ; CHECK-NEXT: vzeroupper
4313 ; X86-LABEL: mask_not_cast:
4315 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
4316 ; X86-NEXT: vpcmpnleud %zmm3, %zmm2, %k1
4317 ; X86-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1}
4318 ; X86-NEXT: vmovdqu32 %zmm0, (%eax) {%k1}
4319 ; X86-NEXT: vzeroupper
4321 %6 = and <8 x i64> %2, %1
4322 %7 = bitcast <8 x i64> %6 to <16 x i32>
4323 %8 = icmp ne <16 x i32> %7, zeroinitializer
4324 %9 = bitcast <16 x i1> %8 to i16
4325 %10 = bitcast <8 x i64> %3 to <16 x i32>
4326 %11 = bitcast <8 x i64> %4 to <16 x i32>
4327 %12 = icmp ule <16 x i32> %10, %11
4328 %13 = bitcast <16 x i1> %12 to i16
4329 %14 = xor i16 %13, -1
4330 %15 = and i16 %14, %9
4331 %16 = bitcast <8 x i64> %1 to <16 x i32>
4332 %17 = bitcast i16 %15 to <16 x i1>
4333 tail call void @llvm.masked.store.v16i32.p0(<16 x i32> %16, ptr %0, i32 1, <16 x i1> %17) #2
; Masked store intrinsic called by @mask_not_cast; operands are the
; <16 x i32> value, the destination pointer, an i32 and the <16 x i1> mask.
4336 declare void @llvm.masked.store.v16i32.p0(<16 x i32>, ptr, i32, <16 x i1>)
; ANDs two ORed pairs of <8 x i32> compare-with-zero masks, bitcasts the result
; to i8 and branches on it being zero. Runs with avx512dq expect the final and
; plus zero test to be a single ktestb; the other runs expect kandw followed by
; a move to a GPR and testb.
4338 define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
4339 ; KNL-LABEL: ktest_3:
4341 ; KNL-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
4342 ; KNL-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
4343 ; KNL-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
4344 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
4345 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
4346 ; KNL-NEXT: vptestnmd %zmm1, %zmm1, %k1
4347 ; KNL-NEXT: vptestnmd %zmm2, %zmm2, %k2
4348 ; KNL-NEXT: vptestnmd %zmm3, %zmm3, %k3
4349 ; KNL-NEXT: korw %k1, %k0, %k0
4350 ; KNL-NEXT: korw %k3, %k2, %k1
4351 ; KNL-NEXT: kandw %k1, %k0, %k0
4352 ; KNL-NEXT: kmovw %k0, %eax
4353 ; KNL-NEXT: testb %al, %al
4354 ; KNL-NEXT: je LBB74_1
4355 ; KNL-NEXT: ## %bb.2: ## %exit
4356 ; KNL-NEXT: vzeroupper
4358 ; KNL-NEXT: LBB74_1: ## %bar
4359 ; KNL-NEXT: pushq %rax
4360 ; KNL-NEXT: .cfi_def_cfa_offset 16
4361 ; KNL-NEXT: vzeroupper
4362 ; KNL-NEXT: callq _foo
4363 ; KNL-NEXT: addq $8, %rsp
4366 ; SKX-LABEL: ktest_3:
4368 ; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k0
4369 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1
4370 ; SKX-NEXT: korb %k1, %k0, %k0
4371 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1
4372 ; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k2
4373 ; SKX-NEXT: korb %k2, %k1, %k1
4374 ; SKX-NEXT: ktestb %k1, %k0
4375 ; SKX-NEXT: je LBB74_1
4376 ; SKX-NEXT: ## %bb.2: ## %exit
4377 ; SKX-NEXT: vzeroupper
4379 ; SKX-NEXT: LBB74_1: ## %bar
4380 ; SKX-NEXT: pushq %rax
4381 ; SKX-NEXT: .cfi_def_cfa_offset 16
4382 ; SKX-NEXT: vzeroupper
4383 ; SKX-NEXT: callq _foo
4384 ; SKX-NEXT: addq $8, %rsp
4387 ; AVX512BW-LABEL: ktest_3:
4388 ; AVX512BW: ## %bb.0:
4389 ; AVX512BW-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
4390 ; AVX512BW-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
4391 ; AVX512BW-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
4392 ; AVX512BW-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
4393 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
4394 ; AVX512BW-NEXT: vptestnmd %zmm1, %zmm1, %k1
4395 ; AVX512BW-NEXT: vptestnmd %zmm2, %zmm2, %k2
4396 ; AVX512BW-NEXT: vptestnmd %zmm3, %zmm3, %k3
4397 ; AVX512BW-NEXT: korw %k1, %k0, %k0
4398 ; AVX512BW-NEXT: korw %k3, %k2, %k1
4399 ; AVX512BW-NEXT: kandw %k1, %k0, %k0
4400 ; AVX512BW-NEXT: kmovd %k0, %eax
4401 ; AVX512BW-NEXT: testb %al, %al
4402 ; AVX512BW-NEXT: je LBB74_1
4403 ; AVX512BW-NEXT: ## %bb.2: ## %exit
4404 ; AVX512BW-NEXT: vzeroupper
4405 ; AVX512BW-NEXT: retq
4406 ; AVX512BW-NEXT: LBB74_1: ## %bar
4407 ; AVX512BW-NEXT: pushq %rax
4408 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
4409 ; AVX512BW-NEXT: vzeroupper
4410 ; AVX512BW-NEXT: callq _foo
4411 ; AVX512BW-NEXT: addq $8, %rsp
4412 ; AVX512BW-NEXT: retq
4414 ; AVX512DQ-LABEL: ktest_3:
4415 ; AVX512DQ: ## %bb.0:
4416 ; AVX512DQ-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
4417 ; AVX512DQ-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
4418 ; AVX512DQ-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
4419 ; AVX512DQ-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
4420 ; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
4421 ; AVX512DQ-NEXT: vptestnmd %zmm1, %zmm1, %k1
4422 ; AVX512DQ-NEXT: vptestnmd %zmm2, %zmm2, %k2
4423 ; AVX512DQ-NEXT: vptestnmd %zmm3, %zmm3, %k3
4424 ; AVX512DQ-NEXT: korb %k1, %k0, %k0
4425 ; AVX512DQ-NEXT: korb %k3, %k2, %k1
4426 ; AVX512DQ-NEXT: ktestb %k1, %k0
4427 ; AVX512DQ-NEXT: je LBB74_1
4428 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
4429 ; AVX512DQ-NEXT: vzeroupper
4430 ; AVX512DQ-NEXT: retq
4431 ; AVX512DQ-NEXT: LBB74_1: ## %bar
4432 ; AVX512DQ-NEXT: pushq %rax
4433 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
4434 ; AVX512DQ-NEXT: vzeroupper
4435 ; AVX512DQ-NEXT: callq _foo
4436 ; AVX512DQ-NEXT: addq $8, %rsp
4437 ; AVX512DQ-NEXT: retq
4439 ; X86-LABEL: ktest_3:
4441 ; X86-NEXT: vptestnmd %ymm0, %ymm0, %k0
4442 ; X86-NEXT: vptestnmd %ymm1, %ymm1, %k1
4443 ; X86-NEXT: korb %k1, %k0, %k0
4444 ; X86-NEXT: vptestnmd %ymm2, %ymm2, %k1
4445 ; X86-NEXT: vptestnmd %ymm3, %ymm3, %k2
4446 ; X86-NEXT: korb %k2, %k1, %k1
4447 ; X86-NEXT: ktestb %k1, %k0
4448 ; X86-NEXT: je LBB74_1
4449 ; X86-NEXT: ## %bb.2: ## %exit
4450 ; X86-NEXT: vzeroupper
4452 ; X86-NEXT: LBB74_1: ## %bar
4453 ; X86-NEXT: subl $12, %esp
4454 ; X86-NEXT: .cfi_def_cfa_offset 16
4455 ; X86-NEXT: vzeroupper
4456 ; X86-NEXT: calll _foo
4457 ; X86-NEXT: addl $12, %esp
4459 %a = icmp eq <8 x i32> %w, zeroinitializer
4460 %b = icmp eq <8 x i32> %x, zeroinitializer
4461 %c = icmp eq <8 x i32> %y, zeroinitializer
4462 %d = icmp eq <8 x i32> %z, zeroinitializer
4463 %e = or <8 x i1> %a, %b
4464 %f = or <8 x i1> %c, %d
4465 %g = and <8 x i1> %e, %f
4466 %h = bitcast <8 x i1> %g to i8
4467 %i = icmp eq i8 %h, 0
4468 br i1 %i, label %bar, label %exit
; ANDs two ORed pairs of <8 x i64> compare-with-zero masks, bitcasts the result
; to i8 and branches on it being zero. Runs with avx512dq expect the final and
; plus zero test to be a single ktestb; the other runs expect kandw followed by
; a move to a GPR and testb.
4478 define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
4479 ; KNL-LABEL: ktest_4:
4481 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
4482 ; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1
4483 ; KNL-NEXT: vptestnmq %zmm2, %zmm2, %k2
4484 ; KNL-NEXT: vptestnmq %zmm3, %zmm3, %k3
4485 ; KNL-NEXT: korw %k1, %k0, %k0
4486 ; KNL-NEXT: korw %k3, %k2, %k1
4487 ; KNL-NEXT: kandw %k1, %k0, %k0
4488 ; KNL-NEXT: kmovw %k0, %eax
4489 ; KNL-NEXT: testb %al, %al
4490 ; KNL-NEXT: je LBB75_1
4491 ; KNL-NEXT: ## %bb.2: ## %exit
4492 ; KNL-NEXT: vzeroupper
4494 ; KNL-NEXT: LBB75_1: ## %bar
4495 ; KNL-NEXT: pushq %rax
4496 ; KNL-NEXT: .cfi_def_cfa_offset 16
4497 ; KNL-NEXT: vzeroupper
4498 ; KNL-NEXT: callq _foo
4499 ; KNL-NEXT: addq $8, %rsp
4502 ; SKX-LABEL: ktest_4:
4504 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k0
4505 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1
4506 ; SKX-NEXT: korb %k1, %k0, %k0
4507 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1
4508 ; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k2
4509 ; SKX-NEXT: korb %k2, %k1, %k1
4510 ; SKX-NEXT: ktestb %k1, %k0
4511 ; SKX-NEXT: je LBB75_1
4512 ; SKX-NEXT: ## %bb.2: ## %exit
4513 ; SKX-NEXT: vzeroupper
4515 ; SKX-NEXT: LBB75_1: ## %bar
4516 ; SKX-NEXT: pushq %rax
4517 ; SKX-NEXT: .cfi_def_cfa_offset 16
4518 ; SKX-NEXT: vzeroupper
4519 ; SKX-NEXT: callq _foo
4520 ; SKX-NEXT: addq $8, %rsp
4523 ; AVX512BW-LABEL: ktest_4:
4524 ; AVX512BW: ## %bb.0:
4525 ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
4526 ; AVX512BW-NEXT: vptestnmq %zmm1, %zmm1, %k1
4527 ; AVX512BW-NEXT: vptestnmq %zmm2, %zmm2, %k2
4528 ; AVX512BW-NEXT: vptestnmq %zmm3, %zmm3, %k3
4529 ; AVX512BW-NEXT: korw %k1, %k0, %k0
4530 ; AVX512BW-NEXT: korw %k3, %k2, %k1
4531 ; AVX512BW-NEXT: kandw %k1, %k0, %k0
4532 ; AVX512BW-NEXT: kmovd %k0, %eax
4533 ; AVX512BW-NEXT: testb %al, %al
4534 ; AVX512BW-NEXT: je LBB75_1
4535 ; AVX512BW-NEXT: ## %bb.2: ## %exit
4536 ; AVX512BW-NEXT: vzeroupper
4537 ; AVX512BW-NEXT: retq
4538 ; AVX512BW-NEXT: LBB75_1: ## %bar
4539 ; AVX512BW-NEXT: pushq %rax
4540 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
4541 ; AVX512BW-NEXT: vzeroupper
4542 ; AVX512BW-NEXT: callq _foo
4543 ; AVX512BW-NEXT: addq $8, %rsp
4544 ; AVX512BW-NEXT: retq
4546 ; AVX512DQ-LABEL: ktest_4:
4547 ; AVX512DQ: ## %bb.0:
4548 ; AVX512DQ-NEXT: vptestnmq %zmm0, %zmm0, %k0
4549 ; AVX512DQ-NEXT: vptestnmq %zmm1, %zmm1, %k1
4550 ; AVX512DQ-NEXT: korb %k1, %k0, %k0
4551 ; AVX512DQ-NEXT: vptestnmq %zmm2, %zmm2, %k1
4552 ; AVX512DQ-NEXT: vptestnmq %zmm3, %zmm3, %k2
4553 ; AVX512DQ-NEXT: korb %k2, %k1, %k1
4554 ; AVX512DQ-NEXT: ktestb %k1, %k0
4555 ; AVX512DQ-NEXT: je LBB75_1
4556 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
4557 ; AVX512DQ-NEXT: vzeroupper
4558 ; AVX512DQ-NEXT: retq
4559 ; AVX512DQ-NEXT: LBB75_1: ## %bar
4560 ; AVX512DQ-NEXT: pushq %rax
4561 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
4562 ; AVX512DQ-NEXT: vzeroupper
4563 ; AVX512DQ-NEXT: callq _foo
4564 ; AVX512DQ-NEXT: addq $8, %rsp
4565 ; AVX512DQ-NEXT: retq
4567 ; X86-LABEL: ktest_4:
4569 ; X86-NEXT: vptestnmq %zmm0, %zmm0, %k0
4570 ; X86-NEXT: vptestnmq %zmm1, %zmm1, %k1
4571 ; X86-NEXT: korb %k1, %k0, %k0
4572 ; X86-NEXT: vptestnmq %zmm2, %zmm2, %k1
4573 ; X86-NEXT: vptestnmq %zmm3, %zmm3, %k2
4574 ; X86-NEXT: korb %k2, %k1, %k1
4575 ; X86-NEXT: ktestb %k1, %k0
4576 ; X86-NEXT: je LBB75_1
4577 ; X86-NEXT: ## %bb.2: ## %exit
4578 ; X86-NEXT: vzeroupper
4580 ; X86-NEXT: LBB75_1: ## %bar
4581 ; X86-NEXT: subl $12, %esp
4582 ; X86-NEXT: .cfi_def_cfa_offset 16
4583 ; X86-NEXT: vzeroupper
4584 ; X86-NEXT: calll _foo
4585 ; X86-NEXT: addl $12, %esp
4587 %a = icmp eq <8 x i64> %w, zeroinitializer
4588 %b = icmp eq <8 x i64> %x, zeroinitializer
4589 %c = icmp eq <8 x i64> %y, zeroinitializer
4590 %d = icmp eq <8 x i64> %z, zeroinitializer
4591 %e = or <8 x i1> %a, %b
4592 %f = or <8 x i1> %c, %d
4593 %g = and <8 x i1> %e, %f
4594 %h = bitcast <8 x i1> %g to i8
4595 %i = icmp eq i8 %h, 0
4596 br i1 %i, label %bar, label %exit
; ANDs two ORed pairs of <16 x i32> compare-with-zero masks, bitcasts the
; result to i16 and branches on it being zero. Runs with avx512dq expect the
; final and plus zero test to be a single ktestw; the other runs expect kandw
; followed by kortestw.
4606 define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z) {
4607 ; KNL-LABEL: ktest_5:
4609 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
4610 ; KNL-NEXT: vptestnmd %zmm1, %zmm1, %k1
4611 ; KNL-NEXT: korw %k1, %k0, %k0
4612 ; KNL-NEXT: vptestnmd %zmm2, %zmm2, %k1
4613 ; KNL-NEXT: vptestnmd %zmm3, %zmm3, %k2
4614 ; KNL-NEXT: korw %k2, %k1, %k1
4615 ; KNL-NEXT: kandw %k1, %k0, %k0
4616 ; KNL-NEXT: kortestw %k0, %k0
4617 ; KNL-NEXT: je LBB76_1
4618 ; KNL-NEXT: ## %bb.2: ## %exit
4619 ; KNL-NEXT: vzeroupper
4621 ; KNL-NEXT: LBB76_1: ## %bar
4622 ; KNL-NEXT: pushq %rax
4623 ; KNL-NEXT: .cfi_def_cfa_offset 16
4624 ; KNL-NEXT: vzeroupper
4625 ; KNL-NEXT: callq _foo
4626 ; KNL-NEXT: addq $8, %rsp
4629 ; SKX-LABEL: ktest_5:
4631 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
4632 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1
4633 ; SKX-NEXT: korw %k1, %k0, %k0
4634 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1
4635 ; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k2
4636 ; SKX-NEXT: korw %k2, %k1, %k1
4637 ; SKX-NEXT: ktestw %k1, %k0
4638 ; SKX-NEXT: je LBB76_1
4639 ; SKX-NEXT: ## %bb.2: ## %exit
4640 ; SKX-NEXT: vzeroupper
4642 ; SKX-NEXT: LBB76_1: ## %bar
4643 ; SKX-NEXT: pushq %rax
4644 ; SKX-NEXT: .cfi_def_cfa_offset 16
4645 ; SKX-NEXT: vzeroupper
4646 ; SKX-NEXT: callq _foo
4647 ; SKX-NEXT: addq $8, %rsp
4650 ; AVX512BW-LABEL: ktest_5:
4651 ; AVX512BW: ## %bb.0:
4652 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
4653 ; AVX512BW-NEXT: vptestnmd %zmm1, %zmm1, %k1
4654 ; AVX512BW-NEXT: korw %k1, %k0, %k0
4655 ; AVX512BW-NEXT: vptestnmd %zmm2, %zmm2, %k1
4656 ; AVX512BW-NEXT: vptestnmd %zmm3, %zmm3, %k2
4657 ; AVX512BW-NEXT: korw %k2, %k1, %k1
4658 ; AVX512BW-NEXT: kandw %k1, %k0, %k0
4659 ; AVX512BW-NEXT: kortestw %k0, %k0
4660 ; AVX512BW-NEXT: je LBB76_1
4661 ; AVX512BW-NEXT: ## %bb.2: ## %exit
4662 ; AVX512BW-NEXT: vzeroupper
4663 ; AVX512BW-NEXT: retq
4664 ; AVX512BW-NEXT: LBB76_1: ## %bar
4665 ; AVX512BW-NEXT: pushq %rax
4666 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
4667 ; AVX512BW-NEXT: vzeroupper
4668 ; AVX512BW-NEXT: callq _foo
4669 ; AVX512BW-NEXT: addq $8, %rsp
4670 ; AVX512BW-NEXT: retq
4672 ; AVX512DQ-LABEL: ktest_5:
4673 ; AVX512DQ: ## %bb.0:
4674 ; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
4675 ; AVX512DQ-NEXT: vptestnmd %zmm1, %zmm1, %k1
4676 ; AVX512DQ-NEXT: korw %k1, %k0, %k0
4677 ; AVX512DQ-NEXT: vptestnmd %zmm2, %zmm2, %k1
4678 ; AVX512DQ-NEXT: vptestnmd %zmm3, %zmm3, %k2
4679 ; AVX512DQ-NEXT: korw %k2, %k1, %k1
4680 ; AVX512DQ-NEXT: ktestw %k1, %k0
4681 ; AVX512DQ-NEXT: je LBB76_1
4682 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
4683 ; AVX512DQ-NEXT: vzeroupper
4684 ; AVX512DQ-NEXT: retq
4685 ; AVX512DQ-NEXT: LBB76_1: ## %bar
4686 ; AVX512DQ-NEXT: pushq %rax
4687 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
4688 ; AVX512DQ-NEXT: vzeroupper
4689 ; AVX512DQ-NEXT: callq _foo
4690 ; AVX512DQ-NEXT: addq $8, %rsp
4691 ; AVX512DQ-NEXT: retq
4693 ; X86-LABEL: ktest_5:
4695 ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
4696 ; X86-NEXT: vptestnmd %zmm1, %zmm1, %k1
4697 ; X86-NEXT: korw %k1, %k0, %k0
4698 ; X86-NEXT: vptestnmd %zmm2, %zmm2, %k1
4699 ; X86-NEXT: vptestnmd %zmm3, %zmm3, %k2
4700 ; X86-NEXT: korw %k2, %k1, %k1
4701 ; X86-NEXT: ktestw %k1, %k0
4702 ; X86-NEXT: je LBB76_1
4703 ; X86-NEXT: ## %bb.2: ## %exit
4704 ; X86-NEXT: vzeroupper
4706 ; X86-NEXT: LBB76_1: ## %bar
4707 ; X86-NEXT: subl $12, %esp
4708 ; X86-NEXT: .cfi_def_cfa_offset 16
4709 ; X86-NEXT: vzeroupper
4710 ; X86-NEXT: calll _foo
4711 ; X86-NEXT: addl $12, %esp
4713 %a = icmp eq <16 x i32> %w, zeroinitializer
4714 %b = icmp eq <16 x i32> %x, zeroinitializer
4715 %c = icmp eq <16 x i32> %y, zeroinitializer
4716 %d = icmp eq <16 x i32> %z, zeroinitializer
4717 %e = or <16 x i1> %a, %b
4718 %f = or <16 x i1> %c, %d
4719 %g = and <16 x i1> %e, %f
4720 %h = bitcast <16 x i1> %g to i16
4721 %i = icmp eq i16 %h, 0
4722 br i1 %i, label %bar, label %exit
; ANDs two ORed pairs of <32 x i16> compare-with-zero masks, bitcasts the
; result to i32 and branches on it being zero. Runs with avx512bw expect
; vptestnmw masks combined with kord and tested with ktestd. Runs without it
; expect the compares and logic in vector registers (vpcmpeqw on ymm halves,
; vpternlogq), with the result converted to a mask and tested with kortestw.
4732 define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z) {
4733 ; KNL-LABEL: ktest_6:
4735 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm4
4736 ; KNL-NEXT: vpxor %xmm5, %xmm5, %xmm5
4737 ; KNL-NEXT: vpcmpeqw %ymm5, %ymm4, %ymm4
4738 ; KNL-NEXT: vpcmpeqw %ymm5, %ymm0, %ymm0
4739 ; KNL-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
4740 ; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm4
4741 ; KNL-NEXT: vpcmpeqw %ymm5, %ymm4, %ymm4
4742 ; KNL-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm1
4743 ; KNL-NEXT: vinserti64x4 $1, %ymm4, %zmm1, %zmm1
4744 ; KNL-NEXT: vporq %zmm1, %zmm0, %zmm0
4745 ; KNL-NEXT: vextracti64x4 $1, %zmm2, %ymm1
4746 ; KNL-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm1
4747 ; KNL-NEXT: vpcmpeqw %ymm5, %ymm2, %ymm2
4748 ; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1
4749 ; KNL-NEXT: vextracti64x4 $1, %zmm3, %ymm2
4750 ; KNL-NEXT: vpcmpeqw %ymm5, %ymm2, %ymm2
4751 ; KNL-NEXT: vpcmpeqw %ymm5, %ymm3, %ymm3
4752 ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
4753 ; KNL-NEXT: vpternlogq $200, %zmm1, %zmm0, %zmm2
4754 ; KNL-NEXT: vextracti64x4 $1, %zmm2, %ymm0
4755 ; KNL-NEXT: vpor %ymm0, %ymm2, %ymm0
4756 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
4757 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
4758 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
4759 ; KNL-NEXT: kortestw %k0, %k0
4760 ; KNL-NEXT: je LBB77_1
4761 ; KNL-NEXT: ## %bb.2: ## %exit
4762 ; KNL-NEXT: vzeroupper
4764 ; KNL-NEXT: LBB77_1: ## %bar
4765 ; KNL-NEXT: pushq %rax
4766 ; KNL-NEXT: .cfi_def_cfa_offset 16
4767 ; KNL-NEXT: vzeroupper
4768 ; KNL-NEXT: callq _foo
4769 ; KNL-NEXT: addq $8, %rsp
4772 ; SKX-LABEL: ktest_6:
4774 ; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k0
4775 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1
4776 ; SKX-NEXT: kord %k1, %k0, %k0
4777 ; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1
4778 ; SKX-NEXT: vptestnmw %zmm3, %zmm3, %k2
4779 ; SKX-NEXT: kord %k2, %k1, %k1
4780 ; SKX-NEXT: ktestd %k1, %k0
4781 ; SKX-NEXT: je LBB77_1
4782 ; SKX-NEXT: ## %bb.2: ## %exit
4783 ; SKX-NEXT: vzeroupper
4785 ; SKX-NEXT: LBB77_1: ## %bar
4786 ; SKX-NEXT: pushq %rax
4787 ; SKX-NEXT: .cfi_def_cfa_offset 16
4788 ; SKX-NEXT: vzeroupper
4789 ; SKX-NEXT: callq _foo
4790 ; SKX-NEXT: addq $8, %rsp
4793 ; AVX512BW-LABEL: ktest_6:
4794 ; AVX512BW: ## %bb.0:
4795 ; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0
4796 ; AVX512BW-NEXT: vptestnmw %zmm1, %zmm1, %k1
4797 ; AVX512BW-NEXT: kord %k1, %k0, %k0
4798 ; AVX512BW-NEXT: vptestnmw %zmm2, %zmm2, %k1
4799 ; AVX512BW-NEXT: vptestnmw %zmm3, %zmm3, %k2
4800 ; AVX512BW-NEXT: kord %k2, %k1, %k1
4801 ; AVX512BW-NEXT: ktestd %k1, %k0
4802 ; AVX512BW-NEXT: je LBB77_1
4803 ; AVX512BW-NEXT: ## %bb.2: ## %exit
4804 ; AVX512BW-NEXT: vzeroupper
4805 ; AVX512BW-NEXT: retq
4806 ; AVX512BW-NEXT: LBB77_1: ## %bar
4807 ; AVX512BW-NEXT: pushq %rax
4808 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
4809 ; AVX512BW-NEXT: vzeroupper
4810 ; AVX512BW-NEXT: callq _foo
4811 ; AVX512BW-NEXT: addq $8, %rsp
4812 ; AVX512BW-NEXT: retq
4814 ; AVX512DQ-LABEL: ktest_6:
4815 ; AVX512DQ: ## %bb.0:
4816 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm4
4817 ; AVX512DQ-NEXT: vpxor %xmm5, %xmm5, %xmm5
4818 ; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm4, %ymm4
4819 ; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm0, %ymm0
4820 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
4821 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm4
4822 ; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm4, %ymm4
4823 ; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm1
4824 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm1, %zmm1
4825 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
4826 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm1
4827 ; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm1
4828 ; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm2, %ymm2
4829 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1
4830 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm3, %ymm2
4831 ; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm2, %ymm2
4832 ; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm3, %ymm3
4833 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
4834 ; AVX512DQ-NEXT: vpternlogq $200, %zmm1, %zmm0, %zmm2
4835 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0
4836 ; AVX512DQ-NEXT: vpor %ymm0, %ymm2, %ymm0
4837 ; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
4838 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
4839 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
4840 ; AVX512DQ-NEXT: kortestw %k0, %k0
4841 ; AVX512DQ-NEXT: je LBB77_1
4842 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
4843 ; AVX512DQ-NEXT: vzeroupper
4844 ; AVX512DQ-NEXT: retq
4845 ; AVX512DQ-NEXT: LBB77_1: ## %bar
4846 ; AVX512DQ-NEXT: pushq %rax
4847 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
4848 ; AVX512DQ-NEXT: vzeroupper
4849 ; AVX512DQ-NEXT: callq _foo
4850 ; AVX512DQ-NEXT: addq $8, %rsp
4851 ; AVX512DQ-NEXT: retq
4853 ; X86-LABEL: ktest_6:
4855 ; X86-NEXT: vptestnmw %zmm0, %zmm0, %k0
4856 ; X86-NEXT: vptestnmw %zmm1, %zmm1, %k1
4857 ; X86-NEXT: kord %k1, %k0, %k0
4858 ; X86-NEXT: vptestnmw %zmm2, %zmm2, %k1
4859 ; X86-NEXT: vptestnmw %zmm3, %zmm3, %k2
4860 ; X86-NEXT: kord %k2, %k1, %k1
4861 ; X86-NEXT: ktestd %k1, %k0
4862 ; X86-NEXT: je LBB77_1
4863 ; X86-NEXT: ## %bb.2: ## %exit
4864 ; X86-NEXT: vzeroupper
4866 ; X86-NEXT: LBB77_1: ## %bar
4867 ; X86-NEXT: subl $12, %esp
4868 ; X86-NEXT: .cfi_def_cfa_offset 16
4869 ; X86-NEXT: vzeroupper
4870 ; X86-NEXT: calll _foo
4871 ; X86-NEXT: addl $12, %esp
4873 %a = icmp eq <32 x i16> %w, zeroinitializer
4874 %b = icmp eq <32 x i16> %x, zeroinitializer
4875 %c = icmp eq <32 x i16> %y, zeroinitializer
4876 %d = icmp eq <32 x i16> %z, zeroinitializer
4877 %e = or <32 x i1> %a, %b
4878 %f = or <32 x i1> %c, %d
4879 %g = and <32 x i1> %e, %f
4880 %h = bitcast <32 x i1> %g to i32
4881 %i = icmp eq i32 %h, 0
4882 br i1 %i, label %bar, label %exit
; ktest_7 - byte-element variant of the mask-test pattern.
; Each of the four <64 x i8> arguments is compared against zero, the results
; are combined as (a | b) & (c | d), the 64-element mask is bitcast to i64 and
; the function branches to %bar when that value is zero, otherwise to %exit.
; Expected lowering per run line, as recorded by the assertions below:
;  - the avx512f-only and avx512dq-only runs work on 256-bit halves with
;    vpcmpeqb, merge them with vpternlogq and finish with vpmovmskb + testl;
;  - the runs that enable avx512bw use vptestnmb, korq and a single ktestq;
;  - the i686 run uses kandq followed by kshiftrq $32 + kortestd.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
4892 define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
4893 ; KNL-LABEL: ktest_7:
4895 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm4
4896 ; KNL-NEXT: vpxor %xmm5, %xmm5, %xmm5
4897 ; KNL-NEXT: vpcmpeqb %ymm5, %ymm4, %ymm4
4898 ; KNL-NEXT: vpcmpeqb %ymm5, %ymm0, %ymm0
4899 ; KNL-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
4900 ; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm4
4901 ; KNL-NEXT: vpcmpeqb %ymm5, %ymm4, %ymm4
4902 ; KNL-NEXT: vpcmpeqb %ymm5, %ymm1, %ymm1
4903 ; KNL-NEXT: vinserti64x4 $1, %ymm4, %zmm1, %zmm1
4904 ; KNL-NEXT: vporq %zmm1, %zmm0, %zmm0
4905 ; KNL-NEXT: vextracti64x4 $1, %zmm2, %ymm1
4906 ; KNL-NEXT: vpcmpeqb %ymm5, %ymm1, %ymm1
4907 ; KNL-NEXT: vpcmpeqb %ymm5, %ymm2, %ymm2
4908 ; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1
4909 ; KNL-NEXT: vextracti64x4 $1, %zmm3, %ymm2
4910 ; KNL-NEXT: vpcmpeqb %ymm5, %ymm2, %ymm2
4911 ; KNL-NEXT: vpcmpeqb %ymm5, %ymm3, %ymm3
4912 ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
4913 ; KNL-NEXT: vpternlogq $200, %zmm1, %zmm0, %zmm2
4914 ; KNL-NEXT: vextracti64x4 $1, %zmm2, %ymm0
4915 ; KNL-NEXT: vpor %ymm0, %ymm2, %ymm0
4916 ; KNL-NEXT: vpmovmskb %ymm0, %eax
4917 ; KNL-NEXT: testl %eax, %eax
4918 ; KNL-NEXT: je LBB78_1
4919 ; KNL-NEXT: ## %bb.2: ## %exit
4920 ; KNL-NEXT: vzeroupper
4922 ; KNL-NEXT: LBB78_1: ## %bar
4923 ; KNL-NEXT: pushq %rax
4924 ; KNL-NEXT: .cfi_def_cfa_offset 16
4925 ; KNL-NEXT: vzeroupper
4926 ; KNL-NEXT: callq _foo
4927 ; KNL-NEXT: addq $8, %rsp
4930 ; SKX-LABEL: ktest_7:
4932 ; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k0
4933 ; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1
4934 ; SKX-NEXT: korq %k1, %k0, %k0
4935 ; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1
4936 ; SKX-NEXT: vptestnmb %zmm3, %zmm3, %k2
4937 ; SKX-NEXT: korq %k2, %k1, %k1
4938 ; SKX-NEXT: ktestq %k1, %k0
4939 ; SKX-NEXT: je LBB78_1
4940 ; SKX-NEXT: ## %bb.2: ## %exit
4941 ; SKX-NEXT: vzeroupper
4943 ; SKX-NEXT: LBB78_1: ## %bar
4944 ; SKX-NEXT: pushq %rax
4945 ; SKX-NEXT: .cfi_def_cfa_offset 16
4946 ; SKX-NEXT: vzeroupper
4947 ; SKX-NEXT: callq _foo
4948 ; SKX-NEXT: addq $8, %rsp
4951 ; AVX512BW-LABEL: ktest_7:
4952 ; AVX512BW: ## %bb.0:
4953 ; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
4954 ; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k1
4955 ; AVX512BW-NEXT: korq %k1, %k0, %k0
4956 ; AVX512BW-NEXT: vptestnmb %zmm2, %zmm2, %k1
4957 ; AVX512BW-NEXT: vptestnmb %zmm3, %zmm3, %k2
4958 ; AVX512BW-NEXT: korq %k2, %k1, %k1
4959 ; AVX512BW-NEXT: ktestq %k1, %k0
4960 ; AVX512BW-NEXT: je LBB78_1
4961 ; AVX512BW-NEXT: ## %bb.2: ## %exit
4962 ; AVX512BW-NEXT: vzeroupper
4963 ; AVX512BW-NEXT: retq
4964 ; AVX512BW-NEXT: LBB78_1: ## %bar
4965 ; AVX512BW-NEXT: pushq %rax
4966 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
4967 ; AVX512BW-NEXT: vzeroupper
4968 ; AVX512BW-NEXT: callq _foo
4969 ; AVX512BW-NEXT: addq $8, %rsp
4970 ; AVX512BW-NEXT: retq
4972 ; AVX512DQ-LABEL: ktest_7:
4973 ; AVX512DQ: ## %bb.0:
4974 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm4
4975 ; AVX512DQ-NEXT: vpxor %xmm5, %xmm5, %xmm5
4976 ; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm4, %ymm4
4977 ; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm0, %ymm0
4978 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
4979 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm4
4980 ; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm4, %ymm4
4981 ; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm1, %ymm1
4982 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm1, %zmm1
4983 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
4984 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm1
4985 ; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm1, %ymm1
4986 ; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm2, %ymm2
4987 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1
4988 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm3, %ymm2
4989 ; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm2, %ymm2
4990 ; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm3, %ymm3
4991 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
4992 ; AVX512DQ-NEXT: vpternlogq $200, %zmm1, %zmm0, %zmm2
4993 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0
4994 ; AVX512DQ-NEXT: vpor %ymm0, %ymm2, %ymm0
4995 ; AVX512DQ-NEXT: vpmovmskb %ymm0, %eax
4996 ; AVX512DQ-NEXT: testl %eax, %eax
4997 ; AVX512DQ-NEXT: je LBB78_1
4998 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
4999 ; AVX512DQ-NEXT: vzeroupper
5000 ; AVX512DQ-NEXT: retq
5001 ; AVX512DQ-NEXT: LBB78_1: ## %bar
5002 ; AVX512DQ-NEXT: pushq %rax
5003 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
5004 ; AVX512DQ-NEXT: vzeroupper
5005 ; AVX512DQ-NEXT: callq _foo
5006 ; AVX512DQ-NEXT: addq $8, %rsp
5007 ; AVX512DQ-NEXT: retq
5009 ; X86-LABEL: ktest_7:
5011 ; X86-NEXT: vptestnmb %zmm0, %zmm0, %k0
5012 ; X86-NEXT: vptestnmb %zmm1, %zmm1, %k1
5013 ; X86-NEXT: korq %k1, %k0, %k0
5014 ; X86-NEXT: vptestnmb %zmm2, %zmm2, %k1
5015 ; X86-NEXT: vptestnmb %zmm3, %zmm3, %k2
5016 ; X86-NEXT: korq %k2, %k1, %k1
5017 ; X86-NEXT: kandq %k1, %k0, %k0
5018 ; X86-NEXT: kshiftrq $32, %k0, %k1
5019 ; X86-NEXT: kortestd %k1, %k0
5020 ; X86-NEXT: je LBB78_1
5021 ; X86-NEXT: ## %bb.2: ## %exit
5022 ; X86-NEXT: vzeroupper
5024 ; X86-NEXT: LBB78_1: ## %bar
5025 ; X86-NEXT: subl $12, %esp
5026 ; X86-NEXT: .cfi_def_cfa_offset 16
5027 ; X86-NEXT: vzeroupper
5028 ; X86-NEXT: calll _foo
5029 ; X86-NEXT: addl $12, %esp
; IR under test. %a..%d are the per-argument "element is zero" masks.
5031 %a = icmp eq <64 x i8> %w, zeroinitializer
5032 %b = icmp eq <64 x i8> %x, zeroinitializer
5033 %c = icmp eq <64 x i8> %y, zeroinitializer
5034 %d = icmp eq <64 x i8> %z, zeroinitializer
5035 %e = or <64 x i1> %a, %b
5036 %f = or <64 x i1> %c, %d
5037 %g = and <64 x i1> %e, %f
; Reinterpret the 64 mask bits as one integer so the whole mask can be tested at once.
5038 %h = bitcast <64 x i1> %g to i64
5039 %i = icmp eq i64 %h, 0
5040 br i1 %i, label %bar, label %exit
; mask64_insert - insert a run-time bit into element 0 of a constant mask.
; %a is truncated to i1 and inserted at index 0 of a <64 x i1> constant whose
; elements 1, 17, 33 and 49 are false and all other elements are true; the
; resulting vector is returned.
; Per the assertions below, the runs that enable avx512bw build the value in
; mask registers (korq) and expand it into %zmm0 with vpmovm2b, while the runs
; without avx512bw write the result to memory through the pointer in %rdi and
; return that pointer in %rax.
5050 define <64 x i1> @mask64_insert(i32 %a) {
5051 ; KNL-LABEL: mask64_insert:
5053 ; KNL-NEXT: movq %rdi, %rax
5054 ; KNL-NEXT: andl $1, %esi
5055 ; KNL-NEXT: kmovw %esi, %k0
5056 ; KNL-NEXT: movw $-4, %cx
5057 ; KNL-NEXT: kmovw %ecx, %k1
5058 ; KNL-NEXT: kshiftrw $1, %k1, %k1
5059 ; KNL-NEXT: kshiftlw $1, %k1, %k1
5060 ; KNL-NEXT: korw %k0, %k1, %k0
5061 ; KNL-NEXT: kmovw %k0, (%rdi)
5062 ; KNL-NEXT: movw $-3, 6(%rdi)
5063 ; KNL-NEXT: movl $-131075, 2(%rdi) ## imm = 0xFFFDFFFD
5066 ; SKX-LABEL: mask64_insert:
5068 ; SKX-NEXT: kmovd %edi, %k0
5069 ; SKX-NEXT: kshiftlq $63, %k0, %k0
5070 ; SKX-NEXT: kshiftrq $63, %k0, %k0
5071 ; SKX-NEXT: movabsq $-562958543486980, %rax ## imm = 0xFFFDFFFDFFFDFFFC
5072 ; SKX-NEXT: kmovq %rax, %k1
5073 ; SKX-NEXT: kshiftrq $1, %k1, %k1
5074 ; SKX-NEXT: kshiftlq $1, %k1, %k1
5075 ; SKX-NEXT: korq %k0, %k1, %k0
5076 ; SKX-NEXT: vpmovm2b %k0, %zmm0
5079 ; AVX512BW-LABEL: mask64_insert:
5080 ; AVX512BW: ## %bb.0:
5081 ; AVX512BW-NEXT: kmovd %edi, %k0
5082 ; AVX512BW-NEXT: kshiftlq $63, %k0, %k0
5083 ; AVX512BW-NEXT: kshiftrq $63, %k0, %k0
5084 ; AVX512BW-NEXT: movabsq $-562958543486980, %rax ## imm = 0xFFFDFFFDFFFDFFFC
5085 ; AVX512BW-NEXT: kmovq %rax, %k1
5086 ; AVX512BW-NEXT: kshiftrq $1, %k1, %k1
5087 ; AVX512BW-NEXT: kshiftlq $1, %k1, %k1
5088 ; AVX512BW-NEXT: korq %k0, %k1, %k0
5089 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
5090 ; AVX512BW-NEXT: retq
5092 ; AVX512DQ-LABEL: mask64_insert:
5093 ; AVX512DQ: ## %bb.0:
5094 ; AVX512DQ-NEXT: movq %rdi, %rax
5095 ; AVX512DQ-NEXT: andl $1, %esi
5096 ; AVX512DQ-NEXT: kmovw %esi, %k0
5097 ; AVX512DQ-NEXT: movw $-4, %cx
5098 ; AVX512DQ-NEXT: kmovw %ecx, %k1
5099 ; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1
5100 ; AVX512DQ-NEXT: kshiftlw $1, %k1, %k1
5101 ; AVX512DQ-NEXT: korw %k0, %k1, %k0
5102 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
5103 ; AVX512DQ-NEXT: movw $-3, 6(%rdi)
5104 ; AVX512DQ-NEXT: movl $-131075, 2(%rdi) ## imm = 0xFFFDFFFD
5105 ; AVX512DQ-NEXT: retq
5107 ; X86-LABEL: mask64_insert:
5109 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
5110 ; X86-NEXT: movl $-131076, %eax ## imm = 0xFFFDFFFC
5111 ; X86-NEXT: kmovd %eax, %k1
5112 ; X86-NEXT: movl $-131075, %eax ## imm = 0xFFFDFFFD
5113 ; X86-NEXT: kmovd %eax, %k2
5114 ; X86-NEXT: kunpckdq %k1, %k2, %k1
5115 ; X86-NEXT: kshiftrq $1, %k1, %k1
5116 ; X86-NEXT: kshiftlq $1, %k1, %k1
5117 ; X86-NEXT: kshiftlq $63, %k0, %k0
5118 ; X86-NEXT: kshiftrq $63, %k0, %k0
5119 ; X86-NEXT: korq %k0, %k1, %k0
5120 ; X86-NEXT: vpmovm2b %k0, %zmm0
; Only the low bit of %a is used as the inserted mask element.
5122 %a_i = trunc i32 %a to i1
; The constant's element 0 is overwritten by %a_i; the remaining elements repeat the 0xFFFD pattern per 16 bits.
5123 %maskv = insertelement <64 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
5124 ret <64 x i1> %maskv
; test_v1i1_add - addition on a single-element mask vector.
; Both i1 arguments are bitcast to <1 x i1>, added, and the sum is bitcast back
; to i1. The assertions below expect the add to be emitted as a plain xor of
; the two inputs on every run line.
5127 define i1 @test_v1i1_add(i1 %x, i1 %y) {
5128 ; CHECK-LABEL: test_v1i1_add:
5130 ; CHECK-NEXT: movl %edi, %eax
5131 ; CHECK-NEXT: xorl %esi, %eax
5132 ; CHECK-NEXT: ## kill: def $al killed $al killed $eax
5135 ; X86-LABEL: test_v1i1_add:
5137 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
5138 ; X86-NEXT: xorb {{[0-9]+}}(%esp), %al
5140 %m0 = bitcast i1 %x to <1 x i1>
5141 %m1 = bitcast i1 %y to <1 x i1>
5142 %m2 = add <1 x i1> %m0, %m1
5143 %ret = bitcast <1 x i1> %m2 to i1
; test_v1i1_sub - subtraction on a single-element mask vector.
; Both i1 arguments are bitcast to <1 x i1>, subtracted, and the difference is
; bitcast back to i1. The assertions below expect the same xor sequence that
; is expected for test_v1i1_add.
5147 define i1 @test_v1i1_sub(i1 %x, i1 %y) {
5148 ; CHECK-LABEL: test_v1i1_sub:
5150 ; CHECK-NEXT: movl %edi, %eax
5151 ; CHECK-NEXT: xorl %esi, %eax
5152 ; CHECK-NEXT: ## kill: def $al killed $al killed $eax
5155 ; X86-LABEL: test_v1i1_sub:
5157 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
5158 ; X86-NEXT: xorb {{[0-9]+}}(%esp), %al
5160 %m0 = bitcast i1 %x to <1 x i1>
5161 %m1 = bitcast i1 %y to <1 x i1>
5162 %m2 = sub <1 x i1> %m0, %m1
5163 %ret = bitcast <1 x i1> %m2 to i1
; test_v1i1_mul - multiplication on a single-element mask vector.
; Both i1 arguments are bitcast to <1 x i1>, multiplied, and the product is
; bitcast back to i1. The assertions below expect the multiply to be emitted
; as a plain and of the two inputs on every run line.
5167 define i1 @test_v1i1_mul(i1 %x, i1 %y) {
5168 ; CHECK-LABEL: test_v1i1_mul:
5170 ; CHECK-NEXT: movl %edi, %eax
5171 ; CHECK-NEXT: andl %esi, %eax
5172 ; CHECK-NEXT: ## kill: def $al killed $al killed $eax
5175 ; X86-LABEL: test_v1i1_mul:
5177 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
5178 ; X86-NEXT: andb {{[0-9]+}}(%esp), %al
5180 %m0 = bitcast i1 %x to <1 x i1>
5181 %m1 = bitcast i1 %y to <1 x i1>
5182 %m2 = mul <1 x i1> %m0, %m1
5183 %ret = bitcast <1 x i1> %m2 to i1
; uadd_sat_v1i1 - unsigned saturating add on <1 x i1>.
; Calls the llvm.uadd.sat.v1i1 intrinsic (declared right after this function)
; on the two arguments. The assertions below expect the call to be emitted as
; a scalar or of the two inputs on every run line.
5187 define <1 x i1> @uadd_sat_v1i1(<1 x i1> %x, <1 x i1> %y) nounwind {
5188 ; CHECK-LABEL: uadd_sat_v1i1:
5190 ; CHECK-NEXT: movl %edi, %eax
5191 ; CHECK-NEXT: orl %esi, %eax
5192 ; CHECK-NEXT: ## kill: def $al killed $al killed $eax
5195 ; X86-LABEL: uadd_sat_v1i1:
5197 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
5198 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
5199 ; X86-NEXT: ## kill: def $al killed $al killed $eax
5201 %z = call <1 x i1> @llvm.uadd.sat.v1i1(<1 x i1> %x, <1 x i1> %y)
5204 declare <1 x i1> @llvm.uadd.sat.v1i1(<1 x i1> %x, <1 x i1> %y)
; usub_sat_v1i1 - unsigned saturating subtract on <1 x i1>.
; Calls the llvm.usub.sat.v1i1 intrinsic (declared right after this function)
; on the two arguments. The assertions below expect both operands to be moved
; into mask registers and combined with kandnw on every run line; the runs
; with avx512bw use kmovd for the moves, the others use kmovw.
5206 define <1 x i1> @usub_sat_v1i1(<1 x i1> %x, <1 x i1> %y) nounwind {
5207 ; KNL-LABEL: usub_sat_v1i1:
5209 ; KNL-NEXT: kmovw %esi, %k0
5210 ; KNL-NEXT: kmovw %edi, %k1
5211 ; KNL-NEXT: kandnw %k1, %k0, %k0
5212 ; KNL-NEXT: kmovw %k0, %eax
5213 ; KNL-NEXT: ## kill: def $al killed $al killed $eax
5216 ; SKX-LABEL: usub_sat_v1i1:
5218 ; SKX-NEXT: kmovd %esi, %k0
5219 ; SKX-NEXT: kmovd %edi, %k1
5220 ; SKX-NEXT: kandnw %k1, %k0, %k0
5221 ; SKX-NEXT: kmovd %k0, %eax
5222 ; SKX-NEXT: ## kill: def $al killed $al killed $eax
5225 ; AVX512BW-LABEL: usub_sat_v1i1:
5226 ; AVX512BW: ## %bb.0:
5227 ; AVX512BW-NEXT: kmovd %esi, %k0
5228 ; AVX512BW-NEXT: kmovd %edi, %k1
5229 ; AVX512BW-NEXT: kandnw %k1, %k0, %k0
5230 ; AVX512BW-NEXT: kmovd %k0, %eax
5231 ; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
5232 ; AVX512BW-NEXT: retq
5234 ; AVX512DQ-LABEL: usub_sat_v1i1:
5235 ; AVX512DQ: ## %bb.0:
5236 ; AVX512DQ-NEXT: kmovw %esi, %k0
5237 ; AVX512DQ-NEXT: kmovw %edi, %k1
5238 ; AVX512DQ-NEXT: kandnw %k1, %k0, %k0
5239 ; AVX512DQ-NEXT: kmovw %k0, %eax
5240 ; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
5241 ; AVX512DQ-NEXT: retq
5243 ; X86-LABEL: usub_sat_v1i1:
5245 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0
5246 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
5247 ; X86-NEXT: kandnw %k1, %k0, %k0
5248 ; X86-NEXT: kmovd %k0, %eax
5249 ; X86-NEXT: ## kill: def $al killed $al killed $eax
5251 %z = call <1 x i1> @llvm.usub.sat.v1i1(<1 x i1> %x, <1 x i1> %y)
5254 declare <1 x i1> @llvm.usub.sat.v1i1(<1 x i1> %x, <1 x i1> %y)
; sadd_sat_v1i1 - signed saturating add on <1 x i1>.
; Calls the llvm.sadd.sat.v1i1 intrinsic (declared right after this function)
; on the two arguments. The assertions below expect the same scalar or
; sequence that is expected for uadd_sat_v1i1.
5256 define <1 x i1> @sadd_sat_v1i1(<1 x i1> %x, <1 x i1> %y) nounwind {
5257 ; CHECK-LABEL: sadd_sat_v1i1:
5259 ; CHECK-NEXT: movl %edi, %eax
5260 ; CHECK-NEXT: orl %esi, %eax
5261 ; CHECK-NEXT: ## kill: def $al killed $al killed $eax
5264 ; X86-LABEL: sadd_sat_v1i1:
5266 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
5267 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
5268 ; X86-NEXT: ## kill: def $al killed $al killed $eax
5270 %z = call <1 x i1> @llvm.sadd.sat.v1i1(<1 x i1> %x, <1 x i1> %y)
5273 declare <1 x i1> @llvm.sadd.sat.v1i1(<1 x i1> %x, <1 x i1> %y)
; ssub_sat_v1i1 - signed saturating subtract on <1 x i1>.
; Calls the llvm.ssub.sat.v1i1 intrinsic (declared right after this function)
; on the two arguments. The assertions below expect the same mask-register
; kandnw sequence that is expected for usub_sat_v1i1.
5275 define <1 x i1> @ssub_sat_v1i1(<1 x i1> %x, <1 x i1> %y) nounwind {
5276 ; KNL-LABEL: ssub_sat_v1i1:
5278 ; KNL-NEXT: kmovw %esi, %k0
5279 ; KNL-NEXT: kmovw %edi, %k1
5280 ; KNL-NEXT: kandnw %k1, %k0, %k0
5281 ; KNL-NEXT: kmovw %k0, %eax
5282 ; KNL-NEXT: ## kill: def $al killed $al killed $eax
5285 ; SKX-LABEL: ssub_sat_v1i1:
5287 ; SKX-NEXT: kmovd %esi, %k0
5288 ; SKX-NEXT: kmovd %edi, %k1
5289 ; SKX-NEXT: kandnw %k1, %k0, %k0
5290 ; SKX-NEXT: kmovd %k0, %eax
5291 ; SKX-NEXT: ## kill: def $al killed $al killed $eax
5294 ; AVX512BW-LABEL: ssub_sat_v1i1:
5295 ; AVX512BW: ## %bb.0:
5296 ; AVX512BW-NEXT: kmovd %esi, %k0
5297 ; AVX512BW-NEXT: kmovd %edi, %k1
5298 ; AVX512BW-NEXT: kandnw %k1, %k0, %k0
5299 ; AVX512BW-NEXT: kmovd %k0, %eax
5300 ; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
5301 ; AVX512BW-NEXT: retq
5303 ; AVX512DQ-LABEL: ssub_sat_v1i1:
5304 ; AVX512DQ: ## %bb.0:
5305 ; AVX512DQ-NEXT: kmovw %esi, %k0
5306 ; AVX512DQ-NEXT: kmovw %edi, %k1
5307 ; AVX512DQ-NEXT: kandnw %k1, %k0, %k0
5308 ; AVX512DQ-NEXT: kmovw %k0, %eax
5309 ; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
5310 ; AVX512DQ-NEXT: retq
5312 ; X86-LABEL: ssub_sat_v1i1:
5314 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0
5315 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
5316 ; X86-NEXT: kandnw %k1, %k0, %k0
5317 ; X86-NEXT: kmovd %k0, %eax
5318 ; X86-NEXT: ## kill: def $al killed $al killed $eax
5320 %z = call <1 x i1> @llvm.ssub.sat.v1i1(<1 x i1> %x, <1 x i1> %y)
5323 declare <1 x i1> @llvm.ssub.sat.v1i1(<1 x i1> %x, <1 x i1> %y)
; Module-level metadata. The single module flag !0 attaches a "ProfileSummary"
; node (!1) in InstrProf format; !2..!9 are its fields and !10..!13 form the
; DetailedSummary list.
; !14 records a function_entry_count of 0. It is not referenced in this part
; of the file - presumably it is attached as !prof to a function defined
; earlier (TODO confirm).
5325 !llvm.module.flags = !{!0}
5326 !0 = !{i32 1, !"ProfileSummary", !1}
5327 !1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
5328 !2 = !{!"ProfileFormat", !"InstrProf"}
5329 !3 = !{!"TotalCount", i64 10000}
5330 !4 = !{!"MaxCount", i64 10}
5331 !5 = !{!"MaxInternalCount", i64 1}
5332 !6 = !{!"MaxFunctionCount", i64 1000}
5333 !7 = !{!"NumCounts", i64 3}
5334 !8 = !{!"NumFunctions", i64 3}
5335 !9 = !{!"DetailedSummary", !10}
5336 !10 = !{!11, !12, !13}
5337 !11 = !{i32 10000, i64 100, i32 1}
5338 !12 = !{i32 999000, i64 100, i32 1}
5339 !13 = !{i32 999999, i64 1, i32 2}
5340 !14 = !{!"function_entry_count", i64 0}