1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512DQ --check-prefix=SKX
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512DQ --check-prefix=AVX512DQNOBW
; Masked zero-extension: loads <8 x i8> from %i (align 1), zero-extends it to
; <8 x i16>, and selects zeroinitializer for lanes whose %mask bit is clear.
; The SKX run (avx512bw enabled) expects the mask in %k1 via vpmovw2m and a
; zero-masked vpmovzxbw from memory; the KNL and AVX512DQNOBW runs expect the
; mask built in a vector register (vpsllw/vpsraw $15) and applied with vpand.
; NOTE(review): the function is declared readnone yet loads through %i -
; confirm the attribute is intended.
6 define <8 x i16> @zext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone {
7 ; KNL-LABEL: zext_8x8mem_to_8x16:
9 ; KNL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
10 ; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
11 ; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
12 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
15 ; SKX-LABEL: zext_8x8mem_to_8x16:
17 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
18 ; SKX-NEXT: vpmovw2m %xmm0, %k1
19 ; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
22 ; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x16:
23 ; AVX512DQNOBW: # %bb.0:
24 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
25 ; AVX512DQNOBW-NEXT: vpsllw $15, %xmm0, %xmm0
26 ; AVX512DQNOBW-NEXT: vpsraw $15, %xmm0, %xmm0
27 ; AVX512DQNOBW-NEXT: vpand %xmm1, %xmm0, %xmm0
28 ; AVX512DQNOBW-NEXT: retq
29 %a = load <8 x i8>,ptr%i,align 1
30 %x = zext <8 x i8> %a to <8 x i16>
31 %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
; Masked sign-extension: loads <8 x i8> from %i (align 1), sign-extends it to
; <8 x i16>, and selects zeroinitializer for lanes whose %mask bit is clear.
; The SKX run expects vpmovw2m into %k1 followed by a zero-masked vpmovsxbw
; from (%rdi); the KNL and AVX512DQNOBW runs expect an unmasked vpmovsxbw load
; plus a vector mask (vpsllw/vpsraw $15) combined with vpand.
; NOTE(review): the function is declared readnone yet loads through %i -
; confirm the attribute is intended.
35 define <8 x i16> @sext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone {
36 ; KNL-LABEL: sext_8x8mem_to_8x16:
38 ; KNL-NEXT: vpmovsxbw (%rdi), %xmm1
39 ; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
40 ; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
41 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
44 ; SKX-LABEL: sext_8x8mem_to_8x16:
46 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
47 ; SKX-NEXT: vpmovw2m %xmm0, %k1
48 ; SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z}
51 ; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x16:
52 ; AVX512DQNOBW: # %bb.0:
53 ; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %xmm1
54 ; AVX512DQNOBW-NEXT: vpsllw $15, %xmm0, %xmm0
55 ; AVX512DQNOBW-NEXT: vpsraw $15, %xmm0, %xmm0
56 ; AVX512DQNOBW-NEXT: vpand %xmm1, %xmm0, %xmm0
57 ; AVX512DQNOBW-NEXT: retq
58 %a = load <8 x i8>,ptr%i,align 1
59 %x = sext <8 x i8> %a to <8 x i16>
60 %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
; Masked zero-extension: loads <16 x i8> from %i (align 1), zero-extends it to
; <16 x i16>, and selects zeroinitializer for lanes whose %mask bit is clear.
; The SKX run expects the byte mask moved to %k1 (vpsllw $7 + vpmovb2m) and a
; zero-masked vpmovzxbw into ymm0; the KNL and AVX512DQNOBW runs expect the
; mask bytes widened to words with vpmovzxbw, turned into a lane mask with
; vpsllw/vpsraw $15, and applied with vpand.
; NOTE(review): the function is declared readnone yet loads through %i -
; confirm the attribute is intended.
65 define <16 x i16> @zext_16x8mem_to_16x16(ptr%i , <16 x i1> %mask) nounwind readnone {
66 ; KNL-LABEL: zext_16x8mem_to_16x16:
68 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
69 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
70 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
71 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
72 ; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
75 ; SKX-LABEL: zext_16x8mem_to_16x16:
77 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
78 ; SKX-NEXT: vpmovb2m %xmm0, %k1
79 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
82 ; AVX512DQNOBW-LABEL: zext_16x8mem_to_16x16:
83 ; AVX512DQNOBW: # %bb.0:
84 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
85 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
86 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
87 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
88 ; AVX512DQNOBW-NEXT: vpand %ymm1, %ymm0, %ymm0
89 ; AVX512DQNOBW-NEXT: retq
90 %a = load <16 x i8>,ptr%i,align 1
91 %x = zext <16 x i8> %a to <16 x i16>
92 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Masked sign-extension: loads <16 x i8> from %i (align 1), sign-extends it to
; <16 x i16>, and selects zeroinitializer for lanes whose %mask bit is clear.
; The SKX run expects vpsllw $7 + vpmovb2m into %k1 and a zero-masked
; vpmovsxbw from (%rdi); the KNL and AVX512DQNOBW runs expect the mask bytes
; widened with vpmovzxbw, an unmasked vpmovsxbw load, a lane mask from
; vpsllw/vpsraw $15, and a final vpand.
; NOTE(review): the function is declared readnone yet loads through %i -
; confirm the attribute is intended.
96 define <16 x i16> @sext_16x8mem_to_16x16(ptr%i , <16 x i1> %mask) nounwind readnone {
97 ; KNL-LABEL: sext_16x8mem_to_16x16:
99 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
100 ; KNL-NEXT: vpmovsxbw (%rdi), %ymm1
101 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
102 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
103 ; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
106 ; SKX-LABEL: sext_16x8mem_to_16x16:
108 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
109 ; SKX-NEXT: vpmovb2m %xmm0, %k1
110 ; SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z}
113 ; AVX512DQNOBW-LABEL: sext_16x8mem_to_16x16:
114 ; AVX512DQNOBW: # %bb.0:
115 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
116 ; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %ymm1
117 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
118 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
119 ; AVX512DQNOBW-NEXT: vpand %ymm1, %ymm0, %ymm0
120 ; AVX512DQNOBW-NEXT: retq
121 %a = load <16 x i8>,ptr%i,align 1
122 %x = sext <16 x i8> %a to <16 x i16>
123 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Unmasked zero-extension of the register argument %a from <16 x i8> to
; <16 x i16>. The checks use the shared ALL prefix, so every RUN configuration
; is expected to emit the same single vpmovzxbw from xmm0 into ymm0.
127 define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
128 ; ALL-LABEL: zext_16x8_to_16x16:
130 ; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
132 %x = zext <16 x i8> %a to <16 x i16>
; Masked zero-extension of the register argument %a from <16 x i8> to
; <16 x i16>; lanes whose %mask bit is clear select zeroinitializer.
; The SKX run expects the mask (in xmm1) moved to %k1 with vpsllw $7 +
; vpmovb2m and a zero-masked vpmovzxbw; the KNL and AVX512DQNOBW runs expect
; both the mask and the data widened with vpmovzxbw, the mask turned into
; a per-word lane mask with vpsllw/vpsraw $15, and the two combined by vpand.
136 define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
137 ; KNL-LABEL: zext_16x8_to_16x16_mask:
139 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
140 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
141 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
142 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
143 ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
146 ; SKX-LABEL: zext_16x8_to_16x16_mask:
148 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
149 ; SKX-NEXT: vpmovb2m %xmm1, %k1
150 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
153 ; AVX512DQNOBW-LABEL: zext_16x8_to_16x16_mask:
154 ; AVX512DQNOBW: # %bb.0:
155 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
156 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
157 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
158 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
159 ; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0
160 ; AVX512DQNOBW-NEXT: retq
161 %x = zext <16 x i8> %a to <16 x i16>
162 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Unmasked sign-extension of the register argument %a from <16 x i8> to
; <16 x i16>. The checks use the shared ALL prefix, so every RUN configuration
; is expected to emit the same single vpmovsxbw from xmm0 into ymm0.
166 define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
167 ; ALL-LABEL: sext_16x8_to_16x16:
169 ; ALL-NEXT: vpmovsxbw %xmm0, %ymm0
171 %x = sext <16 x i8> %a to <16 x i16>
; Masked sign-extension of the register argument %a from <16 x i8> to
; <16 x i16>; lanes whose %mask bit is clear select zeroinitializer.
; The SKX run expects the mask (in xmm1) moved to %k1 with vpsllw $7 +
; vpmovb2m and a zero-masked vpmovsxbw; the KNL and AVX512DQNOBW runs expect
; the mask widened with vpmovzxbw, the data sign-extended with vpmovsxbw,
; a lane mask from vpsllw/vpsraw $15, and a final vpand.
175 define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
176 ; KNL-LABEL: sext_16x8_to_16x16_mask:
178 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
179 ; KNL-NEXT: vpmovsxbw %xmm0, %ymm0
180 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
181 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
182 ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
185 ; SKX-LABEL: sext_16x8_to_16x16_mask:
187 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
188 ; SKX-NEXT: vpmovb2m %xmm1, %k1
189 ; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z}
192 ; AVX512DQNOBW-LABEL: sext_16x8_to_16x16_mask:
193 ; AVX512DQNOBW: # %bb.0:
194 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
195 ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm0
196 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
197 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
198 ; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0
199 ; AVX512DQNOBW-NEXT: retq
200 %x = sext <16 x i8> %a to <16 x i16>
201 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Masked zero-extension: loads <32 x i8> from %i (align 1), zero-extends it to
; <32 x i16>, and selects zeroinitializer for lanes whose %mask bit is clear.
; The SKX run expects vpsllw $7 + vpmovb2m on ymm0 and one zero-masked
; vpmovzxbw into zmm0. The KNL and AVX512DQNOBW runs expect the work split
; into two 256-bit halves: the mask is split with vextracti128 and widened,
; the data is loaded by two vpmovzxbw instructions, each half mask goes
; through vpsllw/vpsraw $15, the halves are joined with vinserti64x4, and the
; result is formed by vpandq.
; NOTE(review): the function is declared readnone yet loads through %i -
; confirm the attribute is intended.
205 define <32 x i16> @zext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readnone {
206 ; KNL-LABEL: zext_32x8mem_to_32x16:
208 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
209 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
210 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
211 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
212 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
213 ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
214 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
215 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
216 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
217 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
218 ; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
219 ; KNL-NEXT: vpandq %zmm2, %zmm0, %zmm0
222 ; SKX-LABEL: zext_32x8mem_to_32x16:
224 ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
225 ; SKX-NEXT: vpmovb2m %ymm0, %k1
226 ; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero
229 ; AVX512DQNOBW-LABEL: zext_32x8mem_to_32x16:
230 ; AVX512DQNOBW: # %bb.0:
231 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
232 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
233 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
234 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
235 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
236 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
237 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
238 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
239 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
240 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
241 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
242 ; AVX512DQNOBW-NEXT: vpandq %zmm2, %zmm0, %zmm0
243 ; AVX512DQNOBW-NEXT: retq
244 %a = load <32 x i8>,ptr%i,align 1
245 %x = zext <32 x i8> %a to <32 x i16>
246 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; Masked sign-extension: loads <32 x i8> from %i (align 1), sign-extends it to
; <32 x i16>, and selects zeroinitializer for lanes whose %mask bit is clear.
; The SKX run expects vpsllw $7 + vpmovb2m on ymm0 and one zero-masked
; vpmovsxbw from (%rdi) into zmm0. The KNL and AVX512DQNOBW runs expect two
; 256-bit halves: vpmovsxbw loads from 16(%rdi) and (%rdi) joined with
; vinserti64x4, the mask split with vextracti128, widened and converted by
; vpsllw/vpsraw $15 per half, re-joined, and applied with vpandq.
; NOTE(review): the function is declared readnone yet loads through %i -
; confirm the attribute is intended.
250 define <32 x i16> @sext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readnone {
251 ; KNL-LABEL: sext_32x8mem_to_32x16:
253 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
254 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
255 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
256 ; KNL-NEXT: vpmovsxbw 16(%rdi), %ymm2
257 ; KNL-NEXT: vpmovsxbw (%rdi), %ymm3
258 ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
259 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
260 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
261 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
262 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
263 ; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
264 ; KNL-NEXT: vpandq %zmm2, %zmm0, %zmm0
267 ; SKX-LABEL: sext_32x8mem_to_32x16:
269 ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
270 ; SKX-NEXT: vpmovb2m %ymm0, %k1
271 ; SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z}
274 ; AVX512DQNOBW-LABEL: sext_32x8mem_to_32x16:
275 ; AVX512DQNOBW: # %bb.0:
276 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
277 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
278 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
279 ; AVX512DQNOBW-NEXT: vpmovsxbw 16(%rdi), %ymm2
280 ; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %ymm3
281 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
282 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
283 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
284 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
285 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
286 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
287 ; AVX512DQNOBW-NEXT: vpandq %zmm2, %zmm0, %zmm0
288 ; AVX512DQNOBW-NEXT: retq
289 %a = load <32 x i8>,ptr%i,align 1
290 %x = sext <32 x i8> %a to <32 x i16>
291 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; Unmasked zero-extension of the register argument %a from <32 x i8> to
; <32 x i16>. The SKX run expects a single vpmovzxbw from ymm0 into zmm0;
; the KNL and AVX512DQNOBW runs expect the low and high 128-bit halves
; (vextracti128) to be extended separately with vpmovzxbw and then joined
; with vinserti64x4.
295 define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
296 ; KNL-LABEL: zext_32x8_to_32x16:
298 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
299 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
300 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
301 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
304 ; SKX-LABEL: zext_32x8_to_32x16:
306 ; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
309 ; AVX512DQNOBW-LABEL: zext_32x8_to_32x16:
310 ; AVX512DQNOBW: # %bb.0:
311 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
312 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm0
313 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
314 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
315 ; AVX512DQNOBW-NEXT: retq
316 %x = zext <32 x i8> %a to <32 x i16>
; Masked zero-extension of the register argument %a from <32 x i8> to
; <32 x i16>; lanes whose %mask bit is clear select zeroinitializer.
; The SKX run expects the mask (in ymm1) moved to %k1 with vpsllw $7 +
; vpmovb2m and a single zero-masked vpmovzxbw into zmm0. The KNL and
; AVX512DQNOBW runs expect both mask and data split into 128-bit halves
; (vextracti128), widened with vpmovzxbw, the mask halves converted by
; vpsllw/vpsraw $15, both joined with vinserti64x4, and combined by vpandq.
320 define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
321 ; KNL-LABEL: zext_32x8_to_32x16_mask:
323 ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm2
324 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
325 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
326 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
327 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
328 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
329 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0
330 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
331 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
332 ; KNL-NEXT: vpsllw $15, %ymm2, %ymm2
333 ; KNL-NEXT: vpsraw $15, %ymm2, %ymm2
334 ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
335 ; KNL-NEXT: vpandq %zmm0, %zmm1, %zmm0
338 ; SKX-LABEL: zext_32x8_to_32x16_mask:
340 ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1
341 ; SKX-NEXT: vpmovb2m %ymm1, %k1
342 ; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
345 ; AVX512DQNOBW-LABEL: zext_32x8_to_32x16_mask:
346 ; AVX512DQNOBW: # %bb.0:
347 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm1, %xmm2
348 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
349 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
350 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
351 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm0
352 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
353 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0
354 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
355 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
356 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm2, %ymm2
357 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm2, %ymm2
358 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
359 ; AVX512DQNOBW-NEXT: vpandq %zmm0, %zmm1, %zmm0
360 ; AVX512DQNOBW-NEXT: retq
361 %x = zext <32 x i8> %a to <32 x i16>
362 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; Unmasked sign-extension of the register argument %a from <32 x i8> to
; <32 x i16>. The SKX run expects a single vpmovsxbw from ymm0 into zmm0;
; the KNL and AVX512DQNOBW runs expect the low and high 128-bit halves
; (vextracti128) to be extended separately with vpmovsxbw and then joined
; with vinserti64x4.
366 define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
367 ; KNL-LABEL: sext_32x8_to_32x16:
369 ; KNL-NEXT: vpmovsxbw %xmm0, %ymm1
370 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
371 ; KNL-NEXT: vpmovsxbw %xmm0, %ymm0
372 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
375 ; SKX-LABEL: sext_32x8_to_32x16:
377 ; SKX-NEXT: vpmovsxbw %ymm0, %zmm0
380 ; AVX512DQNOBW-LABEL: sext_32x8_to_32x16:
381 ; AVX512DQNOBW: # %bb.0:
382 ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm1
383 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm0
384 ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm0
385 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
386 ; AVX512DQNOBW-NEXT: retq
387 %x = sext <32 x i8> %a to <32 x i16>
; Masked sign-extension of the register argument %a from <32 x i8> to
; <32 x i16>; lanes whose %mask bit is clear select zeroinitializer.
; The SKX run expects the mask (in ymm1) moved to %k1 with vpsllw $7 +
; vpmovb2m and a single zero-masked vpmovsxbw into zmm0. The KNL and
; AVX512DQNOBW runs expect the mask split (vextracti128) and widened with
; vpmovzxbw, the data halves sign-extended with vpmovsxbw, the mask halves
; converted by vpsllw/vpsraw $15, both joined with vinserti64x4, and combined
; by vpandq.
391 define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
392 ; KNL-LABEL: sext_32x8_to_32x16_mask:
394 ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm2
395 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
396 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
397 ; KNL-NEXT: vpmovsxbw %xmm0, %ymm3
398 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
399 ; KNL-NEXT: vpmovsxbw %xmm0, %ymm0
400 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0
401 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
402 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
403 ; KNL-NEXT: vpsllw $15, %ymm2, %ymm2
404 ; KNL-NEXT: vpsraw $15, %ymm2, %ymm2
405 ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
406 ; KNL-NEXT: vpandq %zmm0, %zmm1, %zmm0
409 ; SKX-LABEL: sext_32x8_to_32x16_mask:
411 ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1
412 ; SKX-NEXT: vpmovb2m %ymm1, %k1
413 ; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z}
416 ; AVX512DQNOBW-LABEL: sext_32x8_to_32x16_mask:
417 ; AVX512DQNOBW: # %bb.0:
418 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm1, %xmm2
419 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
420 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
421 ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm3
422 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm0
423 ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm0
424 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0
425 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
426 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
427 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm2, %ymm2
428 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm2, %ymm2
429 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
430 ; AVX512DQNOBW-NEXT: vpandq %zmm0, %zmm1, %zmm0
431 ; AVX512DQNOBW-NEXT: retq
432 %x = sext <32 x i8> %a to <32 x i16>
433 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; Masked zero-extension: loads <4 x i8> from %i (align 1), zero-extends it to
; <4 x i32>, and selects zeroinitializer for lanes whose %mask bit is clear.
; The KNL run expects the mask built with vpslld $31 + vptestmd on zmm
; registers, an unmasked vpmovzxbd load, and a zero-masked vmovdqa32 on zmm0
; followed by vzeroupper. Both DQ runs share the AVX512DQ prefix and expect
; vpmovd2m on xmm0 plus a zero-masked vpmovzxbd from memory.
; NOTE(review): the function is declared readnone yet loads through %i -
; confirm the attribute is intended.
437 define <4 x i32> @zext_4x8mem_to_4x32(ptr%i , <4 x i1> %mask) nounwind readnone {
438 ; KNL-LABEL: zext_4x8mem_to_4x32:
440 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
441 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
442 ; KNL-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
443 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
444 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
445 ; KNL-NEXT: vzeroupper
448 ; AVX512DQ-LABEL: zext_4x8mem_to_4x32:
450 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
451 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
452 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
453 ; AVX512DQ-NEXT: retq
454 %a = load <4 x i8>,ptr%i,align 1
455 %x = zext <4 x i8> %a to <4 x i32>
456 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Masked sign-extension: loads <4 x i8> from %i (align 1), sign-extends it to
; <4 x i32>, and selects zeroinitializer for lanes whose %mask bit is clear.
; The KNL run expects the mask built with vpslld $31 + vptestmd on zmm
; registers, an unmasked vpmovsxbd load from (%rdi), and a zero-masked
; vmovdqa32 on zmm0 followed by vzeroupper. Both DQ runs share the AVX512DQ
; prefix and expect vpmovd2m on xmm0 plus a zero-masked vpmovsxbd from (%rdi).
; NOTE(review): the function is declared readnone yet loads through %i -
; confirm the attribute is intended.
460 define <4 x i32> @sext_4x8mem_to_4x32(ptr%i , <4 x i1> %mask) nounwind readnone {
461 ; KNL-LABEL: sext_4x8mem_to_4x32:
463 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
464 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
465 ; KNL-NEXT: vpmovsxbd (%rdi), %xmm0
466 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
467 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
468 ; KNL-NEXT: vzeroupper
471 ; AVX512DQ-LABEL: sext_4x8mem_to_4x32:
473 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
474 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
475 ; AVX512DQ-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z}
476 ; AVX512DQ-NEXT: retq
477 %a = load <4 x i8>,ptr%i,align 1
478 %x = sext <4 x i8> %a to <4 x i32>
479 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Masked zero-extension: loads <8 x i8> from %i (align 1), zero-extends it to
; <8 x i32>, and selects zeroinitializer for lanes whose %mask bit is clear.
; Each run expects a different mask sequence: the KNL run widens the mask with
; vpmovsxwq and tests it with vpsllq $63 + vptestmq, then applies a
; zero-masked vmovdqa32 on zmm0 after an unmasked vpmovzxbd load; the SKX run
; uses vpsllw $15 + vpmovw2m; the AVX512DQNOBW run uses vpmovsxwd +
; vpslld $31 + vpmovd2m. The latter two expect a zero-masked vpmovzxbd load.
; NOTE(review): the function is declared readnone yet loads through %i -
; confirm the attribute is intended.
483 define <8 x i32> @zext_8x8mem_to_8x32(ptr%i , <8 x i1> %mask) nounwind readnone {
484 ; KNL-LABEL: zext_8x8mem_to_8x32:
486 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
487 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
488 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
489 ; KNL-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
490 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
491 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
494 ; SKX-LABEL: zext_8x8mem_to_8x32:
496 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
497 ; SKX-NEXT: vpmovw2m %xmm0, %k1
498 ; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
501 ; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x32:
502 ; AVX512DQNOBW: # %bb.0:
503 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
504 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
505 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
506 ; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
507 ; AVX512DQNOBW-NEXT: retq
508 %a = load <8 x i8>,ptr%i,align 1
509 %x = zext <8 x i8> %a to <8 x i32>
510 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Masked sign-extension: loads <8 x i8> from %i (align 1), sign-extends it to
; <8 x i32>, and selects zeroinitializer for lanes whose %mask bit is clear.
; Each run expects a different mask sequence: the KNL run widens the mask with
; vpmovsxwq and tests it with vpsllq $63 + vptestmq, then applies a
; zero-masked vmovdqa32 on zmm0 after an unmasked vpmovsxbd load; the SKX run
; uses vpsllw $15 + vpmovw2m; the AVX512DQNOBW run uses vpmovsxwd +
; vpslld $31 + vpmovd2m. The latter two expect a zero-masked vpmovsxbd load.
; NOTE(review): the function is declared readnone yet loads through %i -
; confirm the attribute is intended.
514 define <8 x i32> @sext_8x8mem_to_8x32(ptr%i , <8 x i1> %mask) nounwind readnone {
515 ; KNL-LABEL: sext_8x8mem_to_8x32:
517 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
518 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
519 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
520 ; KNL-NEXT: vpmovsxbd (%rdi), %ymm0
521 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
522 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
525 ; SKX-LABEL: sext_8x8mem_to_8x32:
527 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
528 ; SKX-NEXT: vpmovw2m %xmm0, %k1
529 ; SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z}
532 ; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x32:
533 ; AVX512DQNOBW: # %bb.0:
534 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
535 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
536 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
537 ; AVX512DQNOBW-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z}
538 ; AVX512DQNOBW-NEXT: retq
539 %a = load <8 x i8>,ptr%i,align 1
540 %x = sext <8 x i8> %a to <8 x i32>
541 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Masked zero-extension: loads <16 x i8> from %i (align 1), zero-extends it to
; <16 x i32>, and selects zeroinitializer for lanes whose %mask bit is clear.
; Every run expects a zero-masked vpmovzxbd from memory into zmm0; only the
; mask setup differs: vpmovsxbd + vpslld $31 + vptestmd for the KNL run,
; vpsllw $7 + vpmovb2m for the SKX run, and vpmovsxbd + vpslld $31 +
; vpmovd2m for the AVX512DQNOBW run.
; NOTE(review): the function is declared readnone yet loads through %i -
; confirm the attribute is intended.
545 define <16 x i32> @zext_16x8mem_to_16x32(ptr%i , <16 x i1> %mask) nounwind readnone {
546 ; KNL-LABEL: zext_16x8mem_to_16x32:
548 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
549 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
550 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
551 ; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
554 ; SKX-LABEL: zext_16x8mem_to_16x32:
556 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
557 ; SKX-NEXT: vpmovb2m %xmm0, %k1
558 ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
561 ; AVX512DQNOBW-LABEL: zext_16x8mem_to_16x32:
562 ; AVX512DQNOBW: # %bb.0:
563 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
564 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
565 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1
566 ; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
567 ; AVX512DQNOBW-NEXT: retq
568 %a = load <16 x i8>,ptr%i,align 1
569 %x = zext <16 x i8> %a to <16 x i32>
570 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Masked sign-extending load. <16 x i8> is loaded from %i (align 1), widened to
; <16 x i32>, and lanes whose %mask bit is clear are taken from zeroinitializer.
; Every check set expects a single zero-masked vpmovsxbd reading (%rdi).
; The function is readonly rather than readnone because the body loads through %i.
574 define <16 x i32> @sext_16x8mem_to_16x32(ptr%i , <16 x i1> %mask) nounwind readonly {
575 ; KNL-LABEL: sext_16x8mem_to_16x32:
577 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
578 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
579 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
580 ; KNL-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
583 ; SKX-LABEL: sext_16x8mem_to_16x32:
585 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
586 ; SKX-NEXT: vpmovb2m %xmm0, %k1
587 ; SKX-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
590 ; AVX512DQNOBW-LABEL: sext_16x8mem_to_16x32:
591 ; AVX512DQNOBW: # %bb.0:
592 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
593 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
594 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1
595 ; AVX512DQNOBW-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
596 ; AVX512DQNOBW-NEXT: retq
597 %a = load <16 x i8>,ptr%i,align 1
598 %x = sext <16 x i8> %a to <16 x i32>
599 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Masked zero-extension of a register operand. The <16 x i8> argument %a is widened
; to <16 x i32> and lanes whose %mask bit is clear are taken from zeroinitializer.
; Every check set expects one zero-masked vpmovzxbd from xmm0, with the mask built
; from xmm1 (the second vector argument) into k1 first.
603 define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
604 ; KNL-LABEL: zext_16x8_to_16x32_mask:
606 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
607 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
608 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
609 ; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
612 ; SKX-LABEL: zext_16x8_to_16x32_mask:
614 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
615 ; SKX-NEXT: vpmovb2m %xmm1, %k1
616 ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
619 ; AVX512DQNOBW-LABEL: zext_16x8_to_16x32_mask:
620 ; AVX512DQNOBW: # %bb.0:
621 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1
622 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1
623 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1
624 ; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
625 ; AVX512DQNOBW-NEXT: retq
626 %x = zext <16 x i8> %a to <16 x i32>
627 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Masked sign-extension of a register operand. The <16 x i8> argument %a is widened
; to <16 x i32> and lanes whose %mask bit is clear are taken from zeroinitializer.
; Every check set expects one zero-masked vpmovsxbd from xmm0 to zmm0 under k1.
631 define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
632 ; KNL-LABEL: sext_16x8_to_16x32_mask:
634 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
635 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
636 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
637 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
640 ; SKX-LABEL: sext_16x8_to_16x32_mask:
642 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
643 ; SKX-NEXT: vpmovb2m %xmm1, %k1
644 ; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
647 ; AVX512DQNOBW-LABEL: sext_16x8_to_16x32_mask:
648 ; AVX512DQNOBW: # %bb.0:
649 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1
650 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1
651 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1
652 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
653 ; AVX512DQNOBW-NEXT: retq
654 %x = sext <16 x i8> %a to <16 x i32>
655 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Unmasked zero-extension of the <16 x i8> argument %i to <16 x i32>. The shared
; check prefix expects a single vpmovzxbd from xmm0 to zmm0 on every configuration.
659 define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
660 ; ALL-LABEL: zext_16x8_to_16x32:
662 ; ALL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
664 %x = zext <16 x i8> %i to <16 x i32>
; Unmasked sign-extension of the <16 x i8> argument %i to <16 x i32>. The shared
; check prefix expects a single vpmovsxbd from xmm0 to zmm0 on every configuration.
668 define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
669 ; ALL-LABEL: sext_16x8_to_16x32:
671 ; ALL-NEXT: vpmovsxbd %xmm0, %zmm0
673 %x = sext <16 x i8> %i to <16 x i32>
; Masked zero-extending load. <2 x i8> is loaded from %i (align 1), widened to
; <2 x i64>, and lanes whose %mask bit is clear are taken from zeroinitializer.
; The KNL checks expect an unmasked vpmovzxbq followed by a zero-masking 512-bit
; vmovdqa64; the AVX512DQ checks expect the mask folded into the vpmovzxbq itself.
; The function is readonly rather than readnone because the body loads through %i.
677 define <2 x i64> @zext_2x8mem_to_2x64(ptr%i , <2 x i1> %mask) nounwind readonly {
678 ; KNL-LABEL: zext_2x8mem_to_2x64:
680 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
681 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
682 ; KNL-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
683 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
684 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
685 ; KNL-NEXT: vzeroupper
688 ; AVX512DQ-LABEL: zext_2x8mem_to_2x64:
690 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
691 ; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
692 ; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
693 ; AVX512DQ-NEXT: retq
694 %a = load <2 x i8>,ptr%i,align 1
695 %x = zext <2 x i8> %a to <2 x i64>
696 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Masked sign-extending load. <2 x i8> is loaded from %i (align 1), widened to
; <2 x i64>, and lanes whose %mask bit is clear are taken from zeroinitializer.
; The KNL checks expect an unmasked vpmovsxbq followed by a zero-masking 512-bit
; vmovdqa64; the AVX512DQ checks expect the mask folded into the vpmovsxbq itself.
; The function is readonly rather than readnone because the body loads through %i.
699 define <2 x i64> @sext_2x8mem_to_2x64mask(ptr%i , <2 x i1> %mask) nounwind readonly {
700 ; KNL-LABEL: sext_2x8mem_to_2x64mask:
702 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
703 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
704 ; KNL-NEXT: vpmovsxbq (%rdi), %xmm0
705 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
706 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
707 ; KNL-NEXT: vzeroupper
710 ; AVX512DQ-LABEL: sext_2x8mem_to_2x64mask:
712 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
713 ; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
714 ; AVX512DQ-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z}
715 ; AVX512DQ-NEXT: retq
716 %a = load <2 x i8>,ptr%i,align 1
717 %x = sext <2 x i8> %a to <2 x i64>
718 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Unmasked sign-extending load. <2 x i8> is loaded from %i (align 1) and widened to
; <2 x i64>; the shared check prefix expects a single vpmovsxbq reading (%rdi).
; The function is readonly rather than readnone because the body loads through %i.
721 define <2 x i64> @sext_2x8mem_to_2x64(ptr%i) nounwind readonly {
722 ; ALL-LABEL: sext_2x8mem_to_2x64:
724 ; ALL-NEXT: vpmovsxbq (%rdi), %xmm0
726 %a = load <2 x i8>,ptr%i,align 1
727 %x = sext <2 x i8> %a to <2 x i64>
; Masked zero-extending load. <4 x i8> is loaded from %i (align 1), widened to
; <4 x i64>, and lanes whose %mask bit is clear are taken from zeroinitializer.
; The KNL checks expect an unmasked vpmovzxbq followed by a zero-masking 512-bit
; vmovdqa64; the AVX512DQ checks expect the mask folded into the vpmovzxbq itself.
; The function is readonly rather than readnone because the body loads through %i.
731 define <4 x i64> @zext_4x8mem_to_4x64(ptr%i , <4 x i1> %mask) nounwind readonly {
732 ; KNL-LABEL: zext_4x8mem_to_4x64:
734 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
735 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
736 ; KNL-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
737 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
738 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
741 ; AVX512DQ-LABEL: zext_4x8mem_to_4x64:
743 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
744 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
745 ; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
746 ; AVX512DQ-NEXT: retq
747 %a = load <4 x i8>,ptr%i,align 1
748 %x = zext <4 x i8> %a to <4 x i64>
749 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Masked sign-extending load. <4 x i8> is loaded from %i (align 1), widened to
; <4 x i64>, and lanes whose %mask bit is clear are taken from zeroinitializer.
; The KNL checks expect an unmasked vpmovsxbq followed by a zero-masking 512-bit
; vmovdqa64; the AVX512DQ checks expect the mask folded into the vpmovsxbq itself.
; The function is readonly rather than readnone because the body loads through %i.
753 define <4 x i64> @sext_4x8mem_to_4x64mask(ptr%i , <4 x i1> %mask) nounwind readonly {
754 ; KNL-LABEL: sext_4x8mem_to_4x64mask:
756 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
757 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
758 ; KNL-NEXT: vpmovsxbq (%rdi), %ymm0
759 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
760 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
763 ; AVX512DQ-LABEL: sext_4x8mem_to_4x64mask:
765 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
766 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
767 ; AVX512DQ-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z}
768 ; AVX512DQ-NEXT: retq
769 %a = load <4 x i8>,ptr%i,align 1
770 %x = sext <4 x i8> %a to <4 x i64>
771 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Unmasked sign-extending load. <4 x i8> is loaded from %i (align 1) and widened to
; <4 x i64>; the shared check prefix expects a single vpmovsxbq from (%rdi) to ymm0.
; The function is readonly rather than readnone because the body loads through %i.
775 define <4 x i64> @sext_4x8mem_to_4x64(ptr%i) nounwind readonly {
776 ; ALL-LABEL: sext_4x8mem_to_4x64:
778 ; ALL-NEXT: vpmovsxbq (%rdi), %ymm0
780 %a = load <4 x i8>,ptr%i,align 1
781 %x = sext <4 x i8> %a to <4 x i64>
; Masked zero-extending load. <8 x i8> is loaded from %i (align 1), widened to
; <8 x i64>, and lanes whose %mask bit is clear are taken from zeroinitializer.
; Every check set expects a single zero-masked vpmovzxbq with a memory operand;
; only the instructions that move the <8 x i1> mask into k1 differ.
; The function is readonly rather than readnone because the body loads through %i.
785 define <8 x i64> @zext_8x8mem_to_8x64(ptr%i , <8 x i1> %mask) nounwind readonly {
786 ; KNL-LABEL: zext_8x8mem_to_8x64:
788 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
789 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
790 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
791 ; KNL-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
794 ; SKX-LABEL: zext_8x8mem_to_8x64:
796 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
797 ; SKX-NEXT: vpmovw2m %xmm0, %k1
798 ; SKX-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
801 ; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x64:
802 ; AVX512DQNOBW: # %bb.0:
803 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
804 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
805 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
806 ; AVX512DQNOBW-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
807 ; AVX512DQNOBW-NEXT: retq
808 %a = load <8 x i8>,ptr%i,align 1
809 %x = zext <8 x i8> %a to <8 x i64>
810 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Masked sign-extending load. <8 x i8> is loaded from %i (align 1), widened to
; <8 x i64>, and lanes whose %mask bit is clear are taken from zeroinitializer.
; Every check set expects a single zero-masked vpmovsxbq from (%rdi) to zmm0.
; The function is readonly rather than readnone because the body loads through %i.
814 define <8 x i64> @sext_8x8mem_to_8x64mask(ptr%i , <8 x i1> %mask) nounwind readonly {
815 ; KNL-LABEL: sext_8x8mem_to_8x64mask:
817 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
818 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
819 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
820 ; KNL-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
823 ; SKX-LABEL: sext_8x8mem_to_8x64mask:
825 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
826 ; SKX-NEXT: vpmovw2m %xmm0, %k1
827 ; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
830 ; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x64mask:
831 ; AVX512DQNOBW: # %bb.0:
832 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
833 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
834 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
835 ; AVX512DQNOBW-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
836 ; AVX512DQNOBW-NEXT: retq
837 %a = load <8 x i8>,ptr%i,align 1
838 %x = sext <8 x i8> %a to <8 x i64>
839 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Unmasked sign-extending load. <8 x i8> is loaded from %i (align 1) and widened to
; <8 x i64>; the shared check prefix expects a single vpmovsxbq from (%rdi) to zmm0.
; The function is readonly rather than readnone because the body loads through %i.
843 define <8 x i64> @sext_8x8mem_to_8x64(ptr%i) nounwind readonly {
844 ; ALL-LABEL: sext_8x8mem_to_8x64:
846 ; ALL-NEXT: vpmovsxbq (%rdi), %zmm0
848 %a = load <8 x i8>,ptr%i,align 1
849 %x = sext <8 x i8> %a to <8 x i64>
; Masked zero-extending load. <4 x i16> is loaded from %i (align 1), widened to
; <4 x i32>, and lanes whose %mask bit is clear are taken from zeroinitializer.
; The KNL checks expect an unmasked vpmovzxwd followed by a zero-masking 512-bit
; vmovdqa32; the AVX512DQ checks expect the mask folded into the vpmovzxwd itself.
; The function is readonly rather than readnone because the body loads through %i.
853 define <4 x i32> @zext_4x16mem_to_4x32(ptr%i , <4 x i1> %mask) nounwind readonly {
854 ; KNL-LABEL: zext_4x16mem_to_4x32:
856 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
857 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
858 ; KNL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
859 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
860 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
861 ; KNL-NEXT: vzeroupper
864 ; AVX512DQ-LABEL: zext_4x16mem_to_4x32:
866 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
867 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
868 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
869 ; AVX512DQ-NEXT: retq
870 %a = load <4 x i16>,ptr%i,align 1
871 %x = zext <4 x i16> %a to <4 x i32>
872 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Masked sign-extending load. <4 x i16> is loaded from %i (align 1), widened to
; <4 x i32>, and lanes whose %mask bit is clear are taken from zeroinitializer.
; The KNL checks expect an unmasked vpmovsxwd followed by a zero-masking 512-bit
; vmovdqa32; the AVX512DQ checks expect the mask folded into the vpmovsxwd itself.
; The function is readonly rather than readnone because the body loads through %i.
876 define <4 x i32> @sext_4x16mem_to_4x32mask(ptr%i , <4 x i1> %mask) nounwind readonly {
877 ; KNL-LABEL: sext_4x16mem_to_4x32mask:
879 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
880 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
881 ; KNL-NEXT: vpmovsxwd (%rdi), %xmm0
882 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
883 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
884 ; KNL-NEXT: vzeroupper
887 ; AVX512DQ-LABEL: sext_4x16mem_to_4x32mask:
889 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
890 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
891 ; AVX512DQ-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z}
892 ; AVX512DQ-NEXT: retq
893 %a = load <4 x i16>,ptr%i,align 1
894 %x = sext <4 x i16> %a to <4 x i32>
895 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Unmasked sign-extending load. <4 x i16> is loaded from %i (align 1) and widened to
; <4 x i32>; the shared check prefix expects a single vpmovsxwd from (%rdi) to xmm0.
; The function is readonly rather than readnone because the body loads through %i.
899 define <4 x i32> @sext_4x16mem_to_4x32(ptr%i) nounwind readonly {
900 ; ALL-LABEL: sext_4x16mem_to_4x32:
902 ; ALL-NEXT: vpmovsxwd (%rdi), %xmm0
904 %a = load <4 x i16>,ptr%i,align 1
905 %x = sext <4 x i16> %a to <4 x i32>
; Masked zero-extending load. <8 x i16> is loaded from %i (align 1), widened to
; <8 x i32>, and lanes whose %mask bit is clear are taken from zeroinitializer.
; The KNL checks expect an unmasked vpmovzxwd followed by a zero-masking 512-bit
; vmovdqa32; the SKX and AVX512DQNOBW checks expect a zero-masked vpmovzxwd to ymm0.
; The function is readonly rather than readnone because the body loads through %i.
910 define <8 x i32> @zext_8x16mem_to_8x32(ptr%i , <8 x i1> %mask) nounwind readonly {
911 ; KNL-LABEL: zext_8x16mem_to_8x32:
913 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
914 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
915 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
916 ; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
917 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
918 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
921 ; SKX-LABEL: zext_8x16mem_to_8x32:
923 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
924 ; SKX-NEXT: vpmovw2m %xmm0, %k1
925 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
928 ; AVX512DQNOBW-LABEL: zext_8x16mem_to_8x32:
929 ; AVX512DQNOBW: # %bb.0:
930 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
931 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
932 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
933 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
934 ; AVX512DQNOBW-NEXT: retq
935 %a = load <8 x i16>,ptr%i,align 1
936 %x = zext <8 x i16> %a to <8 x i32>
937 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Masked sign-extending load. <8 x i16> is loaded from %i (align 1), widened to
; <8 x i32>, and lanes whose %mask bit is clear are taken from zeroinitializer.
; The KNL checks expect an unmasked vpmovsxwd followed by a zero-masking 512-bit
; vmovdqa32; the SKX and AVX512DQNOBW checks expect a zero-masked vpmovsxwd to ymm0.
; The function is readonly rather than readnone because the body loads through %i.
941 define <8 x i32> @sext_8x16mem_to_8x32mask(ptr%i , <8 x i1> %mask) nounwind readonly {
942 ; KNL-LABEL: sext_8x16mem_to_8x32mask:
944 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
945 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
946 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
947 ; KNL-NEXT: vpmovsxwd (%rdi), %ymm0
948 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
949 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
952 ; SKX-LABEL: sext_8x16mem_to_8x32mask:
954 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
955 ; SKX-NEXT: vpmovw2m %xmm0, %k1
956 ; SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z}
959 ; AVX512DQNOBW-LABEL: sext_8x16mem_to_8x32mask:
960 ; AVX512DQNOBW: # %bb.0:
961 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
962 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
963 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
964 ; AVX512DQNOBW-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z}
965 ; AVX512DQNOBW-NEXT: retq
966 %a = load <8 x i16>,ptr%i,align 1
967 %x = sext <8 x i16> %a to <8 x i32>
968 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Unmasked sign-extending load. <8 x i16> is loaded from %i (align 1) and widened to
; <8 x i32>; the shared check prefix expects a single vpmovsxwd from (%rdi) to ymm0.
; The function is readonly rather than readnone because the body loads through %i.
972 define <8 x i32> @sext_8x16mem_to_8x32(ptr%i) nounwind readonly {
973 ; ALL-LABEL: sext_8x16mem_to_8x32:
975 ; ALL-NEXT: vpmovsxwd (%rdi), %ymm0
977 %a = load <8 x i16>,ptr%i,align 1
978 %x = sext <8 x i16> %a to <8 x i32>
; Masked zero-extension of a register operand. The <8 x i16> argument %a is widened
; to <8 x i32> and lanes whose %mask bit is clear are taken from zeroinitializer.
; The KNL checks expect an unmasked vpmovzxwd followed by a zero-masking 512-bit
; vmovdqa32; the SKX and AVX512DQNOBW checks expect a zero-masked vpmovzxwd to ymm0.
982 define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
983 ; KNL-LABEL: zext_8x16_to_8x32mask:
985 ; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
986 ; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
987 ; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
988 ; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
989 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
990 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
993 ; SKX-LABEL: zext_8x16_to_8x32mask:
995 ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
996 ; SKX-NEXT: vpmovw2m %xmm1, %k1
997 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1000 ; AVX512DQNOBW-LABEL: zext_8x16_to_8x32mask:
1001 ; AVX512DQNOBW: # %bb.0:
1002 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1
1003 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1
1004 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1
1005 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1006 ; AVX512DQNOBW-NEXT: retq
1007 %x = zext <8 x i16> %a to <8 x i32>
1008 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Unmasked zero-extension of the <8 x i16> argument %a to <8 x i32>. The shared
; check prefix expects a single vpmovzxwd from xmm0 to ymm0 on every configuration.
1012 define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone {
1013 ; ALL-LABEL: zext_8x16_to_8x32:
1015 ; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1017 %x = zext <8 x i16> %a to <8 x i32>
; Masked zero-extending load. <16 x i16> is loaded from %i (align 1), widened to
; <16 x i32>, and lanes whose %mask bit is clear are taken from zeroinitializer.
; Every check set expects a single zero-masked vpmovzxwd with a memory operand;
; only the instructions that move the <16 x i1> mask into k1 differ.
; The function is readonly rather than readnone because the body loads through %i.
1021 define <16 x i32> @zext_16x16mem_to_16x32(ptr%i , <16 x i1> %mask) nounwind readonly {
1022 ; KNL-LABEL: zext_16x16mem_to_16x32:
1024 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
1025 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1026 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1027 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1030 ; SKX-LABEL: zext_16x16mem_to_16x32:
1032 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
1033 ; SKX-NEXT: vpmovb2m %xmm0, %k1
1034 ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1037 ; AVX512DQNOBW-LABEL: zext_16x16mem_to_16x32:
1038 ; AVX512DQNOBW: # %bb.0:
1039 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
1040 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
1041 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1
1042 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1043 ; AVX512DQNOBW-NEXT: retq
1044 %a = load <16 x i16>,ptr%i,align 1
1045 %x = zext <16 x i16> %a to <16 x i32>
1046 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Masked sign-extending load. <16 x i16> is loaded from %i (align 1), widened to
; <16 x i32>, and lanes whose %mask bit is clear are taken from zeroinitializer.
; Every check set expects a single zero-masked vpmovsxwd from (%rdi) to zmm0.
; The function is readonly rather than readnone because the body loads through %i.
1050 define <16 x i32> @sext_16x16mem_to_16x32mask(ptr%i , <16 x i1> %mask) nounwind readonly {
1051 ; KNL-LABEL: sext_16x16mem_to_16x32mask:
1053 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
1054 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1055 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1056 ; KNL-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
1059 ; SKX-LABEL: sext_16x16mem_to_16x32mask:
1061 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
1062 ; SKX-NEXT: vpmovb2m %xmm0, %k1
1063 ; SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
1066 ; AVX512DQNOBW-LABEL: sext_16x16mem_to_16x32mask:
1067 ; AVX512DQNOBW: # %bb.0:
1068 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
1069 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
1070 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1
1071 ; AVX512DQNOBW-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
1072 ; AVX512DQNOBW-NEXT: retq
1073 %a = load <16 x i16>,ptr%i,align 1
1074 %x = sext <16 x i16> %a to <16 x i32>
1075 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Unmasked sign-extending load. <16 x i16> is loaded from %i (align 1) and widened to
; <16 x i32>; the shared check prefix expects a single vpmovsxwd from (%rdi) to zmm0.
; The function is readonly rather than readnone because the body loads through %i.
1079 define <16 x i32> @sext_16x16mem_to_16x32(ptr%i) nounwind readonly {
1080 ; ALL-LABEL: sext_16x16mem_to_16x32:
1082 ; ALL-NEXT: vpmovsxwd (%rdi), %zmm0
1084 %a = load <16 x i16>,ptr%i,align 1
1085 %x = sext <16 x i16> %a to <16 x i32>
; Masked zero-extension of a register operand. The <16 x i16> argument %a is widened
; to <16 x i32> and lanes whose %mask bit is clear are taken from zeroinitializer.
; Every check set expects one zero-masked vpmovzxwd from ymm0 to zmm0, with the mask
; built from xmm1 into k1 first.
1088 define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone {
1089 ; KNL-LABEL: zext_16x16_to_16x32mask:
1091 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
1092 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
1093 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
1094 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1097 ; SKX-LABEL: zext_16x16_to_16x32mask:
1099 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
1100 ; SKX-NEXT: vpmovb2m %xmm1, %k1
1101 ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1104 ; AVX512DQNOBW-LABEL: zext_16x16_to_16x32mask:
1105 ; AVX512DQNOBW: # %bb.0:
1106 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1
1107 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1
1108 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1
1109 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1110 ; AVX512DQNOBW-NEXT: retq
1111 %x = zext <16 x i16> %a to <16 x i32>
1112 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Unmasked zero-extension of the <16 x i16> argument %a to <16 x i32>. The shared
; check prefix expects a single vpmovzxwd from ymm0 to zmm0 on every configuration.
1116 define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone {
1117 ; ALL-LABEL: zext_16x16_to_16x32:
1119 ; ALL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1121 %x = zext <16 x i16> %a to <16 x i32>
; Masked zero-extending load. <2 x i16> is loaded from %i (align 1), widened to
; <2 x i64>, and lanes whose %mask bit is clear are taken from zeroinitializer.
; The KNL checks expect an unmasked vpmovzxwq followed by a zero-masking 512-bit
; vmovdqa64; the AVX512DQ checks expect the mask folded into the vpmovzxwq itself.
; The function is readonly rather than readnone because the body loads through %i.
1125 define <2 x i64> @zext_2x16mem_to_2x64(ptr%i , <2 x i1> %mask) nounwind readonly {
1126 ; KNL-LABEL: zext_2x16mem_to_2x64:
1128 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1129 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1130 ; KNL-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1131 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1132 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1133 ; KNL-NEXT: vzeroupper
1136 ; AVX512DQ-LABEL: zext_2x16mem_to_2x64:
1137 ; AVX512DQ: # %bb.0:
1138 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
1139 ; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
1140 ; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1141 ; AVX512DQ-NEXT: retq
1142 %a = load <2 x i16>,ptr%i,align 1
1143 %x = zext <2 x i16> %a to <2 x i64>
1144 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Masked sign-extending load. <2 x i16> is loaded from %i (align 1), widened to
; <2 x i64>, and lanes whose %mask bit is clear are taken from zeroinitializer.
; The KNL checks expect an unmasked vpmovsxwq followed by a zero-masking 512-bit
; vmovdqa64; the AVX512DQ checks expect the mask folded into the vpmovsxwq itself.
; The function is readonly rather than readnone because the body loads through %i.
1148 define <2 x i64> @sext_2x16mem_to_2x64mask(ptr%i , <2 x i1> %mask) nounwind readonly {
1149 ; KNL-LABEL: sext_2x16mem_to_2x64mask:
1151 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1152 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1153 ; KNL-NEXT: vpmovsxwq (%rdi), %xmm0
1154 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1155 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1156 ; KNL-NEXT: vzeroupper
1159 ; AVX512DQ-LABEL: sext_2x16mem_to_2x64mask:
1160 ; AVX512DQ: # %bb.0:
1161 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
1162 ; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
1163 ; AVX512DQ-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z}
1164 ; AVX512DQ-NEXT: retq
1165 %a = load <2 x i16>,ptr%i,align 1
1166 %x = sext <2 x i16> %a to <2 x i64>
1167 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Unmasked sign-extending load. <2 x i16> is loaded from %i (align 1) and widened to
; <2 x i64>; the shared check prefix expects a single vpmovsxwq from (%rdi) to xmm0.
; The function is readonly rather than readnone because the body loads through %i.
1171 define <2 x i64> @sext_2x16mem_to_2x64(ptr%i) nounwind readonly {
1172 ; ALL-LABEL: sext_2x16mem_to_2x64:
1174 ; ALL-NEXT: vpmovsxwq (%rdi), %xmm0
1176 %a = load <2 x i16>,ptr%i,align 1
1177 %x = sext <2 x i16> %a to <2 x i64>
; Masked zero-extending load. <4 x i16> is loaded from %i (align 1), widened to
; <4 x i64>, and lanes whose %mask bit is clear are taken from zeroinitializer.
; The KNL checks expect an unmasked vpmovzxwq followed by a zero-masking 512-bit
; vmovdqa64; the AVX512DQ checks expect the mask folded into the vpmovzxwq itself.
; The function is readonly rather than readnone because the body loads through %i.
1181 define <4 x i64> @zext_4x16mem_to_4x64(ptr%i , <4 x i1> %mask) nounwind readonly {
1182 ; KNL-LABEL: zext_4x16mem_to_4x64:
1184 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1185 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1186 ; KNL-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1187 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1188 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1191 ; AVX512DQ-LABEL: zext_4x16mem_to_4x64:
1192 ; AVX512DQ: # %bb.0:
1193 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
1194 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
1195 ; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1196 ; AVX512DQ-NEXT: retq
1197 %a = load <4 x i16>,ptr%i,align 1
1198 %x = zext <4 x i16> %a to <4 x i64>
1199 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Masked sign-extending load. <4 x i16> is loaded from %i (align 1), widened to
; <4 x i64>, and lanes whose %mask bit is clear are taken from zeroinitializer.
; The KNL checks expect an unmasked vpmovsxwq followed by a zero-masking 512-bit
; vmovdqa64; the AVX512DQ checks expect the mask folded into the vpmovsxwq itself.
; The function is readonly rather than readnone because the body loads through %i.
1203 define <4 x i64> @sext_4x16mem_to_4x64mask(ptr%i , <4 x i1> %mask) nounwind readonly {
1204 ; KNL-LABEL: sext_4x16mem_to_4x64mask:
1206 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1207 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1208 ; KNL-NEXT: vpmovsxwq (%rdi), %ymm0
1209 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1210 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1213 ; AVX512DQ-LABEL: sext_4x16mem_to_4x64mask:
1214 ; AVX512DQ: # %bb.0:
1215 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
1216 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
1217 ; AVX512DQ-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z}
1218 ; AVX512DQ-NEXT: retq
1219 %a = load <4 x i16>,ptr%i,align 1
1220 %x = sext <4 x i16> %a to <4 x i64>
1221 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Unmasked sign-extending load. <4 x i16> is loaded from %i (align 1) and widened to
; <4 x i64>; the shared check prefix expects a single vpmovsxwq from (%rdi) to ymm0.
; The function is readonly rather than readnone because the body loads through %i.
1225 define <4 x i64> @sext_4x16mem_to_4x64(ptr%i) nounwind readonly {
1226 ; ALL-LABEL: sext_4x16mem_to_4x64:
1228 ; ALL-NEXT: vpmovsxwq (%rdi), %ymm0
1230 %a = load <4 x i16>,ptr%i,align 1
1231 %x = sext <4 x i16> %a to <4 x i64>
; Masked zero-extending load. <8 x i16> is loaded from %i (align 1), widened to
; <8 x i64>, and lanes whose %mask bit is clear are taken from zeroinitializer.
; Every check set expects a single zero-masked vpmovzxwq with a memory operand;
; only the instructions that move the <8 x i1> mask into k1 differ.
; The function is readonly rather than readnone because the body loads through %i.
1235 define <8 x i64> @zext_8x16mem_to_8x64(ptr%i , <8 x i1> %mask) nounwind readonly {
1236 ; KNL-LABEL: zext_8x16mem_to_8x64:
1238 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1239 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1240 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1241 ; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1244 ; SKX-LABEL: zext_8x16mem_to_8x64:
1246 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1247 ; SKX-NEXT: vpmovw2m %xmm0, %k1
1248 ; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1251 ; AVX512DQNOBW-LABEL: zext_8x16mem_to_8x64:
1252 ; AVX512DQNOBW: # %bb.0:
1253 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
1254 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
1255 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
1256 ; AVX512DQNOBW-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1257 ; AVX512DQNOBW-NEXT: retq
1258 %a = load <8 x i16>,ptr%i,align 1
1259 %x = zext <8 x i16> %a to <8 x i64>
1260 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Masked sign-extending load. <8 x i16> is loaded from %i (align 1), widened to
; <8 x i64>, and lanes whose %mask bit is clear are taken from zeroinitializer.
; Every check set expects a single zero-masked vpmovsxwq from (%rdi) to zmm0.
; The function is readonly rather than readnone because the body loads through %i.
1264 define <8 x i64> @sext_8x16mem_to_8x64mask(ptr%i , <8 x i1> %mask) nounwind readonly {
1265 ; KNL-LABEL: sext_8x16mem_to_8x64mask:
1267 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1268 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1269 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1270 ; KNL-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
1273 ; SKX-LABEL: sext_8x16mem_to_8x64mask:
1275 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1276 ; SKX-NEXT: vpmovw2m %xmm0, %k1
1277 ; SKX-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
1280 ; AVX512DQNOBW-LABEL: sext_8x16mem_to_8x64mask:
1281 ; AVX512DQNOBW: # %bb.0:
1282 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
1283 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
1284 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
1285 ; AVX512DQNOBW-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
1286 ; AVX512DQNOBW-NEXT: retq
1287 %a = load <8 x i16>,ptr%i,align 1
1288 %x = sext <8 x i16> %a to <8 x i64>
1289 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Unmasked sign-extending load. <8 x i16> is loaded from %i (align 1) and widened to
; <8 x i64>; the shared check prefix expects a single vpmovsxwq from (%rdi) to zmm0.
; The function is readonly rather than readnone because the body loads through %i.
1293 define <8 x i64> @sext_8x16mem_to_8x64(ptr%i) nounwind readonly {
1294 ; ALL-LABEL: sext_8x16mem_to_8x64:
1296 ; ALL-NEXT: vpmovsxwq (%rdi), %zmm0
1298 %a = load <8 x i16>,ptr%i,align 1
1299 %x = sext <8 x i16> %a to <8 x i64>
; Masked zero-extension of a register operand. The <8 x i16> argument %a is widened
; to <8 x i64> and lanes whose %mask bit is clear are taken from zeroinitializer.
; Every check set expects one zero-masked vpmovzxwq from xmm0 to zmm0, with the mask
; built from xmm1 into k1 first.
1303 define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
1304 ; KNL-LABEL: zext_8x16_to_8x64mask:
1306 ; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
1307 ; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
1308 ; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
1309 ; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1312 ; SKX-LABEL: zext_8x16_to_8x64mask:
1314 ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
1315 ; SKX-NEXT: vpmovw2m %xmm1, %k1
1316 ; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1319 ; AVX512DQNOBW-LABEL: zext_8x16_to_8x64mask:
1320 ; AVX512DQNOBW: # %bb.0:
1321 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1
1322 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1
1323 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1
1324 ; AVX512DQNOBW-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1325 ; AVX512DQNOBW-NEXT: retq
1326 %x = zext <8 x i16> %a to <8 x i64>
1327 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Unmasked zero-extension of the <8 x i16> argument %a to <8 x i64>. The shared
; check prefix expects a single vpmovzxwq from xmm0 to zmm0 on every configuration.
1331 define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone {
1332 ; ALL-LABEL: zext_8x16_to_8x64:
1334 ; ALL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1336 %ret = zext <8 x i16> %a to <8 x i64>
; Masked zero-extending load. <2 x i32> is loaded from %i (align 1), widened to
; <2 x i64>, and lanes whose %mask bit is clear are taken from zeroinitializer.
; The KNL checks expect an unmasked vpmovzxdq followed by a zero-masking 512-bit
; vmovdqa64; the AVX512DQ checks expect the mask folded into the vpmovzxdq itself.
; The function is readonly rather than readnone because the body loads through %i.
1340 define <2 x i64> @zext_2x32mem_to_2x64(ptr%i , <2 x i1> %mask) nounwind readonly {
1341 ; KNL-LABEL: zext_2x32mem_to_2x64:
1343 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1344 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1345 ; KNL-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1346 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1347 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1348 ; KNL-NEXT: vzeroupper
1351 ; AVX512DQ-LABEL: zext_2x32mem_to_2x64:
1352 ; AVX512DQ: # %bb.0:
1353 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
1354 ; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
1355 ; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero
1356 ; AVX512DQ-NEXT: retq
1357 %a = load <2 x i32>,ptr%i,align 1
1358 %x = zext <2 x i32> %a to <2 x i64>
1359 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Masked sign-extending load. <2 x i32> is loaded from %i (align 1), widened to
; <2 x i64>, and lanes whose %mask bit is clear are taken from zeroinitializer.
; The KNL checks expect an unmasked vpmovsxdq followed by a zero-masking 512-bit
; vmovdqa64; the AVX512DQ checks expect the mask folded into the vpmovsxdq itself.
; The function is readonly rather than readnone because the body loads through %i.
1363 define <2 x i64> @sext_2x32mem_to_2x64mask(ptr%i , <2 x i1> %mask) nounwind readonly {
1364 ; KNL-LABEL: sext_2x32mem_to_2x64mask:
1366 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1367 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1368 ; KNL-NEXT: vpmovsxdq (%rdi), %xmm0
1369 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1370 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1371 ; KNL-NEXT: vzeroupper
1374 ; AVX512DQ-LABEL: sext_2x32mem_to_2x64mask:
1375 ; AVX512DQ: # %bb.0:
1376 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
1377 ; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
1378 ; AVX512DQ-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z}
1379 ; AVX512DQ-NEXT: retq
1380 %a = load <2 x i32>,ptr%i,align 1
1381 %x = sext <2 x i32> %a to <2 x i64>
1382 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Unmasked sext from memory: loads <2 x i32> (align 1) and sign-extends it to
; <2 x i64>. Every configuration (ALL prefix) expects the load to be folded
; into a single vpmovsxdq (%rdi), %xmm0.
; NOTE(review): the function is marked readnone yet contains a load from %i;
; confirm this is intentional for the test.
1386 define <2 x i64> @sext_2x32mem_to_2x64(ptr%i) nounwind readnone {
1387 ; ALL-LABEL: sext_2x32mem_to_2x64:
1389 ; ALL-NEXT: vpmovsxdq (%rdi), %xmm0
1391 %a = load <2 x i32>,ptr%i,align 1
1392 %x = sext <2 x i32> %a to <2 x i64>
; Masked zext from memory: loads <4 x i32> (align 1), zero-extends it to
; <4 x i64>, then selects between the result and zero under <4 x i1> %mask.
;   KNL (+avx512f only): vpslld/vptestmd build the mask on zmm, vpmovzxdq
;     extends unmasked into ymm0, and a zero-masked vmovdqa64 on zmm applies
;     the select.
;   AVX512DQ (+vl,+dq): vpmovd2m builds the mask and the select is folded
;     into a zero-masked ymm vpmovzxdq from memory.
; NOTE(review): the function is marked readnone yet contains a load from %i;
; confirm this is intentional for the test.
1396 define <4 x i64> @zext_4x32mem_to_4x64(ptr%i , <4 x i1> %mask) nounwind readnone {
1397 ; KNL-LABEL: zext_4x32mem_to_4x64:
1399 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1400 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1401 ; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1402 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1403 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1406 ; AVX512DQ-LABEL: zext_4x32mem_to_4x64:
1407 ; AVX512DQ: # %bb.0:
1408 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
1409 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
1410 ; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1411 ; AVX512DQ-NEXT: retq
1412 %a = load <4 x i32>,ptr%i,align 1
1413 %x = zext <4 x i32> %a to <4 x i64>
1414 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Masked sext from memory: loads <4 x i32> (align 1), sign-extends it to
; <4 x i64>, then selects between the result and zero under <4 x i1> %mask.
;   KNL (+avx512f only): vpslld/vptestmd build the mask on zmm, vpmovsxdq
;     loads and extends unmasked into ymm0, and a zero-masked vmovdqa64 on
;     zmm applies the select.
;   AVX512DQ (+vl,+dq): vpmovd2m builds the mask and the select is folded
;     into a zero-masked vpmovsxdq (%rdi), %ymm0.
; NOTE(review): the function is marked readnone yet contains a load from %i;
; confirm this is intentional for the test.
1418 define <4 x i64> @sext_4x32mem_to_4x64mask(ptr%i , <4 x i1> %mask) nounwind readnone {
1419 ; KNL-LABEL: sext_4x32mem_to_4x64mask:
1421 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1422 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1423 ; KNL-NEXT: vpmovsxdq (%rdi), %ymm0
1424 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1425 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1428 ; AVX512DQ-LABEL: sext_4x32mem_to_4x64mask:
1429 ; AVX512DQ: # %bb.0:
1430 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
1431 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
1432 ; AVX512DQ-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z}
1433 ; AVX512DQ-NEXT: retq
1434 %a = load <4 x i32>,ptr%i,align 1
1435 %x = sext <4 x i32> %a to <4 x i64>
1436 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Unmasked sext from memory: loads <4 x i32> (align 1) and sign-extends it to
; <4 x i64>. Every configuration (ALL prefix) expects the load to be folded
; into a single vpmovsxdq (%rdi), %ymm0.
; NOTE(review): the function is marked readnone yet contains a load from %i;
; confirm this is intentional for the test.
1440 define <4 x i64> @sext_4x32mem_to_4x64(ptr%i) nounwind readnone {
1441 ; ALL-LABEL: sext_4x32mem_to_4x64:
1443 ; ALL-NEXT: vpmovsxdq (%rdi), %ymm0
1445 %a = load <4 x i32>,ptr%i,align 1
1446 %x = sext <4 x i32> %a to <4 x i64>
; Unmasked sext of a register argument: <4 x i32> -> <4 x i64>.
; Every configuration (ALL prefix) expects a single vpmovsxdq %xmm0, %ymm0.
1450 define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone {
1451 ; ALL-LABEL: sext_4x32_to_4x64:
1453 ; ALL-NEXT: vpmovsxdq %xmm0, %ymm0
1455 %x = sext <4 x i32> %a to <4 x i64>
; Masked zext of a register argument: zero-extends <4 x i32> %a to <4 x i64>,
; then selects between the result and zero under <4 x i1> %mask (in xmm1).
;   KNL (+avx512f only): vpslld/vptestmd build the mask on zmm, vpmovzxdq
;     extends unmasked into ymm0, and a zero-masked vmovdqa64 on zmm applies
;     the select.
;   AVX512DQ (+vl,+dq): vpmovd2m builds the mask and the select is folded
;     into a zero-masked ymm vpmovzxdq.
1459 define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone {
1460 ; KNL-LABEL: zext_4x32_to_4x64mask:
1462 ; KNL-NEXT: vpslld $31, %xmm1, %xmm1
1463 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
1464 ; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1465 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1466 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1469 ; AVX512DQ-LABEL: zext_4x32_to_4x64mask:
1470 ; AVX512DQ: # %bb.0:
1471 ; AVX512DQ-NEXT: vpslld $31, %xmm1, %xmm1
1472 ; AVX512DQ-NEXT: vpmovd2m %xmm1, %k1
1473 ; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1474 ; AVX512DQ-NEXT: retq
1475 %x = zext <4 x i32> %a to <4 x i64>
1476 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Masked zext from memory at 512-bit width: loads <8 x i32> (align 1),
; zero-extends it to <8 x i64>, then selects between the result and zero
; under <8 x i1> %mask. All three configurations end in a zero-masked zmm
; vpmovzxdq from memory; they differ only in how the mask reaches %k1:
;   KNL:          vpmovsxwq to zmm, vpsllq $63, vptestmq.
;   SKX (+bw):    vpsllw $15, vpmovw2m directly on the xmm mask.
;   AVX512DQNOBW: vpmovsxwd to ymm, vpslld $31, vpmovd2m.
; NOTE(review): the function is marked readnone yet contains a load from %i;
; confirm this is intentional for the test.
1480 define <8 x i64> @zext_8x32mem_to_8x64(ptr%i , <8 x i1> %mask) nounwind readnone {
1481 ; KNL-LABEL: zext_8x32mem_to_8x64:
1483 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1484 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1485 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1486 ; KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1489 ; SKX-LABEL: zext_8x32mem_to_8x64:
1491 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1492 ; SKX-NEXT: vpmovw2m %xmm0, %k1
1493 ; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1496 ; AVX512DQNOBW-LABEL: zext_8x32mem_to_8x64:
1497 ; AVX512DQNOBW: # %bb.0:
1498 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
1499 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
1500 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
1501 ; AVX512DQNOBW-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1502 ; AVX512DQNOBW-NEXT: retq
1503 %a = load <8 x i32>,ptr%i,align 1
1504 %x = zext <8 x i32> %a to <8 x i64>
1505 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Masked sext from memory at 512-bit width: loads <8 x i32> (align 1),
; sign-extends it to <8 x i64>, then selects between the result and zero
; under <8 x i1> %mask. All three configurations end in a zero-masked
; vpmovsxdq (%rdi), %zmm0; they differ only in how the mask reaches %k1:
;   KNL:          vpmovsxwq to zmm, vpsllq $63, vptestmq.
;   SKX (+bw):    vpsllw $15, vpmovw2m directly on the xmm mask.
;   AVX512DQNOBW: vpmovsxwd to ymm, vpslld $31, vpmovd2m.
; NOTE(review): the function is marked readnone yet contains a load from %i;
; confirm this is intentional for the test.
1509 define <8 x i64> @sext_8x32mem_to_8x64mask(ptr%i , <8 x i1> %mask) nounwind readnone {
1510 ; KNL-LABEL: sext_8x32mem_to_8x64mask:
1512 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1513 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1514 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1515 ; KNL-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z}
1518 ; SKX-LABEL: sext_8x32mem_to_8x64mask:
1520 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1521 ; SKX-NEXT: vpmovw2m %xmm0, %k1
1522 ; SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z}
1525 ; AVX512DQNOBW-LABEL: sext_8x32mem_to_8x64mask:
1526 ; AVX512DQNOBW: # %bb.0:
1527 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
1528 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
1529 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
1530 ; AVX512DQNOBW-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z}
1531 ; AVX512DQNOBW-NEXT: retq
1532 %a = load <8 x i32>,ptr%i,align 1
1533 %x = sext <8 x i32> %a to <8 x i64>
1534 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Unmasked sext from memory: loads <8 x i32> (align 1) and sign-extends it to
; <8 x i64>. Every configuration (ALL prefix) expects the load to be folded
; into a single vpmovsxdq (%rdi), %zmm0.
; NOTE(review): the function is marked readnone yet contains a load from %i;
; confirm this is intentional for the test.
1538 define <8 x i64> @sext_8x32mem_to_8x64(ptr%i) nounwind readnone {
1539 ; ALL-LABEL: sext_8x32mem_to_8x64:
1541 ; ALL-NEXT: vpmovsxdq (%rdi), %zmm0
1543 %a = load <8 x i32>,ptr%i,align 1
1544 %x = sext <8 x i32> %a to <8 x i64>
; Unmasked sext of a register argument: <8 x i32> -> <8 x i64>.
; Every configuration (ALL prefix) expects a single vpmovsxdq %ymm0, %zmm0.
1548 define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone {
1549 ; ALL-LABEL: sext_8x32_to_8x64:
1551 ; ALL-NEXT: vpmovsxdq %ymm0, %zmm0
1553 %x = sext <8 x i32> %a to <8 x i64>
; Masked zext of a register argument at 512-bit width: zero-extends
; <8 x i32> %a to <8 x i64>, then selects between the result and zero under
; <8 x i1> %mask (in xmm1). All three configurations end in a zero-masked
; vpmovzxdq from ymm0 into zmm0; they differ only in how the mask reaches %k1:
;   KNL:          vpmovsxwq to zmm, vpsllq $63, vptestmq.
;   SKX (+bw):    vpsllw $15, vpmovw2m directly on the xmm mask.
;   AVX512DQNOBW: vpmovsxwd to ymm, vpslld $31, vpmovd2m.
1557 define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone {
1558 ; KNL-LABEL: zext_8x32_to_8x64mask:
1560 ; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
1561 ; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
1562 ; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
1563 ; KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
1566 ; SKX-LABEL: zext_8x32_to_8x64mask:
1568 ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
1569 ; SKX-NEXT: vpmovw2m %xmm1, %k1
1570 ; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
1573 ; AVX512DQNOBW-LABEL: zext_8x32_to_8x64mask:
1574 ; AVX512DQNOBW: # %bb.0:
1575 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1
1576 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1
1577 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1
1578 ; AVX512DQNOBW-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
1579 ; AVX512DQNOBW-NEXT: retq
1580 %x = zext <8 x i32> %a to <8 x i64>
1581 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Floating-point truncation <8 x double> -> <8 x float>.
; Every configuration (ALL prefix) expects a single vcvtpd2ps %zmm0, %ymm0.
1584 define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
1585 ; ALL-LABEL: fptrunc_test:
1587 ; ALL-NEXT: vcvtpd2ps %zmm0, %ymm0
1589 %b = fptrunc <8 x double> %a to <8 x float>
; Floating-point extension <8 x float> -> <8 x double>.
; Every configuration (ALL prefix) expects a single vcvtps2pd %ymm0, %zmm0.
1593 define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
1594 ; ALL-LABEL: fpext_test:
1596 ; ALL-NEXT: vcvtps2pd %ymm0, %zmm0
1598 %b = fpext <8 x float> %a to <8 x double>
; Scalar-to-mask zext: bitcasts i16 %b to <16 x i1> and zero-extends it to
; <16 x i32>. Each configuration moves %edi into a mask register, expands it
; to a vector, and finishes with vpsrld $31:
;   KNL:          kmovw into %k1, then zero-masked vpternlogd $255.
;   SKX:          kmovd into %k0, then vpmovm2d.
;   AVX512DQNOBW: kmovw into %k0, then vpmovm2d.
1602 define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
1603 ; KNL-LABEL: zext_16i1_to_16xi32:
1605 ; KNL-NEXT: kmovw %edi, %k1
1606 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1607 ; KNL-NEXT: vpsrld $31, %zmm0, %zmm0
1610 ; SKX-LABEL: zext_16i1_to_16xi32:
1612 ; SKX-NEXT: kmovd %edi, %k0
1613 ; SKX-NEXT: vpmovm2d %k0, %zmm0
1614 ; SKX-NEXT: vpsrld $31, %zmm0, %zmm0
1617 ; AVX512DQNOBW-LABEL: zext_16i1_to_16xi32:
1618 ; AVX512DQNOBW: # %bb.0:
1619 ; AVX512DQNOBW-NEXT: kmovw %edi, %k0
1620 ; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm0
1621 ; AVX512DQNOBW-NEXT: vpsrld $31, %zmm0, %zmm0
1622 ; AVX512DQNOBW-NEXT: retq
1623 %a = bitcast i16 %b to <16 x i1>
1624 %c = zext <16 x i1> %a to <16 x i32>
; Scalar-to-mask zext: bitcasts i8 %b to <8 x i1> and zero-extends it to
; <8 x i64>. Each configuration moves %edi into a mask register, expands it
; to a vector, and finishes with vpsrlq $63:
;   KNL:          kmovw into %k1, then zero-masked vpternlogq $255.
;   SKX:          kmovd into %k0, then vpmovm2q.
;   AVX512DQNOBW: kmovw into %k0, then vpmovm2q.
1628 define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
1629 ; KNL-LABEL: zext_8i1_to_8xi64:
1631 ; KNL-NEXT: kmovw %edi, %k1
1632 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1633 ; KNL-NEXT: vpsrlq $63, %zmm0, %zmm0
1636 ; SKX-LABEL: zext_8i1_to_8xi64:
1638 ; SKX-NEXT: kmovd %edi, %k0
1639 ; SKX-NEXT: vpmovm2q %k0, %zmm0
1640 ; SKX-NEXT: vpsrlq $63, %zmm0, %zmm0
1643 ; AVX512DQNOBW-LABEL: zext_8i1_to_8xi64:
1644 ; AVX512DQNOBW: # %bb.0:
1645 ; AVX512DQNOBW-NEXT: kmovw %edi, %k0
1646 ; AVX512DQNOBW-NEXT: vpmovm2q %k0, %zmm0
1647 ; AVX512DQNOBW-NEXT: vpsrlq $63, %zmm0, %zmm0
1648 ; AVX512DQNOBW-NEXT: retq
1649 %a = bitcast i8 %b to <8 x i1>
1650 %c = zext <8 x i1> %a to <8 x i64>
; Vector-to-scalar mask: truncates <16 x i8> %a to <16 x i1> and bitcasts the
; result to i16. Every configuration (ALL prefix) expects no mask registers:
; vpsllw $7 on xmm0 followed by vpmovmskb into %eax.
1654 define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
1655 ; ALL-LABEL: trunc_16i8_to_16i1:
1657 ; ALL-NEXT: vpsllw $7, %xmm0, %xmm0
1658 ; ALL-NEXT: vpmovmskb %xmm0, %eax
1659 ; ALL-NEXT: # kill: def $ax killed $ax killed $eax
1661 %mask_b = trunc <16 x i8>%a to <16 x i1>
1662 %mask = bitcast <16 x i1> %mask_b to i16
; Vector-to-scalar mask: truncates <16 x i32> %a to <16 x i1> and bitcasts the
; result to i16. Each configuration starts with vpslld $31 on zmm0, forms a
; mask in %k0, and moves it to %eax:
;   KNL:          vptestmd, then kmovw.
;   SKX:          vpmovd2m, then kmovd.
;   AVX512DQNOBW: vpmovd2m, then kmovw.
; A vzeroupper is expected in all three sequences.
1666 define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
1667 ; KNL-LABEL: trunc_16i32_to_16i1:
1669 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1670 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1671 ; KNL-NEXT: kmovw %k0, %eax
1672 ; KNL-NEXT: # kill: def $ax killed $ax killed $eax
1673 ; KNL-NEXT: vzeroupper
1676 ; SKX-LABEL: trunc_16i32_to_16i1:
1678 ; SKX-NEXT: vpslld $31, %zmm0, %zmm0
1679 ; SKX-NEXT: vpmovd2m %zmm0, %k0
1680 ; SKX-NEXT: kmovd %k0, %eax
1681 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax
1682 ; SKX-NEXT: vzeroupper
1685 ; AVX512DQNOBW-LABEL: trunc_16i32_to_16i1:
1686 ; AVX512DQNOBW: # %bb.0:
1687 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
1688 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k0
1689 ; AVX512DQNOBW-NEXT: kmovw %k0, %eax
1690 ; AVX512DQNOBW-NEXT: # kill: def $ax killed $ax killed $eax
1691 ; AVX512DQNOBW-NEXT: vzeroupper
1692 ; AVX512DQNOBW-NEXT: retq
1693 %mask_b = trunc <16 x i32>%a to <16 x i1>
1694 %mask = bitcast <16 x i1> %mask_b to i16
; Truncates both <4 x i32> arguments to <4 x i1>, ANDs the two masks, and
; sign-extends the result back to <4 x i32>. Every configuration (ALL prefix)
; expects this to stay in xmm registers with no mask registers: vpand, then
; vpslld $31 / vpsrad $31.
1698 define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
1699 ; ALL-LABEL: trunc_4i32_to_4i1:
1701 ; ALL-NEXT: vpand %xmm1, %xmm0, %xmm0
1702 ; ALL-NEXT: vpslld $31, %xmm0, %xmm0
1703 ; ALL-NEXT: vpsrad $31, %xmm0, %xmm0
1705 %mask_a = trunc <4 x i32>%a to <4 x i1>
1706 %mask_b = trunc <4 x i32>%b to <4 x i1>
1707 %a_and_b = and <4 x i1>%mask_a, %mask_b
1708 %res = sext <4 x i1>%a_and_b to <4 x i32>
; Vector-to-scalar mask: truncates <8 x i16> %a to <8 x i1> and bitcasts the
; result to i8. Each configuration forms the mask in %k0 and moves it to %eax:
;   KNL:          vpmovsxwq to zmm, vpsllq $63, vptestmq, kmovw, vzeroupper.
;   SKX (+bw):    vpsllw $15 and vpmovw2m directly on xmm0, then kmovd.
;   AVX512DQNOBW: vpmovsxwd to ymm, vpslld $31, vpmovd2m, kmovw, vzeroupper.
1713 define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
1714 ; KNL-LABEL: trunc_8i16_to_8i1:
1716 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1717 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1718 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
1719 ; KNL-NEXT: kmovw %k0, %eax
1720 ; KNL-NEXT: # kill: def $al killed $al killed $eax
1721 ; KNL-NEXT: vzeroupper
1724 ; SKX-LABEL: trunc_8i16_to_8i1:
1726 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1727 ; SKX-NEXT: vpmovw2m %xmm0, %k0
1728 ; SKX-NEXT: kmovd %k0, %eax
1729 ; SKX-NEXT: # kill: def $al killed $al killed $eax
1732 ; AVX512DQNOBW-LABEL: trunc_8i16_to_8i1:
1733 ; AVX512DQNOBW: # %bb.0:
1734 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
1735 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
1736 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k0
1737 ; AVX512DQNOBW-NEXT: kmovw %k0, %eax
1738 ; AVX512DQNOBW-NEXT: # kill: def $al killed $al killed $eax
1739 ; AVX512DQNOBW-NEXT: vzeroupper
1740 ; AVX512DQNOBW-NEXT: retq
1741 %mask_b = trunc <8 x i16>%a to <8 x i1>
1742 %mask = bitcast <8 x i1> %mask_b to i8
; Compares %a1 < %a2 (signed), inverts the <8 x i1> result by xor with
; all-true, and sign-extends it to <8 x i32>. Both check groups expect
; vpcmpgtd producing a ymm vector (no mask register) followed by
; vpternlogq $15; KNL performs the vpternlogq on zmm, AVX512DQ on ymm.
1746 define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
1747 ; KNL-LABEL: sext_8i1_8i32:
1749 ; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
1750 ; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
1751 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1754 ; AVX512DQ-LABEL: sext_8i1_8i32:
1755 ; AVX512DQ: # %bb.0:
1756 ; AVX512DQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
1757 ; AVX512DQ-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0
1758 ; AVX512DQ-NEXT: retq
1759 %x = icmp slt <8 x i32> %a1, %a2
1760 %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
1761 %y = sext <8 x i1> %x1 to <8 x i32>
; Truncates i32 %a to i1, inserts it at index 0 of a constant <16 x i1>
; (element 1 is false, all other elements true), and bitcasts the vector to
; i16. All three configurations expect the same shape: andl $1 on %edi moved
; into %k0, the constant loaded via movw $-4 into %k1, a kshiftrw $1 /
; kshiftlw $1 pair on %k1, and korw to merge. They differ only in the kmov
; width: KNL and AVX512DQNOBW use kmovw throughout, while SKX uses kmovd for
; the constant and for the result.
1766 define i16 @trunc_i32_to_i1(i32 %a) {
1767 ; KNL-LABEL: trunc_i32_to_i1:
1769 ; KNL-NEXT: andl $1, %edi
1770 ; KNL-NEXT: kmovw %edi, %k0
1771 ; KNL-NEXT: movw $-4, %ax
1772 ; KNL-NEXT: kmovw %eax, %k1
1773 ; KNL-NEXT: kshiftrw $1, %k1, %k1
1774 ; KNL-NEXT: kshiftlw $1, %k1, %k1
1775 ; KNL-NEXT: korw %k0, %k1, %k0
1776 ; KNL-NEXT: kmovw %k0, %eax
1777 ; KNL-NEXT: # kill: def $ax killed $ax killed $eax
1780 ; SKX-LABEL: trunc_i32_to_i1:
1782 ; SKX-NEXT: andl $1, %edi
1783 ; SKX-NEXT: kmovw %edi, %k0
1784 ; SKX-NEXT: movw $-4, %ax
1785 ; SKX-NEXT: kmovd %eax, %k1
1786 ; SKX-NEXT: kshiftrw $1, %k1, %k1
1787 ; SKX-NEXT: kshiftlw $1, %k1, %k1
1788 ; SKX-NEXT: korw %k0, %k1, %k0
1789 ; SKX-NEXT: kmovd %k0, %eax
1790 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax
1793 ; AVX512DQNOBW-LABEL: trunc_i32_to_i1:
1794 ; AVX512DQNOBW: # %bb.0:
1795 ; AVX512DQNOBW-NEXT: andl $1, %edi
1796 ; AVX512DQNOBW-NEXT: kmovw %edi, %k0
1797 ; AVX512DQNOBW-NEXT: movw $-4, %ax
1798 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
1799 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1
1800 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1
1801 ; AVX512DQNOBW-NEXT: korw %k0, %k1, %k0
1802 ; AVX512DQNOBW-NEXT: kmovw %k0, %eax
1803 ; AVX512DQNOBW-NEXT: # kill: def $ax killed $ax killed $eax
1804 ; AVX512DQNOBW-NEXT: retq
1805 %a_i = trunc i32 %a to i1
1806 %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
1807 %res = bitcast <16 x i1> %maskv to i16
; Compares %a1 < %a2 (signed) on <8 x i32> and sign-extends the <8 x i1>
; result to <8 x i16>.
;   KNL:          vpcmpgtd produces a ymm vector, narrowed with vpmovdw
;                 (zmm -> ymm); no mask register is used.
;   SKX (+bw):    vpcmpgtd writes %k0, expanded directly with vpmovm2w.
;   AVX512DQNOBW: vpcmpgtd writes %k0, expanded with vpmovm2d to ymm and then
;                 narrowed with vpmovdw to xmm.
; A vzeroupper is expected in all three sequences.
1811 define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
1812 ; KNL-LABEL: sext_8i1_8i16:
1814 ; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
1815 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
1816 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1817 ; KNL-NEXT: vzeroupper
1820 ; SKX-LABEL: sext_8i1_8i16:
1822 ; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0
1823 ; SKX-NEXT: vpmovm2w %k0, %xmm0
1824 ; SKX-NEXT: vzeroupper
1827 ; AVX512DQNOBW-LABEL: sext_8i1_8i16:
1828 ; AVX512DQNOBW: # %bb.0:
1829 ; AVX512DQNOBW-NEXT: vpcmpgtd %ymm0, %ymm1, %k0
1830 ; AVX512DQNOBW-NEXT: vpmovm2d %k0, %ymm0
1831 ; AVX512DQNOBW-NEXT: vpmovdw %ymm0, %xmm0
1832 ; AVX512DQNOBW-NEXT: vzeroupper
1833 ; AVX512DQNOBW-NEXT: retq
1834 %x = icmp slt <8 x i32> %a1, %a2
1835 %y = sext <8 x i1> %x to <8 x i16>
; Compares %a1 < %a2 (signed) on <16 x i32> and sign-extends the <16 x i1>
; result to <16 x i32>.
;   KNL:      vpcmpgtd writes %k1, expanded with a zero-masked
;             vpternlogd $255.
;   AVX512DQ: vpcmpgtd writes %k0, expanded with vpmovm2d.
1839 define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
1840 ; KNL-LABEL: sext_16i1_16i32:
1842 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
1843 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1846 ; AVX512DQ-LABEL: sext_16i1_16i32:
1847 ; AVX512DQ: # %bb.0:
1848 ; AVX512DQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
1849 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
1850 ; AVX512DQ-NEXT: retq
1851 %x = icmp slt <16 x i32> %a1, %a2
1852 %y = sext <16 x i1> %x to <16 x i32>
; Compares %a1 < %a2 (signed) on <8 x i32> and sign-extends the <8 x i1>
; result to <8 x i64>.
;   KNL:      vpcmpgtd produces a ymm vector that is widened with vpmovsxdq
;             to zmm; no mask register is used.
;   AVX512DQ: vpcmpgtd writes %k0, expanded with vpmovm2q.
1856 define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
1857 ; KNL-LABEL: sext_8i1_8i64:
1859 ; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
1860 ; KNL-NEXT: vpmovsxdq %ymm0, %zmm0
1863 ; AVX512DQ-LABEL: sext_8i1_8i64:
1864 ; AVX512DQ: # %bb.0:
1865 ; AVX512DQ-NEXT: vpcmpgtd %ymm0, %ymm1, %k0
1866 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
1867 ; AVX512DQ-NEXT: retq
1868 %x = icmp slt <8 x i32> %a1, %a2
1869 %y = sext <8 x i1> %x to <8 x i64>
; Sign-extending load: loads <8 x i8> from %a, sign-extends it to <8 x i64>,
; and stores the result to %res. Every configuration (ALL prefix) expects the
; load to be folded into vpmovsxbq (%rdi), %zmm0, followed by a vmovdqa64
; store to (%rsi) and a vzeroupper.
1873 define void @extload_v8i64(ptr %a, ptr %res) {
1874 ; ALL-LABEL: extload_v8i64:
1876 ; ALL-NEXT: vpmovsxbq (%rdi), %zmm0
1877 ; ALL-NEXT: vmovdqa64 %zmm0, (%rsi)
1878 ; ALL-NEXT: vzeroupper
1880 %sign_load = load <8 x i8>, ptr %a
1881 %c = sext <8 x i8> %sign_load to <8 x i64>
1882 store <8 x i64> %c, ptr %res
1886 define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
1887 ; KNL-LABEL: test21:
1889 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
1890 ; KNL-NEXT: andl $1, %eax
1891 ; KNL-NEXT: kmovw %eax, %k0
1892 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
1893 ; KNL-NEXT: kmovw %eax, %k1
1894 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1895 ; KNL-NEXT: kshiftrw $14, %k1, %k1
1896 ; KNL-NEXT: korw %k1, %k0, %k0
1897 ; KNL-NEXT: movw $-5, %ax
1898 ; KNL-NEXT: kmovw %eax, %k1
1899 ; KNL-NEXT: kandw %k1, %k0, %k0
1900 ; KNL-NEXT: kmovw %k1, %k7
1901 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1902 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
1903 ; KNL-NEXT: kmovw %eax, %k1
1904 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1905 ; KNL-NEXT: kshiftrw $13, %k1, %k1
1906 ; KNL-NEXT: korw %k1, %k0, %k0
1907 ; KNL-NEXT: movw $-9, %ax
1908 ; KNL-NEXT: kmovw %eax, %k1
1909 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1910 ; KNL-NEXT: kandw %k1, %k0, %k0
1911 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
1912 ; KNL-NEXT: kmovw %eax, %k1
1913 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1914 ; KNL-NEXT: kshiftrw $12, %k1, %k1
1915 ; KNL-NEXT: korw %k1, %k0, %k0
1916 ; KNL-NEXT: movw $-17, %ax
1917 ; KNL-NEXT: kmovw %eax, %k1
1918 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1919 ; KNL-NEXT: kandw %k1, %k0, %k0
1920 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
1921 ; KNL-NEXT: kmovw %eax, %k1
1922 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1923 ; KNL-NEXT: kshiftrw $11, %k1, %k1
1924 ; KNL-NEXT: korw %k1, %k0, %k0
1925 ; KNL-NEXT: movw $-33, %ax
1926 ; KNL-NEXT: kmovw %eax, %k1
1927 ; KNL-NEXT: kandw %k1, %k0, %k0
1928 ; KNL-NEXT: kmovw %k1, %k2
1929 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1930 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
1931 ; KNL-NEXT: kmovw %eax, %k1
1932 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1933 ; KNL-NEXT: kshiftrw $10, %k1, %k1
1934 ; KNL-NEXT: korw %k1, %k0, %k0
1935 ; KNL-NEXT: movw $-65, %ax
1936 ; KNL-NEXT: kmovw %eax, %k1
1937 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1938 ; KNL-NEXT: kandw %k1, %k0, %k0
1939 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
1940 ; KNL-NEXT: kmovw %eax, %k1
1941 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1942 ; KNL-NEXT: kshiftrw $9, %k1, %k1
1943 ; KNL-NEXT: korw %k1, %k0, %k0
1944 ; KNL-NEXT: movw $-129, %ax
1945 ; KNL-NEXT: kmovw %eax, %k1
1946 ; KNL-NEXT: kandw %k1, %k0, %k0
1947 ; KNL-NEXT: kmovw %k1, %k3
1948 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1949 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
1950 ; KNL-NEXT: kmovw %eax, %k1
1951 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1952 ; KNL-NEXT: kshiftrw $8, %k1, %k1
1953 ; KNL-NEXT: korw %k1, %k0, %k0
1954 ; KNL-NEXT: movw $-257, %ax # imm = 0xFEFF
1955 ; KNL-NEXT: kmovw %eax, %k1
1956 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1957 ; KNL-NEXT: kandw %k1, %k0, %k0
1958 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
1959 ; KNL-NEXT: kmovw %eax, %k1
1960 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1961 ; KNL-NEXT: kshiftrw $7, %k1, %k1
1962 ; KNL-NEXT: korw %k1, %k0, %k0
1963 ; KNL-NEXT: movw $-513, %ax # imm = 0xFDFF
1964 ; KNL-NEXT: kmovw %eax, %k1
1965 ; KNL-NEXT: kandw %k1, %k0, %k0
1966 ; KNL-NEXT: kmovw %k1, %k4
1967 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1968 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
1969 ; KNL-NEXT: kmovw %eax, %k1
1970 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1971 ; KNL-NEXT: kshiftrw $6, %k1, %k1
1972 ; KNL-NEXT: korw %k1, %k0, %k0
1973 ; KNL-NEXT: movw $-1025, %ax # imm = 0xFBFF
1974 ; KNL-NEXT: kmovw %eax, %k1
1975 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1976 ; KNL-NEXT: kandw %k1, %k0, %k0
1977 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
1978 ; KNL-NEXT: kmovw %eax, %k1
1979 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1980 ; KNL-NEXT: kshiftrw $5, %k1, %k1
1981 ; KNL-NEXT: korw %k1, %k0, %k0
1982 ; KNL-NEXT: movw $-2049, %ax # imm = 0xF7FF
1983 ; KNL-NEXT: kmovw %eax, %k5
1984 ; KNL-NEXT: kandw %k5, %k0, %k0
1985 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
1986 ; KNL-NEXT: kmovw %eax, %k1
1987 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1988 ; KNL-NEXT: kshiftrw $4, %k1, %k1
1989 ; KNL-NEXT: korw %k1, %k0, %k0
1990 ; KNL-NEXT: movw $-4097, %ax # imm = 0xEFFF
1991 ; KNL-NEXT: kmovw %eax, %k1
1992 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1993 ; KNL-NEXT: kandw %k1, %k0, %k0
1994 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
1995 ; KNL-NEXT: kmovw %eax, %k1
1996 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1997 ; KNL-NEXT: kshiftrw $3, %k1, %k1
1998 ; KNL-NEXT: korw %k1, %k0, %k0
1999 ; KNL-NEXT: movw $-8193, %ax # imm = 0xDFFF
2000 ; KNL-NEXT: kmovw %eax, %k1
2001 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2002 ; KNL-NEXT: kandw %k1, %k0, %k0
2003 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2004 ; KNL-NEXT: kmovw %eax, %k1
2005 ; KNL-NEXT: kshiftlw $15, %k1, %k1
2006 ; KNL-NEXT: kshiftrw $2, %k1, %k1
2007 ; KNL-NEXT: korw %k1, %k0, %k1
2008 ; KNL-NEXT: movw $-16385, %ax # imm = 0xBFFF
2009 ; KNL-NEXT: kmovw %eax, %k0
2010 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2011 ; KNL-NEXT: kandw %k0, %k1, %k1
2012 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2013 ; KNL-NEXT: kmovw %eax, %k6
2014 ; KNL-NEXT: kshiftlw $14, %k6, %k6
2015 ; KNL-NEXT: korw %k6, %k1, %k1
2016 ; KNL-NEXT: kshiftlw $1, %k1, %k1
2017 ; KNL-NEXT: kshiftrw $1, %k1, %k1
2018 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2019 ; KNL-NEXT: kmovw %eax, %k6
2020 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2021 ; KNL-NEXT: korw %k6, %k1, %k1
2022 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2023 ; KNL-NEXT: andl $1, %edi
2024 ; KNL-NEXT: kmovw %esi, %k1
2025 ; KNL-NEXT: kshiftlw $15, %k1, %k1
2026 ; KNL-NEXT: kshiftrw $14, %k1, %k1
2027 ; KNL-NEXT: kmovw %edi, %k6
2028 ; KNL-NEXT: korw %k1, %k6, %k1
2029 ; KNL-NEXT: kandw %k7, %k1, %k1
2030 ; KNL-NEXT: kmovw %edx, %k6
2031 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2032 ; KNL-NEXT: kshiftrw $13, %k6, %k6
2033 ; KNL-NEXT: korw %k6, %k1, %k1
2034 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2035 ; KNL-NEXT: kandw %k7, %k1, %k1
2036 ; KNL-NEXT: kmovw %ecx, %k6
2037 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2038 ; KNL-NEXT: kshiftrw $12, %k6, %k6
2039 ; KNL-NEXT: korw %k6, %k1, %k1
2040 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2041 ; KNL-NEXT: kandw %k0, %k1, %k1
2042 ; KNL-NEXT: kmovw %r8d, %k6
2043 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2044 ; KNL-NEXT: kshiftrw $11, %k6, %k6
2045 ; KNL-NEXT: korw %k6, %k1, %k1
2046 ; KNL-NEXT: kandw %k2, %k1, %k1
2047 ; KNL-NEXT: kmovw %r9d, %k6
2048 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2049 ; KNL-NEXT: kshiftrw $10, %k6, %k6
2050 ; KNL-NEXT: korw %k6, %k1, %k1
2051 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2052 ; KNL-NEXT: kandw %k2, %k1, %k1
2053 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2054 ; KNL-NEXT: kmovw %eax, %k6
2055 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2056 ; KNL-NEXT: kshiftrw $9, %k6, %k6
2057 ; KNL-NEXT: korw %k6, %k1, %k1
2058 ; KNL-NEXT: kandw %k3, %k1, %k1
2059 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2060 ; KNL-NEXT: kmovw %eax, %k6
2061 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2062 ; KNL-NEXT: kshiftrw $8, %k6, %k6
2063 ; KNL-NEXT: korw %k6, %k1, %k1
2064 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
2065 ; KNL-NEXT: kandw %k3, %k1, %k1
2066 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2067 ; KNL-NEXT: kmovw %eax, %k6
2068 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2069 ; KNL-NEXT: kshiftrw $7, %k6, %k6
2070 ; KNL-NEXT: korw %k6, %k1, %k1
2071 ; KNL-NEXT: kandw %k4, %k1, %k1
2072 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2073 ; KNL-NEXT: kmovw %eax, %k6
2074 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2075 ; KNL-NEXT: kshiftrw $6, %k6, %k6
2076 ; KNL-NEXT: korw %k6, %k1, %k1
2077 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload
2078 ; KNL-NEXT: kandw %k4, %k1, %k1
2079 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2080 ; KNL-NEXT: kmovw %eax, %k6
2081 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2082 ; KNL-NEXT: kshiftrw $5, %k6, %k6
2083 ; KNL-NEXT: korw %k6, %k1, %k1
2084 ; KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2085 ; KNL-NEXT: kandw %k5, %k1, %k1
2086 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2087 ; KNL-NEXT: kmovw %eax, %k6
2088 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2089 ; KNL-NEXT: kshiftrw $4, %k6, %k6
2090 ; KNL-NEXT: korw %k6, %k1, %k1
2091 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
2092 ; KNL-NEXT: kandw %k6, %k1, %k1
2093 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2094 ; KNL-NEXT: kmovw %eax, %k6
2095 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2096 ; KNL-NEXT: kshiftrw $3, %k6, %k6
2097 ; KNL-NEXT: korw %k6, %k1, %k1
2098 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
2099 ; KNL-NEXT: kandw %k6, %k1, %k1
2100 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2101 ; KNL-NEXT: kmovw %eax, %k6
2102 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2103 ; KNL-NEXT: kshiftrw $2, %k6, %k6
2104 ; KNL-NEXT: korw %k6, %k1, %k1
2105 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
2106 ; KNL-NEXT: kandw %k6, %k1, %k1
2107 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2108 ; KNL-NEXT: kmovw %eax, %k6
2109 ; KNL-NEXT: kshiftlw $14, %k6, %k6
2110 ; KNL-NEXT: korw %k6, %k1, %k1
2111 ; KNL-NEXT: kshiftlw $1, %k1, %k1
2112 ; KNL-NEXT: kshiftrw $1, %k1, %k1
2113 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2114 ; KNL-NEXT: kmovw %eax, %k6
2115 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2116 ; KNL-NEXT: korw %k6, %k1, %k1
2117 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2118 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2119 ; KNL-NEXT: andl $1, %eax
2120 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
2121 ; KNL-NEXT: kmovw %ecx, %k1
2122 ; KNL-NEXT: kshiftlw $15, %k1, %k1
2123 ; KNL-NEXT: kshiftrw $14, %k1, %k1
2124 ; KNL-NEXT: kmovw %eax, %k6
2125 ; KNL-NEXT: korw %k1, %k6, %k1
2126 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
2127 ; KNL-NEXT: kandw %k6, %k1, %k1
2128 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2129 ; KNL-NEXT: kmovw %eax, %k6
2130 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2131 ; KNL-NEXT: kshiftrw $13, %k6, %k6
2132 ; KNL-NEXT: korw %k6, %k1, %k1
2133 ; KNL-NEXT: kandw %k7, %k1, %k1
2134 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2135 ; KNL-NEXT: kmovw %eax, %k6
2136 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2137 ; KNL-NEXT: kshiftrw $12, %k6, %k6
2138 ; KNL-NEXT: korw %k6, %k1, %k1
2139 ; KNL-NEXT: kandw %k0, %k1, %k1
2140 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2141 ; KNL-NEXT: kmovw %eax, %k6
2142 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2143 ; KNL-NEXT: kshiftrw $11, %k6, %k6
2144 ; KNL-NEXT: korw %k6, %k1, %k1
2145 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2146 ; KNL-NEXT: kandw %k0, %k1, %k1
2147 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2148 ; KNL-NEXT: kmovw %eax, %k6
2149 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2150 ; KNL-NEXT: kshiftrw $10, %k6, %k6
2151 ; KNL-NEXT: korw %k6, %k1, %k1
2152 ; KNL-NEXT: kandw %k2, %k1, %k1
2153 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2154 ; KNL-NEXT: kmovw %eax, %k6
2155 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2156 ; KNL-NEXT: kshiftrw $9, %k6, %k6
2157 ; KNL-NEXT: korw %k6, %k1, %k1
2158 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2159 ; KNL-NEXT: kandw %k0, %k1, %k1
2160 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2161 ; KNL-NEXT: kmovw %eax, %k6
2162 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2163 ; KNL-NEXT: kshiftrw $8, %k6, %k6
2164 ; KNL-NEXT: korw %k6, %k1, %k1
2165 ; KNL-NEXT: kandw %k3, %k1, %k1
2166 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2167 ; KNL-NEXT: kmovw %eax, %k6
2168 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2169 ; KNL-NEXT: kshiftrw $7, %k6, %k6
2170 ; KNL-NEXT: korw %k6, %k1, %k1
2171 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
2172 ; KNL-NEXT: kandw %k3, %k1, %k1
2173 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2174 ; KNL-NEXT: kmovw %eax, %k6
2175 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2176 ; KNL-NEXT: kshiftrw $6, %k6, %k6
2177 ; KNL-NEXT: korw %k6, %k1, %k1
2178 ; KNL-NEXT: kandw %k4, %k1, %k1
2179 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2180 ; KNL-NEXT: kmovw %eax, %k6
2181 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2182 ; KNL-NEXT: kshiftrw $5, %k6, %k6
2183 ; KNL-NEXT: korw %k6, %k1, %k1
2184 ; KNL-NEXT: kandw %k5, %k1, %k1
2185 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2186 ; KNL-NEXT: kmovw %eax, %k6
2187 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2188 ; KNL-NEXT: kshiftrw $4, %k6, %k6
2189 ; KNL-NEXT: korw %k6, %k1, %k1
2190 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2191 ; KNL-NEXT: kandw %k0, %k1, %k1
2192 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2193 ; KNL-NEXT: kmovw %eax, %k6
2194 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2195 ; KNL-NEXT: kshiftrw $3, %k6, %k6
2196 ; KNL-NEXT: korw %k6, %k1, %k1
2197 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2198 ; KNL-NEXT: kandw %k2, %k1, %k1
2199 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2200 ; KNL-NEXT: kmovw %eax, %k6
2201 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2202 ; KNL-NEXT: kshiftrw $2, %k6, %k6
2203 ; KNL-NEXT: korw %k6, %k1, %k1
2204 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2205 ; KNL-NEXT: kandw %k5, %k1, %k1
2206 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2207 ; KNL-NEXT: kmovw %eax, %k6
2208 ; KNL-NEXT: kshiftlw $14, %k6, %k6
2209 ; KNL-NEXT: korw %k6, %k1, %k1
2210 ; KNL-NEXT: kshiftlw $1, %k1, %k1
2211 ; KNL-NEXT: kshiftrw $1, %k1, %k1
2212 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2213 ; KNL-NEXT: kmovw %eax, %k6
2214 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2215 ; KNL-NEXT: korw %k6, %k1, %k1
2216 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2217 ; KNL-NEXT: andl $1, %eax
2218 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
2219 ; KNL-NEXT: kmovw %ecx, %k6
2220 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2221 ; KNL-NEXT: kshiftrw $14, %k6, %k6
2222 ; KNL-NEXT: kmovw %eax, %k7
2223 ; KNL-NEXT: korw %k6, %k7, %k6
2224 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2225 ; KNL-NEXT: kandw %k5, %k6, %k6
2226 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2227 ; KNL-NEXT: kmovw %eax, %k7
2228 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2229 ; KNL-NEXT: kshiftrw $13, %k7, %k7
2230 ; KNL-NEXT: korw %k7, %k6, %k6
2231 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2232 ; KNL-NEXT: kandw %k5, %k6, %k6
2233 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2234 ; KNL-NEXT: kmovw %eax, %k7
2235 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2236 ; KNL-NEXT: kshiftrw $12, %k7, %k7
2237 ; KNL-NEXT: korw %k7, %k6, %k6
2238 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2239 ; KNL-NEXT: kandw %k5, %k6, %k6
2240 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2241 ; KNL-NEXT: kmovw %eax, %k7
2242 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2243 ; KNL-NEXT: kshiftrw $11, %k7, %k7
2244 ; KNL-NEXT: korw %k7, %k6, %k6
2245 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2246 ; KNL-NEXT: kandw %k5, %k6, %k6
2247 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2248 ; KNL-NEXT: kmovw %eax, %k7
2249 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2250 ; KNL-NEXT: kshiftrw $10, %k7, %k7
2251 ; KNL-NEXT: korw %k7, %k6, %k6
2252 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2253 ; KNL-NEXT: kandw %k5, %k6, %k6
2254 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2255 ; KNL-NEXT: kmovw %eax, %k7
2256 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2257 ; KNL-NEXT: kshiftrw $9, %k7, %k7
2258 ; KNL-NEXT: korw %k7, %k6, %k6
2259 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2260 ; KNL-NEXT: kandw %k5, %k6, %k6
2261 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2262 ; KNL-NEXT: kmovw %eax, %k7
2263 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2264 ; KNL-NEXT: kshiftrw $8, %k7, %k7
2265 ; KNL-NEXT: korw %k7, %k6, %k6
2266 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2267 ; KNL-NEXT: kandw %k5, %k6, %k6
2268 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2269 ; KNL-NEXT: kmovw %eax, %k7
2270 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2271 ; KNL-NEXT: kshiftrw $7, %k7, %k7
2272 ; KNL-NEXT: korw %k7, %k6, %k6
2273 ; KNL-NEXT: kandw %k3, %k6, %k6
2274 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2275 ; KNL-NEXT: kmovw %eax, %k7
2276 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2277 ; KNL-NEXT: kshiftrw $6, %k7, %k7
2278 ; KNL-NEXT: korw %k7, %k6, %k6
2279 ; KNL-NEXT: kandw %k4, %k6, %k5
2280 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2281 ; KNL-NEXT: kmovw %eax, %k6
2282 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2283 ; KNL-NEXT: kshiftrw $5, %k6, %k6
2284 ; KNL-NEXT: korw %k6, %k5, %k5
2285 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
2286 ; KNL-NEXT: kandw %k3, %k5, %k4
2287 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2288 ; KNL-NEXT: kmovw %eax, %k5
2289 ; KNL-NEXT: kshiftlw $15, %k5, %k5
2290 ; KNL-NEXT: kshiftrw $4, %k5, %k5
2291 ; KNL-NEXT: korw %k5, %k4, %k4
2292 ; KNL-NEXT: kandw %k0, %k4, %k3
2293 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2294 ; KNL-NEXT: kmovw %eax, %k4
2295 ; KNL-NEXT: kshiftlw $15, %k4, %k4
2296 ; KNL-NEXT: kshiftrw $3, %k4, %k4
2297 ; KNL-NEXT: korw %k4, %k3, %k3
2298 ; KNL-NEXT: kandw %k2, %k3, %k2
2299 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2300 ; KNL-NEXT: kmovw %eax, %k3
2301 ; KNL-NEXT: kshiftlw $15, %k3, %k3
2302 ; KNL-NEXT: kshiftrw $2, %k3, %k3
2303 ; KNL-NEXT: korw %k3, %k2, %k2
2304 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2305 ; KNL-NEXT: kandw %k0, %k2, %k0
2306 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2307 ; KNL-NEXT: kmovw %eax, %k2
2308 ; KNL-NEXT: kshiftlw $14, %k2, %k2
2309 ; KNL-NEXT: korw %k2, %k0, %k0
2310 ; KNL-NEXT: kshiftlw $1, %k0, %k0
2311 ; KNL-NEXT: kshiftrw $1, %k0, %k0
2312 ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2313 ; KNL-NEXT: kmovw %eax, %k2
2314 ; KNL-NEXT: kshiftlw $15, %k2, %k2
2315 ; KNL-NEXT: korw %k2, %k0, %k2
2316 ; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
2317 ; KNL-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
2318 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2319 ; KNL-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
2320 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2321 ; KNL-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z}
2322 ; KNL-NEXT: vpmovdw %zmm2, %ymm2
2323 ; KNL-NEXT: vpmovdw %zmm3, %ymm3
2324 ; KNL-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
2325 ; KNL-NEXT: vpandq %zmm1, %zmm2, %zmm1
2326 ; KNL-NEXT: vpmovdw %zmm4, %ymm2
2327 ; KNL-NEXT: vpmovdw %zmm5, %ymm3
2328 ; KNL-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
2329 ; KNL-NEXT: vpandq %zmm0, %zmm2, %zmm0
2332 ; SKX-LABEL: test21:
2334 ; SKX-NEXT: vpsllw $7, %zmm2, %zmm2
2335 ; SKX-NEXT: vpmovb2m %zmm2, %k1
2336 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
2337 ; SKX-NEXT: kshiftrq $32, %k1, %k1
2338 ; SKX-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z}
2341 ; AVX512DQNOBW-LABEL: test21:
2342 ; AVX512DQNOBW: # %bb.0:
2343 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2344 ; AVX512DQNOBW-NEXT: andl $1, %eax
2345 ; AVX512DQNOBW-NEXT: kmovw %eax, %k0
2346 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2347 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2348 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2349 ; AVX512DQNOBW-NEXT: kshiftrw $14, %k1, %k1
2350 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2351 ; AVX512DQNOBW-NEXT: movw $-5, %ax
2352 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2353 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2354 ; AVX512DQNOBW-NEXT: kmovw %k1, %k7
2355 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2356 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2357 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2358 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2359 ; AVX512DQNOBW-NEXT: kshiftrw $13, %k1, %k1
2360 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2361 ; AVX512DQNOBW-NEXT: movw $-9, %ax
2362 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2363 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2364 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2365 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2366 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2367 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2368 ; AVX512DQNOBW-NEXT: kshiftrw $12, %k1, %k1
2369 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2370 ; AVX512DQNOBW-NEXT: movw $-17, %ax
2371 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2372 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2373 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2374 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2375 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2376 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2377 ; AVX512DQNOBW-NEXT: kshiftrw $11, %k1, %k1
2378 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2379 ; AVX512DQNOBW-NEXT: movw $-33, %ax
2380 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2381 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2382 ; AVX512DQNOBW-NEXT: kmovw %k1, %k2
2383 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2384 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2385 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2386 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2387 ; AVX512DQNOBW-NEXT: kshiftrw $10, %k1, %k1
2388 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2389 ; AVX512DQNOBW-NEXT: movw $-65, %ax
2390 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2391 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2392 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2393 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2394 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2395 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2396 ; AVX512DQNOBW-NEXT: kshiftrw $9, %k1, %k1
2397 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2398 ; AVX512DQNOBW-NEXT: movw $-129, %ax
2399 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2400 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2401 ; AVX512DQNOBW-NEXT: kmovw %k1, %k3
2402 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2403 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2404 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2405 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2406 ; AVX512DQNOBW-NEXT: kshiftrw $8, %k1, %k1
2407 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2408 ; AVX512DQNOBW-NEXT: movw $-257, %ax # imm = 0xFEFF
2409 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2410 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2411 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2412 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2413 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2414 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2415 ; AVX512DQNOBW-NEXT: kshiftrw $7, %k1, %k1
2416 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2417 ; AVX512DQNOBW-NEXT: movw $-513, %ax # imm = 0xFDFF
2418 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2419 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2420 ; AVX512DQNOBW-NEXT: kmovw %k1, %k4
2421 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2422 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2423 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2424 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2425 ; AVX512DQNOBW-NEXT: kshiftrw $6, %k1, %k1
2426 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2427 ; AVX512DQNOBW-NEXT: movw $-1025, %ax # imm = 0xFBFF
2428 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2429 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2430 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2431 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2432 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2433 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2434 ; AVX512DQNOBW-NEXT: kshiftrw $5, %k1, %k1
2435 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2436 ; AVX512DQNOBW-NEXT: movw $-2049, %ax # imm = 0xF7FF
2437 ; AVX512DQNOBW-NEXT: kmovw %eax, %k5
2438 ; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0
2439 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2440 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2441 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2442 ; AVX512DQNOBW-NEXT: kshiftrw $4, %k1, %k1
2443 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2444 ; AVX512DQNOBW-NEXT: movw $-4097, %ax # imm = 0xEFFF
2445 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2446 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2447 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2448 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2449 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2450 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2451 ; AVX512DQNOBW-NEXT: kshiftrw $3, %k1, %k1
2452 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2453 ; AVX512DQNOBW-NEXT: movw $-8193, %ax # imm = 0xDFFF
2454 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2455 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2456 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2457 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2458 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2459 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2460 ; AVX512DQNOBW-NEXT: kshiftrw $2, %k1, %k1
2461 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2462 ; AVX512DQNOBW-NEXT: movw $-16385, %ax # imm = 0xBFFF
2463 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2464 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2465 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2466 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2467 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2468 ; AVX512DQNOBW-NEXT: kshiftlw $14, %k6, %k6
2469 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2470 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
2471 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
2472 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2473 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2474 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2475 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2476 ; AVX512DQNOBW-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2477 ; AVX512DQNOBW-NEXT: andl $1, %edi
2478 ; AVX512DQNOBW-NEXT: kmovw %esi, %k0
2479 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k0
2480 ; AVX512DQNOBW-NEXT: kshiftrw $14, %k0, %k0
2481 ; AVX512DQNOBW-NEXT: kmovw %edi, %k6
2482 ; AVX512DQNOBW-NEXT: korw %k0, %k6, %k0
2483 ; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0
2484 ; AVX512DQNOBW-NEXT: kmovw %edx, %k6
2485 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2486 ; AVX512DQNOBW-NEXT: kshiftrw $13, %k6, %k6
2487 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2488 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2489 ; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0
2490 ; AVX512DQNOBW-NEXT: kmovw %ecx, %k6
2491 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2492 ; AVX512DQNOBW-NEXT: kshiftrw $12, %k6, %k6
2493 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2494 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2495 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2496 ; AVX512DQNOBW-NEXT: kmovw %r8d, %k6
2497 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2498 ; AVX512DQNOBW-NEXT: kshiftrw $11, %k6, %k6
2499 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2500 ; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0
2501 ; AVX512DQNOBW-NEXT: kmovw %r9d, %k6
2502 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2503 ; AVX512DQNOBW-NEXT: kshiftrw $10, %k6, %k6
2504 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2505 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2506 ; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0
2507 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2508 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2509 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2510 ; AVX512DQNOBW-NEXT: kshiftrw $9, %k6, %k6
2511 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2512 ; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0
2513 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2514 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2515 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2516 ; AVX512DQNOBW-NEXT: kshiftrw $8, %k6, %k6
2517 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2518 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
2519 ; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0
2520 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2521 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2522 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2523 ; AVX512DQNOBW-NEXT: kshiftrw $7, %k6, %k6
2524 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2525 ; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0
2526 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2527 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2528 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2529 ; AVX512DQNOBW-NEXT: kshiftrw $6, %k6, %k6
2530 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2531 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload
2532 ; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0
2533 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2534 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2535 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2536 ; AVX512DQNOBW-NEXT: kshiftrw $5, %k6, %k6
2537 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2538 ; AVX512DQNOBW-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2539 ; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0
2540 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2541 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2542 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2543 ; AVX512DQNOBW-NEXT: kshiftrw $4, %k6, %k6
2544 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2545 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
2546 ; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0
2547 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2548 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2549 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2550 ; AVX512DQNOBW-NEXT: kshiftrw $3, %k6, %k6
2551 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2552 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
2553 ; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0
2554 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2555 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2556 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2557 ; AVX512DQNOBW-NEXT: kshiftrw $2, %k6, %k6
2558 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2559 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
2560 ; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0
2561 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2562 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2563 ; AVX512DQNOBW-NEXT: kshiftlw $14, %k6, %k6
2564 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2565 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
2566 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
2567 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2568 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2569 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2570 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2571 ; AVX512DQNOBW-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2572 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2573 ; AVX512DQNOBW-NEXT: andl $1, %eax
2574 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
2575 ; AVX512DQNOBW-NEXT: kmovw %ecx, %k0
2576 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k0
2577 ; AVX512DQNOBW-NEXT: kshiftrw $14, %k0, %k0
2578 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2579 ; AVX512DQNOBW-NEXT: korw %k0, %k6, %k0
2580 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
2581 ; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0
2582 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2583 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2584 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2585 ; AVX512DQNOBW-NEXT: kshiftrw $13, %k6, %k6
2586 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2587 ; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0
2588 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2589 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2590 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2591 ; AVX512DQNOBW-NEXT: kshiftrw $12, %k6, %k6
2592 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2593 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2594 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2595 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2596 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2597 ; AVX512DQNOBW-NEXT: kshiftrw $11, %k6, %k6
2598 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2599 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2600 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2601 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2602 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2603 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2604 ; AVX512DQNOBW-NEXT: kshiftrw $10, %k6, %k6
2605 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2606 ; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0
2607 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2608 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2609 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2610 ; AVX512DQNOBW-NEXT: kshiftrw $9, %k6, %k6
2611 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2612 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2613 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2614 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2615 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2616 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2617 ; AVX512DQNOBW-NEXT: kshiftrw $8, %k6, %k6
2618 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2619 ; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0
2620 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2621 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2622 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2623 ; AVX512DQNOBW-NEXT: kshiftrw $7, %k6, %k6
2624 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2625 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
2626 ; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0
2627 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2628 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2629 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2630 ; AVX512DQNOBW-NEXT: kshiftrw $6, %k6, %k6
2631 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2632 ; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0
2633 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2634 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2635 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2636 ; AVX512DQNOBW-NEXT: kshiftrw $5, %k6, %k6
2637 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2638 ; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0
2639 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2640 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2641 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2642 ; AVX512DQNOBW-NEXT: kshiftrw $4, %k6, %k6
2643 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2644 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2645 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2646 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2647 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2648 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2649 ; AVX512DQNOBW-NEXT: kshiftrw $3, %k6, %k6
2650 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2651 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2652 ; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0
2653 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2654 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2655 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2656 ; AVX512DQNOBW-NEXT: kshiftrw $2, %k6, %k6
2657 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2658 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2659 ; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0
2660 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2661 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2662 ; AVX512DQNOBW-NEXT: kshiftlw $14, %k6, %k6
2663 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2664 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
2665 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
2666 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2667 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2668 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2669 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
2670 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2671 ; AVX512DQNOBW-NEXT: andl $1, %eax
2672 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
2673 ; AVX512DQNOBW-NEXT: kmovw %ecx, %k6
2674 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2675 ; AVX512DQNOBW-NEXT: kshiftrw $14, %k6, %k6
2676 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2677 ; AVX512DQNOBW-NEXT: korw %k6, %k7, %k6
2678 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2679 ; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6
2680 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2681 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2682 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2683 ; AVX512DQNOBW-NEXT: kshiftrw $13, %k7, %k7
2684 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6
2685 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2686 ; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6
2687 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2688 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2689 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2690 ; AVX512DQNOBW-NEXT: kshiftrw $12, %k7, %k7
2691 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6
2692 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2693 ; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6
2694 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2695 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2696 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2697 ; AVX512DQNOBW-NEXT: kshiftrw $11, %k7, %k7
2698 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6
2699 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2700 ; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6
2701 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2702 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2703 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2704 ; AVX512DQNOBW-NEXT: kshiftrw $10, %k7, %k7
2705 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6
2706 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2707 ; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6
2708 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2709 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2710 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2711 ; AVX512DQNOBW-NEXT: kshiftrw $9, %k7, %k7
2712 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6
2713 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2714 ; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6
2715 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2716 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2717 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2718 ; AVX512DQNOBW-NEXT: kshiftrw $8, %k7, %k7
2719 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6
2720 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2721 ; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6
2722 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2723 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2724 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2725 ; AVX512DQNOBW-NEXT: kshiftrw $7, %k7, %k7
2726 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6
2727 ; AVX512DQNOBW-NEXT: kandw %k3, %k6, %k6
2728 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2729 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2730 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2731 ; AVX512DQNOBW-NEXT: kshiftrw $6, %k7, %k7
2732 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6
2733 ; AVX512DQNOBW-NEXT: kandw %k4, %k6, %k5
2734 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2735 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2736 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2737 ; AVX512DQNOBW-NEXT: kshiftrw $5, %k6, %k6
2738 ; AVX512DQNOBW-NEXT: korw %k6, %k5, %k5
2739 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
2740 ; AVX512DQNOBW-NEXT: kandw %k3, %k5, %k4
2741 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2742 ; AVX512DQNOBW-NEXT: kmovw %eax, %k5
2743 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k5, %k5
2744 ; AVX512DQNOBW-NEXT: kshiftrw $4, %k5, %k5
2745 ; AVX512DQNOBW-NEXT: korw %k5, %k4, %k4
2746 ; AVX512DQNOBW-NEXT: kandw %k1, %k4, %k3
2747 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2748 ; AVX512DQNOBW-NEXT: kmovw %eax, %k4
2749 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
2750 ; AVX512DQNOBW-NEXT: kshiftrw $3, %k4, %k4
2751 ; AVX512DQNOBW-NEXT: korw %k4, %k3, %k3
2752 ; AVX512DQNOBW-NEXT: kandw %k2, %k3, %k2
2753 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2754 ; AVX512DQNOBW-NEXT: kmovw %eax, %k3
2755 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k3, %k3
2756 ; AVX512DQNOBW-NEXT: kshiftrw $2, %k3, %k3
2757 ; AVX512DQNOBW-NEXT: korw %k3, %k2, %k2
2758 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2759 ; AVX512DQNOBW-NEXT: kandw %k1, %k2, %k1
2760 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2761 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2762 ; AVX512DQNOBW-NEXT: kshiftlw $14, %k2, %k2
2763 ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k1
2764 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1
2765 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1
2766 ; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2767 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2768 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
2769 ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k1
2770 ; AVX512DQNOBW-NEXT: vpmovm2d %k1, %zmm2
2771 ; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm3
2772 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2773 ; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm4
2774 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2775 ; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm5
2776 ; AVX512DQNOBW-NEXT: vpmovdw %zmm2, %ymm2
2777 ; AVX512DQNOBW-NEXT: vpmovdw %zmm3, %ymm3
2778 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
2779 ; AVX512DQNOBW-NEXT: vpandq %zmm1, %zmm2, %zmm1
2780 ; AVX512DQNOBW-NEXT: vpmovdw %zmm4, %ymm2
2781 ; AVX512DQNOBW-NEXT: vpmovdw %zmm5, %ymm3
2782 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
2783 ; AVX512DQNOBW-NEXT: vpandq %zmm0, %zmm2, %zmm0
2784 ; AVX512DQNOBW-NEXT: retq
2785 %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer
; Interleaves each of the 16 bytes of %a with a zero byte (shuffle index 16 is
; element 0 of the zeroinitializer operand) and bitcasts the 32 bytes to
; <16 x i16>. Every RUN configuration shares one expected sequence: a single
; vpmovzxbw from xmm0 into ymm0.
2789 define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone {
2790 ; ALL-LABEL: shuffle_zext_16x8_to_16x16:
2792 ; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2794 %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
2795 %2 = bitcast <32 x i8> %1 to <16 x i16>
; Same byte/zero interleave and bitcast to <16 x i16> as the unmasked variant,
; followed by a select on the <16 x i1> %mask against zeroinitializer.
; With avx512bw the mask is moved into k1 (vpsllw $7 + vpmovb2m) and applied as
; a zero-masking vpmovzxbw. The two runs without avx512bw instead widen the
; mask to words, turn bit 0 into a per-lane all-ones/zero value with
; vpsllw $15 / vpsraw $15, and vpand it with the zero-extended data.
2800 define <16 x i16> @shuffle_zext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone {
2801 ; KNL-LABEL: shuffle_zext_16x8_to_16x16_mask:
2802 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
2803 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2804 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
2805 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
2806 ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
2809 ; SKX-LABEL: shuffle_zext_16x8_to_16x16_mask:
2811 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
2812 ; SKX-NEXT: vpmovb2m %xmm1, %k1
2813 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2816 ; AVX512DQNOBW-LABEL: shuffle_zext_16x8_to_16x16_mask:
2817 ; AVX512DQNOBW: # %bb.0:
2818 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
2819 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2820 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
2821 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
2822 ; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0
2823 ; AVX512DQNOBW-NEXT: retq
2824 %x = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
2825 %bc = bitcast <32 x i8> %x to <16 x i16>
2826 %ret = select <16 x i1> %mask, <16 x i16> %bc, <16 x i16> zeroinitializer
; Takes bytes 0..15 of the <32 x i8> input, follows each with a zero byte
; (shuffle index 32 is element 0 of the zeroinitializer operand), and bitcasts
; the result to <16 x i16>. Every RUN configuration expects a single vpmovzxbw
; reading xmm0 and writing ymm0.
2830 define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) {
2831 ; ALL-LABEL: zext_32x8_to_16x16:
2833 ; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2835 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 1, i32 32, i32 2, i32 32, i32 3, i32 32, i32 4, i32 32, i32 5, i32 32, i32 6, i32 32, i32 7, i32 32, i32 8, i32 32, i32 9, i32 32, i32 10, i32 32, i32 11, i32 32, i32 12, i32 32, i32 13, i32 32, i32 14, i32 32, i32 15, i32 32>
2836 %2 = bitcast <32 x i8> %1 to <16 x i16>
; Takes bytes 0..7 of the <32 x i8> input, follows each with three zero bytes
; (shuffle index 32 selects from the zeroinitializer operand), and bitcasts the
; result to <8 x i32>. Every RUN configuration expects a single vpmovzxbd
; reading xmm0 and writing ymm0.
2840 define <8 x i32> @zext_32x8_to_8x32(<32 x i8> %a) {
2841 ; ALL-LABEL: zext_32x8_to_8x32:
2843 ; ALL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2845 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
2846 %2 = bitcast <32 x i8> %1 to <8 x i32>
; Takes bytes 0..3 of the <32 x i8> input, follows each with seven zero bytes
; (shuffle index 32 selects from the zeroinitializer operand), and bitcasts the
; result to <4 x i64>. Every RUN configuration expects a single vpmovzxbq
; reading xmm0 and writing ymm0.
2850 define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) {
2851 ; ALL-LABEL: zext_32x8_to_4x64:
2853 ; ALL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
2855 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
2856 %2 = bitcast <32 x i8> %1 to <4 x i64>
; Takes words 0..7 of the <16 x i16> input, follows each with a zero word
; (shuffle index 16 is element 0 of the zeroinitializer operand), and bitcasts
; the result to <8 x i32>. Every RUN configuration expects a single vpmovzxwd
; reading xmm0 and writing ymm0.
2860 define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) {
2861 ; ALL-LABEL: zext_16x16_to_8x32:
2863 ; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2865 %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16>
2866 %2 = bitcast <16 x i16> %1 to <8 x i32>
; Takes words 0..3 of the <16 x i16> input, follows each with three zero words
; (shuffle index 16 selects from the zeroinitializer operand), and bitcasts the
; result to <4 x i64>. Every RUN configuration expects a single vpmovzxwq
; reading xmm0 and writing ymm0.
2870 define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) {
2871 ; ALL-LABEL: zext_16x16_to_4x64:
2873 ; ALL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2875 %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
2876 %2 = bitcast <16 x i16> %1 to <4 x i64>
; Takes dwords 0..3 of the <8 x i32> input, follows each with a zero dword
; (shuffle index 8 is element 0 of the zeroinitializer operand), and bitcasts
; the result to <4 x i64>. Every RUN configuration expects a single vpmovzxdq
; reading xmm0 and writing ymm0.
2880 define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) {
2881 ; ALL-LABEL: zext_8x32_to_4x64:
2883 ; ALL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2885 %1 = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8>
2886 %2 = bitcast <8 x i32> %1 to <4 x i64>
; Compares two <64 x i8> vectors for equality and zero-extends the resulting
; <64 x i1> mask to <64 x i8> (each lane becomes 0 or 1).
; Expected lowering:
;  - KNL and AVX512DQNOBW configurations (no +avx512bw in their run lines):
;    the 512-bit inputs are split into 256-bit halves, each half is compared
;    with vpcmpeqb, the halves are re-inserted into a zmm register, and the
;    all-ones lanes are reduced by ANDing with a broadcast constant-pool value.
;  - SKX configuration: vpcmpeqb writes mask register k1 directly and a
;    zero-masked vmovdqu8 materialises a 1 in every selected byte.
2890 define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
2891 ; KNL-LABEL: zext_64xi1_to_64xi8:
2893 ; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
2894 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
2895 ; KNL-NEXT: vpcmpeqb %ymm2, %ymm3, %ymm2
2896 ; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
2897 ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
2898 ; KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
2901 ; SKX-LABEL: zext_64xi1_to_64xi8:
2903 ; SKX-NEXT: vpcmpeqb %zmm1, %zmm0, %k1
2904 ; SKX-NEXT: vmovdqu8 {{.*#+}} zmm0 {%k1} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
2907 ; AVX512DQNOBW-LABEL: zext_64xi1_to_64xi8:
2908 ; AVX512DQNOBW: # %bb.0:
2909 ; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm1, %ymm2
2910 ; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm3
2911 ; AVX512DQNOBW-NEXT: vpcmpeqb %ymm2, %ymm3, %ymm2
2912 ; AVX512DQNOBW-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
2913 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
2914 ; AVX512DQNOBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
2915 ; AVX512DQNOBW-NEXT: retq
2916 %mask = icmp eq <64 x i8> %x, %y
2917 %1 = zext <64 x i1> %mask to <64 x i8>
; Compares two <32 x i16> vectors for equality and zero-extends the resulting
; <32 x i1> mask to <32 x i16> (each lane becomes 0 or 1).
; Expected lowering:
;  - KNL and AVX512DQNOBW configurations (no +avx512bw in their run lines):
;    the inputs are split into 256-bit halves, compared with vpcmpeqw,
;    recombined into a zmm register, and ANDed with a broadcast constant-pool
;    value.
;  - SKX configuration: vpcmpeqw writes mask register k0, vpmovm2w expands the
;    mask back to word lanes, and a logical right shift by 15 leaves only the
;    low bit of each lane.
2921 define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
2922 ; KNL-LABEL: zext_32xi1_to_32xi16:
2924 ; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
2925 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
2926 ; KNL-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2
2927 ; KNL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
2928 ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
2929 ; KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
2932 ; SKX-LABEL: zext_32xi1_to_32xi16:
2934 ; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
2935 ; SKX-NEXT: vpmovm2w %k0, %zmm0
2936 ; SKX-NEXT: vpsrlw $15, %zmm0, %zmm0
2939 ; AVX512DQNOBW-LABEL: zext_32xi1_to_32xi16:
2940 ; AVX512DQNOBW: # %bb.0:
2941 ; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm1, %ymm2
2942 ; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm3
2943 ; AVX512DQNOBW-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2
2944 ; AVX512DQNOBW-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
2945 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
2946 ; AVX512DQNOBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
2947 ; AVX512DQNOBW-NEXT: retq
2948 %mask = icmp eq <32 x i16> %x, %y
2949 %1 = zext <32 x i1> %mask to <32 x i16>
; Compares two <16 x i16> vectors for equality and zero-extends the resulting
; <16 x i1> mask to <16 x i16>. Every run configuration is expected to use the
; same two-instruction sequence: a 256-bit vpcmpeqw producing all-ones or zero
; word lanes, then a logical right shift by 15 that leaves 1 or 0 in each lane.
2953 define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 {
2954 ; ALL-LABEL: zext_16xi1_to_16xi16:
2956 ; ALL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
2957 ; ALL-NEXT: vpsrlw $15, %ymm0, %ymm0
2959 %mask = icmp eq <16 x i16> %x, %y
2960 %1 = zext <16 x i1> %mask to <16 x i16>
; Compares two <32 x i16> vectors for equality and zero-extends the resulting
; <32 x i1> mask to the narrower <32 x i8> type (each lane becomes 0 or 1).
; Expected lowering:
;  - KNL and AVX512DQNOBW configurations (no +avx512bw in their run lines):
;    the inputs are split into 256-bit halves and compared with vpcmpeqw; the
;    two word results are packed to bytes with vpacksswb, and vpermq [0,2,1,3]
;    reorders the 64-bit chunks (presumably to undo the per-128-bit-lane
;    ordering of the pack). The final AND with a constant differs only in
;    form: KNL uses a full-width memory operand, AVX512DQNOBW a {1to8}
;    broadcast.
;  - SKX configuration: vpcmpeqw writes mask register k1 and a zero-masked
;    vmovdqu8 materialises a 1 in every selected byte of ymm0.
2965 define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 {
2966 ; KNL-LABEL: zext_32xi1_to_32xi8:
2968 ; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
2969 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
2970 ; KNL-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2
2971 ; KNL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
2972 ; KNL-NEXT: vpacksswb %ymm2, %ymm0, %ymm0
2973 ; KNL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2974 ; KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
2977 ; SKX-LABEL: zext_32xi1_to_32xi8:
2979 ; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k1
2980 ; SKX-NEXT: vmovdqu8 {{.*#+}} ymm0 {%k1} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
2983 ; AVX512DQNOBW-LABEL: zext_32xi1_to_32xi8:
2984 ; AVX512DQNOBW: # %bb.0:
2985 ; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm1, %ymm2
2986 ; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm3
2987 ; AVX512DQNOBW-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2
2988 ; AVX512DQNOBW-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
2989 ; AVX512DQNOBW-NEXT: vpacksswb %ymm2, %ymm0, %ymm0
2990 ; AVX512DQNOBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2991 ; AVX512DQNOBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
2992 ; AVX512DQNOBW-NEXT: retq
2993 %mask = icmp eq <32 x i16> %x, %y
2994 %1 = zext <32 x i1> %mask to <32 x i8>
; Compares two <4 x i8> vectors for equality and zero-extends the resulting
; <4 x i1> mask to <4 x i32> (each lane becomes 0 or 1).
; Expected lowering:
;  - KNL configuration: vpcmpeqb in an xmm register, vpmovzxbd to widen the
;    low four bytes to dwords, then an AND with a [1,1,1,1] vector that is
;    materialised by a separate vpbroadcastd.
;  - AVX512DQNOBW configuration: same compare and widen, but the AND takes
;    its constant as a {1to4} broadcast memory operand.
;  - SKX configuration: vpcmpeqb writes mask register k0, vpmovm2d expands the
;    mask to dword lanes, and a logical right shift by 31 leaves only the low
;    bit of each lane.
2998 define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 {
2999 ; KNL-LABEL: zext_4xi1_to_4x32:
3001 ; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
3002 ; KNL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3003 ; KNL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
3004 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
3007 ; SKX-LABEL: zext_4xi1_to_4x32:
3009 ; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
3010 ; SKX-NEXT: vpmovm2d %k0, %xmm0
3011 ; SKX-NEXT: vpsrld $31, %xmm0, %xmm0
3014 ; AVX512DQNOBW-LABEL: zext_4xi1_to_4x32:
3015 ; AVX512DQNOBW: # %bb.0:
3016 ; AVX512DQNOBW-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
3017 ; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3018 ; AVX512DQNOBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
3019 ; AVX512DQNOBW-NEXT: retq
3020 %mask = icmp eq <4 x i8> %x, %y
3021 %1 = zext <4 x i1> %mask to <4 x i32>
3025 define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 {
3026 ; KNL-LABEL: zext_2xi1_to_2xi64:
3028 ; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
3029 ; KNL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
3030 ; KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3033 ; SKX-LABEL: zext_2xi1_to_2xi64:
3035 ; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
3036 ; SKX-NEXT: vpmovm2q %k0, %xmm0
3037 ; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0
3040 ; AVX512DQNOBW-LABEL: zext_2xi1_to_2xi64:
3041 ; AVX512DQNOBW: # %bb.0:
3042 ; AVX512DQNOBW-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
3043 ; AVX512DQNOBW-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
3044 ; AVX512DQNOBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
3045 ; AVX512DQNOBW-NEXT: retq
3046 %mask = icmp eq <2 x i8> %x, %y
3047 %1 = zext <2 x i1> %mask to <2 x i64>