1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Three llc configurations share this file: AVX512F only, AVX512F+VL+DQ+BW, and AVX512F+VL+DQ without BW.
; Each one is checked with the common ALL prefix plus its own prefixes.
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=ALL,KNL
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=ALL,AVX512DQ,SKX
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq | FileCheck %s --check-prefixes=ALL,AVX512DQ,AVX512DQNOBW
; Masked zext from memory: loads <8 x i8> from %i, zero-extends to <8 x i16>, then selects against zeroinitializer under %mask.
; The KNL and AVX512DQNOBW checks expect an unmasked vpmovzxbw followed by vpsllw/vpsraw/vpand on the mask;
; the SKX checks expect vpmovw2m feeding a zero-masked vpmovzxbw.
; NOTE(review): the signature says readnone, yet the body loads through %i; LangRef's readnone excludes dereferencing pointer arguments, so readonly may be intended - confirm.
6 define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
7 ; KNL-LABEL: zext_8x8mem_to_8x16:
9 ; KNL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
10 ; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
11 ; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
12 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
15 ; SKX-LABEL: zext_8x8mem_to_8x16:
17 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
18 ; SKX-NEXT: vpmovw2m %xmm0, %k1
19 ; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
22 ; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x16:
23 ; AVX512DQNOBW: # %bb.0:
24 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
25 ; AVX512DQNOBW-NEXT: vpsllw $15, %xmm0, %xmm0
26 ; AVX512DQNOBW-NEXT: vpsraw $15, %xmm0, %xmm0
27 ; AVX512DQNOBW-NEXT: vpand %xmm1, %xmm0, %xmm0
28 ; AVX512DQNOBW-NEXT: retq
29 %a = load <8 x i8>,<8 x i8> *%i,align 1
30 %x = zext <8 x i8> %a to <8 x i16>
31 %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
; Masked sext from memory: loads <8 x i8> from %i, sign-extends to <8 x i16>, then selects against zeroinitializer under %mask.
; The KNL and AVX512DQNOBW checks expect vpmovsxbw from (%rdi) followed by vpsllw/vpsraw/vpand on the mask;
; the SKX checks expect vpmovw2m feeding a zero-masked vpmovsxbw.
; NOTE(review): the signature says readnone, yet the body loads through %i; LangRef's readnone excludes dereferencing pointer arguments, so readonly may be intended - confirm.
35 define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
36 ; KNL-LABEL: sext_8x8mem_to_8x16:
38 ; KNL-NEXT: vpmovsxbw (%rdi), %xmm1
39 ; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
40 ; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
41 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
44 ; SKX-LABEL: sext_8x8mem_to_8x16:
46 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
47 ; SKX-NEXT: vpmovw2m %xmm0, %k1
48 ; SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z}
51 ; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x16:
52 ; AVX512DQNOBW: # %bb.0:
53 ; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %xmm1
54 ; AVX512DQNOBW-NEXT: vpsllw $15, %xmm0, %xmm0
55 ; AVX512DQNOBW-NEXT: vpsraw $15, %xmm0, %xmm0
56 ; AVX512DQNOBW-NEXT: vpand %xmm1, %xmm0, %xmm0
57 ; AVX512DQNOBW-NEXT: retq
58 %a = load <8 x i8>,<8 x i8> *%i,align 1
59 %x = sext <8 x i8> %a to <8 x i16>
60 %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
; Masked zext from memory: loads <16 x i8> from %i, zero-extends to <16 x i16>, then selects against zeroinitializer under %mask.
; The KNL and AVX512DQNOBW checks expect the mask widened with vpmovzxbw, an unmasked vpmovzxbw load, then vpsllw/vpsraw/vpand;
; the SKX checks expect vpmovb2m feeding a zero-masked vpmovzxbw.
; NOTE(review): the signature says readnone, yet the body loads through %i; LangRef's readnone excludes dereferencing pointer arguments, so readonly may be intended - confirm.
65 define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
66 ; KNL-LABEL: zext_16x8mem_to_16x16:
68 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
69 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
70 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
71 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
72 ; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
75 ; SKX-LABEL: zext_16x8mem_to_16x16:
77 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
78 ; SKX-NEXT: vpmovb2m %xmm0, %k1
79 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
82 ; AVX512DQNOBW-LABEL: zext_16x8mem_to_16x16:
83 ; AVX512DQNOBW: # %bb.0:
84 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
85 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
86 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
87 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
88 ; AVX512DQNOBW-NEXT: vpand %ymm1, %ymm0, %ymm0
89 ; AVX512DQNOBW-NEXT: retq
90 %a = load <16 x i8>,<16 x i8> *%i,align 1
91 %x = zext <16 x i8> %a to <16 x i16>
92 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Masked sext from memory: loads <16 x i8> from %i, sign-extends to <16 x i16>, then selects against zeroinitializer under %mask.
; The KNL and AVX512DQNOBW checks expect the mask widened with vpmovzxbw, vpmovsxbw from (%rdi), then vpsllw/vpsraw/vpand;
; the SKX checks expect vpmovb2m feeding a zero-masked vpmovsxbw.
; NOTE(review): the signature says readnone, yet the body loads through %i; LangRef's readnone excludes dereferencing pointer arguments, so readonly may be intended - confirm.
96 define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
97 ; KNL-LABEL: sext_16x8mem_to_16x16:
99 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
100 ; KNL-NEXT: vpmovsxbw (%rdi), %ymm1
101 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
102 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
103 ; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
106 ; SKX-LABEL: sext_16x8mem_to_16x16:
108 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
109 ; SKX-NEXT: vpmovb2m %xmm0, %k1
110 ; SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z}
113 ; AVX512DQNOBW-LABEL: sext_16x8mem_to_16x16:
114 ; AVX512DQNOBW: # %bb.0:
115 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
116 ; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %ymm1
117 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
118 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
119 ; AVX512DQNOBW-NEXT: vpand %ymm1, %ymm0, %ymm0
120 ; AVX512DQNOBW-NEXT: retq
121 %a = load <16 x i8>,<16 x i8> *%i,align 1
122 %x = sext <16 x i8> %a to <16 x i16>
123 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Unmasked register zext of <16 x i8> %a to <16 x i16>.
; Checked only under the shared ALL prefix, which expects a vpmovzxbw from xmm0 into ymm0.
127 define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
128 ; ALL-LABEL: zext_16x8_to_16x16:
130 ; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
132 %x = zext <16 x i8> %a to <16 x i16>
; Masked register zext: zero-extends <16 x i8> %a to <16 x i16>, then selects against zeroinitializer under %mask.
; The KNL and AVX512DQNOBW checks expect both mask and data widened with vpmovzxbw, then vpsllw/vpsraw/vpand;
; the SKX checks expect vpmovb2m feeding a zero-masked vpmovzxbw.
136 define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
137 ; KNL-LABEL: zext_16x8_to_16x16_mask:
139 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
140 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
141 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
142 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
143 ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
146 ; SKX-LABEL: zext_16x8_to_16x16_mask:
148 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
149 ; SKX-NEXT: vpmovb2m %xmm1, %k1
150 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
153 ; AVX512DQNOBW-LABEL: zext_16x8_to_16x16_mask:
154 ; AVX512DQNOBW: # %bb.0:
155 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
156 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
157 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
158 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
159 ; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0
160 ; AVX512DQNOBW-NEXT: retq
161 %x = zext <16 x i8> %a to <16 x i16>
162 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Unmasked register sext of <16 x i8> %a to <16 x i16>.
; Checked only under the shared ALL prefix, which expects vpmovsxbw from xmm0 into ymm0.
166 define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
167 ; ALL-LABEL: sext_16x8_to_16x16:
169 ; ALL-NEXT: vpmovsxbw %xmm0, %ymm0
171 %x = sext <16 x i8> %a to <16 x i16>
; Masked register sext: sign-extends <16 x i8> %a to <16 x i16>, then selects against zeroinitializer under %mask.
; The KNL and AVX512DQNOBW checks expect the mask widened with vpmovzxbw, the data with vpmovsxbw, then vpsllw/vpsraw/vpand;
; the SKX checks expect vpmovb2m feeding a zero-masked vpmovsxbw.
175 define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
176 ; KNL-LABEL: sext_16x8_to_16x16_mask:
178 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
179 ; KNL-NEXT: vpmovsxbw %xmm0, %ymm0
180 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
181 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
182 ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
185 ; SKX-LABEL: sext_16x8_to_16x16_mask:
187 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
188 ; SKX-NEXT: vpmovb2m %xmm1, %k1
189 ; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z}
192 ; AVX512DQNOBW-LABEL: sext_16x8_to_16x16_mask:
193 ; AVX512DQNOBW: # %bb.0:
194 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
195 ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm0
196 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
197 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
198 ; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0
199 ; AVX512DQNOBW-NEXT: retq
200 %x = sext <16 x i8> %a to <16 x i16>
201 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Masked zext from memory: loads <32 x i8> from %i, zero-extends to <32 x i16>, then selects against zeroinitializer under %mask.
; The KNL and AVX512DQNOBW checks expect the work split into two ymm halves (vextracti128 on the mask, two vpmovzxbw loads,
; vpsllw/vpsraw/vpand per half) that are rejoined with vinserti64x4; the SKX checks expect vpmovb2m feeding one zero-masked zmm vpmovzxbw.
; NOTE(review): the signature says readnone, yet the body loads through %i; LangRef's readnone excludes dereferencing pointer arguments, so readonly may be intended - confirm.
205 define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
206 ; KNL-LABEL: zext_32x8mem_to_32x16:
208 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
209 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
210 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
211 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
212 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
213 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
214 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
215 ; KNL-NEXT: vpand %ymm3, %ymm0, %ymm0
216 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
217 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
218 ; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
219 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
222 ; SKX-LABEL: zext_32x8mem_to_32x16:
224 ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
225 ; SKX-NEXT: vpmovb2m %ymm0, %k1
226 ; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero
229 ; AVX512DQNOBW-LABEL: zext_32x8mem_to_32x16:
230 ; AVX512DQNOBW: # %bb.0:
231 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
232 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm0
233 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
234 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
235 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
236 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
237 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
238 ; AVX512DQNOBW-NEXT: vpand %ymm3, %ymm0, %ymm0
239 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
240 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
241 ; AVX512DQNOBW-NEXT: vpand %ymm2, %ymm1, %ymm1
242 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
243 ; AVX512DQNOBW-NEXT: retq
244 %a = load <32 x i8>,<32 x i8> *%i,align 1
245 %x = zext <32 x i8> %a to <32 x i16>
246 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; Masked sext from memory: loads <32 x i8> from %i, sign-extends to <32 x i16>, then selects against zeroinitializer under %mask.
; The KNL and AVX512DQNOBW checks expect two ymm halves (vpmovsxbw from (%rdi) and 16(%rdi), vpsllw/vpsraw/vpand per half)
; rejoined with vinserti64x4; the SKX checks expect vpmovb2m feeding one zero-masked zmm vpmovsxbw.
; NOTE(review): the signature says readnone, yet the body loads through %i; LangRef's readnone excludes dereferencing pointer arguments, so readonly may be intended - confirm.
250 define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
251 ; KNL-LABEL: sext_32x8mem_to_32x16:
253 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
254 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
255 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
256 ; KNL-NEXT: vpmovsxbw (%rdi), %ymm2
257 ; KNL-NEXT: vpmovsxbw 16(%rdi), %ymm3
258 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
259 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
260 ; KNL-NEXT: vpand %ymm3, %ymm0, %ymm0
261 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
262 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
263 ; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
264 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
267 ; SKX-LABEL: sext_32x8mem_to_32x16:
269 ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
270 ; SKX-NEXT: vpmovb2m %ymm0, %k1
271 ; SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z}
274 ; AVX512DQNOBW-LABEL: sext_32x8mem_to_32x16:
275 ; AVX512DQNOBW: # %bb.0:
276 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
277 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm0
278 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
279 ; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %ymm2
280 ; AVX512DQNOBW-NEXT: vpmovsxbw 16(%rdi), %ymm3
281 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
282 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
283 ; AVX512DQNOBW-NEXT: vpand %ymm3, %ymm0, %ymm0
284 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
285 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
286 ; AVX512DQNOBW-NEXT: vpand %ymm2, %ymm1, %ymm1
287 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
288 ; AVX512DQNOBW-NEXT: retq
289 %a = load <32 x i8>,<32 x i8> *%i,align 1
290 %x = sext <32 x i8> %a to <32 x i16>
291 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; Unmasked register zext of <32 x i8> %a to <32 x i16>.
; The KNL and AVX512DQNOBW checks expect two ymm vpmovzxbw halves (upper half via vextracti128) joined with vinserti64x4;
; the SKX checks expect a single ymm-to-zmm vpmovzxbw.
295 define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
296 ; KNL-LABEL: zext_32x8_to_32x16:
298 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
299 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
300 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
301 ; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
304 ; SKX-LABEL: zext_32x8_to_32x16:
306 ; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
309 ; AVX512DQNOBW-LABEL: zext_32x8_to_32x16:
310 ; AVX512DQNOBW: # %bb.0:
311 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
312 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
313 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
314 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
315 ; AVX512DQNOBW-NEXT: retq
316 %x = zext <32 x i8> %a to <32 x i16>
; Masked register zext: zero-extends <32 x i8> %a to <32 x i16>, then selects against zeroinitializer under %mask.
; The KNL and AVX512DQNOBW checks expect mask and data each split into two ymm halves, combined per half with
; vpsllw/vpsraw/vpand and rejoined with vinserti64x4; the SKX checks expect vpmovb2m feeding one zero-masked zmm vpmovzxbw.
320 define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
321 ; KNL-LABEL: zext_32x8_to_32x16_mask:
323 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
324 ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1
325 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
326 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
327 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
328 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
329 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
330 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
331 ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
332 ; KNL-NEXT: vpsllw $15, %ymm2, %ymm1
333 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
334 ; KNL-NEXT: vpand %ymm3, %ymm1, %ymm1
335 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
338 ; SKX-LABEL: zext_32x8_to_32x16_mask:
340 ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1
341 ; SKX-NEXT: vpmovb2m %ymm1, %k1
342 ; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
345 ; AVX512DQNOBW-LABEL: zext_32x8_to_32x16_mask:
346 ; AVX512DQNOBW: # %bb.0:
347 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
348 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm1, %xmm1
349 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
350 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
351 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm0
352 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
353 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
354 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
355 ; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0
356 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm2, %ymm1
357 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
358 ; AVX512DQNOBW-NEXT: vpand %ymm3, %ymm1, %ymm1
359 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
360 ; AVX512DQNOBW-NEXT: retq
361 %x = zext <32 x i8> %a to <32 x i16>
362 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; Unmasked register sext of <32 x i8> %a to <32 x i16>.
; The KNL and AVX512DQNOBW checks expect two ymm vpmovsxbw halves (upper half via vextracti128) joined with vinserti64x4;
; the SKX checks expect a single ymm-to-zmm vpmovsxbw.
366 define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
367 ; KNL-LABEL: sext_32x8_to_32x16:
369 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
370 ; KNL-NEXT: vpmovsxbw %xmm1, %ymm1
371 ; KNL-NEXT: vpmovsxbw %xmm0, %ymm0
372 ; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
375 ; SKX-LABEL: sext_32x8_to_32x16:
377 ; SKX-NEXT: vpmovsxbw %ymm0, %zmm0
380 ; AVX512DQNOBW-LABEL: sext_32x8_to_32x16:
381 ; AVX512DQNOBW: # %bb.0:
382 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
383 ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm1, %ymm1
384 ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm0
385 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
386 ; AVX512DQNOBW-NEXT: retq
387 %x = sext <32 x i8> %a to <32 x i16>
; Masked register sext: sign-extends <32 x i8> %a to <32 x i16>, then selects against zeroinitializer under %mask.
; The KNL and AVX512DQNOBW checks expect the mask halves widened with vpmovzxbw and the data halves with vpmovsxbw,
; combined per half with vpsllw/vpsraw/vpand and rejoined with vinserti64x4; the SKX checks expect vpmovb2m feeding one zero-masked zmm vpmovsxbw.
391 define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
392 ; KNL-LABEL: sext_32x8_to_32x16_mask:
394 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
395 ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1
396 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
397 ; KNL-NEXT: vpmovsxbw %xmm0, %ymm3
398 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
399 ; KNL-NEXT: vpmovsxbw %xmm0, %ymm0
400 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
401 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
402 ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
403 ; KNL-NEXT: vpsllw $15, %ymm2, %ymm1
404 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
405 ; KNL-NEXT: vpand %ymm3, %ymm1, %ymm1
406 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
409 ; SKX-LABEL: sext_32x8_to_32x16_mask:
411 ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1
412 ; SKX-NEXT: vpmovb2m %ymm1, %k1
413 ; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z}
416 ; AVX512DQNOBW-LABEL: sext_32x8_to_32x16_mask:
417 ; AVX512DQNOBW: # %bb.0:
418 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
419 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm1, %xmm1
420 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
421 ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm3
422 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm0
423 ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm0
424 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
425 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
426 ; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0
427 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm2, %ymm1
428 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
429 ; AVX512DQNOBW-NEXT: vpand %ymm3, %ymm1, %ymm1
430 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
431 ; AVX512DQNOBW-NEXT: retq
432 %x = sext <32 x i8> %a to <32 x i16>
433 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; Masked zext from memory: loads <4 x i8> from %i, zero-extends to <4 x i32>, then selects against zeroinitializer under %mask.
; The KNL checks expect vptestmd on zmm, an unmasked vpmovzxbd load, a zero-masked vmovdqa32 and a vzeroupper;
; the AVX512DQ prefix (used by both DQ configurations) expects vpmovd2m feeding a zero-masked vpmovzxbd.
; NOTE(review): the signature says readnone, yet the body loads through %i; LangRef's readnone excludes dereferencing pointer arguments, so readonly may be intended - confirm.
437 define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
438 ; KNL-LABEL: zext_4x8mem_to_4x32:
440 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
441 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
442 ; KNL-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
443 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
444 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
445 ; KNL-NEXT: vzeroupper
448 ; AVX512DQ-LABEL: zext_4x8mem_to_4x32:
450 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
451 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
452 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
453 ; AVX512DQ-NEXT: retq
454 %a = load <4 x i8>,<4 x i8> *%i,align 1
455 %x = zext <4 x i8> %a to <4 x i32>
456 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Masked sext from memory: loads <4 x i8> from %i, sign-extends to <4 x i32>, then selects against zeroinitializer under %mask.
; The KNL checks expect vptestmd on zmm, an unmasked vpmovsxbd from (%rdi), a zero-masked vmovdqa32 and a vzeroupper;
; the AVX512DQ prefix (used by both DQ configurations) expects vpmovd2m feeding a zero-masked vpmovsxbd.
; NOTE(review): the signature says readnone, yet the body loads through %i; LangRef's readnone excludes dereferencing pointer arguments, so readonly may be intended - confirm.
460 define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
461 ; KNL-LABEL: sext_4x8mem_to_4x32:
463 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
464 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
465 ; KNL-NEXT: vpmovsxbd (%rdi), %xmm0
466 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
467 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
468 ; KNL-NEXT: vzeroupper
471 ; AVX512DQ-LABEL: sext_4x8mem_to_4x32:
473 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
474 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
475 ; AVX512DQ-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z}
476 ; AVX512DQ-NEXT: retq
477 %a = load <4 x i8>,<4 x i8> *%i,align 1
478 %x = sext <4 x i8> %a to <4 x i32>
479 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Masked zext from memory: loads <8 x i8> from %i, zero-extends to <8 x i32>, then selects against zeroinitializer under %mask.
; The KNL checks expect the mask widened with vpmovsxwq and tested with vptestmq, then a zero-masked vmovdqa32 after an unmasked vpmovzxbd;
; the SKX checks expect vpmovw2m, and the AVX512DQNOBW checks expect vpmovsxwd plus vpmovd2m, each feeding a zero-masked vpmovzxbd.
; NOTE(review): the signature says readnone, yet the body loads through %i; LangRef's readnone excludes dereferencing pointer arguments, so readonly may be intended - confirm.
483 define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
484 ; KNL-LABEL: zext_8x8mem_to_8x32:
486 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
487 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
488 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
489 ; KNL-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
490 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
491 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
494 ; SKX-LABEL: zext_8x8mem_to_8x32:
496 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
497 ; SKX-NEXT: vpmovw2m %xmm0, %k1
498 ; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
501 ; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x32:
502 ; AVX512DQNOBW: # %bb.0:
503 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
504 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
505 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
506 ; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
507 ; AVX512DQNOBW-NEXT: retq
508 %a = load <8 x i8>,<8 x i8> *%i,align 1
509 %x = zext <8 x i8> %a to <8 x i32>
510 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Masked sext from memory: loads <8 x i8> from %i, sign-extends to <8 x i32>, then selects against zeroinitializer under %mask.
; The KNL checks expect the mask widened with vpmovsxwq and tested with vptestmq, then a zero-masked vmovdqa32 after an unmasked vpmovsxbd;
; the SKX checks expect vpmovw2m, and the AVX512DQNOBW checks expect vpmovsxwd plus vpmovd2m, each feeding a zero-masked vpmovsxbd.
; NOTE(review): the signature says readnone, yet the body loads through %i; LangRef's readnone excludes dereferencing pointer arguments, so readonly may be intended - confirm.
514 define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
515 ; KNL-LABEL: sext_8x8mem_to_8x32:
517 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
518 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
519 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
520 ; KNL-NEXT: vpmovsxbd (%rdi), %ymm0
521 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
522 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
525 ; SKX-LABEL: sext_8x8mem_to_8x32:
527 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
528 ; SKX-NEXT: vpmovw2m %xmm0, %k1
529 ; SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z}
532 ; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x32:
533 ; AVX512DQNOBW: # %bb.0:
534 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
535 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
536 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
537 ; AVX512DQNOBW-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z}
538 ; AVX512DQNOBW-NEXT: retq
539 %a = load <8 x i8>,<8 x i8> *%i,align 1
540 %x = sext <8 x i8> %a to <8 x i32>
541 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Masked zext from memory: loads <16 x i8> from %i, zero-extends to <16 x i32>, then selects against zeroinitializer under %mask.
; All three check groups end in a zero-masked zmm vpmovzxbd; they differ in how %k1 is built
; (vptestmd for KNL, vpmovb2m for SKX, vpmovd2m for AVX512DQNOBW).
; NOTE(review): the signature says readnone, yet the body loads through %i; LangRef's readnone excludes dereferencing pointer arguments, so readonly may be intended - confirm.
545 define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
546 ; KNL-LABEL: zext_16x8mem_to_16x32:
548 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
549 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
550 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
551 ; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
554 ; SKX-LABEL: zext_16x8mem_to_16x32:
556 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
557 ; SKX-NEXT: vpmovb2m %xmm0, %k1
558 ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
561 ; AVX512DQNOBW-LABEL: zext_16x8mem_to_16x32:
562 ; AVX512DQNOBW: # %bb.0:
563 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
564 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
565 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1
566 ; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
567 ; AVX512DQNOBW-NEXT: retq
568 %a = load <16 x i8>,<16 x i8> *%i,align 1
569 %x = zext <16 x i8> %a to <16 x i32>
570 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Masked sign-extension from memory: loads <16 x i8> through %i, widens it to
; <16 x i32> with sext, and selects zeroinitializer for lanes whose %mask bit is clear.
; Declared readonly rather than readnone because the body loads through %i.
574 define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readonly {
575 ; KNL-LABEL: sext_16x8mem_to_16x32:
577 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
578 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
579 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
580 ; KNL-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
583 ; SKX-LABEL: sext_16x8mem_to_16x32:
585 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
586 ; SKX-NEXT: vpmovb2m %xmm0, %k1
587 ; SKX-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
590 ; AVX512DQNOBW-LABEL: sext_16x8mem_to_16x32:
591 ; AVX512DQNOBW: # %bb.0:
592 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
593 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
594 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1
595 ; AVX512DQNOBW-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
596 ; AVX512DQNOBW-NEXT: retq
597 %a = load <16 x i8>,<16 x i8> *%i,align 1
598 %x = sext <16 x i8> %a to <16 x i32>
599 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Masked zero-extension of a register operand: zext of %a from <16 x i8> to
; <16 x i32>, with zeroinitializer selected for lanes whose %mask bit is clear.
; The three prefixes differ only in how the <16 x i1> mask is moved into %k1.
603 define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
604 ; KNL-LABEL: zext_16x8_to_16x32_mask:
606 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
607 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
608 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
609 ; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
612 ; SKX-LABEL: zext_16x8_to_16x32_mask:
614 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
615 ; SKX-NEXT: vpmovb2m %xmm1, %k1
616 ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
619 ; AVX512DQNOBW-LABEL: zext_16x8_to_16x32_mask:
620 ; AVX512DQNOBW: # %bb.0:
621 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1
622 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1
623 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1
624 ; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
625 ; AVX512DQNOBW-NEXT: retq
626 %x = zext <16 x i8> %a to <16 x i32>
627 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Masked sign-extension of a register operand: sext of %a from <16 x i8> to
; <16 x i32>, with zeroinitializer selected for lanes whose %mask bit is clear.
; The three prefixes differ only in how the <16 x i1> mask is moved into %k1.
631 define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
632 ; KNL-LABEL: sext_16x8_to_16x32_mask:
634 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
635 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
636 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
637 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
640 ; SKX-LABEL: sext_16x8_to_16x32_mask:
642 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
643 ; SKX-NEXT: vpmovb2m %xmm1, %k1
644 ; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
647 ; AVX512DQNOBW-LABEL: sext_16x8_to_16x32_mask:
648 ; AVX512DQNOBW: # %bb.0:
649 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1
650 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1
651 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1
652 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
653 ; AVX512DQNOBW-NEXT: retq
654 %x = sext <16 x i8> %a to <16 x i32>
655 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Unmasked zero-extension of a register operand from <16 x i8> to <16 x i32>.
; Uses the ALL prefix, which every RUN line enables, so one expectation covers
; all tested feature sets.
659 define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
660 ; ALL-LABEL: zext_16x8_to_16x32:
662 ; ALL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
664 %x = zext <16 x i8> %i to <16 x i32>
; Unmasked sign-extension of a register operand from <16 x i8> to <16 x i32>.
; Uses the ALL prefix, which every RUN line enables, so one expectation covers
; all tested feature sets.
668 define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
669 ; ALL-LABEL: sext_16x8_to_16x32:
671 ; ALL-NEXT: vpmovsxbd %xmm0, %zmm0
673 %x = sext <16 x i8> %i to <16 x i32>
; Masked zero-extension from memory: loads <2 x i8> through %i, widens it to
; <2 x i64> with zext, and selects zeroinitializer for lanes whose %mask bit is clear.
; The KNL checks expect an unmasked extend followed by a zero-masked zmm move;
; the AVX512DQ checks expect the mask applied directly to the extend.
; Declared readonly rather than readnone because the body loads through %i.
677 define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readonly {
678 ; KNL-LABEL: zext_2x8mem_to_2x64:
680 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
681 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
682 ; KNL-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
683 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
684 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
685 ; KNL-NEXT: vzeroupper
688 ; AVX512DQ-LABEL: zext_2x8mem_to_2x64:
690 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
691 ; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
692 ; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
693 ; AVX512DQ-NEXT: retq
694 %a = load <2 x i8>,<2 x i8> *%i,align 1
695 %x = zext <2 x i8> %a to <2 x i64>
696 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Masked sign-extension from memory: loads <2 x i8> through %i, widens it to
; <2 x i64> with sext, and selects zeroinitializer for lanes whose %mask bit is clear.
; The KNL checks expect an unmasked extend followed by a zero-masked zmm move;
; the AVX512DQ checks expect the mask applied directly to the extend.
; Declared readonly rather than readnone because the body loads through %i.
699 define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readonly {
700 ; KNL-LABEL: sext_2x8mem_to_2x64mask:
702 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
703 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
704 ; KNL-NEXT: vpmovsxbq (%rdi), %xmm0
705 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
706 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
707 ; KNL-NEXT: vzeroupper
710 ; AVX512DQ-LABEL: sext_2x8mem_to_2x64mask:
712 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
713 ; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
714 ; AVX512DQ-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z}
715 ; AVX512DQ-NEXT: retq
716 %a = load <2 x i8>,<2 x i8> *%i,align 1
717 %x = sext <2 x i8> %a to <2 x i64>
718 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Unmasked sign-extension from memory: loads <2 x i8> through %i and widens it
; to <2 x i64> with sext. Checked under the ALL prefix shared by every RUN line.
; Declared readonly rather than readnone because the body loads through %i.
721 define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readonly {
722 ; ALL-LABEL: sext_2x8mem_to_2x64:
724 ; ALL-NEXT: vpmovsxbq (%rdi), %xmm0
726 %a = load <2 x i8>,<2 x i8> *%i,align 1
727 %x = sext <2 x i8> %a to <2 x i64>
; Masked zero-extension from memory: loads <4 x i8> through %i, widens it to
; <4 x i64> with zext, and selects zeroinitializer for lanes whose %mask bit is clear.
; The KNL checks expect an unmasked extend followed by a zero-masked zmm move;
; the AVX512DQ checks expect the mask applied directly to the extend.
; Declared readonly rather than readnone because the body loads through %i.
731 define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readonly {
732 ; KNL-LABEL: zext_4x8mem_to_4x64:
734 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
735 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
736 ; KNL-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
737 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
738 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
741 ; AVX512DQ-LABEL: zext_4x8mem_to_4x64:
743 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
744 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
745 ; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
746 ; AVX512DQ-NEXT: retq
747 %a = load <4 x i8>,<4 x i8> *%i,align 1
748 %x = zext <4 x i8> %a to <4 x i64>
749 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Masked sign-extension from memory: loads <4 x i8> through %i, widens it to
; <4 x i64> with sext, and selects zeroinitializer for lanes whose %mask bit is clear.
; The KNL checks expect an unmasked extend followed by a zero-masked zmm move;
; the AVX512DQ checks expect the mask applied directly to the extend.
; Declared readonly rather than readnone because the body loads through %i.
753 define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readonly {
754 ; KNL-LABEL: sext_4x8mem_to_4x64mask:
756 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
757 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
758 ; KNL-NEXT: vpmovsxbq (%rdi), %ymm0
759 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
760 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
763 ; AVX512DQ-LABEL: sext_4x8mem_to_4x64mask:
765 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
766 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
767 ; AVX512DQ-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z}
768 ; AVX512DQ-NEXT: retq
769 %a = load <4 x i8>,<4 x i8> *%i,align 1
770 %x = sext <4 x i8> %a to <4 x i64>
771 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Unmasked sign-extension from memory: loads <4 x i8> through %i and widens it
; to <4 x i64> with sext. Checked under the ALL prefix shared by every RUN line.
; Declared readonly rather than readnone because the body loads through %i.
775 define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readonly {
776 ; ALL-LABEL: sext_4x8mem_to_4x64:
778 ; ALL-NEXT: vpmovsxbq (%rdi), %ymm0
780 %a = load <4 x i8>,<4 x i8> *%i,align 1
781 %x = sext <4 x i8> %a to <4 x i64>
; Masked zero-extension from memory: loads <8 x i8> through %i, widens it to
; <8 x i64> with zext, and selects zeroinitializer for lanes whose %mask bit is clear.
; The three prefixes differ only in how the <8 x i1> mask is moved into %k1.
; Declared readonly rather than readnone because the body loads through %i.
785 define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readonly {
786 ; KNL-LABEL: zext_8x8mem_to_8x64:
788 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
789 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
790 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
791 ; KNL-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
794 ; SKX-LABEL: zext_8x8mem_to_8x64:
796 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
797 ; SKX-NEXT: vpmovw2m %xmm0, %k1
798 ; SKX-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
801 ; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x64:
802 ; AVX512DQNOBW: # %bb.0:
803 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
804 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
805 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
806 ; AVX512DQNOBW-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
807 ; AVX512DQNOBW-NEXT: retq
808 %a = load <8 x i8>,<8 x i8> *%i,align 1
809 %x = zext <8 x i8> %a to <8 x i64>
810 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Masked sign-extension from memory: loads <8 x i8> through %i, widens it to
; <8 x i64> with sext, and selects zeroinitializer for lanes whose %mask bit is clear.
; The three prefixes differ only in how the <8 x i1> mask is moved into %k1.
; Declared readonly rather than readnone because the body loads through %i.
814 define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readonly {
815 ; KNL-LABEL: sext_8x8mem_to_8x64mask:
817 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
818 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
819 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
820 ; KNL-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
823 ; SKX-LABEL: sext_8x8mem_to_8x64mask:
825 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
826 ; SKX-NEXT: vpmovw2m %xmm0, %k1
827 ; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
830 ; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x64mask:
831 ; AVX512DQNOBW: # %bb.0:
832 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
833 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
834 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
835 ; AVX512DQNOBW-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
836 ; AVX512DQNOBW-NEXT: retq
837 %a = load <8 x i8>,<8 x i8> *%i,align 1
838 %x = sext <8 x i8> %a to <8 x i64>
839 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Unmasked sign-extension from memory: loads <8 x i8> through %i and widens it
; to <8 x i64> with sext. Checked under the ALL prefix shared by every RUN line.
; Declared readonly rather than readnone because the body loads through %i.
843 define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readonly {
844 ; ALL-LABEL: sext_8x8mem_to_8x64:
846 ; ALL-NEXT: vpmovsxbq (%rdi), %zmm0
848 %a = load <8 x i8>,<8 x i8> *%i,align 1
849 %x = sext <8 x i8> %a to <8 x i64>
; Masked zero-extension from memory: loads <4 x i16> through %i, widens it to
; <4 x i32> with zext, and selects zeroinitializer for lanes whose %mask bit is clear.
; The KNL checks expect an unmasked extend followed by a zero-masked zmm move;
; the AVX512DQ checks expect the mask applied directly to the extend.
; Declared readonly rather than readnone because the body loads through %i.
853 define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readonly {
854 ; KNL-LABEL: zext_4x16mem_to_4x32:
856 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
857 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
858 ; KNL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
859 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
860 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
861 ; KNL-NEXT: vzeroupper
864 ; AVX512DQ-LABEL: zext_4x16mem_to_4x32:
866 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
867 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
868 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
869 ; AVX512DQ-NEXT: retq
870 %a = load <4 x i16>,<4 x i16> *%i,align 1
871 %x = zext <4 x i16> %a to <4 x i32>
872 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Masked sign-extension from memory: loads <4 x i16> through %i, widens it to
; <4 x i32> with sext, and selects zeroinitializer for lanes whose %mask bit is clear.
; The KNL checks expect an unmasked extend followed by a zero-masked zmm move;
; the AVX512DQ checks expect the mask applied directly to the extend.
; Declared readonly rather than readnone because the body loads through %i.
876 define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readonly {
877 ; KNL-LABEL: sext_4x16mem_to_4x32mask:
879 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
880 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
881 ; KNL-NEXT: vpmovsxwd (%rdi), %xmm0
882 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
883 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
884 ; KNL-NEXT: vzeroupper
887 ; AVX512DQ-LABEL: sext_4x16mem_to_4x32mask:
889 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
890 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
891 ; AVX512DQ-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z}
892 ; AVX512DQ-NEXT: retq
893 %a = load <4 x i16>,<4 x i16> *%i,align 1
894 %x = sext <4 x i16> %a to <4 x i32>
895 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Unmasked sign-extension from memory: loads <4 x i16> through %i and widens it
; to <4 x i32> with sext. Checked under the ALL prefix shared by every RUN line.
; Declared readonly rather than readnone because the body loads through %i.
899 define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readonly {
900 ; ALL-LABEL: sext_4x16mem_to_4x32:
902 ; ALL-NEXT: vpmovsxwd (%rdi), %xmm0
904 %a = load <4 x i16>,<4 x i16> *%i,align 1
905 %x = sext <4 x i16> %a to <4 x i32>
; Masked zero-extension from memory: loads <8 x i16> through %i, widens it to
; <8 x i32> with zext, and selects zeroinitializer for lanes whose %mask bit is clear.
; The KNL checks expect an unmasked ymm extend followed by a zero-masked zmm move;
; the SKX and AVX512DQNOBW checks expect the mask applied directly to the extend.
; Declared readonly rather than readnone because the body loads through %i.
910 define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readonly {
911 ; KNL-LABEL: zext_8x16mem_to_8x32:
913 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
914 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
915 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
916 ; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
917 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
918 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
921 ; SKX-LABEL: zext_8x16mem_to_8x32:
923 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
924 ; SKX-NEXT: vpmovw2m %xmm0, %k1
925 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
928 ; AVX512DQNOBW-LABEL: zext_8x16mem_to_8x32:
929 ; AVX512DQNOBW: # %bb.0:
930 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
931 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
932 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
933 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
934 ; AVX512DQNOBW-NEXT: retq
935 %a = load <8 x i16>,<8 x i16> *%i,align 1
936 %x = zext <8 x i16> %a to <8 x i32>
937 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Masked sign-extension from memory: loads <8 x i16> through %i, widens it to
; <8 x i32> with sext, and selects zeroinitializer for lanes whose %mask bit is clear.
; The KNL checks expect an unmasked ymm extend followed by a zero-masked zmm move;
; the SKX and AVX512DQNOBW checks expect the mask applied directly to the extend.
; Declared readonly rather than readnone because the body loads through %i.
941 define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readonly {
942 ; KNL-LABEL: sext_8x16mem_to_8x32mask:
944 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
945 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
946 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
947 ; KNL-NEXT: vpmovsxwd (%rdi), %ymm0
948 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
949 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
952 ; SKX-LABEL: sext_8x16mem_to_8x32mask:
954 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
955 ; SKX-NEXT: vpmovw2m %xmm0, %k1
956 ; SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z}
959 ; AVX512DQNOBW-LABEL: sext_8x16mem_to_8x32mask:
960 ; AVX512DQNOBW: # %bb.0:
961 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
962 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
963 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
964 ; AVX512DQNOBW-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z}
965 ; AVX512DQNOBW-NEXT: retq
966 %a = load <8 x i16>,<8 x i16> *%i,align 1
967 %x = sext <8 x i16> %a to <8 x i32>
968 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Unmasked sign-extension from memory: loads <8 x i16> through %i and widens it
; to <8 x i32> with sext. Checked under the ALL prefix shared by every RUN line.
; Declared readonly rather than readnone because the body loads through %i.
972 define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readonly {
973 ; ALL-LABEL: sext_8x16mem_to_8x32:
975 ; ALL-NEXT: vpmovsxwd (%rdi), %ymm0
977 %a = load <8 x i16>,<8 x i16> *%i,align 1
978 %x = sext <8 x i16> %a to <8 x i32>
; Masked zero-extension of a register operand: zext of %a from <8 x i16> to
; <8 x i32>, with zeroinitializer selected for lanes whose %mask bit is clear.
; The KNL checks expect an unmasked ymm extend followed by a zero-masked zmm move;
; the SKX and AVX512DQNOBW checks expect the mask applied directly to the extend.
982 define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
983 ; KNL-LABEL: zext_8x16_to_8x32mask:
985 ; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
986 ; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
987 ; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
988 ; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
989 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
990 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
993 ; SKX-LABEL: zext_8x16_to_8x32mask:
995 ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
996 ; SKX-NEXT: vpmovw2m %xmm1, %k1
997 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1000 ; AVX512DQNOBW-LABEL: zext_8x16_to_8x32mask:
1001 ; AVX512DQNOBW: # %bb.0:
1002 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1
1003 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1
1004 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1
1005 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1006 ; AVX512DQNOBW-NEXT: retq
1007 %x = zext <8 x i16> %a to <8 x i32>
1008 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Unmasked zero-extension of a register operand from <8 x i16> to <8 x i32>.
; Uses the ALL prefix, which every RUN line enables, so one expectation covers
; all tested feature sets.
1012 define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone {
1013 ; ALL-LABEL: zext_8x16_to_8x32:
1015 ; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1017 %x = zext <8 x i16> %a to <8 x i32>
; Masked zero-extension from memory: loads <16 x i16> through %i, widens it to
; <16 x i32> with zext, and selects zeroinitializer for lanes whose %mask bit is clear.
; The three prefixes differ only in how the <16 x i1> mask is moved into %k1.
; Declared readonly rather than readnone because the body loads through %i.
1021 define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readonly {
1022 ; KNL-LABEL: zext_16x16mem_to_16x32:
1024 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
1025 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1026 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1027 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1030 ; SKX-LABEL: zext_16x16mem_to_16x32:
1032 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
1033 ; SKX-NEXT: vpmovb2m %xmm0, %k1
1034 ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1037 ; AVX512DQNOBW-LABEL: zext_16x16mem_to_16x32:
1038 ; AVX512DQNOBW: # %bb.0:
1039 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
1040 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
1041 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1
1042 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1043 ; AVX512DQNOBW-NEXT: retq
1044 %a = load <16 x i16>,<16 x i16> *%i,align 1
1045 %x = zext <16 x i16> %a to <16 x i32>
1046 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Masked sign-extension from memory: loads <16 x i16> through %i, widens it to
; <16 x i32> with sext, and selects zeroinitializer for lanes whose %mask bit is clear.
; The three prefixes differ only in how the <16 x i1> mask is moved into %k1.
; Declared readonly rather than readnone because the body loads through %i.
1050 define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readonly {
1051 ; KNL-LABEL: sext_16x16mem_to_16x32mask:
1053 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
1054 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1055 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1056 ; KNL-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
1059 ; SKX-LABEL: sext_16x16mem_to_16x32mask:
1061 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
1062 ; SKX-NEXT: vpmovb2m %xmm0, %k1
1063 ; SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
1066 ; AVX512DQNOBW-LABEL: sext_16x16mem_to_16x32mask:
1067 ; AVX512DQNOBW: # %bb.0:
1068 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
1069 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
1070 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1
1071 ; AVX512DQNOBW-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
1072 ; AVX512DQNOBW-NEXT: retq
1073 %a = load <16 x i16>,<16 x i16> *%i,align 1
1074 %x = sext <16 x i16> %a to <16 x i32>
1075 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Unmasked sign-extension from memory: loads <16 x i16> through %i and widens it
; to <16 x i32> with sext. Checked under the ALL prefix shared by every RUN line.
; Declared readonly rather than readnone because the body loads through %i.
1079 define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readonly {
1080 ; ALL-LABEL: sext_16x16mem_to_16x32:
1082 ; ALL-NEXT: vpmovsxwd (%rdi), %zmm0
1084 %a = load <16 x i16>,<16 x i16> *%i,align 1
1085 %x = sext <16 x i16> %a to <16 x i32>
; Masked zero-extension of a register operand: zext of %a from <16 x i16> to
; <16 x i32>, with zeroinitializer selected for lanes whose %mask bit is clear.
; The three prefixes differ only in how the <16 x i1> mask is moved into %k1.
1088 define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone {
1089 ; KNL-LABEL: zext_16x16_to_16x32mask:
1091 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
1092 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
1093 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
1094 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1097 ; SKX-LABEL: zext_16x16_to_16x32mask:
1099 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
1100 ; SKX-NEXT: vpmovb2m %xmm1, %k1
1101 ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1104 ; AVX512DQNOBW-LABEL: zext_16x16_to_16x32mask:
1105 ; AVX512DQNOBW: # %bb.0:
1106 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1
1107 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1
1108 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1
1109 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1110 ; AVX512DQNOBW-NEXT: retq
1111 %x = zext <16 x i16> %a to <16 x i32>
1112 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Unmasked zero-extension of a register operand from <16 x i16> to <16 x i32>.
; Uses the ALL prefix, which every RUN line enables, so one expectation covers
; all tested feature sets.
1116 define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone {
1117 ; ALL-LABEL: zext_16x16_to_16x32:
1119 ; ALL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1121 %x = zext <16 x i16> %a to <16 x i32>
; Masked zero-extension from memory: loads <2 x i16> through %i, widens it to
; <2 x i64> with zext, and selects zeroinitializer for lanes whose %mask bit is clear.
; The KNL checks expect an unmasked extend followed by a zero-masked zmm move;
; the AVX512DQ checks expect the mask applied directly to the extend.
; Declared readonly rather than readnone because the body loads through %i.
1125 define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readonly {
1126 ; KNL-LABEL: zext_2x16mem_to_2x64:
1128 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1129 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1130 ; KNL-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1131 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1132 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1133 ; KNL-NEXT: vzeroupper
1136 ; AVX512DQ-LABEL: zext_2x16mem_to_2x64:
1137 ; AVX512DQ: # %bb.0:
1138 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
1139 ; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
1140 ; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1141 ; AVX512DQ-NEXT: retq
1142 %a = load <2 x i16>,<2 x i16> *%i,align 1
1143 %x = zext <2 x i16> %a to <2 x i64>
1144 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Masked sign-extension from memory: loads <2 x i16> through %i, widens it to
; <2 x i64> with sext, and selects zeroinitializer for lanes whose %mask bit is clear.
; The KNL checks expect an unmasked extend followed by a zero-masked zmm move;
; the AVX512DQ checks expect the mask applied directly to the extend.
; Declared readonly rather than readnone because the body loads through %i.
1148 define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readonly {
1149 ; KNL-LABEL: sext_2x16mem_to_2x64mask:
1151 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1152 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1153 ; KNL-NEXT: vpmovsxwq (%rdi), %xmm0
1154 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1155 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1156 ; KNL-NEXT: vzeroupper
1159 ; AVX512DQ-LABEL: sext_2x16mem_to_2x64mask:
1160 ; AVX512DQ: # %bb.0:
1161 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
1162 ; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
1163 ; AVX512DQ-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z}
1164 ; AVX512DQ-NEXT: retq
1165 %a = load <2 x i16>,<2 x i16> *%i,align 1
1166 %x = sext <2 x i16> %a to <2 x i64>
1167 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Unmasked sign-extension from memory: loads <2 x i16> through %i and widens it
; to <2 x i64> with sext. Checked under the ALL prefix shared by every RUN line.
; Declared readonly rather than readnone because the body loads through %i.
1171 define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readonly {
1172 ; ALL-LABEL: sext_2x16mem_to_2x64:
1174 ; ALL-NEXT: vpmovsxwq (%rdi), %xmm0
1176 %a = load <2 x i16>,<2 x i16> *%i,align 1
1177 %x = sext <2 x i16> %a to <2 x i64>
; Masked zero-extension from memory: loads <4 x i16> through %i, widens it to
; <4 x i64> with zext, and selects zeroinitializer for lanes whose %mask bit is clear.
; The KNL checks expect an unmasked extend followed by a zero-masked zmm move;
; the AVX512DQ checks expect the mask applied directly to the extend.
; Declared readonly rather than readnone because the body loads through %i.
1181 define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readonly {
1182 ; KNL-LABEL: zext_4x16mem_to_4x64:
1184 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1185 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1186 ; KNL-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1187 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1188 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1191 ; AVX512DQ-LABEL: zext_4x16mem_to_4x64:
1192 ; AVX512DQ: # %bb.0:
1193 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
1194 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
1195 ; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1196 ; AVX512DQ-NEXT: retq
1197 %a = load <4 x i16>,<4 x i16> *%i,align 1
1198 %x = zext <4 x i16> %a to <4 x i64>
1199 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Masked sign-extension from memory: loads <4 x i16> through %i, widens it to
; <4 x i64> with sext, and selects zeroinitializer for lanes whose %mask bit is clear.
; The KNL checks expect an unmasked extend followed by a zero-masked zmm move;
; the AVX512DQ checks expect the mask applied directly to the extend.
; Declared readonly rather than readnone because the body loads through %i.
1203 define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readonly {
1204 ; KNL-LABEL: sext_4x16mem_to_4x64mask:
1206 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1207 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1208 ; KNL-NEXT: vpmovsxwq (%rdi), %ymm0
1209 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1210 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1213 ; AVX512DQ-LABEL: sext_4x16mem_to_4x64mask:
1214 ; AVX512DQ: # %bb.0:
1215 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
1216 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
1217 ; AVX512DQ-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z}
1218 ; AVX512DQ-NEXT: retq
1219 %a = load <4 x i16>,<4 x i16> *%i,align 1
1220 %x = sext <4 x i16> %a to <4 x i64>
1221 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Unmasked sign-extension from memory: loads <4 x i16> through %i and widens it
; to <4 x i64> with sext. Checked under the ALL prefix shared by every RUN line.
; Declared readonly rather than readnone because the body loads through %i.
1225 define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readonly {
1226 ; ALL-LABEL: sext_4x16mem_to_4x64:
1228 ; ALL-NEXT: vpmovsxwq (%rdi), %ymm0
1230 %a = load <4 x i16>,<4 x i16> *%i,align 1
1231 %x = sext <4 x i16> %a to <4 x i64>
; Masked zero-extension from memory: loads <8 x i16> through %i, widens it to
; <8 x i64> with zext, and selects zeroinitializer for lanes whose %mask bit is clear.
; The three prefixes differ only in how the <8 x i1> mask is moved into %k1.
; Declared readonly rather than readnone because the body loads through %i.
1235 define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readonly {
1236 ; KNL-LABEL: zext_8x16mem_to_8x64:
1238 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1239 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1240 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1241 ; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1244 ; SKX-LABEL: zext_8x16mem_to_8x64:
1246 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1247 ; SKX-NEXT: vpmovw2m %xmm0, %k1
1248 ; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1251 ; AVX512DQNOBW-LABEL: zext_8x16mem_to_8x64:
1252 ; AVX512DQNOBW: # %bb.0:
1253 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
1254 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
1255 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
1256 ; AVX512DQNOBW-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1257 ; AVX512DQNOBW-NEXT: retq
1258 %a = load <8 x i16>,<8 x i16> *%i,align 1
1259 %x = zext <8 x i16> %a to <8 x i64>
1260 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Masked sign-extension from memory: loads <8 x i16> through %i, widens it to
; <8 x i64> with sext, and selects zeroinitializer for lanes whose %mask bit is clear.
; The three prefixes differ only in how the <8 x i1> mask is moved into %k1.
; Declared readonly rather than readnone because the body loads through %i.
1264 define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readonly {
1265 ; KNL-LABEL: sext_8x16mem_to_8x64mask:
1267 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1268 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1269 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1270 ; KNL-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
1273 ; SKX-LABEL: sext_8x16mem_to_8x64mask:
1275 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1276 ; SKX-NEXT: vpmovw2m %xmm0, %k1
1277 ; SKX-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
1280 ; AVX512DQNOBW-LABEL: sext_8x16mem_to_8x64mask:
1281 ; AVX512DQNOBW: # %bb.0:
1282 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
1283 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
1284 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
1285 ; AVX512DQNOBW-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
1286 ; AVX512DQNOBW-NEXT: retq
1287 %a = load <8 x i16>,<8 x i16> *%i,align 1
1288 %x = sext <8 x i16> %a to <8 x i64>
1289 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Unmasked sign-extension from memory: loads <8 x i16> through %i and widens it
; to <8 x i64> with sext. Checked under the ALL prefix shared by every RUN line.
; Declared readonly rather than readnone because the body loads through %i.
1293 define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readonly {
1294 ; ALL-LABEL: sext_8x16mem_to_8x64:
1296 ; ALL-NEXT: vpmovsxwq (%rdi), %zmm0
1298 %a = load <8 x i16>,<8 x i16> *%i,align 1
1299 %x = sext <8 x i16> %a to <8 x i64>
; Masked zero-extension of a register operand: zext of %a from <8 x i16> to
; <8 x i64>, with zeroinitializer selected for lanes whose %mask bit is clear.
; The three prefixes differ only in how the <8 x i1> mask is moved into %k1.
1303 define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
1304 ; KNL-LABEL: zext_8x16_to_8x64mask:
1306 ; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
1307 ; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
1308 ; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
1309 ; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1312 ; SKX-LABEL: zext_8x16_to_8x64mask:
1314 ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
1315 ; SKX-NEXT: vpmovw2m %xmm1, %k1
1316 ; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1319 ; AVX512DQNOBW-LABEL: zext_8x16_to_8x64mask:
1320 ; AVX512DQNOBW: # %bb.0:
1321 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1
1322 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1
1323 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1
1324 ; AVX512DQNOBW-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1325 ; AVX512DQNOBW-NEXT: retq
1326 %x = zext <8 x i16> %a to <8 x i64>
1327 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Unmasked zero-extension of a register operand from <8 x i16> to <8 x i64>.
; Uses the ALL prefix, which every RUN line enables, so one expectation covers
; all tested feature sets.
1331 define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone {
1332 ; ALL-LABEL: zext_8x16_to_8x64:
1334 ; ALL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1336 %ret = zext <8 x i16> %a to <8 x i64>
; Masked zero-extension from memory: loads <2 x i32> through %i, widens it to
; <2 x i64> with zext, and selects zeroinitializer for lanes whose %mask bit is clear.
; The KNL checks expect an unmasked extend followed by a zero-masked zmm move;
; the AVX512DQ checks expect the mask applied directly to the extend.
; Declared readonly rather than readnone because the body loads through %i.
1340 define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readonly {
1341 ; KNL-LABEL: zext_2x32mem_to_2x64:
1343 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1344 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1345 ; KNL-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1346 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1347 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1348 ; KNL-NEXT: vzeroupper
1351 ; AVX512DQ-LABEL: zext_2x32mem_to_2x64:
1352 ; AVX512DQ: # %bb.0:
1353 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
1354 ; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
1355 ; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero
1356 ; AVX512DQ-NEXT: retq
1357 %a = load <2 x i32>,<2 x i32> *%i,align 1
1358 %x = zext <2 x i32> %a to <2 x i64>
1359 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Masked sign-extending load: reads <2 x i32> from %i (align 1), sign-extends
; it to <2 x i64>, and selects the result against zeroinitializer under the
; <2 x i1> %mask.
;  * avx512f only (KNL checks): mask via vpsllq $63 + vptestmq on zmm, an
;    unmasked vpmovsxdq from (%rdi), then a zero-masked vmovdqa64 on zmm.
;  * avx512vl+avx512dq (AVX512DQ checks): vpmovq2m builds the mask and the
;    select folds into a zero-masked vpmovsxdq from (%rdi).
; NOTE(review): declared readnone although the body loads through %i - confirm
; this is intentional.
1363 define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
1364 ; KNL-LABEL: sext_2x32mem_to_2x64mask:
1366 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1367 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1368 ; KNL-NEXT: vpmovsxdq (%rdi), %xmm0
1369 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1370 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1371 ; KNL-NEXT: vzeroupper
1374 ; AVX512DQ-LABEL: sext_2x32mem_to_2x64mask:
1375 ; AVX512DQ: # %bb.0:
1376 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
1377 ; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
1378 ; AVX512DQ-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z}
1379 ; AVX512DQ-NEXT: retq
1380 %a = load <2 x i32>,<2 x i32> *%i,align 1
1381 %x = sext <2 x i32> %a to <2 x i64>
1382 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Unmasked sign-extending load: reads <2 x i32> from %i (align 1) and
; sign-extends it to <2 x i64>.
; Every RUN configuration expects the load to fold into one vpmovsxdq from
; (%rdi) into xmm0.
; NOTE(review): declared readnone although the body loads through %i - confirm
; this is intentional.
1386 define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone {
1387 ; ALL-LABEL: sext_2x32mem_to_2x64:
1389 ; ALL-NEXT: vpmovsxdq (%rdi), %xmm0
1391 %a = load <2 x i32>,<2 x i32> *%i,align 1
1392 %x = sext <2 x i32> %a to <2 x i64>
; Masked zero-extending load: reads <4 x i32> from %i (align 1), zero-extends
; it to <4 x i64>, and selects the result against zeroinitializer under the
; <4 x i1> %mask.
;  * avx512f only (KNL checks): mask via vpslld $31 + vptestmd on zmm, an
;    unmasked vpmovzxdq into ymm0, then a zero-masked vmovdqa64 on zmm.
;  * avx512vl+avx512dq (AVX512DQ checks): vpmovd2m builds the mask and the
;    select folds into a zero-masked vpmovzxdq from memory.
; NOTE(review): declared readnone although the body loads through %i - confirm
; this is intentional.
1396 define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
1397 ; KNL-LABEL: zext_4x32mem_to_4x64:
1399 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1400 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1401 ; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1402 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1403 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1406 ; AVX512DQ-LABEL: zext_4x32mem_to_4x64:
1407 ; AVX512DQ: # %bb.0:
1408 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
1409 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
1410 ; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1411 ; AVX512DQ-NEXT: retq
1412 %a = load <4 x i32>,<4 x i32> *%i,align 1
1413 %x = zext <4 x i32> %a to <4 x i64>
1414 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Masked sign-extending load: reads <4 x i32> from %i (align 1), sign-extends
; it to <4 x i64>, and selects the result against zeroinitializer under the
; <4 x i1> %mask.
;  * avx512f only (KNL checks): mask via vpslld $31 + vptestmd on zmm, an
;    unmasked vpmovsxdq from (%rdi), then a zero-masked vmovdqa64 on zmm.
;  * avx512vl+avx512dq (AVX512DQ checks): vpmovd2m builds the mask and the
;    select folds into a zero-masked vpmovsxdq from (%rdi).
; NOTE(review): declared readnone although the body loads through %i - confirm
; this is intentional.
1418 define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
1419 ; KNL-LABEL: sext_4x32mem_to_4x64mask:
1421 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1422 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1423 ; KNL-NEXT: vpmovsxdq (%rdi), %ymm0
1424 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1425 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1428 ; AVX512DQ-LABEL: sext_4x32mem_to_4x64mask:
1429 ; AVX512DQ: # %bb.0:
1430 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
1431 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
1432 ; AVX512DQ-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z}
1433 ; AVX512DQ-NEXT: retq
1434 %a = load <4 x i32>,<4 x i32> *%i,align 1
1435 %x = sext <4 x i32> %a to <4 x i64>
1436 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Unmasked sign-extending load: reads <4 x i32> from %i (align 1) and
; sign-extends it to <4 x i64>.
; Every RUN configuration expects the load to fold into one vpmovsxdq from
; (%rdi) into ymm0.
; NOTE(review): declared readnone although the body loads through %i - confirm
; this is intentional.
1440 define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone {
1441 ; ALL-LABEL: sext_4x32mem_to_4x64:
1443 ; ALL-NEXT: vpmovsxdq (%rdi), %ymm0
1445 %a = load <4 x i32>,<4 x i32> *%i,align 1
1446 %x = sext <4 x i32> %a to <4 x i64>
; Unmasked register-to-register sign-extension of <4 x i32> %a to <4 x i64>.
; Every RUN configuration expects a single vpmovsxdq from xmm0 into ymm0.
1450 define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone {
1451 ; ALL-LABEL: sext_4x32_to_4x64:
1453 ; ALL-NEXT: vpmovsxdq %xmm0, %ymm0
1455 %x = sext <4 x i32> %a to <4 x i64>
; Masked register zero-extension: zero-extends <4 x i32> %a to <4 x i64> and
; selects the result against zeroinitializer under the <4 x i1> %mask (which
; arrives in xmm1 per the checks below).
;  * avx512f only (KNL checks): mask via vpslld $31 + vptestmd on zmm, an
;    unmasked vpmovzxdq into ymm0, then a zero-masked vmovdqa64 on zmm.
;  * avx512vl+avx512dq (AVX512DQ checks): vpmovd2m builds the mask and the
;    select folds into a zero-masked vpmovzxdq.
1459 define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone {
1460 ; KNL-LABEL: zext_4x32_to_4x64mask:
1462 ; KNL-NEXT: vpslld $31, %xmm1, %xmm1
1463 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
1464 ; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1465 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1466 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1469 ; AVX512DQ-LABEL: zext_4x32_to_4x64mask:
1470 ; AVX512DQ: # %bb.0:
1471 ; AVX512DQ-NEXT: vpslld $31, %xmm1, %xmm1
1472 ; AVX512DQ-NEXT: vpmovd2m %xmm1, %k1
1473 ; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1474 ; AVX512DQ-NEXT: retq
1475 %x = zext <4 x i32> %a to <4 x i64>
1476 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Masked zero-extending load: reads <8 x i32> from %i (align 1), zero-extends
; it to <8 x i64>, and selects the result against zeroinitializer under the
; <8 x i1> %mask. All three check groups end in a zero-masked vpmovzxdq from
; memory into zmm0; they differ only in how the mask register is produced:
;  * avx512f only (KNL checks): vpmovsxwq to zmm, vpsllq $63, vptestmq.
;  * with avx512bw (SKX checks): vpsllw $15, vpmovw2m.
;  * avx512dq without avx512bw (AVX512DQNOBW checks): vpmovsxwd to ymm,
;    vpslld $31, vpmovd2m.
; NOTE(review): declared readnone although the body loads through %i - confirm
; this is intentional.
1480 define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
1481 ; KNL-LABEL: zext_8x32mem_to_8x64:
1483 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1484 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1485 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1486 ; KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1489 ; SKX-LABEL: zext_8x32mem_to_8x64:
1491 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1492 ; SKX-NEXT: vpmovw2m %xmm0, %k1
1493 ; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1496 ; AVX512DQNOBW-LABEL: zext_8x32mem_to_8x64:
1497 ; AVX512DQNOBW: # %bb.0:
1498 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
1499 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
1500 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
1501 ; AVX512DQNOBW-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1502 ; AVX512DQNOBW-NEXT: retq
1503 %a = load <8 x i32>,<8 x i32> *%i,align 1
1504 %x = zext <8 x i32> %a to <8 x i64>
1505 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Masked sign-extending load: reads <8 x i32> from %i (align 1), sign-extends
; it to <8 x i64>, and selects the result against zeroinitializer under the
; <8 x i1> %mask. All three check groups end in a zero-masked vpmovsxdq from
; (%rdi) into zmm0; they differ only in how the mask register is produced:
;  * avx512f only (KNL checks): vpmovsxwq to zmm, vpsllq $63, vptestmq.
;  * with avx512bw (SKX checks): vpsllw $15, vpmovw2m.
;  * avx512dq without avx512bw (AVX512DQNOBW checks): vpmovsxwd to ymm,
;    vpslld $31, vpmovd2m.
; NOTE(review): declared readnone although the body loads through %i - confirm
; this is intentional.
1509 define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
1510 ; KNL-LABEL: sext_8x32mem_to_8x64mask:
1512 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1513 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1514 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1515 ; KNL-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z}
1518 ; SKX-LABEL: sext_8x32mem_to_8x64mask:
1520 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1521 ; SKX-NEXT: vpmovw2m %xmm0, %k1
1522 ; SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z}
1525 ; AVX512DQNOBW-LABEL: sext_8x32mem_to_8x64mask:
1526 ; AVX512DQNOBW: # %bb.0:
1527 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
1528 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
1529 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
1530 ; AVX512DQNOBW-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z}
1531 ; AVX512DQNOBW-NEXT: retq
1532 %a = load <8 x i32>,<8 x i32> *%i,align 1
1533 %x = sext <8 x i32> %a to <8 x i64>
1534 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Unmasked sign-extending load: reads <8 x i32> from %i (align 1) and
; sign-extends it to <8 x i64>.
; Every RUN configuration expects the load to fold into one vpmovsxdq from
; (%rdi) into zmm0.
; NOTE(review): declared readnone although the body loads through %i - confirm
; this is intentional.
1538 define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone {
1539 ; ALL-LABEL: sext_8x32mem_to_8x64:
1541 ; ALL-NEXT: vpmovsxdq (%rdi), %zmm0
1543 %a = load <8 x i32>,<8 x i32> *%i,align 1
1544 %x = sext <8 x i32> %a to <8 x i64>
; Unmasked register-to-register sign-extension of <8 x i32> %a to <8 x i64>.
; Every RUN configuration expects a single vpmovsxdq from ymm0 into zmm0.
1548 define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone {
1549 ; ALL-LABEL: sext_8x32_to_8x64:
1551 ; ALL-NEXT: vpmovsxdq %ymm0, %zmm0
1553 %x = sext <8 x i32> %a to <8 x i64>
; Masked register zero-extension: zero-extends <8 x i32> %a to <8 x i64> and
; selects the result against zeroinitializer under the <8 x i1> %mask (which
; arrives in xmm1 per the checks below). All three check groups end in a
; zero-masked vpmovzxdq from ymm0 into zmm0; they differ only in how the mask
; register is produced:
;  * avx512f only (KNL checks): vpmovsxwq to zmm, vpsllq $63, vptestmq.
;  * with avx512bw (SKX checks): vpsllw $15, vpmovw2m.
;  * avx512dq without avx512bw (AVX512DQNOBW checks): vpmovsxwd to ymm,
;    vpslld $31, vpmovd2m.
1557 define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone {
1558 ; KNL-LABEL: zext_8x32_to_8x64mask:
1560 ; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
1561 ; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
1562 ; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
1563 ; KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
1566 ; SKX-LABEL: zext_8x32_to_8x64mask:
1568 ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
1569 ; SKX-NEXT: vpmovw2m %xmm1, %k1
1570 ; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
1573 ; AVX512DQNOBW-LABEL: zext_8x32_to_8x64mask:
1574 ; AVX512DQNOBW: # %bb.0:
1575 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1
1576 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1
1577 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1
1578 ; AVX512DQNOBW-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
1579 ; AVX512DQNOBW-NEXT: retq
1580 %x = zext <8 x i32> %a to <8 x i64>
1581 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Floating-point truncation of <8 x double> %a to <8 x float>.
; Every RUN configuration expects a single vcvtpd2ps from zmm0 into ymm0.
1584 define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
1585 ; ALL-LABEL: fptrunc_test:
1587 ; ALL-NEXT: vcvtpd2ps %zmm0, %ymm0
1589 %b = fptrunc <8 x double> %a to <8 x float>
; Floating-point extension of <8 x float> %a to <8 x double>.
; Every RUN configuration expects a single vcvtps2pd from ymm0 into zmm0.
1593 define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
1594 ; ALL-LABEL: fpext_test:
1596 ; ALL-NEXT: vcvtps2pd %ymm0, %zmm0
1598 %b = fpext <8 x float> %a to <8 x double>
; Reinterprets scalar i16 %b as a <16 x i1> mask and zero-extends each bit to
; an i32 lane. Every check group finishes with vpsrld $31 on zmm0; the
; preceding mask-to-vector step differs:
;  * avx512f only (KNL checks): kmovw into k1, then a zero-masked
;    vpternlogd $255 on zmm0.
;  * with avx512dq (SKX and AVX512DQNOBW checks): move into k0, then vpmovm2d.
;    The GPR-to-mask move is kmovd when avx512bw is enabled and kmovw otherwise.
1602 define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
1603 ; KNL-LABEL: zext_16i1_to_16xi32:
1605 ; KNL-NEXT: kmovw %edi, %k1
1606 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1607 ; KNL-NEXT: vpsrld $31, %zmm0, %zmm0
1610 ; SKX-LABEL: zext_16i1_to_16xi32:
1612 ; SKX-NEXT: kmovd %edi, %k0
1613 ; SKX-NEXT: vpmovm2d %k0, %zmm0
1614 ; SKX-NEXT: vpsrld $31, %zmm0, %zmm0
1617 ; AVX512DQNOBW-LABEL: zext_16i1_to_16xi32:
1618 ; AVX512DQNOBW: # %bb.0:
1619 ; AVX512DQNOBW-NEXT: kmovw %edi, %k0
1620 ; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm0
1621 ; AVX512DQNOBW-NEXT: vpsrld $31, %zmm0, %zmm0
1622 ; AVX512DQNOBW-NEXT: retq
1623 %a = bitcast i16 %b to <16 x i1>
1624 %c = zext <16 x i1> %a to <16 x i32>
; Reinterprets scalar i8 %b as an <8 x i1> mask and zero-extends each bit to
; an i64 lane. Every check group finishes with vpsrlq $63 on zmm0; the
; preceding mask-to-vector step differs:
;  * avx512f only (KNL checks): kmovw into k1, then a zero-masked
;    vpternlogq $255 on zmm0.
;  * with avx512dq (SKX and AVX512DQNOBW checks): move into k0, then vpmovm2q.
;    The GPR-to-mask move is kmovd when avx512bw is enabled and kmovw otherwise.
1628 define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
1629 ; KNL-LABEL: zext_8i1_to_8xi64:
1631 ; KNL-NEXT: kmovw %edi, %k1
1632 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1633 ; KNL-NEXT: vpsrlq $63, %zmm0, %zmm0
1636 ; SKX-LABEL: zext_8i1_to_8xi64:
1638 ; SKX-NEXT: kmovd %edi, %k0
1639 ; SKX-NEXT: vpmovm2q %k0, %zmm0
1640 ; SKX-NEXT: vpsrlq $63, %zmm0, %zmm0
1643 ; AVX512DQNOBW-LABEL: zext_8i1_to_8xi64:
1644 ; AVX512DQNOBW: # %bb.0:
1645 ; AVX512DQNOBW-NEXT: kmovw %edi, %k0
1646 ; AVX512DQNOBW-NEXT: vpmovm2q %k0, %zmm0
1647 ; AVX512DQNOBW-NEXT: vpsrlq $63, %zmm0, %zmm0
1648 ; AVX512DQNOBW-NEXT: retq
1649 %a = bitcast i8 %b to <8 x i1>
1650 %c = zext <8 x i1> %a to <8 x i64>
; Truncates <16 x i8> %a to <16 x i1> and bitcasts the resulting mask to i16.
; Every RUN configuration expects the same sequence with no mask registers:
; vpsllw $7 on xmm0 followed by vpmovmskb into %eax.
1654 define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
1655 ; ALL-LABEL: trunc_16i8_to_16i1:
1657 ; ALL-NEXT: vpsllw $7, %xmm0, %xmm0
1658 ; ALL-NEXT: vpmovmskb %xmm0, %eax
1659 ; ALL-NEXT: # kill: def $ax killed $ax killed $eax
1661 %mask_b = trunc <16 x i8>%a to <16 x i1>
1662 %mask = bitcast <16 x i1> %mask_b to i16
; Truncates <16 x i32> %a to <16 x i1> and bitcasts the resulting mask to i16.
; Every check group starts with vpslld $31 on zmm0 and ends with vzeroupper;
; the vector-to-mask step differs:
;  * avx512f only (KNL checks): vptestmd into k0, then kmovw to %eax.
;  * with avx512dq (SKX and AVX512DQNOBW checks): vpmovd2m into k0. The
;    mask-to-GPR move is kmovd when avx512bw is enabled and kmovw otherwise.
1666 define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
1667 ; KNL-LABEL: trunc_16i32_to_16i1:
1669 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1670 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1671 ; KNL-NEXT: kmovw %k0, %eax
1672 ; KNL-NEXT: # kill: def $ax killed $ax killed $eax
1673 ; KNL-NEXT: vzeroupper
1676 ; SKX-LABEL: trunc_16i32_to_16i1:
1678 ; SKX-NEXT: vpslld $31, %zmm0, %zmm0
1679 ; SKX-NEXT: vpmovd2m %zmm0, %k0
1680 ; SKX-NEXT: kmovd %k0, %eax
1681 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax
1682 ; SKX-NEXT: vzeroupper
1685 ; AVX512DQNOBW-LABEL: trunc_16i32_to_16i1:
1686 ; AVX512DQNOBW: # %bb.0:
1687 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
1688 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k0
1689 ; AVX512DQNOBW-NEXT: kmovw %k0, %eax
1690 ; AVX512DQNOBW-NEXT: # kill: def $ax killed $ax killed $eax
1691 ; AVX512DQNOBW-NEXT: vzeroupper
1692 ; AVX512DQNOBW-NEXT: retq
1693 %mask_b = trunc <16 x i32>%a to <16 x i1>
1694 %mask = bitcast <16 x i1> %mask_b to i16
; Truncates both <4 x i32> inputs to <4 x i1>, ANDs the two masks, and
; sign-extends the result back to <4 x i32>.
; Every RUN configuration expects the same vector-only sequence with no mask
; registers: vpand, then vpslld $31 and vpsrad $31 on xmm0.
1698 define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
1699 ; ALL-LABEL: trunc_4i32_to_4i1:
1701 ; ALL-NEXT: vpand %xmm1, %xmm0, %xmm0
1702 ; ALL-NEXT: vpslld $31, %xmm0, %xmm0
1703 ; ALL-NEXT: vpsrad $31, %xmm0, %xmm0
1705 %mask_a = trunc <4 x i32>%a to <4 x i1>
1706 %mask_b = trunc <4 x i32>%b to <4 x i1>
1707 %a_and_b = and <4 x i1>%mask_a, %mask_b
1708 %res = sext <4 x i1>%a_and_b to <4 x i32>
; Truncates <8 x i16> %a to <8 x i1> and bitcasts the resulting mask to i8.
; The vector-to-mask sequence differs per configuration:
;  * avx512f only (KNL checks): vpmovsxwq to zmm, vpsllq $63, vptestmq,
;    kmovw to %eax, vzeroupper.
;  * with avx512bw (SKX checks): vpsllw $15 and vpmovw2m on xmm0, then kmovd
;    to %eax; no vzeroupper is checked.
;  * avx512dq without avx512bw (AVX512DQNOBW checks): vpmovsxwd to ymm,
;    vpslld $31, vpmovd2m, kmovw to %eax, vzeroupper.
1713 define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
1714 ; KNL-LABEL: trunc_8i16_to_8i1:
1716 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1717 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1718 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
1719 ; KNL-NEXT: kmovw %k0, %eax
1720 ; KNL-NEXT: # kill: def $al killed $al killed $eax
1721 ; KNL-NEXT: vzeroupper
1724 ; SKX-LABEL: trunc_8i16_to_8i1:
1726 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1727 ; SKX-NEXT: vpmovw2m %xmm0, %k0
1728 ; SKX-NEXT: kmovd %k0, %eax
1729 ; SKX-NEXT: # kill: def $al killed $al killed $eax
1732 ; AVX512DQNOBW-LABEL: trunc_8i16_to_8i1:
1733 ; AVX512DQNOBW: # %bb.0:
1734 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
1735 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
1736 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k0
1737 ; AVX512DQNOBW-NEXT: kmovw %k0, %eax
1738 ; AVX512DQNOBW-NEXT: # kill: def $al killed $al killed $eax
1739 ; AVX512DQNOBW-NEXT: vzeroupper
1740 ; AVX512DQNOBW-NEXT: retq
1741 %mask_b = trunc <8 x i16>%a to <8 x i1>
1742 %mask = bitcast <8 x i1> %mask_b to i8
; Compares %a1 < %a2 (signed) lane-wise, inverts the <8 x i1> result by XOR
; with all-true, and sign-extends it to <8 x i32>.
; Both check groups expect vpcmpgtd into ymm0 followed by vpternlogq $15
; (presumably the lowering of the inversion - TODO confirm):
;  * avx512f only (KNL checks): the vpternlogq operates on zmm0.
;  * avx512vl+avx512dq (AVX512DQ checks): the vpternlogq operates on ymm0.
1746 define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
1747 ; KNL-LABEL: sext_8i1_8i32:
1749 ; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
1750 ; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
1751 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1754 ; AVX512DQ-LABEL: sext_8i1_8i32:
1755 ; AVX512DQ: # %bb.0:
1756 ; AVX512DQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
1757 ; AVX512DQ-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0
1758 ; AVX512DQ-NEXT: retq
1759 %x = icmp slt <8 x i32> %a1, %a2
1760 %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
1761 %y = sext <8 x i1> %x1 to <8 x i32>
; Truncates i32 %a to i1, inserts it at index 0 of a constant <16 x i1> vector
; (element 1 is false, every other element is true), and bitcasts the vector
; to i16.
; All three check groups expect the same shape: load the constant -4 into a
; mask register, clear its bit 0 with a kshiftrw $1 / kshiftlw $1 pair, mask
; %edi with andl $1, move it into k1, and combine with korw. The groups differ
; only in the k0 moves to and from %eax, which are kmovd when avx512bw is
; enabled (SKX checks) and kmovw otherwise.
1766 define i16 @trunc_i32_to_i1(i32 %a) {
1767 ; KNL-LABEL: trunc_i32_to_i1:
1769 ; KNL-NEXT: movw $-4, %ax
1770 ; KNL-NEXT: kmovw %eax, %k0
1771 ; KNL-NEXT: kshiftrw $1, %k0, %k0
1772 ; KNL-NEXT: kshiftlw $1, %k0, %k0
1773 ; KNL-NEXT: andl $1, %edi
1774 ; KNL-NEXT: kmovw %edi, %k1
1775 ; KNL-NEXT: korw %k1, %k0, %k0
1776 ; KNL-NEXT: kmovw %k0, %eax
1777 ; KNL-NEXT: # kill: def $ax killed $ax killed $eax
1780 ; SKX-LABEL: trunc_i32_to_i1:
1782 ; SKX-NEXT: movw $-4, %ax
1783 ; SKX-NEXT: kmovd %eax, %k0
1784 ; SKX-NEXT: kshiftrw $1, %k0, %k0
1785 ; SKX-NEXT: kshiftlw $1, %k0, %k0
1786 ; SKX-NEXT: andl $1, %edi
1787 ; SKX-NEXT: kmovw %edi, %k1
1788 ; SKX-NEXT: korw %k1, %k0, %k0
1789 ; SKX-NEXT: kmovd %k0, %eax
1790 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax
1793 ; AVX512DQNOBW-LABEL: trunc_i32_to_i1:
1794 ; AVX512DQNOBW: # %bb.0:
1795 ; AVX512DQNOBW-NEXT: movw $-4, %ax
1796 ; AVX512DQNOBW-NEXT: kmovw %eax, %k0
1797 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
1798 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
1799 ; AVX512DQNOBW-NEXT: andl $1, %edi
1800 ; AVX512DQNOBW-NEXT: kmovw %edi, %k1
1801 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
1802 ; AVX512DQNOBW-NEXT: kmovw %k0, %eax
1803 ; AVX512DQNOBW-NEXT: # kill: def $ax killed $ax killed $eax
1804 ; AVX512DQNOBW-NEXT: retq
1805 %a_i = trunc i32 %a to i1
1806 %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
1807 %res = bitcast <16 x i1> %maskv to i16
; Compares %a1 < %a2 (signed) lane-wise on <8 x i32> and sign-extends the
; <8 x i1> result to <8 x i16>. Every check group ends with vzeroupper.
;  * avx512f only (KNL checks): vpcmpgtd into ymm0, then vpmovdw from zmm0
;    to ymm0.
;  * with avx512bw (SKX checks): vpcmpgtd into k0, then vpmovm2w into xmm0.
;  * avx512dq without avx512bw (AVX512DQNOBW checks): vpcmpgtd into k0,
;    vpmovm2d into ymm0, then vpmovdw to xmm0.
1811 define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
1812 ; KNL-LABEL: sext_8i1_8i16:
1814 ; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
1815 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
1816 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1817 ; KNL-NEXT: vzeroupper
1820 ; SKX-LABEL: sext_8i1_8i16:
1822 ; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0
1823 ; SKX-NEXT: vpmovm2w %k0, %xmm0
1824 ; SKX-NEXT: vzeroupper
1827 ; AVX512DQNOBW-LABEL: sext_8i1_8i16:
1828 ; AVX512DQNOBW: # %bb.0:
1829 ; AVX512DQNOBW-NEXT: vpcmpgtd %ymm0, %ymm1, %k0
1830 ; AVX512DQNOBW-NEXT: vpmovm2d %k0, %ymm0
1831 ; AVX512DQNOBW-NEXT: vpmovdw %ymm0, %xmm0
1832 ; AVX512DQNOBW-NEXT: vzeroupper
1833 ; AVX512DQNOBW-NEXT: retq
1834 %x = icmp slt <8 x i32> %a1, %a2
1835 %y = sext <8 x i1> %x to <8 x i16>
; Compares %a1 < %a2 (signed) lane-wise on <16 x i32> and sign-extends the
; <16 x i1> result to <16 x i32>.
;  * avx512f only (KNL checks): vpcmpgtd into k1, then a zero-masked
;    vpternlogd $255 on zmm0.
;  * avx512vl+avx512dq (AVX512DQ checks): vpcmpgtd into k0, then vpmovm2d
;    into zmm0.
1839 define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
1840 ; KNL-LABEL: sext_16i1_16i32:
1842 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
1843 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1846 ; AVX512DQ-LABEL: sext_16i1_16i32:
1847 ; AVX512DQ: # %bb.0:
1848 ; AVX512DQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
1849 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
1850 ; AVX512DQ-NEXT: retq
1851 %x = icmp slt <16 x i32> %a1, %a2
1852 %y = sext <16 x i1> %x to <16 x i32>
; Compares %a1 < %a2 (signed) lane-wise on <8 x i32> and sign-extends the
; <8 x i1> result to <8 x i64>.
;  * avx512f only (KNL checks): vpcmpgtd into ymm0, then vpmovsxdq from ymm0
;    to zmm0; no mask register is involved.
;  * avx512vl+avx512dq (AVX512DQ checks): vpcmpgtd into k0, then vpmovm2q
;    into zmm0.
1856 define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
1857 ; KNL-LABEL: sext_8i1_8i64:
1859 ; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
1860 ; KNL-NEXT: vpmovsxdq %ymm0, %zmm0
1863 ; AVX512DQ-LABEL: sext_8i1_8i64:
1864 ; AVX512DQ: # %bb.0:
1865 ; AVX512DQ-NEXT: vpcmpgtd %ymm0, %ymm1, %k0
1866 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
1867 ; AVX512DQ-NEXT: retq
1868 %x = icmp slt <8 x i32> %a1, %a2
1869 %y = sext <8 x i1> %x to <8 x i64>
; Sign-extending load with store: loads <8 x i8> from %a, sign-extends it to
; <8 x i64>, and stores the result to %res.
; Every RUN configuration expects the load to fold into vpmovsxbq from (%rdi)
; into zmm0, followed by a vmovdqa64 store to (%rsi) and vzeroupper.
1873 define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
1874 ; ALL-LABEL: extload_v8i64:
1876 ; ALL-NEXT: vpmovsxbq (%rdi), %zmm0
1877 ; ALL-NEXT: vmovdqa64 %zmm0, (%rsi)
1878 ; ALL-NEXT: vzeroupper
1880 %sign_load = load <8 x i8>, <8 x i8>* %a
1881 %c = sext <8 x i8> %sign_load to <8 x i64>
1882 store <8 x i64> %c, <8 x i64>* %res
1886 define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
1887 ; KNL-LABEL: test21:
1889 ; KNL-NEXT: kmovw %edi, %k0
1890 ; KNL-NEXT: kshiftlw $15, %k0, %k0
1891 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1892 ; KNL-NEXT: kshiftlw $2, %k0, %k2
1893 ; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1894 ; KNL-NEXT: kmovw %esi, %k1
1895 ; KNL-NEXT: kshiftlw $1, %k1, %k1
1896 ; KNL-NEXT: korw %k1, %k2, %k1
1897 ; KNL-NEXT: korw %k1, %k0, %k0
1898 ; KNL-NEXT: kshiftlw $14, %k0, %k0
1899 ; KNL-NEXT: kshiftrw $14, %k0, %k0
1900 ; KNL-NEXT: kshiftlw $3, %k0, %k3
1901 ; KNL-NEXT: kmovw %edx, %k1
1902 ; KNL-NEXT: kshiftlw $2, %k1, %k1
1903 ; KNL-NEXT: korw %k1, %k3, %k1
1904 ; KNL-NEXT: korw %k1, %k0, %k0
1905 ; KNL-NEXT: kshiftlw $13, %k0, %k0
1906 ; KNL-NEXT: kshiftrw $13, %k0, %k0
1907 ; KNL-NEXT: kshiftlw $4, %k0, %k4
1908 ; KNL-NEXT: kmovw %ecx, %k1
1909 ; KNL-NEXT: kshiftlw $3, %k1, %k1
1910 ; KNL-NEXT: korw %k1, %k4, %k1
1911 ; KNL-NEXT: korw %k1, %k0, %k0
1912 ; KNL-NEXT: kshiftlw $12, %k0, %k0
1913 ; KNL-NEXT: kshiftrw $12, %k0, %k0
1914 ; KNL-NEXT: kshiftlw $5, %k0, %k5
1915 ; KNL-NEXT: kmovw %r8d, %k1
1916 ; KNL-NEXT: kshiftlw $4, %k1, %k1
1917 ; KNL-NEXT: korw %k1, %k5, %k1
1918 ; KNL-NEXT: korw %k1, %k0, %k0
1919 ; KNL-NEXT: kshiftlw $11, %k0, %k0
1920 ; KNL-NEXT: kshiftrw $11, %k0, %k0
1921 ; KNL-NEXT: kshiftlw $6, %k0, %k6
1922 ; KNL-NEXT: kmovw %r9d, %k1
1923 ; KNL-NEXT: kshiftlw $5, %k1, %k1
1924 ; KNL-NEXT: korw %k1, %k6, %k1
1925 ; KNL-NEXT: korw %k1, %k0, %k0
1926 ; KNL-NEXT: kshiftlw $10, %k0, %k0
1927 ; KNL-NEXT: kshiftrw $10, %k0, %k0
1928 ; KNL-NEXT: kshiftlw $7, %k0, %k7
1929 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
1930 ; KNL-NEXT: kmovw %eax, %k1
1931 ; KNL-NEXT: kshiftlw $6, %k1, %k1
1932 ; KNL-NEXT: korw %k1, %k7, %k1
1933 ; KNL-NEXT: korw %k1, %k0, %k0
1934 ; KNL-NEXT: kshiftlw $9, %k0, %k0
1935 ; KNL-NEXT: kshiftrw $9, %k0, %k0
1936 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
1937 ; KNL-NEXT: kmovw %eax, %k1
1938 ; KNL-NEXT: kshiftlw $7, %k1, %k1
1939 ; KNL-NEXT: kshiftlw $8, %k0, %k2
1940 ; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1941 ; KNL-NEXT: korw %k1, %k2, %k1
1942 ; KNL-NEXT: korw %k1, %k0, %k0
1943 ; KNL-NEXT: kshiftlw $8, %k0, %k0
1944 ; KNL-NEXT: kshiftrw $8, %k0, %k0
1945 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
1946 ; KNL-NEXT: kmovw %eax, %k1
1947 ; KNL-NEXT: kshiftlw $8, %k1, %k1
1948 ; KNL-NEXT: kshiftlw $9, %k0, %k2
1949 ; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1950 ; KNL-NEXT: korw %k1, %k2, %k1
1951 ; KNL-NEXT: korw %k1, %k0, %k0
1952 ; KNL-NEXT: kshiftlw $7, %k0, %k0
1953 ; KNL-NEXT: kshiftrw $7, %k0, %k0
1954 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
1955 ; KNL-NEXT: kmovw %eax, %k1
1956 ; KNL-NEXT: kshiftlw $9, %k1, %k1
1957 ; KNL-NEXT: kshiftlw $10, %k0, %k2
1958 ; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1959 ; KNL-NEXT: korw %k1, %k2, %k1
1960 ; KNL-NEXT: korw %k1, %k0, %k0
1961 ; KNL-NEXT: kshiftlw $6, %k0, %k0
1962 ; KNL-NEXT: kshiftrw $6, %k0, %k0
1963 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
1964 ; KNL-NEXT: kmovw %eax, %k1
1965 ; KNL-NEXT: kshiftlw $10, %k1, %k1
1966 ; KNL-NEXT: kshiftlw $11, %k0, %k2
1967 ; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1968 ; KNL-NEXT: korw %k1, %k2, %k1
1969 ; KNL-NEXT: korw %k1, %k0, %k0
1970 ; KNL-NEXT: kshiftlw $5, %k0, %k0
1971 ; KNL-NEXT: kshiftrw $5, %k0, %k0
1972 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
1973 ; KNL-NEXT: kmovw %eax, %k1
1974 ; KNL-NEXT: kshiftlw $11, %k1, %k1
1975 ; KNL-NEXT: kshiftlw $12, %k0, %k2
1976 ; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1977 ; KNL-NEXT: korw %k1, %k2, %k1
1978 ; KNL-NEXT: korw %k1, %k0, %k0
1979 ; KNL-NEXT: kshiftlw $4, %k0, %k0
1980 ; KNL-NEXT: kshiftrw $4, %k0, %k0
1981 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
1982 ; KNL-NEXT: kmovw %eax, %k1
1983 ; KNL-NEXT: kshiftlw $12, %k1, %k1
1984 ; KNL-NEXT: kshiftlw $13, %k0, %k2
1985 ; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1986 ; KNL-NEXT: korw %k1, %k2, %k1
1987 ; KNL-NEXT: korw %k1, %k0, %k0
1988 ; KNL-NEXT: kshiftlw $3, %k0, %k0
1989 ; KNL-NEXT: kshiftrw $3, %k0, %k0
1990 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
1991 ; KNL-NEXT: kmovw %eax, %k1
1992 ; KNL-NEXT: kshiftlw $13, %k1, %k1
1993 ; KNL-NEXT: kshiftlw $14, %k0, %k2
1994 ; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1995 ; KNL-NEXT: korw %k1, %k2, %k1
1996 ; KNL-NEXT: korw %k1, %k0, %k0
1997 ; KNL-NEXT: kshiftlw $2, %k0, %k0
1998 ; KNL-NEXT: kshiftrw $2, %k0, %k1
1999 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2000 ; KNL-NEXT: kmovw %eax, %k0
2001 ; KNL-NEXT: kshiftlw $14, %k0, %k0
2002 ; KNL-NEXT: kshiftlw $15, %k0, %k2
2003 ; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2004 ; KNL-NEXT: korw %k0, %k2, %k0
2005 ; KNL-NEXT: korw %k0, %k1, %k0
2006 ; KNL-NEXT: kshiftlw $1, %k0, %k0
2007 ; KNL-NEXT: kshiftrw $1, %k0, %k0
2008 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2009 ; KNL-NEXT: kmovw %eax, %k1
2010 ; KNL-NEXT: kshiftlw $15, %k1, %k1
2011 ; KNL-NEXT: korw %k1, %k0, %k1
2012 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2013 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2014 ; KNL-NEXT: kmovw %eax, %k0
2015 ; KNL-NEXT: kshiftlw $15, %k0, %k0
2016 ; KNL-NEXT: kshiftrw $15, %k0, %k0
2017 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2018 ; KNL-NEXT: kmovw %eax, %k1
2019 ; KNL-NEXT: kshiftlw $1, %k1, %k1
2020 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2021 ; KNL-NEXT: korw %k1, %k2, %k1
2022 ; KNL-NEXT: korw %k1, %k0, %k0
2023 ; KNL-NEXT: kshiftlw $14, %k0, %k0
2024 ; KNL-NEXT: kshiftrw $14, %k0, %k0
2025 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2026 ; KNL-NEXT: kmovw %eax, %k1
2027 ; KNL-NEXT: kshiftlw $2, %k1, %k1
2028 ; KNL-NEXT: kmovw %k3, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2029 ; KNL-NEXT: korw %k1, %k3, %k1
2030 ; KNL-NEXT: korw %k1, %k0, %k0
2031 ; KNL-NEXT: kshiftlw $13, %k0, %k0
2032 ; KNL-NEXT: kshiftrw $13, %k0, %k0
2033 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2034 ; KNL-NEXT: kmovw %eax, %k1
2035 ; KNL-NEXT: kshiftlw $3, %k1, %k1
2036 ; KNL-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2037 ; KNL-NEXT: korw %k1, %k4, %k1
2038 ; KNL-NEXT: korw %k1, %k0, %k0
2039 ; KNL-NEXT: kshiftlw $12, %k0, %k0
2040 ; KNL-NEXT: kshiftrw $12, %k0, %k0
2041 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2042 ; KNL-NEXT: kmovw %eax, %k1
2043 ; KNL-NEXT: kshiftlw $4, %k1, %k1
2044 ; KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2045 ; KNL-NEXT: korw %k1, %k5, %k1
2046 ; KNL-NEXT: korw %k1, %k0, %k0
2047 ; KNL-NEXT: kshiftlw $11, %k0, %k0
2048 ; KNL-NEXT: kshiftrw $11, %k0, %k0
2049 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2050 ; KNL-NEXT: kmovw %eax, %k1
2051 ; KNL-NEXT: kshiftlw $5, %k1, %k1
2052 ; KNL-NEXT: korw %k1, %k6, %k1
2053 ; KNL-NEXT: korw %k1, %k0, %k0
2054 ; KNL-NEXT: kshiftlw $10, %k0, %k0
2055 ; KNL-NEXT: kshiftrw $10, %k0, %k0
2056 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2057 ; KNL-NEXT: kmovw %eax, %k1
2058 ; KNL-NEXT: kshiftlw $6, %k1, %k1
2059 ; KNL-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2060 ; KNL-NEXT: korw %k1, %k7, %k1
2061 ; KNL-NEXT: korw %k1, %k0, %k0
2062 ; KNL-NEXT: kshiftlw $9, %k0, %k0
2063 ; KNL-NEXT: kshiftrw $9, %k0, %k0
2064 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2065 ; KNL-NEXT: kmovw %eax, %k1
2066 ; KNL-NEXT: kshiftlw $7, %k1, %k1
2067 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2068 ; KNL-NEXT: korw %k1, %k2, %k1
2069 ; KNL-NEXT: korw %k1, %k0, %k0
2070 ; KNL-NEXT: kshiftlw $8, %k0, %k0
2071 ; KNL-NEXT: kshiftrw $8, %k0, %k0
2072 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2073 ; KNL-NEXT: kmovw %eax, %k1
2074 ; KNL-NEXT: kshiftlw $8, %k1, %k1
2075 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2076 ; KNL-NEXT: korw %k1, %k2, %k1
2077 ; KNL-NEXT: korw %k1, %k0, %k0
2078 ; KNL-NEXT: kshiftlw $7, %k0, %k0
2079 ; KNL-NEXT: kshiftrw $7, %k0, %k0
2080 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2081 ; KNL-NEXT: kmovw %eax, %k1
2082 ; KNL-NEXT: kshiftlw $9, %k1, %k1
2083 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2084 ; KNL-NEXT: korw %k1, %k2, %k1
2085 ; KNL-NEXT: korw %k1, %k0, %k0
2086 ; KNL-NEXT: kshiftlw $6, %k0, %k0
2087 ; KNL-NEXT: kshiftrw $6, %k0, %k0
2088 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2089 ; KNL-NEXT: kmovw %eax, %k1
2090 ; KNL-NEXT: kshiftlw $10, %k1, %k1
2091 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2092 ; KNL-NEXT: korw %k1, %k2, %k1
2093 ; KNL-NEXT: korw %k1, %k0, %k0
2094 ; KNL-NEXT: kshiftlw $5, %k0, %k0
2095 ; KNL-NEXT: kshiftrw $5, %k0, %k0
2096 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2097 ; KNL-NEXT: kmovw %eax, %k1
2098 ; KNL-NEXT: kshiftlw $11, %k1, %k1
2099 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2100 ; KNL-NEXT: korw %k1, %k2, %k1
2101 ; KNL-NEXT: korw %k1, %k0, %k0
2102 ; KNL-NEXT: kshiftlw $4, %k0, %k0
2103 ; KNL-NEXT: kshiftrw $4, %k0, %k0
2104 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2105 ; KNL-NEXT: kmovw %eax, %k1
2106 ; KNL-NEXT: kshiftlw $12, %k1, %k1
2107 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2108 ; KNL-NEXT: korw %k1, %k2, %k1
2109 ; KNL-NEXT: korw %k1, %k0, %k0
2110 ; KNL-NEXT: kshiftlw $3, %k0, %k0
2111 ; KNL-NEXT: kshiftrw $3, %k0, %k0
2112 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2113 ; KNL-NEXT: kmovw %eax, %k1
2114 ; KNL-NEXT: kshiftlw $13, %k1, %k1
2115 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2116 ; KNL-NEXT: korw %k1, %k2, %k1
2117 ; KNL-NEXT: korw %k1, %k0, %k0
2118 ; KNL-NEXT: kshiftlw $2, %k0, %k0
2119 ; KNL-NEXT: kshiftrw $2, %k0, %k0
2120 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2121 ; KNL-NEXT: kmovw %eax, %k1
2122 ; KNL-NEXT: kshiftlw $14, %k1, %k1
2123 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2124 ; KNL-NEXT: korw %k1, %k2, %k1
2125 ; KNL-NEXT: korw %k1, %k0, %k0
2126 ; KNL-NEXT: kshiftlw $1, %k0, %k0
2127 ; KNL-NEXT: kshiftrw $1, %k0, %k0
2128 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2129 ; KNL-NEXT: kmovw %eax, %k1
2130 ; KNL-NEXT: kshiftlw $15, %k1, %k1
2131 ; KNL-NEXT: korw %k1, %k0, %k1
2132 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2133 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2134 ; KNL-NEXT: kmovw %eax, %k0
2135 ; KNL-NEXT: kshiftlw $15, %k0, %k0
2136 ; KNL-NEXT: kshiftrw $15, %k0, %k0
2137 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2138 ; KNL-NEXT: kmovw %eax, %k1
2139 ; KNL-NEXT: kshiftlw $1, %k1, %k1
2140 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2141 ; KNL-NEXT: korw %k1, %k2, %k1
2142 ; KNL-NEXT: korw %k1, %k0, %k0
2143 ; KNL-NEXT: kshiftlw $14, %k0, %k0
2144 ; KNL-NEXT: kshiftrw $14, %k0, %k0
2145 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2146 ; KNL-NEXT: kmovw %eax, %k1
2147 ; KNL-NEXT: kshiftlw $2, %k1, %k1
2148 ; KNL-NEXT: korw %k1, %k3, %k1
2149 ; KNL-NEXT: korw %k1, %k0, %k0
2150 ; KNL-NEXT: kshiftlw $13, %k0, %k0
2151 ; KNL-NEXT: kshiftrw $13, %k0, %k0
2152 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2153 ; KNL-NEXT: kmovw %eax, %k1
2154 ; KNL-NEXT: kshiftlw $3, %k1, %k1
2155 ; KNL-NEXT: korw %k1, %k4, %k1
2156 ; KNL-NEXT: korw %k1, %k0, %k0
2157 ; KNL-NEXT: kshiftlw $12, %k0, %k0
2158 ; KNL-NEXT: kshiftrw $12, %k0, %k0
2159 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2160 ; KNL-NEXT: kmovw %eax, %k1
2161 ; KNL-NEXT: kshiftlw $4, %k1, %k1
2162 ; KNL-NEXT: korw %k1, %k5, %k1
2163 ; KNL-NEXT: korw %k1, %k0, %k0
2164 ; KNL-NEXT: kshiftlw $11, %k0, %k0
2165 ; KNL-NEXT: kshiftrw $11, %k0, %k0
2166 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2167 ; KNL-NEXT: kmovw %eax, %k1
2168 ; KNL-NEXT: kshiftlw $5, %k1, %k1
2169 ; KNL-NEXT: korw %k1, %k6, %k1
2170 ; KNL-NEXT: korw %k1, %k0, %k0
2171 ; KNL-NEXT: kshiftlw $10, %k0, %k0
2172 ; KNL-NEXT: kshiftrw $10, %k0, %k0
2173 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2174 ; KNL-NEXT: kmovw %eax, %k1
2175 ; KNL-NEXT: kshiftlw $6, %k1, %k1
2176 ; KNL-NEXT: korw %k1, %k7, %k1
2177 ; KNL-NEXT: korw %k1, %k0, %k0
2178 ; KNL-NEXT: kshiftlw $9, %k0, %k0
2179 ; KNL-NEXT: kshiftrw $9, %k0, %k0
2180 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2181 ; KNL-NEXT: kmovw %eax, %k1
2182 ; KNL-NEXT: kshiftlw $7, %k1, %k1
2183 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2184 ; KNL-NEXT: korw %k1, %k2, %k1
2185 ; KNL-NEXT: korw %k1, %k0, %k0
2186 ; KNL-NEXT: kshiftlw $8, %k0, %k0
2187 ; KNL-NEXT: kshiftrw $8, %k0, %k0
2188 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2189 ; KNL-NEXT: kmovw %eax, %k1
2190 ; KNL-NEXT: kshiftlw $8, %k1, %k1
2191 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
2192 ; KNL-NEXT: korw %k1, %k3, %k1
2193 ; KNL-NEXT: korw %k1, %k0, %k0
2194 ; KNL-NEXT: kshiftlw $7, %k0, %k0
2195 ; KNL-NEXT: kshiftrw $7, %k0, %k0
2196 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2197 ; KNL-NEXT: kmovw %eax, %k1
2198 ; KNL-NEXT: kshiftlw $9, %k1, %k1
2199 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload
2200 ; KNL-NEXT: korw %k1, %k4, %k1
2201 ; KNL-NEXT: korw %k1, %k0, %k0
2202 ; KNL-NEXT: kshiftlw $6, %k0, %k0
2203 ; KNL-NEXT: kshiftrw $6, %k0, %k0
2204 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2205 ; KNL-NEXT: kmovw %eax, %k1
2206 ; KNL-NEXT: kshiftlw $10, %k1, %k1
2207 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2208 ; KNL-NEXT: korw %k1, %k5, %k1
2209 ; KNL-NEXT: korw %k1, %k0, %k0
2210 ; KNL-NEXT: kshiftlw $5, %k0, %k0
2211 ; KNL-NEXT: kshiftrw $5, %k0, %k0
2212 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2213 ; KNL-NEXT: kmovw %eax, %k1
2214 ; KNL-NEXT: kshiftlw $11, %k1, %k1
2215 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2216 ; KNL-NEXT: korw %k1, %k7, %k1
2217 ; KNL-NEXT: korw %k1, %k0, %k0
2218 ; KNL-NEXT: kshiftlw $4, %k0, %k0
2219 ; KNL-NEXT: kshiftrw $4, %k0, %k0
2220 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2221 ; KNL-NEXT: kmovw %eax, %k1
2222 ; KNL-NEXT: kshiftlw $12, %k1, %k1
2223 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2224 ; KNL-NEXT: korw %k1, %k7, %k1
2225 ; KNL-NEXT: korw %k1, %k0, %k0
2226 ; KNL-NEXT: kshiftlw $3, %k0, %k0
2227 ; KNL-NEXT: kshiftrw $3, %k0, %k0
2228 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2229 ; KNL-NEXT: kmovw %eax, %k1
2230 ; KNL-NEXT: kshiftlw $13, %k1, %k1
2231 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2232 ; KNL-NEXT: korw %k1, %k7, %k1
2233 ; KNL-NEXT: korw %k1, %k0, %k0
2234 ; KNL-NEXT: kshiftlw $2, %k0, %k0
2235 ; KNL-NEXT: kshiftrw $2, %k0, %k0
2236 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2237 ; KNL-NEXT: kmovw %eax, %k1
2238 ; KNL-NEXT: kshiftlw $14, %k1, %k1
2239 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2240 ; KNL-NEXT: korw %k1, %k7, %k1
2241 ; KNL-NEXT: korw %k1, %k0, %k0
2242 ; KNL-NEXT: kshiftlw $1, %k0, %k0
2243 ; KNL-NEXT: kshiftrw $1, %k0, %k0
2244 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2245 ; KNL-NEXT: kmovw %eax, %k1
2246 ; KNL-NEXT: kshiftlw $15, %k1, %k1
2247 ; KNL-NEXT: korw %k1, %k0, %k1
2248 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2249 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2250 ; KNL-NEXT: kmovw %eax, %k0
2251 ; KNL-NEXT: kshiftlw $1, %k0, %k0
2252 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2253 ; KNL-NEXT: korw %k0, %k1, %k0
2254 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2255 ; KNL-NEXT: kmovw %eax, %k7
2256 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2257 ; KNL-NEXT: kshiftrw $15, %k7, %k7
2258 ; KNL-NEXT: korw %k0, %k7, %k0
2259 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2260 ; KNL-NEXT: kmovw %eax, %k7
2261 ; KNL-NEXT: kshiftlw $2, %k7, %k7
2262 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2263 ; KNL-NEXT: korw %k7, %k1, %k7
2264 ; KNL-NEXT: kshiftlw $14, %k0, %k0
2265 ; KNL-NEXT: kshiftrw $14, %k0, %k0
2266 ; KNL-NEXT: korw %k7, %k0, %k0
2267 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2268 ; KNL-NEXT: kmovw %eax, %k7
2269 ; KNL-NEXT: kshiftlw $3, %k7, %k7
2270 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2271 ; KNL-NEXT: korw %k7, %k1, %k7
2272 ; KNL-NEXT: kshiftlw $13, %k0, %k0
2273 ; KNL-NEXT: kshiftrw $13, %k0, %k0
2274 ; KNL-NEXT: korw %k7, %k0, %k0
2275 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2276 ; KNL-NEXT: kmovw %eax, %k7
2277 ; KNL-NEXT: kshiftlw $4, %k7, %k7
2278 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2279 ; KNL-NEXT: korw %k7, %k1, %k7
2280 ; KNL-NEXT: kshiftlw $12, %k0, %k0
2281 ; KNL-NEXT: kshiftrw $12, %k0, %k0
2282 ; KNL-NEXT: korw %k7, %k0, %k0
2283 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2284 ; KNL-NEXT: kmovw %eax, %k7
2285 ; KNL-NEXT: kshiftlw $5, %k7, %k7
2286 ; KNL-NEXT: korw %k7, %k6, %k7
2287 ; KNL-NEXT: kshiftlw $11, %k0, %k0
2288 ; KNL-NEXT: kshiftrw $11, %k0, %k0
2289 ; KNL-NEXT: korw %k7, %k0, %k0
2290 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2291 ; KNL-NEXT: kmovw %eax, %k7
2292 ; KNL-NEXT: kshiftlw $6, %k7, %k7
2293 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2294 ; KNL-NEXT: korw %k7, %k1, %k7
2295 ; KNL-NEXT: kshiftlw $10, %k0, %k0
2296 ; KNL-NEXT: kshiftrw $10, %k0, %k0
2297 ; KNL-NEXT: korw %k7, %k0, %k0
2298 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2299 ; KNL-NEXT: kmovw %eax, %k7
2300 ; KNL-NEXT: kshiftlw $7, %k7, %k7
2301 ; KNL-NEXT: korw %k7, %k2, %k7
2302 ; KNL-NEXT: kshiftlw $9, %k0, %k0
2303 ; KNL-NEXT: kshiftrw $9, %k0, %k0
2304 ; KNL-NEXT: korw %k7, %k0, %k0
2305 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2306 ; KNL-NEXT: kmovw %eax, %k7
2307 ; KNL-NEXT: kshiftlw $8, %k7, %k7
2308 ; KNL-NEXT: korw %k7, %k3, %k7
2309 ; KNL-NEXT: kshiftlw $8, %k0, %k0
2310 ; KNL-NEXT: kshiftrw $8, %k0, %k0
2311 ; KNL-NEXT: korw %k7, %k0, %k0
2312 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2313 ; KNL-NEXT: kmovw %eax, %k7
2314 ; KNL-NEXT: kshiftlw $9, %k7, %k7
2315 ; KNL-NEXT: korw %k7, %k4, %k7
2316 ; KNL-NEXT: kshiftlw $7, %k0, %k0
2317 ; KNL-NEXT: kshiftrw $7, %k0, %k0
2318 ; KNL-NEXT: korw %k7, %k0, %k0
2319 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2320 ; KNL-NEXT: kmovw %eax, %k7
2321 ; KNL-NEXT: kshiftlw $10, %k7, %k7
2322 ; KNL-NEXT: korw %k7, %k5, %k6
2323 ; KNL-NEXT: kshiftlw $6, %k0, %k0
2324 ; KNL-NEXT: kshiftrw $6, %k0, %k0
2325 ; KNL-NEXT: korw %k6, %k0, %k0
2326 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2327 ; KNL-NEXT: kmovw %eax, %k6
2328 ; KNL-NEXT: kshiftlw $11, %k6, %k6
2329 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2330 ; KNL-NEXT: korw %k6, %k2, %k5
2331 ; KNL-NEXT: kshiftlw $5, %k0, %k0
2332 ; KNL-NEXT: kshiftrw $5, %k0, %k0
2333 ; KNL-NEXT: korw %k5, %k0, %k0
2334 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2335 ; KNL-NEXT: kmovw %eax, %k5
2336 ; KNL-NEXT: kshiftlw $12, %k5, %k5
2337 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2338 ; KNL-NEXT: korw %k5, %k2, %k4
2339 ; KNL-NEXT: kshiftlw $4, %k0, %k0
2340 ; KNL-NEXT: kshiftrw $4, %k0, %k0
2341 ; KNL-NEXT: korw %k4, %k0, %k0
2342 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2343 ; KNL-NEXT: kmovw %eax, %k4
2344 ; KNL-NEXT: kshiftlw $13, %k4, %k4
2345 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2346 ; KNL-NEXT: korw %k4, %k2, %k3
2347 ; KNL-NEXT: kshiftlw $3, %k0, %k0
2348 ; KNL-NEXT: kshiftrw $3, %k0, %k0
2349 ; KNL-NEXT: korw %k3, %k0, %k0
2350 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2351 ; KNL-NEXT: kmovw %eax, %k3
2352 ; KNL-NEXT: kshiftlw $14, %k3, %k3
2353 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2354 ; KNL-NEXT: korw %k3, %k2, %k2
2355 ; KNL-NEXT: kshiftlw $2, %k0, %k0
2356 ; KNL-NEXT: kshiftrw $2, %k0, %k0
2357 ; KNL-NEXT: korw %k2, %k0, %k0
2358 ; KNL-NEXT: kshiftlw $1, %k0, %k0
2359 ; KNL-NEXT: kshiftrw $1, %k0, %k0
2360 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2361 ; KNL-NEXT: kmovw %eax, %k2
2362 ; KNL-NEXT: kshiftlw $15, %k2, %k2
2363 ; KNL-NEXT: korw %k2, %k0, %k2
2364 ; KNL-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k2} {z}
2365 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2366 ; KNL-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z}
2367 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2368 ; KNL-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k1} {z}
2369 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2370 ; KNL-NEXT: vpternlogd $255, %zmm7, %zmm7, %zmm7 {%k1} {z}
2371 ; KNL-NEXT: vpmovdw %zmm4, %ymm4
2372 ; KNL-NEXT: vpand %ymm1, %ymm4, %ymm1
2373 ; KNL-NEXT: vpmovdw %zmm5, %ymm4
2374 ; KNL-NEXT: vpand %ymm2, %ymm4, %ymm2
2375 ; KNL-NEXT: vpmovdw %zmm6, %ymm4
2376 ; KNL-NEXT: vpand %ymm3, %ymm4, %ymm3
2377 ; KNL-NEXT: vpmovdw %zmm7, %ymm4
2378 ; KNL-NEXT: vpand %ymm0, %ymm4, %ymm0
2381 ; SKX-LABEL: test21:
2383 ; SKX-NEXT: vpsllw $7, %zmm2, %zmm2
2384 ; SKX-NEXT: vpmovb2m %zmm2, %k1
2385 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
2386 ; SKX-NEXT: kshiftrq $32, %k1, %k1
2387 ; SKX-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z}
2390 ; AVX512DQNOBW-LABEL: test21:
2391 ; AVX512DQNOBW: # %bb.0:
2392 ; AVX512DQNOBW-NEXT: kmovw %edi, %k0
2393 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k0
2394 ; AVX512DQNOBW-NEXT: kshiftrw $15, %k0, %k0
2395 ; AVX512DQNOBW-NEXT: kshiftlw $2, %k0, %k2
2396 ; AVX512DQNOBW-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2397 ; AVX512DQNOBW-NEXT: kmovw %esi, %k1
2398 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1
2399 ; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1
2400 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2401 ; AVX512DQNOBW-NEXT: kshiftlw $14, %k0, %k0
2402 ; AVX512DQNOBW-NEXT: kshiftrw $14, %k0, %k0
2403 ; AVX512DQNOBW-NEXT: kshiftlw $3, %k0, %k3
2404 ; AVX512DQNOBW-NEXT: kmovw %edx, %k1
2405 ; AVX512DQNOBW-NEXT: kshiftlw $2, %k1, %k1
2406 ; AVX512DQNOBW-NEXT: korw %k1, %k3, %k1
2407 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2408 ; AVX512DQNOBW-NEXT: kshiftlw $13, %k0, %k0
2409 ; AVX512DQNOBW-NEXT: kshiftrw $13, %k0, %k0
2410 ; AVX512DQNOBW-NEXT: kshiftlw $4, %k0, %k4
2411 ; AVX512DQNOBW-NEXT: kmovw %ecx, %k1
2412 ; AVX512DQNOBW-NEXT: kshiftlw $3, %k1, %k1
2413 ; AVX512DQNOBW-NEXT: korw %k1, %k4, %k1
2414 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2415 ; AVX512DQNOBW-NEXT: kshiftlw $12, %k0, %k0
2416 ; AVX512DQNOBW-NEXT: kshiftrw $12, %k0, %k0
2417 ; AVX512DQNOBW-NEXT: kshiftlw $5, %k0, %k5
2418 ; AVX512DQNOBW-NEXT: kmovw %r8d, %k1
2419 ; AVX512DQNOBW-NEXT: kshiftlw $4, %k1, %k1
2420 ; AVX512DQNOBW-NEXT: korw %k1, %k5, %k1
2421 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2422 ; AVX512DQNOBW-NEXT: kshiftlw $11, %k0, %k0
2423 ; AVX512DQNOBW-NEXT: kshiftrw $11, %k0, %k0
2424 ; AVX512DQNOBW-NEXT: kshiftlw $6, %k0, %k6
2425 ; AVX512DQNOBW-NEXT: kmovw %r9d, %k1
2426 ; AVX512DQNOBW-NEXT: kshiftlw $5, %k1, %k1
2427 ; AVX512DQNOBW-NEXT: korw %k1, %k6, %k1
2428 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2429 ; AVX512DQNOBW-NEXT: kshiftlw $10, %k0, %k0
2430 ; AVX512DQNOBW-NEXT: kshiftrw $10, %k0, %k0
2431 ; AVX512DQNOBW-NEXT: kshiftlw $7, %k0, %k7
2432 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2433 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2434 ; AVX512DQNOBW-NEXT: kshiftlw $6, %k1, %k1
2435 ; AVX512DQNOBW-NEXT: korw %k1, %k7, %k1
2436 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2437 ; AVX512DQNOBW-NEXT: kshiftlw $9, %k0, %k0
2438 ; AVX512DQNOBW-NEXT: kshiftrw $9, %k0, %k0
2439 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2440 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2441 ; AVX512DQNOBW-NEXT: kshiftlw $7, %k1, %k1
2442 ; AVX512DQNOBW-NEXT: kshiftlw $8, %k0, %k2
2443 ; AVX512DQNOBW-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2444 ; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1
2445 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2446 ; AVX512DQNOBW-NEXT: kshiftlw $8, %k0, %k0
2447 ; AVX512DQNOBW-NEXT: kshiftrw $8, %k0, %k0
2448 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2449 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2450 ; AVX512DQNOBW-NEXT: kshiftlw $8, %k1, %k1
2451 ; AVX512DQNOBW-NEXT: kshiftlw $9, %k0, %k2
2452 ; AVX512DQNOBW-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2453 ; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1
2454 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2455 ; AVX512DQNOBW-NEXT: kshiftlw $7, %k0, %k0
2456 ; AVX512DQNOBW-NEXT: kshiftrw $7, %k0, %k0
2457 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2458 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2459 ; AVX512DQNOBW-NEXT: kshiftlw $9, %k1, %k1
2460 ; AVX512DQNOBW-NEXT: kshiftlw $10, %k0, %k2
2461 ; AVX512DQNOBW-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2462 ; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1
2463 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2464 ; AVX512DQNOBW-NEXT: kshiftlw $6, %k0, %k0
2465 ; AVX512DQNOBW-NEXT: kshiftrw $6, %k0, %k0
2466 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2467 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2468 ; AVX512DQNOBW-NEXT: kshiftlw $10, %k1, %k1
2469 ; AVX512DQNOBW-NEXT: kshiftlw $11, %k0, %k2
2470 ; AVX512DQNOBW-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2471 ; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1
2472 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2473 ; AVX512DQNOBW-NEXT: kshiftlw $5, %k0, %k0
2474 ; AVX512DQNOBW-NEXT: kshiftrw $5, %k0, %k0
2475 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2476 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2477 ; AVX512DQNOBW-NEXT: kshiftlw $11, %k1, %k1
2478 ; AVX512DQNOBW-NEXT: kshiftlw $12, %k0, %k2
2479 ; AVX512DQNOBW-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2480 ; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1
2481 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2482 ; AVX512DQNOBW-NEXT: kshiftlw $4, %k0, %k0
2483 ; AVX512DQNOBW-NEXT: kshiftrw $4, %k0, %k0
2484 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2485 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2486 ; AVX512DQNOBW-NEXT: kshiftlw $12, %k1, %k1
2487 ; AVX512DQNOBW-NEXT: kshiftlw $13, %k0, %k2
2488 ; AVX512DQNOBW-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2489 ; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1
2490 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2491 ; AVX512DQNOBW-NEXT: kshiftlw $3, %k0, %k0
2492 ; AVX512DQNOBW-NEXT: kshiftrw $3, %k0, %k0
2493 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2494 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2495 ; AVX512DQNOBW-NEXT: kshiftlw $13, %k1, %k1
2496 ; AVX512DQNOBW-NEXT: kshiftlw $14, %k0, %k2
2497 ; AVX512DQNOBW-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2498 ; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1
2499 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2500 ; AVX512DQNOBW-NEXT: kshiftlw $2, %k0, %k0
2501 ; AVX512DQNOBW-NEXT: kshiftrw $2, %k0, %k2
2502 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2503 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2504 ; AVX512DQNOBW-NEXT: kshiftlw $14, %k1, %k0
2505 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k1
2506 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2507 ; AVX512DQNOBW-NEXT: korw %k0, %k1, %k0
2508 ; AVX512DQNOBW-NEXT: korw %k0, %k2, %k0
2509 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
2510 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
2511 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2512 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2513 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
2514 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2515 ; AVX512DQNOBW-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2516 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2517 ; AVX512DQNOBW-NEXT: kmovw %eax, %k0
2518 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k0
2519 ; AVX512DQNOBW-NEXT: kshiftrw $15, %k0, %k0
2520 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2521 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2522 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k2, %k2
2523 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2524 ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2
2525 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2526 ; AVX512DQNOBW-NEXT: kshiftlw $14, %k0, %k0
2527 ; AVX512DQNOBW-NEXT: kshiftrw $14, %k0, %k0
2528 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2529 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2530 ; AVX512DQNOBW-NEXT: kshiftlw $2, %k2, %k2
2531 ; AVX512DQNOBW-NEXT: kmovw %k3, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2532 ; AVX512DQNOBW-NEXT: korw %k2, %k3, %k2
2533 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2534 ; AVX512DQNOBW-NEXT: kshiftlw $13, %k0, %k0
2535 ; AVX512DQNOBW-NEXT: kshiftrw $13, %k0, %k0
2536 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2537 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2538 ; AVX512DQNOBW-NEXT: kshiftlw $3, %k2, %k2
2539 ; AVX512DQNOBW-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2540 ; AVX512DQNOBW-NEXT: korw %k2, %k4, %k2
2541 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2542 ; AVX512DQNOBW-NEXT: kshiftlw $12, %k0, %k0
2543 ; AVX512DQNOBW-NEXT: kshiftrw $12, %k0, %k0
2544 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2545 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2546 ; AVX512DQNOBW-NEXT: kshiftlw $4, %k2, %k2
2547 ; AVX512DQNOBW-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2548 ; AVX512DQNOBW-NEXT: korw %k2, %k5, %k2
2549 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2550 ; AVX512DQNOBW-NEXT: kshiftlw $11, %k0, %k0
2551 ; AVX512DQNOBW-NEXT: kshiftrw $11, %k0, %k0
2552 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2553 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2554 ; AVX512DQNOBW-NEXT: kshiftlw $5, %k2, %k2
2555 ; AVX512DQNOBW-NEXT: korw %k2, %k6, %k2
2556 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2557 ; AVX512DQNOBW-NEXT: kshiftlw $10, %k0, %k0
2558 ; AVX512DQNOBW-NEXT: kshiftrw $10, %k0, %k0
2559 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2560 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2561 ; AVX512DQNOBW-NEXT: kshiftlw $6, %k2, %k2
2562 ; AVX512DQNOBW-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2563 ; AVX512DQNOBW-NEXT: korw %k2, %k7, %k2
2564 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2565 ; AVX512DQNOBW-NEXT: kshiftlw $9, %k0, %k0
2566 ; AVX512DQNOBW-NEXT: kshiftrw $9, %k0, %k0
2567 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2568 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2569 ; AVX512DQNOBW-NEXT: kshiftlw $7, %k2, %k2
2570 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2571 ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2
2572 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2573 ; AVX512DQNOBW-NEXT: kshiftlw $8, %k0, %k0
2574 ; AVX512DQNOBW-NEXT: kshiftrw $8, %k0, %k0
2575 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2576 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2577 ; AVX512DQNOBW-NEXT: kshiftlw $8, %k2, %k2
2578 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2579 ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2
2580 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2581 ; AVX512DQNOBW-NEXT: kshiftlw $7, %k0, %k0
2582 ; AVX512DQNOBW-NEXT: kshiftrw $7, %k0, %k0
2583 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2584 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2585 ; AVX512DQNOBW-NEXT: kshiftlw $9, %k2, %k2
2586 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2587 ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2
2588 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2589 ; AVX512DQNOBW-NEXT: kshiftlw $6, %k0, %k0
2590 ; AVX512DQNOBW-NEXT: kshiftrw $6, %k0, %k0
2591 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2592 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2593 ; AVX512DQNOBW-NEXT: kshiftlw $10, %k2, %k2
2594 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2595 ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2
2596 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2597 ; AVX512DQNOBW-NEXT: kshiftlw $5, %k0, %k0
2598 ; AVX512DQNOBW-NEXT: kshiftrw $5, %k0, %k0
2599 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2600 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2601 ; AVX512DQNOBW-NEXT: kshiftlw $11, %k2, %k2
2602 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2603 ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2
2604 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2605 ; AVX512DQNOBW-NEXT: kshiftlw $4, %k0, %k0
2606 ; AVX512DQNOBW-NEXT: kshiftrw $4, %k0, %k0
2607 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2608 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2609 ; AVX512DQNOBW-NEXT: kshiftlw $12, %k2, %k2
2610 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2611 ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2
2612 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2613 ; AVX512DQNOBW-NEXT: kshiftlw $3, %k0, %k0
2614 ; AVX512DQNOBW-NEXT: kshiftrw $3, %k0, %k0
2615 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2616 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2617 ; AVX512DQNOBW-NEXT: kshiftlw $13, %k2, %k2
2618 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2619 ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2
2620 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2621 ; AVX512DQNOBW-NEXT: kshiftlw $2, %k0, %k0
2622 ; AVX512DQNOBW-NEXT: kshiftrw $2, %k0, %k0
2623 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2624 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2625 ; AVX512DQNOBW-NEXT: kshiftlw $14, %k2, %k2
2626 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2627 ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2
2628 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2629 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
2630 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
2631 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2632 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2633 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
2634 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2635 ; AVX512DQNOBW-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2636 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2637 ; AVX512DQNOBW-NEXT: kmovw %eax, %k0
2638 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k0
2639 ; AVX512DQNOBW-NEXT: kshiftrw $15, %k0, %k0
2640 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2641 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2642 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k2, %k2
2643 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2644 ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2
2645 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2646 ; AVX512DQNOBW-NEXT: kshiftlw $14, %k0, %k0
2647 ; AVX512DQNOBW-NEXT: kshiftrw $14, %k0, %k0
2648 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2649 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2650 ; AVX512DQNOBW-NEXT: kshiftlw $2, %k2, %k2
2651 ; AVX512DQNOBW-NEXT: korw %k2, %k3, %k2
2652 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2653 ; AVX512DQNOBW-NEXT: kshiftlw $13, %k0, %k0
2654 ; AVX512DQNOBW-NEXT: kshiftrw $13, %k0, %k0
2655 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2656 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2657 ; AVX512DQNOBW-NEXT: kshiftlw $3, %k2, %k2
2658 ; AVX512DQNOBW-NEXT: korw %k2, %k4, %k2
2659 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2660 ; AVX512DQNOBW-NEXT: kshiftlw $12, %k0, %k0
2661 ; AVX512DQNOBW-NEXT: kshiftrw $12, %k0, %k0
2662 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2663 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2664 ; AVX512DQNOBW-NEXT: kshiftlw $4, %k2, %k2
2665 ; AVX512DQNOBW-NEXT: korw %k2, %k5, %k2
2666 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2667 ; AVX512DQNOBW-NEXT: kshiftlw $11, %k0, %k0
2668 ; AVX512DQNOBW-NEXT: kshiftrw $11, %k0, %k0
2669 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2670 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2671 ; AVX512DQNOBW-NEXT: kshiftlw $5, %k2, %k2
2672 ; AVX512DQNOBW-NEXT: korw %k2, %k6, %k2
2673 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2674 ; AVX512DQNOBW-NEXT: kshiftlw $10, %k0, %k0
2675 ; AVX512DQNOBW-NEXT: kshiftrw $10, %k0, %k0
2676 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2677 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2678 ; AVX512DQNOBW-NEXT: kshiftlw $6, %k2, %k2
2679 ; AVX512DQNOBW-NEXT: korw %k2, %k7, %k2
2680 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2681 ; AVX512DQNOBW-NEXT: kshiftlw $9, %k0, %k0
2682 ; AVX512DQNOBW-NEXT: kshiftrw $9, %k0, %k0
2683 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2684 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2685 ; AVX512DQNOBW-NEXT: kshiftlw $7, %k2, %k2
2686 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2687 ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2
2688 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2689 ; AVX512DQNOBW-NEXT: kshiftlw $8, %k0, %k0
2690 ; AVX512DQNOBW-NEXT: kshiftrw $8, %k0, %k0
2691 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2692 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2693 ; AVX512DQNOBW-NEXT: kshiftlw $8, %k2, %k2
2694 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
2695 ; AVX512DQNOBW-NEXT: korw %k2, %k3, %k2
2696 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2697 ; AVX512DQNOBW-NEXT: kshiftlw $7, %k0, %k0
2698 ; AVX512DQNOBW-NEXT: kshiftrw $7, %k0, %k0
2699 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2700 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2701 ; AVX512DQNOBW-NEXT: kshiftlw $9, %k2, %k2
2702 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload
2703 ; AVX512DQNOBW-NEXT: korw %k2, %k4, %k2
2704 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2705 ; AVX512DQNOBW-NEXT: kshiftlw $6, %k0, %k0
2706 ; AVX512DQNOBW-NEXT: kshiftrw $6, %k0, %k0
2707 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2708 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2709 ; AVX512DQNOBW-NEXT: kshiftlw $10, %k2, %k2
2710 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2711 ; AVX512DQNOBW-NEXT: korw %k2, %k5, %k2
2712 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2713 ; AVX512DQNOBW-NEXT: kshiftlw $5, %k0, %k0
2714 ; AVX512DQNOBW-NEXT: kshiftrw $5, %k0, %k0
2715 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2716 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2717 ; AVX512DQNOBW-NEXT: kshiftlw $11, %k2, %k2
2718 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2719 ; AVX512DQNOBW-NEXT: korw %k2, %k7, %k2
2720 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2721 ; AVX512DQNOBW-NEXT: kshiftlw $4, %k0, %k0
2722 ; AVX512DQNOBW-NEXT: kshiftrw $4, %k0, %k0
2723 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2724 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2725 ; AVX512DQNOBW-NEXT: kshiftlw $12, %k2, %k2
2726 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2727 ; AVX512DQNOBW-NEXT: korw %k2, %k7, %k2
2728 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2729 ; AVX512DQNOBW-NEXT: kshiftlw $3, %k0, %k0
2730 ; AVX512DQNOBW-NEXT: kshiftrw $3, %k0, %k0
2731 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2732 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2733 ; AVX512DQNOBW-NEXT: kshiftlw $13, %k2, %k2
2734 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2735 ; AVX512DQNOBW-NEXT: korw %k2, %k7, %k2
2736 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2737 ; AVX512DQNOBW-NEXT: kshiftlw $2, %k0, %k0
2738 ; AVX512DQNOBW-NEXT: kshiftrw $2, %k0, %k0
2739 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2740 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2741 ; AVX512DQNOBW-NEXT: kshiftlw $14, %k2, %k2
2742 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2743 ; AVX512DQNOBW-NEXT: korw %k2, %k7, %k2
2744 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2745 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
2746 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
2747 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2748 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2749 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
2750 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
2751 ; AVX512DQNOBW-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2752 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2753 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2754 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k2, %k2
2755 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2756 ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k2
2757 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2758 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2759 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2760 ; AVX512DQNOBW-NEXT: kshiftrw $15, %k7, %k7
2761 ; AVX512DQNOBW-NEXT: korw %k2, %k7, %k2
2762 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2763 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2764 ; AVX512DQNOBW-NEXT: kshiftlw $2, %k7, %k7
2765 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2766 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k7
2767 ; AVX512DQNOBW-NEXT: kshiftlw $14, %k2, %k2
2768 ; AVX512DQNOBW-NEXT: kshiftrw $14, %k2, %k2
2769 ; AVX512DQNOBW-NEXT: korw %k7, %k2, %k2
2770 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2771 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2772 ; AVX512DQNOBW-NEXT: kshiftlw $3, %k7, %k7
2773 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2774 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k7
2775 ; AVX512DQNOBW-NEXT: kshiftlw $13, %k2, %k2
2776 ; AVX512DQNOBW-NEXT: kshiftrw $13, %k2, %k2
2777 ; AVX512DQNOBW-NEXT: korw %k7, %k2, %k2
2778 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2779 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2780 ; AVX512DQNOBW-NEXT: kshiftlw $4, %k7, %k7
2781 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2782 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k7
2783 ; AVX512DQNOBW-NEXT: kshiftlw $12, %k2, %k2
2784 ; AVX512DQNOBW-NEXT: kshiftrw $12, %k2, %k2
2785 ; AVX512DQNOBW-NEXT: korw %k7, %k2, %k2
2786 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2787 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2788 ; AVX512DQNOBW-NEXT: kshiftlw $5, %k7, %k7
2789 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k7
2790 ; AVX512DQNOBW-NEXT: kshiftlw $11, %k2, %k2
2791 ; AVX512DQNOBW-NEXT: kshiftrw $11, %k2, %k2
2792 ; AVX512DQNOBW-NEXT: korw %k7, %k2, %k2
2793 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2794 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2795 ; AVX512DQNOBW-NEXT: kshiftlw $6, %k7, %k7
2796 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2797 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k7
2798 ; AVX512DQNOBW-NEXT: kshiftlw $10, %k2, %k2
2799 ; AVX512DQNOBW-NEXT: kshiftrw $10, %k2, %k2
2800 ; AVX512DQNOBW-NEXT: korw %k7, %k2, %k2
2801 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2802 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2803 ; AVX512DQNOBW-NEXT: kshiftlw $7, %k7, %k7
2804 ; AVX512DQNOBW-NEXT: korw %k7, %k1, %k7
2805 ; AVX512DQNOBW-NEXT: kshiftlw $9, %k2, %k2
2806 ; AVX512DQNOBW-NEXT: kshiftrw $9, %k2, %k2
2807 ; AVX512DQNOBW-NEXT: korw %k7, %k2, %k2
2808 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2809 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2810 ; AVX512DQNOBW-NEXT: kshiftlw $8, %k7, %k7
2811 ; AVX512DQNOBW-NEXT: korw %k7, %k3, %k7
2812 ; AVX512DQNOBW-NEXT: kshiftlw $8, %k2, %k2
2813 ; AVX512DQNOBW-NEXT: kshiftrw $8, %k2, %k2
2814 ; AVX512DQNOBW-NEXT: korw %k7, %k2, %k2
2815 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2816 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2817 ; AVX512DQNOBW-NEXT: kshiftlw $9, %k7, %k7
2818 ; AVX512DQNOBW-NEXT: korw %k7, %k4, %k7
2819 ; AVX512DQNOBW-NEXT: kshiftlw $7, %k2, %k2
2820 ; AVX512DQNOBW-NEXT: kshiftrw $7, %k2, %k2
2821 ; AVX512DQNOBW-NEXT: korw %k7, %k2, %k2
2822 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2823 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2824 ; AVX512DQNOBW-NEXT: kshiftlw $10, %k7, %k7
2825 ; AVX512DQNOBW-NEXT: korw %k7, %k5, %k6
2826 ; AVX512DQNOBW-NEXT: kshiftlw $6, %k2, %k2
2827 ; AVX512DQNOBW-NEXT: kshiftrw $6, %k2, %k2
2828 ; AVX512DQNOBW-NEXT: korw %k6, %k2, %k2
2829 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2830 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2831 ; AVX512DQNOBW-NEXT: kshiftlw $11, %k6, %k6
2832 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2833 ; AVX512DQNOBW-NEXT: korw %k6, %k1, %k5
2834 ; AVX512DQNOBW-NEXT: kshiftlw $5, %k2, %k2
2835 ; AVX512DQNOBW-NEXT: kshiftrw $5, %k2, %k2
2836 ; AVX512DQNOBW-NEXT: korw %k5, %k2, %k2
2837 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2838 ; AVX512DQNOBW-NEXT: kmovw %eax, %k5
2839 ; AVX512DQNOBW-NEXT: kshiftlw $12, %k5, %k5
2840 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2841 ; AVX512DQNOBW-NEXT: korw %k5, %k1, %k4
2842 ; AVX512DQNOBW-NEXT: kshiftlw $4, %k2, %k2
2843 ; AVX512DQNOBW-NEXT: kshiftrw $4, %k2, %k2
2844 ; AVX512DQNOBW-NEXT: korw %k4, %k2, %k2
2845 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2846 ; AVX512DQNOBW-NEXT: kmovw %eax, %k4
2847 ; AVX512DQNOBW-NEXT: kshiftlw $13, %k4, %k4
2848 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2849 ; AVX512DQNOBW-NEXT: korw %k4, %k1, %k3
2850 ; AVX512DQNOBW-NEXT: kshiftlw $3, %k2, %k2
2851 ; AVX512DQNOBW-NEXT: kshiftrw $3, %k2, %k2
2852 ; AVX512DQNOBW-NEXT: korw %k3, %k2, %k2
2853 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2854 ; AVX512DQNOBW-NEXT: kmovw %eax, %k3
2855 ; AVX512DQNOBW-NEXT: kshiftlw $14, %k3, %k3
2856 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2857 ; AVX512DQNOBW-NEXT: korw %k3, %k1, %k1
2858 ; AVX512DQNOBW-NEXT: kshiftlw $2, %k2, %k2
2859 ; AVX512DQNOBW-NEXT: kshiftrw $2, %k2, %k2
2860 ; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1
2861 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1
2862 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1
2863 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2864 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2865 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
2866 ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k1
2867 ; AVX512DQNOBW-NEXT: vpmovm2d %k1, %zmm4
2868 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2869 ; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm5
2870 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2871 ; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm6
2872 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2873 ; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm7
2874 ; AVX512DQNOBW-NEXT: vpmovdw %zmm4, %ymm4
2875 ; AVX512DQNOBW-NEXT: vpand %ymm1, %ymm4, %ymm1
2876 ; AVX512DQNOBW-NEXT: vpmovdw %zmm5, %ymm4
2877 ; AVX512DQNOBW-NEXT: vpand %ymm2, %ymm4, %ymm2
2878 ; AVX512DQNOBW-NEXT: vpmovdw %zmm6, %ymm4
2879 ; AVX512DQNOBW-NEXT: vpand %ymm3, %ymm4, %ymm3
2880 ; AVX512DQNOBW-NEXT: vpmovdw %zmm7, %ymm4
2881 ; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm4, %ymm0
2882 ; AVX512DQNOBW-NEXT: retq
2883 %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer
; Zero-extension of <16 x i8> %a written as a shuffle rather than a zext:
; each byte of %a is interleaved with a zero byte and the resulting
; <32 x i8> is reinterpreted as <16 x i16>.
; All three RUN configurations share the ALL prefix for this function and
; expect the whole pattern to be matched as one vpmovzxbw from xmm0 to ymm0.
2887 define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone {
2888 ; ALL-LABEL: shuffle_zext_16x8_to_16x16:
2890 ; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; Mask indices 0-15 select bytes of %a; index 16 selects the first byte of
; the zeroinitializer operand, so every odd result byte is zero.
2892 %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
; Reinterpret the 32 interleaved bytes as 16 i16 lanes.
2893 %2 = bitcast <32 x i8> %1 to <16 x i16>
; Shuffle-based zero-extension of <16 x i8> %a to <16 x i16> (bytes of %a
; interleaved with zero bytes, then bitcast), followed by a select on %mask
; that keeps the extended lane where the mask bit is set and yields zero
; otherwise.
2897 define <16 x i16> @shuffle_zext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone {
; Expected lowering for the +avx512f-only run (KNL prefix): the mask in xmm1
; and the data in xmm0 are both widened to words, bit 0 of each mask word is
; spread across the lane with vpsllw/vpsraw $15, and the select becomes vpand.
2898 ; KNL-LABEL: shuffle_zext_16x8_to_16x16_mask:
2900 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
2901 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2902 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
2903 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
2904 ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
; Expected lowering for the run that enables +avx512bw (SKX prefix): the mask
; bit is shifted to the top of each byte, moved into %k1 with vpmovb2m, and
; the select is folded into a zero-masked vpmovzxbw.
2907 ; SKX-LABEL: shuffle_zext_16x8_to_16x16_mask:
2909 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
2910 ; SKX-NEXT: vpmovb2m %xmm1, %k1
2911 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; Expected lowering for the +avx512vl,+avx512dq run without +avx512bw
; (AVX512DQNOBW prefix): the same vector sequence as the KNL checks.
2914 ; AVX512DQNOBW-LABEL: shuffle_zext_16x8_to_16x16_mask:
2915 ; AVX512DQNOBW: # %bb.0:
2916 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
2917 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2918 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
2919 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
2920 ; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0
2921 ; AVX512DQNOBW-NEXT: retq
; Mask indices 0-15 select bytes of %a; index 16 selects the first byte of
; the zeroinitializer operand, so every odd result byte is zero.
2922 %x = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
; Reinterpret the 32 interleaved bytes as 16 i16 lanes.
2923 %bc = bitcast <32 x i8> %x to <16 x i16>
; Per-lane select: extended value where %mask is true, zero elsewhere.
2924 %ret = select <16 x i1> %mask, <16 x i16> %bc, <16 x i16> zeroinitializer
; Zero-extends the low 16 bytes of a 32-byte vector to 16 words via a
; shuffle with zero plus a bitcast. All three llc configurations are expected
; to select vpmovzxbw reading only xmm0.
2928 define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) {
2929 ; ALL-LABEL: zext_32x8_to_16x16:
2931 ; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; Indices 0-15 take the low bytes of %a; index 32 is element 0 of the
; zeroinitializer operand. Bytes 16-31 of %a are never referenced.
2933 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 1, i32 32, i32 2, i32 32, i32 3, i32 32, i32 4, i32 32, i32 5, i32 32, i32 6, i32 32, i32 7, i32 32, i32 8, i32 32, i32 9, i32 32, i32 10, i32 32, i32 11, i32 32, i32 12, i32 32, i32 13, i32 32, i32 14, i32 32, i32 15, i32 32>
; Each (byte, zero) pair becomes one 16-bit lane.
2934 %2 = bitcast <32 x i8> %1 to <16 x i16>
; Zero-extends the low 8 bytes of a 32-byte vector to 8 dwords via a shuffle
; with zero plus a bitcast. All three llc configurations are expected to
; select vpmovzxbd.
2938 define <8 x i32> @zext_32x8_to_8x32(<32 x i8> %a) {
2939 ; ALL-LABEL: zext_32x8_to_8x32:
2941 ; ALL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; Each of bytes 0-7 of %a is followed by three zero bytes (index 32 is
; element 0 of the zeroinitializer operand).
2943 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
; Each group of four bytes becomes one 32-bit lane.
2944 %2 = bitcast <32 x i8> %1 to <8 x i32>
; Zero-extends the low 4 bytes of a 32-byte vector to 4 qwords via a shuffle
; with zero plus a bitcast. All three llc configurations are expected to
; select vpmovzxbq.
2948 define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) {
2949 ; ALL-LABEL: zext_32x8_to_4x64:
2951 ; ALL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; Each of bytes 0-3 of %a is followed by seven zero bytes (index 32 is
; element 0 of the zeroinitializer operand).
2953 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
; Each group of eight bytes becomes one 64-bit lane.
2954 %2 = bitcast <32 x i8> %1 to <4 x i64>
; Zero-extends the low 8 words of a 16-word vector to 8 dwords via a shuffle
; with zero plus a bitcast. All three llc configurations are expected to
; select vpmovzxwd.
2958 define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) {
2959 ; ALL-LABEL: zext_16x16_to_8x32:
2961 ; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; Words 0-7 of %a are interleaved with zero words (index 16 is element 0 of
; the zeroinitializer operand).
2963 %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16>
; Each (word, zero) pair becomes one 32-bit lane.
2964 %2 = bitcast <16 x i16> %1 to <8 x i32>
; Zero-extends the low 4 words of a 16-word vector to 4 qwords via a shuffle
; with zero plus a bitcast. All three llc configurations are expected to
; select vpmovzxwq.
2968 define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) {
2969 ; ALL-LABEL: zext_16x16_to_4x64:
2971 ; ALL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; Each of words 0-3 of %a is followed by three zero words (index 16 is
; element 0 of the zeroinitializer operand).
2973 %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
; Each group of four words becomes one 64-bit lane.
2974 %2 = bitcast <16 x i16> %1 to <4 x i64>
; Zero-extends the low 4 dwords of an 8-dword vector to 4 qwords via a
; shuffle with zero plus a bitcast. All three llc configurations are expected
; to select vpmovzxdq.
2978 define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) {
2979 ; ALL-LABEL: zext_8x32_to_4x64:
2981 ; ALL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; Dwords 0-3 of %a are interleaved with zero dwords (index 8 is element 0 of
; the zeroinitializer operand).
2983 %1 = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8>
; Each (dword, zero) pair becomes one 64-bit lane.
2984 %2 = bitcast <8 x i32> %1 to <4 x i64>
; Zero-extends the <64 x i1> result of a byte-wise equality compare back to
; <64 x i8>, so each lane is 1 where %x and %y match and 0 elsewhere.
; Expected lowering:
;   - configurations without avx512bw: the 512-bit inputs are split into two
;     256-bit halves, each half is compared with vpcmpeqb and ANDed with a
;     splat of 1, and the halves are rejoined with vinserti64x4.
;   - configuration with avx512bw: one 512-bit vpcmpeqb into k1, then a
;     zero-masked vmovdqu8 load from a rip-relative address (presumably a
;     constant-pool splat of 1; the address is wildcarded in the check).
; Attribute group #0 is defined outside this part of the file.
2988 define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
2989 ; KNL-LABEL: zext_64xi1_to_64xi8:
2991 ; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
2992 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
2993 ; KNL-NEXT: vpcmpeqb %ymm2, %ymm3, %ymm2
2994 ; KNL-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
2995 ; KNL-NEXT: vpand %ymm3, %ymm2, %ymm2
2996 ; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
2997 ; KNL-NEXT: vpand %ymm3, %ymm0, %ymm0
2998 ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
3001 ; SKX-LABEL: zext_64xi1_to_64xi8:
3003 ; SKX-NEXT: vpcmpeqb %zmm1, %zmm0, %k1
3004 ; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
3007 ; AVX512DQNOBW-LABEL: zext_64xi1_to_64xi8:
3008 ; AVX512DQNOBW: # %bb.0:
3009 ; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm1, %ymm2
3010 ; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm3
3011 ; AVX512DQNOBW-NEXT: vpcmpeqb %ymm2, %ymm3, %ymm2
3012 ; AVX512DQNOBW-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
3013 ; AVX512DQNOBW-NEXT: vpand %ymm3, %ymm2, %ymm2
3014 ; AVX512DQNOBW-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
3015 ; AVX512DQNOBW-NEXT: vpand %ymm3, %ymm0, %ymm0
3016 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
3017 ; AVX512DQNOBW-NEXT: retq
; Per-byte equality produces the i1 vector under test.
3018 %mask = icmp eq <64 x i8> %x, %y
; Widen each i1 to an i8 holding 0 or 1.
3019 %1 = zext <64 x i1> %mask to <64 x i8>
; Zero-extends the <32 x i1> result of a word-wise equality compare to
; <32 x i16>, so each lane is 1 where %x and %y match and 0 elsewhere.
; Expected lowering:
;   - configurations without avx512bw: the inputs are split into 256-bit
;     halves, each half is compared with vpcmpeqw, and a logical right shift
;     by 15 reduces each all-ones lane to 1 before the halves are rejoined.
;   - configuration with avx512bw: one 512-bit vpcmpeqw into k0, vpmovm2w
;     to expand the mask to full lanes, then the same right shift by 15.
; Attribute group #0 is defined outside this part of the file.
3023 define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
3024 ; KNL-LABEL: zext_32xi1_to_32xi16:
3026 ; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
3027 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
3028 ; KNL-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2
3029 ; KNL-NEXT: vpsrlw $15, %ymm2, %ymm2
3030 ; KNL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
3031 ; KNL-NEXT: vpsrlw $15, %ymm0, %ymm0
3032 ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
3035 ; SKX-LABEL: zext_32xi1_to_32xi16:
3037 ; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
3038 ; SKX-NEXT: vpmovm2w %k0, %zmm0
3039 ; SKX-NEXT: vpsrlw $15, %zmm0, %zmm0
3042 ; AVX512DQNOBW-LABEL: zext_32xi1_to_32xi16:
3043 ; AVX512DQNOBW: # %bb.0:
3044 ; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm1, %ymm2
3045 ; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm3
3046 ; AVX512DQNOBW-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2
3047 ; AVX512DQNOBW-NEXT: vpsrlw $15, %ymm2, %ymm2
3048 ; AVX512DQNOBW-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
3049 ; AVX512DQNOBW-NEXT: vpsrlw $15, %ymm0, %ymm0
3050 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
3051 ; AVX512DQNOBW-NEXT: retq
; Per-word equality produces the i1 vector under test.
3052 %mask = icmp eq <32 x i16> %x, %y
; Widen each i1 to an i16 holding 0 or 1.
3053 %1 = zext <32 x i1> %mask to <32 x i16>
; Zero-extends the <16 x i1> result of a word-wise equality compare to
; <16 x i16>. The operands fit in 256-bit registers, and all three llc
; configurations are expected to produce the same code: vpcmpeqw followed by
; a logical right shift by 15 that reduces each all-ones lane to 1.
; Attribute group #0 is defined outside this part of the file.
3057 define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 {
3058 ; ALL-LABEL: zext_16xi1_to_16xi16:
3060 ; ALL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
3061 ; ALL-NEXT: vpsrlw $15, %ymm0, %ymm0
; Per-word equality produces the i1 vector under test.
3063 %mask = icmp eq <16 x i16> %x, %y
; Widen each i1 to an i16 holding 0 or 1.
3064 %1 = zext <16 x i1> %mask to <16 x i16>
; Zero-extends the <32 x i1> result of a word-wise equality compare to
; <32 x i8>, so the result element is narrower than the compared element.
; Expected lowering:
;   - configurations without avx512bw: the inputs are split into 256-bit
;     halves and compared with vpcmpeqw; each half's result is widened to
;     dwords (vpmovzxwd) and truncated to bytes (vpmovdb), the two 128-bit
;     byte vectors are joined with vinserti128, and the result is ANDed with
;     a rip-relative constant (address wildcarded in the check).
;   - configuration with avx512bw: one 512-bit vpcmpeqw into k1, then a
;     zero-masked 256-bit vmovdqu8 load from a rip-relative address.
; Attribute group #0 is defined outside this part of the file.
3069 define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 {
3070 ; KNL-LABEL: zext_32xi1_to_32xi8:
3072 ; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
3073 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
3074 ; KNL-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2
3075 ; KNL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
3076 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
3077 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
3078 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
3079 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
3080 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3081 ; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
3084 ; SKX-LABEL: zext_32xi1_to_32xi8:
3086 ; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k1
3087 ; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z}
3090 ; AVX512DQNOBW-LABEL: zext_32xi1_to_32xi8:
3091 ; AVX512DQNOBW: # %bb.0:
3092 ; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm1, %ymm2
3093 ; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm3
3094 ; AVX512DQNOBW-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2
3095 ; AVX512DQNOBW-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
3096 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
3097 ; AVX512DQNOBW-NEXT: vpmovdb %zmm0, %xmm0
3098 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
3099 ; AVX512DQNOBW-NEXT: vpmovdb %zmm1, %xmm1
3100 ; AVX512DQNOBW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3101 ; AVX512DQNOBW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
3102 ; AVX512DQNOBW-NEXT: retq
; Per-word equality produces the i1 vector under test.
3103 %mask = icmp eq <32 x i16> %x, %y
; Widen each i1 to an i8 holding 0 or 1.
3104 %1 = zext <32 x i1> %mask to <32 x i8>
; Zero-extends the <4 x i1> result of a byte-wise equality compare to
; <4 x i32>, so the result element is wider than the compared element.
; Expected lowering:
;   - avx512f-only configuration: vpcmpeqb on xmm, vpmovzxbd to widen the
;     compare result to dwords, then AND with a broadcast [1,1,1,1].
;   - configuration with avx512bw: vpcmpeqb into k0, vpmovm2d to expand the
;     mask to full dword lanes, then a logical right shift by 31.
;   - configuration with avx512dq/vl but no avx512bw: same as the
;     avx512f-only form except the AND takes the constant as an embedded
;     {1to4} broadcast memory operand (vpandd).
; Attribute group #0 is defined outside this part of the file.
3108 define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 {
3109 ; KNL-LABEL: zext_4xi1_to_4x32:
3111 ; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
3112 ; KNL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3113 ; KNL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
3114 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
3117 ; SKX-LABEL: zext_4xi1_to_4x32:
3119 ; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
3120 ; SKX-NEXT: vpmovm2d %k0, %xmm0
3121 ; SKX-NEXT: vpsrld $31, %xmm0, %xmm0
3124 ; AVX512DQNOBW-LABEL: zext_4xi1_to_4x32:
3125 ; AVX512DQNOBW: # %bb.0:
3126 ; AVX512DQNOBW-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
3127 ; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3128 ; AVX512DQNOBW-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
3129 ; AVX512DQNOBW-NEXT: retq
; Per-byte equality produces the i1 vector under test.
3130 %mask = icmp eq <4 x i8> %x, %y
; Widen each i1 to an i32 holding 0 or 1.
3131 %1 = zext <4 x i1> %mask to <4 x i32>
3135 define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 {
3136 ; KNL-LABEL: zext_2xi1_to_2xi64:
3138 ; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
3139 ; KNL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
3140 ; KNL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
3143 ; SKX-LABEL: zext_2xi1_to_2xi64:
3145 ; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
3146 ; SKX-NEXT: vpmovm2q %k0, %xmm0
3147 ; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0
3150 ; AVX512DQNOBW-LABEL: zext_2xi1_to_2xi64:
3151 ; AVX512DQNOBW: # %bb.0:
3152 ; AVX512DQNOBW-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
3153 ; AVX512DQNOBW-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
3154 ; AVX512DQNOBW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
3155 ; AVX512DQNOBW-NEXT: retq
3156 %mask = icmp eq <2 x i8> %x, %y
3157 %1 = zext <2 x i1> %mask to <2 x i64>