1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512DQ --check-prefix=SKX
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512DQ --check-prefix=AVX512DQNOBW
; Masked zero-extending load: loads <8 x i8> from %i, zero-extends it to
; <8 x i16>, and selects the result against zeroinitializer under %mask.
; NOTE(review): the function is marked readnone but loads through %i -- confirm
; the attribute is intended.
6 define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
7 ; KNL-LABEL: zext_8x8mem_to_8x16:
9 ; KNL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
10 ; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
11 ; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
12 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
15 ; SKX-LABEL: zext_8x8mem_to_8x16:
17 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
18 ; SKX-NEXT: vpmovw2m %xmm0, %k1
19 ; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
22 ; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x16:
23 ; AVX512DQNOBW: # %bb.0:
24 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
25 ; AVX512DQNOBW-NEXT: vpsllw $15, %xmm0, %xmm0
26 ; AVX512DQNOBW-NEXT: vpsraw $15, %xmm0, %xmm0
27 ; AVX512DQNOBW-NEXT: vpand %xmm1, %xmm0, %xmm0
28 ; AVX512DQNOBW-NEXT: retq
29 %a = load <8 x i8>,<8 x i8> *%i,align 1
30 %x = zext <8 x i8> %a to <8 x i16>
31 %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
; Masked sign-extending load: loads <8 x i8> from %i, sign-extends it to
; <8 x i16>, and selects the result against zeroinitializer under %mask.
; NOTE(review): the function is marked readnone but loads through %i -- confirm
; the attribute is intended.
35 define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
36 ; KNL-LABEL: sext_8x8mem_to_8x16:
38 ; KNL-NEXT: vpmovsxbw (%rdi), %xmm1
39 ; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
40 ; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
41 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
44 ; SKX-LABEL: sext_8x8mem_to_8x16:
46 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
47 ; SKX-NEXT: vpmovw2m %xmm0, %k1
48 ; SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z}
51 ; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x16:
52 ; AVX512DQNOBW: # %bb.0:
53 ; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %xmm1
54 ; AVX512DQNOBW-NEXT: vpsllw $15, %xmm0, %xmm0
55 ; AVX512DQNOBW-NEXT: vpsraw $15, %xmm0, %xmm0
56 ; AVX512DQNOBW-NEXT: vpand %xmm1, %xmm0, %xmm0
57 ; AVX512DQNOBW-NEXT: retq
58 %a = load <8 x i8>,<8 x i8> *%i,align 1
59 %x = sext <8 x i8> %a to <8 x i16>
60 %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
; Masked zero-extending load: loads <16 x i8> from %i, zero-extends it to
; <16 x i16>, and selects the result against zeroinitializer under %mask.
; NOTE(review): the function is marked readnone but loads through %i -- confirm
; the attribute is intended.
65 define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
66 ; KNL-LABEL: zext_16x8mem_to_16x16:
68 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
69 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
70 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
71 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
72 ; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
75 ; SKX-LABEL: zext_16x8mem_to_16x16:
77 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
78 ; SKX-NEXT: vpmovb2m %xmm0, %k1
79 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
82 ; AVX512DQNOBW-LABEL: zext_16x8mem_to_16x16:
83 ; AVX512DQNOBW: # %bb.0:
84 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
85 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
86 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
87 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
88 ; AVX512DQNOBW-NEXT: vpand %ymm1, %ymm0, %ymm0
89 ; AVX512DQNOBW-NEXT: retq
90 %a = load <16 x i8>,<16 x i8> *%i,align 1
91 %x = zext <16 x i8> %a to <16 x i16>
92 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Masked sign-extending load: loads <16 x i8> from %i, sign-extends it to
; <16 x i16>, and selects the result against zeroinitializer under %mask.
; NOTE(review): the function is marked readnone but loads through %i -- confirm
; the attribute is intended.
96 define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
97 ; KNL-LABEL: sext_16x8mem_to_16x16:
99 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
100 ; KNL-NEXT: vpmovsxbw (%rdi), %ymm1
101 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
102 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
103 ; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
106 ; SKX-LABEL: sext_16x8mem_to_16x16:
108 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
109 ; SKX-NEXT: vpmovb2m %xmm0, %k1
110 ; SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z}
113 ; AVX512DQNOBW-LABEL: sext_16x8mem_to_16x16:
114 ; AVX512DQNOBW: # %bb.0:
115 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
116 ; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %ymm1
117 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
118 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
119 ; AVX512DQNOBW-NEXT: vpand %ymm1, %ymm0, %ymm0
120 ; AVX512DQNOBW-NEXT: retq
121 %a = load <16 x i8>,<16 x i8> *%i,align 1
122 %x = sext <16 x i8> %a to <16 x i16>
123 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Unmasked zero-extension of the <16 x i8> argument %a to <16 x i16>.
; All RUN configurations share the ALL prefix here.
127 define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
128 ; ALL-LABEL: zext_16x8_to_16x16:
130 ; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
132 %x = zext <16 x i8> %a to <16 x i16>
; Masked zero-extension of the <16 x i8> argument %a to <16 x i16>; lanes are
; selected against zeroinitializer under %mask.
136 define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
137 ; KNL-LABEL: zext_16x8_to_16x16_mask:
139 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
140 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
141 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
142 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
143 ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
146 ; SKX-LABEL: zext_16x8_to_16x16_mask:
148 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
149 ; SKX-NEXT: vpmovb2m %xmm1, %k1
150 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
153 ; AVX512DQNOBW-LABEL: zext_16x8_to_16x16_mask:
154 ; AVX512DQNOBW: # %bb.0:
155 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
156 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
157 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
158 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
159 ; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0
160 ; AVX512DQNOBW-NEXT: retq
161 %x = zext <16 x i8> %a to <16 x i16>
162 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Unmasked sign-extension of the <16 x i8> argument %a to <16 x i16>.
; All RUN configurations share the ALL prefix here.
166 define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
167 ; ALL-LABEL: sext_16x8_to_16x16:
169 ; ALL-NEXT: vpmovsxbw %xmm0, %ymm0
171 %x = sext <16 x i8> %a to <16 x i16>
; Masked sign-extension of the <16 x i8> argument %a to <16 x i16>; lanes are
; selected against zeroinitializer under %mask.
175 define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
176 ; KNL-LABEL: sext_16x8_to_16x16_mask:
178 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
179 ; KNL-NEXT: vpmovsxbw %xmm0, %ymm0
180 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
181 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
182 ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
185 ; SKX-LABEL: sext_16x8_to_16x16_mask:
187 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
188 ; SKX-NEXT: vpmovb2m %xmm1, %k1
189 ; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z}
192 ; AVX512DQNOBW-LABEL: sext_16x8_to_16x16_mask:
193 ; AVX512DQNOBW: # %bb.0:
194 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
195 ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm0
196 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
197 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
198 ; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0
199 ; AVX512DQNOBW-NEXT: retq
200 %x = sext <16 x i8> %a to <16 x i16>
201 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Masked zero-extending load: loads <32 x i8> from %i, zero-extends it to
; <32 x i16>, and selects the result against zeroinitializer under %mask.
; NOTE(review): the function is marked readnone but loads through %i -- confirm
; the attribute is intended.
205 define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
206 ; KNL-LABEL: zext_32x8mem_to_32x16:
208 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
209 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
210 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
211 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
212 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
213 ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
214 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
215 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
216 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
217 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
218 ; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
219 ; KNL-NEXT: vpandq %zmm2, %zmm0, %zmm0
222 ; SKX-LABEL: zext_32x8mem_to_32x16:
224 ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
225 ; SKX-NEXT: vpmovb2m %ymm0, %k1
226 ; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero
229 ; AVX512DQNOBW-LABEL: zext_32x8mem_to_32x16:
230 ; AVX512DQNOBW: # %bb.0:
231 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
232 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
233 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
234 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
235 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
236 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
237 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
238 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
239 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
240 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
241 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
242 ; AVX512DQNOBW-NEXT: vpandq %zmm2, %zmm0, %zmm0
243 ; AVX512DQNOBW-NEXT: retq
244 %a = load <32 x i8>,<32 x i8> *%i,align 1
245 %x = zext <32 x i8> %a to <32 x i16>
246 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; Masked sign-extending load: loads <32 x i8> from %i, sign-extends it to
; <32 x i16>, and selects the result against zeroinitializer under %mask.
; NOTE(review): the function is marked readnone but loads through %i -- confirm
; the attribute is intended.
250 define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
251 ; KNL-LABEL: sext_32x8mem_to_32x16:
253 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
254 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
255 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
256 ; KNL-NEXT: vpmovsxbw 16(%rdi), %ymm2
257 ; KNL-NEXT: vpmovsxbw (%rdi), %ymm3
258 ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
259 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
260 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
261 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
262 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
263 ; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
264 ; KNL-NEXT: vpandq %zmm2, %zmm0, %zmm0
267 ; SKX-LABEL: sext_32x8mem_to_32x16:
269 ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
270 ; SKX-NEXT: vpmovb2m %ymm0, %k1
271 ; SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z}
274 ; AVX512DQNOBW-LABEL: sext_32x8mem_to_32x16:
275 ; AVX512DQNOBW: # %bb.0:
276 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
277 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
278 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
279 ; AVX512DQNOBW-NEXT: vpmovsxbw 16(%rdi), %ymm2
280 ; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %ymm3
281 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
282 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
283 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
284 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
285 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
286 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
287 ; AVX512DQNOBW-NEXT: vpandq %zmm2, %zmm0, %zmm0
288 ; AVX512DQNOBW-NEXT: retq
289 %a = load <32 x i8>,<32 x i8> *%i,align 1
290 %x = sext <32 x i8> %a to <32 x i16>
291 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; Unmasked zero-extension of the <32 x i8> argument %a to <32 x i16>.
; The SKX checks expect a single zmm vpmovzxbw; the KNL and AVX512DQNOBW checks
; expect two ymm halves joined with vinserti64x4.
295 define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
296 ; KNL-LABEL: zext_32x8_to_32x16:
298 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
299 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
300 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
301 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
304 ; SKX-LABEL: zext_32x8_to_32x16:
306 ; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
309 ; AVX512DQNOBW-LABEL: zext_32x8_to_32x16:
310 ; AVX512DQNOBW: # %bb.0:
311 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
312 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm0
313 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
314 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
315 ; AVX512DQNOBW-NEXT: retq
316 %x = zext <32 x i8> %a to <32 x i16>
; Masked zero-extension of the <32 x i8> argument %a to <32 x i16>; lanes are
; selected against zeroinitializer under %mask.
320 define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
321 ; KNL-LABEL: zext_32x8_to_32x16_mask:
323 ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm2
324 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
325 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
326 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
327 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
328 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
329 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0
330 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
331 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
332 ; KNL-NEXT: vpsllw $15, %ymm2, %ymm2
333 ; KNL-NEXT: vpsraw $15, %ymm2, %ymm2
334 ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
335 ; KNL-NEXT: vpandq %zmm0, %zmm1, %zmm0
338 ; SKX-LABEL: zext_32x8_to_32x16_mask:
340 ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1
341 ; SKX-NEXT: vpmovb2m %ymm1, %k1
342 ; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
345 ; AVX512DQNOBW-LABEL: zext_32x8_to_32x16_mask:
346 ; AVX512DQNOBW: # %bb.0:
347 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm1, %xmm2
348 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
349 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
350 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
351 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm0
352 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
353 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0
354 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
355 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
356 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm2, %ymm2
357 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm2, %ymm2
358 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
359 ; AVX512DQNOBW-NEXT: vpandq %zmm0, %zmm1, %zmm0
360 ; AVX512DQNOBW-NEXT: retq
361 %x = zext <32 x i8> %a to <32 x i16>
362 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; Unmasked sign-extension of the <32 x i8> argument %a to <32 x i16>.
; The SKX checks expect a single zmm vpmovsxbw; the KNL and AVX512DQNOBW checks
; expect two ymm halves joined with vinserti64x4.
366 define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
367 ; KNL-LABEL: sext_32x8_to_32x16:
369 ; KNL-NEXT: vpmovsxbw %xmm0, %ymm1
370 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
371 ; KNL-NEXT: vpmovsxbw %xmm0, %ymm0
372 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
375 ; SKX-LABEL: sext_32x8_to_32x16:
377 ; SKX-NEXT: vpmovsxbw %ymm0, %zmm0
380 ; AVX512DQNOBW-LABEL: sext_32x8_to_32x16:
381 ; AVX512DQNOBW: # %bb.0:
382 ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm1
383 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm0
384 ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm0
385 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
386 ; AVX512DQNOBW-NEXT: retq
387 %x = sext <32 x i8> %a to <32 x i16>
; Masked sign-extension of the <32 x i8> argument %a to <32 x i16>; lanes are
; selected against zeroinitializer under %mask.
391 define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
392 ; KNL-LABEL: sext_32x8_to_32x16_mask:
394 ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm2
395 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
396 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
397 ; KNL-NEXT: vpmovsxbw %xmm0, %ymm3
398 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
399 ; KNL-NEXT: vpmovsxbw %xmm0, %ymm0
400 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0
401 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
402 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
403 ; KNL-NEXT: vpsllw $15, %ymm2, %ymm2
404 ; KNL-NEXT: vpsraw $15, %ymm2, %ymm2
405 ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
406 ; KNL-NEXT: vpandq %zmm0, %zmm1, %zmm0
409 ; SKX-LABEL: sext_32x8_to_32x16_mask:
411 ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1
412 ; SKX-NEXT: vpmovb2m %ymm1, %k1
413 ; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z}
416 ; AVX512DQNOBW-LABEL: sext_32x8_to_32x16_mask:
417 ; AVX512DQNOBW: # %bb.0:
418 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm1, %xmm2
419 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
420 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
421 ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm3
422 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm0
423 ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm0
424 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0
425 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
426 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
427 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm2, %ymm2
428 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm2, %ymm2
429 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
430 ; AVX512DQNOBW-NEXT: vpandq %zmm0, %zmm1, %zmm0
431 ; AVX512DQNOBW-NEXT: retq
432 %x = sext <32 x i8> %a to <32 x i16>
433 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; Masked zero-extending load: loads <4 x i8> from %i, zero-extends it to
; <4 x i32>, and selects the result against zeroinitializer under %mask.
; Both AVX512DQ configurations share the AVX512DQ prefix for this function.
; NOTE(review): the function is marked readnone but loads through %i -- confirm
; the attribute is intended.
437 define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
438 ; KNL-LABEL: zext_4x8mem_to_4x32:
440 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
441 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
442 ; KNL-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
443 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
444 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
445 ; KNL-NEXT: vzeroupper
448 ; AVX512DQ-LABEL: zext_4x8mem_to_4x32:
450 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
451 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
452 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
453 ; AVX512DQ-NEXT: retq
454 %a = load <4 x i8>,<4 x i8> *%i,align 1
455 %x = zext <4 x i8> %a to <4 x i32>
456 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Masked sign-extending load: loads <4 x i8> from %i, sign-extends it to
; <4 x i32>, and selects the result against zeroinitializer under %mask.
; Both AVX512DQ configurations share the AVX512DQ prefix for this function.
; NOTE(review): the function is marked readnone but loads through %i -- confirm
; the attribute is intended.
460 define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
461 ; KNL-LABEL: sext_4x8mem_to_4x32:
463 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
464 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
465 ; KNL-NEXT: vpmovsxbd (%rdi), %xmm0
466 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
467 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
468 ; KNL-NEXT: vzeroupper
471 ; AVX512DQ-LABEL: sext_4x8mem_to_4x32:
473 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
474 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
475 ; AVX512DQ-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z}
476 ; AVX512DQ-NEXT: retq
477 %a = load <4 x i8>,<4 x i8> *%i,align 1
478 %x = sext <4 x i8> %a to <4 x i32>
479 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Masked zero-extending load: loads <8 x i8> from %i, zero-extends it to
; <8 x i32>, and selects the result against zeroinitializer under %mask.
; NOTE(review): the function is marked readnone but loads through %i -- confirm
; the attribute is intended.
483 define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
484 ; KNL-LABEL: zext_8x8mem_to_8x32:
486 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
487 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
488 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
489 ; KNL-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
490 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
491 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
494 ; SKX-LABEL: zext_8x8mem_to_8x32:
496 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
497 ; SKX-NEXT: vpmovw2m %xmm0, %k1
498 ; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
501 ; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x32:
502 ; AVX512DQNOBW: # %bb.0:
503 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
504 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
505 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
506 ; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
507 ; AVX512DQNOBW-NEXT: retq
508 %a = load <8 x i8>,<8 x i8> *%i,align 1
509 %x = zext <8 x i8> %a to <8 x i32>
510 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Masked sign-extending load: loads <8 x i8> from %i, sign-extends it to
; <8 x i32>, and selects the result against zeroinitializer under %mask.
; NOTE(review): the function is marked readnone but loads through %i -- confirm
; the attribute is intended.
514 define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
515 ; KNL-LABEL: sext_8x8mem_to_8x32:
517 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
518 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
519 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
520 ; KNL-NEXT: vpmovsxbd (%rdi), %ymm0
521 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
522 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
525 ; SKX-LABEL: sext_8x8mem_to_8x32:
527 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
528 ; SKX-NEXT: vpmovw2m %xmm0, %k1
529 ; SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z}
532 ; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x32:
533 ; AVX512DQNOBW: # %bb.0:
534 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
535 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
536 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
537 ; AVX512DQNOBW-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z}
538 ; AVX512DQNOBW-NEXT: retq
539 %a = load <8 x i8>,<8 x i8> *%i,align 1
540 %x = sext <8 x i8> %a to <8 x i32>
541 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Masked zero-extending load: loads <16 x i8> from %i, zero-extends it to
; <16 x i32>, and selects the result against zeroinitializer under %mask.
; NOTE(review): the function is marked readnone but loads through %i -- confirm
; the attribute is intended.
545 define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
546 ; KNL-LABEL: zext_16x8mem_to_16x32:
548 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
549 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
550 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
551 ; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
554 ; SKX-LABEL: zext_16x8mem_to_16x32:
556 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
557 ; SKX-NEXT: vpmovb2m %xmm0, %k1
558 ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
561 ; AVX512DQNOBW-LABEL: zext_16x8mem_to_16x32:
562 ; AVX512DQNOBW: # %bb.0:
563 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
564 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
565 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1
566 ; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
567 ; AVX512DQNOBW-NEXT: retq
568 %a = load <16 x i8>,<16 x i8> *%i,align 1
569 %x = zext <16 x i8> %a to <16 x i32>
570 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Masked sign-extending load: reads <16 x i8> through %i, widens it to
; <16 x i32>, and zeroes every lane whose %mask bit is clear. Each configuration
; is expected to end in one zero-masked vpmovsxbd from (%rdi); only the
; instructions that build %k1 from the <16 x i1> mask differ.
; The body loads through %i, so the function is readonly rather than readnone.
574 define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readonly {
575 ; KNL-LABEL: sext_16x8mem_to_16x32:
577 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
578 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
579 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
580 ; KNL-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
583 ; SKX-LABEL: sext_16x8mem_to_16x32:
585 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
586 ; SKX-NEXT: vpmovb2m %xmm0, %k1
587 ; SKX-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
590 ; AVX512DQNOBW-LABEL: sext_16x8mem_to_16x32:
591 ; AVX512DQNOBW: # %bb.0:
592 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
593 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
594 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1
595 ; AVX512DQNOBW-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
596 ; AVX512DQNOBW-NEXT: retq
597 %a = load <16 x i8>,<16 x i8> *%i,align 1
598 %x = sext <16 x i8> %a to <16 x i32>
599 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Masked zero extension of a register operand: widens <16 x i8> %a to
; <16 x i32> and zeroes every lane whose %mask bit is clear. The data arrives in
; xmm0 and the mask in xmm1; each configuration is expected to build %k1 from
; xmm1 and then issue one zero-masked vpmovzxbd into zmm0.
603 define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
604 ; KNL-LABEL: zext_16x8_to_16x32_mask:
606 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
607 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
608 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
609 ; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
612 ; SKX-LABEL: zext_16x8_to_16x32_mask:
614 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
615 ; SKX-NEXT: vpmovb2m %xmm1, %k1
616 ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
619 ; AVX512DQNOBW-LABEL: zext_16x8_to_16x32_mask:
620 ; AVX512DQNOBW: # %bb.0:
621 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1
622 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1
623 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1
624 ; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
625 ; AVX512DQNOBW-NEXT: retq
626 %x = zext <16 x i8> %a to <16 x i32>
627 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Masked sign extension of a register operand: widens <16 x i8> %a to
; <16 x i32> and zeroes every lane whose %mask bit is clear. The data arrives in
; xmm0 and the mask in xmm1; each configuration is expected to build %k1 from
; xmm1 and then issue one zero-masked vpmovsxbd from xmm0 into zmm0.
631 define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
632 ; KNL-LABEL: sext_16x8_to_16x32_mask:
634 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
635 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
636 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
637 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
640 ; SKX-LABEL: sext_16x8_to_16x32_mask:
642 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
643 ; SKX-NEXT: vpmovb2m %xmm1, %k1
644 ; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
647 ; AVX512DQNOBW-LABEL: sext_16x8_to_16x32_mask:
648 ; AVX512DQNOBW: # %bb.0:
649 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1
650 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1
651 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1
652 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
653 ; AVX512DQNOBW-NEXT: retq
654 %x = sext <16 x i8> %a to <16 x i32>
655 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Unmasked zero extension of register operand <16 x i8> %i to <16 x i32>.
; The shared ALL prefix is used, so every run configuration is expected to emit
; the same single vpmovzxbd from xmm0 into zmm0.
659 define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
660 ; ALL-LABEL: zext_16x8_to_16x32:
662 ; ALL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
664 %x = zext <16 x i8> %i to <16 x i32>
; Unmasked sign extension of register operand <16 x i8> %i to <16 x i32>.
; The shared ALL prefix is used, so every run configuration is expected to emit
; the same single vpmovsxbd from xmm0 into zmm0.
668 define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
669 ; ALL-LABEL: sext_16x8_to_16x32:
671 ; ALL-NEXT: vpmovsxbd %xmm0, %zmm0
673 %x = sext <16 x i8> %i to <16 x i32>
; Masked zero-extending load: reads <2 x i8> through %i, widens it to
; <2 x i64>, and zeroes every lane whose %mask bit is clear. The KNL checks
; expect an unmasked xmm vpmovzxbq followed by a zero-masked vmovdqa64 on zmm0;
; the AVX512DQ checks expect the mask folded into the xmm vpmovzxbq itself.
; The body loads through %i, so the function is readonly rather than readnone.
677 define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readonly {
678 ; KNL-LABEL: zext_2x8mem_to_2x64:
680 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
681 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
682 ; KNL-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
683 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
684 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
685 ; KNL-NEXT: vzeroupper
688 ; AVX512DQ-LABEL: zext_2x8mem_to_2x64:
690 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
691 ; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
692 ; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
693 ; AVX512DQ-NEXT: retq
694 %a = load <2 x i8>,<2 x i8> *%i,align 1
695 %x = zext <2 x i8> %a to <2 x i64>
696 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Masked sign-extending load: reads <2 x i8> through %i, widens it to
; <2 x i64>, and zeroes every lane whose %mask bit is clear. The KNL checks
; expect an unmasked xmm vpmovsxbq followed by a zero-masked vmovdqa64 on zmm0;
; the AVX512DQ checks expect the mask folded into the xmm vpmovsxbq itself.
; The body loads through %i, so the function is readonly rather than readnone.
699 define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readonly {
700 ; KNL-LABEL: sext_2x8mem_to_2x64mask:
702 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
703 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
704 ; KNL-NEXT: vpmovsxbq (%rdi), %xmm0
705 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
706 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
707 ; KNL-NEXT: vzeroupper
710 ; AVX512DQ-LABEL: sext_2x8mem_to_2x64mask:
712 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
713 ; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
714 ; AVX512DQ-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z}
715 ; AVX512DQ-NEXT: retq
716 %a = load <2 x i8>,<2 x i8> *%i,align 1
717 %x = sext <2 x i8> %a to <2 x i64>
718 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Unmasked sign-extending load: reads <2 x i8> through %i and widens it to
; <2 x i64>. The shared ALL prefix expects a single vpmovsxbq from (%rdi).
; The body loads through %i, so the function is readonly rather than readnone.
721 define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readonly {
722 ; ALL-LABEL: sext_2x8mem_to_2x64:
724 ; ALL-NEXT: vpmovsxbq (%rdi), %xmm0
726 %a = load <2 x i8>,<2 x i8> *%i,align 1
727 %x = sext <2 x i8> %a to <2 x i64>
; Masked zero-extending load: reads <4 x i8> through %i, widens it to
; <4 x i64>, and zeroes every lane whose %mask bit is clear. The KNL checks
; expect an unmasked ymm vpmovzxbq followed by a zero-masked vmovdqa64 on zmm0;
; the AVX512DQ checks expect the mask folded into the ymm vpmovzxbq itself.
; The body loads through %i, so the function is readonly rather than readnone.
731 define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readonly {
732 ; KNL-LABEL: zext_4x8mem_to_4x64:
734 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
735 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
736 ; KNL-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
737 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
738 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
741 ; AVX512DQ-LABEL: zext_4x8mem_to_4x64:
743 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
744 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
745 ; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
746 ; AVX512DQ-NEXT: retq
747 %a = load <4 x i8>,<4 x i8> *%i,align 1
748 %x = zext <4 x i8> %a to <4 x i64>
749 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Masked sign-extending load: reads <4 x i8> through %i, widens it to
; <4 x i64>, and zeroes every lane whose %mask bit is clear. The KNL checks
; expect an unmasked ymm vpmovsxbq followed by a zero-masked vmovdqa64 on zmm0;
; the AVX512DQ checks expect the mask folded into the ymm vpmovsxbq itself.
; The body loads through %i, so the function is readonly rather than readnone.
753 define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readonly {
754 ; KNL-LABEL: sext_4x8mem_to_4x64mask:
756 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
757 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
758 ; KNL-NEXT: vpmovsxbq (%rdi), %ymm0
759 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
760 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
763 ; AVX512DQ-LABEL: sext_4x8mem_to_4x64mask:
765 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
766 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
767 ; AVX512DQ-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z}
768 ; AVX512DQ-NEXT: retq
769 %a = load <4 x i8>,<4 x i8> *%i,align 1
770 %x = sext <4 x i8> %a to <4 x i64>
771 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Unmasked sign-extending load: reads <4 x i8> through %i and widens it to
; <4 x i64>. The shared ALL prefix expects a single vpmovsxbq from (%rdi) into
; ymm0.
; The body loads through %i, so the function is readonly rather than readnone.
775 define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readonly {
776 ; ALL-LABEL: sext_4x8mem_to_4x64:
778 ; ALL-NEXT: vpmovsxbq (%rdi), %ymm0
780 %a = load <4 x i8>,<4 x i8> *%i,align 1
781 %x = sext <4 x i8> %a to <4 x i64>
; Masked zero-extending load: reads <8 x i8> through %i, widens it to
; <8 x i64>, and zeroes every lane whose %mask bit is clear. Each configuration
; is expected to end in one zero-masked vpmovzxbq from memory into zmm0; only
; the instructions that build %k1 from the <8 x i1> mask differ.
; The body loads through %i, so the function is readonly rather than readnone.
785 define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readonly {
786 ; KNL-LABEL: zext_8x8mem_to_8x64:
788 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
789 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
790 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
791 ; KNL-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
794 ; SKX-LABEL: zext_8x8mem_to_8x64:
796 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
797 ; SKX-NEXT: vpmovw2m %xmm0, %k1
798 ; SKX-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
801 ; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x64:
802 ; AVX512DQNOBW: # %bb.0:
803 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
804 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
805 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
806 ; AVX512DQNOBW-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
807 ; AVX512DQNOBW-NEXT: retq
808 %a = load <8 x i8>,<8 x i8> *%i,align 1
809 %x = zext <8 x i8> %a to <8 x i64>
810 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Masked sign-extending load: reads <8 x i8> through %i, widens it to
; <8 x i64>, and zeroes every lane whose %mask bit is clear. Each configuration
; is expected to end in one zero-masked vpmovsxbq from (%rdi) into zmm0; only
; the instructions that build %k1 from the <8 x i1> mask differ.
; The body loads through %i, so the function is readonly rather than readnone.
814 define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readonly {
815 ; KNL-LABEL: sext_8x8mem_to_8x64mask:
817 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
818 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
819 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
820 ; KNL-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
823 ; SKX-LABEL: sext_8x8mem_to_8x64mask:
825 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
826 ; SKX-NEXT: vpmovw2m %xmm0, %k1
827 ; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
830 ; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x64mask:
831 ; AVX512DQNOBW: # %bb.0:
832 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
833 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
834 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
835 ; AVX512DQNOBW-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
836 ; AVX512DQNOBW-NEXT: retq
837 %a = load <8 x i8>,<8 x i8> *%i,align 1
838 %x = sext <8 x i8> %a to <8 x i64>
839 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Unmasked sign-extending load: reads <8 x i8> through %i and widens it to
; <8 x i64>. The shared ALL prefix expects a single vpmovsxbq from (%rdi) into
; zmm0.
; The body loads through %i, so the function is readonly rather than readnone.
843 define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readonly {
844 ; ALL-LABEL: sext_8x8mem_to_8x64:
846 ; ALL-NEXT: vpmovsxbq (%rdi), %zmm0
848 %a = load <8 x i8>,<8 x i8> *%i,align 1
849 %x = sext <8 x i8> %a to <8 x i64>
; Masked zero-extending load: reads <4 x i16> through %i, widens it to
; <4 x i32>, and zeroes every lane whose %mask bit is clear. The KNL checks
; expect an unmasked xmm vpmovzxwd followed by a zero-masked vmovdqa32 on zmm0;
; the AVX512DQ checks expect the mask folded into the xmm vpmovzxwd itself.
; The body loads through %i, so the function is readonly rather than readnone.
853 define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readonly {
854 ; KNL-LABEL: zext_4x16mem_to_4x32:
856 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
857 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
858 ; KNL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
859 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
860 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
861 ; KNL-NEXT: vzeroupper
864 ; AVX512DQ-LABEL: zext_4x16mem_to_4x32:
866 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
867 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
868 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
869 ; AVX512DQ-NEXT: retq
870 %a = load <4 x i16>,<4 x i16> *%i,align 1
871 %x = zext <4 x i16> %a to <4 x i32>
872 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Masked sign-extending load: reads <4 x i16> through %i, widens it to
; <4 x i32>, and zeroes every lane whose %mask bit is clear. The KNL checks
; expect an unmasked xmm vpmovsxwd followed by a zero-masked vmovdqa32 on zmm0;
; the AVX512DQ checks expect the mask folded into the xmm vpmovsxwd itself.
; The body loads through %i, so the function is readonly rather than readnone.
876 define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readonly {
877 ; KNL-LABEL: sext_4x16mem_to_4x32mask:
879 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
880 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
881 ; KNL-NEXT: vpmovsxwd (%rdi), %xmm0
882 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
883 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
884 ; KNL-NEXT: vzeroupper
887 ; AVX512DQ-LABEL: sext_4x16mem_to_4x32mask:
889 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
890 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
891 ; AVX512DQ-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z}
892 ; AVX512DQ-NEXT: retq
893 %a = load <4 x i16>,<4 x i16> *%i,align 1
894 %x = sext <4 x i16> %a to <4 x i32>
895 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Unmasked sign-extending load: reads <4 x i16> through %i and widens it to
; <4 x i32>. The shared ALL prefix expects a single vpmovsxwd from (%rdi) into
; xmm0.
; The body loads through %i, so the function is readonly rather than readnone.
899 define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readonly {
900 ; ALL-LABEL: sext_4x16mem_to_4x32:
902 ; ALL-NEXT: vpmovsxwd (%rdi), %xmm0
904 %a = load <4 x i16>,<4 x i16> *%i,align 1
905 %x = sext <4 x i16> %a to <4 x i32>
; Masked zero-extending load: reads <8 x i16> through %i, widens it to
; <8 x i32>, and zeroes every lane whose %mask bit is clear. The KNL checks
; expect an unmasked ymm vpmovzxwd followed by a zero-masked vmovdqa32 on zmm0;
; the SKX and AVX512DQNOBW checks expect the mask folded into the ymm vpmovzxwd.
; The body loads through %i, so the function is readonly rather than readnone.
910 define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readonly {
911 ; KNL-LABEL: zext_8x16mem_to_8x32:
913 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
914 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
915 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
916 ; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
917 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
918 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
921 ; SKX-LABEL: zext_8x16mem_to_8x32:
923 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
924 ; SKX-NEXT: vpmovw2m %xmm0, %k1
925 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
928 ; AVX512DQNOBW-LABEL: zext_8x16mem_to_8x32:
929 ; AVX512DQNOBW: # %bb.0:
930 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
931 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
932 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
933 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
934 ; AVX512DQNOBW-NEXT: retq
935 %a = load <8 x i16>,<8 x i16> *%i,align 1
936 %x = zext <8 x i16> %a to <8 x i32>
937 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Masked sign-extending load: reads <8 x i16> through %i, widens it to
; <8 x i32>, and zeroes every lane whose %mask bit is clear. The KNL checks
; expect an unmasked ymm vpmovsxwd followed by a zero-masked vmovdqa32 on zmm0;
; the SKX and AVX512DQNOBW checks expect the mask folded into the ymm vpmovsxwd.
; The body loads through %i, so the function is readonly rather than readnone.
941 define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readonly {
942 ; KNL-LABEL: sext_8x16mem_to_8x32mask:
944 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
945 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
946 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
947 ; KNL-NEXT: vpmovsxwd (%rdi), %ymm0
948 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
949 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
952 ; SKX-LABEL: sext_8x16mem_to_8x32mask:
954 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
955 ; SKX-NEXT: vpmovw2m %xmm0, %k1
956 ; SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z}
959 ; AVX512DQNOBW-LABEL: sext_8x16mem_to_8x32mask:
960 ; AVX512DQNOBW: # %bb.0:
961 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
962 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
963 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
964 ; AVX512DQNOBW-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z}
965 ; AVX512DQNOBW-NEXT: retq
966 %a = load <8 x i16>,<8 x i16> *%i,align 1
967 %x = sext <8 x i16> %a to <8 x i32>
968 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Unmasked sign-extending load: reads <8 x i16> through %i and widens it to
; <8 x i32>. The shared ALL prefix expects a single vpmovsxwd from (%rdi) into
; ymm0.
; The body loads through %i, so the function is readonly rather than readnone.
972 define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readonly {
973 ; ALL-LABEL: sext_8x16mem_to_8x32:
975 ; ALL-NEXT: vpmovsxwd (%rdi), %ymm0
977 %a = load <8 x i16>,<8 x i16> *%i,align 1
978 %x = sext <8 x i16> %a to <8 x i32>
; Masked zero extension of a register operand: widens <8 x i16> %a to
; <8 x i32> and zeroes every lane whose %mask bit is clear. The data arrives in
; xmm0 and the mask in xmm1. The KNL checks expect an unmasked ymm vpmovzxwd
; followed by a zero-masked vmovdqa32 on zmm0; the SKX and AVX512DQNOBW checks
; expect the mask folded into the ymm vpmovzxwd itself.
982 define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
983 ; KNL-LABEL: zext_8x16_to_8x32mask:
985 ; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
986 ; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
987 ; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
988 ; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
989 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
990 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
993 ; SKX-LABEL: zext_8x16_to_8x32mask:
995 ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
996 ; SKX-NEXT: vpmovw2m %xmm1, %k1
997 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1000 ; AVX512DQNOBW-LABEL: zext_8x16_to_8x32mask:
1001 ; AVX512DQNOBW: # %bb.0:
1002 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1
1003 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1
1004 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1
1005 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1006 ; AVX512DQNOBW-NEXT: retq
1007 %x = zext <8 x i16> %a to <8 x i32>
1008 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Unmasked zero extension of register operand <8 x i16> %a to <8 x i32>.
; The shared ALL prefix is used, so every run configuration is expected to emit
; the same single vpmovzxwd from xmm0 into ymm0.
1012 define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone {
1013 ; ALL-LABEL: zext_8x16_to_8x32:
1015 ; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1017 %x = zext <8 x i16> %a to <8 x i32>
; Masked zero-extending load: reads <16 x i16> through %i, widens it to
; <16 x i32>, and zeroes every lane whose %mask bit is clear. Each configuration
; is expected to end in one zero-masked vpmovzxwd from memory into zmm0; only
; the instructions that build %k1 from the <16 x i1> mask differ.
; The body loads through %i, so the function is readonly rather than readnone.
1021 define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readonly {
1022 ; KNL-LABEL: zext_16x16mem_to_16x32:
1024 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
1025 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1026 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1027 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1030 ; SKX-LABEL: zext_16x16mem_to_16x32:
1032 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
1033 ; SKX-NEXT: vpmovb2m %xmm0, %k1
1034 ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1037 ; AVX512DQNOBW-LABEL: zext_16x16mem_to_16x32:
1038 ; AVX512DQNOBW: # %bb.0:
1039 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
1040 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
1041 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1
1042 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1043 ; AVX512DQNOBW-NEXT: retq
1044 %a = load <16 x i16>,<16 x i16> *%i,align 1
1045 %x = zext <16 x i16> %a to <16 x i32>
1046 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Masked sign-extending load: reads <16 x i16> through %i, widens it to
; <16 x i32>, and zeroes every lane whose %mask bit is clear. Each configuration
; is expected to end in one zero-masked vpmovsxwd from (%rdi) into zmm0; only
; the instructions that build %k1 from the <16 x i1> mask differ.
; The body loads through %i, so the function is readonly rather than readnone.
1050 define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readonly {
1051 ; KNL-LABEL: sext_16x16mem_to_16x32mask:
1053 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
1054 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1055 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1056 ; KNL-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
1059 ; SKX-LABEL: sext_16x16mem_to_16x32mask:
1061 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
1062 ; SKX-NEXT: vpmovb2m %xmm0, %k1
1063 ; SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
1066 ; AVX512DQNOBW-LABEL: sext_16x16mem_to_16x32mask:
1067 ; AVX512DQNOBW: # %bb.0:
1068 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
1069 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
1070 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1
1071 ; AVX512DQNOBW-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
1072 ; AVX512DQNOBW-NEXT: retq
1073 %a = load <16 x i16>,<16 x i16> *%i,align 1
1074 %x = sext <16 x i16> %a to <16 x i32>
1075 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Unmasked sign-extending load: reads <16 x i16> through %i and widens it to
; <16 x i32>. The shared ALL prefix expects a single vpmovsxwd from (%rdi) into
; zmm0.
; The body loads through %i, so the function is readonly rather than readnone.
1079 define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readonly {
1080 ; ALL-LABEL: sext_16x16mem_to_16x32:
1082 ; ALL-NEXT: vpmovsxwd (%rdi), %zmm0
1084 %a = load <16 x i16>,<16 x i16> *%i,align 1
1085 %x = sext <16 x i16> %a to <16 x i32>
; Masked zero extension of a register operand: widens <16 x i16> %a to
; <16 x i32> and zeroes every lane whose %mask bit is clear. The data arrives in
; ymm0 and the mask in xmm1; each configuration is expected to build %k1 from
; xmm1 and then issue one zero-masked vpmovzxwd from ymm0 into zmm0.
1088 define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone {
1089 ; KNL-LABEL: zext_16x16_to_16x32mask:
1091 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
1092 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
1093 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
1094 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1097 ; SKX-LABEL: zext_16x16_to_16x32mask:
1099 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
1100 ; SKX-NEXT: vpmovb2m %xmm1, %k1
1101 ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1104 ; AVX512DQNOBW-LABEL: zext_16x16_to_16x32mask:
1105 ; AVX512DQNOBW: # %bb.0:
1106 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1
1107 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1
1108 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1
1109 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1110 ; AVX512DQNOBW-NEXT: retq
1111 %x = zext <16 x i16> %a to <16 x i32>
1112 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Unmasked zero extension of register operand <16 x i16> %a to <16 x i32>.
; The shared ALL prefix is used, so every run configuration is expected to emit
; the same single vpmovzxwd from ymm0 into zmm0.
1116 define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone {
1117 ; ALL-LABEL: zext_16x16_to_16x32:
1119 ; ALL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1121 %x = zext <16 x i16> %a to <16 x i32>
; Masked zero-extending load: reads <2 x i16> through %i, widens it to
; <2 x i64>, and zeroes every lane whose %mask bit is clear. The KNL checks
; expect an unmasked xmm vpmovzxwq followed by a zero-masked vmovdqa64 on zmm0;
; the AVX512DQ checks expect the mask folded into the xmm vpmovzxwq itself.
; The body loads through %i, so the function is readonly rather than readnone.
1125 define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readonly {
1126 ; KNL-LABEL: zext_2x16mem_to_2x64:
1128 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1129 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1130 ; KNL-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1131 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1132 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1133 ; KNL-NEXT: vzeroupper
1136 ; AVX512DQ-LABEL: zext_2x16mem_to_2x64:
1137 ; AVX512DQ: # %bb.0:
1138 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
1139 ; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
1140 ; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1141 ; AVX512DQ-NEXT: retq
1142 %a = load <2 x i16>,<2 x i16> *%i,align 1
1143 %x = zext <2 x i16> %a to <2 x i64>
1144 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Masked sign-extending load: reads <2 x i16> through %i, widens it to
; <2 x i64>, and zeroes every lane whose %mask bit is clear. The KNL checks
; expect an unmasked xmm vpmovsxwq followed by a zero-masked vmovdqa64 on zmm0;
; the AVX512DQ checks expect the mask folded into the xmm vpmovsxwq itself.
; The body loads through %i, so the function is readonly rather than readnone.
1148 define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readonly {
1149 ; KNL-LABEL: sext_2x16mem_to_2x64mask:
1151 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1152 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1153 ; KNL-NEXT: vpmovsxwq (%rdi), %xmm0
1154 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1155 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1156 ; KNL-NEXT: vzeroupper
1159 ; AVX512DQ-LABEL: sext_2x16mem_to_2x64mask:
1160 ; AVX512DQ: # %bb.0:
1161 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
1162 ; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
1163 ; AVX512DQ-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z}
1164 ; AVX512DQ-NEXT: retq
1165 %a = load <2 x i16>,<2 x i16> *%i,align 1
1166 %x = sext <2 x i16> %a to <2 x i64>
1167 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Unmasked sign-extending load: reads <2 x i16> through %i and widens it to
; <2 x i64>. The shared ALL prefix expects a single vpmovsxwq from (%rdi) into
; xmm0.
; The body loads through %i, so the function is readonly rather than readnone.
1171 define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readonly {
1172 ; ALL-LABEL: sext_2x16mem_to_2x64:
1174 ; ALL-NEXT: vpmovsxwq (%rdi), %xmm0
1176 %a = load <2 x i16>,<2 x i16> *%i,align 1
1177 %x = sext <2 x i16> %a to <2 x i64>
; Masked zero-extending load: reads <4 x i16> through %i, widens it to
; <4 x i64>, and zeroes every lane whose %mask bit is clear. The KNL checks
; expect an unmasked ymm vpmovzxwq followed by a zero-masked vmovdqa64 on zmm0;
; the AVX512DQ checks expect the mask folded into the ymm vpmovzxwq itself.
; The body loads through %i, so the function is readonly rather than readnone.
1181 define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readonly {
1182 ; KNL-LABEL: zext_4x16mem_to_4x64:
1184 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1185 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1186 ; KNL-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1187 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1188 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1191 ; AVX512DQ-LABEL: zext_4x16mem_to_4x64:
1192 ; AVX512DQ: # %bb.0:
1193 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
1194 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
1195 ; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1196 ; AVX512DQ-NEXT: retq
1197 %a = load <4 x i16>,<4 x i16> *%i,align 1
1198 %x = zext <4 x i16> %a to <4 x i64>
1199 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Masked sign-extending load: reads <4 x i16> through %i, widens it to
; <4 x i64>, and zeroes every lane whose %mask bit is clear. The KNL checks
; expect an unmasked ymm vpmovsxwq followed by a zero-masked vmovdqa64 on zmm0;
; the AVX512DQ checks expect the mask folded into the ymm vpmovsxwq itself.
; The body loads through %i, so the function is readonly rather than readnone.
1203 define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readonly {
1204 ; KNL-LABEL: sext_4x16mem_to_4x64mask:
1206 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1207 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1208 ; KNL-NEXT: vpmovsxwq (%rdi), %ymm0
1209 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1210 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1213 ; AVX512DQ-LABEL: sext_4x16mem_to_4x64mask:
1214 ; AVX512DQ: # %bb.0:
1215 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
1216 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
1217 ; AVX512DQ-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z}
1218 ; AVX512DQ-NEXT: retq
1219 %a = load <4 x i16>,<4 x i16> *%i,align 1
1220 %x = sext <4 x i16> %a to <4 x i64>
1221 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Unmasked sign-extending load: reads <4 x i16> through %i and widens it to
; <4 x i64>. The shared ALL prefix expects a single vpmovsxwq from (%rdi) into
; ymm0.
; The body loads through %i, so the function is readonly rather than readnone.
1225 define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readonly {
1226 ; ALL-LABEL: sext_4x16mem_to_4x64:
1228 ; ALL-NEXT: vpmovsxwq (%rdi), %ymm0
1230 %a = load <4 x i16>,<4 x i16> *%i,align 1
1231 %x = sext <4 x i16> %a to <4 x i64>
; Masked zero-extending load: reads <8 x i16> through %i, widens it to
; <8 x i64>, and zeroes every lane whose %mask bit is clear. Each configuration
; is expected to end in one zero-masked vpmovzxwq from memory into zmm0; only
; the instructions that build %k1 from the <8 x i1> mask differ.
; The body loads through %i, so the function is readonly rather than readnone.
1235 define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readonly {
1236 ; KNL-LABEL: zext_8x16mem_to_8x64:
1238 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1239 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1240 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1241 ; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1244 ; SKX-LABEL: zext_8x16mem_to_8x64:
1246 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1247 ; SKX-NEXT: vpmovw2m %xmm0, %k1
1248 ; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1251 ; AVX512DQNOBW-LABEL: zext_8x16mem_to_8x64:
1252 ; AVX512DQNOBW: # %bb.0:
1253 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
1254 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
1255 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
1256 ; AVX512DQNOBW-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1257 ; AVX512DQNOBW-NEXT: retq
1258 %a = load <8 x i16>,<8 x i16> *%i,align 1
1259 %x = zext <8 x i16> %a to <8 x i64>
1260 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Masked sign-extending load: reads <8 x i16> through %i, widens it to
; <8 x i64>, and zeroes every lane whose %mask bit is clear. Each configuration
; is expected to end in one zero-masked vpmovsxwq from (%rdi) into zmm0; only
; the instructions that build %k1 from the <8 x i1> mask differ.
; The body loads through %i, so the function is readonly rather than readnone.
1264 define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readonly {
1265 ; KNL-LABEL: sext_8x16mem_to_8x64mask:
1267 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1268 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1269 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1270 ; KNL-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
1273 ; SKX-LABEL: sext_8x16mem_to_8x64mask:
1275 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1276 ; SKX-NEXT: vpmovw2m %xmm0, %k1
1277 ; SKX-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
1280 ; AVX512DQNOBW-LABEL: sext_8x16mem_to_8x64mask:
1281 ; AVX512DQNOBW: # %bb.0:
1282 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
1283 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
1284 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
1285 ; AVX512DQNOBW-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
1286 ; AVX512DQNOBW-NEXT: retq
1287 %a = load <8 x i16>,<8 x i16> *%i,align 1
1288 %x = sext <8 x i16> %a to <8 x i64>
1289 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Unmasked sign-extending load: reads <8 x i16> through %i and widens it to
; <8 x i64>. The shared ALL prefix expects a single vpmovsxwq from (%rdi) into
; zmm0.
; The body loads through %i, so the function is readonly rather than readnone.
1293 define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readonly {
1294 ; ALL-LABEL: sext_8x16mem_to_8x64:
1296 ; ALL-NEXT: vpmovsxwq (%rdi), %zmm0
1298 %a = load <8 x i16>,<8 x i16> *%i,align 1
1299 %x = sext <8 x i16> %a to <8 x i64>
; Masked zero extension of a register operand: widens <8 x i16> %a to
; <8 x i64> and zeroes every lane whose %mask bit is clear. The data arrives in
; xmm0 and the mask in xmm1; each configuration is expected to build %k1 from
; xmm1 and then issue one zero-masked vpmovzxwq from xmm0 into zmm0.
1303 define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
1304 ; KNL-LABEL: zext_8x16_to_8x64mask:
1306 ; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
1307 ; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
1308 ; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
1309 ; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1312 ; SKX-LABEL: zext_8x16_to_8x64mask:
1314 ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
1315 ; SKX-NEXT: vpmovw2m %xmm1, %k1
1316 ; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1319 ; AVX512DQNOBW-LABEL: zext_8x16_to_8x64mask:
1320 ; AVX512DQNOBW: # %bb.0:
1321 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1
1322 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1
1323 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1
1324 ; AVX512DQNOBW-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1325 ; AVX512DQNOBW-NEXT: retq
1326 %x = zext <8 x i16> %a to <8 x i64>
1327 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Unmasked zero extension of register operand <8 x i16> %a to <8 x i64>.
; The shared ALL prefix is used, so every run configuration is expected to emit
; the same single vpmovzxwq from xmm0 into zmm0.
1331 define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone {
1332 ; ALL-LABEL: zext_8x16_to_8x64:
1334 ; ALL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1336 %ret = zext <8 x i16> %a to <8 x i64>
; Masked zero-extending load: reads <2 x i32> through %i, widens it to
; <2 x i64>, and zeroes every lane whose %mask bit is clear. The KNL checks
; expect an unmasked xmm vpmovzxdq followed by a zero-masked vmovdqa64 on zmm0;
; the AVX512DQ checks expect the mask folded into the xmm vpmovzxdq itself.
; The body loads through %i, so the function is readonly rather than readnone.
1340 define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readonly {
1341 ; KNL-LABEL: zext_2x32mem_to_2x64:
1343 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1344 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1345 ; KNL-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1346 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1347 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1348 ; KNL-NEXT: vzeroupper
1351 ; AVX512DQ-LABEL: zext_2x32mem_to_2x64:
1352 ; AVX512DQ: # %bb.0:
1353 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
1354 ; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
1355 ; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero
1356 ; AVX512DQ-NEXT: retq
1357 %a = load <2 x i32>,<2 x i32> *%i,align 1
1358 %x = zext <2 x i32> %a to <2 x i64>
1359 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Signed counterpart of the masked 2 x i32 -> 2 x i64 load-extend test: loads
; <2 x i32> (align 1), sign-extends to <2 x i64>, and selects against zero
; under a <2 x i1> mask.
; With avx512f only, the extend (vpmovsxdq from memory) is unmasked and the
; zeroing is a separate zmm vmovdqa64; with avx512vl+avx512dq the zero-masking
; is folded into the vpmovsxdq load.
1363 define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
1364 ; KNL-LABEL: sext_2x32mem_to_2x64mask:
1366 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1367 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1368 ; KNL-NEXT: vpmovsxdq (%rdi), %xmm0
1369 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1370 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1371 ; KNL-NEXT: vzeroupper
1374 ; AVX512DQ-LABEL: sext_2x32mem_to_2x64mask:
1375 ; AVX512DQ: # %bb.0:
1376 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
1377 ; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
1378 ; AVX512DQ-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z}
1379 ; AVX512DQ-NEXT: retq
1380 %a = load <2 x i32>,<2 x i32> *%i,align 1
1381 %x = sext <2 x i32> %a to <2 x i64>
1382 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Unmasked load of <2 x i32> (align 1) followed by sign-extension to
; <2 x i64>. Every configuration is expected to fold the load into a single
; vpmovsxdq from (%rdi) into xmm0.
1386 define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone {
1387 ; ALL-LABEL: sext_2x32mem_to_2x64:
1389 ; ALL-NEXT: vpmovsxdq (%rdi), %xmm0
1391 %a = load <2 x i32>,<2 x i32> *%i,align 1
1392 %x = sext <2 x i32> %a to <2 x i64>
; Loads <4 x i32> (align 1), zero-extends it to <4 x i64>, and selects the
; result against zero under a <4 x i1> mask.
; With avx512f only, the mask comes from vpslld $31 + vptestmd on zmm and the
; zeroing is a separate zmm vmovdqa64 after an unmasked extending load.
; With avx512vl+avx512dq, vpmovd2m builds the mask and the zero-masking is
; folded into the ymm vpmovzxdq load.
1396 define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
1397 ; KNL-LABEL: zext_4x32mem_to_4x64:
1399 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1400 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1401 ; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1402 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1403 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1406 ; AVX512DQ-LABEL: zext_4x32mem_to_4x64:
1407 ; AVX512DQ: # %bb.0:
1408 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
1409 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
1410 ; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1411 ; AVX512DQ-NEXT: retq
1412 %a = load <4 x i32>,<4 x i32> *%i,align 1
1413 %x = zext <4 x i32> %a to <4 x i64>
1414 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Loads <4 x i32> (align 1), sign-extends it to <4 x i64>, and selects the
; result against zero under a <4 x i1> mask.
; With avx512f only, the vpmovsxdq load is unmasked and zeroing is done by a
; zmm vmovdqa64; with avx512vl+avx512dq the zero-masking is folded into the
; ymm vpmovsxdq load.
1418 define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
1419 ; KNL-LABEL: sext_4x32mem_to_4x64mask:
1421 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1422 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1423 ; KNL-NEXT: vpmovsxdq (%rdi), %ymm0
1424 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1425 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1428 ; AVX512DQ-LABEL: sext_4x32mem_to_4x64mask:
1429 ; AVX512DQ: # %bb.0:
1430 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
1431 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
1432 ; AVX512DQ-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z}
1433 ; AVX512DQ-NEXT: retq
1434 %a = load <4 x i32>,<4 x i32> *%i,align 1
1435 %x = sext <4 x i32> %a to <4 x i64>
1436 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Unmasked load of <4 x i32> (align 1) followed by sign-extension to
; <4 x i64>. Every configuration is expected to fold the load into a single
; vpmovsxdq from (%rdi) into ymm0.
1440 define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone {
1441 ; ALL-LABEL: sext_4x32mem_to_4x64:
1443 ; ALL-NEXT: vpmovsxdq (%rdi), %ymm0
1445 %a = load <4 x i32>,<4 x i32> *%i,align 1
1446 %x = sext <4 x i32> %a to <4 x i64>
; Unmasked sign-extension of a <4 x i32> register argument to <4 x i64>.
; Every configuration is expected to emit a single vpmovsxdq xmm0 -> ymm0.
1450 define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone {
1451 ; ALL-LABEL: sext_4x32_to_4x64:
1453 ; ALL-NEXT: vpmovsxdq %xmm0, %ymm0
1455 %x = sext <4 x i32> %a to <4 x i64>
; Register-operand variant of the masked 4 x i32 -> 4 x i64 zero-extension:
; %a is zero-extended and selected against zero under a <4 x i1> mask that
; arrives in xmm1.
; With avx512f only, the mask is built with vpslld $31 + vptestmd on zmm and
; the zeroing is a separate zmm vmovdqa64; with avx512vl+avx512dq the mask is
; built by vpmovd2m and folded into a zero-masked ymm vpmovzxdq.
1459 define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone {
1460 ; KNL-LABEL: zext_4x32_to_4x64mask:
1462 ; KNL-NEXT: vpslld $31, %xmm1, %xmm1
1463 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
1464 ; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1465 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1466 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1469 ; AVX512DQ-LABEL: zext_4x32_to_4x64mask:
1470 ; AVX512DQ: # %bb.0:
1471 ; AVX512DQ-NEXT: vpslld $31, %xmm1, %xmm1
1472 ; AVX512DQ-NEXT: vpmovd2m %xmm1, %k1
1473 ; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1474 ; AVX512DQ-NEXT: retq
1475 %x = zext <4 x i32> %a to <4 x i64>
1476 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Loads <8 x i32> (align 1), zero-extends it to <8 x i64>, and selects the
; result against zero under an <8 x i1> mask passed in xmm0.
; All three configurations end with a zero-masked vpmovzxdq load into zmm0;
; they differ only in how the k-register mask is produced:
;   avx512f only        - vpmovsxwq to zmm, vpsllq $63, vptestmq
;   with avx512bw       - vpsllw $15, vpmovw2m
;   vl+dq without bw    - vpmovsxwd to ymm, vpslld $31, vpmovd2m
1480 define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
1481 ; KNL-LABEL: zext_8x32mem_to_8x64:
1483 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1484 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1485 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1486 ; KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1489 ; SKX-LABEL: zext_8x32mem_to_8x64:
1491 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1492 ; SKX-NEXT: vpmovw2m %xmm0, %k1
1493 ; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1496 ; AVX512DQNOBW-LABEL: zext_8x32mem_to_8x64:
1497 ; AVX512DQNOBW: # %bb.0:
1498 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
1499 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
1500 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
1501 ; AVX512DQNOBW-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1502 ; AVX512DQNOBW-NEXT: retq
1503 %a = load <8 x i32>,<8 x i32> *%i,align 1
1504 %x = zext <8 x i32> %a to <8 x i64>
1505 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Loads <8 x i32> (align 1), sign-extends it to <8 x i64>, and selects the
; result against zero under an <8 x i1> mask passed in xmm0.
; All three configurations end with a zero-masked vpmovsxdq load from (%rdi)
; into zmm0; only the mask-materialization sequence differs per feature set
; (vpmovsxwq/vpsllq/vptestmq, vpsllw/vpmovw2m, or vpmovsxwd/vpslld/vpmovd2m).
1509 define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
1510 ; KNL-LABEL: sext_8x32mem_to_8x64mask:
1512 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1513 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1514 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1515 ; KNL-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z}
1518 ; SKX-LABEL: sext_8x32mem_to_8x64mask:
1520 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1521 ; SKX-NEXT: vpmovw2m %xmm0, %k1
1522 ; SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z}
1525 ; AVX512DQNOBW-LABEL: sext_8x32mem_to_8x64mask:
1526 ; AVX512DQNOBW: # %bb.0:
1527 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
1528 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
1529 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
1530 ; AVX512DQNOBW-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z}
1531 ; AVX512DQNOBW-NEXT: retq
1532 %a = load <8 x i32>,<8 x i32> *%i,align 1
1533 %x = sext <8 x i32> %a to <8 x i64>
1534 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Unmasked load of <8 x i32> (align 1) followed by sign-extension to
; <8 x i64>. Every configuration is expected to fold the load into a single
; vpmovsxdq from (%rdi) into zmm0.
1538 define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone {
1539 ; ALL-LABEL: sext_8x32mem_to_8x64:
1541 ; ALL-NEXT: vpmovsxdq (%rdi), %zmm0
1543 %a = load <8 x i32>,<8 x i32> *%i,align 1
1544 %x = sext <8 x i32> %a to <8 x i64>
; Unmasked sign-extension of an <8 x i32> register argument to <8 x i64>.
; Every configuration is expected to emit a single vpmovsxdq ymm0 -> zmm0.
1548 define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone {
1549 ; ALL-LABEL: sext_8x32_to_8x64:
1551 ; ALL-NEXT: vpmovsxdq %ymm0, %zmm0
1553 %x = sext <8 x i32> %a to <8 x i64>
; Register-operand variant of the masked 8 x i32 -> 8 x i64 zero-extension:
; %a (ymm0) is zero-extended and selected against zero under an <8 x i1> mask
; that arrives in xmm1.
; All three configurations end with a zero-masked register-form vpmovzxdq into
; zmm0; only the sequence that converts xmm1 into k1 differs per feature set.
1557 define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone {
1558 ; KNL-LABEL: zext_8x32_to_8x64mask:
1560 ; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
1561 ; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
1562 ; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
1563 ; KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
1566 ; SKX-LABEL: zext_8x32_to_8x64mask:
1568 ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
1569 ; SKX-NEXT: vpmovw2m %xmm1, %k1
1570 ; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
1573 ; AVX512DQNOBW-LABEL: zext_8x32_to_8x64mask:
1574 ; AVX512DQNOBW: # %bb.0:
1575 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1
1576 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1
1577 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1
1578 ; AVX512DQNOBW-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
1579 ; AVX512DQNOBW-NEXT: retq
1580 %x = zext <8 x i32> %a to <8 x i64>
1581 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Floating-point truncation of <8 x double> to <8 x float>. Every
; configuration is expected to emit a single vcvtpd2ps zmm0 -> ymm0.
1584 define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
1585 ; ALL-LABEL: fptrunc_test:
1587 ; ALL-NEXT: vcvtpd2ps %zmm0, %ymm0
1589 %b = fptrunc <8 x double> %a to <8 x float>
; Floating-point extension of <8 x float> to <8 x double>. Every
; configuration is expected to emit a single vcvtps2pd ymm0 -> zmm0.
1593 define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
1594 ; ALL-LABEL: fpext_test:
1596 ; ALL-NEXT: vcvtps2pd %ymm0, %zmm0
1598 %b = fpext <8 x float> %a to <8 x double>
; Reinterprets a scalar i16 as <16 x i1> and zero-extends each bit to an i32
; lane.
; With avx512f only, the mask is moved into k1 and expanded with a zero-masked
; vpternlogd $255; with avx512dq, vpmovm2d expands the mask. In both cases a
; final vpsrld $31 is expected on the expanded vector.
; The avx512bw run moves the mask with kmovd where the others use kmovw.
1602 define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
1603 ; KNL-LABEL: zext_16i1_to_16xi32:
1605 ; KNL-NEXT: kmovw %edi, %k1
1606 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1607 ; KNL-NEXT: vpsrld $31, %zmm0, %zmm0
1610 ; SKX-LABEL: zext_16i1_to_16xi32:
1612 ; SKX-NEXT: kmovd %edi, %k0
1613 ; SKX-NEXT: vpmovm2d %k0, %zmm0
1614 ; SKX-NEXT: vpsrld $31, %zmm0, %zmm0
1617 ; AVX512DQNOBW-LABEL: zext_16i1_to_16xi32:
1618 ; AVX512DQNOBW: # %bb.0:
1619 ; AVX512DQNOBW-NEXT: kmovw %edi, %k0
1620 ; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm0
1621 ; AVX512DQNOBW-NEXT: vpsrld $31, %zmm0, %zmm0
1622 ; AVX512DQNOBW-NEXT: retq
1623 %a = bitcast i16 %b to <16 x i1>
1624 %c = zext <16 x i1> %a to <16 x i32>
; Reinterprets a scalar i8 as <8 x i1> and zero-extends each bit to an i64
; lane.
; With avx512f only, the mask is moved into k1 and expanded with a zero-masked
; vpternlogq $255; with avx512dq, vpmovm2q expands the mask. In both cases a
; final vpsrlq $63 is expected on the expanded vector.
1628 define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
1629 ; KNL-LABEL: zext_8i1_to_8xi64:
1631 ; KNL-NEXT: kmovw %edi, %k1
1632 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1633 ; KNL-NEXT: vpsrlq $63, %zmm0, %zmm0
1636 ; SKX-LABEL: zext_8i1_to_8xi64:
1638 ; SKX-NEXT: kmovd %edi, %k0
1639 ; SKX-NEXT: vpmovm2q %k0, %zmm0
1640 ; SKX-NEXT: vpsrlq $63, %zmm0, %zmm0
1643 ; AVX512DQNOBW-LABEL: zext_8i1_to_8xi64:
1644 ; AVX512DQNOBW: # %bb.0:
1645 ; AVX512DQNOBW-NEXT: kmovw %edi, %k0
1646 ; AVX512DQNOBW-NEXT: vpmovm2q %k0, %zmm0
1647 ; AVX512DQNOBW-NEXT: vpsrlq $63, %zmm0, %zmm0
1648 ; AVX512DQNOBW-NEXT: retq
1649 %a = bitcast i8 %b to <8 x i1>
1650 %c = zext <8 x i1> %a to <8 x i64>
; Truncates <16 x i8> to <16 x i1> and bitcasts the mask to a scalar i16.
; Every configuration is expected to avoid k-registers here: vpsllw $7 shifts
; the vector left and vpmovmskb gathers the per-byte sign bits into eax.
1654 define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
1655 ; ALL-LABEL: trunc_16i8_to_16i1:
1657 ; ALL-NEXT: vpsllw $7, %xmm0, %xmm0
1658 ; ALL-NEXT: vpmovmskb %xmm0, %eax
1659 ; ALL-NEXT: # kill: def $ax killed $ax killed $eax
1661 %mask_b = trunc <16 x i8>%a to <16 x i1>
1662 %mask = bitcast <16 x i1> %mask_b to i16
; Truncates <16 x i32> to <16 x i1> and bitcasts the mask to a scalar i16.
; Every configuration first shifts each lane left with vpslld $31; the mask is
; then produced by vptestmd (avx512f only) or vpmovd2m (avx512dq) and moved to
; eax with kmovw, or kmovd when avx512bw is enabled.
1666 define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
1667 ; KNL-LABEL: trunc_16i32_to_16i1:
1669 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1670 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1671 ; KNL-NEXT: kmovw %k0, %eax
1672 ; KNL-NEXT: # kill: def $ax killed $ax killed $eax
1673 ; KNL-NEXT: vzeroupper
1676 ; SKX-LABEL: trunc_16i32_to_16i1:
1678 ; SKX-NEXT: vpslld $31, %zmm0, %zmm0
1679 ; SKX-NEXT: vpmovd2m %zmm0, %k0
1680 ; SKX-NEXT: kmovd %k0, %eax
1681 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax
1682 ; SKX-NEXT: vzeroupper
1685 ; AVX512DQNOBW-LABEL: trunc_16i32_to_16i1:
1686 ; AVX512DQNOBW: # %bb.0:
1687 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
1688 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k0
1689 ; AVX512DQNOBW-NEXT: kmovw %k0, %eax
1690 ; AVX512DQNOBW-NEXT: # kill: def $ax killed $ax killed $eax
1691 ; AVX512DQNOBW-NEXT: vzeroupper
1692 ; AVX512DQNOBW-NEXT: retq
1693 %mask_b = trunc <16 x i32>%a to <16 x i1>
1694 %mask = bitcast <16 x i1> %mask_b to i16
; Truncates two <4 x i32> vectors to <4 x i1>, ANDs the masks, and
; sign-extends the result back to <4 x i32>.
; Every configuration is expected to stay in vector registers: a vpand of the
; full inputs followed by vpslld $31 / vpsrad $31.
1698 define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
1699 ; ALL-LABEL: trunc_4i32_to_4i1:
1701 ; ALL-NEXT: vpand %xmm1, %xmm0, %xmm0
1702 ; ALL-NEXT: vpslld $31, %xmm0, %xmm0
1703 ; ALL-NEXT: vpsrad $31, %xmm0, %xmm0
1705 %mask_a = trunc <4 x i32>%a to <4 x i1>
1706 %mask_b = trunc <4 x i32>%b to <4 x i1>
1707 %a_and_b = and <4 x i1>%mask_a, %mask_b
1708 %res = sext <4 x i1>%a_and_b to <4 x i32>
; Truncates <8 x i16> to <8 x i1> and bitcasts the mask to a scalar i8.
; Expected mask-materialization per feature set:
;   avx512f only        - vpmovsxwq to zmm, vpsllq $63, vptestmq
;   with avx512bw       - vpsllw $15, vpmovw2m directly on xmm0
;   vl+dq without bw    - vpmovsxwd to ymm, vpslld $31, vpmovd2m
; The k-register is then moved to eax (kmovw, or kmovd with avx512bw).
1713 define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
1714 ; KNL-LABEL: trunc_8i16_to_8i1:
1716 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1717 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1718 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
1719 ; KNL-NEXT: kmovw %k0, %eax
1720 ; KNL-NEXT: # kill: def $al killed $al killed $eax
1721 ; KNL-NEXT: vzeroupper
1724 ; SKX-LABEL: trunc_8i16_to_8i1:
1726 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1727 ; SKX-NEXT: vpmovw2m %xmm0, %k0
1728 ; SKX-NEXT: kmovd %k0, %eax
1729 ; SKX-NEXT: # kill: def $al killed $al killed $eax
1732 ; AVX512DQNOBW-LABEL: trunc_8i16_to_8i1:
1733 ; AVX512DQNOBW: # %bb.0:
1734 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
1735 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
1736 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k0
1737 ; AVX512DQNOBW-NEXT: kmovw %k0, %eax
1738 ; AVX512DQNOBW-NEXT: # kill: def $al killed $al killed $eax
1739 ; AVX512DQNOBW-NEXT: vzeroupper
1740 ; AVX512DQNOBW-NEXT: retq
1741 %mask_b = trunc <8 x i16>%a to <8 x i1>
1742 %mask = bitcast <8 x i1> %mask_b to i8
; Compares two <8 x i32> vectors with signed less-than, inverts the <8 x i1>
; result (xor with all-true), and sign-extends it to <8 x i32>.
; Every configuration is expected to keep the compare in vector registers
; (vpcmpgtd on ymm) and apply the inversion with vpternlogq $15; the
; avx512f-only run performs the vpternlogq on zmm, the avx512vl runs on ymm.
1746 define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
1747 ; KNL-LABEL: sext_8i1_8i32:
1749 ; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
1750 ; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
1751 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1754 ; AVX512DQ-LABEL: sext_8i1_8i32:
1755 ; AVX512DQ: # %bb.0:
1756 ; AVX512DQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
1757 ; AVX512DQ-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0
1758 ; AVX512DQ-NEXT: retq
1759 %x = icmp slt <8 x i32> %a1, %a2
1760 %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
1761 %y = sext <8 x i1> %x1 to <8 x i32>
; Truncates a scalar i32 to i1, inserts it at index 0 of a constant <16 x i1>
; (element 1 is false, all other elements true), and bitcasts the vector to
; i16.
; Expected sequence in every configuration: isolate bit 0 with andl $1 and
; move it to k0; load the constant -4 into k1 and run it through
; kshiftrw $1 / kshiftlw $1; then OR the two masks with korw.
; The avx512bw run uses kmovd for the constant load and the final move to eax
; where the other runs use kmovw.
1766 define i16 @trunc_i32_to_i1(i32 %a) {
1767 ; KNL-LABEL: trunc_i32_to_i1:
1769 ; KNL-NEXT: andl $1, %edi
1770 ; KNL-NEXT: kmovw %edi, %k0
1771 ; KNL-NEXT: movw $-4, %ax
1772 ; KNL-NEXT: kmovw %eax, %k1
1773 ; KNL-NEXT: kshiftrw $1, %k1, %k1
1774 ; KNL-NEXT: kshiftlw $1, %k1, %k1
1775 ; KNL-NEXT: korw %k0, %k1, %k0
1776 ; KNL-NEXT: kmovw %k0, %eax
1777 ; KNL-NEXT: # kill: def $ax killed $ax killed $eax
1780 ; SKX-LABEL: trunc_i32_to_i1:
1782 ; SKX-NEXT: andl $1, %edi
1783 ; SKX-NEXT: kmovw %edi, %k0
1784 ; SKX-NEXT: movw $-4, %ax
1785 ; SKX-NEXT: kmovd %eax, %k1
1786 ; SKX-NEXT: kshiftrw $1, %k1, %k1
1787 ; SKX-NEXT: kshiftlw $1, %k1, %k1
1788 ; SKX-NEXT: korw %k0, %k1, %k0
1789 ; SKX-NEXT: kmovd %k0, %eax
1790 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax
1793 ; AVX512DQNOBW-LABEL: trunc_i32_to_i1:
1794 ; AVX512DQNOBW: # %bb.0:
1795 ; AVX512DQNOBW-NEXT: andl $1, %edi
1796 ; AVX512DQNOBW-NEXT: kmovw %edi, %k0
1797 ; AVX512DQNOBW-NEXT: movw $-4, %ax
1798 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
1799 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1
1800 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1
1801 ; AVX512DQNOBW-NEXT: korw %k0, %k1, %k0
1802 ; AVX512DQNOBW-NEXT: kmovw %k0, %eax
1803 ; AVX512DQNOBW-NEXT: # kill: def $ax killed $ax killed $eax
1804 ; AVX512DQNOBW-NEXT: retq
1805 %a_i = trunc i32 %a to i1
1806 %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
1807 %res = bitcast <16 x i1> %maskv to i16
; Compares two <8 x i32> vectors with signed less-than and sign-extends the
; <8 x i1> result to <8 x i16>.
; Expected lowering per feature set:
;   avx512f only        - vector vpcmpgtd on ymm, then vpmovdw zmm -> ymm
;   with avx512bw       - compare into k0, then vpmovm2w to xmm0
;   vl+dq without bw    - compare into k0, vpmovm2d to ymm0, vpmovdw to xmm0
1811 define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
1812 ; KNL-LABEL: sext_8i1_8i16:
1814 ; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
1815 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
1816 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1817 ; KNL-NEXT: vzeroupper
1820 ; SKX-LABEL: sext_8i1_8i16:
1822 ; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0
1823 ; SKX-NEXT: vpmovm2w %k0, %xmm0
1824 ; SKX-NEXT: vzeroupper
1827 ; AVX512DQNOBW-LABEL: sext_8i1_8i16:
1828 ; AVX512DQNOBW: # %bb.0:
1829 ; AVX512DQNOBW-NEXT: vpcmpgtd %ymm0, %ymm1, %k0
1830 ; AVX512DQNOBW-NEXT: vpmovm2d %k0, %ymm0
1831 ; AVX512DQNOBW-NEXT: vpmovdw %ymm0, %xmm0
1832 ; AVX512DQNOBW-NEXT: vzeroupper
1833 ; AVX512DQNOBW-NEXT: retq
1834 %x = icmp slt <8 x i32> %a1, %a2
1835 %y = sext <8 x i1> %x to <8 x i16>
; Compares two <16 x i32> vectors with signed less-than and sign-extends the
; <16 x i1> result to <16 x i32>.
; With avx512f only, the compare writes k1 and the mask is expanded with a
; zero-masked vpternlogd $255; with avx512dq, the compare writes k0 and
; vpmovm2d expands it.
1839 define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
1840 ; KNL-LABEL: sext_16i1_16i32:
1842 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
1843 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1846 ; AVX512DQ-LABEL: sext_16i1_16i32:
1847 ; AVX512DQ: # %bb.0:
1848 ; AVX512DQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
1849 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
1850 ; AVX512DQ-NEXT: retq
1851 %x = icmp slt <16 x i32> %a1, %a2
1852 %y = sext <16 x i1> %x to <16 x i32>
; Compares two <8 x i32> vectors with signed less-than and sign-extends the
; <8 x i1> result to <8 x i64>.
; With avx512f only, the compare stays in vector registers (vpcmpgtd on ymm)
; and is widened with vpmovsxdq; with avx512vl+avx512dq, the compare writes k0
; and vpmovm2q expands it to zmm0.
1856 define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
1857 ; KNL-LABEL: sext_8i1_8i64:
1859 ; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
1860 ; KNL-NEXT: vpmovsxdq %ymm0, %zmm0
1863 ; AVX512DQ-LABEL: sext_8i1_8i64:
1864 ; AVX512DQ: # %bb.0:
1865 ; AVX512DQ-NEXT: vpcmpgtd %ymm0, %ymm1, %k0
1866 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
1867 ; AVX512DQ-NEXT: retq
1868 %x = icmp slt <8 x i32> %a1, %a2
1869 %y = sext <8 x i1> %x to <8 x i64>
; Loads <8 x i8> from %a, sign-extends it to <8 x i64>, and stores the result
; to %res.
; Every configuration is expected to fold the load into vpmovsxbq from (%rdi),
; store zmm0 to (%rsi) with vmovdqa64, and emit vzeroupper.
1873 define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
1874 ; ALL-LABEL: extload_v8i64:
1876 ; ALL-NEXT: vpmovsxbq (%rdi), %zmm0
1877 ; ALL-NEXT: vmovdqa64 %zmm0, (%rsi)
1878 ; ALL-NEXT: vzeroupper
1880 %sign_load = load <8 x i8>, <8 x i8>* %a
1881 %c = sext <8 x i8> %sign_load to <8 x i64>
1882 store <8 x i64> %c, <8 x i64>* %res
1886 define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
1887 ; KNL-LABEL: test21:
1889 ; KNL-NEXT: movw $-3, %ax
1890 ; KNL-NEXT: kmovw %eax, %k1
1891 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
1892 ; KNL-NEXT: kmovw %eax, %k0
1893 ; KNL-NEXT: kandw %k1, %k0, %k0
1894 ; KNL-NEXT: kmovw %k1, %k2
1895 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1896 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
1897 ; KNL-NEXT: kmovw %eax, %k1
1898 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1899 ; KNL-NEXT: kshiftrw $14, %k1, %k1
1900 ; KNL-NEXT: korw %k1, %k0, %k0
1901 ; KNL-NEXT: movw $-5, %ax
1902 ; KNL-NEXT: kmovw %eax, %k1
1903 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1904 ; KNL-NEXT: kandw %k1, %k0, %k0
1905 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
1906 ; KNL-NEXT: kmovw %eax, %k1
1907 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1908 ; KNL-NEXT: kshiftrw $13, %k1, %k1
1909 ; KNL-NEXT: korw %k1, %k0, %k0
1910 ; KNL-NEXT: movw $-9, %ax
1911 ; KNL-NEXT: kmovw %eax, %k1
1912 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1913 ; KNL-NEXT: kandw %k1, %k0, %k0
1914 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
1915 ; KNL-NEXT: kmovw %eax, %k1
1916 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1917 ; KNL-NEXT: kshiftrw $12, %k1, %k1
1918 ; KNL-NEXT: korw %k1, %k0, %k0
1919 ; KNL-NEXT: movw $-17, %ax
1920 ; KNL-NEXT: kmovw %eax, %k6
1921 ; KNL-NEXT: kandw %k6, %k0, %k0
1922 ; KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1923 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
1924 ; KNL-NEXT: kmovw %eax, %k1
1925 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1926 ; KNL-NEXT: kshiftrw $11, %k1, %k1
1927 ; KNL-NEXT: korw %k1, %k0, %k0
1928 ; KNL-NEXT: movw $-33, %ax
1929 ; KNL-NEXT: kmovw %eax, %k1
1930 ; KNL-NEXT: kandw %k1, %k0, %k0
1931 ; KNL-NEXT: kmovw %k1, %k3
1932 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1933 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
1934 ; KNL-NEXT: kmovw %eax, %k1
1935 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1936 ; KNL-NEXT: kshiftrw $10, %k1, %k1
1937 ; KNL-NEXT: korw %k1, %k0, %k0
1938 ; KNL-NEXT: movw $-65, %ax
1939 ; KNL-NEXT: kmovw %eax, %k1
1940 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1941 ; KNL-NEXT: kandw %k1, %k0, %k0
1942 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
1943 ; KNL-NEXT: kmovw %eax, %k1
1944 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1945 ; KNL-NEXT: kshiftrw $9, %k1, %k1
1946 ; KNL-NEXT: korw %k1, %k0, %k0
1947 ; KNL-NEXT: movw $-129, %ax
1948 ; KNL-NEXT: kmovw %eax, %k1
1949 ; KNL-NEXT: kandw %k1, %k0, %k0
1950 ; KNL-NEXT: kmovw %k1, %k4
1951 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1952 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
1953 ; KNL-NEXT: kmovw %eax, %k1
1954 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1955 ; KNL-NEXT: kshiftrw $8, %k1, %k1
1956 ; KNL-NEXT: korw %k1, %k0, %k0
1957 ; KNL-NEXT: movw $-257, %ax # imm = 0xFEFF
1958 ; KNL-NEXT: kmovw %eax, %k1
1959 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1960 ; KNL-NEXT: kandw %k1, %k0, %k0
1961 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
1962 ; KNL-NEXT: kmovw %eax, %k1
1963 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1964 ; KNL-NEXT: kshiftrw $7, %k1, %k1
1965 ; KNL-NEXT: korw %k1, %k0, %k0
1966 ; KNL-NEXT: movw $-513, %ax # imm = 0xFDFF
1967 ; KNL-NEXT: kmovw %eax, %k1
1968 ; KNL-NEXT: kandw %k1, %k0, %k0
1969 ; KNL-NEXT: kmovw %k1, %k5
1970 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1971 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
1972 ; KNL-NEXT: kmovw %eax, %k1
1973 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1974 ; KNL-NEXT: kshiftrw $6, %k1, %k1
1975 ; KNL-NEXT: korw %k1, %k0, %k0
1976 ; KNL-NEXT: movw $-1025, %ax # imm = 0xFBFF
1977 ; KNL-NEXT: kmovw %eax, %k1
1978 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1979 ; KNL-NEXT: kandw %k1, %k0, %k0
1980 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
1981 ; KNL-NEXT: kmovw %eax, %k1
1982 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1983 ; KNL-NEXT: kshiftrw $5, %k1, %k1
1984 ; KNL-NEXT: korw %k1, %k0, %k0
1985 ; KNL-NEXT: movw $-2049, %ax # imm = 0xF7FF
1986 ; KNL-NEXT: kmovw %eax, %k1
1987 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1988 ; KNL-NEXT: kandw %k1, %k0, %k0
1989 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
1990 ; KNL-NEXT: kmovw %eax, %k1
1991 ; KNL-NEXT: kshiftlw $15, %k1, %k1
1992 ; KNL-NEXT: kshiftrw $4, %k1, %k1
1993 ; KNL-NEXT: korw %k1, %k0, %k0
1994 ; KNL-NEXT: movw $-4097, %ax # imm = 0xEFFF
1995 ; KNL-NEXT: kmovw %eax, %k1
1996 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1997 ; KNL-NEXT: kandw %k1, %k0, %k0
1998 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
1999 ; KNL-NEXT: kmovw %eax, %k1
2000 ; KNL-NEXT: kshiftlw $15, %k1, %k1
2001 ; KNL-NEXT: kshiftrw $3, %k1, %k1
2002 ; KNL-NEXT: korw %k1, %k0, %k0
2003 ; KNL-NEXT: movw $-8193, %ax # imm = 0xDFFF
2004 ; KNL-NEXT: kmovw %eax, %k1
2005 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2006 ; KNL-NEXT: kandw %k1, %k0, %k0
2007 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2008 ; KNL-NEXT: kmovw %eax, %k1
2009 ; KNL-NEXT: kshiftlw $15, %k1, %k1
2010 ; KNL-NEXT: kshiftrw $2, %k1, %k1
2011 ; KNL-NEXT: korw %k1, %k0, %k1
2012 ; KNL-NEXT: movw $-16385, %ax # imm = 0xBFFF
2013 ; KNL-NEXT: kmovw %eax, %k0
2014 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2015 ; KNL-NEXT: kandw %k0, %k1, %k1
2016 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2017 ; KNL-NEXT: kmovw %eax, %k7
2018 ; KNL-NEXT: kshiftlw $14, %k7, %k7
2019 ; KNL-NEXT: korw %k7, %k1, %k1
2020 ; KNL-NEXT: kshiftlw $1, %k1, %k1
2021 ; KNL-NEXT: kshiftrw $1, %k1, %k1
2022 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2023 ; KNL-NEXT: kmovw %eax, %k7
2024 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2025 ; KNL-NEXT: korw %k7, %k1, %k1
2026 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2027 ; KNL-NEXT: kmovw %edi, %k1
2028 ; KNL-NEXT: kandw %k2, %k1, %k1
2029 ; KNL-NEXT: kmovw %esi, %k7
2030 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2031 ; KNL-NEXT: kshiftrw $14, %k7, %k7
2032 ; KNL-NEXT: korw %k7, %k1, %k1
2033 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2034 ; KNL-NEXT: kandw %k0, %k1, %k1
2035 ; KNL-NEXT: kmovw %edx, %k7
2036 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2037 ; KNL-NEXT: kshiftrw $13, %k7, %k7
2038 ; KNL-NEXT: korw %k7, %k1, %k1
2039 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2040 ; KNL-NEXT: kandw %k2, %k1, %k1
2041 ; KNL-NEXT: kmovw %ecx, %k7
2042 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2043 ; KNL-NEXT: kshiftrw $12, %k7, %k7
2044 ; KNL-NEXT: korw %k7, %k1, %k1
2045 ; KNL-NEXT: kandw %k6, %k1, %k1
2046 ; KNL-NEXT: kmovw %r8d, %k7
2047 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2048 ; KNL-NEXT: kshiftrw $11, %k7, %k7
2049 ; KNL-NEXT: korw %k7, %k1, %k1
2050 ; KNL-NEXT: kandw %k3, %k1, %k1
2051 ; KNL-NEXT: kmovw %r9d, %k7
2052 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2053 ; KNL-NEXT: kshiftrw $10, %k7, %k7
2054 ; KNL-NEXT: korw %k7, %k1, %k1
2055 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
2056 ; KNL-NEXT: kandw %k6, %k1, %k1
2057 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2058 ; KNL-NEXT: kmovw %eax, %k7
2059 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2060 ; KNL-NEXT: kshiftrw $9, %k7, %k7
2061 ; KNL-NEXT: korw %k7, %k1, %k1
2062 ; KNL-NEXT: kandw %k4, %k1, %k1
2063 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2064 ; KNL-NEXT: kmovw %eax, %k7
2065 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2066 ; KNL-NEXT: kshiftrw $8, %k7, %k7
2067 ; KNL-NEXT: korw %k7, %k1, %k1
2068 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
2069 ; KNL-NEXT: kandw %k3, %k1, %k1
2070 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2071 ; KNL-NEXT: kmovw %eax, %k7
2072 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2073 ; KNL-NEXT: kshiftrw $7, %k7, %k7
2074 ; KNL-NEXT: korw %k7, %k1, %k1
2075 ; KNL-NEXT: kandw %k5, %k1, %k1
2076 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2077 ; KNL-NEXT: kmovw %eax, %k7
2078 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2079 ; KNL-NEXT: kshiftrw $6, %k7, %k7
2080 ; KNL-NEXT: korw %k7, %k1, %k1
2081 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload
2082 ; KNL-NEXT: kandw %k4, %k1, %k1
2083 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2084 ; KNL-NEXT: kmovw %eax, %k7
2085 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2086 ; KNL-NEXT: kshiftrw $5, %k7, %k7
2087 ; KNL-NEXT: korw %k7, %k1, %k1
2088 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2089 ; KNL-NEXT: kandw %k5, %k1, %k1
2090 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2091 ; KNL-NEXT: kmovw %eax, %k7
2092 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2093 ; KNL-NEXT: kshiftrw $4, %k7, %k7
2094 ; KNL-NEXT: korw %k7, %k1, %k1
2095 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2096 ; KNL-NEXT: kandw %k7, %k1, %k1
2097 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2098 ; KNL-NEXT: kmovw %eax, %k7
2099 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2100 ; KNL-NEXT: kshiftrw $3, %k7, %k7
2101 ; KNL-NEXT: korw %k7, %k1, %k1
2102 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2103 ; KNL-NEXT: kandw %k7, %k1, %k1
2104 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2105 ; KNL-NEXT: kmovw %eax, %k7
2106 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2107 ; KNL-NEXT: kshiftrw $2, %k7, %k7
2108 ; KNL-NEXT: korw %k7, %k1, %k1
2109 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2110 ; KNL-NEXT: kandw %k7, %k1, %k1
2111 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2112 ; KNL-NEXT: kmovw %eax, %k7
2113 ; KNL-NEXT: kshiftlw $14, %k7, %k7
2114 ; KNL-NEXT: korw %k7, %k1, %k1
2115 ; KNL-NEXT: kshiftlw $1, %k1, %k1
2116 ; KNL-NEXT: kshiftrw $1, %k1, %k1
2117 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2118 ; KNL-NEXT: kmovw %eax, %k7
2119 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2120 ; KNL-NEXT: korw %k7, %k1, %k1
2121 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2122 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2123 ; KNL-NEXT: kmovw %eax, %k1
2124 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2125 ; KNL-NEXT: kandw %k7, %k1, %k1
2126 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2127 ; KNL-NEXT: kmovw %eax, %k7
2128 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2129 ; KNL-NEXT: kshiftrw $14, %k7, %k7
2130 ; KNL-NEXT: korw %k7, %k1, %k1
2131 ; KNL-NEXT: kandw %k0, %k1, %k1
2132 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2133 ; KNL-NEXT: kmovw %eax, %k7
2134 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2135 ; KNL-NEXT: kshiftrw $13, %k7, %k7
2136 ; KNL-NEXT: korw %k7, %k1, %k1
2137 ; KNL-NEXT: kandw %k2, %k1, %k1
2138 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2139 ; KNL-NEXT: kmovw %eax, %k7
2140 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2141 ; KNL-NEXT: kshiftrw $12, %k7, %k7
2142 ; KNL-NEXT: korw %k7, %k1, %k1
2143 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2144 ; KNL-NEXT: kandw %k0, %k1, %k1
2145 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2146 ; KNL-NEXT: kmovw %eax, %k7
2147 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2148 ; KNL-NEXT: kshiftrw $11, %k7, %k7
2149 ; KNL-NEXT: korw %k7, %k1, %k1
2150 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2151 ; KNL-NEXT: kandw %k2, %k1, %k1
2152 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2153 ; KNL-NEXT: kmovw %eax, %k7
2154 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2155 ; KNL-NEXT: kshiftrw $10, %k7, %k7
2156 ; KNL-NEXT: korw %k7, %k1, %k1
2157 ; KNL-NEXT: kandw %k6, %k1, %k1
2158 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2159 ; KNL-NEXT: kmovw %eax, %k7
2160 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2161 ; KNL-NEXT: kshiftrw $9, %k7, %k7
2162 ; KNL-NEXT: korw %k7, %k1, %k1
2163 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2164 ; KNL-NEXT: kandw %k2, %k1, %k1
2165 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2166 ; KNL-NEXT: kmovw %eax, %k7
2167 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2168 ; KNL-NEXT: kshiftrw $8, %k7, %k7
2169 ; KNL-NEXT: korw %k7, %k1, %k1
2170 ; KNL-NEXT: kandw %k3, %k1, %k1
2171 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2172 ; KNL-NEXT: kmovw %eax, %k7
2173 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2174 ; KNL-NEXT: kshiftrw $7, %k7, %k7
2175 ; KNL-NEXT: korw %k7, %k1, %k1
2176 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
2177 ; KNL-NEXT: kandw %k3, %k1, %k1
2178 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2179 ; KNL-NEXT: kmovw %eax, %k7
2180 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2181 ; KNL-NEXT: kshiftrw $6, %k7, %k7
2182 ; KNL-NEXT: korw %k7, %k1, %k1
2183 ; KNL-NEXT: kandw %k4, %k1, %k1
2184 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2185 ; KNL-NEXT: kmovw %eax, %k7
2186 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2187 ; KNL-NEXT: kshiftrw $5, %k7, %k7
2188 ; KNL-NEXT: korw %k7, %k1, %k1
2189 ; KNL-NEXT: kandw %k5, %k1, %k1
2190 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2191 ; KNL-NEXT: kmovw %eax, %k7
2192 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2193 ; KNL-NEXT: kshiftrw $4, %k7, %k7
2194 ; KNL-NEXT: korw %k7, %k1, %k1
2195 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2196 ; KNL-NEXT: kandw %k2, %k1, %k1
2197 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2198 ; KNL-NEXT: kmovw %eax, %k7
2199 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2200 ; KNL-NEXT: kshiftrw $3, %k7, %k7
2201 ; KNL-NEXT: korw %k7, %k1, %k1
2202 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2203 ; KNL-NEXT: kandw %k5, %k1, %k1
2204 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2205 ; KNL-NEXT: kmovw %eax, %k7
2206 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2207 ; KNL-NEXT: kshiftrw $2, %k7, %k7
2208 ; KNL-NEXT: korw %k7, %k1, %k1
2209 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2210 ; KNL-NEXT: kandw %k5, %k1, %k1
2211 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2212 ; KNL-NEXT: kmovw %eax, %k7
2213 ; KNL-NEXT: kshiftlw $14, %k7, %k7
2214 ; KNL-NEXT: korw %k7, %k1, %k1
2215 ; KNL-NEXT: kshiftlw $1, %k1, %k1
2216 ; KNL-NEXT: kshiftrw $1, %k1, %k1
2217 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2218 ; KNL-NEXT: kmovw %eax, %k7
2219 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2220 ; KNL-NEXT: korw %k7, %k1, %k1
2221 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2222 ; KNL-NEXT: kmovw %eax, %k7
2223 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2224 ; KNL-NEXT: kandw %k5, %k7, %k7
2225 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2226 ; KNL-NEXT: kmovw %eax, %k6
2227 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2228 ; KNL-NEXT: kshiftrw $14, %k6, %k6
2229 ; KNL-NEXT: korw %k6, %k7, %k6
2230 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2231 ; KNL-NEXT: kandw %k5, %k6, %k6
2232 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2233 ; KNL-NEXT: kmovw %eax, %k7
2234 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2235 ; KNL-NEXT: kshiftrw $13, %k7, %k7
2236 ; KNL-NEXT: korw %k7, %k6, %k6
2237 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2238 ; KNL-NEXT: kandw %k5, %k6, %k6
2239 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2240 ; KNL-NEXT: kmovw %eax, %k7
2241 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2242 ; KNL-NEXT: kshiftrw $12, %k7, %k7
2243 ; KNL-NEXT: korw %k7, %k6, %k6
2244 ; KNL-NEXT: kandw %k0, %k6, %k6
2245 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2246 ; KNL-NEXT: kmovw %eax, %k7
2247 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2248 ; KNL-NEXT: kshiftrw $11, %k7, %k7
2249 ; KNL-NEXT: korw %k7, %k6, %k6
2250 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2251 ; KNL-NEXT: kandw %k0, %k6, %k6
2252 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2253 ; KNL-NEXT: kmovw %eax, %k7
2254 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2255 ; KNL-NEXT: kshiftrw $10, %k7, %k7
2256 ; KNL-NEXT: korw %k7, %k6, %k6
2257 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2258 ; KNL-NEXT: kandw %k0, %k6, %k6
2259 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2260 ; KNL-NEXT: kmovw %eax, %k7
2261 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2262 ; KNL-NEXT: kshiftrw $9, %k7, %k7
2263 ; KNL-NEXT: korw %k7, %k6, %k6
2264 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2265 ; KNL-NEXT: kandw %k0, %k6, %k6
2266 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2267 ; KNL-NEXT: kmovw %eax, %k7
2268 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2269 ; KNL-NEXT: kshiftrw $8, %k7, %k7
2270 ; KNL-NEXT: korw %k7, %k6, %k6
2271 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2272 ; KNL-NEXT: kandw %k0, %k6, %k6
2273 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2274 ; KNL-NEXT: kmovw %eax, %k7
2275 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2276 ; KNL-NEXT: kshiftrw $7, %k7, %k7
2277 ; KNL-NEXT: korw %k7, %k6, %k6
2278 ; KNL-NEXT: kandw %k3, %k6, %k6
2279 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2280 ; KNL-NEXT: kmovw %eax, %k7
2281 ; KNL-NEXT: kshiftlw $15, %k7, %k7
2282 ; KNL-NEXT: kshiftrw $6, %k7, %k7
2283 ; KNL-NEXT: korw %k7, %k6, %k6
2284 ; KNL-NEXT: kandw %k4, %k6, %k5
2285 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2286 ; KNL-NEXT: kmovw %eax, %k6
2287 ; KNL-NEXT: kshiftlw $15, %k6, %k6
2288 ; KNL-NEXT: kshiftrw $5, %k6, %k6
2289 ; KNL-NEXT: korw %k6, %k5, %k5
2290 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2291 ; KNL-NEXT: kandw %k0, %k5, %k4
2292 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2293 ; KNL-NEXT: kmovw %eax, %k5
2294 ; KNL-NEXT: kshiftlw $15, %k5, %k5
2295 ; KNL-NEXT: kshiftrw $4, %k5, %k5
2296 ; KNL-NEXT: korw %k5, %k4, %k4
2297 ; KNL-NEXT: kandw %k2, %k4, %k3
2298 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2299 ; KNL-NEXT: kmovw %eax, %k4
2300 ; KNL-NEXT: kshiftlw $15, %k4, %k4
2301 ; KNL-NEXT: kshiftrw $3, %k4, %k4
2302 ; KNL-NEXT: korw %k4, %k3, %k3
2303 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2304 ; KNL-NEXT: kandw %k0, %k3, %k2
2305 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2306 ; KNL-NEXT: kmovw %eax, %k3
2307 ; KNL-NEXT: kshiftlw $15, %k3, %k3
2308 ; KNL-NEXT: kshiftrw $2, %k3, %k3
2309 ; KNL-NEXT: korw %k3, %k2, %k2
2310 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2311 ; KNL-NEXT: kandw %k0, %k2, %k0
2312 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2313 ; KNL-NEXT: kmovw %eax, %k2
2314 ; KNL-NEXT: kshiftlw $14, %k2, %k2
2315 ; KNL-NEXT: korw %k2, %k0, %k0
2316 ; KNL-NEXT: kshiftlw $1, %k0, %k0
2317 ; KNL-NEXT: kshiftrw $1, %k0, %k0
2318 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
2319 ; KNL-NEXT: kmovw %eax, %k2
2320 ; KNL-NEXT: kshiftlw $15, %k2, %k2
2321 ; KNL-NEXT: korw %k2, %k0, %k2
2322 ; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
2323 ; KNL-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
2324 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2325 ; KNL-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
2326 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2327 ; KNL-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z}
2328 ; KNL-NEXT: vpmovdw %zmm2, %ymm2
2329 ; KNL-NEXT: vpmovdw %zmm3, %ymm3
2330 ; KNL-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
2331 ; KNL-NEXT: vpandq %zmm1, %zmm2, %zmm1
2332 ; KNL-NEXT: vpmovdw %zmm4, %ymm2
2333 ; KNL-NEXT: vpmovdw %zmm5, %ymm3
2334 ; KNL-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
2335 ; KNL-NEXT: vpandq %zmm0, %zmm2, %zmm0
2338 ; SKX-LABEL: test21:
2340 ; SKX-NEXT: vpsllw $7, %zmm2, %zmm2
2341 ; SKX-NEXT: vpmovb2m %zmm2, %k1
2342 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
2343 ; SKX-NEXT: kshiftrq $32, %k1, %k1
2344 ; SKX-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z}
2347 ; AVX512DQNOBW-LABEL: test21:
2348 ; AVX512DQNOBW: # %bb.0:
2349 ; AVX512DQNOBW-NEXT: movw $-3, %ax
2350 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2351 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2352 ; AVX512DQNOBW-NEXT: kmovw %eax, %k0
2353 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2354 ; AVX512DQNOBW-NEXT: kmovw %k1, %k2
2355 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2356 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2357 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2358 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2359 ; AVX512DQNOBW-NEXT: kshiftrw $14, %k1, %k1
2360 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2361 ; AVX512DQNOBW-NEXT: movw $-5, %ax
2362 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2363 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2364 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2365 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2366 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2367 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2368 ; AVX512DQNOBW-NEXT: kshiftrw $13, %k1, %k1
2369 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2370 ; AVX512DQNOBW-NEXT: movw $-9, %ax
2371 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2372 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2373 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2374 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2375 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2376 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2377 ; AVX512DQNOBW-NEXT: kshiftrw $12, %k1, %k1
2378 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2379 ; AVX512DQNOBW-NEXT: movw $-17, %ax
2380 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2381 ; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0
2382 ; AVX512DQNOBW-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2383 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2384 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2385 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2386 ; AVX512DQNOBW-NEXT: kshiftrw $11, %k1, %k1
2387 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2388 ; AVX512DQNOBW-NEXT: movw $-33, %ax
2389 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2390 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2391 ; AVX512DQNOBW-NEXT: kmovw %k1, %k3
2392 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2393 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2394 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2395 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2396 ; AVX512DQNOBW-NEXT: kshiftrw $10, %k1, %k1
2397 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2398 ; AVX512DQNOBW-NEXT: movw $-65, %ax
2399 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2400 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2401 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2402 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2403 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2404 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2405 ; AVX512DQNOBW-NEXT: kshiftrw $9, %k1, %k1
2406 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2407 ; AVX512DQNOBW-NEXT: movw $-129, %ax
2408 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2409 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2410 ; AVX512DQNOBW-NEXT: kmovw %k1, %k4
2411 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2412 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2413 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2414 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2415 ; AVX512DQNOBW-NEXT: kshiftrw $8, %k1, %k1
2416 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2417 ; AVX512DQNOBW-NEXT: movw $-257, %ax # imm = 0xFEFF
2418 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2419 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2420 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2421 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2422 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2423 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2424 ; AVX512DQNOBW-NEXT: kshiftrw $7, %k1, %k1
2425 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2426 ; AVX512DQNOBW-NEXT: movw $-513, %ax # imm = 0xFDFF
2427 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2428 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2429 ; AVX512DQNOBW-NEXT: kmovw %k1, %k5
2430 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2431 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2432 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2433 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2434 ; AVX512DQNOBW-NEXT: kshiftrw $6, %k1, %k1
2435 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2436 ; AVX512DQNOBW-NEXT: movw $-1025, %ax # imm = 0xFBFF
2437 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2438 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2439 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2440 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2441 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2442 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2443 ; AVX512DQNOBW-NEXT: kshiftrw $5, %k1, %k1
2444 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2445 ; AVX512DQNOBW-NEXT: movw $-2049, %ax # imm = 0xF7FF
2446 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2447 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2448 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2449 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2450 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2451 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2452 ; AVX512DQNOBW-NEXT: kshiftrw $4, %k1, %k1
2453 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2454 ; AVX512DQNOBW-NEXT: movw $-4097, %ax # imm = 0xEFFF
2455 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2456 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2457 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2458 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2459 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2460 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2461 ; AVX512DQNOBW-NEXT: kshiftrw $3, %k1, %k1
2462 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2463 ; AVX512DQNOBW-NEXT: movw $-8193, %ax # imm = 0xDFFF
2464 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2465 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2466 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2467 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2468 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2469 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
2470 ; AVX512DQNOBW-NEXT: kshiftrw $2, %k1, %k1
2471 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
2472 ; AVX512DQNOBW-NEXT: movw $-16385, %ax # imm = 0xBFFF
2473 ; AVX512DQNOBW-NEXT: kmovw %eax, %k1
2474 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2475 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2476 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2477 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2478 ; AVX512DQNOBW-NEXT: kshiftlw $14, %k7, %k7
2479 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2480 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
2481 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
2482 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2483 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2484 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2485 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2486 ; AVX512DQNOBW-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2487 ; AVX512DQNOBW-NEXT: kmovw %edi, %k0
2488 ; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0
2489 ; AVX512DQNOBW-NEXT: kmovw %esi, %k7
2490 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2491 ; AVX512DQNOBW-NEXT: kshiftrw $14, %k7, %k7
2492 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2493 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2494 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2495 ; AVX512DQNOBW-NEXT: kmovw %edx, %k7
2496 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2497 ; AVX512DQNOBW-NEXT: kshiftrw $13, %k7, %k7
2498 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2499 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2500 ; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0
2501 ; AVX512DQNOBW-NEXT: kmovw %ecx, %k7
2502 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2503 ; AVX512DQNOBW-NEXT: kshiftrw $12, %k7, %k7
2504 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2505 ; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0
2506 ; AVX512DQNOBW-NEXT: kmovw %r8d, %k7
2507 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2508 ; AVX512DQNOBW-NEXT: kshiftrw $11, %k7, %k7
2509 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2510 ; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0
2511 ; AVX512DQNOBW-NEXT: kmovw %r9d, %k7
2512 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2513 ; AVX512DQNOBW-NEXT: kshiftrw $10, %k7, %k7
2514 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2515 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
2516 ; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0
2517 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2518 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2519 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2520 ; AVX512DQNOBW-NEXT: kshiftrw $9, %k7, %k7
2521 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2522 ; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0
2523 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2524 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2525 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2526 ; AVX512DQNOBW-NEXT: kshiftrw $8, %k7, %k7
2527 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2528 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
2529 ; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0
2530 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2531 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2532 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2533 ; AVX512DQNOBW-NEXT: kshiftrw $7, %k7, %k7
2534 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2535 ; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0
2536 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2537 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2538 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2539 ; AVX512DQNOBW-NEXT: kshiftrw $6, %k7, %k7
2540 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2541 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload
2542 ; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0
2543 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2544 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2545 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2546 ; AVX512DQNOBW-NEXT: kshiftrw $5, %k7, %k7
2547 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2548 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2549 ; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0
2550 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2551 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2552 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2553 ; AVX512DQNOBW-NEXT: kshiftrw $4, %k7, %k7
2554 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2555 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2556 ; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0
2557 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2558 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2559 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2560 ; AVX512DQNOBW-NEXT: kshiftrw $3, %k7, %k7
2561 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2562 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2563 ; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0
2564 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2565 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2566 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2567 ; AVX512DQNOBW-NEXT: kshiftrw $2, %k7, %k7
2568 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2569 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2570 ; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0
2571 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2572 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2573 ; AVX512DQNOBW-NEXT: kshiftlw $14, %k7, %k7
2574 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2575 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
2576 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
2577 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2578 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2579 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2580 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2581 ; AVX512DQNOBW-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2582 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2583 ; AVX512DQNOBW-NEXT: kmovw %eax, %k0
2584 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2585 ; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0
2586 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2587 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2588 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2589 ; AVX512DQNOBW-NEXT: kshiftrw $14, %k7, %k7
2590 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2591 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2592 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2593 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2594 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2595 ; AVX512DQNOBW-NEXT: kshiftrw $13, %k7, %k7
2596 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2597 ; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0
2598 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2599 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2600 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2601 ; AVX512DQNOBW-NEXT: kshiftrw $12, %k7, %k7
2602 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2603 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2604 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
2605 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2606 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2607 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2608 ; AVX512DQNOBW-NEXT: kshiftrw $11, %k7, %k7
2609 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2610 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2611 ; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0
2612 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2613 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2614 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2615 ; AVX512DQNOBW-NEXT: kshiftrw $10, %k7, %k7
2616 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2617 ; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0
2618 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2619 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2620 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2621 ; AVX512DQNOBW-NEXT: kshiftrw $9, %k7, %k7
2622 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2623 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2624 ; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0
2625 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2626 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2627 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2628 ; AVX512DQNOBW-NEXT: kshiftrw $8, %k7, %k7
2629 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2630 ; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0
2631 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2632 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2633 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2634 ; AVX512DQNOBW-NEXT: kshiftrw $7, %k7, %k7
2635 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2636 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
2637 ; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0
2638 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2639 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2640 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2641 ; AVX512DQNOBW-NEXT: kshiftrw $6, %k7, %k7
2642 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2643 ; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0
2644 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2645 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2646 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2647 ; AVX512DQNOBW-NEXT: kshiftrw $5, %k7, %k7
2648 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2649 ; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0
2650 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2651 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2652 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2653 ; AVX512DQNOBW-NEXT: kshiftrw $4, %k7, %k7
2654 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2655 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2656 ; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0
2657 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2658 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2659 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2660 ; AVX512DQNOBW-NEXT: kshiftrw $3, %k7, %k7
2661 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2662 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2663 ; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0
2664 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2665 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2666 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2667 ; AVX512DQNOBW-NEXT: kshiftrw $2, %k7, %k7
2668 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2669 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2670 ; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0
2671 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2672 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2673 ; AVX512DQNOBW-NEXT: kshiftlw $14, %k7, %k7
2674 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2675 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
2676 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
2677 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2678 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2679 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2680 ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0
2681 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2682 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2683 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2684 ; AVX512DQNOBW-NEXT: kandw %k5, %k7, %k7
2685 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2686 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2687 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2688 ; AVX512DQNOBW-NEXT: kshiftrw $14, %k6, %k6
2689 ; AVX512DQNOBW-NEXT: korw %k6, %k7, %k6
2690 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2691 ; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6
2692 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2693 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2694 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2695 ; AVX512DQNOBW-NEXT: kshiftrw $13, %k7, %k7
2696 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6
2697 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2698 ; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6
2699 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2700 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2701 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2702 ; AVX512DQNOBW-NEXT: kshiftrw $12, %k7, %k7
2703 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6
2704 ; AVX512DQNOBW-NEXT: kandw %k1, %k6, %k6
2705 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2706 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2707 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2708 ; AVX512DQNOBW-NEXT: kshiftrw $11, %k7, %k7
2709 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6
2710 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2711 ; AVX512DQNOBW-NEXT: kandw %k1, %k6, %k6
2712 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2713 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2714 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2715 ; AVX512DQNOBW-NEXT: kshiftrw $10, %k7, %k7
2716 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6
2717 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2718 ; AVX512DQNOBW-NEXT: kandw %k1, %k6, %k6
2719 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2720 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2721 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2722 ; AVX512DQNOBW-NEXT: kshiftrw $9, %k7, %k7
2723 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6
2724 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2725 ; AVX512DQNOBW-NEXT: kandw %k1, %k6, %k6
2726 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2727 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2728 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2729 ; AVX512DQNOBW-NEXT: kshiftrw $8, %k7, %k7
2730 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6
2731 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2732 ; AVX512DQNOBW-NEXT: kandw %k1, %k6, %k6
2733 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2734 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2735 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2736 ; AVX512DQNOBW-NEXT: kshiftrw $7, %k7, %k7
2737 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6
2738 ; AVX512DQNOBW-NEXT: kandw %k3, %k6, %k6
2739 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2740 ; AVX512DQNOBW-NEXT: kmovw %eax, %k7
2741 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
2742 ; AVX512DQNOBW-NEXT: kshiftrw $6, %k7, %k7
2743 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6
2744 ; AVX512DQNOBW-NEXT: kandw %k4, %k6, %k5
2745 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2746 ; AVX512DQNOBW-NEXT: kmovw %eax, %k6
2747 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
2748 ; AVX512DQNOBW-NEXT: kshiftrw $5, %k6, %k6
2749 ; AVX512DQNOBW-NEXT: korw %k6, %k5, %k5
2750 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2751 ; AVX512DQNOBW-NEXT: kandw %k1, %k5, %k4
2752 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2753 ; AVX512DQNOBW-NEXT: kmovw %eax, %k5
2754 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k5, %k5
2755 ; AVX512DQNOBW-NEXT: kshiftrw $4, %k5, %k5
2756 ; AVX512DQNOBW-NEXT: korw %k5, %k4, %k4
2757 ; AVX512DQNOBW-NEXT: kandw %k2, %k4, %k3
2758 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2759 ; AVX512DQNOBW-NEXT: kmovw %eax, %k4
2760 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
2761 ; AVX512DQNOBW-NEXT: kshiftrw $3, %k4, %k4
2762 ; AVX512DQNOBW-NEXT: korw %k4, %k3, %k3
2763 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2764 ; AVX512DQNOBW-NEXT: kandw %k1, %k3, %k2
2765 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2766 ; AVX512DQNOBW-NEXT: kmovw %eax, %k3
2767 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k3, %k3
2768 ; AVX512DQNOBW-NEXT: kshiftrw $2, %k3, %k3
2769 ; AVX512DQNOBW-NEXT: korw %k3, %k2, %k2
2770 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2771 ; AVX512DQNOBW-NEXT: kandw %k1, %k2, %k1
2772 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2773 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2774 ; AVX512DQNOBW-NEXT: kshiftlw $14, %k2, %k2
2775 ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k1
2776 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1
2777 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1
2778 ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
2779 ; AVX512DQNOBW-NEXT: kmovw %eax, %k2
2780 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
2781 ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k1
2782 ; AVX512DQNOBW-NEXT: vpmovm2d %k1, %zmm2
2783 ; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm3
2784 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2785 ; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm4
2786 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2787 ; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm5
2788 ; AVX512DQNOBW-NEXT: vpmovdw %zmm2, %ymm2
2789 ; AVX512DQNOBW-NEXT: vpmovdw %zmm3, %ymm3
2790 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
2791 ; AVX512DQNOBW-NEXT: vpandq %zmm1, %zmm2, %zmm1
2792 ; AVX512DQNOBW-NEXT: vpmovdw %zmm4, %ymm2
2793 ; AVX512DQNOBW-NEXT: vpmovdw %zmm5, %ymm3
2794 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
2795 ; AVX512DQNOBW-NEXT: vpandq %zmm0, %zmm2, %zmm0
2796 ; AVX512DQNOBW-NEXT: retq
2797 %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer
; Zero-extension of <16 x i8> to <16 x i16> expressed as a shuffle + bitcast
; instead of a zext instruction. The shuffle mask alternates source bytes
; 0..15 of %a with index 16, which selects element 0 of the zeroinitializer
; operand, so every source byte is followed by a zero byte.
; All run configurations share one expectation here (the common check prefix):
; the pattern should be recognised and emitted as a single vpmovzxbw.
2801 define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone {
2802 ; ALL-LABEL: shuffle_zext_16x8_to_16x16:
2804 ; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2806 %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
2807 %2 = bitcast <32 x i8> %1 to <16 x i16>
; Masked variant of the shuffle-based <16 x i8> -> <16 x i16> zero-extension:
; the shuffle interleaves bytes 0..15 of %a with a zero byte (index 16 picks
; element 0 of the zeroinitializer operand), the result is bitcast to
; <16 x i16>, and a select on %mask replaces unselected lanes with zero.
; Expected lowering differs by run configuration:
;  - SKX checks: the <16 x i1> mask is moved into %k1 (vpsllw $7 + vpmovb2m)
;    and folded into a zero-masked vpmovzxbw ({%k1} {z}).
;  - KNL and AVX512DQNOBW checks: no masked vpmovzxbw is expected; instead
;    xmm1 (presumably the %mask argument) is widened to words, vpsllw $15 /
;    vpsraw $15 replicate bit 0 of each word across the whole word, and the
;    result is ANDed with the zero-extended data.
2811 define <16 x i16> @shuffle_zext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone {
2812 ; KNL-LABEL: shuffle_zext_16x8_to_16x16_mask:
2814 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
2815 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2816 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
2817 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
2818 ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
2821 ; SKX-LABEL: shuffle_zext_16x8_to_16x16_mask:
2823 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
2824 ; SKX-NEXT: vpmovb2m %xmm1, %k1
2825 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2828 ; AVX512DQNOBW-LABEL: shuffle_zext_16x8_to_16x16_mask:
2829 ; AVX512DQNOBW: # %bb.0:
2830 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
2831 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2832 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
2833 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
2834 ; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0
2835 ; AVX512DQNOBW-NEXT: retq
2836 %x = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
2837 %bc = bitcast <32 x i8> %x to <16 x i16>
2838 %ret = select <16 x i1> %mask, <16 x i16> %bc, <16 x i16> zeroinitializer
; Zero-extends the low 16 bytes of a <32 x i8> input to <16 x i16> via a
; shuffle + bitcast. The mask alternates bytes 0..15 of %a with index 32,
; which selects element 0 of the zeroinitializer operand; bytes 16..31 of %a
; are never referenced.
; All run configurations are expected to emit a single vpmovzxbw reading xmm0.
2842 define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) {
2843 ; ALL-LABEL: zext_32x8_to_16x16:
2845 ; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2847 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 1, i32 32, i32 2, i32 32, i32 3, i32 32, i32 4, i32 32, i32 5, i32 32, i32 6, i32 32, i32 7, i32 32, i32 8, i32 32, i32 9, i32 32, i32 10, i32 32, i32 11, i32 32, i32 12, i32 32, i32 13, i32 32, i32 14, i32 32, i32 15, i32 32>
2848 %2 = bitcast <32 x i8> %1 to <16 x i16>
; Zero-extends the low 8 bytes of a <32 x i8> input to <8 x i32> via a
; shuffle + bitcast. Each of bytes 0..7 of %a is followed by three copies of
; index 32 (element 0 of the zeroinitializer operand), forming one 32-bit lane
; per source byte.
; All run configurations are expected to emit a single vpmovzxbd reading xmm0.
2852 define <8 x i32> @zext_32x8_to_8x32(<32 x i8> %a) {
2853 ; ALL-LABEL: zext_32x8_to_8x32:
2855 ; ALL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2857 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
2858 %2 = bitcast <32 x i8> %1 to <8 x i32>
; Zero-extends the low 4 bytes of a <32 x i8> input to <4 x i64> via a
; shuffle + bitcast. Each of bytes 0..3 of %a is followed by seven copies of
; index 32 (element 0 of the zeroinitializer operand), forming one 64-bit lane
; per source byte.
; All run configurations are expected to emit a single vpmovzxbq reading xmm0.
2862 define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) {
2863 ; ALL-LABEL: zext_32x8_to_4x64:
2865 ; ALL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
2867 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
2868 %2 = bitcast <32 x i8> %1 to <4 x i64>
; Zero-extends the low 8 words of a <16 x i16> input to <8 x i32> via a
; shuffle + bitcast. The mask alternates words 0..7 of %a with index 16,
; which selects element 0 of the zeroinitializer operand; words 8..15 of %a
; are never referenced.
; All run configurations are expected to emit a single vpmovzxwd reading xmm0.
2872 define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) {
2873 ; ALL-LABEL: zext_16x16_to_8x32:
2875 ; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2877 %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16>
2878 %2 = bitcast <16 x i16> %1 to <8 x i32>
; Zero-extension written as a shuffle: words 0-3 of %a are each followed by
; three words taken from index 16 (element 0 of the zeroinitializer operand),
; and the <16 x i16> result is bitcast to <4 x i64>.
; All three RUN configurations are expected to select a single vpmovzxwq.
2882 define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) {
2883 ; ALL-LABEL: zext_16x16_to_4x64:
2885 ; ALL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2887 %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
2888 %2 = bitcast <16 x i16> %1 to <4 x i64>
; Zero-extension written as a shuffle: dwords 0-3 of %a are each followed by
; one dword taken from index 8 (element 0 of the zeroinitializer operand),
; and the <8 x i32> result is bitcast to <4 x i64>.
; All three RUN configurations are expected to select a single vpmovzxdq.
2892 define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) {
2893 ; ALL-LABEL: zext_8x32_to_4x64:
2895 ; ALL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2897 %1 = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8>
2898 %2 = bitcast <8 x i32> %1 to <4 x i64>
; Compares two <64 x i8> vectors for equality and zero-extends the <64 x i1>
; result back to <64 x i8>.
; Expected lowering for the KNL and AVX512DQNOBW prefixes (both lack avx512bw):
; split each operand into 256-bit halves, vpcmpeqb each half, reassemble the
; 512-bit value and AND it with a constant-pool operand.
; Expected lowering for the SKX prefix: vpcmpeqb straight into mask register k1,
; then a zero-masked vmovdqu8 load from the constant pool.
; NOTE(review): the constant-pool value is matched by regex only; presumably a
; splat of 1 - confirm against the generated assembly.
2902 define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
2903 ; KNL-LABEL: zext_64xi1_to_64xi8:
2905 ; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
2906 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
2907 ; KNL-NEXT: vpcmpeqb %ymm2, %ymm3, %ymm2
2908 ; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
2909 ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
2910 ; KNL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
2913 ; SKX-LABEL: zext_64xi1_to_64xi8:
2915 ; SKX-NEXT: vpcmpeqb %zmm1, %zmm0, %k1
2916 ; SKX-NEXT: vmovdqu8 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0 {%k1} {z}
2919 ; AVX512DQNOBW-LABEL: zext_64xi1_to_64xi8:
2920 ; AVX512DQNOBW: # %bb.0:
2921 ; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm1, %ymm2
2922 ; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm3
2923 ; AVX512DQNOBW-NEXT: vpcmpeqb %ymm2, %ymm3, %ymm2
2924 ; AVX512DQNOBW-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
2925 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
2926 ; AVX512DQNOBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
2927 ; AVX512DQNOBW-NEXT: retq
2928 %mask = icmp eq <64 x i8> %x, %y
2929 %1 = zext <64 x i1> %mask to <64 x i8>
; Compares two <32 x i16> vectors for equality and zero-extends the <32 x i1>
; result to <32 x i16>.
; Expected lowering for the KNL and AVX512DQNOBW prefixes (both lack avx512bw):
; split each operand into 256-bit halves, vpcmpeqw each half, reassemble the
; 512-bit value and AND it with a constant-pool operand.
; Expected lowering for the SKX prefix: vpcmpeqw into mask register k0,
; vpmovm2w to expand the mask to a vector, then vpsrlw $15 on each word.
2933 define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
2934 ; KNL-LABEL: zext_32xi1_to_32xi16:
2936 ; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
2937 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
2938 ; KNL-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2
2939 ; KNL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
2940 ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
2941 ; KNL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
2944 ; SKX-LABEL: zext_32xi1_to_32xi16:
2946 ; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
2947 ; SKX-NEXT: vpmovm2w %k0, %zmm0
2948 ; SKX-NEXT: vpsrlw $15, %zmm0, %zmm0
2951 ; AVX512DQNOBW-LABEL: zext_32xi1_to_32xi16:
2952 ; AVX512DQNOBW: # %bb.0:
2953 ; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm1, %ymm2
2954 ; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm3
2955 ; AVX512DQNOBW-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2
2956 ; AVX512DQNOBW-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
2957 ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
2958 ; AVX512DQNOBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
2959 ; AVX512DQNOBW-NEXT: retq
2960 %mask = icmp eq <32 x i16> %x, %y
2961 %1 = zext <32 x i1> %mask to <32 x i16>
; Compares two <16 x i16> vectors for equality and zero-extends the <16 x i1>
; result to <16 x i16>.
; All three RUN configurations are expected to produce the same two
; instructions: a 256-bit vpcmpeqw followed by vpsrlw $15 on each word.
2965 define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 {
2966 ; ALL-LABEL: zext_16xi1_to_16xi16:
2968 ; ALL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
2969 ; ALL-NEXT: vpsrlw $15, %ymm0, %ymm0
2971 %mask = icmp eq <16 x i16> %x, %y
2972 %1 = zext <16 x i1> %mask to <16 x i16>
; Compares two <32 x i16> vectors for equality and zero-extends the <32 x i1>
; result to the narrower <32 x i8> type.
; Expected lowering for the KNL and AVX512DQNOBW prefixes (both lack avx512bw):
; vpcmpeqw on each 256-bit half, then each half is widened with vpmovzxwd and
; truncated to bytes with vpmovdb; the two 128-bit results are joined with
; vinserti128 and ANDed with a constant-pool operand.
; Expected lowering for the SKX prefix: vpcmpeqw into mask register k1, then a
; zero-masked 256-bit vmovdqu8 load from the constant pool.
2977 define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 {
2978 ; KNL-LABEL: zext_32xi1_to_32xi8:
2980 ; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
2981 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
2982 ; KNL-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2
2983 ; KNL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
2984 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
2985 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
2986 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
2987 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
2988 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2989 ; KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
2992 ; SKX-LABEL: zext_32xi1_to_32xi8:
2994 ; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k1
2995 ; SKX-NEXT: vmovdqu8 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0 {%k1} {z}
2998 ; AVX512DQNOBW-LABEL: zext_32xi1_to_32xi8:
2999 ; AVX512DQNOBW: # %bb.0:
3000 ; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm1, %ymm2
3001 ; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm3
3002 ; AVX512DQNOBW-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2
3003 ; AVX512DQNOBW-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
3004 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
3005 ; AVX512DQNOBW-NEXT: vpmovdb %zmm0, %xmm0
3006 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
3007 ; AVX512DQNOBW-NEXT: vpmovdb %zmm1, %xmm1
3008 ; AVX512DQNOBW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3009 ; AVX512DQNOBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
3010 ; AVX512DQNOBW-NEXT: retq
3011 %mask = icmp eq <32 x i16> %x, %y
3012 %1 = zext <32 x i1> %mask to <32 x i8>
; Compares two <4 x i8> vectors for equality and zero-extends the <4 x i1>
; result to <4 x i32>.
; Expected lowering for the KNL prefix: vpcmpeqb, vpmovzxbd to widen the byte
; results, then vpand with a vpbroadcastd splat of [1,1,1,1].
; Expected lowering for the AVX512DQNOBW prefix: same compare and widen, but the
; AND is a vpandd with a {1to4} broadcast memory operand.
; Expected lowering for the SKX prefix: vpcmpeqb into mask register k0,
; vpmovm2d to expand the mask, then vpsrld $31 on each dword.
3016 define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 {
3017 ; KNL-LABEL: zext_4xi1_to_4x32:
3019 ; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
3020 ; KNL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3021 ; KNL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
3022 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
3025 ; SKX-LABEL: zext_4xi1_to_4x32:
3027 ; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
3028 ; SKX-NEXT: vpmovm2d %k0, %xmm0
3029 ; SKX-NEXT: vpsrld $31, %xmm0, %xmm0
3032 ; AVX512DQNOBW-LABEL: zext_4xi1_to_4x32:
3033 ; AVX512DQNOBW: # %bb.0:
3034 ; AVX512DQNOBW-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
3035 ; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3036 ; AVX512DQNOBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
3037 ; AVX512DQNOBW-NEXT: retq
3038 %mask = icmp eq <4 x i8> %x, %y
3039 %1 = zext <4 x i1> %mask to <4 x i32>
; Compares two <2 x i8> vectors for equality and zero-extends the <2 x i1>
; result to <2 x i64>.
; Expected lowering for the KNL and AVX512DQNOBW prefixes: vpcmpeqb, vpmovzxbq
; to widen the byte results, then vpand with a constant-pool operand.
; Expected lowering for the SKX prefix: vpcmpeqb into mask register k0,
; vpmovm2q to expand the mask, then vpsrlq $63 on each qword.
3043 define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 {
3044 ; KNL-LABEL: zext_2xi1_to_2xi64:
3046 ; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
3047 ; KNL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
3048 ; KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3051 ; SKX-LABEL: zext_2xi1_to_2xi64:
3053 ; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
3054 ; SKX-NEXT: vpmovm2q %k0, %xmm0
3055 ; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0
3058 ; AVX512DQNOBW-LABEL: zext_2xi1_to_2xi64:
3059 ; AVX512DQNOBW: # %bb.0:
3060 ; AVX512DQNOBW-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
3061 ; AVX512DQNOBW-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
3062 ; AVX512DQNOBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3063 ; AVX512DQNOBW-NEXT: retq
3064 %mask = icmp eq <2 x i8> %x, %y
3065 %1 = zext <2 x i1> %mask to <2 x i64>